reflow

Преобликује пасусе у редове
Дневник | Датотеке | Референце | ПРОЧИТАЈМЕ | ЛИЦЕНЦА

чување d130c9e1f9cc8ae6df727076ef57ca1a24a980c1
родитељ 101521eeba7c760bcfe38d03b9420f213f39dc46
Аутор: Страхиња Радић <contact@strahinja.org>
Датум:   Tue, 20 Jul 2021 09:55:41 +0200

Added INSTALL, removed the dependency on libunistring

Signed-off-by: Страхиња Радић <contact@strahinja.org>

Diffstat:
A INSTALL | 21 +++++++++++++++++++++
M README | 5 +----
M all.do | 2 +-
M reflow.c | 35 +++++++++++++++++++----------------
M reflow.do | 2 +-
D utf8.c | 161 -------------------------------------------------------------------------------
D utf8.h | 40 ----------------------------------------
измењених датотека: 7, додавања: 43(+), брисања: 223(-)

diff --git a/INSTALL b/INSTALL @@ -0,0 +1,21 @@ +Prerequisites +============= + +* C compiler (GCC-compatible) - ${UFOLD_CC:-gcc} +* gzip + +Install +------- + + $ git clone https://git.sr.ht/~strahinja/reflow + $ cd reflow + $ su + +If you have djb redo: + + # redo install + +if you don't: + + # ./do install + diff --git a/README b/README @@ -5,10 +5,7 @@ Reflows paragraphs into single lines. Useful e.g. for Gemini. How? Read the source code for details. -How to build? Using a C compiler. (Obviously!) Add -lunistring. (Maybe less -obvious.) - -How to install? Copy to a directory in $PATH. +How to build/install? See INSTALL. Why? Initially, to make formatting of text files in TLDP better. diff --git a/all.do b/all.do @@ -1,2 +1,2 @@ -redo-ifchange reflow reflow.1.gz +redo-ifchange version.h reflow reflow.1.gz diff --git a/reflow.c b/reflow.c @@ -7,11 +7,12 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <unistr.h> #include "defs.h" #include "version.h" +char filename[MAXPATH]; + int usage() { @@ -27,7 +28,8 @@ error(int code, char* format, ...) va_start(args, format); vsnprintf(buf, sizeof(buf), format, args); va_end(args); - fprintf(stderr, "%s: %s\n", PROGRAMNAME, buf); + fprintf(stderr, "%s:%s: %s\n", PROGRAMNAME, + *filename ? 
filename : "(stdin)", buf); return code; } @@ -47,17 +49,16 @@ is_whitespace(char ch) int main(int argc, char** argv) { - size_t buffer_size = 0; - uint8_t* buffer = NULL; - uint8_t* pbuffer = NULL; - size_t line_len = 0; - char* line = calloc(MAXBUF, 1); - uint8_t* pline = NULL; - uint8_t* eol = NULL; - FILE* input = NULL; - int argn = 1; - char* carg = NULL; - char filename[MAXPATH]; + size_t buffer_size = 0; + char* buffer = NULL; + char* pbuffer = NULL; + size_t line_len = 0; + char* line = calloc(MAXBUF, 1); + char* pline = NULL; + char* eol = NULL; + FILE* input = NULL; + int argn = 1; + char* carg = NULL; *filename = 0; while (argn < argc) @@ -74,10 +75,12 @@ main(int argc, char** argv) argn++; } - if (*filename == '-' && !*(filename+1)) + if (!*filename || (*filename == '-' && !*(filename+1))) input = stdin; else input = fopen(filename, "rt"); + if (!input) + return error(errno, "Cannot open file"); while (!feof(stdin)) { @@ -93,7 +96,7 @@ main(int argc, char** argv) if (eol) line_len = eol-line+1; else - line_len = u8_strlen(line); + line_len = strlen(line); buffer_size += line_len; if (!buffer) @@ -142,7 +145,7 @@ main(int argc, char** argv) pbuffer = buffer; while (*pbuffer) { - eol = u8_strchr(pbuffer, '\n'); + eol = strchr(pbuffer, '\n'); if (eol) line_len = eol - pbuffer; else diff --git a/reflow.do b/reflow.do @@ -5,5 +5,5 @@ for f in *.h *.c; do echo $f echo $f | sed -e's/\.c$/.o/g' done | xargs redo-ifchange -${REFLOW_CC:-gcc} -g -Wall -std=c99 -o $3 reflow.o utf8.o +${REFLOW_CC:-gcc} -g -Wall -std=c99 -o $3 reflow.o diff --git a/utf8.c b/utf8.c @@ -1,161 +0,0 @@ -/* See the file LICENSE for copyright and license details. 
*/ - -#include "utf8.h" - -/* - * 00000000 -- 0000007F: 0xxxxxxx - * (2^7 = 128 chars) - * 00000080 -- 000007FF: 110xxxxx 10xxxxxx - * (2^5 = 32 chars) - * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx - * (2^4 = 16 chars) - * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - * (2^3 = 8 chars) - * 00200000 -- 007FFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - * (2^2 = 4 chars) - * 00800000 -- 00FFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - * 10xxxxxx - * (2^1 = 2 chars) - */ - -const int -utf_length_table[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 32 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 64 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 96 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 128 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 160 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 192 */ - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 224 */ - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1, /* 256 */ -}; - -const int -extract_masks[6] = { - XMASK1, XMASK2, XMASK3, XMASK4, XMASK5, XMASK6 -}; - -int -u8_char_to_u32(u32* to, const u8 *from, size_t* from_delta) -{ - const u8* pfrom = from; - if (!from) - return 1; - *from_delta = 0; - int len = utf_length_table[*from]; - *to = *from & extract_masks[len-1]; - for (int i = 1; i < len; i++) - { - pfrom = from + i; - if (!*pfrom) - return 1; - *to <<= 6; - *to |= *pfrom & XMASKR; - } - *from_delta = len; - - return 0; -} - -size_t -u32_char_to_u8(u8 *to, const u32 from) -{ - size_t len = 0; - u8 start = 0; - u32 cfrom = 
from; - if (from >= BOUND6) - { - start = START6; - len = 6; - } - else if (from >= BOUND5) - { - start = START5; - len = 5; - } - else if (from >= BOUND4) - { - start = START4; - len = 4; - } - else if (from >= BOUND3) - { - start = START3; - len = 3; - } - else if (from >= BOUND2) - { - start = START2; - len = 2; - } - else - { - start = START1; - len = 1; - } - for (int i = len-1; i > 0; i--) - { - to[len-1] = STARTR | (cfrom & XMASKR); - cfrom >>= 6; - } - to[0] = start | cfrom; - return len; -} - -int -u8_to_u32(u32 *to, const u8 *from, size_t* from_delta) -{ - const u8* pfrom = from; - u32* pto = to; - if (!from) - return 1; - size_t delta = 0; - *from_delta = 0; - while (*pfrom) - { - int result = u8_char_to_u32(pto, pfrom, &delta); - if (result) - return result; - pto++; - pfrom += delta; - *from_delta += delta; - } - *pto = 0; - return 0; -} - -size_t -u32_to_u8(u8 *to, const u32* from) -{ - u8* pto = to; - const u32* pfrom = from; - size_t len = 0; - if (!from) - return 0; - while (*pfrom) - { - size_t delta = u32_char_to_u8(pto, *pfrom); - pto += delta; - len += delta; - pfrom++; - } - return len; -} - -size_t -u32_strlen(const u32* s) -{ - const u32* ps = s; - while (ps && *ps) - ps++; - return ps - s; -} - diff --git a/utf8.h b/utf8.h @@ -1,40 +0,0 @@ -/* See the file LICENSE for copyright and license details. 
*/ - -#include <stdint.h> -#include <sys/types.h> - -#define XMASK1 0x7F /* b01111111 */ -#define XMASK2 0x1F /* b00011111 */ -#define XMASK3 0x0F /* b00001111 */ -#define XMASK4 0x07 /* b00000111 */ -#define XMASK5 0x03 /* b00000011 */ -#define XMASK6 0x01 /* b00000001 */ - -#define XMASKR 0x3F /* b00111111 */ - -/* -#define BOUND1 0x0*/ -#define BOUND2 0x80 -#define BOUND3 0x800 -#define BOUND4 0x10000 -#define BOUND5 0x200000 -#define BOUND6 0x800000 - -#define START1 0x00 /* b0xxxxxxx */ -#define START2 0xC0 /* b110xxxxx */ -#define START3 0xE0 /* b1110xxxx */ -#define START4 0xF0 /* b11110xxx */ -#define START5 0xF8 /* b111110xx */ -#define START6 0xFC /* b1111110x */ - -#define STARTR 0x80 /* b10xxxxxx */ - -typedef uint8_t u8; -typedef uint32_t u32; - -int u8_char_to_u32(u32* to, const u8* from, size_t* from_delta); -size_t u32_char_to_u8(u8* to, const u32 from); -int u8_to_u32(u32* to, const u8* from, size_t* from_delta); -size_t u32_to_u8(u8* to, const u32* from); -size_t u32_strlen(const u32* s); -