чување d130c9e1f9cc8ae6df727076ef57ca1a24a980c1
родитељ 101521eeba7c760bcfe38d03b9420f213f39dc46
Аутор: Страхиња Радић <contact@strahinja.org>
Датум: Tue, 20 Jul 2021 09:55:41 +0200
Added INSTALL, removed the dependency on libunistring
Signed-off-by: Страхиња Радић <contact@strahinja.org>
Diffstat:
| A | INSTALL | | | 21 | +++++++++++++++++++++ |
| M | README | | | 5 | +---- |
| M | all.do | | | 2 | +- |
| M | reflow.c | | | 35 | +++++++++++++++++++---------------- |
| M | reflow.do | | | 2 | +- |
| D | utf8.c | | | 161 | ------------------------------------------------------------------------------- |
| D | utf8.h | | | 40 | ---------------------------------------- |
измењених датотека: 7, додавања: 43(+), брисања: 223(-)
diff --git a/INSTALL b/INSTALL
@@ -0,0 +1,21 @@
+Prerequisites
+=============
+
+* C compiler (GCC-compatible) - ${REFLOW_CC:-gcc}
+* gzip
+
+Install
+-------
+
+ $ git clone https://git.sr.ht/~strahinja/reflow
+ $ cd reflow
+ $ su
+
+If you have djb redo:
+
+ # redo install
+
+if you don't:
+
+ # ./do install
+
diff --git a/README b/README
@@ -5,10 +5,7 @@ Reflows paragraphs into single lines. Useful e.g. for Gemini.
How? Read the source code for details.
-How to build? Using a C compiler. (Obviously!) Add -lunistring. (Maybe less
-obvious.)
-
-How to install? Copy to a directory in $PATH.
+How to build/install? See INSTALL.
Why? Initially, to make formatting of text files in TLDP better.
diff --git a/all.do b/all.do
@@ -1,2 +1,2 @@
-redo-ifchange reflow reflow.1.gz
+redo-ifchange version.h reflow reflow.1.gz
diff --git a/reflow.c b/reflow.c
@@ -7,11 +7,12 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <unistr.h>
#include "defs.h"
#include "version.h"
+char filename[MAXPATH];
+
int
usage()
{
@@ -27,7 +28,8 @@ error(int code, char* format, ...)
va_start(args, format);
vsnprintf(buf, sizeof(buf), format, args);
va_end(args);
- fprintf(stderr, "%s: %s\n", PROGRAMNAME, buf);
+ fprintf(stderr, "%s:%s: %s\n", PROGRAMNAME,
+ *filename ? filename : "(stdin)", buf);
return code;
}
@@ -47,17 +49,16 @@ is_whitespace(char ch)
int
main(int argc, char** argv)
{
- size_t buffer_size = 0;
- uint8_t* buffer = NULL;
- uint8_t* pbuffer = NULL;
- size_t line_len = 0;
- char* line = calloc(MAXBUF, 1);
- uint8_t* pline = NULL;
- uint8_t* eol = NULL;
- FILE* input = NULL;
- int argn = 1;
- char* carg = NULL;
- char filename[MAXPATH];
+ size_t buffer_size = 0;
+ char* buffer = NULL;
+ char* pbuffer = NULL;
+ size_t line_len = 0;
+ char* line = calloc(MAXBUF, 1);
+ char* pline = NULL;
+ char* eol = NULL;
+ FILE* input = NULL;
+ int argn = 1;
+ char* carg = NULL;
*filename = 0;
while (argn < argc)
@@ -74,10 +75,12 @@ main(int argc, char** argv)
argn++;
}
- if (*filename == '-' && !*(filename+1))
+ if (!*filename || (*filename == '-' && !*(filename+1)))
input = stdin;
else
input = fopen(filename, "rt");
+ if (!input)
+ return error(errno, "Cannot open file");
while (!feof(stdin))
{
@@ -93,7 +96,7 @@ main(int argc, char** argv)
if (eol)
line_len = eol-line+1;
else
- line_len = u8_strlen(line);
+ line_len = strlen(line);
buffer_size += line_len;
if (!buffer)
@@ -142,7 +145,7 @@ main(int argc, char** argv)
pbuffer = buffer;
while (*pbuffer)
{
- eol = u8_strchr(pbuffer, '\n');
+ eol = strchr(pbuffer, '\n');
if (eol)
line_len = eol - pbuffer;
else
diff --git a/reflow.do b/reflow.do
@@ -5,5 +5,5 @@ for f in *.h *.c; do
echo $f
echo $f | sed -e's/\.c$/.o/g'
done | xargs redo-ifchange
-${REFLOW_CC:-gcc} -g -Wall -std=c99 -o $3 reflow.o utf8.o
+${REFLOW_CC:-gcc} -g -Wall -std=c99 -o $3 reflow.o
diff --git a/utf8.c b/utf8.c
@@ -1,161 +0,0 @@
-/* See the file LICENSE for copyright and license details. */
-
-#include "utf8.h"
-
-/*
- * 00000000 -- 0000007F: 0xxxxxxx
- * (2^7 = 128 chars)
- * 00000080 -- 000007FF: 110xxxxx 10xxxxxx
- * (2^5 = 32 chars)
- * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
- * (2^4 = 16 chars)
- * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- * (2^3 = 8 chars)
- * 00200000 -- 007FFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
- * (2^2 = 4 chars)
- * 00800000 -- 00FFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
- * 10xxxxxx
- * (2^1 = 2 chars)
- */
-
-const int
-utf_length_table[256] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 32 */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 64 */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 96 */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 128 */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 160 */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 192 */
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 224 */
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1, /* 256 */
-};
-
-const int
-extract_masks[6] = {
- XMASK1, XMASK2, XMASK3, XMASK4, XMASK5, XMASK6
-};
-
-int
-u8_char_to_u32(u32* to, const u8 *from, size_t* from_delta)
-{
- const u8* pfrom = from;
- if (!from)
- return 1;
- *from_delta = 0;
- int len = utf_length_table[*from];
- *to = *from & extract_masks[len-1];
- for (int i = 1; i < len; i++)
- {
- pfrom = from + i;
- if (!*pfrom)
- return 1;
- *to <<= 6;
- *to |= *pfrom & XMASKR;
- }
- *from_delta = len;
-
- return 0;
-}
-
-size_t
-u32_char_to_u8(u8 *to, const u32 from)
-{
- size_t len = 0;
- u8 start = 0;
- u32 cfrom = from;
- if (from >= BOUND6)
- {
- start = START6;
- len = 6;
- }
- else if (from >= BOUND5)
- {
- start = START5;
- len = 5;
- }
- else if (from >= BOUND4)
- {
- start = START4;
- len = 4;
- }
- else if (from >= BOUND3)
- {
- start = START3;
- len = 3;
- }
- else if (from >= BOUND2)
- {
- start = START2;
- len = 2;
- }
- else
- {
- start = START1;
- len = 1;
- }
- for (int i = len-1; i > 0; i--)
- {
- to[len-1] = STARTR | (cfrom & XMASKR);
- cfrom >>= 6;
- }
- to[0] = start | cfrom;
- return len;
-}
-
-int
-u8_to_u32(u32 *to, const u8 *from, size_t* from_delta)
-{
- const u8* pfrom = from;
- u32* pto = to;
- if (!from)
- return 1;
- size_t delta = 0;
- *from_delta = 0;
- while (*pfrom)
- {
- int result = u8_char_to_u32(pto, pfrom, &delta);
- if (result)
- return result;
- pto++;
- pfrom += delta;
- *from_delta += delta;
- }
- *pto = 0;
- return 0;
-}
-
-size_t
-u32_to_u8(u8 *to, const u32* from)
-{
- u8* pto = to;
- const u32* pfrom = from;
- size_t len = 0;
- if (!from)
- return 0;
- while (*pfrom)
- {
- size_t delta = u32_char_to_u8(pto, *pfrom);
- pto += delta;
- len += delta;
- pfrom++;
- }
- return len;
-}
-
-size_t
-u32_strlen(const u32* s)
-{
- const u32* ps = s;
- while (ps && *ps)
- ps++;
- return ps - s;
-}
-
diff --git a/utf8.h b/utf8.h
@@ -1,40 +0,0 @@
-/* See the file LICENSE for copyright and license details. */
-
-#include <stdint.h>
-#include <sys/types.h>
-
-#define XMASK1 0x7F /* b01111111 */
-#define XMASK2 0x1F /* b00011111 */
-#define XMASK3 0x0F /* b00001111 */
-#define XMASK4 0x07 /* b00000111 */
-#define XMASK5 0x03 /* b00000011 */
-#define XMASK6 0x01 /* b00000001 */
-
-#define XMASKR 0x3F /* b00111111 */
-
-/*
-#define BOUND1 0x0*/
-#define BOUND2 0x80
-#define BOUND3 0x800
-#define BOUND4 0x10000
-#define BOUND5 0x200000
-#define BOUND6 0x800000
-
-#define START1 0x00 /* b0xxxxxxx */
-#define START2 0xC0 /* b110xxxxx */
-#define START3 0xE0 /* b1110xxxx */
-#define START4 0xF0 /* b11110xxx */
-#define START5 0xF8 /* b111110xx */
-#define START6 0xFC /* b1111110x */
-
-#define STARTR 0x80 /* b10xxxxxx */
-
-typedef uint8_t u8;
-typedef uint32_t u32;
-
-int u8_char_to_u32(u32* to, const u8* from, size_t* from_delta);
-size_t u32_char_to_u8(u8* to, const u32 from);
-int u8_to_u32(u32* to, const u8* from, size_t* from_delta);
-size_t u32_to_u8(u8* to, const u32* from);
-size_t u32_strlen(const u32* s);
-