ufold

Прелама сваки UTF-8 улазни ред да стане у задату дужину
git clone https://git.sr.ht/~strahinja/ufold
Дневник | Датотеке | Референце | ПРОЧИТАЈМЕ | ЛИЦЕНЦА

ufold.c (6920B)


      1 /* This program is licensed under the terms of GNU GPL v3 or (at your option)
      2  * any later version. Copyright (C) 2021-2024  Страхиња Радић.
      3  * See the file LICENSE for exact copyright and license details. */
      4 
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "defs.h"
#include "utf8.h"
#include "version.h"
     15 
/* License and copyright notice; version() prints it when the full version
 * information is requested. */
#define COPYRIGHT                                                            \
	("  This program is licensed under the terms of GNU GPL v3"          \
	 " or (at your option)\n"                                            \
	 "  any later version. Copyright (C) 2021-2024  Strahinya Radich.\n" \
	 "  See the file LICENSE for exact copyright and license "           \
	 "details.")
     22 
/* Name of the input file. main() stores the file argument here and error()
 * uses it as the message prefix ("(stdin)" is shown while it is empty). */
char filename[MAXPATH];
     24 
     25 int
     26 usage(void)
     27 {
     28 	printf("Usage:\t%s -h | --help | -V | --full-version | -v | --version\n"
     29 	       "\t%s [-s | --spaces] [-w | --width=<cols>] [file]\n",
     30 		PROGRAMNAME, PROGRAMNAME);
     31 	return 0;
     32 }
     33 
     34 int
     35 error(const int code, const char* fmt, ...)
     36 {
     37 	char buf[MAXBUF];
     38 	va_list args;
     39 	va_start(args, fmt);
     40 	vsnprintf(buf, sizeof(buf), fmt, args);
     41 	va_end(args);
     42 	fprintf(stderr, "%s:%s: %s\n", PROGRAMNAME,
     43 		*filename ? filename : "(stdin)", buf);
     44 	return code;
     45 }
     46 
     47 int
     48 version(const int full)
     49 {
     50 	printf("%s %s, built on %s\n", PROGRAMNAME, VERSION, DATE);
     51 	if (full)
     52 		puts(COPYRIGHT);
     53 	return 0;
     54 }
     55 
     56 int
     57 starts_with(const char* s, const char* with)
     58 {
     59 	while (*s && *s == *with)
     60 	{
     61 		if (*s != *with)
     62 			return 0;
     63 		s++;
     64 		with++;
     65 	}
     66 	return *with == 0;
     67 }
     68 
     69 void
     70 fold_line_spaces(const u8* line, const int width)
     71 {
     72 	if (!line)
     73 		return;
     74 
     75 	/*
     76 	 * Idea:
     77 	 * 1. store characters in token until line is exhausted
     78 	 * 2. when ' ' or end of line is encountered:
     79 	 *    2,1, if token length > width, see if current column is 0:
     80 	 *         2.1.1. if it isn't, output newline and go to 2.1.2.
     81 	 *         2.1.2. (if it is) output chunks of length "width" until the
     82 	 *                end of token, then reset token
     83 	 *    2.2. if token length <= width, see if token length + current
     84 	 *         column exceed width
     85 	 *         2.2.1. if they do, output newline and go to 2.2.2.
     86 	 *         2.2.2. (if they don't) output token
     87 	 * 3. go to 1. unless line is exhausted
     88 	 */
     89 	const u8* pline = line;
     90 	u32 uch		= 0;
     91 	u32 token[MAXINPUTBUF];
     92 	u32* ptoken = NULL;
     93 	u8 u8ch[7];
     94 	size_t u8ch_len = 0;
     95 	size_t delta	= 0;
     96 	size_t len	= 0;
     97 	size_t col	= 0;
     98 
     99 	*token = 0;
    100 	ptoken = token;
    101 	/* 1. */
    102 	while (1)
    103 	{
    104 		u8_char_to_u32(&uch, pline, &len);
    105 		if (!len)
    106 			return;
    107 		*ptoken++ = uch;
    108 		pline += len;
    109 
    110 		/* 2. */
    111 		if (uch == (u32)L' ' || !uch)
    112 		{
    113 			if (ptoken - token > width)
    114 			{
    115 				/* 2.1. */
    116 				if (col > 0)
    117 				{
    118 					/* 2.1.1. */
    119 					printf("\n");
    120 					col = 0;
    121 				}
    122 
    123 				/* 2.1.2. */
    124 				*ptoken = 0;
    125 				ptoken	= token;
    126 				while (*ptoken)
    127 				{
    128 					u8ch_len
    129 						= u32_char_to_u8(u8ch, *ptoken);
    130 					if (!u8ch_len)
    131 						return;
    132 					u8ch[u8ch_len] = 0;
    133 
    134 					if (*ptoken == (u32)L'\t')
    135 						delta = 8 - (col % 8);
    136 					else
    137 						delta = 1;
    138 
    139 					if (col + delta > width)
    140 					{
    141 						printf("\n");
    142 						col = 0;
    143 						if (*ptoken == (u32)'\t')
    144 							delta = 8;
    145 						else
    146 							delta = 1;
    147 					}
    148 					printf("%s", u8ch);
    149 					col += delta;
    150 					ptoken += u8ch_len;
    151 				}
    152 				*token = 0;
    153 				ptoken = token;
    154 			}
    155 			else
    156 			{
    157 				/* 2.2. */
    158 				if (ptoken - token + col > width)
    159 				{
    160 					/* 2.2.1. */
    161 					printf("\n");
    162 					col = 0;
    163 				}
    164 
    165 				/* 2.2.2. */
    166 				*ptoken = 0;
    167 				ptoken	= token;
    168 				while (*ptoken)
    169 				{
    170 					u8ch_len
    171 						= u32_char_to_u8(u8ch, *ptoken);
    172 					if (!u8ch_len)
    173 						return;
    174 					u8ch[u8ch_len] = 0;
    175 
    176 					if (*ptoken == (u32)L'\t')
    177 						delta = 8 - (col % 8);
    178 					else
    179 						delta = 1;
    180 
    181 					printf("%s", u8ch);
    182 					col += delta;
    183 					ptoken++;
    184 				}
    185 				*token = 0;
    186 				ptoken = token;
    187 			}
    188 		}
    189 
    190 		/* 3. */
    191 		if (!uch)
    192 			break;
    193 	}
    194 }
    195 
    196 void
    197 fold_line_normal(const u8* line, const int width)
    198 {
    199 	if (!line)
    200 		return;
    201 
    202 	const u8* pline	      = line;
    203 	const u8* pline_inner = NULL;
    204 	u32 uch;
    205 	u8 u8_char[7];
    206 	u8* pu8_char = NULL;
    207 	size_t uch_len;
    208 	size_t delta = 0;
    209 	size_t col   = 0;
    210 
    211 	while (*pline)
    212 	{
    213 		/* convert only to get byte length */
    214 		u8_char_to_u32(&uch, pline, &uch_len);
    215 		if (!uch_len)
    216 			return;
    217 
    218 		pline_inner = pline;
    219 		pu8_char    = u8_char;
    220 		while (*pline_inner && pline_inner < pline + uch_len)
    221 			*pu8_char++ = *pline_inner++;
    222 		*pu8_char = 0;
    223 
    224 		if (uch == (u32)L'\t')
    225 			delta = 8 - (col % 8);
    226 		else
    227 			delta = 1;
    228 
    229 		if (col + delta > width)
    230 		{
    231 			printf("\n");
    232 			col = 0;
    233 			if (uch == (u32)L'\t')
    234 				delta = 8;
    235 			else
    236 				delta = 1;
    237 		}
    238 
    239 		printf("%s", u8_char);
    240 		col += delta;
    241 		pline += uch_len;
    242 	}
    243 }
    244 
    245 int
    246 main(int argc, char** argv)
    247 {
    248 	int argn     = 1;
    249 	char* carg   = NULL;
    250 	char** earg  = NULL;
    251 	int spaces   = 0;
    252 	int in_width = 0;
    253 	long width   = 80;
    254 	char width_buf[MAXBUF];
    255 
    256 	*filename = 0;
    257 	while (argn < argc)
    258 	{
    259 		carg = *(argv + argn);
    260 		if (!strcmp(carg, "-s") || !strcmp(carg, "--spaces"))
    261 			spaces = 1;
    262 		else if (!strcmp(carg, "-h") || !strcmp(carg, "--help"))
    263 			return usage();
    264 		else if (!strcmp(carg, "-V") || !strcmp(carg, "--full-version"))
    265 			return version(1);
    266 		else if (!strcmp(carg, "-v") || !strcmp(carg, "--version"))
    267 			return version(0);
    268 		else if (!strcmp(carg, "-w"))
    269 			in_width = 1;
    270 		else if (starts_with(carg, "--width="))
    271 		{
    272 			char* pcarg = carg + strlen("--width=");
    273 			strncpy(width_buf, pcarg, MAXBUF);
    274 			errno = 0;
    275 			earg  = &pcarg;
    276 			width = strtol(width_buf, earg, 10);
    277 			if (errno == ERANGE || errno == EINVAL
    278 				|| (**earg && *pcarg))
    279 				return error(EINVAL,
    280 					"Invalid numerical value "
    281 					"for --width: %s",
    282 					pcarg);
    283 			in_width = 0;
    284 		}
    285 		else if (in_width)
    286 		{
    287 			errno = 0;
    288 			earg  = &carg;
    289 			width = strtol(carg, earg, 10);
    290 			if (errno == ERANGE || errno == EINVAL
    291 				|| (**earg && *carg))
    292 				return error(EINVAL,
    293 					"Invalid numerical value "
    294 					"for -w: %s",
    295 					carg);
    296 			in_width = 0;
    297 		}
    298 		else if (starts_with(carg, "--width"))
    299 			return error(EINVAL,
    300 				"Parameter --width requires an "
    301 				"argument");
    302 		else if (*carg == '-' && *(carg + 1) != 0)
    303 			return usage();
    304 		else
    305 			strncpy(filename, carg, MAXPATH);
    306 		argn++;
    307 	}
    308 
    309 	if (width < 1)
    310 		return error(EINVAL, "Width too small");
    311 
    312 	if (!*filename)
    313 	{
    314 		filename[0] = '-';
    315 		filename[1] = 0;
    316 	}
    317 
    318 	FILE* input = NULL;
    319 	if (!*filename || (*filename == '-' && !*(filename + 1)))
    320 		input = stdin;
    321 	else
    322 		input = fopen(filename, "rt");
    323 	if (!input)
    324 		return error(errno, "Cannot open file");
    325 
    326 	char line[MAXINPUTBUF];
    327 	while (!feof(input))
    328 	{
    329 		if (!fgets(line, MAXINPUTBUF, input))
    330 		{
    331 			if (!feof(input))
    332 				return error(errno, "Error reading file");
    333 			else
    334 				break;
    335 		}
    336 		char* eol = strchr(line, '\n');
    337 		if (eol)
    338 			*eol = 0;
    339 		if (spaces)
    340 			fold_line_spaces((const u8*)line, width);
    341 		else
    342 			fold_line_normal((const u8*)line, width);
    343 		printf("\n");
    344 	}
    345 
    346 	fclose(input);
    347 	return 0;
    348 }