reflow

Преобликује пасусе у редове
Дневник | Датотеке | Референце | ПРОЧИТАЈМЕ | ЛИЦЕНЦА

чување 101521eeba7c760bcfe38d03b9420f213f39dc46
родитељ cb67262e15b0e4bfd03e2de8a68eae87648dba1d
Аутор: Страхиња Радић <contact@strahinja.org>
Датум:   Tue, 20 Jul 2021 09:31:16 +0200

Added redo scripts; standardization wrt arguments, manpage etc

Signed-off-by: Страхиња Радић <contact@strahinja.org>

Diffstat:
M.gitignore | 11++++++-----
Aall.do | 2++
Aclean.do | 2++
Adate.do | 4++++
Adefault.do | 11+++++++++++
Adefault.gz.do | 3+++
Adefault.h.do | 13+++++++++++++
Adefault.o.do | 6++++++
Adefs.h | 3+++
Ado | 446+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainstall.do | 8++++++++
Apack.do | 10++++++++++
Arebuild.do | 2++
Areflow.1.in | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mreflow.c | 88+++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
Areflow.do | 9+++++++++
Auninstall.do | 6++++++
Autf8.c | 161+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Autf8.h | 40++++++++++++++++++++++++++++++++++++++++
Aversion.do | 7+++++++
Aversion.h.in | 4++++
измењених датотека: 21, додавања: 878(+), брисања: 36(-)

diff --git a/.gitignore b/.gitignore @@ -8,15 +8,16 @@ *.orig *.rej *.o -*.pdf -*.ps *.swp *.did nohup.out -reflow *.gz -compile_commands.json -examples/*/*.html +*.xz +*.pdf +*.1 +reflow +reflow.1 date version +version.h diff --git a/all.do b/all.do @@ -0,0 +1,2 @@ +redo-ifchange reflow reflow.1.gz + diff --git a/clean.do b/clean.do @@ -0,0 +1,2 @@ +rm -f *~ *.o reflow + diff --git a/date.do b/date.do @@ -0,0 +1,4 @@ +LC_ALL=C date +'%d %b %Y' >$3 +redo-always +redo-stamp <$3 + diff --git a/default.do b/default.do @@ -0,0 +1,11 @@ +if [ -r $1.in ]; then + redo-ifchange $1.in version date + read VERSION <version + read DATE <date + sed -e "s/%VERSION%/$VERSION/g" \ + -e "s/%DATE%/$DATE/g" <$1.in +else + echo "$0: don't know how to build '$1'" >&2 + exit 99 +fi + diff --git a/default.gz.do b/default.gz.do @@ -0,0 +1,3 @@ +redo-ifchange $2 +gzip -cf $2 >$3 + diff --git a/default.h.do b/default.h.do @@ -0,0 +1,13 @@ +if [ -r $1.in ]; then + redo-ifchange $1.in version date + read VERSION <version + read DATE <date + sed -e "s/%VERSION%/$VERSION/g" \ + -e "s/%DATE%/$DATE/g" <$1.in +else + if [ -f $2.def.h ]; then + redo-ifchange $2.def.h + cat $2.def.h > $3 + fi +fi + diff --git a/default.o.do b/default.o.do @@ -0,0 +1,6 @@ +for f in *.h; do + echo $f +done | xargs redo-ifchange +redo-ifchange $2.c +${REFLOW_CC:-gcc} -g -Wall -std=c99 -c $2.c -o $3 + diff --git a/defs.h b/defs.h @@ -0,0 +1,3 @@ +#define MAXBUF 4096 +#define MAXPATH 4096 + diff --git a/do b/do @@ -0,0 +1,446 @@ +#!/usr/bin/env sh +# +# A minimal alternative to djb redo that doesn't support incremental builds. +# For the full version, visit http://github.com/apenwarr/redo +# +# The author disclaims copyright to this source file and hereby places it in +# the public domain. (2010 12 14; updated 2019 02 24) +# +USAGE=" +usage: do [-d] [-x] [-v] [-c] <targets...> + -d print extra debug messages (mostly about dependency checks) + -v run .do files with 'set -v' + -x run .do files with 'set -x' + -c clean up all old targets before starting + + Note: do is an implementation of redo that does *not* check dependencies. + It will never rebuild a target it has already built, unless you use -c. +" + +# CDPATH apparently causes unexpected 'cd' output on some platforms. +unset CDPATH + +# By default, no output coloring. +green="" +bold="" +plain="" + +if [ -n "$TERM" -a "$TERM" != "dumb" ] && tty <&2 >/dev/null 2>&1; then + green="$(printf '\033[32m')" + bold="$(printf '\033[1m')" + plain="$(printf '\033[m')" +fi + +# The 'seq' command is not available on all platforms. +_seq() { + local x=0 max="$1" + while [ "$x" -lt "$max" ]; do + x=$((x + 1)) + echo "$x" + done +} + +# Split $1 into a dir part ($_dirsplit_dir) and base filename ($_dirsplit_base) +_dirsplit() { + _dirsplit_base=${1##*/} + _dirsplit_dir=${1%$_dirsplit_base} +} + +# Like /usr/bin/dirname, but avoids a fork and uses _dirsplit semantics. +qdirname() ( + _dirsplit "$1" + dir=${_dirsplit_dir%/} + echo "${dir:-.}" +) + +_dirsplit "$0" +REDO=$(cd "$(pwd -P)" && + cd "${_dirsplit_dir:-.}" && + echo "$PWD/$_dirsplit_base") +export REDO +_cmd=$_dirsplit_base + +DO_TOP= +if [ -z "$DO_BUILT" ]; then + export _do_opt_debug= + export _do_opt_exec= + export _do_opt_verbose= + export _do_opt_clean= +fi +while getopts 'dxvcj:h?' _opt; do + case $_opt in + d) _do_opt_debug=1 ;; + x) _do_opt_exec=x ;; + v) _do_opt_verbose=v ;; + c) _do_opt_clean=1 ;; + j) ;; # silently ignore, for compat with real redo + \?|h|*) printf "%s" "$USAGE" >&2 + exit 99 + ;; + esac +done +shift "$((OPTIND - 1))" +_debug() { + [ -z "$_do_opt_debug" ] || echo "$@" >&2 +} + +if [ -z "$DO_BUILT" -a "$_cmd" != "redo-whichdo" ]; then + DO_TOP=1 + if [ "$#" -eq 0 ] && [ "$_cmd" = "do" -o "$_cmd" = "redo" ]; then + set all # only toplevel redo has a default target + fi + export DO_STARTDIR="$(pwd -P)" + # If starting /bin/pwd != $PWD, this will fix it. + # That can happen when $PWD contains symlinks that the shell is + # trying helpfully (but unsuccessfully) to hide from the user. + cd "$DO_STARTDIR" || exit 99 + export DO_BUILT="$PWD/.do_built" + if [ -z "$_do_opt_clean" -a -e "$DO_BUILT" ]; then + echo "do: Incremental mode. Use -c for clean rebuild." >&2 + fi + : >>"$DO_BUILT" + sort -u "$DO_BUILT" >"$DO_BUILT.new" + while read f; do + [ -n "$_do_opt_clean" ] && printf "%s\0%s.did\0" "$f" "$f" + printf "%s.did.tmp\0" "$f" + done <"$DO_BUILT.new" | + xargs -0 rm -f 2>/dev/null + mv "$DO_BUILT.new" "$DO_BUILT" + export DO_PATH="$DO_BUILT.dir" + export PATH="$DO_PATH:$PATH" + rm -rf "$DO_PATH" + mkdir "$DO_PATH" + for d in redo redo-ifchange redo-whichdo; do + ln -s "$REDO" "$DO_PATH/$d" + done + for d in redo-ifcreate redo-stamp redo-always redo-ood \ + redo-targets redo-sources; do + echo "#!/bin/sh" >"$DO_PATH/$d" + chmod a+rx "$DO_PATH/$d" + done +fi + + +# Chop the "file" part off a /path/to/file pathname. +# Note that if the filename already ends in a /, we just remove the slash. +_updir() +{ + local v="${1%/*}" + [ "$v" != "$1" ] && echo "$v" + # else "empty" which means we went past the root +} + + +# Returns true if $1 starts with $2. +_startswith() +{ + [ "${1#"$2"}" != "$1" ] +} + + +# Returns true if $1 ends with $2. +_endswith() +{ + [ "${1%"$2"}" != "$1" ] +} + + +# Prints $1 if it's absolute, or $2/$1 if $1 is not absolute. +_abspath() +{ + local here="$2" there="$1" + if _startswith "$1" "/"; then + echo "$1" + else + echo "$2/$1" + fi +} + + +# Prints $1 as a path relative to $PWD (not starting with /). +# If it already doesn't start with a /, doesn't change the string. +_relpath() +{ + local here="$2" there="$1" out= hadslash= + #echo "RP start '$there' hs='$hadslash'" >&2 + _startswith "$there" "/" || { echo "$there" && return; } + [ "$there" != "/" ] && _endswith "$there" "/" && hadslash=/ + here=${here%/}/ + while [ -n "$here" ]; do + #echo "RP out='$out' here='$here' there='$there'" >&2 + [ "${here%/}" = "${there%/}" ] && there= && break; + [ "${there#$here}" != "$there" ] && break + out=../$out + _dirsplit "${here%/}" + here=$_dirsplit_dir + done + there=${there#$here} + if [ -n "$there" ]; then + echo "$out${there%/}$hadslash" + else + echo "${out%/}$hadslash" + fi +} + + +# Prints a "normalized relative" path, with ".." resolved where possible. +# For example, a/b/../c will be reduced to just a/c. +_normpath() +( + local path="$1" relto="$2" out= isabs= + #echo "NP start '$path'" >&2 + if _startswith "$path" "/"; then + isabs=1 + else + path="${relto%/}/$path" + fi + set -f + IFS=/ + for d in ${path%/}; do + #echo "NP out='$out' d='$d'" >&2 + if [ "$d" = ".." ]; then + out=$(_updir "${out%/}")/ + else + out=$out$d/ + fi + done + #echo "NP out='$out' (done)" >&2 + out=${out%/} + if [ -n "$isabs" ]; then + echo "${out:-/}" + else + _relpath "${out:-/}" "$relto" + fi +) + + +# Prints a "real" path, with all symlinks resolved where possible. +_realpath() +{ + local path="$1" relto="$2" isabs= rest= + if _startswith "$path" "/"; then + isabs=1 + else + path="${relto%/}/$path" + fi + ( + for d in $(_seq 100); do + #echo "Trying: $PWD--$path" >&2 + if cd -P "$path" 2>/dev/null; then + # success + pwd=$(pwd -P) + #echo " chdir ok: $pwd--$rest" >&2 + np=$(_normpath "${pwd%/}/$rest" "$relto") + if [ -n "$isabs" ]; then + echo "$np" + else + _relpath "$np" "$relto" + fi + break + fi + _dirsplit "${path%/}" + path=$_dirsplit_dir + rest="$_dirsplit_base/$rest" + done + ) +} + + +# List the possible names for default*.do files in dir $1 matching the target +# pattern in $2. We stop searching when we find the first one that exists. +_find_dofiles_pwd() +{ + local dodir="$1" dofile="$2" + _startswith "$dofile" "default." || dofile=${dofile#*.} + while :; do + dofile=default.${dofile#default.*.} + echo "$dodir$dofile" + [ -e "$dodir$dofile" ] && return 0 + [ "$dofile" = default.do ] && break + done + return 1 +} + + +# List the possible names for default*.do files in $PWD matching the target +# pattern in $1. We stop searching when we find the first name that works. +# If there are no matches in $PWD, we'll search in .., and so on, to the root. +_find_dofiles() +{ + local target="$1" dodir= dofile= newdir= + _debug "find_dofile: '$PWD' '$target'" + dofile="$target.do" + echo "$dofile" + [ -e "$dofile" ] && return 0 + + # Try default.*.do files, walking up the tree + _dirsplit "$dofile" + dodir=$_dirsplit_dir + dofile=$_dirsplit_base + [ -n "$dodir" ] && dodir=${dodir%/}/ + [ -e "$dodir$dofile" ] && return 0 + for i in $(_seq 100); do + [ -n "$dodir" ] && dodir=${dodir%/}/ + #echo "_find_dofiles: '$dodir' '$dofile'" >&2 + _find_dofiles_pwd "$dodir" "$dofile" && return 0 + newdir=$(_realpath "${dodir}.." "$PWD") + [ "$newdir" = "$dodir" ] && break + dodir=$newdir + done + return 1 +} + + +# Print the last .do file returned by _find_dofiles. +# If that file exists, returns 0, else 1. +_find_dofile() +{ + local files="$(_find_dofiles "$1")" + rv=$? + #echo "files='$files'" >&2 + [ "$rv" -ne 0 ] && return $rv + echo "$files" | { + while read -r linex; do line=$linex; done + printf "%s\n" "$line" + } +} + + +# Actually run the given $dofile with the arguments in $@. +# Note: you should always run this in a subshell. +_run_dofile() +{ + export DO_DEPTH="$DO_DEPTH " + export REDO_TARGET="$PWD/$target" + local line1 + set -e + read line1 <"$PWD/$dofile" || true + cmd=${line1#"#!/"} + if [ "$cmd" != "$line1" ]; then + set -$_do_opt_verbose$_do_opt_exec + exec /$cmd "$PWD/$dofile" "$@" + else + set -$_do_opt_verbose$_do_opt_exec + # If $dofile is empty, "." might not change $? at + # all, so we clear it first with ":". + :; . "$PWD/$dofile" + fi +} + + +# Find and run the right .do file, starting in dir $1, for target $2, +# providing a temporary output file as $3. Renames the temp file to $2 when +# done. +_do() +{ + local dir="$1" target="$1$2" tmp="$1$2.redo.tmp" tdir= + local dopath= dodir= dofile= ext= + if [ "$_cmd" = "redo" ] || + ( [ ! -e "$target" -o -d "$target" ] && + [ ! -e "$target.did" ] ); then + printf '%sdo %s%s%s%s\n' \ + "$green" "$DO_DEPTH" "$bold" "$target" "$plain" >&2 + dopath=$(_find_dofile "$target") + if [ ! -e "$dopath" ]; then + echo "do: $target: no .do file ($PWD)" >&2 + return 1 + fi + _dirsplit "$dopath" + dodir=$_dirsplit_dir dofile=$_dirsplit_base + if _startswith "$dofile" "default."; then + ext=${dofile#default} + ext=${ext%.do} + else + ext= + fi + target=$PWD/$target + tmp=$PWD/$tmp + cd "$dodir" || return 99 + target=$(_relpath "$target" "$PWD") || return 98 + tmp=$(_relpath "$tmp" "$PWD") || return 97 + base=${target%$ext} + tdir=$(qdirname "$target") + [ ! -e "$DO_BUILT" ] || [ ! -w "$tdir/." ] || + : >>"$target.did.tmp" + # $qtmp is a temporary file used to capture stdout. + # Since it might be accidentally deleted as a .do file + # does its work, we create it, then open two fds to it, + # then immediately delete the name. We use one fd to + # redirect to stdout, and the other to read from after, + # because there's no way to fseek(fd, 0) in sh. + qtmp=$DO_PATH/do.$$.tmp + ( + rm -f "$qtmp" + ( _run_dofile "$target" "$base" "$tmp" >&3 3>&- 4<&- ) + rv=$? + if [ $rv != 0 ]; then + printf "do: %s%s\n" "$DO_DEPTH" \ + "$target: got exit code $rv" >&2 + rm -f "$tmp.tmp" "$tmp.tmp2" "$target.did" + return $rv + fi + echo "$PWD/$target" >>"$DO_BUILT" + if [ ! -e "$tmp" ]; then + # if $3 wasn't created, copy from stdout file + cat <&4 >$tmp + # if that's zero length too, forget it + [ -s "$tmp" ] || rm -f "$tmp" + fi + ) 3>$qtmp 4<$qtmp # can't use "|| return" here... + # ...because "|| return" would mess up "set -e" inside the () + # on some shells. Running commands in "||" context, even + # deep inside, will stop "set -e" from functioning. + rv=$? + [ "$rv" = 0 ] || return "$rv" + mv "$tmp" "$target" 2>/dev/null + [ -e "$target.did.tmp" ] && + mv "$target.did.tmp" "$target.did" || + : >>"$target.did" + else + _debug "do $DO_DEPTH$target exists." >&2 + fi +} + + +# Implementation of the "redo" command. +_redo() +{ + local i startdir="$PWD" dir base + set +e + for i in "$@"; do + i=$(_abspath "$i" "$startdir") + ( + cd "$DO_STARTDIR" || return 99 + i=$(_realpath "$(_relpath "$i" "$PWD")" "$PWD") + _dirsplit "$i" + dir=$_dirsplit_dir base=$_dirsplit_base + _do "$dir" "$base" + ) + [ "$?" = 0 ] || return 1 + done +} + + +# Implementation of the "redo-whichdo" command. +_whichdo() +{ + _find_dofiles "$1" +} + + +case $_cmd in + do|redo|redo-ifchange) _redo "$@" ;; + redo-whichdo) _whichdo "$1" ;; + do.test) ;; + *) printf "do: '%s': unexpected redo command" "$_cmd" >&2; exit 99 ;; +esac +[ "$?" = 0 ] || exit 1 + +if [ -n "$DO_TOP" ]; then + if [ -n "$_do_opt_clean" ]; then + echo "do: Removing stamp files..." >&2 + [ ! -e "$DO_BUILT" ] || + while read f; do printf "%s.did\0" "$f"; done <"$DO_BUILT" | + xargs -0 rm -f 2>/dev/null + fi +fi diff --git a/install.do b/install.do @@ -0,0 +1,8 @@ +redo-ifchange all +PREFIX=/usr/local +BINDIR=$PREFIX/bin +MANDIR=$PREFIX/share/man/man1 +install -d $BINDIR $MANDIR +install -m 0755 reflow $BINDIR +install -m 0644 reflow.1.gz $MANDIR + diff --git a/pack.do b/pack.do @@ -0,0 +1,10 @@ +redo all +redo-ifchange version +read VERSION <version +VERSION=$(echo $VERSION | sed 's/^v//') +DISTDIR=reflow-$VERSION +mkdir $DISTDIR +cp *.c *.h *.do README LICENSE $DISTDIR +tar cvf $DISTDIR.tar.xz $DISTDIR >/dev/null +rm -fr $DISTDIR + diff --git a/rebuild.do b/rebuild.do @@ -0,0 +1,2 @@ +redo clean all + diff --git a/reflow.1.in b/reflow.1.in @@ -0,0 +1,78 @@ +'\" t +.\" Manpage for reflow(1) +.\" vim: set filetype=groff: +. +.mso an-ext.tmac +.de CDS +.EX +.RS \\$1 +.sp 1 +.. +.de CDE +.sp 1 +.RE +.EE +.. +.de RDS +.RS \\$1 +.sp 1 +.. +.de RDE +.sp 1 +.RE +.. +. +.TH REFLOW "1" "%DATE%" "text-tools" "General Commands Manual" +.SH NAME +reflow \- Reflows paragraphs into single lines +. +.SH SYNOPSIS +. +.SY reflow +.OP "\-h \fR|\fP \-\-help" +.YS +. +.SY reflow +.OP "\-v \fR|\fP \-\-version" +.YS +. +.SY reflow +.RI [ file ] +.YS +. +.SH COPYRIGHT +. +.LP +reflow Copyright \(co 2021 Strahinya Radich. +.br +This program is licensed under GNU GPL v3 or later. See the file +.I LICENSE +in the reflow repository for details. +.SH DESCRIPTION +. +.LP +.B reflow +reflows paragraphs of text into single (long) lines. It will read its input from +.IR file , +writing to standard output. +. +.LP +If the special filename of \[lq]\-\[rq] is given, +.B reflow +will instead read from the standard input. This also happens when no filename is +given. +. +.SH AUTHOR +. +Strahinya Radich, +.UR https://\:strahinja.org +.UE +. +.SH BUGS +. +.LP +Bugs can be reported using the ticket tracker at: +.UR https://\:todo.sr.ht/\:~strahinja/\:text-tools +.UE +. + diff --git a/reflow.c b/reflow.c @@ -1,21 +1,4 @@ -/* - * reflow - Reflow paragraphs into single lines - * Copyright (C) 2021 Страхиња Радић - * - * This program is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <https://www.gnu.org/licenses/>. - * - */ +/* See the file LICENSE for copyright and license details. */ #define _POSIX_C_SOURCE 200809L @@ -26,8 +9,15 @@ #include <string.h> #include <unistr.h> -#define MAXBUF 4096 -#define PROGRAMNAME "reflow" +#include "defs.h" +#include "version.h" + +int +usage() +{ + printf("Usage: %s [-h|--help] [-v|--version] [FILE]\n", PROGRAMNAME); + return 0; +} int error(int code, char* format, ...) @@ -42,28 +32,64 @@ error(int code, char* format, ...) } int +version() +{ + printf("%s %s, built on %s\n", PROGRAMNAME, VERSION, DATE); + return 0; +} + +int is_whitespace(char ch) { return (ch == ' ') || (ch == '\t'); } int -main() +main(int argc, char** argv) { - size_t buffer_size = 0; - uint8_t* buffer = NULL; - uint8_t* pbuffer; - size_t line_len = 0; - uint8_t* line = calloc(MAXBUF, 1); - uint8_t* pline; - uint8_t* eol = NULL; + size_t buffer_size = 0; + uint8_t* buffer = NULL; + uint8_t* pbuffer = NULL; + size_t line_len = 0; + char* line = calloc(MAXBUF, 1); + uint8_t* pline = NULL; + uint8_t* eol = NULL; + FILE* input = NULL; + int argn = 1; + char* carg = NULL; + char filename[MAXPATH]; + + *filename = 0; + while (argn < argc) + { + carg = *(argv + argn); + if (!strcmp(carg, "-h") || !strcmp(carg, "--help")) + return usage(); + else if (!strcmp(carg, "-v") || !strcmp(carg, "--version")) + return version(); + else if (!*filename) + strcpy(filename, carg); + else + return usage(); + argn++; + } + + if (*filename == '-' && !*(filename+1)) + input = stdin; + else + input = fopen(filename, "rt"); while (!feof(stdin)) { - if (!fgets(line, MAXBUF, stdin)) - continue; + if (!fgets(line, MAXBUF, input)) + { + if (!feof(input)) + return error(errno, "Error reading file"); + else + break; + } - eol = u8_strchr(line, '\n'); + eol = strchr(line, '\n'); if (eol) line_len = eol-line+1; else diff --git a/reflow.do b/reflow.do @@ -0,0 +1,9 @@ +for f in *.h.in; do + echo $f | sed -e's/\.in$//g' +done | xargs redo-ifchange +for f in *.h *.c; do + echo $f + echo $f | sed -e's/\.c$/.o/g' +done | xargs redo-ifchange +${REFLOW_CC:-gcc} -g -Wall -std=c99 -o $3 reflow.o utf8.o + diff --git a/uninstall.do b/uninstall.do @@ -0,0 +1,6 @@ +redo-always +PREFIX=/usr/local +BINDIR=$PREFIX/bin +MANDIR=$PREFIX/share/man/man1 +rm -f $BINDIR/reflow $MANDIR/reflow.1.gz + diff --git a/utf8.c b/utf8.c @@ -0,0 +1,161 @@ +/* See the file LICENSE for copyright and license details. */ + +#include "utf8.h" + +/* + * 00000000 -- 0000007F: 0xxxxxxx + * (2^7 = 128 chars) + * 00000080 -- 000007FF: 110xxxxx 10xxxxxx + * (2^5 = 32 chars) + * 00000800 -- 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx + * (2^4 = 16 chars) + * 00010000 -- 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * (2^3 = 8 chars) + * 00200000 -- 007FFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + * (2^2 = 4 chars) + * 00800000 -- 00FFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + * 10xxxxxx + * (2^1 = 2 chars) + */ + +const int +utf_length_table[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 32 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 64 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 96 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 128 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 160 */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 192 */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 224 */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1, /* 256 */ +}; + +const int +extract_masks[6] = { + XMASK1, XMASK2, XMASK3, XMASK4, XMASK5, XMASK6 +}; + +int +u8_char_to_u32(u32* to, const u8 *from, size_t* from_delta) +{ + const u8* pfrom = from; + if (!from) + return 1; + *from_delta = 0; + int len = utf_length_table[*from]; + *to = *from & extract_masks[len-1]; + for (int i = 1; i < len; i++) + { + pfrom = from + i; + if (!*pfrom) + return 1; + *to <<= 6; + *to |= *pfrom & XMASKR; + } + *from_delta = len; + + return 0; +} + +size_t +u32_char_to_u8(u8 *to, const u32 from) +{ + size_t len = 0; + u8 start = 0; + u32 cfrom = from; + if (from >= BOUND6) + { + start = START6; + len = 6; + } + else if (from >= BOUND5) + { + start = START5; + len = 5; + } + else if (from >= BOUND4) + { + start = START4; + len = 4; + } + else if (from >= BOUND3) + { + start = START3; + len = 3; + } + else if (from >= BOUND2) + { + start = START2; + len = 2; + } + else + { + start = START1; + len = 1; + } + for (int i = len-1; i > 0; i--) + { + to[len-1] = STARTR | (cfrom & XMASKR); + cfrom >>= 6; + } + to[0] = start | cfrom; + return len; +} + +int +u8_to_u32(u32 *to, const u8 *from, size_t* from_delta) +{ + const u8* pfrom = from; + u32* pto = to; + if (!from) + return 1; + size_t delta = 0; + *from_delta = 0; + while (*pfrom) + { + int result = u8_char_to_u32(pto, pfrom, &delta); + if (result) + return result; + pto++; + pfrom += delta; + *from_delta += delta; + } + *pto = 0; + return 0; +} + +size_t +u32_to_u8(u8 *to, const u32* from) +{ + u8* pto = to; + const u32* pfrom = from; + size_t len = 0; + if (!from) + return 0; + while (*pfrom) + { + size_t delta = u32_char_to_u8(pto, *pfrom); + pto += delta; + len += delta; + pfrom++; + } + return len; +} + +size_t +u32_strlen(const u32* s) +{ + const u32* ps = s; + while (ps && *ps) + ps++; + return ps - s; +} + diff --git a/utf8.h b/utf8.h @@ -0,0 +1,40 @@ +/* See the file LICENSE for copyright and license details. */ + +#include <stdint.h> +#include <sys/types.h> + +#define XMASK1 0x7F /* b01111111 */ +#define XMASK2 0x1F /* b00011111 */ +#define XMASK3 0x0F /* b00001111 */ +#define XMASK4 0x07 /* b00000111 */ +#define XMASK5 0x03 /* b00000011 */ +#define XMASK6 0x01 /* b00000001 */ + +#define XMASKR 0x3F /* b00111111 */ + +/* +#define BOUND1 0x0*/ +#define BOUND2 0x80 +#define BOUND3 0x800 +#define BOUND4 0x10000 +#define BOUND5 0x200000 +#define BOUND6 0x800000 + +#define START1 0x00 /* b0xxxxxxx */ +#define START2 0xC0 /* b110xxxxx */ +#define START3 0xE0 /* b1110xxxx */ +#define START4 0xF0 /* b11110xxx */ +#define START5 0xF8 /* b111110xx */ +#define START6 0xFC /* b1111110x */ + +#define STARTR 0x80 /* b10xxxxxx */ + +typedef uint8_t u8; +typedef uint32_t u32; + +int u8_char_to_u32(u32* to, const u8* from, size_t* from_delta); +size_t u32_char_to_u8(u8* to, const u32 from); +int u8_to_u32(u32* to, const u8* from, size_t* from_delta); +size_t u32_to_u8(u8* to, const u32* from); +size_t u32_strlen(const u32* s); + diff --git a/version.do b/version.do @@ -0,0 +1,7 @@ +if ! git describe >$3; then + echo "$0: can't call git describe, falling back to 'unknown'" >&2 + echo 'unknown' >$3 +fi +redo-always +redo-stamp <$3 + diff --git a/version.h.in b/version.h.in @@ -0,0 +1,4 @@ +#define PROGRAMNAME "reflow" +#define DATE "%DATE%" +#define VERSION "%VERSION%" +