1
0

does not work, stuck on problem

This commit is contained in:
dtb 2023-09-02 13:00:50 -04:00
parent e917e20416
commit a0278729b6

88
toki/toki_sitelen Executable file
View File

@ -0,0 +1,88 @@
#!/bin/sh
# toki_sitelen - convert Unicode code points to UTF-8
#
# Usage: toki_sitelen <codepoints
#
# Reads one code point per line from standard input, written as hexadecimal
# digits with an optional "U+" prefix (e.g. "U+1F600", "u+e9", "41"), and
# writes the UTF-8 encoding of each one to standard output.  Nothing is
# written between characters; feed U+000A where a line break is wanted.
# Blank lines are ignored.
#
# Exit status: 0 if every line was converted, 1 if any line was rejected.

# utf8_emit BYTE...
# Write each BYTE (an integer 0-255) to standard output as one raw byte.
utf8_emit() {
  # The inner printf spells every byte as a "\0ooo" octal escape; the %b
  # conversion of the outer printf turns those escapes into the real bytes.
  # This is the POSIX way to get from a number to binary output.
  printf '%b' "$(printf '\\0%03o' "$@")"
}

# codepoint_to_utf8 CODEPOINT
# Write the UTF-8 encoding of CODEPOINT (hex digits, optional U+ prefix).
# Returns 1 and prints a diagnostic on standard error when CODEPOINT is not
# hexadecimal, lies above U+10FFFF, or is a surrogate (U+D800..U+DFFF), none
# of which have a valid UTF-8 form.
# POSIX sh has no local variables; the u8_ prefix keeps these out of the way.
codepoint_to_utf8() {
  u8_hex=${1#[Uu]+}
  case $u8_hex in
    '' | *[!0-9A-Fa-f]*)
      printf '%s: invalid code point: %s\n' "${0##*/}" "$1" >&2
      return 1
      ;;
  esac

  # Strip leading zeros so the digit count bounds the value before it is
  # handed to shell arithmetic (guards against overflow on absurd input).
  u8_hex=${u8_hex#"${u8_hex%%[!0]*}"}
  if [ "${#u8_hex}" -gt 6 ]; then
    printf '%s: code point out of range: %s\n' "${0##*/}" "$1" >&2
    return 1
  fi
  u8_cp=$((0x${u8_hex:-0}))

  if [ "$u8_cp" -gt "$((0x10FFFF))" ]; then
    printf '%s: code point out of range: %s\n' "${0##*/}" "$1" >&2
    return 1
  fi
  if [ "$u8_cp" -ge "$((0xD800))" ] && [ "$u8_cp" -le "$((0xDFFF))" ]; then
    printf '%s: surrogate code point: %s\n' "${0##*/}" "$1" >&2
    return 1
  fi

  # Pick the sequence length from the value, smallest range first, then
  # split the bits into a leading byte and 10xxxxxx continuation bytes.
  if [ "$u8_cp" -lt "$((0x80))" ]; then
    # 0xxxxxxx
    utf8_emit "$u8_cp"
  elif [ "$u8_cp" -lt "$((0x800))" ]; then
    # 110xxxxx 10xxxxxx
    utf8_emit \
      "$((0xC0 | (u8_cp >> 6)))" \
      "$((0x80 | (u8_cp & 0x3F)))"
  elif [ "$u8_cp" -lt "$((0x10000))" ]; then
    # 1110xxxx 10xxxxxx 10xxxxxx
    utf8_emit \
      "$((0xE0 | (u8_cp >> 12)))" \
      "$((0x80 | ((u8_cp >> 6) & 0x3F)))" \
      "$((0x80 | (u8_cp & 0x3F)))"
  else
    # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    utf8_emit \
      "$((0xF0 | (u8_cp >> 18)))" \
      "$((0x80 | ((u8_cp >> 12) & 0x3F)))" \
      "$((0x80 | ((u8_cp >> 6) & 0x3F)))" \
      "$((0x80 | (u8_cp & 0x3F)))"
  fi
}

# main
# Convert every code point read from standard input.  A bad line is reported
# and skipped so one typo does not discard the rest of the input.
main() {
  u8_status=0
  # The "|| [ -n ... ]" clause keeps a final line that lacks a newline.
  while read -r codepoint || [ -n "$codepoint" ]; do
    [ -n "$codepoint" ] || continue
    codepoint_to_utf8 "$codepoint" || u8_status=1
  done
  return "$u8_status"
}

main "$@"