1
0

still broken

This commit is contained in:
dtb 2023-09-02 13:35:40 -04:00
parent a0278729b6
commit 9e7b296852
2 changed files with 16 additions and 12 deletions

3
toki/README.txt Normal file
View File

@@ -0,0 +1,3 @@
- Move the executables to a directory in your PATH.
- Run `$ toki_update` to fetch the dictionary.
- Run `$ toki_sitelen` to tokiponize.

View File

@@ -1,7 +1,6 @@
#!/bin/sh
# codepoint -> utf8
while read -r codepoint; do
toki_ucsur "$@" | while read -r codepoint; do
# normalize to U+000000
codepoint="$(printf '%s\n' "$codepoint" | sed 's/^U+//')"
codepoint="U+$( \
@@ -39,25 +38,25 @@ while read -r codepoint; do
utf8_bin="$(case $bytes in \
1) printf '0%s\n' "$(printf '%s\n' "$codepoint_bin" \
| tail -c 7)" ;; \
| tail -c 8)" ;; \
2) printf '110%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
| tail -c 11 | head -c 5)" \
| tail -c 12 | head -c 5)" \
"$(printf '%s\n' "$codepoint_bin" \
| tail -c 6)" ;; \
| tail -c 7)" ;; \
3) printf '1110%s10%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
| tail -c 16 | head -c 4)" \
| tail -c 17 | head -c 4)" \
"$(printf '%s\n' "$codepoint_bin" \
| tail -c 12 | head -c 6)" \
| tail -c 13 | head -c 6)" \
"$(printf '%s\n' "$codepoint_bin" \
| tail -c 6)" ;; \
| tail -c 7)" ;; \
4) printf '11110%s10%s10%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
| tail -c 21 | head -c 3)" \
| tail -c 22 | head -c 3)" \
"$(printf '%s\n' "$codepoint_bin" \
| tail -c 18 | head -c 6)" \
| tail -c 19 | head -c 6)" \
"$(printf '%s\n' "$codepoint_bin" \
| tail -c 12 | head -c 6)" \
| tail -c 13 | head -c 6)" \
"$(printf '%s\n' "$codepoint_bin" \
| tail -c 6)" ;; \
| tail -c 7)" ;; \
esac)"
utf8_oct="$(dd if=/dev/zero bs=1 count=$( \
@@ -75,6 +74,7 @@ while read -r codepoint; do
-e 's/110/6/g' -e 's/111/7/g' \
| tr -d ' ')"
# a little fucky
utf8_hex="$(printf '%s\n' "$utf8_bin" \
| sed \
-e 's/0000/0/g' -e 's/0001/1/g' -e 's/0010/2/g' \
@@ -84,5 +84,6 @@ while read -r codepoint; do
-e 's/1100/C/g' -e 's/1101/D/g' -e 's/1110/E/g' \
-e 's/1111/F/g')"
printf '%s\n' "$utf8_bin"
shift
done