still broken
This commit is contained in:
parent
a0278729b6
commit
9e7b296852
3
toki/README.txt
Normal file
3
toki/README.txt
Normal file
@ -0,0 +1,3 @@
|
||||
- move executables somewhere in path
|
||||
- `$ toki_update` to fetch dictionary
|
||||
- `$ toki_sitelen` to tokiponize
|
@ -1,7 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
# codepoint -> utf8
|
||||
while read -r codepoint; do
|
||||
toki_ucsur "$@" | while read -r codepoint; do
|
||||
# normalize to U+000000
|
||||
codepoint="$(printf '%s\n' "$codepoint" | sed 's/^U+//')"
|
||||
codepoint="U+$( \
|
||||
@ -39,25 +38,25 @@ while read -r codepoint; do
|
||||
|
||||
utf8_bin="$(case $bytes in \
|
||||
1) printf '0%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
||||
| tail -c 7)" ;; \
|
||||
| tail -c 8)" ;; \
|
||||
2) printf '110%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
||||
| tail -c 11 | head -c 5)" \
|
||||
| tail -c 12 | head -c 5)" \
|
||||
"$(printf '%s\n' "$codepoint_bin" \
|
||||
| tail -c 6)" ;; \
|
||||
| tail -c 7)" ;; \
|
||||
3) printf '1110%s10%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
||||
| tail -c 16 | head -c 4)" \
|
||||
| tail -c 17 | head -c 4)" \
|
||||
"$(printf '%s\n' "$codepoint_bin" \
|
||||
| tail -c 12 | head -c 6)" \
|
||||
| tail -c 13 | head -c 6)" \
|
||||
"$(printf '%s\n' "$codepoint_bin" \
|
||||
| tail -c 6)" ;; \
|
||||
| tail -c 7)" ;; \
|
||||
4) printf '11110%s10%s10%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
||||
| tail -c 21 | head -c 3)" \
|
||||
| tail -c 22 | head -c 3)" \
|
||||
"$(printf '%s\n' "$codepoint_bin" \
|
||||
| tail -c 18 | head -c 6)" \
|
||||
| tail -c 19 | head -c 6)" \
|
||||
"$(printf '%s\n' "$codepoint_bin" \
|
||||
| tail -c 12 | head -c 6)" \
|
||||
| tail -c 13 | head -c 6)" \
|
||||
"$(printf '%s\n' "$codepoint_bin" \
|
||||
| tail -c 6)" ;; \
|
||||
| tail -c 7)" ;; \
|
||||
esac)"
|
||||
|
||||
utf8_oct="$(dd if=/dev/zero bs=1 count=$( \
|
||||
@ -75,6 +74,7 @@ while read -r codepoint; do
|
||||
-e 's/110/6/g' -e 's/111/7/g' \
|
||||
| tr -d ' ')"
|
||||
|
||||
# a little fucky
|
||||
utf8_hex="$(printf '%s\n' "$utf8_bin" \
|
||||
| sed \
|
||||
-e 's/0000/0/g' -e 's/0001/1/g' -e 's/0010/2/g' \
|
||||
@ -84,5 +84,6 @@ while read -r codepoint; do
|
||||
-e 's/1100/C/g' -e 's/1101/D/g' -e 's/1110/E/g' \
|
||||
-e 's/1111/F/g')"
|
||||
|
||||
printf '%s\n' "$utf8_bin"
|
||||
shift
|
||||
done
|
||||
|
Loading…
Reference in New Issue
Block a user