still broken
This commit is contained in:
parent
a0278729b6
commit
9e7b296852
3
toki/README.txt
Normal file
3
toki/README.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
- move the executables to a directory in your `PATH`
|
||||||
|
- `$ toki_update` to fetch the dictionary
|
||||||
|
- `$ toki_sitelen` to tokiponize
|
@ -1,7 +1,6 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
# codepoint -> utf8
|
toki_ucsur "$@" | while read -r codepoint; do
|
||||||
while read -r codepoint; do
|
|
||||||
# normalize to U+000000
|
# normalize to U+000000
|
||||||
codepoint="$(printf '%s\n' "$codepoint" | sed 's/^U+//')"
|
codepoint="$(printf '%s\n' "$codepoint" | sed 's/^U+//')"
|
||||||
codepoint="U+$( \
|
codepoint="U+$( \
|
||||||
@ -39,25 +38,25 @@ while read -r codepoint; do
|
|||||||
|
|
||||||
utf8_bin="$(case $bytes in \
|
utf8_bin="$(case $bytes in \
|
||||||
1) printf '0%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
1) printf '0%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
||||||
| tail -c 7)" ;; \
|
| tail -c 8)" ;; \
|
||||||
2) printf '110%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
2) printf '110%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
||||||
| tail -c 11 | head -c 5)" \
|
| tail -c 12 | head -c 5)" \
|
||||||
"$(printf '%s\n' "$codepoint_bin" \
|
"$(printf '%s\n' "$codepoint_bin" \
|
||||||
| tail -c 6)" ;; \
|
| tail -c 7)" ;; \
|
||||||
3) printf '1110%s10%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
3) printf '1110%s10%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
||||||
| tail -c 16 | head -c 4)" \
|
| tail -c 17 | head -c 4)" \
|
||||||
"$(printf '%s\n' "$codepoint_bin" \
|
"$(printf '%s\n' "$codepoint_bin" \
|
||||||
| tail -c 12 | head -c 6)" \
|
| tail -c 13 | head -c 6)" \
|
||||||
"$(printf '%s\n' "$codepoint_bin" \
|
"$(printf '%s\n' "$codepoint_bin" \
|
||||||
| tail -c 6)" ;; \
|
| tail -c 7)" ;; \
|
||||||
4) printf '11110%s10%s10%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
4) printf '11110%s10%s10%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \
|
||||||
| tail -c 21 | head -c 3)" \
|
| tail -c 22 | head -c 3)" \
|
||||||
"$(printf '%s\n' "$codepoint_bin" \
|
"$(printf '%s\n' "$codepoint_bin" \
|
||||||
| tail -c 18 | head -c 6)" \
|
| tail -c 19 | head -c 6)" \
|
||||||
"$(printf '%s\n' "$codepoint_bin" \
|
"$(printf '%s\n' "$codepoint_bin" \
|
||||||
| tail -c 12 | head -c 6)" \
|
| tail -c 13 | head -c 6)" \
|
||||||
"$(printf '%s\n' "$codepoint_bin" \
|
"$(printf '%s\n' "$codepoint_bin" \
|
||||||
| tail -c 6)" ;; \
|
| tail -c 7)" ;; \
|
||||||
esac)"
|
esac)"
|
||||||
|
|
||||||
utf8_oct="$(dd if=/dev/zero bs=1 count=$( \
|
utf8_oct="$(dd if=/dev/zero bs=1 count=$( \
|
||||||
@ -75,6 +74,7 @@ while read -r codepoint; do
|
|||||||
-e 's/110/6/g' -e 's/111/7/g' \
|
-e 's/110/6/g' -e 's/111/7/g' \
|
||||||
| tr -d ' ')"
|
| tr -d ' ')"
|
||||||
|
|
||||||
|
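# NOTE(review): fragile — the sed expressions below are applied one after
# another to the whole, unseparated bit string, so a pattern can match across
# a nibble boundary (e.g. the zeros in 1110 0001 are hit by s/0000/0/g) and
# already-substituted 0/1 hex digits are indistinguishable from remaining
# bits. Presumably the bits should be split into 4-bit groups first — confirm.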
|
||||||
utf8_hex="$(printf '%s\n' "$utf8_bin" \
|
utf8_hex="$(printf '%s\n' "$utf8_bin" \
|
||||||
| sed \
|
| sed \
|
||||||
-e 's/0000/0/g' -e 's/0001/1/g' -e 's/0010/2/g' \
|
-e 's/0000/0/g' -e 's/0001/1/g' -e 's/0010/2/g' \
|
||||||
@ -84,5 +84,6 @@ while read -r codepoint; do
|
|||||||
-e 's/1100/C/g' -e 's/1101/D/g' -e 's/1110/E/g' \
|
-e 's/1100/C/g' -e 's/1101/D/g' -e 's/1110/E/g' \
|
||||||
-e 's/1111/F/g')"
|
-e 's/1111/F/g')"
|
||||||
|
|
||||||
|
printf '%s\n' "$utf8_bin"
|
||||||
shift
|
shift
|
||||||
done
|
done
|
||||||
|
Loading…
Reference in New Issue
Block a user