2023-09-02 17:00:50 +00:00
|
|
|
#!/bin/sh
|
|
|
|
|
2023-09-02 17:35:40 +00:00
|
|
|
# Print the low $2 bits of the non-negative integer $1 as a string of '0'
# and '1' characters, most significant bit first.
int_to_bits() {
  _itb_value=$1
  _itb_width=$2
  _itb_out=
  while [ "$_itb_width" -gt 0 ]; do
    _itb_out="$((_itb_value & 1))$_itb_out"
    _itb_value=$((_itb_value >> 1))
    _itb_width=$((_itb_width - 1))
  done
  printf '%s\n' "$_itb_out"
}

# Print the UTF-8 encoding of one code point as a string of bits
# (8, 16, 24 or 32 characters of '0'/'1', followed by a newline).
# Arguments: $1 - code point as 1-6 hex digits, optionally prefixed with
#                 "U+" (e.g. U+F1900, 41, u-less lowercase "f1900").
# Outputs:   the bit string on stdout; a diagnostic on stderr on bad input.
# Returns:   0 on success, 1 if $1 is not a code point in U+0000..U+10FFFF.
codepoint_to_utf8_bin() {
  _cp_hex=${1#U+}
  case $_cp_hex in
    '' | *[!0-9A-Fa-f]* | ???????*)
      printf 'invalid codepoint: %s\n' "$1" >&2
      return 1
      ;;
  esac
  _cp=$((0x$_cp_hex))

  # The sequence length depends only on the magnitude of the code point.
  # Each branch must be exclusive: always falling through to the 3/4-byte
  # decision would produce overlong encodings for small code points.
  if [ "$_cp" -lt "$((0x80))" ]; then
    printf '0%s\n' "$(int_to_bits "$_cp" 7)"
  elif [ "$_cp" -lt "$((0x800))" ]; then
    printf '110%s10%s\n' \
      "$(int_to_bits "$((_cp >> 6))" 5)" \
      "$(int_to_bits "$_cp" 6)"
  elif [ "$_cp" -lt "$((0x10000))" ]; then
    printf '1110%s10%s10%s\n' \
      "$(int_to_bits "$((_cp >> 12))" 4)" \
      "$(int_to_bits "$((_cp >> 6))" 6)" \
      "$(int_to_bits "$_cp" 6)"
  elif [ "$_cp" -le "$((0x10FFFF))" ]; then
    printf '11110%s10%s10%s10%s\n' \
      "$(int_to_bits "$((_cp >> 18))" 3)" \
      "$(int_to_bits "$((_cp >> 12))" 6)" \
      "$(int_to_bits "$((_cp >> 6))" 6)" \
      "$(int_to_bits "$_cp" 6)"
  else
    printf 'codepoint out of range: %s\n' "$1" >&2
    return 1
  fi
}

# Read one code point per line from toki_ucsur and print the UTF-8 encoding
# of each as a line of bits.
toki_ucsur "$@" | while read -r codepoint; do
  # TODO: How to bring bin, oct, or hex to actual binary in POSIX?
  # (POSIX printf accepts \ddd octal escapes in its format operand, so a
  # per-byte octal form is probably the most useful intermediate.)
  codepoint_to_utf8_bin "$codepoint"
done
|