#!/bin/sh
#
# toki/toki_sitelen: codepoint -> utf8
#
# Recovered from a whitespace-collapsed git patch that created this file
# (commit a0278729b693, DTB, Sat, 2 Sep 2023, subject "does not work, stuck
# on problem").
#
# Usage: toki_sitelen <codepoints
#
# Reads Unicode code points from standard input, one per line, written in
# hexadecimal with an optional "U+" prefix (for example "U+1F600" or "e9").
# For each one, writes its UTF-8 encoding followed by a newline to standard
# output. Blank lines are skipped. Lines that do not name a Unicode scalar
# value are reported on standard error and make the exit status 1.

#######################################
# Write the UTF-8 encoding of one code point to stdout (no trailing newline).
# Arguments: $1 - code point in hexadecimal, upper or lower case, with an
#                 optional "U+" / "u+" prefix; leading zeros are allowed.
# Outputs:   1 to 4 raw bytes on stdout; nothing if $1 is rejected.
# Returns:   0 on success; 1 if $1 is empty, is not hexadecimal, exceeds
#            U+10FFFF, or is a surrogate (U+D800..U+DFFF), which UTF-8
#            forbids encoding.
#######################################
codepoint_to_utf8() {
  _cp_hex=${1#[Uu]+}

  case $_cp_hex in
    '' | *[!0-9A-Fa-f]*) return 1 ;;
  esac

  # Drop redundant leading zeros so the length check below rejects only
  # values that are genuinely too large (and cannot overflow the shell's
  # integer type).
  while :; do
    case $_cp_hex in
      0?*) _cp_hex=${_cp_hex#0} ;;
      *) break ;;
    esac
  done
  [ "${#_cp_hex}" -le 6 ] || return 1

  _cp=$((0x$_cp_hex))
  [ "$((_cp <= 0x10FFFF))" -eq 1 ] || return 1
  [ "$((_cp >= 0xD800 && _cp <= 0xDFFF))" -eq 0 ] || return 1

  # Choose the shortest form and split the code point into a lead byte plus
  # 6-bit continuation bytes (10xxxxxx). The byte values replace the
  # function's positional parameters, most significant first.
  if [ "$_cp" -lt 128 ]; then
    set -- "$_cp"
  elif [ "$_cp" -lt 2048 ]; then
    set -- \
      "$((0xC0 | (_cp >> 6)))" \
      "$((0x80 | (_cp & 0x3F)))"
  elif [ "$_cp" -lt 65536 ]; then
    set -- \
      "$((0xE0 | (_cp >> 12)))" \
      "$((0x80 | ((_cp >> 6) & 0x3F)))" \
      "$((0x80 | (_cp & 0x3F)))"
  else
    set -- \
      "$((0xF0 | (_cp >> 18)))" \
      "$((0x80 | ((_cp >> 12) & 0x3F)))" \
      "$((0x80 | ((_cp >> 6) & 0x3F)))" \
      "$((0x80 | (_cp & 0x3F)))"
  fi

  # POSIX sh has no direct number-to-byte primitive; the portable route is
  # printf's %b, which turns "\0ooo" octal escapes into raw bytes. The inner
  # printf reuses its format for every argument, yielding one escape per byte.
  printf '%b' "$(printf '\\0%03o' "$@")"
}

#######################################
# Convert every code point read from stdin, one encoded character per line.
# Outputs:   encoded characters on stdout; diagnostics on stderr.
# Returns:   0 if every non-blank line was converted, 1 otherwise.
#######################################
main() {
  status=0

  # The "|| [ -n ... ]" clause keeps a final line that lacks a newline.
  while read -r codepoint || [ -n "$codepoint" ]; do
    [ -n "$codepoint" ] || continue

    if codepoint_to_utf8 "$codepoint"; then
      printf '\n'
    else
      printf '%s: invalid code point: %s\n' "${0##*/}" "$codepoint" >&2
      status=1
    fi
  done

  return "$status"
}

main "$@"