#!/bin/sh
# Print the UTF-8 encoding of every code point written by `toki_ucsur "$@"`
# as a string of binary digits, one code point per output line.
#
# toki_ucsur is an external command that receives this script's arguments
# unchanged. It is assumed to write one hexadecimal code point per line,
# optionally prefixed with "U+" -- TODO confirm against toki_ucsur itself.
#
# TODO: How to bring bin, oct, or hex to actual binary in POSIX?
# (Hint: POSIX printf accepts \ddd octal escapes in its format operand, so
# turning each 8-bit group into three octal digits would be one way.)

# bits VALUE WIDTH
# Write the WIDTH least significant bits of the non-negative integer VALUE
# as binary digits, most significant bit first, without a trailing newline.
# POSIX sh has no `local`, so the working variables carry a `_bits_` prefix.
bits() {
  _bits_value=$1
  _bits_width=$2
  _bits_digits=
  while [ "$_bits_width" -gt 0 ]; do
    _bits_digits=$((_bits_value & 1))$_bits_digits
    _bits_value=$((_bits_value >> 1))
    _bits_width=$((_bits_width - 1))
  done
  printf '%s' "$_bits_digits"
}

# codepoint_to_utf8_bin CODEPOINT
# Write the UTF-8 encoding of CODEPOINT as binary digits followed by a
# newline. CODEPOINT is one to six hexadecimal digits (either case) with an
# optional "U+" prefix.
# Returns 0 on success; on malformed or unencodable input it writes a
# diagnostic to stderr, prints nothing on stdout and returns 1.
codepoint_to_utf8_bin() {
  _cp_hex=${1#U+}
  case $_cp_hex in
    '' | *[!0-9A-Fa-f]* | ???????*)
      printf 'invalid code point: %s\n' "$1" >&2
      return 1
      ;;
  esac
  # Safe to splice into the arithmetic: only hex digits passed the check.
  _cp_value=$((0x$_cp_hex))

  # Pick the shortest form that holds the value. The payload widths are
  # 7, 5+6, 4+6+6 and 3+6+6+6 bits; bits() keeps only the low WIDTH bits,
  # so shifting is enough to select each group.
  if [ "$_cp_value" -lt 128 ]; then # < U+0080: 1 byte
    printf '0'
    bits "$_cp_value" 7
  elif [ "$_cp_value" -lt 2048 ]; then # < U+0800: 2 bytes
    printf '110'
    bits "$((_cp_value >> 6))" 5
    printf '10'
    bits "$_cp_value" 6
  elif [ "$_cp_value" -lt 65536 ]; then # < U+10000: 3 bytes
    printf '1110'
    bits "$((_cp_value >> 12))" 4
    printf '10'
    bits "$((_cp_value >> 6))" 6
    printf '10'
    bits "$_cp_value" 6
  elif [ "$_cp_value" -lt 2097152 ]; then # < 2^21: 4 bytes
    # Values above U+10FFFF are not valid Unicode, but anything that fits
    # the 21 payload bits of the 4-byte form is still encoded.
    printf '11110'
    bits "$((_cp_value >> 18))" 3
    printf '10'
    bits "$((_cp_value >> 12))" 6
    printf '10'
    bits "$((_cp_value >> 6))" 6
    printf '10'
    bits "$_cp_value" 6
  else
    printf 'code point does not fit in 4 UTF-8 bytes: %s\n' "$1" >&2
    return 1
  fi
  printf '\n'
}

# `read` without IFS= trims surrounding blanks from each line; -r keeps
# backslashes literal. The loop runs in a pipeline subshell and "$@" is
# expanded once up front, so the positional parameters need no shifting.
toki_ucsur "$@" | while read -r codepoint; do
  codepoint_to_utf8_bin "$codepoint"
done