still doesn't work
This commit is contained in:
		
							parent
							
								
									9e7b296852
								
							
						
					
					
						commit
						8920624eb6
					
				| @ -1,89 +1,5 @@ | ||||
| #!/bin/sh | ||||
| 
 | ||||
| toki_ucsur "$@" | while read -r codepoint; do | ||||
| 	# normalize to U+000000 | ||||
| 	codepoint="$(printf '%s\n' "$codepoint" | sed 's/^U+//')" | ||||
| 	codepoint="U+$( \ | ||||
| 		dd if=/dev/zero bs=1 count=$( \ | ||||
| 			printf '%s\n' "$codepoint" \ | ||||
| 				| wc -c \ | ||||
| 				| xargs printf '7 - %s\n' \ | ||||
| 				| bc \ | ||||
| 			) 2>/dev/null | tr '\0' 0)$codepoint" | ||||
| set -e | ||||
| 
 | ||||
| 	codepoint_bin="$(printf '%s\n' "$codepoint" \ | ||||
| 		| sed \ | ||||
| 			-e 's/^U+//' -e 's/0/0000/g' -e 's/1/0001/g' \ | ||||
| 			-e 's/2/0010/g' -e 's/3/0011/g' -e 's/4/0100/g' \ | ||||
| 			-e 's/5/0101/g' -e 's/6/0110/g' -e 's/7/0111/g' \ | ||||
| 			-e 's/8/1000/g' -e 's/9/1001/g' -e 's/A/1010/g' \ | ||||
| 			-e 's/B/1011/g' -e 's/C/1100/g' -e 's/D/1101/g' \ | ||||
| 			-e 's/E/1110/g' -e 's/F/1111/g')" | ||||
| 
 | ||||
| 	printf '%s\n' "$codepoint_bin" \ | ||||
| 		| dd bs=17 count=1 2>/dev/null \ | ||||
| 		| grep 1 2>/dev/null 1>&2 \ | ||||
| 			|| bytes=1 | ||||
| 	printf '%s\n' "$codepoint_bin" \ | ||||
| 		| dd bs=13 count=1 2>/dev/null \ | ||||
| 		| grep 1 2>/dev/null 1>&2\ | ||||
| 			|| bytes=2 | ||||
| 	printf '%s\n' "$codepoint_bin" \ | ||||
| 		| dd bs=8 count=1 2>/dev/null \ | ||||
| 		| grep 1 2>/dev/null 1>&2 \ | ||||
| 			&& bytes=4 \ | ||||
| 			|| bytes=3 | ||||
| 
 | ||||
| 	# TODO: How to bring bin,oct, or hex to actual binary in POSIX? | ||||
| 
 | ||||
| 	utf8_bin="$(case $bytes in \ | ||||
| 	1) printf '0%s\n' "$(printf '%s\n' "$codepoint_bin" \ | ||||
| 			| tail -c 8)" ;; \ | ||||
| 	2) printf '110%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \ | ||||
| 			| tail -c 12 | head -c 5)" \ | ||||
| 		"$(printf '%s\n' "$codepoint_bin" \ | ||||
| 			| tail -c 7)" ;; \ | ||||
| 	3) printf '1110%s10%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \ | ||||
| 			| tail -c 17 | head -c 4)" \ | ||||
| 		"$(printf '%s\n' "$codepoint_bin" \ | ||||
| 			| tail -c 13 | head -c 6)" \ | ||||
| 		"$(printf '%s\n' "$codepoint_bin" \ | ||||
| 			| tail -c 7)" ;; \ | ||||
| 	4) printf '11110%s10%s10%s10%s\n' "$(printf '%s\n' "$codepoint_bin" \ | ||||
| 			| tail -c 22 | head -c 3)" \ | ||||
| 		"$(printf '%s\n' "$codepoint_bin" \ | ||||
| 			| tail -c 19 | head -c 6)" \ | ||||
| 		"$(printf '%s\n' "$codepoint_bin" \ | ||||
| 			| tail -c 13 | head -c 6)" \ | ||||
| 		"$(printf '%s\n' "$codepoint_bin" \ | ||||
| 			| tail -c 7)" ;; \ | ||||
| 	esac)" | ||||
| 
 | ||||
| 	utf8_oct="$(dd if=/dev/zero bs=1 count=$( \ | ||||
| 			printf '%s\n' "$utf8_bin" \ | ||||
| 				| wc -c \ | ||||
| 				| xargs printf '34 - %s\n' \ | ||||
| 				| bc \ | ||||
| 			) 2>/dev/null \ | ||||
| 		| tr '\0' 0 \ | ||||
| 		| xargs printf "%s$utf8_bin\n" \ | ||||
| 		| sed 's/.../& /g' \ | ||||
| 		| sed \ | ||||
| 			-e 's/000/0/g' -e 's/001/1/g' -e 's/010/2/g' \ | ||||
| 			-e 's/011/3/g' -e 's/100/4/g' -e 's/101/5/g' \ | ||||
| 			-e 's/110/6/g' -e 's/111/7/g' \ | ||||
| 		| tr -d ' ')" | ||||
| 
 | ||||
| 	# a little fucky | ||||
| 	utf8_hex="$(printf '%s\n' "$utf8_bin" \ | ||||
| 		| sed \ | ||||
| 			-e 's/0000/0/g' -e 's/0001/1/g' -e 's/0010/2/g' \ | ||||
| 			-e 's/0011/3/g' -e 's/0100/4/g' -e 's/0101/5/g' \ | ||||
| 			-e 's/0110/6/g' -e 's/0111/7/g' -e 's/1000/8/g' \ | ||||
| 			-e 's/1001/9/g' -e 's/1010/A/g' -e 's/1011/B/g' \ | ||||
| 			-e 's/1100/C/g' -e 's/1101/D/g' -e 's/1110/E/g' \ | ||||
| 			-e 's/1111/F/g')" | ||||
| 
 | ||||
| 	printf '%s\n' "$utf8_bin" | ||||
| 	shift | ||||
| done | ||||
| toki_ucsur "$@" | utf8 | ||||
|  | ||||
							
								
								
									
										2
									
								
								unicode/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								unicode/Makefile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,2 @@ | ||||
# Build the utf8 program from utf8.c; -I../ascii adds the directory that is
# searched for the "ascii.h" header utf8.c includes.
utf8: utf8.c
	$(CC) -I../ascii -g -o $@ utf8.c
							
								
								
									
										105
									
								
								unicode/utf8.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										105
									
								
								unicode/utf8.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,105 @@ | ||||
| #include <stdio.h> /* fprintf(3), getc(3), stderr, stdin, EOF */ | ||||
| #include <string.h> /* memset(3) */ | ||||
| #include "ascii.h" /* ASCII_HEXADECIMAL_DIGITS_LOWER, | ||||
|                     * ASCII_HEXADECIMAL_DIGITS_UPPER */ | ||||
| 
 | ||||
| static char *hex = ASCII_HEXADECIMAL_DIGITS_UPPER | ||||
| 	ASCII_HEXADECIMAL_DIGITS_LOWER; | ||||
| 
 | ||||
/*
 * Value (0-15) of the ASCII hexadecimal digit c, in either case, or -1 if c
 * is not a hexadecimal digit.
 */
static int hexascii_value(unsigned char c){
	if(c >= '0' && c <= '9')
		return c - '0';
	if(c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if(c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	return -1;
}

/*
 * Write to stdout the bytes spelled out by the n ASCII hexadecimal digits at
 * hexes: two digits per byte, high nybble first, upper or lower case.
 *
 * hexes: the digits; need not be NUL-terminated.
 * n:     number of digits; must be even.
 *
 * Nothing is written when hexes is NULL or n is odd or not positive.  Output
 * stops at the first pair that contains a character that is not a
 * hexadecimal digit.
 */
void print_hexascii(unsigned char *hexes, int n){
	int high;
	int low;

	if(hexes == NULL || n <= 0 || n % 2 != 0)
		return;
	/* Each output byte consumes two digits, so step by two. */
	for(; n > 0; n -= 2, hexes += 2){
		high = hexascii_value(hexes[0]);
		low = hexascii_value(hexes[1]);
		if(high < 0 || low < 0)
			return;
		putc((high << 4) | low, stdout);
	}
}
| 
 | ||||
| int main(int argc, char *argv[]){ | ||||
| 	int c; | ||||
| 	int i; | ||||
| 	int l; /* line counter */ | ||||
| 	char *n; | ||||
| 	unsigned char utf32_hex[8]; /* nybbles */ | ||||
| 	long int utf32_lit; | ||||
| 	unsigned char utf8_hex[8]; /* nybbles */ | ||||
| 	long int utf8_lit; | ||||
| 
 | ||||
| 	c = '\0'; | ||||
| 	i = 0; | ||||
| 	l = 1; | ||||
| 	while(c != EOF){ | ||||
| 		memset(utf32_hex, 0, sizeof utf32_hex); | ||||
| 		memset(utf8_hex, '0', sizeof utf8_hex); | ||||
| 		for( | ||||
| 			i = 0, n = NULL, utf32_lit = 0, utf8_lit = 0; | ||||
| 			(c = getc(stdin)) != '\n' | ||||
| 			&& c != EOF | ||||
| 			&& i < (sizeof utf32_hex) / (sizeof *utf32_hex); | ||||
| 			++i | ||||
| 		){ | ||||
| 			if( | ||||
| 				(i == 0 && c != 'U') | ||||
| 				|| (i == 1 && c != '+') | ||||
| 				|| i > 10 | ||||
| 				|| (i > 1 && (n = strchr(hex, c)) | ||||
| 					== NULL) | ||||
| 			){ | ||||
| 				fprintf(stderr, "%s: %s: Syntax error.\n", | ||||
| 					argv[0], l); | ||||
| 				while((c = getc(stdin)) != '\n' && c != EOF); | ||||
| 				i = -1; | ||||
| 				break; | ||||
| 			} | ||||
| 			if(n != NULL){ | ||||
| 				n -= 16; | ||||
| 				utf32_hex[i - 2] = *(n -= 16 * (n - hex > 16)); | ||||
| 			}else if(i >= 2) | ||||
| 				utf32_hex[i - 2] = c; | ||||
| 		} | ||||
| 		if(i == -1 || i < 3) | ||||
| 			continue; | ||||
| 		while(utf32_hex[7] == '\0'){ /* slow but easy */ | ||||
| 			for(i = 0; i < 7; ++i) | ||||
| 				utf32_hex[i + 1] = utf32_hex[i]; | ||||
| 			utf32_hex[0] = '0'; | ||||
| 		} | ||||
| 		/* this code is embarrassing */ | ||||
| 		for(i = 0; i < 8; ++i) | ||||
| 			utf32_lit = (utf32_lit << 4) | ||||
| 				+ strchr(hex, utf32_hex[i]) - hex; | ||||
| 		if(utf32_lit < 128){ | ||||
| 			utf8_hex[7] = utf32_hex[7]; | ||||
| 			utf8_hex[6] = utf32_hex[6]; | ||||
| 			i = 6; | ||||
| 			goto done; | ||||
| 		}else{ | ||||
| 			utf8_hex[7] = hex[utf32_lit & 15]; | ||||
| 			utf8_hex[6] = hex[((utf32_lit >> 4) & 3) + 8]; | ||||
| 		} | ||||
| 		if(utf32_lit < 2048){ | ||||
| 			utf8_hex[5] = hex[(utf32_lit >> 6) & 15]; | ||||
| 			utf8_hex[4] = hex[((utf32_lit >> 10) & 1) + 12]; | ||||
| 			i = 4; | ||||
| 			goto done; | ||||
| 		}else{ | ||||
| 			utf8_hex[5] = hex[(utf32_lit >> 6) & 15]; | ||||
| 			utf8_hex[4] = hex[((utf32_lit >> 10) & 3) + 8]; | ||||
| 		} | ||||
| 		if(utf32_lit < 65536){ | ||||
| 			utf8_hex[3] = hex[(utf32_lit >> 12) & 15]; | ||||
| 			utf8_hex[2] = 14; | ||||
| 			i = 2; | ||||
| 			goto done; | ||||
| 		}else{ | ||||
| 			utf8_hex[3] = hex[(utf32_lit >> 12) & 15]; | ||||
| 			utf8_hex[2] = hex[((utf32_lit >> 16) & 3) + 8]; | ||||
| 			utf8_hex[1] = hex[(utf32_lit >> 21) & 3]; | ||||
| 			utf8_hex[0] = hex[15]; | ||||
| 			i = 0; | ||||
| 		} | ||||
| done:		print_hexascii(utf8_hex + i, 8 - i); | ||||
| 		++l; | ||||
| 	} | ||||
| } | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user