Still doesn't work
This commit is contained in:
2
unicode/Makefile
Normal file
2
unicode/Makefile
Normal file
@@ -0,0 +1,2 @@
|
||||
# Build the utf8 tool from utf8.c. -I../ascii adds the sibling ascii
# directory to the header search path (utf8.c includes "ascii.h").
# The recipe line must start with a tab character.
utf8: utf8.c
	$(CC) -I../ascii -g -o utf8 utf8.c
|
||||
105
unicode/utf8.c
Normal file
105
unicode/utf8.c
Normal file
@@ -0,0 +1,105 @@
|
||||
#include <stdio.h> /* fprintf(3), getc(3), stderr, stdin, EOF */
|
||||
#include <string.h> /* memset(3) */
|
||||
#include "ascii.h" /* ASCII_HEXADECIMAL_DIGITS_LOWER,
|
||||
* ASCII_HEXADECIMAL_DIGITS_UPPER */
|
||||
|
||||
static char *hex = ASCII_HEXADECIMAL_DIGITS_UPPER
|
||||
ASCII_HEXADECIMAL_DIGITS_LOWER;
|
||||
|
||||
/*
 * Value (0-15) of the hexadecimal digit character c, or -1 when c is not a
 * hexadecimal digit. Computed arithmetically, so it relies on an
 * ASCII-compatible character set where A-F and a-f are contiguous.
 */
static int hexascii_digit_value(int c){
	if(c >= '0' && c <= '9')
		return c - '0';
	if(c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if(c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	return -1;
}

/*
 * Write to stdout the raw bytes spelled out by n hexadecimal digit
 * characters in hexes, two digits per byte, most significant nybble first.
 *
 * hexes: digit characters (upper or lower case); not NUL-terminated.
 * n:     number of digits; must be even and positive.
 *
 * Nothing is written when hexes is NULL, when n is odd or not positive, or
 * when any of the n characters is not a hexadecimal digit.
 */
void print_hexascii(unsigned char *hexes, int n){
	int i;

	if(hexes == NULL || n <= 0 || n % 2 != 0)
		return;

	/* Validate everything first so that a bad digit cannot leave a
	 * truncated byte sequence on stdout. */
	for(i = 0; i < n; ++i)
		if(hexascii_digit_value(hexes[i]) < 0)
			return;

	/* One output byte per PAIR of digits. */
	for(i = 0; i < n; i += 2)
		putc((hexascii_digit_value(hexes[i]) << 4)
			| hexascii_digit_value(hexes[i + 1]), stdout);
}
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int c;
|
||||
int i;
|
||||
int l; /* line counter */
|
||||
char *n;
|
||||
unsigned char utf32_hex[8]; /* nybbles */
|
||||
long int utf32_lit;
|
||||
unsigned char utf8_hex[8]; /* nybbles */
|
||||
long int utf8_lit;
|
||||
|
||||
c = '\0';
|
||||
i = 0;
|
||||
l = 1;
|
||||
while(c != EOF){
|
||||
memset(utf32_hex, 0, sizeof utf32_hex);
|
||||
memset(utf8_hex, '0', sizeof utf8_hex);
|
||||
for(
|
||||
i = 0, n = NULL, utf32_lit = 0, utf8_lit = 0;
|
||||
(c = getc(stdin)) != '\n'
|
||||
&& c != EOF
|
||||
&& i < (sizeof utf32_hex) / (sizeof *utf32_hex);
|
||||
++i
|
||||
){
|
||||
if(
|
||||
(i == 0 && c != 'U')
|
||||
|| (i == 1 && c != '+')
|
||||
|| i > 10
|
||||
|| (i > 1 && (n = strchr(hex, c))
|
||||
== NULL)
|
||||
){
|
||||
fprintf(stderr, "%s: %s: Syntax error.\n",
|
||||
argv[0], l);
|
||||
while((c = getc(stdin)) != '\n' && c != EOF);
|
||||
i = -1;
|
||||
break;
|
||||
}
|
||||
if(n != NULL){
|
||||
n -= 16;
|
||||
utf32_hex[i - 2] = *(n -= 16 * (n - hex > 16));
|
||||
}else if(i >= 2)
|
||||
utf32_hex[i - 2] = c;
|
||||
}
|
||||
if(i == -1 || i < 3)
|
||||
continue;
|
||||
while(utf32_hex[7] == '\0'){ /* slow but easy */
|
||||
for(i = 0; i < 7; ++i)
|
||||
utf32_hex[i + 1] = utf32_hex[i];
|
||||
utf32_hex[0] = '0';
|
||||
}
|
||||
/* this code is embarrassing */
|
||||
for(i = 0; i < 8; ++i)
|
||||
utf32_lit = (utf32_lit << 4)
|
||||
+ strchr(hex, utf32_hex[i]) - hex;
|
||||
if(utf32_lit < 128){
|
||||
utf8_hex[7] = utf32_hex[7];
|
||||
utf8_hex[6] = utf32_hex[6];
|
||||
i = 6;
|
||||
goto done;
|
||||
}else{
|
||||
utf8_hex[7] = hex[utf32_lit & 15];
|
||||
utf8_hex[6] = hex[((utf32_lit >> 4) & 3) + 8];
|
||||
}
|
||||
if(utf32_lit < 2048){
|
||||
utf8_hex[5] = hex[(utf32_lit >> 6) & 15];
|
||||
utf8_hex[4] = hex[((utf32_lit >> 10) & 1) + 12];
|
||||
i = 4;
|
||||
goto done;
|
||||
}else{
|
||||
utf8_hex[5] = hex[(utf32_lit >> 6) & 15];
|
||||
utf8_hex[4] = hex[((utf32_lit >> 10) & 3) + 8];
|
||||
}
|
||||
if(utf32_lit < 65536){
|
||||
utf8_hex[3] = hex[(utf32_lit >> 12) & 15];
|
||||
utf8_hex[2] = 14;
|
||||
i = 2;
|
||||
goto done;
|
||||
}else{
|
||||
utf8_hex[3] = hex[(utf32_lit >> 12) & 15];
|
||||
utf8_hex[2] = hex[((utf32_lit >> 16) & 3) + 8];
|
||||
utf8_hex[1] = hex[(utf32_lit >> 21) & 3];
|
||||
utf8_hex[0] = hex[15];
|
||||
i = 0;
|
||||
}
|
||||
done: print_hexascii(utf8_hex + i, 8 - i);
|
||||
++l;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user