1
0
src/unicode/utf8.c
2023-09-03 02:23:32 -04:00

73 lines
1.7 KiB
C

#include <stdio.h> /* fprintf(3), getc(3), putc(3), stderr, stdin, stdout, EOF */
#include <string.h> /* memset(3), strchr(3) */
#include "ascii.h" /* ASCII_HEXADECIMAL_DIGITS_LOWER,
* ASCII_HEXADECIMAL_DIGITS_UPPER */
/*
 * Discards the remainder of the current line on standard input.
 *
 * Expects an int variable named c in the enclosing scope; when the macro
 * finishes, c holds the character that ended the skip, which is either '\n'
 * or EOF. The do/while(0) wrapper makes the expansion exactly one statement,
 * so it cannot swallow the statement that follows it as a loop body.
 */
#define SKIPLINE do{ \
		while((c = getc(stdin)) != '\n' && c != EOF) \
			; \
	}while(0)

/*
 * Lookup string for hexadecimal digits: the upper-case digit set immediately
 * followed by the lower-case one. main() locates a character in it with
 * strchr(3) and takes the resulting index modulo 16 as the digit's value.
 * NOTE(review): that assumes each ascii.h macro expands to a 16-character
 * string literal in ascending digit order -- confirm against ascii.h.
 */
static const char *hex = ASCII_HEXADECIMAL_DIGITS_UPPER
	ASCII_HEXADECIMAL_DIGITS_LOWER;
/* Most hexadecimal digits accepted after the "U+" prefix. */
enum { UTF8_MAX_HEX_DIGITS = 8 };

/* Largest Unicode code point; UTF-8 has no encoding for anything above it. */
#define UTF8_MAX_CODE_POINT 0x10FFFFUL

/*
 * Encodes the code point cp as UTF-8.
 *
 * cp  - value to encode; must not exceed UTF8_MAX_CODE_POINT.
 * out - receives the encoded bytes, first byte at out[0].
 *
 * Returns the number of bytes stored in out (1 to 4).
 */
static size_t utf8_encode(unsigned long cp, unsigned char out[4]){
	if(cp < 0x80){
		out[0] = (unsigned char)cp;                          /* 0xxxxxxx */
		return 1;
	}
	if(cp < 0x800){
		out[0] = (unsigned char)(0xC0 | (cp >> 6));          /* 110xxxxx */
		out[1] = (unsigned char)(0x80 | (cp & 0x3F));        /* 10xxxxxx */
		return 2;
	}
	if(cp < 0x10000){
		out[0] = (unsigned char)(0xE0 | (cp >> 12));         /* 1110xxxx */
		out[1] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F)); /* 10xxxxxx */
		out[2] = (unsigned char)(0x80 | (cp & 0x3F));        /* 10xxxxxx */
		return 3;
	}
	out[0] = (unsigned char)(0xF0 | ((cp >> 18) & 0x07));    /* 11110xxx */
	out[1] = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));    /* 10xxxxxx */
	out[2] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));     /* 10xxxxxx */
	out[3] = (unsigned char)(0x80 | (cp & 0x3F));            /* 10xxxxxx */
	return 4;
}

/*
 * Reads code point literals from standard input, one per line, in the form
 * "U+" followed by 1 to UTF8_MAX_HEX_DIGITS hexadecimal digits (either case),
 * and writes the UTF-8 encoding of each to standard output. The encoded bytes
 * are written back to back; nothing is inserted between them.
 *
 * A line that stops before the first digit (empty, "U" or "U+") is skipped
 * silently. Any other malformed line, or a value above UTF8_MAX_CODE_POINT,
 * is reported on standard error with its line number and then skipped.
 * Surrogate code points (U+D800 to U+DFFF) are not filtered out.
 *
 * Always returns 0.
 */
int main(int argc, char *argv[]){
	const char *prog;  /* program name for diagnostics */
	const char *n;     /* position of the current digit within hex */
	unsigned char utf8_bytes[4];
	unsigned long cp;  /* code point accumulated from the digits */
	size_t len;
	size_t k;
	int bad;           /* non-zero once the current line is known malformed */
	int c;             /* last character read; SKIPLINE updates it too */
	int i;             /* characters of the current line accepted so far */
	int l;             /* line counter */

	/* argv[0] may legitimately be missing; never hand NULL to %s. */
	prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "utf8";

	c = '\0';
	/* Every pass consumes exactly one input line, so l counts all of them,
	 * including blank and rejected ones. */
	for(l = 1; c != EOF; ++l){
		cp = 0;
		bad = 0;
		for(i = 0; (c = getc(stdin)) != '\n' && c != EOF; ++i){
			n = NULL;
			if(i == 0)
				bad = c != 'U';
			else if(i == 1)
				bad = c != '+';
			/* strchr(3) would match a NUL byte against the string
			 * terminator, so reject it before the lookup. */
			else if(i >= 2 + UTF8_MAX_HEX_DIGITS || c == '\0')
				bad = 1;
			else
				bad = (n = strchr(hex, c)) == NULL;
			if(bad)
				break;
			if(n != NULL)
				cp = (cp << 4) + (unsigned long)(n - hex) % 16;
		}

		if(bad){
			fprintf(stderr, "%s: %d: Syntax error.\n", prog, l);
			SKIPLINE;
			continue;
		}
		if(i < 3) /* no digits on this line: nothing to encode */
			continue;
		if(cp > UTF8_MAX_CODE_POINT){
			fprintf(stderr, "%s: %d: Code point out of range.\n",
				prog, l);
			continue;
		}

		len = utf8_encode(cp, utf8_bytes);
		for(k = 0; k < len; ++k)
			putc(utf8_bytes[k], stdout);
	}
	return 0;
}