1
0

more work

This commit is contained in:
dtb 2023-09-04 09:24:37 -04:00
parent 0697a13fcd
commit 039a17f454
3 changed files with 32 additions and 33 deletions

View File

@ -4,6 +4,8 @@ utf8_t utf8(utf32_t c){
unsigned char n;
utf8_t r;
if(c > UTF8_MAX)
c = 0;
r = 0;
switch(n = (c < 0x10000) + (c < 0x0800) + (c < 0x0080)){
case 0: r = 0xF0 + ((c >> 18) & 0x07); /* 11110xxx */

View File

@ -10,5 +10,7 @@
* strings; >=32b value */
#define utf8_t unicode_codepoint_t
#define UTF8_MAX 0x10FFFF
/* encode UTF-32 value into UTF-8 */
utf8_t utf8(utf32_t c);

View File

@ -16,44 +16,39 @@ int main(int argc, char *argv[]){
utf32_t codepoint;
utf8_t encoded;
c = '\0';
l = 0;
init: codepoint = 0;
i = 0;
l = 1;
while(c != EOF){
for(
i = 0, n = NULL, codepoint = 0;
(c = getc(stdin)) != '\n'
&& c != EOF
&& i < 10;
++i
){
if(
(i == 0 && c != 'U')
|| (i == 1 && c != '+')
|| i > 10
|| (i > 1 && (n = strchr(hex, c))
== NULL)
){
++l;
n = NULL;
while((c = getc(stdin)) != EOF){
if(c == '\n'){
if(i < 2 && i > 0) /* empty lines are fine */
fprintf(stderr, "%s: %s: Syntax error.\n",
argv[0], l);
SKIPLINE;
i = -1;
break;
else if(i >= 2){
encoded = utf8(codepoint);
for(i = 3; i >= 0; --i)
if((encoded >> 8 * i) > 0 || i == 0)
putc(encoded >> 8 * i, stdout);
}
if(n != NULL)
codepoint = (codepoint << 4) + (n - hex) % 16;
goto init;
}
if(i < 3){
if(c != '\n' && c != EOF)
SKIPLINE;
if(c == EOF)
return 0;
continue;
if(
(i == 0 && c != 'U')
|| (i == 1 && c != '+')
|| i > 8 /* strlen("U+10FFFF") */
|| (i > 1 && ((n = strchr(hex, c)) == NULL))
){
fprintf(stderr, "%s: %s: Syntax error.\n",
argv[0], l);
while((c = getc(stdin)) != '\n' && c != EOF);
++l;
continue;
}
encoded = utf8(codepoint);
for(i = 3; i >= 0; --i)
if((encoded >> 8 * i) > 0 || i == 0)
putc(encoded >> 8 * i, stdout);
++l;
if(n != NULL)
codepoint = (codepoint << 4) + (n - hex) % 16;
++i;
}
}