diff --git a/unicode/utf8.c b/unicode/utf8.c
index 24f6c7b..1bed9bf 100644
--- a/unicode/utf8.c
+++ b/unicode/utf8.c
@@ -12,7 +12,7 @@ int main(int argc, char *argv[]){
     int i;
     int l; /* line counter */
     char *n;
-    long int utf32_lit;
+    long unsigned int utf32_lit;
     unsigned char utf8_bytes[8];
 
     c = '\0';
@@ -50,20 +50,21 @@ int main(int argc, char *argv[]){
                 return 0;
             continue;
         }
-        switch(i = (utf32_lit < 65536)
-                 + (utf32_lit < 2048)
-                 + (utf32_lit < 128)){
+        /* i = index of the lead byte in utf8_bytes[0..3]; every case falls through on purpose so the trailing bytes get filled. Continuation bytes are 0x80 | 6 payload bits. */
+        switch(i = (utf32_lit < 0x10000)
+                 + (utf32_lit < 0x0800)
+                 + (utf32_lit < 0x0080)){
         case 0: utf8_bytes[0] =
-            ((utf32_lit >> 18) & 7) + 240; /* 11110xxx */
+            0xF0 + ((utf32_lit >> 18) & 0x07); /* 11110xxx */
         case 1: utf8_bytes[1] = i == 1
-            ? ((utf32_lit >> 12) & 15) + 224 /* 1110xxxx */
-            : ((utf32_lit >> 12) & 63) + 80; /* 10xxxxxx */
+            ? 0xE0 + ((utf32_lit >> 12) & 0x0F) /* 1110xxxx */
+            : 0x80 + ((utf32_lit >> 12) & 0x3F); /* 10xxxxxx */
         case 2: utf8_bytes[2] = i == 2
-            ? ((utf32_lit >> 6) & 31) + 192 /* 110xxxxx */
-            : ((utf32_lit >> 6) & 63) + 80; /* 10xxxxxx */
+            ? 0xC0 + ((utf32_lit >> 6) & 0x1F) /* 110xxxxx */
+            : 0x80 + ((utf32_lit >> 6) & 0x3F); /* 10xxxxxx */
         case 3: utf8_bytes[3] = i == 3
-            ? utf8_bytes[3] = utf32_lit & 127 /* 0xxxxxxx */
-            : (utf32_lit & 63) + 80; /* 10xxxxxx */
+            ? utf32_lit & 0x7F /* 0xxxxxxx */
+            : 0x80 + (utf32_lit & 0x3F); /* 10xxxxxx */
         }
         for( ; i < 4; ++i)
             putc(utf8_bytes[i], stdout);