spring cleaning 1
This commit is contained in:
2
Wip/unicode/1.expected
Normal file
2
Wip/unicode/1.expected
Normal file
@@ -0,0 +1,2 @@
|
||||
0000000 41 e2 89 a2 ce 91 2e
|
||||
0000007
|
||||
3
Wip/unicode/1.test
Executable file
3
Wip/unicode/1.test
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh

# RFC 3629 section 7, first example: "A<NOT IDENTICAL TO><ALPHA>."
# The four code points below encode to the seven bytes listed in 1.expected
# (41 e2 89 a2 ce 91 2e); U+2262 supplies the e2 89 a2 sequence.
printf '%s\n%s\n%s\n%s\n' U+0041 U+2262 U+0391 U+002E | ./utf 8 | od -t x1
|
||||
2
Wip/unicode/2.expected
Normal file
2
Wip/unicode/2.expected
Normal file
@@ -0,0 +1,2 @@
|
||||
0000000 ed 95 9c ea b5 ad ec 96 b4
|
||||
0000009
|
||||
3
Wip/unicode/2.test
Executable file
3
Wip/unicode/2.test
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh

# Feed three code points, one per line, through the UTF-8 encoder and dump
# the resulting bytes in hexadecimal for comparison with 2.expected.
# printf reuses its format for every remaining argument, so a single
# '%s\n' yields one line per code point.
printf '%s\n' U+D55C U+AD6D U+C5B4 | ./utf 8 | od -t x1
|
||||
2
Wip/unicode/3.expected
Normal file
2
Wip/unicode/3.expected
Normal file
@@ -0,0 +1,2 @@
|
||||
0000000 e6 97 a5 e6 9c ac e8 aa 9e
|
||||
0000009
|
||||
3
Wip/unicode/3.test
Executable file
3
Wip/unicode/3.test
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh

# Feed three code points, one per line, through the UTF-8 encoder and dump
# the resulting bytes in hexadecimal for comparison with 3.expected.
# printf reuses its format for every remaining argument, so a single
# '%s\n' yields one line per code point.
printf '%s\n' U+65E5 U+672C U+8A9E | ./utf 8 | od -t x1
|
||||
1
Wip/unicode/HACKING
Normal file
1
Wip/unicode/HACKING
Normal file
@@ -0,0 +1 @@
|
||||
The test cases are the examples from RFC 3629, section 7.
|
||||
15
Wip/unicode/Makefile
Normal file
15
Wip/unicode/Makefile
Normal file
@@ -0,0 +1,15 @@
|
||||
# Build the `utf` command-line encoder and run the test suite against it.

# Run the shared test driver; it needs the freshly built ./utf binary.
test: utf
	sh ../testing/test.sh

utf: utf.o libunicode.o
	$(CC) -g -o utf libunicode.o utf.o

# Compiled by make's built-in .c -> .o rule. The prerequisites are listed
# explicitly so that a change to libunicode.h triggers a rebuild.
libunicode.o: libunicode.c libunicode.h

# ascii.h is found through -I../ascii.
utf.o: libunicode.h utf.c
	$(CC) -I../ascii -c -g -o utf.o utf.c

clean:
	rm -f *.o utf

.PHONY: clean test
|
||||
23
Wip/unicode/libunicode.c
Normal file
23
Wip/unicode/libunicode.c
Normal file
@@ -0,0 +1,23 @@
|
||||
#include "libunicode.h"
|
||||
|
||||
/* Encode the code point c as UTF-8 (RFC 3629, section 3).
 *
 * The encoded bytes are returned packed into a single integer, first byte
 * in the most significant occupied position: U+0041 gives 0x41, U+0391
 * gives 0xCE91, U+2262 gives 0xE289A2. A code point above UTF8_MAX is
 * treated as 0, so the function returns 0 for it. */
utf32_t utf8(utf32_t c){
	unsigned char n; /* number of continuation bytes (0 to 3) */
	utf8_t r;

	if(c > UTF8_MAX) /* return 0 if c exceeds max */
		c = 0;

	n = (c >= 0x010000) + (c >= 0x0800) + (c >= 0x0080);

	/* Leading byte: its high bits announce the sequence length and its
	 * low bits carry the most significant payload bits. */
	switch(n){
	case 3:
		r = 0xF0 | ((c >> 18) & 0x07); /* 11110xxx */
		break;
	case 2:
		r = 0xE0 | ((c >> 12) & 0x0F); /* 1110xxxx */
		break;
	case 1:
		r = 0xC0 | ((c >> 6) & 0x1F); /* 110xxxxx */
		break;
	default:
		return c & 0x7F; /* 0xxxxxxx */
	}

	/* Continuation bytes, most significant six-bit group first; each is
	 * tagged with the 10xxxxxx marker (0x80). */
	while(n > 0){
		--n;
		r = (r << 8) | 0x80 | ((c >> (6 * n)) & 0x3F);
	}

	return r;
}
|
||||
16
Wip/unicode/libunicode.h
Normal file
16
Wip/unicode/libunicode.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef LIBUNICODE_H
#define LIBUNICODE_H

/* unicode_codepoint_t is an unsigned integer type at least 32 bits wide.
 *
 * NOTE(review): which type is chosen depends on whether <stdint.h> was
 * included before this header, so two translation units can disagree
 * (uint32_t versus unsigned long). Include <stdint.h> consistently, or not
 * at all, in every file that uses this header. */
#if defined UINT32_MAX /* indicator <stdint.h> is included */
#	define unicode_codepoint_t uint32_t
#else
/* C99 draft 5.2.4.2.1 "Sizes of integer types" says unsigned long must be
 * able to hold 32 bits. */
#	define unicode_codepoint_t unsigned long
#endif

/* a single code point (UTF-32 code unit) */
#define utf32_t unicode_codepoint_t

/* for holding the literal numeric value of a UTF-8 rune, not for assembling
 * strings; >= 32 bit value */
#define utf8_t unicode_codepoint_t

/* largest code point utf8() will encode */
#define UTF8_MAX 0x10FFFF

/* Encode the UTF-32 value c into UTF-8. The return value holds the encoded
 * bytes packed into one integer (see utf8_t above). */
utf32_t utf8(utf32_t c);

#endif /* LIBUNICODE_H */
|
||||
58
Wip/unicode/utf.c
Normal file
58
Wip/unicode/utf.c
Normal file
@@ -0,0 +1,58 @@
|
||||
#include <stdio.h> /* fprintf(3), getc(3), putc(3) stderr, stdin, EOF */
|
||||
#include <string.h> /* strchr(3) */
|
||||
#include "ascii.h" /* ASCII_HEXADECIMAL_DIGITS_LOWER,
|
||||
* ASCII_HEXADECIMAL_DIGITS_UPPER */
|
||||
#include "libunicode.h" /* utf8(3) */
|
||||
|
||||
static struct {
|
||||
char *name;
|
||||
utf32_t (*f)(utf32_t);
|
||||
}
|
||||
|
||||
static char *hex = ASCII_HEXADECIMAL_DIGITS_UPPER
|
||||
ASCII_HEXADECIMAL_DIGITS_LOWER;
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int c;
|
||||
int i;
|
||||
int l; /* line counter */
|
||||
char *n;
|
||||
utf32_t codepoint;
|
||||
utf8_t encoded;
|
||||
|
||||
l = 0;
|
||||
|
||||
init: codepoint = 0;
|
||||
i = 0;
|
||||
++l;
|
||||
n = NULL;
|
||||
while((c = getc(stdin)) != EOF){
|
||||
if(c == '\n'){
|
||||
if(i < 2 && i > 0) /* empty lines are fine */
|
||||
fprintf(stderr, "%s: %s: Syntax error.\n",
|
||||
argv[0], l);
|
||||
else if(i >= 2){
|
||||
encoded = utf8(codepoint);
|
||||
for(i = 3; i >= 0; --i)
|
||||
if((encoded >> 8 * i) > 0 || i == 0)
|
||||
putc(encoded >> 8 * i, stdout);
|
||||
}
|
||||
goto init;
|
||||
}
|
||||
if(
|
||||
(i == 0 && c != 'U')
|
||||
|| (i == 1 && c != '+')
|
||||
|| i > 8 /* strlen("U+10FFFF") */
|
||||
|| (i > 1 && ((n = strchr(hex, c)) == NULL))
|
||||
){
|
||||
fprintf(stderr, "%s: %s: Syntax error.\n",
|
||||
argv[0], l);
|
||||
while((c = getc(stdin)) != '\n' && c != EOF);
|
||||
++l;
|
||||
continue;
|
||||
}
|
||||
if(n != NULL)
|
||||
codepoint = (codepoint << 4) + (n - hex) % 16;
|
||||
++i;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user