1
0
This commit is contained in:
dtb 2023-09-04 10:30:16 -04:00
parent 039a17f454
commit 81dd6982aa
11 changed files with 42 additions and 15 deletions

2
unicode/1.expected Normal file
View File

@ -0,0 +1,2 @@
0000000 41 e2 89 a2 ce 91 2e
0000007

3
unicode/1.test Executable file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# RFC 3629, section 7, first example: "A<NOT IDENTICAL TO><ALPHA>."
# i.e. U+0041 U+2262 U+0391 U+002E, one code point per line on stdin.
# The expected encoding (41 | e2 89 a2 | ce 91 | 2e) is in 1.expected.
# NOTE(review): assumes `./utf 8` reads "U+XXXX" lines and writes UTF-8
# in the same dump format as 1.expected -- confirm against utf.c.
printf '%s\n%s\n%s\n%s\n' U+0041 U+2262 U+0391 U+002E | ./utf 8

2
unicode/2.expected Normal file
View File

@ -0,0 +1,2 @@
0000000 ed 95 9c ea b5 ad ec 96 b4
0000009

3
unicode/2.test Executable file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# RFC 3629, section 7, Korean example: U+D55C U+AD6D U+C5B4.
# Each code point is fed to ./utf on its own line; printf re-applies
# the single '%s\n' format once per argument.
printf '%s\n' \
	U+D55C \
	U+AD6D \
	U+C5B4 \
	| ./utf 8

2
unicode/3.expected Normal file
View File

@ -0,0 +1,2 @@
0000000 e6 97 a5 e6 9c ac e8 aa 9e
0000009

3
unicode/3.test Executable file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# RFC 3629, section 7, Japanese example: U+65E5 U+672C U+8A9E.
# Each code point is fed to ./utf on its own line; printf re-applies
# the single '%s\n' format once per argument.
printf '%s\n' \
	U+65E5 \
	U+672C \
	U+8A9E \
	| ./utf 8

1
unicode/HACKING Normal file
View File

@ -0,0 +1 @@
The test cases are the examples from RFC 3629, section 7.

View File

@ -1,12 +1,12 @@
# Link the `utf` tool. The output name must match the target name,
# otherwise make never sees `utf` as up to date and relinks every run,
# and `clean` (which removes `utf`) would leave the real binary behind.
utf: utf.o libunicode.o
	$(CC) -g -o utf libunicode.o utf.o

# No prerequisites or recipe are given here.
# NOTE(review): presumably built from libunicode.c by make's built-in
# .c.o inference rule -- confirm that libunicode.c exists alongside.
libunicode.o:

# -I../ascii adds the sibling ascii directory to the header search path.
utf.o: libunicode.h utf.c
	$(CC) -I../ascii -c -g -o utf.o utf.c

# Remove all object files and the linked tool.
clean:
	rm -f *.o utf

.PHONY: clean

View File

@ -1,23 +1,23 @@
#include "libunicode.h"
/*
 * utf8 - encode the code point c as a UTF-8 byte sequence.
 *
 * The encoded bytes are packed into the return value: the lead byte ends
 * up in the most significant occupied octet and the final byte in the
 * low 8 bits. A code point greater than UTF8_MAX is treated as 0, so
 * the function returns 0 for it.
 */
utf8_t utf8(utf32_t c){
	unsigned char n; /* number of bytes in the encoding, minus 1 */
	utf8_t r;

	if(c > UTF8_MAX) /* return 0 if c exceeds max */
		c = 0;

	r = 0;
	/*
	 * Enter the switch at the lead byte for this sequence length and
	 * fall through the remaining cases. In each case, `n == k` is true
	 * only when that case is the entry point (emit a lead byte);
	 * otherwise the case emits a continuation byte, 10xxxxxx (0x80).
	 */
	switch(n = (c >= 0x010000) + (c >= 0x0800) + (c >= 0x0080)){
	case 3:	r = 0xF0 + ((c >> 18) & 0x07);        /* 11110xxx */
		/* FALLTHROUGH */
	case 2:	r = (r << 8) + (n == 2
			? 0xE0 + ((c >> 12) & 0x0F)   /* 1110xxxx */
			: 0x80 + ((c >> 12) & 0x3F)); /* 10xxxxxx */
		/* FALLTHROUGH */
	case 1:	r = (r << 8) + (n == 1
			? 0xC0 + ((c >> 6) & 0x1F)    /* 110xxxxx */
			: 0x80 + ((c >> 6) & 0x3F));  /* 10xxxxxx */
		/* FALLTHROUGH */
	case 0:	r = (r << 8) + (n == 0
			? (c & 0x7F)                  /* 0xxxxxxx */
			: 0x80 + (c & 0x3F));         /* 10xxxxxx, low six bits */
	}
	return r;
}

11
unicode/test.sh Normal file
View File

@ -0,0 +1,11 @@
#!/bin/sh
# Run the numbered tests (1.test, 2.test, ...) in order, stopping at the
# first number for which no N.test file exists. Each test's stdout is
# saved to N.result and compared against N.expected.
# With `set -e`, a failing test script or a diff mismatch aborts the run
# with a non-zero status.
set -e

i=1
while test -e "$i".test; do
	./"$i".test >"$i".result
	# Compare this test's own files, not always those of test 1.
	diff "$i".expected "$i".result
	printf 'Test %s passed.\n' "$i"
	i=$((i + 1))
done