1
0

spring cleaning 1

This commit is contained in:
dtb
2023-12-31 10:46:23 -07:00
parent ea31d1221c
commit f501a77764
116 changed files with 4712 additions and 8 deletions

2
Wip/unicode/1.expected Normal file
View File

@@ -0,0 +1,2 @@
0000000 41 e2 89 a2 ce 91 2e
0000007

3
Wip/unicode/1.test Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/sh
# RFC 3629 section 7, first example: "A<NOT IDENTICAL TO><ALPHA>.".
# All four code points must be supplied; U+2262 produces the e2 89 a2 bytes
# that the expected output contains.
printf '%s\n%s\n%s\n%s\n' U+0041 U+2262 U+0391 U+002E | ./utf 8 | od -t x1

2
Wip/unicode/2.expected Normal file
View File

@@ -0,0 +1,2 @@
0000000 ed 95 9c ea b5 ad ec 96 b4
0000009

3
Wip/unicode/2.test Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/sh
# RFC 3629 section 7, second example: three Hangul code points, one per line,
# encoded by ./utf and dumped as hexadecimal bytes for comparison.
printf '%s\n%s\n%s\n' U+D55C U+AD6D U+C5B4 \
	| ./utf 8 \
	| od -t x1

2
Wip/unicode/3.expected Normal file
View File

@@ -0,0 +1,2 @@
0000000 e6 97 a5 e6 9c ac e8 aa 9e
0000009

3
Wip/unicode/3.test Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/sh
# RFC 3629 section 7, third example: three CJK code points, one per line,
# encoded by ./utf and dumped as hexadecimal bytes for comparison.
printf '%s\n%s\n%s\n' U+65E5 U+672C U+8A9E \
	| ./utf 8 \
	| od -t x1

1
Wip/unicode/HACKING Normal file
View File

@@ -0,0 +1 @@
Tests are from RFC3629, section 7.

15
Wip/unicode/Makefile Normal file
View File

@@ -0,0 +1,15 @@
# Build the `utf` encoder and run the test scripts in this directory.
test: utf
	sh ../testing/test.sh

utf: utf.o libunicode.o
	$(CC) -g -o utf libunicode.o utf.o

# No recipe here: make's built-in .c -> .o rule compiles it. The prerequisites
# are listed so that a change to the header triggers a rebuild.
libunicode.o: libunicode.c libunicode.h

# ascii.h is found through -I../ascii.
utf.o: libunicode.h utf.c
	$(CC) -I../ascii -c -g -o utf.o utf.c

clean:
	rm -f *.o utf

.PHONY: clean test

23
Wip/unicode/libunicode.c Normal file
View File

@@ -0,0 +1,23 @@
#include "libunicode.h"
/* Encode the code point c as UTF-8 (RFC 3629).
 *
 * The encoded bytes are returned packed into a single integer: the lead byte
 * is the most significant non-zero byte and the final byte occupies bits 0-7.
 * For example U+0391 yields 0xCE91 and U+2262 yields 0xE289A2.
 *
 * A value above UTF8_MAX is treated as 0, so the result is 0. Surrogate code
 * points (U+D800..U+DFFF) are not rejected here. */
utf32_t utf8(utf32_t c){
	unsigned char n; /* number of continuation bytes, 0 to 3 */
	utf8_t r;

	if(c > UTF8_MAX) /* out of range: encode as 0 */
		c = 0;

	/* Every case below the entry point shifts into r, so r must start out
	 * with a defined value. */
	r = 0;

	/* Enter at the lead byte for this length and fall through so that each
	 * later case appends one continuation byte (10xxxxxx, marker 0x80). */
	switch(n = (c >= 0x010000) + (c >= 0x0800) + (c >= 0x0080)){
	case 3:
		r = 0xF0 + ((c >> 18) & 0x07); /* 11110xxx */
		/* fallthrough */
	case 2:
		r = (r << 8) + (n == 2
			? 0xE0 + ((c >> 12) & 0x0F)  /* 1110xxxx */
			: 0x80 + ((c >> 12) & 0x3F)); /* 10xxxxxx */
		/* fallthrough */
	case 1:
		r = (r << 8) + (n == 1
			? 0xC0 + ((c >> 6) & 0x1F)  /* 110xxxxx */
			: 0x80 + ((c >> 6) & 0x3F)); /* 10xxxxxx */
		/* fallthrough */
	case 0:
		r = (r << 8) + (n == 0
			? (c & 0x7F)          /* 0xxxxxxx */
			: 0x80 + (c & 0x3F)); /* 10xxxxxx, lowest six bits */
	}
	return r;
}

16
Wip/unicode/libunicode.h Normal file
View File

@@ -0,0 +1,16 @@
/* libunicode: types and routines for converting Unicode code points. */
#ifndef LIBUNICODE_H
#define LIBUNICODE_H

/* Pick an unsigned integer type of at least 32 bits for code points. */
#if defined UINT32_MAX /* indicator <stdint.h> is included */
#	define unicode_codepoint_t uint32_t
#else
/* C99 draft 5.2.4.2.1 Sizes of integer types says unsigned long must be able
 * to hold 32b */
#	define unicode_codepoint_t unsigned long
#endif

/* a single code point value */
#define utf32_t unicode_codepoint_t

/* for holding the literal numeric value of a utf8 rune, not for assembling
 * strings; >=32b value */
#define utf8_t unicode_codepoint_t

/* largest value utf8() will encode; anything greater is treated as 0 */
#define UTF8_MAX 0x10FFFF

/* encode UTF-32 value into UTF-8 */
utf32_t utf8(utf32_t c);

#endif /* LIBUNICODE_H */

58
Wip/unicode/utf.c Normal file
View File

@@ -0,0 +1,58 @@
#include <stdio.h> /* fprintf(3), getc(3), putc(3) stderr, stdin, EOF */
#include <string.h> /* strchr(3) */
#include "ascii.h" /* ASCII_HEXADECIMAL_DIGITS_LOWER,
* ASCII_HEXADECIMAL_DIGITS_UPPER */
#include "libunicode.h" /* utf8(3) */
static struct {
char *name;
utf32_t (*f)(utf32_t);
}
static char *hex = ASCII_HEXADECIMAL_DIGITS_UPPER
ASCII_HEXADECIMAL_DIGITS_LOWER;
/* Read lines of the form "U+X..." (one to six hexadecimal digits) from
 * standard input and write the UTF-8 encoding of each code point to standard
 * output.
 *
 * Empty lines are skipped silently. A malformed line is reported on standard
 * error together with its line number and is otherwise ignored. A final line
 * that lacks a terminating newline is not processed. Command-line arguments
 * are currently not examined.
 *
 * Returns 0. */
int main(int argc, char *argv[]){
	int c;
	int i; /* index of the current character within its line */
	int l; /* line counter */
	char *n; /* where the current digit was found in hex, or NULL */
	utf32_t codepoint;
	utf8_t encoded;

	l = 0;
	/* Start of every line: reset all per-line state. */
init:	codepoint = 0;
	i = 0;
	++l;
	n = NULL;
	while((c = getc(stdin)) != EOF){
		if(c == '\n'){
			/* "U" or "U+" with no digits; empty lines are fine */
			if(i > 0 && i <= 2)
				fprintf(stderr, "%s: %d: Syntax error.\n",
					argv[0], l);
			else if(i > 2){
				encoded = utf8(codepoint);
				/* Emit from the most significant non-zero
				 * byte down; the lowest byte is always
				 * written so that U+0000 yields one byte. */
				for(i = 3; i >= 0; --i)
					if((encoded >> 8 * i) > 0 || i == 0)
						putc((int)((encoded >> 8 * i)
							& 0xFF), stdout);
			}
			goto init;
		}
		if(
			(i == 0 && c != 'U')
			|| (i == 1 && c != '+')
			|| i >= 8 /* strlen("U+10FFFF") */
			/* strchr() would match the terminator for '\0' */
			|| (i > 1 && (c == '\0'
				|| (n = strchr(hex, c)) == NULL))
		){
			fprintf(stderr, "%s: %d: Syntax error.\n",
				argv[0], l);
			/* Discard the rest of the offending line. */
			while((c = getc(stdin)) != '\n' && c != EOF);
			/* Restart cleanly so that nothing parsed from this
			 * line leaks into the next one; init advances l. */
			goto init;
		}
		if(n != NULL)
			codepoint = (codepoint << 4) + (n - hex) % 16;
		++i;
	}
	return 0;
}