libutf: clean up comments
This commit is contained in:
parent
146ea609b6
commit
55fdca9123
@ -9,9 +9,29 @@ typedef unsigned long int rune_t;
|
||||
#endif
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
/* Reverses the order of the bytes in the 32-bit value c. */
|
||||
rune_t swab32(rune_t c);
|
||||
|
||||
/* Returns the byte length of a valid UTF-8 rune. */
|
||||
size_t utf8_size(rune_t c);
|
||||
|
||||
/* Returns the UTF-32BE codepoint of the UTF-8 rune c. */
|
||||
rune_t utf8_to_utf32be(rune_t c);
|
||||
|
||||
/* Stores the UTF-8 rune c as bytes to the memory span s. s should point to a
|
||||
* big enough memory span of chars in which to store c, a (possibly invalid)
|
||||
* UTF-8 rune. Returns a pointer to the memory location after the last written
|
||||
* byte. Returns NULL if n is not 0 and n is less than the number of bytes that
|
||||
* will be written. */
|
||||
char *utf8_to_chars(rune_t c, char *s, size_t n);
|
||||
|
||||
/* Returns the UTF-8 encoding of the UTF-32BE codepoint c. m is the minimum
|
||||
* number of bytes into which to encode the codepoint c. If m is greater than
|
||||
* 0, this function may return overlong-encoded UTF-8. */
|
||||
rune_t utf32be_to_utf8(rune_t c, size_t m);
|
||||
|
||||
/* Returns the UTF-32LE codepoint of the UTF-32BE codepoint c. */
|
||||
rune_t utf32be_to_utf32le(rune_t c);
|
||||
|
||||
/* Returns the UTF-32BE codepoint of the UTF-32LE codepoint c. */
|
||||
rune_t utf32le_to_utf32be(rune_t c);
|
||||
|
19
src/libutf.c
19
src/libutf.c
@ -18,9 +18,6 @@ chars_to_utf8(char **np, rune_t *rp){
|
||||
return rp;
|
||||
}
|
||||
|
||||
/* This is functionally equivalent to the UTF-32-specific conversion functions
|
||||
* but very slightly slower than each. */
|
||||
/* This operation is symmetrical; swab32(swab32(c)) will always return c. */
|
||||
/* big-endian | ZZZZ YYYY | XXXX WWWW | VVVV UUUU | TTTT SSSS */
|
||||
/* little-endian | TTTT SSSS | VVVV UUUU | XXXX WWWW | ZZZZ YYYY */
|
||||
rune_t
|
||||
@ -49,10 +46,6 @@ utf8_size(rune_t c){ return
|
||||
+ ((c & 0x00000080) >> 7); /* 1B? */
|
||||
}
|
||||
|
||||
/* s should point to a big enough memory span of chars in which to store c, a
|
||||
* (possibly invalid) UTF-8 rune. Returns a pointer to the memory location
|
||||
* after the last written byte. Returns NULL if n is not 0 and n is less than
|
||||
* the number of bytes that will be written. */
|
||||
char *
|
||||
utf8_to_chars(rune_t c, char *s, size_t n){
|
||||
size_t i;
|
||||
@ -93,8 +86,6 @@ utf8_to_utf32be(rune_t c){ return
|
||||
* U+000800 to U+00FFFF | 0000 0000 | 1110 XXXX | 10WW WWVV | 10VV UUUU *
|
||||
* U+000080 to U+0007FF | 0000 0000 | 0000 0000 | 110W WWVV | 10VV UUUU *
|
||||
* U+000000 to U+00007F | 0000 0000 | 0000 0000 | 0000 0000 | 0VVV UUUU */
|
||||
/* m is the minimum amount of bytes into which to encode the codepoint c. If m
|
||||
* is greater than 0, this function may return overlong-encoded UTF-8. */
|
||||
rune_t
|
||||
utf32be_to_utf8(rune_t c, size_t m){
|
||||
rune_t r;
|
||||
@ -122,18 +113,16 @@ utf32be_to_utf8(rune_t c, size_t m){
|
||||
return r;
|
||||
}
|
||||
|
||||
/* <https://www.herongyang.com/Unicode/UTF-32-UTF-32-Encoding.html> is a good
|
||||
* explanation of this. */
|
||||
/* utf-32be bits
|
||||
* U+000000 to U+10FFFF | 0000 0000 | 000Z YYYY | XXXX WWWW | VVVV UUUU */
|
||||
/* utf-32le bits
|
||||
* U+000000 to U+10FFFF | VVVV UUUU | XXXX WWWW | 000Z YYYY | 0000 0000 */
|
||||
/* <https://www.herongyang.com/Unicode/UTF-32-UTF-32-Encoding.html> */
|
||||
/* utf-32be bits | 0000 0000 | 000Z YYYY | XXXX WWWW | VVVV UUUU */
|
||||
/* utf-32le bits | VVVV UUUU | XXXX WWWW | 000Z YYYY | 0000 0000 */
|
||||
rune_t
|
||||
utf32be_to_utf32le(rune_t c){ return
|
||||
((c & 0x000000FF) << 24)
|
||||
| ((c & 0x0000FF00) << 8)
|
||||
| ((c & 0x001F0000) >> 8);
|
||||
}
|
||||
|
||||
rune_t
|
||||
utf32le_to_utf32be(rune_t c){ return
|
||||
((c & 0xFF000000) >> 24)
|
||||
|
Loading…
Reference in New Issue
Block a user