libutf: clean up comments

2024-05-29 19:56:41 -06:00
parent 146ea609b6
commit 55fdca9123
2 changed files with 24 additions and 15 deletions
--- a/include/libutf.h
+++ b/include/libutf.h
@@ -9,9 +9,29 @@ typedef unsigned long int rune_t;
 #endif
 #include <stddef.h> /* size_t */

+/* Reverses the order of the bytes in the 32-bit value c. */
 rune_t swab32(rune_t c);
+
+/* Returns the byte length of a valid UTF-8 rune. */
+size_t utf8_size(rune_t c);
+
+/* Returns the UTF-32BE codepoint of the UTF-8 rune c. */
 rune_t utf8_to_utf32be(rune_t c);
+
+/* Stores the UTF-8 rune c as bytes to the memory span s. s should point to a
+ * big enough memory span of chars in which to store c, a (possibly invalid)
+ * UTF-8 rune. Returns a pointer to the memory location after the last written
+ * byte. Returns NULL if n is not 0 and n is less than the number of bytes that
+ * will be written. */
 char *utf8_to_chars(rune_t c, char *s, size_t n);
+
+/* Returns the UTF-8 encoding of the UTF-32BE codepoint c. m is the minimum
+ * number of bytes into which to encode the codepoint c. If m is greater than
+ * 0, this function may return overlong-encoded UTF-8. */
 rune_t utf32be_to_utf8(rune_t c, size_t m);
+
+/* Returns the UTF-32LE codepoint of the UTF-32BE codepoint c. */
 rune_t utf32be_to_utf32le(rune_t c);
+
+/* Returns the UTF-32BE codepoint of the UTF-32LE codepoint c. */
 rune_t utf32le_to_utf32be(rune_t c);
--- a/src/libutf.c
+++ b/src/libutf.c
@@ -18,9 +18,6 @@ chars_to_utf8(char **np, rune_t *rp){
 	return rp;
 }

-/* This is functionally equivalent to the UTF-32-specific conversion functions
- * but very slightly slower than each. */
-/* This operation is symmetrical; swab32(swab32(c)) will always return c. */
 /* big-endian           | ZZZZ YYYY | XXXX WWWW | VVVV UUUU | TTTT SSSS */
 /* little-endian        | TTTT SSSS | VVVV UUUU | XXXX WWWW | ZZZZ YYYY */
 rune_t
@@ -49,10 +46,6 @@ utf8_size(rune_t c){ return
 	  + ((c & 0x00000080) >> 7); /* 1B? */
 }

-/* s should point to a big enough memory span of chars in which to store c, a
- * (possibly invalid) UTF-8 rune. Returns a pointer to the memory location
- * after the last written byte. Returns NULL if n is not 0 and n is less than
- * the number of bytes that will be written. */
 char *
 utf8_to_chars(rune_t c, char *s, size_t n){
 	size_t i;
@@ -93,8 +86,6 @@ utf8_to_utf32be(rune_t c){ return
 * U+000800 to U+00FFFF | 0000 0000 | 1110 XXXX | 10WW WWVV | 10VV UUUU *
 * U+000080 to U+0007FF | 0000 0000 | 0000 0000 | 110W WWVV | 10VV UUUU *
 * U+000000 to U+00007F | 0000 0000 | 0000 0000 | 0000 0000 | 0VVV UUUU */
-/* m is the minimum amount of bytes into which to encode the codepoint c. If m
- * is greater than 0, this function may return overlong-encoded UTF-8. */
 rune_t
 utf32be_to_utf8(rune_t c, size_t m){
 	rune_t r;
@@ -122,18 +113,16 @@ utf32be_to_utf8(rune_t c, size_t m){
 	return r;
 }

-/* <https://www.herongyang.com/Unicode/UTF-32-UTF-32-Encoding.html> is a good
- * explanation of this. */
-/* utf-32be bits
- * U+000000 to U+10FFFF | 0000 0000 | 000Z YYYY | XXXX WWWW | VVVV UUUU */
-/* utf-32le bits
- * U+000000 to U+10FFFF | VVVV UUUU | XXXX WWWW | 000Z YYYY | 0000 0000 */
+/* <https://www.herongyang.com/Unicode/UTF-32-UTF-32-Encoding.html> */
+/* utf-32be bits        | 0000 0000 | 000Z YYYY | XXXX WWWW | VVVV UUUU */
+/* utf-32le bits        | VVVV UUUU | XXXX WWWW | 000Z YYYY | 0000 0000 */
 rune_t
 utf32be_to_utf32le(rune_t c){ return
 	  ((c & 0x000000FF) << 24)
 	| ((c & 0x0000FF00) << 8)
 	| ((c & 0x001F0000) >> 8);
 }
+
 rune_t
 utf32le_to_utf32be(rune_t c){ return
 	  ((c & 0xFF000000) >> 24)