libutf: clean up comments
This commit is contained in:
		
							parent
							
								
									146ea609b6
								
							
						
					
					
						commit
						55fdca9123
					
				@ -9,9 +9,29 @@ typedef unsigned long int rune_t;
 | 
			
		||||
#endif
 | 
			
		||||
#include <stddef.h> /* size_t */
 | 
			
		||||
 | 
			
		||||
/* Reverses the order of the bytes in the 32-bit value c. */
 | 
			
		||||
rune_t swab32(rune_t c);
 | 
			
		||||
 | 
			
		||||
/* Returns the byte length of the valid UTF-8 rune c. */
 | 
			
		||||
size_t utf8_size(rune_t c);
 | 
			
		||||
 | 
			
		||||
/* Returns the UTF-32BE codepoint of the UTF-8 rune c. */
 | 
			
		||||
rune_t utf8_to_utf32be(rune_t c);
 | 
			
		||||
 | 
			
		||||
/* Stores the UTF-8 rune c as bytes to the memory span s. s should point to a
 | 
			
		||||
 * big enough memory span of chars in which to store c, a (possibly invalid)
 | 
			
		||||
 * UTF-8 rune. Returns a pointer to the memory location after the last written
 | 
			
		||||
 * byte. Returns NULL if n is not 0 and n is less than the number of bytes that
 | 
			
		||||
 * will be written. */
 | 
			
		||||
char *utf8_to_chars(rune_t c, char *s, size_t n);
 | 
			
		||||
 | 
			
		||||
/* Returns the UTF-8 encoding of the UTF-32BE codepoint c. m is the minimum
 | 
			
		||||
 * number of bytes into which to encode the codepoint c. If m is greater than
 | 
			
		||||
 * 0, this function may return overlong-encoded UTF-8. */
 | 
			
		||||
rune_t utf32be_to_utf8(rune_t c, size_t m);
 | 
			
		||||
 | 
			
		||||
/* Returns the UTF-32LE codepoint of the UTF-32BE codepoint c. */
 | 
			
		||||
rune_t utf32be_to_utf32le(rune_t c);
 | 
			
		||||
 | 
			
		||||
/* Returns the UTF-32BE codepoint of the UTF-32LE codepoint c. */
 | 
			
		||||
rune_t utf32le_to_utf32be(rune_t c);
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										19
									
								
								src/libutf.c
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								src/libutf.c
									
									
									
									
									
								
							@ -18,9 +18,6 @@ chars_to_utf8(char **np, rune_t *rp){
 | 
			
		||||
	return rp;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* This is functionally equivalent to the UTF-32-specific conversion functions
 | 
			
		||||
 * but very slightly slower than each. */
 | 
			
		||||
/* This operation is symmetrical; swab32(swab32(c)) will always return c. */
 | 
			
		||||
/* big-endian           | ZZZZ YYYY | XXXX WWWW | VVVV UUUU | TTTT SSSS */
 | 
			
		||||
/* little-endian        | TTTT SSSS | VVVV UUUU | XXXX WWWW | ZZZZ YYYY */
 | 
			
		||||
rune_t
 | 
			
		||||
@ -49,10 +46,6 @@ utf8_size(rune_t c){ return
 | 
			
		||||
	  + ((c & 0x00000080) >> 7); /* 1B? */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* s should point to a big enough memory span of chars in which to store c, a
 | 
			
		||||
 * (possibly invalid) UTF-8 rune. Returns a pointer to the memory location
 | 
			
		||||
 * after the last written byte. Returns NULL if n is not 0 and n is less than
 | 
			
		||||
 * the number of bytes that will be written. */
 | 
			
		||||
char *
 | 
			
		||||
utf8_to_chars(rune_t c, char *s, size_t n){
 | 
			
		||||
	size_t i;
 | 
			
		||||
@ -93,8 +86,6 @@ utf8_to_utf32be(rune_t c){ return
 | 
			
		||||
 * U+000800 to U+00FFFF | 0000 0000 | 1110 XXXX | 10WW WWVV | 10VV UUUU *
 | 
			
		||||
 * U+000080 to U+0007FF | 0000 0000 | 0000 0000 | 110W WWVV | 10VV UUUU *
 | 
			
		||||
 * U+000000 to U+00007F | 0000 0000 | 0000 0000 | 0000 0000 | 0VVV UUUU */
 | 
			
		||||
/* m is the minimum amount of bytes into which to encode the codepoint c. If m
 | 
			
		||||
 * is greater than 0, this function may return overlong-encoded UTF-8. */
 | 
			
		||||
rune_t
 | 
			
		||||
utf32be_to_utf8(rune_t c, size_t m){
 | 
			
		||||
	rune_t r;
 | 
			
		||||
@ -122,18 +113,16 @@ utf32be_to_utf8(rune_t c, size_t m){
 | 
			
		||||
	return r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* <https://www.herongyang.com/Unicode/UTF-32-UTF-32-Encoding.html> is a good
 | 
			
		||||
 * explanation of this. */
 | 
			
		||||
/* utf-32be bits
 | 
			
		||||
 * U+000000 to U+10FFFF | 0000 0000 | 000Z YYYY | XXXX WWWW | VVVV UUUU */
 | 
			
		||||
/* utf-32le bits
 | 
			
		||||
 * U+000000 to U+10FFFF | VVVV UUUU | XXXX WWWW | 000Z YYYY | 0000 0000 */
 | 
			
		||||
/* <https://www.herongyang.com/Unicode/UTF-32-UTF-32-Encoding.html> */
 | 
			
		||||
/* utf-32be bits        | 0000 0000 | 000Z YYYY | XXXX WWWW | VVVV UUUU */
 | 
			
		||||
/* utf-32le bits        | VVVV UUUU | XXXX WWWW | 000Z YYYY | 0000 0000 */
 | 
			
		||||
rune_t
 | 
			
		||||
utf32be_to_utf32le(rune_t c){ return
 | 
			
		||||
	  ((c & 0x000000FF) << 24)
 | 
			
		||||
	| ((c & 0x0000FF00) << 8)
 | 
			
		||||
	| ((c & 0x001F0000) >> 8);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
rune_t
 | 
			
		||||
utf32le_to_utf32be(rune_t c){ return
 | 
			
		||||
	  ((c & 0xFF000000) >> 24)
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user