teken_wcwidth.h (6d3296f16a06bcaa49918799e683936711dcf9c9) | teken_wcwidth.h (4b9aa38ef0e5bedcdd90b6627cc1c215037a1121) |
---|---|
1/* 2 * Markus Kuhn -- 2007-05-26 (Unicode 5.0) 3 * 4 * Permission to use, copy, modify, and distribute this software 5 * for any purpose and without fee is hereby granted. The author 6 * disclaims all warranties with regard to this software. 7 * 8 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c 9 */ 10 11#define TEKEN_UTF8_INVALID_CODEPOINT -1 12 13struct interval { 14 teken_char_t first; 15 teken_char_t last; 16}; 17 18/* auxiliary function for binary search in interval table */ | 1/* 2 * Markus Kuhn -- 2007-05-26 (Unicode 5.0) 3 * 4 * Permission to use, copy, modify, and distribute this software 5 * for any purpose and without fee is hereby granted. The author 6 * disclaims all warranties with regard to this software. 7 * 8 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c 9 */ 10 11#define TEKEN_UTF8_INVALID_CODEPOINT -1 12 13struct interval { 14 teken_char_t first; 15 teken_char_t last; 16}; 17 18/* auxiliary function for binary search in interval table */ |
19static int bisearch(teken_char_t ucs, const struct interval *table, int max) { | 19static inline int bisearch(teken_char_t ucs, const struct interval *table, int max) { |
20 int min = 0; 21 int mid; 22 23 if (ucs < table[0].first || ucs > table[max].last) 24 return 0; 25 while (max >= min) { 26 mid = (min + max) / 2; 27 if (ucs > table[mid].last) 28 min = mid + 1; 29 else if (ucs < table[mid].first) 30 max = mid - 1; 31 else 32 return 1; 33 } 34 35 return 0; 36} 37 | 20 int min = 0; 21 int mid; 22 23 if (ucs < table[0].first || ucs > table[max].last) 24 return 0; 25 while (max >= min) { 26 mid = (min + max) / 2; 27 if (ucs > table[mid].last) 28 min = mid + 1; 29 else if (ucs < table[mid].first) 30 max = mid - 1; 31 else 32 return 1; 33 } 34 35 return 0; 36} 37 |
38static int teken_wcwidth(teken_char_t ucs) | 38static inline int teken_wcwidth(teken_char_t ucs) |
39{ 40 /* sorted list of non-overlapping intervals of non-spacing characters */ 41 /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ 42 static const struct interval combining[] = { 43 { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, 44 { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, 45 { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, 46 { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, --- 72 unchanged lines hidden (view full) --- 119 (ucs >= 0x30000 && ucs <= 0x3fffd))); 120} 121 122/* 123 * Converts an UTF-8 byte sequence to a codepoint as specified in 124 * https://datatracker.ietf.org/doc/html/rfc3629#section-3 . The function 125 * expects the 'bytes' array to start with the leading character. 126 */ | 39{ 40 /* sorted list of non-overlapping intervals of non-spacing characters */ 41 /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ 42 static const struct interval combining[] = { 43 { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, 44 { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, 45 { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, 46 { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, --- 72 unchanged lines hidden (view full) --- 119 (ucs >= 0x30000 && ucs <= 0x3fffd))); 120} 121 122/* 123 * Converts an UTF-8 byte sequence to a codepoint as specified in 124 * https://datatracker.ietf.org/doc/html/rfc3629#section-3 . The function 125 * expects the 'bytes' array to start with the leading character. 126 */ |
127static teken_char_t | 127static inline teken_char_t |
128teken_utf8_bytes_to_codepoint(uint8_t bytes[4], int nbytes) 129{ 130 131 /* Check for malformed characters. */ 132 if (__bitcount(bytes[0] & 0xf0) != nbytes) 133 return (TEKEN_UTF8_INVALID_CODEPOINT); 134 135 switch (nbytes) { --- 13 unchanged lines hidden --- | 128teken_utf8_bytes_to_codepoint(uint8_t bytes[4], int nbytes) 129{ 130 131 /* Check for malformed characters. */ 132 if (__bitcount(bytes[0] & 0xf0) != nbytes) 133 return (TEKEN_UTF8_INVALID_CODEPOINT); 134 135 switch (nbytes) { --- 13 unchanged lines hidden --- |