Lines Matching +full:- +full:i
1 /*-
17 #define I 2 /* character appears in ISO-8859 text */ macro
18 #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
34 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
35 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
36 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
37 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
38 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
39 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
43 * looks_utf8 --
44 * Decide whether some text looks like UTF-8. Returns:
46 * -1: invalid UTF-8
48 * 1: 7-bit text
49 * 2: definitely UTF-8 text (valid high-bit set bytes)
51 * Based on RFC 3629. UTF-8 with BOM is not accepted.
59 size_t i; in looks_utf8() local
63 for (i = 0; i < nbytes; i++) { in looks_utf8()
64 if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ in looks_utf8()
66 * Even if the whole file is valid UTF-8 sequences, in looks_utf8()
70 if (text_chars[buf[i]] != T) in looks_utf8()
72 } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */ in looks_utf8()
73 return -1; in looks_utf8()
74 } else { /* 11xxxxxx begins UTF-8 */ in looks_utf8()
77 if ((buf[i] & 0x20) == 0) /* 110xxxxx */ in looks_utf8()
78 if (buf[i] > 0xC1) /* C0, C1 */ in looks_utf8()
80 else return -1; in looks_utf8()
81 else if ((buf[i] & 0x10) == 0) /* 1110xxxx */ in looks_utf8()
83 else if ((buf[i] & 0x08) == 0) /* 11110xxx */ in looks_utf8()
84 if (buf[i] < 0xF5) in looks_utf8()
86 else return -1; /* F5, F6, F7 */ in looks_utf8()
88 return -1; /* F8~FF */ in looks_utf8()
91 i++; in looks_utf8()
92 if (i >= nbytes) in looks_utf8()
95 if ((buf[i] & 0xc0) != 0x80) /* 10xxxxxx */ in looks_utf8()
96 return -1; in looks_utf8()
107 * looks_utf16 --
108 * Decide whether some text looks like UTF-16. Returns:
110 * 0: invalid UTF-16
111 * 1: Little-endian UTF-16
112 * 2: Big-endian UTF-16
121 size_t i; in looks_utf16() local
137 for (i = 2; i + 1 < nbytes; i += 2) { in looks_utf16()
139 c = buf[i] << 8 ^ buf[i + 1]; in looks_utf16()
141 c = buf[i] ^ buf[i + 1] << 8; in looks_utf16()
164 #undef I
168 * decode_utf8 --
169 * Decode a UTF-8 character from byte string to Unicode.
170 * Returns -1 if the first byte is not a UTF-8 leader.
180 int u = -1; in decode_utf8()
200 * decode_utf16 --
201 * Decode a UTF-16 character from byte string to Unicode.
202 * Returns -1 if the first unsigned integer is invalid.
212 int u = -1; in decode_utf16()