Lines Matching +full:ascii +full:- +full:based
1 /*-
16 #define T 1 /* character appears in plain ASCII text */
17 #define I 2 /* character appears in ISO-8859 text */
18 #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
43 * looks_utf8 --
44 * Decide whether some text looks like UTF-8. Returns:
46 * -1: invalid UTF-8
48 * 1: 7-bit text
49 * 2: definitely UTF-8 text (valid high-bit set bytes)
51 * Based on RFC 3629. UTF-8 with BOM is not accepted.
64 if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ in looks_utf8()
66 * Even if the whole file is valid UTF-8 sequences, in looks_utf8()
73 return -1; in looks_utf8()
74 } else { /* 11xxxxxx begins UTF-8 */ in looks_utf8()
80 else return -1; in looks_utf8()
86 else return -1; /* F5, F6, F7 */ in looks_utf8()
88 return -1; /* F8~FF */ in looks_utf8()
96 return -1; in looks_utf8()
107 * looks_utf16 --
108 * Decide whether some text looks like UTF-16. Returns:
110 * 0: invalid UTF-16
111 * 1: Little-endian UTF-16
112 * 2: Big-endian UTF-16
168 * decode_utf8 --
169 * Decode a UTF-8 character from byte string to Unicode.
170 * Returns -1 if the first byte is not a UTF-8 leader.
172 * Based on RFC 3629, but without error detection.
180 int u = -1; in decode_utf8()
200 * decode_utf16 --
201 * Decode a UTF-16 character from byte string to Unicode.
202 * Returns -1 if the first unsigned integer is invalid.
212 int u = -1; in decode_utf16()