Lines Matching +full:wide +full:- +full:range
1 /*-
7 * provided under BSD-style license terms by Nexenta Systems, Inc.
34 * to the wide character forms used internally by libc. Unfortunately,
83 * 21 bits to be able to encode the entire range of priorities.
85 { "UTF-8", "UTF-8", 21, towide_utf8, tomb_utf8 },
86 { "UTF8", "UTF-8", 21, towide_utf8, tomb_utf8 },
87 { "utf8", "UTF-8", 21, towide_utf8, tomb_utf8 },
88 { "utf-8", "UTF-8", 21, towide_utf8, tomb_utf8 },
90 { "EUC-CN", "EUC-CN", 16, towide_euccn, tomb_mbs },
91 { "eucCN", "EUC-CN", 16, towide_euccn, tomb_mbs },
93 * Because the 3-byte form of EUC-JP uses the same leading byte,
98 { "EUC-JP", "EUC-JP", 17, towide_eucjp, tomb_mbs },
99 { "eucJP", "EUC-JP", 17, towide_eucjp, tomb_mbs },
101 { "EUC-KR", "EUC-KR", 16, towide_euckr, tomb_mbs },
102 { "eucKR", "EUC-KR", 16, towide_euckr, tomb_mbs },
104 * EUC-TW uses 2 bytes most of the time, but 4 bytes if the
106 * the third byte will be A0-B0. So we only need to consider
109 { "EUC-TW", "EUC-TW", 24, towide_euctw, tomb_mbs },
110 { "eucTW", "EUC-TW", 24, towide_euctw, tomb_mbs },
128 * be non-zero.
133 * This should probably be an alias for euc-cn, or vice versa.
178 * This is used for 8-bit encodings.
185 return (-1); in towide_none()
196 return (-1); in tomb_none()
204 * UTF-8 stores wide characters in UTF-32 form.
218 /* 7-bit ASCII */ in towide_utf8()
222 /* u80-u7ff - two bytes encoded */ in towide_utf8()
227 /* u800-uffff - three bytes encoded */ in towide_utf8()
232 /* u10000-u1fffff - four bytes encoded */ in towide_utf8()
239 return (-1); in towide_utf8()
243 return (-1); in towide_utf8()
249 return (-1); in towide_utf8()
257 return (-1); in towide_utf8()
287 return (-1); in tomb_utf8()
289 for (i = cnt - 1; i; i--) { in tomb_utf8()
302 * encoding as a 16-bit value, although a great many of the possible
306 * 0x00 - 0x7f - 1 byte encoding
307 * 0x80 - 0x7fff - illegal
308 * 0x8000 - 0xffff - 2 byte encoding
319 /* 7-bit */ in towide_dbcs()
325 return (-1); in towide_dbcs()
328 /* Store both bytes as a single 16-bit wide. */ in towide_dbcs()
336 * Most multibyte locales just convert the wide character to the multibyte
337 * form by stripping leading null bytes, and writing the 32-bit quantity
338 * in big-endian order.
357 n--; in tomb_mbs()
397 * GB18030. This encodes as 8, 16, or 32-bits.
398 * 7-bit values are in 1 byte, 4 byte sequences are used when
399 * the second byte encodes 0x30-39 and all other sequences are 2 bytes.
409 /* 7-bit */ in towide_gb18030()
415 return (-1); in towide_gb18030()
424 werr("incomplete 4-byte character sequence (%s)", in towide_gb18030()
426 return (-1); in towide_gb18030()
441 * MS-Kanji (aka SJIS) is almost a clean DBCS like the others, but it
442 * also has a range of single byte characters above 0x80. (0xa1-0xdf).
452 /* 7-bit */ in towide_mskanji()
459 return (-1); in towide_mskanji()
462 /* Store both bytes as a single 16-bit wide. */ in towide_mskanji()
474 * Note that the way in which the different EUC forms vary is how wide
488 * All variations of EUC encode 7-bit ASCII as one byte, and use in towide_euc_impl()
492 /* 7-bit */ in towide_euc_impl()
498 * All EUC variants reserve 0xa1-0xff to identify CS1, which in towide_euc_impl()
499 * is always two bytes wide. Note that unused CS will be zero, in towide_euc_impl()
513 return (-1); in towide_euc_impl()
527 * EUC-CN encodes as follows:
529 * Code set 0 (ASCII): 0x21-0x7E
530 * Code set 1 (GB 2312-80): 0xA1A1-0xFEFE
541 * EUC-JP encodes as follows:
543 * Code set 0 (ASCII or JIS X 0201-1976 Roman): 0x21-0x7E
544 * Code set 1 (JIS X 0208): 0xA1A1-0xFEFE
545 * Code set 2 (half-width katakana): 0x8EA1-0x8EDF
546 * Code set 3 (JIS X 0212-1990): 0x8FA1A1-0x8FFEFE
555 * EUC-KR encodes as follows:
557 * Code set 0 (ASCII or KS C 5636-1993): 0x21-0x7E
558 * Code set 1 (KS C 5601-1992): 0xA1A1-0xFEFE
569 * EUC-TW encodes as follows:
571 * Code set 0 (ASCII): 0x21-0x7E
572 * Code set 1 (CNS 11643-1992 Plane 1): 0xA1A1-0xFEFE
573 * Code set 2 (CNS 11643-1992 Planes 1-16): 0x8EA1A1A1-0x8EB0FEFE
663 return ((int)((1U << _nbits) - 1)); in max_wide()