wide.c - OpenGrok cross reference for /freebsd/usr.bin/localedef/wide.c

Lines Matching +full:wide +full:- +full:range
1 /*-
7  * provided under BSD-style license terms by Nexenta Systems, Inc.
34  * to the wide character forms used internally by libc.  Unfortunately,
83 	 * 21 bits to be able to encode the entire range of priorities.
85 	{ "UTF-8",	"UTF-8",	21, towide_utf8, tomb_utf8 },
86 	{ "UTF8",	"UTF-8",	21, towide_utf8, tomb_utf8 },
87 	{ "utf8",	"UTF-8",	21, towide_utf8, tomb_utf8 },
88 	{ "utf-8",	"UTF-8",	21, towide_utf8, tomb_utf8 },
90 	{ "EUC-CN",	"EUC-CN",	16, towide_euccn, tomb_mbs },
91 	{ "eucCN",	"EUC-CN",	16, towide_euccn, tomb_mbs },
93 	 * Because the 3-byte form of EUC-JP uses the same leading byte,
98 	{ "EUC-JP",	"EUC-JP",	17, towide_eucjp, tomb_mbs },
99 	{ "eucJP",	"EUC-JP",	17, towide_eucjp, tomb_mbs },
101 	{ "EUC-KR",	"EUC-KR",	16, towide_euckr, tomb_mbs },
102 	{ "eucKR",	"EUC-KR",	16, towide_euckr, tomb_mbs },
104 	 * EUC-TW uses 2 bytes most of the time, but 4 bytes if the
106 	 * the third byte will be A0-B0.  So we only need to consider
109 	{ "EUC-TW",	"EUC-TW",	24, towide_euctw, tomb_mbs },
110 	{ "eucTW",	"EUC-TW",	24, towide_euctw, tomb_mbs },
128 	 * be non-zero.
133 	 * This should probably be an alias for euc-cn, or vice versa.
178  * This is used for 8-bit encodings.
185 		return (-1);  in towide_none()
196 		return (-1);  in tomb_none()
204  * UTF-8 stores wide characters in UTF-32 form.
218 		/* 7-bit ASCII */  in towide_utf8()
222 		/* u80-u7ff - two bytes encoded */  in towide_utf8()
227 		/* u800-uffff - three bytes encoded */  in towide_utf8()
232 		/* u10000-u1fffff - four bytes encoded */  in towide_utf8()
239 		return (-1);  in towide_utf8()
243 		return (-1);  in towide_utf8()
249 			return (-1);  in towide_utf8()
257 		return (-1);  in towide_utf8()
287 		return (-1);  in tomb_utf8()
289 	for (i = cnt - 1; i; i--) {  in tomb_utf8()
302  * encoding as a 16-bit value, although a great many of the possible
306  * 0x00 - 0x7f		- 1 byte encoding
307  * 0x80 - 0x7fff	- illegal
308  * 0x8000 - 0xffff	- 2 byte encoding
319 		/* 7-bit */  in towide_dbcs()
325 		return (-1);  in towide_dbcs()
328 	/* Store both bytes as a single 16-bit wide. */  in towide_dbcs()
336  * Most multibyte locales just convert the wide character to the multibyte
337  * form by stripping leading null bytes, and writing the 32-bit quantity
338  * in big-endian order.
357 		n--;  in tomb_mbs()
397  * GB18030.  This encodes as 8, 16, or 32 bits.
398  * 7-bit values are in 1 byte,  4 byte sequences are used when
399  * the second byte encodes 0x30-39 and all other sequences are 2 bytes.
409 		/* 7-bit */  in towide_gb18030()
415 		return (-1);  in towide_gb18030()
424 			werr("incomplete 4-byte character sequence (%s)",  in towide_gb18030()
426 			return (-1);  in towide_gb18030()
441  * MS-Kanji (aka SJIS) is almost a clean DBCS like the others, but it
442  * also has a range of single byte characters above 0x80.  (0xa1-0xdf).
452 		/* 7-bit */  in towide_mskanji()
459 		return (-1);  in towide_mskanji()
462 	/* Store both bytes as a single 16-bit wide. */  in towide_mskanji()
474  * Note that the way in which the different EUC forms vary is how wide
488 	 * All variations of EUC encode 7-bit ASCII as one byte, and use  in towide_euc_impl()
492 		/* 7-bit */  in towide_euc_impl()
498 	 * All EUC variants reserve 0xa1-0xff to identify CS1, which  in towide_euc_impl()
499 	 * is always two bytes wide.  Note that unused CS will be zero,  in towide_euc_impl()
513 		return (-1);  in towide_euc_impl()
527  * EUC-CN encodes as follows:
529  * Code set 0 (ASCII):				0x21-0x7E
530  * Code set 1 (GB 2312-80):			0xA1A1-0xFEFE
541  * EUC-JP encodes as follows:
543  * Code set 0 (ASCII or JIS X 0201-1976 Roman):	0x21-0x7E
544  * Code set 1 (JIS X 0208):			0xA1A1-0xFEFE
545  * Code set 2 (half-width katakana):		0x8EA1-0x8EDF
546  * Code set 3 (JIS X 0212-1990):		0x8FA1A1-0x8FFEFE
555  * EUC-KR encodes as follows:
557  * Code set 0 (ASCII or KS C 5636-1993):	0x21-0x7E
558  * Code set 1 (KS C 5601-1992):			0xA1A1-0xFEFE
569  * EUC-TW encodes as follows:
571  * Code set 0 (ASCII):				0x21-0x7E
572  * Code set 1 (CNS 11643-1992 Plane 1):		0xA1A1-0xFEFE
573  * Code set 2 (CNS 11643-1992 Planes 1-16):	0x8EA1A1A1-0x8EB0FEFE
663 	return ((int)((1U << _nbits) - 1));  in max_wide()