xref: /illumos-gate/usr/src/lib/libc/port/locale/table.c (revision 67d74cc3e7c9d9461311136a0b2069813a3fd927)
/*
 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
 * Copyright 2017 Nexenta Systems, Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Paul Borman at Krystal Technologies.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
35 
36 #include "lint.h"
37 #include <ctype.h>
38 #include <wchar.h>
39 #include "runetype.h"
40 #include "mblocal.h"
41 #include "_ctype.h"
42 
/*
 * Class masks for code points 0x00 - 0x7f, in code point order, written as
 * a bare comma-separated list (no braces, no trailing comma) so that it can
 * be spliced into an array initializer.
 *
 * Only ten distinct flag combinations occur; each gets a short private name
 * below so that one row of the table covers eight consecutive code points.
 */
#define	_RT_C	(_CTYPE_C)				/* 0x00-0x1f, 0x7f */
#define	_RT_CS	(_CTYPE_C|_CTYPE_S)			/* 0x0a - 0x0d */
#define	_RT_CB	(_CTYPE_C|_CTYPE_S|_CTYPE_B)		/* 0x09 */
#define	_RT_SB	(_CTYPE_S|_CTYPE_B|_CTYPE_R)		/* 0x20 */
#define	_RT_P	(_CTYPE_P|_CTYPE_R|_CTYPE_G)		/* other 0x21-0x7e */
#define	_RT_D	(_CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X)	/* '0' - '9' */
#define	_RT_UX	(_CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A)	/* 'A'-'F' */
#define	_RT_U	(_CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A)	/* 'G' - 'Z' */
#define	_RT_LX	(_CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A)	/* 'a'-'f' */
#define	_RT_L	(_CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A)	/* 'g' - 'z' */

#define	_DEFRUNETYPE \
	/* 00 */ \
	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C, \
	/* 08 */ \
	_RT_C,	_RT_CB,	_RT_CS,	_RT_CS,	_RT_CS,	_RT_CS,	_RT_C,	_RT_C, \
	/* 10 */ \
	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C, \
	/* 18 */ \
	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C,	_RT_C, \
	/* 20 */ \
	_RT_SB,	_RT_P,	_RT_P,	_RT_P,	_RT_P,	_RT_P,	_RT_P,	_RT_P, \
	/* 28 */ \
	_RT_P,	_RT_P,	_RT_P,	_RT_P,	_RT_P,	_RT_P,	_RT_P,	_RT_P, \
	/* 30 */ \
	_RT_D,	_RT_D,	_RT_D,	_RT_D,	_RT_D,	_RT_D,	_RT_D,	_RT_D, \
	/* 38 */ \
	_RT_D,	_RT_D,	_RT_P,	_RT_P,	_RT_P,	_RT_P,	_RT_P,	_RT_P, \
	/* 40 */ \
	_RT_P,	_RT_UX,	_RT_UX,	_RT_UX,	_RT_UX,	_RT_UX,	_RT_UX,	_RT_U, \
	/* 48 */ \
	_RT_U,	_RT_U,	_RT_U,	_RT_U,	_RT_U,	_RT_U,	_RT_U,	_RT_U, \
	/* 50 */ \
	_RT_U,	_RT_U,	_RT_U,	_RT_U,	_RT_U,	_RT_U,	_RT_U,	_RT_U, \
	/* 58 */ \
	_RT_U,	_RT_U,	_RT_U,	_RT_P,	_RT_P,	_RT_P,	_RT_P,	_RT_P, \
	/* 60 */ \
	_RT_P,	_RT_LX,	_RT_LX,	_RT_LX,	_RT_LX,	_RT_LX,	_RT_LX,	_RT_L, \
	/* 68 */ \
	_RT_L,	_RT_L,	_RT_L,	_RT_L,	_RT_L,	_RT_L,	_RT_L,	_RT_L, \
	/* 70 */ \
	_RT_L,	_RT_L,	_RT_L,	_RT_L,	_RT_L,	_RT_L,	_RT_L,	_RT_L, \
	/* 78 */ \
	_RT_L,	_RT_L,	_RT_L,	_RT_P,	_RT_P,	_RT_P,	_RT_P,	_RT_C
188 
/*
 * Lower-case mapping for code points 0x00 - 0xff, written as a bare
 * comma-separated list (no braces, no trailing comma) so that it can be
 * spliced into an array initializer.  Entry N is the value that code point
 * N maps to: 'A' - 'Z' map to 'a' - 'z', everything else maps to itself.
 *
 * The list is assembled from three private pieces so that the only rows
 * which are not an identity mapping stand on their own.
 */

/* 0x00 - 0x3f: identity. */
#define	_DML_00_3F \
	0x00,	0x01,	0x02,	0x03,	0x04,	0x05,	0x06,	0x07, \
	0x08,	0x09,	0x0a,	0x0b,	0x0c,	0x0d,	0x0e,	0x0f, \
	0x10,	0x11,	0x12,	0x13,	0x14,	0x15,	0x16,	0x17, \
	0x18,	0x19,	0x1a,	0x1b,	0x1c,	0x1d,	0x1e,	0x1f, \
	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	0x26,	0x27, \
	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f, \
	0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37, \
	0x38,	0x39,	0x3a,	0x3b,	0x3c,	0x3d,	0x3e,	0x3f

/* 0x40 - 0x5f: the slots for 'A' - 'Z' hold the matching lower-case letter. */
#define	_DML_40_5F \
	0x40,	'a',	'b',	'c',	'd',	'e',	'f',	'g', \
	'h',	'i',	'j',	'k',	'l',	'm',	'n',	'o', \
	'p',	'q',	'r',	's',	't',	'u',	'v',	'w', \
	'x',	'y',	'z',	0x5b,	0x5c,	0x5d,	0x5e,	0x5f

/* 0x60 - 0xff: identity (the lower-case letters map to themselves). */
#define	_DML_60_FF \
	0x60,	'a',	'b',	'c',	'd',	'e',	'f',	'g', \
	'h',	'i',	'j',	'k',	'l',	'm',	'n',	'o', \
	'p',	'q',	'r',	's',	't',	'u',	'v',	'w', \
	'x',	'y',	'z',	0x7b,	0x7c,	0x7d,	0x7e,	0x7f, \
	0x80,	0x81,	0x82,	0x83,	0x84,	0x85,	0x86,	0x87, \
	0x88,	0x89,	0x8a,	0x8b,	0x8c,	0x8d,	0x8e,	0x8f, \
	0x90,	0x91,	0x92,	0x93,	0x94,	0x95,	0x96,	0x97, \
	0x98,	0x99,	0x9a,	0x9b,	0x9c,	0x9d,	0x9e,	0x9f, \
	0xa0,	0xa1,	0xa2,	0xa3,	0xa4,	0xa5,	0xa6,	0xa7, \
	0xa8,	0xa9,	0xaa,	0xab,	0xac,	0xad,	0xae,	0xaf, \
	0xb0,	0xb1,	0xb2,	0xb3,	0xb4,	0xb5,	0xb6,	0xb7, \
	0xb8,	0xb9,	0xba,	0xbb,	0xbc,	0xbd,	0xbe,	0xbf, \
	0xc0,	0xc1,	0xc2,	0xc3,	0xc4,	0xc5,	0xc6,	0xc7, \
	0xc8,	0xc9,	0xca,	0xcb,	0xcc,	0xcd,	0xce,	0xcf, \
	0xd0,	0xd1,	0xd2,	0xd3,	0xd4,	0xd5,	0xd6,	0xd7, \
	0xd8,	0xd9,	0xda,	0xdb,	0xdc,	0xdd,	0xde,	0xdf, \
	0xe0,	0xe1,	0xe2,	0xe3,	0xe4,	0xe5,	0xe6,	0xe7, \
	0xe8,	0xe9,	0xea,	0xeb,	0xec,	0xed,	0xee,	0xef, \
	0xf0,	0xf1,	0xf2,	0xf3,	0xf4,	0xf5,	0xf6,	0xf7, \
	0xf8,	0xf9,	0xfa,	0xfb,	0xfc,	0xfd,	0xfe,	0xff

#define	_DEFMAPLOWER \
	_DML_00_3F, \
	_DML_40_5F, \
	_DML_60_FF
222 
/*
 * Upper-case mapping for code points 0x00 - 0xff, written as a bare
 * comma-separated list so that it can be spliced into an array initializer.
 * Entry N is the value that code point N maps to: 'a' - 'z' map to
 * 'A' - 'Z', everything else maps to itself.
 *
 * The list deliberately does not end in a comma, matching _DEFRUNETYPE and
 * _DEFMAPLOWER: whoever expands the macro decides what, if anything,
 * follows the last entry.
 */
#define	_DEFMAPUPPER \
	0x00,	0x01,	0x02,	0x03,	0x04,	0x05,	0x06,	0x07, \
	0x08,	0x09,	0x0a,	0x0b,	0x0c,	0x0d,	0x0e,	0x0f, \
	0x10,	0x11,	0x12,	0x13,	0x14,	0x15,	0x16,	0x17, \
	0x18,	0x19,	0x1a,	0x1b,	0x1c,	0x1d,	0x1e,	0x1f, \
	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	0x26,	0x27, \
	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f, \
	0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37, \
	0x38,	0x39,	0x3a,	0x3b,	0x3c,	0x3d,	0x3e,	0x3f, \
	0x40,	'A',	'B',	'C',	'D',	'E',	'F',	'G', \
	'H',	'I',	'J',	'K',	'L',	'M',	'N',	'O', \
	'P',	'Q',	'R',	'S',	'T',	'U',	'V',	'W', \
	'X',	'Y',	'Z',	0x5b,	0x5c,	0x5d,	0x5e,	0x5f, \
	/* 0x60 - 0x7f: 'a' - 'z' are folded onto 'A' - 'Z' */ \
	0x60,	'A',	'B',	'C',	'D',	'E',	'F',	'G', \
	'H',	'I',	'J',	'K',	'L',	'M',	'N',	'O', \
	'P',	'Q',	'R',	'S',	'T',	'U',	'V',	'W', \
	'X',	'Y',	'Z',	0x7b,	0x7c,	0x7d,	0x7e,	0x7f, \
	0x80,	0x81,	0x82,	0x83,	0x84,	0x85,	0x86,	0x87, \
	0x88,	0x89,	0x8a,	0x8b,	0x8c,	0x8d,	0x8e,	0x8f, \
	0x90,	0x91,	0x92,	0x93,	0x94,	0x95,	0x96,	0x97, \
	0x98,	0x99,	0x9a,	0x9b,	0x9c,	0x9d,	0x9e,	0x9f, \
	0xa0,	0xa1,	0xa2,	0xa3,	0xa4,	0xa5,	0xa6,	0xa7, \
	0xa8,	0xa9,	0xaa,	0xab,	0xac,	0xad,	0xae,	0xaf, \
	0xb0,	0xb1,	0xb2,	0xb3,	0xb4,	0xb5,	0xb6,	0xb7, \
	0xb8,	0xb9,	0xba,	0xbb,	0xbc,	0xbd,	0xbe,	0xbf, \
	0xc0,	0xc1,	0xc2,	0xc3,	0xc4,	0xc5,	0xc6,	0xc7, \
	0xc8,	0xc9,	0xca,	0xcb,	0xcc,	0xcd,	0xce,	0xcf, \
	0xd0,	0xd1,	0xd2,	0xd3,	0xd4,	0xd5,	0xd6,	0xd7, \
	0xd8,	0xd9,	0xda,	0xdb,	0xdc,	0xdd,	0xde,	0xdf, \
	0xe0,	0xe1,	0xe2,	0xe3,	0xe4,	0xe5,	0xe6,	0xe7, \
	0xe8,	0xe9,	0xea,	0xeb,	0xec,	0xed,	0xee,	0xef, \
	0xf0,	0xf1,	0xf2,	0xf3,	0xf4,	0xf5,	0xf6,	0xf7, \
	0xf8,	0xf9,	0xfa,	0xfb,	0xfc,	0xfd,	0xfe,	0xff
256 
257 _RuneLocale _DefaultRuneLocale = {
258 	_RUNE_MAGIC_1,
259 	"NONE",
260 	{ _DEFRUNETYPE },
261 	{ _DEFMAPLOWER },
262 	{ _DEFMAPUPPER },
263 };
264 
265 /*
266  * __ctype_mask, __trans_lower, and __trans_upper come from former _ctype.c and
267  * have to stay pointers for binary compatibility, so we provide separate
268  * storage for them, initialized to "C" locale contents by default.  Note that
269  * legacy code may dereference __ctype_mask[-1] when checking against EOF,
270  * relying on that value to be 0.  To allow this, ___ctype_mask is expanded by
271  * one value and prepended with a leading 0, with __ctype_mask being set to
272  * point to ___ctype_mask[1].  (__trans_lower and __trans_upper do not suffer
273  * from this as EOF access was prevented in legacy code by a check against
274  * isascii(), which always returned 0 for EOF.)
275  */
276 static unsigned int ___ctype_mask[_CACHED_RUNES + 1] = { 0, _DEFRUNETYPE };
277 unsigned int *__ctype_mask = &___ctype_mask[1];
278 
279 static int ___trans_lower[_CACHED_RUNES] = { _DEFMAPLOWER };
280 int *__trans_lower = ___trans_lower;
281 
282 static int ___trans_upper[_CACHED_RUNES] = { _DEFMAPUPPER };
283 int *__trans_upper = ___trans_upper;
284