xref: /freebsd/usr.bin/localedef/ctype.c (revision 770fba248daff6cc72ce0bfbd1f2a941c90dc99a)
/*-
 * Copyright 2018 Nexenta Systems, Inc.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>  All rights reserved.
 * Copyright 2015 John Marino <draco@marino.st>
 *
 * This source code is derived from the illumos localedef command, and
 * provided under BSD-style license terms by Nexenta Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * LC_CTYPE database generation routines for localedef.
 */
35057ca2d4SBaptiste Daroussin #include <sys/cdefs.h>
366131dc6aSBaptiste Daroussin #include <sys/tree.h>
37057ca2d4SBaptiste Daroussin 
38057ca2d4SBaptiste Daroussin #include <stdio.h>
39057ca2d4SBaptiste Daroussin #include <stdlib.h>
40057ca2d4SBaptiste Daroussin #include <stddef.h>
41057ca2d4SBaptiste Daroussin #include <string.h>
42057ca2d4SBaptiste Daroussin #include <sys/types.h>
43057ca2d4SBaptiste Daroussin #include <wchar.h>
44057ca2d4SBaptiste Daroussin #include <unistd.h>
45057ca2d4SBaptiste Daroussin #include "localedef.h"
46057ca2d4SBaptiste Daroussin #include "parser.h"
4700c61a3bSAlex Richardson 
4800c61a3bSAlex Richardson /* Always include the defines for the target: */
4900c61a3bSAlex Richardson #define _DONT_USE_CTYPE_INLINE_ /* Avoid dependencies on runetype.h */
5000c61a3bSAlex Richardson #include "_ctype.h"
51057ca2d4SBaptiste Daroussin #include "runefile.h"
52057ca2d4SBaptiste Daroussin 
53057ca2d4SBaptiste Daroussin 
/*
 * Bootstrapping fallback: provide _CTYPE_N when the _ctype.h being used
 * does not define it.
 */
#ifndef _CTYPE_N
#define	_CTYPE_N	0x00400000L
#endif

/*
 * Local names for the character-class bits used throughout this file.
 * _E1.._E5 are the extension classes (phonogram, ideogram, english,
 * number, special); _E3 is 0, so the "english" class contributes no bit
 * of its own.
 */
#define	_ISUPPER	_CTYPE_U
#define	_ISLOWER	_CTYPE_L
#define	_ISDIGIT	_CTYPE_D
#define	_ISXDIGIT	_CTYPE_X
#define	_ISSPACE	_CTYPE_S
#define	_ISBLANK	_CTYPE_B
#define	_ISALPHA	_CTYPE_A
#define	_ISPUNCT	_CTYPE_P
#define	_ISGRAPH	_CTYPE_G
#define	_ISPRINT	_CTYPE_R
#define	_ISCNTRL	_CTYPE_C
#define	_E1		_CTYPE_Q
#define	_E2		_CTYPE_I
#define	_E3		0
#define	_E4		_CTYPE_N
#define	_E5		_CTYPE_T
75057ca2d4SBaptiste Daroussin 
76057ca2d4SBaptiste Daroussin static wchar_t		last_ctype;
776131dc6aSBaptiste Daroussin static int ctype_compare(const void *n1, const void *n2);
78057ca2d4SBaptiste Daroussin 
79057ca2d4SBaptiste Daroussin typedef struct ctype_node {
80057ca2d4SBaptiste Daroussin 	wchar_t wc;
81057ca2d4SBaptiste Daroussin 	int32_t	ctype;
82057ca2d4SBaptiste Daroussin 	int32_t	toupper;
83057ca2d4SBaptiste Daroussin 	int32_t	tolower;
846131dc6aSBaptiste Daroussin 	RB_ENTRY(ctype_node) entry;
85057ca2d4SBaptiste Daroussin } ctype_node_t;
86057ca2d4SBaptiste Daroussin 
873a444dbdSBaptiste Daroussin static RB_HEAD(ctypes, ctype_node) ctypes;
88e30c75b1SBaptiste Daroussin RB_GENERATE_STATIC(ctypes, ctype_node, entry, ctype_compare);
89057ca2d4SBaptiste Daroussin 
90057ca2d4SBaptiste Daroussin static int
ctype_compare(const void * n1,const void * n2)91057ca2d4SBaptiste Daroussin ctype_compare(const void *n1, const void *n2)
92057ca2d4SBaptiste Daroussin {
93057ca2d4SBaptiste Daroussin 	const ctype_node_t *c1 = n1;
94057ca2d4SBaptiste Daroussin 	const ctype_node_t *c2 = n2;
95057ca2d4SBaptiste Daroussin 
96*770fba24SMark Johnston 	return (wchar_cmp(c1->wc, c2->wc));
97057ca2d4SBaptiste Daroussin }
98057ca2d4SBaptiste Daroussin 
99057ca2d4SBaptiste Daroussin void
init_ctype(void)100057ca2d4SBaptiste Daroussin init_ctype(void)
101057ca2d4SBaptiste Daroussin {
1026131dc6aSBaptiste Daroussin 	RB_INIT(&ctypes);
103057ca2d4SBaptiste Daroussin }
104057ca2d4SBaptiste Daroussin 
105057ca2d4SBaptiste Daroussin 
106057ca2d4SBaptiste Daroussin static void
add_ctype_impl(ctype_node_t * ctn)107057ca2d4SBaptiste Daroussin add_ctype_impl(ctype_node_t *ctn)
108057ca2d4SBaptiste Daroussin {
109057ca2d4SBaptiste Daroussin 	switch (last_kw) {
110057ca2d4SBaptiste Daroussin 	case T_ISUPPER:
111057ca2d4SBaptiste Daroussin 		ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT);
112057ca2d4SBaptiste Daroussin 		break;
113057ca2d4SBaptiste Daroussin 	case T_ISLOWER:
114057ca2d4SBaptiste Daroussin 		ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT);
115057ca2d4SBaptiste Daroussin 		break;
116057ca2d4SBaptiste Daroussin 	case T_ISALPHA:
117057ca2d4SBaptiste Daroussin 		ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT);
118057ca2d4SBaptiste Daroussin 		break;
119057ca2d4SBaptiste Daroussin 	case T_ISDIGIT:
120227d35daSBaptiste Daroussin 		ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT | _E4);
121057ca2d4SBaptiste Daroussin 		break;
122057ca2d4SBaptiste Daroussin 	case T_ISSPACE:
1232d1cfed1SYuri Pankov 		/*
1242d1cfed1SYuri Pankov 		 * This can be troublesome as <form-feed>, <newline>,
1252d1cfed1SYuri Pankov 		 * <carriage-return>, <tab>, and <vertical-tab> are defined both
1262d1cfed1SYuri Pankov 		 * as space and cntrl, and POSIX doesn't allow cntrl/print
1272d1cfed1SYuri Pankov 		 * combination.  We will take care of this in dump_ctype().
1282d1cfed1SYuri Pankov 		 */
1292d1cfed1SYuri Pankov 		ctn->ctype |= (_ISSPACE | _ISPRINT);
130057ca2d4SBaptiste Daroussin 		break;
131057ca2d4SBaptiste Daroussin 	case T_ISCNTRL:
132057ca2d4SBaptiste Daroussin 		ctn->ctype |= _ISCNTRL;
133057ca2d4SBaptiste Daroussin 		break;
134057ca2d4SBaptiste Daroussin 	case T_ISGRAPH:
135057ca2d4SBaptiste Daroussin 		ctn->ctype |= (_ISGRAPH | _ISPRINT);
136057ca2d4SBaptiste Daroussin 		break;
137057ca2d4SBaptiste Daroussin 	case T_ISPRINT:
138057ca2d4SBaptiste Daroussin 		ctn->ctype |= _ISPRINT;
139057ca2d4SBaptiste Daroussin 		break;
140057ca2d4SBaptiste Daroussin 	case T_ISPUNCT:
141057ca2d4SBaptiste Daroussin 		ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT);
142057ca2d4SBaptiste Daroussin 		break;
143057ca2d4SBaptiste Daroussin 	case T_ISXDIGIT:
14471e8badeSBaptiste Daroussin 		ctn->ctype |= (_ISXDIGIT | _ISPRINT);
145057ca2d4SBaptiste Daroussin 		break;
146057ca2d4SBaptiste Daroussin 	case T_ISBLANK:
147057ca2d4SBaptiste Daroussin 		ctn->ctype |= (_ISBLANK | _ISSPACE);
148057ca2d4SBaptiste Daroussin 		break;
149057ca2d4SBaptiste Daroussin 	case T_ISPHONOGRAM:
150057ca2d4SBaptiste Daroussin 		ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH);
151057ca2d4SBaptiste Daroussin 		break;
152057ca2d4SBaptiste Daroussin 	case T_ISIDEOGRAM:
153057ca2d4SBaptiste Daroussin 		ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH);
154057ca2d4SBaptiste Daroussin 		break;
155057ca2d4SBaptiste Daroussin 	case T_ISENGLISH:
156057ca2d4SBaptiste Daroussin 		ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH);
157057ca2d4SBaptiste Daroussin 		break;
158057ca2d4SBaptiste Daroussin 	case T_ISNUMBER:
159057ca2d4SBaptiste Daroussin 		ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH);
160057ca2d4SBaptiste Daroussin 		break;
161057ca2d4SBaptiste Daroussin 	case T_ISSPECIAL:
162057ca2d4SBaptiste Daroussin 		ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH);
163057ca2d4SBaptiste Daroussin 		break;
164057ca2d4SBaptiste Daroussin 	case T_ISALNUM:
165057ca2d4SBaptiste Daroussin 		/*
166057ca2d4SBaptiste Daroussin 		 * We can't do anything with this.  The character
167057ca2d4SBaptiste Daroussin 		 * should already be specified as a digit or alpha.
168057ca2d4SBaptiste Daroussin 		 */
169057ca2d4SBaptiste Daroussin 		break;
170057ca2d4SBaptiste Daroussin 	default:
171057ca2d4SBaptiste Daroussin 		errf("not a valid character class");
172057ca2d4SBaptiste Daroussin 	}
173057ca2d4SBaptiste Daroussin }
174057ca2d4SBaptiste Daroussin 
175057ca2d4SBaptiste Daroussin static ctype_node_t *
get_ctype(wchar_t wc)176057ca2d4SBaptiste Daroussin get_ctype(wchar_t wc)
177057ca2d4SBaptiste Daroussin {
178057ca2d4SBaptiste Daroussin 	ctype_node_t	srch;
179057ca2d4SBaptiste Daroussin 	ctype_node_t	*ctn;
180057ca2d4SBaptiste Daroussin 
181057ca2d4SBaptiste Daroussin 	srch.wc = wc;
1826131dc6aSBaptiste Daroussin 	if ((ctn = RB_FIND(ctypes, &ctypes, &srch)) == NULL) {
183057ca2d4SBaptiste Daroussin 		if ((ctn = calloc(1, sizeof (*ctn))) == NULL) {
184057ca2d4SBaptiste Daroussin 			errf("out of memory");
185057ca2d4SBaptiste Daroussin 			return (NULL);
186057ca2d4SBaptiste Daroussin 		}
187057ca2d4SBaptiste Daroussin 		ctn->wc = wc;
188057ca2d4SBaptiste Daroussin 
1896131dc6aSBaptiste Daroussin 		RB_INSERT(ctypes, &ctypes, ctn);
190057ca2d4SBaptiste Daroussin 	}
191057ca2d4SBaptiste Daroussin 	return (ctn);
192057ca2d4SBaptiste Daroussin }
193057ca2d4SBaptiste Daroussin 
194057ca2d4SBaptiste Daroussin void
add_ctype(int val)195057ca2d4SBaptiste Daroussin add_ctype(int val)
196057ca2d4SBaptiste Daroussin {
197057ca2d4SBaptiste Daroussin 	ctype_node_t	*ctn;
198057ca2d4SBaptiste Daroussin 
199057ca2d4SBaptiste Daroussin 	if ((ctn = get_ctype(val)) == NULL) {
200057ca2d4SBaptiste Daroussin 		INTERR;
201057ca2d4SBaptiste Daroussin 		return;
202057ca2d4SBaptiste Daroussin 	}
203057ca2d4SBaptiste Daroussin 	add_ctype_impl(ctn);
204057ca2d4SBaptiste Daroussin 	last_ctype = ctn->wc;
205057ca2d4SBaptiste Daroussin }
206057ca2d4SBaptiste Daroussin 
207057ca2d4SBaptiste Daroussin void
add_ctype_range(wchar_t end)2088c859b07SBaptiste Daroussin add_ctype_range(wchar_t end)
209057ca2d4SBaptiste Daroussin {
210057ca2d4SBaptiste Daroussin 	ctype_node_t	*ctn;
211057ca2d4SBaptiste Daroussin 	wchar_t		cur;
212057ca2d4SBaptiste Daroussin 
213057ca2d4SBaptiste Daroussin 	if (end < last_ctype) {
214057ca2d4SBaptiste Daroussin 		errf("malformed character range (%u ... %u))",
215057ca2d4SBaptiste Daroussin 		    last_ctype, end);
216057ca2d4SBaptiste Daroussin 		return;
217057ca2d4SBaptiste Daroussin 	}
218057ca2d4SBaptiste Daroussin 	for (cur = last_ctype + 1; cur <= end; cur++) {
219057ca2d4SBaptiste Daroussin 		if ((ctn = get_ctype(cur)) == NULL) {
220057ca2d4SBaptiste Daroussin 			INTERR;
221057ca2d4SBaptiste Daroussin 			return;
222057ca2d4SBaptiste Daroussin 		}
223057ca2d4SBaptiste Daroussin 		add_ctype_impl(ctn);
224057ca2d4SBaptiste Daroussin 	}
225057ca2d4SBaptiste Daroussin 	last_ctype = end;
226057ca2d4SBaptiste Daroussin 
227057ca2d4SBaptiste Daroussin }
228057ca2d4SBaptiste Daroussin 
229057ca2d4SBaptiste Daroussin /*
230057ca2d4SBaptiste Daroussin  * A word about widths: if the width mask is specified, then libc
231057ca2d4SBaptiste Daroussin  * unconditionally honors it.  Otherwise, it assumes printable
232057ca2d4SBaptiste Daroussin  * characters have width 1, and non-printable characters have width
233dae3a64fSEitan Adler  * -1 (except for NULL which is special with width 0).  Hence, we have
234057ca2d4SBaptiste Daroussin  * no need to inject defaults here -- the "default" unset value of 0
235057ca2d4SBaptiste Daroussin  * indicates that libc should use its own logic in wcwidth as described.
236057ca2d4SBaptiste Daroussin  */
237057ca2d4SBaptiste Daroussin void
add_width(int wc,int width)238057ca2d4SBaptiste Daroussin add_width(int wc, int width)
239057ca2d4SBaptiste Daroussin {
240057ca2d4SBaptiste Daroussin 	ctype_node_t	*ctn;
241057ca2d4SBaptiste Daroussin 
242057ca2d4SBaptiste Daroussin 	if ((ctn = get_ctype(wc)) == NULL) {
243057ca2d4SBaptiste Daroussin 		INTERR;
244057ca2d4SBaptiste Daroussin 		return;
245057ca2d4SBaptiste Daroussin 	}
246057ca2d4SBaptiste Daroussin 	ctn->ctype &= ~(_CTYPE_SWM);
247057ca2d4SBaptiste Daroussin 	switch (width) {
248057ca2d4SBaptiste Daroussin 	case 0:
249057ca2d4SBaptiste Daroussin 		ctn->ctype |= _CTYPE_SW0;
250057ca2d4SBaptiste Daroussin 		break;
251057ca2d4SBaptiste Daroussin 	case 1:
252057ca2d4SBaptiste Daroussin 		ctn->ctype |= _CTYPE_SW1;
253057ca2d4SBaptiste Daroussin 		break;
254057ca2d4SBaptiste Daroussin 	case 2:
255057ca2d4SBaptiste Daroussin 		ctn->ctype |= _CTYPE_SW2;
256057ca2d4SBaptiste Daroussin 		break;
257057ca2d4SBaptiste Daroussin 	case 3:
258057ca2d4SBaptiste Daroussin 		ctn->ctype |= _CTYPE_SW3;
259057ca2d4SBaptiste Daroussin 		break;
260057ca2d4SBaptiste Daroussin 	}
261057ca2d4SBaptiste Daroussin }
262057ca2d4SBaptiste Daroussin 
/*
 * Record the same display width for every character from start through
 * end inclusive.  Nothing happens when start is greater than end.
 */
void
add_width_range(int start, int end, int width)
{
	int	wc = start;

	while (wc <= end) {
		add_width(wc, width);
		wc++;
	}
}
270057ca2d4SBaptiste Daroussin 
271057ca2d4SBaptiste Daroussin void
add_caseconv(int val,int wc)272057ca2d4SBaptiste Daroussin add_caseconv(int val, int wc)
273057ca2d4SBaptiste Daroussin {
274057ca2d4SBaptiste Daroussin 	ctype_node_t	*ctn;
275057ca2d4SBaptiste Daroussin 
276057ca2d4SBaptiste Daroussin 	ctn = get_ctype(val);
277057ca2d4SBaptiste Daroussin 	if (ctn == NULL) {
278057ca2d4SBaptiste Daroussin 		INTERR;
279057ca2d4SBaptiste Daroussin 		return;
280057ca2d4SBaptiste Daroussin 	}
281057ca2d4SBaptiste Daroussin 
282057ca2d4SBaptiste Daroussin 	switch (last_kw) {
283057ca2d4SBaptiste Daroussin 	case T_TOUPPER:
284057ca2d4SBaptiste Daroussin 		ctn->toupper = wc;
285057ca2d4SBaptiste Daroussin 		break;
286057ca2d4SBaptiste Daroussin 	case T_TOLOWER:
287057ca2d4SBaptiste Daroussin 		ctn->tolower = wc;
288057ca2d4SBaptiste Daroussin 		break;
289057ca2d4SBaptiste Daroussin 	default:
290057ca2d4SBaptiste Daroussin 		INTERR;
291057ca2d4SBaptiste Daroussin 		break;
292057ca2d4SBaptiste Daroussin 	}
293057ca2d4SBaptiste Daroussin }
294057ca2d4SBaptiste Daroussin 
295057ca2d4SBaptiste Daroussin void
dump_ctype(void)296057ca2d4SBaptiste Daroussin dump_ctype(void)
297057ca2d4SBaptiste Daroussin {
298057ca2d4SBaptiste Daroussin 	FILE		*f;
299057ca2d4SBaptiste Daroussin 	_FileRuneLocale	rl;
300057ca2d4SBaptiste Daroussin 	ctype_node_t	*ctn, *last_ct, *last_lo, *last_up;
301057ca2d4SBaptiste Daroussin 	_FileRuneEntry	*ct = NULL;
302057ca2d4SBaptiste Daroussin 	_FileRuneEntry	*lo = NULL;
303057ca2d4SBaptiste Daroussin 	_FileRuneEntry	*up = NULL;
304057ca2d4SBaptiste Daroussin 	wchar_t		wc;
3054644f9beSYuri Pankov 	uint32_t	runetype_ext_nranges;
3064644f9beSYuri Pankov 	uint32_t	maplower_ext_nranges;
3074644f9beSYuri Pankov 	uint32_t	mapupper_ext_nranges;
308057ca2d4SBaptiste Daroussin 
309057ca2d4SBaptiste Daroussin 	(void) memset(&rl, 0, sizeof (rl));
3104644f9beSYuri Pankov 	runetype_ext_nranges = 0;
311057ca2d4SBaptiste Daroussin 	last_ct = NULL;
3124644f9beSYuri Pankov 	maplower_ext_nranges = 0;
313057ca2d4SBaptiste Daroussin 	last_lo = NULL;
3144644f9beSYuri Pankov 	mapupper_ext_nranges = 0;
315057ca2d4SBaptiste Daroussin 	last_up = NULL;
316057ca2d4SBaptiste Daroussin 
317057ca2d4SBaptiste Daroussin 	if ((f = open_category()) == NULL)
318057ca2d4SBaptiste Daroussin 		return;
319057ca2d4SBaptiste Daroussin 
320057ca2d4SBaptiste Daroussin 	(void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8);
3211bb0ddf9SPedro F. Giffuni 	(void) strlcpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding));
322057ca2d4SBaptiste Daroussin 
323057ca2d4SBaptiste Daroussin 	/*
324057ca2d4SBaptiste Daroussin 	 * Initialize the identity map.
325057ca2d4SBaptiste Daroussin 	 */
326057ca2d4SBaptiste Daroussin 	for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) {
3274644f9beSYuri Pankov 		rl.maplower[wc] = htote(wc);
3284644f9beSYuri Pankov 		rl.mapupper[wc] = htote(wc);
329057ca2d4SBaptiste Daroussin 	}
330057ca2d4SBaptiste Daroussin 
3316131dc6aSBaptiste Daroussin 	RB_FOREACH(ctn, ctypes, &ctypes) {
332057ca2d4SBaptiste Daroussin 		int conflict = 0;
333057ca2d4SBaptiste Daroussin 
334057ca2d4SBaptiste Daroussin 		wc = ctn->wc;
335057ca2d4SBaptiste Daroussin 
336057ca2d4SBaptiste Daroussin 		/*
337057ca2d4SBaptiste Daroussin 		 * POSIX requires certain portable characters have
338057ca2d4SBaptiste Daroussin 		 * certain types.  Add them if they are missing.
339057ca2d4SBaptiste Daroussin 		 */
340057ca2d4SBaptiste Daroussin 		if ((wc >= 1) && (wc <= 127)) {
341057ca2d4SBaptiste Daroussin 			if ((wc >= 'A') && (wc <= 'Z'))
342057ca2d4SBaptiste Daroussin 				ctn->ctype |= _ISUPPER;
343057ca2d4SBaptiste Daroussin 			if ((wc >= 'a') && (wc <= 'z'))
344057ca2d4SBaptiste Daroussin 				ctn->ctype |= _ISLOWER;
345057ca2d4SBaptiste Daroussin 			if ((wc >= '0') && (wc <= '9'))
346057ca2d4SBaptiste Daroussin 				ctn->ctype |= _ISDIGIT;
347046c3cdaSPedro F. Giffuni 			if (wc == ' ')
348046c3cdaSPedro F. Giffuni 				ctn->ctype |= _ISPRINT;
349057ca2d4SBaptiste Daroussin 			if (strchr(" \f\n\r\t\v", (char)wc) != NULL)
350057ca2d4SBaptiste Daroussin 				ctn->ctype |= _ISSPACE;
351057ca2d4SBaptiste Daroussin 			if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL)
352057ca2d4SBaptiste Daroussin 				ctn->ctype |= _ISXDIGIT;
353057ca2d4SBaptiste Daroussin 			if (strchr(" \t", (char)wc))
354057ca2d4SBaptiste Daroussin 				ctn->ctype |= _ISBLANK;
355057ca2d4SBaptiste Daroussin 
356057ca2d4SBaptiste Daroussin 			/*
357057ca2d4SBaptiste Daroussin 			 * Technically these settings are only
358057ca2d4SBaptiste Daroussin 			 * required for the C locale.  However, it
359057ca2d4SBaptiste Daroussin 			 * turns out that because of the historical
360057ca2d4SBaptiste Daroussin 			 * version of isprint(), we need them for all
361057ca2d4SBaptiste Daroussin 			 * locales as well.  Note that these are not
362057ca2d4SBaptiste Daroussin 			 * necessarily valid punctation characters in
363057ca2d4SBaptiste Daroussin 			 * the current language, but ispunct() needs
364057ca2d4SBaptiste Daroussin 			 * to return TRUE for them.
365057ca2d4SBaptiste Daroussin 			 */
366057ca2d4SBaptiste Daroussin 			if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~",
367057ca2d4SBaptiste Daroussin 			    (char)wc))
368057ca2d4SBaptiste Daroussin 				ctn->ctype |= _ISPUNCT;
369057ca2d4SBaptiste Daroussin 		}
370057ca2d4SBaptiste Daroussin 
371057ca2d4SBaptiste Daroussin 		/*
372057ca2d4SBaptiste Daroussin 		 * POSIX also requires that certain types imply
373057ca2d4SBaptiste Daroussin 		 * others.  Add any inferred types here.
374057ca2d4SBaptiste Daroussin 		 */
375057ca2d4SBaptiste Daroussin 		if (ctn->ctype & (_ISUPPER |_ISLOWER))
376057ca2d4SBaptiste Daroussin 			ctn->ctype |= _ISALPHA;
377057ca2d4SBaptiste Daroussin 		if (ctn->ctype & _ISDIGIT)
378057ca2d4SBaptiste Daroussin 			ctn->ctype |= _ISXDIGIT;
379057ca2d4SBaptiste Daroussin 		if (ctn->ctype & _ISBLANK)
380057ca2d4SBaptiste Daroussin 			ctn->ctype |= _ISSPACE;
381057ca2d4SBaptiste Daroussin 		if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT))
382057ca2d4SBaptiste Daroussin 			ctn->ctype |= _ISGRAPH;
383057ca2d4SBaptiste Daroussin 		if (ctn->ctype & _ISGRAPH)
384057ca2d4SBaptiste Daroussin 			ctn->ctype |= _ISPRINT;
385057ca2d4SBaptiste Daroussin 
386057ca2d4SBaptiste Daroussin 		/*
3872d1cfed1SYuri Pankov 		 * POSIX requires that certain combinations are invalid.
3882d1cfed1SYuri Pankov 		 * Try fixing the cases we know about (see add_ctype_impl()).
3892d1cfed1SYuri Pankov 		 */
3902d1cfed1SYuri Pankov 		if ((ctn->ctype & (_ISSPACE|_ISCNTRL)) == (_ISSPACE|_ISCNTRL))
3912d1cfed1SYuri Pankov 			ctn->ctype &= ~_ISPRINT;
3922d1cfed1SYuri Pankov 
3932d1cfed1SYuri Pankov 		/*
3942d1cfed1SYuri Pankov 		 * Finally, don't flag remaining cases as a fatal error,
3952d1cfed1SYuri Pankov 		 * and just warn about them.
396057ca2d4SBaptiste Daroussin 		 */
397057ca2d4SBaptiste Daroussin 		if ((ctn->ctype & _ISALPHA) &&
398057ca2d4SBaptiste Daroussin 		    (ctn->ctype & (_ISPUNCT|_ISDIGIT)))
399057ca2d4SBaptiste Daroussin 			conflict++;
4001bb0ddf9SPedro F. Giffuni 		if ((ctn->ctype & _ISPUNCT) &&
401057ca2d4SBaptiste Daroussin 		    (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT)))
402057ca2d4SBaptiste Daroussin 			conflict++;
403057ca2d4SBaptiste Daroussin 		if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH))
404057ca2d4SBaptiste Daroussin 			conflict++;
40556b1edd6SPedro F. Giffuni 		if ((ctn->ctype & _ISCNTRL) && (ctn->ctype & _ISPRINT))
406057ca2d4SBaptiste Daroussin 			conflict++;
407057ca2d4SBaptiste Daroussin 		if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH)))
408057ca2d4SBaptiste Daroussin 			conflict++;
409057ca2d4SBaptiste Daroussin 
410057ca2d4SBaptiste Daroussin 		if (conflict) {
411057ca2d4SBaptiste Daroussin 			warn("conflicting classes for character 0x%x (%x)",
412057ca2d4SBaptiste Daroussin 			    wc, ctn->ctype);
413057ca2d4SBaptiste Daroussin 		}
414057ca2d4SBaptiste Daroussin 		/*
415057ca2d4SBaptiste Daroussin 		 * Handle the lower 256 characters using the simple
416057ca2d4SBaptiste Daroussin 		 * optimization.  Note that if we have not defined the
417057ca2d4SBaptiste Daroussin 		 * upper/lower case, then we identity map it.
418057ca2d4SBaptiste Daroussin 		 */
419057ca2d4SBaptiste Daroussin 		if ((unsigned)wc < _CACHED_RUNES) {
4204644f9beSYuri Pankov 			rl.runetype[wc] = htote(ctn->ctype);
421057ca2d4SBaptiste Daroussin 			if (ctn->tolower)
4224644f9beSYuri Pankov 				rl.maplower[wc] = htote(ctn->tolower);
423057ca2d4SBaptiste Daroussin 			if (ctn->toupper)
4244644f9beSYuri Pankov 				rl.mapupper[wc] = htote(ctn->toupper);
425057ca2d4SBaptiste Daroussin 			continue;
426057ca2d4SBaptiste Daroussin 		}
427057ca2d4SBaptiste Daroussin 
428c7edf4fdSBaptiste Daroussin 		if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype) &&
429c7edf4fdSBaptiste Daroussin 		    (last_ct->wc + 1 == wc)) {
4304644f9beSYuri Pankov 			ct[runetype_ext_nranges - 1].max = htote(wc);
431057ca2d4SBaptiste Daroussin 		} else {
4324644f9beSYuri Pankov 			runetype_ext_nranges++;
4334644f9beSYuri Pankov 			ct = realloc(ct, sizeof (*ct) * runetype_ext_nranges);
4344644f9beSYuri Pankov 			ct[runetype_ext_nranges - 1].min = htote(wc);
4354644f9beSYuri Pankov 			ct[runetype_ext_nranges - 1].max = htote(wc);
4364644f9beSYuri Pankov 			ct[runetype_ext_nranges - 1].map =
4374644f9beSYuri Pankov 			    htote(ctn->ctype);
438057ca2d4SBaptiste Daroussin 		}
439c7edf4fdSBaptiste Daroussin 		last_ct = ctn;
440057ca2d4SBaptiste Daroussin 		if (ctn->tolower == 0) {
441057ca2d4SBaptiste Daroussin 			last_lo = NULL;
442057ca2d4SBaptiste Daroussin 		} else if ((last_lo != NULL) &&
443057ca2d4SBaptiste Daroussin 		    (last_lo->tolower + 1 == ctn->tolower)) {
4444644f9beSYuri Pankov 			lo[maplower_ext_nranges - 1].max = htote(wc);
445057ca2d4SBaptiste Daroussin 			last_lo = ctn;
446057ca2d4SBaptiste Daroussin 		} else {
4474644f9beSYuri Pankov 			maplower_ext_nranges++;
4484644f9beSYuri Pankov 			lo = realloc(lo, sizeof (*lo) * maplower_ext_nranges);
4494644f9beSYuri Pankov 			lo[maplower_ext_nranges - 1].min = htote(wc);
4504644f9beSYuri Pankov 			lo[maplower_ext_nranges - 1].max = htote(wc);
4514644f9beSYuri Pankov 			lo[maplower_ext_nranges - 1].map =
4524644f9beSYuri Pankov 			    htote(ctn->tolower);
453057ca2d4SBaptiste Daroussin 			last_lo = ctn;
454057ca2d4SBaptiste Daroussin 		}
455057ca2d4SBaptiste Daroussin 
456057ca2d4SBaptiste Daroussin 		if (ctn->toupper == 0) {
457057ca2d4SBaptiste Daroussin 			last_up = NULL;
458057ca2d4SBaptiste Daroussin 		} else if ((last_up != NULL) &&
459057ca2d4SBaptiste Daroussin 		    (last_up->toupper + 1 == ctn->toupper)) {
4604644f9beSYuri Pankov 			up[mapupper_ext_nranges-1].max = htote(wc);
461057ca2d4SBaptiste Daroussin 			last_up = ctn;
462057ca2d4SBaptiste Daroussin 		} else {
4634644f9beSYuri Pankov 			mapupper_ext_nranges++;
4644644f9beSYuri Pankov 			up = realloc(up, sizeof (*up) * mapupper_ext_nranges);
4654644f9beSYuri Pankov 			up[mapupper_ext_nranges - 1].min = htote(wc);
4664644f9beSYuri Pankov 			up[mapupper_ext_nranges - 1].max = htote(wc);
4674644f9beSYuri Pankov 			up[mapupper_ext_nranges - 1].map =
4684644f9beSYuri Pankov 			    htote(ctn->toupper);
469057ca2d4SBaptiste Daroussin 			last_up = ctn;
470057ca2d4SBaptiste Daroussin 		}
471057ca2d4SBaptiste Daroussin 	}
472057ca2d4SBaptiste Daroussin 
4734644f9beSYuri Pankov 	rl.runetype_ext_nranges = htote(runetype_ext_nranges);
4744644f9beSYuri Pankov 	rl.maplower_ext_nranges = htote(maplower_ext_nranges);
4754644f9beSYuri Pankov 	rl.mapupper_ext_nranges = htote(mapupper_ext_nranges);
476c48c87b7SPedro F. Giffuni 	if ((wr_category(&rl, sizeof (rl), f) < 0) ||
4774644f9beSYuri Pankov 	    (wr_category(ct, sizeof (*ct) * runetype_ext_nranges, f) < 0) ||
4784644f9beSYuri Pankov 	    (wr_category(lo, sizeof (*lo) * maplower_ext_nranges, f) < 0) ||
4794644f9beSYuri Pankov 	    (wr_category(up, sizeof (*up) * mapupper_ext_nranges, f) < 0)) {
480c48c87b7SPedro F. Giffuni 		return;
481057ca2d4SBaptiste Daroussin 	}
482057ca2d4SBaptiste Daroussin 
483c48c87b7SPedro F. Giffuni 	close_category(f);
484c48c87b7SPedro F. Giffuni }
485