xref: /freebsd/lib/libc/locale/collate.c (revision 2a6abeebef961038d455abfcfcda14c16aec5b52)
1c3d0cca4SAndrey A. Chernov /*-
2*2a6abeebSBaptiste Daroussin  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
3c3d0cca4SAndrey A. Chernov  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
4c3d0cca4SAndrey A. Chernov  *		at Electronni Visti IA, Kiev, Ukraine.
5c3d0cca4SAndrey A. Chernov  *			All rights reserved.
6c3d0cca4SAndrey A. Chernov  *
73c87aa1dSDavid Chisnall  * Copyright (c) 2011 The FreeBSD Foundation
83c87aa1dSDavid Chisnall  * All rights reserved.
93c87aa1dSDavid Chisnall  * Portions of this software were developed by David Chisnall
103c87aa1dSDavid Chisnall  * under sponsorship from the FreeBSD Foundation.
113c87aa1dSDavid Chisnall  *
12c3d0cca4SAndrey A. Chernov  * Redistribution and use in source and binary forms, with or without
13c3d0cca4SAndrey A. Chernov  * modification, are permitted provided that the following conditions
14c3d0cca4SAndrey A. Chernov  * are met:
15c3d0cca4SAndrey A. Chernov  * 1. Redistributions of source code must retain the above copyright
16c3d0cca4SAndrey A. Chernov  *    notice, this list of conditions and the following disclaimer.
17c3d0cca4SAndrey A. Chernov  * 2. Redistributions in binary form must reproduce the above copyright
18c3d0cca4SAndrey A. Chernov  *    notice, this list of conditions and the following disclaimer in the
19c3d0cca4SAndrey A. Chernov  *    documentation and/or other materials provided with the distribution.
20c3d0cca4SAndrey A. Chernov  *
21c3d0cca4SAndrey A. Chernov  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
22c3d0cca4SAndrey A. Chernov  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23c3d0cca4SAndrey A. Chernov  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24c3d0cca4SAndrey A. Chernov  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
25c3d0cca4SAndrey A. Chernov  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26c3d0cca4SAndrey A. Chernov  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27c3d0cca4SAndrey A. Chernov  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28c3d0cca4SAndrey A. Chernov  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29c3d0cca4SAndrey A. Chernov  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30c3d0cca4SAndrey A. Chernov  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31c3d0cca4SAndrey A. Chernov  * SUCH DAMAGE.
32*2a6abeebSBaptiste Daroussin  *
33*2a6abeebSBaptiste Daroussin  * Adapted to xlocale by John Marino <draco@marino.st>
34c3d0cca4SAndrey A. Chernov  */
35c3d0cca4SAndrey A. Chernov 
36333fc21eSDavid E. O'Brien #include <sys/cdefs.h>
37333fc21eSDavid E. O'Brien __FBSDID("$FreeBSD$");
38333fc21eSDavid E. O'Brien 
39d201fe46SDaniel Eischen #include "namespace.h"
40c3d0cca4SAndrey A. Chernov #include <stdio.h>
41c3d0cca4SAndrey A. Chernov #include <stdlib.h>
42c3d0cca4SAndrey A. Chernov #include <string.h>
43*2a6abeebSBaptiste Daroussin #include <wchar.h>
44926f20c9SAndrey A. Chernov #include <errno.h>
45926f20c9SAndrey A. Chernov #include <unistd.h>
46*2a6abeebSBaptiste Daroussin #include <fcntl.h>
47*2a6abeebSBaptiste Daroussin #include <sys/types.h>
48*2a6abeebSBaptiste Daroussin #include <sys/stat.h>
49*2a6abeebSBaptiste Daroussin #include <sys/mman.h>
50d201fe46SDaniel Eischen #include "un-namespace.h"
51d201fe46SDaniel Eischen 
52c3d0cca4SAndrey A. Chernov #include "collate.h"
5363407d34SAndrey A. Chernov #include "setlocale.h"
5476692b80SAndrey A. Chernov #include "ldpart.h"
55c3d0cca4SAndrey A. Chernov 
563c87aa1dSDavid Chisnall struct xlocale_collate __xlocale_global_collate = {
57*2a6abeebSBaptiste Daroussin 	{{0}, "C"}, 1, 0, 0, 0
583c87aa1dSDavid Chisnall };
593c87aa1dSDavid Chisnall 
603c87aa1dSDavid Chisnall struct xlocale_collate __xlocale_C_collate = {
61*2a6abeebSBaptiste Daroussin 	{{0}, "C"}, 1, 0, 0, 0
623c87aa1dSDavid Chisnall };
63c3d0cca4SAndrey A. Chernov 
64*2a6abeebSBaptiste Daroussin #include "libc_private.h"
65926f20c9SAndrey A. Chernov 
/* Forward declaration: __collate_load() below calls this before it is defined. */
int	__collate_load_tables_l(const char *encoding,
	    struct xlocale_collate *table);
683c87aa1dSDavid Chisnall 
693c87aa1dSDavid Chisnall static void
703c87aa1dSDavid Chisnall destruct_collate(void *t)
713c87aa1dSDavid Chisnall {
723c87aa1dSDavid Chisnall 	struct xlocale_collate *table = t;
73*2a6abeebSBaptiste Daroussin 	if (table->map && (table->maplen > 0)) {
74*2a6abeebSBaptiste Daroussin 		(void) munmap(table->map, table->maplen);
753c87aa1dSDavid Chisnall 	}
763c87aa1dSDavid Chisnall 	free(t);
773c87aa1dSDavid Chisnall }
783c87aa1dSDavid Chisnall 
793c87aa1dSDavid Chisnall void *
80*2a6abeebSBaptiste Daroussin __collate_load(const char *encoding, __unused locale_t unused)
813c87aa1dSDavid Chisnall {
823c87aa1dSDavid Chisnall 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
833c87aa1dSDavid Chisnall 		return &__xlocale_C_collate;
843c87aa1dSDavid Chisnall 	}
853c87aa1dSDavid Chisnall 	struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1);
863c87aa1dSDavid Chisnall 	table->header.header.destructor = destruct_collate;
873c87aa1dSDavid Chisnall 	// FIXME: Make sure that _LDP_CACHE is never returned.  We should be doing
883c87aa1dSDavid Chisnall 	// the caching outside of this section
893c87aa1dSDavid Chisnall 	if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) {
903c87aa1dSDavid Chisnall 		xlocale_release(table);
913c87aa1dSDavid Chisnall 		return NULL;
923c87aa1dSDavid Chisnall 	}
933c87aa1dSDavid Chisnall 	return table;
943c87aa1dSDavid Chisnall }
953c87aa1dSDavid Chisnall 
963c87aa1dSDavid Chisnall /**
973c87aa1dSDavid Chisnall  * Load the collation tables for the specified encoding into the global table.
983c87aa1dSDavid Chisnall  */
993c87aa1dSDavid Chisnall int
10076692b80SAndrey A. Chernov __collate_load_tables(const char *encoding)
101c3d0cca4SAndrey A. Chernov {
102bb4317bfSDavid Chisnall 	int ret = __collate_load_tables_l(encoding, &__xlocale_global_collate);
103bb4317bfSDavid Chisnall 	return ret;
1043c87aa1dSDavid Chisnall }
1053c87aa1dSDavid Chisnall 
1063c87aa1dSDavid Chisnall int
1073c87aa1dSDavid Chisnall __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
1083c87aa1dSDavid Chisnall {
109*2a6abeebSBaptiste Daroussin 	int i, chains, z;
110*2a6abeebSBaptiste Daroussin 	char buf[PATH_MAX];
111*2a6abeebSBaptiste Daroussin 	char *TMP;
112*2a6abeebSBaptiste Daroussin 	char *map;
113*2a6abeebSBaptiste Daroussin 	collate_info_t *info;
114*2a6abeebSBaptiste Daroussin 	struct stat sbuf;
115*2a6abeebSBaptiste Daroussin 	int fd;
116c3d0cca4SAndrey A. Chernov 
11776692b80SAndrey A. Chernov 	/* 'encoding' must be already checked. */
11876692b80SAndrey A. Chernov 	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
119bb4317bfSDavid Chisnall 		table->__collate_load_error = 1;
12076692b80SAndrey A. Chernov 		return (_LDP_CACHE);
121377da8e8SAndrey A. Chernov 	}
12276692b80SAndrey A. Chernov 
123*2a6abeebSBaptiste Daroussin 	(void) snprintf(buf, sizeof (buf), "%s/%s/LC_COLLATE",
124*2a6abeebSBaptiste Daroussin 	    _PathLocale, encoding);
12576692b80SAndrey A. Chernov 
126*2a6abeebSBaptiste Daroussin 	if ((fd = _open(buf, O_RDONLY)) < 0)
127*2a6abeebSBaptiste Daroussin 		return (_LDP_ERROR);
128*2a6abeebSBaptiste Daroussin 	if (_fstat(fd, &sbuf) < 0) {
129*2a6abeebSBaptiste Daroussin 		(void) _close(fd);
1308e52da4dSAndrey A. Chernov 		return (_LDP_ERROR);
1318e52da4dSAndrey A. Chernov 	}
132*2a6abeebSBaptiste Daroussin 	if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) {
133*2a6abeebSBaptiste Daroussin 		(void) _close(fd);
134*2a6abeebSBaptiste Daroussin 		errno = EINVAL;
1358e52da4dSAndrey A. Chernov 		return (_LDP_ERROR);
1368e52da4dSAndrey A. Chernov 	}
137*2a6abeebSBaptiste Daroussin 	map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
138*2a6abeebSBaptiste Daroussin 	(void) _close(fd);
139*2a6abeebSBaptiste Daroussin 	if ((TMP = map) == NULL) {
1408e52da4dSAndrey A. Chernov 		return (_LDP_ERROR);
1418e52da4dSAndrey A. Chernov 	}
142*2a6abeebSBaptiste Daroussin 
143*2a6abeebSBaptiste Daroussin 	if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) {
144*2a6abeebSBaptiste Daroussin 		(void) munmap(map, sbuf.st_size);
145*2a6abeebSBaptiste Daroussin 		errno = EINVAL;
1468e52da4dSAndrey A. Chernov 		return (_LDP_ERROR);
1478e52da4dSAndrey A. Chernov 	}
148*2a6abeebSBaptiste Daroussin 	TMP += COLLATE_STR_LEN;
149*2a6abeebSBaptiste Daroussin 
150*2a6abeebSBaptiste Daroussin 	info = (void *)TMP;
151*2a6abeebSBaptiste Daroussin 	TMP += sizeof (*info);
152*2a6abeebSBaptiste Daroussin 
153*2a6abeebSBaptiste Daroussin 	if ((info->directive_count < 1) ||
154*2a6abeebSBaptiste Daroussin 	    (info->directive_count >= COLL_WEIGHTS_MAX) ||
155*2a6abeebSBaptiste Daroussin 	    ((chains = info->chain_count) < 0)) {
156*2a6abeebSBaptiste Daroussin 		(void) munmap(map, sbuf.st_size);
157*2a6abeebSBaptiste Daroussin 		errno = EINVAL;
158*2a6abeebSBaptiste Daroussin 		return (_LDP_ERROR);
159*2a6abeebSBaptiste Daroussin 	}
160*2a6abeebSBaptiste Daroussin 
161*2a6abeebSBaptiste Daroussin 	i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) +
162*2a6abeebSBaptiste Daroussin 	    (sizeof (collate_chain_t) * chains) +
163*2a6abeebSBaptiste Daroussin 	    (sizeof (collate_large_t) * info->large_count);
164*2a6abeebSBaptiste Daroussin 	for (z = 0; z < (info->directive_count); z++) {
165*2a6abeebSBaptiste Daroussin 		i += sizeof (collate_subst_t) * info->subst_count[z];
166*2a6abeebSBaptiste Daroussin 	}
167*2a6abeebSBaptiste Daroussin 	if (i != (sbuf.st_size - (TMP - map))) {
168*2a6abeebSBaptiste Daroussin 		(void) munmap(map, sbuf.st_size);
169*2a6abeebSBaptiste Daroussin 		errno = EINVAL;
170*2a6abeebSBaptiste Daroussin 		return (_LDP_ERROR);
171*2a6abeebSBaptiste Daroussin 	}
172*2a6abeebSBaptiste Daroussin 
173*2a6abeebSBaptiste Daroussin 	table->char_pri_table = (void *)TMP;
174*2a6abeebSBaptiste Daroussin 	TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1);
175*2a6abeebSBaptiste Daroussin 
176*2a6abeebSBaptiste Daroussin 	for (z = 0; z < info->directive_count; z++) {
177*2a6abeebSBaptiste Daroussin 		if (info->subst_count[z] > 0) {
178*2a6abeebSBaptiste Daroussin 			table->subst_table[z] = (void *)TMP;
179*2a6abeebSBaptiste Daroussin 			TMP += info->subst_count[z] * sizeof (collate_subst_t);
180*2a6abeebSBaptiste Daroussin 		} else {
181*2a6abeebSBaptiste Daroussin 			table->subst_table[z] = NULL;
182*2a6abeebSBaptiste Daroussin 		}
183*2a6abeebSBaptiste Daroussin 	}
184*2a6abeebSBaptiste Daroussin 
185*2a6abeebSBaptiste Daroussin 	if (chains > 0) {
186*2a6abeebSBaptiste Daroussin 		table->chain_pri_table = (void *)TMP;
187*2a6abeebSBaptiste Daroussin 		TMP += chains * sizeof (collate_chain_t);
1888e52da4dSAndrey A. Chernov 	} else
189*2a6abeebSBaptiste Daroussin 		table->chain_pri_table = NULL;
190*2a6abeebSBaptiste Daroussin 	if (info->large_count > 0)
191*2a6abeebSBaptiste Daroussin 		table->large_pri_table = (void *)TMP;
192*2a6abeebSBaptiste Daroussin 	else
193*2a6abeebSBaptiste Daroussin 		table->large_pri_table = NULL;
1948e52da4dSAndrey A. Chernov 
195*2a6abeebSBaptiste Daroussin 	table->info = info;
196bb4317bfSDavid Chisnall 	table->__collate_load_error = 0;
197e755fb76SDmitrij Tejblum 
19876692b80SAndrey A. Chernov 	return (_LDP_LOADED);
199c3d0cca4SAndrey A. Chernov }
200c3d0cca4SAndrey A. Chernov 
201*2a6abeebSBaptiste Daroussin /*
202*2a6abeebSBaptiste Daroussin  * Note: for performance reasons, we have expanded bsearch here.  This avoids
203*2a6abeebSBaptiste Daroussin  * function call overhead with each comparison.
204*2a6abeebSBaptiste Daroussin  */
205c3d0cca4SAndrey A. Chernov 
206*2a6abeebSBaptiste Daroussin static int32_t *
207*2a6abeebSBaptiste Daroussin substsearch(struct xlocale_collate *table, const wchar_t key, int pass)
208*2a6abeebSBaptiste Daroussin {
209*2a6abeebSBaptiste Daroussin 	collate_subst_t *p;
210*2a6abeebSBaptiste Daroussin 	int n = table->info->subst_count[pass];
211*2a6abeebSBaptiste Daroussin 
212*2a6abeebSBaptiste Daroussin 	if (n == 0)
213*2a6abeebSBaptiste Daroussin 		return (NULL);
214*2a6abeebSBaptiste Daroussin 
215*2a6abeebSBaptiste Daroussin 	if (pass >= table->info->directive_count)
216*2a6abeebSBaptiste Daroussin 		return (NULL);
217*2a6abeebSBaptiste Daroussin 
218*2a6abeebSBaptiste Daroussin 	if (!(key & COLLATE_SUBST_PRIORITY))
219*2a6abeebSBaptiste Daroussin 		return (NULL);
220*2a6abeebSBaptiste Daroussin 
221*2a6abeebSBaptiste Daroussin 	p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY);
222*2a6abeebSBaptiste Daroussin 	return (p->pri);
223c3d0cca4SAndrey A. Chernov }
224*2a6abeebSBaptiste Daroussin 
225*2a6abeebSBaptiste Daroussin static collate_chain_t *
226*2a6abeebSBaptiste Daroussin chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len)
227*2a6abeebSBaptiste Daroussin {
228*2a6abeebSBaptiste Daroussin 	int low;
229*2a6abeebSBaptiste Daroussin 	int high;
230*2a6abeebSBaptiste Daroussin 	int next, compar, l;
231*2a6abeebSBaptiste Daroussin 	collate_chain_t *p;
232*2a6abeebSBaptiste Daroussin 	collate_chain_t *tab;
233*2a6abeebSBaptiste Daroussin 
234*2a6abeebSBaptiste Daroussin 	if (table->info->chain_count == 0)
235*2a6abeebSBaptiste Daroussin 		return (NULL);
236*2a6abeebSBaptiste Daroussin 
237*2a6abeebSBaptiste Daroussin 	low = 0;
238*2a6abeebSBaptiste Daroussin 	high = table->info->chain_count - 1;
239*2a6abeebSBaptiste Daroussin 	tab = table->chain_pri_table;
240*2a6abeebSBaptiste Daroussin 
241*2a6abeebSBaptiste Daroussin 	while (low <= high) {
242*2a6abeebSBaptiste Daroussin 		next = (low + high) / 2;
243*2a6abeebSBaptiste Daroussin 		p = tab + next;
244*2a6abeebSBaptiste Daroussin 		compar = *key - *p->str;
245*2a6abeebSBaptiste Daroussin 		if (compar == 0) {
246*2a6abeebSBaptiste Daroussin 			l = wcsnlen(p->str, COLLATE_STR_LEN);
247*2a6abeebSBaptiste Daroussin 			compar = wcsncmp(key, p->str, l);
248*2a6abeebSBaptiste Daroussin 			if (compar == 0) {
249*2a6abeebSBaptiste Daroussin 				*len = l;
250*2a6abeebSBaptiste Daroussin 				return (p);
251c3d0cca4SAndrey A. Chernov 			}
252*2a6abeebSBaptiste Daroussin 		}
253*2a6abeebSBaptiste Daroussin 		if (compar > 0)
254*2a6abeebSBaptiste Daroussin 			low = next + 1;
255*2a6abeebSBaptiste Daroussin 		else
256*2a6abeebSBaptiste Daroussin 			high = next - 1;
257*2a6abeebSBaptiste Daroussin 	}
258*2a6abeebSBaptiste Daroussin 	return (NULL);
259*2a6abeebSBaptiste Daroussin }
260*2a6abeebSBaptiste Daroussin 
261*2a6abeebSBaptiste Daroussin static collate_large_t *
262*2a6abeebSBaptiste Daroussin largesearch(struct xlocale_collate *table, const wchar_t key)
263*2a6abeebSBaptiste Daroussin {
264*2a6abeebSBaptiste Daroussin 	int low = 0;
265*2a6abeebSBaptiste Daroussin 	int high = table->info->large_count - 1;
266*2a6abeebSBaptiste Daroussin 	int next, compar;
267*2a6abeebSBaptiste Daroussin 	collate_large_t *p;
268*2a6abeebSBaptiste Daroussin 	collate_large_t *tab = table->large_pri_table;
269*2a6abeebSBaptiste Daroussin 
270*2a6abeebSBaptiste Daroussin 	if (table->info->large_count == 0)
271*2a6abeebSBaptiste Daroussin 		return (NULL);
272*2a6abeebSBaptiste Daroussin 
273*2a6abeebSBaptiste Daroussin 	while (low <= high) {
274*2a6abeebSBaptiste Daroussin 		next = (low + high) / 2;
275*2a6abeebSBaptiste Daroussin 		p = tab + next;
276*2a6abeebSBaptiste Daroussin 		compar = key - p->val;
277*2a6abeebSBaptiste Daroussin 		if (compar == 0)
278*2a6abeebSBaptiste Daroussin 			return (p);
279*2a6abeebSBaptiste Daroussin 		if (compar > 0)
280*2a6abeebSBaptiste Daroussin 			low = next + 1;
281*2a6abeebSBaptiste Daroussin 		else
282*2a6abeebSBaptiste Daroussin 			high = next - 1;
283*2a6abeebSBaptiste Daroussin 	}
284*2a6abeebSBaptiste Daroussin 	return (NULL);
285c3d0cca4SAndrey A. Chernov }
286c3d0cca4SAndrey A. Chernov 
287c3d0cca4SAndrey A. Chernov void
288*2a6abeebSBaptiste Daroussin _collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len,
289*2a6abeebSBaptiste Daroussin     int *pri, int which, const int **state)
290c3d0cca4SAndrey A. Chernov {
291*2a6abeebSBaptiste Daroussin 	collate_chain_t *p2;
292*2a6abeebSBaptiste Daroussin 	collate_large_t *match;
293*2a6abeebSBaptiste Daroussin 	int p, l;
294*2a6abeebSBaptiste Daroussin 	const int *sptr;
295c3d0cca4SAndrey A. Chernov 
296*2a6abeebSBaptiste Daroussin 	/*
297*2a6abeebSBaptiste Daroussin 	 * If this is the "last" pass for the UNDEFINED, then
298*2a6abeebSBaptiste Daroussin 	 * we just return the priority itself.
299*2a6abeebSBaptiste Daroussin 	 */
300*2a6abeebSBaptiste Daroussin 	if (which >= table->info->directive_count) {
301*2a6abeebSBaptiste Daroussin 		*pri = *t;
302c3d0cca4SAndrey A. Chernov 		*len = 1;
303*2a6abeebSBaptiste Daroussin 		*state = NULL;
304c3d0cca4SAndrey A. Chernov 		return;
305c3d0cca4SAndrey A. Chernov 	}
306*2a6abeebSBaptiste Daroussin 
307*2a6abeebSBaptiste Daroussin 	/*
308*2a6abeebSBaptiste Daroussin 	 * If we have remaining substitution data from a previous
309*2a6abeebSBaptiste Daroussin 	 * call, consume it first.
310*2a6abeebSBaptiste Daroussin 	 */
311*2a6abeebSBaptiste Daroussin 	if ((sptr = *state) != NULL) {
312*2a6abeebSBaptiste Daroussin 		*pri = *sptr;
313*2a6abeebSBaptiste Daroussin 		sptr++;
314*2a6abeebSBaptiste Daroussin 		*state = *sptr ? sptr : NULL;
315*2a6abeebSBaptiste Daroussin 		*len = 0;
316*2a6abeebSBaptiste Daroussin 		return;
317c3d0cca4SAndrey A. Chernov 	}
318c3d0cca4SAndrey A. Chernov 
319*2a6abeebSBaptiste Daroussin 	/* No active substitutions */
320*2a6abeebSBaptiste Daroussin 	*len = 1;
321*2a6abeebSBaptiste Daroussin 
322*2a6abeebSBaptiste Daroussin 	/*
323*2a6abeebSBaptiste Daroussin 	 * Check for composites such as dipthongs that collate as a
324*2a6abeebSBaptiste Daroussin 	 * single element (aka chains or collating-elements).
325*2a6abeebSBaptiste Daroussin 	 */
326*2a6abeebSBaptiste Daroussin 	if (((p2 = chainsearch(table, t, &l)) != NULL) &&
327*2a6abeebSBaptiste Daroussin 	    ((p = p2->pri[which]) >= 0)) {
328*2a6abeebSBaptiste Daroussin 
329*2a6abeebSBaptiste Daroussin 		*len = l;
330*2a6abeebSBaptiste Daroussin 		*pri = p;
331*2a6abeebSBaptiste Daroussin 
332*2a6abeebSBaptiste Daroussin 	} else if (*t <= UCHAR_MAX) {
333*2a6abeebSBaptiste Daroussin 
334*2a6abeebSBaptiste Daroussin 		/*
335*2a6abeebSBaptiste Daroussin 		 * Character is a small (8-bit) character.
336*2a6abeebSBaptiste Daroussin 		 * We just look these up directly for speed.
337*2a6abeebSBaptiste Daroussin 		 */
338*2a6abeebSBaptiste Daroussin 		*pri = table->char_pri_table[*t].pri[which];
339*2a6abeebSBaptiste Daroussin 
340*2a6abeebSBaptiste Daroussin 	} else if ((table->info->large_count > 0) &&
341*2a6abeebSBaptiste Daroussin 	    ((match = largesearch(table, *t)) != NULL)) {
342*2a6abeebSBaptiste Daroussin 
343*2a6abeebSBaptiste Daroussin 		/*
344*2a6abeebSBaptiste Daroussin 		 * Character was found in the extended table.
345*2a6abeebSBaptiste Daroussin 		 */
346*2a6abeebSBaptiste Daroussin 		*pri = match->pri.pri[which];
347*2a6abeebSBaptiste Daroussin 
348*2a6abeebSBaptiste Daroussin 	} else {
349*2a6abeebSBaptiste Daroussin 		/*
350*2a6abeebSBaptiste Daroussin 		 * Character lacks a specific definition.
351*2a6abeebSBaptiste Daroussin 		 */
352*2a6abeebSBaptiste Daroussin 		if (table->info->directive[which] & DIRECTIVE_UNDEFINED) {
353*2a6abeebSBaptiste Daroussin 			/* Mask off sign bit to prevent ordering confusion. */
354*2a6abeebSBaptiste Daroussin 			*pri = (*t & COLLATE_MAX_PRIORITY);
355*2a6abeebSBaptiste Daroussin 		} else {
356*2a6abeebSBaptiste Daroussin 			*pri = table->info->undef_pri[which];
357*2a6abeebSBaptiste Daroussin 		}
358*2a6abeebSBaptiste Daroussin 		/* No substitutions for undefined characters! */
359*2a6abeebSBaptiste Daroussin 		return;
360*2a6abeebSBaptiste Daroussin 	}
361*2a6abeebSBaptiste Daroussin 
362*2a6abeebSBaptiste Daroussin 	/*
363*2a6abeebSBaptiste Daroussin 	 * Try substituting (expanding) the character.  We are
364*2a6abeebSBaptiste Daroussin 	 * currently doing this *after* the chain compression.  I
365*2a6abeebSBaptiste Daroussin 	 * think it should not matter, but this way might be slightly
366*2a6abeebSBaptiste Daroussin 	 * faster.
367*2a6abeebSBaptiste Daroussin 	 *
368*2a6abeebSBaptiste Daroussin 	 * We do this after the priority search, as this will help us
369*2a6abeebSBaptiste Daroussin 	 * to identify a single key value.  In order for this to work,
370*2a6abeebSBaptiste Daroussin 	 * its important that the priority assigned to a given element
371*2a6abeebSBaptiste Daroussin 	 * to be substituted be unique for that level.  The localedef
372*2a6abeebSBaptiste Daroussin 	 * code ensures this for us.
373*2a6abeebSBaptiste Daroussin 	 */
374*2a6abeebSBaptiste Daroussin 	if ((sptr = substsearch(table, *pri, which)) != NULL) {
375*2a6abeebSBaptiste Daroussin 		if ((*pri = *sptr) != 0) {
376*2a6abeebSBaptiste Daroussin 			sptr++;
377*2a6abeebSBaptiste Daroussin 			*state = *sptr ? sptr : NULL;
378*2a6abeebSBaptiste Daroussin 		}
379*2a6abeebSBaptiste Daroussin 	}
380*2a6abeebSBaptiste Daroussin 
381*2a6abeebSBaptiste Daroussin }
382*2a6abeebSBaptiste Daroussin 
383*2a6abeebSBaptiste Daroussin /*
384*2a6abeebSBaptiste Daroussin  * This is the meaty part of wcsxfrm & strxfrm.  Note that it does
385*2a6abeebSBaptiste Daroussin  * NOT NULL terminate.  That is left to the caller.
386*2a6abeebSBaptiste Daroussin  */
387*2a6abeebSBaptiste Daroussin size_t
388*2a6abeebSBaptiste Daroussin _collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf,
389*2a6abeebSBaptiste Daroussin     size_t room)
390c3d0cca4SAndrey A. Chernov {
391*2a6abeebSBaptiste Daroussin 	int		pri;
392*2a6abeebSBaptiste Daroussin 	int		len;
393*2a6abeebSBaptiste Daroussin 	const wchar_t	*t;
394*2a6abeebSBaptiste Daroussin 	wchar_t		*tr = NULL;
395*2a6abeebSBaptiste Daroussin 	int		direc;
396*2a6abeebSBaptiste Daroussin 	int		pass;
397*2a6abeebSBaptiste Daroussin 	const int32_t 	*state;
398*2a6abeebSBaptiste Daroussin 	size_t		want = 0;
399*2a6abeebSBaptiste Daroussin 	size_t		need = 0;
400c3d0cca4SAndrey A. Chernov 
401*2a6abeebSBaptiste Daroussin 	for (pass = 0; pass <= table->info->directive_count; pass++) {
402*2a6abeebSBaptiste Daroussin 
403*2a6abeebSBaptiste Daroussin 		state = NULL;
404*2a6abeebSBaptiste Daroussin 
405*2a6abeebSBaptiste Daroussin 		if (pass != 0) {
406*2a6abeebSBaptiste Daroussin 			/* insert level separator from the previous pass */
407*2a6abeebSBaptiste Daroussin 			if (room) {
408*2a6abeebSBaptiste Daroussin 				*xf++ = 1;
409*2a6abeebSBaptiste Daroussin 				room--;
410*2a6abeebSBaptiste Daroussin 			}
411*2a6abeebSBaptiste Daroussin 			want++;
412c3d0cca4SAndrey A. Chernov 		}
413c3d0cca4SAndrey A. Chernov 
414*2a6abeebSBaptiste Daroussin 		/* special pass for undefined */
415*2a6abeebSBaptiste Daroussin 		if (pass == table->info->directive_count) {
416*2a6abeebSBaptiste Daroussin 			direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
417*2a6abeebSBaptiste Daroussin 		} else {
418*2a6abeebSBaptiste Daroussin 			direc = table->info->directive[pass];
419*2a6abeebSBaptiste Daroussin 		}
420*2a6abeebSBaptiste Daroussin 
421*2a6abeebSBaptiste Daroussin 		t = src;
422*2a6abeebSBaptiste Daroussin 
423*2a6abeebSBaptiste Daroussin 		if (direc & DIRECTIVE_BACKWARD) {
424*2a6abeebSBaptiste Daroussin 			wchar_t *bp, *fp, c;
425*2a6abeebSBaptiste Daroussin 			if (tr)
426*2a6abeebSBaptiste Daroussin 				free(tr);
427*2a6abeebSBaptiste Daroussin 			if ((tr = wcsdup(t)) == NULL) {
428*2a6abeebSBaptiste Daroussin 				errno = ENOMEM;
429*2a6abeebSBaptiste Daroussin 				goto fail;
430*2a6abeebSBaptiste Daroussin 			}
431*2a6abeebSBaptiste Daroussin 			bp = tr;
432*2a6abeebSBaptiste Daroussin 			fp = tr + wcslen(tr) - 1;
433*2a6abeebSBaptiste Daroussin 			while (bp < fp) {
434*2a6abeebSBaptiste Daroussin 				c = *bp;
435*2a6abeebSBaptiste Daroussin 				*bp++ = *fp;
436*2a6abeebSBaptiste Daroussin 				*fp-- = c;
437*2a6abeebSBaptiste Daroussin 			}
438*2a6abeebSBaptiste Daroussin 			t = (const wchar_t *)tr;
439*2a6abeebSBaptiste Daroussin 		}
440*2a6abeebSBaptiste Daroussin 
441*2a6abeebSBaptiste Daroussin 		if (direc & DIRECTIVE_POSITION) {
442*2a6abeebSBaptiste Daroussin 			while (*t || state) {
443*2a6abeebSBaptiste Daroussin 				_collate_lookup(table, t, &len, &pri, pass, &state);
444*2a6abeebSBaptiste Daroussin 				t += len;
445*2a6abeebSBaptiste Daroussin 				if (pri <= 0) {
446*2a6abeebSBaptiste Daroussin 					if (pri < 0) {
447*2a6abeebSBaptiste Daroussin 						errno = EINVAL;
448*2a6abeebSBaptiste Daroussin 						goto fail;
449*2a6abeebSBaptiste Daroussin 					}
450*2a6abeebSBaptiste Daroussin 					pri = COLLATE_MAX_PRIORITY;
451*2a6abeebSBaptiste Daroussin 				}
452*2a6abeebSBaptiste Daroussin 				if (room) {
453*2a6abeebSBaptiste Daroussin 					*xf++ = pri;
454*2a6abeebSBaptiste Daroussin 					room--;
455*2a6abeebSBaptiste Daroussin 				}
456*2a6abeebSBaptiste Daroussin 				want++;
457*2a6abeebSBaptiste Daroussin 				need = want;
458*2a6abeebSBaptiste Daroussin 			}
459*2a6abeebSBaptiste Daroussin 		} else {
460*2a6abeebSBaptiste Daroussin 			while (*t || state) {
461*2a6abeebSBaptiste Daroussin 				_collate_lookup(table, t, &len, &pri, pass, &state);
462*2a6abeebSBaptiste Daroussin 				t += len;
463*2a6abeebSBaptiste Daroussin 				if (pri <= 0) {
464*2a6abeebSBaptiste Daroussin 					if (pri < 0) {
465*2a6abeebSBaptiste Daroussin 						errno = EINVAL;
466*2a6abeebSBaptiste Daroussin 						goto fail;
467*2a6abeebSBaptiste Daroussin 					}
468*2a6abeebSBaptiste Daroussin 					continue;
469*2a6abeebSBaptiste Daroussin 				}
470*2a6abeebSBaptiste Daroussin 				if (room) {
471*2a6abeebSBaptiste Daroussin 					*xf++ = pri;
472*2a6abeebSBaptiste Daroussin 					room--;
473*2a6abeebSBaptiste Daroussin 				}
474*2a6abeebSBaptiste Daroussin 				want++;
475*2a6abeebSBaptiste Daroussin 				need = want;
476*2a6abeebSBaptiste Daroussin 			}
477*2a6abeebSBaptiste Daroussin 		}
478*2a6abeebSBaptiste Daroussin 	}
479*2a6abeebSBaptiste Daroussin 	if (tr)
480*2a6abeebSBaptiste Daroussin 		free(tr);
481*2a6abeebSBaptiste Daroussin 	return (need);
482*2a6abeebSBaptiste Daroussin 
483*2a6abeebSBaptiste Daroussin fail:
484*2a6abeebSBaptiste Daroussin 	if (tr)
485*2a6abeebSBaptiste Daroussin 		free(tr);
486*2a6abeebSBaptiste Daroussin 	return ((size_t)(-1));
487*2a6abeebSBaptiste Daroussin }
488*2a6abeebSBaptiste Daroussin 
489*2a6abeebSBaptiste Daroussin /*
490*2a6abeebSBaptiste Daroussin  * In the non-POSIX case, we transform each character into a string of
491*2a6abeebSBaptiste Daroussin  * characters representing the character's priority.  Since char is usually
492*2a6abeebSBaptiste Daroussin  * signed, we are limited by 7 bits per byte.  To avoid zero, we need to add
493*2a6abeebSBaptiste Daroussin  * XFRM_OFFSET, so we can't use a full 7 bits.  For simplicity, we choose 6
494*2a6abeebSBaptiste Daroussin  * bits per byte.
495*2a6abeebSBaptiste Daroussin  *
496*2a6abeebSBaptiste Daroussin  * It turns out that we sometimes have real priorities that are
497*2a6abeebSBaptiste Daroussin  * 31-bits wide.  (But: be careful using priorities where the high
498*2a6abeebSBaptiste Daroussin  * order bit is set -- i.e. the priority is negative.  The sort order
499*2a6abeebSBaptiste Daroussin  * may be surprising!)
500*2a6abeebSBaptiste Daroussin  *
501*2a6abeebSBaptiste Daroussin  * TODO: This would be a good area to optimize somewhat.  It turns out
502*2a6abeebSBaptiste Daroussin  * that real prioririties *except for the last UNDEFINED pass* are generally
503*2a6abeebSBaptiste Daroussin  * very small.  We need the localedef code to precalculate the max
504*2a6abeebSBaptiste Daroussin  * priority for us, and ideally also give us a mask, and then we could
505*2a6abeebSBaptiste Daroussin  * severely limit what we expand to.
506*2a6abeebSBaptiste Daroussin  */
507*2a6abeebSBaptiste Daroussin #define	XFRM_BYTES	6
508*2a6abeebSBaptiste Daroussin #define	XFRM_OFFSET	('0')	/* make all printable characters */
509*2a6abeebSBaptiste Daroussin #define	XFRM_SHIFT	6
510*2a6abeebSBaptiste Daroussin #define	XFRM_MASK	((1 << XFRM_SHIFT) - 1)
511*2a6abeebSBaptiste Daroussin #define	XFRM_SEP	('.')	/* chosen to be less than XFRM_OFFSET */
512*2a6abeebSBaptiste Daroussin 
513*2a6abeebSBaptiste Daroussin static int
514*2a6abeebSBaptiste Daroussin xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass)
515926f20c9SAndrey A. Chernov {
516*2a6abeebSBaptiste Daroussin 	/* we use unsigned to ensure zero fill on right shift */
517*2a6abeebSBaptiste Daroussin 	uint32_t val = (uint32_t)table->info->pri_count[pass];
518*2a6abeebSBaptiste Daroussin 	int nc = 0;
519926f20c9SAndrey A. Chernov 
520*2a6abeebSBaptiste Daroussin 	while (val) {
521*2a6abeebSBaptiste Daroussin 		*p = (pri & XFRM_MASK) + XFRM_OFFSET;
522*2a6abeebSBaptiste Daroussin 		pri >>= XFRM_SHIFT;
523*2a6abeebSBaptiste Daroussin 		val >>= XFRM_SHIFT;
524*2a6abeebSBaptiste Daroussin 		p++;
525*2a6abeebSBaptiste Daroussin 		nc++;
526*2a6abeebSBaptiste Daroussin 	}
527*2a6abeebSBaptiste Daroussin 	return (nc);
528926f20c9SAndrey A. Chernov }
529926f20c9SAndrey A. Chernov 
530*2a6abeebSBaptiste Daroussin size_t
531*2a6abeebSBaptiste Daroussin _collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf,
532*2a6abeebSBaptiste Daroussin     size_t room)
533c3d0cca4SAndrey A. Chernov {
534*2a6abeebSBaptiste Daroussin 	int		pri;
535*2a6abeebSBaptiste Daroussin 	int		len;
536*2a6abeebSBaptiste Daroussin 	const wchar_t	*t;
537*2a6abeebSBaptiste Daroussin 	wchar_t		*tr = NULL;
538*2a6abeebSBaptiste Daroussin 	int		direc;
539*2a6abeebSBaptiste Daroussin 	int		pass;
540*2a6abeebSBaptiste Daroussin 	const int32_t 	*state;
541*2a6abeebSBaptiste Daroussin 	size_t		want = 0;
542*2a6abeebSBaptiste Daroussin 	size_t		need = 0;
543*2a6abeebSBaptiste Daroussin 	int		b;
544*2a6abeebSBaptiste Daroussin 	uint8_t		buf[XFRM_BYTES];
545c3d0cca4SAndrey A. Chernov 
546*2a6abeebSBaptiste Daroussin 	for (pass = 0; pass <= table->info->directive_count; pass++) {
547*2a6abeebSBaptiste Daroussin 
548*2a6abeebSBaptiste Daroussin 		state = NULL;
549*2a6abeebSBaptiste Daroussin 
550*2a6abeebSBaptiste Daroussin 		if (pass != 0) {
551*2a6abeebSBaptiste Daroussin 			/* insert level separator from the previous pass */
552*2a6abeebSBaptiste Daroussin 			if (room) {
553*2a6abeebSBaptiste Daroussin 				*xf++ = XFRM_SEP;
554*2a6abeebSBaptiste Daroussin 				room--;
555c3d0cca4SAndrey A. Chernov 			}
556*2a6abeebSBaptiste Daroussin 			want++;
557*2a6abeebSBaptiste Daroussin 		}
558*2a6abeebSBaptiste Daroussin 
559*2a6abeebSBaptiste Daroussin 		/* special pass for undefined */
560*2a6abeebSBaptiste Daroussin 		if (pass == table->info->directive_count) {
561*2a6abeebSBaptiste Daroussin 			direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
562*2a6abeebSBaptiste Daroussin 		} else {
563*2a6abeebSBaptiste Daroussin 			direc = table->info->directive[pass];
564*2a6abeebSBaptiste Daroussin 		}
565*2a6abeebSBaptiste Daroussin 
566*2a6abeebSBaptiste Daroussin 		t = src;
567*2a6abeebSBaptiste Daroussin 
568*2a6abeebSBaptiste Daroussin 		if (direc & DIRECTIVE_BACKWARD) {
569*2a6abeebSBaptiste Daroussin 			wchar_t *bp, *fp, c;
570*2a6abeebSBaptiste Daroussin 			if (tr)
571*2a6abeebSBaptiste Daroussin 				free(tr);
572*2a6abeebSBaptiste Daroussin 			if ((tr = wcsdup(t)) == NULL) {
573*2a6abeebSBaptiste Daroussin 				errno = ENOMEM;
574*2a6abeebSBaptiste Daroussin 				goto fail;
575*2a6abeebSBaptiste Daroussin 			}
576*2a6abeebSBaptiste Daroussin 			bp = tr;
577*2a6abeebSBaptiste Daroussin 			fp = tr + wcslen(tr) - 1;
578*2a6abeebSBaptiste Daroussin 			while (bp < fp) {
579*2a6abeebSBaptiste Daroussin 				c = *bp;
580*2a6abeebSBaptiste Daroussin 				*bp++ = *fp;
581*2a6abeebSBaptiste Daroussin 				*fp-- = c;
582*2a6abeebSBaptiste Daroussin 			}
583*2a6abeebSBaptiste Daroussin 			t = (const wchar_t *)tr;
584*2a6abeebSBaptiste Daroussin 		}
585*2a6abeebSBaptiste Daroussin 
586*2a6abeebSBaptiste Daroussin 		if (direc & DIRECTIVE_POSITION) {
587*2a6abeebSBaptiste Daroussin 			while (*t || state) {
588*2a6abeebSBaptiste Daroussin 
589*2a6abeebSBaptiste Daroussin 				_collate_lookup(table, t, &len, &pri, pass, &state);
590*2a6abeebSBaptiste Daroussin 				t += len;
591*2a6abeebSBaptiste Daroussin 				if (pri <= 0) {
592*2a6abeebSBaptiste Daroussin 					if (pri < 0) {
593*2a6abeebSBaptiste Daroussin 						errno = EINVAL;
594*2a6abeebSBaptiste Daroussin 						goto fail;
595*2a6abeebSBaptiste Daroussin 					}
596*2a6abeebSBaptiste Daroussin 					pri = COLLATE_MAX_PRIORITY;
597*2a6abeebSBaptiste Daroussin 				}
598*2a6abeebSBaptiste Daroussin 
599*2a6abeebSBaptiste Daroussin 				b = xfrm(table, buf, pri, pass);
600*2a6abeebSBaptiste Daroussin 				want += b;
601*2a6abeebSBaptiste Daroussin 				if (room) {
602*2a6abeebSBaptiste Daroussin 					while (b) {
603*2a6abeebSBaptiste Daroussin 						b--;
604*2a6abeebSBaptiste Daroussin 						if (room) {
605*2a6abeebSBaptiste Daroussin 							*xf++ = buf[b];
606*2a6abeebSBaptiste Daroussin 							room--;
607*2a6abeebSBaptiste Daroussin 						}
608*2a6abeebSBaptiste Daroussin 					}
609*2a6abeebSBaptiste Daroussin 				}
610*2a6abeebSBaptiste Daroussin 				need = want;
611*2a6abeebSBaptiste Daroussin 			}
612*2a6abeebSBaptiste Daroussin 		} else {
613*2a6abeebSBaptiste Daroussin 			while (*t || state) {
614*2a6abeebSBaptiste Daroussin 				_collate_lookup(table, t, &len, &pri, pass, &state);
615*2a6abeebSBaptiste Daroussin 				t += len;
616*2a6abeebSBaptiste Daroussin 				if (pri <= 0) {
617*2a6abeebSBaptiste Daroussin 					if (pri < 0) {
618*2a6abeebSBaptiste Daroussin 						errno = EINVAL;
619*2a6abeebSBaptiste Daroussin 						goto fail;
620*2a6abeebSBaptiste Daroussin 					}
621*2a6abeebSBaptiste Daroussin 					continue;
622*2a6abeebSBaptiste Daroussin 				}
623*2a6abeebSBaptiste Daroussin 
624*2a6abeebSBaptiste Daroussin 				b = xfrm(table, buf, pri, pass);
625*2a6abeebSBaptiste Daroussin 				want += b;
626*2a6abeebSBaptiste Daroussin 				if (room) {
627*2a6abeebSBaptiste Daroussin 
628*2a6abeebSBaptiste Daroussin 					while (b) {
629*2a6abeebSBaptiste Daroussin 						b--;
630*2a6abeebSBaptiste Daroussin 						if (room) {
631*2a6abeebSBaptiste Daroussin 							*xf++ = buf[b];
632*2a6abeebSBaptiste Daroussin 							room--;
633*2a6abeebSBaptiste Daroussin 						}
634*2a6abeebSBaptiste Daroussin 					}
635*2a6abeebSBaptiste Daroussin 				}
636*2a6abeebSBaptiste Daroussin 				need = want;
637*2a6abeebSBaptiste Daroussin 			}
638*2a6abeebSBaptiste Daroussin 		}
639*2a6abeebSBaptiste Daroussin 	}
640*2a6abeebSBaptiste Daroussin 	if (tr)
641*2a6abeebSBaptiste Daroussin 		free(tr);
642*2a6abeebSBaptiste Daroussin 	return (need);
643*2a6abeebSBaptiste Daroussin 
644*2a6abeebSBaptiste Daroussin fail:
645*2a6abeebSBaptiste Daroussin 	if (tr)
646*2a6abeebSBaptiste Daroussin 		free(tr);
647*2a6abeebSBaptiste Daroussin 	return ((size_t)(-1));
648*2a6abeebSBaptiste Daroussin }
649*2a6abeebSBaptiste Daroussin 
650*2a6abeebSBaptiste Daroussin /*
651*2a6abeebSBaptiste Daroussin  * __collate_equiv_value returns the primary collation value for the given
652*2a6abeebSBaptiste Daroussin  * collating symbol specified by str and len.  Zero or negative is returned
653*2a6abeebSBaptiste Daroussin  * if the collating symbol was not found.  This function is used by bracket
654*2a6abeebSBaptiste Daroussin  * code in the TRE regex library.
655*2a6abeebSBaptiste Daroussin  */
656*2a6abeebSBaptiste Daroussin int
657*2a6abeebSBaptiste Daroussin __collate_equiv_value(locale_t locale, const wchar_t *str, size_t len)
658*2a6abeebSBaptiste Daroussin {
659*2a6abeebSBaptiste Daroussin 	int32_t e;
660*2a6abeebSBaptiste Daroussin 
661*2a6abeebSBaptiste Daroussin 	if (len < 1 || len >= COLLATE_STR_LEN)
662*2a6abeebSBaptiste Daroussin 		return (-1);
663*2a6abeebSBaptiste Daroussin 
664*2a6abeebSBaptiste Daroussin 	FIX_LOCALE(locale);
665*2a6abeebSBaptiste Daroussin 	struct xlocale_collate *table =
666*2a6abeebSBaptiste Daroussin 		(struct xlocale_collate*)locale->components[XLC_COLLATE];
667*2a6abeebSBaptiste Daroussin 
668*2a6abeebSBaptiste Daroussin 	if (table->__collate_load_error)
669*2a6abeebSBaptiste Daroussin 		return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1);
670*2a6abeebSBaptiste Daroussin 
671*2a6abeebSBaptiste Daroussin 	if (len == 1) {
672*2a6abeebSBaptiste Daroussin 		e = -1;
673*2a6abeebSBaptiste Daroussin 		if (*str <= UCHAR_MAX)
674*2a6abeebSBaptiste Daroussin 			e = table->char_pri_table[*str].pri[0];
675*2a6abeebSBaptiste Daroussin 		else if (table->info->large_count > 0) {
676*2a6abeebSBaptiste Daroussin 			collate_large_t *match_large;
677*2a6abeebSBaptiste Daroussin 			match_large = largesearch(table, *str);
678*2a6abeebSBaptiste Daroussin 			if (match_large)
679*2a6abeebSBaptiste Daroussin 				e = match_large->pri.pri[0];
680*2a6abeebSBaptiste Daroussin 		}
681*2a6abeebSBaptiste Daroussin 		if (e == 0)
682*2a6abeebSBaptiste Daroussin 			return (1);
683*2a6abeebSBaptiste Daroussin 		return (e > 0 ? e : 0);
684*2a6abeebSBaptiste Daroussin 	}
685*2a6abeebSBaptiste Daroussin 	if (table->info->chain_count > 0) {
686*2a6abeebSBaptiste Daroussin 		wchar_t name[COLLATE_STR_LEN];
687*2a6abeebSBaptiste Daroussin 		collate_chain_t *match_chain;
688*2a6abeebSBaptiste Daroussin 		int clen;
689*2a6abeebSBaptiste Daroussin 
690*2a6abeebSBaptiste Daroussin 		wcsncpy (name, str, len);
691*2a6abeebSBaptiste Daroussin 		name[len] = 0;
692*2a6abeebSBaptiste Daroussin 		match_chain = chainsearch(table, name, &clen);
693*2a6abeebSBaptiste Daroussin 		if (match_chain) {
694*2a6abeebSBaptiste Daroussin 			e = match_chain->pri[0];
695*2a6abeebSBaptiste Daroussin 			if (e == 0)
696*2a6abeebSBaptiste Daroussin 				return (1);
697*2a6abeebSBaptiste Daroussin 			return (e < 0 ? -e : e);
698*2a6abeebSBaptiste Daroussin 		}
699*2a6abeebSBaptiste Daroussin 	}
700*2a6abeebSBaptiste Daroussin 	return (0);
701*2a6abeebSBaptiste Daroussin }
702