xref: /freebsd/lib/libc/string/wcscoll.c (revision a1df81e7163c76ab7b481cbc7f8dc10b8a9173a7)
/*-
 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 2002 Tim J. Robbins
 * All rights reserved.
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 * Portions of this software were developed by David Chisnall
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
32fd4f1dd9STim J. Robbins 
33fd4f1dd9STim J. Robbins #include <sys/cdefs.h>
34fd4f1dd9STim J. Robbins __FBSDID("$FreeBSD$");
35fd4f1dd9STim J. Robbins 
36fd4f1dd9STim J. Robbins #include <errno.h>
37fd4f1dd9STim J. Robbins #include <stdlib.h>
38fd4f1dd9STim J. Robbins #include <string.h>
39fd4f1dd9STim J. Robbins #include <wchar.h>
40fd4f1dd9STim J. Robbins #include "collate.h"
41fd4f1dd9STim J. Robbins 
42fd4f1dd9STim J. Robbins int
433c87aa1dSDavid Chisnall wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
44fd4f1dd9STim J. Robbins {
45*a1df81e7SBaptiste Daroussin 	int len1, len2, pri1, pri2, ret;
46*a1df81e7SBaptiste Daroussin 	wchar_t *tr1 = NULL, *tr2 = NULL;
47*a1df81e7SBaptiste Daroussin 	int direc, pass;
48*a1df81e7SBaptiste Daroussin 
493c87aa1dSDavid Chisnall 	FIX_LOCALE(locale);
503c87aa1dSDavid Chisnall 	struct xlocale_collate *table =
513c87aa1dSDavid Chisnall 		(struct xlocale_collate*)locale->components[XLC_COLLATE];
52fd4f1dd9STim J. Robbins 
53*a1df81e7SBaptiste Daroussin 	if (table->__collate_load_error)
54fd4f1dd9STim J. Robbins 		/*
55*a1df81e7SBaptiste Daroussin 		 * Locale has no special collating order or could not be
56*a1df81e7SBaptiste Daroussin 		 * loaded, do a fast binary comparison.
57fd4f1dd9STim J. Robbins 		 */
58fd4f1dd9STim J. Robbins 		return (wcscmp(ws1, ws2));
59fd4f1dd9STim J. Robbins 
60*a1df81e7SBaptiste Daroussin 	ret = 0;
61*a1df81e7SBaptiste Daroussin 
62fd4f1dd9STim J. Robbins 	/*
63*a1df81e7SBaptiste Daroussin 	 * Once upon a time we had code to try to optimize this, but
64*a1df81e7SBaptiste Daroussin 	 * it turns out that you really can't make many assumptions
65*a1df81e7SBaptiste Daroussin 	 * safely.  You absolutely have to run this pass by pass,
66*a1df81e7SBaptiste Daroussin 	 * because some passes will be ignored for a given character,
67*a1df81e7SBaptiste Daroussin 	 * while others will not.  Simpler locales will benefit from
68*a1df81e7SBaptiste Daroussin 	 * having fewer passes, and most comparisions should resolve
69*a1df81e7SBaptiste Daroussin 	 * during the primary pass anyway.
70*a1df81e7SBaptiste Daroussin 	 *
71*a1df81e7SBaptiste Daroussin 	 * Note that we do one final extra pass at the end to pick
72*a1df81e7SBaptiste Daroussin 	 * up UNDEFINED elements.  There is special handling for them.
73fd4f1dd9STim J. Robbins 	 */
74*a1df81e7SBaptiste Daroussin 	for (pass = 0; pass <= table->info->directive_count; pass++) {
75*a1df81e7SBaptiste Daroussin 
76*a1df81e7SBaptiste Daroussin 		const int32_t *st1 = NULL;
77*a1df81e7SBaptiste Daroussin 		const int32_t *st2 = NULL;
78*a1df81e7SBaptiste Daroussin 		const wchar_t	*w1 = ws1;
79*a1df81e7SBaptiste Daroussin 		const wchar_t	*w2 = ws2;
80*a1df81e7SBaptiste Daroussin 
81*a1df81e7SBaptiste Daroussin 		/* special pass for UNDEFINED */
82*a1df81e7SBaptiste Daroussin 		if (pass == table->info->directive_count) {
83*a1df81e7SBaptiste Daroussin 			direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
84*a1df81e7SBaptiste Daroussin 		} else {
85*a1df81e7SBaptiste Daroussin 			direc = table->info->directive[pass];
86fd4f1dd9STim J. Robbins 		}
87fd4f1dd9STim J. Robbins 
88*a1df81e7SBaptiste Daroussin 		if (direc & DIRECTIVE_BACKWARD) {
89*a1df81e7SBaptiste Daroussin 			wchar_t *bp, *fp, c;
90*a1df81e7SBaptiste Daroussin 			if ((tr1 = wcsdup(w1)) == NULL)
91*a1df81e7SBaptiste Daroussin 				goto fail;
92*a1df81e7SBaptiste Daroussin 			bp = tr1;
93*a1df81e7SBaptiste Daroussin 			fp = tr1 + wcslen(tr1) - 1;
94*a1df81e7SBaptiste Daroussin 			while (bp < fp) {
95*a1df81e7SBaptiste Daroussin 				c = *bp;
96*a1df81e7SBaptiste Daroussin 				*bp++ = *fp;
97*a1df81e7SBaptiste Daroussin 				*fp-- = c;
98*a1df81e7SBaptiste Daroussin 			}
99*a1df81e7SBaptiste Daroussin 			if ((tr2 = wcsdup(w2)) == NULL)
100*a1df81e7SBaptiste Daroussin 				goto fail;
101*a1df81e7SBaptiste Daroussin 			bp = tr2;
102*a1df81e7SBaptiste Daroussin 			fp = tr2 + wcslen(tr2) - 1;
103*a1df81e7SBaptiste Daroussin 			while (bp < fp) {
104*a1df81e7SBaptiste Daroussin 				c = *bp;
105*a1df81e7SBaptiste Daroussin 				*bp++ = *fp;
106*a1df81e7SBaptiste Daroussin 				*fp-- = c;
107*a1df81e7SBaptiste Daroussin 			}
108*a1df81e7SBaptiste Daroussin 			w1 = tr1;
109*a1df81e7SBaptiste Daroussin 			w2 = tr2;
110*a1df81e7SBaptiste Daroussin 		}
111fd4f1dd9STim J. Robbins 
112*a1df81e7SBaptiste Daroussin 		if (direc & DIRECTIVE_POSITION) {
113*a1df81e7SBaptiste Daroussin 			while ((*w1 || st1) && (*w2 || st2)) {
114*a1df81e7SBaptiste Daroussin 				pri1 = pri2 = 0;
115*a1df81e7SBaptiste Daroussin 				_collate_lookup(table, w1, &len1, &pri1, pass,
116*a1df81e7SBaptiste Daroussin 				    &st1);
117*a1df81e7SBaptiste Daroussin 				if (pri1 <= 0) {
118*a1df81e7SBaptiste Daroussin 					if (pri1 < 0) {
119*a1df81e7SBaptiste Daroussin 						errno = EINVAL;
120*a1df81e7SBaptiste Daroussin 						goto fail;
121*a1df81e7SBaptiste Daroussin 					}
122*a1df81e7SBaptiste Daroussin 					pri1 = COLLATE_MAX_PRIORITY;
123*a1df81e7SBaptiste Daroussin 				}
124*a1df81e7SBaptiste Daroussin 				_collate_lookup(table, w2, &len2, &pri2, pass,
125*a1df81e7SBaptiste Daroussin 				    &st2);
126*a1df81e7SBaptiste Daroussin 				if (pri2 <= 0) {
127*a1df81e7SBaptiste Daroussin 					if (pri2 < 0) {
128*a1df81e7SBaptiste Daroussin 						errno = EINVAL;
129*a1df81e7SBaptiste Daroussin 						goto fail;
130*a1df81e7SBaptiste Daroussin 					}
131*a1df81e7SBaptiste Daroussin 					pri2 = COLLATE_MAX_PRIORITY;
132*a1df81e7SBaptiste Daroussin 				}
133*a1df81e7SBaptiste Daroussin 				if (pri1 != pri2) {
134*a1df81e7SBaptiste Daroussin 					ret = pri1 - pri2;
135*a1df81e7SBaptiste Daroussin 					goto end;
136*a1df81e7SBaptiste Daroussin 				}
137*a1df81e7SBaptiste Daroussin 				w1 += len1;
138*a1df81e7SBaptiste Daroussin 				w2 += len2;
139*a1df81e7SBaptiste Daroussin 			}
140*a1df81e7SBaptiste Daroussin 		} else {
141*a1df81e7SBaptiste Daroussin 			while ((*w1 || st1) && (*w2 || st2)) {
142*a1df81e7SBaptiste Daroussin 				pri1 = pri2 = 0;
143*a1df81e7SBaptiste Daroussin 				while (*w1) {
144*a1df81e7SBaptiste Daroussin 					_collate_lookup(table, w1, &len1,
145*a1df81e7SBaptiste Daroussin 					    &pri1, pass, &st1);
146*a1df81e7SBaptiste Daroussin 					if (pri1 > 0)
147*a1df81e7SBaptiste Daroussin 						break;
148*a1df81e7SBaptiste Daroussin 					if (pri1 < 0) {
149*a1df81e7SBaptiste Daroussin 						errno = EINVAL;
150*a1df81e7SBaptiste Daroussin 						goto fail;
151*a1df81e7SBaptiste Daroussin 					}
152*a1df81e7SBaptiste Daroussin 					w1 += len1;
153*a1df81e7SBaptiste Daroussin 				}
154*a1df81e7SBaptiste Daroussin 				while (*w2) {
155*a1df81e7SBaptiste Daroussin 					_collate_lookup(table, w2, &len2,
156*a1df81e7SBaptiste Daroussin 					    &pri2, pass, &st2);
157*a1df81e7SBaptiste Daroussin 					if (pri2 > 0)
158*a1df81e7SBaptiste Daroussin 						break;
159*a1df81e7SBaptiste Daroussin 					if (pri2 < 0) {
160*a1df81e7SBaptiste Daroussin 						errno = EINVAL;
161*a1df81e7SBaptiste Daroussin 						goto fail;
162*a1df81e7SBaptiste Daroussin 					}
163*a1df81e7SBaptiste Daroussin 					w2 += len2;
164*a1df81e7SBaptiste Daroussin 				}
165*a1df81e7SBaptiste Daroussin 				if (!pri1 || !pri2)
166*a1df81e7SBaptiste Daroussin 					break;
167*a1df81e7SBaptiste Daroussin 				if (pri1 != pri2) {
168*a1df81e7SBaptiste Daroussin 					ret = pri1 - pri2;
169*a1df81e7SBaptiste Daroussin 					goto end;
170*a1df81e7SBaptiste Daroussin 				}
171*a1df81e7SBaptiste Daroussin 				w1 += len1;
172*a1df81e7SBaptiste Daroussin 				w2 += len2;
173*a1df81e7SBaptiste Daroussin 			}
174*a1df81e7SBaptiste Daroussin 		}
175*a1df81e7SBaptiste Daroussin 		if (!*w1) {
176*a1df81e7SBaptiste Daroussin 			if (*w2) {
177*a1df81e7SBaptiste Daroussin 				ret = -(int)*w2;
178*a1df81e7SBaptiste Daroussin 				goto end;
179*a1df81e7SBaptiste Daroussin 			}
180*a1df81e7SBaptiste Daroussin 		} else {
181*a1df81e7SBaptiste Daroussin 			ret = *w1;
182*a1df81e7SBaptiste Daroussin 			goto end;
183*a1df81e7SBaptiste Daroussin 		}
184*a1df81e7SBaptiste Daroussin 	}
185*a1df81e7SBaptiste Daroussin 	ret = 0;
186*a1df81e7SBaptiste Daroussin 
187*a1df81e7SBaptiste Daroussin end:
188*a1df81e7SBaptiste Daroussin 	if (tr1)
189*a1df81e7SBaptiste Daroussin 		free(tr1);
190*a1df81e7SBaptiste Daroussin 	if (tr2)
191*a1df81e7SBaptiste Daroussin 		free(tr2);
192*a1df81e7SBaptiste Daroussin 
193*a1df81e7SBaptiste Daroussin 	return (ret);
194*a1df81e7SBaptiste Daroussin 
195*a1df81e7SBaptiste Daroussin fail:
196*a1df81e7SBaptiste Daroussin 	ret = wcscmp(ws1, ws2);
197*a1df81e7SBaptiste Daroussin 	goto end;
198fd4f1dd9STim J. Robbins }
199fd4f1dd9STim J. Robbins 
/*
 * Compare two wide strings with wcscoll_l(), passing the locale returned
 * by __get_locale().  The return value is that of wcscoll_l().
 */
int
wcscoll(const wchar_t *ws1, const wchar_t *ws2)
{

	return (wcscoll_l(ws1, ws2, __get_locale()));
}
205