xref: /freebsd/lib/libc/string/wcscoll.c (revision 5b5fa75acff11d871d0c90045f8c1a58fed85365)
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright 2017 Nexenta Systems, Inc.
 * Copyright (c) 2002 Tim J. Robbins
 * All rights reserved.
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 *
 * Portions of this software were developed by David Chisnall
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
34fd4f1dd9STim J. Robbins 
35fd4f1dd9STim J. Robbins #include <sys/cdefs.h>
36fd4f1dd9STim J. Robbins __FBSDID("$FreeBSD$");
37fd4f1dd9STim J. Robbins 
38fd4f1dd9STim J. Robbins #include <errno.h>
39fd4f1dd9STim J. Robbins #include <stdlib.h>
40fd4f1dd9STim J. Robbins #include <string.h>
41fd4f1dd9STim J. Robbins #include <wchar.h>
42fd4f1dd9STim J. Robbins #include "collate.h"
43fd4f1dd9STim J. Robbins 
44fd4f1dd9STim J. Robbins int
453c87aa1dSDavid Chisnall wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
46fd4f1dd9STim J. Robbins {
47c48dc2a1SBaptiste Daroussin 	int len1, len2, pri1, pri2;
48a1df81e7SBaptiste Daroussin 	wchar_t *tr1 = NULL, *tr2 = NULL;
49a1df81e7SBaptiste Daroussin 	int direc, pass;
50c48dc2a1SBaptiste Daroussin 	int ret = wcscmp(ws1, ws2);
51a1df81e7SBaptiste Daroussin 
523c87aa1dSDavid Chisnall 	FIX_LOCALE(locale);
533c87aa1dSDavid Chisnall 	struct xlocale_collate *table =
543c87aa1dSDavid Chisnall 		(struct xlocale_collate*)locale->components[XLC_COLLATE];
55fd4f1dd9STim J. Robbins 
56c48dc2a1SBaptiste Daroussin 	if (table->__collate_load_error || ret == 0)
57c48dc2a1SBaptiste Daroussin 		return (ret);
58fd4f1dd9STim J. Robbins 
59c48dc2a1SBaptiste Daroussin 	if (*ws1 == 0 && *ws2 != 0)
60c48dc2a1SBaptiste Daroussin 		return (-1);
61c48dc2a1SBaptiste Daroussin 	if (*ws1 != 0 && *ws2 == 0)
62c48dc2a1SBaptiste Daroussin 		return (1);
63a1df81e7SBaptiste Daroussin 
64fd4f1dd9STim J. Robbins 	/*
65a1df81e7SBaptiste Daroussin 	 * Once upon a time we had code to try to optimize this, but
66a1df81e7SBaptiste Daroussin 	 * it turns out that you really can't make many assumptions
67a1df81e7SBaptiste Daroussin 	 * safely.  You absolutely have to run this pass by pass,
68a1df81e7SBaptiste Daroussin 	 * because some passes will be ignored for a given character,
69a1df81e7SBaptiste Daroussin 	 * while others will not.  Simpler locales will benefit from
7032223c1bSPedro F. Giffuni 	 * having fewer passes, and most comparisons should resolve
71a1df81e7SBaptiste Daroussin 	 * during the primary pass anyway.
72a1df81e7SBaptiste Daroussin 	 *
73a1df81e7SBaptiste Daroussin 	 * Note that we do one final extra pass at the end to pick
74a1df81e7SBaptiste Daroussin 	 * up UNDEFINED elements.  There is special handling for them.
75fd4f1dd9STim J. Robbins 	 */
76a1df81e7SBaptiste Daroussin 	for (pass = 0; pass <= table->info->directive_count; pass++) {
77a1df81e7SBaptiste Daroussin 
78a1df81e7SBaptiste Daroussin 		const int32_t *st1 = NULL;
79a1df81e7SBaptiste Daroussin 		const int32_t *st2 = NULL;
80a1df81e7SBaptiste Daroussin 		const wchar_t	*w1 = ws1;
81a1df81e7SBaptiste Daroussin 		const wchar_t	*w2 = ws2;
82a1df81e7SBaptiste Daroussin 
83a1df81e7SBaptiste Daroussin 		/* special pass for UNDEFINED */
84a1df81e7SBaptiste Daroussin 		if (pass == table->info->directive_count) {
85c48dc2a1SBaptiste Daroussin 			direc = DIRECTIVE_FORWARD;
86a1df81e7SBaptiste Daroussin 		} else {
87a1df81e7SBaptiste Daroussin 			direc = table->info->directive[pass];
88fd4f1dd9STim J. Robbins 		}
89fd4f1dd9STim J. Robbins 
90a1df81e7SBaptiste Daroussin 		if (direc & DIRECTIVE_BACKWARD) {
91a1df81e7SBaptiste Daroussin 			wchar_t *bp, *fp, c;
92c48dc2a1SBaptiste Daroussin 			free(tr1);
93a1df81e7SBaptiste Daroussin 			if ((tr1 = wcsdup(w1)) == NULL)
94c48dc2a1SBaptiste Daroussin 				goto end;
95a1df81e7SBaptiste Daroussin 			bp = tr1;
96a1df81e7SBaptiste Daroussin 			fp = tr1 + wcslen(tr1) - 1;
97a1df81e7SBaptiste Daroussin 			while (bp < fp) {
98a1df81e7SBaptiste Daroussin 				c = *bp;
99a1df81e7SBaptiste Daroussin 				*bp++ = *fp;
100a1df81e7SBaptiste Daroussin 				*fp-- = c;
101a1df81e7SBaptiste Daroussin 			}
102c48dc2a1SBaptiste Daroussin 			free(tr2);
103a1df81e7SBaptiste Daroussin 			if ((tr2 = wcsdup(w2)) == NULL)
104c48dc2a1SBaptiste Daroussin 				goto end;
105a1df81e7SBaptiste Daroussin 			bp = tr2;
106a1df81e7SBaptiste Daroussin 			fp = tr2 + wcslen(tr2) - 1;
107a1df81e7SBaptiste Daroussin 			while (bp < fp) {
108a1df81e7SBaptiste Daroussin 				c = *bp;
109a1df81e7SBaptiste Daroussin 				*bp++ = *fp;
110a1df81e7SBaptiste Daroussin 				*fp-- = c;
111a1df81e7SBaptiste Daroussin 			}
112a1df81e7SBaptiste Daroussin 			w1 = tr1;
113a1df81e7SBaptiste Daroussin 			w2 = tr2;
114a1df81e7SBaptiste Daroussin 		}
115fd4f1dd9STim J. Robbins 
116a1df81e7SBaptiste Daroussin 		if (direc & DIRECTIVE_POSITION) {
117c48dc2a1SBaptiste Daroussin 			int check1, check2;
11876e6db68SBaptiste Daroussin 			while (*w1 && *w2) {
119a1df81e7SBaptiste Daroussin 				pri1 = pri2 = 0;
12076e6db68SBaptiste Daroussin 				check1 = check2 = 1;
12176e6db68SBaptiste Daroussin 				while ((pri1 == pri2) && (check1 || check2)) {
12276e6db68SBaptiste Daroussin 					if (check1) {
12376e6db68SBaptiste Daroussin 						_collate_lookup(table, w1, &len1,
12476e6db68SBaptiste Daroussin 						    &pri1, pass, &st1);
125a1df81e7SBaptiste Daroussin 						if (pri1 < 0) {
126a1df81e7SBaptiste Daroussin 							errno = EINVAL;
127c48dc2a1SBaptiste Daroussin 							goto end;
128a1df81e7SBaptiste Daroussin 						}
12976e6db68SBaptiste Daroussin 						if (!pri1) {
130a1df81e7SBaptiste Daroussin 							pri1 = COLLATE_MAX_PRIORITY;
13176e6db68SBaptiste Daroussin 							st1 = NULL;
132a1df81e7SBaptiste Daroussin 						}
13376e6db68SBaptiste Daroussin 						check1 = (st1 != NULL);
13476e6db68SBaptiste Daroussin 					}
13576e6db68SBaptiste Daroussin 					if (check2) {
13676e6db68SBaptiste Daroussin 						_collate_lookup(table, w2, &len2,
13776e6db68SBaptiste Daroussin 						    &pri2, pass, &st2);
138a1df81e7SBaptiste Daroussin 						if (pri2 < 0) {
139a1df81e7SBaptiste Daroussin 							errno = EINVAL;
140c48dc2a1SBaptiste Daroussin 							goto end;
141a1df81e7SBaptiste Daroussin 						}
14276e6db68SBaptiste Daroussin 						if (!pri2) {
143a1df81e7SBaptiste Daroussin 							pri2 = COLLATE_MAX_PRIORITY;
14476e6db68SBaptiste Daroussin 							st2 = NULL;
14576e6db68SBaptiste Daroussin 						}
14676e6db68SBaptiste Daroussin 						check2 = (st2 != NULL);
14776e6db68SBaptiste Daroussin 					}
148a1df81e7SBaptiste Daroussin 				}
149a1df81e7SBaptiste Daroussin 				if (pri1 != pri2) {
150a1df81e7SBaptiste Daroussin 					ret = pri1 - pri2;
151a1df81e7SBaptiste Daroussin 					goto end;
152a1df81e7SBaptiste Daroussin 				}
153a1df81e7SBaptiste Daroussin 				w1 += len1;
154a1df81e7SBaptiste Daroussin 				w2 += len2;
155a1df81e7SBaptiste Daroussin 			}
156a1df81e7SBaptiste Daroussin 			if (!*w1) {
157a1df81e7SBaptiste Daroussin 				if (*w2) {
158a1df81e7SBaptiste Daroussin 					ret = -(int)*w2;
159a1df81e7SBaptiste Daroussin 					goto end;
160a1df81e7SBaptiste Daroussin 				}
161a1df81e7SBaptiste Daroussin 			} else {
162a1df81e7SBaptiste Daroussin 				ret = *w1;
163a1df81e7SBaptiste Daroussin 				goto end;
164a1df81e7SBaptiste Daroussin 			}
165c48dc2a1SBaptiste Daroussin 		} else {
166c48dc2a1SBaptiste Daroussin 			int vpri1 = 0, vpri2 = 0;
167c48dc2a1SBaptiste Daroussin 			while (*w1 || *w2 || st1 || st2) {
168c48dc2a1SBaptiste Daroussin 				pri1 = 1;
169c48dc2a1SBaptiste Daroussin 				while (*w1 || st1) {
170c48dc2a1SBaptiste Daroussin 					_collate_lookup(table, w1, &len1, &pri1,
171c48dc2a1SBaptiste Daroussin 					    pass, &st1);
172c48dc2a1SBaptiste Daroussin 					w1 += len1;
173c48dc2a1SBaptiste Daroussin 					if (pri1 > 0) {
174c48dc2a1SBaptiste Daroussin 						vpri1++;
175c48dc2a1SBaptiste Daroussin 						break;
176c48dc2a1SBaptiste Daroussin 					}
177c48dc2a1SBaptiste Daroussin 
178c48dc2a1SBaptiste Daroussin 					if (pri1 < 0) {
179c48dc2a1SBaptiste Daroussin 						errno = EINVAL;
180c48dc2a1SBaptiste Daroussin 						goto end;
181c48dc2a1SBaptiste Daroussin 					}
182c48dc2a1SBaptiste Daroussin 					st1 = NULL;
183c48dc2a1SBaptiste Daroussin 				}
184c48dc2a1SBaptiste Daroussin 				pri2 = 1;
185c48dc2a1SBaptiste Daroussin 				while (*w2 || st2) {
186c48dc2a1SBaptiste Daroussin 					_collate_lookup(table, w2, &len2, &pri2,
187c48dc2a1SBaptiste Daroussin 					    pass, &st2);
188c48dc2a1SBaptiste Daroussin 					w2 += len2;
189c48dc2a1SBaptiste Daroussin 					if (pri2 > 0) {
190c48dc2a1SBaptiste Daroussin 						vpri2++;
191c48dc2a1SBaptiste Daroussin 						break;
192c48dc2a1SBaptiste Daroussin 					}
193c48dc2a1SBaptiste Daroussin 					if (pri2 < 0) {
194c48dc2a1SBaptiste Daroussin 						errno = EINVAL;
195c48dc2a1SBaptiste Daroussin 						goto end;
196c48dc2a1SBaptiste Daroussin 					}
197c48dc2a1SBaptiste Daroussin 					st2 = NULL;
198c48dc2a1SBaptiste Daroussin 				}
199c48dc2a1SBaptiste Daroussin 				if ((!pri1 || !pri2) && (vpri1 == vpri2))
200c48dc2a1SBaptiste Daroussin 					break;
201c48dc2a1SBaptiste Daroussin 				if (pri1 != pri2) {
202c48dc2a1SBaptiste Daroussin 					ret = pri1 - pri2;
203c48dc2a1SBaptiste Daroussin 					goto end;
204c48dc2a1SBaptiste Daroussin 				}
205c48dc2a1SBaptiste Daroussin 			}
206c48dc2a1SBaptiste Daroussin 			if (vpri1 && !vpri2) {
207c48dc2a1SBaptiste Daroussin 				ret = 1;
208c48dc2a1SBaptiste Daroussin 				goto end;
209c48dc2a1SBaptiste Daroussin 			}
210c48dc2a1SBaptiste Daroussin 			if (!vpri1 && vpri2) {
211c48dc2a1SBaptiste Daroussin 				ret = -1;
212c48dc2a1SBaptiste Daroussin 				goto end;
213c48dc2a1SBaptiste Daroussin 			}
214c48dc2a1SBaptiste Daroussin 		}
215a1df81e7SBaptiste Daroussin 	}
216a1df81e7SBaptiste Daroussin 	ret = 0;
217a1df81e7SBaptiste Daroussin 
218a1df81e7SBaptiste Daroussin end:
219a1df81e7SBaptiste Daroussin 	free(tr1);
220a1df81e7SBaptiste Daroussin 	free(tr2);
221a1df81e7SBaptiste Daroussin 
222a1df81e7SBaptiste Daroussin 	return (ret);
223fd4f1dd9STim J. Robbins }
224fd4f1dd9STim J. Robbins 
/*
 * Compare the wide strings ws1 and ws2 by calling wcscoll_l() with the
 * locale returned by __get_locale().  Returns whatever wcscoll_l() returns.
 */
int
wcscoll(const wchar_t *ws1, const wchar_t *ws2)
{

	return (wcscoll_l(ws1, ws2, __get_locale()));
}
230