xref: /freebsd/lib/libc/string/wcscoll.c (revision 35c0a8c449fd2b7f75029ebed5e10852240f0865)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright 2017 Nexenta Systems, Inc.
5  * Copyright (c) 2002 Tim J. Robbins
6  * All rights reserved.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  *
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <errno.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <wchar.h>
39 #include "collate.h"
40 
41 int
42 wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
43 {
44 	int len1, len2, pri1, pri2;
45 	wchar_t *tr1 = NULL, *tr2 = NULL;
46 	int direc, pass;
47 	int ret = wcscmp(ws1, ws2);
48 
49 	FIX_LOCALE(locale);
50 	struct xlocale_collate *table =
51 		(struct xlocale_collate*)locale->components[XLC_COLLATE];
52 
53 	if (table->__collate_load_error || ret == 0)
54 		return (ret);
55 
56 	if (*ws1 == 0 && *ws2 != 0)
57 		return (-1);
58 	if (*ws1 != 0 && *ws2 == 0)
59 		return (1);
60 
61 	/*
62 	 * Once upon a time we had code to try to optimize this, but
63 	 * it turns out that you really can't make many assumptions
64 	 * safely.  You absolutely have to run this pass by pass,
65 	 * because some passes will be ignored for a given character,
66 	 * while others will not.  Simpler locales will benefit from
67 	 * having fewer passes, and most comparisons should resolve
68 	 * during the primary pass anyway.
69 	 *
70 	 * Note that we do one final extra pass at the end to pick
71 	 * up UNDEFINED elements.  There is special handling for them.
72 	 */
73 	for (pass = 0; pass <= table->info->directive_count; pass++) {
74 
75 		const int32_t *st1 = NULL;
76 		const int32_t *st2 = NULL;
77 		const wchar_t	*w1 = ws1;
78 		const wchar_t	*w2 = ws2;
79 
80 		/* special pass for UNDEFINED */
81 		if (pass == table->info->directive_count) {
82 			direc = DIRECTIVE_FORWARD;
83 		} else {
84 			direc = table->info->directive[pass];
85 		}
86 
87 		if (direc & DIRECTIVE_BACKWARD) {
88 			wchar_t *bp, *fp, c;
89 			free(tr1);
90 			if ((tr1 = wcsdup(w1)) == NULL)
91 				goto end;
92 			bp = tr1;
93 			fp = tr1 + wcslen(tr1) - 1;
94 			while (bp < fp) {
95 				c = *bp;
96 				*bp++ = *fp;
97 				*fp-- = c;
98 			}
99 			free(tr2);
100 			if ((tr2 = wcsdup(w2)) == NULL)
101 				goto end;
102 			bp = tr2;
103 			fp = tr2 + wcslen(tr2) - 1;
104 			while (bp < fp) {
105 				c = *bp;
106 				*bp++ = *fp;
107 				*fp-- = c;
108 			}
109 			w1 = tr1;
110 			w2 = tr2;
111 		}
112 
113 		if (direc & DIRECTIVE_POSITION) {
114 			int check1, check2;
115 			while (*w1 && *w2) {
116 				pri1 = pri2 = 0;
117 				check1 = check2 = 1;
118 				while ((pri1 == pri2) && (check1 || check2)) {
119 					if (check1) {
120 						_collate_lookup(table, w1, &len1,
121 						    &pri1, pass, &st1);
122 						if (pri1 < 0) {
123 							errno = EINVAL;
124 							goto end;
125 						}
126 						if (!pri1) {
127 							pri1 = COLLATE_MAX_PRIORITY;
128 							st1 = NULL;
129 						}
130 						check1 = (st1 != NULL);
131 					}
132 					if (check2) {
133 						_collate_lookup(table, w2, &len2,
134 						    &pri2, pass, &st2);
135 						if (pri2 < 0) {
136 							errno = EINVAL;
137 							goto end;
138 						}
139 						if (!pri2) {
140 							pri2 = COLLATE_MAX_PRIORITY;
141 							st2 = NULL;
142 						}
143 						check2 = (st2 != NULL);
144 					}
145 				}
146 				if (pri1 != pri2) {
147 					ret = pri1 - pri2;
148 					goto end;
149 				}
150 				w1 += len1;
151 				w2 += len2;
152 			}
153 			if (!*w1) {
154 				if (*w2) {
155 					ret = -(int)*w2;
156 					goto end;
157 				}
158 			} else {
159 				ret = *w1;
160 				goto end;
161 			}
162 		} else {
163 			int vpri1 = 0, vpri2 = 0;
164 			while (*w1 || *w2 || st1 || st2) {
165 				pri1 = 1;
166 				while (*w1 || st1) {
167 					_collate_lookup(table, w1, &len1, &pri1,
168 					    pass, &st1);
169 					w1 += len1;
170 					if (pri1 > 0) {
171 						vpri1++;
172 						break;
173 					}
174 
175 					if (pri1 < 0) {
176 						errno = EINVAL;
177 						goto end;
178 					}
179 					st1 = NULL;
180 				}
181 				pri2 = 1;
182 				while (*w2 || st2) {
183 					_collate_lookup(table, w2, &len2, &pri2,
184 					    pass, &st2);
185 					w2 += len2;
186 					if (pri2 > 0) {
187 						vpri2++;
188 						break;
189 					}
190 					if (pri2 < 0) {
191 						errno = EINVAL;
192 						goto end;
193 					}
194 					st2 = NULL;
195 				}
196 				if ((!pri1 || !pri2) && (vpri1 == vpri2))
197 					break;
198 				if (pri1 != pri2) {
199 					ret = pri1 - pri2;
200 					goto end;
201 				}
202 			}
203 			if (vpri1 && !vpri2) {
204 				ret = 1;
205 				goto end;
206 			}
207 			if (!vpri1 && vpri2) {
208 				ret = -1;
209 				goto end;
210 			}
211 		}
212 	}
213 	ret = 0;
214 
215 end:
216 	free(tr1);
217 	free(tr2);
218 
219 	return (ret);
220 }
221 
222 int
223 wcscoll(const wchar_t *ws1, const wchar_t *ws2)
224 {
225 	return wcscoll_l(ws1, ws2, __get_locale());
226 }
227