xref: /titanic_51/usr/src/lib/libc/port/locale/wcscoll.c (revision 2d08521bd15501c8370ba2153b9cca4f094979d0)
14297a3b0SGarrett D'Amore /*
2*2d08521bSGarrett D'Amore  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
36b5e5868SGarrett D'Amore  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
44297a3b0SGarrett D'Amore  * Copyright (c) 2002 Tim J. Robbins
54297a3b0SGarrett D'Amore  * All rights reserved.
64297a3b0SGarrett D'Amore  *
74297a3b0SGarrett D'Amore  * Redistribution and use in source and binary forms, with or without
84297a3b0SGarrett D'Amore  * modification, are permitted provided that the following conditions
94297a3b0SGarrett D'Amore  * are met:
104297a3b0SGarrett D'Amore  * 1. Redistributions of source code must retain the above copyright
114297a3b0SGarrett D'Amore  *    notice, this list of conditions and the following disclaimer.
124297a3b0SGarrett D'Amore  * 2. Redistributions in binary form must reproduce the above copyright
134297a3b0SGarrett D'Amore  *    notice, this list of conditions and the following disclaimer in the
144297a3b0SGarrett D'Amore  *    documentation and/or other materials provided with the distribution.
154297a3b0SGarrett D'Amore  *
164297a3b0SGarrett D'Amore  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
174297a3b0SGarrett D'Amore  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
184297a3b0SGarrett D'Amore  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
194297a3b0SGarrett D'Amore  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
204297a3b0SGarrett D'Amore  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
214297a3b0SGarrett D'Amore  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
224297a3b0SGarrett D'Amore  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
234297a3b0SGarrett D'Amore  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
244297a3b0SGarrett D'Amore  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
254297a3b0SGarrett D'Amore  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
264297a3b0SGarrett D'Amore  * SUCH DAMAGE.
274297a3b0SGarrett D'Amore  */
284297a3b0SGarrett D'Amore 
294297a3b0SGarrett D'Amore #include "lint.h"
304297a3b0SGarrett D'Amore #include <errno.h>
314297a3b0SGarrett D'Amore #include <stdlib.h>
324297a3b0SGarrett D'Amore #include <string.h>
334297a3b0SGarrett D'Amore #include <wchar.h>
346b5e5868SGarrett D'Amore #include <assert.h>
354297a3b0SGarrett D'Amore #include "collate.h"
36*2d08521bSGarrett D'Amore #include "localeimpl.h"
374297a3b0SGarrett D'Amore 
384297a3b0SGarrett D'Amore int
39*2d08521bSGarrett D'Amore wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc)
404297a3b0SGarrett D'Amore {
416b5e5868SGarrett D'Amore 	int len1, len2, pri1, pri2, ret;
426b5e5868SGarrett D'Amore 	wchar_t *tr1 = NULL, *tr2 = NULL;
436b5e5868SGarrett D'Amore 	int direc, pass;
44*2d08521bSGarrett D'Amore 	const struct lc_collate *lcc = loc->collate;
454297a3b0SGarrett D'Amore 
46*2d08521bSGarrett D'Amore 	if (lcc->lc_is_posix)
474297a3b0SGarrett D'Amore 		/*
486b5e5868SGarrett D'Amore 		 * Locale has no special collating order or could not be
496b5e5868SGarrett D'Amore 		 * loaded, do a fast binary comparison.
504297a3b0SGarrett D'Amore 		 */
514297a3b0SGarrett D'Amore 		return (wcscmp(ws1, ws2));
524297a3b0SGarrett D'Amore 
536b5e5868SGarrett D'Amore 	ret = 0;
546b5e5868SGarrett D'Amore 
554297a3b0SGarrett D'Amore 	/*
566b5e5868SGarrett D'Amore 	 * Once upon a time we had code to try to optimize this, but
576b5e5868SGarrett D'Amore 	 * it turns out that you really can't make many assumptions
586b5e5868SGarrett D'Amore 	 * safely.  You absolutely have to run this pass by pass,
596b5e5868SGarrett D'Amore 	 * because some passes will be ignored for a given character,
606b5e5868SGarrett D'Amore 	 * while others will not.  Simpler locales will benefit from
616b5e5868SGarrett D'Amore 	 * having fewer passes, and most comparisions should resolve
626b5e5868SGarrett D'Amore 	 * during the primary pass anyway.
636b5e5868SGarrett D'Amore 	 *
646b5e5868SGarrett D'Amore 	 * Note that we do one final extra pass at the end to pick
656b5e5868SGarrett D'Amore 	 * up UNDEFINED elements.  There is special handling for them.
664297a3b0SGarrett D'Amore 	 */
67*2d08521bSGarrett D'Amore 	for (pass = 0; pass <= lcc->lc_directive_count; pass++) {
686b5e5868SGarrett D'Amore 
69*2d08521bSGarrett D'Amore 		const int32_t *st1 = NULL;
70*2d08521bSGarrett D'Amore 		const int32_t *st2 = NULL;
716b5e5868SGarrett D'Amore 		const wchar_t	*w1 = ws1;
726b5e5868SGarrett D'Amore 		const wchar_t	*w2 = ws2;
736b5e5868SGarrett D'Amore 
746b5e5868SGarrett D'Amore 		/* special pass for UNDEFINED */
75*2d08521bSGarrett D'Amore 		if (pass == lcc->lc_directive_count) {
766b5e5868SGarrett D'Amore 			direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
776b5e5868SGarrett D'Amore 		} else {
78*2d08521bSGarrett D'Amore 			direc = lcc->lc_directive[pass];
794297a3b0SGarrett D'Amore 		}
804297a3b0SGarrett D'Amore 
816b5e5868SGarrett D'Amore 		if (direc & DIRECTIVE_BACKWARD) {
826b5e5868SGarrett D'Amore 			wchar_t *bp, *fp, c;
836b5e5868SGarrett D'Amore 			if ((tr1 = wcsdup(w1)) == NULL)
846b5e5868SGarrett D'Amore 				goto fail;
856b5e5868SGarrett D'Amore 			bp = tr1;
866b5e5868SGarrett D'Amore 			fp = tr1 + wcslen(tr1) - 1;
876b5e5868SGarrett D'Amore 			while (bp < fp) {
886b5e5868SGarrett D'Amore 				c = *bp;
896b5e5868SGarrett D'Amore 				*bp++ = *fp;
906b5e5868SGarrett D'Amore 				*fp-- = c;
916b5e5868SGarrett D'Amore 			}
926b5e5868SGarrett D'Amore 			if ((tr2 = wcsdup(w2)) == NULL)
936b5e5868SGarrett D'Amore 				goto fail;
946b5e5868SGarrett D'Amore 			bp = tr2;
956b5e5868SGarrett D'Amore 			fp = tr2 + wcslen(tr2) - 1;
966b5e5868SGarrett D'Amore 			while (bp < fp) {
976b5e5868SGarrett D'Amore 				c = *bp;
986b5e5868SGarrett D'Amore 				*bp++ = *fp;
996b5e5868SGarrett D'Amore 				*fp-- = c;
1006b5e5868SGarrett D'Amore 			}
1016b5e5868SGarrett D'Amore 			w1 = tr1;
1026b5e5868SGarrett D'Amore 			w2 = tr2;
1034297a3b0SGarrett D'Amore 		}
1044297a3b0SGarrett D'Amore 
1056b5e5868SGarrett D'Amore 		if (direc & DIRECTIVE_POSITION) {
1066b5e5868SGarrett D'Amore 			while ((*w1 || st1) && (*w2 || st2)) {
1076b5e5868SGarrett D'Amore 				pri1 = pri2 = 0;
108*2d08521bSGarrett D'Amore 				_collate_lookup(lcc, w1, &len1, &pri1, pass,
109*2d08521bSGarrett D'Amore 				    &st1);
1106b5e5868SGarrett D'Amore 				if (pri1 <= 0) {
1116b5e5868SGarrett D'Amore 					if (pri1 < 0) {
1126b5e5868SGarrett D'Amore 						errno = EINVAL;
1136b5e5868SGarrett D'Amore 						goto fail;
1146b5e5868SGarrett D'Amore 					}
1156b5e5868SGarrett D'Amore 					pri1 = COLLATE_MAX_PRIORITY;
1166b5e5868SGarrett D'Amore 				}
117*2d08521bSGarrett D'Amore 				_collate_lookup(lcc, w2, &len2, &pri2, pass,
118*2d08521bSGarrett D'Amore 				    &st2);
1196b5e5868SGarrett D'Amore 				if (pri2 <= 0) {
1206b5e5868SGarrett D'Amore 					if (pri2 < 0) {
1216b5e5868SGarrett D'Amore 						errno = EINVAL;
1226b5e5868SGarrett D'Amore 						goto fail;
1236b5e5868SGarrett D'Amore 					}
1246b5e5868SGarrett D'Amore 					pri2 = COLLATE_MAX_PRIORITY;
1256b5e5868SGarrett D'Amore 				}
1266b5e5868SGarrett D'Amore 				if (pri1 != pri2) {
1276b5e5868SGarrett D'Amore 					ret = pri1 - pri2;
1286b5e5868SGarrett D'Amore 					goto end;
1296b5e5868SGarrett D'Amore 				}
1306b5e5868SGarrett D'Amore 				w1 += len1;
1316b5e5868SGarrett D'Amore 				w2 += len2;
1326b5e5868SGarrett D'Amore 			}
1336b5e5868SGarrett D'Amore 		} else {
1346b5e5868SGarrett D'Amore 			while ((*w1 || st1) && (*w2 || st2)) {
1356b5e5868SGarrett D'Amore 				pri1 = pri2 = 0;
1366b5e5868SGarrett D'Amore 				while (*w1) {
137*2d08521bSGarrett D'Amore 					_collate_lookup(lcc, w1, &len1,
1386b5e5868SGarrett D'Amore 					    &pri1, pass, &st1);
1396b5e5868SGarrett D'Amore 					if (pri1 > 0)
1406b5e5868SGarrett D'Amore 						break;
1416b5e5868SGarrett D'Amore 					if (pri1 < 0) {
1426b5e5868SGarrett D'Amore 						errno = EINVAL;
1436b5e5868SGarrett D'Amore 						goto fail;
1446b5e5868SGarrett D'Amore 					}
1456b5e5868SGarrett D'Amore 					w1 += len1;
1466b5e5868SGarrett D'Amore 				}
1476b5e5868SGarrett D'Amore 				while (*w2) {
148*2d08521bSGarrett D'Amore 					_collate_lookup(lcc, w2, &len2,
1496b5e5868SGarrett D'Amore 					    &pri2, pass, &st2);
1506b5e5868SGarrett D'Amore 					if (pri2 > 0)
1516b5e5868SGarrett D'Amore 						break;
1526b5e5868SGarrett D'Amore 					if (pri2 < 0) {
1536b5e5868SGarrett D'Amore 						errno = EINVAL;
1546b5e5868SGarrett D'Amore 						goto fail;
1556b5e5868SGarrett D'Amore 					}
1566b5e5868SGarrett D'Amore 					w2 += len2;
1576b5e5868SGarrett D'Amore 				}
1586b5e5868SGarrett D'Amore 				if (!pri1 || !pri2)
1596b5e5868SGarrett D'Amore 					break;
1606b5e5868SGarrett D'Amore 				if (pri1 != pri2) {
1616b5e5868SGarrett D'Amore 					ret = pri1 - pri2;
1626b5e5868SGarrett D'Amore 					goto end;
1636b5e5868SGarrett D'Amore 				}
1646b5e5868SGarrett D'Amore 				w1 += len1;
1656b5e5868SGarrett D'Amore 				w2 += len2;
1666b5e5868SGarrett D'Amore 			}
1676b5e5868SGarrett D'Amore 		}
1686b5e5868SGarrett D'Amore 		if (!*w1) {
1696b5e5868SGarrett D'Amore 			if (*w2) {
1706b5e5868SGarrett D'Amore 				ret = -(int)*w2;
1716b5e5868SGarrett D'Amore 				goto end;
1726b5e5868SGarrett D'Amore 			}
1736b5e5868SGarrett D'Amore 		} else {
1746b5e5868SGarrett D'Amore 			ret = *w1;
1756b5e5868SGarrett D'Amore 			goto end;
1766b5e5868SGarrett D'Amore 		}
1776b5e5868SGarrett D'Amore 	}
1786b5e5868SGarrett D'Amore 	ret = 0;
1794297a3b0SGarrett D'Amore 
1806b5e5868SGarrett D'Amore end:
1816b5e5868SGarrett D'Amore 	if (tr1)
1826b5e5868SGarrett D'Amore 		free(tr1);
1836b5e5868SGarrett D'Amore 	if (tr2)
1846b5e5868SGarrett D'Amore 		free(tr2);
1854297a3b0SGarrett D'Amore 
1866b5e5868SGarrett D'Amore 	return (ret);
1876b5e5868SGarrett D'Amore 
1886b5e5868SGarrett D'Amore fail:
1896b5e5868SGarrett D'Amore 	ret = wcscmp(ws1, ws2);
1906b5e5868SGarrett D'Amore 	goto end;
1914297a3b0SGarrett D'Amore }
192*2d08521bSGarrett D'Amore 
193*2d08521bSGarrett D'Amore 
/*
 * Compare two wide strings by delegating to wcscoll_l() with the locale
 * handle returned by uselocale(NULL).  The result of wcscoll_l() is
 * returned unchanged.
 */
int
wcscoll(const wchar_t *ws1, const wchar_t *ws2)
{
	int order;

	order = wcscoll_l(ws1, ws2, uselocale(NULL));
	return (order);
}
199