14297a3b0SGarrett D'Amore /*
2*2d08521bSGarrett D'Amore * Copyright 2013 Garrett D'Amore <garrett@damore.org>
36b5e5868SGarrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
44297a3b0SGarrett D'Amore * Copyright (c) 2002 Tim J. Robbins
54297a3b0SGarrett D'Amore * All rights reserved.
64297a3b0SGarrett D'Amore *
74297a3b0SGarrett D'Amore * Redistribution and use in source and binary forms, with or without
84297a3b0SGarrett D'Amore * modification, are permitted provided that the following conditions
94297a3b0SGarrett D'Amore * are met:
104297a3b0SGarrett D'Amore * 1. Redistributions of source code must retain the above copyright
114297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer.
124297a3b0SGarrett D'Amore * 2. Redistributions in binary form must reproduce the above copyright
134297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer in the
144297a3b0SGarrett D'Amore * documentation and/or other materials provided with the distribution.
154297a3b0SGarrett D'Amore *
164297a3b0SGarrett D'Amore * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
174297a3b0SGarrett D'Amore * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
184297a3b0SGarrett D'Amore * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
194297a3b0SGarrett D'Amore * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
204297a3b0SGarrett D'Amore * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
214297a3b0SGarrett D'Amore * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
224297a3b0SGarrett D'Amore * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
234297a3b0SGarrett D'Amore * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
244297a3b0SGarrett D'Amore * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
254297a3b0SGarrett D'Amore * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
264297a3b0SGarrett D'Amore * SUCH DAMAGE.
274297a3b0SGarrett D'Amore */
284297a3b0SGarrett D'Amore
294297a3b0SGarrett D'Amore #include "lint.h"
304297a3b0SGarrett D'Amore #include <errno.h>
314297a3b0SGarrett D'Amore #include <stdlib.h>
324297a3b0SGarrett D'Amore #include <string.h>
334297a3b0SGarrett D'Amore #include <wchar.h>
346b5e5868SGarrett D'Amore #include <assert.h>
354297a3b0SGarrett D'Amore #include "collate.h"
36*2d08521bSGarrett D'Amore #include "localeimpl.h"
374297a3b0SGarrett D'Amore
384297a3b0SGarrett D'Amore int
wcscoll_l(const wchar_t * ws1,const wchar_t * ws2,locale_t loc)39*2d08521bSGarrett D'Amore wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc)
404297a3b0SGarrett D'Amore {
416b5e5868SGarrett D'Amore int len1, len2, pri1, pri2, ret;
426b5e5868SGarrett D'Amore wchar_t *tr1 = NULL, *tr2 = NULL;
436b5e5868SGarrett D'Amore int direc, pass;
44*2d08521bSGarrett D'Amore const struct lc_collate *lcc = loc->collate;
454297a3b0SGarrett D'Amore
46*2d08521bSGarrett D'Amore if (lcc->lc_is_posix)
474297a3b0SGarrett D'Amore /*
486b5e5868SGarrett D'Amore * Locale has no special collating order or could not be
496b5e5868SGarrett D'Amore * loaded, do a fast binary comparison.
504297a3b0SGarrett D'Amore */
514297a3b0SGarrett D'Amore return (wcscmp(ws1, ws2));
524297a3b0SGarrett D'Amore
536b5e5868SGarrett D'Amore ret = 0;
546b5e5868SGarrett D'Amore
554297a3b0SGarrett D'Amore /*
566b5e5868SGarrett D'Amore * Once upon a time we had code to try to optimize this, but
576b5e5868SGarrett D'Amore * it turns out that you really can't make many assumptions
586b5e5868SGarrett D'Amore * safely. You absolutely have to run this pass by pass,
596b5e5868SGarrett D'Amore * because some passes will be ignored for a given character,
606b5e5868SGarrett D'Amore * while others will not. Simpler locales will benefit from
616b5e5868SGarrett D'Amore * having fewer passes, and most comparisions should resolve
626b5e5868SGarrett D'Amore * during the primary pass anyway.
636b5e5868SGarrett D'Amore *
646b5e5868SGarrett D'Amore * Note that we do one final extra pass at the end to pick
656b5e5868SGarrett D'Amore * up UNDEFINED elements. There is special handling for them.
664297a3b0SGarrett D'Amore */
67*2d08521bSGarrett D'Amore for (pass = 0; pass <= lcc->lc_directive_count; pass++) {
686b5e5868SGarrett D'Amore
69*2d08521bSGarrett D'Amore const int32_t *st1 = NULL;
70*2d08521bSGarrett D'Amore const int32_t *st2 = NULL;
716b5e5868SGarrett D'Amore const wchar_t *w1 = ws1;
726b5e5868SGarrett D'Amore const wchar_t *w2 = ws2;
736b5e5868SGarrett D'Amore
746b5e5868SGarrett D'Amore /* special pass for UNDEFINED */
75*2d08521bSGarrett D'Amore if (pass == lcc->lc_directive_count) {
766b5e5868SGarrett D'Amore direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
776b5e5868SGarrett D'Amore } else {
78*2d08521bSGarrett D'Amore direc = lcc->lc_directive[pass];
794297a3b0SGarrett D'Amore }
804297a3b0SGarrett D'Amore
816b5e5868SGarrett D'Amore if (direc & DIRECTIVE_BACKWARD) {
826b5e5868SGarrett D'Amore wchar_t *bp, *fp, c;
836b5e5868SGarrett D'Amore if ((tr1 = wcsdup(w1)) == NULL)
846b5e5868SGarrett D'Amore goto fail;
856b5e5868SGarrett D'Amore bp = tr1;
866b5e5868SGarrett D'Amore fp = tr1 + wcslen(tr1) - 1;
876b5e5868SGarrett D'Amore while (bp < fp) {
886b5e5868SGarrett D'Amore c = *bp;
896b5e5868SGarrett D'Amore *bp++ = *fp;
906b5e5868SGarrett D'Amore *fp-- = c;
916b5e5868SGarrett D'Amore }
926b5e5868SGarrett D'Amore if ((tr2 = wcsdup(w2)) == NULL)
936b5e5868SGarrett D'Amore goto fail;
946b5e5868SGarrett D'Amore bp = tr2;
956b5e5868SGarrett D'Amore fp = tr2 + wcslen(tr2) - 1;
966b5e5868SGarrett D'Amore while (bp < fp) {
976b5e5868SGarrett D'Amore c = *bp;
986b5e5868SGarrett D'Amore *bp++ = *fp;
996b5e5868SGarrett D'Amore *fp-- = c;
1006b5e5868SGarrett D'Amore }
1016b5e5868SGarrett D'Amore w1 = tr1;
1026b5e5868SGarrett D'Amore w2 = tr2;
1034297a3b0SGarrett D'Amore }
1044297a3b0SGarrett D'Amore
1056b5e5868SGarrett D'Amore if (direc & DIRECTIVE_POSITION) {
1066b5e5868SGarrett D'Amore while ((*w1 || st1) && (*w2 || st2)) {
1076b5e5868SGarrett D'Amore pri1 = pri2 = 0;
108*2d08521bSGarrett D'Amore _collate_lookup(lcc, w1, &len1, &pri1, pass,
109*2d08521bSGarrett D'Amore &st1);
1106b5e5868SGarrett D'Amore if (pri1 <= 0) {
1116b5e5868SGarrett D'Amore if (pri1 < 0) {
1126b5e5868SGarrett D'Amore errno = EINVAL;
1136b5e5868SGarrett D'Amore goto fail;
1146b5e5868SGarrett D'Amore }
1156b5e5868SGarrett D'Amore pri1 = COLLATE_MAX_PRIORITY;
1166b5e5868SGarrett D'Amore }
117*2d08521bSGarrett D'Amore _collate_lookup(lcc, w2, &len2, &pri2, pass,
118*2d08521bSGarrett D'Amore &st2);
1196b5e5868SGarrett D'Amore if (pri2 <= 0) {
1206b5e5868SGarrett D'Amore if (pri2 < 0) {
1216b5e5868SGarrett D'Amore errno = EINVAL;
1226b5e5868SGarrett D'Amore goto fail;
1236b5e5868SGarrett D'Amore }
1246b5e5868SGarrett D'Amore pri2 = COLLATE_MAX_PRIORITY;
1256b5e5868SGarrett D'Amore }
1266b5e5868SGarrett D'Amore if (pri1 != pri2) {
1276b5e5868SGarrett D'Amore ret = pri1 - pri2;
1286b5e5868SGarrett D'Amore goto end;
1296b5e5868SGarrett D'Amore }
1306b5e5868SGarrett D'Amore w1 += len1;
1316b5e5868SGarrett D'Amore w2 += len2;
1326b5e5868SGarrett D'Amore }
1336b5e5868SGarrett D'Amore } else {
1346b5e5868SGarrett D'Amore while ((*w1 || st1) && (*w2 || st2)) {
1356b5e5868SGarrett D'Amore pri1 = pri2 = 0;
1366b5e5868SGarrett D'Amore while (*w1) {
137*2d08521bSGarrett D'Amore _collate_lookup(lcc, w1, &len1,
1386b5e5868SGarrett D'Amore &pri1, pass, &st1);
1396b5e5868SGarrett D'Amore if (pri1 > 0)
1406b5e5868SGarrett D'Amore break;
1416b5e5868SGarrett D'Amore if (pri1 < 0) {
1426b5e5868SGarrett D'Amore errno = EINVAL;
1436b5e5868SGarrett D'Amore goto fail;
1446b5e5868SGarrett D'Amore }
1456b5e5868SGarrett D'Amore w1 += len1;
1466b5e5868SGarrett D'Amore }
1476b5e5868SGarrett D'Amore while (*w2) {
148*2d08521bSGarrett D'Amore _collate_lookup(lcc, w2, &len2,
1496b5e5868SGarrett D'Amore &pri2, pass, &st2);
1506b5e5868SGarrett D'Amore if (pri2 > 0)
1516b5e5868SGarrett D'Amore break;
1526b5e5868SGarrett D'Amore if (pri2 < 0) {
1536b5e5868SGarrett D'Amore errno = EINVAL;
1546b5e5868SGarrett D'Amore goto fail;
1556b5e5868SGarrett D'Amore }
1566b5e5868SGarrett D'Amore w2 += len2;
1576b5e5868SGarrett D'Amore }
1586b5e5868SGarrett D'Amore if (!pri1 || !pri2)
1596b5e5868SGarrett D'Amore break;
1606b5e5868SGarrett D'Amore if (pri1 != pri2) {
1616b5e5868SGarrett D'Amore ret = pri1 - pri2;
1626b5e5868SGarrett D'Amore goto end;
1636b5e5868SGarrett D'Amore }
1646b5e5868SGarrett D'Amore w1 += len1;
1656b5e5868SGarrett D'Amore w2 += len2;
1666b5e5868SGarrett D'Amore }
1676b5e5868SGarrett D'Amore }
1686b5e5868SGarrett D'Amore if (!*w1) {
1696b5e5868SGarrett D'Amore if (*w2) {
1706b5e5868SGarrett D'Amore ret = -(int)*w2;
1716b5e5868SGarrett D'Amore goto end;
1726b5e5868SGarrett D'Amore }
1736b5e5868SGarrett D'Amore } else {
1746b5e5868SGarrett D'Amore ret = *w1;
1756b5e5868SGarrett D'Amore goto end;
1766b5e5868SGarrett D'Amore }
1776b5e5868SGarrett D'Amore }
1786b5e5868SGarrett D'Amore ret = 0;
1794297a3b0SGarrett D'Amore
1806b5e5868SGarrett D'Amore end:
1816b5e5868SGarrett D'Amore if (tr1)
1826b5e5868SGarrett D'Amore free(tr1);
1836b5e5868SGarrett D'Amore if (tr2)
1846b5e5868SGarrett D'Amore free(tr2);
1854297a3b0SGarrett D'Amore
1866b5e5868SGarrett D'Amore return (ret);
1876b5e5868SGarrett D'Amore
1886b5e5868SGarrett D'Amore fail:
1896b5e5868SGarrett D'Amore ret = wcscmp(ws1, ws2);
1906b5e5868SGarrett D'Amore goto end;
1914297a3b0SGarrett D'Amore }
192*2d08521bSGarrett D'Amore
193*2d08521bSGarrett D'Amore
194*2d08521bSGarrett D'Amore int
wcscoll(const wchar_t * ws1,const wchar_t * ws2)195*2d08521bSGarrett D'Amore wcscoll(const wchar_t *ws1, const wchar_t *ws2)
196*2d08521bSGarrett D'Amore {
197*2d08521bSGarrett D'Amore return (wcscoll_l(ws1, ws2, uselocale(NULL)));
198*2d08521bSGarrett D'Amore }
199