14297a3b0SGarrett D'Amore /* 2*2d08521bSGarrett D'Amore * Copyright 2013 Garrett D'Amore <garrett@damore.org> 36b5e5868SGarrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 44297a3b0SGarrett D'Amore * Copyright (c) 2002 Tim J. Robbins 54297a3b0SGarrett D'Amore * All rights reserved. 64297a3b0SGarrett D'Amore * 74297a3b0SGarrett D'Amore * Redistribution and use in source and binary forms, with or without 84297a3b0SGarrett D'Amore * modification, are permitted provided that the following conditions 94297a3b0SGarrett D'Amore * are met: 104297a3b0SGarrett D'Amore * 1. Redistributions of source code must retain the above copyright 114297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer. 124297a3b0SGarrett D'Amore * 2. Redistributions in binary form must reproduce the above copyright 134297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer in the 144297a3b0SGarrett D'Amore * documentation and/or other materials provided with the distribution. 154297a3b0SGarrett D'Amore * 164297a3b0SGarrett D'Amore * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 174297a3b0SGarrett D'Amore * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 184297a3b0SGarrett D'Amore * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 194297a3b0SGarrett D'Amore * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 204297a3b0SGarrett D'Amore * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 214297a3b0SGarrett D'Amore * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 224297a3b0SGarrett D'Amore * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 234297a3b0SGarrett D'Amore * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 244297a3b0SGarrett D'Amore * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 254297a3b0SGarrett D'Amore * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 264297a3b0SGarrett D'Amore * SUCH DAMAGE. 274297a3b0SGarrett D'Amore */ 284297a3b0SGarrett D'Amore 294297a3b0SGarrett D'Amore #include "lint.h" 304297a3b0SGarrett D'Amore #include <errno.h> 314297a3b0SGarrett D'Amore #include <stdlib.h> 324297a3b0SGarrett D'Amore #include <string.h> 334297a3b0SGarrett D'Amore #include <wchar.h> 346b5e5868SGarrett D'Amore #include <assert.h> 354297a3b0SGarrett D'Amore #include "collate.h" 36*2d08521bSGarrett D'Amore #include "localeimpl.h" 374297a3b0SGarrett D'Amore 384297a3b0SGarrett D'Amore int 39*2d08521bSGarrett D'Amore wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc) 404297a3b0SGarrett D'Amore { 416b5e5868SGarrett D'Amore int len1, len2, pri1, pri2, ret; 426b5e5868SGarrett D'Amore wchar_t *tr1 = NULL, *tr2 = NULL; 436b5e5868SGarrett D'Amore int direc, pass; 44*2d08521bSGarrett D'Amore const struct lc_collate *lcc = loc->collate; 454297a3b0SGarrett D'Amore 46*2d08521bSGarrett D'Amore if (lcc->lc_is_posix) 474297a3b0SGarrett D'Amore /* 486b5e5868SGarrett D'Amore * Locale has no special collating order or could not be 496b5e5868SGarrett D'Amore * loaded, do a fast binary comparison. 504297a3b0SGarrett D'Amore */ 514297a3b0SGarrett D'Amore return (wcscmp(ws1, ws2)); 524297a3b0SGarrett D'Amore 536b5e5868SGarrett D'Amore ret = 0; 546b5e5868SGarrett D'Amore 554297a3b0SGarrett D'Amore /* 566b5e5868SGarrett D'Amore * Once upon a time we had code to try to optimize this, but 576b5e5868SGarrett D'Amore * it turns out that you really can't make many assumptions 586b5e5868SGarrett D'Amore * safely. You absolutely have to run this pass by pass, 596b5e5868SGarrett D'Amore * because some passes will be ignored for a given character, 606b5e5868SGarrett D'Amore * while others will not. Simpler locales will benefit from 616b5e5868SGarrett D'Amore * having fewer passes, and most comparisions should resolve 626b5e5868SGarrett D'Amore * during the primary pass anyway. 636b5e5868SGarrett D'Amore * 646b5e5868SGarrett D'Amore * Note that we do one final extra pass at the end to pick 656b5e5868SGarrett D'Amore * up UNDEFINED elements. There is special handling for them. 664297a3b0SGarrett D'Amore */ 67*2d08521bSGarrett D'Amore for (pass = 0; pass <= lcc->lc_directive_count; pass++) { 686b5e5868SGarrett D'Amore 69*2d08521bSGarrett D'Amore const int32_t *st1 = NULL; 70*2d08521bSGarrett D'Amore const int32_t *st2 = NULL; 716b5e5868SGarrett D'Amore const wchar_t *w1 = ws1; 726b5e5868SGarrett D'Amore const wchar_t *w2 = ws2; 736b5e5868SGarrett D'Amore 746b5e5868SGarrett D'Amore /* special pass for UNDEFINED */ 75*2d08521bSGarrett D'Amore if (pass == lcc->lc_directive_count) { 766b5e5868SGarrett D'Amore direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 776b5e5868SGarrett D'Amore } else { 78*2d08521bSGarrett D'Amore direc = lcc->lc_directive[pass]; 794297a3b0SGarrett D'Amore } 804297a3b0SGarrett D'Amore 816b5e5868SGarrett D'Amore if (direc & DIRECTIVE_BACKWARD) { 826b5e5868SGarrett D'Amore wchar_t *bp, *fp, c; 836b5e5868SGarrett D'Amore if ((tr1 = wcsdup(w1)) == NULL) 846b5e5868SGarrett D'Amore goto fail; 856b5e5868SGarrett D'Amore bp = tr1; 866b5e5868SGarrett D'Amore fp = tr1 + wcslen(tr1) - 1; 876b5e5868SGarrett D'Amore while (bp < fp) { 886b5e5868SGarrett D'Amore c = *bp; 896b5e5868SGarrett D'Amore *bp++ = *fp; 906b5e5868SGarrett D'Amore *fp-- = c; 916b5e5868SGarrett D'Amore } 926b5e5868SGarrett D'Amore if ((tr2 = wcsdup(w2)) == NULL) 936b5e5868SGarrett D'Amore goto fail; 946b5e5868SGarrett D'Amore bp = tr2; 956b5e5868SGarrett D'Amore fp = tr2 + wcslen(tr2) - 1; 966b5e5868SGarrett D'Amore while (bp < fp) { 976b5e5868SGarrett D'Amore c = *bp; 986b5e5868SGarrett D'Amore *bp++ = *fp; 996b5e5868SGarrett D'Amore *fp-- = c; 1006b5e5868SGarrett D'Amore } 1016b5e5868SGarrett D'Amore w1 = tr1; 1026b5e5868SGarrett D'Amore w2 = tr2; 1034297a3b0SGarrett D'Amore } 1044297a3b0SGarrett D'Amore 1056b5e5868SGarrett D'Amore if (direc & DIRECTIVE_POSITION) { 1066b5e5868SGarrett D'Amore while ((*w1 || st1) && (*w2 || st2)) { 1076b5e5868SGarrett D'Amore pri1 = pri2 = 0; 108*2d08521bSGarrett D'Amore _collate_lookup(lcc, w1, &len1, &pri1, pass, 109*2d08521bSGarrett D'Amore &st1); 1106b5e5868SGarrett D'Amore if (pri1 <= 0) { 1116b5e5868SGarrett D'Amore if (pri1 < 0) { 1126b5e5868SGarrett D'Amore errno = EINVAL; 1136b5e5868SGarrett D'Amore goto fail; 1146b5e5868SGarrett D'Amore } 1156b5e5868SGarrett D'Amore pri1 = COLLATE_MAX_PRIORITY; 1166b5e5868SGarrett D'Amore } 117*2d08521bSGarrett D'Amore _collate_lookup(lcc, w2, &len2, &pri2, pass, 118*2d08521bSGarrett D'Amore &st2); 1196b5e5868SGarrett D'Amore if (pri2 <= 0) { 1206b5e5868SGarrett D'Amore if (pri2 < 0) { 1216b5e5868SGarrett D'Amore errno = EINVAL; 1226b5e5868SGarrett D'Amore goto fail; 1236b5e5868SGarrett D'Amore } 1246b5e5868SGarrett D'Amore pri2 = COLLATE_MAX_PRIORITY; 1256b5e5868SGarrett D'Amore } 1266b5e5868SGarrett D'Amore if (pri1 != pri2) { 1276b5e5868SGarrett D'Amore ret = pri1 - pri2; 1286b5e5868SGarrett D'Amore goto end; 1296b5e5868SGarrett D'Amore } 1306b5e5868SGarrett D'Amore w1 += len1; 1316b5e5868SGarrett D'Amore w2 += len2; 1326b5e5868SGarrett D'Amore } 1336b5e5868SGarrett D'Amore } else { 1346b5e5868SGarrett D'Amore while ((*w1 || st1) && (*w2 || st2)) { 1356b5e5868SGarrett D'Amore pri1 = pri2 = 0; 1366b5e5868SGarrett D'Amore while (*w1) { 137*2d08521bSGarrett D'Amore _collate_lookup(lcc, w1, &len1, 1386b5e5868SGarrett D'Amore &pri1, pass, &st1); 1396b5e5868SGarrett D'Amore if (pri1 > 0) 1406b5e5868SGarrett D'Amore break; 1416b5e5868SGarrett D'Amore if (pri1 < 0) { 1426b5e5868SGarrett D'Amore errno = EINVAL; 1436b5e5868SGarrett D'Amore goto fail; 1446b5e5868SGarrett D'Amore } 1456b5e5868SGarrett D'Amore w1 += len1; 1466b5e5868SGarrett D'Amore } 1476b5e5868SGarrett D'Amore while (*w2) { 148*2d08521bSGarrett D'Amore _collate_lookup(lcc, w2, &len2, 1496b5e5868SGarrett D'Amore &pri2, pass, &st2); 1506b5e5868SGarrett D'Amore if (pri2 > 0) 1516b5e5868SGarrett D'Amore break; 1526b5e5868SGarrett D'Amore if (pri2 < 0) { 1536b5e5868SGarrett D'Amore errno = EINVAL; 1546b5e5868SGarrett D'Amore goto fail; 1556b5e5868SGarrett D'Amore } 1566b5e5868SGarrett D'Amore w2 += len2; 1576b5e5868SGarrett D'Amore } 1586b5e5868SGarrett D'Amore if (!pri1 || !pri2) 1596b5e5868SGarrett D'Amore break; 1606b5e5868SGarrett D'Amore if (pri1 != pri2) { 1616b5e5868SGarrett D'Amore ret = pri1 - pri2; 1626b5e5868SGarrett D'Amore goto end; 1636b5e5868SGarrett D'Amore } 1646b5e5868SGarrett D'Amore w1 += len1; 1656b5e5868SGarrett D'Amore w2 += len2; 1666b5e5868SGarrett D'Amore } 1676b5e5868SGarrett D'Amore } 1686b5e5868SGarrett D'Amore if (!*w1) { 1696b5e5868SGarrett D'Amore if (*w2) { 1706b5e5868SGarrett D'Amore ret = -(int)*w2; 1716b5e5868SGarrett D'Amore goto end; 1726b5e5868SGarrett D'Amore } 1736b5e5868SGarrett D'Amore } else { 1746b5e5868SGarrett D'Amore ret = *w1; 1756b5e5868SGarrett D'Amore goto end; 1766b5e5868SGarrett D'Amore } 1776b5e5868SGarrett D'Amore } 1786b5e5868SGarrett D'Amore ret = 0; 1794297a3b0SGarrett D'Amore 1806b5e5868SGarrett D'Amore end: 1816b5e5868SGarrett D'Amore if (tr1) 1826b5e5868SGarrett D'Amore free(tr1); 1836b5e5868SGarrett D'Amore if (tr2) 1846b5e5868SGarrett D'Amore free(tr2); 1854297a3b0SGarrett D'Amore 1866b5e5868SGarrett D'Amore return (ret); 1876b5e5868SGarrett D'Amore 1886b5e5868SGarrett D'Amore fail: 1896b5e5868SGarrett D'Amore ret = wcscmp(ws1, ws2); 1906b5e5868SGarrett D'Amore goto end; 1914297a3b0SGarrett D'Amore } 192*2d08521bSGarrett D'Amore 193*2d08521bSGarrett D'Amore 194*2d08521bSGarrett D'Amore int 195*2d08521bSGarrett D'Amore wcscoll(const wchar_t *ws1, const wchar_t *ws2) 196*2d08521bSGarrett D'Amore { 197*2d08521bSGarrett D'Amore return (wcscoll_l(ws1, ws2, uselocale(NULL))); 198*2d08521bSGarrett D'Amore } 199