1fd4f1dd9STim J. Robbins /*- 2*a1df81e7SBaptiste Daroussin * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 3fd4f1dd9STim J. Robbins * Copyright (c) 2002 Tim J. Robbins 4fd4f1dd9STim J. Robbins * All rights reserved. 5fd4f1dd9STim J. Robbins * 63c87aa1dSDavid Chisnall * Copyright (c) 2011 The FreeBSD Foundation 73c87aa1dSDavid Chisnall * All rights reserved. 83c87aa1dSDavid Chisnall * Portions of this software were developed by David Chisnall 93c87aa1dSDavid Chisnall * under sponsorship from the FreeBSD Foundation. 103c87aa1dSDavid Chisnall * 11fd4f1dd9STim J. Robbins * Redistribution and use in source and binary forms, with or without 12fd4f1dd9STim J. Robbins * modification, are permitted provided that the following conditions 13fd4f1dd9STim J. Robbins * are met: 14fd4f1dd9STim J. Robbins * 1. Redistributions of source code must retain the above copyright 15fd4f1dd9STim J. Robbins * notice, this list of conditions and the following disclaimer. 16fd4f1dd9STim J. Robbins * 2. Redistributions in binary form must reproduce the above copyright 17fd4f1dd9STim J. Robbins * notice, this list of conditions and the following disclaimer in the 18fd4f1dd9STim J. Robbins * documentation and/or other materials provided with the distribution. 19fd4f1dd9STim J. Robbins * 20fd4f1dd9STim J. Robbins * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21fd4f1dd9STim J. Robbins * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22fd4f1dd9STim J. Robbins * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23fd4f1dd9STim J. Robbins * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24fd4f1dd9STim J. Robbins * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25fd4f1dd9STim J. Robbins * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26fd4f1dd9STim J. Robbins * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27fd4f1dd9STim J. Robbins * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28fd4f1dd9STim J. Robbins * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29fd4f1dd9STim J. Robbins * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30fd4f1dd9STim J. Robbins * SUCH DAMAGE. 31fd4f1dd9STim J. Robbins */ 32fd4f1dd9STim J. Robbins 33fd4f1dd9STim J. Robbins #include <sys/cdefs.h> 34fd4f1dd9STim J. Robbins __FBSDID("$FreeBSD$"); 35fd4f1dd9STim J. Robbins 36fd4f1dd9STim J. Robbins #include <errno.h> 37fd4f1dd9STim J. Robbins #include <stdlib.h> 38fd4f1dd9STim J. Robbins #include <string.h> 39fd4f1dd9STim J. Robbins #include <wchar.h> 40fd4f1dd9STim J. Robbins #include "collate.h" 41fd4f1dd9STim J. Robbins 42fd4f1dd9STim J. Robbins int 433c87aa1dSDavid Chisnall wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale) 44fd4f1dd9STim J. Robbins { 45*a1df81e7SBaptiste Daroussin int len1, len2, pri1, pri2, ret; 46*a1df81e7SBaptiste Daroussin wchar_t *tr1 = NULL, *tr2 = NULL; 47*a1df81e7SBaptiste Daroussin int direc, pass; 48*a1df81e7SBaptiste Daroussin 493c87aa1dSDavid Chisnall FIX_LOCALE(locale); 503c87aa1dSDavid Chisnall struct xlocale_collate *table = 513c87aa1dSDavid Chisnall (struct xlocale_collate*)locale->components[XLC_COLLATE]; 52fd4f1dd9STim J. Robbins 53*a1df81e7SBaptiste Daroussin if (table->__collate_load_error) 54fd4f1dd9STim J. Robbins /* 55*a1df81e7SBaptiste Daroussin * Locale has no special collating order or could not be 56*a1df81e7SBaptiste Daroussin * loaded, do a fast binary comparison. 57fd4f1dd9STim J. Robbins */ 58fd4f1dd9STim J. Robbins return (wcscmp(ws1, ws2)); 59fd4f1dd9STim J. Robbins 60*a1df81e7SBaptiste Daroussin ret = 0; 61*a1df81e7SBaptiste Daroussin 62fd4f1dd9STim J. Robbins /* 63*a1df81e7SBaptiste Daroussin * Once upon a time we had code to try to optimize this, but 64*a1df81e7SBaptiste Daroussin * it turns out that you really can't make many assumptions 65*a1df81e7SBaptiste Daroussin * safely. You absolutely have to run this pass by pass, 66*a1df81e7SBaptiste Daroussin * because some passes will be ignored for a given character, 67*a1df81e7SBaptiste Daroussin * while others will not. Simpler locales will benefit from 68*a1df81e7SBaptiste Daroussin * having fewer passes, and most comparisions should resolve 69*a1df81e7SBaptiste Daroussin * during the primary pass anyway. 70*a1df81e7SBaptiste Daroussin * 71*a1df81e7SBaptiste Daroussin * Note that we do one final extra pass at the end to pick 72*a1df81e7SBaptiste Daroussin * up UNDEFINED elements. There is special handling for them. 73fd4f1dd9STim J. Robbins */ 74*a1df81e7SBaptiste Daroussin for (pass = 0; pass <= table->info->directive_count; pass++) { 75*a1df81e7SBaptiste Daroussin 76*a1df81e7SBaptiste Daroussin const int32_t *st1 = NULL; 77*a1df81e7SBaptiste Daroussin const int32_t *st2 = NULL; 78*a1df81e7SBaptiste Daroussin const wchar_t *w1 = ws1; 79*a1df81e7SBaptiste Daroussin const wchar_t *w2 = ws2; 80*a1df81e7SBaptiste Daroussin 81*a1df81e7SBaptiste Daroussin /* special pass for UNDEFINED */ 82*a1df81e7SBaptiste Daroussin if (pass == table->info->directive_count) { 83*a1df81e7SBaptiste Daroussin direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 84*a1df81e7SBaptiste Daroussin } else { 85*a1df81e7SBaptiste Daroussin direc = table->info->directive[pass]; 86fd4f1dd9STim J. Robbins } 87fd4f1dd9STim J. Robbins 88*a1df81e7SBaptiste Daroussin if (direc & DIRECTIVE_BACKWARD) { 89*a1df81e7SBaptiste Daroussin wchar_t *bp, *fp, c; 90*a1df81e7SBaptiste Daroussin if ((tr1 = wcsdup(w1)) == NULL) 91*a1df81e7SBaptiste Daroussin goto fail; 92*a1df81e7SBaptiste Daroussin bp = tr1; 93*a1df81e7SBaptiste Daroussin fp = tr1 + wcslen(tr1) - 1; 94*a1df81e7SBaptiste Daroussin while (bp < fp) { 95*a1df81e7SBaptiste Daroussin c = *bp; 96*a1df81e7SBaptiste Daroussin *bp++ = *fp; 97*a1df81e7SBaptiste Daroussin *fp-- = c; 98*a1df81e7SBaptiste Daroussin } 99*a1df81e7SBaptiste Daroussin if ((tr2 = wcsdup(w2)) == NULL) 100*a1df81e7SBaptiste Daroussin goto fail; 101*a1df81e7SBaptiste Daroussin bp = tr2; 102*a1df81e7SBaptiste Daroussin fp = tr2 + wcslen(tr2) - 1; 103*a1df81e7SBaptiste Daroussin while (bp < fp) { 104*a1df81e7SBaptiste Daroussin c = *bp; 105*a1df81e7SBaptiste Daroussin *bp++ = *fp; 106*a1df81e7SBaptiste Daroussin *fp-- = c; 107*a1df81e7SBaptiste Daroussin } 108*a1df81e7SBaptiste Daroussin w1 = tr1; 109*a1df81e7SBaptiste Daroussin w2 = tr2; 110*a1df81e7SBaptiste Daroussin } 111fd4f1dd9STim J. Robbins 112*a1df81e7SBaptiste Daroussin if (direc & DIRECTIVE_POSITION) { 113*a1df81e7SBaptiste Daroussin while ((*w1 || st1) && (*w2 || st2)) { 114*a1df81e7SBaptiste Daroussin pri1 = pri2 = 0; 115*a1df81e7SBaptiste Daroussin _collate_lookup(table, w1, &len1, &pri1, pass, 116*a1df81e7SBaptiste Daroussin &st1); 117*a1df81e7SBaptiste Daroussin if (pri1 <= 0) { 118*a1df81e7SBaptiste Daroussin if (pri1 < 0) { 119*a1df81e7SBaptiste Daroussin errno = EINVAL; 120*a1df81e7SBaptiste Daroussin goto fail; 121*a1df81e7SBaptiste Daroussin } 122*a1df81e7SBaptiste Daroussin pri1 = COLLATE_MAX_PRIORITY; 123*a1df81e7SBaptiste Daroussin } 124*a1df81e7SBaptiste Daroussin _collate_lookup(table, w2, &len2, &pri2, pass, 125*a1df81e7SBaptiste Daroussin &st2); 126*a1df81e7SBaptiste Daroussin if (pri2 <= 0) { 127*a1df81e7SBaptiste Daroussin if (pri2 < 0) { 128*a1df81e7SBaptiste Daroussin errno = EINVAL; 129*a1df81e7SBaptiste Daroussin goto fail; 130*a1df81e7SBaptiste Daroussin } 131*a1df81e7SBaptiste Daroussin pri2 = COLLATE_MAX_PRIORITY; 132*a1df81e7SBaptiste Daroussin } 133*a1df81e7SBaptiste Daroussin if (pri1 != pri2) { 134*a1df81e7SBaptiste Daroussin ret = pri1 - pri2; 135*a1df81e7SBaptiste Daroussin goto end; 136*a1df81e7SBaptiste Daroussin } 137*a1df81e7SBaptiste Daroussin w1 += len1; 138*a1df81e7SBaptiste Daroussin w2 += len2; 139*a1df81e7SBaptiste Daroussin } 140*a1df81e7SBaptiste Daroussin } else { 141*a1df81e7SBaptiste Daroussin while ((*w1 || st1) && (*w2 || st2)) { 142*a1df81e7SBaptiste Daroussin pri1 = pri2 = 0; 143*a1df81e7SBaptiste Daroussin while (*w1) { 144*a1df81e7SBaptiste Daroussin _collate_lookup(table, w1, &len1, 145*a1df81e7SBaptiste Daroussin &pri1, pass, &st1); 146*a1df81e7SBaptiste Daroussin if (pri1 > 0) 147*a1df81e7SBaptiste Daroussin break; 148*a1df81e7SBaptiste Daroussin if (pri1 < 0) { 149*a1df81e7SBaptiste Daroussin errno = EINVAL; 150*a1df81e7SBaptiste Daroussin goto fail; 151*a1df81e7SBaptiste Daroussin } 152*a1df81e7SBaptiste Daroussin w1 += len1; 153*a1df81e7SBaptiste Daroussin } 154*a1df81e7SBaptiste Daroussin while (*w2) { 155*a1df81e7SBaptiste Daroussin _collate_lookup(table, w2, &len2, 156*a1df81e7SBaptiste Daroussin &pri2, pass, &st2); 157*a1df81e7SBaptiste Daroussin if (pri2 > 0) 158*a1df81e7SBaptiste Daroussin break; 159*a1df81e7SBaptiste Daroussin if (pri2 < 0) { 160*a1df81e7SBaptiste Daroussin errno = EINVAL; 161*a1df81e7SBaptiste Daroussin goto fail; 162*a1df81e7SBaptiste Daroussin } 163*a1df81e7SBaptiste Daroussin w2 += len2; 164*a1df81e7SBaptiste Daroussin } 165*a1df81e7SBaptiste Daroussin if (!pri1 || !pri2) 166*a1df81e7SBaptiste Daroussin break; 167*a1df81e7SBaptiste Daroussin if (pri1 != pri2) { 168*a1df81e7SBaptiste Daroussin ret = pri1 - pri2; 169*a1df81e7SBaptiste Daroussin goto end; 170*a1df81e7SBaptiste Daroussin } 171*a1df81e7SBaptiste Daroussin w1 += len1; 172*a1df81e7SBaptiste Daroussin w2 += len2; 173*a1df81e7SBaptiste Daroussin } 174*a1df81e7SBaptiste Daroussin } 175*a1df81e7SBaptiste Daroussin if (!*w1) { 176*a1df81e7SBaptiste Daroussin if (*w2) { 177*a1df81e7SBaptiste Daroussin ret = -(int)*w2; 178*a1df81e7SBaptiste Daroussin goto end; 179*a1df81e7SBaptiste Daroussin } 180*a1df81e7SBaptiste Daroussin } else { 181*a1df81e7SBaptiste Daroussin ret = *w1; 182*a1df81e7SBaptiste Daroussin goto end; 183*a1df81e7SBaptiste Daroussin } 184*a1df81e7SBaptiste Daroussin } 185*a1df81e7SBaptiste Daroussin ret = 0; 186*a1df81e7SBaptiste Daroussin 187*a1df81e7SBaptiste Daroussin end: 188*a1df81e7SBaptiste Daroussin if (tr1) 189*a1df81e7SBaptiste Daroussin free(tr1); 190*a1df81e7SBaptiste Daroussin if (tr2) 191*a1df81e7SBaptiste Daroussin free(tr2); 192*a1df81e7SBaptiste Daroussin 193*a1df81e7SBaptiste Daroussin return (ret); 194*a1df81e7SBaptiste Daroussin 195*a1df81e7SBaptiste Daroussin fail: 196*a1df81e7SBaptiste Daroussin ret = wcscmp(ws1, ws2); 197*a1df81e7SBaptiste Daroussin goto end; 198fd4f1dd9STim J. Robbins } 199fd4f1dd9STim J. Robbins 2003c87aa1dSDavid Chisnall int 2013c87aa1dSDavid Chisnall wcscoll(const wchar_t *ws1, const wchar_t *ws2) 2023c87aa1dSDavid Chisnall { 2033c87aa1dSDavid Chisnall return wcscoll_l(ws1, ws2, __get_locale()); 2043c87aa1dSDavid Chisnall } 205