/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
 *		at Electronni Visti IA, Kiev, Ukraine.
 *			All rights reserved.
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 * Portions of this software were developed by David Chisnall
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Adapted to xlocale by John Marino <draco@marino.st>
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "namespace.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include "un-namespace.h"

#include "collate.h"
#include "setlocale.h"
#include "ldpart.h"
#include "libc_private.h"

Chernov 633c87aa1dSDavid Chisnall struct xlocale_collate __xlocale_global_collate = { 642a6abeebSBaptiste Daroussin {{0}, "C"}, 1, 0, 0, 0 653c87aa1dSDavid Chisnall }; 663c87aa1dSDavid Chisnall 673c87aa1dSDavid Chisnall struct xlocale_collate __xlocale_C_collate = { 682a6abeebSBaptiste Daroussin {{0}, "C"}, 1, 0, 0, 0 693c87aa1dSDavid Chisnall }; 70c3d0cca4SAndrey A. Chernov 71a6d2922cSBaptiste Daroussin static int 723c87aa1dSDavid Chisnall __collate_load_tables_l(const char *encoding, struct xlocale_collate *table); 733c87aa1dSDavid Chisnall 743c87aa1dSDavid Chisnall static void 753c87aa1dSDavid Chisnall destruct_collate(void *t) 763c87aa1dSDavid Chisnall { 773c87aa1dSDavid Chisnall struct xlocale_collate *table = t; 782a6abeebSBaptiste Daroussin if (table->map && (table->maplen > 0)) { 792a6abeebSBaptiste Daroussin (void) munmap(table->map, table->maplen); 803c87aa1dSDavid Chisnall } 813c87aa1dSDavid Chisnall free(t); 823c87aa1dSDavid Chisnall } 833c87aa1dSDavid Chisnall 843c87aa1dSDavid Chisnall void * 852a6abeebSBaptiste Daroussin __collate_load(const char *encoding, __unused locale_t unused) 863c87aa1dSDavid Chisnall { 873c87aa1dSDavid Chisnall if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { 883c87aa1dSDavid Chisnall return &__xlocale_C_collate; 893c87aa1dSDavid Chisnall } 903c87aa1dSDavid Chisnall struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1); 913c87aa1dSDavid Chisnall table->header.header.destructor = destruct_collate; 923c87aa1dSDavid Chisnall // FIXME: Make sure that _LDP_CACHE is never returned. 
We should be doing 933c87aa1dSDavid Chisnall // the caching outside of this section 943c87aa1dSDavid Chisnall if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) { 953c87aa1dSDavid Chisnall xlocale_release(table); 963c87aa1dSDavid Chisnall return NULL; 973c87aa1dSDavid Chisnall } 983c87aa1dSDavid Chisnall return table; 993c87aa1dSDavid Chisnall } 1003c87aa1dSDavid Chisnall 1013c87aa1dSDavid Chisnall /** 1023c87aa1dSDavid Chisnall * Load the collation tables for the specified encoding into the global table. 1033c87aa1dSDavid Chisnall */ 1043c87aa1dSDavid Chisnall int 10576692b80SAndrey A. Chernov __collate_load_tables(const char *encoding) 106c3d0cca4SAndrey A. Chernov { 1075e4bbc69SBaptiste Daroussin 1085e4bbc69SBaptiste Daroussin return (__collate_load_tables_l(encoding, &__xlocale_global_collate)); 1093c87aa1dSDavid Chisnall } 1103c87aa1dSDavid Chisnall 1113c87aa1dSDavid Chisnall int 1123c87aa1dSDavid Chisnall __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) 1133c87aa1dSDavid Chisnall { 1142a6abeebSBaptiste Daroussin int i, chains, z; 115b89704ceSBaptiste Daroussin char *buf; 1162a6abeebSBaptiste Daroussin char *TMP; 1172a6abeebSBaptiste Daroussin char *map; 1182a6abeebSBaptiste Daroussin collate_info_t *info; 1192a6abeebSBaptiste Daroussin struct stat sbuf; 1202a6abeebSBaptiste Daroussin int fd; 121c3d0cca4SAndrey A. Chernov 122332fe837SBaptiste Daroussin table->__collate_load_error = 1; 123332fe837SBaptiste Daroussin 12476692b80SAndrey A. Chernov /* 'encoding' must be already checked. */ 12576692b80SAndrey A. Chernov if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { 12676692b80SAndrey A. Chernov return (_LDP_CACHE); 127377da8e8SAndrey A. Chernov } 12876692b80SAndrey A. Chernov 129dc8507e1SBryan Drewery if (asprintf(&buf, "%s/%s/LC_COLLATE", _PathLocale, encoding) == -1) 130b89704ceSBaptiste Daroussin return (_LDP_ERROR); 13176692b80SAndrey A. 
Chernov 13228a20bb3SBaptiste Daroussin if ((fd = _open(buf, O_RDONLY)) < 0) { 13328a20bb3SBaptiste Daroussin free(buf); 1342a6abeebSBaptiste Daroussin return (_LDP_ERROR); 13528a20bb3SBaptiste Daroussin } 136b89704ceSBaptiste Daroussin free(buf); 1372a6abeebSBaptiste Daroussin if (_fstat(fd, &sbuf) < 0) { 1382a6abeebSBaptiste Daroussin (void) _close(fd); 1398e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1408e52da4dSAndrey A. Chernov } 1412a6abeebSBaptiste Daroussin if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) { 1422a6abeebSBaptiste Daroussin (void) _close(fd); 1432a6abeebSBaptiste Daroussin errno = EINVAL; 1448e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1458e52da4dSAndrey A. Chernov } 1462a6abeebSBaptiste Daroussin map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 1472a6abeebSBaptiste Daroussin (void) _close(fd); 1482a6abeebSBaptiste Daroussin if ((TMP = map) == NULL) { 1498e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1508e52da4dSAndrey A. Chernov } 1512a6abeebSBaptiste Daroussin 1522a6abeebSBaptiste Daroussin if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) { 1532a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1542a6abeebSBaptiste Daroussin errno = EINVAL; 1558e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1568e52da4dSAndrey A. 
Chernov } 1572a6abeebSBaptiste Daroussin TMP += COLLATE_STR_LEN; 1582a6abeebSBaptiste Daroussin 1592a6abeebSBaptiste Daroussin info = (void *)TMP; 1602a6abeebSBaptiste Daroussin TMP += sizeof (*info); 1612a6abeebSBaptiste Daroussin 1622a6abeebSBaptiste Daroussin if ((info->directive_count < 1) || 1632a6abeebSBaptiste Daroussin (info->directive_count >= COLL_WEIGHTS_MAX) || 164*4644f9beSYuri Pankov ((chains = info->chain_count) < 0)) { 1652a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1662a6abeebSBaptiste Daroussin errno = EINVAL; 1672a6abeebSBaptiste Daroussin return (_LDP_ERROR); 1682a6abeebSBaptiste Daroussin } 1692a6abeebSBaptiste Daroussin 1702a6abeebSBaptiste Daroussin i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) + 1712a6abeebSBaptiste Daroussin (sizeof (collate_chain_t) * chains) + 172*4644f9beSYuri Pankov (sizeof (collate_large_t) * info->large_count); 173332fe837SBaptiste Daroussin for (z = 0; z < info->directive_count; z++) { 174*4644f9beSYuri Pankov i += sizeof (collate_subst_t) * info->subst_count[z]; 1752a6abeebSBaptiste Daroussin } 1762a6abeebSBaptiste Daroussin if (i != (sbuf.st_size - (TMP - map))) { 1772a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1782a6abeebSBaptiste Daroussin errno = EINVAL; 1792a6abeebSBaptiste Daroussin return (_LDP_ERROR); 1802a6abeebSBaptiste Daroussin } 1812a6abeebSBaptiste Daroussin 182332fe837SBaptiste Daroussin table->info = info; 1832a6abeebSBaptiste Daroussin table->char_pri_table = (void *)TMP; 1842a6abeebSBaptiste Daroussin TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1); 1852a6abeebSBaptiste Daroussin 1862a6abeebSBaptiste Daroussin for (z = 0; z < info->directive_count; z++) { 187*4644f9beSYuri Pankov if (info->subst_count[z] > 0) { 1882a6abeebSBaptiste Daroussin table->subst_table[z] = (void *)TMP; 189*4644f9beSYuri Pankov TMP += info->subst_count[z] * sizeof (collate_subst_t); 1902a6abeebSBaptiste Daroussin } else { 1912a6abeebSBaptiste Daroussin table->subst_table[z] = NULL; 
1922a6abeebSBaptiste Daroussin } 1932a6abeebSBaptiste Daroussin } 1942a6abeebSBaptiste Daroussin 1952a6abeebSBaptiste Daroussin if (chains > 0) { 1962a6abeebSBaptiste Daroussin table->chain_pri_table = (void *)TMP; 1972a6abeebSBaptiste Daroussin TMP += chains * sizeof (collate_chain_t); 1988e52da4dSAndrey A. Chernov } else 1992a6abeebSBaptiste Daroussin table->chain_pri_table = NULL; 200*4644f9beSYuri Pankov if (info->large_count > 0) 2012a6abeebSBaptiste Daroussin table->large_pri_table = (void *)TMP; 2022a6abeebSBaptiste Daroussin else 2032a6abeebSBaptiste Daroussin table->large_pri_table = NULL; 2048e52da4dSAndrey A. Chernov 205bb4317bfSDavid Chisnall table->__collate_load_error = 0; 20676692b80SAndrey A. Chernov return (_LDP_LOADED); 207c3d0cca4SAndrey A. Chernov } 208c3d0cca4SAndrey A. Chernov 209332fe837SBaptiste Daroussin static const int32_t * 2102a6abeebSBaptiste Daroussin substsearch(struct xlocale_collate *table, const wchar_t key, int pass) 2112a6abeebSBaptiste Daroussin { 212332fe837SBaptiste Daroussin const collate_subst_t *p; 213*4644f9beSYuri Pankov int n = table->info->subst_count[pass]; 2142a6abeebSBaptiste Daroussin 2152a6abeebSBaptiste Daroussin if (n == 0) 2162a6abeebSBaptiste Daroussin return (NULL); 2172a6abeebSBaptiste Daroussin 2182a6abeebSBaptiste Daroussin if (pass >= table->info->directive_count) 2192a6abeebSBaptiste Daroussin return (NULL); 2202a6abeebSBaptiste Daroussin 2212a6abeebSBaptiste Daroussin if (!(key & COLLATE_SUBST_PRIORITY)) 2222a6abeebSBaptiste Daroussin return (NULL); 2232a6abeebSBaptiste Daroussin 2242a6abeebSBaptiste Daroussin p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY); 225*4644f9beSYuri Pankov assert(p->key == key); 22677bc2a1cSRuslan Bukin 2272a6abeebSBaptiste Daroussin return (p->pri); 228c3d0cca4SAndrey A. 
Chernov } 2292a6abeebSBaptiste Daroussin 2302a6abeebSBaptiste Daroussin static collate_chain_t * 2312a6abeebSBaptiste Daroussin chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len) 2322a6abeebSBaptiste Daroussin { 23376e6db68SBaptiste Daroussin int low = 0; 234*4644f9beSYuri Pankov int high = table->info->chain_count - 1; 2352a6abeebSBaptiste Daroussin int next, compar, l; 2362a6abeebSBaptiste Daroussin collate_chain_t *p; 23776e6db68SBaptiste Daroussin collate_chain_t *tab = table->chain_pri_table; 2382a6abeebSBaptiste Daroussin 23976e6db68SBaptiste Daroussin if (high < 0) 2402a6abeebSBaptiste Daroussin return (NULL); 2412a6abeebSBaptiste Daroussin 2422a6abeebSBaptiste Daroussin while (low <= high) { 2432a6abeebSBaptiste Daroussin next = (low + high) / 2; 2442a6abeebSBaptiste Daroussin p = tab + next; 245*4644f9beSYuri Pankov compar = *key - *p->str; 2462a6abeebSBaptiste Daroussin if (compar == 0) { 2472a6abeebSBaptiste Daroussin l = wcsnlen(p->str, COLLATE_STR_LEN); 2482a6abeebSBaptiste Daroussin compar = wcsncmp(key, p->str, l); 2492a6abeebSBaptiste Daroussin if (compar == 0) { 2502a6abeebSBaptiste Daroussin *len = l; 2512a6abeebSBaptiste Daroussin return (p); 252c3d0cca4SAndrey A. 
Chernov } 2532a6abeebSBaptiste Daroussin } 2542a6abeebSBaptiste Daroussin if (compar > 0) 2552a6abeebSBaptiste Daroussin low = next + 1; 2562a6abeebSBaptiste Daroussin else 2572a6abeebSBaptiste Daroussin high = next - 1; 2582a6abeebSBaptiste Daroussin } 2592a6abeebSBaptiste Daroussin return (NULL); 2602a6abeebSBaptiste Daroussin } 2612a6abeebSBaptiste Daroussin 2622a6abeebSBaptiste Daroussin static collate_large_t * 2632a6abeebSBaptiste Daroussin largesearch(struct xlocale_collate *table, const wchar_t key) 2642a6abeebSBaptiste Daroussin { 2652a6abeebSBaptiste Daroussin int low = 0; 266*4644f9beSYuri Pankov int high = table->info->large_count - 1; 2672a6abeebSBaptiste Daroussin int next, compar; 2682a6abeebSBaptiste Daroussin collate_large_t *p; 2692a6abeebSBaptiste Daroussin collate_large_t *tab = table->large_pri_table; 2702a6abeebSBaptiste Daroussin 27176e6db68SBaptiste Daroussin if (high < 0) 2722a6abeebSBaptiste Daroussin return (NULL); 2732a6abeebSBaptiste Daroussin 2742a6abeebSBaptiste Daroussin while (low <= high) { 2752a6abeebSBaptiste Daroussin next = (low + high) / 2; 2762a6abeebSBaptiste Daroussin p = tab + next; 277*4644f9beSYuri Pankov compar = key - p->val; 2782a6abeebSBaptiste Daroussin if (compar == 0) 2792a6abeebSBaptiste Daroussin return (p); 2802a6abeebSBaptiste Daroussin if (compar > 0) 2812a6abeebSBaptiste Daroussin low = next + 1; 2822a6abeebSBaptiste Daroussin else 2832a6abeebSBaptiste Daroussin high = next - 1; 2842a6abeebSBaptiste Daroussin } 2852a6abeebSBaptiste Daroussin return (NULL); 286c3d0cca4SAndrey A. Chernov } 287c3d0cca4SAndrey A. Chernov 288c3d0cca4SAndrey A. Chernov void 2892a6abeebSBaptiste Daroussin _collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len, 2902a6abeebSBaptiste Daroussin int *pri, int which, const int **state) 291c3d0cca4SAndrey A. 
Chernov { 2922a6abeebSBaptiste Daroussin collate_chain_t *p2; 2932a6abeebSBaptiste Daroussin collate_large_t *match; 2942a6abeebSBaptiste Daroussin int p, l; 2952a6abeebSBaptiste Daroussin const int *sptr; 296c3d0cca4SAndrey A. Chernov 2972a6abeebSBaptiste Daroussin /* 2982a6abeebSBaptiste Daroussin * If this is the "last" pass for the UNDEFINED, then 2992a6abeebSBaptiste Daroussin * we just return the priority itself. 3002a6abeebSBaptiste Daroussin */ 3012a6abeebSBaptiste Daroussin if (which >= table->info->directive_count) { 3022a6abeebSBaptiste Daroussin *pri = *t; 303c3d0cca4SAndrey A. Chernov *len = 1; 3042a6abeebSBaptiste Daroussin *state = NULL; 305c3d0cca4SAndrey A. Chernov return; 306c3d0cca4SAndrey A. Chernov } 3072a6abeebSBaptiste Daroussin 3082a6abeebSBaptiste Daroussin /* 3092a6abeebSBaptiste Daroussin * If we have remaining substitution data from a previous 3102a6abeebSBaptiste Daroussin * call, consume it first. 3112a6abeebSBaptiste Daroussin */ 3122a6abeebSBaptiste Daroussin if ((sptr = *state) != NULL) { 3132a6abeebSBaptiste Daroussin *pri = *sptr; 3142a6abeebSBaptiste Daroussin sptr++; 315dee0bbbdSBaptiste Daroussin if ((sptr == *state) || (sptr == NULL)) 31676e6db68SBaptiste Daroussin *state = NULL; 31776e6db68SBaptiste Daroussin else 31876e6db68SBaptiste Daroussin *state = sptr; 3192a6abeebSBaptiste Daroussin *len = 0; 3202a6abeebSBaptiste Daroussin return; 321c3d0cca4SAndrey A. Chernov } 322c3d0cca4SAndrey A. Chernov 3232a6abeebSBaptiste Daroussin /* No active substitutions */ 3242a6abeebSBaptiste Daroussin *len = 1; 3252a6abeebSBaptiste Daroussin 3262a6abeebSBaptiste Daroussin /* 32732223c1bSPedro F. Giffuni * Check for composites such as diphthongs that collate as a 3282a6abeebSBaptiste Daroussin * single element (aka chains or collating-elements). 
3292a6abeebSBaptiste Daroussin */ 3302a6abeebSBaptiste Daroussin if (((p2 = chainsearch(table, t, &l)) != NULL) && 3312a6abeebSBaptiste Daroussin ((p = p2->pri[which]) >= 0)) { 3322a6abeebSBaptiste Daroussin 3332a6abeebSBaptiste Daroussin *len = l; 3342a6abeebSBaptiste Daroussin *pri = p; 3352a6abeebSBaptiste Daroussin 3362a6abeebSBaptiste Daroussin } else if (*t <= UCHAR_MAX) { 3372a6abeebSBaptiste Daroussin 3382a6abeebSBaptiste Daroussin /* 3392a6abeebSBaptiste Daroussin * Character is a small (8-bit) character. 3402a6abeebSBaptiste Daroussin * We just look these up directly for speed. 3412a6abeebSBaptiste Daroussin */ 342*4644f9beSYuri Pankov *pri = table->char_pri_table[*t].pri[which]; 3432a6abeebSBaptiste Daroussin 344*4644f9beSYuri Pankov } else if ((table->info->large_count > 0) && 3452a6abeebSBaptiste Daroussin ((match = largesearch(table, *t)) != NULL)) { 3462a6abeebSBaptiste Daroussin 3472a6abeebSBaptiste Daroussin /* 3482a6abeebSBaptiste Daroussin * Character was found in the extended table. 3492a6abeebSBaptiste Daroussin */ 350*4644f9beSYuri Pankov *pri = match->pri.pri[which]; 3512a6abeebSBaptiste Daroussin 3522a6abeebSBaptiste Daroussin } else { 3532a6abeebSBaptiste Daroussin /* 3542a6abeebSBaptiste Daroussin * Character lacks a specific definition. 3552a6abeebSBaptiste Daroussin */ 3562a6abeebSBaptiste Daroussin if (table->info->directive[which] & DIRECTIVE_UNDEFINED) { 3572a6abeebSBaptiste Daroussin /* Mask off sign bit to prevent ordering confusion. */ 3582a6abeebSBaptiste Daroussin *pri = (*t & COLLATE_MAX_PRIORITY); 3592a6abeebSBaptiste Daroussin } else { 360*4644f9beSYuri Pankov *pri = table->info->undef_pri[which]; 3612a6abeebSBaptiste Daroussin } 3622a6abeebSBaptiste Daroussin /* No substitutions for undefined characters! */ 3632a6abeebSBaptiste Daroussin return; 3642a6abeebSBaptiste Daroussin } 3652a6abeebSBaptiste Daroussin 3662a6abeebSBaptiste Daroussin /* 3672a6abeebSBaptiste Daroussin * Try substituting (expanding) the character. 
We are 3682a6abeebSBaptiste Daroussin * currently doing this *after* the chain compression. I 3692a6abeebSBaptiste Daroussin * think it should not matter, but this way might be slightly 3702a6abeebSBaptiste Daroussin * faster. 3712a6abeebSBaptiste Daroussin * 3722a6abeebSBaptiste Daroussin * We do this after the priority search, as this will help us 3732a6abeebSBaptiste Daroussin * to identify a single key value. In order for this to work, 3742a6abeebSBaptiste Daroussin * its important that the priority assigned to a given element 3752a6abeebSBaptiste Daroussin * to be substituted be unique for that level. The localedef 3762a6abeebSBaptiste Daroussin * code ensures this for us. 3772a6abeebSBaptiste Daroussin */ 3782a6abeebSBaptiste Daroussin if ((sptr = substsearch(table, *pri, which)) != NULL) { 379*4644f9beSYuri Pankov if ((*pri = *sptr) > 0) { 3802a6abeebSBaptiste Daroussin sptr++; 381*4644f9beSYuri Pankov *state = *sptr ? sptr : NULL; 3822a6abeebSBaptiste Daroussin } 3832a6abeebSBaptiste Daroussin } 3842a6abeebSBaptiste Daroussin 3852a6abeebSBaptiste Daroussin } 3862a6abeebSBaptiste Daroussin 3872a6abeebSBaptiste Daroussin /* 3882a6abeebSBaptiste Daroussin * This is the meaty part of wcsxfrm & strxfrm. Note that it does 3892a6abeebSBaptiste Daroussin * NOT NULL terminate. That is left to the caller. 3902a6abeebSBaptiste Daroussin */ 3912a6abeebSBaptiste Daroussin size_t 3922a6abeebSBaptiste Daroussin _collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf, 3932a6abeebSBaptiste Daroussin size_t room) 394c3d0cca4SAndrey A. 
Chernov { 3952a6abeebSBaptiste Daroussin int pri; 3962a6abeebSBaptiste Daroussin int len; 3972a6abeebSBaptiste Daroussin const wchar_t *t; 3982a6abeebSBaptiste Daroussin wchar_t *tr = NULL; 3992a6abeebSBaptiste Daroussin int direc; 4002a6abeebSBaptiste Daroussin int pass; 4012a6abeebSBaptiste Daroussin const int32_t *state; 4022a6abeebSBaptiste Daroussin size_t want = 0; 4032a6abeebSBaptiste Daroussin size_t need = 0; 404332fe837SBaptiste Daroussin int ndir = table->info->directive_count; 405c3d0cca4SAndrey A. Chernov 406332fe837SBaptiste Daroussin assert(src); 407332fe837SBaptiste Daroussin 408332fe837SBaptiste Daroussin for (pass = 0; pass <= ndir; pass++) { 4092a6abeebSBaptiste Daroussin 4102a6abeebSBaptiste Daroussin state = NULL; 4112a6abeebSBaptiste Daroussin 4122a6abeebSBaptiste Daroussin if (pass != 0) { 4132a6abeebSBaptiste Daroussin /* insert level separator from the previous pass */ 4142a6abeebSBaptiste Daroussin if (room) { 4152a6abeebSBaptiste Daroussin *xf++ = 1; 4162a6abeebSBaptiste Daroussin room--; 4172a6abeebSBaptiste Daroussin } 4182a6abeebSBaptiste Daroussin want++; 419c3d0cca4SAndrey A. Chernov } 420c3d0cca4SAndrey A. 
Chernov 4212a6abeebSBaptiste Daroussin /* special pass for undefined */ 422332fe837SBaptiste Daroussin if (pass == ndir) { 4232a6abeebSBaptiste Daroussin direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 4242a6abeebSBaptiste Daroussin } else { 4252a6abeebSBaptiste Daroussin direc = table->info->directive[pass]; 4262a6abeebSBaptiste Daroussin } 4272a6abeebSBaptiste Daroussin 4282a6abeebSBaptiste Daroussin t = src; 4292a6abeebSBaptiste Daroussin 4302a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_BACKWARD) { 4312a6abeebSBaptiste Daroussin wchar_t *bp, *fp, c; 4322a6abeebSBaptiste Daroussin free(tr); 4332a6abeebSBaptiste Daroussin if ((tr = wcsdup(t)) == NULL) { 4342a6abeebSBaptiste Daroussin errno = ENOMEM; 4352a6abeebSBaptiste Daroussin goto fail; 4362a6abeebSBaptiste Daroussin } 4372a6abeebSBaptiste Daroussin bp = tr; 4382a6abeebSBaptiste Daroussin fp = tr + wcslen(tr) - 1; 4392a6abeebSBaptiste Daroussin while (bp < fp) { 4402a6abeebSBaptiste Daroussin c = *bp; 4412a6abeebSBaptiste Daroussin *bp++ = *fp; 4422a6abeebSBaptiste Daroussin *fp-- = c; 4432a6abeebSBaptiste Daroussin } 4442a6abeebSBaptiste Daroussin t = (const wchar_t *)tr; 4452a6abeebSBaptiste Daroussin } 4462a6abeebSBaptiste Daroussin 4472a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_POSITION) { 4482a6abeebSBaptiste Daroussin while (*t || state) { 4492a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 4502a6abeebSBaptiste Daroussin t += len; 4512a6abeebSBaptiste Daroussin if (pri <= 0) { 4522a6abeebSBaptiste Daroussin if (pri < 0) { 4532a6abeebSBaptiste Daroussin errno = EINVAL; 4542a6abeebSBaptiste Daroussin goto fail; 4552a6abeebSBaptiste Daroussin } 456dee0bbbdSBaptiste Daroussin state = NULL; 4572a6abeebSBaptiste Daroussin pri = COLLATE_MAX_PRIORITY; 4582a6abeebSBaptiste Daroussin } 4592a6abeebSBaptiste Daroussin if (room) { 4602a6abeebSBaptiste Daroussin *xf++ = pri; 4612a6abeebSBaptiste Daroussin room--; 4622a6abeebSBaptiste Daroussin } 4632a6abeebSBaptiste Daroussin 
want++; 4642a6abeebSBaptiste Daroussin need = want; 4652a6abeebSBaptiste Daroussin } 4662a6abeebSBaptiste Daroussin } else { 4672a6abeebSBaptiste Daroussin while (*t || state) { 4682a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 4692a6abeebSBaptiste Daroussin t += len; 4702a6abeebSBaptiste Daroussin if (pri <= 0) { 4712a6abeebSBaptiste Daroussin if (pri < 0) { 4722a6abeebSBaptiste Daroussin errno = EINVAL; 4732a6abeebSBaptiste Daroussin goto fail; 4742a6abeebSBaptiste Daroussin } 475dee0bbbdSBaptiste Daroussin state = NULL; 4762a6abeebSBaptiste Daroussin continue; 4772a6abeebSBaptiste Daroussin } 4782a6abeebSBaptiste Daroussin if (room) { 4792a6abeebSBaptiste Daroussin *xf++ = pri; 4802a6abeebSBaptiste Daroussin room--; 4812a6abeebSBaptiste Daroussin } 4822a6abeebSBaptiste Daroussin want++; 4832a6abeebSBaptiste Daroussin need = want; 4842a6abeebSBaptiste Daroussin } 4852a6abeebSBaptiste Daroussin } 4862a6abeebSBaptiste Daroussin } 4872a6abeebSBaptiste Daroussin free(tr); 4882a6abeebSBaptiste Daroussin return (need); 4892a6abeebSBaptiste Daroussin 4902a6abeebSBaptiste Daroussin fail: 4912a6abeebSBaptiste Daroussin free(tr); 4922a6abeebSBaptiste Daroussin return ((size_t)(-1)); 4932a6abeebSBaptiste Daroussin } 4942a6abeebSBaptiste Daroussin 4952a6abeebSBaptiste Daroussin /* 4962a6abeebSBaptiste Daroussin * In the non-POSIX case, we transform each character into a string of 4972a6abeebSBaptiste Daroussin * characters representing the character's priority. Since char is usually 4982a6abeebSBaptiste Daroussin * signed, we are limited by 7 bits per byte. To avoid zero, we need to add 4992a6abeebSBaptiste Daroussin * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 5002a6abeebSBaptiste Daroussin * bits per byte. 5012a6abeebSBaptiste Daroussin * 5022a6abeebSBaptiste Daroussin * It turns out that we sometimes have real priorities that are 5032a6abeebSBaptiste Daroussin * 31-bits wide. 
(But: be careful using priorities where the high 5042a6abeebSBaptiste Daroussin * order bit is set -- i.e. the priority is negative. The sort order 5052a6abeebSBaptiste Daroussin * may be surprising!) 5062a6abeebSBaptiste Daroussin * 5072a6abeebSBaptiste Daroussin * TODO: This would be a good area to optimize somewhat. It turns out 5082a6abeebSBaptiste Daroussin * that real prioririties *except for the last UNDEFINED pass* are generally 5092a6abeebSBaptiste Daroussin * very small. We need the localedef code to precalculate the max 5102a6abeebSBaptiste Daroussin * priority for us, and ideally also give us a mask, and then we could 5112a6abeebSBaptiste Daroussin * severely limit what we expand to. 5122a6abeebSBaptiste Daroussin */ 5132a6abeebSBaptiste Daroussin #define XFRM_BYTES 6 5142a6abeebSBaptiste Daroussin #define XFRM_OFFSET ('0') /* make all printable characters */ 5152a6abeebSBaptiste Daroussin #define XFRM_SHIFT 6 5162a6abeebSBaptiste Daroussin #define XFRM_MASK ((1 << XFRM_SHIFT) - 1) 5172a6abeebSBaptiste Daroussin #define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */ 5182a6abeebSBaptiste Daroussin 5192a6abeebSBaptiste Daroussin static int 5202a6abeebSBaptiste Daroussin xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass) 521926f20c9SAndrey A. Chernov { 5222a6abeebSBaptiste Daroussin /* we use unsigned to ensure zero fill on right shift */ 523*4644f9beSYuri Pankov uint32_t val = (uint32_t)table->info->pri_count[pass]; 5242a6abeebSBaptiste Daroussin int nc = 0; 525926f20c9SAndrey A. Chernov 5262a6abeebSBaptiste Daroussin while (val) { 5272a6abeebSBaptiste Daroussin *p = (pri & XFRM_MASK) + XFRM_OFFSET; 5282a6abeebSBaptiste Daroussin pri >>= XFRM_SHIFT; 5292a6abeebSBaptiste Daroussin val >>= XFRM_SHIFT; 5302a6abeebSBaptiste Daroussin p++; 5312a6abeebSBaptiste Daroussin nc++; 5322a6abeebSBaptiste Daroussin } 5332a6abeebSBaptiste Daroussin return (nc); 534926f20c9SAndrey A. Chernov } 535926f20c9SAndrey A. 
Chernov 5362a6abeebSBaptiste Daroussin size_t 5372a6abeebSBaptiste Daroussin _collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf, 5382a6abeebSBaptiste Daroussin size_t room) 539c3d0cca4SAndrey A. Chernov { 5402a6abeebSBaptiste Daroussin int pri; 5412a6abeebSBaptiste Daroussin int len; 5422a6abeebSBaptiste Daroussin const wchar_t *t; 5432a6abeebSBaptiste Daroussin wchar_t *tr = NULL; 5442a6abeebSBaptiste Daroussin int direc; 5452a6abeebSBaptiste Daroussin int pass; 5462a6abeebSBaptiste Daroussin const int32_t *state; 5472a6abeebSBaptiste Daroussin size_t want = 0; 5482a6abeebSBaptiste Daroussin size_t need = 0; 5492a6abeebSBaptiste Daroussin int b; 5502a6abeebSBaptiste Daroussin uint8_t buf[XFRM_BYTES]; 551332fe837SBaptiste Daroussin int ndir = table->info->directive_count; 552c3d0cca4SAndrey A. Chernov 553332fe837SBaptiste Daroussin assert(src); 554332fe837SBaptiste Daroussin 555332fe837SBaptiste Daroussin for (pass = 0; pass <= ndir; pass++) { 5562a6abeebSBaptiste Daroussin 5572a6abeebSBaptiste Daroussin state = NULL; 5582a6abeebSBaptiste Daroussin 5592a6abeebSBaptiste Daroussin if (pass != 0) { 5602a6abeebSBaptiste Daroussin /* insert level separator from the previous pass */ 5612a6abeebSBaptiste Daroussin if (room) { 5622a6abeebSBaptiste Daroussin *xf++ = XFRM_SEP; 5632a6abeebSBaptiste Daroussin room--; 564c3d0cca4SAndrey A. 
Chernov } 5652a6abeebSBaptiste Daroussin want++; 5662a6abeebSBaptiste Daroussin } 5672a6abeebSBaptiste Daroussin 5682a6abeebSBaptiste Daroussin /* special pass for undefined */ 569332fe837SBaptiste Daroussin if (pass == ndir) { 5702a6abeebSBaptiste Daroussin direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 5712a6abeebSBaptiste Daroussin } else { 5722a6abeebSBaptiste Daroussin direc = table->info->directive[pass]; 5732a6abeebSBaptiste Daroussin } 5742a6abeebSBaptiste Daroussin 5752a6abeebSBaptiste Daroussin t = src; 5762a6abeebSBaptiste Daroussin 5772a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_BACKWARD) { 5782a6abeebSBaptiste Daroussin wchar_t *bp, *fp, c; 5792a6abeebSBaptiste Daroussin free(tr); 5802a6abeebSBaptiste Daroussin if ((tr = wcsdup(t)) == NULL) { 5812a6abeebSBaptiste Daroussin errno = ENOMEM; 5822a6abeebSBaptiste Daroussin goto fail; 5832a6abeebSBaptiste Daroussin } 5842a6abeebSBaptiste Daroussin bp = tr; 5852a6abeebSBaptiste Daroussin fp = tr + wcslen(tr) - 1; 5862a6abeebSBaptiste Daroussin while (bp < fp) { 5872a6abeebSBaptiste Daroussin c = *bp; 5882a6abeebSBaptiste Daroussin *bp++ = *fp; 5892a6abeebSBaptiste Daroussin *fp-- = c; 5902a6abeebSBaptiste Daroussin } 5912a6abeebSBaptiste Daroussin t = (const wchar_t *)tr; 5922a6abeebSBaptiste Daroussin } 5932a6abeebSBaptiste Daroussin 5942a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_POSITION) { 5952a6abeebSBaptiste Daroussin while (*t || state) { 5962a6abeebSBaptiste Daroussin 5972a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 5982a6abeebSBaptiste Daroussin t += len; 5992a6abeebSBaptiste Daroussin if (pri <= 0) { 6002a6abeebSBaptiste Daroussin if (pri < 0) { 6012a6abeebSBaptiste Daroussin errno = EINVAL; 6022a6abeebSBaptiste Daroussin goto fail; 6032a6abeebSBaptiste Daroussin } 604dee0bbbdSBaptiste Daroussin state = NULL; 6052a6abeebSBaptiste Daroussin pri = COLLATE_MAX_PRIORITY; 6062a6abeebSBaptiste Daroussin } 6072a6abeebSBaptiste Daroussin 6082a6abeebSBaptiste 
Daroussin b = xfrm(table, buf, pri, pass); 6092a6abeebSBaptiste Daroussin want += b; 6102a6abeebSBaptiste Daroussin if (room) { 6112a6abeebSBaptiste Daroussin while (b) { 6122a6abeebSBaptiste Daroussin b--; 6132a6abeebSBaptiste Daroussin if (room) { 6142a6abeebSBaptiste Daroussin *xf++ = buf[b]; 6152a6abeebSBaptiste Daroussin room--; 6162a6abeebSBaptiste Daroussin } 6172a6abeebSBaptiste Daroussin } 6182a6abeebSBaptiste Daroussin } 6192a6abeebSBaptiste Daroussin need = want; 6202a6abeebSBaptiste Daroussin } 6212a6abeebSBaptiste Daroussin } else { 6222a6abeebSBaptiste Daroussin while (*t || state) { 6232a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 6242a6abeebSBaptiste Daroussin t += len; 6252a6abeebSBaptiste Daroussin if (pri <= 0) { 6262a6abeebSBaptiste Daroussin if (pri < 0) { 6272a6abeebSBaptiste Daroussin errno = EINVAL; 6282a6abeebSBaptiste Daroussin goto fail; 6292a6abeebSBaptiste Daroussin } 630dee0bbbdSBaptiste Daroussin state = NULL; 6312a6abeebSBaptiste Daroussin continue; 6322a6abeebSBaptiste Daroussin } 6332a6abeebSBaptiste Daroussin 6342a6abeebSBaptiste Daroussin b = xfrm(table, buf, pri, pass); 6352a6abeebSBaptiste Daroussin want += b; 6362a6abeebSBaptiste Daroussin if (room) { 6372a6abeebSBaptiste Daroussin 6382a6abeebSBaptiste Daroussin while (b) { 6392a6abeebSBaptiste Daroussin b--; 6402a6abeebSBaptiste Daroussin if (room) { 6412a6abeebSBaptiste Daroussin *xf++ = buf[b]; 6422a6abeebSBaptiste Daroussin room--; 6432a6abeebSBaptiste Daroussin } 6442a6abeebSBaptiste Daroussin } 6452a6abeebSBaptiste Daroussin } 6462a6abeebSBaptiste Daroussin need = want; 6472a6abeebSBaptiste Daroussin } 6482a6abeebSBaptiste Daroussin } 6492a6abeebSBaptiste Daroussin } 6502a6abeebSBaptiste Daroussin free(tr); 6512a6abeebSBaptiste Daroussin return (need); 6522a6abeebSBaptiste Daroussin 6532a6abeebSBaptiste Daroussin fail: 6542a6abeebSBaptiste Daroussin free(tr); 6552a6abeebSBaptiste Daroussin return ((size_t)(-1)); 6562a6abeebSBaptiste 
Daroussin } 6572a6abeebSBaptiste Daroussin 6582a6abeebSBaptiste Daroussin /* 6592a6abeebSBaptiste Daroussin * __collate_equiv_value returns the primary collation value for the given 6602a6abeebSBaptiste Daroussin * collating symbol specified by str and len. Zero or negative is returned 6612a6abeebSBaptiste Daroussin * if the collating symbol was not found. This function is used by bracket 6622a6abeebSBaptiste Daroussin * code in the TRE regex library. 6632a6abeebSBaptiste Daroussin */ 6642a6abeebSBaptiste Daroussin int 6652a6abeebSBaptiste Daroussin __collate_equiv_value(locale_t locale, const wchar_t *str, size_t len) 6662a6abeebSBaptiste Daroussin { 6672a6abeebSBaptiste Daroussin int32_t e; 6682a6abeebSBaptiste Daroussin 6692a6abeebSBaptiste Daroussin if (len < 1 || len >= COLLATE_STR_LEN) 6702a6abeebSBaptiste Daroussin return (-1); 6712a6abeebSBaptiste Daroussin 6722a6abeebSBaptiste Daroussin FIX_LOCALE(locale); 6732a6abeebSBaptiste Daroussin struct xlocale_collate *table = 6742a6abeebSBaptiste Daroussin (struct xlocale_collate*)locale->components[XLC_COLLATE]; 6752a6abeebSBaptiste Daroussin 6762a6abeebSBaptiste Daroussin if (table->__collate_load_error) 6772a6abeebSBaptiste Daroussin return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1); 6782a6abeebSBaptiste Daroussin 6792a6abeebSBaptiste Daroussin if (len == 1) { 6802a6abeebSBaptiste Daroussin e = -1; 6812a6abeebSBaptiste Daroussin if (*str <= UCHAR_MAX) 6822a6abeebSBaptiste Daroussin e = table->char_pri_table[*str].pri[0]; 683*4644f9beSYuri Pankov else if (table->info->large_count > 0) { 6842a6abeebSBaptiste Daroussin collate_large_t *match_large; 6852a6abeebSBaptiste Daroussin match_large = largesearch(table, *str); 6862a6abeebSBaptiste Daroussin if (match_large) 6872a6abeebSBaptiste Daroussin e = match_large->pri.pri[0]; 6882a6abeebSBaptiste Daroussin } 6892a6abeebSBaptiste Daroussin if (e == 0) 6902a6abeebSBaptiste Daroussin return (1); 6912a6abeebSBaptiste Daroussin return (e > 0 ? 
e : 0); 6922a6abeebSBaptiste Daroussin } 693*4644f9beSYuri Pankov if (table->info->chain_count > 0) { 6942a6abeebSBaptiste Daroussin wchar_t name[COLLATE_STR_LEN]; 6952a6abeebSBaptiste Daroussin collate_chain_t *match_chain; 6962a6abeebSBaptiste Daroussin int clen; 6972a6abeebSBaptiste Daroussin 6982a6abeebSBaptiste Daroussin wcsncpy (name, str, len); 6992a6abeebSBaptiste Daroussin name[len] = 0; 7002a6abeebSBaptiste Daroussin match_chain = chainsearch(table, name, &clen); 7012a6abeebSBaptiste Daroussin if (match_chain) { 7022a6abeebSBaptiste Daroussin e = match_chain->pri[0]; 7032a6abeebSBaptiste Daroussin if (e == 0) 7042a6abeebSBaptiste Daroussin return (1); 7052a6abeebSBaptiste Daroussin return (e < 0 ? -e : e); 7062a6abeebSBaptiste Daroussin } 7072a6abeebSBaptiste Daroussin } 7082a6abeebSBaptiste Daroussin return (0); 7092a6abeebSBaptiste Daroussin } 710