/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
 *		at Electronni Visti IA, Kiev, Ukraine.
 *			All rights reserved.
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 * Portions of this software were developed by David Chisnall
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Adapted to xlocale by John Marino <draco@marino.st>
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "namespace.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include "un-namespace.h"

#include "collate.h"
#include "setlocale.h"
#include "ldpart.h"
#include "libc_private.h"

Chernov 633c87aa1dSDavid Chisnall struct xlocale_collate __xlocale_global_collate = { 642a6abeebSBaptiste Daroussin {{0}, "C"}, 1, 0, 0, 0 653c87aa1dSDavid Chisnall }; 663c87aa1dSDavid Chisnall 673c87aa1dSDavid Chisnall struct xlocale_collate __xlocale_C_collate = { 682a6abeebSBaptiste Daroussin {{0}, "C"}, 1, 0, 0, 0 693c87aa1dSDavid Chisnall }; 70c3d0cca4SAndrey A. Chernov 71a6d2922cSBaptiste Daroussin static int 723c87aa1dSDavid Chisnall __collate_load_tables_l(const char *encoding, struct xlocale_collate *table); 733c87aa1dSDavid Chisnall 743c87aa1dSDavid Chisnall static void 753c87aa1dSDavid Chisnall destruct_collate(void *t) 763c87aa1dSDavid Chisnall { 773c87aa1dSDavid Chisnall struct xlocale_collate *table = t; 782a6abeebSBaptiste Daroussin if (table->map && (table->maplen > 0)) { 792a6abeebSBaptiste Daroussin (void) munmap(table->map, table->maplen); 803c87aa1dSDavid Chisnall } 813c87aa1dSDavid Chisnall free(t); 823c87aa1dSDavid Chisnall } 833c87aa1dSDavid Chisnall 843c87aa1dSDavid Chisnall void * 852a6abeebSBaptiste Daroussin __collate_load(const char *encoding, __unused locale_t unused) 863c87aa1dSDavid Chisnall { 87*dd7c41a3SYuri Pankov if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0 || 88*dd7c41a3SYuri Pankov strncmp(encoding, "C.", 2) == 0) { 893c87aa1dSDavid Chisnall return &__xlocale_C_collate; 903c87aa1dSDavid Chisnall } 913c87aa1dSDavid Chisnall struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1); 923c87aa1dSDavid Chisnall table->header.header.destructor = destruct_collate; 933c87aa1dSDavid Chisnall // FIXME: Make sure that _LDP_CACHE is never returned. 
We should be doing 943c87aa1dSDavid Chisnall // the caching outside of this section 953c87aa1dSDavid Chisnall if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) { 963c87aa1dSDavid Chisnall xlocale_release(table); 973c87aa1dSDavid Chisnall return NULL; 983c87aa1dSDavid Chisnall } 993c87aa1dSDavid Chisnall return table; 1003c87aa1dSDavid Chisnall } 1013c87aa1dSDavid Chisnall 1023c87aa1dSDavid Chisnall /** 1033c87aa1dSDavid Chisnall * Load the collation tables for the specified encoding into the global table. 1043c87aa1dSDavid Chisnall */ 1053c87aa1dSDavid Chisnall int 10676692b80SAndrey A. Chernov __collate_load_tables(const char *encoding) 107c3d0cca4SAndrey A. Chernov { 1085e4bbc69SBaptiste Daroussin 1095e4bbc69SBaptiste Daroussin return (__collate_load_tables_l(encoding, &__xlocale_global_collate)); 1103c87aa1dSDavid Chisnall } 1113c87aa1dSDavid Chisnall 1123c87aa1dSDavid Chisnall int 1133c87aa1dSDavid Chisnall __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) 1143c87aa1dSDavid Chisnall { 1152a6abeebSBaptiste Daroussin int i, chains, z; 116b89704ceSBaptiste Daroussin char *buf; 1172a6abeebSBaptiste Daroussin char *TMP; 1182a6abeebSBaptiste Daroussin char *map; 1192a6abeebSBaptiste Daroussin collate_info_t *info; 1202a6abeebSBaptiste Daroussin struct stat sbuf; 1212a6abeebSBaptiste Daroussin int fd; 122c3d0cca4SAndrey A. Chernov 123332fe837SBaptiste Daroussin table->__collate_load_error = 1; 124332fe837SBaptiste Daroussin 12576692b80SAndrey A. Chernov /* 'encoding' must be already checked. */ 126*dd7c41a3SYuri Pankov if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0 || 127*dd7c41a3SYuri Pankov strncmp(encoding, "C.", 2) == 0) { 12876692b80SAndrey A. Chernov return (_LDP_CACHE); 129377da8e8SAndrey A. Chernov } 13076692b80SAndrey A. Chernov 131dc8507e1SBryan Drewery if (asprintf(&buf, "%s/%s/LC_COLLATE", _PathLocale, encoding) == -1) 132b89704ceSBaptiste Daroussin return (_LDP_ERROR); 13376692b80SAndrey A. 
Chernov 13428a20bb3SBaptiste Daroussin if ((fd = _open(buf, O_RDONLY)) < 0) { 13528a20bb3SBaptiste Daroussin free(buf); 1362a6abeebSBaptiste Daroussin return (_LDP_ERROR); 13728a20bb3SBaptiste Daroussin } 138b89704ceSBaptiste Daroussin free(buf); 1392a6abeebSBaptiste Daroussin if (_fstat(fd, &sbuf) < 0) { 1402a6abeebSBaptiste Daroussin (void) _close(fd); 1418e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1428e52da4dSAndrey A. Chernov } 1432a6abeebSBaptiste Daroussin if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) { 1442a6abeebSBaptiste Daroussin (void) _close(fd); 1452a6abeebSBaptiste Daroussin errno = EINVAL; 1468e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1478e52da4dSAndrey A. Chernov } 1482a6abeebSBaptiste Daroussin map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 1492a6abeebSBaptiste Daroussin (void) _close(fd); 1502a6abeebSBaptiste Daroussin if ((TMP = map) == NULL) { 1518e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1528e52da4dSAndrey A. Chernov } 1532a6abeebSBaptiste Daroussin 1542a6abeebSBaptiste Daroussin if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) { 1552a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1562a6abeebSBaptiste Daroussin errno = EINVAL; 1578e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1588e52da4dSAndrey A. 
Chernov } 1592a6abeebSBaptiste Daroussin TMP += COLLATE_STR_LEN; 1602a6abeebSBaptiste Daroussin 1612a6abeebSBaptiste Daroussin info = (void *)TMP; 1622a6abeebSBaptiste Daroussin TMP += sizeof (*info); 1632a6abeebSBaptiste Daroussin 1642a6abeebSBaptiste Daroussin if ((info->directive_count < 1) || 1652a6abeebSBaptiste Daroussin (info->directive_count >= COLL_WEIGHTS_MAX) || 1664644f9beSYuri Pankov ((chains = info->chain_count) < 0)) { 1672a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1682a6abeebSBaptiste Daroussin errno = EINVAL; 1692a6abeebSBaptiste Daroussin return (_LDP_ERROR); 1702a6abeebSBaptiste Daroussin } 1712a6abeebSBaptiste Daroussin 1722a6abeebSBaptiste Daroussin i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) + 1732a6abeebSBaptiste Daroussin (sizeof (collate_chain_t) * chains) + 1744644f9beSYuri Pankov (sizeof (collate_large_t) * info->large_count); 175332fe837SBaptiste Daroussin for (z = 0; z < info->directive_count; z++) { 1764644f9beSYuri Pankov i += sizeof (collate_subst_t) * info->subst_count[z]; 1772a6abeebSBaptiste Daroussin } 1782a6abeebSBaptiste Daroussin if (i != (sbuf.st_size - (TMP - map))) { 1792a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1802a6abeebSBaptiste Daroussin errno = EINVAL; 1812a6abeebSBaptiste Daroussin return (_LDP_ERROR); 1822a6abeebSBaptiste Daroussin } 1832a6abeebSBaptiste Daroussin 184332fe837SBaptiste Daroussin table->info = info; 1852a6abeebSBaptiste Daroussin table->char_pri_table = (void *)TMP; 1862a6abeebSBaptiste Daroussin TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1); 1872a6abeebSBaptiste Daroussin 1882a6abeebSBaptiste Daroussin for (z = 0; z < info->directive_count; z++) { 1894644f9beSYuri Pankov if (info->subst_count[z] > 0) { 1902a6abeebSBaptiste Daroussin table->subst_table[z] = (void *)TMP; 1914644f9beSYuri Pankov TMP += info->subst_count[z] * sizeof (collate_subst_t); 1922a6abeebSBaptiste Daroussin } else { 1932a6abeebSBaptiste Daroussin table->subst_table[z] = NULL; 
1942a6abeebSBaptiste Daroussin } 1952a6abeebSBaptiste Daroussin } 1962a6abeebSBaptiste Daroussin 1972a6abeebSBaptiste Daroussin if (chains > 0) { 1982a6abeebSBaptiste Daroussin table->chain_pri_table = (void *)TMP; 1992a6abeebSBaptiste Daroussin TMP += chains * sizeof (collate_chain_t); 2008e52da4dSAndrey A. Chernov } else 2012a6abeebSBaptiste Daroussin table->chain_pri_table = NULL; 2024644f9beSYuri Pankov if (info->large_count > 0) 2032a6abeebSBaptiste Daroussin table->large_pri_table = (void *)TMP; 2042a6abeebSBaptiste Daroussin else 2052a6abeebSBaptiste Daroussin table->large_pri_table = NULL; 2068e52da4dSAndrey A. Chernov 207bb4317bfSDavid Chisnall table->__collate_load_error = 0; 20876692b80SAndrey A. Chernov return (_LDP_LOADED); 209c3d0cca4SAndrey A. Chernov } 210c3d0cca4SAndrey A. Chernov 211332fe837SBaptiste Daroussin static const int32_t * 2122a6abeebSBaptiste Daroussin substsearch(struct xlocale_collate *table, const wchar_t key, int pass) 2132a6abeebSBaptiste Daroussin { 214332fe837SBaptiste Daroussin const collate_subst_t *p; 2154644f9beSYuri Pankov int n = table->info->subst_count[pass]; 2162a6abeebSBaptiste Daroussin 2172a6abeebSBaptiste Daroussin if (n == 0) 2182a6abeebSBaptiste Daroussin return (NULL); 2192a6abeebSBaptiste Daroussin 2202a6abeebSBaptiste Daroussin if (pass >= table->info->directive_count) 2212a6abeebSBaptiste Daroussin return (NULL); 2222a6abeebSBaptiste Daroussin 2232a6abeebSBaptiste Daroussin if (!(key & COLLATE_SUBST_PRIORITY)) 2242a6abeebSBaptiste Daroussin return (NULL); 2252a6abeebSBaptiste Daroussin 2262a6abeebSBaptiste Daroussin p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY); 2274644f9beSYuri Pankov assert(p->key == key); 22877bc2a1cSRuslan Bukin 2292a6abeebSBaptiste Daroussin return (p->pri); 230c3d0cca4SAndrey A. 
Chernov } 2312a6abeebSBaptiste Daroussin 2322a6abeebSBaptiste Daroussin static collate_chain_t * 2332a6abeebSBaptiste Daroussin chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len) 2342a6abeebSBaptiste Daroussin { 23576e6db68SBaptiste Daroussin int low = 0; 2364644f9beSYuri Pankov int high = table->info->chain_count - 1; 2372a6abeebSBaptiste Daroussin int next, compar, l; 2382a6abeebSBaptiste Daroussin collate_chain_t *p; 23976e6db68SBaptiste Daroussin collate_chain_t *tab = table->chain_pri_table; 2402a6abeebSBaptiste Daroussin 24176e6db68SBaptiste Daroussin if (high < 0) 2422a6abeebSBaptiste Daroussin return (NULL); 2432a6abeebSBaptiste Daroussin 2442a6abeebSBaptiste Daroussin while (low <= high) { 2452a6abeebSBaptiste Daroussin next = (low + high) / 2; 2462a6abeebSBaptiste Daroussin p = tab + next; 2474644f9beSYuri Pankov compar = *key - *p->str; 2482a6abeebSBaptiste Daroussin if (compar == 0) { 2492a6abeebSBaptiste Daroussin l = wcsnlen(p->str, COLLATE_STR_LEN); 2502a6abeebSBaptiste Daroussin compar = wcsncmp(key, p->str, l); 2512a6abeebSBaptiste Daroussin if (compar == 0) { 2522a6abeebSBaptiste Daroussin *len = l; 2532a6abeebSBaptiste Daroussin return (p); 254c3d0cca4SAndrey A. 
Chernov } 2552a6abeebSBaptiste Daroussin } 2562a6abeebSBaptiste Daroussin if (compar > 0) 2572a6abeebSBaptiste Daroussin low = next + 1; 2582a6abeebSBaptiste Daroussin else 2592a6abeebSBaptiste Daroussin high = next - 1; 2602a6abeebSBaptiste Daroussin } 2612a6abeebSBaptiste Daroussin return (NULL); 2622a6abeebSBaptiste Daroussin } 2632a6abeebSBaptiste Daroussin 2642a6abeebSBaptiste Daroussin static collate_large_t * 2652a6abeebSBaptiste Daroussin largesearch(struct xlocale_collate *table, const wchar_t key) 2662a6abeebSBaptiste Daroussin { 2672a6abeebSBaptiste Daroussin int low = 0; 2684644f9beSYuri Pankov int high = table->info->large_count - 1; 2692a6abeebSBaptiste Daroussin int next, compar; 2702a6abeebSBaptiste Daroussin collate_large_t *p; 2712a6abeebSBaptiste Daroussin collate_large_t *tab = table->large_pri_table; 2722a6abeebSBaptiste Daroussin 27376e6db68SBaptiste Daroussin if (high < 0) 2742a6abeebSBaptiste Daroussin return (NULL); 2752a6abeebSBaptiste Daroussin 2762a6abeebSBaptiste Daroussin while (low <= high) { 2772a6abeebSBaptiste Daroussin next = (low + high) / 2; 2782a6abeebSBaptiste Daroussin p = tab + next; 2794644f9beSYuri Pankov compar = key - p->val; 2802a6abeebSBaptiste Daroussin if (compar == 0) 2812a6abeebSBaptiste Daroussin return (p); 2822a6abeebSBaptiste Daroussin if (compar > 0) 2832a6abeebSBaptiste Daroussin low = next + 1; 2842a6abeebSBaptiste Daroussin else 2852a6abeebSBaptiste Daroussin high = next - 1; 2862a6abeebSBaptiste Daroussin } 2872a6abeebSBaptiste Daroussin return (NULL); 288c3d0cca4SAndrey A. Chernov } 289c3d0cca4SAndrey A. Chernov 290c3d0cca4SAndrey A. Chernov void 2912a6abeebSBaptiste Daroussin _collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len, 2922a6abeebSBaptiste Daroussin int *pri, int which, const int **state) 293c3d0cca4SAndrey A. 
Chernov { 2942a6abeebSBaptiste Daroussin collate_chain_t *p2; 2952a6abeebSBaptiste Daroussin collate_large_t *match; 2962a6abeebSBaptiste Daroussin int p, l; 2972a6abeebSBaptiste Daroussin const int *sptr; 298c3d0cca4SAndrey A. Chernov 2992a6abeebSBaptiste Daroussin /* 3002a6abeebSBaptiste Daroussin * If this is the "last" pass for the UNDEFINED, then 3012a6abeebSBaptiste Daroussin * we just return the priority itself. 3022a6abeebSBaptiste Daroussin */ 3032a6abeebSBaptiste Daroussin if (which >= table->info->directive_count) { 3042a6abeebSBaptiste Daroussin *pri = *t; 305c3d0cca4SAndrey A. Chernov *len = 1; 3062a6abeebSBaptiste Daroussin *state = NULL; 307c3d0cca4SAndrey A. Chernov return; 308c3d0cca4SAndrey A. Chernov } 3092a6abeebSBaptiste Daroussin 3102a6abeebSBaptiste Daroussin /* 3112a6abeebSBaptiste Daroussin * If we have remaining substitution data from a previous 3122a6abeebSBaptiste Daroussin * call, consume it first. 3132a6abeebSBaptiste Daroussin */ 3142a6abeebSBaptiste Daroussin if ((sptr = *state) != NULL) { 3152a6abeebSBaptiste Daroussin *pri = *sptr; 3162a6abeebSBaptiste Daroussin sptr++; 317dee0bbbdSBaptiste Daroussin if ((sptr == *state) || (sptr == NULL)) 31876e6db68SBaptiste Daroussin *state = NULL; 31976e6db68SBaptiste Daroussin else 32076e6db68SBaptiste Daroussin *state = sptr; 3212a6abeebSBaptiste Daroussin *len = 0; 3222a6abeebSBaptiste Daroussin return; 323c3d0cca4SAndrey A. Chernov } 324c3d0cca4SAndrey A. Chernov 3252a6abeebSBaptiste Daroussin /* No active substitutions */ 3262a6abeebSBaptiste Daroussin *len = 1; 3272a6abeebSBaptiste Daroussin 3282a6abeebSBaptiste Daroussin /* 32932223c1bSPedro F. Giffuni * Check for composites such as diphthongs that collate as a 3302a6abeebSBaptiste Daroussin * single element (aka chains or collating-elements). 
3312a6abeebSBaptiste Daroussin */ 3322a6abeebSBaptiste Daroussin if (((p2 = chainsearch(table, t, &l)) != NULL) && 3332a6abeebSBaptiste Daroussin ((p = p2->pri[which]) >= 0)) { 3342a6abeebSBaptiste Daroussin 3352a6abeebSBaptiste Daroussin *len = l; 3362a6abeebSBaptiste Daroussin *pri = p; 3372a6abeebSBaptiste Daroussin 3382a6abeebSBaptiste Daroussin } else if (*t <= UCHAR_MAX) { 3392a6abeebSBaptiste Daroussin 3402a6abeebSBaptiste Daroussin /* 3412a6abeebSBaptiste Daroussin * Character is a small (8-bit) character. 3422a6abeebSBaptiste Daroussin * We just look these up directly for speed. 3432a6abeebSBaptiste Daroussin */ 3444644f9beSYuri Pankov *pri = table->char_pri_table[*t].pri[which]; 3452a6abeebSBaptiste Daroussin 3464644f9beSYuri Pankov } else if ((table->info->large_count > 0) && 3472a6abeebSBaptiste Daroussin ((match = largesearch(table, *t)) != NULL)) { 3482a6abeebSBaptiste Daroussin 3492a6abeebSBaptiste Daroussin /* 3502a6abeebSBaptiste Daroussin * Character was found in the extended table. 3512a6abeebSBaptiste Daroussin */ 3524644f9beSYuri Pankov *pri = match->pri.pri[which]; 3532a6abeebSBaptiste Daroussin 3542a6abeebSBaptiste Daroussin } else { 3552a6abeebSBaptiste Daroussin /* 3562a6abeebSBaptiste Daroussin * Character lacks a specific definition. 3572a6abeebSBaptiste Daroussin */ 3582a6abeebSBaptiste Daroussin if (table->info->directive[which] & DIRECTIVE_UNDEFINED) { 3592a6abeebSBaptiste Daroussin /* Mask off sign bit to prevent ordering confusion. */ 3602a6abeebSBaptiste Daroussin *pri = (*t & COLLATE_MAX_PRIORITY); 3612a6abeebSBaptiste Daroussin } else { 3624644f9beSYuri Pankov *pri = table->info->undef_pri[which]; 3632a6abeebSBaptiste Daroussin } 3642a6abeebSBaptiste Daroussin /* No substitutions for undefined characters! */ 3652a6abeebSBaptiste Daroussin return; 3662a6abeebSBaptiste Daroussin } 3672a6abeebSBaptiste Daroussin 3682a6abeebSBaptiste Daroussin /* 3692a6abeebSBaptiste Daroussin * Try substituting (expanding) the character. 
We are 3702a6abeebSBaptiste Daroussin * currently doing this *after* the chain compression. I 3712a6abeebSBaptiste Daroussin * think it should not matter, but this way might be slightly 3722a6abeebSBaptiste Daroussin * faster. 3732a6abeebSBaptiste Daroussin * 3742a6abeebSBaptiste Daroussin * We do this after the priority search, as this will help us 3752a6abeebSBaptiste Daroussin * to identify a single key value. In order for this to work, 3762a6abeebSBaptiste Daroussin * its important that the priority assigned to a given element 3772a6abeebSBaptiste Daroussin * to be substituted be unique for that level. The localedef 3782a6abeebSBaptiste Daroussin * code ensures this for us. 3792a6abeebSBaptiste Daroussin */ 3802a6abeebSBaptiste Daroussin if ((sptr = substsearch(table, *pri, which)) != NULL) { 3814644f9beSYuri Pankov if ((*pri = *sptr) > 0) { 3822a6abeebSBaptiste Daroussin sptr++; 3834644f9beSYuri Pankov *state = *sptr ? sptr : NULL; 3842a6abeebSBaptiste Daroussin } 3852a6abeebSBaptiste Daroussin } 3862a6abeebSBaptiste Daroussin 3872a6abeebSBaptiste Daroussin } 3882a6abeebSBaptiste Daroussin 3892a6abeebSBaptiste Daroussin /* 3902a6abeebSBaptiste Daroussin * This is the meaty part of wcsxfrm & strxfrm. Note that it does 3912a6abeebSBaptiste Daroussin * NOT NULL terminate. That is left to the caller. 3922a6abeebSBaptiste Daroussin */ 3932a6abeebSBaptiste Daroussin size_t 3942a6abeebSBaptiste Daroussin _collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf, 3952a6abeebSBaptiste Daroussin size_t room) 396c3d0cca4SAndrey A. 
Chernov { 3972a6abeebSBaptiste Daroussin int pri; 3982a6abeebSBaptiste Daroussin int len; 3992a6abeebSBaptiste Daroussin const wchar_t *t; 4002a6abeebSBaptiste Daroussin wchar_t *tr = NULL; 4012a6abeebSBaptiste Daroussin int direc; 4022a6abeebSBaptiste Daroussin int pass; 4032a6abeebSBaptiste Daroussin const int32_t *state; 4042a6abeebSBaptiste Daroussin size_t want = 0; 4052a6abeebSBaptiste Daroussin size_t need = 0; 406332fe837SBaptiste Daroussin int ndir = table->info->directive_count; 407c3d0cca4SAndrey A. Chernov 408332fe837SBaptiste Daroussin assert(src); 409332fe837SBaptiste Daroussin 410332fe837SBaptiste Daroussin for (pass = 0; pass <= ndir; pass++) { 4112a6abeebSBaptiste Daroussin 4122a6abeebSBaptiste Daroussin state = NULL; 4132a6abeebSBaptiste Daroussin 4142a6abeebSBaptiste Daroussin if (pass != 0) { 4152a6abeebSBaptiste Daroussin /* insert level separator from the previous pass */ 4162a6abeebSBaptiste Daroussin if (room) { 4172a6abeebSBaptiste Daroussin *xf++ = 1; 4182a6abeebSBaptiste Daroussin room--; 4192a6abeebSBaptiste Daroussin } 4202a6abeebSBaptiste Daroussin want++; 421c3d0cca4SAndrey A. Chernov } 422c3d0cca4SAndrey A. 
Chernov 4232a6abeebSBaptiste Daroussin /* special pass for undefined */ 424332fe837SBaptiste Daroussin if (pass == ndir) { 4252a6abeebSBaptiste Daroussin direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 4262a6abeebSBaptiste Daroussin } else { 4272a6abeebSBaptiste Daroussin direc = table->info->directive[pass]; 4282a6abeebSBaptiste Daroussin } 4292a6abeebSBaptiste Daroussin 4302a6abeebSBaptiste Daroussin t = src; 4312a6abeebSBaptiste Daroussin 4322a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_BACKWARD) { 4332a6abeebSBaptiste Daroussin wchar_t *bp, *fp, c; 4342a6abeebSBaptiste Daroussin free(tr); 4352a6abeebSBaptiste Daroussin if ((tr = wcsdup(t)) == NULL) { 4362a6abeebSBaptiste Daroussin errno = ENOMEM; 4372a6abeebSBaptiste Daroussin goto fail; 4382a6abeebSBaptiste Daroussin } 4392a6abeebSBaptiste Daroussin bp = tr; 4402a6abeebSBaptiste Daroussin fp = tr + wcslen(tr) - 1; 4412a6abeebSBaptiste Daroussin while (bp < fp) { 4422a6abeebSBaptiste Daroussin c = *bp; 4432a6abeebSBaptiste Daroussin *bp++ = *fp; 4442a6abeebSBaptiste Daroussin *fp-- = c; 4452a6abeebSBaptiste Daroussin } 4462a6abeebSBaptiste Daroussin t = (const wchar_t *)tr; 4472a6abeebSBaptiste Daroussin } 4482a6abeebSBaptiste Daroussin 4492a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_POSITION) { 4502a6abeebSBaptiste Daroussin while (*t || state) { 4512a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 4522a6abeebSBaptiste Daroussin t += len; 4532a6abeebSBaptiste Daroussin if (pri <= 0) { 4542a6abeebSBaptiste Daroussin if (pri < 0) { 4552a6abeebSBaptiste Daroussin errno = EINVAL; 4562a6abeebSBaptiste Daroussin goto fail; 4572a6abeebSBaptiste Daroussin } 458dee0bbbdSBaptiste Daroussin state = NULL; 4592a6abeebSBaptiste Daroussin pri = COLLATE_MAX_PRIORITY; 4602a6abeebSBaptiste Daroussin } 4612a6abeebSBaptiste Daroussin if (room) { 4622a6abeebSBaptiste Daroussin *xf++ = pri; 4632a6abeebSBaptiste Daroussin room--; 4642a6abeebSBaptiste Daroussin } 4652a6abeebSBaptiste Daroussin 
want++; 4662a6abeebSBaptiste Daroussin need = want; 4672a6abeebSBaptiste Daroussin } 4682a6abeebSBaptiste Daroussin } else { 4692a6abeebSBaptiste Daroussin while (*t || state) { 4702a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 4712a6abeebSBaptiste Daroussin t += len; 4722a6abeebSBaptiste Daroussin if (pri <= 0) { 4732a6abeebSBaptiste Daroussin if (pri < 0) { 4742a6abeebSBaptiste Daroussin errno = EINVAL; 4752a6abeebSBaptiste Daroussin goto fail; 4762a6abeebSBaptiste Daroussin } 477dee0bbbdSBaptiste Daroussin state = NULL; 4782a6abeebSBaptiste Daroussin continue; 4792a6abeebSBaptiste Daroussin } 4802a6abeebSBaptiste Daroussin if (room) { 4812a6abeebSBaptiste Daroussin *xf++ = pri; 4822a6abeebSBaptiste Daroussin room--; 4832a6abeebSBaptiste Daroussin } 4842a6abeebSBaptiste Daroussin want++; 4852a6abeebSBaptiste Daroussin need = want; 4862a6abeebSBaptiste Daroussin } 4872a6abeebSBaptiste Daroussin } 4882a6abeebSBaptiste Daroussin } 4892a6abeebSBaptiste Daroussin free(tr); 4902a6abeebSBaptiste Daroussin return (need); 4912a6abeebSBaptiste Daroussin 4922a6abeebSBaptiste Daroussin fail: 4932a6abeebSBaptiste Daroussin free(tr); 4942a6abeebSBaptiste Daroussin return ((size_t)(-1)); 4952a6abeebSBaptiste Daroussin } 4962a6abeebSBaptiste Daroussin 4972a6abeebSBaptiste Daroussin /* 4982a6abeebSBaptiste Daroussin * In the non-POSIX case, we transform each character into a string of 4992a6abeebSBaptiste Daroussin * characters representing the character's priority. Since char is usually 5002a6abeebSBaptiste Daroussin * signed, we are limited by 7 bits per byte. To avoid zero, we need to add 5012a6abeebSBaptiste Daroussin * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 5022a6abeebSBaptiste Daroussin * bits per byte. 5032a6abeebSBaptiste Daroussin * 5042a6abeebSBaptiste Daroussin * It turns out that we sometimes have real priorities that are 5052a6abeebSBaptiste Daroussin * 31-bits wide. 
(But: be careful using priorities where the high 5062a6abeebSBaptiste Daroussin * order bit is set -- i.e. the priority is negative. The sort order 5072a6abeebSBaptiste Daroussin * may be surprising!) 5082a6abeebSBaptiste Daroussin * 5092a6abeebSBaptiste Daroussin * TODO: This would be a good area to optimize somewhat. It turns out 5102a6abeebSBaptiste Daroussin * that real prioririties *except for the last UNDEFINED pass* are generally 5112a6abeebSBaptiste Daroussin * very small. We need the localedef code to precalculate the max 5122a6abeebSBaptiste Daroussin * priority for us, and ideally also give us a mask, and then we could 5132a6abeebSBaptiste Daroussin * severely limit what we expand to. 5142a6abeebSBaptiste Daroussin */ 5152a6abeebSBaptiste Daroussin #define XFRM_BYTES 6 5162a6abeebSBaptiste Daroussin #define XFRM_OFFSET ('0') /* make all printable characters */ 5172a6abeebSBaptiste Daroussin #define XFRM_SHIFT 6 5182a6abeebSBaptiste Daroussin #define XFRM_MASK ((1 << XFRM_SHIFT) - 1) 5192a6abeebSBaptiste Daroussin #define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */ 5202a6abeebSBaptiste Daroussin 5212a6abeebSBaptiste Daroussin static int 5222a6abeebSBaptiste Daroussin xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass) 523926f20c9SAndrey A. Chernov { 5242a6abeebSBaptiste Daroussin /* we use unsigned to ensure zero fill on right shift */ 5254644f9beSYuri Pankov uint32_t val = (uint32_t)table->info->pri_count[pass]; 5262a6abeebSBaptiste Daroussin int nc = 0; 527926f20c9SAndrey A. Chernov 5282a6abeebSBaptiste Daroussin while (val) { 5292a6abeebSBaptiste Daroussin *p = (pri & XFRM_MASK) + XFRM_OFFSET; 5302a6abeebSBaptiste Daroussin pri >>= XFRM_SHIFT; 5312a6abeebSBaptiste Daroussin val >>= XFRM_SHIFT; 5322a6abeebSBaptiste Daroussin p++; 5332a6abeebSBaptiste Daroussin nc++; 5342a6abeebSBaptiste Daroussin } 5352a6abeebSBaptiste Daroussin return (nc); 536926f20c9SAndrey A. Chernov } 537926f20c9SAndrey A. 
Chernov 5382a6abeebSBaptiste Daroussin size_t 5392a6abeebSBaptiste Daroussin _collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf, 5402a6abeebSBaptiste Daroussin size_t room) 541c3d0cca4SAndrey A. Chernov { 5422a6abeebSBaptiste Daroussin int pri; 5432a6abeebSBaptiste Daroussin int len; 5442a6abeebSBaptiste Daroussin const wchar_t *t; 5452a6abeebSBaptiste Daroussin wchar_t *tr = NULL; 5462a6abeebSBaptiste Daroussin int direc; 5472a6abeebSBaptiste Daroussin int pass; 5482a6abeebSBaptiste Daroussin const int32_t *state; 5492a6abeebSBaptiste Daroussin size_t want = 0; 5502a6abeebSBaptiste Daroussin size_t need = 0; 5512a6abeebSBaptiste Daroussin int b; 5522a6abeebSBaptiste Daroussin uint8_t buf[XFRM_BYTES]; 553332fe837SBaptiste Daroussin int ndir = table->info->directive_count; 554c3d0cca4SAndrey A. Chernov 555332fe837SBaptiste Daroussin assert(src); 556332fe837SBaptiste Daroussin 557332fe837SBaptiste Daroussin for (pass = 0; pass <= ndir; pass++) { 5582a6abeebSBaptiste Daroussin 5592a6abeebSBaptiste Daroussin state = NULL; 5602a6abeebSBaptiste Daroussin 5612a6abeebSBaptiste Daroussin if (pass != 0) { 5622a6abeebSBaptiste Daroussin /* insert level separator from the previous pass */ 5632a6abeebSBaptiste Daroussin if (room) { 5642a6abeebSBaptiste Daroussin *xf++ = XFRM_SEP; 5652a6abeebSBaptiste Daroussin room--; 566c3d0cca4SAndrey A. 
Chernov } 5672a6abeebSBaptiste Daroussin want++; 5682a6abeebSBaptiste Daroussin } 5692a6abeebSBaptiste Daroussin 5702a6abeebSBaptiste Daroussin /* special pass for undefined */ 571332fe837SBaptiste Daroussin if (pass == ndir) { 5722a6abeebSBaptiste Daroussin direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 5732a6abeebSBaptiste Daroussin } else { 5742a6abeebSBaptiste Daroussin direc = table->info->directive[pass]; 5752a6abeebSBaptiste Daroussin } 5762a6abeebSBaptiste Daroussin 5772a6abeebSBaptiste Daroussin t = src; 5782a6abeebSBaptiste Daroussin 5792a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_BACKWARD) { 5802a6abeebSBaptiste Daroussin wchar_t *bp, *fp, c; 5812a6abeebSBaptiste Daroussin free(tr); 5822a6abeebSBaptiste Daroussin if ((tr = wcsdup(t)) == NULL) { 5832a6abeebSBaptiste Daroussin errno = ENOMEM; 5842a6abeebSBaptiste Daroussin goto fail; 5852a6abeebSBaptiste Daroussin } 5862a6abeebSBaptiste Daroussin bp = tr; 5872a6abeebSBaptiste Daroussin fp = tr + wcslen(tr) - 1; 5882a6abeebSBaptiste Daroussin while (bp < fp) { 5892a6abeebSBaptiste Daroussin c = *bp; 5902a6abeebSBaptiste Daroussin *bp++ = *fp; 5912a6abeebSBaptiste Daroussin *fp-- = c; 5922a6abeebSBaptiste Daroussin } 5932a6abeebSBaptiste Daroussin t = (const wchar_t *)tr; 5942a6abeebSBaptiste Daroussin } 5952a6abeebSBaptiste Daroussin 5962a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_POSITION) { 5972a6abeebSBaptiste Daroussin while (*t || state) { 5982a6abeebSBaptiste Daroussin 5992a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 6002a6abeebSBaptiste Daroussin t += len; 6012a6abeebSBaptiste Daroussin if (pri <= 0) { 6022a6abeebSBaptiste Daroussin if (pri < 0) { 6032a6abeebSBaptiste Daroussin errno = EINVAL; 6042a6abeebSBaptiste Daroussin goto fail; 6052a6abeebSBaptiste Daroussin } 606dee0bbbdSBaptiste Daroussin state = NULL; 6072a6abeebSBaptiste Daroussin pri = COLLATE_MAX_PRIORITY; 6082a6abeebSBaptiste Daroussin } 6092a6abeebSBaptiste Daroussin 6102a6abeebSBaptiste 
Daroussin b = xfrm(table, buf, pri, pass); 6112a6abeebSBaptiste Daroussin want += b; 6122a6abeebSBaptiste Daroussin if (room) { 6132a6abeebSBaptiste Daroussin while (b) { 6142a6abeebSBaptiste Daroussin b--; 6152a6abeebSBaptiste Daroussin if (room) { 6162a6abeebSBaptiste Daroussin *xf++ = buf[b]; 6172a6abeebSBaptiste Daroussin room--; 6182a6abeebSBaptiste Daroussin } 6192a6abeebSBaptiste Daroussin } 6202a6abeebSBaptiste Daroussin } 6212a6abeebSBaptiste Daroussin need = want; 6222a6abeebSBaptiste Daroussin } 6232a6abeebSBaptiste Daroussin } else { 6242a6abeebSBaptiste Daroussin while (*t || state) { 6252a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 6262a6abeebSBaptiste Daroussin t += len; 6272a6abeebSBaptiste Daroussin if (pri <= 0) { 6282a6abeebSBaptiste Daroussin if (pri < 0) { 6292a6abeebSBaptiste Daroussin errno = EINVAL; 6302a6abeebSBaptiste Daroussin goto fail; 6312a6abeebSBaptiste Daroussin } 632dee0bbbdSBaptiste Daroussin state = NULL; 6332a6abeebSBaptiste Daroussin continue; 6342a6abeebSBaptiste Daroussin } 6352a6abeebSBaptiste Daroussin 6362a6abeebSBaptiste Daroussin b = xfrm(table, buf, pri, pass); 6372a6abeebSBaptiste Daroussin want += b; 6382a6abeebSBaptiste Daroussin if (room) { 6392a6abeebSBaptiste Daroussin 6402a6abeebSBaptiste Daroussin while (b) { 6412a6abeebSBaptiste Daroussin b--; 6422a6abeebSBaptiste Daroussin if (room) { 6432a6abeebSBaptiste Daroussin *xf++ = buf[b]; 6442a6abeebSBaptiste Daroussin room--; 6452a6abeebSBaptiste Daroussin } 6462a6abeebSBaptiste Daroussin } 6472a6abeebSBaptiste Daroussin } 6482a6abeebSBaptiste Daroussin need = want; 6492a6abeebSBaptiste Daroussin } 6502a6abeebSBaptiste Daroussin } 6512a6abeebSBaptiste Daroussin } 6522a6abeebSBaptiste Daroussin free(tr); 6532a6abeebSBaptiste Daroussin return (need); 6542a6abeebSBaptiste Daroussin 6552a6abeebSBaptiste Daroussin fail: 6562a6abeebSBaptiste Daroussin free(tr); 6572a6abeebSBaptiste Daroussin return ((size_t)(-1)); 6582a6abeebSBaptiste 
Daroussin } 6592a6abeebSBaptiste Daroussin 6602a6abeebSBaptiste Daroussin /* 6612a6abeebSBaptiste Daroussin * __collate_equiv_value returns the primary collation value for the given 6622a6abeebSBaptiste Daroussin * collating symbol specified by str and len. Zero or negative is returned 6632a6abeebSBaptiste Daroussin * if the collating symbol was not found. This function is used by bracket 6642a6abeebSBaptiste Daroussin * code in the TRE regex library. 6652a6abeebSBaptiste Daroussin */ 6662a6abeebSBaptiste Daroussin int 6672a6abeebSBaptiste Daroussin __collate_equiv_value(locale_t locale, const wchar_t *str, size_t len) 6682a6abeebSBaptiste Daroussin { 6692a6abeebSBaptiste Daroussin int32_t e; 6702a6abeebSBaptiste Daroussin 6712a6abeebSBaptiste Daroussin if (len < 1 || len >= COLLATE_STR_LEN) 6722a6abeebSBaptiste Daroussin return (-1); 6732a6abeebSBaptiste Daroussin 6742a6abeebSBaptiste Daroussin FIX_LOCALE(locale); 6752a6abeebSBaptiste Daroussin struct xlocale_collate *table = 6762a6abeebSBaptiste Daroussin (struct xlocale_collate*)locale->components[XLC_COLLATE]; 6772a6abeebSBaptiste Daroussin 6782a6abeebSBaptiste Daroussin if (table->__collate_load_error) 6792a6abeebSBaptiste Daroussin return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1); 6802a6abeebSBaptiste Daroussin 6812a6abeebSBaptiste Daroussin if (len == 1) { 6822a6abeebSBaptiste Daroussin e = -1; 6832a6abeebSBaptiste Daroussin if (*str <= UCHAR_MAX) 6842a6abeebSBaptiste Daroussin e = table->char_pri_table[*str].pri[0]; 6854644f9beSYuri Pankov else if (table->info->large_count > 0) { 6862a6abeebSBaptiste Daroussin collate_large_t *match_large; 6872a6abeebSBaptiste Daroussin match_large = largesearch(table, *str); 6882a6abeebSBaptiste Daroussin if (match_large) 6892a6abeebSBaptiste Daroussin e = match_large->pri.pri[0]; 6902a6abeebSBaptiste Daroussin } 6912a6abeebSBaptiste Daroussin if (e == 0) 6922a6abeebSBaptiste Daroussin return (1); 6932a6abeebSBaptiste Daroussin return (e > 0 ? 
e : 0); 6942a6abeebSBaptiste Daroussin } 6954644f9beSYuri Pankov if (table->info->chain_count > 0) { 6962a6abeebSBaptiste Daroussin wchar_t name[COLLATE_STR_LEN]; 6972a6abeebSBaptiste Daroussin collate_chain_t *match_chain; 6982a6abeebSBaptiste Daroussin int clen; 6992a6abeebSBaptiste Daroussin 7002a6abeebSBaptiste Daroussin wcsncpy (name, str, len); 7012a6abeebSBaptiste Daroussin name[len] = 0; 7022a6abeebSBaptiste Daroussin match_chain = chainsearch(table, name, &clen); 7032a6abeebSBaptiste Daroussin if (match_chain) { 7042a6abeebSBaptiste Daroussin e = match_chain->pri[0]; 7052a6abeebSBaptiste Daroussin if (e == 0) 7062a6abeebSBaptiste Daroussin return (1); 7072a6abeebSBaptiste Daroussin return (e < 0 ? -e : e); 7082a6abeebSBaptiste Daroussin } 7092a6abeebSBaptiste Daroussin } 7102a6abeebSBaptiste Daroussin return (0); 7112a6abeebSBaptiste Daroussin } 712