1c3d0cca4SAndrey A. Chernov /*- 27b247341SBaptiste Daroussin * Copyright 2014 Garrett D'Amore <garrett@damore.org> 3332fe837SBaptiste Daroussin * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 4c3d0cca4SAndrey A. Chernov * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> 5c3d0cca4SAndrey A. Chernov * at Electronni Visti IA, Kiev, Ukraine. 6c3d0cca4SAndrey A. Chernov * All rights reserved. 7c3d0cca4SAndrey A. Chernov * 83c87aa1dSDavid Chisnall * Copyright (c) 2011 The FreeBSD Foundation 93c87aa1dSDavid Chisnall * All rights reserved. 103c87aa1dSDavid Chisnall * Portions of this software were developed by David Chisnall 113c87aa1dSDavid Chisnall * under sponsorship from the FreeBSD Foundation. 123c87aa1dSDavid Chisnall * 13c3d0cca4SAndrey A. Chernov * Redistribution and use in source and binary forms, with or without 14c3d0cca4SAndrey A. Chernov * modification, are permitted provided that the following conditions 15c3d0cca4SAndrey A. Chernov * are met: 16c3d0cca4SAndrey A. Chernov * 1. Redistributions of source code must retain the above copyright 17c3d0cca4SAndrey A. Chernov * notice, this list of conditions and the following disclaimer. 18c3d0cca4SAndrey A. Chernov * 2. Redistributions in binary form must reproduce the above copyright 19c3d0cca4SAndrey A. Chernov * notice, this list of conditions and the following disclaimer in the 20c3d0cca4SAndrey A. Chernov * documentation and/or other materials provided with the distribution. 21c3d0cca4SAndrey A. Chernov * 22c3d0cca4SAndrey A. Chernov * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 23c3d0cca4SAndrey A. Chernov * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24c3d0cca4SAndrey A. Chernov * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25c3d0cca4SAndrey A. Chernov * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 26c3d0cca4SAndrey A. Chernov * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27c3d0cca4SAndrey A. Chernov * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28c3d0cca4SAndrey A. Chernov * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29c3d0cca4SAndrey A. Chernov * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30c3d0cca4SAndrey A. Chernov * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31c3d0cca4SAndrey A. Chernov * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32c3d0cca4SAndrey A. Chernov * SUCH DAMAGE. 332a6abeebSBaptiste Daroussin * 342a6abeebSBaptiste Daroussin * Adapted to xlocale by John Marino <draco@marino.st> 35c3d0cca4SAndrey A. Chernov */ 36c3d0cca4SAndrey A. Chernov 37333fc21eSDavid E. O'Brien #include <sys/cdefs.h> 38333fc21eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 39333fc21eSDavid E. O'Brien 40d201fe46SDaniel Eischen #include "namespace.h" 41c25f5140SBaptiste Daroussin 42c25f5140SBaptiste Daroussin #include <sys/types.h> 43c25f5140SBaptiste Daroussin #include <sys/stat.h> 44c25f5140SBaptiste Daroussin #include <sys/mman.h> 45c25f5140SBaptiste Daroussin 46332fe837SBaptiste Daroussin #include <assert.h> 47c3d0cca4SAndrey A. Chernov #include <stdio.h> 48c3d0cca4SAndrey A. Chernov #include <stdlib.h> 49c3d0cca4SAndrey A. Chernov #include <string.h> 502a6abeebSBaptiste Daroussin #include <wchar.h> 51926f20c9SAndrey A. Chernov #include <errno.h> 52926f20c9SAndrey A. Chernov #include <unistd.h> 532a6abeebSBaptiste Daroussin #include <fcntl.h> 54d201fe46SDaniel Eischen #include "un-namespace.h" 55d201fe46SDaniel Eischen 56c3d0cca4SAndrey A. Chernov #include "collate.h" 5763407d34SAndrey A. Chernov #include "setlocale.h" 5876692b80SAndrey A. Chernov #include "ldpart.h" 59536451f9SBaptiste Daroussin #include "libc_private.h" 60c3d0cca4SAndrey A. Chernov 613c87aa1dSDavid Chisnall struct xlocale_collate __xlocale_global_collate = { 622a6abeebSBaptiste Daroussin {{0}, "C"}, 1, 0, 0, 0 633c87aa1dSDavid Chisnall }; 643c87aa1dSDavid Chisnall 653c87aa1dSDavid Chisnall struct xlocale_collate __xlocale_C_collate = { 662a6abeebSBaptiste Daroussin {{0}, "C"}, 1, 0, 0, 0 673c87aa1dSDavid Chisnall }; 68c3d0cca4SAndrey A. Chernov 69a6d2922cSBaptiste Daroussin static int 703c87aa1dSDavid Chisnall __collate_load_tables_l(const char *encoding, struct xlocale_collate *table); 713c87aa1dSDavid Chisnall 723c87aa1dSDavid Chisnall static void 733c87aa1dSDavid Chisnall destruct_collate(void *t) 743c87aa1dSDavid Chisnall { 753c87aa1dSDavid Chisnall struct xlocale_collate *table = t; 762a6abeebSBaptiste Daroussin if (table->map && (table->maplen > 0)) { 772a6abeebSBaptiste Daroussin (void) munmap(table->map, table->maplen); 783c87aa1dSDavid Chisnall } 793c87aa1dSDavid Chisnall free(t); 803c87aa1dSDavid Chisnall } 813c87aa1dSDavid Chisnall 823c87aa1dSDavid Chisnall void * 832a6abeebSBaptiste Daroussin __collate_load(const char *encoding, __unused locale_t unused) 843c87aa1dSDavid Chisnall { 853c87aa1dSDavid Chisnall if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { 863c87aa1dSDavid Chisnall return &__xlocale_C_collate; 873c87aa1dSDavid Chisnall } 883c87aa1dSDavid Chisnall struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1); 893c87aa1dSDavid Chisnall table->header.header.destructor = destruct_collate; 903c87aa1dSDavid Chisnall // FIXME: Make sure that _LDP_CACHE is never returned. We should be doing 913c87aa1dSDavid Chisnall // the caching outside of this section 923c87aa1dSDavid Chisnall if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) { 933c87aa1dSDavid Chisnall xlocale_release(table); 943c87aa1dSDavid Chisnall return NULL; 953c87aa1dSDavid Chisnall } 963c87aa1dSDavid Chisnall return table; 973c87aa1dSDavid Chisnall } 983c87aa1dSDavid Chisnall 993c87aa1dSDavid Chisnall /** 1003c87aa1dSDavid Chisnall * Load the collation tables for the specified encoding into the global table. 1013c87aa1dSDavid Chisnall */ 1023c87aa1dSDavid Chisnall int 10376692b80SAndrey A. Chernov __collate_load_tables(const char *encoding) 104c3d0cca4SAndrey A. Chernov { 1055e4bbc69SBaptiste Daroussin 1065e4bbc69SBaptiste Daroussin return (__collate_load_tables_l(encoding, &__xlocale_global_collate)); 1073c87aa1dSDavid Chisnall } 1083c87aa1dSDavid Chisnall 1093c87aa1dSDavid Chisnall int 1103c87aa1dSDavid Chisnall __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) 1113c87aa1dSDavid Chisnall { 1122a6abeebSBaptiste Daroussin int i, chains, z; 113b89704ceSBaptiste Daroussin char *buf; 1142a6abeebSBaptiste Daroussin char *TMP; 1152a6abeebSBaptiste Daroussin char *map; 1162a6abeebSBaptiste Daroussin collate_info_t *info; 1172a6abeebSBaptiste Daroussin struct stat sbuf; 1182a6abeebSBaptiste Daroussin int fd; 119c3d0cca4SAndrey A. Chernov 120332fe837SBaptiste Daroussin table->__collate_load_error = 1; 121332fe837SBaptiste Daroussin 12276692b80SAndrey A. Chernov /* 'encoding' must be already checked. */ 12376692b80SAndrey A. Chernov if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { 12476692b80SAndrey A. Chernov return (_LDP_CACHE); 125377da8e8SAndrey A. Chernov } 12676692b80SAndrey A. Chernov 127eaa94ab4SBaptiste Daroussin asprintf(&buf, "%s/%s/LC_COLLATE", _PathLocale, encoding); 128b89704ceSBaptiste Daroussin if (buf == NULL) 129b89704ceSBaptiste Daroussin return (_LDP_ERROR); 13076692b80SAndrey A. Chernov 13128a20bb3SBaptiste Daroussin if ((fd = _open(buf, O_RDONLY)) < 0) { 13228a20bb3SBaptiste Daroussin free(buf); 1332a6abeebSBaptiste Daroussin return (_LDP_ERROR); 13428a20bb3SBaptiste Daroussin } 135b89704ceSBaptiste Daroussin free(buf); 1362a6abeebSBaptiste Daroussin if (_fstat(fd, &sbuf) < 0) { 1372a6abeebSBaptiste Daroussin (void) _close(fd); 1388e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1398e52da4dSAndrey A. Chernov } 1402a6abeebSBaptiste Daroussin if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) { 1412a6abeebSBaptiste Daroussin (void) _close(fd); 1422a6abeebSBaptiste Daroussin errno = EINVAL; 1438e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1448e52da4dSAndrey A. Chernov } 1452a6abeebSBaptiste Daroussin map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 1462a6abeebSBaptiste Daroussin (void) _close(fd); 1472a6abeebSBaptiste Daroussin if ((TMP = map) == NULL) { 1488e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1498e52da4dSAndrey A. Chernov } 1502a6abeebSBaptiste Daroussin 1512a6abeebSBaptiste Daroussin if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) { 1522a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1532a6abeebSBaptiste Daroussin errno = EINVAL; 1548e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1558e52da4dSAndrey A. Chernov } 1562a6abeebSBaptiste Daroussin TMP += COLLATE_STR_LEN; 1572a6abeebSBaptiste Daroussin 1582a6abeebSBaptiste Daroussin info = (void *)TMP; 1592a6abeebSBaptiste Daroussin TMP += sizeof (*info); 1602a6abeebSBaptiste Daroussin 1612a6abeebSBaptiste Daroussin if ((info->directive_count < 1) || 1622a6abeebSBaptiste Daroussin (info->directive_count >= COLL_WEIGHTS_MAX) || 1632a6abeebSBaptiste Daroussin ((chains = info->chain_count) < 0)) { 1642a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1652a6abeebSBaptiste Daroussin errno = EINVAL; 1662a6abeebSBaptiste Daroussin return (_LDP_ERROR); 1672a6abeebSBaptiste Daroussin } 1682a6abeebSBaptiste Daroussin 1692a6abeebSBaptiste Daroussin i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) + 1702a6abeebSBaptiste Daroussin (sizeof (collate_chain_t) * chains) + 1712a6abeebSBaptiste Daroussin (sizeof (collate_large_t) * info->large_count); 172332fe837SBaptiste Daroussin for (z = 0; z < info->directive_count; z++) { 1732a6abeebSBaptiste Daroussin i += sizeof (collate_subst_t) * info->subst_count[z]; 1742a6abeebSBaptiste Daroussin } 1752a6abeebSBaptiste Daroussin if (i != (sbuf.st_size - (TMP - map))) { 1762a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1772a6abeebSBaptiste Daroussin errno = EINVAL; 1782a6abeebSBaptiste Daroussin return (_LDP_ERROR); 1792a6abeebSBaptiste Daroussin } 1802a6abeebSBaptiste Daroussin 181332fe837SBaptiste Daroussin table->info = info; 1822a6abeebSBaptiste Daroussin table->char_pri_table = (void *)TMP; 1832a6abeebSBaptiste Daroussin TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1); 1842a6abeebSBaptiste Daroussin 1852a6abeebSBaptiste Daroussin for (z = 0; z < info->directive_count; z++) { 1862a6abeebSBaptiste Daroussin if (info->subst_count[z] > 0) { 1872a6abeebSBaptiste Daroussin table->subst_table[z] = (void *)TMP; 1882a6abeebSBaptiste Daroussin TMP += info->subst_count[z] * sizeof (collate_subst_t); 1892a6abeebSBaptiste Daroussin } else { 1902a6abeebSBaptiste Daroussin table->subst_table[z] = NULL; 1912a6abeebSBaptiste Daroussin } 1922a6abeebSBaptiste Daroussin } 1932a6abeebSBaptiste Daroussin 1942a6abeebSBaptiste Daroussin if (chains > 0) { 1952a6abeebSBaptiste Daroussin table->chain_pri_table = (void *)TMP; 1962a6abeebSBaptiste Daroussin TMP += chains * sizeof (collate_chain_t); 1978e52da4dSAndrey A. Chernov } else 1982a6abeebSBaptiste Daroussin table->chain_pri_table = NULL; 1992a6abeebSBaptiste Daroussin if (info->large_count > 0) 2002a6abeebSBaptiste Daroussin table->large_pri_table = (void *)TMP; 2012a6abeebSBaptiste Daroussin else 2022a6abeebSBaptiste Daroussin table->large_pri_table = NULL; 2038e52da4dSAndrey A. Chernov 204bb4317bfSDavid Chisnall table->__collate_load_error = 0; 20576692b80SAndrey A. Chernov return (_LDP_LOADED); 206c3d0cca4SAndrey A. Chernov } 207c3d0cca4SAndrey A. Chernov 208332fe837SBaptiste Daroussin static const int32_t * 2092a6abeebSBaptiste Daroussin substsearch(struct xlocale_collate *table, const wchar_t key, int pass) 2102a6abeebSBaptiste Daroussin { 211332fe837SBaptiste Daroussin const collate_subst_t *p; 2122a6abeebSBaptiste Daroussin int n = table->info->subst_count[pass]; 2132a6abeebSBaptiste Daroussin 2142a6abeebSBaptiste Daroussin if (n == 0) 2152a6abeebSBaptiste Daroussin return (NULL); 2162a6abeebSBaptiste Daroussin 2172a6abeebSBaptiste Daroussin if (pass >= table->info->directive_count) 2182a6abeebSBaptiste Daroussin return (NULL); 2192a6abeebSBaptiste Daroussin 2202a6abeebSBaptiste Daroussin if (!(key & COLLATE_SUBST_PRIORITY)) 2212a6abeebSBaptiste Daroussin return (NULL); 2222a6abeebSBaptiste Daroussin 2232a6abeebSBaptiste Daroussin p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY); 224332fe837SBaptiste Daroussin assert(p->key == key); 2252a6abeebSBaptiste Daroussin return (p->pri); 226c3d0cca4SAndrey A. Chernov } 2272a6abeebSBaptiste Daroussin 2282a6abeebSBaptiste Daroussin static collate_chain_t * 2292a6abeebSBaptiste Daroussin chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len) 2302a6abeebSBaptiste Daroussin { 23176e6db68SBaptiste Daroussin int low = 0; 23276e6db68SBaptiste Daroussin int high = table->info->chain_count - 1;; 2332a6abeebSBaptiste Daroussin int next, compar, l; 2342a6abeebSBaptiste Daroussin collate_chain_t *p; 23576e6db68SBaptiste Daroussin collate_chain_t *tab = table->chain_pri_table; 2362a6abeebSBaptiste Daroussin 23776e6db68SBaptiste Daroussin if (high < 0) 2382a6abeebSBaptiste Daroussin return (NULL); 2392a6abeebSBaptiste Daroussin 2402a6abeebSBaptiste Daroussin while (low <= high) { 2412a6abeebSBaptiste Daroussin next = (low + high) / 2; 2422a6abeebSBaptiste Daroussin p = tab + next; 2432a6abeebSBaptiste Daroussin compar = *key - *p->str; 2442a6abeebSBaptiste Daroussin if (compar == 0) { 2452a6abeebSBaptiste Daroussin l = wcsnlen(p->str, COLLATE_STR_LEN); 2462a6abeebSBaptiste Daroussin compar = wcsncmp(key, p->str, l); 2472a6abeebSBaptiste Daroussin if (compar == 0) { 2482a6abeebSBaptiste Daroussin *len = l; 2492a6abeebSBaptiste Daroussin return (p); 250c3d0cca4SAndrey A. Chernov } 2512a6abeebSBaptiste Daroussin } 2522a6abeebSBaptiste Daroussin if (compar > 0) 2532a6abeebSBaptiste Daroussin low = next + 1; 2542a6abeebSBaptiste Daroussin else 2552a6abeebSBaptiste Daroussin high = next - 1; 2562a6abeebSBaptiste Daroussin } 2572a6abeebSBaptiste Daroussin return (NULL); 2582a6abeebSBaptiste Daroussin } 2592a6abeebSBaptiste Daroussin 2602a6abeebSBaptiste Daroussin static collate_large_t * 2612a6abeebSBaptiste Daroussin largesearch(struct xlocale_collate *table, const wchar_t key) 2622a6abeebSBaptiste Daroussin { 2632a6abeebSBaptiste Daroussin int low = 0; 2642a6abeebSBaptiste Daroussin int high = table->info->large_count - 1; 2652a6abeebSBaptiste Daroussin int next, compar; 2662a6abeebSBaptiste Daroussin collate_large_t *p; 2672a6abeebSBaptiste Daroussin collate_large_t *tab = table->large_pri_table; 2682a6abeebSBaptiste Daroussin 26976e6db68SBaptiste Daroussin if (high < 0) 2702a6abeebSBaptiste Daroussin return (NULL); 2712a6abeebSBaptiste Daroussin 2722a6abeebSBaptiste Daroussin while (low <= high) { 2732a6abeebSBaptiste Daroussin next = (low + high) / 2; 2742a6abeebSBaptiste Daroussin p = tab + next; 2752a6abeebSBaptiste Daroussin compar = key - p->val; 2762a6abeebSBaptiste Daroussin if (compar == 0) 2772a6abeebSBaptiste Daroussin return (p); 2782a6abeebSBaptiste Daroussin if (compar > 0) 2792a6abeebSBaptiste Daroussin low = next + 1; 2802a6abeebSBaptiste Daroussin else 2812a6abeebSBaptiste Daroussin high = next - 1; 2822a6abeebSBaptiste Daroussin } 2832a6abeebSBaptiste Daroussin return (NULL); 284c3d0cca4SAndrey A. Chernov } 285c3d0cca4SAndrey A. Chernov 286c3d0cca4SAndrey A. Chernov void 2872a6abeebSBaptiste Daroussin _collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len, 2882a6abeebSBaptiste Daroussin int *pri, int which, const int **state) 289c3d0cca4SAndrey A. Chernov { 2902a6abeebSBaptiste Daroussin collate_chain_t *p2; 2912a6abeebSBaptiste Daroussin collate_large_t *match; 2922a6abeebSBaptiste Daroussin int p, l; 2932a6abeebSBaptiste Daroussin const int *sptr; 294c3d0cca4SAndrey A. Chernov 2952a6abeebSBaptiste Daroussin /* 2962a6abeebSBaptiste Daroussin * If this is the "last" pass for the UNDEFINED, then 2972a6abeebSBaptiste Daroussin * we just return the priority itself. 2982a6abeebSBaptiste Daroussin */ 2992a6abeebSBaptiste Daroussin if (which >= table->info->directive_count) { 3002a6abeebSBaptiste Daroussin *pri = *t; 301c3d0cca4SAndrey A. Chernov *len = 1; 3022a6abeebSBaptiste Daroussin *state = NULL; 303c3d0cca4SAndrey A. Chernov return; 304c3d0cca4SAndrey A. Chernov } 3052a6abeebSBaptiste Daroussin 3062a6abeebSBaptiste Daroussin /* 3072a6abeebSBaptiste Daroussin * If we have remaining substitution data from a previous 3082a6abeebSBaptiste Daroussin * call, consume it first. 3092a6abeebSBaptiste Daroussin */ 3102a6abeebSBaptiste Daroussin if ((sptr = *state) != NULL) { 3112a6abeebSBaptiste Daroussin *pri = *sptr; 3122a6abeebSBaptiste Daroussin sptr++; 313*dee0bbbdSBaptiste Daroussin if ((sptr == *state) || (sptr == NULL)) 31476e6db68SBaptiste Daroussin *state = NULL; 31576e6db68SBaptiste Daroussin else 31676e6db68SBaptiste Daroussin *state = sptr; 3172a6abeebSBaptiste Daroussin *len = 0; 3182a6abeebSBaptiste Daroussin return; 319c3d0cca4SAndrey A. Chernov } 320c3d0cca4SAndrey A. Chernov 3212a6abeebSBaptiste Daroussin /* No active substitutions */ 3222a6abeebSBaptiste Daroussin *len = 1; 3232a6abeebSBaptiste Daroussin 3242a6abeebSBaptiste Daroussin /* 32532223c1bSPedro F. Giffuni * Check for composites such as diphthongs that collate as a 3262a6abeebSBaptiste Daroussin * single element (aka chains or collating-elements). 3272a6abeebSBaptiste Daroussin */ 3282a6abeebSBaptiste Daroussin if (((p2 = chainsearch(table, t, &l)) != NULL) && 3292a6abeebSBaptiste Daroussin ((p = p2->pri[which]) >= 0)) { 3302a6abeebSBaptiste Daroussin 3312a6abeebSBaptiste Daroussin *len = l; 3322a6abeebSBaptiste Daroussin *pri = p; 3332a6abeebSBaptiste Daroussin 3342a6abeebSBaptiste Daroussin } else if (*t <= UCHAR_MAX) { 3352a6abeebSBaptiste Daroussin 3362a6abeebSBaptiste Daroussin /* 3372a6abeebSBaptiste Daroussin * Character is a small (8-bit) character. 3382a6abeebSBaptiste Daroussin * We just look these up directly for speed. 3392a6abeebSBaptiste Daroussin */ 3402a6abeebSBaptiste Daroussin *pri = table->char_pri_table[*t].pri[which]; 3412a6abeebSBaptiste Daroussin 3422a6abeebSBaptiste Daroussin } else if ((table->info->large_count > 0) && 3432a6abeebSBaptiste Daroussin ((match = largesearch(table, *t)) != NULL)) { 3442a6abeebSBaptiste Daroussin 3452a6abeebSBaptiste Daroussin /* 3462a6abeebSBaptiste Daroussin * Character was found in the extended table. 3472a6abeebSBaptiste Daroussin */ 3482a6abeebSBaptiste Daroussin *pri = match->pri.pri[which]; 3492a6abeebSBaptiste Daroussin 3502a6abeebSBaptiste Daroussin } else { 3512a6abeebSBaptiste Daroussin /* 3522a6abeebSBaptiste Daroussin * Character lacks a specific definition. 3532a6abeebSBaptiste Daroussin */ 3542a6abeebSBaptiste Daroussin if (table->info->directive[which] & DIRECTIVE_UNDEFINED) { 3552a6abeebSBaptiste Daroussin /* Mask off sign bit to prevent ordering confusion. */ 3562a6abeebSBaptiste Daroussin *pri = (*t & COLLATE_MAX_PRIORITY); 3572a6abeebSBaptiste Daroussin } else { 3582a6abeebSBaptiste Daroussin *pri = table->info->undef_pri[which]; 3592a6abeebSBaptiste Daroussin } 3602a6abeebSBaptiste Daroussin /* No substitutions for undefined characters! */ 3612a6abeebSBaptiste Daroussin return; 3622a6abeebSBaptiste Daroussin } 3632a6abeebSBaptiste Daroussin 3642a6abeebSBaptiste Daroussin /* 3652a6abeebSBaptiste Daroussin * Try substituting (expanding) the character. We are 3662a6abeebSBaptiste Daroussin * currently doing this *after* the chain compression. I 3672a6abeebSBaptiste Daroussin * think it should not matter, but this way might be slightly 3682a6abeebSBaptiste Daroussin * faster. 3692a6abeebSBaptiste Daroussin * 3702a6abeebSBaptiste Daroussin * We do this after the priority search, as this will help us 3712a6abeebSBaptiste Daroussin * to identify a single key value. In order for this to work, 3722a6abeebSBaptiste Daroussin * its important that the priority assigned to a given element 3732a6abeebSBaptiste Daroussin * to be substituted be unique for that level. The localedef 3742a6abeebSBaptiste Daroussin * code ensures this for us. 3752a6abeebSBaptiste Daroussin */ 3762a6abeebSBaptiste Daroussin if ((sptr = substsearch(table, *pri, which)) != NULL) { 37776e6db68SBaptiste Daroussin if ((*pri = *sptr) > 0) { 3782a6abeebSBaptiste Daroussin sptr++; 3792a6abeebSBaptiste Daroussin *state = *sptr ? sptr : NULL; 3802a6abeebSBaptiste Daroussin } 3812a6abeebSBaptiste Daroussin } 3822a6abeebSBaptiste Daroussin 3832a6abeebSBaptiste Daroussin } 3842a6abeebSBaptiste Daroussin 3852a6abeebSBaptiste Daroussin /* 3862a6abeebSBaptiste Daroussin * This is the meaty part of wcsxfrm & strxfrm. Note that it does 3872a6abeebSBaptiste Daroussin * NOT NULL terminate. That is left to the caller. 3882a6abeebSBaptiste Daroussin */ 3892a6abeebSBaptiste Daroussin size_t 3902a6abeebSBaptiste Daroussin _collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf, 3912a6abeebSBaptiste Daroussin size_t room) 392c3d0cca4SAndrey A. Chernov { 3932a6abeebSBaptiste Daroussin int pri; 3942a6abeebSBaptiste Daroussin int len; 3952a6abeebSBaptiste Daroussin const wchar_t *t; 3962a6abeebSBaptiste Daroussin wchar_t *tr = NULL; 3972a6abeebSBaptiste Daroussin int direc; 3982a6abeebSBaptiste Daroussin int pass; 3992a6abeebSBaptiste Daroussin const int32_t *state; 4002a6abeebSBaptiste Daroussin size_t want = 0; 4012a6abeebSBaptiste Daroussin size_t need = 0; 402332fe837SBaptiste Daroussin int ndir = table->info->directive_count; 403c3d0cca4SAndrey A. Chernov 404332fe837SBaptiste Daroussin assert(src); 405332fe837SBaptiste Daroussin 406332fe837SBaptiste Daroussin for (pass = 0; pass <= ndir; pass++) { 4072a6abeebSBaptiste Daroussin 4082a6abeebSBaptiste Daroussin state = NULL; 4092a6abeebSBaptiste Daroussin 4102a6abeebSBaptiste Daroussin if (pass != 0) { 4112a6abeebSBaptiste Daroussin /* insert level separator from the previous pass */ 4122a6abeebSBaptiste Daroussin if (room) { 4132a6abeebSBaptiste Daroussin *xf++ = 1; 4142a6abeebSBaptiste Daroussin room--; 4152a6abeebSBaptiste Daroussin } 4162a6abeebSBaptiste Daroussin want++; 417c3d0cca4SAndrey A. Chernov } 418c3d0cca4SAndrey A. Chernov 4192a6abeebSBaptiste Daroussin /* special pass for undefined */ 420332fe837SBaptiste Daroussin if (pass == ndir) { 4212a6abeebSBaptiste Daroussin direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 4222a6abeebSBaptiste Daroussin } else { 4232a6abeebSBaptiste Daroussin direc = table->info->directive[pass]; 4242a6abeebSBaptiste Daroussin } 4252a6abeebSBaptiste Daroussin 4262a6abeebSBaptiste Daroussin t = src; 4272a6abeebSBaptiste Daroussin 4282a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_BACKWARD) { 4292a6abeebSBaptiste Daroussin wchar_t *bp, *fp, c; 4302a6abeebSBaptiste Daroussin free(tr); 4312a6abeebSBaptiste Daroussin if ((tr = wcsdup(t)) == NULL) { 4322a6abeebSBaptiste Daroussin errno = ENOMEM; 4332a6abeebSBaptiste Daroussin goto fail; 4342a6abeebSBaptiste Daroussin } 4352a6abeebSBaptiste Daroussin bp = tr; 4362a6abeebSBaptiste Daroussin fp = tr + wcslen(tr) - 1; 4372a6abeebSBaptiste Daroussin while (bp < fp) { 4382a6abeebSBaptiste Daroussin c = *bp; 4392a6abeebSBaptiste Daroussin *bp++ = *fp; 4402a6abeebSBaptiste Daroussin *fp-- = c; 4412a6abeebSBaptiste Daroussin } 4422a6abeebSBaptiste Daroussin t = (const wchar_t *)tr; 4432a6abeebSBaptiste Daroussin } 4442a6abeebSBaptiste Daroussin 4452a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_POSITION) { 4462a6abeebSBaptiste Daroussin while (*t || state) { 4472a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 4482a6abeebSBaptiste Daroussin t += len; 4492a6abeebSBaptiste Daroussin if (pri <= 0) { 4502a6abeebSBaptiste Daroussin if (pri < 0) { 4512a6abeebSBaptiste Daroussin errno = EINVAL; 4522a6abeebSBaptiste Daroussin goto fail; 4532a6abeebSBaptiste Daroussin } 454*dee0bbbdSBaptiste Daroussin state = NULL; 4552a6abeebSBaptiste Daroussin pri = COLLATE_MAX_PRIORITY; 4562a6abeebSBaptiste Daroussin } 4572a6abeebSBaptiste Daroussin if (room) { 4582a6abeebSBaptiste Daroussin *xf++ = pri; 4592a6abeebSBaptiste Daroussin room--; 4602a6abeebSBaptiste Daroussin } 4612a6abeebSBaptiste Daroussin want++; 4622a6abeebSBaptiste Daroussin need = want; 4632a6abeebSBaptiste Daroussin } 4642a6abeebSBaptiste Daroussin } else { 4652a6abeebSBaptiste Daroussin while (*t || state) { 4662a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 4672a6abeebSBaptiste Daroussin t += len; 4682a6abeebSBaptiste Daroussin if (pri <= 0) { 4692a6abeebSBaptiste Daroussin if (pri < 0) { 4702a6abeebSBaptiste Daroussin errno = EINVAL; 4712a6abeebSBaptiste Daroussin goto fail; 4722a6abeebSBaptiste Daroussin } 473*dee0bbbdSBaptiste Daroussin state = NULL; 4742a6abeebSBaptiste Daroussin continue; 4752a6abeebSBaptiste Daroussin } 4762a6abeebSBaptiste Daroussin if (room) { 4772a6abeebSBaptiste Daroussin *xf++ = pri; 4782a6abeebSBaptiste Daroussin room--; 4792a6abeebSBaptiste Daroussin } 4802a6abeebSBaptiste Daroussin want++; 4812a6abeebSBaptiste Daroussin need = want; 4822a6abeebSBaptiste Daroussin } 4832a6abeebSBaptiste Daroussin } 4842a6abeebSBaptiste Daroussin } 4852a6abeebSBaptiste Daroussin free(tr); 4862a6abeebSBaptiste Daroussin return (need); 4872a6abeebSBaptiste Daroussin 4882a6abeebSBaptiste Daroussin fail: 4892a6abeebSBaptiste Daroussin free(tr); 4902a6abeebSBaptiste Daroussin return ((size_t)(-1)); 4912a6abeebSBaptiste Daroussin } 4922a6abeebSBaptiste Daroussin 4932a6abeebSBaptiste Daroussin /* 4942a6abeebSBaptiste Daroussin * In the non-POSIX case, we transform each character into a string of 4952a6abeebSBaptiste Daroussin * characters representing the character's priority. Since char is usually 4962a6abeebSBaptiste Daroussin * signed, we are limited by 7 bits per byte. To avoid zero, we need to add 4972a6abeebSBaptiste Daroussin * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 4982a6abeebSBaptiste Daroussin * bits per byte. 4992a6abeebSBaptiste Daroussin * 5002a6abeebSBaptiste Daroussin * It turns out that we sometimes have real priorities that are 5012a6abeebSBaptiste Daroussin * 31-bits wide. (But: be careful using priorities where the high 5022a6abeebSBaptiste Daroussin * order bit is set -- i.e. the priority is negative. The sort order 5032a6abeebSBaptiste Daroussin * may be surprising!) 5042a6abeebSBaptiste Daroussin * 5052a6abeebSBaptiste Daroussin * TODO: This would be a good area to optimize somewhat. It turns out 5062a6abeebSBaptiste Daroussin * that real prioririties *except for the last UNDEFINED pass* are generally 5072a6abeebSBaptiste Daroussin * very small. We need the localedef code to precalculate the max 5082a6abeebSBaptiste Daroussin * priority for us, and ideally also give us a mask, and then we could 5092a6abeebSBaptiste Daroussin * severely limit what we expand to. 5102a6abeebSBaptiste Daroussin */ 5112a6abeebSBaptiste Daroussin #define XFRM_BYTES 6 5122a6abeebSBaptiste Daroussin #define XFRM_OFFSET ('0') /* make all printable characters */ 5132a6abeebSBaptiste Daroussin #define XFRM_SHIFT 6 5142a6abeebSBaptiste Daroussin #define XFRM_MASK ((1 << XFRM_SHIFT) - 1) 5152a6abeebSBaptiste Daroussin #define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */ 5162a6abeebSBaptiste Daroussin 5172a6abeebSBaptiste Daroussin static int 5182a6abeebSBaptiste Daroussin xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass) 519926f20c9SAndrey A. Chernov { 5202a6abeebSBaptiste Daroussin /* we use unsigned to ensure zero fill on right shift */ 5212a6abeebSBaptiste Daroussin uint32_t val = (uint32_t)table->info->pri_count[pass]; 5222a6abeebSBaptiste Daroussin int nc = 0; 523926f20c9SAndrey A. Chernov 5242a6abeebSBaptiste Daroussin while (val) { 5252a6abeebSBaptiste Daroussin *p = (pri & XFRM_MASK) + XFRM_OFFSET; 5262a6abeebSBaptiste Daroussin pri >>= XFRM_SHIFT; 5272a6abeebSBaptiste Daroussin val >>= XFRM_SHIFT; 5282a6abeebSBaptiste Daroussin p++; 5292a6abeebSBaptiste Daroussin nc++; 5302a6abeebSBaptiste Daroussin } 5312a6abeebSBaptiste Daroussin return (nc); 532926f20c9SAndrey A. Chernov } 533926f20c9SAndrey A. Chernov 5342a6abeebSBaptiste Daroussin size_t 5352a6abeebSBaptiste Daroussin _collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf, 5362a6abeebSBaptiste Daroussin size_t room) 537c3d0cca4SAndrey A. Chernov { 5382a6abeebSBaptiste Daroussin int pri; 5392a6abeebSBaptiste Daroussin int len; 5402a6abeebSBaptiste Daroussin const wchar_t *t; 5412a6abeebSBaptiste Daroussin wchar_t *tr = NULL; 5422a6abeebSBaptiste Daroussin int direc; 5432a6abeebSBaptiste Daroussin int pass; 5442a6abeebSBaptiste Daroussin const int32_t *state; 5452a6abeebSBaptiste Daroussin size_t want = 0; 5462a6abeebSBaptiste Daroussin size_t need = 0; 5472a6abeebSBaptiste Daroussin int b; 5482a6abeebSBaptiste Daroussin uint8_t buf[XFRM_BYTES]; 549332fe837SBaptiste Daroussin int ndir = table->info->directive_count; 550c3d0cca4SAndrey A. Chernov 551332fe837SBaptiste Daroussin assert(src); 552332fe837SBaptiste Daroussin 553332fe837SBaptiste Daroussin for (pass = 0; pass <= ndir; pass++) { 5542a6abeebSBaptiste Daroussin 5552a6abeebSBaptiste Daroussin state = NULL; 5562a6abeebSBaptiste Daroussin 5572a6abeebSBaptiste Daroussin if (pass != 0) { 5582a6abeebSBaptiste Daroussin /* insert level separator from the previous pass */ 5592a6abeebSBaptiste Daroussin if (room) { 5602a6abeebSBaptiste Daroussin *xf++ = XFRM_SEP; 5612a6abeebSBaptiste Daroussin room--; 562c3d0cca4SAndrey A. Chernov } 5632a6abeebSBaptiste Daroussin want++; 5642a6abeebSBaptiste Daroussin } 5652a6abeebSBaptiste Daroussin 5662a6abeebSBaptiste Daroussin /* special pass for undefined */ 567332fe837SBaptiste Daroussin if (pass == ndir) { 5682a6abeebSBaptiste Daroussin direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 5692a6abeebSBaptiste Daroussin } else { 5702a6abeebSBaptiste Daroussin direc = table->info->directive[pass]; 5712a6abeebSBaptiste Daroussin } 5722a6abeebSBaptiste Daroussin 5732a6abeebSBaptiste Daroussin t = src; 5742a6abeebSBaptiste Daroussin 5752a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_BACKWARD) { 5762a6abeebSBaptiste Daroussin wchar_t *bp, *fp, c; 5772a6abeebSBaptiste Daroussin free(tr); 5782a6abeebSBaptiste Daroussin if ((tr = wcsdup(t)) == NULL) { 5792a6abeebSBaptiste Daroussin errno = ENOMEM; 5802a6abeebSBaptiste Daroussin goto fail; 5812a6abeebSBaptiste Daroussin } 5822a6abeebSBaptiste Daroussin bp = tr; 5832a6abeebSBaptiste Daroussin fp = tr + wcslen(tr) - 1; 5842a6abeebSBaptiste Daroussin while (bp < fp) { 5852a6abeebSBaptiste Daroussin c = *bp; 5862a6abeebSBaptiste Daroussin *bp++ = *fp; 5872a6abeebSBaptiste Daroussin *fp-- = c; 5882a6abeebSBaptiste Daroussin } 5892a6abeebSBaptiste Daroussin t = (const wchar_t *)tr; 5902a6abeebSBaptiste Daroussin } 5912a6abeebSBaptiste Daroussin 5922a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_POSITION) { 5932a6abeebSBaptiste Daroussin while (*t || state) { 5942a6abeebSBaptiste Daroussin 5952a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 5962a6abeebSBaptiste Daroussin t += len; 5972a6abeebSBaptiste Daroussin if (pri <= 0) { 5982a6abeebSBaptiste Daroussin if (pri < 0) { 5992a6abeebSBaptiste Daroussin errno = EINVAL; 6002a6abeebSBaptiste Daroussin goto fail; 6012a6abeebSBaptiste Daroussin } 602*dee0bbbdSBaptiste Daroussin state = NULL; 6032a6abeebSBaptiste Daroussin pri = COLLATE_MAX_PRIORITY; 6042a6abeebSBaptiste Daroussin } 6052a6abeebSBaptiste Daroussin 6062a6abeebSBaptiste Daroussin b = xfrm(table, buf, pri, pass); 6072a6abeebSBaptiste Daroussin want += b; 6082a6abeebSBaptiste Daroussin if (room) { 6092a6abeebSBaptiste Daroussin while (b) { 6102a6abeebSBaptiste Daroussin b--; 6112a6abeebSBaptiste Daroussin if (room) { 6122a6abeebSBaptiste Daroussin *xf++ = buf[b]; 6132a6abeebSBaptiste Daroussin room--; 6142a6abeebSBaptiste Daroussin } 6152a6abeebSBaptiste Daroussin } 6162a6abeebSBaptiste Daroussin } 6172a6abeebSBaptiste Daroussin need = want; 6182a6abeebSBaptiste Daroussin } 6192a6abeebSBaptiste Daroussin } else { 6202a6abeebSBaptiste Daroussin while (*t || state) { 6212a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 6222a6abeebSBaptiste Daroussin t += len; 6232a6abeebSBaptiste Daroussin if (pri <= 0) { 6242a6abeebSBaptiste Daroussin if (pri < 0) { 6252a6abeebSBaptiste Daroussin errno = EINVAL; 6262a6abeebSBaptiste Daroussin goto fail; 6272a6abeebSBaptiste Daroussin } 628*dee0bbbdSBaptiste Daroussin state = NULL; 6292a6abeebSBaptiste Daroussin continue; 6302a6abeebSBaptiste Daroussin } 6312a6abeebSBaptiste Daroussin 6322a6abeebSBaptiste Daroussin b = xfrm(table, buf, pri, pass); 6332a6abeebSBaptiste Daroussin want += b; 6342a6abeebSBaptiste Daroussin if (room) { 6352a6abeebSBaptiste Daroussin 6362a6abeebSBaptiste Daroussin while (b) { 6372a6abeebSBaptiste Daroussin b--; 6382a6abeebSBaptiste Daroussin if (room) { 6392a6abeebSBaptiste Daroussin *xf++ = buf[b]; 6402a6abeebSBaptiste Daroussin room--; 6412a6abeebSBaptiste Daroussin } 6422a6abeebSBaptiste Daroussin } 6432a6abeebSBaptiste Daroussin } 6442a6abeebSBaptiste Daroussin need = want; 6452a6abeebSBaptiste Daroussin } 6462a6abeebSBaptiste Daroussin } 6472a6abeebSBaptiste Daroussin } 6482a6abeebSBaptiste Daroussin free(tr); 6492a6abeebSBaptiste Daroussin return (need); 6502a6abeebSBaptiste Daroussin 6512a6abeebSBaptiste Daroussin fail: 6522a6abeebSBaptiste Daroussin free(tr); 6532a6abeebSBaptiste Daroussin return ((size_t)(-1)); 6542a6abeebSBaptiste Daroussin } 6552a6abeebSBaptiste Daroussin 6562a6abeebSBaptiste Daroussin /* 6572a6abeebSBaptiste Daroussin * __collate_equiv_value returns the primary collation value for the given 6582a6abeebSBaptiste Daroussin * collating symbol specified by str and len. Zero or negative is returned 6592a6abeebSBaptiste Daroussin * if the collating symbol was not found. This function is used by bracket 6602a6abeebSBaptiste Daroussin * code in the TRE regex library. 6612a6abeebSBaptiste Daroussin */ 6622a6abeebSBaptiste Daroussin int 6632a6abeebSBaptiste Daroussin __collate_equiv_value(locale_t locale, const wchar_t *str, size_t len) 6642a6abeebSBaptiste Daroussin { 6652a6abeebSBaptiste Daroussin int32_t e; 6662a6abeebSBaptiste Daroussin 6672a6abeebSBaptiste Daroussin if (len < 1 || len >= COLLATE_STR_LEN) 6682a6abeebSBaptiste Daroussin return (-1); 6692a6abeebSBaptiste Daroussin 6702a6abeebSBaptiste Daroussin FIX_LOCALE(locale); 6712a6abeebSBaptiste Daroussin struct xlocale_collate *table = 6722a6abeebSBaptiste Daroussin (struct xlocale_collate*)locale->components[XLC_COLLATE]; 6732a6abeebSBaptiste Daroussin 6742a6abeebSBaptiste Daroussin if (table->__collate_load_error) 6752a6abeebSBaptiste Daroussin return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1); 6762a6abeebSBaptiste Daroussin 6772a6abeebSBaptiste Daroussin if (len == 1) { 6782a6abeebSBaptiste Daroussin e = -1; 6792a6abeebSBaptiste Daroussin if (*str <= UCHAR_MAX) 6802a6abeebSBaptiste Daroussin e = table->char_pri_table[*str].pri[0]; 6812a6abeebSBaptiste Daroussin else if (table->info->large_count > 0) { 6822a6abeebSBaptiste Daroussin collate_large_t *match_large; 6832a6abeebSBaptiste Daroussin match_large = largesearch(table, *str); 6842a6abeebSBaptiste Daroussin if (match_large) 6852a6abeebSBaptiste Daroussin e = match_large->pri.pri[0]; 6862a6abeebSBaptiste Daroussin } 6872a6abeebSBaptiste Daroussin if (e == 0) 6882a6abeebSBaptiste Daroussin return (1); 6892a6abeebSBaptiste Daroussin return (e > 0 ? e : 0); 6902a6abeebSBaptiste Daroussin } 6912a6abeebSBaptiste Daroussin if (table->info->chain_count > 0) { 6922a6abeebSBaptiste Daroussin wchar_t name[COLLATE_STR_LEN]; 6932a6abeebSBaptiste Daroussin collate_chain_t *match_chain; 6942a6abeebSBaptiste Daroussin int clen; 6952a6abeebSBaptiste Daroussin 6962a6abeebSBaptiste Daroussin wcsncpy (name, str, len); 6972a6abeebSBaptiste Daroussin name[len] = 0; 6982a6abeebSBaptiste Daroussin match_chain = chainsearch(table, name, &clen); 6992a6abeebSBaptiste Daroussin if (match_chain) { 7002a6abeebSBaptiste Daroussin e = match_chain->pri[0]; 7012a6abeebSBaptiste Daroussin if (e == 0) 7022a6abeebSBaptiste Daroussin return (1); 7032a6abeebSBaptiste Daroussin return (e < 0 ? -e : e); 7042a6abeebSBaptiste Daroussin } 7052a6abeebSBaptiste Daroussin } 7062a6abeebSBaptiste Daroussin return (0); 7072a6abeebSBaptiste Daroussin } 708