1c3d0cca4SAndrey A. Chernov /*- 27b247341SBaptiste Daroussin * Copyright 2014 Garrett D'Amore <garrett@damore.org> 32a6abeebSBaptiste Daroussin * Copright 2010 Nexenta Systems, Inc. All rights reserved. 4c3d0cca4SAndrey A. Chernov * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> 5c3d0cca4SAndrey A. Chernov * at Electronni Visti IA, Kiev, Ukraine. 6c3d0cca4SAndrey A. Chernov * All rights reserved. 7c3d0cca4SAndrey A. Chernov * 83c87aa1dSDavid Chisnall * Copyright (c) 2011 The FreeBSD Foundation 93c87aa1dSDavid Chisnall * All rights reserved. 103c87aa1dSDavid Chisnall * Portions of this software were developed by David Chisnall 113c87aa1dSDavid Chisnall * under sponsorship from the FreeBSD Foundation. 123c87aa1dSDavid Chisnall * 13c3d0cca4SAndrey A. Chernov * Redistribution and use in source and binary forms, with or without 14c3d0cca4SAndrey A. Chernov * modification, are permitted provided that the following conditions 15c3d0cca4SAndrey A. Chernov * are met: 16c3d0cca4SAndrey A. Chernov * 1. Redistributions of source code must retain the above copyright 17c3d0cca4SAndrey A. Chernov * notice, this list of conditions and the following disclaimer. 18c3d0cca4SAndrey A. Chernov * 2. Redistributions in binary form must reproduce the above copyright 19c3d0cca4SAndrey A. Chernov * notice, this list of conditions and the following disclaimer in the 20c3d0cca4SAndrey A. Chernov * documentation and/or other materials provided with the distribution. 21c3d0cca4SAndrey A. Chernov * 22c3d0cca4SAndrey A. Chernov * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 23c3d0cca4SAndrey A. Chernov * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24c3d0cca4SAndrey A. Chernov * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25c3d0cca4SAndrey A. Chernov * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 26c3d0cca4SAndrey A. Chernov * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27c3d0cca4SAndrey A. Chernov * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28c3d0cca4SAndrey A. Chernov * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29c3d0cca4SAndrey A. Chernov * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30c3d0cca4SAndrey A. Chernov * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31c3d0cca4SAndrey A. Chernov * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32c3d0cca4SAndrey A. Chernov * SUCH DAMAGE. 332a6abeebSBaptiste Daroussin * 342a6abeebSBaptiste Daroussin * Adapted to xlocale by John Marino <draco@marino.st> 35c3d0cca4SAndrey A. Chernov */ 36c3d0cca4SAndrey A. Chernov 37333fc21eSDavid E. O'Brien #include <sys/cdefs.h> 38333fc21eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 39333fc21eSDavid E. O'Brien 40d201fe46SDaniel Eischen #include "namespace.h" 41c3d0cca4SAndrey A. Chernov #include <stdio.h> 42c3d0cca4SAndrey A. Chernov #include <stdlib.h> 43c3d0cca4SAndrey A. Chernov #include <string.h> 442a6abeebSBaptiste Daroussin #include <wchar.h> 45926f20c9SAndrey A. Chernov #include <errno.h> 46926f20c9SAndrey A. Chernov #include <unistd.h> 472a6abeebSBaptiste Daroussin #include <fcntl.h> 482a6abeebSBaptiste Daroussin #include <sys/types.h> 492a6abeebSBaptiste Daroussin #include <sys/stat.h> 502a6abeebSBaptiste Daroussin #include <sys/mman.h> 51d201fe46SDaniel Eischen #include "un-namespace.h" 52d201fe46SDaniel Eischen 53c3d0cca4SAndrey A. Chernov #include "collate.h" 5463407d34SAndrey A. Chernov #include "setlocale.h" 5576692b80SAndrey A. Chernov #include "ldpart.h" 56536451f9SBaptiste Daroussin #include "libc_private.h" 57c3d0cca4SAndrey A. Chernov 583c87aa1dSDavid Chisnall struct xlocale_collate __xlocale_global_collate = { 592a6abeebSBaptiste Daroussin {{0}, "C"}, 1, 0, 0, 0 603c87aa1dSDavid Chisnall }; 613c87aa1dSDavid Chisnall 623c87aa1dSDavid Chisnall struct xlocale_collate __xlocale_C_collate = { 632a6abeebSBaptiste Daroussin {{0}, "C"}, 1, 0, 0, 0 643c87aa1dSDavid Chisnall }; 65c3d0cca4SAndrey A. Chernov 66*a6d2922cSBaptiste Daroussin static int 673c87aa1dSDavid Chisnall __collate_load_tables_l(const char *encoding, struct xlocale_collate *table); 683c87aa1dSDavid Chisnall 693c87aa1dSDavid Chisnall static void 703c87aa1dSDavid Chisnall destruct_collate(void *t) 713c87aa1dSDavid Chisnall { 723c87aa1dSDavid Chisnall struct xlocale_collate *table = t; 732a6abeebSBaptiste Daroussin if (table->map && (table->maplen > 0)) { 742a6abeebSBaptiste Daroussin (void) munmap(table->map, table->maplen); 753c87aa1dSDavid Chisnall } 763c87aa1dSDavid Chisnall free(t); 773c87aa1dSDavid Chisnall } 783c87aa1dSDavid Chisnall 793c87aa1dSDavid Chisnall void * 802a6abeebSBaptiste Daroussin __collate_load(const char *encoding, __unused locale_t unused) 813c87aa1dSDavid Chisnall { 823c87aa1dSDavid Chisnall if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { 833c87aa1dSDavid Chisnall return &__xlocale_C_collate; 843c87aa1dSDavid Chisnall } 853c87aa1dSDavid Chisnall struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1); 863c87aa1dSDavid Chisnall table->header.header.destructor = destruct_collate; 873c87aa1dSDavid Chisnall // FIXME: Make sure that _LDP_CACHE is never returned. We should be doing 883c87aa1dSDavid Chisnall // the caching outside of this section 893c87aa1dSDavid Chisnall if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) { 903c87aa1dSDavid Chisnall xlocale_release(table); 913c87aa1dSDavid Chisnall return NULL; 923c87aa1dSDavid Chisnall } 933c87aa1dSDavid Chisnall return table; 943c87aa1dSDavid Chisnall } 953c87aa1dSDavid Chisnall 963c87aa1dSDavid Chisnall /** 973c87aa1dSDavid Chisnall * Load the collation tables for the specified encoding into the global table. 983c87aa1dSDavid Chisnall */ 993c87aa1dSDavid Chisnall int 10076692b80SAndrey A. Chernov __collate_load_tables(const char *encoding) 101c3d0cca4SAndrey A. Chernov { 102bb4317bfSDavid Chisnall int ret = __collate_load_tables_l(encoding, &__xlocale_global_collate); 103bb4317bfSDavid Chisnall return ret; 1043c87aa1dSDavid Chisnall } 1053c87aa1dSDavid Chisnall 1063c87aa1dSDavid Chisnall int 1073c87aa1dSDavid Chisnall __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) 1083c87aa1dSDavid Chisnall { 1092a6abeebSBaptiste Daroussin int i, chains, z; 1102a6abeebSBaptiste Daroussin char buf[PATH_MAX]; 1112a6abeebSBaptiste Daroussin char *TMP; 1122a6abeebSBaptiste Daroussin char *map; 1132a6abeebSBaptiste Daroussin collate_info_t *info; 1142a6abeebSBaptiste Daroussin struct stat sbuf; 1152a6abeebSBaptiste Daroussin int fd; 116c3d0cca4SAndrey A. Chernov 11776692b80SAndrey A. Chernov /* 'encoding' must be already checked. */ 11876692b80SAndrey A. Chernov if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { 119bb4317bfSDavid Chisnall table->__collate_load_error = 1; 12076692b80SAndrey A. Chernov return (_LDP_CACHE); 121377da8e8SAndrey A. Chernov } 12276692b80SAndrey A. Chernov 1232a6abeebSBaptiste Daroussin (void) snprintf(buf, sizeof (buf), "%s/%s/LC_COLLATE", 1242a6abeebSBaptiste Daroussin _PathLocale, encoding); 12576692b80SAndrey A. Chernov 1262a6abeebSBaptiste Daroussin if ((fd = _open(buf, O_RDONLY)) < 0) 1272a6abeebSBaptiste Daroussin return (_LDP_ERROR); 1282a6abeebSBaptiste Daroussin if (_fstat(fd, &sbuf) < 0) { 1292a6abeebSBaptiste Daroussin (void) _close(fd); 1308e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1318e52da4dSAndrey A. Chernov } 1322a6abeebSBaptiste Daroussin if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) { 1332a6abeebSBaptiste Daroussin (void) _close(fd); 1342a6abeebSBaptiste Daroussin errno = EINVAL; 1358e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1368e52da4dSAndrey A. Chernov } 1372a6abeebSBaptiste Daroussin map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 1382a6abeebSBaptiste Daroussin (void) _close(fd); 1392a6abeebSBaptiste Daroussin if ((TMP = map) == NULL) { 1408e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1418e52da4dSAndrey A. Chernov } 1422a6abeebSBaptiste Daroussin 1432a6abeebSBaptiste Daroussin if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) { 1442a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1452a6abeebSBaptiste Daroussin errno = EINVAL; 1468e52da4dSAndrey A. Chernov return (_LDP_ERROR); 1478e52da4dSAndrey A. Chernov } 1482a6abeebSBaptiste Daroussin TMP += COLLATE_STR_LEN; 1492a6abeebSBaptiste Daroussin 1502a6abeebSBaptiste Daroussin info = (void *)TMP; 1512a6abeebSBaptiste Daroussin TMP += sizeof (*info); 1522a6abeebSBaptiste Daroussin 1532a6abeebSBaptiste Daroussin if ((info->directive_count < 1) || 1542a6abeebSBaptiste Daroussin (info->directive_count >= COLL_WEIGHTS_MAX) || 1552a6abeebSBaptiste Daroussin ((chains = info->chain_count) < 0)) { 1562a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1572a6abeebSBaptiste Daroussin errno = EINVAL; 1582a6abeebSBaptiste Daroussin return (_LDP_ERROR); 1592a6abeebSBaptiste Daroussin } 1602a6abeebSBaptiste Daroussin 1612a6abeebSBaptiste Daroussin i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) + 1622a6abeebSBaptiste Daroussin (sizeof (collate_chain_t) * chains) + 1632a6abeebSBaptiste Daroussin (sizeof (collate_large_t) * info->large_count); 1642a6abeebSBaptiste Daroussin for (z = 0; z < (info->directive_count); z++) { 1652a6abeebSBaptiste Daroussin i += sizeof (collate_subst_t) * info->subst_count[z]; 1662a6abeebSBaptiste Daroussin } 1672a6abeebSBaptiste Daroussin if (i != (sbuf.st_size - (TMP - map))) { 1682a6abeebSBaptiste Daroussin (void) munmap(map, sbuf.st_size); 1692a6abeebSBaptiste Daroussin errno = EINVAL; 1702a6abeebSBaptiste Daroussin return (_LDP_ERROR); 1712a6abeebSBaptiste Daroussin } 1722a6abeebSBaptiste Daroussin 1732a6abeebSBaptiste Daroussin table->char_pri_table = (void *)TMP; 1742a6abeebSBaptiste Daroussin TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1); 1752a6abeebSBaptiste Daroussin 1762a6abeebSBaptiste Daroussin for (z = 0; z < info->directive_count; z++) { 1772a6abeebSBaptiste Daroussin if (info->subst_count[z] > 0) { 1782a6abeebSBaptiste Daroussin table->subst_table[z] = (void *)TMP; 1792a6abeebSBaptiste Daroussin TMP += info->subst_count[z] * sizeof (collate_subst_t); 1802a6abeebSBaptiste Daroussin } else { 1812a6abeebSBaptiste Daroussin table->subst_table[z] = NULL; 1822a6abeebSBaptiste Daroussin } 1832a6abeebSBaptiste Daroussin } 1842a6abeebSBaptiste Daroussin 1852a6abeebSBaptiste Daroussin if (chains > 0) { 1862a6abeebSBaptiste Daroussin table->chain_pri_table = (void *)TMP; 1872a6abeebSBaptiste Daroussin TMP += chains * sizeof (collate_chain_t); 1888e52da4dSAndrey A. Chernov } else 1892a6abeebSBaptiste Daroussin table->chain_pri_table = NULL; 1902a6abeebSBaptiste Daroussin if (info->large_count > 0) 1912a6abeebSBaptiste Daroussin table->large_pri_table = (void *)TMP; 1922a6abeebSBaptiste Daroussin else 1932a6abeebSBaptiste Daroussin table->large_pri_table = NULL; 1948e52da4dSAndrey A. Chernov 1952a6abeebSBaptiste Daroussin table->info = info; 196bb4317bfSDavid Chisnall table->__collate_load_error = 0; 197e755fb76SDmitrij Tejblum 19876692b80SAndrey A. Chernov return (_LDP_LOADED); 199c3d0cca4SAndrey A. Chernov } 200c3d0cca4SAndrey A. Chernov 2012a6abeebSBaptiste Daroussin /* 2022a6abeebSBaptiste Daroussin * Note: for performance reasons, we have expanded bsearch here. This avoids 2032a6abeebSBaptiste Daroussin * function call overhead with each comparison. 2042a6abeebSBaptiste Daroussin */ 205c3d0cca4SAndrey A. Chernov 2062a6abeebSBaptiste Daroussin static int32_t * 2072a6abeebSBaptiste Daroussin substsearch(struct xlocale_collate *table, const wchar_t key, int pass) 2082a6abeebSBaptiste Daroussin { 2092a6abeebSBaptiste Daroussin collate_subst_t *p; 2102a6abeebSBaptiste Daroussin int n = table->info->subst_count[pass]; 2112a6abeebSBaptiste Daroussin 2122a6abeebSBaptiste Daroussin if (n == 0) 2132a6abeebSBaptiste Daroussin return (NULL); 2142a6abeebSBaptiste Daroussin 2152a6abeebSBaptiste Daroussin if (pass >= table->info->directive_count) 2162a6abeebSBaptiste Daroussin return (NULL); 2172a6abeebSBaptiste Daroussin 2182a6abeebSBaptiste Daroussin if (!(key & COLLATE_SUBST_PRIORITY)) 2192a6abeebSBaptiste Daroussin return (NULL); 2202a6abeebSBaptiste Daroussin 2212a6abeebSBaptiste Daroussin p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY); 2222a6abeebSBaptiste Daroussin return (p->pri); 223c3d0cca4SAndrey A. Chernov } 2242a6abeebSBaptiste Daroussin 2252a6abeebSBaptiste Daroussin static collate_chain_t * 2262a6abeebSBaptiste Daroussin chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len) 2272a6abeebSBaptiste Daroussin { 2282a6abeebSBaptiste Daroussin int low; 2292a6abeebSBaptiste Daroussin int high; 2302a6abeebSBaptiste Daroussin int next, compar, l; 2312a6abeebSBaptiste Daroussin collate_chain_t *p; 2322a6abeebSBaptiste Daroussin collate_chain_t *tab; 2332a6abeebSBaptiste Daroussin 2342a6abeebSBaptiste Daroussin if (table->info->chain_count == 0) 2352a6abeebSBaptiste Daroussin return (NULL); 2362a6abeebSBaptiste Daroussin 2372a6abeebSBaptiste Daroussin low = 0; 2382a6abeebSBaptiste Daroussin high = table->info->chain_count - 1; 2392a6abeebSBaptiste Daroussin tab = table->chain_pri_table; 2402a6abeebSBaptiste Daroussin 2412a6abeebSBaptiste Daroussin while (low <= high) { 2422a6abeebSBaptiste Daroussin next = (low + high) / 2; 2432a6abeebSBaptiste Daroussin p = tab + next; 2442a6abeebSBaptiste Daroussin compar = *key - *p->str; 2452a6abeebSBaptiste Daroussin if (compar == 0) { 2462a6abeebSBaptiste Daroussin l = wcsnlen(p->str, COLLATE_STR_LEN); 2472a6abeebSBaptiste Daroussin compar = wcsncmp(key, p->str, l); 2482a6abeebSBaptiste Daroussin if (compar == 0) { 2492a6abeebSBaptiste Daroussin *len = l; 2502a6abeebSBaptiste Daroussin return (p); 251c3d0cca4SAndrey A. Chernov } 2522a6abeebSBaptiste Daroussin } 2532a6abeebSBaptiste Daroussin if (compar > 0) 2542a6abeebSBaptiste Daroussin low = next + 1; 2552a6abeebSBaptiste Daroussin else 2562a6abeebSBaptiste Daroussin high = next - 1; 2572a6abeebSBaptiste Daroussin } 2582a6abeebSBaptiste Daroussin return (NULL); 2592a6abeebSBaptiste Daroussin } 2602a6abeebSBaptiste Daroussin 2612a6abeebSBaptiste Daroussin static collate_large_t * 2622a6abeebSBaptiste Daroussin largesearch(struct xlocale_collate *table, const wchar_t key) 2632a6abeebSBaptiste Daroussin { 2642a6abeebSBaptiste Daroussin int low = 0; 2652a6abeebSBaptiste Daroussin int high = table->info->large_count - 1; 2662a6abeebSBaptiste Daroussin int next, compar; 2672a6abeebSBaptiste Daroussin collate_large_t *p; 2682a6abeebSBaptiste Daroussin collate_large_t *tab = table->large_pri_table; 2692a6abeebSBaptiste Daroussin 2702a6abeebSBaptiste Daroussin if (table->info->large_count == 0) 2712a6abeebSBaptiste Daroussin return (NULL); 2722a6abeebSBaptiste Daroussin 2732a6abeebSBaptiste Daroussin while (low <= high) { 2742a6abeebSBaptiste Daroussin next = (low + high) / 2; 2752a6abeebSBaptiste Daroussin p = tab + next; 2762a6abeebSBaptiste Daroussin compar = key - p->val; 2772a6abeebSBaptiste Daroussin if (compar == 0) 2782a6abeebSBaptiste Daroussin return (p); 2792a6abeebSBaptiste Daroussin if (compar > 0) 2802a6abeebSBaptiste Daroussin low = next + 1; 2812a6abeebSBaptiste Daroussin else 2822a6abeebSBaptiste Daroussin high = next - 1; 2832a6abeebSBaptiste Daroussin } 2842a6abeebSBaptiste Daroussin return (NULL); 285c3d0cca4SAndrey A. Chernov } 286c3d0cca4SAndrey A. Chernov 287c3d0cca4SAndrey A. Chernov void 2882a6abeebSBaptiste Daroussin _collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len, 2892a6abeebSBaptiste Daroussin int *pri, int which, const int **state) 290c3d0cca4SAndrey A. Chernov { 2912a6abeebSBaptiste Daroussin collate_chain_t *p2; 2922a6abeebSBaptiste Daroussin collate_large_t *match; 2932a6abeebSBaptiste Daroussin int p, l; 2942a6abeebSBaptiste Daroussin const int *sptr; 295c3d0cca4SAndrey A. Chernov 2962a6abeebSBaptiste Daroussin /* 2972a6abeebSBaptiste Daroussin * If this is the "last" pass for the UNDEFINED, then 2982a6abeebSBaptiste Daroussin * we just return the priority itself. 2992a6abeebSBaptiste Daroussin */ 3002a6abeebSBaptiste Daroussin if (which >= table->info->directive_count) { 3012a6abeebSBaptiste Daroussin *pri = *t; 302c3d0cca4SAndrey A. Chernov *len = 1; 3032a6abeebSBaptiste Daroussin *state = NULL; 304c3d0cca4SAndrey A. Chernov return; 305c3d0cca4SAndrey A. Chernov } 3062a6abeebSBaptiste Daroussin 3072a6abeebSBaptiste Daroussin /* 3082a6abeebSBaptiste Daroussin * If we have remaining substitution data from a previous 3092a6abeebSBaptiste Daroussin * call, consume it first. 3102a6abeebSBaptiste Daroussin */ 3112a6abeebSBaptiste Daroussin if ((sptr = *state) != NULL) { 3122a6abeebSBaptiste Daroussin *pri = *sptr; 3132a6abeebSBaptiste Daroussin sptr++; 3142a6abeebSBaptiste Daroussin *state = *sptr ? sptr : NULL; 3152a6abeebSBaptiste Daroussin *len = 0; 3162a6abeebSBaptiste Daroussin return; 317c3d0cca4SAndrey A. Chernov } 318c3d0cca4SAndrey A. Chernov 3192a6abeebSBaptiste Daroussin /* No active substitutions */ 3202a6abeebSBaptiste Daroussin *len = 1; 3212a6abeebSBaptiste Daroussin 3222a6abeebSBaptiste Daroussin /* 3232a6abeebSBaptiste Daroussin * Check for composites such as dipthongs that collate as a 3242a6abeebSBaptiste Daroussin * single element (aka chains or collating-elements). 3252a6abeebSBaptiste Daroussin */ 3262a6abeebSBaptiste Daroussin if (((p2 = chainsearch(table, t, &l)) != NULL) && 3272a6abeebSBaptiste Daroussin ((p = p2->pri[which]) >= 0)) { 3282a6abeebSBaptiste Daroussin 3292a6abeebSBaptiste Daroussin *len = l; 3302a6abeebSBaptiste Daroussin *pri = p; 3312a6abeebSBaptiste Daroussin 3322a6abeebSBaptiste Daroussin } else if (*t <= UCHAR_MAX) { 3332a6abeebSBaptiste Daroussin 3342a6abeebSBaptiste Daroussin /* 3352a6abeebSBaptiste Daroussin * Character is a small (8-bit) character. 3362a6abeebSBaptiste Daroussin * We just look these up directly for speed. 3372a6abeebSBaptiste Daroussin */ 3382a6abeebSBaptiste Daroussin *pri = table->char_pri_table[*t].pri[which]; 3392a6abeebSBaptiste Daroussin 3402a6abeebSBaptiste Daroussin } else if ((table->info->large_count > 0) && 3412a6abeebSBaptiste Daroussin ((match = largesearch(table, *t)) != NULL)) { 3422a6abeebSBaptiste Daroussin 3432a6abeebSBaptiste Daroussin /* 3442a6abeebSBaptiste Daroussin * Character was found in the extended table. 3452a6abeebSBaptiste Daroussin */ 3462a6abeebSBaptiste Daroussin *pri = match->pri.pri[which]; 3472a6abeebSBaptiste Daroussin 3482a6abeebSBaptiste Daroussin } else { 3492a6abeebSBaptiste Daroussin /* 3502a6abeebSBaptiste Daroussin * Character lacks a specific definition. 3512a6abeebSBaptiste Daroussin */ 3522a6abeebSBaptiste Daroussin if (table->info->directive[which] & DIRECTIVE_UNDEFINED) { 3532a6abeebSBaptiste Daroussin /* Mask off sign bit to prevent ordering confusion. */ 3542a6abeebSBaptiste Daroussin *pri = (*t & COLLATE_MAX_PRIORITY); 3552a6abeebSBaptiste Daroussin } else { 3562a6abeebSBaptiste Daroussin *pri = table->info->undef_pri[which]; 3572a6abeebSBaptiste Daroussin } 3582a6abeebSBaptiste Daroussin /* No substitutions for undefined characters! */ 3592a6abeebSBaptiste Daroussin return; 3602a6abeebSBaptiste Daroussin } 3612a6abeebSBaptiste Daroussin 3622a6abeebSBaptiste Daroussin /* 3632a6abeebSBaptiste Daroussin * Try substituting (expanding) the character. We are 3642a6abeebSBaptiste Daroussin * currently doing this *after* the chain compression. I 3652a6abeebSBaptiste Daroussin * think it should not matter, but this way might be slightly 3662a6abeebSBaptiste Daroussin * faster. 3672a6abeebSBaptiste Daroussin * 3682a6abeebSBaptiste Daroussin * We do this after the priority search, as this will help us 3692a6abeebSBaptiste Daroussin * to identify a single key value. In order for this to work, 3702a6abeebSBaptiste Daroussin * its important that the priority assigned to a given element 3712a6abeebSBaptiste Daroussin * to be substituted be unique for that level. The localedef 3722a6abeebSBaptiste Daroussin * code ensures this for us. 3732a6abeebSBaptiste Daroussin */ 3742a6abeebSBaptiste Daroussin if ((sptr = substsearch(table, *pri, which)) != NULL) { 3752a6abeebSBaptiste Daroussin if ((*pri = *sptr) != 0) { 3762a6abeebSBaptiste Daroussin sptr++; 3772a6abeebSBaptiste Daroussin *state = *sptr ? sptr : NULL; 3782a6abeebSBaptiste Daroussin } 3792a6abeebSBaptiste Daroussin } 3802a6abeebSBaptiste Daroussin 3812a6abeebSBaptiste Daroussin } 3822a6abeebSBaptiste Daroussin 3832a6abeebSBaptiste Daroussin /* 3842a6abeebSBaptiste Daroussin * This is the meaty part of wcsxfrm & strxfrm. Note that it does 3852a6abeebSBaptiste Daroussin * NOT NULL terminate. That is left to the caller. 3862a6abeebSBaptiste Daroussin */ 3872a6abeebSBaptiste Daroussin size_t 3882a6abeebSBaptiste Daroussin _collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf, 3892a6abeebSBaptiste Daroussin size_t room) 390c3d0cca4SAndrey A. Chernov { 3912a6abeebSBaptiste Daroussin int pri; 3922a6abeebSBaptiste Daroussin int len; 3932a6abeebSBaptiste Daroussin const wchar_t *t; 3942a6abeebSBaptiste Daroussin wchar_t *tr = NULL; 3952a6abeebSBaptiste Daroussin int direc; 3962a6abeebSBaptiste Daroussin int pass; 3972a6abeebSBaptiste Daroussin const int32_t *state; 3982a6abeebSBaptiste Daroussin size_t want = 0; 3992a6abeebSBaptiste Daroussin size_t need = 0; 400c3d0cca4SAndrey A. Chernov 4012a6abeebSBaptiste Daroussin for (pass = 0; pass <= table->info->directive_count; pass++) { 4022a6abeebSBaptiste Daroussin 4032a6abeebSBaptiste Daroussin state = NULL; 4042a6abeebSBaptiste Daroussin 4052a6abeebSBaptiste Daroussin if (pass != 0) { 4062a6abeebSBaptiste Daroussin /* insert level separator from the previous pass */ 4072a6abeebSBaptiste Daroussin if (room) { 4082a6abeebSBaptiste Daroussin *xf++ = 1; 4092a6abeebSBaptiste Daroussin room--; 4102a6abeebSBaptiste Daroussin } 4112a6abeebSBaptiste Daroussin want++; 412c3d0cca4SAndrey A. Chernov } 413c3d0cca4SAndrey A. Chernov 4142a6abeebSBaptiste Daroussin /* special pass for undefined */ 4152a6abeebSBaptiste Daroussin if (pass == table->info->directive_count) { 4162a6abeebSBaptiste Daroussin direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 4172a6abeebSBaptiste Daroussin } else { 4182a6abeebSBaptiste Daroussin direc = table->info->directive[pass]; 4192a6abeebSBaptiste Daroussin } 4202a6abeebSBaptiste Daroussin 4212a6abeebSBaptiste Daroussin t = src; 4222a6abeebSBaptiste Daroussin 4232a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_BACKWARD) { 4242a6abeebSBaptiste Daroussin wchar_t *bp, *fp, c; 4252a6abeebSBaptiste Daroussin if (tr) 4262a6abeebSBaptiste Daroussin free(tr); 4272a6abeebSBaptiste Daroussin if ((tr = wcsdup(t)) == NULL) { 4282a6abeebSBaptiste Daroussin errno = ENOMEM; 4292a6abeebSBaptiste Daroussin goto fail; 4302a6abeebSBaptiste Daroussin } 4312a6abeebSBaptiste Daroussin bp = tr; 4322a6abeebSBaptiste Daroussin fp = tr + wcslen(tr) - 1; 4332a6abeebSBaptiste Daroussin while (bp < fp) { 4342a6abeebSBaptiste Daroussin c = *bp; 4352a6abeebSBaptiste Daroussin *bp++ = *fp; 4362a6abeebSBaptiste Daroussin *fp-- = c; 4372a6abeebSBaptiste Daroussin } 4382a6abeebSBaptiste Daroussin t = (const wchar_t *)tr; 4392a6abeebSBaptiste Daroussin } 4402a6abeebSBaptiste Daroussin 4412a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_POSITION) { 4422a6abeebSBaptiste Daroussin while (*t || state) { 4432a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 4442a6abeebSBaptiste Daroussin t += len; 4452a6abeebSBaptiste Daroussin if (pri <= 0) { 4462a6abeebSBaptiste Daroussin if (pri < 0) { 4472a6abeebSBaptiste Daroussin errno = EINVAL; 4482a6abeebSBaptiste Daroussin goto fail; 4492a6abeebSBaptiste Daroussin } 4502a6abeebSBaptiste Daroussin pri = COLLATE_MAX_PRIORITY; 4512a6abeebSBaptiste Daroussin } 4522a6abeebSBaptiste Daroussin if (room) { 4532a6abeebSBaptiste Daroussin *xf++ = pri; 4542a6abeebSBaptiste Daroussin room--; 4552a6abeebSBaptiste Daroussin } 4562a6abeebSBaptiste Daroussin want++; 4572a6abeebSBaptiste Daroussin need = want; 4582a6abeebSBaptiste Daroussin } 4592a6abeebSBaptiste Daroussin } else { 4602a6abeebSBaptiste Daroussin while (*t || state) { 4612a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 4622a6abeebSBaptiste Daroussin t += len; 4632a6abeebSBaptiste Daroussin if (pri <= 0) { 4642a6abeebSBaptiste Daroussin if (pri < 0) { 4652a6abeebSBaptiste Daroussin errno = EINVAL; 4662a6abeebSBaptiste Daroussin goto fail; 4672a6abeebSBaptiste Daroussin } 4682a6abeebSBaptiste Daroussin continue; 4692a6abeebSBaptiste Daroussin } 4702a6abeebSBaptiste Daroussin if (room) { 4712a6abeebSBaptiste Daroussin *xf++ = pri; 4722a6abeebSBaptiste Daroussin room--; 4732a6abeebSBaptiste Daroussin } 4742a6abeebSBaptiste Daroussin want++; 4752a6abeebSBaptiste Daroussin need = want; 4762a6abeebSBaptiste Daroussin } 4772a6abeebSBaptiste Daroussin } 4782a6abeebSBaptiste Daroussin } 4792a6abeebSBaptiste Daroussin if (tr) 4802a6abeebSBaptiste Daroussin free(tr); 4812a6abeebSBaptiste Daroussin return (need); 4822a6abeebSBaptiste Daroussin 4832a6abeebSBaptiste Daroussin fail: 4842a6abeebSBaptiste Daroussin if (tr) 4852a6abeebSBaptiste Daroussin free(tr); 4862a6abeebSBaptiste Daroussin return ((size_t)(-1)); 4872a6abeebSBaptiste Daroussin } 4882a6abeebSBaptiste Daroussin 4892a6abeebSBaptiste Daroussin /* 4902a6abeebSBaptiste Daroussin * In the non-POSIX case, we transform each character into a string of 4912a6abeebSBaptiste Daroussin * characters representing the character's priority. Since char is usually 4922a6abeebSBaptiste Daroussin * signed, we are limited by 7 bits per byte. To avoid zero, we need to add 4932a6abeebSBaptiste Daroussin * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 4942a6abeebSBaptiste Daroussin * bits per byte. 4952a6abeebSBaptiste Daroussin * 4962a6abeebSBaptiste Daroussin * It turns out that we sometimes have real priorities that are 4972a6abeebSBaptiste Daroussin * 31-bits wide. (But: be careful using priorities where the high 4982a6abeebSBaptiste Daroussin * order bit is set -- i.e. the priority is negative. The sort order 4992a6abeebSBaptiste Daroussin * may be surprising!) 5002a6abeebSBaptiste Daroussin * 5012a6abeebSBaptiste Daroussin * TODO: This would be a good area to optimize somewhat. It turns out 5022a6abeebSBaptiste Daroussin * that real prioririties *except for the last UNDEFINED pass* are generally 5032a6abeebSBaptiste Daroussin * very small. We need the localedef code to precalculate the max 5042a6abeebSBaptiste Daroussin * priority for us, and ideally also give us a mask, and then we could 5052a6abeebSBaptiste Daroussin * severely limit what we expand to. 5062a6abeebSBaptiste Daroussin */ 5072a6abeebSBaptiste Daroussin #define XFRM_BYTES 6 5082a6abeebSBaptiste Daroussin #define XFRM_OFFSET ('0') /* make all printable characters */ 5092a6abeebSBaptiste Daroussin #define XFRM_SHIFT 6 5102a6abeebSBaptiste Daroussin #define XFRM_MASK ((1 << XFRM_SHIFT) - 1) 5112a6abeebSBaptiste Daroussin #define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */ 5122a6abeebSBaptiste Daroussin 5132a6abeebSBaptiste Daroussin static int 5142a6abeebSBaptiste Daroussin xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass) 515926f20c9SAndrey A. Chernov { 5162a6abeebSBaptiste Daroussin /* we use unsigned to ensure zero fill on right shift */ 5172a6abeebSBaptiste Daroussin uint32_t val = (uint32_t)table->info->pri_count[pass]; 5182a6abeebSBaptiste Daroussin int nc = 0; 519926f20c9SAndrey A. Chernov 5202a6abeebSBaptiste Daroussin while (val) { 5212a6abeebSBaptiste Daroussin *p = (pri & XFRM_MASK) + XFRM_OFFSET; 5222a6abeebSBaptiste Daroussin pri >>= XFRM_SHIFT; 5232a6abeebSBaptiste Daroussin val >>= XFRM_SHIFT; 5242a6abeebSBaptiste Daroussin p++; 5252a6abeebSBaptiste Daroussin nc++; 5262a6abeebSBaptiste Daroussin } 5272a6abeebSBaptiste Daroussin return (nc); 528926f20c9SAndrey A. Chernov } 529926f20c9SAndrey A. Chernov 5302a6abeebSBaptiste Daroussin size_t 5312a6abeebSBaptiste Daroussin _collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf, 5322a6abeebSBaptiste Daroussin size_t room) 533c3d0cca4SAndrey A. Chernov { 5342a6abeebSBaptiste Daroussin int pri; 5352a6abeebSBaptiste Daroussin int len; 5362a6abeebSBaptiste Daroussin const wchar_t *t; 5372a6abeebSBaptiste Daroussin wchar_t *tr = NULL; 5382a6abeebSBaptiste Daroussin int direc; 5392a6abeebSBaptiste Daroussin int pass; 5402a6abeebSBaptiste Daroussin const int32_t *state; 5412a6abeebSBaptiste Daroussin size_t want = 0; 5422a6abeebSBaptiste Daroussin size_t need = 0; 5432a6abeebSBaptiste Daroussin int b; 5442a6abeebSBaptiste Daroussin uint8_t buf[XFRM_BYTES]; 545c3d0cca4SAndrey A. Chernov 5462a6abeebSBaptiste Daroussin for (pass = 0; pass <= table->info->directive_count; pass++) { 5472a6abeebSBaptiste Daroussin 5482a6abeebSBaptiste Daroussin state = NULL; 5492a6abeebSBaptiste Daroussin 5502a6abeebSBaptiste Daroussin if (pass != 0) { 5512a6abeebSBaptiste Daroussin /* insert level separator from the previous pass */ 5522a6abeebSBaptiste Daroussin if (room) { 5532a6abeebSBaptiste Daroussin *xf++ = XFRM_SEP; 5542a6abeebSBaptiste Daroussin room--; 555c3d0cca4SAndrey A. Chernov } 5562a6abeebSBaptiste Daroussin want++; 5572a6abeebSBaptiste Daroussin } 5582a6abeebSBaptiste Daroussin 5592a6abeebSBaptiste Daroussin /* special pass for undefined */ 5602a6abeebSBaptiste Daroussin if (pass == table->info->directive_count) { 5612a6abeebSBaptiste Daroussin direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 5622a6abeebSBaptiste Daroussin } else { 5632a6abeebSBaptiste Daroussin direc = table->info->directive[pass]; 5642a6abeebSBaptiste Daroussin } 5652a6abeebSBaptiste Daroussin 5662a6abeebSBaptiste Daroussin t = src; 5672a6abeebSBaptiste Daroussin 5682a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_BACKWARD) { 5692a6abeebSBaptiste Daroussin wchar_t *bp, *fp, c; 5702a6abeebSBaptiste Daroussin if (tr) 5712a6abeebSBaptiste Daroussin free(tr); 5722a6abeebSBaptiste Daroussin if ((tr = wcsdup(t)) == NULL) { 5732a6abeebSBaptiste Daroussin errno = ENOMEM; 5742a6abeebSBaptiste Daroussin goto fail; 5752a6abeebSBaptiste Daroussin } 5762a6abeebSBaptiste Daroussin bp = tr; 5772a6abeebSBaptiste Daroussin fp = tr + wcslen(tr) - 1; 5782a6abeebSBaptiste Daroussin while (bp < fp) { 5792a6abeebSBaptiste Daroussin c = *bp; 5802a6abeebSBaptiste Daroussin *bp++ = *fp; 5812a6abeebSBaptiste Daroussin *fp-- = c; 5822a6abeebSBaptiste Daroussin } 5832a6abeebSBaptiste Daroussin t = (const wchar_t *)tr; 5842a6abeebSBaptiste Daroussin } 5852a6abeebSBaptiste Daroussin 5862a6abeebSBaptiste Daroussin if (direc & DIRECTIVE_POSITION) { 5872a6abeebSBaptiste Daroussin while (*t || state) { 5882a6abeebSBaptiste Daroussin 5892a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 5902a6abeebSBaptiste Daroussin t += len; 5912a6abeebSBaptiste Daroussin if (pri <= 0) { 5922a6abeebSBaptiste Daroussin if (pri < 0) { 5932a6abeebSBaptiste Daroussin errno = EINVAL; 5942a6abeebSBaptiste Daroussin goto fail; 5952a6abeebSBaptiste Daroussin } 5962a6abeebSBaptiste Daroussin pri = COLLATE_MAX_PRIORITY; 5972a6abeebSBaptiste Daroussin } 5982a6abeebSBaptiste Daroussin 5992a6abeebSBaptiste Daroussin b = xfrm(table, buf, pri, pass); 6002a6abeebSBaptiste Daroussin want += b; 6012a6abeebSBaptiste Daroussin if (room) { 6022a6abeebSBaptiste Daroussin while (b) { 6032a6abeebSBaptiste Daroussin b--; 6042a6abeebSBaptiste Daroussin if (room) { 6052a6abeebSBaptiste Daroussin *xf++ = buf[b]; 6062a6abeebSBaptiste Daroussin room--; 6072a6abeebSBaptiste Daroussin } 6082a6abeebSBaptiste Daroussin } 6092a6abeebSBaptiste Daroussin } 6102a6abeebSBaptiste Daroussin need = want; 6112a6abeebSBaptiste Daroussin } 6122a6abeebSBaptiste Daroussin } else { 6132a6abeebSBaptiste Daroussin while (*t || state) { 6142a6abeebSBaptiste Daroussin _collate_lookup(table, t, &len, &pri, pass, &state); 6152a6abeebSBaptiste Daroussin t += len; 6162a6abeebSBaptiste Daroussin if (pri <= 0) { 6172a6abeebSBaptiste Daroussin if (pri < 0) { 6182a6abeebSBaptiste Daroussin errno = EINVAL; 6192a6abeebSBaptiste Daroussin goto fail; 6202a6abeebSBaptiste Daroussin } 6212a6abeebSBaptiste Daroussin continue; 6222a6abeebSBaptiste Daroussin } 6232a6abeebSBaptiste Daroussin 6242a6abeebSBaptiste Daroussin b = xfrm(table, buf, pri, pass); 6252a6abeebSBaptiste Daroussin want += b; 6262a6abeebSBaptiste Daroussin if (room) { 6272a6abeebSBaptiste Daroussin 6282a6abeebSBaptiste Daroussin while (b) { 6292a6abeebSBaptiste Daroussin b--; 6302a6abeebSBaptiste Daroussin if (room) { 6312a6abeebSBaptiste Daroussin *xf++ = buf[b]; 6322a6abeebSBaptiste Daroussin room--; 6332a6abeebSBaptiste Daroussin } 6342a6abeebSBaptiste Daroussin } 6352a6abeebSBaptiste Daroussin } 6362a6abeebSBaptiste Daroussin need = want; 6372a6abeebSBaptiste Daroussin } 6382a6abeebSBaptiste Daroussin } 6392a6abeebSBaptiste Daroussin } 6402a6abeebSBaptiste Daroussin if (tr) 6412a6abeebSBaptiste Daroussin free(tr); 6422a6abeebSBaptiste Daroussin return (need); 6432a6abeebSBaptiste Daroussin 6442a6abeebSBaptiste Daroussin fail: 6452a6abeebSBaptiste Daroussin if (tr) 6462a6abeebSBaptiste Daroussin free(tr); 6472a6abeebSBaptiste Daroussin return ((size_t)(-1)); 6482a6abeebSBaptiste Daroussin } 6492a6abeebSBaptiste Daroussin 6502a6abeebSBaptiste Daroussin /* 6512a6abeebSBaptiste Daroussin * __collate_equiv_value returns the primary collation value for the given 6522a6abeebSBaptiste Daroussin * collating symbol specified by str and len. Zero or negative is returned 6532a6abeebSBaptiste Daroussin * if the collating symbol was not found. This function is used by bracket 6542a6abeebSBaptiste Daroussin * code in the TRE regex library. 6552a6abeebSBaptiste Daroussin */ 6562a6abeebSBaptiste Daroussin int 6572a6abeebSBaptiste Daroussin __collate_equiv_value(locale_t locale, const wchar_t *str, size_t len) 6582a6abeebSBaptiste Daroussin { 6592a6abeebSBaptiste Daroussin int32_t e; 6602a6abeebSBaptiste Daroussin 6612a6abeebSBaptiste Daroussin if (len < 1 || len >= COLLATE_STR_LEN) 6622a6abeebSBaptiste Daroussin return (-1); 6632a6abeebSBaptiste Daroussin 6642a6abeebSBaptiste Daroussin FIX_LOCALE(locale); 6652a6abeebSBaptiste Daroussin struct xlocale_collate *table = 6662a6abeebSBaptiste Daroussin (struct xlocale_collate*)locale->components[XLC_COLLATE]; 6672a6abeebSBaptiste Daroussin 6682a6abeebSBaptiste Daroussin if (table->__collate_load_error) 6692a6abeebSBaptiste Daroussin return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1); 6702a6abeebSBaptiste Daroussin 6712a6abeebSBaptiste Daroussin if (len == 1) { 6722a6abeebSBaptiste Daroussin e = -1; 6732a6abeebSBaptiste Daroussin if (*str <= UCHAR_MAX) 6742a6abeebSBaptiste Daroussin e = table->char_pri_table[*str].pri[0]; 6752a6abeebSBaptiste Daroussin else if (table->info->large_count > 0) { 6762a6abeebSBaptiste Daroussin collate_large_t *match_large; 6772a6abeebSBaptiste Daroussin match_large = largesearch(table, *str); 6782a6abeebSBaptiste Daroussin if (match_large) 6792a6abeebSBaptiste Daroussin e = match_large->pri.pri[0]; 6802a6abeebSBaptiste Daroussin } 6812a6abeebSBaptiste Daroussin if (e == 0) 6822a6abeebSBaptiste Daroussin return (1); 6832a6abeebSBaptiste Daroussin return (e > 0 ? e : 0); 6842a6abeebSBaptiste Daroussin } 6852a6abeebSBaptiste Daroussin if (table->info->chain_count > 0) { 6862a6abeebSBaptiste Daroussin wchar_t name[COLLATE_STR_LEN]; 6872a6abeebSBaptiste Daroussin collate_chain_t *match_chain; 6882a6abeebSBaptiste Daroussin int clen; 6892a6abeebSBaptiste Daroussin 6902a6abeebSBaptiste Daroussin wcsncpy (name, str, len); 6912a6abeebSBaptiste Daroussin name[len] = 0; 6922a6abeebSBaptiste Daroussin match_chain = chainsearch(table, name, &clen); 6932a6abeebSBaptiste Daroussin if (match_chain) { 6942a6abeebSBaptiste Daroussin e = match_chain->pri[0]; 6952a6abeebSBaptiste Daroussin if (e == 0) 6962a6abeebSBaptiste Daroussin return (1); 6972a6abeebSBaptiste Daroussin return (e < 0 ? -e : e); 6982a6abeebSBaptiste Daroussin } 6992a6abeebSBaptiste Daroussin } 7002a6abeebSBaptiste Daroussin return (0); 7012a6abeebSBaptiste Daroussin } 702