16b5e5868SGarrett D'Amore /* 26b5e5868SGarrett D'Amore * This file and its contents are supplied under the terms of the 36b5e5868SGarrett D'Amore * Common Development and Distribution License ("CDDL"), version 1.0. 4*5aec55ebSGarrett D'Amore * You may only use this file in accordance with the terms of version 5*5aec55ebSGarrett D'Amore * 1.0 of the CDDL. 66b5e5868SGarrett D'Amore * 76b5e5868SGarrett D'Amore * A full copy of the text of the CDDL should have accompanied this 86b5e5868SGarrett D'Amore * source. A copy of the CDDL is also available via the Internet at 96b5e5868SGarrett D'Amore * http://www.illumos.org/license/CDDL. 106b5e5868SGarrett D'Amore */ 116b5e5868SGarrett D'Amore 126b5e5868SGarrett D'Amore /* 136b5e5868SGarrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 146b5e5868SGarrett D'Amore */ 156b5e5868SGarrett D'Amore 166b5e5868SGarrett D'Amore /* 176b5e5868SGarrett D'Amore * LC_COLLATE database generation routines for localedef. 186b5e5868SGarrett D'Amore */ 196b5e5868SGarrett D'Amore 206b5e5868SGarrett D'Amore #include <stdio.h> 216b5e5868SGarrett D'Amore #include <stdlib.h> 226b5e5868SGarrett D'Amore #include <errno.h> 236b5e5868SGarrett D'Amore #include <string.h> 246b5e5868SGarrett D'Amore #include <sys/types.h> 256b5e5868SGarrett D'Amore #include <sys/avl.h> 266b5e5868SGarrett D'Amore #include <string.h> 276b5e5868SGarrett D'Amore #include <unistd.h> 286b5e5868SGarrett D'Amore #include <wchar.h> 296b5e5868SGarrett D'Amore #include <widec.h> 306b5e5868SGarrett D'Amore #include <limits.h> 316b5e5868SGarrett D'Amore #include "localedef.h" 326b5e5868SGarrett D'Amore #include "parser.tab.h" 336b5e5868SGarrett D'Amore #include "collate.h" 346b5e5868SGarrett D'Amore 356b5e5868SGarrett D'Amore /* 36723fee08SGarrett D'Amore * Design notes. 37723fee08SGarrett D'Amore * 38723fee08SGarrett D'Amore * It will be extremely helpful to the reader if they have access to 39723fee08SGarrett D'Amore * the localedef and locale file format specifications available. 40723fee08SGarrett D'Amore * Latest versions of these are available from www.opengroup.org. 41723fee08SGarrett D'Amore * 42723fee08SGarrett D'Amore * The design for the collation code is a bit complex. The goal is a 43723fee08SGarrett D'Amore * single collation database as described in collate.h (in 44723fee08SGarrett D'Amore * libc/port/locale). However, there are some other tidbits: 45723fee08SGarrett D'Amore * 46723fee08SGarrett D'Amore * a) The substitution entries are now a directly indexable array. A 47723fee08SGarrett D'Amore * priority elsewhere in the table is taken as an index into the 48723fee08SGarrett D'Amore * substitution table if it has a high bit (COLLATE_SUBST_PRIORITY) 49723fee08SGarrett D'Amore * set. (The bit is cleared and the result is the index into the 50723fee08SGarrett D'Amore * table. 51723fee08SGarrett D'Amore * 52723fee08SGarrett D'Amore * b) We eliminate duplicate entries into the substitution table. 53723fee08SGarrett D'Amore * This saves a lot of space. 54723fee08SGarrett D'Amore * 55723fee08SGarrett D'Amore * c) The priorities for each level are "compressed", so that each 56723fee08SGarrett D'Amore * sorting level has consecutively numbered priorities starting at 1. 57723fee08SGarrett D'Amore * (O is reserved for the ignore priority.) This means sort levels 58723fee08SGarrett D'Amore * which only have a few distinct priorities can represent the 59723fee08SGarrett D'Amore * priority level in fewer bits, which makes the strxfrm output 60723fee08SGarrett D'Amore * smaller. 61723fee08SGarrett D'Amore * 62723fee08SGarrett D'Amore * d) We record the total number of priorities so that strxfrm can 63723fee08SGarrett D'Amore * figure out how many bytes to expand a numeric priority into. 64723fee08SGarrett D'Amore * 65723fee08SGarrett D'Amore * e) For the UNDEFINED pass (the last pass), we record the maximum 66723fee08SGarrett D'Amore * number of bits needed to uniquely prioritize these entries, so that 67723fee08SGarrett D'Amore * the last pass can also use smaller strxfrm output when possible. 68723fee08SGarrett D'Amore * 69723fee08SGarrett D'Amore * f) Priorities with the sign bit set are verboten. This works out 70723fee08SGarrett D'Amore * because no active character set needs that bit to carry significant 71723fee08SGarrett D'Amore * information once the character is in wide form. 72723fee08SGarrett D'Amore * 73723fee08SGarrett D'Amore * To process the entire data to make the database, we actually run 74723fee08SGarrett D'Amore * multiple passes over the data. 75723fee08SGarrett D'Amore * 76723fee08SGarrett D'Amore * The first pass, which is done at parse time, identifies elements, 77723fee08SGarrett D'Amore * substitutions, and such, and records them in priority order. As 78723fee08SGarrett D'Amore * some priorities can refer to other priorities, using forward 79723fee08SGarrett D'Amore * references, we use a table of references indicating whether the 80723fee08SGarrett D'Amore * priority's value has been resolved, or whether it is still a 81723fee08SGarrett D'Amore * reference. 82723fee08SGarrett D'Amore * 83723fee08SGarrett D'Amore * The second pass walks over all the items in priority order, noting 84723fee08SGarrett D'Amore * that they are used directly, and not just an indirect reference. 85723fee08SGarrett D'Amore * This is done by creating a "weight" structure for the item. The 86723fee08SGarrett D'Amore * weights are stashed in an AVL tree sorted by relative "priority". 87723fee08SGarrett D'Amore * 88723fee08SGarrett D'Amore * The third pass walks over all the weight structures, in priority 89723fee08SGarrett D'Amore * order, and assigns a new monotonically increasing (per sort level) 90723fee08SGarrett D'Amore * weight value to them. These are the values that will actually be 91723fee08SGarrett D'Amore * written to the file. 92723fee08SGarrett D'Amore * 93723fee08SGarrett D'Amore * The fourth pass just writes the data out. 94723fee08SGarrett D'Amore */ 95723fee08SGarrett D'Amore 96723fee08SGarrett D'Amore /* 976b5e5868SGarrett D'Amore * In order to resolve the priorities, we create a table of priorities. 986b5e5868SGarrett D'Amore * Entries in the table can be in one of three states. 996b5e5868SGarrett D'Amore * 1006b5e5868SGarrett D'Amore * UNKNOWN is for newly allocated entries, and indicates that nothing 1016b5e5868SGarrett D'Amore * is known about the priority. (For example, when new entries are created 1026b5e5868SGarrett D'Amore * for collating-symbols, this is the value assigned for them until the 1036b5e5868SGarrett D'Amore * collating symbol's order has been determined. 1046b5e5868SGarrett D'Amore * 1056b5e5868SGarrett D'Amore * RESOLVED is used for an entry where the priority indicates the final 1066b5e5868SGarrett D'Amore * numeric weight. 1076b5e5868SGarrett D'Amore * 1086b5e5868SGarrett D'Amore * REFER is used for entries that reference other entries. Typically 1096b5e5868SGarrett D'Amore * this is used for forward references. A collating-symbol can never 1106b5e5868SGarrett D'Amore * have this value. 1116b5e5868SGarrett D'Amore * 1126b5e5868SGarrett D'Amore * The "pass" field is used during final resolution to aid in detection 1136b5e5868SGarrett D'Amore * of referencing loops. (For example <A> depends on <B>, but <B> has its 1146b5e5868SGarrett D'Amore * priority dependent on <A>.) 1156b5e5868SGarrett D'Amore */ 1166b5e5868SGarrett D'Amore typedef enum { 1176b5e5868SGarrett D'Amore UNKNOWN, /* priority is totally unknown */ 1186b5e5868SGarrett D'Amore RESOLVED, /* priority value fully resolved */ 1196b5e5868SGarrett D'Amore REFER /* priority is a reference (index) */ 1206b5e5868SGarrett D'Amore } res_t; 1216b5e5868SGarrett D'Amore 122723fee08SGarrett D'Amore typedef struct weight { 123723fee08SGarrett D'Amore int32_t pri; 124723fee08SGarrett D'Amore int opt; 125723fee08SGarrett D'Amore avl_node_t avl; 126723fee08SGarrett D'Amore } weight_t; 127723fee08SGarrett D'Amore 1286b5e5868SGarrett D'Amore typedef struct priority { 1296b5e5868SGarrett D'Amore res_t res; 1306b5e5868SGarrett D'Amore int32_t pri; 1316b5e5868SGarrett D'Amore int pass; 1326b5e5868SGarrett D'Amore int lineno; 1336b5e5868SGarrett D'Amore } collpri_t; 1346b5e5868SGarrett D'Amore 1356b5e5868SGarrett D'Amore #define NUM_WT collinfo.directive_count 1366b5e5868SGarrett D'Amore 1376b5e5868SGarrett D'Amore /* 1386b5e5868SGarrett D'Amore * These are the abstract collating symbols, which are just a symbolic 1396b5e5868SGarrett D'Amore * way to reference a priority. 1406b5e5868SGarrett D'Amore */ 1416b5e5868SGarrett D'Amore struct collsym { 1426b5e5868SGarrett D'Amore char *name; 1436b5e5868SGarrett D'Amore int32_t ref; 1446b5e5868SGarrett D'Amore avl_node_t avl; 1456b5e5868SGarrett D'Amore }; 1466b5e5868SGarrett D'Amore 1476b5e5868SGarrett D'Amore /* 1486b5e5868SGarrett D'Amore * These are also abstract collating symbols, but we allow them to have 1496b5e5868SGarrett D'Amore * different priorities at different levels. 1506b5e5868SGarrett D'Amore */ 1516b5e5868SGarrett D'Amore typedef struct collundef { 1526b5e5868SGarrett D'Amore char *name; 1536b5e5868SGarrett D'Amore int32_t ref[COLL_WEIGHTS_MAX]; 1546b5e5868SGarrett D'Amore avl_node_t avl; 1556b5e5868SGarrett D'Amore } collundef_t; 1566b5e5868SGarrett D'Amore 1576b5e5868SGarrett D'Amore /* 1586b5e5868SGarrett D'Amore * These are called "chains" in libc. This records the fact that two 1596b5e5868SGarrett D'Amore * more characters should be treated as a single collating entity when 1606b5e5868SGarrett D'Amore * they appear together. For example, in Spanish <C><h> gets collated 1616b5e5868SGarrett D'Amore * as a character between <C> and <D>. 1626b5e5868SGarrett D'Amore */ 1636b5e5868SGarrett D'Amore struct collelem { 1646b5e5868SGarrett D'Amore char *symbol; 1656b5e5868SGarrett D'Amore wchar_t *expand; 1666b5e5868SGarrett D'Amore int32_t ref[COLL_WEIGHTS_MAX]; 1676b5e5868SGarrett D'Amore avl_node_t avl_bysymbol; 1686b5e5868SGarrett D'Amore avl_node_t avl_byexpand; 1696b5e5868SGarrett D'Amore }; 1706b5e5868SGarrett D'Amore 1716b5e5868SGarrett D'Amore /* 1726b5e5868SGarrett D'Amore * Individual characters have a sequence of weights as well. 1736b5e5868SGarrett D'Amore */ 1746b5e5868SGarrett D'Amore typedef struct collchar { 1756b5e5868SGarrett D'Amore wchar_t wc; 176723fee08SGarrett D'Amore int32_t ref[COLL_WEIGHTS_MAX]; 1776b5e5868SGarrett D'Amore avl_node_t avl; 1786b5e5868SGarrett D'Amore } collchar_t; 1796b5e5868SGarrett D'Amore 1806b5e5868SGarrett D'Amore /* 1816b5e5868SGarrett D'Amore * Substitution entries. The key is itself a priority. Note that 1826b5e5868SGarrett D'Amore * when we create one of these, we *automatically* wind up with a 1836b5e5868SGarrett D'Amore * fully resolved priority for the key, because creation of 1846b5e5868SGarrett D'Amore * substitutions creates a resolved priority at the same time. 1856b5e5868SGarrett D'Amore */ 1866b5e5868SGarrett D'Amore typedef struct { 1876b5e5868SGarrett D'Amore int32_t key; 1886b5e5868SGarrett D'Amore int32_t ref[COLLATE_STR_LEN]; 1896b5e5868SGarrett D'Amore avl_node_t avl; 190723fee08SGarrett D'Amore avl_node_t avl_ref; 1916b5e5868SGarrett D'Amore } subst_t; 1926b5e5868SGarrett D'Amore 1936b5e5868SGarrett D'Amore static avl_tree_t collsyms; 1946b5e5868SGarrett D'Amore static avl_tree_t collundefs; 1956b5e5868SGarrett D'Amore static avl_tree_t elem_by_symbol; 1966b5e5868SGarrett D'Amore static avl_tree_t elem_by_expand; 1976b5e5868SGarrett D'Amore static avl_tree_t collchars; 1986b5e5868SGarrett D'Amore static avl_tree_t substs[COLL_WEIGHTS_MAX]; 199723fee08SGarrett D'Amore static avl_tree_t substs_ref[COLL_WEIGHTS_MAX]; 200723fee08SGarrett D'Amore static avl_tree_t weights[COLL_WEIGHTS_MAX]; 201723fee08SGarrett D'Amore static int32_t nweight[COLL_WEIGHTS_MAX]; 2026b5e5868SGarrett D'Amore 2036b5e5868SGarrett D'Amore /* 2046b5e5868SGarrett D'Amore * This is state tracking for the ellipsis token. Note that we start 2056b5e5868SGarrett D'Amore * the initial values so that the ellipsis logic will think we got a 2066b5e5868SGarrett D'Amore * magic starting value of NUL. It starts at minus one because the 2076b5e5868SGarrett D'Amore * starting point is exclusive -- i.e. the starting point is not 2086b5e5868SGarrett D'Amore * itself handled by the ellipsis code. 2096b5e5868SGarrett D'Amore */ 2106b5e5868SGarrett D'Amore static int currorder = EOF; 2116b5e5868SGarrett D'Amore static int lastorder = EOF; 2126b5e5868SGarrett D'Amore static collelem_t *currelem; 2136b5e5868SGarrett D'Amore static collchar_t *currchar; 2146b5e5868SGarrett D'Amore static collundef_t *currundef; 2156b5e5868SGarrett D'Amore static wchar_t ellipsis_start = 0; 2166b5e5868SGarrett D'Amore static int32_t ellipsis_weights[COLL_WEIGHTS_MAX]; 2176b5e5868SGarrett D'Amore 2186b5e5868SGarrett D'Amore /* 2196b5e5868SGarrett D'Amore * We keep a running tally of weights. 2206b5e5868SGarrett D'Amore */ 2216b5e5868SGarrett D'Amore static int nextpri = 1; 222723fee08SGarrett D'Amore static int nextsubst[COLL_WEIGHTS_MAX] = { 0 }; 2236b5e5868SGarrett D'Amore 2246b5e5868SGarrett D'Amore /* 2256b5e5868SGarrett D'Amore * This array collects up the weights for each level. 2266b5e5868SGarrett D'Amore */ 2276b5e5868SGarrett D'Amore static int32_t order_weights[COLL_WEIGHTS_MAX]; 2286b5e5868SGarrett D'Amore static int curr_weight = 0; 2296b5e5868SGarrett D'Amore static int32_t subst_weights[COLLATE_STR_LEN]; 2306b5e5868SGarrett D'Amore static int curr_subst = 0; 2316b5e5868SGarrett D'Amore 2326b5e5868SGarrett D'Amore /* 2336b5e5868SGarrett D'Amore * Some initial priority values. 2346b5e5868SGarrett D'Amore */ 2356b5e5868SGarrett D'Amore static int32_t pri_undefined[COLL_WEIGHTS_MAX]; 2366b5e5868SGarrett D'Amore static int32_t pri_ignore; 2376b5e5868SGarrett D'Amore 2386b5e5868SGarrett D'Amore static collate_info_t collinfo; 2396b5e5868SGarrett D'Amore 2406b5e5868SGarrett D'Amore static collpri_t *prilist = NULL; 2416b5e5868SGarrett D'Amore static int numpri = 0; 2426b5e5868SGarrett D'Amore static int maxpri = 0; 2436b5e5868SGarrett D'Amore 2446b5e5868SGarrett D'Amore static void start_order(int); 2456b5e5868SGarrett D'Amore 2466b5e5868SGarrett D'Amore static int32_t 2476b5e5868SGarrett D'Amore new_pri(void) 2486b5e5868SGarrett D'Amore { 2496b5e5868SGarrett D'Amore int i; 2506b5e5868SGarrett D'Amore 2516b5e5868SGarrett D'Amore if (numpri >= maxpri) { 2526b5e5868SGarrett D'Amore maxpri = maxpri ? maxpri * 2 : 1024; 2536b5e5868SGarrett D'Amore prilist = realloc(prilist, sizeof (collpri_t) * maxpri); 2546b5e5868SGarrett D'Amore if (prilist == NULL) { 2556b5e5868SGarrett D'Amore errf(_("out of memory")); 2566b5e5868SGarrett D'Amore return (-1); 2576b5e5868SGarrett D'Amore } 2586b5e5868SGarrett D'Amore for (i = numpri; i < maxpri; i++) { 2596b5e5868SGarrett D'Amore prilist[i].res = UNKNOWN; 2606b5e5868SGarrett D'Amore prilist[i].pri = 0; 2616b5e5868SGarrett D'Amore prilist[i].pass = 0; 2626b5e5868SGarrett D'Amore } 2636b5e5868SGarrett D'Amore } 2646b5e5868SGarrett D'Amore return (numpri++); 2656b5e5868SGarrett D'Amore } 2666b5e5868SGarrett D'Amore 2676b5e5868SGarrett D'Amore static collpri_t * 2686b5e5868SGarrett D'Amore get_pri(int32_t ref) 2696b5e5868SGarrett D'Amore { 2706b5e5868SGarrett D'Amore if ((ref < 0) || (ref > numpri)) { 2716b5e5868SGarrett D'Amore INTERR; 2726b5e5868SGarrett D'Amore return (NULL); 2736b5e5868SGarrett D'Amore } 2746b5e5868SGarrett D'Amore return (&prilist[ref]); 2756b5e5868SGarrett D'Amore } 2766b5e5868SGarrett D'Amore 2776b5e5868SGarrett D'Amore static void 2786b5e5868SGarrett D'Amore set_pri(int32_t ref, int32_t v, res_t res) 2796b5e5868SGarrett D'Amore { 2806b5e5868SGarrett D'Amore collpri_t *pri; 2816b5e5868SGarrett D'Amore 2826b5e5868SGarrett D'Amore pri = get_pri(ref); 2836b5e5868SGarrett D'Amore 2846b5e5868SGarrett D'Amore if ((res == REFER) && ((v < 0) || (v >= numpri))) { 2856b5e5868SGarrett D'Amore INTERR; 2866b5e5868SGarrett D'Amore } 2876b5e5868SGarrett D'Amore 2886b5e5868SGarrett D'Amore /* Resolve self references */ 2896b5e5868SGarrett D'Amore if ((res == REFER) && (ref == v)) { 2906b5e5868SGarrett D'Amore v = nextpri; 2916b5e5868SGarrett D'Amore res = RESOLVED; 2926b5e5868SGarrett D'Amore } 2936b5e5868SGarrett D'Amore 2946b5e5868SGarrett D'Amore if (pri->res != UNKNOWN) { 2956b5e5868SGarrett D'Amore warn(_("repeated item in order list (first on %d)"), 2966b5e5868SGarrett D'Amore pri->lineno); 2976b5e5868SGarrett D'Amore return; 2986b5e5868SGarrett D'Amore } 2996b5e5868SGarrett D'Amore pri->lineno = lineno; 3006b5e5868SGarrett D'Amore pri->pri = v; 3016b5e5868SGarrett D'Amore pri->res = res; 3026b5e5868SGarrett D'Amore } 3036b5e5868SGarrett D'Amore 3046b5e5868SGarrett D'Amore static int32_t 3056b5e5868SGarrett D'Amore resolve_pri(int32_t ref) 3066b5e5868SGarrett D'Amore { 3076b5e5868SGarrett D'Amore collpri_t *pri; 3086b5e5868SGarrett D'Amore static int32_t pass = 0; 3096b5e5868SGarrett D'Amore 3106b5e5868SGarrett D'Amore pri = get_pri(ref); 3116b5e5868SGarrett D'Amore pass++; 3126b5e5868SGarrett D'Amore while (pri->res == REFER) { 3136b5e5868SGarrett D'Amore if (pri->pass == pass) { 3146b5e5868SGarrett D'Amore /* report a line with the circular symbol */ 3156b5e5868SGarrett D'Amore lineno = pri->lineno; 3166b5e5868SGarrett D'Amore errf(_("circular reference in order list")); 3176b5e5868SGarrett D'Amore return (-1); 3186b5e5868SGarrett D'Amore } 3196b5e5868SGarrett D'Amore if ((pri->pri < 0) || (pri->pri >= numpri)) { 3206b5e5868SGarrett D'Amore INTERR; 3216b5e5868SGarrett D'Amore return (-1); 3226b5e5868SGarrett D'Amore } 3236b5e5868SGarrett D'Amore pri->pass = pass; 3246b5e5868SGarrett D'Amore pri = &prilist[pri->pri]; 3256b5e5868SGarrett D'Amore } 3266b5e5868SGarrett D'Amore 3276b5e5868SGarrett D'Amore if (pri->res == UNKNOWN) { 3286b5e5868SGarrett D'Amore return (-1); 3296b5e5868SGarrett D'Amore } 3306b5e5868SGarrett D'Amore if (pri->res != RESOLVED) 3316b5e5868SGarrett D'Amore INTERR; 3326b5e5868SGarrett D'Amore 3336b5e5868SGarrett D'Amore return (pri->pri); 3346b5e5868SGarrett D'Amore } 3356b5e5868SGarrett D'Amore 3366b5e5868SGarrett D'Amore static int 337723fee08SGarrett D'Amore weight_compare(const void *n1, const void *n2) 338723fee08SGarrett D'Amore { 339723fee08SGarrett D'Amore int32_t k1 = ((const weight_t *)n1)->pri; 340723fee08SGarrett D'Amore int32_t k2 = ((const weight_t *)n2)->pri; 341723fee08SGarrett D'Amore 342723fee08SGarrett D'Amore return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); 343723fee08SGarrett D'Amore } 344723fee08SGarrett D'Amore 345723fee08SGarrett D'Amore static int 3466b5e5868SGarrett D'Amore collsym_compare(const void *n1, const void *n2) 3476b5e5868SGarrett D'Amore { 3486b5e5868SGarrett D'Amore const collsym_t *c1 = n1; 3496b5e5868SGarrett D'Amore const collsym_t *c2 = n2; 3506b5e5868SGarrett D'Amore int rv; 3516b5e5868SGarrett D'Amore 3526b5e5868SGarrett D'Amore rv = strcmp(c1->name, c2->name); 3536b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 3546b5e5868SGarrett D'Amore } 3556b5e5868SGarrett D'Amore 3566b5e5868SGarrett D'Amore static int 3576b5e5868SGarrett D'Amore collundef_compare(const void *n1, const void *n2) 3586b5e5868SGarrett D'Amore { 3596b5e5868SGarrett D'Amore const collundef_t *c1 = n1; 3606b5e5868SGarrett D'Amore const collundef_t *c2 = n2; 3616b5e5868SGarrett D'Amore int rv; 3626b5e5868SGarrett D'Amore 3636b5e5868SGarrett D'Amore rv = strcmp(c1->name, c2->name); 3646b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 3656b5e5868SGarrett D'Amore } 3666b5e5868SGarrett D'Amore 3676b5e5868SGarrett D'Amore static int 3686b5e5868SGarrett D'Amore element_compare_symbol(const void *n1, const void *n2) 3696b5e5868SGarrett D'Amore { 3706b5e5868SGarrett D'Amore const collelem_t *c1 = n1; 3716b5e5868SGarrett D'Amore const collelem_t *c2 = n2; 3726b5e5868SGarrett D'Amore int rv; 3736b5e5868SGarrett D'Amore 3746b5e5868SGarrett D'Amore rv = strcmp(c1->symbol, c2->symbol); 3756b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 3766b5e5868SGarrett D'Amore } 3776b5e5868SGarrett D'Amore 3786b5e5868SGarrett D'Amore static int 3796b5e5868SGarrett D'Amore element_compare_expand(const void *n1, const void *n2) 3806b5e5868SGarrett D'Amore { 3816b5e5868SGarrett D'Amore const collelem_t *c1 = n1; 3826b5e5868SGarrett D'Amore const collelem_t *c2 = n2; 3836b5e5868SGarrett D'Amore int rv; 3846b5e5868SGarrett D'Amore 3856b5e5868SGarrett D'Amore rv = wcscmp(c1->expand, c2->expand); 3866b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 3876b5e5868SGarrett D'Amore } 3886b5e5868SGarrett D'Amore 3896b5e5868SGarrett D'Amore static int 3906b5e5868SGarrett D'Amore collchar_compare(const void *n1, const void *n2) 3916b5e5868SGarrett D'Amore { 3926b5e5868SGarrett D'Amore wchar_t k1 = ((const collchar_t *)n1)->wc; 3936b5e5868SGarrett D'Amore wchar_t k2 = ((const collchar_t *)n2)->wc; 3946b5e5868SGarrett D'Amore 3956b5e5868SGarrett D'Amore return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); 3966b5e5868SGarrett D'Amore } 3976b5e5868SGarrett D'Amore 3986b5e5868SGarrett D'Amore static int 3996b5e5868SGarrett D'Amore subst_compare(const void *n1, const void *n2) 4006b5e5868SGarrett D'Amore { 4016b5e5868SGarrett D'Amore int32_t k1 = ((const subst_t *)n1)->key; 4026b5e5868SGarrett D'Amore int32_t k2 = ((const subst_t *)n2)->key; 4036b5e5868SGarrett D'Amore 4046b5e5868SGarrett D'Amore return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); 4056b5e5868SGarrett D'Amore } 4066b5e5868SGarrett D'Amore 407723fee08SGarrett D'Amore static int 408723fee08SGarrett D'Amore subst_compare_ref(const void *n1, const void *n2) 409723fee08SGarrett D'Amore { 410723fee08SGarrett D'Amore int32_t *c1 = ((subst_t *)n1)->ref; 411723fee08SGarrett D'Amore int32_t *c2 = ((subst_t *)n2)->ref; 412723fee08SGarrett D'Amore int rv; 413723fee08SGarrett D'Amore 414723fee08SGarrett D'Amore rv = wcscmp((wchar_t *)c1, (wchar_t *)c2); 415723fee08SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 416723fee08SGarrett D'Amore } 417723fee08SGarrett D'Amore 4186b5e5868SGarrett D'Amore void 4196b5e5868SGarrett D'Amore init_collate(void) 4206b5e5868SGarrett D'Amore { 4216b5e5868SGarrett D'Amore int i; 4226b5e5868SGarrett D'Amore 4236b5e5868SGarrett D'Amore avl_create(&collsyms, collsym_compare, sizeof (collsym_t), 4246b5e5868SGarrett D'Amore offsetof(collsym_t, avl)); 4256b5e5868SGarrett D'Amore 4266b5e5868SGarrett D'Amore avl_create(&collundefs, collundef_compare, sizeof (collsym_t), 4276b5e5868SGarrett D'Amore offsetof(collundef_t, avl)); 4286b5e5868SGarrett D'Amore 4296b5e5868SGarrett D'Amore avl_create(&elem_by_symbol, element_compare_symbol, sizeof (collelem_t), 4306b5e5868SGarrett D'Amore offsetof(collelem_t, avl_bysymbol)); 4316b5e5868SGarrett D'Amore avl_create(&elem_by_expand, element_compare_expand, sizeof (collelem_t), 4326b5e5868SGarrett D'Amore offsetof(collelem_t, avl_byexpand)); 4336b5e5868SGarrett D'Amore 4346b5e5868SGarrett D'Amore avl_create(&collchars, collchar_compare, sizeof (collchar_t), 4356b5e5868SGarrett D'Amore offsetof(collchar_t, avl)); 4366b5e5868SGarrett D'Amore 437723fee08SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) { 4386b5e5868SGarrett D'Amore avl_create(&substs[i], subst_compare, sizeof (subst_t), 4396b5e5868SGarrett D'Amore offsetof(subst_t, avl)); 440723fee08SGarrett D'Amore avl_create(&substs_ref[i], subst_compare_ref, 441723fee08SGarrett D'Amore sizeof (subst_t), offsetof(subst_t, avl_ref)); 442723fee08SGarrett D'Amore avl_create(&weights[i], weight_compare, sizeof (weight_t), 443723fee08SGarrett D'Amore offsetof(weight_t, avl)); 444723fee08SGarrett D'Amore nweight[i] = 1; 445723fee08SGarrett D'Amore } 4466b5e5868SGarrett D'Amore 4476b5e5868SGarrett D'Amore (void) memset(&collinfo, 0, sizeof (collinfo)); 4486b5e5868SGarrett D'Amore 4496b5e5868SGarrett D'Amore /* allocate some initial priorities */ 4506b5e5868SGarrett D'Amore pri_ignore = new_pri(); 4516b5e5868SGarrett D'Amore 4526b5e5868SGarrett D'Amore set_pri(pri_ignore, 0, RESOLVED); 4536b5e5868SGarrett D'Amore 4546b5e5868SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) { 4556b5e5868SGarrett D'Amore pri_undefined[i] = new_pri(); 4566b5e5868SGarrett D'Amore 4576b5e5868SGarrett D'Amore /* we will override this later */ 4586b5e5868SGarrett D'Amore set_pri(pri_undefined[i], COLLATE_MAX_PRIORITY, UNKNOWN); 4596b5e5868SGarrett D'Amore } 4606b5e5868SGarrett D'Amore } 4616b5e5868SGarrett D'Amore 4626b5e5868SGarrett D'Amore void 4636b5e5868SGarrett D'Amore define_collsym(char *name) 4646b5e5868SGarrett D'Amore { 4656b5e5868SGarrett D'Amore collsym_t *sym; 4666b5e5868SGarrett D'Amore avl_index_t where; 4676b5e5868SGarrett D'Amore 4686b5e5868SGarrett D'Amore if ((sym = calloc(sizeof (*sym), 1)) == NULL) { 4696b5e5868SGarrett D'Amore errf(_("out of memory")); 4706b5e5868SGarrett D'Amore return; 4716b5e5868SGarrett D'Amore } 4726b5e5868SGarrett D'Amore sym->name = name; 4736b5e5868SGarrett D'Amore sym->ref = new_pri(); 4746b5e5868SGarrett D'Amore 4756b5e5868SGarrett D'Amore if (avl_find(&collsyms, sym, &where) != NULL) { 4766b5e5868SGarrett D'Amore /* 4776b5e5868SGarrett D'Amore * This should never happen because we are only called 4786b5e5868SGarrett D'Amore * for undefined symbols. 4796b5e5868SGarrett D'Amore */ 4806b5e5868SGarrett D'Amore INTERR; 4816b5e5868SGarrett D'Amore return; 4826b5e5868SGarrett D'Amore } 4836b5e5868SGarrett D'Amore avl_insert(&collsyms, sym, where); 4846b5e5868SGarrett D'Amore } 4856b5e5868SGarrett D'Amore 4866b5e5868SGarrett D'Amore collsym_t * 4876b5e5868SGarrett D'Amore lookup_collsym(char *name) 4886b5e5868SGarrett D'Amore { 4896b5e5868SGarrett D'Amore collsym_t srch; 4906b5e5868SGarrett D'Amore 4916b5e5868SGarrett D'Amore srch.name = name; 4926b5e5868SGarrett D'Amore return (avl_find(&collsyms, &srch, NULL)); 4936b5e5868SGarrett D'Amore } 4946b5e5868SGarrett D'Amore 4956b5e5868SGarrett D'Amore collelem_t * 4966b5e5868SGarrett D'Amore lookup_collelem(char *symbol) 4976b5e5868SGarrett D'Amore { 4986b5e5868SGarrett D'Amore collelem_t srch; 4996b5e5868SGarrett D'Amore 5006b5e5868SGarrett D'Amore srch.symbol = symbol; 5016b5e5868SGarrett D'Amore return (avl_find(&elem_by_symbol, &srch, NULL)); 5026b5e5868SGarrett D'Amore } 5036b5e5868SGarrett D'Amore 5046b5e5868SGarrett D'Amore static collundef_t * 5056b5e5868SGarrett D'Amore get_collundef(char *name) 5066b5e5868SGarrett D'Amore { 5076b5e5868SGarrett D'Amore collundef_t srch; 5086b5e5868SGarrett D'Amore collundef_t *ud; 5096b5e5868SGarrett D'Amore avl_index_t where; 5106b5e5868SGarrett D'Amore int i; 5116b5e5868SGarrett D'Amore 5126b5e5868SGarrett D'Amore srch.name = name; 5136b5e5868SGarrett D'Amore if ((ud = avl_find(&collundefs, &srch, &where)) == NULL) { 5146b5e5868SGarrett D'Amore if (((ud = calloc(sizeof (*ud), 1)) == NULL) || 5156b5e5868SGarrett D'Amore ((ud->name = strdup(name)) == NULL)) { 5166b5e5868SGarrett D'Amore errf(_("out of memory")); 5176b5e5868SGarrett D'Amore return (NULL); 5186b5e5868SGarrett D'Amore } 5196b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 5206b5e5868SGarrett D'Amore ud->ref[i] = new_pri(); 5216b5e5868SGarrett D'Amore } 5226b5e5868SGarrett D'Amore avl_insert(&collundefs, ud, where); 5236b5e5868SGarrett D'Amore } 5246b5e5868SGarrett D'Amore add_charmap_undefined(name); 5256b5e5868SGarrett D'Amore return (ud); 5266b5e5868SGarrett D'Amore } 5276b5e5868SGarrett D'Amore 5286b5e5868SGarrett D'Amore static collchar_t * 5296b5e5868SGarrett D'Amore get_collchar(wchar_t wc, int create) 5306b5e5868SGarrett D'Amore { 5316b5e5868SGarrett D'Amore collchar_t srch; 5326b5e5868SGarrett D'Amore collchar_t *cc; 5336b5e5868SGarrett D'Amore avl_index_t where; 5346b5e5868SGarrett D'Amore int i; 5356b5e5868SGarrett D'Amore 5366b5e5868SGarrett D'Amore srch.wc = wc; 5376b5e5868SGarrett D'Amore cc = avl_find(&collchars, &srch, &where); 5386b5e5868SGarrett D'Amore if ((cc == NULL) && create) { 5396b5e5868SGarrett D'Amore if ((cc = calloc(sizeof (*cc), 1)) == NULL) { 5406b5e5868SGarrett D'Amore errf(_("out of memory")); 5416b5e5868SGarrett D'Amore return (NULL); 5426b5e5868SGarrett D'Amore } 5436b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 5446b5e5868SGarrett D'Amore cc->ref[i] = new_pri(); 5456b5e5868SGarrett D'Amore } 5466b5e5868SGarrett D'Amore cc->wc = wc; 5476b5e5868SGarrett D'Amore avl_insert(&collchars, cc, where); 5486b5e5868SGarrett D'Amore } 5496b5e5868SGarrett D'Amore return (cc); 5506b5e5868SGarrett D'Amore } 5516b5e5868SGarrett D'Amore 5526b5e5868SGarrett D'Amore void 5536b5e5868SGarrett D'Amore end_order_collsym(collsym_t *sym) 5546b5e5868SGarrett D'Amore { 5556b5e5868SGarrett D'Amore start_order(T_COLLSYM); 5566b5e5868SGarrett D'Amore /* update the weight */ 5576b5e5868SGarrett D'Amore 5586b5e5868SGarrett D'Amore set_pri(sym->ref, nextpri, RESOLVED); 5596b5e5868SGarrett D'Amore nextpri++; 5606b5e5868SGarrett D'Amore } 5616b5e5868SGarrett D'Amore 5626b5e5868SGarrett D'Amore void 5636b5e5868SGarrett D'Amore end_order(void) 5646b5e5868SGarrett D'Amore { 5656b5e5868SGarrett D'Amore int i; 5666b5e5868SGarrett D'Amore int32_t pri; 5676b5e5868SGarrett D'Amore int32_t ref; 5686b5e5868SGarrett D'Amore collpri_t *p; 5696b5e5868SGarrett D'Amore 5706b5e5868SGarrett D'Amore /* advance the priority/weight */ 5716b5e5868SGarrett D'Amore pri = nextpri; 5726b5e5868SGarrett D'Amore 5736b5e5868SGarrett D'Amore switch (currorder) { 5746b5e5868SGarrett D'Amore case T_CHAR: 5756b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 5766b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) || 5776b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) || 5786b5e5868SGarrett D'Amore (p->pri == -1)) { 5796b5e5868SGarrett D'Amore /* unspecified weight is a self reference */ 5806b5e5868SGarrett D'Amore set_pri(currchar->ref[i], pri, RESOLVED); 5816b5e5868SGarrett D'Amore } else { 5826b5e5868SGarrett D'Amore set_pri(currchar->ref[i], ref, REFER); 5836b5e5868SGarrett D'Amore } 5846b5e5868SGarrett D'Amore order_weights[i] = -1; 5856b5e5868SGarrett D'Amore } 5866b5e5868SGarrett D'Amore 5876b5e5868SGarrett D'Amore /* leave a cookie trail in case next symbol is ellipsis */ 5886b5e5868SGarrett D'Amore ellipsis_start = currchar->wc + 1; 5896b5e5868SGarrett D'Amore currchar = NULL; 5906b5e5868SGarrett D'Amore break; 5916b5e5868SGarrett D'Amore 5926b5e5868SGarrett D'Amore case T_ELLIPSIS: 5936b5e5868SGarrett D'Amore /* save off the weights were we can find them */ 5946b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 5956b5e5868SGarrett D'Amore ellipsis_weights[i] = order_weights[i]; 5966b5e5868SGarrett D'Amore order_weights[i] = -1; 5976b5e5868SGarrett D'Amore } 5986b5e5868SGarrett D'Amore break; 5996b5e5868SGarrett D'Amore 6006b5e5868SGarrett D'Amore case T_COLLELEM: 6016b5e5868SGarrett D'Amore if (currelem == NULL) { 6026b5e5868SGarrett D'Amore INTERR; 6036b5e5868SGarrett D'Amore } else { 6046b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 6056b5e5868SGarrett D'Amore 6066b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) || 6076b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) || 6086b5e5868SGarrett D'Amore (p->pri == -1)) { 6096b5e5868SGarrett D'Amore set_pri(currelem->ref[i], pri, 6106b5e5868SGarrett D'Amore RESOLVED); 6116b5e5868SGarrett D'Amore } else { 6126b5e5868SGarrett D'Amore set_pri(currelem->ref[i], ref, REFER); 6136b5e5868SGarrett D'Amore } 6146b5e5868SGarrett D'Amore order_weights[i] = -1; 6156b5e5868SGarrett D'Amore } 6166b5e5868SGarrett D'Amore } 6176b5e5868SGarrett D'Amore break; 6186b5e5868SGarrett D'Amore 6196b5e5868SGarrett D'Amore case T_UNDEFINED: 6206b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 6216b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) || 6226b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) || 6236b5e5868SGarrett D'Amore (p->pri == -1)) { 6246b5e5868SGarrett D'Amore set_pri(pri_undefined[i], -1, RESOLVED); 6256b5e5868SGarrett D'Amore } else { 6266b5e5868SGarrett D'Amore set_pri(pri_undefined[i], ref, REFER); 6276b5e5868SGarrett D'Amore } 6286b5e5868SGarrett D'Amore order_weights[i] = -1; 6296b5e5868SGarrett D'Amore } 6306b5e5868SGarrett D'Amore break; 6316b5e5868SGarrett D'Amore 6326b5e5868SGarrett D'Amore case T_SYMBOL: 6336b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) || 6346b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) || 6356b5e5868SGarrett D'Amore (p->pri == -1)) { 6366b5e5868SGarrett D'Amore set_pri(currundef->ref[i], pri, RESOLVED); 6376b5e5868SGarrett D'Amore } else { 6386b5e5868SGarrett D'Amore set_pri(currundef->ref[i], ref, REFER); 6396b5e5868SGarrett D'Amore } 6406b5e5868SGarrett D'Amore order_weights[i] = -1; 6416b5e5868SGarrett D'Amore break; 6426b5e5868SGarrett D'Amore 6436b5e5868SGarrett D'Amore default: 6446b5e5868SGarrett D'Amore INTERR; 6456b5e5868SGarrett D'Amore } 6466b5e5868SGarrett D'Amore 6476b5e5868SGarrett D'Amore nextpri++; 6486b5e5868SGarrett D'Amore } 6496b5e5868SGarrett D'Amore 6506b5e5868SGarrett D'Amore static void 6516b5e5868SGarrett D'Amore start_order(int type) 6526b5e5868SGarrett D'Amore { 6536b5e5868SGarrett D'Amore int i; 6546b5e5868SGarrett D'Amore 6556b5e5868SGarrett D'Amore lastorder = currorder; 6566b5e5868SGarrett D'Amore currorder = type; 6576b5e5868SGarrett D'Amore 6586b5e5868SGarrett D'Amore /* this is used to protect ELLIPSIS processing */ 6596b5e5868SGarrett D'Amore if ((lastorder == T_ELLIPSIS) && (type != T_CHAR)) { 6606b5e5868SGarrett D'Amore errf(_("character value expected")); 6616b5e5868SGarrett D'Amore } 6626b5e5868SGarrett D'Amore 6636b5e5868SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) { 6646b5e5868SGarrett D'Amore order_weights[i] = -1; 6656b5e5868SGarrett D'Amore } 6666b5e5868SGarrett D'Amore curr_weight = 0; 6676b5e5868SGarrett D'Amore } 6686b5e5868SGarrett D'Amore 6696b5e5868SGarrett D'Amore void 6706b5e5868SGarrett D'Amore start_order_undefined(void) 6716b5e5868SGarrett D'Amore { 6726b5e5868SGarrett D'Amore start_order(T_UNDEFINED); 6736b5e5868SGarrett D'Amore } 6746b5e5868SGarrett D'Amore 6756b5e5868SGarrett D'Amore void 6766b5e5868SGarrett D'Amore start_order_symbol(char *name) 6776b5e5868SGarrett D'Amore { 6786b5e5868SGarrett D'Amore currundef = get_collundef(name); 6796b5e5868SGarrett D'Amore start_order(T_SYMBOL); 6806b5e5868SGarrett D'Amore } 6816b5e5868SGarrett D'Amore 6826b5e5868SGarrett D'Amore void 6836b5e5868SGarrett D'Amore start_order_char(wchar_t wc) 6846b5e5868SGarrett D'Amore { 6856b5e5868SGarrett D'Amore collchar_t *cc; 6866b5e5868SGarrett D'Amore int32_t ref; 6876b5e5868SGarrett D'Amore 6886b5e5868SGarrett D'Amore start_order(T_CHAR); 6896b5e5868SGarrett D'Amore 6906b5e5868SGarrett D'Amore /* 6916b5e5868SGarrett D'Amore * If we last saw an ellipsis, then we need to close the range. 6926b5e5868SGarrett D'Amore * Handle that here. Note that we have to be careful because the 6936b5e5868SGarrett D'Amore * items *inside* the range are treated exclusiveley to the items 6946b5e5868SGarrett D'Amore * outside of the range. The ends of the range can have quite 6956b5e5868SGarrett D'Amore * different weights than the range members. 6966b5e5868SGarrett D'Amore */ 6976b5e5868SGarrett D'Amore if (lastorder == T_ELLIPSIS) { 6986b5e5868SGarrett D'Amore int i; 6996b5e5868SGarrett D'Amore 7006b5e5868SGarrett D'Amore if (wc < ellipsis_start) { 7016b5e5868SGarrett D'Amore errf(_("malformed range!")); 7026b5e5868SGarrett D'Amore return; 7036b5e5868SGarrett D'Amore } 7046b5e5868SGarrett D'Amore while (ellipsis_start < wc) { 7056b5e5868SGarrett D'Amore /* 7066b5e5868SGarrett D'Amore * pick all of the saved weights for the 7076b5e5868SGarrett D'Amore * ellipsis. note that -1 encodes for the 7086b5e5868SGarrett D'Amore * ellipsis itself, which means to take the 7096b5e5868SGarrett D'Amore * current relative priority. 7106b5e5868SGarrett D'Amore */ 7116b5e5868SGarrett D'Amore if ((cc = get_collchar(ellipsis_start, 1)) == NULL) { 7126b5e5868SGarrett D'Amore INTERR; 7136b5e5868SGarrett D'Amore return; 7146b5e5868SGarrett D'Amore } 7156b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 7166b5e5868SGarrett D'Amore collpri_t *p; 7176b5e5868SGarrett D'Amore if (((ref = ellipsis_weights[i]) == -1) || 7186b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) || 7196b5e5868SGarrett D'Amore (p->pri == -1)) { 7206b5e5868SGarrett D'Amore set_pri(cc->ref[i], nextpri, RESOLVED); 7216b5e5868SGarrett D'Amore } else { 7226b5e5868SGarrett D'Amore set_pri(cc->ref[i], ref, REFER); 7236b5e5868SGarrett D'Amore } 7246b5e5868SGarrett D'Amore ellipsis_weights[i] = NULL; 7256b5e5868SGarrett D'Amore } 7266b5e5868SGarrett D'Amore ellipsis_start++; 7276b5e5868SGarrett D'Amore nextpri++; 7286b5e5868SGarrett D'Amore } 7296b5e5868SGarrett D'Amore } 7306b5e5868SGarrett D'Amore 7316b5e5868SGarrett D'Amore currchar = get_collchar(wc, 1); 7326b5e5868SGarrett D'Amore } 7336b5e5868SGarrett D'Amore 7346b5e5868SGarrett D'Amore void 7356b5e5868SGarrett D'Amore start_order_collelem(collelem_t *e) 7366b5e5868SGarrett D'Amore { 7376b5e5868SGarrett D'Amore start_order(T_COLLELEM); 7386b5e5868SGarrett D'Amore currelem = e; 7396b5e5868SGarrett D'Amore } 7406b5e5868SGarrett D'Amore 7416b5e5868SGarrett D'Amore void 7426b5e5868SGarrett D'Amore start_order_ellipsis(void) 7436b5e5868SGarrett D'Amore { 7446b5e5868SGarrett D'Amore int i; 7456b5e5868SGarrett D'Amore 7466b5e5868SGarrett D'Amore start_order(T_ELLIPSIS); 7476b5e5868SGarrett D'Amore 7486b5e5868SGarrett D'Amore if (lastorder != T_CHAR) { 7496b5e5868SGarrett D'Amore errf(_("illegal starting point for range")); 7506b5e5868SGarrett D'Amore return; 7516b5e5868SGarrett D'Amore } 7526b5e5868SGarrett D'Amore 7536b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 7546b5e5868SGarrett D'Amore ellipsis_weights[i] = order_weights[i]; 7556b5e5868SGarrett D'Amore } 7566b5e5868SGarrett D'Amore } 7576b5e5868SGarrett D'Amore 7586b5e5868SGarrett D'Amore void 7596b5e5868SGarrett D'Amore define_collelem(char *name, wchar_t *wcs) 7606b5e5868SGarrett D'Amore { 7616b5e5868SGarrett D'Amore collelem_t *e; 7626b5e5868SGarrett D'Amore avl_index_t where1; 7636b5e5868SGarrett D'Amore avl_index_t where2; 7646b5e5868SGarrett D'Amore int i; 7656b5e5868SGarrett D'Amore 7666b5e5868SGarrett D'Amore if (wcslen(wcs) >= COLLATE_STR_LEN) { 7676b5e5868SGarrett D'Amore errf(_("expanded collation element too long")); 7686b5e5868SGarrett D'Amore return; 7696b5e5868SGarrett D'Amore } 7706b5e5868SGarrett D'Amore 7716b5e5868SGarrett D'Amore if ((e = calloc(sizeof (*e), 1)) == NULL) { 7726b5e5868SGarrett D'Amore errf(_("out of memory")); 7736b5e5868SGarrett D'Amore return; 7746b5e5868SGarrett D'Amore } 7756b5e5868SGarrett D'Amore e->expand = wcs; 7766b5e5868SGarrett D'Amore e->symbol = name; 7776b5e5868SGarrett D'Amore 7786b5e5868SGarrett D'Amore /* 7796b5e5868SGarrett D'Amore * This is executed before the order statement, so we don't 7806b5e5868SGarrett D'Amore * know how many priorities we *really* need. We allocate one 7816b5e5868SGarrett D'Amore * for each possible weight. Not a big deal, as collating-elements 7826b5e5868SGarrett D'Amore * prove to be quite rare. 7836b5e5868SGarrett D'Amore */ 7846b5e5868SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) { 7856b5e5868SGarrett D'Amore e->ref[i] = new_pri(); 7866b5e5868SGarrett D'Amore } 7876b5e5868SGarrett D'Amore 7886b5e5868SGarrett D'Amore /* A character sequence can only reduce to one element. */ 7896b5e5868SGarrett D'Amore if ((avl_find(&elem_by_symbol, e, &where1) != NULL) || 7906b5e5868SGarrett D'Amore (avl_find(&elem_by_expand, e, &where2) != NULL)) { 7916b5e5868SGarrett D'Amore errf(_("duplicate collating element definition")); 7926b5e5868SGarrett D'Amore return; 7936b5e5868SGarrett D'Amore } 7946b5e5868SGarrett D'Amore avl_insert(&elem_by_symbol, e, where1); 7956b5e5868SGarrett D'Amore avl_insert(&elem_by_expand, e, where2); 7966b5e5868SGarrett D'Amore } 7976b5e5868SGarrett D'Amore 7986b5e5868SGarrett D'Amore void 7996b5e5868SGarrett D'Amore add_order_bit(int kw) 8006b5e5868SGarrett D'Amore { 8016b5e5868SGarrett D'Amore uint8_t bit = DIRECTIVE_UNDEF; 8026b5e5868SGarrett D'Amore 8036b5e5868SGarrett D'Amore switch (kw) { 8046b5e5868SGarrett D'Amore case T_FORWARD: 8056b5e5868SGarrett D'Amore bit = DIRECTIVE_FORWARD; 8066b5e5868SGarrett D'Amore break; 8076b5e5868SGarrett D'Amore case T_BACKWARD: 8086b5e5868SGarrett D'Amore bit = DIRECTIVE_BACKWARD; 8096b5e5868SGarrett D'Amore break; 8106b5e5868SGarrett D'Amore case T_POSITION: 8116b5e5868SGarrett D'Amore bit = DIRECTIVE_POSITION; 8126b5e5868SGarrett D'Amore break; 8136b5e5868SGarrett D'Amore default: 8146b5e5868SGarrett D'Amore INTERR; 8156b5e5868SGarrett D'Amore break; 8166b5e5868SGarrett D'Amore } 8176b5e5868SGarrett D'Amore collinfo.directive[collinfo.directive_count] |= bit; 8186b5e5868SGarrett D'Amore } 8196b5e5868SGarrett D'Amore 8206b5e5868SGarrett D'Amore void 8216b5e5868SGarrett D'Amore add_order_directive(void) 8226b5e5868SGarrett D'Amore { 8236b5e5868SGarrett D'Amore if (collinfo.directive_count >= COLL_WEIGHTS_MAX) { 8246b5e5868SGarrett D'Amore errf(_("too many directives (max %d)"), COLL_WEIGHTS_MAX); 8256b5e5868SGarrett D'Amore } 8266b5e5868SGarrett D'Amore collinfo.directive_count++; 8276b5e5868SGarrett D'Amore } 8286b5e5868SGarrett D'Amore 8296b5e5868SGarrett D'Amore static void 8306b5e5868SGarrett D'Amore add_order_pri(int32_t ref) 8316b5e5868SGarrett D'Amore { 8326b5e5868SGarrett D'Amore if (curr_weight >= NUM_WT) { 8336b5e5868SGarrett D'Amore errf(_("too many weights (max %d)"), NUM_WT); 8346b5e5868SGarrett D'Amore return; 8356b5e5868SGarrett D'Amore } 8366b5e5868SGarrett D'Amore order_weights[curr_weight] = ref; 8376b5e5868SGarrett D'Amore curr_weight++; 8386b5e5868SGarrett D'Amore } 8396b5e5868SGarrett D'Amore 8406b5e5868SGarrett D'Amore void 8416b5e5868SGarrett D'Amore add_order_collsym(collsym_t *s) 8426b5e5868SGarrett D'Amore { 8436b5e5868SGarrett D'Amore add_order_pri(s->ref); 8446b5e5868SGarrett D'Amore } 8456b5e5868SGarrett D'Amore 8466b5e5868SGarrett D'Amore void 8476b5e5868SGarrett D'Amore add_order_char(wchar_t wc) 8486b5e5868SGarrett D'Amore { 8496b5e5868SGarrett D'Amore collchar_t *cc; 8506b5e5868SGarrett D'Amore 8516b5e5868SGarrett D'Amore if ((cc = get_collchar(wc, 1)) == NULL) { 8526b5e5868SGarrett D'Amore INTERR; 8536b5e5868SGarrett D'Amore return; 8546b5e5868SGarrett D'Amore } 8556b5e5868SGarrett D'Amore 8566b5e5868SGarrett D'Amore add_order_pri(cc->ref[curr_weight]); 8576b5e5868SGarrett D'Amore } 8586b5e5868SGarrett D'Amore 8596b5e5868SGarrett D'Amore void 8606b5e5868SGarrett D'Amore add_order_collelem(collelem_t *e) 8616b5e5868SGarrett D'Amore { 8626b5e5868SGarrett D'Amore add_order_pri(e->ref[curr_weight]); 8636b5e5868SGarrett D'Amore } 8646b5e5868SGarrett D'Amore 8656b5e5868SGarrett D'Amore void 8666b5e5868SGarrett D'Amore add_order_ignore(void) 8676b5e5868SGarrett D'Amore { 8686b5e5868SGarrett D'Amore add_order_pri(pri_ignore); 8696b5e5868SGarrett D'Amore } 8706b5e5868SGarrett D'Amore 8716b5e5868SGarrett D'Amore void 8726b5e5868SGarrett D'Amore add_order_symbol(char *sym) 8736b5e5868SGarrett D'Amore { 8746b5e5868SGarrett D'Amore collundef_t *c; 8756b5e5868SGarrett D'Amore if ((c = get_collundef(sym)) == NULL) { 8766b5e5868SGarrett D'Amore INTERR; 8776b5e5868SGarrett D'Amore return; 8786b5e5868SGarrett D'Amore } 8796b5e5868SGarrett D'Amore add_order_pri(c->ref[curr_weight]); 8806b5e5868SGarrett D'Amore } 8816b5e5868SGarrett D'Amore 8826b5e5868SGarrett D'Amore void 8836b5e5868SGarrett D'Amore add_order_ellipsis(void) 8846b5e5868SGarrett D'Amore { 8856b5e5868SGarrett D'Amore /* special NULL value indicates self reference */ 8866b5e5868SGarrett D'Amore add_order_pri(NULL); 8876b5e5868SGarrett D'Amore } 8886b5e5868SGarrett D'Amore 8896b5e5868SGarrett D'Amore void 8906b5e5868SGarrett D'Amore add_order_subst(void) 8916b5e5868SGarrett D'Amore { 892723fee08SGarrett D'Amore subst_t srch; 8936b5e5868SGarrett D'Amore subst_t *s; 8946b5e5868SGarrett D'Amore avl_index_t where; 8956b5e5868SGarrett D'Amore int i; 8966b5e5868SGarrett D'Amore 897723fee08SGarrett D'Amore (void) memset(&srch, 0, sizeof (srch)); 898723fee08SGarrett D'Amore for (i = 0; i < curr_subst; i++) { 899723fee08SGarrett D'Amore srch.ref[i] = subst_weights[i]; 900723fee08SGarrett D'Amore subst_weights[i] = 0; 901723fee08SGarrett D'Amore } 902723fee08SGarrett D'Amore s = avl_find(&substs_ref[curr_weight], &srch, &where); 903723fee08SGarrett D'Amore 904723fee08SGarrett D'Amore if (s == NULL) { 9056b5e5868SGarrett D'Amore if ((s = calloc(sizeof (*s), 1)) == NULL) { 9066b5e5868SGarrett D'Amore errf(_("out of memory")); 9076b5e5868SGarrett D'Amore return; 9086b5e5868SGarrett D'Amore } 9096b5e5868SGarrett D'Amore s->key = new_pri(); 9106b5e5868SGarrett D'Amore 9116b5e5868SGarrett D'Amore /* 912723fee08SGarrett D'Amore * We use a self reference for our key, but we set a 913723fee08SGarrett D'Amore * high bit to indicate that this is a substitution 914723fee08SGarrett D'Amore * reference. This will expedite table lookups later, 915723fee08SGarrett D'Amore * and prevent table lookups for situations that don't 916723fee08SGarrett D'Amore * require it. (In short, its a big win, because we 917723fee08SGarrett D'Amore * can skip a lot of binary searching.) 9186b5e5868SGarrett D'Amore */ 919723fee08SGarrett D'Amore set_pri(s->key, 920723fee08SGarrett D'Amore (nextsubst[curr_weight] | COLLATE_SUBST_PRIORITY), 921723fee08SGarrett D'Amore RESOLVED); 922723fee08SGarrett D'Amore nextsubst[curr_weight] += 1; 9236b5e5868SGarrett D'Amore 9246b5e5868SGarrett D'Amore for (i = 0; i < curr_subst; i++) { 925723fee08SGarrett D'Amore s->ref[i] = srch.ref[i]; 9266b5e5868SGarrett D'Amore } 927723fee08SGarrett D'Amore 928723fee08SGarrett D'Amore avl_insert(&substs_ref[curr_weight], s, where); 9296b5e5868SGarrett D'Amore 9306b5e5868SGarrett D'Amore if (avl_find(&substs[curr_weight], s, &where) != NULL) { 9316b5e5868SGarrett D'Amore INTERR; 9326b5e5868SGarrett D'Amore return; 9336b5e5868SGarrett D'Amore } 9346b5e5868SGarrett D'Amore avl_insert(&substs[curr_weight], s, where); 935723fee08SGarrett D'Amore } 936723fee08SGarrett D'Amore curr_subst = 0; 937723fee08SGarrett D'Amore 9386b5e5868SGarrett D'Amore 9396b5e5868SGarrett D'Amore /* 9406b5e5868SGarrett D'Amore * We are using the current (unique) priority as a search key 9416b5e5868SGarrett D'Amore * in the substitution table. 9426b5e5868SGarrett D'Amore */ 9436b5e5868SGarrett D'Amore add_order_pri(s->key); 9446b5e5868SGarrett D'Amore } 9456b5e5868SGarrett D'Amore 9466b5e5868SGarrett D'Amore static void 9476b5e5868SGarrett D'Amore add_subst_pri(int32_t ref) 9486b5e5868SGarrett D'Amore { 9496b5e5868SGarrett D'Amore if (curr_subst >= COLLATE_STR_LEN) { 9506b5e5868SGarrett D'Amore errf(_("substitution string is too long")); 9516b5e5868SGarrett D'Amore return; 9526b5e5868SGarrett D'Amore } 9536b5e5868SGarrett D'Amore subst_weights[curr_subst] = ref; 9546b5e5868SGarrett D'Amore curr_subst++; 9556b5e5868SGarrett D'Amore } 9566b5e5868SGarrett D'Amore 9576b5e5868SGarrett D'Amore void 9586b5e5868SGarrett D'Amore add_subst_char(wchar_t wc) 9596b5e5868SGarrett D'Amore { 9606b5e5868SGarrett D'Amore collchar_t *cc; 9616b5e5868SGarrett D'Amore 9626b5e5868SGarrett D'Amore 9636b5e5868SGarrett D'Amore if (((cc = get_collchar(wc, 1)) == NULL) || 9646b5e5868SGarrett D'Amore (cc->wc != wc)) { 9656b5e5868SGarrett D'Amore INTERR; 9666b5e5868SGarrett D'Amore return; 9676b5e5868SGarrett D'Amore } 9686b5e5868SGarrett D'Amore /* we take the weight for the character at that position */ 9696b5e5868SGarrett D'Amore add_subst_pri(cc->ref[curr_weight]); 9706b5e5868SGarrett D'Amore } 9716b5e5868SGarrett D'Amore 9726b5e5868SGarrett D'Amore void 9736b5e5868SGarrett D'Amore add_subst_collelem(collelem_t *e) 9746b5e5868SGarrett D'Amore { 9756b5e5868SGarrett D'Amore add_subst_pri(e->ref[curr_weight]); 9766b5e5868SGarrett D'Amore } 9776b5e5868SGarrett D'Amore 9786b5e5868SGarrett D'Amore void 9796b5e5868SGarrett D'Amore add_subst_collsym(collsym_t *s) 9806b5e5868SGarrett D'Amore { 9816b5e5868SGarrett D'Amore add_subst_pri(s->ref); 9826b5e5868SGarrett D'Amore } 9836b5e5868SGarrett D'Amore 9846b5e5868SGarrett D'Amore void 9856b5e5868SGarrett D'Amore add_subst_symbol(char *ptr) 9866b5e5868SGarrett D'Amore { 9876b5e5868SGarrett D'Amore collundef_t *cu; 9886b5e5868SGarrett D'Amore 9896b5e5868SGarrett D'Amore if ((cu = get_collundef(ptr)) != NULL) { 9906b5e5868SGarrett D'Amore add_subst_pri(cu->ref[curr_weight]); 9916b5e5868SGarrett D'Amore } 9926b5e5868SGarrett D'Amore } 9936b5e5868SGarrett D'Amore 9946b5e5868SGarrett D'Amore void 995723fee08SGarrett D'Amore add_weight(int32_t ref, int pass) 996723fee08SGarrett D'Amore { 997723fee08SGarrett D'Amore weight_t srch; 998723fee08SGarrett D'Amore weight_t *w; 999723fee08SGarrett D'Amore avl_index_t where; 1000723fee08SGarrett D'Amore 1001723fee08SGarrett D'Amore srch.pri = resolve_pri(ref); 1002723fee08SGarrett D'Amore 1003723fee08SGarrett D'Amore /* No translation of ignores */ 1004723fee08SGarrett D'Amore if (srch.pri == 0) 1005723fee08SGarrett D'Amore return; 1006723fee08SGarrett D'Amore 1007723fee08SGarrett D'Amore /* Substitution priorities are not weights */ 1008723fee08SGarrett D'Amore if (srch.pri & COLLATE_SUBST_PRIORITY) 1009723fee08SGarrett D'Amore return; 1010723fee08SGarrett D'Amore 1011723fee08SGarrett D'Amore if (avl_find(&weights[pass], &srch, &where) != NULL) 1012723fee08SGarrett D'Amore return; 1013723fee08SGarrett D'Amore 1014723fee08SGarrett D'Amore if ((w = calloc(sizeof (*w), 1)) == NULL) { 1015723fee08SGarrett D'Amore errf(_("out of memory")); 1016723fee08SGarrett D'Amore return; 1017723fee08SGarrett D'Amore } 1018723fee08SGarrett D'Amore w->pri = srch.pri; 1019723fee08SGarrett D'Amore avl_insert(&weights[pass], w, where); 1020723fee08SGarrett D'Amore } 1021723fee08SGarrett D'Amore 1022723fee08SGarrett D'Amore void 1023723fee08SGarrett D'Amore add_weights(int32_t *refs) 1024723fee08SGarrett D'Amore { 1025723fee08SGarrett D'Amore int i; 1026723fee08SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 1027723fee08SGarrett D'Amore add_weight(refs[i], i); 1028723fee08SGarrett D'Amore } 1029723fee08SGarrett D'Amore } 1030723fee08SGarrett D'Amore 1031723fee08SGarrett D'Amore int32_t 1032723fee08SGarrett D'Amore get_weight(int32_t ref, int pass) 1033723fee08SGarrett D'Amore { 1034723fee08SGarrett D'Amore weight_t srch; 1035723fee08SGarrett D'Amore weight_t *w; 1036723fee08SGarrett D'Amore int32_t pri; 1037723fee08SGarrett D'Amore 1038723fee08SGarrett D'Amore pri = resolve_pri(ref); 1039723fee08SGarrett D'Amore if (pri & COLLATE_SUBST_PRIORITY) { 1040723fee08SGarrett D'Amore return (pri); 1041723fee08SGarrett D'Amore } 1042723fee08SGarrett D'Amore if (pri <= 0) { 1043723fee08SGarrett D'Amore return (pri); 1044723fee08SGarrett D'Amore } 1045723fee08SGarrett D'Amore srch.pri = pri; 1046723fee08SGarrett D'Amore if ((w = avl_find(&weights[pass], &srch, NULL)) == NULL) { 1047723fee08SGarrett D'Amore INTERR; 1048723fee08SGarrett D'Amore return (-1); 1049723fee08SGarrett D'Amore } 1050723fee08SGarrett D'Amore return (w->opt); 1051723fee08SGarrett D'Amore } 1052723fee08SGarrett D'Amore 1053723fee08SGarrett D'Amore void 10546b5e5868SGarrett D'Amore dump_collate(void) 10556b5e5868SGarrett D'Amore { 10566b5e5868SGarrett D'Amore FILE *f; 10576b5e5868SGarrett D'Amore int i, j, n; 10586b5e5868SGarrett D'Amore size_t sz; 10596b5e5868SGarrett D'Amore int32_t pri; 10606b5e5868SGarrett D'Amore collelem_t *ce; 10616b5e5868SGarrett D'Amore collchar_t *cc; 10626b5e5868SGarrett D'Amore subst_t *sb; 10636b5e5868SGarrett D'Amore char vers[COLLATE_STR_LEN]; 1064723fee08SGarrett D'Amore collate_char_t chars[UCHAR_MAX + 1]; 1065723fee08SGarrett D'Amore collate_large_t *large; 10666b5e5868SGarrett D'Amore collate_subst_t *subst[COLL_WEIGHTS_MAX]; 1067723fee08SGarrett D'Amore collate_chain_t *chain; 10686b5e5868SGarrett D'Amore 1069723fee08SGarrett D'Amore /* 1070723fee08SGarrett D'Amore * We have to run throught a preliminary pass to identify all the 1071723fee08SGarrett D'Amore * weights that we use for each sorting level. 1072723fee08SGarrett D'Amore */ 1073723fee08SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 1074723fee08SGarrett D'Amore add_weight(pri_ignore, i); 1075723fee08SGarrett D'Amore } 1076723fee08SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 1077723fee08SGarrett D'Amore for (sb = avl_first(&substs[i]); sb; 1078723fee08SGarrett D'Amore sb = AVL_NEXT(&substs[i], sb)) { 1079723fee08SGarrett D'Amore for (j = 0; sb->ref[j]; j++) { 1080723fee08SGarrett D'Amore add_weight(sb->ref[j], i); 1081723fee08SGarrett D'Amore } 1082723fee08SGarrett D'Amore } 1083723fee08SGarrett D'Amore } 1084723fee08SGarrett D'Amore for (ce = avl_first(&elem_by_expand); 1085723fee08SGarrett D'Amore ce != NULL; 1086723fee08SGarrett D'Amore ce = AVL_NEXT(&elem_by_expand, ce)) { 1087723fee08SGarrett D'Amore add_weights(ce->ref); 1088723fee08SGarrett D'Amore } 1089723fee08SGarrett D'Amore for (cc = avl_first(&collchars); cc; cc = AVL_NEXT(&collchars, cc)) { 1090723fee08SGarrett D'Amore add_weights(cc->ref); 1091723fee08SGarrett D'Amore } 1092723fee08SGarrett D'Amore 1093723fee08SGarrett D'Amore /* 1094723fee08SGarrett D'Amore * Now we walk the entire set of weights, removing the gaps 1095723fee08SGarrett D'Amore * in the weights. This gives us optimum usage. The walk 1096723fee08SGarrett D'Amore * occurs in priority. 1097723fee08SGarrett D'Amore */ 1098723fee08SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 1099723fee08SGarrett D'Amore weight_t *w; 1100723fee08SGarrett D'Amore for (w = avl_first(&weights[i]); w; 1101723fee08SGarrett D'Amore w = AVL_NEXT(&weights[i], w)) { 1102723fee08SGarrett D'Amore w->opt = nweight[i]; 1103723fee08SGarrett D'Amore nweight[i] += 1; 1104723fee08SGarrett D'Amore } 1105723fee08SGarrett D'Amore } 1106723fee08SGarrett D'Amore 1107723fee08SGarrett D'Amore (void) memset(&chars, 0, sizeof (chars)); 11086b5e5868SGarrett D'Amore (void) memset(vers, 0, COLLATE_STR_LEN); 1109723fee08SGarrett D'Amore (void) strlcpy(vers, COLLATE_VERSION, sizeof (vers)); 11106b5e5868SGarrett D'Amore 11116b5e5868SGarrett D'Amore /* 11126b5e5868SGarrett D'Amore * We need to make sure we arrange for the UNDEFINED field 1113723fee08SGarrett D'Amore * to show up. Also, set the total weight counts. 11146b5e5868SGarrett D'Amore */ 11156b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 11166b5e5868SGarrett D'Amore if (resolve_pri(pri_undefined[i]) == -1) { 11176b5e5868SGarrett D'Amore set_pri(pri_undefined[i], -1, RESOLVED); 11186b5e5868SGarrett D'Amore /* they collate at the end of everything else */ 11196b5e5868SGarrett D'Amore collinfo.undef_pri[i] = COLLATE_MAX_PRIORITY; 11206b5e5868SGarrett D'Amore } 1121723fee08SGarrett D'Amore collinfo.pri_count[i] = nweight[i]; 11226b5e5868SGarrett D'Amore } 11236b5e5868SGarrett D'Amore 1124723fee08SGarrett D'Amore collinfo.pri_count[NUM_WT] = max_wide(); 11256b5e5868SGarrett D'Amore collinfo.undef_pri[NUM_WT] = COLLATE_MAX_PRIORITY; 11266b5e5868SGarrett D'Amore collinfo.directive[NUM_WT] = DIRECTIVE_UNDEFINED; 11276b5e5868SGarrett D'Amore 11286b5e5868SGarrett D'Amore /* 11296b5e5868SGarrett D'Amore * Ordinary character priorities 11306b5e5868SGarrett D'Amore */ 11316b5e5868SGarrett D'Amore for (i = 0; i <= UCHAR_MAX; i++) { 11326b5e5868SGarrett D'Amore if ((cc = get_collchar(i, 0)) != NULL) { 11336b5e5868SGarrett D'Amore for (j = 0; j < NUM_WT; j++) { 1134723fee08SGarrett D'Amore chars[i].pri[j] = get_weight(cc->ref[j], j); 11356b5e5868SGarrett D'Amore } 11366b5e5868SGarrett D'Amore } else { 11376b5e5868SGarrett D'Amore for (j = 0; j < NUM_WT; j++) { 1138723fee08SGarrett D'Amore chars[i].pri[j] = 1139723fee08SGarrett D'Amore get_weight(pri_undefined[j], j); 11406b5e5868SGarrett D'Amore } 11416b5e5868SGarrett D'Amore /* 11426b5e5868SGarrett D'Amore * Per POSIX, for undefined characters, we 11436b5e5868SGarrett D'Amore * also have to add a last item, which is the 11446b5e5868SGarrett D'Amore * character code. 11456b5e5868SGarrett D'Amore */ 1146723fee08SGarrett D'Amore chars[i].pri[NUM_WT] = i; 11476b5e5868SGarrett D'Amore } 11486b5e5868SGarrett D'Amore } 11496b5e5868SGarrett D'Amore 11506b5e5868SGarrett D'Amore /* 11516b5e5868SGarrett D'Amore * Substitution tables 11526b5e5868SGarrett D'Amore */ 1153723fee08SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 11546b5e5868SGarrett D'Amore collate_subst_t *st = NULL; 11556b5e5868SGarrett D'Amore n = collinfo.subst_count[i] = avl_numnodes(&substs[i]); 11566b5e5868SGarrett D'Amore if ((st = calloc(sizeof (collate_subst_t) * n, 1)) == NULL) { 11576b5e5868SGarrett D'Amore errf(_("out of memory")); 11586b5e5868SGarrett D'Amore return; 11596b5e5868SGarrett D'Amore } 11606b5e5868SGarrett D'Amore n = 0; 11616b5e5868SGarrett D'Amore for (sb = avl_first(&substs[i]); sb; 11626b5e5868SGarrett D'Amore sb = AVL_NEXT(&substs[i], sb)) { 11636b5e5868SGarrett D'Amore if ((st[n].key = resolve_pri(sb->key)) < 0) { 11646b5e5868SGarrett D'Amore /* by definition these resolve! */ 11656b5e5868SGarrett D'Amore INTERR; 11666b5e5868SGarrett D'Amore } 1167723fee08SGarrett D'Amore if (st[n].key != (n | COLLATE_SUBST_PRIORITY)) { 1168723fee08SGarrett D'Amore INTERR; 1169723fee08SGarrett D'Amore } 11706b5e5868SGarrett D'Amore for (j = 0; sb->ref[j]; j++) { 1171723fee08SGarrett D'Amore st[n].pri[j] = get_weight(sb->ref[j], i); 11726b5e5868SGarrett D'Amore } 11736b5e5868SGarrett D'Amore n++; 11746b5e5868SGarrett D'Amore } 11756b5e5868SGarrett D'Amore if (n != collinfo.subst_count[i]) 11766b5e5868SGarrett D'Amore INTERR; 11776b5e5868SGarrett D'Amore subst[i] = st; 11786b5e5868SGarrett D'Amore } 11796b5e5868SGarrett D'Amore 1180723fee08SGarrett D'Amore 11816b5e5868SGarrett D'Amore /* 11826b5e5868SGarrett D'Amore * Chains, i.e. collating elements 11836b5e5868SGarrett D'Amore */ 11846b5e5868SGarrett D'Amore collinfo.chain_count = avl_numnodes(&elem_by_expand); 1185723fee08SGarrett D'Amore chain = calloc(sizeof (collate_chain_t), collinfo.chain_count); 11866b5e5868SGarrett D'Amore if (chain == NULL) { 11876b5e5868SGarrett D'Amore errf(_("out of memory")); 11886b5e5868SGarrett D'Amore return; 11896b5e5868SGarrett D'Amore } 11906b5e5868SGarrett D'Amore for (n = 0, ce = avl_first(&elem_by_expand); 11916b5e5868SGarrett D'Amore ce != NULL; 11926b5e5868SGarrett D'Amore ce = AVL_NEXT(&elem_by_expand, ce), n++) { 11936b5e5868SGarrett D'Amore (void) wsncpy(chain[n].str, ce->expand, COLLATE_STR_LEN); 11946b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 1195723fee08SGarrett D'Amore chain[n].pri[i] = get_weight(ce->ref[i], i); 11966b5e5868SGarrett D'Amore } 11976b5e5868SGarrett D'Amore } 11986b5e5868SGarrett D'Amore if (n != collinfo.chain_count) 11996b5e5868SGarrett D'Amore INTERR; 12006b5e5868SGarrett D'Amore 12016b5e5868SGarrett D'Amore /* 12026b5e5868SGarrett D'Amore * Large (> UCHAR_MAX) character priorities 12036b5e5868SGarrett D'Amore */ 1204723fee08SGarrett D'Amore large = calloc(sizeof (collate_large_t) * avl_numnodes(&collchars), 1); 12056b5e5868SGarrett D'Amore if (large == NULL) { 12066b5e5868SGarrett D'Amore errf(_("out of memory")); 12076b5e5868SGarrett D'Amore return; 12086b5e5868SGarrett D'Amore } 12096b5e5868SGarrett D'Amore 12106b5e5868SGarrett D'Amore i = 0; 12116b5e5868SGarrett D'Amore for (cc = avl_first(&collchars); cc; cc = AVL_NEXT(&collchars, cc)) { 12126b5e5868SGarrett D'Amore int undef = 0; 12136b5e5868SGarrett D'Amore /* we already gathered those */ 12146b5e5868SGarrett D'Amore if (cc->wc <= UCHAR_MAX) 12156b5e5868SGarrett D'Amore continue; 12166b5e5868SGarrett D'Amore for (j = 0; j < NUM_WT; j++) { 1217723fee08SGarrett D'Amore if ((pri = get_weight(cc->ref[j], j)) < 0) { 12186b5e5868SGarrett D'Amore undef = 1; 12196b5e5868SGarrett D'Amore } 12206b5e5868SGarrett D'Amore if (undef && (pri >= 0)) { 12216b5e5868SGarrett D'Amore /* if undefined, then all priorities are */ 12226b5e5868SGarrett D'Amore INTERR; 12236b5e5868SGarrett D'Amore } else { 12246b5e5868SGarrett D'Amore large[i].pri.pri[j] = pri; 12256b5e5868SGarrett D'Amore } 12266b5e5868SGarrett D'Amore } 12276b5e5868SGarrett D'Amore if (!undef) { 12286b5e5868SGarrett D'Amore large[i].val = cc->wc; 1229723fee08SGarrett D'Amore collinfo.large_count = i++; 12306b5e5868SGarrett D'Amore } 12316b5e5868SGarrett D'Amore } 12326b5e5868SGarrett D'Amore 12336b5e5868SGarrett D'Amore if ((f = open_category()) == NULL) { 12346b5e5868SGarrett D'Amore return; 12356b5e5868SGarrett D'Amore } 12366b5e5868SGarrett D'Amore 12376b5e5868SGarrett D'Amore /* Time to write the entire data set out */ 12386b5e5868SGarrett D'Amore 12396b5e5868SGarrett D'Amore if ((wr_category(vers, COLLATE_STR_LEN, f) < 0) || 12406b5e5868SGarrett D'Amore (wr_category(&collinfo, sizeof (collinfo), f) < 0) || 1241723fee08SGarrett D'Amore (wr_category(&chars, sizeof (chars), f) < 0)) { 12426b5e5868SGarrett D'Amore return; 12436b5e5868SGarrett D'Amore } 12446b5e5868SGarrett D'Amore 12456b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 12466b5e5868SGarrett D'Amore sz = sizeof (collate_subst_t) * collinfo.subst_count[i]; 12476b5e5868SGarrett D'Amore if (wr_category(subst[i], sz, f) < 0) { 12486b5e5868SGarrett D'Amore return; 12496b5e5868SGarrett D'Amore } 12506b5e5868SGarrett D'Amore } 1251723fee08SGarrett D'Amore sz = sizeof (collate_chain_t) * collinfo.chain_count; 12526b5e5868SGarrett D'Amore if (wr_category(chain, sz, f) < 0) { 12536b5e5868SGarrett D'Amore return; 12546b5e5868SGarrett D'Amore } 1255723fee08SGarrett D'Amore sz = sizeof (collate_large_t) * collinfo.large_count; 12566b5e5868SGarrett D'Amore if (wr_category(large, sz, f) < 0) { 12576b5e5868SGarrett D'Amore return; 12586b5e5868SGarrett D'Amore } 12596b5e5868SGarrett D'Amore 12606b5e5868SGarrett D'Amore close_category(f); 12616b5e5868SGarrett D'Amore } 1262