16b5e5868SGarrett D'Amore /*
26b5e5868SGarrett D'Amore * This file and its contents are supplied under the terms of the
36b5e5868SGarrett D'Amore * Common Development and Distribution License ("CDDL"), version 1.0.
45aec55ebSGarrett D'Amore * You may only use this file in accordance with the terms of version
55aec55ebSGarrett D'Amore * 1.0 of the CDDL.
66b5e5868SGarrett D'Amore *
76b5e5868SGarrett D'Amore * A full copy of the text of the CDDL should have accompanied this
86b5e5868SGarrett D'Amore * source. A copy of the CDDL is also available via the Internet at
96b5e5868SGarrett D'Amore * http://www.illumos.org/license/CDDL.
106b5e5868SGarrett D'Amore */
116b5e5868SGarrett D'Amore
126b5e5868SGarrett D'Amore /*
136b5e5868SGarrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
146b5e5868SGarrett D'Amore */
156b5e5868SGarrett D'Amore
166b5e5868SGarrett D'Amore /*
176b5e5868SGarrett D'Amore * LC_COLLATE database generation routines for localedef.
186b5e5868SGarrett D'Amore */
196b5e5868SGarrett D'Amore
206b5e5868SGarrett D'Amore #include <stdio.h>
216b5e5868SGarrett D'Amore #include <stdlib.h>
226b5e5868SGarrett D'Amore #include <errno.h>
236b5e5868SGarrett D'Amore #include <string.h>
246b5e5868SGarrett D'Amore #include <sys/types.h>
256b5e5868SGarrett D'Amore #include <sys/avl.h>
266b5e5868SGarrett D'Amore #include <string.h>
276b5e5868SGarrett D'Amore #include <unistd.h>
286b5e5868SGarrett D'Amore #include <wchar.h>
296b5e5868SGarrett D'Amore #include <widec.h>
306b5e5868SGarrett D'Amore #include <limits.h>
316b5e5868SGarrett D'Amore #include "localedef.h"
326b5e5868SGarrett D'Amore #include "parser.tab.h"
336b5e5868SGarrett D'Amore #include "collate.h"
346b5e5868SGarrett D'Amore
356b5e5868SGarrett D'Amore /*
36723fee08SGarrett D'Amore * Design notes.
37723fee08SGarrett D'Amore *
38723fee08SGarrett D'Amore * It will be extremely helpful to the reader if they have access to
39723fee08SGarrett D'Amore * the localedef and locale file format specifications available.
40723fee08SGarrett D'Amore * Latest versions of these are available from www.opengroup.org.
41723fee08SGarrett D'Amore *
42723fee08SGarrett D'Amore * The design for the collation code is a bit complex. The goal is a
43723fee08SGarrett D'Amore * single collation database as described in collate.h (in
44723fee08SGarrett D'Amore * libc/port/locale). However, there are some other tidbits:
45723fee08SGarrett D'Amore *
46723fee08SGarrett D'Amore * a) The substitution entries are now a directly indexable array. A
47723fee08SGarrett D'Amore * priority elsewhere in the table is taken as an index into the
48723fee08SGarrett D'Amore * substitution table if it has a high bit (COLLATE_SUBST_PRIORITY)
49723fee08SGarrett D'Amore * set. (The bit is cleared and the result is the index into the
50723fee08SGarrett D'Amore * table.
51723fee08SGarrett D'Amore *
52723fee08SGarrett D'Amore * b) We eliminate duplicate entries into the substitution table.
53723fee08SGarrett D'Amore * This saves a lot of space.
54723fee08SGarrett D'Amore *
55723fee08SGarrett D'Amore * c) The priorities for each level are "compressed", so that each
56723fee08SGarrett D'Amore * sorting level has consecutively numbered priorities starting at 1.
57723fee08SGarrett D'Amore * (O is reserved for the ignore priority.) This means sort levels
58723fee08SGarrett D'Amore * which only have a few distinct priorities can represent the
59723fee08SGarrett D'Amore * priority level in fewer bits, which makes the strxfrm output
60723fee08SGarrett D'Amore * smaller.
61723fee08SGarrett D'Amore *
62723fee08SGarrett D'Amore * d) We record the total number of priorities so that strxfrm can
63723fee08SGarrett D'Amore * figure out how many bytes to expand a numeric priority into.
64723fee08SGarrett D'Amore *
65723fee08SGarrett D'Amore * e) For the UNDEFINED pass (the last pass), we record the maximum
66723fee08SGarrett D'Amore * number of bits needed to uniquely prioritize these entries, so that
67723fee08SGarrett D'Amore * the last pass can also use smaller strxfrm output when possible.
68723fee08SGarrett D'Amore *
69723fee08SGarrett D'Amore * f) Priorities with the sign bit set are verboten. This works out
70723fee08SGarrett D'Amore * because no active character set needs that bit to carry significant
71723fee08SGarrett D'Amore * information once the character is in wide form.
72723fee08SGarrett D'Amore *
73723fee08SGarrett D'Amore * To process the entire data to make the database, we actually run
74723fee08SGarrett D'Amore * multiple passes over the data.
75723fee08SGarrett D'Amore *
76723fee08SGarrett D'Amore * The first pass, which is done at parse time, identifies elements,
77723fee08SGarrett D'Amore * substitutions, and such, and records them in priority order. As
78723fee08SGarrett D'Amore * some priorities can refer to other priorities, using forward
79723fee08SGarrett D'Amore * references, we use a table of references indicating whether the
80723fee08SGarrett D'Amore * priority's value has been resolved, or whether it is still a
81723fee08SGarrett D'Amore * reference.
82723fee08SGarrett D'Amore *
83723fee08SGarrett D'Amore * The second pass walks over all the items in priority order, noting
84723fee08SGarrett D'Amore * that they are used directly, and not just an indirect reference.
85723fee08SGarrett D'Amore * This is done by creating a "weight" structure for the item. The
86723fee08SGarrett D'Amore * weights are stashed in an AVL tree sorted by relative "priority".
87723fee08SGarrett D'Amore *
88723fee08SGarrett D'Amore * The third pass walks over all the weight structures, in priority
89723fee08SGarrett D'Amore * order, and assigns a new monotonically increasing (per sort level)
90723fee08SGarrett D'Amore * weight value to them. These are the values that will actually be
91723fee08SGarrett D'Amore * written to the file.
92723fee08SGarrett D'Amore *
93723fee08SGarrett D'Amore * The fourth pass just writes the data out.
94723fee08SGarrett D'Amore */
95723fee08SGarrett D'Amore
96723fee08SGarrett D'Amore /*
976b5e5868SGarrett D'Amore * In order to resolve the priorities, we create a table of priorities.
986b5e5868SGarrett D'Amore * Entries in the table can be in one of three states.
996b5e5868SGarrett D'Amore *
1006b5e5868SGarrett D'Amore * UNKNOWN is for newly allocated entries, and indicates that nothing
1016b5e5868SGarrett D'Amore * is known about the priority. (For example, when new entries are created
1026b5e5868SGarrett D'Amore * for collating-symbols, this is the value assigned for them until the
1036b5e5868SGarrett D'Amore * collating symbol's order has been determined.
1046b5e5868SGarrett D'Amore *
1056b5e5868SGarrett D'Amore * RESOLVED is used for an entry where the priority indicates the final
1066b5e5868SGarrett D'Amore * numeric weight.
1076b5e5868SGarrett D'Amore *
1086b5e5868SGarrett D'Amore * REFER is used for entries that reference other entries. Typically
1096b5e5868SGarrett D'Amore * this is used for forward references. A collating-symbol can never
1106b5e5868SGarrett D'Amore * have this value.
1116b5e5868SGarrett D'Amore *
1126b5e5868SGarrett D'Amore * The "pass" field is used during final resolution to aid in detection
1136b5e5868SGarrett D'Amore * of referencing loops. (For example <A> depends on <B>, but <B> has its
1146b5e5868SGarrett D'Amore * priority dependent on <A>.)
1156b5e5868SGarrett D'Amore */
1166b5e5868SGarrett D'Amore typedef enum {
1176b5e5868SGarrett D'Amore UNKNOWN, /* priority is totally unknown */
1186b5e5868SGarrett D'Amore RESOLVED, /* priority value fully resolved */
1196b5e5868SGarrett D'Amore REFER /* priority is a reference (index) */
1206b5e5868SGarrett D'Amore } res_t;
1216b5e5868SGarrett D'Amore
122723fee08SGarrett D'Amore typedef struct weight {
123723fee08SGarrett D'Amore int32_t pri;
124723fee08SGarrett D'Amore int opt;
125723fee08SGarrett D'Amore avl_node_t avl;
126723fee08SGarrett D'Amore } weight_t;
127723fee08SGarrett D'Amore
1286b5e5868SGarrett D'Amore typedef struct priority {
1296b5e5868SGarrett D'Amore res_t res;
1306b5e5868SGarrett D'Amore int32_t pri;
1316b5e5868SGarrett D'Amore int pass;
1326b5e5868SGarrett D'Amore int lineno;
1336b5e5868SGarrett D'Amore } collpri_t;
1346b5e5868SGarrett D'Amore
1356b5e5868SGarrett D'Amore #define NUM_WT collinfo.directive_count
1366b5e5868SGarrett D'Amore
1376b5e5868SGarrett D'Amore /*
1386b5e5868SGarrett D'Amore * These are the abstract collating symbols, which are just a symbolic
1396b5e5868SGarrett D'Amore * way to reference a priority.
1406b5e5868SGarrett D'Amore */
1416b5e5868SGarrett D'Amore struct collsym {
1426b5e5868SGarrett D'Amore char *name;
1436b5e5868SGarrett D'Amore int32_t ref;
1446b5e5868SGarrett D'Amore avl_node_t avl;
1456b5e5868SGarrett D'Amore };
1466b5e5868SGarrett D'Amore
1476b5e5868SGarrett D'Amore /*
1486b5e5868SGarrett D'Amore * These are also abstract collating symbols, but we allow them to have
1496b5e5868SGarrett D'Amore * different priorities at different levels.
1506b5e5868SGarrett D'Amore */
1516b5e5868SGarrett D'Amore typedef struct collundef {
1526b5e5868SGarrett D'Amore char *name;
1536b5e5868SGarrett D'Amore int32_t ref[COLL_WEIGHTS_MAX];
1546b5e5868SGarrett D'Amore avl_node_t avl;
1556b5e5868SGarrett D'Amore } collundef_t;
1566b5e5868SGarrett D'Amore
1576b5e5868SGarrett D'Amore /*
1586b5e5868SGarrett D'Amore * These are called "chains" in libc. This records the fact that two
1596b5e5868SGarrett D'Amore * more characters should be treated as a single collating entity when
1606b5e5868SGarrett D'Amore * they appear together. For example, in Spanish <C><h> gets collated
1616b5e5868SGarrett D'Amore * as a character between <C> and <D>.
1626b5e5868SGarrett D'Amore */
1636b5e5868SGarrett D'Amore struct collelem {
1646b5e5868SGarrett D'Amore char *symbol;
1656b5e5868SGarrett D'Amore wchar_t *expand;
1666b5e5868SGarrett D'Amore int32_t ref[COLL_WEIGHTS_MAX];
1676b5e5868SGarrett D'Amore avl_node_t avl_bysymbol;
1686b5e5868SGarrett D'Amore avl_node_t avl_byexpand;
1696b5e5868SGarrett D'Amore };
1706b5e5868SGarrett D'Amore
1716b5e5868SGarrett D'Amore /*
1726b5e5868SGarrett D'Amore * Individual characters have a sequence of weights as well.
1736b5e5868SGarrett D'Amore */
1746b5e5868SGarrett D'Amore typedef struct collchar {
1756b5e5868SGarrett D'Amore wchar_t wc;
176723fee08SGarrett D'Amore int32_t ref[COLL_WEIGHTS_MAX];
1776b5e5868SGarrett D'Amore avl_node_t avl;
1786b5e5868SGarrett D'Amore } collchar_t;
1796b5e5868SGarrett D'Amore
1806b5e5868SGarrett D'Amore /*
1816b5e5868SGarrett D'Amore * Substitution entries. The key is itself a priority. Note that
1826b5e5868SGarrett D'Amore * when we create one of these, we *automatically* wind up with a
1836b5e5868SGarrett D'Amore * fully resolved priority for the key, because creation of
1846b5e5868SGarrett D'Amore * substitutions creates a resolved priority at the same time.
1856b5e5868SGarrett D'Amore */
1866b5e5868SGarrett D'Amore typedef struct {
1876b5e5868SGarrett D'Amore int32_t key;
1886b5e5868SGarrett D'Amore int32_t ref[COLLATE_STR_LEN];
1896b5e5868SGarrett D'Amore avl_node_t avl;
190723fee08SGarrett D'Amore avl_node_t avl_ref;
1916b5e5868SGarrett D'Amore } subst_t;
1926b5e5868SGarrett D'Amore
1936b5e5868SGarrett D'Amore static avl_tree_t collsyms;
1946b5e5868SGarrett D'Amore static avl_tree_t collundefs;
1956b5e5868SGarrett D'Amore static avl_tree_t elem_by_symbol;
1966b5e5868SGarrett D'Amore static avl_tree_t elem_by_expand;
1976b5e5868SGarrett D'Amore static avl_tree_t collchars;
1986b5e5868SGarrett D'Amore static avl_tree_t substs[COLL_WEIGHTS_MAX];
199723fee08SGarrett D'Amore static avl_tree_t substs_ref[COLL_WEIGHTS_MAX];
200723fee08SGarrett D'Amore static avl_tree_t weights[COLL_WEIGHTS_MAX];
201723fee08SGarrett D'Amore static int32_t nweight[COLL_WEIGHTS_MAX];
2026b5e5868SGarrett D'Amore
2036b5e5868SGarrett D'Amore /*
2046b5e5868SGarrett D'Amore * This is state tracking for the ellipsis token. Note that we start
2056b5e5868SGarrett D'Amore * the initial values so that the ellipsis logic will think we got a
2066b5e5868SGarrett D'Amore * magic starting value of NUL. It starts at minus one because the
2076b5e5868SGarrett D'Amore * starting point is exclusive -- i.e. the starting point is not
2086b5e5868SGarrett D'Amore * itself handled by the ellipsis code.
2096b5e5868SGarrett D'Amore */
2106b5e5868SGarrett D'Amore static int currorder = EOF;
2116b5e5868SGarrett D'Amore static int lastorder = EOF;
2126b5e5868SGarrett D'Amore static collelem_t *currelem;
2136b5e5868SGarrett D'Amore static collchar_t *currchar;
2146b5e5868SGarrett D'Amore static collundef_t *currundef;
2156b5e5868SGarrett D'Amore static wchar_t ellipsis_start = 0;
2166b5e5868SGarrett D'Amore static int32_t ellipsis_weights[COLL_WEIGHTS_MAX];
2176b5e5868SGarrett D'Amore
2186b5e5868SGarrett D'Amore /*
2196b5e5868SGarrett D'Amore * We keep a running tally of weights.
2206b5e5868SGarrett D'Amore */
2216b5e5868SGarrett D'Amore static int nextpri = 1;
222723fee08SGarrett D'Amore static int nextsubst[COLL_WEIGHTS_MAX] = { 0 };
2236b5e5868SGarrett D'Amore
2246b5e5868SGarrett D'Amore /*
2256b5e5868SGarrett D'Amore * This array collects up the weights for each level.
2266b5e5868SGarrett D'Amore */
2276b5e5868SGarrett D'Amore static int32_t order_weights[COLL_WEIGHTS_MAX];
2286b5e5868SGarrett D'Amore static int curr_weight = 0;
2296b5e5868SGarrett D'Amore static int32_t subst_weights[COLLATE_STR_LEN];
2306b5e5868SGarrett D'Amore static int curr_subst = 0;
2316b5e5868SGarrett D'Amore
2326b5e5868SGarrett D'Amore /*
2336b5e5868SGarrett D'Amore * Some initial priority values.
2346b5e5868SGarrett D'Amore */
2356b5e5868SGarrett D'Amore static int32_t pri_undefined[COLL_WEIGHTS_MAX];
2366b5e5868SGarrett D'Amore static int32_t pri_ignore;
2376b5e5868SGarrett D'Amore
2386b5e5868SGarrett D'Amore static collate_info_t collinfo;
2396b5e5868SGarrett D'Amore
2406b5e5868SGarrett D'Amore static collpri_t *prilist = NULL;
2416b5e5868SGarrett D'Amore static int numpri = 0;
2426b5e5868SGarrett D'Amore static int maxpri = 0;
2436b5e5868SGarrett D'Amore
2446b5e5868SGarrett D'Amore static void start_order(int);
2456b5e5868SGarrett D'Amore
2466b5e5868SGarrett D'Amore static int32_t
new_pri(void)2476b5e5868SGarrett D'Amore new_pri(void)
2486b5e5868SGarrett D'Amore {
2496b5e5868SGarrett D'Amore int i;
2506b5e5868SGarrett D'Amore
2516b5e5868SGarrett D'Amore if (numpri >= maxpri) {
2526b5e5868SGarrett D'Amore maxpri = maxpri ? maxpri * 2 : 1024;
2536b5e5868SGarrett D'Amore prilist = realloc(prilist, sizeof (collpri_t) * maxpri);
2546b5e5868SGarrett D'Amore if (prilist == NULL) {
2556b5e5868SGarrett D'Amore errf(_("out of memory"));
2566b5e5868SGarrett D'Amore return (-1);
2576b5e5868SGarrett D'Amore }
2586b5e5868SGarrett D'Amore for (i = numpri; i < maxpri; i++) {
2596b5e5868SGarrett D'Amore prilist[i].res = UNKNOWN;
2606b5e5868SGarrett D'Amore prilist[i].pri = 0;
2616b5e5868SGarrett D'Amore prilist[i].pass = 0;
2626b5e5868SGarrett D'Amore }
2636b5e5868SGarrett D'Amore }
2646b5e5868SGarrett D'Amore return (numpri++);
2656b5e5868SGarrett D'Amore }
2666b5e5868SGarrett D'Amore
2676b5e5868SGarrett D'Amore static collpri_t *
get_pri(int32_t ref)2686b5e5868SGarrett D'Amore get_pri(int32_t ref)
2696b5e5868SGarrett D'Amore {
2706b5e5868SGarrett D'Amore if ((ref < 0) || (ref > numpri)) {
2716b5e5868SGarrett D'Amore INTERR;
2726b5e5868SGarrett D'Amore return (NULL);
2736b5e5868SGarrett D'Amore }
2746b5e5868SGarrett D'Amore return (&prilist[ref]);
2756b5e5868SGarrett D'Amore }
2766b5e5868SGarrett D'Amore
2776b5e5868SGarrett D'Amore static void
set_pri(int32_t ref,int32_t v,res_t res)2786b5e5868SGarrett D'Amore set_pri(int32_t ref, int32_t v, res_t res)
2796b5e5868SGarrett D'Amore {
2806b5e5868SGarrett D'Amore collpri_t *pri;
2816b5e5868SGarrett D'Amore
2826b5e5868SGarrett D'Amore pri = get_pri(ref);
2836b5e5868SGarrett D'Amore
2846b5e5868SGarrett D'Amore if ((res == REFER) && ((v < 0) || (v >= numpri))) {
2856b5e5868SGarrett D'Amore INTERR;
2866b5e5868SGarrett D'Amore }
2876b5e5868SGarrett D'Amore
2886b5e5868SGarrett D'Amore /* Resolve self references */
2896b5e5868SGarrett D'Amore if ((res == REFER) && (ref == v)) {
2906b5e5868SGarrett D'Amore v = nextpri;
2916b5e5868SGarrett D'Amore res = RESOLVED;
2926b5e5868SGarrett D'Amore }
2936b5e5868SGarrett D'Amore
2946b5e5868SGarrett D'Amore if (pri->res != UNKNOWN) {
2956b5e5868SGarrett D'Amore warn(_("repeated item in order list (first on %d)"),
2966b5e5868SGarrett D'Amore pri->lineno);
2976b5e5868SGarrett D'Amore return;
2986b5e5868SGarrett D'Amore }
2996b5e5868SGarrett D'Amore pri->lineno = lineno;
3006b5e5868SGarrett D'Amore pri->pri = v;
3016b5e5868SGarrett D'Amore pri->res = res;
3026b5e5868SGarrett D'Amore }
3036b5e5868SGarrett D'Amore
3046b5e5868SGarrett D'Amore static int32_t
resolve_pri(int32_t ref)3056b5e5868SGarrett D'Amore resolve_pri(int32_t ref)
3066b5e5868SGarrett D'Amore {
3076b5e5868SGarrett D'Amore collpri_t *pri;
3086b5e5868SGarrett D'Amore static int32_t pass = 0;
3096b5e5868SGarrett D'Amore
3106b5e5868SGarrett D'Amore pri = get_pri(ref);
3116b5e5868SGarrett D'Amore pass++;
3126b5e5868SGarrett D'Amore while (pri->res == REFER) {
3136b5e5868SGarrett D'Amore if (pri->pass == pass) {
3146b5e5868SGarrett D'Amore /* report a line with the circular symbol */
3156b5e5868SGarrett D'Amore lineno = pri->lineno;
3166b5e5868SGarrett D'Amore errf(_("circular reference in order list"));
3176b5e5868SGarrett D'Amore return (-1);
3186b5e5868SGarrett D'Amore }
3196b5e5868SGarrett D'Amore if ((pri->pri < 0) || (pri->pri >= numpri)) {
3206b5e5868SGarrett D'Amore INTERR;
3216b5e5868SGarrett D'Amore return (-1);
3226b5e5868SGarrett D'Amore }
3236b5e5868SGarrett D'Amore pri->pass = pass;
3246b5e5868SGarrett D'Amore pri = &prilist[pri->pri];
3256b5e5868SGarrett D'Amore }
3266b5e5868SGarrett D'Amore
3276b5e5868SGarrett D'Amore if (pri->res == UNKNOWN) {
3286b5e5868SGarrett D'Amore return (-1);
3296b5e5868SGarrett D'Amore }
3306b5e5868SGarrett D'Amore if (pri->res != RESOLVED)
3316b5e5868SGarrett D'Amore INTERR;
3326b5e5868SGarrett D'Amore
3336b5e5868SGarrett D'Amore return (pri->pri);
3346b5e5868SGarrett D'Amore }
3356b5e5868SGarrett D'Amore
3366b5e5868SGarrett D'Amore static int
weight_compare(const void * n1,const void * n2)337723fee08SGarrett D'Amore weight_compare(const void *n1, const void *n2)
338723fee08SGarrett D'Amore {
339723fee08SGarrett D'Amore int32_t k1 = ((const weight_t *)n1)->pri;
340723fee08SGarrett D'Amore int32_t k2 = ((const weight_t *)n2)->pri;
341723fee08SGarrett D'Amore
342723fee08SGarrett D'Amore return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0);
343723fee08SGarrett D'Amore }
344723fee08SGarrett D'Amore
345723fee08SGarrett D'Amore static int
collsym_compare(const void * n1,const void * n2)3466b5e5868SGarrett D'Amore collsym_compare(const void *n1, const void *n2)
3476b5e5868SGarrett D'Amore {
3486b5e5868SGarrett D'Amore const collsym_t *c1 = n1;
3496b5e5868SGarrett D'Amore const collsym_t *c2 = n2;
3506b5e5868SGarrett D'Amore int rv;
3516b5e5868SGarrett D'Amore
3526b5e5868SGarrett D'Amore rv = strcmp(c1->name, c2->name);
3536b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
3546b5e5868SGarrett D'Amore }
3556b5e5868SGarrett D'Amore
3566b5e5868SGarrett D'Amore static int
collundef_compare(const void * n1,const void * n2)3576b5e5868SGarrett D'Amore collundef_compare(const void *n1, const void *n2)
3586b5e5868SGarrett D'Amore {
3596b5e5868SGarrett D'Amore const collundef_t *c1 = n1;
3606b5e5868SGarrett D'Amore const collundef_t *c2 = n2;
3616b5e5868SGarrett D'Amore int rv;
3626b5e5868SGarrett D'Amore
3636b5e5868SGarrett D'Amore rv = strcmp(c1->name, c2->name);
3646b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
3656b5e5868SGarrett D'Amore }
3666b5e5868SGarrett D'Amore
3676b5e5868SGarrett D'Amore static int
element_compare_symbol(const void * n1,const void * n2)3686b5e5868SGarrett D'Amore element_compare_symbol(const void *n1, const void *n2)
3696b5e5868SGarrett D'Amore {
3706b5e5868SGarrett D'Amore const collelem_t *c1 = n1;
3716b5e5868SGarrett D'Amore const collelem_t *c2 = n2;
3726b5e5868SGarrett D'Amore int rv;
3736b5e5868SGarrett D'Amore
3746b5e5868SGarrett D'Amore rv = strcmp(c1->symbol, c2->symbol);
3756b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
3766b5e5868SGarrett D'Amore }
3776b5e5868SGarrett D'Amore
3786b5e5868SGarrett D'Amore static int
element_compare_expand(const void * n1,const void * n2)3796b5e5868SGarrett D'Amore element_compare_expand(const void *n1, const void *n2)
3806b5e5868SGarrett D'Amore {
3816b5e5868SGarrett D'Amore const collelem_t *c1 = n1;
3826b5e5868SGarrett D'Amore const collelem_t *c2 = n2;
3836b5e5868SGarrett D'Amore int rv;
3846b5e5868SGarrett D'Amore
3856b5e5868SGarrett D'Amore rv = wcscmp(c1->expand, c2->expand);
3866b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
3876b5e5868SGarrett D'Amore }
3886b5e5868SGarrett D'Amore
3896b5e5868SGarrett D'Amore static int
collchar_compare(const void * n1,const void * n2)3906b5e5868SGarrett D'Amore collchar_compare(const void *n1, const void *n2)
3916b5e5868SGarrett D'Amore {
3926b5e5868SGarrett D'Amore wchar_t k1 = ((const collchar_t *)n1)->wc;
3936b5e5868SGarrett D'Amore wchar_t k2 = ((const collchar_t *)n2)->wc;
3946b5e5868SGarrett D'Amore
3956b5e5868SGarrett D'Amore return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0);
3966b5e5868SGarrett D'Amore }
3976b5e5868SGarrett D'Amore
3986b5e5868SGarrett D'Amore static int
subst_compare(const void * n1,const void * n2)3996b5e5868SGarrett D'Amore subst_compare(const void *n1, const void *n2)
4006b5e5868SGarrett D'Amore {
4016b5e5868SGarrett D'Amore int32_t k1 = ((const subst_t *)n1)->key;
4026b5e5868SGarrett D'Amore int32_t k2 = ((const subst_t *)n2)->key;
4036b5e5868SGarrett D'Amore
4046b5e5868SGarrett D'Amore return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0);
4056b5e5868SGarrett D'Amore }
4066b5e5868SGarrett D'Amore
407723fee08SGarrett D'Amore static int
subst_compare_ref(const void * n1,const void * n2)408723fee08SGarrett D'Amore subst_compare_ref(const void *n1, const void *n2)
409723fee08SGarrett D'Amore {
410723fee08SGarrett D'Amore int32_t *c1 = ((subst_t *)n1)->ref;
411723fee08SGarrett D'Amore int32_t *c2 = ((subst_t *)n2)->ref;
412723fee08SGarrett D'Amore int rv;
413723fee08SGarrett D'Amore
414723fee08SGarrett D'Amore rv = wcscmp((wchar_t *)c1, (wchar_t *)c2);
415723fee08SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
416723fee08SGarrett D'Amore }
417723fee08SGarrett D'Amore
4186b5e5868SGarrett D'Amore void
init_collate(void)4196b5e5868SGarrett D'Amore init_collate(void)
4206b5e5868SGarrett D'Amore {
4216b5e5868SGarrett D'Amore int i;
4226b5e5868SGarrett D'Amore
4236b5e5868SGarrett D'Amore avl_create(&collsyms, collsym_compare, sizeof (collsym_t),
4246b5e5868SGarrett D'Amore offsetof(collsym_t, avl));
4256b5e5868SGarrett D'Amore
4266b5e5868SGarrett D'Amore avl_create(&collundefs, collundef_compare, sizeof (collsym_t),
4276b5e5868SGarrett D'Amore offsetof(collundef_t, avl));
4286b5e5868SGarrett D'Amore
4296b5e5868SGarrett D'Amore avl_create(&elem_by_symbol, element_compare_symbol, sizeof (collelem_t),
4306b5e5868SGarrett D'Amore offsetof(collelem_t, avl_bysymbol));
4316b5e5868SGarrett D'Amore avl_create(&elem_by_expand, element_compare_expand, sizeof (collelem_t),
4326b5e5868SGarrett D'Amore offsetof(collelem_t, avl_byexpand));
4336b5e5868SGarrett D'Amore
4346b5e5868SGarrett D'Amore avl_create(&collchars, collchar_compare, sizeof (collchar_t),
4356b5e5868SGarrett D'Amore offsetof(collchar_t, avl));
4366b5e5868SGarrett D'Amore
437723fee08SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) {
4386b5e5868SGarrett D'Amore avl_create(&substs[i], subst_compare, sizeof (subst_t),
4396b5e5868SGarrett D'Amore offsetof(subst_t, avl));
440723fee08SGarrett D'Amore avl_create(&substs_ref[i], subst_compare_ref,
441723fee08SGarrett D'Amore sizeof (subst_t), offsetof(subst_t, avl_ref));
442723fee08SGarrett D'Amore avl_create(&weights[i], weight_compare, sizeof (weight_t),
443723fee08SGarrett D'Amore offsetof(weight_t, avl));
444723fee08SGarrett D'Amore nweight[i] = 1;
445723fee08SGarrett D'Amore }
4466b5e5868SGarrett D'Amore
4476b5e5868SGarrett D'Amore (void) memset(&collinfo, 0, sizeof (collinfo));
4486b5e5868SGarrett D'Amore
4496b5e5868SGarrett D'Amore /* allocate some initial priorities */
4506b5e5868SGarrett D'Amore pri_ignore = new_pri();
4516b5e5868SGarrett D'Amore
4526b5e5868SGarrett D'Amore set_pri(pri_ignore, 0, RESOLVED);
4536b5e5868SGarrett D'Amore
4546b5e5868SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) {
4556b5e5868SGarrett D'Amore pri_undefined[i] = new_pri();
4566b5e5868SGarrett D'Amore
4576b5e5868SGarrett D'Amore /* we will override this later */
4586b5e5868SGarrett D'Amore set_pri(pri_undefined[i], COLLATE_MAX_PRIORITY, UNKNOWN);
4596b5e5868SGarrett D'Amore }
4606b5e5868SGarrett D'Amore }
4616b5e5868SGarrett D'Amore
4626b5e5868SGarrett D'Amore void
define_collsym(char * name)4636b5e5868SGarrett D'Amore define_collsym(char *name)
4646b5e5868SGarrett D'Amore {
4656b5e5868SGarrett D'Amore collsym_t *sym;
4666b5e5868SGarrett D'Amore avl_index_t where;
4676b5e5868SGarrett D'Amore
4686b5e5868SGarrett D'Amore if ((sym = calloc(sizeof (*sym), 1)) == NULL) {
4696b5e5868SGarrett D'Amore errf(_("out of memory"));
4706b5e5868SGarrett D'Amore return;
4716b5e5868SGarrett D'Amore }
4726b5e5868SGarrett D'Amore sym->name = name;
4736b5e5868SGarrett D'Amore sym->ref = new_pri();
4746b5e5868SGarrett D'Amore
4756b5e5868SGarrett D'Amore if (avl_find(&collsyms, sym, &where) != NULL) {
4766b5e5868SGarrett D'Amore /*
4776b5e5868SGarrett D'Amore * This should never happen because we are only called
4786b5e5868SGarrett D'Amore * for undefined symbols.
4796b5e5868SGarrett D'Amore */
4806b5e5868SGarrett D'Amore INTERR;
4816b5e5868SGarrett D'Amore return;
4826b5e5868SGarrett D'Amore }
4836b5e5868SGarrett D'Amore avl_insert(&collsyms, sym, where);
4846b5e5868SGarrett D'Amore }
4856b5e5868SGarrett D'Amore
4866b5e5868SGarrett D'Amore collsym_t *
lookup_collsym(char * name)4876b5e5868SGarrett D'Amore lookup_collsym(char *name)
4886b5e5868SGarrett D'Amore {
4896b5e5868SGarrett D'Amore collsym_t srch;
4906b5e5868SGarrett D'Amore
4916b5e5868SGarrett D'Amore srch.name = name;
4926b5e5868SGarrett D'Amore return (avl_find(&collsyms, &srch, NULL));
4936b5e5868SGarrett D'Amore }
4946b5e5868SGarrett D'Amore
4956b5e5868SGarrett D'Amore collelem_t *
lookup_collelem(char * symbol)4966b5e5868SGarrett D'Amore lookup_collelem(char *symbol)
4976b5e5868SGarrett D'Amore {
4986b5e5868SGarrett D'Amore collelem_t srch;
4996b5e5868SGarrett D'Amore
5006b5e5868SGarrett D'Amore srch.symbol = symbol;
5016b5e5868SGarrett D'Amore return (avl_find(&elem_by_symbol, &srch, NULL));
5026b5e5868SGarrett D'Amore }
5036b5e5868SGarrett D'Amore
5046b5e5868SGarrett D'Amore static collundef_t *
get_collundef(char * name)5056b5e5868SGarrett D'Amore get_collundef(char *name)
5066b5e5868SGarrett D'Amore {
5076b5e5868SGarrett D'Amore collundef_t srch;
5086b5e5868SGarrett D'Amore collundef_t *ud;
5096b5e5868SGarrett D'Amore avl_index_t where;
5106b5e5868SGarrett D'Amore int i;
5116b5e5868SGarrett D'Amore
5126b5e5868SGarrett D'Amore srch.name = name;
5136b5e5868SGarrett D'Amore if ((ud = avl_find(&collundefs, &srch, &where)) == NULL) {
5146b5e5868SGarrett D'Amore if (((ud = calloc(sizeof (*ud), 1)) == NULL) ||
5156b5e5868SGarrett D'Amore ((ud->name = strdup(name)) == NULL)) {
5166b5e5868SGarrett D'Amore errf(_("out of memory"));
5176b5e5868SGarrett D'Amore return (NULL);
5186b5e5868SGarrett D'Amore }
5196b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
5206b5e5868SGarrett D'Amore ud->ref[i] = new_pri();
5216b5e5868SGarrett D'Amore }
5226b5e5868SGarrett D'Amore avl_insert(&collundefs, ud, where);
5236b5e5868SGarrett D'Amore }
5246b5e5868SGarrett D'Amore add_charmap_undefined(name);
5256b5e5868SGarrett D'Amore return (ud);
5266b5e5868SGarrett D'Amore }
5276b5e5868SGarrett D'Amore
5286b5e5868SGarrett D'Amore static collchar_t *
get_collchar(wchar_t wc,int create)5296b5e5868SGarrett D'Amore get_collchar(wchar_t wc, int create)
5306b5e5868SGarrett D'Amore {
5316b5e5868SGarrett D'Amore collchar_t srch;
5326b5e5868SGarrett D'Amore collchar_t *cc;
5336b5e5868SGarrett D'Amore avl_index_t where;
5346b5e5868SGarrett D'Amore int i;
5356b5e5868SGarrett D'Amore
5366b5e5868SGarrett D'Amore srch.wc = wc;
5376b5e5868SGarrett D'Amore cc = avl_find(&collchars, &srch, &where);
5386b5e5868SGarrett D'Amore if ((cc == NULL) && create) {
5396b5e5868SGarrett D'Amore if ((cc = calloc(sizeof (*cc), 1)) == NULL) {
5406b5e5868SGarrett D'Amore errf(_("out of memory"));
5416b5e5868SGarrett D'Amore return (NULL);
5426b5e5868SGarrett D'Amore }
5436b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
5446b5e5868SGarrett D'Amore cc->ref[i] = new_pri();
5456b5e5868SGarrett D'Amore }
5466b5e5868SGarrett D'Amore cc->wc = wc;
5476b5e5868SGarrett D'Amore avl_insert(&collchars, cc, where);
5486b5e5868SGarrett D'Amore }
5496b5e5868SGarrett D'Amore return (cc);
5506b5e5868SGarrett D'Amore }
5516b5e5868SGarrett D'Amore
5526b5e5868SGarrett D'Amore void
end_order_collsym(collsym_t * sym)5536b5e5868SGarrett D'Amore end_order_collsym(collsym_t *sym)
5546b5e5868SGarrett D'Amore {
5556b5e5868SGarrett D'Amore start_order(T_COLLSYM);
5566b5e5868SGarrett D'Amore /* update the weight */
5576b5e5868SGarrett D'Amore
5586b5e5868SGarrett D'Amore set_pri(sym->ref, nextpri, RESOLVED);
5596b5e5868SGarrett D'Amore nextpri++;
5606b5e5868SGarrett D'Amore }
5616b5e5868SGarrett D'Amore
5626b5e5868SGarrett D'Amore void
end_order(void)5636b5e5868SGarrett D'Amore end_order(void)
5646b5e5868SGarrett D'Amore {
5656b5e5868SGarrett D'Amore int i;
5666b5e5868SGarrett D'Amore int32_t pri;
5676b5e5868SGarrett D'Amore int32_t ref;
5686b5e5868SGarrett D'Amore collpri_t *p;
5696b5e5868SGarrett D'Amore
5706b5e5868SGarrett D'Amore /* advance the priority/weight */
5716b5e5868SGarrett D'Amore pri = nextpri;
5726b5e5868SGarrett D'Amore
5736b5e5868SGarrett D'Amore switch (currorder) {
5746b5e5868SGarrett D'Amore case T_CHAR:
5756b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
5766b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) ||
5776b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) ||
5786b5e5868SGarrett D'Amore (p->pri == -1)) {
5796b5e5868SGarrett D'Amore /* unspecified weight is a self reference */
5806b5e5868SGarrett D'Amore set_pri(currchar->ref[i], pri, RESOLVED);
5816b5e5868SGarrett D'Amore } else {
5826b5e5868SGarrett D'Amore set_pri(currchar->ref[i], ref, REFER);
5836b5e5868SGarrett D'Amore }
5846b5e5868SGarrett D'Amore order_weights[i] = -1;
5856b5e5868SGarrett D'Amore }
5866b5e5868SGarrett D'Amore
5876b5e5868SGarrett D'Amore /* leave a cookie trail in case next symbol is ellipsis */
5886b5e5868SGarrett D'Amore ellipsis_start = currchar->wc + 1;
5896b5e5868SGarrett D'Amore currchar = NULL;
5906b5e5868SGarrett D'Amore break;
5916b5e5868SGarrett D'Amore
5926b5e5868SGarrett D'Amore case T_ELLIPSIS:
5936b5e5868SGarrett D'Amore /* save off the weights were we can find them */
5946b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
5956b5e5868SGarrett D'Amore ellipsis_weights[i] = order_weights[i];
5966b5e5868SGarrett D'Amore order_weights[i] = -1;
5976b5e5868SGarrett D'Amore }
5986b5e5868SGarrett D'Amore break;
5996b5e5868SGarrett D'Amore
6006b5e5868SGarrett D'Amore case T_COLLELEM:
6016b5e5868SGarrett D'Amore if (currelem == NULL) {
6026b5e5868SGarrett D'Amore INTERR;
6036b5e5868SGarrett D'Amore } else {
6046b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
6056b5e5868SGarrett D'Amore
6066b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) ||
6076b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) ||
6086b5e5868SGarrett D'Amore (p->pri == -1)) {
6096b5e5868SGarrett D'Amore set_pri(currelem->ref[i], pri,
6106b5e5868SGarrett D'Amore RESOLVED);
6116b5e5868SGarrett D'Amore } else {
6126b5e5868SGarrett D'Amore set_pri(currelem->ref[i], ref, REFER);
6136b5e5868SGarrett D'Amore }
6146b5e5868SGarrett D'Amore order_weights[i] = -1;
6156b5e5868SGarrett D'Amore }
6166b5e5868SGarrett D'Amore }
6176b5e5868SGarrett D'Amore break;
6186b5e5868SGarrett D'Amore
6196b5e5868SGarrett D'Amore case T_UNDEFINED:
6206b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
6216b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) ||
6226b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) ||
6236b5e5868SGarrett D'Amore (p->pri == -1)) {
6246b5e5868SGarrett D'Amore set_pri(pri_undefined[i], -1, RESOLVED);
6256b5e5868SGarrett D'Amore } else {
6266b5e5868SGarrett D'Amore set_pri(pri_undefined[i], ref, REFER);
6276b5e5868SGarrett D'Amore }
6286b5e5868SGarrett D'Amore order_weights[i] = -1;
6296b5e5868SGarrett D'Amore }
6306b5e5868SGarrett D'Amore break;
6316b5e5868SGarrett D'Amore
6326b5e5868SGarrett D'Amore case T_SYMBOL:
633*68caef18SRichard Lowe for (i = 0; i < NUM_WT; i++) {
6346b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) ||
6356b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) ||
6366b5e5868SGarrett D'Amore (p->pri == -1)) {
6376b5e5868SGarrett D'Amore set_pri(currundef->ref[i], pri, RESOLVED);
6386b5e5868SGarrett D'Amore } else {
6396b5e5868SGarrett D'Amore set_pri(currundef->ref[i], ref, REFER);
6406b5e5868SGarrett D'Amore }
6416b5e5868SGarrett D'Amore order_weights[i] = -1;
642*68caef18SRichard Lowe }
6436b5e5868SGarrett D'Amore break;
6446b5e5868SGarrett D'Amore
6456b5e5868SGarrett D'Amore default:
6466b5e5868SGarrett D'Amore INTERR;
6476b5e5868SGarrett D'Amore }
6486b5e5868SGarrett D'Amore
6496b5e5868SGarrett D'Amore nextpri++;
6506b5e5868SGarrett D'Amore }
6516b5e5868SGarrett D'Amore
6526b5e5868SGarrett D'Amore static void
start_order(int type)6536b5e5868SGarrett D'Amore start_order(int type)
6546b5e5868SGarrett D'Amore {
6556b5e5868SGarrett D'Amore int i;
6566b5e5868SGarrett D'Amore
6576b5e5868SGarrett D'Amore lastorder = currorder;
6586b5e5868SGarrett D'Amore currorder = type;
6596b5e5868SGarrett D'Amore
6606b5e5868SGarrett D'Amore /* this is used to protect ELLIPSIS processing */
6616b5e5868SGarrett D'Amore if ((lastorder == T_ELLIPSIS) && (type != T_CHAR)) {
6626b5e5868SGarrett D'Amore errf(_("character value expected"));
6636b5e5868SGarrett D'Amore }
6646b5e5868SGarrett D'Amore
6656b5e5868SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) {
6666b5e5868SGarrett D'Amore order_weights[i] = -1;
6676b5e5868SGarrett D'Amore }
6686b5e5868SGarrett D'Amore curr_weight = 0;
6696b5e5868SGarrett D'Amore }
6706b5e5868SGarrett D'Amore
6716b5e5868SGarrett D'Amore void
start_order_undefined(void)6726b5e5868SGarrett D'Amore start_order_undefined(void)
6736b5e5868SGarrett D'Amore {
6746b5e5868SGarrett D'Amore start_order(T_UNDEFINED);
6756b5e5868SGarrett D'Amore }
6766b5e5868SGarrett D'Amore
6776b5e5868SGarrett D'Amore void
start_order_symbol(char * name)6786b5e5868SGarrett D'Amore start_order_symbol(char *name)
6796b5e5868SGarrett D'Amore {
6806b5e5868SGarrett D'Amore currundef = get_collundef(name);
6816b5e5868SGarrett D'Amore start_order(T_SYMBOL);
6826b5e5868SGarrett D'Amore }
6836b5e5868SGarrett D'Amore
6846b5e5868SGarrett D'Amore void
start_order_char(wchar_t wc)6856b5e5868SGarrett D'Amore start_order_char(wchar_t wc)
6866b5e5868SGarrett D'Amore {
6876b5e5868SGarrett D'Amore collchar_t *cc;
6886b5e5868SGarrett D'Amore int32_t ref;
6896b5e5868SGarrett D'Amore
6906b5e5868SGarrett D'Amore start_order(T_CHAR);
6916b5e5868SGarrett D'Amore
6926b5e5868SGarrett D'Amore /*
6936b5e5868SGarrett D'Amore * If we last saw an ellipsis, then we need to close the range.
6946b5e5868SGarrett D'Amore * Handle that here. Note that we have to be careful because the
6956b5e5868SGarrett D'Amore * items *inside* the range are treated exclusiveley to the items
6966b5e5868SGarrett D'Amore * outside of the range. The ends of the range can have quite
6976b5e5868SGarrett D'Amore * different weights than the range members.
6986b5e5868SGarrett D'Amore */
6996b5e5868SGarrett D'Amore if (lastorder == T_ELLIPSIS) {
7006b5e5868SGarrett D'Amore int i;
7016b5e5868SGarrett D'Amore
7026b5e5868SGarrett D'Amore if (wc < ellipsis_start) {
7036b5e5868SGarrett D'Amore errf(_("malformed range!"));
7046b5e5868SGarrett D'Amore return;
7056b5e5868SGarrett D'Amore }
7066b5e5868SGarrett D'Amore while (ellipsis_start < wc) {
7076b5e5868SGarrett D'Amore /*
7086b5e5868SGarrett D'Amore * pick all of the saved weights for the
7096b5e5868SGarrett D'Amore * ellipsis. note that -1 encodes for the
7106b5e5868SGarrett D'Amore * ellipsis itself, which means to take the
7116b5e5868SGarrett D'Amore * current relative priority.
7126b5e5868SGarrett D'Amore */
7136b5e5868SGarrett D'Amore if ((cc = get_collchar(ellipsis_start, 1)) == NULL) {
7146b5e5868SGarrett D'Amore INTERR;
7156b5e5868SGarrett D'Amore return;
7166b5e5868SGarrett D'Amore }
7176b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
7186b5e5868SGarrett D'Amore collpri_t *p;
7196b5e5868SGarrett D'Amore if (((ref = ellipsis_weights[i]) == -1) ||
7206b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) ||
7216b5e5868SGarrett D'Amore (p->pri == -1)) {
7226b5e5868SGarrett D'Amore set_pri(cc->ref[i], nextpri, RESOLVED);
7236b5e5868SGarrett D'Amore } else {
7246b5e5868SGarrett D'Amore set_pri(cc->ref[i], ref, REFER);
7256b5e5868SGarrett D'Amore }
7266b5e5868SGarrett D'Amore ellipsis_weights[i] = NULL;
7276b5e5868SGarrett D'Amore }
7286b5e5868SGarrett D'Amore ellipsis_start++;
7296b5e5868SGarrett D'Amore nextpri++;
7306b5e5868SGarrett D'Amore }
7316b5e5868SGarrett D'Amore }
7326b5e5868SGarrett D'Amore
7336b5e5868SGarrett D'Amore currchar = get_collchar(wc, 1);
7346b5e5868SGarrett D'Amore }
7356b5e5868SGarrett D'Amore
7366b5e5868SGarrett D'Amore void
start_order_collelem(collelem_t * e)7376b5e5868SGarrett D'Amore start_order_collelem(collelem_t *e)
7386b5e5868SGarrett D'Amore {
7396b5e5868SGarrett D'Amore start_order(T_COLLELEM);
7406b5e5868SGarrett D'Amore currelem = e;
7416b5e5868SGarrett D'Amore }
7426b5e5868SGarrett D'Amore
7436b5e5868SGarrett D'Amore void
start_order_ellipsis(void)7446b5e5868SGarrett D'Amore start_order_ellipsis(void)
7456b5e5868SGarrett D'Amore {
7466b5e5868SGarrett D'Amore int i;
7476b5e5868SGarrett D'Amore
7486b5e5868SGarrett D'Amore start_order(T_ELLIPSIS);
7496b5e5868SGarrett D'Amore
7506b5e5868SGarrett D'Amore if (lastorder != T_CHAR) {
7516b5e5868SGarrett D'Amore errf(_("illegal starting point for range"));
7526b5e5868SGarrett D'Amore return;
7536b5e5868SGarrett D'Amore }
7546b5e5868SGarrett D'Amore
7556b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
7566b5e5868SGarrett D'Amore ellipsis_weights[i] = order_weights[i];
7576b5e5868SGarrett D'Amore }
7586b5e5868SGarrett D'Amore }
7596b5e5868SGarrett D'Amore
7606b5e5868SGarrett D'Amore void
define_collelem(char * name,wchar_t * wcs)7616b5e5868SGarrett D'Amore define_collelem(char *name, wchar_t *wcs)
7626b5e5868SGarrett D'Amore {
7636b5e5868SGarrett D'Amore collelem_t *e;
7646b5e5868SGarrett D'Amore avl_index_t where1;
7656b5e5868SGarrett D'Amore avl_index_t where2;
7666b5e5868SGarrett D'Amore int i;
7676b5e5868SGarrett D'Amore
7686b5e5868SGarrett D'Amore if (wcslen(wcs) >= COLLATE_STR_LEN) {
7696b5e5868SGarrett D'Amore errf(_("expanded collation element too long"));
7706b5e5868SGarrett D'Amore return;
7716b5e5868SGarrett D'Amore }
7726b5e5868SGarrett D'Amore
7736b5e5868SGarrett D'Amore if ((e = calloc(sizeof (*e), 1)) == NULL) {
7746b5e5868SGarrett D'Amore errf(_("out of memory"));
7756b5e5868SGarrett D'Amore return;
7766b5e5868SGarrett D'Amore }
7776b5e5868SGarrett D'Amore e->expand = wcs;
7786b5e5868SGarrett D'Amore e->symbol = name;
7796b5e5868SGarrett D'Amore
7806b5e5868SGarrett D'Amore /*
7816b5e5868SGarrett D'Amore * This is executed before the order statement, so we don't
7826b5e5868SGarrett D'Amore * know how many priorities we *really* need. We allocate one
7836b5e5868SGarrett D'Amore * for each possible weight. Not a big deal, as collating-elements
7846b5e5868SGarrett D'Amore * prove to be quite rare.
7856b5e5868SGarrett D'Amore */
7866b5e5868SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) {
7876b5e5868SGarrett D'Amore e->ref[i] = new_pri();
7886b5e5868SGarrett D'Amore }
7896b5e5868SGarrett D'Amore
7906b5e5868SGarrett D'Amore /* A character sequence can only reduce to one element. */
7916b5e5868SGarrett D'Amore if ((avl_find(&elem_by_symbol, e, &where1) != NULL) ||
7926b5e5868SGarrett D'Amore (avl_find(&elem_by_expand, e, &where2) != NULL)) {
7936b5e5868SGarrett D'Amore errf(_("duplicate collating element definition"));
7946b5e5868SGarrett D'Amore return;
7956b5e5868SGarrett D'Amore }
7966b5e5868SGarrett D'Amore avl_insert(&elem_by_symbol, e, where1);
7976b5e5868SGarrett D'Amore avl_insert(&elem_by_expand, e, where2);
7986b5e5868SGarrett D'Amore }
7996b5e5868SGarrett D'Amore
8006b5e5868SGarrett D'Amore void
add_order_bit(int kw)8016b5e5868SGarrett D'Amore add_order_bit(int kw)
8026b5e5868SGarrett D'Amore {
8036b5e5868SGarrett D'Amore uint8_t bit = DIRECTIVE_UNDEF;
8046b5e5868SGarrett D'Amore
8056b5e5868SGarrett D'Amore switch (kw) {
8066b5e5868SGarrett D'Amore case T_FORWARD:
8076b5e5868SGarrett D'Amore bit = DIRECTIVE_FORWARD;
8086b5e5868SGarrett D'Amore break;
8096b5e5868SGarrett D'Amore case T_BACKWARD:
8106b5e5868SGarrett D'Amore bit = DIRECTIVE_BACKWARD;
8116b5e5868SGarrett D'Amore break;
8126b5e5868SGarrett D'Amore case T_POSITION:
8136b5e5868SGarrett D'Amore bit = DIRECTIVE_POSITION;
8146b5e5868SGarrett D'Amore break;
8156b5e5868SGarrett D'Amore default:
8166b5e5868SGarrett D'Amore INTERR;
8176b5e5868SGarrett D'Amore break;
8186b5e5868SGarrett D'Amore }
8196b5e5868SGarrett D'Amore collinfo.directive[collinfo.directive_count] |= bit;
8206b5e5868SGarrett D'Amore }
8216b5e5868SGarrett D'Amore
8226b5e5868SGarrett D'Amore void
add_order_directive(void)8236b5e5868SGarrett D'Amore add_order_directive(void)
8246b5e5868SGarrett D'Amore {
8256b5e5868SGarrett D'Amore if (collinfo.directive_count >= COLL_WEIGHTS_MAX) {
8266b5e5868SGarrett D'Amore errf(_("too many directives (max %d)"), COLL_WEIGHTS_MAX);
8276b5e5868SGarrett D'Amore }
8286b5e5868SGarrett D'Amore collinfo.directive_count++;
8296b5e5868SGarrett D'Amore }
8306b5e5868SGarrett D'Amore
8316b5e5868SGarrett D'Amore static void
add_order_pri(int32_t ref)8326b5e5868SGarrett D'Amore add_order_pri(int32_t ref)
8336b5e5868SGarrett D'Amore {
8346b5e5868SGarrett D'Amore if (curr_weight >= NUM_WT) {
8356b5e5868SGarrett D'Amore errf(_("too many weights (max %d)"), NUM_WT);
8366b5e5868SGarrett D'Amore return;
8376b5e5868SGarrett D'Amore }
8386b5e5868SGarrett D'Amore order_weights[curr_weight] = ref;
8396b5e5868SGarrett D'Amore curr_weight++;
8406b5e5868SGarrett D'Amore }
8416b5e5868SGarrett D'Amore
8426b5e5868SGarrett D'Amore void
add_order_collsym(collsym_t * s)8436b5e5868SGarrett D'Amore add_order_collsym(collsym_t *s)
8446b5e5868SGarrett D'Amore {
8456b5e5868SGarrett D'Amore add_order_pri(s->ref);
8466b5e5868SGarrett D'Amore }
8476b5e5868SGarrett D'Amore
8486b5e5868SGarrett D'Amore void
add_order_char(wchar_t wc)8496b5e5868SGarrett D'Amore add_order_char(wchar_t wc)
8506b5e5868SGarrett D'Amore {
8516b5e5868SGarrett D'Amore collchar_t *cc;
8526b5e5868SGarrett D'Amore
8536b5e5868SGarrett D'Amore if ((cc = get_collchar(wc, 1)) == NULL) {
8546b5e5868SGarrett D'Amore INTERR;
8556b5e5868SGarrett D'Amore return;
8566b5e5868SGarrett D'Amore }
8576b5e5868SGarrett D'Amore
8586b5e5868SGarrett D'Amore add_order_pri(cc->ref[curr_weight]);
8596b5e5868SGarrett D'Amore }
8606b5e5868SGarrett D'Amore
8616b5e5868SGarrett D'Amore void
add_order_collelem(collelem_t * e)8626b5e5868SGarrett D'Amore add_order_collelem(collelem_t *e)
8636b5e5868SGarrett D'Amore {
8646b5e5868SGarrett D'Amore add_order_pri(e->ref[curr_weight]);
8656b5e5868SGarrett D'Amore }
8666b5e5868SGarrett D'Amore
8676b5e5868SGarrett D'Amore void
add_order_ignore(void)8686b5e5868SGarrett D'Amore add_order_ignore(void)
8696b5e5868SGarrett D'Amore {
8706b5e5868SGarrett D'Amore add_order_pri(pri_ignore);
8716b5e5868SGarrett D'Amore }
8726b5e5868SGarrett D'Amore
8736b5e5868SGarrett D'Amore void
add_order_symbol(char * sym)8746b5e5868SGarrett D'Amore add_order_symbol(char *sym)
8756b5e5868SGarrett D'Amore {
8766b5e5868SGarrett D'Amore collundef_t *c;
8776b5e5868SGarrett D'Amore if ((c = get_collundef(sym)) == NULL) {
8786b5e5868SGarrett D'Amore INTERR;
8796b5e5868SGarrett D'Amore return;
8806b5e5868SGarrett D'Amore }
8816b5e5868SGarrett D'Amore add_order_pri(c->ref[curr_weight]);
8826b5e5868SGarrett D'Amore }
8836b5e5868SGarrett D'Amore
8846b5e5868SGarrett D'Amore void
add_order_ellipsis(void)8856b5e5868SGarrett D'Amore add_order_ellipsis(void)
8866b5e5868SGarrett D'Amore {
8876b5e5868SGarrett D'Amore /* special NULL value indicates self reference */
8886b5e5868SGarrett D'Amore add_order_pri(NULL);
8896b5e5868SGarrett D'Amore }
8906b5e5868SGarrett D'Amore
8916b5e5868SGarrett D'Amore void
add_order_subst(void)8926b5e5868SGarrett D'Amore add_order_subst(void)
8936b5e5868SGarrett D'Amore {
894723fee08SGarrett D'Amore subst_t srch;
8956b5e5868SGarrett D'Amore subst_t *s;
8966b5e5868SGarrett D'Amore avl_index_t where;
8976b5e5868SGarrett D'Amore int i;
8986b5e5868SGarrett D'Amore
899723fee08SGarrett D'Amore (void) memset(&srch, 0, sizeof (srch));
900723fee08SGarrett D'Amore for (i = 0; i < curr_subst; i++) {
901723fee08SGarrett D'Amore srch.ref[i] = subst_weights[i];
902723fee08SGarrett D'Amore subst_weights[i] = 0;
903723fee08SGarrett D'Amore }
904723fee08SGarrett D'Amore s = avl_find(&substs_ref[curr_weight], &srch, &where);
905723fee08SGarrett D'Amore
906723fee08SGarrett D'Amore if (s == NULL) {
9076b5e5868SGarrett D'Amore if ((s = calloc(sizeof (*s), 1)) == NULL) {
9086b5e5868SGarrett D'Amore errf(_("out of memory"));
9096b5e5868SGarrett D'Amore return;
9106b5e5868SGarrett D'Amore }
9116b5e5868SGarrett D'Amore s->key = new_pri();
9126b5e5868SGarrett D'Amore
9136b5e5868SGarrett D'Amore /*
914723fee08SGarrett D'Amore * We use a self reference for our key, but we set a
915723fee08SGarrett D'Amore * high bit to indicate that this is a substitution
916723fee08SGarrett D'Amore * reference. This will expedite table lookups later,
917723fee08SGarrett D'Amore * and prevent table lookups for situations that don't
918723fee08SGarrett D'Amore * require it. (In short, its a big win, because we
919723fee08SGarrett D'Amore * can skip a lot of binary searching.)
9206b5e5868SGarrett D'Amore */
921723fee08SGarrett D'Amore set_pri(s->key,
922723fee08SGarrett D'Amore (nextsubst[curr_weight] | COLLATE_SUBST_PRIORITY),
923723fee08SGarrett D'Amore RESOLVED);
924723fee08SGarrett D'Amore nextsubst[curr_weight] += 1;
9256b5e5868SGarrett D'Amore
9266b5e5868SGarrett D'Amore for (i = 0; i < curr_subst; i++) {
927723fee08SGarrett D'Amore s->ref[i] = srch.ref[i];
9286b5e5868SGarrett D'Amore }
929723fee08SGarrett D'Amore
930723fee08SGarrett D'Amore avl_insert(&substs_ref[curr_weight], s, where);
9316b5e5868SGarrett D'Amore
9326b5e5868SGarrett D'Amore if (avl_find(&substs[curr_weight], s, &where) != NULL) {
9336b5e5868SGarrett D'Amore INTERR;
9346b5e5868SGarrett D'Amore return;
9356b5e5868SGarrett D'Amore }
9366b5e5868SGarrett D'Amore avl_insert(&substs[curr_weight], s, where);
937723fee08SGarrett D'Amore }
938723fee08SGarrett D'Amore curr_subst = 0;
939723fee08SGarrett D'Amore
9406b5e5868SGarrett D'Amore
9416b5e5868SGarrett D'Amore /*
9426b5e5868SGarrett D'Amore * We are using the current (unique) priority as a search key
9436b5e5868SGarrett D'Amore * in the substitution table.
9446b5e5868SGarrett D'Amore */
9456b5e5868SGarrett D'Amore add_order_pri(s->key);
9466b5e5868SGarrett D'Amore }
9476b5e5868SGarrett D'Amore
9486b5e5868SGarrett D'Amore static void
add_subst_pri(int32_t ref)9496b5e5868SGarrett D'Amore add_subst_pri(int32_t ref)
9506b5e5868SGarrett D'Amore {
9516b5e5868SGarrett D'Amore if (curr_subst >= COLLATE_STR_LEN) {
9526b5e5868SGarrett D'Amore errf(_("substitution string is too long"));
9536b5e5868SGarrett D'Amore return;
9546b5e5868SGarrett D'Amore }
9556b5e5868SGarrett D'Amore subst_weights[curr_subst] = ref;
9566b5e5868SGarrett D'Amore curr_subst++;
9576b5e5868SGarrett D'Amore }
9586b5e5868SGarrett D'Amore
9596b5e5868SGarrett D'Amore void
add_subst_char(wchar_t wc)9606b5e5868SGarrett D'Amore add_subst_char(wchar_t wc)
9616b5e5868SGarrett D'Amore {
9626b5e5868SGarrett D'Amore collchar_t *cc;
9636b5e5868SGarrett D'Amore
9646b5e5868SGarrett D'Amore
9656b5e5868SGarrett D'Amore if (((cc = get_collchar(wc, 1)) == NULL) ||
9666b5e5868SGarrett D'Amore (cc->wc != wc)) {
9676b5e5868SGarrett D'Amore INTERR;
9686b5e5868SGarrett D'Amore return;
9696b5e5868SGarrett D'Amore }
9706b5e5868SGarrett D'Amore /* we take the weight for the character at that position */
9716b5e5868SGarrett D'Amore add_subst_pri(cc->ref[curr_weight]);
9726b5e5868SGarrett D'Amore }
9736b5e5868SGarrett D'Amore
9746b5e5868SGarrett D'Amore void
add_subst_collelem(collelem_t * e)9756b5e5868SGarrett D'Amore add_subst_collelem(collelem_t *e)
9766b5e5868SGarrett D'Amore {
9776b5e5868SGarrett D'Amore add_subst_pri(e->ref[curr_weight]);
9786b5e5868SGarrett D'Amore }
9796b5e5868SGarrett D'Amore
9806b5e5868SGarrett D'Amore void
add_subst_collsym(collsym_t * s)9816b5e5868SGarrett D'Amore add_subst_collsym(collsym_t *s)
9826b5e5868SGarrett D'Amore {
9836b5e5868SGarrett D'Amore add_subst_pri(s->ref);
9846b5e5868SGarrett D'Amore }
9856b5e5868SGarrett D'Amore
9866b5e5868SGarrett D'Amore void
add_subst_symbol(char * ptr)9876b5e5868SGarrett D'Amore add_subst_symbol(char *ptr)
9886b5e5868SGarrett D'Amore {
9896b5e5868SGarrett D'Amore collundef_t *cu;
9906b5e5868SGarrett D'Amore
9916b5e5868SGarrett D'Amore if ((cu = get_collundef(ptr)) != NULL) {
9926b5e5868SGarrett D'Amore add_subst_pri(cu->ref[curr_weight]);
9936b5e5868SGarrett D'Amore }
9946b5e5868SGarrett D'Amore }
9956b5e5868SGarrett D'Amore
9966b5e5868SGarrett D'Amore void
add_weight(int32_t ref,int pass)997723fee08SGarrett D'Amore add_weight(int32_t ref, int pass)
998723fee08SGarrett D'Amore {
999723fee08SGarrett D'Amore weight_t srch;
1000723fee08SGarrett D'Amore weight_t *w;
1001723fee08SGarrett D'Amore avl_index_t where;
1002723fee08SGarrett D'Amore
1003723fee08SGarrett D'Amore srch.pri = resolve_pri(ref);
1004723fee08SGarrett D'Amore
1005723fee08SGarrett D'Amore /* No translation of ignores */
1006723fee08SGarrett D'Amore if (srch.pri == 0)
1007723fee08SGarrett D'Amore return;
1008723fee08SGarrett D'Amore
1009723fee08SGarrett D'Amore /* Substitution priorities are not weights */
1010723fee08SGarrett D'Amore if (srch.pri & COLLATE_SUBST_PRIORITY)
1011723fee08SGarrett D'Amore return;
1012723fee08SGarrett D'Amore
1013723fee08SGarrett D'Amore if (avl_find(&weights[pass], &srch, &where) != NULL)
1014723fee08SGarrett D'Amore return;
1015723fee08SGarrett D'Amore
1016723fee08SGarrett D'Amore if ((w = calloc(sizeof (*w), 1)) == NULL) {
1017723fee08SGarrett D'Amore errf(_("out of memory"));
1018723fee08SGarrett D'Amore return;
1019723fee08SGarrett D'Amore }
1020723fee08SGarrett D'Amore w->pri = srch.pri;
1021723fee08SGarrett D'Amore avl_insert(&weights[pass], w, where);
1022723fee08SGarrett D'Amore }
1023723fee08SGarrett D'Amore
1024723fee08SGarrett D'Amore void
add_weights(int32_t * refs)1025723fee08SGarrett D'Amore add_weights(int32_t *refs)
1026723fee08SGarrett D'Amore {
1027723fee08SGarrett D'Amore int i;
1028723fee08SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
1029723fee08SGarrett D'Amore add_weight(refs[i], i);
1030723fee08SGarrett D'Amore }
1031723fee08SGarrett D'Amore }
1032723fee08SGarrett D'Amore
1033723fee08SGarrett D'Amore int32_t
get_weight(int32_t ref,int pass)1034723fee08SGarrett D'Amore get_weight(int32_t ref, int pass)
1035723fee08SGarrett D'Amore {
1036723fee08SGarrett D'Amore weight_t srch;
1037723fee08SGarrett D'Amore weight_t *w;
1038723fee08SGarrett D'Amore int32_t pri;
1039723fee08SGarrett D'Amore
1040723fee08SGarrett D'Amore pri = resolve_pri(ref);
1041723fee08SGarrett D'Amore if (pri & COLLATE_SUBST_PRIORITY) {
1042723fee08SGarrett D'Amore return (pri);
1043723fee08SGarrett D'Amore }
1044723fee08SGarrett D'Amore if (pri <= 0) {
1045723fee08SGarrett D'Amore return (pri);
1046723fee08SGarrett D'Amore }
1047723fee08SGarrett D'Amore srch.pri = pri;
1048723fee08SGarrett D'Amore if ((w = avl_find(&weights[pass], &srch, NULL)) == NULL) {
1049723fee08SGarrett D'Amore INTERR;
1050723fee08SGarrett D'Amore return (-1);
1051723fee08SGarrett D'Amore }
1052723fee08SGarrett D'Amore return (w->opt);
1053723fee08SGarrett D'Amore }
1054723fee08SGarrett D'Amore
1055723fee08SGarrett D'Amore void
dump_collate(void)10566b5e5868SGarrett D'Amore dump_collate(void)
10576b5e5868SGarrett D'Amore {
10586b5e5868SGarrett D'Amore FILE *f;
10596b5e5868SGarrett D'Amore int i, j, n;
10606b5e5868SGarrett D'Amore size_t sz;
10616b5e5868SGarrett D'Amore int32_t pri;
10626b5e5868SGarrett D'Amore collelem_t *ce;
10636b5e5868SGarrett D'Amore collchar_t *cc;
10646b5e5868SGarrett D'Amore subst_t *sb;
10656b5e5868SGarrett D'Amore char vers[COLLATE_STR_LEN];
1066723fee08SGarrett D'Amore collate_char_t chars[UCHAR_MAX + 1];
1067723fee08SGarrett D'Amore collate_large_t *large;
10686b5e5868SGarrett D'Amore collate_subst_t *subst[COLL_WEIGHTS_MAX];
1069723fee08SGarrett D'Amore collate_chain_t *chain;
10706b5e5868SGarrett D'Amore
1071723fee08SGarrett D'Amore /*
1072723fee08SGarrett D'Amore * We have to run throught a preliminary pass to identify all the
1073723fee08SGarrett D'Amore * weights that we use for each sorting level.
1074723fee08SGarrett D'Amore */
1075723fee08SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
1076723fee08SGarrett D'Amore add_weight(pri_ignore, i);
1077723fee08SGarrett D'Amore }
1078723fee08SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
1079723fee08SGarrett D'Amore for (sb = avl_first(&substs[i]); sb;
1080723fee08SGarrett D'Amore sb = AVL_NEXT(&substs[i], sb)) {
1081723fee08SGarrett D'Amore for (j = 0; sb->ref[j]; j++) {
1082723fee08SGarrett D'Amore add_weight(sb->ref[j], i);
1083723fee08SGarrett D'Amore }
1084723fee08SGarrett D'Amore }
1085723fee08SGarrett D'Amore }
1086723fee08SGarrett D'Amore for (ce = avl_first(&elem_by_expand);
1087723fee08SGarrett D'Amore ce != NULL;
1088723fee08SGarrett D'Amore ce = AVL_NEXT(&elem_by_expand, ce)) {
1089723fee08SGarrett D'Amore add_weights(ce->ref);
1090723fee08SGarrett D'Amore }
1091723fee08SGarrett D'Amore for (cc = avl_first(&collchars); cc; cc = AVL_NEXT(&collchars, cc)) {
1092723fee08SGarrett D'Amore add_weights(cc->ref);
1093723fee08SGarrett D'Amore }
1094723fee08SGarrett D'Amore
1095723fee08SGarrett D'Amore /*
1096723fee08SGarrett D'Amore * Now we walk the entire set of weights, removing the gaps
1097723fee08SGarrett D'Amore * in the weights. This gives us optimum usage. The walk
1098723fee08SGarrett D'Amore * occurs in priority.
1099723fee08SGarrett D'Amore */
1100723fee08SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
1101723fee08SGarrett D'Amore weight_t *w;
1102723fee08SGarrett D'Amore for (w = avl_first(&weights[i]); w;
1103723fee08SGarrett D'Amore w = AVL_NEXT(&weights[i], w)) {
1104723fee08SGarrett D'Amore w->opt = nweight[i];
1105723fee08SGarrett D'Amore nweight[i] += 1;
1106723fee08SGarrett D'Amore }
1107723fee08SGarrett D'Amore }
1108723fee08SGarrett D'Amore
1109723fee08SGarrett D'Amore (void) memset(&chars, 0, sizeof (chars));
11106b5e5868SGarrett D'Amore (void) memset(vers, 0, COLLATE_STR_LEN);
1111723fee08SGarrett D'Amore (void) strlcpy(vers, COLLATE_VERSION, sizeof (vers));
11126b5e5868SGarrett D'Amore
11136b5e5868SGarrett D'Amore /*
11146b5e5868SGarrett D'Amore * We need to make sure we arrange for the UNDEFINED field
1115723fee08SGarrett D'Amore * to show up. Also, set the total weight counts.
11166b5e5868SGarrett D'Amore */
11176b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
11186b5e5868SGarrett D'Amore if (resolve_pri(pri_undefined[i]) == -1) {
11196b5e5868SGarrett D'Amore set_pri(pri_undefined[i], -1, RESOLVED);
11206b5e5868SGarrett D'Amore /* they collate at the end of everything else */
11216b5e5868SGarrett D'Amore collinfo.undef_pri[i] = COLLATE_MAX_PRIORITY;
11226b5e5868SGarrett D'Amore }
1123723fee08SGarrett D'Amore collinfo.pri_count[i] = nweight[i];
11246b5e5868SGarrett D'Amore }
11256b5e5868SGarrett D'Amore
1126723fee08SGarrett D'Amore collinfo.pri_count[NUM_WT] = max_wide();
11276b5e5868SGarrett D'Amore collinfo.undef_pri[NUM_WT] = COLLATE_MAX_PRIORITY;
11286b5e5868SGarrett D'Amore collinfo.directive[NUM_WT] = DIRECTIVE_UNDEFINED;
11296b5e5868SGarrett D'Amore
11306b5e5868SGarrett D'Amore /*
11316b5e5868SGarrett D'Amore * Ordinary character priorities
11326b5e5868SGarrett D'Amore */
11336b5e5868SGarrett D'Amore for (i = 0; i <= UCHAR_MAX; i++) {
11346b5e5868SGarrett D'Amore if ((cc = get_collchar(i, 0)) != NULL) {
11356b5e5868SGarrett D'Amore for (j = 0; j < NUM_WT; j++) {
1136723fee08SGarrett D'Amore chars[i].pri[j] = get_weight(cc->ref[j], j);
11376b5e5868SGarrett D'Amore }
11386b5e5868SGarrett D'Amore } else {
11396b5e5868SGarrett D'Amore for (j = 0; j < NUM_WT; j++) {
1140723fee08SGarrett D'Amore chars[i].pri[j] =
1141723fee08SGarrett D'Amore get_weight(pri_undefined[j], j);
11426b5e5868SGarrett D'Amore }
11436b5e5868SGarrett D'Amore /*
11446b5e5868SGarrett D'Amore * Per POSIX, for undefined characters, we
11456b5e5868SGarrett D'Amore * also have to add a last item, which is the
11466b5e5868SGarrett D'Amore * character code.
11476b5e5868SGarrett D'Amore */
1148723fee08SGarrett D'Amore chars[i].pri[NUM_WT] = i;
11496b5e5868SGarrett D'Amore }
11506b5e5868SGarrett D'Amore }
11516b5e5868SGarrett D'Amore
11526b5e5868SGarrett D'Amore /*
11536b5e5868SGarrett D'Amore * Substitution tables
11546b5e5868SGarrett D'Amore */
1155723fee08SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
11566b5e5868SGarrett D'Amore collate_subst_t *st = NULL;
11576b5e5868SGarrett D'Amore n = collinfo.subst_count[i] = avl_numnodes(&substs[i]);
11586b5e5868SGarrett D'Amore if ((st = calloc(sizeof (collate_subst_t) * n, 1)) == NULL) {
11596b5e5868SGarrett D'Amore errf(_("out of memory"));
11606b5e5868SGarrett D'Amore return;
11616b5e5868SGarrett D'Amore }
11626b5e5868SGarrett D'Amore n = 0;
11636b5e5868SGarrett D'Amore for (sb = avl_first(&substs[i]); sb;
11646b5e5868SGarrett D'Amore sb = AVL_NEXT(&substs[i], sb)) {
11656b5e5868SGarrett D'Amore if ((st[n].key = resolve_pri(sb->key)) < 0) {
11666b5e5868SGarrett D'Amore /* by definition these resolve! */
11676b5e5868SGarrett D'Amore INTERR;
11686b5e5868SGarrett D'Amore }
1169723fee08SGarrett D'Amore if (st[n].key != (n | COLLATE_SUBST_PRIORITY)) {
1170723fee08SGarrett D'Amore INTERR;
1171723fee08SGarrett D'Amore }
11726b5e5868SGarrett D'Amore for (j = 0; sb->ref[j]; j++) {
1173723fee08SGarrett D'Amore st[n].pri[j] = get_weight(sb->ref[j], i);
11746b5e5868SGarrett D'Amore }
11756b5e5868SGarrett D'Amore n++;
11766b5e5868SGarrett D'Amore }
11776b5e5868SGarrett D'Amore if (n != collinfo.subst_count[i])
11786b5e5868SGarrett D'Amore INTERR;
11796b5e5868SGarrett D'Amore subst[i] = st;
11806b5e5868SGarrett D'Amore }
11816b5e5868SGarrett D'Amore
1182723fee08SGarrett D'Amore
11836b5e5868SGarrett D'Amore /*
11846b5e5868SGarrett D'Amore * Chains, i.e. collating elements
11856b5e5868SGarrett D'Amore */
11866b5e5868SGarrett D'Amore collinfo.chain_count = avl_numnodes(&elem_by_expand);
1187723fee08SGarrett D'Amore chain = calloc(sizeof (collate_chain_t), collinfo.chain_count);
11886b5e5868SGarrett D'Amore if (chain == NULL) {
11896b5e5868SGarrett D'Amore errf(_("out of memory"));
11906b5e5868SGarrett D'Amore return;
11916b5e5868SGarrett D'Amore }
11926b5e5868SGarrett D'Amore for (n = 0, ce = avl_first(&elem_by_expand);
11936b5e5868SGarrett D'Amore ce != NULL;
11946b5e5868SGarrett D'Amore ce = AVL_NEXT(&elem_by_expand, ce), n++) {
11956b5e5868SGarrett D'Amore (void) wsncpy(chain[n].str, ce->expand, COLLATE_STR_LEN);
11966b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
1197723fee08SGarrett D'Amore chain[n].pri[i] = get_weight(ce->ref[i], i);
11986b5e5868SGarrett D'Amore }
11996b5e5868SGarrett D'Amore }
12006b5e5868SGarrett D'Amore if (n != collinfo.chain_count)
12016b5e5868SGarrett D'Amore INTERR;
12026b5e5868SGarrett D'Amore
12036b5e5868SGarrett D'Amore /*
12046b5e5868SGarrett D'Amore * Large (> UCHAR_MAX) character priorities
12056b5e5868SGarrett D'Amore */
1206723fee08SGarrett D'Amore large = calloc(sizeof (collate_large_t) * avl_numnodes(&collchars), 1);
12076b5e5868SGarrett D'Amore if (large == NULL) {
12086b5e5868SGarrett D'Amore errf(_("out of memory"));
12096b5e5868SGarrett D'Amore return;
12106b5e5868SGarrett D'Amore }
12116b5e5868SGarrett D'Amore
12126b5e5868SGarrett D'Amore i = 0;
12136b5e5868SGarrett D'Amore for (cc = avl_first(&collchars); cc; cc = AVL_NEXT(&collchars, cc)) {
12146b5e5868SGarrett D'Amore int undef = 0;
12156b5e5868SGarrett D'Amore /* we already gathered those */
12166b5e5868SGarrett D'Amore if (cc->wc <= UCHAR_MAX)
12176b5e5868SGarrett D'Amore continue;
12186b5e5868SGarrett D'Amore for (j = 0; j < NUM_WT; j++) {
1219723fee08SGarrett D'Amore if ((pri = get_weight(cc->ref[j], j)) < 0) {
12206b5e5868SGarrett D'Amore undef = 1;
12216b5e5868SGarrett D'Amore }
12226b5e5868SGarrett D'Amore if (undef && (pri >= 0)) {
12236b5e5868SGarrett D'Amore /* if undefined, then all priorities are */
12246b5e5868SGarrett D'Amore INTERR;
12256b5e5868SGarrett D'Amore } else {
12266b5e5868SGarrett D'Amore large[i].pri.pri[j] = pri;
12276b5e5868SGarrett D'Amore }
12286b5e5868SGarrett D'Amore }
12296b5e5868SGarrett D'Amore if (!undef) {
12306b5e5868SGarrett D'Amore large[i].val = cc->wc;
1231723fee08SGarrett D'Amore collinfo.large_count = i++;
12326b5e5868SGarrett D'Amore }
12336b5e5868SGarrett D'Amore }
12346b5e5868SGarrett D'Amore
12356b5e5868SGarrett D'Amore if ((f = open_category()) == NULL) {
12366b5e5868SGarrett D'Amore return;
12376b5e5868SGarrett D'Amore }
12386b5e5868SGarrett D'Amore
12396b5e5868SGarrett D'Amore /* Time to write the entire data set out */
12406b5e5868SGarrett D'Amore
12416b5e5868SGarrett D'Amore if ((wr_category(vers, COLLATE_STR_LEN, f) < 0) ||
12426b5e5868SGarrett D'Amore (wr_category(&collinfo, sizeof (collinfo), f) < 0) ||
1243723fee08SGarrett D'Amore (wr_category(&chars, sizeof (chars), f) < 0)) {
12446b5e5868SGarrett D'Amore return;
12456b5e5868SGarrett D'Amore }
12466b5e5868SGarrett D'Amore
12476b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) {
12486b5e5868SGarrett D'Amore sz = sizeof (collate_subst_t) * collinfo.subst_count[i];
12496b5e5868SGarrett D'Amore if (wr_category(subst[i], sz, f) < 0) {
12506b5e5868SGarrett D'Amore return;
12516b5e5868SGarrett D'Amore }
12526b5e5868SGarrett D'Amore }
1253723fee08SGarrett D'Amore sz = sizeof (collate_chain_t) * collinfo.chain_count;
12546b5e5868SGarrett D'Amore if (wr_category(chain, sz, f) < 0) {
12556b5e5868SGarrett D'Amore return;
12566b5e5868SGarrett D'Amore }
1257723fee08SGarrett D'Amore sz = sizeof (collate_large_t) * collinfo.large_count;
12586b5e5868SGarrett D'Amore if (wr_category(large, sz, f) < 0) {
12596b5e5868SGarrett D'Amore return;
12606b5e5868SGarrett D'Amore }
12616b5e5868SGarrett D'Amore
12626b5e5868SGarrett D'Amore close_category(f);
12636b5e5868SGarrett D'Amore }
1264