1*6b5e5868SGarrett D'Amore /* 2*6b5e5868SGarrett D'Amore * This file and its contents are supplied under the terms of the 3*6b5e5868SGarrett D'Amore * Common Development and Distribution License ("CDDL"), version 1.0. 4*6b5e5868SGarrett D'Amore * You may only use this file in accordance with the terms version 1.0 5*6b5e5868SGarrett D'Amore * of the CDDL. 6*6b5e5868SGarrett D'Amore * 7*6b5e5868SGarrett D'Amore * A full copy of the text of the CDDL should have accompanied this 8*6b5e5868SGarrett D'Amore * source. A copy of the CDDL is also available via the Internet at 9*6b5e5868SGarrett D'Amore * http://www.illumos.org/license/CDDL. 10*6b5e5868SGarrett D'Amore */ 11*6b5e5868SGarrett D'Amore 12*6b5e5868SGarrett D'Amore /* 13*6b5e5868SGarrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 14*6b5e5868SGarrett D'Amore */ 15*6b5e5868SGarrett D'Amore 16*6b5e5868SGarrett D'Amore /* 17*6b5e5868SGarrett D'Amore * LC_COLLATE database generation routines for localedef. 18*6b5e5868SGarrett D'Amore */ 19*6b5e5868SGarrett D'Amore 20*6b5e5868SGarrett D'Amore #include <stdio.h> 21*6b5e5868SGarrett D'Amore #include <stdlib.h> 22*6b5e5868SGarrett D'Amore #include <errno.h> 23*6b5e5868SGarrett D'Amore #include <string.h> 24*6b5e5868SGarrett D'Amore #include <sys/types.h> 25*6b5e5868SGarrett D'Amore #include <sys/avl.h> 26*6b5e5868SGarrett D'Amore #include <string.h> 27*6b5e5868SGarrett D'Amore #include <unistd.h> 28*6b5e5868SGarrett D'Amore #include <wchar.h> 29*6b5e5868SGarrett D'Amore #include <widec.h> 30*6b5e5868SGarrett D'Amore #include <limits.h> 31*6b5e5868SGarrett D'Amore #include "localedef.h" 32*6b5e5868SGarrett D'Amore #include "parser.tab.h" 33*6b5e5868SGarrett D'Amore #include "collate.h" 34*6b5e5868SGarrett D'Amore 35*6b5e5868SGarrett D'Amore /* 36*6b5e5868SGarrett D'Amore * In order to resolve the priorities, we create a table of priorities. 37*6b5e5868SGarrett D'Amore * Entries in the table can be in one of three states. 38*6b5e5868SGarrett D'Amore * 39*6b5e5868SGarrett D'Amore * UNKNOWN is for newly allocated entries, and indicates that nothing 40*6b5e5868SGarrett D'Amore * is known about the priority. (For example, when new entries are created 41*6b5e5868SGarrett D'Amore * for collating-symbols, this is the value assigned for them until the 42*6b5e5868SGarrett D'Amore * collating symbol's order has been determined. 43*6b5e5868SGarrett D'Amore * 44*6b5e5868SGarrett D'Amore * RESOLVED is used for an entry where the priority indicates the final 45*6b5e5868SGarrett D'Amore * numeric weight. 46*6b5e5868SGarrett D'Amore * 47*6b5e5868SGarrett D'Amore * REFER is used for entries that reference other entries. Typically 48*6b5e5868SGarrett D'Amore * this is used for forward references. A collating-symbol can never 49*6b5e5868SGarrett D'Amore * have this value. 50*6b5e5868SGarrett D'Amore * 51*6b5e5868SGarrett D'Amore * The "self" field is a reference to or own index in the pri list. 52*6b5e5868SGarrett D'Amore * 53*6b5e5868SGarrett D'Amore * The "pass" field is used during final resolution to aid in detection 54*6b5e5868SGarrett D'Amore * of referencing loops. (For example <A> depends on <B>, but <B> has its 55*6b5e5868SGarrett D'Amore * priority dependent on <A>.) 56*6b5e5868SGarrett D'Amore */ 57*6b5e5868SGarrett D'Amore typedef enum { 58*6b5e5868SGarrett D'Amore UNKNOWN, /* priority is totally unknown */ 59*6b5e5868SGarrett D'Amore RESOLVED, /* priority value fully resolved */ 60*6b5e5868SGarrett D'Amore REFER /* priority is a reference (index) */ 61*6b5e5868SGarrett D'Amore } res_t; 62*6b5e5868SGarrett D'Amore 63*6b5e5868SGarrett D'Amore typedef struct priority { 64*6b5e5868SGarrett D'Amore res_t res; 65*6b5e5868SGarrett D'Amore int32_t pri; 66*6b5e5868SGarrett D'Amore int32_t self; 67*6b5e5868SGarrett D'Amore int pass; 68*6b5e5868SGarrett D'Amore int lineno; 69*6b5e5868SGarrett D'Amore } collpri_t; 70*6b5e5868SGarrett D'Amore 71*6b5e5868SGarrett D'Amore 72*6b5e5868SGarrett D'Amore #define NUM_WT collinfo.directive_count 73*6b5e5868SGarrett D'Amore 74*6b5e5868SGarrett D'Amore /* 75*6b5e5868SGarrett D'Amore * These are the abstract collating symbols, which are just a symbolic 76*6b5e5868SGarrett D'Amore * way to reference a priority. 77*6b5e5868SGarrett D'Amore */ 78*6b5e5868SGarrett D'Amore struct collsym { 79*6b5e5868SGarrett D'Amore char *name; 80*6b5e5868SGarrett D'Amore int32_t ref; 81*6b5e5868SGarrett D'Amore avl_node_t avl; 82*6b5e5868SGarrett D'Amore }; 83*6b5e5868SGarrett D'Amore 84*6b5e5868SGarrett D'Amore /* 85*6b5e5868SGarrett D'Amore * These are also abstract collating symbols, but we allow them to have 86*6b5e5868SGarrett D'Amore * different priorities at different levels. 87*6b5e5868SGarrett D'Amore */ 88*6b5e5868SGarrett D'Amore typedef struct collundef { 89*6b5e5868SGarrett D'Amore char *name; 90*6b5e5868SGarrett D'Amore int32_t ref[COLL_WEIGHTS_MAX]; 91*6b5e5868SGarrett D'Amore avl_node_t avl; 92*6b5e5868SGarrett D'Amore } collundef_t; 93*6b5e5868SGarrett D'Amore 94*6b5e5868SGarrett D'Amore /* 95*6b5e5868SGarrett D'Amore * These are called "chains" in libc. This records the fact that two 96*6b5e5868SGarrett D'Amore * more characters should be treated as a single collating entity when 97*6b5e5868SGarrett D'Amore * they appear together. For example, in Spanish <C><h> gets collated 98*6b5e5868SGarrett D'Amore * as a character between <C> and <D>. 99*6b5e5868SGarrett D'Amore */ 100*6b5e5868SGarrett D'Amore struct collelem { 101*6b5e5868SGarrett D'Amore char *symbol; 102*6b5e5868SGarrett D'Amore wchar_t *expand; 103*6b5e5868SGarrett D'Amore int32_t ref[COLL_WEIGHTS_MAX]; 104*6b5e5868SGarrett D'Amore avl_node_t avl_bysymbol; 105*6b5e5868SGarrett D'Amore avl_node_t avl_byexpand; 106*6b5e5868SGarrett D'Amore }; 107*6b5e5868SGarrett D'Amore 108*6b5e5868SGarrett D'Amore /* 109*6b5e5868SGarrett D'Amore * Individual characters have a sequence of weights as well. 110*6b5e5868SGarrett D'Amore */ 111*6b5e5868SGarrett D'Amore typedef struct collchar { 112*6b5e5868SGarrett D'Amore wchar_t wc; 113*6b5e5868SGarrett D'Amore wchar_t ref[COLL_WEIGHTS_MAX]; 114*6b5e5868SGarrett D'Amore avl_node_t avl; 115*6b5e5868SGarrett D'Amore } collchar_t; 116*6b5e5868SGarrett D'Amore 117*6b5e5868SGarrett D'Amore /* 118*6b5e5868SGarrett D'Amore * Substitution entries. The key is itself a priority. Note that 119*6b5e5868SGarrett D'Amore * when we create one of these, we *automatically* wind up with a 120*6b5e5868SGarrett D'Amore * fully resolved priority for the key, because creation of 121*6b5e5868SGarrett D'Amore * substitutions creates a resolved priority at the same time. 122*6b5e5868SGarrett D'Amore */ 123*6b5e5868SGarrett D'Amore typedef struct { 124*6b5e5868SGarrett D'Amore int32_t key; 125*6b5e5868SGarrett D'Amore int32_t ref[COLLATE_STR_LEN]; 126*6b5e5868SGarrett D'Amore avl_node_t avl; 127*6b5e5868SGarrett D'Amore } subst_t; 128*6b5e5868SGarrett D'Amore 129*6b5e5868SGarrett D'Amore static avl_tree_t collsyms; 130*6b5e5868SGarrett D'Amore static avl_tree_t collundefs; 131*6b5e5868SGarrett D'Amore static avl_tree_t elem_by_symbol; 132*6b5e5868SGarrett D'Amore static avl_tree_t elem_by_expand; 133*6b5e5868SGarrett D'Amore static avl_tree_t collchars; 134*6b5e5868SGarrett D'Amore static avl_tree_t substs[COLL_WEIGHTS_MAX]; 135*6b5e5868SGarrett D'Amore 136*6b5e5868SGarrett D'Amore /* 137*6b5e5868SGarrett D'Amore * This is state tracking for the ellipsis token. Note that we start 138*6b5e5868SGarrett D'Amore * the initial values so that the ellipsis logic will think we got a 139*6b5e5868SGarrett D'Amore * magic starting value of NUL. It starts at minus one because the 140*6b5e5868SGarrett D'Amore * starting point is exclusive -- i.e. the starting point is not 141*6b5e5868SGarrett D'Amore * itself handled by the ellipsis code. 142*6b5e5868SGarrett D'Amore */ 143*6b5e5868SGarrett D'Amore static int currorder = EOF; 144*6b5e5868SGarrett D'Amore static int lastorder = EOF; 145*6b5e5868SGarrett D'Amore static collelem_t *currelem; 146*6b5e5868SGarrett D'Amore static collchar_t *currchar; 147*6b5e5868SGarrett D'Amore static collundef_t *currundef; 148*6b5e5868SGarrett D'Amore static wchar_t ellipsis_start = 0; 149*6b5e5868SGarrett D'Amore static int32_t ellipsis_weights[COLL_WEIGHTS_MAX]; 150*6b5e5868SGarrett D'Amore 151*6b5e5868SGarrett D'Amore /* 152*6b5e5868SGarrett D'Amore * We keep a running tally of weights. 153*6b5e5868SGarrett D'Amore */ 154*6b5e5868SGarrett D'Amore static int nextpri = 1; 155*6b5e5868SGarrett D'Amore 156*6b5e5868SGarrett D'Amore /* 157*6b5e5868SGarrett D'Amore * This array collects up the weights for each level. 158*6b5e5868SGarrett D'Amore */ 159*6b5e5868SGarrett D'Amore static int32_t order_weights[COLL_WEIGHTS_MAX]; 160*6b5e5868SGarrett D'Amore static int curr_weight = 0; 161*6b5e5868SGarrett D'Amore static int32_t subst_weights[COLLATE_STR_LEN]; 162*6b5e5868SGarrett D'Amore static int curr_subst = 0; 163*6b5e5868SGarrett D'Amore 164*6b5e5868SGarrett D'Amore /* 165*6b5e5868SGarrett D'Amore * Some initial priority values. 166*6b5e5868SGarrett D'Amore */ 167*6b5e5868SGarrett D'Amore static int32_t pri_undefined[COLL_WEIGHTS_MAX]; 168*6b5e5868SGarrett D'Amore static int32_t pri_ignore; 169*6b5e5868SGarrett D'Amore 170*6b5e5868SGarrett D'Amore static collate_info_t collinfo; 171*6b5e5868SGarrett D'Amore 172*6b5e5868SGarrett D'Amore static collpri_t *prilist = NULL; 173*6b5e5868SGarrett D'Amore static int numpri = 0; 174*6b5e5868SGarrett D'Amore static int maxpri = 0; 175*6b5e5868SGarrett D'Amore 176*6b5e5868SGarrett D'Amore static void start_order(int); 177*6b5e5868SGarrett D'Amore 178*6b5e5868SGarrett D'Amore static int32_t 179*6b5e5868SGarrett D'Amore new_pri(void) 180*6b5e5868SGarrett D'Amore { 181*6b5e5868SGarrett D'Amore int i; 182*6b5e5868SGarrett D'Amore 183*6b5e5868SGarrett D'Amore if (numpri >= maxpri) { 184*6b5e5868SGarrett D'Amore maxpri = maxpri ? maxpri * 2 : 1024; 185*6b5e5868SGarrett D'Amore prilist = realloc(prilist, sizeof (collpri_t) * maxpri); 186*6b5e5868SGarrett D'Amore if (prilist == NULL) { 187*6b5e5868SGarrett D'Amore errf(_("out of memory")); 188*6b5e5868SGarrett D'Amore return (-1); 189*6b5e5868SGarrett D'Amore } 190*6b5e5868SGarrett D'Amore for (i = numpri; i < maxpri; i++) { 191*6b5e5868SGarrett D'Amore prilist[i].res = UNKNOWN; 192*6b5e5868SGarrett D'Amore prilist[i].pri = 0; 193*6b5e5868SGarrett D'Amore prilist[i].pass = 0; 194*6b5e5868SGarrett D'Amore prilist[i].self = i; 195*6b5e5868SGarrett D'Amore } 196*6b5e5868SGarrett D'Amore } 197*6b5e5868SGarrett D'Amore return (numpri++); 198*6b5e5868SGarrett D'Amore } 199*6b5e5868SGarrett D'Amore 200*6b5e5868SGarrett D'Amore static collpri_t * 201*6b5e5868SGarrett D'Amore get_pri(int32_t ref) 202*6b5e5868SGarrett D'Amore { 203*6b5e5868SGarrett D'Amore if ((ref < 0) || (ref > numpri)) { 204*6b5e5868SGarrett D'Amore INTERR; 205*6b5e5868SGarrett D'Amore return (NULL); 206*6b5e5868SGarrett D'Amore } 207*6b5e5868SGarrett D'Amore return (&prilist[ref]); 208*6b5e5868SGarrett D'Amore } 209*6b5e5868SGarrett D'Amore 210*6b5e5868SGarrett D'Amore static void 211*6b5e5868SGarrett D'Amore set_pri(int32_t ref, int32_t v, res_t res) 212*6b5e5868SGarrett D'Amore { 213*6b5e5868SGarrett D'Amore collpri_t *pri; 214*6b5e5868SGarrett D'Amore 215*6b5e5868SGarrett D'Amore pri = get_pri(ref); 216*6b5e5868SGarrett D'Amore 217*6b5e5868SGarrett D'Amore if ((res == REFER) && ((v < 0) || (v >= numpri))) { 218*6b5e5868SGarrett D'Amore INTERR; 219*6b5e5868SGarrett D'Amore } 220*6b5e5868SGarrett D'Amore 221*6b5e5868SGarrett D'Amore /* Resolve self references */ 222*6b5e5868SGarrett D'Amore if ((res == REFER) && (ref == v)) { 223*6b5e5868SGarrett D'Amore v = nextpri; 224*6b5e5868SGarrett D'Amore res = RESOLVED; 225*6b5e5868SGarrett D'Amore } 226*6b5e5868SGarrett D'Amore 227*6b5e5868SGarrett D'Amore if (pri->res != UNKNOWN) { 228*6b5e5868SGarrett D'Amore warn(_("repeated item in order list (first on %d)"), 229*6b5e5868SGarrett D'Amore pri->lineno); 230*6b5e5868SGarrett D'Amore return; 231*6b5e5868SGarrett D'Amore } 232*6b5e5868SGarrett D'Amore pri->lineno = lineno; 233*6b5e5868SGarrett D'Amore pri->pri = v; 234*6b5e5868SGarrett D'Amore pri->res = res; 235*6b5e5868SGarrett D'Amore } 236*6b5e5868SGarrett D'Amore 237*6b5e5868SGarrett D'Amore static int32_t 238*6b5e5868SGarrett D'Amore resolve_pri(int32_t ref) 239*6b5e5868SGarrett D'Amore { 240*6b5e5868SGarrett D'Amore collpri_t *pri; 241*6b5e5868SGarrett D'Amore static int32_t pass = 0; 242*6b5e5868SGarrett D'Amore 243*6b5e5868SGarrett D'Amore pri = get_pri(ref); 244*6b5e5868SGarrett D'Amore pass++; 245*6b5e5868SGarrett D'Amore while (pri->res == REFER) { 246*6b5e5868SGarrett D'Amore if (pri->pass == pass) { 247*6b5e5868SGarrett D'Amore /* report a line with the circular symbol */ 248*6b5e5868SGarrett D'Amore lineno = pri->lineno; 249*6b5e5868SGarrett D'Amore errf(_("circular reference in order list")); 250*6b5e5868SGarrett D'Amore return (-1); 251*6b5e5868SGarrett D'Amore } 252*6b5e5868SGarrett D'Amore if ((pri->pri < 0) || (pri->pri >= numpri)) { 253*6b5e5868SGarrett D'Amore INTERR; 254*6b5e5868SGarrett D'Amore return (-1); 255*6b5e5868SGarrett D'Amore } 256*6b5e5868SGarrett D'Amore pri->pass = pass; 257*6b5e5868SGarrett D'Amore pri = &prilist[pri->pri]; 258*6b5e5868SGarrett D'Amore } 259*6b5e5868SGarrett D'Amore 260*6b5e5868SGarrett D'Amore if (pri->res == UNKNOWN) { 261*6b5e5868SGarrett D'Amore return (-1); 262*6b5e5868SGarrett D'Amore } 263*6b5e5868SGarrett D'Amore if (pri->res != RESOLVED) 264*6b5e5868SGarrett D'Amore INTERR; 265*6b5e5868SGarrett D'Amore 266*6b5e5868SGarrett D'Amore return (pri->pri); 267*6b5e5868SGarrett D'Amore } 268*6b5e5868SGarrett D'Amore 269*6b5e5868SGarrett D'Amore static int 270*6b5e5868SGarrett D'Amore collsym_compare(const void *n1, const void *n2) 271*6b5e5868SGarrett D'Amore { 272*6b5e5868SGarrett D'Amore const collsym_t *c1 = n1; 273*6b5e5868SGarrett D'Amore const collsym_t *c2 = n2; 274*6b5e5868SGarrett D'Amore int rv; 275*6b5e5868SGarrett D'Amore 276*6b5e5868SGarrett D'Amore rv = strcmp(c1->name, c2->name); 277*6b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 278*6b5e5868SGarrett D'Amore } 279*6b5e5868SGarrett D'Amore 280*6b5e5868SGarrett D'Amore static int 281*6b5e5868SGarrett D'Amore collundef_compare(const void *n1, const void *n2) 282*6b5e5868SGarrett D'Amore { 283*6b5e5868SGarrett D'Amore const collundef_t *c1 = n1; 284*6b5e5868SGarrett D'Amore const collundef_t *c2 = n2; 285*6b5e5868SGarrett D'Amore int rv; 286*6b5e5868SGarrett D'Amore 287*6b5e5868SGarrett D'Amore rv = strcmp(c1->name, c2->name); 288*6b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 289*6b5e5868SGarrett D'Amore } 290*6b5e5868SGarrett D'Amore 291*6b5e5868SGarrett D'Amore static int 292*6b5e5868SGarrett D'Amore element_compare_symbol(const void *n1, const void *n2) 293*6b5e5868SGarrett D'Amore { 294*6b5e5868SGarrett D'Amore const collelem_t *c1 = n1; 295*6b5e5868SGarrett D'Amore const collelem_t *c2 = n2; 296*6b5e5868SGarrett D'Amore int rv; 297*6b5e5868SGarrett D'Amore 298*6b5e5868SGarrett D'Amore rv = strcmp(c1->symbol, c2->symbol); 299*6b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 300*6b5e5868SGarrett D'Amore } 301*6b5e5868SGarrett D'Amore 302*6b5e5868SGarrett D'Amore static int 303*6b5e5868SGarrett D'Amore element_compare_expand(const void *n1, const void *n2) 304*6b5e5868SGarrett D'Amore { 305*6b5e5868SGarrett D'Amore const collelem_t *c1 = n1; 306*6b5e5868SGarrett D'Amore const collelem_t *c2 = n2; 307*6b5e5868SGarrett D'Amore int rv; 308*6b5e5868SGarrett D'Amore 309*6b5e5868SGarrett D'Amore rv = wcscmp(c1->expand, c2->expand); 310*6b5e5868SGarrett D'Amore return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 311*6b5e5868SGarrett D'Amore } 312*6b5e5868SGarrett D'Amore 313*6b5e5868SGarrett D'Amore static int 314*6b5e5868SGarrett D'Amore collchar_compare(const void *n1, const void *n2) 315*6b5e5868SGarrett D'Amore { 316*6b5e5868SGarrett D'Amore wchar_t k1 = ((const collchar_t *)n1)->wc; 317*6b5e5868SGarrett D'Amore wchar_t k2 = ((const collchar_t *)n2)->wc; 318*6b5e5868SGarrett D'Amore 319*6b5e5868SGarrett D'Amore return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); 320*6b5e5868SGarrett D'Amore } 321*6b5e5868SGarrett D'Amore 322*6b5e5868SGarrett D'Amore static int 323*6b5e5868SGarrett D'Amore subst_compare(const void *n1, const void *n2) 324*6b5e5868SGarrett D'Amore { 325*6b5e5868SGarrett D'Amore int32_t k1 = ((const subst_t *)n1)->key; 326*6b5e5868SGarrett D'Amore int32_t k2 = ((const subst_t *)n2)->key; 327*6b5e5868SGarrett D'Amore 328*6b5e5868SGarrett D'Amore return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); 329*6b5e5868SGarrett D'Amore } 330*6b5e5868SGarrett D'Amore 331*6b5e5868SGarrett D'Amore void 332*6b5e5868SGarrett D'Amore init_collate(void) 333*6b5e5868SGarrett D'Amore { 334*6b5e5868SGarrett D'Amore int i; 335*6b5e5868SGarrett D'Amore 336*6b5e5868SGarrett D'Amore avl_create(&collsyms, collsym_compare, sizeof (collsym_t), 337*6b5e5868SGarrett D'Amore offsetof(collsym_t, avl)); 338*6b5e5868SGarrett D'Amore 339*6b5e5868SGarrett D'Amore avl_create(&collundefs, collundef_compare, sizeof (collsym_t), 340*6b5e5868SGarrett D'Amore offsetof(collundef_t, avl)); 341*6b5e5868SGarrett D'Amore 342*6b5e5868SGarrett D'Amore avl_create(&elem_by_symbol, element_compare_symbol, sizeof (collelem_t), 343*6b5e5868SGarrett D'Amore offsetof(collelem_t, avl_bysymbol)); 344*6b5e5868SGarrett D'Amore avl_create(&elem_by_expand, element_compare_expand, sizeof (collelem_t), 345*6b5e5868SGarrett D'Amore offsetof(collelem_t, avl_byexpand)); 346*6b5e5868SGarrett D'Amore 347*6b5e5868SGarrett D'Amore avl_create(&collchars, collchar_compare, sizeof (collchar_t), 348*6b5e5868SGarrett D'Amore offsetof(collchar_t, avl)); 349*6b5e5868SGarrett D'Amore 350*6b5e5868SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) 351*6b5e5868SGarrett D'Amore avl_create(&substs[i], subst_compare, sizeof (subst_t), 352*6b5e5868SGarrett D'Amore offsetof(subst_t, avl)); 353*6b5e5868SGarrett D'Amore 354*6b5e5868SGarrett D'Amore (void) memset(&collinfo, 0, sizeof (collinfo)); 355*6b5e5868SGarrett D'Amore 356*6b5e5868SGarrett D'Amore /* allocate some initial priorities */ 357*6b5e5868SGarrett D'Amore pri_ignore = new_pri(); 358*6b5e5868SGarrett D'Amore 359*6b5e5868SGarrett D'Amore set_pri(pri_ignore, 0, RESOLVED); 360*6b5e5868SGarrett D'Amore 361*6b5e5868SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) { 362*6b5e5868SGarrett D'Amore pri_undefined[i] = new_pri(); 363*6b5e5868SGarrett D'Amore 364*6b5e5868SGarrett D'Amore /* we will override this later */ 365*6b5e5868SGarrett D'Amore set_pri(pri_undefined[i], COLLATE_MAX_PRIORITY, UNKNOWN); 366*6b5e5868SGarrett D'Amore } 367*6b5e5868SGarrett D'Amore } 368*6b5e5868SGarrett D'Amore 369*6b5e5868SGarrett D'Amore void 370*6b5e5868SGarrett D'Amore define_collsym(char *name) 371*6b5e5868SGarrett D'Amore { 372*6b5e5868SGarrett D'Amore collsym_t *sym; 373*6b5e5868SGarrett D'Amore avl_index_t where; 374*6b5e5868SGarrett D'Amore 375*6b5e5868SGarrett D'Amore if ((sym = calloc(sizeof (*sym), 1)) == NULL) { 376*6b5e5868SGarrett D'Amore errf(_("out of memory")); 377*6b5e5868SGarrett D'Amore return; 378*6b5e5868SGarrett D'Amore } 379*6b5e5868SGarrett D'Amore sym->name = name; 380*6b5e5868SGarrett D'Amore sym->ref = new_pri(); 381*6b5e5868SGarrett D'Amore 382*6b5e5868SGarrett D'Amore if (avl_find(&collsyms, sym, &where) != NULL) { 383*6b5e5868SGarrett D'Amore /* 384*6b5e5868SGarrett D'Amore * This should never happen because we are only called 385*6b5e5868SGarrett D'Amore * for undefined symbols. 386*6b5e5868SGarrett D'Amore */ 387*6b5e5868SGarrett D'Amore INTERR; 388*6b5e5868SGarrett D'Amore return; 389*6b5e5868SGarrett D'Amore } 390*6b5e5868SGarrett D'Amore avl_insert(&collsyms, sym, where); 391*6b5e5868SGarrett D'Amore } 392*6b5e5868SGarrett D'Amore 393*6b5e5868SGarrett D'Amore collsym_t * 394*6b5e5868SGarrett D'Amore lookup_collsym(char *name) 395*6b5e5868SGarrett D'Amore { 396*6b5e5868SGarrett D'Amore collsym_t srch; 397*6b5e5868SGarrett D'Amore 398*6b5e5868SGarrett D'Amore srch.name = name; 399*6b5e5868SGarrett D'Amore return (avl_find(&collsyms, &srch, NULL)); 400*6b5e5868SGarrett D'Amore } 401*6b5e5868SGarrett D'Amore 402*6b5e5868SGarrett D'Amore collelem_t * 403*6b5e5868SGarrett D'Amore lookup_collelem(char *symbol) 404*6b5e5868SGarrett D'Amore { 405*6b5e5868SGarrett D'Amore collelem_t srch; 406*6b5e5868SGarrett D'Amore 407*6b5e5868SGarrett D'Amore srch.symbol = symbol; 408*6b5e5868SGarrett D'Amore return (avl_find(&elem_by_symbol, &srch, NULL)); 409*6b5e5868SGarrett D'Amore } 410*6b5e5868SGarrett D'Amore 411*6b5e5868SGarrett D'Amore static collundef_t * 412*6b5e5868SGarrett D'Amore get_collundef(char *name) 413*6b5e5868SGarrett D'Amore { 414*6b5e5868SGarrett D'Amore collundef_t srch; 415*6b5e5868SGarrett D'Amore collundef_t *ud; 416*6b5e5868SGarrett D'Amore avl_index_t where; 417*6b5e5868SGarrett D'Amore int i; 418*6b5e5868SGarrett D'Amore 419*6b5e5868SGarrett D'Amore srch.name = name; 420*6b5e5868SGarrett D'Amore if ((ud = avl_find(&collundefs, &srch, &where)) == NULL) { 421*6b5e5868SGarrett D'Amore if (((ud = calloc(sizeof (*ud), 1)) == NULL) || 422*6b5e5868SGarrett D'Amore ((ud->name = strdup(name)) == NULL)) { 423*6b5e5868SGarrett D'Amore errf(_("out of memory")); 424*6b5e5868SGarrett D'Amore return (NULL); 425*6b5e5868SGarrett D'Amore } 426*6b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 427*6b5e5868SGarrett D'Amore ud->ref[i] = new_pri(); 428*6b5e5868SGarrett D'Amore } 429*6b5e5868SGarrett D'Amore avl_insert(&collundefs, ud, where); 430*6b5e5868SGarrett D'Amore } 431*6b5e5868SGarrett D'Amore add_charmap_undefined(name); 432*6b5e5868SGarrett D'Amore return (ud); 433*6b5e5868SGarrett D'Amore } 434*6b5e5868SGarrett D'Amore 435*6b5e5868SGarrett D'Amore static collchar_t * 436*6b5e5868SGarrett D'Amore get_collchar(wchar_t wc, int create) 437*6b5e5868SGarrett D'Amore { 438*6b5e5868SGarrett D'Amore collchar_t srch; 439*6b5e5868SGarrett D'Amore collchar_t *cc; 440*6b5e5868SGarrett D'Amore avl_index_t where; 441*6b5e5868SGarrett D'Amore int i; 442*6b5e5868SGarrett D'Amore 443*6b5e5868SGarrett D'Amore srch.wc = wc; 444*6b5e5868SGarrett D'Amore cc = avl_find(&collchars, &srch, &where); 445*6b5e5868SGarrett D'Amore if ((cc == NULL) && create) { 446*6b5e5868SGarrett D'Amore if ((cc = calloc(sizeof (*cc), 1)) == NULL) { 447*6b5e5868SGarrett D'Amore errf(_("out of memory")); 448*6b5e5868SGarrett D'Amore return (NULL); 449*6b5e5868SGarrett D'Amore } 450*6b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 451*6b5e5868SGarrett D'Amore cc->ref[i] = new_pri(); 452*6b5e5868SGarrett D'Amore } 453*6b5e5868SGarrett D'Amore cc->wc = wc; 454*6b5e5868SGarrett D'Amore avl_insert(&collchars, cc, where); 455*6b5e5868SGarrett D'Amore } 456*6b5e5868SGarrett D'Amore return (cc); 457*6b5e5868SGarrett D'Amore } 458*6b5e5868SGarrett D'Amore 459*6b5e5868SGarrett D'Amore void 460*6b5e5868SGarrett D'Amore end_order_collsym(collsym_t *sym) 461*6b5e5868SGarrett D'Amore { 462*6b5e5868SGarrett D'Amore start_order(T_COLLSYM); 463*6b5e5868SGarrett D'Amore /* update the weight */ 464*6b5e5868SGarrett D'Amore 465*6b5e5868SGarrett D'Amore set_pri(sym->ref, nextpri, RESOLVED); 466*6b5e5868SGarrett D'Amore nextpri++; 467*6b5e5868SGarrett D'Amore } 468*6b5e5868SGarrett D'Amore 469*6b5e5868SGarrett D'Amore void 470*6b5e5868SGarrett D'Amore end_order(void) 471*6b5e5868SGarrett D'Amore { 472*6b5e5868SGarrett D'Amore int i; 473*6b5e5868SGarrett D'Amore int32_t pri; 474*6b5e5868SGarrett D'Amore int32_t ref; 475*6b5e5868SGarrett D'Amore collpri_t *p; 476*6b5e5868SGarrett D'Amore 477*6b5e5868SGarrett D'Amore /* advance the priority/weight */ 478*6b5e5868SGarrett D'Amore pri = nextpri; 479*6b5e5868SGarrett D'Amore 480*6b5e5868SGarrett D'Amore switch (currorder) { 481*6b5e5868SGarrett D'Amore case T_CHAR: 482*6b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 483*6b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) || 484*6b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) || 485*6b5e5868SGarrett D'Amore (p->pri == -1)) { 486*6b5e5868SGarrett D'Amore /* unspecified weight is a self reference */ 487*6b5e5868SGarrett D'Amore set_pri(currchar->ref[i], pri, RESOLVED); 488*6b5e5868SGarrett D'Amore } else { 489*6b5e5868SGarrett D'Amore set_pri(currchar->ref[i], ref, REFER); 490*6b5e5868SGarrett D'Amore } 491*6b5e5868SGarrett D'Amore order_weights[i] = -1; 492*6b5e5868SGarrett D'Amore } 493*6b5e5868SGarrett D'Amore 494*6b5e5868SGarrett D'Amore /* leave a cookie trail in case next symbol is ellipsis */ 495*6b5e5868SGarrett D'Amore ellipsis_start = currchar->wc + 1; 496*6b5e5868SGarrett D'Amore currchar = NULL; 497*6b5e5868SGarrett D'Amore break; 498*6b5e5868SGarrett D'Amore 499*6b5e5868SGarrett D'Amore case T_ELLIPSIS: 500*6b5e5868SGarrett D'Amore /* save off the weights were we can find them */ 501*6b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 502*6b5e5868SGarrett D'Amore ellipsis_weights[i] = order_weights[i]; 503*6b5e5868SGarrett D'Amore order_weights[i] = -1; 504*6b5e5868SGarrett D'Amore } 505*6b5e5868SGarrett D'Amore break; 506*6b5e5868SGarrett D'Amore 507*6b5e5868SGarrett D'Amore case T_COLLELEM: 508*6b5e5868SGarrett D'Amore if (currelem == NULL) { 509*6b5e5868SGarrett D'Amore INTERR; 510*6b5e5868SGarrett D'Amore } else { 511*6b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 512*6b5e5868SGarrett D'Amore 513*6b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) || 514*6b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) || 515*6b5e5868SGarrett D'Amore (p->pri == -1)) { 516*6b5e5868SGarrett D'Amore set_pri(currelem->ref[i], pri, 517*6b5e5868SGarrett D'Amore RESOLVED); 518*6b5e5868SGarrett D'Amore } else { 519*6b5e5868SGarrett D'Amore set_pri(currelem->ref[i], ref, REFER); 520*6b5e5868SGarrett D'Amore } 521*6b5e5868SGarrett D'Amore order_weights[i] = -1; 522*6b5e5868SGarrett D'Amore } 523*6b5e5868SGarrett D'Amore } 524*6b5e5868SGarrett D'Amore break; 525*6b5e5868SGarrett D'Amore 526*6b5e5868SGarrett D'Amore case T_UNDEFINED: 527*6b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 528*6b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) || 529*6b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) || 530*6b5e5868SGarrett D'Amore (p->pri == -1)) { 531*6b5e5868SGarrett D'Amore set_pri(pri_undefined[i], -1, RESOLVED); 532*6b5e5868SGarrett D'Amore } else { 533*6b5e5868SGarrett D'Amore set_pri(pri_undefined[i], ref, REFER); 534*6b5e5868SGarrett D'Amore } 535*6b5e5868SGarrett D'Amore order_weights[i] = -1; 536*6b5e5868SGarrett D'Amore } 537*6b5e5868SGarrett D'Amore break; 538*6b5e5868SGarrett D'Amore 539*6b5e5868SGarrett D'Amore case T_SYMBOL: 540*6b5e5868SGarrett D'Amore if (((ref = order_weights[i]) < 0) || 541*6b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) || 542*6b5e5868SGarrett D'Amore (p->pri == -1)) { 543*6b5e5868SGarrett D'Amore set_pri(currundef->ref[i], pri, RESOLVED); 544*6b5e5868SGarrett D'Amore } else { 545*6b5e5868SGarrett D'Amore set_pri(currundef->ref[i], ref, REFER); 546*6b5e5868SGarrett D'Amore } 547*6b5e5868SGarrett D'Amore order_weights[i] = -1; 548*6b5e5868SGarrett D'Amore break; 549*6b5e5868SGarrett D'Amore 550*6b5e5868SGarrett D'Amore default: 551*6b5e5868SGarrett D'Amore INTERR; 552*6b5e5868SGarrett D'Amore } 553*6b5e5868SGarrett D'Amore 554*6b5e5868SGarrett D'Amore nextpri++; 555*6b5e5868SGarrett D'Amore } 556*6b5e5868SGarrett D'Amore 557*6b5e5868SGarrett D'Amore static void 558*6b5e5868SGarrett D'Amore start_order(int type) 559*6b5e5868SGarrett D'Amore { 560*6b5e5868SGarrett D'Amore int i; 561*6b5e5868SGarrett D'Amore 562*6b5e5868SGarrett D'Amore lastorder = currorder; 563*6b5e5868SGarrett D'Amore currorder = type; 564*6b5e5868SGarrett D'Amore 565*6b5e5868SGarrett D'Amore /* this is used to protect ELLIPSIS processing */ 566*6b5e5868SGarrett D'Amore if ((lastorder == T_ELLIPSIS) && (type != T_CHAR)) { 567*6b5e5868SGarrett D'Amore errf(_("character value expected")); 568*6b5e5868SGarrett D'Amore } 569*6b5e5868SGarrett D'Amore 570*6b5e5868SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) { 571*6b5e5868SGarrett D'Amore order_weights[i] = -1; 572*6b5e5868SGarrett D'Amore } 573*6b5e5868SGarrett D'Amore curr_weight = 0; 574*6b5e5868SGarrett D'Amore } 575*6b5e5868SGarrett D'Amore 576*6b5e5868SGarrett D'Amore void 577*6b5e5868SGarrett D'Amore start_order_undefined(void) 578*6b5e5868SGarrett D'Amore { 579*6b5e5868SGarrett D'Amore start_order(T_UNDEFINED); 580*6b5e5868SGarrett D'Amore } 581*6b5e5868SGarrett D'Amore 582*6b5e5868SGarrett D'Amore void 583*6b5e5868SGarrett D'Amore start_order_symbol(char *name) 584*6b5e5868SGarrett D'Amore { 585*6b5e5868SGarrett D'Amore currundef = get_collundef(name); 586*6b5e5868SGarrett D'Amore start_order(T_SYMBOL); 587*6b5e5868SGarrett D'Amore } 588*6b5e5868SGarrett D'Amore 589*6b5e5868SGarrett D'Amore void 590*6b5e5868SGarrett D'Amore start_order_char(wchar_t wc) 591*6b5e5868SGarrett D'Amore { 592*6b5e5868SGarrett D'Amore collchar_t *cc; 593*6b5e5868SGarrett D'Amore int32_t ref; 594*6b5e5868SGarrett D'Amore 595*6b5e5868SGarrett D'Amore start_order(T_CHAR); 596*6b5e5868SGarrett D'Amore 597*6b5e5868SGarrett D'Amore /* 598*6b5e5868SGarrett D'Amore * If we last saw an ellipsis, then we need to close the range. 599*6b5e5868SGarrett D'Amore * Handle that here. Note that we have to be careful because the 600*6b5e5868SGarrett D'Amore * items *inside* the range are treated exclusiveley to the items 601*6b5e5868SGarrett D'Amore * outside of the range. The ends of the range can have quite 602*6b5e5868SGarrett D'Amore * different weights than the range members. 603*6b5e5868SGarrett D'Amore */ 604*6b5e5868SGarrett D'Amore if (lastorder == T_ELLIPSIS) { 605*6b5e5868SGarrett D'Amore int i; 606*6b5e5868SGarrett D'Amore 607*6b5e5868SGarrett D'Amore if (wc < ellipsis_start) { 608*6b5e5868SGarrett D'Amore errf(_("malformed range!")); 609*6b5e5868SGarrett D'Amore return; 610*6b5e5868SGarrett D'Amore } 611*6b5e5868SGarrett D'Amore while (ellipsis_start < wc) { 612*6b5e5868SGarrett D'Amore /* 613*6b5e5868SGarrett D'Amore * pick all of the saved weights for the 614*6b5e5868SGarrett D'Amore * ellipsis. note that -1 encodes for the 615*6b5e5868SGarrett D'Amore * ellipsis itself, which means to take the 616*6b5e5868SGarrett D'Amore * current relative priority. 617*6b5e5868SGarrett D'Amore */ 618*6b5e5868SGarrett D'Amore if ((cc = get_collchar(ellipsis_start, 1)) == NULL) { 619*6b5e5868SGarrett D'Amore INTERR; 620*6b5e5868SGarrett D'Amore return; 621*6b5e5868SGarrett D'Amore } 622*6b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 623*6b5e5868SGarrett D'Amore collpri_t *p; 624*6b5e5868SGarrett D'Amore if (((ref = ellipsis_weights[i]) == -1) || 625*6b5e5868SGarrett D'Amore ((p = get_pri(ref)) == NULL) || 626*6b5e5868SGarrett D'Amore (p->pri == -1)) { 627*6b5e5868SGarrett D'Amore set_pri(cc->ref[i], nextpri, RESOLVED); 628*6b5e5868SGarrett D'Amore } else { 629*6b5e5868SGarrett D'Amore set_pri(cc->ref[i], ref, REFER); 630*6b5e5868SGarrett D'Amore } 631*6b5e5868SGarrett D'Amore ellipsis_weights[i] = NULL; 632*6b5e5868SGarrett D'Amore } 633*6b5e5868SGarrett D'Amore ellipsis_start++; 634*6b5e5868SGarrett D'Amore nextpri++; 635*6b5e5868SGarrett D'Amore } 636*6b5e5868SGarrett D'Amore } 637*6b5e5868SGarrett D'Amore 638*6b5e5868SGarrett D'Amore currchar = get_collchar(wc, 1); 639*6b5e5868SGarrett D'Amore } 640*6b5e5868SGarrett D'Amore 641*6b5e5868SGarrett D'Amore void 642*6b5e5868SGarrett D'Amore start_order_collelem(collelem_t *e) 643*6b5e5868SGarrett D'Amore { 644*6b5e5868SGarrett D'Amore start_order(T_COLLELEM); 645*6b5e5868SGarrett D'Amore currelem = e; 646*6b5e5868SGarrett D'Amore } 647*6b5e5868SGarrett D'Amore 648*6b5e5868SGarrett D'Amore void 649*6b5e5868SGarrett D'Amore start_order_ellipsis(void) 650*6b5e5868SGarrett D'Amore { 651*6b5e5868SGarrett D'Amore int i; 652*6b5e5868SGarrett D'Amore 653*6b5e5868SGarrett D'Amore start_order(T_ELLIPSIS); 654*6b5e5868SGarrett D'Amore 655*6b5e5868SGarrett D'Amore if (lastorder != T_CHAR) { 656*6b5e5868SGarrett D'Amore errf(_("illegal starting point for range")); 657*6b5e5868SGarrett D'Amore return; 658*6b5e5868SGarrett D'Amore } 659*6b5e5868SGarrett D'Amore 660*6b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 661*6b5e5868SGarrett D'Amore ellipsis_weights[i] = order_weights[i]; 662*6b5e5868SGarrett D'Amore } 663*6b5e5868SGarrett D'Amore } 664*6b5e5868SGarrett D'Amore 665*6b5e5868SGarrett D'Amore void 666*6b5e5868SGarrett D'Amore define_collelem(char *name, wchar_t *wcs) 667*6b5e5868SGarrett D'Amore { 668*6b5e5868SGarrett D'Amore collelem_t *e; 669*6b5e5868SGarrett D'Amore avl_index_t where1; 670*6b5e5868SGarrett D'Amore avl_index_t where2; 671*6b5e5868SGarrett D'Amore int i; 672*6b5e5868SGarrett D'Amore 673*6b5e5868SGarrett D'Amore if (wcslen(wcs) >= COLLATE_STR_LEN) { 674*6b5e5868SGarrett D'Amore errf(_("expanded collation element too long")); 675*6b5e5868SGarrett D'Amore return; 676*6b5e5868SGarrett D'Amore } 677*6b5e5868SGarrett D'Amore 678*6b5e5868SGarrett D'Amore if ((e = calloc(sizeof (*e), 1)) == NULL) { 679*6b5e5868SGarrett D'Amore errf(_("out of memory")); 680*6b5e5868SGarrett D'Amore return; 681*6b5e5868SGarrett D'Amore } 682*6b5e5868SGarrett D'Amore e->expand = wcs; 683*6b5e5868SGarrett D'Amore e->symbol = name; 684*6b5e5868SGarrett D'Amore 685*6b5e5868SGarrett D'Amore /* 686*6b5e5868SGarrett D'Amore * This is executed before the order statement, so we don't 687*6b5e5868SGarrett D'Amore * know how many priorities we *really* need. We allocate one 688*6b5e5868SGarrett D'Amore * for each possible weight. Not a big deal, as collating-elements 689*6b5e5868SGarrett D'Amore * prove to be quite rare. 690*6b5e5868SGarrett D'Amore */ 691*6b5e5868SGarrett D'Amore for (i = 0; i < COLL_WEIGHTS_MAX; i++) { 692*6b5e5868SGarrett D'Amore e->ref[i] = new_pri(); 693*6b5e5868SGarrett D'Amore } 694*6b5e5868SGarrett D'Amore 695*6b5e5868SGarrett D'Amore /* A character sequence can only reduce to one element. */ 696*6b5e5868SGarrett D'Amore if ((avl_find(&elem_by_symbol, e, &where1) != NULL) || 697*6b5e5868SGarrett D'Amore (avl_find(&elem_by_expand, e, &where2) != NULL)) { 698*6b5e5868SGarrett D'Amore errf(_("duplicate collating element definition")); 699*6b5e5868SGarrett D'Amore return; 700*6b5e5868SGarrett D'Amore } 701*6b5e5868SGarrett D'Amore avl_insert(&elem_by_symbol, e, where1); 702*6b5e5868SGarrett D'Amore avl_insert(&elem_by_expand, e, where2); 703*6b5e5868SGarrett D'Amore } 704*6b5e5868SGarrett D'Amore 705*6b5e5868SGarrett D'Amore void 706*6b5e5868SGarrett D'Amore add_order_bit(int kw) 707*6b5e5868SGarrett D'Amore { 708*6b5e5868SGarrett D'Amore uint8_t bit = DIRECTIVE_UNDEF; 709*6b5e5868SGarrett D'Amore 710*6b5e5868SGarrett D'Amore switch (kw) { 711*6b5e5868SGarrett D'Amore case T_FORWARD: 712*6b5e5868SGarrett D'Amore bit = DIRECTIVE_FORWARD; 713*6b5e5868SGarrett D'Amore break; 714*6b5e5868SGarrett D'Amore case T_BACKWARD: 715*6b5e5868SGarrett D'Amore bit = DIRECTIVE_BACKWARD; 716*6b5e5868SGarrett D'Amore break; 717*6b5e5868SGarrett D'Amore case T_POSITION: 718*6b5e5868SGarrett D'Amore bit = DIRECTIVE_POSITION; 719*6b5e5868SGarrett D'Amore break; 720*6b5e5868SGarrett D'Amore default: 721*6b5e5868SGarrett D'Amore INTERR; 722*6b5e5868SGarrett D'Amore break; 723*6b5e5868SGarrett D'Amore } 724*6b5e5868SGarrett D'Amore collinfo.directive[collinfo.directive_count] |= bit; 725*6b5e5868SGarrett D'Amore } 726*6b5e5868SGarrett D'Amore 727*6b5e5868SGarrett D'Amore void 728*6b5e5868SGarrett D'Amore add_order_directive(void) 729*6b5e5868SGarrett D'Amore { 730*6b5e5868SGarrett D'Amore if (collinfo.directive_count >= COLL_WEIGHTS_MAX) { 731*6b5e5868SGarrett D'Amore errf(_("too many directives (max %d)"), COLL_WEIGHTS_MAX); 732*6b5e5868SGarrett D'Amore } 733*6b5e5868SGarrett D'Amore collinfo.directive_count++; 734*6b5e5868SGarrett D'Amore } 735*6b5e5868SGarrett D'Amore 736*6b5e5868SGarrett D'Amore static void 737*6b5e5868SGarrett D'Amore add_order_pri(int32_t ref) 738*6b5e5868SGarrett D'Amore { 739*6b5e5868SGarrett D'Amore if (curr_weight >= NUM_WT) { 740*6b5e5868SGarrett D'Amore errf(_("too many weights (max %d)"), NUM_WT); 741*6b5e5868SGarrett D'Amore return; 742*6b5e5868SGarrett D'Amore } 743*6b5e5868SGarrett D'Amore order_weights[curr_weight] = ref; 744*6b5e5868SGarrett D'Amore curr_weight++; 745*6b5e5868SGarrett D'Amore } 746*6b5e5868SGarrett D'Amore 747*6b5e5868SGarrett D'Amore void 748*6b5e5868SGarrett D'Amore add_order_collsym(collsym_t *s) 749*6b5e5868SGarrett D'Amore { 750*6b5e5868SGarrett D'Amore add_order_pri(s->ref); 751*6b5e5868SGarrett D'Amore } 752*6b5e5868SGarrett D'Amore 753*6b5e5868SGarrett D'Amore void 754*6b5e5868SGarrett D'Amore add_order_char(wchar_t wc) 755*6b5e5868SGarrett D'Amore { 756*6b5e5868SGarrett D'Amore collchar_t *cc; 757*6b5e5868SGarrett D'Amore 758*6b5e5868SGarrett D'Amore if ((cc = get_collchar(wc, 1)) == NULL) { 759*6b5e5868SGarrett D'Amore INTERR; 760*6b5e5868SGarrett D'Amore return; 761*6b5e5868SGarrett D'Amore } 762*6b5e5868SGarrett D'Amore 763*6b5e5868SGarrett D'Amore add_order_pri(cc->ref[curr_weight]); 764*6b5e5868SGarrett D'Amore } 765*6b5e5868SGarrett D'Amore 766*6b5e5868SGarrett D'Amore void 767*6b5e5868SGarrett D'Amore add_order_collelem(collelem_t *e) 768*6b5e5868SGarrett D'Amore { 769*6b5e5868SGarrett D'Amore add_order_pri(e->ref[curr_weight]); 770*6b5e5868SGarrett D'Amore } 771*6b5e5868SGarrett D'Amore 772*6b5e5868SGarrett D'Amore void 773*6b5e5868SGarrett D'Amore add_order_ignore(void) 774*6b5e5868SGarrett D'Amore { 775*6b5e5868SGarrett D'Amore add_order_pri(pri_ignore); 776*6b5e5868SGarrett D'Amore } 777*6b5e5868SGarrett D'Amore 778*6b5e5868SGarrett D'Amore void 779*6b5e5868SGarrett D'Amore add_order_symbol(char *sym) 780*6b5e5868SGarrett D'Amore { 781*6b5e5868SGarrett D'Amore collundef_t *c; 782*6b5e5868SGarrett D'Amore if ((c = get_collundef(sym)) == NULL) { 783*6b5e5868SGarrett D'Amore INTERR; 784*6b5e5868SGarrett D'Amore return; 785*6b5e5868SGarrett D'Amore } 786*6b5e5868SGarrett D'Amore add_order_pri(c->ref[curr_weight]); 787*6b5e5868SGarrett D'Amore } 788*6b5e5868SGarrett D'Amore 789*6b5e5868SGarrett D'Amore void 790*6b5e5868SGarrett D'Amore add_order_ellipsis(void) 791*6b5e5868SGarrett D'Amore { 792*6b5e5868SGarrett D'Amore /* special NULL value indicates self reference */ 793*6b5e5868SGarrett D'Amore add_order_pri(NULL); 794*6b5e5868SGarrett D'Amore } 795*6b5e5868SGarrett D'Amore 796*6b5e5868SGarrett D'Amore void 797*6b5e5868SGarrett D'Amore add_order_subst(void) 798*6b5e5868SGarrett D'Amore { 799*6b5e5868SGarrett D'Amore subst_t *s; 800*6b5e5868SGarrett D'Amore avl_index_t where; 801*6b5e5868SGarrett D'Amore int i; 802*6b5e5868SGarrett D'Amore 803*6b5e5868SGarrett D'Amore if ((s = calloc(sizeof (*s), 1)) == NULL) { 804*6b5e5868SGarrett D'Amore errf(_("out of memory")); 805*6b5e5868SGarrett D'Amore return; 806*6b5e5868SGarrett D'Amore } 807*6b5e5868SGarrett D'Amore s->key = new_pri(); 808*6b5e5868SGarrett D'Amore 809*6b5e5868SGarrett D'Amore /* 810*6b5e5868SGarrett D'Amore * We use a self reference for our key, but we set a high bit 811*6b5e5868SGarrett D'Amore * to indicate that this is a substitution reference. This 812*6b5e5868SGarrett D'Amore * will expedite table lookups later, and prevent table 813*6b5e5868SGarrett D'Amore * lookups for situations that don't require it. (In short, 814*6b5e5868SGarrett D'Amore * its a big win, because we can skip a lot of binary 815*6b5e5868SGarrett D'Amore * searching.) 816*6b5e5868SGarrett D'Amore */ 817*6b5e5868SGarrett D'Amore set_pri(s->key, (nextpri | COLLATE_SUBST_PRIORITY), RESOLVED); 818*6b5e5868SGarrett D'Amore 819*6b5e5868SGarrett D'Amore for (i = 0; i < curr_subst; i++) { 820*6b5e5868SGarrett D'Amore s->ref[i] = subst_weights[i]; 821*6b5e5868SGarrett D'Amore subst_weights[i] = 0; 822*6b5e5868SGarrett D'Amore } 823*6b5e5868SGarrett D'Amore curr_subst = 0; 824*6b5e5868SGarrett D'Amore 825*6b5e5868SGarrett D'Amore if (avl_find(&substs[curr_weight], s, &where) != NULL) { 826*6b5e5868SGarrett D'Amore INTERR; 827*6b5e5868SGarrett D'Amore return; 828*6b5e5868SGarrett D'Amore } 829*6b5e5868SGarrett D'Amore avl_insert(&substs[curr_weight], s, where); 830*6b5e5868SGarrett D'Amore 831*6b5e5868SGarrett D'Amore /* 832*6b5e5868SGarrett D'Amore * We are using the current (unique) priority as a search key 833*6b5e5868SGarrett D'Amore * in the substitution table. 834*6b5e5868SGarrett D'Amore */ 835*6b5e5868SGarrett D'Amore add_order_pri(s->key); 836*6b5e5868SGarrett D'Amore } 837*6b5e5868SGarrett D'Amore 838*6b5e5868SGarrett D'Amore static void 839*6b5e5868SGarrett D'Amore add_subst_pri(int32_t ref) 840*6b5e5868SGarrett D'Amore { 841*6b5e5868SGarrett D'Amore if (curr_subst >= COLLATE_STR_LEN) { 842*6b5e5868SGarrett D'Amore errf(_("substitution string is too long")); 843*6b5e5868SGarrett D'Amore return; 844*6b5e5868SGarrett D'Amore } 845*6b5e5868SGarrett D'Amore subst_weights[curr_subst] = ref; 846*6b5e5868SGarrett D'Amore curr_subst++; 847*6b5e5868SGarrett D'Amore } 848*6b5e5868SGarrett D'Amore 849*6b5e5868SGarrett D'Amore void 850*6b5e5868SGarrett D'Amore add_subst_char(wchar_t wc) 851*6b5e5868SGarrett D'Amore { 852*6b5e5868SGarrett D'Amore collchar_t *cc; 853*6b5e5868SGarrett D'Amore 854*6b5e5868SGarrett D'Amore 855*6b5e5868SGarrett D'Amore if (((cc = get_collchar(wc, 1)) == NULL) || 856*6b5e5868SGarrett D'Amore (cc->wc != wc)) { 857*6b5e5868SGarrett D'Amore INTERR; 858*6b5e5868SGarrett D'Amore return; 859*6b5e5868SGarrett D'Amore } 860*6b5e5868SGarrett D'Amore /* we take the weight for the character at that position */ 861*6b5e5868SGarrett D'Amore add_subst_pri(cc->ref[curr_weight]); 862*6b5e5868SGarrett D'Amore } 863*6b5e5868SGarrett D'Amore 864*6b5e5868SGarrett D'Amore void 865*6b5e5868SGarrett D'Amore add_subst_collelem(collelem_t *e) 866*6b5e5868SGarrett D'Amore { 867*6b5e5868SGarrett D'Amore add_subst_pri(e->ref[curr_weight]); 868*6b5e5868SGarrett D'Amore } 869*6b5e5868SGarrett D'Amore 870*6b5e5868SGarrett D'Amore void 871*6b5e5868SGarrett D'Amore add_subst_collsym(collsym_t *s) 872*6b5e5868SGarrett D'Amore { 873*6b5e5868SGarrett D'Amore add_subst_pri(s->ref); 874*6b5e5868SGarrett D'Amore } 875*6b5e5868SGarrett D'Amore 876*6b5e5868SGarrett D'Amore void 877*6b5e5868SGarrett D'Amore add_subst_symbol(char *ptr) 878*6b5e5868SGarrett D'Amore { 879*6b5e5868SGarrett D'Amore collundef_t *cu; 880*6b5e5868SGarrett D'Amore 881*6b5e5868SGarrett D'Amore if ((cu = get_collundef(ptr)) != NULL) { 882*6b5e5868SGarrett D'Amore add_subst_pri(cu->ref[curr_weight]); 883*6b5e5868SGarrett D'Amore } 884*6b5e5868SGarrett D'Amore } 885*6b5e5868SGarrett D'Amore 886*6b5e5868SGarrett D'Amore void 887*6b5e5868SGarrett D'Amore dump_collate(void) 888*6b5e5868SGarrett D'Amore { 889*6b5e5868SGarrett D'Amore FILE *f; 890*6b5e5868SGarrett D'Amore int i, j, n; 891*6b5e5868SGarrett D'Amore size_t sz; 892*6b5e5868SGarrett D'Amore int32_t pri; 893*6b5e5868SGarrett D'Amore collelem_t *ce; 894*6b5e5868SGarrett D'Amore collchar_t *cc; 895*6b5e5868SGarrett D'Amore subst_t *sb; 896*6b5e5868SGarrett D'Amore char vers[COLLATE_STR_LEN]; 897*6b5e5868SGarrett D'Amore collate_char_pri_t char_pri[UCHAR_MAX + 1]; 898*6b5e5868SGarrett D'Amore collate_large_pri_t *large; 899*6b5e5868SGarrett D'Amore collate_subst_t *subst[COLL_WEIGHTS_MAX]; 900*6b5e5868SGarrett D'Amore collate_chain_pri_t *chain; 901*6b5e5868SGarrett D'Amore 902*6b5e5868SGarrett D'Amore (void) memset(&char_pri, 0, sizeof (char_pri)); 903*6b5e5868SGarrett D'Amore (void) memset(vers, 0, COLLATE_STR_LEN); 904*6b5e5868SGarrett D'Amore (void) snprintf(vers, sizeof (vers), COLLATE_VERSION); 905*6b5e5868SGarrett D'Amore 906*6b5e5868SGarrett D'Amore /* 907*6b5e5868SGarrett D'Amore * We need to make sure we arrange for the UNDEFINED field 908*6b5e5868SGarrett D'Amore * to show up. 909*6b5e5868SGarrett D'Amore */ 910*6b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 911*6b5e5868SGarrett D'Amore if (resolve_pri(pri_undefined[i]) == -1) { 912*6b5e5868SGarrett D'Amore set_pri(pri_undefined[i], -1, RESOLVED); 913*6b5e5868SGarrett D'Amore /* they collate at the end of everything else */ 914*6b5e5868SGarrett D'Amore collinfo.undef_pri[i] = COLLATE_MAX_PRIORITY; 915*6b5e5868SGarrett D'Amore } 916*6b5e5868SGarrett D'Amore } 917*6b5e5868SGarrett D'Amore 918*6b5e5868SGarrett D'Amore collinfo.undef_pri[NUM_WT] = COLLATE_MAX_PRIORITY; 919*6b5e5868SGarrett D'Amore collinfo.directive[NUM_WT] = DIRECTIVE_UNDEFINED; 920*6b5e5868SGarrett D'Amore 921*6b5e5868SGarrett D'Amore /* 922*6b5e5868SGarrett D'Amore * Ordinary character priorities 923*6b5e5868SGarrett D'Amore */ 924*6b5e5868SGarrett D'Amore for (i = 0; i <= UCHAR_MAX; i++) { 925*6b5e5868SGarrett D'Amore if ((cc = get_collchar(i, 0)) != NULL) { 926*6b5e5868SGarrett D'Amore for (j = 0; j < NUM_WT; j++) { 927*6b5e5868SGarrett D'Amore char_pri[i].pri[j] = resolve_pri(cc->ref[j]); 928*6b5e5868SGarrett D'Amore } 929*6b5e5868SGarrett D'Amore } else { 930*6b5e5868SGarrett D'Amore for (j = 0; j < NUM_WT; j++) { 931*6b5e5868SGarrett D'Amore char_pri[i].pri[j] = 932*6b5e5868SGarrett D'Amore resolve_pri(pri_undefined[j]); 933*6b5e5868SGarrett D'Amore } 934*6b5e5868SGarrett D'Amore /* 935*6b5e5868SGarrett D'Amore * Per POSIX, for undefined characters, we 936*6b5e5868SGarrett D'Amore * also have to add a last item, which is the 937*6b5e5868SGarrett D'Amore * character code. 938*6b5e5868SGarrett D'Amore */ 939*6b5e5868SGarrett D'Amore char_pri[i].pri[NUM_WT] = i; 940*6b5e5868SGarrett D'Amore } 941*6b5e5868SGarrett D'Amore } 942*6b5e5868SGarrett D'Amore 943*6b5e5868SGarrett D'Amore /* 944*6b5e5868SGarrett D'Amore * Substitution tables 945*6b5e5868SGarrett D'Amore */ 946*6b5e5868SGarrett D'Amore for (i = 0; i < collinfo.directive_count; i++) { 947*6b5e5868SGarrett D'Amore collate_subst_t *st = NULL; 948*6b5e5868SGarrett D'Amore n = collinfo.subst_count[i] = avl_numnodes(&substs[i]); 949*6b5e5868SGarrett D'Amore if ((st = calloc(sizeof (collate_subst_t) * n, 1)) == NULL) { 950*6b5e5868SGarrett D'Amore errf(_("out of memory")); 951*6b5e5868SGarrett D'Amore return; 952*6b5e5868SGarrett D'Amore } 953*6b5e5868SGarrett D'Amore n = 0; 954*6b5e5868SGarrett D'Amore for (sb = avl_first(&substs[i]); sb; 955*6b5e5868SGarrett D'Amore sb = AVL_NEXT(&substs[i], sb)) { 956*6b5e5868SGarrett D'Amore if ((st[n].key = resolve_pri(sb->key)) < 0) { 957*6b5e5868SGarrett D'Amore /* by definition these resolve! */ 958*6b5e5868SGarrett D'Amore INTERR; 959*6b5e5868SGarrett D'Amore } 960*6b5e5868SGarrett D'Amore for (j = 0; sb->ref[j]; j++) { 961*6b5e5868SGarrett D'Amore st[n].pri[j] = resolve_pri(sb->ref[j]); 962*6b5e5868SGarrett D'Amore } 963*6b5e5868SGarrett D'Amore n++; 964*6b5e5868SGarrett D'Amore } 965*6b5e5868SGarrett D'Amore if (n != collinfo.subst_count[i]) 966*6b5e5868SGarrett D'Amore INTERR; 967*6b5e5868SGarrett D'Amore subst[i] = st; 968*6b5e5868SGarrett D'Amore } 969*6b5e5868SGarrett D'Amore 970*6b5e5868SGarrett D'Amore /* 971*6b5e5868SGarrett D'Amore * Chains, i.e. collating elements 972*6b5e5868SGarrett D'Amore */ 973*6b5e5868SGarrett D'Amore collinfo.chain_count = avl_numnodes(&elem_by_expand); 974*6b5e5868SGarrett D'Amore chain = calloc(sizeof (collate_chain_pri_t), collinfo.chain_count); 975*6b5e5868SGarrett D'Amore if (chain == NULL) { 976*6b5e5868SGarrett D'Amore errf(_("out of memory")); 977*6b5e5868SGarrett D'Amore return; 978*6b5e5868SGarrett D'Amore } 979*6b5e5868SGarrett D'Amore for (n = 0, ce = avl_first(&elem_by_expand); 980*6b5e5868SGarrett D'Amore ce != NULL; 981*6b5e5868SGarrett D'Amore ce = AVL_NEXT(&elem_by_expand, ce), n++) { 982*6b5e5868SGarrett D'Amore (void) wsncpy(chain[n].str, ce->expand, COLLATE_STR_LEN); 983*6b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 984*6b5e5868SGarrett D'Amore chain[n].pri[i] = resolve_pri(ce->ref[i]); 985*6b5e5868SGarrett D'Amore } 986*6b5e5868SGarrett D'Amore } 987*6b5e5868SGarrett D'Amore if (n != collinfo.chain_count) 988*6b5e5868SGarrett D'Amore INTERR; 989*6b5e5868SGarrett D'Amore 990*6b5e5868SGarrett D'Amore /* 991*6b5e5868SGarrett D'Amore * Large (> UCHAR_MAX) character priorities 992*6b5e5868SGarrett D'Amore */ 993*6b5e5868SGarrett D'Amore large = calloc(sizeof (collate_large_pri_t) * avl_numnodes(&collchars), 994*6b5e5868SGarrett D'Amore 1); 995*6b5e5868SGarrett D'Amore if (large == NULL) { 996*6b5e5868SGarrett D'Amore errf(_("out of memory")); 997*6b5e5868SGarrett D'Amore return; 998*6b5e5868SGarrett D'Amore } 999*6b5e5868SGarrett D'Amore 1000*6b5e5868SGarrett D'Amore i = 0; 1001*6b5e5868SGarrett D'Amore for (cc = avl_first(&collchars); cc; cc = AVL_NEXT(&collchars, cc)) { 1002*6b5e5868SGarrett D'Amore int undef = 0; 1003*6b5e5868SGarrett D'Amore /* we already gathered those */ 1004*6b5e5868SGarrett D'Amore if (cc->wc <= UCHAR_MAX) 1005*6b5e5868SGarrett D'Amore continue; 1006*6b5e5868SGarrett D'Amore for (j = 0; j < NUM_WT; j++) { 1007*6b5e5868SGarrett D'Amore if ((pri = resolve_pri(cc->ref[j])) < 0) { 1008*6b5e5868SGarrett D'Amore undef = 1; 1009*6b5e5868SGarrett D'Amore } 1010*6b5e5868SGarrett D'Amore if (undef && (pri >= 0)) { 1011*6b5e5868SGarrett D'Amore /* if undefined, then all priorities are */ 1012*6b5e5868SGarrett D'Amore INTERR; 1013*6b5e5868SGarrett D'Amore } else { 1014*6b5e5868SGarrett D'Amore large[i].pri.pri[j] = pri; 1015*6b5e5868SGarrett D'Amore } 1016*6b5e5868SGarrett D'Amore } 1017*6b5e5868SGarrett D'Amore if (!undef) { 1018*6b5e5868SGarrett D'Amore large[i].val = cc->wc; 1019*6b5e5868SGarrett D'Amore collinfo.large_pri_count = i++; 1020*6b5e5868SGarrett D'Amore } 1021*6b5e5868SGarrett D'Amore } 1022*6b5e5868SGarrett D'Amore 1023*6b5e5868SGarrett D'Amore if ((f = open_category()) == NULL) { 1024*6b5e5868SGarrett D'Amore return; 1025*6b5e5868SGarrett D'Amore } 1026*6b5e5868SGarrett D'Amore 1027*6b5e5868SGarrett D'Amore /* Time to write the entire data set out */ 1028*6b5e5868SGarrett D'Amore 1029*6b5e5868SGarrett D'Amore if ((wr_category(vers, COLLATE_STR_LEN, f) < 0) || 1030*6b5e5868SGarrett D'Amore (wr_category(&collinfo, sizeof (collinfo), f) < 0) || 1031*6b5e5868SGarrett D'Amore (wr_category(&char_pri, sizeof (char_pri), f) < 0)) { 1032*6b5e5868SGarrett D'Amore return; 1033*6b5e5868SGarrett D'Amore } 1034*6b5e5868SGarrett D'Amore 1035*6b5e5868SGarrett D'Amore for (i = 0; i < NUM_WT; i++) { 1036*6b5e5868SGarrett D'Amore sz = sizeof (collate_subst_t) * collinfo.subst_count[i]; 1037*6b5e5868SGarrett D'Amore if (wr_category(subst[i], sz, f) < 0) { 1038*6b5e5868SGarrett D'Amore return; 1039*6b5e5868SGarrett D'Amore } 1040*6b5e5868SGarrett D'Amore } 1041*6b5e5868SGarrett D'Amore sz = sizeof (collate_chain_pri_t) * collinfo.chain_count; 1042*6b5e5868SGarrett D'Amore if (wr_category(chain, sz, f) < 0) { 1043*6b5e5868SGarrett D'Amore return; 1044*6b5e5868SGarrett D'Amore } 1045*6b5e5868SGarrett D'Amore sz = sizeof (collate_large_pri_t) * collinfo.large_pri_count; 1046*6b5e5868SGarrett D'Amore if (wr_category(large, sz, f) < 0) { 1047*6b5e5868SGarrett D'Amore return; 1048*6b5e5868SGarrett D'Amore } 1049*6b5e5868SGarrett D'Amore 1050*6b5e5868SGarrett D'Amore close_category(f); 1051*6b5e5868SGarrett D'Amore } 1052