xref: /freebsd/crypto/krb5/src/lib/krb5/unicode/ucdata/ucgendat.c (revision 7f2fe78b9dd5f51c821d771b63d2e096f6fd49e9)
1 /*
2  * Copyright 1998-2008 The OpenLDAP Foundation.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted only as authorized by the OpenLDAP
7  * Public License.
8  *
9  * A copy of this license is available in file LICENSE in the
10  * top-level directory of the distribution or, alternatively, at
11  * <https://www.OpenLDAP.org/license.html>.
12  */
13 /* Copyright 2001 Computing Research Labs, New Mexico State University
14  *
15  * Permission is hereby granted, free of charge, to any person obtaining a
16  * copy of this software and associated documentation files (the "Software"),
17  * to deal in the Software without restriction, including without limitation
18  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19  * and/or sell copies of the Software, and to permit persons to whom the
20  * Software is furnished to do so, subject to the following conditions:
21  *
22  * The above copyright notice and this permission notice shall be included in
23  * all copies or substantial portions of the Software.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
28  * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
29  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
30  * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
31  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32  */
33 
34 /*
35  * This work is part of OpenLDAP Software <https://www.openldap.org/>.
36  * $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucgendat.c,v 1.43 2008/01/07 23:20:05 kurt Exp $
37  * $Id: ucgendat.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $"
38  */
39 
40 #include "k5-int.h"
41 #include "k5-utf8.h"
42 #include "k5-unicode.h"
43 
/*
 * HARDCODE_DATA defaults to 1 unless the build has already defined it.
 */
#ifndef HARDCODE_DATA
#define	HARDCODE_DATA	1
#endif

/*
 * ishdigit(cc) is non-zero when cc is an ASCII hexadecimal digit (0-9, A-F
 * or a-f).  Any earlier definition is discarded first.  The argument is
 * evaluated several times, so it must not have side effects.
 */
#undef ishdigit
#define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\
                      ((cc) >= 'A' && (cc) <= 'F') ||\
                      ((cc) >= 'a' && (cc) <= 'f'))
52 
/*
 * A header written to the output file with the byte-order-mark and the number
 * of property nodes.
 */
static krb5_ui_2 hdr[2] = {0xfeff, 0};

/*
 * NUMPROPS is the number of entries in props[] and proptbl[].  NEEDPROPS adds
 * (4 - (NUMPROPS & 3)) to it, which yields a multiple of four that is always
 * strictly greater than NUMPROPS; it is used to size propcnt[].
 */
#define NUMPROPS 50
#define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3)))

/*
 * A property name paired with its length in characters, so that names can be
 * matched with memcmp() against fields that are not NUL-terminated.
 */
typedef struct {
    char *name;
    int len;
} _prop_t;
66 
/*
 * List of properties expected to be found in the Unicode Character Database
 * including some implementation specific properties.
 *
 * The implementation specific properties are:
 * Cm = Composed (can be decomposed)
 * Nb = Non-breaking
 * Sy = Symmetric (has left and right forms)
 * Hd = Hex digit
 * Qm = Quote marks
 * Mr = Mirroring
 * Ss = Space, other
 * Cp = Defined character
 *
 * The position of a name in this table is also the index of its range list
 * in proptbl[], so the order of the entries matters.
 */
static _prop_t props[NUMPROPS] = {
    {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2},
    {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2},
    {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2},
    {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
    {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L",  1}, {"R",  1},
    {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B",  1},
    {"S",  1}, {"WS", 2}, {"ON", 2},
    {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2},
    {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}, {"AL", 2}
};
92 
/*
 * A growable list of code point ranges for one property.
 *
 * ranges is a flat array of (start, end) pairs; used is the number of array
 * elements currently filled (two per range) and size is the number of
 * elements allocated.
 */
typedef struct {
    krb5_ui_4 *ranges;
    krb5_ui_2 used;
    krb5_ui_2 size;
} _ranges_t;

/*
 * One range list per property, indexed in parallel with props[].
 */
static _ranges_t proptbl[NUMPROPS];

/*
 * Make sure this array is sized to be on a 4-byte boundary at compile time.
 */
static krb5_ui_2 propcnt[NEEDPROPS];
105 
/*
 * Array used to collect a decomposition before adding it to the decomposition
 * table.  dectmp_size is the number of code points currently held in dectmp.
 */
static krb5_ui_4 dectmp[64];
static krb5_ui_4 dectmp_size;

/*
 * One character and its decomposition.
 *
 * code is the character being decomposed, decomp the buffer holding the
 * decomposition, size the number of elements allocated for decomp and used
 * the number of elements actually stored in it.
 */
typedef struct {
    krb5_ui_4 code;
    krb5_ui_2 size;
    krb5_ui_2 used;
    krb5_ui_4 *decomp;
} _decomp_t;

/*
 * List of decomposition.  Created and expanded in order as the characters are
 * encountered. First list contains canonical mappings, second also includes
 * compatibility mappings.
 *
 * For each list, *_used is the number of entries filled in and *_size the
 * number of entries allocated.
 */
static _decomp_t *decomps;
static krb5_ui_4 decomps_used;
static krb5_ui_4 decomps_size;

static _decomp_t *kdecomps;
static krb5_ui_4 kdecomps_used;
static krb5_ui_4 kdecomps_size;
132 
133 /*
134  * Composition exclusion table stuff.
135  */
136 #define COMPEX_SET(c) (compexs[(c) >> 5] |= (1 << ((c) & 31)))
137 #define COMPEX_TEST(c) (compexs[(c) >> 5] & (1 << ((c) & 31)))
138 static krb5_ui_4 compexs[8192];
139 
/*
 * Struct for holding a composition pair, and array of composition pairs
 *
 * NOTE(review): presumably comp is the composed character and code1/code2 are
 * the two code points that compose to it; the code that fills these fields is
 * not shown alongside this declaration, so confirm there (count likewise).
 */
typedef struct {
    krb5_ui_4 comp;
    krb5_ui_4 count;
    krb5_ui_4 code1;
    krb5_ui_4 code2;
} _comp_t;

static _comp_t *comps;
/*
 * Incremented by add_decomp() for every canonical decomposition that consists
 * of exactly two code points.
 */
static krb5_ui_4 comps_used;
152 
/*
 * Types and lists for handling lists of case mappings.
 *
 * Each entry records one character (key) and its two other case forms.  What
 * other1 and other2 hold depends on the list the entry is stored in:
 *   upper: key = upper case, other1 = lower case, other2 = title case
 *   lower: key = lower case, other1 = upper case, other2 = title case
 *   title: key = title case, other1 = upper case, other2 = lower case
 */
typedef struct {
    krb5_ui_4 key;
    krb5_ui_4 other1;
    krb5_ui_4 other2;
} _case_t;

/*
 * The three lists are kept sorted by key.  *_used is the number of entries
 * filled in and *_size the number of entries allocated.
 */
static _case_t *upper;
static _case_t *lower;
static _case_t *title;
static krb5_ui_4 upper_used;
static krb5_ui_4 upper_size;
static krb5_ui_4 lower_used;
static krb5_ui_4 lower_size;
static krb5_ui_4 title_used;
static krb5_ui_4 title_size;

/*
 * Array used to collect case mappings before adding them to a list.
 * Index 0 is the upper case, 1 the lower case and 2 the title case mapping.
 */
static krb5_ui_4 cases[3];

/*
 * An array to hold ranges for combining classes.  The array is a flat list
 * of triples: first code point, last code point, combining class.  ccl_used
 * and ccl_size count array elements (three per range), not ranges.
 */
static krb5_ui_4 *ccl;
static krb5_ui_4 ccl_used;
static krb5_ui_4 ccl_size;
183 
184 /*
185  * Structures for handling numbers.
186  */
187 typedef struct {
188     krb5_ui_4 code;
189     krb5_ui_4 idx;
190 } _codeidx_t;
191 
192 typedef struct {
193     short numerator;
194     short denominator;
195 } _num_t;
196 
197 /*
198  * Arrays to hold the mapping of codes to numbers.
199  */
200 static _codeidx_t *ncodes;
201 static krb5_ui_4 ncodes_used;
202 static krb5_ui_4 ncodes_size;
203 
204 static _num_t *nums;
205 static krb5_ui_4 nums_used;
206 static krb5_ui_4 nums_size;
207 
208 /*
209  * Array for holding numbers.
210  */
211 static _num_t *nums;
212 static krb5_ui_4 nums_used;
213 static krb5_ui_4 nums_size;
214 
215 static void
add_range(krb5_ui_4 start,krb5_ui_4 end,char * p1,char * p2)216 add_range(krb5_ui_4 start, krb5_ui_4 end, char *p1, char *p2)
217 {
218     int i, j, k, len;
219     _ranges_t *rlp;
220     char *name;
221 
222     for (k = 0; k < 2; k++) {
223         if (k == 0) {
224             name = p1;
225             len = 2;
226         } else {
227             if (p2 == 0)
228               break;
229 
230             name = p2;
231             len = 1;
232         }
233 
234         for (i = 0; i < NUMPROPS; i++) {
235             if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
236               break;
237         }
238 
239         if (i == NUMPROPS)
240           continue;
241 
242         rlp = &proptbl[i];
243 
244         /*
245          * Resize the range list if necessary.
246          */
247         if (rlp->used == rlp->size) {
248             if (rlp->size == 0)
249               rlp->ranges = (krb5_ui_4 *)
250                   malloc(sizeof(krb5_ui_4) << 3);
251             else
252               rlp->ranges = (krb5_ui_4 *)
253                   realloc((char *) rlp->ranges,
254                           sizeof(krb5_ui_4) * (rlp->size + 8));
255             rlp->size += 8;
256         }
257 
258         /*
259          * If this is the first code for this property list, just add it
260          * and return.
261          */
262         if (rlp->used == 0) {
263             rlp->ranges[0] = start;
264             rlp->ranges[1] = end;
265             rlp->used += 2;
266             continue;
267         }
268 
269         /*
270          * Optimize the case of adding the range to the end.
271          */
272         j = rlp->used - 1;
273         if (start > rlp->ranges[j]) {
274             j = rlp->used;
275             rlp->ranges[j++] = start;
276             rlp->ranges[j++] = end;
277             rlp->used = j;
278             continue;
279         }
280 
281         /*
282          * Need to locate the insertion point.
283          */
284         for (i = 0;
285              i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ;
286 
287         /*
288          * If the start value lies in the current range, then simply set the
289          * new end point of the range to the end value passed as a parameter.
290          */
291         if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) {
292             rlp->ranges[i + 1] = end;
293             return;
294         }
295 
296         /*
297          * Shift following values up by two.
298          */
299         for (j = rlp->used; j > i; j -= 2) {
300             rlp->ranges[j] = rlp->ranges[j - 2];
301             rlp->ranges[j + 1] = rlp->ranges[j - 1];
302         }
303 
304         /*
305          * Add the new range at the insertion point.
306          */
307         rlp->ranges[i] = start;
308         rlp->ranges[i + 1] = end;
309         rlp->used += 2;
310     }
311 }
312 
313 static void
ordered_range_insert(krb5_ui_4 c,char * name,int len)314 ordered_range_insert(krb5_ui_4 c, char *name, int len)
315 {
316     int i, j;
317     krb5_ui_4 s, e;
318     _ranges_t *rlp;
319 
320     if (len == 0)
321       return;
322 
323     /*
324      * Deal with directionality codes introduced in Unicode 3.0.
325      */
326     if ((len == 2 && memcmp(name, "BN", 2) == 0) ||
327         (len == 3 &&
328          (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 ||
329           memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 ||
330           memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0))) {
331         /*
332          * Mark all of these as Other Neutral to preserve compatibility with
333          * older versions.
334          */
335         len = 2;
336         name = "ON";
337     }
338 
339     for (i = 0; i < NUMPROPS; i++) {
340         if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
341           break;
342     }
343 
344     if (i == NUMPROPS)
345       return;
346 
347     /*
348      * Have a match, so insert the code in order.
349      */
350     rlp = &proptbl[i];
351 
352     /*
353      * Resize the range list if necessary.
354      */
355     if (rlp->used == rlp->size) {
356         if (rlp->size == 0)
357           rlp->ranges = (krb5_ui_4 *)
358               malloc(sizeof(krb5_ui_4) << 3);
359         else
360           rlp->ranges = (krb5_ui_4 *)
361               realloc((char *) rlp->ranges,
362                       sizeof(krb5_ui_4) * (rlp->size + 8));
363         rlp->size += 8;
364     }
365 
366     /*
367      * If this is the first code for this property list, just add it
368      * and return.
369      */
370     if (rlp->used == 0) {
371         rlp->ranges[0] = rlp->ranges[1] = c;
372         rlp->used += 2;
373         return;
374     }
375 
376     /*
377      * Optimize the cases of extending the last range and adding new ranges to
378      * the end.
379      */
380     j = rlp->used - 1;
381     e = rlp->ranges[j];
382     s = rlp->ranges[j - 1];
383 
384     if (c == e + 1) {
385         /*
386          * Extend the last range.
387          */
388         rlp->ranges[j] = c;
389         return;
390     }
391 
392     if (c > e + 1) {
393         /*
394          * Start another range on the end.
395          */
396         j = rlp->used;
397         rlp->ranges[j] = rlp->ranges[j + 1] = c;
398         rlp->used += 2;
399         return;
400     }
401 
402     if (c >= s)
403       /*
404        * The code is a duplicate of a code in the last range, so just return.
405        */
406       return;
407 
408     /*
409      * The code should be inserted somewhere before the last range in the
410      * list.  Locate the insertion point.
411      */
412     for (i = 0;
413          i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ;
414 
415     s = rlp->ranges[i];
416     e = rlp->ranges[i + 1];
417 
418     if (c == e + 1)
419       /*
420        * Simply extend the current range.
421        */
422       rlp->ranges[i + 1] = c;
423     else if (c < s) {
424         /*
425          * Add a new entry before the current location.  Shift all entries
426          * before the current one up by one to make room.
427          */
428         for (j = rlp->used; j > i; j -= 2) {
429             rlp->ranges[j] = rlp->ranges[j - 2];
430             rlp->ranges[j + 1] = rlp->ranges[j - 1];
431         }
432         rlp->ranges[i] = rlp->ranges[i + 1] = c;
433 
434         rlp->used += 2;
435     }
436 }
437 
438 static void
add_decomp(krb5_ui_4 code,short compat)439 add_decomp(krb5_ui_4 code, short compat)
440 {
441     krb5_ui_4 i, j, size;
442     _decomp_t **pdecomps;
443     krb5_ui_4 *pdecomps_used;
444     krb5_ui_4 *pdecomps_size;
445 
446     if (compat) {
447 	pdecomps = &kdecomps;
448 	pdecomps_used = &kdecomps_used;
449 	pdecomps_size = &kdecomps_size;
450     } else {
451 	pdecomps = &decomps;
452 	pdecomps_used = &decomps_used;
453 	pdecomps_size = &decomps_size;
454     }
455 
456     /*
457      * Add the code to the composite property.
458      */
459     if (!compat) {
460 	ordered_range_insert(code, "Cm", 2);
461     }
462 
463     /*
464      * Locate the insertion point for the code.
465      */
466     for (i = 0; i < *pdecomps_used && code > (*pdecomps)[i].code; i++) ;
467 
468     /*
469      * Allocate space for a new decomposition.
470      */
471     if (*pdecomps_used == *pdecomps_size) {
472         if (*pdecomps_size == 0)
473           *pdecomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
474         else
475           *pdecomps = (_decomp_t *)
476               realloc((char *) *pdecomps,
477                       sizeof(_decomp_t) * (*pdecomps_size + 8));
478         (void) memset((char *) (*pdecomps + *pdecomps_size), '\0',
479                       sizeof(_decomp_t) << 3);
480         *pdecomps_size += 8;
481     }
482 
483     if (i < *pdecomps_used && code != (*pdecomps)[i].code) {
484         /*
485          * Shift the decomps up by one if the codes don't match.
486          */
487         for (j = *pdecomps_used; j > i; j--)
488           (void) memmove((char *) &(*pdecomps)[j], (char *) &(*pdecomps)[j - 1],
489                          sizeof(_decomp_t));
490     }
491 
492     /*
493      * Insert or replace a decomposition.
494      */
495     size = dectmp_size + (4 - (dectmp_size & 3));
496     if ((*pdecomps)[i].size < size) {
497         if ((*pdecomps)[i].size == 0)
498           (*pdecomps)[i].decomp = (krb5_ui_4 *)
499               malloc(sizeof(krb5_ui_4) * size);
500         else
501           (*pdecomps)[i].decomp = (krb5_ui_4 *)
502               realloc((char *) (*pdecomps)[i].decomp,
503                       sizeof(krb5_ui_4) * size);
504         (*pdecomps)[i].size = size;
505     }
506 
507     if ((*pdecomps)[i].code != code)
508       (*pdecomps_used)++;
509 
510     (*pdecomps)[i].code = code;
511     (*pdecomps)[i].used = dectmp_size;
512     (void) memmove((char *) (*pdecomps)[i].decomp, (char *) dectmp,
513                    sizeof(krb5_ui_4) * dectmp_size);
514 
515     /*
516      * NOTICE: This needs changing later so it is more general than simply
517      * pairs.  This calculation is done here to simplify allocation elsewhere.
518      */
519     if (!compat && dectmp_size == 2)
520       comps_used++;
521 }
522 
523 static void
add_title(krb5_ui_4 code)524 add_title(krb5_ui_4 code)
525 {
526     krb5_ui_4 i, j;
527 
528     /*
529      * Always map the code to itself.
530      */
531     cases[2] = code;
532 
533     if (title_used == title_size) {
534         if (title_size == 0)
535           title = (_case_t *) malloc(sizeof(_case_t) << 3);
536         else
537           title = (_case_t *) realloc((char *) title,
538                                       sizeof(_case_t) * (title_size + 8));
539         title_size += 8;
540     }
541 
542     /*
543      * Locate the insertion point.
544      */
545     for (i = 0; i < title_used && code > title[i].key; i++) ;
546 
547     if (i < title_used) {
548         /*
549          * Shift the array up by one.
550          */
551         for (j = title_used; j > i; j--)
552           (void) memmove((char *) &title[j], (char *) &title[j - 1],
553                          sizeof(_case_t));
554     }
555 
556     title[i].key = cases[2];    /* Title */
557     title[i].other1 = cases[0]; /* Upper */
558     title[i].other2 = cases[1]; /* Lower */
559 
560     title_used++;
561 }
562 
563 static void
add_upper(krb5_ui_4 code)564 add_upper(krb5_ui_4 code)
565 {
566     krb5_ui_4 i, j;
567 
568     /*
569      * Always map the code to itself.
570      */
571     cases[0] = code;
572 
573     /*
574      * If the title case character is not present, then make it the same as
575      * the upper case.
576      */
577     if (cases[2] == 0)
578       cases[2] = code;
579 
580     if (upper_used == upper_size) {
581         if (upper_size == 0)
582           upper = (_case_t *) malloc(sizeof(_case_t) << 3);
583         else
584           upper = (_case_t *) realloc((char *) upper,
585                                       sizeof(_case_t) * (upper_size + 8));
586         upper_size += 8;
587     }
588 
589     /*
590      * Locate the insertion point.
591      */
592     for (i = 0; i < upper_used && code > upper[i].key; i++) ;
593 
594     if (i < upper_used) {
595         /*
596          * Shift the array up by one.
597          */
598         for (j = upper_used; j > i; j--)
599           (void) memmove((char *) &upper[j], (char *) &upper[j - 1],
600                          sizeof(_case_t));
601     }
602 
603     upper[i].key = cases[0];    /* Upper */
604     upper[i].other1 = cases[1]; /* Lower */
605     upper[i].other2 = cases[2]; /* Title */
606 
607     upper_used++;
608 }
609 
610 static void
add_lower(krb5_ui_4 code)611 add_lower(krb5_ui_4 code)
612 {
613     krb5_ui_4 i, j;
614 
615     /*
616      * Always map the code to itself.
617      */
618     cases[1] = code;
619 
620     /*
621      * If the title case character is empty, then make it the same as the
622      * upper case.
623      */
624     if (cases[2] == 0)
625       cases[2] = cases[0];
626 
627     if (lower_used == lower_size) {
628         if (lower_size == 0)
629           lower = (_case_t *) malloc(sizeof(_case_t) << 3);
630         else
631           lower = (_case_t *) realloc((char *) lower,
632                                       sizeof(_case_t) * (lower_size + 8));
633         lower_size += 8;
634     }
635 
636     /*
637      * Locate the insertion point.
638      */
639     for (i = 0; i < lower_used && code > lower[i].key; i++) ;
640 
641     if (i < lower_used) {
642         /*
643          * Shift the array up by one.
644          */
645         for (j = lower_used; j > i; j--)
646           (void) memmove((char *) &lower[j], (char *) &lower[j - 1],
647                          sizeof(_case_t));
648     }
649 
650     lower[i].key = cases[1];    /* Lower */
651     lower[i].other1 = cases[0]; /* Upper */
652     lower[i].other2 = cases[2]; /* Title */
653 
654     lower_used++;
655 }
656 
657 static void
ordered_ccl_insert(krb5_ui_4 c,krb5_ui_4 ccl_code)658 ordered_ccl_insert(krb5_ui_4 c, krb5_ui_4 ccl_code)
659 {
660     krb5_ui_4 i, j;
661 
662     if (ccl_used == ccl_size) {
663         if (ccl_size == 0)
664           ccl = (krb5_ui_4 *) malloc(sizeof(krb5_ui_4) * 24);
665         else
666           ccl = (krb5_ui_4 *)
667               realloc((char *) ccl, sizeof(krb5_ui_4) * (ccl_size + 24));
668         ccl_size += 24;
669     }
670 
671     /*
672      * Optimize adding the first item.
673      */
674     if (ccl_used == 0) {
675         ccl[0] = ccl[1] = c;
676         ccl[2] = ccl_code;
677         ccl_used += 3;
678         return;
679     }
680 
681     /*
682      * Handle the special case of extending the range on the end.  This
683      * requires that the combining class codes are the same.
684      */
685     if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) {
686         ccl[ccl_used - 2] = c;
687         return;
688     }
689 
690     /*
691      * Handle the special case of adding another range on the end.
692      */
693     if (c > ccl[ccl_used - 2] + 1 ||
694         (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) {
695         ccl[ccl_used++] = c;
696         ccl[ccl_used++] = c;
697         ccl[ccl_used++] = ccl_code;
698         return;
699     }
700 
701     /*
702      * Locate either the insertion point or range for the code.
703      */
704     for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ;
705 
706     if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) {
707         /*
708          * Extend an existing range.
709          */
710         ccl[i + 1] = c;
711         return;
712     } else if (c < ccl[i]) {
713         /*
714          * Start a new range before the current location.
715          */
716         for (j = ccl_used; j > i; j -= 3) {
717             ccl[j] = ccl[j - 3];
718             ccl[j - 1] = ccl[j - 4];
719             ccl[j - 2] = ccl[j - 5];
720         }
721         ccl[i] = ccl[i + 1] = c;
722         ccl[i + 2] = ccl_code;
723     }
724 }
725 
726 /*
727  * Adds a number if it does not already exist and returns an index value
728  * multiplied by 2.
729  */
730 static krb5_ui_4
make_number(short num,short denom)731 make_number(short num, short denom)
732 {
733     krb5_ui_4 n;
734 
735     /*
736      * Determine if the number already exists.
737      */
738     for (n = 0; n < nums_used; n++) {
739         if (nums[n].numerator == num && nums[n].denominator == denom)
740           return n << 1;
741     }
742 
743     if (nums_used == nums_size) {
744         if (nums_size == 0)
745           nums = (_num_t *) malloc(sizeof(_num_t) << 3);
746         else
747           nums = (_num_t *) realloc((char *) nums,
748                                     sizeof(_num_t) * (nums_size + 8));
749         nums_size += 8;
750     }
751 
752     n = nums_used++;
753     nums[n].numerator = num;
754     nums[n].denominator = denom;
755 
756     return n << 1;
757 }
758 
759 static void
add_number(krb5_ui_4 code,short num,short denom)760 add_number(krb5_ui_4 code, short num, short denom)
761 {
762     krb5_ui_4 i, j;
763 
764     /*
765      * Insert the code in order.
766      */
767     for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ;
768 
769     /*
770      * Handle the case of the codes matching and simply replace the number
771      * that was there before.
772      */
773     if (i < ncodes_used && code == ncodes[i].code) {
774         ncodes[i].idx = make_number(num, denom);
775         return;
776     }
777 
778     /*
779      * Resize the array if necessary.
780      */
781     if (ncodes_used == ncodes_size) {
782         if (ncodes_size == 0)
783           ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3);
784         else
785           ncodes = (_codeidx_t *)
786               realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8));
787 
788         ncodes_size += 8;
789     }
790 
791     /*
792      * Shift things around to insert the code if necessary.
793      */
794     if (i < ncodes_used) {
795         for (j = ncodes_used; j > i; j--) {
796             ncodes[j].code = ncodes[j - 1].code;
797             ncodes[j].idx = ncodes[j - 1].idx;
798         }
799     }
800     ncodes[i].code = code;
801     ncodes[i].idx = make_number(num, denom);
802 
803     ncodes_used++;
804 }
805 
806 /*
807  * This routine assumes that the line is a valid Unicode Character Database
808  * entry.
809  */
810 static void
read_cdata(FILE * in)811 read_cdata(FILE *in)
812 {
813     krb5_ui_4 i, lineno, skip, code, ccl_code;
814     short wnum, neg, number[2], compat;
815     char line[512], *s, *e;
816 
817     lineno = skip = 0;
818     while (fgets(line, sizeof(line), in)) {
819 	if( (s=strchr(line, '\n')) ) *s = '\0';
820         lineno++;
821 
822         /*
823          * Skip blank lines and lines that start with a '#'.
824          */
825         if (line[0] == 0 || line[0] == '#')
826           continue;
827 
828         /*
829          * If lines need to be skipped, do it here.
830          */
831         if (skip) {
832             skip--;
833             continue;
834         }
835 
836         /*
837          * Collect the code.  The code can be up to 6 hex digits in length to
838          * allow surrogates to be specified.
839          */
840         for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) {
841             code <<= 4;
842             if (*s >= '0' && *s <= '9')
843               code += *s - '0';
844             else if (*s >= 'A' && *s <= 'F')
845               code += (*s - 'A') + 10;
846             else if (*s >= 'a' && *s <= 'f')
847               code += (*s - 'a') + 10;
848         }
849 
850         /*
851          * Handle the following special cases:
852          * 1. 4E00-9FA5 CJK Ideographs.
853          * 2. AC00-D7A3 Hangul Syllables.
854          * 3. D800-DFFF Surrogates.
855          * 4. E000-F8FF Private Use Area.
856          * 5. F900-FA2D Han compatibility.
857 	 * ...Plus additional ranges in newer Unicode versions...
858          */
859         switch (code) {
860 	  case 0x3400:
861 	    /* CJK Ideograph Extension A */
862             add_range(0x3400, 0x4db5, "Lo", "L");
863 
864             add_range(0x3400, 0x4db5, "Cp", 0);
865 
866 	    skip = 1;
867 	    break;
868           case 0x4e00:
869             /*
870              * The Han ideographs.
871              */
872             add_range(0x4e00, 0x9fff, "Lo", "L");
873 
874             /*
875              * Add the characters to the defined category.
876              */
877             add_range(0x4e00, 0x9fa5, "Cp", 0);
878 
879             skip = 1;
880             break;
881           case 0xac00:
882             /*
883              * The Hangul syllables.
884              */
885             add_range(0xac00, 0xd7a3, "Lo", "L");
886 
887             /*
888              * Add the characters to the defined category.
889              */
890             add_range(0xac00, 0xd7a3, "Cp", 0);
891 
892             skip = 1;
893             break;
894           case 0xd800:
895             /*
896              * Make a range of all surrogates and assume some default
897              * properties.
898              */
899             add_range(0x010000, 0x10ffff, "Cs", "L");
900             skip = 5;
901             break;
902           case 0xe000:
903             /*
904              * The Private Use area.  Add with a default set of properties.
905              */
906             add_range(0xe000, 0xf8ff, "Co", "L");
907             skip = 1;
908             break;
909           case 0xf900:
910             /*
911              * The CJK compatibility area.
912              */
913             add_range(0xf900, 0xfaff, "Lo", "L");
914 
915             /*
916              * Add the characters to the defined category.
917              */
918             add_range(0xf900, 0xfaff, "Cp", 0);
919 
920             skip = 1;
921 	    break;
922 	  case 0x20000:
923 	    /* CJK Ideograph Extension B */
924             add_range(0x20000, 0x2a6d6, "Lo", "L");
925 
926             add_range(0x20000, 0x2a6d6, "Cp", 0);
927 
928 	    skip = 1;
929 	    break;
930 	  case 0xf0000:
931 	    /* Plane 15 private use */
932 	    add_range(0xf0000, 0xffffd, "Co", "L");
933 	    skip = 1;
934 	    break;
935 
936 	  case 0x100000:
937 	    /* Plane 16 private use */
938 	    add_range(0x100000, 0x10fffd, "Co", "L");
939 	    skip = 1;
940 	    break;
941         }
942 
943         if (skip)
944           continue;
945 
946         /*
947          * Add the code to the defined category.
948          */
949         ordered_range_insert(code, "Cp", 2);
950 
951         /*
952          * Locate the first character property field.
953          */
954         for (i = 0; *s != 0 && i < 2; s++) {
955             if (*s == ';')
956               i++;
957         }
958         for (e = s; *e && *e != ';'; e++) ;
959 
960         ordered_range_insert(code, s, e - s);
961 
962         /*
963          * Locate the combining class code.
964          */
965         for (s = e; *s != 0 && i < 3; s++) {
966             if (*s == ';')
967               i++;
968         }
969 
970         /*
971          * Convert the combining class code from decimal.
972          */
973         for (ccl_code = 0, e = s; *e && *e != ';'; e++)
974           ccl_code = (ccl_code * 10) + (*e - '0');
975 
976         /*
977          * Add the code if it is not 0.
978          */
979         if (ccl_code != 0)
980           ordered_ccl_insert(code, ccl_code);
981 
982         /*
983          * Locate the second character property field.
984          */
985         for (s = e; *s != 0 && i < 4; s++) {
986             if (*s == ';')
987               i++;
988         }
989         for (e = s; *e && *e != ';'; e++) ;
990 
991         ordered_range_insert(code, s, e - s);
992 
993         /*
994          * Check for a decomposition.
995          */
996         s = ++e;
997         if (*s != ';') {
998 	    compat = *s == '<';
999 	    if (compat) {
1000 		/*
1001 		 * Skip compatibility formatting tag.
1002 		 */
1003 		while (*s++ != '>');
1004 	    }
1005             /*
1006              * Collect the codes of the decomposition.
1007              */
1008             for (dectmp_size = 0; *s != ';'; ) {
1009                 /*
1010                  * Skip all leading non-hex digits.
1011                  */
1012                 while (!ishdigit(*s))
1013  		  s++;
1014 
1015                 for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) {
1016                     dectmp[dectmp_size] <<= 4;
1017                     if (*s >= '0' && *s <= '9')
1018                       dectmp[dectmp_size] += *s - '0';
1019                     else if (*s >= 'A' && *s <= 'F')
1020                       dectmp[dectmp_size] += (*s - 'A') + 10;
1021                     else if (*s >= 'a' && *s <= 'f')
1022                       dectmp[dectmp_size] += (*s - 'a') + 10;
1023                 }
1024                 dectmp_size++;
1025             }
1026 
1027             /*
1028              * If there are any codes in the temporary decomposition array,
1029              * then add the character with its decomposition.
1030              */
1031             if (dectmp_size > 0) {
1032 		if (!compat) {
1033 		    add_decomp(code, 0);
1034 		}
1035 		add_decomp(code, 1);
1036 	    }
1037         }
1038 
1039         /*
1040          * Skip to the number field.
1041          */
1042         for (i = 0; i < 3 && *s; s++) {
1043             if (*s == ';')
1044               i++;
1045         }
1046 
1047         /*
1048          * Scan the number in.
1049          */
1050         number[0] = number[1] = 0;
1051         for (e = s, neg = wnum = 0; *e && *e != ';'; e++) {
1052             if (*e == '-') {
1053                 neg = 1;
1054                 continue;
1055             }
1056 
1057             if (*e == '/') {
1058                 /*
1059                  * Move to the denominator of the fraction.
1060                  */
1061                 if (neg)
1062                   number[wnum] *= -1;
1063                 neg = 0;
1064                 e++;
1065                 wnum++;
1066             }
1067             number[wnum] = (number[wnum] * 10) + (*e - '0');
1068         }
1069 
1070         if (e > s) {
1071             /*
1072              * Adjust the denominator in case of integers and add the number.
1073              */
1074             if (wnum == 0)
1075               number[1] = 1;
1076 
1077             add_number(code, number[0], number[1]);
1078         }
1079 
1080         /*
1081          * Skip to the start of the possible case mappings.
1082          */
1083         for (s = e, i = 0; i < 4 && *s; s++) {
1084             if (*s == ';')
1085               i++;
1086         }
1087 
1088         /*
1089          * Collect the case mappings.
1090          */
1091         cases[0] = cases[1] = cases[2] = 0;
1092         for (i = 0; i < 3; i++) {
1093             while (ishdigit(*s)) {
1094                 cases[i] <<= 4;
1095                 if (*s >= '0' && *s <= '9')
1096                   cases[i] += *s - '0';
1097                 else if (*s >= 'A' && *s <= 'F')
1098                   cases[i] += (*s - 'A') + 10;
1099                 else if (*s >= 'a' && *s <= 'f')
1100                   cases[i] += (*s - 'a') + 10;
1101                 s++;
1102             }
1103             if (*s == ';')
1104               s++;
1105         }
1106         if (cases[0] && cases[1])
1107           /*
1108            * Add the upper and lower mappings for a title case character.
1109            */
1110           add_title(code);
1111         else if (cases[1])
1112           /*
1113            * Add the lower and title case mappings for the upper case
1114            * character.
1115            */
1116           add_upper(code);
1117         else if (cases[0])
1118           /*
1119            * Add the upper and title case mappings for the lower case
1120            * character.
1121            */
1122           add_lower(code);
1123     }
1124 }
1125 
1126 static _decomp_t *
find_decomp(krb5_ui_4 code,short compat)1127 find_decomp(krb5_ui_4 code, short compat)
1128 {
1129     long l, r, m;
1130     _decomp_t *decs;
1131 
1132     l = 0;
1133     r = (compat ? kdecomps_used : decomps_used) - 1;
1134     decs = compat ? kdecomps : decomps;
1135     while (l <= r) {
1136         m = (l + r) >> 1;
1137         if (code > decs[m].code)
1138           l = m + 1;
1139         else if (code < decs[m].code)
1140           r = m - 1;
1141         else
1142           return &decs[m];
1143     }
1144     return 0;
1145 }
1146 
1147 static void
decomp_it(_decomp_t * d,short compat)1148 decomp_it(_decomp_t *d, short compat)
1149 {
1150     krb5_ui_4 i;
1151     _decomp_t *dp;
1152 
1153     for (i = 0; i < d->used; i++) {
1154         if ((dp = find_decomp(d->decomp[i], compat)) != 0)
1155           decomp_it(dp, compat);
1156         else
1157           dectmp[dectmp_size++] = d->decomp[i];
1158     }
1159 }
1160 
1161 /*
1162  * Expand all decompositions by recursively decomposing each character
1163  * in the decomposition.
1164  */
1165 static void
expand_decomp(void)1166 expand_decomp(void)
1167 {
1168     krb5_ui_4 i;
1169 
1170     for (i = 0; i < decomps_used; i++) {
1171         dectmp_size = 0;
1172         decomp_it(&decomps[i], 0);
1173         if (dectmp_size > 0)
1174           add_decomp(decomps[i].code, 0);
1175     }
1176 
1177     for (i = 0; i < kdecomps_used; i++) {
1178         dectmp_size = 0;
1179         decomp_it(&kdecomps[i], 1);
1180         if (dectmp_size > 0)
1181           add_decomp(kdecomps[i].code, 1);
1182     }
1183 }
1184 
1185 static int
cmpcomps(const void * v_comp1,const void * v_comp2)1186 cmpcomps(const void *v_comp1, const void *v_comp2)
1187 {
1188 	const _comp_t *comp1 = v_comp1, *comp2 = v_comp2;
1189     long diff = comp1->code1 - comp2->code1;
1190 
1191     if (!diff)
1192 	diff = comp1->code2 - comp2->code2;
1193     return (int) diff;
1194 }
1195 
1196 /*
1197  * Load composition exclusion data
1198  */
1199 static void
read_compexdata(FILE * in)1200 read_compexdata(FILE *in)
1201 {
1202     krb5_ui_2 i;
1203     krb5_ui_4 code;
1204     char line[512], *s;
1205 
1206     (void) memset((char *) compexs, 0, sizeof(compexs));
1207 
1208     while (fgets(line, sizeof(line), in)) {
1209 	if( (s=strchr(line, '\n')) ) *s = '\0';
1210         /*
1211          * Skip blank lines and lines that start with a '#'.
1212          */
1213         if (line[0] == 0 || line[0] == '#')
1214 	    continue;
1215 
1216 	/*
1217          * Collect the code.  Assume max 6 digits
1218          */
1219 
1220 	for (s = line, i = code = 0; *s != '#' && i < 6; i++, s++) {
1221 	    if (isspace((unsigned char)*s)) break;
1222             code <<= 4;
1223             if (*s >= '0' && *s <= '9')
1224 		code += *s - '0';
1225             else if (*s >= 'A' && *s <= 'F')
1226 		code += (*s - 'A') + 10;
1227             else if (*s >= 'a' && *s <= 'f')
1228 		code += (*s - 'a') + 10;
1229         }
1230         COMPEX_SET(code);
1231     }
1232 }
1233 
1234 /*
1235  * Creates array of compositions from decomposition array
1236  */
1237 static void
create_comps(void)1238 create_comps(void)
1239 {
1240     krb5_ui_4 i, cu;
1241 
1242     comps = (_comp_t *) malloc(comps_used * sizeof(_comp_t));
1243 
1244     for (i = cu = 0; i < decomps_used; i++) {
1245 	if (decomps[i].used != 2 || COMPEX_TEST(decomps[i].code))
1246 	    continue;
1247 	comps[cu].comp = decomps[i].code;
1248 	comps[cu].count = 2;
1249 	comps[cu].code1 = decomps[i].decomp[0];
1250 	comps[cu].code2 = decomps[i].decomp[1];
1251 	cu++;
1252     }
1253     comps_used = cu;
1254     qsort(comps, comps_used, sizeof(_comp_t), cmpcomps);
1255 }
1256 
#if HARDCODE_DATA
/*
 * Emit `num` case-mapping entries from `tab` to `out` as C initializer
 * text.  Each entry is written on its own tab-indented line as three
 * 8-digit hexadecimal values (key, other1, other2).  Entries are
 * separated by commas; the comma before the first entry is left out when
 * `first` is non-zero, i.e. when nothing precedes it in the initializer.
 */
static void
write_case(FILE *out, _case_t *tab, int num, int first)
{
    int n = 0;

    while (n < num) {
        _case_t *ent = &tab[n];

        if (!first)
            fprintf(out, ",");
        else
            first = 0;

        fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx",
                (unsigned long) ent->key, (unsigned long) ent->other1,
                (unsigned long) ent->other2);
        n++;
    }
}

/* Text placed in front of each definition emitted by write_cdata(). */
#define PREF "static const "

#endif
1275 
1276 static void
write_cdata(char * opath)1277 write_cdata(char *opath)
1278 {
1279     FILE *out;
1280 	krb5_ui_4 bytes;
1281     krb5_ui_4 i, idx, nprops;
1282 #if !(HARDCODE_DATA)
1283     krb5_ui_2 casecnt[2];
1284 #endif
1285     char path[BUFSIZ];
1286 #if HARDCODE_DATA
1287     int j, k;
1288 
1289     /*****************************************************************
1290      *
1291      * Generate the ctype data.
1292      *
1293      *****************************************************************/
1294 
1295     /*
1296      * Open the output file.
1297      */
1298     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "uctable.h", opath);
1299     if ((out = fopen(path, "w")) == 0)
1300       return;
1301 #else
1302     /*
1303      * Open the ctype.dat file.
1304      */
1305     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "ctype.dat", opath);
1306     if ((out = fopen(path, "wb")) == 0)
1307       return;
1308 #endif
1309 
1310     /*
1311      * Collect the offsets for the properties.  The offsets array is
1312      * on a 4-byte boundary to keep things efficient for architectures
1313      * that need such a thing.
1314      */
1315     for (i = idx = 0; i < NUMPROPS; i++) {
1316         propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff;
1317         idx += proptbl[i].used;
1318     }
1319 
1320     /*
1321      * Add the sentinel index which is used by the binary search as the upper
1322      * bound for a search.
1323      */
1324     propcnt[i] = idx;
1325 
1326     /*
1327      * Record the actual number of property lists.  This may be different than
1328      * the number of offsets actually written because of aligning on a 4-byte
1329      * boundary.
1330      */
1331     hdr[1] = NUMPROPS;
1332 
1333     /*
1334      * Calculate the byte count needed and pad the property counts array to a
1335      * 4-byte boundary.
1336      */
1337     if ((bytes = sizeof(krb5_ui_2) * (NUMPROPS + 1)) & 3)
1338       bytes += 4 - (bytes & 3);
1339     nprops = bytes / sizeof(krb5_ui_2);
1340     bytes += sizeof(krb5_ui_4) * idx;
1341 
1342 #if HARDCODE_DATA
1343     fprintf(out, PREF "krb5_ui_4 _ucprop_size = %d;\n\n", NUMPROPS);
1344 
1345     fprintf(out, PREF "krb5_ui_2 _ucprop_offsets[] = {");
1346 
1347     for (i = 0; i<nprops; i++) {
1348        if (i) fprintf(out, ",");
1349        if (!(i&7)) fprintf(out, "\n\t");
1350        else fprintf(out, " ");
1351        fprintf(out, "0x%04x", propcnt[i]);
1352     }
1353     fprintf(out, "\n};\n\n");
1354 
1355     fprintf(out, PREF "krb5_ui_4 _ucprop_ranges[] = {");
1356 
1357     k = 0;
1358     for (i = 0; i < NUMPROPS; i++) {
1359 	if (proptbl[i].used > 0) {
1360 	  for (j=0; j<proptbl[i].used; j++) {
1361 	    if (k) fprintf(out, ",");
1362 	    if (!(k&3)) fprintf(out,"\n\t");
1363 	    else fprintf(out, " ");
1364 	    k++;
1365 	    fprintf(out, "0x%08lx", (unsigned long) proptbl[i].ranges[j]);
1366 	  }
1367 	}
1368     }
1369     fprintf(out, "\n};\n\n");
1370 #else
1371     /*
1372      * Write the header.
1373      */
1374     fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1375 
1376     /*
1377      * Write the byte count.
1378      */
1379     fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1380 
1381     /*
1382      * Write the property list counts.
1383      */
1384     fwrite((char *) propcnt, sizeof(krb5_ui_2), nprops, out);
1385 
1386     /*
1387      * Write the property lists.
1388      */
1389     for (i = 0; i < NUMPROPS; i++) {
1390         if (proptbl[i].used > 0)
1391           fwrite((char *) proptbl[i].ranges, sizeof(krb5_ui_4),
1392                  proptbl[i].used, out);
1393     }
1394 
1395     fclose(out);
1396 #endif
1397 
1398     /*****************************************************************
1399      *
1400      * Generate the case mapping data.
1401      *
1402      *****************************************************************/
1403 
1404 #if HARDCODE_DATA
1405     fprintf(out, PREF "krb5_ui_4 _uccase_size = %ld;\n\n",
1406         (long) (upper_used + lower_used + title_used));
1407 
1408     fprintf(out, PREF "krb5_ui_2 _uccase_len[2] = {%ld, %ld};\n\n",
1409         (long) upper_used, (long) lower_used);
1410     fprintf(out, PREF "krb5_ui_4 _uccase_map[] = {");
1411 
1412     if (upper_used > 0)
1413       /*
1414        * Write the upper case table.
1415        */
1416       write_case(out, upper, upper_used, 1);
1417 
1418     if (lower_used > 0)
1419       /*
1420        * Write the lower case table.
1421        */
1422       write_case(out, lower, lower_used, !upper_used);
1423 
1424     if (title_used > 0)
1425       /*
1426        * Write the title case table.
1427        */
1428       write_case(out, title, title_used, !(upper_used||lower_used));
1429 
1430     if (!(upper_used || lower_used || title_used))
1431 	fprintf(out, "\t0");
1432 
1433     fprintf(out, "\n};\n\n");
1434 #else
1435     /*
1436      * Open the case.dat file.
1437      */
1438     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "case.dat", opath);
1439     if ((out = fopen(path, "wb")) == 0)
1440       return;
1441 
1442     /*
1443      * Write the case mapping tables.
1444      */
1445     hdr[1] = upper_used + lower_used + title_used;
1446     casecnt[0] = upper_used;
1447     casecnt[1] = lower_used;
1448 
1449     /*
1450      * Write the header.
1451      */
1452     fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1453 
1454     /*
1455      * Write the upper and lower case table sizes.
1456      */
1457     fwrite((char *) casecnt, sizeof(krb5_ui_2), 2, out);
1458 
1459     if (upper_used > 0)
1460       /*
1461        * Write the upper case table.
1462        */
1463       fwrite((char *) upper, sizeof(_case_t), upper_used, out);
1464 
1465     if (lower_used > 0)
1466       /*
1467        * Write the lower case table.
1468        */
1469       fwrite((char *) lower, sizeof(_case_t), lower_used, out);
1470 
1471     if (title_used > 0)
1472       /*
1473        * Write the title case table.
1474        */
1475       fwrite((char *) title, sizeof(_case_t), title_used, out);
1476 
1477     fclose(out);
1478 #endif
1479 
1480     /*****************************************************************
1481      *
1482      * Generate the composition data.
1483      *
1484      *****************************************************************/
1485 
1486     /*
1487      * Create compositions from decomposition data
1488      */
1489     create_comps();
1490 
1491 #if HARDCODE_DATA
1492     fprintf(out, PREF "krb5_ui_4 _uccomp_size = %ld;\n\n",
1493         comps_used * 4L);
1494 
1495     fprintf(out, PREF "krb5_ui_4 _uccomp_data[] = {");
1496 
1497      /*
1498       * Now, if comps exist, write them out.
1499       */
1500     if (comps_used > 0) {
1501 	for (i=0; i<comps_used; i++) {
1502 	    if (i) fprintf(out, ",");
1503 	    fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx",
1504 	        (unsigned long) comps[i].comp, (unsigned long) comps[i].count,
1505 	        (unsigned long) comps[i].code1, (unsigned long) comps[i].code2);
1506 	}
1507     } else {
1508 	fprintf(out, "\t0");
1509     }
1510     fprintf(out, "\n};\n\n");
1511 #else
1512     /*
1513      * Open the comp.dat file.
1514      */
1515     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "comp.dat", opath);
1516     if ((out = fopen(path, "wb")) == 0)
1517 	return;
1518 
1519     /*
1520      * Write the header.
1521      */
1522     hdr[1] = (krb5_ui_2) comps_used * 4;
1523     fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1524 
1525     /*
1526      * Write out the byte count to maintain header size.
1527      */
1528     bytes = comps_used * sizeof(_comp_t);
1529     fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1530 
1531     /*
1532      * Now, if comps exist, write them out.
1533      */
1534     if (comps_used > 0)
1535         fwrite((char *) comps, sizeof(_comp_t), comps_used, out);
1536 
1537     fclose(out);
1538 #endif
1539 
1540     /*****************************************************************
1541      *
1542      * Generate the decomposition data.
1543      *
1544      *****************************************************************/
1545 
1546     /*
1547      * Fully expand all decompositions before generating the output file.
1548      */
1549     expand_decomp();
1550 
1551 #if HARDCODE_DATA
1552     fprintf(out, PREF "krb5_ui_4 _ucdcmp_size = %ld;\n\n",
1553         decomps_used * 2L);
1554 
1555     fprintf(out, PREF "krb5_ui_4 _ucdcmp_nodes[] = {");
1556 
1557     if (decomps_used) {
1558 	/*
1559 	 * Write the list of decomp nodes.
1560 	 */
1561 	for (i = idx = 0; i < decomps_used; i++) {
1562 	    fprintf(out, "\n\t0x%08lx, 0x%08lx,",
1563 	        (unsigned long) decomps[i].code, (unsigned long) idx);
1564 	    idx += decomps[i].used;
1565 	}
1566 
1567 	/*
1568 	 * Write the sentinel index as the last decomp node.
1569 	 */
1570 	fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx);
1571 
1572 	fprintf(out, PREF "krb5_ui_4 _ucdcmp_decomp[] = {");
1573 	/*
1574 	 * Write the decompositions themselves.
1575 	 */
1576 	k = 0;
1577 	for (i = 0; i < decomps_used; i++)
1578 	  for (j=0; j<decomps[i].used; j++) {
1579 	    if (k) fprintf(out, ",");
1580 	    if (!(k&3)) fprintf(out,"\n\t");
1581 	    else fprintf(out, " ");
1582 	    k++;
1583 	    fprintf(out, "0x%08lx", (unsigned long) decomps[i].decomp[j]);
1584 	  }
1585 	fprintf(out, "\n};\n\n");
1586     }
1587 #else
1588     /*
1589      * Open the decomp.dat file.
1590      */
1591     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "decomp.dat", opath);
1592     if ((out = fopen(path, "wb")) == 0)
1593       return;
1594 
1595     hdr[1] = decomps_used;
1596 
1597     /*
1598      * Write the header.
1599      */
1600     fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1601 
1602     /*
1603      * Write a temporary byte count which will be calculated as the
1604      * decompositions are written out.
1605      */
1606     bytes = 0;
1607     fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1608 
1609     if (decomps_used) {
1610         /*
1611          * Write the list of decomp nodes.
1612          */
1613         for (i = idx = 0; i < decomps_used; i++) {
1614             fwrite((char *) &decomps[i].code, sizeof(krb5_ui_4), 1, out);
1615             fwrite((char *) &idx, sizeof(krb5_ui_4), 1, out);
1616             idx += decomps[i].used;
1617         }
1618 
1619         /*
1620          * Write the sentinel index as the last decomp node.
1621          */
1622         fwrite((char *) &idx, sizeof(krb5_ui_4), 1, out);
1623 
1624         /*
1625          * Write the decompositions themselves.
1626          */
1627         for (i = 0; i < decomps_used; i++)
1628           fwrite((char *) decomps[i].decomp, sizeof(krb5_ui_4),
1629                  decomps[i].used, out);
1630 
1631         /*
1632          * Seek back to the beginning and write the byte count.
1633          */
1634         bytes = (sizeof(krb5_ui_4) * idx) +
1635             (sizeof(krb5_ui_4) * ((hdr[1] << 1) + 1));
1636         fseek(out, sizeof(krb5_ui_2) << 1, 0L);
1637         fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1638 
1639         fclose(out);
1640     }
1641 #endif
1642 
1643 #ifdef HARDCODE_DATA
1644     fprintf(out, PREF "krb5_ui_4 _uckdcmp_size = %ld;\n\n",
1645         kdecomps_used * 2L);
1646 
1647     fprintf(out, PREF "krb5_ui_4 _uckdcmp_nodes[] = {");
1648 
1649     if (kdecomps_used) {
1650 	/*
1651 	 * Write the list of kdecomp nodes.
1652 	 */
1653 	for (i = idx = 0; i < kdecomps_used; i++) {
1654 	    fprintf(out, "\n\t0x%08lx, 0x%08lx,",
1655 	        (unsigned long) kdecomps[i].code, (unsigned long) idx);
1656 	    idx += kdecomps[i].used;
1657 	}
1658 
1659 	/*
1660 	 * Write the sentinel index as the last decomp node.
1661 	 */
1662 	fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx);
1663 
1664 	fprintf(out, PREF "krb5_ui_4 _uckdcmp_decomp[] = {");
1665 
1666 	/*
1667 	 * Write the decompositions themselves.
1668 	 */
1669 	k = 0;
1670 	for (i = 0; i < kdecomps_used; i++)
1671 	  for (j=0; j<kdecomps[i].used; j++) {
1672 	    if (k) fprintf(out, ",");
1673 	    if (!(k&3)) fprintf(out,"\n\t");
1674 	    else fprintf(out, " ");
1675 	    k++;
1676 	    fprintf(out, "0x%08lx", (unsigned long) kdecomps[i].decomp[j]);
1677 	  }
1678 	fprintf(out, "\n};\n\n");
1679     }
1680 #else
1681     /*
1682      * Open the kdecomp.dat file.
1683      */
1684     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "kdecomp.dat", opath);
1685     if ((out = fopen(path, "wb")) == 0)
1686       return;
1687 
1688     hdr[1] = kdecomps_used;
1689 
1690     /*
1691      * Write the header.
1692      */
1693     fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1694 
1695     /*
1696      * Write a temporary byte count which will be calculated as the
1697      * decompositions are written out.
1698      */
1699     bytes = 0;
1700     fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1701 
1702     if (kdecomps_used) {
1703         /*
1704          * Write the list of kdecomp nodes.
1705          */
1706         for (i = idx = 0; i < kdecomps_used; i++) {
1707             fwrite((char *) &kdecomps[i].code, sizeof(krb5_ui_4), 1, out);
1708             fwrite((char *) &idx, sizeof(krb5_ui_4), 1, out);
1709             idx += kdecomps[i].used;
1710         }
1711 
1712         /*
1713          * Write the sentinel index as the last decomp node.
1714          */
1715         fwrite((char *) &idx, sizeof(krb5_ui_4), 1, out);
1716 
1717         /*
1718          * Write the decompositions themselves.
1719          */
1720         for (i = 0; i < kdecomps_used; i++)
1721           fwrite((char *) kdecomps[i].decomp, sizeof(krb5_ui_4),
1722                  kdecomps[i].used, out);
1723 
1724         /*
1725          * Seek back to the beginning and write the byte count.
1726          */
1727         bytes = (sizeof(krb5_ui_4) * idx) +
1728             (sizeof(krb5_ui_4) * ((hdr[1] << 1) + 1));
1729         fseek(out, sizeof(krb5_ui_2) << 1, 0L);
1730         fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1731 
1732         fclose(out);
1733     }
1734 #endif
1735 
1736     /*****************************************************************
1737      *
1738      * Generate the combining class data.
1739      *
1740      *****************************************************************/
1741 #ifdef HARDCODE_DATA
1742     fprintf(out, PREF "krb5_ui_4 _uccmcl_size = %ld;\n\n", (long) ccl_used);
1743 
1744     fprintf(out, PREF "krb5_ui_4 _uccmcl_nodes[] = {");
1745 
1746     if (ccl_used > 0) {
1747 	/*
1748 	 * Write the combining class ranges out.
1749 	 */
1750 	for (i = 0; i<ccl_used; i++) {
1751 	    if (i) fprintf(out, ",");
1752 	    if (!(i&3)) fprintf(out, "\n\t");
1753 	    else fprintf(out, " ");
1754 	    fprintf(out, "0x%08lx", (unsigned long) ccl[i]);
1755 	}
1756     } else {
1757 	fprintf(out, "\t0");
1758     }
1759     fprintf(out, "\n};\n\n");
1760 #else
1761     /*
1762      * Open the cmbcl.dat file.
1763      */
1764     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "cmbcl.dat", opath);
1765     if ((out = fopen(path, "wb")) == 0)
1766       return;
1767 
1768     /*
1769      * Set the number of ranges used.  Each range has a combining class which
1770      * means each entry is a 3-tuple.
1771      */
1772     hdr[1] = ccl_used / 3;
1773 
1774     /*
1775      * Write the header.
1776      */
1777     fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1778 
1779     /*
1780      * Write out the byte count to maintain header size.
1781      */
1782     bytes = ccl_used * sizeof(krb5_ui_4);
1783     fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1784 
1785     if (ccl_used > 0)
1786       /*
1787        * Write the combining class ranges out.
1788        */
1789       fwrite((char *) ccl, sizeof(krb5_ui_4), ccl_used, out);
1790 
1791     fclose(out);
1792 #endif
1793 
1794     /*****************************************************************
1795      *
1796      * Generate the number data.
1797      *
1798      *****************************************************************/
1799 
1800 #if HARDCODE_DATA
1801     fprintf(out, PREF "krb5_ui_4 _ucnum_size = %lu;\n\n",
1802         (unsigned long)ncodes_used<<1);
1803 
1804     fprintf(out, PREF "krb5_ui_4 _ucnum_nodes[] = {");
1805 
1806     /*
1807      * Now, if number mappings exist, write them out.
1808      */
1809     if (ncodes_used > 0) {
1810 	for (i = 0; i<ncodes_used; i++) {
1811 	    if (i) fprintf(out, ",");
1812 	    if (!(i&1)) fprintf(out, "\n\t");
1813 	    else fprintf(out, " ");
1814 	    fprintf(out, "0x%08lx, 0x%08lx",
1815 	        (unsigned long) ncodes[i].code, (unsigned long) ncodes[i].idx);
1816 	}
1817 	fprintf(out, "\n};\n\n");
1818 
1819 	fprintf(out, PREF "short _ucnum_vals[] = {");
1820 	for (i = 0; i<nums_used; i++) {
1821 	    if (i) fprintf(out, ",");
1822 	    if (!(i&3)) fprintf(out, "\n\t");
1823 	    else fprintf(out, " ");
1824 	    if (nums[i].numerator < 0) {
1825 		fprintf(out, "%6d, 0x%04x",
1826 		  nums[i].numerator, nums[i].denominator);
1827 	    } else {
1828 		fprintf(out, "0x%04x, 0x%04x",
1829 		  nums[i].numerator, nums[i].denominator);
1830 	    }
1831 	}
1832 	fprintf(out, "\n};\n\n");
1833     }
1834 #else
1835     /*
1836      * Open the num.dat file.
1837      */
1838     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "num.dat", opath);
1839     if ((out = fopen(path, "wb")) == 0)
1840       return;
1841 
1842     /*
1843      * The count part of the header will be the total number of codes that
1844      * have numbers.
1845      */
1846     hdr[1] = (krb5_ui_2) (ncodes_used << 1);
1847     bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t));
1848 
1849     /*
1850      * Write the header.
1851      */
1852     fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1853 
1854     /*
1855      * Write out the byte count to maintain header size.
1856      */
1857     fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1858 
1859     /*
1860      * Now, if number mappings exist, write them out.
1861      */
1862     if (ncodes_used > 0) {
1863         fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out);
1864         fwrite((char *) nums, sizeof(_num_t), nums_used, out);
1865     }
1866 #endif
1867 
1868     fclose(out);
1869 }
1870 
/*
 * Print the command-line synopsis and the option descriptions to stderr,
 * then terminate the program with exit status 1.  `prog` is the program
 * name shown in the synopsis.
 */
static void
usage(char *prog)
{
    /* Fixed text that follows the synopsis line, in output order. */
    static const char *const rest[] = {
        " datafile1 datafile2 ...\n\n",
        "-o output-directory\n\t\tWrite the output files to a different",
        " directory (default: .).\n",
        "-x composition-exclusion\n\t\tFile of composition codes",
        " that should be excluded.\n"
    };
    size_t n;

    fprintf(stderr,
            "Usage: %s [-o output-directory|-x composition-exclusions]", prog);

    for (n = 0; n < sizeof(rest) / sizeof(rest[0]); n++)
        fputs(rest[n], stderr);

    exit(1);
}
1885 
/*
 * Program entry point.
 *
 * Arguments are handled in the order given:
 *   -o dir    remember dir as the output directory
 *   -x file   load composition exclusions from file (read_compexdata())
 *   other     treat the argument as a data file and pass it to read_cdata()
 * An unknown option, or -o / -x without a following argument, prints the
 * usage text and exits.  A file that cannot be opened is reported on
 * stderr and skipped.  Once all arguments are consumed the tables are
 * written with write_cdata() to the output directory ("." when no -o was
 * given).
 *
 * Returns 0.
 */
int
main(int argc, char *argv[])
{
    FILE *in;
    char *prog, *opath;

    prog = lutil_progname( "ucgendat", argc, argv );

    opath = 0;

    argc--;
    argv++;

    while (argc > 0) {
        if (argv[0][0] == '-') {
            switch (argv[0][1]) {
              case 'o':
                /*
                 * The option needs a value.  Without this check argv[0]
                 * would become the terminating null pointer.
                 */
                if (argc < 2)
                  usage(prog);
                argc--;
                argv++;
                opath = argv[0];
                break;
              case 'x':
                /*
                 * Same as above; a missing value would otherwise reach
                 * fopen() as a null file name.
                 */
                if (argc < 2)
                  usage(prog);
                argc--;
                argv++;
                if ((in = fopen(argv[0], "r")) == 0)
                  fprintf(stderr,
                          "%s: unable to open composition exclusion file %s\n",
                          prog, argv[0]);
                else {
                    read_compexdata(in);
                    fclose(in);
                }
                break;
              default:
                usage(prog);
                break;
            }
        } else {
            /*
             * Every stream is closed right after it has been read, so there
             * is never a previously opened file left to close here.
             */
            if ((in = fopen(argv[0], "r")) == 0)
              fprintf(stderr, "%s: unable to open ctype file %s\n",
                      prog, argv[0]);
            else {
                read_cdata(in);
                fclose(in);
            }
        }
        argc--;
        argv++;
    }

    if (opath == 0)
      opath = ".";
    write_cdata(opath);

    return 0;
}
1946