1 /*
2 * Copyright 1998-2008 The OpenLDAP Foundation.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted only as authorized by the OpenLDAP
7 * Public License.
8 *
9 * A copy of this license is available in file LICENSE in the
10 * top-level directory of the distribution or, alternatively, at
11 * <https://www.OpenLDAP.org/license.html>.
12 */
13 /* Copyright 2001 Computing Research Labs, New Mexico State University
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a
16 * copy of this software and associated documentation files (the "Software"),
17 * to deal in the Software without restriction, including without limitation
18 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19 * and/or sell copies of the Software, and to permit persons to whom the
20 * Software is furnished to do so, subject to the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
28 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
29 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
30 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
31 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 */
33
34 /*
35 * This work is part of OpenLDAP Software <https://www.openldap.org/>.
36 * $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucgendat.c,v 1.43 2008/01/07 23:20:05 kurt Exp $
37 * $Id: ucgendat.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $"
38 */
39
40 #include "k5-int.h"
41 #include "k5-utf8.h"
42 #include "k5-unicode.h"
43
44 #ifndef HARDCODE_DATA
45 #define HARDCODE_DATA 1
46 #endif
47
/*
 * Return nonzero if cc is an ASCII hexadecimal digit (0-9, A-F, a-f).
 *
 * This is a function so that the argument is evaluated exactly once; the
 * ishdigit() macro name is kept for the existing call sites.
 */
static int
uc_is_hex_digit(int cc)
{
    return (cc >= '0' && cc <= '9') ||
        (cc >= 'A' && cc <= 'F') ||
        (cc >= 'a' && cc <= 'f');
}

#undef ishdigit
#define ishdigit(cc) uc_is_hex_digit(cc)
52
53 /*
54 * A header written to the output file with the byte-order-mark and the number
55 * of property nodes.
56 */
57 static krb5_ui_2 hdr[2] = {0xfeff, 0};
58
59 #define NUMPROPS 50
60 #define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3)))
61
62 typedef struct {
63 char *name;
64 int len;
65 } _prop_t;
66
67 /*
68 * List of properties expected to be found in the Unicode Character Database
69 * including some implementation specific properties.
70 *
71 * The implementation specific properties are:
72 * Cm = Composed (can be decomposed)
73 * Nb = Non-breaking
74 * Sy = Symmetric (has left and right forms)
75 * Hd = Hex digit
76 * Qm = Quote marks
77 * Mr = Mirroring
78 * Ss = Space, other
79 * Cp = Defined character
80 */
81 static _prop_t props[NUMPROPS] = {
82 {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2},
83 {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2},
84 {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2},
85 {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
86 {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L", 1}, {"R", 1},
87 {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B", 1},
88 {"S", 1}, {"WS", 2}, {"ON", 2},
89 {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2},
90 {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}, {"AL", 2}
91 };
92
93 typedef struct {
94 krb5_ui_4 *ranges;
95 krb5_ui_2 used;
96 krb5_ui_2 size;
97 } _ranges_t;
98
99 static _ranges_t proptbl[NUMPROPS];
100
101 /*
102 * Make sure this array is sized to be on a 4-byte boundary at compile time.
103 */
104 static krb5_ui_2 propcnt[NEEDPROPS];
105
106 /*
107 * Array used to collect a decomposition before adding it to the decomposition
108 * table.
109 */
110 static krb5_ui_4 dectmp[64];
111 static krb5_ui_4 dectmp_size;
112
113 typedef struct {
114 krb5_ui_4 code;
115 krb5_ui_2 size;
116 krb5_ui_2 used;
117 krb5_ui_4 *decomp;
118 } _decomp_t;
119
120 /*
121 * List of decomposition. Created and expanded in order as the characters are
122 * encountered. First list contains canonical mappings, second also includes
123 * compatibility mappings.
124 */
125 static _decomp_t *decomps;
126 static krb5_ui_4 decomps_used;
127 static krb5_ui_4 decomps_size;
128
129 static _decomp_t *kdecomps;
130 static krb5_ui_4 kdecomps_used;
131 static krb5_ui_4 kdecomps_size;
132
133 /*
134 * Composition exclusion table stuff.
135 */
136 #define COMPEX_SET(c) (compexs[(c) >> 5] |= (1 << ((c) & 31)))
137 #define COMPEX_TEST(c) (compexs[(c) >> 5] & (1 << ((c) & 31)))
138 static krb5_ui_4 compexs[8192];
139
140 /*
141 * Struct for holding a composition pair, and array of composition pairs
142 */
143 typedef struct {
144 krb5_ui_4 comp;
145 krb5_ui_4 count;
146 krb5_ui_4 code1;
147 krb5_ui_4 code2;
148 } _comp_t;
149
150 static _comp_t *comps;
151 static krb5_ui_4 comps_used;
152
153 /*
154 * Types and lists for handling lists of case mappings.
155 */
156 typedef struct {
157 krb5_ui_4 key;
158 krb5_ui_4 other1;
159 krb5_ui_4 other2;
160 } _case_t;
161
162 static _case_t *upper;
163 static _case_t *lower;
164 static _case_t *title;
165 static krb5_ui_4 upper_used;
166 static krb5_ui_4 upper_size;
167 static krb5_ui_4 lower_used;
168 static krb5_ui_4 lower_size;
169 static krb5_ui_4 title_used;
170 static krb5_ui_4 title_size;
171
172 /*
173 * Array used to collect case mappings before adding them to a list.
174 */
175 static krb5_ui_4 cases[3];
176
177 /*
178 * An array to hold ranges for combining classes.
179 */
180 static krb5_ui_4 *ccl;
181 static krb5_ui_4 ccl_used;
182 static krb5_ui_4 ccl_size;
183
184 /*
185 * Structures for handling numbers.
186 */
187 typedef struct {
188 krb5_ui_4 code;
189 krb5_ui_4 idx;
190 } _codeidx_t;
191
192 typedef struct {
193 short numerator;
194 short denominator;
195 } _num_t;
196
197 /*
198 * Arrays to hold the mapping of codes to numbers.
199 */
200 static _codeidx_t *ncodes;
201 static krb5_ui_4 ncodes_used;
202 static krb5_ui_4 ncodes_size;
203
204 static _num_t *nums;
205 static krb5_ui_4 nums_used;
206 static krb5_ui_4 nums_size;
207
208 /*
209 * Array for holding numbers.
210 */
211 static _num_t *nums;
212 static krb5_ui_4 nums_used;
213 static krb5_ui_4 nums_size;
214
215 static void
add_range(krb5_ui_4 start,krb5_ui_4 end,char * p1,char * p2)216 add_range(krb5_ui_4 start, krb5_ui_4 end, char *p1, char *p2)
217 {
218 int i, j, k, len;
219 _ranges_t *rlp;
220 char *name;
221
222 for (k = 0; k < 2; k++) {
223 if (k == 0) {
224 name = p1;
225 len = 2;
226 } else {
227 if (p2 == 0)
228 break;
229
230 name = p2;
231 len = 1;
232 }
233
234 for (i = 0; i < NUMPROPS; i++) {
235 if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
236 break;
237 }
238
239 if (i == NUMPROPS)
240 continue;
241
242 rlp = &proptbl[i];
243
244 /*
245 * Resize the range list if necessary.
246 */
247 if (rlp->used == rlp->size) {
248 if (rlp->size == 0)
249 rlp->ranges = (krb5_ui_4 *)
250 malloc(sizeof(krb5_ui_4) << 3);
251 else
252 rlp->ranges = (krb5_ui_4 *)
253 realloc((char *) rlp->ranges,
254 sizeof(krb5_ui_4) * (rlp->size + 8));
255 rlp->size += 8;
256 }
257
258 /*
259 * If this is the first code for this property list, just add it
260 * and return.
261 */
262 if (rlp->used == 0) {
263 rlp->ranges[0] = start;
264 rlp->ranges[1] = end;
265 rlp->used += 2;
266 continue;
267 }
268
269 /*
270 * Optimize the case of adding the range to the end.
271 */
272 j = rlp->used - 1;
273 if (start > rlp->ranges[j]) {
274 j = rlp->used;
275 rlp->ranges[j++] = start;
276 rlp->ranges[j++] = end;
277 rlp->used = j;
278 continue;
279 }
280
281 /*
282 * Need to locate the insertion point.
283 */
284 for (i = 0;
285 i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ;
286
287 /*
288 * If the start value lies in the current range, then simply set the
289 * new end point of the range to the end value passed as a parameter.
290 */
291 if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) {
292 rlp->ranges[i + 1] = end;
293 return;
294 }
295
296 /*
297 * Shift following values up by two.
298 */
299 for (j = rlp->used; j > i; j -= 2) {
300 rlp->ranges[j] = rlp->ranges[j - 2];
301 rlp->ranges[j + 1] = rlp->ranges[j - 1];
302 }
303
304 /*
305 * Add the new range at the insertion point.
306 */
307 rlp->ranges[i] = start;
308 rlp->ranges[i + 1] = end;
309 rlp->used += 2;
310 }
311 }
312
313 static void
ordered_range_insert(krb5_ui_4 c,char * name,int len)314 ordered_range_insert(krb5_ui_4 c, char *name, int len)
315 {
316 int i, j;
317 krb5_ui_4 s, e;
318 _ranges_t *rlp;
319
320 if (len == 0)
321 return;
322
323 /*
324 * Deal with directionality codes introduced in Unicode 3.0.
325 */
326 if ((len == 2 && memcmp(name, "BN", 2) == 0) ||
327 (len == 3 &&
328 (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 ||
329 memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 ||
330 memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0))) {
331 /*
332 * Mark all of these as Other Neutral to preserve compatibility with
333 * older versions.
334 */
335 len = 2;
336 name = "ON";
337 }
338
339 for (i = 0; i < NUMPROPS; i++) {
340 if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
341 break;
342 }
343
344 if (i == NUMPROPS)
345 return;
346
347 /*
348 * Have a match, so insert the code in order.
349 */
350 rlp = &proptbl[i];
351
352 /*
353 * Resize the range list if necessary.
354 */
355 if (rlp->used == rlp->size) {
356 if (rlp->size == 0)
357 rlp->ranges = (krb5_ui_4 *)
358 malloc(sizeof(krb5_ui_4) << 3);
359 else
360 rlp->ranges = (krb5_ui_4 *)
361 realloc((char *) rlp->ranges,
362 sizeof(krb5_ui_4) * (rlp->size + 8));
363 rlp->size += 8;
364 }
365
366 /*
367 * If this is the first code for this property list, just add it
368 * and return.
369 */
370 if (rlp->used == 0) {
371 rlp->ranges[0] = rlp->ranges[1] = c;
372 rlp->used += 2;
373 return;
374 }
375
376 /*
377 * Optimize the cases of extending the last range and adding new ranges to
378 * the end.
379 */
380 j = rlp->used - 1;
381 e = rlp->ranges[j];
382 s = rlp->ranges[j - 1];
383
384 if (c == e + 1) {
385 /*
386 * Extend the last range.
387 */
388 rlp->ranges[j] = c;
389 return;
390 }
391
392 if (c > e + 1) {
393 /*
394 * Start another range on the end.
395 */
396 j = rlp->used;
397 rlp->ranges[j] = rlp->ranges[j + 1] = c;
398 rlp->used += 2;
399 return;
400 }
401
402 if (c >= s)
403 /*
404 * The code is a duplicate of a code in the last range, so just return.
405 */
406 return;
407
408 /*
409 * The code should be inserted somewhere before the last range in the
410 * list. Locate the insertion point.
411 */
412 for (i = 0;
413 i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ;
414
415 s = rlp->ranges[i];
416 e = rlp->ranges[i + 1];
417
418 if (c == e + 1)
419 /*
420 * Simply extend the current range.
421 */
422 rlp->ranges[i + 1] = c;
423 else if (c < s) {
424 /*
425 * Add a new entry before the current location. Shift all entries
426 * before the current one up by one to make room.
427 */
428 for (j = rlp->used; j > i; j -= 2) {
429 rlp->ranges[j] = rlp->ranges[j - 2];
430 rlp->ranges[j + 1] = rlp->ranges[j - 1];
431 }
432 rlp->ranges[i] = rlp->ranges[i + 1] = c;
433
434 rlp->used += 2;
435 }
436 }
437
438 static void
add_decomp(krb5_ui_4 code,short compat)439 add_decomp(krb5_ui_4 code, short compat)
440 {
441 krb5_ui_4 i, j, size;
442 _decomp_t **pdecomps;
443 krb5_ui_4 *pdecomps_used;
444 krb5_ui_4 *pdecomps_size;
445
446 if (compat) {
447 pdecomps = &kdecomps;
448 pdecomps_used = &kdecomps_used;
449 pdecomps_size = &kdecomps_size;
450 } else {
451 pdecomps = &decomps;
452 pdecomps_used = &decomps_used;
453 pdecomps_size = &decomps_size;
454 }
455
456 /*
457 * Add the code to the composite property.
458 */
459 if (!compat) {
460 ordered_range_insert(code, "Cm", 2);
461 }
462
463 /*
464 * Locate the insertion point for the code.
465 */
466 for (i = 0; i < *pdecomps_used && code > (*pdecomps)[i].code; i++) ;
467
468 /*
469 * Allocate space for a new decomposition.
470 */
471 if (*pdecomps_used == *pdecomps_size) {
472 if (*pdecomps_size == 0)
473 *pdecomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
474 else
475 *pdecomps = (_decomp_t *)
476 realloc((char *) *pdecomps,
477 sizeof(_decomp_t) * (*pdecomps_size + 8));
478 (void) memset((char *) (*pdecomps + *pdecomps_size), '\0',
479 sizeof(_decomp_t) << 3);
480 *pdecomps_size += 8;
481 }
482
483 if (i < *pdecomps_used && code != (*pdecomps)[i].code) {
484 /*
485 * Shift the decomps up by one if the codes don't match.
486 */
487 for (j = *pdecomps_used; j > i; j--)
488 (void) memmove((char *) &(*pdecomps)[j], (char *) &(*pdecomps)[j - 1],
489 sizeof(_decomp_t));
490 }
491
492 /*
493 * Insert or replace a decomposition.
494 */
495 size = dectmp_size + (4 - (dectmp_size & 3));
496 if ((*pdecomps)[i].size < size) {
497 if ((*pdecomps)[i].size == 0)
498 (*pdecomps)[i].decomp = (krb5_ui_4 *)
499 malloc(sizeof(krb5_ui_4) * size);
500 else
501 (*pdecomps)[i].decomp = (krb5_ui_4 *)
502 realloc((char *) (*pdecomps)[i].decomp,
503 sizeof(krb5_ui_4) * size);
504 (*pdecomps)[i].size = size;
505 }
506
507 if ((*pdecomps)[i].code != code)
508 (*pdecomps_used)++;
509
510 (*pdecomps)[i].code = code;
511 (*pdecomps)[i].used = dectmp_size;
512 (void) memmove((char *) (*pdecomps)[i].decomp, (char *) dectmp,
513 sizeof(krb5_ui_4) * dectmp_size);
514
515 /*
516 * NOTICE: This needs changing later so it is more general than simply
517 * pairs. This calculation is done here to simplify allocation elsewhere.
518 */
519 if (!compat && dectmp_size == 2)
520 comps_used++;
521 }
522
523 static void
add_title(krb5_ui_4 code)524 add_title(krb5_ui_4 code)
525 {
526 krb5_ui_4 i, j;
527
528 /*
529 * Always map the code to itself.
530 */
531 cases[2] = code;
532
533 if (title_used == title_size) {
534 if (title_size == 0)
535 title = (_case_t *) malloc(sizeof(_case_t) << 3);
536 else
537 title = (_case_t *) realloc((char *) title,
538 sizeof(_case_t) * (title_size + 8));
539 title_size += 8;
540 }
541
542 /*
543 * Locate the insertion point.
544 */
545 for (i = 0; i < title_used && code > title[i].key; i++) ;
546
547 if (i < title_used) {
548 /*
549 * Shift the array up by one.
550 */
551 for (j = title_used; j > i; j--)
552 (void) memmove((char *) &title[j], (char *) &title[j - 1],
553 sizeof(_case_t));
554 }
555
556 title[i].key = cases[2]; /* Title */
557 title[i].other1 = cases[0]; /* Upper */
558 title[i].other2 = cases[1]; /* Lower */
559
560 title_used++;
561 }
562
563 static void
add_upper(krb5_ui_4 code)564 add_upper(krb5_ui_4 code)
565 {
566 krb5_ui_4 i, j;
567
568 /*
569 * Always map the code to itself.
570 */
571 cases[0] = code;
572
573 /*
574 * If the title case character is not present, then make it the same as
575 * the upper case.
576 */
577 if (cases[2] == 0)
578 cases[2] = code;
579
580 if (upper_used == upper_size) {
581 if (upper_size == 0)
582 upper = (_case_t *) malloc(sizeof(_case_t) << 3);
583 else
584 upper = (_case_t *) realloc((char *) upper,
585 sizeof(_case_t) * (upper_size + 8));
586 upper_size += 8;
587 }
588
589 /*
590 * Locate the insertion point.
591 */
592 for (i = 0; i < upper_used && code > upper[i].key; i++) ;
593
594 if (i < upper_used) {
595 /*
596 * Shift the array up by one.
597 */
598 for (j = upper_used; j > i; j--)
599 (void) memmove((char *) &upper[j], (char *) &upper[j - 1],
600 sizeof(_case_t));
601 }
602
603 upper[i].key = cases[0]; /* Upper */
604 upper[i].other1 = cases[1]; /* Lower */
605 upper[i].other2 = cases[2]; /* Title */
606
607 upper_used++;
608 }
609
610 static void
add_lower(krb5_ui_4 code)611 add_lower(krb5_ui_4 code)
612 {
613 krb5_ui_4 i, j;
614
615 /*
616 * Always map the code to itself.
617 */
618 cases[1] = code;
619
620 /*
621 * If the title case character is empty, then make it the same as the
622 * upper case.
623 */
624 if (cases[2] == 0)
625 cases[2] = cases[0];
626
627 if (lower_used == lower_size) {
628 if (lower_size == 0)
629 lower = (_case_t *) malloc(sizeof(_case_t) << 3);
630 else
631 lower = (_case_t *) realloc((char *) lower,
632 sizeof(_case_t) * (lower_size + 8));
633 lower_size += 8;
634 }
635
636 /*
637 * Locate the insertion point.
638 */
639 for (i = 0; i < lower_used && code > lower[i].key; i++) ;
640
641 if (i < lower_used) {
642 /*
643 * Shift the array up by one.
644 */
645 for (j = lower_used; j > i; j--)
646 (void) memmove((char *) &lower[j], (char *) &lower[j - 1],
647 sizeof(_case_t));
648 }
649
650 lower[i].key = cases[1]; /* Lower */
651 lower[i].other1 = cases[0]; /* Upper */
652 lower[i].other2 = cases[2]; /* Title */
653
654 lower_used++;
655 }
656
657 static void
ordered_ccl_insert(krb5_ui_4 c,krb5_ui_4 ccl_code)658 ordered_ccl_insert(krb5_ui_4 c, krb5_ui_4 ccl_code)
659 {
660 krb5_ui_4 i, j;
661
662 if (ccl_used == ccl_size) {
663 if (ccl_size == 0)
664 ccl = (krb5_ui_4 *) malloc(sizeof(krb5_ui_4) * 24);
665 else
666 ccl = (krb5_ui_4 *)
667 realloc((char *) ccl, sizeof(krb5_ui_4) * (ccl_size + 24));
668 ccl_size += 24;
669 }
670
671 /*
672 * Optimize adding the first item.
673 */
674 if (ccl_used == 0) {
675 ccl[0] = ccl[1] = c;
676 ccl[2] = ccl_code;
677 ccl_used += 3;
678 return;
679 }
680
681 /*
682 * Handle the special case of extending the range on the end. This
683 * requires that the combining class codes are the same.
684 */
685 if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) {
686 ccl[ccl_used - 2] = c;
687 return;
688 }
689
690 /*
691 * Handle the special case of adding another range on the end.
692 */
693 if (c > ccl[ccl_used - 2] + 1 ||
694 (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) {
695 ccl[ccl_used++] = c;
696 ccl[ccl_used++] = c;
697 ccl[ccl_used++] = ccl_code;
698 return;
699 }
700
701 /*
702 * Locate either the insertion point or range for the code.
703 */
704 for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ;
705
706 if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) {
707 /*
708 * Extend an existing range.
709 */
710 ccl[i + 1] = c;
711 return;
712 } else if (c < ccl[i]) {
713 /*
714 * Start a new range before the current location.
715 */
716 for (j = ccl_used; j > i; j -= 3) {
717 ccl[j] = ccl[j - 3];
718 ccl[j - 1] = ccl[j - 4];
719 ccl[j - 2] = ccl[j - 5];
720 }
721 ccl[i] = ccl[i + 1] = c;
722 ccl[i + 2] = ccl_code;
723 }
724 }
725
726 /*
727 * Adds a number if it does not already exist and returns an index value
728 * multiplied by 2.
729 */
730 static krb5_ui_4
make_number(short num,short denom)731 make_number(short num, short denom)
732 {
733 krb5_ui_4 n;
734
735 /*
736 * Determine if the number already exists.
737 */
738 for (n = 0; n < nums_used; n++) {
739 if (nums[n].numerator == num && nums[n].denominator == denom)
740 return n << 1;
741 }
742
743 if (nums_used == nums_size) {
744 if (nums_size == 0)
745 nums = (_num_t *) malloc(sizeof(_num_t) << 3);
746 else
747 nums = (_num_t *) realloc((char *) nums,
748 sizeof(_num_t) * (nums_size + 8));
749 nums_size += 8;
750 }
751
752 n = nums_used++;
753 nums[n].numerator = num;
754 nums[n].denominator = denom;
755
756 return n << 1;
757 }
758
759 static void
add_number(krb5_ui_4 code,short num,short denom)760 add_number(krb5_ui_4 code, short num, short denom)
761 {
762 krb5_ui_4 i, j;
763
764 /*
765 * Insert the code in order.
766 */
767 for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ;
768
769 /*
770 * Handle the case of the codes matching and simply replace the number
771 * that was there before.
772 */
773 if (i < ncodes_used && code == ncodes[i].code) {
774 ncodes[i].idx = make_number(num, denom);
775 return;
776 }
777
778 /*
779 * Resize the array if necessary.
780 */
781 if (ncodes_used == ncodes_size) {
782 if (ncodes_size == 0)
783 ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3);
784 else
785 ncodes = (_codeidx_t *)
786 realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8));
787
788 ncodes_size += 8;
789 }
790
791 /*
792 * Shift things around to insert the code if necessary.
793 */
794 if (i < ncodes_used) {
795 for (j = ncodes_used; j > i; j--) {
796 ncodes[j].code = ncodes[j - 1].code;
797 ncodes[j].idx = ncodes[j - 1].idx;
798 }
799 }
800 ncodes[i].code = code;
801 ncodes[i].idx = make_number(num, denom);
802
803 ncodes_used++;
804 }
805
806 /*
807 * This routine assumes that the line is a valid Unicode Character Database
808 * entry.
809 */
810 static void
read_cdata(FILE * in)811 read_cdata(FILE *in)
812 {
813 krb5_ui_4 i, lineno, skip, code, ccl_code;
814 short wnum, neg, number[2], compat;
815 char line[512], *s, *e;
816
817 lineno = skip = 0;
818 while (fgets(line, sizeof(line), in)) {
819 if( (s=strchr(line, '\n')) ) *s = '\0';
820 lineno++;
821
822 /*
823 * Skip blank lines and lines that start with a '#'.
824 */
825 if (line[0] == 0 || line[0] == '#')
826 continue;
827
828 /*
829 * If lines need to be skipped, do it here.
830 */
831 if (skip) {
832 skip--;
833 continue;
834 }
835
836 /*
837 * Collect the code. The code can be up to 6 hex digits in length to
838 * allow surrogates to be specified.
839 */
840 for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) {
841 code <<= 4;
842 if (*s >= '0' && *s <= '9')
843 code += *s - '0';
844 else if (*s >= 'A' && *s <= 'F')
845 code += (*s - 'A') + 10;
846 else if (*s >= 'a' && *s <= 'f')
847 code += (*s - 'a') + 10;
848 }
849
850 /*
851 * Handle the following special cases:
852 * 1. 4E00-9FA5 CJK Ideographs.
853 * 2. AC00-D7A3 Hangul Syllables.
854 * 3. D800-DFFF Surrogates.
855 * 4. E000-F8FF Private Use Area.
856 * 5. F900-FA2D Han compatibility.
857 * ...Plus additional ranges in newer Unicode versions...
858 */
859 switch (code) {
860 case 0x3400:
861 /* CJK Ideograph Extension A */
862 add_range(0x3400, 0x4db5, "Lo", "L");
863
864 add_range(0x3400, 0x4db5, "Cp", 0);
865
866 skip = 1;
867 break;
868 case 0x4e00:
869 /*
870 * The Han ideographs.
871 */
872 add_range(0x4e00, 0x9fff, "Lo", "L");
873
874 /*
875 * Add the characters to the defined category.
876 */
877 add_range(0x4e00, 0x9fa5, "Cp", 0);
878
879 skip = 1;
880 break;
881 case 0xac00:
882 /*
883 * The Hangul syllables.
884 */
885 add_range(0xac00, 0xd7a3, "Lo", "L");
886
887 /*
888 * Add the characters to the defined category.
889 */
890 add_range(0xac00, 0xd7a3, "Cp", 0);
891
892 skip = 1;
893 break;
894 case 0xd800:
895 /*
896 * Make a range of all surrogates and assume some default
897 * properties.
898 */
899 add_range(0x010000, 0x10ffff, "Cs", "L");
900 skip = 5;
901 break;
902 case 0xe000:
903 /*
904 * The Private Use area. Add with a default set of properties.
905 */
906 add_range(0xe000, 0xf8ff, "Co", "L");
907 skip = 1;
908 break;
909 case 0xf900:
910 /*
911 * The CJK compatibility area.
912 */
913 add_range(0xf900, 0xfaff, "Lo", "L");
914
915 /*
916 * Add the characters to the defined category.
917 */
918 add_range(0xf900, 0xfaff, "Cp", 0);
919
920 skip = 1;
921 break;
922 case 0x20000:
923 /* CJK Ideograph Extension B */
924 add_range(0x20000, 0x2a6d6, "Lo", "L");
925
926 add_range(0x20000, 0x2a6d6, "Cp", 0);
927
928 skip = 1;
929 break;
930 case 0xf0000:
931 /* Plane 15 private use */
932 add_range(0xf0000, 0xffffd, "Co", "L");
933 skip = 1;
934 break;
935
936 case 0x100000:
937 /* Plane 16 private use */
938 add_range(0x100000, 0x10fffd, "Co", "L");
939 skip = 1;
940 break;
941 }
942
943 if (skip)
944 continue;
945
946 /*
947 * Add the code to the defined category.
948 */
949 ordered_range_insert(code, "Cp", 2);
950
951 /*
952 * Locate the first character property field.
953 */
954 for (i = 0; *s != 0 && i < 2; s++) {
955 if (*s == ';')
956 i++;
957 }
958 for (e = s; *e && *e != ';'; e++) ;
959
960 ordered_range_insert(code, s, e - s);
961
962 /*
963 * Locate the combining class code.
964 */
965 for (s = e; *s != 0 && i < 3; s++) {
966 if (*s == ';')
967 i++;
968 }
969
970 /*
971 * Convert the combining class code from decimal.
972 */
973 for (ccl_code = 0, e = s; *e && *e != ';'; e++)
974 ccl_code = (ccl_code * 10) + (*e - '0');
975
976 /*
977 * Add the code if it not 0.
978 */
979 if (ccl_code != 0)
980 ordered_ccl_insert(code, ccl_code);
981
982 /*
983 * Locate the second character property field.
984 */
985 for (s = e; *s != 0 && i < 4; s++) {
986 if (*s == ';')
987 i++;
988 }
989 for (e = s; *e && *e != ';'; e++) ;
990
991 ordered_range_insert(code, s, e - s);
992
993 /*
994 * Check for a decomposition.
995 */
996 s = ++e;
997 if (*s != ';') {
998 compat = *s == '<';
999 if (compat) {
1000 /*
1001 * Skip compatibility formatting tag.
1002 */
1003 while (*s++ != '>');
1004 }
1005 /*
1006 * Collect the codes of the decomposition.
1007 */
1008 for (dectmp_size = 0; *s != ';'; ) {
1009 /*
1010 * Skip all leading non-hex digits.
1011 */
1012 while (!ishdigit(*s))
1013 s++;
1014
1015 for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) {
1016 dectmp[dectmp_size] <<= 4;
1017 if (*s >= '0' && *s <= '9')
1018 dectmp[dectmp_size] += *s - '0';
1019 else if (*s >= 'A' && *s <= 'F')
1020 dectmp[dectmp_size] += (*s - 'A') + 10;
1021 else if (*s >= 'a' && *s <= 'f')
1022 dectmp[dectmp_size] += (*s - 'a') + 10;
1023 }
1024 dectmp_size++;
1025 }
1026
1027 /*
1028 * If there are any codes in the temporary decomposition array,
1029 * then add the character with its decomposition.
1030 */
1031 if (dectmp_size > 0) {
1032 if (!compat) {
1033 add_decomp(code, 0);
1034 }
1035 add_decomp(code, 1);
1036 }
1037 }
1038
1039 /*
1040 * Skip to the number field.
1041 */
1042 for (i = 0; i < 3 && *s; s++) {
1043 if (*s == ';')
1044 i++;
1045 }
1046
1047 /*
1048 * Scan the number in.
1049 */
1050 number[0] = number[1] = 0;
1051 for (e = s, neg = wnum = 0; *e && *e != ';'; e++) {
1052 if (*e == '-') {
1053 neg = 1;
1054 continue;
1055 }
1056
1057 if (*e == '/') {
1058 /*
1059 * Move the the denominator of the fraction.
1060 */
1061 if (neg)
1062 number[wnum] *= -1;
1063 neg = 0;
1064 e++;
1065 wnum++;
1066 }
1067 number[wnum] = (number[wnum] * 10) + (*e - '0');
1068 }
1069
1070 if (e > s) {
1071 /*
1072 * Adjust the denominator in case of integers and add the number.
1073 */
1074 if (wnum == 0)
1075 number[1] = 1;
1076
1077 add_number(code, number[0], number[1]);
1078 }
1079
1080 /*
1081 * Skip to the start of the possible case mappings.
1082 */
1083 for (s = e, i = 0; i < 4 && *s; s++) {
1084 if (*s == ';')
1085 i++;
1086 }
1087
1088 /*
1089 * Collect the case mappings.
1090 */
1091 cases[0] = cases[1] = cases[2] = 0;
1092 for (i = 0; i < 3; i++) {
1093 while (ishdigit(*s)) {
1094 cases[i] <<= 4;
1095 if (*s >= '0' && *s <= '9')
1096 cases[i] += *s - '0';
1097 else if (*s >= 'A' && *s <= 'F')
1098 cases[i] += (*s - 'A') + 10;
1099 else if (*s >= 'a' && *s <= 'f')
1100 cases[i] += (*s - 'a') + 10;
1101 s++;
1102 }
1103 if (*s == ';')
1104 s++;
1105 }
1106 if (cases[0] && cases[1])
1107 /*
1108 * Add the upper and lower mappings for a title case character.
1109 */
1110 add_title(code);
1111 else if (cases[1])
1112 /*
1113 * Add the lower and title case mappings for the upper case
1114 * character.
1115 */
1116 add_upper(code);
1117 else if (cases[0])
1118 /*
1119 * Add the upper and title case mappings for the lower case
1120 * character.
1121 */
1122 add_lower(code);
1123 }
1124 }
1125
1126 static _decomp_t *
find_decomp(krb5_ui_4 code,short compat)1127 find_decomp(krb5_ui_4 code, short compat)
1128 {
1129 long l, r, m;
1130 _decomp_t *decs;
1131
1132 l = 0;
1133 r = (compat ? kdecomps_used : decomps_used) - 1;
1134 decs = compat ? kdecomps : decomps;
1135 while (l <= r) {
1136 m = (l + r) >> 1;
1137 if (code > decs[m].code)
1138 l = m + 1;
1139 else if (code < decs[m].code)
1140 r = m - 1;
1141 else
1142 return &decs[m];
1143 }
1144 return 0;
1145 }
1146
1147 static void
decomp_it(_decomp_t * d,short compat)1148 decomp_it(_decomp_t *d, short compat)
1149 {
1150 krb5_ui_4 i;
1151 _decomp_t *dp;
1152
1153 for (i = 0; i < d->used; i++) {
1154 if ((dp = find_decomp(d->decomp[i], compat)) != 0)
1155 decomp_it(dp, compat);
1156 else
1157 dectmp[dectmp_size++] = d->decomp[i];
1158 }
1159 }
1160
1161 /*
1162 * Expand all decompositions by recursively decomposing each character
1163 * in the decomposition.
1164 */
1165 static void
expand_decomp(void)1166 expand_decomp(void)
1167 {
1168 krb5_ui_4 i;
1169
1170 for (i = 0; i < decomps_used; i++) {
1171 dectmp_size = 0;
1172 decomp_it(&decomps[i], 0);
1173 if (dectmp_size > 0)
1174 add_decomp(decomps[i].code, 0);
1175 }
1176
1177 for (i = 0; i < kdecomps_used; i++) {
1178 dectmp_size = 0;
1179 decomp_it(&kdecomps[i], 1);
1180 if (dectmp_size > 0)
1181 add_decomp(kdecomps[i].code, 1);
1182 }
1183 }
1184
1185 static int
cmpcomps(const void * v_comp1,const void * v_comp2)1186 cmpcomps(const void *v_comp1, const void *v_comp2)
1187 {
1188 const _comp_t *comp1 = v_comp1, *comp2 = v_comp2;
1189 long diff = comp1->code1 - comp2->code1;
1190
1191 if (!diff)
1192 diff = comp1->code2 - comp2->code2;
1193 return (int) diff;
1194 }
1195
1196 /*
1197 * Load composition exclusion data
1198 */
1199 static void
read_compexdata(FILE * in)1200 read_compexdata(FILE *in)
1201 {
1202 krb5_ui_2 i;
1203 krb5_ui_4 code;
1204 char line[512], *s;
1205
1206 (void) memset((char *) compexs, 0, sizeof(compexs));
1207
1208 while (fgets(line, sizeof(line), in)) {
1209 if( (s=strchr(line, '\n')) ) *s = '\0';
1210 /*
1211 * Skip blank lines and lines that start with a '#'.
1212 */
1213 if (line[0] == 0 || line[0] == '#')
1214 continue;
1215
1216 /*
1217 * Collect the code. Assume max 6 digits
1218 */
1219
1220 for (s = line, i = code = 0; *s != '#' && i < 6; i++, s++) {
1221 if (isspace((unsigned char)*s)) break;
1222 code <<= 4;
1223 if (*s >= '0' && *s <= '9')
1224 code += *s - '0';
1225 else if (*s >= 'A' && *s <= 'F')
1226 code += (*s - 'A') + 10;
1227 else if (*s >= 'a' && *s <= 'f')
1228 code += (*s - 'a') + 10;
1229 }
1230 COMPEX_SET(code);
1231 }
1232 }
1233
1234 /*
1235 * Creates array of compositions from decomposition array
1236 */
1237 static void
create_comps(void)1238 create_comps(void)
1239 {
1240 krb5_ui_4 i, cu;
1241
1242 comps = (_comp_t *) malloc(comps_used * sizeof(_comp_t));
1243
1244 for (i = cu = 0; i < decomps_used; i++) {
1245 if (decomps[i].used != 2 || COMPEX_TEST(decomps[i].code))
1246 continue;
1247 comps[cu].comp = decomps[i].code;
1248 comps[cu].count = 2;
1249 comps[cu].code1 = decomps[i].decomp[0];
1250 comps[cu].code2 = decomps[i].decomp[1];
1251 cu++;
1252 }
1253 comps_used = cu;
1254 qsort(comps, comps_used, sizeof(_comp_t), cmpcomps);
1255 }
1256
1257 #if HARDCODE_DATA
1258 static void
write_case(FILE * out,_case_t * tab,int num,int first)1259 write_case(FILE *out, _case_t *tab, int num, int first)
1260 {
1261 int i;
1262
1263 for (i=0; i<num; i++) {
1264 if (first) first = 0;
1265 else fprintf(out, ",");
1266 fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx",
1267 (unsigned long) tab[i].key, (unsigned long) tab[i].other1,
1268 (unsigned long) tab[i].other2);
1269 }
1270 }
1271
1272 #define PREF "static const "
1273
1274 #endif
1275
1276 static void
write_cdata(char * opath)1277 write_cdata(char *opath)
1278 {
1279 FILE *out;
1280 krb5_ui_4 bytes;
1281 krb5_ui_4 i, idx, nprops;
1282 #if !(HARDCODE_DATA)
1283 krb5_ui_2 casecnt[2];
1284 #endif
1285 char path[BUFSIZ];
1286 #if HARDCODE_DATA
1287 int j, k;
1288
1289 /*****************************************************************
1290 *
1291 * Generate the ctype data.
1292 *
1293 *****************************************************************/
1294
1295 /*
1296 * Open the output file.
1297 */
1298 snprintf(path, sizeof path, "%s" LDAP_DIRSEP "uctable.h", opath);
1299 if ((out = fopen(path, "w")) == 0)
1300 return;
1301 #else
1302 /*
1303 * Open the ctype.dat file.
1304 */
1305 snprintf(path, sizeof path, "%s" LDAP_DIRSEP "ctype.dat", opath);
1306 if ((out = fopen(path, "wb")) == 0)
1307 return;
1308 #endif
1309
1310 /*
1311 * Collect the offsets for the properties. The offsets array is
1312 * on a 4-byte boundary to keep things efficient for architectures
1313 * that need such a thing.
1314 */
1315 for (i = idx = 0; i < NUMPROPS; i++) {
1316 propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff;
1317 idx += proptbl[i].used;
1318 }
1319
1320 /*
1321 * Add the sentinel index which is used by the binary search as the upper
1322 * bound for a search.
1323 */
1324 propcnt[i] = idx;
1325
1326 /*
1327 * Record the actual number of property lists. This may be different than
1328 * the number of offsets actually written because of aligning on a 4-byte
1329 * boundary.
1330 */
1331 hdr[1] = NUMPROPS;
1332
1333 /*
1334 * Calculate the byte count needed and pad the property counts array to a
1335 * 4-byte boundary.
1336 */
1337 if ((bytes = sizeof(krb5_ui_2) * (NUMPROPS + 1)) & 3)
1338 bytes += 4 - (bytes & 3);
1339 nprops = bytes / sizeof(krb5_ui_2);
1340 bytes += sizeof(krb5_ui_4) * idx;
1341
1342 #if HARDCODE_DATA
1343 fprintf(out, PREF "krb5_ui_4 _ucprop_size = %d;\n\n", NUMPROPS);
1344
1345 fprintf(out, PREF "krb5_ui_2 _ucprop_offsets[] = {");
1346
1347 for (i = 0; i<nprops; i++) {
1348 if (i) fprintf(out, ",");
1349 if (!(i&7)) fprintf(out, "\n\t");
1350 else fprintf(out, " ");
1351 fprintf(out, "0x%04x", propcnt[i]);
1352 }
1353 fprintf(out, "\n};\n\n");
1354
1355 fprintf(out, PREF "krb5_ui_4 _ucprop_ranges[] = {");
1356
1357 k = 0;
1358 for (i = 0; i < NUMPROPS; i++) {
1359 if (proptbl[i].used > 0) {
1360 for (j=0; j<proptbl[i].used; j++) {
1361 if (k) fprintf(out, ",");
1362 if (!(k&3)) fprintf(out,"\n\t");
1363 else fprintf(out, " ");
1364 k++;
1365 fprintf(out, "0x%08lx", (unsigned long) proptbl[i].ranges[j]);
1366 }
1367 }
1368 }
1369 fprintf(out, "\n};\n\n");
1370 #else
1371 /*
1372 * Write the header.
1373 */
1374 fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1375
1376 /*
1377 * Write the byte count.
1378 */
1379 fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1380
1381 /*
1382 * Write the property list counts.
1383 */
1384 fwrite((char *) propcnt, sizeof(krb5_ui_2), nprops, out);
1385
1386 /*
1387 * Write the property lists.
1388 */
1389 for (i = 0; i < NUMPROPS; i++) {
1390 if (proptbl[i].used > 0)
1391 fwrite((char *) proptbl[i].ranges, sizeof(krb5_ui_4),
1392 proptbl[i].used, out);
1393 }
1394
1395 fclose(out);
1396 #endif
1397
1398 /*****************************************************************
1399 *
1400 * Generate the case mapping data.
1401 *
1402 *****************************************************************/
1403
1404 #if HARDCODE_DATA
1405 fprintf(out, PREF "krb5_ui_4 _uccase_size = %ld;\n\n",
1406 (long) (upper_used + lower_used + title_used));
1407
1408 fprintf(out, PREF "krb5_ui_2 _uccase_len[2] = {%ld, %ld};\n\n",
1409 (long) upper_used, (long) lower_used);
1410 fprintf(out, PREF "krb5_ui_4 _uccase_map[] = {");
1411
1412 if (upper_used > 0)
1413 /*
1414 * Write the upper case table.
1415 */
1416 write_case(out, upper, upper_used, 1);
1417
1418 if (lower_used > 0)
1419 /*
1420 * Write the lower case table.
1421 */
1422 write_case(out, lower, lower_used, !upper_used);
1423
1424 if (title_used > 0)
1425 /*
1426 * Write the title case table.
1427 */
1428 write_case(out, title, title_used, !(upper_used||lower_used));
1429
1430 if (!(upper_used || lower_used || title_used))
1431 fprintf(out, "\t0");
1432
1433 fprintf(out, "\n};\n\n");
1434 #else
1435 /*
1436 * Open the case.dat file.
1437 */
1438 snprintf(path, sizeof path, "%s" LDAP_DIRSEP "case.dat", opath);
1439 if ((out = fopen(path, "wb")) == 0)
1440 return;
1441
1442 /*
1443 * Write the case mapping tables.
1444 */
1445 hdr[1] = upper_used + lower_used + title_used;
1446 casecnt[0] = upper_used;
1447 casecnt[1] = lower_used;
1448
1449 /*
1450 * Write the header.
1451 */
1452 fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1453
1454 /*
1455 * Write the upper and lower case table sizes.
1456 */
1457 fwrite((char *) casecnt, sizeof(krb5_ui_2), 2, out);
1458
1459 if (upper_used > 0)
1460 /*
1461 * Write the upper case table.
1462 */
1463 fwrite((char *) upper, sizeof(_case_t), upper_used, out);
1464
1465 if (lower_used > 0)
1466 /*
1467 * Write the lower case table.
1468 */
1469 fwrite((char *) lower, sizeof(_case_t), lower_used, out);
1470
1471 if (title_used > 0)
1472 /*
1473 * Write the title case table.
1474 */
1475 fwrite((char *) title, sizeof(_case_t), title_used, out);
1476
1477 fclose(out);
1478 #endif
1479
1480 /*****************************************************************
1481 *
1482 * Generate the composition data.
1483 *
1484 *****************************************************************/
1485
1486 /*
1487 * Create compositions from decomposition data
1488 */
1489 create_comps();
1490
1491 #if HARDCODE_DATA
1492 fprintf(out, PREF "krb5_ui_4 _uccomp_size = %ld;\n\n",
1493 comps_used * 4L);
1494
1495 fprintf(out, PREF "krb5_ui_4 _uccomp_data[] = {");
1496
1497 /*
1498 * Now, if comps exist, write them out.
1499 */
1500 if (comps_used > 0) {
1501 for (i=0; i<comps_used; i++) {
1502 if (i) fprintf(out, ",");
1503 fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx",
1504 (unsigned long) comps[i].comp, (unsigned long) comps[i].count,
1505 (unsigned long) comps[i].code1, (unsigned long) comps[i].code2);
1506 }
1507 } else {
1508 fprintf(out, "\t0");
1509 }
1510 fprintf(out, "\n};\n\n");
1511 #else
1512 /*
1513 * Open the comp.dat file.
1514 */
1515 snprintf(path, sizeof path, "%s" LDAP_DIRSEP "comp.dat", opath);
1516 if ((out = fopen(path, "wb")) == 0)
1517 return;
1518
1519 /*
1520 * Write the header.
1521 */
1522 hdr[1] = (krb5_ui_2) comps_used * 4;
1523 fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1524
1525 /*
1526 * Write out the byte count to maintain header size.
1527 */
1528 bytes = comps_used * sizeof(_comp_t);
1529 fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1530
1531 /*
1532 * Now, if comps exist, write them out.
1533 */
1534 if (comps_used > 0)
1535 fwrite((char *) comps, sizeof(_comp_t), comps_used, out);
1536
1537 fclose(out);
1538 #endif
1539
1540 /*****************************************************************
1541 *
1542 * Generate the decomposition data.
1543 *
1544 *****************************************************************/
1545
1546 /*
1547 * Fully expand all decompositions before generating the output file.
1548 */
1549 expand_decomp();
1550
1551 #if HARDCODE_DATA
1552 fprintf(out, PREF "krb5_ui_4 _ucdcmp_size = %ld;\n\n",
1553 decomps_used * 2L);
1554
1555 fprintf(out, PREF "krb5_ui_4 _ucdcmp_nodes[] = {");
1556
1557 if (decomps_used) {
1558 /*
1559 * Write the list of decomp nodes.
1560 */
1561 for (i = idx = 0; i < decomps_used; i++) {
1562 fprintf(out, "\n\t0x%08lx, 0x%08lx,",
1563 (unsigned long) decomps[i].code, (unsigned long) idx);
1564 idx += decomps[i].used;
1565 }
1566
1567 /*
1568 * Write the sentinel index as the last decomp node.
1569 */
1570 fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx);
1571
1572 fprintf(out, PREF "krb5_ui_4 _ucdcmp_decomp[] = {");
1573 /*
1574 * Write the decompositions themselves.
1575 */
1576 k = 0;
1577 for (i = 0; i < decomps_used; i++)
1578 for (j=0; j<decomps[i].used; j++) {
1579 if (k) fprintf(out, ",");
1580 if (!(k&3)) fprintf(out,"\n\t");
1581 else fprintf(out, " ");
1582 k++;
1583 fprintf(out, "0x%08lx", (unsigned long) decomps[i].decomp[j]);
1584 }
1585 fprintf(out, "\n};\n\n");
1586 }
1587 #else
1588 /*
1589 * Open the decomp.dat file.
1590 */
1591 snprintf(path, sizeof path, "%s" LDAP_DIRSEP "decomp.dat", opath);
1592 if ((out = fopen(path, "wb")) == 0)
1593 return;
1594
1595 hdr[1] = decomps_used;
1596
1597 /*
1598 * Write the header.
1599 */
1600 fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1601
1602 /*
1603 * Write a temporary byte count which will be calculated as the
1604 * decompositions are written out.
1605 */
1606 bytes = 0;
1607 fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1608
1609 if (decomps_used) {
1610 /*
1611 * Write the list of decomp nodes.
1612 */
1613 for (i = idx = 0; i < decomps_used; i++) {
1614 fwrite((char *) &decomps[i].code, sizeof(krb5_ui_4), 1, out);
1615 fwrite((char *) &idx, sizeof(krb5_ui_4), 1, out);
1616 idx += decomps[i].used;
1617 }
1618
1619 /*
1620 * Write the sentinel index as the last decomp node.
1621 */
1622 fwrite((char *) &idx, sizeof(krb5_ui_4), 1, out);
1623
1624 /*
1625 * Write the decompositions themselves.
1626 */
1627 for (i = 0; i < decomps_used; i++)
1628 fwrite((char *) decomps[i].decomp, sizeof(krb5_ui_4),
1629 decomps[i].used, out);
1630
1631 /*
1632 * Seek back to the beginning and write the byte count.
1633 */
1634 bytes = (sizeof(krb5_ui_4) * idx) +
1635 (sizeof(krb5_ui_4) * ((hdr[1] << 1) + 1));
1636 fseek(out, sizeof(krb5_ui_2) << 1, 0L);
1637 fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1638
1639 fclose(out);
1640 }
1641 #endif
1642
1643 #ifdef HARDCODE_DATA
1644 fprintf(out, PREF "krb5_ui_4 _uckdcmp_size = %ld;\n\n",
1645 kdecomps_used * 2L);
1646
1647 fprintf(out, PREF "krb5_ui_4 _uckdcmp_nodes[] = {");
1648
1649 if (kdecomps_used) {
1650 /*
1651 * Write the list of kdecomp nodes.
1652 */
1653 for (i = idx = 0; i < kdecomps_used; i++) {
1654 fprintf(out, "\n\t0x%08lx, 0x%08lx,",
1655 (unsigned long) kdecomps[i].code, (unsigned long) idx);
1656 idx += kdecomps[i].used;
1657 }
1658
1659 /*
1660 * Write the sentinel index as the last decomp node.
1661 */
1662 fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx);
1663
1664 fprintf(out, PREF "krb5_ui_4 _uckdcmp_decomp[] = {");
1665
1666 /*
1667 * Write the decompositions themselves.
1668 */
1669 k = 0;
1670 for (i = 0; i < kdecomps_used; i++)
1671 for (j=0; j<kdecomps[i].used; j++) {
1672 if (k) fprintf(out, ",");
1673 if (!(k&3)) fprintf(out,"\n\t");
1674 else fprintf(out, " ");
1675 k++;
1676 fprintf(out, "0x%08lx", (unsigned long) kdecomps[i].decomp[j]);
1677 }
1678 fprintf(out, "\n};\n\n");
1679 }
1680 #else
1681 /*
1682 * Open the kdecomp.dat file.
1683 */
1684 snprintf(path, sizeof path, "%s" LDAP_DIRSEP "kdecomp.dat", opath);
1685 if ((out = fopen(path, "wb")) == 0)
1686 return;
1687
1688 hdr[1] = kdecomps_used;
1689
1690 /*
1691 * Write the header.
1692 */
1693 fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1694
1695 /*
1696 * Write a temporary byte count which will be calculated as the
1697 * decompositions are written out.
1698 */
1699 bytes = 0;
1700 fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1701
1702 if (kdecomps_used) {
1703 /*
1704 * Write the list of kdecomp nodes.
1705 */
1706 for (i = idx = 0; i < kdecomps_used; i++) {
1707 fwrite((char *) &kdecomps[i].code, sizeof(krb5_ui_4), 1, out);
1708 fwrite((char *) &idx, sizeof(krb5_ui_4), 1, out);
1709 idx += kdecomps[i].used;
1710 }
1711
1712 /*
1713 * Write the sentinel index as the last decomp node.
1714 */
1715 fwrite((char *) &idx, sizeof(krb5_ui_4), 1, out);
1716
1717 /*
1718 * Write the decompositions themselves.
1719 */
1720 for (i = 0; i < kdecomps_used; i++)
1721 fwrite((char *) kdecomps[i].decomp, sizeof(krb5_ui_4),
1722 kdecomps[i].used, out);
1723
1724 /*
1725 * Seek back to the beginning and write the byte count.
1726 */
1727 bytes = (sizeof(krb5_ui_4) * idx) +
1728 (sizeof(krb5_ui_4) * ((hdr[1] << 1) + 1));
1729 fseek(out, sizeof(krb5_ui_2) << 1, 0L);
1730 fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1731
1732 fclose(out);
1733 }
1734 #endif
1735
1736 /*****************************************************************
1737 *
1738 * Generate the combining class data.
1739 *
1740 *****************************************************************/
1741 #ifdef HARDCODE_DATA
1742 fprintf(out, PREF "krb5_ui_4 _uccmcl_size = %ld;\n\n", (long) ccl_used);
1743
1744 fprintf(out, PREF "krb5_ui_4 _uccmcl_nodes[] = {");
1745
1746 if (ccl_used > 0) {
1747 /*
1748 * Write the combining class ranges out.
1749 */
1750 for (i = 0; i<ccl_used; i++) {
1751 if (i) fprintf(out, ",");
1752 if (!(i&3)) fprintf(out, "\n\t");
1753 else fprintf(out, " ");
1754 fprintf(out, "0x%08lx", (unsigned long) ccl[i]);
1755 }
1756 } else {
1757 fprintf(out, "\t0");
1758 }
1759 fprintf(out, "\n};\n\n");
1760 #else
1761 /*
1762 * Open the cmbcl.dat file.
1763 */
1764 snprintf(path, sizeof path, "%s" LDAP_DIRSEP "cmbcl.dat", opath);
1765 if ((out = fopen(path, "wb")) == 0)
1766 return;
1767
1768 /*
1769 * Set the number of ranges used. Each range has a combining class which
1770 * means each entry is a 3-tuple.
1771 */
1772 hdr[1] = ccl_used / 3;
1773
1774 /*
1775 * Write the header.
1776 */
1777 fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1778
1779 /*
1780 * Write out the byte count to maintain header size.
1781 */
1782 bytes = ccl_used * sizeof(krb5_ui_4);
1783 fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1784
1785 if (ccl_used > 0)
1786 /*
1787 * Write the combining class ranges out.
1788 */
1789 fwrite((char *) ccl, sizeof(krb5_ui_4), ccl_used, out);
1790
1791 fclose(out);
1792 #endif
1793
1794 /*****************************************************************
1795 *
1796 * Generate the number data.
1797 *
1798 *****************************************************************/
1799
1800 #if HARDCODE_DATA
1801 fprintf(out, PREF "krb5_ui_4 _ucnum_size = %lu;\n\n",
1802 (unsigned long)ncodes_used<<1);
1803
1804 fprintf(out, PREF "krb5_ui_4 _ucnum_nodes[] = {");
1805
1806 /*
1807 * Now, if number mappings exist, write them out.
1808 */
1809 if (ncodes_used > 0) {
1810 for (i = 0; i<ncodes_used; i++) {
1811 if (i) fprintf(out, ",");
1812 if (!(i&1)) fprintf(out, "\n\t");
1813 else fprintf(out, " ");
1814 fprintf(out, "0x%08lx, 0x%08lx",
1815 (unsigned long) ncodes[i].code, (unsigned long) ncodes[i].idx);
1816 }
1817 fprintf(out, "\n};\n\n");
1818
1819 fprintf(out, PREF "short _ucnum_vals[] = {");
1820 for (i = 0; i<nums_used; i++) {
1821 if (i) fprintf(out, ",");
1822 if (!(i&3)) fprintf(out, "\n\t");
1823 else fprintf(out, " ");
1824 if (nums[i].numerator < 0) {
1825 fprintf(out, "%6d, 0x%04x",
1826 nums[i].numerator, nums[i].denominator);
1827 } else {
1828 fprintf(out, "0x%04x, 0x%04x",
1829 nums[i].numerator, nums[i].denominator);
1830 }
1831 }
1832 fprintf(out, "\n};\n\n");
1833 }
1834 #else
1835 /*
1836 * Open the num.dat file.
1837 */
1838 snprintf(path, sizeof path, "%s" LDAP_DIRSEP "num.dat", opath);
1839 if ((out = fopen(path, "wb")) == 0)
1840 return;
1841
1842 /*
1843 * The count part of the header will be the total number of codes that
1844 * have numbers.
1845 */
1846 hdr[1] = (krb5_ui_2) (ncodes_used << 1);
1847 bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t));
1848
1849 /*
1850 * Write the header.
1851 */
1852 fwrite((char *) hdr, sizeof(krb5_ui_2), 2, out);
1853
1854 /*
1855 * Write out the byte count to maintain header size.
1856 */
1857 fwrite((char *) &bytes, sizeof(krb5_ui_4), 1, out);
1858
1859 /*
1860 * Now, if number mappings exist, write them out.
1861 */
1862 if (ncodes_used > 0) {
1863 fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out);
1864 fwrite((char *) nums, sizeof(_num_t), nums_used, out);
1865 }
1866 #endif
1867
1868 fclose(out);
1869 }
1870
/*
 * Print the command-line synopsis and option descriptions to stderr,
 * using `prog' as the program name, then terminate the process with
 * exit status 1.  This function does not return.
 */
static void
usage(char *prog)
{
    /* Synopsis line, followed by a blank line. */
    fprintf(stderr,
            "Usage: %s [-o output-directory|-x composition-exclusions]"
            " datafile1 datafile2 ...\n\n",
            prog);

    /* One description per option. */
    fprintf(stderr,
            "-o output-directory\n\t\tWrite the output files to a different"
            " directory (default: .).\n"
            "-x composition-exclusion\n\t\tFile of composition codes"
            " that should be excluded.\n");

    exit(1);
}
1885
/*
 * Entry point of the table generator.
 *
 * Arguments are processed left to right:
 *   -o DIR   use DIR as the output directory (default ".")
 *   -x FILE  read composition exclusions from FILE via read_compexdata()
 *   other    treated as a data file and passed to read_cdata()
 *
 * An unknown option, or an -o/-x that is not followed by an argument,
 * prints the usage text and exits with status 1.  A file that cannot be
 * opened is reported on stderr and skipped.  After all arguments are
 * consumed the tables are written with write_cdata().
 *
 * Returns 0, including when some input files could not be opened.
 */
int
main(int argc, char *argv[])
{
    FILE *in;
    char *prog, *opath;

    prog = lutil_progname("ucgendat", argc, argv);

    opath = NULL;

    /* Skip the program name. */
    argc--;
    argv++;

    while (argc > 0) {
        if (argv[0][0] == '-') {
            switch (argv[0][1]) {
            case 'o':
                argc--;
                argv++;
                /* The option needs a value; argv[0] is NULL otherwise. */
                if (argc <= 0)
                    usage(prog);
                opath = argv[0];
                break;
            case 'x':
                argc--;
                argv++;
                /* The option needs a value; argv[0] is NULL otherwise. */
                if (argc <= 0)
                    usage(prog);
                if ((in = fopen(argv[0], "r")) == NULL) {
                    fprintf(stderr,
                            "%s: unable to open composition exclusion file %s\n",
                            prog, argv[0]);
                } else {
                    read_compexdata(in);
                    fclose(in);
                }
                break;
            default:
                usage(prog);
            }
        } else {
            if ((in = fopen(argv[0], "r")) == NULL) {
                fprintf(stderr, "%s: unable to open ctype file %s\n",
                        prog, argv[0]);
            } else {
                read_cdata(in);
                fclose(in);
            }
        }
        argc--;
        argv++;
    }

    if (opath == NULL)
        opath = ".";
    write_cdata(opath);

    return 0;
}
1946