xref: /freebsd/crypto/krb5/src/lib/krb5/unicode/ucdata/ucdata.c (revision 7f2fe78b9dd5f51c821d771b63d2e096f6fd49e9)
1 /*
2  * Copyright 1998-2008 The OpenLDAP Foundation.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted only as authorized by the OpenLDAP
7  * Public License.
8  *
9  * A copy of this license is available in file LICENSE in the
10  * top-level directory of the distribution or, alternatively, at
11  * <https://www.OpenLDAP.org/license.html>.
12  */
13 /* Copyright 2001 Computing Research Labs, New Mexico State University
14  *
15  * Permission is hereby granted, free of charge, to any person obtaining a
16  * copy of this software and associated documentation files (the "Software"),
17  * to deal in the Software without restriction, including without limitation
18  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19  * and/or sell copies of the Software, and to permit persons to whom the
20  * Software is furnished to do so, subject to the following conditions:
21  *
22  * The above copyright notice and this permission notice shall be included in
23  * all copies or substantial portions of the Software.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
28  * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
29  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
30  * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
31  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32  */
33 
34 /*
35  * This work is part of OpenLDAP Software <https://www.openldap.org/>.
36  * $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucdata.c,v 1.36 2008/01/07 23:20:05 kurt Exp $
37  * $Id: ucdata.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $"
38  */
39 
40 #include "k5-int.h"
41 #include "k5-utf8.h"
42 #include "k5-unicode.h"
43 
44 #include "ucdata.h"
45 
46 #ifndef HARDCODE_DATA
47 #define	HARDCODE_DATA	1
48 #endif
49 
50 #if HARDCODE_DATA
51 #include "uctable.h"
52 #endif
53 
54 /**************************************************************************
55  *
56  * Miscellaneous types, data, and support functions.
57  *
58  **************************************************************************/
59 
60 typedef struct {
61     krb5_ui_2 bom;
62     krb5_ui_2 cnt;
63     union {
64         krb5_ui_4 bytes;
65         krb5_ui_2 len[2];
66     } size;
67 } _ucheader_t;
68 
69 /*
70  * A simple array of 32-bit masks for lookup.
71  */
72 static krb5_ui_4 masks32[32] = {
73 	0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
74 	0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
75 	0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
76 	0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
77 	0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL,
78 	0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
79 	0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
80 	0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL
81 };
82 
/* Exchange the two low-order bytes of a 16-bit quantity. */
#define endian_short(cc) ((((cc) & 0xff) << 8) | ((cc) >> 8))
/* Reverse the four bytes of a 32-bit quantity. */
#define endian_long(cc) (((cc) >> 24) | ((((cc) >> 16) & 0xff) << 8) | \
                         ((((cc) >> 8) & 0xff) << 16) | (((cc) & 0xff) << 24))
86 
87 #if !HARDCODE_DATA
88 static FILE *
_ucopenfile(char * paths,char * filename,char * mode)89 _ucopenfile(char *paths, char *filename, char *mode)
90 {
91     FILE *f;
92     char *fp, *dp, *pp, path[BUFSIZ];
93 
94     if (filename == 0 || *filename == 0)
95       return 0;
96 
97     dp = paths;
98     while (dp && *dp) {
99         pp = path;
100         while (*dp && *dp != ':')
101           *pp++ = *dp++;
102         *pp++ = *LDAP_DIRSEP;
103 
104         fp = filename;
105         while (*fp)
106           *pp++ = *fp++;
107         *pp = 0;
108 
109         if ((f = fopen(path, mode)) != 0)
110           return f;
111 
112         if (*dp == ':')
113           dp++;
114     }
115 
116     return 0;
117 }
118 #endif
119 
120 /**************************************************************************
121  *
122  * Support for the character properties.
123  *
124  **************************************************************************/
125 
126 #if !HARDCODE_DATA
127 
128 static krb5_ui_4 _ucprop_size;
129 static krb5_ui_2 *_ucprop_offsets;
130 static krb5_ui_4 *_ucprop_ranges;
131 
132 /*
133  * Return -1 on error, 0 if okay
134  */
135 static int
_ucprop_load(char * paths,int reload)136 _ucprop_load(char *paths, int reload)
137 {
138     FILE *in;
139     krb5_ui_4 size, i;
140     _ucheader_t hdr;
141 
142     if (_ucprop_size > 0) {
143         if (!reload)
144           /*
145            * The character properties have already been loaded.
146            */
147           return 0;
148 
149         /*
150          * Unload the current character property data in preparation for
151          * loading a new copy.  Only the first array has to be deallocated
152          * because all the memory for the arrays is allocated as a single
153          * block.
154          */
155         free((char *) _ucprop_offsets);
156         _ucprop_size = 0;
157     }
158 
159     if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0)
160       return -1;
161 
162     /*
163      * Load the header.
164      */
165     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
166 
167     if (hdr.bom == 0xfffe) {
168         hdr.cnt = endian_short(hdr.cnt);
169         hdr.size.bytes = endian_long(hdr.size.bytes);
170     }
171 
172     if ((_ucprop_size = hdr.cnt) == 0) {
173         fclose(in);
174         return -1;
175     }
176 
177     /*
178      * Allocate all the storage needed for the lookup table.
179      */
180     _ucprop_offsets = (krb5_ui_2 *) malloc(hdr.size.bytes);
181 
182     /*
183      * Calculate the offset into the storage for the ranges.  The offsets
184      * array is on a 4-byte boundary and one larger than the value provided in
185      * the header count field.  This means the offset to the ranges must be
186      * calculated after aligning the count to a 4-byte boundary.
187      */
188     if ((size = ((hdr.cnt + 1) * sizeof(krb5_ui_2))) & 3)
189       size += 4 - (size & 3);
190     size >>= 1;
191     _ucprop_ranges = (krb5_ui_4 *) (_ucprop_offsets + size);
192 
193     /*
194      * Load the offset array.
195      */
196     fread((char *) _ucprop_offsets, sizeof(krb5_ui_2), size, in);
197 
198     /*
199      * Do an endian swap if necessary.  Don't forget there is an extra node on
200      * the end with the final index.
201      */
202     if (hdr.bom == 0xfffe) {
203         for (i = 0; i <= _ucprop_size; i++)
204           _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]);
205     }
206 
207     /*
208      * Load the ranges.  The number of elements is in the last array position
209      * of the offsets.
210      */
211     fread((char *) _ucprop_ranges, sizeof(krb5_ui_4),
212           _ucprop_offsets[_ucprop_size], in);
213 
214     fclose(in);
215 
216     /*
217      * Do an endian swap if necessary.
218      */
219     if (hdr.bom == 0xfffe) {
220         for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++)
221           _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]);
222     }
223     return 0;
224 }
225 
226 static void
_ucprop_unload(void)227 _ucprop_unload(void)
228 {
229     if (_ucprop_size == 0)
230       return;
231 
232     /*
233      * Only need to free the offsets because the memory is allocated as a
234      * single block.
235      */
236     free((char *) _ucprop_offsets);
237     _ucprop_size = 0;
238 }
239 #endif
240 
241 static int
_ucprop_lookup(krb5_ui_4 code,krb5_ui_4 n)242 _ucprop_lookup(krb5_ui_4 code, krb5_ui_4 n)
243 {
244     long l, r, m;
245 
246     if (_ucprop_size == 0)
247       return 0;
248 
249     /*
250      * There is an extra node on the end of the offsets to allow this routine
251      * to work right.  If the index is 0xffff, then there are no nodes for the
252      * property.
253      */
254     if ((l = _ucprop_offsets[n]) == 0xffff)
255       return 0;
256 
257     /*
258      * Locate the next offset that is not 0xffff.  The sentinel at the end of
259      * the array is the max index value.
260      */
261     for (m = 1;
262          n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ;
263 
264     r = _ucprop_offsets[n + m] - 1;
265 
266     while (l <= r) {
267         /*
268          * Determine a "mid" point and adjust to make sure the mid point is at
269          * the beginning of a range pair.
270          */
271         m = (l + r) >> 1;
272         m -= (m & 1);
273         if (code > _ucprop_ranges[m + 1])
274           l = m + 2;
275         else if (code < _ucprop_ranges[m])
276           r = m - 2;
277         else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
278           return 1;
279     }
280     return 0;
281 }
282 
283 int
ucisprop(krb5_ui_4 code,krb5_ui_4 mask1,krb5_ui_4 mask2)284 ucisprop(krb5_ui_4 code, krb5_ui_4 mask1, krb5_ui_4 mask2)
285 {
286     krb5_ui_4 i;
287 
288     if (mask1 == 0 && mask2 == 0)
289       return 0;
290 
291     for (i = 0; mask1 && i < 32; i++) {
292         if ((mask1 & masks32[i]) && _ucprop_lookup(code, i))
293           return 1;
294     }
295 
296     for (i = 32; mask2 && i < _ucprop_size; i++) {
297         if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i))
298           return 1;
299     }
300 
301     return 0;
302 }
303 
304 /**************************************************************************
305  *
306  * Support for case mapping.
307  *
308  **************************************************************************/
309 
310 #if !HARDCODE_DATA
311 
312 /* These record the number of slots in the map.
313  * There are 3 words per slot.
314  */
315 static krb5_ui_4 _uccase_size;
316 static krb5_ui_2 _uccase_len[2];
317 static krb5_ui_4 *_uccase_map;
318 
319 /*
320  * Return -1 on error, 0 if okay
321  */
322 static int
_uccase_load(char * paths,int reload)323 _uccase_load(char *paths, int reload)
324 {
325     FILE *in;
326     krb5_ui_4 i;
327     _ucheader_t hdr;
328 
329     if (_uccase_size > 0) {
330         if (!reload)
331           /*
332            * The case mappings have already been loaded.
333            */
334           return 0;
335 
336         free((char *) _uccase_map);
337         _uccase_size = 0;
338     }
339 
340     if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0)
341       return -1;
342 
343     /*
344      * Load the header.
345      */
346     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
347 
348     if (hdr.bom == 0xfffe) {
349         hdr.cnt = endian_short(hdr.cnt);
350         hdr.size.len[0] = endian_short(hdr.size.len[0]);
351         hdr.size.len[1] = endian_short(hdr.size.len[1]);
352     }
353 
354     /*
355      * Set the node count and lengths of the upper and lower case mapping
356      * tables.
357      */
358     _uccase_size = hdr.cnt;
359     _uccase_len[0] = hdr.size.len[0];
360     _uccase_len[1] = hdr.size.len[1];
361 
362     _uccase_map = (krb5_ui_4 *)
363         malloc(_uccase_size * 3 * sizeof(krb5_ui_4));
364 
365     /*
366      * Load the case mapping table.
367      */
368     fread((char *) _uccase_map, sizeof(krb5_ui_4), _uccase_size * 3, in);
369 
370     /*
371      * Do an endian swap if necessary.
372      */
373     if (hdr.bom == 0xfffe) {
374         for (i = 0; i < _uccase_size * 3; i++)
375           _uccase_map[i] = endian_long(_uccase_map[i]);
376     }
377     fclose(in);
378     return 0;
379 }
380 
381 static void
_uccase_unload(void)382 _uccase_unload(void)
383 {
384     if (_uccase_size == 0)
385       return;
386 
387     free((char *) _uccase_map);
388     _uccase_size = 0;
389 }
390 #endif
391 
392 static krb5_ui_4
_uccase_lookup(krb5_ui_4 code,long l,long r,int field)393 _uccase_lookup(krb5_ui_4 code, long l, long r, int field)
394 {
395     long m;
396 	const krb5_ui_4 *tmp;
397 
398     /*
399      * Do the binary search.
400      */
401     while (l <= r) {
402         /*
403          * Determine a "mid" point and adjust to make sure the mid point is at
404          * the beginning of a case mapping triple.
405          */
406         m = (l + r) >> 1;
407 		tmp = &_uccase_map[m*3];
408         if (code > *tmp)
409           l = m + 1;
410         else if (code < *tmp)
411           r = m - 1;
412         else if (code == *tmp)
413           return tmp[field];
414     }
415 
416     return code;
417 }
418 
419 krb5_ui_4
uctoupper(krb5_ui_4 code)420 uctoupper(krb5_ui_4 code)
421 {
422     int field;
423     long l, r;
424 
425     if (ucisupper(code))
426       return code;
427 
428     if (ucislower(code)) {
429         /*
430          * The character is lower case.
431          */
432         field = 2;
433         l = _uccase_len[0];
434         r = (l + _uccase_len[1]) - 1;
435     } else {
436         /*
437          * The character is title case.
438          */
439         field = 1;
440         l = _uccase_len[0] + _uccase_len[1];
441         r = _uccase_size - 1;
442     }
443     return _uccase_lookup(code, l, r, field);
444 }
445 
446 krb5_ui_4
uctolower(krb5_ui_4 code)447 uctolower(krb5_ui_4 code)
448 {
449     int field;
450     long l, r;
451 
452     if (ucislower(code))
453       return code;
454 
455     if (ucisupper(code)) {
456         /*
457          * The character is upper case.
458          */
459         field = 1;
460         l = 0;
461         r = _uccase_len[0] - 1;
462     } else {
463         /*
464          * The character is title case.
465          */
466         field = 2;
467         l = _uccase_len[0] + _uccase_len[1];
468         r = _uccase_size - 1;
469     }
470     return _uccase_lookup(code, l, r, field);
471 }
472 
473 krb5_ui_4
uctotitle(krb5_ui_4 code)474 uctotitle(krb5_ui_4 code)
475 {
476     int field;
477     long l, r;
478 
479     if (ucistitle(code))
480       return code;
481 
482     /*
483      * The offset will always be the same for converting to title case.
484      */
485     field = 2;
486 
487     if (ucisupper(code)) {
488         /*
489          * The character is upper case.
490          */
491         l = 0;
492         r = _uccase_len[0] - 1;
493     } else {
494         /*
495          * The character is lower case.
496          */
497         l = _uccase_len[0];
498         r = (l + _uccase_len[1]) - 1;
499     }
500     return _uccase_lookup(code, l, r, field);
501 }
502 
503 /**************************************************************************
504  *
505  * Support for compositions.
506  *
507  **************************************************************************/
508 
509 #if !HARDCODE_DATA
510 
511 static krb5_ui_4  _uccomp_size;
512 static krb5_ui_4 *_uccomp_data;
513 
514 /*
515  * Return -1 on error, 0 if okay
516  */
517 static int
_uccomp_load(char * paths,int reload)518 _uccomp_load(char *paths, int reload)
519 {
520     FILE *in;
521     krb5_ui_4 size, i;
522     _ucheader_t hdr;
523 
524     if (_uccomp_size > 0) {
525         if (!reload)
526             /*
527              * The compositions have already been loaded.
528              */
529             return 0;
530 
531         free((char *) _uccomp_data);
532         _uccomp_size = 0;
533     }
534 
535     if ((in = _ucopenfile(paths, "comp.dat", "rb")) == 0)
536         return -1;
537 
538     /*
539      * Load the header.
540      */
541     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
542 
543     if (hdr.bom == 0xfffe) {
544         hdr.cnt = endian_short(hdr.cnt);
545         hdr.size.bytes = endian_long(hdr.size.bytes);
546     }
547 
548     _uccomp_size = hdr.cnt;
549     _uccomp_data = (krb5_ui_4 *) malloc(hdr.size.bytes);
550 
551     /*
552      * Read the composition data in.
553      */
554     size = hdr.size.bytes / sizeof(krb5_ui_4);
555     fread((char *) _uccomp_data, sizeof(krb5_ui_4), size, in);
556 
557     /*
558      * Do an endian swap if necessary.
559      */
560     if (hdr.bom == 0xfffe) {
561         for (i = 0; i < size; i++)
562             _uccomp_data[i] = endian_long(_uccomp_data[i]);
563     }
564 
565     /*
566      * Assume that the data is ordered on count, so that all compositions
567      * of length 2 come first. Only handling length 2 for now.
568      */
569     for (i = 1; i < size; i += 4)
570       if (_uccomp_data[i] != 2)
571         break;
572     _uccomp_size = i - 1;
573 
574     fclose(in);
575     return 0;
576 }
577 
578 static void
_uccomp_unload(void)579 _uccomp_unload(void)
580 {
581     if (_uccomp_size == 0)
582         return;
583 
584     free((char *) _uccomp_data);
585     _uccomp_size = 0;
586 }
587 #endif
588 
589 int
uccomp(krb5_ui_4 node1,krb5_ui_4 node2,krb5_ui_4 * comp)590 uccomp(krb5_ui_4 node1, krb5_ui_4 node2, krb5_ui_4 *comp)
591 {
592     int l, r, m;
593 
594     l = 0;
595     r = _uccomp_size - 1;
596 
597     while (l <= r) {
598         m = ((r + l) >> 1);
599         m -= m & 3;
600         if (node1 > _uccomp_data[m+2])
601           l = m + 4;
602         else if (node1 < _uccomp_data[m+2])
603           r = m - 4;
604         else if (node2 > _uccomp_data[m+3])
605           l = m + 4;
606         else if (node2 < _uccomp_data[m+3])
607           r = m - 4;
608         else {
609             *comp = _uccomp_data[m];
610             return 1;
611         }
612     }
613     return 0;
614 }
615 
616 int
uccomp_hangul(krb5_ui_4 * str,int len)617 uccomp_hangul(krb5_ui_4 *str, int len)
618 {
619     const int SBase = 0xAC00, LBase = 0x1100,
620         VBase = 0x1161, TBase = 0x11A7,
621         LCount = 19, VCount = 21, TCount = 28,
622         NCount = VCount * TCount,   /* 588 */
623         SCount = LCount * NCount;   /* 11172 */
624 
625     int i, rlen;
626     krb5_ui_4 ch, last, lindex, sindex;
627 
628     last = str[0];
629     rlen = 1;
630     for ( i = 1; i < len; i++ ) {
631         ch = str[i];
632 
633         /* check if two current characters are L and V */
634         lindex = last - LBase;
635         if (lindex < (krb5_ui_4) LCount) {
636             krb5_ui_4 vindex = ch - VBase;
637             if (vindex < (krb5_ui_4) VCount) {
638                 /* make syllable of form LV */
639                 last = SBase + (lindex * VCount + vindex) * TCount;
640                 str[rlen-1] = last; /* reset last */
641                 continue;
642             }
643         }
644 
645         /* check if two current characters are LV and T */
646         sindex = last - SBase;
647         if (sindex < (krb5_ui_4) SCount
648 			&& (sindex % TCount) == 0)
649 		{
650             krb5_ui_4 tindex = ch - TBase;
651             if (tindex <= (krb5_ui_4) TCount) {
652                 /* make syllable of form LVT */
653                 last += tindex;
654                 str[rlen-1] = last; /* reset last */
655                 continue;
656             }
657         }
658 
659         /* if neither case was true, just add the character */
660         last = ch;
661         str[rlen] = ch;
662         rlen++;
663     }
664     return rlen;
665 }
666 
667 int
uccanoncomp(krb5_ui_4 * str,int len)668 uccanoncomp(krb5_ui_4 *str, int len)
669 {
670     int i, stpos, copos;
671     krb5_ui_4 cl, prevcl, st, ch, co;
672 
673     st = str[0];
674     stpos = 0;
675     copos = 1;
676     prevcl = uccombining_class(st) == 0 ? 0 : 256;
677 
678     for (i = 1; i < len; i++) {
679         ch = str[i];
680         cl = uccombining_class(ch);
681         if (uccomp(st, ch, &co) && (prevcl < cl || prevcl == 0))
682           st = str[stpos] = co;
683         else {
684             if (cl == 0) {
685                 stpos = copos;
686                 st = ch;
687             }
688             prevcl = cl;
689             str[copos++] = ch;
690         }
691     }
692 
693     return uccomp_hangul(str, copos);
694 }
695 
696 /**************************************************************************
697  *
698  * Support for decompositions.
699  *
700  **************************************************************************/
701 
702 #if !HARDCODE_DATA
703 
704 static krb5_ui_4  _ucdcmp_size;
705 static krb5_ui_4 *_ucdcmp_nodes;
706 static krb5_ui_4 *_ucdcmp_decomp;
707 
708 static krb5_ui_4  _uckdcmp_size;
709 static krb5_ui_4 *_uckdcmp_nodes;
710 static krb5_ui_4 *_uckdcmp_decomp;
711 
712 /*
713  * Return -1 on error, 0 if okay
714  */
715 static int
_ucdcmp_load(char * paths,int reload)716 _ucdcmp_load(char *paths, int reload)
717 {
718     FILE *in;
719     krb5_ui_4 size, i;
720     _ucheader_t hdr;
721 
722     if (_ucdcmp_size > 0) {
723         if (!reload)
724             /*
725              * The decompositions have already been loaded.
726              */
727           return 0;
728 
729         free((char *) _ucdcmp_nodes);
730         _ucdcmp_size = 0;
731     }
732 
733     if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0)
734         return -1;
735 
736     /*
737      * Load the header.
738      */
739     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
740 
741     if (hdr.bom == 0xfffe) {
742         hdr.cnt = endian_short(hdr.cnt);
743         hdr.size.bytes = endian_long(hdr.size.bytes);
744     }
745 
746     _ucdcmp_size = hdr.cnt << 1;
747     _ucdcmp_nodes = (krb5_ui_4 *) malloc(hdr.size.bytes);
748     _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1);
749 
750     /*
751      * Read the decomposition data in.
752      */
753     size = hdr.size.bytes / sizeof(krb5_ui_4);
754     fread((char *) _ucdcmp_nodes, sizeof(krb5_ui_4), size, in);
755 
756     /*
757      * Do an endian swap if necessary.
758      */
759     if (hdr.bom == 0xfffe) {
760         for (i = 0; i < size; i++)
761             _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]);
762     }
763     fclose(in);
764     return 0;
765 }
766 
767 /*
768  * Return -1 on error, 0 if okay
769  */
770 static int
_uckdcmp_load(char * paths,int reload)771 _uckdcmp_load(char *paths, int reload)
772 {
773     FILE *in;
774     krb5_ui_4 size, i;
775     _ucheader_t hdr;
776 
777     if (_uckdcmp_size > 0) {
778         if (!reload)
779             /*
780              * The decompositions have already been loaded.
781              */
782           return 0;
783 
784         free((char *) _uckdcmp_nodes);
785         _uckdcmp_size = 0;
786     }
787 
788     if ((in = _ucopenfile(paths, "kdecomp.dat", "rb")) == 0)
789         return -1;
790 
791     /*
792      * Load the header.
793      */
794     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
795 
796     if (hdr.bom == 0xfffe) {
797         hdr.cnt = endian_short(hdr.cnt);
798         hdr.size.bytes = endian_long(hdr.size.bytes);
799     }
800 
801     _uckdcmp_size = hdr.cnt << 1;
802     _uckdcmp_nodes = (krb5_ui_4 *) malloc(hdr.size.bytes);
803     _uckdcmp_decomp = _uckdcmp_nodes + (_uckdcmp_size + 1);
804 
805     /*
806      * Read the decomposition data in.
807      */
808     size = hdr.size.bytes / sizeof(krb5_ui_4);
809     fread((char *) _uckdcmp_nodes, sizeof(krb5_ui_4), size, in);
810 
811     /*
812      * Do an endian swap if necessary.
813      */
814     if (hdr.bom == 0xfffe) {
815         for (i = 0; i < size; i++)
816             _uckdcmp_nodes[i] = endian_long(_uckdcmp_nodes[i]);
817     }
818     fclose(in);
819     return 0;
820 }
821 
822 static void
_ucdcmp_unload(void)823 _ucdcmp_unload(void)
824 {
825     if (_ucdcmp_size == 0)
826       return;
827 
828     /*
829      * Only need to free the offsets because the memory is allocated as a
830      * single block.
831      */
832     free((char *) _ucdcmp_nodes);
833     _ucdcmp_size = 0;
834 }
835 
836 static void
_uckdcmp_unload(void)837 _uckdcmp_unload(void)
838 {
839     if (_uckdcmp_size == 0)
840       return;
841 
842     /*
843      * Only need to free the offsets because the memory is allocated as a
844      * single block.
845      */
846     free((char *) _uckdcmp_nodes);
847     _uckdcmp_size = 0;
848 }
849 #endif
850 
851 int
ucdecomp(krb5_ui_4 code,krb5_ui_4 * num,krb5_ui_4 ** decomp)852 ucdecomp(krb5_ui_4 code, krb5_ui_4 *num, krb5_ui_4 **decomp)
853 {
854     long l, r, m;
855 
856     if (code < _ucdcmp_nodes[0]) {
857 	return 0;
858     }
859 
860     l = 0;
861     r = _ucdcmp_nodes[_ucdcmp_size] - 1;
862 
863     while (l <= r) {
864         /*
865          * Determine a "mid" point and adjust to make sure the mid point is at
866          * the beginning of a code+offset pair.
867          */
868         m = (l + r) >> 1;
869         m -= (m & 1);
870         if (code > _ucdcmp_nodes[m])
871           l = m + 2;
872         else if (code < _ucdcmp_nodes[m])
873           r = m - 2;
874         else if (code == _ucdcmp_nodes[m]) {
875             *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1];
876             *decomp = (krb5_ui_4*)&_ucdcmp_decomp[_ucdcmp_nodes[m + 1]];
877             return 1;
878         }
879     }
880     return 0;
881 }
882 
883 int
uckdecomp(krb5_ui_4 code,krb5_ui_4 * num,krb5_ui_4 ** decomp)884 uckdecomp(krb5_ui_4 code, krb5_ui_4 *num, krb5_ui_4 **decomp)
885 {
886     long l, r, m;
887 
888     if (code < _uckdcmp_nodes[0]) {
889 	return 0;
890     }
891 
892     l = 0;
893     r = _uckdcmp_nodes[_uckdcmp_size] - 1;
894 
895     while (l <= r) {
896         /*
897          * Determine a "mid" point and adjust to make sure the mid point is at
898          * the beginning of a code+offset pair.
899          */
900         m = (l + r) >> 1;
901         m -= (m & 1);
902         if (code > _uckdcmp_nodes[m])
903           l = m + 2;
904         else if (code < _uckdcmp_nodes[m])
905           r = m - 2;
906         else if (code == _uckdcmp_nodes[m]) {
907             *num = _uckdcmp_nodes[m + 3] - _uckdcmp_nodes[m + 1];
908             *decomp = (krb5_ui_4*)&_uckdcmp_decomp[_uckdcmp_nodes[m + 1]];
909             return 1;
910         }
911     }
912     return 0;
913 }
914 
915 int
ucdecomp_hangul(krb5_ui_4 code,krb5_ui_4 * num,krb5_ui_4 decomp[])916 ucdecomp_hangul(krb5_ui_4 code, krb5_ui_4 *num, krb5_ui_4 decomp[])
917 {
918     if (!ucishangul(code))
919       return 0;
920 
921     code -= 0xac00;
922     decomp[0] = 0x1100 + (krb5_ui_4) (code / 588);
923     decomp[1] = 0x1161 + (krb5_ui_4) ((code % 588) / 28);
924     decomp[2] = 0x11a7 + (krb5_ui_4) (code % 28);
925     *num = (decomp[2] != 0x11a7) ? 3 : 2;
926 
927     return 1;
928 }
929 
930 /* mode == 0 for canonical, mode == 1 for compatibility */
931 static int
uccanoncompatdecomp(const krb5_ui_4 * in,int inlen,krb5_ui_4 ** out,int * outlen,short mode)932 uccanoncompatdecomp(const krb5_ui_4 *in, int inlen,
933 		    krb5_ui_4 **out, int *outlen, short mode)
934 {
935     int l, size;
936 	unsigned i, j, k;
937     krb5_ui_4 num, class, *decomp, hangdecomp[3];
938 
939     size = inlen * 2;
940     *out = (krb5_ui_4 *) malloc(size * sizeof(**out));
941     if (*out == NULL)
942         return *outlen = -1;
943 
944     i = 0;
945     for (j = 0; j < (unsigned) inlen; j++) {
946 	if (mode ? uckdecomp(in[j], &num, &decomp) : ucdecomp(in[j], &num, &decomp)) {
947             if ( size - i < num) {
948                 size = inlen + i - j + num - 1;
949                 *out = (krb5_ui_4 *) realloc(*out, size * sizeof(**out));
950                 if (*out == NULL)
951                     return *outlen = -1;
952             }
953             for (k = 0; k < num; k++) {
954                 class = uccombining_class(decomp[k]);
955                 if (class == 0) {
956                     (*out)[i] = decomp[k];
957                 } else {
958                     for (l = i; l > 0; l--)
959                         if (class >= uccombining_class((*out)[l-1]))
960                             break;
961                     memmove(*out + l + 1, *out + l, (i - l) * sizeof(**out));
962                     (*out)[l] = decomp[k];
963                 }
964                 i++;
965             }
966         } else if (ucdecomp_hangul(in[j], &num, hangdecomp)) {
967             if (size - i < num) {
968                 size = inlen + i - j + num - 1;
969                 *out = (krb5_ui_4 *) realloc(*out, size * sizeof(**out));
970                 if (*out == NULL)
971                     return *outlen = -1;
972             }
973             for (k = 0; k < num; k++) {
974                 (*out)[i] = hangdecomp[k];
975                 i++;
976             }
977         } else {
978             if (size - i < 1) {
979                 size = inlen + i - j;
980                 *out = (krb5_ui_4 *) realloc(*out, size * sizeof(**out));
981                 if (*out == NULL)
982                     return *outlen = -1;
983             }
984             class = uccombining_class(in[j]);
985             if (class == 0) {
986                 (*out)[i] = in[j];
987             } else {
988                 for (l = i; l > 0; l--)
989                     if (class >= uccombining_class((*out)[l-1]))
990                         break;
991                 memmove(*out + l + 1, *out + l, (i - l) * sizeof(**out));
992                 (*out)[l] = in[j];
993             }
994             i++;
995         }
996     }
997     return *outlen = i;
998 }
999 
1000 int
uccanondecomp(const krb5_ui_4 * in,int inlen,krb5_ui_4 ** out,int * outlen)1001 uccanondecomp(const krb5_ui_4 *in, int inlen,
1002               krb5_ui_4 **out, int *outlen)
1003 {
1004     return uccanoncompatdecomp(in, inlen, out, outlen, 0);
1005 }
1006 
1007 int
uccompatdecomp(const krb5_ui_4 * in,int inlen,krb5_ui_4 ** out,int * outlen)1008 uccompatdecomp(const krb5_ui_4 *in, int inlen,
1009 	       krb5_ui_4 **out, int *outlen)
1010 {
1011     return uccanoncompatdecomp(in, inlen, out, outlen, 1);
1012 }
1013 
1014 /**************************************************************************
1015  *
1016  * Support for combining classes.
1017  *
1018  **************************************************************************/
1019 
1020 #if !HARDCODE_DATA
1021 static krb5_ui_4  _uccmcl_size;
1022 static krb5_ui_4 *_uccmcl_nodes;
1023 
1024 /*
1025  * Return -1 on error, 0 if okay
1026  */
1027 static int
_uccmcl_load(char * paths,int reload)1028 _uccmcl_load(char *paths, int reload)
1029 {
1030     FILE *in;
1031     krb5_ui_4 i;
1032     _ucheader_t hdr;
1033 
1034     if (_uccmcl_size > 0) {
1035         if (!reload)
1036             /*
1037              * The combining classes have already been loaded.
1038              */
1039             return 0;
1040 
1041         free((char *) _uccmcl_nodes);
1042         _uccmcl_size = 0;
1043     }
1044 
1045     if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0)
1046         return -1;
1047 
1048     /*
1049      * Load the header.
1050      */
1051     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
1052 
1053     if (hdr.bom == 0xfffe) {
1054         hdr.cnt = endian_short(hdr.cnt);
1055         hdr.size.bytes = endian_long(hdr.size.bytes);
1056     }
1057 
1058     _uccmcl_size = hdr.cnt * 3;
1059     _uccmcl_nodes = (krb5_ui_4 *) malloc(hdr.size.bytes);
1060 
1061     /*
1062      * Read the combining classes in.
1063      */
1064     fread((char *) _uccmcl_nodes, sizeof(krb5_ui_4), _uccmcl_size, in);
1065 
1066     /*
1067      * Do an endian swap if necessary.
1068      */
1069     if (hdr.bom == 0xfffe) {
1070         for (i = 0; i < _uccmcl_size; i++)
1071             _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]);
1072     }
1073     fclose(in);
1074     return 0;
1075 }
1076 
1077 static void
_uccmcl_unload(void)1078 _uccmcl_unload(void)
1079 {
1080     if (_uccmcl_size == 0)
1081       return;
1082 
1083     free((char *) _uccmcl_nodes);
1084     _uccmcl_size = 0;
1085 }
1086 #endif
1087 
1088 krb5_ui_4
uccombining_class(krb5_ui_4 code)1089 uccombining_class(krb5_ui_4 code)
1090 {
1091     long l, r, m;
1092 
1093     l = 0;
1094     r = _uccmcl_size - 1;
1095 
1096     while (l <= r) {
1097         m = (l + r) >> 1;
1098         m -= (m % 3);
1099         if (code > _uccmcl_nodes[m + 1])
1100           l = m + 3;
1101         else if (code < _uccmcl_nodes[m])
1102           r = m - 3;
1103         else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1])
1104           return _uccmcl_nodes[m + 2];
1105     }
1106     return 0;
1107 }
1108 
1109 /**************************************************************************
1110  *
1111  * Support for numeric values.
1112  *
1113  **************************************************************************/
1114 
1115 #if !HARDCODE_DATA
1116 static krb5_ui_4 *_ucnum_nodes;
1117 static krb5_ui_4 _ucnum_size;
1118 static short *_ucnum_vals;
1119 
1120 /*
1121  * Return -1 on error, 0 if okay
1122  */
1123 static int
_ucnumb_load(char * paths,int reload)1124 _ucnumb_load(char *paths, int reload)
1125 {
1126     FILE *in;
1127     krb5_ui_4 size, i;
1128     _ucheader_t hdr;
1129 
1130     if (_ucnum_size > 0) {
1131         if (!reload)
1132           /*
1133            * The numbers have already been loaded.
1134            */
1135           return 0;
1136 
1137         free((char *) _ucnum_nodes);
1138         _ucnum_size = 0;
1139     }
1140 
1141     if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0)
1142       return -1;
1143 
1144     /*
1145      * Load the header.
1146      */
1147     fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
1148 
1149     if (hdr.bom == 0xfffe) {
1150         hdr.cnt = endian_short(hdr.cnt);
1151         hdr.size.bytes = endian_long(hdr.size.bytes);
1152     }
1153 
1154     _ucnum_size = hdr.cnt;
1155     _ucnum_nodes = (krb5_ui_4 *) malloc(hdr.size.bytes);
1156     _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size);
1157 
1158     /*
1159      * Read the combining classes in.
1160      */
1161     fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in);
1162 
1163     /*
1164      * Do an endian swap if necessary.
1165      */
1166     if (hdr.bom == 0xfffe) {
1167         for (i = 0; i < _ucnum_size; i++)
1168           _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]);
1169 
1170         /*
1171          * Determine the number of values that have to be adjusted.
1172          */
1173         size = (hdr.size.bytes -
1174                 (_ucnum_size * (sizeof(krb5_ui_4) << 1))) /
1175             sizeof(short);
1176 
1177         for (i = 0; i < size; i++)
1178           _ucnum_vals[i] = endian_short(_ucnum_vals[i]);
1179     }
1180     fclose(in);
1181     return 0;
1182 }
1183 
1184 static void
_ucnumb_unload(void)1185 _ucnumb_unload(void)
1186 {
1187     if (_ucnum_size == 0)
1188       return;
1189 
1190     free((char *) _ucnum_nodes);
1191     _ucnum_size = 0;
1192 }
1193 #endif
1194 
1195 int
ucnumber_lookup(krb5_ui_4 code,struct ucnumber * num)1196 ucnumber_lookup(krb5_ui_4 code, struct ucnumber *num)
1197 {
1198     long l, r, m;
1199     short *vp;
1200 
1201     l = 0;
1202     r = _ucnum_size - 1;
1203     while (l <= r) {
1204         /*
1205          * Determine a "mid" point and adjust to make sure the mid point is at
1206          * the beginning of a code+offset pair.
1207          */
1208         m = (l + r) >> 1;
1209         m -= (m & 1);
1210         if (code > _ucnum_nodes[m])
1211           l = m + 2;
1212         else if (code < _ucnum_nodes[m])
1213           r = m - 2;
1214         else {
1215             vp = (short *)_ucnum_vals + _ucnum_nodes[m + 1];
1216             num->numerator = (int) *vp++;
1217             num->denominator = (int) *vp;
1218             return 1;
1219         }
1220     }
1221     return 0;
1222 }
1223 
1224 int
ucdigit_lookup(krb5_ui_4 code,int * digit)1225 ucdigit_lookup(krb5_ui_4 code, int *digit)
1226 {
1227     long l, r, m;
1228     short *vp;
1229 
1230     l = 0;
1231     r = _ucnum_size - 1;
1232     while (l <= r) {
1233         /*
1234          * Determine a "mid" point and adjust to make sure the mid point is at
1235          * the beginning of a code+offset pair.
1236          */
1237         m = (l + r) >> 1;
1238         m -= (m & 1);
1239         if (code > _ucnum_nodes[m])
1240           l = m + 2;
1241         else if (code < _ucnum_nodes[m])
1242           r = m - 2;
1243         else {
1244             vp = (short *)_ucnum_vals + _ucnum_nodes[m + 1];
1245             if (*vp == *(vp + 1)) {
1246               *digit = *vp;
1247               return 1;
1248             }
1249             return 0;
1250         }
1251     }
1252     return 0;
1253 }
1254 
1255 struct ucnumber
ucgetnumber(krb5_ui_4 code)1256 ucgetnumber(krb5_ui_4 code)
1257 {
1258     struct ucnumber num;
1259 
1260     /*
1261      * Initialize with some arbitrary value, because the caller simply cannot
1262      * tell for sure if the code is a number without calling the ucisnumber()
1263      * macro before calling this function.
1264      */
1265     num.numerator = num.denominator = -111;
1266 
1267     (void) ucnumber_lookup(code, &num);
1268 
1269     return num;
1270 }
1271 
1272 int
ucgetdigit(krb5_ui_4 code)1273 ucgetdigit(krb5_ui_4 code)
1274 {
1275     int dig;
1276 
1277     /*
1278      * Initialize with some arbitrary value, because the caller simply cannot
1279      * tell for sure if the code is a number without calling the ucisdigit()
1280      * macro before calling this function.
1281      */
1282     dig = -111;
1283 
1284     (void) ucdigit_lookup(code, &dig);
1285 
1286     return dig;
1287 }
1288 
1289 /**************************************************************************
1290  *
1291  * Setup and cleanup routines.
1292  *
1293  **************************************************************************/
1294 
1295 #if HARDCODE_DATA
/* HARDCODE_DATA build: there is nothing to load, so always report success. */
int
ucdata_load(char *paths, int masks)
{
    (void) paths;
    (void) masks;
    return 0;
}
/* HARDCODE_DATA build: there is nothing to release, so this does nothing. */
void
ucdata_unload(int masks)
{
    (void) masks;
}
/* HARDCODE_DATA build: there is nothing to reload, so always report success. */
int
ucdata_reload(char *paths, int masks)
{
    (void) paths;
    (void) masks;
    return 0;
}
1299 #else
1300 /*
1301  * Return 0 if okay, negative on error
1302  */
1303 int
ucdata_load(char * paths,int masks)1304 ucdata_load(char *paths, int masks)
1305 {
1306     int error = 0;
1307 
1308     if (masks & UCDATA_CTYPE)
1309       error |= _ucprop_load(paths, 0) < 0 ? UCDATA_CTYPE : 0;
1310     if (masks & UCDATA_CASE)
1311       error |= _uccase_load(paths, 0) < 0 ? UCDATA_CASE : 0;
1312     if (masks & UCDATA_DECOMP)
1313       error |= _ucdcmp_load(paths, 0) < 0 ? UCDATA_DECOMP : 0;
1314     if (masks & UCDATA_CMBCL)
1315       error |= _uccmcl_load(paths, 0) < 0 ? UCDATA_CMBCL : 0;
1316     if (masks & UCDATA_NUM)
1317       error |= _ucnumb_load(paths, 0) < 0 ? UCDATA_NUM : 0;
1318     if (masks & UCDATA_COMP)
1319       error |= _uccomp_load(paths, 0) < 0 ? UCDATA_COMP : 0;
1320     if (masks & UCDATA_KDECOMP)
1321       error |= _uckdcmp_load(paths, 0) < 0 ? UCDATA_KDECOMP : 0;
1322 
1323     return -error;
1324 }
1325 
1326 void
ucdata_unload(int masks)1327 ucdata_unload(int masks)
1328 {
1329     if (masks & UCDATA_CTYPE)
1330       _ucprop_unload();
1331     if (masks & UCDATA_CASE)
1332       _uccase_unload();
1333     if (masks & UCDATA_DECOMP)
1334       _ucdcmp_unload();
1335     if (masks & UCDATA_CMBCL)
1336       _uccmcl_unload();
1337     if (masks & UCDATA_NUM)
1338       _ucnumb_unload();
1339     if (masks & UCDATA_COMP)
1340       _uccomp_unload();
1341     if (masks & UCDATA_KDECOMP)
1342       _uckdcmp_unload();
1343 }
1344 
1345 /*
1346  * Return 0 if okay, negative on error
1347  */
1348 int
ucdata_reload(char * paths,int masks)1349 ucdata_reload(char *paths, int masks)
1350 {
1351     int error = 0;
1352 
1353     if (masks & UCDATA_CTYPE)
1354         error |= _ucprop_load(paths, 1) < 0 ? UCDATA_CTYPE : 0;
1355     if (masks & UCDATA_CASE)
1356         error |= _uccase_load(paths, 1) < 0 ? UCDATA_CASE : 0;
1357     if (masks & UCDATA_DECOMP)
1358         error |= _ucdcmp_load(paths, 1) < 0 ? UCDATA_DECOMP : 0;
1359     if (masks & UCDATA_CMBCL)
1360         error |= _uccmcl_load(paths, 1) < 0 ? UCDATA_CMBCL : 0;
1361     if (masks & UCDATA_NUM)
1362         error |= _ucnumb_load(paths, 1) < 0 ? UCDATA_NUM : 0;
1363     if (masks & UCDATA_COMP)
1364         error |= _uccomp_load(paths, 1) < 0 ? UCDATA_COMP : 0;
1365     if (masks & UCDATA_KDECOMP)
1366         error |= _uckdcmp_load(paths, 1) < 0 ? UCDATA_KDECOMP : 0;
1367 
1368     return -error;
1369 }
1370 #endif
1371 
1372 #ifdef TEST
1373 
1374 void
main(void)1375 main(void)
1376 {
1377     int dig;
1378     krb5_ui_4 i, lo, *dec;
1379     struct ucnumber num;
1380 
1381 /*    ucdata_setup("."); */
1382 
1383     if (ucisweak(0x30))
1384       printf("WEAK\n");
1385     else
1386       printf("NOT WEAK\n");
1387 
1388     printf("LOWER 0x%04lX\n", uctolower(0xff3a));
1389     printf("UPPER 0x%04lX\n", uctoupper(0xff5a));
1390 
1391     if (ucisalpha(0x1d5))
1392       printf("ALPHA\n");
1393     else
1394       printf("NOT ALPHA\n");
1395 
1396     if (ucisupper(0x1d5)) {
1397         printf("UPPER\n");
1398         lo = uctolower(0x1d5);
1399         printf("0x%04lx\n", lo);
1400         lo = uctotitle(0x1d5);
1401         printf("0x%04lx\n", lo);
1402     } else
1403       printf("NOT UPPER\n");
1404 
1405     if (ucistitle(0x1d5))
1406       printf("TITLE\n");
1407     else
1408       printf("NOT TITLE\n");
1409 
1410     if (uciscomposite(0x1d5))
1411       printf("COMPOSITE\n");
1412     else
1413       printf("NOT COMPOSITE\n");
1414 
1415     if (ucdecomp(0x1d5, &lo, &dec)) {
1416         for (i = 0; i < lo; i++)
1417           printf("0x%04lx ", dec[i]);
1418         putchar('\n');
1419     }
1420 
1421     if ((lo = uccombining_class(0x41)) != 0)
1422       printf("0x41 CCL %ld\n", lo);
1423 
1424     if (ucisxdigit(0xfeff))
1425       printf("0xFEFF HEX DIGIT\n");
1426     else
1427       printf("0xFEFF NOT HEX DIGIT\n");
1428 
1429     if (ucisdefined(0x10000))
1430       printf("0x10000 DEFINED\n");
1431     else
1432       printf("0x10000 NOT DEFINED\n");
1433 
1434     if (ucnumber_lookup(0x30, &num)) {
1435         if (num.denominator != 1)
1436           printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1437         else
1438           printf("UCNUMBER: 0x30 = %d\n", num.numerator);
1439     } else
1440       printf("UCNUMBER: 0x30 NOT A NUMBER\n");
1441 
1442     if (ucnumber_lookup(0xbc, &num)) {
1443         if (num.denominator != 1)
1444           printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1445         else
1446           printf("UCNUMBER: 0xbc = %d\n", num.numerator);
1447     } else
1448       printf("UCNUMBER: 0xbc NOT A NUMBER\n");
1449 
1450 
1451     if (ucnumber_lookup(0xff19, &num)) {
1452         if (num.denominator != 1)
1453           printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1454         else
1455           printf("UCNUMBER: 0xff19 = %d\n", num.numerator);
1456     } else
1457       printf("UCNUMBER: 0xff19 NOT A NUMBER\n");
1458 
1459     if (ucnumber_lookup(0x4e00, &num)) {
1460         if (num.denominator != 1)
1461           printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator);
1462         else
1463           printf("UCNUMBER: 0x4e00 = %d\n", num.numerator);
1464     } else
1465       printf("UCNUMBER: 0x4e00 NOT A NUMBER\n");
1466 
1467     if (ucdigit_lookup(0x06f9, &dig))
1468       printf("UCDIGIT: 0x6f9 = %d\n", dig);
1469     else
1470       printf("UCDIGIT: 0x6f9 NOT A NUMBER\n");
1471 
1472     dig = ucgetdigit(0x0969);
1473     printf("UCGETDIGIT: 0x969 = %d\n", dig);
1474 
1475     num = ucgetnumber(0x30);
1476     if (num.denominator != 1)
1477       printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1478     else
1479       printf("UCGETNUMBER: 0x30 = %d\n", num.numerator);
1480 
1481     num = ucgetnumber(0xbc);
1482     if (num.denominator != 1)
1483       printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1484     else
1485       printf("UCGETNUMBER: 0xbc = %d\n", num.numerator);
1486 
1487     num = ucgetnumber(0xff19);
1488     if (num.denominator != 1)
1489       printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1490     else
1491       printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator);
1492 
1493 /*    ucdata_cleanup(); */
1494     exit(0);
1495 }
1496 
1497 #endif /* TEST */
1498