1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
23
24 /*
25 * locale state implementation
26 */
27
28 #include "lclib.h"
29 #include "lclang.h"
30
31 #include <ctype.h>
32
33 static Lc_numeric_t default_numeric = { '.', -1 };
34
35 static Lc_t default_lc =
36 {
37 "C",
38 "POSIX",
39 &lc_languages[0],
40 &lc_territories[0],
41 &lc_charsets[0],
42 0,
43 LC_default|LC_checked|LC_local,
44 0,
45 {
46 { &default_lc, 0, 0 },
47 { &default_lc, 0, 0 },
48 { &default_lc, 0, 0 },
49 { &default_lc, 0, 0 },
50 { &default_lc, 0, 0 },
51 { &default_lc, 0, (void*)&default_numeric },
52 { &default_lc, 0, 0 },
53 { &default_lc, 0, 0 },
54 { &default_lc, 0, 0 },
55 { &default_lc, 0, 0 },
56 { &default_lc, 0, 0 },
57 { &default_lc, 0, 0 },
58 { &default_lc, 0, 0 },
59 { &default_lc, 0, 0 }
60 }
61 };
62
63 static Lc_numeric_t debug_numeric = { ',', '.' };
64
65 static Lc_t debug_lc =
66 {
67 "debug",
68 "debug",
69 &lc_languages[1],
70 &lc_territories[1],
71 &lc_charsets[0],
72 0,
73 LC_debug|LC_checked|LC_local,
74 0,
75 {
76 { &debug_lc, 0, 0 },
77 { &debug_lc, 0, 0 },
78 { &debug_lc, 0, 0 },
79 { &debug_lc, 0, 0 },
80 { &debug_lc, 0, 0 },
81 { &debug_lc, 0, (void*)&debug_numeric },
82 { &debug_lc, 0, 0 },
83 { &debug_lc, 0, 0 },
84 { &debug_lc, 0, 0 },
85 { &debug_lc, 0, 0 },
86 { &debug_lc, 0, 0 },
87 { &debug_lc, 0, 0 },
88 { &debug_lc, 0, 0 },
89 { &debug_lc, 0, 0 }
90 },
91 &default_lc
92 };
93
94 static Lc_t* lcs = &debug_lc;
95
96 Lc_t* locales[] =
97 {
98 &default_lc,
99 &default_lc,
100 &default_lc,
101 &default_lc,
102 &default_lc,
103 &default_lc,
104 &default_lc,
105 &default_lc,
106 &default_lc,
107 &default_lc,
108 &default_lc,
109 &default_lc,
110 &default_lc,
111 &default_lc
112 };
113
114 /*
115 * return the internal category index for category
116 */
117
118 int
lcindex(int category,int min)119 lcindex(int category, int min)
120 {
121 switch (category)
122 {
123 case LC_ALL: return min ? -1 : AST_LC_ALL;
124 case LC_ADDRESS: return AST_LC_ADDRESS;
125 case LC_COLLATE: return AST_LC_COLLATE;
126 case LC_CTYPE: return AST_LC_CTYPE;
127 case LC_IDENTIFICATION: return AST_LC_IDENTIFICATION;
128 case LC_LANG: return AST_LC_LANG;
129 case LC_MEASUREMENT: return AST_LC_MEASUREMENT;
130 case LC_MESSAGES: return AST_LC_MESSAGES;
131 case LC_MONETARY: return AST_LC_MONETARY;
132 case LC_NAME: return AST_LC_NAME;
133 case LC_NUMERIC: return AST_LC_NUMERIC;
134 case LC_PAPER: return AST_LC_PAPER;
135 case LC_TELEPHONE: return AST_LC_TELEPHONE;
136 case LC_TIME: return AST_LC_TIME;
137 case LC_XLITERATE: return AST_LC_XLITERATE;
138 }
139 return -1;
140 }
141
142 /*
143 * return the first category table entry
144 */
145
146 Lc_category_t*
lccategories(void)147 lccategories(void)
148 {
149 return (Lc_category_t*)&lc_categories[0];
150 }
151
152 /*
153 * return the current info for category
154 */
155
156 Lc_info_t*
lcinfo(register int category)157 lcinfo(register int category)
158 {
159 if ((category = lcindex(category, 0)) < 0)
160 return 0;
161 return LCINFO(category);
162 }
163
/*
 * match s against the '|' separated alternation pattern p
 *
 * s ends at its terminating 0 or at the first ','
 * on a full match of one alternative the number of pattern chars
 * consumed for that alternative is returned (non-zero for a non-empty
 * alternative)
 * if minimum!=0 and no alternative matches fully then the longest
 * partial match of at least minimum pattern chars is returned instead
 * 0 is returned when nothing matches
 *
 * if standard!=0:
 *	if s[0] is a digit then leading non-digits are ignored in p,
 *	otherwise if p starts with a digit then leading non-digits are
 *	ignored in s
 *	a '-' on one side that faces a digit on the other is skipped
 *
 * otherwise a '-' in s that is not matched literally skips p ahead to
 * its next '-', and a '-' in p skips s ahead to its next '-'; the
 * latter also lets s match a proper prefix of the alternative
 *
 * chars are converted to unsigned char before being handed to
 * isdigit() so that bytes with the high bit set are well defined
 */

static int
match(const char* s, register const char* p, int minimum, int standard)
{
	register const char*	t;
	const char*		x;
	int			w;
	int			z;

	z = 0;
	do
	{
		t = s;
		if (standard)
		{
			if (isdigit((unsigned char)*t))
				while (*p && !isdigit((unsigned char)*p))
					p++;
			else if (isdigit((unsigned char)*p))
				while (*t && !isdigit((unsigned char)*t))
					t++;
		}
		if (*p)
		{
			w = 0;
			x = p;
			while (*p && *p != '|')
			{
				if (!*t || *t == ',')
					break;
				else if (*t == *p)
					/*ok*/;
				else if (*t == '-')
				{
					if (standard && isdigit((unsigned char)*p))
					{
						t++;
						continue;
					}
					while (*p && *p != '-')
						p++;
					if (!*p)
						break;
				}
				else if (*p == '-')
				{
					if (standard && isdigit((unsigned char)*t))
					{
						p++;
						continue;
					}
					w = 1;
					while (*t && *t != '-')
						t++;
					if (!*t)
						break;
				}
				else
					break;
				t++;
				p++;
			}

			/*
			 * s exhausted and either the alternative is too
			 * or a '-' in p allowed a prefix match
			 */

			if ((!*t || *t == ',') && (!*p || *p == '|' || w))
				return p - x;

			/*
			 * remember the longest acceptable partial match
			 */

			if (minimum && z < (p - x) && (p - x) >= minimum)
				z = p - x;
		}

		/*
		 * advance to the next alternative
		 */

		while (*p && *p != '|')
			p++;
	} while (*p++);
	return z;
}
241
242 /*
243 * return 1 if s matches the charset names in cp
244 */
245
246 static int
match_charset(register const char * s,register const Lc_charset_t * cp)247 match_charset(register const char* s, register const Lc_charset_t* cp)
248 {
249 return match(s, cp->code, 0, 1) || match(s, cp->alternates, 3, 1) || cp->ms && match(s, cp->ms, 0, 1);
250 }
251
252 /*
253 * low level for lccanon
254 */
255
256 static size_t
canonical(const Lc_language_t * lp,const Lc_territory_t * tp,const Lc_charset_t * cp,const Lc_attribute_list_t * ap,unsigned long flags,char * buf,size_t siz)257 canonical(const Lc_language_t* lp, const Lc_territory_t* tp, const Lc_charset_t* cp, const Lc_attribute_list_t* ap, unsigned long flags, char* buf, size_t siz)
258 {
259 register int c;
260 register int u;
261 register char* s;
262 register char* e;
263 register const char* t;
264
265 if (!(flags & (LC_abbreviated|LC_default|LC_local|LC_qualified|LC_verbose)))
266 flags |= LC_abbreviated;
267 s = buf;
268 e = &buf[siz - 3];
269 if (lp)
270 {
271 if (lp->flags & (LC_debug|LC_default))
272 {
273 for (t = lp->code; s < e && (*s = *t++); s++);
274 *s++ = 0;
275 return s - buf;
276 }
277 if (flags & LC_verbose)
278 {
279 u = 1;
280 t = lp->name;
281 while (s < e && (c = *t++))
282 {
283 if (u)
284 {
285 u = 0;
286 c = toupper(c);
287 }
288 else if (!isalnum(c))
289 u = 1;
290 *s++ = c;
291 }
292 }
293 else
294 for (t = lp->code; s < e && (*s = *t++); s++);
295 }
296 if (s < e)
297 {
298 if (tp && tp != &lc_territories[0] && (!(flags & (LC_abbreviated|LC_default)) || !lp || !streq(lp->code, tp->code)))
299 {
300 if (lp)
301 *s++ = '_';
302 if (flags & LC_verbose)
303 {
304 u = 1;
305 t = tp->name;
306 while (s < e && (c = *t++) && c != '|')
307 {
308 if (u)
309 {
310 u = 0;
311 c = toupper(c);
312 }
313 else if (!isalnum(c))
314 u = 1;
315 *s++ = c;
316 }
317 }
318 else
319 for (t = tp->code; s < e && (*s = toupper(*t++)); s++);
320 }
321 if (lp && (!(flags & (LC_abbreviated|LC_default)) || cp != lp->charset) && s < e)
322 {
323 *s++ = '.';
324 for (t = cp->code; s < e && (c = *t++); s++)
325 {
326 if (islower(c))
327 c = toupper(c);
328 *s = c;
329 }
330 }
331 for (c = '@'; ap && s < e; ap = ap->next)
332 if (!(flags & (LC_abbreviated|LC_default|LC_verbose)) || !(ap->attribute->flags & LC_default))
333 {
334 *s++ = c;
335 c = ',';
336 for (t = ap->attribute->name; s < e && (*s = *t++); s++);
337 }
338 }
339 *s++ = 0;
340 return s - buf;
341 }
342
343 /*
344 * generate a canonical locale name in buf
345 */
346
347 size_t
lccanon(Lc_t * lc,unsigned long flags,char * buf,size_t siz)348 lccanon(Lc_t* lc, unsigned long flags, char* buf, size_t siz)
349 {
350 if ((flags & LC_local) && (!lc->language || !(lc->language->flags & (LC_debug|LC_default))))
351 {
352 #if _WINIX
353 char lang[64];
354 char code[64];
355 char ctry[64];
356
357 if (lc->index &&
358 GetLocaleInfo(lc->index, LOCALE_SENGLANGUAGE, lang, sizeof(lang)) &&
359 GetLocaleInfo(lc->index, LOCALE_SENGCOUNTRY, ctry, sizeof(ctry)))
360 {
361 if (!GetLocaleInfo(lc->index, LOCALE_IDEFAULTANSICODEPAGE, code, sizeof(code)))
362 code[0] = 0;
363 if (!lc->charset || !lc->charset->ms)
364 return sfsprintf(buf, siz, "%s_%s", lang, ctry);
365 else if (streq(lc->charset->ms, code))
366 return sfsprintf(buf, siz, "%s_%s.%s", lang, ctry, code);
367 else
368 return sfsprintf(buf, siz, "%s_%s.%s,%s", lang, ctry, code, lc->charset->ms);
369 }
370 #endif
371 buf[0] = '-';
372 buf[1] = 0;
373 return 0;
374 }
375 return canonical(lc->language, lc->territory, lc->charset, lc->attributes, flags, buf, siz);
376 }
377
378 /*
379 * make an Lc_t from a locale name
380 */
381
382 Lc_t*
lcmake(const char * name)383 lcmake(const char* name)
384 {
385 register int c;
386 register char* s;
387 register char* e;
388 register const char* t;
389 const char* a;
390 char* w;
391 char* language_name;
392 char* territory_name;
393 char* charset_name;
394 char* attributes_name;
395 Lc_t* lc;
396 const Lc_map_t* mp;
397 const Lc_language_t* lp;
398 const Lc_territory_t* tp;
399 const Lc_territory_t* tpb;
400 const Lc_territory_t* primary;
401 const Lc_charset_t* cp;
402 const Lc_charset_t* ppa;
403 const Lc_attribute_t* ap;
404 Lc_attribute_list_t* ai;
405 Lc_attribute_list_t* al;
406 int i;
407 int n;
408 int z;
409 char buf[PATH_MAX / 2];
410 char tmp[PATH_MAX / 2];
411
412 if (!(t = name) || !*t)
413 return &default_lc;
414 for (lc = lcs; lc; lc = lc->next)
415 if (!strcasecmp(t, lc->code) || !strcasecmp(t, lc->name))
416 return lc;
417 for (mp = lc_maps; mp->code; mp++)
418 if (streq(t, mp->code))
419 {
420 lp = mp->language;
421 tp = mp->territory;
422 cp = mp->charset;
423 if (!mp->attribute)
424 al = 0;
425 else if (al = newof(0, Lc_attribute_list_t, 1, 0))
426 al->attribute = mp->attribute;
427 goto mapped;
428 }
429 language_name = buf;
430 territory_name = charset_name = attributes_name = 0;
431 s = buf;
432 e = &buf[sizeof(buf)-2];
433 a = 0;
434 n = 0;
435 while (s < e && (c = *t++))
436 {
437 if (isspace(c) || (c == '(' || c == '-' && *t == '-') && ++n)
438 {
439 while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n))
440 if (!c)
441 break;
442 if (isalnum(c) && !n)
443 *s++ = '-';
444 else
445 {
446 n = 0;
447 if (!a)
448 {
449 a = t - 1;
450 while (c && c != '_' && c != '.' && c != '@')
451 c = *t++;
452 if (!c)
453 break;
454 }
455 }
456 }
457 if (c == '_' && !territory_name)
458 {
459 *s++ = 0;
460 territory_name = s;
461 }
462 else if (c == '.' && !charset_name)
463 {
464 *s++ = 0;
465 charset_name = s;
466 }
467 else if (c == '@' && !attributes_name)
468 {
469 *s++ = 0;
470 attributes_name = s;
471 }
472 else
473 {
474 if (isupper(c))
475 c = tolower(c);
476 *s++ = c;
477 }
478 }
479 if ((t = a) && s < e)
480 {
481 if (attributes_name)
482 *s++ = ',';
483 else
484 {
485 *s++ = 0;
486 attributes_name = s;
487 }
488 while (s < e && (c = *t++))
489 {
490 if (isspace(c) || (c == '(' || c == ')' || c == '-' && *t == '-') && ++n)
491 {
492 while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n))
493 if (!c)
494 break;
495 if (isalnum(c) && !n)
496 *s++ = '-';
497 else
498 n = 0;
499 }
500 if (c == '_' || c == '.' || c == '@')
501 break;
502 if (isupper(c))
503 c = tolower(c);
504 *s++ = c;
505 }
506 }
507 *s = 0;
508 tp = 0;
509 cp = ppa = 0;
510 al = 0;
511
512 /*
513 * language
514 */
515
516 n = strlen(s = language_name);
517 if (n == 2)
518 for (lp = lc_languages; lp->code && !streq(s, lp->code); lp++);
519 else if (n == 3)
520 {
521 for (lp = lc_languages; lp->code && (!lp->alternates || !match(s, lp->alternates, n, 0)); lp++);
522 if (!lp->code)
523 {
524 c = s[2];
525 s[2] = 0;
526 for (lp = lc_languages; lp->code && !streq(s, lp->code); lp++);
527 s[2] = c;
528 if (lp->code)
529 n = 1;
530 }
531 }
532 else
533 lp = 0;
534 if (!lp || !lp->code)
535 {
536 for (lp = lc_languages; lp->code && !match(s, lp->name, 0, 0); lp++);
537 if (!lp || !lp->code)
538 {
539 if (!territory_name)
540 {
541 if (n == 2)
542 for (tp = lc_territories; tp->code && !streq(s, tp->code); tp++);
543 else
544 {
545 z = 0;
546 tpb = 0;
547 for (tp = lc_territories; tp->name; tp++)
548 if ((i = match(s, tp->name, 3, 0)) > z)
549 {
550 tpb = tp;
551 if ((z = i) == n)
552 break;
553 }
554 if (tpb)
555 tp = tpb;
556 }
557 if (tp->code)
558 lp = tp->languages[0];
559 }
560 if (!lp || !lp->code)
561 {
562 /*
563 * name not in the tables so let
564 * _ast_setlocale() and/or setlocale()
565 * handle the validity checks
566 */
567
568 s = (char*)name;
569 z = strlen(s) + 1;
570 if (!(lp = newof(0, Lc_language_t, 1, z)))
571 return 0;
572 name = ((Lc_language_t*)lp)->code = ((Lc_language_t*)lp)->name = (const char*)(lp + 1);
573 memcpy((char*)lp->code, s, z - 1);
574 tp = &lc_territories[0];
575 cp = ((Lc_language_t*)lp)->charset = &lc_charsets[0];
576 al = 0;
577 goto override;
578 }
579 }
580 }
581
582 /*
583 * territory
584 */
585
586 if (!tp || !tp->code)
587 {
588 if (!(s = territory_name))
589 {
590 n = 0;
591 primary = 0;
592 for (tp = lc_territories; tp->code; tp++)
593 if (tp->languages[0] == lp)
594 {
595 if (tp->flags & LC_primary)
596 {
597 n = 1;
598 primary = tp;
599 break;
600 }
601 n++;
602 primary = tp;
603 }
604 if (n == 1)
605 tp = primary;
606 s = (char*)lp->code;
607 }
608 if (!tp || !tp->code)
609 {
610 n = strlen(s);
611 if (n == 2)
612 {
613 for (tp = lc_territories; tp->code; tp++)
614 if (streq(s, tp->code))
615 {
616 for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++);
617 if (i >= elementsof(tp->languages))
618 tp = 0;
619 break;
620 }
621 }
622 else
623 {
624 for (tp = lc_territories; tp->code; tp++)
625 if (match(s, tp->name, 3, 0))
626 {
627 for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++);
628 if (i < elementsof(tp->languages))
629 break;
630 }
631 }
632 if (tp && !tp->code)
633 tp = 0;
634 }
635 }
636
637 /*
638 * attributes -- done here to catch misplaced charset references
639 */
640
641 if (s = attributes_name)
642 {
643 do
644 {
645 for (w = s; *s && *s != ','; s++);
646 c = *s;
647 *s = 0;
648 if (!(cp = lp->charset) || !match_charset(w, cp))
649 for (cp = lc_charsets; cp->code; cp++)
650 if (match_charset(w, cp))
651 {
652 ppa = cp;
653 break;
654 }
655 if (!cp->code)
656 {
657 for (i = 0; i < elementsof(lp->attributes) && (ap = lp->attributes[i]); i++)
658 if (match(w, ap->name, 5, 0))
659 {
660 if (ai = newof(0, Lc_attribute_list_t, 1, 0))
661 {
662 ai->attribute = ap;
663 ai->next = al;
664 al = ai;
665 }
666 break;
667 }
668 if (i >= elementsof(lp->attributes) && (ap = newof(0, Lc_attribute_t, 1, sizeof(Lc_attribute_list_t) + s - w + 1)))
669 {
670 ai = (Lc_attribute_list_t*)(ap + 1);
671 strcpy((char*)(((Lc_attribute_t*)ap)->name = (const char*)(ai + 1)), w);
672 ai->attribute = ap;
673 ai->next = al;
674 al = ai;
675 }
676 }
677 *s = c;
678 } while (*s++);
679 }
680
681 /*
682 * charset
683 */
684
685 if (s = charset_name)
686 for (cp = lc_charsets; cp->code; cp++)
687 if (match_charset(s, cp))
688 break;
689 if (!cp || !cp->code)
690 cp = ppa ? ppa : lp->charset;
691 mapped:
692 z = canonical(lp, tp, cp, al, 0, s = tmp, sizeof(tmp));
693
694 /*
695 * add to the list of possibly active locales
696 */
697
698 override:
699 n = strlen(name) + 1;
700 if (!(lc = newof(0, Lc_t, 1, n + z)))
701 return 0;
702 strcpy((char*)(lc->name = (const char*)(lc + 1)), name);
703 strcpy((char*)(lc->code = lc->name + n), s);
704 lc->language = lp ? lp : &lc_languages[0];
705 lc->territory = tp ? tp : &lc_territories[0];
706 lc->charset = cp ? cp : &lc_charsets[0];
707 if (!strcmp(lc->charset->code, "utf8"))
708 lc->flags |= LC_utf8;
709 lc->attributes = al;
710 for (i = 0; i < elementsof(lc->info); i++)
711 lc->info[i].lc = lc;
712 #if _WINIX
713 n = SUBLANG_DEFAULT;
714 if (tp)
715 for (i = 0; i < elementsof(tp->languages); i++)
716 if (lp == tp->languages[i])
717 {
718 n = tp->indices[i];
719 break;
720 }
721 lc->index = MAKELCID(MAKELANGID(lp->index, n), SORT_DEFAULT);
722 #endif
723 lc->next = lcs;
724 lcs = lc;
725 return lc;
726 }
727
728 /*
729 * return an Lc_t* for each locale in the tables
730 * one Lc_t is allocated on the first call with lc==0
731 * this is freed when 0 returned
732 * the return value is not part of the lcmake() cache
733 */
734
735 typedef struct Lc_scan_s
736 {
737 Lc_t lc;
738 Lc_attribute_list_t list;
739 int territory;
740 int language;
741 int attribute;
742 char buf[256];
743 } Lc_scan_t;
744
745 Lc_t*
lcscan(Lc_t * lc)746 lcscan(Lc_t* lc)
747 {
748 register Lc_scan_t* ls;
749
750 if (!(ls = (Lc_scan_t*)lc))
751 {
752 if (!(ls = newof(0, Lc_scan_t, 1, 0)))
753 return 0;
754 ls->lc.code = ls->lc.name = ls->buf;
755 ls->territory = -1;
756 ls->language = elementsof(ls->lc.territory->languages);
757 ls->attribute = elementsof(ls->lc.language->attributes);
758 }
759 if (++ls->attribute >= elementsof(ls->lc.language->attributes) || !(ls->list.attribute = ls->lc.language->attributes[ls->attribute]))
760 {
761 if (++ls->language >= elementsof(ls->lc.territory->languages) || !(ls->lc.language = ls->lc.territory->languages[ls->language]))
762 {
763 if (!lc_territories[++ls->territory].code)
764 {
765 free(ls);
766 return 0;
767 }
768 ls->lc.territory = &lc_territories[ls->territory];
769 ls->lc.language = ls->lc.territory->languages[ls->language = 0];
770 }
771 if (ls->lc.language)
772 {
773 ls->lc.charset = ls->lc.language->charset ? ls->lc.language->charset : &lc_charsets[0];
774 ls->list.attribute = ls->lc.language->attributes[ls->attribute = 0];
775 }
776 else
777 {
778 ls->lc.charset = &lc_charsets[0];
779 ls->list.attribute = 0;
780 }
781 }
782 ls->lc.attributes = ls->list.attribute ? &ls->list : (Lc_attribute_list_t*)0;
783 #if _WINIX
784 if (!ls->lc.language || !ls->lc.language->index)
785 ls->lc.index = 0;
786 else
787 {
788 if ((!ls->list.attribute || !(ls->lc.index = ls->list.attribute->index)) &&
789 (!ls->lc.territory || !(ls->lc.index = ls->lc.territory->indices[ls->language])))
790 ls->lc.index = SUBLANG_DEFAULT;
791 ls->lc.index = MAKELCID(MAKELANGID(ls->lc.language->index, ls->lc.index), SORT_DEFAULT);
792 }
793 #endif
794 canonical(ls->lc.language, ls->lc.territory, ls->lc.charset, ls->lc.attributes, 0, ls->buf, sizeof(ls->buf));
795 return (Lc_t*)ls;
796 }
797