1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2010 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 24 /* 25 * locale state implementation 26 */ 27 28 #include "lclib.h" 29 #include "lclang.h" 30 31 #include <ctype.h> 32 33 static Lc_numeric_t default_numeric = { '.', -1 }; 34 35 static Lc_t default_lc = 36 { 37 "C", 38 "POSIX", 39 &lc_languages[0], 40 &lc_territories[0], 41 &lc_charsets[0], 42 0, 43 LC_default|LC_checked|LC_local, 44 0, 45 { 46 { &default_lc, 0, 0 }, 47 { &default_lc, 0, 0 }, 48 { &default_lc, 0, 0 }, 49 { &default_lc, 0, 0 }, 50 { &default_lc, 0, 0 }, 51 { &default_lc, 0, (void*)&default_numeric }, 52 { &default_lc, 0, 0 }, 53 { &default_lc, 0, 0 }, 54 { &default_lc, 0, 0 }, 55 { &default_lc, 0, 0 }, 56 { &default_lc, 0, 0 }, 57 { &default_lc, 0, 0 }, 58 { &default_lc, 0, 0 }, 59 { &default_lc, 0, 0 } 60 } 61 }; 62 63 static Lc_numeric_t debug_numeric = { ',', '.' }; 64 65 static Lc_t debug_lc = 66 { 67 "debug", 68 "debug", 69 &lc_languages[1], 70 &lc_territories[1], 71 &lc_charsets[0], 72 0, 73 LC_debug|LC_checked|LC_local, 74 0, 75 { 76 { &debug_lc, 0, 0 }, 77 { &debug_lc, 0, 0 }, 78 { &debug_lc, 0, 0 }, 79 { &debug_lc, 0, 0 }, 80 { &debug_lc, 0, 0 }, 81 { &debug_lc, 0, (void*)&debug_numeric }, 82 { &debug_lc, 0, 0 }, 83 { &debug_lc, 0, 0 }, 84 { &debug_lc, 0, 0 }, 85 { &debug_lc, 0, 0 }, 86 { &debug_lc, 0, 0 }, 87 { &debug_lc, 0, 0 }, 88 { &debug_lc, 0, 0 }, 89 { &debug_lc, 0, 0 } 90 }, 91 &default_lc 92 }; 93 94 static Lc_t* lcs = &debug_lc; 95 96 Lc_t* locales[] = 97 { 98 &default_lc, 99 &default_lc, 100 &default_lc, 101 &default_lc, 102 &default_lc, 103 &default_lc, 104 &default_lc, 105 &default_lc, 106 &default_lc, 107 &default_lc, 108 &default_lc, 109 &default_lc, 110 &default_lc, 111 &default_lc 112 }; 113 114 /* 115 * return the internal category index for category 116 */ 117 118 int 119 lcindex(int category, int min) 120 { 121 switch (category) 122 { 123 case LC_ALL: return min ? -1 : AST_LC_ALL; 124 case LC_ADDRESS: return AST_LC_ADDRESS; 125 case LC_COLLATE: return AST_LC_COLLATE; 126 case LC_CTYPE: return AST_LC_CTYPE; 127 case LC_IDENTIFICATION: return AST_LC_IDENTIFICATION; 128 case LC_LANG: return AST_LC_LANG; 129 case LC_MEASUREMENT: return AST_LC_MEASUREMENT; 130 case LC_MESSAGES: return AST_LC_MESSAGES; 131 case LC_MONETARY: return AST_LC_MONETARY; 132 case LC_NAME: return AST_LC_NAME; 133 case LC_NUMERIC: return AST_LC_NUMERIC; 134 case LC_PAPER: return AST_LC_PAPER; 135 case LC_TELEPHONE: return AST_LC_TELEPHONE; 136 case LC_TIME: return AST_LC_TIME; 137 case LC_XLITERATE: return AST_LC_XLITERATE; 138 } 139 return -1; 140 } 141 142 /* 143 * return the first category table entry 144 */ 145 146 Lc_category_t* 147 lccategories(void) 148 { 149 return (Lc_category_t*)&lc_categories[0]; 150 } 151 152 /* 153 * return the current info for category 154 */ 155 156 Lc_info_t* 157 lcinfo(register int category) 158 { 159 if ((category = lcindex(category, 0)) < 0) 160 return 0; 161 return LCINFO(category); 162 } 163 164 /* 165 * return 1 if s matches the alternation pattern p 166 * if minimum!=0 then at least that many chars must match 167 * if standard!=0 and s[0] is a digit leading non-digits are ignored in p 168 */ 169 170 static int 171 match(const char* s, register const char* p, int minimum, int standard) 172 { 173 register const char* t; 174 const char* x; 175 int w; 176 int z; 177 178 z = 0; 179 do 180 { 181 t = s; 182 if (standard) 183 { 184 if (isdigit(*t)) 185 while (*p && !isdigit(*p)) 186 p++; 187 else if (isdigit(*p)) 188 while (*t && !isdigit(*t)) 189 t++; 190 } 191 if (*p) 192 { 193 w = 0; 194 x = p; 195 while (*p && *p != '|') 196 { 197 if (!*t || *t == ',') 198 break; 199 else if (*t == *p) 200 /*ok*/; 201 else if (*t == '-') 202 { 203 if (standard && isdigit(*p)) 204 { 205 t++; 206 continue; 207 } 208 while (*p && *p != '-') 209 p++; 210 if (!*p) 211 break; 212 } 213 else if (*p == '-') 214 { 215 if (standard && isdigit(*t)) 216 { 217 p++; 218 continue; 219 } 220 w = 1; 221 while (*t && *t != '-') 222 t++; 223 if (!*t) 224 break; 225 } 226 else 227 break; 228 t++; 229 p++; 230 } 231 if ((!*t || *t == ',') && (!*p || *p == '|' || w)) 232 return p - x; 233 if (minimum && z < (p - x) && (p - x) >= minimum) 234 z = p - x; 235 } 236 while (*p && *p != '|') 237 p++; 238 } while (*p++); 239 return z; 240 } 241 242 /* 243 * return 1 if s matches the charset names in cp 244 */ 245 246 static int 247 match_charset(register const char* s, register const Lc_charset_t* cp) 248 { 249 return match(s, cp->code, 0, 1) || match(s, cp->alternates, 3, 1) || cp->ms && match(s, cp->ms, 0, 1); 250 } 251 252 /* 253 * low level for lccanon 254 */ 255 256 static size_t 257 canonical(const Lc_language_t* lp, const Lc_territory_t* tp, const Lc_charset_t* cp, const Lc_attribute_list_t* ap, unsigned long flags, char* buf, size_t siz) 258 { 259 register int c; 260 register int u; 261 register char* s; 262 register char* e; 263 register const char* t; 264 265 if (!(flags & (LC_abbreviated|LC_default|LC_local|LC_qualified|LC_verbose))) 266 flags |= LC_abbreviated; 267 s = buf; 268 e = &buf[siz - 3]; 269 if (lp) 270 { 271 if (lp->flags & (LC_debug|LC_default)) 272 { 273 for (t = lp->code; s < e && (*s = *t++); s++); 274 *s++ = 0; 275 return s - buf; 276 } 277 if (flags & LC_verbose) 278 { 279 u = 1; 280 t = lp->name; 281 while (s < e && (c = *t++)) 282 { 283 if (u) 284 { 285 u = 0; 286 c = toupper(c); 287 } 288 else if (!isalnum(c)) 289 u = 1; 290 *s++ = c; 291 } 292 } 293 else 294 for (t = lp->code; s < e && (*s = *t++); s++); 295 } 296 if (s < e) 297 { 298 if (tp && tp != &lc_territories[0] && (!(flags & (LC_abbreviated|LC_default)) || !lp || !streq(lp->code, tp->code))) 299 { 300 if (lp) 301 *s++ = '_'; 302 if (flags & LC_verbose) 303 { 304 u = 1; 305 t = tp->name; 306 while (s < e && (c = *t++) && c != '|') 307 { 308 if (u) 309 { 310 u = 0; 311 c = toupper(c); 312 } 313 else if (!isalnum(c)) 314 u = 1; 315 *s++ = c; 316 } 317 } 318 else 319 for (t = tp->code; s < e && (*s = toupper(*t++)); s++); 320 } 321 if (lp && (!(flags & (LC_abbreviated|LC_default)) || cp != lp->charset) && s < e) 322 { 323 *s++ = '.'; 324 for (t = cp->code; s < e && (c = *t++); s++) 325 { 326 if (islower(c)) 327 c = toupper(c); 328 *s = c; 329 } 330 } 331 for (c = '@'; ap && s < e; ap = ap->next) 332 if (!(flags & (LC_abbreviated|LC_default|LC_verbose)) || !(ap->attribute->flags & LC_default)) 333 { 334 *s++ = c; 335 c = ','; 336 for (t = ap->attribute->name; s < e && (*s = *t++); s++); 337 } 338 } 339 *s++ = 0; 340 return s - buf; 341 } 342 343 /* 344 * generate a canonical locale name in buf 345 */ 346 347 size_t 348 lccanon(Lc_t* lc, unsigned long flags, char* buf, size_t siz) 349 { 350 if ((flags & LC_local) && (!lc->language || !(lc->language->flags & (LC_debug|LC_default)))) 351 { 352 #if _WINIX 353 char lang[64]; 354 char code[64]; 355 char ctry[64]; 356 357 if (lc->index && 358 GetLocaleInfo(lc->index, LOCALE_SENGLANGUAGE, lang, sizeof(lang)) && 359 GetLocaleInfo(lc->index, LOCALE_SENGCOUNTRY, ctry, sizeof(ctry))) 360 { 361 if (!GetLocaleInfo(lc->index, LOCALE_IDEFAULTANSICODEPAGE, code, sizeof(code))) 362 code[0] = 0; 363 if (!lc->charset || !lc->charset->ms) 364 return sfsprintf(buf, siz, "%s_%s", lang, ctry); 365 else if (streq(lc->charset->ms, code)) 366 return sfsprintf(buf, siz, "%s_%s.%s", lang, ctry, code); 367 else 368 return sfsprintf(buf, siz, "%s_%s.%s,%s", lang, ctry, code, lc->charset->ms); 369 } 370 #endif 371 buf[0] = '-'; 372 buf[1] = 0; 373 return 0; 374 } 375 return canonical(lc->language, lc->territory, lc->charset, lc->attributes, flags, buf, siz); 376 } 377 378 /* 379 * make an Lc_t from a locale name 380 */ 381 382 Lc_t* 383 lcmake(const char* name) 384 { 385 register int c; 386 register char* s; 387 register char* e; 388 register const char* t; 389 const char* a; 390 char* w; 391 char* language_name; 392 char* territory_name; 393 char* charset_name; 394 char* attributes_name; 395 Lc_t* lc; 396 const Lc_map_t* mp; 397 const Lc_language_t* lp; 398 const Lc_territory_t* tp; 399 const Lc_territory_t* tpb; 400 const Lc_territory_t* primary; 401 const Lc_charset_t* cp; 402 const Lc_charset_t* ppa; 403 const Lc_attribute_t* ap; 404 Lc_attribute_list_t* ai; 405 Lc_attribute_list_t* al; 406 int i; 407 int n; 408 int z; 409 char buf[PATH_MAX / 2]; 410 char tmp[PATH_MAX / 2]; 411 412 if (!(t = name) || !*t) 413 return &default_lc; 414 for (lc = lcs; lc; lc = lc->next) 415 if (!strcasecmp(t, lc->code) || !strcasecmp(t, lc->name)) 416 return lc; 417 for (mp = lc_maps; mp->code; mp++) 418 if (streq(t, mp->code)) 419 { 420 lp = mp->language; 421 tp = mp->territory; 422 cp = mp->charset; 423 if (!mp->attribute) 424 al = 0; 425 else if (al = newof(0, Lc_attribute_list_t, 1, 0)) 426 al->attribute = mp->attribute; 427 goto mapped; 428 } 429 language_name = buf; 430 territory_name = charset_name = attributes_name = 0; 431 s = buf; 432 e = &buf[sizeof(buf)-2]; 433 a = 0; 434 n = 0; 435 while (s < e && (c = *t++)) 436 { 437 if (isspace(c) || (c == '(' || c == '-' && *t == '-') && ++n) 438 { 439 while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n)) 440 if (!c) 441 break; 442 if (isalnum(c) && !n) 443 *s++ = '-'; 444 else 445 { 446 n = 0; 447 if (!a) 448 { 449 a = t - 1; 450 while (c && c != '_' && c != '.' && c != '@') 451 c = *t++; 452 if (!c) 453 break; 454 } 455 } 456 } 457 if (c == '_' && !territory_name) 458 { 459 *s++ = 0; 460 territory_name = s; 461 } 462 else if (c == '.' && !charset_name) 463 { 464 *s++ = 0; 465 charset_name = s; 466 } 467 else if (c == '@' && !attributes_name) 468 { 469 *s++ = 0; 470 attributes_name = s; 471 } 472 else 473 { 474 if (isupper(c)) 475 c = tolower(c); 476 *s++ = c; 477 } 478 } 479 if ((t = a) && s < e) 480 { 481 if (attributes_name) 482 *s++ = ','; 483 else 484 { 485 *s++ = 0; 486 attributes_name = s; 487 } 488 while (s < e && (c = *t++)) 489 { 490 if (isspace(c) || (c == '(' || c == ')' || c == '-' && *t == '-') && ++n) 491 { 492 while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n)) 493 if (!c) 494 break; 495 if (isalnum(c) && !n) 496 *s++ = '-'; 497 else 498 n = 0; 499 } 500 if (c == '_' || c == '.' || c == '@') 501 break; 502 if (isupper(c)) 503 c = tolower(c); 504 *s++ = c; 505 } 506 } 507 *s = 0; 508 tp = 0; 509 cp = ppa = 0; 510 al = 0; 511 512 /* 513 * language 514 */ 515 516 n = strlen(s = language_name); 517 if (n == 2) 518 for (lp = lc_languages; lp->code && !streq(s, lp->code); lp++); 519 else if (n == 3) 520 { 521 for (lp = lc_languages; lp->code && (!lp->alternates || !match(s, lp->alternates, n, 0)); lp++); 522 if (!lp->code) 523 { 524 c = s[2]; 525 s[2] = 0; 526 for (lp = lc_languages; lp->code && !streq(s, lp->code); lp++); 527 s[2] = c; 528 if (lp->code) 529 n = 1; 530 } 531 } 532 else 533 lp = 0; 534 if (!lp || !lp->code) 535 { 536 for (lp = lc_languages; lp->code && !match(s, lp->name, 0, 0); lp++); 537 if (!lp || !lp->code) 538 { 539 if (!territory_name) 540 { 541 if (n == 2) 542 for (tp = lc_territories; tp->code && !streq(s, tp->code); tp++); 543 else 544 { 545 z = 0; 546 tpb = 0; 547 for (tp = lc_territories; tp->name; tp++) 548 if ((i = match(s, tp->name, 3, 0)) > z) 549 { 550 tpb = tp; 551 if ((z = i) == n) 552 break; 553 } 554 if (tpb) 555 tp = tpb; 556 } 557 if (tp->code) 558 lp = tp->languages[0]; 559 } 560 if (!lp || !lp->code) 561 { 562 /* 563 * name not in the tables so let 564 * _ast_setlocale() and/or setlocale() 565 * handle the validity checks 566 */ 567 568 s = (char*)name; 569 z = strlen(s) + 1; 570 if (!(lp = newof(0, Lc_language_t, 1, z))) 571 return 0; 572 name = ((Lc_language_t*)lp)->code = ((Lc_language_t*)lp)->name = (const char*)(lp + 1); 573 memcpy((char*)lp->code, s, z - 1); 574 tp = &lc_territories[0]; 575 cp = ((Lc_language_t*)lp)->charset = &lc_charsets[0]; 576 al = 0; 577 goto override; 578 } 579 } 580 } 581 582 /* 583 * territory 584 */ 585 586 if (!tp || !tp->code) 587 { 588 if (!(s = territory_name)) 589 { 590 n = 0; 591 primary = 0; 592 for (tp = lc_territories; tp->code; tp++) 593 if (tp->languages[0] == lp) 594 { 595 if (tp->flags & LC_primary) 596 { 597 n = 1; 598 primary = tp; 599 break; 600 } 601 n++; 602 primary = tp; 603 } 604 if (n == 1) 605 tp = primary; 606 s = (char*)lp->code; 607 } 608 if (!tp || !tp->code) 609 { 610 n = strlen(s); 611 if (n == 2) 612 { 613 for (tp = lc_territories; tp->code; tp++) 614 if (streq(s, tp->code)) 615 { 616 for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++); 617 if (i >= elementsof(tp->languages)) 618 tp = 0; 619 break; 620 } 621 } 622 else 623 { 624 for (tp = lc_territories; tp->code; tp++) 625 if (match(s, tp->name, 3, 0)) 626 { 627 for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++); 628 if (i < elementsof(tp->languages)) 629 break; 630 } 631 } 632 if (tp && !tp->code) 633 tp = 0; 634 } 635 } 636 637 /* 638 * attributes -- done here to catch misplaced charset references 639 */ 640 641 if (s = attributes_name) 642 { 643 do 644 { 645 for (w = s; *s && *s != ','; s++); 646 c = *s; 647 *s = 0; 648 if (!(cp = lp->charset) || !match_charset(w, cp)) 649 for (cp = lc_charsets; cp->code; cp++) 650 if (match_charset(w, cp)) 651 { 652 ppa = cp; 653 break; 654 } 655 if (!cp->code) 656 { 657 for (i = 0; i < elementsof(lp->attributes) && (ap = lp->attributes[i]); i++) 658 if (match(w, ap->name, 5, 0)) 659 { 660 if (ai = newof(0, Lc_attribute_list_t, 1, 0)) 661 { 662 ai->attribute = ap; 663 ai->next = al; 664 al = ai; 665 } 666 break; 667 } 668 if (i >= elementsof(lp->attributes) && (ap = newof(0, Lc_attribute_t, 1, sizeof(Lc_attribute_list_t) + s - w + 1))) 669 { 670 ai = (Lc_attribute_list_t*)(ap + 1); 671 strcpy((char*)(((Lc_attribute_t*)ap)->name = (const char*)(ai + 1)), w); 672 ai->attribute = ap; 673 ai->next = al; 674 al = ai; 675 } 676 } 677 *s = c; 678 } while (*s++); 679 } 680 681 /* 682 * charset 683 */ 684 685 if (s = charset_name) 686 for (cp = lc_charsets; cp->code; cp++) 687 if (match_charset(s, cp)) 688 break; 689 if (!cp || !cp->code) 690 cp = ppa ? ppa : lp->charset; 691 mapped: 692 z = canonical(lp, tp, cp, al, 0, s = tmp, sizeof(tmp)); 693 694 /* 695 * add to the list of possibly active locales 696 */ 697 698 override: 699 n = strlen(name) + 1; 700 if (!(lc = newof(0, Lc_t, 1, n + z))) 701 return 0; 702 strcpy((char*)(lc->name = (const char*)(lc + 1)), name); 703 strcpy((char*)(lc->code = lc->name + n), s); 704 lc->language = lp ? lp : &lc_languages[0]; 705 lc->territory = tp ? tp : &lc_territories[0]; 706 lc->charset = cp ? cp : &lc_charsets[0]; 707 if (!strcmp(lc->charset->code, "utf8")) 708 lc->flags |= LC_utf8; 709 lc->attributes = al; 710 for (i = 0; i < elementsof(lc->info); i++) 711 lc->info[i].lc = lc; 712 #if _WINIX 713 n = SUBLANG_DEFAULT; 714 if (tp) 715 for (i = 0; i < elementsof(tp->languages); i++) 716 if (lp == tp->languages[i]) 717 { 718 n = tp->indices[i]; 719 break; 720 } 721 lc->index = MAKELCID(MAKELANGID(lp->index, n), SORT_DEFAULT); 722 #endif 723 lc->next = lcs; 724 lcs = lc; 725 return lc; 726 } 727 728 /* 729 * return an Lc_t* for each locale in the tables 730 * one Lc_t is allocated on the first call with lc==0 731 * this is freed when 0 returned 732 * the return value is not part of the lcmake() cache 733 */ 734 735 typedef struct Lc_scan_s 736 { 737 Lc_t lc; 738 Lc_attribute_list_t list; 739 int territory; 740 int language; 741 int attribute; 742 char buf[256]; 743 } Lc_scan_t; 744 745 Lc_t* 746 lcscan(Lc_t* lc) 747 { 748 register Lc_scan_t* ls; 749 750 if (!(ls = (Lc_scan_t*)lc)) 751 { 752 if (!(ls = newof(0, Lc_scan_t, 1, 0))) 753 return 0; 754 ls->lc.code = ls->lc.name = ls->buf; 755 ls->territory = -1; 756 ls->language = elementsof(ls->lc.territory->languages); 757 ls->attribute = elementsof(ls->lc.language->attributes); 758 } 759 if (++ls->attribute >= elementsof(ls->lc.language->attributes) || !(ls->list.attribute = ls->lc.language->attributes[ls->attribute])) 760 { 761 if (++ls->language >= elementsof(ls->lc.territory->languages) || !(ls->lc.language = ls->lc.territory->languages[ls->language])) 762 { 763 if (!lc_territories[++ls->territory].code) 764 { 765 free(ls); 766 return 0; 767 } 768 ls->lc.territory = &lc_territories[ls->territory]; 769 ls->lc.language = ls->lc.territory->languages[ls->language = 0]; 770 } 771 if (ls->lc.language) 772 { 773 ls->lc.charset = ls->lc.language->charset ? ls->lc.language->charset : &lc_charsets[0]; 774 ls->list.attribute = ls->lc.language->attributes[ls->attribute = 0]; 775 } 776 else 777 { 778 ls->lc.charset = &lc_charsets[0]; 779 ls->list.attribute = 0; 780 } 781 } 782 ls->lc.attributes = ls->list.attribute ? &ls->list : (Lc_attribute_list_t*)0; 783 #if _WINIX 784 if (!ls->lc.language || !ls->lc.language->index) 785 ls->lc.index = 0; 786 else 787 { 788 if ((!ls->list.attribute || !(ls->lc.index = ls->list.attribute->index)) && 789 (!ls->lc.territory || !(ls->lc.index = ls->lc.territory->indices[ls->language]))) 790 ls->lc.index = SUBLANG_DEFAULT; 791 ls->lc.index = MAKELCID(MAKELANGID(ls->lc.language->index, ls->lc.index), SORT_DEFAULT); 792 } 793 #endif 794 canonical(ls->lc.language, ls->lc.territory, ls->lc.charset, ls->lc.attributes, 0, ls->buf, sizeof(ls->buf)); 795 return (Lc_t*)ls; 796 } 797