1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2008 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 24 /* 25 * locale state implementation 26 */ 27 28 #include "lclib.h" 29 #include "lclang.h" 30 31 #include <ctype.h> 32 33 static Lc_numeric_t default_numeric = { '.', -1 }; 34 35 static Lc_t default_lc = 36 { 37 "C", 38 "POSIX", 39 &lc_languages[0], 40 &lc_territories[0], 41 &lc_charsets[0], 42 0, 43 LC_default|LC_checked|LC_local, 44 0, 45 { 46 { &default_lc, 0, 0 }, 47 { &default_lc, 0, 0 }, 48 { &default_lc, 0, 0 }, 49 { &default_lc, 0, 0 }, 50 { &default_lc, 0, 0 }, 51 { &default_lc, 0, (void*)&default_numeric }, 52 { &default_lc, 0, 0 }, 53 { &default_lc, 0, 0 }, 54 { &default_lc, 0, 0 }, 55 { &default_lc, 0, 0 }, 56 { &default_lc, 0, 0 }, 57 { &default_lc, 0, 0 }, 58 { &default_lc, 0, 0 }, 59 { &default_lc, 0, 0 } 60 } 61 }; 62 63 static Lc_numeric_t debug_numeric = { ',', '.' }; 64 65 static Lc_t debug_lc = 66 { 67 "debug", 68 "debug", 69 &lc_languages[1], 70 &lc_territories[1], 71 &lc_charsets[0], 72 0, 73 LC_debug|LC_checked|LC_local, 74 0, 75 { 76 { &debug_lc, 0, 0 }, 77 { &debug_lc, 0, 0 }, 78 { &debug_lc, 0, 0 }, 79 { &debug_lc, 0, 0 }, 80 { &debug_lc, 0, 0 }, 81 { &debug_lc, 0, (void*)&debug_numeric }, 82 { &debug_lc, 0, 0 }, 83 { &debug_lc, 0, 0 }, 84 { &debug_lc, 0, 0 }, 85 { &debug_lc, 0, 0 }, 86 { &debug_lc, 0, 0 }, 87 { &debug_lc, 0, 0 }, 88 { &debug_lc, 0, 0 }, 89 { &debug_lc, 0, 0 } 90 }, 91 &default_lc 92 }; 93 94 static Lc_t* lcs = &debug_lc; 95 96 Lc_t* locales[] = 97 { 98 &default_lc, 99 &default_lc, 100 &default_lc, 101 &default_lc, 102 &default_lc, 103 &default_lc, 104 &default_lc, 105 &default_lc, 106 &default_lc, 107 &default_lc, 108 &default_lc, 109 &default_lc, 110 &default_lc, 111 &default_lc 112 }; 113 114 /* 115 * return the internal category index for category 116 */ 117 118 int 119 lcindex(int category, int min) 120 { 121 switch (category) 122 { 123 case LC_ALL: return min ? -1 : AST_LC_ALL; 124 case LC_ADDRESS: return AST_LC_ADDRESS; 125 case LC_COLLATE: return AST_LC_COLLATE; 126 case LC_CTYPE: return AST_LC_CTYPE; 127 case LC_IDENTIFICATION: return AST_LC_IDENTIFICATION; 128 case LC_MEASUREMENT: return AST_LC_MEASUREMENT; 129 case LC_MESSAGES: return AST_LC_MESSAGES; 130 case LC_MONETARY: return AST_LC_MONETARY; 131 case LC_NAME: return AST_LC_NAME; 132 case LC_NUMERIC: return AST_LC_NUMERIC; 133 case LC_PAPER: return AST_LC_PAPER; 134 case LC_TELEPHONE: return AST_LC_TELEPHONE; 135 case LC_TIME: return AST_LC_TIME; 136 case LC_XLITERATE: return AST_LC_XLITERATE; 137 } 138 return -1; 139 } 140 141 /* 142 * return the first category table entry 143 */ 144 145 Lc_category_t* 146 lccategories(void) 147 { 148 return (Lc_category_t*)&lc_categories[0]; 149 } 150 151 /* 152 * return the current info for category 153 */ 154 155 Lc_info_t* 156 lcinfo(register int category) 157 { 158 if ((category = lcindex(category, 0)) < 0) 159 return 0; 160 return LCINFO(category); 161 } 162 163 /* 164 * return 1 if s matches the alternation pattern p 165 * if minimum!=0 then at least that many chars must match 166 * if standard!=0 and s[0] is a digit leading non-digits are ignored in p 167 */ 168 169 static int 170 match(const char* s, register const char* p, int minimum, int standard) 171 { 172 register const char* t; 173 const char* x; 174 int w; 175 int z; 176 177 z = 0; 178 do 179 { 180 t = s; 181 if (standard) 182 { 183 if (isdigit(*t)) 184 while (*p && !isdigit(*p)) 185 p++; 186 else if (isdigit(*p)) 187 while (*t && !isdigit(*t)) 188 t++; 189 } 190 if (*p) 191 { 192 w = 0; 193 x = p; 194 while (*p && *p != '|') 195 { 196 if (!*t || *t == ',') 197 break; 198 else if (*t == *p) 199 /*ok*/; 200 else if (*t == '-') 201 { 202 if (standard && isdigit(*p)) 203 { 204 t++; 205 continue; 206 } 207 while (*p && *p != '-') 208 p++; 209 if (!*p) 210 break; 211 } 212 else if (*p == '-') 213 { 214 if (standard && isdigit(*t)) 215 { 216 p++; 217 continue; 218 } 219 w = 1; 220 while (*t && *t != '-') 221 t++; 222 if (!*t) 223 break; 224 } 225 else 226 break; 227 t++; 228 p++; 229 } 230 if ((!*t || *t == ',') && (!*p || *p == '|' || w)) 231 return p - x; 232 if (minimum && z < (p - x) && (p - x) >= minimum) 233 z = p - x; 234 } 235 while (*p && *p != '|') 236 p++; 237 } while (*p++); 238 return z; 239 } 240 241 /* 242 * return 1 if s matches the charset names in cp 243 */ 244 245 static int 246 match_charset(register const char* s, register const Lc_charset_t* cp) 247 { 248 return match(s, cp->code, 0, 1) || match(s, cp->alternates, 3, 1) || cp->ms && match(s, cp->ms, 0, 1); 249 } 250 251 /* 252 * low level for lccanon 253 */ 254 255 static size_t 256 canonical(const Lc_language_t* lp, const Lc_territory_t* tp, const Lc_charset_t* cp, const Lc_attribute_list_t* ap, unsigned long flags, char* buf, size_t siz) 257 { 258 register int c; 259 register int u; 260 register char* s; 261 register char* e; 262 register const char* t; 263 264 if (!(flags & (LC_abbreviated|LC_default|LC_local|LC_qualified|LC_verbose))) 265 flags |= LC_abbreviated; 266 s = buf; 267 e = &buf[siz - 3]; 268 if (lp) 269 { 270 if (lp->flags & (LC_debug|LC_default)) 271 { 272 for (t = lp->code; s < e && (*s = *t++); s++); 273 *s++ = 0; 274 return s - buf; 275 } 276 if (flags & LC_verbose) 277 { 278 u = 1; 279 t = lp->name; 280 while (s < e && (c = *t++)) 281 { 282 if (u) 283 { 284 u = 0; 285 c = toupper(c); 286 } 287 else if (!isalnum(c)) 288 u = 1; 289 *s++ = c; 290 } 291 } 292 else 293 for (t = lp->code; s < e && (*s = *t++); s++); 294 } 295 if (s < e) 296 { 297 if (tp && tp != &lc_territories[0] && (!(flags & (LC_abbreviated|LC_default)) || !lp || !streq(lp->code, tp->code))) 298 { 299 if (lp) 300 *s++ = '_'; 301 if (flags & LC_verbose) 302 { 303 u = 1; 304 t = tp->name; 305 while (s < e && (c = *t++) && c != '|') 306 { 307 if (u) 308 { 309 u = 0; 310 c = toupper(c); 311 } 312 else if (!isalnum(c)) 313 u = 1; 314 *s++ = c; 315 } 316 } 317 else 318 for (t = tp->code; s < e && (*s = toupper(*t++)); s++); 319 } 320 if (lp && (!(flags & (LC_abbreviated|LC_default)) || cp != lp->charset) && s < e) 321 { 322 *s++ = '.'; 323 for (t = cp->code; s < e && (c = *t++); s++) 324 { 325 if (islower(c)) 326 c = toupper(c); 327 *s = c; 328 } 329 } 330 for (c = '@'; ap && s < e; ap = ap->next) 331 if (!(flags & (LC_abbreviated|LC_default|LC_verbose)) || !(ap->attribute->flags & LC_default)) 332 { 333 *s++ = c; 334 c = ','; 335 for (t = ap->attribute->name; s < e && (*s = *t++); s++); 336 } 337 } 338 *s++ = 0; 339 return s - buf; 340 } 341 342 /* 343 * generate a canonical locale name in buf 344 */ 345 346 size_t 347 lccanon(Lc_t* lc, unsigned long flags, char* buf, size_t siz) 348 { 349 if ((flags & LC_local) && (!lc->language || !(lc->language->flags & (LC_debug|LC_default)))) 350 { 351 #if _WINIX 352 char lang[64]; 353 char code[64]; 354 char ctry[64]; 355 356 if (lc->index && 357 GetLocaleInfo(lc->index, LOCALE_SENGLANGUAGE, lang, sizeof(lang)) && 358 GetLocaleInfo(lc->index, LOCALE_SENGCOUNTRY, ctry, sizeof(ctry))) 359 { 360 if (!GetLocaleInfo(lc->index, LOCALE_IDEFAULTANSICODEPAGE, code, sizeof(code))) 361 code[0] = 0; 362 if (!lc->charset || !lc->charset->ms) 363 return sfsprintf(buf, siz, "%s_%s", lang, ctry); 364 else if (streq(lc->charset->ms, code)) 365 return sfsprintf(buf, siz, "%s_%s.%s", lang, ctry, code); 366 else 367 return sfsprintf(buf, siz, "%s_%s.%s,%s", lang, ctry, code, lc->charset->ms); 368 } 369 #endif 370 buf[0] = '-'; 371 buf[1] = 0; 372 return 0; 373 } 374 return canonical(lc->language, lc->territory, lc->charset, lc->attributes, flags, buf, siz); 375 } 376 377 /* 378 * make an Lc_t from a locale name 379 */ 380 381 Lc_t* 382 lcmake(const char* name) 383 { 384 register int c; 385 register char* s; 386 register char* e; 387 register const char* t; 388 const char* a; 389 char* w; 390 char* language_name; 391 char* territory_name; 392 char* charset_name; 393 char* attributes_name; 394 Lc_t* lc; 395 const Lc_map_t* mp; 396 const Lc_language_t* lp; 397 const Lc_territory_t* tp; 398 const Lc_territory_t* tpb; 399 const Lc_territory_t* primary; 400 const Lc_charset_t* cp; 401 const Lc_charset_t* ppa; 402 const Lc_attribute_t* ap; 403 Lc_attribute_list_t* ai; 404 Lc_attribute_list_t* al; 405 int i; 406 int n; 407 int z; 408 char buf[PATH_MAX / 2]; 409 char tmp[PATH_MAX / 2]; 410 411 if (!(t = name) || !*t) 412 return &default_lc; 413 for (lc = lcs; lc; lc = lc->next) 414 if (!strcasecmp(t, lc->code) || !strcasecmp(t, lc->name)) 415 return lc; 416 for (mp = lc_maps; mp->code; mp++) 417 if (streq(t, mp->code)) 418 { 419 lp = mp->language; 420 tp = mp->territory; 421 cp = mp->charset; 422 if (!mp->attribute) 423 al = 0; 424 else if (al = newof(0, Lc_attribute_list_t, 1, 0)) 425 al->attribute = mp->attribute; 426 goto mapped; 427 } 428 language_name = buf; 429 territory_name = charset_name = attributes_name = 0; 430 s = buf; 431 e = &buf[sizeof(buf)-2]; 432 a = 0; 433 n = 0; 434 while (s < e && (c = *t++)) 435 { 436 if (isspace(c) || (c == '(' || c == '-' && *t == '-') && ++n) 437 { 438 while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n)) 439 if (!c) 440 break; 441 if (isalnum(c) && !n) 442 *s++ = '-'; 443 else 444 { 445 n = 0; 446 if (!a) 447 { 448 a = t - 1; 449 while (c && c != '_' && c != '.' && c != '@') 450 c = *t++; 451 if (!c) 452 break; 453 } 454 } 455 } 456 if (c == '_' && !territory_name) 457 { 458 *s++ = 0; 459 territory_name = s; 460 } 461 else if (c == '.' && !charset_name) 462 { 463 *s++ = 0; 464 charset_name = s; 465 } 466 else if (c == '@' && !attributes_name) 467 { 468 *s++ = 0; 469 attributes_name = s; 470 } 471 else 472 { 473 if (isupper(c)) 474 c = tolower(c); 475 *s++ = c; 476 } 477 } 478 if ((t = a) && s < e) 479 { 480 if (attributes_name) 481 *s++ = ','; 482 else 483 { 484 *s++ = 0; 485 attributes_name = s; 486 } 487 while (s < e && (c = *t++)) 488 { 489 if (isspace(c) || (c == '(' || c == ')' || c == '-' && *t == '-') && ++n) 490 { 491 while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n)) 492 if (!c) 493 break; 494 if (isalnum(c) && !n) 495 *s++ = '-'; 496 else 497 n = 0; 498 } 499 if (c == '_' || c == '.' || c == '@') 500 break; 501 if (isupper(c)) 502 c = tolower(c); 503 *s++ = c; 504 } 505 } 506 *s = 0; 507 tp = 0; 508 cp = ppa = 0; 509 al = 0; 510 511 /* 512 * language 513 */ 514 515 n = strlen(s = language_name); 516 if (n == 2) 517 for (lp = lc_languages; lp->code && !streq(s, lp->code); lp++); 518 else if (n == 3) 519 { 520 for (lp = lc_languages; lp->code && (!lp->alternates || !match(s, lp->alternates, n, 0)); lp++); 521 if (!lp->code) 522 { 523 c = s[2]; 524 s[2] = 0; 525 for (lp = lc_languages; lp->code && !streq(s, lp->code); lp++); 526 s[2] = c; 527 if (lp->code) 528 n = 1; 529 } 530 } 531 else 532 lp = 0; 533 if (!lp || !lp->code) 534 { 535 for (lp = lc_languages; lp->code && !match(s, lp->name, 0, 0); lp++); 536 if (!lp || !lp->code) 537 { 538 if (!territory_name) 539 { 540 if (n == 2) 541 for (tp = lc_territories; tp->code && !streq(s, tp->code); tp++); 542 else 543 { 544 z = 0; 545 tpb = 0; 546 for (tp = lc_territories; tp->name; tp++) 547 if ((i = match(s, tp->name, 3, 0)) > z) 548 { 549 tpb = tp; 550 if ((z = i) == n) 551 break; 552 } 553 if (tpb) 554 tp = tpb; 555 } 556 if (tp->code) 557 lp = tp->languages[0]; 558 } 559 if (!lp || !lp->code) 560 { 561 /* 562 * name not in the tables so let 563 * _ast_setlocale() and/or setlocale() 564 * handle the validity checks 565 */ 566 567 s = (char*)name; 568 z = strlen(s) + 1; 569 if (!(lp = newof(0, Lc_language_t, 1, z))) 570 return 0; 571 name = ((Lc_language_t*)lp)->code = ((Lc_language_t*)lp)->name = (const char*)(lp + 1); 572 memcpy((char*)lp->code, s, z - 1); 573 tp = &lc_territories[0]; 574 cp = ((Lc_language_t*)lp)->charset = &lc_charsets[0]; 575 al = 0; 576 goto override; 577 } 578 } 579 } 580 581 /* 582 * territory 583 */ 584 585 if (!tp || !tp->code) 586 { 587 if (!(s = territory_name)) 588 { 589 n = 0; 590 primary = 0; 591 for (tp = lc_territories; tp->code; tp++) 592 if (tp->languages[0] == lp) 593 { 594 if (tp->flags & LC_primary) 595 { 596 n = 1; 597 primary = tp; 598 break; 599 } 600 n++; 601 primary = tp; 602 } 603 if (n == 1) 604 tp = primary; 605 s = (char*)lp->code; 606 } 607 if (!tp || !tp->code) 608 { 609 n = strlen(s); 610 if (n == 2) 611 { 612 for (tp = lc_territories; tp->code; tp++) 613 if (streq(s, tp->code)) 614 { 615 for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++); 616 if (i >= elementsof(tp->languages)) 617 tp = 0; 618 break; 619 } 620 } 621 else 622 { 623 for (tp = lc_territories; tp->code; tp++) 624 if (match(s, tp->name, 3, 0)) 625 { 626 for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++); 627 if (i < elementsof(tp->languages)) 628 break; 629 } 630 } 631 if (tp && !tp->code) 632 tp = 0; 633 } 634 } 635 636 /* 637 * attributes -- done here to catch misplaced charset references 638 */ 639 640 if (s = attributes_name) 641 { 642 do 643 { 644 for (w = s; *s && *s != ','; s++); 645 c = *s; 646 *s = 0; 647 if (!(cp = lp->charset) || !match_charset(w, cp)) 648 for (cp = lc_charsets; cp->code; cp++) 649 if (match_charset(w, cp)) 650 { 651 ppa = cp; 652 break; 653 } 654 if (!cp->code) 655 { 656 for (i = 0; i < elementsof(lp->attributes) && (ap = lp->attributes[i]); i++) 657 if (match(w, ap->name, 5, 0)) 658 { 659 if (ai = newof(0, Lc_attribute_list_t, 1, 0)) 660 { 661 ai->attribute = ap; 662 ai->next = al; 663 al = ai; 664 } 665 break; 666 } 667 if (i >= elementsof(lp->attributes) && (ap = newof(0, Lc_attribute_t, 1, sizeof(Lc_attribute_list_t) + s - w + 1))) 668 { 669 ai = (Lc_attribute_list_t*)(ap + 1); 670 strcpy((char*)(((Lc_attribute_t*)ap)->name = (const char*)(ai + 1)), w); 671 ai->attribute = ap; 672 ai->next = al; 673 al = ai; 674 } 675 } 676 *s = c; 677 } while (*s++); 678 } 679 680 /* 681 * charset 682 */ 683 684 if (s = charset_name) 685 for (cp = lc_charsets; cp->code; cp++) 686 if (match_charset(s, cp)) 687 break; 688 if (!cp || !cp->code) 689 cp = ppa ? ppa : lp->charset; 690 mapped: 691 z = canonical(lp, tp, cp, al, 0, s = tmp, sizeof(tmp)); 692 693 /* 694 * add to the list of possibly active locales 695 */ 696 697 override: 698 n = strlen(name) + 1; 699 if (!(lc = newof(0, Lc_t, 1, n + z))) 700 return 0; 701 strcpy((char*)(lc->name = (const char*)(lc + 1)), name); 702 strcpy((char*)(lc->code = lc->name + n), s); 703 lc->language = lp ? lp : &lc_languages[0]; 704 lc->territory = tp ? tp : &lc_territories[0]; 705 lc->charset = cp ? cp : &lc_charsets[0]; 706 lc->attributes = al; 707 for (i = 0; i < elementsof(lc->info); i++) 708 lc->info[i].lc = lc; 709 #if _WINIX 710 n = SUBLANG_DEFAULT; 711 if (tp) 712 for (i = 0; i < elementsof(tp->languages); i++) 713 if (lp == tp->languages[i]) 714 { 715 n = tp->indices[i]; 716 break; 717 } 718 lc->index = MAKELCID(MAKELANGID(lp->index, n), SORT_DEFAULT); 719 #endif 720 lc->next = lcs; 721 lcs = lc; 722 return lc; 723 } 724 725 /* 726 * return an Lc_t* for each locale in the tables 727 * one Lc_t is allocated on the first call with lc==0 728 * this is freed when 0 returned 729 * the return value is not part of the lcmake() cache 730 */ 731 732 typedef struct Lc_scan_s 733 { 734 Lc_t lc; 735 Lc_attribute_list_t list; 736 int territory; 737 int language; 738 int attribute; 739 char buf[256]; 740 } Lc_scan_t; 741 742 Lc_t* 743 lcscan(Lc_t* lc) 744 { 745 register Lc_scan_t* ls; 746 747 if (!(ls = (Lc_scan_t*)lc)) 748 { 749 if (!(ls = newof(0, Lc_scan_t, 1, 0))) 750 return 0; 751 ls->lc.code = ls->lc.name = ls->buf; 752 ls->territory = -1; 753 ls->language = elementsof(ls->lc.territory->languages); 754 ls->attribute = elementsof(ls->lc.language->attributes); 755 } 756 if (++ls->attribute >= elementsof(ls->lc.language->attributes) || !(ls->list.attribute = ls->lc.language->attributes[ls->attribute])) 757 { 758 if (++ls->language >= elementsof(ls->lc.territory->languages) || !(ls->lc.language = ls->lc.territory->languages[ls->language])) 759 { 760 if (!lc_territories[++ls->territory].code) 761 { 762 free(ls); 763 return 0; 764 } 765 ls->lc.territory = &lc_territories[ls->territory]; 766 ls->lc.language = ls->lc.territory->languages[ls->language = 0]; 767 } 768 if (ls->lc.language) 769 { 770 ls->lc.charset = ls->lc.language->charset ? ls->lc.language->charset : &lc_charsets[0]; 771 ls->list.attribute = ls->lc.language->attributes[ls->attribute = 0]; 772 } 773 else 774 { 775 ls->lc.charset = &lc_charsets[0]; 776 ls->list.attribute = 0; 777 } 778 } 779 ls->lc.attributes = ls->list.attribute ? &ls->list : (Lc_attribute_list_t*)0; 780 #if _WINIX 781 if (!ls->lc.language || !ls->lc.language->index) 782 ls->lc.index = 0; 783 else 784 { 785 if ((!ls->list.attribute || !(ls->lc.index = ls->list.attribute->index)) && 786 (!ls->lc.territory || !(ls->lc.index = ls->lc.territory->indices[ls->language]))) 787 ls->lc.index = SUBLANG_DEFAULT; 788 ls->lc.index = MAKELCID(MAKELANGID(ls->lc.language->index, ls->lc.index), SORT_DEFAULT); 789 } 790 #endif 791 canonical(ls->lc.language, ls->lc.territory, ls->lc.charset, ls->lc.attributes, 0, ls->buf, sizeof(ls->buf)); 792 return (Lc_t*)ls; 793 } 794