/*********************************************************************** * * * This software is part of the ast package * * Copyright (c) 1985-2007 AT&T Knowledge Ventures * * and is licensed under the * * Common Public License, Version 1.0 * * by AT&T Knowledge Ventures * * * * A copy of the License is available at * * http://www.opensource.org/licenses/cpl1.0.txt * * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * * * * Information and Software Systems Research * * AT&T Research * * Florham Park NJ * * * * Glenn Fowler * * David Korn * * Phong Vo * * * ***********************************************************************/ #pragma prototyped /* * locale state implementation */ #include "lclib.h" #include #if _WINIX #include #define LANG_CHINESE_SIMPLIFIED LANG_CHINESE #define LANG_CHINESE_TRADITIONAL LANG_CHINESE #define LANG_NORWEGIAN_BOKMAL LANG_NORWEGIAN #define LANG_NORWEGIAN_NYNORSK LANG_NORWEGIAN #define LANG_SERBO_CROATIAN LANG_CROATIAN #define CTRY_CZECH_REPUBLIC CTRY_CZECH #define SUBLANG_CHINESE_SIMPLIFIED_CHINA SUBLANG_CHINESE_SIMPLIFIED #define SUBLANG_CHINESE_SIMPLIFIED_HONG_KONG SUBLANG_CHINESE_HONGKONG #define SUBLANG_CHINESE_SIMPLIFIED_SINGAPORE SUBLANG_CHINESE_SINGAPORE #define SUBLANG_CHINESE_TRADITIONAL_TAIWAN SUBLANG_CHINESE_TRADITIONAL #define SUBLANG_DUTCH_NETHERLANDS_ANTILLES SUBLANG_DUTCH #define SUBLANG_DUTCH_BELGIUM SUBLANG_DUTCH_BELGIAN #define SUBLANG_ENGLISH_AUSTRALIA SUBLANG_ENGLISH_AUS #define SUBLANG_ENGLISH_CANADA SUBLANG_ENGLISH_CAN #define SUBLANG_ENGLISH_IRELAND SUBLANG_ENGLISH_EIRE #define SUBLANG_ENGLISH_NEW_ZEALAND SUBLANG_ENGLISH_NZ #define SUBLANG_ENGLISH_TRINIDAD_TOBAGO SUBLANG_ENGLISH_CARIBBEAN #define SUBLANG_ENGLISH_UNITED_KINGDOM SUBLANG_ENGLISH_UK #define SUBLANG_ENGLISH_UNITED_STATES SUBLANG_ENGLISH_US #define SUBLANG_FRENCH_BELGIUM SUBLANG_FRENCH_BELGIAN #define SUBLANG_FRENCH_CANADA SUBLANG_FRENCH_CANADIAN #define SUBLANG_FRENCH_SWITZERLAND SUBLANG_FRENCH_SWISS #define SUBLANG_GERMAN_AUSTRIA SUBLANG_GERMAN_AUSTRIAN #define SUBLANG_GERMAN_SWITZERLAND SUBLANG_GERMAN_SWISS #define SUBLANG_ITALIAN_SWITZERLAND SUBLANG_ITALIAN_SWISS #define SUBLANG_NORWEGIAN_BOKMAL_NORWAY SUBLANG_NORWEGIAN_BOKMAL #define SUBLANG_NORWEGIAN_NORWAY SUBLANG_NORWEGIAN_BOKMAL #define SUBLANG_NORWEGIAN_NYNORSK_NORWAY SUBLANG_NORWEGIAN_NYNORSK #define SUBLANG_PORTUGUESE_BRAZIL SUBLANG_PORTUGUESE_BRAZILIAN #endif #include "lctab.h" static Lc_numeric_t default_numeric = { '.', -1 }; static Lc_t default_lc = { "C", "POSIX", &language[0], &territory[0], &charset[0], 0, LC_default|LC_checked|LC_local, 0, { { &default_lc, 0, 0 }, { &default_lc, 0, 0 }, { &default_lc, 0, 0 }, { &default_lc, 0, 0 }, { &default_lc, 0, 0 }, { &default_lc, 0, (void*)&default_numeric }, { &default_lc, 0, 0 }, { &default_lc, 0, 0 }, { &default_lc, 0, 0 }, { &default_lc, 0, 0 }, { &default_lc, 0, 0 }, { &default_lc, 0, 0 }, { &default_lc, 0, 0 }, { &default_lc, 0, 0 } } }; static Lc_numeric_t debug_numeric = { ',', '.' }; static Lc_t debug_lc = { "debug", "debug", &language[1], &territory[1], &charset[0], 0, LC_debug|LC_checked|LC_local, 0, { { &debug_lc, 0, 0 }, { &debug_lc, 0, 0 }, { &debug_lc, 0, 0 }, { &debug_lc, 0, 0 }, { &debug_lc, 0, 0 }, { &debug_lc, 0, (void*)&debug_numeric }, { &debug_lc, 0, 0 }, { &debug_lc, 0, 0 }, { &debug_lc, 0, 0 }, { &debug_lc, 0, 0 }, { &debug_lc, 0, 0 }, { &debug_lc, 0, 0 }, { &debug_lc, 0, 0 }, { &debug_lc, 0, 0 } }, &default_lc }; static Lc_t* lcs = &debug_lc; Lc_t* locales[] = { &default_lc, &default_lc, &default_lc, &default_lc, &default_lc, &default_lc, &default_lc, &default_lc, &default_lc, &default_lc, &default_lc, &default_lc, &default_lc, &default_lc }; /* * return the internal category index for category */ int lcindex(int category, int min) { switch (category) { case LC_ALL: return min ? -1 : AST_LC_ALL; case LC_ADDRESS: return AST_LC_ADDRESS; case LC_COLLATE: return AST_LC_COLLATE; case LC_CTYPE: return AST_LC_CTYPE; case LC_IDENTIFICATION: return AST_LC_IDENTIFICATION; case LC_MEASUREMENT: return AST_LC_MEASUREMENT; case LC_MESSAGES: return AST_LC_MESSAGES; case LC_MONETARY: return AST_LC_MONETARY; case LC_NAME: return AST_LC_NAME; case LC_NUMERIC: return AST_LC_NUMERIC; case LC_PAPER: return AST_LC_PAPER; case LC_TELEPHONE: return AST_LC_TELEPHONE; case LC_TIME: return AST_LC_TIME; case LC_XLITERATE: return AST_LC_XLITERATE; } return -1; } /* * return the first category table entry */ Lc_category_t* lccategories(void) { return &categories[0]; } /* * return the current info for category */ Lc_info_t* lcinfo(register int category) { if ((category = lcindex(category, 0)) < 0) return 0; return LCINFO(category); } /* * return 1 if s matches the alternation pattern p * if minimum!=0 then at least that many chars must match * if standard!=0 and s[0] is a digit leading non-digits are ignored in p */ static int match(const char* s, register const char* p, int minimum, int standard) { register const char* t; const char* x; int w; int z; z = 0; do { t = s; if (standard) { if (isdigit(*t)) while (*p && !isdigit(*p)) p++; else if (isdigit(*p)) while (*t && !isdigit(*t)) t++; } if (*p) { w = 0; x = p; while (*p && *p != '|') { if (!*t || *t == ',') break; else if (*t == *p) /*ok*/; else if (*t == '-') { if (standard && isdigit(*p)) { t++; continue; } while (*p && *p != '-') p++; if (!*p) break; } else if (*p == '-') { if (standard && isdigit(*t)) { p++; continue; } w = 1; while (*t && *t != '-') t++; if (!*t) break; } else break; t++; p++; } if ((!*t || *t == ',') && (!*p || *p == '|' || w)) return p - x; if (minimum && z < (p - x) && (p - x) >= minimum) z = p - x; } while (*p && *p != '|') p++; } while (*p++); return z; } /* * return 1 if s matches the charset names in cp */ static int match_charset(register const char* s, register const Lc_charset_t* cp) { return match(s, cp->code, 0, 1) || match(s, cp->alternates, 3, 1) || cp->ms && match(s, cp->ms, 0, 1); } /* * low level for lccanon */ static size_t canonical(const Lc_language_t* lp, const Lc_territory_t* tp, const Lc_charset_t* cp, const Lc_attribute_list_t* ap, unsigned long flags, char* buf, size_t siz) { register int c; register int u; register char* s; register char* e; register const char* t; if (!(flags & (LC_abbreviated|LC_default|LC_local|LC_qualified|LC_verbose))) flags |= LC_abbreviated; s = buf; e = &buf[siz - 3]; if (lp) { if (lp->flags & (LC_debug|LC_default)) { for (t = lp->code; s < e && (*s = *t++); s++); *s++ = 0; return s - buf; } if (flags & LC_verbose) { u = 1; t = lp->name; while (s < e && (c = *t++)) { if (u) { u = 0; c = toupper(c); } else if (!isalnum(c)) u = 1; *s++ = c; } } else for (t = lp->code; s < e && (*s = *t++); s++); } if (s < e) { if (tp && tp != &territory[0] && (!(flags & (LC_abbreviated|LC_default)) || !lp || !streq(lp->code, tp->code))) { if (lp) *s++ = '_'; if (flags & LC_verbose) { u = 1; t = tp->name; while (s < e && (c = *t++) && c != '|') { if (u) { u = 0; c = toupper(c); } else if (!isalnum(c)) u = 1; *s++ = c; } } else for (t = tp->code; s < e && (*s = toupper(*t++)); s++); } if (lp && (!(flags & (LC_abbreviated|LC_default)) || cp != lp->charset) && s < e) { *s++ = '.'; for (t = cp->code; s < e && (c = *t++); s++) { if (islower(c)) c = toupper(c); *s = c; } } for (c = '@'; ap && s < e; ap = ap->next) if (!(flags & (LC_abbreviated|LC_default|LC_verbose)) || !(ap->attribute->flags & LC_default)) { *s++ = c; c = ','; for (t = ap->attribute->name; s < e && (*s = *t++); s++); } } *s++ = 0; return s - buf; } /* * generate a canonical locale name in buf */ size_t lccanon(Lc_t* lc, unsigned long flags, char* buf, size_t siz) { if ((flags & LC_local) && (!lc->language || !(lc->language->flags & (LC_debug|LC_default)))) { #if _WINIX char lang[64]; char code[64]; char ctry[64]; if (lc->index && GetLocaleInfo(lc->index, LOCALE_SENGLANGUAGE, lang, sizeof(lang)) && GetLocaleInfo(lc->index, LOCALE_SENGCOUNTRY, ctry, sizeof(ctry))) { if (!GetLocaleInfo(lc->index, LOCALE_IDEFAULTANSICODEPAGE, code, sizeof(code))) code[0] = 0; if (!lc->charset || !lc->charset->ms) return sfsprintf(buf, siz, "%s_%s", lang, ctry); else if (streq(lc->charset->ms, code)) return sfsprintf(buf, siz, "%s_%s.%s", lang, ctry, code); else return sfsprintf(buf, siz, "%s_%s.%s,%s", lang, ctry, code, lc->charset->ms); } #endif buf[0] = '-'; buf[1] = 0; return 0; } return canonical(lc->language, lc->territory, lc->charset, lc->attributes, flags, buf, siz); } /* * make an Lc_t from a locale name */ Lc_t* lcmake(const char* name) { register int c; register char* s; register char* e; register const char* t; const char* a; char* w; char* language_name; char* territory_name; char* charset_name; char* attributes_name; Lc_t* lc; const Lc_map_t* mp; const Lc_language_t* lp; const Lc_territory_t* tp; const Lc_territory_t* tpb; const Lc_territory_t* primary; const Lc_charset_t* cp; const Lc_charset_t* ppa; const Lc_attribute_t* ap; Lc_attribute_list_t* ai; Lc_attribute_list_t* al; int i; int n; int z; char buf[PATH_MAX / 2]; char tmp[PATH_MAX / 2]; if (!(t = name) || !*t) return &default_lc; for (lc = lcs; lc; lc = lc->next) if (!strcasecmp(t, lc->code) || !strcasecmp(t, lc->name)) return lc; for (mp = map; mp->code; mp++) if (streq(t, mp->code)) { lp = mp->language; tp = mp->territory; cp = mp->charset; if (!mp->attribute) al = 0; else if (al = newof(0, Lc_attribute_list_t, 1, 0)) al->attribute = mp->attribute; goto mapped; } language_name = buf; territory_name = charset_name = attributes_name = 0; s = buf; e = &buf[sizeof(buf)-2]; a = 0; n = 0; while (s < e && (c = *t++)) { if (isspace(c) || (c == '(' || c == '-' && *t == '-') && ++n) { while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n)) if (!c) break; if (isalnum(c) && !n) *s++ = '-'; else { n = 0; if (!a) { a = t - 1; while (c && c != '_' && c != '.' && c != '@') c = *t++; if (!c) break; } } } if (c == '_' && !territory_name) { *s++ = 0; territory_name = s; } else if (c == '.' && !charset_name) { *s++ = 0; charset_name = s; } else if (c == '@' && !attributes_name) { *s++ = 0; attributes_name = s; } else { if (isupper(c)) c = tolower(c); *s++ = c; } } if ((t = a) && s < e) { if (attributes_name) *s++ = ','; else { *s++ = 0; attributes_name = s; } while (s < e && (c = *t++)) { if (isspace(c) || (c == '(' || c == ')' || c == '-' && *t == '-') && ++n) { while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n)) if (!c) break; if (isalnum(c) && !n) *s++ = '-'; else n = 0; } if (c == '_' || c == '.' || c == '@') break; if (isupper(c)) c = tolower(c); *s++ = c; } } *s = 0; tp = 0; cp = ppa = 0; al = 0; /* * language */ n = strlen(s = language_name); if (n == 2) for (lp = language; lp->code && !streq(s, lp->code); lp++); else if (n == 3) { for (lp = language; lp->code && (!lp->alternates || !match(s, lp->alternates, n, 0)); lp++); if (!lp->code) { c = s[2]; s[2] = 0; for (lp = language; lp->code && !streq(s, lp->code); lp++); s[2] = c; if (lp->code) n = 1; } } else lp = 0; if (!lp || !lp->code) { for (lp = language; lp->code && !match(s, lp->name, 0, 0); lp++); if (!lp || !lp->code) { if (!territory_name) { if (n == 2) for (tp = territory; tp->code && !streq(s, tp->code); tp++); else { z = 0; tpb = 0; for (tp = territory; tp->name; tp++) if ((i = match(s, tp->name, 3, 0)) > z) { tpb = tp; if ((z = i) == n) break; } if (tpb) tp = tpb; } if (tp->code) lp = tp->languages[0]; } if (!lp || !lp->code) { /* * name not in the tables so let * _ast_setlocale() and/or setlocale() * handle the validity checks */ s = (char*)name; z = strlen(s) + 1; if (!(lp = newof(0, Lc_language_t, 1, z))) return 0; name = ((Lc_language_t*)lp)->code = ((Lc_language_t*)lp)->name = (const char*)(lp + 1); memcpy((char*)lp->code, s, z - 1); tp = &territory[0]; cp = ((Lc_language_t*)lp)->charset = &charset[0]; al = 0; goto override; } } } /* * territory */ if (!tp || !tp->code) { if (!(s = territory_name)) { n = 0; primary = 0; for (tp = territory; tp->code; tp++) if (tp->languages[0] == lp) { if (tp->flags & LC_primary) { n = 1; primary = tp; break; } n++; primary = tp; } if (n == 1) tp = primary; s = (char*)lp->code; } if (!tp || !tp->code) { n = strlen(s); if (n == 2) { for (tp = territory; tp->code; tp++) if (streq(s, tp->code)) { for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++); if (i >= elementsof(tp->languages)) tp = 0; break; } } else { for (tp = territory; tp->code; tp++) if (match(s, tp->name, 3, 0)) { for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++); if (i < elementsof(tp->languages)) break; } } if (tp && !tp->code) tp = 0; } } /* * attributes -- done here to catch misplaced charset references */ if (s = attributes_name) { do { for (w = s; *s && *s != ','; s++); c = *s; *s = 0; if (!(cp = lp->charset) || !match_charset(w, cp)) for (cp = charset; cp->code; cp++) if (match_charset(w, cp)) { ppa = cp; break; } if (!cp->code) { for (i = 0; i < elementsof(lp->attributes) && (ap = lp->attributes[i]); i++) if (match(w, ap->name, 5, 0)) { if (ai = newof(0, Lc_attribute_list_t, 1, 0)) { ai->attribute = ap; ai->next = al; al = ai; } break; } if (i >= elementsof(lp->attributes) && (ap = newof(0, Lc_attribute_t, 1, sizeof(Lc_attribute_list_t) + s - w + 1))) { ai = (Lc_attribute_list_t*)(ap + 1); strcpy((char*)(((Lc_attribute_t*)ap)->name = (const char*)(ai + 1)), w); ai->attribute = ap; ai->next = al; al = ai; } } *s = c; } while (*s++); } /* * charset */ if (s = charset_name) for (cp = charset; cp->code; cp++) if (match_charset(s, cp)) break; if (!cp || !cp->code) cp = ppa ? ppa : lp->charset; mapped: z = canonical(lp, tp, cp, al, 0, s = tmp, sizeof(tmp)); /* * add to the list of possibly active locales */ override: n = strlen(name) + 1; if (!(lc = newof(0, Lc_t, 1, n + z))) return 0; strcpy((char*)(lc->name = (const char*)(lc + 1)), name); strcpy((char*)(lc->code = lc->name + n), s); lc->language = lp ? lp : &language[0]; lc->territory = tp ? tp : &territory[0]; lc->charset = cp ? cp : &charset[0]; lc->attributes = al; for (i = 0; i < elementsof(lc->info); i++) lc->info[i].lc = lc; #if _WINIX n = SUBLANG_DEFAULT; if (tp) for (i = 0; i < elementsof(tp->languages); i++) if (lp == tp->languages[i]) { n = tp->indices[i]; break; } lc->index = MAKELCID(MAKELANGID(lp->index, n), SORT_DEFAULT); #endif lc->next = lcs; lcs = lc; return lc; } /* * return an Lc_t* for each locale in the tables * one Lc_t is allocated on the first call with lc==0 * this is freed when 0 returned * the return value is not part of the lcmake() cache */ typedef struct Lc_scan_s { Lc_t lc; Lc_attribute_list_t list; int territory; int language; int attribute; char buf[256]; } Lc_scan_t; Lc_t* lcscan(Lc_t* lc) { register Lc_scan_t* ls; if (!(ls = (Lc_scan_t*)lc)) { if (!(ls = newof(0, Lc_scan_t, 1, 0))) return 0; ls->lc.code = ls->lc.name = ls->buf; ls->territory = -1; ls->language = elementsof(ls->lc.territory->languages); ls->attribute = elementsof(ls->lc.language->attributes); } if (++ls->attribute >= elementsof(ls->lc.language->attributes) || !(ls->list.attribute = ls->lc.language->attributes[ls->attribute])) { if (++ls->language >= elementsof(ls->lc.territory->languages) || !(ls->lc.language = ls->lc.territory->languages[ls->language])) { if (++ls->territory >= (elementsof(territory) - 1)) { free(ls); return 0; } ls->lc.territory = &territory[ls->territory]; ls->lc.language = ls->lc.territory->languages[ls->language = 0]; } if (ls->lc.language) { ls->lc.charset = ls->lc.language->charset ? ls->lc.language->charset : &charset[0]; ls->list.attribute = ls->lc.language->attributes[ls->attribute = 0]; } else { ls->lc.charset = &charset[0]; ls->list.attribute = 0; } } ls->lc.attributes = ls->list.attribute ? &ls->list : (Lc_attribute_list_t*)0; #if _WINIX if (!ls->lc.language || !ls->lc.language->index) ls->lc.index = 0; else { if ((!ls->list.attribute || !(ls->lc.index = ls->list.attribute->index)) && (!ls->lc.territory || !(ls->lc.index = ls->lc.territory->indices[ls->language]))) ls->lc.index = SUBLANG_DEFAULT; ls->lc.index = MAKELCID(MAKELANGID(ls->lc.language->index, ls->lc.index), SORT_DEFAULT); } #endif canonical(ls->lc.language, ls->lc.territory, ls->lc.charset, ls->lc.attributes, 0, ls->buf, sizeof(ls->buf)); return (Lc_t*)ls; }