1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2014 Garrett D'Amore <garrett@damore.org> 14 */ 15 16 /* 17 * This file implements the 2008 newlocale and friends handling. 18 */ 19 20 #ifndef _LCONV_C99 21 #define _LCONV_C99 22 #endif 23 24 #include "lint.h" 25 #include <atomic.h> 26 #include <locale.h> 27 #include <sys/types.h> 28 #include <sys/mman.h> 29 #include <errno.h> 30 #include <string.h> 31 #include "libc.h" 32 #include "mtlib.h" 33 #include "tsd.h" 34 #include "localeimpl.h" 35 #include "lctype.h" 36 37 /* 38 * Big Theory of Locales: 39 * 40 * (It is recommended that readers familiarize themselves with the POSIX 41 * 2008 (XPG Issue 7) specifications for locales, first.) 42 * 43 * Historically, we had a bunch of global variables that stored locale 44 * data. While this worked well, it limited applications to a single locale 45 * at a time. This doesn't work well in certain server applications. 46 * 47 * Issue 7, X/Open introduced the concept of a locale_t object, along with 48 * versions of functions that can take this object as a parameter, along 49 * with functions to clone and manipulate these locale objects. The new 50 * functions are named with a _l() suffix. 51 * 52 * Additionally uselocale() is introduced which can change the locale of 53 * of a single thread. However, setlocale() can still be used to change 54 * the global locale. 55 * 56 * In our implementation, we use libc's TSD to store the locale data that 57 * was previously global. We still have global data because some applications 58 * have had those global objects compiled into them. (Such applications will 59 * be unable to benefit from uselocale(), btw.) The legacy routines are 60 * reimplemented as wrappers that use the appropriate locale object by 61 * calling uselocale(). uselocale() when passed a NULL pointer returns the 62 * thread-specific locale object if one is present, or the global locale 63 * object otherwise. Note that once the TSD data is set, the only way 64 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE 65 * to uselocale(). 66 * 67 * We are careful to minimize performance impact of multiple calls to 68 * uselocale() or setlocale() by using a cache of locale data whenever possible. 69 * As a consequence of this, applications that iterate over all possible 70 * locales will burn through a lot of virtual memory, but we find such 71 * applications rare. (locale -a might be an exception, but it is short lived.) 72 * 73 * Category data is never released (although enclosing locale objects might be), 74 * in order to guarantee thread-safety. Calling freelocale() on an object 75 * while it is in use by another thread is a programmer error (use-after-free) 76 * and we don't bother to note it further. 77 * 78 * Locale objects (global locales) established by setlocale() are also 79 * never freed (for MT safety), but we will save previous locale objects 80 * and reuse them when we can. 81 */ 82 83 typedef struct locdata *(*loadfn_t)(const char *); 84 85 static const loadfn_t loaders[LC_ALL] = { 86 __lc_ctype_load, 87 __lc_numeric_load, 88 __lc_time_load, 89 __lc_collate_load, 90 __lc_monetary_load, 91 __lc_messages_load, 92 }; 93 94 extern struct lc_monetary lc_monetary_posix; 95 extern struct lc_numeric lc_numeric_posix; 96 extern struct lc_messages lc_messages_posix; 97 extern struct lc_time lc_time_posix; 98 extern struct lc_ctype lc_ctype_posix; 99 extern struct lc_collate lc_collate_posix; 100 extern struct _RuneLocale _DefaultRuneLocale; 101 102 static struct _locale posix_locale = { 103 /* locdata */ 104 .locdata = { 105 &__posix_ctype_locdata, 106 &__posix_numeric_locdata, 107 &__posix_time_locdata, 108 &__posix_collate_locdata, 109 &__posix_monetary_locdata, 110 &__posix_messages_locdata, 111 }, 112 .locname = "C", 113 .ctype = &lc_ctype_posix, 114 .numeric = &lc_numeric_posix, 115 .collate = &lc_collate_posix, 116 .monetary = &lc_monetary_posix, 117 .messages = &lc_messages_posix, 118 .time = &lc_time_posix, 119 .runelocale = &_DefaultRuneLocale, 120 }; 121 122 locale_t ___global_locale = &posix_locale; 123 124 locale_t 125 __global_locale(void) 126 { 127 return (___global_locale); 128 } 129 130 /* 131 * Locale data for hybrid C.UTF-8 locale having all the characteristics of 132 * default C/POSIX locale, except for LC_CTYPE data which is retrieved from 133 * cache/file as for other UTF-8 locales. 134 */ 135 static struct locdata cutf_locdata[LC_ALL] = { 136 { "C.UTF-8", NULL }, /* unused */ 137 { "C.UTF-8", &lc_numeric_posix }, 138 { "C.UTF-8", &lc_time_posix }, 139 { "C.UTF-8", &lc_collate_posix }, 140 { "C.UTF-8", &lc_monetary_posix }, 141 { "C.UTF-8", &lc_messages_posix }, 142 }; 143 144 /* 145 * Category names for getenv() Note that this was modified 146 * for Solaris. See <iso/locale_iso.h>. 147 */ 148 #define NUM_CATS 7 149 static char *categories[7] = { 150 "LC_CTYPE", 151 "LC_NUMERIC", 152 "LC_TIME", 153 "LC_COLLATE", 154 "LC_MONETARY", 155 "LC_MESSAGES", 156 "LC_ALL", 157 }; 158 159 /* 160 * Prototypes. 161 */ 162 static const char *get_locale_env(int); 163 static struct locdata *locdata_get(int, const char *); 164 static struct locdata *locdata_get_cache(int, const char *); 165 static locale_t mklocname(locale_t); 166 167 /* 168 * Some utility routines. 169 */ 170 171 struct locdata * 172 __locdata_alloc(const char *name, size_t memsz) 173 { 174 struct locdata *ldata; 175 176 if ((ldata = lmalloc(sizeof (*ldata))) == NULL) { 177 return (NULL); 178 } 179 if ((ldata->l_data[0] = libc_malloc(memsz)) == NULL) { 180 lfree(ldata, sizeof (*ldata)); 181 errno = ENOMEM; 182 return (NULL); 183 } 184 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname)); 185 186 return (ldata); 187 } 188 189 /* 190 * Normally we never free locale data truly, but if we failed to load it 191 * for some reason, this routine is used to cleanup the partial mess. 192 */ 193 void 194 __locdata_free(struct locdata *ldata) 195 { 196 for (int i = 0; i < NLOCDATA; i++) 197 libc_free(ldata->l_data[i]); 198 if (ldata->l_map != NULL && ldata->l_map_len) 199 (void) munmap(ldata->l_map, ldata->l_map_len); 200 lfree(ldata, sizeof (*ldata)); 201 } 202 203 /* 204 * It turns out that for performance reasons we would really like to 205 * cache the most recently referenced locale data to avoid wasteful 206 * loading from files. 207 */ 208 209 static struct locdata *cache_data[LC_ALL]; 210 static struct locdata *cat_data[LC_ALL]; 211 static mutex_t cache_lock = DEFAULTMUTEX; 212 213 /* 214 * Returns the cached data if the locale name is the same. If not, 215 * returns NULL (cache miss). The locdata is returned with a hold on 216 * it, taken on behalf of the caller. The caller should drop the hold 217 * when it is finished. 218 */ 219 static struct locdata * 220 locdata_get_cache(int category, const char *locname) 221 { 222 struct locdata *loc; 223 224 if (category < 0 || category >= LC_ALL) 225 return (NULL); 226 227 /* Try cache first. */ 228 lmutex_lock(&cache_lock); 229 loc = cache_data[category]; 230 231 if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) { 232 lmutex_unlock(&cache_lock); 233 return (loc); 234 } 235 236 /* 237 * Failing that try previously loaded locales (linear search) -- 238 * this could be optimized to a hash, but its unlikely that a single 239 * application will ever need to work with more than a few locales. 240 */ 241 for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) { 242 if (strcmp(locname, loc->l_lname) == 0) { 243 break; 244 } 245 } 246 247 /* 248 * Finally, if we still don't have one, try loading the locale 249 * data from the actual on-disk data. 250 * 251 * We drop the lock (libc wants to ensure no internal locks 252 * are held when we call other routines required to read from 253 * files, allocate memory, etc.) There is a small race here, 254 * but the consequences of the race are benign -- if multiple 255 * threads hit this at precisely the same point, we could 256 * wind up with duplicates of the locale data in the cache. 257 * 258 * This wastes the memory for an extra copy of the locale 259 * data, but there is no further harm beyond that. Its not 260 * worth the effort to recode this to something "safe" 261 * (which would require rescanning the list, etc.), given 262 * that this race will probably never actually occur. 263 */ 264 if (loc == NULL) { 265 lmutex_unlock(&cache_lock); 266 loc = (*loaders[category])(locname); 267 lmutex_lock(&cache_lock); 268 if (loc != NULL) 269 (void) strlcpy(loc->l_lname, locname, 270 sizeof (loc->l_lname)); 271 } 272 273 /* 274 * Assuming we got one, update the cache, and stick us on the list 275 * of loaded locale data. We insert into the head (more recent 276 * use is likely to win.) 277 */ 278 if (loc != NULL) { 279 cache_data[category] = loc; 280 if (!loc->l_cached) { 281 loc->l_cached = 1; 282 loc->l_next = cat_data[category]; 283 cat_data[category] = loc; 284 } 285 } 286 287 lmutex_unlock(&cache_lock); 288 return (loc); 289 } 290 291 /* Charmap aliases, mostly found in Linux */ 292 static const struct { 293 const char *alias; 294 const char *name; 295 } cmalias[] = { 296 { "utf8", "UTF-8" }, 297 { "iso88591", "ISO8859-1" }, 298 { "iso885915", "ISO8859-15" }, 299 { "gb18030", "GB18030" }, 300 { "koi8r", "KOI8-R" }, 301 { NULL, NULL } 302 }; 303 304 /* 305 * Routine to get the locdata for a given category and locale. 306 * This includes retrieving it from cache, retrieving it from 307 * a file, etc. 308 */ 309 static struct locdata * 310 locdata_get(int category, const char *locname) 311 { 312 char scratch[ENCODING_LEN + 1]; 313 char scratch2[ENCODING_LEN + 1]; 314 char *slash, *cm; 315 int cnt; 316 int len; 317 int i; 318 319 if (locname == NULL || *locname == 0) { 320 locname = get_locale_env(category); 321 } 322 323 /* 324 * Extract the locale name for the category if it is a composite 325 * locale. 326 */ 327 if ((slash = strchr(locname, '/')) != NULL) { 328 for (cnt = category; cnt && slash != NULL; cnt--) { 329 locname = slash + 1; 330 slash = strchr(locname, '/'); 331 } 332 if (slash) { 333 len = slash - locname + 1; 334 if (len >= sizeof (scratch)) { 335 len = sizeof (scratch); 336 } 337 } else { 338 len = sizeof (scratch); 339 } 340 (void) strlcpy(scratch, locname, len); 341 locname = scratch; 342 } 343 344 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0)) 345 return (posix_locale.locdata[category]); 346 347 /* Handle charmap aliases */ 348 for (i = 0; cmalias[i].alias != NULL; i++) { 349 if ((cm = strstr(locname, cmalias[i].alias)) != NULL && 350 strlen(cm) == strlen(cmalias[i].alias)) { 351 len = cm - locname + 1; 352 if (len + strlen(cmalias[i].name) >= sizeof (scratch2)) 353 break; 354 (void) strlcpy(scratch2, locname, len); 355 (void) strlcat(scratch2, cmalias[i].name, 356 sizeof (scratch2)); 357 locname = scratch2; 358 break; 359 } 360 } 361 362 if ((strcmp(locname, "C.UTF-8") == 0) && (category != LC_CTYPE)) 363 return (&cutf_locdata[category]); 364 365 return (locdata_get_cache(category, locname)); 366 } 367 368 /* tsd destructor */ 369 static void 370 freelocptr(void *arg) 371 { 372 locale_t *locptr = arg; 373 if (*locptr != NULL) 374 freelocale(*locptr); 375 } 376 377 static const char * 378 get_locale_env(int category) 379 { 380 const char *env; 381 382 /* 1. check LC_ALL. */ 383 env = getenv(categories[LC_ALL]); 384 385 /* 2. check LC_* */ 386 if (env == NULL || *env == '\0') 387 env = getenv(categories[category]); 388 389 /* 3. check LANG */ 390 if (env == NULL || *env == '\0') 391 env = getenv("LANG"); 392 393 /* 4. if none is set, fall to "C" */ 394 if (env == NULL || *env == '\0') 395 env = "C"; 396 397 return (env); 398 } 399 400 401 /* 402 * This routine is exposed via the MB_CUR_MAX macro. Note that legacy 403 * code will continue to use _ctype[520], but we prefer this function as 404 * it is the only way to get thread-specific information. 405 */ 406 unsigned char 407 __mb_cur_max_l(locale_t loc) 408 { 409 return (loc->ctype->lc_max_mblen); 410 } 411 412 unsigned char 413 __mb_cur_max(void) 414 { 415 return (__mb_cur_max_l(uselocale(NULL))); 416 } 417 418 /* 419 * Public interfaces. 420 */ 421 422 locale_t 423 duplocale(locale_t src) 424 { 425 locale_t loc; 426 int i; 427 428 loc = lmalloc(sizeof (*loc)); 429 if (loc == NULL) { 430 return (NULL); 431 } 432 if (src == NULL) { 433 /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */ 434 src = ___global_locale; 435 } 436 for (i = 0; i < LC_ALL; i++) { 437 loc->locdata[i] = src->locdata[i]; 438 loc->loaded[i] = 0; 439 } 440 loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; 441 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; 442 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; 443 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; 444 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; 445 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; 446 loc->time = loc->locdata[LC_TIME]->l_data[0]; 447 return (loc); 448 } 449 450 void 451 freelocale(locale_t loc) 452 { 453 /* 454 * We take extra care never to free a saved locale created by 455 * setlocale(). This shouldn't be strictly necessary, but a little 456 * extra safety doesn't hurt here. 457 */ 458 if ((loc != NULL) && (loc != &posix_locale) && (!loc->on_list)) 459 lfree(loc, sizeof (*loc)); 460 } 461 462 locale_t 463 newlocale(int catmask, const char *locname, locale_t base) 464 { 465 locale_t loc; 466 int i, e; 467 468 if (catmask & ~(LC_ALL_MASK)) { 469 errno = EINVAL; 470 return (NULL); 471 } 472 473 /* 474 * Technically passing LC_GLOBAL_LOCALE here is illegal, 475 * but we allow it. 476 */ 477 if (base == NULL || base == ___global_locale) { 478 loc = duplocale(___global_locale); 479 } else { 480 loc = duplocale(base); 481 } 482 if (loc == NULL) { 483 return (NULL); 484 } 485 486 for (i = 0; i < LC_ALL; i++) { 487 struct locdata *ldata; 488 loc->loaded[i] = 0; 489 if (((1 << i) & catmask) == 0) { 490 /* Default to base locale if not overriding */ 491 continue; 492 } 493 ldata = locdata_get(i, locname); 494 if (ldata == NULL) { 495 e = errno; 496 freelocale(loc); 497 errno = e; 498 return (NULL); 499 } 500 loc->locdata[i] = ldata; 501 } 502 loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; 503 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; 504 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; 505 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; 506 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; 507 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; 508 loc->time = loc->locdata[LC_TIME]->l_data[0]; 509 freelocale(base); 510 511 return (mklocname(loc)); 512 } 513 514 locale_t 515 uselocale(locale_t loc) 516 { 517 locale_t lastloc = ___global_locale; 518 locale_t *locptr; 519 520 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr); 521 /* Should never occur */ 522 if (locptr == NULL) { 523 errno = EINVAL; 524 return (NULL); 525 } 526 527 if (*locptr != NULL) 528 lastloc = *locptr; 529 530 /* Argument loc is NULL if we are just querying. */ 531 if (loc != NULL) { 532 /* 533 * Set it to LC_GLOBAL_LOCAL to return to using 534 * the global locale (setlocale). 535 */ 536 if (loc == ___global_locale) { 537 *locptr = NULL; 538 } else { 539 /* No validation of the provided locale at present */ 540 *locptr = loc; 541 } 542 } 543 544 /* 545 * The caller is responsible for freeing, of course it would be 546 * gross error to call freelocale() on a locale object that is still 547 * in use. 548 */ 549 return (lastloc); 550 } 551 552 static locale_t 553 mklocname(locale_t loc) 554 { 555 int composite = 0; 556 557 /* Look to see if any category is different */ 558 for (int i = 1; i < LC_ALL; ++i) { 559 if (strcmp(loc->locdata[0]->l_lname, 560 loc->locdata[i]->l_lname) != 0) { 561 composite = 1; 562 break; 563 } 564 } 565 566 if (composite) { 567 /* 568 * Note ordering of these follows the numeric order, 569 * if the order is changed, then setlocale() will need 570 * to be changed as well. 571 */ 572 (void) snprintf(loc->locname, sizeof (loc->locname), 573 "%s/%s/%s/%s/%s/%s", 574 loc->locdata[LC_CTYPE]->l_lname, 575 loc->locdata[LC_NUMERIC]->l_lname, 576 loc->locdata[LC_TIME]->l_lname, 577 loc->locdata[LC_COLLATE]->l_lname, 578 loc->locdata[LC_MONETARY]->l_lname, 579 loc->locdata[LC_MESSAGES]->l_lname); 580 } else { 581 (void) strlcpy(loc->locname, loc->locdata[LC_CTYPE]->l_lname, 582 sizeof (loc->locname)); 583 } 584 return (loc); 585 } 586