1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2014 Garrett D'Amore <garrett@damore.org> 14 * Copyright 2025 Oxide Computer Company 15 */ 16 17 /* 18 * This file implements the 2008 newlocale and friends handling. 19 */ 20 21 #ifndef _LCONV_C99 22 #define _LCONV_C99 23 #endif 24 25 #include "lint.h" 26 #include <atomic.h> 27 #include <locale.h> 28 #include <sys/types.h> 29 #include <sys/mman.h> 30 #include <errno.h> 31 #include <string.h> 32 #include "libc.h" 33 #include "mtlib.h" 34 #include "tsd.h" 35 #include "localeimpl.h" 36 #include "lctype.h" 37 38 /* 39 * Big Theory of Locales: 40 * 41 * (It is recommended that readers familiarize themselves with the POSIX 42 * 2008 (XPG Issue 7) specifications for locales, first.) 43 * 44 * Historically, we had a bunch of global variables that stored locale 45 * data. While this worked well, it limited applications to a single locale 46 * at a time. This doesn't work well in certain server applications. 47 * 48 * In Issue 7, X/Open introduced the concept of a locale_t object, along with 49 * versions of functions that can take this object as a parameter, along 50 * with functions to clone and manipulate these locale objects. The new 51 * functions are named with a _l() suffix. 52 * 53 * Additionally uselocale() is introduced which can change the locale of 54 * a single thread. However, setlocale() can still be used to change 55 * the global locale. 56 * 57 * In our implementation, we use libc's TSD to store the locale data that 58 * was previously global. 
We still have global data because some applications 59 * have had those global objects compiled into them. (Such applications will 60 * be unable to benefit from uselocale(), btw.) The legacy routines are 61 * reimplemented as wrappers that use the appropriate locale object by 62 * calling uselocale(). uselocale() when passed a NULL pointer returns the 63 * thread-specific locale object if one is present, or the global locale 64 * object otherwise. Note that once the TSD data is set, the only way 65 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE 66 * to uselocale(). 67 * 68 * We are careful to minimize performance impact of multiple calls to 69 * uselocale() or setlocale() by using a cache of locale data whenever possible. 70 * As a consequence of this, applications that iterate over all possible 71 * locales will burn through a lot of virtual memory, but we find such 72 * applications rare. (locale -a might be an exception, but it is short lived.) 73 * 74 * Category data is never released (although enclosing locale objects might be), 75 * in order to guarantee thread-safety. Calling freelocale() on an object 76 * while it is in use by another thread is a programmer error (use-after-free) 77 * and we don't bother to note it further. 78 * 79 * Locale objects (global locales) established by setlocale() are also 80 * never freed (for MT safety), but we will save previous locale objects 81 * and reuse them when we can. 
82 */ 83 84 typedef struct locdata *(*loadfn_t)(const char *); 85 86 static const loadfn_t loaders[LC_ALL] = { 87 __lc_ctype_load, 88 __lc_numeric_load, 89 __lc_time_load, 90 __lc_collate_load, 91 __lc_monetary_load, 92 __lc_messages_load, 93 }; 94 95 extern struct lc_monetary lc_monetary_posix; 96 extern struct lc_numeric lc_numeric_posix; 97 extern struct lc_messages lc_messages_posix; 98 extern struct lc_time lc_time_posix; 99 extern struct lc_ctype lc_ctype_posix; 100 extern struct lc_collate lc_collate_posix; 101 extern struct _RuneLocale _DefaultRuneLocale; 102 103 static struct _locale posix_locale = { 104 /* locdata */ 105 .locdata = { 106 &__posix_ctype_locdata, 107 &__posix_numeric_locdata, 108 &__posix_time_locdata, 109 &__posix_collate_locdata, 110 &__posix_monetary_locdata, 111 &__posix_messages_locdata, 112 }, 113 .locname = "C", 114 .ctype = &lc_ctype_posix, 115 .numeric = &lc_numeric_posix, 116 .collate = &lc_collate_posix, 117 .monetary = &lc_monetary_posix, 118 .messages = &lc_messages_posix, 119 .time = &lc_time_posix, 120 .runelocale = &_DefaultRuneLocale, 121 }; 122 123 locale_t ___global_locale = &posix_locale; 124 125 locale_t 126 __global_locale(void) 127 { 128 return (___global_locale); 129 } 130 131 /* 132 * Locale data for hybrid C.UTF-8 locale having all the characteristics of 133 * default C/POSIX locale, except for LC_CTYPE data which is retrieved from 134 * cache/file as for other UTF-8 locales. 135 */ 136 static struct locdata cutf_locdata[LC_ALL] = { 137 { "C.UTF-8", NULL }, /* unused */ 138 { "C.UTF-8", &lc_numeric_posix }, 139 { "C.UTF-8", &lc_time_posix }, 140 { "C.UTF-8", &lc_collate_posix }, 141 { "C.UTF-8", &lc_monetary_posix }, 142 { "C.UTF-8", &lc_messages_posix }, 143 }; 144 145 /* 146 * Category names for getenv() Note that this was modified 147 * for Solaris. See <iso/locale_iso.h>. 
148 */ 149 #define NUM_CATS 7 150 static char *categories[7] = { 151 "LC_CTYPE", 152 "LC_NUMERIC", 153 "LC_TIME", 154 "LC_COLLATE", 155 "LC_MONETARY", 156 "LC_MESSAGES", 157 "LC_ALL", 158 }; 159 160 /* 161 * Prototypes. 162 */ 163 static const char *get_locale_env(int); 164 static struct locdata *locdata_get(int, const char *); 165 static struct locdata *locdata_get_cache(int, const char *); 166 static locale_t mklocname(locale_t); 167 168 /* 169 * Some utility routines. 170 */ 171 172 struct locdata * 173 __locdata_alloc(const char *name, size_t memsz) 174 { 175 struct locdata *ldata; 176 177 if ((ldata = lmalloc(sizeof (*ldata))) == NULL) { 178 return (NULL); 179 } 180 if ((ldata->l_data[0] = libc_malloc(memsz)) == NULL) { 181 lfree(ldata, sizeof (*ldata)); 182 errno = ENOMEM; 183 return (NULL); 184 } 185 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname)); 186 187 return (ldata); 188 } 189 190 /* 191 * Normally we never free locale data truly, but if we failed to load it 192 * for some reason, this routine is used to cleanup the partial mess. 193 */ 194 void 195 __locdata_free(struct locdata *ldata) 196 { 197 for (int i = 0; i < NLOCDATA; i++) 198 libc_free(ldata->l_data[i]); 199 if (ldata->l_map != NULL && ldata->l_map_len) 200 (void) munmap(ldata->l_map, ldata->l_map_len); 201 lfree(ldata, sizeof (*ldata)); 202 } 203 204 /* 205 * It turns out that for performance reasons we would really like to 206 * cache the most recently referenced locale data to avoid wasteful 207 * loading from files. 208 */ 209 210 static struct locdata *cache_data[LC_ALL]; 211 static struct locdata *cat_data[LC_ALL]; 212 static mutex_t cache_lock = DEFAULTMUTEX; 213 214 /* 215 * Returns the cached data if the locale name is the same. If not, 216 * returns NULL (cache miss). The locdata is returned with a hold on 217 * it, taken on behalf of the caller. The caller should drop the hold 218 * when it is finished. 
219 */ 220 static struct locdata * 221 locdata_get_cache(int category, const char *locname) 222 { 223 struct locdata *loc; 224 225 if (category < 0 || category >= LC_ALL) 226 return (NULL); 227 228 /* Try cache first. */ 229 lmutex_lock(&cache_lock); 230 loc = cache_data[category]; 231 232 if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) { 233 lmutex_unlock(&cache_lock); 234 return (loc); 235 } 236 237 /* 238 * Failing that try previously loaded locales (linear search) -- 239 * this could be optimized to a hash, but its unlikely that a single 240 * application will ever need to work with more than a few locales. 241 */ 242 for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) { 243 if (strcmp(locname, loc->l_lname) == 0) { 244 break; 245 } 246 } 247 248 /* 249 * Finally, if we still don't have one, try loading the locale 250 * data from the actual on-disk data. 251 * 252 * We drop the lock (libc wants to ensure no internal locks 253 * are held when we call other routines required to read from 254 * files, allocate memory, etc.) There is a small race here, 255 * but the consequences of the race are benign -- if multiple 256 * threads hit this at precisely the same point, we could 257 * wind up with duplicates of the locale data in the cache. 258 * 259 * This wastes the memory for an extra copy of the locale 260 * data, but there is no further harm beyond that. Its not 261 * worth the effort to recode this to something "safe" 262 * (which would require rescanning the list, etc.), given 263 * that this race will probably never actually occur. 264 */ 265 if (loc == NULL) { 266 lmutex_unlock(&cache_lock); 267 loc = (*loaders[category])(locname); 268 lmutex_lock(&cache_lock); 269 if (loc != NULL) 270 (void) strlcpy(loc->l_lname, locname, 271 sizeof (loc->l_lname)); 272 } 273 274 /* 275 * Assuming we got one, update the cache, and stick us on the list 276 * of loaded locale data. We insert into the head (more recent 277 * use is likely to win.) 
278 */ 279 if (loc != NULL) { 280 cache_data[category] = loc; 281 if (!loc->l_cached) { 282 loc->l_cached = 1; 283 loc->l_next = cat_data[category]; 284 cat_data[category] = loc; 285 } 286 } 287 288 lmutex_unlock(&cache_lock); 289 return (loc); 290 } 291 292 /* Charmap aliases, mostly found in Linux */ 293 static const struct { 294 const char *alias; 295 const char *name; 296 } cmalias[] = { 297 { "utf8", "UTF-8" }, 298 { "iso88591", "ISO8859-1" }, 299 { "iso885915", "ISO8859-15" }, 300 { "gb18030", "GB18030" }, 301 { "koi8r", "KOI8-R" }, 302 { NULL, NULL } 303 }; 304 305 /* 306 * Routine to get the locdata for a given category and locale. 307 * This includes retrieving it from cache, retrieving it from 308 * a file, etc. 309 */ 310 static struct locdata * 311 locdata_get(int category, const char *locname) 312 { 313 char scratch[ENCODING_LEN + 1]; 314 char scratch2[ENCODING_LEN + 1]; 315 char *slash, *cm; 316 int cnt; 317 int len; 318 int i; 319 320 if (locname == NULL || *locname == 0) { 321 locname = get_locale_env(category); 322 } 323 324 /* 325 * Extract the locale name for the category if it is a composite 326 * locale. 
327 */ 328 if ((slash = strchr(locname, '/')) != NULL) { 329 for (cnt = category; cnt && slash != NULL; cnt--) { 330 locname = slash + 1; 331 slash = strchr(locname, '/'); 332 } 333 if (slash) { 334 len = slash - locname + 1; 335 if (len >= sizeof (scratch)) { 336 len = sizeof (scratch); 337 } 338 } else { 339 len = sizeof (scratch); 340 } 341 (void) strlcpy(scratch, locname, len); 342 locname = scratch; 343 } 344 345 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0)) 346 return (posix_locale.locdata[category]); 347 348 /* Handle charmap aliases */ 349 for (i = 0; cmalias[i].alias != NULL; i++) { 350 if ((cm = strstr(locname, cmalias[i].alias)) != NULL && 351 strlen(cm) == strlen(cmalias[i].alias)) { 352 len = cm - locname + 1; 353 if (len + strlen(cmalias[i].name) >= sizeof (scratch2)) 354 break; 355 (void) strlcpy(scratch2, locname, len); 356 (void) strlcat(scratch2, cmalias[i].name, 357 sizeof (scratch2)); 358 locname = scratch2; 359 break; 360 } 361 } 362 363 if ((strcmp(locname, "C.UTF-8") == 0) && (category != LC_CTYPE)) 364 return (&cutf_locdata[category]); 365 366 return (locdata_get_cache(category, locname)); 367 } 368 369 /* tsd destructor */ 370 static void 371 freelocptr(void *arg) 372 { 373 locale_t *locptr = arg; 374 if (*locptr != NULL) 375 freelocale(*locptr); 376 } 377 378 static const char * 379 get_locale_env(int category) 380 { 381 const char *env; 382 383 /* 1. check LC_ALL. */ 384 env = getenv(categories[LC_ALL]); 385 386 /* 2. check LC_* */ 387 if (env == NULL || *env == '\0') 388 env = getenv(categories[category]); 389 390 /* 3. check LANG */ 391 if (env == NULL || *env == '\0') 392 env = getenv("LANG"); 393 394 /* 4. if none is set, fall to "C" */ 395 if (env == NULL || *env == '\0') 396 env = "C"; 397 398 return (env); 399 } 400 401 402 /* 403 * This routine is exposed via the MB_CUR_MAX macro. 
Note that legacy 404 * code will continue to use _ctype[520], but we prefer this function as 405 * it is the only way to get thread-specific information. 406 */ 407 unsigned char 408 __mb_cur_max_l(locale_t loc) 409 { 410 return (loc->ctype->lc_max_mblen); 411 } 412 413 unsigned char 414 __mb_cur_max(void) 415 { 416 return (__mb_cur_max_l(uselocale(NULL))); 417 } 418 419 /* 420 * Public interfaces. 421 */ 422 423 locale_t 424 duplocale(locale_t src) 425 { 426 locale_t loc; 427 int i; 428 429 loc = lmalloc(sizeof (*loc)); 430 if (loc == NULL) { 431 return (NULL); 432 } 433 if (src == NULL) { 434 /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */ 435 src = ___global_locale; 436 } 437 for (i = 0; i < LC_ALL; i++) { 438 loc->locdata[i] = src->locdata[i]; 439 loc->loaded[i] = 0; 440 } 441 loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; 442 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; 443 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; 444 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; 445 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; 446 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; 447 loc->time = loc->locdata[LC_TIME]->l_data[0]; 448 return (loc); 449 } 450 451 void 452 freelocale(locale_t loc) 453 { 454 /* 455 * We take extra care never to free a saved locale created by 456 * setlocale(). This shouldn't be strictly necessary, but a little 457 * extra safety doesn't hurt here. 458 */ 459 if ((loc != NULL) && (loc != &posix_locale) && (!loc->on_list)) 460 lfree(loc, sizeof (*loc)); 461 } 462 463 locale_t 464 newlocale(int catmask, const char *locname, locale_t base) 465 { 466 locale_t loc; 467 int i, e; 468 469 if (catmask & ~(LC_ALL_MASK)) { 470 errno = EINVAL; 471 return (NULL); 472 } 473 474 /* 475 * Technically passing LC_GLOBAL_LOCALE here is illegal, 476 * but we allow it. 
477 */ 478 if (base == NULL || base == ___global_locale) { 479 loc = duplocale(___global_locale); 480 } else { 481 loc = duplocale(base); 482 } 483 if (loc == NULL) { 484 return (NULL); 485 } 486 487 for (i = 0; i < LC_ALL; i++) { 488 struct locdata *ldata; 489 loc->loaded[i] = 0; 490 if (((1 << i) & catmask) == 0) { 491 /* Default to base locale if not overriding */ 492 continue; 493 } 494 ldata = locdata_get(i, locname); 495 if (ldata == NULL) { 496 e = errno; 497 freelocale(loc); 498 errno = e; 499 return (NULL); 500 } 501 loc->locdata[i] = ldata; 502 } 503 loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; 504 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; 505 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; 506 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; 507 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; 508 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; 509 loc->time = loc->locdata[LC_TIME]->l_data[0]; 510 freelocale(base); 511 512 return (mklocname(loc)); 513 } 514 515 locale_t 516 uselocale(locale_t loc) 517 { 518 locale_t lastloc = ___global_locale; 519 locale_t *locptr; 520 521 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr); 522 /* Should never occur */ 523 if (locptr == NULL) { 524 errno = EINVAL; 525 return (NULL); 526 } 527 528 if (*locptr != NULL) 529 lastloc = *locptr; 530 531 /* Argument loc is NULL if we are just querying. */ 532 if (loc != NULL) { 533 /* 534 * Set it to LC_GLOBAL_LOCAL to return to using 535 * the global locale (setlocale). 536 */ 537 if (loc == ___global_locale) { 538 *locptr = NULL; 539 } else { 540 /* No validation of the provided locale at present */ 541 *locptr = loc; 542 } 543 } 544 545 /* 546 * The caller is responsible for freeing, of course it would be 547 * gross error to call freelocale() on a locale object that is still 548 * in use. 
549 */ 550 return (lastloc); 551 } 552 553 static locale_t 554 mklocname(locale_t loc) 555 { 556 int composite = 0; 557 558 /* Look to see if any category is different */ 559 for (int i = 1; i < LC_ALL; ++i) { 560 if (strcmp(loc->locdata[0]->l_lname, 561 loc->locdata[i]->l_lname) != 0) { 562 composite = 1; 563 break; 564 } 565 } 566 567 if (composite) { 568 /* 569 * Note ordering of these follows the numeric order, 570 * if the order is changed, then setlocale() will need 571 * to be changed as well. 572 */ 573 (void) snprintf(loc->locname, sizeof (loc->locname), 574 "%s/%s/%s/%s/%s/%s", 575 loc->locdata[LC_CTYPE]->l_lname, 576 loc->locdata[LC_NUMERIC]->l_lname, 577 loc->locdata[LC_TIME]->l_lname, 578 loc->locdata[LC_COLLATE]->l_lname, 579 loc->locdata[LC_MONETARY]->l_lname, 580 loc->locdata[LC_MESSAGES]->l_lname); 581 } else { 582 (void) strlcpy(loc->locname, loc->locdata[LC_CTYPE]->l_lname, 583 sizeof (loc->locname)); 584 } 585 return (loc); 586 } 587 588 /* 589 * POSIX has several lifetime requirements that vary on the type of locale. 590 * 591 * If the locale is LC_GLOBAL_LOCALE, the returned string is required to live 592 * beyond the locale's use as the global locale. The specification suggests that 593 * this use a thread-local buffer and cautions that it may disappear when the 594 * thread terminates or another LC_GLOBAL_LOCALE call is made. In our case, 595 * because we will never free a locale that is set with setlocale() (see 596 * port/locale/setlocale.c), we can simply return the name of the locale 597 * directly. 598 * 599 * If the locale is any other locale, it is allowed to be invalidated by a call 600 * to uselocale() or newlocale(). 601 * 602 * In both of these cases this means that we can simply return the string from 603 * the current object. POSIX importantly states that the application is not 604 * allowed to assume the name will stay the same across invocations and 605 * therefore it cannot be relied upon for serialization. 
However, it will work 606 * with setlocale() again. 607 */ 608 const char * 609 getlocalename_l(int category, locale_t loc) 610 { 611 if (loc == NULL) { 612 return (NULL); 613 } 614 615 switch (category) { 616 case LC_CTYPE: 617 case LC_NUMERIC: 618 case LC_TIME: 619 case LC_COLLATE: 620 case LC_MONETARY: 621 case LC_MESSAGES: 622 return (loc->locdata[category]->l_lname); 623 case LC_ALL: 624 return (loc->locname); 625 default: 626 /* 627 * POSIX does not define any errors here so we can't indicate 628 * anything via errno or similar. 629 */ 630 return (NULL); 631 } 632 } 633