/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright 2014 Garrett D'Amore */ /* * This file implements the 2008 newlocale and friends handling. */ #ifndef _LCONV_C99 #define _LCONV_C99 #endif #include "lint.h" #include #include #include #include #include #include #include "libc.h" #include "mtlib.h" #include "tsd.h" #include "localeimpl.h" #include "lctype.h" /* * Big Theory of Locales: * * (It is recommended that readers familiarize themselves with the POSIX * 2008 (XPG Issue 7) specifications for locales, first.) * * Historically, we had a bunch of global variables that stored locale * data. While this worked well, it limited applications to a single locale * at a time. This doesn't work well in certain server applications. * * Issue 7, X/Open introduced the concept of a locale_t object, along with * versions of functions that can take this object as a parameter, along * with functions to clone and manipulate these locale objects. The new * functions are named with a _l() suffix. * * Additionally uselocale() is introduced which can change the locale of * of a single thread. However, setlocale() can still be used to change * the global locale. * * In our implementation, we use libc's TSD to store the locale data that * was previously global. We still have global data because some applications * have had those global objects compiled into them. (Such applications will * be unable to benefit from uselocale(), btw.) The legacy routines are * reimplemented as wrappers that use the appropriate locale object by * calling uselocale(). uselocale() when passed a NULL pointer returns the * thread-specific locale object if one is present, or the global locale * object otherwise. Note that once the TSD data is set, the only way * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE * to uselocale(). * * We are careful to minimize performance impact of multiple calls to * uselocale() or setlocale() by using a cache of locale data whenever possible. * As a consequence of this, applications that iterate over all possible * locales will burn through a lot of virtual memory, but we find such * applications rare. (locale -a might be an exception, but it is short lived.) * * Category data is never released (although enclosing locale objects might be), * in order to guarantee thread-safety. Calling freelocale() on an object * while it is in use by another thread is a programmer error (use-after-free) * and we don't bother to note it further. * * Locale objects (global locales) established by setlocale() are also * never freed (for MT safety), but we will save previous locale objects * and reuse them when we can. */ typedef struct locdata *(*loadfn_t)(const char *); static const loadfn_t loaders[LC_ALL] = { __lc_ctype_load, __lc_numeric_load, __lc_time_load, __lc_collate_load, __lc_monetary_load, __lc_messages_load, }; extern struct lc_monetary lc_monetary_posix; extern struct lc_numeric lc_numeric_posix; extern struct lc_messages lc_messages_posix; extern struct lc_time lc_time_posix; extern struct lc_ctype lc_ctype_posix; extern struct lc_collate lc_collate_posix; extern struct _RuneLocale _DefaultRuneLocale; static struct _locale posix_locale = { /* locdata */ .locdata = { &__posix_ctype_locdata, &__posix_numeric_locdata, &__posix_time_locdata, &__posix_collate_locdata, &__posix_monetary_locdata, &__posix_messages_locdata, }, .locname = "C", .ctype = &lc_ctype_posix, .numeric = &lc_numeric_posix, .collate = &lc_collate_posix, .monetary = &lc_monetary_posix, .messages = &lc_messages_posix, .time = &lc_time_posix, .runelocale = &_DefaultRuneLocale, }; locale_t ___global_locale = &posix_locale; locale_t __global_locale(void) { return (___global_locale); } /* * Locale data for hybrid C.UTF-8 locale having all the characteristics of * default C/POSIX locale, except for LC_CTYPE data which is retrieved from * cache/file as for other UTF-8 locales. */ static struct locdata cutf_locdata[LC_ALL] = { { "C.UTF-8", NULL }, /* unused */ { "C.UTF-8", &lc_numeric_posix }, { "C.UTF-8", &lc_time_posix }, { "C.UTF-8", &lc_collate_posix }, { "C.UTF-8", &lc_monetary_posix }, { "C.UTF-8", &lc_messages_posix }, }; /* * Category names for getenv() Note that this was modified * for Solaris. See . */ #define NUM_CATS 7 static char *categories[7] = { "LC_CTYPE", "LC_NUMERIC", "LC_TIME", "LC_COLLATE", "LC_MONETARY", "LC_MESSAGES", "LC_ALL", }; /* * Prototypes. */ static const char *get_locale_env(int); static struct locdata *locdata_get(int, const char *); static struct locdata *locdata_get_cache(int, const char *); static locale_t mklocname(locale_t); /* * Some utility routines. */ struct locdata * __locdata_alloc(const char *name, size_t memsz) { struct locdata *ldata; if ((ldata = lmalloc(sizeof (*ldata))) == NULL) { return (NULL); } if ((ldata->l_data[0] = libc_malloc(memsz)) == NULL) { lfree(ldata, sizeof (*ldata)); errno = ENOMEM; return (NULL); } (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname)); return (ldata); } /* * Normally we never free locale data truly, but if we failed to load it * for some reason, this routine is used to cleanup the partial mess. */ void __locdata_free(struct locdata *ldata) { for (int i = 0; i < NLOCDATA; i++) libc_free(ldata->l_data[i]); if (ldata->l_map != NULL && ldata->l_map_len) (void) munmap(ldata->l_map, ldata->l_map_len); lfree(ldata, sizeof (*ldata)); } /* * It turns out that for performance reasons we would really like to * cache the most recently referenced locale data to avoid wasteful * loading from files. */ static struct locdata *cache_data[LC_ALL]; static struct locdata *cat_data[LC_ALL]; static mutex_t cache_lock = DEFAULTMUTEX; /* * Returns the cached data if the locale name is the same. If not, * returns NULL (cache miss). The locdata is returned with a hold on * it, taken on behalf of the caller. The caller should drop the hold * when it is finished. */ static struct locdata * locdata_get_cache(int category, const char *locname) { struct locdata *loc; if (category < 0 || category >= LC_ALL) return (NULL); /* Try cache first. */ lmutex_lock(&cache_lock); loc = cache_data[category]; if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) { lmutex_unlock(&cache_lock); return (loc); } /* * Failing that try previously loaded locales (linear search) -- * this could be optimized to a hash, but its unlikely that a single * application will ever need to work with more than a few locales. */ for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) { if (strcmp(locname, loc->l_lname) == 0) { break; } } /* * Finally, if we still don't have one, try loading the locale * data from the actual on-disk data. * * We drop the lock (libc wants to ensure no internal locks * are held when we call other routines required to read from * files, allocate memory, etc.) There is a small race here, * but the consequences of the race are benign -- if multiple * threads hit this at precisely the same point, we could * wind up with duplicates of the locale data in the cache. * * This wastes the memory for an extra copy of the locale * data, but there is no further harm beyond that. Its not * worth the effort to recode this to something "safe" * (which would require rescanning the list, etc.), given * that this race will probably never actually occur. */ if (loc == NULL) { lmutex_unlock(&cache_lock); loc = (*loaders[category])(locname); lmutex_lock(&cache_lock); if (loc != NULL) (void) strlcpy(loc->l_lname, locname, sizeof (loc->l_lname)); } /* * Assuming we got one, update the cache, and stick us on the list * of loaded locale data. We insert into the head (more recent * use is likely to win.) */ if (loc != NULL) { cache_data[category] = loc; if (!loc->l_cached) { loc->l_cached = 1; loc->l_next = cat_data[category]; cat_data[category] = loc; } } lmutex_unlock(&cache_lock); return (loc); } /* Charmap aliases, mostly found in Linux */ static const struct { const char *alias; const char *name; } cmalias[] = { { "utf8", "UTF-8" }, { "iso88591", "ISO8859-1" }, { "iso885915", "ISO8859-15" }, { "gb18030", "GB18030" }, { "koi8r", "KOI8-R" }, { NULL, NULL } }; /* * Routine to get the locdata for a given category and locale. * This includes retrieving it from cache, retrieving it from * a file, etc. */ static struct locdata * locdata_get(int category, const char *locname) { char scratch[ENCODING_LEN + 1]; char scratch2[ENCODING_LEN + 1]; char *slash, *cm; int cnt; int len; int i; if (locname == NULL || *locname == 0) { locname = get_locale_env(category); } /* * Extract the locale name for the category if it is a composite * locale. */ if ((slash = strchr(locname, '/')) != NULL) { for (cnt = category; cnt && slash != NULL; cnt--) { locname = slash + 1; slash = strchr(locname, '/'); } if (slash) { len = slash - locname + 1; if (len >= sizeof (scratch)) { len = sizeof (scratch); } } else { len = sizeof (scratch); } (void) strlcpy(scratch, locname, len); locname = scratch; } if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0)) return (posix_locale.locdata[category]); /* Handle charmap aliases */ for (i = 0; cmalias[i].alias != NULL; i++) { if ((cm = strstr(locname, cmalias[i].alias)) != NULL && strlen(cm) == strlen(cmalias[i].alias)) { len = cm - locname + 1; if (len + strlen(cmalias[i].name) >= sizeof (scratch2)) break; (void) strlcpy(scratch2, locname, len); (void) strlcat(scratch2, cmalias[i].name, sizeof (scratch2)); locname = scratch2; break; } } if ((strcmp(locname, "C.UTF-8") == 0) && (category != LC_CTYPE)) return (&cutf_locdata[category]); return (locdata_get_cache(category, locname)); } /* tsd destructor */ static void freelocptr(void *arg) { locale_t *locptr = arg; if (*locptr != NULL) freelocale(*locptr); } static const char * get_locale_env(int category) { const char *env; /* 1. check LC_ALL. */ env = getenv(categories[LC_ALL]); /* 2. check LC_* */ if (env == NULL || *env == '\0') env = getenv(categories[category]); /* 3. check LANG */ if (env == NULL || *env == '\0') env = getenv("LANG"); /* 4. if none is set, fall to "C" */ if (env == NULL || *env == '\0') env = "C"; return (env); } /* * This routine is exposed via the MB_CUR_MAX macro. Note that legacy * code will continue to use _ctype[520], but we prefer this function as * it is the only way to get thread-specific information. */ unsigned char __mb_cur_max_l(locale_t loc) { return (loc->ctype->lc_max_mblen); } unsigned char __mb_cur_max(void) { return (__mb_cur_max_l(uselocale(NULL))); } /* * Public interfaces. */ locale_t duplocale(locale_t src) { locale_t loc; int i; loc = lmalloc(sizeof (*loc)); if (loc == NULL) { return (NULL); } if (src == NULL) { /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */ src = ___global_locale; } for (i = 0; i < LC_ALL; i++) { loc->locdata[i] = src->locdata[i]; loc->loaded[i] = 0; } loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; loc->time = loc->locdata[LC_TIME]->l_data[0]; return (loc); } void freelocale(locale_t loc) { /* * We take extra care never to free a saved locale created by * setlocale(). This shouldn't be strictly necessary, but a little * extra safety doesn't hurt here. */ if ((loc != NULL) && (loc != &posix_locale) && (!loc->on_list)) lfree(loc, sizeof (*loc)); } locale_t newlocale(int catmask, const char *locname, locale_t base) { locale_t loc; int i, e; if (catmask & ~(LC_ALL_MASK)) { errno = EINVAL; return (NULL); } /* * Technically passing LC_GLOBAL_LOCALE here is illegal, * but we allow it. */ if (base == NULL || base == ___global_locale) { loc = duplocale(___global_locale); } else { loc = duplocale(base); } if (loc == NULL) { return (NULL); } for (i = 0; i < LC_ALL; i++) { struct locdata *ldata; loc->loaded[i] = 0; if (((1 << i) & catmask) == 0) { /* Default to base locale if not overriding */ continue; } ldata = locdata_get(i, locname); if (ldata == NULL) { e = errno; freelocale(loc); errno = e; return (NULL); } loc->locdata[i] = ldata; } loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; loc->time = loc->locdata[LC_TIME]->l_data[0]; freelocale(base); return (mklocname(loc)); } locale_t uselocale(locale_t loc) { locale_t lastloc = ___global_locale; locale_t *locptr; locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr); /* Should never occur */ if (locptr == NULL) { errno = EINVAL; return (NULL); } if (*locptr != NULL) lastloc = *locptr; /* Argument loc is NULL if we are just querying. */ if (loc != NULL) { /* * Set it to LC_GLOBAL_LOCAL to return to using * the global locale (setlocale). */ if (loc == ___global_locale) { *locptr = NULL; } else { /* No validation of the provided locale at present */ *locptr = loc; } } /* * The caller is responsible for freeing, of course it would be * gross error to call freelocale() on a locale object that is still * in use. */ return (lastloc); } static locale_t mklocname(locale_t loc) { int composite = 0; /* Look to see if any category is different */ for (int i = 1; i < LC_ALL; ++i) { if (strcmp(loc->locdata[0]->l_lname, loc->locdata[i]->l_lname) != 0) { composite = 1; break; } } if (composite) { /* * Note ordering of these follows the numeric order, * if the order is changed, then setlocale() will need * to be changed as well. */ (void) snprintf(loc->locname, sizeof (loc->locname), "%s/%s/%s/%s/%s/%s", loc->locdata[LC_CTYPE]->l_lname, loc->locdata[LC_NUMERIC]->l_lname, loc->locdata[LC_TIME]->l_lname, loc->locdata[LC_COLLATE]->l_lname, loc->locdata[LC_MONETARY]->l_lname, loc->locdata[LC_MESSAGES]->l_lname); } else { (void) strlcpy(loc->locname, loc->locdata[LC_CTYPE]->l_lname, sizeof (loc->locname)); } return (loc); }