1 /* 2 * Copyright (c) 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Paul Borman at Krystal Technologies. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 35 */ 36 37 #include "lint.h" 38 #include "file64.h" 39 #include <errno.h> 40 #include <limits.h> 41 #include <string.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <unistd.h> 45 #include <wchar.h> 46 #include "runetype.h" 47 #include "ldpart.h" 48 #include "mblocal.h" 49 #include "setlocale.h" 50 #include "_ctype.h" 51 #include "../i18n/_locale.h" 52 53 extern _RuneLocale *_Read_RuneMagi(FILE *); 54 extern unsigned char __ctype_C[]; 55 56 static int __setrunelocale(const char *); 57 58 static int 59 __setrunelocale(const char *encoding) 60 { 61 FILE *fp; 62 char name[PATH_MAX]; 63 _RuneLocale *rl; 64 int saverr, ret; 65 size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD, 66 const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); 67 size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, 68 mbstate_t *_RESTRICT_KYWD); 69 int (*old__mbsinit)(const mbstate_t *); 70 size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, 71 const char **_RESTRICT_KYWD, size_t, size_t, 72 mbstate_t *_RESTRICT_KYWD); 73 size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD, 74 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 75 mbstate_t *_RESTRICT_KYWD); 76 static char ctype_encoding[ENCODING_LEN + 1]; 77 static _RuneLocale *CachedRuneLocale; 78 static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD, 79 const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); 80 static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, 81 mbstate_t *_RESTRICT_KYWD); 82 static int (*Cached__mbsinit)(const mbstate_t *); 83 static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, 84 const char **_RESTRICT_KYWD, size_t, size_t, 85 mbstate_t *_RESTRICT_KYWD); 86 static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD, 87 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 88 mbstate_t *_RESTRICT_KYWD); 89 90 /* 91 * The "C" and "POSIX" locale are always here. 92 */ 93 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { 94 int i; 95 96 (void) memcpy(__ctype, __ctype_C, SZ_TOTAL); 97 98 for (i = 0; i < _CACHED_RUNES; i++) { 99 __ctype_mask[i] = _DefaultRuneLocale.__runetype[i]; 100 __trans_upper[i] = _DefaultRuneLocale.__mapupper[i]; 101 __trans_lower[i] = _DefaultRuneLocale.__maplower[i]; 102 } 103 104 (void) _none_init(&_DefaultRuneLocale); 105 return (0); 106 } 107 108 /* 109 * If the locale name is the same as our cache, use the cache. 110 */ 111 if (CachedRuneLocale != NULL && 112 strcmp(encoding, ctype_encoding) == 0) { 113 _CurrentRuneLocale = CachedRuneLocale; 114 __mbrtowc = Cached__mbrtowc; 115 __mbsinit = Cached__mbsinit; 116 __mbsnrtowcs = Cached__mbsnrtowcs; 117 __wcrtomb = Cached__wcrtomb; 118 __wcsnrtombs = Cached__wcsnrtombs; 119 return (0); 120 } 121 122 /* 123 * Slurp the locale file into the cache. 124 */ 125 126 (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA", 127 _PathLocale, encoding); 128 129 if ((fp = fopen(name, "r")) == NULL) 130 return (errno == 0 ? ENOENT : errno); 131 132 if ((rl = _Read_RuneMagi(fp)) == NULL) { 133 saverr = (errno == 0 ? EINVAL : errno); 134 (void) fclose(fp); 135 return (saverr); 136 } 137 (void) fclose(fp); 138 139 old__mbrtowc = __mbrtowc; 140 old__mbsinit = __mbsinit; 141 old__mbsnrtowcs = __mbsnrtowcs; 142 old__wcrtomb = __wcrtomb; 143 old__wcsnrtombs = __wcsnrtombs; 144 145 __mbrtowc = NULL; 146 __mbsinit = NULL; 147 __mbsnrtowcs = __mbsnrtowcs_std; 148 __wcrtomb = NULL; 149 __wcsnrtombs = __wcsnrtombs_std; 150 151 if (strcmp(rl->__encoding, "NONE") == 0) 152 ret = _none_init(rl); 153 else if (strcmp(rl->__encoding, "ASCII") == 0) 154 ret = _ascii_init(rl); 155 else if (strcmp(rl->__encoding, "UTF-8") == 0) 156 ret = _UTF8_init(rl); 157 else if (strcmp(rl->__encoding, "EUC") == 0) 158 ret = _EUC_init(rl); 159 else if (strcmp(rl->__encoding, "GB18030") == 0) 160 ret = _GB18030_init(rl); 161 else if (strcmp(rl->__encoding, "GB2312") == 0) 162 ret = _GB2312_init(rl); 163 else if (strcmp(rl->__encoding, "GBK") == 0) 164 ret = _GBK_init(rl); 165 else if (strcmp(rl->__encoding, "BIG5") == 0) 166 ret = _BIG5_init(rl); 167 else if (strcmp(rl->__encoding, "MSKanji") == 0) 168 ret = _MSKanji_init(rl); 169 else 170 ret = EINVAL; 171 172 if (ret == 0) { 173 if (CachedRuneLocale != NULL) { 174 /* See euc.c */ 175 if (strcmp(CachedRuneLocale->__encoding, "EUC") == 0) 176 free(CachedRuneLocale->__variable); 177 free(CachedRuneLocale); 178 } 179 CachedRuneLocale = _CurrentRuneLocale; 180 Cached__mbrtowc = __mbrtowc; 181 Cached__mbsinit = __mbsinit; 182 Cached__mbsnrtowcs = __mbsnrtowcs; 183 Cached__wcrtomb = __wcrtomb; 184 Cached__wcsnrtombs = __wcsnrtombs; 185 (void) strcpy(ctype_encoding, encoding); 186 187 /* 188 * We need to overwrite the _ctype array. This requires 189 * some finagling. This is because references to it may 190 * have been baked into applications. 191 * 192 * Note that it is interesting that toupper/tolower only 193 * produce defined results when the input is representable 194 * as a byte. 195 */ 196 197 /* 198 * The top half is the type mask array. Because we 199 * want to support both legacy Solaris code (which have 200 * mask valeus baked in to them), and we want to be able 201 * to import locale files from other sources (FreeBSD) 202 * which probably uses different masks, we have to perform 203 * a conversion here. Ugh. Note that the _CTYPE definitions 204 * we use from FreeBSD are richer than the Solaris legacy. 205 * 206 * We have to cope with these limitations though, because the 207 * inadequate Solaris definitions were baked into binaries. 208 */ 209 for (int i = 0; i < _CACHED_RUNES; i++) { 210 /* ctype can only encode the lower 8 bits. */ 211 __ctype[i+1] = rl->__runetype[i] & 0xff; 212 __ctype_mask[i] = rl->__runetype[i]; 213 } 214 215 /* The bottom half is the toupper/lower array */ 216 for (int i = 0; i < _CACHED_RUNES; i++) { 217 __ctype[258 + i] = i; 218 if (rl->__mapupper[i] && rl->__mapupper[i] != i) 219 __ctype[258+i] = rl->__mapupper[i]; 220 if (rl->__maplower[i] && rl->__maplower[i] != i) 221 __ctype[258+i] = rl->__maplower[i]; 222 223 /* Don't forget these annoyances either! */ 224 __trans_upper[i] = rl->__mapupper[i]; 225 __trans_lower[i] = rl->__maplower[i]; 226 } 227 228 /* 229 * Note that we expect the init code will have populated 230 * the CSWIDTH array (__ctype[514-520]) properly. 231 */ 232 } else { 233 __mbrtowc = old__mbrtowc; 234 __mbsinit = old__mbsinit; 235 __mbsnrtowcs = old__mbsnrtowcs; 236 __wcrtomb = old__wcrtomb; 237 __wcsnrtombs = old__wcsnrtombs; 238 free(rl); 239 } 240 241 return (ret); 242 } 243 244 int 245 __wrap_setrunelocale(const char *locale) 246 { 247 int ret = __setrunelocale(locale); 248 249 if (ret != 0) { 250 errno = ret; 251 return (_LDP_ERROR); 252 } 253 return (_LDP_LOADED); 254 } 255