1 /* 2 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Paul Borman at Krystal Technologies. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #include "lint.h" 35 #include "file64.h" 36 #include <errno.h> 37 #include <limits.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <unistd.h> 42 #include <wchar.h> 43 #include "runetype.h" 44 #include "ldpart.h" 45 #include "mblocal.h" 46 #include "setlocale.h" 47 #include "_ctype.h" 48 #include "../i18n/_locale.h" 49 50 extern _RuneLocale *_Read_RuneMagi(FILE *); 51 extern unsigned char __ctype_C[]; 52 53 static int __setrunelocale(const char *); 54 55 static int 56 __setrunelocale(const char *encoding) 57 { 58 FILE *fp; 59 char name[PATH_MAX]; 60 _RuneLocale *rl; 61 int saverr, ret; 62 size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD, 63 const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); 64 size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, 65 mbstate_t *_RESTRICT_KYWD); 66 int (*old__mbsinit)(const mbstate_t *); 67 size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, 68 const char **_RESTRICT_KYWD, size_t, size_t, 69 mbstate_t *_RESTRICT_KYWD); 70 size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD, 71 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 72 mbstate_t *_RESTRICT_KYWD); 73 static char ctype_encoding[ENCODING_LEN + 1]; 74 static _RuneLocale *CachedRuneLocale; 75 static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD, 76 const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); 77 static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, 78 mbstate_t *_RESTRICT_KYWD); 79 static int (*Cached__mbsinit)(const mbstate_t *); 80 static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, 81 const char **_RESTRICT_KYWD, size_t, size_t, 82 mbstate_t *_RESTRICT_KYWD); 83 static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD, 84 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 85 mbstate_t *_RESTRICT_KYWD); 86 87 /* 88 * The "C" and "POSIX" locale are always here. 89 */ 90 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { 91 int i; 92 93 (void) memcpy(__ctype, __ctype_C, SZ_TOTAL); 94 95 for (i = 0; i < _CACHED_RUNES; i++) { 96 __ctype_mask[i] = _DefaultRuneLocale.__runetype[i]; 97 __trans_upper[i] = _DefaultRuneLocale.__mapupper[i]; 98 __trans_lower[i] = _DefaultRuneLocale.__maplower[i]; 99 } 100 101 (void) _none_init(&_DefaultRuneLocale); 102 return (0); 103 } 104 105 /* 106 * If the locale name is the same as our cache, use the cache. 107 */ 108 if (CachedRuneLocale != NULL && 109 strcmp(encoding, ctype_encoding) == 0) { 110 _CurrentRuneLocale = CachedRuneLocale; 111 __mbrtowc = Cached__mbrtowc; 112 __mbsinit = Cached__mbsinit; 113 __mbsnrtowcs = Cached__mbsnrtowcs; 114 __wcrtomb = Cached__wcrtomb; 115 __wcsnrtombs = Cached__wcsnrtombs; 116 return (0); 117 } 118 119 /* 120 * Slurp the locale file into the cache. 121 */ 122 123 (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA", 124 _PathLocale, encoding); 125 126 if ((fp = fopen(name, "r")) == NULL) 127 return (errno == 0 ? ENOENT : errno); 128 129 if ((rl = _Read_RuneMagi(fp)) == NULL) { 130 saverr = (errno == 0 ? EINVAL : errno); 131 (void) fclose(fp); 132 return (saverr); 133 } 134 (void) fclose(fp); 135 136 old__mbrtowc = __mbrtowc; 137 old__mbsinit = __mbsinit; 138 old__mbsnrtowcs = __mbsnrtowcs; 139 old__wcrtomb = __wcrtomb; 140 old__wcsnrtombs = __wcsnrtombs; 141 142 __mbrtowc = NULL; 143 __mbsinit = NULL; 144 __mbsnrtowcs = __mbsnrtowcs_std; 145 __wcrtomb = NULL; 146 __wcsnrtombs = __wcsnrtombs_std; 147 148 if (strcmp(rl->__encoding, "NONE") == 0) 149 ret = _none_init(rl); 150 else if (strcmp(rl->__encoding, "ASCII") == 0) 151 ret = _ascii_init(rl); 152 else if (strcmp(rl->__encoding, "UTF-8") == 0) 153 ret = _UTF8_init(rl); 154 else if (strcmp(rl->__encoding, "EUC") == 0) 155 ret = _EUC_init(rl); 156 else if (strcmp(rl->__encoding, "GB18030") == 0) 157 ret = _GB18030_init(rl); 158 else if (strcmp(rl->__encoding, "GB2312") == 0) 159 ret = _GB2312_init(rl); 160 else if (strcmp(rl->__encoding, "GBK") == 0) 161 ret = _GBK_init(rl); 162 else if (strcmp(rl->__encoding, "BIG5") == 0) 163 ret = _BIG5_init(rl); 164 else if (strcmp(rl->__encoding, "MSKanji") == 0) 165 ret = _MSKanji_init(rl); 166 else 167 ret = EINVAL; 168 169 if (ret == 0) { 170 if (CachedRuneLocale != NULL) { 171 /* See euc.c */ 172 if (strcmp(CachedRuneLocale->__encoding, "EUC") == 0) 173 free(CachedRuneLocale->__variable); 174 free(CachedRuneLocale); 175 } 176 CachedRuneLocale = _CurrentRuneLocale; 177 Cached__mbrtowc = __mbrtowc; 178 Cached__mbsinit = __mbsinit; 179 Cached__mbsnrtowcs = __mbsnrtowcs; 180 Cached__wcrtomb = __wcrtomb; 181 Cached__wcsnrtombs = __wcsnrtombs; 182 (void) strcpy(ctype_encoding, encoding); 183 184 /* 185 * We need to overwrite the _ctype array. This requires 186 * some finagling. This is because references to it may 187 * have been baked into applications. 188 * 189 * Note that it is interesting that toupper/tolower only 190 * produce defined results when the input is representable 191 * as a byte. 192 */ 193 194 /* 195 * The top half is the type mask array. Because we 196 * want to support both legacy Solaris code (which have 197 * mask valeus baked in to them), and we want to be able 198 * to import locale files from other sources (FreeBSD) 199 * which probably uses different masks, we have to perform 200 * a conversion here. Ugh. Note that the _CTYPE definitions 201 * we use from FreeBSD are richer than the Solaris legacy. 202 * 203 * We have to cope with these limitations though, because the 204 * inadequate Solaris definitions were baked into binaries. 205 */ 206 for (int i = 0; i < _CACHED_RUNES; i++) { 207 /* ctype can only encode the lower 8 bits. */ 208 __ctype[i+1] = rl->__runetype[i] & 0xff; 209 __ctype_mask[i] = rl->__runetype[i]; 210 } 211 212 /* The bottom half is the toupper/lower array */ 213 for (int i = 0; i < _CACHED_RUNES; i++) { 214 __ctype[258 + i] = i; 215 if (rl->__mapupper[i] && rl->__mapupper[i] != i) 216 __ctype[258+i] = rl->__mapupper[i]; 217 if (rl->__maplower[i] && rl->__maplower[i] != i) 218 __ctype[258+i] = rl->__maplower[i]; 219 220 /* Don't forget these annoyances either! */ 221 __trans_upper[i] = rl->__mapupper[i]; 222 __trans_lower[i] = rl->__maplower[i]; 223 } 224 225 /* 226 * Note that we expect the init code will have populated 227 * the CSWIDTH array (__ctype[514-520]) properly. 228 */ 229 } else { 230 __mbrtowc = old__mbrtowc; 231 __mbsinit = old__mbsinit; 232 __mbsnrtowcs = old__mbsnrtowcs; 233 __wcrtomb = old__wcrtomb; 234 __wcsnrtombs = old__wcsnrtombs; 235 free(rl); 236 } 237 238 return (ret); 239 } 240 241 int 242 __wrap_setrunelocale(const char *locale) 243 { 244 int ret = __setrunelocale(locale); 245 246 if (ret != 0) { 247 errno = ret; 248 return (_LDP_ERROR); 249 } 250 return (_LDP_LOADED); 251 } 252