1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Support for oem <-> unicode translations. 29 */ 30 31 #ifndef _KERNEL 32 #include <stdlib.h> 33 #include <thread.h> 34 #include <synch.h> 35 #include <string.h> 36 #else 37 #include <sys/ksynch.h> 38 #endif /* _KERNEL */ 39 40 #include <sys/byteorder.h> 41 #include <smbsrv/alloc.h> 42 #include <smbsrv/string.h> 43 44 /* 45 * cpid The oemcpg_table index for this oempage. 46 * value The conversion values. 47 */ 48 typedef struct oempage { 49 uint32_t cpid; 50 smb_wchar_t *value; 51 } oempage_t; 52 53 /* 54 * filename The actual filename contains the codepage. 55 * bytesperchar The codepage uses double or single bytes per char. 56 * oempage The oempage is used to convert Unicode characters to 57 * OEM characters. Memory needs to be allocated for 58 * the value field of oempage to store the table. 59 * ucspage The unicode page is used to convert OEM characters 60 * to Unicode characters. Memory needs to be allocated 61 * for the value field of ucspage to store the table. 62 * valid True if the codepage has been initialized. 63 */ 64 typedef struct oem_codepage { 65 char *filename; 66 uint32_t bytesperchar; 67 oempage_t oempage; 68 oempage_t ucspage; 69 boolean_t valid; 70 } oem_codepage_t; 71 72 static oem_codepage_t oemcpg_table[] = { 73 {"850.cpg", 1, {0, 0}, {0, 0}, 0}, /* Multilingual Latin1 */ 74 {"950.cpg", 2, {1, 0}, {1, 0}, 0}, /* Chinese Traditional */ 75 {"1252.cpg", 1, {2, 0}, {2, 0}, 0}, /* MS Latin1 */ 76 {"949.cpg", 2, {3, 0}, {3, 0}, 0}, /* Korean */ 77 {"936.cpg", 2, {4, 0}, {4, 0}, 0}, /* Chinese Simplified */ 78 {"932.cpg", 2, {5, 0}, {5, 0}, 0}, /* Japanese */ 79 {"852.cpg", 1, {6, 0}, {6, 0}, 0}, /* Multilingual Latin2 */ 80 {"1250.cpg", 1, {7, 0}, {7, 0}, 0}, /* MS Latin2 */ 81 {"1253.cpg", 1, {8, 0}, {8, 0}, 0}, /* MS Greek */ 82 {"737.cpg", 1, {9, 0}, {9, 0}, 0}, /* Greek */ 83 {"1254.cpg", 1, {10, 0}, {10, 0}, 0}, /* MS Turkish */ 84 {"857.cpg", 1, {11, 0}, {11, 0}, 0}, /* Multilingual Latin5 */ 85 {"1251.cpg", 1, {12, 0}, {12, 0}, 0}, /* MS Cyrillic */ 86 {"866.cpg", 1, {13, 0}, {13, 0}, 0}, /* Cyrillic II */ 87 {"1255.cpg", 1, {14, 0}, {14, 0}, 0}, /* MS Hebrew */ 88 {"862.cpg", 1, {15, 0}, {15, 0}, 0}, /* Hebrew */ 89 {"1256.cpg", 1, {16, 0}, {16, 0}, 0}, /* MS Arabic */ 90 {"720.cpg", 1, {17, 0}, {17, 0}, 0} /* Arabic */ 91 }; 92 93 #define MAX_OEMPAGES (sizeof (oemcpg_table) / sizeof (oemcpg_table[0])) 94 #define MAX_UNICODE_IDX 65536 95 96 /* 97 * The default SMB OEM codepage for English is codepage 850. 98 */ 99 const smb_wchar_t oem_codepage_850[256] = { 100 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 101 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 102 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 103 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 104 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 105 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 106 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 107 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 108 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 109 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 110 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 111 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 112 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 113 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 114 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 115 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, 116 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, 117 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, 118 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, 119 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192, 120 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, 121 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, 122 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0, 123 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510, 124 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3, 125 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4, 126 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE, 127 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580, 128 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE, 129 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4, 130 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8, 131 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0 132 }; 133 134 /* 135 * The default telnet OEM codepage for English is codepage 1252. 136 */ 137 const smb_wchar_t oem_codepage_1252[256] = { 138 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 139 0x9, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x10, 140 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 141 0x19, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 0x20, 142 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 143 0x29, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x30, 144 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 145 0x39, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x40, 146 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 147 0x49, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x50, 148 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 149 0x59, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 0x60, 150 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 151 0x69, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x70, 152 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 153 0x79, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, 0x20AC, 154 0x81, 0x201A, 0x192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 155 0x2030, 0x160, 0x2039, 0x152, 0x8D, 0x017D, 0x8F, 0x90, 156 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 157 0x2122, 0x161, 0x203A, 0x153, 0x9D, 0x017E, 0x178, 0x00A0, 158 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 159 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 160 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 161 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0, 162 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 163 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0, 164 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 165 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0, 166 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 167 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x00F0, 168 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 169 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF 170 }; 171 172 static oempage_t *oem_get_oempage(uint32_t); 173 static oempage_t *oem_get_ucspage(uint32_t); 174 static void oem_codepage_init(uint32_t); 175 static void oem_codepage_setup(uint32_t); 176 177 /* 178 * Convert a unicode string to an oem string. 179 * 180 * The conversion will stop at the end of the unicode string 181 * or when (nbytes - 1) oem characters have been stored. 182 * 183 * The number of converted unicode characters is returned, 184 * or 0 on error. 185 */ 186 size_t 187 ucstooem(char *oem, const smb_wchar_t *ucs, size_t nbytes, uint32_t cpid) 188 { 189 oempage_t *ucspage; 190 uint32_t count = 0; 191 smb_wchar_t oemchar; 192 193 if (ucs == NULL || oem == NULL) 194 return (0); 195 196 if ((ucspage = oem_get_ucspage(cpid)) == NULL) 197 return (0); 198 199 while (nbytes != 0 && (oemchar = ucspage->value[*ucs]) != 0) { 200 if (oemchar & 0xff00 && nbytes >= MTS_MB_CHAR_MAX) { 201 *oem++ = oemchar >> 8; 202 *oem++ = (char)oemchar; 203 nbytes -= 2; 204 } else if (nbytes > 1) { 205 *oem++ = (char)oemchar; 206 nbytes--; 207 } else { 208 break; 209 } 210 211 count++; 212 ucs++; 213 } 214 215 *oem = '\0'; 216 return (count); 217 } 218 219 /* 220 * Convert an oem string to a unicode string. 221 * 222 * The conversion will stop at the end of the oem string or 223 * when nwchars - 1 have been converted. 224 * 225 * The number of converted oem chars is returned, or 0 on error. 226 * An oem char may be either 1 or 2 bytes. 227 */ 228 size_t 229 oemtoucs(smb_wchar_t *ucs, const char *oem, size_t nwchars, uint32_t cpid) 230 { 231 oempage_t *oempage; 232 size_t count = nwchars; 233 smb_wchar_t oemchar; 234 235 if (ucs == NULL || oem == NULL) 236 return (0); 237 238 if ((oempage = oem_get_oempage(cpid)) == NULL) 239 return (0); 240 241 while ((oemchar = (smb_wchar_t)*oem++ & 0xff) != 0) { 242 /* 243 * Cannot find one byte oemchar in table. 244 * Must be a lead byte. Try two bytes. 245 */ 246 if ((oempage->value[oemchar] == 0) && (oemchar != 0)) { 247 oemchar = oemchar << 8 | (*oem++ & 0xff); 248 if (oempage->value[oemchar] == 0) { 249 *ucs = 0; 250 break; 251 } 252 } 253 #ifdef _BIG_ENDIAN 254 *ucs = LE_IN16(&oempage->value[oemchar]); 255 #else 256 *ucs = oempage->value[oemchar]; 257 #endif 258 count--; 259 ucs++; 260 } 261 262 *ucs = 0; 263 return (nwchars - count); 264 } 265 266 /* 267 * Get a pointer to the oem page for the specific codepage id. 268 */ 269 static oempage_t * 270 oem_get_oempage(uint32_t cpid) 271 { 272 if (cpid >= MAX_OEMPAGES) 273 return (NULL); 274 275 if (!oemcpg_table[cpid].valid) { 276 oem_codepage_init(cpid); 277 278 if (!oemcpg_table[cpid].valid) 279 return (NULL); 280 } 281 282 return (&oemcpg_table[cpid].oempage); 283 } 284 285 /* 286 * Get a pointer to the ucs page for the specific codepage id. 287 */ 288 static oempage_t * 289 oem_get_ucspage(uint32_t cpid) 290 { 291 if (cpid >= MAX_OEMPAGES) 292 return (NULL); 293 294 if (!oemcpg_table[cpid].valid) { 295 oem_codepage_init(cpid); 296 297 if (!oemcpg_table[cpid].valid) 298 return (NULL); 299 } 300 301 return (&oemcpg_table[cpid].ucspage); 302 } 303 304 /* 305 * Initialize the oem page in the oem table. 306 */ 307 static void 308 oem_codepage_init(uint32_t cpid) 309 { 310 #ifndef _KERNEL 311 static mutex_t mutex; 312 313 (void) mutex_lock(&mutex); 314 oem_codepage_setup(cpid); 315 (void) mutex_unlock(&mutex); 316 #else 317 static kmutex_t mutex; 318 319 mutex_enter(&mutex); 320 oem_codepage_setup(cpid); 321 mutex_exit(&mutex); 322 #endif /* _KERNEL */ 323 } 324 325 static void 326 oem_codepage_setup(uint32_t cpid) 327 { 328 const smb_wchar_t *default_oem_cp; 329 oem_codepage_t *oemcpg; 330 uint32_t bytesperchar; 331 uint32_t max_oem_index; 332 int i; 333 334 switch (cpid) { 335 case OEM_CPG_850: 336 default_oem_cp = oem_codepage_850; 337 break; 338 case OEM_CPG_1252: 339 default_oem_cp = oem_codepage_1252; 340 default: 341 return; 342 } 343 344 oemcpg = &oemcpg_table[cpid]; 345 if (oemcpg->valid) 346 return; 347 348 /* 349 * max_oem_index will be 256 or 65536 dependent 350 * on the OEM codepage. 351 */ 352 bytesperchar = oemcpg_table[cpid].bytesperchar; 353 max_oem_index = 1 << (bytesperchar * 8); 354 355 oemcpg->oempage.value = 356 MEM_ZALLOC("oem", max_oem_index * sizeof (smb_wchar_t)); 357 if (oemcpg->oempage.value == NULL) 358 return; 359 360 oemcpg->ucspage.value = 361 MEM_ZALLOC("oem", MAX_UNICODE_IDX * sizeof (smb_wchar_t)); 362 if (oemcpg->ucspage.value == NULL) { 363 MEM_FREE("oem", oemcpg->oempage.value); 364 oemcpg->oempage.value = NULL; 365 return; 366 } 367 368 for (i = 0; i < max_oem_index; i++) { 369 oemcpg->oempage.value[i] = default_oem_cp[i]; 370 oemcpg->ucspage.value[default_oem_cp[i]] = (smb_wchar_t)i; 371 } 372 373 oemcpg->valid = B_TRUE; 374 } 375