1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
26 */
27
28 /*
29 * Support for oem <-> unicode translations.
30 */
31
32 #if !defined(_KERNEL) && !defined(_FAKE_KERNEL)
33 #include <stdlib.h>
34 #include <thread.h>
35 #include <synch.h>
36 #include <string.h>
37 #else
38 #include <sys/ksynch.h>
39 #endif /* _KERNEL */
40
41 #include <sys/byteorder.h>
42 #include <smbsrv/alloc.h>
43 #include <smbsrv/string.h>
44
45 /*
46 * cpid The oemcpg_table index for this oempage.
47 * value The conversion values.
48 */
49 typedef struct oempage {
50 uint32_t cpid;
51 smb_wchar_t *value;
52 } oempage_t;
53
54 /*
55 * filename The actual filename contains the codepage.
56 * bytesperchar The codepage uses double or single bytes per char.
57 * oempage The oempage is used to convert Unicode characters to
58 * OEM characters. Memory needs to be allocated for
59 * the value field of oempage to store the table.
60 * ucspage The unicode page is used to convert OEM characters
61 * to Unicode characters. Memory needs to be allocated
62 * for the value field of ucspage to store the table.
63 * valid True if the codepage has been initialized.
64 */
65 typedef struct oem_codepage {
66 char *filename;
67 uint32_t bytesperchar;
68 oempage_t oempage;
69 oempage_t ucspage;
70 boolean_t valid;
71 } oem_codepage_t;
72
73 static oem_codepage_t oemcpg_table[] = {
74 {"850.cpg", 1, {0, 0}, {0, 0}, 0}, /* Multilingual Latin1 */
75 {"950.cpg", 2, {1, 0}, {1, 0}, 0}, /* Chinese Traditional */
76 {"1252.cpg", 1, {2, 0}, {2, 0}, 0}, /* MS Latin1 */
77 {"949.cpg", 2, {3, 0}, {3, 0}, 0}, /* Korean */
78 {"936.cpg", 2, {4, 0}, {4, 0}, 0}, /* Chinese Simplified */
79 {"932.cpg", 2, {5, 0}, {5, 0}, 0}, /* Japanese */
80 {"852.cpg", 1, {6, 0}, {6, 0}, 0}, /* Multilingual Latin2 */
81 {"1250.cpg", 1, {7, 0}, {7, 0}, 0}, /* MS Latin2 */
82 {"1253.cpg", 1, {8, 0}, {8, 0}, 0}, /* MS Greek */
83 {"737.cpg", 1, {9, 0}, {9, 0}, 0}, /* Greek */
84 {"1254.cpg", 1, {10, 0}, {10, 0}, 0}, /* MS Turkish */
85 {"857.cpg", 1, {11, 0}, {11, 0}, 0}, /* Multilingual Latin5 */
86 {"1251.cpg", 1, {12, 0}, {12, 0}, 0}, /* MS Cyrillic */
87 {"866.cpg", 1, {13, 0}, {13, 0}, 0}, /* Cyrillic II */
88 {"1255.cpg", 1, {14, 0}, {14, 0}, 0}, /* MS Hebrew */
89 {"862.cpg", 1, {15, 0}, {15, 0}, 0}, /* Hebrew */
90 {"1256.cpg", 1, {16, 0}, {16, 0}, 0}, /* MS Arabic */
91 {"720.cpg", 1, {17, 0}, {17, 0}, 0} /* Arabic */
92 };
93
/* Number of entries in oemcpg_table; codepage ids at or above it are rejected. */
#define	MAX_OEMPAGES	(sizeof (oemcpg_table) / sizeof (*oemcpg_table))
/* Number of entries allocated for each unicode -> oem table. */
#define	MAX_UNICODE_IDX	0x10000
96
97 /*
98 * The default SMB OEM codepage for English is codepage 850.
99 */
100 const smb_wchar_t oem_codepage_850[256] = {
101 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
102 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
103 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
104 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
105 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
106 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
107 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
108 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
109 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
110 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
111 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
112 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
113 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
114 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
115 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
116 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
117 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
118 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
119 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
120 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
121 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
122 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
123 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
124 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
125 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
126 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
127 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
128 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
129 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
130 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
131 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
132 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
133 };
134
135 /*
136 * The default telnet OEM codepage for English is codepage 1252.
137 */
138 const smb_wchar_t oem_codepage_1252[256] = {
139 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
140 0x9, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x10,
141 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
142 0x19, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 0x20,
143 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
144 0x29, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x30,
145 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
146 0x39, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x40,
147 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
148 0x49, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x50,
149 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
150 0x59, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 0x60,
151 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
152 0x69, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x70,
153 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
154 0x79, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, 0x20AC,
155 0x81, 0x201A, 0x192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6,
156 0x2030, 0x160, 0x2039, 0x152, 0x8D, 0x017D, 0x8F, 0x90,
157 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC,
158 0x2122, 0x161, 0x203A, 0x153, 0x9D, 0x017E, 0x178, 0x00A0,
159 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8,
160 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0,
161 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8,
162 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0,
163 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8,
164 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0,
165 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8,
166 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0,
167 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8,
168 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x00F0,
169 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8,
170 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
171 };
172
173 static oempage_t *oem_get_oempage(uint32_t);
174 static oempage_t *oem_get_ucspage(uint32_t);
175 static void oem_codepage_init(uint32_t);
176 static void oem_codepage_setup(uint32_t);
177
178 /*
179 * Convert a unicode string to an oem string.
180 *
181 * The conversion will stop at the end of the unicode string
182 * or when (nbytes - 1) oem characters have been stored.
183 *
184 * The number of converted unicode characters is returned,
185 * or 0 on error.
186 */
187 size_t
ucstooem(char * oem,const smb_wchar_t * ucs,size_t nbytes,uint32_t cpid)188 ucstooem(char *oem, const smb_wchar_t *ucs, size_t nbytes, uint32_t cpid)
189 {
190 oempage_t *ucspage;
191 uint32_t count = 0;
192 smb_wchar_t oemchar;
193
194 if (ucs == NULL || oem == NULL)
195 return (0);
196
197 if ((ucspage = oem_get_ucspage(cpid)) == NULL)
198 return (0);
199
200 while (nbytes != 0 && (oemchar = ucspage->value[*ucs]) != 0) {
201 if (oemchar & 0xff00 && nbytes >= MTS_MB_CHAR_MAX) {
202 *oem++ = oemchar >> 8;
203 *oem++ = (char)oemchar;
204 nbytes -= 2;
205 } else if (nbytes > 1) {
206 *oem++ = (char)oemchar;
207 nbytes--;
208 } else {
209 break;
210 }
211
212 count++;
213 ucs++;
214 }
215
216 *oem = '\0';
217 return (count);
218 }
219
220 /*
221 * Convert an oem string to a unicode string.
222 *
223 * The conversion will stop at the end of the oem string or
224 * when nwchars - 1 have been converted.
225 *
226 * The number of converted oem chars is returned, or 0 on error.
227 * An oem char may be either 1 or 2 bytes.
228 */
229 size_t
oemtoucs(smb_wchar_t * ucs,const char * oem,size_t nwchars,uint32_t cpid)230 oemtoucs(smb_wchar_t *ucs, const char *oem, size_t nwchars, uint32_t cpid)
231 {
232 oempage_t *oempage;
233 size_t count = nwchars;
234 smb_wchar_t oemchar;
235
236 if (ucs == NULL || oem == NULL)
237 return (0);
238
239 if ((oempage = oem_get_oempage(cpid)) == NULL)
240 return (0);
241
242 while ((oemchar = (smb_wchar_t)*oem++ & 0xff) != 0) {
243 /*
244 * Cannot find one byte oemchar in table.
245 * Must be a lead byte. Try two bytes.
246 */
247 if ((oempage->value[oemchar] == 0) && (oemchar != 0)) {
248 oemchar = oemchar << 8 | (*oem++ & 0xff);
249 if (oempage->value[oemchar] == 0) {
250 *ucs = 0;
251 break;
252 }
253 }
254 #ifdef _BIG_ENDIAN
255 *ucs = LE_IN16(&oempage->value[oemchar]);
256 #else
257 *ucs = oempage->value[oemchar];
258 #endif
259 count--;
260 ucs++;
261 }
262
263 *ucs = 0;
264 return (nwchars - count);
265 }
266
267 /*
268 * Get a pointer to the oem page for the specific codepage id.
269 */
270 static oempage_t *
oem_get_oempage(uint32_t cpid)271 oem_get_oempage(uint32_t cpid)
272 {
273 if (cpid >= MAX_OEMPAGES)
274 return (NULL);
275
276 if (!oemcpg_table[cpid].valid) {
277 oem_codepage_init(cpid);
278
279 if (!oemcpg_table[cpid].valid)
280 return (NULL);
281 }
282
283 return (&oemcpg_table[cpid].oempage);
284 }
285
286 /*
287 * Get a pointer to the ucs page for the specific codepage id.
288 */
289 static oempage_t *
oem_get_ucspage(uint32_t cpid)290 oem_get_ucspage(uint32_t cpid)
291 {
292 if (cpid >= MAX_OEMPAGES)
293 return (NULL);
294
295 if (!oemcpg_table[cpid].valid) {
296 oem_codepage_init(cpid);
297
298 if (!oemcpg_table[cpid].valid)
299 return (NULL);
300 }
301
302 return (&oemcpg_table[cpid].ucspage);
303 }
304
305 /*
306 * Initialize the oem page in the oem table.
307 */
308 static void
oem_codepage_init(uint32_t cpid)309 oem_codepage_init(uint32_t cpid)
310 {
311 #if !defined(_KERNEL) && !defined(_FAKE_KERNEL)
312 static mutex_t mutex;
313
314 (void) mutex_lock(&mutex);
315 oem_codepage_setup(cpid);
316 (void) mutex_unlock(&mutex);
317 #else
318 static kmutex_t mutex;
319
320 mutex_enter(&mutex);
321 oem_codepage_setup(cpid);
322 mutex_exit(&mutex);
323 #endif /* _KERNEL */
324 }
325
326 static void
oem_codepage_setup(uint32_t cpid)327 oem_codepage_setup(uint32_t cpid)
328 {
329 const smb_wchar_t *default_oem_cp;
330 oem_codepage_t *oemcpg;
331 uint32_t bytesperchar;
332 uint32_t max_oem_index;
333 int i;
334
335 switch (cpid) {
336 case OEM_CPG_850:
337 default_oem_cp = oem_codepage_850;
338 break;
339 case OEM_CPG_1252:
340 default_oem_cp = oem_codepage_1252;
341 default:
342 return;
343 }
344
345 oemcpg = &oemcpg_table[cpid];
346 if (oemcpg->valid)
347 return;
348
349 /*
350 * max_oem_index will be 256 or 65536 dependent
351 * on the OEM codepage.
352 */
353 bytesperchar = oemcpg_table[cpid].bytesperchar;
354 max_oem_index = 1 << (bytesperchar * 8);
355
356 oemcpg->oempage.value =
357 MEM_ZALLOC("oem", max_oem_index * sizeof (smb_wchar_t));
358 if (oemcpg->oempage.value == NULL)
359 return;
360
361 oemcpg->ucspage.value =
362 MEM_ZALLOC("oem", MAX_UNICODE_IDX * sizeof (smb_wchar_t));
363 if (oemcpg->ucspage.value == NULL) {
364 MEM_FREE("oem", oemcpg->oempage.value);
365 oemcpg->oempage.value = NULL;
366 return;
367 }
368
369 for (i = 0; i < max_oem_index; i++) {
370 oemcpg->oempage.value[i] = default_oem_cp[i];
371 oemcpg->ucspage.value[default_oem_cp[i]] = (smb_wchar_t)i;
372 }
373
374 oemcpg->valid = B_TRUE;
375 }
376