xref: /illumos-gate/usr/src/common/smbsrv/smb_oem.c (revision 89fdfac39633dc6769133c82b68b1ed74c2bc54b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
26  */
27 
28 /*
29  * Support for oem <-> unicode translations.
30  */
31 
32 #if !defined(_KERNEL) && !defined(_FAKE_KERNEL)
33 #include <stdlib.h>
34 #include <thread.h>
35 #include <synch.h>
36 #include <string.h>
37 #else
38 #include <sys/ksynch.h>
39 #endif /* _KERNEL */
40 
41 #include <sys/byteorder.h>
42 #include <smbsrv/alloc.h>
43 #include <smbsrv/string.h>
44 
/*
 * One translation table belonging to a codepage.  The same structure is
 * used for both directions (see the oempage and ucspage members of
 * oem_codepage_t).
 *
 * cpid		The oemcpg_table index for this oempage.
 * value	The conversion values: an array indexed by the source
 *		character whose entries are the translated character.
 *		It is NULL until oem_codepage_setup() allocates it.
 *		The converters in this file treat an entry of 0 as
 *		"no mapping".
 */
typedef struct oempage {
	uint32_t	cpid;
	smb_wchar_t	*value;
} oempage_t;
53 
/*
 * Per-codepage state; one entry of oemcpg_table.
 *
 * filename	Name of the file that contains the codepage.  It is not
 *		referenced by the code in this file, which only builds
 *		tables from the compiled-in defaults.
 * bytesperchar	The codepage uses double or single bytes per char.
 *		This determines the number of entries in oempage.value
 *		(256 or 65536).
 * oempage	Table indexed by OEM character that yields the Unicode
 *		character; oemtoucs() reads it.  Memory needs to be
 *		allocated for the value field of oempage to store the
 *		table.
 * ucspage	Table indexed by Unicode character that yields the OEM
 *		character; ucstooem() reads it.  Memory needs to be
 *		allocated for the value field of ucspage to store the
 *		table.
 * valid	True if the codepage has been initialized, i.e. both
 *		tables have been allocated and filled in.
 */
typedef struct oem_codepage {
	char		*filename;
	uint32_t	bytesperchar;
	oempage_t	oempage;
	oempage_t	ucspage;
	boolean_t	valid;
} oem_codepage_t;
72 
73 static oem_codepage_t oemcpg_table[] = {
74 	{"850.cpg",  1, {0, 0},  {0, 0},  0},	/* Multilingual Latin1 */
75 	{"950.cpg",  2, {1, 0},  {1, 0},  0},	/* Chinese Traditional */
76 	{"1252.cpg", 1, {2, 0},  {2, 0},  0},	/* MS Latin1 */
77 	{"949.cpg",  2, {3, 0},  {3, 0},  0},	/* Korean */
78 	{"936.cpg",  2, {4, 0},  {4, 0},  0},	/* Chinese Simplified */
79 	{"932.cpg",  2, {5, 0},  {5, 0},  0},	/* Japanese */
80 	{"852.cpg",  1, {6, 0},  {6, 0},  0},	/* Multilingual Latin2 */
81 	{"1250.cpg", 1, {7, 0},  {7, 0},  0},	/* MS Latin2 */
82 	{"1253.cpg", 1, {8, 0},  {8, 0},  0},	/* MS Greek */
83 	{"737.cpg",  1, {9, 0},  {9, 0},  0},	/* Greek */
84 	{"1254.cpg", 1, {10, 0}, {10, 0}, 0},	/* MS Turkish */
85 	{"857.cpg",  1, {11, 0}, {11, 0}, 0},	/* Multilingual Latin5 */
86 	{"1251.cpg", 1, {12, 0}, {12, 0}, 0},	/* MS Cyrillic */
87 	{"866.cpg",  1, {13, 0}, {13, 0}, 0},	/* Cyrillic II */
88 	{"1255.cpg", 1, {14, 0}, {14, 0}, 0},	/* MS Hebrew */
89 	{"862.cpg",  1, {15, 0}, {15, 0}, 0},	/* Hebrew */
90 	{"1256.cpg", 1, {16, 0}, {16, 0}, 0},	/* MS Arabic */
91 	{"720.cpg",  1, {17, 0}, {17, 0}, 0}	/* Arabic */
92 };
93 
/* Number of entries in oemcpg_table; a valid cpid is less than this. */
#define	MAX_OEMPAGES	(sizeof (oemcpg_table) / sizeof (oemcpg_table[0]))
/* Number of entries allocated for every ucspage.value table. */
#define	MAX_UNICODE_IDX	65536
96 
/*
 * The default SMB OEM codepage for English is codepage 850.
 *
 * The array is indexed by the one-byte OEM character and each entry is
 * the corresponding Unicode value.  oem_codepage_setup() copies it into
 * the oempage table and inverts it to build the ucspage table.
 */
const smb_wchar_t oem_codepage_850[256] = {
	/* 0x00 - 0x7F: every entry is equal to its own index */
	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
	0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
	0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
	/* 0x80 - 0xFF: codepage-specific values */
	0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
	0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
	0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
	0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
	0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
	0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
	0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
	0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
	0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
	0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
	0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
	0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
	0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
	0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
	0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
};
134 
135 /*
136  * The default telnet OEM codepage for English is codepage 1252.
137  */
138 const smb_wchar_t oem_codepage_1252[256] = {
139 	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
140 	0x9, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x10,
141 	0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
142 	0x19, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 0x20,
143 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
144 	0x29, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x30,
145 	0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
146 	0x39, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x40,
147 	0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
148 	0x49, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x50,
149 	0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
150 	0x59, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 0x60,
151 	0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
152 	0x69, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x70,
153 	0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
154 	0x79, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, 0x20AC,
155 	0x81, 0x201A, 0x192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6,
156 	0x2030, 0x160, 0x2039, 0x152, 0x8D, 0x017D, 0x8F, 0x90,
157 	0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC,
158 	0x2122, 0x161, 0x203A, 0x153, 0x9D, 0x017E, 0x178, 0x00A0,
159 	0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8,
160 	0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0,
161 	0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8,
162 	0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0,
163 	0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8,
164 	0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0,
165 	0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8,
166 	0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0,
167 	0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8,
168 	0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x00F0,
169 	0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8,
170 	0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
171 };
172 
/*
 * Forward declarations of the file-local helpers defined after the
 * public conversion routines.
 */
static oempage_t *oem_get_oempage(uint32_t);
static oempage_t *oem_get_ucspage(uint32_t);
static void oem_codepage_init(uint32_t);
static void oem_codepage_setup(uint32_t);
177 
178 /*
179  * Convert a unicode string to an oem string.
180  *
181  * The conversion will stop at the end of the unicode string
182  * or when (nbytes - 1) oem characters have been stored.
183  *
184  * The number of converted unicode characters is returned,
185  * or 0 on error.
186  */
187 size_t
188 ucstooem(char *oem, const smb_wchar_t *ucs, size_t nbytes, uint32_t cpid)
189 {
190 	oempage_t	*ucspage;
191 	uint32_t	count = 0;
192 	smb_wchar_t	oemchar;
193 
194 	if (ucs == NULL || oem == NULL)
195 		return (0);
196 
197 	if ((ucspage = oem_get_ucspage(cpid)) == NULL)
198 		return (0);
199 
200 	while (nbytes != 0 && (oemchar = ucspage->value[*ucs]) != 0) {
201 		if (oemchar & 0xff00 && nbytes >= MTS_MB_CHAR_MAX) {
202 			*oem++ = oemchar >> 8;
203 			*oem++ = (char)oemchar;
204 			nbytes -= 2;
205 		} else if (nbytes > 1) {
206 			*oem++ = (char)oemchar;
207 			nbytes--;
208 		} else {
209 			break;
210 		}
211 
212 		count++;
213 		ucs++;
214 	}
215 
216 	*oem = '\0';
217 	return (count);
218 }
219 
220 /*
221  * Convert an oem string to a unicode string.
222  *
223  * The conversion will stop at the end of the oem string or
224  * when nwchars - 1 have been converted.
225  *
226  * The number of converted oem chars is returned, or 0 on error.
227  * An oem char may be either 1 or 2 bytes.
228  */
229 size_t
230 oemtoucs(smb_wchar_t *ucs, const char *oem, size_t nwchars, uint32_t cpid)
231 {
232 	oempage_t	*oempage;
233 	size_t		count = nwchars;
234 	smb_wchar_t	oemchar;
235 
236 	if (ucs == NULL || oem == NULL)
237 		return (0);
238 
239 	if ((oempage = oem_get_oempage(cpid)) == NULL)
240 		return (0);
241 
242 	while ((oemchar = (smb_wchar_t)*oem++ & 0xff) != 0) {
243 		/*
244 		 * Cannot find one byte oemchar in table.
245 		 * Must be a lead byte. Try two bytes.
246 		 */
247 		if ((oempage->value[oemchar] == 0) && (oemchar != 0)) {
248 			oemchar = oemchar << 8 | (*oem++ & 0xff);
249 			if (oempage->value[oemchar] == 0) {
250 				*ucs = 0;
251 				break;
252 			}
253 		}
254 #ifdef _BIG_ENDIAN
255 		*ucs = LE_IN16(&oempage->value[oemchar]);
256 #else
257 		*ucs = oempage->value[oemchar];
258 #endif
259 		count--;
260 		ucs++;
261 	}
262 
263 	*ucs = 0;
264 	return (nwchars - count);
265 }
266 
267 /*
268  * Get a pointer to the oem page for the specific codepage id.
269  */
270 static oempage_t *
271 oem_get_oempage(uint32_t cpid)
272 {
273 	if (cpid >= MAX_OEMPAGES)
274 		return (NULL);
275 
276 	if (!oemcpg_table[cpid].valid) {
277 		oem_codepage_init(cpid);
278 
279 		if (!oemcpg_table[cpid].valid)
280 			return (NULL);
281 	}
282 
283 	return (&oemcpg_table[cpid].oempage);
284 }
285 
286 /*
287  * Get a pointer to the ucs page for the specific codepage id.
288  */
289 static oempage_t *
290 oem_get_ucspage(uint32_t cpid)
291 {
292 	if (cpid >= MAX_OEMPAGES)
293 		return (NULL);
294 
295 	if (!oemcpg_table[cpid].valid) {
296 		oem_codepage_init(cpid);
297 
298 		if (!oemcpg_table[cpid].valid)
299 			return (NULL);
300 	}
301 
302 	return (&oemcpg_table[cpid].ucspage);
303 }
304 
305 /*
306  * Initialize the oem page in the oem table.
307  */
308 static void
309 oem_codepage_init(uint32_t cpid)
310 {
311 #if !defined(_KERNEL) && !defined(_FAKE_KERNEL)
312 	static mutex_t mutex;
313 
314 	(void) mutex_lock(&mutex);
315 	oem_codepage_setup(cpid);
316 	(void) mutex_unlock(&mutex);
317 #else
318 	static kmutex_t mutex;
319 
320 	mutex_enter(&mutex);
321 	oem_codepage_setup(cpid);
322 	mutex_exit(&mutex);
323 #endif /* _KERNEL */
324 }
325 
326 static void
327 oem_codepage_setup(uint32_t cpid)
328 {
329 	const smb_wchar_t *default_oem_cp;
330 	oem_codepage_t	*oemcpg;
331 	uint32_t	bytesperchar;
332 	uint32_t	max_oem_index;
333 	int		i;
334 
335 	switch (cpid) {
336 	case OEM_CPG_850:
337 		default_oem_cp = oem_codepage_850;
338 		break;
339 	case OEM_CPG_1252:
340 		default_oem_cp = oem_codepage_1252;
341 	default:
342 		return;
343 	}
344 
345 	oemcpg = &oemcpg_table[cpid];
346 	if (oemcpg->valid)
347 		return;
348 
349 	/*
350 	 * max_oem_index will be 256 or 65536 dependent
351 	 * on the OEM codepage.
352 	 */
353 	bytesperchar = oemcpg_table[cpid].bytesperchar;
354 	max_oem_index = 1 << (bytesperchar * 8);
355 
356 	oemcpg->oempage.value =
357 	    MEM_ZALLOC("oem", max_oem_index * sizeof (smb_wchar_t));
358 	if (oemcpg->oempage.value == NULL)
359 		return;
360 
361 	oemcpg->ucspage.value =
362 	    MEM_ZALLOC("oem", MAX_UNICODE_IDX * sizeof (smb_wchar_t));
363 	if (oemcpg->ucspage.value == NULL) {
364 		MEM_FREE("oem", oemcpg->oempage.value);
365 		oemcpg->oempage.value = NULL;
366 		return;
367 	}
368 
369 	for (i = 0; i < max_oem_index; i++) {
370 		oemcpg->oempage.value[i] = default_oem_cp[i];
371 		oemcpg->ucspage.value[default_oem_cp[i]] = (smb_wchar_t)i;
372 	}
373 
374 	oemcpg->valid = B_TRUE;
375 }
376