xref: /illumos-gate/usr/src/common/smbsrv/smb_oem.c (revision 8412fdadc46d5bd0355a53fda7bda83e60803108)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Support for oem <-> unicode translations.
29  */
30 
31 #ifndef _KERNEL
32 #include <stdlib.h>
33 #include <thread.h>
34 #include <synch.h>
35 #include <string.h>
36 #else
37 #include <sys/ksynch.h>
38 #endif /* _KERNEL */
39 
40 #include <sys/byteorder.h>
41 #include <smbsrv/alloc.h>
42 #include <smbsrv/string.h>
43 
44 /*
45  * cpid		The oemcpg_table index for this oempage.
46  * value	The conversion values.
47  */
48 typedef struct oempage {
49 	uint32_t	cpid;
50 	smb_wchar_t	*value;
51 } oempage_t;
52 
53 /*
54  * filename	The actual filename contains the codepage.
55  * bytesperchar	The codepage uses double or single bytes per char.
56  * oempage	The oempage is used to convert Unicode characters to
57  *		OEM characters.  Memory needs to be allocated for
58  *		the value field of oempage to store the table.
59  * ucspage	The unicode page is used to convert OEM characters
60  *		to Unicode characters.  Memory needs to be allocated
61  *		for the value field of ucspage to store the table.
62  * valid	True if the codepage has been initialized.
63  */
64 typedef struct oem_codepage {
65 	char		*filename;
66 	uint32_t	bytesperchar;
67 	oempage_t	oempage;
68 	oempage_t	ucspage;
69 	boolean_t	valid;
70 } oem_codepage_t;
71 
72 static oem_codepage_t oemcpg_table[] = {
73 	{"850.cpg",  1, {0, 0},  {0, 0},  0},	/* Multilingual Latin1 */
74 	{"950.cpg",  2, {1, 0},  {1, 0},  0},	/* Chinese Traditional */
75 	{"1252.cpg", 1, {2, 0},  {2, 0},  0},	/* MS Latin1 */
76 	{"949.cpg",  2, {3, 0},  {3, 0},  0},	/* Korean */
77 	{"936.cpg",  2, {4, 0},  {4, 0},  0},	/* Chinese Simplified */
78 	{"932.cpg",  2, {5, 0},  {5, 0},  0},	/* Japanese */
79 	{"852.cpg",  1, {6, 0},  {6, 0},  0},	/* Multilingual Latin2 */
80 	{"1250.cpg", 1, {7, 0},  {7, 0},  0},	/* MS Latin2 */
81 	{"1253.cpg", 1, {8, 0},  {8, 0},  0},	/* MS Greek */
82 	{"737.cpg",  1, {9, 0},  {9, 0},  0},	/* Greek */
83 	{"1254.cpg", 1, {10, 0}, {10, 0}, 0},	/* MS Turkish */
84 	{"857.cpg",  1, {11, 0}, {11, 0}, 0},	/* Multilingual Latin5 */
85 	{"1251.cpg", 1, {12, 0}, {12, 0}, 0},	/* MS Cyrillic */
86 	{"866.cpg",  1, {13, 0}, {13, 0}, 0},	/* Cyrillic II */
87 	{"1255.cpg", 1, {14, 0}, {14, 0}, 0},	/* MS Hebrew */
88 	{"862.cpg",  1, {15, 0}, {15, 0}, 0},	/* Hebrew */
89 	{"1256.cpg", 1, {16, 0}, {16, 0}, 0},	/* MS Arabic */
90 	{"720.cpg",  1, {17, 0}, {17, 0}, 0}	/* Arabic */
91 };
92 
93 #define	MAX_OEMPAGES	(sizeof (oemcpg_table) / sizeof (oemcpg_table[0]))
94 #define	MAX_UNICODE_IDX	65536
95 
96 /*
97  * The default SMB OEM codepage for English is codepage 850.
98  */
99 const smb_wchar_t oem_codepage_850[256] = {
100 	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
101 	0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
102 	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
103 	0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
104 	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
105 	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
106 	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
107 	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
108 	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
109 	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
110 	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
111 	0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
112 	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
113 	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
114 	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
115 	0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
116 	0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
117 	0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
118 	0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
119 	0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
120 	0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
121 	0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
122 	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
123 	0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
124 	0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
125 	0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
126 	0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
127 	0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
128 	0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
129 	0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
130 	0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
131 	0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
132 };
133 
134 /*
135  * The default telnet OEM codepage for English is codepage 1252.
136  */
137 const smb_wchar_t oem_codepage_1252[256] = {
138 	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
139 	0x9, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x10,
140 	0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
141 	0x19, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 0x20,
142 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
143 	0x29, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x30,
144 	0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
145 	0x39, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x40,
146 	0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
147 	0x49, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x50,
148 	0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
149 	0x59, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 0x60,
150 	0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
151 	0x69, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x70,
152 	0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
153 	0x79, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, 0x20AC,
154 	0x81, 0x201A, 0x192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6,
155 	0x2030, 0x160, 0x2039, 0x152, 0x8D, 0x017D, 0x8F, 0x90,
156 	0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC,
157 	0x2122, 0x161, 0x203A, 0x153, 0x9D, 0x017E, 0x178, 0x00A0,
158 	0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8,
159 	0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0,
160 	0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8,
161 	0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0,
162 	0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8,
163 	0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0,
164 	0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8,
165 	0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0,
166 	0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8,
167 	0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x00F0,
168 	0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8,
169 	0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
170 };
171 
172 static oempage_t *oem_get_oempage(uint32_t);
173 static oempage_t *oem_get_ucspage(uint32_t);
174 static void oem_codepage_init(uint32_t);
175 static void oem_codepage_setup(uint32_t);
176 
177 /*
178  * Convert a unicode string to an oem string.
179  *
180  * The conversion will stop at the end of the unicode string
181  * or when (nbytes - 1) oem characters have been stored.
182  *
183  * The number of converted unicode characters is returned,
184  * or 0 on error.
185  */
186 size_t
187 ucstooem(char *oem, const smb_wchar_t *ucs, size_t nbytes, uint32_t cpid)
188 {
189 	oempage_t	*ucspage;
190 	uint32_t	count = 0;
191 	smb_wchar_t	oemchar;
192 
193 	if (ucs == NULL || oem == NULL)
194 		return (0);
195 
196 	if ((ucspage = oem_get_ucspage(cpid)) == NULL)
197 		return (0);
198 
199 	while (nbytes != 0 && (oemchar = ucspage->value[*ucs]) != 0) {
200 		if (oemchar & 0xff00 && nbytes >= MTS_MB_CHAR_MAX) {
201 			*oem++ = oemchar >> 8;
202 			*oem++ = (char)oemchar;
203 			nbytes -= 2;
204 		} else if (nbytes > 1) {
205 			*oem++ = (char)oemchar;
206 			nbytes--;
207 		} else {
208 			break;
209 		}
210 
211 		count++;
212 		ucs++;
213 	}
214 
215 	*oem = '\0';
216 	return (count);
217 }
218 
219 /*
220  * Convert an oem string to a unicode string.
221  *
222  * The conversion will stop at the end of the oem string or
223  * when nwchars - 1 have been converted.
224  *
225  * The number of converted oem chars is returned, or 0 on error.
226  * An oem char may be either 1 or 2 bytes.
227  */
228 size_t
229 oemtoucs(smb_wchar_t *ucs, const char *oem, size_t nwchars, uint32_t cpid)
230 {
231 	oempage_t	*oempage;
232 	size_t		count = nwchars;
233 	smb_wchar_t	oemchar;
234 
235 	if (ucs == NULL || oem == NULL)
236 		return (0);
237 
238 	if ((oempage = oem_get_oempage(cpid)) == NULL)
239 		return (0);
240 
241 	while ((oemchar = (smb_wchar_t)*oem++ & 0xff) != 0) {
242 		/*
243 		 * Cannot find one byte oemchar in table.
244 		 * Must be a lead byte. Try two bytes.
245 		 */
246 		if ((oempage->value[oemchar] == 0) && (oemchar != 0)) {
247 			oemchar = oemchar << 8 | (*oem++ & 0xff);
248 			if (oempage->value[oemchar] == 0) {
249 				*ucs = 0;
250 				break;
251 			}
252 		}
253 #ifdef _BIG_ENDIAN
254 		*ucs = LE_IN16(&oempage->value[oemchar]);
255 #else
256 		*ucs = oempage->value[oemchar];
257 #endif
258 		count--;
259 		ucs++;
260 	}
261 
262 	*ucs = 0;
263 	return (nwchars - count);
264 }
265 
266 /*
267  * Get a pointer to the oem page for the specific codepage id.
268  */
269 static oempage_t *
270 oem_get_oempage(uint32_t cpid)
271 {
272 	if (cpid >= MAX_OEMPAGES)
273 		return (NULL);
274 
275 	if (!oemcpg_table[cpid].valid) {
276 		oem_codepage_init(cpid);
277 
278 		if (!oemcpg_table[cpid].valid)
279 			return (NULL);
280 	}
281 
282 	return (&oemcpg_table[cpid].oempage);
283 }
284 
285 /*
286  * Get a pointer to the ucs page for the specific codepage id.
287  */
288 static oempage_t *
289 oem_get_ucspage(uint32_t cpid)
290 {
291 	if (cpid >= MAX_OEMPAGES)
292 		return (NULL);
293 
294 	if (!oemcpg_table[cpid].valid) {
295 		oem_codepage_init(cpid);
296 
297 		if (!oemcpg_table[cpid].valid)
298 			return (NULL);
299 	}
300 
301 	return (&oemcpg_table[cpid].ucspage);
302 }
303 
304 /*
305  * Initialize the oem page in the oem table.
306  */
307 static void
308 oem_codepage_init(uint32_t cpid)
309 {
310 #ifndef _KERNEL
311 	static mutex_t mutex;
312 
313 	(void) mutex_lock(&mutex);
314 	oem_codepage_setup(cpid);
315 	(void) mutex_unlock(&mutex);
316 #else
317 	static kmutex_t mutex;
318 
319 	mutex_enter(&mutex);
320 	oem_codepage_setup(cpid);
321 	mutex_exit(&mutex);
322 #endif /* _KERNEL */
323 }
324 
325 static void
326 oem_codepage_setup(uint32_t cpid)
327 {
328 	const smb_wchar_t *default_oem_cp;
329 	oem_codepage_t	*oemcpg;
330 	uint32_t	bytesperchar;
331 	uint32_t	max_oem_index;
332 	int		i;
333 
334 	switch (cpid) {
335 	case OEM_CPG_850:
336 		default_oem_cp = oem_codepage_850;
337 		break;
338 	case OEM_CPG_1252:
339 		default_oem_cp = oem_codepage_1252;
340 	default:
341 		return;
342 	}
343 
344 	oemcpg = &oemcpg_table[cpid];
345 	if (oemcpg->valid)
346 		return;
347 
348 	/*
349 	 * max_oem_index will be 256 or 65536 dependent
350 	 * on the OEM codepage.
351 	 */
352 	bytesperchar = oemcpg_table[cpid].bytesperchar;
353 	max_oem_index = 1 << (bytesperchar * 8);
354 
355 	oemcpg->oempage.value =
356 	    MEM_ZALLOC("oem", max_oem_index * sizeof (smb_wchar_t));
357 	if (oemcpg->oempage.value == NULL)
358 		return;
359 
360 	oemcpg->ucspage.value =
361 	    MEM_ZALLOC("oem", MAX_UNICODE_IDX * sizeof (smb_wchar_t));
362 	if (oemcpg->ucspage.value == NULL) {
363 		MEM_FREE("oem", oemcpg->oempage.value);
364 		oemcpg->oempage.value = NULL;
365 		return;
366 	}
367 
368 	for (i = 0; i < max_oem_index; i++) {
369 		oemcpg->oempage.value[i] = default_oem_cp[i];
370 		oemcpg->ucspage.value[default_oem_cp[i]] = (smb_wchar_t)i;
371 	}
372 
373 	oemcpg->valid = B_TRUE;
374 }
375