xref: /titanic_44/usr/src/common/smbsrv/smb_oem.c (revision 3c112a2b34403220c06c3e2fcac403358cfba168)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Support for oem <-> unicode translations.
28  */
29 
30 #ifndef _KERNEL
31 #include <stdlib.h>
32 #include <thread.h>
33 #include <synch.h>
34 #include <string.h>
35 #else
36 #include <sys/ksynch.h>
37 #endif /* _KERNEL */
38 
39 #include <sys/byteorder.h>
40 #include <smbsrv/alloc.h>
41 #include <smbsrv/string.h>
42 
43 /*
44  * cpid		The oemcpg_table index for this oempage.
45  * value	The conversion values.
46  */
47 typedef struct oempage {
48 	uint32_t	cpid;
49 	smb_wchar_t	*value;
50 } oempage_t;
51 
52 /*
53  * filename	The actual filename contains the codepage.
54  * bytesperchar	The codepage uses double or single bytes per char.
55  * oempage	The oempage is used to convert Unicode characters to
56  *		OEM characters.  Memory needs to be allocated for
57  *		the value field of oempage to store the table.
58  * ucspage	The unicode page is used to convert OEM characters
59  *		to Unicode characters.  Memory needs to be allocated
60  *		for the value field of ucspage to store the table.
61  * valid	True if the codepage has been initialized.
62  */
63 typedef struct oem_codepage {
64 	char		*filename;
65 	uint32_t	bytesperchar;
66 	oempage_t	oempage;
67 	oempage_t	ucspage;
68 	boolean_t	valid;
69 } oem_codepage_t;
70 
71 static oem_codepage_t oemcpg_table[] = {
72 	{"850.cpg",  1, {0, 0},  {0, 0},  0},	/* Multilingual Latin1 */
73 	{"950.cpg",  2, {1, 0},  {1, 0},  0},	/* Chinese Traditional */
74 	{"1252.cpg", 1, {2, 0},  {2, 0},  0},	/* MS Latin1 */
75 	{"949.cpg",  2, {3, 0},  {3, 0},  0},	/* Korean */
76 	{"936.cpg",  2, {4, 0},  {4, 0},  0},	/* Chinese Simplified */
77 	{"932.cpg",  2, {5, 0},  {5, 0},  0},	/* Japanese */
78 	{"852.cpg",  1, {6, 0},  {6, 0},  0},	/* Multilingual Latin2 */
79 	{"1250.cpg", 1, {7, 0},  {7, 0},  0},	/* MS Latin2 */
80 	{"1253.cpg", 1, {8, 0},  {8, 0},  0},	/* MS Greek */
81 	{"737.cpg",  1, {9, 0},  {9, 0},  0},	/* Greek */
82 	{"1254.cpg", 1, {10, 0}, {10, 0}, 0},	/* MS Turkish */
83 	{"857.cpg",  1, {11, 0}, {11, 0}, 0},	/* Multilingual Latin5 */
84 	{"1251.cpg", 1, {12, 0}, {12, 0}, 0},	/* MS Cyrillic */
85 	{"866.cpg",  1, {13, 0}, {13, 0}, 0},	/* Cyrillic II */
86 	{"1255.cpg", 1, {14, 0}, {14, 0}, 0},	/* MS Hebrew */
87 	{"862.cpg",  1, {15, 0}, {15, 0}, 0},	/* Hebrew */
88 	{"1256.cpg", 1, {16, 0}, {16, 0}, 0},	/* MS Arabic */
89 	{"720.cpg",  1, {17, 0}, {17, 0}, 0}	/* Arabic */
90 };
91 
/* Number of entries in oemcpg_table; valid cpids are 0 .. MAX_OEMPAGES-1. */
#define	MAX_OEMPAGES	(sizeof (oemcpg_table) / sizeof (oemcpg_table[0]))

/* Number of entries in a ucspage table: one per 16-bit character value. */
#define	MAX_UNICODE_IDX	0x10000
94 
95 /*
96  * The default SMB OEM codepage for English is codepage 850.
97  */
98 smb_wchar_t oem_codepage_850[256] = {
99 	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
100 	0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
101 	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
102 	0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
103 	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
104 	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
105 	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
106 	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
107 	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
108 	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
109 	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
110 	0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
111 	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
112 	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
113 	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
114 	0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
115 	0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
116 	0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
117 	0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
118 	0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
119 	0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
120 	0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
121 	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
122 	0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
123 	0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
124 	0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
125 	0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
126 	0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
127 	0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
128 	0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
129 	0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
130 	0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0
131 };
132 
133 /*
134  * The default telnet OEM codepage for English is codepage 1252.
135  */
136 smb_wchar_t oem_codepage_1252[256] = {
137 	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
138 	0x9, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x10,
139 	0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
140 	0x19, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 0x20,
141 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
142 	0x29, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x30,
143 	0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
144 	0x39, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x40,
145 	0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
146 	0x49, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x50,
147 	0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
148 	0x59, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 0x60,
149 	0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
150 	0x69, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x70,
151 	0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
152 	0x79, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, 0x20AC,
153 	0x81, 0x201A, 0x192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6,
154 	0x2030, 0x160, 0x2039, 0x152, 0x8D, 0x017D, 0x8F, 0x90,
155 	0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC,
156 	0x2122, 0x161, 0x203A, 0x153, 0x9D, 0x017E, 0x178, 0x00A0,
157 	0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8,
158 	0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0,
159 	0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8,
160 	0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0,
161 	0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8,
162 	0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0,
163 	0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8,
164 	0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0,
165 	0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8,
166 	0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x00F0,
167 	0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8,
168 	0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
169 };
170 
171 static oempage_t *oem_get_oempage(uint32_t);
172 static oempage_t *oem_get_ucspage(uint32_t);
173 static void oem_codepage_init(uint32_t);
174 static void oem_codepage_setup(uint32_t);
175 
176 /*
177  * Convert a unicode string to an oem string.
178  *
179  * The conversion will stop at the end of the unicode string
180  * or when (nbytes - 1) oem characters have been stored.
181  *
182  * The number of converted unicode characters is returned,
183  * or 0 on error.
184  */
185 size_t
186 ucstooem(char *oem, const smb_wchar_t *ucs, size_t nbytes, uint32_t cpid)
187 {
188 	oempage_t	*ucspage;
189 	uint32_t	count = 0;
190 	smb_wchar_t	oemchar;
191 
192 	if (ucs == NULL || oem == NULL)
193 		return (0);
194 
195 	if ((ucspage = oem_get_ucspage(cpid)) == NULL)
196 		return (0);
197 
198 	while (nbytes != 0 && (oemchar = ucspage->value[*ucs]) != 0) {
199 		if (oemchar & 0xff00 && nbytes >= MTS_MB_CHAR_MAX) {
200 			*oem++ = oemchar >> 8;
201 			*oem++ = (char)oemchar;
202 			nbytes -= 2;
203 		} else if (nbytes > 1) {
204 			*oem++ = (char)oemchar;
205 			nbytes--;
206 		} else {
207 			break;
208 		}
209 
210 		count++;
211 		ucs++;
212 	}
213 
214 	*oem = '\0';
215 	return (count);
216 }
217 
218 /*
219  * Convert an oem string to a unicode string.
220  *
221  * The conversion will stop at the end of the oem string or
222  * when nwchars - 1 have been converted.
223  *
224  * The number of converted oem chars is returned, or 0 on error.
225  * An oem char may be either 1 or 2 bytes.
226  */
227 size_t
228 oemtoucs(smb_wchar_t *ucs, const char *oem, size_t nwchars, uint32_t cpid)
229 {
230 	oempage_t	*oempage;
231 	size_t		count = nwchars;
232 	smb_wchar_t	oemchar;
233 
234 	if (ucs == NULL || oem == NULL)
235 		return (0);
236 
237 	if ((oempage = oem_get_oempage(cpid)) == NULL)
238 		return (0);
239 
240 	while ((oemchar = (smb_wchar_t)*oem++ & 0xff) != 0) {
241 		/*
242 		 * Cannot find one byte oemchar in table.
243 		 * Must be a lead byte. Try two bytes.
244 		 */
245 		if ((oempage->value[oemchar] == 0) && (oemchar != 0)) {
246 			oemchar = oemchar << 8 | (*oem++ & 0xff);
247 			if (oempage->value[oemchar] == 0) {
248 				*ucs = 0;
249 				break;
250 			}
251 		}
252 #ifdef _BIG_ENDIAN
253 		*ucs = LE_IN16(&oempage->value[oemchar]);
254 #else
255 		*ucs = oempage->value[oemchar];
256 #endif
257 		count--;
258 		ucs++;
259 	}
260 
261 	*ucs = 0;
262 	return (nwchars - count);
263 }
264 
265 /*
266  * Get a pointer to the oem page for the specific codepage id.
267  */
268 static oempage_t *
269 oem_get_oempage(uint32_t cpid)
270 {
271 	if (cpid >= MAX_OEMPAGES)
272 		return (NULL);
273 
274 	if (!oemcpg_table[cpid].valid) {
275 		oem_codepage_init(cpid);
276 
277 		if (!oemcpg_table[cpid].valid)
278 			return (NULL);
279 	}
280 
281 	return (&oemcpg_table[cpid].oempage);
282 }
283 
284 /*
285  * Get a pointer to the ucs page for the specific codepage id.
286  */
287 static oempage_t *
288 oem_get_ucspage(uint32_t cpid)
289 {
290 	if (cpid >= MAX_OEMPAGES)
291 		return (NULL);
292 
293 	if (!oemcpg_table[cpid].valid) {
294 		oem_codepage_init(cpid);
295 
296 		if (!oemcpg_table[cpid].valid)
297 			return (NULL);
298 	}
299 
300 	return (&oemcpg_table[cpid].ucspage);
301 }
302 
303 /*
304  * Initialize the oem page in the oem table.
305  */
306 static void
307 oem_codepage_init(uint32_t cpid)
308 {
309 #ifndef _KERNEL
310 	static mutex_t mutex;
311 
312 	(void) mutex_lock(&mutex);
313 	oem_codepage_setup(cpid);
314 	(void) mutex_unlock(&mutex);
315 #else
316 	static kmutex_t mutex;
317 
318 	mutex_enter(&mutex);
319 	oem_codepage_setup(cpid);
320 	mutex_exit(&mutex);
321 #endif /* _KERNEL */
322 }
323 
324 static void
325 oem_codepage_setup(uint32_t cpid)
326 {
327 	smb_wchar_t	*default_oem_cp;
328 	oem_codepage_t	*oemcpg;
329 	uint32_t	bytesperchar;
330 	uint32_t	max_oem_index;
331 	int		i;
332 
333 	switch (cpid) {
334 	case OEM_CPG_850:
335 		default_oem_cp = oem_codepage_850;
336 		break;
337 	case OEM_CPG_1252:
338 		default_oem_cp = oem_codepage_1252;
339 	default:
340 		return;
341 	}
342 
343 	oemcpg = &oemcpg_table[cpid];
344 	if (oemcpg->valid)
345 		return;
346 
347 	/*
348 	 * max_oem_index will be 256 or 65536 dependent
349 	 * on the OEM codepage.
350 	 */
351 	bytesperchar = oemcpg_table[cpid].bytesperchar;
352 	max_oem_index = 1 << (bytesperchar * 8);
353 
354 	oemcpg->oempage.value =
355 	    MEM_ZALLOC("oem", max_oem_index * sizeof (smb_wchar_t));
356 	if (oemcpg->oempage.value == NULL)
357 		return;
358 
359 	oemcpg->ucspage.value =
360 	    MEM_ZALLOC("oem", MAX_UNICODE_IDX * sizeof (smb_wchar_t));
361 	if (oemcpg->ucspage.value == NULL) {
362 		MEM_FREE("oem", oemcpg->oempage.value);
363 		oemcpg->oempage.value = NULL;
364 		return;
365 	}
366 
367 	for (i = 0; i < max_oem_index; i++) {
368 		oemcpg->oempage.value[i] = default_oem_cp[i];
369 		oemcpg->ucspage.value[default_oem_cp[i]] = (smb_wchar_t)i;
370 	}
371 
372 	oemcpg->valid = B_TRUE;
373 }
374