xref: /titanic_52/usr/src/lib/libsmbfs/smb/charsets.c (revision d2ec54f7875f7e05edd56195adbeb593c947763f)
1 /*
2  * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_LICENSE_HEADER_START@
5  *
6  * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
7  * Reserved.  This file contains Original Code and/or Modifications of
8  * Original Code as defined in and that are subject to the Apple Public
9  * Source License Version 1.0 (the 'License').  You may not use this file
10  * except in compliance with the License.  Please obtain a copy of the
11  * License at http://www.apple.com/publicsource and read it before using
12  * this file.
13  *
14  * The Original Code and all software distributed under the License are
15  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
19  * License for the specific language governing rights and limitations
20  * under the License."
21  *
22  * @APPLE_LICENSE_HEADER_END@
23  */
24 /*      @(#)charsets.c      *
25  *      (c) 2004   Apple Computer, Inc.  All Rights Reserved
26  *
27  *
28  *      charsets.c -- Routines converting between UTF-8, 16-bit
29  *			little-endian Unicode, and various Windows
30  *			code pages.
31  *
32  *      MODIFICATION HISTORY:
33  *       28-Nov-2004     Guy Harris	New today
34  */
35 
36 #pragma ident	"%Z%%M%	%I%	%E% SMI"
37 
38 #include <stdlib.h>
39 #include <stdio.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <iconv.h>
43 #include <langinfo.h>
44 #include <strings.h>
45 
46 #ifdef NOTPORTED
47 #include <CoreFoundation/CoreFoundation.h>
48 #include <CoreFoundation/CFStringDefaultEncoding.h>
49 #include <CoreFoundation/CFStringEncodingConverter.h>
50 #include <sys/mchain.h>
51 #endif /* NOTPORTED */
52 
53 #include <netsmb/smb_lib.h>
54 #include <netsmb/mchain.h>
55 
56 #include "charsets.h"
57 
58 #ifdef NOTPORTED
59 extern 	 uid_t real_uid,eff_uid;
60 #endif /* NOTPORTED */
61 
62 /*
63  * On Solaris, we will need to do some rewriting to use our iconv
64  * routines for the conversions.  For now, we're effectively
65  * stubbing out code, leaving the details of what happens on
66  * Darwin in case it's useful as a guide later.
67  */
68 
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'.  Any other character
 * (including NUL) yields 16, which callers treat as "not a hex digit"
 * by testing for a value greater than 15.
 *
 * Plain range comparisons are used instead of the <ctype.h> classifiers:
 * passing a negative char (a byte >= 0x80 where char is signed) to
 * isdigit() and friends is undefined behavior, and those classifiers are
 * locale-dependent, which is not wanted when parsing URL escapes.
 */
static unsigned
xtoi(char u)
{
	if (u >= '0' && u <= '9')
		return ((unsigned)(u - '0'));
	if (u >= 'a' && u <= 'f')
		return ((unsigned)(10 + (u - 'a')));
	if (u >= 'A' && u <= 'F')
		return ((unsigned)(10 + (u - 'A')));
	return (16);
}
80 
81 
82 /* Removes the "%" escape sequences from a URL component.
83  * See IETF RFC 2396.
84  */
85 char *
86 unpercent(char * component)
87 {
88         char c, *s;
89         unsigned hi, lo;
90 
91         if (component)
92                 for (s = component; (c = *s) != 0; s++) {
93                         if (c != '%')
94                                 continue;
95                         if ((hi = xtoi(s[1])) > 15 || (lo = xtoi(s[2])) > 15)
96                                 continue; /* ignore invalid escapes */
97                         s[0] = hi*16 + lo;
98                         /*
99                          * This was strcpy(s + 1, s + 3);
100                          * But nowadays leftward overlapping copies are
101                          * officially undefined in C.  Ours seems to
102                          * work or not depending upon alignment.
103                          */
104                         memmove(s+1, s+3, strlen(s+3) + 1);
105                 }
106         return (component);
107 }
108 
109 #ifdef NOTPORTED
110 static CFStringEncoding
111 get_windows_encoding_equivalent( void )
112 {
113 
114 	CFStringEncoding encoding;
115 	uint32_t index,region;
116 
117 	/* important! use root ID so you can read the config file! */
118 	seteuid(eff_uid);
119 	__CFStringGetInstallationEncodingAndRegion(&index,&region);
120 	seteuid(real_uid);
121 
122 	switch ( index )
123 	{
124 		case	kCFStringEncodingMacRoman:
125 			if (region) /* anything nonzero is not US */
126 				encoding = kCFStringEncodingDOSLatin1;
127 			else /* US region */
128 				encoding = kCFStringEncodingDOSLatinUS;
129 			break;
130 
131 		case	kCFStringEncodingMacJapanese:
132 			encoding = kCFStringEncodingDOSJapanese;
133 			break;
134 
135 		case	kCFStringEncodingMacChineseTrad:
136 			encoding = kCFStringEncodingDOSChineseTrad;
137 			break;
138 
139 		case	kCFStringEncodingMacKorean:
140 			encoding = kCFStringEncodingDOSKorean;
141 			break;
142 
143 		case	kCFStringEncodingMacArabic:
144 			encoding = kCFStringEncodingDOSArabic;
145 			break;
146 
147 		case	kCFStringEncodingMacHebrew:
148 			encoding = kCFStringEncodingDOSHebrew;
149 			break;
150 
151 		case	kCFStringEncodingMacGreek:
152 			encoding = kCFStringEncodingDOSGreek;
153 			break;
154 
155 		case	kCFStringEncodingMacCyrillic:
156 			encoding = kCFStringEncodingDOSCyrillic;
157 			break;
158 
159 		case	kCFStringEncodingMacThai:
160 			encoding = kCFStringEncodingDOSThai;
161 			break;
162 
163 		case	kCFStringEncodingMacChineseSimp:
164 			encoding = kCFStringEncodingDOSChineseSimplif;
165 			break;
166 
167 		case	kCFStringEncodingMacCentralEurRoman:
168 			encoding = kCFStringEncodingDOSLatin2;
169 			break;
170 
171 		case	kCFStringEncodingMacTurkish:
172 			encoding = kCFStringEncodingDOSTurkish;
173 			break;
174 
175 		case	kCFStringEncodingMacCroatian:
176 			encoding = kCFStringEncodingDOSLatin2;
177 			break;
178 
179 		case	kCFStringEncodingMacIcelandic:
180 			encoding = kCFStringEncodingDOSIcelandic;
181 			break;
182 
183 		case	kCFStringEncodingMacRomanian:
184 			encoding = kCFStringEncodingDOSLatin2;
185 			break;
186 
187 		case	kCFStringEncodingMacFarsi:
188 			encoding = kCFStringEncodingDOSArabic;
189 			break;
190 
191 		case	kCFStringEncodingMacUkrainian:
192 			encoding = kCFStringEncodingDOSCyrillic;
193 			break;
194 
195 		default:
196 			encoding = kCFStringEncodingDOSLatin1;
197 			break;
198 	}
199 
200 	return encoding;
201 }
202 #endif /* NOTPORTED */
203 
/*
 * Convert a string in the Windows code page to UTF-8.
 *
 * With NOTPORTED defined (the Darwin/CoreFoundation code), the string is
 * decoded using get_windows_encoding_equivalent(), falling back to
 * MacRoman if that fails, and a newly malloc()ed UTF-8 string is
 * returned; NULL is returned on any failure.
 *
 * Without NOTPORTED no conversion is performed: the argument itself is
 * returned with its const qualifier cast away, and nothing is allocated.
 *
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 */
char *
convert_wincs_to_utf8(const char *windows_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFIndex maxlen;
	char *result;

	s = CFStringCreateWithCString(NULL, windows_string,
	    get_windows_encoding_equivalent());
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" ", -1,
		    windows_string);

		/* kCFStringEncodingMacRoman should always succeed */
		s = CFStringCreateWithCString(NULL, windows_string,
		    kCFStringEncodingMacRoman);
		if (s == NULL) {
			smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" with kCFStringEncodingMacRoman - skipping",
			    -1, windows_string);
			return (NULL);
		}
	}

	/* +1 leaves room for the terminating NUL */
	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    kCFStringEncodingUTF8) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for UTF-8 string for \"%s\" - skipping", -1,
		    windows_string);
		CFRelease(s);
		return (NULL);
	}
	if (!CFStringGetCString(s, result, maxlen, kCFStringEncodingUTF8)) {
		smb_error("CFStringGetCString for UTF-8 failed on \"%s\" - skipping",
		    -1, windows_string);
		/* don't leak the output buffer on a failed conversion */
		free(result);
		CFRelease(s);
		return (NULL);
	}
	CFRelease(s);
	return (result);
#else /* NOTPORTED */
	return ((char *)windows_string);
#endif /* NOTPORTED */
}
253 
/*
 * Convert a UTF-8 string to the Windows code page.
 *
 * With NOTPORTED defined (the Darwin/CoreFoundation code), the string is
 * re-encoded using get_windows_encoding_equivalent() and returned in a
 * newly malloc()ed buffer; NULL is returned on any failure.
 *
 * Without NOTPORTED no conversion is performed: the argument itself is
 * returned with its const qualifier cast away, and nothing is allocated.
 *
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 */
char *
convert_utf8_to_wincs(const char *utf8_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFStringEncoding wincs;
	CFIndex maxlen;
	char *result;

	s = CFStringCreateWithCString(NULL, utf8_string,
	    kCFStringEncodingUTF8);
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1,
		    utf8_string);
		return (NULL);
	}

	/* look the target encoding up once; it is used for size and copy */
	wincs = get_windows_encoding_equivalent();

	/* +1 leaves room for the terminating NUL */
	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    wincs) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for Windows code page string for \"%s\" - skipping", -1,
		    utf8_string);
		CFRelease(s);
		return (NULL);
	}
	if (!CFStringGetCString(s, result, maxlen, wincs)) {
		smb_error("CFStringGetCString for Windows code page failed on \"%s\" - skipping",
		    -1, utf8_string);
		/* don't leak the output buffer on a failed conversion */
		free(result);
		CFRelease(s);
		return (NULL);
	}
	CFRelease(s);
	return (result);
#else /* NOTPORTED */
	return ((char *)utf8_string);
#endif /* NOTPORTED */
}
296 
/*
 * Convert little-endian Unicode string to UTF-8.
 *
 * Every 16-bit unit up to (not including) the terminating zero unit is
 * passed through letohs() and stored back, so the caller's buffer is
 * rewritten in place.  The buffer and its length in bytes (two per unit,
 * terminator excluded) are then handed to convert_unicode_to_utf8(),
 * whose result is returned unchanged.
 */
char *
convert_leunicode_to_utf8(unsigned short *unicode_string)
{
	unsigned short *cur = unicode_string;
	int nbytes = 0;

	while (*cur != 0) {
		*cur = letohs(*cur);
		cur++;
		nbytes += 2;
	}
	return (convert_unicode_to_utf8(unicode_string, nbytes));
}
315 
/*
 * Convert a buffer of 16-bit Unicode (UTF-16) code units to UTF-8.
 *
 * unicode_string	the code units to convert; no terminator is needed
 * len			length of the input in BYTES (not code units)
 *
 * Returns a newly malloc()ed, NUL-terminated UTF-8 string which the
 * caller should free(), or NULL if the arguments are invalid, memory is
 * exhausted, the converter cannot be opened, or iconv() reports an error
 * (for example malformed input or an odd byte count).
 *
 * The result is heap allocated because the previous implementation
 * returned the address of a local array, which is invalid as soon as the
 * function returns.  Converting straight from the caller's buffer into a
 * right-sized heap buffer also removes the old fixed BUFSIZ limits on
 * both input and output.
 *
 * NOTE(review): the source encoding is named plain "UTF-16", so how
 * BOM-less input is interpreted is up to the platform's iconv; the
 * caller in this file passes host-order data - confirm that matches.
 */
char *
convert_unicode_to_utf8(unsigned short *unicode_string, int len)
{
	iconv_t cd;
	char *inp, *outp, *result;
	size_t ileft, oleft, outsz;

	if (unicode_string == NULL || len < 0)
		return (NULL);

	/*
	 * One UTF-16 code unit expands to at most 3 UTF-8 bytes (a
	 * surrogate pair is 2 units -> 4 bytes), plus 1 for the NUL.
	 */
	ileft = (size_t)len;
	outsz = (ileft / 2) * 3 + 1;
	result = malloc(outsz);
	if (result == NULL)
		return (NULL);

	cd = iconv_open("UTF-8", "UTF-16");
	if (cd == (iconv_t)-1) {
		free(result);
		return (NULL);
	}

	inp = (char *)unicode_string;
	outp = result;
	oleft = outsz - 1;		/* keep one byte for the NUL */

	/*
	 * The (void *) cast lets this compile against both the POSIX
	 * prototype (char **) and the older const char ** one.
	 */
	if (iconv(cd, (void *)&inp, &ileft, &outp, &oleft) == (size_t)-1) {
		free(result);
		result = NULL;
	} else {
		*outp = '\0';
	}
	(void) iconv_close(cd);
	return (result);
}
343 
/*
 * Convert UTF-8 string to little-endian Unicode.
 *
 * With NOTPORTED defined (the Darwin/CoreFoundation code), the result is
 * a newly malloc()ed array of 16-bit units, each passed through
 * CFSwapInt16HostToLittle(), followed by a zero unit; NULL is returned
 * on failure.
 *
 * Without NOTPORTED no conversion is performed: the argument pointer is
 * simply returned, cast to the result type.
 */
unsigned short *
convert_utf8_to_leunicode(const char *utf8_string)
{
#ifdef NOTPORTED
	CFStringRef cfstr;
	CFIndex nchars;
	CFRange whole;
	unsigned short *units;
	int idx;

	cfstr = CFStringCreateWithCString(NULL, utf8_string,
	    kCFStringEncodingUTF8);
	if (cfstr == NULL) {
		smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1,
		    utf8_string);
		return (NULL);
	}

	/* two bytes per character, plus one more unit for the terminator */
	nchars = CFStringGetLength(cfstr);
	units = malloc(2*(nchars + 1));
	if (units == NULL) {
		smb_error("Couldn't allocate buffer for Unicode string for \"%s\" - skipping", -1,
		    utf8_string);
		CFRelease(cfstr);
		return (NULL);
	}

	whole.location = 0;
	whole.length = nchars;
	CFStringGetCharacters(cfstr, whole, units);
	CFRelease(cfstr);

	units[nchars] = 0;
	for (idx = 0; idx < nchars; idx++)
		units[idx] = CFSwapInt16HostToLittle(units[idx]);
	return (units);
#else /* NOTPORTED */
	/* LINTED */ /* XXX Really need to fix this! */
	return ((unsigned short *)utf8_string); /* XXX */
#endif /* NOTPORTED */
}
386