xref: /titanic_50/usr/src/lib/libsmbfs/smb/charsets.c (revision 8fd04b8338ed5093ec2d1e668fa620b7de44c177)
1 /*
2  * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_LICENSE_HEADER_START@
5  *
6  * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
7  * Reserved.  This file contains Original Code and/or Modifications of
8  * Original Code as defined in and that are subject to the Apple Public
9  * Source License Version 1.0 (the 'License').  You may not use this file
10  * except in compliance with the License.  Please obtain a copy of the
11  * License at http://www.apple.com/publicsource and read it before using
12  * this file.
13  *
14  * The Original Code and all software distributed under the License are
15  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
19  * License for the specific language governing rights and limitations
20  * under the License."
21  *
22  * @APPLE_LICENSE_HEADER_END@
23  */
24 /* CSTYLED */
25 /*
26  *      @(#)charsets.c      *
27  *      (c) 2004   Apple Computer, Inc.  All Rights Reserved
28  *
29  *
30  *      charsets.c -- Routines converting between UTF-8, 16-bit
31  *			little-endian Unicode, and various Windows
32  *			code pages.
33  *
34  *      MODIFICATION HISTORY:
35  *       28-Nov-2004     Guy Harris	New today
36  */
37 
38 #include <stdlib.h>
39 #include <stdio.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <errno.h>
43 #include <iconv.h>
44 #include <langinfo.h>
45 #include <strings.h>
46 #include <libintl.h>
47 
48 #include <sys/isa_defs.h>
49 #include <netsmb/smb_lib.h>
50 #include <netsmb/mchain.h>
51 
52 #include "charsets.h"
53 
54 /*
55  * On Solaris, we will need to do some rewriting to use our iconv
56  * routines for the conversions.  For now, we're effectively
57  * stubbing out code, leaving the details of what happens on
58  * Darwin in case it's useful as a guide later.
59  */
60 
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F', and 16 for any
 * other character (including NUL), so a caller can test "> 15" to
 * detect an invalid digit.
 *
 * Plain range comparisons are used rather than the <ctype.h>
 * classifiers: handing a negative char value (any byte >= 0x80 where
 * char is signed) to isdigit() and friends is undefined behavior,
 * and islower()/isupper() depend on the current locale.
 */
static unsigned
xtoi(char u)
{
	if (u >= '0' && u <= '9')
		return ((unsigned)(u - '0'));
	if (u >= 'a' && u <= 'f')
		return ((unsigned)(10 + u - 'a'));
	if (u >= 'A' && u <= 'F')
		return ((unsigned)(10 + u - 'A'));
	return (16);
}


/*
 * Removes the "%" escape sequences from a URL component.
 * See IETF RFC 2396.
 *
 * The string is rewritten in place: every "%XX" where both X are
 * hexadecimal digits is replaced by the single byte with that value.
 * A '%' that is not followed by two hex digits is copied through
 * unchanged.  A decoded byte is never re-examined, so "%2541" yields
 * "%41" rather than "A".
 *
 * Returns its argument (NULL is passed straight through).
 */
char *
unpercent(char *component)
{
	const char *src;	/* next byte to examine */
	char *dst;		/* next byte to store; never passes src */
	unsigned hi, lo;

	if (component == NULL)
		return (component);

	/*
	 * Single pass with separate read and write positions.  This
	 * replaces a memmove() of the whole tail for every escape,
	 * which was quadratic on heavily escaped input.
	 */
	src = dst = component;
	while (*src != '\0') {
		/*
		 * The short-circuit evaluation keeps us from looking
		 * past the terminating NUL: xtoi('\0') is 16, so src[2]
		 * is only read when src[1] is a valid hex digit.
		 */
		if (src[0] == '%' &&
		    (hi = xtoi(src[1])) <= 15 &&
		    (lo = xtoi(src[2])) <= 15) {
			*dst++ = (char)(hi * 16 + lo);
			src += 3;
		} else {
			*dst++ = *src++;
		}
	}
	*dst = '\0';

	return (component);
}
103 
104 /* BEGIN CSTYLED */
#ifdef NOTPORTED
/*
 * Darwin code, built only when NOTPORTED is defined.
 *
 * Looks up the installation encoding and region and returns the DOS
 * code page paired with it.  MacRoman is the only encoding for which
 * the region matters; an encoding with no entry in the table falls
 * back to DOS Latin 1.
 */
static CFStringEncoding
get_windows_encoding_equivalent( void )
{
	/* Region-independent pairs: installation encoding -> DOS code page */
	static const struct {
		CFStringEncoding mac_enc;
		CFStringEncoding dos_enc;
	} enc_map[] = {
		{ kCFStringEncodingMacJapanese,		kCFStringEncodingDOSJapanese },
		{ kCFStringEncodingMacChineseTrad,	kCFStringEncodingDOSChineseTrad },
		{ kCFStringEncodingMacKorean,		kCFStringEncodingDOSKorean },
		{ kCFStringEncodingMacArabic,		kCFStringEncodingDOSArabic },
		{ kCFStringEncodingMacHebrew,		kCFStringEncodingDOSHebrew },
		{ kCFStringEncodingMacGreek,		kCFStringEncodingDOSGreek },
		{ kCFStringEncodingMacCyrillic,		kCFStringEncodingDOSCyrillic },
		{ kCFStringEncodingMacThai,		kCFStringEncodingDOSThai },
		{ kCFStringEncodingMacChineseSimp,	kCFStringEncodingDOSChineseSimplif },
		{ kCFStringEncodingMacCentralEurRoman,	kCFStringEncodingDOSLatin2 },
		{ kCFStringEncodingMacTurkish,		kCFStringEncodingDOSTurkish },
		{ kCFStringEncodingMacCroatian,		kCFStringEncodingDOSLatin2 },
		{ kCFStringEncodingMacIcelandic,	kCFStringEncodingDOSIcelandic },
		{ kCFStringEncodingMacRomanian,		kCFStringEncodingDOSLatin2 },
		{ kCFStringEncodingMacFarsi,		kCFStringEncodingDOSArabic },
		{ kCFStringEncodingMacUkrainian,	kCFStringEncodingDOSCyrillic }
	};
	uint32_t sys_enc, sys_region;
	size_t i;

	/* important! use root ID so you can read the config file! */
	seteuid(eff_uid);
	__CFStringGetInstallationEncodingAndRegion(&sys_enc, &sys_region);
	seteuid(real_uid);

	if (sys_enc == kCFStringEncodingMacRoman) {
		/* anything nonzero is not US */
		if (sys_region)
			return kCFStringEncodingDOSLatin1;
		return kCFStringEncodingDOSLatinUS;
	}

	for (i = 0; i < sizeof (enc_map) / sizeof (enc_map[0]); i++) {
		if (enc_map[i].mac_enc == sys_enc)
			return enc_map[i].dos_enc;
	}

	/* no specific equivalent known */
	return kCFStringEncodingDOSLatin1;
}
#endif /* NOTPORTED */
199 
200 /*
201  * XXX - NLS, or CF?  We should probably use the same routine for all
202  * conversions.
203  */
/*
 * Return a newly allocated UTF-8 copy of a string that is in the
 * Windows code page.
 *
 * Unless NOTPORTED is defined no conversion is performed: the result
 * is simply a strdup() of the input.  The Darwin implementation is
 * kept under NOTPORTED for reference.
 *
 * The caller owns the returned buffer and releases it with free().
 * Returns NULL if windows_string is NULL or on failure.
 */
char *
convert_wincs_to_utf8(const char *windows_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFIndex maxlen;
	char *result;

	if (windows_string == NULL)
		return NULL;

	s = CFStringCreateWithCString(NULL, windows_string,
		get_windows_encoding_equivalent());
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" ", -1,
		    windows_string);

		/* kCFStringEncodingMacRoman should always succeed */
		s = CFStringCreateWithCString(NULL, windows_string,
		    kCFStringEncodingMacRoman);
		if (s == NULL) {
			smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" with kCFStringEncodingMacRoman - skipping",
			    -1, windows_string);
			return NULL;
		}
	}

	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    kCFStringEncodingUTF8) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for UTF-8 string for \"%s\" - skipping", -1,
		    windows_string);
		CFRelease(s);
		return NULL;
	}
	if (!CFStringGetCString(s, result, maxlen, kCFStringEncodingUTF8)) {
		smb_error("CFStringGetCString for UTF-8 failed on \"%s\" - skipping",
		    -1, windows_string);
		CFRelease(s);
		free(result);	/* don't leak the output buffer on failure */
		return NULL;
	}
	CFRelease(s);
	return result;
#else /* NOTPORTED */
	/* strdup(NULL) is undefined; report it as a failure instead */
	if (windows_string == NULL)
		return (NULL);
	return (strdup(windows_string));
#endif /* NOTPORTED */
}
249 
250 /*
251  * XXX - NLS, or CF?  We should probably use the same routine for all
252  * conversions.
253  */
/*
 * Return a newly allocated copy, in the Windows code page, of a
 * UTF-8 string.
 *
 * Unless NOTPORTED is defined no conversion is performed: the result
 * is simply a strdup() of the input.  The Darwin implementation is
 * kept under NOTPORTED for reference.
 *
 * The caller owns the returned buffer and releases it with free().
 * Returns NULL if utf8_string is NULL or on failure.
 */
char *
convert_utf8_to_wincs(const char *utf8_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFIndex maxlen;
	char *result;

	if (utf8_string == NULL)
		return NULL;

	s = CFStringCreateWithCString(NULL, utf8_string,
	    kCFStringEncodingUTF8);
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1,
		    utf8_string);
		return NULL;
	}

	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    get_windows_encoding_equivalent()) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for Windows code page string for \"%s\" - skipping", -1,
		    utf8_string);
		CFRelease(s);
		return NULL;
	}
	if (!CFStringGetCString(s, result, maxlen,
	    get_windows_encoding_equivalent())) {
		smb_error("CFStringGetCString for Windows code page failed on \"%s\" - skipping",
		    -1, utf8_string);
		CFRelease(s);
		free(result);	/* don't leak the output buffer on failure */
		return NULL;
	}
	CFRelease(s);
	return result;
#else /* NOTPORTED */
	/* strdup(NULL) is undefined; report it as a failure instead */
	if (utf8_string == NULL)
		return (NULL);
	return (strdup(utf8_string));
#endif /* NOTPORTED */
}
292 /* END CSTYLED */
293 
294 /*
295  * We replaced these routines for Solaris:
296  *	convert_leunicode_to_utf8
297  *	convert_unicode_to_utf8
298  *	convert_utf8_to_leunicode
299  * with new code in: utf_str.c
300  */
301