1 /* 2 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights 7 * Reserved. This file contains Original Code and/or Modifications of 8 * Original Code as defined in and that are subject to the Apple Public 9 * Source License Version 1.0 (the 'License'). You may not use this file 10 * except in compliance with the License. Please obtain a copy of the 11 * License at http://www.apple.com/publicsource and read it before using 12 * this file. 13 * 14 * The Original Code and all software distributed under the License are 15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the 19 * License for the specific language governing rights and limitations 20 * under the License." 21 * 22 * @APPLE_LICENSE_HEADER_END@ 23 */ 24 /* CSTYLED */ 25 /* 26 * @(#)charsets.c * 27 * (c) 2004 Apple Computer, Inc. All Rights Reserved 28 * 29 * 30 * charsets.c -- Routines converting between UTF-8, 16-bit 31 * little-endian Unicode, and various Windows 32 * code pages. 33 * 34 * MODIFICATION HISTORY: 35 * 28-Nov-2004 Guy Harris New today 36 */ 37 38 #include <stdlib.h> 39 #include <stdio.h> 40 #include <string.h> 41 #include <ctype.h> 42 #include <errno.h> 43 #include <iconv.h> 44 #include <langinfo.h> 45 #include <strings.h> 46 #include <libintl.h> 47 48 #include <sys/isa_defs.h> 49 #include <netsmb/smb_lib.h> 50 #include <netsmb/mchain.h> 51 52 #include "charsets.h" 53 54 /* 55 * On Solaris, we will need to do some rewriting to use our iconv 56 * routines for the conversions. For now, we're effectively 57 * stubbing out code, leaving the details of what happens on 58 * Darwin in case it's useful as a guide later. 
// end of the Solaris porting note that precedes this point */

/*
 * Convert one ASCII hexadecimal digit to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and 16 for every
 * other character (including NUL), so callers can reject anything
 * greater than 15.
 *
 * The comparison is done on explicit ASCII ranges of an unsigned char
 * rather than through <ctype.h>: passing a negative plain char to the
 * is*() functions is undefined behavior, and their answers depend on
 * the current locale.
 */
static unsigned
xtoi(char u)
{
	unsigned char c = (unsigned char)u;

	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'a' && c <= 'f')
		return (10 + c - 'a');
	if (c >= 'A' && c <= 'F')
		return (10 + c - 'A');
	return (16);
}


/*
 * Removes the "%" escape sequences from a URL component.
 * See IETF RFC 2396.
 *
 * The string is decoded in place (the result is never longer than the
 * input) and the same pointer is returned.  A NULL argument is
 * returned unchanged.
 *
 * - A '%' that is not followed by two hexadecimal digits is not an
 *   escape and is copied through untouched.
 * - A decoded byte is never examined again, so "%2541" becomes "%41",
 *   not "A".
 * - "%00" decodes to a NUL byte like any other escape; the bytes after
 *   it are still decoded, but callers using C string functions will
 *   see the string end there.
 *
 * Decoding uses separate read and write cursors so the whole string is
 * handled in one pass, instead of shifting the tail of the string left
 * after every escape.
 */
char *
unpercent(char *component)
{
	char *src, *dst;
	unsigned hi, lo;

	if (component == NULL)
		return (component);

	src = dst = component;
	while (*src != '\0') {
		/*
		 * src[2] is only read when src[1] is a hex digit (and
		 * therefore not the terminator), so this never looks
		 * past the end of the string.
		 */
		if (src[0] == '%' &&
		    (hi = xtoi(src[1])) <= 15 &&
		    (lo = xtoi(src[2])) <= 15) {
			*dst++ = (char)(hi * 16 + lo);
			src += 3;
		} else {
			*dst++ = *src++;
		}
	}
	*dst = '\0';

	return (component);
}

/* BEGIN CSTYLED */
#ifdef NOTPORTED
/*
 * Darwin only (not built on Solaris): map the installation's Mac
 * string encoding, and for MacRoman its region, to the DOS code page
 * used for Windows strings.  Encodings not listed fall back to
 * DOS Latin 1.
 */
static CFStringEncoding
get_windows_encoding_equivalent( void )
{

	CFStringEncoding encoding;
	uint32_t index,region;

	/* important! use root ID so you can read the config file! */
	seteuid(eff_uid);
	__CFStringGetInstallationEncodingAndRegion(&index,&region);
	seteuid(real_uid);

	switch ( index )
	{
	case kCFStringEncodingMacRoman:
		if (region) /* anything nonzero is not US */
			encoding = kCFStringEncodingDOSLatin1;
		else /* US region */
			encoding = kCFStringEncodingDOSLatinUS;
		break;

	case kCFStringEncodingMacJapanese:
		encoding = kCFStringEncodingDOSJapanese;
		break;

	case kCFStringEncodingMacChineseTrad:
		encoding = kCFStringEncodingDOSChineseTrad;
		break;

	case kCFStringEncodingMacKorean:
		encoding = kCFStringEncodingDOSKorean;
		break;

	case kCFStringEncodingMacArabic:
		encoding = kCFStringEncodingDOSArabic;
		break;

	case kCFStringEncodingMacHebrew:
		encoding = kCFStringEncodingDOSHebrew;
		break;

	case kCFStringEncodingMacGreek:
		encoding = kCFStringEncodingDOSGreek;
		break;

	case kCFStringEncodingMacCyrillic:
		encoding = kCFStringEncodingDOSCyrillic;
		break;

	case kCFStringEncodingMacThai:
		encoding = kCFStringEncodingDOSThai;
		break;

	case kCFStringEncodingMacChineseSimp:
		encoding = kCFStringEncodingDOSChineseSimplif;
		break;

	case kCFStringEncodingMacCentralEurRoman:
		encoding = kCFStringEncodingDOSLatin2;
		break;

	case kCFStringEncodingMacTurkish:
		encoding = kCFStringEncodingDOSTurkish;
		break;

	case kCFStringEncodingMacCroatian:
		encoding = kCFStringEncodingDOSLatin2;
		break;

	case kCFStringEncodingMacIcelandic:
		encoding = kCFStringEncodingDOSIcelandic;
		break;

	case kCFStringEncodingMacRomanian:
		encoding = kCFStringEncodingDOSLatin2;
		break;

	case kCFStringEncodingMacFarsi:
		encoding = kCFStringEncodingDOSArabic;
		break;

	case kCFStringEncodingMacUkrainian:
		encoding = kCFStringEncodingDOSCyrillic;
		break;

	default:
		encoding = kCFStringEncodingDOSLatin1;
		break;
	}

	return encoding;
}
#endif /* NOTPORTED */

/*
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 *
 * Convert a string in the Windows code page to UTF-8.
 *
 * Returns a newly allocated string that the caller must free(), or
 * NULL on failure.  When NOTPORTED is not defined no conversion is
 * done: the result is a plain copy of the input, and a NULL input
 * yields NULL instead of being handed to strdup().
 */
char *
convert_wincs_to_utf8(const char *windows_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFIndex maxlen;
	char *result;

	s = CFStringCreateWithCString(NULL, windows_string,
		get_windows_encoding_equivalent());
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" ", -1,
		    windows_string);

		/* kCFStringEncodingMacRoman should always succeed */
		s = CFStringCreateWithCString(NULL, windows_string,
		    kCFStringEncodingMacRoman);
		if (s == NULL) {
			smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" with kCFStringEncodingMacRoman - skipping",
			    -1, windows_string);
			return NULL;
		}
	}

	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    kCFStringEncodingUTF8) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for UTF-8 string for \"%s\" - skipping", -1,
		    windows_string);
		CFRelease(s);
		return NULL;
	}
	if (!CFStringGetCString(s, result, maxlen, kCFStringEncodingUTF8)) {
		smb_error("CFStringGetCString for UTF-8 failed on \"%s\" - skipping",
		    -1, windows_string);
		/* Don't leak the output buffer on a failed conversion. */
		free(result);
		CFRelease(s);
		return NULL;
	}
	CFRelease(s);
	return result;
#else /* NOTPORTED */
	if (windows_string == NULL)
		return (NULL);
	return (strdup(windows_string));
#endif /* NOTPORTED */
}

/*
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 *
 * Convert a UTF-8 string to the Windows code page.
 *
 * Returns a newly allocated string that the caller must free(), or
 * NULL on failure.  When NOTPORTED is not defined no conversion is
 * done: the result is a plain copy of the input, and a NULL input
 * yields NULL instead of being handed to strdup().
 */
char *
convert_utf8_to_wincs(const char *utf8_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFIndex maxlen;
	char *result;

	s = CFStringCreateWithCString(NULL, utf8_string,
	    kCFStringEncodingUTF8);
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1,
		    utf8_string);
		return NULL;
	}

	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    get_windows_encoding_equivalent()) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for Windows code page string for \"%s\" - skipping", -1,
		    utf8_string);
		CFRelease(s);
		return NULL;
	}
	if (!CFStringGetCString(s, result, maxlen,
	    get_windows_encoding_equivalent())) {
		smb_error("CFStringGetCString for Windows code page failed on \"%s\" - skipping",
		    -1, utf8_string);
		/* Don't leak the output buffer on a failed conversion. */
		free(result);
		CFRelease(s);
		return NULL;
	}
	CFRelease(s);
	return result;
#else /* NOTPORTED */
	if (utf8_string == NULL)
		return (NULL);
	return (strdup(utf8_string));
#endif /* NOTPORTED */
}
/* END CSTYLED */

/*
 * We replaced these routines for Solaris:
 *	convert_leunicode_to_utf8
 *	convert_unicode_to_utf8
 *	convert_utf8_to_leunicode
 * with new code in: utf_str.c
 */