1 /* 2 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights 7 * Reserved. This file contains Original Code and/or Modifications of 8 * Original Code as defined in and that are subject to the Apple Public 9 * Source License Version 1.0 (the 'License'). You may not use this file 10 * except in compliance with the License. Please obtain a copy of the 11 * License at http://www.apple.com/publicsource and read it before using 12 * this file. 13 * 14 * The Original Code and all software distributed under the License are 15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the 19 * License for the specific language governing rights and limitations 20 * under the License." 21 * 22 * @APPLE_LICENSE_HEADER_END@ 23 */ 24 /* CSTYLED */ 25 /* 26 * @(#)charsets.c * 27 * (c) 2004 Apple Computer, Inc. All Rights Reserved 28 * 29 * 30 * charsets.c -- Routines converting between UTF-8, 16-bit 31 * little-endian Unicode, and various Windows 32 * code pages. 33 * 34 * MODIFICATION HISTORY: 35 * 28-Nov-2004 Guy Harris New today 36 */ 37 38 #include <stdlib.h> 39 #include <stdio.h> 40 #include <string.h> 41 #include <ctype.h> 42 #include <errno.h> 43 #include <iconv.h> 44 #include <langinfo.h> 45 #include <strings.h> 46 47 #include <netsmb/smb_lib.h> 48 #include <netsmb/mchain.h> 49 50 #include "charsets.h" 51 52 /* 53 * On Solaris, we will need to do some rewriting to use our iconv 54 * routines for the conversions. For now, we're effectively 55 * stubbing out code, leaving the details of what happens on 56 * Darwin in case it's useful as a guide later. 57 */ 58 59 static unsigned 60 xtoi(char u) 61 { 62 if (isdigit(u)) 63 return (u - '0'); 64 else if (islower(u)) 65 return (10 + u - 'a'); 66 else if (isupper(u)) 67 return (10 + u - 'A'); 68 return (16); 69 } 70 71 72 /* 73 * Removes the "%" escape sequences from a URL component. 74 * See IETF RFC 2396. 75 */ 76 char * 77 unpercent(char *component) 78 { 79 char c, *s; 80 unsigned hi, lo; 81 82 if (component == NULL) 83 return (component); 84 85 for (s = component; (c = *s) != 0; s++) { 86 if (c != '%') 87 continue; 88 if ((hi = xtoi(s[1])) > 15 || (lo = xtoi(s[2])) > 15) 89 continue; /* ignore invalid escapes */ 90 s[0] = hi*16 + lo; 91 /* 92 * This was strcpy(s + 1, s + 3); 93 * But nowadays leftward overlapping copies are 94 * officially undefined in C. Ours seems to 95 * work or not depending upon alignment. 96 */ 97 memmove(s+1, s+3, strlen(s+3) + 1); 98 } 99 return (component); 100 } 101 102 /* BEGIN CSTYLED */ 103 #ifdef NOTPORTED 104 static CFStringEncoding 105 get_windows_encoding_equivalent( void ) 106 { 107 108 CFStringEncoding encoding; 109 uint32_t index,region; 110 111 /* important! use root ID so you can read the config file! */ 112 seteuid(eff_uid); 113 __CFStringGetInstallationEncodingAndRegion(&index,®ion); 114 seteuid(real_uid); 115 116 switch ( index ) 117 { 118 case kCFStringEncodingMacRoman: 119 if (region) /* anything nonzero is not US */ 120 encoding = kCFStringEncodingDOSLatin1; 121 else /* US region */ 122 encoding = kCFStringEncodingDOSLatinUS; 123 break; 124 125 case kCFStringEncodingMacJapanese: 126 encoding = kCFStringEncodingDOSJapanese; 127 break; 128 129 case kCFStringEncodingMacChineseTrad: 130 encoding = kCFStringEncodingDOSChineseTrad; 131 break; 132 133 case kCFStringEncodingMacKorean: 134 encoding = kCFStringEncodingDOSKorean; 135 break; 136 137 case kCFStringEncodingMacArabic: 138 encoding = kCFStringEncodingDOSArabic; 139 break; 140 141 case kCFStringEncodingMacHebrew: 142 encoding = kCFStringEncodingDOSHebrew; 143 break; 144 145 case kCFStringEncodingMacGreek: 146 encoding = kCFStringEncodingDOSGreek; 147 break; 148 149 case kCFStringEncodingMacCyrillic: 150 encoding = kCFStringEncodingDOSCyrillic; 151 break; 152 153 case kCFStringEncodingMacThai: 154 encoding = kCFStringEncodingDOSThai; 155 break; 156 157 case kCFStringEncodingMacChineseSimp: 158 encoding = kCFStringEncodingDOSChineseSimplif; 159 break; 160 161 case kCFStringEncodingMacCentralEurRoman: 162 encoding = kCFStringEncodingDOSLatin2; 163 break; 164 165 case kCFStringEncodingMacTurkish: 166 encoding = kCFStringEncodingDOSTurkish; 167 break; 168 169 case kCFStringEncodingMacCroatian: 170 encoding = kCFStringEncodingDOSLatin2; 171 break; 172 173 case kCFStringEncodingMacIcelandic: 174 encoding = kCFStringEncodingDOSIcelandic; 175 break; 176 177 case kCFStringEncodingMacRomanian: 178 encoding = kCFStringEncodingDOSLatin2; 179 break; 180 181 case kCFStringEncodingMacFarsi: 182 encoding = kCFStringEncodingDOSArabic; 183 break; 184 185 case kCFStringEncodingMacUkrainian: 186 encoding = kCFStringEncodingDOSCyrillic; 187 break; 188 189 default: 190 encoding = kCFStringEncodingDOSLatin1; 191 break; 192 } 193 194 return encoding; 195 } 196 #endif /* NOTPORTED */ 197 198 /* 199 * XXX - NLS, or CF? We should probably use the same routine for all 200 * conversions. 201 */ 202 char * 203 convert_wincs_to_utf8(const char *windows_string) 204 { 205 #ifdef NOTPORTED 206 CFStringRef s; 207 CFIndex maxlen; 208 char *result; 209 210 s = CFStringCreateWithCString(NULL, windows_string, 211 get_windows_encoding_equivalent()); 212 if (s == NULL) { 213 smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" ", -1, 214 windows_string); 215 216 /* kCFStringEncodingMacRoman should always succeed */ 217 s = CFStringCreateWithCString(NULL, windows_string, 218 kCFStringEncodingMacRoman); 219 if (s == NULL) { 220 smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" with kCFStringEncodingMacRoman - skipping", 221 -1, windows_string); 222 return NULL; 223 } 224 } 225 226 maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s), 227 kCFStringEncodingUTF8) + 1; 228 result = malloc(maxlen); 229 if (result == NULL) { 230 smb_error("Couldn't allocate buffer for UTF-8 string for \"%s\" - skipping", -1, 231 windows_string); 232 CFRelease(s); 233 return NULL; 234 } 235 if (!CFStringGetCString(s, result, maxlen, kCFStringEncodingUTF8)) { 236 smb_error("CFStringGetCString for UTF-8 failed on \"%s\" - skipping", 237 -1, windows_string); 238 CFRelease(s); 239 return NULL; 240 } 241 CFRelease(s); 242 return result; 243 #else /* NOTPORTED */ 244 return (strdup((char*)windows_string)); 245 #endif /* NOTPORTED */ 246 } 247 248 /* 249 * XXX - NLS, or CF? We should probably use the same routine for all 250 * conversions. 251 */ 252 char * 253 convert_utf8_to_wincs(const char *utf8_string) 254 { 255 #ifdef NOTPORTED 256 CFStringRef s; 257 CFIndex maxlen; 258 char *result; 259 260 s = CFStringCreateWithCString(NULL, utf8_string, 261 kCFStringEncodingUTF8); 262 if (s == NULL) { 263 smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1, 264 utf8_string); 265 return NULL; 266 } 267 268 maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s), 269 get_windows_encoding_equivalent()) + 1; 270 result = malloc(maxlen); 271 if (result == NULL) { 272 smb_error("Couldn't allocate buffer for Windows code page string for \"%s\" - skipping", -1, 273 utf8_string); 274 CFRelease(s); 275 return NULL; 276 } 277 if (!CFStringGetCString(s, result, maxlen, 278 get_windows_encoding_equivalent())) { 279 smb_error("CFStringGetCString for Windows code page failed on \"%s\" - skipping", 280 -1, utf8_string); 281 CFRelease(s); 282 return NULL; 283 } 284 CFRelease(s); 285 return result; 286 #else /* NOTPORTED */ 287 return (strdup((char*)utf8_string)); 288 #endif /* NOTPORTED */ 289 } 290 /* END CSTYLED */ 291 292 /* 293 * We replaced these routines for Solaris: 294 * convert_leunicode_to_utf8 295 * convert_unicode_to_utf8 296 * convert_utf8_to_leunicode 297 * with new code in: utf_str.c 298 */ 299