1 /* 2 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 #pragma ident "%Z%%M% %I% %E% SMI" 7 8 /* 9 * The contents of this file are subject to the Netscape Public 10 * License Version 1.1 (the "License"); you may not use this file 11 * except in compliance with the License. You may obtain a copy of 12 * the License at http://www.mozilla.org/NPL/ 13 * 14 * Software distributed under the License is distributed on an "AS 15 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 16 * implied. See the License for the specific language governing 17 * rights and limitations under the License. 18 * 19 * The Original Code is Mozilla Communicator client code, released 20 * March 31, 1998. 21 * 22 * The Initial Developer of the Original Code is Netscape 23 * Communications Corporation. Portions created by Netscape are 24 * Copyright (C) 1998-1999 Netscape Communications Corporation. All 25 * Rights Reserved. 26 * 27 * Contributor(s): 28 */ 29 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <locale.h> 34 #include <ctype.h> 35 36 #ifndef HAVE_LIBICU 37 38 #ifdef SOLARIS_LDAP_CMD 39 #include <errno.h> 40 #include <langinfo.h> 41 #include <iconv.h> 42 #endif 43 44 #ifdef __cplusplus 45 extern "C" { 46 #endif 47 48 extern char *ldaptool_charset; 49 char *ldaptool_convdir = NULL; 50 static int charsetset = 0; 51 char *ldaptool_local2UTF8( const char *src ); 52 53 #ifdef SOLARIS_LDAP_CMD 54 static char *ldaptool_convert( const char *src, const char *fcode, 55 const char *tcode); 56 char *ldaptool_UTF82local( const char *src ); 57 #endif /* SOLARIS_LDAP_CMD */ 58 59 #ifdef SOLARIS_LDAP_CMD 60 /* 61 * ICU version always returns string, unless strdup fails. 62 * As in ICU version, in case of error strdup(src) 63 * Usually strdup(src) will be ASCII and legal anyways. 64 */ 65 66 static char * 67 ldaptool_convert( const char *src, const char *fcode, 68 const char *tcode) { 69 char *dest, *tptr, *tmp; 70 const char *fptr; 71 iconv_t cd; 72 size_t ileft, oleft, ret, size; 73 74 if (src == NULL) 75 return (NULL); 76 77 if (fcode == NULL || tcode == NULL) 78 return (strdup(src)); 79 80 if (strcasecmp(fcode, tcode) == 0) 81 return (strdup(src)); 82 83 if ((cd = iconv_open(tcode, fcode)) == (iconv_t)-1) { 84 /* conversion table not available */ 85 return (strdup(src)); 86 } 87 88 ileft = strlen(src); 89 oleft = 2 * ileft; 90 size = oleft; 91 ret = -1; 92 if ((dest = (char *)malloc(size)) == NULL) { 93 (void) iconv_close(cd); 94 /* maybe sizeof strlen(src) memory still exists */ 95 return (strdup(src)); 96 } 97 tptr = dest; 98 fptr = src; 99 100 for (;;) { 101 ret = iconv(cd, &fptr, &ileft, &tptr, &oleft); 102 103 if (ret != (size_t)-1) { 104 /* 105 * Success. Place 'cd' into its initial shift 106 * state before returning. 107 */ 108 if (fptr == NULL) /* already in initial state */ 109 break; 110 fptr = NULL; 111 ileft = 0; 112 continue; 113 } if (errno == E2BIG) { 114 /* 115 * Lack of space in output buffer. 116 * Hence double the size and retry. 117 * But before calling iconv(), oleft 118 * and tptr have to re-adjusted, so that 119 * iconv() doesn't overwrite the data 120 * which has already been converted. 121 */ 122 oleft += size; 123 size *= 2; 124 if ((tmp = (char *) realloc(dest, size)) == NULL) 125 break; 126 tptr = tmp + (tptr - dest); 127 dest = tmp; 128 continue; 129 } else { 130 /* Other errors */ 131 break; 132 } 133 } 134 135 if (dest != NULL) { 136 if (ret == -1) { 137 /* Free malloc'ed memory on failure */ 138 free(dest); 139 dest = NULL; 140 } else if (oleft > 0) { 141 /* NULL terminate the return value */ 142 *(dest + (size - oleft)) = '\0'; 143 } else { 144 /* realloc one more byte and NULL terminate */ 145 if ((tmp = (char *) realloc(dest, size + 1)) == NULL) { 146 free(dest); 147 dest = NULL; 148 } else { 149 *(dest + size) = '\0'; 150 } 151 } 152 } 153 154 (void) iconv_close(cd); 155 if (dest == NULL) { 156 /* last chance in case some other failure along the way occurs */ 157 return (strdup(src)); 158 } 159 return (dest); 160 } 161 162 char * 163 ldaptool_UTF82local( const char *src ) 164 { 165 char *to_code; 166 if ((to_code = nl_langinfo(CODESET)) == NULL) 167 return (strdup(src)); 168 return (ldaptool_convert(src, "UTF-8", (const char *)to_code)); 169 } 170 #endif /* SOLARIS_LDAP_CMD */ 171 172 char * 173 ldaptool_local2UTF8( const char *src ) 174 { 175 #ifdef SOLARIS_LDAP_CMD 176 char *from_code; 177 if ((from_code = nl_langinfo(CODESET)) == NULL) 178 return (strdup(src)); 179 return (ldaptool_convert(src, (const char *)from_code, "UTF-8")); 180 #else 181 char *utf8; 182 charsetset = 0; 183 if (src == NULL) 184 { 185 return NULL; 186 } 187 utf8 = strdup(src); 188 return ( utf8 ); 189 #endif /* SOLARIS_LDAP_CMD */ 190 } 191 192 #else /* HAVE_LIBICU */ 193 194 #include "unicode/utypes.h" 195 #include "unicode/ucnv.h" 196 197 #define NSPR20 198 199 #ifdef XP_WIN32 200 #define VC_EXTRALEAN 201 #include <afxwin.h> 202 #include <winnls.h> 203 #endif 204 205 extern char *ldaptool_charset; 206 static int charsetset = 0; 207 208 extern "C" { 209 char *ldaptool_convdir = NULL; 210 char *ldaptool_local2UTF8( const char * ); 211 } 212 213 #ifndef XP_WIN32 214 char * GetNormalizedLocaleName(void); 215 216 217 char * 218 GetNormalizedLocaleName(void) 219 { 220 #ifdef _HPUX_SOURCE 221 222 int len; 223 char *locale; 224 225 locale = setlocale(LC_CTYPE, ""); 226 if (locale && *locale) { 227 len = strlen(locale); 228 } else { 229 locale = "C"; 230 len = 1; 231 } 232 233 if ((!strncmp(locale, "/\x03:", 3)) && 234 (!strcmp(&locale[len - 2], ";/"))) { 235 locale += 3; 236 len -= 5; 237 } 238 239 locale = strdup(locale); 240 if (locale) { 241 locale[len] = 0; 242 } 243 244 return locale; 245 246 #else 247 248 char *locale; 249 250 locale = setlocale(LC_CTYPE, ""); 251 if (locale && *locale) { 252 return strdup(locale); 253 } 254 255 return strdup("C"); 256 257 #endif 258 } 259 260 #if defined(IRIX) 261 const char *CHARCONVTABLE[] = 262 { 263 "! This table maps the host's locale names to IANA charsets", 264 "!", 265 "C: ISO_8859-1:1987", 266 "cs: ISO_8859-2:1987", 267 "da: ISO_8859-1:1987", 268 "de: ISO_8859-1:1987", 269 "de_AT: ISO_8859-1:1987", 270 "de_CH: ISO_8859-1:1987", 271 "en: ISO_8859-1:1987", 272 "en_AU: ISO_8859-1:1987", 273 "en_CA: ISO_8859-1:1987", 274 "en_TH: ISO_8859-1:1987", 275 "en_US: ISO_8859-1:1987", 276 "es: ISO_8859-1:1987", 277 "fi: ISO_8859-1:1987", 278 "fr: ISO_8859-1:1987", 279 "fr_BE: ISO_8859-1:1987", 280 "fr_CA: ISO_8859-1:1987", 281 "fr_CH: ISO_8859-1:1987", 282 "is: ISO_8859-1:1987", 283 "it: ISO_8859-1:1987", 284 "it_CH: ISO_8859-1:1987", 285 "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese", 286 "ko_KR.euc: EUC-KR", 287 "nl: ISO_8859-1:1987", 288 "nl_BE: ISO_8859-1:1987", 289 "no: ISO_8859-1:1987", 290 "pl: ISO_8859-2:1987", 291 "pt: ISO_8859-1:1987", 292 "sh: ISO_8859-2:1987", 293 "sk: ISO_8859-2:1987", 294 "sv: ISO_8859-1:1987", 295 "zh_CN.ugb: GB2312", 296 "zh_TW.ucns: cns11643_1", 297 NULL 298 }; 299 #elif defined(SOLARIS) 300 const char *CHARCONVTABLE[] = 301 { 302 "! This table maps the host's locale names to IANA charsets", 303 "!", 304 "C: ISO_8859-1:1987", 305 "ja: Extended_UNIX_Code_Packed_Format_for_Japanese", 306 "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese", 307 "ja_JP.PCK: Shift_JIS", 308 "en: ISO_8859-1:1987", 309 "en_AU: ISO_8859-1:1987", 310 "en_CA: ISO_8859-1:1987", 311 "en_UK: ISO_8859-1:1987", 312 "en_US: ISO_8859-1:1987", 313 "es: ISO_8859-1:1987", 314 "es_AR: ISO_8859-1:1987", 315 "es_BO: ISO_8859-1:1987", 316 "es_CL: ISO_8859-1:1987", 317 "es_CO: ISO_8859-1:1987", 318 "es_CR: ISO_8859-1:1987", 319 "es_EC: ISO_8859-1:1987", 320 "es_GT: ISO_8859-1:1987", 321 "es_MX: ISO_8859-1:1987", 322 "es_NI: ISO_8859-1:1987", 323 "es_PA: ISO_8859-1:1987", 324 "es_PE: ISO_8859-1:1987", 325 "es_PY: ISO_8859-1:1987", 326 "es_SV: ISO_8859-1:1987", 327 "es_UY: ISO_8859-1:1987", 328 "es_VE: ISO_8859-1:1987", 329 "fr: ISO_8859-1:1987", 330 "fr_BE: ISO_8859-1:1987", 331 "fr_CA: ISO_8859-1:1987", 332 "fr_CH: ISO_8859-1:1987", 333 "de: ISO_8859-1:1987", 334 "de_AT: ISO_8859-1:1987", 335 "de_CH: ISO_8859-1:1987", 336 "nl: ISO_8859-1:1987", 337 "nl_BE: ISO_8859-1:1987", 338 "it: ISO_8859-1:1987", 339 "sv: ISO_8859-1:1987", 340 "no: ISO_8859-1:1987", 341 "da: ISO_8859-1:1987", 342 "iso_8859_1: ISO_8859-1:1987", 343 "japanese: Extended_UNIX_Code_Packed_Format_for_Japanese", 344 "ko: EUC-KR", 345 "zh: GB2312", 346 "zh_TW: cns11643_1", 347 NULL 348 }; 349 #elif defined(OSF1) 350 const char *CHARCONVTABLE[] = 351 { 352 "! This table maps the host's locale names to IANA charsets", 353 "!", 354 "C: ISO_8859-1:1987", 355 "cs_CZ.ISO8859-2: ISO_8859-2:1987", 356 "cs_CZ: ISO_8859-2:1987", 357 "da_DK.ISO8859-1: ISO_8859-1:1987", 358 "de_CH.ISO8859-1: ISO_8859-1:1987", 359 "de_DE.ISO8859-1: ISO_8859-1:1987", 360 "en_GB.ISO8859-1: ISO_8859-1:1987", 361 "en_US.ISO8859-1: ISO_8859-1:1987", 362 "es_ES.ISO8859-1: ISO_8859-1:1987", 363 "fi_FI.ISO8859-1: ISO_8859-1:1987", 364 "fr_BE.ISO8859-1: ISO_8859-1:1987", 365 "fr_CA.ISO8859-1: ISO_8859-1:1987", 366 "fr_CH.ISO8859-1: ISO_8859-1:1987", 367 "fr_FR.ISO8859-1: ISO_8859-1:1987", 368 "hu_HU.ISO8859-2: ISO_8859-2:1987", 369 "hu_HU: ISO_8859-2:1987", 370 "is_IS.ISO8859-1: ISO_8859-1:1987", 371 "it_IT.ISO8859-1: ISO_8859-1:1987", 372 "ja_JP.SJIS: Shift_JIS", 373 "ja_JP.eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese", 374 "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese", 375 "ko_KR.eucKR: EUC-KR", 376 "ko_KR: EUC-KR", 377 "nl_BE.ISO8859-1: ISO_8859-1:1987", 378 "nl_NL.ISO8859-1: ISO_8859-1:1987", 379 "no_NO.ISO8859-1: ISO_8859-1:1987", 380 "pl_PL.ISO8859-2: ISO_8859-2:1987", 381 "pl_PL: ISO_8859-2:1987", 382 "pt_PT.ISO8859-1: ISO_8859-1:1987", 383 "sk_SK.ISO8859-2: ISO_8859-2:1987", 384 "sk_SK: ISO_8859-2:1987", 385 "sv_SE.ISO8859-1: ISO_8859-1:1987", 386 "zh_CN: GB2312", 387 "zh_HK.big5: Big5", 388 "zh_HK.eucTW: cns11643_1", 389 "zh_TW.big5: Big5", 390 "zh_TW.big5@chuyin: Big5", 391 "zh_TW.big5@radical: Big5", 392 "zh_TW.big5@stroke: Big5", 393 "zh_TW.eucTW: cns11643_1", 394 "zh_TW.eucTW@chuyin: cns11643_1", 395 "zh_TW.eucTW@radical: cns11643_1", 396 "zh_TW.eucTW@stroke: cns11643_1", 397 "zh_TW: cns11643_1", 398 NULL 399 }; 400 #elif defined(HPUX) 401 const char *CHARCONVTABLE[] = 402 { 403 "! This table maps the host's locale names to IANA charsets", 404 "!", 405 "C: ISO_8859-1:1987", 406 "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese", 407 "ja_JP.SJIS: Shift_JIS", 408 "ja_JP.eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese", 409 "es_ES: ISO_8859-1:1987", 410 "es_ES.iso88591: ISO_8859-1:1987", 411 "sv_SE: ISO_8859-1:1987", 412 "sv_SE.iso88591: ISO_8859-1:1987", 413 "da_DK: ISO_8859-1:1987", 414 "da_DK.iso88591: ISO_8859-1:1987", 415 "nl_NL: ISO_8859-1:1987", 416 "nl_NL.iso88591: ISO_8859-1:1987", 417 "en: ISO_8859-1:1987", 418 "en_GB: ISO_8859-1:1987", 419 "en_GB.iso88591: ISO_8859-1:1987", 420 "en_US: ISO_8859-1:1987", 421 "en_US.iso88591: ISO_8859-1:1987", 422 "fi_FI: ISO_8859-1:1987", 423 "fi_FI.iso88591: ISO_8859-1:1987", 424 "fr_CA: ISO_8859-1:1987", 425 "fr_CA.iso88591: ISO_8859-1:1987", 426 "fr_FR: ISO_8859-1:1987", 427 "fr_FR.iso88591: ISO_8859-1:1987", 428 "de_DE: ISO_8859-1:1987", 429 "de_DE.iso88591: ISO_8859-1:1987", 430 "is_IS: ISO_8859-1:1987", 431 "is_IS.iso88591: ISO_8859-1:1987", 432 "it_IT: ISO_8859-1:1987", 433 "it_IT.iso88591: ISO_8859-1:1987", 434 "no_NO: ISO_8859-1:1987", 435 "no_NO.iso88591: ISO_8859-1:1987", 436 "pt_PT: ISO_8859-1:1987", 437 "pt_PT.iso88591: ISO_8859-1:1987", 438 "hu_HU: ISO_8859-2:1987", 439 "hu_HU.iso88592: ISO_8859-2:1987", 440 "cs_CZ: ISO_8859-2:1987", 441 "cs_CZ.iso88592: ISO_8859-2:1987", 442 "pl_PL: ISO_8859-2:1987", 443 "pl_PL.iso88592: ISO_8859-2:1987", 444 "ro_RO: ISO_8859-2:1987", 445 "ro_RO.iso88592: ISO_8859-2:1987", 446 "hr_HR: ISO_8859-2:1987", 447 "hr_HR.iso88592: ISO_8859-2:1987", 448 "sk_SK: ISO_8859-2:1987", 449 "sk_SK.iso88592: ISO_8859-2:1987", 450 "sl_SI: ISO_8859-2:1987", 451 "sl_SI.iso88592: ISO_8859-2:1987", 452 "american.iso88591: ISO_8859-1:1987", 453 "bulgarian: ISO_8859-2:1987", 454 "c-french.iso88591: ISO_8859-1:1987", 455 "chinese-s: GB2312", 456 "chinese-t.big5: Big5", 457 "czech: ISO_8859-2:1987", 458 "danish.iso88591: ISO_8859-1:1987", 459 "dutch.iso88591: ISO_8859-1:1987", 460 "english.iso88591: ISO_8859-1:1987", 461 "finnish.iso88591: ISO_8859-1:1987", 462 "french.iso88591: ISO_8859-1:1987", 463 "german.iso88591: ISO_8859-1:1987", 464 "hungarian: ISO_8859-2:1987", 465 "icelandic.iso88591: ISO_8859-1:1987", 466 "italian.iso88591: ISO_8859-1:1987", 467 "japanese.euc: Extended_UNIX_Code_Packed_Format_for_Japanese", 468 "japanese: Shift_JIS", 469 "katakana: Shift_JIS", 470 "korean: EUC-KR", 471 "norwegian.iso88591: ISO_8859-1:1987", 472 "polish: ISO_8859-2:1987", 473 "portuguese.iso88591: ISO_8859-1:1987", 474 "rumanian: ISO_8859-2:1987", 475 "serbocroatian: ISO_8859-2:1987", 476 "slovene: ISO_8859-2:1987", 477 "spanish.iso88591: ISO_8859-1:1987", 478 "swedish.iso88591: ISO_8859-1:1987", 479 NULL 480 }; 481 #elif defined(AIX) 482 const char *CHARCONVTABLE[] = 483 { 484 "! This table maps the host's locale names to IANA charsets", 485 "!", 486 "C: ISO_8859-1:1987", 487 "En_JP.IBM-932: Shift_JIS", 488 "En_JP: Shift_JIS", 489 "Ja_JP.IBM-932: Shift_JIS", 490 "Ja_JP: Shift_JIS", 491 "da_DK.ISO8859-1: ISO_8859-1:1987", 492 "da_DK: ISO_8859-1:1987", 493 "de_CH.ISO8859-1: ISO_8859-1:1987", 494 "de_CH: ISO_8859-1:1987", 495 "de_DE.ISO8859-1: ISO_8859-1:1987", 496 "de_DE: ISO_8859-1:1987", 497 "en_GB.ISO8859-1: ISO_8859-1:1987", 498 "en_GB: ISO_8859-1:1987", 499 "en_JP.IBM-eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese", 500 "en_JP: Extended_UNIX_Code_Packed_Format_for_Japanese", 501 "en_KR.IBM-eucKR: EUC-KR", 502 "en_KR: EUC-KR", 503 "en_TW.IBM-eucTW: cns11643_1", 504 "en_TW: cns11643_1", 505 "en_US.ISO8859-1: ISO_8859-1:1987", 506 "en_US: ISO_8859-1:1987", 507 "es_ES.ISO8859-1: ISO_8859-1:1987", 508 "es_ES: ISO_8859-1:1987", 509 "fi_FI.ISO8859-1: ISO_8859-1:1987", 510 "fi_FI: ISO_8859-1:1987", 511 "fr_BE.ISO8859-1: ISO_8859-1:1987", 512 "fr_BE: ISO_8859-1:1987", 513 "fr_CA.ISO8859-1: ISO_8859-1:1987", 514 "fr_CA: ISO_8859-1:1987", 515 "fr_CH.ISO8859-1: ISO_8859-1:1987", 516 "fr_CH: ISO_8859-1:1987", 517 "fr_FR.ISO8859-1: ISO_8859-1:1987", 518 "fr_FR: ISO_8859-1:1987", 519 "is_IS.ISO8859-1: ISO_8859-1:1987", 520 "is_IS: ISO_8859-1:1987", 521 "it_IT.ISO8859-1: ISO_8859-1:1987", 522 "it_IT: ISO_8859-1:1987", 523 "ja_JP.IBM-eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese", 524 "ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese", 525 "ko_KR.IBM-eucKR: EUC-KR", 526 "ko_KR: EUC-KR", 527 "nl_BE.ISO8859-1: ISO_8859-1:1987", 528 "nl_BE: ISO_8859-1:1987", 529 "nl_NL.ISO8859-1: ISO_8859-1:1987", 530 "nl_NL: ISO_8859-1:1987", 531 "no_NO.ISO8859-1: ISO_8859-1:1987", 532 "no_NO: ISO_8859-1:1987", 533 "pt_PT.ISO8859-1: ISO_8859-1:1987", 534 "pt_PT: ISO_8859-1:1987", 535 "sv_SE.ISO8859-1: ISO_8859-1:1987", 536 "sv_SE: ISO_8859-1:1987", 537 "zh_TW.IBM-eucTW: cns11643_1", 538 "zh_TW: cns11643_1", 539 NULL 540 }; 541 #else // sunos by default 542 const char *CHARCONVTABLE[] = 543 { 544 "! This table maps the host's locale names to IANA charsets", 545 "!", 546 "C: ISO_8859-1:1987", 547 "de: ISO_8859-1:1987", 548 "en_US: ISO_8859-1:1987", 549 "es: ISO_8859-1:1987", 550 "fr: ISO_8859-1:1987", 551 "iso_8859_1: ISO_8859-1:1987", 552 "it: ISO_8859-1:1987", 553 "ja: Extended_UNIX_Code_Packed_Format_for_Japanese", 554 "ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese", 555 "japanese: Extended_UNIX_Code_Packed_Format_for_Japanese", 556 "ko: EUC-KR", 557 "sv: ISO_8859-1:1987", 558 "zh: GB2312", 559 "zh_TW: cns11643_1", 560 NULL 561 }; 562 #endif 563 564 #define BSZ 256 565 566 char * 567 GetCharsetFromLocale(char *locale) 568 { 569 char *tmpcharset = NULL; 570 char buf[BSZ]; 571 char *p; 572 const char *line; 573 int i=0; 574 575 line = CHARCONVTABLE[i]; 576 while (line != NULL) 577 { 578 if (*line == 0) 579 { 580 break; 581 } 582 583 strcpy(buf, line); 584 line = CHARCONVTABLE[++i]; 585 586 if (strlen(buf) == 0 || buf[0] == '!') 587 { 588 continue; 589 } 590 p = strchr(buf, ':'); 591 if (p == NULL) 592 { 593 tmpcharset = NULL; 594 break; 595 } 596 *p = 0; 597 if (strcmp(buf, locale) == 0) { 598 while (*++p == ' ' || *p == '\t') 599 ; 600 if (isalpha(*p)) { 601 tmpcharset = strdup(p); 602 } else 603 tmpcharset = NULL; 604 605 break; 606 } 607 } 608 return tmpcharset; 609 } 610 611 #endif /* Not defined XP_WIN32 */ 612 613 #ifdef XP_WIN32 614 char *_convertor(const char *instr, int bFromUTF8) 615 { 616 char *outstr = NULL; 617 int inlen, wclen, outlen; 618 LPWSTR wcstr; 619 620 if (instr == NULL) 621 return NULL; 622 623 if ((inlen = strlen(instr)) <= 0) 624 return NULL; 625 626 /* output never becomes longer than input, 627 * thus we don't have to ask for the length 628 */ 629 wcstr = (LPWSTR) malloc( sizeof( WCHAR ) * (inlen+1) ); 630 if (!wcstr) 631 return NULL; 632 633 wclen = MultiByteToWideChar(bFromUTF8 ? CP_UTF8 : CP_ACP, 0, instr, 634 inlen, wcstr, inlen); 635 outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr, 636 wclen, NULL, 0, NULL, NULL); 637 638 if (outlen > 0) { 639 outstr = (char *) malloc(outlen + 2); 640 outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr, 641 wclen, outstr, outlen, NULL, NULL); 642 if (outlen > 0) 643 *(outstr+outlen) = _T('\0'); 644 else 645 return NULL; 646 } 647 free( wcstr ); 648 return outstr; 649 } 650 #endif 651 652 char * 653 ldaptool_local2UTF8( const char *src ) 654 { 655 char *utf8; 656 #ifndef XP_WIN32 657 char *locale, *newcharset; 658 size_t outLen, resultLen; 659 UErrorCode err = U_ZERO_ERROR; 660 UConverter *cnv; 661 662 if (src == NULL) 663 { 664 return NULL; 665 } 666 else if (*src == 0 || (ldaptool_charset == NULL) 667 || (!strcmp( ldaptool_charset, "" ))) 668 { 669 /* no option specified, so assume it's already in utf-8 */ 670 utf8 = strdup(src); 671 return utf8; 672 } 673 674 if( !strcmp( ldaptool_charset, "0" ) 675 && (!charsetset) ) 676 { 677 /* zero option specified, so try to get default codepage 678 this sucker is strdup'd immediately so it's OK to cast */ 679 newcharset = (char *)ucnv_getDefaultName(); 680 if (newcharset != NULL) { 681 free( ldaptool_charset ); 682 /* the default codepage lives in ICU */ 683 ldaptool_charset = strdup(newcharset); 684 if (ldaptool_charset == NULL) { 685 return strdup(src); 686 } 687 } 688 charsetset = 1; 689 } 690 else 691 if( strcmp( ldaptool_charset, "" ) && (!charsetset) ) 692 { 693 /* -i option specified with charset name */ 694 charsetset = 1; 695 } 696 697 /* do the preflight - get the size needed for the target buffer */ 698 outLen = (size_t) ucnv_convert( "utf-8", ldaptool_charset, NULL, 0, src, 699 strlen( src ) * sizeof(char), &err); 700 701 if ((err != U_BUFFER_OVERFLOW_ERROR) || (outLen == 0)) { 702 /* default to just a copy of the string - this covers 703 the case of an illegal charset also */ 704 return strdup(src); 705 } 706 707 utf8 = (char *) malloc( outLen + 1); 708 if( utf8 == NULL ) { 709 /* if we're already out of memory, does strdup just return NULL? */ 710 return strdup(src); 711 } 712 713 /* do the actual conversion this time */ 714 err = U_ZERO_ERROR; 715 resultLen = ucnv_convert( "utf-8", ldaptool_charset, utf8, (outLen + 1), src, 716 strlen(src) * sizeof(char), &err ); 717 718 if (!U_SUCCESS(err)) { 719 free(utf8); 720 return strdup(src); 721 } 722 723 #else 724 utf8 = _convertor(src, FALSE); 725 if( utf8 == NULL ) 726 utf8 = strdup(src); 727 #endif 728 729 return utf8; 730 } 731 #endif /* HAVE_LIBICU */ 732 733 #ifndef HAVE_LIBICU 734 #ifdef __cplusplus 735 } 736 #endif 737 #endif 738