xref: /titanic_51/usr/src/cmd/ldap/common/convutf8.c (revision c10c16dec587a0662068f6e2991c29ed3a9db943)
1 /*
2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*
7  * The contents of this file are subject to the Netscape Public
8  * License Version 1.1 (the "License"); you may not use this file
9  * except in compliance with the License. You may obtain a copy of
10  * the License at http://www.mozilla.org/NPL/
11  *
12  * Software distributed under the License is distributed on an "AS
13  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
14  * implied. See the License for the specific language governing
15  * rights and limitations under the License.
16  *
17  * The Original Code is Mozilla Communicator client code, released
18  * March 31, 1998.
19  *
20  * The Initial Developer of the Original Code is Netscape
21  * Communications Corporation. Portions created by Netscape are
22  * Copyright (C) 1998-1999 Netscape Communications Corporation. All
23  * Rights Reserved.
24  *
25  * Contributor(s):
26  */
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <locale.h>
32 #include <ctype.h>
33 
34 #ifndef HAVE_LIBICU
35 
36 #ifdef SOLARIS_LDAP_CMD
37 #include <errno.h>
38 #include <langinfo.h>
39 #include <iconv.h>
40 #endif
41 
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
45 
46 extern char	*ldaptool_charset;
47 char		*ldaptool_convdir = NULL;
48 static		int charsetset = 0;
49 char		*ldaptool_local2UTF8( const char *src );
50 
51 #ifdef SOLARIS_LDAP_CMD
52 static char 	*ldaptool_convert( const char *src, const char *fcode,
53 				const char *tcode);
54 char		*ldaptool_UTF82local( const char *src );
55 #endif	/* SOLARIS_LDAP_CMD */
56 
57 #ifdef SOLARIS_LDAP_CMD
/*
 * Convert the NUL-terminated string 'src' from codeset 'fcode' to
 * codeset 'tcode' with iconv().
 *
 * Like the ICU version, this always hands back a string unless memory
 * is exhausted: whenever the conversion cannot be carried out (a codeset
 * name is missing, no conversion is available, the input cannot be
 * converted, or an allocation fails) the result is a plain strdup(src).
 * Usually strdup(src) will be ASCII and legal anyway.
 *
 * Returns NULL only if 'src' is NULL or strdup() itself fails.  The
 * caller owns the returned buffer and releases it with free().
 */

static char *
ldaptool_convert( const char *src, const char *fcode,
				 const char *tcode) {
    char	*dest, *tptr, *tmp;
    const char	*fptr;
    iconv_t	cd;
    size_t	ileft, oleft, size, used;
    int		converted = 0;

    if (src == NULL)
	return (NULL);

    if (fcode == NULL || tcode == NULL)
	return (strdup(src));

    /* identical codesets: nothing to convert */
    if (strcasecmp(fcode, tcode) == 0)
	return (strdup(src));

    if ((cd = iconv_open(tcode, fcode)) == (iconv_t)-1) {
	/* conversion table not available */
	return (strdup(src));
    }

    /*
     * 'size' is the capacity available for converted data.  It is never
     * zero, so that doubling it on E2BIG always makes progress, and the
     * allocation is always one byte larger so that the terminating NUL
     * fits without a further realloc().
     */
    ileft = strlen(src);
    size = (ileft > 0) ? 2 * ileft : 1;
    oleft = size;
    if ((dest = (char *)malloc(size + 1)) == NULL) {
	(void) iconv_close(cd);
	/* maybe enough memory for a copy of src is still available */
	return (strdup(src));
    }
    tptr = dest;
    fptr = src;

    for (;;) {
	/*
	 * The input argument of iconv() is declared 'const char **' on
	 * some systems and 'char **' on others; going through 'void *'
	 * is accepted by both declarations.
	 */
	if (iconv(cd, (void *)&fptr, &ileft, &tptr, &oleft) != (size_t)-1) {
		/*
		 * Success. Place 'cd' into its initial shift
		 * state before returning.
		 */
		if (fptr == NULL) {	/* already in initial state */
			converted = 1;
			break;
		}
		fptr = NULL;
		ileft = 0;
		continue;
	}

	if (errno != E2BIG) {
		/* Other errors */
		break;
	}

	/*
	 * Lack of space in output buffer.
	 * Hence double the size and retry.
	 * The amount already converted is remembered before the
	 * realloc() so that tptr can be re-based onto the new block
	 * and iconv() doesn't overwrite the data which has already
	 * been converted.
	 */
	used = (size_t)(tptr - dest);
	oleft += size;
	size *= 2;
	if ((tmp = (char *)realloc(dest, size + 1)) == NULL)
		break;
	dest = tmp;
	tptr = dest + used;
    }

    (void) iconv_close(cd);

    if (!converted) {
	/* last chance in case some failure occurred along the way */
	free(dest);
	return (strdup(src));
    }

    /* NUL terminate the return value; the extra byte is always there */
    dest[size - oleft] = '\0';
    return (dest);
}
159 
/*
 * Convert the UTF-8 string 'src' to the codeset reported by
 * nl_langinfo(CODESET).
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * when 'src' is NULL or memory is exhausted.  If the codeset cannot be
 * determined, an unconverted copy of 'src' is returned.
 */
char *
ldaptool_UTF82local( const char *src )
{
    char *to_code;

    /* never hand a NULL pointer to strdup() below */
    if (src == NULL)
	return (NULL);

    if ((to_code = nl_langinfo(CODESET)) == NULL)
	return (strdup(src));
    return (ldaptool_convert(src, "UTF-8", (const char *)to_code));
}
168 #endif	/* SOLARIS_LDAP_CMD */
169 
170 char *
171 ldaptool_local2UTF8( const char *src )
172 {
173 #ifdef SOLARIS_LDAP_CMD
174     char *from_code;
175     if ((from_code = nl_langinfo(CODESET)) == NULL)
176 	return (strdup(src));
177     return (ldaptool_convert(src, (const char *)from_code, "UTF-8"));
178 #else
179     char *utf8;
180     charsetset = 0;
181     if (src == NULL)
182     {
183 	return NULL;
184     }
185     utf8 = strdup(src);
186     return ( utf8 );
187 #endif	/* SOLARIS_LDAP_CMD */
188 }
189 
190 #else /* HAVE_LIBICU */
191 
192 #include "unicode/utypes.h"
193 #include "unicode/ucnv.h"
194 
195 #define NSPR20
196 
197 #ifdef XP_WIN32
198 #define  VC_EXTRALEAN
199 #include <afxwin.h>
200 #include <winnls.h>
201 #endif
202 
203 extern char *ldaptool_charset;
204 static int charsetset = 0;
205 
206 extern "C" {
207 char *ldaptool_convdir = NULL;
208 char *ldaptool_local2UTF8( const char * );
209 }
210 
211 #ifndef XP_WIN32
212 char * GetNormalizedLocaleName(void);
213 
214 
/*
 * Select the LC_CTYPE locale from the environment via
 * setlocale(LC_CTYPE, "") and return a heap-allocated copy of its name.
 * When setlocale() yields NULL or an empty string the name "C" is used.
 *
 * In _HPUX_SOURCE builds a name that begins with "/\x03:" and ends with
 * ";/" has those three leading and two trailing characters removed.
 *
 * The caller must free() the result; NULL is returned if strdup() fails.
 */
char *
GetNormalizedLocaleName(void)
{
#ifdef _HPUX_SOURCE

    char    *name;
    char    *copy;
    int      n;

    name = setlocale(LC_CTYPE, "");
    if (name == NULL || *name == '\0') {
        name = "C";
        n = 1;
    } else {
        n = strlen(name);
    }

    /* drop the "/\x03:" prefix and ";/" suffix when both are present */
    if (strncmp(name, "/\x03:", 3) == 0 &&
        strcmp(name + n - 2, ";/") == 0) {
        name += 3;
        n -= 5;
    }

    /* copy from the (possibly advanced) start, then cut the suffix off */
    copy = strdup(name);
    if (copy != NULL) {
        copy[n] = 0;
    }

    return copy;

#else

    const char *name = setlocale(LC_CTYPE, "");

    if (name == NULL || *name == '\0') {
        return strdup("C");
    }

    return strdup(name);

#endif
}
257 
258 #if defined(IRIX)
259 const char *CHARCONVTABLE[] =
260 {
261 "! This table maps the host's locale names to IANA charsets",
262 "!",
263 "C:             ISO_8859-1:1987",
264 "cs:            ISO_8859-2:1987",
265 "da:            ISO_8859-1:1987",
266 "de:            ISO_8859-1:1987",
267 "de_AT:         ISO_8859-1:1987",
268 "de_CH:         ISO_8859-1:1987",
269 "en:            ISO_8859-1:1987",
270 "en_AU:         ISO_8859-1:1987",
271 "en_CA:         ISO_8859-1:1987",
272 "en_TH:         ISO_8859-1:1987",
273 "en_US:         ISO_8859-1:1987",
274 "es:            ISO_8859-1:1987",
275 "fi:            ISO_8859-1:1987",
276 "fr:            ISO_8859-1:1987",
277 "fr_BE:         ISO_8859-1:1987",
278 "fr_CA:         ISO_8859-1:1987",
279 "fr_CH:         ISO_8859-1:1987",
280 "is:            ISO_8859-1:1987",
281 "it:            ISO_8859-1:1987",
282 "it_CH:         ISO_8859-1:1987",
283 "ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
284 "ko_KR.euc:     EUC-KR",
285 "nl:            ISO_8859-1:1987",
286 "nl_BE:         ISO_8859-1:1987",
287 "no:            ISO_8859-1:1987",
288 "pl:            ISO_8859-2:1987",
289 "pt:            ISO_8859-1:1987",
290 "sh:            ISO_8859-2:1987",
291 "sk:            ISO_8859-2:1987",
292 "sv:            ISO_8859-1:1987",
293 "zh_CN.ugb:     GB2312",
294 "zh_TW.ucns:    cns11643_1",
295 NULL
296 };
297 #elif defined(SOLARIS)
298 const char *CHARCONVTABLE[] =
299 {
300 "! This table maps the host's locale names to IANA charsets",
301 "!",
302 "C:             ISO_8859-1:1987",
303 "ja:            Extended_UNIX_Code_Packed_Format_for_Japanese",
304 "ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
305 "ja_JP.PCK:     Shift_JIS",
306 "en:		ISO_8859-1:1987",
307 "en_AU:		ISO_8859-1:1987",
308 "en_CA:		ISO_8859-1:1987",
309 "en_UK:		ISO_8859-1:1987",
310 "en_US:		ISO_8859-1:1987",
311 "es:		ISO_8859-1:1987",
312 "es_AR:		ISO_8859-1:1987",
313 "es_BO:		ISO_8859-1:1987",
314 "es_CL:		ISO_8859-1:1987",
315 "es_CO:		ISO_8859-1:1987",
316 "es_CR:		ISO_8859-1:1987",
317 "es_EC:		ISO_8859-1:1987",
318 "es_GT:		ISO_8859-1:1987",
319 "es_MX:		ISO_8859-1:1987",
320 "es_NI:		ISO_8859-1:1987",
321 "es_PA:		ISO_8859-1:1987",
322 "es_PE:		ISO_8859-1:1987",
323 "es_PY:		ISO_8859-1:1987",
324 "es_SV:		ISO_8859-1:1987",
325 "es_UY:		ISO_8859-1:1987",
326 "es_VE:		ISO_8859-1:1987",
327 "fr:		ISO_8859-1:1987",
328 "fr_BE:		ISO_8859-1:1987",
329 "fr_CA:		ISO_8859-1:1987",
330 "fr_CH:		ISO_8859-1:1987",
331 "de:		ISO_8859-1:1987",
332 "de_AT:		ISO_8859-1:1987",
333 "de_CH:		ISO_8859-1:1987",
334 "nl:		ISO_8859-1:1987",
335 "nl_BE:		ISO_8859-1:1987",
336 "it:		ISO_8859-1:1987",
337 "sv:		ISO_8859-1:1987",
338 "no:		ISO_8859-1:1987",
339 "da:		ISO_8859-1:1987",
340 "iso_8859_1:    ISO_8859-1:1987",
341 "japanese:      Extended_UNIX_Code_Packed_Format_for_Japanese",
342 "ko:            EUC-KR",
343 "zh:            GB2312",
344 "zh_TW:         cns11643_1",
345 NULL
346 };
347 #elif defined(OSF1)
348 const char *CHARCONVTABLE[] =
349 {
350 "! This table maps the host's locale names to IANA charsets",
351 "!",
352 "C:                     ISO_8859-1:1987",
353 "cs_CZ.ISO8859-2:       ISO_8859-2:1987",
354 "cs_CZ:                 ISO_8859-2:1987",
355 "da_DK.ISO8859-1:       ISO_8859-1:1987",
356 "de_CH.ISO8859-1:       ISO_8859-1:1987",
357 "de_DE.ISO8859-1:       ISO_8859-1:1987",
358 "en_GB.ISO8859-1:       ISO_8859-1:1987",
359 "en_US.ISO8859-1:       ISO_8859-1:1987",
360 "es_ES.ISO8859-1:       ISO_8859-1:1987",
361 "fi_FI.ISO8859-1:       ISO_8859-1:1987",
362 "fr_BE.ISO8859-1:       ISO_8859-1:1987",
363 "fr_CA.ISO8859-1:       ISO_8859-1:1987",
364 "fr_CH.ISO8859-1:       ISO_8859-1:1987",
365 "fr_FR.ISO8859-1:       ISO_8859-1:1987",
366 "hu_HU.ISO8859-2:       ISO_8859-2:1987",
367 "hu_HU:                 ISO_8859-2:1987",
368 "is_IS.ISO8859-1:       ISO_8859-1:1987",
369 "it_IT.ISO8859-1:       ISO_8859-1:1987",
370 "ja_JP.SJIS:            Shift_JIS",
371 "ja_JP.eucJP:           Extended_UNIX_Code_Packed_Format_for_Japanese",
372 "ja_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
373 "ko_KR.eucKR:           EUC-KR",
374 "ko_KR:                 EUC-KR",
375 "nl_BE.ISO8859-1:       ISO_8859-1:1987",
376 "nl_NL.ISO8859-1:       ISO_8859-1:1987",
377 "no_NO.ISO8859-1:       ISO_8859-1:1987",
378 "pl_PL.ISO8859-2:       ISO_8859-2:1987",
379 "pl_PL:                 ISO_8859-2:1987",
380 "pt_PT.ISO8859-1:       ISO_8859-1:1987",
381 "sk_SK.ISO8859-2:       ISO_8859-2:1987",
382 "sk_SK:                 ISO_8859-2:1987",
383 "sv_SE.ISO8859-1:       ISO_8859-1:1987",
384 "zh_CN:                 GB2312",
385 "zh_HK.big5:            Big5",
386 "zh_HK.eucTW:           cns11643_1",
387 "zh_TW.big5:            Big5",
388 "zh_TW.big5@chuyin:     Big5",
389 "zh_TW.big5@radical:    Big5",
390 "zh_TW.big5@stroke:     Big5",
391 "zh_TW.eucTW:           cns11643_1",
392 "zh_TW.eucTW@chuyin:    cns11643_1",
393 "zh_TW.eucTW@radical:   cns11643_1",
394 "zh_TW.eucTW@stroke:    cns11643_1",
395 "zh_TW:                 cns11643_1",
396 NULL
397 };
398 #elif defined(HPUX)
399 const char *CHARCONVTABLE[] =
400 {
401 "! This table maps the host's locale names to IANA charsets",
402 "!",
403 "C:			ISO_8859-1:1987",
404 "ja_JP:			Extended_UNIX_Code_Packed_Format_for_Japanese",
405 "ja_JP.SJIS:		Shift_JIS",
406 "ja_JP.eucJP:		Extended_UNIX_Code_Packed_Format_for_Japanese",
407 "es_ES:			ISO_8859-1:1987",
408 "es_ES.iso88591:	ISO_8859-1:1987",
409 "sv_SE:			ISO_8859-1:1987",
410 "sv_SE.iso88591:	ISO_8859-1:1987",
411 "da_DK:			ISO_8859-1:1987",
412 "da_DK.iso88591:	ISO_8859-1:1987",
413 "nl_NL:			ISO_8859-1:1987",
414 "nl_NL.iso88591:	ISO_8859-1:1987",
415 "en:			ISO_8859-1:1987",
416 "en_GB:			ISO_8859-1:1987",
417 "en_GB.iso88591:	ISO_8859-1:1987",
418 "en_US:			ISO_8859-1:1987",
419 "en_US.iso88591:	ISO_8859-1:1987",
420 "fi_FI:			ISO_8859-1:1987",
421 "fi_FI.iso88591:	ISO_8859-1:1987",
422 "fr_CA:			ISO_8859-1:1987",
423 "fr_CA.iso88591:	ISO_8859-1:1987",
424 "fr_FR:			ISO_8859-1:1987",
425 "fr_FR.iso88591:	ISO_8859-1:1987",
426 "de_DE:			ISO_8859-1:1987",
427 "de_DE.iso88591:	ISO_8859-1:1987",
428 "is_IS:			ISO_8859-1:1987",
429 "is_IS.iso88591:	ISO_8859-1:1987",
430 "it_IT:			ISO_8859-1:1987",
431 "it_IT.iso88591:	ISO_8859-1:1987",
432 "no_NO:			ISO_8859-1:1987",
433 "no_NO.iso88591:	ISO_8859-1:1987",
434 "pt_PT:			ISO_8859-1:1987",
435 "pt_PT.iso88591:	ISO_8859-1:1987",
436 "hu_HU:			ISO_8859-2:1987",
437 "hu_HU.iso88592:	ISO_8859-2:1987",
438 "cs_CZ:			ISO_8859-2:1987",
439 "cs_CZ.iso88592:	ISO_8859-2:1987",
440 "pl_PL:			ISO_8859-2:1987",
441 "pl_PL.iso88592:	ISO_8859-2:1987",
442 "ro_RO:			ISO_8859-2:1987",
443 "ro_RO.iso88592:	ISO_8859-2:1987",
444 "hr_HR:			ISO_8859-2:1987",
445 "hr_HR.iso88592:	ISO_8859-2:1987",
446 "sk_SK:			ISO_8859-2:1987",
447 "sk_SK.iso88592:	ISO_8859-2:1987",
448 "sl_SI:			ISO_8859-2:1987",
449 "sl_SI.iso88592:	ISO_8859-2:1987",
450 "american.iso88591:     ISO_8859-1:1987",
451 "bulgarian:             ISO_8859-2:1987",
452 "c-french.iso88591:     ISO_8859-1:1987",
453 "chinese-s:             GB2312",
454 "chinese-t.big5:                Big5",
455 "czech:                 ISO_8859-2:1987",
456 "danish.iso88591:       ISO_8859-1:1987",
457 "dutch.iso88591:                ISO_8859-1:1987",
458 "english.iso88591:      ISO_8859-1:1987",
459 "finnish.iso88591:      ISO_8859-1:1987",
460 "french.iso88591:       ISO_8859-1:1987",
461 "german.iso88591:       ISO_8859-1:1987",
462 "hungarian:             ISO_8859-2:1987",
463 "icelandic.iso88591:    ISO_8859-1:1987",
464 "italian.iso88591:      ISO_8859-1:1987",
465 "japanese.euc:          Extended_UNIX_Code_Packed_Format_for_Japanese",
466 "japanese:              Shift_JIS",
467 "katakana:              Shift_JIS",
468 "korean:                        EUC-KR",
469 "norwegian.iso88591:    ISO_8859-1:1987",
470 "polish:                        ISO_8859-2:1987",
471 "portuguese.iso88591:   ISO_8859-1:1987",
472 "rumanian:              ISO_8859-2:1987",
473 "serbocroatian:         ISO_8859-2:1987",
474 "slovene:               ISO_8859-2:1987",
475 "spanish.iso88591:      ISO_8859-1:1987",
476 "swedish.iso88591:      ISO_8859-1:1987",
477 NULL
478 };
479 #elif defined(AIX)
480 const char *CHARCONVTABLE[] =
481 {
482 "! This table maps the host's locale names to IANA charsets",
483 "!",
484 "C:                     ISO_8859-1:1987",
485 "En_JP.IBM-932:         Shift_JIS",
486 "En_JP:                 Shift_JIS",
487 "Ja_JP.IBM-932:         Shift_JIS",
488 "Ja_JP:                 Shift_JIS",
489 "da_DK.ISO8859-1:       ISO_8859-1:1987",
490 "da_DK:                 ISO_8859-1:1987",
491 "de_CH.ISO8859-1:       ISO_8859-1:1987",
492 "de_CH:                 ISO_8859-1:1987",
493 "de_DE.ISO8859-1:       ISO_8859-1:1987",
494 "de_DE:                 ISO_8859-1:1987",
495 "en_GB.ISO8859-1:       ISO_8859-1:1987",
496 "en_GB:                 ISO_8859-1:1987",
497 "en_JP.IBM-eucJP:       Extended_UNIX_Code_Packed_Format_for_Japanese",
498 "en_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
499 "en_KR.IBM-eucKR:       EUC-KR",
500 "en_KR:                 EUC-KR",
501 "en_TW.IBM-eucTW:       cns11643_1",
502 "en_TW:                 cns11643_1",
503 "en_US.ISO8859-1:       ISO_8859-1:1987",
504 "en_US:                 ISO_8859-1:1987",
505 "es_ES.ISO8859-1:       ISO_8859-1:1987",
506 "es_ES:                 ISO_8859-1:1987",
507 "fi_FI.ISO8859-1:       ISO_8859-1:1987",
508 "fi_FI:                 ISO_8859-1:1987",
509 "fr_BE.ISO8859-1:       ISO_8859-1:1987",
510 "fr_BE:                 ISO_8859-1:1987",
511 "fr_CA.ISO8859-1:       ISO_8859-1:1987",
512 "fr_CA:                 ISO_8859-1:1987",
513 "fr_CH.ISO8859-1:       ISO_8859-1:1987",
514 "fr_CH:                 ISO_8859-1:1987",
515 "fr_FR.ISO8859-1:       ISO_8859-1:1987",
516 "fr_FR:                 ISO_8859-1:1987",
517 "is_IS.ISO8859-1:       ISO_8859-1:1987",
518 "is_IS:                 ISO_8859-1:1987",
519 "it_IT.ISO8859-1:       ISO_8859-1:1987",
520 "it_IT:                 ISO_8859-1:1987",
521 "ja_JP.IBM-eucJP:       Extended_UNIX_Code_Packed_Format_for_Japanese",
522 "ja_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
523 "ko_KR.IBM-eucKR:       EUC-KR",
524 "ko_KR:                 EUC-KR",
525 "nl_BE.ISO8859-1:       ISO_8859-1:1987",
526 "nl_BE:                 ISO_8859-1:1987",
527 "nl_NL.ISO8859-1:       ISO_8859-1:1987",
528 "nl_NL:                 ISO_8859-1:1987",
529 "no_NO.ISO8859-1:       ISO_8859-1:1987",
530 "no_NO:                 ISO_8859-1:1987",
531 "pt_PT.ISO8859-1:       ISO_8859-1:1987",
532 "pt_PT:                 ISO_8859-1:1987",
533 "sv_SE.ISO8859-1:       ISO_8859-1:1987",
534 "sv_SE:                 ISO_8859-1:1987",
535 "zh_TW.IBM-eucTW:       cns11643_1",
536 "zh_TW:                 cns11643_1",
537 NULL
538 };
539 #else   // sunos by default
540 const char *CHARCONVTABLE[] =
541 {
542 "! This table maps the host's locale names to IANA charsets",
543 "!",
544 "C:             ISO_8859-1:1987",
545 "de:            ISO_8859-1:1987",
546 "en_US:         ISO_8859-1:1987",
547 "es:            ISO_8859-1:1987",
548 "fr:            ISO_8859-1:1987",
549 "iso_8859_1:    ISO_8859-1:1987",
550 "it:            ISO_8859-1:1987",
551 "ja:            Extended_UNIX_Code_Packed_Format_for_Japanese",
552 "ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
553 "japanese:      Extended_UNIX_Code_Packed_Format_for_Japanese",
554 "ko:            EUC-KR",
555 "sv:            ISO_8859-1:1987",
556 "zh:            GB2312",
557 "zh_TW:         cns11643_1",
558 NULL
559 };
560 #endif
561 
562 #define BSZ     256
563 
564 char *
565 GetCharsetFromLocale(char *locale)
566 {
567     char *tmpcharset = NULL;
568     char buf[BSZ];
569     char *p;
570     const char *line;
571     int i=0;
572 
573     line = CHARCONVTABLE[i];
574     while (line != NULL)
575     {
576        if (*line == 0)
577        {
578           break;
579        }
580 
581        strcpy(buf, line);
582        line = CHARCONVTABLE[++i];
583 
584        if (strlen(buf) == 0 || buf[0] == '!')
585        {
586           continue;
587        }
588        p = strchr(buf, ':');
589        if (p == NULL)
590        {
591           tmpcharset = NULL;
592           break;
593        }
594        *p = 0;
595        if (strcmp(buf, locale) == 0) {
596           while (*++p == ' ' || *p == '\t')
597              ;
598           if (isalpha(*p)) {
599              tmpcharset = strdup(p);
600           } else
601              tmpcharset = NULL;
602 
603           break;
604        }
605     }
606     return tmpcharset;
607 }
608 
609 #endif /* Not defined XP_WIN32 */
610 
611 #ifdef XP_WIN32
612 char *_convertor(const char *instr, int bFromUTF8)
613 {
614     char  *outstr = NULL;
615     int    inlen, wclen, outlen;
616     LPWSTR wcstr;
617 
618     if (instr == NULL)
619             return NULL;
620 
621     if ((inlen = strlen(instr)) <= 0)
622             return NULL;
623 
624     /* output never becomes longer than input,
625      * thus we don't have to ask for the length
626      */
627     wcstr = (LPWSTR) malloc( sizeof( WCHAR ) * (inlen+1) );
628     if (!wcstr)
629         return NULL;
630 
631     wclen = MultiByteToWideChar(bFromUTF8 ? CP_UTF8 : CP_ACP, 0, instr,
632                                  inlen, wcstr, inlen);
633     outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
634                                   wclen, NULL, 0, NULL, NULL);
635 
636     if (outlen > 0) {
637         outstr = (char *) malloc(outlen + 2);
638         outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
639                                       wclen, outstr, outlen, NULL, NULL);
640         if (outlen > 0)
641             *(outstr+outlen) = _T('\0');
642         else
643             return NULL;
644     }
645     free( wcstr );
646     return outstr;
647 }
648 #endif
649 
650 char *
651 ldaptool_local2UTF8( const char *src )
652 {
653     char *utf8;
654 #ifndef XP_WIN32
655     char *locale, *newcharset;
656     size_t outLen, resultLen;
657     UErrorCode err = U_ZERO_ERROR;
658     UConverter *cnv;
659 
660     if (src == NULL)
661     {
662       return NULL;
663     }
664     else if (*src == 0 || (ldaptool_charset == NULL)
665 	     || (!strcmp( ldaptool_charset, "" )))
666     {
667 	/* no option specified, so assume it's already in utf-8 */
668         utf8 = strdup(src);
669         return utf8;
670     }
671 
672     if( !strcmp( ldaptool_charset, "0" )
673 	    && (!charsetset) )
674     {
675 	/* zero option specified, so try to get default codepage
676 	   this sucker is strdup'd immediately so it's OK to cast */
677 	newcharset = (char *)ucnv_getDefaultName();
678 	if (newcharset != NULL) {
679 	    free( ldaptool_charset );
680 	    /* the default codepage lives in ICU */
681 	    ldaptool_charset = strdup(newcharset);
682 	    if (ldaptool_charset == NULL) {
683 		return strdup(src);
684 	    }
685 	}
686 	charsetset = 1;
687     }
688     else
689     if( strcmp( ldaptool_charset, "" ) && (!charsetset) )
690     {
691 	/* -i option specified with charset name */
692         charsetset = 1;
693     }
694 
695     /* do the preflight - get the size needed for the target buffer */
696     outLen = (size_t) ucnv_convert( "utf-8", ldaptool_charset, NULL, 0, src,
697                                       strlen( src ) * sizeof(char), &err);
698 
699     if ((err != U_BUFFER_OVERFLOW_ERROR) || (outLen == 0)) {
700       /* default to just a copy of the string - this covers
701          the case of an illegal charset also */
702       return strdup(src);
703     }
704 
705     utf8 =  (char *) malloc( outLen + 1);
706     if( utf8 == NULL ) {
707       /* if we're already out of memory, does strdup just return NULL? */
708        return strdup(src);
709     }
710 
711     /* do the actual conversion this time */
712     err = U_ZERO_ERROR;
713     resultLen = ucnv_convert( "utf-8", ldaptool_charset, utf8, (outLen + 1), src,
714 		       strlen(src) * sizeof(char), &err );
715 
716     if (!U_SUCCESS(err)) {
717       free(utf8);
718       return strdup(src);
719     }
720 
721 #else
722     utf8 = _convertor(src, FALSE);
723     if( utf8 == NULL )
724         utf8 = strdup(src);
725 #endif
726 
727     return utf8;
728 }
729 #endif /* HAVE_LIBICU */
730 
731 #ifndef HAVE_LIBICU
732 #ifdef __cplusplus
733 }
734 #endif
735 #endif
736