xref: /titanic_41/usr/src/cmd/ldap/common/convutf8.c (revision 25cf1a301a396c38e8adf52c15f537b80d2483f7)
1 /*
2  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 #pragma ident	"%Z%%M%	%I%	%E% SMI"
7 
8 /*
9  * The contents of this file are subject to the Netscape Public
10  * License Version 1.1 (the "License"); you may not use this file
11  * except in compliance with the License. You may obtain a copy of
12  * the License at http://www.mozilla.org/NPL/
13  *
14  * Software distributed under the License is distributed on an "AS
15  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
16  * implied. See the License for the specific language governing
17  * rights and limitations under the License.
18  *
19  * The Original Code is Mozilla Communicator client code, released
20  * March 31, 1998.
21  *
22  * The Initial Developer of the Original Code is Netscape
23  * Communications Corporation. Portions created by Netscape are
24  * Copyright (C) 1998-1999 Netscape Communications Corporation. All
25  * Rights Reserved.
26  *
27  * Contributor(s):
28  */
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <locale.h>
34 #include <ctype.h>
35 
36 #ifndef HAVE_LIBICU
37 
38 #ifdef SOLARIS_LDAP_CMD
39 #include <errno.h>
40 #include <langinfo.h>
41 #include <iconv.h>
42 #endif
43 
44 #ifdef __cplusplus
45 extern "C" {
46 #endif
47 
/* Charset name; the object itself is defined in another file. */
extern char *ldaptool_charset;

/* Defined here and initialised to NULL; nothing in this build assigns it. */
char *ldaptool_convdir = NULL;

/* Reset to 0 by the non-Solaris ldaptool_local2UTF8() on every call. */
static int charsetset = 0;

char *ldaptool_local2UTF8( const char *src );

#ifdef SOLARIS_LDAP_CMD
/* iconv-based helpers that exist only in the Solaris command build. */
static char *ldaptool_convert( const char *src, const char *fcode,
    const char *tcode);
char *ldaptool_UTF82local( const char *src );
#endif	/* SOLARIS_LDAP_CMD */
58 
59 #ifdef SOLARIS_LDAP_CMD
/*
 * The ICU version always returns a string unless strdup() fails.
 * As in the ICU version, any conversion error falls back to strdup(src).
 * src is usually plain ASCII, so the unconverted copy is normally still
 * legal in the target codeset.
 */
65 
/*
 * Convert the NUL-terminated string 'src' from codeset 'fcode' to codeset
 * 'tcode' with iconv().
 *
 * Returns a malloc'ed, NUL-terminated string that the caller must free().
 * NULL is returned only if 'src' is NULL or the fallback strdup() fails.
 * A plain copy of 'src' is returned when either codeset name is NULL, when
 * the two names are equal ignoring case, when iconv_open() cannot provide
 * the conversion, or when the conversion or a buffer allocation fails.
 */
static char *
ldaptool_convert( const char *src, const char *fcode,
				 const char *tcode) {
    char	*dest, *tptr, *tmp;
    const char	*fptr;
    iconv_t	cd;
    size_t	ileft, oleft, ret, size, used;

    if (src == NULL)
	return (NULL);

    if (fcode == NULL || tcode == NULL)
	return (strdup(src));

    if (strcasecmp(fcode, tcode) == 0)
	return (strdup(src));

    if ((cd = iconv_open(tcode, fcode)) == (iconv_t)-1) {
	/* conversion table not available */
	return (strdup(src));
    }

    ileft = strlen(src);
    /*
     * 'size' is the room offered to iconv().  It is never zero, so
     * doubling it always makes progress.  The allocation is always one
     * byte larger than 'size' so that the result can be NUL terminated
     * without a further realloc().
     */
    size = 2 * ileft + 1;
    oleft = size;
    if ((dest = (char *)malloc(size + 1)) == NULL) {
	(void) iconv_close(cd);
	/* maybe sizeof strlen(src) memory still exists */
	return (strdup(src));
    }
    tptr = dest;
    fptr = src;

    for (;;) {
	ret = iconv(cd, &fptr, &ileft, &tptr, &oleft);

	if (ret != (size_t)-1) {
	    /*
	     * Success.  Call once more with a NULL input so that any
	     * pending shift sequence is written and 'cd' is back in
	     * its initial state.
	     */
	    if (fptr == NULL)	/* that extra call is already done */
		break;
	    fptr = NULL;
	    ileft = 0;
	    continue;
	}

	if (errno != E2BIG) {
	    /* Other errors: give up, 'ret' stays (size_t)-1 */
	    break;
	}

	/*
	 * Output buffer exhausted: double it and retry.  Remember how
	 * much has been converted before realloc() may move the block,
	 * so that iconv() resumes right after the existing output.
	 */
	used = (size_t)(tptr - dest);
	if ((tmp = (char *)realloc(dest, 2 * size + 1)) == NULL)
	    break;
	dest = tmp;
	tptr = dest + used;
	oleft += size;
	size *= 2;
    }

    (void) iconv_close(cd);

    if (ret == (size_t)-1) {
	/* conversion or allocation failed: fall back to a plain copy */
	free(dest);
	return (strdup(src));
    }

    /* tptr is just past the converted bytes; the spare byte is reserved */
    *tptr = '\0';
    return (dest);
}
161 
/*
 * Convert 'src' from UTF-8 to the codeset reported by nl_langinfo(CODESET).
 * If no codeset name is available, a plain strdup() copy of 'src' is
 * returned instead.
 */
char *
ldaptool_UTF82local( const char *src )
{
    const char *local_codeset = nl_langinfo(CODESET);

    if (local_codeset != NULL) {
	return (ldaptool_convert(src, "UTF-8", local_codeset));
    }
    return (strdup(src));
}
170 #endif	/* SOLARIS_LDAP_CMD */
171 
172 char *
173 ldaptool_local2UTF8( const char *src )
174 {
175 #ifdef SOLARIS_LDAP_CMD
176     char *from_code;
177     if ((from_code = nl_langinfo(CODESET)) == NULL)
178 	return (strdup(src));
179     return (ldaptool_convert(src, (const char *)from_code, "UTF-8"));
180 #else
181     char *utf8;
182     charsetset = 0;
183     if (src == NULL)
184     {
185 	return NULL;
186     }
187     utf8 = strdup(src);
188     return ( utf8 );
189 #endif	/* SOLARIS_LDAP_CMD */
190 }
191 
192 #else /* HAVE_LIBICU */
193 
194 #include "unicode/utypes.h"
195 #include "unicode/ucnv.h"
196 
197 #define NSPR20
198 
199 #ifdef XP_WIN32
200 #define  VC_EXTRALEAN
201 #include <afxwin.h>
202 #include <winnls.h>
203 #endif
204 
/*
 * Name of the source charset handed to ucnv_convert() by
 * ldaptool_local2UTF8(); the object itself is defined in another file.
 */
extern char *ldaptool_charset;
/* Set to 1 by ldaptool_local2UTF8() once ldaptool_charset has been examined. */
static int charsetset = 0;

/* These two symbols are given C linkage. */
extern "C" {
char *ldaptool_convdir = NULL;
char *ldaptool_local2UTF8( const char * );
}
212 
213 #ifndef XP_WIN32
char * GetNormalizedLocaleName(void);

/*
 * Select the LC_CTYPE locale from the environment with setlocale() and
 * return a strdup'ed copy of its name, or of "C" when setlocale() returns
 * NULL or an empty string.  Returns NULL only if strdup() fails.
 *
 * Under _HPUX_SOURCE a name that starts with "/\x03:" and ends with ";/"
 * has those three leading and two trailing bytes removed from the copy.
 */
char *
GetNormalizedLocaleName(void)
{
#ifdef _HPUX_SOURCE

    char    *name = setlocale(LC_CTYPE, "");
    char    *copy;
    int      keep;

    if (name == NULL || *name == '\0') {
        name = "C";
    }
    keep = strlen(name);

    /* drop the "/\x03:" prefix and ";/" suffix when both are present */
    if (strncmp(name, "/\x03:", 3) == 0 &&
        strcmp(name + keep - 2, ";/") == 0) {
        name += 3;
        keep -= 5;
    }

    copy = strdup(name);
    if (copy != NULL) {
        copy[keep] = 0;
    }

    return copy;

#else

    const char *name = setlocale(LC_CTYPE, "");

    return strdup((name != NULL && *name != '\0') ? name : "C");

#endif
}
259 
260 #if defined(IRIX)
/*
 * Locale-name to IANA-charset table selected when IRIX is defined.
 * Entries have the form "<locale>:<blanks><charset>"; entries starting
 * with '!' are comments.  The list ends with a NULL pointer.
 */
const char *CHARCONVTABLE[] =
{
    "! This table maps the host's locale names to IANA charsets",
    "!",
    "C:             ISO_8859-1:1987",
    "cs:            ISO_8859-2:1987",
    "da:            ISO_8859-1:1987",
    "de:            ISO_8859-1:1987",
    "de_AT:         ISO_8859-1:1987",
    "de_CH:         ISO_8859-1:1987",
    "en:            ISO_8859-1:1987",
    "en_AU:         ISO_8859-1:1987",
    "en_CA:         ISO_8859-1:1987",
    "en_TH:         ISO_8859-1:1987",
    "en_US:         ISO_8859-1:1987",
    "es:            ISO_8859-1:1987",
    "fi:            ISO_8859-1:1987",
    "fr:            ISO_8859-1:1987",
    "fr_BE:         ISO_8859-1:1987",
    "fr_CA:         ISO_8859-1:1987",
    "fr_CH:         ISO_8859-1:1987",
    "is:            ISO_8859-1:1987",
    "it:            ISO_8859-1:1987",
    "it_CH:         ISO_8859-1:1987",
    "ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ko_KR.euc:     EUC-KR",
    "nl:            ISO_8859-1:1987",
    "nl_BE:         ISO_8859-1:1987",
    "no:            ISO_8859-1:1987",
    "pl:            ISO_8859-2:1987",
    "pt:            ISO_8859-1:1987",
    "sh:            ISO_8859-2:1987",
    "sk:            ISO_8859-2:1987",
    "sv:            ISO_8859-1:1987",
    "zh_CN.ugb:     GB2312",
    "zh_TW.ucns:    cns11643_1",
    NULL
};
299 #elif defined(SOLARIS)
/*
 * Locale-name to IANA-charset table selected when SOLARIS is defined.
 * Entries have the form "<locale>:<blanks or tabs><charset>"; entries
 * starting with '!' are comments.  The list ends with a NULL pointer.
 */
const char *CHARCONVTABLE[] =
{
    "! This table maps the host's locale names to IANA charsets",
    "!",
    "C:             ISO_8859-1:1987",
    "ja:            Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ja_JP.PCK:     Shift_JIS",
    "en:\t\tISO_8859-1:1987",
    "en_AU:\t\tISO_8859-1:1987",
    "en_CA:\t\tISO_8859-1:1987",
    "en_UK:\t\tISO_8859-1:1987",
    "en_US:\t\tISO_8859-1:1987",
    "es:\t\tISO_8859-1:1987",
    "es_AR:\t\tISO_8859-1:1987",
    "es_BO:\t\tISO_8859-1:1987",
    "es_CL:\t\tISO_8859-1:1987",
    "es_CO:\t\tISO_8859-1:1987",
    "es_CR:\t\tISO_8859-1:1987",
    "es_EC:\t\tISO_8859-1:1987",
    "es_GT:\t\tISO_8859-1:1987",
    "es_MX:\t\tISO_8859-1:1987",
    "es_NI:\t\tISO_8859-1:1987",
    "es_PA:\t\tISO_8859-1:1987",
    "es_PE:\t\tISO_8859-1:1987",
    "es_PY:\t\tISO_8859-1:1987",
    "es_SV:\t\tISO_8859-1:1987",
    "es_UY:\t\tISO_8859-1:1987",
    "es_VE:\t\tISO_8859-1:1987",
    "fr:\t\tISO_8859-1:1987",
    "fr_BE:\t\tISO_8859-1:1987",
    "fr_CA:\t\tISO_8859-1:1987",
    "fr_CH:\t\tISO_8859-1:1987",
    "de:\t\tISO_8859-1:1987",
    "de_AT:\t\tISO_8859-1:1987",
    "de_CH:\t\tISO_8859-1:1987",
    "nl:\t\tISO_8859-1:1987",
    "nl_BE:\t\tISO_8859-1:1987",
    "it:\t\tISO_8859-1:1987",
    "sv:\t\tISO_8859-1:1987",
    "no:\t\tISO_8859-1:1987",
    "da:\t\tISO_8859-1:1987",
    "iso_8859_1:    ISO_8859-1:1987",
    "japanese:      Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ko:            EUC-KR",
    "zh:            GB2312",
    "zh_TW:         cns11643_1",
    NULL
};
349 #elif defined(OSF1)
350 const char *CHARCONVTABLE[] =
351 {
352 "! This table maps the host's locale names to IANA charsets",
353 "!",
354 "C:                     ISO_8859-1:1987",
355 "cs_CZ.ISO8859-2:       ISO_8859-2:1987",
356 "cs_CZ:                 ISO_8859-2:1987",
357 "da_DK.ISO8859-1:       ISO_8859-1:1987",
358 "de_CH.ISO8859-1:       ISO_8859-1:1987",
359 "de_DE.ISO8859-1:       ISO_8859-1:1987",
360 "en_GB.ISO8859-1:       ISO_8859-1:1987",
361 "en_US.ISO8859-1:       ISO_8859-1:1987",
362 "es_ES.ISO8859-1:       ISO_8859-1:1987",
363 "fi_FI.ISO8859-1:       ISO_8859-1:1987",
364 "fr_BE.ISO8859-1:       ISO_8859-1:1987",
365 "fr_CA.ISO8859-1:       ISO_8859-1:1987",
366 "fr_CH.ISO8859-1:       ISO_8859-1:1987",
367 "fr_FR.ISO8859-1:       ISO_8859-1:1987",
368 "hu_HU.ISO8859-2:       ISO_8859-2:1987",
369 "hu_HU:                 ISO_8859-2:1987",
370 "is_IS.ISO8859-1:       ISO_8859-1:1987",
371 "it_IT.ISO8859-1:       ISO_8859-1:1987",
372 "ja_JP.SJIS:            Shift_JIS",
373 "ja_JP.eucJP:           Extended_UNIX_Code_Packed_Format_for_Japanese",
374 "ja_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
375 "ko_KR.eucKR:           EUC-KR",
376 "ko_KR:                 EUC-KR",
377 "nl_BE.ISO8859-1:       ISO_8859-1:1987",
378 "nl_NL.ISO8859-1:       ISO_8859-1:1987",
379 "no_NO.ISO8859-1:       ISO_8859-1:1987",
380 "pl_PL.ISO8859-2:       ISO_8859-2:1987",
381 "pl_PL:                 ISO_8859-2:1987",
382 "pt_PT.ISO8859-1:       ISO_8859-1:1987",
383 "sk_SK.ISO8859-2:       ISO_8859-2:1987",
384 "sk_SK:                 ISO_8859-2:1987",
385 "sv_SE.ISO8859-1:       ISO_8859-1:1987",
386 "zh_CN:                 GB2312",
387 "zh_HK.big5:            Big5",
388 "zh_HK.eucTW:           cns11643_1",
389 "zh_TW.big5:            Big5",
390 "zh_TW.big5@chuyin:     Big5",
391 "zh_TW.big5@radical:    Big5",
392 "zh_TW.big5@stroke:     Big5",
393 "zh_TW.eucTW:           cns11643_1",
394 "zh_TW.eucTW@chuyin:    cns11643_1",
395 "zh_TW.eucTW@radical:   cns11643_1",
396 "zh_TW.eucTW@stroke:    cns11643_1",
397 "zh_TW:                 cns11643_1",
398 NULL
399 };
400 #elif defined(HPUX)
/*
 * Locale-name to IANA-charset table selected when HPUX is defined.
 * Entries have the form "<locale>:<blanks or tabs><charset>"; entries
 * starting with '!' are comments.  The list ends with a NULL pointer.
 */
const char *CHARCONVTABLE[] =
{
    "! This table maps the host's locale names to IANA charsets",
    "!",
    "C:\t\t\tISO_8859-1:1987",
    "ja_JP:\t\t\tExtended_UNIX_Code_Packed_Format_for_Japanese",
    "ja_JP.SJIS:\t\tShift_JIS",
    "ja_JP.eucJP:\t\tExtended_UNIX_Code_Packed_Format_for_Japanese",
    "es_ES:\t\t\tISO_8859-1:1987",
    "es_ES.iso88591:\tISO_8859-1:1987",
    "sv_SE:\t\t\tISO_8859-1:1987",
    "sv_SE.iso88591:\tISO_8859-1:1987",
    "da_DK:\t\t\tISO_8859-1:1987",
    "da_DK.iso88591:\tISO_8859-1:1987",
    "nl_NL:\t\t\tISO_8859-1:1987",
    "nl_NL.iso88591:\tISO_8859-1:1987",
    "en:\t\t\tISO_8859-1:1987",
    "en_GB:\t\t\tISO_8859-1:1987",
    "en_GB.iso88591:\tISO_8859-1:1987",
    "en_US:\t\t\tISO_8859-1:1987",
    "en_US.iso88591:\tISO_8859-1:1987",
    "fi_FI:\t\t\tISO_8859-1:1987",
    "fi_FI.iso88591:\tISO_8859-1:1987",
    "fr_CA:\t\t\tISO_8859-1:1987",
    "fr_CA.iso88591:\tISO_8859-1:1987",
    "fr_FR:\t\t\tISO_8859-1:1987",
    "fr_FR.iso88591:\tISO_8859-1:1987",
    "de_DE:\t\t\tISO_8859-1:1987",
    "de_DE.iso88591:\tISO_8859-1:1987",
    "is_IS:\t\t\tISO_8859-1:1987",
    "is_IS.iso88591:\tISO_8859-1:1987",
    "it_IT:\t\t\tISO_8859-1:1987",
    "it_IT.iso88591:\tISO_8859-1:1987",
    "no_NO:\t\t\tISO_8859-1:1987",
    "no_NO.iso88591:\tISO_8859-1:1987",
    "pt_PT:\t\t\tISO_8859-1:1987",
    "pt_PT.iso88591:\tISO_8859-1:1987",
    "hu_HU:\t\t\tISO_8859-2:1987",
    "hu_HU.iso88592:\tISO_8859-2:1987",
    "cs_CZ:\t\t\tISO_8859-2:1987",
    "cs_CZ.iso88592:\tISO_8859-2:1987",
    "pl_PL:\t\t\tISO_8859-2:1987",
    "pl_PL.iso88592:\tISO_8859-2:1987",
    "ro_RO:\t\t\tISO_8859-2:1987",
    "ro_RO.iso88592:\tISO_8859-2:1987",
    "hr_HR:\t\t\tISO_8859-2:1987",
    "hr_HR.iso88592:\tISO_8859-2:1987",
    "sk_SK:\t\t\tISO_8859-2:1987",
    "sk_SK.iso88592:\tISO_8859-2:1987",
    "sl_SI:\t\t\tISO_8859-2:1987",
    "sl_SI.iso88592:\tISO_8859-2:1987",
    "american.iso88591:     ISO_8859-1:1987",
    "bulgarian:             ISO_8859-2:1987",
    "c-french.iso88591:     ISO_8859-1:1987",
    "chinese-s:             GB2312",
    "chinese-t.big5:                Big5",
    "czech:                 ISO_8859-2:1987",
    "danish.iso88591:       ISO_8859-1:1987",
    "dutch.iso88591:                ISO_8859-1:1987",
    "english.iso88591:      ISO_8859-1:1987",
    "finnish.iso88591:      ISO_8859-1:1987",
    "french.iso88591:       ISO_8859-1:1987",
    "german.iso88591:       ISO_8859-1:1987",
    "hungarian:             ISO_8859-2:1987",
    "icelandic.iso88591:    ISO_8859-1:1987",
    "italian.iso88591:      ISO_8859-1:1987",
    "japanese.euc:          Extended_UNIX_Code_Packed_Format_for_Japanese",
    "japanese:              Shift_JIS",
    "katakana:              Shift_JIS",
    "korean:                        EUC-KR",
    "norwegian.iso88591:    ISO_8859-1:1987",
    "polish:                        ISO_8859-2:1987",
    "portuguese.iso88591:   ISO_8859-1:1987",
    "rumanian:              ISO_8859-2:1987",
    "serbocroatian:         ISO_8859-2:1987",
    "slovene:               ISO_8859-2:1987",
    "spanish.iso88591:      ISO_8859-1:1987",
    "swedish.iso88591:      ISO_8859-1:1987",
    NULL
};
481 #elif defined(AIX)
/*
 * Locale-name to IANA-charset table selected when AIX is defined.
 * Entries have the form "<locale>:<blanks><charset>"; entries starting
 * with '!' are comments.  The list ends with a NULL pointer.
 */
const char *CHARCONVTABLE[] =
{
    "! This table maps the host's locale names to IANA charsets",
    "!",
    "C:                     ISO_8859-1:1987",
    "En_JP.IBM-932:         Shift_JIS",
    "En_JP:                 Shift_JIS",
    "Ja_JP.IBM-932:         Shift_JIS",
    "Ja_JP:                 Shift_JIS",
    "da_DK.ISO8859-1:       ISO_8859-1:1987",
    "da_DK:                 ISO_8859-1:1987",
    "de_CH.ISO8859-1:       ISO_8859-1:1987",
    "de_CH:                 ISO_8859-1:1987",
    "de_DE.ISO8859-1:       ISO_8859-1:1987",
    "de_DE:                 ISO_8859-1:1987",
    "en_GB.ISO8859-1:       ISO_8859-1:1987",
    "en_GB:                 ISO_8859-1:1987",
    "en_JP.IBM-eucJP:       Extended_UNIX_Code_Packed_Format_for_Japanese",
    "en_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
    "en_KR.IBM-eucKR:       EUC-KR",
    "en_KR:                 EUC-KR",
    "en_TW.IBM-eucTW:       cns11643_1",
    "en_TW:                 cns11643_1",
    "en_US.ISO8859-1:       ISO_8859-1:1987",
    "en_US:                 ISO_8859-1:1987",
    "es_ES.ISO8859-1:       ISO_8859-1:1987",
    "es_ES:                 ISO_8859-1:1987",
    "fi_FI.ISO8859-1:       ISO_8859-1:1987",
    "fi_FI:                 ISO_8859-1:1987",
    "fr_BE.ISO8859-1:       ISO_8859-1:1987",
    "fr_BE:                 ISO_8859-1:1987",
    "fr_CA.ISO8859-1:       ISO_8859-1:1987",
    "fr_CA:                 ISO_8859-1:1987",
    "fr_CH.ISO8859-1:       ISO_8859-1:1987",
    "fr_CH:                 ISO_8859-1:1987",
    "fr_FR.ISO8859-1:       ISO_8859-1:1987",
    "fr_FR:                 ISO_8859-1:1987",
    "is_IS.ISO8859-1:       ISO_8859-1:1987",
    "is_IS:                 ISO_8859-1:1987",
    "it_IT.ISO8859-1:       ISO_8859-1:1987",
    "it_IT:                 ISO_8859-1:1987",
    "ja_JP.IBM-eucJP:       Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ja_JP:                 Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ko_KR.IBM-eucKR:       EUC-KR",
    "ko_KR:                 EUC-KR",
    "nl_BE.ISO8859-1:       ISO_8859-1:1987",
    "nl_BE:                 ISO_8859-1:1987",
    "nl_NL.ISO8859-1:       ISO_8859-1:1987",
    "nl_NL:                 ISO_8859-1:1987",
    "no_NO.ISO8859-1:       ISO_8859-1:1987",
    "no_NO:                 ISO_8859-1:1987",
    "pt_PT.ISO8859-1:       ISO_8859-1:1987",
    "pt_PT:                 ISO_8859-1:1987",
    "sv_SE.ISO8859-1:       ISO_8859-1:1987",
    "sv_SE:                 ISO_8859-1:1987",
    "zh_TW.IBM-eucTW:       cns11643_1",
    "zh_TW:                 cns11643_1",
    NULL
};
541 #else   // sunos by default
/*
 * Locale-name to IANA-charset table used when none of the platform macros
 * tested before it is defined (the "sunos by default" case).
 * Entries have the form "<locale>:<blanks><charset>"; entries starting
 * with '!' are comments.  The list ends with a NULL pointer.
 */
const char *CHARCONVTABLE[] =
{
    "! This table maps the host's locale names to IANA charsets",
    "!",
    "C:             ISO_8859-1:1987",
    "de:            ISO_8859-1:1987",
    "en_US:         ISO_8859-1:1987",
    "es:            ISO_8859-1:1987",
    "fr:            ISO_8859-1:1987",
    "iso_8859_1:    ISO_8859-1:1987",
    "it:            ISO_8859-1:1987",
    "ja:            Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ja_JP.EUC:     Extended_UNIX_Code_Packed_Format_for_Japanese",
    "japanese:      Extended_UNIX_Code_Packed_Format_for_Japanese",
    "ko:            EUC-KR",
    "sv:            ISO_8859-1:1987",
    "zh:            GB2312",
    "zh_TW:         cns11643_1",
    NULL
};
562 #endif
563 
564 #define BSZ     256
565 
566 char *
567 GetCharsetFromLocale(char *locale)
568 {
569     char *tmpcharset = NULL;
570     char buf[BSZ];
571     char *p;
572     const char *line;
573     int i=0;
574 
575     line = CHARCONVTABLE[i];
576     while (line != NULL)
577     {
578        if (*line == 0)
579        {
580           break;
581        }
582 
583        strcpy(buf, line);
584        line = CHARCONVTABLE[++i];
585 
586        if (strlen(buf) == 0 || buf[0] == '!')
587        {
588           continue;
589        }
590        p = strchr(buf, ':');
591        if (p == NULL)
592        {
593           tmpcharset = NULL;
594           break;
595        }
596        *p = 0;
597        if (strcmp(buf, locale) == 0) {
598           while (*++p == ' ' || *p == '\t')
599              ;
600           if (isalpha(*p)) {
601              tmpcharset = strdup(p);
602           } else
603              tmpcharset = NULL;
604 
605           break;
606        }
607     }
608     return tmpcharset;
609 }
610 
611 #endif /* Not defined XP_WIN32 */
612 
613 #ifdef XP_WIN32
614 char *_convertor(const char *instr, int bFromUTF8)
615 {
616     char  *outstr = NULL;
617     int    inlen, wclen, outlen;
618     LPWSTR wcstr;
619 
620     if (instr == NULL)
621             return NULL;
622 
623     if ((inlen = strlen(instr)) <= 0)
624             return NULL;
625 
626     /* output never becomes longer than input,
627      * thus we don't have to ask for the length
628      */
629     wcstr = (LPWSTR) malloc( sizeof( WCHAR ) * (inlen+1) );
630     if (!wcstr)
631         return NULL;
632 
633     wclen = MultiByteToWideChar(bFromUTF8 ? CP_UTF8 : CP_ACP, 0, instr,
634                                  inlen, wcstr, inlen);
635     outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
636                                   wclen, NULL, 0, NULL, NULL);
637 
638     if (outlen > 0) {
639         outstr = (char *) malloc(outlen + 2);
640         outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
641                                       wclen, outstr, outlen, NULL, NULL);
642         if (outlen > 0)
643             *(outstr+outlen) = _T('\0');
644         else
645             return NULL;
646     }
647     free( wcstr );
648     return outstr;
649 }
650 #endif
651 
652 char *
653 ldaptool_local2UTF8( const char *src )
654 {
655     char *utf8;
656 #ifndef XP_WIN32
657     char *locale, *newcharset;
658     size_t outLen, resultLen;
659     UErrorCode err = U_ZERO_ERROR;
660     UConverter *cnv;
661 
662     if (src == NULL)
663     {
664       return NULL;
665     }
666     else if (*src == 0 || (ldaptool_charset == NULL)
667 	     || (!strcmp( ldaptool_charset, "" )))
668     {
669 	/* no option specified, so assume it's already in utf-8 */
670         utf8 = strdup(src);
671         return utf8;
672     }
673 
674     if( !strcmp( ldaptool_charset, "0" )
675 	    && (!charsetset) )
676     {
677 	/* zero option specified, so try to get default codepage
678 	   this sucker is strdup'd immediately so it's OK to cast */
679 	newcharset = (char *)ucnv_getDefaultName();
680 	if (newcharset != NULL) {
681 	    free( ldaptool_charset );
682 	    /* the default codepage lives in ICU */
683 	    ldaptool_charset = strdup(newcharset);
684 	    if (ldaptool_charset == NULL) {
685 		return strdup(src);
686 	    }
687 	}
688 	charsetset = 1;
689     }
690     else
691     if( strcmp( ldaptool_charset, "" ) && (!charsetset) )
692     {
693 	/* -i option specified with charset name */
694         charsetset = 1;
695     }
696 
697     /* do the preflight - get the size needed for the target buffer */
698     outLen = (size_t) ucnv_convert( "utf-8", ldaptool_charset, NULL, 0, src,
699                                       strlen( src ) * sizeof(char), &err);
700 
701     if ((err != U_BUFFER_OVERFLOW_ERROR) || (outLen == 0)) {
702       /* default to just a copy of the string - this covers
703          the case of an illegal charset also */
704       return strdup(src);
705     }
706 
707     utf8 =  (char *) malloc( outLen + 1);
708     if( utf8 == NULL ) {
709       /* if we're already out of memory, does strdup just return NULL? */
710        return strdup(src);
711     }
712 
713     /* do the actual conversion this time */
714     err = U_ZERO_ERROR;
715     resultLen = ucnv_convert( "utf-8", ldaptool_charset, utf8, (outLen + 1), src,
716 		       strlen(src) * sizeof(char), &err );
717 
718     if (!U_SUCCESS(err)) {
719       free(utf8);
720       return strdup(src);
721     }
722 
723 #else
724     utf8 = _convertor(src, FALSE);
725     if( utf8 == NULL )
726         utf8 = strdup(src);
727 #endif
728 
729     return utf8;
730 }
731 #endif /* HAVE_LIBICU */
732 
733 #ifndef HAVE_LIBICU
734 #ifdef __cplusplus
735 }
736 #endif
737 #endif
738