1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 * 21 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 22 * Use is subject to license terms. 23 */ 24 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 #include <errno.h> 28 #include <locale.h> 29 #include <langinfo.h> 30 #include <iconv.h> 31 #include <ctype.h> 32 #include <strings.h> 33 #include <string.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include "includes.h" 37 #include "xmalloc.h" 38 #include "xlist.h" 39 40 #ifdef MIN 41 #undef MIN 42 #endif /* MIN */ 43 44 #define MIN(x, y) ((x) < (y) ? (x) : (y)) 45 46 #define LOCALE_PATH "/usr/bin/locale" 47 48 #define LANGTAG_MAX 5 /* two-char country code, '-' and two-char region code */ 49 50 static u_char * do_iconv(iconv_t cd, u_int *mul_ptr, 51 const void *buf, u_int len, 52 u_int *outlen, int *err, 53 u_char **err_str); 54 55 static int locale_cmp(const void *d1, const void *d2); 56 static char *g11n_locale2langtag(char *locale); 57 58 u_int 59 g11n_validate_ascii(const char *str, u_int len, u_char **error_str); 60 61 u_int 62 g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str); 63 64 static 65 char * 66 g11n_locale2langtag(char *locale) 67 { 68 char *langtag; 69 70 /* base cases */ 71 if (!locale || !*locale) return NULL; 72 73 if (strcmp(locale, "POSIX") == 0 || 74 strcmp(locale, "C") == 0) return "i-default"; 75 76 /* Punt for language codes which are not exactly 2 letters */ 77 if (strlen(locale) < 2 || 78 !isalpha(locale[0]) || 79 !isalpha(locale[1]) || 80 (locale[2] != '\0' && 81 locale[2] != '_' && 82 locale[2] != '.' && 83 locale[2] != '@')) 84 return NULL; 85 86 87 /* We have a primary language sub-tag */ 88 langtag = (char *) xmalloc(LANGTAG_MAX + 1); 89 90 strncpy(langtag, locale, 2); 91 langtag[2] = '\0'; 92 93 /* Do we have country sub-tag? */ 94 if (locale[2] == '_') { 95 if (strlen(locale) < 5 || 96 !isalpha(locale[3]) || 97 !isalpha(locale[4]) || 98 (locale[5] != '\0' && (locale[5] != '.' && locale[5] != '@'))) { 99 return langtag; 100 } 101 102 /* yes, we do */ 103 /* if (snprintf(langtag, 6, "%s-%s,%s", lang_subtag, 104 country_subtag, langtag) == 8) */ 105 if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 106 2, locale+3) == 5) 107 return langtag; 108 } 109 110 /* In all other cases we just use the primary language sub-tag */ 111 return langtag; 112 } 113 114 u_int 115 g11n_langtag_is_default(char *langtag) 116 { 117 return (strcmp(langtag, "i-default") == 0); 118 } 119 120 /* 121 * This lang tag / locale matching function works only for two-character 122 * language primary sub-tags and two-character country sub-tags. 123 */ 124 u_int 125 g11n_langtag_matches_locale(char *langtag, char *locale) 126 { 127 /* Match "i-default" to the process' current locale if possible */ 128 if (g11n_langtag_is_default(langtag)) { 129 if (strcasecmp(locale, "POSIX") == 0 || 130 strcasecmp(locale, "C") == 0) 131 return 1; 132 else 133 return 0; 134 } 135 136 /* locale must be at least 2 chars long and the lang part must be 137 * exactly two characters */ 138 if (strlen(locale) < 2 || 139 (!isalpha(locale[0]) || !isalpha(locale[1]) || 140 (locale[2] != '\0' && locale[2] != '_' && locale[2] != '.' && locale[2] != '@'))) 141 return 0; 142 143 /* same thing with the langtag */ 144 if (strlen(langtag) < 2 || 145 (!isalpha(langtag[0]) || !isalpha(langtag[1]) || 146 (langtag[2] != '\0' && langtag[2] != '-'))) 147 return 0; 148 149 /* primary language sub-tag and the locale's language part must match */ 150 if (strncasecmp(langtag, locale, 2) != 0) 151 return 0; 152 153 /* primary language sub-tag and the locale's language match, now 154 * fuzzy check country part */ 155 156 /* neither langtag nor locale have more than one component */ 157 if (langtag[2] == '\0' && 158 (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')) 159 return 2; 160 161 /* langtag has only one sub-tag... */ 162 if (langtag[2] == '\0') 163 return 1; 164 165 /* locale has no country code... */ 166 if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@') 167 return 1; 168 169 /* langtag has more than one subtag and the locale has a country code */ 170 171 /* ignore second subtag if not two chars */ 172 if (strlen(langtag) < 5) 173 return 1; 174 175 if (!isalpha(langtag[3]) || !isalpha(langtag[4]) || 176 (langtag[5] != '\0' && langtag[5] != '-')) 177 return 1; 178 179 /* ignore rest of locale if there is no two-character country part */ 180 if (strlen(locale) < 5) 181 return 1; 182 183 if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) || 184 (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@')) 185 return 1; 186 187 /* if the country part matches, return 2 */ 188 if (strncasecmp(&langtag[3], &locale[3], 2) == 0) 189 return 2; 190 191 return 1; 192 } 193 194 char * 195 g11n_getlocale() 196 { 197 /* We have one text domain - always set it */ 198 (void) textdomain(TEXT_DOMAIN); 199 200 /* If the locale is not set, set it from the env vars */ 201 if (!setlocale(LC_MESSAGES, NULL)) 202 (void) setlocale(LC_MESSAGES, ""); 203 204 return setlocale(LC_MESSAGES, NULL); 205 } 206 207 void 208 g11n_setlocale(int category, const char *locale) 209 { 210 char *curr; 211 212 /* We have one text domain - always set it */ 213 (void) textdomain(TEXT_DOMAIN); 214 215 if (!locale) 216 return; 217 218 if (*locale && ((curr = setlocale(category, NULL))) && 219 strcmp(curr, locale) == 0) 220 return; 221 222 /* 223 * If <category> is bogus, setlocale() will do nothing. 224 */ 225 (void) setlocale(category, locale); 226 227 return; 228 } 229 230 char ** 231 g11n_getlocales() 232 { 233 FILE *locale_out; 234 u_int n_elems, list_size, long_line = 0; 235 char **list; 236 char locale[64]; /* 64 bytes is plenty for locale names */ 237 238 if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL) { 239 return NULL; 240 } 241 242 /* 243 * Start with enough room for 65 locales - that's a lot fewer than 244 * all the locales available for installation, but a lot more than 245 * what most users will need and install 246 */ 247 n_elems=0; 248 list_size=192; 249 list = (char **) xmalloc(sizeof(char *) * (list_size + 1)); 250 memset(list, 0, sizeof(char *) * (list_size + 1)); 251 252 while (fgets(locale, sizeof(locale), locale_out)) { 253 /* skip long locale names (if any) */ 254 if (!strchr(locale, '\n')) { 255 long_line = 1; 256 continue; 257 } 258 else if (long_line) { 259 long_line = 0; 260 continue; 261 } 262 if (strncmp(locale, "iso_8859", 8) == 0) 263 continue; /* ignore locale names like "iso_8859-1" */ 264 265 if (n_elems == list_size) { 266 list_size *= 2; 267 list = (char **) xrealloc((void *) list, (list_size + 1) * sizeof(char *)); 268 memset(&list[n_elems+1], 0, sizeof(char *) * (list_size - n_elems + 1)); 269 } 270 271 *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */ 272 273 list[n_elems++] = xstrdup(locale); 274 } 275 list[n_elems] = NULL; 276 (void) pclose(locale_out); 277 278 qsort(list, n_elems - 1, sizeof(char *), locale_cmp); 279 return list; 280 } 281 282 char * 283 g11n_getlangs() 284 { 285 char *locale; 286 287 if (getenv("SSH_LANGS")) 288 return xstrdup(getenv("SSH_LANGS")); 289 290 locale = g11n_getlocale(); 291 292 if (!locale || !*locale) 293 return xstrdup("i-default"); 294 295 return g11n_locale2langtag(locale); 296 } 297 298 char * 299 g11n_locales2langs(char **locale_set) 300 { 301 char **p, **r, **q; 302 char *langtag; 303 int locales, skip; 304 305 for (locales = 0, p = locale_set ; p && *p ; p++) 306 locales++; 307 308 r = (char **) xmalloc((locales + 1) * sizeof(char *)); 309 memset(r, 0, (locales + 1) * sizeof(char *)); 310 311 for (p = locale_set ; p && *p && ((p - locale_set) <= locales); p++) { 312 skip = 0; 313 if ((langtag = g11n_locale2langtag(*p)) == NULL) 314 continue; 315 for (q = r ; (q - r) < locales ; q++) { 316 if (!*q) break; 317 if (*q && strcmp(*q, langtag) == 0) 318 skip = 1; 319 } 320 if (!skip) 321 *(q++) = langtag; 322 *q = NULL; 323 } 324 return xjoin(r, ','); 325 } 326 327 static 328 int 329 sortcmp(const void *d1, const void *d2) 330 { 331 char *s1 = *(char **)d1; 332 char *s2 = *(char **)d2; 333 334 return strcmp(s1, s2); 335 } 336 337 int 338 g11n_langtag_match(char *langtag1, char *langtag2) 339 { 340 int len1, len2; 341 char c1, c2; 342 343 len1 = (strchr(langtag1, '-')) ? 344 (strchr(langtag1, '-') - langtag1) 345 : strlen(langtag1); 346 347 len2 = (strchr(langtag2, '-')) ? 348 (strchr(langtag2, '-') - langtag2) 349 : strlen(langtag2); 350 351 /* no match */ 352 if (len1 != len2 || 353 strncmp(langtag1, langtag2, len1) != 0) 354 return 0; 355 356 c1 = *(langtag1 + len1); 357 c2 = *(langtag2 + len2); 358 359 /* no country sub-tags - exact match */ 360 if (c1 == '\0' && c2 == '\0') 361 return 2; 362 363 /* one langtag has a country sub-tag, the other doesn't */ 364 if (c1 == '\0' || c2 == '\0') 365 return 1; 366 367 /* can't happen - both langtags have a country sub-tag */ 368 if (c1 != '-' || c2 != '-') 369 return 1; 370 371 /* compare country subtags */ 372 langtag1 = langtag1 + len1 + 1; 373 langtag2 = langtag2 + len2 + 1; 374 375 len1 = (strchr(langtag1, '-')) ? 376 (strchr(langtag1, '-') - langtag1) 377 : strlen(langtag1); 378 379 len2 = (strchr(langtag2, '-')) ? 380 (strchr(langtag2, '-') - langtag2) 381 : strlen(langtag2); 382 383 if (len1 != len2 || 384 strncmp(langtag1, langtag2, len1) != 0) 385 return 1; 386 387 /* country tags matched - exact match */ 388 return 2; 389 } 390 391 char * 392 g11n_langtag_set_intersect(char *set1, char *set2) 393 { 394 char **list1, **list2, **list3, **p, **q, **r; 395 char *set3, *lang_subtag; 396 u_int n1, n2, n3; 397 u_int do_append; 398 399 list1 = xsplit(set1, ','); 400 list2 = xsplit(set2, ','); 401 for (n1 = 0, p = list1 ; p && *p ; p++, n1++) ; 402 for (n2 = 0, p = list2 ; p && *p ; p++, n2++) ; 403 404 list3 = (char **) xmalloc(sizeof(char *) * (n1 + n2 + 1)); 405 *list3 = NULL; 406 407 /* we must not sort the user langtags - sorting or not the server's 408 * should not affect the outcome 409 */ 410 qsort(list2, n2, sizeof(char *), sortcmp); 411 412 for (n3 = 0, p = list1 ; p && *p ; p++) { 413 do_append = 0; 414 for (q = list2 ; q && *q ; q++) { 415 if (g11n_langtag_match(*p, *q) != 2) continue; 416 /* append element */ 417 for (r = list3; (r - list3) <= (n1 + n2) ; r++) { 418 do_append = 1; 419 if (!*r) break; 420 if (strcmp(*p, *r) == 0) { 421 do_append = 0; 422 break; 423 } 424 } 425 if (do_append && n3 <= (n1 + n2)) { 426 list3[n3++] = xstrdup(*p); 427 list3[n3] = NULL; 428 } 429 } 430 } 431 432 for (p = list1 ; p && *p ; p++) { 433 do_append = 0; 434 for (q = list2 ; q && *q ; q++) { 435 if (g11n_langtag_match(*p, *q) != 1) continue; 436 /* append element */ 437 lang_subtag = xstrdup(*p); 438 if (strchr(lang_subtag, '-')) 439 *(strchr(lang_subtag, '-')) = '\0'; 440 for (r = list3; (r - list3) <= (n1 + n2) ; r++) { 441 do_append = 1; 442 if (!*r) break; 443 if (strcmp(lang_subtag, *r) == 0) { 444 do_append = 0; 445 break; 446 } 447 } 448 if (do_append && n3 <= (n1 + n2)) { 449 list3[n3++] = lang_subtag; 450 list3[n3] = NULL; 451 } 452 else 453 xfree(lang_subtag); 454 } 455 } 456 457 set3 = xjoin(list3, ','); 458 xfree_split_list(list1); 459 xfree_split_list(list2); 460 xfree_split_list(list3); 461 462 return set3; 463 } 464 465 char * 466 g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags) 467 { 468 char *list, *result; 469 char **xlist; 470 471 /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */ 472 list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags); 473 474 if (!list) 475 return NULL; 476 477 xlist = xsplit(list, ','); 478 479 xfree(list); 480 481 if (!xlist || !*xlist) 482 return NULL; 483 484 result = xstrdup(*xlist); 485 486 xfree_split_list(xlist); 487 488 return result; 489 } 490 491 /* 492 * Compare locales, preferring UTF-8 codesets to others, otherwise doing 493 * a stright strcmp() 494 */ 495 static 496 int 497 locale_cmp(const void *d1, const void *d2) 498 { 499 char *dot_ptr; 500 char *s1 = *(char **)d1; 501 char *s2 = *(char **)d2; 502 int s1_is_utf8 = 0; 503 int s2_is_utf8 = 0; 504 505 /* check if s1 is a UTF-8 locale */ 506 if (((dot_ptr = strchr((char *) s1, '.')) != NULL) && (*dot_ptr != '\0') && 507 (strncmp(dot_ptr+1, "UTF-8", 5) == 0) && 508 (*(dot_ptr+6) == '\0' || *(dot_ptr+6) == '@')) { 509 s1_is_utf8++; 510 } 511 /* check if s2 is a UTF-8 locale */ 512 if (((dot_ptr = strchr((char *) s2, '.')) != NULL) && (*dot_ptr != '\0') && 513 (strncmp(dot_ptr+1, "UTF-8", 5) == 0) && 514 (*(dot_ptr+6) == '\0' || *(dot_ptr+6) == '@')) { 515 s2_is_utf8++; 516 } 517 518 /* prefer UTF-8 locales */ 519 if (s1_is_utf8 && !s2_is_utf8) 520 return -1; 521 522 if (s2_is_utf8 && !s1_is_utf8) 523 return 1; 524 525 /* prefer any locale over the default locales */ 526 if (strcmp(s1, "C") == 0 || 527 strcmp(s1, "POSIX") == 0 || 528 strcmp(s1, "common") == 0) 529 if (strcmp(s2, "C") != 0 && 530 strcmp(s2, "POSIX") != 0 && 531 strcmp(s2, "common") != 0) 532 return 1; 533 534 if (strcmp(s2, "C") == 0 || 535 strcmp(s2, "POSIX") == 0 || 536 strcmp(s2, "common") == 0) 537 if (strcmp(s1, "C") != 0 && 538 strcmp(s1, "POSIX") != 0 && 539 strcmp(s1, "common") != 0) 540 return -1; 541 542 return strcmp(s1, s2); 543 } 544 545 546 char ** 547 g11n_langtag_set_locale_set_intersect(char *langtag_set, 548 char **locale_set) 549 { 550 char **langtag_list, **result, **p, **q, **r; 551 char *s; 552 u_int do_append, n_langtags, n_locales, n_results, max_results; 553 554 /* Count lang tags and locales */ 555 for (n_locales = 0, p = locale_set ; p && *p ; p++) n_locales++; 556 n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0; 557 for ( ; s = strchr(s, ',') ; s++, n_langtags++) ; 558 /* 559 while ((s = strchr(s, ','))) { 560 n_langtags++; 561 s++; 562 } 563 */ 564 565 qsort(locale_set, n_locales, sizeof(char *), locale_cmp); 566 567 langtag_list = xsplit(langtag_set, ','); 568 for ( n_langtags = 0, p = langtag_list ; p && *p ; p++, n_langtags++); 569 570 max_results = MIN(n_locales, n_langtags) * 2; 571 result = (char **) xmalloc(sizeof(char *) * (max_results + 1)); 572 *result = NULL; 573 n_results = 0; 574 575 /* More specific matches first */ 576 for (p = langtag_list ; p && *p ; p++) { 577 do_append = 0; 578 for (q = locale_set ; q && *q ; q++) { 579 if (g11n_langtag_matches_locale(*p, *q) == 2) { 580 do_append = 1; 581 for (r = result ; (r - result) <= MIN(n_locales, n_langtags) ; r++) { 582 if (!*r) break; 583 if (strcmp(*q, *r) == 0) { 584 do_append = 0; 585 break; 586 } 587 } 588 if (do_append && n_results < max_results) { 589 result[n_results++] = xstrdup(*q); 590 result[n_results] = NULL; 591 } 592 break; 593 } 594 } 595 } 596 597 for (p = langtag_list ; p && *p ; p++) { 598 do_append = 0; 599 for (q = locale_set ; q && *q ; q++) { 600 if (g11n_langtag_matches_locale(*p, *q) == 1) { 601 do_append = 1; 602 for (r = result ; (r - result) <= MIN(n_locales, n_langtags) ; r++) { 603 if (!*r) break; 604 if (strcmp(*q, *r) == 0) { 605 do_append = 0; 606 break; 607 } 608 } 609 if (do_append && n_results < max_results) { 610 result[n_results++] = xstrdup(*q); 611 result[n_results] = NULL; 612 } 613 break; 614 } 615 } 616 } 617 xfree_split_list(langtag_list); 618 619 return result; 620 } 621 622 char * 623 g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales) 624 { 625 char **results, *result = NULL; 626 627 if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags, 628 srvr_locales ? srvr_locales : g11n_getlocales())) == NULL) 629 return NULL; 630 631 if (*results != NULL) 632 result = xstrdup(*results); 633 634 xfree_split_list(results); 635 636 return result; 637 } 638 639 640 /* 641 * Functions for validating ASCII and UTF-8 strings 642 * 643 * The error_str parameter is an optional pointer to a char variable 644 * where to store a string suitable for use with error() or fatal() or 645 * friends. 646 * 647 * The return value is 0 if success, EILSEQ or EINVAL. 648 * 649 */ 650 651 u_int 652 g11n_validate_ascii(const char *str, u_int len, u_char **error_str) 653 { 654 u_char *p; 655 656 for (p = (u_char *) str ; p && *p && (!(*p & 0x80)) ; p++) ; 657 658 if (len && ((p - (u_char *) str) != len)) { 659 return EILSEQ; 660 } 661 return 0; 662 } 663 664 u_int 665 g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str) 666 { 667 u_char *p; 668 u_int c, l; 669 670 if (len == 0) len = strlen((const char *)str); 671 672 for (p = (u_char *) str ; p && (p - str < len) && *p ; ) { 673 /* 8-bit chars begin a UTF-8 sequence */ 674 if (*p & 0x80) { 675 /* Get sequence length and sanity check first byte */ 676 if (*p < 0xc0) 677 return EILSEQ; 678 else if (*p < 0xe0) 679 l=2; 680 else if (*p < 0xf0) 681 l=3; 682 else if (*p < 0xf8) 683 l=4; 684 else if (*p < 0xfc) 685 l=5; 686 else if (*p < 0xfe) 687 l=6; 688 else 689 return EILSEQ; 690 691 if ((p + l - str) >= len) 692 return EILSEQ; 693 694 /* overlong detection - build codepoint */ 695 c = *p & 0x3f; 696 c = c << (6 * (l-1)); /* shift c bits from first byte */ 697 698 if (l > 1) { 699 if (*(p+1) && ((*(p+1) & 0xc0) == 0x80)) 700 c = c | ((*(p+1) & 0x3f) << (6 * (l-2))); 701 else 702 return EILSEQ; 703 if (c < 0x80) 704 return EILSEQ; 705 } 706 if (l > 2) { 707 if (*(p+2) && ((*(p+2) & 0xc0) == 0x80)) 708 c = c | ((*(p+2) & 0x3f) << (6 * (l-3))); 709 else 710 return EILSEQ; 711 if (c < 0x800) 712 return EILSEQ; 713 } 714 if (l > 3) { 715 if (*(p+3) && ((*(p+3) & 0xc0) == 0x80)) 716 c = c | ((*(p+3) & 0x3f) << (6 * (l-4))); 717 else 718 return EILSEQ; 719 if (c < 0x10000) 720 return EILSEQ; 721 } 722 if (l > 4) { 723 if (*(p+4) && ((*(p+4) & 0xc0) == 0x80)) 724 c = c | ((*(p+4) & 0x3f) << (6 * (l-5))); 725 else 726 return EILSEQ; 727 if (c < 0x200000) 728 return EILSEQ; 729 } 730 if (l > 5) { 731 if (*(p+5) && ((*(p+5) & 0xc0) == 0x80)) 732 c = c | (*(p+5) & 0x3f) ; 733 else 734 return EILSEQ; 735 if (c < 0x4000000) 736 return EILSEQ; 737 } 738 739 /* Check for UTF-16 surrogates ifs other illegal UTF-8 * points */ 740 if (((c <= 0xdfff) && (c >= 0xd800)) || 741 (c == 0xfffe) || (c == 0xffff)) 742 return EILSEQ; 743 p += l; 744 } 745 /* 7-bit chars are fine */ 746 else 747 p++; 748 } 749 return 0; 750 } 751 752 /* 753 * Functions for converting to ASCII or UTF-8 from the local codeset 754 * Functions for converting from ASCII or UTF-8 to the local codeset 755 * 756 * The error_str parameter is an optional pointer to a char variable 757 * where to store a string suitable for use with error() or fatal() or 758 * friends. 759 * 760 * The err parameter is an optional pointer to an integer where 0 761 * (success) or EILSEQ or EINVAL will be stored (failure). 762 * 763 * These functions return NULL if the conversion fails. 764 * 765 */ 766 767 u_char * 768 g11n_convert_from_ascii(const char *str, int *err_ptr, u_char **error_str) 769 { 770 static u_int initialized = 0; 771 static u_int do_convert = 0; 772 iconv_t cd; 773 int err; 774 775 if (!initialized) { 776 /* 777 * iconv_open() fails if the to/from codesets are the 778 * same, and there are aliases of codesets to boot... 779 */ 780 if (strcmp("646", nl_langinfo(CODESET)) == 0 || 781 strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 782 strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 783 initialized = 1; 784 do_convert = 0; 785 } 786 else { 787 cd = iconv_open(nl_langinfo(CODESET), "646"); 788 if (cd == (iconv_t) -1) { 789 if (err_ptr) *err_ptr = errno; 790 if (error_str) *error_str = (u_char *) 791 "Cannot convert ASCII strings to the local codeset"; 792 } 793 initialized = 1; 794 do_convert = 1; 795 } 796 } 797 798 if (!do_convert) { 799 if ((err = g11n_validate_ascii(str, 0, error_str))) { 800 if (err_ptr) *err_ptr = err; 801 return NULL; 802 } 803 else 804 return (u_char *) xstrdup(str); 805 } 806 return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 807 } 808 809 u_char * 810 g11n_convert_from_utf8(const u_char *str, int *err_ptr, u_char **error_str) 811 { 812 static u_int initialized = 0; 813 static u_int do_convert = 0; 814 iconv_t cd; 815 int err; 816 817 if (!initialized) { 818 /* 819 * iconv_open() fails if the to/from codesets are the 820 * same, and there are aliases of codesets to boot... 821 */ 822 if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 823 strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 824 initialized = 1; 825 do_convert = 0; 826 } 827 else { 828 cd = iconv_open(nl_langinfo(CODESET), "UTF-8"); 829 if (cd == (iconv_t) -1) { 830 if (err_ptr) *err_ptr = errno; 831 if (error_str) *error_str = (u_char *) 832 "Cannot convert UTF-8 strings to the local codeset"; 833 } 834 initialized = 1; 835 do_convert = 1; 836 } 837 } 838 839 if (!do_convert) { 840 if ((err = g11n_validate_utf8(str, 0, error_str))) { 841 if (err_ptr) *err_ptr = err; 842 return NULL; 843 } 844 else 845 return (u_char *) xstrdup((char *) str); 846 } 847 return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 848 } 849 850 char * 851 g11n_convert_to_ascii(const u_char *str, int *err_ptr, u_char **error_str) 852 { 853 static u_int initialized = 0; 854 static u_int do_convert = 0; 855 iconv_t cd; 856 857 if (!initialized) { 858 /* 859 * iconv_open() fails if the to/from codesets are the 860 * same, and there are aliases of codesets to boot... 861 */ 862 if (strcmp("646", nl_langinfo(CODESET)) == 0 || 863 strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 864 strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 865 initialized = 1; 866 do_convert = 0; 867 } 868 else { 869 cd = iconv_open("646", nl_langinfo(CODESET)); 870 if (cd == (iconv_t) -1) { 871 if (err_ptr) *err_ptr = errno; 872 if (error_str) *error_str = (u_char *) 873 "Cannot convert UTF-8 strings to the local codeset"; 874 } 875 initialized = 1; 876 do_convert = 1; 877 } 878 } 879 880 if (!do_convert) 881 return xstrdup((char *) str); 882 return (char *) do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 883 } 884 885 u_char * 886 g11n_convert_to_utf8(const u_char *str, int *err_ptr, u_char **error_str) 887 { 888 static u_int initialized = 0; 889 static u_int do_convert = 0; 890 iconv_t cd; 891 892 if (!initialized) { 893 /* 894 * iconv_open() fails if the to/from codesets are the 895 * same, and there are aliases of codesets to boot... 896 */ 897 if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 898 strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 899 initialized = 1; 900 do_convert = 0; 901 } 902 else { 903 cd = iconv_open("UTF-8", nl_langinfo(CODESET)); 904 if (cd == (iconv_t) -1) { 905 if (err_ptr) *err_ptr = errno; 906 if (error_str) *error_str = (u_char *) 907 "Cannot convert UTF-8 strings to the local codeset"; 908 } 909 initialized = 1; 910 do_convert = 1; 911 } 912 } 913 914 if (!do_convert) 915 return (u_char *) xstrdup((char *) str); 916 return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 917 } 918 919 920 /* 921 * Wrapper around iconv() 922 * 923 * The caller is responsible for freeing the result and for handling 924 * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF). 925 */ 926 927 static 928 u_char * 929 do_iconv(iconv_t cd, u_int *mul_ptr, 930 const void *buf, u_int len, 931 u_int *outlen, int *err, 932 u_char **err_str) 933 { 934 size_t inbytesleft, outbytesleft, converted_size; 935 char *outbuf; 936 u_char *converted; 937 const char *inbuf; 938 u_int mul = 0; 939 940 if (!buf || !(*(char *)buf)) return NULL; 941 if (len == 0) len = strlen(buf); 942 /* reset conversion descriptor */ 943 /* XXX Do we need initial shift sequences for UTF-8??? */ 944 (void) iconv(cd, NULL, &inbytesleft, &outbuf, &outbytesleft); 945 inbuf = (const char *) buf; 946 if (mul_ptr) mul = *mul_ptr; 947 converted_size = (len << mul); 948 outbuf = (char *) xmalloc(converted_size + 1); /* for null */ 949 converted = (u_char *) outbuf; 950 outbytesleft = len; 951 do { 952 if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == 953 (size_t) -1) { 954 if (errno == E2BIG) { 955 /* UTF-8 codepoints are at most 8 bytes long. */ 956 if (mul > 2) { 957 if (err_str) 958 *err_str = (u_char *) "Conversion to UTF-8 failed due to" 959 "preposterous space requirements"; 960 if (err) 961 *err = EILSEQ; 962 return NULL; 963 } 964 965 /* 966 * Re-alloc output and ensure that the outbuf 967 * and outbytesleft values are adjusted. 968 */ 969 converted = xrealloc(converted, converted_size << 1 + 1); 970 outbuf = (char *) converted + converted_size - outbytesleft; 971 converted_size = (len << ++(mul)); 972 outbytesleft = converted_size - outbytesleft; 973 } 974 else { 975 /* 976 * Let the caller deal with iconv() errors, probably by 977 * calling fatal(); xfree() does not set errno. 978 */ 979 if (err) *err = errno; 980 xfree(converted); 981 return NULL; 982 } 983 } 984 } while (inbytesleft); 985 *outbuf = '\0'; /* ensure null-termination */ 986 if (outlen) *outlen = converted_size - outbytesleft; 987 if (mul_ptr) *mul_ptr = mul; 988 return converted; 989 } 990