1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 * 21 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 22 * Use is subject to license terms. 23 */ 24 25 #include <errno.h> 26 #include <locale.h> 27 #include <langinfo.h> 28 #include <iconv.h> 29 #include <ctype.h> 30 #include <wctype.h> 31 #include <strings.h> 32 #include <string.h> 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include "includes.h" 36 #include "xmalloc.h" 37 #include "xlist.h" 38 #include "compat.h" 39 #include "log.h" 40 41 #ifdef MIN 42 #undef MIN 43 #endif /* MIN */ 44 45 #define MIN(x, y) ((x) < (y) ? (x) : (y)) 46 47 #define LOCALE_PATH "/usr/bin/locale" 48 49 /* two-char country code, '-' and two-char region code */ 50 #define LANGTAG_MAX 5 51 52 static int locale_cmp(const void *d1, const void *d2); 53 static char *g11n_locale2langtag(char *locale); 54 55 static char *do_iconv(iconv_t cd, const char *s, uint_t *lenp, char **err_str); 56 57 /* 58 * native_codeset records the codeset of the default system locale. 59 * It is used to convert the contents of file (eg /etc/issue) which is 60 * supposed to be in the codeset of default system locale. 61 */ 62 static char *native_codeset; 63 64 /* 65 * Convert locale string name into a language tag. The caller is responsible for 66 * freeing the memory allocated for the result. 67 */ 68 static char * 69 g11n_locale2langtag(char *locale) 70 { 71 char *langtag; 72 73 /* base cases */ 74 if (!locale || !*locale) 75 return (NULL); 76 77 if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0) 78 return (xstrdup("i-default")); 79 80 /* punt for language codes which are not exactly 2 letters */ 81 if (strlen(locale) < 2 || 82 !isalpha(locale[0]) || 83 !isalpha(locale[1]) || 84 (locale[2] != '\0' && 85 locale[2] != '_' && 86 locale[2] != '.' && 87 locale[2] != '@')) 88 return (NULL); 89 90 91 /* we have a primary language sub-tag */ 92 langtag = (char *)xmalloc(LANGTAG_MAX + 1); 93 94 strncpy(langtag, locale, 2); 95 langtag[2] = '\0'; 96 97 /* do we have country sub-tag? For example: cs_CZ */ 98 if (locale[2] == '_') { 99 if (strlen(locale) < 5 || 100 !isalpha(locale[3]) || 101 !isalpha(locale[4]) || 102 (locale[5] != '\0' && (locale[5] != '.' && 103 locale[5] != '@'))) { 104 return (langtag); 105 } 106 107 /* example: create cs-CZ from cs_CZ */ 108 if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2, 109 locale + 3) == 5) 110 return (langtag); 111 } 112 113 /* in all other cases we just use the primary language sub-tag */ 114 return (langtag); 115 } 116 117 uint_t 118 g11n_langtag_is_default(char *langtag) 119 { 120 return (strcmp(langtag, "i-default") == 0); 121 } 122 123 /* 124 * This lang tag / locale matching function works only for two-character 125 * language primary sub-tags and two-character country sub-tags. 126 */ 127 uint_t 128 g11n_langtag_matches_locale(char *langtag, char *locale) 129 { 130 /* match "i-default" to the process' current locale if possible */ 131 if (g11n_langtag_is_default(langtag)) { 132 if (strcasecmp(locale, "POSIX") == 0 || 133 strcasecmp(locale, "C") == 0) 134 return (1); 135 else 136 return (0); 137 } 138 139 /* 140 * locale must be at least 2 chars long and the lang part must be 141 * exactly two characters 142 */ 143 if (strlen(locale) < 2 || 144 (!isalpha(locale[0]) || !isalpha(locale[1]) || 145 (locale[2] != '\0' && locale[2] != '_' && 146 locale[2] != '.' && locale[2] != '@'))) 147 return (0); 148 149 /* same thing with the langtag */ 150 if (strlen(langtag) < 2 || 151 (!isalpha(langtag[0]) || !isalpha(langtag[1]) || 152 (langtag[2] != '\0' && langtag[2] != '-'))) 153 return (0); 154 155 /* primary language sub-tag and the locale's language part must match */ 156 if (strncasecmp(langtag, locale, 2) != 0) 157 return (0); 158 159 /* 160 * primary language sub-tag and the locale's language match, now 161 * fuzzy check country part 162 */ 163 164 /* neither langtag nor locale have more than one component */ 165 if (langtag[2] == '\0' && 166 (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')) 167 return (2); 168 169 /* langtag has only one sub-tag... */ 170 if (langtag[2] == '\0') 171 return (1); 172 173 /* locale has no country code... */ 174 if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@') 175 return (1); 176 177 /* langtag has more than one subtag and the locale has a country code */ 178 179 /* ignore second subtag if not two chars */ 180 if (strlen(langtag) < 5) 181 return (1); 182 183 if (!isalpha(langtag[3]) || !isalpha(langtag[4]) || 184 (langtag[5] != '\0' && langtag[5] != '-')) 185 return (1); 186 187 /* ignore rest of locale if there is no two-character country part */ 188 if (strlen(locale) < 5) 189 return (1); 190 191 if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) || 192 (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@')) 193 return (1); 194 195 /* if the country part matches, return 2 */ 196 if (strncasecmp(&langtag[3], &locale[3], 2) == 0) 197 return (2); 198 199 return (1); 200 } 201 202 char * 203 g11n_getlocale() 204 { 205 /* we have one text domain - always set it */ 206 (void) textdomain(TEXT_DOMAIN); 207 208 /* if the locale is not set, set it from the env vars */ 209 if (!setlocale(LC_MESSAGES, NULL)) 210 (void) setlocale(LC_MESSAGES, ""); 211 212 return (setlocale(LC_MESSAGES, NULL)); 213 } 214 215 void 216 g11n_setlocale(int category, const char *locale) 217 { 218 char *curr; 219 220 if (native_codeset == NULL) { 221 /* set default locale, and record current codeset */ 222 (void) setlocale(LC_ALL, ""); 223 curr = nl_langinfo(CODESET); 224 native_codeset = xstrdup(curr); 225 } 226 227 /* we have one text domain - always set it */ 228 (void) textdomain(TEXT_DOMAIN); 229 230 if (!locale) 231 return; 232 233 if (*locale && ((curr = setlocale(category, NULL))) && 234 strcmp(curr, locale) == 0) 235 return; 236 237 /* if <category> is bogus, setlocale() will do nothing */ 238 (void) setlocale(category, locale); 239 } 240 241 char ** 242 g11n_getlocales() 243 { 244 FILE *locale_out; 245 uint_t n_elems, list_size, long_line = 0; 246 char **list; 247 char locale[64]; /* 64 bytes is plenty for locale names */ 248 249 if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL) 250 return (NULL); 251 252 /* 253 * start with enough room for 65 locales - that's a lot fewer than 254 * all the locales available for installation, but a lot more than 255 * what most users will need and install 256 */ 257 n_elems = 0; 258 list_size = 192; 259 list = (char **) xmalloc(sizeof (char *) * (list_size + 1)); 260 memset(list, 0, sizeof (char *) * (list_size + 1)); 261 262 while (fgets(locale, sizeof (locale), locale_out)) { 263 /* skip long locale names (if any) */ 264 if (!strchr(locale, '\n')) { 265 long_line = 1; 266 continue; 267 } else if (long_line) { 268 long_line = 0; 269 continue; 270 } 271 272 if (strncmp(locale, "iso_8859", 8) == 0) 273 /* ignore locale names like "iso_8859-1" */ 274 continue; 275 276 if (n_elems == list_size) { 277 list_size *= 2; 278 list = (char **)xrealloc((void *) list, 279 (list_size + 1) * sizeof (char *)); 280 memset(&list[n_elems + 1], 0, 281 sizeof (char *) * (list_size - n_elems + 1)); 282 } 283 284 *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */ 285 list[n_elems++] = xstrdup(locale); 286 } 287 288 (void) pclose(locale_out); 289 290 if (n_elems == 0) { 291 xfree(list); 292 return (NULL); 293 } 294 295 list[n_elems] = NULL; 296 297 qsort(list, n_elems - 1, sizeof (char *), locale_cmp); 298 return (list); 299 } 300 301 char * 302 g11n_getlangs() 303 { 304 char *locale; 305 306 if (getenv("SSH_LANGS")) 307 return (xstrdup(getenv("SSH_LANGS"))); 308 309 locale = g11n_getlocale(); 310 311 if (!locale || !*locale) 312 return (xstrdup("i-default")); 313 314 return (g11n_locale2langtag(locale)); 315 } 316 317 char * 318 g11n_locales2langs(char **locale_set) 319 { 320 char **p, **r, **q; 321 char *langtag, *langs; 322 int locales, skip; 323 324 for (locales = 0, p = locale_set; p && *p; p++) 325 locales++; 326 327 r = (char **)xmalloc((locales + 1) * sizeof (char *)); 328 memset(r, 0, (locales + 1) * sizeof (char *)); 329 330 for (p = locale_set; p && *p && ((p - locale_set) <= locales); p++) { 331 skip = 0; 332 if ((langtag = g11n_locale2langtag(*p)) == NULL) 333 continue; 334 for (q = r; (q - r) < locales; q++) { 335 if (!*q) 336 break; 337 if (*q && strcmp(*q, langtag) == 0) 338 skip = 1; 339 } 340 if (!skip) 341 *(q++) = langtag; 342 else 343 xfree(langtag); 344 *q = NULL; 345 } 346 347 langs = xjoin(r, ','); 348 g11n_freelist(r); 349 350 return (langs); 351 } 352 353 static int 354 sortcmp(const void *d1, const void *d2) 355 { 356 char *s1 = *(char **)d1; 357 char *s2 = *(char **)d2; 358 359 return (strcmp(s1, s2)); 360 } 361 362 int 363 g11n_langtag_match(char *langtag1, char *langtag2) 364 { 365 int len1, len2; 366 char c1, c2; 367 368 len1 = (strchr(langtag1, '-')) ? 369 (strchr(langtag1, '-') - langtag1) 370 : strlen(langtag1); 371 372 len2 = (strchr(langtag2, '-')) ? 373 (strchr(langtag2, '-') - langtag2) 374 : strlen(langtag2); 375 376 /* no match */ 377 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 378 return (0); 379 380 c1 = *(langtag1 + len1); 381 c2 = *(langtag2 + len2); 382 383 /* no country sub-tags - exact match */ 384 if (c1 == '\0' && c2 == '\0') 385 return (2); 386 387 /* one langtag has a country sub-tag, the other doesn't */ 388 if (c1 == '\0' || c2 == '\0') 389 return (1); 390 391 /* can't happen - both langtags have a country sub-tag */ 392 if (c1 != '-' || c2 != '-') 393 return (1); 394 395 /* compare country subtags */ 396 langtag1 = langtag1 + len1 + 1; 397 langtag2 = langtag2 + len2 + 1; 398 399 len1 = (strchr(langtag1, '-')) ? 400 (strchr(langtag1, '-') - langtag1) : strlen(langtag1); 401 402 len2 = (strchr(langtag2, '-')) ? 403 (strchr(langtag2, '-') - langtag2) : strlen(langtag2); 404 405 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 406 return (1); 407 408 /* country tags matched - exact match */ 409 return (2); 410 } 411 412 char * 413 g11n_langtag_set_intersect(char *set1, char *set2) 414 { 415 char **list1, **list2, **list3, **p, **q, **r; 416 char *set3, *lang_subtag; 417 uint_t n1, n2, n3; 418 uint_t do_append; 419 420 list1 = xsplit(set1, ','); 421 list2 = xsplit(set2, ','); 422 423 for (n1 = 0, p = list1; p && *p; p++, n1++) 424 ; 425 for (n2 = 0, p = list2; p && *p; p++, n2++) 426 ; 427 428 list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1)); 429 *list3 = NULL; 430 431 /* 432 * we must not sort the user langtags - sorting or not the server's 433 * should not affect the outcome 434 */ 435 qsort(list2, n2, sizeof (char *), sortcmp); 436 437 for (n3 = 0, p = list1; p && *p; p++) { 438 do_append = 0; 439 for (q = list2; q && *q; q++) { 440 if (g11n_langtag_match(*p, *q) != 2) continue; 441 /* append element */ 442 for (r = list3; (r - list3) <= (n1 + n2); r++) { 443 do_append = 1; 444 if (!*r) 445 break; 446 if (strcmp(*p, *r) == 0) { 447 do_append = 0; 448 break; 449 } 450 } 451 if (do_append && n3 <= (n1 + n2)) { 452 list3[n3++] = xstrdup(*p); 453 list3[n3] = NULL; 454 } 455 } 456 } 457 458 for (p = list1; p && *p; p++) { 459 do_append = 0; 460 for (q = list2; q && *q; q++) { 461 if (g11n_langtag_match(*p, *q) != 1) 462 continue; 463 464 /* append element */ 465 lang_subtag = xstrdup(*p); 466 if (strchr(lang_subtag, '-')) 467 *(strchr(lang_subtag, '-')) = '\0'; 468 for (r = list3; (r - list3) <= (n1 + n2); r++) { 469 do_append = 1; 470 if (!*r) 471 break; 472 if (strcmp(lang_subtag, *r) == 0) { 473 do_append = 0; 474 break; 475 } 476 } 477 if (do_append && n3 <= (n1 + n2)) { 478 list3[n3++] = lang_subtag; 479 list3[n3] = NULL; 480 } else 481 xfree(lang_subtag); 482 } 483 } 484 485 set3 = xjoin(list3, ','); 486 xfree_split_list(list1); 487 xfree_split_list(list2); 488 xfree_split_list(list3); 489 490 return (set3); 491 } 492 493 char * 494 g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags) 495 { 496 char *list, *result; 497 char **xlist; 498 499 /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */ 500 list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags); 501 502 if (!list) 503 return (NULL); 504 505 xlist = xsplit(list, ','); 506 507 xfree(list); 508 509 if (!xlist || !*xlist) 510 return (NULL); 511 512 result = xstrdup(*xlist); 513 xfree_split_list(xlist); 514 515 return (result); 516 } 517 518 /* 519 * Compare locales, preferring UTF-8 codesets to others, otherwise doing 520 * a stright strcmp() 521 */ 522 static int 523 locale_cmp(const void *d1, const void *d2) 524 { 525 char *dot_ptr; 526 char *s1 = *(char **)d1; 527 char *s2 = *(char **)d2; 528 int s1_is_utf8 = 0; 529 int s2_is_utf8 = 0; 530 531 /* check if s1 is a UTF-8 locale */ 532 if (((dot_ptr = strchr((char *)s1, '.')) != NULL) && 533 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 534 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 535 s1_is_utf8++; 536 } 537 538 /* check if s2 is a UTF-8 locale */ 539 if (((dot_ptr = strchr((char *)s2, '.')) != NULL) && 540 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 541 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 542 s2_is_utf8++; 543 } 544 545 /* prefer UTF-8 locales */ 546 if (s1_is_utf8 && !s2_is_utf8) 547 return (-1); 548 549 if (s2_is_utf8 && !s1_is_utf8) 550 return (1); 551 552 /* prefer any locale over the default locales */ 553 if (strcmp(s1, "C") == 0 || strcmp(s1, "POSIX") == 0 || 554 strcmp(s1, "common") == 0) { 555 if (strcmp(s2, "C") != 0 && strcmp(s2, "POSIX") != 0 && 556 strcmp(s2, "common") != 0) 557 return (1); 558 } 559 560 if (strcmp(s2, "C") == 0 || strcmp(s2, "POSIX") == 0 || 561 strcmp(s2, "common") == 0) { 562 if (strcmp(s1, "C") != 0 && 563 strcmp(s1, "POSIX") != 0 && 564 strcmp(s1, "common") != 0) 565 return (-1); 566 } 567 568 return (strcmp(s1, s2)); 569 } 570 571 572 char ** 573 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set) 574 { 575 char **langtag_list, **result, **p, **q, **r; 576 char *s; 577 uint_t do_append, n_langtags, n_locales, n_results, max_results; 578 579 /* count lang tags and locales */ 580 for (n_locales = 0, p = locale_set; p && *p; p++) 581 n_locales++; 582 583 n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0; 584 /* count the number of langtags */ 585 for (; s = strchr(s, ','); s++, n_langtags++) 586 ; 587 588 qsort(locale_set, n_locales, sizeof (char *), locale_cmp); 589 590 langtag_list = xsplit(langtag_set, ','); 591 for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++) 592 ; 593 594 max_results = MIN(n_locales, n_langtags) * 2; 595 result = (char **) xmalloc(sizeof (char *) * (max_results + 1)); 596 *result = NULL; 597 n_results = 0; 598 599 /* more specific matches first */ 600 for (p = langtag_list; p && *p; p++) { 601 do_append = 0; 602 for (q = locale_set; q && *q; q++) { 603 if (g11n_langtag_matches_locale(*p, *q) == 2) { 604 do_append = 1; 605 for (r = result; (r - result) <= 606 MIN(n_locales, n_langtags); r++) { 607 if (!*r) 608 break; 609 if (strcmp(*q, *r) == 0) { 610 do_append = 0; 611 break; 612 } 613 } 614 if (do_append && n_results < max_results) { 615 result[n_results++] = xstrdup(*q); 616 result[n_results] = NULL; 617 } 618 break; 619 } 620 } 621 } 622 623 for (p = langtag_list; p && *p; p++) { 624 do_append = 0; 625 for (q = locale_set; q && *q; q++) { 626 if (g11n_langtag_matches_locale(*p, *q) == 1) { 627 do_append = 1; 628 for (r = result; (r - result) <= 629 MIN(n_locales, n_langtags); r++) { 630 if (!*r) 631 break; 632 if (strcmp(*q, *r) == 0) { 633 do_append = 0; 634 break; 635 } 636 } 637 if (do_append && n_results < max_results) { 638 result[n_results++] = xstrdup(*q); 639 result[n_results] = NULL; 640 } 641 break; 642 } 643 } 644 } 645 646 xfree_split_list(langtag_list); 647 648 return (result); 649 } 650 651 char * 652 g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales) 653 { 654 char **results, **locales, *result = NULL; 655 656 if (srvr_locales == NULL) 657 locales = g11n_getlocales(); 658 else 659 locales = srvr_locales; 660 661 if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags, 662 locales)) == NULL) 663 goto err; 664 665 if (*results != NULL) 666 result = xstrdup(*results); 667 668 xfree_split_list(results); 669 670 err: 671 if (locales != srvr_locales) 672 g11n_freelist(locales); 673 return (result); 674 } 675 676 /* 677 * Functions for converting to UTF-8 from the local codeset and 678 * converting from UTF-8 to the local codeset. 679 * 680 * The error_str parameter is an pointer to a char variable where to 681 * store a string suitable for use with error() or fatal() or friends. 682 * It is also used for an error indicator when NULL is returned. 683 * 684 * If conversion isn't necessary, *error_str is set to NULL, and 685 * NULL is returned. 686 * If conversion error occured, *error_str points to an error message, 687 * and NULL is returned. 688 */ 689 char * 690 g11n_convert_from_utf8(const char *str, uint_t *lenp, char **error_str) 691 { 692 static char *last_codeset; 693 static iconv_t cd = (iconv_t)-1; 694 char *codeset; 695 696 *error_str = NULL; 697 698 codeset = nl_langinfo(CODESET); 699 700 if (strcmp(codeset, "UTF-8") == 0) 701 return (NULL); 702 703 if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) { 704 if (last_codeset != NULL) { 705 xfree(last_codeset); 706 last_codeset = NULL; 707 } 708 if (cd != (iconv_t)-1) 709 (void) iconv_close(cd); 710 711 if ((cd = iconv_open(codeset, "UTF-8")) == (iconv_t)-1) { 712 *error_str = gettext("Cannot convert UTF-8 " 713 "strings to the local codeset"); 714 return (NULL); 715 } 716 last_codeset = xstrdup(codeset); 717 } 718 return (do_iconv(cd, str, lenp, error_str)); 719 } 720 721 char * 722 g11n_convert_to_utf8(const char *str, uint_t *lenp, 723 int native, char **error_str) 724 { 725 static char *last_codeset; 726 static iconv_t cd = (iconv_t)-1; 727 char *codeset; 728 729 *error_str = NULL; 730 731 if (native) 732 codeset = native_codeset; 733 else 734 codeset = nl_langinfo(CODESET); 735 736 if (strcmp(codeset, "UTF-8") == 0) 737 return (NULL); 738 739 if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) { 740 if (last_codeset != NULL) { 741 xfree(last_codeset); 742 last_codeset = NULL; 743 } 744 if (cd != (iconv_t)-1) 745 (void) iconv_close(cd); 746 747 if ((cd = iconv_open("UTF-8", codeset)) == (iconv_t)-1) { 748 *error_str = gettext("Cannot convert the " 749 "local codeset strings to UTF-8"); 750 return (NULL); 751 } 752 last_codeset = xstrdup(codeset); 753 } 754 return (do_iconv(cd, str, lenp, error_str)); 755 } 756 757 /* 758 * Wrapper around iconv() 759 * 760 * The caller is responsible for freeing the result. NULL is returned when 761 * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF). 762 * The caller must ensure that the input string isn't NULL pointer. 763 */ 764 static char * 765 do_iconv(iconv_t cd, const char *str, uint_t *lenp, char **err_str) 766 { 767 int ilen, olen; 768 size_t ileft, oleft; 769 char *ostr, *optr; 770 const char *istr; 771 772 ilen = *lenp; 773 olen = ilen + 1; 774 775 ostr = NULL; 776 for (;;) { 777 olen *= 2; 778 oleft = olen; 779 ostr = optr = xrealloc(ostr, olen); 780 istr = (const char *)str; 781 if ((ileft = ilen) == 0) 782 break; 783 784 if (iconv(cd, &istr, &ileft, &optr, &oleft) != (size_t)-1) { 785 /* success: generate reset sequence */ 786 if (iconv(cd, NULL, NULL, 787 &optr, &oleft) == (size_t)-1 && errno == E2BIG) { 788 continue; 789 } 790 break; 791 } 792 /* failed */ 793 if (errno != E2BIG) { 794 oleft = olen; 795 (void) iconv(cd, NULL, NULL, &ostr, &oleft); 796 xfree(ostr); 797 *err_str = gettext("Codeset conversion failed"); 798 return (NULL); 799 } 800 } 801 olen = optr - ostr; 802 optr = xmalloc(olen + 1); 803 (void) memcpy(optr, ostr, olen); 804 xfree(ostr); 805 806 optr[olen] = '\0'; 807 *lenp = olen; 808 809 return (optr); 810 } 811 812 /* 813 * A filter for output string. Control and unprintable characters 814 * are converted into visible form (eg "\ooo"). 815 */ 816 char * 817 g11n_filter_string(char *s) 818 { 819 int mb_cur_max = MB_CUR_MAX; 820 int mblen, len; 821 char *os = s; 822 wchar_t wc; 823 char *obuf, *op; 824 825 /* all character may be converted into the form of \ooo */ 826 obuf = op = xmalloc(strlen(s) * 4 + 1); 827 828 while (*s != '\0') { 829 mblen = mbtowc(&wc, s, mb_cur_max); 830 if (mblen <= 0) { 831 mblen = 1; 832 wc = (unsigned char)*s; 833 } 834 if (!iswprint(wc) && 835 wc != L'\n' && wc != L'\r' && wc != L'\t') { 836 /* 837 * control chars which need to be replaced 838 * with safe character sequence. 839 */ 840 while (mblen != 0) { 841 op += sprintf(op, "\\%03o", 842 (unsigned char)*s++); 843 mblen--; 844 } 845 } else { 846 while (mblen != 0) { 847 *op++ = *s++; 848 mblen--; 849 } 850 } 851 } 852 *op = '\0'; 853 len = op - obuf + 1; 854 op = xrealloc(os, len); 855 (void) memcpy(op, obuf, len); 856 xfree(obuf); 857 return (op); 858 } 859 860 /* 861 * Once we negotiated with a langtag, server need to map it to a system 862 * locale. That is done based on the locale supported on the server side. 863 * We know (with the locale supported on Solaris) how the langtag is 864 * mapped to. However, from the client point of view, there is no way to 865 * know exactly what locale(encoding) will be used. 866 * 867 * With the bug fix of SSH_BUG_STRING_ENCODING, it is guaranteed that the 868 * UTF-8 characters always come over the wire, so it is no longer the problem 869 * as long as both side has the bug fix. However if the server side doesn't 870 * have the fix, client can't safely perform the code conversion since the 871 * incoming character encoding is unknown. 872 * 873 * To alleviate this situation, we take an empirical approach to find 874 * encoding from langtag. 875 * 876 * If langtag has a subtag, we can directly map the langtag to UTF-8 locale 877 * (eg en-US can be mapped to en_US.UTF-8) with a few exceptions. 878 * Certain xx_YY locales don't support UTF-8 encoding (probably due to lack 879 * of L10N support ..). Those are: 880 * 881 * no_NO, no_NY, sr_SP, sr_YU 882 * 883 * They all use ISO8859-X encoding. 884 * 885 * For those "xx" langtags, some of them can be mapped to "xx.UTF-8", 886 * but others cannot. So we need to use the "xx" as the locale name. 887 * Those locales are: 888 * 889 * ar, ca, cs, da, et, fi, he, hu, ja, lt, lv, nl, no, pt, sh, th, tr 890 * 891 * Their encoding vary. They could be ISO8859-X or EUC or something else. 892 * So we don't perform code conversion for these langtags. 893 */ 894 static const char *non_utf8_langtag[] = { 895 "no-NO", "no-NY", "sr-SP", "sr-YU", 896 "ar", "ca", "cs", "da", "et", "fi", "he", "hu", "ja", 897 "lt", "lv", "nl", "no", "pt", "sh", "th", "tr", NULL}; 898 899 void 900 g11n_test_langtag(const char *lang, int server) 901 { 902 const char **lp; 903 904 if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS) { 905 /* 906 * We negotiated with real locale name (not lang tag). 907 * We shouldn't expect UTF-8, thus shouldn't do code 908 * conversion. 909 */ 910 datafellows |= SSH_BUG_STRING_ENCODING; 911 return; 912 } 913 914 if (datafellows & SSH_BUG_STRING_ENCODING) { 915 if (server) { 916 /* 917 * Whatever bug exists in the client side, server 918 * side has nothing to do, since server has no way 919 * to know what actual encoding is used on the client 920 * side. For example, even if we negotiated with 921 * en_US, client locale could be en_US.ISO8859-X or 922 * en_US.UTF-8. 923 */ 924 return; 925 } 926 /* 927 * We are on the client side. We'll check with known 928 * locales to see if non-UTF8 characters could come in. 929 */ 930 for (lp = non_utf8_langtag; *lp != NULL; lp++) { 931 if (strcmp(lang, *lp) == 0) 932 break; 933 } 934 if (*lp == NULL) { 935 debug2("Server is expected to use UTF-8 locale"); 936 datafellows &= ~SSH_BUG_STRING_ENCODING; 937 } else { 938 /* 939 * Server is expected to use non-UTF8 encoding. 940 */ 941 debug2("Enforcing no code conversion: %s", lang); 942 } 943 } 944 } 945 946 /* 947 * Free all strings in the list and then free the list itself. We know that the 948 * list ends with a NULL pointer. 949 */ 950 void 951 g11n_freelist(char **list) 952 { 953 int i = 0; 954 955 while (list[i] != NULL) { 956 xfree(list[i]); 957 i++; 958 } 959 960 xfree(list); 961 } 962