1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 *
21 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
22 * Use is subject to license terms.
23 */
24
25 #include <errno.h>
26 #include <locale.h>
27 #include <langinfo.h>
28 #include <iconv.h>
29 #include <ctype.h>
30 #include <wctype.h>
31 #include <strings.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include "includes.h"
36 #include "xmalloc.h"
37 #include "xlist.h"
38 #include "compat.h"
39 #include "log.h"
40
/*
 * Use our own MIN(): drop any definition inherited from a system header
 * first (#undef of a name that is not defined is a no-op).
 */
#undef	MIN
#define	MIN(x, y)	((x) < (y) ? (x) : (y))

/* program run to enumerate the locales installed on this system */
#define	LOCALE_PATH	"/usr/bin/locale"

/* two-char language code, '-' and two-char country code */
#define	LANGTAG_MAX	5
51
52 static int locale_cmp(const void *d1, const void *d2);
53 static char *g11n_locale2langtag(char *locale);
54
55 static char *do_iconv(iconv_t cd, const char *s, uint_t *lenp, char **err_str);
56
57 /*
58 * native_codeset records the codeset of the default system locale.
59 * It is used to convert the contents of file (eg /etc/issue) which is
60 * supposed to be in the codeset of default system locale.
61 */
62 static char *native_codeset;
63
64 /*
65 * Convert locale string name into a language tag. The caller is responsible for
66 * freeing the memory allocated for the result.
67 */
68 static char *
g11n_locale2langtag(char * locale)69 g11n_locale2langtag(char *locale)
70 {
71 char *langtag;
72
73 /* base cases */
74 if (!locale || !*locale)
75 return (NULL);
76
77 if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0)
78 return (xstrdup("i-default"));
79
80 /* punt for language codes which are not exactly 2 letters */
81 if (strlen(locale) < 2 ||
82 !isalpha(locale[0]) ||
83 !isalpha(locale[1]) ||
84 (locale[2] != '\0' &&
85 locale[2] != '_' &&
86 locale[2] != '.' &&
87 locale[2] != '@'))
88 return (NULL);
89
90
91 /* we have a primary language sub-tag */
92 langtag = (char *)xmalloc(LANGTAG_MAX + 1);
93
94 strncpy(langtag, locale, 2);
95 langtag[2] = '\0';
96
97 /* do we have country sub-tag? For example: cs_CZ */
98 if (locale[2] == '_') {
99 if (strlen(locale) < 5 ||
100 !isalpha(locale[3]) ||
101 !isalpha(locale[4]) ||
102 (locale[5] != '\0' && (locale[5] != '.' &&
103 locale[5] != '@'))) {
104 return (langtag);
105 }
106
107 /* example: create cs-CZ from cs_CZ */
108 if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2,
109 locale + 3) == 5)
110 return (langtag);
111 }
112
113 /* in all other cases we just use the primary language sub-tag */
114 return (langtag);
115 }
116
117 uint_t
g11n_langtag_is_default(char * langtag)118 g11n_langtag_is_default(char *langtag)
119 {
120 return (strcmp(langtag, "i-default") == 0);
121 }
122
123 /*
124 * This lang tag / locale matching function works only for two-character
125 * language primary sub-tags and two-character country sub-tags.
126 */
127 uint_t
g11n_langtag_matches_locale(char * langtag,char * locale)128 g11n_langtag_matches_locale(char *langtag, char *locale)
129 {
130 /* match "i-default" to the process' current locale if possible */
131 if (g11n_langtag_is_default(langtag)) {
132 if (strcasecmp(locale, "POSIX") == 0 ||
133 strcasecmp(locale, "C") == 0)
134 return (1);
135 else
136 return (0);
137 }
138
139 /*
140 * locale must be at least 2 chars long and the lang part must be
141 * exactly two characters
142 */
143 if (strlen(locale) < 2 ||
144 (!isalpha(locale[0]) || !isalpha(locale[1]) ||
145 (locale[2] != '\0' && locale[2] != '_' &&
146 locale[2] != '.' && locale[2] != '@')))
147 return (0);
148
149 /* same thing with the langtag */
150 if (strlen(langtag) < 2 ||
151 (!isalpha(langtag[0]) || !isalpha(langtag[1]) ||
152 (langtag[2] != '\0' && langtag[2] != '-')))
153 return (0);
154
155 /* primary language sub-tag and the locale's language part must match */
156 if (strncasecmp(langtag, locale, 2) != 0)
157 return (0);
158
159 /*
160 * primary language sub-tag and the locale's language match, now
161 * fuzzy check country part
162 */
163
164 /* neither langtag nor locale have more than one component */
165 if (langtag[2] == '\0' &&
166 (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@'))
167 return (2);
168
169 /* langtag has only one sub-tag... */
170 if (langtag[2] == '\0')
171 return (1);
172
173 /* locale has no country code... */
174 if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')
175 return (1);
176
177 /* langtag has more than one subtag and the locale has a country code */
178
179 /* ignore second subtag if not two chars */
180 if (strlen(langtag) < 5)
181 return (1);
182
183 if (!isalpha(langtag[3]) || !isalpha(langtag[4]) ||
184 (langtag[5] != '\0' && langtag[5] != '-'))
185 return (1);
186
187 /* ignore rest of locale if there is no two-character country part */
188 if (strlen(locale) < 5)
189 return (1);
190
191 if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) ||
192 (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
193 return (1);
194
195 /* if the country part matches, return 2 */
196 if (strncasecmp(&langtag[3], &locale[3], 2) == 0)
197 return (2);
198
199 return (1);
200 }
201
202 char *
g11n_getlocale()203 g11n_getlocale()
204 {
205 /* we have one text domain - always set it */
206 (void) textdomain(TEXT_DOMAIN);
207
208 /* if the locale is not set, set it from the env vars */
209 if (!setlocale(LC_MESSAGES, NULL))
210 (void) setlocale(LC_MESSAGES, "");
211
212 return (setlocale(LC_MESSAGES, NULL));
213 }
214
215 void
g11n_setlocale(int category,const char * locale)216 g11n_setlocale(int category, const char *locale)
217 {
218 char *curr;
219
220 if (native_codeset == NULL) {
221 /* set default locale, and record current codeset */
222 (void) setlocale(LC_ALL, "");
223 curr = nl_langinfo(CODESET);
224 native_codeset = xstrdup(curr);
225 }
226
227 /* we have one text domain - always set it */
228 (void) textdomain(TEXT_DOMAIN);
229
230 if (!locale)
231 return;
232
233 if (*locale && ((curr = setlocale(category, NULL))) &&
234 strcmp(curr, locale) == 0)
235 return;
236
237 /* if <category> is bogus, setlocale() will do nothing */
238 (void) setlocale(category, locale);
239 }
240
241 char **
g11n_getlocales()242 g11n_getlocales()
243 {
244 FILE *locale_out;
245 uint_t n_elems, list_size, long_line = 0;
246 char **list;
247 char locale[64]; /* 64 bytes is plenty for locale names */
248
249 if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL)
250 return (NULL);
251
252 /*
253 * start with enough room for 65 locales - that's a lot fewer than
254 * all the locales available for installation, but a lot more than
255 * what most users will need and install
256 */
257 n_elems = 0;
258 list_size = 192;
259 list = (char **) xmalloc(sizeof (char *) * (list_size + 1));
260 memset(list, 0, sizeof (char *) * (list_size + 1));
261
262 while (fgets(locale, sizeof (locale), locale_out)) {
263 /* skip long locale names (if any) */
264 if (!strchr(locale, '\n')) {
265 long_line = 1;
266 continue;
267 } else if (long_line) {
268 long_line = 0;
269 continue;
270 }
271
272 if (strncmp(locale, "iso_8859", 8) == 0)
273 /* ignore locale names like "iso_8859-1" */
274 continue;
275
276 if (n_elems == list_size) {
277 list_size *= 2;
278 list = (char **)xrealloc((void *) list,
279 (list_size + 1) * sizeof (char *));
280 memset(&list[n_elems + 1], 0,
281 sizeof (char *) * (list_size - n_elems + 1));
282 }
283
284 *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */
285 list[n_elems++] = xstrdup(locale);
286 }
287
288 (void) pclose(locale_out);
289
290 if (n_elems == 0) {
291 xfree(list);
292 return (NULL);
293 }
294
295 list[n_elems] = NULL;
296
297 qsort(list, n_elems - 1, sizeof (char *), locale_cmp);
298 return (list);
299 }
300
/*
 * Return the language tag list to offer, as a newly allocated string.
 *
 * The SSH_LANGS environment variable wins when it is set.  Otherwise the
 * current locale (see g11n_getlocale()) is converted to a language tag;
 * a missing or empty locale name yields "i-default".  The result is
 * whatever g11n_locale2langtag() returns, which may be NULL.
 */
char *
g11n_getlangs()
{
	char *from_env;
	char *locale;

	from_env = getenv("SSH_LANGS");
	if (from_env != NULL)
		return (xstrdup(from_env));

	locale = g11n_getlocale();
	if (locale == NULL || *locale == '\0')
		return (xstrdup("i-default"));

	return (g11n_locale2langtag(locale));
}
316
/*
 * Convert a NULL-terminated array of locale names into a comma-separated
 * list of language tags.
 *
 * Each locale is mapped with g11n_locale2langtag(); locales that cannot
 * be mapped are dropped and each tag is listed only once, in order of
 * first appearance.  The joined string comes from xjoin().
 */
char *
g11n_locales2langs(char **locale_set)
{
	char **tags;
	char *tag, *joined;
	int n_locales, n_tags, i, j, seen;

	n_locales = 0;
	if (locale_set != NULL) {
		while (locale_set[n_locales] != NULL)
			n_locales++;
	}

	/* at most one tag per locale, plus the terminating NULL */
	tags = xmalloc((n_locales + 1) * sizeof (char *));
	(void) memset(tags, 0, (n_locales + 1) * sizeof (char *));

	n_tags = 0;
	for (i = 0; i < n_locales; i++) {
		tag = g11n_locale2langtag(locale_set[i]);
		if (tag == NULL)
			continue;

		seen = 0;
		for (j = 0; j < n_tags; j++) {
			if (strcmp(tags[j], tag) == 0) {
				seen = 1;
				break;
			}
		}

		if (seen)
			xfree(tag);
		else
			tags[n_tags++] = tag;
		tags[n_tags] = NULL;
	}

	joined = xjoin(tags, ',');
	g11n_freelist(tags);

	return (joined);
}
352
/*
 * qsort() comparison callback for arrays of C strings: each argument
 * points at a (char *) element and the strings are ordered by strcmp().
 */
static int
sortcmp(const void *d1, const void *d2)
{
	char * const *lhs = d1;
	char * const *rhs = d2;

	return (strcmp(*lhs, *rhs));
}
361
/*
 * Compare two language tags sub-tag by sub-tag (case-sensitively).
 *
 * Returns:
 *   0 - the primary (first) sub-tags differ;
 *   1 - the primary sub-tags are equal, but only one tag has a second
 *       sub-tag or the second sub-tags differ;
 *   2 - the primary sub-tags are equal and either neither tag has a
 *       second sub-tag or the second sub-tags are equal as well.
 *
 * Anything after the second sub-tag is ignored.
 */
int
g11n_langtag_match(char *langtag1, char *langtag2)
{
	size_t n1, n2;

	/* length of each primary sub-tag: up to the first '-' or the end */
	n1 = strcspn(langtag1, "-");
	n2 = strcspn(langtag2, "-");

	/* no match */
	if (n1 != n2 || strncmp(langtag1, langtag2, n1) != 0)
		return (0);

	/* no country sub-tags - exact match */
	if (langtag1[n1] == '\0' && langtag2[n2] == '\0')
		return (2);

	/* one langtag has a country sub-tag, the other doesn't */
	if (langtag1[n1] == '\0' || langtag2[n2] == '\0')
		return (1);

	/* both tags continue with '-': step over it to the second sub-tag */
	langtag1 += n1 + 1;
	langtag2 += n2 + 1;

	n1 = strcspn(langtag1, "-");
	n2 = strcspn(langtag2, "-");

	if (n1 != n2 || strncmp(langtag1, langtag2, n1) != 0)
		return (1);

	/* country tags matched - exact match */
	return (2);
}
411
412 char *
g11n_langtag_set_intersect(char * set1,char * set2)413 g11n_langtag_set_intersect(char *set1, char *set2)
414 {
415 char **list1, **list2, **list3, **p, **q, **r;
416 char *set3, *lang_subtag;
417 uint_t n1, n2, n3;
418 uint_t do_append;
419
420 list1 = xsplit(set1, ',');
421 list2 = xsplit(set2, ',');
422
423 for (n1 = 0, p = list1; p && *p; p++, n1++)
424 ;
425 for (n2 = 0, p = list2; p && *p; p++, n2++)
426 ;
427
428 list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1));
429 *list3 = NULL;
430
431 /*
432 * we must not sort the user langtags - sorting or not the server's
433 * should not affect the outcome
434 */
435 qsort(list2, n2, sizeof (char *), sortcmp);
436
437 for (n3 = 0, p = list1; p && *p; p++) {
438 do_append = 0;
439 for (q = list2; q && *q; q++) {
440 if (g11n_langtag_match(*p, *q) != 2) continue;
441 /* append element */
442 for (r = list3; (r - list3) <= (n1 + n2); r++) {
443 do_append = 1;
444 if (!*r)
445 break;
446 if (strcmp(*p, *r) == 0) {
447 do_append = 0;
448 break;
449 }
450 }
451 if (do_append && n3 <= (n1 + n2)) {
452 list3[n3++] = xstrdup(*p);
453 list3[n3] = NULL;
454 }
455 }
456 }
457
458 for (p = list1; p && *p; p++) {
459 do_append = 0;
460 for (q = list2; q && *q; q++) {
461 if (g11n_langtag_match(*p, *q) != 1)
462 continue;
463
464 /* append element */
465 lang_subtag = xstrdup(*p);
466 if (strchr(lang_subtag, '-'))
467 *(strchr(lang_subtag, '-')) = '\0';
468 for (r = list3; (r - list3) <= (n1 + n2); r++) {
469 do_append = 1;
470 if (!*r)
471 break;
472 if (strcmp(lang_subtag, *r) == 0) {
473 do_append = 0;
474 break;
475 }
476 }
477 if (do_append && n3 <= (n1 + n2)) {
478 list3[n3++] = lang_subtag;
479 list3[n3] = NULL;
480 } else
481 xfree(lang_subtag);
482 }
483 }
484
485 set3 = xjoin(list3, ',');
486 xfree_split_list(list1);
487 xfree_split_list(list2);
488 xfree_split_list(list3);
489
490 return (set3);
491 }
492
/*
 * Pick the language tag the client should use: the first element of the
 * intersection of the client's and the server's language tag sets (see
 * g11n_langtag_set_intersect()).
 *
 * Returns a newly allocated copy of that tag, or NULL when the
 * intersection is empty.
 */
char *
g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
{
	char *list, *result = NULL;
	char **xlist;

	/* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
	list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);

	if (list == NULL)
		return (NULL);

	xlist = xsplit(list, ',');

	xfree(list);

	if (xlist == NULL)
		return (NULL);

	if (*xlist != NULL)
		result = xstrdup(*xlist);

	/* release the split list on every path, including an empty one */
	xfree_split_list(xlist);

	return (result);
}
517
/*
 * Nonzero if the text after the first '.' in name is exactly "UTF-8",
 * optionally followed by an '@' modifier.
 */
static int
locale_cmp_is_utf8(const char *name)
{
	const char *dot = strchr(name, '.');

	if (dot == NULL || strncmp(dot + 1, "UTF-8", 5) != 0)
		return (0);

	return (dot[6] == '\0' || dot[6] == '@');
}

/* Nonzero if name is one of the default locales: "C", "POSIX", "common". */
static int
locale_cmp_is_default(const char *name)
{
	return (strcmp(name, "C") == 0 || strcmp(name, "POSIX") == 0 ||
	    strcmp(name, "common") == 0);
}

/*
 * qsort() comparison callback for arrays of locale names.  UTF-8 locales
 * sort before all others, the default locales ("C", "POSIX", "common")
 * sort after any other locale, and everything else falls back to a
 * straight strcmp().
 */
static int
locale_cmp(const void *d1, const void *d2)
{
	char *s1 = *(char **)d1;
	char *s2 = *(char **)d2;
	int utf8_1 = locale_cmp_is_utf8(s1);
	int utf8_2 = locale_cmp_is_utf8(s2);
	int dflt_1, dflt_2;

	/* prefer UTF-8 locales */
	if (utf8_1 && !utf8_2)
		return (-1);
	if (utf8_2 && !utf8_1)
		return (1);

	/* prefer any locale over the default locales */
	dflt_1 = locale_cmp_is_default(s1);
	dflt_2 = locale_cmp_is_default(s2);
	if (dflt_1 && !dflt_2)
		return (1);
	if (dflt_2 && !dflt_1)
		return (-1);

	return (strcmp(s1, s2));
}
570
571
572 char **
g11n_langtag_set_locale_set_intersect(char * langtag_set,char ** locale_set)573 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set)
574 {
575 char **langtag_list, **result, **p, **q, **r;
576 char *s;
577 uint_t do_append, n_langtags, n_locales, n_results, max_results;
578
579 if (locale_set == NULL)
580 return (NULL);
581
582 /* count lang tags and locales */
583 for (n_locales = 0, p = locale_set; p && *p; p++)
584 n_locales++;
585
586 n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0;
587 /* count the number of langtags */
588 for (; s = strchr(s, ','); s++, n_langtags++)
589 ;
590
591 qsort(locale_set, n_locales, sizeof (char *), locale_cmp);
592
593 langtag_list = xsplit(langtag_set, ',');
594 for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++)
595 ;
596
597 max_results = MIN(n_locales, n_langtags) * 2;
598 result = (char **) xmalloc(sizeof (char *) * (max_results + 1));
599 *result = NULL;
600 n_results = 0;
601
602 /* more specific matches first */
603 for (p = langtag_list; p && *p; p++) {
604 do_append = 0;
605 for (q = locale_set; q && *q; q++) {
606 if (g11n_langtag_matches_locale(*p, *q) == 2) {
607 do_append = 1;
608 for (r = result; (r - result) <=
609 MIN(n_locales, n_langtags); r++) {
610 if (!*r)
611 break;
612 if (strcmp(*q, *r) == 0) {
613 do_append = 0;
614 break;
615 }
616 }
617 if (do_append && n_results < max_results) {
618 result[n_results++] = xstrdup(*q);
619 result[n_results] = NULL;
620 }
621 break;
622 }
623 }
624 }
625
626 for (p = langtag_list; p && *p; p++) {
627 do_append = 0;
628 for (q = locale_set; q && *q; q++) {
629 if (g11n_langtag_matches_locale(*p, *q) == 1) {
630 do_append = 1;
631 for (r = result; (r - result) <=
632 MIN(n_locales, n_langtags); r++) {
633 if (!*r)
634 break;
635 if (strcmp(*q, *r) == 0) {
636 do_append = 0;
637 break;
638 }
639 }
640 if (do_append && n_results < max_results) {
641 result[n_results++] = xstrdup(*q);
642 result[n_results] = NULL;
643 }
644 break;
645 }
646 }
647 }
648
649 xfree_split_list(langtag_list);
650
651 return (result);
652 }
653
/*
 * Pick the server locale that best matches the client's language tags.
 *
 * When srvr_locales is NULL the candidate locales are obtained from
 * g11n_getlocales() and released again before returning; a list supplied
 * by the caller is not freed here.  Returns a newly allocated copy of the
 * first locale reported by g11n_langtag_set_locale_set_intersect(), or
 * NULL if there is none.
 */
char *
g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
{
	char **candidates, **matches;
	char *best = NULL;

	candidates = (srvr_locales != NULL) ? srvr_locales : g11n_getlocales();

	matches = g11n_langtag_set_locale_set_intersect(clnt_langtags,
	    candidates);
	if (matches != NULL) {
		if (matches[0] != NULL)
			best = xstrdup(matches[0]);
		xfree_split_list(matches);
	}

	/* only a list we fetched ourselves is ours to free */
	if (candidates != NULL && candidates != srvr_locales)
		g11n_freelist(candidates);

	return (best);
}
678
679 /*
680 * Functions for converting to UTF-8 from the local codeset and
681 * converting from UTF-8 to the local codeset.
682 *
683 * The error_str parameter is an pointer to a char variable where to
684 * store a string suitable for use with error() or fatal() or friends.
685 * It is also used for an error indicator when NULL is returned.
686 *
687 * If conversion isn't necessary, *error_str is set to NULL, and
688 * NULL is returned.
689 * If conversion error occured, *error_str points to an error message,
690 * and NULL is returned.
691 */
692 char *
g11n_convert_from_utf8(const char * str,uint_t * lenp,char ** error_str)693 g11n_convert_from_utf8(const char *str, uint_t *lenp, char **error_str)
694 {
695 static char *last_codeset;
696 static iconv_t cd = (iconv_t)-1;
697 char *codeset;
698
699 *error_str = NULL;
700
701 codeset = nl_langinfo(CODESET);
702
703 if (strcmp(codeset, "UTF-8") == 0)
704 return (NULL);
705
706 if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
707 if (last_codeset != NULL) {
708 xfree(last_codeset);
709 last_codeset = NULL;
710 }
711 if (cd != (iconv_t)-1)
712 (void) iconv_close(cd);
713
714 if ((cd = iconv_open(codeset, "UTF-8")) == (iconv_t)-1) {
715 *error_str = gettext("Cannot convert UTF-8 "
716 "strings to the local codeset");
717 return (NULL);
718 }
719 last_codeset = xstrdup(codeset);
720 }
721 return (do_iconv(cd, str, lenp, error_str));
722 }
723
724 char *
g11n_convert_to_utf8(const char * str,uint_t * lenp,int native,char ** error_str)725 g11n_convert_to_utf8(const char *str, uint_t *lenp,
726 int native, char **error_str)
727 {
728 static char *last_codeset;
729 static iconv_t cd = (iconv_t)-1;
730 char *codeset;
731
732 *error_str = NULL;
733
734 if (native)
735 codeset = native_codeset;
736 else
737 codeset = nl_langinfo(CODESET);
738
739 if (strcmp(codeset, "UTF-8") == 0)
740 return (NULL);
741
742 if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
743 if (last_codeset != NULL) {
744 xfree(last_codeset);
745 last_codeset = NULL;
746 }
747 if (cd != (iconv_t)-1)
748 (void) iconv_close(cd);
749
750 if ((cd = iconv_open("UTF-8", codeset)) == (iconv_t)-1) {
751 *error_str = gettext("Cannot convert the "
752 "local codeset strings to UTF-8");
753 return (NULL);
754 }
755 last_codeset = xstrdup(codeset);
756 }
757 return (do_iconv(cd, str, lenp, error_str));
758 }
759
760 /*
761 * Wrapper around iconv()
762 *
763 * The caller is responsible for freeing the result. NULL is returned when
764 * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
765 * The caller must ensure that the input string isn't NULL pointer.
766 */
767 static char *
do_iconv(iconv_t cd,const char * str,uint_t * lenp,char ** err_str)768 do_iconv(iconv_t cd, const char *str, uint_t *lenp, char **err_str)
769 {
770 int ilen, olen;
771 size_t ileft, oleft;
772 char *ostr, *optr;
773 const char *istr;
774
775 ilen = *lenp;
776 olen = ilen + 1;
777
778 ostr = NULL;
779 for (;;) {
780 olen *= 2;
781 oleft = olen;
782 ostr = optr = xrealloc(ostr, olen);
783 istr = (const char *)str;
784 if ((ileft = ilen) == 0)
785 break;
786
787 if (iconv(cd, &istr, &ileft, &optr, &oleft) != (size_t)-1) {
788 /* success: generate reset sequence */
789 if (iconv(cd, NULL, NULL,
790 &optr, &oleft) == (size_t)-1 && errno == E2BIG) {
791 continue;
792 }
793 break;
794 }
795 /* failed */
796 if (errno != E2BIG) {
797 oleft = olen;
798 (void) iconv(cd, NULL, NULL, &ostr, &oleft);
799 xfree(ostr);
800 *err_str = gettext("Codeset conversion failed");
801 return (NULL);
802 }
803 }
804 olen = optr - ostr;
805 optr = xmalloc(olen + 1);
806 (void) memcpy(optr, ostr, olen);
807 xfree(ostr);
808
809 optr[olen] = '\0';
810 *lenp = olen;
811
812 return (optr);
813 }
814
/*
 * A filter for output strings.  Every byte of a character that is not
 * printable in the current locale, newline, carriage return and tab
 * excepted, is replaced by a backslash and three octal digits ("\ooo").
 * Bytes that mbtowc() cannot decode are treated as one-byte characters.
 *
 * The input buffer is resized with xrealloc() to hold the filtered text
 * and returned, so the caller must continue with the returned pointer
 * rather than with s.
 */
char *
g11n_filter_string(char *s)
{
	int max_bytes = MB_CUR_MAX;
	int nbytes, total, escape;
	char *input = s;
	char *scratch, *out;
	wchar_t wc;

	/* worst case: every input byte becomes the four bytes of "\ooo" */
	scratch = out = xmalloc(strlen(s) * 4 + 1);

	while (*s != '\0') {
		nbytes = mbtowc(&wc, s, max_bytes);
		if (nbytes <= 0) {
			/* undecodable: handle this byte on its own */
			nbytes = 1;
			wc = (unsigned char)*s;
		}

		escape = !iswprint(wc) &&
		    wc != L'\n' && wc != L'\r' && wc != L'\t';

		/* copy or escape each byte of the character just decoded */
		for (; nbytes != 0; nbytes--, s++) {
			if (escape)
				out += sprintf(out, "\\%03o",
				    (unsigned char)*s);
			else
				*out++ = *s;
		}
	}
	*out = '\0';

	/* move the filtered text, NUL included, back into the input buffer */
	total = out - scratch + 1;
	out = xrealloc(input, total);
	(void) memcpy(out, scratch, total);
	xfree(scratch);

	return (out);
}
862
863 /*
864 * Once we negotiated with a langtag, server need to map it to a system
865 * locale. That is done based on the locale supported on the server side.
866 * We know (with the locale supported on Solaris) how the langtag is
867 * mapped to. However, from the client point of view, there is no way to
868 * know exactly what locale(encoding) will be used.
869 *
 * With the fix for SSH_BUG_STRING_ENCODING, it is guaranteed that UTF-8
 * characters always come over the wire, so this is no longer a problem
 * as long as both sides have the fix. However, if the server side doesn't
 * have the fix, the client can't safely perform the code conversion since
 * the incoming character encoding is unknown.
875 *
876 * To alleviate this situation, we take an empirical approach to find
877 * encoding from langtag.
878 *
879 * If langtag has a subtag, we can directly map the langtag to UTF-8 locale
880 * (eg en-US can be mapped to en_US.UTF-8) with a few exceptions.
881 * Certain xx_YY locales don't support UTF-8 encoding (probably due to lack
882 * of L10N support ..). Those are:
883 *
884 * no_NO, no_NY, sr_SP, sr_YU
885 *
886 * They all use ISO8859-X encoding.
887 *
888 * For those "xx" langtags, some of them can be mapped to "xx.UTF-8",
889 * but others cannot. So we need to use the "xx" as the locale name.
890 * Those locales are:
891 *
892 * ar, ca, cs, da, et, fi, he, hu, ja, lt, lv, nl, no, pt, sh, th, tr
893 *
894 * Their encoding vary. They could be ISO8859-X or EUC or something else.
895 * So we don't perform code conversion for these langtags.
896 */
static const char *non_utf8_langtag[] = {
	/* xx-YY tags */
	"no-NO", "no-NY", "sr-SP", "sr-YU",
	/* xx tags */
	"ar", "ca", "cs", "da", "et", "fi", "he", "hu", "ja",
	"lt", "lv", "nl", "no", "pt", "sh", "th", "tr",
	/* end of list */
	NULL
};
901
902 void
g11n_test_langtag(const char * lang,int server)903 g11n_test_langtag(const char *lang, int server)
904 {
905 const char **lp;
906
907 if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS) {
908 /*
909 * We negotiated with real locale name (not lang tag).
910 * We shouldn't expect UTF-8, thus shouldn't do code
911 * conversion.
912 */
913 datafellows |= SSH_BUG_STRING_ENCODING;
914 return;
915 }
916
917 if (datafellows & SSH_BUG_STRING_ENCODING) {
918 if (server) {
919 /*
920 * Whatever bug exists in the client side, server
921 * side has nothing to do, since server has no way
922 * to know what actual encoding is used on the client
923 * side. For example, even if we negotiated with
924 * en_US, client locale could be en_US.ISO8859-X or
925 * en_US.UTF-8.
926 */
927 return;
928 }
929 /*
930 * We are on the client side. We'll check with known
931 * locales to see if non-UTF8 characters could come in.
932 */
933 for (lp = non_utf8_langtag; *lp != NULL; lp++) {
934 if (strcmp(lang, *lp) == 0)
935 break;
936 }
937 if (*lp == NULL) {
938 debug2("Server is expected to use UTF-8 locale");
939 datafellows &= ~SSH_BUG_STRING_ENCODING;
940 } else {
941 /*
942 * Server is expected to use non-UTF8 encoding.
943 */
944 debug2("Enforcing no code conversion: %s", lang);
945 }
946 }
947 }
948
/*
 * Free all strings in the list and then free the list itself.  The list
 * must end with a NULL pointer.  A NULL list is accepted and ignored, so
 * callers do not have to special-case an absent list.
 */
void
g11n_freelist(char **list)
{
	char **p;

	if (list == NULL)
		return;

	for (p = list; *p != NULL; p++)
		xfree(*p);

	xfree(list);
}
965