1 /*-
2 * Copyright (c) 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
6 * Copyright (c) 2011, 2012
7 * Zhihao Yuan. All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12 #include "config.h"
13
14 #include <sys/types.h>
15 #include <sys/queue.h>
16 #include <sys/time.h>
17
18 #include <bitstring.h>
19 #include <errno.h>
20 #include <limits.h>
21 #include <langinfo.h>
22 #include <locale.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <strings.h>
27 #include <unistd.h>
28
29 #include "common.h"
30
/*
 * codeset --
 *	Get the locale encoding.
 *
 *	The name is looked up on the first call only and a private copy is
 *	kept from then on.  The string handed back by nl_langinfo() may be
 *	overwritten or invalidated by a later nl_langinfo() or setlocale()
 *	call, so the library's own pointer is never cached.  If the copy
 *	cannot be allocated, the library's string is returned as is and the
 *	lookup is retried on the next call.
 *
 * PUBLIC: char * codeset(void);
 */
char *
codeset(void)
{
	static char *cs;
	char *name;
	size_t size;

	if (cs != NULL)
		return cs;

	name = nl_langinfo(CODESET);
	size = strlen(name) + 1;
	if ((cs = malloc(size)) == NULL)
		return name;
	memcpy(cs, name, size);

	return cs;
}
47
48 #ifdef USE_WIDECHAR
49 static int
raw2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,CHAR_T ** dst)50 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
51 CHAR_T **dst)
52 {
53 int i;
54 CHAR_T **tostr = &cw->bp1.wc;
55 size_t *blen = &cw->blen1;
56
57 BINC_RETW(NULL, *tostr, *blen, len);
58
59 *tolen = len;
60 for (i = 0; i < len; ++i)
61 (*tostr)[i] = (u_char) str[i];
62
63 *dst = cw->bp1.wc;
64
65 return 0;
66 }
67
/* Size in bytes of the on-stack staging buffer used around iconv. */
#define CONV_BUFFER_SIZE 512

#ifdef USE_ICONV
/*
 * CONVERT --
 *	Run the next piece of str through the iconv descriptor `id' into the
 *	enclosing function's `buffer'.  str and left (the number of bytes
 *	remaining in str) are advanced by iconv; len receives the number of
 *	bytes placed in buffer and src is pointed at buffer.
 *
 *	An iconv failure other than E2BIG jumps to the enclosing function's
 *	`err' label.  So does a call that yields no output at all, after
 *	setting `error' to -left.
 *
 *	Uses `id', `buffer', `error' and the `err' label of the caller.
 */
#define CONVERT(str, left, src, len)					\
	do {								\
		char *conv_out = buffer;				\
		size_t conv_room = CONV_BUFFER_SIZE;			\
		errno = 0;						\
		if (iconv(id, (iconv_src_t)&str, &left, &conv_out,	\
		    &conv_room) == (size_t)-1 && errno != E2BIG)	\
			goto err;					\
		len = CONV_BUFFER_SIZE - conv_room;			\
		if (len == 0) {						\
			error = -left;					\
			goto err;					\
		}							\
		src = buffer;						\
	} while (0)

/*
 * IC_RESET --
 *	If the caller's `id' is an open descriptor, call iconv with all-NULL
 *	buffers to put it back into its initial conversion state.
 */
#define IC_RESET()							\
	do {								\
		if (id != (iconv_t)-1)					\
			iconv(id, NULL, NULL, NULL, NULL);		\
	} while (0)
#else
/* Without iconv both macros expand to nothing. */
#define CONVERT(str, left, src, len)
#define IC_RESET()
#endif
99
100 static int
default_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,CHAR_T ** dst,iconv_t id)101 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
102 size_t *tolen, CHAR_T **dst, iconv_t id)
103 {
104 size_t i = 0, j;
105 CHAR_T **tostr = &cw->bp1.wc;
106 size_t *blen = &cw->blen1;
107 mbstate_t mbs;
108 size_t n;
109 ssize_t nlen = len;
110 char *src = (char *)str;
111 #ifdef USE_ICONV
112 char buffer[CONV_BUFFER_SIZE];
113 #endif
114 size_t left = len;
115 int error = 1;
116
117 memset(&mbs, 0, sizeof(mbs));
118 BINC_RETW(NULL, *tostr, *blen, nlen);
119
120 #ifdef USE_ICONV
121 if (id != (iconv_t)-1)
122 CONVERT(str, left, src, len);
123 #endif
124
125 for (i = 0, j = 0; j < len; ) {
126 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
127 /* NULL character converted */
128 if (n == -2)
129 error = -(len-j);
130 if (n == -1 || n == -2)
131 goto err;
132 if (n == 0)
133 n = 1;
134 j += n;
135 if (++i >= *blen) {
136 nlen += 256;
137 BINC_RETW(NULL, *tostr, *blen, nlen);
138 }
139 if (id != (iconv_t)-1 && j == len && left) {
140 CONVERT(str, left, src, len);
141 j = 0;
142 }
143 }
144
145 error = 0;
146 err:
147 *tolen = i;
148 *dst = cw->bp1.wc;
149 IC_RESET();
150
151 return error;
152 }
153
154 static int
fe_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,CHAR_T ** dst)155 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
156 CHAR_T **dst)
157 {
158 return default_char2int(sp, str, len, cw, tolen, dst,
159 sp->conv.id[IC_FE_CHAR2INT]);
160 }
161
162 static int
ie_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,CHAR_T ** dst)163 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
164 CHAR_T **dst)
165 {
166 return default_char2int(sp, str, len, cw, tolen, dst,
167 sp->conv.id[IC_IE_CHAR2INT]);
168 }
169
170 static int
cs_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,CHAR_T ** dst)171 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
172 CHAR_T **dst)
173 {
174 return default_char2int(sp, str, len, cw, tolen, dst, (iconv_t)-1);
175 }
176
177 static int
int2raw(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,char ** dst)178 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
179 char **dst)
180 {
181 int i;
182 char **tostr = &cw->bp1.c;
183 size_t *blen = &cw->blen1;
184
185 BINC_RETC(NULL, *tostr, *blen, len);
186
187 *tolen = len;
188 for (i = 0; i < len; ++i)
189 (*tostr)[i] = str[i];
190
191 *dst = cw->bp1.c;
192
193 return 0;
194 }
195
196 static int
default_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,char ** pdst,iconv_t id)197 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
198 size_t *tolen, char **pdst, iconv_t id)
199 {
200 size_t i, j, offset = 0;
201 char **tostr = &cw->bp1.c;
202 size_t *blen = &cw->blen1;
203 mbstate_t mbs;
204 size_t n;
205 ssize_t nlen = len + MB_CUR_MAX;
206 char *dst;
207 size_t buflen;
208 #ifdef USE_ICONV
209 char buffer[CONV_BUFFER_SIZE];
210 #endif
211 int error = 1;
212
213 /* convert first len bytes of buffer and append it to cw->bp
214 * len is adjusted => 0
215 * offset contains the offset in cw->bp and is adjusted
216 * cw->bp is grown as required
217 */
218 #ifdef USE_ICONV
219 #define CONVERT2(_buffer, lenp, cw, offset) \
220 do { \
221 char *bp = _buffer; \
222 int ret; \
223 do { \
224 size_t outleft = cw->blen1 - offset; \
225 char *obp = cw->bp1.c + offset; \
226 if (cw->blen1 < offset + MB_CUR_MAX) { \
227 nlen += 256; \
228 BINC_RETC(NULL, cw->bp1.c, cw->blen1, \
229 nlen); \
230 } \
231 errno = 0; \
232 ret = iconv(id, (iconv_src_t)&bp, lenp, &obp, \
233 &outleft); \
234 if (ret == -1 && errno != E2BIG) \
235 goto err; \
236 offset = cw->blen1 - outleft; \
237 } while (ret != 0); \
238 } while (0)
239 #else
240 #define CONVERT2(_buffer, lenp, cw, offset)
241 #endif
242
243
244 memset(&mbs, 0, sizeof(mbs));
245 BINC_RETC(NULL, *tostr, *blen, nlen);
246 dst = *tostr; buflen = *blen;
247
248 #ifdef USE_ICONV
249 if (id != (iconv_t)-1) {
250 dst = buffer; buflen = CONV_BUFFER_SIZE;
251 }
252 #endif
253
254 for (i = 0, j = 0; i < len; ++i) {
255 n = wcrtomb(dst+j, str[i], &mbs);
256 if (n == -1)
257 goto err;
258 j += n;
259 if (buflen < j + MB_CUR_MAX) {
260 if (id != (iconv_t)-1) {
261 CONVERT2(buffer, &j, cw, offset);
262 } else {
263 nlen += 256;
264 BINC_RETC(NULL, *tostr, *blen, nlen);
265 dst = *tostr; buflen = *blen;
266 }
267 }
268 }
269
270 n = wcrtomb(dst+j, L'\0', &mbs);
271 j += n - 1; /* don't count NUL at the end */
272 *tolen = j;
273
274 if (id != (iconv_t)-1) {
275 CONVERT2(buffer, &j, cw, offset);
276 /* back to the initial state */
277 CONVERT2(NULL, NULL, cw, offset);
278 *tolen = offset;
279 }
280
281 error = 0;
282 err:
283 if (error)
284 *tolen = j;
285 *pdst = cw->bp1.c;
286 IC_RESET();
287
288 return error;
289 }
290
291 static int
fe_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,char ** dst)292 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
293 size_t *tolen, char **dst)
294 {
295 return default_int2char(sp, str, len, cw, tolen, dst,
296 sp->conv.id[IC_FE_INT2CHAR]);
297 }
298
299 static int
cs_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,char ** dst)300 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
301 size_t *tolen, char **dst)
302 {
303 return default_int2char(sp, str, len, cw, tolen, dst, (iconv_t)-1);
304 }
305
306 #endif
307
308 /*
309 * conv_init --
310 * Initialize the iconv environment.
311 *
312 * PUBLIC: void conv_init(SCR *, SCR *);
313 */
314 void
conv_init(SCR * orig,SCR * sp)315 conv_init(SCR *orig, SCR *sp)
316 {
317 int i;
318
319 if (orig == NULL)
320 setlocale(LC_ALL, "");
321 if (orig != NULL)
322 memmove(&sp->conv, &orig->conv, sizeof(CONV));
323 #ifdef USE_WIDECHAR
324 else {
325 char *ctype = setlocale(LC_CTYPE, NULL);
326
327 /*
328 * XXX
329 * This hack fixes the libncursesw issue on FreeBSD.
330 */
331 if (!strcmp(ctype, "ko_KR.CP949"))
332 setlocale(LC_CTYPE, "ko_KR.eucKR");
333 else if (!strcmp(ctype, "zh_CN.GB2312"))
334 setlocale(LC_CTYPE, "zh_CN.eucCN");
335 else if (!strcmp(ctype, "zh_CN.GBK"))
336 setlocale(LC_CTYPE, "zh_CN.GB18030");
337
338 /*
339 * Switch to 8bit mode if locale is C;
340 * LC_CTYPE should be reseted to C if unmatched.
341 */
342 if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) {
343 sp->conv.sys2int = sp->conv.file2int = raw2int;
344 sp->conv.int2sys = sp->conv.int2file = int2raw;
345 sp->conv.input2int = raw2int;
346 } else {
347 sp->conv.sys2int = cs_char2int;
348 sp->conv.int2sys = cs_int2char;
349 sp->conv.file2int = fe_char2int;
350 sp->conv.int2file = fe_int2char;
351 sp->conv.input2int = ie_char2int;
352 }
353 #ifdef USE_ICONV
354 o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0);
355 #endif
356 }
357 #endif
358
359 /* iconv descriptors must be distinct to screens. */
360 for (i = 0; i <= IC_IE_TO_UTF16; ++i)
361 sp->conv.id[i] = (iconv_t)-1;
362 #ifdef USE_ICONV
363 conv_enc(sp, O_INPUTENCODING, 0);
364 #endif
365 }
366
367 /*
368 * conv_enc --
369 * Convert file/input encoding.
370 *
371 * PUBLIC: int conv_enc(SCR *, int, char *);
372 */
373 int
conv_enc(SCR * sp,int option,char * enc)374 conv_enc(SCR *sp, int option, char *enc)
375 {
376 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
377 iconv_t *c2w, *w2c;
378 iconv_t id_c2w, id_w2c;
379
380 switch (option) {
381 case O_FILEENCODING:
382 c2w = sp->conv.id + IC_FE_CHAR2INT;
383 w2c = sp->conv.id + IC_FE_INT2CHAR;
384 if (!enc)
385 enc = O_STR(sp, O_FILEENCODING);
386
387 if (strcasecmp(codeset(), enc)) {
388 if ((id_c2w = iconv_open(codeset(), enc)) ==
389 (iconv_t)-1)
390 goto err;
391 if ((id_w2c = iconv_open(enc, codeset())) ==
392 (iconv_t)-1)
393 goto err;
394 } else {
395 id_c2w = (iconv_t)-1;
396 id_w2c = (iconv_t)-1;
397 }
398
399 break;
400
401 case O_INPUTENCODING:
402 c2w = sp->conv.id + IC_IE_CHAR2INT;
403 w2c = sp->conv.id + IC_IE_TO_UTF16;
404 if (!enc)
405 enc = O_STR(sp, O_INPUTENCODING);
406
407 if (strcasecmp(codeset(), enc)) {
408 if ((id_c2w = iconv_open(codeset(), enc)) ==
409 (iconv_t)-1)
410 goto err;
411 } else
412 id_c2w = (iconv_t)-1;
413
414 /* UTF-16 can not be locale and can not be inputed. */
415 if ((id_w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1)
416 goto err;
417
418 break;
419
420 default:
421 abort();
422 }
423
424 if (*c2w != (iconv_t)-1)
425 iconv_close(*c2w);
426 if (*w2c != (iconv_t)-1)
427 iconv_close(*w2c);
428
429 *c2w = id_c2w;
430 *w2c = id_w2c;
431
432 F_CLR(sp, SC_CONV_ERROR);
433 F_SET(sp, SC_SCR_REFORMAT);
434
435 return 0;
436 err:
437 #endif
438 switch (option) {
439 case O_FILEENCODING:
440 msgq(sp, M_ERR, "321|File encoding conversion not supported");
441 break;
442 case O_INPUTENCODING:
443 msgq(sp, M_ERR, "322|Input encoding conversion not supported");
444 break;
445 }
446 return 1;
447 }
448
449 /*
450 * conv_end --
451 * Close the iconv descriptors, release the buffer.
452 *
453 * PUBLIC: void conv_end(SCR *);
454 */
455 void
conv_end(SCR * sp)456 conv_end(SCR *sp)
457 {
458 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
459 int i;
460 for (i = 0; i <= IC_IE_TO_UTF16; ++i)
461 if (sp->conv.id[i] != (iconv_t)-1)
462 iconv_close(sp->conv.id[i]);
463 free(sp->cw.bp1.c);
464 #endif
465 }
466