xref: /freebsd/contrib/nvi/common/conv.c (revision 8d20be1e22095c27faf8fe8b2f0d089739cc742e)
1 /*-
2  * Copyright (c) 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  * Copyright (c) 2011, 2012
7  *	Zhihao Yuan.  All rights reserved.
8  *
9  * See the LICENSE file for redistribution information.
10  */
11 
12 #include "config.h"
13 
14 #ifndef lint
15 static const char sccsid[] = "$Id: conv.c,v 2.39 2013/07/01 23:28:13 zy Exp $";
16 #endif /* not lint */
17 
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/time.h>
21 
22 #include <bitstring.h>
23 #include <errno.h>
24 #include <limits.h>
25 #include <langinfo.h>
26 #include <locale.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <strings.h>
31 #include <unistd.h>
32 
33 #include "common.h"
34 
/*
 * codeset --
 *	Return the name of the locale's character encoding.
 *
 *	nl_langinfo(CODESET) is queried on the first call only; the pointer
 *	it returned is kept in a function-local static and handed back
 *	unchanged by every later call.
 *
 * PUBLIC: char * codeset __P((void));
 */
char *
codeset(void)
{
    static char *cached;

    /* Already looked up once: reuse the stored pointer. */
    if (cached != NULL)
	return cached;

    cached = nl_langinfo(CODESET);
    return cached;
}
49 
50 #ifdef USE_WIDECHAR
51 static int
52 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
53 	size_t *tolen, CHAR_T **dst)
54 {
55     int i;
56     CHAR_T **tostr = &cw->bp1.wc;
57     size_t  *blen = &cw->blen1;
58 
59     BINC_RETW(NULL, *tostr, *blen, len);
60 
61     *tolen = len;
62     for (i = 0; i < len; ++i)
63 	(*tostr)[i] = (u_char) str[i];
64 
65     *dst = cw->bp1.wc;
66 
67     return 0;
68 }
69 
#define CONV_BUFFER_SIZE    512
/*
 * CONVERT --
 *	Run one iconv() pass over the input at str, which has left bytes
 *	remaining, writing at most CONV_BUFFER_SIZE bytes into the enclosing
 *	function's array named buffer.  iconv() advances str and decreases
 *	left.  On normal completion len holds the number of bytes placed in
 *	buffer and src points at buffer.
 *
 *	The macro relies on these names in the expanding function: buffer,
 *	id (the iconv descriptor), error and the label err.  An iconv()
 *	failure other than E2BIG jumps to err.  If no output at all was
 *	produced, error is set to -left before jumping to err.
 *
 * IC_RESET --
 *	Call iconv() on id with all-NULL buffer arguments, unless id is
 *	(iconv_t)-1.
 *
 * Without USE_ICONV both macros expand to nothing.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)					\
    do {								\
	char *outp = buffer;						\
	size_t room = CONV_BUFFER_SIZE;					\
									\
	errno = 0;							\
	if (iconv(id, (iconv_src_t)&str, &left, &outp, &room) == -1 &&	\
	    errno != E2BIG)						\
	    goto err;							\
	len = CONV_BUFFER_SIZE - room;					\
	if (len == 0) {							\
	    error = -left;						\
	    goto err;							\
	}								\
	src = buffer;							\
    } while (0)

#define IC_RESET()							\
    do {								\
	if (id != (iconv_t)-1)						\
	    iconv(id, NULL, NULL, NULL, NULL);				\
    } while (0)
#else
#define CONVERT(str, left, src, len)
#define IC_RESET()
#endif
101 
102 static int
103 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
104 		size_t *tolen, CHAR_T **dst, iconv_t id)
105 {
106     size_t i = 0, j;
107     CHAR_T **tostr = &cw->bp1.wc;
108     size_t  *blen = &cw->blen1;
109     mbstate_t mbs;
110     size_t   n;
111     ssize_t  nlen = len;
112     char *src = (char *)str;
113 #ifdef USE_ICONV
114     char	buffer[CONV_BUFFER_SIZE];
115 #endif
116     size_t	left = len;
117     int		error = 1;
118 
119     BZERO(&mbs, 1);
120     BINC_RETW(NULL, *tostr, *blen, nlen);
121 
122 #ifdef USE_ICONV
123     if (id != (iconv_t)-1)
124 	CONVERT(str, left, src, len);
125 #endif
126 
127     for (i = 0, j = 0; j < len; ) {
128 	n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
129 	/* NULL character converted */
130 	if (n == -2) error = -(len-j);
131 	if (n == -1 || n == -2) goto err;
132 	if (n == 0) n = 1;
133 	j += n;
134 	if (++i >= *blen) {
135 	    nlen += 256;
136 	    BINC_RETW(NULL, *tostr, *blen, nlen);
137 	}
138 	if (id != (iconv_t)-1 && j == len && left) {
139 	    CONVERT(str, left, src, len);
140 	    j = 0;
141 	}
142     }
143 
144     error = 0;
145 err:
146     *tolen = i;
147     *dst = cw->bp1.wc;
148     IC_RESET();
149 
150     return error;
151 }
152 
153 static int
154 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
155 	    size_t *tolen, CHAR_T **dst)
156 {
157     return default_char2int(sp, str, len, cw, tolen, dst,
158 	sp->conv.id[IC_FE_CHAR2INT]);
159 }
160 
161 static int
162 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
163 	    size_t *tolen, CHAR_T **dst)
164 {
165     return default_char2int(sp, str, len, cw, tolen, dst,
166 	sp->conv.id[IC_IE_CHAR2INT]);
167 }
168 
169 static int
170 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
171 	    size_t *tolen, CHAR_T **dst)
172 {
173     return default_char2int(sp, str, len, cw, tolen, dst,
174 	(iconv_t)-1);
175 }
176 
177 static int
178 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
179 	size_t *tolen, char **dst)
180 {
181     int i;
182     char **tostr = &cw->bp1.c;
183     size_t  *blen = &cw->blen1;
184 
185     BINC_RETC(NULL, *tostr, *blen, len);
186 
187     *tolen = len;
188     for (i = 0; i < len; ++i)
189 	(*tostr)[i] = str[i];
190 
191     *dst = cw->bp1.c;
192 
193     return 0;
194 }
195 
196 static int
197 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
198 		size_t *tolen, char **pdst, iconv_t id)
199 {
200     size_t i, j, offset = 0;
201     char **tostr = &cw->bp1.c;
202     size_t  *blen = &cw->blen1;
203     mbstate_t mbs;
204     size_t n;
205     ssize_t  nlen = len + MB_CUR_MAX;
206     char *dst;
207     size_t buflen;
208 #ifdef USE_ICONV
209     char	buffer[CONV_BUFFER_SIZE];
210 #endif
211     int		error = 1;
212 
213 /* convert first len bytes of buffer and append it to cw->bp
214  * len is adjusted => 0
215  * offset contains the offset in cw->bp and is adjusted
216  * cw->bp is grown as required
217  */
218 #ifdef USE_ICONV
219 #define CONVERT2(_buffer, lenp, cw, offset)				\
220     do {								\
221 	char *bp = _buffer;						\
222 	int ret;							\
223 	do {								\
224 	    size_t outleft = cw->blen1 - offset;			\
225 	    char *obp = cw->bp1.c + offset;				\
226 	    if (cw->blen1 < offset + MB_CUR_MAX) {		    	\
227 		nlen += 256;						\
228 		BINC_RETC(NULL, cw->bp1.c, cw->blen1, nlen);		\
229 	    }						    		\
230 	    errno = 0;						    	\
231 	    ret = iconv(id, (iconv_src_t)&bp, lenp, &obp, &outleft);	\
232 	    if (ret == -1 && errno != E2BIG)				\
233 		goto err;						\
234 	    offset = cw->blen1 - outleft;			        \
235 	} while (ret != 0); 					        \
236     } while (0)
237 #else
238 #define CONVERT2(_buffer, lenp, cw, offset)
239 #endif
240 
241 
242     BZERO(&mbs, 1);
243     BINC_RETC(NULL, *tostr, *blen, nlen);
244     dst = *tostr; buflen = *blen;
245 
246 #ifdef USE_ICONV
247     if (id != (iconv_t)-1) {
248 	dst = buffer; buflen = CONV_BUFFER_SIZE;
249     }
250 #endif
251 
252     for (i = 0, j = 0; i < len; ++i) {
253 	n = wcrtomb(dst+j, str[i], &mbs);
254 	if (n == -1) goto err;
255 	j += n;
256 	if (buflen < j + MB_CUR_MAX) {
257 	    if (id != (iconv_t)-1) {
258 		CONVERT2(buffer, &j, cw, offset);
259 	    } else {
260 		nlen += 256;
261 		BINC_RETC(NULL, *tostr, *blen, nlen);
262 		dst = *tostr; buflen = *blen;
263 	    }
264 	}
265     }
266 
267     n = wcrtomb(dst+j, L'\0', &mbs);
268     j += n - 1;				/* don't count NUL at the end */
269     *tolen = j;
270 
271     if (id != (iconv_t)-1) {
272 	CONVERT2(buffer, &j, cw, offset);
273 	CONVERT2(NULL, NULL, cw, offset);  /* back to the initial state */
274 	*tolen = offset;
275     }
276 
277     error = 0;
278 err:
279     if (error)
280 	*tolen = j;
281     *pdst = cw->bp1.c;
282     IC_RESET();
283 
284     return error;
285 }
286 
287 static int
288 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
289 	    size_t *tolen, char **dst)
290 {
291     return default_int2char(sp, str, len, cw, tolen, dst,
292 	sp->conv.id[IC_FE_INT2CHAR]);
293 }
294 
295 static int
296 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
297 	    size_t *tolen, char **dst)
298 {
299     return default_int2char(sp, str, len, cw, tolen, dst,
300 	(iconv_t)-1);
301 }
302 
303 #endif
304 
305 /*
306  * conv_init --
307  *	Initialize the iconv environment.
308  *
309  * PUBLIC: void conv_init __P((SCR *, SCR *));
310  */
311 void
312 conv_init(SCR *orig, SCR *sp)
313 {
314     int i;
315 
316     if (orig == NULL)
317 	setlocale(LC_ALL, "");
318     if (orig != NULL)
319 	BCOPY(&orig->conv, &sp->conv, 1);
320 #ifdef USE_WIDECHAR
321     else {
322 	char *ctype = setlocale(LC_CTYPE, NULL);
323 
324 	/*
325 	 * XXX
326 	 * This hack fixes the libncursesw issue on FreeBSD.
327 	 */
328 	if (!strcmp(ctype, "ko_KR.CP949"))
329 	    setlocale(LC_CTYPE, "ko_KR.eucKR");
330 	else if (!strcmp(ctype, "zh_CN.GB2312"))
331 	    setlocale(LC_CTYPE, "zh_CN.eucCN");
332 	else if (!strcmp(ctype, "zh_CN.GBK"))
333 	    setlocale(LC_CTYPE, "zh_CN.GB18030");
334 
335 	/*
336 	 * Switch to 8bit mode if locale is C;
337 	 * LC_CTYPE should be reseted to C if unmatched.
338 	 */
339 	if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) {
340 	    sp->conv.sys2int = sp->conv.file2int = raw2int;
341 	    sp->conv.int2sys = sp->conv.int2file = int2raw;
342 	    sp->conv.input2int = raw2int;
343 	} else {
344 	    sp->conv.sys2int = cs_char2int;
345 	    sp->conv.int2sys = cs_int2char;
346 	    sp->conv.file2int = fe_char2int;
347 	    sp->conv.int2file = fe_int2char;
348 	    sp->conv.input2int = ie_char2int;
349 	}
350 #ifdef USE_ICONV
351 	o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0);
352 #endif
353     }
354 #endif
355 
356     /* iconv descriptors must be distinct to screens. */
357     for (i = 0; i <= IC_IE_TO_UTF16; ++i)
358 	sp->conv.id[i] = (iconv_t)-1;
359 #ifdef USE_ICONV
360     conv_enc(sp, O_INPUTENCODING, 0);
361 #endif
362 }
363 
364 /*
365  * conv_enc --
366  *	Convert file/input encoding.
367  *
368  * PUBLIC: int conv_enc __P((SCR *, int, char *));
369  */
370 int
371 conv_enc(SCR *sp, int option, char *enc)
372 {
373 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
374     iconv_t *c2w, *w2c;
375 
376     switch (option) {
377     case O_FILEENCODING:
378 	c2w = sp->conv.id + IC_FE_CHAR2INT;
379 	w2c = sp->conv.id + IC_FE_INT2CHAR;
380 	if (!enc) enc = O_STR(sp, O_FILEENCODING);
381 	if (*c2w != (iconv_t)-1)
382 	    iconv_close(*c2w);
383 	if (*w2c != (iconv_t)-1)
384 	    iconv_close(*w2c);
385 	if (strcasecmp(codeset(), enc)) {
386 	    if ((*c2w = iconv_open(codeset(), enc)) == (iconv_t)-1)
387 		goto err;
388 	    if ((*w2c = iconv_open(enc, codeset())) == (iconv_t)-1)
389 		goto err;
390 	} else *c2w = *w2c = (iconv_t)-1;
391 	break;
392     case O_INPUTENCODING:
393 	c2w = sp->conv.id + IC_IE_CHAR2INT;
394 	w2c = sp->conv.id + IC_IE_TO_UTF16;
395 	if (!enc) enc = O_STR(sp, O_INPUTENCODING);
396 	if (*c2w != (iconv_t)-1)
397 	    iconv_close(*c2w);
398 	if (*w2c != (iconv_t)-1)
399 	    iconv_close(*w2c);
400 	if (strcasecmp(codeset(), enc)) {
401 	    if ((*c2w = iconv_open(codeset(), enc)) == (iconv_t)-1)
402 		goto err;
403 	} else *c2w = (iconv_t)-1;
404 	/* UTF-16 can not be locale and can not be inputed. */
405 	if ((*w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1)
406 	    goto err;
407 	break;
408     }
409 
410     F_CLR(sp, SC_CONV_ERROR);
411     F_SET(sp, SC_SCR_REFORMAT);
412 
413     return 0;
414 err:
415 #endif
416     switch (option) {
417     case O_FILEENCODING:
418 	msgq(sp, M_ERR,
419 	    "321|File encoding conversion not supported");
420 	break;
421     case O_INPUTENCODING:
422 	msgq(sp, M_ERR,
423 	    "322|Input encoding conversion not supported");
424 	break;
425     }
426     return 1;
427 }
428 
429 /*
430  * conv_end --
431  *	Close the iconv descriptors, release the buffer.
432  *
433  * PUBLIC: void conv_end __P((SCR *));
434  */
435 void
436 conv_end(SCR *sp)
437 {
438 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
439     int i;
440     for (i = 0; i <= IC_IE_TO_UTF16; ++i)
441 	if (sp->conv.id[i] != (iconv_t)-1)
442 	    iconv_close(sp->conv.id[i]);
443 	if (sp->cw.bp1.c != NULL)
444 	    free(sp->cw.bp1.c);
445 #endif
446 }
447