1 /*- 2 * Copyright (c) 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1993, 1994, 1995, 1996 5 * Keith Bostic. All rights reserved. 6 * Copyright (c) 2011, 2012 7 * Zhihao Yuan. All rights reserved. 8 * 9 * See the LICENSE file for redistribution information. 10 */ 11 12 #include "config.h" 13 14 #ifndef lint 15 static const char sccsid[] = "$Id: conv.c,v 2.39 2013/07/01 23:28:13 zy Exp $"; 16 #endif /* not lint */ 17 18 #include <sys/types.h> 19 #include <sys/queue.h> 20 #include <sys/time.h> 21 22 #include <bitstring.h> 23 #include <errno.h> 24 #include <limits.h> 25 #include <langinfo.h> 26 #include <locale.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <strings.h> 31 #include <unistd.h> 32 33 #include "common.h" 34 35 /* 36 * codeset -- 37 * Get the locale encoding. 38 * 39 * PUBLIC: char * codeset __P((void)); 40 */ 41 char * 42 codeset(void) { 43 static char *cs; 44 45 if (cs == NULL) 46 cs = nl_langinfo(CODESET); 47 return cs; 48 } 49 50 #ifdef USE_WIDECHAR 51 static int 52 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 53 size_t *tolen, CHAR_T **dst) 54 { 55 int i; 56 CHAR_T **tostr = &cw->bp1.wc; 57 size_t *blen = &cw->blen1; 58 59 BINC_RETW(NULL, *tostr, *blen, len); 60 61 *tolen = len; 62 for (i = 0; i < len; ++i) 63 (*tostr)[i] = (u_char) str[i]; 64 65 *dst = cw->bp1.wc; 66 67 return 0; 68 } 69 70 #define CONV_BUFFER_SIZE 512 71 /* fill the buffer with codeset encoding of string pointed to by str 72 * left has the number of bytes left in str and is adjusted 73 * len contains the number of bytes put in the buffer 74 */ 75 #ifdef USE_ICONV 76 #define CONVERT(str, left, src, len) \ 77 do { \ 78 size_t outleft; \ 79 char *bp = buffer; \ 80 outleft = CONV_BUFFER_SIZE; \ 81 errno = 0; \ 82 if (iconv(id, (iconv_src_t)&str, &left, &bp, &outleft) == -1 && \ 83 errno != E2BIG) \ 84 goto err; \ 85 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \ 86 error = -left; \ 87 goto err; \ 88 } \ 89 src = buffer; \ 90 } while (0) 91 92 #define IC_RESET() \ 93 do { \ 94 if (id != (iconv_t)-1) \ 95 iconv(id, NULL, NULL, NULL, NULL); \ 96 } while(0) 97 #else 98 #define CONVERT(str, left, src, len) 99 #define IC_RESET() 100 #endif 101 102 static int 103 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 104 size_t *tolen, CHAR_T **dst, iconv_t id) 105 { 106 size_t i = 0, j; 107 CHAR_T **tostr = &cw->bp1.wc; 108 size_t *blen = &cw->blen1; 109 mbstate_t mbs; 110 size_t n; 111 ssize_t nlen = len; 112 char *src = (char *)str; 113 #ifdef USE_ICONV 114 char buffer[CONV_BUFFER_SIZE]; 115 #endif 116 size_t left = len; 117 int error = 1; 118 119 BZERO(&mbs, 1); 120 BINC_RETW(NULL, *tostr, *blen, nlen); 121 122 #ifdef USE_ICONV 123 if (id != (iconv_t)-1) 124 CONVERT(str, left, src, len); 125 #endif 126 127 for (i = 0, j = 0; j < len; ) { 128 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs); 129 /* NULL character converted */ 130 if (n == -2) error = -(len-j); 131 if (n == -1 || n == -2) goto err; 132 if (n == 0) n = 1; 133 j += n; 134 if (++i >= *blen) { 135 nlen += 256; 136 BINC_RETW(NULL, *tostr, *blen, nlen); 137 } 138 if (id != (iconv_t)-1 && j == len && left) { 139 CONVERT(str, left, src, len); 140 j = 0; 141 } 142 } 143 144 error = 0; 145 err: 146 *tolen = i; 147 *dst = cw->bp1.wc; 148 IC_RESET(); 149 150 return error; 151 } 152 153 static int 154 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 155 size_t *tolen, CHAR_T **dst) 156 { 157 return default_char2int(sp, str, len, cw, tolen, dst, 158 sp->conv.id[IC_FE_CHAR2INT]); 159 } 160 161 static int 162 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 163 size_t *tolen, CHAR_T **dst) 164 { 165 return default_char2int(sp, str, len, cw, tolen, dst, 166 sp->conv.id[IC_IE_CHAR2INT]); 167 } 168 169 static int 170 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 171 size_t *tolen, CHAR_T **dst) 172 { 173 return default_char2int(sp, str, len, cw, tolen, dst, 174 (iconv_t)-1); 175 } 176 177 static int 178 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 179 size_t *tolen, char **dst) 180 { 181 int i; 182 char **tostr = &cw->bp1.c; 183 size_t *blen = &cw->blen1; 184 185 BINC_RETC(NULL, *tostr, *blen, len); 186 187 *tolen = len; 188 for (i = 0; i < len; ++i) 189 (*tostr)[i] = str[i]; 190 191 *dst = cw->bp1.c; 192 193 return 0; 194 } 195 196 static int 197 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 198 size_t *tolen, char **pdst, iconv_t id) 199 { 200 size_t i, j, offset = 0; 201 char **tostr = &cw->bp1.c; 202 size_t *blen = &cw->blen1; 203 mbstate_t mbs; 204 size_t n; 205 ssize_t nlen = len + MB_CUR_MAX; 206 char *dst; 207 size_t buflen; 208 #ifdef USE_ICONV 209 char buffer[CONV_BUFFER_SIZE]; 210 #endif 211 int error = 1; 212 213 /* convert first len bytes of buffer and append it to cw->bp 214 * len is adjusted => 0 215 * offset contains the offset in cw->bp and is adjusted 216 * cw->bp is grown as required 217 */ 218 #ifdef USE_ICONV 219 #define CONVERT2(_buffer, lenp, cw, offset) \ 220 do { \ 221 char *bp = _buffer; \ 222 int ret; \ 223 do { \ 224 size_t outleft = cw->blen1 - offset; \ 225 char *obp = cw->bp1.c + offset; \ 226 if (cw->blen1 < offset + MB_CUR_MAX) { \ 227 nlen += 256; \ 228 BINC_RETC(NULL, cw->bp1.c, cw->blen1, nlen); \ 229 } \ 230 errno = 0; \ 231 ret = iconv(id, (iconv_src_t)&bp, lenp, &obp, &outleft); \ 232 if (ret == -1 && errno != E2BIG) \ 233 goto err; \ 234 offset = cw->blen1 - outleft; \ 235 } while (ret != 0); \ 236 } while (0) 237 #else 238 #define CONVERT2(_buffer, lenp, cw, offset) 239 #endif 240 241 242 BZERO(&mbs, 1); 243 BINC_RETC(NULL, *tostr, *blen, nlen); 244 dst = *tostr; buflen = *blen; 245 246 #ifdef USE_ICONV 247 if (id != (iconv_t)-1) { 248 dst = buffer; buflen = CONV_BUFFER_SIZE; 249 } 250 #endif 251 252 for (i = 0, j = 0; i < len; ++i) { 253 n = wcrtomb(dst+j, str[i], &mbs); 254 if (n == -1) goto err; 255 j += n; 256 if (buflen < j + MB_CUR_MAX) { 257 if (id != (iconv_t)-1) { 258 CONVERT2(buffer, &j, cw, offset); 259 } else { 260 nlen += 256; 261 BINC_RETC(NULL, *tostr, *blen, nlen); 262 dst = *tostr; buflen = *blen; 263 } 264 } 265 } 266 267 n = wcrtomb(dst+j, L'\0', &mbs); 268 j += n - 1; /* don't count NUL at the end */ 269 *tolen = j; 270 271 if (id != (iconv_t)-1) { 272 CONVERT2(buffer, &j, cw, offset); 273 CONVERT2(NULL, NULL, cw, offset); /* back to the initial state */ 274 *tolen = offset; 275 } 276 277 error = 0; 278 err: 279 if (error) 280 *tolen = j; 281 *pdst = cw->bp1.c; 282 IC_RESET(); 283 284 return error; 285 } 286 287 static int 288 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 289 size_t *tolen, char **dst) 290 { 291 return default_int2char(sp, str, len, cw, tolen, dst, 292 sp->conv.id[IC_FE_INT2CHAR]); 293 } 294 295 static int 296 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 297 size_t *tolen, char **dst) 298 { 299 return default_int2char(sp, str, len, cw, tolen, dst, 300 (iconv_t)-1); 301 } 302 303 #endif 304 305 /* 306 * conv_init -- 307 * Initialize the iconv environment. 308 * 309 * PUBLIC: void conv_init __P((SCR *, SCR *)); 310 */ 311 void 312 conv_init(SCR *orig, SCR *sp) 313 { 314 int i; 315 316 if (orig == NULL) 317 setlocale(LC_ALL, ""); 318 if (orig != NULL) 319 BCOPY(&orig->conv, &sp->conv, 1); 320 #ifdef USE_WIDECHAR 321 else { 322 char *ctype = setlocale(LC_CTYPE, NULL); 323 324 /* 325 * XXX 326 * This hack fixes the libncursesw issue on FreeBSD. 327 */ 328 if (!strcmp(ctype, "ko_KR.CP949")) 329 setlocale(LC_CTYPE, "ko_KR.eucKR"); 330 else if (!strcmp(ctype, "zh_CN.GB2312")) 331 setlocale(LC_CTYPE, "zh_CN.eucCN"); 332 else if (!strcmp(ctype, "zh_CN.GBK")) 333 setlocale(LC_CTYPE, "zh_CN.GB18030"); 334 335 /* 336 * Switch to 8bit mode if locale is C; 337 * LC_CTYPE should be reseted to C if unmatched. 338 */ 339 if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) { 340 sp->conv.sys2int = sp->conv.file2int = raw2int; 341 sp->conv.int2sys = sp->conv.int2file = int2raw; 342 sp->conv.input2int = raw2int; 343 } else { 344 sp->conv.sys2int = cs_char2int; 345 sp->conv.int2sys = cs_int2char; 346 sp->conv.file2int = fe_char2int; 347 sp->conv.int2file = fe_int2char; 348 sp->conv.input2int = ie_char2int; 349 } 350 #ifdef USE_ICONV 351 o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0); 352 #endif 353 } 354 #endif 355 356 /* iconv descriptors must be distinct to screens. */ 357 for (i = 0; i <= IC_IE_TO_UTF16; ++i) 358 sp->conv.id[i] = (iconv_t)-1; 359 #ifdef USE_ICONV 360 conv_enc(sp, O_INPUTENCODING, 0); 361 #endif 362 } 363 364 /* 365 * conv_enc -- 366 * Convert file/input encoding. 367 * 368 * PUBLIC: int conv_enc __P((SCR *, int, char *)); 369 */ 370 int 371 conv_enc(SCR *sp, int option, char *enc) 372 { 373 #if defined(USE_WIDECHAR) && defined(USE_ICONV) 374 iconv_t *c2w, *w2c; 375 376 switch (option) { 377 case O_FILEENCODING: 378 c2w = sp->conv.id + IC_FE_CHAR2INT; 379 w2c = sp->conv.id + IC_FE_INT2CHAR; 380 if (!enc) enc = O_STR(sp, O_FILEENCODING); 381 if (*c2w != (iconv_t)-1) 382 iconv_close(*c2w); 383 if (*w2c != (iconv_t)-1) 384 iconv_close(*w2c); 385 if (strcasecmp(codeset(), enc)) { 386 if ((*c2w = iconv_open(codeset(), enc)) == (iconv_t)-1) 387 goto err; 388 if ((*w2c = iconv_open(enc, codeset())) == (iconv_t)-1) 389 goto err; 390 } else *c2w = *w2c = (iconv_t)-1; 391 break; 392 case O_INPUTENCODING: 393 c2w = sp->conv.id + IC_IE_CHAR2INT; 394 w2c = sp->conv.id + IC_IE_TO_UTF16; 395 if (!enc) enc = O_STR(sp, O_INPUTENCODING); 396 if (*c2w != (iconv_t)-1) 397 iconv_close(*c2w); 398 if (*w2c != (iconv_t)-1) 399 iconv_close(*w2c); 400 if (strcasecmp(codeset(), enc)) { 401 if ((*c2w = iconv_open(codeset(), enc)) == (iconv_t)-1) 402 goto err; 403 } else *c2w = (iconv_t)-1; 404 /* UTF-16 can not be locale and can not be inputed. */ 405 if ((*w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1) 406 goto err; 407 break; 408 } 409 410 F_CLR(sp, SC_CONV_ERROR); 411 F_SET(sp, SC_SCR_REFORMAT); 412 413 return 0; 414 err: 415 #endif 416 switch (option) { 417 case O_FILEENCODING: 418 msgq(sp, M_ERR, 419 "321|File encoding conversion not supported"); 420 break; 421 case O_INPUTENCODING: 422 msgq(sp, M_ERR, 423 "322|Input encoding conversion not supported"); 424 break; 425 } 426 return 1; 427 } 428 429 /* 430 * conv_end -- 431 * Close the iconv descriptors, release the buffer. 432 * 433 * PUBLIC: void conv_end __P((SCR *)); 434 */ 435 void 436 conv_end(SCR *sp) 437 { 438 #if defined(USE_WIDECHAR) && defined(USE_ICONV) 439 int i; 440 for (i = 0; i <= IC_IE_TO_UTF16; ++i) 441 if (sp->conv.id[i] != (iconv_t)-1) 442 iconv_close(sp->conv.id[i]); 443 if (sp->cw.bp1.c != NULL) 444 free(sp->cw.bp1.c); 445 #endif 446 } 447