1 /*- 2 * Copyright (c) 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1993, 1994, 1995, 1996 5 * Keith Bostic. All rights reserved. 6 * Copyright (c) 2011, 2012 7 * Zhihao Yuan. All rights reserved. 8 * 9 * See the LICENSE file for redistribution information. 10 */ 11 12 #include "config.h" 13 14 #ifndef lint 15 static const char sccsid[] = "$Id: conv.c,v 2.40 2014/02/27 16:25:29 zy Exp $"; 16 #endif /* not lint */ 17 18 #include <sys/types.h> 19 #include <sys/queue.h> 20 #include <sys/time.h> 21 22 #include <bitstring.h> 23 #include <errno.h> 24 #include <limits.h> 25 #include <langinfo.h> 26 #include <locale.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <strings.h> 31 #include <unistd.h> 32 33 #include "common.h" 34 35 /* 36 * codeset -- 37 * Get the locale encoding. 38 * 39 * PUBLIC: char * codeset(void); 40 */ 41 char * 42 codeset(void) 43 { 44 static char *cs; 45 46 if (cs == NULL) 47 cs = nl_langinfo(CODESET); 48 49 return cs; 50 } 51 52 #ifdef USE_WIDECHAR 53 static int 54 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 55 CHAR_T **dst) 56 { 57 int i; 58 CHAR_T **tostr = &cw->bp1.wc; 59 size_t *blen = &cw->blen1; 60 61 BINC_RETW(NULL, *tostr, *blen, len); 62 63 *tolen = len; 64 for (i = 0; i < len; ++i) 65 (*tostr)[i] = (u_char) str[i]; 66 67 *dst = cw->bp1.wc; 68 69 return 0; 70 } 71 72 #define CONV_BUFFER_SIZE 512 73 /* fill the buffer with codeset encoding of string pointed to by str 74 * left has the number of bytes left in str and is adjusted 75 * len contains the number of bytes put in the buffer 76 */ 77 #ifdef USE_ICONV 78 #define CONVERT(str, left, src, len) \ 79 do { \ 80 size_t outleft; \ 81 char *bp = buffer; \ 82 outleft = CONV_BUFFER_SIZE; \ 83 errno = 0; \ 84 if (iconv(id, (iconv_src_t)&str, &left, &bp, &outleft) \ 85 == -1 && errno != E2BIG) \ 86 goto err; \ 87 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \ 88 error = -left; \ 89 goto err; \ 90 } \ 91 src = buffer; \ 92 } while (0) 93 94 #define IC_RESET() \ 95 do { \ 96 if (id != (iconv_t)-1) \ 97 iconv(id, NULL, NULL, NULL, NULL); \ 98 } while(0) 99 #else 100 #define CONVERT(str, left, src, len) 101 #define IC_RESET() 102 #endif 103 104 static int 105 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 106 size_t *tolen, CHAR_T **dst, iconv_t id) 107 { 108 size_t i = 0, j; 109 CHAR_T **tostr = &cw->bp1.wc; 110 size_t *blen = &cw->blen1; 111 mbstate_t mbs; 112 size_t n; 113 ssize_t nlen = len; 114 char *src = (char *)str; 115 #ifdef USE_ICONV 116 char buffer[CONV_BUFFER_SIZE]; 117 #endif 118 size_t left = len; 119 int error = 1; 120 121 BZERO(&mbs, 1); 122 BINC_RETW(NULL, *tostr, *blen, nlen); 123 124 #ifdef USE_ICONV 125 if (id != (iconv_t)-1) 126 CONVERT(str, left, src, len); 127 #endif 128 129 for (i = 0, j = 0; j < len; ) { 130 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs); 131 /* NULL character converted */ 132 if (n == -2) 133 error = -(len-j); 134 if (n == -1 || n == -2) 135 goto err; 136 if (n == 0) 137 n = 1; 138 j += n; 139 if (++i >= *blen) { 140 nlen += 256; 141 BINC_RETW(NULL, *tostr, *blen, nlen); 142 } 143 if (id != (iconv_t)-1 && j == len && left) { 144 CONVERT(str, left, src, len); 145 j = 0; 146 } 147 } 148 149 error = 0; 150 err: 151 *tolen = i; 152 *dst = cw->bp1.wc; 153 IC_RESET(); 154 155 return error; 156 } 157 158 static int 159 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 160 CHAR_T **dst) 161 { 162 return default_char2int(sp, str, len, cw, tolen, dst, 163 sp->conv.id[IC_FE_CHAR2INT]); 164 } 165 166 static int 167 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 168 CHAR_T **dst) 169 { 170 return default_char2int(sp, str, len, cw, tolen, dst, 171 sp->conv.id[IC_IE_CHAR2INT]); 172 } 173 174 static int 175 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 176 CHAR_T **dst) 177 { 178 return default_char2int(sp, str, len, cw, tolen, dst, (iconv_t)-1); 179 } 180 181 static int 182 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen, 183 char **dst) 184 { 185 int i; 186 char **tostr = &cw->bp1.c; 187 size_t *blen = &cw->blen1; 188 189 BINC_RETC(NULL, *tostr, *blen, len); 190 191 *tolen = len; 192 for (i = 0; i < len; ++i) 193 (*tostr)[i] = str[i]; 194 195 *dst = cw->bp1.c; 196 197 return 0; 198 } 199 200 static int 201 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 202 size_t *tolen, char **pdst, iconv_t id) 203 { 204 size_t i, j, offset = 0; 205 char **tostr = &cw->bp1.c; 206 size_t *blen = &cw->blen1; 207 mbstate_t mbs; 208 size_t n; 209 ssize_t nlen = len + MB_CUR_MAX; 210 char *dst; 211 size_t buflen; 212 #ifdef USE_ICONV 213 char buffer[CONV_BUFFER_SIZE]; 214 #endif 215 int error = 1; 216 217 /* convert first len bytes of buffer and append it to cw->bp 218 * len is adjusted => 0 219 * offset contains the offset in cw->bp and is adjusted 220 * cw->bp is grown as required 221 */ 222 #ifdef USE_ICONV 223 #define CONVERT2(_buffer, lenp, cw, offset) \ 224 do { \ 225 char *bp = _buffer; \ 226 int ret; \ 227 do { \ 228 size_t outleft = cw->blen1 - offset; \ 229 char *obp = cw->bp1.c + offset; \ 230 if (cw->blen1 < offset + MB_CUR_MAX) { \ 231 nlen += 256; \ 232 BINC_RETC(NULL, cw->bp1.c, cw->blen1, \ 233 nlen); \ 234 } \ 235 errno = 0; \ 236 ret = iconv(id, (iconv_src_t)&bp, lenp, &obp, \ 237 &outleft); \ 238 if (ret == -1 && errno != E2BIG) \ 239 goto err; \ 240 offset = cw->blen1 - outleft; \ 241 } while (ret != 0); \ 242 } while (0) 243 #else 244 #define CONVERT2(_buffer, lenp, cw, offset) 245 #endif 246 247 248 BZERO(&mbs, 1); 249 BINC_RETC(NULL, *tostr, *blen, nlen); 250 dst = *tostr; buflen = *blen; 251 252 #ifdef USE_ICONV 253 if (id != (iconv_t)-1) { 254 dst = buffer; buflen = CONV_BUFFER_SIZE; 255 } 256 #endif 257 258 for (i = 0, j = 0; i < len; ++i) { 259 n = wcrtomb(dst+j, str[i], &mbs); 260 if (n == -1) 261 goto err; 262 j += n; 263 if (buflen < j + MB_CUR_MAX) { 264 if (id != (iconv_t)-1) { 265 CONVERT2(buffer, &j, cw, offset); 266 } else { 267 nlen += 256; 268 BINC_RETC(NULL, *tostr, *blen, nlen); 269 dst = *tostr; buflen = *blen; 270 } 271 } 272 } 273 274 n = wcrtomb(dst+j, L'\0', &mbs); 275 j += n - 1; /* don't count NUL at the end */ 276 *tolen = j; 277 278 if (id != (iconv_t)-1) { 279 CONVERT2(buffer, &j, cw, offset); 280 /* back to the initial state */ 281 CONVERT2(NULL, NULL, cw, offset); 282 *tolen = offset; 283 } 284 285 error = 0; 286 err: 287 if (error) 288 *tolen = j; 289 *pdst = cw->bp1.c; 290 IC_RESET(); 291 292 return error; 293 } 294 295 static int 296 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 297 size_t *tolen, char **dst) 298 { 299 return default_int2char(sp, str, len, cw, tolen, dst, 300 sp->conv.id[IC_FE_INT2CHAR]); 301 } 302 303 static int 304 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 305 size_t *tolen, char **dst) 306 { 307 return default_int2char(sp, str, len, cw, tolen, dst, (iconv_t)-1); 308 } 309 310 #endif 311 312 /* 313 * conv_init -- 314 * Initialize the iconv environment. 315 * 316 * PUBLIC: void conv_init(SCR *, SCR *); 317 */ 318 void 319 conv_init(SCR *orig, SCR *sp) 320 { 321 int i; 322 323 if (orig == NULL) 324 setlocale(LC_ALL, ""); 325 if (orig != NULL) 326 BCOPY(&orig->conv, &sp->conv, 1); 327 #ifdef USE_WIDECHAR 328 else { 329 char *ctype = setlocale(LC_CTYPE, NULL); 330 331 /* 332 * XXX 333 * This hack fixes the libncursesw issue on FreeBSD. 334 */ 335 if (!strcmp(ctype, "ko_KR.CP949")) 336 setlocale(LC_CTYPE, "ko_KR.eucKR"); 337 else if (!strcmp(ctype, "zh_CN.GB2312")) 338 setlocale(LC_CTYPE, "zh_CN.eucCN"); 339 else if (!strcmp(ctype, "zh_CN.GBK")) 340 setlocale(LC_CTYPE, "zh_CN.GB18030"); 341 342 /* 343 * Switch to 8bit mode if locale is C; 344 * LC_CTYPE should be reseted to C if unmatched. 345 */ 346 if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) { 347 sp->conv.sys2int = sp->conv.file2int = raw2int; 348 sp->conv.int2sys = sp->conv.int2file = int2raw; 349 sp->conv.input2int = raw2int; 350 } else { 351 sp->conv.sys2int = cs_char2int; 352 sp->conv.int2sys = cs_int2char; 353 sp->conv.file2int = fe_char2int; 354 sp->conv.int2file = fe_int2char; 355 sp->conv.input2int = ie_char2int; 356 } 357 #ifdef USE_ICONV 358 o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0); 359 #endif 360 } 361 #endif 362 363 /* iconv descriptors must be distinct to screens. */ 364 for (i = 0; i <= IC_IE_TO_UTF16; ++i) 365 sp->conv.id[i] = (iconv_t)-1; 366 #ifdef USE_ICONV 367 conv_enc(sp, O_INPUTENCODING, 0); 368 #endif 369 } 370 371 /* 372 * conv_enc -- 373 * Convert file/input encoding. 374 * 375 * PUBLIC: int conv_enc(SCR *, int, char *); 376 */ 377 int 378 conv_enc(SCR *sp, int option, char *enc) 379 { 380 #if defined(USE_WIDECHAR) && defined(USE_ICONV) 381 iconv_t *c2w, *w2c; 382 iconv_t id_c2w, id_w2c; 383 384 switch (option) { 385 case O_FILEENCODING: 386 c2w = sp->conv.id + IC_FE_CHAR2INT; 387 w2c = sp->conv.id + IC_FE_INT2CHAR; 388 if (!enc) 389 enc = O_STR(sp, O_FILEENCODING); 390 391 if (strcasecmp(codeset(), enc)) { 392 if ((id_c2w = iconv_open(codeset(), enc)) == 393 (iconv_t)-1) 394 goto err; 395 if ((id_w2c = iconv_open(enc, codeset())) == 396 (iconv_t)-1) 397 goto err; 398 } else { 399 id_c2w = (iconv_t)-1; 400 id_w2c = (iconv_t)-1; 401 } 402 403 break; 404 405 case O_INPUTENCODING: 406 c2w = sp->conv.id + IC_IE_CHAR2INT; 407 w2c = sp->conv.id + IC_IE_TO_UTF16; 408 if (!enc) 409 enc = O_STR(sp, O_INPUTENCODING); 410 411 if (strcasecmp(codeset(), enc)) { 412 if ((id_c2w = iconv_open(codeset(), enc)) == 413 (iconv_t)-1) 414 goto err; 415 } else 416 id_c2w = (iconv_t)-1; 417 418 /* UTF-16 can not be locale and can not be inputed. */ 419 if ((id_w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1) 420 goto err; 421 422 break; 423 424 default: 425 abort(); 426 } 427 428 if (*c2w != (iconv_t)-1) 429 iconv_close(*c2w); 430 if (*w2c != (iconv_t)-1) 431 iconv_close(*w2c); 432 433 *c2w = id_c2w; 434 *w2c = id_w2c; 435 436 F_CLR(sp, SC_CONV_ERROR); 437 F_SET(sp, SC_SCR_REFORMAT); 438 439 return 0; 440 err: 441 #endif 442 switch (option) { 443 case O_FILEENCODING: 444 msgq(sp, M_ERR, "321|File encoding conversion not supported"); 445 break; 446 case O_INPUTENCODING: 447 msgq(sp, M_ERR, "322|Input encoding conversion not supported"); 448 break; 449 } 450 return 1; 451 } 452 453 /* 454 * conv_end -- 455 * Close the iconv descriptors, release the buffer. 456 * 457 * PUBLIC: void conv_end(SCR *); 458 */ 459 void 460 conv_end(SCR *sp) 461 { 462 #if defined(USE_WIDECHAR) && defined(USE_ICONV) 463 int i; 464 for (i = 0; i <= IC_IE_TO_UTF16; ++i) 465 if (sp->conv.id[i] != (iconv_t)-1) 466 iconv_close(sp->conv.id[i]); 467 if (sp->cw.bp1.c != NULL) 468 free(sp->cw.bp1.c); 469 #endif 470 } 471