1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $ */ 3 4 /*- 5 * Copyright (c)2003 Citrus Project, 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 #include <sys/endian.h> 32 #include <sys/queue.h> 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <limits.h> 37 #include <stdbool.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 42 #include "citrus_namespace.h" 43 #include "citrus_types.h" 44 #include "citrus_module.h" 45 #include "citrus_region.h" 46 #include "citrus_mmap.h" 47 #include "citrus_hash.h" 48 #include "citrus_iconv.h" 49 #include "citrus_stdenc.h" 50 #include "citrus_mapper.h" 51 #include "citrus_csmapper.h" 52 #include "citrus_memstream.h" 53 #include "citrus_iconv_std.h" 54 #include "citrus_esdb.h" 55 56 /* ---------------------------------------------------------------------- */ 57 58 _CITRUS_ICONV_DECLS(iconv_std); 59 _CITRUS_ICONV_DEF_OPS(iconv_std); 60 61 62 /* ---------------------------------------------------------------------- */ 63 64 int 65 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops) 66 { 67 68 memcpy(ops, &_citrus_iconv_std_iconv_ops, 69 sizeof(_citrus_iconv_std_iconv_ops)); 70 71 return (0); 72 } 73 74 /* ---------------------------------------------------------------------- */ 75 76 /* 77 * convenience routines for stdenc. 78 */ 79 static __inline void 80 save_encoding_state(struct _citrus_iconv_std_encoding *se) 81 { 82 83 if (se->se_ps) 84 memcpy(se->se_pssaved, se->se_ps, 85 _stdenc_get_state_size(se->se_handle)); 86 } 87 88 static __inline void 89 restore_encoding_state(struct _citrus_iconv_std_encoding *se) 90 { 91 92 if (se->se_ps) 93 memcpy(se->se_ps, se->se_pssaved, 94 _stdenc_get_state_size(se->se_handle)); 95 } 96 97 static __inline void 98 init_encoding_state(struct _citrus_iconv_std_encoding *se) 99 { 100 101 if (se->se_ps) 102 _stdenc_init_state(se->se_handle, se->se_ps); 103 } 104 105 static __inline int 106 mbtocsx(struct _citrus_iconv_std_encoding *se, 107 _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult, 108 struct iconv_hooks *hooks) 109 { 110 111 return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps, 112 nresult, hooks)); 113 } 114 115 static __inline int 116 cstombx(struct _citrus_iconv_std_encoding *se, 117 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult, 118 struct iconv_hooks *hooks) 119 { 120 121 return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps, 122 nresult, hooks)); 123 } 124 125 static __inline int 126 wctombx(struct _citrus_iconv_std_encoding *se, 127 char *s, size_t n, _wc_t wc, size_t *nresult, 128 struct iconv_hooks *hooks) 129 { 130 131 return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult, 132 hooks)); 133 } 134 135 static __inline int 136 put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n, 137 size_t *nresult) 138 { 139 140 return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult)); 141 } 142 143 static __inline int 144 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate) 145 { 146 struct _stdenc_state_desc ssd; 147 int ret; 148 149 ret = _stdenc_get_state_desc(se->se_handle, se->se_ps, 150 _STDENC_SDID_GENERIC, &ssd); 151 if (!ret) 152 *rstate = ssd.u.generic.state; 153 154 return (ret); 155 } 156 157 /* 158 * init encoding context 159 */ 160 static int 161 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs, 162 void *ps1, void *ps2) 163 { 164 int ret = -1; 165 166 se->se_handle = cs; 167 se->se_ps = ps1; 168 se->se_pssaved = ps2; 169 170 if (se->se_ps) 171 ret = _stdenc_init_state(cs, se->se_ps); 172 if (!ret && se->se_pssaved) 173 ret = _stdenc_init_state(cs, se->se_pssaved); 174 175 return (ret); 176 } 177 178 static int 179 open_csmapper(struct _csmapper **rcm, const char *src, const char *dst, 180 unsigned long *rnorm) 181 { 182 struct _csmapper *cm; 183 int ret; 184 185 ret = _csmapper_open(&cm, src, dst, 0, rnorm); 186 if (ret) 187 return (ret); 188 if (_csmapper_get_src_max(cm) != 1 || _csmapper_get_dst_max(cm) != 1 || 189 _csmapper_get_state_size(cm) != 0) { 190 _csmapper_close(cm); 191 return (EINVAL); 192 } 193 194 *rcm = cm; 195 196 return (0); 197 } 198 199 static void 200 close_dsts(struct _citrus_iconv_std_dst_list *dl) 201 { 202 struct _citrus_iconv_std_dst *sd; 203 204 while ((sd = TAILQ_FIRST(dl)) != NULL) { 205 TAILQ_REMOVE(dl, sd, sd_entry); 206 _csmapper_close(sd->sd_mapper); 207 free(sd); 208 } 209 } 210 211 static int 212 open_dsts(struct _citrus_iconv_std_dst_list *dl, 213 const struct _esdb_charset *ec, const struct _esdb *dbdst) 214 { 215 struct _citrus_iconv_std_dst *sd, *sdtmp; 216 unsigned long norm; 217 int i, ret; 218 219 sd = malloc(sizeof(*sd)); 220 if (sd == NULL) 221 return (errno); 222 223 for (i = 0; i < dbdst->db_num_charsets; i++) { 224 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname, 225 dbdst->db_charsets[i].ec_csname, &norm); 226 if (ret == 0) { 227 sd->sd_csid = dbdst->db_charsets[i].ec_csid; 228 sd->sd_norm = norm; 229 /* insert this mapper by sorted order. */ 230 TAILQ_FOREACH(sdtmp, dl, sd_entry) { 231 if (sdtmp->sd_norm > norm) { 232 TAILQ_INSERT_BEFORE(sdtmp, sd, 233 sd_entry); 234 sd = NULL; 235 break; 236 } 237 } 238 if (sd) 239 TAILQ_INSERT_TAIL(dl, sd, sd_entry); 240 sd = malloc(sizeof(*sd)); 241 if (sd == NULL) { 242 ret = errno; 243 close_dsts(dl); 244 return (ret); 245 } 246 } else if (ret != ENOENT) { 247 close_dsts(dl); 248 free(sd); 249 return (ret); 250 } 251 } 252 free(sd); 253 return (0); 254 } 255 256 static void 257 close_srcs(struct _citrus_iconv_std_src_list *sl) 258 { 259 struct _citrus_iconv_std_src *ss; 260 261 while ((ss = TAILQ_FIRST(sl)) != NULL) { 262 TAILQ_REMOVE(sl, ss, ss_entry); 263 close_dsts(&ss->ss_dsts); 264 free(ss); 265 } 266 } 267 268 static int 269 open_srcs(struct _citrus_iconv_std_src_list *sl, 270 const struct _esdb *dbsrc, const struct _esdb *dbdst) 271 { 272 struct _citrus_iconv_std_src *ss; 273 int count = 0, i, ret; 274 275 ss = malloc(sizeof(*ss)); 276 if (ss == NULL) 277 return (errno); 278 279 TAILQ_INIT(&ss->ss_dsts); 280 281 for (i = 0; i < dbsrc->db_num_charsets; i++) { 282 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst); 283 if (ret) 284 goto err; 285 if (!TAILQ_EMPTY(&ss->ss_dsts)) { 286 ss->ss_csid = dbsrc->db_charsets[i].ec_csid; 287 TAILQ_INSERT_TAIL(sl, ss, ss_entry); 288 ss = malloc(sizeof(*ss)); 289 if (ss == NULL) { 290 ret = errno; 291 goto err; 292 } 293 count++; 294 TAILQ_INIT(&ss->ss_dsts); 295 } 296 } 297 free(ss); 298 299 return (count ? 0 : ENOENT); 300 301 err: 302 free(ss); 303 close_srcs(sl); 304 return (ret); 305 } 306 307 /* do convert a character */ 308 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */ 309 static int 310 /*ARGSUSED*/ 311 do_conv(const struct _citrus_iconv_std_shared *is, 312 _csid_t *csid, _index_t *idx) 313 { 314 struct _citrus_iconv_std_dst *sd; 315 struct _citrus_iconv_std_src *ss; 316 _index_t tmpidx; 317 int ret; 318 319 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) { 320 if (ss->ss_csid == *csid) { 321 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) { 322 ret = _csmapper_convert(sd->sd_mapper, 323 &tmpidx, *idx, NULL); 324 switch (ret) { 325 case _MAPPER_CONVERT_SUCCESS: 326 *csid = sd->sd_csid; 327 *idx = tmpidx; 328 return (0); 329 case _MAPPER_CONVERT_NONIDENTICAL: 330 break; 331 case _MAPPER_CONVERT_SRC_MORE: 332 /*FALLTHROUGH*/ 333 case _MAPPER_CONVERT_DST_MORE: 334 /*FALLTHROUGH*/ 335 case _MAPPER_CONVERT_ILSEQ: 336 return (EILSEQ); 337 case _MAPPER_CONVERT_FATAL: 338 return (EINVAL); 339 } 340 } 341 break; 342 } 343 } 344 345 return (E_NO_CORRESPONDING_CHAR); 346 } 347 /* ---------------------------------------------------------------------- */ 348 349 static int 350 /*ARGSUSED*/ 351 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci, 352 const char * __restrict src, const char * __restrict dst) 353 { 354 struct _citrus_esdb esdbdst, esdbsrc; 355 struct _citrus_iconv_std_shared *is; 356 int ret; 357 358 is = malloc(sizeof(*is)); 359 if (is == NULL) { 360 ret = errno; 361 goto err0; 362 } 363 ret = _citrus_esdb_open(&esdbsrc, src); 364 if (ret) 365 goto err1; 366 ret = _citrus_esdb_open(&esdbdst, dst); 367 if (ret) 368 goto err2; 369 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname, 370 esdbsrc.db_variable, esdbsrc.db_len_variable); 371 if (ret) 372 goto err3; 373 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname, 374 esdbdst.db_variable, esdbdst.db_len_variable); 375 if (ret) 376 goto err4; 377 is->is_use_invalid = esdbdst.db_use_invalid; 378 is->is_invalid = esdbdst.db_invalid; 379 380 TAILQ_INIT(&is->is_srcs); 381 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst); 382 if (ret) 383 goto err5; 384 385 _esdb_close(&esdbsrc); 386 _esdb_close(&esdbdst); 387 ci->ci_closure = is; 388 389 return (0); 390 391 err5: 392 _stdenc_close(is->is_dst_encoding); 393 err4: 394 _stdenc_close(is->is_src_encoding); 395 err3: 396 _esdb_close(&esdbdst); 397 err2: 398 _esdb_close(&esdbsrc); 399 err1: 400 free(is); 401 err0: 402 return (ret); 403 } 404 405 static void 406 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci) 407 { 408 struct _citrus_iconv_std_shared *is = ci->ci_closure; 409 410 if (is == NULL) 411 return; 412 413 _stdenc_close(is->is_src_encoding); 414 _stdenc_close(is->is_dst_encoding); 415 close_srcs(&is->is_srcs); 416 free(is); 417 } 418 419 static int 420 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv) 421 { 422 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 423 struct _citrus_iconv_std_context *sc; 424 char *ptr; 425 size_t sz, szpsdst, szpssrc; 426 427 szpssrc = _stdenc_get_state_size(is->is_src_encoding); 428 szpsdst = _stdenc_get_state_size(is->is_dst_encoding); 429 430 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context); 431 sc = malloc(sz); 432 if (sc == NULL) 433 return (errno); 434 435 ptr = (char *)&sc[1]; 436 if (szpssrc > 0) 437 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 438 ptr, ptr+szpssrc); 439 else 440 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 441 NULL, NULL); 442 ptr += szpssrc*2; 443 if (szpsdst > 0) 444 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 445 ptr, ptr+szpsdst); 446 else 447 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 448 NULL, NULL); 449 450 cv->cv_closure = (void *)sc; 451 452 return (0); 453 } 454 455 static void 456 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv) 457 { 458 459 free(cv->cv_closure); 460 } 461 462 static int 463 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv, 464 char * __restrict * __restrict in, size_t * __restrict inbytes, 465 char * __restrict * __restrict out, size_t * __restrict outbytes, 466 uint32_t flags, size_t * __restrict invalids) 467 { 468 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 469 struct _citrus_iconv_std_context *sc = cv->cv_closure; 470 _csid_t csid; 471 _index_t idx; 472 char *tmpin; 473 size_t inval, szrin, szrout; 474 int ret, state = 0; 475 476 inval = 0; 477 if (in == NULL || *in == NULL) { 478 /* special cases */ 479 if (out != NULL && *out != NULL) { 480 /* init output state and store the shift sequence */ 481 save_encoding_state(&sc->sc_src_encoding); 482 save_encoding_state(&sc->sc_dst_encoding); 483 szrout = 0; 484 485 ret = put_state_resetx(&sc->sc_dst_encoding, 486 *out, *outbytes, &szrout); 487 if (ret) 488 goto err; 489 490 if (szrout == (size_t)-2) { 491 /* too small to store the character */ 492 ret = EINVAL; 493 goto err; 494 } 495 *out += szrout; 496 *outbytes -= szrout; 497 } else 498 /* otherwise, discard the shift sequence */ 499 init_encoding_state(&sc->sc_dst_encoding); 500 init_encoding_state(&sc->sc_src_encoding); 501 *invalids = 0; 502 return (0); 503 } 504 505 /* normal case */ 506 for (;;) { 507 if (*inbytes == 0) { 508 ret = get_state_desc_gen(&sc->sc_src_encoding, &state); 509 if (state == _STDENC_SDGEN_INITIAL || 510 state == _STDENC_SDGEN_STABLE) 511 break; 512 } 513 514 /* save the encoding states for the error recovery */ 515 save_encoding_state(&sc->sc_src_encoding); 516 save_encoding_state(&sc->sc_dst_encoding); 517 518 /* mb -> csid/index */ 519 tmpin = *in; 520 szrin = szrout = 0; 521 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin, 522 *inbytes, &szrin, cv->cv_shared->ci_hooks); 523 if (ret) 524 goto err; 525 526 if (szrin == (size_t)-2) { 527 /* incompleted character */ 528 ret = get_state_desc_gen(&sc->sc_src_encoding, &state); 529 if (ret) { 530 ret = EINVAL; 531 goto err; 532 } 533 switch (state) { 534 case _STDENC_SDGEN_INITIAL: 535 case _STDENC_SDGEN_STABLE: 536 /* fetch shift sequences only. */ 537 goto next; 538 } 539 ret = EINVAL; 540 goto err; 541 } 542 /* convert the character */ 543 ret = do_conv(is, &csid, &idx); 544 if (ret) { 545 if (ret == E_NO_CORRESPONDING_CHAR) { 546 /* 547 * GNU iconv returns EILSEQ when no 548 * corresponding character in the output. 549 * Some software depends on this behavior 550 * though this is against POSIX specification. 551 */ 552 if (cv->cv_shared->ci_ilseq_invalid != 0) { 553 ret = EILSEQ; 554 goto err; 555 } 556 inval++; 557 szrout = 0; 558 if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) && 559 !cv->cv_shared->ci_discard_ilseq) && 560 is->is_use_invalid) { 561 ret = wctombx(&sc->sc_dst_encoding, 562 *out, *outbytes, is->is_invalid, 563 &szrout, cv->cv_shared->ci_hooks); 564 if (ret) 565 goto err; 566 } 567 goto next; 568 } else 569 goto err; 570 } 571 /* csid/index -> mb */ 572 ret = cstombx(&sc->sc_dst_encoding, 573 *out, *outbytes, csid, idx, &szrout, 574 cv->cv_shared->ci_hooks); 575 if (ret) 576 goto err; 577 next: 578 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */ 579 *in = tmpin; 580 *outbytes -= szrout; 581 *out += szrout; 582 } 583 *invalids = inval; 584 585 return (0); 586 587 err: 588 restore_encoding_state(&sc->sc_src_encoding); 589 restore_encoding_state(&sc->sc_dst_encoding); 590 *invalids = inval; 591 592 return (ret); 593 } 594