1 /* $NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c)2003 Citrus Project, 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/endian.h> 33 #include <sys/queue.h> 34 35 #include <assert.h> 36 #include <errno.h> 37 #include <limits.h> 38 #include <stdbool.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 43 #include "citrus_namespace.h" 44 #include "citrus_types.h" 45 #include "citrus_module.h" 46 #include "citrus_region.h" 47 #include "citrus_mmap.h" 48 #include "citrus_hash.h" 49 #include "citrus_iconv.h" 50 #include "citrus_stdenc.h" 51 #include "citrus_mapper.h" 52 #include "citrus_csmapper.h" 53 #include "citrus_memstream.h" 54 #include "citrus_iconv_std.h" 55 #include "citrus_esdb.h" 56 57 /* ---------------------------------------------------------------------- */ 58 59 _CITRUS_ICONV_DECLS(iconv_std); 60 _CITRUS_ICONV_DEF_OPS(iconv_std); 61 62 63 /* ---------------------------------------------------------------------- */ 64 65 int 66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops) 67 { 68 69 memcpy(ops, &_citrus_iconv_std_iconv_ops, 70 sizeof(_citrus_iconv_std_iconv_ops)); 71 72 return (0); 73 } 74 75 /* ---------------------------------------------------------------------- */ 76 77 /* 78 * convenience routines for stdenc. 79 */ 80 static __inline void 81 save_encoding_state(struct _citrus_iconv_std_encoding *se) 82 { 83 84 if (se->se_ps) 85 memcpy(se->se_pssaved, se->se_ps, 86 _stdenc_get_state_size(se->se_handle)); 87 } 88 89 static __inline void 90 restore_encoding_state(struct _citrus_iconv_std_encoding *se) 91 { 92 93 if (se->se_ps) 94 memcpy(se->se_ps, se->se_pssaved, 95 _stdenc_get_state_size(se->se_handle)); 96 } 97 98 static __inline void 99 init_encoding_state(struct _citrus_iconv_std_encoding *se) 100 { 101 102 if (se->se_ps) 103 _stdenc_init_state(se->se_handle, se->se_ps); 104 } 105 106 static __inline int 107 mbtocsx(struct _citrus_iconv_std_encoding *se, 108 _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult, 109 struct iconv_hooks *hooks) 110 { 111 112 return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps, 113 nresult, hooks)); 114 } 115 116 static __inline int 117 cstombx(struct _citrus_iconv_std_encoding *se, 118 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult, 119 struct iconv_hooks *hooks) 120 { 121 122 return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps, 123 nresult, hooks)); 124 } 125 126 static __inline int 127 wctombx(struct _citrus_iconv_std_encoding *se, 128 char *s, size_t n, _wc_t wc, size_t *nresult, 129 struct iconv_hooks *hooks) 130 { 131 132 return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult, 133 hooks)); 134 } 135 136 static __inline int 137 put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n, 138 size_t *nresult) 139 { 140 141 return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult)); 142 } 143 144 static __inline int 145 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate) 146 { 147 struct _stdenc_state_desc ssd; 148 int ret; 149 150 ret = _stdenc_get_state_desc(se->se_handle, se->se_ps, 151 _STDENC_SDID_GENERIC, &ssd); 152 if (!ret) 153 *rstate = ssd.u.generic.state; 154 155 return (ret); 156 } 157 158 /* 159 * init encoding context 160 */ 161 static int 162 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs, 163 void *ps1, void *ps2) 164 { 165 int ret = -1; 166 167 se->se_handle = cs; 168 se->se_ps = ps1; 169 se->se_pssaved = ps2; 170 171 if (se->se_ps) 172 ret = _stdenc_init_state(cs, se->se_ps); 173 if (!ret && se->se_pssaved) 174 ret = _stdenc_init_state(cs, se->se_pssaved); 175 176 return (ret); 177 } 178 179 static int 180 open_csmapper(struct _csmapper **rcm, const char *src, const char *dst, 181 unsigned long *rnorm) 182 { 183 struct _csmapper *cm; 184 int ret; 185 186 ret = _csmapper_open(&cm, src, dst, 0, rnorm); 187 if (ret) 188 return (ret); 189 if (_csmapper_get_src_max(cm) != 1 || _csmapper_get_dst_max(cm) != 1 || 190 _csmapper_get_state_size(cm) != 0) { 191 _csmapper_close(cm); 192 return (EINVAL); 193 } 194 195 *rcm = cm; 196 197 return (0); 198 } 199 200 static void 201 close_dsts(struct _citrus_iconv_std_dst_list *dl) 202 { 203 struct _citrus_iconv_std_dst *sd; 204 205 while ((sd = TAILQ_FIRST(dl)) != NULL) { 206 TAILQ_REMOVE(dl, sd, sd_entry); 207 _csmapper_close(sd->sd_mapper); 208 free(sd); 209 } 210 } 211 212 static int 213 open_dsts(struct _citrus_iconv_std_dst_list *dl, 214 const struct _esdb_charset *ec, const struct _esdb *dbdst) 215 { 216 struct _citrus_iconv_std_dst *sd, *sdtmp; 217 unsigned long norm; 218 int i, ret; 219 220 sd = malloc(sizeof(*sd)); 221 if (sd == NULL) 222 return (errno); 223 224 for (i = 0; i < dbdst->db_num_charsets; i++) { 225 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname, 226 dbdst->db_charsets[i].ec_csname, &norm); 227 if (ret == 0) { 228 sd->sd_csid = dbdst->db_charsets[i].ec_csid; 229 sd->sd_norm = norm; 230 /* insert this mapper by sorted order. */ 231 TAILQ_FOREACH(sdtmp, dl, sd_entry) { 232 if (sdtmp->sd_norm > norm) { 233 TAILQ_INSERT_BEFORE(sdtmp, sd, 234 sd_entry); 235 sd = NULL; 236 break; 237 } 238 } 239 if (sd) 240 TAILQ_INSERT_TAIL(dl, sd, sd_entry); 241 sd = malloc(sizeof(*sd)); 242 if (sd == NULL) { 243 ret = errno; 244 close_dsts(dl); 245 return (ret); 246 } 247 } else if (ret != ENOENT) { 248 close_dsts(dl); 249 free(sd); 250 return (ret); 251 } 252 } 253 free(sd); 254 return (0); 255 } 256 257 static void 258 close_srcs(struct _citrus_iconv_std_src_list *sl) 259 { 260 struct _citrus_iconv_std_src *ss; 261 262 while ((ss = TAILQ_FIRST(sl)) != NULL) { 263 TAILQ_REMOVE(sl, ss, ss_entry); 264 close_dsts(&ss->ss_dsts); 265 free(ss); 266 } 267 } 268 269 static int 270 open_srcs(struct _citrus_iconv_std_src_list *sl, 271 const struct _esdb *dbsrc, const struct _esdb *dbdst) 272 { 273 struct _citrus_iconv_std_src *ss; 274 int count = 0, i, ret; 275 276 ss = malloc(sizeof(*ss)); 277 if (ss == NULL) 278 return (errno); 279 280 TAILQ_INIT(&ss->ss_dsts); 281 282 for (i = 0; i < dbsrc->db_num_charsets; i++) { 283 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst); 284 if (ret) 285 goto err; 286 if (!TAILQ_EMPTY(&ss->ss_dsts)) { 287 ss->ss_csid = dbsrc->db_charsets[i].ec_csid; 288 TAILQ_INSERT_TAIL(sl, ss, ss_entry); 289 ss = malloc(sizeof(*ss)); 290 if (ss == NULL) { 291 ret = errno; 292 goto err; 293 } 294 count++; 295 TAILQ_INIT(&ss->ss_dsts); 296 } 297 } 298 free(ss); 299 300 return (count ? 0 : ENOENT); 301 302 err: 303 free(ss); 304 close_srcs(sl); 305 return (ret); 306 } 307 308 /* do convert a character */ 309 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */ 310 static int 311 /*ARGSUSED*/ 312 do_conv(const struct _citrus_iconv_std_shared *is, 313 _csid_t *csid, _index_t *idx) 314 { 315 struct _citrus_iconv_std_dst *sd; 316 struct _citrus_iconv_std_src *ss; 317 _index_t tmpidx; 318 int ret; 319 320 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) { 321 if (ss->ss_csid == *csid) { 322 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) { 323 ret = _csmapper_convert(sd->sd_mapper, 324 &tmpidx, *idx, NULL); 325 switch (ret) { 326 case _MAPPER_CONVERT_SUCCESS: 327 *csid = sd->sd_csid; 328 *idx = tmpidx; 329 return (0); 330 case _MAPPER_CONVERT_NONIDENTICAL: 331 break; 332 case _MAPPER_CONVERT_SRC_MORE: 333 /*FALLTHROUGH*/ 334 case _MAPPER_CONVERT_DST_MORE: 335 /*FALLTHROUGH*/ 336 case _MAPPER_CONVERT_ILSEQ: 337 return (EILSEQ); 338 case _MAPPER_CONVERT_FATAL: 339 return (EINVAL); 340 } 341 } 342 break; 343 } 344 } 345 346 return (E_NO_CORRESPONDING_CHAR); 347 } 348 /* ---------------------------------------------------------------------- */ 349 350 static int 351 /*ARGSUSED*/ 352 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci, 353 const char * __restrict src, const char * __restrict dst) 354 { 355 struct _citrus_esdb esdbdst, esdbsrc; 356 struct _citrus_iconv_std_shared *is; 357 int ret; 358 359 is = malloc(sizeof(*is)); 360 if (is == NULL) { 361 ret = errno; 362 goto err0; 363 } 364 ret = _citrus_esdb_open(&esdbsrc, src); 365 if (ret) 366 goto err1; 367 ret = _citrus_esdb_open(&esdbdst, dst); 368 if (ret) 369 goto err2; 370 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname, 371 esdbsrc.db_variable, esdbsrc.db_len_variable); 372 if (ret) 373 goto err3; 374 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname, 375 esdbdst.db_variable, esdbdst.db_len_variable); 376 if (ret) 377 goto err4; 378 is->is_use_invalid = esdbdst.db_use_invalid; 379 is->is_invalid = esdbdst.db_invalid; 380 381 TAILQ_INIT(&is->is_srcs); 382 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst); 383 if (ret) 384 goto err5; 385 386 _esdb_close(&esdbsrc); 387 _esdb_close(&esdbdst); 388 ci->ci_closure = is; 389 390 return (0); 391 392 err5: 393 _stdenc_close(is->is_dst_encoding); 394 err4: 395 _stdenc_close(is->is_src_encoding); 396 err3: 397 _esdb_close(&esdbdst); 398 err2: 399 _esdb_close(&esdbsrc); 400 err1: 401 free(is); 402 err0: 403 return (ret); 404 } 405 406 static void 407 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci) 408 { 409 struct _citrus_iconv_std_shared *is = ci->ci_closure; 410 411 if (is == NULL) 412 return; 413 414 _stdenc_close(is->is_src_encoding); 415 _stdenc_close(is->is_dst_encoding); 416 close_srcs(&is->is_srcs); 417 free(is); 418 } 419 420 static int 421 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv) 422 { 423 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 424 struct _citrus_iconv_std_context *sc; 425 char *ptr; 426 size_t sz, szpsdst, szpssrc; 427 428 szpssrc = _stdenc_get_state_size(is->is_src_encoding); 429 szpsdst = _stdenc_get_state_size(is->is_dst_encoding); 430 431 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context); 432 sc = malloc(sz); 433 if (sc == NULL) 434 return (errno); 435 436 ptr = (char *)&sc[1]; 437 if (szpssrc > 0) 438 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 439 ptr, ptr+szpssrc); 440 else 441 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 442 NULL, NULL); 443 ptr += szpssrc*2; 444 if (szpsdst > 0) 445 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 446 ptr, ptr+szpsdst); 447 else 448 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 449 NULL, NULL); 450 451 cv->cv_closure = (void *)sc; 452 453 return (0); 454 } 455 456 static void 457 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv) 458 { 459 460 free(cv->cv_closure); 461 } 462 463 static int 464 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv, 465 char * __restrict * __restrict in, size_t * __restrict inbytes, 466 char * __restrict * __restrict out, size_t * __restrict outbytes, 467 uint32_t flags, size_t * __restrict invalids) 468 { 469 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 470 struct _citrus_iconv_std_context *sc = cv->cv_closure; 471 _csid_t csid; 472 _index_t idx; 473 char *tmpin; 474 size_t inval, in_mb_cur_min, szrin, szrout; 475 int ret, state = 0; 476 477 inval = 0; 478 if (in == NULL || *in == NULL) { 479 /* special cases */ 480 if (out != NULL && *out != NULL) { 481 /* init output state and store the shift sequence */ 482 save_encoding_state(&sc->sc_src_encoding); 483 save_encoding_state(&sc->sc_dst_encoding); 484 szrout = 0; 485 486 ret = put_state_resetx(&sc->sc_dst_encoding, 487 *out, *outbytes, &szrout); 488 if (ret) 489 goto err; 490 491 if (szrout == (size_t)-2) { 492 /* too small to store the character */ 493 ret = EINVAL; 494 goto err; 495 } 496 *out += szrout; 497 *outbytes -= szrout; 498 } else 499 /* otherwise, discard the shift sequence */ 500 init_encoding_state(&sc->sc_dst_encoding); 501 init_encoding_state(&sc->sc_src_encoding); 502 *invalids = 0; 503 return (0); 504 } 505 506 in_mb_cur_min = _stdenc_get_mb_cur_min(is->is_src_encoding); 507 508 /* normal case */ 509 for (;;) { 510 if (*inbytes == 0) { 511 ret = get_state_desc_gen(&sc->sc_src_encoding, &state); 512 if (state == _STDENC_SDGEN_INITIAL || 513 state == _STDENC_SDGEN_STABLE) 514 break; 515 } 516 517 /* save the encoding states for the error recovery */ 518 save_encoding_state(&sc->sc_src_encoding); 519 save_encoding_state(&sc->sc_dst_encoding); 520 521 /* mb -> csid/index */ 522 tmpin = *in; 523 szrin = szrout = 0; 524 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin, 525 *inbytes, &szrin, cv->cv_shared->ci_hooks); 526 if (ret != 0 && (ret != EILSEQ || 527 !cv->cv_shared->ci_discard_ilseq)) { 528 goto err; 529 } else if (ret == EILSEQ) { 530 /* 531 * If //IGNORE was specified, we'll just keep crunching 532 * through invalid characters. 533 */ 534 *in += in_mb_cur_min; 535 *inbytes -= in_mb_cur_min; 536 restore_encoding_state(&sc->sc_src_encoding); 537 restore_encoding_state(&sc->sc_dst_encoding); 538 continue; 539 } 540 541 if (szrin == (size_t)-2) { 542 /* incompleted character */ 543 ret = get_state_desc_gen(&sc->sc_src_encoding, &state); 544 if (ret) { 545 ret = EINVAL; 546 goto err; 547 } 548 switch (state) { 549 case _STDENC_SDGEN_INITIAL: 550 case _STDENC_SDGEN_STABLE: 551 /* fetch shift sequences only. */ 552 goto next; 553 } 554 ret = EINVAL; 555 goto err; 556 } 557 /* convert the character */ 558 ret = do_conv(is, &csid, &idx); 559 if (ret) { 560 if (ret == E_NO_CORRESPONDING_CHAR) { 561 /* 562 * GNU iconv returns EILSEQ when no 563 * corresponding character in the output. 564 * Some software depends on this behavior 565 * though this is against POSIX specification. 566 */ 567 if (cv->cv_shared->ci_ilseq_invalid != 0) { 568 ret = EILSEQ; 569 goto err; 570 } 571 inval++; 572 szrout = 0; 573 if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) && 574 !cv->cv_shared->ci_discard_ilseq) && 575 is->is_use_invalid) { 576 ret = wctombx(&sc->sc_dst_encoding, 577 *out, *outbytes, is->is_invalid, 578 &szrout, cv->cv_shared->ci_hooks); 579 if (ret) 580 goto err; 581 } 582 goto next; 583 } else 584 goto err; 585 } 586 /* csid/index -> mb */ 587 ret = cstombx(&sc->sc_dst_encoding, 588 *out, *outbytes, csid, idx, &szrout, 589 cv->cv_shared->ci_hooks); 590 if (ret) 591 goto err; 592 next: 593 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */ 594 *in = tmpin; 595 *outbytes -= szrout; 596 *out += szrout; 597 } 598 *invalids = inval; 599 600 return (0); 601 602 err: 603 restore_encoding_state(&sc->sc_src_encoding); 604 restore_encoding_state(&sc->sc_dst_encoding); 605 *invalids = inval; 606 607 return (ret); 608 } 609