1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $ */ 3 4 /*- 5 * SPDX-License-Identifier: BSD-2-Clause 6 * 7 * Copyright (c)2003 Citrus Project, 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #include <sys/endian.h> 34 #include <sys/queue.h> 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <limits.h> 39 #include <stdbool.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 44 #include "citrus_namespace.h" 45 #include "citrus_types.h" 46 #include "citrus_module.h" 47 #include "citrus_region.h" 48 #include "citrus_mmap.h" 49 #include "citrus_hash.h" 50 #include "citrus_iconv.h" 51 #include "citrus_stdenc.h" 52 #include "citrus_mapper.h" 53 #include "citrus_csmapper.h" 54 #include "citrus_memstream.h" 55 #include "citrus_iconv_std.h" 56 #include "citrus_esdb.h" 57 58 /* ---------------------------------------------------------------------- */ 59 60 _CITRUS_ICONV_DECLS(iconv_std); 61 _CITRUS_ICONV_DEF_OPS(iconv_std); 62 63 64 /* ---------------------------------------------------------------------- */ 65 66 int 67 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops) 68 { 69 70 memcpy(ops, &_citrus_iconv_std_iconv_ops, 71 sizeof(_citrus_iconv_std_iconv_ops)); 72 73 return (0); 74 } 75 76 /* ---------------------------------------------------------------------- */ 77 78 /* 79 * convenience routines for stdenc. 80 */ 81 static __inline void 82 save_encoding_state(struct _citrus_iconv_std_encoding *se) 83 { 84 85 if (se->se_ps) 86 memcpy(se->se_pssaved, se->se_ps, 87 _stdenc_get_state_size(se->se_handle)); 88 } 89 90 static __inline void 91 restore_encoding_state(struct _citrus_iconv_std_encoding *se) 92 { 93 94 if (se->se_ps) 95 memcpy(se->se_ps, se->se_pssaved, 96 _stdenc_get_state_size(se->se_handle)); 97 } 98 99 static __inline void 100 init_encoding_state(struct _citrus_iconv_std_encoding *se) 101 { 102 103 if (se->se_ps) 104 _stdenc_init_state(se->se_handle, se->se_ps); 105 } 106 107 static __inline int 108 mbtocsx(struct _citrus_iconv_std_encoding *se, 109 _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult, 110 struct iconv_hooks *hooks) 111 { 112 113 return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps, 114 nresult, hooks)); 115 } 116 117 static __inline int 118 cstombx(struct _citrus_iconv_std_encoding *se, 119 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult, 120 struct iconv_hooks *hooks) 121 { 122 123 return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps, 124 nresult, hooks)); 125 } 126 127 static __inline int 128 wctombx(struct _citrus_iconv_std_encoding *se, 129 char *s, size_t n, _wc_t wc, size_t *nresult, 130 struct iconv_hooks *hooks) 131 { 132 133 return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult, 134 hooks)); 135 } 136 137 static __inline int 138 put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n, 139 size_t *nresult) 140 { 141 142 return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult)); 143 } 144 145 static __inline int 146 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate) 147 { 148 struct _stdenc_state_desc ssd; 149 int ret; 150 151 ret = _stdenc_get_state_desc(se->se_handle, se->se_ps, 152 _STDENC_SDID_GENERIC, &ssd); 153 if (!ret) 154 *rstate = ssd.u.generic.state; 155 156 return (ret); 157 } 158 159 /* 160 * init encoding context 161 */ 162 static int 163 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs, 164 void *ps1, void *ps2) 165 { 166 int ret = -1; 167 168 se->se_handle = cs; 169 se->se_ps = ps1; 170 se->se_pssaved = ps2; 171 172 if (se->se_ps) 173 ret = _stdenc_init_state(cs, se->se_ps); 174 if (!ret && se->se_pssaved) 175 ret = _stdenc_init_state(cs, se->se_pssaved); 176 177 return (ret); 178 } 179 180 static int 181 open_csmapper(struct _csmapper **rcm, const char *src, const char *dst, 182 unsigned long *rnorm) 183 { 184 struct _csmapper *cm; 185 int ret; 186 187 ret = _csmapper_open(&cm, src, dst, 0, rnorm); 188 if (ret) 189 return (ret); 190 if (_csmapper_get_src_max(cm) != 1 || _csmapper_get_dst_max(cm) != 1 || 191 _csmapper_get_state_size(cm) != 0) { 192 _csmapper_close(cm); 193 return (EINVAL); 194 } 195 196 *rcm = cm; 197 198 return (0); 199 } 200 201 static void 202 close_dsts(struct _citrus_iconv_std_dst_list *dl) 203 { 204 struct _citrus_iconv_std_dst *sd; 205 206 while ((sd = TAILQ_FIRST(dl)) != NULL) { 207 TAILQ_REMOVE(dl, sd, sd_entry); 208 _csmapper_close(sd->sd_mapper); 209 free(sd); 210 } 211 } 212 213 static int 214 open_dsts(struct _citrus_iconv_std_dst_list *dl, 215 const struct _esdb_charset *ec, const struct _esdb *dbdst) 216 { 217 struct _citrus_iconv_std_dst *sd, *sdtmp; 218 unsigned long norm; 219 int i, ret; 220 221 sd = malloc(sizeof(*sd)); 222 if (sd == NULL) 223 return (errno); 224 225 for (i = 0; i < dbdst->db_num_charsets; i++) { 226 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname, 227 dbdst->db_charsets[i].ec_csname, &norm); 228 if (ret == 0) { 229 sd->sd_csid = dbdst->db_charsets[i].ec_csid; 230 sd->sd_norm = norm; 231 /* insert this mapper by sorted order. */ 232 TAILQ_FOREACH(sdtmp, dl, sd_entry) { 233 if (sdtmp->sd_norm > norm) { 234 TAILQ_INSERT_BEFORE(sdtmp, sd, 235 sd_entry); 236 sd = NULL; 237 break; 238 } 239 } 240 if (sd) 241 TAILQ_INSERT_TAIL(dl, sd, sd_entry); 242 sd = malloc(sizeof(*sd)); 243 if (sd == NULL) { 244 ret = errno; 245 close_dsts(dl); 246 return (ret); 247 } 248 } else if (ret != ENOENT) { 249 close_dsts(dl); 250 free(sd); 251 return (ret); 252 } 253 } 254 free(sd); 255 return (0); 256 } 257 258 static void 259 close_srcs(struct _citrus_iconv_std_src_list *sl) 260 { 261 struct _citrus_iconv_std_src *ss; 262 263 while ((ss = TAILQ_FIRST(sl)) != NULL) { 264 TAILQ_REMOVE(sl, ss, ss_entry); 265 close_dsts(&ss->ss_dsts); 266 free(ss); 267 } 268 } 269 270 static int 271 open_srcs(struct _citrus_iconv_std_src_list *sl, 272 const struct _esdb *dbsrc, const struct _esdb *dbdst) 273 { 274 struct _citrus_iconv_std_src *ss; 275 int count = 0, i, ret; 276 277 ss = malloc(sizeof(*ss)); 278 if (ss == NULL) 279 return (errno); 280 281 TAILQ_INIT(&ss->ss_dsts); 282 283 for (i = 0; i < dbsrc->db_num_charsets; i++) { 284 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst); 285 if (ret) 286 goto err; 287 if (!TAILQ_EMPTY(&ss->ss_dsts)) { 288 ss->ss_csid = dbsrc->db_charsets[i].ec_csid; 289 TAILQ_INSERT_TAIL(sl, ss, ss_entry); 290 ss = malloc(sizeof(*ss)); 291 if (ss == NULL) { 292 ret = errno; 293 goto err; 294 } 295 count++; 296 TAILQ_INIT(&ss->ss_dsts); 297 } 298 } 299 free(ss); 300 301 return (count ? 0 : ENOENT); 302 303 err: 304 free(ss); 305 close_srcs(sl); 306 return (ret); 307 } 308 309 /* do convert a character */ 310 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */ 311 static int 312 /*ARGSUSED*/ 313 do_conv(const struct _citrus_iconv_std_shared *is, 314 _csid_t *csid, _index_t *idx) 315 { 316 struct _citrus_iconv_std_dst *sd; 317 struct _citrus_iconv_std_src *ss; 318 _index_t tmpidx; 319 int ret; 320 321 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) { 322 if (ss->ss_csid == *csid) { 323 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) { 324 ret = _csmapper_convert(sd->sd_mapper, 325 &tmpidx, *idx, NULL); 326 switch (ret) { 327 case _MAPPER_CONVERT_SUCCESS: 328 *csid = sd->sd_csid; 329 *idx = tmpidx; 330 return (0); 331 case _MAPPER_CONVERT_NONIDENTICAL: 332 break; 333 case _MAPPER_CONVERT_SRC_MORE: 334 /*FALLTHROUGH*/ 335 case _MAPPER_CONVERT_DST_MORE: 336 /*FALLTHROUGH*/ 337 case _MAPPER_CONVERT_ILSEQ: 338 return (EILSEQ); 339 case _MAPPER_CONVERT_FATAL: 340 return (EINVAL); 341 } 342 } 343 break; 344 } 345 } 346 347 return (E_NO_CORRESPONDING_CHAR); 348 } 349 /* ---------------------------------------------------------------------- */ 350 351 static int 352 /*ARGSUSED*/ 353 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci, 354 const char * __restrict src, const char * __restrict dst) 355 { 356 struct _citrus_esdb esdbdst, esdbsrc; 357 struct _citrus_iconv_std_shared *is; 358 int ret; 359 360 is = malloc(sizeof(*is)); 361 if (is == NULL) { 362 ret = errno; 363 goto err0; 364 } 365 ret = _citrus_esdb_open(&esdbsrc, src); 366 if (ret) 367 goto err1; 368 ret = _citrus_esdb_open(&esdbdst, dst); 369 if (ret) 370 goto err2; 371 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname, 372 esdbsrc.db_variable, esdbsrc.db_len_variable); 373 if (ret) 374 goto err3; 375 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname, 376 esdbdst.db_variable, esdbdst.db_len_variable); 377 if (ret) 378 goto err4; 379 is->is_use_invalid = esdbdst.db_use_invalid; 380 is->is_invalid = esdbdst.db_invalid; 381 382 TAILQ_INIT(&is->is_srcs); 383 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst); 384 if (ret) 385 goto err5; 386 387 _esdb_close(&esdbsrc); 388 _esdb_close(&esdbdst); 389 ci->ci_closure = is; 390 391 return (0); 392 393 err5: 394 _stdenc_close(is->is_dst_encoding); 395 err4: 396 _stdenc_close(is->is_src_encoding); 397 err3: 398 _esdb_close(&esdbdst); 399 err2: 400 _esdb_close(&esdbsrc); 401 err1: 402 free(is); 403 err0: 404 return (ret); 405 } 406 407 static void 408 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci) 409 { 410 struct _citrus_iconv_std_shared *is = ci->ci_closure; 411 412 if (is == NULL) 413 return; 414 415 _stdenc_close(is->is_src_encoding); 416 _stdenc_close(is->is_dst_encoding); 417 close_srcs(&is->is_srcs); 418 free(is); 419 } 420 421 static int 422 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv) 423 { 424 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 425 struct _citrus_iconv_std_context *sc; 426 char *ptr; 427 size_t sz, szpsdst, szpssrc; 428 429 szpssrc = _stdenc_get_state_size(is->is_src_encoding); 430 szpsdst = _stdenc_get_state_size(is->is_dst_encoding); 431 432 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context); 433 sc = malloc(sz); 434 if (sc == NULL) 435 return (errno); 436 437 ptr = (char *)&sc[1]; 438 if (szpssrc > 0) 439 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 440 ptr, ptr+szpssrc); 441 else 442 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 443 NULL, NULL); 444 ptr += szpssrc*2; 445 if (szpsdst > 0) 446 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 447 ptr, ptr+szpsdst); 448 else 449 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 450 NULL, NULL); 451 452 cv->cv_closure = (void *)sc; 453 454 return (0); 455 } 456 457 static void 458 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv) 459 { 460 461 free(cv->cv_closure); 462 } 463 464 static int 465 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv, 466 char * __restrict * __restrict in, size_t * __restrict inbytes, 467 char * __restrict * __restrict out, size_t * __restrict outbytes, 468 uint32_t flags, size_t * __restrict invalids) 469 { 470 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 471 struct _citrus_iconv_std_context *sc = cv->cv_closure; 472 _csid_t csid; 473 _index_t idx; 474 char *tmpin; 475 size_t inval, in_mb_cur_min, szrin, szrout; 476 int ret, state = 0; 477 478 inval = 0; 479 if (in == NULL || *in == NULL) { 480 /* special cases */ 481 if (out != NULL && *out != NULL) { 482 /* init output state and store the shift sequence */ 483 save_encoding_state(&sc->sc_src_encoding); 484 save_encoding_state(&sc->sc_dst_encoding); 485 szrout = 0; 486 487 ret = put_state_resetx(&sc->sc_dst_encoding, 488 *out, *outbytes, &szrout); 489 if (ret) 490 goto err; 491 492 if (szrout == (size_t)-2) { 493 /* too small to store the character */ 494 ret = EINVAL; 495 goto err; 496 } 497 *out += szrout; 498 *outbytes -= szrout; 499 } else 500 /* otherwise, discard the shift sequence */ 501 init_encoding_state(&sc->sc_dst_encoding); 502 init_encoding_state(&sc->sc_src_encoding); 503 *invalids = 0; 504 return (0); 505 } 506 507 in_mb_cur_min = _stdenc_get_mb_cur_min(is->is_src_encoding); 508 509 /* normal case */ 510 for (;;) { 511 if (*inbytes == 0) { 512 ret = get_state_desc_gen(&sc->sc_src_encoding, &state); 513 if (state == _STDENC_SDGEN_INITIAL || 514 state == _STDENC_SDGEN_STABLE) 515 break; 516 } 517 518 /* save the encoding states for the error recovery */ 519 save_encoding_state(&sc->sc_src_encoding); 520 save_encoding_state(&sc->sc_dst_encoding); 521 522 /* mb -> csid/index */ 523 tmpin = *in; 524 szrin = szrout = 0; 525 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin, 526 *inbytes, &szrin, cv->cv_shared->ci_hooks); 527 if (ret != 0 && (ret != EILSEQ || 528 !cv->cv_shared->ci_discard_ilseq)) { 529 goto err; 530 } else if (ret == EILSEQ) { 531 /* 532 * If //IGNORE was specified, we'll just keep crunching 533 * through invalid characters. 534 */ 535 *in += in_mb_cur_min; 536 *inbytes -= in_mb_cur_min; 537 restore_encoding_state(&sc->sc_src_encoding); 538 restore_encoding_state(&sc->sc_dst_encoding); 539 continue; 540 } 541 542 if (szrin == (size_t)-2) { 543 /* incompleted character */ 544 ret = get_state_desc_gen(&sc->sc_src_encoding, &state); 545 if (ret) { 546 ret = EINVAL; 547 goto err; 548 } 549 switch (state) { 550 case _STDENC_SDGEN_INITIAL: 551 case _STDENC_SDGEN_STABLE: 552 /* fetch shift sequences only. */ 553 goto next; 554 } 555 ret = EINVAL; 556 goto err; 557 } 558 /* convert the character */ 559 ret = do_conv(is, &csid, &idx); 560 if (ret) { 561 if (ret == E_NO_CORRESPONDING_CHAR) { 562 /* 563 * GNU iconv returns EILSEQ when no 564 * corresponding character in the output. 565 * Some software depends on this behavior 566 * though this is against POSIX specification. 567 */ 568 if (cv->cv_shared->ci_ilseq_invalid != 0) { 569 ret = EILSEQ; 570 goto err; 571 } 572 inval++; 573 szrout = 0; 574 if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) && 575 !cv->cv_shared->ci_discard_ilseq) && 576 is->is_use_invalid) { 577 ret = wctombx(&sc->sc_dst_encoding, 578 *out, *outbytes, is->is_invalid, 579 &szrout, cv->cv_shared->ci_hooks); 580 if (ret) 581 goto err; 582 } 583 goto next; 584 } else 585 goto err; 586 } 587 /* csid/index -> mb */ 588 ret = cstombx(&sc->sc_dst_encoding, 589 *out, *outbytes, csid, idx, &szrout, 590 cv->cv_shared->ci_hooks); 591 if (ret) 592 goto err; 593 next: 594 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */ 595 *in = tmpin; 596 *outbytes -= szrout; 597 *out += szrout; 598 } 599 *invalids = inval; 600 601 return (0); 602 603 err: 604 restore_encoding_state(&sc->sc_src_encoding); 605 restore_encoding_state(&sc->sc_dst_encoding); 606 *invalids = inval; 607 608 return (ret); 609 } 610