1 /* $NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c)2003 Citrus Project,
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 #include <sys/endian.h>
33 #include <sys/queue.h>
34
35 #include <assert.h>
36 #include <errno.h>
37 #include <limits.h>
38 #include <stdbool.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_module.h"
46 #include "citrus_region.h"
47 #include "citrus_mmap.h"
48 #include "citrus_hash.h"
49 #include "citrus_iconv.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_mapper.h"
52 #include "citrus_csmapper.h"
53 #include "citrus_memstream.h"
54 #include "citrus_iconv_std.h"
55 #include "citrus_esdb.h"
56
57 /* ---------------------------------------------------------------------- */
58
59 _CITRUS_ICONV_DECLS(iconv_std);
60 _CITRUS_ICONV_DEF_OPS(iconv_std);
61
62
63 /* ---------------------------------------------------------------------- */
64
65 int
_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops * ops)66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops)
67 {
68
69 memcpy(ops, &_citrus_iconv_std_iconv_ops,
70 sizeof(_citrus_iconv_std_iconv_ops));
71
72 return (0);
73 }
74
75 /* ---------------------------------------------------------------------- */
76
77 /*
78 * convenience routines for stdenc.
79 */
80 static __inline void
save_encoding_state(struct _citrus_iconv_std_encoding * se)81 save_encoding_state(struct _citrus_iconv_std_encoding *se)
82 {
83
84 if (se->se_ps)
85 memcpy(se->se_pssaved, se->se_ps,
86 _stdenc_get_state_size(se->se_handle));
87 }
88
89 static __inline void
restore_encoding_state(struct _citrus_iconv_std_encoding * se)90 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
91 {
92
93 if (se->se_ps)
94 memcpy(se->se_ps, se->se_pssaved,
95 _stdenc_get_state_size(se->se_handle));
96 }
97
98 static __inline void
init_encoding_state(struct _citrus_iconv_std_encoding * se)99 init_encoding_state(struct _citrus_iconv_std_encoding *se)
100 {
101
102 if (se->se_ps)
103 _stdenc_init_state(se->se_handle, se->se_ps);
104 }
105
106 static __inline int
mbtocsx(struct _citrus_iconv_std_encoding * se,_csid_t * csid,_index_t * idx,char ** s,size_t n,size_t * nresult,struct iconv_hooks * hooks)107 mbtocsx(struct _citrus_iconv_std_encoding *se,
108 _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult,
109 struct iconv_hooks *hooks)
110 {
111
112 return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
113 nresult, hooks));
114 }
115
116 static __inline int
cstombx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,_csid_t csid,_index_t idx,size_t * nresult,struct iconv_hooks * hooks)117 cstombx(struct _citrus_iconv_std_encoding *se,
118 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult,
119 struct iconv_hooks *hooks)
120 {
121
122 return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
123 nresult, hooks));
124 }
125
126 static __inline int
wctombx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,_wc_t wc,size_t * nresult,struct iconv_hooks * hooks)127 wctombx(struct _citrus_iconv_std_encoding *se,
128 char *s, size_t n, _wc_t wc, size_t *nresult,
129 struct iconv_hooks *hooks)
130 {
131
132 return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult,
133 hooks));
134 }
135
136 static __inline int
put_state_resetx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,size_t * nresult)137 put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n,
138 size_t *nresult)
139 {
140
141 return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult));
142 }
143
144 static __inline int
get_state_desc_gen(struct _citrus_iconv_std_encoding * se,int * rstate)145 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
146 {
147 struct _stdenc_state_desc ssd;
148 int ret;
149
150 ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
151 _STDENC_SDID_GENERIC, &ssd);
152 if (!ret)
153 *rstate = ssd.u.generic.state;
154
155 return (ret);
156 }
157
158 /*
159 * init encoding context
160 */
161 static int
init_encoding(struct _citrus_iconv_std_encoding * se,struct _stdenc * cs,void * ps1,void * ps2)162 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
163 void *ps1, void *ps2)
164 {
165 int ret = -1;
166
167 se->se_handle = cs;
168 se->se_ps = ps1;
169 se->se_pssaved = ps2;
170
171 if (se->se_ps)
172 ret = _stdenc_init_state(cs, se->se_ps);
173 if (!ret && se->se_pssaved)
174 ret = _stdenc_init_state(cs, se->se_pssaved);
175
176 return (ret);
177 }
178
/*
 * Open the charset mapper from `src' to `dst' and accept it only if it
 * is a stateless one-to-one mapper (source max 1, destination max 1,
 * state size 0).
 *
 * rcm:   receives the opened mapper on success; untouched on failure.
 * rnorm: passed through to _csmapper_open().
 *
 * Returns 0 on success, the error from _csmapper_open(), or EINVAL when
 * the mapper was opened but does not have the required shape (it is
 * closed again in that case).
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	/* Not usable by this module: release it again. */
	_csmapper_close(mapper);

	return (EINVAL);
}
199
200 static void
close_dsts(struct _citrus_iconv_std_dst_list * dl)201 close_dsts(struct _citrus_iconv_std_dst_list *dl)
202 {
203 struct _citrus_iconv_std_dst *sd;
204
205 while ((sd = TAILQ_FIRST(dl)) != NULL) {
206 TAILQ_REMOVE(dl, sd, sd_entry);
207 _csmapper_close(sd->sd_mapper);
208 free(sd);
209 }
210 }
211
212 static int
open_dsts(struct _citrus_iconv_std_dst_list * dl,const struct _esdb_charset * ec,const struct _esdb * dbdst)213 open_dsts(struct _citrus_iconv_std_dst_list *dl,
214 const struct _esdb_charset *ec, const struct _esdb *dbdst)
215 {
216 struct _citrus_iconv_std_dst *sd, *sdtmp;
217 unsigned long norm;
218 int i, ret;
219
220 sd = malloc(sizeof(*sd));
221 if (sd == NULL)
222 return (errno);
223
224 for (i = 0; i < dbdst->db_num_charsets; i++) {
225 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
226 dbdst->db_charsets[i].ec_csname, &norm);
227 if (ret == 0) {
228 sd->sd_csid = dbdst->db_charsets[i].ec_csid;
229 sd->sd_norm = norm;
230 /* insert this mapper by sorted order. */
231 TAILQ_FOREACH(sdtmp, dl, sd_entry) {
232 if (sdtmp->sd_norm > norm) {
233 TAILQ_INSERT_BEFORE(sdtmp, sd,
234 sd_entry);
235 sd = NULL;
236 break;
237 }
238 }
239 if (sd)
240 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
241 sd = malloc(sizeof(*sd));
242 if (sd == NULL) {
243 ret = errno;
244 close_dsts(dl);
245 return (ret);
246 }
247 } else if (ret != ENOENT) {
248 close_dsts(dl);
249 free(sd);
250 return (ret);
251 }
252 }
253 free(sd);
254 return (0);
255 }
256
257 static void
close_srcs(struct _citrus_iconv_std_src_list * sl)258 close_srcs(struct _citrus_iconv_std_src_list *sl)
259 {
260 struct _citrus_iconv_std_src *ss;
261
262 while ((ss = TAILQ_FIRST(sl)) != NULL) {
263 TAILQ_REMOVE(sl, ss, ss_entry);
264 close_dsts(&ss->ss_dsts);
265 free(ss);
266 }
267 }
268
269 static int
open_srcs(struct _citrus_iconv_std_src_list * sl,const struct _esdb * dbsrc,const struct _esdb * dbdst)270 open_srcs(struct _citrus_iconv_std_src_list *sl,
271 const struct _esdb *dbsrc, const struct _esdb *dbdst)
272 {
273 struct _citrus_iconv_std_src *ss;
274 int count = 0, i, ret;
275
276 ss = malloc(sizeof(*ss));
277 if (ss == NULL)
278 return (errno);
279
280 TAILQ_INIT(&ss->ss_dsts);
281
282 for (i = 0; i < dbsrc->db_num_charsets; i++) {
283 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
284 if (ret)
285 goto err;
286 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
287 ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
288 TAILQ_INSERT_TAIL(sl, ss, ss_entry);
289 ss = malloc(sizeof(*ss));
290 if (ss == NULL) {
291 ret = errno;
292 goto err;
293 }
294 count++;
295 TAILQ_INIT(&ss->ss_dsts);
296 }
297 }
298 free(ss);
299
300 return (count ? 0 : ENOENT);
301
302 err:
303 free(ss);
304 close_srcs(sl);
305 return (ret);
306 }
307
/* convert a single character */
309 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
310 static int
311 /*ARGSUSED*/
do_conv(const struct _citrus_iconv_std_shared * is,_csid_t * csid,_index_t * idx)312 do_conv(const struct _citrus_iconv_std_shared *is,
313 _csid_t *csid, _index_t *idx)
314 {
315 struct _citrus_iconv_std_dst *sd;
316 struct _citrus_iconv_std_src *ss;
317 _index_t tmpidx;
318 int ret;
319
320 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
321 if (ss->ss_csid == *csid) {
322 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
323 ret = _csmapper_convert(sd->sd_mapper,
324 &tmpidx, *idx, NULL);
325 switch (ret) {
326 case _MAPPER_CONVERT_SUCCESS:
327 *csid = sd->sd_csid;
328 *idx = tmpidx;
329 return (0);
330 case _MAPPER_CONVERT_NONIDENTICAL:
331 break;
332 case _MAPPER_CONVERT_SRC_MORE:
333 /*FALLTHROUGH*/
334 case _MAPPER_CONVERT_DST_MORE:
335 /*FALLTHROUGH*/
336 case _MAPPER_CONVERT_ILSEQ:
337 return (EILSEQ);
338 case _MAPPER_CONVERT_FATAL:
339 return (EINVAL);
340 }
341 }
342 break;
343 }
344 }
345
346 return (E_NO_CORRESPONDING_CHAR);
347 }
348 /* ---------------------------------------------------------------------- */
349
350 static int
351 /*ARGSUSED*/
_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared * ci,const char * __restrict src,const char * __restrict dst)352 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
353 const char * __restrict src, const char * __restrict dst)
354 {
355 struct _citrus_esdb esdbdst, esdbsrc;
356 struct _citrus_iconv_std_shared *is;
357 int ret;
358
359 is = malloc(sizeof(*is));
360 if (is == NULL) {
361 ret = errno;
362 goto err0;
363 }
364 ret = _citrus_esdb_open(&esdbsrc, src);
365 if (ret)
366 goto err1;
367 ret = _citrus_esdb_open(&esdbdst, dst);
368 if (ret)
369 goto err2;
370 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
371 esdbsrc.db_variable, esdbsrc.db_len_variable);
372 if (ret)
373 goto err3;
374 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
375 esdbdst.db_variable, esdbdst.db_len_variable);
376 if (ret)
377 goto err4;
378 is->is_use_invalid = esdbdst.db_use_invalid;
379 is->is_invalid = esdbdst.db_invalid;
380
381 TAILQ_INIT(&is->is_srcs);
382 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
383 if (ret)
384 goto err5;
385
386 _esdb_close(&esdbsrc);
387 _esdb_close(&esdbdst);
388 ci->ci_closure = is;
389
390 return (0);
391
392 err5:
393 _stdenc_close(is->is_dst_encoding);
394 err4:
395 _stdenc_close(is->is_src_encoding);
396 err3:
397 _esdb_close(&esdbdst);
398 err2:
399 _esdb_close(&esdbsrc);
400 err1:
401 free(is);
402 err0:
403 return (ret);
404 }
405
406 static void
_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared * ci)407 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
408 {
409 struct _citrus_iconv_std_shared *is = ci->ci_closure;
410
411 if (is == NULL)
412 return;
413
414 _stdenc_close(is->is_src_encoding);
415 _stdenc_close(is->is_dst_encoding);
416 close_srcs(&is->is_srcs);
417 free(is);
418 }
419
420 static int
_citrus_iconv_std_iconv_init_context(struct _citrus_iconv * cv)421 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
422 {
423 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
424 struct _citrus_iconv_std_context *sc;
425 char *ptr;
426 size_t sz, szpsdst, szpssrc;
427
428 szpssrc = _stdenc_get_state_size(is->is_src_encoding);
429 szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
430
431 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
432 sc = malloc(sz);
433 if (sc == NULL)
434 return (errno);
435
436 ptr = (char *)&sc[1];
437 if (szpssrc > 0)
438 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
439 ptr, ptr+szpssrc);
440 else
441 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
442 NULL, NULL);
443 ptr += szpssrc*2;
444 if (szpsdst > 0)
445 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
446 ptr, ptr+szpsdst);
447 else
448 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
449 NULL, NULL);
450
451 cv->cv_closure = (void *)sc;
452
453 return (0);
454 }
455
456 static void
_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv * cv)457 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
458 {
459
460 free(cv->cv_closure);
461 }
462
463 static int
_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,char * __restrict * __restrict in,size_t * __restrict inbytes,char * __restrict * __restrict out,size_t * __restrict outbytes,uint32_t flags,size_t * __restrict invalids)464 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
465 char * __restrict * __restrict in, size_t * __restrict inbytes,
466 char * __restrict * __restrict out, size_t * __restrict outbytes,
467 uint32_t flags, size_t * __restrict invalids)
468 {
469 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
470 struct _citrus_iconv_std_context *sc = cv->cv_closure;
471 _csid_t csid;
472 _index_t idx;
473 char *tmpin;
474 size_t inval, in_mb_cur_min, szrin, szrout;
475 int ret, state = 0;
476
477 inval = 0;
478 if (in == NULL || *in == NULL) {
479 /* special cases */
480 if (out != NULL && *out != NULL) {
481 /* init output state and store the shift sequence */
482 save_encoding_state(&sc->sc_src_encoding);
483 save_encoding_state(&sc->sc_dst_encoding);
484 szrout = 0;
485
486 ret = put_state_resetx(&sc->sc_dst_encoding,
487 *out, *outbytes, &szrout);
488 if (ret)
489 goto err;
490
491 if (szrout == (size_t)-2) {
492 /* too small to store the character */
493 ret = EINVAL;
494 goto err;
495 }
496 *out += szrout;
497 *outbytes -= szrout;
498 } else
499 /* otherwise, discard the shift sequence */
500 init_encoding_state(&sc->sc_dst_encoding);
501 init_encoding_state(&sc->sc_src_encoding);
502 *invalids = 0;
503 return (0);
504 }
505
506 in_mb_cur_min = _stdenc_get_mb_cur_min(is->is_src_encoding);
507
508 /* normal case */
509 for (;;) {
510 if (*inbytes == 0) {
511 ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
512 if (state == _STDENC_SDGEN_INITIAL ||
513 state == _STDENC_SDGEN_STABLE)
514 break;
515 }
516
517 /* save the encoding states for the error recovery */
518 save_encoding_state(&sc->sc_src_encoding);
519 save_encoding_state(&sc->sc_dst_encoding);
520
521 /* mb -> csid/index */
522 tmpin = *in;
523 szrin = szrout = 0;
524 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
525 *inbytes, &szrin, cv->cv_shared->ci_hooks);
526 if (ret != 0 && (ret != EILSEQ ||
527 !cv->cv_shared->ci_discard_ilseq)) {
528 goto err;
529 } else if (ret == EILSEQ) {
530 /*
531 * If //IGNORE was specified, we'll just keep crunching
532 * through invalid characters.
533 */
534 *in += in_mb_cur_min;
535 *inbytes -= in_mb_cur_min;
536 restore_encoding_state(&sc->sc_src_encoding);
537 restore_encoding_state(&sc->sc_dst_encoding);
538 continue;
539 }
540
541 if (szrin == (size_t)-2) {
542 /* incompleted character */
543 ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
544 if (ret) {
545 ret = EINVAL;
546 goto err;
547 }
548 switch (state) {
549 case _STDENC_SDGEN_INITIAL:
550 case _STDENC_SDGEN_STABLE:
551 /* fetch shift sequences only. */
552 goto next;
553 }
554 ret = EINVAL;
555 goto err;
556 }
557 /* convert the character */
558 ret = do_conv(is, &csid, &idx);
559 if (ret) {
560 if (ret == E_NO_CORRESPONDING_CHAR) {
561 /*
562 * GNU iconv returns EILSEQ when no
563 * corresponding character in the output.
564 * Some software depends on this behavior
565 * though this is against POSIX specification.
566 */
567 if (cv->cv_shared->ci_ilseq_invalid != 0) {
568 ret = EILSEQ;
569 goto err;
570 }
571 inval++;
572 szrout = 0;
573 if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&
574 !cv->cv_shared->ci_discard_ilseq) &&
575 is->is_use_invalid) {
576 ret = wctombx(&sc->sc_dst_encoding,
577 *out, *outbytes, is->is_invalid,
578 &szrout, cv->cv_shared->ci_hooks);
579 if (ret)
580 goto err;
581 }
582 goto next;
583 } else
584 goto err;
585 }
586 /* csid/index -> mb */
587 ret = cstombx(&sc->sc_dst_encoding,
588 *out, *outbytes, csid, idx, &szrout,
589 cv->cv_shared->ci_hooks);
590 if (ret)
591 goto err;
592 next:
593 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
594 *in = tmpin;
595 *outbytes -= szrout;
596 *out += szrout;
597 }
598 *invalids = inval;
599
600 return (0);
601
602 err:
603 restore_encoding_state(&sc->sc_src_encoding);
604 restore_encoding_state(&sc->sc_dst_encoding);
605 *invalids = inval;
606
607 return (ret);
608 }
609