xref: /freebsd/lib/libiconv_modules/iconv_std/citrus_iconv_std.c (revision 02e9120893770924227138ba49df1edb3896112a)
1 /*	$NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c)2003 Citrus Project,
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/endian.h>
33 #include <sys/queue.h>
34 
35 #include <assert.h>
36 #include <errno.h>
37 #include <limits.h>
38 #include <stdbool.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_module.h"
46 #include "citrus_region.h"
47 #include "citrus_mmap.h"
48 #include "citrus_hash.h"
49 #include "citrus_iconv.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_mapper.h"
52 #include "citrus_csmapper.h"
53 #include "citrus_memstream.h"
54 #include "citrus_iconv_std.h"
55 #include "citrus_esdb.h"
56 
57 /* ---------------------------------------------------------------------- */
58 
59 _CITRUS_ICONV_DECLS(iconv_std);
60 _CITRUS_ICONV_DEF_OPS(iconv_std);
61 
62 
63 /* ---------------------------------------------------------------------- */
64 
65 int
66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops)
67 {
68 
69 	memcpy(ops, &_citrus_iconv_std_iconv_ops,
70 	    sizeof(_citrus_iconv_std_iconv_ops));
71 
72 	return (0);
73 }
74 
75 /* ---------------------------------------------------------------------- */
76 
77 /*
78  * convenience routines for stdenc.
79  */
80 static __inline void
81 save_encoding_state(struct _citrus_iconv_std_encoding *se)
82 {
83 
84 	if (se->se_ps)
85 		memcpy(se->se_pssaved, se->se_ps,
86 		    _stdenc_get_state_size(se->se_handle));
87 }
88 
89 static __inline void
90 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
91 {
92 
93 	if (se->se_ps)
94 		memcpy(se->se_ps, se->se_pssaved,
95 		    _stdenc_get_state_size(se->se_handle));
96 }
97 
98 static __inline void
99 init_encoding_state(struct _citrus_iconv_std_encoding *se)
100 {
101 
102 	if (se->se_ps)
103 		_stdenc_init_state(se->se_handle, se->se_ps);
104 }
105 
106 static __inline int
107 mbtocsx(struct _citrus_iconv_std_encoding *se,
108     _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult,
109     struct iconv_hooks *hooks)
110 {
111 
112 	return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
113 			      nresult, hooks));
114 }
115 
116 static __inline int
117 cstombx(struct _citrus_iconv_std_encoding *se,
118     char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult,
119     struct iconv_hooks *hooks)
120 {
121 
122 	return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
123 			      nresult, hooks));
124 }
125 
126 static __inline int
127 wctombx(struct _citrus_iconv_std_encoding *se,
128     char *s, size_t n, _wc_t wc, size_t *nresult,
129     struct iconv_hooks *hooks)
130 {
131 
132 	return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult,
133 			     hooks));
134 }
135 
136 static __inline int
137 put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n,
138     size_t *nresult)
139 {
140 
141 	return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult));
142 }
143 
144 static __inline int
145 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
146 {
147 	struct _stdenc_state_desc ssd;
148 	int ret;
149 
150 	ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
151 	    _STDENC_SDID_GENERIC, &ssd);
152 	if (!ret)
153 		*rstate = ssd.u.generic.state;
154 
155 	return (ret);
156 }
157 
158 /*
159  * init encoding context
160  */
161 static int
162 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
163     void *ps1, void *ps2)
164 {
165 	int ret = -1;
166 
167 	se->se_handle = cs;
168 	se->se_ps = ps1;
169 	se->se_pssaved = ps2;
170 
171 	if (se->se_ps)
172 		ret = _stdenc_init_state(cs, se->se_ps);
173 	if (!ret && se->se_pssaved)
174 		ret = _stdenc_init_state(cs, se->se_pssaved);
175 
176 	return (ret);
177 }
178 
/*
 * Open a charset mapper from src to dst and accept it only if it maps one
 * source unit to one destination unit and keeps no state.
 *
 * rcm:   receives the opened mapper on success; untouched on failure.
 * rnorm: passed through to _csmapper_open().
 *
 * Returns 0 on success, the error from _csmapper_open(), or EINVAL (after
 * closing the mapper) when the mapper's shape is not acceptable.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	_csmapper_close(mapper);
	return (EINVAL);
}
199 
200 static void
201 close_dsts(struct _citrus_iconv_std_dst_list *dl)
202 {
203 	struct _citrus_iconv_std_dst *sd;
204 
205 	while ((sd = TAILQ_FIRST(dl)) != NULL) {
206 		TAILQ_REMOVE(dl, sd, sd_entry);
207 		_csmapper_close(sd->sd_mapper);
208 		free(sd);
209 	}
210 }
211 
212 static int
213 open_dsts(struct _citrus_iconv_std_dst_list *dl,
214     const struct _esdb_charset *ec, const struct _esdb *dbdst)
215 {
216 	struct _citrus_iconv_std_dst *sd, *sdtmp;
217 	unsigned long norm;
218 	int i, ret;
219 
220 	sd = malloc(sizeof(*sd));
221 	if (sd == NULL)
222 		return (errno);
223 
224 	for (i = 0; i < dbdst->db_num_charsets; i++) {
225 		ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
226 		    dbdst->db_charsets[i].ec_csname, &norm);
227 		if (ret == 0) {
228 			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
229 			sd->sd_norm = norm;
230 			/* insert this mapper by sorted order. */
231 			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
232 				if (sdtmp->sd_norm > norm) {
233 					TAILQ_INSERT_BEFORE(sdtmp, sd,
234 					    sd_entry);
235 					sd = NULL;
236 					break;
237 				}
238 			}
239 			if (sd)
240 				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
241 			sd = malloc(sizeof(*sd));
242 			if (sd == NULL) {
243 				ret = errno;
244 				close_dsts(dl);
245 				return (ret);
246 			}
247 		} else if (ret != ENOENT) {
248 			close_dsts(dl);
249 			free(sd);
250 			return (ret);
251 		}
252 	}
253 	free(sd);
254 	return (0);
255 }
256 
257 static void
258 close_srcs(struct _citrus_iconv_std_src_list *sl)
259 {
260 	struct _citrus_iconv_std_src *ss;
261 
262 	while ((ss = TAILQ_FIRST(sl)) != NULL) {
263 		TAILQ_REMOVE(sl, ss, ss_entry);
264 		close_dsts(&ss->ss_dsts);
265 		free(ss);
266 	}
267 }
268 
269 static int
270 open_srcs(struct _citrus_iconv_std_src_list *sl,
271     const struct _esdb *dbsrc, const struct _esdb *dbdst)
272 {
273 	struct _citrus_iconv_std_src *ss;
274 	int count = 0, i, ret;
275 
276 	ss = malloc(sizeof(*ss));
277 	if (ss == NULL)
278 		return (errno);
279 
280 	TAILQ_INIT(&ss->ss_dsts);
281 
282 	for (i = 0; i < dbsrc->db_num_charsets; i++) {
283 		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
284 		if (ret)
285 			goto err;
286 		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
287 			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
288 			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
289 			ss = malloc(sizeof(*ss));
290 			if (ss == NULL) {
291 				ret = errno;
292 				goto err;
293 			}
294 			count++;
295 			TAILQ_INIT(&ss->ss_dsts);
296 		}
297 	}
298 	free(ss);
299 
300 	return (count ? 0 : ENOENT);
301 
302 err:
303 	free(ss);
304 	close_srcs(sl);
305 	return (ret);
306 }
307 
308 /* do convert a character */
309 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
310 static int
311 /*ARGSUSED*/
312 do_conv(const struct _citrus_iconv_std_shared *is,
313 	_csid_t *csid, _index_t *idx)
314 {
315 	struct _citrus_iconv_std_dst *sd;
316 	struct _citrus_iconv_std_src *ss;
317 	_index_t tmpidx;
318 	int ret;
319 
320 	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
321 		if (ss->ss_csid == *csid) {
322 			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
323 				ret = _csmapper_convert(sd->sd_mapper,
324 				    &tmpidx, *idx, NULL);
325 				switch (ret) {
326 				case _MAPPER_CONVERT_SUCCESS:
327 					*csid = sd->sd_csid;
328 					*idx = tmpidx;
329 					return (0);
330 				case _MAPPER_CONVERT_NONIDENTICAL:
331 					break;
332 				case _MAPPER_CONVERT_SRC_MORE:
333 					/*FALLTHROUGH*/
334 				case _MAPPER_CONVERT_DST_MORE:
335 					/*FALLTHROUGH*/
336 				case _MAPPER_CONVERT_ILSEQ:
337 					return (EILSEQ);
338 				case _MAPPER_CONVERT_FATAL:
339 					return (EINVAL);
340 				}
341 			}
342 			break;
343 		}
344 	}
345 
346 	return (E_NO_CORRESPONDING_CHAR);
347 }
348 /* ---------------------------------------------------------------------- */
349 
350 static int
351 /*ARGSUSED*/
352 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
353     const char * __restrict src, const char * __restrict dst)
354 {
355 	struct _citrus_esdb esdbdst, esdbsrc;
356 	struct _citrus_iconv_std_shared *is;
357 	int ret;
358 
359 	is = malloc(sizeof(*is));
360 	if (is == NULL) {
361 		ret = errno;
362 		goto err0;
363 	}
364 	ret = _citrus_esdb_open(&esdbsrc, src);
365 	if (ret)
366 		goto err1;
367 	ret = _citrus_esdb_open(&esdbdst, dst);
368 	if (ret)
369 		goto err2;
370 	ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
371 	    esdbsrc.db_variable, esdbsrc.db_len_variable);
372 	if (ret)
373 		goto err3;
374 	ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
375 	    esdbdst.db_variable, esdbdst.db_len_variable);
376 	if (ret)
377 		goto err4;
378 	is->is_use_invalid = esdbdst.db_use_invalid;
379 	is->is_invalid = esdbdst.db_invalid;
380 
381 	TAILQ_INIT(&is->is_srcs);
382 	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
383 	if (ret)
384 		goto err5;
385 
386 	_esdb_close(&esdbsrc);
387 	_esdb_close(&esdbdst);
388 	ci->ci_closure = is;
389 
390 	return (0);
391 
392 err5:
393 	_stdenc_close(is->is_dst_encoding);
394 err4:
395 	_stdenc_close(is->is_src_encoding);
396 err3:
397 	_esdb_close(&esdbdst);
398 err2:
399 	_esdb_close(&esdbsrc);
400 err1:
401 	free(is);
402 err0:
403 	return (ret);
404 }
405 
406 static void
407 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
408 {
409 	struct _citrus_iconv_std_shared *is = ci->ci_closure;
410 
411 	if (is == NULL)
412 		return;
413 
414 	_stdenc_close(is->is_src_encoding);
415 	_stdenc_close(is->is_dst_encoding);
416 	close_srcs(&is->is_srcs);
417 	free(is);
418 }
419 
420 static int
421 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
422 {
423 	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
424 	struct _citrus_iconv_std_context *sc;
425 	char *ptr;
426 	size_t sz, szpsdst, szpssrc;
427 
428 	szpssrc = _stdenc_get_state_size(is->is_src_encoding);
429 	szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
430 
431 	sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
432 	sc = malloc(sz);
433 	if (sc == NULL)
434 		return (errno);
435 
436 	ptr = (char *)&sc[1];
437 	if (szpssrc > 0)
438 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
439 		    ptr, ptr+szpssrc);
440 	else
441 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
442 		    NULL, NULL);
443 	ptr += szpssrc*2;
444 	if (szpsdst > 0)
445 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
446 		    ptr, ptr+szpsdst);
447 	else
448 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
449 		    NULL, NULL);
450 
451 	cv->cv_closure = (void *)sc;
452 
453 	return (0);
454 }
455 
456 static void
457 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
458 {
459 
460 	free(cv->cv_closure);
461 }
462 
463 static int
464 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
465     char * __restrict * __restrict in, size_t * __restrict inbytes,
466     char * __restrict * __restrict out, size_t * __restrict outbytes,
467     uint32_t flags, size_t * __restrict invalids)
468 {
469 	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
470 	struct _citrus_iconv_std_context *sc = cv->cv_closure;
471 	_csid_t csid;
472 	_index_t idx;
473 	char *tmpin;
474 	size_t inval, in_mb_cur_min, szrin, szrout;
475 	int ret, state = 0;
476 
477 	inval = 0;
478 	if (in == NULL || *in == NULL) {
479 		/* special cases */
480 		if (out != NULL && *out != NULL) {
481 			/* init output state and store the shift sequence */
482 			save_encoding_state(&sc->sc_src_encoding);
483 			save_encoding_state(&sc->sc_dst_encoding);
484 			szrout = 0;
485 
486 			ret = put_state_resetx(&sc->sc_dst_encoding,
487 			    *out, *outbytes, &szrout);
488 			if (ret)
489 				goto err;
490 
491 			if (szrout == (size_t)-2) {
492 				/* too small to store the character */
493 				ret = EINVAL;
494 				goto err;
495 			}
496 			*out += szrout;
497 			*outbytes -= szrout;
498 		} else
499 			/* otherwise, discard the shift sequence */
500 			init_encoding_state(&sc->sc_dst_encoding);
501 		init_encoding_state(&sc->sc_src_encoding);
502 		*invalids = 0;
503 		return (0);
504 	}
505 
506 	in_mb_cur_min = _stdenc_get_mb_cur_min(is->is_src_encoding);
507 
508 	/* normal case */
509 	for (;;) {
510 		if (*inbytes == 0) {
511 			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
512 			if (state == _STDENC_SDGEN_INITIAL ||
513 			    state == _STDENC_SDGEN_STABLE)
514 				break;
515 		}
516 
517 		/* save the encoding states for the error recovery */
518 		save_encoding_state(&sc->sc_src_encoding);
519 		save_encoding_state(&sc->sc_dst_encoding);
520 
521 		/* mb -> csid/index */
522 		tmpin = *in;
523 		szrin = szrout = 0;
524 		ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
525 		    *inbytes, &szrin, cv->cv_shared->ci_hooks);
526 		if (ret != 0 && (ret != EILSEQ ||
527 		    !cv->cv_shared->ci_discard_ilseq)) {
528 			goto err;
529 		} else if (ret == EILSEQ) {
530 			/*
531 			 * If //IGNORE was specified, we'll just keep crunching
532 			 * through invalid characters.
533 			 */
534 			*in += in_mb_cur_min;
535 			*inbytes -= in_mb_cur_min;
536 			restore_encoding_state(&sc->sc_src_encoding);
537 			restore_encoding_state(&sc->sc_dst_encoding);
538 			continue;
539 		}
540 
541 		if (szrin == (size_t)-2) {
542 			/* incompleted character */
543 			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
544 			if (ret) {
545 				ret = EINVAL;
546 				goto err;
547 			}
548 			switch (state) {
549 			case _STDENC_SDGEN_INITIAL:
550 			case _STDENC_SDGEN_STABLE:
551 				/* fetch shift sequences only. */
552 				goto next;
553 			}
554 			ret = EINVAL;
555 			goto err;
556 		}
557 		/* convert the character */
558 		ret = do_conv(is, &csid, &idx);
559 		if (ret) {
560 			if (ret == E_NO_CORRESPONDING_CHAR) {
561 				/*
562 				 * GNU iconv returns EILSEQ when no
563 				 * corresponding character in the output.
564 				 * Some software depends on this behavior
565 				 * though this is against POSIX specification.
566 				 */
567 				if (cv->cv_shared->ci_ilseq_invalid != 0) {
568 					ret = EILSEQ;
569 					goto err;
570 				}
571 				inval++;
572 				szrout = 0;
573 				if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&
574 				    !cv->cv_shared->ci_discard_ilseq) &&
575 				    is->is_use_invalid) {
576 					ret = wctombx(&sc->sc_dst_encoding,
577 					    *out, *outbytes, is->is_invalid,
578 					    &szrout, cv->cv_shared->ci_hooks);
579 					if (ret)
580 						goto err;
581 				}
582 				goto next;
583 			} else
584 				goto err;
585 		}
586 		/* csid/index -> mb */
587 		ret = cstombx(&sc->sc_dst_encoding,
588 		    *out, *outbytes, csid, idx, &szrout,
589 		    cv->cv_shared->ci_hooks);
590 		if (ret)
591 			goto err;
592 next:
593 		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
594 		*in = tmpin;
595 		*outbytes -= szrout;
596 		*out += szrout;
597 	}
598 	*invalids = inval;
599 
600 	return (0);
601 
602 err:
603 	restore_encoding_state(&sc->sc_src_encoding);
604 	restore_encoding_state(&sc->sc_dst_encoding);
605 	*invalids = inval;
606 
607 	return (ret);
608 }
609