xref: /freebsd/lib/libiconv_modules/iconv_std/citrus_iconv_std.c (revision f126d349810fdb512c0b01e101342d430b947488)
1 /* $FreeBSD$ */
2 /*	$NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $	*/
3 
4 /*-
5  * SPDX-License-Identifier: BSD-2-Clause
6  *
7  * Copyright (c)2003 Citrus Project,
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/endian.h>
34 #include <sys/queue.h>
35 
36 #include <assert.h>
37 #include <errno.h>
38 #include <limits.h>
39 #include <stdbool.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_module.h"
47 #include "citrus_region.h"
48 #include "citrus_mmap.h"
49 #include "citrus_hash.h"
50 #include "citrus_iconv.h"
51 #include "citrus_stdenc.h"
52 #include "citrus_mapper.h"
53 #include "citrus_csmapper.h"
54 #include "citrus_memstream.h"
55 #include "citrus_iconv_std.h"
56 #include "citrus_esdb.h"
57 
58 /* ---------------------------------------------------------------------- */
59 
/*
 * Module boilerplate for the "iconv_std" converter.  The macro bodies live
 * in the citrus iconv headers and are not visible here; presumably DECLS
 * prototypes the static _citrus_iconv_std_iconv_* entry points defined
 * below and DEF_OPS defines the _citrus_iconv_std_iconv_ops table that
 * _citrus_iconv_std_iconv_getops() copies out -- confirm against
 * citrus_iconv_local.h.
 */
_CITRUS_ICONV_DECLS(iconv_std);
_CITRUS_ICONV_DEF_OPS(iconv_std);
62 
63 
64 /* ---------------------------------------------------------------------- */
65 
66 int
67 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops)
68 {
69 
70 	memcpy(ops, &_citrus_iconv_std_iconv_ops,
71 	    sizeof(_citrus_iconv_std_iconv_ops));
72 
73 	return (0);
74 }
75 
76 /* ---------------------------------------------------------------------- */
77 
78 /*
79  * convenience routines for stdenc.
80  */
81 static __inline void
82 save_encoding_state(struct _citrus_iconv_std_encoding *se)
83 {
84 
85 	if (se->se_ps)
86 		memcpy(se->se_pssaved, se->se_ps,
87 		    _stdenc_get_state_size(se->se_handle));
88 }
89 
90 static __inline void
91 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
92 {
93 
94 	if (se->se_ps)
95 		memcpy(se->se_ps, se->se_pssaved,
96 		    _stdenc_get_state_size(se->se_handle));
97 }
98 
99 static __inline void
100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
101 {
102 
103 	if (se->se_ps)
104 		_stdenc_init_state(se->se_handle, se->se_ps);
105 }
106 
107 static __inline int
108 mbtocsx(struct _citrus_iconv_std_encoding *se,
109     _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult,
110     struct iconv_hooks *hooks)
111 {
112 
113 	return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
114 			      nresult, hooks));
115 }
116 
117 static __inline int
118 cstombx(struct _citrus_iconv_std_encoding *se,
119     char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult,
120     struct iconv_hooks *hooks)
121 {
122 
123 	return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
124 			      nresult, hooks));
125 }
126 
127 static __inline int
128 wctombx(struct _citrus_iconv_std_encoding *se,
129     char *s, size_t n, _wc_t wc, size_t *nresult,
130     struct iconv_hooks *hooks)
131 {
132 
133 	return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult,
134 			     hooks));
135 }
136 
137 static __inline int
138 put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n,
139     size_t *nresult)
140 {
141 
142 	return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult));
143 }
144 
145 static __inline int
146 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
147 {
148 	struct _stdenc_state_desc ssd;
149 	int ret;
150 
151 	ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
152 	    _STDENC_SDID_GENERIC, &ssd);
153 	if (!ret)
154 		*rstate = ssd.u.generic.state;
155 
156 	return (ret);
157 }
158 
159 /*
160  * init encoding context
161  */
162 static int
163 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
164     void *ps1, void *ps2)
165 {
166 	int ret = -1;
167 
168 	se->se_handle = cs;
169 	se->se_ps = ps1;
170 	se->se_pssaved = ps2;
171 
172 	if (se->se_ps)
173 		ret = _stdenc_init_state(cs, se->se_ps);
174 	if (!ret && se->se_pssaved)
175 		ret = _stdenc_init_state(cs, se->se_pssaved);
176 
177 	return (ret);
178 }
179 
/*
 * Open the charset mapper that converts `src' to `dst' and accept it only
 * if it maps exactly one source unit to one destination unit and keeps no
 * state (src_max == 1, dst_max == 1, state_size == 0).
 *
 * On success returns 0 and stores the mapper in *rcm; the value written to
 * *rnorm comes from _csmapper_open().  Returns the _csmapper_open() error
 * if the mapper cannot be opened, or EINVAL (after closing it) if it does
 * not have the required shape.  *rcm is left untouched on failure.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	_csmapper_close(mapper);

	return (EINVAL);
}
200 
201 static void
202 close_dsts(struct _citrus_iconv_std_dst_list *dl)
203 {
204 	struct _citrus_iconv_std_dst *sd;
205 
206 	while ((sd = TAILQ_FIRST(dl)) != NULL) {
207 		TAILQ_REMOVE(dl, sd, sd_entry);
208 		_csmapper_close(sd->sd_mapper);
209 		free(sd);
210 	}
211 }
212 
213 static int
214 open_dsts(struct _citrus_iconv_std_dst_list *dl,
215     const struct _esdb_charset *ec, const struct _esdb *dbdst)
216 {
217 	struct _citrus_iconv_std_dst *sd, *sdtmp;
218 	unsigned long norm;
219 	int i, ret;
220 
221 	sd = malloc(sizeof(*sd));
222 	if (sd == NULL)
223 		return (errno);
224 
225 	for (i = 0; i < dbdst->db_num_charsets; i++) {
226 		ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
227 		    dbdst->db_charsets[i].ec_csname, &norm);
228 		if (ret == 0) {
229 			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
230 			sd->sd_norm = norm;
231 			/* insert this mapper by sorted order. */
232 			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
233 				if (sdtmp->sd_norm > norm) {
234 					TAILQ_INSERT_BEFORE(sdtmp, sd,
235 					    sd_entry);
236 					sd = NULL;
237 					break;
238 				}
239 			}
240 			if (sd)
241 				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
242 			sd = malloc(sizeof(*sd));
243 			if (sd == NULL) {
244 				ret = errno;
245 				close_dsts(dl);
246 				return (ret);
247 			}
248 		} else if (ret != ENOENT) {
249 			close_dsts(dl);
250 			free(sd);
251 			return (ret);
252 		}
253 	}
254 	free(sd);
255 	return (0);
256 }
257 
258 static void
259 close_srcs(struct _citrus_iconv_std_src_list *sl)
260 {
261 	struct _citrus_iconv_std_src *ss;
262 
263 	while ((ss = TAILQ_FIRST(sl)) != NULL) {
264 		TAILQ_REMOVE(sl, ss, ss_entry);
265 		close_dsts(&ss->ss_dsts);
266 		free(ss);
267 	}
268 }
269 
270 static int
271 open_srcs(struct _citrus_iconv_std_src_list *sl,
272     const struct _esdb *dbsrc, const struct _esdb *dbdst)
273 {
274 	struct _citrus_iconv_std_src *ss;
275 	int count = 0, i, ret;
276 
277 	ss = malloc(sizeof(*ss));
278 	if (ss == NULL)
279 		return (errno);
280 
281 	TAILQ_INIT(&ss->ss_dsts);
282 
283 	for (i = 0; i < dbsrc->db_num_charsets; i++) {
284 		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
285 		if (ret)
286 			goto err;
287 		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
288 			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
289 			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
290 			ss = malloc(sizeof(*ss));
291 			if (ss == NULL) {
292 				ret = errno;
293 				goto err;
294 			}
295 			count++;
296 			TAILQ_INIT(&ss->ss_dsts);
297 		}
298 	}
299 	free(ss);
300 
301 	return (count ? 0 : ENOENT);
302 
303 err:
304 	free(ss);
305 	close_srcs(sl);
306 	return (ret);
307 }
308 
309 /* do convert a character */
310 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
311 static int
312 /*ARGSUSED*/
313 do_conv(const struct _citrus_iconv_std_shared *is,
314 	_csid_t *csid, _index_t *idx)
315 {
316 	struct _citrus_iconv_std_dst *sd;
317 	struct _citrus_iconv_std_src *ss;
318 	_index_t tmpidx;
319 	int ret;
320 
321 	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
322 		if (ss->ss_csid == *csid) {
323 			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
324 				ret = _csmapper_convert(sd->sd_mapper,
325 				    &tmpidx, *idx, NULL);
326 				switch (ret) {
327 				case _MAPPER_CONVERT_SUCCESS:
328 					*csid = sd->sd_csid;
329 					*idx = tmpidx;
330 					return (0);
331 				case _MAPPER_CONVERT_NONIDENTICAL:
332 					break;
333 				case _MAPPER_CONVERT_SRC_MORE:
334 					/*FALLTHROUGH*/
335 				case _MAPPER_CONVERT_DST_MORE:
336 					/*FALLTHROUGH*/
337 				case _MAPPER_CONVERT_ILSEQ:
338 					return (EILSEQ);
339 				case _MAPPER_CONVERT_FATAL:
340 					return (EINVAL);
341 				}
342 			}
343 			break;
344 		}
345 	}
346 
347 	return (E_NO_CORRESPONDING_CHAR);
348 }
349 /* ---------------------------------------------------------------------- */
350 
351 static int
352 /*ARGSUSED*/
353 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
354     const char * __restrict src, const char * __restrict dst)
355 {
356 	struct _citrus_esdb esdbdst, esdbsrc;
357 	struct _citrus_iconv_std_shared *is;
358 	int ret;
359 
360 	is = malloc(sizeof(*is));
361 	if (is == NULL) {
362 		ret = errno;
363 		goto err0;
364 	}
365 	ret = _citrus_esdb_open(&esdbsrc, src);
366 	if (ret)
367 		goto err1;
368 	ret = _citrus_esdb_open(&esdbdst, dst);
369 	if (ret)
370 		goto err2;
371 	ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
372 	    esdbsrc.db_variable, esdbsrc.db_len_variable);
373 	if (ret)
374 		goto err3;
375 	ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
376 	    esdbdst.db_variable, esdbdst.db_len_variable);
377 	if (ret)
378 		goto err4;
379 	is->is_use_invalid = esdbdst.db_use_invalid;
380 	is->is_invalid = esdbdst.db_invalid;
381 
382 	TAILQ_INIT(&is->is_srcs);
383 	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
384 	if (ret)
385 		goto err5;
386 
387 	_esdb_close(&esdbsrc);
388 	_esdb_close(&esdbdst);
389 	ci->ci_closure = is;
390 
391 	return (0);
392 
393 err5:
394 	_stdenc_close(is->is_dst_encoding);
395 err4:
396 	_stdenc_close(is->is_src_encoding);
397 err3:
398 	_esdb_close(&esdbdst);
399 err2:
400 	_esdb_close(&esdbsrc);
401 err1:
402 	free(is);
403 err0:
404 	return (ret);
405 }
406 
407 static void
408 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
409 {
410 	struct _citrus_iconv_std_shared *is = ci->ci_closure;
411 
412 	if (is == NULL)
413 		return;
414 
415 	_stdenc_close(is->is_src_encoding);
416 	_stdenc_close(is->is_dst_encoding);
417 	close_srcs(&is->is_srcs);
418 	free(is);
419 }
420 
421 static int
422 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
423 {
424 	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
425 	struct _citrus_iconv_std_context *sc;
426 	char *ptr;
427 	size_t sz, szpsdst, szpssrc;
428 
429 	szpssrc = _stdenc_get_state_size(is->is_src_encoding);
430 	szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
431 
432 	sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
433 	sc = malloc(sz);
434 	if (sc == NULL)
435 		return (errno);
436 
437 	ptr = (char *)&sc[1];
438 	if (szpssrc > 0)
439 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
440 		    ptr, ptr+szpssrc);
441 	else
442 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
443 		    NULL, NULL);
444 	ptr += szpssrc*2;
445 	if (szpsdst > 0)
446 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
447 		    ptr, ptr+szpsdst);
448 	else
449 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
450 		    NULL, NULL);
451 
452 	cv->cv_closure = (void *)sc;
453 
454 	return (0);
455 }
456 
457 static void
458 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
459 {
460 
461 	free(cv->cv_closure);
462 }
463 
464 static int
465 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
466     char * __restrict * __restrict in, size_t * __restrict inbytes,
467     char * __restrict * __restrict out, size_t * __restrict outbytes,
468     uint32_t flags, size_t * __restrict invalids)
469 {
470 	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
471 	struct _citrus_iconv_std_context *sc = cv->cv_closure;
472 	_csid_t csid;
473 	_index_t idx;
474 	char *tmpin;
475 	size_t inval, in_mb_cur_min, szrin, szrout;
476 	int ret, state = 0;
477 
478 	inval = 0;
479 	if (in == NULL || *in == NULL) {
480 		/* special cases */
481 		if (out != NULL && *out != NULL) {
482 			/* init output state and store the shift sequence */
483 			save_encoding_state(&sc->sc_src_encoding);
484 			save_encoding_state(&sc->sc_dst_encoding);
485 			szrout = 0;
486 
487 			ret = put_state_resetx(&sc->sc_dst_encoding,
488 			    *out, *outbytes, &szrout);
489 			if (ret)
490 				goto err;
491 
492 			if (szrout == (size_t)-2) {
493 				/* too small to store the character */
494 				ret = EINVAL;
495 				goto err;
496 			}
497 			*out += szrout;
498 			*outbytes -= szrout;
499 		} else
500 			/* otherwise, discard the shift sequence */
501 			init_encoding_state(&sc->sc_dst_encoding);
502 		init_encoding_state(&sc->sc_src_encoding);
503 		*invalids = 0;
504 		return (0);
505 	}
506 
507 	in_mb_cur_min = _stdenc_get_mb_cur_min(is->is_src_encoding);
508 
509 	/* normal case */
510 	for (;;) {
511 		if (*inbytes == 0) {
512 			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
513 			if (state == _STDENC_SDGEN_INITIAL ||
514 			    state == _STDENC_SDGEN_STABLE)
515 				break;
516 		}
517 
518 		/* save the encoding states for the error recovery */
519 		save_encoding_state(&sc->sc_src_encoding);
520 		save_encoding_state(&sc->sc_dst_encoding);
521 
522 		/* mb -> csid/index */
523 		tmpin = *in;
524 		szrin = szrout = 0;
525 		ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
526 		    *inbytes, &szrin, cv->cv_shared->ci_hooks);
527 		if (ret != 0 && (ret != EILSEQ ||
528 		    !cv->cv_shared->ci_discard_ilseq)) {
529 			goto err;
530 		} else if (ret == EILSEQ) {
531 			/*
532 			 * If //IGNORE was specified, we'll just keep crunching
533 			 * through invalid characters.
534 			 */
535 			*in += in_mb_cur_min;
536 			*inbytes -= in_mb_cur_min;
537 			restore_encoding_state(&sc->sc_src_encoding);
538 			restore_encoding_state(&sc->sc_dst_encoding);
539 			continue;
540 		}
541 
542 		if (szrin == (size_t)-2) {
543 			/* incompleted character */
544 			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
545 			if (ret) {
546 				ret = EINVAL;
547 				goto err;
548 			}
549 			switch (state) {
550 			case _STDENC_SDGEN_INITIAL:
551 			case _STDENC_SDGEN_STABLE:
552 				/* fetch shift sequences only. */
553 				goto next;
554 			}
555 			ret = EINVAL;
556 			goto err;
557 		}
558 		/* convert the character */
559 		ret = do_conv(is, &csid, &idx);
560 		if (ret) {
561 			if (ret == E_NO_CORRESPONDING_CHAR) {
562 				/*
563 				 * GNU iconv returns EILSEQ when no
564 				 * corresponding character in the output.
565 				 * Some software depends on this behavior
566 				 * though this is against POSIX specification.
567 				 */
568 				if (cv->cv_shared->ci_ilseq_invalid != 0) {
569 					ret = EILSEQ;
570 					goto err;
571 				}
572 				inval++;
573 				szrout = 0;
574 				if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&
575 				    !cv->cv_shared->ci_discard_ilseq) &&
576 				    is->is_use_invalid) {
577 					ret = wctombx(&sc->sc_dst_encoding,
578 					    *out, *outbytes, is->is_invalid,
579 					    &szrout, cv->cv_shared->ci_hooks);
580 					if (ret)
581 						goto err;
582 				}
583 				goto next;
584 			} else
585 				goto err;
586 		}
587 		/* csid/index -> mb */
588 		ret = cstombx(&sc->sc_dst_encoding,
589 		    *out, *outbytes, csid, idx, &szrout,
590 		    cv->cv_shared->ci_hooks);
591 		if (ret)
592 			goto err;
593 next:
594 		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
595 		*in = tmpin;
596 		*outbytes -= szrout;
597 		*out += szrout;
598 	}
599 	*invalids = inval;
600 
601 	return (0);
602 
603 err:
604 	restore_encoding_state(&sc->sc_src_encoding);
605 	restore_encoding_state(&sc->sc_dst_encoding);
606 	*invalids = inval;
607 
608 	return (ret);
609 }
610