/* Source: FreeBSD lib/libiconv_modules/iconv_std/citrus_iconv_std.c (revision 39ee7a7a6bdd1557b1c3532abf60d139798ac88b) */
1 /* $FreeBSD$ */
2 /*	$NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $	*/
3 
4 /*-
5  * Copyright (c)2003 Citrus Project,
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include <sys/endian.h>
32 #include <sys/queue.h>
33 
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdbool.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 
42 #include "citrus_namespace.h"
43 #include "citrus_types.h"
44 #include "citrus_module.h"
45 #include "citrus_region.h"
46 #include "citrus_mmap.h"
47 #include "citrus_hash.h"
48 #include "citrus_iconv.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_mapper.h"
51 #include "citrus_csmapper.h"
52 #include "citrus_memstream.h"
53 #include "citrus_iconv_std.h"
54 #include "citrus_esdb.h"
55 
56 /* ---------------------------------------------------------------------- */
57 
/*
 * NOTE(review): both macros are defined outside this file.  Presumably the
 * first declares this module's handler functions and the second defines the
 * `_citrus_iconv_std_iconv_ops' table that the getops routine in this file
 * copies out -- confirm against the citrus iconv headers.
 */
58 _CITRUS_ICONV_DECLS(iconv_std);
59 _CITRUS_ICONV_DEF_OPS(iconv_std);
60 
61 
62 /* ---------------------------------------------------------------------- */
63 
64 int
65 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops)
66 {
67 
68 	memcpy(ops, &_citrus_iconv_std_iconv_ops,
69 	    sizeof(_citrus_iconv_std_iconv_ops));
70 
71 	return (0);
72 }
73 
74 /* ---------------------------------------------------------------------- */
75 
76 /*
77  * convenience routines for stdenc.
78  */
79 static __inline void
80 save_encoding_state(struct _citrus_iconv_std_encoding *se)
81 {
82 
83 	if (se->se_ps)
84 		memcpy(se->se_pssaved, se->se_ps,
85 		    _stdenc_get_state_size(se->se_handle));
86 }
87 
88 static __inline void
89 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
90 {
91 
92 	if (se->se_ps)
93 		memcpy(se->se_ps, se->se_pssaved,
94 		    _stdenc_get_state_size(se->se_handle));
95 }
96 
97 static __inline void
98 init_encoding_state(struct _citrus_iconv_std_encoding *se)
99 {
100 
101 	if (se->se_ps)
102 		_stdenc_init_state(se->se_handle, se->se_ps);
103 }
104 
105 static __inline int
106 mbtocsx(struct _citrus_iconv_std_encoding *se,
107     _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult,
108     struct iconv_hooks *hooks)
109 {
110 
111 	return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112 			      nresult, hooks));
113 }
114 
115 static __inline int
116 cstombx(struct _citrus_iconv_std_encoding *se,
117     char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult,
118     struct iconv_hooks *hooks)
119 {
120 
121 	return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
122 			      nresult, hooks));
123 }
124 
125 static __inline int
126 wctombx(struct _citrus_iconv_std_encoding *se,
127     char *s, size_t n, _wc_t wc, size_t *nresult,
128     struct iconv_hooks *hooks)
129 {
130 
131 	return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult,
132 			     hooks));
133 }
134 
135 static __inline int
136 put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n,
137     size_t *nresult)
138 {
139 
140 	return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult));
141 }
142 
143 static __inline int
144 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
145 {
146 	struct _stdenc_state_desc ssd;
147 	int ret;
148 
149 	ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
150 	    _STDENC_SDID_GENERIC, &ssd);
151 	if (!ret)
152 		*rstate = ssd.u.generic.state;
153 
154 	return (ret);
155 }
156 
157 /*
158  * init encoding context
159  */
160 static int
161 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
162     void *ps1, void *ps2)
163 {
164 	int ret = -1;
165 
166 	se->se_handle = cs;
167 	se->se_ps = ps1;
168 	se->se_pssaved = ps2;
169 
170 	if (se->se_ps)
171 		ret = _stdenc_init_state(cs, se->se_ps);
172 	if (!ret && se->se_pssaved)
173 		ret = _stdenc_init_state(cs, se->se_pssaved);
174 
175 	return (ret);
176 }
177 
/*
 * Open the charset mapper from `src' to `dst' and accept it only if it is a
 * plain one-to-one, stateless mapper (source max 1, destination max 1,
 * state size 0).
 *
 * On success the mapper is stored in *rcm and 0 is returned; `rnorm' is
 * handed to _csmapper_open() as is.  An unsuitable mapper is closed again
 * and EINVAL is returned; a failure of _csmapper_open() is returned
 * unchanged.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	_csmapper_close(mapper);
	return (EINVAL);
}
198 
199 static void
200 close_dsts(struct _citrus_iconv_std_dst_list *dl)
201 {
202 	struct _citrus_iconv_std_dst *sd;
203 
204 	while ((sd = TAILQ_FIRST(dl)) != NULL) {
205 		TAILQ_REMOVE(dl, sd, sd_entry);
206 		_csmapper_close(sd->sd_mapper);
207 		free(sd);
208 	}
209 }
210 
211 static int
212 open_dsts(struct _citrus_iconv_std_dst_list *dl,
213     const struct _esdb_charset *ec, const struct _esdb *dbdst)
214 {
215 	struct _citrus_iconv_std_dst *sd, *sdtmp;
216 	unsigned long norm;
217 	int i, ret;
218 
219 	sd = malloc(sizeof(*sd));
220 	if (sd == NULL)
221 		return (errno);
222 
223 	for (i = 0; i < dbdst->db_num_charsets; i++) {
224 		ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
225 		    dbdst->db_charsets[i].ec_csname, &norm);
226 		if (ret == 0) {
227 			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
228 			sd->sd_norm = norm;
229 			/* insert this mapper by sorted order. */
230 			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
231 				if (sdtmp->sd_norm > norm) {
232 					TAILQ_INSERT_BEFORE(sdtmp, sd,
233 					    sd_entry);
234 					sd = NULL;
235 					break;
236 				}
237 			}
238 			if (sd)
239 				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
240 			sd = malloc(sizeof(*sd));
241 			if (sd == NULL) {
242 				ret = errno;
243 				close_dsts(dl);
244 				return (ret);
245 			}
246 		} else if (ret != ENOENT) {
247 			close_dsts(dl);
248 			free(sd);
249 			return (ret);
250 		}
251 	}
252 	free(sd);
253 	return (0);
254 }
255 
256 static void
257 close_srcs(struct _citrus_iconv_std_src_list *sl)
258 {
259 	struct _citrus_iconv_std_src *ss;
260 
261 	while ((ss = TAILQ_FIRST(sl)) != NULL) {
262 		TAILQ_REMOVE(sl, ss, ss_entry);
263 		close_dsts(&ss->ss_dsts);
264 		free(ss);
265 	}
266 }
267 
268 static int
269 open_srcs(struct _citrus_iconv_std_src_list *sl,
270     const struct _esdb *dbsrc, const struct _esdb *dbdst)
271 {
272 	struct _citrus_iconv_std_src *ss;
273 	int count = 0, i, ret;
274 
275 	ss = malloc(sizeof(*ss));
276 	if (ss == NULL)
277 		return (errno);
278 
279 	TAILQ_INIT(&ss->ss_dsts);
280 
281 	for (i = 0; i < dbsrc->db_num_charsets; i++) {
282 		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
283 		if (ret)
284 			goto err;
285 		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
286 			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
287 			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
288 			ss = malloc(sizeof(*ss));
289 			if (ss == NULL) {
290 				ret = errno;
291 				goto err;
292 			}
293 			count++;
294 			TAILQ_INIT(&ss->ss_dsts);
295 		}
296 	}
297 	free(ss);
298 
299 	return (count ? 0 : ENOENT);
300 
301 err:
302 	free(ss);
303 	close_srcs(sl);
304 	return (ret);
305 }
306 
307 /* do convert a character */
308 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
309 static int
310 /*ARGSUSED*/
311 do_conv(const struct _citrus_iconv_std_shared *is,
312 	_csid_t *csid, _index_t *idx)
313 {
314 	struct _citrus_iconv_std_dst *sd;
315 	struct _citrus_iconv_std_src *ss;
316 	_index_t tmpidx;
317 	int ret;
318 
319 	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
320 		if (ss->ss_csid == *csid) {
321 			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
322 				ret = _csmapper_convert(sd->sd_mapper,
323 				    &tmpidx, *idx, NULL);
324 				switch (ret) {
325 				case _MAPPER_CONVERT_SUCCESS:
326 					*csid = sd->sd_csid;
327 					*idx = tmpidx;
328 					return (0);
329 				case _MAPPER_CONVERT_NONIDENTICAL:
330 					break;
331 				case _MAPPER_CONVERT_SRC_MORE:
332 					/*FALLTHROUGH*/
333 				case _MAPPER_CONVERT_DST_MORE:
334 					/*FALLTHROUGH*/
335 				case _MAPPER_CONVERT_ILSEQ:
336 					return (EILSEQ);
337 				case _MAPPER_CONVERT_FATAL:
338 					return (EINVAL);
339 				}
340 			}
341 			break;
342 		}
343 	}
344 
345 	return (E_NO_CORRESPONDING_CHAR);
346 }
347 /* ---------------------------------------------------------------------- */
348 
349 static int
350 /*ARGSUSED*/
351 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
352     const char * __restrict src, const char * __restrict dst)
353 {
354 	struct _citrus_esdb esdbdst, esdbsrc;
355 	struct _citrus_iconv_std_shared *is;
356 	int ret;
357 
358 	is = malloc(sizeof(*is));
359 	if (is == NULL) {
360 		ret = errno;
361 		goto err0;
362 	}
363 	ret = _citrus_esdb_open(&esdbsrc, src);
364 	if (ret)
365 		goto err1;
366 	ret = _citrus_esdb_open(&esdbdst, dst);
367 	if (ret)
368 		goto err2;
369 	ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
370 	    esdbsrc.db_variable, esdbsrc.db_len_variable);
371 	if (ret)
372 		goto err3;
373 	ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
374 	    esdbdst.db_variable, esdbdst.db_len_variable);
375 	if (ret)
376 		goto err4;
377 	is->is_use_invalid = esdbdst.db_use_invalid;
378 	is->is_invalid = esdbdst.db_invalid;
379 
380 	TAILQ_INIT(&is->is_srcs);
381 	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
382 	if (ret)
383 		goto err5;
384 
385 	_esdb_close(&esdbsrc);
386 	_esdb_close(&esdbdst);
387 	ci->ci_closure = is;
388 
389 	return (0);
390 
391 err5:
392 	_stdenc_close(is->is_dst_encoding);
393 err4:
394 	_stdenc_close(is->is_src_encoding);
395 err3:
396 	_esdb_close(&esdbdst);
397 err2:
398 	_esdb_close(&esdbsrc);
399 err1:
400 	free(is);
401 err0:
402 	return (ret);
403 }
404 
405 static void
406 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
407 {
408 	struct _citrus_iconv_std_shared *is = ci->ci_closure;
409 
410 	if (is == NULL)
411 		return;
412 
413 	_stdenc_close(is->is_src_encoding);
414 	_stdenc_close(is->is_dst_encoding);
415 	close_srcs(&is->is_srcs);
416 	free(is);
417 }
418 
419 static int
420 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
421 {
422 	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
423 	struct _citrus_iconv_std_context *sc;
424 	char *ptr;
425 	size_t sz, szpsdst, szpssrc;
426 
427 	szpssrc = _stdenc_get_state_size(is->is_src_encoding);
428 	szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
429 
430 	sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
431 	sc = malloc(sz);
432 	if (sc == NULL)
433 		return (errno);
434 
435 	ptr = (char *)&sc[1];
436 	if (szpssrc > 0)
437 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
438 		    ptr, ptr+szpssrc);
439 	else
440 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
441 		    NULL, NULL);
442 	ptr += szpssrc*2;
443 	if (szpsdst > 0)
444 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
445 		    ptr, ptr+szpsdst);
446 	else
447 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
448 		    NULL, NULL);
449 
450 	cv->cv_closure = (void *)sc;
451 
452 	return (0);
453 }
454 
455 static void
456 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
457 {
458 
459 	free(cv->cv_closure);
460 }
461 
462 static int
463 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
464     char * __restrict * __restrict in, size_t * __restrict inbytes,
465     char * __restrict * __restrict out, size_t * __restrict outbytes,
466     uint32_t flags, size_t * __restrict invalids)
467 {
468 	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
469 	struct _citrus_iconv_std_context *sc = cv->cv_closure;
470 	_csid_t csid;
471 	_index_t idx;
472 	char *tmpin;
473 	size_t inval, szrin, szrout;
474 	int ret, state = 0;
475 
476 	inval = 0;
477 	if (in == NULL || *in == NULL) {
478 		/* special cases */
479 		if (out != NULL && *out != NULL) {
480 			/* init output state and store the shift sequence */
481 			save_encoding_state(&sc->sc_src_encoding);
482 			save_encoding_state(&sc->sc_dst_encoding);
483 			szrout = 0;
484 
485 			ret = put_state_resetx(&sc->sc_dst_encoding,
486 			    *out, *outbytes, &szrout);
487 			if (ret)
488 				goto err;
489 
490 			if (szrout == (size_t)-2) {
491 				/* too small to store the character */
492 				ret = EINVAL;
493 				goto err;
494 			}
495 			*out += szrout;
496 			*outbytes -= szrout;
497 		} else
498 			/* otherwise, discard the shift sequence */
499 			init_encoding_state(&sc->sc_dst_encoding);
500 		init_encoding_state(&sc->sc_src_encoding);
501 		*invalids = 0;
502 		return (0);
503 	}
504 
505 	/* normal case */
506 	for (;;) {
507 		if (*inbytes == 0) {
508 			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
509 			if (state == _STDENC_SDGEN_INITIAL ||
510 			    state == _STDENC_SDGEN_STABLE)
511 				break;
512 		}
513 
514 		/* save the encoding states for the error recovery */
515 		save_encoding_state(&sc->sc_src_encoding);
516 		save_encoding_state(&sc->sc_dst_encoding);
517 
518 		/* mb -> csid/index */
519 		tmpin = *in;
520 		szrin = szrout = 0;
521 		ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
522 		    *inbytes, &szrin, cv->cv_shared->ci_hooks);
523 		if (ret)
524 			goto err;
525 
526 		if (szrin == (size_t)-2) {
527 			/* incompleted character */
528 			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
529 			if (ret) {
530 				ret = EINVAL;
531 				goto err;
532 			}
533 			switch (state) {
534 			case _STDENC_SDGEN_INITIAL:
535 			case _STDENC_SDGEN_STABLE:
536 				/* fetch shift sequences only. */
537 				goto next;
538 			}
539 			ret = EINVAL;
540 			goto err;
541 		}
542 		/* convert the character */
543 		ret = do_conv(is, &csid, &idx);
544 		if (ret) {
545 			if (ret == E_NO_CORRESPONDING_CHAR) {
546 				/*
547 				 * GNU iconv returns EILSEQ when no
548 				 * corresponding character in the output.
549 				 * Some software depends on this behavior
550 				 * though this is against POSIX specification.
551 				 */
552 				if (cv->cv_shared->ci_ilseq_invalid != 0) {
553 					ret = EILSEQ;
554 					goto err;
555 				}
556 				inval++;
557 				szrout = 0;
558 				if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&
559 				    !cv->cv_shared->ci_discard_ilseq) &&
560 				    is->is_use_invalid) {
561 					ret = wctombx(&sc->sc_dst_encoding,
562 					    *out, *outbytes, is->is_invalid,
563 					    &szrout, cv->cv_shared->ci_hooks);
564 					if (ret)
565 						goto err;
566 				}
567 				goto next;
568 			} else
569 				goto err;
570 		}
571 		/* csid/index -> mb */
572 		ret = cstombx(&sc->sc_dst_encoding,
573 		    *out, *outbytes, csid, idx, &szrout,
574 		    cv->cv_shared->ci_hooks);
575 		if (ret)
576 			goto err;
577 next:
578 		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
579 		*in = tmpin;
580 		*outbytes -= szrout;
581 		*out += szrout;
582 	}
583 	*invalids = inval;
584 
585 	return (0);
586 
587 err:
588 	restore_encoding_state(&sc->sc_src_encoding);
589 	restore_encoding_state(&sc->sc_dst_encoding);
590 	*invalids = inval;
591 
592 	return (ret);
593 }
594