xref: /freebsd/lib/libc/iconv/citrus_iconv.c (revision 608da65de9552d5678c1000776ed69da04a45983)
1 /*	$NetBSD: citrus_iconv.c,v 1.10 2011/11/19 18:34:21 tnozaki Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c)2003 Citrus Project,
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/types.h>
33 #include <sys/queue.h>
34 
35 #include <assert.h>
36 #include <dirent.h>
37 #include <errno.h>
38 #include <iconv.h>
39 #include <langinfo.h>
40 #include <limits.h>
41 #include <paths.h>
42 #include <stdbool.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 
48 #include "citrus_namespace.h"
49 #include "citrus_bcs.h"
50 #include "citrus_esdb.h"
51 #include "citrus_region.h"
52 #include "citrus_memstream.h"
53 #include "citrus_mmap.h"
54 #include "citrus_module.h"
55 #include "citrus_lock.h"
56 #include "citrus_lookup.h"
57 #include "citrus_hash.h"
58 #include "citrus_iconv.h"
59 
60 #define _CITRUS_ICONV_DIR	"iconv.dir"
61 #define _CITRUS_ICONV_ALIAS	"iconv.alias"
62 
63 #define CI_HASH_SIZE 101
64 #define CI_INITIAL_MAX_REUSE	5
65 #define CI_ENV_MAX_REUSE	"ICONV_MAX_REUSE"
66 
67 static bool			 isinit = false;
68 static int			 shared_max_reuse, shared_num_unused;
69 static _CITRUS_HASH_HEAD(, _citrus_iconv_shared, CI_HASH_SIZE) shared_pool;
70 static TAILQ_HEAD(, _citrus_iconv_shared) shared_unused;
71 
72 static pthread_rwlock_t		 ci_lock = PTHREAD_RWLOCK_INITIALIZER;
73 
74 static __inline void
75 init_cache(void)
76 {
77 
78 	WLOCK(&ci_lock);
79 	if (!isinit) {
80 		_CITRUS_HASH_INIT(&shared_pool, CI_HASH_SIZE);
81 		TAILQ_INIT(&shared_unused);
82 		shared_max_reuse = -1;
83 		if (secure_getenv(CI_ENV_MAX_REUSE) != NULL)
84 			shared_max_reuse =
85 			    atoi(secure_getenv(CI_ENV_MAX_REUSE));
86 		if (shared_max_reuse < 0)
87 			shared_max_reuse = CI_INITIAL_MAX_REUSE;
88 		isinit = true;
89 	}
90 	UNLOCK(&ci_lock);
91 }
92 
93 static __inline void
94 close_shared(struct _citrus_iconv_shared *ci)
95 {
96 
97 	if (ci) {
98 		if (ci->ci_module) {
99 			if (ci->ci_ops) {
100 				if (ci->ci_closure)
101 					(*ci->ci_ops->io_uninit_shared)(ci);
102 				free(ci->ci_ops);
103 			}
104 			_citrus_unload_module(ci->ci_module);
105 		}
106 		free(ci);
107 	}
108 }
109 
110 static __inline int
111 open_shared(struct _citrus_iconv_shared * __restrict * __restrict rci,
112     const char * __restrict convname, const char * __restrict src,
113     const char * __restrict dst)
114 {
115 	struct _citrus_iconv_shared *ci;
116 	_citrus_iconv_getops_t getops;
117 	const char *module;
118 	size_t len_convname;
119 	int ret;
120 
121 #ifdef INCOMPATIBLE_WITH_GNU_ICONV
122 	/*
123 	 * Sadly, the gnu tools expect iconv to actually parse the
124 	 * byte stream and don't allow for a pass-through when
125 	 * the (src,dest) encodings are the same.
126 	 * See gettext-0.18.3+ NEWS:
127 	 *   msgfmt now checks PO file headers more strictly with less
128 	 *   false-positives.
129 	 * NetBSD, also, doesn't do the below pass-through.
130 	 *
131 	 * Also note that this currently falls short if dst options have been
132 	 * specified. It may be the case that we want to ignore EILSEQ, in which
133 	 * case we should also select iconv_std anyways.  This trick, while
134 	 * clever, may not be worth it.
135 	 */
136 	module = (strcmp(src, dst) != 0) ? "iconv_std" : "iconv_none";
137 #else
138 	module = "iconv_std";
139 #endif
140 
141 	/* initialize iconv handle */
142 	len_convname = strlen(convname);
143 	ci = calloc(1, sizeof(*ci) + len_convname + 1);
144 	if (!ci) {
145 		ret = errno;
146 		goto err;
147 	}
148 	ci->ci_convname = (void *)&ci[1];
149 	memcpy(ci->ci_convname, convname, len_convname + 1);
150 
151 	/* load module */
152 	ret = _citrus_load_module(&ci->ci_module, module);
153 	if (ret)
154 		goto err;
155 
156 	/* get operators */
157 	getops = (_citrus_iconv_getops_t)_citrus_find_getops(ci->ci_module,
158 	    module, "iconv");
159 	if (!getops) {
160 		ret = EOPNOTSUPP;
161 		goto err;
162 	}
163 	ci->ci_ops = malloc(sizeof(*ci->ci_ops));
164 	if (!ci->ci_ops) {
165 		ret = errno;
166 		goto err;
167 	}
168 	ret = (*getops)(ci->ci_ops);
169 	if (ret)
170 		goto err;
171 
172 	if (ci->ci_ops->io_init_shared == NULL ||
173 	    ci->ci_ops->io_uninit_shared == NULL ||
174 	    ci->ci_ops->io_init_context == NULL ||
175 	    ci->ci_ops->io_uninit_context == NULL ||
176 	    ci->ci_ops->io_convert == NULL) {
177 		ret = EINVAL;
178 		goto err;
179 	}
180 
181 	/* initialize the converter */
182 	ret = (*ci->ci_ops->io_init_shared)(ci, src, dst);
183 	if (ret)
184 		goto err;
185 
186 	*rci = ci;
187 
188 	return (0);
189 err:
190 	close_shared(ci);
191 	return (ret);
192 }
193 
194 static __inline int
195 hash_func(const char *key)
196 {
197 
198 	return (_string_hash_func(key, CI_HASH_SIZE));
199 }
200 
201 static __inline int
202 match_func(struct _citrus_iconv_shared * __restrict ci,
203     const char * __restrict key)
204 {
205 
206 	return (strcmp(ci->ci_convname, key));
207 }
208 
209 static int
210 get_shared(struct _citrus_iconv_shared * __restrict * __restrict rci,
211     const char *src, const char *dst)
212 {
213 	struct _citrus_iconv_shared * ci;
214 	char convname[PATH_MAX];
215 	int hashval, ret = 0;
216 
217 	snprintf(convname, sizeof(convname), "%s/%s", src, dst);
218 
219 	WLOCK(&ci_lock);
220 
221 	/* lookup alread existing entry */
222 	hashval = hash_func(convname);
223 	_CITRUS_HASH_SEARCH(&shared_pool, ci, ci_hash_entry, match_func,
224 	    convname, hashval);
225 	if (ci != NULL) {
226 		/* found */
227 		if (ci->ci_used_count == 0) {
228 			TAILQ_REMOVE(&shared_unused, ci, ci_tailq_entry);
229 			shared_num_unused--;
230 		}
231 		ci->ci_used_count++;
232 		*rci = ci;
233 		goto quit;
234 	}
235 
236 	/* create new entry */
237 	ret = open_shared(&ci, convname, src, dst);
238 	if (ret)
239 		goto quit;
240 
241 	_CITRUS_HASH_INSERT(&shared_pool, ci, ci_hash_entry, hashval);
242 	ci->ci_used_count = 1;
243 	*rci = ci;
244 
245 quit:
246 	UNLOCK(&ci_lock);
247 
248 	return (ret);
249 }
250 
251 static void
252 release_shared(struct _citrus_iconv_shared * __restrict ci)
253 {
254 
255 	WLOCK(&ci_lock);
256 	ci->ci_used_count--;
257 	if (ci->ci_used_count == 0) {
258 		/* put it into unused list */
259 		shared_num_unused++;
260 		TAILQ_INSERT_TAIL(&shared_unused, ci, ci_tailq_entry);
261 		/* flood out */
262 		while (shared_num_unused > shared_max_reuse) {
263 			ci = TAILQ_FIRST(&shared_unused);
264 			TAILQ_REMOVE(&shared_unused, ci, ci_tailq_entry);
265 			_CITRUS_HASH_REMOVE(ci, ci_hash_entry);
266 			shared_num_unused--;
267 			close_shared(ci);
268 		}
269 	}
270 
271 	UNLOCK(&ci_lock);
272 }
273 
274 /*
275  * _citrus_iconv_open:
276  *	open a converter for the specified in/out codes.
277  */
278 int
279 _citrus_iconv_open(struct _citrus_iconv * __restrict * __restrict rcv,
280     const char * __restrict src, const char * __restrict dst)
281 {
282 	struct _citrus_iconv *cv = NULL;
283 	struct _citrus_iconv_shared *ci = NULL;
284 	char realdst[PATH_MAX], realsrc[PATH_MAX], *slashes;
285 #ifdef _PATH_ICONV
286 	char buf[PATH_MAX], path[PATH_MAX];
287 #endif
288 	int ret;
289 
290 	init_cache();
291 
292 	/* GNU behaviour, using locale encoding if "" or "char" is specified */
293 	if ((strcmp(src, "") == 0) || (strcmp(src, "char") == 0))
294 		src = nl_langinfo(CODESET);
295 	if ((strcmp(dst, "") == 0) || (strcmp(dst, "char") == 0))
296 		dst = nl_langinfo(CODESET);
297 
298 	strlcpy(realsrc, src, (size_t)PATH_MAX);
299 	if ((slashes = strstr(realsrc, "//")) != NULL)
300 		*slashes = '\0';
301 	strlcpy(realdst, dst, (size_t)PATH_MAX);
302 	if ((slashes = strstr(realdst, "//")) != NULL)
303 		*slashes = '\0';
304 
305 	/* resolve codeset name aliases */
306 #ifdef _PATH_ICONV
307 	/*
308 	 * Note that the below reads from realsrc and realdst while it's
309 	 * repopulating (writing to) realsrc and realdst, but it's done so with
310 	 * a trip through `buf`.
311 	 */
312 	snprintf(path, sizeof(path), "%s/%s", _PATH_ICONV, _CITRUS_ICONV_ALIAS);
313 	strlcpy(realsrc, _lookup_alias(path, realsrc, buf, (size_t)PATH_MAX,
314 	    _LOOKUP_CASE_IGNORE), (size_t)PATH_MAX);
315 	strlcpy(realdst, _lookup_alias(path, realdst, buf, (size_t)PATH_MAX,
316 	    _LOOKUP_CASE_IGNORE), (size_t)PATH_MAX);
317 #endif
318 
319 	/* sanity check */
320 	if (strchr(realsrc, '/') != NULL || strchr(realdst, '/'))
321 		return (EINVAL);
322 
323 	/* get shared record */
324 	ret = get_shared(&ci, realsrc, realdst);
325 	if (ret)
326 		return (ret);
327 
328 	/* create/init context */
329 	if (*rcv == NULL) {
330 		cv = malloc(sizeof(*cv));
331 		if (cv == NULL) {
332 			ret = errno;
333 			release_shared(ci);
334 			return (ret);
335 		}
336 		*rcv = cv;
337 	}
338 	(*rcv)->cv_shared = ci;
339 	ret = (*ci->ci_ops->io_init_context)(*rcv);
340 	if (ret) {
341 		release_shared(ci);
342 		free(cv);
343 		return (ret);
344 	}
345 	return (0);
346 }
347 
348 /*
349  * _citrus_iconv_close:
350  *	close the specified converter.
351  */
352 void
353 _citrus_iconv_close(struct _citrus_iconv *cv)
354 {
355 
356 	if (cv) {
357 		(*cv->cv_shared->ci_ops->io_uninit_context)(cv);
358 		release_shared(cv->cv_shared);
359 		free(cv);
360 	}
361 }
362 
363 const char
364 *_citrus_iconv_canonicalize(const char *name)
365 {
366 	char *buf;
367 
368 	if ((buf = calloc((size_t)PATH_MAX, sizeof(*buf))) == NULL)
369 		return (NULL);
370 	_citrus_esdb_alias(name, buf, (size_t)PATH_MAX);
371 	return (buf);
372 }
373