xref: /freebsd/lib/libc/iconv/citrus_iconv.c (revision 26a222dc0c048fc071b548eadad7b80405a1b126)
1 /* $FreeBSD$ */
2 /*	$NetBSD: citrus_iconv.c,v 1.10 2011/11/19 18:34:21 tnozaki Exp $	*/
3 
4 /*-
5  * Copyright (c)2003 Citrus Project,
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include <sys/types.h>
32 #include <sys/queue.h>
33 
34 #include <assert.h>
35 #include <dirent.h>
36 #include <errno.h>
37 #include <iconv.h>
38 #include <langinfo.h>
39 #include <limits.h>
40 #include <paths.h>
41 #include <stdbool.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 
47 #include "citrus_namespace.h"
48 #include "citrus_bcs.h"
49 #include "citrus_esdb.h"
50 #include "citrus_region.h"
51 #include "citrus_memstream.h"
52 #include "citrus_mmap.h"
53 #include "citrus_module.h"
54 #include "citrus_lock.h"
55 #include "citrus_lookup.h"
56 #include "citrus_hash.h"
57 #include "citrus_iconv.h"
58 
59 #define _CITRUS_ICONV_DIR	"iconv.dir"
60 #define _CITRUS_ICONV_ALIAS	"iconv.alias"
61 
62 #define CI_HASH_SIZE 101
63 #define CI_INITIAL_MAX_REUSE	5
64 #define CI_ENV_MAX_REUSE	"ICONV_MAX_REUSE"
65 
66 static bool			 isinit = false;
67 static int			 shared_max_reuse, shared_num_unused;
68 static _CITRUS_HASH_HEAD(, _citrus_iconv_shared, CI_HASH_SIZE) shared_pool;
69 static TAILQ_HEAD(, _citrus_iconv_shared) shared_unused;
70 
71 static pthread_rwlock_t		 ci_lock = PTHREAD_RWLOCK_INITIALIZER;
72 
73 static __inline void
74 init_cache(void)
75 {
76 
77 	WLOCK(&ci_lock);
78 	if (!isinit) {
79 		_CITRUS_HASH_INIT(&shared_pool, CI_HASH_SIZE);
80 		TAILQ_INIT(&shared_unused);
81 		shared_max_reuse = -1;
82 		if (!issetugid() && getenv(CI_ENV_MAX_REUSE))
83 			shared_max_reuse = atoi(getenv(CI_ENV_MAX_REUSE));
84 		if (shared_max_reuse < 0)
85 			shared_max_reuse = CI_INITIAL_MAX_REUSE;
86 		isinit = true;
87 	}
88 	UNLOCK(&ci_lock);
89 }
90 
91 static __inline void
92 close_shared(struct _citrus_iconv_shared *ci)
93 {
94 
95 	if (ci) {
96 		if (ci->ci_module) {
97 			if (ci->ci_ops) {
98 				if (ci->ci_closure)
99 					(*ci->ci_ops->io_uninit_shared)(ci);
100 				free(ci->ci_ops);
101 			}
102 			_citrus_unload_module(ci->ci_module);
103 		}
104 		free(ci);
105 	}
106 }
107 
108 static __inline int
109 open_shared(struct _citrus_iconv_shared * __restrict * __restrict rci,
110     const char * __restrict convname, const char * __restrict src,
111     const char * __restrict dst)
112 {
113 	struct _citrus_iconv_shared *ci;
114 	_citrus_iconv_getops_t getops;
115 	const char *module;
116 	size_t len_convname;
117 	int ret;
118 
119 #ifdef INCOMPATIBLE_WITH_GNU_ICONV
120 	/*
121 	 * Sadly, the gnu tools expect iconv to actually parse the
122 	 * byte stream and don't allow for a pass-through when
123 	 * the (src,dest) encodings are the same.
124 	 * See gettext-0.18.3+ NEWS:
125 	 *   msgfmt now checks PO file headers more strictly with less
126 	 *   false-positives.
127 	 * NetBSD don't do this either.
128 	 */
129 	module = (strcmp(src, dst) != 0) ? "iconv_std" : "iconv_none";
130 #else
131 	module = "iconv_std";
132 #endif
133 
134 	/* initialize iconv handle */
135 	len_convname = strlen(convname);
136 	ci = malloc(sizeof(*ci) + len_convname + 1);
137 	if (!ci) {
138 		ret = errno;
139 		goto err;
140 	}
141 	ci->ci_module = NULL;
142 	ci->ci_ops = NULL;
143 	ci->ci_closure = NULL;
144 	ci->ci_convname = (void *)&ci[1];
145 	memcpy(ci->ci_convname, convname, len_convname + 1);
146 
147 	/* load module */
148 	ret = _citrus_load_module(&ci->ci_module, module);
149 	if (ret)
150 		goto err;
151 
152 	/* get operators */
153 	getops = (_citrus_iconv_getops_t)_citrus_find_getops(ci->ci_module,
154 	    module, "iconv");
155 	if (!getops) {
156 		ret = EOPNOTSUPP;
157 		goto err;
158 	}
159 	ci->ci_ops = malloc(sizeof(*ci->ci_ops));
160 	if (!ci->ci_ops) {
161 		ret = errno;
162 		goto err;
163 	}
164 	ret = (*getops)(ci->ci_ops);
165 	if (ret)
166 		goto err;
167 
168 	if (ci->ci_ops->io_init_shared == NULL ||
169 	    ci->ci_ops->io_uninit_shared == NULL ||
170 	    ci->ci_ops->io_init_context == NULL ||
171 	    ci->ci_ops->io_uninit_context == NULL ||
172 	    ci->ci_ops->io_convert == NULL) {
173 		ret = EINVAL;
174 		goto err;
175 	}
176 
177 	/* initialize the converter */
178 	ret = (*ci->ci_ops->io_init_shared)(ci, src, dst);
179 	if (ret)
180 		goto err;
181 
182 	*rci = ci;
183 
184 	return (0);
185 err:
186 	close_shared(ci);
187 	return (ret);
188 }
189 
190 static __inline int
191 hash_func(const char *key)
192 {
193 
194 	return (_string_hash_func(key, CI_HASH_SIZE));
195 }
196 
197 static __inline int
198 match_func(struct _citrus_iconv_shared * __restrict ci,
199     const char * __restrict key)
200 {
201 
202 	return (strcmp(ci->ci_convname, key));
203 }
204 
205 static int
206 get_shared(struct _citrus_iconv_shared * __restrict * __restrict rci,
207     const char *src, const char *dst)
208 {
209 	struct _citrus_iconv_shared * ci;
210 	char convname[PATH_MAX];
211 	int hashval, ret = 0;
212 
213 	snprintf(convname, sizeof(convname), "%s/%s", src, dst);
214 
215 	WLOCK(&ci_lock);
216 
217 	/* lookup alread existing entry */
218 	hashval = hash_func(convname);
219 	_CITRUS_HASH_SEARCH(&shared_pool, ci, ci_hash_entry, match_func,
220 	    convname, hashval);
221 	if (ci != NULL) {
222 		/* found */
223 		if (ci->ci_used_count == 0) {
224 			TAILQ_REMOVE(&shared_unused, ci, ci_tailq_entry);
225 			shared_num_unused--;
226 		}
227 		ci->ci_used_count++;
228 		*rci = ci;
229 		goto quit;
230 	}
231 
232 	/* create new entry */
233 	ret = open_shared(&ci, convname, src, dst);
234 	if (ret)
235 		goto quit;
236 
237 	_CITRUS_HASH_INSERT(&shared_pool, ci, ci_hash_entry, hashval);
238 	ci->ci_used_count = 1;
239 	*rci = ci;
240 
241 quit:
242 	UNLOCK(&ci_lock);
243 
244 	return (ret);
245 }
246 
247 static void
248 release_shared(struct _citrus_iconv_shared * __restrict ci)
249 {
250 
251 	WLOCK(&ci_lock);
252 	ci->ci_used_count--;
253 	if (ci->ci_used_count == 0) {
254 		/* put it into unused list */
255 		shared_num_unused++;
256 		TAILQ_INSERT_TAIL(&shared_unused, ci, ci_tailq_entry);
257 		/* flood out */
258 		while (shared_num_unused > shared_max_reuse) {
259 			ci = TAILQ_FIRST(&shared_unused);
260 			TAILQ_REMOVE(&shared_unused, ci, ci_tailq_entry);
261 			_CITRUS_HASH_REMOVE(ci, ci_hash_entry);
262 			shared_num_unused--;
263 			close_shared(ci);
264 		}
265 	}
266 
267 	UNLOCK(&ci_lock);
268 }
269 
270 /*
271  * _citrus_iconv_open:
272  *	open a converter for the specified in/out codes.
273  */
274 int
275 _citrus_iconv_open(struct _citrus_iconv * __restrict * __restrict rcv,
276     const char * __restrict src, const char * __restrict dst)
277 {
278 	struct _citrus_iconv *cv = NULL;
279 	struct _citrus_iconv_shared *ci = NULL;
280 	char realdst[PATH_MAX], realsrc[PATH_MAX];
281 #ifdef _PATH_ICONV
282 	char buf[PATH_MAX], path[PATH_MAX];
283 #endif
284 	int ret;
285 
286 	init_cache();
287 
288 	/* GNU behaviour, using locale encoding if "" or "char" is specified */
289 	if ((strcmp(src, "") == 0) || (strcmp(src, "char") == 0))
290 		src = nl_langinfo(CODESET);
291 	if ((strcmp(dst, "") == 0) || (strcmp(dst, "char") == 0))
292 		dst = nl_langinfo(CODESET);
293 
294 	/* resolve codeset name aliases */
295 #ifdef _PATH_ICONV
296 	snprintf(path, sizeof(path), "%s/%s", _PATH_ICONV, _CITRUS_ICONV_ALIAS);
297 	strlcpy(realsrc, _lookup_alias(path, src, buf, (size_t)PATH_MAX,
298 	    _LOOKUP_CASE_IGNORE), (size_t)PATH_MAX);
299 	strlcpy(realdst, _lookup_alias(path, dst, buf, (size_t)PATH_MAX,
300 	    _LOOKUP_CASE_IGNORE), (size_t)PATH_MAX);
301 #else
302 	strlcpy(realsrc, src, (size_t)PATH_MAX);
303 	strlcpy(realdst, dst, (size_t)PATH_MAX);
304 #endif
305 
306 	/* sanity check */
307 	if (strchr(realsrc, '/') != NULL || strchr(realdst, '/'))
308 		return (EINVAL);
309 
310 	/* get shared record */
311 	ret = get_shared(&ci, realsrc, realdst);
312 	if (ret)
313 		return (ret);
314 
315 	/* create/init context */
316 	if (*rcv == NULL) {
317 		cv = malloc(sizeof(*cv));
318 		if (cv == NULL) {
319 			ret = errno;
320 			release_shared(ci);
321 			return (ret);
322 		}
323 		*rcv = cv;
324 	}
325 	(*rcv)->cv_shared = ci;
326 	ret = (*ci->ci_ops->io_init_context)(*rcv);
327 	if (ret) {
328 		release_shared(ci);
329 		free(cv);
330 		return (ret);
331 	}
332 	return (0);
333 }
334 
335 /*
336  * _citrus_iconv_close:
337  *	close the specified converter.
338  */
339 void
340 _citrus_iconv_close(struct _citrus_iconv *cv)
341 {
342 
343 	if (cv) {
344 		(*cv->cv_shared->ci_ops->io_uninit_context)(cv);
345 		release_shared(cv->cv_shared);
346 		free(cv);
347 	}
348 }
349 
350 const char
351 *_citrus_iconv_canonicalize(const char *name)
352 {
353 	char *buf;
354 
355 	if ((buf = calloc((size_t)PATH_MAX, sizeof(*buf))) == NULL)
356 		return (NULL);
357 	_citrus_esdb_alias(name, buf, (size_t)PATH_MAX);
358 	return (buf);
359 }
360