xref: /freebsd/lib/libc/iconv/citrus_iconv.c (revision d9a42747950146bf03cda7f6e25d219253f8a57a)
1 /* $FreeBSD$ */
2 /*	$NetBSD: citrus_iconv.c,v 1.10 2011/11/19 18:34:21 tnozaki Exp $	*/
3 
4 /*-
5  * SPDX-License-Identifier: BSD-2-Clause
6  *
7  * Copyright (c)2003 Citrus Project,
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/types.h>
34 #include <sys/queue.h>
35 
36 #include <assert.h>
37 #include <dirent.h>
38 #include <errno.h>
39 #include <iconv.h>
40 #include <langinfo.h>
41 #include <limits.h>
42 #include <paths.h>
43 #include <stdbool.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 
49 #include "citrus_namespace.h"
50 #include "citrus_bcs.h"
51 #include "citrus_esdb.h"
52 #include "citrus_region.h"
53 #include "citrus_memstream.h"
54 #include "citrus_mmap.h"
55 #include "citrus_module.h"
56 #include "citrus_lock.h"
57 #include "citrus_lookup.h"
58 #include "citrus_hash.h"
59 #include "citrus_iconv.h"
60 
61 #define _CITRUS_ICONV_DIR	"iconv.dir"
62 #define _CITRUS_ICONV_ALIAS	"iconv.alias"
63 
64 #define CI_HASH_SIZE 101
65 #define CI_INITIAL_MAX_REUSE	5
66 #define CI_ENV_MAX_REUSE	"ICONV_MAX_REUSE"
67 
68 static bool			 isinit = false;
69 static int			 shared_max_reuse, shared_num_unused;
70 static _CITRUS_HASH_HEAD(, _citrus_iconv_shared, CI_HASH_SIZE) shared_pool;
71 static TAILQ_HEAD(, _citrus_iconv_shared) shared_unused;
72 
73 static pthread_rwlock_t		 ci_lock = PTHREAD_RWLOCK_INITIALIZER;
74 
75 static __inline void
76 init_cache(void)
77 {
78 
79 	WLOCK(&ci_lock);
80 	if (!isinit) {
81 		_CITRUS_HASH_INIT(&shared_pool, CI_HASH_SIZE);
82 		TAILQ_INIT(&shared_unused);
83 		shared_max_reuse = -1;
84 		if (secure_getenv(CI_ENV_MAX_REUSE) != NULL)
85 			shared_max_reuse =
86 			    atoi(secure_getenv(CI_ENV_MAX_REUSE));
87 		if (shared_max_reuse < 0)
88 			shared_max_reuse = CI_INITIAL_MAX_REUSE;
89 		isinit = true;
90 	}
91 	UNLOCK(&ci_lock);
92 }
93 
94 static __inline void
95 close_shared(struct _citrus_iconv_shared *ci)
96 {
97 
98 	if (ci) {
99 		if (ci->ci_module) {
100 			if (ci->ci_ops) {
101 				if (ci->ci_closure)
102 					(*ci->ci_ops->io_uninit_shared)(ci);
103 				free(ci->ci_ops);
104 			}
105 			_citrus_unload_module(ci->ci_module);
106 		}
107 		free(ci);
108 	}
109 }
110 
111 static __inline int
112 open_shared(struct _citrus_iconv_shared * __restrict * __restrict rci,
113     const char * __restrict convname, const char * __restrict src,
114     const char * __restrict dst)
115 {
116 	struct _citrus_iconv_shared *ci;
117 	_citrus_iconv_getops_t getops;
118 	const char *module;
119 	size_t len_convname;
120 	int ret;
121 
122 #ifdef INCOMPATIBLE_WITH_GNU_ICONV
123 	/*
124 	 * Sadly, the gnu tools expect iconv to actually parse the
125 	 * byte stream and don't allow for a pass-through when
126 	 * the (src,dest) encodings are the same.
127 	 * See gettext-0.18.3+ NEWS:
128 	 *   msgfmt now checks PO file headers more strictly with less
129 	 *   false-positives.
130 	 * NetBSD, also, doesn't do the below pass-through.
131 	 *
132 	 * Also note that this currently falls short if dst options have been
133 	 * specified. It may be the case that we want to ignore EILSEQ, in which
134 	 * case we should also select iconv_std anyways.  This trick, while
135 	 * clever, may not be worth it.
136 	 */
137 	module = (strcmp(src, dst) != 0) ? "iconv_std" : "iconv_none";
138 #else
139 	module = "iconv_std";
140 #endif
141 
142 	/* initialize iconv handle */
143 	len_convname = strlen(convname);
144 	ci = malloc(sizeof(*ci) + len_convname + 1);
145 	if (!ci) {
146 		ret = errno;
147 		goto err;
148 	}
149 	ci->ci_module = NULL;
150 	ci->ci_ops = NULL;
151 	ci->ci_closure = NULL;
152 	ci->ci_convname = (void *)&ci[1];
153 	memcpy(ci->ci_convname, convname, len_convname + 1);
154 
155 	/* load module */
156 	ret = _citrus_load_module(&ci->ci_module, module);
157 	if (ret)
158 		goto err;
159 
160 	/* get operators */
161 	getops = (_citrus_iconv_getops_t)_citrus_find_getops(ci->ci_module,
162 	    module, "iconv");
163 	if (!getops) {
164 		ret = EOPNOTSUPP;
165 		goto err;
166 	}
167 	ci->ci_ops = malloc(sizeof(*ci->ci_ops));
168 	if (!ci->ci_ops) {
169 		ret = errno;
170 		goto err;
171 	}
172 	ret = (*getops)(ci->ci_ops);
173 	if (ret)
174 		goto err;
175 
176 	if (ci->ci_ops->io_init_shared == NULL ||
177 	    ci->ci_ops->io_uninit_shared == NULL ||
178 	    ci->ci_ops->io_init_context == NULL ||
179 	    ci->ci_ops->io_uninit_context == NULL ||
180 	    ci->ci_ops->io_convert == NULL) {
181 		ret = EINVAL;
182 		goto err;
183 	}
184 
185 	/* initialize the converter */
186 	ret = (*ci->ci_ops->io_init_shared)(ci, src, dst);
187 	if (ret)
188 		goto err;
189 
190 	*rci = ci;
191 
192 	return (0);
193 err:
194 	close_shared(ci);
195 	return (ret);
196 }
197 
198 static __inline int
199 hash_func(const char *key)
200 {
201 
202 	return (_string_hash_func(key, CI_HASH_SIZE));
203 }
204 
205 static __inline int
206 match_func(struct _citrus_iconv_shared * __restrict ci,
207     const char * __restrict key)
208 {
209 
210 	return (strcmp(ci->ci_convname, key));
211 }
212 
213 static int
214 get_shared(struct _citrus_iconv_shared * __restrict * __restrict rci,
215     const char *src, const char *dst)
216 {
217 	struct _citrus_iconv_shared * ci;
218 	char convname[PATH_MAX];
219 	int hashval, ret = 0;
220 
221 	snprintf(convname, sizeof(convname), "%s/%s", src, dst);
222 
223 	WLOCK(&ci_lock);
224 
225 	/* lookup alread existing entry */
226 	hashval = hash_func(convname);
227 	_CITRUS_HASH_SEARCH(&shared_pool, ci, ci_hash_entry, match_func,
228 	    convname, hashval);
229 	if (ci != NULL) {
230 		/* found */
231 		if (ci->ci_used_count == 0) {
232 			TAILQ_REMOVE(&shared_unused, ci, ci_tailq_entry);
233 			shared_num_unused--;
234 		}
235 		ci->ci_used_count++;
236 		*rci = ci;
237 		goto quit;
238 	}
239 
240 	/* create new entry */
241 	ret = open_shared(&ci, convname, src, dst);
242 	if (ret)
243 		goto quit;
244 
245 	_CITRUS_HASH_INSERT(&shared_pool, ci, ci_hash_entry, hashval);
246 	ci->ci_used_count = 1;
247 	*rci = ci;
248 
249 quit:
250 	UNLOCK(&ci_lock);
251 
252 	return (ret);
253 }
254 
255 static void
256 release_shared(struct _citrus_iconv_shared * __restrict ci)
257 {
258 
259 	WLOCK(&ci_lock);
260 	ci->ci_used_count--;
261 	if (ci->ci_used_count == 0) {
262 		/* put it into unused list */
263 		shared_num_unused++;
264 		TAILQ_INSERT_TAIL(&shared_unused, ci, ci_tailq_entry);
265 		/* flood out */
266 		while (shared_num_unused > shared_max_reuse) {
267 			ci = TAILQ_FIRST(&shared_unused);
268 			TAILQ_REMOVE(&shared_unused, ci, ci_tailq_entry);
269 			_CITRUS_HASH_REMOVE(ci, ci_hash_entry);
270 			shared_num_unused--;
271 			close_shared(ci);
272 		}
273 	}
274 
275 	UNLOCK(&ci_lock);
276 }
277 
278 /*
279  * _citrus_iconv_open:
280  *	open a converter for the specified in/out codes.
281  */
282 int
283 _citrus_iconv_open(struct _citrus_iconv * __restrict * __restrict rcv,
284     const char * __restrict src, const char * __restrict dst)
285 {
286 	struct _citrus_iconv *cv = NULL;
287 	struct _citrus_iconv_shared *ci = NULL;
288 	char realdst[PATH_MAX], realsrc[PATH_MAX], *slashes;
289 #ifdef _PATH_ICONV
290 	char buf[PATH_MAX], path[PATH_MAX];
291 #endif
292 	int ret;
293 
294 	init_cache();
295 
296 	/* GNU behaviour, using locale encoding if "" or "char" is specified */
297 	if ((strcmp(src, "") == 0) || (strcmp(src, "char") == 0))
298 		src = nl_langinfo(CODESET);
299 	if ((strcmp(dst, "") == 0) || (strcmp(dst, "char") == 0))
300 		dst = nl_langinfo(CODESET);
301 
302 	strlcpy(realsrc, src, (size_t)PATH_MAX);
303 	if ((slashes = strstr(realsrc, "//")) != NULL)
304 		*slashes = '\0';
305 	strlcpy(realdst, dst, (size_t)PATH_MAX);
306 	if ((slashes = strstr(realdst, "//")) != NULL)
307 		*slashes = '\0';
308 
309 	/* resolve codeset name aliases */
310 #ifdef _PATH_ICONV
311 	/*
312 	 * Note that the below reads from realsrc and realdst while it's
313 	 * repopulating (writing to) realsrc and realdst, but it's done so with
314 	 * a trip through `buf`.
315 	 */
316 	snprintf(path, sizeof(path), "%s/%s", _PATH_ICONV, _CITRUS_ICONV_ALIAS);
317 	strlcpy(realsrc, _lookup_alias(path, realsrc, buf, (size_t)PATH_MAX,
318 	    _LOOKUP_CASE_IGNORE), (size_t)PATH_MAX);
319 	strlcpy(realdst, _lookup_alias(path, realdst, buf, (size_t)PATH_MAX,
320 	    _LOOKUP_CASE_IGNORE), (size_t)PATH_MAX);
321 #endif
322 
323 	/* sanity check */
324 	if (strchr(realsrc, '/') != NULL || strchr(realdst, '/'))
325 		return (EINVAL);
326 
327 	/* get shared record */
328 	ret = get_shared(&ci, realsrc, realdst);
329 	if (ret)
330 		return (ret);
331 
332 	/* create/init context */
333 	if (*rcv == NULL) {
334 		cv = malloc(sizeof(*cv));
335 		if (cv == NULL) {
336 			ret = errno;
337 			release_shared(ci);
338 			return (ret);
339 		}
340 		*rcv = cv;
341 	}
342 	(*rcv)->cv_shared = ci;
343 	ret = (*ci->ci_ops->io_init_context)(*rcv);
344 	if (ret) {
345 		release_shared(ci);
346 		free(cv);
347 		return (ret);
348 	}
349 	return (0);
350 }
351 
352 /*
353  * _citrus_iconv_close:
354  *	close the specified converter.
355  */
356 void
357 _citrus_iconv_close(struct _citrus_iconv *cv)
358 {
359 
360 	if (cv) {
361 		(*cv->cv_shared->ci_ops->io_uninit_context)(cv);
362 		release_shared(cv->cv_shared);
363 		free(cv);
364 	}
365 }
366 
367 const char
368 *_citrus_iconv_canonicalize(const char *name)
369 {
370 	char *buf;
371 
372 	if ((buf = calloc((size_t)PATH_MAX, sizeof(*buf))) == NULL)
373 		return (NULL);
374 	_citrus_esdb_alias(name, buf, (size_t)PATH_MAX);
375 	return (buf);
376 }
377