xref: /illumos-gate/usr/src/lib/libc/port/gen/iconv.c (revision 942c5e3c2dd127463517e5cc1694ee94ca45e021)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #pragma weak iconv_open = _iconv_open
30 #pragma weak iconv_close = _iconv_close
31 #pragma weak iconv = _iconv
32 
33 #include "synonyms.h"
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/mman.h>
37 #include <stdlib.h>
38 #include <stdio.h>
39 #include <dlfcn.h>
40 #include <fcntl.h>
41 #include <unistd.h>
42 #include <string.h>
43 #include <errno.h>
44 #include <sys/param.h>
45 #include <alloca.h>
46 #include "iconv.h"
47 #include "iconvP.h"
48 #include "../i18n/_loc_path.h"
49 
/*
 * Forward declarations of the file-local helpers used by _iconv_open().
 * Each returns a conversion descriptor, or (iconv_p)-1 on failure.
 */
static iconv_p	iconv_open_all(const char *, const char *, char *);
static iconv_p	iconv_open_private(const char *, const char *);
static iconv_p	iconv_search_alias(const char *, const char *, char *);
53 
/*
 * These functions are implemented using shared objects and the dlopen()
 * family of functions.  The actual conversion algorithm for a particular
 * conversion is implemented in a separate loadable conversion module
 * that is linked dynamically at run time.
 * The loadable conversion module resides in
 *	/usr/lib/iconv/fromcode%tocode.so
 * where fromcode is the source encoding and tocode is the target encoding.
 * The module has 3 entries: _icv_open(), _icv_iconv(), _icv_close().
 */
64 
65 iconv_t
66 _iconv_open(const char *tocode, const char *fromcode)
67 {
68 	iconv_t	cd;
69 	char	*ipath;
70 
71 	if ((cd = malloc(sizeof (struct _iconv_info))) == NULL)
72 		return ((iconv_t)-1);
73 
74 	/*
75 	 * Memory for ipath is allocated/released in this function.
76 	 */
77 	ipath = malloc(MAXPATHLEN);
78 	if (ipath == NULL) {
79 		free(cd);
80 		return ((iconv_t)-1);
81 	}
82 
83 	cd->_conv = iconv_open_all(tocode, fromcode, ipath);
84 	if (cd->_conv != (iconv_p)-1) {
85 		/* found a valid module for this conversion */
86 		free(ipath);
87 		return (cd);
88 	}
89 
90 	/*
91 	 * Now, try using the encoding name aliasing table
92 	 */
93 	cd->_conv = iconv_search_alias(tocode, fromcode, ipath);
94 	free(ipath);
95 	if (cd->_conv == (iconv_p)-1) {
96 		/* no valid module for this conversion found */
97 		free(cd);
98 		/* errno set by iconv_search_alias */
99 		return ((iconv_t)-1);
100 	}
101 	/* found a valid module for this conversion */
102 	return (cd);
103 }
104 
/*
 * Advance past any run of spaces and tabs, never going beyond "end".
 */
static char *
alias_skip_blanks(char *p, char *end)
{
	while ((p < end) && ((*p == ' ') || (*p == '\t')))
		p++;
	return (p);
}

/*
 * Advance to the first space, tab or newline, or to "end" if none is found.
 */
static char *
alias_skip_token(char *p, char *end)
{
	while ((p < end) && (*p != ' ') && (*p != '\t') && (*p != '\n'))
		p++;
	return (p);
}

/*
 * Advance just past the next newline, or to "end" if there is none.
 */
static char *
alias_next_line(char *p, char *end)
{
	while (p < end) {
		if (*p++ == '\n')
			break;
	}
	return (p);
}

/*
 * Look up "variant" in an in-memory alias table.
 *
 * The table is the "size" bytes starting at *paddr.  Each line has the
 * form "<variant> <canonical>", with fields separated by spaces or tabs.
 * A line whose first character is '#' is a comment.  The variant name is
 * compared case-insensitively and must match the whole first field.
 *
 * On a match, *paddr is set to the start of the canonical name (which is
 * NOT NUL-terminated) and its length is returned.  If nothing matches,
 * 0 is returned and *paddr is left unchanged.
 */
static size_t
search_alias(char **paddr, size_t size, const char *variant)
{
	char	*cur = *paddr;
	char	*end = cur + size;
	char	*field;
	size_t	want = strlen(variant);
	size_t	fieldlen;

	while (cur < end) {
		if (*cur == '#') {
			/* comment line */
			cur = alias_next_line(cur, end);
			continue;
		}

		/* first field: the variant name */
		cur = alias_skip_blanks(cur, end);
		if (cur >= end)
			break;
		field = cur;
		cur = alias_skip_token(cur, end);
		if (cur >= end) {
			/* file ends inside the first field: invalid entry */
			break;
		}
		if (*cur == '\n') {
			/* line has no second field: invalid entry */
			cur++;
			continue;
		}

		/*
		 * A case-insensitive comparison also accepts an exact
		 * match, so a single strncasecmp() call is sufficient.
		 */
		fieldlen = (size_t)(cur - field);
		if ((fieldlen != want) ||
		    (strncasecmp(field, variant, want) != 0)) {
			/* didn't match; move on to the next line */
			cur = alias_next_line(cur, end);
			continue;
		}

		/* second field: the canonical name */
		cur = alias_skip_blanks(cur, end);
		if (cur >= end)
			break;
		field = cur;
		cur = alias_skip_token(cur, end);
		fieldlen = (size_t)(cur - field);
		if (fieldlen == 0) {
			/* only blanks after the variant name; skip the line */
			cur = alias_next_line(cur, end);
			continue;
		}

		*paddr = field;
		return (fieldlen);
	}
	return (0);
}
183 
184 static iconv_p
185 iconv_open_all(const char *to, const char *from, char *ipath)
186 {
187 	iconv_p	cv;
188 	int	len;
189 
190 	/*
191 	 * First, try using the geniconvtbl conversion, which is
192 	 * performed by /usr/lib/iconv/geniconvtbl.so with
193 	 * the conversion table file:
194 	 * /usr/lib/iconv/geniconvtbl/binarytables/fromcode%tocode.bt
195 	 *
196 	 * If the geniconvtbl conversion cannot be done,
197 	 * try the conversion by the individual shared object.
198 	 */
199 
200 	len = snprintf(ipath, MAXPATHLEN, _GENICONVTBL_PATH, from, to);
201 	if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
202 		/*
203 		 * from%to.bt exists in the table dir
204 		 */
205 		cv = iconv_open_private(_GENICONVTBL_INT_PATH, ipath);
206 		if (cv != (iconv_p)-1) {
207 			/* found a valid module for this conversion */
208 			return (cv);
209 		}
210 	}
211 
212 	/* Next, try /usr/lib/iconv/from%to.so */
213 	len = snprintf(ipath, MAXPATHLEN, _ICONV_PATH, from, to);
214 	if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
215 		/*
216 		 * /usr/lib/iconv/from%to.so exists
217 		 * errno will be set by iconv_open_private on error
218 		 */
219 		return (iconv_open_private(ipath, NULL));
220 	}
221 	/* no valid module for this conversion found */
222 	errno = EINVAL;
223 	return ((iconv_p)-1);
224 }
225 
226 static iconv_p
227 iconv_search_alias(const char *tocode, const char *fromcode, char *ipath)
228 {
229 	char	*p;
230 	char	*to_canonical, *from_canonical;
231 	size_t	tolen, fromlen;
232 	iconv_p	cv;
233 	int	fd;
234 	struct stat64	statbuf;
235 	caddr_t	addr;
236 	size_t	buflen;
237 
238 	fd = open(_ENCODING_ALIAS_PATH, O_RDONLY);
239 	if (fd == -1) {
240 		/*
241 		 * if no alias file found,
242 		 * errno will be set to EINVAL.
243 		 */
244 		errno = EINVAL;
245 		return ((iconv_p)-1);
246 	}
247 	if (fstat64(fd, &statbuf) == -1) {
248 		(void) close(fd);
249 		/* use errno set by fstat64 */
250 		return ((iconv_p)-1);
251 	}
252 	buflen = (size_t)statbuf.st_size;
253 	addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0);
254 	(void) close(fd);
255 	if (addr == MAP_FAILED) {
256 		/* use errno set by mmap */
257 		return ((iconv_p)-1);
258 	}
259 	p = (char *)addr;
260 	tolen = search_alias(&p, buflen, tocode);
261 	if (tolen) {
262 		to_canonical = alloca(tolen + 1);
263 		(void) memcpy(to_canonical, p, tolen);
264 		to_canonical[tolen] = '\0';
265 	} else {
266 		to_canonical = (char *)tocode;
267 	}
268 	p = (char *)addr;
269 	fromlen = search_alias(&p, buflen, fromcode);
270 	if (fromlen) {
271 		from_canonical = alloca(fromlen + 1);
272 		(void) memcpy(from_canonical, p, fromlen);
273 		from_canonical[fromlen] = '\0';
274 	} else {
275 		from_canonical = (char *)fromcode;
276 	}
277 	(void) munmap(addr, buflen);
278 	if (tolen == 0 && fromlen == 0) {
279 		errno = EINVAL;
280 		return ((iconv_p)-1);
281 	}
282 
283 	cv = iconv_open_all(to_canonical, from_canonical, ipath);
284 
285 	/* errno set by iconv_open_all on error */
286 	return (cv);
287 }
288 
289 static iconv_p
290 iconv_open_private(const char *lib, const char *tbl)
291 {
292 	iconv_t (*fptr)(const char *);
293 	iconv_p cdpath;
294 
295 	if ((cdpath = malloc(sizeof (struct _iconv_fields))) == NULL)
296 		return ((iconv_p)-1);
297 
298 	if ((cdpath->_icv_handle = dlopen(lib, RTLD_LAZY)) == 0) {
299 		free(cdpath);
300 		/* dlopen does not define error no */
301 		errno = EINVAL;
302 		return ((iconv_p)-1);
303 	}
304 
305 	/* gets address of _icv_open */
306 	if ((fptr = (iconv_t(*)(const char *))dlsym(cdpath->_icv_handle,
307 		"_icv_open")) == NULL) {
308 		(void) dlclose(cdpath->_icv_handle);
309 		free(cdpath);
310 		/* dlsym does not define errno */
311 		errno = EINVAL;
312 		return ((iconv_p)-1);
313 	}
314 
315 	/*
316 	 * gets address of _icv_iconv in the loadable conversion module
317 	 * and stores it in cdpath->_icv_iconv
318 	 */
319 
320 	if ((cdpath->_icv_iconv = (size_t(*)(iconv_t, const char **,
321 		size_t *, char **, size_t *))dlsym(cdpath->_icv_handle,
322 		"_icv_iconv")) == NULL) {
323 		(void) dlclose(cdpath->_icv_handle);
324 		free(cdpath);
325 		/* dlsym does not define errno */
326 		errno = EINVAL;
327 		return ((iconv_p)-1);
328 	}
329 
330 	/*
331 	 * gets address of _icv_close in the loadable conversion module
332 	 * and stores it in cd->_icv_close
333 	 */
334 	if ((cdpath->_icv_close = (void(*)(iconv_t))dlsym(cdpath->_icv_handle,
335 		"_icv_close")) == NULL) {
336 		(void) dlclose(cdpath->_icv_handle);
337 		free(cdpath);
338 		/* dlsym does not define errno */
339 		errno = EINVAL;
340 		return ((iconv_p)-1);
341 	}
342 
343 	/*
344 	 * initialize the state of the actual _icv_iconv conversion routine
345 	 * For the normal iconv module, NULL will be passed as an argument
346 	 * although the _iconv_open() of the module won't use that.
347 	 */
348 	cdpath->_icv_state = (void *)(*fptr)(tbl);
349 
350 	if (cdpath->_icv_state == (struct _icv_state *)-1) {
351 		(void) dlclose(cdpath->_icv_handle);
352 		free(cdpath);
353 		/* this module does not satisfy this conversion */
354 		errno = EINVAL;
355 		return ((iconv_p)-1);
356 	}
357 
358 	return (cdpath);
359 }
360 
361 int
362 _iconv_close(iconv_t cd)
363 {
364 	if (cd == NULL) {
365 		errno = EBADF;
366 		return (-1);
367 	}
368 	(*(cd->_conv)->_icv_close)(cd->_conv->_icv_state);
369 	(void) dlclose(cd->_conv->_icv_handle);
370 	free(cd->_conv);
371 	free(cd);
372 	return (0);
373 }
374 
375 size_t
376 _iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
377 	char **outbuf, size_t *outbytesleft)
378 {
379 	/* check if cd is valid */
380 	if (cd == NULL) {
381 		errno = EBADF;
382 		return ((size_t)-1);
383 	}
384 
385 	/* direct conversion */
386 	return ((*(cd->_conv)->_icv_iconv)(cd->_conv->_icv_state,
387 		inbuf, inbytesleft, outbuf, outbytesleft));
388 }
389