xref: /illumos-gate/usr/src/lib/libc/port/gen/iconv.c (revision b07ce584f4e28873b8927d7f83d9d3275a0f3ed2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "lint.h"
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <sys/mman.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <dlfcn.h>
36 #include <fcntl.h>
37 #include <unistd.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <sys/param.h>
41 #include <alloca.h>
42 #include "iconv.h"
43 #include "iconvP.h"
44 #include "../i18n/_loc_path.h"
45 
46 static iconv_p	iconv_open_all(const char *, const char *, char *);
47 static iconv_p	iconv_open_private(const char *, const char *);
48 static iconv_p	iconv_search_alias(const char *, const char *, char *);
49 
/*
 * These functions are implemented on top of shared objects and the
 * dlopen() family of functions.  The actual conversion algorithm for a
 * particular conversion lives in a separate file, a loadable conversion
 * module, which is a shared object linked dynamically at run time.
 * The loadable conversion module resides in
 *	/usr/lib/iconv/fromcode%tocode.so
 * where fromcode is the source encoding and tocode is the target encoding.
 * The module has 3 entries: _icv_open(), _icv_iconv(), and _icv_close().
 */
60 
61 iconv_t
62 iconv_open(const char *tocode, const char *fromcode)
63 {
64 	iconv_t	cd;
65 	char	*ipath;
66 
67 	if ((cd = malloc(sizeof (struct _iconv_info))) == NULL)
68 		return ((iconv_t)-1);
69 
70 	/*
71 	 * Memory for ipath is allocated/released in this function.
72 	 */
73 	ipath = malloc(MAXPATHLEN);
74 	if (ipath == NULL) {
75 		free(cd);
76 		return ((iconv_t)-1);
77 	}
78 
79 	cd->_conv = iconv_open_all(tocode, fromcode, ipath);
80 	if (cd->_conv != (iconv_p)-1) {
81 		/* found a valid module for this conversion */
82 		free(ipath);
83 		return (cd);
84 	}
85 
86 	/*
87 	 * Now, try using the encoding name aliasing table
88 	 */
89 	cd->_conv = iconv_search_alias(tocode, fromcode, ipath);
90 	free(ipath);
91 	if (cd->_conv == (iconv_p)-1) {
92 		/* no valid module for this conversion found */
93 		free(cd);
94 		/* errno set by iconv_search_alias */
95 		return ((iconv_t)-1);
96 	}
97 	/* found a valid module for this conversion */
98 	return (cd);
99 }
100 
/*
 * Advance past any run of spaces and tabs, never moving beyond end.
 */
static char *
alias_skip_blanks(char *cur, const char *end)
{
	while ((cur < end) && ((*cur == ' ') || (*cur == '\t')))
		cur++;
	return (cur);
}

/*
 * Advance to the first space, tab or newline, or to end if none is found.
 */
static char *
alias_skip_word(char *cur, const char *end)
{
	while ((cur < end) && (*cur != ' ') && (*cur != '\t') &&
	    (*cur != '\n'))
		cur++;
	return (cur);
}

/*
 * Advance to the character following the next newline, or to end if the
 * buffer holds no further newline.
 */
static char *
alias_next_line(char *cur, const char *end)
{
	while (cur < end) {
		if (*cur++ == '\n')
			break;
	}
	return (cur);
}

/*
 * Look up variant in an in-memory alias table.
 *
 * The table is the size bytes starting at *paddr.  Each entry is one
 * line of the form
 *	<alias> <blanks> <canonical name> [anything else]
 * where blanks are spaces or tabs.  A line whose first character is '#'
 * is a comment.  A line holding an alias but no canonical name is skipped.
 * The alias is compared against variant without regard to case.
 *
 * On a match, *paddr is set to the first character of the canonical name
 * (which is NOT NUL terminated) and its length is returned.  If nothing
 * matches, 0 is returned and *paddr is left untouched.
 */
static size_t
search_alias(char **paddr, size_t size, const char *variant)
{
	char		*cur = *paddr;
	const char	*end = cur + size;
	size_t		want = strlen(variant);

	while (cur < end) {
		char	*word;
		size_t	wordlen;
		int	matched;

		if (*cur == '#') {
			/* comment line: discard it entirely */
			cur = alias_next_line(cur, end);
			continue;
		}

		/* isolate the alias name, the first word on the line */
		word = alias_skip_blanks(cur, end);
		if (word >= end)
			break;
		cur = alias_skip_word(word, end);
		if (cur >= end) {
			/* alias runs into the end of the table: invalid */
			break;
		}
		if (*cur == '\n') {
			/* alias without a canonical name: invalid entry */
			cur++;
			continue;
		}

		wordlen = (size_t)(cur - word);
		matched = (wordlen == want) &&
		    ((strncmp(word, variant, want) == 0) ||
		    (strncasecmp(word, variant, want) == 0));
		if (!matched) {
			/* not our alias; move on to the next line */
			cur = alias_next_line(cur, end);
			continue;
		}

		/* alias matched: the next word is the canonical name */
		word = alias_skip_blanks(cur, end);
		if (word >= end)
			break;
		cur = alias_skip_word(word, end);
		wordlen = (size_t)(cur - word);
		if (wordlen == 0) {
			/* only blanks followed the alias; keep looking */
			cur = alias_next_line(cur, end);
			continue;
		}

		*paddr = word;
		return (wordlen);
	}

	return (0);
}
179 
180 static iconv_p
181 iconv_open_all(const char *to, const char *from, char *ipath)
182 {
183 	iconv_p	cv;
184 	int	len;
185 
186 	/*
187 	 * First, try using the geniconvtbl conversion, which is
188 	 * performed by /usr/lib/iconv/geniconvtbl.so with
189 	 * the conversion table file:
190 	 * /usr/lib/iconv/geniconvtbl/binarytables/fromcode%tocode.bt
191 	 *
192 	 * If the geniconvtbl conversion cannot be done,
193 	 * try the conversion by the individual shared object.
194 	 */
195 
196 	len = snprintf(ipath, MAXPATHLEN, _GENICONVTBL_PATH, from, to);
197 	if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
198 		/*
199 		 * from%to.bt exists in the table dir
200 		 */
201 		cv = iconv_open_private(_GENICONVTBL_INT_PATH, ipath);
202 		if (cv != (iconv_p)-1) {
203 			/* found a valid module for this conversion */
204 			return (cv);
205 		}
206 	}
207 
208 	/* Next, try /usr/lib/iconv/from%to.so */
209 	len = snprintf(ipath, MAXPATHLEN, _ICONV_PATH, from, to);
210 	if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
211 		/*
212 		 * /usr/lib/iconv/from%to.so exists
213 		 * errno will be set by iconv_open_private on error
214 		 */
215 		return (iconv_open_private(ipath, NULL));
216 	}
217 	/* no valid module for this conversion found */
218 	errno = EINVAL;
219 	return ((iconv_p)-1);
220 }
221 
222 static iconv_p
223 iconv_search_alias(const char *tocode, const char *fromcode, char *ipath)
224 {
225 	char	*p;
226 	char	*to_canonical, *from_canonical;
227 	size_t	tolen, fromlen;
228 	iconv_p	cv;
229 	int	fd;
230 	struct stat64	statbuf;
231 	caddr_t	addr;
232 	size_t	buflen;
233 
234 	fd = open(_ENCODING_ALIAS_PATH, O_RDONLY);
235 	if (fd == -1) {
236 		/*
237 		 * if no alias file found,
238 		 * errno will be set to EINVAL.
239 		 */
240 		errno = EINVAL;
241 		return ((iconv_p)-1);
242 	}
243 	if (fstat64(fd, &statbuf) == -1) {
244 		(void) close(fd);
245 		/* use errno set by fstat64 */
246 		return ((iconv_p)-1);
247 	}
248 	buflen = (size_t)statbuf.st_size;
249 	addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0);
250 	(void) close(fd);
251 	if (addr == MAP_FAILED) {
252 		/* use errno set by mmap */
253 		return ((iconv_p)-1);
254 	}
255 	p = (char *)addr;
256 	tolen = search_alias(&p, buflen, tocode);
257 	if (tolen) {
258 		to_canonical = alloca(tolen + 1);
259 		(void) memcpy(to_canonical, p, tolen);
260 		to_canonical[tolen] = '\0';
261 	} else {
262 		to_canonical = (char *)tocode;
263 	}
264 	p = (char *)addr;
265 	fromlen = search_alias(&p, buflen, fromcode);
266 	if (fromlen) {
267 		from_canonical = alloca(fromlen + 1);
268 		(void) memcpy(from_canonical, p, fromlen);
269 		from_canonical[fromlen] = '\0';
270 	} else {
271 		from_canonical = (char *)fromcode;
272 	}
273 	(void) munmap(addr, buflen);
274 	if (tolen == 0 && fromlen == 0) {
275 		errno = EINVAL;
276 		return ((iconv_p)-1);
277 	}
278 
279 	cv = iconv_open_all(to_canonical, from_canonical, ipath);
280 
281 	/* errno set by iconv_open_all on error */
282 	return (cv);
283 }
284 
285 static iconv_p
286 iconv_open_private(const char *lib, const char *tbl)
287 {
288 	iconv_t (*fptr)(const char *);
289 	iconv_p cdpath;
290 
291 	if ((cdpath = malloc(sizeof (struct _iconv_fields))) == NULL)
292 		return ((iconv_p)-1);
293 
294 	if ((cdpath->_icv_handle = dlopen(lib, RTLD_LAZY)) == 0) {
295 		free(cdpath);
296 		/* dlopen does not define error no */
297 		errno = EINVAL;
298 		return ((iconv_p)-1);
299 	}
300 
301 	/* gets address of _icv_open */
302 	if ((fptr = (iconv_t(*)(const char *))dlsym(cdpath->_icv_handle,
303 	    "_icv_open")) == NULL) {
304 		(void) dlclose(cdpath->_icv_handle);
305 		free(cdpath);
306 		/* dlsym does not define errno */
307 		errno = EINVAL;
308 		return ((iconv_p)-1);
309 	}
310 
311 	/*
312 	 * gets address of _icv_iconv in the loadable conversion module
313 	 * and stores it in cdpath->_icv_iconv
314 	 */
315 
316 	if ((cdpath->_icv_iconv = (size_t(*)(iconv_t, const char **,
317 	    size_t *, char **, size_t *))dlsym(cdpath->_icv_handle,
318 	    "_icv_iconv")) == NULL) {
319 		(void) dlclose(cdpath->_icv_handle);
320 		free(cdpath);
321 		/* dlsym does not define errno */
322 		errno = EINVAL;
323 		return ((iconv_p)-1);
324 	}
325 
326 	/*
327 	 * gets address of _icv_close in the loadable conversion module
328 	 * and stores it in cd->_icv_close
329 	 */
330 	if ((cdpath->_icv_close = (void(*)(iconv_t))dlsym(cdpath->_icv_handle,
331 	    "_icv_close")) == NULL) {
332 		(void) dlclose(cdpath->_icv_handle);
333 		free(cdpath);
334 		/* dlsym does not define errno */
335 		errno = EINVAL;
336 		return ((iconv_p)-1);
337 	}
338 
339 	/*
340 	 * initialize the state of the actual _icv_iconv conversion routine
341 	 * For the normal iconv module, NULL will be passed as an argument
342 	 * although the iconv_open() of the module won't use that.
343 	 */
344 	cdpath->_icv_state = (void *)(*fptr)(tbl);
345 
346 	if (cdpath->_icv_state == (struct _icv_state *)-1) {
347 		(void) dlclose(cdpath->_icv_handle);
348 		free(cdpath);
349 		/* this module does not satisfy this conversion */
350 		errno = EINVAL;
351 		return ((iconv_p)-1);
352 	}
353 
354 	return (cdpath);
355 }
356 
357 int
358 iconv_close(iconv_t cd)
359 {
360 	if (cd == NULL) {
361 		errno = EBADF;
362 		return (-1);
363 	}
364 	(*(cd->_conv)->_icv_close)(cd->_conv->_icv_state);
365 	(void) dlclose(cd->_conv->_icv_handle);
366 	free(cd->_conv);
367 	free(cd);
368 	return (0);
369 }
370 
371 size_t
372 iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
373 	char **outbuf, size_t *outbytesleft)
374 {
375 	/* check if cd is valid */
376 	if (cd == NULL) {
377 		errno = EBADF;
378 		return ((size_t)-1);
379 	}
380 
381 	/* direct conversion */
382 	return ((*(cd->_conv)->_icv_iconv)(cd->_conv->_icv_state,
383 	    inbuf, inbytesleft, outbuf, outbytesleft));
384 }
385