1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include "lint.h"
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/mman.h>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <dlfcn.h>
34 #include <fcntl.h>
35 #include <unistd.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <sys/param.h>
39 #include <alloca.h>
40 #include "iconv.h"
41 #include "iconvP.h"
42 #include "../i18n/_loc_path.h"
43
44 static iconv_p iconv_open_all(const char *, const char *, char *);
45 static iconv_p iconv_open_private(const char *, const char *);
46 static iconv_p iconv_search_alias(const char *, const char *, char *);
47 static size_t passthru_icv_iconv(iconv_t, const char **, size_t *, char **,
48 size_t *);
49 static void passthru_icv_close(iconv_t);
50
51 #define PASSTHRU_MAGIC_NUMBER (0x53756e)
52
53
54 /*
55 * These functions are mainly implemented by using a shared object and
56 * the dlopen() functions. The actual conversion algorithm for a particular
57 * conversion is implemented via a shared object as a loadable conversion
58 * module which is linked dynamically at run time.
59 *
60 * The loadable conversion module resides as either:
61 *
62 * /usr/lib/iconv/geniconvtbl.so
63 *
64 * if the conversion is supported through a geniconvtbl code conversion
65 * binary table or as a module that directly specifies the conversion at:
66 *
67 * /usr/lib/iconv/fromcode%tocode.so
68 *
69 * where fromcode is the source encoding and tocode is the target encoding.
70 * The modules have 3 entries: _icv_open(), _icv_iconv(), and _icv_close().
71 *
72 * If there is no code conversion supported and if the fromcode and the tocode
73 * are specifying the same codeset, then, the byte-by-byte, pass-through code
74 * conversion that is embedded in the libc is used instead.
75 *
76 * The following are the related PSARC cases:
77 *
78 * PSARC/1993/153 iconv/iconv_open/iconv_close
79 * PSARC/1999/292 Addition of geniconvtbl(1)
80 * PSARC/2001/072 GNU gettext support
81 * PSARC/2009/561 Pass-through iconv code conversion
82 *
83 * The PSARC/2001/072 includes the /usr/lib/iconv/alias interface.
84 */
85
86 iconv_t
iconv_open(const char * tocode,const char * fromcode)87 iconv_open(const char *tocode, const char *fromcode)
88 {
89 iconv_t cd;
90 char *ipath;
91
92 if ((cd = malloc(sizeof (struct _iconv_info))) == NULL)
93 return ((iconv_t)-1);
94
95 /*
96 * Memory for ipath is allocated/released in this function.
97 */
98 ipath = malloc(MAXPATHLEN);
99 if (ipath == NULL) {
100 free(cd);
101 return ((iconv_t)-1);
102 }
103
104 cd->_conv = iconv_open_all(tocode, fromcode, ipath);
105 if (cd->_conv != (iconv_p)-1) {
106 /* found a valid module for this conversion */
107 free(ipath);
108 return (cd);
109 }
110
111 /*
112 * Now, try using the encoding name aliasing table
113 */
114 cd->_conv = iconv_search_alias(tocode, fromcode, ipath);
115 free(ipath);
116 if (cd->_conv == (iconv_p)-1) {
117 /*
118 * As the last resort, check if the tocode and the fromcode
119 * are referring to the same codeset name or not. If so,
120 * assign the embedded pass-through code conversion.
121 */
122 if (strcasecmp(tocode, fromcode) != 0) {
123 /*
124 * No valid conversion available. Do failure retrun
125 * with the errno set by iconv_search_alias().
126 */
127 free(cd);
128 return ((iconv_t)-1);
129 }
130
131 /*
132 * For a pass-through byte-by-byte code conversion, allocate
133 * an internal conversion descriptor and initialize the data
134 * fields appropriately and we are done.
135 */
136 cd->_conv = malloc(sizeof (struct _iconv_fields));
137 if (cd->_conv == NULL) {
138 free(cd);
139 return ((iconv_t)-1);
140 }
141
142 cd->_conv->_icv_handle = NULL;
143 cd->_conv->_icv_iconv = passthru_icv_iconv;
144 cd->_conv->_icv_close = passthru_icv_close;
145 cd->_conv->_icv_state = (void *)PASSTHRU_MAGIC_NUMBER;
146 }
147
148 /* found a valid module for this conversion */
149 return (cd);
150 }
151
/* Step past the remainder of the current line, newline included. */
static char *
alias_next_line(char *cur, char *end)
{
	while ((cur < end) && (*cur++ != '\n'))
		;
	return (cur);
}

/* Step past any run of spaces and tabs. */
static char *
alias_skip_blanks(char *cur, char *end)
{
	while ((cur < end) && ((*cur == ' ') || (*cur == '\t')))
		cur++;
	return (cur);
}

/* Return the first space, tab or newline at or after cur, or end. */
static char *
alias_token_end(char *cur, char *end)
{
	while ((cur < end) && (*cur != ' ') && (*cur != '\t') &&
	    (*cur != '\n'))
		cur++;
	return (cur);
}

/*
 * Scan an in-memory alias table for an entry whose first field equals
 * variant (compared ignoring case).
 *
 * The table starts at *paddr and is size bytes long. Each entry is one
 * line of the form "variant<blanks>canonical"; a line starting with '#'
 * is a comment, and a line with only one field is skipped.
 *
 * On a match, *paddr is set to the start of the canonical name inside the
 * table (it is NOT NUL-terminated) and its length is returned. When
 * nothing matches, 0 is returned and *paddr is left untouched.
 */
static size_t
search_alias(char **paddr, size_t size, const char *variant)
{
	char *cur = *paddr;
	char *end = cur + size;
	char *tok;
	size_t want = strlen(variant);
	size_t found;

	while (cur < end) {
		if (*cur == '#') {
			/* comment line */
			cur = alias_next_line(cur, end);
			continue;
		}

		/* first field: the variant name */
		tok = alias_skip_blanks(cur, end);
		cur = alias_token_end(tok, end);
		if (cur >= end) {
			/* ran off the table before a separator was seen */
			break;
		}
		if (*cur == '\n') {
			/* line holds a single field: invalid entry */
			cur++;
			continue;
		}

		if (((size_t)(cur - tok) != want) ||
		    ((strncmp(tok, variant, want) != 0) &&
		    (strncasecmp(tok, variant, want) != 0))) {
			/* not the name we are looking for */
			cur = alias_next_line(cur, end);
			continue;
		}

		/* second field: the canonical name */
		tok = alias_skip_blanks(cur, end);
		if (tok >= end)
			break;
		cur = alias_token_end(tok, end);
		found = (size_t)(cur - tok);
		if (found == 0) {
			/* only blanks followed the variant name */
			cur = alias_next_line(cur, end);
			continue;
		}

		*paddr = tok;
		return (found);
	}
	return (0);
}
230
231 static iconv_p
iconv_open_all(const char * to,const char * from,char * ipath)232 iconv_open_all(const char *to, const char *from, char *ipath)
233 {
234 iconv_p cv;
235 int len;
236
237 /*
238 * First, try using the geniconvtbl conversion, which is
239 * performed by /usr/lib/iconv/geniconvtbl.so with
240 * the conversion table file:
241 * /usr/lib/iconv/geniconvtbl/binarytables/fromcode%tocode.bt
242 *
243 * If the geniconvtbl conversion cannot be done,
244 * try the conversion by the individual shared object.
245 */
246
247 len = snprintf(ipath, MAXPATHLEN, _GENICONVTBL_PATH, from, to);
248 if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
249 /*
250 * from%to.bt exists in the table dir
251 */
252 cv = iconv_open_private(_GENICONVTBL_INT_PATH, ipath);
253 if (cv != (iconv_p)-1) {
254 /* found a valid module for this conversion */
255 return (cv);
256 }
257 }
258
259 /* Next, try /usr/lib/iconv/from%to.so */
260 len = snprintf(ipath, MAXPATHLEN, _ICONV_PATH, from, to);
261 if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
262 /*
263 * /usr/lib/iconv/from%to.so exists
264 * errno will be set by iconv_open_private on error
265 */
266 return (iconv_open_private(ipath, NULL));
267 }
268 /* no valid module for this conversion found */
269 errno = EINVAL;
270 return ((iconv_p)-1);
271 }
272
273 static iconv_p
iconv_search_alias(const char * tocode,const char * fromcode,char * ipath)274 iconv_search_alias(const char *tocode, const char *fromcode, char *ipath)
275 {
276 char *p;
277 char *to_canonical, *from_canonical;
278 size_t tolen, fromlen;
279 iconv_p cv;
280 int fd;
281 struct stat64 statbuf;
282 caddr_t addr;
283 size_t buflen;
284
285 fd = open(_ENCODING_ALIAS_PATH, O_RDONLY);
286 if (fd == -1) {
287 /*
288 * if no alias file found,
289 * errno will be set to EINVAL.
290 */
291 errno = EINVAL;
292 return ((iconv_p)-1);
293 }
294 if (fstat64(fd, &statbuf) == -1) {
295 (void) close(fd);
296 /* use errno set by fstat64 */
297 return ((iconv_p)-1);
298 }
299 buflen = (size_t)statbuf.st_size;
300 addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0);
301 (void) close(fd);
302 if (addr == MAP_FAILED) {
303 /* use errno set by mmap */
304 return ((iconv_p)-1);
305 }
306 p = (char *)addr;
307 tolen = search_alias(&p, buflen, tocode);
308 if (tolen) {
309 to_canonical = alloca(tolen + 1);
310 (void) memcpy(to_canonical, p, tolen);
311 to_canonical[tolen] = '\0';
312 } else {
313 to_canonical = (char *)tocode;
314 }
315 p = (char *)addr;
316 fromlen = search_alias(&p, buflen, fromcode);
317 if (fromlen) {
318 from_canonical = alloca(fromlen + 1);
319 (void) memcpy(from_canonical, p, fromlen);
320 from_canonical[fromlen] = '\0';
321 } else {
322 from_canonical = (char *)fromcode;
323 }
324 (void) munmap(addr, buflen);
325 if (tolen == 0 && fromlen == 0) {
326 errno = EINVAL;
327 return ((iconv_p)-1);
328 }
329
330 cv = iconv_open_all(to_canonical, from_canonical, ipath);
331
332 /* errno set by iconv_open_all on error */
333 return (cv);
334 }
335
336 static iconv_p
iconv_open_private(const char * lib,const char * tbl)337 iconv_open_private(const char *lib, const char *tbl)
338 {
339 iconv_t (*fptr)(const char *);
340 iconv_p cdpath;
341
342 if ((cdpath = malloc(sizeof (struct _iconv_fields))) == NULL)
343 return ((iconv_p)-1);
344
345 if ((cdpath->_icv_handle = dlopen(lib, RTLD_LAZY)) == 0) {
346 free(cdpath);
347 /* dlopen does not define error no */
348 errno = EINVAL;
349 return ((iconv_p)-1);
350 }
351
352 /* gets address of _icv_open */
353 if ((fptr = (iconv_t(*)(const char *))dlsym(cdpath->_icv_handle,
354 "_icv_open")) == NULL) {
355 (void) dlclose(cdpath->_icv_handle);
356 free(cdpath);
357 /* dlsym does not define errno */
358 errno = EINVAL;
359 return ((iconv_p)-1);
360 }
361
362 /*
363 * gets address of _icv_iconv in the loadable conversion module
364 * and stores it in cdpath->_icv_iconv
365 */
366
367 if ((cdpath->_icv_iconv = (size_t(*)(iconv_t, const char **,
368 size_t *, char **, size_t *))dlsym(cdpath->_icv_handle,
369 "_icv_iconv")) == NULL) {
370 (void) dlclose(cdpath->_icv_handle);
371 free(cdpath);
372 /* dlsym does not define errno */
373 errno = EINVAL;
374 return ((iconv_p)-1);
375 }
376
377 /*
378 * gets address of _icv_close in the loadable conversion module
379 * and stores it in cd->_icv_close
380 */
381 if ((cdpath->_icv_close = (void(*)(iconv_t))dlsym(cdpath->_icv_handle,
382 "_icv_close")) == NULL) {
383 (void) dlclose(cdpath->_icv_handle);
384 free(cdpath);
385 /* dlsym does not define errno */
386 errno = EINVAL;
387 return ((iconv_p)-1);
388 }
389
390 /*
391 * initialize the state of the actual _icv_iconv conversion routine
392 * For the normal iconv module, NULL will be passed as an argument
393 * although the iconv_open() of the module won't use that.
394 */
395 cdpath->_icv_state = (void *)(*fptr)(tbl);
396
397 if (cdpath->_icv_state == (struct _icv_state *)-1) {
398 (void) dlclose(cdpath->_icv_handle);
399 free(cdpath);
400 /* this module does not satisfy this conversion */
401 errno = EINVAL;
402 return ((iconv_p)-1);
403 }
404
405 return (cdpath);
406 }
407
408 int
iconv_close(iconv_t cd)409 iconv_close(iconv_t cd)
410 {
411 if (cd == NULL) {
412 errno = EBADF;
413 return (-1);
414 }
415 (*(cd->_conv)->_icv_close)(cd->_conv->_icv_state);
416 if (cd->_conv->_icv_handle != NULL)
417 (void) dlclose(cd->_conv->_icv_handle);
418 free(cd->_conv);
419 free(cd);
420 return (0);
421 }
422
423 /*
424 * To have minimal performance impact to the existing run-time behavior,
425 * we supply a dummy passthru_icv_close() that will just return.
426 */
427 static void
passthru_icv_close(iconv_t cd __unused)428 passthru_icv_close(iconv_t cd __unused)
429 {
430 }
431
432 size_t
iconv(iconv_t cd,const char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)433 iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
434 char **outbuf, size_t *outbytesleft)
435 {
436 /* check if cd is valid */
437 if (cd == NULL || cd == (iconv_t)-1) {
438 errno = EBADF;
439 return ((size_t)-1);
440 }
441
442 /* direct conversion */
443 return ((*(cd->_conv)->_icv_iconv)(cd->_conv->_icv_state,
444 inbuf, inbytesleft, outbuf, outbytesleft));
445 }
446
447 static size_t
passthru_icv_iconv(iconv_t cd,const char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)448 passthru_icv_iconv(iconv_t cd, const char **inbuf, size_t *inbufleft,
449 char **outbuf, size_t *outbufleft)
450 {
451 size_t ibl;
452 size_t obl;
453 size_t len;
454 size_t ret_val;
455
456 /* Check if the conversion descriptor is a valid one. */
457 if (cd != (iconv_t)PASSTHRU_MAGIC_NUMBER) {
458 errno = EBADF;
459 return ((size_t)-1);
460 }
461
462 /* For any state reset request, return success. */
463 if (inbuf == NULL || *inbuf == NULL)
464 return (0);
465
466 /*
467 * Initialize internally used variables for a better performance
468 * and prepare for a couple of the return values before the actual
469 * copying of the bytes.
470 */
471 ibl = *inbufleft;
472 obl = *outbufleft;
473
474 if (ibl > obl) {
475 len = obl;
476 errno = E2BIG;
477 ret_val = (size_t)-1;
478 } else {
479 len = ibl;
480 ret_val = 0;
481 }
482
483 /*
484 * Do the copy using memmove(). There are no EILSEQ or EINVAL
485 * checkings since this is a simple copying.
486 */
487 (void) memmove((void *)*outbuf, (const void *)*inbuf, len);
488
489 /* Update the return values related to the buffers then do return. */
490 *inbuf = *inbuf + len;
491 *outbuf = *outbuf + len;
492 *inbufleft = ibl - len;
493 *outbufleft = obl - len;
494
495 return (ret_val);
496 }
497