1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2025 Hans Rosenfeld
24 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
26 */
27
28 #include "lint.h"
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/mman.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <dlfcn.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <string.h>
38 #include <errno.h>
39 #include <sys/param.h>
40 #include <alloca.h>
41 #include "iconv.h"
42 #include "iconvP.h"
43 #include "../i18n/_loc_path.h"
44
45 static iconv_p iconv_open_all(const char *, const char *, char *);
46 static iconv_p iconv_open_passthru(void);
47 static iconv_p iconv_open_private(const char *, const char *);
48 static iconv_p iconv_search_alias(const char *, const char *, char *);
49 static size_t passthru_icv_iconv(iconv_t, const char **, size_t *, char **,
50 size_t *);
51 static void passthru_icv_close(iconv_t);
52
53 #define PASSTHRU_MAGIC_NUMBER (0x53756e)
54
55
56 /*
57 * These functions are mainly implemented by using a shared object and
58 * the dlopen() functions. The actual conversion algorithm for a particular
59 * conversion is implemented via a shared object as a loadable conversion
60 * module which is linked dynamically at run time.
61 *
62 * The loadable conversion module resides as either:
63 *
64 * /usr/lib/iconv/geniconvtbl.so
65 *
66 * if the conversion is supported through a geniconvtbl code conversion
67 * binary table or as a module that directly specifies the conversion at:
68 *
69 * /usr/lib/iconv/fromcode%tocode.so
70 *
71 * where fromcode is the source encoding and tocode is the target encoding.
72 * The modules have 3 entries: _icv_open(), _icv_iconv(), and _icv_close().
73 *
 * If no conversion module is available and fromcode and tocode name the
 * same codeset, the byte-by-byte pass-through conversion embedded in libc
 * is used instead.
77 *
78 * The following are the related PSARC cases:
79 *
80 * PSARC/1993/153 iconv/iconv_open/iconv_close
81 * PSARC/1999/292 Addition of geniconvtbl(1)
82 * PSARC/2001/072 GNU gettext support
83 * PSARC/2009/561 Pass-through iconv code conversion
84 *
85 * The PSARC/2001/072 includes the /usr/lib/iconv/alias interface.
86 */
87
88 iconv_t
iconv_open(const char * tocode,const char * fromcode)89 iconv_open(const char *tocode, const char *fromcode)
90 {
91 iconv_t cd;
92 char *ipath;
93
94 if ((cd = malloc(sizeof (struct _iconv_info))) == NULL)
95 return ((iconv_t)-1);
96
97 /*
98 * Memory for ipath is allocated/released in this function.
99 */
100 ipath = malloc(MAXPATHLEN);
101 if (ipath == NULL) {
102 free(cd);
103 return ((iconv_t)-1);
104 }
105
106 cd->_conv = iconv_open_all(tocode, fromcode, ipath);
107 if (cd->_conv != (iconv_p)-1) {
108 /* found a valid module for this conversion */
109 free(ipath);
110 return (cd);
111 }
112
113 /*
114 * Now, try using the encoding name aliasing table
115 */
116 cd->_conv = iconv_search_alias(tocode, fromcode, ipath);
117 free(ipath);
118 if (cd->_conv == (iconv_p)-1) {
119 free(cd);
120 return ((iconv_t)-1);
121 }
122
123 /* found a valid module for this conversion */
124 return (cd);
125 }
126
/* Step over spaces and tabs, never moving past 'end'. */
static char *
alias_skip_blanks(char *p, char *end)
{
	while (p < end && (*p == ' ' || *p == '\t'))
		p++;
	return (p);
}

/* Step to the first space, tab or newline, or to 'end' if there is none. */
static char *
alias_skip_token(char *p, char *end)
{
	while (p < end && *p != ' ' && *p != '\t' && *p != '\n')
		p++;
	return (p);
}

/* Step just past the next newline, or to 'end' if there is none. */
static char *
alias_skip_line(char *p, char *end)
{
	while (p < end) {
		if (*p++ == '\n')
			break;
	}
	return (p);
}

/*
 * Look up 'variant' in the alias table text of 'size' bytes that starts at
 * *paddr.  Each entry is one line: a variant name, blanks, then a canonical
 * name.  Lines whose first character is '#' are comments.  The variant is
 * compared without regard to case.
 *
 * On a match *paddr is set to the start of the canonical name inside the
 * table (it is not NUL terminated) and its length is returned.  When no
 * usable entry is found, 0 is returned and *paddr is left untouched.
 */
static size_t
search_alias(char **paddr, size_t size, const char *variant)
{
	char	*cur = *paddr;
	char	*end = cur + size;
	char	*tok;
	size_t	want = strlen(variant);
	size_t	canon_len;

	while (cur < end) {
		if (*cur == '#') {
			/* comment line */
			cur = alias_skip_line(cur + 1, end);
			continue;
		}

		cur = alias_skip_blanks(cur, end);
		if (cur >= end)
			break;

		tok = cur;
		cur = alias_skip_token(cur, end);
		if (cur >= end) {
			/* the table ends inside the first field */
			break;
		}
		if (*cur == '\n') {
			/* a line with fewer than two fields is ignored */
			cur++;
			continue;
		}

		if ((size_t)(cur - tok) == want &&
		    (strncmp(tok, variant, want) == 0 ||
		    strncasecmp(tok, variant, want) == 0)) {
			/* variant matched; pick up the canonical name */
			cur = alias_skip_blanks(cur, end);
			if (cur >= end)
				break;

			tok = cur;
			cur = alias_skip_token(cur, end);
			canon_len = cur - tok;
			if (canon_len != 0) {
				*paddr = tok;
				return (canon_len);
			}

			/* nothing but blanks after the variant; keep looking */
			cur = alias_skip_line(cur, end);
			continue;
		}

		/* no match; cur sits on a blank, discard the rest of the line */
		cur = alias_skip_line(cur + 1, end);
	}

	return (0);
}
205
206 static iconv_p
iconv_open_all(const char * to,const char * from,char * ipath)207 iconv_open_all(const char *to, const char *from, char *ipath)
208 {
209 iconv_p cv;
210 int len;
211
212 /*
213 * First, try using the geniconvtbl conversion, which is performed by
214 * /usr/lib/iconv/geniconvtbl.so with the conversion table file:
215 * /usr/lib/iconv/geniconvtbl/binarytables/fromcode%tocode.bt
216 *
217 * If the geniconvtbl conversion cannot be done, try the conversion
218 * by the individual shared object.
219 */
220
221 len = snprintf(ipath, MAXPATHLEN, _GENICONVTBL_PATH, from, to);
222 if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
223 /*
224 * from%to.bt exists in the table dir
225 */
226 cv = iconv_open_private(_GENICONVTBL_INT_PATH, ipath);
227 if (cv != (iconv_p)-1) {
228 /* found a valid module for this conversion */
229 return (cv);
230 }
231 }
232
233 /* Next, try /usr/lib/iconv/from%to.so */
234 len = snprintf(ipath, MAXPATHLEN, _ICONV_PATH, from, to);
235 if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
236 /*
237 * /usr/lib/iconv/from%to.so exists
238 * errno will be set by iconv_open_private on error
239 */
240 return (iconv_open_private(ipath, NULL));
241 }
242
243 /*
244 * Finally, as a last resort check if the 'to' and the 'from' are
245 * referring to the same codeset name or not. If so, assign the
246 * embedded pass-through code conversion.
247 */
248 if (strcasecmp(to, from) == 0)
249 return (iconv_open_passthru());
250
251 /* no valid module for this conversion found */
252 errno = EINVAL;
253 return ((iconv_p)-1);
254 }
255
256 static iconv_p
iconv_search_alias(const char * tocode,const char * fromcode,char * ipath)257 iconv_search_alias(const char *tocode, const char *fromcode, char *ipath)
258 {
259 char *p;
260 char *to_canonical, *from_canonical;
261 size_t tolen, fromlen;
262 iconv_p cv;
263 int fd;
264 struct stat64 statbuf;
265 caddr_t addr;
266 size_t buflen;
267
268 fd = open(_ENCODING_ALIAS_PATH, O_RDONLY);
269 if (fd == -1) {
270 /*
271 * if no alias file found,
272 * errno will be set to EINVAL.
273 */
274 errno = EINVAL;
275 return ((iconv_p)-1);
276 }
277 if (fstat64(fd, &statbuf) == -1) {
278 (void) close(fd);
279 /* use errno set by fstat64 */
280 return ((iconv_p)-1);
281 }
282 buflen = (size_t)statbuf.st_size;
283 addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0);
284 (void) close(fd);
285 if (addr == MAP_FAILED) {
286 /* use errno set by mmap */
287 return ((iconv_p)-1);
288 }
289 p = (char *)addr;
290 tolen = search_alias(&p, buflen, tocode);
291 if (tolen) {
292 to_canonical = alloca(tolen + 1);
293 (void) memcpy(to_canonical, p, tolen);
294 to_canonical[tolen] = '\0';
295 } else {
296 to_canonical = (char *)tocode;
297 }
298 p = (char *)addr;
299 fromlen = search_alias(&p, buflen, fromcode);
300 if (fromlen) {
301 from_canonical = alloca(fromlen + 1);
302 (void) memcpy(from_canonical, p, fromlen);
303 from_canonical[fromlen] = '\0';
304 } else {
305 from_canonical = (char *)fromcode;
306 }
307 (void) munmap(addr, buflen);
308 if (tolen == 0 && fromlen == 0) {
309 errno = EINVAL;
310 return ((iconv_p)-1);
311 }
312
313 cv = iconv_open_all(to_canonical, from_canonical, ipath);
314
315 /* errno set by iconv_open_all on error */
316 return (cv);
317 }
318
319 static iconv_p
iconv_open_passthru(void)320 iconv_open_passthru(void)
321 {
322 iconv_p cdpath;
323
324 /*
325 * For a pass-through byte-by-byte code conversion, allocate
326 * an internal conversion descriptor and initialize the data
327 * fields appropriately and we are done.
328 */
329 cdpath = malloc(sizeof (struct _iconv_fields));
330 if (cdpath == NULL)
331 return ((iconv_p)-1);
332
333 cdpath->_icv_handle = NULL;
334 cdpath->_icv_iconv = passthru_icv_iconv;
335 cdpath->_icv_close = passthru_icv_close;
336 cdpath->_icv_state = (void *)PASSTHRU_MAGIC_NUMBER;
337
338 return (cdpath);
339 }
340
341 static iconv_p
iconv_open_private(const char * lib,const char * tbl)342 iconv_open_private(const char *lib, const char *tbl)
343 {
344 iconv_t (*fptr)(const char *);
345 iconv_p cdpath;
346
347 if ((cdpath = malloc(sizeof (struct _iconv_fields))) == NULL)
348 return ((iconv_p)-1);
349
350 if ((cdpath->_icv_handle = dlopen(lib, RTLD_LAZY)) == 0) {
351 free(cdpath);
352 /* dlopen does not define error no */
353 errno = EINVAL;
354 return ((iconv_p)-1);
355 }
356
357 /* gets address of _icv_open */
358 if ((fptr = (iconv_t(*)(const char *))dlsym(cdpath->_icv_handle,
359 "_icv_open")) == NULL) {
360 (void) dlclose(cdpath->_icv_handle);
361 free(cdpath);
362 /* dlsym does not define errno */
363 errno = EINVAL;
364 return ((iconv_p)-1);
365 }
366
367 /*
368 * gets address of _icv_iconv in the loadable conversion module
369 * and stores it in cdpath->_icv_iconv
370 */
371
372 if ((cdpath->_icv_iconv = (size_t(*)(iconv_t, const char **,
373 size_t *, char **, size_t *))dlsym(cdpath->_icv_handle,
374 "_icv_iconv")) == NULL) {
375 (void) dlclose(cdpath->_icv_handle);
376 free(cdpath);
377 /* dlsym does not define errno */
378 errno = EINVAL;
379 return ((iconv_p)-1);
380 }
381
382 /*
383 * gets address of _icv_close in the loadable conversion module
384 * and stores it in cd->_icv_close
385 */
386 if ((cdpath->_icv_close = (void(*)(iconv_t))dlsym(cdpath->_icv_handle,
387 "_icv_close")) == NULL) {
388 (void) dlclose(cdpath->_icv_handle);
389 free(cdpath);
390 /* dlsym does not define errno */
391 errno = EINVAL;
392 return ((iconv_p)-1);
393 }
394
395 /*
396 * initialize the state of the actual _icv_iconv conversion routine
397 * For the normal iconv module, NULL will be passed as an argument
398 * although the iconv_open() of the module won't use that.
399 */
400 cdpath->_icv_state = (void *)(*fptr)(tbl);
401
402 if (cdpath->_icv_state == (struct _icv_state *)-1) {
403 (void) dlclose(cdpath->_icv_handle);
404 free(cdpath);
405 /* this module does not satisfy this conversion */
406 errno = EINVAL;
407 return ((iconv_p)-1);
408 }
409
410 return (cdpath);
411 }
412
413 int
iconv_close(iconv_t cd)414 iconv_close(iconv_t cd)
415 {
416 if (cd == NULL) {
417 errno = EBADF;
418 return (-1);
419 }
420 (*(cd->_conv)->_icv_close)(cd->_conv->_icv_state);
421 if (cd->_conv->_icv_handle != NULL)
422 (void) dlclose(cd->_conv->_icv_handle);
423 free(cd->_conv);
424 free(cd);
425 return (0);
426 }
427
428 /*
429 * To have minimal performance impact to the existing run-time behavior,
430 * we supply a dummy passthru_icv_close() that will just return.
431 */
432 static void
passthru_icv_close(iconv_t cd __unused)433 passthru_icv_close(iconv_t cd __unused)
434 {
435 }
436
437 size_t
iconv(iconv_t cd,const char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)438 iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
439 char **outbuf, size_t *outbytesleft)
440 {
441 /* check if cd is valid */
442 if (cd == NULL || cd == (iconv_t)-1) {
443 errno = EBADF;
444 return ((size_t)-1);
445 }
446
447 /* direct conversion */
448 return ((*(cd->_conv)->_icv_iconv)(cd->_conv->_icv_state,
449 inbuf, inbytesleft, outbuf, outbytesleft));
450 }
451
/*
 * Conversion routine of the pass-through conversion: copy input bytes to
 * the output unchanged.
 *
 * 'cd' is the conversion state and must be PASSTHRU_MAGIC_NUMBER,
 * otherwise the call fails with EBADF.  A NULL 'inbuf' or '*inbuf' is a
 * state reset request and succeeds without touching anything.
 *
 * As many bytes as fit are copied, and the buffer pointers and remaining
 * counts are advanced by that amount.  Returns 0 if all input was
 * consumed, or (size_t)-1 with errno set to E2BIG if the output buffer was
 * too small for all of it.
 */
static size_t
passthru_icv_iconv(iconv_t cd, const char **inbuf, size_t *inbufleft,
    char **outbuf, size_t *outbufleft)
{
	size_t	avail_in;
	size_t	avail_out;
	size_t	ncopy;
	size_t	status = 0;

	/* Only the state handed out by iconv_open_passthru() is accepted. */
	if (cd != (iconv_t)PASSTHRU_MAGIC_NUMBER) {
		errno = EBADF;
		return ((size_t)-1);
	}

	/* There is no shift state, so a reset request is trivially done. */
	if (inbuf == NULL || *inbuf == NULL)
		return (0);

	avail_in = *inbufleft;
	avail_out = *outbufleft;

	/* Copy everything if it fits, otherwise fill the output buffer. */
	ncopy = avail_in;
	if (avail_in > avail_out) {
		ncopy = avail_out;
		errno = E2BIG;
		status = (size_t)-1;
	}

	/* memmove() keeps the copy well defined if the buffers overlap. */
	(void) memmove(*outbuf, *inbuf, ncopy);

	*inbuf += ncopy;
	*outbuf += ncopy;
	*inbufleft = avail_in - ncopy;
	*outbufleft = avail_out - ncopy;

	return (status);
}
502