xref: /illumos-gate/usr/src/lib/libc/port/gen/iconv.c (revision 1ffbc8eb6a1226e34441eff48b6b078ed52db3b6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2025 Hans Rosenfeld
24  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #include "lint.h"
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/mman.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <dlfcn.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <string.h>
38 #include <errno.h>
39 #include <sys/param.h>
40 #include <alloca.h>
41 #include "iconv.h"
42 #include "iconvP.h"
43 #include "../i18n/_loc_path.h"
44 
45 static iconv_p	iconv_open_all(const char *, const char *, char *);
46 static iconv_p	iconv_open_passthru(void);
47 static iconv_p	iconv_open_private(const char *, const char *);
48 static iconv_p	iconv_search_alias(const char *, const char *, char *);
49 static size_t	passthru_icv_iconv(iconv_t, const char **, size_t *, char **,
50     size_t *);
51 static void	passthru_icv_close(iconv_t);
52 
53 #define	PASSTHRU_MAGIC_NUMBER	(0x53756e)
54 
55 
56 /*
57  * These functions are mainly implemented by using a shared object and
58  * the dlopen() functions. The actual conversion algorithm for a particular
59  * conversion is implemented via a shared object as a loadable conversion
60  * module which is linked dynamically at run time.
61  *
62  * The loadable conversion module resides as either:
63  *
64  *	/usr/lib/iconv/geniconvtbl.so
65  *
66  * if the conversion is supported through a geniconvtbl code conversion
67  * binary table or as a module that directly specifies the conversion at:
68  *
69  *	/usr/lib/iconv/fromcode%tocode.so
70  *
71  * where fromcode is the source encoding and tocode is the target encoding.
72  * The modules have 3 entries: _icv_open(), _icv_iconv(), and _icv_close().
73  *
74  * If there is no code conversion supported and if the fromcode and the tocode
75  * are specifying the same codeset, then, the byte-by-byte, pass-through code
76  * conversion that is embedded in the libc is used instead.
77  *
78  * The following are the related PSARC cases:
79  *
80  *	PSARC/1993/153 iconv/iconv_open/iconv_close
81  *	PSARC/1999/292 Addition of geniconvtbl(1)
82  *	PSARC/2001/072 GNU gettext support
83  *	PSARC/2009/561 Pass-through iconv code conversion
84  *
85  * The PSARC/2001/072 includes the /usr/lib/iconv/alias interface.
86  */
87 
88 iconv_t
iconv_open(const char * tocode,const char * fromcode)89 iconv_open(const char *tocode, const char *fromcode)
90 {
91 	iconv_t	cd;
92 	char	*ipath;
93 
94 	if ((cd = malloc(sizeof (struct _iconv_info))) == NULL)
95 		return ((iconv_t)-1);
96 
97 	/*
98 	 * Memory for ipath is allocated/released in this function.
99 	 */
100 	ipath = malloc(MAXPATHLEN);
101 	if (ipath == NULL) {
102 		free(cd);
103 		return ((iconv_t)-1);
104 	}
105 
106 	cd->_conv = iconv_open_all(tocode, fromcode, ipath);
107 	if (cd->_conv != (iconv_p)-1) {
108 		/* found a valid module for this conversion */
109 		free(ipath);
110 		return (cd);
111 	}
112 
113 	/*
114 	 * Now, try using the encoding name aliasing table
115 	 */
116 	cd->_conv = iconv_search_alias(tocode, fromcode, ipath);
117 	free(ipath);
118 	if (cd->_conv == (iconv_p)-1) {
119 		free(cd);
120 		return ((iconv_t)-1);
121 	}
122 
123 	/* found a valid module for this conversion */
124 	return (cd);
125 }
126 
/*
 * Look up 'variant' in an in-memory copy of the encoding alias table.
 *
 * The table starts at *paddr and is 'size' bytes long; it does not need to
 * be NUL-terminated. Each entry is one line of the form
 *
 *	<alias> <canonical-name>
 *
 * with fields separated by spaces or tabs. A line whose first character is
 * '#' is a comment. The alias is compared against 'variant' without regard
 * to case.
 *
 * On a match, *paddr is set to the first byte of the canonical name inside
 * the table and its length is returned. If nothing matches, 0 is returned
 * and *paddr is left untouched.
 */
static size_t
search_alias(char **paddr, size_t size, const char *variant)
{
	const size_t	want = strlen(variant);
	char		*cur = *paddr;
	char		*end = cur + size;
	char		*field;
	size_t		flen;
	int		matched;

	while (cur < end) {
		matched = 0;

		if (*cur != '#') {
			/* skip leading blanks */
			for (; cur < end; cur++) {
				if (*cur != ' ' && *cur != '\t')
					break;
			}
			if (cur >= end)
				return (0);

			/* first field: the alias name */
			field = cur;
			for (; cur < end; cur++) {
				if (*cur == ' ' || *cur == '\t' ||
				    *cur == '\n')
					break;
			}
			if (cur >= end) {
				/* table ends inside the first field */
				return (0);
			}
			if (*cur == '\n') {
				/* only one field on the line: invalid entry */
				cur++;
				continue;
			}

			flen = (size_t)(cur - field);
			matched = (flen == want &&
			    (strncmp(field, variant, want) == 0 ||
			    strncasecmp(field, variant, want) == 0));
		}

		if (matched) {
			/* skip the blanks separating the two fields */
			for (; cur < end; cur++) {
				if (*cur != ' ' && *cur != '\t')
					break;
			}
			if (cur >= end)
				return (0);

			/* second field: the canonical name */
			field = cur;
			for (; cur < end; cur++) {
				if (*cur == ' ' || *cur == '\t' ||
				    *cur == '\n')
					break;
			}
			flen = (size_t)(cur - field);
			if (flen != 0) {
				*paddr = field;
				return (flen);
			}
			/* empty canonical name: keep looking */
		}

		/*
		 * Comment, non-matching entry or entry without a canonical
		 * name: move to the first character after the next newline.
		 */
		while (cur < end) {
			if (*cur++ == '\n')
				break;
		}
	}

	return (0);
}
205 
206 static iconv_p
iconv_open_all(const char * to,const char * from,char * ipath)207 iconv_open_all(const char *to, const char *from, char *ipath)
208 {
209 	iconv_p	cv;
210 	int	len;
211 
212 	/*
213 	 * First, try using the geniconvtbl conversion, which is performed by
214 	 * /usr/lib/iconv/geniconvtbl.so with the conversion table file:
215 	 * /usr/lib/iconv/geniconvtbl/binarytables/fromcode%tocode.bt
216 	 *
217 	 * If the geniconvtbl conversion cannot be done, try the conversion
218 	 * by the individual shared object.
219 	 */
220 
221 	len = snprintf(ipath, MAXPATHLEN, _GENICONVTBL_PATH, from, to);
222 	if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
223 		/*
224 		 * from%to.bt exists in the table dir
225 		 */
226 		cv = iconv_open_private(_GENICONVTBL_INT_PATH, ipath);
227 		if (cv != (iconv_p)-1) {
228 			/* found a valid module for this conversion */
229 			return (cv);
230 		}
231 	}
232 
233 	/* Next, try /usr/lib/iconv/from%to.so */
234 	len = snprintf(ipath, MAXPATHLEN, _ICONV_PATH, from, to);
235 	if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
236 		/*
237 		 * /usr/lib/iconv/from%to.so exists
238 		 * errno will be set by iconv_open_private on error
239 		 */
240 		return (iconv_open_private(ipath, NULL));
241 	}
242 
243 	/*
244 	 * Finally, as a last resort check if the 'to' and the 'from' are
245 	 * referring to the same codeset name or not. If so, assign the
246 	 * embedded pass-through code conversion.
247 	 */
248 	if (strcasecmp(to, from) == 0)
249 		return (iconv_open_passthru());
250 
251 	/* no valid module for this conversion found */
252 	errno = EINVAL;
253 	return ((iconv_p)-1);
254 }
255 
256 static iconv_p
iconv_search_alias(const char * tocode,const char * fromcode,char * ipath)257 iconv_search_alias(const char *tocode, const char *fromcode, char *ipath)
258 {
259 	char	*p;
260 	char	*to_canonical, *from_canonical;
261 	size_t	tolen, fromlen;
262 	iconv_p	cv;
263 	int	fd;
264 	struct stat64	statbuf;
265 	caddr_t	addr;
266 	size_t	buflen;
267 
268 	fd = open(_ENCODING_ALIAS_PATH, O_RDONLY);
269 	if (fd == -1) {
270 		/*
271 		 * if no alias file found,
272 		 * errno will be set to EINVAL.
273 		 */
274 		errno = EINVAL;
275 		return ((iconv_p)-1);
276 	}
277 	if (fstat64(fd, &statbuf) == -1) {
278 		(void) close(fd);
279 		/* use errno set by fstat64 */
280 		return ((iconv_p)-1);
281 	}
282 	buflen = (size_t)statbuf.st_size;
283 	addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0);
284 	(void) close(fd);
285 	if (addr == MAP_FAILED) {
286 		/* use errno set by mmap */
287 		return ((iconv_p)-1);
288 	}
289 	p = (char *)addr;
290 	tolen = search_alias(&p, buflen, tocode);
291 	if (tolen) {
292 		to_canonical = alloca(tolen + 1);
293 		(void) memcpy(to_canonical, p, tolen);
294 		to_canonical[tolen] = '\0';
295 	} else {
296 		to_canonical = (char *)tocode;
297 	}
298 	p = (char *)addr;
299 	fromlen = search_alias(&p, buflen, fromcode);
300 	if (fromlen) {
301 		from_canonical = alloca(fromlen + 1);
302 		(void) memcpy(from_canonical, p, fromlen);
303 		from_canonical[fromlen] = '\0';
304 	} else {
305 		from_canonical = (char *)fromcode;
306 	}
307 	(void) munmap(addr, buflen);
308 	if (tolen == 0 && fromlen == 0) {
309 		errno = EINVAL;
310 		return ((iconv_p)-1);
311 	}
312 
313 	cv = iconv_open_all(to_canonical, from_canonical, ipath);
314 
315 	/* errno set by iconv_open_all on error */
316 	return (cv);
317 }
318 
319 static iconv_p
iconv_open_passthru(void)320 iconv_open_passthru(void)
321 {
322 	iconv_p cdpath;
323 
324 	/*
325 	 * For a pass-through byte-by-byte code conversion, allocate
326 	 * an internal conversion descriptor and initialize the data
327 	 * fields appropriately and we are done.
328 	 */
329 	cdpath = malloc(sizeof (struct _iconv_fields));
330 	if (cdpath == NULL)
331 		return ((iconv_p)-1);
332 
333 	cdpath->_icv_handle = NULL;
334 	cdpath->_icv_iconv = passthru_icv_iconv;
335 	cdpath->_icv_close = passthru_icv_close;
336 	cdpath->_icv_state = (void *)PASSTHRU_MAGIC_NUMBER;
337 
338 	return (cdpath);
339 }
340 
341 static iconv_p
iconv_open_private(const char * lib,const char * tbl)342 iconv_open_private(const char *lib, const char *tbl)
343 {
344 	iconv_t (*fptr)(const char *);
345 	iconv_p cdpath;
346 
347 	if ((cdpath = malloc(sizeof (struct _iconv_fields))) == NULL)
348 		return ((iconv_p)-1);
349 
350 	if ((cdpath->_icv_handle = dlopen(lib, RTLD_LAZY)) == 0) {
351 		free(cdpath);
352 		/* dlopen does not define error no */
353 		errno = EINVAL;
354 		return ((iconv_p)-1);
355 	}
356 
357 	/* gets address of _icv_open */
358 	if ((fptr = (iconv_t(*)(const char *))dlsym(cdpath->_icv_handle,
359 	    "_icv_open")) == NULL) {
360 		(void) dlclose(cdpath->_icv_handle);
361 		free(cdpath);
362 		/* dlsym does not define errno */
363 		errno = EINVAL;
364 		return ((iconv_p)-1);
365 	}
366 
367 	/*
368 	 * gets address of _icv_iconv in the loadable conversion module
369 	 * and stores it in cdpath->_icv_iconv
370 	 */
371 
372 	if ((cdpath->_icv_iconv = (size_t(*)(iconv_t, const char **,
373 	    size_t *, char **, size_t *))dlsym(cdpath->_icv_handle,
374 	    "_icv_iconv")) == NULL) {
375 		(void) dlclose(cdpath->_icv_handle);
376 		free(cdpath);
377 		/* dlsym does not define errno */
378 		errno = EINVAL;
379 		return ((iconv_p)-1);
380 	}
381 
382 	/*
383 	 * gets address of _icv_close in the loadable conversion module
384 	 * and stores it in cd->_icv_close
385 	 */
386 	if ((cdpath->_icv_close = (void(*)(iconv_t))dlsym(cdpath->_icv_handle,
387 	    "_icv_close")) == NULL) {
388 		(void) dlclose(cdpath->_icv_handle);
389 		free(cdpath);
390 		/* dlsym does not define errno */
391 		errno = EINVAL;
392 		return ((iconv_p)-1);
393 	}
394 
395 	/*
396 	 * initialize the state of the actual _icv_iconv conversion routine
397 	 * For the normal iconv module, NULL will be passed as an argument
398 	 * although the iconv_open() of the module won't use that.
399 	 */
400 	cdpath->_icv_state = (void *)(*fptr)(tbl);
401 
402 	if (cdpath->_icv_state == (struct _icv_state *)-1) {
403 		(void) dlclose(cdpath->_icv_handle);
404 		free(cdpath);
405 		/* this module does not satisfy this conversion */
406 		errno = EINVAL;
407 		return ((iconv_p)-1);
408 	}
409 
410 	return (cdpath);
411 }
412 
413 int
iconv_close(iconv_t cd)414 iconv_close(iconv_t cd)
415 {
416 	if (cd == NULL) {
417 		errno = EBADF;
418 		return (-1);
419 	}
420 	(*(cd->_conv)->_icv_close)(cd->_conv->_icv_state);
421 	if (cd->_conv->_icv_handle != NULL)
422 		(void) dlclose(cd->_conv->_icv_handle);
423 	free(cd->_conv);
424 	free(cd);
425 	return (0);
426 }
427 
428 /*
429  * To have minimal performance impact to the existing run-time behavior,
430  * we supply a dummy passthru_icv_close() that will just return.
431  */
432 static void
passthru_icv_close(iconv_t cd __unused)433 passthru_icv_close(iconv_t cd __unused)
434 {
435 }
436 
437 size_t
iconv(iconv_t cd,const char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)438 iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
439     char **outbuf, size_t *outbytesleft)
440 {
441 	/* check if cd is valid */
442 	if (cd == NULL || cd == (iconv_t)-1) {
443 		errno = EBADF;
444 		return ((size_t)-1);
445 	}
446 
447 	/* direct conversion */
448 	return ((*(cd->_conv)->_icv_iconv)(cd->_conv->_icv_state,
449 	    inbuf, inbytesleft, outbuf, outbytesleft));
450 }
451 
452 static size_t
passthru_icv_iconv(iconv_t cd,const char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)453 passthru_icv_iconv(iconv_t cd, const char **inbuf, size_t *inbufleft,
454     char **outbuf, size_t *outbufleft)
455 {
456 	size_t ibl;
457 	size_t obl;
458 	size_t len;
459 	size_t ret_val;
460 
461 	/* Check if the conversion descriptor is a valid one. */
462 	if (cd != (iconv_t)PASSTHRU_MAGIC_NUMBER) {
463 		errno = EBADF;
464 		return ((size_t)-1);
465 	}
466 
467 	/* For any state reset request, return success. */
468 	if (inbuf == NULL || *inbuf == NULL)
469 		return (0);
470 
471 	/*
472 	 * Initialize internally used variables for a better performance
473 	 * and prepare for a couple of the return values before the actual
474 	 * copying of the bytes.
475 	 */
476 	ibl = *inbufleft;
477 	obl = *outbufleft;
478 
479 	if (ibl > obl) {
480 		len = obl;
481 		errno = E2BIG;
482 		ret_val = (size_t)-1;
483 	} else {
484 		len = ibl;
485 		ret_val = 0;
486 	}
487 
488 	/*
489 	 * Do the copy using memmove(). There are no EILSEQ or EINVAL
490 	 * checkings since this is a simple copying.
491 	 */
492 	(void) memmove((void *)*outbuf, (const void *)*inbuf, len);
493 
494 	/* Update the return values related to the buffers then do return. */
495 	*inbuf = *inbuf + len;
496 	*outbuf = *outbuf + len;
497 	*inbufleft = ibl - len;
498 	*outbufleft = obl - len;
499 
500 	return (ret_val);
501 }
502