1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2025 Hans Rosenfeld 24 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 #include "lint.h" 29 #include <sys/types.h> 30 #include <sys/stat.h> 31 #include <sys/mman.h> 32 #include <stdlib.h> 33 #include <stdio.h> 34 #include <dlfcn.h> 35 #include <fcntl.h> 36 #include <unistd.h> 37 #include <string.h> 38 #include <errno.h> 39 #include <sys/param.h> 40 #include <alloca.h> 41 #include "iconv.h" 42 #include "iconvP.h" 43 #include "../i18n/_loc_path.h" 44 45 static iconv_p iconv_open_all(const char *, const char *, char *); 46 static iconv_p iconv_open_passthru(void); 47 static iconv_p iconv_open_private(const char *, const char *); 48 static iconv_p iconv_search_alias(const char *, const char *, char *); 49 static size_t passthru_icv_iconv(iconv_t, const char **, size_t *, char **, 50 size_t *); 51 static void passthru_icv_close(iconv_t); 52 53 #define PASSTHRU_MAGIC_NUMBER (0x53756e) 54 55 56 /* 57 * These functions are mainly implemented by using a shared object and 58 * the dlopen() functions. The actual conversion algorithm for a particular 59 * conversion is implemented via a shared object as a loadable conversion 60 * module which is linked dynamically at run time. 61 * 62 * The loadable conversion module resides as either: 63 * 64 * /usr/lib/iconv/geniconvtbl.so 65 * 66 * if the conversion is supported through a geniconvtbl code conversion 67 * binary table or as a module that directly specifies the conversion at: 68 * 69 * /usr/lib/iconv/fromcode%tocode.so 70 * 71 * where fromcode is the source encoding and tocode is the target encoding. 72 * The modules have 3 entries: _icv_open(), _icv_iconv(), and _icv_close(). 73 * 74 * If there is no code conversion supported and if the fromcode and the tocode 75 * are specifying the same codeset, then, the byte-by-byte, pass-through code 76 * conversion that is embedded in the libc is used instead. 77 * 78 * The following are the related PSARC cases: 79 * 80 * PSARC/1993/153 iconv/iconv_open/iconv_close 81 * PSARC/1999/292 Addition of geniconvtbl(1) 82 * PSARC/2001/072 GNU gettext support 83 * PSARC/2009/561 Pass-through iconv code conversion 84 * 85 * The PSARC/2001/072 includes the /usr/lib/iconv/alias interface. 86 */ 87 88 iconv_t 89 iconv_open(const char *tocode, const char *fromcode) 90 { 91 iconv_t cd; 92 char *ipath; 93 94 if ((cd = malloc(sizeof (struct _iconv_info))) == NULL) 95 return ((iconv_t)-1); 96 97 /* 98 * Memory for ipath is allocated/released in this function. 99 */ 100 ipath = malloc(MAXPATHLEN); 101 if (ipath == NULL) { 102 free(cd); 103 return ((iconv_t)-1); 104 } 105 106 cd->_conv = iconv_open_all(tocode, fromcode, ipath); 107 if (cd->_conv != (iconv_p)-1) { 108 /* found a valid module for this conversion */ 109 free(ipath); 110 return (cd); 111 } 112 113 /* 114 * Now, try using the encoding name aliasing table 115 */ 116 cd->_conv = iconv_search_alias(tocode, fromcode, ipath); 117 free(ipath); 118 if (cd->_conv == (iconv_p)-1) { 119 free(cd); 120 return ((iconv_t)-1); 121 } 122 123 /* found a valid module for this conversion */ 124 return (cd); 125 } 126 127 static size_t 128 search_alias(char **paddr, size_t size, const char *variant) 129 { 130 char *addr = *paddr; 131 char *p, *sp, *q; 132 size_t var_len, can_len; 133 134 var_len = strlen(variant); 135 p = addr; 136 q = addr + size; 137 while (q > p) { 138 if (*p == '#') { 139 /* 140 * Line beginning with '#' is a comment 141 */ 142 p++; 143 while ((q > p) && (*p++ != '\n')) 144 ; 145 continue; 146 } 147 /* skip leading spaces */ 148 while ((q > p) && 149 ((*p == ' ') || (*p == '\t'))) 150 p++; 151 if (q <= p) 152 break; 153 sp = p; 154 while ((q > p) && (*p != ' ') && 155 (*p != '\t') && (*p != '\n')) 156 p++; 157 if (q <= p) { 158 /* invalid entry */ 159 break; 160 } 161 if (*p == '\n') { 162 /* invalid entry */ 163 p++; 164 continue; 165 } 166 167 if (((p - sp) != var_len) || 168 ((strncmp(sp, variant, var_len) != 0) && 169 (strncasecmp(sp, variant, var_len) != 0))) { 170 /* 171 * didn't match 172 */ 173 174 /* skip remaining chars in this line */ 175 p++; 176 while ((q > p) && (*p++ != '\n')) 177 ; 178 continue; 179 } 180 181 /* matching entry found */ 182 183 /* skip spaces */ 184 while ((q > p) && 185 ((*p == ' ') || (*p == '\t'))) 186 p++; 187 if (q <= p) 188 break; 189 sp = p; 190 while ((q > p) && (*p != ' ') && 191 (*p != '\t') && (*p != '\n')) 192 p++; 193 can_len = p - sp; 194 if (can_len == 0) { 195 while ((q > p) && (*p++ != '\n')) 196 ; 197 continue; 198 } 199 *paddr = sp; 200 return (can_len); 201 /* NOTREACHED */ 202 } 203 return (0); 204 } 205 206 static iconv_p 207 iconv_open_all(const char *to, const char *from, char *ipath) 208 { 209 iconv_p cv; 210 int len; 211 212 /* 213 * First, try using the geniconvtbl conversion, which is performed by 214 * /usr/lib/iconv/geniconvtbl.so with the conversion table file: 215 * /usr/lib/iconv/geniconvtbl/binarytables/fromcode%tocode.bt 216 * 217 * If the geniconvtbl conversion cannot be done, try the conversion 218 * by the individual shared object. 219 */ 220 221 len = snprintf(ipath, MAXPATHLEN, _GENICONVTBL_PATH, from, to); 222 if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) { 223 /* 224 * from%to.bt exists in the table dir 225 */ 226 cv = iconv_open_private(_GENICONVTBL_INT_PATH, ipath); 227 if (cv != (iconv_p)-1) { 228 /* found a valid module for this conversion */ 229 return (cv); 230 } 231 } 232 233 /* Next, try /usr/lib/iconv/from%to.so */ 234 len = snprintf(ipath, MAXPATHLEN, _ICONV_PATH, from, to); 235 if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) { 236 /* 237 * /usr/lib/iconv/from%to.so exists 238 * errno will be set by iconv_open_private on error 239 */ 240 return (iconv_open_private(ipath, NULL)); 241 } 242 243 /* 244 * Finally, as a last resort check if the 'to' and the 'from' are 245 * referring to the same codeset name or not. If so, assign the 246 * embedded pass-through code conversion. 247 */ 248 if (strcasecmp(to, from) == 0) 249 return (iconv_open_passthru()); 250 251 /* no valid module for this conversion found */ 252 errno = EINVAL; 253 return ((iconv_p)-1); 254 } 255 256 static iconv_p 257 iconv_search_alias(const char *tocode, const char *fromcode, char *ipath) 258 { 259 char *p; 260 char *to_canonical, *from_canonical; 261 size_t tolen, fromlen; 262 iconv_p cv; 263 int fd; 264 struct stat64 statbuf; 265 caddr_t addr; 266 size_t buflen; 267 268 fd = open(_ENCODING_ALIAS_PATH, O_RDONLY); 269 if (fd == -1) { 270 /* 271 * if no alias file found, 272 * errno will be set to EINVAL. 273 */ 274 errno = EINVAL; 275 return ((iconv_p)-1); 276 } 277 if (fstat64(fd, &statbuf) == -1) { 278 (void) close(fd); 279 /* use errno set by fstat64 */ 280 return ((iconv_p)-1); 281 } 282 buflen = (size_t)statbuf.st_size; 283 addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0); 284 (void) close(fd); 285 if (addr == MAP_FAILED) { 286 /* use errno set by mmap */ 287 return ((iconv_p)-1); 288 } 289 p = (char *)addr; 290 tolen = search_alias(&p, buflen, tocode); 291 if (tolen) { 292 to_canonical = alloca(tolen + 1); 293 (void) memcpy(to_canonical, p, tolen); 294 to_canonical[tolen] = '\0'; 295 } else { 296 to_canonical = (char *)tocode; 297 } 298 p = (char *)addr; 299 fromlen = search_alias(&p, buflen, fromcode); 300 if (fromlen) { 301 from_canonical = alloca(fromlen + 1); 302 (void) memcpy(from_canonical, p, fromlen); 303 from_canonical[fromlen] = '\0'; 304 } else { 305 from_canonical = (char *)fromcode; 306 } 307 (void) munmap(addr, buflen); 308 if (tolen == 0 && fromlen == 0) { 309 errno = EINVAL; 310 return ((iconv_p)-1); 311 } 312 313 cv = iconv_open_all(to_canonical, from_canonical, ipath); 314 315 /* errno set by iconv_open_all on error */ 316 return (cv); 317 } 318 319 static iconv_p 320 iconv_open_passthru(void) 321 { 322 iconv_p cdpath; 323 324 /* 325 * For a pass-through byte-by-byte code conversion, allocate 326 * an internal conversion descriptor and initialize the data 327 * fields appropriately and we are done. 328 */ 329 cdpath = malloc(sizeof (struct _iconv_fields)); 330 if (cdpath == NULL) 331 return ((iconv_p)-1); 332 333 cdpath->_icv_handle = NULL; 334 cdpath->_icv_iconv = passthru_icv_iconv; 335 cdpath->_icv_close = passthru_icv_close; 336 cdpath->_icv_state = (void *)PASSTHRU_MAGIC_NUMBER; 337 338 return (cdpath); 339 } 340 341 static iconv_p 342 iconv_open_private(const char *lib, const char *tbl) 343 { 344 iconv_t (*fptr)(const char *); 345 iconv_p cdpath; 346 347 if ((cdpath = malloc(sizeof (struct _iconv_fields))) == NULL) 348 return ((iconv_p)-1); 349 350 if ((cdpath->_icv_handle = dlopen(lib, RTLD_LAZY)) == 0) { 351 free(cdpath); 352 /* dlopen does not define error no */ 353 errno = EINVAL; 354 return ((iconv_p)-1); 355 } 356 357 /* gets address of _icv_open */ 358 if ((fptr = (iconv_t(*)(const char *))dlsym(cdpath->_icv_handle, 359 "_icv_open")) == NULL) { 360 (void) dlclose(cdpath->_icv_handle); 361 free(cdpath); 362 /* dlsym does not define errno */ 363 errno = EINVAL; 364 return ((iconv_p)-1); 365 } 366 367 /* 368 * gets address of _icv_iconv in the loadable conversion module 369 * and stores it in cdpath->_icv_iconv 370 */ 371 372 if ((cdpath->_icv_iconv = (size_t(*)(iconv_t, const char **, 373 size_t *, char **, size_t *))dlsym(cdpath->_icv_handle, 374 "_icv_iconv")) == NULL) { 375 (void) dlclose(cdpath->_icv_handle); 376 free(cdpath); 377 /* dlsym does not define errno */ 378 errno = EINVAL; 379 return ((iconv_p)-1); 380 } 381 382 /* 383 * gets address of _icv_close in the loadable conversion module 384 * and stores it in cd->_icv_close 385 */ 386 if ((cdpath->_icv_close = (void(*)(iconv_t))dlsym(cdpath->_icv_handle, 387 "_icv_close")) == NULL) { 388 (void) dlclose(cdpath->_icv_handle); 389 free(cdpath); 390 /* dlsym does not define errno */ 391 errno = EINVAL; 392 return ((iconv_p)-1); 393 } 394 395 /* 396 * initialize the state of the actual _icv_iconv conversion routine 397 * For the normal iconv module, NULL will be passed as an argument 398 * although the iconv_open() of the module won't use that. 399 */ 400 cdpath->_icv_state = (void *)(*fptr)(tbl); 401 402 if (cdpath->_icv_state == (struct _icv_state *)-1) { 403 (void) dlclose(cdpath->_icv_handle); 404 free(cdpath); 405 /* this module does not satisfy this conversion */ 406 errno = EINVAL; 407 return ((iconv_p)-1); 408 } 409 410 return (cdpath); 411 } 412 413 int 414 iconv_close(iconv_t cd) 415 { 416 if (cd == NULL) { 417 errno = EBADF; 418 return (-1); 419 } 420 (*(cd->_conv)->_icv_close)(cd->_conv->_icv_state); 421 if (cd->_conv->_icv_handle != NULL) 422 (void) dlclose(cd->_conv->_icv_handle); 423 free(cd->_conv); 424 free(cd); 425 return (0); 426 } 427 428 /* 429 * To have minimal performance impact to the existing run-time behavior, 430 * we supply a dummy passthru_icv_close() that will just return. 431 */ 432 static void 433 passthru_icv_close(iconv_t cd __unused) 434 { 435 } 436 437 size_t 438 iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, 439 char **outbuf, size_t *outbytesleft) 440 { 441 /* check if cd is valid */ 442 if (cd == NULL || cd == (iconv_t)-1) { 443 errno = EBADF; 444 return ((size_t)-1); 445 } 446 447 /* direct conversion */ 448 return ((*(cd->_conv)->_icv_iconv)(cd->_conv->_icv_state, 449 inbuf, inbytesleft, outbuf, outbytesleft)); 450 } 451 452 static size_t 453 passthru_icv_iconv(iconv_t cd, const char **inbuf, size_t *inbufleft, 454 char **outbuf, size_t *outbufleft) 455 { 456 size_t ibl; 457 size_t obl; 458 size_t len; 459 size_t ret_val; 460 461 /* Check if the conversion descriptor is a valid one. */ 462 if (cd != (iconv_t)PASSTHRU_MAGIC_NUMBER) { 463 errno = EBADF; 464 return ((size_t)-1); 465 } 466 467 /* For any state reset request, return success. */ 468 if (inbuf == NULL || *inbuf == NULL) 469 return (0); 470 471 /* 472 * Initialize internally used variables for a better performance 473 * and prepare for a couple of the return values before the actual 474 * copying of the bytes. 475 */ 476 ibl = *inbufleft; 477 obl = *outbufleft; 478 479 if (ibl > obl) { 480 len = obl; 481 errno = E2BIG; 482 ret_val = (size_t)-1; 483 } else { 484 len = ibl; 485 ret_val = 0; 486 } 487 488 /* 489 * Do the copy using memmove(). There are no EILSEQ or EINVAL 490 * checkings since this is a simple copying. 491 */ 492 (void) memmove((void *)*outbuf, (const void *)*inbuf, len); 493 494 /* Update the return values related to the buffers then do return. */ 495 *inbuf = *inbuf + len; 496 *outbuf = *outbuf + len; 497 *inbufleft = ibl - len; 498 *outbufleft = obl - len; 499 500 return (ret_val); 501 } 502