1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include "lint.h" 28 #include <sys/types.h> 29 #include <sys/stat.h> 30 #include <sys/mman.h> 31 #include <stdlib.h> 32 #include <stdio.h> 33 #include <dlfcn.h> 34 #include <fcntl.h> 35 #include <unistd.h> 36 #include <string.h> 37 #include <errno.h> 38 #include <sys/param.h> 39 #include <alloca.h> 40 #include "iconv.h" 41 #include "iconvP.h" 42 #include "../i18n/_loc_path.h" 43 44 static iconv_p iconv_open_all(const char *, const char *, char *); 45 static iconv_p iconv_open_private(const char *, const char *); 46 static iconv_p iconv_search_alias(const char *, const char *, char *); 47 static size_t passthru_icv_iconv(iconv_t, const char **, size_t *, char **, 48 size_t *); 49 static void passthru_icv_close(iconv_t); 50 51 #define PASSTHRU_MAGIC_NUMBER (0x53756e) 52 53 54 /* 55 * These functions are mainly implemented by using a shared object and 56 * the dlopen() functions. The actual conversion algorithm for a particular 57 * conversion is implemented via a shared object as a loadable conversion 58 * module which is linked dynamically at run time. 59 * 60 * The loadable conversion module resides as either: 61 * 62 * /usr/lib/iconv/geniconvtbl.so 63 * 64 * if the conversion is supported through a geniconvtbl code conversion 65 * binary table or as a module that directly specifies the conversion at: 66 * 67 * /usr/lib/iconv/fromcode%tocode.so 68 * 69 * where fromcode is the source encoding and tocode is the target encoding. 70 * The modules have 3 entries: _icv_open(), _icv_iconv(), and _icv_close(). 71 * 72 * If there is no code conversion supported and if the fromcode and the tocode 73 * are specifying the same codeset, then, the byte-by-byte, pass-through code 74 * conversion that is embedded in the libc is used instead. 75 * 76 * The following are the related PSARC cases: 77 * 78 * PSARC/1993/153 iconv/iconv_open/iconv_close 79 * PSARC/1999/292 Addition of geniconvtbl(1) 80 * PSARC/2001/072 GNU gettext support 81 * PSARC/2009/561 Pass-through iconv code conversion 82 * 83 * The PSARC/2001/072 includes the /usr/lib/iconv/alias interface. 84 */ 85 86 iconv_t 87 iconv_open(const char *tocode, const char *fromcode) 88 { 89 iconv_t cd; 90 char *ipath; 91 92 if ((cd = malloc(sizeof (struct _iconv_info))) == NULL) 93 return ((iconv_t)-1); 94 95 /* 96 * Memory for ipath is allocated/released in this function. 97 */ 98 ipath = malloc(MAXPATHLEN); 99 if (ipath == NULL) { 100 free(cd); 101 return ((iconv_t)-1); 102 } 103 104 cd->_conv = iconv_open_all(tocode, fromcode, ipath); 105 if (cd->_conv != (iconv_p)-1) { 106 /* found a valid module for this conversion */ 107 free(ipath); 108 return (cd); 109 } 110 111 /* 112 * Now, try using the encoding name aliasing table 113 */ 114 cd->_conv = iconv_search_alias(tocode, fromcode, ipath); 115 free(ipath); 116 if (cd->_conv == (iconv_p)-1) { 117 /* 118 * As the last resort, check if the tocode and the fromcode 119 * are referring to the same codeset name or not. If so, 120 * assign the embedded pass-through code conversion. 121 */ 122 if (strcasecmp(tocode, fromcode) != 0) { 123 /* 124 * No valid conversion available. Do failure retrun 125 * with the errno set by iconv_search_alias(). 126 */ 127 free(cd); 128 return ((iconv_t)-1); 129 } 130 131 /* 132 * For a pass-through byte-by-byte code conversion, allocate 133 * an internal conversion descriptor and initialize the data 134 * fields appropriately and we are done. 135 */ 136 cd->_conv = malloc(sizeof (struct _iconv_fields)); 137 if (cd->_conv == NULL) { 138 free(cd); 139 return ((iconv_t)-1); 140 } 141 142 cd->_conv->_icv_handle = NULL; 143 cd->_conv->_icv_iconv = passthru_icv_iconv; 144 cd->_conv->_icv_close = passthru_icv_close; 145 cd->_conv->_icv_state = (void *)PASSTHRU_MAGIC_NUMBER; 146 } 147 148 /* found a valid module for this conversion */ 149 return (cd); 150 } 151 152 static size_t 153 search_alias(char **paddr, size_t size, const char *variant) 154 { 155 char *addr = *paddr; 156 char *p, *sp, *q; 157 size_t var_len, can_len; 158 159 var_len = strlen(variant); 160 p = addr; 161 q = addr + size; 162 while (q > p) { 163 if (*p == '#') { 164 /* 165 * Line beginning with '#' is a comment 166 */ 167 p++; 168 while ((q > p) && (*p++ != '\n')) 169 ; 170 continue; 171 } 172 /* skip leading spaces */ 173 while ((q > p) && 174 ((*p == ' ') || (*p == '\t'))) 175 p++; 176 if (q <= p) 177 break; 178 sp = p; 179 while ((q > p) && (*p != ' ') && 180 (*p != '\t') && (*p != '\n')) 181 p++; 182 if (q <= p) { 183 /* invalid entry */ 184 break; 185 } 186 if (*p == '\n') { 187 /* invalid entry */ 188 p++; 189 continue; 190 } 191 192 if (((p - sp) != var_len) || 193 ((strncmp(sp, variant, var_len) != 0) && 194 (strncasecmp(sp, variant, var_len) != 0))) { 195 /* 196 * didn't match 197 */ 198 199 /* skip remaining chars in this line */ 200 p++; 201 while ((q > p) && (*p++ != '\n')) 202 ; 203 continue; 204 } 205 206 /* matching entry found */ 207 208 /* skip spaces */ 209 while ((q > p) && 210 ((*p == ' ') || (*p == '\t'))) 211 p++; 212 if (q <= p) 213 break; 214 sp = p; 215 while ((q > p) && (*p != ' ') && 216 (*p != '\t') && (*p != '\n')) 217 p++; 218 can_len = p - sp; 219 if (can_len == 0) { 220 while ((q > p) && (*p++ != '\n')) 221 ; 222 continue; 223 } 224 *paddr = sp; 225 return (can_len); 226 /* NOTREACHED */ 227 } 228 return (0); 229 } 230 231 static iconv_p 232 iconv_open_all(const char *to, const char *from, char *ipath) 233 { 234 iconv_p cv; 235 int len; 236 237 /* 238 * First, try using the geniconvtbl conversion, which is 239 * performed by /usr/lib/iconv/geniconvtbl.so with 240 * the conversion table file: 241 * /usr/lib/iconv/geniconvtbl/binarytables/fromcode%tocode.bt 242 * 243 * If the geniconvtbl conversion cannot be done, 244 * try the conversion by the individual shared object. 245 */ 246 247 len = snprintf(ipath, MAXPATHLEN, _GENICONVTBL_PATH, from, to); 248 if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) { 249 /* 250 * from%to.bt exists in the table dir 251 */ 252 cv = iconv_open_private(_GENICONVTBL_INT_PATH, ipath); 253 if (cv != (iconv_p)-1) { 254 /* found a valid module for this conversion */ 255 return (cv); 256 } 257 } 258 259 /* Next, try /usr/lib/iconv/from%to.so */ 260 len = snprintf(ipath, MAXPATHLEN, _ICONV_PATH, from, to); 261 if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) { 262 /* 263 * /usr/lib/iconv/from%to.so exists 264 * errno will be set by iconv_open_private on error 265 */ 266 return (iconv_open_private(ipath, NULL)); 267 } 268 /* no valid module for this conversion found */ 269 errno = EINVAL; 270 return ((iconv_p)-1); 271 } 272 273 static iconv_p 274 iconv_search_alias(const char *tocode, const char *fromcode, char *ipath) 275 { 276 char *p; 277 char *to_canonical, *from_canonical; 278 size_t tolen, fromlen; 279 iconv_p cv; 280 int fd; 281 struct stat64 statbuf; 282 caddr_t addr; 283 size_t buflen; 284 285 fd = open(_ENCODING_ALIAS_PATH, O_RDONLY); 286 if (fd == -1) { 287 /* 288 * if no alias file found, 289 * errno will be set to EINVAL. 290 */ 291 errno = EINVAL; 292 return ((iconv_p)-1); 293 } 294 if (fstat64(fd, &statbuf) == -1) { 295 (void) close(fd); 296 /* use errno set by fstat64 */ 297 return ((iconv_p)-1); 298 } 299 buflen = (size_t)statbuf.st_size; 300 addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0); 301 (void) close(fd); 302 if (addr == MAP_FAILED) { 303 /* use errno set by mmap */ 304 return ((iconv_p)-1); 305 } 306 p = (char *)addr; 307 tolen = search_alias(&p, buflen, tocode); 308 if (tolen) { 309 to_canonical = alloca(tolen + 1); 310 (void) memcpy(to_canonical, p, tolen); 311 to_canonical[tolen] = '\0'; 312 } else { 313 to_canonical = (char *)tocode; 314 } 315 p = (char *)addr; 316 fromlen = search_alias(&p, buflen, fromcode); 317 if (fromlen) { 318 from_canonical = alloca(fromlen + 1); 319 (void) memcpy(from_canonical, p, fromlen); 320 from_canonical[fromlen] = '\0'; 321 } else { 322 from_canonical = (char *)fromcode; 323 } 324 (void) munmap(addr, buflen); 325 if (tolen == 0 && fromlen == 0) { 326 errno = EINVAL; 327 return ((iconv_p)-1); 328 } 329 330 cv = iconv_open_all(to_canonical, from_canonical, ipath); 331 332 /* errno set by iconv_open_all on error */ 333 return (cv); 334 } 335 336 static iconv_p 337 iconv_open_private(const char *lib, const char *tbl) 338 { 339 iconv_t (*fptr)(const char *); 340 iconv_p cdpath; 341 342 if ((cdpath = malloc(sizeof (struct _iconv_fields))) == NULL) 343 return ((iconv_p)-1); 344 345 if ((cdpath->_icv_handle = dlopen(lib, RTLD_LAZY)) == 0) { 346 free(cdpath); 347 /* dlopen does not define error no */ 348 errno = EINVAL; 349 return ((iconv_p)-1); 350 } 351 352 /* gets address of _icv_open */ 353 if ((fptr = (iconv_t(*)(const char *))dlsym(cdpath->_icv_handle, 354 "_icv_open")) == NULL) { 355 (void) dlclose(cdpath->_icv_handle); 356 free(cdpath); 357 /* dlsym does not define errno */ 358 errno = EINVAL; 359 return ((iconv_p)-1); 360 } 361 362 /* 363 * gets address of _icv_iconv in the loadable conversion module 364 * and stores it in cdpath->_icv_iconv 365 */ 366 367 if ((cdpath->_icv_iconv = (size_t(*)(iconv_t, const char **, 368 size_t *, char **, size_t *))dlsym(cdpath->_icv_handle, 369 "_icv_iconv")) == NULL) { 370 (void) dlclose(cdpath->_icv_handle); 371 free(cdpath); 372 /* dlsym does not define errno */ 373 errno = EINVAL; 374 return ((iconv_p)-1); 375 } 376 377 /* 378 * gets address of _icv_close in the loadable conversion module 379 * and stores it in cd->_icv_close 380 */ 381 if ((cdpath->_icv_close = (void(*)(iconv_t))dlsym(cdpath->_icv_handle, 382 "_icv_close")) == NULL) { 383 (void) dlclose(cdpath->_icv_handle); 384 free(cdpath); 385 /* dlsym does not define errno */ 386 errno = EINVAL; 387 return ((iconv_p)-1); 388 } 389 390 /* 391 * initialize the state of the actual _icv_iconv conversion routine 392 * For the normal iconv module, NULL will be passed as an argument 393 * although the iconv_open() of the module won't use that. 394 */ 395 cdpath->_icv_state = (void *)(*fptr)(tbl); 396 397 if (cdpath->_icv_state == (struct _icv_state *)-1) { 398 (void) dlclose(cdpath->_icv_handle); 399 free(cdpath); 400 /* this module does not satisfy this conversion */ 401 errno = EINVAL; 402 return ((iconv_p)-1); 403 } 404 405 return (cdpath); 406 } 407 408 int 409 iconv_close(iconv_t cd) 410 { 411 if (cd == NULL) { 412 errno = EBADF; 413 return (-1); 414 } 415 (*(cd->_conv)->_icv_close)(cd->_conv->_icv_state); 416 if (cd->_conv->_icv_handle != NULL) 417 (void) dlclose(cd->_conv->_icv_handle); 418 free(cd->_conv); 419 free(cd); 420 return (0); 421 } 422 423 /* 424 * To have minimal performance impact to the existing run-time behavior, 425 * we supply a dummy passthru_icv_close() that will just return. 426 */ 427 static void 428 passthru_icv_close(iconv_t cd __unused) 429 { 430 } 431 432 size_t 433 iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, 434 char **outbuf, size_t *outbytesleft) 435 { 436 /* check if cd is valid */ 437 if (cd == NULL || cd == (iconv_t)-1) { 438 errno = EBADF; 439 return ((size_t)-1); 440 } 441 442 /* direct conversion */ 443 return ((*(cd->_conv)->_icv_iconv)(cd->_conv->_icv_state, 444 inbuf, inbytesleft, outbuf, outbytesleft)); 445 } 446 447 static size_t 448 passthru_icv_iconv(iconv_t cd, const char **inbuf, size_t *inbufleft, 449 char **outbuf, size_t *outbufleft) 450 { 451 size_t ibl; 452 size_t obl; 453 size_t len; 454 size_t ret_val; 455 456 /* Check if the conversion descriptor is a valid one. */ 457 if (cd != (iconv_t)PASSTHRU_MAGIC_NUMBER) { 458 errno = EBADF; 459 return ((size_t)-1); 460 } 461 462 /* For any state reset request, return success. */ 463 if (inbuf == NULL || *inbuf == NULL) 464 return (0); 465 466 /* 467 * Initialize internally used variables for a better performance 468 * and prepare for a couple of the return values before the actual 469 * copying of the bytes. 470 */ 471 ibl = *inbufleft; 472 obl = *outbufleft; 473 474 if (ibl > obl) { 475 len = obl; 476 errno = E2BIG; 477 ret_val = (size_t)-1; 478 } else { 479 len = ibl; 480 ret_val = 0; 481 } 482 483 /* 484 * Do the copy using memmove(). There are no EILSEQ or EINVAL 485 * checkings since this is a simple copying. 486 */ 487 (void) memmove((void *)*outbuf, (const void *)*inbuf, len); 488 489 /* Update the return values related to the buffers then do return. */ 490 *inbuf = *inbuf + len; 491 *outbuf = *outbuf + len; 492 *inbufleft = ibl - len; 493 *outbufleft = obl - len; 494 495 return (ret_val); 496 } 497