1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2003, 2005 Ryuichiro Imura 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /* 32 * kiconv(3) requires shared linked, and reduce module size 33 * when statically linked. 34 */ 35 36 #ifdef PIC 37 38 #include <sys/types.h> 39 #include <sys/iconv.h> 40 #include <sys/sysctl.h> 41 42 #include <ctype.h> 43 #include <dlfcn.h> 44 #include <err.h> 45 #include <errno.h> 46 #include <locale.h> 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <wctype.h> 51 52 #include "quirks.h" 53 54 struct xlat16_table { 55 uint32_t * idx[0x200]; 56 void * data; 57 size_t size; 58 }; 59 60 static struct xlat16_table kiconv_xlat16_open(const char *, const char *, int); 61 static int chklocale(int, const char *); 62 63 #ifdef ICONV_DLOPEN 64 typedef void *iconv_t; 65 static int my_iconv_init(void); 66 static iconv_t (*my_iconv_open)(const char *, const char *); 67 static size_t (*my_iconv)(iconv_t, char **, size_t *, char **, size_t *); 68 static int (*my_iconv_close)(iconv_t); 69 #else 70 #include <iconv.h> 71 #define my_iconv_init() 0 72 #define my_iconv_open iconv_open 73 #define my_iconv iconv 74 #define my_iconv_close iconv_close 75 #endif 76 static size_t my_iconv_char(iconv_t, u_char **, size_t *, u_char **, size_t *); 77 78 int 79 kiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag) 80 { 81 int error; 82 size_t idxsize; 83 struct xlat16_table xt; 84 void *data; 85 char *p; 86 const char unicode[] = ENCODING_UNICODE; 87 88 if ((flag & KICONV_WCTYPE) == 0 && 89 strcmp(unicode, tocode) != 0 && 90 strcmp(unicode, fromcode) != 0 && 91 kiconv_lookupconv(unicode) == 0) { 92 error = kiconv_add_xlat16_cspair(unicode, fromcode, flag); 93 if (error) 94 return (-1); 95 error = kiconv_add_xlat16_cspair(tocode, unicode, flag); 96 return (error); 97 } 98 99 if (kiconv_lookupcs(tocode, fromcode) == 0) 100 return (0); 101 102 if (flag & KICONV_WCTYPE) 103 xt = kiconv_xlat16_open(fromcode, fromcode, flag); 104 else 105 xt = kiconv_xlat16_open(tocode, fromcode, flag); 106 if (xt.size == 0) 107 return (-1); 108 109 idxsize = sizeof(xt.idx); 110 111 if ((idxsize + xt.size) > ICONV_CSMAXDATALEN) { 112 errno = E2BIG; 113 return (-1); 114 } 115 116 if ((data = malloc(idxsize + xt.size)) != NULL) { 117 p = data; 118 memcpy(p, xt.idx, idxsize); 119 p += idxsize; 120 memcpy(p, xt.data, xt.size); 121 error = kiconv_add_xlat16_table(tocode, fromcode, data, 122 (int)(idxsize + xt.size)); 123 return (error); 124 } 125 126 return (-1); 127 } 128 129 int 130 kiconv_add_xlat16_cspairs(const char *foreigncode, const char *localcode) 131 { 132 int error, locale; 133 134 error = kiconv_add_xlat16_cspair(foreigncode, localcode, 135 KICONV_FROM_LOWER | KICONV_FROM_UPPER); 136 if (error) 137 return (error); 138 error = kiconv_add_xlat16_cspair(localcode, foreigncode, 139 KICONV_LOWER | KICONV_UPPER); 140 if (error) 141 return (error); 142 locale = chklocale(LC_CTYPE, localcode); 143 if (locale == 0) { 144 error = kiconv_add_xlat16_cspair(KICONV_WCTYPE_NAME, localcode, 145 KICONV_WCTYPE); 146 if (error) 147 return (error); 148 } 149 150 return (0); 151 } 152 153 static struct xlat16_table 154 kiconv_xlat16_open(const char *tocode, const char *fromcode, int lcase) 155 { 156 u_char src[3], dst[4], *srcp, *dstp, ud, ld; 157 int us, ls, ret; 158 uint16_t c; 159 uint32_t table[0x80]; 160 size_t inbytesleft, outbytesleft, pre_q_size, post_q_size; 161 struct xlat16_table xt; 162 struct quirk_replace_list *pre_q_list, *post_q_list; 163 iconv_t cd; 164 char *p; 165 166 xt.data = NULL; 167 xt.size = 0; 168 169 src[2] = '\0'; 170 dst[3] = '\0'; 171 172 ret = my_iconv_init(); 173 if (ret) 174 return (xt); 175 176 cd = my_iconv_open(search_quirk(tocode, fromcode, &pre_q_list, &pre_q_size), 177 search_quirk(fromcode, tocode, &post_q_list, &post_q_size)); 178 if (cd == (iconv_t) (-1)) 179 return (xt); 180 181 if ((xt.data = malloc(0x200 * 0x80 * sizeof(uint32_t))) == NULL) 182 return (xt); 183 184 p = xt.data; 185 186 for (ls = 0 ; ls < 0x200 ; ls++) { 187 xt.idx[ls] = NULL; 188 for (us = 0 ; us < 0x80 ; us++) { 189 srcp = src; 190 dstp = dst; 191 192 inbytesleft = 2; 193 outbytesleft = 3; 194 bzero(dst, outbytesleft); 195 196 c = ((ls & 0x100 ? us | 0x80 : us) << 8) | (u_char)ls; 197 198 if (lcase & KICONV_WCTYPE) { 199 if ((c & 0xff) == 0) 200 c >>= 8; 201 if (iswupper(c)) { 202 c = towlower(c); 203 if ((c & 0xff00) == 0) 204 c <<= 8; 205 table[us] = c | XLAT16_HAS_LOWER_CASE; 206 } else if (iswlower(c)) { 207 c = towupper(c); 208 if ((c & 0xff00) == 0) 209 c <<= 8; 210 table[us] = c | XLAT16_HAS_UPPER_CASE; 211 } else 212 table[us] = 0; 213 /* 214 * store not NULL 215 */ 216 if (table[us]) 217 xt.idx[ls] = table; 218 219 continue; 220 } 221 222 c = quirk_vendor2unix(c, pre_q_list, pre_q_size); 223 src[0] = (u_char)(c >> 8); 224 src[1] = (u_char)c; 225 226 ret = my_iconv_char(cd, &srcp, &inbytesleft, 227 &dstp, &outbytesleft); 228 if (ret == -1) { 229 table[us] = 0; 230 continue; 231 } 232 233 ud = (u_char)dst[0]; 234 ld = (u_char)dst[1]; 235 236 switch(outbytesleft) { 237 case 0: 238 #ifdef XLAT16_ACCEPT_3BYTE_CHR 239 table[us] = (ud << 8) | ld; 240 table[us] |= (u_char)dst[2] << 16; 241 table[us] |= XLAT16_IS_3BYTE_CHR; 242 #else 243 table[us] = 0; 244 continue; 245 #endif 246 break; 247 case 1: 248 table[us] = quirk_unix2vendor((ud << 8) | ld, 249 post_q_list, post_q_size); 250 if ((table[us] >> 8) == 0) 251 table[us] |= XLAT16_ACCEPT_NULL_OUT; 252 break; 253 case 2: 254 table[us] = ud; 255 if (lcase & KICONV_LOWER && ud != tolower(ud)) { 256 table[us] |= (u_char)tolower(ud) << 16; 257 table[us] |= XLAT16_HAS_LOWER_CASE; 258 } 259 if (lcase & KICONV_UPPER && ud != toupper(ud)) { 260 table[us] |= (u_char)toupper(ud) << 16; 261 table[us] |= XLAT16_HAS_UPPER_CASE; 262 } 263 break; 264 } 265 266 switch(inbytesleft) { 267 case 0: 268 if ((ls & 0xff) == 0) 269 table[us] |= XLAT16_ACCEPT_NULL_IN; 270 break; 271 case 1: 272 c = ls > 0xff ? us | 0x80 : us; 273 if (lcase & KICONV_FROM_LOWER && c != tolower(c)) { 274 table[us] |= (u_char)tolower(c) << 16; 275 table[us] |= XLAT16_HAS_FROM_LOWER_CASE; 276 } 277 if (lcase & KICONV_FROM_UPPER && c != toupper(c)) { 278 table[us] |= (u_char)toupper(c) << 16; 279 table[us] |= XLAT16_HAS_FROM_UPPER_CASE; 280 } 281 break; 282 } 283 284 if (table[us] == 0) 285 continue; 286 287 /* 288 * store not NULL 289 */ 290 xt.idx[ls] = table; 291 } 292 if (xt.idx[ls]) { 293 memcpy(p, table, sizeof(table)); 294 p += sizeof(table); 295 } 296 } 297 my_iconv_close(cd); 298 299 xt.size = p - (char *)xt.data; 300 xt.data = realloc(xt.data, xt.size); 301 return (xt); 302 } 303 304 static int 305 chklocale(int category, const char *code) 306 { 307 char *p; 308 int error = -1; 309 310 p = strchr(setlocale(category, NULL), '.'); 311 if (p++) { 312 error = strcasecmp(code, p); 313 if (error) { 314 /* XXX - can't avoid calling quirk here... */ 315 error = strcasecmp(code, kiconv_quirkcs(p, 316 KICONV_VENDOR_MICSFT)); 317 } 318 } 319 return (error); 320 } 321 322 #ifdef ICONV_DLOPEN 323 static int 324 my_iconv_init(void) 325 { 326 void *iconv_lib; 327 328 iconv_lib = dlopen("libiconv.so", RTLD_LAZY | RTLD_GLOBAL); 329 if (iconv_lib == NULL) { 330 warn("Unable to load iconv library: %s\n", dlerror()); 331 errno = ENOENT; 332 return (-1); 333 } 334 my_iconv_open = dlsym(iconv_lib, "iconv_open"); 335 my_iconv = dlsym(iconv_lib, "iconv"); 336 my_iconv_close = dlsym(iconv_lib, "iconv_close"); 337 338 return (0); 339 } 340 #endif 341 342 static size_t 343 my_iconv_char(iconv_t cd, u_char **ibuf, size_t * ilen, u_char **obuf, 344 size_t * olen) 345 { 346 u_char *sp, *dp, ilocal[3], olocal[3]; 347 u_char c1, c2; 348 int ret; 349 size_t ir, or; 350 351 sp = *ibuf; 352 dp = *obuf; 353 ir = *ilen; 354 355 bzero(*obuf, *olen); 356 ret = my_iconv(cd, (char **)&sp, ilen, (char **)&dp, olen); 357 c1 = (*obuf)[0]; 358 c2 = (*obuf)[1]; 359 360 if (ret == -1) { 361 if (*ilen == ir - 1 && (*ibuf)[1] == '\0' && (c1 || c2)) 362 return (0); 363 else 364 return (-1); 365 } 366 367 /* 368 * We must judge if inbuf is a single byte char or double byte char. 369 * Here, to judge, try first byte(*sp) conversion and compare. 370 */ 371 ir = 1; 372 or = 3; 373 374 bzero(olocal, or); 375 memcpy(ilocal, *ibuf, sizeof(ilocal)); 376 sp = ilocal; 377 dp = olocal; 378 379 if ((my_iconv(cd,(char **)&sp, &ir, (char **)&dp, &or)) != -1) { 380 if (olocal[0] != c1) 381 return (ret); 382 383 if (olocal[1] == c2 && (*ibuf)[1] == '\0') { 384 /* 385 * inbuf is a single byte char 386 */ 387 *ilen = 1; 388 *olen = or; 389 return (ret); 390 } 391 392 switch(or) { 393 case 0: 394 case 1: 395 if (olocal[1] == c2) { 396 /* 397 * inbuf is a single byte char, 398 * so return false here. 399 */ 400 return (-1); 401 } else { 402 /* 403 * inbuf is a double byte char 404 */ 405 return (ret); 406 } 407 break; 408 case 2: 409 /* 410 * should compare second byte of inbuf 411 */ 412 break; 413 } 414 } else { 415 /* 416 * inbuf clould not be splitted, so inbuf is 417 * a double byte char. 418 */ 419 return (ret); 420 } 421 422 /* 423 * try second byte(*(sp+1)) conversion, and compare 424 */ 425 ir = 1; 426 or = 3; 427 428 bzero(olocal, or); 429 430 sp = ilocal + 1; 431 dp = olocal; 432 433 if ((my_iconv(cd,(char **)&sp, &ir, (char **)&dp, &or)) != -1) { 434 if (olocal[0] == c2) 435 /* 436 * inbuf is a single byte char 437 */ 438 return (-1); 439 } 440 441 return (ret); 442 } 443 444 #else /* statically linked */ 445 446 #include <sys/types.h> 447 #include <sys/iconv.h> 448 #include <errno.h> 449 450 int 451 kiconv_add_xlat16_cspair(const char *tocode __unused, const char *fromcode __unused, 452 int flag __unused) 453 { 454 455 errno = EINVAL; 456 return (-1); 457 } 458 459 int 460 kiconv_add_xlat16_cspairs(const char *tocode __unused, const char *fromcode __unused) 461 { 462 errno = EINVAL; 463 return (-1); 464 } 465 466 #endif /* PIC */ 467