1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 #include <sys/endian.h> 29 #include <sys/types.h> 30 31 #include <err.h> 32 #include <errno.h> 33 #include <iconv.h> 34 #include <stdbool.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <string.h> 38 39 static bool uc_hook = false; 40 static bool wc_hook = false; 41 static bool mb_uc_fb = false; 42 43 void unicode_hook(unsigned int mbr, void *data); 44 void wchar_hook(wchar_t wc, void *data); 45 46 void mb_to_uc_fb(const char *, size_t, 47 void (*write_replacement) (const unsigned int *, size_t, void *), 48 void *, void *); 49 50 static int 51 ctl_get_translit1(void) 52 { 53 iconv_t cd; 54 int arg, ret; 55 56 cd = iconv_open("ASCII//TRANSLIT", "UTF-8"); 57 if (cd == (iconv_t)-1) 58 return (-1); 59 if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0) 60 ret = (arg == 1) ? 0 : -1; 61 else 62 ret = -1; 63 if (iconv_close(cd) == -1) 64 return (-1); 65 return (ret); 66 } 67 68 static int 69 ctl_get_translit2(void) 70 { 71 iconv_t cd; 72 int arg, ret; 73 74 cd = iconv_open("ASCII", "UTF-8"); 75 if (cd == (iconv_t)-1) 76 return (-1); 77 if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0) 78 ret = (arg == 0) ? 0 : -1; 79 else 80 ret = -1; 81 if (iconv_close(cd) == -1) 82 return (-1); 83 return (ret); 84 } 85 86 static int 87 ctl_set_translit1(void) 88 { 89 iconv_t cd; 90 int arg = 1, ret; 91 92 cd = iconv_open("ASCII", "UTF-8"); 93 if (cd == (iconv_t)-1) 94 return (-1); 95 ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1; 96 if (iconv_close(cd) == -1) 97 return (-1); 98 return (ret); 99 } 100 101 static int 102 ctl_set_translit2(void) 103 { 104 iconv_t cd; 105 int arg = 0, ret; 106 107 cd = iconv_open("ASCII//TRANSLIT", "UTF-8"); 108 if (cd == (iconv_t)-1) 109 return (-1); 110 ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1; 111 if (iconv_close(cd) == -1) 112 return (-1); 113 return (ret); 114 } 115 116 static int 117 ctl_get_discard_ilseq1(void) 118 { 119 iconv_t cd; 120 int arg, ret; 121 122 cd = iconv_open("ASCII", "UTF-8"); 123 if (cd == (iconv_t)-1) 124 return (-1); 125 if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0) 126 ret = arg == 0 ? 0 : -1; 127 else 128 ret = -1; 129 if (iconv_close(cd) == -1) 130 return (-1); 131 return (ret); 132 } 133 134 static int 135 ctl_get_discard_ilseq2(void) 136 { 137 iconv_t cd; 138 int arg, ret; 139 140 cd = iconv_open("ASCII//IGNORE", "UTF-8"); 141 if (cd == (iconv_t)-1) 142 return (-1); 143 if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0) 144 ret = arg == 1 ? 0 : -1; 145 else 146 ret = -1; 147 if (iconv_close(cd) == -1) 148 return (-1); 149 return (ret); 150 } 151 152 static int 153 ctl_set_discard_ilseq1(void) 154 { 155 iconv_t cd; 156 int arg = 1, ret; 157 158 cd = iconv_open("ASCII", "UTF-8"); 159 if (cd == (iconv_t)-1) 160 return (-1); 161 ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1; 162 if (iconv_close(cd) == -1) 163 return (-1); 164 return (ret); 165 } 166 167 static int 168 ctl_set_discard_ilseq2(void) 169 { 170 iconv_t cd; 171 int arg = 0, ret; 172 173 cd = iconv_open("ASCII//IGNORE", "UTF-8"); 174 if (cd == (iconv_t)-1) 175 return (-1); 176 ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1; 177 if (iconv_close(cd) == -1) 178 return (-1); 179 return (ret); 180 } 181 182 static int 183 ctl_trivialp1(void) 184 { 185 iconv_t cd; 186 int arg, ret; 187 188 cd = iconv_open("latin2", "latin2"); 189 if (cd == (iconv_t)-1) 190 return (-1); 191 if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) { 192 ret = (arg == 1) ? 0 : -1; 193 } else 194 ret = -1; 195 if (iconv_close(cd) == -1) 196 return (-1); 197 return (ret); 198 } 199 200 static int 201 ctl_trivialp2(void) 202 { 203 iconv_t cd; 204 int arg, ret; 205 206 cd = iconv_open("ASCII", "KOI8-R"); 207 if (cd == (iconv_t)-1) 208 return (-1); 209 if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) { 210 ret = (arg == 0) ? 0 : -1; 211 } else 212 ret = -1; 213 if (iconv_close(cd) == -1) 214 return (-1); 215 return (ret); 216 } 217 218 void 219 unicode_hook(unsigned int mbr, void *data) 220 { 221 222 #ifdef VERBOSE 223 printf("Unicode hook: %u\n", mbr); 224 #endif 225 uc_hook = true; 226 } 227 228 void 229 wchar_hook(wchar_t wc, void *data) 230 { 231 232 #ifdef VERBOSE 233 printf("Wchar hook: %ull\n", wc); 234 #endif 235 wc_hook = true; 236 } 237 238 static int 239 ctl_uc_hook(void) 240 { 241 struct iconv_hooks hooks; 242 iconv_t cd; 243 size_t inbytesleft = 15, outbytesleft = 40; 244 char **inptr; 245 char *s = "Hello World!"; 246 char **outptr; 247 char *outbuf; 248 249 inptr = &s; 250 hooks.uc_hook = unicode_hook; 251 hooks.wc_hook = NULL; 252 253 outbuf = malloc(40); 254 outptr = &outbuf; 255 256 cd = iconv_open("UTF-8", "ASCII"); 257 if (cd == (iconv_t)-1) 258 return (-1); 259 if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0) 260 return (-1); 261 if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1) 262 return (-1); 263 if (iconv_close(cd) == -1) 264 return (-1); 265 return (uc_hook ? 0 : 1); 266 } 267 268 static int 269 ctl_wc_hook(void) 270 { 271 struct iconv_hooks hooks; 272 iconv_t cd; 273 size_t inbytesleft, outbytesleft = 40; 274 char **inptr; 275 char *s = "Hello World!"; 276 char **outptr; 277 char *outbuf; 278 279 inptr = &s; 280 hooks.wc_hook = wchar_hook; 281 hooks.uc_hook = NULL; 282 283 outbuf = malloc(40); 284 outptr = &outbuf; 285 inbytesleft = sizeof(s); 286 287 cd = iconv_open("SHIFT_JIS", "ASCII"); 288 if (cd == (iconv_t)-1) 289 return (-1); 290 if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0) 291 return (-1); 292 if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1) 293 return (-1); 294 if (iconv_close(cd) == -1) 295 return (-1); 296 return (wc_hook ? 0 : 1); 297 } 298 299 300 301 static int 302 gnu_canonicalize1(void) 303 { 304 305 return (strcmp(iconv_canonicalize("latin2"), "ISO-8859-2")); 306 } 307 308 static int 309 gnu_canonicalize2(void) 310 { 311 312 return (!strcmp(iconv_canonicalize("ASCII"), iconv_canonicalize("latin2"))); 313 } 314 315 316 static int 317 iconvlist_cb(unsigned int count, const char * const *names, void *data) 318 { 319 320 return (*(int *)data = ((names == NULL) && (count > 0)) ? -1 : 0); 321 } 322 323 static int 324 gnu_iconvlist(void) 325 { 326 int i; 327 328 iconvlist(iconvlist_cb, (void *)&i); 329 return (i); 330 } 331 332 void 333 mb_to_uc_fb(const char* inbuf, size_t inbufsize, 334 void (*write_replacement)(const unsigned int *buf, size_t buflen, 335 void* callback_arg), void* callback_arg, void* data) 336 { 337 unsigned int c = 0x3F; 338 339 mb_uc_fb = true; 340 write_replacement((const unsigned int *)&c, 1, NULL); 341 } 342 343 static int __unused 344 ctl_mb_to_uc_fb(void) 345 { 346 struct iconv_fallbacks fb; 347 iconv_t cd; 348 size_t inbytesleft, outbytesleft; 349 uint16_t inbuf[1] = { 0xF187 }; 350 uint8_t outbuf[4] = { 0x00, 0x00, 0x00, 0x00 }; 351 char *inptr; 352 char *outptr; 353 int ret; 354 355 if ((cd = iconv_open("UTF-32", "UTF-8")) == (iconv_t)-1) 356 return (1); 357 358 fb.uc_to_mb_fallback = NULL; 359 fb.mb_to_wc_fallback = NULL; 360 fb.wc_to_mb_fallback = NULL; 361 fb.mb_to_uc_fallback = mb_to_uc_fb; 362 fb.data = NULL; 363 364 if (iconvctl(cd, ICONV_SET_FALLBACKS, (void *)&fb) != 0) 365 return (1); 366 367 inptr = (char *)inbuf; 368 outptr = (char *)outbuf; 369 inbytesleft = 2; 370 outbytesleft = 4; 371 372 errno = 0; 373 ret = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft); 374 375 #ifdef VERBOSE 376 printf("mb_uc fallback: %c\n", outbuf[0]); 377 #endif 378 379 if (mb_uc_fb && (outbuf[0] == 0x3F)) 380 return (0); 381 else 382 return (1); 383 } 384 385 static int 386 gnu_openinto(void) 387 { 388 iconv_allocation_t *myspace; 389 size_t inbytesleft, outbytesleft; 390 char *inptr; 391 char *inbuf = "works!", *outptr; 392 char outbuf[6]; 393 394 if ((myspace = (iconv_allocation_t *)malloc(sizeof(iconv_allocation_t))) == NULL) 395 return (1); 396 if (iconv_open_into("ASCII", "ASCII", myspace) == -1) 397 return (1); 398 399 inptr = (char *)inbuf; 400 outptr = (char *)outbuf; 401 inbytesleft = 6; 402 outbytesleft = 6; 403 404 iconv((iconv_t)myspace, &inptr, &inbytesleft, &outptr, &outbytesleft); 405 406 return ((memcmp(inbuf, outbuf, 6) == 0) ? 0 : 1); 407 } 408 409 static void 410 test(int (tester) (void), const char * label) 411 { 412 int ret; 413 414 if ((ret = tester())) 415 printf("%s failed (%d)\n", label, ret); 416 else 417 printf("%s succeeded\n", label); 418 } 419 420 int 421 main(void) 422 { 423 test(ctl_get_translit1, "ctl_get_translit1"); 424 test(ctl_get_translit2, "ctl_get_translit2"); 425 test(ctl_set_translit1, "ctl_set_translit1"); 426 test(ctl_set_translit2, "ctl_set_translit2"); 427 test(ctl_get_discard_ilseq1, "ctl_get_discard_ilseq1"); 428 test(ctl_get_discard_ilseq2, "ctl_get_discard_ilseq2"); 429 test(ctl_set_discard_ilseq1, "ctl_set_discard_ilseq1"); 430 test(ctl_set_discard_ilseq2, "ctl_set_discard_ilseq2"); 431 test(ctl_trivialp1, "ctl_trivialp1"); 432 test(ctl_trivialp2, "ctl_trivialp2"); 433 test(ctl_uc_hook, "ctl_uc_hook"); 434 test(ctl_wc_hook, "ctl_wc_hook"); 435 // test(ctl_mb_to_uc_fb, "ctl_mb_to_uc_fb"); 436 test(gnu_openinto, "gnu_openinto"); 437 test(gnu_canonicalize1, "gnu_canonicalize1"); 438 test(gnu_canonicalize2, "gnu_canonicalize2"); 439 test(gnu_iconvlist, "gnu_iconvlist"); 440 } 441