1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/endian.h> 31 #include <sys/types.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <iconv.h> 36 #include <stdbool.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 41 static bool uc_hook = false; 42 static bool wc_hook = false; 43 static bool mb_uc_fb = false; 44 45 void unicode_hook(unsigned int mbr, void *data); 46 void wchar_hook(wchar_t wc, void *data); 47 48 void mb_to_uc_fb(const char *, size_t, 49 void (*write_replacement) (const unsigned int *, size_t, void *), 50 void *, void *); 51 52 static int 53 ctl_get_translit1(void) 54 { 55 iconv_t cd; 56 int arg, ret; 57 58 cd = iconv_open("ASCII//TRANSLIT", "UTF-8"); 59 if (cd == (iconv_t)-1) 60 return (-1); 61 if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0) 62 ret = (arg == 1) ? 0 : -1; 63 else 64 ret = -1; 65 if (iconv_close(cd) == -1) 66 return (-1); 67 return (ret); 68 } 69 70 static int 71 ctl_get_translit2(void) 72 { 73 iconv_t cd; 74 int arg, ret; 75 76 cd = iconv_open("ASCII", "UTF-8"); 77 if (cd == (iconv_t)-1) 78 return (-1); 79 if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0) 80 ret = (arg == 0) ? 0 : -1; 81 else 82 ret = -1; 83 if (iconv_close(cd) == -1) 84 return (-1); 85 return (ret); 86 } 87 88 static int 89 ctl_set_translit1(void) 90 { 91 iconv_t cd; 92 int arg = 1, ret; 93 94 cd = iconv_open("ASCII", "UTF-8"); 95 if (cd == (iconv_t)-1) 96 return (-1); 97 ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1; 98 if (iconv_close(cd) == -1) 99 return (-1); 100 return (ret); 101 } 102 103 static int 104 ctl_set_translit2(void) 105 { 106 iconv_t cd; 107 int arg = 0, ret; 108 109 cd = iconv_open("ASCII//TRANSLIT", "UTF-8"); 110 if (cd == (iconv_t)-1) 111 return (-1); 112 ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1; 113 if (iconv_close(cd) == -1) 114 return (-1); 115 return (ret); 116 } 117 118 static int 119 ctl_get_discard_ilseq1(void) 120 { 121 iconv_t cd; 122 int arg, ret; 123 124 cd = iconv_open("ASCII", "UTF-8"); 125 if (cd == (iconv_t)-1) 126 return (-1); 127 if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0) 128 ret = arg == 0 ? 0 : -1; 129 else 130 ret = -1; 131 if (iconv_close(cd) == -1) 132 return (-1); 133 return (ret); 134 } 135 136 static int 137 ctl_get_discard_ilseq2(void) 138 { 139 iconv_t cd; 140 int arg, ret; 141 142 cd = iconv_open("ASCII//IGNORE", "UTF-8"); 143 if (cd == (iconv_t)-1) 144 return (-1); 145 if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0) 146 ret = arg == 1 ? 0 : -1; 147 else 148 ret = -1; 149 if (iconv_close(cd) == -1) 150 return (-1); 151 return (ret); 152 } 153 154 static int 155 ctl_set_discard_ilseq1(void) 156 { 157 iconv_t cd; 158 int arg = 1, ret; 159 160 cd = iconv_open("ASCII", "UTF-8"); 161 if (cd == (iconv_t)-1) 162 return (-1); 163 ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1; 164 if (iconv_close(cd) == -1) 165 return (-1); 166 return (ret); 167 } 168 169 static int 170 ctl_set_discard_ilseq2(void) 171 { 172 iconv_t cd; 173 int arg = 0, ret; 174 175 cd = iconv_open("ASCII//IGNORE", "UTF-8"); 176 if (cd == (iconv_t)-1) 177 return (-1); 178 ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1; 179 if (iconv_close(cd) == -1) 180 return (-1); 181 return (ret); 182 } 183 184 static int 185 ctl_trivialp1(void) 186 { 187 iconv_t cd; 188 int arg, ret; 189 190 cd = iconv_open("latin2", "latin2"); 191 if (cd == (iconv_t)-1) 192 return (-1); 193 if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) { 194 ret = (arg == 1) ? 0 : -1; 195 } else 196 ret = -1; 197 if (iconv_close(cd) == -1) 198 return (-1); 199 return (ret); 200 } 201 202 static int 203 ctl_trivialp2(void) 204 { 205 iconv_t cd; 206 int arg, ret; 207 208 cd = iconv_open("ASCII", "KOI8-R"); 209 if (cd == (iconv_t)-1) 210 return (-1); 211 if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) { 212 ret = (arg == 0) ? 0 : -1; 213 } else 214 ret = -1; 215 if (iconv_close(cd) == -1) 216 return (-1); 217 return (ret); 218 } 219 220 void 221 unicode_hook(unsigned int mbr, void *data) 222 { 223 224 #ifdef VERBOSE 225 printf("Unicode hook: %u\n", mbr); 226 #endif 227 uc_hook = true; 228 } 229 230 void 231 wchar_hook(wchar_t wc, void *data) 232 { 233 234 #ifdef VERBOSE 235 printf("Wchar hook: %ull\n", wc); 236 #endif 237 wc_hook = true; 238 } 239 240 static int 241 ctl_uc_hook(void) 242 { 243 struct iconv_hooks hooks; 244 iconv_t cd; 245 size_t inbytesleft = 15, outbytesleft = 40; 246 char **inptr; 247 char *s = "Hello World!"; 248 char **outptr; 249 char *outbuf; 250 251 inptr = &s; 252 hooks.uc_hook = unicode_hook; 253 hooks.wc_hook = NULL; 254 255 outbuf = malloc(40); 256 outptr = &outbuf; 257 258 cd = iconv_open("UTF-8", "ASCII"); 259 if (cd == (iconv_t)-1) 260 return (-1); 261 if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0) 262 return (-1); 263 if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1) 264 return (-1); 265 if (iconv_close(cd) == -1) 266 return (-1); 267 return (uc_hook ? 0 : 1); 268 } 269 270 static int 271 ctl_wc_hook(void) 272 { 273 struct iconv_hooks hooks; 274 iconv_t cd; 275 size_t inbytesleft, outbytesleft = 40; 276 char **inptr; 277 char *s = "Hello World!"; 278 char **outptr; 279 char *outbuf; 280 281 inptr = &s; 282 hooks.wc_hook = wchar_hook; 283 hooks.uc_hook = NULL; 284 285 outbuf = malloc(40); 286 outptr = &outbuf; 287 inbytesleft = sizeof(s); 288 289 cd = iconv_open("SHIFT_JIS", "ASCII"); 290 if (cd == (iconv_t)-1) 291 return (-1); 292 if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0) 293 return (-1); 294 if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1) 295 return (-1); 296 if (iconv_close(cd) == -1) 297 return (-1); 298 return (wc_hook ? 0 : 1); 299 } 300 301 302 303 static int 304 gnu_canonicalize1(void) 305 { 306 307 return (strcmp(iconv_canonicalize("latin2"), "ISO-8859-2")); 308 } 309 310 static int 311 gnu_canonicalize2(void) 312 { 313 314 return (!strcmp(iconv_canonicalize("ASCII"), iconv_canonicalize("latin2"))); 315 } 316 317 318 static int 319 iconvlist_cb(unsigned int count, const char * const *names, void *data) 320 { 321 322 return (*(int *)data = ((names == NULL) && (count > 0)) ? -1 : 0); 323 } 324 325 static int 326 gnu_iconvlist(void) 327 { 328 int i; 329 330 iconvlist(iconvlist_cb, (void *)&i); 331 return (i); 332 } 333 334 void 335 mb_to_uc_fb(const char* inbuf, size_t inbufsize, 336 void (*write_replacement)(const unsigned int *buf, size_t buflen, 337 void* callback_arg), void* callback_arg, void* data) 338 { 339 unsigned int c = 0x3F; 340 341 mb_uc_fb = true; 342 write_replacement((const unsigned int *)&c, 1, NULL); 343 } 344 345 static int __unused 346 ctl_mb_to_uc_fb(void) 347 { 348 struct iconv_fallbacks fb; 349 iconv_t cd; 350 size_t inbytesleft, outbytesleft; 351 uint16_t inbuf[1] = { 0xF187 }; 352 uint8_t outbuf[4] = { 0x00, 0x00, 0x00, 0x00 }; 353 char *inptr; 354 char *outptr; 355 int ret; 356 357 if ((cd = iconv_open("UTF-32", "UTF-8")) == (iconv_t)-1) 358 return (1); 359 360 fb.uc_to_mb_fallback = NULL; 361 fb.mb_to_wc_fallback = NULL; 362 fb.wc_to_mb_fallback = NULL; 363 fb.mb_to_uc_fallback = mb_to_uc_fb; 364 fb.data = NULL; 365 366 if (iconvctl(cd, ICONV_SET_FALLBACKS, (void *)&fb) != 0) 367 return (1); 368 369 inptr = (char *)inbuf; 370 outptr = (char *)outbuf; 371 inbytesleft = 2; 372 outbytesleft = 4; 373 374 errno = 0; 375 ret = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft); 376 377 #ifdef VERBOSE 378 printf("mb_uc fallback: %c\n", outbuf[0]); 379 #endif 380 381 if (mb_uc_fb && (outbuf[0] == 0x3F)) 382 return (0); 383 else 384 return (1); 385 } 386 387 static int 388 gnu_openinto(void) 389 { 390 iconv_allocation_t *myspace; 391 size_t inbytesleft, outbytesleft; 392 char *inptr; 393 char *inbuf = "works!", *outptr; 394 char outbuf[6]; 395 396 if ((myspace = (iconv_allocation_t *)malloc(sizeof(iconv_allocation_t))) == NULL) 397 return (1); 398 if (iconv_open_into("ASCII", "ASCII", myspace) == -1) 399 return (1); 400 401 inptr = (char *)inbuf; 402 outptr = (char *)outbuf; 403 inbytesleft = 6; 404 outbytesleft = 6; 405 406 iconv((iconv_t)myspace, &inptr, &inbytesleft, &outptr, &outbytesleft); 407 408 return ((memcmp(inbuf, outbuf, 6) == 0) ? 0 : 1); 409 } 410 411 static void 412 test(int (tester) (void), const char * label) 413 { 414 int ret; 415 416 if ((ret = tester())) 417 printf("%s failed (%d)\n", label, ret); 418 else 419 printf("%s succeeded\n", label); 420 } 421 422 int 423 main(void) 424 { 425 test(ctl_get_translit1, "ctl_get_translit1"); 426 test(ctl_get_translit2, "ctl_get_translit2"); 427 test(ctl_set_translit1, "ctl_set_translit1"); 428 test(ctl_set_translit2, "ctl_set_translit2"); 429 test(ctl_get_discard_ilseq1, "ctl_get_discard_ilseq1"); 430 test(ctl_get_discard_ilseq2, "ctl_get_discard_ilseq2"); 431 test(ctl_set_discard_ilseq1, "ctl_set_discard_ilseq1"); 432 test(ctl_set_discard_ilseq2, "ctl_set_discard_ilseq2"); 433 test(ctl_trivialp1, "ctl_trivialp1"); 434 test(ctl_trivialp2, "ctl_trivialp2"); 435 test(ctl_uc_hook, "ctl_uc_hook"); 436 test(ctl_wc_hook, "ctl_wc_hook"); 437 // test(ctl_mb_to_uc_fb, "ctl_mb_to_uc_fb"); 438 test(gnu_openinto, "gnu_openinto"); 439 test(gnu_canonicalize1, "gnu_canonicalize1"); 440 test(gnu_canonicalize2, "gnu_canonicalize2"); 441 test(gnu_iconvlist, "gnu_iconvlist"); 442 } 443