1 /*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 #include <sys/endian.h>
29 #include <sys/types.h>
30
31 #include <err.h>
32 #include <errno.h>
33 #include <iconv.h>
34 #include <stdbool.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38
/*
 * Flags set to true by the callbacks declared below; the tests read them
 * to learn whether the corresponding callback was invoked.
 */
static bool uc_hook = false;
static bool wc_hook = false;
static bool mb_uc_fb = false;

/* Conversion hooks, installed by the tests through ICONV_SET_HOOKS. */
void unicode_hook(unsigned int mbr, void *data);
void wchar_hook(wchar_t wc, void *data);

/* Multibyte-to-Unicode fallback, installed through ICONV_SET_FALLBACKS. */
void mb_to_uc_fb(const char *inbuf, size_t inbufsize,
    void (*write_replacement)(const unsigned int *buf, size_t buflen,
    void *callback_arg),
    void *callback_arg, void *data);
49
50 static int
ctl_get_translit1(void)51 ctl_get_translit1(void)
52 {
53 iconv_t cd;
54 int arg, ret;
55
56 cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
57 if (cd == (iconv_t)-1)
58 return (-1);
59 if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
60 ret = (arg == 1) ? 0 : -1;
61 else
62 ret = -1;
63 if (iconv_close(cd) == -1)
64 return (-1);
65 return (ret);
66 }
67
68 static int
ctl_get_translit2(void)69 ctl_get_translit2(void)
70 {
71 iconv_t cd;
72 int arg, ret;
73
74 cd = iconv_open("ASCII", "UTF-8");
75 if (cd == (iconv_t)-1)
76 return (-1);
77 if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
78 ret = (arg == 0) ? 0 : -1;
79 else
80 ret = -1;
81 if (iconv_close(cd) == -1)
82 return (-1);
83 return (ret);
84 }
85
86 static int
ctl_set_translit1(void)87 ctl_set_translit1(void)
88 {
89 iconv_t cd;
90 int arg = 1, ret;
91
92 cd = iconv_open("ASCII", "UTF-8");
93 if (cd == (iconv_t)-1)
94 return (-1);
95 ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
96 if (iconv_close(cd) == -1)
97 return (-1);
98 return (ret);
99 }
100
101 static int
ctl_set_translit2(void)102 ctl_set_translit2(void)
103 {
104 iconv_t cd;
105 int arg = 0, ret;
106
107 cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
108 if (cd == (iconv_t)-1)
109 return (-1);
110 ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
111 if (iconv_close(cd) == -1)
112 return (-1);
113 return (ret);
114 }
115
116 static int
ctl_get_discard_ilseq1(void)117 ctl_get_discard_ilseq1(void)
118 {
119 iconv_t cd;
120 int arg, ret;
121
122 cd = iconv_open("ASCII", "UTF-8");
123 if (cd == (iconv_t)-1)
124 return (-1);
125 if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
126 ret = arg == 0 ? 0 : -1;
127 else
128 ret = -1;
129 if (iconv_close(cd) == -1)
130 return (-1);
131 return (ret);
132 }
133
134 static int
ctl_get_discard_ilseq2(void)135 ctl_get_discard_ilseq2(void)
136 {
137 iconv_t cd;
138 int arg, ret;
139
140 cd = iconv_open("ASCII//IGNORE", "UTF-8");
141 if (cd == (iconv_t)-1)
142 return (-1);
143 if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
144 ret = arg == 1 ? 0 : -1;
145 else
146 ret = -1;
147 if (iconv_close(cd) == -1)
148 return (-1);
149 return (ret);
150 }
151
152 static int
ctl_set_discard_ilseq1(void)153 ctl_set_discard_ilseq1(void)
154 {
155 iconv_t cd;
156 int arg = 1, ret;
157
158 cd = iconv_open("ASCII", "UTF-8");
159 if (cd == (iconv_t)-1)
160 return (-1);
161 ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
162 if (iconv_close(cd) == -1)
163 return (-1);
164 return (ret);
165 }
166
167 static int
ctl_set_discard_ilseq2(void)168 ctl_set_discard_ilseq2(void)
169 {
170 iconv_t cd;
171 int arg = 0, ret;
172
173 cd = iconv_open("ASCII//IGNORE", "UTF-8");
174 if (cd == (iconv_t)-1)
175 return (-1);
176 ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
177 if (iconv_close(cd) == -1)
178 return (-1);
179 return (ret);
180 }
181
182 static int
ctl_trivialp1(void)183 ctl_trivialp1(void)
184 {
185 iconv_t cd;
186 int arg, ret;
187
188 cd = iconv_open("latin2", "latin2");
189 if (cd == (iconv_t)-1)
190 return (-1);
191 if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
192 ret = (arg == 1) ? 0 : -1;
193 } else
194 ret = -1;
195 if (iconv_close(cd) == -1)
196 return (-1);
197 return (ret);
198 }
199
200 static int
ctl_trivialp2(void)201 ctl_trivialp2(void)
202 {
203 iconv_t cd;
204 int arg, ret;
205
206 cd = iconv_open("ASCII", "KOI8-R");
207 if (cd == (iconv_t)-1)
208 return (-1);
209 if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
210 ret = (arg == 0) ? 0 : -1;
211 } else
212 ret = -1;
213 if (iconv_close(cd) == -1)
214 return (-1);
215 return (ret);
216 }
217
218 void
unicode_hook(unsigned int mbr,void * data)219 unicode_hook(unsigned int mbr, void *data)
220 {
221
222 #ifdef VERBOSE
223 printf("Unicode hook: %u\n", mbr);
224 #endif
225 uc_hook = true;
226 }
227
228 void
wchar_hook(wchar_t wc,void * data)229 wchar_hook(wchar_t wc, void *data)
230 {
231
232 #ifdef VERBOSE
233 printf("Wchar hook: %ull\n", wc);
234 #endif
235 wc_hook = true;
236 }
237
238 static int
ctl_uc_hook(void)239 ctl_uc_hook(void)
240 {
241 struct iconv_hooks hooks;
242 iconv_t cd;
243 size_t inbytesleft = 15, outbytesleft = 40;
244 char **inptr;
245 char *s = "Hello World!";
246 char **outptr;
247 char *outbuf;
248
249 inptr = &s;
250 hooks.uc_hook = unicode_hook;
251 hooks.wc_hook = NULL;
252
253 outbuf = malloc(40);
254 outptr = &outbuf;
255
256 cd = iconv_open("UTF-8", "ASCII");
257 if (cd == (iconv_t)-1)
258 return (-1);
259 if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
260 return (-1);
261 if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
262 return (-1);
263 if (iconv_close(cd) == -1)
264 return (-1);
265 return (uc_hook ? 0 : 1);
266 }
267
268 static int
ctl_wc_hook(void)269 ctl_wc_hook(void)
270 {
271 struct iconv_hooks hooks;
272 iconv_t cd;
273 size_t inbytesleft, outbytesleft = 40;
274 char **inptr;
275 char *s = "Hello World!";
276 char **outptr;
277 char *outbuf;
278
279 inptr = &s;
280 hooks.wc_hook = wchar_hook;
281 hooks.uc_hook = NULL;
282
283 outbuf = malloc(40);
284 outptr = &outbuf;
285 inbytesleft = sizeof(s);
286
287 cd = iconv_open("SHIFT_JIS", "ASCII");
288 if (cd == (iconv_t)-1)
289 return (-1);
290 if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
291 return (-1);
292 if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
293 return (-1);
294 if (iconv_close(cd) == -1)
295 return (-1);
296 return (wc_hook ? 0 : 1);
297 }
298
299
300
/*
 * Checks that iconv_canonicalize() maps "latin2" to "ISO-8859-2".
 *
 * Returns 0 when the names match, the non-zero strcmp() result when they
 * differ, and -1 when iconv_canonicalize() returns NULL.
 */
static int
gnu_canonicalize1(void)
{
	const char *canon;

	canon = iconv_canonicalize("latin2");
	/* strcmp() must not be handed a null pointer. */
	if (canon == NULL)
		return (-1);
	return (strcmp(canon, "ISO-8859-2"));
}
307
/*
 * Checks that iconv_canonicalize() gives "ASCII" and "latin2" different
 * canonical names.
 *
 * Returns 0 when the names differ, 1 when they are equal, and -1 when
 * either lookup returns NULL.
 */
static int
gnu_canonicalize2(void)
{
	const char *ascii, *latin2;

	ascii = iconv_canonicalize("ASCII");
	latin2 = iconv_canonicalize("latin2");
	/* strcmp() must not be handed a null pointer. */
	if (ascii == NULL || latin2 == NULL)
		return (-1);
	return (strcmp(ascii, latin2) == 0 ? 1 : 0);
}
314
315
/*
 * Callback handed to iconvlist().  Stores a verdict in the int that data
 * points to and returns the same value: -1 when a non-zero count comes
 * with a NULL names array, 0 otherwise.
 */
static int
iconvlist_cb(unsigned int count, const char * const *names, void *data)
{
	int *verdict = data;

	if (count > 0 && names == NULL)
		*verdict = -1;
	else
		*verdict = 0;
	return (*verdict);
}
322
323 static int
gnu_iconvlist(void)324 gnu_iconvlist(void)
325 {
326 int i;
327
328 iconvlist(iconvlist_cb, (void *)&i);
329 return (i);
330 }
331
332 void
mb_to_uc_fb(const char * inbuf,size_t inbufsize,void (* write_replacement)(const unsigned int * buf,size_t buflen,void * callback_arg),void * callback_arg,void * data)333 mb_to_uc_fb(const char* inbuf, size_t inbufsize,
334 void (*write_replacement)(const unsigned int *buf, size_t buflen,
335 void* callback_arg), void* callback_arg, void* data)
336 {
337 unsigned int c = 0x3F;
338
339 mb_uc_fb = true;
340 write_replacement((const unsigned int *)&c, 1, NULL);
341 }
342
343 static int __unused
ctl_mb_to_uc_fb(void)344 ctl_mb_to_uc_fb(void)
345 {
346 struct iconv_fallbacks fb;
347 iconv_t cd;
348 size_t inbytesleft, outbytesleft;
349 uint16_t inbuf[1] = { 0xF187 };
350 uint8_t outbuf[4] = { 0x00, 0x00, 0x00, 0x00 };
351 char *inptr;
352 char *outptr;
353 int ret;
354
355 if ((cd = iconv_open("UTF-32", "UTF-8")) == (iconv_t)-1)
356 return (1);
357
358 fb.uc_to_mb_fallback = NULL;
359 fb.mb_to_wc_fallback = NULL;
360 fb.wc_to_mb_fallback = NULL;
361 fb.mb_to_uc_fallback = mb_to_uc_fb;
362 fb.data = NULL;
363
364 if (iconvctl(cd, ICONV_SET_FALLBACKS, (void *)&fb) != 0)
365 return (1);
366
367 inptr = (char *)inbuf;
368 outptr = (char *)outbuf;
369 inbytesleft = 2;
370 outbytesleft = 4;
371
372 errno = 0;
373 ret = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft);
374
375 #ifdef VERBOSE
376 printf("mb_uc fallback: %c\n", outbuf[0]);
377 #endif
378
379 if (mb_uc_fb && (outbuf[0] == 0x3F))
380 return (0);
381 else
382 return (1);
383 }
384
385 static int
gnu_openinto(void)386 gnu_openinto(void)
387 {
388 iconv_allocation_t *myspace;
389 size_t inbytesleft, outbytesleft;
390 char *inptr;
391 char *inbuf = "works!", *outptr;
392 char outbuf[6];
393
394 if ((myspace = (iconv_allocation_t *)malloc(sizeof(iconv_allocation_t))) == NULL)
395 return (1);
396 if (iconv_open_into("ASCII", "ASCII", myspace) == -1)
397 return (1);
398
399 inptr = (char *)inbuf;
400 outptr = (char *)outbuf;
401 inbytesleft = 6;
402 outbytesleft = 6;
403
404 iconv((iconv_t)myspace, &inptr, &inbytesleft, &outptr, &outbytesleft);
405
406 return ((memcmp(inbuf, outbuf, 6) == 0) ? 0 : 1);
407 }
408
/*
 * Runs one test function and prints a one-line verdict to stdout:
 * "<label> succeeded" when it returns 0, "<label> failed (<value>)"
 * for any other return value.
 */
static void
test(int (*tester)(void), const char *label)
{
	const int ret = tester();

	if (ret == 0) {
		printf("%s succeeded\n", label);
		return;
	}
	printf("%s failed (%d)\n", label, ret);
}
419
420 int
main(void)421 main(void)
422 {
423 test(ctl_get_translit1, "ctl_get_translit1");
424 test(ctl_get_translit2, "ctl_get_translit2");
425 test(ctl_set_translit1, "ctl_set_translit1");
426 test(ctl_set_translit2, "ctl_set_translit2");
427 test(ctl_get_discard_ilseq1, "ctl_get_discard_ilseq1");
428 test(ctl_get_discard_ilseq2, "ctl_get_discard_ilseq2");
429 test(ctl_set_discard_ilseq1, "ctl_set_discard_ilseq1");
430 test(ctl_set_discard_ilseq2, "ctl_set_discard_ilseq2");
431 test(ctl_trivialp1, "ctl_trivialp1");
432 test(ctl_trivialp2, "ctl_trivialp2");
433 test(ctl_uc_hook, "ctl_uc_hook");
434 test(ctl_wc_hook, "ctl_wc_hook");
435 // test(ctl_mb_to_uc_fb, "ctl_mb_to_uc_fb");
436 test(gnu_openinto, "gnu_openinto");
437 test(gnu_canonicalize1, "gnu_canonicalize1");
438 test(gnu_canonicalize2, "gnu_canonicalize2");
439 test(gnu_iconvlist, "gnu_iconvlist");
440 }
441