xref: /freebsd/tools/test/iconv/gnu/gnu.c (revision 53b70c86d93c1e4d3c76f1282e94154e88780d7e)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/endian.h>
31 #include <sys/types.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <iconv.h>
36 #include <stdbool.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 
/* Set to true by unicode_hook() each time it is called. */
static bool uc_hook = false;
/* Set to true by wchar_hook() each time it is called. */
static bool wc_hook = false;
/* Set to true by mb_to_uc_fb() each time it is called. */
static bool mb_uc_fb = false;

/* Hook callbacks installed through iconvctl(ICONV_SET_HOOKS). */
void	unicode_hook(unsigned int mbr, void *data);
void	wchar_hook(wchar_t wc, void *data);

/* Fallback callback installed through iconvctl(ICONV_SET_FALLBACKS). */
void	mb_to_uc_fb(const char *, size_t,
	    void (*write_replacement)(const unsigned int *, size_t, void *),
	    void *, void *);
51 
52 static int
53 ctl_get_translit1(void)
54 {
55 	iconv_t cd;
56 	int arg, ret;
57 
58 	cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
59 	if (cd == (iconv_t)-1)
60 		return (-1);
61 	if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
62 		ret = (arg == 1) ? 0 : -1;
63 	else
64 		ret = -1;
65 	if (iconv_close(cd) == -1)
66 		return (-1);
67 	return (ret);
68 }
69 
70 static int
71 ctl_get_translit2(void)
72 {
73 	iconv_t cd;
74 	int arg, ret;
75 
76 	cd = iconv_open("ASCII", "UTF-8");
77 	if (cd == (iconv_t)-1)
78 		return (-1);
79 	if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
80 		ret = (arg == 0) ? 0 : -1;
81 	else
82 		ret = -1;
83 	if (iconv_close(cd) == -1)
84 		return (-1);
85 	return (ret);
86 }
87 
88 static int
89 ctl_set_translit1(void)
90 {
91 	iconv_t cd;
92 	int arg = 1, ret;
93 
94 	cd = iconv_open("ASCII", "UTF-8");
95 	if (cd == (iconv_t)-1)
96 		return (-1);
97 	ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
98 	if (iconv_close(cd) == -1)
99 		return (-1);
100 	return (ret);
101 }
102 
103 static int
104 ctl_set_translit2(void)
105 {
106 	iconv_t cd;
107 	int arg = 0, ret;
108 
109 	cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
110 	if (cd == (iconv_t)-1)
111 		return (-1);
112 	ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
113 	if (iconv_close(cd) == -1)
114 		return (-1);
115 	return (ret);
116 }
117 
118 static int
119 ctl_get_discard_ilseq1(void)
120 {
121 	iconv_t cd;
122         int arg, ret;
123 
124 	cd = iconv_open("ASCII", "UTF-8");
125 	if (cd == (iconv_t)-1)
126 		return (-1);
127 	if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
128 		ret = arg == 0 ? 0 : -1;
129 	else
130 		ret = -1;
131 	if (iconv_close(cd) == -1)
132 		return (-1);
133 	return (ret);
134 }
135 
136 static int
137 ctl_get_discard_ilseq2(void)
138 {
139 	iconv_t cd;
140 	int arg, ret;
141 
142 	cd = iconv_open("ASCII//IGNORE", "UTF-8");
143 	if (cd == (iconv_t)-1)
144 		return (-1);
145 	if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
146 		ret = arg == 1 ? 0 : -1;
147 	else
148 		ret = -1;
149 	if (iconv_close(cd) == -1)
150 		return (-1);
151 	return (ret);
152 }
153 
154 static int
155 ctl_set_discard_ilseq1(void)
156 {
157 	iconv_t cd;
158 	int arg = 1, ret;
159 
160 	cd = iconv_open("ASCII", "UTF-8");
161 	if (cd == (iconv_t)-1)
162 		return (-1);
163 	ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
164 	if (iconv_close(cd) == -1)
165 		return (-1);
166 	return (ret);
167 }
168 
169 static int
170 ctl_set_discard_ilseq2(void)
171 {
172 	iconv_t cd;
173         int arg = 0, ret;
174 
175 	cd = iconv_open("ASCII//IGNORE", "UTF-8");
176 	if (cd == (iconv_t)-1)
177 	return (-1);
178 	ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
179 	if (iconv_close(cd) == -1)
180 		return (-1);
181 	return (ret);
182 }
183 
184 static int
185 ctl_trivialp1(void)
186 {
187 	iconv_t cd;
188         int arg, ret;
189 
190 	cd = iconv_open("latin2", "latin2");
191 	if (cd == (iconv_t)-1)
192 		return (-1);
193 	if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
194 		ret = (arg == 1) ? 0 : -1;
195         } else
196                 ret = -1;
197 	if (iconv_close(cd) == -1)
198 		return (-1);
199 	return (ret);
200 }
201 
202 static int
203 ctl_trivialp2(void)
204 {
205 	iconv_t cd;
206 	int arg, ret;
207 
208 	cd = iconv_open("ASCII", "KOI8-R");
209 	if (cd == (iconv_t)-1)
210 		return (-1);
211 	if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
212 		ret = (arg == 0) ? 0 : -1;
213 	} else
214 		ret = -1;
215 	if (iconv_close(cd) == -1)
216 		return (-1);
217 	return (ret);
218 }
219 
220 void
221 unicode_hook(unsigned int mbr, void *data)
222 {
223 
224 #ifdef VERBOSE
225 	printf("Unicode hook: %u\n", mbr);
226 #endif
227 	uc_hook = true;
228 }
229 
230 void
231 wchar_hook(wchar_t wc, void *data)
232 {
233 
234 #ifdef VERBOSE
235 	printf("Wchar hook: %ull\n", wc);
236 #endif
237 	wc_hook = true;
238 }
239 
240 static int
241 ctl_uc_hook(void)
242 {
243 	struct iconv_hooks hooks;
244 	iconv_t cd;
245 	size_t inbytesleft = 15, outbytesleft = 40;
246 	char **inptr;
247 	char *s = "Hello World!";
248 	char **outptr;
249 	char *outbuf;
250 
251 	inptr = &s;
252 	hooks.uc_hook = unicode_hook;
253 	hooks.wc_hook = NULL;
254 
255 	outbuf = malloc(40);
256 	outptr = &outbuf;
257 
258 	cd = iconv_open("UTF-8", "ASCII");
259 	if (cd == (iconv_t)-1)
260 		return (-1);
261 	if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
262 		return (-1);
263 	if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
264 		return (-1);
265 	if (iconv_close(cd) == -1)
266 		return (-1);
267 	return (uc_hook ? 0 : 1);
268 }
269 
270 static int
271 ctl_wc_hook(void)
272 {
273 	struct iconv_hooks hooks;
274 	iconv_t cd;
275 	size_t inbytesleft, outbytesleft = 40;
276 	char **inptr;
277 	char *s = "Hello World!";
278 	char **outptr;
279 	char *outbuf;
280 
281 	inptr = &s;
282 	hooks.wc_hook = wchar_hook;
283 	hooks.uc_hook = NULL;
284 
285 	outbuf = malloc(40);
286 	outptr = &outbuf;
287 	inbytesleft = sizeof(s);
288 
289 	cd = iconv_open("SHIFT_JIS", "ASCII");
290 	if (cd == (iconv_t)-1)
291 		return (-1);
292 	if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
293 		return (-1);
294 	if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
295 		return (-1);
296 	if (iconv_close(cd) == -1)
297 		return (-1);
298 	return (wc_hook ? 0 : 1);
299 }
300 
301 
302 
/*
 * Checks that iconv_canonicalize("latin2") yields "ISO-8859-2".
 *
 * Returns the strcmp() result of the two names (0 on a match), or -1 when
 * iconv_canonicalize() returns NULL.
 */
static int
gnu_canonicalize1(void)
{
	const char *canon;

	canon = iconv_canonicalize("latin2");
	/* Never hand a NULL pointer to strcmp(). */
	if (canon == NULL)
		return (-1);
	return (strcmp(canon, "ISO-8859-2"));
}
309 
/*
 * Checks that "ASCII" and "latin2" canonicalize to different names.
 *
 * Returns 0 when the two canonical names differ, 1 when they are equal,
 * and -1 when iconv_canonicalize() returns NULL for either name.
 */
static int
gnu_canonicalize2(void)
{
	const char *ascii, *latin2;

	ascii = iconv_canonicalize("ASCII");
	latin2 = iconv_canonicalize("latin2");
	/* Never hand a NULL pointer to strcmp(). */
	if (ascii == NULL || latin2 == NULL)
		return (-1);
	return (strcmp(ascii, latin2) == 0 ? 1 : 0);
}
316 
317 
/*
 * Callback handed to iconvlist().  Treats a NULL name array combined with
 * a non-zero count as an error.
 *
 * The verdict (-1 for that error case, 0 otherwise) is stored in the int
 * that data points to and is also returned.
 */
static int
iconvlist_cb(unsigned int count, const char * const *names, void *data)
{
	int *verdict = data;

	if (names == NULL && count > 0)
		*verdict = -1;
	else
		*verdict = 0;
	return (*verdict);
}
324 
325 static int
326 gnu_iconvlist(void)
327 {
328 	int i;
329 
330 	iconvlist(iconvlist_cb, (void *)&i);
331 	return (i);
332 }
333 
334 void
335 mb_to_uc_fb(const char* inbuf, size_t inbufsize,
336     void (*write_replacement)(const unsigned int *buf, size_t buflen,
337        void* callback_arg), void* callback_arg, void* data)
338 {
339 	unsigned int c = 0x3F;
340 
341 	mb_uc_fb = true;
342 	write_replacement((const unsigned int *)&c, 1, NULL);
343 }
344 
345 static int __unused
346 ctl_mb_to_uc_fb(void)
347 {
348 	struct iconv_fallbacks fb;
349 	iconv_t cd;
350 	size_t inbytesleft, outbytesleft;
351 	uint16_t inbuf[1] = { 0xF187 };
352 	uint8_t outbuf[4] = { 0x00, 0x00, 0x00, 0x00 };
353 	char *inptr;
354 	char *outptr;
355 	int ret;
356 
357 	if ((cd = iconv_open("UTF-32", "UTF-8")) == (iconv_t)-1)
358 		return (1);
359 
360 	fb.uc_to_mb_fallback = NULL;
361 	fb.mb_to_wc_fallback = NULL;
362 	fb.wc_to_mb_fallback = NULL;
363 	fb.mb_to_uc_fallback = mb_to_uc_fb;
364 	fb.data = NULL;
365 
366 	if (iconvctl(cd, ICONV_SET_FALLBACKS, (void *)&fb) != 0)
367 		return (1);
368 
369 	inptr = (char *)inbuf;
370 	outptr = (char *)outbuf;
371 	inbytesleft = 2;
372 	outbytesleft = 4;
373 
374 	errno = 0;
375 	ret = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft);
376 
377 #ifdef VERBOSE
378 	printf("mb_uc fallback: %c\n", outbuf[0]);
379 #endif
380 
381 	if (mb_uc_fb && (outbuf[0] == 0x3F))
382 		return (0);
383 	else
384 		return (1);
385 }
386 
387 static int
388 gnu_openinto(void)
389 {
390 	iconv_allocation_t *myspace;
391 	size_t inbytesleft, outbytesleft;
392 	char *inptr;
393 	char *inbuf = "works!", *outptr;
394 	char outbuf[6];
395 
396 	if ((myspace = (iconv_allocation_t *)malloc(sizeof(iconv_allocation_t))) == NULL)
397 		return (1);
398 	if (iconv_open_into("ASCII", "ASCII", myspace) == -1)
399 		return (1);
400 
401 	inptr = (char *)inbuf;
402 	outptr = (char *)outbuf;
403 	inbytesleft = 6;
404 	outbytesleft = 6;
405 
406 	iconv((iconv_t)myspace, &inptr, &inbytesleft, &outptr, &outbytesleft);
407 
408 	return ((memcmp(inbuf, outbuf, 6) == 0)	? 0 : 1);
409 }
410 
/*
 * Runs one test function and prints its outcome.
 *
 * tester is called once; a return value of 0 is printed as
 * "<label> succeeded", any other value as "<label> failed (<value>)".
 */
static void
test(int (tester) (void), const char * label)
{
	const int rc = tester();

	if (rc == 0) {
		printf("%s succeeded\n", label);
		return;
	}
	printf("%s failed (%d)\n", label, rc);
}
421 
422 int
423 main(void)
424 {
425 	test(ctl_get_translit1, "ctl_get_translit1");
426 	test(ctl_get_translit2, "ctl_get_translit2");
427 	test(ctl_set_translit1, "ctl_set_translit1");
428 	test(ctl_set_translit2, "ctl_set_translit2");
429 	test(ctl_get_discard_ilseq1, "ctl_get_discard_ilseq1");
430 	test(ctl_get_discard_ilseq2, "ctl_get_discard_ilseq2");
431 	test(ctl_set_discard_ilseq1, "ctl_set_discard_ilseq1");
432 	test(ctl_set_discard_ilseq2, "ctl_set_discard_ilseq2");
433 	test(ctl_trivialp1, "ctl_trivialp1");
434 	test(ctl_trivialp2, "ctl_trivialp2");
435 	test(ctl_uc_hook, "ctl_uc_hook");
436 	test(ctl_wc_hook, "ctl_wc_hook");
437 //	test(ctl_mb_to_uc_fb, "ctl_mb_to_uc_fb");
438 	test(gnu_openinto, "gnu_openinto");
439 	test(gnu_canonicalize1, "gnu_canonicalize1");
440 	test(gnu_canonicalize2, "gnu_canonicalize2");
441 	test(gnu_iconvlist, "gnu_iconvlist");
442 }
443