xref: /freebsd/tools/test/iconv/gnu/gnu.c (revision 1d386b48a555f61cb7325543adbbb5c3f3407a66)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 #include <sys/endian.h>
29 #include <sys/types.h>
30 
31 #include <err.h>
32 #include <errno.h>
33 #include <iconv.h>
34 #include <stdbool.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 
/*
 * Flags raised by the callbacks declared below; the tests in this file
 * read them after a conversion to find out whether a callback ran.
 */
static bool uc_hook = false;	/* set by unicode_hook() */
static bool wc_hook = false;	/* set by wchar_hook() */
static bool mb_uc_fb = false;	/* set by mb_to_uc_fb() */

/* Hook callbacks installed through ICONV_SET_HOOKS. */
void	 unicode_hook(unsigned int mbr, void *data);
void	 wchar_hook(wchar_t wc, void *data);

/* Fallback callback installed through ICONV_SET_FALLBACKS. */
void	 mb_to_uc_fb(const char *inbuf, size_t inbufsize,
	    void (*write_replacement)(const unsigned int *buf, size_t buflen,
	    void *callback_arg),
	    void *callback_arg, void *data);
49 
50 static int
ctl_get_translit1(void)51 ctl_get_translit1(void)
52 {
53 	iconv_t cd;
54 	int arg, ret;
55 
56 	cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
57 	if (cd == (iconv_t)-1)
58 		return (-1);
59 	if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
60 		ret = (arg == 1) ? 0 : -1;
61 	else
62 		ret = -1;
63 	if (iconv_close(cd) == -1)
64 		return (-1);
65 	return (ret);
66 }
67 
68 static int
ctl_get_translit2(void)69 ctl_get_translit2(void)
70 {
71 	iconv_t cd;
72 	int arg, ret;
73 
74 	cd = iconv_open("ASCII", "UTF-8");
75 	if (cd == (iconv_t)-1)
76 		return (-1);
77 	if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
78 		ret = (arg == 0) ? 0 : -1;
79 	else
80 		ret = -1;
81 	if (iconv_close(cd) == -1)
82 		return (-1);
83 	return (ret);
84 }
85 
86 static int
ctl_set_translit1(void)87 ctl_set_translit1(void)
88 {
89 	iconv_t cd;
90 	int arg = 1, ret;
91 
92 	cd = iconv_open("ASCII", "UTF-8");
93 	if (cd == (iconv_t)-1)
94 		return (-1);
95 	ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
96 	if (iconv_close(cd) == -1)
97 		return (-1);
98 	return (ret);
99 }
100 
101 static int
ctl_set_translit2(void)102 ctl_set_translit2(void)
103 {
104 	iconv_t cd;
105 	int arg = 0, ret;
106 
107 	cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
108 	if (cd == (iconv_t)-1)
109 		return (-1);
110 	ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
111 	if (iconv_close(cd) == -1)
112 		return (-1);
113 	return (ret);
114 }
115 
116 static int
ctl_get_discard_ilseq1(void)117 ctl_get_discard_ilseq1(void)
118 {
119 	iconv_t cd;
120         int arg, ret;
121 
122 	cd = iconv_open("ASCII", "UTF-8");
123 	if (cd == (iconv_t)-1)
124 		return (-1);
125 	if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
126 		ret = arg == 0 ? 0 : -1;
127 	else
128 		ret = -1;
129 	if (iconv_close(cd) == -1)
130 		return (-1);
131 	return (ret);
132 }
133 
134 static int
ctl_get_discard_ilseq2(void)135 ctl_get_discard_ilseq2(void)
136 {
137 	iconv_t cd;
138 	int arg, ret;
139 
140 	cd = iconv_open("ASCII//IGNORE", "UTF-8");
141 	if (cd == (iconv_t)-1)
142 		return (-1);
143 	if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
144 		ret = arg == 1 ? 0 : -1;
145 	else
146 		ret = -1;
147 	if (iconv_close(cd) == -1)
148 		return (-1);
149 	return (ret);
150 }
151 
152 static int
ctl_set_discard_ilseq1(void)153 ctl_set_discard_ilseq1(void)
154 {
155 	iconv_t cd;
156 	int arg = 1, ret;
157 
158 	cd = iconv_open("ASCII", "UTF-8");
159 	if (cd == (iconv_t)-1)
160 		return (-1);
161 	ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
162 	if (iconv_close(cd) == -1)
163 		return (-1);
164 	return (ret);
165 }
166 
167 static int
ctl_set_discard_ilseq2(void)168 ctl_set_discard_ilseq2(void)
169 {
170 	iconv_t cd;
171         int arg = 0, ret;
172 
173 	cd = iconv_open("ASCII//IGNORE", "UTF-8");
174 	if (cd == (iconv_t)-1)
175 	return (-1);
176 	ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
177 	if (iconv_close(cd) == -1)
178 		return (-1);
179 	return (ret);
180 }
181 
182 static int
ctl_trivialp1(void)183 ctl_trivialp1(void)
184 {
185 	iconv_t cd;
186         int arg, ret;
187 
188 	cd = iconv_open("latin2", "latin2");
189 	if (cd == (iconv_t)-1)
190 		return (-1);
191 	if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
192 		ret = (arg == 1) ? 0 : -1;
193         } else
194                 ret = -1;
195 	if (iconv_close(cd) == -1)
196 		return (-1);
197 	return (ret);
198 }
199 
200 static int
ctl_trivialp2(void)201 ctl_trivialp2(void)
202 {
203 	iconv_t cd;
204 	int arg, ret;
205 
206 	cd = iconv_open("ASCII", "KOI8-R");
207 	if (cd == (iconv_t)-1)
208 		return (-1);
209 	if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
210 		ret = (arg == 0) ? 0 : -1;
211 	} else
212 		ret = -1;
213 	if (iconv_close(cd) == -1)
214 		return (-1);
215 	return (ret);
216 }
217 
218 void
unicode_hook(unsigned int mbr,void * data)219 unicode_hook(unsigned int mbr, void *data)
220 {
221 
222 #ifdef VERBOSE
223 	printf("Unicode hook: %u\n", mbr);
224 #endif
225 	uc_hook = true;
226 }
227 
228 void
wchar_hook(wchar_t wc,void * data)229 wchar_hook(wchar_t wc, void *data)
230 {
231 
232 #ifdef VERBOSE
233 	printf("Wchar hook: %ull\n", wc);
234 #endif
235 	wc_hook = true;
236 }
237 
238 static int
ctl_uc_hook(void)239 ctl_uc_hook(void)
240 {
241 	struct iconv_hooks hooks;
242 	iconv_t cd;
243 	size_t inbytesleft = 15, outbytesleft = 40;
244 	char **inptr;
245 	char *s = "Hello World!";
246 	char **outptr;
247 	char *outbuf;
248 
249 	inptr = &s;
250 	hooks.uc_hook = unicode_hook;
251 	hooks.wc_hook = NULL;
252 
253 	outbuf = malloc(40);
254 	outptr = &outbuf;
255 
256 	cd = iconv_open("UTF-8", "ASCII");
257 	if (cd == (iconv_t)-1)
258 		return (-1);
259 	if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
260 		return (-1);
261 	if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
262 		return (-1);
263 	if (iconv_close(cd) == -1)
264 		return (-1);
265 	return (uc_hook ? 0 : 1);
266 }
267 
268 static int
ctl_wc_hook(void)269 ctl_wc_hook(void)
270 {
271 	struct iconv_hooks hooks;
272 	iconv_t cd;
273 	size_t inbytesleft, outbytesleft = 40;
274 	char **inptr;
275 	char *s = "Hello World!";
276 	char **outptr;
277 	char *outbuf;
278 
279 	inptr = &s;
280 	hooks.wc_hook = wchar_hook;
281 	hooks.uc_hook = NULL;
282 
283 	outbuf = malloc(40);
284 	outptr = &outbuf;
285 	inbytesleft = sizeof(s);
286 
287 	cd = iconv_open("SHIFT_JIS", "ASCII");
288 	if (cd == (iconv_t)-1)
289 		return (-1);
290 	if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
291 		return (-1);
292 	if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
293 		return (-1);
294 	if (iconv_close(cd) == -1)
295 		return (-1);
296 	return (wc_hook ? 0 : 1);
297 }
298 
299 
300 
/*
 * Compare the result of iconv_canonicalize("latin2") with "ISO-8859-2".
 *
 * Returns the strcmp() result, so 0 means the two names are equal.
 */
static int
gnu_canonicalize1(void)
{
	const char *canonical;

	canonical = iconv_canonicalize("latin2");
	return (strcmp(canonical, "ISO-8859-2"));
}
307 
/*
 * Compare the results of iconv_canonicalize() for "ASCII" and "latin2".
 *
 * Returns 0 when the two results differ and 1 when they are equal.
 */
static int
gnu_canonicalize2(void)
{
	const char *ascii_name, *latin2_name;

	ascii_name = iconv_canonicalize("ASCII");
	latin2_name = iconv_canonicalize("latin2");

	if (strcmp(ascii_name, latin2_name) == 0)
		return (1);
	return (0);
}
314 
315 
/*
 * Callback handed to iconvlist() by gnu_iconvlist().
 *
 * count: number of entries announced for this call.
 * names: the entries themselves.
 * data:  when non-NULL, points to an int that receives the status.
 *
 * The status is -1 when names is NULL although count is positive, and
 * 0 otherwise.  It is returned and, when data is non-NULL, also stored
 * through data.
 */
static int
iconvlist_cb(unsigned int count, const char * const *names, void *data)
{
	int status;

	status = (names == NULL && count > 0) ? -1 : 0;

	/* Tolerate a caller that does not supply a result slot. */
	if (data != NULL)
		*(int *)data = status;
	return (status);
}
322 
323 static int
gnu_iconvlist(void)324 gnu_iconvlist(void)
325 {
326 	int i;
327 
328 	iconvlist(iconvlist_cb, (void *)&i);
329 	return (i);
330 }
331 
332 void
mb_to_uc_fb(const char * inbuf,size_t inbufsize,void (* write_replacement)(const unsigned int * buf,size_t buflen,void * callback_arg),void * callback_arg,void * data)333 mb_to_uc_fb(const char* inbuf, size_t inbufsize,
334     void (*write_replacement)(const unsigned int *buf, size_t buflen,
335        void* callback_arg), void* callback_arg, void* data)
336 {
337 	unsigned int c = 0x3F;
338 
339 	mb_uc_fb = true;
340 	write_replacement((const unsigned int *)&c, 1, NULL);
341 }
342 
343 static int __unused
ctl_mb_to_uc_fb(void)344 ctl_mb_to_uc_fb(void)
345 {
346 	struct iconv_fallbacks fb;
347 	iconv_t cd;
348 	size_t inbytesleft, outbytesleft;
349 	uint16_t inbuf[1] = { 0xF187 };
350 	uint8_t outbuf[4] = { 0x00, 0x00, 0x00, 0x00 };
351 	char *inptr;
352 	char *outptr;
353 	int ret;
354 
355 	if ((cd = iconv_open("UTF-32", "UTF-8")) == (iconv_t)-1)
356 		return (1);
357 
358 	fb.uc_to_mb_fallback = NULL;
359 	fb.mb_to_wc_fallback = NULL;
360 	fb.wc_to_mb_fallback = NULL;
361 	fb.mb_to_uc_fallback = mb_to_uc_fb;
362 	fb.data = NULL;
363 
364 	if (iconvctl(cd, ICONV_SET_FALLBACKS, (void *)&fb) != 0)
365 		return (1);
366 
367 	inptr = (char *)inbuf;
368 	outptr = (char *)outbuf;
369 	inbytesleft = 2;
370 	outbytesleft = 4;
371 
372 	errno = 0;
373 	ret = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft);
374 
375 #ifdef VERBOSE
376 	printf("mb_uc fallback: %c\n", outbuf[0]);
377 #endif
378 
379 	if (mb_uc_fb && (outbuf[0] == 0x3F))
380 		return (0);
381 	else
382 		return (1);
383 }
384 
385 static int
gnu_openinto(void)386 gnu_openinto(void)
387 {
388 	iconv_allocation_t *myspace;
389 	size_t inbytesleft, outbytesleft;
390 	char *inptr;
391 	char *inbuf = "works!", *outptr;
392 	char outbuf[6];
393 
394 	if ((myspace = (iconv_allocation_t *)malloc(sizeof(iconv_allocation_t))) == NULL)
395 		return (1);
396 	if (iconv_open_into("ASCII", "ASCII", myspace) == -1)
397 		return (1);
398 
399 	inptr = (char *)inbuf;
400 	outptr = (char *)outbuf;
401 	inbytesleft = 6;
402 	outbytesleft = 6;
403 
404 	iconv((iconv_t)myspace, &inptr, &inbytesleft, &outptr, &outbytesleft);
405 
406 	return ((memcmp(inbuf, outbuf, 6) == 0)	? 0 : 1);
407 }
408 
/*
 * Run one test function and print its verdict on stdout.
 *
 * tester: test to run; a return value of 0 means success.
 * label:  name printed in the verdict line.
 */
static void
test(int (tester) (void), const char * label)
{
	const int status = tester();

	if (status == 0) {
		printf("%s succeeded\n", label);
		return;
	}
	printf("%s failed (%d)\n", label, status);
}
419 
420 int
main(void)421 main(void)
422 {
423 	test(ctl_get_translit1, "ctl_get_translit1");
424 	test(ctl_get_translit2, "ctl_get_translit2");
425 	test(ctl_set_translit1, "ctl_set_translit1");
426 	test(ctl_set_translit2, "ctl_set_translit2");
427 	test(ctl_get_discard_ilseq1, "ctl_get_discard_ilseq1");
428 	test(ctl_get_discard_ilseq2, "ctl_get_discard_ilseq2");
429 	test(ctl_set_discard_ilseq1, "ctl_set_discard_ilseq1");
430 	test(ctl_set_discard_ilseq2, "ctl_set_discard_ilseq2");
431 	test(ctl_trivialp1, "ctl_trivialp1");
432 	test(ctl_trivialp2, "ctl_trivialp2");
433 	test(ctl_uc_hook, "ctl_uc_hook");
434 	test(ctl_wc_hook, "ctl_wc_hook");
435 //	test(ctl_mb_to_uc_fb, "ctl_mb_to_uc_fb");
436 	test(gnu_openinto, "gnu_openinto");
437 	test(gnu_canonicalize1, "gnu_canonicalize1");
438 	test(gnu_canonicalize2, "gnu_canonicalize2");
439 	test(gnu_iconvlist, "gnu_iconvlist");
440 }
441