xref: /freebsd/tools/test/iconv/tablegen/tablegen.c (revision ec0ea6efa1ad229d75c394c1a9b9cac33af2b1d3)
1 /*-
2  * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/endian.h>
31 #include <sys/types.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <getopt.h>
36 #include <iconv.h>
37 #include <stdbool.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 
/*
 * Bits recorded in fb_flags by the iconv fallback callbacks; each bit notes
 * that the corresponding fallback was invoked during a conversion.
 */
#define	UC_TO_MB_FLAG	0x01
#define	MB_TO_WC_FLAG	0x02
#define	MB_TO_UC_FLAG	0x04
#define	WC_TO_MB_FLAG	0x08

/* Larger of two values; note that each argument may be evaluated twice. */
#define	MAX(a,b)	((b) > (a) ? (b) : (a))
47 
/* Program name, used in the usage message. */
extern char	*__progname;

/* Short option letters accepted by getopt_long(). */
static const char	*optstr = "cdilrt";

/*
 * Fixed part of the Citrus mapping header printed in -c mode, after the
 * TYPE and NAME lines.
 */
static const char	*citrus_common =
    "SRC_ZONE\t0x0000-0xFFFF\n"
    "OOB_MODE\tILSEQ\n"
    "DST_ILSEQ\t0xFFFE\n"
    "DST_UNIT_BITS\t32\n"
    "\n"
    "BEGIN_MAP\n"
    "#\n"
    "# Generated with Citrus iconv (FreeBSD)\n"
    "#\n";

/* Command line switches, one per option letter. */
bool			 cflag;		/* -c / --citrus */
bool			 dflag;		/* -d / --diagnostic */
bool			 iflag;		/* -i / --ignore */
bool			 lflag;		/* -l / --long */
bool			 tflag;		/* -t / --translit */
bool			 rflag;		/* -r / --reverse */

/* Bitmask of *_FLAG values set by the fallback callbacks. */
int			 fb_flags;
64 
/* Forward declarations. */
static void		 do_conv(iconv_t cd, bool uniinput);

/* iconv fallback callbacks; each one only records that it was called. */
void			 mb_to_uc_fb(const char *inbuf, size_t inbufsize,
			     void (*write_replacement)(const unsigned int *buf,
			     size_t buflen, void *callback_arg),
			     void *callback_arg, void *data);
void			 mb_to_wc_fb(const char *inbuf, size_t inbufsize,
			     void (*write_replacement)(const wchar_t *buf,
			     size_t buflen, void *callback_arg),
			     void *callback_arg, void *data);
void			 uc_to_mb_fb(unsigned int code,
			     void (*write_replacement)(const char *buf,
			     size_t buflen, void *callback_arg),
			     void *callback_arg, void *data);
void			 wc_to_mb_fb(wchar_t wc,
			     void (*write_replacement)(const char *buf,
			     size_t buflen, void *callback_arg),
			     void *callback_arg, void *data);
78 
/* Long option names; each maps onto the short option letter in .val. */
struct option long_options[] = {
	{ .name = "citrus",     .has_arg = no_argument, .flag = NULL, .val = 'c' },
	{ .name = "diagnostic", .has_arg = no_argument, .flag = NULL, .val = 'd' },
	{ .name = "ignore",     .has_arg = no_argument, .flag = NULL, .val = 'i' },
	{ .name = "long",       .has_arg = no_argument, .flag = NULL, .val = 'l' },
	{ .name = "reverse",    .has_arg = no_argument, .flag = NULL, .val = 'r' },
	{ .name = "translit",   .has_arg = no_argument, .flag = NULL, .val = 't' },
	/* Terminating entry. */
	{ .name = NULL,         .has_arg = no_argument, .flag = NULL, .val = 0 }
};
89 
90 static void
91 usage(void) {
92 
93 	fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname);
94 	exit(EXIT_FAILURE);
95 }
96 
97 static void
98 format_diag(int errcode)
99 {
100 	const char *errstr;
101 	const char *u2m, *m2u, *m2w, *w2m;
102 
103 	switch (errcode) {
104 	case EINVAL:
105 		errstr = "EINVAL ";
106 		break;
107 	case EILSEQ:
108 		errstr = "EILSEQ ";
109 		break;
110 	case E2BIG:
111 		errstr = "E2BIG ";
112 		break;
113 	default:
114 		errstr = "UNKNOWN ";
115 		break;
116 	}
117 
118 	u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : "";
119 	m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : "";
120 	m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : "";
121 	w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : "";
122 
123 	printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m);
124 }
125 
/*
 * Return the number of bytes (1 to 4) needed to hold the value p, i.e. the
 * position of its most significant non-zero byte.  Zero counts as one byte.
 */
static int
magnitude(const uint32_t p)
{
	uint32_t rest;
	int nbytes;

	nbytes = 1;
	/* Strip one byte at a time until nothing is left above it. */
	for (rest = p >> 8; rest != 0; rest >>= 8)
		nbytes++;

	return (nbytes);
}
137 
/*
 * Print data to stdout as "0x" followed by zero-padded upper-case hex.
 * Two digits are printed per byte reported by magnitude(), with a minimum
 * of four digits (so one- and two-byte values both print as 0xNNNN).
 */
static void
format(const uint32_t data)
{
	int digits;

	digits = 2 * magnitude(data);
	if (digits < 4)
		digits = 4;

	printf("0x%0*X", digits, data);
}
160 
161 void
162 uc_to_mb_fb(unsigned int code,
163     void (*write_replacement)(const char *buf, size_t buflen,
164        void* callback_arg), void* callback_arg, void* data)
165 {
166 
167 	fb_flags |= UC_TO_MB_FLAG;
168 }
169 
170 void
171 mb_to_wc_fb(const char* inbuf, size_t inbufsize,
172     void (*write_replacement)(const wchar_t *buf, size_t buflen,
173        void* callback_arg), void* callback_arg, void* data)
174 {
175 
176 	fb_flags |= MB_TO_WC_FLAG;
177 }
178 
179 void
180 mb_to_uc_fb(const char* inbuf, size_t inbufsize,
181     void (*write_replacement)(const unsigned int *buf, size_t buflen,
182        void* callback_arg), void* callback_arg, void* data)
183 {
184 
185 	fb_flags |= MB_TO_UC_FLAG;
186 }
187 
188 void
189 wc_to_mb_fb(wchar_t wc,
190     void (*write_replacement)(const char *buf, size_t buflen,
191        void* callback_arg), void* callback_arg, void* data)
192 {
193 
194 	fb_flags |= WC_TO_MB_FLAG;
195 }
196 
197 int
198 main (int argc, char *argv[])
199 {
200 	struct iconv_fallbacks fbs;
201 	iconv_t cd;
202 	char *tocode;
203 	int c;
204 
205 	while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) {
206 		switch (c) {
207 		case 'c':
208 			cflag = true;
209 			break;
210 		case 'd':
211 			dflag = true;
212 			break;
213 		case 'i':
214 			iflag = true;
215 			break;
216 		case 'l':
217 			lflag = true;
218 			break;
219 		case 'r':
220 			rflag = true;
221 			break;
222 		case 't':
223 			tflag = true;
224 			break;
225 		}
226 	}
227 	argc -= optind;
228 	argv += optind;
229 
230 	if (argc < 1)
231 		usage();
232 
233 	fbs.uc_to_mb_fallback = uc_to_mb_fb;
234 	fbs.mb_to_wc_fallback = mb_to_wc_fb;
235 	fbs.mb_to_uc_fallback = mb_to_uc_fb;
236 	fbs.wc_to_mb_fallback = wc_to_mb_fb;
237 	fbs.data = NULL;
238 
239 	if (argc == 2) {
240 		asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "",
241 		    iflag ? "//IGNORE" : "");
242 
243 		if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1)
244 			err(1, NULL);
245 		if (dflag) {
246 			if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
247 				err(1, NULL);
248 		}
249 		do_conv(cd, false);
250 	} else if (rflag) {
251 		asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "",
252 		    iflag ? "//IGNORE" : "");
253 
254 		if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1)
255 			err(1, NULL);
256 		if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
257 			err(1, NULL);
258 		if (cflag) {
259 			printf("# $FreeBSD$\n\n");
260 			printf("TYPE\t\tROWCOL\n");
261 			printf("NAME\t\tUCS/%s\n", argv[0]);
262 			printf("%s", citrus_common);
263 		}
264 		do_conv(cd, true);
265 	} else {
266 		if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1)
267 			err(1, NULL);
268 		if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0))
269 			err(1, NULL);
270 		if (cflag) {
271 			printf("# $FreeBSD$\n\n");
272 			printf("TYPE\t\tROWCOL\n");
273 			printf("NAME\t\t%s/UCS\n", argv[0]);
274 			printf("%s", citrus_common);
275                 }
276 		do_conv(cd, false);
277 	}
278 
279 	if (iconv_close(cd) != 0)
280 		err(1, NULL);
281 
282 	return (EXIT_SUCCESS);
283 }
284 
285 static void
286 do_conv(iconv_t cd, bool uniinput) {
287 	size_t inbytesleft, outbytesleft, ret;
288 	uint32_t outbuf;
289 	uint32_t inbuf;
290 	char *inbuf_;
291 	char *outbuf_;
292 
293 	for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) {
294 		if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00))
295 			continue;
296 		inbytesleft = uniinput ? 4 : magnitude(inbuf);
297 		outbytesleft = 4;
298 		outbuf = 0x00000000;
299 		outbuf_ = (char *)&outbuf;
300 		inbuf_ = (char *)&inbuf;
301 		iconv(cd, NULL, NULL, NULL, NULL);
302 		fb_flags = 0;
303 		errno = 0;
304 		ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft);
305 		if (ret == (size_t)-1) {
306 			if (dflag) {
307 				format(inbuf);
308 				printf(" = ");
309 				format_diag(errno);
310 				printf("\n");
311 			}
312 			continue;
313 		}
314 		format(inbuf);
315 		printf(" = ");
316 		format(outbuf);
317 		printf("\n");
318 	}
319 	if (cflag)
320 		printf("END_MAP\n");
321 }
322