xref: /freebsd/tools/test/iconv/tablegen/tablegen.c (revision 1719886f6d08408b834d270c59ffcfd821c8f63a)
1 /*-
2  * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 #include <sys/endian.h>
29 #include <sys/types.h>
30 
31 #include <err.h>
32 #include <errno.h>
33 #include <getopt.h>
34 #include <iconv.h>
35 #include <stdbool.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 
/*
 * Bits accumulated in fb_flags.  Each iconv fallback callback in this file
 * sets exactly one of them, and format_diag() reports which ones are set.
 */
enum {
	UC_TO_MB_FLAG = 1,
	MB_TO_WC_FLAG = 2,
	MB_TO_UC_FLAG = 4,
	WC_TO_MB_FLAG = 8
};

/* Larger of the two arguments; each argument may be evaluated twice. */
#define	MAX(a, b)	((a) < (b) ? (b) : (a))

/* Program name; defined outside this file and used by usage(). */
extern char	*__progname;
46 extern char	*__progname;
47 
/* Short option letters handed to getopt_long() in main(). */
static const char	*optstr = "cdilrt";

/*
 * Header lines shared by both map directions; main() prints them after
 * the TYPE and NAME lines when -c is given.
 */
static const char	*citrus_common =
	"SRC_ZONE\t0x0000-0xFFFF\n"
	"OOB_MODE\tILSEQ\n"
	"DST_ILSEQ\t0xFFFE\n"
	"DST_UNIT_BITS\t32\n"
	"\n"
	"BEGIN_MAP\n"
	"#\n"
	"# Generated with Citrus iconv (FreeBSD)\n"
	"#\n";

/*
 * Command line switches, set in main():
 *   cflag  -c  print the Citrus map header and the END_MAP trailer
 *   dflag  -d  install the fallback callbacks and print failed conversions
 *   iflag  -i  append "//IGNORE" to the target encoding name
 *   lflag  -l  extend the scanned input range from 0x10000 to 0x100000
 *   tflag  -t  append the transliteration suffix to the target encoding name
 *   rflag  -r  convert from UTF-32LE to the encoding instead of the reverse
 */
bool	cflag, dflag, iflag, lflag, tflag, rflag;

/* Fallback bits (the *_FLAG constants) seen during the current conversion. */
int	fb_flags;
62 
/* Conversion loop; defined at the end of the file. */
static void	do_conv(iconv_t cd, bool uniinput);

/*
 * Fallback callbacks installed through iconvctl(ICONV_SET_FALLBACKS) when
 * -d is given.  Each one only records in fb_flags that it was invoked.
 */
void	uc_to_mb_fb(unsigned int code,
	    void (*write_replacement)(const char *buf, size_t buflen,
	    void *callback_arg), void *callback_arg, void *data);
void	mb_to_wc_fb(const char *inbuf, size_t inbufsize,
	    void (*write_replacement)(const wchar_t *buf, size_t buflen,
	    void *callback_arg), void *callback_arg, void *data);
void	mb_to_uc_fb(const char *inbuf, size_t inbufsize,
	    void (*write_replacement)(const unsigned int *buf, size_t buflen,
	    void *callback_arg), void *callback_arg, void *data);
void	wc_to_mb_fb(wchar_t wc,
	    void (*write_replacement)(const char *buf, size_t buflen,
	    void *callback_arg), void *callback_arg, void *data);
76 
/*
 * Long option table for getopt_long().  Every long option maps onto the
 * short option letter of the same meaning; none of them takes an argument.
 */
struct option long_options[] = {
	{ .name = "citrus",	.has_arg = no_argument,	.flag = NULL,	.val = 'c' },
	{ .name = "diagnostic",	.has_arg = no_argument,	.flag = NULL,	.val = 'd' },
	{ .name = "ignore",	.has_arg = no_argument,	.flag = NULL,	.val = 'i' },
	{ .name = "long",	.has_arg = no_argument,	.flag = NULL,	.val = 'l' },
	{ .name = "reverse",	.has_arg = no_argument,	.flag = NULL,	.val = 'r' },
	{ .name = "translit",	.has_arg = no_argument,	.flag = NULL,	.val = 't' },
	/* Terminating entry. */
	{ .name = NULL,		.has_arg = no_argument,	.flag = NULL,	.val = 0 }
};
87 
88 static void
89 usage(void) {
90 
91 	fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname);
92 	exit(EXIT_FAILURE);
93 }
94 
95 static void
96 format_diag(int errcode)
97 {
98 	const char *errstr;
99 	const char *u2m, *m2u, *m2w, *w2m;
100 
101 	switch (errcode) {
102 	case EINVAL:
103 		errstr = "EINVAL ";
104 		break;
105 	case EILSEQ:
106 		errstr = "EILSEQ ";
107 		break;
108 	case E2BIG:
109 		errstr = "E2BIG ";
110 		break;
111 	default:
112 		errstr = "UNKNOWN ";
113 		break;
114 	}
115 
116 	u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : "";
117 	m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : "";
118 	m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : "";
119 	w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : "";
120 
121 	printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m);
122 }
123 
/*
 * Return the number of bytes (1 to 4) needed to hold p, i.e. the position
 * of its most significant non-zero byte.  Zero is reported as one byte.
 */
static int
magnitude(const uint32_t p)
{
	uint32_t rest;
	int nbytes;

	nbytes = 1;
	/* Drop one low byte per round until nothing is left above it. */
	for (rest = p >> 8; rest != 0; rest >>= 8)
		nbytes++;

	return (nbytes);
}
135 
/*
 * Print data to stdout as "0x" followed by zero-padded upper-case hex
 * digits: six digits for a three-byte value, eight for a four-byte value
 * and four digits otherwise.  No newline is written.
 */
static void
format(const uint32_t data)
{
	int nbytes, ndigits;

	nbytes = magnitude(data);
	/* One- and two-byte values share the four-digit form. */
	ndigits = (nbytes == 3 || nbytes == 4) ? 2 * nbytes : 4;

	printf("0x%0*X", ndigits, data);
}
158 
159 void
160 uc_to_mb_fb(unsigned int code,
161     void (*write_replacement)(const char *buf, size_t buflen,
162        void* callback_arg), void* callback_arg, void* data)
163 {
164 
165 	fb_flags |= UC_TO_MB_FLAG;
166 }
167 
168 void
169 mb_to_wc_fb(const char* inbuf, size_t inbufsize,
170     void (*write_replacement)(const wchar_t *buf, size_t buflen,
171        void* callback_arg), void* callback_arg, void* data)
172 {
173 
174 	fb_flags |= MB_TO_WC_FLAG;
175 }
176 
177 void
178 mb_to_uc_fb(const char* inbuf, size_t inbufsize,
179     void (*write_replacement)(const unsigned int *buf, size_t buflen,
180        void* callback_arg), void* callback_arg, void* data)
181 {
182 
183 	fb_flags |= MB_TO_UC_FLAG;
184 }
185 
186 void
187 wc_to_mb_fb(wchar_t wc,
188     void (*write_replacement)(const char *buf, size_t buflen,
189        void* callback_arg), void* callback_arg, void* data)
190 {
191 
192 	fb_flags |= WC_TO_MB_FLAG;
193 }
194 
195 int
196 main (int argc, char *argv[])
197 {
198 	struct iconv_fallbacks fbs;
199 	iconv_t cd;
200 	char *tocode;
201 	int c;
202 
203 	while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) {
204 		switch (c) {
205 		case 'c':
206 			cflag = true;
207 			break;
208 		case 'd':
209 			dflag = true;
210 			break;
211 		case 'i':
212 			iflag = true;
213 			break;
214 		case 'l':
215 			lflag = true;
216 			break;
217 		case 'r':
218 			rflag = true;
219 			break;
220 		case 't':
221 			tflag = true;
222 			break;
223 		}
224 	}
225 	argc -= optind;
226 	argv += optind;
227 
228 	if (argc < 1)
229 		usage();
230 
231 	fbs.uc_to_mb_fallback = uc_to_mb_fb;
232 	fbs.mb_to_wc_fallback = mb_to_wc_fb;
233 	fbs.mb_to_uc_fallback = mb_to_uc_fb;
234 	fbs.wc_to_mb_fallback = wc_to_mb_fb;
235 	fbs.data = NULL;
236 
237 	if (argc == 2) {
238 		asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "",
239 		    iflag ? "//IGNORE" : "");
240 
241 		if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1)
242 			err(1, NULL);
243 		if (dflag) {
244 			if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
245 				err(1, NULL);
246 		}
247 		do_conv(cd, false);
248 	} else if (rflag) {
249 		asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "",
250 		    iflag ? "//IGNORE" : "");
251 
252 		if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1)
253 			err(1, NULL);
254 		if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
255 			err(1, NULL);
256 		if (cflag) {
257 			printf("TYPE\t\tROWCOL\n");
258 			printf("NAME\t\tUCS/%s\n", argv[0]);
259 			printf("%s", citrus_common);
260 		}
261 		do_conv(cd, true);
262 	} else {
263 		if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1)
264 			err(1, NULL);
265 		if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0))
266 			err(1, NULL);
267 		if (cflag) {
268 			printf("TYPE\t\tROWCOL\n");
269 			printf("NAME\t\t%s/UCS\n", argv[0]);
270 			printf("%s", citrus_common);
271                 }
272 		do_conv(cd, false);
273 	}
274 
275 	if (iconv_close(cd) != 0)
276 		err(1, NULL);
277 
278 	return (EXIT_SUCCESS);
279 }
280 
281 static void
282 do_conv(iconv_t cd, bool uniinput) {
283 	size_t inbytesleft, outbytesleft, ret;
284 	uint32_t outbuf;
285 	uint32_t inbuf;
286 	char *inbuf_;
287 	char *outbuf_;
288 
289 	for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) {
290 		if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00))
291 			continue;
292 		inbytesleft = uniinput ? 4 : magnitude(inbuf);
293 		outbytesleft = 4;
294 		outbuf = 0x00000000;
295 		outbuf_ = (char *)&outbuf;
296 		inbuf_ = (char *)&inbuf;
297 		iconv(cd, NULL, NULL, NULL, NULL);
298 		fb_flags = 0;
299 		errno = 0;
300 		ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft);
301 		if (ret == (size_t)-1) {
302 			if (dflag) {
303 				format(inbuf);
304 				printf(" = ");
305 				format_diag(errno);
306 				printf("\n");
307 			}
308 			continue;
309 		}
310 		format(inbuf);
311 		printf(" = ");
312 		format(outbuf);
313 		printf("\n");
314 	}
315 	if (cflag)
316 		printf("END_MAP\n");
317 }
318