1 /*-
2 * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 #include <sys/endian.h>
29 #include <sys/types.h>
30
31 #include <err.h>
32 #include <errno.h>
33 #include <getopt.h>
34 #include <iconv.h>
35 #include <stdbool.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38
/*
 * Bits OR-ed into fb_flags by the iconv fallback callbacks below; each bit
 * records which kind of fallback was invoked during the last conversion.
 */
#define UC_TO_MB_FLAG 1
#define MB_TO_WC_FLAG 2
#define MB_TO_UC_FLAG 4
#define WC_TO_MB_FLAG 8

/* Larger of a and b.  Each argument may be evaluated twice. */
#define MAX(a,b) ((a) < (b) ? (b) : (a))
45
/* Program name, printed in the usage message. */
extern char *__progname;

/* Short option string handed to getopt_long() in main(). */
static const char *optstr = "cdilrt";
/* Fixed part of the map header that main() prints when cflag is set. */
static const char *citrus_common = "SRC_ZONE\t0x0000-0xFFFF\n"
    "OOB_MODE\tILSEQ\n"
    "DST_ILSEQ\t0xFFFE\n"
    "DST_UNIT_BITS\t32\n\n"
    "BEGIN_MAP\n"
    "#\n# Generated with Citrus iconv (FreeBSD)\n#\n";
/* -c: print the map header before, and END_MAP after, the table. */
bool cflag;
/* -d: install the fallback callbacks and print a line for failed conversions. */
bool dflag;
/* -i: append "//IGNORE" to the target encoding name. */
bool iflag;
/* -l: raise the upper bound of the scanned input range in do_conv(). */
bool lflag;
/* -t: append a transliteration suffix to the target encoding name. */
bool tflag;
/* -r: convert from UTF-32LE to the named encoding instead of the reverse. */
bool rflag;
/* Bitmask of *_FLAG values; cleared by do_conv() before each conversion. */
int fb_flags;
62
/* Converts every candidate input value with cd and prints the results. */
static void do_conv(iconv_t, bool);
/*
 * Fallback callbacks stored in struct iconv_fallbacks by main().  Each one
 * only records in fb_flags that it was called.
 */
void mb_to_uc_fb(const char*, size_t,
    void (*write_replacement)(const unsigned int *,
    size_t, void *), void *, void *);
void mb_to_wc_fb(const char*, size_t,
    void (*write_replacement) (const wchar_t *, size_t, void *),
    void *, void *);
void uc_to_mb_fb(unsigned int,
    void (*write_replacement) (const char *, size_t, void *), void *,
    void *);
void wc_to_mb_fb(wchar_t,
    void (*write_replacement)(const char *,
    size_t, void *), void *, void *);
76
/*
 * Long option table for getopt_long().  Every entry takes no argument and
 * maps to the short option letter with the same meaning; the all-NULL/0
 * entry terminates the table.
 */
struct option long_options[] =
{
	{"citrus", no_argument, NULL, 'c'},
	{"diagnostic", no_argument, NULL, 'd'},
	{"ignore", no_argument, NULL, 'i'},
	{"long", no_argument, NULL, 'l'},
	{"reverse", no_argument, NULL, 'r'},
	{"translit", no_argument, NULL, 't'},
	{NULL, no_argument, NULL, 0}
};
87
88 static void
usage(void)89 usage(void) {
90
91 fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname);
92 exit(EXIT_FAILURE);
93 }
94
95 static void
format_diag(int errcode)96 format_diag(int errcode)
97 {
98 const char *errstr;
99 const char *u2m, *m2u, *m2w, *w2m;
100
101 switch (errcode) {
102 case EINVAL:
103 errstr = "EINVAL ";
104 break;
105 case EILSEQ:
106 errstr = "EILSEQ ";
107 break;
108 case E2BIG:
109 errstr = "E2BIG ";
110 break;
111 default:
112 errstr = "UNKNOWN ";
113 break;
114 }
115
116 u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : "";
117 m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : "";
118 m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : "";
119 w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : "";
120
121 printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m);
122 }
123
/*
 * Return the number of bytes (1 to 4) needed to hold p, i.e. the position
 * of its most significant non-zero byte.  Zero counts as one byte.
 */
static int
magnitude(const uint32_t p)
{
	uint32_t rest;
	int nbytes = 1;

	/* Count one extra byte for each non-zero group above the lowest. */
	for (rest = p >> 8; rest != 0; rest >>= 8)
		nbytes++;

	return (nbytes);
}
135
/*
 * Print data to stdout as an upper-case, zero-padded hexadecimal literal
 * with a "0x" prefix.  The digit count is two per significant byte as
 * reported by magnitude(), but never fewer than four, so the output is
 * 4, 6 or 8 digits wide.
 */
static void
format(const uint32_t data)
{
	int digits;

	digits = 2 * magnitude(data);
	if (digits < 4)
		digits = 4;	/* one-byte values are padded like two-byte ones */

	printf("0x%0*X", digits, data);
}
158
159 void
uc_to_mb_fb(unsigned int code,void (* write_replacement)(const char * buf,size_t buflen,void * callback_arg),void * callback_arg,void * data)160 uc_to_mb_fb(unsigned int code,
161 void (*write_replacement)(const char *buf, size_t buflen,
162 void* callback_arg), void* callback_arg, void* data)
163 {
164
165 fb_flags |= UC_TO_MB_FLAG;
166 }
167
168 void
mb_to_wc_fb(const char * inbuf,size_t inbufsize,void (* write_replacement)(const wchar_t * buf,size_t buflen,void * callback_arg),void * callback_arg,void * data)169 mb_to_wc_fb(const char* inbuf, size_t inbufsize,
170 void (*write_replacement)(const wchar_t *buf, size_t buflen,
171 void* callback_arg), void* callback_arg, void* data)
172 {
173
174 fb_flags |= MB_TO_WC_FLAG;
175 }
176
177 void
mb_to_uc_fb(const char * inbuf,size_t inbufsize,void (* write_replacement)(const unsigned int * buf,size_t buflen,void * callback_arg),void * callback_arg,void * data)178 mb_to_uc_fb(const char* inbuf, size_t inbufsize,
179 void (*write_replacement)(const unsigned int *buf, size_t buflen,
180 void* callback_arg), void* callback_arg, void* data)
181 {
182
183 fb_flags |= MB_TO_UC_FLAG;
184 }
185
186 void
wc_to_mb_fb(wchar_t wc,void (* write_replacement)(const char * buf,size_t buflen,void * callback_arg),void * callback_arg,void * data)187 wc_to_mb_fb(wchar_t wc,
188 void (*write_replacement)(const char *buf, size_t buflen,
189 void* callback_arg), void* callback_arg, void* data)
190 {
191
192 fb_flags |= WC_TO_MB_FLAG;
193 }
194
195 int
main(int argc,char * argv[])196 main (int argc, char *argv[])
197 {
198 struct iconv_fallbacks fbs;
199 iconv_t cd;
200 char *tocode;
201 int c;
202
203 while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) {
204 switch (c) {
205 case 'c':
206 cflag = true;
207 break;
208 case 'd':
209 dflag = true;
210 break;
211 case 'i':
212 iflag = true;
213 break;
214 case 'l':
215 lflag = true;
216 break;
217 case 'r':
218 rflag = true;
219 break;
220 case 't':
221 tflag = true;
222 break;
223 }
224 }
225 argc -= optind;
226 argv += optind;
227
228 if (argc < 1)
229 usage();
230
231 fbs.uc_to_mb_fallback = uc_to_mb_fb;
232 fbs.mb_to_wc_fallback = mb_to_wc_fb;
233 fbs.mb_to_uc_fallback = mb_to_uc_fb;
234 fbs.wc_to_mb_fallback = wc_to_mb_fb;
235 fbs.data = NULL;
236
237 if (argc == 2) {
238 asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "",
239 iflag ? "//IGNORE" : "");
240
241 if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1)
242 err(1, NULL);
243 if (dflag) {
244 if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
245 err(1, NULL);
246 }
247 do_conv(cd, false);
248 } else if (rflag) {
249 asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "",
250 iflag ? "//IGNORE" : "");
251
252 if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1)
253 err(1, NULL);
254 if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
255 err(1, NULL);
256 if (cflag) {
257 printf("TYPE\t\tROWCOL\n");
258 printf("NAME\t\tUCS/%s\n", argv[0]);
259 printf("%s", citrus_common);
260 }
261 do_conv(cd, true);
262 } else {
263 if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1)
264 err(1, NULL);
265 if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0))
266 err(1, NULL);
267 if (cflag) {
268 printf("TYPE\t\tROWCOL\n");
269 printf("NAME\t\t%s/UCS\n", argv[0]);
270 printf("%s", citrus_common);
271 }
272 do_conv(cd, false);
273 }
274
275 if (iconv_close(cd) != 0)
276 err(1, NULL);
277
278 return (EXIT_SUCCESS);
279 }
280
281 static void
do_conv(iconv_t cd,bool uniinput)282 do_conv(iconv_t cd, bool uniinput) {
283 size_t inbytesleft, outbytesleft, ret;
284 uint32_t outbuf;
285 uint32_t inbuf;
286 char *inbuf_;
287 char *outbuf_;
288
289 for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) {
290 if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00))
291 continue;
292 inbytesleft = uniinput ? 4 : magnitude(inbuf);
293 outbytesleft = 4;
294 outbuf = 0x00000000;
295 outbuf_ = (char *)&outbuf;
296 inbuf_ = (char *)&inbuf;
297 iconv(cd, NULL, NULL, NULL, NULL);
298 fb_flags = 0;
299 errno = 0;
300 ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft);
301 if (ret == (size_t)-1) {
302 if (dflag) {
303 format(inbuf);
304 printf(" = ");
305 format_diag(errno);
306 printf("\n");
307 }
308 continue;
309 }
310 format(inbuf);
311 printf(" = ");
312 format(outbuf);
313 printf("\n");
314 }
315 if (cflag)
316 printf("END_MAP\n");
317 }
318