1 /*- 2 * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 #include <sys/endian.h> 29 #include <sys/types.h> 30 31 #include <err.h> 32 #include <errno.h> 33 #include <getopt.h> 34 #include <iconv.h> 35 #include <stdbool.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 39 #define UC_TO_MB_FLAG 1 40 #define MB_TO_WC_FLAG 2 41 #define MB_TO_UC_FLAG 4 42 #define WC_TO_MB_FLAG 8 43 44 #define MAX(a,b) ((a) < (b) ? (b) : (a)) 45 46 extern char *__progname; 47 48 static const char *optstr = "cdilrt"; 49 static const char *citrus_common = "SRC_ZONE\t0x0000-0xFFFF\n" 50 "OOB_MODE\tILSEQ\n" 51 "DST_ILSEQ\t0xFFFE\n" 52 "DST_UNIT_BITS\t32\n\n" 53 "BEGIN_MAP\n" 54 "#\n# Generated with Citrus iconv (FreeBSD)\n#\n"; 55 bool cflag; 56 bool dflag; 57 bool iflag; 58 bool lflag; 59 bool tflag; 60 bool rflag; 61 int fb_flags; 62 63 static void do_conv(iconv_t, bool); 64 void mb_to_uc_fb(const char*, size_t, 65 void (*write_replacement)(const unsigned int *, 66 size_t, void *), void *, void *); 67 void mb_to_wc_fb(const char*, size_t, 68 void (*write_replacement) (const wchar_t *, size_t, void *), 69 void *, void *); 70 void uc_to_mb_fb(unsigned int, 71 void (*write_replacement) (const char *, size_t, void *), void *, 72 void *); 73 void wc_to_mb_fb(wchar_t, 74 void (*write_replacement)(const char *, 75 size_t, void *), void *, void *); 76 77 struct option long_options[] = 78 { 79 {"citrus", no_argument, NULL, 'c'}, 80 {"diagnostic", no_argument, NULL, 'd'}, 81 {"ignore", no_argument, NULL, 'i'}, 82 {"long", no_argument, NULL, 'l'}, 83 {"reverse", no_argument, NULL, 'r'}, 84 {"translit", no_argument, NULL, 't'}, 85 {NULL, no_argument, NULL, 0} 86 }; 87 88 static void 89 usage(void) { 90 91 fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname); 92 exit(EXIT_FAILURE); 93 } 94 95 static void 96 format_diag(int errcode) 97 { 98 const char *errstr; 99 const char *u2m, *m2u, *m2w, *w2m; 100 101 switch (errcode) { 102 case EINVAL: 103 errstr = "EINVAL "; 104 break; 105 case EILSEQ: 106 errstr = "EILSEQ "; 107 break; 108 case E2BIG: 109 errstr = "E2BIG "; 110 break; 111 default: 112 errstr = "UNKNOWN "; 113 break; 114 } 115 116 u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : ""; 117 m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : ""; 118 m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : ""; 119 w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : ""; 120 121 printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m); 122 } 123 124 static int 125 magnitude(const uint32_t p) 126 { 127 128 if (p >> 8 == 0) 129 return (1); 130 else if (p >> 16 == 0) 131 return (2); 132 else 133 return (p >> 24 == 0 ? 3 : 4); 134 } 135 136 static void 137 format(const uint32_t data) 138 { 139 140 /* XXX: could be simpler, something like this but with leading 0s? 141 142 printf("0x%.*X", magnitude(data), data); 143 */ 144 145 switch (magnitude(data)) { 146 default: 147 case 2: 148 printf("0x%04X", data); 149 break; 150 case 3: 151 printf("0x%06X", data); 152 break; 153 case 4: 154 printf("0x%08X", data); 155 break; 156 } 157 } 158 159 void 160 uc_to_mb_fb(unsigned int code, 161 void (*write_replacement)(const char *buf, size_t buflen, 162 void* callback_arg), void* callback_arg, void* data) 163 { 164 165 fb_flags |= UC_TO_MB_FLAG; 166 } 167 168 void 169 mb_to_wc_fb(const char* inbuf, size_t inbufsize, 170 void (*write_replacement)(const wchar_t *buf, size_t buflen, 171 void* callback_arg), void* callback_arg, void* data) 172 { 173 174 fb_flags |= MB_TO_WC_FLAG; 175 } 176 177 void 178 mb_to_uc_fb(const char* inbuf, size_t inbufsize, 179 void (*write_replacement)(const unsigned int *buf, size_t buflen, 180 void* callback_arg), void* callback_arg, void* data) 181 { 182 183 fb_flags |= MB_TO_UC_FLAG; 184 } 185 186 void 187 wc_to_mb_fb(wchar_t wc, 188 void (*write_replacement)(const char *buf, size_t buflen, 189 void* callback_arg), void* callback_arg, void* data) 190 { 191 192 fb_flags |= WC_TO_MB_FLAG; 193 } 194 195 int 196 main (int argc, char *argv[]) 197 { 198 struct iconv_fallbacks fbs; 199 iconv_t cd; 200 char *tocode; 201 int c; 202 203 while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) { 204 switch (c) { 205 case 'c': 206 cflag = true; 207 break; 208 case 'd': 209 dflag = true; 210 break; 211 case 'i': 212 iflag = true; 213 break; 214 case 'l': 215 lflag = true; 216 break; 217 case 'r': 218 rflag = true; 219 break; 220 case 't': 221 tflag = true; 222 break; 223 } 224 } 225 argc -= optind; 226 argv += optind; 227 228 if (argc < 1) 229 usage(); 230 231 fbs.uc_to_mb_fallback = uc_to_mb_fb; 232 fbs.mb_to_wc_fallback = mb_to_wc_fb; 233 fbs.mb_to_uc_fallback = mb_to_uc_fb; 234 fbs.wc_to_mb_fallback = wc_to_mb_fb; 235 fbs.data = NULL; 236 237 if (argc == 2) { 238 asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "", 239 iflag ? "//IGNORE" : ""); 240 241 if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1) 242 err(1, NULL); 243 if (dflag) { 244 if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0) 245 err(1, NULL); 246 } 247 do_conv(cd, false); 248 } else if (rflag) { 249 asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "", 250 iflag ? "//IGNORE" : ""); 251 252 if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1) 253 err(1, NULL); 254 if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0) 255 err(1, NULL); 256 if (cflag) { 257 printf("TYPE\t\tROWCOL\n"); 258 printf("NAME\t\tUCS/%s\n", argv[0]); 259 printf("%s", citrus_common); 260 } 261 do_conv(cd, true); 262 } else { 263 if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1) 264 err(1, NULL); 265 if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)) 266 err(1, NULL); 267 if (cflag) { 268 printf("TYPE\t\tROWCOL\n"); 269 printf("NAME\t\t%s/UCS\n", argv[0]); 270 printf("%s", citrus_common); 271 } 272 do_conv(cd, false); 273 } 274 275 if (iconv_close(cd) != 0) 276 err(1, NULL); 277 278 return (EXIT_SUCCESS); 279 } 280 281 static void 282 do_conv(iconv_t cd, bool uniinput) { 283 size_t inbytesleft, outbytesleft, ret; 284 uint32_t outbuf; 285 uint32_t inbuf; 286 char *inbuf_; 287 char *outbuf_; 288 289 for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) { 290 if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00)) 291 continue; 292 inbytesleft = uniinput ? 4 : magnitude(inbuf); 293 outbytesleft = 4; 294 outbuf = 0x00000000; 295 outbuf_ = (char *)&outbuf; 296 inbuf_ = (char *)&inbuf; 297 iconv(cd, NULL, NULL, NULL, NULL); 298 fb_flags = 0; 299 errno = 0; 300 ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft); 301 if (ret == (size_t)-1) { 302 if (dflag) { 303 format(inbuf); 304 printf(" = "); 305 format_diag(errno); 306 printf("\n"); 307 } 308 continue; 309 } 310 format(inbuf); 311 printf(" = "); 312 format(outbuf); 313 printf("\n"); 314 } 315 if (cflag) 316 printf("END_MAP\n"); 317 } 318