1 /*- 2 * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/endian.h> 31 #include <sys/types.h> 32 33 #include <err.h> 34 #include <errno.h> 35 #include <getopt.h> 36 #include <iconv.h> 37 #include <stdbool.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 41 #define UC_TO_MB_FLAG 1 42 #define MB_TO_WC_FLAG 2 43 #define MB_TO_UC_FLAG 4 44 #define WC_TO_MB_FLAG 8 45 46 #define MAX(a,b) ((a) < (b) ? (b) : (a)) 47 48 extern char *__progname; 49 50 static const char *optstr = "cdilrt"; 51 static const char *citrus_common = "SRC_ZONE\t0x0000-0xFFFF\n" 52 "OOB_MODE\tILSEQ\n" 53 "DST_ILSEQ\t0xFFFE\n" 54 "DST_UNIT_BITS\t32\n\n" 55 "BEGIN_MAP\n" 56 "#\n# Generated with Citrus iconv (FreeBSD)\n#\n"; 57 bool cflag; 58 bool dflag; 59 bool iflag; 60 bool lflag; 61 bool tflag; 62 bool rflag; 63 int fb_flags; 64 65 static void do_conv(iconv_t, bool); 66 void mb_to_uc_fb(const char*, size_t, 67 void (*write_replacement)(const unsigned int *, 68 size_t, void *), void *, void *); 69 void mb_to_wc_fb(const char*, size_t, 70 void (*write_replacement) (const wchar_t *, size_t, void *), 71 void *, void *); 72 void uc_to_mb_fb(unsigned int, 73 void (*write_replacement) (const char *, size_t, void *), void *, 74 void *); 75 void wc_to_mb_fb(wchar_t, 76 void (*write_replacement)(const char *, 77 size_t, void *), void *, void *); 78 79 struct option long_options[] = 80 { 81 {"citrus", no_argument, NULL, 'c'}, 82 {"diagnostic", no_argument, NULL, 'd'}, 83 {"ignore", no_argument, NULL, 'i'}, 84 {"long", no_argument, NULL, 'l'}, 85 {"reverse", no_argument, NULL, 'r'}, 86 {"translit", no_argument, NULL, 't'}, 87 {NULL, no_argument, NULL, 0} 88 }; 89 90 static void 91 usage(void) { 92 93 fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname); 94 exit(EXIT_FAILURE); 95 } 96 97 static void 98 format_diag(int errcode) 99 { 100 const char *errstr; 101 const char *u2m, *m2u, *m2w, *w2m; 102 103 switch (errcode) { 104 case EINVAL: 105 errstr = "EINVAL "; 106 break; 107 case EILSEQ: 108 errstr = "EILSEQ "; 109 break; 110 case E2BIG: 111 errstr = "E2BIG "; 112 break; 113 default: 114 errstr = "UNKNOWN "; 115 break; 116 } 117 118 u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : ""; 119 m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : ""; 120 m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : ""; 121 w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : ""; 122 123 printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m); 124 } 125 126 static int 127 magnitude(const uint32_t p) 128 { 129 130 if (p >> 8 == 0) 131 return (1); 132 else if (p >> 16 == 0) 133 return (2); 134 else 135 return (p >> 24 == 0 ? 3 : 4); 136 } 137 138 static void 139 format(const uint32_t data) 140 { 141 142 /* XXX: could be simpler, something like this but with leading 0s? 143 144 printf("0x%.*X", magnitude(data), data); 145 */ 146 147 switch (magnitude(data)) { 148 default: 149 case 2: 150 printf("0x%04X", data); 151 break; 152 case 3: 153 printf("0x%06X", data); 154 break; 155 case 4: 156 printf("0x%08X", data); 157 break; 158 } 159 } 160 161 void 162 uc_to_mb_fb(unsigned int code, 163 void (*write_replacement)(const char *buf, size_t buflen, 164 void* callback_arg), void* callback_arg, void* data) 165 { 166 167 fb_flags |= UC_TO_MB_FLAG; 168 } 169 170 void 171 mb_to_wc_fb(const char* inbuf, size_t inbufsize, 172 void (*write_replacement)(const wchar_t *buf, size_t buflen, 173 void* callback_arg), void* callback_arg, void* data) 174 { 175 176 fb_flags |= MB_TO_WC_FLAG; 177 } 178 179 void 180 mb_to_uc_fb(const char* inbuf, size_t inbufsize, 181 void (*write_replacement)(const unsigned int *buf, size_t buflen, 182 void* callback_arg), void* callback_arg, void* data) 183 { 184 185 fb_flags |= MB_TO_UC_FLAG; 186 } 187 188 void 189 wc_to_mb_fb(wchar_t wc, 190 void (*write_replacement)(const char *buf, size_t buflen, 191 void* callback_arg), void* callback_arg, void* data) 192 { 193 194 fb_flags |= WC_TO_MB_FLAG; 195 } 196 197 int 198 main (int argc, char *argv[]) 199 { 200 struct iconv_fallbacks fbs; 201 iconv_t cd; 202 char *tocode; 203 char c; 204 205 while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) { 206 switch (c) { 207 case 'c': 208 cflag = true; 209 break; 210 case 'd': 211 dflag = true; 212 break; 213 case 'i': 214 iflag = true; 215 break; 216 case 'l': 217 lflag = true; 218 break; 219 case 'r': 220 rflag = true; 221 break; 222 case 't': 223 tflag = true; 224 break; 225 } 226 } 227 argc -= optind; 228 argv += optind; 229 230 if (argc < 1) 231 usage(); 232 233 fbs.uc_to_mb_fallback = uc_to_mb_fb; 234 fbs.mb_to_wc_fallback = mb_to_wc_fb; 235 fbs.mb_to_uc_fallback = mb_to_uc_fb; 236 fbs.wc_to_mb_fallback = wc_to_mb_fb; 237 fbs.data = NULL; 238 239 if (argc == 2) { 240 asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "", 241 iflag ? "//IGNORE" : ""); 242 243 if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1) 244 err(1, NULL); 245 if (dflag) { 246 if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0) 247 err(1, NULL); 248 } 249 do_conv(cd, false); 250 } else if (rflag) { 251 asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "", 252 iflag ? "//IGNORE" : ""); 253 254 if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1) 255 err(1, NULL); 256 if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0) 257 err(1, NULL); 258 if (cflag) { 259 printf("# $FreeBSD$\n\n"); 260 printf("TYPE\t\tROWCOL\n"); 261 printf("NAME\t\tUCS/%s\n", argv[0]); 262 printf("%s", citrus_common); 263 } 264 do_conv(cd, true); 265 } else { 266 if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1) 267 err(1, NULL); 268 if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)) 269 err(1, NULL); 270 if (cflag) { 271 printf("# $FreeBSD$\n\n"); 272 printf("TYPE\t\tROWCOL\n"); 273 printf("NAME\t\t%s/UCS\n", argv[0]); 274 printf("%s", citrus_common); 275 } 276 do_conv(cd, false); 277 } 278 279 if (iconv_close(cd) != 0) 280 err(1, NULL); 281 282 return (EXIT_SUCCESS); 283 } 284 285 static void 286 do_conv(iconv_t cd, bool uniinput) { 287 size_t inbytesleft, outbytesleft, ret; 288 uint32_t outbuf; 289 uint32_t inbuf; 290 const char *inbuf_; 291 char *outbuf_; 292 293 for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) { 294 if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00)) 295 continue; 296 inbytesleft = uniinput ? 4 : magnitude(inbuf); 297 outbytesleft = 4; 298 outbuf = 0x00000000; 299 outbuf_ = (char *)&outbuf; 300 inbuf_ = (const char *)&inbuf; 301 iconv(cd, NULL, NULL, NULL, NULL); 302 fb_flags = 0; 303 errno = 0; 304 ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft); 305 if (ret == (size_t)-1) { 306 if (dflag) { 307 format(inbuf); 308 printf(" = "); 309 format_diag(errno); 310 printf("\n"); 311 } 312 continue; 313 } 314 format(inbuf); 315 printf(" = "); 316 format(outbuf); 317 printf("\n"); 318 } 319 if (cflag) 320 printf("END_MAP\n"); 321 } 322