1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 14 */ 15 16 /* 17 * iconv(1) command. 18 */ 19 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <string.h> 23 #include <unistd.h> 24 #include <errno.h> 25 #include <limits.h> 26 #include <iconv.h> 27 #include <libintl.h> 28 #include <langinfo.h> 29 #include <locale.h> 30 #include "charmap.h" 31 32 #include <assert.h> 33 34 const char *progname; 35 36 char *from_cs; 37 char *to_cs; 38 int debug; 39 int cflag; /* skip invalid characters */ 40 int sflag; /* silent */ 41 int lflag; /* list conversions */ 42 43 void iconv_file(FILE *, const char *); 44 extern int list_codesets(void); 45 46 iconv_t ich; /* iconv(3c) lib handle */ 47 size_t (*pconv)(const char **iptr, size_t *ileft, 48 char **optr, size_t *oleft); 49 50 size_t 51 lib_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft) 52 { 53 return (iconv(ich, iptr, ileft, optr, oleft)); 54 } 55 56 void 57 usage(void) 58 { 59 (void) fprintf(stderr, gettext( 60 "usage: %s [-cs] [-f from-codeset] [-t to-codeset] " 61 "[file ...]\n"), progname); 62 (void) fprintf(stderr, gettext("\t%s -l\n"), progname); 63 exit(1); 64 } 65 66 int 67 main(int argc, char **argv) 68 { 69 FILE *fp; 70 char *fslash, *tslash; 71 int c; 72 73 yydebug = 0; 74 progname = getprogname(); 75 76 (void) setlocale(LC_ALL, ""); 77 78 #if !defined(TEXT_DOMAIN) 79 #define TEXT_DOMAIN "SYS_TEST" 80 #endif 81 (void) textdomain(TEXT_DOMAIN); 82 83 while ((c = getopt(argc, argv, "cdlsf:t:")) != EOF) { 84 switch (c) { 85 case 'c': 86 cflag++; 87 break; 88 case 'd': 89 debug++; 90 break; 91 case 'l': 92 lflag++; 93 break; 94 case 's': 95 sflag++; 96 break; 97 case 'f': 98 from_cs = optarg; 99 break; 100 case 't': 101 to_cs = optarg; 102 break; 103 case '?': 104 usage(); 105 } 106 } 107 108 if (lflag) { 109 if (from_cs != NULL || to_cs != NULL || optind != argc) 110 usage(); 111 exit(list_codesets()); 112 } 113 114 if (from_cs == NULL) 115 from_cs = nl_langinfo(CODESET); 116 if (to_cs == NULL) 117 to_cs = nl_langinfo(CODESET); 118 119 /* 120 * If either "from" or "to" contains a slash, 121 * then we're using charmaps. 122 */ 123 fslash = strchr(from_cs, '/'); 124 tslash = strchr(to_cs, '/'); 125 if (fslash != NULL || tslash != NULL) { 126 charmap_init(to_cs, from_cs); 127 pconv = cm_iconv; 128 if (debug) 129 charmap_dump(); 130 } else { 131 ich = iconv_open(to_cs, from_cs); 132 if (ich == ((iconv_t)-1)) { 133 switch (errno) { 134 case EINVAL: 135 (void) fprintf(stderr, 136 _("Not supported %s to %s\n"), 137 from_cs, to_cs); 138 break; 139 default: 140 (void) fprintf(stderr, 141 _("iconv_open failed: %s\n"), 142 strerror(errno)); 143 break; 144 } 145 exit(1); 146 } 147 pconv = lib_iconv; 148 } 149 150 if (optind == argc || 151 (optind == argc - 1 && 0 == strcmp(argv[optind], "-"))) { 152 iconv_file(stdin, "stdin"); 153 exit(warnings ? 1 : 0); 154 } 155 156 for (; optind < argc; optind++) { 157 fp = fopen(argv[optind], "r"); 158 if (fp == NULL) { 159 perror(argv[optind]); 160 exit(1); 161 } 162 iconv_file(fp, argv[optind]); 163 (void) fclose(fp); 164 } 165 exit(warnings ? 1 : 0); 166 } 167 168 /* 169 * Conversion buffer sizes: 170 * 171 * The input buffer has room to prepend one mbs character if needed for 172 * handling a left-over at the end of a previous conversion buffer. 173 * 174 * Conversions may grow or shrink data, so using a larger output buffer 175 * to reduce the likelihood of leftover input buffer data in each pass. 176 */ 177 #define IBUFSIZ (MB_LEN_MAX + BUFSIZ) 178 #define OBUFSIZ (2 * BUFSIZ) 179 180 void 181 iconv_file(FILE *fp, const char *fname) 182 { 183 static char ibuf[IBUFSIZ]; 184 static char obuf[OBUFSIZ]; 185 const char *iptr; 186 char *optr; 187 off64_t offset; 188 size_t ileft, oleft, ocnt; 189 int iconv_errno; 190 int nr, nw, rc; 191 192 offset = 0; 193 ileft = 0; 194 iptr = ibuf + MB_LEN_MAX; 195 196 while ((nr = fread(ibuf+MB_LEN_MAX, 1, BUFSIZ, fp)) > 0) { 197 198 assert(iptr <= ibuf+MB_LEN_MAX); 199 assert(ileft <= MB_LEN_MAX); 200 ileft += nr; 201 offset += nr; 202 203 optr = obuf; 204 oleft = OBUFSIZ; 205 206 /* 207 * Note: the *pconv function is either iconv(3c) or our 208 * private equivalent when using charmaps. Both update 209 * ileft, oleft etc. even when conversion stops due to 210 * an illegal sequence or whatever, so we need to copy 211 * the partially converted buffer even on error. 212 */ 213 iconv_again: 214 rc = (*pconv)(&iptr, &ileft, &optr, &oleft); 215 iconv_errno = errno; 216 217 ocnt = OBUFSIZ - oleft; 218 if (ocnt > 0) { 219 nw = fwrite(obuf, 1, ocnt, stdout); 220 if (nw != ocnt) { 221 perror("fwrite"); 222 exit(1); 223 } 224 } 225 optr = obuf; 226 oleft = OBUFSIZ; 227 228 if (rc == (size_t)-1) { 229 switch (iconv_errno) { 230 231 case E2BIG: /* no room in output buffer */ 232 goto iconv_again; 233 234 case EINVAL: /* incomplete sequence on input */ 235 if (debug) { 236 (void) fprintf(stderr, 237 _("Incomplete sequence in %s at offset %lld\n"), 238 fname, offset - ileft); 239 } 240 /* 241 * Copy the remainder to the space reserved 242 * at the start of the input buffer. 243 */ 244 assert(ileft > 0); 245 if (ileft <= MB_LEN_MAX) { 246 char *p = ibuf+MB_LEN_MAX-ileft; 247 (void) memmove(p, iptr, ileft); 248 iptr = p; 249 continue; /* read again */ 250 } 251 /* 252 * Should not see ileft > MB_LEN_MAX, 253 * but if we do, handle as EILSEQ. 254 */ 255 /* FALLTHROUGH */ 256 257 case EILSEQ: /* invalid sequence on input */ 258 if (!sflag) { 259 (void) fprintf(stderr, 260 _("Illegal sequence in %s at offset %lld\n"), 261 fname, offset - ileft); 262 (void) fprintf(stderr, 263 _("bad seq: \\x%02x\\x%02x\\x%02x\n"), 264 iptr[0] & 0xff, 265 iptr[1] & 0xff, 266 iptr[2] & 0xff); 267 } 268 assert(ileft > 0); 269 /* skip one */ 270 iptr++; 271 ileft--; 272 assert(oleft > 0); 273 if (!cflag) { 274 *optr++ = '?'; 275 oleft--; 276 } 277 goto iconv_again; 278 279 default: 280 (void) fprintf(stderr, 281 _("iconv error (%s) in file $s at offset %lld\n"), 282 strerror(iconv_errno), fname, 283 offset - ileft); 284 break; 285 } 286 } 287 288 /* normal iconv return */ 289 ileft = 0; 290 iptr = ibuf + MB_LEN_MAX; 291 } 292 293 /* 294 * End of file 295 * Flush any shift encodings. 296 */ 297 iptr = NULL; 298 ileft = 0; 299 optr = obuf; 300 oleft = OBUFSIZ; 301 (*pconv)(&iptr, &ileft, &optr, &oleft); 302 ocnt = OBUFSIZ - oleft; 303 if (ocnt > 0) { 304 nw = fwrite(obuf, 1, ocnt, stdout); 305 if (nw != ocnt) { 306 perror("fwrite"); 307 exit(1); 308 } 309 } 310 } 311