1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
14 */
15
16 /*
17 * iconv(1) command.
18 */
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <errno.h>
25 #include <limits.h>
26 #include <iconv.h>
27 #include <libintl.h>
28 #include <langinfo.h>
29 #include <locale.h>
30 #include "charmap.h"
31
32 #include <assert.h>
33
/* Program name shown in usage messages; set by main() from getprogname(). */
const char *progname;

/*
 * Source (-f) and target (-t) codeset names.  main() substitutes
 * nl_langinfo(CODESET) for whichever one is not given on the command line.
 */
char *from_cs;
char *to_cs;
/*
 * -d count.  When non-zero, main() dumps the charmap tables after
 * charmap_init() and iconv_file() reports incomplete input sequences.
 */
int debug;
int cflag; /* skip invalid characters */
int sflag; /* silent */
int lflag; /* list conversions */

/* Convert one open input stream to stdout; the name is used in messages. */
void iconv_file(FILE *, const char *);
/* Defined elsewhere; main() exits with its return value when -l is given. */
extern int list_codesets(void);

iconv_t ich; /* iconv(3c) lib handle */
/*
 * Active conversion routine, called by iconv_file().  main() points it at
 * cm_iconv when charmaps are in use and at lib_iconv otherwise.
 */
size_t (*pconv)(const char **iptr, size_t *ileft,
    char **optr, size_t *oleft);
49
50 size_t
lib_iconv(const char ** iptr,size_t * ileft,char ** optr,size_t * oleft)51 lib_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft)
52 {
53 return (iconv(ich, iptr, ileft, optr, oleft));
54 }
55
56 void
usage(void)57 usage(void)
58 {
59 (void) fprintf(stderr, gettext(
60 "usage: %s [-cs] [-f from-codeset] [-t to-codeset] "
61 "[file ...]\n"), progname);
62 (void) fprintf(stderr, gettext("\t%s -l\n"), progname);
63 exit(1);
64 }
65
66 int
main(int argc,char ** argv)67 main(int argc, char **argv)
68 {
69 FILE *fp;
70 char *fslash, *tslash;
71 int c;
72
73 yydebug = 0;
74 progname = getprogname();
75
76 (void) setlocale(LC_ALL, "");
77
78 #if !defined(TEXT_DOMAIN)
79 #define TEXT_DOMAIN "SYS_TEST"
80 #endif
81 (void) textdomain(TEXT_DOMAIN);
82
83 while ((c = getopt(argc, argv, "cdlsf:t:")) != EOF) {
84 switch (c) {
85 case 'c':
86 cflag++;
87 break;
88 case 'd':
89 debug++;
90 break;
91 case 'l':
92 lflag++;
93 break;
94 case 's':
95 sflag++;
96 break;
97 case 'f':
98 from_cs = optarg;
99 break;
100 case 't':
101 to_cs = optarg;
102 break;
103 case '?':
104 usage();
105 }
106 }
107
108 if (lflag) {
109 if (from_cs != NULL || to_cs != NULL || optind != argc)
110 usage();
111 exit(list_codesets());
112 }
113
114 if (from_cs == NULL)
115 from_cs = nl_langinfo(CODESET);
116 if (to_cs == NULL)
117 to_cs = nl_langinfo(CODESET);
118
119 /*
120 * If either "from" or "to" contains a slash,
121 * then we're using charmaps.
122 */
123 fslash = strchr(from_cs, '/');
124 tslash = strchr(to_cs, '/');
125 if (fslash != NULL || tslash != NULL) {
126 charmap_init(to_cs, from_cs);
127 pconv = cm_iconv;
128 if (debug)
129 charmap_dump();
130 } else {
131 ich = iconv_open(to_cs, from_cs);
132 if (ich == ((iconv_t)-1)) {
133 switch (errno) {
134 case EINVAL:
135 (void) fprintf(stderr,
136 _("Not supported %s to %s\n"),
137 from_cs, to_cs);
138 break;
139 default:
140 (void) fprintf(stderr,
141 _("iconv_open failed: %s\n"),
142 strerror(errno));
143 break;
144 }
145 exit(1);
146 }
147 pconv = lib_iconv;
148 }
149
150 if (optind == argc ||
151 (optind == argc - 1 && 0 == strcmp(argv[optind], "-"))) {
152 iconv_file(stdin, "stdin");
153 exit(warnings ? 1 : 0);
154 }
155
156 for (; optind < argc; optind++) {
157 fp = fopen(argv[optind], "r");
158 if (fp == NULL) {
159 perror(argv[optind]);
160 exit(1);
161 }
162 iconv_file(fp, argv[optind]);
163 (void) fclose(fp);
164 }
165 exit(warnings ? 1 : 0);
166 }
167
168 /*
169 * Conversion buffer sizes:
170 *
171 * The input buffer has room to prepend one mbs character if needed for
172 * handling a left-over at the end of a previous conversion buffer.
173 *
174 * Conversions may grow or shrink data, so using a larger output buffer
175 * to reduce the likelihood of leftover input buffer data in each pass.
176 */
177 #define IBUFSIZ (MB_LEN_MAX + BUFSIZ)
178 #define OBUFSIZ (2 * BUFSIZ)
179
180 void
iconv_file(FILE * fp,const char * fname)181 iconv_file(FILE *fp, const char *fname)
182 {
183 static char ibuf[IBUFSIZ];
184 static char obuf[OBUFSIZ];
185 const char *iptr;
186 char *optr;
187 off64_t offset;
188 size_t ileft, oleft, ocnt;
189 int iconv_errno;
190 int nr, nw, rc;
191
192 offset = 0;
193 ileft = 0;
194 iptr = ibuf + MB_LEN_MAX;
195
196 while ((nr = fread(ibuf+MB_LEN_MAX, 1, BUFSIZ, fp)) > 0) {
197
198 assert(iptr <= ibuf+MB_LEN_MAX);
199 assert(ileft <= MB_LEN_MAX);
200 ileft += nr;
201 offset += nr;
202
203 optr = obuf;
204 oleft = OBUFSIZ;
205
206 /*
207 * Note: the *pconv function is either iconv(3c) or our
208 * private equivalent when using charmaps. Both update
209 * ileft, oleft etc. even when conversion stops due to
210 * an illegal sequence or whatever, so we need to copy
211 * the partially converted buffer even on error.
212 */
213 iconv_again:
214 rc = (*pconv)(&iptr, &ileft, &optr, &oleft);
215 iconv_errno = errno;
216
217 ocnt = OBUFSIZ - oleft;
218 if (ocnt > 0) {
219 nw = fwrite(obuf, 1, ocnt, stdout);
220 if (nw != ocnt) {
221 perror("fwrite");
222 exit(1);
223 }
224 }
225 optr = obuf;
226 oleft = OBUFSIZ;
227
228 if (rc == (size_t)-1) {
229 switch (iconv_errno) {
230
231 case E2BIG: /* no room in output buffer */
232 goto iconv_again;
233
234 case EINVAL: /* incomplete sequence on input */
235 if (debug) {
236 (void) fprintf(stderr,
237 _("Incomplete sequence in %s at offset %lld\n"),
238 fname, offset - ileft);
239 }
240 /*
241 * Copy the remainder to the space reserved
242 * at the start of the input buffer.
243 */
244 assert(ileft > 0);
245 if (ileft <= MB_LEN_MAX) {
246 char *p = ibuf+MB_LEN_MAX-ileft;
247 (void) memmove(p, iptr, ileft);
248 iptr = p;
249 continue; /* read again */
250 }
251 /*
252 * Should not see ileft > MB_LEN_MAX,
253 * but if we do, handle as EILSEQ.
254 */
255 /* FALLTHROUGH */
256
257 case EILSEQ: /* invalid sequence on input */
258 if (!sflag) {
259 (void) fprintf(stderr,
260 _("Illegal sequence in %s at offset %lld\n"),
261 fname, offset - ileft);
262 (void) fprintf(stderr,
263 _("bad seq: \\x%02x\\x%02x\\x%02x\n"),
264 iptr[0] & 0xff,
265 iptr[1] & 0xff,
266 iptr[2] & 0xff);
267 }
268 assert(ileft > 0);
269 /* skip one */
270 iptr++;
271 ileft--;
272 assert(oleft > 0);
273 if (!cflag) {
274 *optr++ = '?';
275 oleft--;
276 }
277 goto iconv_again;
278
279 default:
280 (void) fprintf(stderr,
281 _("iconv error (%s) in file $s at offset %lld\n"),
282 strerror(iconv_errno), fname,
283 offset - ileft);
284 break;
285 }
286 }
287
288 /* normal iconv return */
289 ileft = 0;
290 iptr = ibuf + MB_LEN_MAX;
291 }
292
293 /*
294 * End of file
295 * Flush any shift encodings.
296 */
297 iptr = NULL;
298 ileft = 0;
299 optr = obuf;
300 oleft = OBUFSIZ;
301 (*pconv)(&iptr, &ileft, &optr, &oleft);
302 ocnt = OBUFSIZ - oleft;
303 if (ocnt > 0) {
304 nw = fwrite(obuf, 1, ocnt, stdout);
305 if (nw != ocnt) {
306 perror("fwrite");
307 exit(1);
308 }
309 }
310 }
311