xref: /illumos-gate/usr/src/cmd/iconv/iconv_main.c (revision e86372a01d2d16a5dd4a64e144ed978ba17fe7dd)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
14  */
15 
16 /*
17  * iconv(1) command.
18  */
19 
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <errno.h>
25 #include <limits.h>
26 #include <iconv.h>
27 #include <libintl.h>
28 #include <langinfo.h>
29 #include <locale.h>
30 #include "charmap.h"
31 
32 #include <assert.h>
33 
/* Program name as reported by getprogname(); used in usage messages. */
const char *progname;

/* Source and target codeset names, taken from the -f and -t options. */
char *from_cs, *to_cs;

/* Option counters, incremented once per occurrence on the command line. */
int debug;	/* -d */
int cflag;	/* -c: skip invalid characters */
int sflag;	/* -s: silent */
int lflag;	/* -l: list conversions */

/* Provided by another source file. */
extern int list_codesets(void);

void iconv_file(FILE *, const char *);

/*
 * Conversion back end.  ich is the iconv(3c) library handle, and pconv
 * points at whichever conversion routine main() selected.
 */
iconv_t ich;
size_t (*pconv)(const char **iptr, size_t *ileft,
		char **optr, size_t *oleft);
49 
50 size_t
51 lib_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft)
52 {
53 	return (iconv(ich, iptr, ileft, optr, oleft));
54 }
55 
56 void
57 usage(void)
58 {
59 	(void) fprintf(stderr, gettext(
60 	    "usage: %s [-cs] [-f from-codeset] [-t to-codeset] "
61 	    "[file ...]\n"), progname);
62 	(void) fprintf(stderr, gettext("\t%s -l\n"), progname);
63 	exit(1);
64 }
65 
66 int
67 main(int argc, char **argv)
68 {
69 	FILE *fp;
70 	char *fslash, *tslash;
71 	int c;
72 
73 	yydebug = 0;
74 	progname = getprogname();
75 
76 	(void) setlocale(LC_ALL, "");
77 
78 #if !defined(TEXT_DOMAIN)
79 #define	TEXT_DOMAIN	"SYS_TEST"
80 #endif
81 	(void) textdomain(TEXT_DOMAIN);
82 
83 	while ((c = getopt(argc, argv, "cdlsf:t:")) != EOF) {
84 		switch (c) {
85 		case 'c':
86 			cflag++;
87 			break;
88 		case 'd':
89 			debug++;
90 			break;
91 		case 'l':
92 			lflag++;
93 			break;
94 		case 's':
95 			sflag++;
96 			break;
97 		case 'f':
98 			from_cs = optarg;
99 			break;
100 		case 't':
101 			to_cs = optarg;
102 			break;
103 		case '?':
104 			usage();
105 		}
106 	}
107 
108 	if (lflag) {
109 		if (from_cs != NULL || to_cs != NULL || optind != argc)
110 			usage();
111 		exit(list_codesets());
112 	}
113 
114 	if (from_cs == NULL)
115 		from_cs = nl_langinfo(CODESET);
116 	if (to_cs == NULL)
117 		to_cs = nl_langinfo(CODESET);
118 
119 	/*
120 	 * If either "from" or "to" contains a slash,
121 	 * then we're using charmaps.
122 	 */
123 	fslash = strchr(from_cs, '/');
124 	tslash = strchr(to_cs, '/');
125 	if (fslash != NULL || tslash != NULL) {
126 		charmap_init(to_cs, from_cs);
127 		pconv = cm_iconv;
128 		if (debug)
129 			charmap_dump();
130 	} else {
131 		ich = iconv_open(to_cs, from_cs);
132 		if (ich == ((iconv_t)-1)) {
133 			switch (errno) {
134 			case EINVAL:
135 				(void) fprintf(stderr,
136 				    _("Not supported %s to %s\n"),
137 				    from_cs, to_cs);
138 				break;
139 			default:
140 				(void) fprintf(stderr,
141 				    _("iconv_open failed: %s\n"),
142 				    strerror(errno));
143 				break;
144 			}
145 			exit(1);
146 		}
147 		pconv = lib_iconv;
148 	}
149 
150 	if (optind == argc ||
151 	    (optind == argc - 1 && 0 == strcmp(argv[optind], "-"))) {
152 		iconv_file(stdin, "stdin");
153 		exit(warnings ? 1 : 0);
154 	}
155 
156 	for (; optind < argc; optind++) {
157 		fp = fopen(argv[optind], "r");
158 		if (fp == NULL) {
159 			perror(argv[optind]);
160 			exit(1);
161 		}
162 		iconv_file(fp, argv[optind]);
163 		(void) fclose(fp);
164 	}
165 	exit(warnings ? 1 : 0);
166 }
167 
168 /*
169  * Conversion buffer sizes:
170  *
171  * The input buffer has room to prepend one mbs character if needed for
172  * handling a left-over at the end of a previous conversion buffer.
173  *
174  * Conversions may grow or shrink data, so using a larger output buffer
175  * to reduce the likelihood of leftover input buffer data in each pass.
176  */
177 #define	IBUFSIZ	(MB_LEN_MAX + BUFSIZ)
178 #define	OBUFSIZ	(2 * BUFSIZ)
179 
180 void
181 iconv_file(FILE *fp, const char *fname)
182 {
183 	static char ibuf[IBUFSIZ];
184 	static char obuf[OBUFSIZ];
185 	const char *iptr;
186 	char *optr;
187 	off64_t offset;
188 	size_t ileft, oleft, ocnt;
189 	int iconv_errno;
190 	int nr, nw, rc;
191 
192 	offset = 0;
193 	ileft = 0;
194 	iptr = ibuf + MB_LEN_MAX;
195 
196 	while ((nr = fread(ibuf+MB_LEN_MAX, 1, BUFSIZ, fp)) > 0) {
197 
198 		assert(iptr <= ibuf+MB_LEN_MAX);
199 		assert(ileft <= MB_LEN_MAX);
200 		ileft += nr;
201 		offset += nr;
202 
203 		optr = obuf;
204 		oleft = OBUFSIZ;
205 
206 		/*
207 		 * Note: the *pconv function is either iconv(3c) or our
208 		 * private equivalent when using charmaps. Both update
209 		 * ileft, oleft etc. even when conversion stops due to
210 		 * an illegal sequence or whatever, so we need to copy
211 		 * the partially converted buffer even on error.
212 		 */
213 	iconv_again:
214 		rc = (*pconv)(&iptr, &ileft, &optr, &oleft);
215 		iconv_errno = errno;
216 
217 		ocnt = OBUFSIZ - oleft;
218 		if (ocnt > 0) {
219 			nw = fwrite(obuf, 1, ocnt, stdout);
220 			if (nw != ocnt) {
221 				perror("fwrite");
222 				exit(1);
223 			}
224 		}
225 		optr = obuf;
226 		oleft = OBUFSIZ;
227 
228 		if (rc == (size_t)-1) {
229 			switch (iconv_errno) {
230 
231 			case E2BIG:	/* no room in output buffer */
232 				goto iconv_again;
233 
234 			case EINVAL:	/* incomplete sequence on input */
235 				if (debug) {
236 					(void) fprintf(stderr,
237 			_("Incomplete sequence in %s at offset %lld\n"),
238 					    fname, offset - ileft);
239 				}
240 				/*
241 				 * Copy the remainder to the space reserved
242 				 * at the start of the input buffer.
243 				 */
244 				assert(ileft > 0);
245 				if (ileft <= MB_LEN_MAX) {
246 					char *p = ibuf+MB_LEN_MAX-ileft;
247 					(void) memmove(p, iptr, ileft);
248 					iptr = p;
249 					continue; /* read again */
250 				}
251 				/*
252 				 * Should not see ileft > MB_LEN_MAX,
253 				 * but if we do, handle as EILSEQ.
254 				 */
255 				/* FALLTHROUGH */
256 
257 			case EILSEQ:	/* invalid sequence on input */
258 				if (!sflag) {
259 					(void) fprintf(stderr,
260 			_("Illegal sequence in %s at offset %lld\n"),
261 					    fname, offset - ileft);
262 					(void) fprintf(stderr,
263 			_("bad seq: \\x%02x\\x%02x\\x%02x\n"),
264 					    iptr[0] & 0xff,
265 					    iptr[1] & 0xff,
266 					    iptr[2] & 0xff);
267 				}
268 				assert(ileft > 0);
269 				/* skip one */
270 				iptr++;
271 				ileft--;
272 				assert(oleft > 0);
273 				if (!cflag) {
274 					*optr++ = '?';
275 					oleft--;
276 				}
277 				goto iconv_again;
278 
279 			default:
280 				(void) fprintf(stderr,
281 			_("iconv error (%s) in file $s at offset %lld\n"),
282 				    strerror(iconv_errno), fname,
283 				    offset - ileft);
284 				break;
285 			}
286 		}
287 
288 		/* normal iconv return */
289 		ileft = 0;
290 		iptr = ibuf + MB_LEN_MAX;
291 	}
292 
293 	/*
294 	 * End of file
295 	 * Flush any shift encodings.
296 	 */
297 	iptr = NULL;
298 	ileft = 0;
299 	optr = obuf;
300 	oleft = OBUFSIZ;
301 	(*pconv)(&iptr, &ileft, &optr, &oleft);
302 	ocnt = OBUFSIZ - oleft;
303 	if (ocnt > 0) {
304 		nw = fwrite(obuf, 1, ocnt, stdout);
305 		if (nw != ocnt) {
306 			perror("fwrite");
307 			exit(1);
308 		}
309 	}
310 }
311