xref: /freebsd/usr.bin/cut/cut.c (revision 4b2eaea43fec8e8792be611dea204071a10b655a)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #ifndef lint
38 static const char copyright[] =
39 "@(#) Copyright (c) 1989, 1993\n\
40 	The Regents of the University of California.  All rights reserved.\n";
41 static const char sccsid[] = "@(#)cut.c	8.3 (Berkeley) 5/4/95";
42 #endif /* not lint */
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
45 
46 #include <ctype.h>
47 #include <err.h>
48 #include <limits.h>
49 #include <locale.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <unistd.h>
54 
/*
 * Command-line state, filled in by main() while it walks the options.
 * Nothing outside this file refers to these objects or helpers, so they
 * are given internal linkage.
 */
static int	bflag;		/* -b was given */
static int	cflag;		/* -c was given */
static char	dchar;		/* field delimiter used by f_cut(); set via -d */
static int	dflag;		/* -d was given */
static int	fflag;		/* -f was given */
static int	nflag;		/* -n was given (only accepted with -b) */
static int	sflag;		/* -s was given: f_cut() drops lines without a delimiter */

static void	b_n_cut(FILE *, const char *);
static void	c_cut(FILE *, const char *);
static void	f_cut(FILE *, const char *);
static void	get_list(char *);
static void	needpos(size_t);
static void	usage(void);
69 
70 int
71 main(int argc, char *argv[])
72 {
73 	FILE *fp;
74 	void (*fcn)(FILE *, const char *);
75 	int ch, rval;
76 
77 	setlocale(LC_ALL, "");
78 
79 	fcn = NULL;
80 	dchar = '\t';			/* default delimiter is \t */
81 
82 	/*
83 	 * Since we don't support multi-byte characters, the -c and -b
84 	 * options are equivalent.
85 	 */
86 	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
87 		switch(ch) {
88 		case 'b':
89 			fcn = c_cut;
90 			get_list(optarg);
91 			bflag = 1;
92 			break;
93 		case 'c':
94 			fcn = c_cut;
95 			get_list(optarg);
96 			cflag = 1;
97 			break;
98 		case 'd':
99 			dchar = *optarg;
100 			dflag = 1;
101 			break;
102 		case 'f':
103 			get_list(optarg);
104 			fcn = f_cut;
105 			fflag = 1;
106 			break;
107 		case 's':
108 			sflag = 1;
109 			break;
110 		case 'n':
111 			nflag = 1;
112 			break;
113 		case '?':
114 		default:
115 			usage();
116 		}
117 	argc -= optind;
118 	argv += optind;
119 
120 	if (fflag) {
121 		if (bflag || cflag || nflag)
122 			usage();
123 	} else if (!(bflag || cflag) || dflag || sflag)
124 		usage();
125 	else if (!bflag && nflag)
126 		usage();
127 
128 	if (nflag)
129 		fcn = b_n_cut;
130 
131 	rval = 0;
132 	if (*argv)
133 		for (; *argv; ++argv) {
134 			if (strcmp(*argv, "-") == 0)
135 				fcn(stdin, "stdin");
136 			else {
137 				if (!(fp = fopen(*argv, "r"))) {
138 					warn("%s", *argv);
139 					rval = 1;
140 					continue;
141 				}
142 				fcn(fp, *argv);
143 				(void)fclose(fp);
144 			}
145 		}
146 	else
147 		fcn(stdin, "stdin");
148 	exit(rval);
149 }
150 
/*
 * Selection state built by get_list() and read by the cutting routines.
 * Only this file uses it, so it has internal linkage.
 */
static size_t	autostart;	/* largest N from a leading-dash element ("-N"); 0 if none */
static size_t	autostop;	/* smallest stop of an open-ended element ("N-"); 0 if none */
static size_t	maxval;		/* highest position the cutters look up in positions[] */

static char	*positions;	/* positions[i] is non-zero when 1-based position i is selected */
154 
155 void
156 get_list(char *list)
157 {
158 	size_t setautostart, start, stop;
159 	char *pos;
160 	char *p;
161 
162 	/*
163 	 * set a byte in the positions array to indicate if a field or
164 	 * column is to be selected; use +1, it's 1-based, not 0-based.
165 	 * This parser is less restrictive than the Draft 9 POSIX spec.
166 	 * POSIX doesn't allow lists that aren't in increasing order or
167 	 * overlapping lists.  We also handle "-3-5" although there's no
168 	 * real reason too.
169 	 */
170 	for (; (p = strsep(&list, ", \t")) != NULL;) {
171 		setautostart = start = stop = 0;
172 		if (*p == '-') {
173 			++p;
174 			setautostart = 1;
175 		}
176 		if (isdigit((unsigned char)*p)) {
177 			start = stop = strtol(p, &p, 10);
178 			if (setautostart && start > autostart)
179 				autostart = start;
180 		}
181 		if (*p == '-') {
182 			if (isdigit((unsigned char)p[1]))
183 				stop = strtol(p + 1, &p, 10);
184 			if (*p == '-') {
185 				++p;
186 				if (!autostop || autostop > stop)
187 					autostop = stop;
188 			}
189 		}
190 		if (*p)
191 			errx(1, "[-cf] list: illegal list value");
192 		if (!stop || !start)
193 			errx(1, "[-cf] list: values may not include zero");
194 		if (maxval < stop) {
195 			maxval = stop;
196 			needpos(maxval + 1);
197 		}
198 		for (pos = positions + start; start++ <= stop; *pos++ = 1);
199 	}
200 
201 	/* overlapping ranges */
202 	if (autostop && maxval > autostop) {
203 		maxval = autostop;
204 		needpos(maxval + 1);
205 	}
206 
207 	/* set autostart */
208 	if (autostart)
209 		memset(positions + 1, '1', autostart);
210 }
211 
212 void
213 needpos(size_t n)
214 {
215 	static size_t npos;
216 	size_t oldnpos;
217 
218 	/* Grow the positions array to at least the specified size. */
219 	if (n > npos) {
220 		oldnpos = npos;
221 		if (npos == 0)
222 			npos = n;
223 		while (n > npos)
224 			npos *= 2;
225 		if ((positions = realloc(positions, npos)) == NULL)
226 			err(1, "realloc");
227 		memset((char *)positions + oldnpos, 0, npos - oldnpos);
228 	}
229 }
230 
231 /*
232  * Cut based on byte positions, taking care not to split multibyte characters.
233  * Although this function also handles the case where -n is not specified,
234  * c_cut() ought to be much faster.
235  */
236 void
237 b_n_cut(FILE *fp, const char *fname)
238 {
239 	size_t col, i, lbuflen;
240 	char *lbuf;
241 	int canwrite, clen, warned;
242 
243 	warned = 0;
244 	while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
245 		for (col = 0; lbuflen > 0; col += clen) {
246 			if ((clen = mblen(lbuf, lbuflen)) < 0) {
247 				if (!warned) {
248 					warn("%s", fname);
249 					warned = 1;
250 				}
251 				clen = 1;
252 			}
253 			if (clen == 0 || *lbuf == '\n')
254 				break;
255 			if (col < maxval && !positions[1 + col]) {
256 				/*
257 				 * Print the character if (1) after an initial
258 				 * segment of un-selected bytes, the rest of
259 				 * it is selected, and (2) the last byte is
260 				 * selected.
261 				 */
262 				i = col;
263 				while (i < col + clen && i < maxval &&
264 				    !positions[1 + i])
265 					i++;
266 				canwrite = i < col + clen;
267 				for (; i < col + clen && i < maxval; i++)
268 					canwrite &= positions[1 + i];
269 				if (canwrite)
270 					fwrite(lbuf, 1, clen, stdout);
271 			} else {
272 				/*
273 				 * Print the character if all of it has
274 				 * been selected.
275 				 */
276 				canwrite = 1;
277 				for (i = col; i < col + clen; i++)
278 					if ((i >= maxval && !autostop) ||
279 					    (i < maxval && !positions[1 + i])) {
280 						canwrite = 0;
281 						break;
282 					}
283 				if (canwrite)
284 					fwrite(lbuf, 1, clen, stdout);
285 			}
286 			lbuf += clen;
287 			lbuflen -= clen;
288 		}
289 		if (lbuflen > 0)
290 			putchar('\n');
291 	}
292 }
293 
294 void
295 c_cut(FILE *fp, const char *fname __unused)
296 {
297 	int ch, col;
298 	char *pos;
299 
300 	ch = 0;
301 	for (;;) {
302 		pos = positions + 1;
303 		for (col = maxval; col; --col) {
304 			if ((ch = getc(fp)) == EOF)
305 				return;
306 			if (ch == '\n')
307 				break;
308 			if (*pos++)
309 				(void)putchar(ch);
310 		}
311 		if (ch != '\n') {
312 			if (autostop)
313 				while ((ch = getc(fp)) != EOF && ch != '\n')
314 					(void)putchar(ch);
315 			else
316 				while ((ch = getc(fp)) != EOF && ch != '\n');
317 		}
318 		(void)putchar('\n');
319 	}
320 }
321 
322 void
323 f_cut(FILE *fp, const char *fname __unused)
324 {
325 	int ch, field, isdelim;
326 	char *pos, *p, sep;
327 	int output;
328 	char *lbuf, *mlbuf;
329 	size_t lbuflen;
330 
331 	mlbuf = NULL;
332 	for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) {
333 		/* Assert EOL has a newline. */
334 		if (*(lbuf + lbuflen - 1) != '\n') {
335 			/* Can't have > 1 line with no trailing newline. */
336 			mlbuf = malloc(lbuflen + 1);
337 			if (mlbuf == NULL)
338 				err(1, "malloc");
339 			memcpy(mlbuf, lbuf, lbuflen);
340 			*(mlbuf + lbuflen) = '\n';
341 			lbuf = mlbuf;
342 		}
343 		output = 0;
344 		for (isdelim = 0, p = lbuf;; ++p) {
345 			ch = *p;
346 			/* this should work if newline is delimiter */
347 			if (ch == sep)
348 				isdelim = 1;
349 			if (ch == '\n') {
350 				if (!isdelim && !sflag)
351 					(void)fwrite(lbuf, lbuflen, 1, stdout);
352 				break;
353 			}
354 		}
355 		if (!isdelim)
356 			continue;
357 
358 		pos = positions + 1;
359 		for (field = maxval, p = lbuf; field; --field, ++pos) {
360 			if (*pos) {
361 				if (output++)
362 					(void)putchar(sep);
363 				while ((ch = *p++) != '\n' && ch != sep)
364 					(void)putchar(ch);
365 			} else {
366 				while ((ch = *p++) != '\n' && ch != sep)
367 					continue;
368 			}
369 			if (ch == '\n')
370 				break;
371 		}
372 		if (ch != '\n') {
373 			if (autostop) {
374 				if (output)
375 					(void)putchar(sep);
376 				for (; (ch = *p) != '\n'; ++p)
377 					(void)putchar(ch);
378 			} else
379 				for (; (ch = *p) != '\n'; ++p);
380 		}
381 		(void)putchar('\n');
382 	}
383 	if (mlbuf != NULL)
384 		free(mlbuf);
385 }
386 
/*
 * Print the three supported invocations to standard error and exit with
 * status 1.  Does not return.
 */
static void
usage(void)
{
	static const char *const synopsis[] = {
		"usage: cut -b list [-n] [file ...]",
		"       cut -c list [file ...]",
		"       cut -f list [-s] [-d delim] [file ...]"
	};

	(void)fprintf(stderr, "%s\n%s\n%s\n",
	    synopsis[0], synopsis[1], synopsis[2]);
	exit(1);
}
396