xref: /freebsd/usr.bin/cut/cut.c (revision 09e8dea79366f1e5b3a73e8a271b26e4b6bf2e6a)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #ifndef lint
38 static const char copyright[] =
39 "@(#) Copyright (c) 1989, 1993\n\
40 	The Regents of the University of California.  All rights reserved.\n";
41 static const char sccsid[] = "@(#)cut.c	8.3 (Berkeley) 5/4/95";
42 static const char rcsid[] =
43   "$FreeBSD$";
44 #endif /* not lint */
45 
46 #include <ctype.h>
47 #include <err.h>
48 #include <limits.h>
49 #include <locale.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <unistd.h>
54 
/* Option state, filled in by main() while it parses the command line. */
int	bflag;		/* -b list was given */
int	cflag;		/* -c list was given */
char	dchar;		/* field delimiter used by f_cut(); main() defaults it to tab */
int	dflag;		/* -d was given (only accepted together with -f) */
int	fflag;		/* -f list was given */
int	nflag;		/* -n was given; with -b it selects b_n_cut() */
int	sflag;		/* -s was given; f_cut() then drops lines with no delimiter */

/* Per-stream cutters; the second argument is the name used in diagnostics. */
void	b_n_cut(FILE *, const char *);
void	c_cut(FILE *, const char *);
void	f_cut(FILE *, const char *);
/* Parses a list argument into the positions selection table. */
void	get_list(char *);
int	main(int, char **);
/* Grows the positions table to hold at least the given number of bytes. */
void	needpos(size_t);
/* Prints the usage message and exits. */
static 	void usage(void);
70 
71 int
72 main(argc, argv)
73 	int argc;
74 	char *argv[];
75 {
76 	FILE *fp;
77 	void (*fcn)(FILE *, const char *);
78 	int ch, rval;
79 
80 	setlocale(LC_ALL, "");
81 
82 	fcn = NULL;
83 	dchar = '\t';			/* default delimiter is \t */
84 
85 	/*
86 	 * Since we don't support multi-byte characters, the -c and -b
87 	 * options are equivalent.
88 	 */
89 	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
90 		switch(ch) {
91 		case 'b':
92 			fcn = c_cut;
93 			get_list(optarg);
94 			bflag = 1;
95 			break;
96 		case 'c':
97 			fcn = c_cut;
98 			get_list(optarg);
99 			cflag = 1;
100 			break;
101 		case 'd':
102 			dchar = *optarg;
103 			dflag = 1;
104 			break;
105 		case 'f':
106 			get_list(optarg);
107 			fcn = f_cut;
108 			fflag = 1;
109 			break;
110 		case 's':
111 			sflag = 1;
112 			break;
113 		case 'n':
114 			nflag = 1;
115 			break;
116 		case '?':
117 		default:
118 			usage();
119 		}
120 	argc -= optind;
121 	argv += optind;
122 
123 	if (fflag) {
124 		if (bflag || cflag || nflag)
125 			usage();
126 	} else if (!(bflag || cflag) || dflag || sflag)
127 		usage();
128 	else if (!bflag && nflag)
129 		usage();
130 
131 	if (nflag)
132 		fcn = b_n_cut;
133 
134 	rval = 0;
135 	if (*argv)
136 		for (; *argv; ++argv) {
137 			if (strcmp(*argv, "-") == 0)
138 				fcn(stdin, "stdin");
139 			else {
140 				if (!(fp = fopen(*argv, "r"))) {
141 					warn("%s", *argv);
142 					rval = 1;
143 					continue;
144 				}
145 				fcn(fp, *argv);
146 				(void)fclose(fp);
147 			}
148 		}
149 	else
150 		fcn(stdin, "stdin");
151 	exit(rval);
152 }
153 
/*
 * Selection state built by get_list() and read by the cut routines.
 *
 * autostart: largest N seen in a leading-dash item ("-N"); positions 1..N
 *            are then selected.  Zero when no such item was given.
 * autostop:  smallest value seen in a trailing-dash item ("N-"); when
 *            nonzero, everything past maxval is selected as well.
 * maxval:    highest position the cut routines look up in positions[].
 */
size_t autostart, autostop, maxval;

/* positions[i] is nonzero when 1-based position i is selected. */
char *positions;

/*
 * Parse a -b/-c/-f list argument and record the selected positions.
 *
 * list is split in place (strsep) on commas, blanks and tabs.  Each item is
 * a number, a range "N-M", an open range "N-" or "-M".  Malformed items and
 * items containing zero terminate the program through errx().  May be
 * called more than once; the selections accumulate.
 */
void
get_list(char *list)
{
	size_t i, setautostart, start, stop;
	char *p;

	/*
	 * set a byte in the positions array to indicate if a field or
	 * column is to be selected; use +1, it's 1-based, not 0-based.
	 * This parser is less restrictive than the Draft 9 POSIX spec.
	 * POSIX doesn't allow lists that aren't in increasing order or
	 * overlapping lists.  We also handle "-3-5" although there's no
	 * real reason to.
	 */
	for (; (p = strsep(&list, ", \t")) != NULL;) {
		setautostart = start = stop = 0;
		if (*p == '-') {
			++p;
			setautostart = 1;
		}
		if (isdigit((unsigned char)*p)) {
			start = stop = strtol(p, &p, 10);
			if (setautostart && start > autostart)
				autostart = start;
		}
		if (*p == '-') {
			if (isdigit((unsigned char)p[1]))
				stop = strtol(p + 1, &p, 10);
			if (*p == '-') {
				++p;
				if (!autostop || autostop > stop)
					autostop = stop;
			}
		}
		if (*p)
			errx(1, "[-bcf] list: illegal list value");
		if (!stop || !start)
			errx(1, "[-bcf] list: values may not include zero");
		if (maxval < stop) {
			maxval = stop;
			needpos(maxval + 1);
		}
		/*
		 * Index rather than walk a pointer: for a reversed range
		 * such as "5-3" nothing is marked, and no pointer beyond
		 * the end of the array is ever formed.
		 */
		for (i = start; i <= stop; i++)
			positions[i] = 1;
	}

	/* overlapping ranges */
	if (autostop && maxval > autostop) {
		maxval = autostop;
		needpos(maxval + 1);
	}

	/* set autostart: mark positions 1..autostart as selected */
	if (autostart)
		memset(positions + 1, 1, autostart);
}
215 
216 void
217 needpos(size_t n)
218 {
219 	static size_t npos;
220 	size_t oldnpos;
221 
222 	/* Grow the positions array to at least the specified size. */
223 	if (n > npos) {
224 		oldnpos = npos;
225 		if (npos == 0)
226 			npos = n;
227 		while (n > npos)
228 			npos *= 2;
229 		if ((positions = realloc(positions, npos)) == NULL)
230 			err(1, "realloc");
231 		memset((char *)positions + oldnpos, 0, npos - oldnpos);
232 	}
233 }
234 
235 /*
236  * Cut based on byte positions, taking care not to split multibyte characters.
237  * Although this function also handles the case where -n is not specified,
238  * c_cut() ought to be much faster.
239  */
240 void
241 b_n_cut(fp, fname)
242 	FILE *fp;
243 	const char *fname;
244 {
245 	size_t col, i, lbuflen;
246 	char *lbuf;
247 	int canwrite, clen, warned;
248 
249 	warned = 0;
250 	while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
251 		for (col = 0; lbuflen > 0; col += clen) {
252 			if ((clen = mblen(lbuf, lbuflen)) < 0) {
253 				if (!warned) {
254 					warn("%s", fname);
255 					warned = 1;
256 				}
257 				clen = 1;
258 			}
259 			if (clen == 0 || *lbuf == '\n')
260 				break;
261 			if (col < maxval && !positions[1 + col]) {
262 				/*
263 				 * Print the character if (1) after an initial
264 				 * segment of un-selected bytes, the rest of
265 				 * it is selected, and (2) the last byte is
266 				 * selected.
267 				 */
268 				i = col;
269 				while (i < col + clen && i < maxval &&
270 				    !positions[1 + i])
271 					i++;
272 				canwrite = i < col + clen;
273 				for (; i < col + clen && i < maxval; i++)
274 					canwrite &= positions[1 + i];
275 				if (canwrite)
276 					fwrite(lbuf, 1, clen, stdout);
277 			} else {
278 				/*
279 				 * Print the character if all of it has
280 				 * been selected.
281 				 */
282 				canwrite = 1;
283 				for (i = col; i < col + clen; i++)
284 					if ((i >= maxval && !autostop) ||
285 					    (i < maxval && !positions[1 + i])) {
286 						canwrite = 0;
287 						break;
288 					}
289 				if (canwrite)
290 					fwrite(lbuf, 1, clen, stdout);
291 			}
292 			lbuf += clen;
293 			lbuflen -= clen;
294 		}
295 		if (lbuflen > 0)
296 			putchar('\n');
297 	}
298 }
299 
300 void
301 c_cut(fp, fname)
302 	FILE *fp;
303 	const char *fname __unused;
304 {
305 	int ch, col;
306 	char *pos;
307 
308 	ch = 0;
309 	for (;;) {
310 		pos = positions + 1;
311 		for (col = maxval; col; --col) {
312 			if ((ch = getc(fp)) == EOF)
313 				return;
314 			if (ch == '\n')
315 				break;
316 			if (*pos++)
317 				(void)putchar(ch);
318 		}
319 		if (ch != '\n') {
320 			if (autostop)
321 				while ((ch = getc(fp)) != EOF && ch != '\n')
322 					(void)putchar(ch);
323 			else
324 				while ((ch = getc(fp)) != EOF && ch != '\n');
325 		}
326 		(void)putchar('\n');
327 	}
328 }
329 
330 void
331 f_cut(fp, fname)
332 	FILE *fp;
333 	const char *fname __unused;
334 {
335 	int ch, field, isdelim;
336 	char *pos, *p, sep;
337 	int output;
338 	char *lbuf, *mlbuf;
339 	size_t lbuflen;
340 
341 	mlbuf = NULL;
342 	for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) {
343 		/* Assert EOL has a newline. */
344 		if (*(lbuf + lbuflen - 1) != '\n') {
345 			/* Can't have > 1 line with no trailing newline. */
346 			mlbuf = malloc(lbuflen + 1);
347 			if (mlbuf == NULL)
348 				err(1, "malloc");
349 			memcpy(mlbuf, lbuf, lbuflen);
350 			*(mlbuf + lbuflen) = '\n';
351 			lbuf = mlbuf;
352 		}
353 		output = 0;
354 		for (isdelim = 0, p = lbuf;; ++p) {
355 			ch = *p;
356 			/* this should work if newline is delimiter */
357 			if (ch == sep)
358 				isdelim = 1;
359 			if (ch == '\n') {
360 				if (!isdelim && !sflag)
361 					(void)fwrite(lbuf, lbuflen, 1, stdout);
362 				break;
363 			}
364 		}
365 		if (!isdelim)
366 			continue;
367 
368 		pos = positions + 1;
369 		for (field = maxval, p = lbuf; field; --field, ++pos) {
370 			if (*pos) {
371 				if (output++)
372 					(void)putchar(sep);
373 				while ((ch = *p++) != '\n' && ch != sep)
374 					(void)putchar(ch);
375 			} else {
376 				while ((ch = *p++) != '\n' && ch != sep)
377 					continue;
378 			}
379 			if (ch == '\n')
380 				break;
381 		}
382 		if (ch != '\n') {
383 			if (autostop) {
384 				if (output)
385 					(void)putchar(sep);
386 				for (; (ch = *p) != '\n'; ++p)
387 					(void)putchar(ch);
388 			} else
389 				for (; (ch = *p) != '\n'; ++p);
390 		}
391 		(void)putchar('\n');
392 	}
393 	if (mlbuf != NULL)
394 		free(mlbuf);
395 }
396 
/*
 * Write the three-line synopsis to standard error and exit with status 1.
 * Does not return.
 */
static void
usage(void)
{
	(void)fputs(
	    "usage: cut -b list [-n] [file ...]\n"
	    "       cut -c list [file ...]\n"
	    "       cut -f list [-s] [-d delim] [file ...]\n",
	    stderr);
	exit(1);
}
406