xref: /freebsd/usr.bin/cut/cut.c (revision c4f6a2a9e1b1879b618c436ab4f56ff75c73a0f5)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #ifndef lint
38 static const char copyright[] =
39 "@(#) Copyright (c) 1989, 1993\n\
40 	The Regents of the University of California.  All rights reserved.\n";
41 static const char sccsid[] = "@(#)cut.c	8.3 (Berkeley) 5/4/95";
42 #endif /* not lint */
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
45 
46 #include <ctype.h>
47 #include <err.h>
48 #include <limits.h>
49 #include <locale.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <unistd.h>
54 
/*
 * Command-line state.  Each flag is set to 1 by main() when the
 * corresponding option is seen on the command line.
 */
int	bflag;		/* -b list was given */
int	cflag;		/* -c list was given */
char	dchar;		/* field delimiter used by f_cut(); main() defaults it to tab */
int	dflag;		/* -d delim was given */
int	fflag;		/* -f list was given */
int	nflag;		/* -n was given; main() then selects b_n_cut() */
int	sflag;		/* -s was given; f_cut() then drops lines with no delimiter */

/* Per-input workers: each takes the open stream and a name for diagnostics. */
void	b_n_cut(FILE *, const char *);
void	c_cut(FILE *, const char *);
void	f_cut(FILE *, const char *);
/* Parse one list argument into the selection table. */
void	get_list(char *);
/* Grow the selection table to hold at least the given number of entries. */
void	needpos(size_t);
/* Print the synopsis to stderr and exit with status 1. */
static 	void usage(void);
69 
70 int
71 main(argc, argv)
72 	int argc;
73 	char *argv[];
74 {
75 	FILE *fp;
76 	void (*fcn)(FILE *, const char *);
77 	int ch, rval;
78 
79 	setlocale(LC_ALL, "");
80 
81 	fcn = NULL;
82 	dchar = '\t';			/* default delimiter is \t */
83 
84 	/*
85 	 * Since we don't support multi-byte characters, the -c and -b
86 	 * options are equivalent.
87 	 */
88 	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
89 		switch(ch) {
90 		case 'b':
91 			fcn = c_cut;
92 			get_list(optarg);
93 			bflag = 1;
94 			break;
95 		case 'c':
96 			fcn = c_cut;
97 			get_list(optarg);
98 			cflag = 1;
99 			break;
100 		case 'd':
101 			dchar = *optarg;
102 			dflag = 1;
103 			break;
104 		case 'f':
105 			get_list(optarg);
106 			fcn = f_cut;
107 			fflag = 1;
108 			break;
109 		case 's':
110 			sflag = 1;
111 			break;
112 		case 'n':
113 			nflag = 1;
114 			break;
115 		case '?':
116 		default:
117 			usage();
118 		}
119 	argc -= optind;
120 	argv += optind;
121 
122 	if (fflag) {
123 		if (bflag || cflag || nflag)
124 			usage();
125 	} else if (!(bflag || cflag) || dflag || sflag)
126 		usage();
127 	else if (!bflag && nflag)
128 		usage();
129 
130 	if (nflag)
131 		fcn = b_n_cut;
132 
133 	rval = 0;
134 	if (*argv)
135 		for (; *argv; ++argv) {
136 			if (strcmp(*argv, "-") == 0)
137 				fcn(stdin, "stdin");
138 			else {
139 				if (!(fp = fopen(*argv, "r"))) {
140 					warn("%s", *argv);
141 					rval = 1;
142 					continue;
143 				}
144 				fcn(fp, *argv);
145 				(void)fclose(fp);
146 			}
147 		}
148 	else
149 		fcn(stdin, "stdin");
150 	exit(rval);
151 }
152 
/*
 * Selection state built by get_list() and read by the *_cut() routines:
 *   autostart	largest N seen in an item with a leading dash ("-N");
 *		positions 1..N are selected.
 *   autostop	smallest N seen in an open-ended item ("N-"); zero when
 *		no such item was given.
 *   maxval	highest position that is looked up in positions[].
 */
size_t autostart, autostop, maxval;

/* positions[i] is nonzero when the 1-based position i is selected. */
char *positions;

/*
 * Parse a list argument (items separated by commas, blanks or tabs) and
 * record the selected positions in the positions array.
 *
 * The string is split in place with strsep(), so it is modified.  Any
 * malformed item or a zero value terminates the program through errx().
 * The function may be called more than once; selections accumulate.
 */
void
get_list(char *list)
{
	size_t i, setautostart, start, stop;
	char *p;

	/*
	 * set a byte in the positions array to indicate if a field or
	 * column is to be selected; use +1, it's 1-based, not 0-based.
	 * This parser is less restrictive than the Draft 9 POSIX spec.
	 * POSIX doesn't allow lists that aren't in increasing order or
	 * overlapping lists.  We also handle "-3-5" although there's no
	 * real reason to.
	 */
	for (; (p = strsep(&list, ", \t")) != NULL;) {
		setautostart = start = stop = 0;
		if (*p == '-') {
			++p;
			setautostart = 1;
		}
		if (isdigit((unsigned char)*p)) {
			start = stop = strtol(p, &p, 10);
			if (setautostart && start > autostart)
				autostart = start;
		}
		if (*p == '-') {
			if (isdigit((unsigned char)p[1]))
				stop = strtol(p + 1, &p, 10);
			if (*p == '-') {
				/* Trailing dash: open-ended range. */
				++p;
				if (!autostop || autostop > stop)
					autostop = stop;
			}
		}
		if (*p)
			errx(1, "[-bcf] list: illegal list value");
		if (!stop || !start)
			errx(1, "[-bcf] list: values may not include zero");
		if (maxval < stop) {
			maxval = stop;
			needpos(maxval + 1);
		}
		/*
		 * Index the array instead of walking a pointer: for a
		 * decreasing range (start > stop) nothing is marked and no
		 * out-of-bounds pointer is ever formed.
		 */
		for (i = start; i <= stop; i++)
			positions[i] = 1;
	}

	/* overlapping ranges */
	if (autostop && maxval > autostop) {
		maxval = autostop;
		needpos(maxval + 1);
	}

	/* set autostart */
	if (autostart) {
		/*
		 * autostart can exceed every "stop" seen so far (e.g. the
		 * item "-5-3"), so make sure the array is large enough
		 * before marking positions 1..autostart.
		 */
		needpos(autostart + 1);
		memset(positions + 1, 1, autostart);
	}
}
214 
215 void
216 needpos(size_t n)
217 {
218 	static size_t npos;
219 	size_t oldnpos;
220 
221 	/* Grow the positions array to at least the specified size. */
222 	if (n > npos) {
223 		oldnpos = npos;
224 		if (npos == 0)
225 			npos = n;
226 		while (n > npos)
227 			npos *= 2;
228 		if ((positions = realloc(positions, npos)) == NULL)
229 			err(1, "realloc");
230 		memset((char *)positions + oldnpos, 0, npos - oldnpos);
231 	}
232 }
233 
234 /*
235  * Cut based on byte positions, taking care not to split multibyte characters.
236  * Although this function also handles the case where -n is not specified,
237  * c_cut() ought to be much faster.
238  */
239 void
240 b_n_cut(fp, fname)
241 	FILE *fp;
242 	const char *fname;
243 {
244 	size_t col, i, lbuflen;
245 	char *lbuf;
246 	int canwrite, clen, warned;
247 
248 	warned = 0;
249 	while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
250 		for (col = 0; lbuflen > 0; col += clen) {
251 			if ((clen = mblen(lbuf, lbuflen)) < 0) {
252 				if (!warned) {
253 					warn("%s", fname);
254 					warned = 1;
255 				}
256 				clen = 1;
257 			}
258 			if (clen == 0 || *lbuf == '\n')
259 				break;
260 			if (col < maxval && !positions[1 + col]) {
261 				/*
262 				 * Print the character if (1) after an initial
263 				 * segment of un-selected bytes, the rest of
264 				 * it is selected, and (2) the last byte is
265 				 * selected.
266 				 */
267 				i = col;
268 				while (i < col + clen && i < maxval &&
269 				    !positions[1 + i])
270 					i++;
271 				canwrite = i < col + clen;
272 				for (; i < col + clen && i < maxval; i++)
273 					canwrite &= positions[1 + i];
274 				if (canwrite)
275 					fwrite(lbuf, 1, clen, stdout);
276 			} else {
277 				/*
278 				 * Print the character if all of it has
279 				 * been selected.
280 				 */
281 				canwrite = 1;
282 				for (i = col; i < col + clen; i++)
283 					if ((i >= maxval && !autostop) ||
284 					    (i < maxval && !positions[1 + i])) {
285 						canwrite = 0;
286 						break;
287 					}
288 				if (canwrite)
289 					fwrite(lbuf, 1, clen, stdout);
290 			}
291 			lbuf += clen;
292 			lbuflen -= clen;
293 		}
294 		if (lbuflen > 0)
295 			putchar('\n');
296 	}
297 }
298 
299 void
300 c_cut(fp, fname)
301 	FILE *fp;
302 	const char *fname __unused;
303 {
304 	int ch, col;
305 	char *pos;
306 
307 	ch = 0;
308 	for (;;) {
309 		pos = positions + 1;
310 		for (col = maxval; col; --col) {
311 			if ((ch = getc(fp)) == EOF)
312 				return;
313 			if (ch == '\n')
314 				break;
315 			if (*pos++)
316 				(void)putchar(ch);
317 		}
318 		if (ch != '\n') {
319 			if (autostop)
320 				while ((ch = getc(fp)) != EOF && ch != '\n')
321 					(void)putchar(ch);
322 			else
323 				while ((ch = getc(fp)) != EOF && ch != '\n');
324 		}
325 		(void)putchar('\n');
326 	}
327 }
328 
329 void
330 f_cut(fp, fname)
331 	FILE *fp;
332 	const char *fname __unused;
333 {
334 	int ch, field, isdelim;
335 	char *pos, *p, sep;
336 	int output;
337 	char *lbuf, *mlbuf;
338 	size_t lbuflen;
339 
340 	mlbuf = NULL;
341 	for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) {
342 		/* Assert EOL has a newline. */
343 		if (*(lbuf + lbuflen - 1) != '\n') {
344 			/* Can't have > 1 line with no trailing newline. */
345 			mlbuf = malloc(lbuflen + 1);
346 			if (mlbuf == NULL)
347 				err(1, "malloc");
348 			memcpy(mlbuf, lbuf, lbuflen);
349 			*(mlbuf + lbuflen) = '\n';
350 			lbuf = mlbuf;
351 		}
352 		output = 0;
353 		for (isdelim = 0, p = lbuf;; ++p) {
354 			ch = *p;
355 			/* this should work if newline is delimiter */
356 			if (ch == sep)
357 				isdelim = 1;
358 			if (ch == '\n') {
359 				if (!isdelim && !sflag)
360 					(void)fwrite(lbuf, lbuflen, 1, stdout);
361 				break;
362 			}
363 		}
364 		if (!isdelim)
365 			continue;
366 
367 		pos = positions + 1;
368 		for (field = maxval, p = lbuf; field; --field, ++pos) {
369 			if (*pos) {
370 				if (output++)
371 					(void)putchar(sep);
372 				while ((ch = *p++) != '\n' && ch != sep)
373 					(void)putchar(ch);
374 			} else {
375 				while ((ch = *p++) != '\n' && ch != sep)
376 					continue;
377 			}
378 			if (ch == '\n')
379 				break;
380 		}
381 		if (ch != '\n') {
382 			if (autostop) {
383 				if (output)
384 					(void)putchar(sep);
385 				for (; (ch = *p) != '\n'; ++p)
386 					(void)putchar(ch);
387 			} else
388 				for (; (ch = *p) != '\n'; ++p);
389 		}
390 		(void)putchar('\n');
391 	}
392 	if (mlbuf != NULL)
393 		free(mlbuf);
394 }
395 
/*
 * Write the three-line command synopsis to standard error and terminate
 * the program with exit status 1.
 */
static void
usage(void)
{
	static const char *const synopsis[] = {
		"usage: cut -b list [-n] [file ...]",
		"       cut -c list [file ...]",
		"       cut -f list [-s] [-d delim] [file ...]",
	};

	(void)fprintf(stderr, "%s\n%s\n%s\n",
	    synopsis[0], synopsis[1], synopsis[2]);
	exit(1);
}
405