xref: /freebsd/usr.bin/split/split.c (revision 71fe318b852b8dfb3e799cb12ef184750f7f8eac)
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #ifndef lint
38 static const char copyright[] =
39 "@(#) Copyright (c) 1987, 1993, 1994\n\
40 	The Regents of the University of California.  All rights reserved.\n";
41 #endif
42 
43 #ifndef lint
44 static const char sccsid[] = "@(#)split.c	8.2 (Berkeley) 4/16/94";
45 #endif
46 
47 #include <sys/param.h>
48 
49 #include <ctype.h>
50 #include <err.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <inttypes.h>
54 #include <limits.h>
55 #include <stdint.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #include <unistd.h>
60 #include <regex.h>
61 #include <sysexits.h>
62 
63 #define DEFLINE	1000			/* Default num lines per file. */
64 
65 off_t	 bytecnt;			/* Byte count to split on. */
66 long	 numlines;			/* Line count to split on. */
67 int	 file_open;			/* If a file open. */
68 int	 ifd = -1, ofd = -1;		/* Input/output file descriptors. */
69 char	 bfr[MAXBSIZE];			/* I/O buffer. */
70 char	 fname[MAXPATHLEN];		/* File name prefix. */
71 regex_t	 rgx;
72 int	 pflag;
73 long	 sufflen = 2;			/* File name suffix length. */
74 
75 void newfile(void);
76 void split1(void);
77 void split2(void);
78 static void usage(void);
79 
80 int
81 main(int argc, char **argv)
82 {
83 	intmax_t bytecnti;
84 	long scale;
85 	int ch;
86 	char *ep, *p;
87 
88 	while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:")) != -1)
89 		switch (ch) {
90 		case '0': case '1': case '2': case '3': case '4':
91 		case '5': case '6': case '7': case '8': case '9':
92 			/*
93 			 * Undocumented kludge: split was originally designed
94 			 * to take a number after a dash.
95 			 */
96 			if (numlines == 0) {
97 				p = argv[optind - 1];
98 				if (p[0] == '-' && p[1] == ch && !p[2])
99 					numlines = strtol(++p, &ep, 10);
100 				else
101 					numlines =
102 					    strtol(argv[optind] + 1, &ep, 10);
103 				if (numlines <= 0 || *ep)
104 					errx(EX_USAGE,
105 					    "%s: illegal line count", optarg);
106 			}
107 			break;
108 		case '-':		/* Undocumented: historic stdin flag. */
109 			if (ifd != -1)
110 				usage();
111 			ifd = 0;
112 			break;
113 		case 'a':		/* Suffix length */
114 			if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
115 				errx(EX_USAGE,
116 				    "%s: illegal suffix length", optarg);
117 			break;
118 		case 'b':		/* Byte count. */
119 			errno = 0;
120 			if ((bytecnti = strtoimax(optarg, &ep, 10)) <= 0 ||
121 			    (*ep != '\0' && *ep != 'k' && *ep != 'm') ||
122 			    errno != 0)
123 				errx(EX_USAGE,
124 				    "%s: illegal byte count", optarg);
125 			if (*ep == 'k')
126 				scale = 1024;
127 			else if (*ep == 'm')
128 				scale = 1024 * 1024;
129 			else
130 				scale = 1;
131 			if (bytecnti > OFF_MAX / scale)
132 				errx(EX_USAGE, "%s: offset too large", optarg);
133 			bytecnt = (off_t)(bytecnti * scale);
134 			break;
135 		case 'p' :      /* pattern matching. */
136 			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
137 				errx(EX_USAGE, "%s: illegal regexp", optarg);
138 			pflag = 1;
139 			break;
140 		case 'l':		/* Line count. */
141 			if (numlines != 0)
142 				usage();
143 			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
144 				errx(EX_USAGE,
145 				    "%s: illegal line count", optarg);
146 			break;
147 		default:
148 			usage();
149 		}
150 	argv += optind;
151 	argc -= optind;
152 
153 	if (*argv != NULL)
154 		if (ifd == -1) {		/* Input file. */
155 			if (strcmp(*argv, "-") == 0)
156 				ifd = STDIN_FILENO;
157 			else if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
158 				err(EX_NOINPUT, "%s", *argv);
159 			++argv;
160 		}
161 	if (*argv != NULL)			/* File name prefix. */
162 		if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname))
163 			errx(EX_USAGE, "file name prefix is too long");
164 	if (*argv != NULL)
165 		usage();
166 
167 	if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
168 		errx(EX_USAGE, "suffix is too long");
169 	if (pflag && (numlines != 0 || bytecnt != 0))
170 		usage();
171 
172 	if (numlines == 0)
173 		numlines = DEFLINE;
174 	else if (bytecnt != 0)
175 		usage();
176 
177 	if (ifd == -1)				/* Stdin by default. */
178 		ifd = 0;
179 
180 	if (bytecnt) {
181 		split1();
182 		exit (0);
183 	}
184 	split2();
185 	if (pflag)
186 		regfree(&rgx);
187 	exit(0);
188 }
189 
190 /*
191  * split1 --
192  *	Split the input by bytes.
193  */
194 void
195 split1(void)
196 {
197 	off_t bcnt;
198 	char *C;
199 	ssize_t dist, len;
200 
201 	for (bcnt = 0;;)
202 		switch ((len = read(ifd, bfr, MAXBSIZE))) {
203 		case 0:
204 			exit(0);
205 		case -1:
206 			err(EX_IOERR, "read");
207 			/* NOTREACHED */
208 		default:
209 			if (!file_open)
210 				newfile();
211 			if (bcnt + len >= bytecnt) {
212 				dist = bytecnt - bcnt;
213 				if (write(ofd, bfr, dist) != dist)
214 					err(EX_IOERR, "write");
215 				len -= dist;
216 				for (C = bfr + dist; len >= bytecnt;
217 				    len -= bytecnt, C += bytecnt) {
218 					newfile();
219 					if (write(ofd,
220 					    C, bytecnt) != bytecnt)
221 						err(EX_IOERR, "write");
222 				}
223 				if (len != 0) {
224 					newfile();
225 					if (write(ofd, C, len) != len)
226 						err(EX_IOERR, "write");
227 				} else
228 					file_open = 0;
229 				bcnt = len;
230 			} else {
231 				bcnt += len;
232 				if (write(ofd, bfr, len) != len)
233 					err(EX_IOERR, "write");
234 			}
235 		}
236 }
237 
238 /*
239  * split2 --
240  *	Split the input by lines.
241  */
242 void
243 split2(void)
244 {
245 	long lcnt = 0;
246 	FILE *infp;
247 
248 	/* Stick a stream on top of input file descriptor */
249 	if ((infp = fdopen(ifd, "r")) == NULL)
250 		err(EX_NOINPUT, "fdopen");
251 
252 	/* Process input one line at a time */
253 	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
254 		const int len = strlen(bfr);
255 
256 		/* If line is too long to deal with, just write it out */
257 		if (bfr[len - 1] != '\n')
258 			goto writeit;
259 
260 		/* Check if we need to start a new file */
261 		if (pflag) {
262 			regmatch_t pmatch;
263 
264 			pmatch.rm_so = 0;
265 			pmatch.rm_eo = len - 1;
266 			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
267 				newfile();
268 		} else if (lcnt++ == numlines) {
269 			newfile();
270 			lcnt = 1;
271 		}
272 
273 writeit:
274 		/* Open output file if needed */
275 		if (!file_open)
276 			newfile();
277 
278 		/* Write out line */
279 		if (write(ofd, bfr, len) != len)
280 			err(EX_IOERR, "write");
281 	}
282 
283 	/* EOF or error? */
284 	if (ferror(infp))
285 		err(EX_IOERR, "read");
286 	else
287 		exit(0);
288 }
289 
290 /*
291  * newfile --
292  *	Open a new output file.
293  */
294 void
295 newfile(void)
296 {
297 	long i, maxfiles, tfnum;
298 	static long fnum;
299 	static int defname;
300 	static char *fpnt;
301 
302 	if (ofd == -1) {
303 		if (fname[0] == '\0') {
304 			fname[0] = 'x';
305 			fpnt = fname + 1;
306 			defname = 1;
307 		} else {
308 			fpnt = fname + strlen(fname);
309 			defname = 0;
310 		}
311 		ofd = fileno(stdout);
312 	}
313 
314 	/* maxfiles = 26^sufflen, but don't use libm. */
315 	for (maxfiles = 1, i = 0; i < sufflen; i++)
316 		if ((maxfiles *= 26) <= 0)
317 			errx(EX_USAGE, "suffix is too long (max %ld)", i);
318 
319 	/*
320 	 * Hack to increase max files; original code wandered through
321 	 * magic characters.
322 	 */
323 	if (fnum == maxfiles) {
324 		if (!defname || fname[0] == 'z')
325 			errx(EX_DATAERR, "too many files");
326 		++fname[0];
327 		fnum = 0;
328 	}
329 
330 	/* Generate suffix of sufflen letters */
331 	tfnum = fnum;
332 	i = sufflen - 1;
333 	do {
334 		fpnt[i] = tfnum % 26 + 'a';
335 		tfnum /= 26;
336 	} while (i-- > 0);
337 	fpnt[sufflen] = '\0';
338 
339 	++fnum;
340 	if (!freopen(fname, "w", stdout))
341 		err(EX_IOERR, "%s", fname);
342 	file_open = 1;
343 }
344 
/*
 * usage --
 *	Print the command synopsis on stderr and exit with EX_USAGE.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: split [-a sufflen] [-b byte_count] [-l line_count] [-p pattern]\n"
	    "             [file [prefix]]\n";

	(void)fputs(synopsis, stderr);
	exit(EX_USAGE);
}
354