xref: /freebsd/usr.bin/split/split.c (revision 1b6c76a2fe091c74f08427e6c870851025a9cf67)
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
/*
 * Identification strings compiled into the program unless `lint' is
 * defined.  The historic SCCS id is kept for reference but disabled.
 */
#ifndef lint
static const char copyright[] =
	"@(#) Copyright (c) 1987, 1993, 1994\n"
	"\tThe Regents of the University of California.  All rights reserved.\n";

#if 0
static char sccsid[] = "@(#)split.c	8.2 (Berkeley) 4/16/94";
#else
static const char rcsid[] = "$FreeBSD$";
#endif
#endif /* not lint */
48 
49 #include <sys/param.h>
50 #include <sys/types.h>
51 
52 #include <ctype.h>
53 #include <err.h>
54 #include <fcntl.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 #include <regex.h>
60 #include <sysexits.h>
61 
62 #define DEFLINE	1000			/* Default num lines per file. */
63 
64 size_t	 bytecnt;			/* Byte count to split on. */
65 long	 numlines;			/* Line count to split on. */
66 int	 file_open;			/* If a file open. */
67 int	 ifd = -1, ofd = -1;		/* Input/output file descriptors. */
68 char	 bfr[MAXBSIZE];			/* I/O buffer. */
69 char	 fname[MAXPATHLEN];		/* File name prefix. */
70 regex_t	 rgx;
71 int	 pflag;
72 
73 void newfile __P((void));
74 void split1 __P((void));
75 void split2 __P((void));
76 static void usage __P((void));
77 
78 int
79 main(argc, argv)
80 	int argc;
81 	char *argv[];
82 {
83 	int ch;
84 	char *ep, *p;
85 
86 	while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1)
87 		switch (ch) {
88 		case '0': case '1': case '2': case '3': case '4':
89 		case '5': case '6': case '7': case '8': case '9':
90 			/*
91 			 * Undocumented kludge: split was originally designed
92 			 * to take a number after a dash.
93 			 */
94 			if (numlines == 0) {
95 				p = argv[optind - 1];
96 				if (p[0] == '-' && p[1] == ch && !p[2])
97 					numlines = strtol(++p, &ep, 10);
98 				else
99 					numlines =
100 					    strtol(argv[optind] + 1, &ep, 10);
101 				if (numlines <= 0 || *ep)
102 					errx(EX_USAGE,
103 					    "%s: illegal line count", optarg);
104 			}
105 			break;
106 		case '-':		/* Undocumented: historic stdin flag. */
107 			if (ifd != -1)
108 				usage();
109 			ifd = 0;
110 			break;
111 		case 'b':		/* Byte count. */
112 			if ((bytecnt = strtoq(optarg, &ep, 10)) <= 0 ||
113 			    (*ep != '\0' && *ep != 'k' && *ep != 'm'))
114 				errx(EX_USAGE,
115 				    "%s: illegal byte count", optarg);
116 			if (*ep == 'k')
117 				bytecnt *= 1024;
118 			else if (*ep == 'm')
119 				bytecnt *= 1048576;
120 			break;
121 		case 'p' :      /* pattern matching. */
122 			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
123 				errx(EX_USAGE, "%s: illegal regexp", optarg);
124 			pflag = 1;
125 			break;
126 		case 'l':		/* Line count. */
127 			if (numlines != 0)
128 				usage();
129 			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
130 				errx(EX_USAGE,
131 				    "%s: illegal line count", optarg);
132 			break;
133 		default:
134 			usage();
135 		}
136 	argv += optind;
137 	argc -= optind;
138 
139 	if (*argv != NULL)
140 		if (ifd == -1) {		/* Input file. */
141 			if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
142 				err(EX_NOINPUT, "%s", *argv);
143 			++argv;
144 		}
145 	if (*argv != NULL)			/* File name prefix. */
146 		(void)strcpy(fname, *argv++);
147 	if (*argv != NULL)
148 		usage();
149 
150 	if (pflag && (numlines != 0 || bytecnt != 0))
151 		usage();
152 
153 	if (numlines == 0)
154 		numlines = DEFLINE;
155 	else if (bytecnt != 0)
156 		usage();
157 
158 	if (ifd == -1)				/* Stdin by default. */
159 		ifd = 0;
160 
161 	if (bytecnt) {
162 		split1();
163 		exit (0);
164 	}
165 	split2();
166 	if (pflag)
167 		regfree(&rgx);
168 	exit(0);
169 }
170 
171 /*
172  * split1 --
173  *	Split the input by bytes.
174  */
175 void
176 split1()
177 {
178 	size_t bcnt, dist, len;
179 	char *C;
180 
181 	for (bcnt = 0;;)
182 		switch ((len = read(ifd, bfr, MAXBSIZE))) {
183 		case 0:
184 			exit(0);
185 		case -1:
186 			err(EX_IOERR, "read");
187 			/* NOTREACHED */
188 		default:
189 			if (!file_open)
190 				newfile();
191 			if (bcnt + len >= bytecnt) {
192 				dist = bytecnt - bcnt;
193 				if (write(ofd, bfr, dist) != dist)
194 					err(EX_IOERR, "write");
195 				len -= dist;
196 				for (C = bfr + dist; len >= bytecnt;
197 				    len -= bytecnt, C += bytecnt) {
198 					newfile();
199 					if (write(ofd,
200 					    C, (int)bytecnt) != bytecnt)
201 						err(EX_IOERR, "write");
202 				}
203 				if (len != 0) {
204 					newfile();
205 					if (write(ofd, C, len) != len)
206 						err(EX_IOERR, "write");
207 				} else
208 					file_open = 0;
209 				bcnt = len;
210 			} else {
211 				bcnt += len;
212 				if (write(ofd, bfr, len) != len)
213 					err(EX_IOERR, "write");
214 			}
215 		}
216 }
217 
218 /*
219  * split2 --
220  *	Split the input by lines.
221  */
222 void
223 split2()
224 {
225 	long lcnt = 0;
226 	FILE *infp;
227 
228 	/* Stick a stream on top of input file descriptor */
229 	if ((infp = fdopen(ifd, "r")) == NULL)
230 		err(EX_NOINPUT, "fdopen");
231 
232 	/* Process input one line at a time */
233 	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
234 		const int len = strlen(bfr);
235 
236 		/* If line is too long to deal with, just write it out */
237 		if (bfr[len - 1] != '\n')
238 			goto writeit;
239 
240 		/* Check if we need to start a new file */
241 		if (pflag) {
242 			regmatch_t pmatch;
243 
244 			pmatch.rm_so = 0;
245 			pmatch.rm_eo = len - 1;
246 			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
247 				newfile();
248 		} else if (lcnt++ == numlines) {
249 			newfile();
250 			lcnt = 1;
251 		}
252 
253 writeit:
254 		/* Open output file if needed */
255 		if (!file_open)
256 			newfile();
257 
258 		/* Write out line */
259 		if (write(ofd, bfr, len) != len)
260 			err(EX_IOERR, "write");
261 	}
262 
263 	/* EOF or error? */
264 	if (ferror(infp))
265 		err(EX_IOERR, "read");
266 	else
267 		exit(0);
268 }
269 
270 /*
271  * newfile --
272  *	Open a new output file.
273  */
274 void
275 newfile()
276 {
277 	static long fnum;
278 	static int defname;
279 	static char *fpnt;
280 
281 	if (ofd == -1) {
282 		if (fname[0] == '\0') {
283 			fname[0] = 'x';
284 			fpnt = fname + 1;
285 			defname = 1;
286 		} else {
287 			fpnt = fname + strlen(fname);
288 			defname = 0;
289 		}
290 		ofd = fileno(stdout);
291 	}
292 	/*
293 	 * Hack to increase max files; original code wandered through
294 	 * magic characters.  Maximum files is 3 * 26 * 26 == 2028
295 	 */
296 #define MAXFILES	676
297 	if (fnum == MAXFILES) {
298 		if (!defname || fname[0] == 'z')
299 			errx(EX_DATAERR, "too many files");
300 		++fname[0];
301 		fnum = 0;
302 	}
303 	fpnt[0] = fnum / 26 + 'a';
304 	fpnt[1] = fnum % 26 + 'a';
305 	++fnum;
306 	if (!freopen(fname, "w", stdout))
307 		err(EX_IOERR, "%s", fname);
308 	file_open = 1;
309 }
310 
/*
 * usage --
 *	Print the command synopsis on standard error and exit with
 *	EX_USAGE.  Does not return.
 */
static void
usage()
{
	static const char synopsis[] =
"usage: split [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]\n";

	(void)fputs(synopsis, stderr);
	exit(EX_USAGE);
}
318