xref: /illumos-gate/usr/src/cmd/csplit/csplit.c (revision fec047081731fd77caf46ec0471c501b2cb33894)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved	*/
28 
29 /*
30  * Copyright (c) 2018, Joyent, Inc.
31  */
32 
33 /*
34  * csplit - Context or line file splitter
35  * Compile: cc -O -s -o csplit csplit.c
36  */
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <string.h>
42 #include <ctype.h>
43 #include <errno.h>
44 #include <limits.h>
45 #include <regexpr.h>
46 #include <signal.h>
47 #include <locale.h>
48 #include <libintl.h>
49 
/* Line-number value handed to to_line() to mean "copy the rest of the input" */
#define	LAST	0LL
/* Failure return of asc_to_ll() */
#define	ERR	-1
#define	FALSE	0
#define	TRUE	1
/* main()'s "mode": the previous operand was a /regex/ or %regex% argument */
#define	EXPMODE	2
/* main()'s "mode": the previous operand was a line-number argument */
#define	LINMODE	3
#define	LINSIZ	LINE_MAX	/* POSIX.2 - read lines LINE_MAX long */
57 
58 	/* Globals */
59 
char linbuf[LINSIZ];		/* Input line buffer */
char *expbuf;			/* Compiled R.E. returned by compile() */
char tmpbuf[BUFSIZ];		/* Temporary buffer for stdin */
char file[8192] = "xx";		/* File name buffer */
char *targ;			/* Arg ptr for error messages */
char *sptr;			/* Scratch: newline position in findline() */
FILE *infile, *outfile;		/* I/O file streams */
int silent, keep, create;	/* Flags: -s(ilent), -k(eep), (create) */
int errflg;			/* Count of bad options seen by getopt() */
int fiwidth = 2;		/* file index width (output file names) */
extern int optind;
extern char *optarg;
offset_t offset;		/* Regular expression offset value */
offset_t curline;		/* Current line in input file */
74 
/*
 * This define is needed for regexp handling (see regexp(7)).
 * PERROR reports the current argument (targ) as an illegal regular
 * expression through fatal().  The macro parameter x is not used, and the
 * expansion already ends in a semicolon.
 */
#define	PERROR(x)	fatal("%s: Illegal Regular Expression\n", targ);
79 
/* Forward declarations of the file-local routines defined below. */
static int asc_to_ll(char *, long long *);
static void closefile(void);
static void fatal(char *, char *);
static offset_t findline(char *, offset_t);
static void flush(void);
static FILE *getfile(void);
static char *getaline(int);
static void line_arg(char *);
static void num_arg(char *, int);
static void re_arg(char *);
static void sig(int);
static void to_line(offset_t);
static void usage(void);
93 
94 int
95 main(int argc, char **argv)
96 {
97 	int ch, mode;
98 	char *ptr;
99 
100 	(void) setlocale(LC_ALL, "");
101 #if !defined(TEXT_DOMAIN)		/* Should be defined by cc -D */
102 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
103 #endif
104 	(void) textdomain(TEXT_DOMAIN);
105 
106 	while ((ch = getopt(argc, argv, "skf:n:")) != EOF) {
107 		switch (ch) {
108 			case 'f':
109 				(void) strcpy(file, optarg);
110 				if ((ptr = strrchr(optarg, '/')) == NULL)
111 					ptr = optarg;
112 				else
113 					ptr++;
114 
115 				break;
116 			case 'n':		/* POSIX.2 */
117 				for (ptr = optarg; *ptr != '\0'; ptr++)
118 					if (!isdigit((int)*ptr))
119 						fatal("-n num\n", NULL);
120 				fiwidth = atoi(optarg);
121 				break;
122 			case 'k':
123 				keep++;
124 				break;
125 			case 's':
126 				silent++;
127 				break;
128 			case '?':
129 				errflg++;
130 		}
131 	}
132 
133 	argv = &argv[optind];
134 	argc -= optind;
135 	if (argc <= 1 || errflg)
136 		usage();
137 
138 	if (strcmp(*argv, "-") == 0) {
139 		infile = tmpfile();
140 
141 		while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) {
142 			if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0)
143 				if (errno == ENOSPC) {
144 					(void) fprintf(stderr, "csplit: ");
145 					(void) fprintf(stderr, gettext(
146 					    "No space left on device\n"));
147 					exit(1);
148 				} else {
149 					(void) fprintf(stderr, "csplit: ");
150 					(void) fprintf(stderr, gettext(
151 					    "Bad write to temporary "
152 					    "file\n"));
153 					exit(1);
154 				}
155 
156 	/* clear the buffer to get correct size when writing buffer */
157 
158 			(void) memset(tmpbuf, '\0', sizeof (tmpbuf));
159 		}
160 		rewind(infile);
161 	} else if ((infile = fopen(*argv, "r")) == NULL)
162 		fatal("Cannot open %s\n", *argv);
163 	++argv;
164 	curline = (offset_t)1;
165 	(void) signal(SIGINT, sig);
166 
167 	/*
168 	 * The following for loop handles the different argument types.
169 	 * A switch is performed on the first character of the argument
170 	 * and each case calls the appropriate argument handling routine.
171 	 */
172 
173 	for (; *argv; ++argv) {
174 		targ = *argv;
175 		switch (**argv) {
176 		case '/':
177 			mode = EXPMODE;
178 			create = TRUE;
179 			re_arg(*argv);
180 			break;
181 		case '%':
182 			mode = EXPMODE;
183 			create = FALSE;
184 			re_arg(*argv);
185 			break;
186 		case '{':
187 			num_arg(*argv, mode);
188 			mode = FALSE;
189 			break;
190 		default:
191 			mode = LINMODE;
192 			create = TRUE;
193 			line_arg(*argv);
194 			break;
195 		}
196 	}
197 	create = TRUE;
198 	to_line(LAST);
199 	return (0);
200 }
201 
/*
 * asc_to_ll converts the ascii string str to a long long stored in *plc.
 *
 * Accepted input: optional leading blanks and tabs, one optional '+' or
 * '-' sign, then zero or more decimal digits up to the end of the string.
 * An empty digit string converts to 0.
 *
 * Returns TRUE on success.  Returns ERR if a non-digit character is found
 * or if the value does not fit in a long long; *plc is then 0.  The result
 * is returned through a pointer because every long long value is a legal
 * answer, so none is left over to signal an error.
 */

static int
asc_to_ll(char *str, long long *plc)
{
	unsigned long long val = 0;
	unsigned long long limit;
	unsigned long long digit;
	int neg = 0;

	*plc = 0;

	while (*str == ' ' || *str == '\t')
		str++;
	if (*str == '-') {
		neg = 1;
		str++;
	} else if (*str == '+') {
		str++;
	}

	/* The magnitude of LLONG_MIN is one larger than LLONG_MAX */
	limit = (unsigned long long)LLONG_MAX;
	if (neg)
		limit += 1ULL;

	for (; *str != '\0'; str++) {
		if (*str < '0' || *str > '9')
			return (ERR);
		digit = (unsigned long long)(*str - '0');
		/* Would val * 10 + digit exceed limit? */
		if (val > (limit - digit) / 10ULL)
			return (ERR);
		val = val * 10ULL + digit;
	}

	if (!neg)
		*plc = (long long)val;
	else if (val == limit)
		*plc = LLONG_MIN;
	else
		*plc = -(long long)val;
	return (TRUE);	/* not error */
}
237 
238 /*
239  * Closefile prints the byte count of the file created,(via fseeko
240  * and ftello), if the create flag is on and the silent flag is not on.
241  * If the create flag is on closefile then closes the file(fclose).
242  */
243 
244 static void
245 closefile()
246 {
247 	if (!silent && create) {
248 		(void) fseeko(outfile, (offset_t)0, SEEK_END);
249 		(void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile));
250 	}
251 	if (create)
252 		(void) fclose(outfile);
253 }
254 
255 /*
256  * Fatal handles error messages and cleanup.
257  * Because "arg" can be the global file, and the cleanup processing
258  * uses the global file, the error message is printed first.  If the
259  * "keep" flag is not set, fatal unlinks all created files.  If the
260  * "keep" flag is set, fatal closes the current file(if there is one).
261  * Fatal exits with a value of 1.
262  */
263 
264 static void
265 fatal(char *string, char *arg)
266 {
267 	char *fls;
268 	int num;
269 
270 	(void) fprintf(stderr, "csplit: ");
271 
272 	/* gettext dynamically replaces string */
273 
274 	(void) fprintf(stderr, gettext(string), arg);
275 	if (!keep) {
276 		if (outfile) {
277 			(void) fclose(outfile);
278 			for (fls = file; *fls != '\0'; fls++)
279 				continue;
280 			fls -= fiwidth;
281 			for (num = atoi(fls); num >= 0; num--) {
282 				(void) sprintf(fls, "%.*d", fiwidth, num);
283 				(void) unlink(file);
284 			}
285 		}
286 	} else
287 		if (outfile)
288 			closefile();
289 	exit(1);
290 }
291 
292 /*
293  * Findline returns the line number referenced by the current argument.
294  * Its arguments are a pointer to the compiled regular expression(expr),
295  * and an offset(oset).  The variable lncnt is used to count the number
296  * of lines searched.  First the current stream location is saved via
297  * ftello(), and getaline is called so that R.E. searching starts at the
298  * line after the previously referenced line.  The while loop checks
299  * that there are more lines(error if none), bumps the line count, and
300  * checks for the R.E. on each line.  If the R.E. matches on one of the
301  * lines the old stream location is restored, and the line number
302  * referenced by the R.E. and the offset is returned.
303  */
304 
305 static offset_t
306 findline(char *expr, offset_t oset)
307 {
308 	static int benhere = 0;
309 	offset_t lncnt = 0, saveloc;
310 
311 	saveloc = ftello(infile);
312 	if (curline != (offset_t)1 || benhere)	/* If first line, first time, */
313 		(void) getaline(FALSE);		/* then don't skip */
314 	else
315 		lncnt--;
316 	benhere = 1;
317 	while (getaline(FALSE) != NULL) {
318 		lncnt++;
319 		if ((sptr = strrchr(linbuf, '\n')) != NULL)
320 			*sptr = '\0';
321 		if (step(linbuf, expr)) {
322 			(void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
323 			return (curline+lncnt+oset);
324 		}
325 	}
326 	(void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
327 	return (curline+lncnt+oset+2);
328 }
329 
330 /*
331  * Flush uses fputs to put lines on the output file stream(outfile)
332  * Since fputs does its own buffering, flush doesn't need to.
333  * Flush does nothing if the create flag is not set.
334  */
335 
336 static void
337 flush()
338 {
339 	if (create)
340 		(void) fputs(linbuf, outfile);
341 }
342 
343 /*
344  * Getfile does nothing if the create flag is not set.  If the create
345  * flag is set, getfile positions the file pointer(fptr) at the end of
346  * the file name prefix on the first call(fptr=0).  The file counter is
347  * stored in the file name and incremented.  If the subsequent fopen
348  * fails, the file name is copied to tfile for the error message, the
349  * previous file name is restored for cleanup, and fatal is called.  If
350  * the fopen succeeds, the stream(opfil) is returned.
351  */
352 
353 FILE *
354 getfile()
355 {
356 	static char *fptr;
357 	static int ctr;
358 	FILE *opfil;
359 	char tfile[15];
360 	char *delim;
361 	char savedelim;
362 
363 	if (create) {
364 		if (fptr == 0)
365 			for (fptr = file; *fptr != '\0'; fptr++)
366 				continue;
367 		(void) sprintf(fptr, "%.*d", fiwidth, ctr++);
368 
369 		/* check for suffix length overflow */
370 		if (strlen(fptr) > fiwidth) {
371 			fatal("Suffix longer than %ld chars; increase -n\n",
372 			    (char *)fiwidth);
373 		}
374 
375 		/* check for filename length overflow */
376 
377 		delim = strrchr(file, '/');
378 		if (delim == (char *)NULL) {
379 			if (strlen(file) > pathconf(".", _PC_NAME_MAX)) {
380 				fatal("Name too long: %s\n", file);
381 			}
382 		} else {
383 			/* truncate file at pathname delim to do pathconf */
384 			savedelim = *delim;
385 			*delim = '\0';
386 			/*
387 			 * file: pppppppp\0fffff\0
388 			 * ..... ^ file
389 			 * ............. ^ delim
390 			 */
391 			if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) {
392 				fatal("Name too long: %s\n", delim + 1);
393 			}
394 			*delim = savedelim;
395 		}
396 
397 		if ((opfil = fopen(file, "w")) == NULL) {
398 			(void) strlcpy(tfile, file, sizeof (tfile));
399 			(void) sprintf(fptr, "%.*d", fiwidth, (ctr-2));
400 			fatal("Cannot create %s\n", tfile);
401 		}
402 		return (opfil);
403 	}
404 	return (NULL);
405 }
406 
407 /*
408  * Getline gets a line via fgets from the input stream "infile".
409  * The line is put into linbuf and may not be larger than LINSIZ.
410  * If getaline is called with a non-zero value, the current line
411  * is bumped, otherwise it is not(for R.E. searching).
412  */
413 
414 static char *
415 getaline(int bumpcur)
416 {
417 	char *ret;
418 	if (bumpcur)
419 		curline++;
420 	ret = fgets(linbuf, LINSIZ, infile);
421 	return (ret);
422 }
423 
424 /*
425  * Line_arg handles line number arguments.
426  * line_arg takes as its argument a pointer to a character string
427  * (assumed to be a line number).  If that character string can be
428  * converted to a number(long long), to_line is called with that number,
429  * otherwise error.
430  */
431 
432 static void
433 line_arg(char *line)
434 {
435 	long long to;
436 
437 	if (asc_to_ll(line, &to) == ERR)
438 		fatal("%s: bad line number\n", line);
439 	to_line(to);
440 }
441 
442 /*
443  * Num_arg handles repeat arguments.
444  * Num_arg copies the numeric argument to "rep" (error if number is
445  * larger than 20 characters or } is left off).  Num_arg then converts
446  * the number and checks for validity.  Next num_arg checks the mode
447  * of the previous argument, and applys the argument the correct number
448  * of times. If the mode is not set properly its an error.
449  */
450 
451 static void
452 num_arg(char *arg, int md)
453 {
454 	offset_t repeat, toline;
455 	char rep[21];
456 	char *ptr;
457 	int		len;
458 
459 	ptr = rep;
460 	for (++arg; *arg != '}'; arg += len) {
461 		if (*arg == '\0')
462 			fatal("%s: missing '}'\n", targ);
463 		if ((len = mblen(arg, MB_LEN_MAX)) <= 0)
464 			len = 1;
465 		if ((ptr + len) >= &rep[20])
466 			fatal("%s: Repeat count too large\n", targ);
467 		(void) memcpy(ptr, arg, len);
468 		ptr += len;
469 	}
470 	*ptr = '\0';
471 	if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L)
472 		fatal("Illegal repeat count: %s\n", targ);
473 	if (md == LINMODE) {
474 		toline = offset = curline;
475 		for (; repeat > 0LL; repeat--) {
476 			toline += offset;
477 			to_line(toline);
478 		}
479 	} else	if (md == EXPMODE)
480 			for (; repeat > 0LL; repeat--)
481 				to_line(findline(expbuf, offset));
482 		else
483 			fatal("No operation for %s\n", targ);
484 }
485 
486 /*
487  * Re_arg handles regular expression arguments.
488  * Re_arg takes a csplit regular expression argument.  It checks for
489  * delimiter balance, computes any offset, and compiles the regular
490  * expression.  Findline is called with the compiled expression and
491  * offset, and returns the corresponding line number, which is used
492  * as input to the to_line function.
493  */
494 
495 static void
496 re_arg(char *string)
497 {
498 	char *ptr;
499 	char ch;
500 	int		len;
501 
502 	ch = *string;
503 	ptr = string;
504 	ptr++;
505 	while (*ptr != ch) {
506 		if (*ptr == '\\')
507 			++ptr;
508 
509 		if (*ptr == '\0')
510 			fatal("%s: missing delimiter\n", targ);
511 
512 		if ((len = mblen(ptr, MB_LEN_MAX)) <= 0)
513 			len = 1;
514 		ptr += len;
515 	}
516 
517 	/*
518 	 * The line below was added because compile no longer supports
519 	 * the fourth argument being passed.  The fourth argument used
520 	 * to be '/' or '%'.
521 	 */
522 
523 	*ptr = '\0';
524 	if (asc_to_ll(++ptr, &offset) == ERR)
525 		fatal("%s: illegal offset\n", string);
526 
527 	/*
528 	 * The line below was added because INIT which did this for us
529 	 * was removed from compile in regexp.h
530 	 */
531 
532 	string++;
533 	expbuf = compile(string, (char *)0, (char *)0);
534 	if (regerrno)
535 		PERROR(regerrno);
536 	to_line(findline(expbuf, offset));
537 }
538 
539 /*
540  * Sig handles breaks.  When a break occurs the signal is reset,
541  * and fatal is called to clean up and print the argument which
542  * was being processed at the time the interrupt occured.
543  */
544 
545 /* ARGSUSED */
546 static void
547 sig(int s)
548 {
549 	(void) signal(SIGINT, sig);
550 	fatal("Interrupt - program aborted at arg '%s'\n", targ);
551 }
552 
553 /*
554  * To_line creates split files.
555  * To_line gets as its argument the line which the current argument
556  * referenced.  To_line calls getfile for a new output stream, which
557  * does nothing if create is False.  If to_line's argument is not LAST
558  * it checks that the current line is not greater than its argument.
559  * While the current line is less than the desired line to_line gets
560  * lines and flushes(error if EOF is reached).
561  * If to_line's argument is LAST, it checks for more lines, and gets
562  * and flushes lines till the end of file.
563  * Finally, to_line calls closefile to close the output stream.
564  */
565 
566 static void
567 to_line(offset_t ln)
568 {
569 	outfile = getfile();
570 	if (ln != LAST) {
571 		if (curline > ln)
572 			fatal("%s - out of range\n", targ);
573 		while (curline < ln) {
574 			if (getaline(TRUE) == NULL)
575 				fatal("%s - out of range\n", targ);
576 			flush();
577 		}
578 	} else		/* last file */
579 		if (getaline(TRUE) != NULL) {
580 			flush();
581 			for (;;) {
582 				if (getaline(TRUE) == NULL)
583 					break;
584 				flush();
585 			}
586 		} else
587 			fatal("%s - out of range\n", targ);
588 	closefile();
589 }
590 
/*
 * Usage prints the command synopsis on stderr and exits with status 1.
 */
static void
usage()
{
	char *synopsis;

	synopsis = gettext(
	    "usage: csplit [-ks] [-f prefix] [-n number] "
	    "file arg1 ...argn\n");
	(void) fprintf(stderr, synopsis);
	exit(1);
}
598 }
599