xref: /illumos-gate/usr/src/cmd/csplit/csplit.c (revision 598f4ceed9327d2d6c2325dd67cae3aa06f7fea6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * csplit - Context or line file splitter
31  * Compile: cc -O -s -o csplit csplit.c
32  */
33 
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <unistd.h>
37 #include <string.h>
38 #include <ctype.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <regexpr.h>
42 #include <signal.h>
43 #include <locale.h>
44 #include <libintl.h>
45 
/*
 * Sentinel values, booleans and argument modes shared by the routines
 * in this file.
 */
#define	LAST	0LL		/* to_line() target: copy through end of input */
#define	ERR	(-1)		/* asc_to_ll() failure return */
#define	FALSE	0
#define	TRUE	1
#define	EXPMODE	2		/* previous argument was a regular expression */
#define	LINMODE	3		/* previous argument was a line number */
#define	LINSIZ	LINE_MAX	/* POSIX.2 - read lines LINE_MAX long */
53 
54 	/* Globals */
55 
56 char linbuf[LINSIZ];		/* Input line buffer */
57 char *expbuf;
58 char tmpbuf[BUFSIZ];		/* Temporary buffer for stdin */
59 char file[8192] = "xx";		/* File name buffer */
60 char *targ;			/* Arg ptr for error messages */
61 char *sptr;
62 FILE *infile, *outfile;		/* I/O file streams */
63 int silent, keep, create;	/* Flags: -s(ilent), -k(eep), (create) */
64 int errflg;
65 int fiwidth = 2;		/* file index width (output file names) */
66 extern int optind;
67 extern char *optarg;
68 offset_t offset;		/* Regular expression offset value */
69 offset_t curline;		/* Current line in input file */
70 
/*
 * PERROR reports a regular expression that could not be compiled.  Its
 * argument is accepted but not used; the message names the csplit
 * argument being processed (targ).  The expansion has no trailing
 * semicolon so that "PERROR(x);" is exactly one statement and can be
 * used as the body of an if that has an else.
 */
#define	PERROR(x)	fatal("%s: Illegal Regular Expression\n", targ)
75 
76 static int asc_to_ll(char *, long long *);
77 static void closefile(void);
78 static void fatal(char *, char *);
79 static offset_t findline(char *, offset_t);
80 static void flush(void);
81 static FILE *getfile(void);
82 static char *getaline(int);
83 static void line_arg(char *);
84 static void num_arg(char *, int);
85 static void re_arg(char *);
86 static void sig(int);
87 static void to_line(offset_t);
88 static void usage(void);
89 
90 int
91 main(int argc, char **argv)
92 {
93 	int ch, mode;
94 	char *ptr;
95 
96 	(void) setlocale(LC_ALL, "");
97 #if !defined(TEXT_DOMAIN)		/* Should be defined by cc -D */
98 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
99 #endif
100 	(void) textdomain(TEXT_DOMAIN);
101 
102 	while ((ch = getopt(argc, argv, "skf:n:")) != EOF) {
103 		switch (ch) {
104 			case 'f':
105 				(void) strcpy(file, optarg);
106 				if ((ptr = strrchr(optarg, '/')) == NULL)
107 					ptr = optarg;
108 				else
109 					ptr++;
110 
111 				break;
112 			case 'n':		/* POSIX.2 */
113 				for (ptr = optarg; *ptr != NULL; ptr++)
114 					if (!isdigit((int)*ptr))
115 						fatal("-n num\n", NULL);
116 				fiwidth = atoi(optarg);
117 				break;
118 			case 'k':
119 				keep++;
120 				break;
121 			case 's':
122 				silent++;
123 				break;
124 			case '?':
125 				errflg++;
126 		}
127 	}
128 
129 	argv = &argv[optind];
130 	argc -= optind;
131 	if (argc <= 1 || errflg)
132 		usage();
133 
134 	if (strcmp(*argv, "-") == 0) {
135 		infile = tmpfile();
136 
137 		while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) {
138 			if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0)
139 				if (errno == ENOSPC) {
140 					(void) fprintf(stderr, "csplit: ");
141 					(void) fprintf(stderr, gettext(
142 					    "No space left on device\n"));
143 					exit(1);
144 				} else {
145 					(void) fprintf(stderr, "csplit: ");
146 					(void) fprintf(stderr, gettext(
147 					    "Bad write to temporary "
148 					    "file\n"));
149 					exit(1);
150 				}
151 
152 	/* clear the buffer to get correct size when writing buffer */
153 
154 			(void) memset(tmpbuf, '\0', sizeof (tmpbuf));
155 		}
156 		rewind(infile);
157 	} else if ((infile = fopen(*argv, "r")) == NULL)
158 		fatal("Cannot open %s\n", *argv);
159 	++argv;
160 	curline = (offset_t)1;
161 	(void) signal(SIGINT, sig);
162 
163 	/*
164 	 * The following for loop handles the different argument types.
165 	 * A switch is performed on the first character of the argument
166 	 * and each case calls the appropriate argument handling routine.
167 	 */
168 
169 	for (; *argv; ++argv) {
170 		targ = *argv;
171 		switch (**argv) {
172 		case '/':
173 			mode = EXPMODE;
174 			create = TRUE;
175 			re_arg(*argv);
176 			break;
177 		case '%':
178 			mode = EXPMODE;
179 			create = FALSE;
180 			re_arg(*argv);
181 			break;
182 		case '{':
183 			num_arg(*argv, mode);
184 			mode = FALSE;
185 			break;
186 		default:
187 			mode = LINMODE;
188 			create = TRUE;
189 			line_arg(*argv);
190 			break;
191 		}
192 	}
193 	create = TRUE;
194 	to_line(LAST);
195 	return (0);
196 }
197 
198 /*
199  * asc_to_ll takes an ascii argument(str) and converts it to a long long(plc)
200  * It returns ERR if an illegal character.  The reason that asc_to_ll
201  * does not return an answer(long long) is that any value for the long
202  * long is legal, and this version of asc_to_ll detects error strings.
203  */
204 
205 static int
206 asc_to_ll(char *str, long long *plc)
207 {
208 	int f;
209 	*plc = 0;
210 	f = 0;
211 	for (; ; str++) {
212 		switch (*str) {
213 		case ' ':
214 		case '\t':
215 			continue;
216 		case '-':
217 			f++;
218 			/* FALLTHROUGH */
219 		case '+':
220 			str++;
221 		}
222 		break;
223 	}
224 	for (; *str != NULL; str++)
225 		if (*str >= '0' && *str <= '9')
226 			*plc = *plc * 10 + *str - '0';
227 		else
228 			return (ERR);
229 	if (f)
230 		*plc = -(*plc);
231 	return (TRUE);	/* not error */
232 }
233 
234 /*
235  * Closefile prints the byte count of the file created,(via fseeko
236  * and ftello), if the create flag is on and the silent flag is not on.
237  * If the create flag is on closefile then closes the file(fclose).
238  */
239 
240 static void
241 closefile()
242 {
243 	if (!silent && create) {
244 		(void) fseeko(outfile, (offset_t)0, SEEK_END);
245 		(void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile));
246 	}
247 	if (create)
248 		(void) fclose(outfile);
249 }
250 
251 /*
252  * Fatal handles error messages and cleanup.
253  * Because "arg" can be the global file, and the cleanup processing
254  * uses the global file, the error message is printed first.  If the
255  * "keep" flag is not set, fatal unlinks all created files.  If the
256  * "keep" flag is set, fatal closes the current file(if there is one).
257  * Fatal exits with a value of 1.
258  */
259 
260 static void
261 fatal(char *string, char *arg)
262 {
263 	char *fls;
264 	int num;
265 
266 	(void) fprintf(stderr, "csplit: ");
267 
268 	/* gettext dynamically replaces string */
269 
270 	(void) fprintf(stderr, gettext(string), arg);
271 	if (!keep) {
272 		if (outfile) {
273 			(void) fclose(outfile);
274 			for (fls = file; *fls != '\0'; fls++)
275 				continue;
276 			fls -= fiwidth;
277 			for (num = atoi(fls); num >= 0; num--) {
278 				(void) sprintf(fls, "%.*d", fiwidth, num);
279 				(void) unlink(file);
280 			}
281 		}
282 	} else
283 		if (outfile)
284 			closefile();
285 	exit(1);
286 }
287 
288 /*
289  * Findline returns the line number referenced by the current argument.
290  * Its arguments are a pointer to the compiled regular expression(expr),
291  * and an offset(oset).  The variable lncnt is used to count the number
292  * of lines searched.  First the current stream location is saved via
293  * ftello(), and getaline is called so that R.E. searching starts at the
294  * line after the previously referenced line.  The while loop checks
295  * that there are more lines(error if none), bumps the line count, and
296  * checks for the R.E. on each line.  If the R.E. matches on one of the
297  * lines the old stream location is restored, and the line number
298  * referenced by the R.E. and the offset is returned.
299  */
300 
301 static offset_t
302 findline(char *expr, offset_t oset)
303 {
304 	static int benhere = 0;
305 	offset_t lncnt = 0, saveloc;
306 
307 	saveloc = ftello(infile);
308 	if (curline != (offset_t)1 || benhere)	/* If first line, first time, */
309 		(void) getaline(FALSE);		/* then don't skip */
310 	else
311 		lncnt--;
312 	benhere = 1;
313 	while (getaline(FALSE) != NULL) {
314 		lncnt++;
315 		if ((sptr = strrchr(linbuf, '\n')) != NULL)
316 			*sptr = '\0';
317 		if (step(linbuf, expr)) {
318 			(void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
319 			return (curline+lncnt+oset);
320 		}
321 	}
322 	(void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
323 	return (curline+lncnt+oset+2);
324 }
325 
326 /*
327  * Flush uses fputs to put lines on the output file stream(outfile)
328  * Since fputs does its own buffering, flush doesn't need to.
329  * Flush does nothing if the create flag is not set.
330  */
331 
332 static void
333 flush()
334 {
335 	if (create)
336 		(void) fputs(linbuf, outfile);
337 }
338 
339 /*
340  * Getfile does nothing if the create flag is not set.  If the create
341  * flag is set, getfile positions the file pointer(fptr) at the end of
342  * the file name prefix on the first call(fptr=0).  The file counter is
343  * stored in the file name and incremented.  If the subsequent fopen
344  * fails, the file name is copied to tfile for the error message, the
345  * previous file name is restored for cleanup, and fatal is called.  If
346  * the fopen succeeds, the stream(opfil) is returned.
347  */
348 
349 FILE *
350 getfile()
351 {
352 	static char *fptr;
353 	static int ctr;
354 	FILE *opfil;
355 	char tfile[15];
356 	char *delim;
357 	char savedelim;
358 
359 	if (create) {
360 		if (fptr == 0)
361 			for (fptr = file; *fptr != NULL; fptr++)
362 				continue;
363 		(void) sprintf(fptr, "%.*d", fiwidth, ctr++);
364 
365 		/* check for suffix length overflow */
366 		if (strlen(fptr) > fiwidth) {
367 			fatal("Suffix longer than %ld chars; increase -n\n",
368 			    (char *)fiwidth);
369 		}
370 
371 		/* check for filename length overflow */
372 
373 		delim = strrchr(file, '/');
374 		if (delim == (char *)NULL) {
375 			if (strlen(file) > pathconf(".", _PC_NAME_MAX)) {
376 				fatal("Name too long: %s\n", file);
377 			}
378 		} else {
379 			/* truncate file at pathname delim to do pathconf */
380 			savedelim = *delim;
381 			*delim = '\0';
382 			/*
383 			 * file: pppppppp\0fffff\0
384 			 * ..... ^ file
385 			 * ............. ^ delim
386 			 */
387 			if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) {
388 				fatal("Name too long: %s\n", delim + 1);
389 			}
390 			*delim = savedelim;
391 		}
392 
393 		if ((opfil = fopen(file, "w")) == NULL) {
394 			(void) strcpy(tfile, file);
395 			(void) sprintf(fptr, "%.*d", fiwidth, (ctr-2));
396 			fatal("Cannot create %s\n", tfile);
397 		}
398 		return (opfil);
399 	}
400 	return (NULL);
401 }
402 
403 /*
404  * Getline gets a line via fgets from the input stream "infile".
405  * The line is put into linbuf and may not be larger than LINSIZ.
406  * If getaline is called with a non-zero value, the current line
407  * is bumped, otherwise it is not(for R.E. searching).
408  */
409 
410 static char *
411 getaline(int bumpcur)
412 {
413 	char *ret;
414 	if (bumpcur)
415 		curline++;
416 	ret = fgets(linbuf, LINSIZ, infile);
417 	return (ret);
418 }
419 
420 /*
421  * Line_arg handles line number arguments.
422  * line_arg takes as its argument a pointer to a character string
423  * (assumed to be a line number).  If that character string can be
424  * converted to a number(long long), to_line is called with that number,
425  * otherwise error.
426  */
427 
428 static void
429 line_arg(char *line)
430 {
431 	long long to;
432 
433 	if (asc_to_ll(line, &to) == ERR)
434 		fatal("%s: bad line number\n", line);
435 	to_line(to);
436 }
437 
438 /*
439  * Num_arg handles repeat arguments.
440  * Num_arg copies the numeric argument to "rep" (error if number is
441  * larger than 20 characters or } is left off).  Num_arg then converts
442  * the number and checks for validity.  Next num_arg checks the mode
443  * of the previous argument, and applys the argument the correct number
444  * of times. If the mode is not set properly its an error.
445  */
446 
447 static void
448 num_arg(char *arg, int md)
449 {
450 	offset_t repeat, toline;
451 	char rep[21];
452 	char *ptr;
453 	int		len;
454 
455 	ptr = rep;
456 	for (++arg; *arg != '}'; arg += len) {
457 		if (*arg == NULL)
458 			fatal("%s: missing '}'\n", targ);
459 		if ((len = mblen(arg, MB_LEN_MAX)) <= 0)
460 			len = 1;
461 		if ((ptr + len) >= &rep[20])
462 			fatal("%s: Repeat count too large\n", targ);
463 		(void) memcpy(ptr, arg, len);
464 		ptr += len;
465 	}
466 	*ptr = NULL;
467 	if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L)
468 		fatal("Illegal repeat count: %s\n", targ);
469 	if (md == LINMODE) {
470 		toline = offset = curline;
471 		for (; repeat > 0LL; repeat--) {
472 			toline += offset;
473 			to_line(toline);
474 		}
475 	} else	if (md == EXPMODE)
476 			for (; repeat > 0LL; repeat--)
477 				to_line(findline(expbuf, offset));
478 		else
479 			fatal("No operation for %s\n", targ);
480 }
481 
482 /*
483  * Re_arg handles regular expression arguments.
484  * Re_arg takes a csplit regular expression argument.  It checks for
485  * delimiter balance, computes any offset, and compiles the regular
486  * expression.  Findline is called with the compiled expression and
487  * offset, and returns the corresponding line number, which is used
488  * as input to the to_line function.
489  */
490 
491 static void
492 re_arg(char *string)
493 {
494 	char *ptr;
495 	char ch;
496 	int		len;
497 
498 	ch = *string;
499 	ptr = string;
500 	ptr++;
501 	while (*ptr != ch) {
502 		if (*ptr == '\\')
503 			++ptr;
504 
505 		if (*ptr == NULL)
506 			fatal("%s: missing delimiter\n", targ);
507 
508 		if ((len = mblen(ptr, MB_LEN_MAX)) <= 0)
509 			len = 1;
510 		ptr += len;
511 	}
512 
513 	/*
514 	 * The line below was added because compile no longer supports
515 	 * the fourth argument being passed.  The fourth argument used
516 	 * to be '/' or '%'.
517 	 */
518 
519 	*ptr = NULL;
520 	if (asc_to_ll(++ptr, &offset) == ERR)
521 		fatal("%s: illegal offset\n", string);
522 
523 	/*
524 	 * The line below was added because INIT which did this for us
525 	 * was removed from compile in regexp.h
526 	 */
527 
528 	string++;
529 	expbuf = compile(string, (char *)0, (char *)0);
530 	if (regerrno)
531 		PERROR(regerrno);
532 	to_line(findline(expbuf, offset));
533 }
534 
535 /*
536  * Sig handles breaks.  When a break occurs the signal is reset,
537  * and fatal is called to clean up and print the argument which
538  * was being processed at the time the interrupt occured.
539  */
540 
541 /* ARGSUSED */
542 static void
543 sig(int s)
544 {
545 	(void) signal(SIGINT, sig);
546 	fatal("Interrupt - program aborted at arg '%s'\n", targ);
547 }
548 
549 /*
550  * To_line creates split files.
551  * To_line gets as its argument the line which the current argument
552  * referenced.  To_line calls getfile for a new output stream, which
553  * does nothing if create is False.  If to_line's argument is not LAST
554  * it checks that the current line is not greater than its argument.
555  * While the current line is less than the desired line to_line gets
556  * lines and flushes(error if EOF is reached).
557  * If to_line's argument is LAST, it checks for more lines, and gets
558  * and flushes lines till the end of file.
559  * Finally, to_line calls closefile to close the output stream.
560  */
561 
562 static void
563 to_line(offset_t ln)
564 {
565 	outfile = getfile();
566 	if (ln != LAST) {
567 		if (curline > ln)
568 			fatal("%s - out of range\n", targ);
569 		while (curline < ln) {
570 			if (getaline(TRUE) == NULL)
571 				fatal("%s - out of range\n", targ);
572 			flush();
573 		}
574 	} else		/* last file */
575 		if (getaline(TRUE) != NULL) {
576 			flush();
577 			for (;;) {
578 				if (getaline(TRUE) == NULL)
579 					break;
580 				flush();
581 			}
582 		} else
583 			fatal("%s - out of range\n", targ);
584 	closefile();
585 }
586 
/*
 * usage prints the command synopsis on standard error and exits with
 * status 1.
 */
static void
usage()
{
	char *synopsis;

	synopsis = gettext(
	    "usage: csplit [-ks] [-f prefix] [-n number] "
	    "file arg1 ...argn\n");
	(void) fprintf(stderr, synopsis);
	exit(1);
}
595