1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * Copyright (c) 2018, Joyent, Inc.
31 */
32
33 /*
34 * csplit - Context or line file splitter
35 * Compile: cc -O -s -o csplit csplit.c
36 */
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <string.h>
42 #include <ctype.h>
43 #include <errno.h>
44 #include <limits.h>
45 #include <regexpr.h>
46 #include <signal.h>
47 #include <locale.h>
48 #include <libintl.h>
49
/* Line number passed to to_line() meaning "copy through end of input". */
#define LAST 0LL
/* Failure return of asc_to_ll(). */
#define ERR -1
#define FALSE 0
#define TRUE 1
/* Values of main()'s "mode": kind of the argument a {n} repeat applies to. */
#define EXPMODE 2
#define LINMODE 3
#define LINSIZ LINE_MAX /* POSIX.2 - read lines LINE_MAX long */

/* Globals */

char linbuf[LINSIZ]; /* Input line buffer */
char *expbuf; /* Compiled regular expression returned by compile() */
char tmpbuf[BUFSIZ]; /* Temporary buffer for stdin */
char file[8192] = "xx"; /* File name buffer */
char *targ; /* Arg ptr for error messages */
char *sptr; /* Scratch: findline() uses it to locate a trailing newline */
FILE *infile, *outfile; /* I/O file streams */
int silent, keep, create; /* Flags: -s(ilent), -k(eep), (create) */
int errflg; /* Incremented for each option getopt() rejects */
int fiwidth = 2; /* file index width (output file names) */
extern int optind;
extern char *optarg;
offset_t offset; /* Regular expression offset value */
offset_t curline; /* Current line in input file */

/*
 * Error hook for regular expression handling (see regexp(7)).
 * The argument x is ignored; the message always names the global targ.
 * Note the expansion already ends in a semicolon.
 */
#define PERROR(x) fatal("%s: Illegal Regular Expression\n", targ);

static int asc_to_ll(char *, long long *);
static void closefile(void);
static void fatal(char *, char *);
static offset_t findline(char *, offset_t);
static void flush(void);
static FILE *getfile(void);
static char *getaline(int);
static void line_arg(char *);
static void num_arg(char *, int);
static void re_arg(char *);
static void sig(int);
static void to_line(offset_t);
static void usage(void);
93
94 int
main(int argc,char ** argv)95 main(int argc, char **argv)
96 {
97 int ch, mode;
98 char *ptr;
99
100 (void) setlocale(LC_ALL, "");
101 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
102 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
103 #endif
104 (void) textdomain(TEXT_DOMAIN);
105
106 while ((ch = getopt(argc, argv, "skf:n:")) != EOF) {
107 switch (ch) {
108 case 'f':
109 (void) strcpy(file, optarg);
110 if ((ptr = strrchr(optarg, '/')) == NULL)
111 ptr = optarg;
112 else
113 ptr++;
114
115 break;
116 case 'n': /* POSIX.2 */
117 for (ptr = optarg; *ptr != '\0'; ptr++)
118 if (!isdigit((int)*ptr))
119 fatal("-n num\n", NULL);
120 fiwidth = atoi(optarg);
121 break;
122 case 'k':
123 keep++;
124 break;
125 case 's':
126 silent++;
127 break;
128 case '?':
129 errflg++;
130 }
131 }
132
133 argv = &argv[optind];
134 argc -= optind;
135 if (argc <= 1 || errflg)
136 usage();
137
138 if (strcmp(*argv, "-") == 0) {
139 infile = tmpfile();
140
141 while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) {
142 if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0)
143 if (errno == ENOSPC) {
144 (void) fprintf(stderr, "csplit: ");
145 (void) fprintf(stderr, gettext(
146 "No space left on device\n"));
147 exit(1);
148 } else {
149 (void) fprintf(stderr, "csplit: ");
150 (void) fprintf(stderr, gettext(
151 "Bad write to temporary "
152 "file\n"));
153 exit(1);
154 }
155
156 /* clear the buffer to get correct size when writing buffer */
157
158 (void) memset(tmpbuf, '\0', sizeof (tmpbuf));
159 }
160 rewind(infile);
161 } else if ((infile = fopen(*argv, "r")) == NULL)
162 fatal("Cannot open %s\n", *argv);
163 ++argv;
164 curline = (offset_t)1;
165 (void) signal(SIGINT, sig);
166
167 /*
168 * The following for loop handles the different argument types.
169 * A switch is performed on the first character of the argument
170 * and each case calls the appropriate argument handling routine.
171 */
172
173 for (; *argv; ++argv) {
174 targ = *argv;
175 switch (**argv) {
176 case '/':
177 mode = EXPMODE;
178 create = TRUE;
179 re_arg(*argv);
180 break;
181 case '%':
182 mode = EXPMODE;
183 create = FALSE;
184 re_arg(*argv);
185 break;
186 case '{':
187 num_arg(*argv, mode);
188 mode = FALSE;
189 break;
190 default:
191 mode = LINMODE;
192 create = TRUE;
193 line_arg(*argv);
194 break;
195 }
196 }
197 create = TRUE;
198 to_line(LAST);
199 return (0);
200 }
201
202 /*
203 * asc_to_ll takes an ascii argument(str) and converts it to a long long(plc)
204 * It returns ERR if an illegal character. The reason that asc_to_ll
205 * does not return an answer(long long) is that any value for the long
206 * long is legal, and this version of asc_to_ll detects error strings.
207 */
208
209 static int
asc_to_ll(char * str,long long * plc)210 asc_to_ll(char *str, long long *plc)
211 {
212 int f;
213 *plc = 0;
214 f = 0;
215 for (; ; str++) {
216 switch (*str) {
217 case ' ':
218 case '\t':
219 continue;
220 case '-':
221 f++;
222 /* FALLTHROUGH */
223 case '+':
224 str++;
225 }
226 break;
227 }
228 for (; *str != '\0'; str++)
229 if (*str >= '0' && *str <= '9')
230 *plc = *plc * 10 + *str - '0';
231 else
232 return (ERR);
233 if (f)
234 *plc = -(*plc);
235 return (TRUE); /* not error */
236 }
237
238 /*
239 * Closefile prints the byte count of the file created,(via fseeko
240 * and ftello), if the create flag is on and the silent flag is not on.
241 * If the create flag is on closefile then closes the file(fclose).
242 */
243
244 static void
closefile()245 closefile()
246 {
247 if (!silent && create) {
248 (void) fseeko(outfile, (offset_t)0, SEEK_END);
249 (void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile));
250 }
251 if (create)
252 (void) fclose(outfile);
253 }
254
255 /*
256 * Fatal handles error messages and cleanup.
257 * Because "arg" can be the global file, and the cleanup processing
258 * uses the global file, the error message is printed first. If the
259 * "keep" flag is not set, fatal unlinks all created files. If the
260 * "keep" flag is set, fatal closes the current file(if there is one).
261 * Fatal exits with a value of 1.
262 */
263
264 static void
fatal(char * string,char * arg)265 fatal(char *string, char *arg)
266 {
267 char *fls;
268 int num;
269
270 (void) fprintf(stderr, "csplit: ");
271
272 /* gettext dynamically replaces string */
273
274 (void) fprintf(stderr, gettext(string), arg);
275 if (!keep) {
276 if (outfile) {
277 (void) fclose(outfile);
278 for (fls = file; *fls != '\0'; fls++)
279 continue;
280 fls -= fiwidth;
281 for (num = atoi(fls); num >= 0; num--) {
282 (void) sprintf(fls, "%.*d", fiwidth, num);
283 (void) unlink(file);
284 }
285 }
286 } else
287 if (outfile)
288 closefile();
289 exit(1);
290 }
291
292 /*
293 * Findline returns the line number referenced by the current argument.
294 * Its arguments are a pointer to the compiled regular expression(expr),
295 * and an offset(oset). The variable lncnt is used to count the number
296 * of lines searched. First the current stream location is saved via
297 * ftello(), and getaline is called so that R.E. searching starts at the
298 * line after the previously referenced line. The while loop checks
299 * that there are more lines(error if none), bumps the line count, and
300 * checks for the R.E. on each line. If the R.E. matches on one of the
301 * lines the old stream location is restored, and the line number
302 * referenced by the R.E. and the offset is returned.
303 */
304
305 static offset_t
findline(char * expr,offset_t oset)306 findline(char *expr, offset_t oset)
307 {
308 static int benhere = 0;
309 offset_t lncnt = 0, saveloc;
310
311 saveloc = ftello(infile);
312 if (curline != (offset_t)1 || benhere) /* If first line, first time, */
313 (void) getaline(FALSE); /* then don't skip */
314 else
315 lncnt--;
316 benhere = 1;
317 while (getaline(FALSE) != NULL) {
318 lncnt++;
319 if ((sptr = strrchr(linbuf, '\n')) != NULL)
320 *sptr = '\0';
321 if (step(linbuf, expr)) {
322 (void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
323 return (curline+lncnt+oset);
324 }
325 }
326 (void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
327 return (curline+lncnt+oset+2);
328 }
329
330 /*
331 * Flush uses fputs to put lines on the output file stream(outfile)
332 * Since fputs does its own buffering, flush doesn't need to.
333 * Flush does nothing if the create flag is not set.
334 */
335
336 static void
flush()337 flush()
338 {
339 if (create)
340 (void) fputs(linbuf, outfile);
341 }
342
343 /*
344 * Getfile does nothing if the create flag is not set. If the create
345 * flag is set, getfile positions the file pointer(fptr) at the end of
346 * the file name prefix on the first call(fptr=0). The file counter is
347 * stored in the file name and incremented. If the subsequent fopen
348 * fails, the file name is copied to tfile for the error message, the
349 * previous file name is restored for cleanup, and fatal is called. If
350 * the fopen succeeds, the stream(opfil) is returned.
351 */
352
353 FILE *
getfile()354 getfile()
355 {
356 static char *fptr;
357 static int ctr;
358 FILE *opfil;
359 char tfile[15];
360 char *delim;
361 char savedelim;
362
363 if (create) {
364 if (fptr == 0)
365 for (fptr = file; *fptr != '\0'; fptr++)
366 continue;
367 (void) sprintf(fptr, "%.*d", fiwidth, ctr++);
368
369 /* check for suffix length overflow */
370 if (strlen(fptr) > fiwidth) {
371 fatal("Suffix longer than %ld chars; increase -n\n",
372 (char *)fiwidth);
373 }
374
375 /* check for filename length overflow */
376
377 delim = strrchr(file, '/');
378 if (delim == (char *)NULL) {
379 if (strlen(file) > pathconf(".", _PC_NAME_MAX)) {
380 fatal("Name too long: %s\n", file);
381 }
382 } else {
383 /* truncate file at pathname delim to do pathconf */
384 savedelim = *delim;
385 *delim = '\0';
386 /*
387 * file: pppppppp\0fffff\0
388 * ..... ^ file
389 * ............. ^ delim
390 */
391 if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) {
392 fatal("Name too long: %s\n", delim + 1);
393 }
394 *delim = savedelim;
395 }
396
397 if ((opfil = fopen(file, "w")) == NULL) {
398 (void) strlcpy(tfile, file, sizeof (tfile));
399 (void) sprintf(fptr, "%.*d", fiwidth, (ctr-2));
400 fatal("Cannot create %s\n", tfile);
401 }
402 return (opfil);
403 }
404 return (NULL);
405 }
406
407 /*
408 * Getline gets a line via fgets from the input stream "infile".
409 * The line is put into linbuf and may not be larger than LINSIZ.
410 * If getaline is called with a non-zero value, the current line
411 * is bumped, otherwise it is not(for R.E. searching).
412 */
413
414 static char *
getaline(int bumpcur)415 getaline(int bumpcur)
416 {
417 char *ret;
418 if (bumpcur)
419 curline++;
420 ret = fgets(linbuf, LINSIZ, infile);
421 return (ret);
422 }
423
424 /*
425 * Line_arg handles line number arguments.
426 * line_arg takes as its argument a pointer to a character string
427 * (assumed to be a line number). If that character string can be
428 * converted to a number(long long), to_line is called with that number,
429 * otherwise error.
430 */
431
432 static void
line_arg(char * line)433 line_arg(char *line)
434 {
435 long long to;
436
437 if (asc_to_ll(line, &to) == ERR)
438 fatal("%s: bad line number\n", line);
439 to_line(to);
440 }
441
442 /*
443 * Num_arg handles repeat arguments.
444 * Num_arg copies the numeric argument to "rep" (error if number is
445 * larger than 20 characters or } is left off). Num_arg then converts
446 * the number and checks for validity. Next num_arg checks the mode
447 * of the previous argument, and applys the argument the correct number
448 * of times. If the mode is not set properly its an error.
449 */
450
451 static void
num_arg(char * arg,int md)452 num_arg(char *arg, int md)
453 {
454 offset_t repeat, toline;
455 char rep[21];
456 char *ptr;
457 int len;
458
459 ptr = rep;
460 for (++arg; *arg != '}'; arg += len) {
461 if (*arg == '\0')
462 fatal("%s: missing '}'\n", targ);
463 if ((len = mblen(arg, MB_LEN_MAX)) <= 0)
464 len = 1;
465 if ((ptr + len) >= &rep[20])
466 fatal("%s: Repeat count too large\n", targ);
467 (void) memcpy(ptr, arg, len);
468 ptr += len;
469 }
470 *ptr = '\0';
471 if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L)
472 fatal("Illegal repeat count: %s\n", targ);
473 if (md == LINMODE) {
474 toline = offset = curline;
475 for (; repeat > 0LL; repeat--) {
476 toline += offset;
477 to_line(toline);
478 }
479 } else if (md == EXPMODE)
480 for (; repeat > 0LL; repeat--)
481 to_line(findline(expbuf, offset));
482 else
483 fatal("No operation for %s\n", targ);
484 }
485
486 /*
487 * Re_arg handles regular expression arguments.
488 * Re_arg takes a csplit regular expression argument. It checks for
489 * delimiter balance, computes any offset, and compiles the regular
490 * expression. Findline is called with the compiled expression and
491 * offset, and returns the corresponding line number, which is used
492 * as input to the to_line function.
493 */
494
495 static void
re_arg(char * string)496 re_arg(char *string)
497 {
498 char *ptr;
499 char ch;
500 int len;
501
502 ch = *string;
503 ptr = string;
504 ptr++;
505 while (*ptr != ch) {
506 if (*ptr == '\\')
507 ++ptr;
508
509 if (*ptr == '\0')
510 fatal("%s: missing delimiter\n", targ);
511
512 if ((len = mblen(ptr, MB_LEN_MAX)) <= 0)
513 len = 1;
514 ptr += len;
515 }
516
517 /*
518 * The line below was added because compile no longer supports
519 * the fourth argument being passed. The fourth argument used
520 * to be '/' or '%'.
521 */
522
523 *ptr = '\0';
524 if (asc_to_ll(++ptr, &offset) == ERR)
525 fatal("%s: illegal offset\n", string);
526
527 /*
528 * The line below was added because INIT which did this for us
529 * was removed from compile in regexp.h
530 */
531
532 string++;
533 expbuf = compile(string, (char *)0, (char *)0);
534 if (regerrno)
535 PERROR(regerrno);
536 to_line(findline(expbuf, offset));
537 }
538
539 /*
540 * Sig handles breaks. When a break occurs the signal is reset,
541 * and fatal is called to clean up and print the argument which
542 * was being processed at the time the interrupt occured.
543 */
544
545 /* ARGSUSED */
546 static void
sig(int s)547 sig(int s)
548 {
549 (void) signal(SIGINT, sig);
550 fatal("Interrupt - program aborted at arg '%s'\n", targ);
551 }
552
553 /*
554 * To_line creates split files.
555 * To_line gets as its argument the line which the current argument
556 * referenced. To_line calls getfile for a new output stream, which
557 * does nothing if create is False. If to_line's argument is not LAST
558 * it checks that the current line is not greater than its argument.
559 * While the current line is less than the desired line to_line gets
560 * lines and flushes(error if EOF is reached).
561 * If to_line's argument is LAST, it checks for more lines, and gets
562 * and flushes lines till the end of file.
563 * Finally, to_line calls closefile to close the output stream.
564 */
565
566 static void
to_line(offset_t ln)567 to_line(offset_t ln)
568 {
569 outfile = getfile();
570 if (ln != LAST) {
571 if (curline > ln)
572 fatal("%s - out of range\n", targ);
573 while (curline < ln) {
574 if (getaline(TRUE) == NULL)
575 fatal("%s - out of range\n", targ);
576 flush();
577 }
578 } else /* last file */
579 if (getaline(TRUE) != NULL) {
580 flush();
581 for (;;) {
582 if (getaline(TRUE) == NULL)
583 break;
584 flush();
585 }
586 } else
587 fatal("%s - out of range\n", targ);
588 closefile();
589 }
590
/*
 * Usage prints the (localized) synopsis on standard error and exits
 * with status 1.
 */
static void
usage()
{
	char *synopsis;

	synopsis = gettext(
	    "usage: csplit [-ks] [-f prefix] [-n number] "
	    "file arg1 ...argn\n");
	(void) fprintf(stderr, synopsis);
	exit(1);
}
599