1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * csplit - Context or line file splitter
31 * Compile: cc -O -s -o csplit csplit.c
32 */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <unistd.h>
37 #include <string.h>
38 #include <ctype.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <regexpr.h>
42 #include <signal.h>
43 #include <locale.h>
44 #include <libintl.h>
45
/*
 * Symbolic constants shared by the whole program.
 */
#define	LAST	0LL		/* to_line() target meaning "through end of input" */
#define	ERR	(-1)		/* asc_to_ll() failure return */
#define	FALSE	0
#define	TRUE	1
#define	EXPMODE	2		/* previous operand was a /re/ or %re% */
#define	LINMODE	3		/* previous operand was a line number */
#define	LINSIZ	LINE_MAX	/* POSIX.2 - read lines LINE_MAX long */
53
54 /* Globals */
55
56 char linbuf[LINSIZ]; /* Input line buffer */
57 char *expbuf;
58 char tmpbuf[BUFSIZ]; /* Temporary buffer for stdin */
59 char file[8192] = "xx"; /* File name buffer */
60 char *targ; /* Arg ptr for error messages */
61 char *sptr;
62 FILE *infile, *outfile; /* I/O file streams */
63 int silent, keep, create; /* Flags: -s(ilent), -k(eep), (create) */
64 int errflg;
65 int fiwidth = 2; /* file index width (output file names) */
66 extern int optind;
67 extern char *optarg;
68 offset_t offset; /* Regular expression offset value */
69 offset_t curline; /* Current line in input file */
70
/*
 * Error hook used when a regular expression fails to compile.  The
 * argument is accepted for compatibility with the historical regexp
 * interface but is not used: the message always names the operand
 * currently being processed (targ).
 *
 * The expansion deliberately carries no trailing semicolon so that
 * "PERROR(x);" is exactly one statement and remains safe inside an
 * unbraced if/else.
 */
#define	PERROR(x)	fatal("%s: Illegal Regular Expression\n", targ)
75
76 static int asc_to_ll(char *, long long *);
77 static void closefile(void);
78 static void fatal(char *, char *);
79 static offset_t findline(char *, offset_t);
80 static void flush(void);
81 static FILE *getfile(void);
82 static char *getaline(int);
83 static void line_arg(char *);
84 static void num_arg(char *, int);
85 static void re_arg(char *);
86 static void sig(int);
87 static void to_line(offset_t);
88 static void usage(void);
89
90 int
main(int argc,char ** argv)91 main(int argc, char **argv)
92 {
93 int ch, mode;
94 char *ptr;
95
96 (void) setlocale(LC_ALL, "");
97 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
98 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
99 #endif
100 (void) textdomain(TEXT_DOMAIN);
101
102 while ((ch = getopt(argc, argv, "skf:n:")) != EOF) {
103 switch (ch) {
104 case 'f':
105 (void) strcpy(file, optarg);
106 if ((ptr = strrchr(optarg, '/')) == NULL)
107 ptr = optarg;
108 else
109 ptr++;
110
111 break;
112 case 'n': /* POSIX.2 */
113 for (ptr = optarg; *ptr != NULL; ptr++)
114 if (!isdigit((int)*ptr))
115 fatal("-n num\n", NULL);
116 fiwidth = atoi(optarg);
117 break;
118 case 'k':
119 keep++;
120 break;
121 case 's':
122 silent++;
123 break;
124 case '?':
125 errflg++;
126 }
127 }
128
129 argv = &argv[optind];
130 argc -= optind;
131 if (argc <= 1 || errflg)
132 usage();
133
134 if (strcmp(*argv, "-") == 0) {
135 infile = tmpfile();
136
137 while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) {
138 if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0)
139 if (errno == ENOSPC) {
140 (void) fprintf(stderr, "csplit: ");
141 (void) fprintf(stderr, gettext(
142 "No space left on device\n"));
143 exit(1);
144 } else {
145 (void) fprintf(stderr, "csplit: ");
146 (void) fprintf(stderr, gettext(
147 "Bad write to temporary "
148 "file\n"));
149 exit(1);
150 }
151
152 /* clear the buffer to get correct size when writing buffer */
153
154 (void) memset(tmpbuf, '\0', sizeof (tmpbuf));
155 }
156 rewind(infile);
157 } else if ((infile = fopen(*argv, "r")) == NULL)
158 fatal("Cannot open %s\n", *argv);
159 ++argv;
160 curline = (offset_t)1;
161 (void) signal(SIGINT, sig);
162
163 /*
164 * The following for loop handles the different argument types.
165 * A switch is performed on the first character of the argument
166 * and each case calls the appropriate argument handling routine.
167 */
168
169 for (; *argv; ++argv) {
170 targ = *argv;
171 switch (**argv) {
172 case '/':
173 mode = EXPMODE;
174 create = TRUE;
175 re_arg(*argv);
176 break;
177 case '%':
178 mode = EXPMODE;
179 create = FALSE;
180 re_arg(*argv);
181 break;
182 case '{':
183 num_arg(*argv, mode);
184 mode = FALSE;
185 break;
186 default:
187 mode = LINMODE;
188 create = TRUE;
189 line_arg(*argv);
190 break;
191 }
192 }
193 create = TRUE;
194 to_line(LAST);
195 return (0);
196 }
197
/*
 * asc_to_ll converts the ASCII string "str" to a long long stored
 * through "plc".
 *
 * Accepted syntax: any number of leading blanks or tabs, an optional
 * single '+' or '-', then decimal digits up to the terminating NUL.
 * A string with no digits at all converts to 0.
 *
 * Returns TRUE on success.  Returns ERR if a non-digit character is
 * found or if the magnitude does not fit in a long long.  The result
 * is passed back through a pointer rather than as the return value
 * because every long long is a legal answer, so no value is free to
 * signal an error.  On ERR, *plc is left as 0.
 */

static int
asc_to_ll(char *str, long long *plc)
{
	int negative = 0;
	int digit;
	long long value = 0;

	*plc = 0;

	while (*str == ' ' || *str == '\t')
		str++;
	if (*str == '-') {
		negative = 1;
		str++;
	} else if (*str == '+') {
		str++;
	}

	for (; *str != '\0'; str++) {
		if (*str < '0' || *str > '9')
			return (ERR);
		digit = *str - '0';
		/* Check before multiplying: signed overflow is undefined */
		if (value > (LLONG_MAX - digit) / 10)
			return (ERR);
		value = value * 10 + digit;
	}

	*plc = negative ? -value : value;
	return (TRUE);	/* not error */
}
233
234 /*
235 * Closefile prints the byte count of the file created,(via fseeko
236 * and ftello), if the create flag is on and the silent flag is not on.
237 * If the create flag is on closefile then closes the file(fclose).
238 */
239
240 static void
closefile()241 closefile()
242 {
243 if (!silent && create) {
244 (void) fseeko(outfile, (offset_t)0, SEEK_END);
245 (void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile));
246 }
247 if (create)
248 (void) fclose(outfile);
249 }
250
251 /*
252 * Fatal handles error messages and cleanup.
253 * Because "arg" can be the global file, and the cleanup processing
254 * uses the global file, the error message is printed first. If the
255 * "keep" flag is not set, fatal unlinks all created files. If the
256 * "keep" flag is set, fatal closes the current file(if there is one).
257 * Fatal exits with a value of 1.
258 */
259
260 static void
fatal(char * string,char * arg)261 fatal(char *string, char *arg)
262 {
263 char *fls;
264 int num;
265
266 (void) fprintf(stderr, "csplit: ");
267
268 /* gettext dynamically replaces string */
269
270 (void) fprintf(stderr, gettext(string), arg);
271 if (!keep) {
272 if (outfile) {
273 (void) fclose(outfile);
274 for (fls = file; *fls != '\0'; fls++)
275 continue;
276 fls -= fiwidth;
277 for (num = atoi(fls); num >= 0; num--) {
278 (void) sprintf(fls, "%.*d", fiwidth, num);
279 (void) unlink(file);
280 }
281 }
282 } else
283 if (outfile)
284 closefile();
285 exit(1);
286 }
287
288 /*
289 * Findline returns the line number referenced by the current argument.
290 * Its arguments are a pointer to the compiled regular expression(expr),
291 * and an offset(oset). The variable lncnt is used to count the number
292 * of lines searched. First the current stream location is saved via
293 * ftello(), and getaline is called so that R.E. searching starts at the
294 * line after the previously referenced line. The while loop checks
295 * that there are more lines(error if none), bumps the line count, and
296 * checks for the R.E. on each line. If the R.E. matches on one of the
297 * lines the old stream location is restored, and the line number
298 * referenced by the R.E. and the offset is returned.
299 */
300
301 static offset_t
findline(char * expr,offset_t oset)302 findline(char *expr, offset_t oset)
303 {
304 static int benhere = 0;
305 offset_t lncnt = 0, saveloc;
306
307 saveloc = ftello(infile);
308 if (curline != (offset_t)1 || benhere) /* If first line, first time, */
309 (void) getaline(FALSE); /* then don't skip */
310 else
311 lncnt--;
312 benhere = 1;
313 while (getaline(FALSE) != NULL) {
314 lncnt++;
315 if ((sptr = strrchr(linbuf, '\n')) != NULL)
316 *sptr = '\0';
317 if (step(linbuf, expr)) {
318 (void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
319 return (curline+lncnt+oset);
320 }
321 }
322 (void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
323 return (curline+lncnt+oset+2);
324 }
325
326 /*
327 * Flush uses fputs to put lines on the output file stream(outfile)
328 * Since fputs does its own buffering, flush doesn't need to.
329 * Flush does nothing if the create flag is not set.
330 */
331
332 static void
flush()333 flush()
334 {
335 if (create)
336 (void) fputs(linbuf, outfile);
337 }
338
339 /*
340 * Getfile does nothing if the create flag is not set. If the create
341 * flag is set, getfile positions the file pointer(fptr) at the end of
342 * the file name prefix on the first call(fptr=0). The file counter is
343 * stored in the file name and incremented. If the subsequent fopen
344 * fails, the file name is copied to tfile for the error message, the
345 * previous file name is restored for cleanup, and fatal is called. If
346 * the fopen succeeds, the stream(opfil) is returned.
347 */
348
349 FILE *
getfile()350 getfile()
351 {
352 static char *fptr;
353 static int ctr;
354 FILE *opfil;
355 char tfile[15];
356 char *delim;
357 char savedelim;
358
359 if (create) {
360 if (fptr == 0)
361 for (fptr = file; *fptr != NULL; fptr++)
362 continue;
363 (void) sprintf(fptr, "%.*d", fiwidth, ctr++);
364
365 /* check for suffix length overflow */
366 if (strlen(fptr) > fiwidth) {
367 fatal("Suffix longer than %ld chars; increase -n\n",
368 (char *)fiwidth);
369 }
370
371 /* check for filename length overflow */
372
373 delim = strrchr(file, '/');
374 if (delim == (char *)NULL) {
375 if (strlen(file) > pathconf(".", _PC_NAME_MAX)) {
376 fatal("Name too long: %s\n", file);
377 }
378 } else {
379 /* truncate file at pathname delim to do pathconf */
380 savedelim = *delim;
381 *delim = '\0';
382 /*
383 * file: pppppppp\0fffff\0
384 * ..... ^ file
385 * ............. ^ delim
386 */
387 if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) {
388 fatal("Name too long: %s\n", delim + 1);
389 }
390 *delim = savedelim;
391 }
392
393 if ((opfil = fopen(file, "w")) == NULL) {
394 (void) strcpy(tfile, file);
395 (void) sprintf(fptr, "%.*d", fiwidth, (ctr-2));
396 fatal("Cannot create %s\n", tfile);
397 }
398 return (opfil);
399 }
400 return (NULL);
401 }
402
403 /*
404 * Getline gets a line via fgets from the input stream "infile".
405 * The line is put into linbuf and may not be larger than LINSIZ.
406 * If getaline is called with a non-zero value, the current line
407 * is bumped, otherwise it is not(for R.E. searching).
408 */
409
410 static char *
getaline(int bumpcur)411 getaline(int bumpcur)
412 {
413 char *ret;
414 if (bumpcur)
415 curline++;
416 ret = fgets(linbuf, LINSIZ, infile);
417 return (ret);
418 }
419
420 /*
421 * Line_arg handles line number arguments.
422 * line_arg takes as its argument a pointer to a character string
423 * (assumed to be a line number). If that character string can be
424 * converted to a number(long long), to_line is called with that number,
425 * otherwise error.
426 */
427
428 static void
line_arg(char * line)429 line_arg(char *line)
430 {
431 long long to;
432
433 if (asc_to_ll(line, &to) == ERR)
434 fatal("%s: bad line number\n", line);
435 to_line(to);
436 }
437
438 /*
439 * Num_arg handles repeat arguments.
440 * Num_arg copies the numeric argument to "rep" (error if number is
441 * larger than 20 characters or } is left off). Num_arg then converts
442 * the number and checks for validity. Next num_arg checks the mode
443 * of the previous argument, and applys the argument the correct number
444 * of times. If the mode is not set properly its an error.
445 */
446
447 static void
num_arg(char * arg,int md)448 num_arg(char *arg, int md)
449 {
450 offset_t repeat, toline;
451 char rep[21];
452 char *ptr;
453 int len;
454
455 ptr = rep;
456 for (++arg; *arg != '}'; arg += len) {
457 if (*arg == NULL)
458 fatal("%s: missing '}'\n", targ);
459 if ((len = mblen(arg, MB_LEN_MAX)) <= 0)
460 len = 1;
461 if ((ptr + len) >= &rep[20])
462 fatal("%s: Repeat count too large\n", targ);
463 (void) memcpy(ptr, arg, len);
464 ptr += len;
465 }
466 *ptr = NULL;
467 if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L)
468 fatal("Illegal repeat count: %s\n", targ);
469 if (md == LINMODE) {
470 toline = offset = curline;
471 for (; repeat > 0LL; repeat--) {
472 toline += offset;
473 to_line(toline);
474 }
475 } else if (md == EXPMODE)
476 for (; repeat > 0LL; repeat--)
477 to_line(findline(expbuf, offset));
478 else
479 fatal("No operation for %s\n", targ);
480 }
481
482 /*
483 * Re_arg handles regular expression arguments.
484 * Re_arg takes a csplit regular expression argument. It checks for
485 * delimiter balance, computes any offset, and compiles the regular
486 * expression. Findline is called with the compiled expression and
487 * offset, and returns the corresponding line number, which is used
488 * as input to the to_line function.
489 */
490
491 static void
re_arg(char * string)492 re_arg(char *string)
493 {
494 char *ptr;
495 char ch;
496 int len;
497
498 ch = *string;
499 ptr = string;
500 ptr++;
501 while (*ptr != ch) {
502 if (*ptr == '\\')
503 ++ptr;
504
505 if (*ptr == NULL)
506 fatal("%s: missing delimiter\n", targ);
507
508 if ((len = mblen(ptr, MB_LEN_MAX)) <= 0)
509 len = 1;
510 ptr += len;
511 }
512
513 /*
514 * The line below was added because compile no longer supports
515 * the fourth argument being passed. The fourth argument used
516 * to be '/' or '%'.
517 */
518
519 *ptr = NULL;
520 if (asc_to_ll(++ptr, &offset) == ERR)
521 fatal("%s: illegal offset\n", string);
522
523 /*
524 * The line below was added because INIT which did this for us
525 * was removed from compile in regexp.h
526 */
527
528 string++;
529 expbuf = compile(string, (char *)0, (char *)0);
530 if (regerrno)
531 PERROR(regerrno);
532 to_line(findline(expbuf, offset));
533 }
534
535 /*
536 * Sig handles breaks. When a break occurs the signal is reset,
537 * and fatal is called to clean up and print the argument which
538 * was being processed at the time the interrupt occured.
539 */
540
541 /* ARGSUSED */
542 static void
sig(int s)543 sig(int s)
544 {
545 (void) signal(SIGINT, sig);
546 fatal("Interrupt - program aborted at arg '%s'\n", targ);
547 }
548
549 /*
550 * To_line creates split files.
551 * To_line gets as its argument the line which the current argument
552 * referenced. To_line calls getfile for a new output stream, which
553 * does nothing if create is False. If to_line's argument is not LAST
554 * it checks that the current line is not greater than its argument.
555 * While the current line is less than the desired line to_line gets
556 * lines and flushes(error if EOF is reached).
557 * If to_line's argument is LAST, it checks for more lines, and gets
558 * and flushes lines till the end of file.
559 * Finally, to_line calls closefile to close the output stream.
560 */
561
562 static void
to_line(offset_t ln)563 to_line(offset_t ln)
564 {
565 outfile = getfile();
566 if (ln != LAST) {
567 if (curline > ln)
568 fatal("%s - out of range\n", targ);
569 while (curline < ln) {
570 if (getaline(TRUE) == NULL)
571 fatal("%s - out of range\n", targ);
572 flush();
573 }
574 } else /* last file */
575 if (getaline(TRUE) != NULL) {
576 flush();
577 for (;;) {
578 if (getaline(TRUE) == NULL)
579 break;
580 flush();
581 }
582 } else
583 fatal("%s - out of range\n", targ);
584 closefile();
585 }
586
/*
 * Usage prints the (localized) command synopsis on stderr and exits
 * with status 1.
 */
static void
usage(void)
{
	char *synopsis;

	synopsis = gettext(
	    "usage: csplit [-ks] [-f prefix] [-n number] "
	    "file arg1 ...argn\n");
	(void) fprintf(stderr, synopsis);
	exit(1);
}
595