1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * csplit - Context or line file splitter 31 * Compile: cc -O -s -o csplit csplit.c 32 */ 33 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <unistd.h> 37 #include <string.h> 38 #include <ctype.h> 39 #include <errno.h> 40 #include <limits.h> 41 #include <regexpr.h> 42 #include <signal.h> 43 #include <locale.h> 44 #include <libintl.h> 45 46 #define LAST 0LL 47 #define ERR -1 48 #define FALSE 0 49 #define TRUE 1 50 #define EXPMODE 2 51 #define LINMODE 3 52 #define LINSIZ LINE_MAX /* POSIX.2 - read lines LINE_MAX long */ 53 54 /* Globals */ 55 56 char linbuf[LINSIZ]; /* Input line buffer */ 57 char *expbuf; 58 char tmpbuf[BUFSIZ]; /* Temporary buffer for stdin */ 59 char file[8192] = "xx"; /* File name buffer */ 60 char *targ; /* Arg ptr for error messages */ 61 char *sptr; 62 FILE *infile, *outfile; /* I/O file streams */ 63 int silent, keep, create; /* Flags: -s(ilent), -k(eep), (create) */ 64 int errflg; 65 int fiwidth = 2; /* file index width (output file names) */ 66 extern int optind; 67 extern char *optarg; 68 offset_t offset; /* Regular expression offset value */ 69 offset_t curline; /* Current line in input file */ 70 71 /* 72 * These defines are needed for regexp handling(see regexp(7)) 73 */ 74 #define PERROR(x) fatal("%s: Illegal Regular Expression\n", targ); 75 76 static int asc_to_ll(char *, long long *); 77 static void closefile(void); 78 static void fatal(char *, char *); 79 static offset_t findline(char *, offset_t); 80 static void flush(void); 81 static FILE *getfile(void); 82 static char *getaline(int); 83 static void line_arg(char *); 84 static void num_arg(char *, int); 85 static void re_arg(char *); 86 static void sig(int); 87 static void to_line(offset_t); 88 static void usage(void); 89 90 int 91 main(int argc, char **argv) 92 { 93 int ch, mode; 94 char *ptr; 95 96 (void) setlocale(LC_ALL, ""); 97 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 98 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 99 #endif 100 (void) textdomain(TEXT_DOMAIN); 101 102 while ((ch = getopt(argc, argv, "skf:n:")) != EOF) { 103 switch (ch) { 104 case 'f': 105 (void) strcpy(file, optarg); 106 if ((ptr = strrchr(optarg, '/')) == NULL) 107 ptr = optarg; 108 else 109 ptr++; 110 111 break; 112 case 'n': /* POSIX.2 */ 113 for (ptr = optarg; *ptr != NULL; ptr++) 114 if (!isdigit((int)*ptr)) 115 fatal("-n num\n", NULL); 116 fiwidth = atoi(optarg); 117 break; 118 case 'k': 119 keep++; 120 break; 121 case 's': 122 silent++; 123 break; 124 case '?': 125 errflg++; 126 } 127 } 128 129 argv = &argv[optind]; 130 argc -= optind; 131 if (argc <= 1 || errflg) 132 usage(); 133 134 if (strcmp(*argv, "-") == 0) { 135 infile = tmpfile(); 136 137 while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) { 138 if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0) 139 if (errno == ENOSPC) { 140 (void) fprintf(stderr, "csplit: "); 141 (void) fprintf(stderr, gettext( 142 "No space left on device\n")); 143 exit(1); 144 } else { 145 (void) fprintf(stderr, "csplit: "); 146 (void) fprintf(stderr, gettext( 147 "Bad write to temporary " 148 "file\n")); 149 exit(1); 150 } 151 152 /* clear the buffer to get correct size when writing buffer */ 153 154 (void) memset(tmpbuf, '\0', sizeof (tmpbuf)); 155 } 156 rewind(infile); 157 } else if ((infile = fopen(*argv, "r")) == NULL) 158 fatal("Cannot open %s\n", *argv); 159 ++argv; 160 curline = (offset_t)1; 161 (void) signal(SIGINT, sig); 162 163 /* 164 * The following for loop handles the different argument types. 165 * A switch is performed on the first character of the argument 166 * and each case calls the appropriate argument handling routine. 167 */ 168 169 for (; *argv; ++argv) { 170 targ = *argv; 171 switch (**argv) { 172 case '/': 173 mode = EXPMODE; 174 create = TRUE; 175 re_arg(*argv); 176 break; 177 case '%': 178 mode = EXPMODE; 179 create = FALSE; 180 re_arg(*argv); 181 break; 182 case '{': 183 num_arg(*argv, mode); 184 mode = FALSE; 185 break; 186 default: 187 mode = LINMODE; 188 create = TRUE; 189 line_arg(*argv); 190 break; 191 } 192 } 193 create = TRUE; 194 to_line(LAST); 195 return (0); 196 } 197 198 /* 199 * asc_to_ll takes an ascii argument(str) and converts it to a long long(plc) 200 * It returns ERR if an illegal character. The reason that asc_to_ll 201 * does not return an answer(long long) is that any value for the long 202 * long is legal, and this version of asc_to_ll detects error strings. 203 */ 204 205 static int 206 asc_to_ll(char *str, long long *plc) 207 { 208 int f; 209 *plc = 0; 210 f = 0; 211 for (; ; str++) { 212 switch (*str) { 213 case ' ': 214 case '\t': 215 continue; 216 case '-': 217 f++; 218 /* FALLTHROUGH */ 219 case '+': 220 str++; 221 } 222 break; 223 } 224 for (; *str != NULL; str++) 225 if (*str >= '0' && *str <= '9') 226 *plc = *plc * 10 + *str - '0'; 227 else 228 return (ERR); 229 if (f) 230 *plc = -(*plc); 231 return (TRUE); /* not error */ 232 } 233 234 /* 235 * Closefile prints the byte count of the file created,(via fseeko 236 * and ftello), if the create flag is on and the silent flag is not on. 237 * If the create flag is on closefile then closes the file(fclose). 238 */ 239 240 static void 241 closefile() 242 { 243 if (!silent && create) { 244 (void) fseeko(outfile, (offset_t)0, SEEK_END); 245 (void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile)); 246 } 247 if (create) 248 (void) fclose(outfile); 249 } 250 251 /* 252 * Fatal handles error messages and cleanup. 253 * Because "arg" can be the global file, and the cleanup processing 254 * uses the global file, the error message is printed first. If the 255 * "keep" flag is not set, fatal unlinks all created files. If the 256 * "keep" flag is set, fatal closes the current file(if there is one). 257 * Fatal exits with a value of 1. 258 */ 259 260 static void 261 fatal(char *string, char *arg) 262 { 263 char *fls; 264 int num; 265 266 (void) fprintf(stderr, "csplit: "); 267 268 /* gettext dynamically replaces string */ 269 270 (void) fprintf(stderr, gettext(string), arg); 271 if (!keep) { 272 if (outfile) { 273 (void) fclose(outfile); 274 for (fls = file; *fls != '\0'; fls++) 275 continue; 276 fls -= fiwidth; 277 for (num = atoi(fls); num >= 0; num--) { 278 (void) sprintf(fls, "%.*d", fiwidth, num); 279 (void) unlink(file); 280 } 281 } 282 } else 283 if (outfile) 284 closefile(); 285 exit(1); 286 } 287 288 /* 289 * Findline returns the line number referenced by the current argument. 290 * Its arguments are a pointer to the compiled regular expression(expr), 291 * and an offset(oset). The variable lncnt is used to count the number 292 * of lines searched. First the current stream location is saved via 293 * ftello(), and getaline is called so that R.E. searching starts at the 294 * line after the previously referenced line. The while loop checks 295 * that there are more lines(error if none), bumps the line count, and 296 * checks for the R.E. on each line. If the R.E. matches on one of the 297 * lines the old stream location is restored, and the line number 298 * referenced by the R.E. and the offset is returned. 299 */ 300 301 static offset_t 302 findline(char *expr, offset_t oset) 303 { 304 static int benhere = 0; 305 offset_t lncnt = 0, saveloc; 306 307 saveloc = ftello(infile); 308 if (curline != (offset_t)1 || benhere) /* If first line, first time, */ 309 (void) getaline(FALSE); /* then don't skip */ 310 else 311 lncnt--; 312 benhere = 1; 313 while (getaline(FALSE) != NULL) { 314 lncnt++; 315 if ((sptr = strrchr(linbuf, '\n')) != NULL) 316 *sptr = '\0'; 317 if (step(linbuf, expr)) { 318 (void) fseeko(infile, (offset_t)saveloc, SEEK_SET); 319 return (curline+lncnt+oset); 320 } 321 } 322 (void) fseeko(infile, (offset_t)saveloc, SEEK_SET); 323 return (curline+lncnt+oset+2); 324 } 325 326 /* 327 * Flush uses fputs to put lines on the output file stream(outfile) 328 * Since fputs does its own buffering, flush doesn't need to. 329 * Flush does nothing if the create flag is not set. 330 */ 331 332 static void 333 flush() 334 { 335 if (create) 336 (void) fputs(linbuf, outfile); 337 } 338 339 /* 340 * Getfile does nothing if the create flag is not set. If the create 341 * flag is set, getfile positions the file pointer(fptr) at the end of 342 * the file name prefix on the first call(fptr=0). The file counter is 343 * stored in the file name and incremented. If the subsequent fopen 344 * fails, the file name is copied to tfile for the error message, the 345 * previous file name is restored for cleanup, and fatal is called. If 346 * the fopen succeeds, the stream(opfil) is returned. 347 */ 348 349 FILE * 350 getfile() 351 { 352 static char *fptr; 353 static int ctr; 354 FILE *opfil; 355 char tfile[15]; 356 char *delim; 357 char savedelim; 358 359 if (create) { 360 if (fptr == 0) 361 for (fptr = file; *fptr != NULL; fptr++) 362 continue; 363 (void) sprintf(fptr, "%.*d", fiwidth, ctr++); 364 365 /* check for suffix length overflow */ 366 if (strlen(fptr) > fiwidth) { 367 fatal("Suffix longer than %ld chars; increase -n\n", 368 (char *)fiwidth); 369 } 370 371 /* check for filename length overflow */ 372 373 delim = strrchr(file, '/'); 374 if (delim == (char *)NULL) { 375 if (strlen(file) > pathconf(".", _PC_NAME_MAX)) { 376 fatal("Name too long: %s\n", file); 377 } 378 } else { 379 /* truncate file at pathname delim to do pathconf */ 380 savedelim = *delim; 381 *delim = '\0'; 382 /* 383 * file: pppppppp\0fffff\0 384 * ..... ^ file 385 * ............. ^ delim 386 */ 387 if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) { 388 fatal("Name too long: %s\n", delim + 1); 389 } 390 *delim = savedelim; 391 } 392 393 if ((opfil = fopen(file, "w")) == NULL) { 394 (void) strcpy(tfile, file); 395 (void) sprintf(fptr, "%.*d", fiwidth, (ctr-2)); 396 fatal("Cannot create %s\n", tfile); 397 } 398 return (opfil); 399 } 400 return (NULL); 401 } 402 403 /* 404 * Getline gets a line via fgets from the input stream "infile". 405 * The line is put into linbuf and may not be larger than LINSIZ. 406 * If getaline is called with a non-zero value, the current line 407 * is bumped, otherwise it is not(for R.E. searching). 408 */ 409 410 static char * 411 getaline(int bumpcur) 412 { 413 char *ret; 414 if (bumpcur) 415 curline++; 416 ret = fgets(linbuf, LINSIZ, infile); 417 return (ret); 418 } 419 420 /* 421 * Line_arg handles line number arguments. 422 * line_arg takes as its argument a pointer to a character string 423 * (assumed to be a line number). If that character string can be 424 * converted to a number(long long), to_line is called with that number, 425 * otherwise error. 426 */ 427 428 static void 429 line_arg(char *line) 430 { 431 long long to; 432 433 if (asc_to_ll(line, &to) == ERR) 434 fatal("%s: bad line number\n", line); 435 to_line(to); 436 } 437 438 /* 439 * Num_arg handles repeat arguments. 440 * Num_arg copies the numeric argument to "rep" (error if number is 441 * larger than 20 characters or } is left off). Num_arg then converts 442 * the number and checks for validity. Next num_arg checks the mode 443 * of the previous argument, and applys the argument the correct number 444 * of times. If the mode is not set properly its an error. 445 */ 446 447 static void 448 num_arg(char *arg, int md) 449 { 450 offset_t repeat, toline; 451 char rep[21]; 452 char *ptr; 453 int len; 454 455 ptr = rep; 456 for (++arg; *arg != '}'; arg += len) { 457 if (*arg == NULL) 458 fatal("%s: missing '}'\n", targ); 459 if ((len = mblen(arg, MB_LEN_MAX)) <= 0) 460 len = 1; 461 if ((ptr + len) >= &rep[20]) 462 fatal("%s: Repeat count too large\n", targ); 463 (void) memcpy(ptr, arg, len); 464 ptr += len; 465 } 466 *ptr = NULL; 467 if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L) 468 fatal("Illegal repeat count: %s\n", targ); 469 if (md == LINMODE) { 470 toline = offset = curline; 471 for (; repeat > 0LL; repeat--) { 472 toline += offset; 473 to_line(toline); 474 } 475 } else if (md == EXPMODE) 476 for (; repeat > 0LL; repeat--) 477 to_line(findline(expbuf, offset)); 478 else 479 fatal("No operation for %s\n", targ); 480 } 481 482 /* 483 * Re_arg handles regular expression arguments. 484 * Re_arg takes a csplit regular expression argument. It checks for 485 * delimiter balance, computes any offset, and compiles the regular 486 * expression. Findline is called with the compiled expression and 487 * offset, and returns the corresponding line number, which is used 488 * as input to the to_line function. 489 */ 490 491 static void 492 re_arg(char *string) 493 { 494 char *ptr; 495 char ch; 496 int len; 497 498 ch = *string; 499 ptr = string; 500 ptr++; 501 while (*ptr != ch) { 502 if (*ptr == '\\') 503 ++ptr; 504 505 if (*ptr == NULL) 506 fatal("%s: missing delimiter\n", targ); 507 508 if ((len = mblen(ptr, MB_LEN_MAX)) <= 0) 509 len = 1; 510 ptr += len; 511 } 512 513 /* 514 * The line below was added because compile no longer supports 515 * the fourth argument being passed. The fourth argument used 516 * to be '/' or '%'. 517 */ 518 519 *ptr = NULL; 520 if (asc_to_ll(++ptr, &offset) == ERR) 521 fatal("%s: illegal offset\n", string); 522 523 /* 524 * The line below was added because INIT which did this for us 525 * was removed from compile in regexp.h 526 */ 527 528 string++; 529 expbuf = compile(string, (char *)0, (char *)0); 530 if (regerrno) 531 PERROR(regerrno); 532 to_line(findline(expbuf, offset)); 533 } 534 535 /* 536 * Sig handles breaks. When a break occurs the signal is reset, 537 * and fatal is called to clean up and print the argument which 538 * was being processed at the time the interrupt occured. 539 */ 540 541 /* ARGSUSED */ 542 static void 543 sig(int s) 544 { 545 (void) signal(SIGINT, sig); 546 fatal("Interrupt - program aborted at arg '%s'\n", targ); 547 } 548 549 /* 550 * To_line creates split files. 551 * To_line gets as its argument the line which the current argument 552 * referenced. To_line calls getfile for a new output stream, which 553 * does nothing if create is False. If to_line's argument is not LAST 554 * it checks that the current line is not greater than its argument. 555 * While the current line is less than the desired line to_line gets 556 * lines and flushes(error if EOF is reached). 557 * If to_line's argument is LAST, it checks for more lines, and gets 558 * and flushes lines till the end of file. 559 * Finally, to_line calls closefile to close the output stream. 560 */ 561 562 static void 563 to_line(offset_t ln) 564 { 565 outfile = getfile(); 566 if (ln != LAST) { 567 if (curline > ln) 568 fatal("%s - out of range\n", targ); 569 while (curline < ln) { 570 if (getaline(TRUE) == NULL) 571 fatal("%s - out of range\n", targ); 572 flush(); 573 } 574 } else /* last file */ 575 if (getaline(TRUE) != NULL) { 576 flush(); 577 for (;;) { 578 if (getaline(TRUE) == NULL) 579 break; 580 flush(); 581 } 582 } else 583 fatal("%s - out of range\n", targ); 584 closefile(); 585 } 586 587 static void 588 usage() 589 { 590 (void) fprintf(stderr, gettext( 591 "usage: csplit [-ks] [-f prefix] [-n number] " 592 "file arg1 ...argn\n")); 593 exit(1); 594 } 595