1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 23 /* All Rights Reserved */ 24 25 26 /* 27 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 /* 34 * csplit - Context or line file splitter 35 * Compile: cc -O -s -o csplit csplit.c 36 */ 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <string.h> 42 #include <ctype.h> 43 #include <errno.h> 44 #include <limits.h> 45 #include <regexpr.h> 46 #include <signal.h> 47 #include <locale.h> 48 #include <libintl.h> 49 50 #define LAST 0LL 51 #define ERR -1 52 #define FALSE 0 53 #define TRUE 1 54 #define EXPMODE 2 55 #define LINMODE 3 56 #define LINSIZ LINE_MAX /* POSIX.2 - read lines LINE_MAX long */ 57 58 /* Globals */ 59 60 char linbuf[LINSIZ]; /* Input line buffer */ 61 char *expbuf; 62 char tmpbuf[BUFSIZ]; /* Temporary buffer for stdin */ 63 char file[8192] = "xx"; /* File name buffer */ 64 char *targ; /* Arg ptr for error messages */ 65 char *sptr; 66 FILE *infile, *outfile; /* I/O file streams */ 67 int silent, keep, create; /* Flags: -s(ilent), -k(eep), (create) */ 68 int errflg; 69 int fiwidth = 2; /* file index width (output file names) */ 70 extern int optind; 71 extern char *optarg; 72 offset_t offset; /* Regular expression offset value */ 73 offset_t curline; /* Current line in input file */ 74 75 /* 76 * These defines are needed for regexp handling(see regexp(7)) 77 */ 78 #define PERROR(x) fatal("%s: Illegal Regular Expression\n", targ); 79 80 static int asc_to_ll(char *, long long *); 81 static void closefile(void); 82 static void fatal(char *, char *); 83 static offset_t findline(char *, offset_t); 84 static void flush(void); 85 static FILE *getfile(void); 86 static char *getline(int); 87 static void line_arg(char *); 88 static void num_arg(char *, int); 89 static void re_arg(char *); 90 static void sig(int); 91 static void to_line(offset_t); 92 static void usage(void); 93 94 int 95 main(int argc, char **argv) 96 { 97 int ch, mode; 98 char *ptr; 99 100 (void) setlocale(LC_ALL, ""); 101 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 102 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 103 #endif 104 (void) textdomain(TEXT_DOMAIN); 105 106 while ((ch = getopt(argc, argv, "skf:n:")) != EOF) { 107 switch (ch) { 108 case 'f': 109 (void) strcpy(file, optarg); 110 if ((ptr = strrchr(optarg, '/')) == NULL) 111 ptr = optarg; 112 else 113 ptr++; 114 115 break; 116 case 'n': /* POSIX.2 */ 117 for (ptr = optarg; *ptr != NULL; ptr++) 118 if (!isdigit((int)*ptr)) 119 fatal("-n num\n", NULL); 120 fiwidth = atoi(optarg); 121 break; 122 case 'k': 123 keep++; 124 break; 125 case 's': 126 silent++; 127 break; 128 case '?': 129 errflg++; 130 } 131 } 132 133 argv = &argv[optind]; 134 argc -= optind; 135 if (argc <= 1 || errflg) 136 usage(); 137 138 if (strcmp(*argv, "-") == 0) { 139 infile = tmpfile(); 140 141 while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) { 142 if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0) 143 if (errno == ENOSPC) { 144 (void) fprintf(stderr, "csplit: "); 145 (void) fprintf(stderr, gettext( 146 "No space left on device\n")); 147 exit(1); 148 } else { 149 (void) fprintf(stderr, "csplit: "); 150 (void) fprintf(stderr, gettext( 151 "Bad write to temporary " 152 "file\n")); 153 exit(1); 154 } 155 156 /* clear the buffer to get correct size when writing buffer */ 157 158 (void) memset(tmpbuf, '\0', sizeof (tmpbuf)); 159 } 160 rewind(infile); 161 } else if ((infile = fopen(*argv, "r")) == NULL) 162 fatal("Cannot open %s\n", *argv); 163 ++argv; 164 curline = (offset_t)1; 165 (void) signal(SIGINT, sig); 166 167 /* 168 * The following for loop handles the different argument types. 169 * A switch is performed on the first character of the argument 170 * and each case calls the appropriate argument handling routine. 171 */ 172 173 for (; *argv; ++argv) { 174 targ = *argv; 175 switch (**argv) { 176 case '/': 177 mode = EXPMODE; 178 create = TRUE; 179 re_arg(*argv); 180 break; 181 case '%': 182 mode = EXPMODE; 183 create = FALSE; 184 re_arg(*argv); 185 break; 186 case '{': 187 num_arg(*argv, mode); 188 mode = FALSE; 189 break; 190 default: 191 mode = LINMODE; 192 create = TRUE; 193 line_arg(*argv); 194 break; 195 } 196 } 197 create = TRUE; 198 to_line(LAST); 199 return (0); 200 } 201 202 /* 203 * asc_to_ll takes an ascii argument(str) and converts it to a long long(plc) 204 * It returns ERR if an illegal character. The reason that asc_to_ll 205 * does not return an answer(long long) is that any value for the long 206 * long is legal, and this version of asc_to_ll detects error strings. 207 */ 208 209 static int 210 asc_to_ll(char *str, long long *plc) 211 { 212 int f; 213 *plc = 0; 214 f = 0; 215 for (; ; str++) { 216 switch (*str) { 217 case ' ': 218 case '\t': 219 continue; 220 case '-': 221 f++; 222 /* FALLTHROUGH */ 223 case '+': 224 str++; 225 } 226 break; 227 } 228 for (; *str != NULL; str++) 229 if (*str >= '0' && *str <= '9') 230 *plc = *plc * 10 + *str - '0'; 231 else 232 return (ERR); 233 if (f) 234 *plc = -(*plc); 235 return (TRUE); /* not error */ 236 } 237 238 /* 239 * Closefile prints the byte count of the file created,(via fseeko 240 * and ftello), if the create flag is on and the silent flag is not on. 241 * If the create flag is on closefile then closes the file(fclose). 242 */ 243 244 static void 245 closefile() 246 { 247 if (!silent && create) { 248 (void) fseeko(outfile, (offset_t)0, SEEK_END); 249 (void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile)); 250 } 251 if (create) 252 (void) fclose(outfile); 253 } 254 255 /* 256 * Fatal handles error messages and cleanup. 257 * Because "arg" can be the global file, and the cleanup processing 258 * uses the global file, the error message is printed first. If the 259 * "keep" flag is not set, fatal unlinks all created files. If the 260 * "keep" flag is set, fatal closes the current file(if there is one). 261 * Fatal exits with a value of 1. 262 */ 263 264 static void 265 fatal(char *string, char *arg) 266 { 267 char *fls; 268 int num; 269 270 (void) fprintf(stderr, "csplit: "); 271 272 /* gettext dynamically replaces string */ 273 274 (void) fprintf(stderr, gettext(string), arg); 275 if (!keep) { 276 if (outfile) { 277 (void) fclose(outfile); 278 for (fls = file; *fls != '\0'; fls++) 279 continue; 280 fls -= fiwidth; 281 for (num = atoi(fls); num >= 0; num--) { 282 (void) sprintf(fls, "%.*d", fiwidth, num); 283 (void) unlink(file); 284 } 285 } 286 } else 287 if (outfile) 288 closefile(); 289 exit(1); 290 } 291 292 /* 293 * Findline returns the line number referenced by the current argument. 294 * Its arguments are a pointer to the compiled regular expression(expr), 295 * and an offset(oset). The variable lncnt is used to count the number 296 * of lines searched. First the current stream location is saved via 297 * ftello(), and getline is called so that R.E. searching starts at the 298 * line after the previously referenced line. The while loop checks 299 * that there are more lines(error if none), bumps the line count, and 300 * checks for the R.E. on each line. If the R.E. matches on one of the 301 * lines the old stream location is restored, and the line number 302 * referenced by the R.E. and the offset is returned. 303 */ 304 305 static offset_t 306 findline(char *expr, offset_t oset) 307 { 308 static int benhere = 0; 309 offset_t lncnt = 0, saveloc; 310 311 saveloc = ftello(infile); 312 if (curline != (offset_t)1 || benhere) /* If first line, first time, */ 313 (void) getline(FALSE); /* then don't skip */ 314 else 315 lncnt--; 316 benhere = 1; 317 while (getline(FALSE) != NULL) { 318 lncnt++; 319 if ((sptr = strrchr(linbuf, '\n')) != NULL) 320 *sptr = '\0'; 321 if (step(linbuf, expr)) { 322 (void) fseeko(infile, (offset_t)saveloc, SEEK_SET); 323 return (curline+lncnt+oset); 324 } 325 } 326 (void) fseeko(infile, (offset_t)saveloc, SEEK_SET); 327 return (curline+lncnt+oset+2); 328 } 329 330 /* 331 * Flush uses fputs to put lines on the output file stream(outfile) 332 * Since fputs does its own buffering, flush doesn't need to. 333 * Flush does nothing if the create flag is not set. 334 */ 335 336 static void 337 flush() 338 { 339 if (create) 340 (void) fputs(linbuf, outfile); 341 } 342 343 /* 344 * Getfile does nothing if the create flag is not set. If the create 345 * flag is set, getfile positions the file pointer(fptr) at the end of 346 * the file name prefix on the first call(fptr=0). The file counter is 347 * stored in the file name and incremented. If the subsequent fopen 348 * fails, the file name is copied to tfile for the error message, the 349 * previous file name is restored for cleanup, and fatal is called. If 350 * the fopen succeeds, the stream(opfil) is returned. 351 */ 352 353 FILE * 354 getfile() 355 { 356 static char *fptr; 357 static int ctr; 358 FILE *opfil; 359 char tfile[15]; 360 char *delim; 361 char savedelim; 362 363 if (create) { 364 if (fptr == 0) 365 for (fptr = file; *fptr != NULL; fptr++); 366 (void) sprintf(fptr, "%.*d", fiwidth, ctr++); 367 368 /* check for suffix length overflow */ 369 if (strlen(fptr) > fiwidth) { 370 fatal("Suffix longer than %ld chars; increase -n\n", 371 (char *)fiwidth); 372 } 373 374 /* check for filename length overflow */ 375 376 delim = strrchr(file, '/'); 377 if (delim == (char *)NULL) { 378 if (strlen(file) > pathconf(".", _PC_NAME_MAX)) { 379 fatal("Name too long: %s\n", file); 380 } 381 } else { 382 /* truncate file at pathname delim to do pathconf */ 383 savedelim = *delim; 384 *delim = '\0'; 385 /* 386 * file: pppppppp\0fffff\0 387 * ..... ^ file 388 * ............. ^ delim 389 */ 390 if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) { 391 fatal("Name too long: %s\n", delim + 1); 392 } 393 *delim = savedelim; 394 } 395 396 if ((opfil = fopen(file, "w")) == NULL) { 397 (void) strcpy(tfile, file); 398 (void) sprintf(fptr, "%.*d", fiwidth, (ctr-2)); 399 fatal("Cannot create %s\n", tfile); 400 } 401 return (opfil); 402 } 403 return (NULL); 404 } 405 406 /* 407 * Getline gets a line via fgets from the input stream "infile". 408 * The line is put into linbuf and may not be larger than LINSIZ. 409 * If getline is called with a non-zero value, the current line 410 * is bumped, otherwise it is not(for R.E. searching). 411 */ 412 413 static char * 414 getline(int bumpcur) 415 { 416 char *ret; 417 if (bumpcur) 418 curline++; 419 ret = fgets(linbuf, LINSIZ, infile); 420 return (ret); 421 } 422 423 /* 424 * Line_arg handles line number arguments. 425 * line_arg takes as its argument a pointer to a character string 426 * (assumed to be a line number). If that character string can be 427 * converted to a number(long long), to_line is called with that number, 428 * otherwise error. 429 */ 430 431 static void 432 line_arg(char *line) 433 { 434 long long to; 435 436 if (asc_to_ll(line, &to) == ERR) 437 fatal("%s: bad line number\n", line); 438 to_line(to); 439 } 440 441 /* 442 * Num_arg handles repeat arguments. 443 * Num_arg copies the numeric argument to "rep" (error if number is 444 * larger than 20 characters or } is left off). Num_arg then converts 445 * the number and checks for validity. Next num_arg checks the mode 446 * of the previous argument, and applys the argument the correct number 447 * of times. If the mode is not set properly its an error. 448 */ 449 450 static void 451 num_arg(char *arg, int md) 452 { 453 offset_t repeat, toline; 454 char rep[21]; 455 char *ptr; 456 int len; 457 458 ptr = rep; 459 for (++arg; *arg != '}'; arg += len) { 460 if (*arg == NULL) 461 fatal("%s: missing '}'\n", targ); 462 if ((len = mblen(arg, MB_LEN_MAX)) <= 0) 463 len = 1; 464 if ((ptr + len) >= &rep[20]) 465 fatal("%s: Repeat count too large\n", targ); 466 (void) memcpy(ptr, arg, len); 467 ptr += len; 468 } 469 *ptr = NULL; 470 if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L) 471 fatal("Illegal repeat count: %s\n", targ); 472 if (md == LINMODE) { 473 toline = offset = curline; 474 for (; repeat > 0LL; repeat--) { 475 toline += offset; 476 to_line(toline); 477 } 478 } else if (md == EXPMODE) 479 for (; repeat > 0LL; repeat--) 480 to_line(findline(expbuf, offset)); 481 else 482 fatal("No operation for %s\n", targ); 483 } 484 485 /* 486 * Re_arg handles regular expression arguments. 487 * Re_arg takes a csplit regular expression argument. It checks for 488 * delimiter balance, computes any offset, and compiles the regular 489 * expression. Findline is called with the compiled expression and 490 * offset, and returns the corresponding line number, which is used 491 * as input to the to_line function. 492 */ 493 494 static void 495 re_arg(char *string) 496 { 497 char *ptr; 498 char ch; 499 int len; 500 501 ch = *string; 502 ptr = string; 503 ptr++; 504 while (*ptr != ch) { 505 if (*ptr == '\\') 506 ++ptr; 507 508 if (*ptr == NULL) 509 fatal("%s: missing delimiter\n", targ); 510 511 if ((len = mblen(ptr, MB_LEN_MAX)) <= 0) 512 len = 1; 513 ptr += len; 514 } 515 516 /* 517 * The line below was added because compile no longer supports 518 * the fourth argument being passed. The fourth argument used 519 * to be '/' or '%'. 520 */ 521 522 *ptr = NULL; 523 if (asc_to_ll(++ptr, &offset) == ERR) 524 fatal("%s: illegal offset\n", string); 525 526 /* 527 * The line below was added because INIT which did this for us 528 * was removed from compile in regexp.h 529 */ 530 531 string++; 532 expbuf = compile(string, (char *)0, (char *)0); 533 if (regerrno) 534 PERROR(regerrno); 535 to_line(findline(expbuf, offset)); 536 } 537 538 /* 539 * Sig handles breaks. When a break occurs the signal is reset, 540 * and fatal is called to clean up and print the argument which 541 * was being processed at the time the interrupt occured. 542 */ 543 544 /* ARGSUSED */ 545 static void 546 sig(int s) 547 { 548 (void) signal(SIGINT, sig); 549 fatal("Interrupt - program aborted at arg '%s'\n", targ); 550 } 551 552 /* 553 * To_line creates split files. 554 * To_line gets as its argument the line which the current argument 555 * referenced. To_line calls getfile for a new output stream, which 556 * does nothing if create is False. If to_line's argument is not LAST 557 * it checks that the current line is not greater than its argument. 558 * While the current line is less than the desired line to_line gets 559 * lines and flushes(error if EOF is reached). 560 * If to_line's argument is LAST, it checks for more lines, and gets 561 * and flushes lines till the end of file. 562 * Finally, to_line calls closefile to close the output stream. 563 */ 564 565 static void 566 to_line(offset_t ln) 567 { 568 outfile = getfile(); 569 if (ln != LAST) { 570 if (curline > ln) 571 fatal("%s - out of range\n", targ); 572 while (curline < ln) { 573 if (getline(TRUE) == NULL) 574 fatal("%s - out of range\n", targ); 575 flush(); 576 } 577 } else /* last file */ 578 if (getline(TRUE) != NULL) { 579 flush(); 580 for (;;) { 581 if (getline(TRUE) == NULL) 582 break; 583 flush(); 584 } 585 } else 586 fatal("%s - out of range\n", targ); 587 closefile(); 588 } 589 590 static void 591 usage() 592 { 593 (void) fprintf(stderr, gettext( 594 "usage: csplit [-ks] [-f prefix] [-n number] " 595 "file arg1 ...argn\n")); 596 exit(1); 597 } 598