1 /* $OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */ 2 3 /* 4 * Written by Raymond Lai <ray@cyth.net>. 5 * Public domain. 6 */ 7 8 #include <sys/cdefs.h> 9 __FBSDID("$FreeBSD$"); 10 11 #include <sys/param.h> 12 #include <sys/queue.h> 13 #include <sys/stat.h> 14 #include <sys/types.h> 15 #include <sys/wait.h> 16 17 #include <ctype.h> 18 #include <err.h> 19 #include <errno.h> 20 #include <fcntl.h> 21 #include <getopt.h> 22 #include <limits.h> 23 #include <paths.h> 24 #include <stdint.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <unistd.h> 29 30 #include "common.h" 31 #include "extern.h" 32 33 #define DIFF_PATH "/usr/bin/diff" 34 35 #define WIDTH 126 36 /* 37 * Each column must be at least one character wide, plus three 38 * characters between the columns (space, [<|>], space). 39 */ 40 #define WIDTH_MIN 5 41 42 /* 3 kilobytes of chars */ 43 #define MAX_CHECK 768 44 45 /* A single diff line. */ 46 struct diffline { 47 STAILQ_ENTRY(diffline) diffentries; 48 char *left; 49 char div; 50 char *right; 51 }; 52 53 static void astrcat(char **, const char *); 54 static void enqueue(char *, char, char *); 55 static char *mktmpcpy(const char *); 56 static int istextfile(FILE *); 57 static void binexec(char *, char *, char *) __dead2; 58 static void freediff(struct diffline *); 59 static void int_usage(void); 60 static int parsecmd(FILE *, FILE *, FILE *); 61 static void printa(FILE *, size_t); 62 static void printc(FILE *, size_t, FILE *, size_t); 63 static void printcol(const char *, size_t *, const size_t); 64 static void printd(FILE *, size_t); 65 static void println(const char *, const char, const char *); 66 static void processq(void); 67 static void prompt(const char *, const char *); 68 static void usage(void) __dead2; 69 static char *xfgets(FILE *); 70 71 static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead); 72 static size_t line_width; /* width of a line (two columns and divider) */ 73 static size_t width; /* width of each column */ 74 static size_t file1ln, file2ln; /* line number of file1 and file2 */ 75 static int Iflag = 0; /* ignore sets matching regexp */ 76 static int lflag; /* print only left column for identical lines */ 77 static int sflag; /* skip identical lines */ 78 FILE *outfp; /* file to save changes to */ 79 const char *tmpdir; /* TMPDIR or /tmp */ 80 81 enum { 82 HELP_OPT = CHAR_MAX + 1, 83 NORMAL_OPT, 84 FCASE_SENSITIVE_OPT, 85 FCASE_IGNORE_OPT, 86 FROMFILE_OPT, 87 TOFILE_OPT, 88 UNIDIR_OPT, 89 STRIPCR_OPT, 90 HORIZ_OPT, 91 LEFTC_OPT, 92 SUPCL_OPT, 93 LF_OPT, 94 /* the following groupings must be in sequence */ 95 OLDGF_OPT, 96 NEWGF_OPT, 97 UNCGF_OPT, 98 CHGF_OPT, 99 OLDLF_OPT, 100 NEWLF_OPT, 101 UNCLF_OPT, 102 /* end order-sensitive enums */ 103 TSIZE_OPT, 104 HLINES_OPT, 105 LFILES_OPT, 106 DIFFPROG_OPT, 107 108 NOOP_OPT, 109 }; 110 111 static struct option longopts[] = { 112 /* options only processed in sdiff */ 113 { "left-column", no_argument, NULL, LEFTC_OPT }, 114 { "suppress-common-lines", no_argument, NULL, 's' }, 115 { "width", required_argument, NULL, 'w' }, 116 117 { "output", required_argument, NULL, 'o' }, 118 { "diff-program", required_argument, NULL, DIFFPROG_OPT }, 119 120 /* Options processed by diff. */ 121 { "ignore-file-name-case", no_argument, NULL, FCASE_IGNORE_OPT }, 122 { "no-ignore-file-name-case", no_argument, NULL, FCASE_SENSITIVE_OPT }, 123 { "strip-trailing-cr", no_argument, NULL, STRIPCR_OPT }, 124 { "tabsize", required_argument, NULL, TSIZE_OPT }, 125 { "help", no_argument, NULL, HELP_OPT }, 126 { "text", no_argument, NULL, 'a' }, 127 { "ignore-blank-lines", no_argument, NULL, 'B' }, 128 { "ignore-space-change", no_argument, NULL, 'b' }, 129 { "minimal", no_argument, NULL, 'd' }, 130 { "ignore-tab-expansion", no_argument, NULL, 'E' }, 131 { "ignore-matching-lines", required_argument, NULL, 'I' }, 132 { "ignore-case", no_argument, NULL, 'i' }, 133 { "expand-tabs", no_argument, NULL, 't' }, 134 { "speed-large-files", no_argument, NULL, 'H' }, 135 { "ignore-all-space", no_argument, NULL, 'W' }, 136 137 { NULL, 0, NULL, '\0'} 138 }; 139 140 static const char *help_msg[] = { 141 "\nusage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n", 142 "\t-l, --left-column, Only print the left column for identical lines.", 143 "\t-o OUTFILE, --output=OUTFILE, nteractively merge file1 and file2 into outfile.", 144 "\t-s, --suppress-common-lines, Skip identical lines.", 145 "\t-w WIDTH, --width=WIDTH, Print a maximum of WIDTH characters on each line.", 146 "\tOptions passed to diff(1) are:", 147 "\t\t-a, --text, Treat file1 and file2 as text files.", 148 "\t\t-b, --ignore-trailing-cr, Ignore trailing blank spaces.", 149 "\t\t-d, --minimal, Minimize diff size.", 150 "\t\t-I RE, --ignore-matching-lines=RE, Ignore changes whose line matches RE.", 151 "\t\t-i, --ignore-case, Do a case-insensitive comparison.", 152 "\t\t-t, --expand-tabs Expand tabs to spaces.", 153 "\t\t-W, --ignore-all-spaces, Ignore all spaces.", 154 "\t\t--speed-large-files, Assume large file with scattered changes.", 155 "\t\t--strip-trailing-cr, Strip trailing carriage return.", 156 "\t\t--ignore-file-name-case, Ignore case of file names.", 157 "\t\t--no-ignore-file-name-case, Do not ignore file name case", 158 "\t\t--tabsize NUM, Change size of tabs (default 8.)", 159 160 NULL, 161 }; 162 163 /* 164 * Create temporary file if source_file is not a regular file. 165 * Returns temporary file name if one was malloced, NULL if unnecessary. 166 */ 167 static char * 168 mktmpcpy(const char *source_file) 169 { 170 struct stat sb; 171 ssize_t rcount; 172 int ifd, ofd; 173 u_char buf[BUFSIZ]; 174 char *target_file; 175 176 /* Open input and output. */ 177 ifd = open(source_file, O_RDONLY, 0); 178 /* File was opened successfully. */ 179 if (ifd != -1) { 180 if (fstat(ifd, &sb) == -1) 181 err(2, "error getting file status from %s", source_file); 182 183 /* Regular file. */ 184 if (S_ISREG(sb.st_mode)) { 185 close(ifd); 186 return (NULL); 187 } 188 } else { 189 /* If ``-'' does not exist the user meant stdin. */ 190 if (errno == ENOENT && strcmp(source_file, "-") == 0) 191 ifd = STDIN_FILENO; 192 else 193 err(2, "error opening %s", source_file); 194 } 195 196 /* Not a regular file, so copy input into temporary file. */ 197 if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1) 198 err(2, "asprintf"); 199 if ((ofd = mkstemp(target_file)) == -1) { 200 warn("error opening %s", target_file); 201 goto FAIL; 202 } 203 while ((rcount = read(ifd, buf, sizeof(buf))) != -1 && 204 rcount != 0) { 205 ssize_t wcount; 206 207 wcount = write(ofd, buf, (size_t)rcount); 208 if (-1 == wcount || rcount != wcount) { 209 warn("error writing to %s", target_file); 210 goto FAIL; 211 } 212 } 213 if (rcount == -1) { 214 warn("error reading from %s", source_file); 215 goto FAIL; 216 } 217 218 close(ifd); 219 close(ofd); 220 221 return (target_file); 222 223 FAIL: 224 unlink(target_file); 225 exit(2); 226 } 227 228 int 229 main(int argc, char **argv) 230 { 231 FILE *diffpipe=NULL, *file1, *file2; 232 size_t diffargc = 0, wflag = WIDTH; 233 int ch, fd[2] = {-1}, status; 234 pid_t pid=0; 235 const char *outfile = NULL; 236 struct option *popt; 237 char **diffargv, *diffprog = DIFF_PATH, *filename1, *filename2, 238 *tmp1, *tmp2, *s1, *s2; 239 int i; 240 241 /* 242 * Process diff flags. 243 */ 244 /* 245 * Allocate memory for diff arguments and NULL. 246 * Each flag has at most one argument, so doubling argc gives an 247 * upper limit of how many diff args can be passed. argv[0], 248 * file1, and file2 won't have arguments so doubling them will 249 * waste some memory; however we need an extra space for the 250 * NULL at the end, so it sort of works out. 251 */ 252 if (!(diffargv = calloc(argc, sizeof(char **) * 2))) 253 err(2, "main"); 254 255 /* Add first argument, the program name. */ 256 diffargv[diffargc++] = diffprog; 257 258 /* create a dynamic string for merging single-switch options */ 259 if ( asprintf(&diffargv[diffargc++], "-") < 0 ) 260 err(2, "main"); 261 262 while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:", 263 longopts, NULL)) != -1) { 264 const char *errstr; 265 266 switch (ch) { 267 /* only compatible --long-name-form with diff */ 268 case FCASE_IGNORE_OPT: 269 case FCASE_SENSITIVE_OPT: 270 case STRIPCR_OPT: 271 case TSIZE_OPT: 272 case 'S': 273 break; 274 /* combine no-arg single switches */ 275 case 'a': 276 case 'B': 277 case 'b': 278 case 'd': 279 case 'E': 280 case 'i': 281 case 't': 282 case 'H': 283 case 'W': 284 for(popt = longopts; ch != popt->val && popt->name != NULL; popt++); 285 diffargv[1] = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2); 286 /* 287 * In diff, the 'W' option is 'w' and the 'w' is 'W'. 288 */ 289 if (ch == 'W') 290 sprintf(diffargv[1], "%sw", diffargv[1]); 291 else 292 sprintf(diffargv[1], "%s%c", diffargv[1], ch); 293 break; 294 case DIFFPROG_OPT: 295 diffargv[0] = diffprog = optarg; 296 break; 297 case 'I': 298 Iflag = 1; 299 diffargv[diffargc++] = "-I"; 300 diffargv[diffargc++] = optarg; 301 break; 302 case 'l': 303 lflag = 1; 304 break; 305 case 'o': 306 outfile = optarg; 307 break; 308 case 's': 309 sflag = 1; 310 break; 311 case 'w': 312 wflag = strtonum(optarg, WIDTH_MIN, 313 INT_MAX, &errstr); 314 if (errstr) 315 errx(2, "width is %s: %s", errstr, optarg); 316 break; 317 case HELP_OPT: 318 for (i = 0; help_msg[i] != NULL; i++) 319 printf("%s\n", help_msg[i]); 320 exit(0); 321 break; 322 default: 323 usage(); 324 break; 325 } 326 } 327 328 /* no single switches were used */ 329 if (strcmp(diffargv[1], "-") == 0 ) { 330 for ( i = 1; i < argc-1; i++) { 331 diffargv[i] = diffargv[i+1]; 332 } 333 diffargv[diffargc-1] = NULL; 334 diffargc--; 335 } 336 337 argc -= optind; 338 argv += optind; 339 340 if (argc != 2) 341 usage(); 342 343 if (outfile && (outfp = fopen(outfile, "w")) == NULL) 344 err(2, "could not open: %s", optarg); 345 346 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') 347 tmpdir = _PATH_TMP; 348 349 filename1 = argv[0]; 350 filename2 = argv[1]; 351 352 /* 353 * Create temporary files for diff and sdiff to share if file1 354 * or file2 are not regular files. This allows sdiff and diff 355 * to read the same inputs if one or both inputs are stdin. 356 * 357 * If any temporary files were created, their names would be 358 * saved in tmp1 or tmp2. tmp1 should never equal tmp2. 359 */ 360 tmp1 = tmp2 = NULL; 361 /* file1 and file2 are the same, so copy to same temp file. */ 362 if (strcmp(filename1, filename2) == 0) { 363 if ((tmp1 = mktmpcpy(filename1))) 364 filename1 = filename2 = tmp1; 365 /* Copy file1 and file2 into separate temp files. */ 366 } else { 367 if ((tmp1 = mktmpcpy(filename1))) 368 filename1 = tmp1; 369 if ((tmp2 = mktmpcpy(filename2))) 370 filename2 = tmp2; 371 } 372 373 diffargv[diffargc++] = filename1; 374 diffargv[diffargc++] = filename2; 375 /* Add NULL to end of array to indicate end of array. */ 376 diffargv[diffargc++] = NULL; 377 378 /* Subtract column divider and divide by two. */ 379 width = (wflag - 3) / 2; 380 /* Make sure line_width can fit in size_t. */ 381 if (width > (SIZE_MAX - 3) / 2) 382 errx(2, "width is too large: %zu", width); 383 line_width = width * 2 + 3; 384 385 if (pipe(fd)) 386 err(2, "pipe"); 387 388 switch (pid = fork()) { 389 case 0: 390 /* child */ 391 /* We don't read from the pipe. */ 392 close(fd[0]); 393 if (dup2(fd[1], STDOUT_FILENO) == -1) 394 err(2, "child could not duplicate descriptor"); 395 /* Free unused descriptor. */ 396 close(fd[1]); 397 execvp(diffprog, diffargv); 398 err(2, "could not execute diff: %s", diffprog); 399 break; 400 case -1: 401 err(2, "could not fork"); 402 break; 403 } 404 405 /* parent */ 406 /* We don't write to the pipe. */ 407 close(fd[1]); 408 409 /* Open pipe to diff command. */ 410 if ((diffpipe = fdopen(fd[0], "r")) == NULL) 411 err(2, "could not open diff pipe"); 412 413 if ((file1 = fopen(filename1, "r")) == NULL) 414 err(2, "could not open %s", filename1); 415 if ((file2 = fopen(filename2, "r")) == NULL) 416 err(2, "could not open %s", filename2); 417 if (!istextfile(file1) || !istextfile(file2)) { 418 /* Close open files and pipe, delete temps */ 419 fclose(file1); 420 fclose(file2); 421 if (diffpipe != NULL) 422 fclose(diffpipe); 423 if (tmp1) 424 if (unlink(tmp1)) 425 warn("Error deleting %s.", tmp1); 426 if (tmp2) 427 if (unlink(tmp2)) 428 warn("Error deleting %s.", tmp2); 429 free(tmp1); 430 free(tmp2); 431 binexec(diffprog, filename1, filename2); 432 } 433 /* Line numbers start at one. */ 434 file1ln = file2ln = 1; 435 436 /* Read and parse diff output. */ 437 while (parsecmd(diffpipe, file1, file2) != EOF) 438 ; 439 fclose(diffpipe); 440 441 /* Wait for diff to exit. */ 442 if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) || 443 WEXITSTATUS(status) >= 2) 444 err(2, "diff exited abnormally."); 445 446 /* Delete and free unneeded temporary files. */ 447 if (tmp1) 448 if (unlink(tmp1)) 449 warn("Error deleting %s.", tmp1); 450 if (tmp2) 451 if (unlink(tmp2)) 452 warn("Error deleting %s.", tmp2); 453 free(tmp1); 454 free(tmp2); 455 filename1 = filename2 = tmp1 = tmp2 = NULL; 456 457 /* No more diffs, so print common lines. */ 458 if (lflag) 459 while ((s1 = xfgets(file1))) 460 enqueue(s1, ' ', NULL); 461 else 462 for (;;) { 463 s1 = xfgets(file1); 464 s2 = xfgets(file2); 465 if (s1 || s2) 466 enqueue(s1, ' ', s2); 467 else 468 break; 469 } 470 fclose(file1); 471 fclose(file2); 472 /* Process unmodified lines. */ 473 processq(); 474 475 /* Return diff exit status. */ 476 return (WEXITSTATUS(status)); 477 } 478 479 /* 480 * When sdiff/zsdiff detects a binary file as input, executes them with 481 * diff/zdiff to maintain the same behavior as GNU sdiff with binary input. 482 */ 483 static void 484 binexec(char *diffprog, char *f1, char *f2) 485 { 486 487 char *args[] = {diffprog, f1, f2, (char *) 0}; 488 execv(diffprog, args); 489 490 /* If execv() fails, sdiff's execution will continue below. */ 491 errx(1, "could not execute diff process"); 492 } 493 494 /* 495 * Checks whether a file appears to be a text file. 496 */ 497 static int 498 istextfile(FILE *f) 499 { 500 int ch, i; 501 502 if (f == NULL) 503 return (1); 504 rewind(f); 505 for (i = 0; i <= MAX_CHECK; i++) { 506 ch = fgetc(f); 507 if (ch == '\0') { 508 rewind(f); 509 return (0); 510 } 511 if (ch == EOF) 512 break; 513 } 514 rewind(f); 515 return (1); 516 } 517 518 /* 519 * Prints an individual column (left or right), taking into account 520 * that tabs are variable-width. Takes a string, the current column 521 * the cursor is on the screen, and the maximum value of the column. 522 * The column value is updated as we go along. 523 */ 524 static void 525 printcol(const char *s, size_t *col, const size_t col_max) 526 { 527 528 for (; *s && *col < col_max; ++s) { 529 size_t new_col; 530 531 switch (*s) { 532 case '\t': 533 /* 534 * If rounding to next multiple of eight causes 535 * an integer overflow, just return. 536 */ 537 if (*col > SIZE_MAX - 8) 538 return; 539 540 /* Round to next multiple of eight. */ 541 new_col = (*col / 8 + 1) * 8; 542 543 /* 544 * If printing the tab goes past the column 545 * width, don't print it and just quit. 546 */ 547 if (new_col > col_max) 548 return; 549 *col = new_col; 550 break; 551 default: 552 ++(*col); 553 } 554 putchar(*s); 555 } 556 } 557 558 /* 559 * Prompts user to either choose between two strings or edit one, both, 560 * or neither. 561 */ 562 static void 563 prompt(const char *s1, const char *s2) 564 { 565 char *cmd; 566 567 /* Print command prompt. */ 568 putchar('%'); 569 570 /* Get user input. */ 571 for (; (cmd = xfgets(stdin)); free(cmd)) { 572 const char *p; 573 574 /* Skip leading whitespace. */ 575 for (p = cmd; isspace(*p); ++p) 576 ; 577 switch (*p) { 578 case 'e': 579 /* Skip `e'. */ 580 ++p; 581 if (eparse(p, s1, s2) == -1) 582 goto USAGE; 583 break; 584 case 'l': 585 case '1': 586 /* Choose left column as-is. */ 587 if (s1 != NULL) 588 fprintf(outfp, "%s\n", s1); 589 /* End of command parsing. */ 590 break; 591 case 'q': 592 goto QUIT; 593 case 'r': 594 case '2': 595 /* Choose right column as-is. */ 596 if (s2 != NULL) 597 fprintf(outfp, "%s\n", s2); 598 /* End of command parsing. */ 599 break; 600 case 's': 601 sflag = 1; 602 goto PROMPT; 603 case 'v': 604 sflag = 0; 605 /* FALLTHROUGH */ 606 default: 607 /* Interactive usage help. */ 608 USAGE: 609 int_usage(); 610 PROMPT: 611 putchar('%'); 612 613 /* Prompt user again. */ 614 continue; 615 } 616 free(cmd); 617 return; 618 } 619 620 /* 621 * If there was no error, we received an EOF from stdin, so we 622 * should quit. 623 */ 624 QUIT: 625 fclose(outfp); 626 exit(0); 627 } 628 629 /* 630 * Takes two strings, separated by a column divider. NULL strings are 631 * treated as empty columns. If the divider is the ` ' character, the 632 * second column is not printed (-l flag). In this case, the second 633 * string must be NULL. When the second column is NULL, the divider 634 * does not print the trailing space following the divider character. 635 * 636 * Takes into account that tabs can take multiple columns. 637 */ 638 static void 639 println(const char *s1, const char div, const char *s2) 640 { 641 size_t col; 642 643 /* Print first column. Skips if s1 == NULL. */ 644 col = 0; 645 if (s1) { 646 /* Skip angle bracket and space. */ 647 printcol(s1, &col, width); 648 649 } 650 651 /* Otherwise, we pad this column up to width. */ 652 for (; col < width; ++col) 653 putchar(' '); 654 655 /* Only print left column. */ 656 if (div == ' ' && !s2) { 657 printf(" (\n"); 658 return; 659 } 660 661 /* 662 * Print column divider. If there is no second column, we don't 663 * need to add the space for padding. 664 */ 665 if (!s2) { 666 printf(" %c\n", div); 667 return; 668 } 669 printf(" %c ", div); 670 col += 3; 671 672 /* Skip angle bracket and space. */ 673 printcol(s2, &col, line_width); 674 675 putchar('\n'); 676 } 677 678 /* 679 * Reads a line from file and returns as a string. If EOF is reached, 680 * NULL is returned. The returned string must be freed afterwards. 681 */ 682 static char * 683 xfgets(FILE *file) 684 { 685 size_t linecap; 686 ssize_t l; 687 char *s; 688 689 clearerr(file); 690 linecap = 0; 691 s = NULL; 692 693 if ((l = getline(&s, &linecap, file)) == -1) { 694 if (ferror(file)) 695 err(2, "error reading file"); 696 return (NULL); 697 } 698 699 if (s[l-1] == '\n') 700 s[l-1] = '\0'; 701 702 return (s); 703 } 704 705 /* 706 * Parse ed commands from diffpipe and print lines from file1 (lines 707 * to change or delete) or file2 (lines to add or change). 708 * Returns EOF or 0. 709 */ 710 static int 711 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2) 712 { 713 size_t file1start, file1end, file2start, file2end, n; 714 /* ed command line and pointer to characters in line */ 715 char *line, *p, *q; 716 const char *errstr; 717 char c, cmd; 718 719 /* Read ed command. */ 720 if (!(line = xfgets(diffpipe))) 721 return (EOF); 722 723 p = line; 724 /* Go to character after line number. */ 725 while (isdigit(*p)) 726 ++p; 727 c = *p; 728 *p++ = 0; 729 file1start = strtonum(line, 0, INT_MAX, &errstr); 730 if (errstr) 731 errx(2, "file1 start is %s: %s", errstr, line); 732 733 /* A range is specified for file1. */ 734 if (c == ',') { 735 q = p; 736 /* Go to character after file2end. */ 737 while (isdigit(*p)) 738 ++p; 739 c = *p; 740 *p++ = 0; 741 file1end = strtonum(q, 0, INT_MAX, &errstr); 742 if (errstr) 743 errx(2, "file1 end is %s: %s", errstr, line); 744 if (file1start > file1end) 745 errx(2, "invalid line range in file1: %s", line); 746 } else 747 file1end = file1start; 748 749 cmd = c; 750 /* Check that cmd is valid. */ 751 if (!(cmd == 'a' || cmd == 'c' || cmd == 'd')) 752 errx(2, "ed command not recognized: %c: %s", cmd, line); 753 754 q = p; 755 /* Go to character after line number. */ 756 while (isdigit(*p)) 757 ++p; 758 c = *p; 759 *p++ = 0; 760 file2start = strtonum(q, 0, INT_MAX, &errstr); 761 if (errstr) 762 errx(2, "file2 start is %s: %s", errstr, line); 763 764 /* 765 * There should either be a comma signifying a second line 766 * number or the line should just end here. 767 */ 768 if (c != ',' && c != '\0') 769 errx(2, "invalid line range in file2: %c: %s", c, line); 770 771 if (c == ',') { 772 773 file2end = strtonum(p, 0, INT_MAX, &errstr); 774 if (errstr) 775 errx(2, "file2 end is %s: %s", errstr, line); 776 if (file2start >= file2end) 777 errx(2, "invalid line range in file2: %s", line); 778 } else 779 file2end = file2start; 780 781 /* Appends happen _after_ stated line. */ 782 if (cmd == 'a') { 783 if (file1start != file1end) 784 errx(2, "append cannot have a file1 range: %s", 785 line); 786 if (file1start == SIZE_MAX) 787 errx(2, "file1 line range too high: %s", line); 788 file1start = ++file1end; 789 } 790 /* 791 * I'm not sure what the deal is with the line numbers for 792 * deletes, though. 793 */ 794 else if (cmd == 'd') { 795 if (file2start != file2end) 796 errx(2, "delete cannot have a file2 range: %s", 797 line); 798 if (file2start == SIZE_MAX) 799 errx(2, "file2 line range too high: %s", line); 800 file2start = ++file2end; 801 } 802 803 /* 804 * Continue reading file1 and file2 until we reach line numbers 805 * specified by diff. Should only happen with -I flag. 806 */ 807 for (; file1ln < file1start && file2ln < file2start; 808 ++file1ln, ++file2ln) { 809 char *s1, *s2; 810 811 if (!(s1 = xfgets(file1))) 812 errx(2, "file1 shorter than expected"); 813 if (!(s2 = xfgets(file2))) 814 errx(2, "file2 shorter than expected"); 815 816 /* If the -l flag was specified, print only left column. */ 817 if (lflag) { 818 free(s2); 819 /* 820 * XXX - If -l and -I are both specified, all 821 * unchanged or ignored lines are shown with a 822 * `(' divider. This matches GNU sdiff, but I 823 * believe it is a bug. Just check out: 824 * gsdiff -l -I '^$' samefile samefile. 825 */ 826 if (Iflag) 827 enqueue(s1, '(', NULL); 828 else 829 enqueue(s1, ' ', NULL); 830 } else 831 enqueue(s1, ' ', s2); 832 } 833 /* Ignore deleted lines. */ 834 for (; file1ln < file1start; ++file1ln) { 835 char *s; 836 837 if (!(s = xfgets(file1))) 838 errx(2, "file1 shorter than expected"); 839 840 enqueue(s, '(', NULL); 841 } 842 /* Ignore added lines. */ 843 for (; file2ln < file2start; ++file2ln) { 844 char *s; 845 846 if (!(s = xfgets(file2))) 847 errx(2, "file2 shorter than expected"); 848 849 /* If -l flag was given, don't print right column. */ 850 if (lflag) 851 free(s); 852 else 853 enqueue(NULL, ')', s); 854 } 855 856 /* Process unmodified or skipped lines. */ 857 processq(); 858 859 switch (cmd) { 860 case 'a': 861 printa(file2, file2end); 862 n = file2end - file2start + 1; 863 break; 864 case 'c': 865 printc(file1, file1end, file2, file2end); 866 n = file1end - file1start + 1 + 1 + file2end - file2start + 1; 867 break; 868 case 'd': 869 printd(file1, file1end); 870 n = file1end - file1start + 1; 871 break; 872 default: 873 errx(2, "invalid diff command: %c: %s", cmd, line); 874 } 875 free(line); 876 877 /* Skip to next ed line. */ 878 while (n--) { 879 if (!(line = xfgets(diffpipe))) 880 errx(2, "diff ended early"); 881 free(line); 882 } 883 884 return (0); 885 } 886 887 /* 888 * Queues up a diff line. 889 */ 890 static void 891 enqueue(char *left, char div, char *right) 892 { 893 struct diffline *diffp; 894 895 if (!(diffp = malloc(sizeof(struct diffline)))) 896 err(2, "enqueue"); 897 diffp->left = left; 898 diffp->div = div; 899 diffp->right = right; 900 STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries); 901 } 902 903 /* 904 * Free a diffline structure and its elements. 905 */ 906 static void 907 freediff(struct diffline *diffp) 908 { 909 910 free(diffp->left); 911 free(diffp->right); 912 free(diffp); 913 } 914 915 /* 916 * Append second string into first. Repeated appends to the same string 917 * are cached, making this an O(n) function, where n = strlen(append). 918 */ 919 static void 920 astrcat(char **s, const char *append) 921 { 922 /* Length of string in previous run. */ 923 static size_t offset = 0; 924 size_t newsiz; 925 /* 926 * String from previous run. Compared to *s to see if we are 927 * dealing with the same string. If so, we can use offset. 928 */ 929 static const char *oldstr = NULL; 930 char *newstr; 931 932 /* 933 * First string is NULL, so just copy append. 934 */ 935 if (!*s) { 936 if (!(*s = strdup(append))) 937 err(2, "astrcat"); 938 939 /* Keep track of string. */ 940 offset = strlen(*s); 941 oldstr = *s; 942 943 return; 944 } 945 946 /* 947 * *s is a string so concatenate. 948 */ 949 950 /* Did we process the same string in the last run? */ 951 /* 952 * If this is a different string from the one we just processed 953 * cache new string. 954 */ 955 if (oldstr != *s) { 956 offset = strlen(*s); 957 oldstr = *s; 958 } 959 960 /* Size = strlen(*s) + \n + strlen(append) + '\0'. */ 961 newsiz = offset + 1 + strlen(append) + 1; 962 963 /* Resize *s to fit new string. */ 964 newstr = realloc(*s, newsiz); 965 if (newstr == NULL) 966 err(2, "astrcat"); 967 *s = newstr; 968 969 /* *s + offset should be end of string. */ 970 /* Concatenate. */ 971 strlcpy(*s + offset, "\n", newsiz - offset); 972 strlcat(*s + offset, append, newsiz - offset); 973 974 /* New string length should be exactly newsiz - 1 characters. */ 975 /* Store generated string's values. */ 976 offset = newsiz - 1; 977 oldstr = *s; 978 } 979 980 /* 981 * Process diff set queue, printing, prompting, and saving each diff 982 * line stored in queue. 983 */ 984 static void 985 processq(void) 986 { 987 struct diffline *diffp; 988 char divc, *left, *right; 989 990 /* Don't process empty queue. */ 991 if (STAILQ_EMPTY(&diffhead)) 992 return; 993 994 /* Remember the divider. */ 995 divc = STAILQ_FIRST(&diffhead)->div; 996 997 left = NULL; 998 right = NULL; 999 /* 1000 * Go through set of diffs, concatenating each line in left or 1001 * right column into two long strings, `left' and `right'. 1002 */ 1003 STAILQ_FOREACH(diffp, &diffhead, diffentries) { 1004 /* 1005 * Print changed lines if -s was given, 1006 * print all lines if -s was not given. 1007 */ 1008 if (!sflag || diffp->div == '|' || diffp->div == '<' || 1009 diffp->div == '>') 1010 println(diffp->left, diffp->div, diffp->right); 1011 1012 /* Append new lines to diff set. */ 1013 if (diffp->left) 1014 astrcat(&left, diffp->left); 1015 if (diffp->right) 1016 astrcat(&right, diffp->right); 1017 } 1018 1019 /* Empty queue and free each diff line and its elements. */ 1020 while (!STAILQ_EMPTY(&diffhead)) { 1021 diffp = STAILQ_FIRST(&diffhead); 1022 STAILQ_REMOVE_HEAD(&diffhead, diffentries); 1023 freediff(diffp); 1024 } 1025 1026 /* Write to outfp, prompting user if lines are different. */ 1027 if (outfp) 1028 switch (divc) { 1029 case ' ': case '(': case ')': 1030 fprintf(outfp, "%s\n", left); 1031 break; 1032 case '|': case '<': case '>': 1033 prompt(left, right); 1034 break; 1035 default: 1036 errx(2, "invalid divider: %c", divc); 1037 } 1038 1039 /* Free left and right. */ 1040 free(left); 1041 free(right); 1042 } 1043 1044 /* 1045 * Print lines following an (a)ppend command. 1046 */ 1047 static void 1048 printa(FILE *file, size_t line2) 1049 { 1050 char *line; 1051 1052 for (; file2ln <= line2; ++file2ln) { 1053 if (!(line = xfgets(file))) 1054 errx(2, "append ended early"); 1055 enqueue(NULL, '>', line); 1056 } 1057 processq(); 1058 } 1059 1060 /* 1061 * Print lines following a (c)hange command, from file1ln to file1end 1062 * and from file2ln to file2end. 1063 */ 1064 static void 1065 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end) 1066 { 1067 struct fileline { 1068 STAILQ_ENTRY(fileline) fileentries; 1069 char *line; 1070 }; 1071 STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead); 1072 1073 /* Read lines to be deleted. */ 1074 for (; file1ln <= file1end; ++file1ln) { 1075 struct fileline *linep; 1076 char *line1; 1077 1078 /* Read lines from both. */ 1079 if (!(line1 = xfgets(file1))) 1080 errx(2, "error reading file1 in delete in change"); 1081 1082 /* Add to delete queue. */ 1083 if (!(linep = malloc(sizeof(struct fileline)))) 1084 err(2, "printc"); 1085 linep->line = line1; 1086 STAILQ_INSERT_TAIL(&delqhead, linep, fileentries); 1087 } 1088 1089 /* Process changed lines.. */ 1090 for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end; 1091 ++file2ln) { 1092 struct fileline *del; 1093 char *add; 1094 1095 /* Get add line. */ 1096 if (!(add = xfgets(file2))) 1097 errx(2, "error reading add in change"); 1098 1099 del = STAILQ_FIRST(&delqhead); 1100 enqueue(del->line, '|', add); 1101 STAILQ_REMOVE_HEAD(&delqhead, fileentries); 1102 /* 1103 * Free fileline structure but not its elements since 1104 * they are queued up. 1105 */ 1106 free(del); 1107 } 1108 processq(); 1109 1110 /* Process remaining lines to add. */ 1111 for (; file2ln <= file2end; ++file2ln) { 1112 char *add; 1113 1114 /* Get add line. */ 1115 if (!(add = xfgets(file2))) 1116 errx(2, "error reading add in change"); 1117 1118 enqueue(NULL, '>', add); 1119 } 1120 processq(); 1121 1122 /* Process remaining lines to delete. */ 1123 while (!STAILQ_EMPTY(&delqhead)) { 1124 struct fileline *filep; 1125 1126 filep = STAILQ_FIRST(&delqhead); 1127 enqueue(filep->line, '<', NULL); 1128 STAILQ_REMOVE_HEAD(&delqhead, fileentries); 1129 free(filep); 1130 } 1131 processq(); 1132 } 1133 1134 /* 1135 * Print deleted lines from file, from file1ln to file1end. 1136 */ 1137 static void 1138 printd(FILE *file1, size_t file1end) 1139 { 1140 char *line1; 1141 1142 /* Print out lines file1ln to line2. */ 1143 for (; file1ln <= file1end; ++file1ln) { 1144 if (!(line1 = xfgets(file1))) 1145 errx(2, "file1 ended early in delete"); 1146 enqueue(line1, '<', NULL); 1147 } 1148 processq(); 1149 } 1150 1151 /* 1152 * Interactive mode usage. 1153 */ 1154 static void 1155 int_usage(void) 1156 { 1157 1158 puts("e:\tedit blank diff\n" 1159 "eb:\tedit both diffs concatenated\n" 1160 "el:\tedit left diff\n" 1161 "er:\tedit right diff\n" 1162 "l | 1:\tchoose left diff\n" 1163 "r | 2:\tchoose right diff\n" 1164 "s:\tsilent mode--don't print identical lines\n" 1165 "v:\tverbose mode--print identical lines\n" 1166 "q:\tquit"); 1167 } 1168 1169 static void 1170 usage(void) 1171 { 1172 1173 fprintf(stderr, 1174 "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1" 1175 " file2\n"); 1176 exit(2); 1177 } 1178