1 /* $OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */ 2 3 /* 4 * Written by Raymond Lai <ray@cyth.net>. 5 * Public domain. 6 */ 7 8 #include <sys/param.h> 9 #include <sys/queue.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <sys/wait.h> 13 14 #include <ctype.h> 15 #include <err.h> 16 #include <errno.h> 17 #include <fcntl.h> 18 #include <getopt.h> 19 #include <limits.h> 20 #include <paths.h> 21 #include <stdint.h> 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <unistd.h> 26 27 #include "extern.h" 28 29 static char diff_path[] = "/usr/bin/diff"; 30 31 #define WIDTH 126 32 /* 33 * Each column must be at least one character wide, plus three 34 * characters between the columns (space, [<|>], space). 35 */ 36 #define WIDTH_MIN 5 37 38 /* 3 kilobytes of chars */ 39 #define MAX_CHECK 768 40 41 /* A single diff line. */ 42 struct diffline { 43 STAILQ_ENTRY(diffline) diffentries; 44 char *left; 45 char div; 46 char *right; 47 }; 48 49 static void astrcat(char **, const char *); 50 static void enqueue(char *, char, char *); 51 static char *mktmpcpy(const char *); 52 static int istextfile(FILE *); 53 static void binexec(char *, char *, char *) __dead2; 54 static void freediff(struct diffline *); 55 static void int_usage(void); 56 static int parsecmd(FILE *, FILE *, FILE *); 57 static void printa(FILE *, size_t); 58 static void printc(FILE *, size_t, FILE *, size_t); 59 static void printcol(const char *, size_t *, const size_t); 60 static void printd(FILE *, size_t); 61 static void println(const char *, const char, const char *); 62 static void processq(void); 63 static void prompt(const char *, const char *); 64 static void usage(void) __dead2; 65 static char *xfgets(FILE *); 66 67 static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead); 68 static size_t line_width; /* width of a line (two columns and divider) */ 69 static size_t width; /* width of each column */ 70 static size_t file1ln, file2ln; /* line number of file1 and file2 */ 71 static int Iflag = 0; /* ignore sets matching regexp */ 72 static int lflag; /* print only left column for identical lines */ 73 static int sflag; /* skip identical lines */ 74 FILE *outfp; /* file to save changes to */ 75 const char *tmpdir; /* TMPDIR or /tmp */ 76 77 enum { 78 HELP_OPT = CHAR_MAX + 1, 79 NORMAL_OPT, 80 FCASE_SENSITIVE_OPT, 81 FCASE_IGNORE_OPT, 82 STRIPCR_OPT, 83 TSIZE_OPT, 84 DIFFPROG_OPT, 85 }; 86 87 static struct option longopts[] = { 88 /* options only processed in sdiff */ 89 { "suppress-common-lines", no_argument, NULL, 's' }, 90 { "width", required_argument, NULL, 'w' }, 91 92 { "output", required_argument, NULL, 'o' }, 93 { "diff-program", required_argument, NULL, DIFFPROG_OPT }, 94 95 /* Options processed by diff. */ 96 { "ignore-file-name-case", no_argument, NULL, FCASE_IGNORE_OPT }, 97 { "no-ignore-file-name-case", no_argument, NULL, FCASE_SENSITIVE_OPT }, 98 { "strip-trailing-cr", no_argument, NULL, STRIPCR_OPT }, 99 { "tabsize", required_argument, NULL, TSIZE_OPT }, 100 { "help", no_argument, NULL, HELP_OPT }, 101 { "text", no_argument, NULL, 'a' }, 102 { "ignore-blank-lines", no_argument, NULL, 'B' }, 103 { "ignore-space-change", no_argument, NULL, 'b' }, 104 { "minimal", no_argument, NULL, 'd' }, 105 { "ignore-tab-expansion", no_argument, NULL, 'E' }, 106 { "ignore-matching-lines", required_argument, NULL, 'I' }, 107 { "ignore-case", no_argument, NULL, 'i' }, 108 { "left-column", no_argument, NULL, 'l' }, 109 { "expand-tabs", no_argument, NULL, 't' }, 110 { "speed-large-files", no_argument, NULL, 'H' }, 111 { "ignore-all-space", no_argument, NULL, 'W' }, 112 113 { NULL, 0, NULL, '\0'} 114 }; 115 116 static const char *help_msg[] = { 117 "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n", 118 "-l, --left-column: only print the left column for identical lines.", 119 "-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.", 120 "-s, --suppress-common-lines: skip identical lines.", 121 "-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.", 122 "", 123 "Options passed to diff(1) are:", 124 "\t-a, --text: treat file1 and file2 as text files.", 125 "\t-b, --ignore-trailing-cr: ignore trailing blank spaces.", 126 "\t-d, --minimal: minimize diff size.", 127 "\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.", 128 "\t-i, --ignore-case: do a case-insensitive comparison.", 129 "\t-t, --expand-tabs: sxpand tabs to spaces.", 130 "\t-W, --ignore-all-spaces: ignore all spaces.", 131 "\t--speed-large-files: assume large file with scattered changes.", 132 "\t--strip-trailing-cr: strip trailing carriage return.", 133 "\t--ignore-file-name-case: ignore case of file names.", 134 "\t--no-ignore-file-name-case: do not ignore file name case", 135 "\t--tabsize NUM: change size of tabs (default 8.)", 136 137 NULL, 138 }; 139 140 /* 141 * Create temporary file if source_file is not a regular file. 142 * Returns temporary file name if one was malloced, NULL if unnecessary. 143 */ 144 static char * 145 mktmpcpy(const char *source_file) 146 { 147 struct stat sb; 148 ssize_t rcount; 149 int ifd, ofd; 150 u_char buf[BUFSIZ]; 151 char *target_file; 152 153 /* Open input and output. */ 154 ifd = open(source_file, O_RDONLY, 0); 155 /* File was opened successfully. */ 156 if (ifd != -1) { 157 if (fstat(ifd, &sb) == -1) 158 err(2, "error getting file status from %s", source_file); 159 160 /* Regular file. */ 161 if (S_ISREG(sb.st_mode)) { 162 close(ifd); 163 return (NULL); 164 } 165 } else { 166 /* If ``-'' does not exist the user meant stdin. */ 167 if (errno == ENOENT && strcmp(source_file, "-") == 0) 168 ifd = STDIN_FILENO; 169 else 170 err(2, "error opening %s", source_file); 171 } 172 173 /* Not a regular file, so copy input into temporary file. */ 174 if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1) 175 err(2, "asprintf"); 176 if ((ofd = mkstemp(target_file)) == -1) { 177 warn("error opening %s", target_file); 178 goto FAIL; 179 } 180 while ((rcount = read(ifd, buf, sizeof(buf))) != -1 && 181 rcount != 0) { 182 ssize_t wcount; 183 184 wcount = write(ofd, buf, (size_t)rcount); 185 if (-1 == wcount || rcount != wcount) { 186 warn("error writing to %s", target_file); 187 goto FAIL; 188 } 189 } 190 if (rcount == -1) { 191 warn("error reading from %s", source_file); 192 goto FAIL; 193 } 194 195 close(ifd); 196 close(ofd); 197 198 return (target_file); 199 200 FAIL: 201 unlink(target_file); 202 exit(2); 203 } 204 205 int 206 main(int argc, char **argv) 207 { 208 FILE *diffpipe=NULL, *file1, *file2; 209 size_t diffargc = 0, wflag = WIDTH; 210 int ch, fd[2] = {-1}, status; 211 pid_t pid=0; 212 const char *outfile = NULL; 213 char **diffargv, *diffprog = diff_path, *filename1, *filename2, 214 *tmp1, *tmp2, *s1, *s2; 215 int i; 216 char I_arg[] = "-I"; 217 char speed_lf[] = "--speed-large-files"; 218 219 /* 220 * Process diff flags. 221 */ 222 /* 223 * Allocate memory for diff arguments and NULL. 224 * Each flag has at most one argument, so doubling argc gives an 225 * upper limit of how many diff args can be passed. argv[0], 226 * file1, and file2 won't have arguments so doubling them will 227 * waste some memory; however we need an extra space for the 228 * NULL at the end, so it sort of works out. 229 */ 230 if (!(diffargv = calloc(argc, sizeof(char **) * 2))) 231 err(2, "main"); 232 233 /* Add first argument, the program name. */ 234 diffargv[diffargc++] = diffprog; 235 236 /* create a dynamic string for merging single-switch options */ 237 if ( asprintf(&diffargv[diffargc++], "-") < 0 ) 238 err(2, "main"); 239 240 while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:", 241 longopts, NULL)) != -1) { 242 const char *errstr; 243 244 switch (ch) { 245 /* only compatible --long-name-form with diff */ 246 case FCASE_IGNORE_OPT: 247 case FCASE_SENSITIVE_OPT: 248 case STRIPCR_OPT: 249 case TSIZE_OPT: 250 case 'S': 251 break; 252 /* combine no-arg single switches */ 253 case 'a': 254 case 'B': 255 case 'b': 256 case 'd': 257 case 'E': 258 case 'i': 259 case 't': 260 case 'W': 261 diffargv[1] = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2); 262 /* 263 * In diff, the 'W' option is 'w' and the 'w' is 'W'. 264 */ 265 if (ch == 'W') 266 sprintf(diffargv[1], "%sw", diffargv[1]); 267 else 268 sprintf(diffargv[1], "%s%c", diffargv[1], ch); 269 break; 270 case 'H': 271 diffargv[diffargc++] = speed_lf; 272 break; 273 case DIFFPROG_OPT: 274 diffargv[0] = diffprog = optarg; 275 break; 276 case 'I': 277 Iflag = 1; 278 diffargv[diffargc++] = I_arg; 279 diffargv[diffargc++] = optarg; 280 break; 281 case 'l': 282 lflag = 1; 283 break; 284 case 'o': 285 outfile = optarg; 286 break; 287 case 's': 288 sflag = 1; 289 break; 290 case 'w': 291 wflag = strtonum(optarg, WIDTH_MIN, 292 INT_MAX, &errstr); 293 if (errstr) 294 errx(2, "width is %s: %s", errstr, optarg); 295 break; 296 case HELP_OPT: 297 for (i = 0; help_msg[i] != NULL; i++) 298 printf("%s\n", help_msg[i]); 299 exit(0); 300 break; 301 default: 302 usage(); 303 break; 304 } 305 } 306 307 /* no single switches were used */ 308 if (strcmp(diffargv[1], "-") == 0 ) { 309 for ( i = 1; i < argc-1; i++) { 310 diffargv[i] = diffargv[i+1]; 311 } 312 diffargv[diffargc-1] = NULL; 313 diffargc--; 314 } 315 316 argc -= optind; 317 argv += optind; 318 319 if (argc != 2) 320 usage(); 321 322 if (outfile && (outfp = fopen(outfile, "w")) == NULL) 323 err(2, "could not open: %s", optarg); 324 325 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') 326 tmpdir = _PATH_TMP; 327 328 filename1 = argv[0]; 329 filename2 = argv[1]; 330 331 /* 332 * Create temporary files for diff and sdiff to share if file1 333 * or file2 are not regular files. This allows sdiff and diff 334 * to read the same inputs if one or both inputs are stdin. 335 * 336 * If any temporary files were created, their names would be 337 * saved in tmp1 or tmp2. tmp1 should never equal tmp2. 338 */ 339 tmp1 = tmp2 = NULL; 340 /* file1 and file2 are the same, so copy to same temp file. */ 341 if (strcmp(filename1, filename2) == 0) { 342 if ((tmp1 = mktmpcpy(filename1))) 343 filename1 = filename2 = tmp1; 344 /* Copy file1 and file2 into separate temp files. */ 345 } else { 346 if ((tmp1 = mktmpcpy(filename1))) 347 filename1 = tmp1; 348 if ((tmp2 = mktmpcpy(filename2))) 349 filename2 = tmp2; 350 } 351 352 diffargv[diffargc++] = filename1; 353 diffargv[diffargc++] = filename2; 354 /* Add NULL to end of array to indicate end of array. */ 355 diffargv[diffargc++] = NULL; 356 357 /* Subtract column divider and divide by two. */ 358 width = (wflag - 3) / 2; 359 /* Make sure line_width can fit in size_t. */ 360 if (width > (SIZE_MAX - 3) / 2) 361 errx(2, "width is too large: %zu", width); 362 line_width = width * 2 + 3; 363 364 if (pipe(fd)) 365 err(2, "pipe"); 366 367 switch (pid = fork()) { 368 case 0: 369 /* child */ 370 /* We don't read from the pipe. */ 371 close(fd[0]); 372 if (dup2(fd[1], STDOUT_FILENO) == -1) 373 err(2, "child could not duplicate descriptor"); 374 /* Free unused descriptor. */ 375 close(fd[1]); 376 execvp(diffprog, diffargv); 377 err(2, "could not execute diff: %s", diffprog); 378 break; 379 case -1: 380 err(2, "could not fork"); 381 break; 382 } 383 384 /* parent */ 385 /* We don't write to the pipe. */ 386 close(fd[1]); 387 388 /* Open pipe to diff command. */ 389 if ((diffpipe = fdopen(fd[0], "r")) == NULL) 390 err(2, "could not open diff pipe"); 391 392 if ((file1 = fopen(filename1, "r")) == NULL) 393 err(2, "could not open %s", filename1); 394 if ((file2 = fopen(filename2, "r")) == NULL) 395 err(2, "could not open %s", filename2); 396 if (!istextfile(file1) || !istextfile(file2)) { 397 /* Close open files and pipe, delete temps */ 398 fclose(file1); 399 fclose(file2); 400 if (diffpipe != NULL) 401 fclose(diffpipe); 402 if (tmp1) 403 if (unlink(tmp1)) 404 warn("Error deleting %s.", tmp1); 405 if (tmp2) 406 if (unlink(tmp2)) 407 warn("Error deleting %s.", tmp2); 408 free(tmp1); 409 free(tmp2); 410 binexec(diffprog, filename1, filename2); 411 } 412 /* Line numbers start at one. */ 413 file1ln = file2ln = 1; 414 415 /* Read and parse diff output. */ 416 while (parsecmd(diffpipe, file1, file2) != EOF) 417 ; 418 fclose(diffpipe); 419 420 /* Wait for diff to exit. */ 421 if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) || 422 WEXITSTATUS(status) >= 2) 423 err(2, "diff exited abnormally."); 424 425 /* Delete and free unneeded temporary files. */ 426 if (tmp1) 427 if (unlink(tmp1)) 428 warn("Error deleting %s.", tmp1); 429 if (tmp2) 430 if (unlink(tmp2)) 431 warn("Error deleting %s.", tmp2); 432 free(tmp1); 433 free(tmp2); 434 filename1 = filename2 = tmp1 = tmp2 = NULL; 435 436 /* No more diffs, so print common lines. */ 437 if (lflag) 438 while ((s1 = xfgets(file1))) 439 enqueue(s1, ' ', NULL); 440 else 441 for (;;) { 442 s1 = xfgets(file1); 443 s2 = xfgets(file2); 444 if (s1 || s2) 445 enqueue(s1, ' ', s2); 446 else 447 break; 448 } 449 fclose(file1); 450 fclose(file2); 451 /* Process unmodified lines. */ 452 processq(); 453 454 /* Return diff exit status. */ 455 return (WEXITSTATUS(status)); 456 } 457 458 /* 459 * When sdiff detects a binary file as input, executes them with 460 * diff to maintain the same behavior as GNU sdiff with binary input. 461 */ 462 static void 463 binexec(char *diffprog, char *f1, char *f2) 464 { 465 466 char *args[] = {diffprog, f1, f2, (char *) 0}; 467 execv(diffprog, args); 468 469 /* If execv() fails, sdiff's execution will continue below. */ 470 errx(1, "could not execute diff process"); 471 } 472 473 /* 474 * Checks whether a file appears to be a text file. 475 */ 476 static int 477 istextfile(FILE *f) 478 { 479 int ch, i; 480 481 if (f == NULL) 482 return (1); 483 rewind(f); 484 for (i = 0; i <= MAX_CHECK; i++) { 485 ch = fgetc(f); 486 if (ch == '\0') { 487 rewind(f); 488 return (0); 489 } 490 if (ch == EOF) 491 break; 492 } 493 rewind(f); 494 return (1); 495 } 496 497 /* 498 * Prints an individual column (left or right), taking into account 499 * that tabs are variable-width. Takes a string, the current column 500 * the cursor is on the screen, and the maximum value of the column. 501 * The column value is updated as we go along. 502 */ 503 static void 504 printcol(const char *s, size_t *col, const size_t col_max) 505 { 506 507 for (; *s && *col < col_max; ++s) { 508 size_t new_col; 509 510 switch (*s) { 511 case '\t': 512 /* 513 * If rounding to next multiple of eight causes 514 * an integer overflow, just return. 515 */ 516 if (*col > SIZE_MAX - 8) 517 return; 518 519 /* Round to next multiple of eight. */ 520 new_col = (*col / 8 + 1) * 8; 521 522 /* 523 * If printing the tab goes past the column 524 * width, don't print it and just quit. 525 */ 526 if (new_col > col_max) 527 return; 528 *col = new_col; 529 break; 530 default: 531 ++(*col); 532 } 533 putchar(*s); 534 } 535 } 536 537 /* 538 * Prompts user to either choose between two strings or edit one, both, 539 * or neither. 540 */ 541 static void 542 prompt(const char *s1, const char *s2) 543 { 544 char *cmd; 545 546 /* Print command prompt. */ 547 putchar('%'); 548 549 /* Get user input. */ 550 for (; (cmd = xfgets(stdin)); free(cmd)) { 551 const char *p; 552 553 /* Skip leading whitespace. */ 554 for (p = cmd; isspace(*p); ++p) 555 ; 556 switch (*p) { 557 case 'e': 558 /* Skip `e'. */ 559 ++p; 560 if (eparse(p, s1, s2) == -1) 561 goto USAGE; 562 break; 563 case 'l': 564 case '1': 565 /* Choose left column as-is. */ 566 if (s1 != NULL) 567 fprintf(outfp, "%s\n", s1); 568 /* End of command parsing. */ 569 break; 570 case 'q': 571 goto QUIT; 572 case 'r': 573 case '2': 574 /* Choose right column as-is. */ 575 if (s2 != NULL) 576 fprintf(outfp, "%s\n", s2); 577 /* End of command parsing. */ 578 break; 579 case 's': 580 sflag = 1; 581 goto PROMPT; 582 case 'v': 583 sflag = 0; 584 /* FALLTHROUGH */ 585 default: 586 /* Interactive usage help. */ 587 USAGE: 588 int_usage(); 589 PROMPT: 590 putchar('%'); 591 592 /* Prompt user again. */ 593 continue; 594 } 595 free(cmd); 596 return; 597 } 598 599 /* 600 * If there was no error, we received an EOF from stdin, so we 601 * should quit. 602 */ 603 QUIT: 604 fclose(outfp); 605 exit(0); 606 } 607 608 /* 609 * Takes two strings, separated by a column divider. NULL strings are 610 * treated as empty columns. If the divider is the ` ' character, the 611 * second column is not printed (-l flag). In this case, the second 612 * string must be NULL. When the second column is NULL, the divider 613 * does not print the trailing space following the divider character. 614 * 615 * Takes into account that tabs can take multiple columns. 616 */ 617 static void 618 println(const char *s1, const char divider, const char *s2) 619 { 620 size_t col; 621 622 /* Print first column. Skips if s1 == NULL. */ 623 col = 0; 624 if (s1) { 625 /* Skip angle bracket and space. */ 626 printcol(s1, &col, width); 627 628 } 629 630 /* Otherwise, we pad this column up to width. */ 631 for (; col < width; ++col) 632 putchar(' '); 633 634 /* Only print left column. */ 635 if (divider == ' ' && !s2) { 636 printf(" (\n"); 637 return; 638 } 639 640 /* 641 * Print column divider. If there is no second column, we don't 642 * need to add the space for padding. 643 */ 644 if (!s2) { 645 printf(" %c\n", divider); 646 return; 647 } 648 printf(" %c ", divider); 649 col += 3; 650 651 /* Skip angle bracket and space. */ 652 printcol(s2, &col, line_width); 653 654 putchar('\n'); 655 } 656 657 /* 658 * Reads a line from file and returns as a string. If EOF is reached, 659 * NULL is returned. The returned string must be freed afterwards. 660 */ 661 static char * 662 xfgets(FILE *file) 663 { 664 size_t linecap; 665 ssize_t l; 666 char *s; 667 668 clearerr(file); 669 linecap = 0; 670 s = NULL; 671 672 if ((l = getline(&s, &linecap, file)) == -1) { 673 if (ferror(file)) 674 err(2, "error reading file"); 675 return (NULL); 676 } 677 678 if (s[l-1] == '\n') 679 s[l-1] = '\0'; 680 681 return (s); 682 } 683 684 /* 685 * Parse ed commands from diffpipe and print lines from file1 (lines 686 * to change or delete) or file2 (lines to add or change). 687 * Returns EOF or 0. 688 */ 689 static int 690 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2) 691 { 692 size_t file1start, file1end, file2start, file2end, n; 693 /* ed command line and pointer to characters in line */ 694 char *line, *p, *q; 695 const char *errstr; 696 char c, cmd; 697 698 /* Read ed command. */ 699 if (!(line = xfgets(diffpipe))) 700 return (EOF); 701 702 p = line; 703 /* Go to character after line number. */ 704 while (isdigit(*p)) 705 ++p; 706 c = *p; 707 *p++ = 0; 708 file1start = strtonum(line, 0, INT_MAX, &errstr); 709 if (errstr) 710 errx(2, "file1 start is %s: %s", errstr, line); 711 712 /* A range is specified for file1. */ 713 if (c == ',') { 714 q = p; 715 /* Go to character after file2end. */ 716 while (isdigit(*p)) 717 ++p; 718 c = *p; 719 *p++ = 0; 720 file1end = strtonum(q, 0, INT_MAX, &errstr); 721 if (errstr) 722 errx(2, "file1 end is %s: %s", errstr, line); 723 if (file1start > file1end) 724 errx(2, "invalid line range in file1: %s", line); 725 } else 726 file1end = file1start; 727 728 cmd = c; 729 /* Check that cmd is valid. */ 730 if (!(cmd == 'a' || cmd == 'c' || cmd == 'd')) 731 errx(2, "ed command not recognized: %c: %s", cmd, line); 732 733 q = p; 734 /* Go to character after line number. */ 735 while (isdigit(*p)) 736 ++p; 737 c = *p; 738 *p++ = 0; 739 file2start = strtonum(q, 0, INT_MAX, &errstr); 740 if (errstr) 741 errx(2, "file2 start is %s: %s", errstr, line); 742 743 /* 744 * There should either be a comma signifying a second line 745 * number or the line should just end here. 746 */ 747 if (c != ',' && c != '\0') 748 errx(2, "invalid line range in file2: %c: %s", c, line); 749 750 if (c == ',') { 751 752 file2end = strtonum(p, 0, INT_MAX, &errstr); 753 if (errstr) 754 errx(2, "file2 end is %s: %s", errstr, line); 755 if (file2start >= file2end) 756 errx(2, "invalid line range in file2: %s", line); 757 } else 758 file2end = file2start; 759 760 /* Appends happen _after_ stated line. */ 761 if (cmd == 'a') { 762 if (file1start != file1end) 763 errx(2, "append cannot have a file1 range: %s", 764 line); 765 if (file1start == SIZE_MAX) 766 errx(2, "file1 line range too high: %s", line); 767 file1start = ++file1end; 768 } 769 /* 770 * I'm not sure what the deal is with the line numbers for 771 * deletes, though. 772 */ 773 else if (cmd == 'd') { 774 if (file2start != file2end) 775 errx(2, "delete cannot have a file2 range: %s", 776 line); 777 if (file2start == SIZE_MAX) 778 errx(2, "file2 line range too high: %s", line); 779 file2start = ++file2end; 780 } 781 782 /* 783 * Continue reading file1 and file2 until we reach line numbers 784 * specified by diff. Should only happen with -I flag. 785 */ 786 for (; file1ln < file1start && file2ln < file2start; 787 ++file1ln, ++file2ln) { 788 char *s1, *s2; 789 790 if (!(s1 = xfgets(file1))) 791 errx(2, "file1 shorter than expected"); 792 if (!(s2 = xfgets(file2))) 793 errx(2, "file2 shorter than expected"); 794 795 /* If the -l flag was specified, print only left column. */ 796 if (lflag) { 797 free(s2); 798 /* 799 * XXX - If -l and -I are both specified, all 800 * unchanged or ignored lines are shown with a 801 * `(' divider. This matches GNU sdiff, but I 802 * believe it is a bug. Just check out: 803 * gsdiff -l -I '^$' samefile samefile. 804 */ 805 if (Iflag) 806 enqueue(s1, '(', NULL); 807 else 808 enqueue(s1, ' ', NULL); 809 } else 810 enqueue(s1, ' ', s2); 811 } 812 /* Ignore deleted lines. */ 813 for (; file1ln < file1start; ++file1ln) { 814 char *s; 815 816 if (!(s = xfgets(file1))) 817 errx(2, "file1 shorter than expected"); 818 819 enqueue(s, '(', NULL); 820 } 821 /* Ignore added lines. */ 822 for (; file2ln < file2start; ++file2ln) { 823 char *s; 824 825 if (!(s = xfgets(file2))) 826 errx(2, "file2 shorter than expected"); 827 828 /* If -l flag was given, don't print right column. */ 829 if (lflag) 830 free(s); 831 else 832 enqueue(NULL, ')', s); 833 } 834 835 /* Process unmodified or skipped lines. */ 836 processq(); 837 838 switch (cmd) { 839 case 'a': 840 printa(file2, file2end); 841 n = file2end - file2start + 1; 842 break; 843 case 'c': 844 printc(file1, file1end, file2, file2end); 845 n = file1end - file1start + 1 + 1 + file2end - file2start + 1; 846 break; 847 case 'd': 848 printd(file1, file1end); 849 n = file1end - file1start + 1; 850 break; 851 default: 852 errx(2, "invalid diff command: %c: %s", cmd, line); 853 } 854 free(line); 855 856 /* Skip to next ed line. */ 857 while (n--) { 858 if (!(line = xfgets(diffpipe))) 859 errx(2, "diff ended early"); 860 free(line); 861 } 862 863 return (0); 864 } 865 866 /* 867 * Queues up a diff line. 868 */ 869 static void 870 enqueue(char *left, char divider, char *right) 871 { 872 struct diffline *diffp; 873 874 if (!(diffp = malloc(sizeof(struct diffline)))) 875 err(2, "enqueue"); 876 diffp->left = left; 877 diffp->div = divider; 878 diffp->right = right; 879 STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries); 880 } 881 882 /* 883 * Free a diffline structure and its elements. 884 */ 885 static void 886 freediff(struct diffline *diffp) 887 { 888 889 free(diffp->left); 890 free(diffp->right); 891 free(diffp); 892 } 893 894 /* 895 * Append second string into first. Repeated appends to the same string 896 * are cached, making this an O(n) function, where n = strlen(append). 897 */ 898 static void 899 astrcat(char **s, const char *append) 900 { 901 /* Length of string in previous run. */ 902 static size_t offset = 0; 903 size_t newsiz; 904 /* 905 * String from previous run. Compared to *s to see if we are 906 * dealing with the same string. If so, we can use offset. 907 */ 908 static const char *oldstr = NULL; 909 char *newstr; 910 911 /* 912 * First string is NULL, so just copy append. 913 */ 914 if (!*s) { 915 if (!(*s = strdup(append))) 916 err(2, "astrcat"); 917 918 /* Keep track of string. */ 919 offset = strlen(*s); 920 oldstr = *s; 921 922 return; 923 } 924 925 /* 926 * *s is a string so concatenate. 927 */ 928 929 /* Did we process the same string in the last run? */ 930 /* 931 * If this is a different string from the one we just processed 932 * cache new string. 933 */ 934 if (oldstr != *s) { 935 offset = strlen(*s); 936 oldstr = *s; 937 } 938 939 /* Size = strlen(*s) + \n + strlen(append) + '\0'. */ 940 newsiz = offset + 1 + strlen(append) + 1; 941 942 /* Resize *s to fit new string. */ 943 newstr = realloc(*s, newsiz); 944 if (newstr == NULL) 945 err(2, "astrcat"); 946 *s = newstr; 947 948 /* *s + offset should be end of string. */ 949 /* Concatenate. */ 950 strlcpy(*s + offset, "\n", newsiz - offset); 951 strlcat(*s + offset, append, newsiz - offset); 952 953 /* New string length should be exactly newsiz - 1 characters. */ 954 /* Store generated string's values. */ 955 offset = newsiz - 1; 956 oldstr = *s; 957 } 958 959 /* 960 * Process diff set queue, printing, prompting, and saving each diff 961 * line stored in queue. 962 */ 963 static void 964 processq(void) 965 { 966 struct diffline *diffp; 967 char divc, *left, *right; 968 969 /* Don't process empty queue. */ 970 if (STAILQ_EMPTY(&diffhead)) 971 return; 972 973 /* Remember the divider. */ 974 divc = STAILQ_FIRST(&diffhead)->div; 975 976 left = NULL; 977 right = NULL; 978 /* 979 * Go through set of diffs, concatenating each line in left or 980 * right column into two long strings, `left' and `right'. 981 */ 982 STAILQ_FOREACH(diffp, &diffhead, diffentries) { 983 /* 984 * Print changed lines if -s was given, 985 * print all lines if -s was not given. 986 */ 987 if (!sflag || diffp->div == '|' || diffp->div == '<' || 988 diffp->div == '>') 989 println(diffp->left, diffp->div, diffp->right); 990 991 /* Append new lines to diff set. */ 992 if (diffp->left) 993 astrcat(&left, diffp->left); 994 if (diffp->right) 995 astrcat(&right, diffp->right); 996 } 997 998 /* Empty queue and free each diff line and its elements. */ 999 while (!STAILQ_EMPTY(&diffhead)) { 1000 diffp = STAILQ_FIRST(&diffhead); 1001 STAILQ_REMOVE_HEAD(&diffhead, diffentries); 1002 freediff(diffp); 1003 } 1004 1005 /* Write to outfp, prompting user if lines are different. */ 1006 if (outfp) 1007 switch (divc) { 1008 case ' ': case '(': case ')': 1009 fprintf(outfp, "%s\n", left); 1010 break; 1011 case '|': case '<': case '>': 1012 prompt(left, right); 1013 break; 1014 default: 1015 errx(2, "invalid divider: %c", divc); 1016 } 1017 1018 /* Free left and right. */ 1019 free(left); 1020 free(right); 1021 } 1022 1023 /* 1024 * Print lines following an (a)ppend command. 1025 */ 1026 static void 1027 printa(FILE *file, size_t line2) 1028 { 1029 char *line; 1030 1031 for (; file2ln <= line2; ++file2ln) { 1032 if (!(line = xfgets(file))) 1033 errx(2, "append ended early"); 1034 enqueue(NULL, '>', line); 1035 } 1036 processq(); 1037 } 1038 1039 /* 1040 * Print lines following a (c)hange command, from file1ln to file1end 1041 * and from file2ln to file2end. 1042 */ 1043 static void 1044 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end) 1045 { 1046 struct fileline { 1047 STAILQ_ENTRY(fileline) fileentries; 1048 char *line; 1049 }; 1050 STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead); 1051 1052 /* Read lines to be deleted. */ 1053 for (; file1ln <= file1end; ++file1ln) { 1054 struct fileline *linep; 1055 char *line1; 1056 1057 /* Read lines from both. */ 1058 if (!(line1 = xfgets(file1))) 1059 errx(2, "error reading file1 in delete in change"); 1060 1061 /* Add to delete queue. */ 1062 if (!(linep = malloc(sizeof(struct fileline)))) 1063 err(2, "printc"); 1064 linep->line = line1; 1065 STAILQ_INSERT_TAIL(&delqhead, linep, fileentries); 1066 } 1067 1068 /* Process changed lines.. */ 1069 for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end; 1070 ++file2ln) { 1071 struct fileline *del; 1072 char *add; 1073 1074 /* Get add line. */ 1075 if (!(add = xfgets(file2))) 1076 errx(2, "error reading add in change"); 1077 1078 del = STAILQ_FIRST(&delqhead); 1079 enqueue(del->line, '|', add); 1080 STAILQ_REMOVE_HEAD(&delqhead, fileentries); 1081 /* 1082 * Free fileline structure but not its elements since 1083 * they are queued up. 1084 */ 1085 free(del); 1086 } 1087 processq(); 1088 1089 /* Process remaining lines to add. */ 1090 for (; file2ln <= file2end; ++file2ln) { 1091 char *add; 1092 1093 /* Get add line. */ 1094 if (!(add = xfgets(file2))) 1095 errx(2, "error reading add in change"); 1096 1097 enqueue(NULL, '>', add); 1098 } 1099 processq(); 1100 1101 /* Process remaining lines to delete. */ 1102 while (!STAILQ_EMPTY(&delqhead)) { 1103 struct fileline *filep; 1104 1105 filep = STAILQ_FIRST(&delqhead); 1106 enqueue(filep->line, '<', NULL); 1107 STAILQ_REMOVE_HEAD(&delqhead, fileentries); 1108 free(filep); 1109 } 1110 processq(); 1111 } 1112 1113 /* 1114 * Print deleted lines from file, from file1ln to file1end. 1115 */ 1116 static void 1117 printd(FILE *file1, size_t file1end) 1118 { 1119 char *line1; 1120 1121 /* Print out lines file1ln to line2. */ 1122 for (; file1ln <= file1end; ++file1ln) { 1123 if (!(line1 = xfgets(file1))) 1124 errx(2, "file1 ended early in delete"); 1125 enqueue(line1, '<', NULL); 1126 } 1127 processq(); 1128 } 1129 1130 /* 1131 * Interactive mode usage. 1132 */ 1133 static void 1134 int_usage(void) 1135 { 1136 1137 puts("e:\tedit blank diff\n" 1138 "eb:\tedit both diffs concatenated\n" 1139 "el:\tedit left diff\n" 1140 "er:\tedit right diff\n" 1141 "l | 1:\tchoose left diff\n" 1142 "r | 2:\tchoose right diff\n" 1143 "s:\tsilent mode--don't print identical lines\n" 1144 "v:\tverbose mode--print identical lines\n" 1145 "q:\tquit"); 1146 } 1147 1148 static void 1149 usage(void) 1150 { 1151 1152 fprintf(stderr, 1153 "usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1" 1154 " file2\n"); 1155 exit(2); 1156 } 1157