1 /* $OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */ 2 3 /* 4 * Written by Raymond Lai <ray@cyth.net>. 5 * Public domain. 6 */ 7 8 #include <sys/param.h> 9 #include <sys/queue.h> 10 #include <sys/stat.h> 11 #include <sys/wait.h> 12 13 #include <ctype.h> 14 #include <err.h> 15 #include <errno.h> 16 #include <fcntl.h> 17 #include <getopt.h> 18 #include <limits.h> 19 #include <paths.h> 20 #include <stdbool.h> 21 #include <stdint.h> 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <unistd.h> 26 27 #include "extern.h" 28 29 static char diff_path[] = "/usr/bin/diff"; 30 31 #define WIDTH 126 32 /* 33 * Each column must be at least one character wide, plus three 34 * characters between the columns (space, [<|>], space). 35 */ 36 #define WIDTH_MIN 5 37 38 /* 3 kilobytes of chars */ 39 #define MAX_CHECK 768 40 41 /* A single diff line. */ 42 struct diffline { 43 STAILQ_ENTRY(diffline) diffentries; 44 char *left; 45 char div; 46 char *right; 47 }; 48 49 static void astrcat(char **, const char *); 50 static void enqueue(char *, char, char *); 51 static char *mktmpcpy(const char *); 52 static int istextfile(FILE *); 53 static int bindiff(FILE *, char *, FILE *, char *); 54 static void freediff(struct diffline *); 55 static void int_usage(void); 56 static int parsecmd(FILE *, FILE *, FILE *); 57 static void printa(FILE *, size_t); 58 static void printc(FILE *, size_t, FILE *, size_t); 59 static void printcol(const char *, size_t *, const size_t); 60 static void printd(FILE *, size_t); 61 static void println(const char *, const char, const char *); 62 static void processq(void); 63 static void prompt(const char *, const char *); 64 static void usage(void) __dead2; 65 static char *xfgets(FILE *); 66 67 static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead); 68 static size_t line_width; /* width of a line (two columns and divider) */ 69 static size_t width; /* width of each column */ 70 static size_t file1ln, file2ln; /* line number of file1 and file2 */ 71 static bool Iflag; /* ignore sets matching regexp */ 72 static bool lflag; /* print only left column for identical lines */ 73 static bool sflag; /* skip identical lines */ 74 static bool tflag; /* expand tabs */ 75 static int tabsize = 8; /* tab size */ 76 FILE *outfp; /* file to save changes to */ 77 const char *tmpdir; /* TMPDIR or /tmp */ 78 79 enum { 80 HELP_OPT = CHAR_MAX + 1, 81 NORMAL_OPT, 82 FCASE_SENSITIVE_OPT, 83 FCASE_IGNORE_OPT, 84 STRIPCR_OPT, 85 TSIZE_OPT, 86 DIFFPROG_OPT, 87 }; 88 89 static struct option longopts[] = { 90 /* options only processed in sdiff */ 91 { "suppress-common-lines", no_argument, NULL, 's' }, 92 { "width", required_argument, NULL, 'w' }, 93 94 { "output", required_argument, NULL, 'o' }, 95 { "diff-program", required_argument, NULL, DIFFPROG_OPT }, 96 97 /* Options processed by diff. */ 98 { "ignore-file-name-case", no_argument, NULL, FCASE_IGNORE_OPT }, 99 { "no-ignore-file-name-case", no_argument, NULL, FCASE_SENSITIVE_OPT }, 100 { "strip-trailing-cr", no_argument, NULL, STRIPCR_OPT }, 101 { "tabsize", required_argument, NULL, TSIZE_OPT }, 102 { "help", no_argument, NULL, HELP_OPT }, 103 { "text", no_argument, NULL, 'a' }, 104 { "ignore-blank-lines", no_argument, NULL, 'B' }, 105 { "ignore-space-change", no_argument, NULL, 'b' }, 106 { "minimal", no_argument, NULL, 'd' }, 107 { "ignore-tab-expansion", no_argument, NULL, 'E' }, 108 { "ignore-matching-lines", required_argument, NULL, 'I' }, 109 { "ignore-case", no_argument, NULL, 'i' }, 110 { "left-column", no_argument, NULL, 'l' }, 111 { "expand-tabs", no_argument, NULL, 't' }, 112 { "speed-large-files", no_argument, NULL, 'H' }, 113 { "ignore-all-space", no_argument, NULL, 'W' }, 114 115 { NULL, 0, NULL, '\0'} 116 }; 117 118 static const char *help_msg[] = { 119 "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n", 120 "-l, --left-column: only print the left column for identical lines.", 121 "-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.", 122 "-s, --suppress-common-lines: skip identical lines.", 123 "-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.", 124 "", 125 "Options passed to diff(1) are:", 126 "\t-a, --text: treat file1 and file2 as text files.", 127 "\t-b, --ignore-trailing-cr: ignore trailing blank spaces.", 128 "\t-d, --minimal: minimize diff size.", 129 "\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.", 130 "\t-i, --ignore-case: do a case-insensitive comparison.", 131 "\t-t, --expand-tabs: expand tabs to spaces.", 132 "\t-W, --ignore-all-space: ignore all whitespace.", 133 "\t--speed-large-files: assume large file with scattered changes.", 134 "\t--strip-trailing-cr: strip trailing carriage return.", 135 "\t--ignore-file-name-case: ignore case of file names.", 136 "\t--no-ignore-file-name-case: do not ignore file name case", 137 "\t--tabsize NUM: change size of tabs (default 8.)", 138 139 NULL, 140 }; 141 142 /* 143 * Create temporary file if source_file is not a regular file. 144 * Returns temporary file name if one was malloced, NULL if unnecessary. 145 */ 146 static char * 147 mktmpcpy(const char *source_file) 148 { 149 struct stat sb; 150 ssize_t rcount; 151 int ifd, ofd; 152 u_char buf[BUFSIZ]; 153 char *target_file; 154 155 /* Open input and output. */ 156 ifd = open(source_file, O_RDONLY, 0); 157 /* File was opened successfully. */ 158 if (ifd != -1) { 159 if (fstat(ifd, &sb) == -1) 160 err(2, "error getting file status from %s", source_file); 161 162 /* Regular file. */ 163 if (S_ISREG(sb.st_mode)) { 164 close(ifd); 165 return (NULL); 166 } 167 } else { 168 /* If ``-'' does not exist the user meant stdin. */ 169 if (errno == ENOENT && strcmp(source_file, "-") == 0) 170 ifd = STDIN_FILENO; 171 else 172 err(2, "error opening %s", source_file); 173 } 174 175 /* Not a regular file, so copy input into temporary file. */ 176 if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1) 177 err(2, "asprintf"); 178 if ((ofd = mkstemp(target_file)) == -1) { 179 warn("error opening %s", target_file); 180 goto FAIL; 181 } 182 while ((rcount = read(ifd, buf, sizeof(buf))) != -1 && 183 rcount != 0) { 184 ssize_t wcount; 185 186 wcount = write(ofd, buf, (size_t)rcount); 187 if (-1 == wcount || rcount != wcount) { 188 warn("error writing to %s", target_file); 189 goto FAIL; 190 } 191 } 192 if (rcount == -1) { 193 warn("error reading from %s", source_file); 194 goto FAIL; 195 } 196 197 close(ifd); 198 close(ofd); 199 200 return (target_file); 201 202 FAIL: 203 unlink(target_file); 204 exit(2); 205 } 206 207 int 208 main(int argc, char **argv) 209 { 210 FILE *diffpipe, *file1, *file2; 211 size_t diffargc = 0, flagc = 0, wval = WIDTH; 212 int ch, fd[2], i, ret, status; 213 pid_t pid; 214 const char *errstr, *outfile = NULL; 215 char **diffargv, *diffprog = diff_path, *flagv; 216 char *filename1, *filename2, *tmp1, *tmp2, *s1, *s2; 217 char I_arg[] = "-I"; 218 char speed_lf[] = "--speed-large-files"; 219 220 /* 221 * Process diff flags. 222 */ 223 /* 224 * Allocate memory for diff arguments and NULL. 225 * Each flag has at most one argument, so doubling argc gives an 226 * upper limit of how many diff args can be passed. argv[0], 227 * file1, and file2 won't have arguments so doubling them will 228 * waste some memory; however we need an extra space for the 229 * NULL at the end, so it sort of works out. 230 */ 231 if ((diffargv = calloc(argc, sizeof(char *) * 2)) == NULL) 232 err(2, NULL); 233 234 /* Add first argument, the program name. */ 235 diffargv[diffargc++] = diffprog; 236 237 /* create a dynamic string for merging single-character options */ 238 if ((flagv = malloc(flagc + 2)) == NULL) 239 err(2, NULL); 240 flagv[flagc] = '-'; 241 flagv[flagc + 1] = '\0'; 242 diffargv[diffargc++] = flagv; 243 244 while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:", 245 longopts, NULL)) != -1) { 246 switch (ch) { 247 /* only compatible --long-name-form with diff */ 248 case FCASE_IGNORE_OPT: 249 case FCASE_SENSITIVE_OPT: 250 case STRIPCR_OPT: 251 case 'S': 252 break; 253 /* combine no-arg single switches */ 254 case 'a': 255 case 'B': 256 case 'b': 257 case 'd': 258 case 'E': 259 case 'i': 260 case 'W': 261 flagc++; 262 flagv = realloc(flagv, flagc + 2); 263 /* 264 * In diff, the 'W' option is 'w' and the 'w' is 'W'. 265 */ 266 flagv[flagc] = ch == 'W' ? 'w' : ch; 267 flagv[flagc + 1] = '\0'; 268 break; 269 case 'H': 270 diffargv[diffargc++] = speed_lf; 271 break; 272 case DIFFPROG_OPT: 273 diffargv[0] = diffprog = optarg; 274 break; 275 case 'I': 276 Iflag = true; 277 diffargv[diffargc++] = I_arg; 278 diffargv[diffargc++] = optarg; 279 break; 280 case 'l': 281 lflag = true; 282 break; 283 case 'o': 284 outfile = optarg; 285 break; 286 case 's': 287 sflag = true; 288 break; 289 case 't': 290 tflag = true; 291 break; 292 case 'w': 293 wval = strtonum(optarg, WIDTH_MIN, 294 INT_MAX, &errstr); 295 if (errstr) 296 errx(2, "width is %s: %s", errstr, optarg); 297 break; 298 case HELP_OPT: 299 for (i = 0; help_msg[i] != NULL; i++) 300 printf("%s\n", help_msg[i]); 301 exit(0); 302 break; 303 case TSIZE_OPT: 304 tabsize = strtonum(optarg, 1, INT_MAX, &errstr); 305 if (errstr) 306 errx(2, "tabsize is %s: %s", errstr, optarg); 307 break; 308 default: 309 usage(); 310 break; 311 } 312 } 313 314 /* no single-character options were used */ 315 if (flagc == 0) { 316 memmove(diffargv + 1, diffargv + 2, 317 sizeof(char *) * (diffargc - 2)); 318 diffargc--; 319 free(flagv); 320 } 321 322 argc -= optind; 323 argv += optind; 324 325 if (argc != 2) 326 usage(); 327 328 if (outfile && (outfp = fopen(outfile, "w")) == NULL) 329 err(2, "could not open: %s", optarg); 330 331 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') 332 tmpdir = _PATH_TMP; 333 334 filename1 = argv[0]; 335 filename2 = argv[1]; 336 337 /* 338 * Create temporary files for diff and sdiff to share if file1 339 * or file2 are not regular files. This allows sdiff and diff 340 * to read the same inputs if one or both inputs are stdin. 341 * 342 * If any temporary files were created, their names would be 343 * saved in tmp1 or tmp2. tmp1 should never equal tmp2. 344 */ 345 tmp1 = tmp2 = NULL; 346 /* file1 and file2 are the same, so copy to same temp file. */ 347 if (strcmp(filename1, filename2) == 0) { 348 if ((tmp1 = mktmpcpy(filename1))) 349 filename1 = filename2 = tmp1; 350 /* Copy file1 and file2 into separate temp files. */ 351 } else { 352 if ((tmp1 = mktmpcpy(filename1))) 353 filename1 = tmp1; 354 if ((tmp2 = mktmpcpy(filename2))) 355 filename2 = tmp2; 356 } 357 358 if ((file1 = fopen(filename1, "r")) == NULL) 359 err(2, "could not open %s", filename1); 360 if ((file2 = fopen(filename2, "r")) == NULL) 361 err(2, "could not open %s", filename2); 362 if (!istextfile(file1) || !istextfile(file2)) { 363 ret = bindiff(file1, filename1, file2, filename2); 364 goto done; 365 } 366 367 diffargv[diffargc++] = filename1; 368 diffargv[diffargc++] = filename2; 369 /* Add NULL to end of array to indicate end of array. */ 370 diffargv[diffargc++] = NULL; 371 372 /* Subtract column divider and divide by two. */ 373 width = (wval - 3) / 2; 374 /* Make sure line_width can fit in size_t. */ 375 if (width > (SIZE_MAX - 3) / 2) 376 errx(2, "width is too large: %zu", width); 377 line_width = width * 2 + 3; 378 379 if (pipe(fd)) 380 err(2, "pipe"); 381 382 if ((pid = fork()) < 0) 383 err(1, "fork()"); 384 if (pid == 0) { 385 /* child */ 386 /* We don't read from the pipe. */ 387 close(fd[0]); 388 if (dup2(fd[1], STDOUT_FILENO) != STDOUT_FILENO) 389 _exit(2); 390 /* Free unused descriptor. */ 391 close(fd[1]); 392 execvp(diffprog, diffargv); 393 _exit(2); 394 } 395 396 /* parent */ 397 /* We don't write to the pipe. */ 398 close(fd[1]); 399 400 /* Open pipe to diff command. */ 401 if ((diffpipe = fdopen(fd[0], "r")) == NULL) 402 err(2, "could not open diff pipe"); 403 404 /* Line numbers start at one. */ 405 file1ln = file2ln = 1; 406 407 /* Read and parse diff output. */ 408 while (parsecmd(diffpipe, file1, file2) != EOF) 409 ; 410 fclose(diffpipe); 411 412 /* Wait for diff to exit. */ 413 if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) || 414 WEXITSTATUS(status) >= 2) 415 errx(2, "diff exited abnormally"); 416 ret = WEXITSTATUS(status); 417 418 /* No more diffs, so enqueue common lines. */ 419 if (lflag) 420 while ((s1 = xfgets(file1))) 421 enqueue(s1, ' ', NULL); 422 else 423 for (;;) { 424 s1 = xfgets(file1); 425 s2 = xfgets(file2); 426 if (s1 || s2) 427 enqueue(s1, ' ', s2); 428 else 429 break; 430 } 431 fclose(file1); 432 fclose(file2); 433 /* Process unmodified lines. */ 434 processq(); 435 436 done: 437 /* Delete and free unneeded temporary files. */ 438 if (tmp1 != NULL) { 439 if (unlink(tmp1) != 0) 440 warn("failed to delete %s", tmp1); 441 free(tmp1); 442 } 443 if (tmp2 != NULL) { 444 if (unlink(tmp2) != 0) 445 warn("failed to delete %s", tmp2); 446 free(tmp2); 447 } 448 449 /* Return diff exit status. */ 450 free(diffargv); 451 if (flagc > 0) 452 free(flagv); 453 return (ret); 454 } 455 456 /* 457 * When sdiff detects a binary file as input. 458 */ 459 static int 460 bindiff(FILE *f1, char *fn1, FILE *f2, char *fn2) 461 { 462 int ch1, ch2; 463 464 flockfile(f1); 465 flockfile(f2); 466 do { 467 ch1 = getc_unlocked(f1); 468 ch2 = getc_unlocked(f2); 469 } while (ch1 != EOF && ch2 != EOF && ch1 == ch2); 470 funlockfile(f2); 471 funlockfile(f1); 472 if (ferror(f1)) { 473 warn("%s", fn1); 474 return (2); 475 } 476 if (ferror(f2)) { 477 warn("%s", fn2); 478 return (2); 479 } 480 if (ch1 != EOF || ch2 != EOF) { 481 printf("Binary files %s and %s differ\n", fn1, fn2); 482 return (1); 483 } 484 return (0); 485 } 486 487 /* 488 * Checks whether a file appears to be a text file. 489 */ 490 static int 491 istextfile(FILE *f) 492 { 493 int ch, i; 494 495 if (f == NULL) 496 return (1); 497 rewind(f); 498 for (i = 0; i <= MAX_CHECK; i++) { 499 ch = fgetc(f); 500 if (ch == '\0') { 501 rewind(f); 502 return (0); 503 } 504 if (ch == EOF) 505 break; 506 } 507 rewind(f); 508 return (1); 509 } 510 511 /* 512 * Prints an individual column (left or right), taking into account 513 * that tabs are variable-width. Takes a string, the current column 514 * the cursor is on the screen, and the maximum value of the column. 515 * The column value is updated as we go along. 516 */ 517 static void 518 printcol(const char *s, size_t *col, const size_t col_max) 519 { 520 521 for (; *s && *col < col_max; ++s) { 522 size_t new_col; 523 524 switch (*s) { 525 case '\t': 526 /* 527 * If rounding to next multiple of eight causes 528 * an integer overflow, just return. 529 */ 530 if (*col > SIZE_MAX - tabsize) 531 return; 532 533 /* Round to next multiple of eight. */ 534 new_col = (*col / tabsize + 1) * tabsize; 535 536 /* 537 * If printing the tab goes past the column 538 * width, don't print it and just quit. 539 */ 540 if (new_col > col_max) 541 return; 542 543 if (tflag) { 544 do { 545 putchar(' '); 546 } while (++*col < new_col); 547 } else { 548 putchar(*s); 549 *col = new_col; 550 } 551 break; 552 default: 553 ++*col; 554 putchar(*s); 555 } 556 } 557 } 558 559 /* 560 * Prompts user to either choose between two strings or edit one, both, 561 * or neither. 562 */ 563 static void 564 prompt(const char *s1, const char *s2) 565 { 566 char *cmd; 567 568 /* Print command prompt. */ 569 putchar('%'); 570 571 /* Get user input. */ 572 for (; (cmd = xfgets(stdin)); free(cmd)) { 573 const char *p; 574 575 /* Skip leading whitespace. */ 576 for (p = cmd; isspace((unsigned char)*p); ++p) 577 ; 578 switch (*p) { 579 case 'e': 580 /* Skip `e'. */ 581 ++p; 582 if (eparse(p, s1, s2) == -1) 583 goto USAGE; 584 break; 585 case 'l': 586 case '1': 587 /* Choose left column as-is. */ 588 if (s1 != NULL) 589 fprintf(outfp, "%s\n", s1); 590 /* End of command parsing. */ 591 break; 592 case 'q': 593 goto QUIT; 594 case 'r': 595 case '2': 596 /* Choose right column as-is. */ 597 if (s2 != NULL) 598 fprintf(outfp, "%s\n", s2); 599 /* End of command parsing. */ 600 break; 601 case 's': 602 sflag = true; 603 goto PROMPT; 604 case 'v': 605 sflag = false; 606 /* FALLTHROUGH */ 607 default: 608 /* Interactive usage help. */ 609 USAGE: 610 int_usage(); 611 PROMPT: 612 putchar('%'); 613 614 /* Prompt user again. */ 615 continue; 616 } 617 free(cmd); 618 return; 619 } 620 621 /* 622 * If there was no error, we received an EOF from stdin, so we 623 * should quit. 624 */ 625 QUIT: 626 fclose(outfp); 627 exit(0); 628 } 629 630 /* 631 * Takes two strings, separated by a column divider. NULL strings are 632 * treated as empty columns. If the divider is the ` ' character, the 633 * second column is not printed (-l flag). In this case, the second 634 * string must be NULL. When the second column is NULL, the divider 635 * does not print the trailing space following the divider character. 636 * 637 * Takes into account that tabs can take multiple columns. 638 */ 639 static void 640 println(const char *s1, const char divider, const char *s2) 641 { 642 size_t col; 643 644 /* Print first column. Skips if s1 == NULL. */ 645 col = 0; 646 if (s1) { 647 /* Skip angle bracket and space. */ 648 printcol(s1, &col, width); 649 650 } 651 652 /* Otherwise, we pad this column up to width. */ 653 for (; col < width; ++col) 654 putchar(' '); 655 656 /* Only print left column. */ 657 if (divider == ' ' && !s2) { 658 printf(" (\n"); 659 return; 660 } 661 662 /* 663 * Print column divider. If there is no second column, we don't 664 * need to add the space for padding. 665 */ 666 if (!s2) { 667 printf(" %c\n", divider); 668 return; 669 } 670 printf(" %c ", divider); 671 col += 3; 672 673 /* Skip angle bracket and space. */ 674 printcol(s2, &col, line_width); 675 676 putchar('\n'); 677 } 678 679 /* 680 * Reads a line from file and returns as a string. If EOF is reached, 681 * NULL is returned. The returned string must be freed afterwards. 682 */ 683 static char * 684 xfgets(FILE *file) 685 { 686 size_t linecap; 687 ssize_t l; 688 char *s; 689 690 clearerr(file); 691 linecap = 0; 692 s = NULL; 693 694 if ((l = getline(&s, &linecap, file)) == -1) { 695 if (ferror(file)) 696 err(2, "error reading file"); 697 return (NULL); 698 } 699 700 if (s[l-1] == '\n') 701 s[l-1] = '\0'; 702 703 return (s); 704 } 705 706 /* 707 * Parse ed commands from diffpipe and print lines from file1 (lines 708 * to change or delete) or file2 (lines to add or change). 709 * Returns EOF or 0. 710 */ 711 static int 712 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2) 713 { 714 size_t file1start, file1end, file2start, file2end, n; 715 /* ed command line and pointer to characters in line */ 716 char *line, *p, *q; 717 const char *errstr; 718 char c, cmd; 719 720 /* Read ed command. */ 721 if (!(line = xfgets(diffpipe))) 722 return (EOF); 723 724 p = line; 725 /* Go to character after line number. */ 726 while (isdigit((unsigned char)*p)) 727 ++p; 728 c = *p; 729 *p++ = 0; 730 file1start = strtonum(line, 0, INT_MAX, &errstr); 731 if (errstr) 732 errx(2, "file1 start is %s: %s", errstr, line); 733 734 /* A range is specified for file1. */ 735 if (c == ',') { 736 q = p; 737 /* Go to character after file2end. */ 738 while (isdigit((unsigned char)*p)) 739 ++p; 740 c = *p; 741 *p++ = 0; 742 file1end = strtonum(q, 0, INT_MAX, &errstr); 743 if (errstr) 744 errx(2, "file1 end is %s: %s", errstr, line); 745 if (file1start > file1end) 746 errx(2, "invalid line range in file1: %s", line); 747 } else 748 file1end = file1start; 749 750 cmd = c; 751 /* Check that cmd is valid. */ 752 if (!(cmd == 'a' || cmd == 'c' || cmd == 'd')) 753 errx(2, "ed command not recognized: %c: %s", cmd, line); 754 755 q = p; 756 /* Go to character after line number. */ 757 while (isdigit((unsigned char)*p)) 758 ++p; 759 c = *p; 760 *p++ = 0; 761 file2start = strtonum(q, 0, INT_MAX, &errstr); 762 if (errstr) 763 errx(2, "file2 start is %s: %s", errstr, line); 764 765 /* 766 * There should either be a comma signifying a second line 767 * number or the line should just end here. 768 */ 769 if (c != ',' && c != '\0') 770 errx(2, "invalid line range in file2: %c: %s", c, line); 771 772 if (c == ',') { 773 774 file2end = strtonum(p, 0, INT_MAX, &errstr); 775 if (errstr) 776 errx(2, "file2 end is %s: %s", errstr, line); 777 if (file2start >= file2end) 778 errx(2, "invalid line range in file2: %s", line); 779 } else 780 file2end = file2start; 781 782 /* Appends happen _after_ stated line. */ 783 if (cmd == 'a') { 784 if (file1start != file1end) 785 errx(2, "append cannot have a file1 range: %s", 786 line); 787 if (file1start == SIZE_MAX) 788 errx(2, "file1 line range too high: %s", line); 789 file1start = ++file1end; 790 } 791 /* 792 * I'm not sure what the deal is with the line numbers for 793 * deletes, though. 794 */ 795 else if (cmd == 'd') { 796 if (file2start != file2end) 797 errx(2, "delete cannot have a file2 range: %s", 798 line); 799 if (file2start == SIZE_MAX) 800 errx(2, "file2 line range too high: %s", line); 801 file2start = ++file2end; 802 } 803 804 /* 805 * Continue reading file1 and file2 until we reach line numbers 806 * specified by diff. Should only happen with -I flag. 807 */ 808 for (; file1ln < file1start && file2ln < file2start; 809 ++file1ln, ++file2ln) { 810 char *s1, *s2; 811 812 if (!(s1 = xfgets(file1))) 813 errx(2, "file1 shorter than expected"); 814 if (!(s2 = xfgets(file2))) 815 errx(2, "file2 shorter than expected"); 816 817 /* If the -l flag was specified, print only left column. */ 818 if (lflag) { 819 free(s2); 820 /* 821 * XXX - If -l and -I are both specified, all 822 * unchanged or ignored lines are shown with a 823 * `(' divider. This matches GNU sdiff, but I 824 * believe it is a bug. Just check out: 825 * gsdiff -l -I '^$' samefile samefile. 826 */ 827 if (Iflag) 828 enqueue(s1, '(', NULL); 829 else 830 enqueue(s1, ' ', NULL); 831 } else 832 enqueue(s1, ' ', s2); 833 } 834 /* Ignore deleted lines. */ 835 for (; file1ln < file1start; ++file1ln) { 836 char *s; 837 838 if (!(s = xfgets(file1))) 839 errx(2, "file1 shorter than expected"); 840 841 enqueue(s, '(', NULL); 842 } 843 /* Ignore added lines. */ 844 for (; file2ln < file2start; ++file2ln) { 845 char *s; 846 847 if (!(s = xfgets(file2))) 848 errx(2, "file2 shorter than expected"); 849 850 /* If -l flag was given, don't print right column. */ 851 if (lflag) 852 free(s); 853 else 854 enqueue(NULL, ')', s); 855 } 856 857 /* Process unmodified or skipped lines. */ 858 processq(); 859 860 switch (cmd) { 861 case 'a': 862 printa(file2, file2end); 863 n = file2end - file2start + 1; 864 break; 865 case 'c': 866 printc(file1, file1end, file2, file2end); 867 n = file1end - file1start + 1 + 1 + file2end - file2start + 1; 868 break; 869 case 'd': 870 printd(file1, file1end); 871 n = file1end - file1start + 1; 872 break; 873 default: 874 errx(2, "invalid diff command: %c: %s", cmd, line); 875 } 876 free(line); 877 878 /* Skip to next ed line. */ 879 while (n--) { 880 if (!(line = xfgets(diffpipe))) 881 errx(2, "diff ended early"); 882 free(line); 883 } 884 885 return (0); 886 } 887 888 /* 889 * Queues up a diff line. 890 */ 891 static void 892 enqueue(char *left, char divider, char *right) 893 { 894 struct diffline *diffp; 895 896 if (!(diffp = malloc(sizeof(struct diffline)))) 897 err(2, "enqueue"); 898 diffp->left = left; 899 diffp->div = divider; 900 diffp->right = right; 901 STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries); 902 } 903 904 /* 905 * Free a diffline structure and its elements. 906 */ 907 static void 908 freediff(struct diffline *diffp) 909 { 910 911 free(diffp->left); 912 free(diffp->right); 913 free(diffp); 914 } 915 916 /* 917 * Append second string into first. Repeated appends to the same string 918 * are cached, making this an O(n) function, where n = strlen(append). 919 */ 920 static void 921 astrcat(char **s, const char *append) 922 { 923 /* Length of string in previous run. */ 924 static size_t offset = 0; 925 size_t newsiz; 926 /* 927 * String from previous run. Compared to *s to see if we are 928 * dealing with the same string. If so, we can use offset. 929 */ 930 static const char *oldstr = NULL; 931 char *newstr; 932 933 /* 934 * First string is NULL, so just copy append. 935 */ 936 if (!*s) { 937 if (!(*s = strdup(append))) 938 err(2, "astrcat"); 939 940 /* Keep track of string. */ 941 offset = strlen(*s); 942 oldstr = *s; 943 944 return; 945 } 946 947 /* 948 * *s is a string so concatenate. 949 */ 950 951 /* Did we process the same string in the last run? */ 952 /* 953 * If this is a different string from the one we just processed 954 * cache new string. 955 */ 956 if (oldstr != *s) { 957 offset = strlen(*s); 958 oldstr = *s; 959 } 960 961 /* Size = strlen(*s) + \n + strlen(append) + '\0'. */ 962 newsiz = offset + 1 + strlen(append) + 1; 963 964 /* Resize *s to fit new string. */ 965 newstr = realloc(*s, newsiz); 966 if (newstr == NULL) 967 err(2, "astrcat"); 968 *s = newstr; 969 970 /* *s + offset should be end of string. */ 971 /* Concatenate. */ 972 strlcpy(*s + offset, "\n", newsiz - offset); 973 strlcat(*s + offset, append, newsiz - offset); 974 975 /* New string length should be exactly newsiz - 1 characters. */ 976 /* Store generated string's values. */ 977 offset = newsiz - 1; 978 oldstr = *s; 979 } 980 981 /* 982 * Process diff set queue, printing, prompting, and saving each diff 983 * line stored in queue. 984 */ 985 static void 986 processq(void) 987 { 988 struct diffline *diffp; 989 char divc, *left, *right; 990 991 /* Don't process empty queue. */ 992 if (STAILQ_EMPTY(&diffhead)) 993 return; 994 995 /* Remember the divider. */ 996 divc = STAILQ_FIRST(&diffhead)->div; 997 998 left = NULL; 999 right = NULL; 1000 /* 1001 * Go through set of diffs, concatenating each line in left or 1002 * right column into two long strings, `left' and `right'. 1003 */ 1004 STAILQ_FOREACH(diffp, &diffhead, diffentries) { 1005 /* 1006 * Print changed lines if -s was given, 1007 * print all lines if -s was not given. 1008 */ 1009 if (!sflag || diffp->div == '|' || diffp->div == '<' || 1010 diffp->div == '>') 1011 println(diffp->left, diffp->div, diffp->right); 1012 1013 /* Append new lines to diff set. */ 1014 if (diffp->left) 1015 astrcat(&left, diffp->left); 1016 if (diffp->right) 1017 astrcat(&right, diffp->right); 1018 } 1019 1020 /* Empty queue and free each diff line and its elements. */ 1021 while (!STAILQ_EMPTY(&diffhead)) { 1022 diffp = STAILQ_FIRST(&diffhead); 1023 STAILQ_REMOVE_HEAD(&diffhead, diffentries); 1024 freediff(diffp); 1025 } 1026 1027 /* Write to outfp, prompting user if lines are different. */ 1028 if (outfp) 1029 switch (divc) { 1030 case ' ': case '(': case ')': 1031 fprintf(outfp, "%s\n", left); 1032 break; 1033 case '|': case '<': case '>': 1034 prompt(left, right); 1035 break; 1036 default: 1037 errx(2, "invalid divider: %c", divc); 1038 } 1039 1040 /* Free left and right. */ 1041 free(left); 1042 free(right); 1043 } 1044 1045 /* 1046 * Print lines following an (a)ppend command. 1047 */ 1048 static void 1049 printa(FILE *file, size_t line2) 1050 { 1051 char *line; 1052 1053 for (; file2ln <= line2; ++file2ln) { 1054 if (!(line = xfgets(file))) 1055 errx(2, "append ended early"); 1056 enqueue(NULL, '>', line); 1057 } 1058 processq(); 1059 } 1060 1061 /* 1062 * Print lines following a (c)hange command, from file1ln to file1end 1063 * and from file2ln to file2end. 1064 */ 1065 static void 1066 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end) 1067 { 1068 struct fileline { 1069 STAILQ_ENTRY(fileline) fileentries; 1070 char *line; 1071 }; 1072 STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead); 1073 1074 /* Read lines to be deleted. */ 1075 for (; file1ln <= file1end; ++file1ln) { 1076 struct fileline *linep; 1077 char *line1; 1078 1079 /* Read lines from both. */ 1080 if (!(line1 = xfgets(file1))) 1081 errx(2, "error reading file1 in delete in change"); 1082 1083 /* Add to delete queue. */ 1084 if (!(linep = malloc(sizeof(struct fileline)))) 1085 err(2, "printc"); 1086 linep->line = line1; 1087 STAILQ_INSERT_TAIL(&delqhead, linep, fileentries); 1088 } 1089 1090 /* Process changed lines.. */ 1091 for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end; 1092 ++file2ln) { 1093 struct fileline *del; 1094 char *add; 1095 1096 /* Get add line. */ 1097 if (!(add = xfgets(file2))) 1098 errx(2, "error reading add in change"); 1099 1100 del = STAILQ_FIRST(&delqhead); 1101 enqueue(del->line, '|', add); 1102 STAILQ_REMOVE_HEAD(&delqhead, fileentries); 1103 /* 1104 * Free fileline structure but not its elements since 1105 * they are queued up. 1106 */ 1107 free(del); 1108 } 1109 processq(); 1110 1111 /* Process remaining lines to add. */ 1112 for (; file2ln <= file2end; ++file2ln) { 1113 char *add; 1114 1115 /* Get add line. */ 1116 if (!(add = xfgets(file2))) 1117 errx(2, "error reading add in change"); 1118 1119 enqueue(NULL, '>', add); 1120 } 1121 processq(); 1122 1123 /* Process remaining lines to delete. */ 1124 while (!STAILQ_EMPTY(&delqhead)) { 1125 struct fileline *filep; 1126 1127 filep = STAILQ_FIRST(&delqhead); 1128 enqueue(filep->line, '<', NULL); 1129 STAILQ_REMOVE_HEAD(&delqhead, fileentries); 1130 free(filep); 1131 } 1132 processq(); 1133 } 1134 1135 /* 1136 * Print deleted lines from file, from file1ln to file1end. 1137 */ 1138 static void 1139 printd(FILE *file1, size_t file1end) 1140 { 1141 char *line1; 1142 1143 /* Print out lines file1ln to line2. */ 1144 for (; file1ln <= file1end; ++file1ln) { 1145 if (!(line1 = xfgets(file1))) 1146 errx(2, "file1 ended early in delete"); 1147 enqueue(line1, '<', NULL); 1148 } 1149 processq(); 1150 } 1151 1152 /* 1153 * Interactive mode usage. 1154 */ 1155 static void 1156 int_usage(void) 1157 { 1158 1159 puts("e:\tedit blank diff\n" 1160 "eb:\tedit both diffs concatenated\n" 1161 "el:\tedit left diff\n" 1162 "er:\tedit right diff\n" 1163 "l | 1:\tchoose left diff\n" 1164 "r | 2:\tchoose right diff\n" 1165 "s:\tsilent mode--don't print identical lines\n" 1166 "v:\tverbose mode--print identical lines\n" 1167 "q:\tquit"); 1168 } 1169 1170 static void 1171 usage(void) 1172 { 1173 1174 fprintf(stderr, 1175 "usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1" 1176 " file2\n"); 1177 exit(2); 1178 } 1179