1 /* $OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */ 2 3 /* 4 * Written by Raymond Lai <ray@cyth.net>. 5 * Public domain. 6 */ 7 8 #include <sys/cdefs.h> 9 __FBSDID("$FreeBSD$"); 10 11 #include <sys/param.h> 12 #include <sys/queue.h> 13 #include <sys/stat.h> 14 #include <sys/types.h> 15 #include <sys/wait.h> 16 17 #include <ctype.h> 18 #include <err.h> 19 #include <errno.h> 20 #include <fcntl.h> 21 #include <getopt.h> 22 #include <limits.h> 23 #include <paths.h> 24 #include <stdint.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <unistd.h> 29 30 #include "extern.h" 31 32 #define DIFF_PATH "/usr/bin/diff" 33 34 #define WIDTH 126 35 /* 36 * Each column must be at least one character wide, plus three 37 * characters between the columns (space, [<|>], space). 38 */ 39 #define WIDTH_MIN 5 40 41 /* 3 kilobytes of chars */ 42 #define MAX_CHECK 768 43 44 /* A single diff line. */ 45 struct diffline { 46 STAILQ_ENTRY(diffline) diffentries; 47 char *left; 48 char div; 49 char *right; 50 }; 51 52 static void astrcat(char **, const char *); 53 static void enqueue(char *, char, char *); 54 static char *mktmpcpy(const char *); 55 static int istextfile(FILE *); 56 static void binexec(char *, char *, char *) __dead2; 57 static void freediff(struct diffline *); 58 static void int_usage(void); 59 static int parsecmd(FILE *, FILE *, FILE *); 60 static void printa(FILE *, size_t); 61 static void printc(FILE *, size_t, FILE *, size_t); 62 static void printcol(const char *, size_t *, const size_t); 63 static void printd(FILE *, size_t); 64 static void println(const char *, const char, const char *); 65 static void processq(void); 66 static void prompt(const char *, const char *); 67 static void usage(void) __dead2; 68 static char *xfgets(FILE *); 69 70 static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead); 71 static size_t line_width; /* width of a line (two columns and divider) */ 72 static size_t width; /* width of each column */ 73 static size_t file1ln, file2ln; /* line number of file1 and file2 */ 74 static int Iflag = 0; /* ignore sets matching regexp */ 75 static int lflag; /* print only left column for identical lines */ 76 static int sflag; /* skip identical lines */ 77 FILE *outfp; /* file to save changes to */ 78 const char *tmpdir; /* TMPDIR or /tmp */ 79 80 enum { 81 HELP_OPT = CHAR_MAX + 1, 82 NORMAL_OPT, 83 FCASE_SENSITIVE_OPT, 84 FCASE_IGNORE_OPT, 85 STRIPCR_OPT, 86 TSIZE_OPT, 87 DIFFPROG_OPT, 88 }; 89 90 static struct option longopts[] = { 91 /* options only processed in sdiff */ 92 { "suppress-common-lines", no_argument, NULL, 's' }, 93 { "width", required_argument, NULL, 'w' }, 94 95 { "output", required_argument, NULL, 'o' }, 96 { "diff-program", required_argument, NULL, DIFFPROG_OPT }, 97 98 /* Options processed by diff. */ 99 { "ignore-file-name-case", no_argument, NULL, FCASE_IGNORE_OPT }, 100 { "no-ignore-file-name-case", no_argument, NULL, FCASE_SENSITIVE_OPT }, 101 { "strip-trailing-cr", no_argument, NULL, STRIPCR_OPT }, 102 { "tabsize", required_argument, NULL, TSIZE_OPT }, 103 { "help", no_argument, NULL, HELP_OPT }, 104 { "text", no_argument, NULL, 'a' }, 105 { "ignore-blank-lines", no_argument, NULL, 'B' }, 106 { "ignore-space-change", no_argument, NULL, 'b' }, 107 { "minimal", no_argument, NULL, 'd' }, 108 { "ignore-tab-expansion", no_argument, NULL, 'E' }, 109 { "ignore-matching-lines", required_argument, NULL, 'I' }, 110 { "ignore-case", no_argument, NULL, 'i' }, 111 { "left-column", no_argument, NULL, 'l' }, 112 { "expand-tabs", no_argument, NULL, 't' }, 113 { "speed-large-files", no_argument, NULL, 'H' }, 114 { "ignore-all-space", no_argument, NULL, 'W' }, 115 116 { NULL, 0, NULL, '\0'} 117 }; 118 119 static const char *help_msg[] = { 120 "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n", 121 "-l, --left-column: only print the left column for identical lines.", 122 "-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.", 123 "-s, --suppress-common-lines: skip identical lines.", 124 "-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.", 125 "", 126 "Options passed to diff(1) are:", 127 "\t-a, --text: treat file1 and file2 as text files.", 128 "\t-b, --ignore-trailing-cr: ignore trailing blank spaces.", 129 "\t-d, --minimal: minimize diff size.", 130 "\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.", 131 "\t-i, --ignore-case: do a case-insensitive comparison.", 132 "\t-t, --expand-tabs: sxpand tabs to spaces.", 133 "\t-W, --ignore-all-spaces: ignore all spaces.", 134 "\t--speed-large-files: assume large file with scattered changes.", 135 "\t--strip-trailing-cr: strip trailing carriage return.", 136 "\t--ignore-file-name-case: ignore case of file names.", 137 "\t--no-ignore-file-name-case: do not ignore file name case", 138 "\t--tabsize NUM: change size of tabs (default 8.)", 139 140 NULL, 141 }; 142 143 /* 144 * Create temporary file if source_file is not a regular file. 145 * Returns temporary file name if one was malloced, NULL if unnecessary. 146 */ 147 static char * 148 mktmpcpy(const char *source_file) 149 { 150 struct stat sb; 151 ssize_t rcount; 152 int ifd, ofd; 153 u_char buf[BUFSIZ]; 154 char *target_file; 155 156 /* Open input and output. */ 157 ifd = open(source_file, O_RDONLY, 0); 158 /* File was opened successfully. */ 159 if (ifd != -1) { 160 if (fstat(ifd, &sb) == -1) 161 err(2, "error getting file status from %s", source_file); 162 163 /* Regular file. */ 164 if (S_ISREG(sb.st_mode)) { 165 close(ifd); 166 return (NULL); 167 } 168 } else { 169 /* If ``-'' does not exist the user meant stdin. */ 170 if (errno == ENOENT && strcmp(source_file, "-") == 0) 171 ifd = STDIN_FILENO; 172 else 173 err(2, "error opening %s", source_file); 174 } 175 176 /* Not a regular file, so copy input into temporary file. */ 177 if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1) 178 err(2, "asprintf"); 179 if ((ofd = mkstemp(target_file)) == -1) { 180 warn("error opening %s", target_file); 181 goto FAIL; 182 } 183 while ((rcount = read(ifd, buf, sizeof(buf))) != -1 && 184 rcount != 0) { 185 ssize_t wcount; 186 187 wcount = write(ofd, buf, (size_t)rcount); 188 if (-1 == wcount || rcount != wcount) { 189 warn("error writing to %s", target_file); 190 goto FAIL; 191 } 192 } 193 if (rcount == -1) { 194 warn("error reading from %s", source_file); 195 goto FAIL; 196 } 197 198 close(ifd); 199 close(ofd); 200 201 return (target_file); 202 203 FAIL: 204 unlink(target_file); 205 exit(2); 206 } 207 208 int 209 main(int argc, char **argv) 210 { 211 FILE *diffpipe=NULL, *file1, *file2; 212 size_t diffargc = 0, wflag = WIDTH; 213 int ch, fd[2] = {-1}, status; 214 pid_t pid=0; 215 const char *outfile = NULL; 216 char **diffargv, *diffprog = DIFF_PATH, *filename1, *filename2, 217 *tmp1, *tmp2, *s1, *s2; 218 int i; 219 220 /* 221 * Process diff flags. 222 */ 223 /* 224 * Allocate memory for diff arguments and NULL. 225 * Each flag has at most one argument, so doubling argc gives an 226 * upper limit of how many diff args can be passed. argv[0], 227 * file1, and file2 won't have arguments so doubling them will 228 * waste some memory; however we need an extra space for the 229 * NULL at the end, so it sort of works out. 230 */ 231 if (!(diffargv = calloc(argc, sizeof(char **) * 2))) 232 err(2, "main"); 233 234 /* Add first argument, the program name. */ 235 diffargv[diffargc++] = diffprog; 236 237 /* create a dynamic string for merging single-switch options */ 238 if ( asprintf(&diffargv[diffargc++], "-") < 0 ) 239 err(2, "main"); 240 241 while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:", 242 longopts, NULL)) != -1) { 243 const char *errstr; 244 245 switch (ch) { 246 /* only compatible --long-name-form with diff */ 247 case FCASE_IGNORE_OPT: 248 case FCASE_SENSITIVE_OPT: 249 case STRIPCR_OPT: 250 case TSIZE_OPT: 251 case 'S': 252 break; 253 /* combine no-arg single switches */ 254 case 'a': 255 case 'B': 256 case 'b': 257 case 'd': 258 case 'E': 259 case 'i': 260 case 't': 261 case 'W': 262 diffargv[1] = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2); 263 /* 264 * In diff, the 'W' option is 'w' and the 'w' is 'W'. 265 */ 266 if (ch == 'W') 267 sprintf(diffargv[1], "%sw", diffargv[1]); 268 else 269 sprintf(diffargv[1], "%s%c", diffargv[1], ch); 270 break; 271 case 'H': 272 diffargv[diffargc++] = "--speed-large-files"; 273 break; 274 case DIFFPROG_OPT: 275 diffargv[0] = diffprog = optarg; 276 break; 277 case 'I': 278 Iflag = 1; 279 diffargv[diffargc++] = "-I"; 280 diffargv[diffargc++] = optarg; 281 break; 282 case 'l': 283 lflag = 1; 284 break; 285 case 'o': 286 outfile = optarg; 287 break; 288 case 's': 289 sflag = 1; 290 break; 291 case 'w': 292 wflag = strtonum(optarg, WIDTH_MIN, 293 INT_MAX, &errstr); 294 if (errstr) 295 errx(2, "width is %s: %s", errstr, optarg); 296 break; 297 case HELP_OPT: 298 for (i = 0; help_msg[i] != NULL; i++) 299 printf("%s\n", help_msg[i]); 300 exit(0); 301 break; 302 default: 303 usage(); 304 break; 305 } 306 } 307 308 /* no single switches were used */ 309 if (strcmp(diffargv[1], "-") == 0 ) { 310 for ( i = 1; i < argc-1; i++) { 311 diffargv[i] = diffargv[i+1]; 312 } 313 diffargv[diffargc-1] = NULL; 314 diffargc--; 315 } 316 317 argc -= optind; 318 argv += optind; 319 320 if (argc != 2) 321 usage(); 322 323 if (outfile && (outfp = fopen(outfile, "w")) == NULL) 324 err(2, "could not open: %s", optarg); 325 326 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') 327 tmpdir = _PATH_TMP; 328 329 filename1 = argv[0]; 330 filename2 = argv[1]; 331 332 /* 333 * Create temporary files for diff and sdiff to share if file1 334 * or file2 are not regular files. This allows sdiff and diff 335 * to read the same inputs if one or both inputs are stdin. 336 * 337 * If any temporary files were created, their names would be 338 * saved in tmp1 or tmp2. tmp1 should never equal tmp2. 339 */ 340 tmp1 = tmp2 = NULL; 341 /* file1 and file2 are the same, so copy to same temp file. */ 342 if (strcmp(filename1, filename2) == 0) { 343 if ((tmp1 = mktmpcpy(filename1))) 344 filename1 = filename2 = tmp1; 345 /* Copy file1 and file2 into separate temp files. */ 346 } else { 347 if ((tmp1 = mktmpcpy(filename1))) 348 filename1 = tmp1; 349 if ((tmp2 = mktmpcpy(filename2))) 350 filename2 = tmp2; 351 } 352 353 diffargv[diffargc++] = filename1; 354 diffargv[diffargc++] = filename2; 355 /* Add NULL to end of array to indicate end of array. */ 356 diffargv[diffargc++] = NULL; 357 358 /* Subtract column divider and divide by two. */ 359 width = (wflag - 3) / 2; 360 /* Make sure line_width can fit in size_t. */ 361 if (width > (SIZE_MAX - 3) / 2) 362 errx(2, "width is too large: %zu", width); 363 line_width = width * 2 + 3; 364 365 if (pipe(fd)) 366 err(2, "pipe"); 367 368 switch (pid = fork()) { 369 case 0: 370 /* child */ 371 /* We don't read from the pipe. */ 372 close(fd[0]); 373 if (dup2(fd[1], STDOUT_FILENO) == -1) 374 err(2, "child could not duplicate descriptor"); 375 /* Free unused descriptor. */ 376 close(fd[1]); 377 execvp(diffprog, diffargv); 378 err(2, "could not execute diff: %s", diffprog); 379 break; 380 case -1: 381 err(2, "could not fork"); 382 break; 383 } 384 385 /* parent */ 386 /* We don't write to the pipe. */ 387 close(fd[1]); 388 389 /* Open pipe to diff command. */ 390 if ((diffpipe = fdopen(fd[0], "r")) == NULL) 391 err(2, "could not open diff pipe"); 392 393 if ((file1 = fopen(filename1, "r")) == NULL) 394 err(2, "could not open %s", filename1); 395 if ((file2 = fopen(filename2, "r")) == NULL) 396 err(2, "could not open %s", filename2); 397 if (!istextfile(file1) || !istextfile(file2)) { 398 /* Close open files and pipe, delete temps */ 399 fclose(file1); 400 fclose(file2); 401 if (diffpipe != NULL) 402 fclose(diffpipe); 403 if (tmp1) 404 if (unlink(tmp1)) 405 warn("Error deleting %s.", tmp1); 406 if (tmp2) 407 if (unlink(tmp2)) 408 warn("Error deleting %s.", tmp2); 409 free(tmp1); 410 free(tmp2); 411 binexec(diffprog, filename1, filename2); 412 } 413 /* Line numbers start at one. */ 414 file1ln = file2ln = 1; 415 416 /* Read and parse diff output. */ 417 while (parsecmd(diffpipe, file1, file2) != EOF) 418 ; 419 fclose(diffpipe); 420 421 /* Wait for diff to exit. */ 422 if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) || 423 WEXITSTATUS(status) >= 2) 424 err(2, "diff exited abnormally."); 425 426 /* Delete and free unneeded temporary files. */ 427 if (tmp1) 428 if (unlink(tmp1)) 429 warn("Error deleting %s.", tmp1); 430 if (tmp2) 431 if (unlink(tmp2)) 432 warn("Error deleting %s.", tmp2); 433 free(tmp1); 434 free(tmp2); 435 filename1 = filename2 = tmp1 = tmp2 = NULL; 436 437 /* No more diffs, so print common lines. */ 438 if (lflag) 439 while ((s1 = xfgets(file1))) 440 enqueue(s1, ' ', NULL); 441 else 442 for (;;) { 443 s1 = xfgets(file1); 444 s2 = xfgets(file2); 445 if (s1 || s2) 446 enqueue(s1, ' ', s2); 447 else 448 break; 449 } 450 fclose(file1); 451 fclose(file2); 452 /* Process unmodified lines. */ 453 processq(); 454 455 /* Return diff exit status. */ 456 return (WEXITSTATUS(status)); 457 } 458 459 /* 460 * When sdiff detects a binary file as input, executes them with 461 * diff to maintain the same behavior as GNU sdiff with binary input. 462 */ 463 static void 464 binexec(char *diffprog, char *f1, char *f2) 465 { 466 467 char *args[] = {diffprog, f1, f2, (char *) 0}; 468 execv(diffprog, args); 469 470 /* If execv() fails, sdiff's execution will continue below. */ 471 errx(1, "could not execute diff process"); 472 } 473 474 /* 475 * Checks whether a file appears to be a text file. 476 */ 477 static int 478 istextfile(FILE *f) 479 { 480 int ch, i; 481 482 if (f == NULL) 483 return (1); 484 rewind(f); 485 for (i = 0; i <= MAX_CHECK; i++) { 486 ch = fgetc(f); 487 if (ch == '\0') { 488 rewind(f); 489 return (0); 490 } 491 if (ch == EOF) 492 break; 493 } 494 rewind(f); 495 return (1); 496 } 497 498 /* 499 * Prints an individual column (left or right), taking into account 500 * that tabs are variable-width. Takes a string, the current column 501 * the cursor is on the screen, and the maximum value of the column. 502 * The column value is updated as we go along. 503 */ 504 static void 505 printcol(const char *s, size_t *col, const size_t col_max) 506 { 507 508 for (; *s && *col < col_max; ++s) { 509 size_t new_col; 510 511 switch (*s) { 512 case '\t': 513 /* 514 * If rounding to next multiple of eight causes 515 * an integer overflow, just return. 516 */ 517 if (*col > SIZE_MAX - 8) 518 return; 519 520 /* Round to next multiple of eight. */ 521 new_col = (*col / 8 + 1) * 8; 522 523 /* 524 * If printing the tab goes past the column 525 * width, don't print it and just quit. 526 */ 527 if (new_col > col_max) 528 return; 529 *col = new_col; 530 break; 531 default: 532 ++(*col); 533 } 534 putchar(*s); 535 } 536 } 537 538 /* 539 * Prompts user to either choose between two strings or edit one, both, 540 * or neither. 541 */ 542 static void 543 prompt(const char *s1, const char *s2) 544 { 545 char *cmd; 546 547 /* Print command prompt. */ 548 putchar('%'); 549 550 /* Get user input. */ 551 for (; (cmd = xfgets(stdin)); free(cmd)) { 552 const char *p; 553 554 /* Skip leading whitespace. */ 555 for (p = cmd; isspace(*p); ++p) 556 ; 557 switch (*p) { 558 case 'e': 559 /* Skip `e'. */ 560 ++p; 561 if (eparse(p, s1, s2) == -1) 562 goto USAGE; 563 break; 564 case 'l': 565 case '1': 566 /* Choose left column as-is. */ 567 if (s1 != NULL) 568 fprintf(outfp, "%s\n", s1); 569 /* End of command parsing. */ 570 break; 571 case 'q': 572 goto QUIT; 573 case 'r': 574 case '2': 575 /* Choose right column as-is. */ 576 if (s2 != NULL) 577 fprintf(outfp, "%s\n", s2); 578 /* End of command parsing. */ 579 break; 580 case 's': 581 sflag = 1; 582 goto PROMPT; 583 case 'v': 584 sflag = 0; 585 /* FALLTHROUGH */ 586 default: 587 /* Interactive usage help. */ 588 USAGE: 589 int_usage(); 590 PROMPT: 591 putchar('%'); 592 593 /* Prompt user again. */ 594 continue; 595 } 596 free(cmd); 597 return; 598 } 599 600 /* 601 * If there was no error, we received an EOF from stdin, so we 602 * should quit. 603 */ 604 QUIT: 605 fclose(outfp); 606 exit(0); 607 } 608 609 /* 610 * Takes two strings, separated by a column divider. NULL strings are 611 * treated as empty columns. If the divider is the ` ' character, the 612 * second column is not printed (-l flag). In this case, the second 613 * string must be NULL. When the second column is NULL, the divider 614 * does not print the trailing space following the divider character. 615 * 616 * Takes into account that tabs can take multiple columns. 617 */ 618 static void 619 println(const char *s1, const char div, const char *s2) 620 { 621 size_t col; 622 623 /* Print first column. Skips if s1 == NULL. */ 624 col = 0; 625 if (s1) { 626 /* Skip angle bracket and space. */ 627 printcol(s1, &col, width); 628 629 } 630 631 /* Otherwise, we pad this column up to width. */ 632 for (; col < width; ++col) 633 putchar(' '); 634 635 /* Only print left column. */ 636 if (div == ' ' && !s2) { 637 printf(" (\n"); 638 return; 639 } 640 641 /* 642 * Print column divider. If there is no second column, we don't 643 * need to add the space for padding. 644 */ 645 if (!s2) { 646 printf(" %c\n", div); 647 return; 648 } 649 printf(" %c ", div); 650 col += 3; 651 652 /* Skip angle bracket and space. */ 653 printcol(s2, &col, line_width); 654 655 putchar('\n'); 656 } 657 658 /* 659 * Reads a line from file and returns as a string. If EOF is reached, 660 * NULL is returned. The returned string must be freed afterwards. 661 */ 662 static char * 663 xfgets(FILE *file) 664 { 665 size_t linecap; 666 ssize_t l; 667 char *s; 668 669 clearerr(file); 670 linecap = 0; 671 s = NULL; 672 673 if ((l = getline(&s, &linecap, file)) == -1) { 674 if (ferror(file)) 675 err(2, "error reading file"); 676 return (NULL); 677 } 678 679 if (s[l-1] == '\n') 680 s[l-1] = '\0'; 681 682 return (s); 683 } 684 685 /* 686 * Parse ed commands from diffpipe and print lines from file1 (lines 687 * to change or delete) or file2 (lines to add or change). 688 * Returns EOF or 0. 689 */ 690 static int 691 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2) 692 { 693 size_t file1start, file1end, file2start, file2end, n; 694 /* ed command line and pointer to characters in line */ 695 char *line, *p, *q; 696 const char *errstr; 697 char c, cmd; 698 699 /* Read ed command. */ 700 if (!(line = xfgets(diffpipe))) 701 return (EOF); 702 703 p = line; 704 /* Go to character after line number. */ 705 while (isdigit(*p)) 706 ++p; 707 c = *p; 708 *p++ = 0; 709 file1start = strtonum(line, 0, INT_MAX, &errstr); 710 if (errstr) 711 errx(2, "file1 start is %s: %s", errstr, line); 712 713 /* A range is specified for file1. */ 714 if (c == ',') { 715 q = p; 716 /* Go to character after file2end. */ 717 while (isdigit(*p)) 718 ++p; 719 c = *p; 720 *p++ = 0; 721 file1end = strtonum(q, 0, INT_MAX, &errstr); 722 if (errstr) 723 errx(2, "file1 end is %s: %s", errstr, line); 724 if (file1start > file1end) 725 errx(2, "invalid line range in file1: %s", line); 726 } else 727 file1end = file1start; 728 729 cmd = c; 730 /* Check that cmd is valid. */ 731 if (!(cmd == 'a' || cmd == 'c' || cmd == 'd')) 732 errx(2, "ed command not recognized: %c: %s", cmd, line); 733 734 q = p; 735 /* Go to character after line number. */ 736 while (isdigit(*p)) 737 ++p; 738 c = *p; 739 *p++ = 0; 740 file2start = strtonum(q, 0, INT_MAX, &errstr); 741 if (errstr) 742 errx(2, "file2 start is %s: %s", errstr, line); 743 744 /* 745 * There should either be a comma signifying a second line 746 * number or the line should just end here. 747 */ 748 if (c != ',' && c != '\0') 749 errx(2, "invalid line range in file2: %c: %s", c, line); 750 751 if (c == ',') { 752 753 file2end = strtonum(p, 0, INT_MAX, &errstr); 754 if (errstr) 755 errx(2, "file2 end is %s: %s", errstr, line); 756 if (file2start >= file2end) 757 errx(2, "invalid line range in file2: %s", line); 758 } else 759 file2end = file2start; 760 761 /* Appends happen _after_ stated line. */ 762 if (cmd == 'a') { 763 if (file1start != file1end) 764 errx(2, "append cannot have a file1 range: %s", 765 line); 766 if (file1start == SIZE_MAX) 767 errx(2, "file1 line range too high: %s", line); 768 file1start = ++file1end; 769 } 770 /* 771 * I'm not sure what the deal is with the line numbers for 772 * deletes, though. 773 */ 774 else if (cmd == 'd') { 775 if (file2start != file2end) 776 errx(2, "delete cannot have a file2 range: %s", 777 line); 778 if (file2start == SIZE_MAX) 779 errx(2, "file2 line range too high: %s", line); 780 file2start = ++file2end; 781 } 782 783 /* 784 * Continue reading file1 and file2 until we reach line numbers 785 * specified by diff. Should only happen with -I flag. 786 */ 787 for (; file1ln < file1start && file2ln < file2start; 788 ++file1ln, ++file2ln) { 789 char *s1, *s2; 790 791 if (!(s1 = xfgets(file1))) 792 errx(2, "file1 shorter than expected"); 793 if (!(s2 = xfgets(file2))) 794 errx(2, "file2 shorter than expected"); 795 796 /* If the -l flag was specified, print only left column. */ 797 if (lflag) { 798 free(s2); 799 /* 800 * XXX - If -l and -I are both specified, all 801 * unchanged or ignored lines are shown with a 802 * `(' divider. This matches GNU sdiff, but I 803 * believe it is a bug. Just check out: 804 * gsdiff -l -I '^$' samefile samefile. 805 */ 806 if (Iflag) 807 enqueue(s1, '(', NULL); 808 else 809 enqueue(s1, ' ', NULL); 810 } else 811 enqueue(s1, ' ', s2); 812 } 813 /* Ignore deleted lines. */ 814 for (; file1ln < file1start; ++file1ln) { 815 char *s; 816 817 if (!(s = xfgets(file1))) 818 errx(2, "file1 shorter than expected"); 819 820 enqueue(s, '(', NULL); 821 } 822 /* Ignore added lines. */ 823 for (; file2ln < file2start; ++file2ln) { 824 char *s; 825 826 if (!(s = xfgets(file2))) 827 errx(2, "file2 shorter than expected"); 828 829 /* If -l flag was given, don't print right column. */ 830 if (lflag) 831 free(s); 832 else 833 enqueue(NULL, ')', s); 834 } 835 836 /* Process unmodified or skipped lines. */ 837 processq(); 838 839 switch (cmd) { 840 case 'a': 841 printa(file2, file2end); 842 n = file2end - file2start + 1; 843 break; 844 case 'c': 845 printc(file1, file1end, file2, file2end); 846 n = file1end - file1start + 1 + 1 + file2end - file2start + 1; 847 break; 848 case 'd': 849 printd(file1, file1end); 850 n = file1end - file1start + 1; 851 break; 852 default: 853 errx(2, "invalid diff command: %c: %s", cmd, line); 854 } 855 free(line); 856 857 /* Skip to next ed line. */ 858 while (n--) { 859 if (!(line = xfgets(diffpipe))) 860 errx(2, "diff ended early"); 861 free(line); 862 } 863 864 return (0); 865 } 866 867 /* 868 * Queues up a diff line. 869 */ 870 static void 871 enqueue(char *left, char div, char *right) 872 { 873 struct diffline *diffp; 874 875 if (!(diffp = malloc(sizeof(struct diffline)))) 876 err(2, "enqueue"); 877 diffp->left = left; 878 diffp->div = div; 879 diffp->right = right; 880 STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries); 881 } 882 883 /* 884 * Free a diffline structure and its elements. 885 */ 886 static void 887 freediff(struct diffline *diffp) 888 { 889 890 free(diffp->left); 891 free(diffp->right); 892 free(diffp); 893 } 894 895 /* 896 * Append second string into first. Repeated appends to the same string 897 * are cached, making this an O(n) function, where n = strlen(append). 898 */ 899 static void 900 astrcat(char **s, const char *append) 901 { 902 /* Length of string in previous run. */ 903 static size_t offset = 0; 904 size_t newsiz; 905 /* 906 * String from previous run. Compared to *s to see if we are 907 * dealing with the same string. If so, we can use offset. 908 */ 909 static const char *oldstr = NULL; 910 char *newstr; 911 912 /* 913 * First string is NULL, so just copy append. 914 */ 915 if (!*s) { 916 if (!(*s = strdup(append))) 917 err(2, "astrcat"); 918 919 /* Keep track of string. */ 920 offset = strlen(*s); 921 oldstr = *s; 922 923 return; 924 } 925 926 /* 927 * *s is a string so concatenate. 928 */ 929 930 /* Did we process the same string in the last run? */ 931 /* 932 * If this is a different string from the one we just processed 933 * cache new string. 934 */ 935 if (oldstr != *s) { 936 offset = strlen(*s); 937 oldstr = *s; 938 } 939 940 /* Size = strlen(*s) + \n + strlen(append) + '\0'. */ 941 newsiz = offset + 1 + strlen(append) + 1; 942 943 /* Resize *s to fit new string. */ 944 newstr = realloc(*s, newsiz); 945 if (newstr == NULL) 946 err(2, "astrcat"); 947 *s = newstr; 948 949 /* *s + offset should be end of string. */ 950 /* Concatenate. */ 951 strlcpy(*s + offset, "\n", newsiz - offset); 952 strlcat(*s + offset, append, newsiz - offset); 953 954 /* New string length should be exactly newsiz - 1 characters. */ 955 /* Store generated string's values. */ 956 offset = newsiz - 1; 957 oldstr = *s; 958 } 959 960 /* 961 * Process diff set queue, printing, prompting, and saving each diff 962 * line stored in queue. 963 */ 964 static void 965 processq(void) 966 { 967 struct diffline *diffp; 968 char divc, *left, *right; 969 970 /* Don't process empty queue. */ 971 if (STAILQ_EMPTY(&diffhead)) 972 return; 973 974 /* Remember the divider. */ 975 divc = STAILQ_FIRST(&diffhead)->div; 976 977 left = NULL; 978 right = NULL; 979 /* 980 * Go through set of diffs, concatenating each line in left or 981 * right column into two long strings, `left' and `right'. 982 */ 983 STAILQ_FOREACH(diffp, &diffhead, diffentries) { 984 /* 985 * Print changed lines if -s was given, 986 * print all lines if -s was not given. 987 */ 988 if (!sflag || diffp->div == '|' || diffp->div == '<' || 989 diffp->div == '>') 990 println(diffp->left, diffp->div, diffp->right); 991 992 /* Append new lines to diff set. */ 993 if (diffp->left) 994 astrcat(&left, diffp->left); 995 if (diffp->right) 996 astrcat(&right, diffp->right); 997 } 998 999 /* Empty queue and free each diff line and its elements. */ 1000 while (!STAILQ_EMPTY(&diffhead)) { 1001 diffp = STAILQ_FIRST(&diffhead); 1002 STAILQ_REMOVE_HEAD(&diffhead, diffentries); 1003 freediff(diffp); 1004 } 1005 1006 /* Write to outfp, prompting user if lines are different. */ 1007 if (outfp) 1008 switch (divc) { 1009 case ' ': case '(': case ')': 1010 fprintf(outfp, "%s\n", left); 1011 break; 1012 case '|': case '<': case '>': 1013 prompt(left, right); 1014 break; 1015 default: 1016 errx(2, "invalid divider: %c", divc); 1017 } 1018 1019 /* Free left and right. */ 1020 free(left); 1021 free(right); 1022 } 1023 1024 /* 1025 * Print lines following an (a)ppend command. 1026 */ 1027 static void 1028 printa(FILE *file, size_t line2) 1029 { 1030 char *line; 1031 1032 for (; file2ln <= line2; ++file2ln) { 1033 if (!(line = xfgets(file))) 1034 errx(2, "append ended early"); 1035 enqueue(NULL, '>', line); 1036 } 1037 processq(); 1038 } 1039 1040 /* 1041 * Print lines following a (c)hange command, from file1ln to file1end 1042 * and from file2ln to file2end. 1043 */ 1044 static void 1045 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end) 1046 { 1047 struct fileline { 1048 STAILQ_ENTRY(fileline) fileentries; 1049 char *line; 1050 }; 1051 STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead); 1052 1053 /* Read lines to be deleted. */ 1054 for (; file1ln <= file1end; ++file1ln) { 1055 struct fileline *linep; 1056 char *line1; 1057 1058 /* Read lines from both. */ 1059 if (!(line1 = xfgets(file1))) 1060 errx(2, "error reading file1 in delete in change"); 1061 1062 /* Add to delete queue. */ 1063 if (!(linep = malloc(sizeof(struct fileline)))) 1064 err(2, "printc"); 1065 linep->line = line1; 1066 STAILQ_INSERT_TAIL(&delqhead, linep, fileentries); 1067 } 1068 1069 /* Process changed lines.. */ 1070 for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end; 1071 ++file2ln) { 1072 struct fileline *del; 1073 char *add; 1074 1075 /* Get add line. */ 1076 if (!(add = xfgets(file2))) 1077 errx(2, "error reading add in change"); 1078 1079 del = STAILQ_FIRST(&delqhead); 1080 enqueue(del->line, '|', add); 1081 STAILQ_REMOVE_HEAD(&delqhead, fileentries); 1082 /* 1083 * Free fileline structure but not its elements since 1084 * they are queued up. 1085 */ 1086 free(del); 1087 } 1088 processq(); 1089 1090 /* Process remaining lines to add. */ 1091 for (; file2ln <= file2end; ++file2ln) { 1092 char *add; 1093 1094 /* Get add line. */ 1095 if (!(add = xfgets(file2))) 1096 errx(2, "error reading add in change"); 1097 1098 enqueue(NULL, '>', add); 1099 } 1100 processq(); 1101 1102 /* Process remaining lines to delete. */ 1103 while (!STAILQ_EMPTY(&delqhead)) { 1104 struct fileline *filep; 1105 1106 filep = STAILQ_FIRST(&delqhead); 1107 enqueue(filep->line, '<', NULL); 1108 STAILQ_REMOVE_HEAD(&delqhead, fileentries); 1109 free(filep); 1110 } 1111 processq(); 1112 } 1113 1114 /* 1115 * Print deleted lines from file, from file1ln to file1end. 1116 */ 1117 static void 1118 printd(FILE *file1, size_t file1end) 1119 { 1120 char *line1; 1121 1122 /* Print out lines file1ln to line2. */ 1123 for (; file1ln <= file1end; ++file1ln) { 1124 if (!(line1 = xfgets(file1))) 1125 errx(2, "file1 ended early in delete"); 1126 enqueue(line1, '<', NULL); 1127 } 1128 processq(); 1129 } 1130 1131 /* 1132 * Interactive mode usage. 1133 */ 1134 static void 1135 int_usage(void) 1136 { 1137 1138 puts("e:\tedit blank diff\n" 1139 "eb:\tedit both diffs concatenated\n" 1140 "el:\tedit left diff\n" 1141 "er:\tedit right diff\n" 1142 "l | 1:\tchoose left diff\n" 1143 "r | 2:\tchoose right diff\n" 1144 "s:\tsilent mode--don't print identical lines\n" 1145 "v:\tverbose mode--print identical lines\n" 1146 "q:\tquit"); 1147 } 1148 1149 static void 1150 usage(void) 1151 { 1152 1153 fprintf(stderr, 1154 "usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1" 1155 " file2\n"); 1156 exit(2); 1157 } 1158