/* diff - compare files line by line

   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002,
   2004 Free Software Foundation, Inc.

   This file is part of GNU DIFF.

   GNU DIFF is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU DIFF is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GNU DIFF; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

#define GDIFF_MAIN
#include "diff.h"
#include "paths.h"
#include <c-stack.h>
#include <dirname.h>
#include <error.h>
#include <exclude.h>
#include <exit.h>
#include <exitfail.h>
#include <file-type.h>
#include <fnmatch.h>
#include <getopt.h>
#include <hard-locale.h>
#include <posixver.h>
#include <prepargs.h>
#include <quotesys.h>
#include <setmode.h>
#include <version-etc.h>
#include <xalloc.h>

/* Smallest gutter, in print columns, left between the two halves of
   side-by-side output; it may be overridden at build time.  */
#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif

/* The regexps collected from one repeatable option (-F or -I).
   add_regexp appends each pattern to `regexps'; summarize_regexp_list
   later compiles their disjunction into `buf'.  */
struct regexp_list
{
  char *regexps;        /* chars representing disjunction of the regexps */
  size_t len;           /* chars used in `regexps' */
  size_t size;          /* size malloc'ed for `regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does `regexps' represent a disjunction?  */
  struct re_pattern_buffer *buf;  /* pattern buffer handed to
                                     re_compile_pattern */
};

static int compare_files (struct comparison const *, char const *, char const *);
static void add_regexp (struct regexp_list *, char const *);
static void summarize_regexp_list (struct regexp_list *);
static void specify_style (enum output_style);
static void specify_value (char const **, char const *, char const *);
static void try_help (char const *, char const *) __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);

/* If comparing directories, compare their common subdirectories
   recursively.  */
static bool recursive;

/* In context diffs, show previous lines that match these regexps.  */
static struct regexp_list function_regexp_list;

/* Ignore changes affecting only lines that match these regexps.  */
static struct regexp_list ignore_regexp_list;

#if HAVE_SETMODE_DOS
/* Use binary I/O when reading and writing data (--binary).
   On POSIX hosts, this has no effect.  */
static bool binary;
#else
/* Without HAVE_SETMODE_DOS, `binary' is a compile-time constant, so
   expressions that test it need no run-time flag.  */
enum { binary = true };
#endif

/* When comparing directories, if a file appears only in one
   directory, treat it as present but empty in the other (-N).
   Then `patch' would create the file with appropriate contents.  */
static bool new_file;

/* When comparing directories, if a file appears only in the second
   directory of the two, treat it as present but empty in the other
   (--unidirectional-new-file).
   Then `patch' would create the file with appropriate contents.  */
static bool unidirectional_new_file;

/* Report files compared that are the same (-s).
   Normally nothing is output when that happens.  */
static bool report_identical_files;


/* Return a string containing the command options with which diff was invoked.
   Spaces appear between what were separate ARGV-elements.
   There is a space at the beginning but none at the end.
If there were no options, the result is an empty string.

   Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT,
   the length of that vector.  */

static char *
option_list (char **optionvec, int count)
{
  int i;
  size_t size = 1;      /* starts at 1 for the terminating null byte */
  char *result;
  char *p;

  /* First pass: add up the space needed.  quote_system_arg is called
     with a null buffer here; presumably it then only reports the
     quoted length -- confirm against quotesys.h.  */
  for (i = 0; i < count; i++)
    size += 1 + quote_system_arg ((char *) 0, optionvec[i]);

  p = result = xmalloc (size);

  /* Second pass: emit a separating space and then each quoted argument.  */
  for (i = 0; i < count; i++)
    {
      *p++ = ' ';
      p += quote_system_arg (p, optionvec[i]);
    }

  *p = 0;
  return result;
}


/* Return an option value suitable for add_exclude.  */

static int
exclude_options (void)
{
  /* This reads ignore_file_name_case at call time, so patterns given
     before and after --ignore-file-name-case may get different flags.  */
  return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
}

/* Short option letters passed to getopt_long; a trailing `:' marks an
   option that takes an argument.  */
static char const shortopts[] =
"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";

/* Values for long options that do not have single-letter equivalents.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TABSIZE_OPTION,
  TO_FILE_OPTION,

  /* These options must be in sequence.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* These options must be in sequence.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};

/* Option names used in diagnostics, indexed by the option value minus
   UNCHANGED_GROUP_FORMAT_OPTION (see the option switch in main).  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };

/* Likewise, indexed by the option value minus
   UNCHANGED_LINE_FORMAT_OPTION.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };

/* Long options passed to getopt_long.  The fields are name, argument
   kind (0 none, 1 required, 2 optional), flag pointer and the value
   that getopt_long returns.  */
static struct option const longopts[] =
{
  {"binary", 0, 0, BINARY_OPTION},
  {"brief", 0, 0, 'q'},
  {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
  {"context", 2, 0, 'C'},
  {"ed", 0, 0, 'e'},
  {"exclude", 1, 0, 'x'},
  {"exclude-from", 1, 0, 'X'},
  {"expand-tabs", 0, 0, 't'},
  {"forward-ed", 0, 0, 'f'},
  {"from-file", 1, 0, FROM_FILE_OPTION},
  {"help", 0, 0, HELP_OPTION},
  {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
  {"ifdef", 1, 0, 'D'},
  {"ignore-all-space", 0, 0, 'w'},
  {"ignore-blank-lines", 0, 0, 'B'},
  {"ignore-case", 0, 0, 'i'},
  {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
  {"ignore-matching-lines", 1, 0, 'I'},
  {"ignore-space-change", 0, 0, 'b'},
  {"ignore-tab-expansion", 0, 0, 'E'},
  {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
  {"initial-tab", 0, 0, 'T'},
  {"label", 1, 0, 'L'},
  {"left-column", 0, 0, LEFT_COLUMN_OPTION},
  {"line-format", 1, 0, LINE_FORMAT_OPTION},
  {"minimal", 0, 0, 'd'},
  {"new-file", 0, 0, 'N'},
  {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
  {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
  {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
  {"normal", 0, 0, NORMAL_OPTION},
  {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
  {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
  {"paginate", 0, 0, 'l'},
  {"rcs", 0, 0, 'n'},
  {"recursive", 0, 0, 'r'},
  {"report-identical-files", 0, 0, 's'},
  {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
  {"show-c-function", 0, 0, 'p'},
  {"show-function-line", 1, 0, 'F'},
  {"side-by-side", 0, 0, 'y'},
  {"speed-large-files", 0, 0, 'H'},
  {"starting-file", 1, 0, 'S'},
  {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
  {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
  {"tabsize", 1, 0, TABSIZE_OPTION},
  {"text", 0, 0, 'a'},
  {"to-file", 1, 0, TO_FILE_OPTION},
  {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
  {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
  {"unidirectional-new-file", 0, 0, 'P'},
  {"unified", 2, 0, 'U'},
  {"version", 0, 0, 'v'},
  {"width", 1, 0, 'W'},
  {0, 0, 0, 0}
};

/* Program entry point.  Decode the options in ARGV, derive the
   settings that depend on several options at once, then compare the
   operands.  With --from-file or --to-file every operand is compared
   against that file and the largest status is kept; otherwise exactly
   two operands are required.  The process exits with the resulting
   status; usage errors go through try_help.  */
int
main (int argc, char **argv)
{
  int exit_status = EXIT_SUCCESS;
  int c;
  int i;
  int prev = -1;                 /* option value seen on the previous
                                    iteration of the option loop */
  lin ocontext = -1;             /* context given by digit options (-NUM);
                                    negative if none was given */
  bool explicit_context = false; /* was -C or -U seen?  */
  size_t width = 0;              /* -W value; 0 until one is given */
  bool show_c_function = false;
  char const *from_file = 0;
  char const *to_file = 0;
  uintmax_t numval;
  char *numend;

  /* Do our initializations.  */
  exit_failure = 2;
  initialize_main (&argc, &argv);
  program_name = argv[0];
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);
  c_stack_action (0);
  function_regexp_list.buf = &function_regexp;
  ignore_regexp_list.buf = &ignore_regexp;
  re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
  excluded = new_exclude ();

  /* Decode the options.  */

  while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
    {
      switch (c)
        {
        case 0:
          break;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          /* Digit options form a context length.  A digit that follows
             another digit option extends the same decimal number;
             otherwise it starts a new one.  On overflow the value is
             pinned at LIN_MAX.  */
          if (! ISDIGIT (prev))
            ocontext = c - '0';
          else if (LIN_MAX / 10 < ocontext
                   || ((ocontext = 10 * ocontext + c - '0') < 0))
            ocontext = LIN_MAX;
          break;

        case 'a':
          text = true;
          break;

        case 'b':
          /* Only ever raise the white-space setting, so that a stronger
             option given earlier is not weakened.  */
          if (ignore_white_space < IGNORE_SPACE_CHANGE)
            ignore_white_space = IGNORE_SPACE_CHANGE;
          break;

        case 'B':
          ignore_blank_lines = true;
          break;

        case 'C':
        case 'U':
          {
            /* The argument is optional for the long forms (see
               longopts); a missing one means 3 lines.  */
            if (optarg)
              {
                numval = strtoumax (optarg, &numend, 10);
                if (*numend)
                  try_help ("invalid context length `%s'", optarg);
                if (LIN_MAX < numval)
                  numval = LIN_MAX;
              }
            else
              numval = 3;

            specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
            /* Repeated options keep the largest context requested.  */
            if (context < numval)
              context = numval;
            explicit_context = true;
          }
          break;

        case 'c':
          specify_style (OUTPUT_CONTEXT);
          if (context < 3)
            context = 3;
          break;

        case 'd':
          minimal = true;
          break;

        case 'D':
          specify_style (OUTPUT_IFDEF);
          {
            /* Each %c below is given the value 0, so sprintf writes
               several null-terminated strings back to back into B.
               The loop that follows hands them out one per
               group_format element.  */
            static char const C_ifdef_group_formats[] =
              "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
            char *b = xmalloc (sizeof C_ifdef_group_formats
                               + 7 * strlen (optarg) - 14 /* 7*"%s" */
                               - 8 /* 5*"%%" + 3*"%c" */);
            sprintf (b, C_ifdef_group_formats,
                     0,
                     optarg, optarg, 0,
                     optarg, optarg, 0,
                     optarg, optarg, optarg);
            for (i = 0; i < sizeof group_format / sizeof *group_format; i++)
              {
                specify_value (&group_format[i], b, "-D");
                b += strlen (b) + 1;    /* step past the null to the next string */
              }
          }
          break;

        case 'e':
          specify_style (OUTPUT_ED);
          break;

        case 'E':
          if (ignore_white_space < IGNORE_TAB_EXPANSION)
            ignore_white_space = IGNORE_TAB_EXPANSION;
          break;

        case 'f':
          specify_style (OUTPUT_FORWARD_ED);
          break;

        case 'F':
          add_regexp (&function_regexp_list, optarg);
          break;

        case 'h':
          /* Split the files into chunks for faster processing.
             Usually does not change the result.

             This currently has no effect.  */
          break;

        case 'H':
          speed_large_files = true;
          break;

        case 'i':
          ignore_case = true;
          break;

        case 'I':
          add_regexp (&ignore_regexp_list, optarg);
          break;

        case 'l':
          if (!pr_program[0])
            try_help ("pagination not supported on this host", 0);
          paginate = true;
#ifdef SIGCHLD
          /* Pagination requires forking and waiting, and
             System V fork+wait does not work if SIGCHLD is ignored.  */
          signal (SIGCHLD, SIG_DFL);
#endif
          break;

        case 'L':
          /* The first -L labels the first file, the second -L the
             second file; a third one is an error.  */
          if (!file_label[0])
            file_label[0] = optarg;
          else if (!file_label[1])
            file_label[1] = optarg;
          else
            fatal ("too many file label options");
          break;

        case 'n':
          specify_style (OUTPUT_RCS);
          break;

        case 'N':
          new_file = true;
          break;

        case 'p':
          /* The output style is chosen after the loop if no other
             option selects one.  */
          show_c_function = true;
          add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
          break;

        case 'P':
          unidirectional_new_file = true;
          break;

        case 'q':
          brief = true;
          break;

        case 'r':
          recursive = true;
          break;

        case 's':
          report_identical_files = true;
          break;

        case 'S':
          specify_value (&starting_file, optarg, "-S");
          break;

        case 't':
          expand_tabs = true;
          break;

        case 'T':
          initial_tab = true;
          break;

        case 'u':
          specify_style (OUTPUT_UNIFIED);
          if (context < 3)
            context = 3;
          break;

        case 'v':
          version_etc (stdout, "diff", PACKAGE_NAME, PACKAGE_VERSION,
                       "Paul Eggert", "Mike Haertel", "David Hayes",
                       "Richard Stallman", "Len Tower", (char *) 0);
          check_stdout ();
          return EXIT_SUCCESS;

        case 'w':
          ignore_white_space = IGNORE_ALL_SPACE;
          break;

        case 'x':
          add_exclude (excluded, optarg, exclude_options ());
          break;

        case 'X':
          if (add_exclude_file (add_exclude, excluded, optarg,
                                exclude_options (), '\n'))
            pfatal_with_name (optarg);
          break;

        case 'y':
          specify_style (OUTPUT_SDIFF);
          break;

        case 'W':
          numval = strtoumax (optarg, &numend, 10);
          if (! (0 < numval && numval <= SIZE_MAX) || *numend)
            try_help ("invalid width `%s'", optarg);
          /* Repeating the same width is harmless; two different
             widths conflict.  */
          if (width != numval)
            {
              if (width)
                fatal ("conflicting width options");
              width = numval;
            }
          break;

        case BINARY_OPTION:
#if HAVE_SETMODE_DOS
          binary = true;
          set_binary_mode (STDOUT_FILENO, true);
#endif
          break;

        case FROM_FILE_OPTION:
          specify_value (&from_file, optarg, "--from-file");
          break;

        case HELP_OPTION:
          usage ();
          check_stdout ();
          return EXIT_SUCCESS;

        case HORIZON_LINES_OPTION:
          numval = strtoumax (optarg, &numend, 10);
          if (*numend)
            try_help ("invalid horizon length `%s'", optarg);
          horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
          break;

        case IGNORE_FILE_NAME_CASE_OPTION:
          ignore_file_name_case = true;
          break;

        case INHIBIT_HUNK_MERGE_OPTION:
          /* This option is obsolete, but accept it for backward
             compatibility.  */
          break;

        case LEFT_COLUMN_OPTION:
          left_column = true;
          break;

        case LINE_FORMAT_OPTION:
          /* --line-format sets every line format at once.  */
          specify_style (OUTPUT_IFDEF);
          for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
            specify_value (&line_format[i], optarg, "--line-format");
          break;

        case NO_IGNORE_FILE_NAME_CASE_OPTION:
          ignore_file_name_case = false;
          break;

        case NORMAL_OPTION:
          specify_style (OUTPUT_NORMAL);
          break;

        case SDIFF_MERGE_ASSIST_OPTION:
          specify_style (OUTPUT_SDIFF);
          sdiff_merge_assist = true;
          break;

        case STRIP_TRAILING_CR_OPTION:
          strip_trailing_cr = true;
          break;

        case SUPPRESS_COMMON_LINES_OPTION:
          suppress_common_lines = true;
          break;

        case TABSIZE_OPTION:
          numval = strtoumax (optarg, &numend, 10);
          if (! (0 < numval && numval <= SIZE_MAX) || *numend)
            try_help ("invalid tabsize `%s'", optarg);
          if (tabsize != numval)
            {
              if (tabsize)
                fatal ("conflicting tabsize options");
              tabsize = numval;
            }
          break;

        case TO_FILE_OPTION:
          specify_value (&to_file, optarg, "--to-file");
          break;

        case UNCHANGED_LINE_FORMAT_OPTION:
        case OLD_LINE_FORMAT_OPTION:
        case NEW_LINE_FORMAT_OPTION:
          specify_style (OUTPUT_IFDEF);
          /* Turn the option value into an index; this is why the enum
             constants must be in sequence.  */
          c -= UNCHANGED_LINE_FORMAT_OPTION;
          specify_value (&line_format[c], optarg, line_format_option[c]);
          break;

        case UNCHANGED_GROUP_FORMAT_OPTION:
        case OLD_GROUP_FORMAT_OPTION:
        case NEW_GROUP_FORMAT_OPTION:
        case CHANGED_GROUP_FORMAT_OPTION:
          specify_style (OUTPUT_IFDEF);
          c -= UNCHANGED_GROUP_FORMAT_OPTION;
          specify_value (&group_format[c], optarg, group_format_option[c]);
          break;

        default:
          try_help (0, 0);
        }
      /* Remembered so that the digit cases can tell whether the
         preceding option was also a digit.  Note that the format cases
         above have already turned C into an index by this point.  */
      prev = c;
    }

  /* No style option was given: -p alone selects a context diff (with
     3 lines of context unless a digit option gave a length), and
     otherwise the normal style is used.  */
  if (output_style == OUTPUT_UNSPECIFIED)
    {
      if (show_c_function)
        {
          specify_style (OUTPUT_CONTEXT);
          if (ocontext < 0)
            context = 3;
        }
      else
        specify_style (OUTPUT_NORMAL);
    }

  /* Choose the time stamp format for file headers.  */
  if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
    {
#ifdef ST_MTIM_NSEC
      time_format = "%Y-%m-%d %H:%M:%S.%N %z";
#else
      time_format = "%Y-%m-%d %H:%M:%S %z";
#endif
    }
  else
    {
      /* See POSIX 1003.1-2001 for this format.  */
      time_format = "%a %b %e %T %Y";
    }

  /* Reconcile a digit-option context with the rest.  When
     posix2_version reports 200112 or later the digit form is rejected
     with a hint; otherwise it is applied where it makes a difference.  */
  if (0 <= ocontext)
    {
      bool modern_usage = 200112 <= posix2_version ();

      if ((output_style == OUTPUT_CONTEXT
           || output_style == OUTPUT_UNIFIED)
          && (context < ocontext
              || (ocontext < context && ! explicit_context)))
        {
          if (modern_usage)
            {
              error (0, 0,
                     _("`-%ld' option is obsolete; use `-%c %ld'"),
                     (long int) ocontext,
                     output_style == OUTPUT_CONTEXT ? 'C' : 'U',
                     (long int) ocontext);
              try_help (0, 0);
            }
          context = ocontext;
        }
      else
        {
          if (modern_usage)
            {
              error (0, 0, _("`-%ld' option is obsolete; omit it"),
                     (long int) ocontext);
              try_help (0, 0);
            }
        }
    }

  /* Zero means "not specified"; fall back to the defaults.  */
  if (! tabsize)
    tabsize = 8;
  if (! width)
    width = 130;

  {
    /* Maximize first the half line width, and then the gutter width,
       according to the following constraints:

        1.  Two half lines plus a gutter must fit in a line.
        2.  If the half line width is nonzero:
            a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
            b.  If tabs are not expanded to spaces,
                a half line plus a gutter is an integral number of tabs,
                so that tabs in the right column line up.  */

    intmax_t t = expand_tabs ? 1 : tabsize;
    intmax_t w = width;
    intmax_t off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t;
    /* The next statement ends with a comma operator, so both
       assignments form a single expression statement.  */
    sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
    sdiff_column2_offset = sdiff_half_width ? off : w;
  }

  /* Make the horizon at least as large as the context, so that
     shift_boundaries has more freedom to shift the first and last hunks.  */
  if (horizon_lines < context)
    horizon_lines = context;

  summarize_regexp_list (&function_regexp_list);
  summarize_regexp_list (&ignore_regexp_list);

  /* Fill in the formats that were not given explicitly.  OLD and NEW
     fall back to an explicitly given CHANGED format.  CHANGED is
     tested only after OLD and NEW have been filled in, so that its own
     default is their concatenation.  */
  if (output_style == OUTPUT_IFDEF)
    {
      for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
        if (!line_format[i])
          line_format[i] = "%l\n";
      if (!group_format[OLD])
        group_format[OLD]
          = group_format[CHANGED] ? group_format[CHANGED] : "%<";
      if (!group_format[NEW])
        group_format[NEW]
          = group_format[CHANGED] ? group_format[CHANGED] : "%>";
      if (!group_format[UNCHANGED])
        group_format[UNCHANGED] = "%=";
      if (!group_format[CHANGED])
        group_format[CHANGED] = concat (group_format[OLD],
                                        group_format[NEW], "");
    }

  no_diff_means_no_output =
    (output_style == OUTPUT_IFDEF ?
      (!*group_format[UNCHANGED]
       || (strcmp (group_format[UNCHANGED], "%=") == 0
           && !*line_format[UNCHANGED]))
     : (output_style != OUTPUT_SDIFF) | suppress_common_lines);

  /* The flags are combined with bitwise operators.  For operands that
     are 0 or 1 the complement of 1 has a clear low bit, so the result
     is nonzero only when --brief is in effect, `binary' is set, and
     none of the listed ignore/strip settings is active.  */
  files_can_be_treated_as_binary =
    (brief & binary
     & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
          | (ignore_regexp_list.regexps || ignore_white_space)));

  /* Record the option arguments (everything before the first operand)
     as one string.  */
  switch_string = option_list (argv + 1, optind - 1);

  if (from_file)
    {
      if (to_file)
        fatal ("--from-file and --to-file both specified");
      else
        for (; optind < argc; optind++)
          {
            int status = compare_files ((struct comparison *) 0,
                                        from_file, argv[optind]);
            if (exit_status < status)
              exit_status = status;
          }
    }
  else
    {
      if (to_file)
        for (; optind < argc; optind++)
          {
            int status = compare_files ((struct comparison *) 0,
                                        argv[optind], to_file);
            if (exit_status < status)
              exit_status = status;
          }
      else
        {
          if (argc - optind != 2)
            {
              if (argc - optind < 2)
                try_help ("missing operand after `%s'", argv[argc - 1]);
              else
                try_help ("extra operand `%s'", argv[optind + 2]);
            }

          exit_status = compare_files ((struct comparison *) 0,
                                       argv[optind], argv[optind + 1]);
        }
    }

  /* Print any messages that were saved up for last.  */
  print_message_queue ();

  check_stdout ();
  exit (exit_status);
  return exit_status;
}

/* Append to REGLIST the regexp PATTERN.
*/ 776 777 static void 778 add_regexp (struct regexp_list *reglist, char const *pattern) 779 { 780 size_t patlen = strlen (pattern); 781 char const *m = re_compile_pattern (pattern, patlen, reglist->buf); 782 783 if (m != 0) 784 error (0, 0, "%s: %s", pattern, m); 785 else 786 { 787 char *regexps = reglist->regexps; 788 size_t len = reglist->len; 789 bool multiple_regexps = reglist->multiple_regexps = regexps != 0; 790 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen; 791 size_t size = reglist->size; 792 793 if (size <= newlen) 794 { 795 if (!size) 796 size = 1; 797 798 do size *= 2; 799 while (size <= newlen); 800 801 reglist->size = size; 802 reglist->regexps = regexps = xrealloc (regexps, size); 803 } 804 if (multiple_regexps) 805 { 806 regexps[len++] = '\\'; 807 regexps[len++] = '|'; 808 } 809 memcpy (regexps + len, pattern, patlen + 1); 810 } 811 } 812 813 /* Ensure that REGLIST represents the disjunction of its regexps. 814 This is done here, rather than earlier, to avoid O(N^2) behavior. */ 815 816 static void 817 summarize_regexp_list (struct regexp_list *reglist) 818 { 819 if (reglist->regexps) 820 { 821 /* At least one regexp was specified. Allocate a fastmap for it. */ 822 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT); 823 if (reglist->multiple_regexps) 824 { 825 /* Compile the disjunction of the regexps. 826 (If just one regexp was specified, it is already compiled.) 
*/ 827 char const *m = re_compile_pattern (reglist->regexps, reglist->len, 828 reglist->buf); 829 if (m != 0) 830 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m); 831 } 832 } 833 } 834 835 static void 836 try_help (char const *reason_msgid, char const *operand) 837 { 838 if (reason_msgid) 839 error (0, 0, _(reason_msgid), operand); 840 error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."), 841 program_name); 842 abort (); 843 } 844 845 static void 846 check_stdout (void) 847 { 848 if (ferror (stdout)) 849 fatal ("write failed"); 850 else if (fclose (stdout) != 0) 851 pfatal_with_name (_("standard output")); 852 } 853 854 static char const * const option_help_msgid[] = { 855 N_("Compare files line by line."), 856 "", 857 N_("-i --ignore-case Ignore case differences in file contents."), 858 N_("--ignore-file-name-case Ignore case when comparing file names."), 859 N_("--no-ignore-file-name-case Consider case when comparing file names."), 860 N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."), 861 N_("-b --ignore-space-change Ignore changes in the amount of white space."), 862 N_("-w --ignore-all-space Ignore all white space."), 863 N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."), 864 N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."), 865 N_("--strip-trailing-cr Strip trailing carriage return on input."), 866 #if HAVE_SETMODE_DOS 867 N_("--binary Read and write data in binary mode."), 868 #endif 869 N_("-a --text Treat all files as text."), 870 "", 871 N_("-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.\n\ 872 -u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.\n\ 873 --label LABEL Use LABEL instead of file name.\n\ 874 -p --show-c-function Show which C function each change is in.\n\ 875 -F RE --show-function-line=RE Show the most recent line matching RE."), 876 N_("-q --brief Output only whether files differ."), 877 N_("-e --ed 
Output an ed script."), 878 N_("--normal Output a normal diff."), 879 N_("-n --rcs Output an RCS format diff."), 880 N_("-y --side-by-side Output in two columns.\n\ 881 -W NUM --width=NUM Output at most NUM (default 130) print columns.\n\ 882 --left-column Output only the left column of common lines.\n\ 883 --suppress-common-lines Do not output common lines."), 884 N_("-D NAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs."), 885 N_("--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT."), 886 N_("--line-format=LFMT Similar, but format all input lines with LFMT."), 887 N_("--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT."), 888 N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."), 889 N_(" GFMT may contain:\n\ 890 %< lines from FILE1\n\ 891 %> lines from FILE2\n\ 892 %= lines common to FILE1 and FILE2\n\ 893 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\ 894 LETTERs are as follows for new group, lower case for old group:\n\ 895 F first line number\n\ 896 L last line number\n\ 897 N number of lines = L-F+1\n\ 898 E F-1\n\ 899 M L+1"), 900 N_(" LFMT may contain:\n\ 901 %L contents of line\n\ 902 %l contents of line, excluding any trailing newline\n\ 903 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"), 904 N_(" Either GFMT or LFMT may contain:\n\ 905 %% %\n\ 906 %c'C' the single character C\n\ 907 %c'\\OOO' the character with octal code OOO"), 908 "", 909 N_("-l --paginate Pass the output through `pr' to paginate it."), 910 N_("-t --expand-tabs Expand tabs to spaces in output."), 911 N_("-T --initial-tab Make tabs line up by prepending a tab."), 912 N_("--tabsize=NUM Tab stops are every NUM (default 8) print columns."), 913 "", 914 N_("-r --recursive Recursively compare any subdirectories found."), 915 N_("-N --new-file Treat absent files as empty."), 916 N_("--unidirectional-new-file Treat absent first files as empty."), 917 N_("-s 
--report-identical-files Report when two files are the same."), 918 N_("-x PAT --exclude=PAT Exclude files that match PAT."), 919 N_("-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE."), 920 N_("-S FILE --starting-file=FILE Start with FILE when comparing directories."), 921 N_("--from-file=FILE1 Compare FILE1 to all operands. FILE1 can be a directory."), 922 N_("--to-file=FILE2 Compare all operands to FILE2. FILE2 can be a directory."), 923 "", 924 N_("--horizon-lines=NUM Keep NUM lines of the common prefix and suffix."), 925 N_("-d --minimal Try hard to find a smaller set of changes."), 926 N_("--speed-large-files Assume large files and many scattered small changes."), 927 "", 928 N_("-v --version Output version info."), 929 N_("--help Output this help."), 930 "", 931 N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."), 932 N_("If --from-file or --to-file is given, there are no restrictions on FILES."), 933 N_("If a FILE is `-', read standard input."), 934 N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."), 935 "", 936 N_("Report bugs to <bug-gnu-utils@gnu.org>."), 937 0 938 }; 939 940 static void 941 usage (void) 942 { 943 char const * const *p; 944 945 printf (_("Usage: %s [OPTION]... FILES\n"), program_name); 946 947 for (p = option_help_msgid; *p; p++) 948 { 949 if (!**p) 950 putchar ('\n'); 951 else 952 { 953 char const *msg = _(*p); 954 char const *nl; 955 while ((nl = strchr (msg, '\n'))) 956 { 957 int msglen = nl + 1 - msg; 958 printf (" %.*s", msglen, msg); 959 msg = nl + 1; 960 } 961 962 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg); 963 } 964 } 965 } 966 967 /* Set VAR to VALUE, reporting an OPTION error if this is a 968 conflict. 
*/ 969 static void 970 specify_value (char const **var, char const *value, char const *option) 971 { 972 if (*var && strcmp (*var, value) != 0) 973 { 974 error (0, 0, _("conflicting %s option value `%s'"), option, value); 975 try_help (0, 0); 976 } 977 *var = value; 978 } 979 980 /* Set the output style to STYLE, diagnosing conflicts. */ 981 static void 982 specify_style (enum output_style style) 983 { 984 if (output_style != style) 985 { 986 if (output_style != OUTPUT_UNSPECIFIED) 987 try_help ("conflicting output style options", 0); 988 output_style = style; 989 } 990 } 991 992 /* Set the last-modified time of *ST to be the current time. */ 993 994 static void 995 set_mtime_to_now (struct stat *st) 996 { 997 #ifdef ST_MTIM_NSEC 998 999 # if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME 1000 if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0) 1001 return; 1002 # endif 1003 1004 # if HAVE_GETTIMEOFDAY 1005 { 1006 struct timeval timeval; 1007 if (gettimeofday (&timeval, 0) == 0) 1008 { 1009 st->st_mtime = timeval.tv_sec; 1010 st->st_mtim.ST_MTIM_NSEC = timeval.tv_usec * 1000; 1011 return; 1012 } 1013 } 1014 # endif 1015 1016 #endif /* ST_MTIM_NSEC */ 1017 1018 time (&st->st_mtime); 1019 } 1020 1021 /* Compare two files (or dirs) with parent comparison PARENT 1022 and names NAME0 and NAME1. 1023 (If PARENT is 0, then the first name is just NAME0, etc.) 1024 This is self-contained; it opens the files and closes them. 1025 1026 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if 1027 different, EXIT_TROUBLE if there is a problem opening them. 
*/

/* Compare the pair of files or directories named NAME0 and NAME1.
   When PARENT is nonnull, each name is joined onto the matching
   directory name in PARENT with dir_file_pathname; otherwise the
   names are used as given.  A null name marks an entry that is
   missing on that side.

   The result is EXIT_TROUBLE after a failed stat, open or close,
   EXIT_FAILURE after an "Only in", file-type or size mismatch report,
   and otherwise whatever diff_dirs or diff_2_files returned
   (EXIT_SUCCESS when neither had to be called).  */
static int
compare_files (struct comparison const *parent,
               char const *name0,
               char const *name1)
{
  struct comparison cmp;
#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
  int i;
  int status = EXIT_SUCCESS;
  bool same_files;
  char *free0;
  char *free1;

  /* An entry present on one side only is merely reported, unless -N
     (or --unidirectional-new-file, for an entry that has a second
     name) asks for the missing side to be treated as empty.  */

  if (!new_file
      && !(name0 && name1)
      && !(unidirectional_new_file && name1))
    {
      int present = name0 ? 0 : 1;
      char const *name = name0 ? name0 : name1;
      char const *dir = parent->file[present].name;

      /* See POSIX 1003.1-2001 for this format.  */
      message ("Only in %s: %s\n", dir, name);

      /* A file found on one side only counts as a difference.  */
      return EXIT_FAILURE;
    }

  memset (cmp.file, 0, sizeof cmp.file);
  cmp.parent = parent;

  /* cmp.file[f].desc markers */
#define NONEXISTENT (-1) /* nonexistent file */
#define UNOPENED (-2) /* unopened file (e.g. directory) */
#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */

#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */

  cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT;
  cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT;

  /* Give a missing side the other side's name, so that both sides
     have a full name to record.  */

  if (!name0)
    name0 = name1;
  if (!name1)
    name1 = name0;

  if (parent)
    {
      /* The joined names are heap-allocated; they are released just
         before returning.  */
      free0 = dir_file_pathname (parent->file[0].name, name0);
      cmp.file[0].name = free0;
      free1 = dir_file_pathname (parent->file[1].name, name1);
      cmp.file[1].name = free1;
    }
  else
    {
      free0 = NULL;
      free1 = NULL;
      cmp.file[0].name = name0;
      cmp.file[1].name = name1;
    }

  /* Collect the status of each side that is supposed to exist.  A
     failure is stored in the descriptor slot as an encoded errno
     value and reported further down.  */

  for (i = 0; i < 2; i++)
    {
      if (cmp.file[i].desc == NONEXISTENT)
        continue;

      if (i != 0
          && file_name_cmp (cmp.file[i].name, cmp.file[0].name) == 0)
        {
          /* Same name as the first side: reuse its results.  */
          cmp.file[i].desc = cmp.file[0].desc;
          cmp.file[i].stat = cmp.file[0].stat;
        }
      else if (strcmp (cmp.file[i].name, "-") != 0)
        {
          if (stat (cmp.file[i].name, &cmp.file[i].stat) != 0)
            cmp.file[i].desc = ERRNO_ENCODE (errno);
        }
      else
        {
          /* "-" names standard input.  */
          cmp.file[i].desc = STDIN_FILENO;
          if (fstat (STDIN_FILENO, &cmp.file[i].stat) != 0)
            {
              cmp.file[i].desc = ERRNO_ENCODE (errno);
              continue;
            }

          if (S_ISREG (cmp.file[i].stat.st_mode))
            {
              /* Only the part after the current offset is left to
                 read, so shrink the recorded size accordingly.  */
              off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
              if (pos < 0)
                cmp.file[i].desc = ERRNO_ENCODE (errno);
              else
                cmp.file[i].stat.st_size =
                  MAX (0, cmp.file[i].stat.st_size - pos);
            }

          /* POSIX 1003.1-2001 requires current time for
             stdin.  */
          set_mtime_to_now (&cmp.file[i].stat);
        }
    }

  /* Under -N (either side) or --unidirectional-new-file (first side
     only), downgrade a side to NONEXISTENT when it is an empty
     regular file with no permission bits set, or when it is a
     top-level name that failed with ENOENT while the other side is
     still UNOPENED.  The sides are handled in order, so the second
     side sees any change just made to the first.  */

  for (i = 0; i < 2; i++)
    {
      bool treat_as_missing;

      if (!new_file && !(i == 0 && unidirectional_new_file))
        continue;

      if (cmp.file[i].desc == UNOPENED)
        treat_as_missing
          = (S_ISREG (cmp.file[i].stat.st_mode)
             && (cmp.file[i].stat.st_mode
                 & (S_IRWXU | S_IRWXG | S_IRWXO)) == 0
             && cmp.file[i].stat.st_size == 0);
      else
        treat_as_missing
          = (cmp.file[i].desc == ERRNO_ENCODE (ENOENT)
             && !parent
             && cmp.file[1 - i].desc == UNOPENED);

      if (treat_as_missing)
        cmp.file[i].desc = NONEXISTENT;
    }

  /* A nonexistent side gets an all-zero status, except that it
     borrows the file type and mode bits of the other side.  */

  for (i = 0; i < 2; i++)
    if (cmp.file[i].desc == NONEXISTENT)
      {
        memset (&cmp.file[i].stat, 0, sizeof cmp.file[i].stat);
        cmp.file[i].stat.st_mode = cmp.file[1 - i].stat.st_mode;
      }

  /* Report the errors that were stashed in the descriptor slots.
     Decoding any other descriptor value yields a negative number.  */

  for (i = 0; i < 2; i++)
    {
      int err = ERRNO_DECODE (cmp.file[i].desc);
      if (err < 0)
        continue;

      errno = err;
      perror_with_name (cmp.file[i].name);
      status = EXIT_TROUBLE;
    }

  if (status == EXIT_SUCCESS && !parent && DIR_P (0) != DIR_P (1))
    {
      /* At top level, with exactly one operand a directory, compare
         the other operand with the entry of the same base name
         inside that directory.  */

      int fnm_arg = DIR_P (0);
      int dir_arg = 1 - fnm_arg;
      char const *fnm = cmp.file[fnm_arg].name;
      char const *dir = cmp.file[dir_arg].name;
      char const *filename;

      free0 = dir_file_pathname (dir, base_name (fnm));
      filename = free0;
      cmp.file[dir_arg].name = filename;

      if (strcmp (fnm, "-") == 0)
        fatal ("cannot compare `-' to a directory");

      if (stat (filename, &cmp.file[dir_arg].stat) != 0)
        {
          perror_with_name (filename);
          status = EXIT_TROUBLE;
        }
    }

  if (status != EXIT_SUCCESS
      || (cmp.file[0].desc == NONEXISTENT
          && cmp.file[1].desc == NONEXISTENT))
    {
      /* Either an error was already reported above, or neither side
         "exists"; in both cases there is nothing to compare.  */
    }
  else
    {
      same_files
        = (cmp.file[0].desc != NONEXISTENT
           && cmp.file[1].desc != NONEXISTENT
           && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
           && same_file_attributes (&cmp.file[0].stat,
                                    &cmp.file[1].stat));

      if (same_files && no_diff_means_no_output)
        {
          /* Both names refer to one physical file, so the contents
             are known to be identical without reading them.  */
        }
      else if (DIR_P (0) && DIR_P (1))
        {
          if (output_style == OUTPUT_IFDEF)
            fatal ("-D option not supported with directories");

          /* Two directories: compare their entries, but descend
             below the top level only when -r was given.  */

          if (parent && !recursive)
            {
              /* See POSIX 1003.1-2001 for this format.  */
              message ("Common subdirectories: %s and %s\n",
                       cmp.file[0].name, cmp.file[1].name);
            }
          else
            status = diff_dirs (&cmp, compare_files);
        }
      else if ((DIR_P (0) || DIR_P (1))
               || (parent
                   && (! S_ISREG (cmp.file[0].stat.st_mode)
                       || ! S_ISREG (cmp.file[1].stat.st_mode))))
        {
          if (cmp.file[0].desc == NONEXISTENT
              || cmp.file[1].desc == NONEXISTENT)
            {
              /* The entry exists on one side only.  */

              if ((DIR_P (0) || DIR_P (1))
                  && recursive
                  && (new_file
                      || (unidirectional_new_file
                          && cmp.file[0].desc == NONEXISTENT)))
                status = diff_dirs (&cmp, compare_files);
              else
                {
                  char const *dir
                    = parent->file[cmp.file[0].desc == NONEXISTENT].name;

                  /* See POSIX 1003.1-2001 for this format.  */
                  message ("Only in %s: %s\n", dir, name0);

                  status = EXIT_FAILURE;
                }
            }
          else
            {
              /* Both sides exist, but their file types are not
                 ones that get compared with each other.  */

              char const *label0
                = file_label[0] ? file_label[0] : cmp.file[0].name;
              char const *label1
                = file_label[1] ? file_label[1] : cmp.file[1].name;

              /* See POSIX 1003.1-2001 for this format.  */
              message5 ("File %s is a %s while file %s is a %s\n",
                        label0, file_type (&cmp.file[0].stat),
                        label1, file_type (&cmp.file[1].stat));

              /* This is a difference.  */
              status = EXIT_FAILURE;
            }
        }
      else if (files_can_be_treated_as_binary
               && S_ISREG (cmp.file[0].stat.st_mode)
               && S_ISREG (cmp.file[1].stat.st_mode)
               && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size)
        {
          /* Regular files of unequal size must differ, so there is no
             need to read them.  */
          char const *label0
            = file_label[0] ? file_label[0] : cmp.file[0].name;
          char const *label1
            = file_label[1] ? file_label[1] : cmp.file[1].name;

          message ("Files %s and %s differ\n", label0, label1);
          status = EXIT_FAILURE;
        }
      else
        {
          /* Neither side is a directory.  Open whatever is not open
             yet, letting identical files share one descriptor.  */

          if (cmp.file[0].desc == UNOPENED)
            {
              cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0);
              if (cmp.file[0].desc < 0)
                {
                  perror_with_name (cmp.file[0].name);
                  status = EXIT_TROUBLE;
                }
            }
          if (cmp.file[1].desc == UNOPENED)
            {
              if (same_files)
                cmp.file[1].desc = cmp.file[0].desc;
              else
                {
                  cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0);
                  if (cmp.file[1].desc < 0)
                    {
                      perror_with_name (cmp.file[1].name);
                      status = EXIT_TROUBLE;
                    }
                }
            }

#if HAVE_SETMODE_DOS
          if (binary)
            for (i = 0; i < 2; i++)
              if (0 <= cmp.file[i].desc)
                set_binary_mode (cmp.file[i].desc, true);
#endif

          /* Run the comparison only if both opens succeeded.  */

          if (status == EXIT_SUCCESS)
            status = diff_2_files (&cmp);

          /* Close each valid descriptor, taking care to close a
             shared descriptor only once.  */

          if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
            {
              perror_with_name (cmp.file[0].name);
              status = EXIT_TROUBLE;
            }
          if (0 <= cmp.file[1].desc
              && cmp.file[0].desc != cmp.file[1].desc
              && close (cmp.file[1].desc) != 0)
            {
              perror_with_name (cmp.file[1].name);
              status = EXIT_TROUBLE;
            }
        }
    }

  /* STATUS is now final.  */

  if (status == EXIT_SUCCESS)
    {
      if (report_identical_files && !DIR_P (0))
        message ("Files %s and %s are identical\n",
                 file_label[0] ? file_label[0] : cmp.file[0].name,
                 file_label[1] ? file_label[1] : cmp.file[1].name);
    }
  else
    {
      /* Flush stdout so that the user sees differences immediately.
         This can hurt performance, unfortunately.  */
      if (fflush (stdout) != 0)
        pfatal_with_name (_("standard output"));
    }

  /* Both pointers are null when no joined name was allocated;
     free accepts a null pointer.  */
  free (free0);
  free (free1);

  return status;
}