1 /* diff - compare files line by line
2
3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002,
4 2004 Free Software Foundation, Inc.
5
6 This file is part of GNU DIFF.
7
8 GNU DIFF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU DIFF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 See the GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU DIFF; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23 #define GDIFF_MAIN
24 #include "diff.h"
25 #include "paths.h"
26 #include <c-stack.h>
27 #include <dirname.h>
28 #include <error.h>
29 #include <exclude.h>
30 #include <exit.h>
31 #include <exitfail.h>
32 #include <file-type.h>
33 #include <fnmatch.h>
34 #include <getopt.h>
35 #include <hard-locale.h>
36 #include <posixver.h>
37 #include <prepargs.h>
38 #include <quotesys.h>
39 #include <setmode.h>
40 #include <version-etc.h>
41 #include <xalloc.h>
42
43 #ifndef GUTTER_WIDTH_MINIMUM
44 # define GUTTER_WIDTH_MINIMUM 3
45 #endif
46
/* A growable list of regular expressions given on the command line.
   add_regexp appends each pattern's text to `regexps' and compiles it
   into `buf'; summarize_regexp_list later compiles the accumulated
   disjunction into `buf' when more than one pattern was given.  */
struct regexp_list
{
  char *regexps; /* chars representing disjunction of the regexps */
  size_t len; /* chars used in `regexps' */
  size_t size; /* size malloc'ed for `regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does `regexps' represent a disjunction? */
  struct re_pattern_buffer *buf; /* pattern buffer the regexps are compiled into */
};
55
/* Forward declarations of the static functions defined later in this
   file.  try_help is declared noreturn; its definition ends in abort.  */
static int compare_files (struct comparison const *, char const *, char const *);
static void add_regexp (struct regexp_list *, char const *);
static void summarize_regexp_list (struct regexp_list *);
static void specify_style (enum output_style);
static void specify_value (char const **, char const *, char const *);
static void try_help (char const *, char const *) __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);
64
/* If comparing directories, compare their common subdirectories
   recursively (-r).  */
static bool recursive;

/* In context diffs, show previous lines that match these regexps
   (-F, and -p which adds a built-in pattern).  */
static struct regexp_list function_regexp_list;

/* Ignore changes affecting only lines that match these regexps (-I).  */
static struct regexp_list ignore_regexp_list;

#if HAVE_SETMODE_DOS
/* Use binary I/O when reading and writing data (--binary).
   On POSIX hosts, this has no effect.  */
static bool binary;
#else
/* Without HAVE_SETMODE_DOS, `binary' is a compile-time constant that
   is always true, so expressions that test it need no #if.  */
enum { binary = true };
#endif

/* When comparing directories, if a file appears only in one
   directory, treat it as present but empty in the other (-N).
   Then `patch' would create the file with appropriate contents.  */
static bool new_file;

/* When comparing directories, if a file appears only in the second
   directory of the two, treat it as present but empty in the other
   (--unidirectional-new-file).
   Then `patch' would create the file with appropriate contents.  */
static bool unidirectional_new_file;

/* Report files compared that are the same (-s).
   Normally nothing is output when that happens.  */
static bool report_identical_files;
97
98
/* Build one freshly allocated string that holds the command options
   diff was invoked with.

   OPTIONVEC is a vector of separate ARGV-elements and COUNT is its
   length.  Every element is passed through quote_system_arg and is
   preceded by a single space, so the result begins with a space and
   has none at the end.  With no options the result is the empty
   string.  The storage comes from xmalloc.  */

static char *
option_list (char **optionvec, int count)
{
  size_t needed = 1;	/* the terminating null byte */
  char *buffer;
  char *out;
  int k;

  /* Measuring pass: quote_system_arg is called with a null destination
     and its return value is taken as the quoted length; one extra byte
     per element accounts for the separating space.  */
  for (k = 0; k < count; k++)
    needed += quote_system_arg ((char *) 0, optionvec[k]) + 1;

  buffer = xmalloc (needed);
  out = buffer;

  /* Writing pass: emit " <quoted element>" for each element.  */
  k = 0;
  while (k < count)
    {
      *out = ' ';
      out++;
      out += quote_system_arg (out, optionvec[k]);
      k++;
    }

  *out = 0;
  return buffer;
}
129
130
131 /* Return an option value suitable for add_exclude. */
132
133 static int
exclude_options(void)134 exclude_options (void)
135 {
136 return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
137 }
138
/* Single-letter options passed to getopt_long in main.  Per the getopt
   convention, a letter followed by a colon takes an argument.  The
   digits are accepted so that the obsolete `-NUM' context form can be
   decoded one digit at a time.  */
static char const shortopts[] =
"0123456789abBcC:dD:eEfF:hHiI:lL:nNopPqrsS:tTuU:vwW:x:X:y";
141
/* Values for long options that do not have single-letter equivalents.
   They start above CHAR_MAX so they cannot collide with any option
   character returned by getopt_long.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TABSIZE_OPTION,
  TO_FILE_OPTION,

  /* These options must be in sequence: main subtracts
     UNCHANGED_LINE_FORMAT_OPTION to index line_format and
     line_format_option.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* These options must be in sequence: main subtracts
     UNCHANGED_GROUP_FORMAT_OPTION to index group_format and
     group_format_option.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};
172
/* Names of the group-format options, indexed by the option value minus
   UNCHANGED_GROUP_FORMAT_OPTION.  main passes the name to
   specify_value, which uses it when reporting a conflict.  Each row is
   sized to hold the longest name.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
{
  "--unchanged-group-format",
  "--old-group-format",
  "--new-group-format",
  "--changed-group-format"
};

/* Names of the line-format options, indexed by the option value minus
   UNCHANGED_LINE_FORMAT_OPTION; used the same way as
   group_format_option.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
{
  "--unchanged-line-format",
  "--old-line-format",
  "--new-line-format"
};
187
/* Long options passed to getopt_long in main.  Following the
   `struct option' layout, each row is { name, has_arg, flag, val }:
   has_arg is 0 for no argument, 1 for a required argument and 2 for
   an optional one; flag is always null here, so getopt_long returns
   val, which is either the equivalent short option letter or one of
   the *_OPTION enumerators.  The all-zero row terminates the table.  */
static struct option const longopts[] =
{
  {"binary", 0, 0, BINARY_OPTION},
  {"brief", 0, 0, 'q'},
  {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
  {"context", 2, 0, 'C'},
  {"ed", 0, 0, 'e'},
  {"exclude", 1, 0, 'x'},
  {"exclude-from", 1, 0, 'X'},
  {"expand-tabs", 0, 0, 't'},
  {"forward-ed", 0, 0, 'f'},
  {"from-file", 1, 0, FROM_FILE_OPTION},
  {"help", 0, 0, HELP_OPTION},
  {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
  {"ifdef", 1, 0, 'D'},
  {"ignore-all-space", 0, 0, 'w'},
  {"ignore-blank-lines", 0, 0, 'B'},
  {"ignore-case", 0, 0, 'i'},
  {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
  {"ignore-matching-lines", 1, 0, 'I'},
  {"ignore-space-change", 0, 0, 'b'},
  {"ignore-tab-expansion", 0, 0, 'E'},
  {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
  {"initial-tab", 0, 0, 'T'},
  {"label", 1, 0, 'L'},
  {"left-column", 0, 0, LEFT_COLUMN_OPTION},
  {"line-format", 1, 0, LINE_FORMAT_OPTION},
  {"minimal", 0, 0, 'd'},
  {"new-file", 0, 0, 'N'},
  {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
  {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
  {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
  {"normal", 0, 0, NORMAL_OPTION},
  {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
  {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
  {"paginate", 0, 0, 'l'},
  {"rcs", 0, 0, 'n'},
  {"recursive", 0, 0, 'r'},
  {"report-identical-files", 0, 0, 's'},
  {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
  {"show-c-function", 0, 0, 'p'},
  {"show-function-line", 1, 0, 'F'},
  {"side-by-side", 0, 0, 'y'},
  {"speed-large-files", 0, 0, 'H'},
  {"starting-file", 1, 0, 'S'},
  {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
  {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
  {"tabsize", 1, 0, TABSIZE_OPTION},
  {"text", 0, 0, 'a'},
  {"to-file", 1, 0, TO_FILE_OPTION},
  {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
  {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
  {"unidirectional-new-file", 0, 0, 'P'},
  {"unified", 2, 0, 'U'},
  {"version", 0, 0, 'v'},
  {"width", 1, 0, 'W'},
  {0, 0, 0, 0}
};
246
247 int
main(int argc,char ** argv)248 main (int argc, char **argv)
249 {
250 int exit_status = EXIT_SUCCESS;
251 int c;
252 int i;
253 int prev = -1;
254 lin ocontext = -1;
255 bool explicit_context = false;
256 size_t width = 0;
257 bool show_c_function = false;
258 char const *from_file = 0;
259 char const *to_file = 0;
260 uintmax_t numval;
261 char *numend;
262
263 /* Do our initializations. */
264 exit_failure = 2;
265 initialize_main (&argc, &argv);
266 program_name = argv[0];
267 setlocale (LC_ALL, "");
268 textdomain (PACKAGE);
269 c_stack_action (0);
270 function_regexp_list.buf = &function_regexp;
271 ignore_regexp_list.buf = &ignore_regexp;
272 re_set_syntax (RE_SYNTAX_GREP);
273 excluded = new_exclude ();
274
275 prepend_default_options (getenv ("DIFF_OPTIONS"), &argc, &argv);
276
277 /* Decode the options. */
278
279 while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
280 {
281 switch (c)
282 {
283 case 0:
284 break;
285
286 case '0':
287 case '1':
288 case '2':
289 case '3':
290 case '4':
291 case '5':
292 case '6':
293 case '7':
294 case '8':
295 case '9':
296 if (! ISDIGIT (prev))
297 ocontext = c - '0';
298 else if (LIN_MAX / 10 < ocontext
299 || ((ocontext = 10 * ocontext + c - '0') < 0))
300 ocontext = LIN_MAX;
301 break;
302
303 case 'a':
304 text = true;
305 break;
306
307 case 'b':
308 if (ignore_white_space < IGNORE_SPACE_CHANGE)
309 ignore_white_space = IGNORE_SPACE_CHANGE;
310 break;
311
312 case 'B':
313 ignore_blank_lines = true;
314 break;
315
316 case 'C':
317 case 'U':
318 {
319 if (optarg)
320 {
321 numval = strtoumax (optarg, &numend, 10);
322 if (*numend)
323 try_help ("invalid context length `%s'", optarg);
324 if (LIN_MAX < numval)
325 numval = LIN_MAX;
326 }
327 else
328 numval = 3;
329
330 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
331 if (context < numval)
332 context = numval;
333 explicit_context = true;
334 }
335 break;
336
337 case 'c':
338 specify_style (OUTPUT_CONTEXT);
339 if (context < 3)
340 context = 3;
341 break;
342
343 case 'd':
344 minimal = true;
345 break;
346
347 case 'D':
348 specify_style (OUTPUT_IFDEF);
349 {
350 static char const C_ifdef_group_formats[] =
351 "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
352 char *b = xmalloc (sizeof C_ifdef_group_formats
353 + 7 * strlen (optarg) - 14 /* 7*"%s" */
354 - 8 /* 5*"%%" + 3*"%c" */);
355 sprintf (b, C_ifdef_group_formats,
356 0,
357 optarg, optarg, 0,
358 optarg, optarg, 0,
359 optarg, optarg, optarg);
360 for (i = 0; i < sizeof group_format / sizeof *group_format; i++)
361 {
362 specify_value (&group_format[i], b, "-D");
363 b += strlen (b) + 1;
364 }
365 }
366 break;
367
368 case 'e':
369 specify_style (OUTPUT_ED);
370 break;
371
372 case 'E':
373 if (ignore_white_space < IGNORE_TAB_EXPANSION)
374 ignore_white_space = IGNORE_TAB_EXPANSION;
375 break;
376
377 case 'f':
378 specify_style (OUTPUT_FORWARD_ED);
379 break;
380
381 case 'F':
382 add_regexp (&function_regexp_list, optarg);
383 break;
384
385 case 'h':
386 /* Split the files into chunks for faster processing.
387 Usually does not change the result.
388
389 This currently has no effect. */
390 break;
391
392 case 'H':
393 speed_large_files = true;
394 break;
395
396 case 'i':
397 ignore_case = true;
398 break;
399
400 case 'I':
401 add_regexp (&ignore_regexp_list, optarg);
402 break;
403
404 case 'l':
405 if (!pr_program[0])
406 try_help ("pagination not supported on this host", 0);
407 paginate = true;
408 #ifdef SIGCHLD
409 /* Pagination requires forking and waiting, and
410 System V fork+wait does not work if SIGCHLD is ignored. */
411 signal (SIGCHLD, SIG_DFL);
412 #endif
413 break;
414
415 case 'L':
416 if (!file_label[0])
417 file_label[0] = optarg;
418 else if (!file_label[1])
419 file_label[1] = optarg;
420 else
421 fatal ("too many file label options");
422 break;
423
424 case 'n':
425 specify_style (OUTPUT_RCS);
426 break;
427
428 case 'N':
429 new_file = true;
430 break;
431
432 case 'o':
433 /* Output in the old tradition style. */
434 specify_style (OUTPUT_NORMAL);
435 break;
436
437 case 'p':
438 show_c_function = true;
439 add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
440 break;
441
442 case 'P':
443 unidirectional_new_file = true;
444 break;
445
446 case 'q':
447 brief = true;
448 break;
449
450 case 'r':
451 recursive = true;
452 break;
453
454 case 's':
455 report_identical_files = true;
456 break;
457
458 case 'S':
459 specify_value (&starting_file, optarg, "-S");
460 break;
461
462 case 't':
463 expand_tabs = true;
464 break;
465
466 case 'T':
467 initial_tab = true;
468 break;
469
470 case 'u':
471 specify_style (OUTPUT_UNIFIED);
472 if (context < 3)
473 context = 3;
474 break;
475
476 case 'v':
477 version_etc (stdout, "diff", PACKAGE_NAME, PACKAGE_VERSION,
478 "Paul Eggert", "Mike Haertel", "David Hayes",
479 "Richard Stallman", "Len Tower", (char *) 0);
480 check_stdout ();
481 return EXIT_SUCCESS;
482
483 case 'w':
484 ignore_white_space = IGNORE_ALL_SPACE;
485 break;
486
487 case 'x':
488 add_exclude (excluded, optarg, exclude_options ());
489 break;
490
491 case 'X':
492 if (add_exclude_file (add_exclude, excluded, optarg,
493 exclude_options (), '\n'))
494 pfatal_with_name (optarg);
495 break;
496
497 case 'y':
498 specify_style (OUTPUT_SDIFF);
499 break;
500
501 case 'W':
502 numval = strtoumax (optarg, &numend, 10);
503 if (! (0 < numval && numval <= SIZE_MAX) || *numend)
504 try_help ("invalid width `%s'", optarg);
505 if (width != numval)
506 {
507 if (width)
508 fatal ("conflicting width options");
509 width = numval;
510 }
511 break;
512
513 case BINARY_OPTION:
514 #if HAVE_SETMODE_DOS
515 binary = true;
516 set_binary_mode (STDOUT_FILENO, true);
517 #endif
518 break;
519
520 case FROM_FILE_OPTION:
521 specify_value (&from_file, optarg, "--from-file");
522 break;
523
524 case HELP_OPTION:
525 usage ();
526 check_stdout ();
527 return EXIT_SUCCESS;
528
529 case HORIZON_LINES_OPTION:
530 numval = strtoumax (optarg, &numend, 10);
531 if (*numend)
532 try_help ("invalid horizon length `%s'", optarg);
533 horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
534 break;
535
536 case IGNORE_FILE_NAME_CASE_OPTION:
537 ignore_file_name_case = true;
538 break;
539
540 case INHIBIT_HUNK_MERGE_OPTION:
541 /* This option is obsolete, but accept it for backward
542 compatibility. */
543 break;
544
545 case LEFT_COLUMN_OPTION:
546 left_column = true;
547 break;
548
549 case LINE_FORMAT_OPTION:
550 specify_style (OUTPUT_IFDEF);
551 for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
552 specify_value (&line_format[i], optarg, "--line-format");
553 break;
554
555 case NO_IGNORE_FILE_NAME_CASE_OPTION:
556 ignore_file_name_case = false;
557 break;
558
559 case NORMAL_OPTION:
560 specify_style (OUTPUT_NORMAL);
561 break;
562
563 case SDIFF_MERGE_ASSIST_OPTION:
564 specify_style (OUTPUT_SDIFF);
565 sdiff_merge_assist = true;
566 break;
567
568 case STRIP_TRAILING_CR_OPTION:
569 strip_trailing_cr = true;
570 break;
571
572 case SUPPRESS_COMMON_LINES_OPTION:
573 suppress_common_lines = true;
574 break;
575
576 case TABSIZE_OPTION:
577 numval = strtoumax (optarg, &numend, 10);
578 if (! (0 < numval && numval <= SIZE_MAX) || *numend)
579 try_help ("invalid tabsize `%s'", optarg);
580 if (tabsize != numval)
581 {
582 if (tabsize)
583 fatal ("conflicting tabsize options");
584 tabsize = numval;
585 }
586 break;
587
588 case TO_FILE_OPTION:
589 specify_value (&to_file, optarg, "--to-file");
590 break;
591
592 case UNCHANGED_LINE_FORMAT_OPTION:
593 case OLD_LINE_FORMAT_OPTION:
594 case NEW_LINE_FORMAT_OPTION:
595 specify_style (OUTPUT_IFDEF);
596 c -= UNCHANGED_LINE_FORMAT_OPTION;
597 specify_value (&line_format[c], optarg, line_format_option[c]);
598 break;
599
600 case UNCHANGED_GROUP_FORMAT_OPTION:
601 case OLD_GROUP_FORMAT_OPTION:
602 case NEW_GROUP_FORMAT_OPTION:
603 case CHANGED_GROUP_FORMAT_OPTION:
604 specify_style (OUTPUT_IFDEF);
605 c -= UNCHANGED_GROUP_FORMAT_OPTION;
606 specify_value (&group_format[c], optarg, group_format_option[c]);
607 break;
608
609 default:
610 try_help (0, 0);
611 }
612 prev = c;
613 }
614
615 if (output_style == OUTPUT_UNSPECIFIED)
616 {
617 if (show_c_function)
618 {
619 specify_style (OUTPUT_CONTEXT);
620 if (ocontext < 0)
621 context = 3;
622 }
623 else
624 specify_style (OUTPUT_NORMAL);
625 }
626
627 if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
628 {
629 #ifdef ST_MTIM_NSEC
630 time_format = "%Y-%m-%d %H:%M:%S.%N %z";
631 #else
632 time_format = "%Y-%m-%d %H:%M:%S %z";
633 #endif
634 }
635 else
636 {
637 /* See POSIX 1003.1-2001 for this format. */
638 time_format = "%a %b %e %T %Y";
639 }
640
641 if (0 <= ocontext)
642 {
643 bool modern_usage = 200112 <= posix2_version ();
644
645 if ((output_style == OUTPUT_CONTEXT
646 || output_style == OUTPUT_UNIFIED)
647 && (context < ocontext
648 || (ocontext < context && ! explicit_context)))
649 {
650 if (modern_usage)
651 {
652 error (0, 0,
653 _("`-%ld' option is obsolete; use `-%c %ld'"),
654 (long int) ocontext,
655 output_style == OUTPUT_CONTEXT ? 'C' : 'U',
656 (long int) ocontext);
657 try_help (0, 0);
658 }
659 context = ocontext;
660 }
661 else
662 {
663 if (modern_usage)
664 {
665 error (0, 0, _("`-%ld' option is obsolete; omit it"),
666 (long int) ocontext);
667 try_help (0, 0);
668 }
669 }
670 }
671
672 if (! tabsize)
673 tabsize = 8;
674 if (! width)
675 width = 130;
676
677 {
678 /* Maximize first the half line width, and then the gutter width,
679 according to the following constraints:
680
681 1. Two half lines plus a gutter must fit in a line.
682 2. If the half line width is nonzero:
683 a. The gutter width is at least GUTTER_WIDTH_MINIMUM.
684 b. If tabs are not expanded to spaces,
685 a half line plus a gutter is an integral number of tabs,
686 so that tabs in the right column line up. */
687
688 intmax_t t = expand_tabs ? 1 : tabsize;
689 intmax_t w = width;
690 intmax_t off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t;
691 sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
692 sdiff_column2_offset = sdiff_half_width ? off : w;
693 }
694
695 /* Make the horizon at least as large as the context, so that
696 shift_boundaries has more freedom to shift the first and last hunks. */
697 if (horizon_lines < context)
698 horizon_lines = context;
699
700 summarize_regexp_list (&function_regexp_list);
701 summarize_regexp_list (&ignore_regexp_list);
702
703 if (output_style == OUTPUT_IFDEF)
704 {
705 for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
706 if (!line_format[i])
707 line_format[i] = "%l\n";
708 if (!group_format[OLD])
709 group_format[OLD]
710 = group_format[CHANGED] ? group_format[CHANGED] : "%<";
711 if (!group_format[NEW])
712 group_format[NEW]
713 = group_format[CHANGED] ? group_format[CHANGED] : "%>";
714 if (!group_format[UNCHANGED])
715 group_format[UNCHANGED] = "%=";
716 if (!group_format[CHANGED])
717 group_format[CHANGED] = concat (group_format[OLD],
718 group_format[NEW], "");
719 }
720
721 no_diff_means_no_output =
722 (output_style == OUTPUT_IFDEF ?
723 (!*group_format[UNCHANGED]
724 || (strcmp (group_format[UNCHANGED], "%=") == 0
725 && !*line_format[UNCHANGED]))
726 : (output_style != OUTPUT_SDIFF) | suppress_common_lines);
727
728 files_can_be_treated_as_binary =
729 (brief & binary
730 & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
731 | (ignore_regexp_list.regexps || ignore_white_space)));
732
733 switch_string = option_list (argv + 1, optind - 1);
734
735 if (from_file)
736 {
737 if (to_file)
738 fatal ("--from-file and --to-file both specified");
739 else
740 for (; optind < argc; optind++)
741 {
742 int status = compare_files ((struct comparison *) 0,
743 from_file, argv[optind]);
744 if (exit_status < status)
745 exit_status = status;
746 }
747 }
748 else
749 {
750 if (to_file)
751 for (; optind < argc; optind++)
752 {
753 int status = compare_files ((struct comparison *) 0,
754 argv[optind], to_file);
755 if (exit_status < status)
756 exit_status = status;
757 }
758 else
759 {
760 if (argc - optind != 2)
761 {
762 if (argc - optind < 2)
763 try_help ("missing operand after `%s'", argv[argc - 1]);
764 else
765 try_help ("extra operand `%s'", argv[optind + 2]);
766 }
767
768 exit_status = compare_files ((struct comparison *) 0,
769 argv[optind], argv[optind + 1]);
770 }
771 }
772
773 /* Print any messages that were saved up for last. */
774 print_message_queue ();
775
776 check_stdout ();
777 exit (exit_status);
778 return exit_status;
779 }
780
781 /* Append to REGLIST the regexp PATTERN. */
782
783 static void
add_regexp(struct regexp_list * reglist,char const * pattern)784 add_regexp (struct regexp_list *reglist, char const *pattern)
785 {
786 size_t patlen = strlen (pattern);
787 char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
788
789 if (m != 0)
790 error (0, 0, "%s: %s", pattern, m);
791 else
792 {
793 char *regexps = reglist->regexps;
794 size_t len = reglist->len;
795 bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
796 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
797 size_t size = reglist->size;
798
799 if (size <= newlen)
800 {
801 if (!size)
802 size = 1;
803
804 do size *= 2;
805 while (size <= newlen);
806
807 reglist->size = size;
808 reglist->regexps = regexps = xrealloc (regexps, size);
809 }
810 if (multiple_regexps)
811 {
812 regexps[len++] = '\\';
813 regexps[len++] = '|';
814 }
815 memcpy (regexps + len, pattern, patlen + 1);
816 }
817 }
818
819 /* Ensure that REGLIST represents the disjunction of its regexps.
820 This is done here, rather than earlier, to avoid O(N^2) behavior. */
821
822 static void
summarize_regexp_list(struct regexp_list * reglist)823 summarize_regexp_list (struct regexp_list *reglist)
824 {
825 if (reglist->regexps)
826 {
827 /* At least one regexp was specified. Allocate a fastmap for it. */
828 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
829 if (reglist->multiple_regexps)
830 {
831 /* Compile the disjunction of the regexps.
832 (If just one regexp was specified, it is already compiled.) */
833 char const *m = re_compile_pattern (reglist->regexps, reglist->len,
834 reglist->buf);
835 if (m != 0)
836 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
837 }
838 }
839 }
840
841 static void
try_help(char const * reason_msgid,char const * operand)842 try_help (char const *reason_msgid, char const *operand)
843 {
844 if (reason_msgid)
845 error (0, 0, _(reason_msgid), operand);
846 error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."),
847 program_name);
848 abort ();
849 }
850
/* Verify that everything written to standard output got there: report
   a pending write error, or else close stdout and report a failure to
   close it.  */

static void
check_stdout (void)
{
  if (! ferror (stdout))
    {
      if (fclose (stdout) != 0)
        pfatal_with_name (_("standard output"));
    }
  else
    fatal ("write failed");
}
859
/* The body of the --help output, printed by usage.  Each entry is a
   message id wrapped in N_ and translated with _ when printed; an
   entry may hold several lines separated by newlines.  An empty string
   makes usage print a blank line, and the null pointer ends the
   list.  */
static char const * const option_help_msgid[] = {
  N_("Compare files line by line."),
  "",
  N_("-i --ignore-case Ignore case differences in file contents."),
  N_("--ignore-file-name-case Ignore case when comparing file names."),
  N_("--no-ignore-file-name-case Consider case when comparing file names."),
  N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."),
  N_("-b --ignore-space-change Ignore changes in the amount of white space."),
  N_("-w --ignore-all-space Ignore all white space."),
  N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."),
  N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."),
  N_("--strip-trailing-cr Strip trailing carriage return on input."),
#if HAVE_SETMODE_DOS
  N_("--binary Read and write data in binary mode."),
#endif
  N_("-a --text Treat all files as text."),
  "",
  N_("-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.\n\
-u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.\n\
--label LABEL Use LABEL instead of file name.\n\
-p --show-c-function Show which C function each change is in.\n\
-F RE --show-function-line=RE Show the most recent line matching RE."),
  N_("-q --brief Output only whether files differ."),
  N_("-e --ed Output an ed script."),
  N_("--normal Output a normal diff."),
  N_("-n --rcs Output an RCS format diff."),
  N_("-y --side-by-side Output in two columns.\n\
-W NUM --width=NUM Output at most NUM (default 130) print columns.\n\
--left-column Output only the left column of common lines.\n\
--suppress-common-lines Do not output common lines."),
  N_("-D NAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs."),
  N_("--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT."),
  N_("--line-format=LFMT Similar, but format all input lines with LFMT."),
  N_("--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT."),
  N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."),
  N_(" GFMT may contain:\n\
%< lines from FILE1\n\
%> lines from FILE2\n\
%= lines common to FILE1 and FILE2\n\
%[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\
LETTERs are as follows for new group, lower case for old group:\n\
F first line number\n\
L last line number\n\
N number of lines = L-F+1\n\
E F-1\n\
M L+1"),
  N_(" LFMT may contain:\n\
%L contents of line\n\
%l contents of line, excluding any trailing newline\n\
%[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"),
  N_(" Either GFMT or LFMT may contain:\n\
%% %\n\
%c'C' the single character C\n\
%c'\\OOO' the character with octal code OOO"),
  "",
  N_("-l --paginate Pass the output through `pr' to paginate it."),
  N_("-t --expand-tabs Expand tabs to spaces in output."),
  N_("-T --initial-tab Make tabs line up by prepending a tab."),
  N_("--tabsize=NUM Tab stops are every NUM (default 8) print columns."),
  "",
  N_("-r --recursive Recursively compare any subdirectories found."),
  N_("-N --new-file Treat absent files as empty."),
  N_("--unidirectional-new-file Treat absent first files as empty."),
  N_("-s --report-identical-files Report when two files are the same."),
  N_("-x PAT --exclude=PAT Exclude files that match PAT."),
  N_("-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE."),
  N_("-S FILE --starting-file=FILE Start with FILE when comparing directories."),
  N_("--from-file=FILE1 Compare FILE1 to all operands. FILE1 can be a directory."),
  N_("--to-file=FILE2 Compare all operands to FILE2. FILE2 can be a directory."),
  "",
  N_("--horizon-lines=NUM Keep NUM lines of the common prefix and suffix."),
  N_("-d --minimal Try hard to find a smaller set of changes."),
  N_("--speed-large-files Assume large files and many scattered small changes."),
  "",
  N_("-v --version Output version info."),
  N_("--help Output this help."),
  "",
  N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."),
  N_("If --from-file or --to-file is given, there are no restrictions on FILES."),
  N_("If a FILE is `-', read standard input."),
  N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
  "",
  N_("Report bugs to <bug-gnu-utils@gnu.org>."),
  0
};
945
946 static void
usage(void)947 usage (void)
948 {
949 char const * const *p;
950
951 printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
952
953 for (p = option_help_msgid; *p; p++)
954 {
955 if (!**p)
956 putchar ('\n');
957 else
958 {
959 char const *msg = _(*p);
960 char const *nl;
961 while ((nl = strchr (msg, '\n')))
962 {
963 int msglen = nl + 1 - msg;
964 printf (" %.*s", msglen, msg);
965 msg = nl + 1;
966 }
967
968 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
969 }
970 }
971 }
972
/* Store VALUE in *VAR.  If *VAR already holds a different string, this
   is a conflict: report it, naming OPTION, and give up via try_help.  */
static void
specify_value (char const **var, char const *value, char const *option)
{
  char const *previous = *var;

  if (previous != 0 && strcmp (previous, value) != 0)
    {
      error (0, 0, _("conflicting %s option value `%s'"), option, value);
      try_help (0, 0);
    }

  *var = value;
}
985
986 /* Set the output style to STYLE, diagnosing conflicts. */
987 static void
specify_style(enum output_style style)988 specify_style (enum output_style style)
989 {
990 if (output_style != style)
991 {
992 output_style = style;
993 }
994 }
995
/* Replace the last-modified time stored in *ST with the current time.
   When the stat structure has a nanosecond field (ST_MTIM_NSEC), try
   clock_gettime and then gettimeofday, if the host has them, to fill
   it in; otherwise, or if those fail, fall back to time, which sets
   only the seconds.  */

static void
set_mtime_to_now (struct stat *st)
{
#ifdef ST_MTIM_NSEC

# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME
  if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0)
    return;
# endif

# if HAVE_GETTIMEOFDAY
  {
    struct timeval now;
    if (gettimeofday (&now, 0) == 0)
      {
        /* Microseconds to nanoseconds.  */
        st->st_mtim.ST_MTIM_NSEC = now.tv_usec * 1000;
        st->st_mtime = now.tv_sec;
        return;
      }
  }
# endif

#endif /* ST_MTIM_NSEC */

  st->st_mtime = time (0);
}
1024
1025 /* Compare two files (or dirs) with parent comparison PARENT
1026 and names NAME0 and NAME1.
1027 (If PARENT is 0, then the first name is just NAME0, etc.)
1028 This is self-contained; it opens the files and closes them.
1029
1030 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
1031 different, EXIT_TROUBLE if there is a problem opening them. */
1032
1033 static int
compare_files(struct comparison const * parent,char const * name0,char const * name1)1034 compare_files (struct comparison const *parent,
1035 char const *name0,
1036 char const *name1)
1037 {
1038 struct comparison cmp;
1039 #define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
1040 register int f;
1041 int status = EXIT_SUCCESS;
1042 bool same_files;
1043 char *free0, *free1;
1044
1045 /* If this is directory comparison, perhaps we have a file
1046 that exists only in one of the directories.
1047 If so, just print a message to that effect. */
1048
1049 if (! ((name0 && name1)
1050 || (unidirectional_new_file && name1)
1051 || new_file))
1052 {
1053 char const *name = name0 == 0 ? name1 : name0;
1054 char const *dir = parent->file[name0 == 0].name;
1055
1056 /* See POSIX 1003.1-2001 for this format. */
1057 message ("Only in %s: %s\n", dir, name);
1058
1059 /* Return EXIT_FAILURE so that diff_dirs will return
1060 EXIT_FAILURE ("some files differ"). */
1061 return EXIT_FAILURE;
1062 }
1063
1064 memset (cmp.file, 0, sizeof cmp.file);
1065 cmp.parent = parent;
1066
1067 /* cmp.file[f].desc markers */
1068 #define NONEXISTENT (-1) /* nonexistent file */
1069 #define UNOPENED (-2) /* unopened file (e.g. directory) */
1070 #define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */
1071
1072 #define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */
1073
1074 cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED;
1075 cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED;
1076
1077 /* Now record the full name of each file, including nonexistent ones. */
1078
1079 if (name0 == 0)
1080 name0 = name1;
1081 if (name1 == 0)
1082 name1 = name0;
1083
1084 if (!parent)
1085 {
1086 free0 = 0;
1087 free1 = 0;
1088 cmp.file[0].name = name0;
1089 cmp.file[1].name = name1;
1090 }
1091 else
1092 {
1093 cmp.file[0].name = free0
1094 = dir_file_pathname (parent->file[0].name, name0);
1095 cmp.file[1].name = free1
1096 = dir_file_pathname (parent->file[1].name, name1);
1097 }
1098
1099 /* Stat the files. */
1100
1101 for (f = 0; f < 2; f++)
1102 {
1103 if (cmp.file[f].desc != NONEXISTENT)
1104 {
1105 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
1106 {
1107 cmp.file[f].desc = cmp.file[0].desc;
1108 cmp.file[f].stat = cmp.file[0].stat;
1109 }
1110 else if (strcmp (cmp.file[f].name, "-") == 0)
1111 {
1112 cmp.file[f].desc = STDIN_FILENO;
1113 if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
1114 cmp.file[f].desc = ERRNO_ENCODE (errno);
1115 else
1116 {
1117 if (S_ISREG (cmp.file[f].stat.st_mode))
1118 {
1119 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
1120 if (pos < 0)
1121 cmp.file[f].desc = ERRNO_ENCODE (errno);
1122 else
1123 cmp.file[f].stat.st_size =
1124 MAX (0, cmp.file[f].stat.st_size - pos);
1125 }
1126
1127 /* POSIX 1003.1-2001 requires current time for
1128 stdin. */
1129 set_mtime_to_now (&cmp.file[f].stat);
1130 }
1131 }
1132 else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0)
1133 cmp.file[f].desc = ERRNO_ENCODE (errno);
1134 }
1135 }
1136
1137 /* Mark files as nonexistent as needed for -N and -P, if they are
1138 inaccessible empty regular files (the kind of files that 'patch'
1139 creates to indicate nonexistent backups), or if they are
1140 top-level files that do not exist but their counterparts do
1141 exist. */
1142 for (f = 0; f < 2; f++)
1143 if ((new_file || (f == 0 && unidirectional_new_file))
1144 && (cmp.file[f].desc == UNOPENED
1145 ? (S_ISREG (cmp.file[f].stat.st_mode)
1146 && ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))
1147 && cmp.file[f].stat.st_size == 0)
1148 : (cmp.file[f].desc == ERRNO_ENCODE (ENOENT)
1149 && ! parent
1150 && cmp.file[1 - f].desc == UNOPENED)))
1151 cmp.file[f].desc = NONEXISTENT;
1152
1153 for (f = 0; f < 2; f++)
1154 if (cmp.file[f].desc == NONEXISTENT)
1155 {
1156 memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat);
1157 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
1158 }
1159
1160 for (f = 0; f < 2; f++)
1161 {
1162 int e = ERRNO_DECODE (cmp.file[f].desc);
1163 if (0 <= e)
1164 {
1165 errno = e;
1166 perror_with_name (cmp.file[f].name);
1167 status = EXIT_TROUBLE;
1168 }
1169 }
1170
1171 if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
1172 {
1173 /* If one is a directory, and it was specified in the command line,
1174 use the file in that dir with the other file's basename. */
1175
1176 int fnm_arg = DIR_P (0);
1177 int dir_arg = 1 - fnm_arg;
1178 char const *fnm = cmp.file[fnm_arg].name;
1179 char const *dir = cmp.file[dir_arg].name;
1180 char const *filename = cmp.file[dir_arg].name = free0
1181 = dir_file_pathname (dir, base_name (fnm));
1182
1183 if (strcmp (fnm, "-") == 0)
1184 fatal ("cannot compare `-' to a directory");
1185
1186 if (stat (filename, &cmp.file[dir_arg].stat) != 0)
1187 {
1188 perror_with_name (filename);
1189 status = EXIT_TROUBLE;
1190 }
1191 }
1192
1193 if (status != EXIT_SUCCESS)
1194 {
1195 /* One of the files should exist but does not. */
1196 }
1197 else if (cmp.file[0].desc == NONEXISTENT
1198 && cmp.file[1].desc == NONEXISTENT)
1199 {
1200 /* Neither file "exists", so there's nothing to compare. */
1201 }
1202 else if ((same_files
1203 = (cmp.file[0].desc != NONEXISTENT
1204 && cmp.file[1].desc != NONEXISTENT
1205 && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
1206 && same_file_attributes (&cmp.file[0].stat,
1207 &cmp.file[1].stat)))
1208 && no_diff_means_no_output)
1209 {
1210 /* The two named files are actually the same physical file.
1211 We know they are identical without actually reading them. */
1212 }
1213 else if (DIR_P (0) & DIR_P (1))
1214 {
1215 if (output_style == OUTPUT_IFDEF)
1216 fatal ("-D option not supported with directories");
1217
1218 /* If both are directories, compare the files in them. */
1219
1220 if (parent && !recursive)
1221 {
1222 /* But don't compare dir contents one level down
1223 unless -r was specified.
1224 See POSIX 1003.1-2001 for this format. */
1225 message ("Common subdirectories: %s and %s\n",
1226 cmp.file[0].name, cmp.file[1].name);
1227 }
1228 else
1229 status = diff_dirs (&cmp, compare_files);
1230 }
1231 else if ((DIR_P (0) | DIR_P (1))
1232 || (parent
1233 && (! S_ISREG (cmp.file[0].stat.st_mode)
1234 || ! S_ISREG (cmp.file[1].stat.st_mode))))
1235 {
1236 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
1237 {
1238 /* We have a subdirectory that exists only in one directory. */
1239
1240 if ((DIR_P (0) | DIR_P (1))
1241 && recursive
1242 && (new_file
1243 || (unidirectional_new_file
1244 && cmp.file[0].desc == NONEXISTENT)))
1245 status = diff_dirs (&cmp, compare_files);
1246 else
1247 {
1248 char const *dir
1249 = parent->file[cmp.file[0].desc == NONEXISTENT].name;
1250
1251 /* See POSIX 1003.1-2001 for this format. */
1252 message ("Only in %s: %s\n", dir, name0);
1253
1254 status = EXIT_FAILURE;
1255 }
1256 }
1257 else
1258 {
1259 /* We have two files that are not to be compared. */
1260
1261 /* See POSIX 1003.1-2001 for this format. */
1262 message5 ("File %s is a %s while file %s is a %s\n",
1263 file_label[0] ? file_label[0] : cmp.file[0].name,
1264 file_type (&cmp.file[0].stat),
1265 file_label[1] ? file_label[1] : cmp.file[1].name,
1266 file_type (&cmp.file[1].stat));
1267
1268 /* This is a difference. */
1269 status = EXIT_FAILURE;
1270 }
1271 }
1272 else if (files_can_be_treated_as_binary
1273 && S_ISREG (cmp.file[0].stat.st_mode)
1274 && S_ISREG (cmp.file[1].stat.st_mode)
1275 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size)
1276 {
1277 message ("Files %s and %s differ\n",
1278 file_label[0] ? file_label[0] : cmp.file[0].name,
1279 file_label[1] ? file_label[1] : cmp.file[1].name);
1280 status = EXIT_FAILURE;
1281 }
1282 else
1283 {
1284 /* Both exist and neither is a directory. */
1285
1286 /* Open the files and record their descriptors. */
1287
1288 if (cmp.file[0].desc == UNOPENED)
1289 if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0)
1290 {
1291 perror_with_name (cmp.file[0].name);
1292 status = EXIT_TROUBLE;
1293 }
1294 if (cmp.file[1].desc == UNOPENED)
1295 {
1296 if (same_files)
1297 cmp.file[1].desc = cmp.file[0].desc;
1298 else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0))
1299 < 0)
1300 {
1301 perror_with_name (cmp.file[1].name);
1302 status = EXIT_TROUBLE;
1303 }
1304 }
1305
1306 #if HAVE_SETMODE_DOS
1307 if (binary)
1308 for (f = 0; f < 2; f++)
1309 if (0 <= cmp.file[f].desc)
1310 set_binary_mode (cmp.file[f].desc, true);
1311 #endif
1312
1313 /* Compare the files, if no error was found. */
1314
1315 if (status == EXIT_SUCCESS)
1316 status = diff_2_files (&cmp);
1317
1318 /* Close the file descriptors. */
1319
1320 if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
1321 {
1322 perror_with_name (cmp.file[0].name);
1323 status = EXIT_TROUBLE;
1324 }
1325 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
1326 && close (cmp.file[1].desc) != 0)
1327 {
1328 perror_with_name (cmp.file[1].name);
1329 status = EXIT_TROUBLE;
1330 }
1331 }
1332
1333 /* Now the comparison has been done, if no error prevented it,
1334 and STATUS is the value this function will return. */
1335
1336 if (status == EXIT_SUCCESS)
1337 {
1338 if (report_identical_files && !DIR_P (0))
1339 message ("Files %s and %s are identical\n",
1340 file_label[0] ? file_label[0] : cmp.file[0].name,
1341 file_label[1] ? file_label[1] : cmp.file[1].name);
1342 }
1343 else
1344 {
1345 /* Flush stdout so that the user sees differences immediately.
1346 This can hurt performance, unfortunately. */
1347 if (fflush (stdout) != 0)
1348 pfatal_with_name (_("standard output"));
1349 }
1350
1351 if (free0)
1352 free (free0);
1353 if (free1)
1354 free (free1);
1355
1356 return status;
1357 }
1358