/* cmp - compare two files byte by byte

   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
   2002, 2004 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
20
21 #include "system.h"
22 #include "paths.h"
23
24 #include <stdio.h>
25
26 #include <c-stack.h>
27 #include <cmpbuf.h>
28 #include <error.h>
29 #include <exit.h>
30 #include <exitfail.h>
31 #include <file-type.h>
32 #include <getopt.h>
33 #include <hard-locale.h>
34 #include <inttostr.h>
35 #include <setmode.h>
36 #include <unlocked-io.h>
37 #include <version-etc.h>
38 #include <xalloc.h>
39 #include <xstrtol.h>
40
/* hard_locale_LC_MESSAGES expands to hard_locale (LC_MESSAGES) when the
   LC_MESSAGES category exists and NLS is enabled, and to the constant 0
   otherwise.  */
#if ! (defined LC_MESSAGES && ENABLE_NLS)
# define hard_locale_LC_MESSAGES 0
#else
# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
#endif
46
47 static int cmp (void);
48 static off_t file_position (int);
49 static size_t block_compare (word const *, word const *);
50 static size_t block_compare_and_count (word const *, word const *, off_t *);
51 static void sprintc (char *, unsigned char);
52
53 /* Name under which this program was invoked. */
54 char *program_name;
55
56 /* Filenames of the compared files. */
57 static char const *file[2];
58
59 /* File descriptors of the files. */
60 static int file_desc[2];
61
62 /* Status of the files. */
63 static struct stat stat_buf[2];
64
65 /* Read buffers for the files. */
66 static word *buffer[2];
67
68 /* Optimal block size for the files. */
69 static size_t buf_size;
70
71 /* Initial prefix to ignore for each file. */
72 static off_t ignore_initial[2];
73
74 /* Number of bytes to compare. */
75 static uintmax_t bytes = UINTMAX_MAX;
76
77 /* Output format. */
78 static enum comparison_type
79 {
80 type_first_diff, /* Print the first difference. */
81 type_all_diffs, /* Print all differences. */
82 type_status /* Exit status only. */
83 } comparison_type;
84
85 /* If nonzero, print values of bytes quoted like cat -t does. */
86 static bool opt_print_bytes;
87
/* Codes returned by getopt_long for long options that have no
   single-letter form.  They start just above CHAR_MAX so that they
   cannot coincide with any option character.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};
93
94 static struct option const long_options[] =
95 {
96 {"print-bytes", 0, 0, 'b'},
97 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
98 {"ignore-initial", 1, 0, 'i'},
99 {"verbose", 0, 0, 'l'},
100 {"bytes", 1, 0, 'n'},
101 {"silent", 0, 0, 's'},
102 {"quiet", 0, 0, 's'},
103 {"version", 0, 0, 'v'},
104 {"help", 0, 0, HELP_OPTION},
105 {0, 0, 0, 0}
106 };
107
108 static void try_help (char const *, char const *) __attribute__((noreturn));
109 static void
try_help(char const * reason_msgid,char const * operand)110 try_help (char const *reason_msgid, char const *operand)
111 {
112 if (reason_msgid)
113 error (0, 0, _(reason_msgid), operand);
114 error (EXIT_TROUBLE, 0,
115 _("Try `%s --help' for more information."), program_name);
116 abort ();
117 }
118
/* Suffix characters handed to xstrtoumax when parsing SKIP and LIMIT
   values.  */
static const char valid_suffixes[] = "kKMGTPEZY0";
120
121 /* Update ignore_initial[F] according to the result of parsing an
122 *operand ARGPTR of --ignore-initial, updating *ARGPTR to point
123 *after the operand. If DELIMITER is nonzero, the operand may be
124 *followed by DELIMITER; otherwise it must be null-terminated. */
125 static void
specify_ignore_initial(int f,char ** argptr,char delimiter)126 specify_ignore_initial (int f, char **argptr, char delimiter)
127 {
128 uintmax_t val;
129 off_t o;
130 char const *arg = *argptr;
131 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
132 if (! (e == LONGINT_OK
133 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
134 || (o = val) < 0 || o != val || val == UINTMAX_MAX)
135 try_help ("invalid --ignore-initial value `%s'", arg);
136 if (ignore_initial[f] < o)
137 ignore_initial[f] = o;
138 }
139
140 /* Specify the output format. */
141 static void
specify_comparison_type(enum comparison_type t)142 specify_comparison_type (enum comparison_type t)
143 {
144 if (comparison_type && comparison_type != t)
145 try_help ("options -l and -s are incompatible", 0);
146 comparison_type = t;
147 }
148
149 static void
check_stdout(void)150 check_stdout (void)
151 {
152 if (ferror (stdout))
153 error (EXIT_TROUBLE, 0, "%s", _("write failed"));
154 else if (fclose (stdout) != 0)
155 error (EXIT_TROUBLE, errno, "%s", _("standard output"));
156 }
157
158 static char const * const option_help_msgid[] = {
159 N_("-b --print-bytes Print differing bytes."),
160 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
161 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
162 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
163 N_("-l --verbose Output byte numbers and values of all differing bytes."),
164 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
165 N_("-s --quiet --silent Output nothing; yield exit status only."),
166 N_("-v --version Output version info."),
167 N_("--help Output this help."),
168 0
169 };
170
171 static void
usage(void)172 usage (void)
173 {
174 char const * const *p;
175
176 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
177 program_name);
178 printf ("%s\n\n", _("Compare two files byte by byte."));
179 for (p = option_help_msgid; *p; p++)
180 printf (" %s\n", _(*p));
181 printf ("\n%s\n%s\n\n%s\n%s\n\n%s\n",
182 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
183 _("SKIP values may be followed by the following multiplicative suffixes:\n\
184 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
185 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
186 _("If a FILE is `-' or missing, read standard input."),
187 _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
188 _("Report bugs to <bug-gnu-utils@gnu.org>."));
189 }
190
191 int
main(int argc,char ** argv)192 main (int argc, char **argv)
193 {
194 int c, f, exit_status;
195 size_t words_per_buffer;
196
197 exit_failure = EXIT_TROUBLE;
198 initialize_main (&argc, &argv);
199 program_name = argv[0];
200 setlocale (LC_ALL, "");
201 bindtextdomain (PACKAGE, LOCALEDIR);
202 textdomain (PACKAGE);
203 c_stack_action (0);
204
205 /* Parse command line options. */
206
207 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
208 != -1)
209 switch (c)
210 {
211 case 'b':
212 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
213 opt_print_bytes = true;
214 break;
215
216 case 'i':
217 specify_ignore_initial (0, &optarg, ':');
218 if (*optarg++ == ':')
219 specify_ignore_initial (1, &optarg, 0);
220 else if (ignore_initial[1] < ignore_initial[0])
221 ignore_initial[1] = ignore_initial[0];
222 break;
223
224 case 'l':
225 specify_comparison_type (type_all_diffs);
226 break;
227
228 case 'n':
229 {
230 uintmax_t n;
231 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
232 try_help ("invalid --bytes value `%s'", optarg);
233 if (n < bytes)
234 bytes = n;
235 }
236 break;
237
238 case 's':
239 specify_comparison_type (type_status);
240 break;
241
242 case 'v':
243 /* TRANSLATORS: Please translate the second "o" in "Torbjorn
244 Granlund" to an o-with-umlaut (U+00F6, LATIN SMALL LETTER O
245 WITH DIAERESIS) if possible. */
246 version_etc (stdout, "cmp", PACKAGE_NAME, PACKAGE_VERSION,
247 _("Torbjorn Granlund"), "David MacKenzie", (char *) 0);
248 check_stdout ();
249 return EXIT_SUCCESS;
250
251 case HELP_OPTION:
252 usage ();
253 check_stdout ();
254 return EXIT_SUCCESS;
255
256 default:
257 try_help (0, 0);
258 }
259
260 if (optind == argc)
261 try_help ("missing operand after `%s'", argv[argc - 1]);
262
263 file[0] = argv[optind++];
264 file[1] = optind < argc ? argv[optind++] : "-";
265
266 for (f = 0; f < 2 && optind < argc; f++)
267 {
268 char *arg = argv[optind++];
269 specify_ignore_initial (f, &arg, 0);
270 }
271
272 if (optind < argc)
273 try_help ("extra operand `%s'", argv[optind]);
274
275 for (f = 0; f < 2; f++)
276 {
277 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
278 stdin is closed and opening file[0] yields file descriptor 0. */
279 int f1 = f ^ (strcmp (file[1], "-") == 0);
280
281 /* Two files with the same name and offset are identical.
282 But wait until we open the file once, for proper diagnostics. */
283 if (f && ignore_initial[0] == ignore_initial[1]
284 && file_name_cmp (file[0], file[1]) == 0)
285 return EXIT_SUCCESS;
286
287 file_desc[f1] = (strcmp (file[f1], "-") == 0
288 ? STDIN_FILENO
289 : open (file[f1], O_RDONLY, 0));
290 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
291 {
292 if (file_desc[f1] < 0 && comparison_type == type_status)
293 exit (EXIT_TROUBLE);
294 else
295 error (EXIT_TROUBLE, errno, "%s", file[f1]);
296 }
297
298 set_binary_mode (file_desc[f1], true);
299 }
300
301 /* If the files are links to the same inode and have the same file position,
302 they are identical. */
303
304 if (0 < same_file (&stat_buf[0], &stat_buf[1])
305 && same_file_attributes (&stat_buf[0], &stat_buf[1])
306 && file_position (0) == file_position (1))
307 return EXIT_SUCCESS;
308
309 /* If output is redirected to the null device, we may assume `-s'. */
310
311 if (comparison_type != type_status)
312 {
313 struct stat outstat, nullstat;
314
315 if (fstat (STDOUT_FILENO, &outstat) == 0
316 && stat (NULL_DEVICE, &nullstat) == 0
317 && 0 < same_file (&outstat, &nullstat))
318 comparison_type = type_status;
319 }
320
321 /* If only a return code is needed,
322 and if both input descriptors are associated with plain files,
323 conclude that the files differ if they have different sizes
324 and if more bytes will be compared than are in the smaller file. */
325
326 if (comparison_type == type_status
327 && S_ISREG (stat_buf[0].st_mode)
328 && S_ISREG (stat_buf[1].st_mode))
329 {
330 off_t s0 = stat_buf[0].st_size - file_position (0);
331 off_t s1 = stat_buf[1].st_size - file_position (1);
332 if (s0 < 0)
333 s0 = 0;
334 if (s1 < 0)
335 s1 = 0;
336 if (s0 != s1 && MIN (s0, s1) < bytes)
337 exit (EXIT_FAILURE);
338 }
339
340 /* Get the optimal block size of the files. */
341
342 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
343 STAT_BLOCKSIZE (stat_buf[1]),
344 PTRDIFF_MAX - sizeof (word));
345
346 /* Allocate word-aligned buffers, with space for sentinels at the end. */
347
348 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
349 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
350 buffer[1] = buffer[0] + words_per_buffer;
351
352 exit_status = cmp ();
353
354 for (f = 0; f < 2; f++)
355 if (close (file_desc[f]) != 0)
356 error (EXIT_TROUBLE, errno, "%s", file[f]);
357 if (exit_status != 0 && comparison_type != type_status)
358 check_stdout ();
359 exit (exit_status);
360 return exit_status;
361 }
362
363 /* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
364 using `buffer[0]' and `buffer[1]'.
365 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
366 >1 if error. */
367
368 static int
cmp(void)369 cmp (void)
370 {
371 off_t line_number = 1; /* Line number (1...) of difference. */
372 off_t byte_number = 1; /* Byte number (1...) of difference. */
373 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
374 size_t read0, read1; /* Number of bytes read from each file. */
375 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
376 size_t smaller; /* The lesser of `read0' and `read1'. */
377 word *buffer0 = buffer[0];
378 word *buffer1 = buffer[1];
379 char *buf0 = (char *) buffer0;
380 char *buf1 = (char *) buffer1;
381 int ret = EXIT_SUCCESS;
382 int f;
383 int offset_width;
384
385 if (comparison_type == type_all_diffs)
386 {
387 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
388
389 for (f = 0; f < 2; f++)
390 if (S_ISREG (stat_buf[f].st_mode))
391 {
392 off_t file_bytes = stat_buf[f].st_size - file_position (f);
393 if (file_bytes < byte_number_max)
394 byte_number_max = file_bytes;
395 }
396
397 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
398 continue;
399 }
400
401 for (f = 0; f < 2; f++)
402 {
403 off_t ig = ignore_initial[f];
404 if (ig && file_position (f) == -1)
405 {
406 /* lseek failed; read and discard the ignored initial prefix. */
407 do
408 {
409 size_t bytes_to_read = MIN (ig, buf_size);
410 size_t r = block_read (file_desc[f], buf0, bytes_to_read);
411 if (r != bytes_to_read)
412 {
413 if (r == SIZE_MAX)
414 error (EXIT_TROUBLE, errno, "%s", file[f]);
415 break;
416 }
417 ig -= r;
418 }
419 while (ig);
420 }
421 }
422
423 do
424 {
425 size_t bytes_to_read = buf_size;
426
427 if (remaining != UINTMAX_MAX)
428 {
429 if (remaining < bytes_to_read)
430 bytes_to_read = remaining;
431 remaining -= bytes_to_read;
432 }
433
434 read0 = block_read (file_desc[0], buf0, bytes_to_read);
435 if (read0 == SIZE_MAX)
436 error (EXIT_TROUBLE, errno, "%s", file[0]);
437 read1 = block_read (file_desc[1], buf1, bytes_to_read);
438 if (read1 == SIZE_MAX)
439 error (EXIT_TROUBLE, errno, "%s", file[1]);
440
441 /* Insert sentinels for the block compare. */
442
443 buf0[read0] = ~buf1[read0];
444 buf1[read1] = ~buf0[read1];
445
446 /* If the line number should be written for differing files,
447 compare the blocks and count the number of newlines
448 simultaneously. */
449 first_diff = (comparison_type == type_first_diff
450 ? block_compare_and_count (buffer0, buffer1, &line_number)
451 : block_compare (buffer0, buffer1));
452
453 byte_number += first_diff;
454 smaller = MIN (read0, read1);
455
456 if (first_diff < smaller)
457 {
458 switch (comparison_type)
459 {
460 case type_first_diff:
461 {
462 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
463 char line_buf[INT_BUFSIZE_BOUND (off_t)];
464 char const *byte_num = offtostr (byte_number, byte_buf);
465 char const *line_num = offtostr (line_number, line_buf);
466 if (!opt_print_bytes)
467 {
468 /* See POSIX 1003.1-2001 for this format. This
469 message is used only in the POSIX locale, so it
470 need not be translated. */
471 static char const char_message[] =
472 "%s %s differ: char %s, line %s\n";
473
474 /* The POSIX rationale recommends using the word
475 "byte" outside the POSIX locale. Some gettext
476 implementations translate even in the POSIX
477 locale if certain other environment variables
478 are set, so use "byte" if a translation is
479 available, or if outside the POSIX locale. */
480 static char const byte_msgid[] =
481 N_("%s %s differ: byte %s, line %s\n");
482 char const *byte_message = _(byte_msgid);
483 bool use_byte_message = (byte_message != byte_msgid
484 || hard_locale_LC_MESSAGES);
485
486 printf (use_byte_message ? byte_message : char_message,
487 file[0], file[1], byte_num, line_num);
488 }
489 else
490 {
491 unsigned char c0 = buf0[first_diff];
492 unsigned char c1 = buf1[first_diff];
493 char s0[5];
494 char s1[5];
495 sprintc (s0, c0);
496 sprintc (s1, c1);
497 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
498 file[0], file[1], byte_num, line_num,
499 c0, s0, c1, s1);
500 }
501 }
502 /* Fall through. */
503 case type_status:
504 return EXIT_FAILURE;
505
506 case type_all_diffs:
507 do
508 {
509 unsigned char c0 = buf0[first_diff];
510 unsigned char c1 = buf1[first_diff];
511 if (c0 != c1)
512 {
513 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
514 char const *byte_num = offtostr (byte_number, byte_buf);
515 if (!opt_print_bytes)
516 {
517 /* See POSIX 1003.1-2001 for this format. */
518 printf ("%*s %3o %3o\n",
519 offset_width, byte_num, c0, c1);
520 }
521 else
522 {
523 char s0[5];
524 char s1[5];
525 sprintc (s0, c0);
526 sprintc (s1, c1);
527 printf ("%*s %3o %-4s %3o %s\n",
528 offset_width, byte_num, c0, s0, c1, s1);
529 }
530 }
531 byte_number++;
532 first_diff++;
533 }
534 while (first_diff < smaller);
535 ret = EXIT_FAILURE;
536 break;
537 }
538 }
539
540 if (read0 != read1)
541 {
542 if (comparison_type != type_status)
543 {
544 /* See POSIX 1003.1-2001 for this format. */
545 fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
546 }
547
548 return EXIT_FAILURE;
549 }
550 }
551 while (read0 == buf_size);
552
553 return ret;
554 }
555
556 /* Compare two blocks of memory P0 and P1 until they differ,
557 and count the number of '\n' occurrences in the common
558 part of P0 and P1.
559 If the blocks are not guaranteed to be different, put sentinels at the ends
560 of the blocks before calling this function.
561
562 Return the offset of the first byte that differs.
563 Increment *COUNT by the count of '\n' occurrences. */
564
565 static size_t
block_compare_and_count(word const * p0,word const * p1,off_t * count)566 block_compare_and_count (word const *p0, word const *p1, off_t *count)
567 {
568 word l; /* One word from first buffer. */
569 word const *l0, *l1; /* Pointers into each buffer. */
570 char const *c0, *c1; /* Pointers for finding exact address. */
571 size_t cnt = 0; /* Number of '\n' occurrences. */
572 word nnnn; /* Newline, sizeof (word) times. */
573 int i;
574
575 nnnn = 0;
576 for (i = 0; i < sizeof nnnn; i++)
577 nnnn = (nnnn << CHAR_BIT) | '\n';
578
579 /* Find the rough position of the first difference by reading words,
580 not bytes. */
581
582 for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
583 {
584 l ^= nnnn;
585 for (i = 0; i < sizeof l; i++)
586 {
587 unsigned char uc = l;
588 cnt += ! uc;
589 l >>= CHAR_BIT;
590 }
591 }
592
593 /* Find the exact differing position (endianness independent). */
594
595 for (c0 = (char const *) l0, c1 = (char const *) l1;
596 *c0 == *c1;
597 c0++, c1++)
598 cnt += *c0 == '\n';
599
600 *count += cnt;
601 return c0 - (char const *) p0;
602 }
603
604 /* Compare two blocks of memory P0 and P1 until they differ.
605 If the blocks are not guaranteed to be different, put sentinels at the ends
606 of the blocks before calling this function.
607
608 Return the offset of the first byte that differs. */
609
610 static size_t
block_compare(word const * p0,word const * p1)611 block_compare (word const *p0, word const *p1)
612 {
613 word const *l0, *l1;
614 char const *c0, *c1;
615
616 /* Find the rough position of the first difference by reading words,
617 not bytes. */
618
619 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
620 continue;
621
622 /* Find the exact differing position (endianness independent). */
623
624 for (c0 = (char const *) l0, c1 = (char const *) l1;
625 *c0 == *c1;
626 c0++, c1++)
627 continue;
628
629 return c0 - (char const *) p0;
630 }
631
/* Store in BUF a null-terminated, visible rendering of the byte C.
   A printable byte is stored as is.  Otherwise a byte of 128 or more
   gets the prefix "M-" and is reduced by 128; after that a value
   below 32 is shown as '^' followed by the value plus 64, and 127 is
   shown as "^?".  The longest result is "M-^?", so BUF needs room for
   five bytes.  */

static void
sprintc (char *buf, unsigned char c)
{
  size_t len = 0;

  if (!isprint (c))
    {
      if (c >= 128)
        {
          buf[len++] = 'M';
          buf[len++] = '-';
          c -= 128;
        }

      if (c < 32)
        {
          buf[len++] = '^';
          c += 64;
        }
      else if (c == 127)
        {
          buf[len++] = '^';
          c = '?';
        }
    }

  buf[len++] = c;
  buf[len] = '\0';
}
661
662 /* Position file F to ignore_initial[F] bytes from its initial position,
663 and yield its new position. Don't try more than once. */
664
665 static off_t
file_position(int f)666 file_position (int f)
667 {
668 static bool positioned[2];
669 static off_t position[2];
670
671 if (! positioned[f])
672 {
673 positioned[f] = true;
674 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
675 }
676 return position[f];
677 }
678