xref: /freebsd/contrib/diff/src/cmp.c (revision 28f6c2f292806bf31230a959bc4b19d7081669a7)
1 /* cmp - compare two files byte by byte
2 
3    Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
4    2002, 2004 Free Software Foundation, Inc.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14    See the GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; see the file COPYING.
18    If not, write to the Free Software Foundation,
19    59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
20 
21 #include "system.h"
22 #include "paths.h"
23 
24 #include <stdio.h>
25 
26 #include <c-stack.h>
27 #include <cmpbuf.h>
28 #include <error.h>
29 #include <exit.h>
30 #include <exitfail.h>
31 #include <file-type.h>
32 #include <getopt.h>
33 #include <hard-locale.h>
34 #include <inttostr.h>
35 #include <setmode.h>
36 #include <unlocked-io.h>
37 #include <version-etc.h>
38 #include <xalloc.h>
39 #include <xstrtol.h>
40 
/* hard_locale_LC_MESSAGES expands to a call of hard_locale on the
   LC_MESSAGES category when LC_MESSAGES is defined and ENABLE_NLS is
   nonzero; otherwise it is the constant 0.  cmp uses it when choosing
   between the "char" and "byte" wording of the first-difference
   message.  */
#if defined LC_MESSAGES && ENABLE_NLS
# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
#else
# define hard_locale_LC_MESSAGES 0
#endif
46 
/* Forward declarations of the helpers that are defined after main.  */
static int cmp (void);
static off_t file_position (int);
static size_t block_compare (word const *, word const *);
static size_t block_compare_and_count (word const *, word const *, off_t *);
static void sprintc (char *, unsigned char);

/* Name under which this program was invoked (set from argv[0]).  */
char *program_name;

/* Names of the two compared files; "-" stands for standard input.  */
static char const *file[2];

/* File descriptors of the two files, indexed like FILE.  */
static int file_desc[2];

/* fstat results for the two files, indexed like FILE.  */
static struct stat stat_buf[2];

/* Read buffers for the files.  Both live in a single allocation made
   by main; buffer[1] starts right after buffer[0].  */
static word *buffer[2];

/* Block size used for each read; computed by main from the block
   sizes of both files.  */
static size_t buf_size;

/* Initial prefix (in bytes) to ignore for each file.  */
static off_t ignore_initial[2];

/* Maximum number of bytes to compare.  UINTMAX_MAX means no limit.  */
static uintmax_t bytes = UINTMAX_MAX;

/* Output format.  type_first_diff has the value 0 and is therefore
   the default for this zero-initialized variable.  */
static enum comparison_type
  {
    type_first_diff,	/* Print the first difference.  */
    type_all_diffs,	/* Print all differences.  */
    type_status		/* Exit status only.  */
  } comparison_type;

/* If true, print the values of differing bytes, quoted like cat -t
   does (see sprintc).  */
static bool opt_print_bytes;
87 
/* Codes that getopt_long returns for long options lacking a
   single-letter equivalent.  They start just above CHAR_MAX, so they
   cannot coincide with any option character.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

/* Table of long options.  Each row gives the option name, whether it
   takes an argument, a null flag pointer (so getopt_long returns the
   code directly), and the code returned.  The all-zero row terminates
   the table.  */
static struct option const long_options[] =
{
  {"print-bytes",    no_argument,       NULL, 'b'},
  {"print-chars",    no_argument,       NULL, 'c'}, /* obsolescent as of diffutils 2.7.3 */
  {"ignore-initial", required_argument, NULL, 'i'},
  {"verbose",        no_argument,       NULL, 'l'},
  {"bytes",          required_argument, NULL, 'n'},
  {"silent",         no_argument,       NULL, 's'},
  {"quiet",          no_argument,       NULL, 's'},
  {"version",        no_argument,       NULL, 'v'},
  {"help",           no_argument,       NULL, HELP_OPTION},
  {NULL, 0, NULL, 0}
};
107 
108 static void try_help (char const *, char const *) __attribute__((noreturn));
109 static void
110 try_help (char const *reason_msgid, char const *operand)
111 {
112   if (reason_msgid)
113     error (0, 0, _(reason_msgid), operand);
114   error (EXIT_TROUBLE, 0,
115 	 _("Try `%s --help' for more information."), program_name);
116   abort ();
117 }
118 
119 static char const valid_suffixes[] = "kKMGTPEZY0";
120 
121 /* Update ignore_initial[F] according to the result of parsing an
122    *operand ARGPTR of --ignore-initial, updating *ARGPTR to point
123    *after the operand.  If DELIMITER is nonzero, the operand may be
124    *followed by DELIMITER; otherwise it must be null-terminated.  */
125 static void
126 specify_ignore_initial (int f, char **argptr, char delimiter)
127 {
128   uintmax_t val;
129   off_t o;
130   char const *arg = *argptr;
131   strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
132   if (! (e == LONGINT_OK
133 	 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
134       || (o = val) < 0 || o != val || val == UINTMAX_MAX)
135     try_help ("invalid --ignore-initial value `%s'", arg);
136   if (ignore_initial[f] < o)
137     ignore_initial[f] = o;
138 }
139 
140 /* Specify the output format.  */
141 static void
142 specify_comparison_type (enum comparison_type t)
143 {
144   if (comparison_type && comparison_type != t)
145     try_help ("options -l and -s are incompatible", 0);
146   comparison_type = t;
147 }
148 
149 static void
150 check_stdout (void)
151 {
152   if (ferror (stdout))
153     error (EXIT_TROUBLE, 0, "%s", _("write failed"));
154   else if (fclose (stdout) != 0)
155     error (EXIT_TROUBLE, errno, "%s", _("standard output"));
156 }
157 
/* Message ids of the option descriptions printed by usage, one output
   line per entry.  Each entry is wrapped in N_ here and is passed
   through _() by usage at print time.  A null pointer ends the
   list.  */
static char const * const option_help_msgid[] = {
  N_("-b  --print-bytes  Print differing bytes."),
  N_("-i SKIP  --ignore-initial=SKIP  Skip the first SKIP bytes of input."),
  N_("-i SKIP1:SKIP2  --ignore-initial=SKIP1:SKIP2"),
  N_("  Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
  N_("-l  --verbose  Output byte numbers and values of all differing bytes."),
  N_("-n LIMIT  --bytes=LIMIT  Compare at most LIMIT bytes."),
  N_("-s  --quiet  --silent  Output nothing; yield exit status only."),
  N_("-v  --version  Output version info."),
  N_("--help  Output this help."),
  0
};
170 
171 static void
172 usage (void)
173 {
174   char const * const *p;
175 
176   printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
177 	  program_name);
178   printf ("%s\n\n", _("Compare two files byte by byte."));
179   for (p = option_help_msgid;  *p;  p++)
180     printf ("  %s\n", _(*p));
181   printf ("\n%s\n%s\n\n%s\n%s\n\n%s\n",
182 	  _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
183 	  _("SKIP values may be followed by the following multiplicative suffixes:\n\
184 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
185 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
186 	  _("If a FILE is `-' or missing, read standard input."),
187 	  _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
188 	  _("Report bugs to <bug-gnu-utils@gnu.org>."));
189 }
190 
191 int
192 main (int argc, char **argv)
193 {
194   int c, f, exit_status;
195   size_t words_per_buffer;
196 
197   exit_failure = EXIT_TROUBLE;
198   initialize_main (&argc, &argv);
199   program_name = argv[0];
200   setlocale (LC_ALL, "");
201   bindtextdomain (PACKAGE, LOCALEDIR);
202   textdomain (PACKAGE);
203   c_stack_action (0);
204 
205   /* Parse command line options.  */
206 
207   while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
208 	 != -1)
209     switch (c)
210       {
211       case 'b':
212       case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
213 	opt_print_bytes = true;
214 	break;
215 
216       case 'i':
217 	specify_ignore_initial (0, &optarg, ':');
218 	if (*optarg++ == ':')
219 	  specify_ignore_initial (1, &optarg, 0);
220 	else if (ignore_initial[1] < ignore_initial[0])
221 	  ignore_initial[1] = ignore_initial[0];
222 	break;
223 
224       case 'l':
225 	specify_comparison_type (type_all_diffs);
226 	break;
227 
228       case 'n':
229 	{
230 	  uintmax_t n;
231 	  if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
232 	    try_help ("invalid --bytes value `%s'", optarg);
233 	  if (n < bytes)
234 	    bytes = n;
235 	}
236 	break;
237 
238       case 's':
239 	specify_comparison_type (type_status);
240 	break;
241 
242       case 'v':
243 	/* TRANSLATORS: Please translate the second "o" in "Torbjorn
244 	   Granlund" to an o-with-umlaut (U+00F6, LATIN SMALL LETTER O
245 	   WITH DIAERESIS) if possible.  */
246 	version_etc (stdout, "cmp", PACKAGE_NAME, PACKAGE_VERSION,
247 		     _("Torbjorn Granlund"), "David MacKenzie", (char *) 0);
248 	check_stdout ();
249 	return EXIT_SUCCESS;
250 
251       case HELP_OPTION:
252 	usage ();
253 	check_stdout ();
254 	return EXIT_SUCCESS;
255 
256       default:
257 	try_help (0, 0);
258       }
259 
260   if (optind == argc)
261     try_help ("missing operand after `%s'", argv[argc - 1]);
262 
263   file[0] = argv[optind++];
264   file[1] = optind < argc ? argv[optind++] : "-";
265 
266   for (f = 0; f < 2 && optind < argc; f++)
267     {
268       char *arg = argv[optind++];
269       specify_ignore_initial (f, &arg, 0);
270     }
271 
272   if (optind < argc)
273     try_help ("extra operand `%s'", argv[optind]);
274 
275   for (f = 0; f < 2; f++)
276     {
277       /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
278 	 stdin is closed and opening file[0] yields file descriptor 0.  */
279       int f1 = f ^ (strcmp (file[1], "-") == 0);
280 
281       /* Two files with the same name and offset are identical.
282 	 But wait until we open the file once, for proper diagnostics.  */
283       if (f && ignore_initial[0] == ignore_initial[1]
284 	  && file_name_cmp (file[0], file[1]) == 0)
285 	return EXIT_SUCCESS;
286 
287       file_desc[f1] = (strcmp (file[f1], "-") == 0
288 		       ? STDIN_FILENO
289 		       : open (file[f1], O_RDONLY, 0));
290       if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
291 	{
292 	  if (file_desc[f1] < 0 && comparison_type == type_status)
293 	    exit (EXIT_TROUBLE);
294 	  else
295 	    error (EXIT_TROUBLE, errno, "%s", file[f1]);
296 	}
297 
298       set_binary_mode (file_desc[f1], true);
299     }
300 
301   /* If the files are links to the same inode and have the same file position,
302      they are identical.  */
303 
304   if (0 < same_file (&stat_buf[0], &stat_buf[1])
305       && same_file_attributes (&stat_buf[0], &stat_buf[1])
306       && file_position (0) == file_position (1))
307     return EXIT_SUCCESS;
308 
309   /* If output is redirected to the null device, we may assume `-s'.  */
310 
311   if (comparison_type != type_status)
312     {
313       struct stat outstat, nullstat;
314 
315       if (fstat (STDOUT_FILENO, &outstat) == 0
316 	  && stat (NULL_DEVICE, &nullstat) == 0
317 	  && 0 < same_file (&outstat, &nullstat))
318 	comparison_type = type_status;
319     }
320 
321   /* If only a return code is needed,
322      and if both input descriptors are associated with plain files,
323      conclude that the files differ if they have different sizes
324      and if more bytes will be compared than are in the smaller file.  */
325 
326   if (comparison_type == type_status
327       && S_ISREG (stat_buf[0].st_mode)
328       && S_ISREG (stat_buf[1].st_mode))
329     {
330       off_t s0 = stat_buf[0].st_size - file_position (0);
331       off_t s1 = stat_buf[1].st_size - file_position (1);
332       if (s0 < 0)
333 	s0 = 0;
334       if (s1 < 0)
335 	s1 = 0;
336       if (s0 != s1 && MIN (s0, s1) < bytes)
337 	exit (EXIT_FAILURE);
338     }
339 
340   /* Get the optimal block size of the files.  */
341 
342   buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
343 			 STAT_BLOCKSIZE (stat_buf[1]),
344 			 PTRDIFF_MAX - sizeof (word));
345 
346   /* Allocate word-aligned buffers, with space for sentinels at the end.  */
347 
348   words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
349   buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
350   buffer[1] = buffer[0] + words_per_buffer;
351 
352   exit_status = cmp ();
353 
354   for (f = 0; f < 2; f++)
355     if (close (file_desc[f]) != 0)
356       error (EXIT_TROUBLE, errno, "%s", file[f]);
357   if (exit_status != 0  &&  comparison_type != type_status)
358     check_stdout ();
359   exit (exit_status);
360   return exit_status;
361 }
362 
/* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
   using `buffer[0]' and `buffer[1]'.

   At most `bytes' bytes are compared, after the first
   `ignore_initial[F]' bytes of each file F have been skipped.
   Differences are reported on standard output in the format chosen
   by `comparison_type' and `opt_print_bytes'; a premature end of one
   file is reported on standard error unless only the status is wanted.

   Return EXIT_SUCCESS if identical, EXIT_FAILURE if different.
   Read errors do not come back as a return value: they are reported
   with error (EXIT_TROUBLE, ...).  */

static int
cmp (void)
{
  off_t line_number = 1;	/* Line number (1...) of difference. */
  off_t byte_number = 1;	/* Byte number (1...) of difference. */
  uintmax_t remaining = bytes;	/* Remaining number of bytes to compare.  */
  size_t read0, read1;		/* Number of bytes read from each file. */
  size_t first_diff;		/* Offset (0...) in buffers of 1st diff. */
  size_t smaller;		/* The lesser of `read0' and `read1'. */
  word *buffer0 = buffer[0];
  word *buffer1 = buffer[1];
  char *buf0 = (char *) buffer0;
  char *buf1 = (char *) buffer1;
  int ret = EXIT_SUCCESS;
  int f;
  int offset_width;		/* Set and used only for type_all_diffs.  */

  if (comparison_type == type_all_diffs)
    {
      /* Compute the column width for byte numbers: the number of
	 decimal digits in the largest byte number that may be
	 printed, bounded by `bytes' and by what is left of each
	 regular file.  */
      off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));

      for (f = 0; f < 2; f++)
	if (S_ISREG (stat_buf[f].st_mode))
	  {
	    off_t file_bytes = stat_buf[f].st_size - file_position (f);
	    if (file_bytes < byte_number_max)
	      byte_number_max = file_bytes;
	  }

      for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
	continue;
    }

  /* Skip the ignored prefix of each file that could not be skipped
     by seeking.  */
  for (f = 0; f < 2; f++)
    {
      off_t ig = ignore_initial[f];
      if (ig && file_position (f) == -1)
	{
	  /* lseek failed; read and discard the ignored initial prefix.  */
	  do
	    {
	      size_t bytes_to_read = MIN (ig, buf_size);
	      size_t r = block_read (file_desc[f], buf0, bytes_to_read);
	      if (r != bytes_to_read)
		{
		  /* SIZE_MAX signals a read error; any other short
		     count ends the skipping early.  */
		  if (r == SIZE_MAX)
		    error (EXIT_TROUBLE, errno, "%s", file[f]);
		  break;
		}
	      ig -= r;
	    }
	  while (ig);
	}
    }

  /* Main loop: read one block from each file and compare them.  */
  do
    {
      size_t bytes_to_read = buf_size;

      /* UINTMAX_MAX means that no byte limit is in effect.  */
      if (remaining != UINTMAX_MAX)
	{
	  if (remaining < bytes_to_read)
	    bytes_to_read = remaining;
	  remaining -= bytes_to_read;
	}

      read0 = block_read (file_desc[0], buf0, bytes_to_read);
      if (read0 == SIZE_MAX)
	error (EXIT_TROUBLE, errno, "%s", file[0]);
      read1 = block_read (file_desc[1], buf1, bytes_to_read);
      if (read1 == SIZE_MAX)
	error (EXIT_TROUBLE, errno, "%s", file[1]);

      /* Insert sentinels for the block compare.  Each sentinel is the
	 complement of the other buffer's byte at the same offset, so
	 the buffers are certain to differ there and the compare
	 functions stop no later than the end of the shorter read.  */

      buf0[read0] = ~buf1[read0];
      buf1[read1] = ~buf0[read1];

      /* If the line number should be written for differing files,
	 compare the blocks and count the number of newlines
	 simultaneously.  */
      first_diff = (comparison_type == type_first_diff
		    ? block_compare_and_count (buffer0, buffer1, &line_number)
		    : block_compare (buffer0, buffer1));

      byte_number += first_diff;
      smaller = MIN (read0, read1);

      /* A difference before the end of the shorter read is a real
	 difference rather than a hit on a sentinel.  */
      if (first_diff < smaller)
	{
	  switch (comparison_type)
	    {
	    case type_first_diff:
	      {
		char byte_buf[INT_BUFSIZE_BOUND (off_t)];
		char line_buf[INT_BUFSIZE_BOUND (off_t)];
		char const *byte_num = offtostr (byte_number, byte_buf);
		char const *line_num = offtostr (line_number, line_buf);
		if (!opt_print_bytes)
		  {
		    /* See POSIX 1003.1-2001 for this format.  This
		       message is used only in the POSIX locale, so it
		       need not be translated.  */
		    static char const char_message[] =
		      "%s %s differ: char %s, line %s\n";

		    /* The POSIX rationale recommends using the word
		       "byte" outside the POSIX locale.  Some gettext
		       implementations translate even in the POSIX
		       locale if certain other environment variables
		       are set, so use "byte" if a translation is
		       available, or if outside the POSIX locale.  */
		    static char const byte_msgid[] =
		      N_("%s %s differ: byte %s, line %s\n");
		    char const *byte_message = _(byte_msgid);
		    bool use_byte_message = (byte_message != byte_msgid
					     || hard_locale_LC_MESSAGES);

		    printf (use_byte_message ? byte_message : char_message,
			    file[0], file[1], byte_num, line_num);
		  }
		else
		  {
		    /* Also show both differing bytes, in octal and in
		       the quoted form produced by sprintc.  */
		    unsigned char c0 = buf0[first_diff];
		    unsigned char c1 = buf1[first_diff];
		    char s0[5];
		    char s1[5];
		    sprintc (s0, c0);
		    sprintc (s1, c1);
		    printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
			    file[0], file[1], byte_num, line_num,
			    c0, s0, c1, s1);
		}
	      }
	      /* Fall through.  */
	    case type_status:
	      return EXIT_FAILURE;

	    case type_all_diffs:
	      /* Walk the rest of the common part of the block byte by
		 byte, printing every differing pair.  */
	      do
		{
		  unsigned char c0 = buf0[first_diff];
		  unsigned char c1 = buf1[first_diff];
		  if (c0 != c1)
		    {
		      char byte_buf[INT_BUFSIZE_BOUND (off_t)];
		      char const *byte_num = offtostr (byte_number, byte_buf);
		      if (!opt_print_bytes)
			{
			  /* See POSIX 1003.1-2001 for this format.  */
			  printf ("%*s %3o %3o\n",
				  offset_width, byte_num, c0, c1);
			}
		      else
			{
			  char s0[5];
			  char s1[5];
			  sprintc (s0, c0);
			  sprintc (s1, c1);
			  printf ("%*s %3o %-4s %3o %s\n",
				  offset_width, byte_num, c0, s0, c1, s1);
			}
		    }
		  byte_number++;
		  first_diff++;
		}
	      while (first_diff < smaller);
	      ret = EXIT_FAILURE;
	      break;
	    }
	}

      /* Unequal read counts mean that one file ended first; the
	 index read1 < read0 selects the name of the shorter one.  */
      if (read0 != read1)
	{
	  if (comparison_type != type_status)
	    {
	      /* See POSIX 1003.1-2001 for this format.  */
	      fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
	    }

	  return EXIT_FAILURE;
	}
    }
  /* A read shorter than buf_size ends the loop: either end of file or
     the byte limit has been reached.  */
  while (read0 == buf_size);

  return ret;
}
555 
556 /* Compare two blocks of memory P0 and P1 until they differ,
557    and count the number of '\n' occurrences in the common
558    part of P0 and P1.
559    If the blocks are not guaranteed to be different, put sentinels at the ends
560    of the blocks before calling this function.
561 
562    Return the offset of the first byte that differs.
563    Increment *COUNT by the count of '\n' occurrences.  */
564 
565 static size_t
566 block_compare_and_count (word const *p0, word const *p1, off_t *count)
567 {
568   word l;		/* One word from first buffer. */
569   word const *l0, *l1;	/* Pointers into each buffer. */
570   char const *c0, *c1;	/* Pointers for finding exact address. */
571   size_t cnt = 0;	/* Number of '\n' occurrences. */
572   word nnnn;		/* Newline, sizeof (word) times.  */
573   int i;
574 
575   nnnn = 0;
576   for (i = 0; i < sizeof nnnn; i++)
577     nnnn = (nnnn << CHAR_BIT) | '\n';
578 
579   /* Find the rough position of the first difference by reading words,
580      not bytes.  */
581 
582   for (l0 = p0, l1 = p1;  (l = *l0) == *l1;  l0++, l1++)
583     {
584       l ^= nnnn;
585       for (i = 0; i < sizeof l; i++)
586 	{
587 	  unsigned char uc = l;
588 	  cnt += ! uc;
589 	  l >>= CHAR_BIT;
590 	}
591     }
592 
593   /* Find the exact differing position (endianness independent).  */
594 
595   for (c0 = (char const *) l0, c1 = (char const *) l1;
596        *c0 == *c1;
597        c0++, c1++)
598     cnt += *c0 == '\n';
599 
600   *count += cnt;
601   return c0 - (char const *) p0;
602 }
603 
604 /* Compare two blocks of memory P0 and P1 until they differ.
605    If the blocks are not guaranteed to be different, put sentinels at the ends
606    of the blocks before calling this function.
607 
608    Return the offset of the first byte that differs.  */
609 
610 static size_t
611 block_compare (word const *p0, word const *p1)
612 {
613   word const *l0, *l1;
614   char const *c0, *c1;
615 
616   /* Find the rough position of the first difference by reading words,
617      not bytes.  */
618 
619   for (l0 = p0, l1 = p1;  *l0 == *l1;  l0++, l1++)
620     continue;
621 
622   /* Find the exact differing position (endianness independent).  */
623 
624   for (c0 = (char const *) l0, c1 = (char const *) l1;
625        *c0 == *c1;
626        c0++, c1++)
627     continue;
628 
629   return c0 - (char const *) p0;
630 }
631 
/* Store in BUF a null-terminated, visible rendering of the byte C,
   quoting unprintable bytes the way cat -t does:
   a byte that isprint accepts is stored as is; otherwise a byte of
   128 or more gets the prefix "M-" and is reduced by 128, after which
   a value below 32 is written as '^' followed by the value plus 64,
   127 is written as "^?", and anything else is written unchanged.
   The longest result is 4 characters, so BUF must have room for 5.  */

static void
sprintc (char *buf, unsigned char c)
{
  char *out = buf;

  if (isprint (c))
    {
      out[0] = c;
      out[1] = 0;
      return;
    }

  if (128 <= c)
    {
      *out++ = 'M';
      *out++ = '-';
      c -= 128;
    }

  if (c == 127)
    {
      *out++ = '^';
      c = '?';
    }
  else if (c < 32)
    {
      *out++ = '^';
      c += 64;
    }

  *out++ = c;
  *out = 0;
}
661 
662 /* Position file F to ignore_initial[F] bytes from its initial position,
663    and yield its new position.  Don't try more than once.  */
664 
665 static off_t
666 file_position (int f)
667 {
668   static bool positioned[2];
669   static off_t position[2];
670 
671   if (! positioned[f])
672     {
673       positioned[f] = true;
674       position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
675     }
676   return position[f];
677 }
678