xref: /freebsd/contrib/diff/src/cmp.c (revision 18fd37a72c3a7549d2d4f6c6ea00bdcd2bdaca01)
/* cmp - compare two files byte by byte

   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
   2002, 2004 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
2018fd37a7SXin LI 
2118fd37a7SXin LI #include "system.h"
2218fd37a7SXin LI #include "paths.h"
2318fd37a7SXin LI 
2418fd37a7SXin LI #include <stdio.h>
2518fd37a7SXin LI 
2618fd37a7SXin LI #include <c-stack.h>
2718fd37a7SXin LI #include <cmpbuf.h>
2818fd37a7SXin LI #include <error.h>
2918fd37a7SXin LI #include <exit.h>
3018fd37a7SXin LI #include <exitfail.h>
3118fd37a7SXin LI #include <file-type.h>
3218fd37a7SXin LI #include <getopt.h>
3318fd37a7SXin LI #include <hard-locale.h>
3418fd37a7SXin LI #include <inttostr.h>
3518fd37a7SXin LI #include <setmode.h>
3618fd37a7SXin LI #include <unlocked-io.h>
3718fd37a7SXin LI #include <version-etc.h>
3818fd37a7SXin LI #include <xalloc.h>
3918fd37a7SXin LI #include <xstrtol.h>
4018fd37a7SXin LI 
/* hard_locale_LC_MESSAGES expands to a call to hard_locale (LC_MESSAGES)
   when LC_MESSAGES is defined and ENABLE_NLS is nonzero, and to the
   constant 0 otherwise.  */
#if defined LC_MESSAGES && ENABLE_NLS
# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
#else
# define hard_locale_LC_MESSAGES 0
#endif
4618fd37a7SXin LI 
4718fd37a7SXin LI static int cmp (void);
4818fd37a7SXin LI static off_t file_position (int);
4918fd37a7SXin LI static size_t block_compare (word const *, word const *);
5018fd37a7SXin LI static size_t block_compare_and_count (word const *, word const *, off_t *);
5118fd37a7SXin LI static void sprintc (char *, unsigned char);
5218fd37a7SXin LI 
5318fd37a7SXin LI /* Name under which this program was invoked.  */
5418fd37a7SXin LI char *program_name;
5518fd37a7SXin LI 
5618fd37a7SXin LI /* Filenames of the compared files.  */
5718fd37a7SXin LI static char const *file[2];
5818fd37a7SXin LI 
5918fd37a7SXin LI /* File descriptors of the files.  */
6018fd37a7SXin LI static int file_desc[2];
6118fd37a7SXin LI 
6218fd37a7SXin LI /* Status of the files.  */
6318fd37a7SXin LI static struct stat stat_buf[2];
6418fd37a7SXin LI 
6518fd37a7SXin LI /* Read buffers for the files.  */
6618fd37a7SXin LI static word *buffer[2];
6718fd37a7SXin LI 
6818fd37a7SXin LI /* Optimal block size for the files.  */
6918fd37a7SXin LI static size_t buf_size;
7018fd37a7SXin LI 
7118fd37a7SXin LI /* Initial prefix to ignore for each file.  */
7218fd37a7SXin LI static off_t ignore_initial[2];
7318fd37a7SXin LI 
7418fd37a7SXin LI /* Number of bytes to compare.  */
7518fd37a7SXin LI static uintmax_t bytes = UINTMAX_MAX;
7618fd37a7SXin LI 
7718fd37a7SXin LI /* Output format.  */
7818fd37a7SXin LI static enum comparison_type
7918fd37a7SXin LI   {
8018fd37a7SXin LI     type_first_diff,	/* Print the first difference.  */
8118fd37a7SXin LI     type_all_diffs,	/* Print all differences.  */
8218fd37a7SXin LI     type_status		/* Exit status only.  */
8318fd37a7SXin LI   } comparison_type;
8418fd37a7SXin LI 
8518fd37a7SXin LI /* If nonzero, print values of bytes quoted like cat -t does. */
8618fd37a7SXin LI static bool opt_print_bytes;
8718fd37a7SXin LI 
/* Values for long options that do not have single-letter equivalents.
   They start above CHAR_MAX so they cannot collide with any option
   character.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

/* Long-option table handed to getopt_long.  Each entry gives the option
   name, whether it takes an argument (1) or not (0), a null flag
   pointer, and the value getopt_long returns for it.  */
static struct option const long_options[] =
{
  {"print-bytes", 0, 0, 'b'},
  {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
  {"ignore-initial", 1, 0, 'i'},
  {"verbose", 0, 0, 'l'},
  {"bytes", 1, 0, 'n'},
  {"silent", 0, 0, 's'},
  {"quiet", 0, 0, 's'},
  {"version", 0, 0, 'v'},
  {"help", 0, 0, HELP_OPTION},
  {0, 0, 0, 0}
};
10718fd37a7SXin LI 
10818fd37a7SXin LI static void try_help (char const *, char const *) __attribute__((noreturn));
10918fd37a7SXin LI static void
try_help(char const * reason_msgid,char const * operand)11018fd37a7SXin LI try_help (char const *reason_msgid, char const *operand)
11118fd37a7SXin LI {
11218fd37a7SXin LI   if (reason_msgid)
11318fd37a7SXin LI     error (0, 0, _(reason_msgid), operand);
11418fd37a7SXin LI   error (EXIT_TROUBLE, 0,
11518fd37a7SXin LI 	 _("Try `%s --help' for more information."), program_name);
11618fd37a7SXin LI   abort ();
11718fd37a7SXin LI }
11818fd37a7SXin LI 
11918fd37a7SXin LI static char const valid_suffixes[] = "kKMGTPEZY0";
12018fd37a7SXin LI 
12118fd37a7SXin LI /* Update ignore_initial[F] according to the result of parsing an
12218fd37a7SXin LI    *operand ARGPTR of --ignore-initial, updating *ARGPTR to point
12318fd37a7SXin LI    *after the operand.  If DELIMITER is nonzero, the operand may be
12418fd37a7SXin LI    *followed by DELIMITER; otherwise it must be null-terminated.  */
12518fd37a7SXin LI static void
specify_ignore_initial(int f,char ** argptr,char delimiter)12618fd37a7SXin LI specify_ignore_initial (int f, char **argptr, char delimiter)
12718fd37a7SXin LI {
12818fd37a7SXin LI   uintmax_t val;
12918fd37a7SXin LI   off_t o;
13018fd37a7SXin LI   char const *arg = *argptr;
13118fd37a7SXin LI   strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
13218fd37a7SXin LI   if (! (e == LONGINT_OK
13318fd37a7SXin LI 	 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
13418fd37a7SXin LI       || (o = val) < 0 || o != val || val == UINTMAX_MAX)
13518fd37a7SXin LI     try_help ("invalid --ignore-initial value `%s'", arg);
13618fd37a7SXin LI   if (ignore_initial[f] < o)
13718fd37a7SXin LI     ignore_initial[f] = o;
13818fd37a7SXin LI }
13918fd37a7SXin LI 
14018fd37a7SXin LI /* Specify the output format.  */
14118fd37a7SXin LI static void
specify_comparison_type(enum comparison_type t)14218fd37a7SXin LI specify_comparison_type (enum comparison_type t)
14318fd37a7SXin LI {
14418fd37a7SXin LI   if (comparison_type && comparison_type != t)
14518fd37a7SXin LI     try_help ("options -l and -s are incompatible", 0);
14618fd37a7SXin LI   comparison_type = t;
14718fd37a7SXin LI }
14818fd37a7SXin LI 
14918fd37a7SXin LI static void
check_stdout(void)15018fd37a7SXin LI check_stdout (void)
15118fd37a7SXin LI {
15218fd37a7SXin LI   if (ferror (stdout))
15318fd37a7SXin LI     error (EXIT_TROUBLE, 0, "%s", _("write failed"));
15418fd37a7SXin LI   else if (fclose (stdout) != 0)
15518fd37a7SXin LI     error (EXIT_TROUBLE, errno, "%s", _("standard output"));
15618fd37a7SXin LI }
15718fd37a7SXin LI 
15818fd37a7SXin LI static char const * const option_help_msgid[] = {
15918fd37a7SXin LI   N_("-b  --print-bytes  Print differing bytes."),
16018fd37a7SXin LI   N_("-i SKIP  --ignore-initial=SKIP  Skip the first SKIP bytes of input."),
16118fd37a7SXin LI   N_("-i SKIP1:SKIP2  --ignore-initial=SKIP1:SKIP2"),
16218fd37a7SXin LI   N_("  Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
16318fd37a7SXin LI   N_("-l  --verbose  Output byte numbers and values of all differing bytes."),
16418fd37a7SXin LI   N_("-n LIMIT  --bytes=LIMIT  Compare at most LIMIT bytes."),
16518fd37a7SXin LI   N_("-s  --quiet  --silent  Output nothing; yield exit status only."),
16618fd37a7SXin LI   N_("-v  --version  Output version info."),
16718fd37a7SXin LI   N_("--help  Output this help."),
16818fd37a7SXin LI   0
16918fd37a7SXin LI };
17018fd37a7SXin LI 
17118fd37a7SXin LI static void
usage(void)17218fd37a7SXin LI usage (void)
17318fd37a7SXin LI {
17418fd37a7SXin LI   char const * const *p;
17518fd37a7SXin LI 
17618fd37a7SXin LI   printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
17718fd37a7SXin LI 	  program_name);
17818fd37a7SXin LI   printf ("%s\n\n", _("Compare two files byte by byte."));
17918fd37a7SXin LI   for (p = option_help_msgid;  *p;  p++)
18018fd37a7SXin LI     printf ("  %s\n", _(*p));
18118fd37a7SXin LI   printf ("\n%s\n%s\n\n%s\n%s\n\n%s\n",
18218fd37a7SXin LI 	  _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
18318fd37a7SXin LI 	  _("SKIP values may be followed by the following multiplicative suffixes:\n\
18418fd37a7SXin LI kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
18518fd37a7SXin LI GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
18618fd37a7SXin LI 	  _("If a FILE is `-' or missing, read standard input."),
18718fd37a7SXin LI 	  _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
18818fd37a7SXin LI 	  _("Report bugs to <bug-gnu-utils@gnu.org>."));
18918fd37a7SXin LI }
19018fd37a7SXin LI 
19118fd37a7SXin LI int
main(int argc,char ** argv)19218fd37a7SXin LI main (int argc, char **argv)
19318fd37a7SXin LI {
19418fd37a7SXin LI   int c, f, exit_status;
19518fd37a7SXin LI   size_t words_per_buffer;
19618fd37a7SXin LI 
19718fd37a7SXin LI   exit_failure = EXIT_TROUBLE;
19818fd37a7SXin LI   initialize_main (&argc, &argv);
19918fd37a7SXin LI   program_name = argv[0];
20018fd37a7SXin LI   setlocale (LC_ALL, "");
20118fd37a7SXin LI   bindtextdomain (PACKAGE, LOCALEDIR);
20218fd37a7SXin LI   textdomain (PACKAGE);
20318fd37a7SXin LI   c_stack_action (0);
20418fd37a7SXin LI 
20518fd37a7SXin LI   /* Parse command line options.  */
20618fd37a7SXin LI 
20718fd37a7SXin LI   while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
20818fd37a7SXin LI 	 != -1)
20918fd37a7SXin LI     switch (c)
21018fd37a7SXin LI       {
21118fd37a7SXin LI       case 'b':
21218fd37a7SXin LI       case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
21318fd37a7SXin LI 	opt_print_bytes = true;
21418fd37a7SXin LI 	break;
21518fd37a7SXin LI 
21618fd37a7SXin LI       case 'i':
21718fd37a7SXin LI 	specify_ignore_initial (0, &optarg, ':');
21818fd37a7SXin LI 	if (*optarg++ == ':')
21918fd37a7SXin LI 	  specify_ignore_initial (1, &optarg, 0);
22018fd37a7SXin LI 	else if (ignore_initial[1] < ignore_initial[0])
22118fd37a7SXin LI 	  ignore_initial[1] = ignore_initial[0];
22218fd37a7SXin LI 	break;
22318fd37a7SXin LI 
22418fd37a7SXin LI       case 'l':
22518fd37a7SXin LI 	specify_comparison_type (type_all_diffs);
22618fd37a7SXin LI 	break;
22718fd37a7SXin LI 
22818fd37a7SXin LI       case 'n':
22918fd37a7SXin LI 	{
23018fd37a7SXin LI 	  uintmax_t n;
23118fd37a7SXin LI 	  if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
23218fd37a7SXin LI 	    try_help ("invalid --bytes value `%s'", optarg);
23318fd37a7SXin LI 	  if (n < bytes)
23418fd37a7SXin LI 	    bytes = n;
23518fd37a7SXin LI 	}
23618fd37a7SXin LI 	break;
23718fd37a7SXin LI 
23818fd37a7SXin LI       case 's':
23918fd37a7SXin LI 	specify_comparison_type (type_status);
24018fd37a7SXin LI 	break;
24118fd37a7SXin LI 
24218fd37a7SXin LI       case 'v':
24318fd37a7SXin LI 	/* TRANSLATORS: Please translate the second "o" in "Torbjorn
24418fd37a7SXin LI 	   Granlund" to an o-with-umlaut (U+00F6, LATIN SMALL LETTER O
24518fd37a7SXin LI 	   WITH DIAERESIS) if possible.  */
24618fd37a7SXin LI 	version_etc (stdout, "cmp", PACKAGE_NAME, PACKAGE_VERSION,
24718fd37a7SXin LI 		     _("Torbjorn Granlund"), "David MacKenzie", (char *) 0);
24818fd37a7SXin LI 	check_stdout ();
24918fd37a7SXin LI 	return EXIT_SUCCESS;
25018fd37a7SXin LI 
25118fd37a7SXin LI       case HELP_OPTION:
25218fd37a7SXin LI 	usage ();
25318fd37a7SXin LI 	check_stdout ();
25418fd37a7SXin LI 	return EXIT_SUCCESS;
25518fd37a7SXin LI 
25618fd37a7SXin LI       default:
25718fd37a7SXin LI 	try_help (0, 0);
25818fd37a7SXin LI       }
25918fd37a7SXin LI 
26018fd37a7SXin LI   if (optind == argc)
26118fd37a7SXin LI     try_help ("missing operand after `%s'", argv[argc - 1]);
26218fd37a7SXin LI 
26318fd37a7SXin LI   file[0] = argv[optind++];
26418fd37a7SXin LI   file[1] = optind < argc ? argv[optind++] : "-";
26518fd37a7SXin LI 
26618fd37a7SXin LI   for (f = 0; f < 2 && optind < argc; f++)
26718fd37a7SXin LI     {
26818fd37a7SXin LI       char *arg = argv[optind++];
26918fd37a7SXin LI       specify_ignore_initial (f, &arg, 0);
27018fd37a7SXin LI     }
27118fd37a7SXin LI 
27218fd37a7SXin LI   if (optind < argc)
27318fd37a7SXin LI     try_help ("extra operand `%s'", argv[optind]);
27418fd37a7SXin LI 
27518fd37a7SXin LI   for (f = 0; f < 2; f++)
27618fd37a7SXin LI     {
27718fd37a7SXin LI       /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
27818fd37a7SXin LI 	 stdin is closed and opening file[0] yields file descriptor 0.  */
27918fd37a7SXin LI       int f1 = f ^ (strcmp (file[1], "-") == 0);
28018fd37a7SXin LI 
28118fd37a7SXin LI       /* Two files with the same name and offset are identical.
28218fd37a7SXin LI 	 But wait until we open the file once, for proper diagnostics.  */
28318fd37a7SXin LI       if (f && ignore_initial[0] == ignore_initial[1]
28418fd37a7SXin LI 	  && file_name_cmp (file[0], file[1]) == 0)
28518fd37a7SXin LI 	return EXIT_SUCCESS;
28618fd37a7SXin LI 
28718fd37a7SXin LI       file_desc[f1] = (strcmp (file[f1], "-") == 0
28818fd37a7SXin LI 		       ? STDIN_FILENO
28918fd37a7SXin LI 		       : open (file[f1], O_RDONLY, 0));
29018fd37a7SXin LI       if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
29118fd37a7SXin LI 	{
29218fd37a7SXin LI 	  if (file_desc[f1] < 0 && comparison_type == type_status)
29318fd37a7SXin LI 	    exit (EXIT_TROUBLE);
29418fd37a7SXin LI 	  else
29518fd37a7SXin LI 	    error (EXIT_TROUBLE, errno, "%s", file[f1]);
29618fd37a7SXin LI 	}
29718fd37a7SXin LI 
29818fd37a7SXin LI       set_binary_mode (file_desc[f1], true);
29918fd37a7SXin LI     }
30018fd37a7SXin LI 
30118fd37a7SXin LI   /* If the files are links to the same inode and have the same file position,
30218fd37a7SXin LI      they are identical.  */
30318fd37a7SXin LI 
30418fd37a7SXin LI   if (0 < same_file (&stat_buf[0], &stat_buf[1])
30518fd37a7SXin LI       && same_file_attributes (&stat_buf[0], &stat_buf[1])
30618fd37a7SXin LI       && file_position (0) == file_position (1))
30718fd37a7SXin LI     return EXIT_SUCCESS;
30818fd37a7SXin LI 
30918fd37a7SXin LI   /* If output is redirected to the null device, we may assume `-s'.  */
31018fd37a7SXin LI 
31118fd37a7SXin LI   if (comparison_type != type_status)
31218fd37a7SXin LI     {
31318fd37a7SXin LI       struct stat outstat, nullstat;
31418fd37a7SXin LI 
31518fd37a7SXin LI       if (fstat (STDOUT_FILENO, &outstat) == 0
31618fd37a7SXin LI 	  && stat (NULL_DEVICE, &nullstat) == 0
31718fd37a7SXin LI 	  && 0 < same_file (&outstat, &nullstat))
31818fd37a7SXin LI 	comparison_type = type_status;
31918fd37a7SXin LI     }
32018fd37a7SXin LI 
32118fd37a7SXin LI   /* If only a return code is needed,
32218fd37a7SXin LI      and if both input descriptors are associated with plain files,
32318fd37a7SXin LI      conclude that the files differ if they have different sizes
32418fd37a7SXin LI      and if more bytes will be compared than are in the smaller file.  */
32518fd37a7SXin LI 
32618fd37a7SXin LI   if (comparison_type == type_status
32718fd37a7SXin LI       && S_ISREG (stat_buf[0].st_mode)
32818fd37a7SXin LI       && S_ISREG (stat_buf[1].st_mode))
32918fd37a7SXin LI     {
33018fd37a7SXin LI       off_t s0 = stat_buf[0].st_size - file_position (0);
33118fd37a7SXin LI       off_t s1 = stat_buf[1].st_size - file_position (1);
33218fd37a7SXin LI       if (s0 < 0)
33318fd37a7SXin LI 	s0 = 0;
33418fd37a7SXin LI       if (s1 < 0)
33518fd37a7SXin LI 	s1 = 0;
33618fd37a7SXin LI       if (s0 != s1 && MIN (s0, s1) < bytes)
33718fd37a7SXin LI 	exit (EXIT_FAILURE);
33818fd37a7SXin LI     }
33918fd37a7SXin LI 
34018fd37a7SXin LI   /* Get the optimal block size of the files.  */
34118fd37a7SXin LI 
34218fd37a7SXin LI   buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
34318fd37a7SXin LI 			 STAT_BLOCKSIZE (stat_buf[1]),
34418fd37a7SXin LI 			 PTRDIFF_MAX - sizeof (word));
34518fd37a7SXin LI 
34618fd37a7SXin LI   /* Allocate word-aligned buffers, with space for sentinels at the end.  */
34718fd37a7SXin LI 
34818fd37a7SXin LI   words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
34918fd37a7SXin LI   buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
35018fd37a7SXin LI   buffer[1] = buffer[0] + words_per_buffer;
35118fd37a7SXin LI 
35218fd37a7SXin LI   exit_status = cmp ();
35318fd37a7SXin LI 
35418fd37a7SXin LI   for (f = 0; f < 2; f++)
35518fd37a7SXin LI     if (close (file_desc[f]) != 0)
35618fd37a7SXin LI       error (EXIT_TROUBLE, errno, "%s", file[f]);
35718fd37a7SXin LI   if (exit_status != 0  &&  comparison_type != type_status)
35818fd37a7SXin LI     check_stdout ();
35918fd37a7SXin LI   exit (exit_status);
36018fd37a7SXin LI   return exit_status;
36118fd37a7SXin LI }
36218fd37a7SXin LI 
36318fd37a7SXin LI /* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
36418fd37a7SXin LI    using `buffer[0]' and `buffer[1]'.
36518fd37a7SXin LI    Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
36618fd37a7SXin LI    >1 if error.  */
36718fd37a7SXin LI 
36818fd37a7SXin LI static int
cmp(void)36918fd37a7SXin LI cmp (void)
37018fd37a7SXin LI {
37118fd37a7SXin LI   off_t line_number = 1;	/* Line number (1...) of difference. */
37218fd37a7SXin LI   off_t byte_number = 1;	/* Byte number (1...) of difference. */
37318fd37a7SXin LI   uintmax_t remaining = bytes;	/* Remaining number of bytes to compare.  */
37418fd37a7SXin LI   size_t read0, read1;		/* Number of bytes read from each file. */
37518fd37a7SXin LI   size_t first_diff;		/* Offset (0...) in buffers of 1st diff. */
37618fd37a7SXin LI   size_t smaller;		/* The lesser of `read0' and `read1'. */
37718fd37a7SXin LI   word *buffer0 = buffer[0];
37818fd37a7SXin LI   word *buffer1 = buffer[1];
37918fd37a7SXin LI   char *buf0 = (char *) buffer0;
38018fd37a7SXin LI   char *buf1 = (char *) buffer1;
38118fd37a7SXin LI   int ret = EXIT_SUCCESS;
38218fd37a7SXin LI   int f;
38318fd37a7SXin LI   int offset_width;
38418fd37a7SXin LI 
38518fd37a7SXin LI   if (comparison_type == type_all_diffs)
38618fd37a7SXin LI     {
38718fd37a7SXin LI       off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
38818fd37a7SXin LI 
38918fd37a7SXin LI       for (f = 0; f < 2; f++)
39018fd37a7SXin LI 	if (S_ISREG (stat_buf[f].st_mode))
39118fd37a7SXin LI 	  {
39218fd37a7SXin LI 	    off_t file_bytes = stat_buf[f].st_size - file_position (f);
39318fd37a7SXin LI 	    if (file_bytes < byte_number_max)
39418fd37a7SXin LI 	      byte_number_max = file_bytes;
39518fd37a7SXin LI 	  }
39618fd37a7SXin LI 
39718fd37a7SXin LI       for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
39818fd37a7SXin LI 	continue;
39918fd37a7SXin LI     }
40018fd37a7SXin LI 
40118fd37a7SXin LI   for (f = 0; f < 2; f++)
40218fd37a7SXin LI     {
40318fd37a7SXin LI       off_t ig = ignore_initial[f];
40418fd37a7SXin LI       if (ig && file_position (f) == -1)
40518fd37a7SXin LI 	{
40618fd37a7SXin LI 	  /* lseek failed; read and discard the ignored initial prefix.  */
40718fd37a7SXin LI 	  do
40818fd37a7SXin LI 	    {
40918fd37a7SXin LI 	      size_t bytes_to_read = MIN (ig, buf_size);
41018fd37a7SXin LI 	      size_t r = block_read (file_desc[f], buf0, bytes_to_read);
41118fd37a7SXin LI 	      if (r != bytes_to_read)
41218fd37a7SXin LI 		{
41318fd37a7SXin LI 		  if (r == SIZE_MAX)
41418fd37a7SXin LI 		    error (EXIT_TROUBLE, errno, "%s", file[f]);
41518fd37a7SXin LI 		  break;
41618fd37a7SXin LI 		}
41718fd37a7SXin LI 	      ig -= r;
41818fd37a7SXin LI 	    }
41918fd37a7SXin LI 	  while (ig);
42018fd37a7SXin LI 	}
42118fd37a7SXin LI     }
42218fd37a7SXin LI 
42318fd37a7SXin LI   do
42418fd37a7SXin LI     {
42518fd37a7SXin LI       size_t bytes_to_read = buf_size;
42618fd37a7SXin LI 
42718fd37a7SXin LI       if (remaining != UINTMAX_MAX)
42818fd37a7SXin LI 	{
42918fd37a7SXin LI 	  if (remaining < bytes_to_read)
43018fd37a7SXin LI 	    bytes_to_read = remaining;
43118fd37a7SXin LI 	  remaining -= bytes_to_read;
43218fd37a7SXin LI 	}
43318fd37a7SXin LI 
43418fd37a7SXin LI       read0 = block_read (file_desc[0], buf0, bytes_to_read);
43518fd37a7SXin LI       if (read0 == SIZE_MAX)
43618fd37a7SXin LI 	error (EXIT_TROUBLE, errno, "%s", file[0]);
43718fd37a7SXin LI       read1 = block_read (file_desc[1], buf1, bytes_to_read);
43818fd37a7SXin LI       if (read1 == SIZE_MAX)
43918fd37a7SXin LI 	error (EXIT_TROUBLE, errno, "%s", file[1]);
44018fd37a7SXin LI 
44118fd37a7SXin LI       /* Insert sentinels for the block compare.  */
44218fd37a7SXin LI 
44318fd37a7SXin LI       buf0[read0] = ~buf1[read0];
44418fd37a7SXin LI       buf1[read1] = ~buf0[read1];
44518fd37a7SXin LI 
44618fd37a7SXin LI       /* If the line number should be written for differing files,
44718fd37a7SXin LI 	 compare the blocks and count the number of newlines
44818fd37a7SXin LI 	 simultaneously.  */
44918fd37a7SXin LI       first_diff = (comparison_type == type_first_diff
45018fd37a7SXin LI 		    ? block_compare_and_count (buffer0, buffer1, &line_number)
45118fd37a7SXin LI 		    : block_compare (buffer0, buffer1));
45218fd37a7SXin LI 
45318fd37a7SXin LI       byte_number += first_diff;
45418fd37a7SXin LI       smaller = MIN (read0, read1);
45518fd37a7SXin LI 
45618fd37a7SXin LI       if (first_diff < smaller)
45718fd37a7SXin LI 	{
45818fd37a7SXin LI 	  switch (comparison_type)
45918fd37a7SXin LI 	    {
46018fd37a7SXin LI 	    case type_first_diff:
46118fd37a7SXin LI 	      {
46218fd37a7SXin LI 		char byte_buf[INT_BUFSIZE_BOUND (off_t)];
46318fd37a7SXin LI 		char line_buf[INT_BUFSIZE_BOUND (off_t)];
46418fd37a7SXin LI 		char const *byte_num = offtostr (byte_number, byte_buf);
46518fd37a7SXin LI 		char const *line_num = offtostr (line_number, line_buf);
46618fd37a7SXin LI 		if (!opt_print_bytes)
46718fd37a7SXin LI 		  {
46818fd37a7SXin LI 		    /* See POSIX 1003.1-2001 for this format.  This
46918fd37a7SXin LI 		       message is used only in the POSIX locale, so it
47018fd37a7SXin LI 		       need not be translated.  */
47118fd37a7SXin LI 		    static char const char_message[] =
47218fd37a7SXin LI 		      "%s %s differ: char %s, line %s\n";
47318fd37a7SXin LI 
47418fd37a7SXin LI 		    /* The POSIX rationale recommends using the word
47518fd37a7SXin LI 		       "byte" outside the POSIX locale.  Some gettext
47618fd37a7SXin LI 		       implementations translate even in the POSIX
47718fd37a7SXin LI 		       locale if certain other environment variables
47818fd37a7SXin LI 		       are set, so use "byte" if a translation is
47918fd37a7SXin LI 		       available, or if outside the POSIX locale.  */
48018fd37a7SXin LI 		    static char const byte_msgid[] =
48118fd37a7SXin LI 		      N_("%s %s differ: byte %s, line %s\n");
48218fd37a7SXin LI 		    char const *byte_message = _(byte_msgid);
48318fd37a7SXin LI 		    bool use_byte_message = (byte_message != byte_msgid
48418fd37a7SXin LI 					     || hard_locale_LC_MESSAGES);
48518fd37a7SXin LI 
48618fd37a7SXin LI 		    printf (use_byte_message ? byte_message : char_message,
48718fd37a7SXin LI 			    file[0], file[1], byte_num, line_num);
48818fd37a7SXin LI 		  }
48918fd37a7SXin LI 		else
49018fd37a7SXin LI 		  {
49118fd37a7SXin LI 		    unsigned char c0 = buf0[first_diff];
49218fd37a7SXin LI 		    unsigned char c1 = buf1[first_diff];
49318fd37a7SXin LI 		    char s0[5];
49418fd37a7SXin LI 		    char s1[5];
49518fd37a7SXin LI 		    sprintc (s0, c0);
49618fd37a7SXin LI 		    sprintc (s1, c1);
49718fd37a7SXin LI 		    printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
49818fd37a7SXin LI 			    file[0], file[1], byte_num, line_num,
49918fd37a7SXin LI 			    c0, s0, c1, s1);
50018fd37a7SXin LI 		}
50118fd37a7SXin LI 	      }
50218fd37a7SXin LI 	      /* Fall through.  */
50318fd37a7SXin LI 	    case type_status:
50418fd37a7SXin LI 	      return EXIT_FAILURE;
50518fd37a7SXin LI 
50618fd37a7SXin LI 	    case type_all_diffs:
50718fd37a7SXin LI 	      do
50818fd37a7SXin LI 		{
50918fd37a7SXin LI 		  unsigned char c0 = buf0[first_diff];
51018fd37a7SXin LI 		  unsigned char c1 = buf1[first_diff];
51118fd37a7SXin LI 		  if (c0 != c1)
51218fd37a7SXin LI 		    {
51318fd37a7SXin LI 		      char byte_buf[INT_BUFSIZE_BOUND (off_t)];
51418fd37a7SXin LI 		      char const *byte_num = offtostr (byte_number, byte_buf);
51518fd37a7SXin LI 		      if (!opt_print_bytes)
51618fd37a7SXin LI 			{
51718fd37a7SXin LI 			  /* See POSIX 1003.1-2001 for this format.  */
51818fd37a7SXin LI 			  printf ("%*s %3o %3o\n",
51918fd37a7SXin LI 				  offset_width, byte_num, c0, c1);
52018fd37a7SXin LI 			}
52118fd37a7SXin LI 		      else
52218fd37a7SXin LI 			{
52318fd37a7SXin LI 			  char s0[5];
52418fd37a7SXin LI 			  char s1[5];
52518fd37a7SXin LI 			  sprintc (s0, c0);
52618fd37a7SXin LI 			  sprintc (s1, c1);
52718fd37a7SXin LI 			  printf ("%*s %3o %-4s %3o %s\n",
52818fd37a7SXin LI 				  offset_width, byte_num, c0, s0, c1, s1);
52918fd37a7SXin LI 			}
53018fd37a7SXin LI 		    }
53118fd37a7SXin LI 		  byte_number++;
53218fd37a7SXin LI 		  first_diff++;
53318fd37a7SXin LI 		}
53418fd37a7SXin LI 	      while (first_diff < smaller);
53518fd37a7SXin LI 	      ret = EXIT_FAILURE;
53618fd37a7SXin LI 	      break;
53718fd37a7SXin LI 	    }
53818fd37a7SXin LI 	}
53918fd37a7SXin LI 
54018fd37a7SXin LI       if (read0 != read1)
54118fd37a7SXin LI 	{
54218fd37a7SXin LI 	  if (comparison_type != type_status)
54318fd37a7SXin LI 	    {
54418fd37a7SXin LI 	      /* See POSIX 1003.1-2001 for this format.  */
54518fd37a7SXin LI 	      fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
54618fd37a7SXin LI 	    }
54718fd37a7SXin LI 
54818fd37a7SXin LI 	  return EXIT_FAILURE;
54918fd37a7SXin LI 	}
55018fd37a7SXin LI     }
55118fd37a7SXin LI   while (read0 == buf_size);
55218fd37a7SXin LI 
55318fd37a7SXin LI   return ret;
55418fd37a7SXin LI }
55518fd37a7SXin LI 
55618fd37a7SXin LI /* Compare two blocks of memory P0 and P1 until they differ,
55718fd37a7SXin LI    and count the number of '\n' occurrences in the common
55818fd37a7SXin LI    part of P0 and P1.
55918fd37a7SXin LI    If the blocks are not guaranteed to be different, put sentinels at the ends
56018fd37a7SXin LI    of the blocks before calling this function.
56118fd37a7SXin LI 
56218fd37a7SXin LI    Return the offset of the first byte that differs.
56318fd37a7SXin LI    Increment *COUNT by the count of '\n' occurrences.  */
56418fd37a7SXin LI 
56518fd37a7SXin LI static size_t
block_compare_and_count(word const * p0,word const * p1,off_t * count)56618fd37a7SXin LI block_compare_and_count (word const *p0, word const *p1, off_t *count)
56718fd37a7SXin LI {
56818fd37a7SXin LI   word l;		/* One word from first buffer. */
56918fd37a7SXin LI   word const *l0, *l1;	/* Pointers into each buffer. */
57018fd37a7SXin LI   char const *c0, *c1;	/* Pointers for finding exact address. */
57118fd37a7SXin LI   size_t cnt = 0;	/* Number of '\n' occurrences. */
57218fd37a7SXin LI   word nnnn;		/* Newline, sizeof (word) times.  */
57318fd37a7SXin LI   int i;
57418fd37a7SXin LI 
57518fd37a7SXin LI   nnnn = 0;
57618fd37a7SXin LI   for (i = 0; i < sizeof nnnn; i++)
57718fd37a7SXin LI     nnnn = (nnnn << CHAR_BIT) | '\n';
57818fd37a7SXin LI 
57918fd37a7SXin LI   /* Find the rough position of the first difference by reading words,
58018fd37a7SXin LI      not bytes.  */
58118fd37a7SXin LI 
58218fd37a7SXin LI   for (l0 = p0, l1 = p1;  (l = *l0) == *l1;  l0++, l1++)
58318fd37a7SXin LI     {
58418fd37a7SXin LI       l ^= nnnn;
58518fd37a7SXin LI       for (i = 0; i < sizeof l; i++)
58618fd37a7SXin LI 	{
58718fd37a7SXin LI 	  unsigned char uc = l;
58818fd37a7SXin LI 	  cnt += ! uc;
58918fd37a7SXin LI 	  l >>= CHAR_BIT;
59018fd37a7SXin LI 	}
59118fd37a7SXin LI     }
59218fd37a7SXin LI 
59318fd37a7SXin LI   /* Find the exact differing position (endianness independent).  */
59418fd37a7SXin LI 
59518fd37a7SXin LI   for (c0 = (char const *) l0, c1 = (char const *) l1;
59618fd37a7SXin LI        *c0 == *c1;
59718fd37a7SXin LI        c0++, c1++)
59818fd37a7SXin LI     cnt += *c0 == '\n';
59918fd37a7SXin LI 
60018fd37a7SXin LI   *count += cnt;
60118fd37a7SXin LI   return c0 - (char const *) p0;
60218fd37a7SXin LI }
60318fd37a7SXin LI 
60418fd37a7SXin LI /* Compare two blocks of memory P0 and P1 until they differ.
60518fd37a7SXin LI    If the blocks are not guaranteed to be different, put sentinels at the ends
60618fd37a7SXin LI    of the blocks before calling this function.
60718fd37a7SXin LI 
60818fd37a7SXin LI    Return the offset of the first byte that differs.  */
60918fd37a7SXin LI 
61018fd37a7SXin LI static size_t
block_compare(word const * p0,word const * p1)61118fd37a7SXin LI block_compare (word const *p0, word const *p1)
61218fd37a7SXin LI {
61318fd37a7SXin LI   word const *l0, *l1;
61418fd37a7SXin LI   char const *c0, *c1;
61518fd37a7SXin LI 
61618fd37a7SXin LI   /* Find the rough position of the first difference by reading words,
61718fd37a7SXin LI      not bytes.  */
61818fd37a7SXin LI 
61918fd37a7SXin LI   for (l0 = p0, l1 = p1;  *l0 == *l1;  l0++, l1++)
62018fd37a7SXin LI     continue;
62118fd37a7SXin LI 
62218fd37a7SXin LI   /* Find the exact differing position (endianness independent).  */
62318fd37a7SXin LI 
62418fd37a7SXin LI   for (c0 = (char const *) l0, c1 = (char const *) l1;
62518fd37a7SXin LI        *c0 == *c1;
62618fd37a7SXin LI        c0++, c1++)
62718fd37a7SXin LI     continue;
62818fd37a7SXin LI 
62918fd37a7SXin LI   return c0 - (char const *) p0;
63018fd37a7SXin LI }
63118fd37a7SXin LI 
/* Put into BUF the unsigned char C, making unprintable bytes
   visible by quoting like cat -t does.

   A byte that isprint accepts is stored unchanged.  Otherwise a byte
   of 128 or more gets an "M-" prefix and has 128 subtracted; after
   that a value below 32 is written as '^' followed by the value plus
   64, and 127 as "^?".  BUF is always null-terminated and must have
   room for 5 bytes, the longest result being "M-^?" plus the
   terminator.  */

static void
sprintc (char *buf, unsigned char c)
{
  if (! isprint (c))
    {
      if (c >= 128)
        {
          *buf++ = 'M';
          *buf++ = '-';
          c -= 128;
        }
      if (c < 32)
        {
          *buf++ = '^';
          c += 64;
        }
      else if (c == 127)
        {
          *buf++ = '^';
          c = '?';
        }
    }

  *buf++ = c;
  *buf = 0;
}
66118fd37a7SXin LI 
66218fd37a7SXin LI /* Position file F to ignore_initial[F] bytes from its initial position,
66318fd37a7SXin LI    and yield its new position.  Don't try more than once.  */
66418fd37a7SXin LI 
66518fd37a7SXin LI static off_t
file_position(int f)66618fd37a7SXin LI file_position (int f)
66718fd37a7SXin LI {
66818fd37a7SXin LI   static bool positioned[2];
66918fd37a7SXin LI   static off_t position[2];
67018fd37a7SXin LI 
67118fd37a7SXin LI   if (! positioned[f])
67218fd37a7SXin LI     {
67318fd37a7SXin LI       positioned[f] = true;
67418fd37a7SXin LI       position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
67518fd37a7SXin LI     }
67618fd37a7SXin LI   return position[f];
67718fd37a7SXin LI }
678