/* cmp - compare two files byte by byte

   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
   2002, 2004 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

#include "system.h"
#include "paths.h"

#include <stdio.h>

#include <c-stack.h>
#include <cmpbuf.h>
#include <error.h>
#include <exit.h>
#include <exitfail.h>
#include <file-type.h>
#include <getopt.h>
#include <hard-locale.h>
#include <inttostr.h>
#include <setmode.h>
#include <unlocked-io.h>
#include <version-etc.h>
#include <xalloc.h>
#include <xstrtol.h>

/* hard_locale_LC_MESSAGES expands to a call to hard_locale (LC_MESSAGES)
   when LC_MESSAGES is defined and ENABLE_NLS is nonzero, and to the
   constant 0 otherwise.  cmp () uses it to choose between the "char"
   and "byte" wording of its first-difference message.  */
#if defined LC_MESSAGES && ENABLE_NLS
# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
#else
# define hard_locale_LC_MESSAGES 0
#endif

4718fd37a7SXin LI static int cmp (void);
4818fd37a7SXin LI static off_t file_position (int);
4918fd37a7SXin LI static size_t block_compare (word const *, word const *);
5018fd37a7SXin LI static size_t block_compare_and_count (word const *, word const *, off_t *);
5118fd37a7SXin LI static void sprintc (char *, unsigned char);
5218fd37a7SXin LI
5318fd37a7SXin LI /* Name under which this program was invoked. */
5418fd37a7SXin LI char *program_name;
5518fd37a7SXin LI
5618fd37a7SXin LI /* Filenames of the compared files. */
5718fd37a7SXin LI static char const *file[2];
5818fd37a7SXin LI
5918fd37a7SXin LI /* File descriptors of the files. */
6018fd37a7SXin LI static int file_desc[2];
6118fd37a7SXin LI
6218fd37a7SXin LI /* Status of the files. */
6318fd37a7SXin LI static struct stat stat_buf[2];
6418fd37a7SXin LI
6518fd37a7SXin LI /* Read buffers for the files. */
6618fd37a7SXin LI static word *buffer[2];
6718fd37a7SXin LI
6818fd37a7SXin LI /* Optimal block size for the files. */
6918fd37a7SXin LI static size_t buf_size;
7018fd37a7SXin LI
7118fd37a7SXin LI /* Initial prefix to ignore for each file. */
7218fd37a7SXin LI static off_t ignore_initial[2];
7318fd37a7SXin LI
7418fd37a7SXin LI /* Number of bytes to compare. */
7518fd37a7SXin LI static uintmax_t bytes = UINTMAX_MAX;
7618fd37a7SXin LI
7718fd37a7SXin LI /* Output format. */
7818fd37a7SXin LI static enum comparison_type
7918fd37a7SXin LI {
8018fd37a7SXin LI type_first_diff, /* Print the first difference. */
8118fd37a7SXin LI type_all_diffs, /* Print all differences. */
8218fd37a7SXin LI type_status /* Exit status only. */
8318fd37a7SXin LI } comparison_type;
8418fd37a7SXin LI
8518fd37a7SXin LI /* If nonzero, print values of bytes quoted like cat -t does. */
8618fd37a7SXin LI static bool opt_print_bytes;
8718fd37a7SXin LI
/* Values for long options that do not have single-letter equivalents.
   They start above CHAR_MAX so they cannot collide with any option
   character returned by getopt_long.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

/* Long-option table handed to getopt_long in main.  Each entry maps a
   long name to the value main's switch dispatches on; the all-zero
   entry terminates the table.  */
static struct option const long_options[] =
{
  {"print-bytes", no_argument, NULL, 'b'},
  {"print-chars", no_argument, NULL, 'c'}, /* obsolescent as of diffutils 2.7.3 */
  {"ignore-initial", required_argument, NULL, 'i'},
  {"verbose", no_argument, NULL, 'l'},
  {"bytes", required_argument, NULL, 'n'},
  {"silent", no_argument, NULL, 's'},
  {"quiet", no_argument, NULL, 's'},
  {"version", no_argument, NULL, 'v'},
  {"help", no_argument, NULL, HELP_OPTION},
  {NULL, 0, NULL, 0}
};

10818fd37a7SXin LI static void try_help (char const *, char const *) __attribute__((noreturn));
10918fd37a7SXin LI static void
try_help(char const * reason_msgid,char const * operand)11018fd37a7SXin LI try_help (char const *reason_msgid, char const *operand)
11118fd37a7SXin LI {
11218fd37a7SXin LI if (reason_msgid)
11318fd37a7SXin LI error (0, 0, _(reason_msgid), operand);
11418fd37a7SXin LI error (EXIT_TROUBLE, 0,
11518fd37a7SXin LI _("Try `%s --help' for more information."), program_name);
11618fd37a7SXin LI abort ();
11718fd37a7SXin LI }
11818fd37a7SXin LI
/* Suffix characters passed to xstrtoumax when parsing the operands of
   --ignore-initial and --bytes.  */
static char const valid_suffixes[] = "kKMGTPEZY0";

12118fd37a7SXin LI /* Update ignore_initial[F] according to the result of parsing an
12218fd37a7SXin LI *operand ARGPTR of --ignore-initial, updating *ARGPTR to point
12318fd37a7SXin LI *after the operand. If DELIMITER is nonzero, the operand may be
12418fd37a7SXin LI *followed by DELIMITER; otherwise it must be null-terminated. */
12518fd37a7SXin LI static void
specify_ignore_initial(int f,char ** argptr,char delimiter)12618fd37a7SXin LI specify_ignore_initial (int f, char **argptr, char delimiter)
12718fd37a7SXin LI {
12818fd37a7SXin LI uintmax_t val;
12918fd37a7SXin LI off_t o;
13018fd37a7SXin LI char const *arg = *argptr;
13118fd37a7SXin LI strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
13218fd37a7SXin LI if (! (e == LONGINT_OK
13318fd37a7SXin LI || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
13418fd37a7SXin LI || (o = val) < 0 || o != val || val == UINTMAX_MAX)
13518fd37a7SXin LI try_help ("invalid --ignore-initial value `%s'", arg);
13618fd37a7SXin LI if (ignore_initial[f] < o)
13718fd37a7SXin LI ignore_initial[f] = o;
13818fd37a7SXin LI }
13918fd37a7SXin LI
14018fd37a7SXin LI /* Specify the output format. */
14118fd37a7SXin LI static void
specify_comparison_type(enum comparison_type t)14218fd37a7SXin LI specify_comparison_type (enum comparison_type t)
14318fd37a7SXin LI {
14418fd37a7SXin LI if (comparison_type && comparison_type != t)
14518fd37a7SXin LI try_help ("options -l and -s are incompatible", 0);
14618fd37a7SXin LI comparison_type = t;
14718fd37a7SXin LI }
14818fd37a7SXin LI
14918fd37a7SXin LI static void
check_stdout(void)15018fd37a7SXin LI check_stdout (void)
15118fd37a7SXin LI {
15218fd37a7SXin LI if (ferror (stdout))
15318fd37a7SXin LI error (EXIT_TROUBLE, 0, "%s", _("write failed"));
15418fd37a7SXin LI else if (fclose (stdout) != 0)
15518fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", _("standard output"));
15618fd37a7SXin LI }
15718fd37a7SXin LI
15818fd37a7SXin LI static char const * const option_help_msgid[] = {
15918fd37a7SXin LI N_("-b --print-bytes Print differing bytes."),
16018fd37a7SXin LI N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
16118fd37a7SXin LI N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
16218fd37a7SXin LI N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
16318fd37a7SXin LI N_("-l --verbose Output byte numbers and values of all differing bytes."),
16418fd37a7SXin LI N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
16518fd37a7SXin LI N_("-s --quiet --silent Output nothing; yield exit status only."),
16618fd37a7SXin LI N_("-v --version Output version info."),
16718fd37a7SXin LI N_("--help Output this help."),
16818fd37a7SXin LI 0
16918fd37a7SXin LI };
17018fd37a7SXin LI
17118fd37a7SXin LI static void
usage(void)17218fd37a7SXin LI usage (void)
17318fd37a7SXin LI {
17418fd37a7SXin LI char const * const *p;
17518fd37a7SXin LI
17618fd37a7SXin LI printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
17718fd37a7SXin LI program_name);
17818fd37a7SXin LI printf ("%s\n\n", _("Compare two files byte by byte."));
17918fd37a7SXin LI for (p = option_help_msgid; *p; p++)
18018fd37a7SXin LI printf (" %s\n", _(*p));
18118fd37a7SXin LI printf ("\n%s\n%s\n\n%s\n%s\n\n%s\n",
18218fd37a7SXin LI _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
18318fd37a7SXin LI _("SKIP values may be followed by the following multiplicative suffixes:\n\
18418fd37a7SXin LI kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
18518fd37a7SXin LI GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
18618fd37a7SXin LI _("If a FILE is `-' or missing, read standard input."),
18718fd37a7SXin LI _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
18818fd37a7SXin LI _("Report bugs to <bug-gnu-utils@gnu.org>."));
18918fd37a7SXin LI }
19018fd37a7SXin LI
19118fd37a7SXin LI int
main(int argc,char ** argv)19218fd37a7SXin LI main (int argc, char **argv)
19318fd37a7SXin LI {
19418fd37a7SXin LI int c, f, exit_status;
19518fd37a7SXin LI size_t words_per_buffer;
19618fd37a7SXin LI
19718fd37a7SXin LI exit_failure = EXIT_TROUBLE;
19818fd37a7SXin LI initialize_main (&argc, &argv);
19918fd37a7SXin LI program_name = argv[0];
20018fd37a7SXin LI setlocale (LC_ALL, "");
20118fd37a7SXin LI bindtextdomain (PACKAGE, LOCALEDIR);
20218fd37a7SXin LI textdomain (PACKAGE);
20318fd37a7SXin LI c_stack_action (0);
20418fd37a7SXin LI
20518fd37a7SXin LI /* Parse command line options. */
20618fd37a7SXin LI
20718fd37a7SXin LI while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
20818fd37a7SXin LI != -1)
20918fd37a7SXin LI switch (c)
21018fd37a7SXin LI {
21118fd37a7SXin LI case 'b':
21218fd37a7SXin LI case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
21318fd37a7SXin LI opt_print_bytes = true;
21418fd37a7SXin LI break;
21518fd37a7SXin LI
21618fd37a7SXin LI case 'i':
21718fd37a7SXin LI specify_ignore_initial (0, &optarg, ':');
21818fd37a7SXin LI if (*optarg++ == ':')
21918fd37a7SXin LI specify_ignore_initial (1, &optarg, 0);
22018fd37a7SXin LI else if (ignore_initial[1] < ignore_initial[0])
22118fd37a7SXin LI ignore_initial[1] = ignore_initial[0];
22218fd37a7SXin LI break;
22318fd37a7SXin LI
22418fd37a7SXin LI case 'l':
22518fd37a7SXin LI specify_comparison_type (type_all_diffs);
22618fd37a7SXin LI break;
22718fd37a7SXin LI
22818fd37a7SXin LI case 'n':
22918fd37a7SXin LI {
23018fd37a7SXin LI uintmax_t n;
23118fd37a7SXin LI if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
23218fd37a7SXin LI try_help ("invalid --bytes value `%s'", optarg);
23318fd37a7SXin LI if (n < bytes)
23418fd37a7SXin LI bytes = n;
23518fd37a7SXin LI }
23618fd37a7SXin LI break;
23718fd37a7SXin LI
23818fd37a7SXin LI case 's':
23918fd37a7SXin LI specify_comparison_type (type_status);
24018fd37a7SXin LI break;
24118fd37a7SXin LI
24218fd37a7SXin LI case 'v':
24318fd37a7SXin LI /* TRANSLATORS: Please translate the second "o" in "Torbjorn
24418fd37a7SXin LI Granlund" to an o-with-umlaut (U+00F6, LATIN SMALL LETTER O
24518fd37a7SXin LI WITH DIAERESIS) if possible. */
24618fd37a7SXin LI version_etc (stdout, "cmp", PACKAGE_NAME, PACKAGE_VERSION,
24718fd37a7SXin LI _("Torbjorn Granlund"), "David MacKenzie", (char *) 0);
24818fd37a7SXin LI check_stdout ();
24918fd37a7SXin LI return EXIT_SUCCESS;
25018fd37a7SXin LI
25118fd37a7SXin LI case HELP_OPTION:
25218fd37a7SXin LI usage ();
25318fd37a7SXin LI check_stdout ();
25418fd37a7SXin LI return EXIT_SUCCESS;
25518fd37a7SXin LI
25618fd37a7SXin LI default:
25718fd37a7SXin LI try_help (0, 0);
25818fd37a7SXin LI }
25918fd37a7SXin LI
26018fd37a7SXin LI if (optind == argc)
26118fd37a7SXin LI try_help ("missing operand after `%s'", argv[argc - 1]);
26218fd37a7SXin LI
26318fd37a7SXin LI file[0] = argv[optind++];
26418fd37a7SXin LI file[1] = optind < argc ? argv[optind++] : "-";
26518fd37a7SXin LI
26618fd37a7SXin LI for (f = 0; f < 2 && optind < argc; f++)
26718fd37a7SXin LI {
26818fd37a7SXin LI char *arg = argv[optind++];
26918fd37a7SXin LI specify_ignore_initial (f, &arg, 0);
27018fd37a7SXin LI }
27118fd37a7SXin LI
27218fd37a7SXin LI if (optind < argc)
27318fd37a7SXin LI try_help ("extra operand `%s'", argv[optind]);
27418fd37a7SXin LI
27518fd37a7SXin LI for (f = 0; f < 2; f++)
27618fd37a7SXin LI {
27718fd37a7SXin LI /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
27818fd37a7SXin LI stdin is closed and opening file[0] yields file descriptor 0. */
27918fd37a7SXin LI int f1 = f ^ (strcmp (file[1], "-") == 0);
28018fd37a7SXin LI
28118fd37a7SXin LI /* Two files with the same name and offset are identical.
28218fd37a7SXin LI But wait until we open the file once, for proper diagnostics. */
28318fd37a7SXin LI if (f && ignore_initial[0] == ignore_initial[1]
28418fd37a7SXin LI && file_name_cmp (file[0], file[1]) == 0)
28518fd37a7SXin LI return EXIT_SUCCESS;
28618fd37a7SXin LI
28718fd37a7SXin LI file_desc[f1] = (strcmp (file[f1], "-") == 0
28818fd37a7SXin LI ? STDIN_FILENO
28918fd37a7SXin LI : open (file[f1], O_RDONLY, 0));
29018fd37a7SXin LI if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
29118fd37a7SXin LI {
29218fd37a7SXin LI if (file_desc[f1] < 0 && comparison_type == type_status)
29318fd37a7SXin LI exit (EXIT_TROUBLE);
29418fd37a7SXin LI else
29518fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", file[f1]);
29618fd37a7SXin LI }
29718fd37a7SXin LI
29818fd37a7SXin LI set_binary_mode (file_desc[f1], true);
29918fd37a7SXin LI }
30018fd37a7SXin LI
30118fd37a7SXin LI /* If the files are links to the same inode and have the same file position,
30218fd37a7SXin LI they are identical. */
30318fd37a7SXin LI
30418fd37a7SXin LI if (0 < same_file (&stat_buf[0], &stat_buf[1])
30518fd37a7SXin LI && same_file_attributes (&stat_buf[0], &stat_buf[1])
30618fd37a7SXin LI && file_position (0) == file_position (1))
30718fd37a7SXin LI return EXIT_SUCCESS;
30818fd37a7SXin LI
30918fd37a7SXin LI /* If output is redirected to the null device, we may assume `-s'. */
31018fd37a7SXin LI
31118fd37a7SXin LI if (comparison_type != type_status)
31218fd37a7SXin LI {
31318fd37a7SXin LI struct stat outstat, nullstat;
31418fd37a7SXin LI
31518fd37a7SXin LI if (fstat (STDOUT_FILENO, &outstat) == 0
31618fd37a7SXin LI && stat (NULL_DEVICE, &nullstat) == 0
31718fd37a7SXin LI && 0 < same_file (&outstat, &nullstat))
31818fd37a7SXin LI comparison_type = type_status;
31918fd37a7SXin LI }
32018fd37a7SXin LI
32118fd37a7SXin LI /* If only a return code is needed,
32218fd37a7SXin LI and if both input descriptors are associated with plain files,
32318fd37a7SXin LI conclude that the files differ if they have different sizes
32418fd37a7SXin LI and if more bytes will be compared than are in the smaller file. */
32518fd37a7SXin LI
32618fd37a7SXin LI if (comparison_type == type_status
32718fd37a7SXin LI && S_ISREG (stat_buf[0].st_mode)
32818fd37a7SXin LI && S_ISREG (stat_buf[1].st_mode))
32918fd37a7SXin LI {
33018fd37a7SXin LI off_t s0 = stat_buf[0].st_size - file_position (0);
33118fd37a7SXin LI off_t s1 = stat_buf[1].st_size - file_position (1);
33218fd37a7SXin LI if (s0 < 0)
33318fd37a7SXin LI s0 = 0;
33418fd37a7SXin LI if (s1 < 0)
33518fd37a7SXin LI s1 = 0;
33618fd37a7SXin LI if (s0 != s1 && MIN (s0, s1) < bytes)
33718fd37a7SXin LI exit (EXIT_FAILURE);
33818fd37a7SXin LI }
33918fd37a7SXin LI
34018fd37a7SXin LI /* Get the optimal block size of the files. */
34118fd37a7SXin LI
34218fd37a7SXin LI buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
34318fd37a7SXin LI STAT_BLOCKSIZE (stat_buf[1]),
34418fd37a7SXin LI PTRDIFF_MAX - sizeof (word));
34518fd37a7SXin LI
34618fd37a7SXin LI /* Allocate word-aligned buffers, with space for sentinels at the end. */
34718fd37a7SXin LI
34818fd37a7SXin LI words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
34918fd37a7SXin LI buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
35018fd37a7SXin LI buffer[1] = buffer[0] + words_per_buffer;
35118fd37a7SXin LI
35218fd37a7SXin LI exit_status = cmp ();
35318fd37a7SXin LI
35418fd37a7SXin LI for (f = 0; f < 2; f++)
35518fd37a7SXin LI if (close (file_desc[f]) != 0)
35618fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", file[f]);
35718fd37a7SXin LI if (exit_status != 0 && comparison_type != type_status)
35818fd37a7SXin LI check_stdout ();
35918fd37a7SXin LI exit (exit_status);
36018fd37a7SXin LI return exit_status;
36118fd37a7SXin LI }
36218fd37a7SXin LI
36318fd37a7SXin LI /* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
36418fd37a7SXin LI using `buffer[0]' and `buffer[1]'.
36518fd37a7SXin LI Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
36618fd37a7SXin LI >1 if error. */
36718fd37a7SXin LI
36818fd37a7SXin LI static int
cmp(void)36918fd37a7SXin LI cmp (void)
37018fd37a7SXin LI {
37118fd37a7SXin LI off_t line_number = 1; /* Line number (1...) of difference. */
37218fd37a7SXin LI off_t byte_number = 1; /* Byte number (1...) of difference. */
37318fd37a7SXin LI uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
37418fd37a7SXin LI size_t read0, read1; /* Number of bytes read from each file. */
37518fd37a7SXin LI size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
37618fd37a7SXin LI size_t smaller; /* The lesser of `read0' and `read1'. */
37718fd37a7SXin LI word *buffer0 = buffer[0];
37818fd37a7SXin LI word *buffer1 = buffer[1];
37918fd37a7SXin LI char *buf0 = (char *) buffer0;
38018fd37a7SXin LI char *buf1 = (char *) buffer1;
38118fd37a7SXin LI int ret = EXIT_SUCCESS;
38218fd37a7SXin LI int f;
38318fd37a7SXin LI int offset_width;
38418fd37a7SXin LI
38518fd37a7SXin LI if (comparison_type == type_all_diffs)
38618fd37a7SXin LI {
38718fd37a7SXin LI off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
38818fd37a7SXin LI
38918fd37a7SXin LI for (f = 0; f < 2; f++)
39018fd37a7SXin LI if (S_ISREG (stat_buf[f].st_mode))
39118fd37a7SXin LI {
39218fd37a7SXin LI off_t file_bytes = stat_buf[f].st_size - file_position (f);
39318fd37a7SXin LI if (file_bytes < byte_number_max)
39418fd37a7SXin LI byte_number_max = file_bytes;
39518fd37a7SXin LI }
39618fd37a7SXin LI
39718fd37a7SXin LI for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
39818fd37a7SXin LI continue;
39918fd37a7SXin LI }
40018fd37a7SXin LI
40118fd37a7SXin LI for (f = 0; f < 2; f++)
40218fd37a7SXin LI {
40318fd37a7SXin LI off_t ig = ignore_initial[f];
40418fd37a7SXin LI if (ig && file_position (f) == -1)
40518fd37a7SXin LI {
40618fd37a7SXin LI /* lseek failed; read and discard the ignored initial prefix. */
40718fd37a7SXin LI do
40818fd37a7SXin LI {
40918fd37a7SXin LI size_t bytes_to_read = MIN (ig, buf_size);
41018fd37a7SXin LI size_t r = block_read (file_desc[f], buf0, bytes_to_read);
41118fd37a7SXin LI if (r != bytes_to_read)
41218fd37a7SXin LI {
41318fd37a7SXin LI if (r == SIZE_MAX)
41418fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", file[f]);
41518fd37a7SXin LI break;
41618fd37a7SXin LI }
41718fd37a7SXin LI ig -= r;
41818fd37a7SXin LI }
41918fd37a7SXin LI while (ig);
42018fd37a7SXin LI }
42118fd37a7SXin LI }
42218fd37a7SXin LI
42318fd37a7SXin LI do
42418fd37a7SXin LI {
42518fd37a7SXin LI size_t bytes_to_read = buf_size;
42618fd37a7SXin LI
42718fd37a7SXin LI if (remaining != UINTMAX_MAX)
42818fd37a7SXin LI {
42918fd37a7SXin LI if (remaining < bytes_to_read)
43018fd37a7SXin LI bytes_to_read = remaining;
43118fd37a7SXin LI remaining -= bytes_to_read;
43218fd37a7SXin LI }
43318fd37a7SXin LI
43418fd37a7SXin LI read0 = block_read (file_desc[0], buf0, bytes_to_read);
43518fd37a7SXin LI if (read0 == SIZE_MAX)
43618fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", file[0]);
43718fd37a7SXin LI read1 = block_read (file_desc[1], buf1, bytes_to_read);
43818fd37a7SXin LI if (read1 == SIZE_MAX)
43918fd37a7SXin LI error (EXIT_TROUBLE, errno, "%s", file[1]);
44018fd37a7SXin LI
44118fd37a7SXin LI /* Insert sentinels for the block compare. */
44218fd37a7SXin LI
44318fd37a7SXin LI buf0[read0] = ~buf1[read0];
44418fd37a7SXin LI buf1[read1] = ~buf0[read1];
44518fd37a7SXin LI
44618fd37a7SXin LI /* If the line number should be written for differing files,
44718fd37a7SXin LI compare the blocks and count the number of newlines
44818fd37a7SXin LI simultaneously. */
44918fd37a7SXin LI first_diff = (comparison_type == type_first_diff
45018fd37a7SXin LI ? block_compare_and_count (buffer0, buffer1, &line_number)
45118fd37a7SXin LI : block_compare (buffer0, buffer1));
45218fd37a7SXin LI
45318fd37a7SXin LI byte_number += first_diff;
45418fd37a7SXin LI smaller = MIN (read0, read1);
45518fd37a7SXin LI
45618fd37a7SXin LI if (first_diff < smaller)
45718fd37a7SXin LI {
45818fd37a7SXin LI switch (comparison_type)
45918fd37a7SXin LI {
46018fd37a7SXin LI case type_first_diff:
46118fd37a7SXin LI {
46218fd37a7SXin LI char byte_buf[INT_BUFSIZE_BOUND (off_t)];
46318fd37a7SXin LI char line_buf[INT_BUFSIZE_BOUND (off_t)];
46418fd37a7SXin LI char const *byte_num = offtostr (byte_number, byte_buf);
46518fd37a7SXin LI char const *line_num = offtostr (line_number, line_buf);
46618fd37a7SXin LI if (!opt_print_bytes)
46718fd37a7SXin LI {
46818fd37a7SXin LI /* See POSIX 1003.1-2001 for this format. This
46918fd37a7SXin LI message is used only in the POSIX locale, so it
47018fd37a7SXin LI need not be translated. */
47118fd37a7SXin LI static char const char_message[] =
47218fd37a7SXin LI "%s %s differ: char %s, line %s\n";
47318fd37a7SXin LI
47418fd37a7SXin LI /* The POSIX rationale recommends using the word
47518fd37a7SXin LI "byte" outside the POSIX locale. Some gettext
47618fd37a7SXin LI implementations translate even in the POSIX
47718fd37a7SXin LI locale if certain other environment variables
47818fd37a7SXin LI are set, so use "byte" if a translation is
47918fd37a7SXin LI available, or if outside the POSIX locale. */
48018fd37a7SXin LI static char const byte_msgid[] =
48118fd37a7SXin LI N_("%s %s differ: byte %s, line %s\n");
48218fd37a7SXin LI char const *byte_message = _(byte_msgid);
48318fd37a7SXin LI bool use_byte_message = (byte_message != byte_msgid
48418fd37a7SXin LI || hard_locale_LC_MESSAGES);
48518fd37a7SXin LI
48618fd37a7SXin LI printf (use_byte_message ? byte_message : char_message,
48718fd37a7SXin LI file[0], file[1], byte_num, line_num);
48818fd37a7SXin LI }
48918fd37a7SXin LI else
49018fd37a7SXin LI {
49118fd37a7SXin LI unsigned char c0 = buf0[first_diff];
49218fd37a7SXin LI unsigned char c1 = buf1[first_diff];
49318fd37a7SXin LI char s0[5];
49418fd37a7SXin LI char s1[5];
49518fd37a7SXin LI sprintc (s0, c0);
49618fd37a7SXin LI sprintc (s1, c1);
49718fd37a7SXin LI printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
49818fd37a7SXin LI file[0], file[1], byte_num, line_num,
49918fd37a7SXin LI c0, s0, c1, s1);
50018fd37a7SXin LI }
50118fd37a7SXin LI }
50218fd37a7SXin LI /* Fall through. */
50318fd37a7SXin LI case type_status:
50418fd37a7SXin LI return EXIT_FAILURE;
50518fd37a7SXin LI
50618fd37a7SXin LI case type_all_diffs:
50718fd37a7SXin LI do
50818fd37a7SXin LI {
50918fd37a7SXin LI unsigned char c0 = buf0[first_diff];
51018fd37a7SXin LI unsigned char c1 = buf1[first_diff];
51118fd37a7SXin LI if (c0 != c1)
51218fd37a7SXin LI {
51318fd37a7SXin LI char byte_buf[INT_BUFSIZE_BOUND (off_t)];
51418fd37a7SXin LI char const *byte_num = offtostr (byte_number, byte_buf);
51518fd37a7SXin LI if (!opt_print_bytes)
51618fd37a7SXin LI {
51718fd37a7SXin LI /* See POSIX 1003.1-2001 for this format. */
51818fd37a7SXin LI printf ("%*s %3o %3o\n",
51918fd37a7SXin LI offset_width, byte_num, c0, c1);
52018fd37a7SXin LI }
52118fd37a7SXin LI else
52218fd37a7SXin LI {
52318fd37a7SXin LI char s0[5];
52418fd37a7SXin LI char s1[5];
52518fd37a7SXin LI sprintc (s0, c0);
52618fd37a7SXin LI sprintc (s1, c1);
52718fd37a7SXin LI printf ("%*s %3o %-4s %3o %s\n",
52818fd37a7SXin LI offset_width, byte_num, c0, s0, c1, s1);
52918fd37a7SXin LI }
53018fd37a7SXin LI }
53118fd37a7SXin LI byte_number++;
53218fd37a7SXin LI first_diff++;
53318fd37a7SXin LI }
53418fd37a7SXin LI while (first_diff < smaller);
53518fd37a7SXin LI ret = EXIT_FAILURE;
53618fd37a7SXin LI break;
53718fd37a7SXin LI }
53818fd37a7SXin LI }
53918fd37a7SXin LI
54018fd37a7SXin LI if (read0 != read1)
54118fd37a7SXin LI {
54218fd37a7SXin LI if (comparison_type != type_status)
54318fd37a7SXin LI {
54418fd37a7SXin LI /* See POSIX 1003.1-2001 for this format. */
54518fd37a7SXin LI fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
54618fd37a7SXin LI }
54718fd37a7SXin LI
54818fd37a7SXin LI return EXIT_FAILURE;
54918fd37a7SXin LI }
55018fd37a7SXin LI }
55118fd37a7SXin LI while (read0 == buf_size);
55218fd37a7SXin LI
55318fd37a7SXin LI return ret;
55418fd37a7SXin LI }
55518fd37a7SXin LI
55618fd37a7SXin LI /* Compare two blocks of memory P0 and P1 until they differ,
55718fd37a7SXin LI and count the number of '\n' occurrences in the common
55818fd37a7SXin LI part of P0 and P1.
55918fd37a7SXin LI If the blocks are not guaranteed to be different, put sentinels at the ends
56018fd37a7SXin LI of the blocks before calling this function.
56118fd37a7SXin LI
56218fd37a7SXin LI Return the offset of the first byte that differs.
56318fd37a7SXin LI Increment *COUNT by the count of '\n' occurrences. */
56418fd37a7SXin LI
56518fd37a7SXin LI static size_t
block_compare_and_count(word const * p0,word const * p1,off_t * count)56618fd37a7SXin LI block_compare_and_count (word const *p0, word const *p1, off_t *count)
56718fd37a7SXin LI {
56818fd37a7SXin LI word l; /* One word from first buffer. */
56918fd37a7SXin LI word const *l0, *l1; /* Pointers into each buffer. */
57018fd37a7SXin LI char const *c0, *c1; /* Pointers for finding exact address. */
57118fd37a7SXin LI size_t cnt = 0; /* Number of '\n' occurrences. */
57218fd37a7SXin LI word nnnn; /* Newline, sizeof (word) times. */
57318fd37a7SXin LI int i;
57418fd37a7SXin LI
57518fd37a7SXin LI nnnn = 0;
57618fd37a7SXin LI for (i = 0; i < sizeof nnnn; i++)
57718fd37a7SXin LI nnnn = (nnnn << CHAR_BIT) | '\n';
57818fd37a7SXin LI
57918fd37a7SXin LI /* Find the rough position of the first difference by reading words,
58018fd37a7SXin LI not bytes. */
58118fd37a7SXin LI
58218fd37a7SXin LI for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
58318fd37a7SXin LI {
58418fd37a7SXin LI l ^= nnnn;
58518fd37a7SXin LI for (i = 0; i < sizeof l; i++)
58618fd37a7SXin LI {
58718fd37a7SXin LI unsigned char uc = l;
58818fd37a7SXin LI cnt += ! uc;
58918fd37a7SXin LI l >>= CHAR_BIT;
59018fd37a7SXin LI }
59118fd37a7SXin LI }
59218fd37a7SXin LI
59318fd37a7SXin LI /* Find the exact differing position (endianness independent). */
59418fd37a7SXin LI
59518fd37a7SXin LI for (c0 = (char const *) l0, c1 = (char const *) l1;
59618fd37a7SXin LI *c0 == *c1;
59718fd37a7SXin LI c0++, c1++)
59818fd37a7SXin LI cnt += *c0 == '\n';
59918fd37a7SXin LI
60018fd37a7SXin LI *count += cnt;
60118fd37a7SXin LI return c0 - (char const *) p0;
60218fd37a7SXin LI }
60318fd37a7SXin LI
60418fd37a7SXin LI /* Compare two blocks of memory P0 and P1 until they differ.
60518fd37a7SXin LI If the blocks are not guaranteed to be different, put sentinels at the ends
60618fd37a7SXin LI of the blocks before calling this function.
60718fd37a7SXin LI
60818fd37a7SXin LI Return the offset of the first byte that differs. */
60918fd37a7SXin LI
61018fd37a7SXin LI static size_t
block_compare(word const * p0,word const * p1)61118fd37a7SXin LI block_compare (word const *p0, word const *p1)
61218fd37a7SXin LI {
61318fd37a7SXin LI word const *l0, *l1;
61418fd37a7SXin LI char const *c0, *c1;
61518fd37a7SXin LI
61618fd37a7SXin LI /* Find the rough position of the first difference by reading words,
61718fd37a7SXin LI not bytes. */
61818fd37a7SXin LI
61918fd37a7SXin LI for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
62018fd37a7SXin LI continue;
62118fd37a7SXin LI
62218fd37a7SXin LI /* Find the exact differing position (endianness independent). */
62318fd37a7SXin LI
62418fd37a7SXin LI for (c0 = (char const *) l0, c1 = (char const *) l1;
62518fd37a7SXin LI *c0 == *c1;
62618fd37a7SXin LI c0++, c1++)
62718fd37a7SXin LI continue;
62818fd37a7SXin LI
62918fd37a7SXin LI return c0 - (char const *) p0;
63018fd37a7SXin LI }
63118fd37a7SXin LI
/* Put into BUF the unsigned char C, making unprintable bytes
   visible by quoting like cat -t does.

   A byte that isprint accepts is stored as is.  Otherwise a byte of
   128 or more gets an "M-" prefix and has 128 subtracted; a resulting
   value below 32 is then written as '^' followed by the value plus 64,
   and 127 is written as "^?".  The longest result is "M-^?", so BUF
   must have room for 5 bytes including the terminating null.  */

static void
sprintc (char *buf, unsigned char c)
{
  if (! isprint (c))
    {
      if (c >= 128)
	{
	  *buf++ = 'M';
	  *buf++ = '-';
	  c -= 128;
	}
      if (c < 32)
	{
	  *buf++ = '^';
	  c += 64;
	}
      else if (c == 127)
	{
	  *buf++ = '^';
	  c = '?';
	}
    }

  *buf++ = c;
  *buf = 0;
}

66218fd37a7SXin LI /* Position file F to ignore_initial[F] bytes from its initial position,
66318fd37a7SXin LI and yield its new position. Don't try more than once. */
66418fd37a7SXin LI
66518fd37a7SXin LI static off_t
file_position(int f)66618fd37a7SXin LI file_position (int f)
66718fd37a7SXin LI {
66818fd37a7SXin LI static bool positioned[2];
66918fd37a7SXin LI static off_t position[2];
67018fd37a7SXin LI
67118fd37a7SXin LI if (! positioned[f])
67218fd37a7SXin LI {
67318fd37a7SXin LI positioned[f] = true;
67418fd37a7SXin LI position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
67518fd37a7SXin LI }
67618fd37a7SXin LI return position[f];
67718fd37a7SXin LI }
678