1 /* cmp - compare two files byte by byte 2 3 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001, 4 2002, 2004 Free Software Foundation, Inc. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 See the GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; see the file COPYING. 18 If not, write to the Free Software Foundation, 19 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 20 21 #include "system.h" 22 #include "paths.h" 23 24 #include <stdio.h> 25 26 #include <c-stack.h> 27 #include <cmpbuf.h> 28 #include <error.h> 29 #include <exit.h> 30 #include <exitfail.h> 31 #include <file-type.h> 32 #include <getopt.h> 33 #include <hard-locale.h> 34 #include <inttostr.h> 35 #include <setmode.h> 36 #include <unlocked-io.h> 37 #include <version-etc.h> 38 #include <xalloc.h> 39 #include <xstrtol.h> 40 41 #if defined LC_MESSAGES && ENABLE_NLS 42 # define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES) 43 #else 44 # define hard_locale_LC_MESSAGES 0 45 #endif 46 47 static int cmp (void); 48 static off_t file_position (int); 49 static size_t block_compare (word const *, word const *); 50 static size_t block_compare_and_count (word const *, word const *, off_t *); 51 static void sprintc (char *, unsigned char); 52 53 /* Name under which this program was invoked. */ 54 char *program_name; 55 56 /* Filenames of the compared files. */ 57 static char const *file[2]; 58 59 /* File descriptors of the files. */ 60 static int file_desc[2]; 61 62 /* Status of the files. */ 63 static struct stat stat_buf[2]; 64 65 /* Read buffers for the files. */ 66 static word *buffer[2]; 67 68 /* Optimal block size for the files. */ 69 static size_t buf_size; 70 71 /* Initial prefix to ignore for each file. */ 72 static off_t ignore_initial[2]; 73 74 /* Number of bytes to compare. */ 75 static uintmax_t bytes = UINTMAX_MAX; 76 77 /* Output format. */ 78 static enum comparison_type 79 { 80 type_first_diff, /* Print the first difference. */ 81 type_all_diffs, /* Print all differences. */ 82 type_status /* Exit status only. */ 83 } comparison_type; 84 85 /* If nonzero, print values of bytes quoted like cat -t does. */ 86 static bool opt_print_bytes; 87 88 /* Values for long options that do not have single-letter equivalents. */ 89 enum 90 { 91 HELP_OPTION = CHAR_MAX + 1 92 }; 93 94 static struct option const long_options[] = 95 { 96 {"print-bytes", 0, 0, 'b'}, 97 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */ 98 {"ignore-initial", 1, 0, 'i'}, 99 {"verbose", 0, 0, 'l'}, 100 {"bytes", 1, 0, 'n'}, 101 {"silent", 0, 0, 's'}, 102 {"quiet", 0, 0, 's'}, 103 {"version", 0, 0, 'v'}, 104 {"help", 0, 0, HELP_OPTION}, 105 {0, 0, 0, 0} 106 }; 107 108 static void try_help (char const *, char const *) __attribute__((noreturn)); 109 static void 110 try_help (char const *reason_msgid, char const *operand) 111 { 112 if (reason_msgid) 113 error (0, 0, _(reason_msgid), operand); 114 error (EXIT_TROUBLE, 0, 115 _("Try `%s --help' for more information."), program_name); 116 abort (); 117 } 118 119 static char const valid_suffixes[] = "kKMGTPEZY0"; 120 121 /* Update ignore_initial[F] according to the result of parsing an 122 *operand ARGPTR of --ignore-initial, updating *ARGPTR to point 123 *after the operand. If DELIMITER is nonzero, the operand may be 124 *followed by DELIMITER; otherwise it must be null-terminated. */ 125 static void 126 specify_ignore_initial (int f, char **argptr, char delimiter) 127 { 128 uintmax_t val; 129 off_t o; 130 char const *arg = *argptr; 131 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes); 132 if (! (e == LONGINT_OK 133 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter)) 134 || (o = val) < 0 || o != val || val == UINTMAX_MAX) 135 try_help ("invalid --ignore-initial value `%s'", arg); 136 if (ignore_initial[f] < o) 137 ignore_initial[f] = o; 138 } 139 140 /* Specify the output format. */ 141 static void 142 specify_comparison_type (enum comparison_type t) 143 { 144 if (comparison_type && comparison_type != t) 145 try_help ("options -l and -s are incompatible", 0); 146 comparison_type = t; 147 } 148 149 static void 150 check_stdout (void) 151 { 152 if (ferror (stdout)) 153 error (EXIT_TROUBLE, 0, "%s", _("write failed")); 154 else if (fclose (stdout) != 0) 155 error (EXIT_TROUBLE, errno, "%s", _("standard output")); 156 } 157 158 static char const * const option_help_msgid[] = { 159 N_("-b --print-bytes Print differing bytes."), 160 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."), 161 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"), 162 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."), 163 N_("-l --verbose Output byte numbers and values of all differing bytes."), 164 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."), 165 N_("-s --quiet --silent Output nothing; yield exit status only."), 166 N_("-v --version Output version info."), 167 N_("--help Output this help."), 168 0 169 }; 170 171 static void 172 usage (void) 173 { 174 char const * const *p; 175 176 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"), 177 program_name); 178 printf ("%s\n\n", _("Compare two files byte by byte.")); 179 for (p = option_help_msgid; *p; p++) 180 printf (" %s\n", _(*p)); 181 printf ("\n%s\n%s\n\n%s\n%s\n\n%s\n", 182 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."), 183 _("SKIP values may be followed by the following multiplicative suffixes:\n\ 184 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\ 185 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."), 186 _("If a FILE is `-' or missing, read standard input."), 187 _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."), 188 _("Report bugs to <bug-gnu-utils@gnu.org>.")); 189 } 190 191 int 192 main (int argc, char **argv) 193 { 194 int c, f, exit_status; 195 size_t words_per_buffer; 196 197 exit_failure = EXIT_TROUBLE; 198 initialize_main (&argc, &argv); 199 program_name = argv[0]; 200 setlocale (LC_ALL, ""); 201 bindtextdomain (PACKAGE, LOCALEDIR); 202 textdomain (PACKAGE); 203 c_stack_action (0); 204 205 /* Parse command line options. */ 206 207 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0)) 208 != -1) 209 switch (c) 210 { 211 case 'b': 212 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */ 213 opt_print_bytes = true; 214 break; 215 216 case 'i': 217 specify_ignore_initial (0, &optarg, ':'); 218 if (*optarg++ == ':') 219 specify_ignore_initial (1, &optarg, 0); 220 else if (ignore_initial[1] < ignore_initial[0]) 221 ignore_initial[1] = ignore_initial[0]; 222 break; 223 224 case 'l': 225 specify_comparison_type (type_all_diffs); 226 break; 227 228 case 'n': 229 { 230 uintmax_t n; 231 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK) 232 try_help ("invalid --bytes value `%s'", optarg); 233 if (n < bytes) 234 bytes = n; 235 } 236 break; 237 238 case 's': 239 specify_comparison_type (type_status); 240 break; 241 242 case 'v': 243 /* TRANSLATORS: Please translate the second "o" in "Torbjorn 244 Granlund" to an o-with-umlaut (U+00F6, LATIN SMALL LETTER O 245 WITH DIAERESIS) if possible. */ 246 version_etc (stdout, "cmp", PACKAGE_NAME, PACKAGE_VERSION, 247 _("Torbjorn Granlund"), "David MacKenzie", (char *) 0); 248 check_stdout (); 249 return EXIT_SUCCESS; 250 251 case HELP_OPTION: 252 usage (); 253 check_stdout (); 254 return EXIT_SUCCESS; 255 256 default: 257 try_help (0, 0); 258 } 259 260 if (optind == argc) 261 try_help ("missing operand after `%s'", argv[argc - 1]); 262 263 file[0] = argv[optind++]; 264 file[1] = optind < argc ? argv[optind++] : "-"; 265 266 for (f = 0; f < 2 && optind < argc; f++) 267 { 268 char *arg = argv[optind++]; 269 specify_ignore_initial (f, &arg, 0); 270 } 271 272 if (optind < argc) 273 try_help ("extra operand `%s'", argv[optind]); 274 275 for (f = 0; f < 2; f++) 276 { 277 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if 278 stdin is closed and opening file[0] yields file descriptor 0. */ 279 int f1 = f ^ (strcmp (file[1], "-") == 0); 280 281 /* Two files with the same name and offset are identical. 282 But wait until we open the file once, for proper diagnostics. */ 283 if (f && ignore_initial[0] == ignore_initial[1] 284 && file_name_cmp (file[0], file[1]) == 0) 285 return EXIT_SUCCESS; 286 287 file_desc[f1] = (strcmp (file[f1], "-") == 0 288 ? STDIN_FILENO 289 : open (file[f1], O_RDONLY, 0)); 290 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0) 291 { 292 if (file_desc[f1] < 0 && comparison_type == type_status) 293 exit (EXIT_TROUBLE); 294 else 295 error (EXIT_TROUBLE, errno, "%s", file[f1]); 296 } 297 298 set_binary_mode (file_desc[f1], true); 299 } 300 301 /* If the files are links to the same inode and have the same file position, 302 they are identical. */ 303 304 if (0 < same_file (&stat_buf[0], &stat_buf[1]) 305 && same_file_attributes (&stat_buf[0], &stat_buf[1]) 306 && file_position (0) == file_position (1)) 307 return EXIT_SUCCESS; 308 309 /* If output is redirected to the null device, we may assume `-s'. */ 310 311 if (comparison_type != type_status) 312 { 313 struct stat outstat, nullstat; 314 315 if (fstat (STDOUT_FILENO, &outstat) == 0 316 && stat (NULL_DEVICE, &nullstat) == 0 317 && 0 < same_file (&outstat, &nullstat)) 318 comparison_type = type_status; 319 } 320 321 /* If only a return code is needed, 322 and if both input descriptors are associated with plain files, 323 conclude that the files differ if they have different sizes 324 and if more bytes will be compared than are in the smaller file. */ 325 326 if (comparison_type == type_status 327 && S_ISREG (stat_buf[0].st_mode) 328 && S_ISREG (stat_buf[1].st_mode)) 329 { 330 off_t s0 = stat_buf[0].st_size - file_position (0); 331 off_t s1 = stat_buf[1].st_size - file_position (1); 332 if (s0 < 0) 333 s0 = 0; 334 if (s1 < 0) 335 s1 = 0; 336 if (s0 != s1 && MIN (s0, s1) < bytes) 337 exit (EXIT_FAILURE); 338 } 339 340 /* Get the optimal block size of the files. */ 341 342 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]), 343 STAT_BLOCKSIZE (stat_buf[1]), 344 PTRDIFF_MAX - sizeof (word)); 345 346 /* Allocate word-aligned buffers, with space for sentinels at the end. */ 347 348 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word); 349 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer); 350 buffer[1] = buffer[0] + words_per_buffer; 351 352 exit_status = cmp (); 353 354 for (f = 0; f < 2; f++) 355 if (close (file_desc[f]) != 0) 356 error (EXIT_TROUBLE, errno, "%s", file[f]); 357 if (exit_status != 0 && comparison_type != type_status) 358 check_stdout (); 359 exit (exit_status); 360 return exit_status; 361 } 362 363 /* Compare the two files already open on `file_desc[0]' and `file_desc[1]', 364 using `buffer[0]' and `buffer[1]'. 365 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different, 366 >1 if error. */ 367 368 static int 369 cmp (void) 370 { 371 off_t line_number = 1; /* Line number (1...) of difference. */ 372 off_t byte_number = 1; /* Byte number (1...) of difference. */ 373 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */ 374 size_t read0, read1; /* Number of bytes read from each file. */ 375 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */ 376 size_t smaller; /* The lesser of `read0' and `read1'. */ 377 word *buffer0 = buffer[0]; 378 word *buffer1 = buffer[1]; 379 char *buf0 = (char *) buffer0; 380 char *buf1 = (char *) buffer1; 381 int ret = EXIT_SUCCESS; 382 int f; 383 int offset_width; 384 385 if (comparison_type == type_all_diffs) 386 { 387 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t)); 388 389 for (f = 0; f < 2; f++) 390 if (S_ISREG (stat_buf[f].st_mode)) 391 { 392 off_t file_bytes = stat_buf[f].st_size - file_position (f); 393 if (file_bytes < byte_number_max) 394 byte_number_max = file_bytes; 395 } 396 397 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++) 398 continue; 399 } 400 401 for (f = 0; f < 2; f++) 402 { 403 off_t ig = ignore_initial[f]; 404 if (ig && file_position (f) == -1) 405 { 406 /* lseek failed; read and discard the ignored initial prefix. */ 407 do 408 { 409 size_t bytes_to_read = MIN (ig, buf_size); 410 size_t r = block_read (file_desc[f], buf0, bytes_to_read); 411 if (r != bytes_to_read) 412 { 413 if (r == SIZE_MAX) 414 error (EXIT_TROUBLE, errno, "%s", file[f]); 415 break; 416 } 417 ig -= r; 418 } 419 while (ig); 420 } 421 } 422 423 do 424 { 425 size_t bytes_to_read = buf_size; 426 427 if (remaining != UINTMAX_MAX) 428 { 429 if (remaining < bytes_to_read) 430 bytes_to_read = remaining; 431 remaining -= bytes_to_read; 432 } 433 434 read0 = block_read (file_desc[0], buf0, bytes_to_read); 435 if (read0 == SIZE_MAX) 436 error (EXIT_TROUBLE, errno, "%s", file[0]); 437 read1 = block_read (file_desc[1], buf1, bytes_to_read); 438 if (read1 == SIZE_MAX) 439 error (EXIT_TROUBLE, errno, "%s", file[1]); 440 441 /* Insert sentinels for the block compare. */ 442 443 buf0[read0] = ~buf1[read0]; 444 buf1[read1] = ~buf0[read1]; 445 446 /* If the line number should be written for differing files, 447 compare the blocks and count the number of newlines 448 simultaneously. */ 449 first_diff = (comparison_type == type_first_diff 450 ? block_compare_and_count (buffer0, buffer1, &line_number) 451 : block_compare (buffer0, buffer1)); 452 453 byte_number += first_diff; 454 smaller = MIN (read0, read1); 455 456 if (first_diff < smaller) 457 { 458 switch (comparison_type) 459 { 460 case type_first_diff: 461 { 462 char byte_buf[INT_BUFSIZE_BOUND (off_t)]; 463 char line_buf[INT_BUFSIZE_BOUND (off_t)]; 464 char const *byte_num = offtostr (byte_number, byte_buf); 465 char const *line_num = offtostr (line_number, line_buf); 466 if (!opt_print_bytes) 467 { 468 /* See POSIX 1003.1-2001 for this format. This 469 message is used only in the POSIX locale, so it 470 need not be translated. */ 471 static char const char_message[] = 472 "%s %s differ: char %s, line %s\n"; 473 474 /* The POSIX rationale recommends using the word 475 "byte" outside the POSIX locale. Some gettext 476 implementations translate even in the POSIX 477 locale if certain other environment variables 478 are set, so use "byte" if a translation is 479 available, or if outside the POSIX locale. */ 480 static char const byte_msgid[] = 481 N_("%s %s differ: byte %s, line %s\n"); 482 char const *byte_message = _(byte_msgid); 483 bool use_byte_message = (byte_message != byte_msgid 484 || hard_locale_LC_MESSAGES); 485 486 printf (use_byte_message ? byte_message : char_message, 487 file[0], file[1], byte_num, line_num); 488 } 489 else 490 { 491 unsigned char c0 = buf0[first_diff]; 492 unsigned char c1 = buf1[first_diff]; 493 char s0[5]; 494 char s1[5]; 495 sprintc (s0, c0); 496 sprintc (s1, c1); 497 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"), 498 file[0], file[1], byte_num, line_num, 499 c0, s0, c1, s1); 500 } 501 } 502 /* Fall through. */ 503 case type_status: 504 return EXIT_FAILURE; 505 506 case type_all_diffs: 507 do 508 { 509 unsigned char c0 = buf0[first_diff]; 510 unsigned char c1 = buf1[first_diff]; 511 if (c0 != c1) 512 { 513 char byte_buf[INT_BUFSIZE_BOUND (off_t)]; 514 char const *byte_num = offtostr (byte_number, byte_buf); 515 if (!opt_print_bytes) 516 { 517 /* See POSIX 1003.1-2001 for this format. */ 518 printf ("%*s %3o %3o\n", 519 offset_width, byte_num, c0, c1); 520 } 521 else 522 { 523 char s0[5]; 524 char s1[5]; 525 sprintc (s0, c0); 526 sprintc (s1, c1); 527 printf ("%*s %3o %-4s %3o %s\n", 528 offset_width, byte_num, c0, s0, c1, s1); 529 } 530 } 531 byte_number++; 532 first_diff++; 533 } 534 while (first_diff < smaller); 535 ret = EXIT_FAILURE; 536 break; 537 } 538 } 539 540 if (read0 != read1) 541 { 542 if (comparison_type != type_status) 543 { 544 /* See POSIX 1003.1-2001 for this format. */ 545 fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]); 546 } 547 548 return EXIT_FAILURE; 549 } 550 } 551 while (read0 == buf_size); 552 553 return ret; 554 } 555 556 /* Compare two blocks of memory P0 and P1 until they differ, 557 and count the number of '\n' occurrences in the common 558 part of P0 and P1. 559 If the blocks are not guaranteed to be different, put sentinels at the ends 560 of the blocks before calling this function. 561 562 Return the offset of the first byte that differs. 563 Increment *COUNT by the count of '\n' occurrences. */ 564 565 static size_t 566 block_compare_and_count (word const *p0, word const *p1, off_t *count) 567 { 568 word l; /* One word from first buffer. */ 569 word const *l0, *l1; /* Pointers into each buffer. */ 570 char const *c0, *c1; /* Pointers for finding exact address. */ 571 size_t cnt = 0; /* Number of '\n' occurrences. */ 572 word nnnn; /* Newline, sizeof (word) times. */ 573 int i; 574 575 nnnn = 0; 576 for (i = 0; i < sizeof nnnn; i++) 577 nnnn = (nnnn << CHAR_BIT) | '\n'; 578 579 /* Find the rough position of the first difference by reading words, 580 not bytes. */ 581 582 for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++) 583 { 584 l ^= nnnn; 585 for (i = 0; i < sizeof l; i++) 586 { 587 unsigned char uc = l; 588 cnt += ! uc; 589 l >>= CHAR_BIT; 590 } 591 } 592 593 /* Find the exact differing position (endianness independent). */ 594 595 for (c0 = (char const *) l0, c1 = (char const *) l1; 596 *c0 == *c1; 597 c0++, c1++) 598 cnt += *c0 == '\n'; 599 600 *count += cnt; 601 return c0 - (char const *) p0; 602 } 603 604 /* Compare two blocks of memory P0 and P1 until they differ. 605 If the blocks are not guaranteed to be different, put sentinels at the ends 606 of the blocks before calling this function. 607 608 Return the offset of the first byte that differs. */ 609 610 static size_t 611 block_compare (word const *p0, word const *p1) 612 { 613 word const *l0, *l1; 614 char const *c0, *c1; 615 616 /* Find the rough position of the first difference by reading words, 617 not bytes. */ 618 619 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++) 620 continue; 621 622 /* Find the exact differing position (endianness independent). */ 623 624 for (c0 = (char const *) l0, c1 = (char const *) l1; 625 *c0 == *c1; 626 c0++, c1++) 627 continue; 628 629 return c0 - (char const *) p0; 630 } 631 632 /* Put into BUF the unsigned char C, making unprintable bytes 633 visible by quoting like cat -t does. */ 634 635 static void 636 sprintc (char *buf, unsigned char c) 637 { 638 if (! isprint (c)) 639 { 640 if (c >= 128) 641 { 642 *buf++ = 'M'; 643 *buf++ = '-'; 644 c -= 128; 645 } 646 if (c < 32) 647 { 648 *buf++ = '^'; 649 c += 64; 650 } 651 else if (c == 127) 652 { 653 *buf++ = '^'; 654 c = '?'; 655 } 656 } 657 658 *buf++ = c; 659 *buf = 0; 660 } 661 662 /* Position file F to ignore_initial[F] bytes from its initial position, 663 and yield its new position. Don't try more than once. */ 664 665 static off_t 666 file_position (int f) 667 { 668 static bool positioned[2]; 669 static off_t position[2]; 670 671 if (! positioned[f]) 672 { 673 positioned[f] = true; 674 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR); 675 } 676 return position[f]; 677 } 678