1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #include <sys/cdefs.h> 68 __FBSDID("$FreeBSD$"); 69 70 #include <sys/capsicum.h> 71 #include <sys/procdesc.h> 72 #include <sys/stat.h> 73 #include <sys/types.h> 74 #include <sys/event.h> 75 #include <sys/wait.h> 76 77 #include <capsicum_helpers.h> 78 #include <ctype.h> 79 #include <err.h> 80 #include <errno.h> 81 #include <fcntl.h> 82 #include <paths.h> 83 #include <stddef.h> 84 #include <stdint.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <unistd.h> 89 #include <limits.h> 90 #include <signal.h> 91 92 #include "diff.h" 93 #include "xmalloc.h" 94 95 #define _PATH_PR "/usr/bin/pr" 96 97 #ifdef ST_MTIM_NSEC 98 # define TIMESPEC_NS(timespec) ((timespec).ST_MTIM_NSEC) 99 #else 100 # define TIMESPEC_NS(timespec) 0 101 #endif 102 103 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) 104 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) 105 106 /* 107 * diff - compare two files. 108 */ 109 110 /* 111 * Uses an algorithm due to Harold Stone, which finds 112 * a pair of longest identical subsequences in the two 113 * files. 114 * 115 * The major goal is to generate the match vector J. 116 * J[i] is the index of the line in file1 corresponding 117 * to line i file0. J[i] = 0 if there is no 118 * such line in file1. 119 * 120 * Lines are hashed so as to work in core. All potential 121 * matches are located by sorting the lines of each file 122 * on the hash (called ``value''). In particular, this 123 * collects the equivalence classes in file1 together. 124 * Subroutine equiv replaces the value of each line in 125 * file0 by the index of the first element of its 126 * matching equivalence in (the reordered) file1. 127 * To save space equiv squeezes file1 into a single 128 * array member in which the equivalence classes 129 * are simply concatenated, except that their first 130 * members are flagged by changing sign. 131 * 132 * Next the indices that point into member are unsorted into 133 * array class according to the original order of file0. 134 * 135 * The cleverness lies in routine stone. This marches 136 * through the lines of file0, developing a vector klist 137 * of "k-candidates". At step i a k-candidate is a matched 138 * pair of lines x,y (x in file0 y in file1) such that 139 * there is a common subsequence of length k 140 * between the first i lines of file0 and the first y 141 * lines of file1, but there is no such subsequence for 142 * any smaller y. x is the earliest possible mate to y 143 * that occurs in such a subsequence. 144 * 145 * Whenever any of the members of the equivalence class of 146 * lines in file1 matable to a line in file0 has serial number 147 * less than the y of some k-candidate, that k-candidate 148 * with the smallest such y is replaced. The new 149 * k-candidate is chained (via pred) to the current 150 * k-1 candidate so that the actual subsequence can 151 * be recovered. When a member has serial number greater 152 * that the y of all k-candidates, the klist is extended. 153 * At the end, the longest subsequence is pulled out 154 * and placed in the array J by unravel 155 * 156 * With J in hand, the matches there recorded are 157 * check'ed against reality to assure that no spurious 158 * matches have crept in due to hashing. If they have, 159 * they are broken, and "jackpot" is recorded--a harmless 160 * matter except that a true match for a spuriously 161 * mated line may now be unnecessarily reported as a change. 162 * 163 * Much of the complexity of the program comes simply 164 * from trying to minimize core utilization and 165 * maximize the range of doable problems by dynamically 166 * allocating what is needed and reusing what is not. 167 * The core requirements for problems larger than somewhat 168 * are (in words) 2*length(file0) + length(file1) + 169 * 3*(number of k-candidates installed), typically about 170 * 6n words for files of length n. 171 */ 172 173 struct cand { 174 int x; 175 int y; 176 int pred; 177 }; 178 179 static struct line { 180 int serial; 181 int value; 182 } *file[2]; 183 184 /* 185 * The following struct is used to record change information when 186 * doing a "context" or "unified" diff. (see routine "change" to 187 * understand the highly mnemonic field names) 188 */ 189 struct context_vec { 190 int a; /* start line in old file */ 191 int b; /* end line in old file */ 192 int c; /* start line in new file */ 193 int d; /* end line in new file */ 194 }; 195 196 #define diff_output printf 197 static void output(char *, FILE *, char *, FILE *, int); 198 static void check(FILE *, FILE *, int); 199 static void range(int, int, const char *); 200 static void uni_range(int, int); 201 static void dump_context_vec(FILE *, FILE *, int); 202 static void dump_unified_vec(FILE *, FILE *, int); 203 static void prepare(int, FILE *, size_t, int); 204 static void prune(void); 205 static void equiv(struct line *, int, struct line *, int, int *); 206 static void unravel(int); 207 static void unsort(struct line *, int, int *); 208 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); 209 static void sort(struct line *, int); 210 static void print_header(const char *, const char *); 211 static int ignoreline(char *); 212 static int asciifile(FILE *); 213 static int fetch(long *, int, int, FILE *, int, int, int); 214 static int newcand(int, int, int); 215 static int search(int *, int, int); 216 static int skipline(FILE *); 217 static int isqrt(int); 218 static int stone(int *, int, int *, int *, int); 219 static int readhash(FILE *, int); 220 static int files_differ(FILE *, FILE *, int); 221 static char *match_function(const long *, int, FILE *); 222 static char *preadline(int, size_t, off_t); 223 224 static int *J; /* will be overlaid on class */ 225 static int *class; /* will be overlaid on file[0] */ 226 static int *klist; /* will be overlaid on file[0] after class */ 227 static int *member; /* will be overlaid on file[1] */ 228 static int clen; 229 static int inifdef; /* whether or not we are in a #ifdef block */ 230 static int len[2]; 231 static int pref, suff; /* length of prefix and suffix */ 232 static int slen[2]; 233 static int anychange; 234 static long *ixnew; /* will be overlaid on file[1] */ 235 static long *ixold; /* will be overlaid on klist */ 236 static struct cand *clist; /* merely a free storage pot for candidates */ 237 static int clistlen; /* the length of clist */ 238 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 239 static u_char *chrtran; /* translation table for case-folding */ 240 static struct context_vec *context_vec_start; 241 static struct context_vec *context_vec_end; 242 static struct context_vec *context_vec_ptr; 243 244 #define FUNCTION_CONTEXT_SIZE 55 245 static char lastbuf[FUNCTION_CONTEXT_SIZE]; 246 static int lastline; 247 static int lastmatchline; 248 249 250 /* 251 * chrtran points to one of 2 translation tables: cup2low if folding upper to 252 * lower case clow2low if not folding case 253 */ 254 static u_char clow2low[256] = { 255 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 256 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 257 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 258 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 259 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 260 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 261 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 262 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 263 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 264 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 265 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 266 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 267 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 268 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 269 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 270 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 271 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 272 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 273 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 274 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 275 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 276 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 277 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 278 0xfd, 0xfe, 0xff 279 }; 280 281 static u_char cup2low[256] = { 282 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 283 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 284 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 285 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 286 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 287 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 288 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 289 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 290 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 291 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 292 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 293 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 294 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 295 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 296 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 297 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 298 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 299 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 300 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 301 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 302 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 303 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 304 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 305 0xfd, 0xfe, 0xff 306 }; 307 308 int 309 diffreg(char *file1, char *file2, int flags, int capsicum) 310 { 311 FILE *f1, *f2; 312 int i, rval; 313 int ostdout = -1; 314 int pr_pd, kq; 315 struct kevent *e; 316 cap_rights_t rights_ro; 317 318 e = NULL; 319 kq = -1; 320 f1 = f2 = NULL; 321 rval = D_SAME; 322 anychange = 0; 323 lastline = 0; 324 lastmatchline = 0; 325 context_vec_ptr = context_vec_start - 1; 326 if (flags & D_IGNORECASE) 327 chrtran = cup2low; 328 else 329 chrtran = clow2low; 330 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 331 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); 332 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 333 goto closem; 334 335 if (flags & D_EMPTY1) 336 f1 = fopen(_PATH_DEVNULL, "r"); 337 else { 338 if (strcmp(file1, "-") == 0) 339 f1 = stdin; 340 else 341 f1 = fopen(file1, "r"); 342 } 343 if (f1 == NULL) { 344 warn("%s", file1); 345 status |= 2; 346 goto closem; 347 } 348 349 if (flags & D_EMPTY2) 350 f2 = fopen(_PATH_DEVNULL, "r"); 351 else { 352 if (strcmp(file2, "-") == 0) 353 f2 = stdin; 354 else 355 f2 = fopen(file2, "r"); 356 } 357 if (f2 == NULL) { 358 warn("%s", file2); 359 status |= 2; 360 goto closem; 361 } 362 363 if (lflag) { 364 /* redirect stdout to pr */ 365 int pfd[2]; 366 pid_t pid; 367 char *header; 368 369 xasprintf(&header, "%s %s %s", diffargs, file1, file2); 370 signal(SIGPIPE, SIG_IGN); 371 fflush(stdout); 372 rewind(stdout); 373 pipe(pfd); 374 switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) { 375 case -1: 376 status |= 2; 377 free(header); 378 err(2, "No more processes"); 379 case 0: 380 /* child */ 381 if (pfd[0] != STDIN_FILENO) { 382 dup2(pfd[0], STDIN_FILENO); 383 close(pfd[0]); 384 } 385 close(pfd[1]); 386 execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0); 387 _exit(127); 388 default: 389 390 /* parent */ 391 if (pfd[1] != STDOUT_FILENO) { 392 ostdout = dup(STDOUT_FILENO); 393 dup2(pfd[1], STDOUT_FILENO); 394 close(pfd[1]); 395 } 396 close(pfd[0]); 397 rewind(stdout); 398 free(header); 399 kq = kqueue(); 400 if (kq == -1) 401 err(2, "kqueue"); 402 e = xmalloc(sizeof(struct kevent)); 403 EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0, 404 NULL); 405 if (kevent(kq, e, 1, NULL, 0, NULL) == -1) 406 err(2, "kevent"); 407 } 408 } 409 410 if (capsicum) { 411 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 412 if (cap_rights_limit(fileno(f1), &rights_ro) < 0) 413 err(2, "unable to limit rights on: %s", file1); 414 if (cap_rights_limit(fileno(f2), &rights_ro) < 0) 415 err(2, "unable to limit rights on: %s", file2); 416 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 417 /* stding has already been limited */ 418 if (caph_limit_stderr() == -1) 419 err(2, "unable to limit stderr"); 420 if (caph_limit_stdout() == -1) 421 err(2, "unable to limit stdout"); 422 } else if (caph_limit_stdio() == -1) 423 err(2, "unable to limit stdio"); 424 425 caph_cache_catpages(); 426 if (cap_enter() < 0 && errno != ENOSYS) 427 err(2, "unable to enter capability mode"); 428 } 429 430 switch (files_differ(f1, f2, flags)) { 431 case 0: 432 goto closem; 433 case 1: 434 break; 435 default: 436 /* error */ 437 status |= 2; 438 goto closem; 439 } 440 441 if ((flags & D_FORCEASCII) == 0 && 442 (!asciifile(f1) || !asciifile(f2))) { 443 rval = D_BINARY; 444 status |= 1; 445 goto closem; 446 } 447 prepare(0, f1, stb1.st_size, flags); 448 prepare(1, f2, stb2.st_size, flags); 449 450 prune(); 451 sort(sfile[0], slen[0]); 452 sort(sfile[1], slen[1]); 453 454 member = (int *)file[1]; 455 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 456 member = xreallocarray(member, slen[1] + 2, sizeof(*member)); 457 458 class = (int *)file[0]; 459 unsort(sfile[0], slen[0], class); 460 class = xreallocarray(class, slen[0] + 2, sizeof(*class)); 461 462 klist = xcalloc(slen[0] + 2, sizeof(*klist)); 463 clen = 0; 464 clistlen = 100; 465 clist = xcalloc(clistlen, sizeof(*clist)); 466 i = stone(class, slen[0], member, klist, flags); 467 free(member); 468 free(class); 469 470 J = xreallocarray(J, len[0] + 2, sizeof(*J)); 471 unravel(klist[i]); 472 free(clist); 473 free(klist); 474 475 ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); 476 ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); 477 check(f1, f2, flags); 478 output(file1, f1, file2, f2, flags); 479 if (ostdout != -1 && e != NULL) { 480 /* close the pipe to pr and restore stdout */ 481 int wstatus; 482 483 fflush(stdout); 484 if (ostdout != STDOUT_FILENO) { 485 close(STDOUT_FILENO); 486 dup2(ostdout, STDOUT_FILENO); 487 close(ostdout); 488 } 489 if (kevent(kq, NULL, 0, e, 1, NULL) == -1) 490 err(2, "kevent"); 491 wstatus = e[0].data; 492 close(kq); 493 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) 494 errx(2, "pr exited abnormally"); 495 else if (WIFSIGNALED(wstatus)) 496 errx(2, "pr killed by signal %d", 497 WTERMSIG(wstatus)); 498 } 499 500 closem: 501 if (anychange) { 502 status |= 1; 503 if (rval == D_SAME) 504 rval = D_DIFFER; 505 } 506 if (f1 != NULL) 507 fclose(f1); 508 if (f2 != NULL) 509 fclose(f2); 510 511 return (rval); 512 } 513 514 /* 515 * Check to see if the given files differ. 516 * Returns 0 if they are the same, 1 if different, and -1 on error. 517 * XXX - could use code from cmp(1) [faster] 518 */ 519 static int 520 files_differ(FILE *f1, FILE *f2, int flags) 521 { 522 char buf1[BUFSIZ], buf2[BUFSIZ]; 523 size_t i, j; 524 525 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 526 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 527 return (1); 528 for (;;) { 529 i = fread(buf1, 1, sizeof(buf1), f1); 530 j = fread(buf2, 1, sizeof(buf2), f2); 531 if ((!i && ferror(f1)) || (!j && ferror(f2))) 532 return (-1); 533 if (i != j) 534 return (1); 535 if (i == 0) 536 return (0); 537 if (memcmp(buf1, buf2, i) != 0) 538 return (1); 539 } 540 } 541 542 char * 543 splice(char *dir, char *path) 544 { 545 char *tail, *buf; 546 size_t dirlen; 547 548 dirlen = strlen(dir); 549 while (dirlen != 0 && dir[dirlen - 1] == '/') 550 dirlen--; 551 if ((tail = strrchr(path, '/')) == NULL) 552 tail = path; 553 else 554 tail++; 555 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); 556 return (buf); 557 } 558 559 static void 560 prepare(int i, FILE *fd, size_t filesize, int flags) 561 { 562 struct line *p; 563 int h; 564 size_t sz, j; 565 566 rewind(fd); 567 568 sz = ((unsigned long)filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25; 569 if (sz < 100) 570 sz = 100; 571 572 p = xcalloc(sz + 3, sizeof(*p)); 573 for (j = 0; (h = readhash(fd, flags));) { 574 if (j == sz) { 575 sz = sz * 3 / 2; 576 p = xreallocarray(p, sz + 3, sizeof(*p)); 577 } 578 p[++j].value = h; 579 } 580 len[i] = j; 581 file[i] = p; 582 } 583 584 static void 585 prune(void) 586 { 587 int i, j; 588 589 for (pref = 0; pref < len[0] && pref < len[1] && 590 file[0][pref + 1].value == file[1][pref + 1].value; 591 pref++) 592 ; 593 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 594 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 595 suff++) 596 ; 597 for (j = 0; j < 2; j++) { 598 sfile[j] = file[j] + pref; 599 slen[j] = len[j] - pref - suff; 600 for (i = 0; i <= slen[j]; i++) 601 sfile[j][i].serial = i; 602 } 603 } 604 605 static void 606 equiv(struct line *a, int n, struct line *b, int m, int *c) 607 { 608 int i, j; 609 610 i = j = 1; 611 while (i <= n && j <= m) { 612 if (a[i].value < b[j].value) 613 a[i++].value = 0; 614 else if (a[i].value == b[j].value) 615 a[i++].value = j; 616 else 617 j++; 618 } 619 while (i <= n) 620 a[i++].value = 0; 621 b[m + 1].value = 0; 622 j = 0; 623 while (++j <= m) { 624 c[j] = -b[j].serial; 625 while (b[j + 1].value == b[j].value) { 626 j++; 627 c[j] = b[j].serial; 628 } 629 } 630 c[j] = -1; 631 } 632 633 /* Code taken from ping.c */ 634 static int 635 isqrt(int n) 636 { 637 int y, x = 1; 638 639 if (n == 0) 640 return (0); 641 642 do { /* newton was a stinker */ 643 y = x; 644 x = n / x; 645 x += y; 646 x /= 2; 647 } while ((x - y) > 1 || (x - y) < -1); 648 649 return (x); 650 } 651 652 static int 653 stone(int *a, int n, int *b, int *c, int flags) 654 { 655 int i, k, y, j, l; 656 int oldc, tc, oldl, sq; 657 u_int numtries, bound; 658 659 if (flags & D_MINIMAL) 660 bound = UINT_MAX; 661 else { 662 sq = isqrt(n); 663 bound = MAXIMUM(256, sq); 664 } 665 666 k = 0; 667 c[0] = newcand(0, 0, 0); 668 for (i = 1; i <= n; i++) { 669 j = a[i]; 670 if (j == 0) 671 continue; 672 y = -b[j]; 673 oldl = 0; 674 oldc = c[0]; 675 numtries = 0; 676 do { 677 if (y <= clist[oldc].y) 678 continue; 679 l = search(c, k, y); 680 if (l != oldl + 1) 681 oldc = c[l - 1]; 682 if (l <= k) { 683 if (clist[c[l]].y <= y) 684 continue; 685 tc = c[l]; 686 c[l] = newcand(i, y, oldc); 687 oldc = tc; 688 oldl = l; 689 numtries++; 690 } else { 691 c[l] = newcand(i, y, oldc); 692 k++; 693 break; 694 } 695 } while ((y = b[++j]) > 0 && numtries < bound); 696 } 697 return (k); 698 } 699 700 static int 701 newcand(int x, int y, int pred) 702 { 703 struct cand *q; 704 705 if (clen == clistlen) { 706 clistlen = clistlen * 11 / 10; 707 clist = xreallocarray(clist, clistlen, sizeof(*clist)); 708 } 709 q = clist + clen; 710 q->x = x; 711 q->y = y; 712 q->pred = pred; 713 return (clen++); 714 } 715 716 static int 717 search(int *c, int k, int y) 718 { 719 int i, j, l, t; 720 721 if (clist[c[k]].y < y) /* quick look for typical case */ 722 return (k + 1); 723 i = 0; 724 j = k + 1; 725 for (;;) { 726 l = (i + j) / 2; 727 if (l <= i) 728 break; 729 t = clist[c[l]].y; 730 if (t > y) 731 j = l; 732 else if (t < y) 733 i = l; 734 else 735 return (l); 736 } 737 return (l + 1); 738 } 739 740 static void 741 unravel(int p) 742 { 743 struct cand *q; 744 int i; 745 746 for (i = 0; i <= len[0]; i++) 747 J[i] = i <= pref ? i : 748 i > len[0] - suff ? i + len[1] - len[0] : 0; 749 for (q = clist + p; q->y != 0; q = clist + q->pred) 750 J[q->x + pref] = q->y + pref; 751 } 752 753 /* 754 * Check does double duty: 755 * 1. ferret out any fortuitous correspondences due 756 * to confounding by hashing (which result in "jackpot") 757 * 2. collect random access indexes to the two files 758 */ 759 static void 760 check(FILE *f1, FILE *f2, int flags) 761 { 762 int i, j, jackpot, c, d; 763 long ctold, ctnew; 764 765 rewind(f1); 766 rewind(f2); 767 j = 1; 768 ixold[0] = ixnew[0] = 0; 769 jackpot = 0; 770 ctold = ctnew = 0; 771 for (i = 1; i <= len[0]; i++) { 772 if (J[i] == 0) { 773 ixold[i] = ctold += skipline(f1); 774 continue; 775 } 776 while (j < J[i]) { 777 ixnew[j] = ctnew += skipline(f2); 778 j++; 779 } 780 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) { 781 for (;;) { 782 c = getc(f1); 783 d = getc(f2); 784 /* 785 * GNU diff ignores a missing newline 786 * in one file for -b or -w. 787 */ 788 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { 789 if (c == EOF && d == '\n') { 790 ctnew++; 791 break; 792 } else if (c == '\n' && d == EOF) { 793 ctold++; 794 break; 795 } 796 } 797 ctold++; 798 ctnew++; 799 if (flags & D_STRIPCR) { 800 if (c == '\r') { 801 if ((c = getc(f1)) == '\n') { 802 ctnew++; 803 break; 804 } 805 } 806 if (d == '\r') { 807 if ((d = getc(f2)) == '\n') { 808 ctold++; 809 break; 810 } 811 } 812 } 813 if ((flags & D_FOLDBLANKS) && isspace(c) && 814 isspace(d)) { 815 do { 816 if (c == '\n') 817 break; 818 ctold++; 819 } while (isspace(c = getc(f1))); 820 do { 821 if (d == '\n') 822 break; 823 ctnew++; 824 } while (isspace(d = getc(f2))); 825 } else if ((flags & D_IGNOREBLANKS)) { 826 while (isspace(c) && c != '\n') { 827 c = getc(f1); 828 ctold++; 829 } 830 while (isspace(d) && d != '\n') { 831 d = getc(f2); 832 ctnew++; 833 } 834 } 835 if (chrtran[c] != chrtran[d]) { 836 jackpot++; 837 J[i] = 0; 838 if (c != '\n' && c != EOF) 839 ctold += skipline(f1); 840 if (d != '\n' && c != EOF) 841 ctnew += skipline(f2); 842 break; 843 } 844 if (c == '\n' || c == EOF) 845 break; 846 } 847 } else { 848 for (;;) { 849 ctold++; 850 ctnew++; 851 if ((c = getc(f1)) != (d = getc(f2))) { 852 /* jackpot++; */ 853 J[i] = 0; 854 if (c != '\n' && c != EOF) 855 ctold += skipline(f1); 856 if (d != '\n' && c != EOF) 857 ctnew += skipline(f2); 858 break; 859 } 860 if (c == '\n' || c == EOF) 861 break; 862 } 863 } 864 ixold[i] = ctold; 865 ixnew[j] = ctnew; 866 j++; 867 } 868 for (; j <= len[1]; j++) { 869 ixnew[j] = ctnew += skipline(f2); 870 } 871 /* 872 * if (jackpot) 873 * fprintf(stderr, "jackpot\n"); 874 */ 875 } 876 877 /* shellsort CACM #201 */ 878 static void 879 sort(struct line *a, int n) 880 { 881 struct line *ai, *aim, w; 882 int j, m = 0, k; 883 884 if (n == 0) 885 return; 886 for (j = 1; j <= n; j *= 2) 887 m = 2 * j - 1; 888 for (m /= 2; m != 0; m /= 2) { 889 k = n - m; 890 for (j = 1; j <= k; j++) { 891 for (ai = &a[j]; ai > a; ai -= m) { 892 aim = &ai[m]; 893 if (aim < ai) 894 break; /* wraparound */ 895 if (aim->value > ai[0].value || 896 (aim->value == ai[0].value && 897 aim->serial > ai[0].serial)) 898 break; 899 w.value = ai[0].value; 900 ai[0].value = aim->value; 901 aim->value = w.value; 902 w.serial = ai[0].serial; 903 ai[0].serial = aim->serial; 904 aim->serial = w.serial; 905 } 906 } 907 } 908 } 909 910 static void 911 unsort(struct line *f, int l, int *b) 912 { 913 int *a, i; 914 915 a = xcalloc(l + 1, sizeof(*a)); 916 for (i = 1; i <= l; i++) 917 a[f[i].serial] = f[i].value; 918 for (i = 1; i <= l; i++) 919 b[i] = a[i]; 920 free(a); 921 } 922 923 static int 924 skipline(FILE *f) 925 { 926 int i, c; 927 928 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 929 continue; 930 return (i); 931 } 932 933 static void 934 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) 935 { 936 int m, i0, i1, j0, j1; 937 938 rewind(f1); 939 rewind(f2); 940 m = len[0]; 941 J[0] = 0; 942 J[m + 1] = len[1] + 1; 943 if (diff_format != D_EDIT) { 944 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 945 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 946 i0++; 947 j0 = J[i0 - 1] + 1; 948 i1 = i0 - 1; 949 while (i1 < m && J[i1 + 1] == 0) 950 i1++; 951 j1 = J[i1 + 1] - 1; 952 J[i1] = j1; 953 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); 954 } 955 } else { 956 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 957 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 958 i0--; 959 j0 = J[i0 + 1] - 1; 960 i1 = i0 + 1; 961 while (i1 > 1 && J[i1 - 1] == 0) 962 i1--; 963 j1 = J[i1 - 1] + 1; 964 J[i1] = j1; 965 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); 966 } 967 } 968 if (m == 0) 969 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); 970 if (diff_format == D_IFDEF) { 971 for (;;) { 972 #define c i0 973 if ((c = getc(f1)) == EOF) 974 return; 975 diff_output("%c", c); 976 } 977 #undef c 978 } 979 if (anychange != 0) { 980 if (diff_format == D_CONTEXT) 981 dump_context_vec(f1, f2, flags); 982 else if (diff_format == D_UNIFIED) 983 dump_unified_vec(f1, f2, flags); 984 } 985 } 986 987 static void 988 range(int a, int b, const char *separator) 989 { 990 diff_output("%d", a > b ? b : a); 991 if (a < b) 992 diff_output("%s%d", separator, b); 993 } 994 995 static void 996 uni_range(int a, int b) 997 { 998 if (a < b) 999 diff_output("%d,%d", a, b - a + 1); 1000 else if (a == b) 1001 diff_output("%d", b); 1002 else 1003 diff_output("%d,0", b); 1004 } 1005 1006 static char * 1007 preadline(int fd, size_t rlen, off_t off) 1008 { 1009 char *line; 1010 ssize_t nr; 1011 1012 line = xmalloc(rlen + 1); 1013 if ((nr = pread(fd, line, rlen, off)) < 0) 1014 err(2, "preadline"); 1015 if (nr > 0 && line[nr-1] == '\n') 1016 nr--; 1017 line[nr] = '\0'; 1018 return (line); 1019 } 1020 1021 static int 1022 ignoreline(char *line) 1023 { 1024 int ret; 1025 1026 ret = regexec(&ignore_re, line, 0, NULL, 0); 1027 free(line); 1028 return (ret == 0); /* if it matched, it should be ignored. */ 1029 } 1030 1031 /* 1032 * Indicate that there is a difference between lines a and b of the from file 1033 * to get to lines c to d of the to file. If a is greater then b then there 1034 * are no lines in the from file involved and this means that there were 1035 * lines appended (beginning at b). If c is greater than d then there are 1036 * lines missing from the to file. 1037 */ 1038 static void 1039 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, 1040 int *pflags) 1041 { 1042 static size_t max_context = 64; 1043 int i; 1044 1045 restart: 1046 if (diff_format != D_IFDEF && a > b && c > d) 1047 return; 1048 if (ignore_pats != NULL) { 1049 char *line; 1050 /* 1051 * All lines in the change, insert, or delete must 1052 * match an ignore pattern for the change to be 1053 * ignored. 1054 */ 1055 if (a <= b) { /* Changes and deletes. */ 1056 for (i = a; i <= b; i++) { 1057 line = preadline(fileno(f1), 1058 ixold[i] - ixold[i - 1], ixold[i - 1]); 1059 if (!ignoreline(line)) 1060 goto proceed; 1061 } 1062 } 1063 if (a > b || c <= d) { /* Changes and inserts. */ 1064 for (i = c; i <= d; i++) { 1065 line = preadline(fileno(f2), 1066 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1067 if (!ignoreline(line)) 1068 goto proceed; 1069 } 1070 } 1071 return; 1072 } 1073 proceed: 1074 if (*pflags & D_HEADER) { 1075 diff_output("%s %s %s\n", diffargs, file1, file2); 1076 *pflags &= ~D_HEADER; 1077 } 1078 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { 1079 /* 1080 * Allocate change records as needed. 1081 */ 1082 if (context_vec_ptr == context_vec_end - 1) { 1083 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1084 max_context <<= 1; 1085 context_vec_start = xreallocarray(context_vec_start, 1086 max_context, sizeof(*context_vec_start)); 1087 context_vec_end = context_vec_start + max_context; 1088 context_vec_ptr = context_vec_start + offset; 1089 } 1090 if (anychange == 0) { 1091 /* 1092 * Print the context/unidiff header first time through. 1093 */ 1094 print_header(file1, file2); 1095 anychange = 1; 1096 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && 1097 c > context_vec_ptr->d + (2 * diff_context) + 1) { 1098 /* 1099 * If this change is more than 'diff_context' lines from the 1100 * previous change, dump the record and reset it. 1101 */ 1102 if (diff_format == D_CONTEXT) 1103 dump_context_vec(f1, f2, *pflags); 1104 else 1105 dump_unified_vec(f1, f2, *pflags); 1106 } 1107 context_vec_ptr++; 1108 context_vec_ptr->a = a; 1109 context_vec_ptr->b = b; 1110 context_vec_ptr->c = c; 1111 context_vec_ptr->d = d; 1112 return; 1113 } 1114 if (anychange == 0) 1115 anychange = 1; 1116 switch (diff_format) { 1117 case D_BRIEF: 1118 return; 1119 case D_NORMAL: 1120 case D_EDIT: 1121 range(a, b, ","); 1122 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1123 if (diff_format == D_NORMAL) 1124 range(c, d, ","); 1125 diff_output("\n"); 1126 break; 1127 case D_REVERSE: 1128 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1129 range(a, b, " "); 1130 diff_output("\n"); 1131 break; 1132 case D_NREVERSE: 1133 if (a > b) 1134 diff_output("a%d %d\n", b, d - c + 1); 1135 else { 1136 diff_output("d%d %d\n", a, b - a + 1); 1137 if (!(c > d)) 1138 /* add changed lines */ 1139 diff_output("a%d %d\n", b, d - c + 1); 1140 } 1141 break; 1142 } 1143 if (diff_format == D_NORMAL || diff_format == D_IFDEF) { 1144 fetch(ixold, a, b, f1, '<', 1, *pflags); 1145 if (a <= b && c <= d && diff_format == D_NORMAL) 1146 diff_output("---\n"); 1147 } 1148 i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); 1149 if (i != 0 && diff_format == D_EDIT) { 1150 /* 1151 * A non-zero return value for D_EDIT indicates that the 1152 * last line printed was a bare dot (".") that has been 1153 * escaped as ".." to prevent ed(1) from misinterpreting 1154 * it. We have to add a substitute command to change this 1155 * back and restart where we left off. 1156 */ 1157 diff_output(".\n"); 1158 diff_output("%ds/.//\n", a + i - 1); 1159 b = a + i - 1; 1160 a = b + 1; 1161 c += i; 1162 goto restart; 1163 } 1164 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) 1165 diff_output(".\n"); 1166 if (inifdef) { 1167 diff_output("#endif /* %s */\n", ifdefname); 1168 inifdef = 0; 1169 } 1170 } 1171 1172 static int 1173 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) 1174 { 1175 int i, j, c, lastc, col, nc; 1176 int newcol; 1177 1178 /* 1179 * When doing #ifdef's, copy down to current line 1180 * if this is the first file, so that stuff makes it to output. 1181 */ 1182 if (diff_format == D_IFDEF && oldfile) { 1183 long curpos = ftell(lb); 1184 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1185 nc = f[a > b ? b : a - 1] - curpos; 1186 for (i = 0; i < nc; i++) 1187 diff_output("%c", getc(lb)); 1188 } 1189 if (a > b) 1190 return (0); 1191 if (diff_format == D_IFDEF) { 1192 if (inifdef) { 1193 diff_output("#else /* %s%s */\n", 1194 oldfile == 1 ? "!" : "", ifdefname); 1195 } else { 1196 if (oldfile) 1197 diff_output("#ifndef %s\n", ifdefname); 1198 else 1199 diff_output("#ifdef %s\n", ifdefname); 1200 } 1201 inifdef = 1 + oldfile; 1202 } 1203 for (i = a; i <= b; i++) { 1204 fseek(lb, f[i - 1], SEEK_SET); 1205 nc = f[i] - f[i - 1]; 1206 if (diff_format != D_IFDEF && ch != '\0') { 1207 diff_output("%c", ch); 1208 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT 1209 || diff_format == D_UNIFIED)) 1210 diff_output("\t"); 1211 else if (diff_format != D_UNIFIED) 1212 diff_output(" "); 1213 } 1214 col = 0; 1215 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1216 if ((c = getc(lb)) == EOF) { 1217 if (diff_format == D_EDIT || diff_format == D_REVERSE || 1218 diff_format == D_NREVERSE) 1219 warnx("No newline at end of file"); 1220 else 1221 diff_output("\n\\ No newline at end of " 1222 "file\n"); 1223 return (0); 1224 } 1225 if (c == '\t' && (flags & D_EXPANDTABS)) { 1226 newcol = ((col/tabsize)+1)*tabsize; 1227 do { 1228 diff_output(" "); 1229 } while (++col < newcol); 1230 } else { 1231 if (diff_format == D_EDIT && j == 1 && c == '\n' 1232 && lastc == '.') { 1233 /* 1234 * Don't print a bare "." line 1235 * since that will confuse ed(1). 1236 * Print ".." instead and return, 1237 * giving the caller an offset 1238 * from which to restart. 1239 */ 1240 diff_output(".\n"); 1241 return (i - a + 1); 1242 } 1243 diff_output("%c", c); 1244 col++; 1245 } 1246 } 1247 } 1248 return (0); 1249 } 1250 1251 /* 1252 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1253 */ 1254 static int 1255 readhash(FILE *f, int flags) 1256 { 1257 int i, t, space; 1258 int sum; 1259 1260 sum = 1; 1261 space = 0; 1262 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { 1263 if (flags & D_IGNORECASE) 1264 for (i = 0; (t = getc(f)) != '\n'; i++) { 1265 if (flags & D_STRIPCR && t == '\r') { 1266 t = getc(f); 1267 if (t == '\n') 1268 break; 1269 ungetc(t, f); 1270 } 1271 if (t == EOF) { 1272 if (i == 0) 1273 return (0); 1274 break; 1275 } 1276 sum = sum * 127 + chrtran[t]; 1277 } 1278 else 1279 for (i = 0; (t = getc(f)) != '\n'; i++) { 1280 if (flags & D_STRIPCR && t == '\r') { 1281 t = getc(f); 1282 if (t == '\n') 1283 break; 1284 ungetc(t, f); 1285 } 1286 if (t == EOF) { 1287 if (i == 0) 1288 return (0); 1289 break; 1290 } 1291 sum = sum * 127 + t; 1292 } 1293 } else { 1294 for (i = 0;;) { 1295 switch (t = getc(f)) { 1296 case '\r': 1297 case '\t': 1298 case '\v': 1299 case '\f': 1300 case ' ': 1301 space++; 1302 continue; 1303 default: 1304 if (space && (flags & D_IGNOREBLANKS) == 0) { 1305 i++; 1306 space = 0; 1307 } 1308 sum = sum * 127 + chrtran[t]; 1309 i++; 1310 continue; 1311 case EOF: 1312 if (i == 0) 1313 return (0); 1314 /* FALLTHROUGH */ 1315 case '\n': 1316 break; 1317 } 1318 break; 1319 } 1320 } 1321 /* 1322 * There is a remote possibility that we end up with a zero sum. 1323 * Zero is used as an EOF marker, so return 1 instead. 1324 */ 1325 return (sum == 0 ? 1 : sum); 1326 } 1327 1328 static int 1329 asciifile(FILE *f) 1330 { 1331 unsigned char buf[BUFSIZ]; 1332 size_t cnt; 1333 1334 if (f == NULL) 1335 return (1); 1336 1337 rewind(f); 1338 cnt = fread(buf, 1, sizeof(buf), f); 1339 return (memchr(buf, '\0', cnt) == NULL); 1340 } 1341 1342 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) 1343 1344 static char * 1345 match_function(const long *f, int pos, FILE *fp) 1346 { 1347 unsigned char buf[FUNCTION_CONTEXT_SIZE]; 1348 size_t nc; 1349 int last = lastline; 1350 const char *state = NULL; 1351 1352 lastline = pos; 1353 while (pos > last) { 1354 fseek(fp, f[pos - 1], SEEK_SET); 1355 nc = f[pos] - f[pos - 1]; 1356 if (nc >= sizeof(buf)) 1357 nc = sizeof(buf) - 1; 1358 nc = fread(buf, 1, nc, fp); 1359 if (nc > 0) { 1360 buf[nc] = '\0'; 1361 buf[strcspn(buf, "\n")] = '\0'; 1362 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 1363 if (begins_with(buf, "private:")) { 1364 if (!state) 1365 state = " (private)"; 1366 } else if (begins_with(buf, "protected:")) { 1367 if (!state) 1368 state = " (protected)"; 1369 } else if (begins_with(buf, "public:")) { 1370 if (!state) 1371 state = " (public)"; 1372 } else { 1373 strlcpy(lastbuf, buf, sizeof lastbuf); 1374 if (state) 1375 strlcat(lastbuf, state, 1376 sizeof lastbuf); 1377 lastmatchline = pos; 1378 return lastbuf; 1379 } 1380 } 1381 } 1382 pos--; 1383 } 1384 return lastmatchline > 0 ? lastbuf : NULL; 1385 } 1386 1387 /* dump accumulated "context" diff changes */ 1388 static void 1389 dump_context_vec(FILE *f1, FILE *f2, int flags) 1390 { 1391 struct context_vec *cvp = context_vec_start; 1392 int lowa, upb, lowc, upd, do_output; 1393 int a, b, c, d; 1394 char ch, *f; 1395 1396 if (context_vec_start > context_vec_ptr) 1397 return; 1398 1399 b = d = 0; /* gcc */ 1400 lowa = MAXIMUM(1, cvp->a - diff_context); 1401 upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); 1402 lowc = MAXIMUM(1, cvp->c - diff_context); 1403 upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); 1404 1405 diff_output("***************"); 1406 if ((flags & D_PROTOTYPE)) { 1407 f = match_function(ixold, lowa-1, f1); 1408 if (f != NULL) 1409 diff_output(" %s", f); 1410 } 1411 diff_output("\n*** "); 1412 range(lowa, upb, ","); 1413 diff_output(" ****\n"); 1414 1415 /* 1416 * Output changes to the "old" file. The first loop suppresses 1417 * output if there were no changes to the "old" file (we'll see 1418 * the "old" lines as context in the "new" list). 1419 */ 1420 do_output = 0; 1421 for (; cvp <= context_vec_ptr; cvp++) 1422 if (cvp->a <= cvp->b) { 1423 cvp = context_vec_start; 1424 do_output++; 1425 break; 1426 } 1427 if (do_output) { 1428 while (cvp <= context_vec_ptr) { 1429 a = cvp->a; 1430 b = cvp->b; 1431 c = cvp->c; 1432 d = cvp->d; 1433 1434 if (a <= b && c <= d) 1435 ch = 'c'; 1436 else 1437 ch = (a <= b) ? 'd' : 'a'; 1438 1439 if (ch == 'a') 1440 fetch(ixold, lowa, b, f1, ' ', 0, flags); 1441 else { 1442 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1443 fetch(ixold, a, b, f1, 1444 ch == 'c' ? '!' : '-', 0, flags); 1445 } 1446 lowa = b + 1; 1447 cvp++; 1448 } 1449 fetch(ixold, b + 1, upb, f1, ' ', 0, flags); 1450 } 1451 /* output changes to the "new" file */ 1452 diff_output("--- "); 1453 range(lowc, upd, ","); 1454 diff_output(" ----\n"); 1455 1456 do_output = 0; 1457 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1458 if (cvp->c <= cvp->d) { 1459 cvp = context_vec_start; 1460 do_output++; 1461 break; 1462 } 1463 if (do_output) { 1464 while (cvp <= context_vec_ptr) { 1465 a = cvp->a; 1466 b = cvp->b; 1467 c = cvp->c; 1468 d = cvp->d; 1469 1470 if (a <= b && c <= d) 1471 ch = 'c'; 1472 else 1473 ch = (a <= b) ? 'd' : 'a'; 1474 1475 if (ch == 'd') 1476 fetch(ixnew, lowc, d, f2, ' ', 0, flags); 1477 else { 1478 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1479 fetch(ixnew, c, d, f2, 1480 ch == 'c' ? '!' : '+', 0, flags); 1481 } 1482 lowc = d + 1; 1483 cvp++; 1484 } 1485 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1486 } 1487 context_vec_ptr = context_vec_start - 1; 1488 } 1489 1490 /* dump accumulated "unified" diff changes */ 1491 static void 1492 dump_unified_vec(FILE *f1, FILE *f2, int flags) 1493 { 1494 struct context_vec *cvp = context_vec_start; 1495 int lowa, upb, lowc, upd; 1496 int a, b, c, d; 1497 char ch, *f; 1498 1499 if (context_vec_start > context_vec_ptr) 1500 return; 1501 1502 b = d = 0; /* gcc */ 1503 lowa = MAXIMUM(1, cvp->a - diff_context); 1504 upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); 1505 lowc = MAXIMUM(1, cvp->c - diff_context); 1506 upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); 1507 1508 diff_output("@@ -"); 1509 uni_range(lowa, upb); 1510 diff_output(" +"); 1511 uni_range(lowc, upd); 1512 diff_output(" @@"); 1513 if ((flags & D_PROTOTYPE)) { 1514 f = match_function(ixold, lowa-1, f1); 1515 if (f != NULL) 1516 diff_output(" %s", f); 1517 } 1518 diff_output("\n"); 1519 1520 /* 1521 * Output changes in "unified" diff format--the old and new lines 1522 * are printed together. 1523 */ 1524 for (; cvp <= context_vec_ptr; cvp++) { 1525 a = cvp->a; 1526 b = cvp->b; 1527 c = cvp->c; 1528 d = cvp->d; 1529 1530 /* 1531 * c: both new and old changes 1532 * d: only changes in the old file 1533 * a: only changes in the new file 1534 */ 1535 if (a <= b && c <= d) 1536 ch = 'c'; 1537 else 1538 ch = (a <= b) ? 'd' : 'a'; 1539 1540 switch (ch) { 1541 case 'c': 1542 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1543 fetch(ixold, a, b, f1, '-', 0, flags); 1544 fetch(ixnew, c, d, f2, '+', 0, flags); 1545 break; 1546 case 'd': 1547 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1548 fetch(ixold, a, b, f1, '-', 0, flags); 1549 break; 1550 case 'a': 1551 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1552 fetch(ixnew, c, d, f2, '+', 0, flags); 1553 break; 1554 } 1555 lowa = b + 1; 1556 lowc = d + 1; 1557 } 1558 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1559 1560 context_vec_ptr = context_vec_start - 1; 1561 } 1562 1563 static void 1564 print_header(const char *file1, const char *file2) 1565 { 1566 const char *time_format; 1567 char buf1[256]; 1568 char buf2[256]; 1569 char end1[10]; 1570 char end2[10]; 1571 struct tm *tm_ptr1, *tm_ptr2; 1572 int nsec1 = TIMESPEC_NS (stb1.st_mtime); 1573 int nsec2 = TIMESPEC_NS (stb2.st_mtime); 1574 1575 #ifdef ST_MTIM_NSEC 1576 time_format = "%Y-%m-%d %H:%M:%S.%N"; 1577 #else 1578 time_format = "%Y-%m-%d %H:%M:%S"; 1579 #endif 1580 1581 if (cflag) 1582 time_format = "%c"; 1583 tm_ptr1 = localtime(&stb1.st_mtime); 1584 tm_ptr2 = localtime(&stb2.st_mtime); 1585 strftime(buf1, 256, time_format, tm_ptr1); 1586 strftime(buf2, 256, time_format, tm_ptr2); 1587 if (!cflag) { 1588 strftime(end1, 10, "%z", tm_ptr1); 1589 strftime(end2, 10, "%z", tm_ptr2); 1590 sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1); 1591 sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2); 1592 } 1593 if (label[0] != NULL) 1594 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", 1595 label[0]); 1596 else 1597 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---", 1598 file1, buf1); 1599 if (label[1] != NULL) 1600 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", 1601 label[1]); 1602 else 1603 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++", 1604 file2, buf2); 1605 } 1606