1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #include <sys/cdefs.h> 68 __FBSDID("$FreeBSD$"); 69 70 #include <sys/capsicum.h> 71 #include <sys/procdesc.h> 72 #include <sys/stat.h> 73 #include <sys/types.h> 74 #include <sys/event.h> 75 #include <sys/wait.h> 76 77 #include <capsicum_helpers.h> 78 #include <ctype.h> 79 #include <err.h> 80 #include <errno.h> 81 #include <fcntl.h> 82 #include <paths.h> 83 #include <stddef.h> 84 #include <stdint.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <unistd.h> 89 #include <limits.h> 90 #include <signal.h> 91 92 #include "diff.h" 93 #include "xmalloc.h" 94 95 #define _PATH_PR "/usr/bin/pr" 96 97 /* 98 * diff - compare two files. 99 */ 100 101 /* 102 * Uses an algorithm due to Harold Stone, which finds 103 * a pair of longest identical subsequences in the two 104 * files. 105 * 106 * The major goal is to generate the match vector J. 107 * J[i] is the index of the line in file1 corresponding 108 * to line i file0. J[i] = 0 if there is no 109 * such line in file1. 110 * 111 * Lines are hashed so as to work in core. All potential 112 * matches are located by sorting the lines of each file 113 * on the hash (called ``value''). In particular, this 114 * collects the equivalence classes in file1 together. 115 * Subroutine equiv replaces the value of each line in 116 * file0 by the index of the first element of its 117 * matching equivalence in (the reordered) file1. 118 * To save space equiv squeezes file1 into a single 119 * array member in which the equivalence classes 120 * are simply concatenated, except that their first 121 * members are flagged by changing sign. 122 * 123 * Next the indices that point into member are unsorted into 124 * array class according to the original order of file0. 125 * 126 * The cleverness lies in routine stone. This marches 127 * through the lines of file0, developing a vector klist 128 * of "k-candidates". At step i a k-candidate is a matched 129 * pair of lines x,y (x in file0 y in file1) such that 130 * there is a common subsequence of length k 131 * between the first i lines of file0 and the first y 132 * lines of file1, but there is no such subsequence for 133 * any smaller y. x is the earliest possible mate to y 134 * that occurs in such a subsequence. 135 * 136 * Whenever any of the members of the equivalence class of 137 * lines in file1 matable to a line in file0 has serial number 138 * less than the y of some k-candidate, that k-candidate 139 * with the smallest such y is replaced. The new 140 * k-candidate is chained (via pred) to the current 141 * k-1 candidate so that the actual subsequence can 142 * be recovered. When a member has serial number greater 143 * that the y of all k-candidates, the klist is extended. 144 * At the end, the longest subsequence is pulled out 145 * and placed in the array J by unravel 146 * 147 * With J in hand, the matches there recorded are 148 * check'ed against reality to assure that no spurious 149 * matches have crept in due to hashing. If they have, 150 * they are broken, and "jackpot" is recorded--a harmless 151 * matter except that a true match for a spuriously 152 * mated line may now be unnecessarily reported as a change. 153 * 154 * Much of the complexity of the program comes simply 155 * from trying to minimize core utilization and 156 * maximize the range of doable problems by dynamically 157 * allocating what is needed and reusing what is not. 158 * The core requirements for problems larger than somewhat 159 * are (in words) 2*length(file0) + length(file1) + 160 * 3*(number of k-candidates installed), typically about 161 * 6n words for files of length n. 162 */ 163 164 struct cand { 165 int x; 166 int y; 167 int pred; 168 }; 169 170 static struct line { 171 int serial; 172 int value; 173 } *file[2]; 174 175 /* 176 * The following struct is used to record change information when 177 * doing a "context" or "unified" diff. (see routine "change" to 178 * understand the highly mnemonic field names) 179 */ 180 struct context_vec { 181 int a; /* start line in old file */ 182 int b; /* end line in old file */ 183 int c; /* start line in new file */ 184 int d; /* end line in new file */ 185 }; 186 187 #define diff_output printf 188 static FILE *opentemp(const char *); 189 static void output(char *, FILE *, char *, FILE *, int); 190 static void check(FILE *, FILE *, int); 191 static void range(int, int, const char *); 192 static void uni_range(int, int); 193 static void dump_context_vec(FILE *, FILE *, int); 194 static void dump_unified_vec(FILE *, FILE *, int); 195 static void prepare(int, FILE *, size_t, int); 196 static void prune(void); 197 static void equiv(struct line *, int, struct line *, int, int *); 198 static void unravel(int); 199 static void unsort(struct line *, int, int *); 200 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); 201 static void sort(struct line *, int); 202 static void print_header(const char *, const char *); 203 static int ignoreline(char *); 204 static int asciifile(FILE *); 205 static int fetch(long *, int, int, FILE *, int, int, int); 206 static int newcand(int, int, int); 207 static int search(int *, int, int); 208 static int skipline(FILE *); 209 static int isqrt(int); 210 static int stone(int *, int, int *, int *, int); 211 static int readhash(FILE *, int); 212 static int files_differ(FILE *, FILE *, int); 213 static char *match_function(const long *, int, FILE *); 214 static char *preadline(int, size_t, off_t); 215 216 static int *J; /* will be overlaid on class */ 217 static int *class; /* will be overlaid on file[0] */ 218 static int *klist; /* will be overlaid on file[0] after class */ 219 static int *member; /* will be overlaid on file[1] */ 220 static int clen; 221 static int inifdef; /* whether or not we are in a #ifdef block */ 222 static int len[2]; 223 static int pref, suff; /* length of prefix and suffix */ 224 static int slen[2]; 225 static int anychange; 226 static long *ixnew; /* will be overlaid on file[1] */ 227 static long *ixold; /* will be overlaid on klist */ 228 static struct cand *clist; /* merely a free storage pot for candidates */ 229 static int clistlen; /* the length of clist */ 230 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 231 static u_char *chrtran; /* translation table for case-folding */ 232 static struct context_vec *context_vec_start; 233 static struct context_vec *context_vec_end; 234 static struct context_vec *context_vec_ptr; 235 236 #define FUNCTION_CONTEXT_SIZE 55 237 static char lastbuf[FUNCTION_CONTEXT_SIZE]; 238 static int lastline; 239 static int lastmatchline; 240 241 242 /* 243 * chrtran points to one of 2 translation tables: cup2low if folding upper to 244 * lower case clow2low if not folding case 245 */ 246 static u_char clow2low[256] = { 247 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 248 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 249 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 250 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 251 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 252 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 253 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 254 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 255 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 256 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 257 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 258 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 259 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 260 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 261 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 262 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 263 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 264 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 265 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 266 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 267 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 268 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 269 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 270 0xfd, 0xfe, 0xff 271 }; 272 273 static u_char cup2low[256] = { 274 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 275 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 276 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 277 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 278 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 279 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 280 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 281 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 282 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 283 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 284 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 285 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 286 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 287 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 288 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 289 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 290 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 291 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 292 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 293 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 294 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 295 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 296 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 297 0xfd, 0xfe, 0xff 298 }; 299 300 int 301 diffreg(char *file1, char *file2, int flags, int capsicum) 302 { 303 FILE *f1, *f2; 304 int i, rval; 305 int ostdout = -1; 306 int pr_pd, kq; 307 struct kevent *e; 308 cap_rights_t rights_ro; 309 310 e = NULL; 311 kq = -1; 312 f1 = f2 = NULL; 313 rval = D_SAME; 314 anychange = 0; 315 lastline = 0; 316 lastmatchline = 0; 317 context_vec_ptr = context_vec_start - 1; 318 if (flags & D_IGNORECASE) 319 chrtran = cup2low; 320 else 321 chrtran = clow2low; 322 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 323 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); 324 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 325 goto closem; 326 327 if (flags & D_EMPTY1) 328 f1 = fopen(_PATH_DEVNULL, "r"); 329 else { 330 if (!S_ISREG(stb1.st_mode)) { 331 if ((f1 = opentemp(file1)) == NULL || 332 fstat(fileno(f1), &stb1) < 0) { 333 warn("%s", file1); 334 status |= 2; 335 goto closem; 336 } 337 } else if (strcmp(file1, "-") == 0) 338 f1 = stdin; 339 else 340 f1 = fopen(file1, "r"); 341 } 342 if (f1 == NULL) { 343 warn("%s", file1); 344 status |= 2; 345 goto closem; 346 } 347 348 if (flags & D_EMPTY2) 349 f2 = fopen(_PATH_DEVNULL, "r"); 350 else { 351 if (!S_ISREG(stb2.st_mode)) { 352 if ((f2 = opentemp(file2)) == NULL || 353 fstat(fileno(f2), &stb2) < 0) { 354 warn("%s", file2); 355 status |= 2; 356 goto closem; 357 } 358 } else if (strcmp(file2, "-") == 0) 359 f2 = stdin; 360 else 361 f2 = fopen(file2, "r"); 362 } 363 if (f2 == NULL) { 364 warn("%s", file2); 365 status |= 2; 366 goto closem; 367 } 368 369 if (lflag) { 370 /* redirect stdout to pr */ 371 int pfd[2]; 372 pid_t pid; 373 char *header; 374 375 xasprintf(&header, "%s %s %s", diffargs, file1, file2); 376 signal(SIGPIPE, SIG_IGN); 377 fflush(stdout); 378 rewind(stdout); 379 pipe(pfd); 380 switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) { 381 case -1: 382 status |= 2; 383 free(header); 384 err(2, "No more processes"); 385 case 0: 386 /* child */ 387 if (pfd[0] != STDIN_FILENO) { 388 dup2(pfd[0], STDIN_FILENO); 389 close(pfd[0]); 390 } 391 close(pfd[1]); 392 execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0); 393 _exit(127); 394 default: 395 396 /* parent */ 397 if (pfd[1] != STDOUT_FILENO) { 398 ostdout = dup(STDOUT_FILENO); 399 dup2(pfd[1], STDOUT_FILENO); 400 close(pfd[1]); 401 } 402 close(pfd[0]); 403 rewind(stdout); 404 free(header); 405 kq = kqueue(); 406 if (kq == -1) 407 err(2, "kqueue"); 408 e = xmalloc(sizeof(struct kevent)); 409 EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0, 410 NULL); 411 if (kevent(kq, e, 1, NULL, 0, NULL) == -1) 412 err(2, "kevent"); 413 } 414 } 415 416 if (capsicum) { 417 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 418 if (cap_rights_limit(fileno(f1), &rights_ro) < 0 419 && errno != ENOSYS) 420 err(2, "unable to limit rights on: %s", file1); 421 if (cap_rights_limit(fileno(f2), &rights_ro) < 0 && 422 errno != ENOSYS) 423 err(2, "unable to limit rights on: %s", file2); 424 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 425 /* stding has already been limited */ 426 if (caph_limit_stderr() == -1) 427 err(2, "unable to limit stderr"); 428 if (caph_limit_stdout() == -1) 429 err(2, "unable to limit stdout"); 430 } else if (caph_limit_stdio() == -1) 431 err(2, "unable to limit stdio"); 432 433 caph_cache_catpages(); 434 caph_cache_tzdata(); 435 if (cap_enter() < 0 && errno != ENOSYS) 436 err(2, "unable to enter capability mode"); 437 } 438 439 switch (files_differ(f1, f2, flags)) { 440 case 0: 441 goto closem; 442 case 1: 443 break; 444 default: 445 /* error */ 446 status |= 2; 447 goto closem; 448 } 449 450 if ((flags & D_FORCEASCII) == 0 && 451 (!asciifile(f1) || !asciifile(f2))) { 452 rval = D_BINARY; 453 status |= 1; 454 goto closem; 455 } 456 prepare(0, f1, stb1.st_size, flags); 457 prepare(1, f2, stb2.st_size, flags); 458 459 prune(); 460 sort(sfile[0], slen[0]); 461 sort(sfile[1], slen[1]); 462 463 member = (int *)file[1]; 464 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 465 member = xreallocarray(member, slen[1] + 2, sizeof(*member)); 466 467 class = (int *)file[0]; 468 unsort(sfile[0], slen[0], class); 469 class = xreallocarray(class, slen[0] + 2, sizeof(*class)); 470 471 klist = xcalloc(slen[0] + 2, sizeof(*klist)); 472 clen = 0; 473 clistlen = 100; 474 clist = xcalloc(clistlen, sizeof(*clist)); 475 i = stone(class, slen[0], member, klist, flags); 476 free(member); 477 free(class); 478 479 J = xreallocarray(J, len[0] + 2, sizeof(*J)); 480 unravel(klist[i]); 481 free(clist); 482 free(klist); 483 484 ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); 485 ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); 486 check(f1, f2, flags); 487 output(file1, f1, file2, f2, flags); 488 if (ostdout != -1 && e != NULL) { 489 /* close the pipe to pr and restore stdout */ 490 int wstatus; 491 492 fflush(stdout); 493 if (ostdout != STDOUT_FILENO) { 494 close(STDOUT_FILENO); 495 dup2(ostdout, STDOUT_FILENO); 496 close(ostdout); 497 } 498 if (kevent(kq, NULL, 0, e, 1, NULL) == -1) 499 err(2, "kevent"); 500 wstatus = e[0].data; 501 close(kq); 502 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) 503 errx(2, "pr exited abnormally"); 504 else if (WIFSIGNALED(wstatus)) 505 errx(2, "pr killed by signal %d", 506 WTERMSIG(wstatus)); 507 } 508 509 closem: 510 if (anychange) { 511 status |= 1; 512 if (rval == D_SAME) 513 rval = D_DIFFER; 514 } 515 if (f1 != NULL) 516 fclose(f1); 517 if (f2 != NULL) 518 fclose(f2); 519 520 return (rval); 521 } 522 523 /* 524 * Check to see if the given files differ. 525 * Returns 0 if they are the same, 1 if different, and -1 on error. 526 * XXX - could use code from cmp(1) [faster] 527 */ 528 static int 529 files_differ(FILE *f1, FILE *f2, int flags) 530 { 531 char buf1[BUFSIZ], buf2[BUFSIZ]; 532 size_t i, j; 533 534 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 535 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 536 return (1); 537 for (;;) { 538 i = fread(buf1, 1, sizeof(buf1), f1); 539 j = fread(buf2, 1, sizeof(buf2), f2); 540 if ((!i && ferror(f1)) || (!j && ferror(f2))) 541 return (-1); 542 if (i != j) 543 return (1); 544 if (i == 0) 545 return (0); 546 if (memcmp(buf1, buf2, i) != 0) 547 return (1); 548 } 549 } 550 551 static FILE * 552 opentemp(const char *f) 553 { 554 char buf[BUFSIZ], tempfile[PATH_MAX]; 555 ssize_t nread; 556 int ifd, ofd; 557 558 if (strcmp(f, "-") == 0) 559 ifd = STDIN_FILENO; 560 else if ((ifd = open(f, O_RDONLY, 0644)) < 0) 561 return (NULL); 562 563 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); 564 565 if ((ofd = mkstemp(tempfile)) < 0) { 566 close(ifd); 567 return (NULL); 568 } 569 unlink(tempfile); 570 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 571 if (write(ofd, buf, nread) != nread) { 572 close(ifd); 573 close(ofd); 574 return (NULL); 575 } 576 } 577 close(ifd); 578 lseek(ofd, (off_t)0, SEEK_SET); 579 return (fdopen(ofd, "r")); 580 } 581 582 char * 583 splice(char *dir, char *path) 584 { 585 char *tail, *buf; 586 size_t dirlen; 587 588 dirlen = strlen(dir); 589 while (dirlen != 0 && dir[dirlen - 1] == '/') 590 dirlen--; 591 if ((tail = strrchr(path, '/')) == NULL) 592 tail = path; 593 else 594 tail++; 595 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); 596 return (buf); 597 } 598 599 static void 600 prepare(int i, FILE *fd, size_t filesize, int flags) 601 { 602 struct line *p; 603 int h; 604 size_t sz, j; 605 606 rewind(fd); 607 608 sz = MIN(filesize, SIZE_MAX) / 25; 609 if (sz < 100) 610 sz = 100; 611 612 p = xcalloc(sz + 3, sizeof(*p)); 613 for (j = 0; (h = readhash(fd, flags));) { 614 if (j == sz) { 615 sz = sz * 3 / 2; 616 p = xreallocarray(p, sz + 3, sizeof(*p)); 617 } 618 p[++j].value = h; 619 } 620 len[i] = j; 621 file[i] = p; 622 } 623 624 static void 625 prune(void) 626 { 627 int i, j; 628 629 for (pref = 0; pref < len[0] && pref < len[1] && 630 file[0][pref + 1].value == file[1][pref + 1].value; 631 pref++) 632 ; 633 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 634 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 635 suff++) 636 ; 637 for (j = 0; j < 2; j++) { 638 sfile[j] = file[j] + pref; 639 slen[j] = len[j] - pref - suff; 640 for (i = 0; i <= slen[j]; i++) 641 sfile[j][i].serial = i; 642 } 643 } 644 645 static void 646 equiv(struct line *a, int n, struct line *b, int m, int *c) 647 { 648 int i, j; 649 650 i = j = 1; 651 while (i <= n && j <= m) { 652 if (a[i].value < b[j].value) 653 a[i++].value = 0; 654 else if (a[i].value == b[j].value) 655 a[i++].value = j; 656 else 657 j++; 658 } 659 while (i <= n) 660 a[i++].value = 0; 661 b[m + 1].value = 0; 662 j = 0; 663 while (++j <= m) { 664 c[j] = -b[j].serial; 665 while (b[j + 1].value == b[j].value) { 666 j++; 667 c[j] = b[j].serial; 668 } 669 } 670 c[j] = -1; 671 } 672 673 /* Code taken from ping.c */ 674 static int 675 isqrt(int n) 676 { 677 int y, x = 1; 678 679 if (n == 0) 680 return (0); 681 682 do { /* newton was a stinker */ 683 y = x; 684 x = n / x; 685 x += y; 686 x /= 2; 687 } while ((x - y) > 1 || (x - y) < -1); 688 689 return (x); 690 } 691 692 static int 693 stone(int *a, int n, int *b, int *c, int flags) 694 { 695 int i, k, y, j, l; 696 int oldc, tc, oldl, sq; 697 u_int numtries, bound; 698 699 if (flags & D_MINIMAL) 700 bound = UINT_MAX; 701 else { 702 sq = isqrt(n); 703 bound = MAX(256, sq); 704 } 705 706 k = 0; 707 c[0] = newcand(0, 0, 0); 708 for (i = 1; i <= n; i++) { 709 j = a[i]; 710 if (j == 0) 711 continue; 712 y = -b[j]; 713 oldl = 0; 714 oldc = c[0]; 715 numtries = 0; 716 do { 717 if (y <= clist[oldc].y) 718 continue; 719 l = search(c, k, y); 720 if (l != oldl + 1) 721 oldc = c[l - 1]; 722 if (l <= k) { 723 if (clist[c[l]].y <= y) 724 continue; 725 tc = c[l]; 726 c[l] = newcand(i, y, oldc); 727 oldc = tc; 728 oldl = l; 729 numtries++; 730 } else { 731 c[l] = newcand(i, y, oldc); 732 k++; 733 break; 734 } 735 } while ((y = b[++j]) > 0 && numtries < bound); 736 } 737 return (k); 738 } 739 740 static int 741 newcand(int x, int y, int pred) 742 { 743 struct cand *q; 744 745 if (clen == clistlen) { 746 clistlen = clistlen * 11 / 10; 747 clist = xreallocarray(clist, clistlen, sizeof(*clist)); 748 } 749 q = clist + clen; 750 q->x = x; 751 q->y = y; 752 q->pred = pred; 753 return (clen++); 754 } 755 756 static int 757 search(int *c, int k, int y) 758 { 759 int i, j, l, t; 760 761 if (clist[c[k]].y < y) /* quick look for typical case */ 762 return (k + 1); 763 i = 0; 764 j = k + 1; 765 for (;;) { 766 l = (i + j) / 2; 767 if (l <= i) 768 break; 769 t = clist[c[l]].y; 770 if (t > y) 771 j = l; 772 else if (t < y) 773 i = l; 774 else 775 return (l); 776 } 777 return (l + 1); 778 } 779 780 static void 781 unravel(int p) 782 { 783 struct cand *q; 784 int i; 785 786 for (i = 0; i <= len[0]; i++) 787 J[i] = i <= pref ? i : 788 i > len[0] - suff ? i + len[1] - len[0] : 0; 789 for (q = clist + p; q->y != 0; q = clist + q->pred) 790 J[q->x + pref] = q->y + pref; 791 } 792 793 /* 794 * Check does double duty: 795 * 1. ferret out any fortuitous correspondences due 796 * to confounding by hashing (which result in "jackpot") 797 * 2. collect random access indexes to the two files 798 */ 799 static void 800 check(FILE *f1, FILE *f2, int flags) 801 { 802 int i, j, jackpot, c, d; 803 long ctold, ctnew; 804 805 rewind(f1); 806 rewind(f2); 807 j = 1; 808 ixold[0] = ixnew[0] = 0; 809 jackpot = 0; 810 ctold = ctnew = 0; 811 for (i = 1; i <= len[0]; i++) { 812 if (J[i] == 0) { 813 ixold[i] = ctold += skipline(f1); 814 continue; 815 } 816 while (j < J[i]) { 817 ixnew[j] = ctnew += skipline(f2); 818 j++; 819 } 820 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) { 821 for (;;) { 822 c = getc(f1); 823 d = getc(f2); 824 /* 825 * GNU diff ignores a missing newline 826 * in one file for -b or -w. 827 */ 828 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { 829 if (c == EOF && d == '\n') { 830 ctnew++; 831 break; 832 } else if (c == '\n' && d == EOF) { 833 ctold++; 834 break; 835 } 836 } 837 ctold++; 838 ctnew++; 839 if (flags & D_STRIPCR) { 840 if (c == '\r') { 841 if ((c = getc(f1)) == '\n') { 842 ctnew++; 843 break; 844 } 845 } 846 if (d == '\r') { 847 if ((d = getc(f2)) == '\n') { 848 ctold++; 849 break; 850 } 851 } 852 } 853 if ((flags & D_FOLDBLANKS) && isspace(c) && 854 isspace(d)) { 855 do { 856 if (c == '\n') 857 break; 858 ctold++; 859 } while (isspace(c = getc(f1))); 860 do { 861 if (d == '\n') 862 break; 863 ctnew++; 864 } while (isspace(d = getc(f2))); 865 } else if ((flags & D_IGNOREBLANKS)) { 866 while (isspace(c) && c != '\n') { 867 c = getc(f1); 868 ctold++; 869 } 870 while (isspace(d) && d != '\n') { 871 d = getc(f2); 872 ctnew++; 873 } 874 } 875 if (chrtran[c] != chrtran[d]) { 876 jackpot++; 877 J[i] = 0; 878 if (c != '\n' && c != EOF) 879 ctold += skipline(f1); 880 if (d != '\n' && c != EOF) 881 ctnew += skipline(f2); 882 break; 883 } 884 if (c == '\n' || c == EOF) 885 break; 886 } 887 } else { 888 for (;;) { 889 ctold++; 890 ctnew++; 891 if ((c = getc(f1)) != (d = getc(f2))) { 892 /* jackpot++; */ 893 J[i] = 0; 894 if (c != '\n' && c != EOF) 895 ctold += skipline(f1); 896 if (d != '\n' && c != EOF) 897 ctnew += skipline(f2); 898 break; 899 } 900 if (c == '\n' || c == EOF) 901 break; 902 } 903 } 904 ixold[i] = ctold; 905 ixnew[j] = ctnew; 906 j++; 907 } 908 for (; j <= len[1]; j++) { 909 ixnew[j] = ctnew += skipline(f2); 910 } 911 /* 912 * if (jackpot) 913 * fprintf(stderr, "jackpot\n"); 914 */ 915 } 916 917 /* shellsort CACM #201 */ 918 static void 919 sort(struct line *a, int n) 920 { 921 struct line *ai, *aim, w; 922 int j, m = 0, k; 923 924 if (n == 0) 925 return; 926 for (j = 1; j <= n; j *= 2) 927 m = 2 * j - 1; 928 for (m /= 2; m != 0; m /= 2) { 929 k = n - m; 930 for (j = 1; j <= k; j++) { 931 for (ai = &a[j]; ai > a; ai -= m) { 932 aim = &ai[m]; 933 if (aim < ai) 934 break; /* wraparound */ 935 if (aim->value > ai[0].value || 936 (aim->value == ai[0].value && 937 aim->serial > ai[0].serial)) 938 break; 939 w.value = ai[0].value; 940 ai[0].value = aim->value; 941 aim->value = w.value; 942 w.serial = ai[0].serial; 943 ai[0].serial = aim->serial; 944 aim->serial = w.serial; 945 } 946 } 947 } 948 } 949 950 static void 951 unsort(struct line *f, int l, int *b) 952 { 953 int *a, i; 954 955 a = xcalloc(l + 1, sizeof(*a)); 956 for (i = 1; i <= l; i++) 957 a[f[i].serial] = f[i].value; 958 for (i = 1; i <= l; i++) 959 b[i] = a[i]; 960 free(a); 961 } 962 963 static int 964 skipline(FILE *f) 965 { 966 int i, c; 967 968 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 969 continue; 970 return (i); 971 } 972 973 static void 974 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) 975 { 976 int m, i0, i1, j0, j1; 977 978 rewind(f1); 979 rewind(f2); 980 m = len[0]; 981 J[0] = 0; 982 J[m + 1] = len[1] + 1; 983 if (diff_format != D_EDIT) { 984 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 985 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 986 i0++; 987 j0 = J[i0 - 1] + 1; 988 i1 = i0 - 1; 989 while (i1 < m && J[i1 + 1] == 0) 990 i1++; 991 j1 = J[i1 + 1] - 1; 992 J[i1] = j1; 993 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); 994 } 995 } else { 996 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 997 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 998 i0--; 999 j0 = J[i0 + 1] - 1; 1000 i1 = i0 + 1; 1001 while (i1 > 1 && J[i1 - 1] == 0) 1002 i1--; 1003 j1 = J[i1 - 1] + 1; 1004 J[i1] = j1; 1005 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); 1006 } 1007 } 1008 if (m == 0) 1009 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); 1010 if (diff_format == D_IFDEF) { 1011 for (;;) { 1012 #define c i0 1013 if ((c = getc(f1)) == EOF) 1014 return; 1015 diff_output("%c", c); 1016 } 1017 #undef c 1018 } 1019 if (anychange != 0) { 1020 if (diff_format == D_CONTEXT) 1021 dump_context_vec(f1, f2, flags); 1022 else if (diff_format == D_UNIFIED) 1023 dump_unified_vec(f1, f2, flags); 1024 } 1025 } 1026 1027 static void 1028 range(int a, int b, const char *separator) 1029 { 1030 diff_output("%d", a > b ? b : a); 1031 if (a < b) 1032 diff_output("%s%d", separator, b); 1033 } 1034 1035 static void 1036 uni_range(int a, int b) 1037 { 1038 if (a < b) 1039 diff_output("%d,%d", a, b - a + 1); 1040 else if (a == b) 1041 diff_output("%d", b); 1042 else 1043 diff_output("%d,0", b); 1044 } 1045 1046 static char * 1047 preadline(int fd, size_t rlen, off_t off) 1048 { 1049 char *line; 1050 ssize_t nr; 1051 1052 line = xmalloc(rlen + 1); 1053 if ((nr = pread(fd, line, rlen, off)) < 0) 1054 err(2, "preadline"); 1055 if (nr > 0 && line[nr-1] == '\n') 1056 nr--; 1057 line[nr] = '\0'; 1058 return (line); 1059 } 1060 1061 static int 1062 ignoreline(char *line) 1063 { 1064 int ret; 1065 1066 ret = regexec(&ignore_re, line, 0, NULL, 0); 1067 free(line); 1068 return (ret == 0); /* if it matched, it should be ignored. */ 1069 } 1070 1071 /* 1072 * Indicate that there is a difference between lines a and b of the from file 1073 * to get to lines c to d of the to file. If a is greater then b then there 1074 * are no lines in the from file involved and this means that there were 1075 * lines appended (beginning at b). If c is greater than d then there are 1076 * lines missing from the to file. 1077 */ 1078 static void 1079 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, 1080 int *pflags) 1081 { 1082 static size_t max_context = 64; 1083 int i; 1084 1085 restart: 1086 if (diff_format != D_IFDEF && a > b && c > d) 1087 return; 1088 if (ignore_pats != NULL) { 1089 char *line; 1090 /* 1091 * All lines in the change, insert, or delete must 1092 * match an ignore pattern for the change to be 1093 * ignored. 1094 */ 1095 if (a <= b) { /* Changes and deletes. */ 1096 for (i = a; i <= b; i++) { 1097 line = preadline(fileno(f1), 1098 ixold[i] - ixold[i - 1], ixold[i - 1]); 1099 if (!ignoreline(line)) 1100 goto proceed; 1101 } 1102 } 1103 if (a > b || c <= d) { /* Changes and inserts. */ 1104 for (i = c; i <= d; i++) { 1105 line = preadline(fileno(f2), 1106 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1107 if (!ignoreline(line)) 1108 goto proceed; 1109 } 1110 } 1111 return; 1112 } 1113 proceed: 1114 if (*pflags & D_HEADER) { 1115 diff_output("%s %s %s\n", diffargs, file1, file2); 1116 *pflags &= ~D_HEADER; 1117 } 1118 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { 1119 /* 1120 * Allocate change records as needed. 1121 */ 1122 if (context_vec_ptr == context_vec_end - 1) { 1123 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1124 max_context <<= 1; 1125 context_vec_start = xreallocarray(context_vec_start, 1126 max_context, sizeof(*context_vec_start)); 1127 context_vec_end = context_vec_start + max_context; 1128 context_vec_ptr = context_vec_start + offset; 1129 } 1130 if (anychange == 0) { 1131 /* 1132 * Print the context/unidiff header first time through. 1133 */ 1134 print_header(file1, file2); 1135 anychange = 1; 1136 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && 1137 c > context_vec_ptr->d + (2 * diff_context) + 1) { 1138 /* 1139 * If this change is more than 'diff_context' lines from the 1140 * previous change, dump the record and reset it. 1141 */ 1142 if (diff_format == D_CONTEXT) 1143 dump_context_vec(f1, f2, *pflags); 1144 else 1145 dump_unified_vec(f1, f2, *pflags); 1146 } 1147 context_vec_ptr++; 1148 context_vec_ptr->a = a; 1149 context_vec_ptr->b = b; 1150 context_vec_ptr->c = c; 1151 context_vec_ptr->d = d; 1152 return; 1153 } 1154 if (anychange == 0) 1155 anychange = 1; 1156 switch (diff_format) { 1157 case D_BRIEF: 1158 return; 1159 case D_NORMAL: 1160 case D_EDIT: 1161 range(a, b, ","); 1162 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1163 if (diff_format == D_NORMAL) 1164 range(c, d, ","); 1165 diff_output("\n"); 1166 break; 1167 case D_REVERSE: 1168 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1169 range(a, b, " "); 1170 diff_output("\n"); 1171 break; 1172 case D_NREVERSE: 1173 if (a > b) 1174 diff_output("a%d %d\n", b, d - c + 1); 1175 else { 1176 diff_output("d%d %d\n", a, b - a + 1); 1177 if (!(c > d)) 1178 /* add changed lines */ 1179 diff_output("a%d %d\n", b, d - c + 1); 1180 } 1181 break; 1182 } 1183 if (diff_format == D_NORMAL || diff_format == D_IFDEF) { 1184 fetch(ixold, a, b, f1, '<', 1, *pflags); 1185 if (a <= b && c <= d && diff_format == D_NORMAL) 1186 diff_output("---\n"); 1187 } 1188 i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); 1189 if (i != 0 && diff_format == D_EDIT) { 1190 /* 1191 * A non-zero return value for D_EDIT indicates that the 1192 * last line printed was a bare dot (".") that has been 1193 * escaped as ".." to prevent ed(1) from misinterpreting 1194 * it. We have to add a substitute command to change this 1195 * back and restart where we left off. 1196 */ 1197 diff_output(".\n"); 1198 diff_output("%ds/.//\n", a + i - 1); 1199 b = a + i - 1; 1200 a = b + 1; 1201 c += i; 1202 goto restart; 1203 } 1204 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) 1205 diff_output(".\n"); 1206 if (inifdef) { 1207 diff_output("#endif /* %s */\n", ifdefname); 1208 inifdef = 0; 1209 } 1210 } 1211 1212 static int 1213 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) 1214 { 1215 int i, j, c, lastc, col, nc; 1216 int newcol; 1217 1218 /* 1219 * When doing #ifdef's, copy down to current line 1220 * if this is the first file, so that stuff makes it to output. 1221 */ 1222 if (diff_format == D_IFDEF && oldfile) { 1223 long curpos = ftell(lb); 1224 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1225 nc = f[a > b ? b : a - 1] - curpos; 1226 for (i = 0; i < nc; i++) 1227 diff_output("%c", getc(lb)); 1228 } 1229 if (a > b) 1230 return (0); 1231 if (diff_format == D_IFDEF) { 1232 if (inifdef) { 1233 diff_output("#else /* %s%s */\n", 1234 oldfile == 1 ? "!" : "", ifdefname); 1235 } else { 1236 if (oldfile) 1237 diff_output("#ifndef %s\n", ifdefname); 1238 else 1239 diff_output("#ifdef %s\n", ifdefname); 1240 } 1241 inifdef = 1 + oldfile; 1242 } 1243 for (i = a; i <= b; i++) { 1244 fseek(lb, f[i - 1], SEEK_SET); 1245 nc = f[i] - f[i - 1]; 1246 if (diff_format != D_IFDEF && ch != '\0') { 1247 diff_output("%c", ch); 1248 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT 1249 || diff_format == D_UNIFIED)) 1250 diff_output("\t"); 1251 else if (diff_format != D_UNIFIED) 1252 diff_output(" "); 1253 } 1254 col = 0; 1255 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1256 if ((c = getc(lb)) == EOF) { 1257 if (diff_format == D_EDIT || diff_format == D_REVERSE || 1258 diff_format == D_NREVERSE) 1259 warnx("No newline at end of file"); 1260 else 1261 diff_output("\n\\ No newline at end of " 1262 "file\n"); 1263 return (0); 1264 } 1265 if (c == '\t' && (flags & D_EXPANDTABS)) { 1266 newcol = ((col/tabsize)+1)*tabsize; 1267 do { 1268 diff_output(" "); 1269 } while (++col < newcol); 1270 } else { 1271 if (diff_format == D_EDIT && j == 1 && c == '\n' 1272 && lastc == '.') { 1273 /* 1274 * Don't print a bare "." line 1275 * since that will confuse ed(1). 1276 * Print ".." instead and return, 1277 * giving the caller an offset 1278 * from which to restart. 1279 */ 1280 diff_output(".\n"); 1281 return (i - a + 1); 1282 } 1283 diff_output("%c", c); 1284 col++; 1285 } 1286 } 1287 } 1288 return (0); 1289 } 1290 1291 /* 1292 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1293 */ 1294 static int 1295 readhash(FILE *f, int flags) 1296 { 1297 int i, t, space; 1298 int sum; 1299 1300 sum = 1; 1301 space = 0; 1302 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { 1303 if (flags & D_IGNORECASE) 1304 for (i = 0; (t = getc(f)) != '\n'; i++) { 1305 if (flags & D_STRIPCR && t == '\r') { 1306 t = getc(f); 1307 if (t == '\n') 1308 break; 1309 ungetc(t, f); 1310 } 1311 if (t == EOF) { 1312 if (i == 0) 1313 return (0); 1314 break; 1315 } 1316 sum = sum * 127 + chrtran[t]; 1317 } 1318 else 1319 for (i = 0; (t = getc(f)) != '\n'; i++) { 1320 if (flags & D_STRIPCR && t == '\r') { 1321 t = getc(f); 1322 if (t == '\n') 1323 break; 1324 ungetc(t, f); 1325 } 1326 if (t == EOF) { 1327 if (i == 0) 1328 return (0); 1329 break; 1330 } 1331 sum = sum * 127 + t; 1332 } 1333 } else { 1334 for (i = 0;;) { 1335 switch (t = getc(f)) { 1336 case '\r': 1337 case '\t': 1338 case '\v': 1339 case '\f': 1340 case ' ': 1341 space++; 1342 continue; 1343 default: 1344 if (space && (flags & D_IGNOREBLANKS) == 0) { 1345 i++; 1346 space = 0; 1347 } 1348 sum = sum * 127 + chrtran[t]; 1349 i++; 1350 continue; 1351 case EOF: 1352 if (i == 0) 1353 return (0); 1354 /* FALLTHROUGH */ 1355 case '\n': 1356 break; 1357 } 1358 break; 1359 } 1360 } 1361 /* 1362 * There is a remote possibility that we end up with a zero sum. 1363 * Zero is used as an EOF marker, so return 1 instead. 1364 */ 1365 return (sum == 0 ? 1 : sum); 1366 } 1367 1368 static int 1369 asciifile(FILE *f) 1370 { 1371 unsigned char buf[BUFSIZ]; 1372 size_t cnt; 1373 1374 if (f == NULL) 1375 return (1); 1376 1377 rewind(f); 1378 cnt = fread(buf, 1, sizeof(buf), f); 1379 return (memchr(buf, '\0', cnt) == NULL); 1380 } 1381 1382 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) 1383 1384 static char * 1385 match_function(const long *f, int pos, FILE *fp) 1386 { 1387 unsigned char buf[FUNCTION_CONTEXT_SIZE]; 1388 size_t nc; 1389 int last = lastline; 1390 const char *state = NULL; 1391 1392 lastline = pos; 1393 while (pos > last) { 1394 fseek(fp, f[pos - 1], SEEK_SET); 1395 nc = f[pos] - f[pos - 1]; 1396 if (nc >= sizeof(buf)) 1397 nc = sizeof(buf) - 1; 1398 nc = fread(buf, 1, nc, fp); 1399 if (nc > 0) { 1400 buf[nc] = '\0'; 1401 buf[strcspn(buf, "\n")] = '\0'; 1402 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 1403 if (begins_with(buf, "private:")) { 1404 if (!state) 1405 state = " (private)"; 1406 } else if (begins_with(buf, "protected:")) { 1407 if (!state) 1408 state = " (protected)"; 1409 } else if (begins_with(buf, "public:")) { 1410 if (!state) 1411 state = " (public)"; 1412 } else { 1413 strlcpy(lastbuf, buf, sizeof lastbuf); 1414 if (state) 1415 strlcat(lastbuf, state, 1416 sizeof lastbuf); 1417 lastmatchline = pos; 1418 return lastbuf; 1419 } 1420 } 1421 } 1422 pos--; 1423 } 1424 return lastmatchline > 0 ? lastbuf : NULL; 1425 } 1426 1427 /* dump accumulated "context" diff changes */ 1428 static void 1429 dump_context_vec(FILE *f1, FILE *f2, int flags) 1430 { 1431 struct context_vec *cvp = context_vec_start; 1432 int lowa, upb, lowc, upd, do_output; 1433 int a, b, c, d; 1434 char ch, *f; 1435 1436 if (context_vec_start > context_vec_ptr) 1437 return; 1438 1439 b = d = 0; /* gcc */ 1440 lowa = MAX(1, cvp->a - diff_context); 1441 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1442 lowc = MAX(1, cvp->c - diff_context); 1443 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1444 1445 diff_output("***************"); 1446 if ((flags & D_PROTOTYPE)) { 1447 f = match_function(ixold, lowa-1, f1); 1448 if (f != NULL) 1449 diff_output(" %s", f); 1450 } 1451 diff_output("\n*** "); 1452 range(lowa, upb, ","); 1453 diff_output(" ****\n"); 1454 1455 /* 1456 * Output changes to the "old" file. The first loop suppresses 1457 * output if there were no changes to the "old" file (we'll see 1458 * the "old" lines as context in the "new" list). 1459 */ 1460 do_output = 0; 1461 for (; cvp <= context_vec_ptr; cvp++) 1462 if (cvp->a <= cvp->b) { 1463 cvp = context_vec_start; 1464 do_output++; 1465 break; 1466 } 1467 if (do_output) { 1468 while (cvp <= context_vec_ptr) { 1469 a = cvp->a; 1470 b = cvp->b; 1471 c = cvp->c; 1472 d = cvp->d; 1473 1474 if (a <= b && c <= d) 1475 ch = 'c'; 1476 else 1477 ch = (a <= b) ? 'd' : 'a'; 1478 1479 if (ch == 'a') 1480 fetch(ixold, lowa, b, f1, ' ', 0, flags); 1481 else { 1482 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1483 fetch(ixold, a, b, f1, 1484 ch == 'c' ? '!' : '-', 0, flags); 1485 } 1486 lowa = b + 1; 1487 cvp++; 1488 } 1489 fetch(ixold, b + 1, upb, f1, ' ', 0, flags); 1490 } 1491 /* output changes to the "new" file */ 1492 diff_output("--- "); 1493 range(lowc, upd, ","); 1494 diff_output(" ----\n"); 1495 1496 do_output = 0; 1497 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1498 if (cvp->c <= cvp->d) { 1499 cvp = context_vec_start; 1500 do_output++; 1501 break; 1502 } 1503 if (do_output) { 1504 while (cvp <= context_vec_ptr) { 1505 a = cvp->a; 1506 b = cvp->b; 1507 c = cvp->c; 1508 d = cvp->d; 1509 1510 if (a <= b && c <= d) 1511 ch = 'c'; 1512 else 1513 ch = (a <= b) ? 'd' : 'a'; 1514 1515 if (ch == 'd') 1516 fetch(ixnew, lowc, d, f2, ' ', 0, flags); 1517 else { 1518 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1519 fetch(ixnew, c, d, f2, 1520 ch == 'c' ? '!' : '+', 0, flags); 1521 } 1522 lowc = d + 1; 1523 cvp++; 1524 } 1525 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1526 } 1527 context_vec_ptr = context_vec_start - 1; 1528 } 1529 1530 /* dump accumulated "unified" diff changes */ 1531 static void 1532 dump_unified_vec(FILE *f1, FILE *f2, int flags) 1533 { 1534 struct context_vec *cvp = context_vec_start; 1535 int lowa, upb, lowc, upd; 1536 int a, b, c, d; 1537 char ch, *f; 1538 1539 if (context_vec_start > context_vec_ptr) 1540 return; 1541 1542 b = d = 0; /* gcc */ 1543 lowa = MAX(1, cvp->a - diff_context); 1544 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1545 lowc = MAX(1, cvp->c - diff_context); 1546 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1547 1548 diff_output("@@ -"); 1549 uni_range(lowa, upb); 1550 diff_output(" +"); 1551 uni_range(lowc, upd); 1552 diff_output(" @@"); 1553 if ((flags & D_PROTOTYPE)) { 1554 f = match_function(ixold, lowa-1, f1); 1555 if (f != NULL) 1556 diff_output(" %s", f); 1557 } 1558 diff_output("\n"); 1559 1560 /* 1561 * Output changes in "unified" diff format--the old and new lines 1562 * are printed together. 1563 */ 1564 for (; cvp <= context_vec_ptr; cvp++) { 1565 a = cvp->a; 1566 b = cvp->b; 1567 c = cvp->c; 1568 d = cvp->d; 1569 1570 /* 1571 * c: both new and old changes 1572 * d: only changes in the old file 1573 * a: only changes in the new file 1574 */ 1575 if (a <= b && c <= d) 1576 ch = 'c'; 1577 else 1578 ch = (a <= b) ? 'd' : 'a'; 1579 1580 switch (ch) { 1581 case 'c': 1582 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1583 fetch(ixold, a, b, f1, '-', 0, flags); 1584 fetch(ixnew, c, d, f2, '+', 0, flags); 1585 break; 1586 case 'd': 1587 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1588 fetch(ixold, a, b, f1, '-', 0, flags); 1589 break; 1590 case 'a': 1591 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1592 fetch(ixnew, c, d, f2, '+', 0, flags); 1593 break; 1594 } 1595 lowa = b + 1; 1596 lowc = d + 1; 1597 } 1598 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1599 1600 context_vec_ptr = context_vec_start - 1; 1601 } 1602 1603 static void 1604 print_header(const char *file1, const char *file2) 1605 { 1606 const char *time_format; 1607 char buf1[256]; 1608 char buf2[256]; 1609 char end1[10]; 1610 char end2[10]; 1611 struct tm tm1, tm2, *tm_ptr1, *tm_ptr2; 1612 int nsec1 = stb1.st_mtim.tv_nsec; 1613 int nsec2 = stb2.st_mtim.tv_nsec; 1614 1615 time_format = "%Y-%m-%d %H:%M:%S"; 1616 1617 if (cflag) 1618 time_format = "%c"; 1619 tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1); 1620 tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2); 1621 strftime(buf1, 256, time_format, tm_ptr1); 1622 strftime(buf2, 256, time_format, tm_ptr2); 1623 if (!cflag) { 1624 strftime(end1, 10, "%z", tm_ptr1); 1625 strftime(end2, 10, "%z", tm_ptr2); 1626 sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1); 1627 sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2); 1628 } 1629 if (label[0] != NULL) 1630 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", 1631 label[0]); 1632 else 1633 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---", 1634 file1, buf1); 1635 if (label[1] != NULL) 1636 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", 1637 label[1]); 1638 else 1639 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++", 1640 file2, buf2); 1641 } 1642