1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #include <sys/cdefs.h> 68 __FBSDID("$FreeBSD$"); 69 70 #include <sys/capsicum.h> 71 #include <sys/procdesc.h> 72 #include <sys/stat.h> 73 #include <sys/types.h> 74 #include <sys/event.h> 75 #include <sys/wait.h> 76 77 #include <capsicum_helpers.h> 78 #include <ctype.h> 79 #include <err.h> 80 #include <errno.h> 81 #include <fcntl.h> 82 #include <paths.h> 83 #include <regex.h> 84 #include <stddef.h> 85 #include <stdint.h> 86 #include <stdio.h> 87 #include <stdlib.h> 88 #include <string.h> 89 #include <unistd.h> 90 #include <limits.h> 91 #include <signal.h> 92 93 #include "diff.h" 94 #include "xmalloc.h" 95 96 #define _PATH_PR "/usr/bin/pr" 97 98 /* 99 * diff - compare two files. 100 */ 101 102 /* 103 * Uses an algorithm due to Harold Stone, which finds 104 * a pair of longest identical subsequences in the two 105 * files. 106 * 107 * The major goal is to generate the match vector J. 108 * J[i] is the index of the line in file1 corresponding 109 * to line i file0. J[i] = 0 if there is no 110 * such line in file1. 111 * 112 * Lines are hashed so as to work in core. All potential 113 * matches are located by sorting the lines of each file 114 * on the hash (called ``value''). In particular, this 115 * collects the equivalence classes in file1 together. 116 * Subroutine equiv replaces the value of each line in 117 * file0 by the index of the first element of its 118 * matching equivalence in (the reordered) file1. 119 * To save space equiv squeezes file1 into a single 120 * array member in which the equivalence classes 121 * are simply concatenated, except that their first 122 * members are flagged by changing sign. 123 * 124 * Next the indices that point into member are unsorted into 125 * array class according to the original order of file0. 126 * 127 * The cleverness lies in routine stone. This marches 128 * through the lines of file0, developing a vector klist 129 * of "k-candidates". At step i a k-candidate is a matched 130 * pair of lines x,y (x in file0 y in file1) such that 131 * there is a common subsequence of length k 132 * between the first i lines of file0 and the first y 133 * lines of file1, but there is no such subsequence for 134 * any smaller y. x is the earliest possible mate to y 135 * that occurs in such a subsequence. 136 * 137 * Whenever any of the members of the equivalence class of 138 * lines in file1 matable to a line in file0 has serial number 139 * less than the y of some k-candidate, that k-candidate 140 * with the smallest such y is replaced. The new 141 * k-candidate is chained (via pred) to the current 142 * k-1 candidate so that the actual subsequence can 143 * be recovered. When a member has serial number greater 144 * that the y of all k-candidates, the klist is extended. 145 * At the end, the longest subsequence is pulled out 146 * and placed in the array J by unravel 147 * 148 * With J in hand, the matches there recorded are 149 * check'ed against reality to assure that no spurious 150 * matches have crept in due to hashing. If they have, 151 * they are broken, and "jackpot" is recorded--a harmless 152 * matter except that a true match for a spuriously 153 * mated line may now be unnecessarily reported as a change. 154 * 155 * Much of the complexity of the program comes simply 156 * from trying to minimize core utilization and 157 * maximize the range of doable problems by dynamically 158 * allocating what is needed and reusing what is not. 159 * The core requirements for problems larger than somewhat 160 * are (in words) 2*length(file0) + length(file1) + 161 * 3*(number of k-candidates installed), typically about 162 * 6n words for files of length n. 163 */ 164 165 struct cand { 166 int x; 167 int y; 168 int pred; 169 }; 170 171 static struct line { 172 int serial; 173 int value; 174 } *file[2]; 175 176 /* 177 * The following struct is used to record change information when 178 * doing a "context" or "unified" diff. (see routine "change" to 179 * understand the highly mnemonic field names) 180 */ 181 struct context_vec { 182 int a; /* start line in old file */ 183 int b; /* end line in old file */ 184 int c; /* start line in new file */ 185 int d; /* end line in new file */ 186 }; 187 188 #define diff_output printf 189 static FILE *opentemp(const char *); 190 static void output(char *, FILE *, char *, FILE *, int); 191 static void check(FILE *, FILE *, int); 192 static void range(int, int, const char *); 193 static void uni_range(int, int); 194 static void dump_context_vec(FILE *, FILE *, int); 195 static void dump_unified_vec(FILE *, FILE *, int); 196 static void prepare(int, FILE *, size_t, int); 197 static void prune(void); 198 static void equiv(struct line *, int, struct line *, int, int *); 199 static void unravel(int); 200 static void unsort(struct line *, int, int *); 201 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); 202 static void sort(struct line *, int); 203 static void print_header(const char *, const char *); 204 static int ignoreline(char *); 205 static int asciifile(FILE *); 206 static int fetch(long *, int, int, FILE *, int, int, int); 207 static int newcand(int, int, int); 208 static int search(int *, int, int); 209 static int skipline(FILE *); 210 static int isqrt(int); 211 static int stone(int *, int, int *, int *, int); 212 static int readhash(FILE *, int); 213 static int files_differ(FILE *, FILE *, int); 214 static char *match_function(const long *, int, FILE *); 215 static char *preadline(int, size_t, off_t); 216 217 static int *J; /* will be overlaid on class */ 218 static int *class; /* will be overlaid on file[0] */ 219 static int *klist; /* will be overlaid on file[0] after class */ 220 static int *member; /* will be overlaid on file[1] */ 221 static int clen; 222 static int inifdef; /* whether or not we are in a #ifdef block */ 223 static int len[2]; 224 static int pref, suff; /* length of prefix and suffix */ 225 static int slen[2]; 226 static int anychange; 227 static long *ixnew; /* will be overlaid on file[1] */ 228 static long *ixold; /* will be overlaid on klist */ 229 static struct cand *clist; /* merely a free storage pot for candidates */ 230 static int clistlen; /* the length of clist */ 231 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 232 static u_char *chrtran; /* translation table for case-folding */ 233 static struct context_vec *context_vec_start; 234 static struct context_vec *context_vec_end; 235 static struct context_vec *context_vec_ptr; 236 237 #define FUNCTION_CONTEXT_SIZE 55 238 static char lastbuf[FUNCTION_CONTEXT_SIZE]; 239 static int lastline; 240 static int lastmatchline; 241 242 243 /* 244 * chrtran points to one of 2 translation tables: cup2low if folding upper to 245 * lower case clow2low if not folding case 246 */ 247 static u_char clow2low[256] = { 248 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 249 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 250 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 251 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 252 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 253 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 254 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 255 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 256 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 257 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 258 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 259 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 260 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 261 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 262 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 263 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 264 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 265 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 266 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 267 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 268 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 269 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 270 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 271 0xfd, 0xfe, 0xff 272 }; 273 274 static u_char cup2low[256] = { 275 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 276 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 277 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 278 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 279 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 280 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 281 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 282 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 283 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 284 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 285 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 286 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 287 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 288 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 289 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 290 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 291 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 292 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 293 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 294 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 295 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 296 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 297 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 298 0xfd, 0xfe, 0xff 299 }; 300 301 int 302 diffreg(char *file1, char *file2, int flags, int capsicum) 303 { 304 FILE *f1, *f2; 305 int i, rval; 306 int ostdout = -1; 307 int pr_pd, kq; 308 struct kevent *e; 309 cap_rights_t rights_ro; 310 311 e = NULL; 312 kq = -1; 313 f1 = f2 = NULL; 314 rval = D_SAME; 315 anychange = 0; 316 lastline = 0; 317 lastmatchline = 0; 318 context_vec_ptr = context_vec_start - 1; 319 if (flags & D_IGNORECASE) 320 chrtran = cup2low; 321 else 322 chrtran = clow2low; 323 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 324 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); 325 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 326 goto closem; 327 328 if (flags & D_EMPTY1) 329 f1 = fopen(_PATH_DEVNULL, "r"); 330 else { 331 if (!S_ISREG(stb1.st_mode)) { 332 if ((f1 = opentemp(file1)) == NULL || 333 fstat(fileno(f1), &stb1) < 0) { 334 warn("%s", file1); 335 status |= 2; 336 goto closem; 337 } 338 } else if (strcmp(file1, "-") == 0) 339 f1 = stdin; 340 else 341 f1 = fopen(file1, "r"); 342 } 343 if (f1 == NULL) { 344 warn("%s", file1); 345 status |= 2; 346 goto closem; 347 } 348 349 if (flags & D_EMPTY2) 350 f2 = fopen(_PATH_DEVNULL, "r"); 351 else { 352 if (!S_ISREG(stb2.st_mode)) { 353 if ((f2 = opentemp(file2)) == NULL || 354 fstat(fileno(f2), &stb2) < 0) { 355 warn("%s", file2); 356 status |= 2; 357 goto closem; 358 } 359 } else if (strcmp(file2, "-") == 0) 360 f2 = stdin; 361 else 362 f2 = fopen(file2, "r"); 363 } 364 if (f2 == NULL) { 365 warn("%s", file2); 366 status |= 2; 367 goto closem; 368 } 369 370 if (lflag) { 371 /* redirect stdout to pr */ 372 int pfd[2]; 373 pid_t pid; 374 char *header; 375 376 xasprintf(&header, "%s %s %s", diffargs, file1, file2); 377 signal(SIGPIPE, SIG_IGN); 378 fflush(stdout); 379 rewind(stdout); 380 pipe(pfd); 381 switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) { 382 case -1: 383 status |= 2; 384 free(header); 385 err(2, "No more processes"); 386 case 0: 387 /* child */ 388 if (pfd[0] != STDIN_FILENO) { 389 dup2(pfd[0], STDIN_FILENO); 390 close(pfd[0]); 391 } 392 close(pfd[1]); 393 execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0); 394 _exit(127); 395 default: 396 397 /* parent */ 398 if (pfd[1] != STDOUT_FILENO) { 399 ostdout = dup(STDOUT_FILENO); 400 dup2(pfd[1], STDOUT_FILENO); 401 close(pfd[1]); 402 } 403 close(pfd[0]); 404 rewind(stdout); 405 free(header); 406 kq = kqueue(); 407 if (kq == -1) 408 err(2, "kqueue"); 409 e = xmalloc(sizeof(struct kevent)); 410 EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0, 411 NULL); 412 if (kevent(kq, e, 1, NULL, 0, NULL) == -1) 413 err(2, "kevent"); 414 } 415 } 416 417 if (capsicum) { 418 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 419 if (cap_rights_limit(fileno(f1), &rights_ro) < 0 420 && errno != ENOSYS) 421 err(2, "unable to limit rights on: %s", file1); 422 if (cap_rights_limit(fileno(f2), &rights_ro) < 0 && 423 errno != ENOSYS) 424 err(2, "unable to limit rights on: %s", file2); 425 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 426 /* stding has already been limited */ 427 if (caph_limit_stderr() == -1) 428 err(2, "unable to limit stderr"); 429 if (caph_limit_stdout() == -1) 430 err(2, "unable to limit stdout"); 431 } else if (caph_limit_stdio() == -1) 432 err(2, "unable to limit stdio"); 433 434 caph_cache_catpages(); 435 caph_cache_tzdata(); 436 if (cap_enter() < 0 && errno != ENOSYS) 437 err(2, "unable to enter capability mode"); 438 } 439 440 switch (files_differ(f1, f2, flags)) { 441 case 0: 442 goto closem; 443 case 1: 444 break; 445 default: 446 /* error */ 447 status |= 2; 448 goto closem; 449 } 450 451 if ((flags & D_FORCEASCII) == 0 && 452 (!asciifile(f1) || !asciifile(f2))) { 453 rval = D_BINARY; 454 status |= 1; 455 goto closem; 456 } 457 prepare(0, f1, stb1.st_size, flags); 458 prepare(1, f2, stb2.st_size, flags); 459 460 prune(); 461 sort(sfile[0], slen[0]); 462 sort(sfile[1], slen[1]); 463 464 member = (int *)file[1]; 465 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 466 member = xreallocarray(member, slen[1] + 2, sizeof(*member)); 467 468 class = (int *)file[0]; 469 unsort(sfile[0], slen[0], class); 470 class = xreallocarray(class, slen[0] + 2, sizeof(*class)); 471 472 klist = xcalloc(slen[0] + 2, sizeof(*klist)); 473 clen = 0; 474 clistlen = 100; 475 clist = xcalloc(clistlen, sizeof(*clist)); 476 i = stone(class, slen[0], member, klist, flags); 477 free(member); 478 free(class); 479 480 J = xreallocarray(J, len[0] + 2, sizeof(*J)); 481 unravel(klist[i]); 482 free(clist); 483 free(klist); 484 485 ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); 486 ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); 487 check(f1, f2, flags); 488 output(file1, f1, file2, f2, flags); 489 if (ostdout != -1 && e != NULL) { 490 /* close the pipe to pr and restore stdout */ 491 int wstatus; 492 493 fflush(stdout); 494 if (ostdout != STDOUT_FILENO) { 495 close(STDOUT_FILENO); 496 dup2(ostdout, STDOUT_FILENO); 497 close(ostdout); 498 } 499 if (kevent(kq, NULL, 0, e, 1, NULL) == -1) 500 err(2, "kevent"); 501 wstatus = e[0].data; 502 close(kq); 503 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) 504 errx(2, "pr exited abnormally"); 505 else if (WIFSIGNALED(wstatus)) 506 errx(2, "pr killed by signal %d", 507 WTERMSIG(wstatus)); 508 } 509 510 closem: 511 if (anychange) { 512 status |= 1; 513 if (rval == D_SAME) 514 rval = D_DIFFER; 515 } 516 if (f1 != NULL) 517 fclose(f1); 518 if (f2 != NULL) 519 fclose(f2); 520 521 return (rval); 522 } 523 524 /* 525 * Check to see if the given files differ. 526 * Returns 0 if they are the same, 1 if different, and -1 on error. 527 * XXX - could use code from cmp(1) [faster] 528 */ 529 static int 530 files_differ(FILE *f1, FILE *f2, int flags) 531 { 532 char buf1[BUFSIZ], buf2[BUFSIZ]; 533 size_t i, j; 534 535 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 536 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 537 return (1); 538 for (;;) { 539 i = fread(buf1, 1, sizeof(buf1), f1); 540 j = fread(buf2, 1, sizeof(buf2), f2); 541 if ((!i && ferror(f1)) || (!j && ferror(f2))) 542 return (-1); 543 if (i != j) 544 return (1); 545 if (i == 0) 546 return (0); 547 if (memcmp(buf1, buf2, i) != 0) 548 return (1); 549 } 550 } 551 552 static FILE * 553 opentemp(const char *f) 554 { 555 char buf[BUFSIZ], tempfile[PATH_MAX]; 556 ssize_t nread; 557 int ifd, ofd; 558 559 if (strcmp(f, "-") == 0) 560 ifd = STDIN_FILENO; 561 else if ((ifd = open(f, O_RDONLY, 0644)) < 0) 562 return (NULL); 563 564 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); 565 566 if ((ofd = mkstemp(tempfile)) < 0) { 567 close(ifd); 568 return (NULL); 569 } 570 unlink(tempfile); 571 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 572 if (write(ofd, buf, nread) != nread) { 573 close(ifd); 574 close(ofd); 575 return (NULL); 576 } 577 } 578 close(ifd); 579 lseek(ofd, (off_t)0, SEEK_SET); 580 return (fdopen(ofd, "r")); 581 } 582 583 char * 584 splice(char *dir, char *path) 585 { 586 char *tail, *buf; 587 size_t dirlen; 588 589 dirlen = strlen(dir); 590 while (dirlen != 0 && dir[dirlen - 1] == '/') 591 dirlen--; 592 if ((tail = strrchr(path, '/')) == NULL) 593 tail = path; 594 else 595 tail++; 596 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); 597 return (buf); 598 } 599 600 static void 601 prepare(int i, FILE *fd, size_t filesize, int flags) 602 { 603 struct line *p; 604 int h; 605 size_t sz, j; 606 607 rewind(fd); 608 609 sz = MIN(filesize, SIZE_MAX) / 25; 610 if (sz < 100) 611 sz = 100; 612 613 p = xcalloc(sz + 3, sizeof(*p)); 614 for (j = 0; (h = readhash(fd, flags));) { 615 if (j == sz) { 616 sz = sz * 3 / 2; 617 p = xreallocarray(p, sz + 3, sizeof(*p)); 618 } 619 p[++j].value = h; 620 } 621 len[i] = j; 622 file[i] = p; 623 } 624 625 static void 626 prune(void) 627 { 628 int i, j; 629 630 for (pref = 0; pref < len[0] && pref < len[1] && 631 file[0][pref + 1].value == file[1][pref + 1].value; 632 pref++) 633 ; 634 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 635 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 636 suff++) 637 ; 638 for (j = 0; j < 2; j++) { 639 sfile[j] = file[j] + pref; 640 slen[j] = len[j] - pref - suff; 641 for (i = 0; i <= slen[j]; i++) 642 sfile[j][i].serial = i; 643 } 644 } 645 646 static void 647 equiv(struct line *a, int n, struct line *b, int m, int *c) 648 { 649 int i, j; 650 651 i = j = 1; 652 while (i <= n && j <= m) { 653 if (a[i].value < b[j].value) 654 a[i++].value = 0; 655 else if (a[i].value == b[j].value) 656 a[i++].value = j; 657 else 658 j++; 659 } 660 while (i <= n) 661 a[i++].value = 0; 662 b[m + 1].value = 0; 663 j = 0; 664 while (++j <= m) { 665 c[j] = -b[j].serial; 666 while (b[j + 1].value == b[j].value) { 667 j++; 668 c[j] = b[j].serial; 669 } 670 } 671 c[j] = -1; 672 } 673 674 /* Code taken from ping.c */ 675 static int 676 isqrt(int n) 677 { 678 int y, x = 1; 679 680 if (n == 0) 681 return (0); 682 683 do { /* newton was a stinker */ 684 y = x; 685 x = n / x; 686 x += y; 687 x /= 2; 688 } while ((x - y) > 1 || (x - y) < -1); 689 690 return (x); 691 } 692 693 static int 694 stone(int *a, int n, int *b, int *c, int flags) 695 { 696 int i, k, y, j, l; 697 int oldc, tc, oldl, sq; 698 u_int numtries, bound; 699 700 if (flags & D_MINIMAL) 701 bound = UINT_MAX; 702 else { 703 sq = isqrt(n); 704 bound = MAX(256, sq); 705 } 706 707 k = 0; 708 c[0] = newcand(0, 0, 0); 709 for (i = 1; i <= n; i++) { 710 j = a[i]; 711 if (j == 0) 712 continue; 713 y = -b[j]; 714 oldl = 0; 715 oldc = c[0]; 716 numtries = 0; 717 do { 718 if (y <= clist[oldc].y) 719 continue; 720 l = search(c, k, y); 721 if (l != oldl + 1) 722 oldc = c[l - 1]; 723 if (l <= k) { 724 if (clist[c[l]].y <= y) 725 continue; 726 tc = c[l]; 727 c[l] = newcand(i, y, oldc); 728 oldc = tc; 729 oldl = l; 730 numtries++; 731 } else { 732 c[l] = newcand(i, y, oldc); 733 k++; 734 break; 735 } 736 } while ((y = b[++j]) > 0 && numtries < bound); 737 } 738 return (k); 739 } 740 741 static int 742 newcand(int x, int y, int pred) 743 { 744 struct cand *q; 745 746 if (clen == clistlen) { 747 clistlen = clistlen * 11 / 10; 748 clist = xreallocarray(clist, clistlen, sizeof(*clist)); 749 } 750 q = clist + clen; 751 q->x = x; 752 q->y = y; 753 q->pred = pred; 754 return (clen++); 755 } 756 757 static int 758 search(int *c, int k, int y) 759 { 760 int i, j, l, t; 761 762 if (clist[c[k]].y < y) /* quick look for typical case */ 763 return (k + 1); 764 i = 0; 765 j = k + 1; 766 for (;;) { 767 l = (i + j) / 2; 768 if (l <= i) 769 break; 770 t = clist[c[l]].y; 771 if (t > y) 772 j = l; 773 else if (t < y) 774 i = l; 775 else 776 return (l); 777 } 778 return (l + 1); 779 } 780 781 static void 782 unravel(int p) 783 { 784 struct cand *q; 785 int i; 786 787 for (i = 0; i <= len[0]; i++) 788 J[i] = i <= pref ? i : 789 i > len[0] - suff ? i + len[1] - len[0] : 0; 790 for (q = clist + p; q->y != 0; q = clist + q->pred) 791 J[q->x + pref] = q->y + pref; 792 } 793 794 /* 795 * Check does double duty: 796 * 1. ferret out any fortuitous correspondences due 797 * to confounding by hashing (which result in "jackpot") 798 * 2. collect random access indexes to the two files 799 */ 800 static void 801 check(FILE *f1, FILE *f2, int flags) 802 { 803 int i, j, jackpot, c, d; 804 long ctold, ctnew; 805 806 rewind(f1); 807 rewind(f2); 808 j = 1; 809 ixold[0] = ixnew[0] = 0; 810 jackpot = 0; 811 ctold = ctnew = 0; 812 for (i = 1; i <= len[0]; i++) { 813 if (J[i] == 0) { 814 ixold[i] = ctold += skipline(f1); 815 continue; 816 } 817 while (j < J[i]) { 818 ixnew[j] = ctnew += skipline(f2); 819 j++; 820 } 821 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) { 822 for (;;) { 823 c = getc(f1); 824 d = getc(f2); 825 /* 826 * GNU diff ignores a missing newline 827 * in one file for -b or -w. 828 */ 829 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { 830 if (c == EOF && d == '\n') { 831 ctnew++; 832 break; 833 } else if (c == '\n' && d == EOF) { 834 ctold++; 835 break; 836 } 837 } 838 ctold++; 839 ctnew++; 840 if (flags & D_STRIPCR) { 841 if (c == '\r') { 842 if ((c = getc(f1)) == '\n') { 843 ctnew++; 844 break; 845 } 846 } 847 if (d == '\r') { 848 if ((d = getc(f2)) == '\n') { 849 ctold++; 850 break; 851 } 852 } 853 } 854 if ((flags & D_FOLDBLANKS) && isspace(c) && 855 isspace(d)) { 856 do { 857 if (c == '\n') 858 break; 859 ctold++; 860 } while (isspace(c = getc(f1))); 861 do { 862 if (d == '\n') 863 break; 864 ctnew++; 865 } while (isspace(d = getc(f2))); 866 } else if ((flags & D_IGNOREBLANKS)) { 867 while (isspace(c) && c != '\n') { 868 c = getc(f1); 869 ctold++; 870 } 871 while (isspace(d) && d != '\n') { 872 d = getc(f2); 873 ctnew++; 874 } 875 } 876 if (chrtran[c] != chrtran[d]) { 877 jackpot++; 878 J[i] = 0; 879 if (c != '\n' && c != EOF) 880 ctold += skipline(f1); 881 if (d != '\n' && c != EOF) 882 ctnew += skipline(f2); 883 break; 884 } 885 if (c == '\n' || c == EOF) 886 break; 887 } 888 } else { 889 for (;;) { 890 ctold++; 891 ctnew++; 892 if ((c = getc(f1)) != (d = getc(f2))) { 893 /* jackpot++; */ 894 J[i] = 0; 895 if (c != '\n' && c != EOF) 896 ctold += skipline(f1); 897 if (d != '\n' && c != EOF) 898 ctnew += skipline(f2); 899 break; 900 } 901 if (c == '\n' || c == EOF) 902 break; 903 } 904 } 905 ixold[i] = ctold; 906 ixnew[j] = ctnew; 907 j++; 908 } 909 for (; j <= len[1]; j++) { 910 ixnew[j] = ctnew += skipline(f2); 911 } 912 /* 913 * if (jackpot) 914 * fprintf(stderr, "jackpot\n"); 915 */ 916 } 917 918 /* shellsort CACM #201 */ 919 static void 920 sort(struct line *a, int n) 921 { 922 struct line *ai, *aim, w; 923 int j, m = 0, k; 924 925 if (n == 0) 926 return; 927 for (j = 1; j <= n; j *= 2) 928 m = 2 * j - 1; 929 for (m /= 2; m != 0; m /= 2) { 930 k = n - m; 931 for (j = 1; j <= k; j++) { 932 for (ai = &a[j]; ai > a; ai -= m) { 933 aim = &ai[m]; 934 if (aim < ai) 935 break; /* wraparound */ 936 if (aim->value > ai[0].value || 937 (aim->value == ai[0].value && 938 aim->serial > ai[0].serial)) 939 break; 940 w.value = ai[0].value; 941 ai[0].value = aim->value; 942 aim->value = w.value; 943 w.serial = ai[0].serial; 944 ai[0].serial = aim->serial; 945 aim->serial = w.serial; 946 } 947 } 948 } 949 } 950 951 static void 952 unsort(struct line *f, int l, int *b) 953 { 954 int *a, i; 955 956 a = xcalloc(l + 1, sizeof(*a)); 957 for (i = 1; i <= l; i++) 958 a[f[i].serial] = f[i].value; 959 for (i = 1; i <= l; i++) 960 b[i] = a[i]; 961 free(a); 962 } 963 964 static int 965 skipline(FILE *f) 966 { 967 int i, c; 968 969 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 970 continue; 971 return (i); 972 } 973 974 static void 975 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) 976 { 977 int m, i0, i1, j0, j1; 978 979 rewind(f1); 980 rewind(f2); 981 m = len[0]; 982 J[0] = 0; 983 J[m + 1] = len[1] + 1; 984 if (diff_format != D_EDIT) { 985 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 986 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 987 i0++; 988 j0 = J[i0 - 1] + 1; 989 i1 = i0 - 1; 990 while (i1 < m && J[i1 + 1] == 0) 991 i1++; 992 j1 = J[i1 + 1] - 1; 993 J[i1] = j1; 994 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); 995 } 996 } else { 997 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 998 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 999 i0--; 1000 j0 = J[i0 + 1] - 1; 1001 i1 = i0 + 1; 1002 while (i1 > 1 && J[i1 - 1] == 0) 1003 i1--; 1004 j1 = J[i1 - 1] + 1; 1005 J[i1] = j1; 1006 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); 1007 } 1008 } 1009 if (m == 0) 1010 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); 1011 if (diff_format == D_IFDEF) { 1012 for (;;) { 1013 #define c i0 1014 if ((c = getc(f1)) == EOF) 1015 return; 1016 diff_output("%c", c); 1017 } 1018 #undef c 1019 } 1020 if (anychange != 0) { 1021 if (diff_format == D_CONTEXT) 1022 dump_context_vec(f1, f2, flags); 1023 else if (diff_format == D_UNIFIED) 1024 dump_unified_vec(f1, f2, flags); 1025 } 1026 } 1027 1028 static void 1029 range(int a, int b, const char *separator) 1030 { 1031 diff_output("%d", a > b ? b : a); 1032 if (a < b) 1033 diff_output("%s%d", separator, b); 1034 } 1035 1036 static void 1037 uni_range(int a, int b) 1038 { 1039 if (a < b) 1040 diff_output("%d,%d", a, b - a + 1); 1041 else if (a == b) 1042 diff_output("%d", b); 1043 else 1044 diff_output("%d,0", b); 1045 } 1046 1047 static char * 1048 preadline(int fd, size_t rlen, off_t off) 1049 { 1050 char *line; 1051 ssize_t nr; 1052 1053 line = xmalloc(rlen + 1); 1054 if ((nr = pread(fd, line, rlen, off)) < 0) 1055 err(2, "preadline"); 1056 if (nr > 0 && line[nr-1] == '\n') 1057 nr--; 1058 line[nr] = '\0'; 1059 return (line); 1060 } 1061 1062 static int 1063 ignoreline(char *line) 1064 { 1065 int ret; 1066 1067 ret = regexec(&ignore_re, line, 0, NULL, 0); 1068 free(line); 1069 return (ret == 0); /* if it matched, it should be ignored. */ 1070 } 1071 1072 /* 1073 * Indicate that there is a difference between lines a and b of the from file 1074 * to get to lines c to d of the to file. If a is greater then b then there 1075 * are no lines in the from file involved and this means that there were 1076 * lines appended (beginning at b). If c is greater than d then there are 1077 * lines missing from the to file. 1078 */ 1079 static void 1080 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, 1081 int *pflags) 1082 { 1083 static size_t max_context = 64; 1084 int i; 1085 1086 restart: 1087 if (diff_format != D_IFDEF && a > b && c > d) 1088 return; 1089 if (ignore_pats != NULL) { 1090 char *line; 1091 /* 1092 * All lines in the change, insert, or delete must 1093 * match an ignore pattern for the change to be 1094 * ignored. 1095 */ 1096 if (a <= b) { /* Changes and deletes. */ 1097 for (i = a; i <= b; i++) { 1098 line = preadline(fileno(f1), 1099 ixold[i] - ixold[i - 1], ixold[i - 1]); 1100 if (!ignoreline(line)) 1101 goto proceed; 1102 } 1103 } 1104 if (a > b || c <= d) { /* Changes and inserts. */ 1105 for (i = c; i <= d; i++) { 1106 line = preadline(fileno(f2), 1107 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1108 if (!ignoreline(line)) 1109 goto proceed; 1110 } 1111 } 1112 return; 1113 } 1114 proceed: 1115 if (*pflags & D_HEADER) { 1116 diff_output("%s %s %s\n", diffargs, file1, file2); 1117 *pflags &= ~D_HEADER; 1118 } 1119 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { 1120 /* 1121 * Allocate change records as needed. 1122 */ 1123 if (context_vec_ptr == context_vec_end - 1) { 1124 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1125 max_context <<= 1; 1126 context_vec_start = xreallocarray(context_vec_start, 1127 max_context, sizeof(*context_vec_start)); 1128 context_vec_end = context_vec_start + max_context; 1129 context_vec_ptr = context_vec_start + offset; 1130 } 1131 if (anychange == 0) { 1132 /* 1133 * Print the context/unidiff header first time through. 1134 */ 1135 print_header(file1, file2); 1136 anychange = 1; 1137 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && 1138 c > context_vec_ptr->d + (2 * diff_context) + 1) { 1139 /* 1140 * If this change is more than 'diff_context' lines from the 1141 * previous change, dump the record and reset it. 1142 */ 1143 if (diff_format == D_CONTEXT) 1144 dump_context_vec(f1, f2, *pflags); 1145 else 1146 dump_unified_vec(f1, f2, *pflags); 1147 } 1148 context_vec_ptr++; 1149 context_vec_ptr->a = a; 1150 context_vec_ptr->b = b; 1151 context_vec_ptr->c = c; 1152 context_vec_ptr->d = d; 1153 return; 1154 } 1155 if (anychange == 0) 1156 anychange = 1; 1157 switch (diff_format) { 1158 case D_BRIEF: 1159 return; 1160 case D_NORMAL: 1161 case D_EDIT: 1162 range(a, b, ","); 1163 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1164 if (diff_format == D_NORMAL) 1165 range(c, d, ","); 1166 diff_output("\n"); 1167 break; 1168 case D_REVERSE: 1169 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1170 range(a, b, " "); 1171 diff_output("\n"); 1172 break; 1173 case D_NREVERSE: 1174 if (a > b) 1175 diff_output("a%d %d\n", b, d - c + 1); 1176 else { 1177 diff_output("d%d %d\n", a, b - a + 1); 1178 if (!(c > d)) 1179 /* add changed lines */ 1180 diff_output("a%d %d\n", b, d - c + 1); 1181 } 1182 break; 1183 } 1184 if (diff_format == D_NORMAL || diff_format == D_IFDEF) { 1185 fetch(ixold, a, b, f1, '<', 1, *pflags); 1186 if (a <= b && c <= d && diff_format == D_NORMAL) 1187 diff_output("---\n"); 1188 } 1189 i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); 1190 if (i != 0 && diff_format == D_EDIT) { 1191 /* 1192 * A non-zero return value for D_EDIT indicates that the 1193 * last line printed was a bare dot (".") that has been 1194 * escaped as ".." to prevent ed(1) from misinterpreting 1195 * it. We have to add a substitute command to change this 1196 * back and restart where we left off. 1197 */ 1198 diff_output(".\n"); 1199 diff_output("%ds/.//\n", a + i - 1); 1200 b = a + i - 1; 1201 a = b + 1; 1202 c += i; 1203 goto restart; 1204 } 1205 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) 1206 diff_output(".\n"); 1207 if (inifdef) { 1208 diff_output("#endif /* %s */\n", ifdefname); 1209 inifdef = 0; 1210 } 1211 } 1212 1213 static int 1214 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) 1215 { 1216 int i, j, c, lastc, col, nc; 1217 int newcol; 1218 1219 /* 1220 * When doing #ifdef's, copy down to current line 1221 * if this is the first file, so that stuff makes it to output. 1222 */ 1223 if (diff_format == D_IFDEF && oldfile) { 1224 long curpos = ftell(lb); 1225 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1226 nc = f[a > b ? b : a - 1] - curpos; 1227 for (i = 0; i < nc; i++) 1228 diff_output("%c", getc(lb)); 1229 } 1230 if (a > b) 1231 return (0); 1232 if (diff_format == D_IFDEF) { 1233 if (inifdef) { 1234 diff_output("#else /* %s%s */\n", 1235 oldfile == 1 ? "!" : "", ifdefname); 1236 } else { 1237 if (oldfile) 1238 diff_output("#ifndef %s\n", ifdefname); 1239 else 1240 diff_output("#ifdef %s\n", ifdefname); 1241 } 1242 inifdef = 1 + oldfile; 1243 } 1244 for (i = a; i <= b; i++) { 1245 fseek(lb, f[i - 1], SEEK_SET); 1246 nc = f[i] - f[i - 1]; 1247 if (diff_format != D_IFDEF && ch != '\0') { 1248 diff_output("%c", ch); 1249 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT 1250 || diff_format == D_UNIFIED)) 1251 diff_output("\t"); 1252 else if (diff_format != D_UNIFIED) 1253 diff_output(" "); 1254 } 1255 col = 0; 1256 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1257 if ((c = getc(lb)) == EOF) { 1258 if (diff_format == D_EDIT || diff_format == D_REVERSE || 1259 diff_format == D_NREVERSE) 1260 warnx("No newline at end of file"); 1261 else 1262 diff_output("\n\\ No newline at end of " 1263 "file\n"); 1264 return (0); 1265 } 1266 if (c == '\t' && (flags & D_EXPANDTABS)) { 1267 newcol = ((col/tabsize)+1)*tabsize; 1268 do { 1269 diff_output(" "); 1270 } while (++col < newcol); 1271 } else { 1272 if (diff_format == D_EDIT && j == 1 && c == '\n' 1273 && lastc == '.') { 1274 /* 1275 * Don't print a bare "." line 1276 * since that will confuse ed(1). 1277 * Print ".." instead and return, 1278 * giving the caller an offset 1279 * from which to restart. 1280 */ 1281 diff_output(".\n"); 1282 return (i - a + 1); 1283 } 1284 diff_output("%c", c); 1285 col++; 1286 } 1287 } 1288 } 1289 return (0); 1290 } 1291 1292 /* 1293 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1294 */ 1295 static int 1296 readhash(FILE *f, int flags) 1297 { 1298 int i, t, space; 1299 int sum; 1300 1301 sum = 1; 1302 space = 0; 1303 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { 1304 if (flags & D_IGNORECASE) 1305 for (i = 0; (t = getc(f)) != '\n'; i++) { 1306 if (flags & D_STRIPCR && t == '\r') { 1307 t = getc(f); 1308 if (t == '\n') 1309 break; 1310 ungetc(t, f); 1311 } 1312 if (t == EOF) { 1313 if (i == 0) 1314 return (0); 1315 break; 1316 } 1317 sum = sum * 127 + chrtran[t]; 1318 } 1319 else 1320 for (i = 0; (t = getc(f)) != '\n'; i++) { 1321 if (flags & D_STRIPCR && t == '\r') { 1322 t = getc(f); 1323 if (t == '\n') 1324 break; 1325 ungetc(t, f); 1326 } 1327 if (t == EOF) { 1328 if (i == 0) 1329 return (0); 1330 break; 1331 } 1332 sum = sum * 127 + t; 1333 } 1334 } else { 1335 for (i = 0;;) { 1336 switch (t = getc(f)) { 1337 case '\r': 1338 case '\t': 1339 case '\v': 1340 case '\f': 1341 case ' ': 1342 space++; 1343 continue; 1344 default: 1345 if (space && (flags & D_IGNOREBLANKS) == 0) { 1346 i++; 1347 space = 0; 1348 } 1349 sum = sum * 127 + chrtran[t]; 1350 i++; 1351 continue; 1352 case EOF: 1353 if (i == 0) 1354 return (0); 1355 /* FALLTHROUGH */ 1356 case '\n': 1357 break; 1358 } 1359 break; 1360 } 1361 } 1362 /* 1363 * There is a remote possibility that we end up with a zero sum. 1364 * Zero is used as an EOF marker, so return 1 instead. 1365 */ 1366 return (sum == 0 ? 1 : sum); 1367 } 1368 1369 static int 1370 asciifile(FILE *f) 1371 { 1372 unsigned char buf[BUFSIZ]; 1373 size_t cnt; 1374 1375 if (f == NULL) 1376 return (1); 1377 1378 rewind(f); 1379 cnt = fread(buf, 1, sizeof(buf), f); 1380 return (memchr(buf, '\0', cnt) == NULL); 1381 } 1382 1383 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) 1384 1385 static char * 1386 match_function(const long *f, int pos, FILE *fp) 1387 { 1388 unsigned char buf[FUNCTION_CONTEXT_SIZE]; 1389 size_t nc; 1390 int last = lastline; 1391 const char *state = NULL; 1392 1393 lastline = pos; 1394 while (pos > last) { 1395 fseek(fp, f[pos - 1], SEEK_SET); 1396 nc = f[pos] - f[pos - 1]; 1397 if (nc >= sizeof(buf)) 1398 nc = sizeof(buf) - 1; 1399 nc = fread(buf, 1, nc, fp); 1400 if (nc > 0) { 1401 buf[nc] = '\0'; 1402 buf[strcspn(buf, "\n")] = '\0'; 1403 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 1404 if (begins_with(buf, "private:")) { 1405 if (!state) 1406 state = " (private)"; 1407 } else if (begins_with(buf, "protected:")) { 1408 if (!state) 1409 state = " (protected)"; 1410 } else if (begins_with(buf, "public:")) { 1411 if (!state) 1412 state = " (public)"; 1413 } else { 1414 strlcpy(lastbuf, buf, sizeof lastbuf); 1415 if (state) 1416 strlcat(lastbuf, state, 1417 sizeof lastbuf); 1418 lastmatchline = pos; 1419 return lastbuf; 1420 } 1421 } 1422 } 1423 pos--; 1424 } 1425 return lastmatchline > 0 ? lastbuf : NULL; 1426 } 1427 1428 /* dump accumulated "context" diff changes */ 1429 static void 1430 dump_context_vec(FILE *f1, FILE *f2, int flags) 1431 { 1432 struct context_vec *cvp = context_vec_start; 1433 int lowa, upb, lowc, upd, do_output; 1434 int a, b, c, d; 1435 char ch, *f; 1436 1437 if (context_vec_start > context_vec_ptr) 1438 return; 1439 1440 b = d = 0; /* gcc */ 1441 lowa = MAX(1, cvp->a - diff_context); 1442 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1443 lowc = MAX(1, cvp->c - diff_context); 1444 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1445 1446 diff_output("***************"); 1447 if ((flags & D_PROTOTYPE)) { 1448 f = match_function(ixold, lowa-1, f1); 1449 if (f != NULL) 1450 diff_output(" %s", f); 1451 } 1452 diff_output("\n*** "); 1453 range(lowa, upb, ","); 1454 diff_output(" ****\n"); 1455 1456 /* 1457 * Output changes to the "old" file. The first loop suppresses 1458 * output if there were no changes to the "old" file (we'll see 1459 * the "old" lines as context in the "new" list). 1460 */ 1461 do_output = 0; 1462 for (; cvp <= context_vec_ptr; cvp++) 1463 if (cvp->a <= cvp->b) { 1464 cvp = context_vec_start; 1465 do_output++; 1466 break; 1467 } 1468 if (do_output) { 1469 while (cvp <= context_vec_ptr) { 1470 a = cvp->a; 1471 b = cvp->b; 1472 c = cvp->c; 1473 d = cvp->d; 1474 1475 if (a <= b && c <= d) 1476 ch = 'c'; 1477 else 1478 ch = (a <= b) ? 'd' : 'a'; 1479 1480 if (ch == 'a') 1481 fetch(ixold, lowa, b, f1, ' ', 0, flags); 1482 else { 1483 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1484 fetch(ixold, a, b, f1, 1485 ch == 'c' ? '!' : '-', 0, flags); 1486 } 1487 lowa = b + 1; 1488 cvp++; 1489 } 1490 fetch(ixold, b + 1, upb, f1, ' ', 0, flags); 1491 } 1492 /* output changes to the "new" file */ 1493 diff_output("--- "); 1494 range(lowc, upd, ","); 1495 diff_output(" ----\n"); 1496 1497 do_output = 0; 1498 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1499 if (cvp->c <= cvp->d) { 1500 cvp = context_vec_start; 1501 do_output++; 1502 break; 1503 } 1504 if (do_output) { 1505 while (cvp <= context_vec_ptr) { 1506 a = cvp->a; 1507 b = cvp->b; 1508 c = cvp->c; 1509 d = cvp->d; 1510 1511 if (a <= b && c <= d) 1512 ch = 'c'; 1513 else 1514 ch = (a <= b) ? 'd' : 'a'; 1515 1516 if (ch == 'd') 1517 fetch(ixnew, lowc, d, f2, ' ', 0, flags); 1518 else { 1519 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1520 fetch(ixnew, c, d, f2, 1521 ch == 'c' ? '!' : '+', 0, flags); 1522 } 1523 lowc = d + 1; 1524 cvp++; 1525 } 1526 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1527 } 1528 context_vec_ptr = context_vec_start - 1; 1529 } 1530 1531 /* dump accumulated "unified" diff changes */ 1532 static void 1533 dump_unified_vec(FILE *f1, FILE *f2, int flags) 1534 { 1535 struct context_vec *cvp = context_vec_start; 1536 int lowa, upb, lowc, upd; 1537 int a, b, c, d; 1538 char ch, *f; 1539 1540 if (context_vec_start > context_vec_ptr) 1541 return; 1542 1543 b = d = 0; /* gcc */ 1544 lowa = MAX(1, cvp->a - diff_context); 1545 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1546 lowc = MAX(1, cvp->c - diff_context); 1547 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1548 1549 diff_output("@@ -"); 1550 uni_range(lowa, upb); 1551 diff_output(" +"); 1552 uni_range(lowc, upd); 1553 diff_output(" @@"); 1554 if ((flags & D_PROTOTYPE)) { 1555 f = match_function(ixold, lowa-1, f1); 1556 if (f != NULL) 1557 diff_output(" %s", f); 1558 } 1559 diff_output("\n"); 1560 1561 /* 1562 * Output changes in "unified" diff format--the old and new lines 1563 * are printed together. 1564 */ 1565 for (; cvp <= context_vec_ptr; cvp++) { 1566 a = cvp->a; 1567 b = cvp->b; 1568 c = cvp->c; 1569 d = cvp->d; 1570 1571 /* 1572 * c: both new and old changes 1573 * d: only changes in the old file 1574 * a: only changes in the new file 1575 */ 1576 if (a <= b && c <= d) 1577 ch = 'c'; 1578 else 1579 ch = (a <= b) ? 'd' : 'a'; 1580 1581 switch (ch) { 1582 case 'c': 1583 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1584 fetch(ixold, a, b, f1, '-', 0, flags); 1585 fetch(ixnew, c, d, f2, '+', 0, flags); 1586 break; 1587 case 'd': 1588 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1589 fetch(ixold, a, b, f1, '-', 0, flags); 1590 break; 1591 case 'a': 1592 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1593 fetch(ixnew, c, d, f2, '+', 0, flags); 1594 break; 1595 } 1596 lowa = b + 1; 1597 lowc = d + 1; 1598 } 1599 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1600 1601 context_vec_ptr = context_vec_start - 1; 1602 } 1603 1604 static void 1605 print_header(const char *file1, const char *file2) 1606 { 1607 const char *time_format; 1608 char buf1[256]; 1609 char buf2[256]; 1610 char end1[10]; 1611 char end2[10]; 1612 struct tm tm1, tm2, *tm_ptr1, *tm_ptr2; 1613 int nsec1 = stb1.st_mtim.tv_nsec; 1614 int nsec2 = stb2.st_mtim.tv_nsec; 1615 1616 time_format = "%Y-%m-%d %H:%M:%S"; 1617 1618 if (cflag) 1619 time_format = "%c"; 1620 tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1); 1621 tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2); 1622 strftime(buf1, 256, time_format, tm_ptr1); 1623 strftime(buf2, 256, time_format, tm_ptr2); 1624 if (!cflag) { 1625 strftime(end1, 10, "%z", tm_ptr1); 1626 strftime(end2, 10, "%z", tm_ptr2); 1627 sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1); 1628 sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2); 1629 } 1630 if (label[0] != NULL) 1631 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", 1632 label[0]); 1633 else 1634 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---", 1635 file1, buf1); 1636 if (label[1] != NULL) 1637 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", 1638 label[1]); 1639 else 1640 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++", 1641 file2, buf2); 1642 } 1643