1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #include <sys/cdefs.h> 68 __FBSDID("$FreeBSD$"); 69 70 #include <sys/capsicum.h> 71 #include <sys/procdesc.h> 72 #include <sys/stat.h> 73 #include <sys/types.h> 74 #include <sys/event.h> 75 #include <sys/wait.h> 76 77 #include <capsicum_helpers.h> 78 #include <ctype.h> 79 #include <err.h> 80 #include <errno.h> 81 #include <fcntl.h> 82 #include <paths.h> 83 #include <regex.h> 84 #include <stddef.h> 85 #include <stdint.h> 86 #include <stdio.h> 87 #include <stdlib.h> 88 #include <string.h> 89 #include <unistd.h> 90 #include <limits.h> 91 #include <signal.h> 92 93 #include "diff.h" 94 #include "xmalloc.h" 95 96 #define _PATH_PR "/usr/bin/pr" 97 98 /* 99 * diff - compare two files. 100 */ 101 102 /* 103 * Uses an algorithm due to Harold Stone, which finds 104 * a pair of longest identical subsequences in the two 105 * files. 106 * 107 * The major goal is to generate the match vector J. 108 * J[i] is the index of the line in file1 corresponding 109 * to line i file0. J[i] = 0 if there is no 110 * such line in file1. 111 * 112 * Lines are hashed so as to work in core. All potential 113 * matches are located by sorting the lines of each file 114 * on the hash (called ``value''). In particular, this 115 * collects the equivalence classes in file1 together. 116 * Subroutine equiv replaces the value of each line in 117 * file0 by the index of the first element of its 118 * matching equivalence in (the reordered) file1. 119 * To save space equiv squeezes file1 into a single 120 * array member in which the equivalence classes 121 * are simply concatenated, except that their first 122 * members are flagged by changing sign. 123 * 124 * Next the indices that point into member are unsorted into 125 * array class according to the original order of file0. 126 * 127 * The cleverness lies in routine stone. This marches 128 * through the lines of file0, developing a vector klist 129 * of "k-candidates". At step i a k-candidate is a matched 130 * pair of lines x,y (x in file0 y in file1) such that 131 * there is a common subsequence of length k 132 * between the first i lines of file0 and the first y 133 * lines of file1, but there is no such subsequence for 134 * any smaller y. x is the earliest possible mate to y 135 * that occurs in such a subsequence. 136 * 137 * Whenever any of the members of the equivalence class of 138 * lines in file1 matable to a line in file0 has serial number 139 * less than the y of some k-candidate, that k-candidate 140 * with the smallest such y is replaced. The new 141 * k-candidate is chained (via pred) to the current 142 * k-1 candidate so that the actual subsequence can 143 * be recovered. When a member has serial number greater 144 * that the y of all k-candidates, the klist is extended. 145 * At the end, the longest subsequence is pulled out 146 * and placed in the array J by unravel 147 * 148 * With J in hand, the matches there recorded are 149 * check'ed against reality to assure that no spurious 150 * matches have crept in due to hashing. If they have, 151 * they are broken, and "jackpot" is recorded--a harmless 152 * matter except that a true match for a spuriously 153 * mated line may now be unnecessarily reported as a change. 154 * 155 * Much of the complexity of the program comes simply 156 * from trying to minimize core utilization and 157 * maximize the range of doable problems by dynamically 158 * allocating what is needed and reusing what is not. 159 * The core requirements for problems larger than somewhat 160 * are (in words) 2*length(file0) + length(file1) + 161 * 3*(number of k-candidates installed), typically about 162 * 6n words for files of length n. 163 */ 164 165 struct cand { 166 int x; 167 int y; 168 int pred; 169 }; 170 171 static struct line { 172 int serial; 173 int value; 174 } *file[2]; 175 176 /* 177 * The following struct is used to record change information when 178 * doing a "context" or "unified" diff. (see routine "change" to 179 * understand the highly mnemonic field names) 180 */ 181 struct context_vec { 182 int a; /* start line in old file */ 183 int b; /* end line in old file */ 184 int c; /* start line in new file */ 185 int d; /* end line in new file */ 186 }; 187 188 #define diff_output printf 189 static FILE *opentemp(const char *); 190 static void output(char *, FILE *, char *, FILE *, int); 191 static void check(FILE *, FILE *, int); 192 static void range(int, int, const char *); 193 static void uni_range(int, int); 194 static void dump_context_vec(FILE *, FILE *, int); 195 static void dump_unified_vec(FILE *, FILE *, int); 196 static void prepare(int, FILE *, size_t, int); 197 static void prune(void); 198 static void equiv(struct line *, int, struct line *, int, int *); 199 static void unravel(int); 200 static void unsort(struct line *, int, int *); 201 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); 202 static void sort(struct line *, int); 203 static void print_header(const char *, const char *); 204 static int ignoreline(char *); 205 static int asciifile(FILE *); 206 static int fetch(long *, int, int, FILE *, int, int, int); 207 static int newcand(int, int, int); 208 static int search(int *, int, int); 209 static int skipline(FILE *); 210 static int isqrt(int); 211 static int stone(int *, int, int *, int *, int); 212 static int readhash(FILE *, int); 213 static int files_differ(FILE *, FILE *, int); 214 static char *match_function(const long *, int, FILE *); 215 static char *preadline(int, size_t, off_t); 216 217 static int *J; /* will be overlaid on class */ 218 static int *class; /* will be overlaid on file[0] */ 219 static int *klist; /* will be overlaid on file[0] after class */ 220 static int *member; /* will be overlaid on file[1] */ 221 static int clen; 222 static int inifdef; /* whether or not we are in a #ifdef block */ 223 static int len[2]; 224 static int pref, suff; /* length of prefix and suffix */ 225 static int slen[2]; 226 static int anychange; 227 static long *ixnew; /* will be overlaid on file[1] */ 228 static long *ixold; /* will be overlaid on klist */ 229 static struct cand *clist; /* merely a free storage pot for candidates */ 230 static int clistlen; /* the length of clist */ 231 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 232 static u_char *chrtran; /* translation table for case-folding */ 233 static struct context_vec *context_vec_start; 234 static struct context_vec *context_vec_end; 235 static struct context_vec *context_vec_ptr; 236 237 #define FUNCTION_CONTEXT_SIZE 55 238 static char lastbuf[FUNCTION_CONTEXT_SIZE]; 239 static int lastline; 240 static int lastmatchline; 241 242 243 /* 244 * chrtran points to one of 2 translation tables: cup2low if folding upper to 245 * lower case clow2low if not folding case 246 */ 247 static u_char clow2low[256] = { 248 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 249 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 250 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 251 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 252 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 253 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 254 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 255 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 256 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 257 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 258 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 259 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 260 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 261 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 262 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 263 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 264 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 265 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 266 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 267 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 268 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 269 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 270 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 271 0xfd, 0xfe, 0xff 272 }; 273 274 static u_char cup2low[256] = { 275 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 276 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 277 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 278 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 279 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 280 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 281 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 282 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 283 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 284 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 285 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 286 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 287 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 288 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 289 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 290 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 291 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 292 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 293 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 294 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 295 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 296 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 297 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 298 0xfd, 0xfe, 0xff 299 }; 300 301 int 302 diffreg(char *file1, char *file2, int flags, int capsicum) 303 { 304 FILE *f1, *f2; 305 int i, rval; 306 int ostdout = -1; 307 int pr_pd, kq; 308 struct kevent *e; 309 cap_rights_t rights_ro; 310 311 e = NULL; 312 kq = -1; 313 f1 = f2 = NULL; 314 rval = D_SAME; 315 anychange = 0; 316 lastline = 0; 317 lastmatchline = 0; 318 context_vec_ptr = context_vec_start - 1; 319 if (flags & D_IGNORECASE) 320 chrtran = cup2low; 321 else 322 chrtran = clow2low; 323 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 324 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); 325 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 326 goto closem; 327 328 if (flags & D_EMPTY1) 329 f1 = fopen(_PATH_DEVNULL, "r"); 330 else { 331 if (!S_ISREG(stb1.st_mode)) { 332 if ((f1 = opentemp(file1)) == NULL || 333 fstat(fileno(f1), &stb1) < 0) { 334 warn("%s", file1); 335 status |= 2; 336 goto closem; 337 } 338 } else if (strcmp(file1, "-") == 0) 339 f1 = stdin; 340 else 341 f1 = fopen(file1, "r"); 342 } 343 if (f1 == NULL) { 344 warn("%s", file1); 345 status |= 2; 346 goto closem; 347 } 348 349 if (flags & D_EMPTY2) 350 f2 = fopen(_PATH_DEVNULL, "r"); 351 else { 352 if (!S_ISREG(stb2.st_mode)) { 353 if ((f2 = opentemp(file2)) == NULL || 354 fstat(fileno(f2), &stb2) < 0) { 355 warn("%s", file2); 356 status |= 2; 357 goto closem; 358 } 359 } else if (strcmp(file2, "-") == 0) 360 f2 = stdin; 361 else 362 f2 = fopen(file2, "r"); 363 } 364 if (f2 == NULL) { 365 warn("%s", file2); 366 status |= 2; 367 goto closem; 368 } 369 370 if (lflag) { 371 /* redirect stdout to pr */ 372 int pfd[2]; 373 pid_t pid; 374 char *header; 375 376 xasprintf(&header, "%s %s %s", diffargs, file1, file2); 377 signal(SIGPIPE, SIG_IGN); 378 fflush(stdout); 379 rewind(stdout); 380 pipe(pfd); 381 switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) { 382 case -1: 383 status |= 2; 384 free(header); 385 err(2, "No more processes"); 386 case 0: 387 /* child */ 388 if (pfd[0] != STDIN_FILENO) { 389 dup2(pfd[0], STDIN_FILENO); 390 close(pfd[0]); 391 } 392 close(pfd[1]); 393 execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0); 394 _exit(127); 395 default: 396 397 /* parent */ 398 if (pfd[1] != STDOUT_FILENO) { 399 ostdout = dup(STDOUT_FILENO); 400 dup2(pfd[1], STDOUT_FILENO); 401 close(pfd[1]); 402 } 403 close(pfd[0]); 404 rewind(stdout); 405 free(header); 406 kq = kqueue(); 407 if (kq == -1) 408 err(2, "kqueue"); 409 e = xmalloc(sizeof(struct kevent)); 410 EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0, 411 NULL); 412 if (kevent(kq, e, 1, NULL, 0, NULL) == -1) 413 err(2, "kevent"); 414 } 415 } 416 417 if (capsicum) { 418 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 419 if (cap_rights_limit(fileno(f1), &rights_ro) < 0 420 && errno != ENOSYS) 421 err(2, "unable to limit rights on: %s", file1); 422 if (cap_rights_limit(fileno(f2), &rights_ro) < 0 && 423 errno != ENOSYS) 424 err(2, "unable to limit rights on: %s", file2); 425 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 426 /* stding has already been limited */ 427 if (caph_limit_stderr() == -1) 428 err(2, "unable to limit stderr"); 429 if (caph_limit_stdout() == -1) 430 err(2, "unable to limit stdout"); 431 } else if (caph_limit_stdio() == -1) 432 err(2, "unable to limit stdio"); 433 434 caph_cache_catpages(); 435 caph_cache_tzdata(); 436 if (cap_enter() < 0 && errno != ENOSYS) 437 err(2, "unable to enter capability mode"); 438 } 439 440 switch (files_differ(f1, f2, flags)) { 441 case 0: 442 goto closem; 443 case 1: 444 break; 445 default: 446 /* error */ 447 status |= 2; 448 goto closem; 449 } 450 451 if ((flags & D_FORCEASCII) == 0 && 452 (!asciifile(f1) || !asciifile(f2))) { 453 rval = D_BINARY; 454 status |= 1; 455 goto closem; 456 } 457 prepare(0, f1, stb1.st_size, flags); 458 prepare(1, f2, stb2.st_size, flags); 459 460 prune(); 461 sort(sfile[0], slen[0]); 462 sort(sfile[1], slen[1]); 463 464 member = (int *)file[1]; 465 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 466 member = xreallocarray(member, slen[1] + 2, sizeof(*member)); 467 468 class = (int *)file[0]; 469 unsort(sfile[0], slen[0], class); 470 class = xreallocarray(class, slen[0] + 2, sizeof(*class)); 471 472 klist = xcalloc(slen[0] + 2, sizeof(*klist)); 473 clen = 0; 474 clistlen = 100; 475 clist = xcalloc(clistlen, sizeof(*clist)); 476 i = stone(class, slen[0], member, klist, flags); 477 free(member); 478 free(class); 479 480 J = xreallocarray(J, len[0] + 2, sizeof(*J)); 481 unravel(klist[i]); 482 free(clist); 483 free(klist); 484 485 ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); 486 ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); 487 check(f1, f2, flags); 488 output(file1, f1, file2, f2, flags); 489 if (ostdout != -1 && e != NULL) { 490 /* close the pipe to pr and restore stdout */ 491 int wstatus; 492 493 fflush(stdout); 494 if (ostdout != STDOUT_FILENO) { 495 close(STDOUT_FILENO); 496 dup2(ostdout, STDOUT_FILENO); 497 close(ostdout); 498 } 499 if (kevent(kq, NULL, 0, e, 1, NULL) == -1) 500 err(2, "kevent"); 501 wstatus = e[0].data; 502 close(kq); 503 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) 504 errx(2, "pr exited abnormally"); 505 else if (WIFSIGNALED(wstatus)) 506 errx(2, "pr killed by signal %d", 507 WTERMSIG(wstatus)); 508 } 509 510 closem: 511 if (anychange) { 512 status |= 1; 513 if (rval == D_SAME) 514 rval = D_DIFFER; 515 } 516 if (f1 != NULL) 517 fclose(f1); 518 if (f2 != NULL) 519 fclose(f2); 520 521 return (rval); 522 } 523 524 /* 525 * Check to see if the given files differ. 526 * Returns 0 if they are the same, 1 if different, and -1 on error. 527 * XXX - could use code from cmp(1) [faster] 528 */ 529 static int 530 files_differ(FILE *f1, FILE *f2, int flags) 531 { 532 char buf1[BUFSIZ], buf2[BUFSIZ]; 533 size_t i, j; 534 535 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 536 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 537 return (1); 538 for (;;) { 539 i = fread(buf1, 1, sizeof(buf1), f1); 540 j = fread(buf2, 1, sizeof(buf2), f2); 541 if ((!i && ferror(f1)) || (!j && ferror(f2))) 542 return (-1); 543 if (i != j) 544 return (1); 545 if (i == 0) 546 return (0); 547 if (memcmp(buf1, buf2, i) != 0) 548 return (1); 549 } 550 } 551 552 static FILE * 553 opentemp(const char *f) 554 { 555 char buf[BUFSIZ], tempfile[PATH_MAX]; 556 ssize_t nread; 557 int ifd, ofd; 558 559 if (strcmp(f, "-") == 0) 560 ifd = STDIN_FILENO; 561 else if ((ifd = open(f, O_RDONLY, 0644)) < 0) 562 return (NULL); 563 564 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); 565 566 if ((ofd = mkstemp(tempfile)) < 0) { 567 close(ifd); 568 return (NULL); 569 } 570 unlink(tempfile); 571 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 572 if (write(ofd, buf, nread) != nread) { 573 close(ifd); 574 close(ofd); 575 return (NULL); 576 } 577 } 578 close(ifd); 579 lseek(ofd, (off_t)0, SEEK_SET); 580 return (fdopen(ofd, "r")); 581 } 582 583 char * 584 splice(char *dir, char *path) 585 { 586 char *tail, *buf; 587 size_t dirlen; 588 589 dirlen = strlen(dir); 590 while (dirlen != 0 && dir[dirlen - 1] == '/') 591 dirlen--; 592 if ((tail = strrchr(path, '/')) == NULL) 593 tail = path; 594 else 595 tail++; 596 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); 597 return (buf); 598 } 599 600 static void 601 prepare(int i, FILE *fd, size_t filesize, int flags) 602 { 603 struct line *p; 604 int h; 605 size_t sz, j; 606 607 rewind(fd); 608 609 sz = MIN(filesize, SIZE_MAX) / 25; 610 if (sz < 100) 611 sz = 100; 612 613 p = xcalloc(sz + 3, sizeof(*p)); 614 for (j = 0; (h = readhash(fd, flags));) { 615 if (j == sz) { 616 sz = sz * 3 / 2; 617 p = xreallocarray(p, sz + 3, sizeof(*p)); 618 } 619 p[++j].value = h; 620 } 621 len[i] = j; 622 file[i] = p; 623 } 624 625 static void 626 prune(void) 627 { 628 int i, j; 629 630 for (pref = 0; pref < len[0] && pref < len[1] && 631 file[0][pref + 1].value == file[1][pref + 1].value; 632 pref++) 633 ; 634 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 635 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 636 suff++) 637 ; 638 for (j = 0; j < 2; j++) { 639 sfile[j] = file[j] + pref; 640 slen[j] = len[j] - pref - suff; 641 for (i = 0; i <= slen[j]; i++) 642 sfile[j][i].serial = i; 643 } 644 } 645 646 static void 647 equiv(struct line *a, int n, struct line *b, int m, int *c) 648 { 649 int i, j; 650 651 i = j = 1; 652 while (i <= n && j <= m) { 653 if (a[i].value < b[j].value) 654 a[i++].value = 0; 655 else if (a[i].value == b[j].value) 656 a[i++].value = j; 657 else 658 j++; 659 } 660 while (i <= n) 661 a[i++].value = 0; 662 b[m + 1].value = 0; 663 j = 0; 664 while (++j <= m) { 665 c[j] = -b[j].serial; 666 while (b[j + 1].value == b[j].value) { 667 j++; 668 c[j] = b[j].serial; 669 } 670 } 671 c[j] = -1; 672 } 673 674 /* Code taken from ping.c */ 675 static int 676 isqrt(int n) 677 { 678 int y, x = 1; 679 680 if (n == 0) 681 return (0); 682 683 do { /* newton was a stinker */ 684 y = x; 685 x = n / x; 686 x += y; 687 x /= 2; 688 } while ((x - y) > 1 || (x - y) < -1); 689 690 return (x); 691 } 692 693 static int 694 stone(int *a, int n, int *b, int *c, int flags) 695 { 696 int i, k, y, j, l; 697 int oldc, tc, oldl, sq; 698 u_int numtries, bound; 699 700 if (flags & D_MINIMAL) 701 bound = UINT_MAX; 702 else { 703 sq = isqrt(n); 704 bound = MAX(256, sq); 705 } 706 707 k = 0; 708 c[0] = newcand(0, 0, 0); 709 for (i = 1; i <= n; i++) { 710 j = a[i]; 711 if (j == 0) 712 continue; 713 y = -b[j]; 714 oldl = 0; 715 oldc = c[0]; 716 numtries = 0; 717 do { 718 if (y <= clist[oldc].y) 719 continue; 720 l = search(c, k, y); 721 if (l != oldl + 1) 722 oldc = c[l - 1]; 723 if (l <= k) { 724 if (clist[c[l]].y <= y) 725 continue; 726 tc = c[l]; 727 c[l] = newcand(i, y, oldc); 728 oldc = tc; 729 oldl = l; 730 numtries++; 731 } else { 732 c[l] = newcand(i, y, oldc); 733 k++; 734 break; 735 } 736 } while ((y = b[++j]) > 0 && numtries < bound); 737 } 738 return (k); 739 } 740 741 static int 742 newcand(int x, int y, int pred) 743 { 744 struct cand *q; 745 746 if (clen == clistlen) { 747 clistlen = clistlen * 11 / 10; 748 clist = xreallocarray(clist, clistlen, sizeof(*clist)); 749 } 750 q = clist + clen; 751 q->x = x; 752 q->y = y; 753 q->pred = pred; 754 return (clen++); 755 } 756 757 static int 758 search(int *c, int k, int y) 759 { 760 int i, j, l, t; 761 762 if (clist[c[k]].y < y) /* quick look for typical case */ 763 return (k + 1); 764 i = 0; 765 j = k + 1; 766 for (;;) { 767 l = (i + j) / 2; 768 if (l <= i) 769 break; 770 t = clist[c[l]].y; 771 if (t > y) 772 j = l; 773 else if (t < y) 774 i = l; 775 else 776 return (l); 777 } 778 return (l + 1); 779 } 780 781 static void 782 unravel(int p) 783 { 784 struct cand *q; 785 int i; 786 787 for (i = 0; i <= len[0]; i++) 788 J[i] = i <= pref ? i : 789 i > len[0] - suff ? i + len[1] - len[0] : 0; 790 for (q = clist + p; q->y != 0; q = clist + q->pred) 791 J[q->x + pref] = q->y + pref; 792 } 793 794 /* 795 * Check does double duty: 796 * 1. ferret out any fortuitous correspondences due 797 * to confounding by hashing (which result in "jackpot") 798 * 2. collect random access indexes to the two files 799 */ 800 static void 801 check(FILE *f1, FILE *f2, int flags) 802 { 803 int i, j, jackpot, c, d; 804 long ctold, ctnew; 805 806 rewind(f1); 807 rewind(f2); 808 j = 1; 809 ixold[0] = ixnew[0] = 0; 810 jackpot = 0; 811 ctold = ctnew = 0; 812 for (i = 1; i <= len[0]; i++) { 813 if (J[i] == 0) { 814 ixold[i] = ctold += skipline(f1); 815 continue; 816 } 817 while (j < J[i]) { 818 ixnew[j] = ctnew += skipline(f2); 819 j++; 820 } 821 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) { 822 for (;;) { 823 c = getc(f1); 824 d = getc(f2); 825 /* 826 * GNU diff ignores a missing newline 827 * in one file for -b or -w. 828 */ 829 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { 830 if (c == EOF && d == '\n') { 831 ctnew++; 832 break; 833 } else if (c == '\n' && d == EOF) { 834 ctold++; 835 break; 836 } 837 } 838 ctold++; 839 ctnew++; 840 if (flags & D_STRIPCR) { 841 if (c == '\r') { 842 if ((c = getc(f1)) == '\n') { 843 ctnew++; 844 break; 845 } 846 } 847 if (d == '\r') { 848 if ((d = getc(f2)) == '\n') { 849 ctold++; 850 break; 851 } 852 } 853 } 854 if ((flags & D_FOLDBLANKS) && isspace(c) && 855 isspace(d)) { 856 do { 857 if (c == '\n') 858 break; 859 ctold++; 860 } while (isspace(c = getc(f1))); 861 do { 862 if (d == '\n') 863 break; 864 ctnew++; 865 } while (isspace(d = getc(f2))); 866 } else if ((flags & D_IGNOREBLANKS)) { 867 while (isspace(c) && c != '\n') { 868 c = getc(f1); 869 ctold++; 870 } 871 while (isspace(d) && d != '\n') { 872 d = getc(f2); 873 ctnew++; 874 } 875 } 876 if (chrtran[c] != chrtran[d]) { 877 jackpot++; 878 J[i] = 0; 879 if (c != '\n' && c != EOF) 880 ctold += skipline(f1); 881 if (d != '\n' && c != EOF) 882 ctnew += skipline(f2); 883 break; 884 } 885 if (c == '\n' || c == EOF) 886 break; 887 } 888 } else { 889 for (;;) { 890 ctold++; 891 ctnew++; 892 if ((c = getc(f1)) != (d = getc(f2))) { 893 /* jackpot++; */ 894 J[i] = 0; 895 if (c != '\n' && c != EOF) 896 ctold += skipline(f1); 897 if (d != '\n' && c != EOF) 898 ctnew += skipline(f2); 899 break; 900 } 901 if (c == '\n' || c == EOF) 902 break; 903 } 904 } 905 ixold[i] = ctold; 906 ixnew[j] = ctnew; 907 j++; 908 } 909 for (; j <= len[1]; j++) { 910 ixnew[j] = ctnew += skipline(f2); 911 } 912 /* 913 * if (jackpot) 914 * fprintf(stderr, "jackpot\n"); 915 */ 916 } 917 918 /* shellsort CACM #201 */ 919 static void 920 sort(struct line *a, int n) 921 { 922 struct line *ai, *aim, w; 923 int j, m = 0, k; 924 925 if (n == 0) 926 return; 927 for (j = 1; j <= n; j *= 2) 928 m = 2 * j - 1; 929 for (m /= 2; m != 0; m /= 2) { 930 k = n - m; 931 for (j = 1; j <= k; j++) { 932 for (ai = &a[j]; ai > a; ai -= m) { 933 aim = &ai[m]; 934 if (aim < ai) 935 break; /* wraparound */ 936 if (aim->value > ai[0].value || 937 (aim->value == ai[0].value && 938 aim->serial > ai[0].serial)) 939 break; 940 w.value = ai[0].value; 941 ai[0].value = aim->value; 942 aim->value = w.value; 943 w.serial = ai[0].serial; 944 ai[0].serial = aim->serial; 945 aim->serial = w.serial; 946 } 947 } 948 } 949 } 950 951 static void 952 unsort(struct line *f, int l, int *b) 953 { 954 int *a, i; 955 956 a = xcalloc(l + 1, sizeof(*a)); 957 for (i = 1; i <= l; i++) 958 a[f[i].serial] = f[i].value; 959 for (i = 1; i <= l; i++) 960 b[i] = a[i]; 961 free(a); 962 } 963 964 static int 965 skipline(FILE *f) 966 { 967 int i, c; 968 969 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 970 continue; 971 return (i); 972 } 973 974 static void 975 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) 976 { 977 int m, i0, i1, j0, j1; 978 979 rewind(f1); 980 rewind(f2); 981 m = len[0]; 982 J[0] = 0; 983 J[m + 1] = len[1] + 1; 984 if (diff_format != D_EDIT) { 985 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 986 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 987 i0++; 988 j0 = J[i0 - 1] + 1; 989 i1 = i0 - 1; 990 while (i1 < m && J[i1 + 1] == 0) 991 i1++; 992 j1 = J[i1 + 1] - 1; 993 J[i1] = j1; 994 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); 995 } 996 } else { 997 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 998 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 999 i0--; 1000 j0 = J[i0 + 1] - 1; 1001 i1 = i0 + 1; 1002 while (i1 > 1 && J[i1 - 1] == 0) 1003 i1--; 1004 j1 = J[i1 - 1] + 1; 1005 J[i1] = j1; 1006 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); 1007 } 1008 } 1009 if (m == 0) 1010 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); 1011 if (diff_format == D_IFDEF || diff_format == D_GFORMAT) { 1012 for (;;) { 1013 #define c i0 1014 if ((c = getc(f1)) == EOF) 1015 return; 1016 diff_output("%c", c); 1017 } 1018 #undef c 1019 } 1020 if (anychange != 0) { 1021 if (diff_format == D_CONTEXT) 1022 dump_context_vec(f1, f2, flags); 1023 else if (diff_format == D_UNIFIED) 1024 dump_unified_vec(f1, f2, flags); 1025 } 1026 } 1027 1028 static void 1029 range(int a, int b, const char *separator) 1030 { 1031 diff_output("%d", a > b ? b : a); 1032 if (a < b) 1033 diff_output("%s%d", separator, b); 1034 } 1035 1036 static void 1037 uni_range(int a, int b) 1038 { 1039 if (a < b) 1040 diff_output("%d,%d", a, b - a + 1); 1041 else if (a == b) 1042 diff_output("%d", b); 1043 else 1044 diff_output("%d,0", b); 1045 } 1046 1047 static char * 1048 preadline(int fd, size_t rlen, off_t off) 1049 { 1050 char *line; 1051 ssize_t nr; 1052 1053 line = xmalloc(rlen + 1); 1054 if ((nr = pread(fd, line, rlen, off)) < 0) 1055 err(2, "preadline"); 1056 if (nr > 0 && line[nr-1] == '\n') 1057 nr--; 1058 line[nr] = '\0'; 1059 return (line); 1060 } 1061 1062 static int 1063 ignoreline(char *line) 1064 { 1065 int ret; 1066 1067 ret = regexec(&ignore_re, line, 0, NULL, 0); 1068 free(line); 1069 return (ret == 0); /* if it matched, it should be ignored. */ 1070 } 1071 1072 /* 1073 * Indicate that there is a difference between lines a and b of the from file 1074 * to get to lines c to d of the to file. If a is greater then b then there 1075 * are no lines in the from file involved and this means that there were 1076 * lines appended (beginning at b). If c is greater than d then there are 1077 * lines missing from the to file. 1078 */ 1079 static void 1080 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, 1081 int *pflags) 1082 { 1083 static size_t max_context = 64; 1084 long curpos; 1085 int i, nc, f; 1086 const char *walk; 1087 1088 restart: 1089 if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) && 1090 a > b && c > d) 1091 return; 1092 if (ignore_pats != NULL) { 1093 char *line; 1094 /* 1095 * All lines in the change, insert, or delete must 1096 * match an ignore pattern for the change to be 1097 * ignored. 1098 */ 1099 if (a <= b) { /* Changes and deletes. */ 1100 for (i = a; i <= b; i++) { 1101 line = preadline(fileno(f1), 1102 ixold[i] - ixold[i - 1], ixold[i - 1]); 1103 if (!ignoreline(line)) 1104 goto proceed; 1105 } 1106 } 1107 if (a > b || c <= d) { /* Changes and inserts. */ 1108 for (i = c; i <= d; i++) { 1109 line = preadline(fileno(f2), 1110 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1111 if (!ignoreline(line)) 1112 goto proceed; 1113 } 1114 } 1115 return; 1116 } 1117 proceed: 1118 if (*pflags & D_HEADER && diff_format != D_BRIEF) { 1119 diff_output("%s %s %s\n", diffargs, file1, file2); 1120 *pflags &= ~D_HEADER; 1121 } 1122 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { 1123 /* 1124 * Allocate change records as needed. 1125 */ 1126 if (context_vec_ptr == context_vec_end - 1) { 1127 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1128 max_context <<= 1; 1129 context_vec_start = xreallocarray(context_vec_start, 1130 max_context, sizeof(*context_vec_start)); 1131 context_vec_end = context_vec_start + max_context; 1132 context_vec_ptr = context_vec_start + offset; 1133 } 1134 if (anychange == 0) { 1135 /* 1136 * Print the context/unidiff header first time through. 1137 */ 1138 print_header(file1, file2); 1139 anychange = 1; 1140 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && 1141 c > context_vec_ptr->d + (2 * diff_context) + 1) { 1142 /* 1143 * If this change is more than 'diff_context' lines from the 1144 * previous change, dump the record and reset it. 1145 */ 1146 if (diff_format == D_CONTEXT) 1147 dump_context_vec(f1, f2, *pflags); 1148 else 1149 dump_unified_vec(f1, f2, *pflags); 1150 } 1151 context_vec_ptr++; 1152 context_vec_ptr->a = a; 1153 context_vec_ptr->b = b; 1154 context_vec_ptr->c = c; 1155 context_vec_ptr->d = d; 1156 return; 1157 } 1158 if (anychange == 0) 1159 anychange = 1; 1160 switch (diff_format) { 1161 case D_BRIEF: 1162 return; 1163 case D_NORMAL: 1164 case D_EDIT: 1165 range(a, b, ","); 1166 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1167 if (diff_format == D_NORMAL) 1168 range(c, d, ","); 1169 diff_output("\n"); 1170 break; 1171 case D_REVERSE: 1172 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1173 range(a, b, " "); 1174 diff_output("\n"); 1175 break; 1176 case D_NREVERSE: 1177 if (a > b) 1178 diff_output("a%d %d\n", b, d - c + 1); 1179 else { 1180 diff_output("d%d %d\n", a, b - a + 1); 1181 if (!(c > d)) 1182 /* add changed lines */ 1183 diff_output("a%d %d\n", b, d - c + 1); 1184 } 1185 break; 1186 } 1187 if (diff_format == D_GFORMAT) { 1188 curpos = ftell(f1); 1189 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1190 nc = ixold[a > b ? b : a - 1] - curpos; 1191 for (i = 0; i < nc; i++) 1192 diff_output("%c", getc(f1)); 1193 for (walk = group_format; *walk != '\0'; walk++) { 1194 if (*walk == '%') { 1195 walk++; 1196 switch (*walk) { 1197 case '<': 1198 fetch(ixold, a, b, f1, '<', 1, *pflags); 1199 break; 1200 case '>': 1201 fetch(ixnew, c, d, f2, '>', 0, *pflags); 1202 break; 1203 default: 1204 diff_output("%%%c", *walk); 1205 break; 1206 } 1207 continue; 1208 } 1209 diff_output("%c", *walk); 1210 } 1211 } 1212 if (diff_format == D_NORMAL || diff_format == D_IFDEF) { 1213 fetch(ixold, a, b, f1, '<', 1, *pflags); 1214 if (a <= b && c <= d && diff_format == D_NORMAL) 1215 diff_output("---\n"); 1216 } 1217 f = 0; 1218 if (diff_format != D_GFORMAT) 1219 f = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); 1220 if (f != 0 && diff_format == D_EDIT) { 1221 /* 1222 * A non-zero return value for D_EDIT indicates that the 1223 * last line printed was a bare dot (".") that has been 1224 * escaped as ".." to prevent ed(1) from misinterpreting 1225 * it. We have to add a substitute command to change this 1226 * back and restart where we left off. 1227 */ 1228 diff_output(".\n"); 1229 diff_output("%ds/.//\n", a + f - 1); 1230 b = a + f - 1; 1231 a = b + 1; 1232 c += f; 1233 goto restart; 1234 } 1235 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) 1236 diff_output(".\n"); 1237 if (inifdef) { 1238 diff_output("#endif /* %s */\n", ifdefname); 1239 inifdef = 0; 1240 } 1241 } 1242 1243 static int 1244 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) 1245 { 1246 int i, j, c, lastc, col, nc; 1247 int newcol; 1248 1249 /* 1250 * When doing #ifdef's, copy down to current line 1251 * if this is the first file, so that stuff makes it to output. 1252 */ 1253 if ((diff_format == D_IFDEF) && oldfile) { 1254 long curpos = ftell(lb); 1255 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1256 nc = f[a > b ? b : a - 1] - curpos; 1257 for (i = 0; i < nc; i++) 1258 diff_output("%c", getc(lb)); 1259 } 1260 if (a > b) 1261 return (0); 1262 if (diff_format == D_IFDEF) { 1263 if (inifdef) { 1264 diff_output("#else /* %s%s */\n", 1265 oldfile == 1 ? "!" : "", ifdefname); 1266 } else { 1267 if (oldfile) 1268 diff_output("#ifndef %s\n", ifdefname); 1269 else 1270 diff_output("#ifdef %s\n", ifdefname); 1271 } 1272 inifdef = 1 + oldfile; 1273 } 1274 for (i = a; i <= b; i++) { 1275 fseek(lb, f[i - 1], SEEK_SET); 1276 nc = f[i] - f[i - 1]; 1277 if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) && 1278 ch != '\0') { 1279 diff_output("%c", ch); 1280 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT 1281 || diff_format == D_UNIFIED)) 1282 diff_output("\t"); 1283 else if (diff_format != D_UNIFIED) 1284 diff_output(" "); 1285 } 1286 col = 0; 1287 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1288 if ((c = getc(lb)) == EOF) { 1289 if (diff_format == D_EDIT || diff_format == D_REVERSE || 1290 diff_format == D_NREVERSE) 1291 warnx("No newline at end of file"); 1292 else 1293 diff_output("\n\\ No newline at end of " 1294 "file\n"); 1295 return (0); 1296 } 1297 if (c == '\t' && (flags & D_EXPANDTABS)) { 1298 newcol = ((col/tabsize)+1)*tabsize; 1299 do { 1300 diff_output(" "); 1301 } while (++col < newcol); 1302 } else { 1303 if (diff_format == D_EDIT && j == 1 && c == '\n' 1304 && lastc == '.') { 1305 /* 1306 * Don't print a bare "." line 1307 * since that will confuse ed(1). 1308 * Print ".." instead and return, 1309 * giving the caller an offset 1310 * from which to restart. 1311 */ 1312 diff_output(".\n"); 1313 return (i - a + 1); 1314 } 1315 diff_output("%c", c); 1316 col++; 1317 } 1318 } 1319 } 1320 return (0); 1321 } 1322 1323 /* 1324 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1325 */ 1326 static int 1327 readhash(FILE *f, int flags) 1328 { 1329 int i, t, space; 1330 int sum; 1331 1332 sum = 1; 1333 space = 0; 1334 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { 1335 if (flags & D_IGNORECASE) 1336 for (i = 0; (t = getc(f)) != '\n'; i++) { 1337 if (flags & D_STRIPCR && t == '\r') { 1338 t = getc(f); 1339 if (t == '\n') 1340 break; 1341 ungetc(t, f); 1342 } 1343 if (t == EOF) { 1344 if (i == 0) 1345 return (0); 1346 break; 1347 } 1348 sum = sum * 127 + chrtran[t]; 1349 } 1350 else 1351 for (i = 0; (t = getc(f)) != '\n'; i++) { 1352 if (flags & D_STRIPCR && t == '\r') { 1353 t = getc(f); 1354 if (t == '\n') 1355 break; 1356 ungetc(t, f); 1357 } 1358 if (t == EOF) { 1359 if (i == 0) 1360 return (0); 1361 break; 1362 } 1363 sum = sum * 127 + t; 1364 } 1365 } else { 1366 for (i = 0;;) { 1367 switch (t = getc(f)) { 1368 case '\r': 1369 case '\t': 1370 case '\v': 1371 case '\f': 1372 case ' ': 1373 space++; 1374 continue; 1375 default: 1376 if (space && (flags & D_IGNOREBLANKS) == 0) { 1377 i++; 1378 space = 0; 1379 } 1380 sum = sum * 127 + chrtran[t]; 1381 i++; 1382 continue; 1383 case EOF: 1384 if (i == 0) 1385 return (0); 1386 /* FALLTHROUGH */ 1387 case '\n': 1388 break; 1389 } 1390 break; 1391 } 1392 } 1393 /* 1394 * There is a remote possibility that we end up with a zero sum. 1395 * Zero is used as an EOF marker, so return 1 instead. 1396 */ 1397 return (sum == 0 ? 1 : sum); 1398 } 1399 1400 static int 1401 asciifile(FILE *f) 1402 { 1403 unsigned char buf[BUFSIZ]; 1404 size_t cnt; 1405 1406 if (f == NULL) 1407 return (1); 1408 1409 rewind(f); 1410 cnt = fread(buf, 1, sizeof(buf), f); 1411 return (memchr(buf, '\0', cnt) == NULL); 1412 } 1413 1414 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) 1415 1416 static char * 1417 match_function(const long *f, int pos, FILE *fp) 1418 { 1419 unsigned char buf[FUNCTION_CONTEXT_SIZE]; 1420 size_t nc; 1421 int last = lastline; 1422 const char *state = NULL; 1423 1424 lastline = pos; 1425 while (pos > last) { 1426 fseek(fp, f[pos - 1], SEEK_SET); 1427 nc = f[pos] - f[pos - 1]; 1428 if (nc >= sizeof(buf)) 1429 nc = sizeof(buf) - 1; 1430 nc = fread(buf, 1, nc, fp); 1431 if (nc > 0) { 1432 buf[nc] = '\0'; 1433 buf[strcspn(buf, "\n")] = '\0'; 1434 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 1435 if (begins_with(buf, "private:")) { 1436 if (!state) 1437 state = " (private)"; 1438 } else if (begins_with(buf, "protected:")) { 1439 if (!state) 1440 state = " (protected)"; 1441 } else if (begins_with(buf, "public:")) { 1442 if (!state) 1443 state = " (public)"; 1444 } else { 1445 strlcpy(lastbuf, buf, sizeof lastbuf); 1446 if (state) 1447 strlcat(lastbuf, state, 1448 sizeof lastbuf); 1449 lastmatchline = pos; 1450 return lastbuf; 1451 } 1452 } 1453 } 1454 pos--; 1455 } 1456 return lastmatchline > 0 ? lastbuf : NULL; 1457 } 1458 1459 /* dump accumulated "context" diff changes */ 1460 static void 1461 dump_context_vec(FILE *f1, FILE *f2, int flags) 1462 { 1463 struct context_vec *cvp = context_vec_start; 1464 int lowa, upb, lowc, upd, do_output; 1465 int a, b, c, d; 1466 char ch, *f; 1467 1468 if (context_vec_start > context_vec_ptr) 1469 return; 1470 1471 b = d = 0; /* gcc */ 1472 lowa = MAX(1, cvp->a - diff_context); 1473 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1474 lowc = MAX(1, cvp->c - diff_context); 1475 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1476 1477 diff_output("***************"); 1478 if ((flags & D_PROTOTYPE)) { 1479 f = match_function(ixold, lowa-1, f1); 1480 if (f != NULL) 1481 diff_output(" %s", f); 1482 } 1483 diff_output("\n*** "); 1484 range(lowa, upb, ","); 1485 diff_output(" ****\n"); 1486 1487 /* 1488 * Output changes to the "old" file. The first loop suppresses 1489 * output if there were no changes to the "old" file (we'll see 1490 * the "old" lines as context in the "new" list). 1491 */ 1492 do_output = 0; 1493 for (; cvp <= context_vec_ptr; cvp++) 1494 if (cvp->a <= cvp->b) { 1495 cvp = context_vec_start; 1496 do_output++; 1497 break; 1498 } 1499 if (do_output) { 1500 while (cvp <= context_vec_ptr) { 1501 a = cvp->a; 1502 b = cvp->b; 1503 c = cvp->c; 1504 d = cvp->d; 1505 1506 if (a <= b && c <= d) 1507 ch = 'c'; 1508 else 1509 ch = (a <= b) ? 'd' : 'a'; 1510 1511 if (ch == 'a') 1512 fetch(ixold, lowa, b, f1, ' ', 0, flags); 1513 else { 1514 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1515 fetch(ixold, a, b, f1, 1516 ch == 'c' ? '!' : '-', 0, flags); 1517 } 1518 lowa = b + 1; 1519 cvp++; 1520 } 1521 fetch(ixold, b + 1, upb, f1, ' ', 0, flags); 1522 } 1523 /* output changes to the "new" file */ 1524 diff_output("--- "); 1525 range(lowc, upd, ","); 1526 diff_output(" ----\n"); 1527 1528 do_output = 0; 1529 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1530 if (cvp->c <= cvp->d) { 1531 cvp = context_vec_start; 1532 do_output++; 1533 break; 1534 } 1535 if (do_output) { 1536 while (cvp <= context_vec_ptr) { 1537 a = cvp->a; 1538 b = cvp->b; 1539 c = cvp->c; 1540 d = cvp->d; 1541 1542 if (a <= b && c <= d) 1543 ch = 'c'; 1544 else 1545 ch = (a <= b) ? 'd' : 'a'; 1546 1547 if (ch == 'd') 1548 fetch(ixnew, lowc, d, f2, ' ', 0, flags); 1549 else { 1550 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1551 fetch(ixnew, c, d, f2, 1552 ch == 'c' ? '!' : '+', 0, flags); 1553 } 1554 lowc = d + 1; 1555 cvp++; 1556 } 1557 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1558 } 1559 context_vec_ptr = context_vec_start - 1; 1560 } 1561 1562 /* dump accumulated "unified" diff changes */ 1563 static void 1564 dump_unified_vec(FILE *f1, FILE *f2, int flags) 1565 { 1566 struct context_vec *cvp = context_vec_start; 1567 int lowa, upb, lowc, upd; 1568 int a, b, c, d; 1569 char ch, *f; 1570 1571 if (context_vec_start > context_vec_ptr) 1572 return; 1573 1574 b = d = 0; /* gcc */ 1575 lowa = MAX(1, cvp->a - diff_context); 1576 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1577 lowc = MAX(1, cvp->c - diff_context); 1578 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1579 1580 diff_output("@@ -"); 1581 uni_range(lowa, upb); 1582 diff_output(" +"); 1583 uni_range(lowc, upd); 1584 diff_output(" @@"); 1585 if ((flags & D_PROTOTYPE)) { 1586 f = match_function(ixold, lowa-1, f1); 1587 if (f != NULL) 1588 diff_output(" %s", f); 1589 } 1590 diff_output("\n"); 1591 1592 /* 1593 * Output changes in "unified" diff format--the old and new lines 1594 * are printed together. 1595 */ 1596 for (; cvp <= context_vec_ptr; cvp++) { 1597 a = cvp->a; 1598 b = cvp->b; 1599 c = cvp->c; 1600 d = cvp->d; 1601 1602 /* 1603 * c: both new and old changes 1604 * d: only changes in the old file 1605 * a: only changes in the new file 1606 */ 1607 if (a <= b && c <= d) 1608 ch = 'c'; 1609 else 1610 ch = (a <= b) ? 'd' : 'a'; 1611 1612 switch (ch) { 1613 case 'c': 1614 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1615 fetch(ixold, a, b, f1, '-', 0, flags); 1616 fetch(ixnew, c, d, f2, '+', 0, flags); 1617 break; 1618 case 'd': 1619 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1620 fetch(ixold, a, b, f1, '-', 0, flags); 1621 break; 1622 case 'a': 1623 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1624 fetch(ixnew, c, d, f2, '+', 0, flags); 1625 break; 1626 } 1627 lowa = b + 1; 1628 lowc = d + 1; 1629 } 1630 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1631 1632 context_vec_ptr = context_vec_start - 1; 1633 } 1634 1635 static void 1636 print_header(const char *file1, const char *file2) 1637 { 1638 const char *time_format; 1639 char buf1[256]; 1640 char buf2[256]; 1641 char end1[10]; 1642 char end2[10]; 1643 struct tm tm1, tm2, *tm_ptr1, *tm_ptr2; 1644 int nsec1 = stb1.st_mtim.tv_nsec; 1645 int nsec2 = stb2.st_mtim.tv_nsec; 1646 1647 time_format = "%Y-%m-%d %H:%M:%S"; 1648 1649 if (cflag) 1650 time_format = "%c"; 1651 tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1); 1652 tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2); 1653 strftime(buf1, 256, time_format, tm_ptr1); 1654 strftime(buf2, 256, time_format, tm_ptr2); 1655 if (!cflag) { 1656 strftime(end1, 10, "%z", tm_ptr1); 1657 strftime(end2, 10, "%z", tm_ptr2); 1658 sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1); 1659 sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2); 1660 } 1661 if (label[0] != NULL) 1662 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", 1663 label[0]); 1664 else 1665 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---", 1666 file1, buf1); 1667 if (label[1] != NULL) 1668 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", 1669 label[1]); 1670 else 1671 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++", 1672 file2, buf2); 1673 } 1674