1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #include <sys/cdefs.h> 68 __FBSDID("$FreeBSD$"); 69 70 #include <sys/capsicum.h> 71 #include <sys/procdesc.h> 72 #include <sys/stat.h> 73 #include <sys/types.h> 74 #include <sys/event.h> 75 #include <sys/wait.h> 76 77 #include <capsicum_helpers.h> 78 #include <ctype.h> 79 #include <err.h> 80 #include <errno.h> 81 #include <fcntl.h> 82 #include <paths.h> 83 #include <stddef.h> 84 #include <stdint.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <unistd.h> 89 #include <limits.h> 90 #include <signal.h> 91 92 #include "diff.h" 93 #include "xmalloc.h" 94 95 #define _PATH_PR "/usr/bin/pr" 96 97 #ifdef ST_MTIM_NSEC 98 # define TIMESPEC_NS(timespec) ((timespec).ST_MTIM_NSEC) 99 #else 100 # define TIMESPEC_NS(timespec) 0 101 #endif 102 103 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) 104 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) 105 106 /* 107 * diff - compare two files. 108 */ 109 110 /* 111 * Uses an algorithm due to Harold Stone, which finds 112 * a pair of longest identical subsequences in the two 113 * files. 114 * 115 * The major goal is to generate the match vector J. 116 * J[i] is the index of the line in file1 corresponding 117 * to line i file0. J[i] = 0 if there is no 118 * such line in file1. 119 * 120 * Lines are hashed so as to work in core. All potential 121 * matches are located by sorting the lines of each file 122 * on the hash (called ``value''). In particular, this 123 * collects the equivalence classes in file1 together. 124 * Subroutine equiv replaces the value of each line in 125 * file0 by the index of the first element of its 126 * matching equivalence in (the reordered) file1. 127 * To save space equiv squeezes file1 into a single 128 * array member in which the equivalence classes 129 * are simply concatenated, except that their first 130 * members are flagged by changing sign. 131 * 132 * Next the indices that point into member are unsorted into 133 * array class according to the original order of file0. 134 * 135 * The cleverness lies in routine stone. This marches 136 * through the lines of file0, developing a vector klist 137 * of "k-candidates". At step i a k-candidate is a matched 138 * pair of lines x,y (x in file0 y in file1) such that 139 * there is a common subsequence of length k 140 * between the first i lines of file0 and the first y 141 * lines of file1, but there is no such subsequence for 142 * any smaller y. x is the earliest possible mate to y 143 * that occurs in such a subsequence. 144 * 145 * Whenever any of the members of the equivalence class of 146 * lines in file1 matable to a line in file0 has serial number 147 * less than the y of some k-candidate, that k-candidate 148 * with the smallest such y is replaced. The new 149 * k-candidate is chained (via pred) to the current 150 * k-1 candidate so that the actual subsequence can 151 * be recovered. When a member has serial number greater 152 * that the y of all k-candidates, the klist is extended. 153 * At the end, the longest subsequence is pulled out 154 * and placed in the array J by unravel 155 * 156 * With J in hand, the matches there recorded are 157 * check'ed against reality to assure that no spurious 158 * matches have crept in due to hashing. If they have, 159 * they are broken, and "jackpot" is recorded--a harmless 160 * matter except that a true match for a spuriously 161 * mated line may now be unnecessarily reported as a change. 162 * 163 * Much of the complexity of the program comes simply 164 * from trying to minimize core utilization and 165 * maximize the range of doable problems by dynamically 166 * allocating what is needed and reusing what is not. 167 * The core requirements for problems larger than somewhat 168 * are (in words) 2*length(file0) + length(file1) + 169 * 3*(number of k-candidates installed), typically about 170 * 6n words for files of length n. 171 */ 172 173 struct cand { 174 int x; 175 int y; 176 int pred; 177 }; 178 179 static struct line { 180 int serial; 181 int value; 182 } *file[2]; 183 184 /* 185 * The following struct is used to record change information when 186 * doing a "context" or "unified" diff. (see routine "change" to 187 * understand the highly mnemonic field names) 188 */ 189 struct context_vec { 190 int a; /* start line in old file */ 191 int b; /* end line in old file */ 192 int c; /* start line in new file */ 193 int d; /* end line in new file */ 194 }; 195 196 #define diff_output printf 197 static FILE *opentemp(const char *); 198 static void output(char *, FILE *, char *, FILE *, int); 199 static void check(FILE *, FILE *, int); 200 static void range(int, int, const char *); 201 static void uni_range(int, int); 202 static void dump_context_vec(FILE *, FILE *, int); 203 static void dump_unified_vec(FILE *, FILE *, int); 204 static void prepare(int, FILE *, size_t, int); 205 static void prune(void); 206 static void equiv(struct line *, int, struct line *, int, int *); 207 static void unravel(int); 208 static void unsort(struct line *, int, int *); 209 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); 210 static void sort(struct line *, int); 211 static void print_header(const char *, const char *); 212 static int ignoreline(char *); 213 static int asciifile(FILE *); 214 static int fetch(long *, int, int, FILE *, int, int, int); 215 static int newcand(int, int, int); 216 static int search(int *, int, int); 217 static int skipline(FILE *); 218 static int isqrt(int); 219 static int stone(int *, int, int *, int *, int); 220 static int readhash(FILE *, int); 221 static int files_differ(FILE *, FILE *, int); 222 static char *match_function(const long *, int, FILE *); 223 static char *preadline(int, size_t, off_t); 224 225 static int *J; /* will be overlaid on class */ 226 static int *class; /* will be overlaid on file[0] */ 227 static int *klist; /* will be overlaid on file[0] after class */ 228 static int *member; /* will be overlaid on file[1] */ 229 static int clen; 230 static int inifdef; /* whether or not we are in a #ifdef block */ 231 static int len[2]; 232 static int pref, suff; /* length of prefix and suffix */ 233 static int slen[2]; 234 static int anychange; 235 static long *ixnew; /* will be overlaid on file[1] */ 236 static long *ixold; /* will be overlaid on klist */ 237 static struct cand *clist; /* merely a free storage pot for candidates */ 238 static int clistlen; /* the length of clist */ 239 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 240 static u_char *chrtran; /* translation table for case-folding */ 241 static struct context_vec *context_vec_start; 242 static struct context_vec *context_vec_end; 243 static struct context_vec *context_vec_ptr; 244 245 #define FUNCTION_CONTEXT_SIZE 55 246 static char lastbuf[FUNCTION_CONTEXT_SIZE]; 247 static int lastline; 248 static int lastmatchline; 249 250 251 /* 252 * chrtran points to one of 2 translation tables: cup2low if folding upper to 253 * lower case clow2low if not folding case 254 */ 255 static u_char clow2low[256] = { 256 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 257 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 258 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 259 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 260 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 261 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 262 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 263 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 264 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 265 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 266 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 267 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 268 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 269 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 270 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 271 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 272 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 273 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 274 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 275 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 276 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 277 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 278 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 279 0xfd, 0xfe, 0xff 280 }; 281 282 static u_char cup2low[256] = { 283 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 284 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 285 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 286 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 287 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 288 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 289 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 290 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 291 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 292 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 293 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 294 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 295 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 296 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 297 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 298 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 299 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 300 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 301 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 302 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 303 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 304 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 305 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 306 0xfd, 0xfe, 0xff 307 }; 308 309 int 310 diffreg(char *file1, char *file2, int flags, int capsicum) 311 { 312 FILE *f1, *f2; 313 int i, rval; 314 int ostdout = -1; 315 int pr_pd, kq; 316 struct kevent *e; 317 cap_rights_t rights_ro; 318 319 e = NULL; 320 kq = -1; 321 f1 = f2 = NULL; 322 rval = D_SAME; 323 anychange = 0; 324 lastline = 0; 325 lastmatchline = 0; 326 context_vec_ptr = context_vec_start - 1; 327 if (flags & D_IGNORECASE) 328 chrtran = cup2low; 329 else 330 chrtran = clow2low; 331 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 332 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); 333 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 334 goto closem; 335 336 if (flags & D_EMPTY1) 337 f1 = fopen(_PATH_DEVNULL, "r"); 338 else { 339 if (!S_ISREG(stb1.st_mode)) { 340 if ((f1 = opentemp(file1)) == NULL || 341 fstat(fileno(f1), &stb1) < 0) { 342 warn("%s", file1); 343 status |= 2; 344 goto closem; 345 } 346 } else if (strcmp(file1, "-") == 0) 347 f1 = stdin; 348 else 349 f1 = fopen(file1, "r"); 350 } 351 if (f1 == NULL) { 352 warn("%s", file1); 353 status |= 2; 354 goto closem; 355 } 356 357 if (flags & D_EMPTY2) 358 f2 = fopen(_PATH_DEVNULL, "r"); 359 else { 360 if (!S_ISREG(stb2.st_mode)) { 361 if ((f2 = opentemp(file2)) == NULL || 362 fstat(fileno(f2), &stb2) < 0) { 363 warn("%s", file2); 364 status |= 2; 365 goto closem; 366 } 367 } else if (strcmp(file2, "-") == 0) 368 f2 = stdin; 369 else 370 f2 = fopen(file2, "r"); 371 } 372 if (f2 == NULL) { 373 warn("%s", file2); 374 status |= 2; 375 goto closem; 376 } 377 378 if (lflag) { 379 /* redirect stdout to pr */ 380 int pfd[2]; 381 pid_t pid; 382 char *header; 383 384 xasprintf(&header, "%s %s %s", diffargs, file1, file2); 385 signal(SIGPIPE, SIG_IGN); 386 fflush(stdout); 387 rewind(stdout); 388 pipe(pfd); 389 switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) { 390 case -1: 391 status |= 2; 392 free(header); 393 err(2, "No more processes"); 394 case 0: 395 /* child */ 396 if (pfd[0] != STDIN_FILENO) { 397 dup2(pfd[0], STDIN_FILENO); 398 close(pfd[0]); 399 } 400 close(pfd[1]); 401 execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0); 402 _exit(127); 403 default: 404 405 /* parent */ 406 if (pfd[1] != STDOUT_FILENO) { 407 ostdout = dup(STDOUT_FILENO); 408 dup2(pfd[1], STDOUT_FILENO); 409 close(pfd[1]); 410 } 411 close(pfd[0]); 412 rewind(stdout); 413 free(header); 414 kq = kqueue(); 415 if (kq == -1) 416 err(2, "kqueue"); 417 e = xmalloc(sizeof(struct kevent)); 418 EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0, 419 NULL); 420 if (kevent(kq, e, 1, NULL, 0, NULL) == -1) 421 err(2, "kevent"); 422 } 423 } 424 425 if (capsicum) { 426 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 427 if (cap_rights_limit(fileno(f1), &rights_ro) < 0 428 && errno != ENOSYS) 429 err(2, "unable to limit rights on: %s", file1); 430 if (cap_rights_limit(fileno(f2), &rights_ro) < 0 && 431 errno != ENOSYS) 432 err(2, "unable to limit rights on: %s", file2); 433 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 434 /* stding has already been limited */ 435 if (caph_limit_stderr() == -1) 436 err(2, "unable to limit stderr"); 437 if (caph_limit_stdout() == -1) 438 err(2, "unable to limit stdout"); 439 } else if (caph_limit_stdio() == -1) 440 err(2, "unable to limit stdio"); 441 442 caph_cache_catpages(); 443 caph_cache_tzdata(); 444 if (cap_enter() < 0 && errno != ENOSYS) 445 err(2, "unable to enter capability mode"); 446 } 447 448 switch (files_differ(f1, f2, flags)) { 449 case 0: 450 goto closem; 451 case 1: 452 break; 453 default: 454 /* error */ 455 status |= 2; 456 goto closem; 457 } 458 459 if ((flags & D_FORCEASCII) == 0 && 460 (!asciifile(f1) || !asciifile(f2))) { 461 rval = D_BINARY; 462 status |= 1; 463 goto closem; 464 } 465 prepare(0, f1, stb1.st_size, flags); 466 prepare(1, f2, stb2.st_size, flags); 467 468 prune(); 469 sort(sfile[0], slen[0]); 470 sort(sfile[1], slen[1]); 471 472 member = (int *)file[1]; 473 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 474 member = xreallocarray(member, slen[1] + 2, sizeof(*member)); 475 476 class = (int *)file[0]; 477 unsort(sfile[0], slen[0], class); 478 class = xreallocarray(class, slen[0] + 2, sizeof(*class)); 479 480 klist = xcalloc(slen[0] + 2, sizeof(*klist)); 481 clen = 0; 482 clistlen = 100; 483 clist = xcalloc(clistlen, sizeof(*clist)); 484 i = stone(class, slen[0], member, klist, flags); 485 free(member); 486 free(class); 487 488 J = xreallocarray(J, len[0] + 2, sizeof(*J)); 489 unravel(klist[i]); 490 free(clist); 491 free(klist); 492 493 ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); 494 ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); 495 check(f1, f2, flags); 496 output(file1, f1, file2, f2, flags); 497 if (ostdout != -1 && e != NULL) { 498 /* close the pipe to pr and restore stdout */ 499 int wstatus; 500 501 fflush(stdout); 502 if (ostdout != STDOUT_FILENO) { 503 close(STDOUT_FILENO); 504 dup2(ostdout, STDOUT_FILENO); 505 close(ostdout); 506 } 507 if (kevent(kq, NULL, 0, e, 1, NULL) == -1) 508 err(2, "kevent"); 509 wstatus = e[0].data; 510 close(kq); 511 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) 512 errx(2, "pr exited abnormally"); 513 else if (WIFSIGNALED(wstatus)) 514 errx(2, "pr killed by signal %d", 515 WTERMSIG(wstatus)); 516 } 517 518 closem: 519 if (anychange) { 520 status |= 1; 521 if (rval == D_SAME) 522 rval = D_DIFFER; 523 } 524 if (f1 != NULL) 525 fclose(f1); 526 if (f2 != NULL) 527 fclose(f2); 528 529 return (rval); 530 } 531 532 /* 533 * Check to see if the given files differ. 534 * Returns 0 if they are the same, 1 if different, and -1 on error. 535 * XXX - could use code from cmp(1) [faster] 536 */ 537 static int 538 files_differ(FILE *f1, FILE *f2, int flags) 539 { 540 char buf1[BUFSIZ], buf2[BUFSIZ]; 541 size_t i, j; 542 543 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 544 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 545 return (1); 546 for (;;) { 547 i = fread(buf1, 1, sizeof(buf1), f1); 548 j = fread(buf2, 1, sizeof(buf2), f2); 549 if ((!i && ferror(f1)) || (!j && ferror(f2))) 550 return (-1); 551 if (i != j) 552 return (1); 553 if (i == 0) 554 return (0); 555 if (memcmp(buf1, buf2, i) != 0) 556 return (1); 557 } 558 } 559 560 static FILE * 561 opentemp(const char *f) 562 { 563 char buf[BUFSIZ], tempfile[PATH_MAX]; 564 ssize_t nread; 565 int ifd, ofd; 566 567 if (strcmp(f, "-") == 0) 568 ifd = STDIN_FILENO; 569 else if ((ifd = open(f, O_RDONLY, 0644)) < 0) 570 return (NULL); 571 572 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); 573 574 if ((ofd = mkstemp(tempfile)) < 0) { 575 close(ifd); 576 return (NULL); 577 } 578 unlink(tempfile); 579 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 580 if (write(ofd, buf, nread) != nread) { 581 close(ifd); 582 close(ofd); 583 return (NULL); 584 } 585 } 586 close(ifd); 587 lseek(ofd, (off_t)0, SEEK_SET); 588 return (fdopen(ofd, "r")); 589 } 590 591 char * 592 splice(char *dir, char *path) 593 { 594 char *tail, *buf; 595 size_t dirlen; 596 597 dirlen = strlen(dir); 598 while (dirlen != 0 && dir[dirlen - 1] == '/') 599 dirlen--; 600 if ((tail = strrchr(path, '/')) == NULL) 601 tail = path; 602 else 603 tail++; 604 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); 605 return (buf); 606 } 607 608 static void 609 prepare(int i, FILE *fd, size_t filesize, int flags) 610 { 611 struct line *p; 612 int h; 613 size_t sz, j; 614 615 rewind(fd); 616 617 sz = ((unsigned long)filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25; 618 if (sz < 100) 619 sz = 100; 620 621 p = xcalloc(sz + 3, sizeof(*p)); 622 for (j = 0; (h = readhash(fd, flags));) { 623 if (j == sz) { 624 sz = sz * 3 / 2; 625 p = xreallocarray(p, sz + 3, sizeof(*p)); 626 } 627 p[++j].value = h; 628 } 629 len[i] = j; 630 file[i] = p; 631 } 632 633 static void 634 prune(void) 635 { 636 int i, j; 637 638 for (pref = 0; pref < len[0] && pref < len[1] && 639 file[0][pref + 1].value == file[1][pref + 1].value; 640 pref++) 641 ; 642 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 643 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 644 suff++) 645 ; 646 for (j = 0; j < 2; j++) { 647 sfile[j] = file[j] + pref; 648 slen[j] = len[j] - pref - suff; 649 for (i = 0; i <= slen[j]; i++) 650 sfile[j][i].serial = i; 651 } 652 } 653 654 static void 655 equiv(struct line *a, int n, struct line *b, int m, int *c) 656 { 657 int i, j; 658 659 i = j = 1; 660 while (i <= n && j <= m) { 661 if (a[i].value < b[j].value) 662 a[i++].value = 0; 663 else if (a[i].value == b[j].value) 664 a[i++].value = j; 665 else 666 j++; 667 } 668 while (i <= n) 669 a[i++].value = 0; 670 b[m + 1].value = 0; 671 j = 0; 672 while (++j <= m) { 673 c[j] = -b[j].serial; 674 while (b[j + 1].value == b[j].value) { 675 j++; 676 c[j] = b[j].serial; 677 } 678 } 679 c[j] = -1; 680 } 681 682 /* Code taken from ping.c */ 683 static int 684 isqrt(int n) 685 { 686 int y, x = 1; 687 688 if (n == 0) 689 return (0); 690 691 do { /* newton was a stinker */ 692 y = x; 693 x = n / x; 694 x += y; 695 x /= 2; 696 } while ((x - y) > 1 || (x - y) < -1); 697 698 return (x); 699 } 700 701 static int 702 stone(int *a, int n, int *b, int *c, int flags) 703 { 704 int i, k, y, j, l; 705 int oldc, tc, oldl, sq; 706 u_int numtries, bound; 707 708 if (flags & D_MINIMAL) 709 bound = UINT_MAX; 710 else { 711 sq = isqrt(n); 712 bound = MAXIMUM(256, sq); 713 } 714 715 k = 0; 716 c[0] = newcand(0, 0, 0); 717 for (i = 1; i <= n; i++) { 718 j = a[i]; 719 if (j == 0) 720 continue; 721 y = -b[j]; 722 oldl = 0; 723 oldc = c[0]; 724 numtries = 0; 725 do { 726 if (y <= clist[oldc].y) 727 continue; 728 l = search(c, k, y); 729 if (l != oldl + 1) 730 oldc = c[l - 1]; 731 if (l <= k) { 732 if (clist[c[l]].y <= y) 733 continue; 734 tc = c[l]; 735 c[l] = newcand(i, y, oldc); 736 oldc = tc; 737 oldl = l; 738 numtries++; 739 } else { 740 c[l] = newcand(i, y, oldc); 741 k++; 742 break; 743 } 744 } while ((y = b[++j]) > 0 && numtries < bound); 745 } 746 return (k); 747 } 748 749 static int 750 newcand(int x, int y, int pred) 751 { 752 struct cand *q; 753 754 if (clen == clistlen) { 755 clistlen = clistlen * 11 / 10; 756 clist = xreallocarray(clist, clistlen, sizeof(*clist)); 757 } 758 q = clist + clen; 759 q->x = x; 760 q->y = y; 761 q->pred = pred; 762 return (clen++); 763 } 764 765 static int 766 search(int *c, int k, int y) 767 { 768 int i, j, l, t; 769 770 if (clist[c[k]].y < y) /* quick look for typical case */ 771 return (k + 1); 772 i = 0; 773 j = k + 1; 774 for (;;) { 775 l = (i + j) / 2; 776 if (l <= i) 777 break; 778 t = clist[c[l]].y; 779 if (t > y) 780 j = l; 781 else if (t < y) 782 i = l; 783 else 784 return (l); 785 } 786 return (l + 1); 787 } 788 789 static void 790 unravel(int p) 791 { 792 struct cand *q; 793 int i; 794 795 for (i = 0; i <= len[0]; i++) 796 J[i] = i <= pref ? i : 797 i > len[0] - suff ? i + len[1] - len[0] : 0; 798 for (q = clist + p; q->y != 0; q = clist + q->pred) 799 J[q->x + pref] = q->y + pref; 800 } 801 802 /* 803 * Check does double duty: 804 * 1. ferret out any fortuitous correspondences due 805 * to confounding by hashing (which result in "jackpot") 806 * 2. collect random access indexes to the two files 807 */ 808 static void 809 check(FILE *f1, FILE *f2, int flags) 810 { 811 int i, j, jackpot, c, d; 812 long ctold, ctnew; 813 814 rewind(f1); 815 rewind(f2); 816 j = 1; 817 ixold[0] = ixnew[0] = 0; 818 jackpot = 0; 819 ctold = ctnew = 0; 820 for (i = 1; i <= len[0]; i++) { 821 if (J[i] == 0) { 822 ixold[i] = ctold += skipline(f1); 823 continue; 824 } 825 while (j < J[i]) { 826 ixnew[j] = ctnew += skipline(f2); 827 j++; 828 } 829 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) { 830 for (;;) { 831 c = getc(f1); 832 d = getc(f2); 833 /* 834 * GNU diff ignores a missing newline 835 * in one file for -b or -w. 836 */ 837 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { 838 if (c == EOF && d == '\n') { 839 ctnew++; 840 break; 841 } else if (c == '\n' && d == EOF) { 842 ctold++; 843 break; 844 } 845 } 846 ctold++; 847 ctnew++; 848 if (flags & D_STRIPCR) { 849 if (c == '\r') { 850 if ((c = getc(f1)) == '\n') { 851 ctnew++; 852 break; 853 } 854 } 855 if (d == '\r') { 856 if ((d = getc(f2)) == '\n') { 857 ctold++; 858 break; 859 } 860 } 861 } 862 if ((flags & D_FOLDBLANKS) && isspace(c) && 863 isspace(d)) { 864 do { 865 if (c == '\n') 866 break; 867 ctold++; 868 } while (isspace(c = getc(f1))); 869 do { 870 if (d == '\n') 871 break; 872 ctnew++; 873 } while (isspace(d = getc(f2))); 874 } else if ((flags & D_IGNOREBLANKS)) { 875 while (isspace(c) && c != '\n') { 876 c = getc(f1); 877 ctold++; 878 } 879 while (isspace(d) && d != '\n') { 880 d = getc(f2); 881 ctnew++; 882 } 883 } 884 if (chrtran[c] != chrtran[d]) { 885 jackpot++; 886 J[i] = 0; 887 if (c != '\n' && c != EOF) 888 ctold += skipline(f1); 889 if (d != '\n' && c != EOF) 890 ctnew += skipline(f2); 891 break; 892 } 893 if (c == '\n' || c == EOF) 894 break; 895 } 896 } else { 897 for (;;) { 898 ctold++; 899 ctnew++; 900 if ((c = getc(f1)) != (d = getc(f2))) { 901 /* jackpot++; */ 902 J[i] = 0; 903 if (c != '\n' && c != EOF) 904 ctold += skipline(f1); 905 if (d != '\n' && c != EOF) 906 ctnew += skipline(f2); 907 break; 908 } 909 if (c == '\n' || c == EOF) 910 break; 911 } 912 } 913 ixold[i] = ctold; 914 ixnew[j] = ctnew; 915 j++; 916 } 917 for (; j <= len[1]; j++) { 918 ixnew[j] = ctnew += skipline(f2); 919 } 920 /* 921 * if (jackpot) 922 * fprintf(stderr, "jackpot\n"); 923 */ 924 } 925 926 /* shellsort CACM #201 */ 927 static void 928 sort(struct line *a, int n) 929 { 930 struct line *ai, *aim, w; 931 int j, m = 0, k; 932 933 if (n == 0) 934 return; 935 for (j = 1; j <= n; j *= 2) 936 m = 2 * j - 1; 937 for (m /= 2; m != 0; m /= 2) { 938 k = n - m; 939 for (j = 1; j <= k; j++) { 940 for (ai = &a[j]; ai > a; ai -= m) { 941 aim = &ai[m]; 942 if (aim < ai) 943 break; /* wraparound */ 944 if (aim->value > ai[0].value || 945 (aim->value == ai[0].value && 946 aim->serial > ai[0].serial)) 947 break; 948 w.value = ai[0].value; 949 ai[0].value = aim->value; 950 aim->value = w.value; 951 w.serial = ai[0].serial; 952 ai[0].serial = aim->serial; 953 aim->serial = w.serial; 954 } 955 } 956 } 957 } 958 959 static void 960 unsort(struct line *f, int l, int *b) 961 { 962 int *a, i; 963 964 a = xcalloc(l + 1, sizeof(*a)); 965 for (i = 1; i <= l; i++) 966 a[f[i].serial] = f[i].value; 967 for (i = 1; i <= l; i++) 968 b[i] = a[i]; 969 free(a); 970 } 971 972 static int 973 skipline(FILE *f) 974 { 975 int i, c; 976 977 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 978 continue; 979 return (i); 980 } 981 982 static void 983 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) 984 { 985 int m, i0, i1, j0, j1; 986 987 rewind(f1); 988 rewind(f2); 989 m = len[0]; 990 J[0] = 0; 991 J[m + 1] = len[1] + 1; 992 if (diff_format != D_EDIT) { 993 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 994 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 995 i0++; 996 j0 = J[i0 - 1] + 1; 997 i1 = i0 - 1; 998 while (i1 < m && J[i1 + 1] == 0) 999 i1++; 1000 j1 = J[i1 + 1] - 1; 1001 J[i1] = j1; 1002 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); 1003 } 1004 } else { 1005 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 1006 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 1007 i0--; 1008 j0 = J[i0 + 1] - 1; 1009 i1 = i0 + 1; 1010 while (i1 > 1 && J[i1 - 1] == 0) 1011 i1--; 1012 j1 = J[i1 - 1] + 1; 1013 J[i1] = j1; 1014 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); 1015 } 1016 } 1017 if (m == 0) 1018 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); 1019 if (diff_format == D_IFDEF) { 1020 for (;;) { 1021 #define c i0 1022 if ((c = getc(f1)) == EOF) 1023 return; 1024 diff_output("%c", c); 1025 } 1026 #undef c 1027 } 1028 if (anychange != 0) { 1029 if (diff_format == D_CONTEXT) 1030 dump_context_vec(f1, f2, flags); 1031 else if (diff_format == D_UNIFIED) 1032 dump_unified_vec(f1, f2, flags); 1033 } 1034 } 1035 1036 static void 1037 range(int a, int b, const char *separator) 1038 { 1039 diff_output("%d", a > b ? b : a); 1040 if (a < b) 1041 diff_output("%s%d", separator, b); 1042 } 1043 1044 static void 1045 uni_range(int a, int b) 1046 { 1047 if (a < b) 1048 diff_output("%d,%d", a, b - a + 1); 1049 else if (a == b) 1050 diff_output("%d", b); 1051 else 1052 diff_output("%d,0", b); 1053 } 1054 1055 static char * 1056 preadline(int fd, size_t rlen, off_t off) 1057 { 1058 char *line; 1059 ssize_t nr; 1060 1061 line = xmalloc(rlen + 1); 1062 if ((nr = pread(fd, line, rlen, off)) < 0) 1063 err(2, "preadline"); 1064 if (nr > 0 && line[nr-1] == '\n') 1065 nr--; 1066 line[nr] = '\0'; 1067 return (line); 1068 } 1069 1070 static int 1071 ignoreline(char *line) 1072 { 1073 int ret; 1074 1075 ret = regexec(&ignore_re, line, 0, NULL, 0); 1076 free(line); 1077 return (ret == 0); /* if it matched, it should be ignored. */ 1078 } 1079 1080 /* 1081 * Indicate that there is a difference between lines a and b of the from file 1082 * to get to lines c to d of the to file. If a is greater then b then there 1083 * are no lines in the from file involved and this means that there were 1084 * lines appended (beginning at b). If c is greater than d then there are 1085 * lines missing from the to file. 1086 */ 1087 static void 1088 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, 1089 int *pflags) 1090 { 1091 static size_t max_context = 64; 1092 int i; 1093 1094 restart: 1095 if (diff_format != D_IFDEF && a > b && c > d) 1096 return; 1097 if (ignore_pats != NULL) { 1098 char *line; 1099 /* 1100 * All lines in the change, insert, or delete must 1101 * match an ignore pattern for the change to be 1102 * ignored. 1103 */ 1104 if (a <= b) { /* Changes and deletes. */ 1105 for (i = a; i <= b; i++) { 1106 line = preadline(fileno(f1), 1107 ixold[i] - ixold[i - 1], ixold[i - 1]); 1108 if (!ignoreline(line)) 1109 goto proceed; 1110 } 1111 } 1112 if (a > b || c <= d) { /* Changes and inserts. */ 1113 for (i = c; i <= d; i++) { 1114 line = preadline(fileno(f2), 1115 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1116 if (!ignoreline(line)) 1117 goto proceed; 1118 } 1119 } 1120 return; 1121 } 1122 proceed: 1123 if (*pflags & D_HEADER) { 1124 diff_output("%s %s %s\n", diffargs, file1, file2); 1125 *pflags &= ~D_HEADER; 1126 } 1127 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { 1128 /* 1129 * Allocate change records as needed. 1130 */ 1131 if (context_vec_ptr == context_vec_end - 1) { 1132 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1133 max_context <<= 1; 1134 context_vec_start = xreallocarray(context_vec_start, 1135 max_context, sizeof(*context_vec_start)); 1136 context_vec_end = context_vec_start + max_context; 1137 context_vec_ptr = context_vec_start + offset; 1138 } 1139 if (anychange == 0) { 1140 /* 1141 * Print the context/unidiff header first time through. 1142 */ 1143 print_header(file1, file2); 1144 anychange = 1; 1145 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && 1146 c > context_vec_ptr->d + (2 * diff_context) + 1) { 1147 /* 1148 * If this change is more than 'diff_context' lines from the 1149 * previous change, dump the record and reset it. 1150 */ 1151 if (diff_format == D_CONTEXT) 1152 dump_context_vec(f1, f2, *pflags); 1153 else 1154 dump_unified_vec(f1, f2, *pflags); 1155 } 1156 context_vec_ptr++; 1157 context_vec_ptr->a = a; 1158 context_vec_ptr->b = b; 1159 context_vec_ptr->c = c; 1160 context_vec_ptr->d = d; 1161 return; 1162 } 1163 if (anychange == 0) 1164 anychange = 1; 1165 switch (diff_format) { 1166 case D_BRIEF: 1167 return; 1168 case D_NORMAL: 1169 case D_EDIT: 1170 range(a, b, ","); 1171 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1172 if (diff_format == D_NORMAL) 1173 range(c, d, ","); 1174 diff_output("\n"); 1175 break; 1176 case D_REVERSE: 1177 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1178 range(a, b, " "); 1179 diff_output("\n"); 1180 break; 1181 case D_NREVERSE: 1182 if (a > b) 1183 diff_output("a%d %d\n", b, d - c + 1); 1184 else { 1185 diff_output("d%d %d\n", a, b - a + 1); 1186 if (!(c > d)) 1187 /* add changed lines */ 1188 diff_output("a%d %d\n", b, d - c + 1); 1189 } 1190 break; 1191 } 1192 if (diff_format == D_NORMAL || diff_format == D_IFDEF) { 1193 fetch(ixold, a, b, f1, '<', 1, *pflags); 1194 if (a <= b && c <= d && diff_format == D_NORMAL) 1195 diff_output("---\n"); 1196 } 1197 i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); 1198 if (i != 0 && diff_format == D_EDIT) { 1199 /* 1200 * A non-zero return value for D_EDIT indicates that the 1201 * last line printed was a bare dot (".") that has been 1202 * escaped as ".." to prevent ed(1) from misinterpreting 1203 * it. We have to add a substitute command to change this 1204 * back and restart where we left off. 1205 */ 1206 diff_output(".\n"); 1207 diff_output("%ds/.//\n", a + i - 1); 1208 b = a + i - 1; 1209 a = b + 1; 1210 c += i; 1211 goto restart; 1212 } 1213 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) 1214 diff_output(".\n"); 1215 if (inifdef) { 1216 diff_output("#endif /* %s */\n", ifdefname); 1217 inifdef = 0; 1218 } 1219 } 1220 1221 static int 1222 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) 1223 { 1224 int i, j, c, lastc, col, nc; 1225 int newcol; 1226 1227 /* 1228 * When doing #ifdef's, copy down to current line 1229 * if this is the first file, so that stuff makes it to output. 1230 */ 1231 if (diff_format == D_IFDEF && oldfile) { 1232 long curpos = ftell(lb); 1233 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1234 nc = f[a > b ? b : a - 1] - curpos; 1235 for (i = 0; i < nc; i++) 1236 diff_output("%c", getc(lb)); 1237 } 1238 if (a > b) 1239 return (0); 1240 if (diff_format == D_IFDEF) { 1241 if (inifdef) { 1242 diff_output("#else /* %s%s */\n", 1243 oldfile == 1 ? "!" : "", ifdefname); 1244 } else { 1245 if (oldfile) 1246 diff_output("#ifndef %s\n", ifdefname); 1247 else 1248 diff_output("#ifdef %s\n", ifdefname); 1249 } 1250 inifdef = 1 + oldfile; 1251 } 1252 for (i = a; i <= b; i++) { 1253 fseek(lb, f[i - 1], SEEK_SET); 1254 nc = f[i] - f[i - 1]; 1255 if (diff_format != D_IFDEF && ch != '\0') { 1256 diff_output("%c", ch); 1257 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT 1258 || diff_format == D_UNIFIED)) 1259 diff_output("\t"); 1260 else if (diff_format != D_UNIFIED) 1261 diff_output(" "); 1262 } 1263 col = 0; 1264 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1265 if ((c = getc(lb)) == EOF) { 1266 if (diff_format == D_EDIT || diff_format == D_REVERSE || 1267 diff_format == D_NREVERSE) 1268 warnx("No newline at end of file"); 1269 else 1270 diff_output("\n\\ No newline at end of " 1271 "file\n"); 1272 return (0); 1273 } 1274 if (c == '\t' && (flags & D_EXPANDTABS)) { 1275 newcol = ((col/tabsize)+1)*tabsize; 1276 do { 1277 diff_output(" "); 1278 } while (++col < newcol); 1279 } else { 1280 if (diff_format == D_EDIT && j == 1 && c == '\n' 1281 && lastc == '.') { 1282 /* 1283 * Don't print a bare "." line 1284 * since that will confuse ed(1). 1285 * Print ".." instead and return, 1286 * giving the caller an offset 1287 * from which to restart. 1288 */ 1289 diff_output(".\n"); 1290 return (i - a + 1); 1291 } 1292 diff_output("%c", c); 1293 col++; 1294 } 1295 } 1296 } 1297 return (0); 1298 } 1299 1300 /* 1301 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1302 */ 1303 static int 1304 readhash(FILE *f, int flags) 1305 { 1306 int i, t, space; 1307 int sum; 1308 1309 sum = 1; 1310 space = 0; 1311 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { 1312 if (flags & D_IGNORECASE) 1313 for (i = 0; (t = getc(f)) != '\n'; i++) { 1314 if (flags & D_STRIPCR && t == '\r') { 1315 t = getc(f); 1316 if (t == '\n') 1317 break; 1318 ungetc(t, f); 1319 } 1320 if (t == EOF) { 1321 if (i == 0) 1322 return (0); 1323 break; 1324 } 1325 sum = sum * 127 + chrtran[t]; 1326 } 1327 else 1328 for (i = 0; (t = getc(f)) != '\n'; i++) { 1329 if (flags & D_STRIPCR && t == '\r') { 1330 t = getc(f); 1331 if (t == '\n') 1332 break; 1333 ungetc(t, f); 1334 } 1335 if (t == EOF) { 1336 if (i == 0) 1337 return (0); 1338 break; 1339 } 1340 sum = sum * 127 + t; 1341 } 1342 } else { 1343 for (i = 0;;) { 1344 switch (t = getc(f)) { 1345 case '\r': 1346 case '\t': 1347 case '\v': 1348 case '\f': 1349 case ' ': 1350 space++; 1351 continue; 1352 default: 1353 if (space && (flags & D_IGNOREBLANKS) == 0) { 1354 i++; 1355 space = 0; 1356 } 1357 sum = sum * 127 + chrtran[t]; 1358 i++; 1359 continue; 1360 case EOF: 1361 if (i == 0) 1362 return (0); 1363 /* FALLTHROUGH */ 1364 case '\n': 1365 break; 1366 } 1367 break; 1368 } 1369 } 1370 /* 1371 * There is a remote possibility that we end up with a zero sum. 1372 * Zero is used as an EOF marker, so return 1 instead. 1373 */ 1374 return (sum == 0 ? 1 : sum); 1375 } 1376 1377 static int 1378 asciifile(FILE *f) 1379 { 1380 unsigned char buf[BUFSIZ]; 1381 size_t cnt; 1382 1383 if (f == NULL) 1384 return (1); 1385 1386 rewind(f); 1387 cnt = fread(buf, 1, sizeof(buf), f); 1388 return (memchr(buf, '\0', cnt) == NULL); 1389 } 1390 1391 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) 1392 1393 static char * 1394 match_function(const long *f, int pos, FILE *fp) 1395 { 1396 unsigned char buf[FUNCTION_CONTEXT_SIZE]; 1397 size_t nc; 1398 int last = lastline; 1399 const char *state = NULL; 1400 1401 lastline = pos; 1402 while (pos > last) { 1403 fseek(fp, f[pos - 1], SEEK_SET); 1404 nc = f[pos] - f[pos - 1]; 1405 if (nc >= sizeof(buf)) 1406 nc = sizeof(buf) - 1; 1407 nc = fread(buf, 1, nc, fp); 1408 if (nc > 0) { 1409 buf[nc] = '\0'; 1410 buf[strcspn(buf, "\n")] = '\0'; 1411 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 1412 if (begins_with(buf, "private:")) { 1413 if (!state) 1414 state = " (private)"; 1415 } else if (begins_with(buf, "protected:")) { 1416 if (!state) 1417 state = " (protected)"; 1418 } else if (begins_with(buf, "public:")) { 1419 if (!state) 1420 state = " (public)"; 1421 } else { 1422 strlcpy(lastbuf, buf, sizeof lastbuf); 1423 if (state) 1424 strlcat(lastbuf, state, 1425 sizeof lastbuf); 1426 lastmatchline = pos; 1427 return lastbuf; 1428 } 1429 } 1430 } 1431 pos--; 1432 } 1433 return lastmatchline > 0 ? lastbuf : NULL; 1434 } 1435 1436 /* dump accumulated "context" diff changes */ 1437 static void 1438 dump_context_vec(FILE *f1, FILE *f2, int flags) 1439 { 1440 struct context_vec *cvp = context_vec_start; 1441 int lowa, upb, lowc, upd, do_output; 1442 int a, b, c, d; 1443 char ch, *f; 1444 1445 if (context_vec_start > context_vec_ptr) 1446 return; 1447 1448 b = d = 0; /* gcc */ 1449 lowa = MAXIMUM(1, cvp->a - diff_context); 1450 upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); 1451 lowc = MAXIMUM(1, cvp->c - diff_context); 1452 upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); 1453 1454 diff_output("***************"); 1455 if ((flags & D_PROTOTYPE)) { 1456 f = match_function(ixold, lowa-1, f1); 1457 if (f != NULL) 1458 diff_output(" %s", f); 1459 } 1460 diff_output("\n*** "); 1461 range(lowa, upb, ","); 1462 diff_output(" ****\n"); 1463 1464 /* 1465 * Output changes to the "old" file. The first loop suppresses 1466 * output if there were no changes to the "old" file (we'll see 1467 * the "old" lines as context in the "new" list). 1468 */ 1469 do_output = 0; 1470 for (; cvp <= context_vec_ptr; cvp++) 1471 if (cvp->a <= cvp->b) { 1472 cvp = context_vec_start; 1473 do_output++; 1474 break; 1475 } 1476 if (do_output) { 1477 while (cvp <= context_vec_ptr) { 1478 a = cvp->a; 1479 b = cvp->b; 1480 c = cvp->c; 1481 d = cvp->d; 1482 1483 if (a <= b && c <= d) 1484 ch = 'c'; 1485 else 1486 ch = (a <= b) ? 'd' : 'a'; 1487 1488 if (ch == 'a') 1489 fetch(ixold, lowa, b, f1, ' ', 0, flags); 1490 else { 1491 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1492 fetch(ixold, a, b, f1, 1493 ch == 'c' ? '!' : '-', 0, flags); 1494 } 1495 lowa = b + 1; 1496 cvp++; 1497 } 1498 fetch(ixold, b + 1, upb, f1, ' ', 0, flags); 1499 } 1500 /* output changes to the "new" file */ 1501 diff_output("--- "); 1502 range(lowc, upd, ","); 1503 diff_output(" ----\n"); 1504 1505 do_output = 0; 1506 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1507 if (cvp->c <= cvp->d) { 1508 cvp = context_vec_start; 1509 do_output++; 1510 break; 1511 } 1512 if (do_output) { 1513 while (cvp <= context_vec_ptr) { 1514 a = cvp->a; 1515 b = cvp->b; 1516 c = cvp->c; 1517 d = cvp->d; 1518 1519 if (a <= b && c <= d) 1520 ch = 'c'; 1521 else 1522 ch = (a <= b) ? 'd' : 'a'; 1523 1524 if (ch == 'd') 1525 fetch(ixnew, lowc, d, f2, ' ', 0, flags); 1526 else { 1527 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1528 fetch(ixnew, c, d, f2, 1529 ch == 'c' ? '!' : '+', 0, flags); 1530 } 1531 lowc = d + 1; 1532 cvp++; 1533 } 1534 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1535 } 1536 context_vec_ptr = context_vec_start - 1; 1537 } 1538 1539 /* dump accumulated "unified" diff changes */ 1540 static void 1541 dump_unified_vec(FILE *f1, FILE *f2, int flags) 1542 { 1543 struct context_vec *cvp = context_vec_start; 1544 int lowa, upb, lowc, upd; 1545 int a, b, c, d; 1546 char ch, *f; 1547 1548 if (context_vec_start > context_vec_ptr) 1549 return; 1550 1551 b = d = 0; /* gcc */ 1552 lowa = MAXIMUM(1, cvp->a - diff_context); 1553 upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); 1554 lowc = MAXIMUM(1, cvp->c - diff_context); 1555 upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); 1556 1557 diff_output("@@ -"); 1558 uni_range(lowa, upb); 1559 diff_output(" +"); 1560 uni_range(lowc, upd); 1561 diff_output(" @@"); 1562 if ((flags & D_PROTOTYPE)) { 1563 f = match_function(ixold, lowa-1, f1); 1564 if (f != NULL) 1565 diff_output(" %s", f); 1566 } 1567 diff_output("\n"); 1568 1569 /* 1570 * Output changes in "unified" diff format--the old and new lines 1571 * are printed together. 1572 */ 1573 for (; cvp <= context_vec_ptr; cvp++) { 1574 a = cvp->a; 1575 b = cvp->b; 1576 c = cvp->c; 1577 d = cvp->d; 1578 1579 /* 1580 * c: both new and old changes 1581 * d: only changes in the old file 1582 * a: only changes in the new file 1583 */ 1584 if (a <= b && c <= d) 1585 ch = 'c'; 1586 else 1587 ch = (a <= b) ? 'd' : 'a'; 1588 1589 switch (ch) { 1590 case 'c': 1591 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1592 fetch(ixold, a, b, f1, '-', 0, flags); 1593 fetch(ixnew, c, d, f2, '+', 0, flags); 1594 break; 1595 case 'd': 1596 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1597 fetch(ixold, a, b, f1, '-', 0, flags); 1598 break; 1599 case 'a': 1600 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1601 fetch(ixnew, c, d, f2, '+', 0, flags); 1602 break; 1603 } 1604 lowa = b + 1; 1605 lowc = d + 1; 1606 } 1607 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1608 1609 context_vec_ptr = context_vec_start - 1; 1610 } 1611 1612 static void 1613 print_header(const char *file1, const char *file2) 1614 { 1615 const char *time_format; 1616 char buf1[256]; 1617 char buf2[256]; 1618 char end1[10]; 1619 char end2[10]; 1620 struct tm *tm_ptr1, *tm_ptr2; 1621 int nsec1 = TIMESPEC_NS (stb1.st_mtime); 1622 int nsec2 = TIMESPEC_NS (stb2.st_mtime); 1623 1624 #ifdef ST_MTIM_NSEC 1625 time_format = "%Y-%m-%d %H:%M:%S.%N"; 1626 #else 1627 time_format = "%Y-%m-%d %H:%M:%S"; 1628 #endif 1629 1630 if (cflag) 1631 time_format = "%c"; 1632 tm_ptr1 = localtime(&stb1.st_mtime); 1633 tm_ptr2 = localtime(&stb2.st_mtime); 1634 strftime(buf1, 256, time_format, tm_ptr1); 1635 strftime(buf2, 256, time_format, tm_ptr2); 1636 if (!cflag) { 1637 strftime(end1, 10, "%z", tm_ptr1); 1638 strftime(end2, 10, "%z", tm_ptr2); 1639 sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1); 1640 sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2); 1641 } 1642 if (label[0] != NULL) 1643 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", 1644 label[0]); 1645 else 1646 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---", 1647 file1, buf1); 1648 if (label[1] != NULL) 1649 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", 1650 label[1]); 1651 else 1652 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++", 1653 file2, buf2); 1654 } 1655