1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #include <sys/cdefs.h> 68 __FBSDID("$FreeBSD$"); 69 70 #include <sys/capsicum.h> 71 #include <sys/procdesc.h> 72 #include <sys/stat.h> 73 #include <sys/types.h> 74 #include <sys/event.h> 75 #include <sys/wait.h> 76 77 #include <capsicum_helpers.h> 78 #include <ctype.h> 79 #include <err.h> 80 #include <errno.h> 81 #include <fcntl.h> 82 #include <paths.h> 83 #include <stddef.h> 84 #include <stdint.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <unistd.h> 89 #include <limits.h> 90 #include <signal.h> 91 92 #include "diff.h" 93 #include "xmalloc.h" 94 95 #define _PATH_PR "/usr/bin/pr" 96 97 #ifdef ST_MTIM_NSEC 98 # define TIMESPEC_NS(timespec) ((timespec).ST_MTIM_NSEC) 99 #else 100 # define TIMESPEC_NS(timespec) 0 101 #endif 102 103 /* 104 * diff - compare two files. 105 */ 106 107 /* 108 * Uses an algorithm due to Harold Stone, which finds 109 * a pair of longest identical subsequences in the two 110 * files. 111 * 112 * The major goal is to generate the match vector J. 113 * J[i] is the index of the line in file1 corresponding 114 * to line i file0. J[i] = 0 if there is no 115 * such line in file1. 116 * 117 * Lines are hashed so as to work in core. All potential 118 * matches are located by sorting the lines of each file 119 * on the hash (called ``value''). In particular, this 120 * collects the equivalence classes in file1 together. 121 * Subroutine equiv replaces the value of each line in 122 * file0 by the index of the first element of its 123 * matching equivalence in (the reordered) file1. 124 * To save space equiv squeezes file1 into a single 125 * array member in which the equivalence classes 126 * are simply concatenated, except that their first 127 * members are flagged by changing sign. 128 * 129 * Next the indices that point into member are unsorted into 130 * array class according to the original order of file0. 131 * 132 * The cleverness lies in routine stone. This marches 133 * through the lines of file0, developing a vector klist 134 * of "k-candidates". At step i a k-candidate is a matched 135 * pair of lines x,y (x in file0 y in file1) such that 136 * there is a common subsequence of length k 137 * between the first i lines of file0 and the first y 138 * lines of file1, but there is no such subsequence for 139 * any smaller y. x is the earliest possible mate to y 140 * that occurs in such a subsequence. 141 * 142 * Whenever any of the members of the equivalence class of 143 * lines in file1 matable to a line in file0 has serial number 144 * less than the y of some k-candidate, that k-candidate 145 * with the smallest such y is replaced. The new 146 * k-candidate is chained (via pred) to the current 147 * k-1 candidate so that the actual subsequence can 148 * be recovered. When a member has serial number greater 149 * that the y of all k-candidates, the klist is extended. 150 * At the end, the longest subsequence is pulled out 151 * and placed in the array J by unravel 152 * 153 * With J in hand, the matches there recorded are 154 * check'ed against reality to assure that no spurious 155 * matches have crept in due to hashing. If they have, 156 * they are broken, and "jackpot" is recorded--a harmless 157 * matter except that a true match for a spuriously 158 * mated line may now be unnecessarily reported as a change. 159 * 160 * Much of the complexity of the program comes simply 161 * from trying to minimize core utilization and 162 * maximize the range of doable problems by dynamically 163 * allocating what is needed and reusing what is not. 164 * The core requirements for problems larger than somewhat 165 * are (in words) 2*length(file0) + length(file1) + 166 * 3*(number of k-candidates installed), typically about 167 * 6n words for files of length n. 168 */ 169 170 struct cand { 171 int x; 172 int y; 173 int pred; 174 }; 175 176 static struct line { 177 int serial; 178 int value; 179 } *file[2]; 180 181 /* 182 * The following struct is used to record change information when 183 * doing a "context" or "unified" diff. (see routine "change" to 184 * understand the highly mnemonic field names) 185 */ 186 struct context_vec { 187 int a; /* start line in old file */ 188 int b; /* end line in old file */ 189 int c; /* start line in new file */ 190 int d; /* end line in new file */ 191 }; 192 193 #define diff_output printf 194 static FILE *opentemp(const char *); 195 static void output(char *, FILE *, char *, FILE *, int); 196 static void check(FILE *, FILE *, int); 197 static void range(int, int, const char *); 198 static void uni_range(int, int); 199 static void dump_context_vec(FILE *, FILE *, int); 200 static void dump_unified_vec(FILE *, FILE *, int); 201 static void prepare(int, FILE *, size_t, int); 202 static void prune(void); 203 static void equiv(struct line *, int, struct line *, int, int *); 204 static void unravel(int); 205 static void unsort(struct line *, int, int *); 206 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); 207 static void sort(struct line *, int); 208 static void print_header(const char *, const char *); 209 static int ignoreline(char *); 210 static int asciifile(FILE *); 211 static int fetch(long *, int, int, FILE *, int, int, int); 212 static int newcand(int, int, int); 213 static int search(int *, int, int); 214 static int skipline(FILE *); 215 static int isqrt(int); 216 static int stone(int *, int, int *, int *, int); 217 static int readhash(FILE *, int); 218 static int files_differ(FILE *, FILE *, int); 219 static char *match_function(const long *, int, FILE *); 220 static char *preadline(int, size_t, off_t); 221 222 static int *J; /* will be overlaid on class */ 223 static int *class; /* will be overlaid on file[0] */ 224 static int *klist; /* will be overlaid on file[0] after class */ 225 static int *member; /* will be overlaid on file[1] */ 226 static int clen; 227 static int inifdef; /* whether or not we are in a #ifdef block */ 228 static int len[2]; 229 static int pref, suff; /* length of prefix and suffix */ 230 static int slen[2]; 231 static int anychange; 232 static long *ixnew; /* will be overlaid on file[1] */ 233 static long *ixold; /* will be overlaid on klist */ 234 static struct cand *clist; /* merely a free storage pot for candidates */ 235 static int clistlen; /* the length of clist */ 236 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 237 static u_char *chrtran; /* translation table for case-folding */ 238 static struct context_vec *context_vec_start; 239 static struct context_vec *context_vec_end; 240 static struct context_vec *context_vec_ptr; 241 242 #define FUNCTION_CONTEXT_SIZE 55 243 static char lastbuf[FUNCTION_CONTEXT_SIZE]; 244 static int lastline; 245 static int lastmatchline; 246 247 248 /* 249 * chrtran points to one of 2 translation tables: cup2low if folding upper to 250 * lower case clow2low if not folding case 251 */ 252 static u_char clow2low[256] = { 253 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 254 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 255 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 256 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 257 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 258 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 259 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 260 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 261 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 262 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 263 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 264 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 265 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 266 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 267 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 268 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 269 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 270 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 271 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 272 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 273 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 274 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 275 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 276 0xfd, 0xfe, 0xff 277 }; 278 279 static u_char cup2low[256] = { 280 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 281 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 282 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 283 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 284 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 285 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 286 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 287 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 288 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 289 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 290 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 291 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 292 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 293 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 294 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 295 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 296 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 297 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 298 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 299 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 300 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 301 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 302 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 303 0xfd, 0xfe, 0xff 304 }; 305 306 int 307 diffreg(char *file1, char *file2, int flags, int capsicum) 308 { 309 FILE *f1, *f2; 310 int i, rval; 311 int ostdout = -1; 312 int pr_pd, kq; 313 struct kevent *e; 314 cap_rights_t rights_ro; 315 316 e = NULL; 317 kq = -1; 318 f1 = f2 = NULL; 319 rval = D_SAME; 320 anychange = 0; 321 lastline = 0; 322 lastmatchline = 0; 323 context_vec_ptr = context_vec_start - 1; 324 if (flags & D_IGNORECASE) 325 chrtran = cup2low; 326 else 327 chrtran = clow2low; 328 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 329 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); 330 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 331 goto closem; 332 333 if (flags & D_EMPTY1) 334 f1 = fopen(_PATH_DEVNULL, "r"); 335 else { 336 if (!S_ISREG(stb1.st_mode)) { 337 if ((f1 = opentemp(file1)) == NULL || 338 fstat(fileno(f1), &stb1) < 0) { 339 warn("%s", file1); 340 status |= 2; 341 goto closem; 342 } 343 } else if (strcmp(file1, "-") == 0) 344 f1 = stdin; 345 else 346 f1 = fopen(file1, "r"); 347 } 348 if (f1 == NULL) { 349 warn("%s", file1); 350 status |= 2; 351 goto closem; 352 } 353 354 if (flags & D_EMPTY2) 355 f2 = fopen(_PATH_DEVNULL, "r"); 356 else { 357 if (!S_ISREG(stb2.st_mode)) { 358 if ((f2 = opentemp(file2)) == NULL || 359 fstat(fileno(f2), &stb2) < 0) { 360 warn("%s", file2); 361 status |= 2; 362 goto closem; 363 } 364 } else if (strcmp(file2, "-") == 0) 365 f2 = stdin; 366 else 367 f2 = fopen(file2, "r"); 368 } 369 if (f2 == NULL) { 370 warn("%s", file2); 371 status |= 2; 372 goto closem; 373 } 374 375 if (lflag) { 376 /* redirect stdout to pr */ 377 int pfd[2]; 378 pid_t pid; 379 char *header; 380 381 xasprintf(&header, "%s %s %s", diffargs, file1, file2); 382 signal(SIGPIPE, SIG_IGN); 383 fflush(stdout); 384 rewind(stdout); 385 pipe(pfd); 386 switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) { 387 case -1: 388 status |= 2; 389 free(header); 390 err(2, "No more processes"); 391 case 0: 392 /* child */ 393 if (pfd[0] != STDIN_FILENO) { 394 dup2(pfd[0], STDIN_FILENO); 395 close(pfd[0]); 396 } 397 close(pfd[1]); 398 execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0); 399 _exit(127); 400 default: 401 402 /* parent */ 403 if (pfd[1] != STDOUT_FILENO) { 404 ostdout = dup(STDOUT_FILENO); 405 dup2(pfd[1], STDOUT_FILENO); 406 close(pfd[1]); 407 } 408 close(pfd[0]); 409 rewind(stdout); 410 free(header); 411 kq = kqueue(); 412 if (kq == -1) 413 err(2, "kqueue"); 414 e = xmalloc(sizeof(struct kevent)); 415 EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0, 416 NULL); 417 if (kevent(kq, e, 1, NULL, 0, NULL) == -1) 418 err(2, "kevent"); 419 } 420 } 421 422 if (capsicum) { 423 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 424 if (cap_rights_limit(fileno(f1), &rights_ro) < 0 425 && errno != ENOSYS) 426 err(2, "unable to limit rights on: %s", file1); 427 if (cap_rights_limit(fileno(f2), &rights_ro) < 0 && 428 errno != ENOSYS) 429 err(2, "unable to limit rights on: %s", file2); 430 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 431 /* stding has already been limited */ 432 if (caph_limit_stderr() == -1) 433 err(2, "unable to limit stderr"); 434 if (caph_limit_stdout() == -1) 435 err(2, "unable to limit stdout"); 436 } else if (caph_limit_stdio() == -1) 437 err(2, "unable to limit stdio"); 438 439 caph_cache_catpages(); 440 caph_cache_tzdata(); 441 if (cap_enter() < 0 && errno != ENOSYS) 442 err(2, "unable to enter capability mode"); 443 } 444 445 switch (files_differ(f1, f2, flags)) { 446 case 0: 447 goto closem; 448 case 1: 449 break; 450 default: 451 /* error */ 452 status |= 2; 453 goto closem; 454 } 455 456 if ((flags & D_FORCEASCII) == 0 && 457 (!asciifile(f1) || !asciifile(f2))) { 458 rval = D_BINARY; 459 status |= 1; 460 goto closem; 461 } 462 prepare(0, f1, stb1.st_size, flags); 463 prepare(1, f2, stb2.st_size, flags); 464 465 prune(); 466 sort(sfile[0], slen[0]); 467 sort(sfile[1], slen[1]); 468 469 member = (int *)file[1]; 470 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 471 member = xreallocarray(member, slen[1] + 2, sizeof(*member)); 472 473 class = (int *)file[0]; 474 unsort(sfile[0], slen[0], class); 475 class = xreallocarray(class, slen[0] + 2, sizeof(*class)); 476 477 klist = xcalloc(slen[0] + 2, sizeof(*klist)); 478 clen = 0; 479 clistlen = 100; 480 clist = xcalloc(clistlen, sizeof(*clist)); 481 i = stone(class, slen[0], member, klist, flags); 482 free(member); 483 free(class); 484 485 J = xreallocarray(J, len[0] + 2, sizeof(*J)); 486 unravel(klist[i]); 487 free(clist); 488 free(klist); 489 490 ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); 491 ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); 492 check(f1, f2, flags); 493 output(file1, f1, file2, f2, flags); 494 if (ostdout != -1 && e != NULL) { 495 /* close the pipe to pr and restore stdout */ 496 int wstatus; 497 498 fflush(stdout); 499 if (ostdout != STDOUT_FILENO) { 500 close(STDOUT_FILENO); 501 dup2(ostdout, STDOUT_FILENO); 502 close(ostdout); 503 } 504 if (kevent(kq, NULL, 0, e, 1, NULL) == -1) 505 err(2, "kevent"); 506 wstatus = e[0].data; 507 close(kq); 508 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) 509 errx(2, "pr exited abnormally"); 510 else if (WIFSIGNALED(wstatus)) 511 errx(2, "pr killed by signal %d", 512 WTERMSIG(wstatus)); 513 } 514 515 closem: 516 if (anychange) { 517 status |= 1; 518 if (rval == D_SAME) 519 rval = D_DIFFER; 520 } 521 if (f1 != NULL) 522 fclose(f1); 523 if (f2 != NULL) 524 fclose(f2); 525 526 return (rval); 527 } 528 529 /* 530 * Check to see if the given files differ. 531 * Returns 0 if they are the same, 1 if different, and -1 on error. 532 * XXX - could use code from cmp(1) [faster] 533 */ 534 static int 535 files_differ(FILE *f1, FILE *f2, int flags) 536 { 537 char buf1[BUFSIZ], buf2[BUFSIZ]; 538 size_t i, j; 539 540 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 541 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 542 return (1); 543 for (;;) { 544 i = fread(buf1, 1, sizeof(buf1), f1); 545 j = fread(buf2, 1, sizeof(buf2), f2); 546 if ((!i && ferror(f1)) || (!j && ferror(f2))) 547 return (-1); 548 if (i != j) 549 return (1); 550 if (i == 0) 551 return (0); 552 if (memcmp(buf1, buf2, i) != 0) 553 return (1); 554 } 555 } 556 557 static FILE * 558 opentemp(const char *f) 559 { 560 char buf[BUFSIZ], tempfile[PATH_MAX]; 561 ssize_t nread; 562 int ifd, ofd; 563 564 if (strcmp(f, "-") == 0) 565 ifd = STDIN_FILENO; 566 else if ((ifd = open(f, O_RDONLY, 0644)) < 0) 567 return (NULL); 568 569 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); 570 571 if ((ofd = mkstemp(tempfile)) < 0) { 572 close(ifd); 573 return (NULL); 574 } 575 unlink(tempfile); 576 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 577 if (write(ofd, buf, nread) != nread) { 578 close(ifd); 579 close(ofd); 580 return (NULL); 581 } 582 } 583 close(ifd); 584 lseek(ofd, (off_t)0, SEEK_SET); 585 return (fdopen(ofd, "r")); 586 } 587 588 char * 589 splice(char *dir, char *path) 590 { 591 char *tail, *buf; 592 size_t dirlen; 593 594 dirlen = strlen(dir); 595 while (dirlen != 0 && dir[dirlen - 1] == '/') 596 dirlen--; 597 if ((tail = strrchr(path, '/')) == NULL) 598 tail = path; 599 else 600 tail++; 601 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); 602 return (buf); 603 } 604 605 static void 606 prepare(int i, FILE *fd, size_t filesize, int flags) 607 { 608 struct line *p; 609 int h; 610 size_t sz, j; 611 612 rewind(fd); 613 614 sz = MIN(filesize, SIZE_MAX) / 25; 615 if (sz < 100) 616 sz = 100; 617 618 p = xcalloc(sz + 3, sizeof(*p)); 619 for (j = 0; (h = readhash(fd, flags));) { 620 if (j == sz) { 621 sz = sz * 3 / 2; 622 p = xreallocarray(p, sz + 3, sizeof(*p)); 623 } 624 p[++j].value = h; 625 } 626 len[i] = j; 627 file[i] = p; 628 } 629 630 static void 631 prune(void) 632 { 633 int i, j; 634 635 for (pref = 0; pref < len[0] && pref < len[1] && 636 file[0][pref + 1].value == file[1][pref + 1].value; 637 pref++) 638 ; 639 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 640 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 641 suff++) 642 ; 643 for (j = 0; j < 2; j++) { 644 sfile[j] = file[j] + pref; 645 slen[j] = len[j] - pref - suff; 646 for (i = 0; i <= slen[j]; i++) 647 sfile[j][i].serial = i; 648 } 649 } 650 651 static void 652 equiv(struct line *a, int n, struct line *b, int m, int *c) 653 { 654 int i, j; 655 656 i = j = 1; 657 while (i <= n && j <= m) { 658 if (a[i].value < b[j].value) 659 a[i++].value = 0; 660 else if (a[i].value == b[j].value) 661 a[i++].value = j; 662 else 663 j++; 664 } 665 while (i <= n) 666 a[i++].value = 0; 667 b[m + 1].value = 0; 668 j = 0; 669 while (++j <= m) { 670 c[j] = -b[j].serial; 671 while (b[j + 1].value == b[j].value) { 672 j++; 673 c[j] = b[j].serial; 674 } 675 } 676 c[j] = -1; 677 } 678 679 /* Code taken from ping.c */ 680 static int 681 isqrt(int n) 682 { 683 int y, x = 1; 684 685 if (n == 0) 686 return (0); 687 688 do { /* newton was a stinker */ 689 y = x; 690 x = n / x; 691 x += y; 692 x /= 2; 693 } while ((x - y) > 1 || (x - y) < -1); 694 695 return (x); 696 } 697 698 static int 699 stone(int *a, int n, int *b, int *c, int flags) 700 { 701 int i, k, y, j, l; 702 int oldc, tc, oldl, sq; 703 u_int numtries, bound; 704 705 if (flags & D_MINIMAL) 706 bound = UINT_MAX; 707 else { 708 sq = isqrt(n); 709 bound = MAX(256, sq); 710 } 711 712 k = 0; 713 c[0] = newcand(0, 0, 0); 714 for (i = 1; i <= n; i++) { 715 j = a[i]; 716 if (j == 0) 717 continue; 718 y = -b[j]; 719 oldl = 0; 720 oldc = c[0]; 721 numtries = 0; 722 do { 723 if (y <= clist[oldc].y) 724 continue; 725 l = search(c, k, y); 726 if (l != oldl + 1) 727 oldc = c[l - 1]; 728 if (l <= k) { 729 if (clist[c[l]].y <= y) 730 continue; 731 tc = c[l]; 732 c[l] = newcand(i, y, oldc); 733 oldc = tc; 734 oldl = l; 735 numtries++; 736 } else { 737 c[l] = newcand(i, y, oldc); 738 k++; 739 break; 740 } 741 } while ((y = b[++j]) > 0 && numtries < bound); 742 } 743 return (k); 744 } 745 746 static int 747 newcand(int x, int y, int pred) 748 { 749 struct cand *q; 750 751 if (clen == clistlen) { 752 clistlen = clistlen * 11 / 10; 753 clist = xreallocarray(clist, clistlen, sizeof(*clist)); 754 } 755 q = clist + clen; 756 q->x = x; 757 q->y = y; 758 q->pred = pred; 759 return (clen++); 760 } 761 762 static int 763 search(int *c, int k, int y) 764 { 765 int i, j, l, t; 766 767 if (clist[c[k]].y < y) /* quick look for typical case */ 768 return (k + 1); 769 i = 0; 770 j = k + 1; 771 for (;;) { 772 l = (i + j) / 2; 773 if (l <= i) 774 break; 775 t = clist[c[l]].y; 776 if (t > y) 777 j = l; 778 else if (t < y) 779 i = l; 780 else 781 return (l); 782 } 783 return (l + 1); 784 } 785 786 static void 787 unravel(int p) 788 { 789 struct cand *q; 790 int i; 791 792 for (i = 0; i <= len[0]; i++) 793 J[i] = i <= pref ? i : 794 i > len[0] - suff ? i + len[1] - len[0] : 0; 795 for (q = clist + p; q->y != 0; q = clist + q->pred) 796 J[q->x + pref] = q->y + pref; 797 } 798 799 /* 800 * Check does double duty: 801 * 1. ferret out any fortuitous correspondences due 802 * to confounding by hashing (which result in "jackpot") 803 * 2. collect random access indexes to the two files 804 */ 805 static void 806 check(FILE *f1, FILE *f2, int flags) 807 { 808 int i, j, jackpot, c, d; 809 long ctold, ctnew; 810 811 rewind(f1); 812 rewind(f2); 813 j = 1; 814 ixold[0] = ixnew[0] = 0; 815 jackpot = 0; 816 ctold = ctnew = 0; 817 for (i = 1; i <= len[0]; i++) { 818 if (J[i] == 0) { 819 ixold[i] = ctold += skipline(f1); 820 continue; 821 } 822 while (j < J[i]) { 823 ixnew[j] = ctnew += skipline(f2); 824 j++; 825 } 826 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) { 827 for (;;) { 828 c = getc(f1); 829 d = getc(f2); 830 /* 831 * GNU diff ignores a missing newline 832 * in one file for -b or -w. 833 */ 834 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { 835 if (c == EOF && d == '\n') { 836 ctnew++; 837 break; 838 } else if (c == '\n' && d == EOF) { 839 ctold++; 840 break; 841 } 842 } 843 ctold++; 844 ctnew++; 845 if (flags & D_STRIPCR) { 846 if (c == '\r') { 847 if ((c = getc(f1)) == '\n') { 848 ctnew++; 849 break; 850 } 851 } 852 if (d == '\r') { 853 if ((d = getc(f2)) == '\n') { 854 ctold++; 855 break; 856 } 857 } 858 } 859 if ((flags & D_FOLDBLANKS) && isspace(c) && 860 isspace(d)) { 861 do { 862 if (c == '\n') 863 break; 864 ctold++; 865 } while (isspace(c = getc(f1))); 866 do { 867 if (d == '\n') 868 break; 869 ctnew++; 870 } while (isspace(d = getc(f2))); 871 } else if ((flags & D_IGNOREBLANKS)) { 872 while (isspace(c) && c != '\n') { 873 c = getc(f1); 874 ctold++; 875 } 876 while (isspace(d) && d != '\n') { 877 d = getc(f2); 878 ctnew++; 879 } 880 } 881 if (chrtran[c] != chrtran[d]) { 882 jackpot++; 883 J[i] = 0; 884 if (c != '\n' && c != EOF) 885 ctold += skipline(f1); 886 if (d != '\n' && c != EOF) 887 ctnew += skipline(f2); 888 break; 889 } 890 if (c == '\n' || c == EOF) 891 break; 892 } 893 } else { 894 for (;;) { 895 ctold++; 896 ctnew++; 897 if ((c = getc(f1)) != (d = getc(f2))) { 898 /* jackpot++; */ 899 J[i] = 0; 900 if (c != '\n' && c != EOF) 901 ctold += skipline(f1); 902 if (d != '\n' && c != EOF) 903 ctnew += skipline(f2); 904 break; 905 } 906 if (c == '\n' || c == EOF) 907 break; 908 } 909 } 910 ixold[i] = ctold; 911 ixnew[j] = ctnew; 912 j++; 913 } 914 for (; j <= len[1]; j++) { 915 ixnew[j] = ctnew += skipline(f2); 916 } 917 /* 918 * if (jackpot) 919 * fprintf(stderr, "jackpot\n"); 920 */ 921 } 922 923 /* shellsort CACM #201 */ 924 static void 925 sort(struct line *a, int n) 926 { 927 struct line *ai, *aim, w; 928 int j, m = 0, k; 929 930 if (n == 0) 931 return; 932 for (j = 1; j <= n; j *= 2) 933 m = 2 * j - 1; 934 for (m /= 2; m != 0; m /= 2) { 935 k = n - m; 936 for (j = 1; j <= k; j++) { 937 for (ai = &a[j]; ai > a; ai -= m) { 938 aim = &ai[m]; 939 if (aim < ai) 940 break; /* wraparound */ 941 if (aim->value > ai[0].value || 942 (aim->value == ai[0].value && 943 aim->serial > ai[0].serial)) 944 break; 945 w.value = ai[0].value; 946 ai[0].value = aim->value; 947 aim->value = w.value; 948 w.serial = ai[0].serial; 949 ai[0].serial = aim->serial; 950 aim->serial = w.serial; 951 } 952 } 953 } 954 } 955 956 static void 957 unsort(struct line *f, int l, int *b) 958 { 959 int *a, i; 960 961 a = xcalloc(l + 1, sizeof(*a)); 962 for (i = 1; i <= l; i++) 963 a[f[i].serial] = f[i].value; 964 for (i = 1; i <= l; i++) 965 b[i] = a[i]; 966 free(a); 967 } 968 969 static int 970 skipline(FILE *f) 971 { 972 int i, c; 973 974 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 975 continue; 976 return (i); 977 } 978 979 static void 980 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) 981 { 982 int m, i0, i1, j0, j1; 983 984 rewind(f1); 985 rewind(f2); 986 m = len[0]; 987 J[0] = 0; 988 J[m + 1] = len[1] + 1; 989 if (diff_format != D_EDIT) { 990 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 991 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 992 i0++; 993 j0 = J[i0 - 1] + 1; 994 i1 = i0 - 1; 995 while (i1 < m && J[i1 + 1] == 0) 996 i1++; 997 j1 = J[i1 + 1] - 1; 998 J[i1] = j1; 999 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); 1000 } 1001 } else { 1002 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 1003 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 1004 i0--; 1005 j0 = J[i0 + 1] - 1; 1006 i1 = i0 + 1; 1007 while (i1 > 1 && J[i1 - 1] == 0) 1008 i1--; 1009 j1 = J[i1 - 1] + 1; 1010 J[i1] = j1; 1011 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); 1012 } 1013 } 1014 if (m == 0) 1015 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); 1016 if (diff_format == D_IFDEF) { 1017 for (;;) { 1018 #define c i0 1019 if ((c = getc(f1)) == EOF) 1020 return; 1021 diff_output("%c", c); 1022 } 1023 #undef c 1024 } 1025 if (anychange != 0) { 1026 if (diff_format == D_CONTEXT) 1027 dump_context_vec(f1, f2, flags); 1028 else if (diff_format == D_UNIFIED) 1029 dump_unified_vec(f1, f2, flags); 1030 } 1031 } 1032 1033 static void 1034 range(int a, int b, const char *separator) 1035 { 1036 diff_output("%d", a > b ? b : a); 1037 if (a < b) 1038 diff_output("%s%d", separator, b); 1039 } 1040 1041 static void 1042 uni_range(int a, int b) 1043 { 1044 if (a < b) 1045 diff_output("%d,%d", a, b - a + 1); 1046 else if (a == b) 1047 diff_output("%d", b); 1048 else 1049 diff_output("%d,0", b); 1050 } 1051 1052 static char * 1053 preadline(int fd, size_t rlen, off_t off) 1054 { 1055 char *line; 1056 ssize_t nr; 1057 1058 line = xmalloc(rlen + 1); 1059 if ((nr = pread(fd, line, rlen, off)) < 0) 1060 err(2, "preadline"); 1061 if (nr > 0 && line[nr-1] == '\n') 1062 nr--; 1063 line[nr] = '\0'; 1064 return (line); 1065 } 1066 1067 static int 1068 ignoreline(char *line) 1069 { 1070 int ret; 1071 1072 ret = regexec(&ignore_re, line, 0, NULL, 0); 1073 free(line); 1074 return (ret == 0); /* if it matched, it should be ignored. */ 1075 } 1076 1077 /* 1078 * Indicate that there is a difference between lines a and b of the from file 1079 * to get to lines c to d of the to file. If a is greater then b then there 1080 * are no lines in the from file involved and this means that there were 1081 * lines appended (beginning at b). If c is greater than d then there are 1082 * lines missing from the to file. 1083 */ 1084 static void 1085 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, 1086 int *pflags) 1087 { 1088 static size_t max_context = 64; 1089 int i; 1090 1091 restart: 1092 if (diff_format != D_IFDEF && a > b && c > d) 1093 return; 1094 if (ignore_pats != NULL) { 1095 char *line; 1096 /* 1097 * All lines in the change, insert, or delete must 1098 * match an ignore pattern for the change to be 1099 * ignored. 1100 */ 1101 if (a <= b) { /* Changes and deletes. */ 1102 for (i = a; i <= b; i++) { 1103 line = preadline(fileno(f1), 1104 ixold[i] - ixold[i - 1], ixold[i - 1]); 1105 if (!ignoreline(line)) 1106 goto proceed; 1107 } 1108 } 1109 if (a > b || c <= d) { /* Changes and inserts. */ 1110 for (i = c; i <= d; i++) { 1111 line = preadline(fileno(f2), 1112 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1113 if (!ignoreline(line)) 1114 goto proceed; 1115 } 1116 } 1117 return; 1118 } 1119 proceed: 1120 if (*pflags & D_HEADER) { 1121 diff_output("%s %s %s\n", diffargs, file1, file2); 1122 *pflags &= ~D_HEADER; 1123 } 1124 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { 1125 /* 1126 * Allocate change records as needed. 1127 */ 1128 if (context_vec_ptr == context_vec_end - 1) { 1129 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1130 max_context <<= 1; 1131 context_vec_start = xreallocarray(context_vec_start, 1132 max_context, sizeof(*context_vec_start)); 1133 context_vec_end = context_vec_start + max_context; 1134 context_vec_ptr = context_vec_start + offset; 1135 } 1136 if (anychange == 0) { 1137 /* 1138 * Print the context/unidiff header first time through. 1139 */ 1140 print_header(file1, file2); 1141 anychange = 1; 1142 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && 1143 c > context_vec_ptr->d + (2 * diff_context) + 1) { 1144 /* 1145 * If this change is more than 'diff_context' lines from the 1146 * previous change, dump the record and reset it. 1147 */ 1148 if (diff_format == D_CONTEXT) 1149 dump_context_vec(f1, f2, *pflags); 1150 else 1151 dump_unified_vec(f1, f2, *pflags); 1152 } 1153 context_vec_ptr++; 1154 context_vec_ptr->a = a; 1155 context_vec_ptr->b = b; 1156 context_vec_ptr->c = c; 1157 context_vec_ptr->d = d; 1158 return; 1159 } 1160 if (anychange == 0) 1161 anychange = 1; 1162 switch (diff_format) { 1163 case D_BRIEF: 1164 return; 1165 case D_NORMAL: 1166 case D_EDIT: 1167 range(a, b, ","); 1168 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1169 if (diff_format == D_NORMAL) 1170 range(c, d, ","); 1171 diff_output("\n"); 1172 break; 1173 case D_REVERSE: 1174 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1175 range(a, b, " "); 1176 diff_output("\n"); 1177 break; 1178 case D_NREVERSE: 1179 if (a > b) 1180 diff_output("a%d %d\n", b, d - c + 1); 1181 else { 1182 diff_output("d%d %d\n", a, b - a + 1); 1183 if (!(c > d)) 1184 /* add changed lines */ 1185 diff_output("a%d %d\n", b, d - c + 1); 1186 } 1187 break; 1188 } 1189 if (diff_format == D_NORMAL || diff_format == D_IFDEF) { 1190 fetch(ixold, a, b, f1, '<', 1, *pflags); 1191 if (a <= b && c <= d && diff_format == D_NORMAL) 1192 diff_output("---\n"); 1193 } 1194 i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); 1195 if (i != 0 && diff_format == D_EDIT) { 1196 /* 1197 * A non-zero return value for D_EDIT indicates that the 1198 * last line printed was a bare dot (".") that has been 1199 * escaped as ".." to prevent ed(1) from misinterpreting 1200 * it. We have to add a substitute command to change this 1201 * back and restart where we left off. 1202 */ 1203 diff_output(".\n"); 1204 diff_output("%ds/.//\n", a + i - 1); 1205 b = a + i - 1; 1206 a = b + 1; 1207 c += i; 1208 goto restart; 1209 } 1210 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) 1211 diff_output(".\n"); 1212 if (inifdef) { 1213 diff_output("#endif /* %s */\n", ifdefname); 1214 inifdef = 0; 1215 } 1216 } 1217 1218 static int 1219 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) 1220 { 1221 int i, j, c, lastc, col, nc; 1222 int newcol; 1223 1224 /* 1225 * When doing #ifdef's, copy down to current line 1226 * if this is the first file, so that stuff makes it to output. 1227 */ 1228 if (diff_format == D_IFDEF && oldfile) { 1229 long curpos = ftell(lb); 1230 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1231 nc = f[a > b ? b : a - 1] - curpos; 1232 for (i = 0; i < nc; i++) 1233 diff_output("%c", getc(lb)); 1234 } 1235 if (a > b) 1236 return (0); 1237 if (diff_format == D_IFDEF) { 1238 if (inifdef) { 1239 diff_output("#else /* %s%s */\n", 1240 oldfile == 1 ? "!" : "", ifdefname); 1241 } else { 1242 if (oldfile) 1243 diff_output("#ifndef %s\n", ifdefname); 1244 else 1245 diff_output("#ifdef %s\n", ifdefname); 1246 } 1247 inifdef = 1 + oldfile; 1248 } 1249 for (i = a; i <= b; i++) { 1250 fseek(lb, f[i - 1], SEEK_SET); 1251 nc = f[i] - f[i - 1]; 1252 if (diff_format != D_IFDEF && ch != '\0') { 1253 diff_output("%c", ch); 1254 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT 1255 || diff_format == D_UNIFIED)) 1256 diff_output("\t"); 1257 else if (diff_format != D_UNIFIED) 1258 diff_output(" "); 1259 } 1260 col = 0; 1261 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1262 if ((c = getc(lb)) == EOF) { 1263 if (diff_format == D_EDIT || diff_format == D_REVERSE || 1264 diff_format == D_NREVERSE) 1265 warnx("No newline at end of file"); 1266 else 1267 diff_output("\n\\ No newline at end of " 1268 "file\n"); 1269 return (0); 1270 } 1271 if (c == '\t' && (flags & D_EXPANDTABS)) { 1272 newcol = ((col/tabsize)+1)*tabsize; 1273 do { 1274 diff_output(" "); 1275 } while (++col < newcol); 1276 } else { 1277 if (diff_format == D_EDIT && j == 1 && c == '\n' 1278 && lastc == '.') { 1279 /* 1280 * Don't print a bare "." line 1281 * since that will confuse ed(1). 1282 * Print ".." instead and return, 1283 * giving the caller an offset 1284 * from which to restart. 1285 */ 1286 diff_output(".\n"); 1287 return (i - a + 1); 1288 } 1289 diff_output("%c", c); 1290 col++; 1291 } 1292 } 1293 } 1294 return (0); 1295 } 1296 1297 /* 1298 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1299 */ 1300 static int 1301 readhash(FILE *f, int flags) 1302 { 1303 int i, t, space; 1304 int sum; 1305 1306 sum = 1; 1307 space = 0; 1308 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { 1309 if (flags & D_IGNORECASE) 1310 for (i = 0; (t = getc(f)) != '\n'; i++) { 1311 if (flags & D_STRIPCR && t == '\r') { 1312 t = getc(f); 1313 if (t == '\n') 1314 break; 1315 ungetc(t, f); 1316 } 1317 if (t == EOF) { 1318 if (i == 0) 1319 return (0); 1320 break; 1321 } 1322 sum = sum * 127 + chrtran[t]; 1323 } 1324 else 1325 for (i = 0; (t = getc(f)) != '\n'; i++) { 1326 if (flags & D_STRIPCR && t == '\r') { 1327 t = getc(f); 1328 if (t == '\n') 1329 break; 1330 ungetc(t, f); 1331 } 1332 if (t == EOF) { 1333 if (i == 0) 1334 return (0); 1335 break; 1336 } 1337 sum = sum * 127 + t; 1338 } 1339 } else { 1340 for (i = 0;;) { 1341 switch (t = getc(f)) { 1342 case '\r': 1343 case '\t': 1344 case '\v': 1345 case '\f': 1346 case ' ': 1347 space++; 1348 continue; 1349 default: 1350 if (space && (flags & D_IGNOREBLANKS) == 0) { 1351 i++; 1352 space = 0; 1353 } 1354 sum = sum * 127 + chrtran[t]; 1355 i++; 1356 continue; 1357 case EOF: 1358 if (i == 0) 1359 return (0); 1360 /* FALLTHROUGH */ 1361 case '\n': 1362 break; 1363 } 1364 break; 1365 } 1366 } 1367 /* 1368 * There is a remote possibility that we end up with a zero sum. 1369 * Zero is used as an EOF marker, so return 1 instead. 1370 */ 1371 return (sum == 0 ? 1 : sum); 1372 } 1373 1374 static int 1375 asciifile(FILE *f) 1376 { 1377 unsigned char buf[BUFSIZ]; 1378 size_t cnt; 1379 1380 if (f == NULL) 1381 return (1); 1382 1383 rewind(f); 1384 cnt = fread(buf, 1, sizeof(buf), f); 1385 return (memchr(buf, '\0', cnt) == NULL); 1386 } 1387 1388 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) 1389 1390 static char * 1391 match_function(const long *f, int pos, FILE *fp) 1392 { 1393 unsigned char buf[FUNCTION_CONTEXT_SIZE]; 1394 size_t nc; 1395 int last = lastline; 1396 const char *state = NULL; 1397 1398 lastline = pos; 1399 while (pos > last) { 1400 fseek(fp, f[pos - 1], SEEK_SET); 1401 nc = f[pos] - f[pos - 1]; 1402 if (nc >= sizeof(buf)) 1403 nc = sizeof(buf) - 1; 1404 nc = fread(buf, 1, nc, fp); 1405 if (nc > 0) { 1406 buf[nc] = '\0'; 1407 buf[strcspn(buf, "\n")] = '\0'; 1408 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 1409 if (begins_with(buf, "private:")) { 1410 if (!state) 1411 state = " (private)"; 1412 } else if (begins_with(buf, "protected:")) { 1413 if (!state) 1414 state = " (protected)"; 1415 } else if (begins_with(buf, "public:")) { 1416 if (!state) 1417 state = " (public)"; 1418 } else { 1419 strlcpy(lastbuf, buf, sizeof lastbuf); 1420 if (state) 1421 strlcat(lastbuf, state, 1422 sizeof lastbuf); 1423 lastmatchline = pos; 1424 return lastbuf; 1425 } 1426 } 1427 } 1428 pos--; 1429 } 1430 return lastmatchline > 0 ? lastbuf : NULL; 1431 } 1432 1433 /* dump accumulated "context" diff changes */ 1434 static void 1435 dump_context_vec(FILE *f1, FILE *f2, int flags) 1436 { 1437 struct context_vec *cvp = context_vec_start; 1438 int lowa, upb, lowc, upd, do_output; 1439 int a, b, c, d; 1440 char ch, *f; 1441 1442 if (context_vec_start > context_vec_ptr) 1443 return; 1444 1445 b = d = 0; /* gcc */ 1446 lowa = MAX(1, cvp->a - diff_context); 1447 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1448 lowc = MAX(1, cvp->c - diff_context); 1449 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1450 1451 diff_output("***************"); 1452 if ((flags & D_PROTOTYPE)) { 1453 f = match_function(ixold, lowa-1, f1); 1454 if (f != NULL) 1455 diff_output(" %s", f); 1456 } 1457 diff_output("\n*** "); 1458 range(lowa, upb, ","); 1459 diff_output(" ****\n"); 1460 1461 /* 1462 * Output changes to the "old" file. The first loop suppresses 1463 * output if there were no changes to the "old" file (we'll see 1464 * the "old" lines as context in the "new" list). 1465 */ 1466 do_output = 0; 1467 for (; cvp <= context_vec_ptr; cvp++) 1468 if (cvp->a <= cvp->b) { 1469 cvp = context_vec_start; 1470 do_output++; 1471 break; 1472 } 1473 if (do_output) { 1474 while (cvp <= context_vec_ptr) { 1475 a = cvp->a; 1476 b = cvp->b; 1477 c = cvp->c; 1478 d = cvp->d; 1479 1480 if (a <= b && c <= d) 1481 ch = 'c'; 1482 else 1483 ch = (a <= b) ? 'd' : 'a'; 1484 1485 if (ch == 'a') 1486 fetch(ixold, lowa, b, f1, ' ', 0, flags); 1487 else { 1488 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1489 fetch(ixold, a, b, f1, 1490 ch == 'c' ? '!' : '-', 0, flags); 1491 } 1492 lowa = b + 1; 1493 cvp++; 1494 } 1495 fetch(ixold, b + 1, upb, f1, ' ', 0, flags); 1496 } 1497 /* output changes to the "new" file */ 1498 diff_output("--- "); 1499 range(lowc, upd, ","); 1500 diff_output(" ----\n"); 1501 1502 do_output = 0; 1503 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1504 if (cvp->c <= cvp->d) { 1505 cvp = context_vec_start; 1506 do_output++; 1507 break; 1508 } 1509 if (do_output) { 1510 while (cvp <= context_vec_ptr) { 1511 a = cvp->a; 1512 b = cvp->b; 1513 c = cvp->c; 1514 d = cvp->d; 1515 1516 if (a <= b && c <= d) 1517 ch = 'c'; 1518 else 1519 ch = (a <= b) ? 'd' : 'a'; 1520 1521 if (ch == 'd') 1522 fetch(ixnew, lowc, d, f2, ' ', 0, flags); 1523 else { 1524 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1525 fetch(ixnew, c, d, f2, 1526 ch == 'c' ? '!' : '+', 0, flags); 1527 } 1528 lowc = d + 1; 1529 cvp++; 1530 } 1531 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1532 } 1533 context_vec_ptr = context_vec_start - 1; 1534 } 1535 1536 /* dump accumulated "unified" diff changes */ 1537 static void 1538 dump_unified_vec(FILE *f1, FILE *f2, int flags) 1539 { 1540 struct context_vec *cvp = context_vec_start; 1541 int lowa, upb, lowc, upd; 1542 int a, b, c, d; 1543 char ch, *f; 1544 1545 if (context_vec_start > context_vec_ptr) 1546 return; 1547 1548 b = d = 0; /* gcc */ 1549 lowa = MAX(1, cvp->a - diff_context); 1550 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1551 lowc = MAX(1, cvp->c - diff_context); 1552 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1553 1554 diff_output("@@ -"); 1555 uni_range(lowa, upb); 1556 diff_output(" +"); 1557 uni_range(lowc, upd); 1558 diff_output(" @@"); 1559 if ((flags & D_PROTOTYPE)) { 1560 f = match_function(ixold, lowa-1, f1); 1561 if (f != NULL) 1562 diff_output(" %s", f); 1563 } 1564 diff_output("\n"); 1565 1566 /* 1567 * Output changes in "unified" diff format--the old and new lines 1568 * are printed together. 1569 */ 1570 for (; cvp <= context_vec_ptr; cvp++) { 1571 a = cvp->a; 1572 b = cvp->b; 1573 c = cvp->c; 1574 d = cvp->d; 1575 1576 /* 1577 * c: both new and old changes 1578 * d: only changes in the old file 1579 * a: only changes in the new file 1580 */ 1581 if (a <= b && c <= d) 1582 ch = 'c'; 1583 else 1584 ch = (a <= b) ? 'd' : 'a'; 1585 1586 switch (ch) { 1587 case 'c': 1588 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1589 fetch(ixold, a, b, f1, '-', 0, flags); 1590 fetch(ixnew, c, d, f2, '+', 0, flags); 1591 break; 1592 case 'd': 1593 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1594 fetch(ixold, a, b, f1, '-', 0, flags); 1595 break; 1596 case 'a': 1597 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1598 fetch(ixnew, c, d, f2, '+', 0, flags); 1599 break; 1600 } 1601 lowa = b + 1; 1602 lowc = d + 1; 1603 } 1604 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1605 1606 context_vec_ptr = context_vec_start - 1; 1607 } 1608 1609 static void 1610 print_header(const char *file1, const char *file2) 1611 { 1612 const char *time_format; 1613 char buf1[256]; 1614 char buf2[256]; 1615 char end1[10]; 1616 char end2[10]; 1617 struct tm *tm_ptr1, *tm_ptr2; 1618 int nsec1 = TIMESPEC_NS (stb1.st_mtime); 1619 int nsec2 = TIMESPEC_NS (stb2.st_mtime); 1620 1621 #ifdef ST_MTIM_NSEC 1622 time_format = "%Y-%m-%d %H:%M:%S.%N"; 1623 #else 1624 time_format = "%Y-%m-%d %H:%M:%S"; 1625 #endif 1626 1627 if (cflag) 1628 time_format = "%c"; 1629 tm_ptr1 = localtime(&stb1.st_mtime); 1630 tm_ptr2 = localtime(&stb2.st_mtime); 1631 strftime(buf1, 256, time_format, tm_ptr1); 1632 strftime(buf2, 256, time_format, tm_ptr2); 1633 if (!cflag) { 1634 strftime(end1, 10, "%z", tm_ptr1); 1635 strftime(end2, 10, "%z", tm_ptr2); 1636 sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1); 1637 sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2); 1638 } 1639 if (label[0] != NULL) 1640 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", 1641 label[0]); 1642 else 1643 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---", 1644 file1, buf1); 1645 if (label[1] != NULL) 1646 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", 1647 label[1]); 1648 else 1649 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++", 1650 file2, buf2); 1651 } 1652