1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #include <sys/cdefs.h> 68 __FBSDID("$FreeBSD$"); 69 70 #include <sys/capsicum.h> 71 #include <sys/procdesc.h> 72 #include <sys/stat.h> 73 #include <sys/types.h> 74 #include <sys/event.h> 75 #include <sys/wait.h> 76 77 #include <capsicum_helpers.h> 78 #include <ctype.h> 79 #include <err.h> 80 #include <errno.h> 81 #include <fcntl.h> 82 #include <paths.h> 83 #include <stddef.h> 84 #include <stdint.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <unistd.h> 89 #include <limits.h> 90 #include <signal.h> 91 92 #include "diff.h" 93 #include "xmalloc.h" 94 95 #define _PATH_PR "/usr/bin/pr" 96 97 #ifdef ST_MTIM_NSEC 98 # define TIMESPEC_NS(timespec) ((timespec).ST_MTIM_NSEC) 99 #else 100 # define TIMESPEC_NS(timespec) 0 101 #endif 102 103 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) 104 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) 105 106 /* 107 * diff - compare two files. 108 */ 109 110 /* 111 * Uses an algorithm due to Harold Stone, which finds 112 * a pair of longest identical subsequences in the two 113 * files. 114 * 115 * The major goal is to generate the match vector J. 116 * J[i] is the index of the line in file1 corresponding 117 * to line i file0. J[i] = 0 if there is no 118 * such line in file1. 119 * 120 * Lines are hashed so as to work in core. All potential 121 * matches are located by sorting the lines of each file 122 * on the hash (called ``value''). In particular, this 123 * collects the equivalence classes in file1 together. 124 * Subroutine equiv replaces the value of each line in 125 * file0 by the index of the first element of its 126 * matching equivalence in (the reordered) file1. 127 * To save space equiv squeezes file1 into a single 128 * array member in which the equivalence classes 129 * are simply concatenated, except that their first 130 * members are flagged by changing sign. 131 * 132 * Next the indices that point into member are unsorted into 133 * array class according to the original order of file0. 134 * 135 * The cleverness lies in routine stone. This marches 136 * through the lines of file0, developing a vector klist 137 * of "k-candidates". At step i a k-candidate is a matched 138 * pair of lines x,y (x in file0 y in file1) such that 139 * there is a common subsequence of length k 140 * between the first i lines of file0 and the first y 141 * lines of file1, but there is no such subsequence for 142 * any smaller y. x is the earliest possible mate to y 143 * that occurs in such a subsequence. 144 * 145 * Whenever any of the members of the equivalence class of 146 * lines in file1 matable to a line in file0 has serial number 147 * less than the y of some k-candidate, that k-candidate 148 * with the smallest such y is replaced. The new 149 * k-candidate is chained (via pred) to the current 150 * k-1 candidate so that the actual subsequence can 151 * be recovered. When a member has serial number greater 152 * that the y of all k-candidates, the klist is extended. 153 * At the end, the longest subsequence is pulled out 154 * and placed in the array J by unravel 155 * 156 * With J in hand, the matches there recorded are 157 * check'ed against reality to assure that no spurious 158 * matches have crept in due to hashing. If they have, 159 * they are broken, and "jackpot" is recorded--a harmless 160 * matter except that a true match for a spuriously 161 * mated line may now be unnecessarily reported as a change. 162 * 163 * Much of the complexity of the program comes simply 164 * from trying to minimize core utilization and 165 * maximize the range of doable problems by dynamically 166 * allocating what is needed and reusing what is not. 167 * The core requirements for problems larger than somewhat 168 * are (in words) 2*length(file0) + length(file1) + 169 * 3*(number of k-candidates installed), typically about 170 * 6n words for files of length n. 171 */ 172 173 struct cand { 174 int x; 175 int y; 176 int pred; 177 }; 178 179 static struct line { 180 int serial; 181 int value; 182 } *file[2]; 183 184 /* 185 * The following struct is used to record change information when 186 * doing a "context" or "unified" diff. (see routine "change" to 187 * understand the highly mnemonic field names) 188 */ 189 struct context_vec { 190 int a; /* start line in old file */ 191 int b; /* end line in old file */ 192 int c; /* start line in new file */ 193 int d; /* end line in new file */ 194 }; 195 196 #define diff_output printf 197 static FILE *opentemp(const char *); 198 static void output(char *, FILE *, char *, FILE *, int); 199 static void check(FILE *, FILE *, int); 200 static void range(int, int, const char *); 201 static void uni_range(int, int); 202 static void dump_context_vec(FILE *, FILE *, int); 203 static void dump_unified_vec(FILE *, FILE *, int); 204 static void prepare(int, FILE *, size_t, int); 205 static void prune(void); 206 static void equiv(struct line *, int, struct line *, int, int *); 207 static void unravel(int); 208 static void unsort(struct line *, int, int *); 209 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); 210 static void sort(struct line *, int); 211 static void print_header(const char *, const char *); 212 static int ignoreline(char *); 213 static int asciifile(FILE *); 214 static int fetch(long *, int, int, FILE *, int, int, int); 215 static int newcand(int, int, int); 216 static int search(int *, int, int); 217 static int skipline(FILE *); 218 static int isqrt(int); 219 static int stone(int *, int, int *, int *, int); 220 static int readhash(FILE *, int); 221 static int files_differ(FILE *, FILE *, int); 222 static char *match_function(const long *, int, FILE *); 223 static char *preadline(int, size_t, off_t); 224 225 static int *J; /* will be overlaid on class */ 226 static int *class; /* will be overlaid on file[0] */ 227 static int *klist; /* will be overlaid on file[0] after class */ 228 static int *member; /* will be overlaid on file[1] */ 229 static int clen; 230 static int inifdef; /* whether or not we are in a #ifdef block */ 231 static int len[2]; 232 static int pref, suff; /* length of prefix and suffix */ 233 static int slen[2]; 234 static int anychange; 235 static long *ixnew; /* will be overlaid on file[1] */ 236 static long *ixold; /* will be overlaid on klist */ 237 static struct cand *clist; /* merely a free storage pot for candidates */ 238 static int clistlen; /* the length of clist */ 239 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 240 static u_char *chrtran; /* translation table for case-folding */ 241 static struct context_vec *context_vec_start; 242 static struct context_vec *context_vec_end; 243 static struct context_vec *context_vec_ptr; 244 245 #define FUNCTION_CONTEXT_SIZE 55 246 static char lastbuf[FUNCTION_CONTEXT_SIZE]; 247 static int lastline; 248 static int lastmatchline; 249 250 251 /* 252 * chrtran points to one of 2 translation tables: cup2low if folding upper to 253 * lower case clow2low if not folding case 254 */ 255 static u_char clow2low[256] = { 256 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 257 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 258 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 259 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 260 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 261 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 262 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 263 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 264 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 265 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 266 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 267 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 268 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 269 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 270 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 271 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 272 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 273 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 274 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 275 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 276 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 277 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 278 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 279 0xfd, 0xfe, 0xff 280 }; 281 282 static u_char cup2low[256] = { 283 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 284 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 285 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 286 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 287 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 288 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 289 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 290 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 291 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 292 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 293 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 294 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 295 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 296 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 297 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 298 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 299 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 300 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 301 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 302 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 303 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 304 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 305 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 306 0xfd, 0xfe, 0xff 307 }; 308 309 int 310 diffreg(char *file1, char *file2, int flags, int capsicum) 311 { 312 FILE *f1, *f2; 313 int i, rval; 314 int ostdout = -1; 315 int pr_pd, kq; 316 struct kevent *e; 317 cap_rights_t rights_ro; 318 319 e = NULL; 320 kq = -1; 321 f1 = f2 = NULL; 322 rval = D_SAME; 323 anychange = 0; 324 lastline = 0; 325 lastmatchline = 0; 326 context_vec_ptr = context_vec_start - 1; 327 if (flags & D_IGNORECASE) 328 chrtran = cup2low; 329 else 330 chrtran = clow2low; 331 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 332 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); 333 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 334 goto closem; 335 336 if (flags & D_EMPTY1) 337 f1 = fopen(_PATH_DEVNULL, "r"); 338 else { 339 if (!S_ISREG(stb1.st_mode)) { 340 if ((f1 = opentemp(file1)) == NULL || 341 fstat(fileno(f1), &stb1) < 0) { 342 warn("%s", file1); 343 status |= 2; 344 goto closem; 345 } 346 } else if (strcmp(file1, "-") == 0) 347 f1 = stdin; 348 else 349 f1 = fopen(file1, "r"); 350 } 351 if (f1 == NULL) { 352 warn("%s", file1); 353 status |= 2; 354 goto closem; 355 } 356 357 if (flags & D_EMPTY2) 358 f2 = fopen(_PATH_DEVNULL, "r"); 359 else { 360 if (!S_ISREG(stb2.st_mode)) { 361 if ((f2 = opentemp(file2)) == NULL || 362 fstat(fileno(f2), &stb2) < 0) { 363 warn("%s", file2); 364 status |= 2; 365 goto closem; 366 } 367 } else if (strcmp(file2, "-") == 0) 368 f2 = stdin; 369 else 370 f2 = fopen(file2, "r"); 371 } 372 if (f2 == NULL) { 373 warn("%s", file2); 374 status |= 2; 375 goto closem; 376 } 377 378 if (lflag) { 379 /* redirect stdout to pr */ 380 int pfd[2]; 381 pid_t pid; 382 char *header; 383 384 xasprintf(&header, "%s %s %s", diffargs, file1, file2); 385 signal(SIGPIPE, SIG_IGN); 386 fflush(stdout); 387 rewind(stdout); 388 pipe(pfd); 389 switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) { 390 case -1: 391 status |= 2; 392 free(header); 393 err(2, "No more processes"); 394 case 0: 395 /* child */ 396 if (pfd[0] != STDIN_FILENO) { 397 dup2(pfd[0], STDIN_FILENO); 398 close(pfd[0]); 399 } 400 close(pfd[1]); 401 execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0); 402 _exit(127); 403 default: 404 405 /* parent */ 406 if (pfd[1] != STDOUT_FILENO) { 407 ostdout = dup(STDOUT_FILENO); 408 dup2(pfd[1], STDOUT_FILENO); 409 close(pfd[1]); 410 } 411 close(pfd[0]); 412 rewind(stdout); 413 free(header); 414 kq = kqueue(); 415 if (kq == -1) 416 err(2, "kqueue"); 417 e = xmalloc(sizeof(struct kevent)); 418 EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0, 419 NULL); 420 if (kevent(kq, e, 1, NULL, 0, NULL) == -1) 421 err(2, "kevent"); 422 } 423 } 424 425 if (capsicum) { 426 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 427 if (cap_rights_limit(fileno(f1), &rights_ro) < 0 428 && errno != ENOSYS) 429 err(2, "unable to limit rights on: %s", file1); 430 if (cap_rights_limit(fileno(f2), &rights_ro) < 0 && 431 errno != ENOSYS) 432 err(2, "unable to limit rights on: %s", file2); 433 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 434 /* stding has already been limited */ 435 if (caph_limit_stderr() == -1) 436 err(2, "unable to limit stderr"); 437 if (caph_limit_stdout() == -1) 438 err(2, "unable to limit stdout"); 439 } else if (caph_limit_stdio() == -1) 440 err(2, "unable to limit stdio"); 441 442 caph_cache_catpages(); 443 if (cap_enter() < 0 && errno != ENOSYS) 444 err(2, "unable to enter capability mode"); 445 } 446 447 switch (files_differ(f1, f2, flags)) { 448 case 0: 449 goto closem; 450 case 1: 451 break; 452 default: 453 /* error */ 454 status |= 2; 455 goto closem; 456 } 457 458 if ((flags & D_FORCEASCII) == 0 && 459 (!asciifile(f1) || !asciifile(f2))) { 460 rval = D_BINARY; 461 status |= 1; 462 goto closem; 463 } 464 prepare(0, f1, stb1.st_size, flags); 465 prepare(1, f2, stb2.st_size, flags); 466 467 prune(); 468 sort(sfile[0], slen[0]); 469 sort(sfile[1], slen[1]); 470 471 member = (int *)file[1]; 472 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 473 member = xreallocarray(member, slen[1] + 2, sizeof(*member)); 474 475 class = (int *)file[0]; 476 unsort(sfile[0], slen[0], class); 477 class = xreallocarray(class, slen[0] + 2, sizeof(*class)); 478 479 klist = xcalloc(slen[0] + 2, sizeof(*klist)); 480 clen = 0; 481 clistlen = 100; 482 clist = xcalloc(clistlen, sizeof(*clist)); 483 i = stone(class, slen[0], member, klist, flags); 484 free(member); 485 free(class); 486 487 J = xreallocarray(J, len[0] + 2, sizeof(*J)); 488 unravel(klist[i]); 489 free(clist); 490 free(klist); 491 492 ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); 493 ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); 494 check(f1, f2, flags); 495 output(file1, f1, file2, f2, flags); 496 if (ostdout != -1 && e != NULL) { 497 /* close the pipe to pr and restore stdout */ 498 int wstatus; 499 500 fflush(stdout); 501 if (ostdout != STDOUT_FILENO) { 502 close(STDOUT_FILENO); 503 dup2(ostdout, STDOUT_FILENO); 504 close(ostdout); 505 } 506 if (kevent(kq, NULL, 0, e, 1, NULL) == -1) 507 err(2, "kevent"); 508 wstatus = e[0].data; 509 close(kq); 510 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) 511 errx(2, "pr exited abnormally"); 512 else if (WIFSIGNALED(wstatus)) 513 errx(2, "pr killed by signal %d", 514 WTERMSIG(wstatus)); 515 } 516 517 closem: 518 if (anychange) { 519 status |= 1; 520 if (rval == D_SAME) 521 rval = D_DIFFER; 522 } 523 if (f1 != NULL) 524 fclose(f1); 525 if (f2 != NULL) 526 fclose(f2); 527 528 return (rval); 529 } 530 531 /* 532 * Check to see if the given files differ. 533 * Returns 0 if they are the same, 1 if different, and -1 on error. 534 * XXX - could use code from cmp(1) [faster] 535 */ 536 static int 537 files_differ(FILE *f1, FILE *f2, int flags) 538 { 539 char buf1[BUFSIZ], buf2[BUFSIZ]; 540 size_t i, j; 541 542 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 543 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 544 return (1); 545 for (;;) { 546 i = fread(buf1, 1, sizeof(buf1), f1); 547 j = fread(buf2, 1, sizeof(buf2), f2); 548 if ((!i && ferror(f1)) || (!j && ferror(f2))) 549 return (-1); 550 if (i != j) 551 return (1); 552 if (i == 0) 553 return (0); 554 if (memcmp(buf1, buf2, i) != 0) 555 return (1); 556 } 557 } 558 559 static FILE * 560 opentemp(const char *f) 561 { 562 char buf[BUFSIZ], tempfile[PATH_MAX]; 563 ssize_t nread; 564 int ifd, ofd; 565 566 if (strcmp(f, "-") == 0) 567 ifd = STDIN_FILENO; 568 else if ((ifd = open(f, O_RDONLY, 0644)) < 0) 569 return (NULL); 570 571 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); 572 573 if ((ofd = mkstemp(tempfile)) < 0) { 574 close(ifd); 575 return (NULL); 576 } 577 unlink(tempfile); 578 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 579 if (write(ofd, buf, nread) != nread) { 580 close(ifd); 581 close(ofd); 582 return (NULL); 583 } 584 } 585 close(ifd); 586 lseek(ofd, (off_t)0, SEEK_SET); 587 return (fdopen(ofd, "r")); 588 } 589 590 char * 591 splice(char *dir, char *path) 592 { 593 char *tail, *buf; 594 size_t dirlen; 595 596 dirlen = strlen(dir); 597 while (dirlen != 0 && dir[dirlen - 1] == '/') 598 dirlen--; 599 if ((tail = strrchr(path, '/')) == NULL) 600 tail = path; 601 else 602 tail++; 603 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); 604 return (buf); 605 } 606 607 static void 608 prepare(int i, FILE *fd, size_t filesize, int flags) 609 { 610 struct line *p; 611 int h; 612 size_t sz, j; 613 614 rewind(fd); 615 616 sz = ((unsigned long)filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25; 617 if (sz < 100) 618 sz = 100; 619 620 p = xcalloc(sz + 3, sizeof(*p)); 621 for (j = 0; (h = readhash(fd, flags));) { 622 if (j == sz) { 623 sz = sz * 3 / 2; 624 p = xreallocarray(p, sz + 3, sizeof(*p)); 625 } 626 p[++j].value = h; 627 } 628 len[i] = j; 629 file[i] = p; 630 } 631 632 static void 633 prune(void) 634 { 635 int i, j; 636 637 for (pref = 0; pref < len[0] && pref < len[1] && 638 file[0][pref + 1].value == file[1][pref + 1].value; 639 pref++) 640 ; 641 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 642 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 643 suff++) 644 ; 645 for (j = 0; j < 2; j++) { 646 sfile[j] = file[j] + pref; 647 slen[j] = len[j] - pref - suff; 648 for (i = 0; i <= slen[j]; i++) 649 sfile[j][i].serial = i; 650 } 651 } 652 653 static void 654 equiv(struct line *a, int n, struct line *b, int m, int *c) 655 { 656 int i, j; 657 658 i = j = 1; 659 while (i <= n && j <= m) { 660 if (a[i].value < b[j].value) 661 a[i++].value = 0; 662 else if (a[i].value == b[j].value) 663 a[i++].value = j; 664 else 665 j++; 666 } 667 while (i <= n) 668 a[i++].value = 0; 669 b[m + 1].value = 0; 670 j = 0; 671 while (++j <= m) { 672 c[j] = -b[j].serial; 673 while (b[j + 1].value == b[j].value) { 674 j++; 675 c[j] = b[j].serial; 676 } 677 } 678 c[j] = -1; 679 } 680 681 /* Code taken from ping.c */ 682 static int 683 isqrt(int n) 684 { 685 int y, x = 1; 686 687 if (n == 0) 688 return (0); 689 690 do { /* newton was a stinker */ 691 y = x; 692 x = n / x; 693 x += y; 694 x /= 2; 695 } while ((x - y) > 1 || (x - y) < -1); 696 697 return (x); 698 } 699 700 static int 701 stone(int *a, int n, int *b, int *c, int flags) 702 { 703 int i, k, y, j, l; 704 int oldc, tc, oldl, sq; 705 u_int numtries, bound; 706 707 if (flags & D_MINIMAL) 708 bound = UINT_MAX; 709 else { 710 sq = isqrt(n); 711 bound = MAXIMUM(256, sq); 712 } 713 714 k = 0; 715 c[0] = newcand(0, 0, 0); 716 for (i = 1; i <= n; i++) { 717 j = a[i]; 718 if (j == 0) 719 continue; 720 y = -b[j]; 721 oldl = 0; 722 oldc = c[0]; 723 numtries = 0; 724 do { 725 if (y <= clist[oldc].y) 726 continue; 727 l = search(c, k, y); 728 if (l != oldl + 1) 729 oldc = c[l - 1]; 730 if (l <= k) { 731 if (clist[c[l]].y <= y) 732 continue; 733 tc = c[l]; 734 c[l] = newcand(i, y, oldc); 735 oldc = tc; 736 oldl = l; 737 numtries++; 738 } else { 739 c[l] = newcand(i, y, oldc); 740 k++; 741 break; 742 } 743 } while ((y = b[++j]) > 0 && numtries < bound); 744 } 745 return (k); 746 } 747 748 static int 749 newcand(int x, int y, int pred) 750 { 751 struct cand *q; 752 753 if (clen == clistlen) { 754 clistlen = clistlen * 11 / 10; 755 clist = xreallocarray(clist, clistlen, sizeof(*clist)); 756 } 757 q = clist + clen; 758 q->x = x; 759 q->y = y; 760 q->pred = pred; 761 return (clen++); 762 } 763 764 static int 765 search(int *c, int k, int y) 766 { 767 int i, j, l, t; 768 769 if (clist[c[k]].y < y) /* quick look for typical case */ 770 return (k + 1); 771 i = 0; 772 j = k + 1; 773 for (;;) { 774 l = (i + j) / 2; 775 if (l <= i) 776 break; 777 t = clist[c[l]].y; 778 if (t > y) 779 j = l; 780 else if (t < y) 781 i = l; 782 else 783 return (l); 784 } 785 return (l + 1); 786 } 787 788 static void 789 unravel(int p) 790 { 791 struct cand *q; 792 int i; 793 794 for (i = 0; i <= len[0]; i++) 795 J[i] = i <= pref ? i : 796 i > len[0] - suff ? i + len[1] - len[0] : 0; 797 for (q = clist + p; q->y != 0; q = clist + q->pred) 798 J[q->x + pref] = q->y + pref; 799 } 800 801 /* 802 * Check does double duty: 803 * 1. ferret out any fortuitous correspondences due 804 * to confounding by hashing (which result in "jackpot") 805 * 2. collect random access indexes to the two files 806 */ 807 static void 808 check(FILE *f1, FILE *f2, int flags) 809 { 810 int i, j, jackpot, c, d; 811 long ctold, ctnew; 812 813 rewind(f1); 814 rewind(f2); 815 j = 1; 816 ixold[0] = ixnew[0] = 0; 817 jackpot = 0; 818 ctold = ctnew = 0; 819 for (i = 1; i <= len[0]; i++) { 820 if (J[i] == 0) { 821 ixold[i] = ctold += skipline(f1); 822 continue; 823 } 824 while (j < J[i]) { 825 ixnew[j] = ctnew += skipline(f2); 826 j++; 827 } 828 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) { 829 for (;;) { 830 c = getc(f1); 831 d = getc(f2); 832 /* 833 * GNU diff ignores a missing newline 834 * in one file for -b or -w. 835 */ 836 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { 837 if (c == EOF && d == '\n') { 838 ctnew++; 839 break; 840 } else if (c == '\n' && d == EOF) { 841 ctold++; 842 break; 843 } 844 } 845 ctold++; 846 ctnew++; 847 if (flags & D_STRIPCR) { 848 if (c == '\r') { 849 if ((c = getc(f1)) == '\n') { 850 ctnew++; 851 break; 852 } 853 } 854 if (d == '\r') { 855 if ((d = getc(f2)) == '\n') { 856 ctold++; 857 break; 858 } 859 } 860 } 861 if ((flags & D_FOLDBLANKS) && isspace(c) && 862 isspace(d)) { 863 do { 864 if (c == '\n') 865 break; 866 ctold++; 867 } while (isspace(c = getc(f1))); 868 do { 869 if (d == '\n') 870 break; 871 ctnew++; 872 } while (isspace(d = getc(f2))); 873 } else if ((flags & D_IGNOREBLANKS)) { 874 while (isspace(c) && c != '\n') { 875 c = getc(f1); 876 ctold++; 877 } 878 while (isspace(d) && d != '\n') { 879 d = getc(f2); 880 ctnew++; 881 } 882 } 883 if (chrtran[c] != chrtran[d]) { 884 jackpot++; 885 J[i] = 0; 886 if (c != '\n' && c != EOF) 887 ctold += skipline(f1); 888 if (d != '\n' && c != EOF) 889 ctnew += skipline(f2); 890 break; 891 } 892 if (c == '\n' || c == EOF) 893 break; 894 } 895 } else { 896 for (;;) { 897 ctold++; 898 ctnew++; 899 if ((c = getc(f1)) != (d = getc(f2))) { 900 /* jackpot++; */ 901 J[i] = 0; 902 if (c != '\n' && c != EOF) 903 ctold += skipline(f1); 904 if (d != '\n' && c != EOF) 905 ctnew += skipline(f2); 906 break; 907 } 908 if (c == '\n' || c == EOF) 909 break; 910 } 911 } 912 ixold[i] = ctold; 913 ixnew[j] = ctnew; 914 j++; 915 } 916 for (; j <= len[1]; j++) { 917 ixnew[j] = ctnew += skipline(f2); 918 } 919 /* 920 * if (jackpot) 921 * fprintf(stderr, "jackpot\n"); 922 */ 923 } 924 925 /* shellsort CACM #201 */ 926 static void 927 sort(struct line *a, int n) 928 { 929 struct line *ai, *aim, w; 930 int j, m = 0, k; 931 932 if (n == 0) 933 return; 934 for (j = 1; j <= n; j *= 2) 935 m = 2 * j - 1; 936 for (m /= 2; m != 0; m /= 2) { 937 k = n - m; 938 for (j = 1; j <= k; j++) { 939 for (ai = &a[j]; ai > a; ai -= m) { 940 aim = &ai[m]; 941 if (aim < ai) 942 break; /* wraparound */ 943 if (aim->value > ai[0].value || 944 (aim->value == ai[0].value && 945 aim->serial > ai[0].serial)) 946 break; 947 w.value = ai[0].value; 948 ai[0].value = aim->value; 949 aim->value = w.value; 950 w.serial = ai[0].serial; 951 ai[0].serial = aim->serial; 952 aim->serial = w.serial; 953 } 954 } 955 } 956 } 957 958 static void 959 unsort(struct line *f, int l, int *b) 960 { 961 int *a, i; 962 963 a = xcalloc(l + 1, sizeof(*a)); 964 for (i = 1; i <= l; i++) 965 a[f[i].serial] = f[i].value; 966 for (i = 1; i <= l; i++) 967 b[i] = a[i]; 968 free(a); 969 } 970 971 static int 972 skipline(FILE *f) 973 { 974 int i, c; 975 976 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 977 continue; 978 return (i); 979 } 980 981 static void 982 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) 983 { 984 int m, i0, i1, j0, j1; 985 986 rewind(f1); 987 rewind(f2); 988 m = len[0]; 989 J[0] = 0; 990 J[m + 1] = len[1] + 1; 991 if (diff_format != D_EDIT) { 992 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 993 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 994 i0++; 995 j0 = J[i0 - 1] + 1; 996 i1 = i0 - 1; 997 while (i1 < m && J[i1 + 1] == 0) 998 i1++; 999 j1 = J[i1 + 1] - 1; 1000 J[i1] = j1; 1001 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); 1002 } 1003 } else { 1004 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 1005 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 1006 i0--; 1007 j0 = J[i0 + 1] - 1; 1008 i1 = i0 + 1; 1009 while (i1 > 1 && J[i1 - 1] == 0) 1010 i1--; 1011 j1 = J[i1 - 1] + 1; 1012 J[i1] = j1; 1013 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); 1014 } 1015 } 1016 if (m == 0) 1017 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); 1018 if (diff_format == D_IFDEF) { 1019 for (;;) { 1020 #define c i0 1021 if ((c = getc(f1)) == EOF) 1022 return; 1023 diff_output("%c", c); 1024 } 1025 #undef c 1026 } 1027 if (anychange != 0) { 1028 if (diff_format == D_CONTEXT) 1029 dump_context_vec(f1, f2, flags); 1030 else if (diff_format == D_UNIFIED) 1031 dump_unified_vec(f1, f2, flags); 1032 } 1033 } 1034 1035 static void 1036 range(int a, int b, const char *separator) 1037 { 1038 diff_output("%d", a > b ? b : a); 1039 if (a < b) 1040 diff_output("%s%d", separator, b); 1041 } 1042 1043 static void 1044 uni_range(int a, int b) 1045 { 1046 if (a < b) 1047 diff_output("%d,%d", a, b - a + 1); 1048 else if (a == b) 1049 diff_output("%d", b); 1050 else 1051 diff_output("%d,0", b); 1052 } 1053 1054 static char * 1055 preadline(int fd, size_t rlen, off_t off) 1056 { 1057 char *line; 1058 ssize_t nr; 1059 1060 line = xmalloc(rlen + 1); 1061 if ((nr = pread(fd, line, rlen, off)) < 0) 1062 err(2, "preadline"); 1063 if (nr > 0 && line[nr-1] == '\n') 1064 nr--; 1065 line[nr] = '\0'; 1066 return (line); 1067 } 1068 1069 static int 1070 ignoreline(char *line) 1071 { 1072 int ret; 1073 1074 ret = regexec(&ignore_re, line, 0, NULL, 0); 1075 free(line); 1076 return (ret == 0); /* if it matched, it should be ignored. */ 1077 } 1078 1079 /* 1080 * Indicate that there is a difference between lines a and b of the from file 1081 * to get to lines c to d of the to file. If a is greater then b then there 1082 * are no lines in the from file involved and this means that there were 1083 * lines appended (beginning at b). If c is greater than d then there are 1084 * lines missing from the to file. 1085 */ 1086 static void 1087 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, 1088 int *pflags) 1089 { 1090 static size_t max_context = 64; 1091 int i; 1092 1093 restart: 1094 if (diff_format != D_IFDEF && a > b && c > d) 1095 return; 1096 if (ignore_pats != NULL) { 1097 char *line; 1098 /* 1099 * All lines in the change, insert, or delete must 1100 * match an ignore pattern for the change to be 1101 * ignored. 1102 */ 1103 if (a <= b) { /* Changes and deletes. */ 1104 for (i = a; i <= b; i++) { 1105 line = preadline(fileno(f1), 1106 ixold[i] - ixold[i - 1], ixold[i - 1]); 1107 if (!ignoreline(line)) 1108 goto proceed; 1109 } 1110 } 1111 if (a > b || c <= d) { /* Changes and inserts. */ 1112 for (i = c; i <= d; i++) { 1113 line = preadline(fileno(f2), 1114 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1115 if (!ignoreline(line)) 1116 goto proceed; 1117 } 1118 } 1119 return; 1120 } 1121 proceed: 1122 if (*pflags & D_HEADER) { 1123 diff_output("%s %s %s\n", diffargs, file1, file2); 1124 *pflags &= ~D_HEADER; 1125 } 1126 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { 1127 /* 1128 * Allocate change records as needed. 1129 */ 1130 if (context_vec_ptr == context_vec_end - 1) { 1131 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1132 max_context <<= 1; 1133 context_vec_start = xreallocarray(context_vec_start, 1134 max_context, sizeof(*context_vec_start)); 1135 context_vec_end = context_vec_start + max_context; 1136 context_vec_ptr = context_vec_start + offset; 1137 } 1138 if (anychange == 0) { 1139 /* 1140 * Print the context/unidiff header first time through. 1141 */ 1142 print_header(file1, file2); 1143 anychange = 1; 1144 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && 1145 c > context_vec_ptr->d + (2 * diff_context) + 1) { 1146 /* 1147 * If this change is more than 'diff_context' lines from the 1148 * previous change, dump the record and reset it. 1149 */ 1150 if (diff_format == D_CONTEXT) 1151 dump_context_vec(f1, f2, *pflags); 1152 else 1153 dump_unified_vec(f1, f2, *pflags); 1154 } 1155 context_vec_ptr++; 1156 context_vec_ptr->a = a; 1157 context_vec_ptr->b = b; 1158 context_vec_ptr->c = c; 1159 context_vec_ptr->d = d; 1160 return; 1161 } 1162 if (anychange == 0) 1163 anychange = 1; 1164 switch (diff_format) { 1165 case D_BRIEF: 1166 return; 1167 case D_NORMAL: 1168 case D_EDIT: 1169 range(a, b, ","); 1170 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1171 if (diff_format == D_NORMAL) 1172 range(c, d, ","); 1173 diff_output("\n"); 1174 break; 1175 case D_REVERSE: 1176 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1177 range(a, b, " "); 1178 diff_output("\n"); 1179 break; 1180 case D_NREVERSE: 1181 if (a > b) 1182 diff_output("a%d %d\n", b, d - c + 1); 1183 else { 1184 diff_output("d%d %d\n", a, b - a + 1); 1185 if (!(c > d)) 1186 /* add changed lines */ 1187 diff_output("a%d %d\n", b, d - c + 1); 1188 } 1189 break; 1190 } 1191 if (diff_format == D_NORMAL || diff_format == D_IFDEF) { 1192 fetch(ixold, a, b, f1, '<', 1, *pflags); 1193 if (a <= b && c <= d && diff_format == D_NORMAL) 1194 diff_output("---\n"); 1195 } 1196 i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); 1197 if (i != 0 && diff_format == D_EDIT) { 1198 /* 1199 * A non-zero return value for D_EDIT indicates that the 1200 * last line printed was a bare dot (".") that has been 1201 * escaped as ".." to prevent ed(1) from misinterpreting 1202 * it. We have to add a substitute command to change this 1203 * back and restart where we left off. 1204 */ 1205 diff_output(".\n"); 1206 diff_output("%ds/.//\n", a + i - 1); 1207 b = a + i - 1; 1208 a = b + 1; 1209 c += i; 1210 goto restart; 1211 } 1212 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) 1213 diff_output(".\n"); 1214 if (inifdef) { 1215 diff_output("#endif /* %s */\n", ifdefname); 1216 inifdef = 0; 1217 } 1218 } 1219 1220 static int 1221 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) 1222 { 1223 int i, j, c, lastc, col, nc; 1224 int newcol; 1225 1226 /* 1227 * When doing #ifdef's, copy down to current line 1228 * if this is the first file, so that stuff makes it to output. 1229 */ 1230 if (diff_format == D_IFDEF && oldfile) { 1231 long curpos = ftell(lb); 1232 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1233 nc = f[a > b ? b : a - 1] - curpos; 1234 for (i = 0; i < nc; i++) 1235 diff_output("%c", getc(lb)); 1236 } 1237 if (a > b) 1238 return (0); 1239 if (diff_format == D_IFDEF) { 1240 if (inifdef) { 1241 diff_output("#else /* %s%s */\n", 1242 oldfile == 1 ? "!" : "", ifdefname); 1243 } else { 1244 if (oldfile) 1245 diff_output("#ifndef %s\n", ifdefname); 1246 else 1247 diff_output("#ifdef %s\n", ifdefname); 1248 } 1249 inifdef = 1 + oldfile; 1250 } 1251 for (i = a; i <= b; i++) { 1252 fseek(lb, f[i - 1], SEEK_SET); 1253 nc = f[i] - f[i - 1]; 1254 if (diff_format != D_IFDEF && ch != '\0') { 1255 diff_output("%c", ch); 1256 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT 1257 || diff_format == D_UNIFIED)) 1258 diff_output("\t"); 1259 else if (diff_format != D_UNIFIED) 1260 diff_output(" "); 1261 } 1262 col = 0; 1263 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1264 if ((c = getc(lb)) == EOF) { 1265 if (diff_format == D_EDIT || diff_format == D_REVERSE || 1266 diff_format == D_NREVERSE) 1267 warnx("No newline at end of file"); 1268 else 1269 diff_output("\n\\ No newline at end of " 1270 "file\n"); 1271 return (0); 1272 } 1273 if (c == '\t' && (flags & D_EXPANDTABS)) { 1274 newcol = ((col/tabsize)+1)*tabsize; 1275 do { 1276 diff_output(" "); 1277 } while (++col < newcol); 1278 } else { 1279 if (diff_format == D_EDIT && j == 1 && c == '\n' 1280 && lastc == '.') { 1281 /* 1282 * Don't print a bare "." line 1283 * since that will confuse ed(1). 1284 * Print ".." instead and return, 1285 * giving the caller an offset 1286 * from which to restart. 1287 */ 1288 diff_output(".\n"); 1289 return (i - a + 1); 1290 } 1291 diff_output("%c", c); 1292 col++; 1293 } 1294 } 1295 } 1296 return (0); 1297 } 1298 1299 /* 1300 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1301 */ 1302 static int 1303 readhash(FILE *f, int flags) 1304 { 1305 int i, t, space; 1306 int sum; 1307 1308 sum = 1; 1309 space = 0; 1310 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { 1311 if (flags & D_IGNORECASE) 1312 for (i = 0; (t = getc(f)) != '\n'; i++) { 1313 if (flags & D_STRIPCR && t == '\r') { 1314 t = getc(f); 1315 if (t == '\n') 1316 break; 1317 ungetc(t, f); 1318 } 1319 if (t == EOF) { 1320 if (i == 0) 1321 return (0); 1322 break; 1323 } 1324 sum = sum * 127 + chrtran[t]; 1325 } 1326 else 1327 for (i = 0; (t = getc(f)) != '\n'; i++) { 1328 if (flags & D_STRIPCR && t == '\r') { 1329 t = getc(f); 1330 if (t == '\n') 1331 break; 1332 ungetc(t, f); 1333 } 1334 if (t == EOF) { 1335 if (i == 0) 1336 return (0); 1337 break; 1338 } 1339 sum = sum * 127 + t; 1340 } 1341 } else { 1342 for (i = 0;;) { 1343 switch (t = getc(f)) { 1344 case '\r': 1345 case '\t': 1346 case '\v': 1347 case '\f': 1348 case ' ': 1349 space++; 1350 continue; 1351 default: 1352 if (space && (flags & D_IGNOREBLANKS) == 0) { 1353 i++; 1354 space = 0; 1355 } 1356 sum = sum * 127 + chrtran[t]; 1357 i++; 1358 continue; 1359 case EOF: 1360 if (i == 0) 1361 return (0); 1362 /* FALLTHROUGH */ 1363 case '\n': 1364 break; 1365 } 1366 break; 1367 } 1368 } 1369 /* 1370 * There is a remote possibility that we end up with a zero sum. 1371 * Zero is used as an EOF marker, so return 1 instead. 1372 */ 1373 return (sum == 0 ? 1 : sum); 1374 } 1375 1376 static int 1377 asciifile(FILE *f) 1378 { 1379 unsigned char buf[BUFSIZ]; 1380 size_t cnt; 1381 1382 if (f == NULL) 1383 return (1); 1384 1385 rewind(f); 1386 cnt = fread(buf, 1, sizeof(buf), f); 1387 return (memchr(buf, '\0', cnt) == NULL); 1388 } 1389 1390 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) 1391 1392 static char * 1393 match_function(const long *f, int pos, FILE *fp) 1394 { 1395 unsigned char buf[FUNCTION_CONTEXT_SIZE]; 1396 size_t nc; 1397 int last = lastline; 1398 const char *state = NULL; 1399 1400 lastline = pos; 1401 while (pos > last) { 1402 fseek(fp, f[pos - 1], SEEK_SET); 1403 nc = f[pos] - f[pos - 1]; 1404 if (nc >= sizeof(buf)) 1405 nc = sizeof(buf) - 1; 1406 nc = fread(buf, 1, nc, fp); 1407 if (nc > 0) { 1408 buf[nc] = '\0'; 1409 buf[strcspn(buf, "\n")] = '\0'; 1410 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 1411 if (begins_with(buf, "private:")) { 1412 if (!state) 1413 state = " (private)"; 1414 } else if (begins_with(buf, "protected:")) { 1415 if (!state) 1416 state = " (protected)"; 1417 } else if (begins_with(buf, "public:")) { 1418 if (!state) 1419 state = " (public)"; 1420 } else { 1421 strlcpy(lastbuf, buf, sizeof lastbuf); 1422 if (state) 1423 strlcat(lastbuf, state, 1424 sizeof lastbuf); 1425 lastmatchline = pos; 1426 return lastbuf; 1427 } 1428 } 1429 } 1430 pos--; 1431 } 1432 return lastmatchline > 0 ? lastbuf : NULL; 1433 } 1434 1435 /* dump accumulated "context" diff changes */ 1436 static void 1437 dump_context_vec(FILE *f1, FILE *f2, int flags) 1438 { 1439 struct context_vec *cvp = context_vec_start; 1440 int lowa, upb, lowc, upd, do_output; 1441 int a, b, c, d; 1442 char ch, *f; 1443 1444 if (context_vec_start > context_vec_ptr) 1445 return; 1446 1447 b = d = 0; /* gcc */ 1448 lowa = MAXIMUM(1, cvp->a - diff_context); 1449 upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); 1450 lowc = MAXIMUM(1, cvp->c - diff_context); 1451 upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); 1452 1453 diff_output("***************"); 1454 if ((flags & D_PROTOTYPE)) { 1455 f = match_function(ixold, lowa-1, f1); 1456 if (f != NULL) 1457 diff_output(" %s", f); 1458 } 1459 diff_output("\n*** "); 1460 range(lowa, upb, ","); 1461 diff_output(" ****\n"); 1462 1463 /* 1464 * Output changes to the "old" file. The first loop suppresses 1465 * output if there were no changes to the "old" file (we'll see 1466 * the "old" lines as context in the "new" list). 1467 */ 1468 do_output = 0; 1469 for (; cvp <= context_vec_ptr; cvp++) 1470 if (cvp->a <= cvp->b) { 1471 cvp = context_vec_start; 1472 do_output++; 1473 break; 1474 } 1475 if (do_output) { 1476 while (cvp <= context_vec_ptr) { 1477 a = cvp->a; 1478 b = cvp->b; 1479 c = cvp->c; 1480 d = cvp->d; 1481 1482 if (a <= b && c <= d) 1483 ch = 'c'; 1484 else 1485 ch = (a <= b) ? 'd' : 'a'; 1486 1487 if (ch == 'a') 1488 fetch(ixold, lowa, b, f1, ' ', 0, flags); 1489 else { 1490 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1491 fetch(ixold, a, b, f1, 1492 ch == 'c' ? '!' : '-', 0, flags); 1493 } 1494 lowa = b + 1; 1495 cvp++; 1496 } 1497 fetch(ixold, b + 1, upb, f1, ' ', 0, flags); 1498 } 1499 /* output changes to the "new" file */ 1500 diff_output("--- "); 1501 range(lowc, upd, ","); 1502 diff_output(" ----\n"); 1503 1504 do_output = 0; 1505 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1506 if (cvp->c <= cvp->d) { 1507 cvp = context_vec_start; 1508 do_output++; 1509 break; 1510 } 1511 if (do_output) { 1512 while (cvp <= context_vec_ptr) { 1513 a = cvp->a; 1514 b = cvp->b; 1515 c = cvp->c; 1516 d = cvp->d; 1517 1518 if (a <= b && c <= d) 1519 ch = 'c'; 1520 else 1521 ch = (a <= b) ? 'd' : 'a'; 1522 1523 if (ch == 'd') 1524 fetch(ixnew, lowc, d, f2, ' ', 0, flags); 1525 else { 1526 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1527 fetch(ixnew, c, d, f2, 1528 ch == 'c' ? '!' : '+', 0, flags); 1529 } 1530 lowc = d + 1; 1531 cvp++; 1532 } 1533 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1534 } 1535 context_vec_ptr = context_vec_start - 1; 1536 } 1537 1538 /* dump accumulated "unified" diff changes */ 1539 static void 1540 dump_unified_vec(FILE *f1, FILE *f2, int flags) 1541 { 1542 struct context_vec *cvp = context_vec_start; 1543 int lowa, upb, lowc, upd; 1544 int a, b, c, d; 1545 char ch, *f; 1546 1547 if (context_vec_start > context_vec_ptr) 1548 return; 1549 1550 b = d = 0; /* gcc */ 1551 lowa = MAXIMUM(1, cvp->a - diff_context); 1552 upb = MINIMUM(len[0], context_vec_ptr->b + diff_context); 1553 lowc = MAXIMUM(1, cvp->c - diff_context); 1554 upd = MINIMUM(len[1], context_vec_ptr->d + diff_context); 1555 1556 diff_output("@@ -"); 1557 uni_range(lowa, upb); 1558 diff_output(" +"); 1559 uni_range(lowc, upd); 1560 diff_output(" @@"); 1561 if ((flags & D_PROTOTYPE)) { 1562 f = match_function(ixold, lowa-1, f1); 1563 if (f != NULL) 1564 diff_output(" %s", f); 1565 } 1566 diff_output("\n"); 1567 1568 /* 1569 * Output changes in "unified" diff format--the old and new lines 1570 * are printed together. 1571 */ 1572 for (; cvp <= context_vec_ptr; cvp++) { 1573 a = cvp->a; 1574 b = cvp->b; 1575 c = cvp->c; 1576 d = cvp->d; 1577 1578 /* 1579 * c: both new and old changes 1580 * d: only changes in the old file 1581 * a: only changes in the new file 1582 */ 1583 if (a <= b && c <= d) 1584 ch = 'c'; 1585 else 1586 ch = (a <= b) ? 'd' : 'a'; 1587 1588 switch (ch) { 1589 case 'c': 1590 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1591 fetch(ixold, a, b, f1, '-', 0, flags); 1592 fetch(ixnew, c, d, f2, '+', 0, flags); 1593 break; 1594 case 'd': 1595 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1596 fetch(ixold, a, b, f1, '-', 0, flags); 1597 break; 1598 case 'a': 1599 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1600 fetch(ixnew, c, d, f2, '+', 0, flags); 1601 break; 1602 } 1603 lowa = b + 1; 1604 lowc = d + 1; 1605 } 1606 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1607 1608 context_vec_ptr = context_vec_start - 1; 1609 } 1610 1611 static void 1612 print_header(const char *file1, const char *file2) 1613 { 1614 const char *time_format; 1615 char buf1[256]; 1616 char buf2[256]; 1617 char end1[10]; 1618 char end2[10]; 1619 struct tm *tm_ptr1, *tm_ptr2; 1620 int nsec1 = TIMESPEC_NS (stb1.st_mtime); 1621 int nsec2 = TIMESPEC_NS (stb2.st_mtime); 1622 1623 #ifdef ST_MTIM_NSEC 1624 time_format = "%Y-%m-%d %H:%M:%S.%N"; 1625 #else 1626 time_format = "%Y-%m-%d %H:%M:%S"; 1627 #endif 1628 1629 if (cflag) 1630 time_format = "%c"; 1631 tm_ptr1 = localtime(&stb1.st_mtime); 1632 tm_ptr2 = localtime(&stb2.st_mtime); 1633 strftime(buf1, 256, time_format, tm_ptr1); 1634 strftime(buf2, 256, time_format, tm_ptr2); 1635 if (!cflag) { 1636 strftime(end1, 10, "%z", tm_ptr1); 1637 strftime(end2, 10, "%z", tm_ptr2); 1638 sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1); 1639 sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2); 1640 } 1641 if (label[0] != NULL) 1642 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", 1643 label[0]); 1644 else 1645 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---", 1646 file1, buf1); 1647 if (label[1] != NULL) 1648 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", 1649 label[1]); 1650 else 1651 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++", 1652 file2, buf2); 1653 } 1654