xref: /freebsd/usr.bin/diff/diffreg.c (revision b03012d0b600793d7501b4cc56757ec6150ec87f)
1  /*	$OpenBSD: diffreg.c,v 1.93 2019/06/28 13:35:00 deraadt Exp $	*/
2  
3  /*-
4   * SPDX-License-Identifier: BSD-4-Clause
5   *
6   * Copyright (C) Caldera International Inc.  2001-2002.
7   * All rights reserved.
8   *
9   * Redistribution and use in source and binary forms, with or without
10   * modification, are permitted provided that the following conditions
11   * are met:
12   * 1. Redistributions of source code and documentation must retain the above
13   *    copyright notice, this list of conditions and the following disclaimer.
14   * 2. Redistributions in binary form must reproduce the above copyright
15   *    notice, this list of conditions and the following disclaimer in the
16   *    documentation and/or other materials provided with the distribution.
17   * 3. All advertising materials mentioning features or use of this software
18   *    must display the following acknowledgement:
19   *	This product includes software developed or owned by Caldera
20   *	International, Inc.
21   * 4. Neither the name of Caldera International, Inc. nor the names of other
22   *    contributors may be used to endorse or promote products derived from
23   *    this software without specific prior written permission.
24   *
25   * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
26   * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
27   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
28   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29   * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
30   * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35   * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36   * POSSIBILITY OF SUCH DAMAGE.
37   */
38  /*-
39   * Copyright (c) 1991, 1993
40   *	The Regents of the University of California.  All rights reserved.
41   *
42   * Redistribution and use in source and binary forms, with or without
43   * modification, are permitted provided that the following conditions
44   * are met:
45   * 1. Redistributions of source code must retain the above copyright
46   *    notice, this list of conditions and the following disclaimer.
47   * 2. Redistributions in binary form must reproduce the above copyright
48   *    notice, this list of conditions and the following disclaimer in the
49   *    documentation and/or other materials provided with the distribution.
50   * 3. Neither the name of the University nor the names of its contributors
51   *    may be used to endorse or promote products derived from this software
52   *    without specific prior written permission.
53   *
54   * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57   * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64   * SUCH DAMAGE.
65   *
66   *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
67   */
68  
69  #include <sys/cdefs.h>
70  __FBSDID("$FreeBSD$");
71  
72  #include <sys/capsicum.h>
73  #include <sys/stat.h>
74  
75  #include <capsicum_helpers.h>
76  #include <ctype.h>
77  #include <err.h>
78  #include <errno.h>
79  #include <fcntl.h>
80  #include <math.h>
81  #include <paths.h>
82  #include <regex.h>
83  #include <stdbool.h>
84  #include <stddef.h>
85  #include <stdint.h>
86  #include <stdio.h>
87  #include <stdlib.h>
88  #include <string.h>
89  
90  #include "pr.h"
91  #include "diff.h"
92  #include "xmalloc.h"
93  
94  /*
95   * diff - compare two files.
96   */
97  
98  /*
99   *	Uses an algorithm due to Harold Stone, which finds a pair of longest
100   *	identical subsequences in the two files.
101   *
102   *	The major goal is to generate the match vector J. J[i] is the index of
103   *	the line in file1 corresponding to line i file0. J[i] = 0 if there is no
104   *	such line in file1.
105   *
106   *	Lines are hashed so as to work in core. All potential matches are
107   *	located by sorting the lines of each file on the hash (called
108   *	``value''). In particular, this collects the equivalence classes in
109   *	file1 together. Subroutine equiv replaces the value of each line in
110   *	file0 by the index of the first element of its matching equivalence in
111   *	(the reordered) file1. To save space equiv squeezes file1 into a single
112   *	array member in which the equivalence classes are simply concatenated,
113   *	except that their first members are flagged by changing sign.
114   *
115   *	Next the indices that point into member are unsorted into array class
116   *	according to the original order of file0.
117   *
118   *	The cleverness lies in routine stone. This marches through the lines of
119   *	file0, developing a vector klist of "k-candidates". At step i
120   *	a k-candidate is a matched pair of lines x,y (x in file0 y in file1)
121   *	such that there is a common subsequence of length k between the first
122   *	i lines of file0 and the first y lines of file1, but there is no such
123   *	subsequence for any smaller y. x is the earliest possible mate to y that
124   *	occurs in such a subsequence.
125   *
126   *	Whenever any of the members of the equivalence class of lines in file1
127   *	matable to a line in file0 has serial number less than the y of some
128   *	k-candidate, that k-candidate with the smallest such y is replaced. The
129   *	new k-candidate is chained (via pred) to the current k-1 candidate so
130   *	that the actual subsequence can be recovered. When a member has serial
131   *	number greater that the y of all k-candidates, the klist is extended. At
132   *	the end, the longest subsequence is pulled out and placed in the array J
133   *	by unravel.
134   *
135   *	With J in hand, the matches there recorded are check'ed against reality
136   *	to assure that no spurious matches have crept in due to hashing. If they
137   *	have, they are broken, and "jackpot" is recorded -- a harmless matter
138   *	except that a true match for a spuriously mated line may now be
139   *	unnecessarily reported as a change.
140   *
141   *	Much of the complexity of the program comes simply from trying to
142   *	minimize core utilization and maximize the range of doable problems by
143   *	dynamically allocating what is needed and reusing what is not. The core
144   *	requirements for problems larger than somewhat are (in words)
145   *	2*length(file0) + length(file1) + 3*(number of k-candidates installed),
146   *	typically about 6n words for files of length n.
147   */
148  
149  struct cand {
150  	int	x;
151  	int	y;
152  	int	pred;
153  };
154  
155  static struct line {
156  	int	serial;
157  	int	value;
158  } *file[2];
159  
160  /*
161   * The following struct is used to record change information when
162   * doing a "context" or "unified" diff.  (see routine "change" to
163   * understand the highly mnemonic field names)
164   */
165  struct context_vec {
166  	int	a;		/* start line in old file */
167  	int	b;		/* end line in old file */
168  	int	c;		/* start line in new file */
169  	int	d;		/* end line in new file */
170  };
171  
172  enum readhash { RH_BINARY, RH_OK, RH_EOF };
173  
174  #define MIN_PAD		1
175  static FILE	*opentemp(const char *);
176  static void	 output(char *, FILE *, char *, FILE *, int);
177  static void	 check(FILE *, FILE *, int);
178  static void	 range(int, int, const char *);
179  static void	 uni_range(int, int);
180  static void	 dump_context_vec(FILE *, FILE *, int);
181  static void	 dump_unified_vec(FILE *, FILE *, int);
182  static bool	 prepare(int, FILE *, size_t, int);
183  static void	 prune(void);
184  static void	 equiv(struct line *, int, struct line *, int, int *);
185  static void	 unravel(int);
186  static void	 unsort(struct line *, int, int *);
187  static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
188  static void	 sort(struct line *, int);
189  static void	 print_header(const char *, const char *);
190  static void	 print_space(int, int, int);
191  static bool	 ignoreline_pattern(char *);
192  static bool	 ignoreline(char *, bool);
193  static int	 asciifile(FILE *);
194  static int	 fetch(long *, int, int, FILE *, int, int, int);
195  static int	 newcand(int, int, int);
196  static int	 search(int *, int, int);
197  static int	 skipline(FILE *);
198  static int	 stone(int *, int, int *, int *, int);
199  static enum readhash readhash(FILE *, int, unsigned *);
200  static int	 files_differ(FILE *, FILE *, int);
201  static char	*match_function(const long *, int, FILE *);
202  static char	*preadline(int, size_t, off_t);
203  
204  static int	 *J;			/* will be overlaid on class */
205  static int	 *class;		/* will be overlaid on file[0] */
206  static int	 *klist;		/* will be overlaid on file[0] after class */
207  static int	 *member;		/* will be overlaid on file[1] */
208  static int	 clen;
209  static int	 inifdef;		/* whether or not we are in a #ifdef block */
210  static int	 len[2];
211  static int	 pref, suff;	/* length of prefix and suffix */
212  static int	 slen[2];
213  static int	 anychange;
214  static int	 hw, padding;	/* half width and padding */
215  static int	 edoffset;
216  static long	*ixnew;		/* will be overlaid on file[1] */
217  static long	*ixold;		/* will be overlaid on klist */
218  static struct cand *clist;	/* merely a free storage pot for candidates */
219  static int	 clistlen;		/* the length of clist */
220  static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
221  static int	(*chrtran)(int);	/* translation table for case-folding */
222  static struct context_vec *context_vec_start;
223  static struct context_vec *context_vec_end;
224  static struct context_vec *context_vec_ptr;
225  
226  #define FUNCTION_CONTEXT_SIZE	55
227  static char lastbuf[FUNCTION_CONTEXT_SIZE];
228  static int lastline;
229  static int lastmatchline;
230  
231  static int
232  clow2low(int c)
233  {
234  
235  	return (c);
236  }
237  
238  static int
239  cup2low(int c)
240  {
241  
242  	return (tolower(c));
243  }
244  
245  int
246  diffreg(char *file1, char *file2, int flags, int capsicum)
247  {
248  	FILE *f1, *f2;
249  	int i, rval;
250  	struct pr *pr = NULL;
251  	cap_rights_t rights_ro;
252  
253  	f1 = f2 = NULL;
254  	rval = D_SAME;
255  	anychange = 0;
256  	lastline = 0;
257  	lastmatchline = 0;
258  
259  	 /*
260  	  * hw excludes padding and make sure when -t is not used,
261  	  * the second column always starts from the closest tab stop
262  	  */
263  	if (diff_format == D_SIDEBYSIDE) {
264  		hw = width >> 1;
265  		padding = tabsize - (hw % tabsize);
266  		if ((flags & D_EXPANDTABS) != 0 || (padding % tabsize == 0))
267  			padding = MIN_PAD;
268  
269  		hw = (width >> 1) -
270  		    ((padding == MIN_PAD) ? (padding << 1) : padding) - 1;
271  	}
272  
273  
274  	if (flags & D_IGNORECASE)
275  		chrtran = cup2low;
276  	else
277  		chrtran = clow2low;
278  	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
279  		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
280  	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
281  		goto closem;
282  
283  	if (flags & D_EMPTY1)
284  		f1 = fopen(_PATH_DEVNULL, "r");
285  	else {
286  		if (!S_ISREG(stb1.st_mode)) {
287  			if ((f1 = opentemp(file1)) == NULL ||
288  			    fstat(fileno(f1), &stb1) == -1) {
289  				warn("%s", file1);
290  				rval = D_ERROR;
291  				status |= 2;
292  				goto closem;
293  			}
294  		} else if (strcmp(file1, "-") == 0)
295  			f1 = stdin;
296  		else
297  			f1 = fopen(file1, "r");
298  	}
299  	if (f1 == NULL) {
300  		warn("%s", file1);
301  		rval = D_ERROR;
302  		status |= 2;
303  		goto closem;
304  	}
305  
306  	if (flags & D_EMPTY2)
307  		f2 = fopen(_PATH_DEVNULL, "r");
308  	else {
309  		if (!S_ISREG(stb2.st_mode)) {
310  			if ((f2 = opentemp(file2)) == NULL ||
311  			    fstat(fileno(f2), &stb2) == -1) {
312  				warn("%s", file2);
313  				rval = D_ERROR;
314  				status |= 2;
315  				goto closem;
316  			}
317  		} else if (strcmp(file2, "-") == 0)
318  			f2 = stdin;
319  		else
320  			f2 = fopen(file2, "r");
321  	}
322  	if (f2 == NULL) {
323  		warn("%s", file2);
324  		rval = D_ERROR;
325  		status |= 2;
326  		goto closem;
327  	}
328  
329  	if (lflag)
330  		pr = start_pr(file1, file2);
331  
332  	if (capsicum) {
333  		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
334  		if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
335  			err(2, "unable to limit rights on: %s", file1);
336  		if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
337  			err(2, "unable to limit rights on: %s", file2);
338  		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
339  			/* stdin has already been limited */
340  			if (caph_limit_stderr() == -1)
341  				err(2, "unable to limit stderr");
342  			if (caph_limit_stdout() == -1)
343  				err(2, "unable to limit stdout");
344  		} else if (caph_limit_stdio() == -1)
345  				err(2, "unable to limit stdio");
346  
347  		caph_cache_catpages();
348  		caph_cache_tzdata();
349  		if (caph_enter() < 0)
350  			err(2, "unable to enter capability mode");
351  	}
352  
353  	switch (files_differ(f1, f2, flags)) {
354  	case 0:
355  		goto closem;
356  	case 1:
357  		break;
358  	default:
359  		/* error */
360  		rval = D_ERROR;
361  		status |= 2;
362  		goto closem;
363  	}
364  
365  	if (diff_format == D_BRIEF && ignore_pats == NULL &&
366  	    (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) == 0)
367  	{
368  		rval = D_DIFFER;
369  		status |= 1;
370  		goto closem;
371  	}
372  	if ((flags & D_FORCEASCII) != 0) {
373  		(void)prepare(0, f1, stb1.st_size, flags);
374  		(void)prepare(1, f2, stb2.st_size, flags);
375  	} else if (!asciifile(f1) || !asciifile(f2) ||
376  		    !prepare(0, f1, stb1.st_size, flags) ||
377  		    !prepare(1, f2, stb2.st_size, flags)) {
378  		rval = D_BINARY;
379  		status |= 1;
380  		goto closem;
381  	}
382  
383  	prune();
384  	sort(sfile[0], slen[0]);
385  	sort(sfile[1], slen[1]);
386  
387  	member = (int *)file[1];
388  	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
389  	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
390  
391  	class = (int *)file[0];
392  	unsort(sfile[0], slen[0], class);
393  	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
394  
395  	klist = xcalloc(slen[0] + 2, sizeof(*klist));
396  	clen = 0;
397  	clistlen = 100;
398  	clist = xcalloc(clistlen, sizeof(*clist));
399  	i = stone(class, slen[0], member, klist, flags);
400  	free(member);
401  	free(class);
402  
403  	J = xreallocarray(J, len[0] + 2, sizeof(*J));
404  	unravel(klist[i]);
405  	free(clist);
406  	free(klist);
407  
408  	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
409  	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
410  	check(f1, f2, flags);
411  	output(file1, f1, file2, f2, flags);
412  
413  closem:
414  	if (pr != NULL)
415  		stop_pr(pr);
416  	if (anychange) {
417  		status |= 1;
418  		if (rval == D_SAME)
419  			rval = D_DIFFER;
420  	}
421  	if (f1 != NULL)
422  		fclose(f1);
423  	if (f2 != NULL)
424  		fclose(f2);
425  
426  	return (rval);
427  }
428  
429  /*
430   * Check to see if the given files differ.
431   * Returns 0 if they are the same, 1 if different, and -1 on error.
432   * XXX - could use code from cmp(1) [faster]
433   */
434  static int
435  files_differ(FILE *f1, FILE *f2, int flags)
436  {
437  	char buf1[BUFSIZ], buf2[BUFSIZ];
438  	size_t i, j;
439  
440  	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
441  	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
442  		return (1);
443  
444  	if (stb1.st_dev == stb2.st_dev && stb1.st_ino == stb2.st_ino)
445  		return (0);
446  
447  	for (;;) {
448  		i = fread(buf1, 1, sizeof(buf1), f1);
449  		j = fread(buf2, 1, sizeof(buf2), f2);
450  		if ((!i && ferror(f1)) || (!j && ferror(f2)))
451  			return (-1);
452  		if (i != j)
453  			return (1);
454  		if (i == 0)
455  			return (0);
456  		if (memcmp(buf1, buf2, i) != 0)
457  			return (1);
458  	}
459  }
460  
461  static FILE *
462  opentemp(const char *f)
463  {
464  	char buf[BUFSIZ], tempfile[PATH_MAX];
465  	ssize_t nread;
466  	int ifd, ofd;
467  
468  	if (strcmp(f, "-") == 0)
469  		ifd = STDIN_FILENO;
470  	else if ((ifd = open(f, O_RDONLY, 0644)) == -1)
471  		return (NULL);
472  
473  	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
474  
475  	if ((ofd = mkstemp(tempfile)) == -1) {
476  		close(ifd);
477  		return (NULL);
478  	}
479  	unlink(tempfile);
480  	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
481  		if (write(ofd, buf, nread) != nread) {
482  			close(ifd);
483  			close(ofd);
484  			return (NULL);
485  		}
486  	}
487  	close(ifd);
488  	lseek(ofd, (off_t)0, SEEK_SET);
489  	return (fdopen(ofd, "r"));
490  }
491  
492  static bool
493  prepare(int i, FILE *fd, size_t filesize, int flags)
494  {
495  	struct line *p;
496  	unsigned h;
497  	size_t sz, j = 0;
498  	enum readhash r;
499  
500  	rewind(fd);
501  
502  	sz = MIN(filesize, SIZE_MAX) / 25;
503  	if (sz < 100)
504  		sz = 100;
505  
506  	p = xcalloc(sz + 3, sizeof(*p));
507  	while ((r = readhash(fd, flags, &h)) != RH_EOF)
508  		switch (r) {
509  		case RH_EOF: /* otherwise clang complains */
510  		case RH_BINARY:
511  			return (false);
512  		case RH_OK:
513  			if (j == sz) {
514  				sz = sz * 3 / 2;
515  				p = xreallocarray(p, sz + 3, sizeof(*p));
516  			}
517  			p[++j].value = h;
518  		}
519  
520  	len[i] = j;
521  	file[i] = p;
522  
523  	return (true);
524  }
525  
526  static void
527  prune(void)
528  {
529  	int i, j;
530  
531  	for (pref = 0; pref < len[0] && pref < len[1] &&
532  	    file[0][pref + 1].value == file[1][pref + 1].value;
533  	    pref++)
534  		;
535  	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
536  	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
537  	    suff++)
538  		;
539  	for (j = 0; j < 2; j++) {
540  		sfile[j] = file[j] + pref;
541  		slen[j] = len[j] - pref - suff;
542  		for (i = 0; i <= slen[j]; i++)
543  			sfile[j][i].serial = i;
544  	}
545  }
546  
547  static void
548  equiv(struct line *a, int n, struct line *b, int m, int *c)
549  {
550  	int i, j;
551  
552  	i = j = 1;
553  	while (i <= n && j <= m) {
554  		if (a[i].value < b[j].value)
555  			a[i++].value = 0;
556  		else if (a[i].value == b[j].value)
557  			a[i++].value = j;
558  		else
559  			j++;
560  	}
561  	while (i <= n)
562  		a[i++].value = 0;
563  	b[m + 1].value = 0;
564  	j = 0;
565  	while (++j <= m) {
566  		c[j] = -b[j].serial;
567  		while (b[j + 1].value == b[j].value) {
568  			j++;
569  			c[j] = b[j].serial;
570  		}
571  	}
572  	c[j] = -1;
573  }
574  
575  static int
576  stone(int *a, int n, int *b, int *c, int flags)
577  {
578  	int i, k, y, j, l;
579  	int oldc, tc, oldl, sq;
580  	unsigned numtries, bound;
581  
582  	if (flags & D_MINIMAL)
583  		bound = UINT_MAX;
584  	else {
585  		sq = sqrt(n);
586  		bound = MAX(256, sq);
587  	}
588  
589  	k = 0;
590  	c[0] = newcand(0, 0, 0);
591  	for (i = 1; i <= n; i++) {
592  		j = a[i];
593  		if (j == 0)
594  			continue;
595  		y = -b[j];
596  		oldl = 0;
597  		oldc = c[0];
598  		numtries = 0;
599  		do {
600  			if (y <= clist[oldc].y)
601  				continue;
602  			l = search(c, k, y);
603  			if (l != oldl + 1)
604  				oldc = c[l - 1];
605  			if (l <= k) {
606  				if (clist[c[l]].y <= y)
607  					continue;
608  				tc = c[l];
609  				c[l] = newcand(i, y, oldc);
610  				oldc = tc;
611  				oldl = l;
612  				numtries++;
613  			} else {
614  				c[l] = newcand(i, y, oldc);
615  				k++;
616  				break;
617  			}
618  		} while ((y = b[++j]) > 0 && numtries < bound);
619  	}
620  	return (k);
621  }
622  
623  static int
624  newcand(int x, int y, int pred)
625  {
626  	struct cand *q;
627  
628  	if (clen == clistlen) {
629  		clistlen = clistlen * 11 / 10;
630  		clist = xreallocarray(clist, clistlen, sizeof(*clist));
631  	}
632  	q = clist + clen;
633  	q->x = x;
634  	q->y = y;
635  	q->pred = pred;
636  	return (clen++);
637  }
638  
639  static int
640  search(int *c, int k, int y)
641  {
642  	int i, j, l, t;
643  
644  	if (clist[c[k]].y < y)	/* quick look for typical case */
645  		return (k + 1);
646  	i = 0;
647  	j = k + 1;
648  	for (;;) {
649  		l = (i + j) / 2;
650  		if (l <= i)
651  			break;
652  		t = clist[c[l]].y;
653  		if (t > y)
654  			j = l;
655  		else if (t < y)
656  			i = l;
657  		else
658  			return (l);
659  	}
660  	return (l + 1);
661  }
662  
663  static void
664  unravel(int p)
665  {
666  	struct cand *q;
667  	int i;
668  
669  	for (i = 0; i <= len[0]; i++)
670  		J[i] = i <= pref ? i :
671  		    i > len[0] - suff ? i + len[1] - len[0] : 0;
672  	for (q = clist + p; q->y != 0; q = clist + q->pred)
673  		J[q->x + pref] = q->y + pref;
674  }
675  
676  /*
677   * Check does double duty:
678   *  1. ferret out any fortuitous correspondences due to confounding by
679   *     hashing (which result in "jackpot")
680   *  2. collect random access indexes to the two files
681   */
682  static void
683  check(FILE *f1, FILE *f2, int flags)
684  {
685  	int i, j, /* jackpot, */ c, d;
686  	long ctold, ctnew;
687  
688  	rewind(f1);
689  	rewind(f2);
690  	j = 1;
691  	ixold[0] = ixnew[0] = 0;
692  	/* jackpot = 0; */
693  	ctold = ctnew = 0;
694  	for (i = 1; i <= len[0]; i++) {
695  		if (J[i] == 0) {
696  			ixold[i] = ctold += skipline(f1);
697  			continue;
698  		}
699  		while (j < J[i]) {
700  			ixnew[j] = ctnew += skipline(f2);
701  			j++;
702  		}
703  		if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS | D_IGNORECASE | D_STRIPCR)) {
704  			for (;;) {
705  				c = getc(f1);
706  				d = getc(f2);
707  				/*
708  				 * GNU diff ignores a missing newline
709  				 * in one file for -b or -w.
710  				 */
711  				if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS)) {
712  					if (c == EOF && d == '\n') {
713  						ctnew++;
714  						break;
715  					} else if (c == '\n' && d == EOF) {
716  						ctold++;
717  						break;
718  					}
719  				}
720  				ctold++;
721  				ctnew++;
722  				if (flags & D_STRIPCR && (c == '\r' || d == '\r')) {
723  					if (c == '\r') {
724  						if ((c = getc(f1)) == '\n') {
725  							ctold++;
726  						} else {
727  							ungetc(c, f1);
728  						}
729  					}
730  					if (d == '\r') {
731  						if ((d = getc(f2)) == '\n') {
732  							ctnew++;
733  						} else {
734  							ungetc(d, f2);
735  						}
736  					}
737  					break;
738  				}
739  				if ((flags & D_FOLDBLANKS) && isspace(c) &&
740  				    isspace(d)) {
741  					do {
742  						if (c == '\n')
743  							break;
744  						ctold++;
745  					} while (isspace(c = getc(f1)));
746  					do {
747  						if (d == '\n')
748  							break;
749  						ctnew++;
750  					} while (isspace(d = getc(f2)));
751  				} else if (flags & D_IGNOREBLANKS) {
752  					while (isspace(c) && c != '\n') {
753  						c = getc(f1);
754  						ctold++;
755  					}
756  					while (isspace(d) && d != '\n') {
757  						d = getc(f2);
758  						ctnew++;
759  					}
760  				}
761  				if (chrtran(c) != chrtran(d)) {
762  					/* jackpot++; */
763  					J[i] = 0;
764  					if (c != '\n' && c != EOF)
765  						ctold += skipline(f1);
766  					if (d != '\n' && c != EOF)
767  						ctnew += skipline(f2);
768  					break;
769  				}
770  				if (c == '\n' || c == EOF)
771  					break;
772  			}
773  		} else {
774  			for (;;) {
775  				ctold++;
776  				ctnew++;
777  				if ((c = getc(f1)) != (d = getc(f2))) {
778  					/* jackpot++; */
779  					J[i] = 0;
780  					if (c != '\n' && c != EOF)
781  						ctold += skipline(f1);
782  					if (d != '\n' && c != EOF)
783  						ctnew += skipline(f2);
784  					break;
785  				}
786  				if (c == '\n' || c == EOF)
787  					break;
788  			}
789  		}
790  		ixold[i] = ctold;
791  		ixnew[j] = ctnew;
792  		j++;
793  	}
794  	for (; j <= len[1]; j++) {
795  		ixnew[j] = ctnew += skipline(f2);
796  	}
797  	/*
798  	 * if (jackpot)
799  	 *	fprintf(stderr, "jackpot\n");
800  	 */
801  }
802  
803  /* shellsort CACM #201 */
804  static void
805  sort(struct line *a, int n)
806  {
807  	struct line *ai, *aim, w;
808  	int j, m = 0, k;
809  
810  	if (n == 0)
811  		return;
812  	for (j = 1; j <= n; j *= 2)
813  		m = 2 * j - 1;
814  	for (m /= 2; m != 0; m /= 2) {
815  		k = n - m;
816  		for (j = 1; j <= k; j++) {
817  			for (ai = &a[j]; ai > a; ai -= m) {
818  				aim = &ai[m];
819  				if (aim < ai)
820  					break;	/* wraparound */
821  				if (aim->value > ai[0].value ||
822  				    (aim->value == ai[0].value &&
823  					aim->serial > ai[0].serial))
824  					break;
825  				w.value = ai[0].value;
826  				ai[0].value = aim->value;
827  				aim->value = w.value;
828  				w.serial = ai[0].serial;
829  				ai[0].serial = aim->serial;
830  				aim->serial = w.serial;
831  			}
832  		}
833  	}
834  }
835  
836  static void
837  unsort(struct line *f, int l, int *b)
838  {
839  	int *a, i;
840  
841  	a = xcalloc(l + 1, sizeof(*a));
842  	for (i = 1; i <= l; i++)
843  		a[f[i].serial] = f[i].value;
844  	for (i = 1; i <= l; i++)
845  		b[i] = a[i];
846  	free(a);
847  }
848  
849  static int
850  skipline(FILE *f)
851  {
852  	int i, c;
853  
854  	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
855  		continue;
856  	return (i);
857  }
858  
859  static void
860  output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
861  {
862  	int i, j, m, i0, i1, j0, j1, nc;
863  
864  	rewind(f1);
865  	rewind(f2);
866  	m = len[0];
867  	J[0] = 0;
868  	J[m + 1] = len[1] + 1;
869  	if (diff_format != D_EDIT) {
870  		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
871  			while (i0 <= m && J[i0] == J[i0 - 1] + 1) {
872  				if (diff_format == D_SIDEBYSIDE && suppress_common != 1) {
873  					nc = fetch(ixold, i0, i0, f1, '\0', 1, flags);
874  					print_space(nc, (hw - nc) + (padding << 1) + 1, flags);
875  					fetch(ixnew, J[i0], J[i0], f2, '\0', 0, flags);
876  					printf("\n");
877  				}
878  				i0++;
879  			}
880  			j0 = J[i0 - 1] + 1;
881  			i1 = i0 - 1;
882  			while (i1 < m && J[i1 + 1] == 0)
883  				i1++;
884  			j1 = J[i1 + 1] - 1;
885  			J[i1] = j1;
886  
887  			/*
888  			 * When using side-by-side, lines from both of the files are
889  			 * printed. The algorithm used by diff(1) identifies the ranges
890  			 * in which two files differ.
891  			 * See the change() function below.
892  			 * The for loop below consumes the shorter range, whereas one of
893  			 * the while loops deals with the longer one.
894  			 */
895  			if (diff_format == D_SIDEBYSIDE) {
896  				for (i = i0, j = j0; i <= i1 && j <= j1; i++, j++)
897  					change(file1, f1, file2, f2, i, i, j, j, &flags);
898  
899  				while (i <= i1) {
900  					change(file1, f1, file2, f2, i, i, j + 1, j, &flags);
901  					i++;
902  				}
903  
904  				while (j <= j1) {
905  					change(file1, f1, file2, f2, i + 1, i, j, j, &flags);
906  					j++;
907  				}
908  			} else
909  				change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
910  		}
911  	} else {
912  		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
913  			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
914  				i0--;
915  			j0 = J[i0 + 1] - 1;
916  			i1 = i0 + 1;
917  			while (i1 > 1 && J[i1 - 1] == 0)
918  				i1--;
919  			j1 = J[i1 - 1] + 1;
920  			J[i1] = j1;
921  			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
922  		}
923  	}
924  	if (m == 0)
925  		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
926  	if (diff_format == D_IFDEF || diff_format == D_GFORMAT) {
927  		for (;;) {
928  #define	c i0
929  			if ((c = getc(f1)) == EOF)
930  				return;
931  			printf("%c", c);
932  		}
933  #undef c
934  	}
935  	if (anychange != 0) {
936  		if (diff_format == D_CONTEXT)
937  			dump_context_vec(f1, f2, flags);
938  		else if (diff_format == D_UNIFIED)
939  			dump_unified_vec(f1, f2, flags);
940  	}
941  }
942  
943  static void
944  range(int a, int b, const char *separator)
945  {
946  	printf("%d", a > b ? b : a);
947  	if (a < b)
948  		printf("%s%d", separator, b);
949  }
950  
951  static void
952  uni_range(int a, int b)
953  {
954  	if (a < b)
955  		printf("%d,%d", a, b - a + 1);
956  	else if (a == b)
957  		printf("%d", b);
958  	else
959  		printf("%d,0", b);
960  }
961  
962  static char *
963  preadline(int fd, size_t rlen, off_t off)
964  {
965  	char *line;
966  	ssize_t nr;
967  
968  	line = xmalloc(rlen + 1);
969  	if ((nr = pread(fd, line, rlen, off)) == -1)
970  		err(2, "preadline");
971  	if (nr > 0 && line[nr-1] == '\n')
972  		nr--;
973  	line[nr] = '\0';
974  	return (line);
975  }
976  
977  static bool
978  ignoreline_pattern(char *line)
979  {
980  	int ret;
981  
982  	ret = regexec(&ignore_re, line, 0, NULL, 0);
983  	return (ret == 0);	/* if it matched, it should be ignored. */
984  }
985  
986  static bool
987  ignoreline(char *line, bool skip_blanks)
988  {
989  
990  	if (skip_blanks && *line == '\0')
991  		return (true);
992  	if (ignore_pats != NULL && ignoreline_pattern(line))
993  		return (true);
994  	return (false);
995  }
996  
997  /*
998   * Indicate that there is a difference between lines a and b of the from file
999   * to get to lines c to d of the to file.  If a is greater then b then there
1000   * are no lines in the from file involved and this means that there were
1001   * lines appended (beginning at b).  If c is greater than d then there are
1002   * lines missing from the to file.
1003   */
1004  static void
1005  change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1006      int *pflags)
1007  {
1008  	static size_t max_context = 64;
1009  	long curpos;
1010  	int i, nc;
1011  	const char *walk;
1012  	bool skip_blanks, ignore;
1013  
1014  	skip_blanks = (*pflags & D_SKIPBLANKLINES);
1015  restart:
1016  	if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) &&
1017  	    a > b && c > d)
1018  		return;
1019  	if (ignore_pats != NULL || skip_blanks) {
1020  		char *line;
1021  		/*
1022  		 * All lines in the change, insert, or delete must match an ignore
1023  		 * pattern for the change to be ignored.
1024  		 */
1025  		if (a <= b) {		/* Changes and deletes. */
1026  			for (i = a; i <= b; i++) {
1027  				line = preadline(fileno(f1),
1028  				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1029  				ignore = ignoreline(line, skip_blanks);
1030  				free(line);
1031  				if (!ignore)
1032  					goto proceed;
1033  			}
1034  		}
1035  		if (a > b || c <= d) {	/* Changes and inserts. */
1036  			for (i = c; i <= d; i++) {
1037  				line = preadline(fileno(f2),
1038  				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1039  				ignore = ignoreline(line, skip_blanks);
1040  				free(line);
1041  				if (!ignore)
1042  					goto proceed;
1043  			}
1044  		}
1045  		return;
1046  	}
1047  proceed:
1048  	if (*pflags & D_HEADER && diff_format != D_BRIEF) {
1049  		printf("%s %s %s\n", diffargs, file1, file2);
1050  		*pflags &= ~D_HEADER;
1051  	}
1052  	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1053  		/*
1054  		 * Allocate change records as needed.
1055  		 */
1056  		if (context_vec_start == NULL ||
1057  		    context_vec_ptr == context_vec_end - 1) {
1058  			ptrdiff_t offset = -1;
1059  
1060  			if (context_vec_start != NULL)
1061  				offset = context_vec_ptr - context_vec_start;
1062  			max_context <<= 1;
1063  			context_vec_start = xreallocarray(context_vec_start,
1064  			    max_context, sizeof(*context_vec_start));
1065  			context_vec_end = context_vec_start + max_context;
1066  			context_vec_ptr = context_vec_start + offset;
1067  		}
1068  		if (anychange == 0) {
1069  			/*
1070  			 * Print the context/unidiff header first time through.
1071  			 */
1072  			print_header(file1, file2);
1073  			anychange = 1;
1074  		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1075  		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1076  			/*
1077  			 * If this change is more than 'diff_context' lines from the
1078  			 * previous change, dump the record and reset it.
1079  			 */
1080  			if (diff_format == D_CONTEXT)
1081  				dump_context_vec(f1, f2, *pflags);
1082  			else
1083  				dump_unified_vec(f1, f2, *pflags);
1084  		}
1085  		context_vec_ptr++;
1086  		context_vec_ptr->a = a;
1087  		context_vec_ptr->b = b;
1088  		context_vec_ptr->c = c;
1089  		context_vec_ptr->d = d;
1090  		return;
1091  	}
1092  	if (anychange == 0)
1093  		anychange = 1;
1094  	switch (diff_format) {
1095  	case D_BRIEF:
1096  		return;
1097  	case D_NORMAL:
1098  	case D_EDIT:
1099  		range(a, b, ",");
1100  		printf("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1101  		if (diff_format == D_NORMAL)
1102  			range(c, d, ",");
1103  		printf("\n");
1104  		break;
1105  	case D_REVERSE:
1106  		printf("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1107  		range(a, b, " ");
1108  		printf("\n");
1109  		break;
1110  	case D_NREVERSE:
1111  		if (a > b)
1112  			printf("a%d %d\n", b, d - c + 1);
1113  		else {
1114  			printf("d%d %d\n", a, b - a + 1);
1115  			if (!(c > d))
1116  				/* add changed lines */
1117  				printf("a%d %d\n", b, d - c + 1);
1118  		}
1119  		break;
1120  	}
1121  	if (diff_format == D_GFORMAT) {
1122  		curpos = ftell(f1);
1123  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1124  		nc = ixold[a > b ? b : a - 1] - curpos;
1125  		for (i = 0; i < nc; i++)
1126  			printf("%c", getc(f1));
1127  		for (walk = group_format; *walk != '\0'; walk++) {
1128  			if (*walk == '%') {
1129  				walk++;
1130  				switch (*walk) {
1131  				case '<':
1132  					fetch(ixold, a, b, f1, '<', 1, *pflags);
1133  					break;
1134  				case '>':
1135  					fetch(ixnew, c, d, f2, '>', 0, *pflags);
1136  					break;
1137  				default:
1138  					printf("%%%c", *walk);
1139  					break;
1140  				}
1141  				continue;
1142  			}
1143  			printf("%c", *walk);
1144  		}
1145  	}
1146  	if (diff_format == D_SIDEBYSIDE) {
1147  		if (color && a > b)
1148  			printf("\033[%sm", add_code);
1149  		else if (color && c > d)
1150  			printf("\033[%sm", del_code);
1151  		if (a > b) {
1152  			print_space(0, hw + padding , *pflags);
1153  		} else {
1154  			nc = fetch(ixold, a, b, f1, '\0', 1, *pflags);
1155  			print_space(nc, hw - nc + padding, *pflags);
1156  		}
1157  		if (color && a > b)
1158  			printf("\033[%sm", add_code);
1159  		else if (color && c > d)
1160  			printf("\033[%sm", del_code);
1161  		printf("%c", (a > b) ? '>' : ((c > d) ? '<' : '|'));
1162  		if (color && c > d)
1163  			printf("\033[m");
1164  		print_space(hw + padding + 1 , padding, *pflags);
1165  		fetch(ixnew, c, d, f2, '\0', 0, *pflags);
1166  		printf("\n");
1167  	}
1168  	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1169  		fetch(ixold, a, b, f1, '<', 1, *pflags);
1170  		if (a <= b && c <= d && diff_format == D_NORMAL)
1171  			printf("---\n");
1172  	}
1173  	if (diff_format != D_GFORMAT && diff_format != D_SIDEBYSIDE)
1174  		fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1175  	if (edoffset != 0 && diff_format == D_EDIT) {
1176  		/*
1177  		 * A non-zero edoffset value for D_EDIT indicates that the last line
1178  		 * printed was a bare dot (".") that has been escaped as ".." to
1179  		 * prevent ed(1) from misinterpreting it.  We have to add a
1180  		 * substitute command to change this back and restart where we left
1181  		 * off.
1182  		 */
1183  		printf(".\n");
1184  		printf("%ds/.//\n", a + edoffset - 1);
1185  		b = a + edoffset - 1;
1186  		a = b + 1;
1187  		c += edoffset;
1188  		goto restart;
1189  	}
1190  	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1191  		printf(".\n");
1192  	if (inifdef) {
1193  		printf("#endif /* %s */\n", ifdefname);
1194  		inifdef = 0;
1195  	}
1196  }
1197  
1198  static int
1199  fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1200  {
1201  	int i, j, c, lastc, col, nc, newcol;
1202  
1203  	edoffset = 0;
1204  	nc = 0;
1205  	/*
1206  	 * When doing #ifdef's, copy down to current line
1207  	 * if this is the first file, so that stuff makes it to output.
1208  	 */
1209  	if ((diff_format == D_IFDEF) && oldfile) {
1210  		long curpos = ftell(lb);
1211  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1212  		nc = f[a > b ? b : a - 1] - curpos;
1213  		for (i = 0; i < nc; i++)
1214  			printf("%c", getc(lb));
1215  	}
1216  	if (a > b)
1217  		return (0);
1218  	if (diff_format == D_IFDEF) {
1219  		if (inifdef) {
1220  			printf("#else /* %s%s */\n",
1221  			    oldfile == 1 ? "!" : "", ifdefname);
1222  		} else {
1223  			if (oldfile)
1224  				printf("#ifndef %s\n", ifdefname);
1225  			else
1226  				printf("#ifdef %s\n", ifdefname);
1227  		}
1228  		inifdef = 1 + oldfile;
1229  	}
1230  	for (i = a; i <= b; i++) {
1231  		fseek(lb, f[i - 1], SEEK_SET);
1232  		nc = f[i] - f[i - 1];
1233  		if (diff_format == D_SIDEBYSIDE && hw < nc)
1234  			nc = hw;
1235  		if (diff_format != D_IFDEF && diff_format != D_GFORMAT &&
1236  		    ch != '\0') {
1237  			if (color && (ch == '>' || ch == '+'))
1238  				printf("\033[%sm", add_code);
1239  			else if (color && (ch == '<' || ch == '-'))
1240  				printf("\033[%sm", del_code);
1241  			printf("%c", ch);
1242  			if (Tflag && (diff_format == D_NORMAL ||
1243  			    diff_format == D_CONTEXT ||
1244  			    diff_format == D_UNIFIED))
1245  				printf("\t");
1246  			else if (diff_format != D_UNIFIED)
1247  				printf(" ");
1248  		}
1249  		col = j = 0;
1250  		lastc = '\0';
1251  		while (j < nc && (hw == 0 || col < hw)) {
1252  			c = getc(lb);
1253  			if (flags & D_STRIPCR && c == '\r') {
1254  				if ((c = getc(lb)) == '\n')
1255  					j++;
1256  				else {
1257  					ungetc(c, lb);
1258  					c = '\r';
1259  				}
1260  			}
1261  			if (c == EOF) {
1262  				if (diff_format == D_EDIT ||
1263  				    diff_format == D_REVERSE ||
1264  				    diff_format == D_NREVERSE)
1265  					warnx("No newline at end of file");
1266  				else
1267  					printf("\n\\ No newline at end of file\n");
1268  				return (col);
1269  			}
1270  			/*
1271  			 * when using --side-by-side, col needs to be increased
1272  			 * in any case to keep the columns aligned
1273  			 */
1274  			if (c == '\t') {
1275  				if (flags & D_EXPANDTABS) {
1276  					newcol = ((col / tabsize) + 1) * tabsize;
1277  					do {
1278  						printf(" ");
1279  					} while (++col < newcol && col < hw);
1280  				} else {
1281  					if (diff_format == D_SIDEBYSIDE) {
1282  						if ((col + tabsize) > hw) {
1283  							printf("%*s", hw - col, "");
1284  							col = hw;
1285  						} else {
1286  							printf("\t");
1287  							col += tabsize - 1;
1288  						}
1289  					} else {
1290  						printf("\t");
1291  						col++;
1292  					}
1293  				}
1294  			} else {
1295  				if (diff_format == D_EDIT && j == 1 && c == '\n' &&
1296  				    lastc == '.') {
1297  					/*
1298  					 * Don't print a bare "." line since that will confuse
1299  					 * ed(1). Print ".." instead and set the, global variable
1300  					 * edoffset to an offset from which to restart. The
1301  					 * caller must check the value of edoffset
1302  					 */
1303  					printf(".\n");
1304  					edoffset = i - a + 1;
1305  					return (edoffset);
1306  				}
1307  				/* when side-by-side, do not print a newline */
1308  				if (diff_format != D_SIDEBYSIDE || c != '\n') {
1309  					if (color && c == '\n')
1310  						printf("\033[m%c", c);
1311  					else
1312  						printf("%c", c);
1313  					col++;
1314  				}
1315  			}
1316  
1317  			j++;
1318  			lastc = c;
1319  		}
1320  	}
1321  	if (color && diff_format == D_SIDEBYSIDE)
1322  		printf("\033[m");
1323  	return (col);
1324  }
1325  
1326  /*
1327   * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1328   */
1329  static enum readhash
1330  readhash(FILE *f, int flags, unsigned *hash)
1331  {
1332  	int i, t, space;
1333  	unsigned sum;
1334  
1335  	sum = 1;
1336  	space = 0;
1337  	for (i = 0;;) {
1338  		switch (t = getc(f)) {
1339  		case '\0':
1340  			if ((flags & D_FORCEASCII) == 0)
1341  				return (RH_BINARY);
1342  			goto hashchar;
1343  		case '\r':
1344  			if (flags & D_STRIPCR) {
1345  				t = getc(f);
1346  				if (t == '\n')
1347  					break;
1348  				ungetc(t, f);
1349  			}
1350  			/* FALLTHROUGH */
1351  		case '\t':
1352  		case '\v':
1353  		case '\f':
1354  		case ' ':
1355  			if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) != 0) {
1356  				space++;
1357  				continue;
1358  			}
1359  			/* FALLTHROUGH */
1360  		default:
1361  		hashchar:
1362  			if (space && (flags & D_IGNOREBLANKS) == 0) {
1363  				i++;
1364  				space = 0;
1365  			}
1366  			sum = sum * 127 + chrtran(t);
1367  			i++;
1368  			continue;
1369  		case EOF:
1370  			if (i == 0)
1371  				return (RH_EOF);
1372  			/* FALLTHROUGH */
1373  		case '\n':
1374  			break;
1375  		}
1376  		break;
1377  	}
1378  	*hash = sum;
1379  	return (RH_OK);
1380  }
1381  
1382  static int
1383  asciifile(FILE *f)
1384  {
1385  	unsigned char buf[BUFSIZ];
1386  	size_t cnt;
1387  
1388  	if (f == NULL)
1389  		return (1);
1390  
1391  	rewind(f);
1392  	cnt = fread(buf, 1, sizeof(buf), f);
1393  	return (memchr(buf, '\0', cnt) == NULL);
1394  }
1395  
1396  #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre) - 1) == 0)
1397  
1398  static char *
1399  match_function(const long *f, int pos, FILE *fp)
1400  {
1401  	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1402  	size_t nc;
1403  	int last = lastline;
1404  	const char *state = NULL;
1405  
1406  	lastline = pos;
1407  	for (; pos > last; pos--) {
1408  		fseek(fp, f[pos - 1], SEEK_SET);
1409  		nc = f[pos] - f[pos - 1];
1410  		if (nc >= sizeof(buf))
1411  			nc = sizeof(buf) - 1;
1412  		nc = fread(buf, 1, nc, fp);
1413  		if (nc == 0)
1414  			continue;
1415  		buf[nc] = '\0';
1416  		buf[strcspn(buf, "\n")] = '\0';
1417  		if (most_recent_pat != NULL) {
1418  			int ret = regexec(&most_recent_re, buf, 0, NULL, 0);
1419  
1420  			if (ret != 0)
1421  				continue;
1422  			strlcpy(lastbuf, buf, sizeof(lastbuf));
1423  			lastmatchline = pos;
1424  			return (lastbuf);
1425  		} else if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$'
1426  			|| buf[0] == '-' || buf[0] == '+') {
1427  			if (begins_with(buf, "private:")) {
1428  				if (!state)
1429  					state = " (private)";
1430  			} else if (begins_with(buf, "protected:")) {
1431  				if (!state)
1432  					state = " (protected)";
1433  			} else if (begins_with(buf, "public:")) {
1434  				if (!state)
1435  					state = " (public)";
1436  			} else {
1437  				strlcpy(lastbuf, buf, sizeof(lastbuf));
1438  				if (state)
1439  					strlcat(lastbuf, state, sizeof(lastbuf));
1440  				lastmatchline = pos;
1441  				return (lastbuf);
1442  			}
1443  		}
1444  	}
1445  	return (lastmatchline > 0 ? lastbuf : NULL);
1446  }
1447  
1448  /* dump accumulated "context" diff changes */
1449  static void
1450  dump_context_vec(FILE *f1, FILE *f2, int flags)
1451  {
1452  	struct context_vec *cvp = context_vec_start;
1453  	int lowa, upb, lowc, upd, do_output;
1454  	int a, b, c, d;
1455  	char ch, *f;
1456  
1457  	if (context_vec_start > context_vec_ptr)
1458  		return;
1459  
1460  	b = d = 0;		/* gcc */
1461  	lowa = MAX(1, cvp->a - diff_context);
1462  	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1463  	lowc = MAX(1, cvp->c - diff_context);
1464  	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1465  
1466  	printf("***************");
1467  	if (flags & (D_PROTOTYPE | D_MATCHLAST)) {
1468  		f = match_function(ixold, cvp->a - 1, f1);
1469  		if (f != NULL)
1470  			printf(" %s", f);
1471  	}
1472  	printf("\n*** ");
1473  	range(lowa, upb, ",");
1474  	printf(" ****\n");
1475  
1476  	/*
1477  	 * Output changes to the "old" file.  The first loop suppresses
1478  	 * output if there were no changes to the "old" file (we'll see
1479  	 * the "old" lines as context in the "new" list).
1480  	 */
1481  	do_output = 0;
1482  	for (; cvp <= context_vec_ptr; cvp++)
1483  		if (cvp->a <= cvp->b) {
1484  			cvp = context_vec_start;
1485  			do_output++;
1486  			break;
1487  		}
1488  	if (do_output) {
1489  		while (cvp <= context_vec_ptr) {
1490  			a = cvp->a;
1491  			b = cvp->b;
1492  			c = cvp->c;
1493  			d = cvp->d;
1494  
1495  			if (a <= b && c <= d)
1496  				ch = 'c';
1497  			else
1498  				ch = (a <= b) ? 'd' : 'a';
1499  
1500  			if (ch == 'a')
1501  				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1502  			else {
1503  				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1504  				fetch(ixold, a, b, f1,
1505  				    ch == 'c' ? '!' : '-', 0, flags);
1506  			}
1507  			lowa = b + 1;
1508  			cvp++;
1509  		}
1510  		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1511  	}
1512  	/* output changes to the "new" file */
1513  	printf("--- ");
1514  	range(lowc, upd, ",");
1515  	printf(" ----\n");
1516  
1517  	do_output = 0;
1518  	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1519  		if (cvp->c <= cvp->d) {
1520  			cvp = context_vec_start;
1521  			do_output++;
1522  			break;
1523  		}
1524  	if (do_output) {
1525  		while (cvp <= context_vec_ptr) {
1526  			a = cvp->a;
1527  			b = cvp->b;
1528  			c = cvp->c;
1529  			d = cvp->d;
1530  
1531  			if (a <= b && c <= d)
1532  				ch = 'c';
1533  			else
1534  				ch = (a <= b) ? 'd' : 'a';
1535  
1536  			if (ch == 'd')
1537  				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1538  			else {
1539  				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1540  				fetch(ixnew, c, d, f2,
1541  				    ch == 'c' ? '!' : '+', 0, flags);
1542  			}
1543  			lowc = d + 1;
1544  			cvp++;
1545  		}
1546  		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1547  	}
1548  	context_vec_ptr = context_vec_start - 1;
1549  }
1550  
1551  /* dump accumulated "unified" diff changes */
1552  static void
1553  dump_unified_vec(FILE *f1, FILE *f2, int flags)
1554  {
1555  	struct context_vec *cvp = context_vec_start;
1556  	int lowa, upb, lowc, upd;
1557  	int a, b, c, d;
1558  	char ch, *f;
1559  
1560  	if (context_vec_start > context_vec_ptr)
1561  		return;
1562  
1563  	b = d = 0;		/* gcc */
1564  	lowa = MAX(1, cvp->a - diff_context);
1565  	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1566  	lowc = MAX(1, cvp->c - diff_context);
1567  	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1568  
1569  	printf("@@ -");
1570  	uni_range(lowa, upb);
1571  	printf(" +");
1572  	uni_range(lowc, upd);
1573  	printf(" @@");
1574  	if (flags & (D_PROTOTYPE | D_MATCHLAST)) {
1575  		f = match_function(ixold, cvp->a - 1, f1);
1576  		if (f != NULL)
1577  			printf(" %s", f);
1578  	}
1579  	printf("\n");
1580  
1581  	/*
1582  	 * Output changes in "unified" diff format--the old and new lines
1583  	 * are printed together.
1584  	 */
1585  	for (; cvp <= context_vec_ptr; cvp++) {
1586  		a = cvp->a;
1587  		b = cvp->b;
1588  		c = cvp->c;
1589  		d = cvp->d;
1590  
1591  		/*
1592  		 * c: both new and old changes
1593  		 * d: only changes in the old file
1594  		 * a: only changes in the new file
1595  		 */
1596  		if (a <= b && c <= d)
1597  			ch = 'c';
1598  		else
1599  			ch = (a <= b) ? 'd' : 'a';
1600  
1601  		switch (ch) {
1602  		case 'c':
1603  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1604  			fetch(ixold, a, b, f1, '-', 0, flags);
1605  			fetch(ixnew, c, d, f2, '+', 0, flags);
1606  			break;
1607  		case 'd':
1608  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1609  			fetch(ixold, a, b, f1, '-', 0, flags);
1610  			break;
1611  		case 'a':
1612  			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1613  			fetch(ixnew, c, d, f2, '+', 0, flags);
1614  			break;
1615  		}
1616  		lowa = b + 1;
1617  		lowc = d + 1;
1618  	}
1619  	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1620  
1621  	context_vec_ptr = context_vec_start - 1;
1622  }
1623  
1624  static void
1625  print_header(const char *file1, const char *file2)
1626  {
1627  	const char *time_format;
1628  	char buf[256];
1629  	struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1630  	int nsec1 = stb1.st_mtim.tv_nsec;
1631  	int nsec2 = stb2.st_mtim.tv_nsec;
1632  
1633  	time_format = "%Y-%m-%d %H:%M:%S";
1634  
1635  	if (cflag)
1636  		time_format = "%c";
1637  	tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1638  	tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1639  	if (label[0] != NULL)
1640  		printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1641  		    label[0]);
1642  	else {
1643  		strftime(buf, sizeof(buf), time_format, tm_ptr1);
1644  		printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1645  		    file1, buf);
1646  		if (!cflag) {
1647  			strftime(buf, sizeof(buf), "%z", tm_ptr1);
1648  			printf(".%.9d %s", nsec1, buf);
1649  		}
1650  		printf("\n");
1651  	}
1652  	if (label[1] != NULL)
1653  		printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1654  		    label[1]);
1655  	else {
1656  		strftime(buf, sizeof(buf), time_format, tm_ptr2);
1657  		printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1658  		    file2, buf);
1659  		if (!cflag) {
1660  			strftime(buf, sizeof(buf), "%z", tm_ptr2);
1661  			printf(".%.9d %s", nsec2, buf);
1662  		}
1663  		printf("\n");
1664  	}
1665  }
1666  
1667  /*
1668   * Prints n number of space characters either by using tab
1669   * or single space characters.
1670   * nc is the preceding number of characters
1671   */
1672  static void
1673  print_space(int nc, int n, int flags) {
1674  	int i, col;
1675  
1676  	col = n;
1677  	if ((flags & D_EXPANDTABS) == 0) {
1678  		/* first tabstop may be closer than tabsize */
1679  		i = tabsize - (nc % tabsize);
1680  		while (col >= tabsize) {
1681  			printf("\t");
1682  			col -= i;
1683  			i = tabsize;
1684  		}
1685  	}
1686  	printf("%*s", col, "");
1687  }
1688