xref: /freebsd/usr.bin/diff/diffreg.c (revision 60e845ceef25533bfb60450549bea56a17b1e467)
1  /*	$OpenBSD: diffreg.c,v 1.93 2019/06/28 13:35:00 deraadt Exp $	*/
2  
3  /*-
4   * SPDX-License-Identifier: BSD-4-Clause
5   *
6   * Copyright (C) Caldera International Inc.  2001-2002.
7   * All rights reserved.
8   *
9   * Redistribution and use in source and binary forms, with or without
10   * modification, are permitted provided that the following conditions
11   * are met:
12   * 1. Redistributions of source code and documentation must retain the above
13   *    copyright notice, this list of conditions and the following disclaimer.
14   * 2. Redistributions in binary form must reproduce the above copyright
15   *    notice, this list of conditions and the following disclaimer in the
16   *    documentation and/or other materials provided with the distribution.
17   * 3. All advertising materials mentioning features or use of this software
18   *    must display the following acknowledgement:
19   *	This product includes software developed or owned by Caldera
20   *	International, Inc.
21   * 4. Neither the name of Caldera International, Inc. nor the names of other
22   *    contributors may be used to endorse or promote products derived from
23   *    this software without specific prior written permission.
24   *
25   * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
26   * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
27   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
28   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29   * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
30   * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35   * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36   * POSSIBILITY OF SUCH DAMAGE.
37   */
38  /*-
39   * Copyright (c) 1991, 1993
40   *	The Regents of the University of California.  All rights reserved.
41   *
42   * Redistribution and use in source and binary forms, with or without
43   * modification, are permitted provided that the following conditions
44   * are met:
45   * 1. Redistributions of source code must retain the above copyright
46   *    notice, this list of conditions and the following disclaimer.
47   * 2. Redistributions in binary form must reproduce the above copyright
48   *    notice, this list of conditions and the following disclaimer in the
49   *    documentation and/or other materials provided with the distribution.
50   * 3. Neither the name of the University nor the names of its contributors
51   *    may be used to endorse or promote products derived from this software
52   *    without specific prior written permission.
53   *
54   * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57   * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64   * SUCH DAMAGE.
65   */
66  
67  #include <sys/capsicum.h>
68  #include <sys/stat.h>
69  
70  #include <capsicum_helpers.h>
71  #include <ctype.h>
72  #include <err.h>
73  #include <errno.h>
74  #include <fcntl.h>
75  #include <math.h>
76  #include <paths.h>
77  #include <regex.h>
78  #include <stdbool.h>
79  #include <stddef.h>
80  #include <stdint.h>
81  #include <stdio.h>
82  #include <stdlib.h>
83  #include <string.h>
84  
85  #include "pr.h"
86  #include "diff.h"
87  #include "xmalloc.h"
88  
89  /*
90   * diff - compare two files.
91   */
92  
93  /*
94   *	Uses an algorithm due to Harold Stone, which finds a pair of longest
95   *	identical subsequences in the two files.
96   *
97   *	The major goal is to generate the match vector J. J[i] is the index of
98   *	the line in file1 corresponding to line i file0. J[i] = 0 if there is no
99   *	such line in file1.
100   *
101   *	Lines are hashed so as to work in core. All potential matches are
102   *	located by sorting the lines of each file on the hash (called
103   *	``value''). In particular, this collects the equivalence classes in
104   *	file1 together. Subroutine equiv replaces the value of each line in
105   *	file0 by the index of the first element of its matching equivalence in
106   *	(the reordered) file1. To save space equiv squeezes file1 into a single
107   *	array member in which the equivalence classes are simply concatenated,
108   *	except that their first members are flagged by changing sign.
109   *
110   *	Next the indices that point into member are unsorted into array class
111   *	according to the original order of file0.
112   *
113   *	The cleverness lies in routine stone. This marches through the lines of
114   *	file0, developing a vector klist of "k-candidates". At step i
115   *	a k-candidate is a matched pair of lines x,y (x in file0 y in file1)
116   *	such that there is a common subsequence of length k between the first
117   *	i lines of file0 and the first y lines of file1, but there is no such
118   *	subsequence for any smaller y. x is the earliest possible mate to y that
119   *	occurs in such a subsequence.
120   *
121   *	Whenever any of the members of the equivalence class of lines in file1
122   *	matable to a line in file0 has serial number less than the y of some
123   *	k-candidate, that k-candidate with the smallest such y is replaced. The
124   *	new k-candidate is chained (via pred) to the current k-1 candidate so
125   *	that the actual subsequence can be recovered. When a member has serial
126   *	number greater that the y of all k-candidates, the klist is extended. At
127   *	the end, the longest subsequence is pulled out and placed in the array J
128   *	by unravel.
129   *
130   *	With J in hand, the matches there recorded are check'ed against reality
131   *	to assure that no spurious matches have crept in due to hashing. If they
132   *	have, they are broken, and "jackpot" is recorded -- a harmless matter
133   *	except that a true match for a spuriously mated line may now be
134   *	unnecessarily reported as a change.
135   *
136   *	Much of the complexity of the program comes simply from trying to
137   *	minimize core utilization and maximize the range of doable problems by
138   *	dynamically allocating what is needed and reusing what is not. The core
139   *	requirements for problems larger than somewhat are (in words)
140   *	2*length(file0) + length(file1) + 3*(number of k-candidates installed),
141   *	typically about 6n words for files of length n.
142   */
143  
144  struct cand {
145  	int	x;
146  	int	y;
147  	int	pred;
148  };
149  
150  static struct line {
151  	int	serial;
152  	int	value;
153  } *file[2];
154  
155  /*
156   * The following struct is used to record change information when
157   * doing a "context" or "unified" diff.  (see routine "change" to
158   * understand the highly mnemonic field names)
159   */
160  struct context_vec {
161  	int	a;		/* start line in old file */
162  	int	b;		/* end line in old file */
163  	int	c;		/* start line in new file */
164  	int	d;		/* end line in new file */
165  };
166  
167  enum readhash { RH_BINARY, RH_OK, RH_EOF };
168  
169  #define MIN_PAD		1
170  static FILE	*opentemp(const char *);
171  static void	 output(char *, FILE *, char *, FILE *, int);
172  static void	 check(FILE *, FILE *, int);
173  static void	 range(int, int, const char *);
174  static void	 uni_range(int, int);
175  static void	 dump_context_vec(FILE *, FILE *, int);
176  static void	 dump_unified_vec(FILE *, FILE *, int);
177  static bool	 prepare(int, FILE *, size_t, int);
178  static void	 prune(void);
179  static void	 equiv(struct line *, int, struct line *, int, int *);
180  static void	 unravel(int);
181  static void	 unsort(struct line *, int, int *);
182  static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
183  static void	 sort(struct line *, int);
184  static void	 print_header(const char *, const char *);
185  static void	 print_space(int, int, int);
186  static bool	 ignoreline_pattern(char *);
187  static bool	 ignoreline(char *, bool);
188  static int	 asciifile(FILE *);
189  static int	 fetch(long *, int, int, FILE *, int, int, int);
190  static int	 newcand(int, int, int);
191  static int	 search(int *, int, int);
192  static int	 skipline(FILE *);
193  static int	 stone(int *, int, int *, int *, int);
194  static enum readhash readhash(FILE *, int, unsigned *);
195  static int	 files_differ(FILE *, FILE *, int);
196  static char	*match_function(const long *, int, FILE *);
197  static char	*preadline(int, size_t, off_t);
198  
199  static int	 *J;			/* will be overlaid on class */
200  static int	 *class;		/* will be overlaid on file[0] */
201  static int	 *klist;		/* will be overlaid on file[0] after class */
202  static int	 *member;		/* will be overlaid on file[1] */
203  static int	 clen;
204  static int	 inifdef;		/* whether or not we are in a #ifdef block */
205  static int	 len[2];
206  static int	 pref, suff;	/* length of prefix and suffix */
207  static int	 slen[2];
208  static int	 anychange;
209  static int	 hw, padding;	/* half width and padding */
210  static int	 edoffset;
211  static long	*ixnew;		/* will be overlaid on file[1] */
212  static long	*ixold;		/* will be overlaid on klist */
213  static struct cand *clist;	/* merely a free storage pot for candidates */
214  static int	 clistlen;		/* the length of clist */
215  static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
216  static int	(*chrtran)(int);	/* translation table for case-folding */
217  static struct context_vec *context_vec_start;
218  static struct context_vec *context_vec_end;
219  static struct context_vec *context_vec_ptr;
220  
221  #define FUNCTION_CONTEXT_SIZE	55
222  static char lastbuf[FUNCTION_CONTEXT_SIZE];
223  static int lastline;
224  static int lastmatchline;
225  
226  static int
227  clow2low(int c)
228  {
229  
230  	return (c);
231  }
232  
233  static int
234  cup2low(int c)
235  {
236  
237  	return (tolower(c));
238  }
239  
240  int
241  diffreg(char *file1, char *file2, int flags, int capsicum)
242  {
243  	FILE *f1, *f2;
244  	int i, rval;
245  	struct pr *pr = NULL;
246  	cap_rights_t rights_ro;
247  
248  	f1 = f2 = NULL;
249  	rval = D_SAME;
250  	anychange = 0;
251  	lastline = 0;
252  	lastmatchline = 0;
253  
254  	 /*
255  	  * hw excludes padding and make sure when -t is not used,
256  	  * the second column always starts from the closest tab stop
257  	  */
258  	if (diff_format == D_SIDEBYSIDE) {
259  		hw = width >> 1;
260  		padding = tabsize - (hw % tabsize);
261  		if ((flags & D_EXPANDTABS) != 0 || (padding % tabsize == 0))
262  			padding = MIN_PAD;
263  
264  		hw = (width >> 1) -
265  		    ((padding == MIN_PAD) ? (padding << 1) : padding) - 1;
266  	}
267  
268  
269  	if (flags & D_IGNORECASE)
270  		chrtran = cup2low;
271  	else
272  		chrtran = clow2low;
273  	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
274  		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
275  	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
276  		goto closem;
277  
278  	if (flags & D_EMPTY1)
279  		f1 = fopen(_PATH_DEVNULL, "r");
280  	else {
281  		if (!S_ISREG(stb1.st_mode)) {
282  			if ((f1 = opentemp(file1)) == NULL ||
283  			    fstat(fileno(f1), &stb1) == -1) {
284  				warn("%s", file1);
285  				rval = D_ERROR;
286  				status |= 2;
287  				goto closem;
288  			}
289  		} else if (strcmp(file1, "-") == 0)
290  			f1 = stdin;
291  		else
292  			f1 = fopen(file1, "r");
293  	}
294  	if (f1 == NULL) {
295  		warn("%s", file1);
296  		rval = D_ERROR;
297  		status |= 2;
298  		goto closem;
299  	}
300  
301  	if (flags & D_EMPTY2)
302  		f2 = fopen(_PATH_DEVNULL, "r");
303  	else {
304  		if (!S_ISREG(stb2.st_mode)) {
305  			if ((f2 = opentemp(file2)) == NULL ||
306  			    fstat(fileno(f2), &stb2) == -1) {
307  				warn("%s", file2);
308  				rval = D_ERROR;
309  				status |= 2;
310  				goto closem;
311  			}
312  		} else if (strcmp(file2, "-") == 0)
313  			f2 = stdin;
314  		else
315  			f2 = fopen(file2, "r");
316  	}
317  	if (f2 == NULL) {
318  		warn("%s", file2);
319  		rval = D_ERROR;
320  		status |= 2;
321  		goto closem;
322  	}
323  
324  	if (lflag)
325  		pr = start_pr(file1, file2);
326  
327  	if (capsicum) {
328  		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
329  		if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
330  			err(2, "unable to limit rights on: %s", file1);
331  		if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
332  			err(2, "unable to limit rights on: %s", file2);
333  		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
334  			/* stdin has already been limited */
335  			if (caph_limit_stderr() == -1)
336  				err(2, "unable to limit stderr");
337  			if (caph_limit_stdout() == -1)
338  				err(2, "unable to limit stdout");
339  		} else if (caph_limit_stdio() == -1)
340  				err(2, "unable to limit stdio");
341  
342  		caph_cache_catpages();
343  		caph_cache_tzdata();
344  		if (caph_enter() < 0)
345  			err(2, "unable to enter capability mode");
346  	}
347  
348  	switch (files_differ(f1, f2, flags)) {
349  	case 0:
350  		goto closem;
351  	case 1:
352  		break;
353  	default:
354  		/* error */
355  		rval = D_ERROR;
356  		status |= 2;
357  		goto closem;
358  	}
359  
360  	if (diff_format == D_BRIEF && ignore_pats == NULL &&
361  	    (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) == 0)
362  	{
363  		rval = D_DIFFER;
364  		status |= 1;
365  		goto closem;
366  	}
367  	if ((flags & D_FORCEASCII) != 0) {
368  		(void)prepare(0, f1, stb1.st_size, flags);
369  		(void)prepare(1, f2, stb2.st_size, flags);
370  	} else if (!asciifile(f1) || !asciifile(f2) ||
371  		    !prepare(0, f1, stb1.st_size, flags) ||
372  		    !prepare(1, f2, stb2.st_size, flags)) {
373  		rval = D_BINARY;
374  		status |= 1;
375  		goto closem;
376  	}
377  
378  	prune();
379  	sort(sfile[0], slen[0]);
380  	sort(sfile[1], slen[1]);
381  
382  	member = (int *)file[1];
383  	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
384  	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
385  
386  	class = (int *)file[0];
387  	unsort(sfile[0], slen[0], class);
388  	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
389  
390  	klist = xcalloc(slen[0] + 2, sizeof(*klist));
391  	clen = 0;
392  	clistlen = 100;
393  	clist = xcalloc(clistlen, sizeof(*clist));
394  	i = stone(class, slen[0], member, klist, flags);
395  	free(member);
396  	free(class);
397  
398  	J = xreallocarray(J, len[0] + 2, sizeof(*J));
399  	unravel(klist[i]);
400  	free(clist);
401  	free(klist);
402  
403  	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
404  	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
405  	check(f1, f2, flags);
406  	output(file1, f1, file2, f2, flags);
407  
408  closem:
409  	if (pr != NULL)
410  		stop_pr(pr);
411  	if (anychange) {
412  		status |= 1;
413  		if (rval == D_SAME)
414  			rval = D_DIFFER;
415  	}
416  	if (f1 != NULL)
417  		fclose(f1);
418  	if (f2 != NULL)
419  		fclose(f2);
420  
421  	return (rval);
422  }
423  
424  /*
425   * Check to see if the given files differ.
426   * Returns 0 if they are the same, 1 if different, and -1 on error.
427   * XXX - could use code from cmp(1) [faster]
428   */
429  static int
430  files_differ(FILE *f1, FILE *f2, int flags)
431  {
432  	char buf1[BUFSIZ], buf2[BUFSIZ];
433  	size_t i, j;
434  
435  	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
436  	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
437  		return (1);
438  
439  	if (stb1.st_dev == stb2.st_dev && stb1.st_ino == stb2.st_ino)
440  		return (0);
441  
442  	for (;;) {
443  		i = fread(buf1, 1, sizeof(buf1), f1);
444  		j = fread(buf2, 1, sizeof(buf2), f2);
445  		if ((!i && ferror(f1)) || (!j && ferror(f2)))
446  			return (-1);
447  		if (i != j)
448  			return (1);
449  		if (i == 0)
450  			return (0);
451  		if (memcmp(buf1, buf2, i) != 0)
452  			return (1);
453  	}
454  }
455  
456  static FILE *
457  opentemp(const char *f)
458  {
459  	char buf[BUFSIZ], tempfile[PATH_MAX];
460  	ssize_t nread;
461  	int ifd, ofd;
462  
463  	if (strcmp(f, "-") == 0)
464  		ifd = STDIN_FILENO;
465  	else if ((ifd = open(f, O_RDONLY, 0644)) == -1)
466  		return (NULL);
467  
468  	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
469  
470  	if ((ofd = mkstemp(tempfile)) == -1) {
471  		close(ifd);
472  		return (NULL);
473  	}
474  	unlink(tempfile);
475  	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
476  		if (write(ofd, buf, nread) != nread) {
477  			close(ifd);
478  			close(ofd);
479  			return (NULL);
480  		}
481  	}
482  	close(ifd);
483  	lseek(ofd, (off_t)0, SEEK_SET);
484  	return (fdopen(ofd, "r"));
485  }
486  
487  static bool
488  prepare(int i, FILE *fd, size_t filesize, int flags)
489  {
490  	struct line *p;
491  	unsigned h;
492  	size_t sz, j = 0;
493  	enum readhash r;
494  
495  	rewind(fd);
496  
497  	sz = MIN(filesize, SIZE_MAX) / 25;
498  	if (sz < 100)
499  		sz = 100;
500  
501  	p = xcalloc(sz + 3, sizeof(*p));
502  	while ((r = readhash(fd, flags, &h)) != RH_EOF)
503  		switch (r) {
504  		case RH_EOF: /* otherwise clang complains */
505  		case RH_BINARY:
506  			return (false);
507  		case RH_OK:
508  			if (j == sz) {
509  				sz = sz * 3 / 2;
510  				p = xreallocarray(p, sz + 3, sizeof(*p));
511  			}
512  			p[++j].value = h;
513  		}
514  
515  	len[i] = j;
516  	file[i] = p;
517  
518  	return (true);
519  }
520  
521  static void
522  prune(void)
523  {
524  	int i, j;
525  
526  	for (pref = 0; pref < len[0] && pref < len[1] &&
527  	    file[0][pref + 1].value == file[1][pref + 1].value;
528  	    pref++)
529  		;
530  	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
531  	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
532  	    suff++)
533  		;
534  	for (j = 0; j < 2; j++) {
535  		sfile[j] = file[j] + pref;
536  		slen[j] = len[j] - pref - suff;
537  		for (i = 0; i <= slen[j]; i++)
538  			sfile[j][i].serial = i;
539  	}
540  }
541  
542  static void
543  equiv(struct line *a, int n, struct line *b, int m, int *c)
544  {
545  	int i, j;
546  
547  	i = j = 1;
548  	while (i <= n && j <= m) {
549  		if (a[i].value < b[j].value)
550  			a[i++].value = 0;
551  		else if (a[i].value == b[j].value)
552  			a[i++].value = j;
553  		else
554  			j++;
555  	}
556  	while (i <= n)
557  		a[i++].value = 0;
558  	b[m + 1].value = 0;
559  	j = 0;
560  	while (++j <= m) {
561  		c[j] = -b[j].serial;
562  		while (b[j + 1].value == b[j].value) {
563  			j++;
564  			c[j] = b[j].serial;
565  		}
566  	}
567  	c[j] = -1;
568  }
569  
570  static int
571  stone(int *a, int n, int *b, int *c, int flags)
572  {
573  	int i, k, y, j, l;
574  	int oldc, tc, oldl, sq;
575  	unsigned numtries, bound;
576  
577  	if (flags & D_MINIMAL)
578  		bound = UINT_MAX;
579  	else {
580  		sq = sqrt(n);
581  		bound = MAX(256, sq);
582  	}
583  
584  	k = 0;
585  	c[0] = newcand(0, 0, 0);
586  	for (i = 1; i <= n; i++) {
587  		j = a[i];
588  		if (j == 0)
589  			continue;
590  		y = -b[j];
591  		oldl = 0;
592  		oldc = c[0];
593  		numtries = 0;
594  		do {
595  			if (y <= clist[oldc].y)
596  				continue;
597  			l = search(c, k, y);
598  			if (l != oldl + 1)
599  				oldc = c[l - 1];
600  			if (l <= k) {
601  				if (clist[c[l]].y <= y)
602  					continue;
603  				tc = c[l];
604  				c[l] = newcand(i, y, oldc);
605  				oldc = tc;
606  				oldl = l;
607  				numtries++;
608  			} else {
609  				c[l] = newcand(i, y, oldc);
610  				k++;
611  				break;
612  			}
613  		} while ((y = b[++j]) > 0 && numtries < bound);
614  	}
615  	return (k);
616  }
617  
618  static int
619  newcand(int x, int y, int pred)
620  {
621  	struct cand *q;
622  
623  	if (clen == clistlen) {
624  		clistlen = clistlen * 11 / 10;
625  		clist = xreallocarray(clist, clistlen, sizeof(*clist));
626  	}
627  	q = clist + clen;
628  	q->x = x;
629  	q->y = y;
630  	q->pred = pred;
631  	return (clen++);
632  }
633  
634  static int
635  search(int *c, int k, int y)
636  {
637  	int i, j, l, t;
638  
639  	if (clist[c[k]].y < y)	/* quick look for typical case */
640  		return (k + 1);
641  	i = 0;
642  	j = k + 1;
643  	for (;;) {
644  		l = (i + j) / 2;
645  		if (l <= i)
646  			break;
647  		t = clist[c[l]].y;
648  		if (t > y)
649  			j = l;
650  		else if (t < y)
651  			i = l;
652  		else
653  			return (l);
654  	}
655  	return (l + 1);
656  }
657  
658  static void
659  unravel(int p)
660  {
661  	struct cand *q;
662  	int i;
663  
664  	for (i = 0; i <= len[0]; i++)
665  		J[i] = i <= pref ? i :
666  		    i > len[0] - suff ? i + len[1] - len[0] : 0;
667  	for (q = clist + p; q->y != 0; q = clist + q->pred)
668  		J[q->x + pref] = q->y + pref;
669  }
670  
671  /*
672   * Check does double duty:
673   *  1. ferret out any fortuitous correspondences due to confounding by
674   *     hashing (which result in "jackpot")
675   *  2. collect random access indexes to the two files
676   */
677  static void
678  check(FILE *f1, FILE *f2, int flags)
679  {
680  	int i, j, /* jackpot, */ c, d;
681  	long ctold, ctnew;
682  
683  	rewind(f1);
684  	rewind(f2);
685  	j = 1;
686  	ixold[0] = ixnew[0] = 0;
687  	/* jackpot = 0; */
688  	ctold = ctnew = 0;
689  	for (i = 1; i <= len[0]; i++) {
690  		if (J[i] == 0) {
691  			ixold[i] = ctold += skipline(f1);
692  			continue;
693  		}
694  		while (j < J[i]) {
695  			ixnew[j] = ctnew += skipline(f2);
696  			j++;
697  		}
698  		if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS | D_IGNORECASE | D_STRIPCR)) {
699  			for (;;) {
700  				c = getc(f1);
701  				d = getc(f2);
702  				/*
703  				 * GNU diff ignores a missing newline
704  				 * in one file for -b or -w.
705  				 */
706  				if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS)) {
707  					if (c == EOF && d == '\n') {
708  						ctnew++;
709  						break;
710  					} else if (c == '\n' && d == EOF) {
711  						ctold++;
712  						break;
713  					}
714  				}
715  				ctold++;
716  				ctnew++;
717  				if (flags & D_STRIPCR && (c == '\r' || d == '\r')) {
718  					if (c == '\r') {
719  						if ((c = getc(f1)) == '\n') {
720  							ctold++;
721  						} else {
722  							ungetc(c, f1);
723  						}
724  					}
725  					if (d == '\r') {
726  						if ((d = getc(f2)) == '\n') {
727  							ctnew++;
728  						} else {
729  							ungetc(d, f2);
730  						}
731  					}
732  					break;
733  				}
734  				if ((flags & D_FOLDBLANKS) && isspace(c) &&
735  				    isspace(d)) {
736  					do {
737  						if (c == '\n')
738  							break;
739  						ctold++;
740  					} while (isspace(c = getc(f1)));
741  					do {
742  						if (d == '\n')
743  							break;
744  						ctnew++;
745  					} while (isspace(d = getc(f2)));
746  				} else if (flags & D_IGNOREBLANKS) {
747  					while (isspace(c) && c != '\n') {
748  						c = getc(f1);
749  						ctold++;
750  					}
751  					while (isspace(d) && d != '\n') {
752  						d = getc(f2);
753  						ctnew++;
754  					}
755  				}
756  				if (chrtran(c) != chrtran(d)) {
757  					/* jackpot++; */
758  					J[i] = 0;
759  					if (c != '\n' && c != EOF)
760  						ctold += skipline(f1);
761  					if (d != '\n' && c != EOF)
762  						ctnew += skipline(f2);
763  					break;
764  				}
765  				if (c == '\n' || c == EOF)
766  					break;
767  			}
768  		} else {
769  			for (;;) {
770  				ctold++;
771  				ctnew++;
772  				if ((c = getc(f1)) != (d = getc(f2))) {
773  					/* jackpot++; */
774  					J[i] = 0;
775  					if (c != '\n' && c != EOF)
776  						ctold += skipline(f1);
777  					if (d != '\n' && c != EOF)
778  						ctnew += skipline(f2);
779  					break;
780  				}
781  				if (c == '\n' || c == EOF)
782  					break;
783  			}
784  		}
785  		ixold[i] = ctold;
786  		ixnew[j] = ctnew;
787  		j++;
788  	}
789  	for (; j <= len[1]; j++) {
790  		ixnew[j] = ctnew += skipline(f2);
791  	}
792  	/*
793  	 * if (jackpot)
794  	 *	fprintf(stderr, "jackpot\n");
795  	 */
796  }
797  
798  /* shellsort CACM #201 */
799  static void
800  sort(struct line *a, int n)
801  {
802  	struct line *ai, *aim, w;
803  	int j, m = 0, k;
804  
805  	if (n == 0)
806  		return;
807  	for (j = 1; j <= n; j *= 2)
808  		m = 2 * j - 1;
809  	for (m /= 2; m != 0; m /= 2) {
810  		k = n - m;
811  		for (j = 1; j <= k; j++) {
812  			for (ai = &a[j]; ai > a; ai -= m) {
813  				aim = &ai[m];
814  				if (aim < ai)
815  					break;	/* wraparound */
816  				if (aim->value > ai[0].value ||
817  				    (aim->value == ai[0].value &&
818  					aim->serial > ai[0].serial))
819  					break;
820  				w.value = ai[0].value;
821  				ai[0].value = aim->value;
822  				aim->value = w.value;
823  				w.serial = ai[0].serial;
824  				ai[0].serial = aim->serial;
825  				aim->serial = w.serial;
826  			}
827  		}
828  	}
829  }
830  
831  static void
832  unsort(struct line *f, int l, int *b)
833  {
834  	int *a, i;
835  
836  	a = xcalloc(l + 1, sizeof(*a));
837  	for (i = 1; i <= l; i++)
838  		a[f[i].serial] = f[i].value;
839  	for (i = 1; i <= l; i++)
840  		b[i] = a[i];
841  	free(a);
842  }
843  
844  static int
845  skipline(FILE *f)
846  {
847  	int i, c;
848  
849  	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
850  		continue;
851  	return (i);
852  }
853  
854  static void
855  output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
856  {
857  	int i, j, m, i0, i1, j0, j1, nc;
858  
859  	rewind(f1);
860  	rewind(f2);
861  	m = len[0];
862  	J[0] = 0;
863  	J[m + 1] = len[1] + 1;
864  	if (diff_format != D_EDIT) {
865  		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
866  			while (i0 <= m && J[i0] == J[i0 - 1] + 1) {
867  				if (diff_format == D_SIDEBYSIDE && suppress_common != 1) {
868  					nc = fetch(ixold, i0, i0, f1, '\0', 1, flags);
869  					print_space(nc, (hw - nc) + (padding << 1) + 1, flags);
870  					fetch(ixnew, J[i0], J[i0], f2, '\0', 0, flags);
871  					printf("\n");
872  				}
873  				i0++;
874  			}
875  			j0 = J[i0 - 1] + 1;
876  			i1 = i0 - 1;
877  			while (i1 < m && J[i1 + 1] == 0)
878  				i1++;
879  			j1 = J[i1 + 1] - 1;
880  			J[i1] = j1;
881  
882  			/*
883  			 * When using side-by-side, lines from both of the files are
884  			 * printed. The algorithm used by diff(1) identifies the ranges
885  			 * in which two files differ.
886  			 * See the change() function below.
887  			 * The for loop below consumes the shorter range, whereas one of
888  			 * the while loops deals with the longer one.
889  			 */
890  			if (diff_format == D_SIDEBYSIDE) {
891  				for (i = i0, j = j0; i <= i1 && j <= j1; i++, j++)
892  					change(file1, f1, file2, f2, i, i, j, j, &flags);
893  
894  				while (i <= i1) {
895  					change(file1, f1, file2, f2, i, i, j + 1, j, &flags);
896  					i++;
897  				}
898  
899  				while (j <= j1) {
900  					change(file1, f1, file2, f2, i + 1, i, j, j, &flags);
901  					j++;
902  				}
903  			} else
904  				change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
905  		}
906  	} else {
907  		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
908  			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
909  				i0--;
910  			j0 = J[i0 + 1] - 1;
911  			i1 = i0 + 1;
912  			while (i1 > 1 && J[i1 - 1] == 0)
913  				i1--;
914  			j1 = J[i1 - 1] + 1;
915  			J[i1] = j1;
916  			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
917  		}
918  	}
919  	if (m == 0)
920  		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
921  	if (diff_format == D_IFDEF || diff_format == D_GFORMAT) {
922  		for (;;) {
923  #define	c i0
924  			if ((c = getc(f1)) == EOF)
925  				return;
926  			printf("%c", c);
927  		}
928  #undef c
929  	}
930  	if (anychange != 0) {
931  		if (diff_format == D_CONTEXT)
932  			dump_context_vec(f1, f2, flags);
933  		else if (diff_format == D_UNIFIED)
934  			dump_unified_vec(f1, f2, flags);
935  	}
936  }
937  
938  static void
939  range(int a, int b, const char *separator)
940  {
941  	printf("%d", a > b ? b : a);
942  	if (a < b)
943  		printf("%s%d", separator, b);
944  }
945  
946  static void
947  uni_range(int a, int b)
948  {
949  	if (a < b)
950  		printf("%d,%d", a, b - a + 1);
951  	else if (a == b)
952  		printf("%d", b);
953  	else
954  		printf("%d,0", b);
955  }
956  
957  static char *
958  preadline(int fd, size_t rlen, off_t off)
959  {
960  	char *line;
961  	ssize_t nr;
962  
963  	line = xmalloc(rlen + 1);
964  	if ((nr = pread(fd, line, rlen, off)) == -1)
965  		err(2, "preadline");
966  	if (nr > 0 && line[nr-1] == '\n')
967  		nr--;
968  	line[nr] = '\0';
969  	return (line);
970  }
971  
972  static bool
973  ignoreline_pattern(char *line)
974  {
975  	int ret;
976  
977  	ret = regexec(&ignore_re, line, 0, NULL, 0);
978  	return (ret == 0);	/* if it matched, it should be ignored. */
979  }
980  
981  static bool
982  ignoreline(char *line, bool skip_blanks)
983  {
984  
985  	if (skip_blanks && *line == '\0')
986  		return (true);
987  	if (ignore_pats != NULL && ignoreline_pattern(line))
988  		return (true);
989  	return (false);
990  }
991  
992  /*
993   * Indicate that there is a difference between lines a and b of the from file
994   * to get to lines c to d of the to file.  If a is greater then b then there
995   * are no lines in the from file involved and this means that there were
996   * lines appended (beginning at b).  If c is greater than d then there are
997   * lines missing from the to file.
998   */
999  static void
1000  change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1001      int *pflags)
1002  {
1003  	static size_t max_context = 64;
1004  	long curpos;
1005  	int i, nc;
1006  	const char *walk;
1007  	bool skip_blanks, ignore;
1008  
1009  	skip_blanks = (*pflags & D_SKIPBLANKLINES);
1010  restart:
1011  	if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) &&
1012  	    a > b && c > d)
1013  		return;
1014  	if (ignore_pats != NULL || skip_blanks) {
1015  		char *line;
1016  		/*
1017  		 * All lines in the change, insert, or delete must match an ignore
1018  		 * pattern for the change to be ignored.
1019  		 */
1020  		if (a <= b) {		/* Changes and deletes. */
1021  			for (i = a; i <= b; i++) {
1022  				line = preadline(fileno(f1),
1023  				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1024  				ignore = ignoreline(line, skip_blanks);
1025  				free(line);
1026  				if (!ignore)
1027  					goto proceed;
1028  			}
1029  		}
1030  		if (a > b || c <= d) {	/* Changes and inserts. */
1031  			for (i = c; i <= d; i++) {
1032  				line = preadline(fileno(f2),
1033  				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1034  				ignore = ignoreline(line, skip_blanks);
1035  				free(line);
1036  				if (!ignore)
1037  					goto proceed;
1038  			}
1039  		}
1040  		return;
1041  	}
1042  proceed:
1043  	if (*pflags & D_HEADER && diff_format != D_BRIEF) {
1044  		printf("%s %s %s\n", diffargs, file1, file2);
1045  		*pflags &= ~D_HEADER;
1046  	}
1047  	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1048  		/*
1049  		 * Allocate change records as needed.
1050  		 */
1051  		if (context_vec_start == NULL ||
1052  		    context_vec_ptr == context_vec_end - 1) {
1053  			ptrdiff_t offset = -1;
1054  
1055  			if (context_vec_start != NULL)
1056  				offset = context_vec_ptr - context_vec_start;
1057  			max_context <<= 1;
1058  			context_vec_start = xreallocarray(context_vec_start,
1059  			    max_context, sizeof(*context_vec_start));
1060  			context_vec_end = context_vec_start + max_context;
1061  			context_vec_ptr = context_vec_start + offset;
1062  		}
1063  		if (anychange == 0) {
1064  			/*
1065  			 * Print the context/unidiff header first time through.
1066  			 */
1067  			print_header(file1, file2);
1068  			anychange = 1;
1069  		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1070  		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1071  			/*
1072  			 * If this change is more than 'diff_context' lines from the
1073  			 * previous change, dump the record and reset it.
1074  			 */
1075  			if (diff_format == D_CONTEXT)
1076  				dump_context_vec(f1, f2, *pflags);
1077  			else
1078  				dump_unified_vec(f1, f2, *pflags);
1079  		}
1080  		context_vec_ptr++;
1081  		context_vec_ptr->a = a;
1082  		context_vec_ptr->b = b;
1083  		context_vec_ptr->c = c;
1084  		context_vec_ptr->d = d;
1085  		return;
1086  	}
1087  	if (anychange == 0)
1088  		anychange = 1;
1089  	switch (diff_format) {
1090  	case D_BRIEF:
1091  		return;
1092  	case D_NORMAL:
1093  	case D_EDIT:
1094  		range(a, b, ",");
1095  		printf("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1096  		if (diff_format == D_NORMAL)
1097  			range(c, d, ",");
1098  		printf("\n");
1099  		break;
1100  	case D_REVERSE:
1101  		printf("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1102  		range(a, b, " ");
1103  		printf("\n");
1104  		break;
1105  	case D_NREVERSE:
1106  		if (a > b)
1107  			printf("a%d %d\n", b, d - c + 1);
1108  		else {
1109  			printf("d%d %d\n", a, b - a + 1);
1110  			if (!(c > d))
1111  				/* add changed lines */
1112  				printf("a%d %d\n", b, d - c + 1);
1113  		}
1114  		break;
1115  	}
1116  	if (diff_format == D_GFORMAT) {
1117  		curpos = ftell(f1);
1118  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1119  		nc = ixold[a > b ? b : a - 1] - curpos;
1120  		for (i = 0; i < nc; i++)
1121  			printf("%c", getc(f1));
1122  		for (walk = group_format; *walk != '\0'; walk++) {
1123  			if (*walk == '%') {
1124  				walk++;
1125  				switch (*walk) {
1126  				case '<':
1127  					fetch(ixold, a, b, f1, '<', 1, *pflags);
1128  					break;
1129  				case '>':
1130  					fetch(ixnew, c, d, f2, '>', 0, *pflags);
1131  					break;
1132  				default:
1133  					printf("%%%c", *walk);
1134  					break;
1135  				}
1136  				continue;
1137  			}
1138  			printf("%c", *walk);
1139  		}
1140  	}
1141  	if (diff_format == D_SIDEBYSIDE) {
1142  		if (color && a > b)
1143  			printf("\033[%sm", add_code);
1144  		else if (color && c > d)
1145  			printf("\033[%sm", del_code);
1146  		if (a > b) {
1147  			print_space(0, hw + padding , *pflags);
1148  		} else {
1149  			nc = fetch(ixold, a, b, f1, '\0', 1, *pflags);
1150  			print_space(nc, hw - nc + padding, *pflags);
1151  		}
1152  		if (color && a > b)
1153  			printf("\033[%sm", add_code);
1154  		else if (color && c > d)
1155  			printf("\033[%sm", del_code);
1156  		printf("%c", (a > b) ? '>' : ((c > d) ? '<' : '|'));
1157  		if (color && c > d)
1158  			printf("\033[m");
1159  		print_space(hw + padding + 1 , padding, *pflags);
1160  		fetch(ixnew, c, d, f2, '\0', 0, *pflags);
1161  		printf("\n");
1162  	}
1163  	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1164  		fetch(ixold, a, b, f1, '<', 1, *pflags);
1165  		if (a <= b && c <= d && diff_format == D_NORMAL)
1166  			printf("---\n");
1167  	}
1168  	if (diff_format != D_GFORMAT && diff_format != D_SIDEBYSIDE)
1169  		fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1170  	if (edoffset != 0 && diff_format == D_EDIT) {
1171  		/*
1172  		 * A non-zero edoffset value for D_EDIT indicates that the last line
1173  		 * printed was a bare dot (".") that has been escaped as ".." to
1174  		 * prevent ed(1) from misinterpreting it.  We have to add a
1175  		 * substitute command to change this back and restart where we left
1176  		 * off.
1177  		 */
1178  		printf(".\n");
1179  		printf("%ds/.//\n", a + edoffset - 1);
1180  		b = a + edoffset - 1;
1181  		a = b + 1;
1182  		c += edoffset;
1183  		goto restart;
1184  	}
1185  	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1186  		printf(".\n");
1187  	if (inifdef) {
1188  		printf("#endif /* %s */\n", ifdefname);
1189  		inifdef = 0;
1190  	}
1191  }
1192  
1193  static int
1194  fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1195  {
1196  	int i, j, c, lastc, col, nc, newcol;
1197  
1198  	edoffset = 0;
1199  	nc = 0;
1200  	/*
1201  	 * When doing #ifdef's, copy down to current line
1202  	 * if this is the first file, so that stuff makes it to output.
1203  	 */
1204  	if ((diff_format == D_IFDEF) && oldfile) {
1205  		long curpos = ftell(lb);
1206  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1207  		nc = f[a > b ? b : a - 1] - curpos;
1208  		for (i = 0; i < nc; i++)
1209  			printf("%c", getc(lb));
1210  	}
1211  	if (a > b)
1212  		return (0);
1213  	if (diff_format == D_IFDEF) {
1214  		if (inifdef) {
1215  			printf("#else /* %s%s */\n",
1216  			    oldfile == 1 ? "!" : "", ifdefname);
1217  		} else {
1218  			if (oldfile)
1219  				printf("#ifndef %s\n", ifdefname);
1220  			else
1221  				printf("#ifdef %s\n", ifdefname);
1222  		}
1223  		inifdef = 1 + oldfile;
1224  	}
1225  	for (i = a; i <= b; i++) {
1226  		fseek(lb, f[i - 1], SEEK_SET);
1227  		nc = f[i] - f[i - 1];
1228  		if (diff_format == D_SIDEBYSIDE && hw < nc)
1229  			nc = hw;
1230  		if (diff_format != D_IFDEF && diff_format != D_GFORMAT &&
1231  		    ch != '\0') {
1232  			if (color && (ch == '>' || ch == '+'))
1233  				printf("\033[%sm", add_code);
1234  			else if (color && (ch == '<' || ch == '-'))
1235  				printf("\033[%sm", del_code);
1236  			printf("%c", ch);
1237  			if (Tflag && (diff_format == D_NORMAL ||
1238  			    diff_format == D_CONTEXT ||
1239  			    diff_format == D_UNIFIED))
1240  				printf("\t");
1241  			else if (diff_format != D_UNIFIED)
1242  				printf(" ");
1243  		}
1244  		col = j = 0;
1245  		lastc = '\0';
1246  		while (j < nc && (hw == 0 || col < hw)) {
1247  			c = getc(lb);
1248  			if (flags & D_STRIPCR && c == '\r') {
1249  				if ((c = getc(lb)) == '\n')
1250  					j++;
1251  				else {
1252  					ungetc(c, lb);
1253  					c = '\r';
1254  				}
1255  			}
1256  			if (c == EOF) {
1257  				if (diff_format == D_EDIT ||
1258  				    diff_format == D_REVERSE ||
1259  				    diff_format == D_NREVERSE)
1260  					warnx("No newline at end of file");
1261  				else
1262  					printf("\n\\ No newline at end of file\n");
1263  				return (col);
1264  			}
1265  			/*
1266  			 * when using --side-by-side, col needs to be increased
1267  			 * in any case to keep the columns aligned
1268  			 */
1269  			if (c == '\t') {
1270  				if (flags & D_EXPANDTABS) {
1271  					newcol = ((col / tabsize) + 1) * tabsize;
1272  					do {
1273  						printf(" ");
1274  					} while (++col < newcol && col < hw);
1275  				} else {
1276  					if (diff_format == D_SIDEBYSIDE) {
1277  						if ((col + tabsize) > hw) {
1278  							printf("%*s", hw - col, "");
1279  							col = hw;
1280  						} else {
1281  							printf("\t");
1282  							col += tabsize - 1;
1283  						}
1284  					} else {
1285  						printf("\t");
1286  						col++;
1287  					}
1288  				}
1289  			} else {
1290  				if (diff_format == D_EDIT && j == 1 && c == '\n' &&
1291  				    lastc == '.') {
1292  					/*
1293  					 * Don't print a bare "." line since that will confuse
1294  					 * ed(1). Print ".." instead and set the, global variable
1295  					 * edoffset to an offset from which to restart. The
1296  					 * caller must check the value of edoffset
1297  					 */
1298  					printf(".\n");
1299  					edoffset = i - a + 1;
1300  					return (edoffset);
1301  				}
1302  				/* when side-by-side, do not print a newline */
1303  				if (diff_format != D_SIDEBYSIDE || c != '\n') {
1304  					if (color && c == '\n')
1305  						printf("\033[m%c", c);
1306  					else
1307  						printf("%c", c);
1308  					col++;
1309  				}
1310  			}
1311  
1312  			j++;
1313  			lastc = c;
1314  		}
1315  	}
1316  	if (color && diff_format == D_SIDEBYSIDE)
1317  		printf("\033[m");
1318  	return (col);
1319  }
1320  
1321  /*
1322   * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1323   */
1324  static enum readhash
1325  readhash(FILE *f, int flags, unsigned *hash)
1326  {
1327  	int i, t, space;
1328  	unsigned sum;
1329  
1330  	sum = 1;
1331  	space = 0;
1332  	for (i = 0;;) {
1333  		switch (t = getc(f)) {
1334  		case '\0':
1335  			if ((flags & D_FORCEASCII) == 0)
1336  				return (RH_BINARY);
1337  			goto hashchar;
1338  		case '\r':
1339  			if (flags & D_STRIPCR) {
1340  				t = getc(f);
1341  				if (t == '\n')
1342  					break;
1343  				ungetc(t, f);
1344  			}
1345  			/* FALLTHROUGH */
1346  		case '\t':
1347  		case '\v':
1348  		case '\f':
1349  		case ' ':
1350  			if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) != 0) {
1351  				space++;
1352  				continue;
1353  			}
1354  			/* FALLTHROUGH */
1355  		default:
1356  		hashchar:
1357  			if (space && (flags & D_IGNOREBLANKS) == 0) {
1358  				i++;
1359  				space = 0;
1360  			}
1361  			sum = sum * 127 + chrtran(t);
1362  			i++;
1363  			continue;
1364  		case EOF:
1365  			if (i == 0)
1366  				return (RH_EOF);
1367  			/* FALLTHROUGH */
1368  		case '\n':
1369  			break;
1370  		}
1371  		break;
1372  	}
1373  	*hash = sum;
1374  	return (RH_OK);
1375  }
1376  
1377  static int
1378  asciifile(FILE *f)
1379  {
1380  	unsigned char buf[BUFSIZ];
1381  	size_t cnt;
1382  
1383  	if (f == NULL)
1384  		return (1);
1385  
1386  	rewind(f);
1387  	cnt = fread(buf, 1, sizeof(buf), f);
1388  	return (memchr(buf, '\0', cnt) == NULL);
1389  }
1390  
1391  #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre) - 1) == 0)
1392  
1393  static char *
1394  match_function(const long *f, int pos, FILE *fp)
1395  {
1396  	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1397  	size_t nc;
1398  	int last = lastline;
1399  	const char *state = NULL;
1400  
1401  	lastline = pos;
1402  	for (; pos > last; pos--) {
1403  		fseek(fp, f[pos - 1], SEEK_SET);
1404  		nc = f[pos] - f[pos - 1];
1405  		if (nc >= sizeof(buf))
1406  			nc = sizeof(buf) - 1;
1407  		nc = fread(buf, 1, nc, fp);
1408  		if (nc == 0)
1409  			continue;
1410  		buf[nc] = '\0';
1411  		buf[strcspn(buf, "\n")] = '\0';
1412  		if (most_recent_pat != NULL) {
1413  			int ret = regexec(&most_recent_re, buf, 0, NULL, 0);
1414  
1415  			if (ret != 0)
1416  				continue;
1417  			strlcpy(lastbuf, buf, sizeof(lastbuf));
1418  			lastmatchline = pos;
1419  			return (lastbuf);
1420  		} else if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$'
1421  			|| buf[0] == '-' || buf[0] == '+') {
1422  			if (begins_with(buf, "private:")) {
1423  				if (!state)
1424  					state = " (private)";
1425  			} else if (begins_with(buf, "protected:")) {
1426  				if (!state)
1427  					state = " (protected)";
1428  			} else if (begins_with(buf, "public:")) {
1429  				if (!state)
1430  					state = " (public)";
1431  			} else {
1432  				strlcpy(lastbuf, buf, sizeof(lastbuf));
1433  				if (state)
1434  					strlcat(lastbuf, state, sizeof(lastbuf));
1435  				lastmatchline = pos;
1436  				return (lastbuf);
1437  			}
1438  		}
1439  	}
1440  	return (lastmatchline > 0 ? lastbuf : NULL);
1441  }
1442  
1443  /* dump accumulated "context" diff changes */
1444  static void
1445  dump_context_vec(FILE *f1, FILE *f2, int flags)
1446  {
1447  	struct context_vec *cvp = context_vec_start;
1448  	int lowa, upb, lowc, upd, do_output;
1449  	int a, b, c, d;
1450  	char ch, *f;
1451  
1452  	if (context_vec_start > context_vec_ptr)
1453  		return;
1454  
1455  	b = d = 0;		/* gcc */
1456  	lowa = MAX(1, cvp->a - diff_context);
1457  	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1458  	lowc = MAX(1, cvp->c - diff_context);
1459  	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1460  
1461  	printf("***************");
1462  	if (flags & (D_PROTOTYPE | D_MATCHLAST)) {
1463  		f = match_function(ixold, cvp->a - 1, f1);
1464  		if (f != NULL)
1465  			printf(" %s", f);
1466  	}
1467  	printf("\n*** ");
1468  	range(lowa, upb, ",");
1469  	printf(" ****\n");
1470  
1471  	/*
1472  	 * Output changes to the "old" file.  The first loop suppresses
1473  	 * output if there were no changes to the "old" file (we'll see
1474  	 * the "old" lines as context in the "new" list).
1475  	 */
1476  	do_output = 0;
1477  	for (; cvp <= context_vec_ptr; cvp++)
1478  		if (cvp->a <= cvp->b) {
1479  			cvp = context_vec_start;
1480  			do_output++;
1481  			break;
1482  		}
1483  	if (do_output) {
1484  		while (cvp <= context_vec_ptr) {
1485  			a = cvp->a;
1486  			b = cvp->b;
1487  			c = cvp->c;
1488  			d = cvp->d;
1489  
1490  			if (a <= b && c <= d)
1491  				ch = 'c';
1492  			else
1493  				ch = (a <= b) ? 'd' : 'a';
1494  
1495  			if (ch == 'a')
1496  				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1497  			else {
1498  				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1499  				fetch(ixold, a, b, f1,
1500  				    ch == 'c' ? '!' : '-', 0, flags);
1501  			}
1502  			lowa = b + 1;
1503  			cvp++;
1504  		}
1505  		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1506  	}
1507  	/* output changes to the "new" file */
1508  	printf("--- ");
1509  	range(lowc, upd, ",");
1510  	printf(" ----\n");
1511  
1512  	do_output = 0;
1513  	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1514  		if (cvp->c <= cvp->d) {
1515  			cvp = context_vec_start;
1516  			do_output++;
1517  			break;
1518  		}
1519  	if (do_output) {
1520  		while (cvp <= context_vec_ptr) {
1521  			a = cvp->a;
1522  			b = cvp->b;
1523  			c = cvp->c;
1524  			d = cvp->d;
1525  
1526  			if (a <= b && c <= d)
1527  				ch = 'c';
1528  			else
1529  				ch = (a <= b) ? 'd' : 'a';
1530  
1531  			if (ch == 'd')
1532  				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1533  			else {
1534  				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1535  				fetch(ixnew, c, d, f2,
1536  				    ch == 'c' ? '!' : '+', 0, flags);
1537  			}
1538  			lowc = d + 1;
1539  			cvp++;
1540  		}
1541  		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1542  	}
1543  	context_vec_ptr = context_vec_start - 1;
1544  }
1545  
1546  /* dump accumulated "unified" diff changes */
1547  static void
1548  dump_unified_vec(FILE *f1, FILE *f2, int flags)
1549  {
1550  	struct context_vec *cvp = context_vec_start;
1551  	int lowa, upb, lowc, upd;
1552  	int a, b, c, d;
1553  	char ch, *f;
1554  
1555  	if (context_vec_start > context_vec_ptr)
1556  		return;
1557  
1558  	b = d = 0;		/* gcc */
1559  	lowa = MAX(1, cvp->a - diff_context);
1560  	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1561  	lowc = MAX(1, cvp->c - diff_context);
1562  	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1563  
1564  	printf("@@ -");
1565  	uni_range(lowa, upb);
1566  	printf(" +");
1567  	uni_range(lowc, upd);
1568  	printf(" @@");
1569  	if (flags & (D_PROTOTYPE | D_MATCHLAST)) {
1570  		f = match_function(ixold, cvp->a - 1, f1);
1571  		if (f != NULL)
1572  			printf(" %s", f);
1573  	}
1574  	printf("\n");
1575  
1576  	/*
1577  	 * Output changes in "unified" diff format--the old and new lines
1578  	 * are printed together.
1579  	 */
1580  	for (; cvp <= context_vec_ptr; cvp++) {
1581  		a = cvp->a;
1582  		b = cvp->b;
1583  		c = cvp->c;
1584  		d = cvp->d;
1585  
1586  		/*
1587  		 * c: both new and old changes
1588  		 * d: only changes in the old file
1589  		 * a: only changes in the new file
1590  		 */
1591  		if (a <= b && c <= d)
1592  			ch = 'c';
1593  		else
1594  			ch = (a <= b) ? 'd' : 'a';
1595  
1596  		switch (ch) {
1597  		case 'c':
1598  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1599  			fetch(ixold, a, b, f1, '-', 0, flags);
1600  			fetch(ixnew, c, d, f2, '+', 0, flags);
1601  			break;
1602  		case 'd':
1603  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1604  			fetch(ixold, a, b, f1, '-', 0, flags);
1605  			break;
1606  		case 'a':
1607  			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1608  			fetch(ixnew, c, d, f2, '+', 0, flags);
1609  			break;
1610  		}
1611  		lowa = b + 1;
1612  		lowc = d + 1;
1613  	}
1614  	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1615  
1616  	context_vec_ptr = context_vec_start - 1;
1617  }
1618  
1619  static void
1620  print_header(const char *file1, const char *file2)
1621  {
1622  	const char *time_format;
1623  	char buf[256];
1624  	struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1625  	int nsec1 = stb1.st_mtim.tv_nsec;
1626  	int nsec2 = stb2.st_mtim.tv_nsec;
1627  
1628  	time_format = "%Y-%m-%d %H:%M:%S";
1629  
1630  	if (cflag)
1631  		time_format = "%c";
1632  	tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1633  	tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1634  	if (label[0] != NULL)
1635  		printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1636  		    label[0]);
1637  	else {
1638  		strftime(buf, sizeof(buf), time_format, tm_ptr1);
1639  		printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1640  		    file1, buf);
1641  		if (!cflag) {
1642  			strftime(buf, sizeof(buf), "%z", tm_ptr1);
1643  			printf(".%.9d %s", nsec1, buf);
1644  		}
1645  		printf("\n");
1646  	}
1647  	if (label[1] != NULL)
1648  		printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1649  		    label[1]);
1650  	else {
1651  		strftime(buf, sizeof(buf), time_format, tm_ptr2);
1652  		printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1653  		    file2, buf);
1654  		if (!cflag) {
1655  			strftime(buf, sizeof(buf), "%z", tm_ptr2);
1656  			printf(".%.9d %s", nsec2, buf);
1657  		}
1658  		printf("\n");
1659  	}
1660  }
1661  
1662  /*
1663   * Prints n number of space characters either by using tab
1664   * or single space characters.
1665   * nc is the preceding number of characters
1666   */
1667  static void
1668  print_space(int nc, int n, int flags) {
1669  	int i, col;
1670  
1671  	col = n;
1672  	if ((flags & D_EXPANDTABS) == 0) {
1673  		/* first tabstop may be closer than tabsize */
1674  		i = tabsize - (nc % tabsize);
1675  		while (col >= tabsize) {
1676  			printf("\t");
1677  			col -= i;
1678  			i = tabsize;
1679  		}
1680  	}
1681  	printf("%*s", col, "");
1682  }
1683