xref: /freebsd/usr.bin/diff/diffreg.c (revision ddd08375c85576b49fb9a34968ba2c2f4f8d56cf)
1  /*	$OpenBSD: diffreg.c,v 1.93 2019/06/28 13:35:00 deraadt Exp $	*/
2  
3  /*-
4   * SPDX-License-Identifier: BSD-4-Clause
5   *
6   * Copyright (C) Caldera International Inc.  2001-2002.
7   * All rights reserved.
8   *
9   * Redistribution and use in source and binary forms, with or without
10   * modification, are permitted provided that the following conditions
11   * are met:
12   * 1. Redistributions of source code and documentation must retain the above
13   *    copyright notice, this list of conditions and the following disclaimer.
14   * 2. Redistributions in binary form must reproduce the above copyright
15   *    notice, this list of conditions and the following disclaimer in the
16   *    documentation and/or other materials provided with the distribution.
17   * 3. All advertising materials mentioning features or use of this software
18   *    must display the following acknowledgement:
19   *	This product includes software developed or owned by Caldera
20   *	International, Inc.
21   * 4. Neither the name of Caldera International, Inc. nor the names of other
22   *    contributors may be used to endorse or promote products derived from
23   *    this software without specific prior written permission.
24   *
25   * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
26   * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
27   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
28   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29   * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
30   * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35   * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36   * POSSIBILITY OF SUCH DAMAGE.
37   */
38  /*-
39   * Copyright (c) 1991, 1993
40   *	The Regents of the University of California.  All rights reserved.
41   *
42   * Redistribution and use in source and binary forms, with or without
43   * modification, are permitted provided that the following conditions
44   * are met:
45   * 1. Redistributions of source code must retain the above copyright
46   *    notice, this list of conditions and the following disclaimer.
47   * 2. Redistributions in binary form must reproduce the above copyright
48   *    notice, this list of conditions and the following disclaimer in the
49   *    documentation and/or other materials provided with the distribution.
50   * 3. Neither the name of the University nor the names of its contributors
51   *    may be used to endorse or promote products derived from this software
52   *    without specific prior written permission.
53   *
54   * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57   * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64   * SUCH DAMAGE.
65   *
66   *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
67   */
68  
69  #include <sys/cdefs.h>
70  #include <sys/capsicum.h>
71  #include <sys/stat.h>
72  
73  #include <capsicum_helpers.h>
74  #include <ctype.h>
75  #include <err.h>
76  #include <errno.h>
77  #include <fcntl.h>
78  #include <math.h>
79  #include <paths.h>
80  #include <regex.h>
81  #include <stdbool.h>
82  #include <stddef.h>
83  #include <stdint.h>
84  #include <stdio.h>
85  #include <stdlib.h>
86  #include <string.h>
87  
88  #include "pr.h"
89  #include "diff.h"
90  #include "xmalloc.h"
91  
92  /*
93   * diff - compare two files.
94   */
95  
96  /*
97   *	Uses an algorithm due to Harold Stone, which finds a pair of longest
98   *	identical subsequences in the two files.
99   *
100   *	The major goal is to generate the match vector J. J[i] is the index of
101   *	the line in file1 corresponding to line i file0. J[i] = 0 if there is no
102   *	such line in file1.
103   *
104   *	Lines are hashed so as to work in core. All potential matches are
105   *	located by sorting the lines of each file on the hash (called
106   *	``value''). In particular, this collects the equivalence classes in
107   *	file1 together. Subroutine equiv replaces the value of each line in
108   *	file0 by the index of the first element of its matching equivalence in
109   *	(the reordered) file1. To save space equiv squeezes file1 into a single
110   *	array member in which the equivalence classes are simply concatenated,
111   *	except that their first members are flagged by changing sign.
112   *
113   *	Next the indices that point into member are unsorted into array class
114   *	according to the original order of file0.
115   *
116   *	The cleverness lies in routine stone. This marches through the lines of
117   *	file0, developing a vector klist of "k-candidates". At step i
118   *	a k-candidate is a matched pair of lines x,y (x in file0 y in file1)
119   *	such that there is a common subsequence of length k between the first
120   *	i lines of file0 and the first y lines of file1, but there is no such
121   *	subsequence for any smaller y. x is the earliest possible mate to y that
122   *	occurs in such a subsequence.
123   *
124   *	Whenever any of the members of the equivalence class of lines in file1
125   *	matable to a line in file0 has serial number less than the y of some
126   *	k-candidate, that k-candidate with the smallest such y is replaced. The
127   *	new k-candidate is chained (via pred) to the current k-1 candidate so
128   *	that the actual subsequence can be recovered. When a member has serial
129   *	number greater that the y of all k-candidates, the klist is extended. At
130   *	the end, the longest subsequence is pulled out and placed in the array J
131   *	by unravel.
132   *
133   *	With J in hand, the matches there recorded are check'ed against reality
134   *	to assure that no spurious matches have crept in due to hashing. If they
135   *	have, they are broken, and "jackpot" is recorded -- a harmless matter
136   *	except that a true match for a spuriously mated line may now be
137   *	unnecessarily reported as a change.
138   *
139   *	Much of the complexity of the program comes simply from trying to
140   *	minimize core utilization and maximize the range of doable problems by
141   *	dynamically allocating what is needed and reusing what is not. The core
142   *	requirements for problems larger than somewhat are (in words)
143   *	2*length(file0) + length(file1) + 3*(number of k-candidates installed),
144   *	typically about 6n words for files of length n.
145   */
146  
147  struct cand {
148  	int	x;
149  	int	y;
150  	int	pred;
151  };
152  
153  static struct line {
154  	int	serial;
155  	int	value;
156  } *file[2];
157  
158  /*
159   * The following struct is used to record change information when
160   * doing a "context" or "unified" diff.  (see routine "change" to
161   * understand the highly mnemonic field names)
162   */
163  struct context_vec {
164  	int	a;		/* start line in old file */
165  	int	b;		/* end line in old file */
166  	int	c;		/* start line in new file */
167  	int	d;		/* end line in new file */
168  };
169  
170  enum readhash { RH_BINARY, RH_OK, RH_EOF };
171  
172  #define MIN_PAD		1
173  static FILE	*opentemp(const char *);
174  static void	 output(char *, FILE *, char *, FILE *, int);
175  static void	 check(FILE *, FILE *, int);
176  static void	 range(int, int, const char *);
177  static void	 uni_range(int, int);
178  static void	 dump_context_vec(FILE *, FILE *, int);
179  static void	 dump_unified_vec(FILE *, FILE *, int);
180  static bool	 prepare(int, FILE *, size_t, int);
181  static void	 prune(void);
182  static void	 equiv(struct line *, int, struct line *, int, int *);
183  static void	 unravel(int);
184  static void	 unsort(struct line *, int, int *);
185  static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
186  static void	 sort(struct line *, int);
187  static void	 print_header(const char *, const char *);
188  static void	 print_space(int, int, int);
189  static bool	 ignoreline_pattern(char *);
190  static bool	 ignoreline(char *, bool);
191  static int	 asciifile(FILE *);
192  static int	 fetch(long *, int, int, FILE *, int, int, int);
193  static int	 newcand(int, int, int);
194  static int	 search(int *, int, int);
195  static int	 skipline(FILE *);
196  static int	 stone(int *, int, int *, int *, int);
197  static enum readhash readhash(FILE *, int, unsigned *);
198  static int	 files_differ(FILE *, FILE *, int);
199  static char	*match_function(const long *, int, FILE *);
200  static char	*preadline(int, size_t, off_t);
201  
202  static int	 *J;			/* will be overlaid on class */
203  static int	 *class;		/* will be overlaid on file[0] */
204  static int	 *klist;		/* will be overlaid on file[0] after class */
205  static int	 *member;		/* will be overlaid on file[1] */
206  static int	 clen;
207  static int	 inifdef;		/* whether or not we are in a #ifdef block */
208  static int	 len[2];
209  static int	 pref, suff;	/* length of prefix and suffix */
210  static int	 slen[2];
211  static int	 anychange;
212  static int	 hw, padding;	/* half width and padding */
213  static int	 edoffset;
214  static long	*ixnew;		/* will be overlaid on file[1] */
215  static long	*ixold;		/* will be overlaid on klist */
216  static struct cand *clist;	/* merely a free storage pot for candidates */
217  static int	 clistlen;		/* the length of clist */
218  static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
219  static int	(*chrtran)(int);	/* translation table for case-folding */
220  static struct context_vec *context_vec_start;
221  static struct context_vec *context_vec_end;
222  static struct context_vec *context_vec_ptr;
223  
224  #define FUNCTION_CONTEXT_SIZE	55
225  static char lastbuf[FUNCTION_CONTEXT_SIZE];
226  static int lastline;
227  static int lastmatchline;
228  
229  static int
230  clow2low(int c)
231  {
232  
233  	return (c);
234  }
235  
236  static int
237  cup2low(int c)
238  {
239  
240  	return (tolower(c));
241  }
242  
243  int
244  diffreg(char *file1, char *file2, int flags, int capsicum)
245  {
246  	FILE *f1, *f2;
247  	int i, rval;
248  	struct pr *pr = NULL;
249  	cap_rights_t rights_ro;
250  
251  	f1 = f2 = NULL;
252  	rval = D_SAME;
253  	anychange = 0;
254  	lastline = 0;
255  	lastmatchline = 0;
256  
257  	 /*
258  	  * hw excludes padding and make sure when -t is not used,
259  	  * the second column always starts from the closest tab stop
260  	  */
261  	if (diff_format == D_SIDEBYSIDE) {
262  		hw = width >> 1;
263  		padding = tabsize - (hw % tabsize);
264  		if ((flags & D_EXPANDTABS) != 0 || (padding % tabsize == 0))
265  			padding = MIN_PAD;
266  
267  		hw = (width >> 1) -
268  		    ((padding == MIN_PAD) ? (padding << 1) : padding) - 1;
269  	}
270  
271  
272  	if (flags & D_IGNORECASE)
273  		chrtran = cup2low;
274  	else
275  		chrtran = clow2low;
276  	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
277  		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
278  	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
279  		goto closem;
280  
281  	if (flags & D_EMPTY1)
282  		f1 = fopen(_PATH_DEVNULL, "r");
283  	else {
284  		if (!S_ISREG(stb1.st_mode)) {
285  			if ((f1 = opentemp(file1)) == NULL ||
286  			    fstat(fileno(f1), &stb1) == -1) {
287  				warn("%s", file1);
288  				rval = D_ERROR;
289  				status |= 2;
290  				goto closem;
291  			}
292  		} else if (strcmp(file1, "-") == 0)
293  			f1 = stdin;
294  		else
295  			f1 = fopen(file1, "r");
296  	}
297  	if (f1 == NULL) {
298  		warn("%s", file1);
299  		rval = D_ERROR;
300  		status |= 2;
301  		goto closem;
302  	}
303  
304  	if (flags & D_EMPTY2)
305  		f2 = fopen(_PATH_DEVNULL, "r");
306  	else {
307  		if (!S_ISREG(stb2.st_mode)) {
308  			if ((f2 = opentemp(file2)) == NULL ||
309  			    fstat(fileno(f2), &stb2) == -1) {
310  				warn("%s", file2);
311  				rval = D_ERROR;
312  				status |= 2;
313  				goto closem;
314  			}
315  		} else if (strcmp(file2, "-") == 0)
316  			f2 = stdin;
317  		else
318  			f2 = fopen(file2, "r");
319  	}
320  	if (f2 == NULL) {
321  		warn("%s", file2);
322  		rval = D_ERROR;
323  		status |= 2;
324  		goto closem;
325  	}
326  
327  	if (lflag)
328  		pr = start_pr(file1, file2);
329  
330  	if (capsicum) {
331  		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
332  		if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
333  			err(2, "unable to limit rights on: %s", file1);
334  		if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
335  			err(2, "unable to limit rights on: %s", file2);
336  		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
337  			/* stdin has already been limited */
338  			if (caph_limit_stderr() == -1)
339  				err(2, "unable to limit stderr");
340  			if (caph_limit_stdout() == -1)
341  				err(2, "unable to limit stdout");
342  		} else if (caph_limit_stdio() == -1)
343  				err(2, "unable to limit stdio");
344  
345  		caph_cache_catpages();
346  		caph_cache_tzdata();
347  		if (caph_enter() < 0)
348  			err(2, "unable to enter capability mode");
349  	}
350  
351  	switch (files_differ(f1, f2, flags)) {
352  	case 0:
353  		goto closem;
354  	case 1:
355  		break;
356  	default:
357  		/* error */
358  		rval = D_ERROR;
359  		status |= 2;
360  		goto closem;
361  	}
362  
363  	if (diff_format == D_BRIEF && ignore_pats == NULL &&
364  	    (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) == 0)
365  	{
366  		rval = D_DIFFER;
367  		status |= 1;
368  		goto closem;
369  	}
370  	if ((flags & D_FORCEASCII) != 0) {
371  		(void)prepare(0, f1, stb1.st_size, flags);
372  		(void)prepare(1, f2, stb2.st_size, flags);
373  	} else if (!asciifile(f1) || !asciifile(f2) ||
374  		    !prepare(0, f1, stb1.st_size, flags) ||
375  		    !prepare(1, f2, stb2.st_size, flags)) {
376  		rval = D_BINARY;
377  		status |= 1;
378  		goto closem;
379  	}
380  
381  	prune();
382  	sort(sfile[0], slen[0]);
383  	sort(sfile[1], slen[1]);
384  
385  	member = (int *)file[1];
386  	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
387  	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
388  
389  	class = (int *)file[0];
390  	unsort(sfile[0], slen[0], class);
391  	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
392  
393  	klist = xcalloc(slen[0] + 2, sizeof(*klist));
394  	clen = 0;
395  	clistlen = 100;
396  	clist = xcalloc(clistlen, sizeof(*clist));
397  	i = stone(class, slen[0], member, klist, flags);
398  	free(member);
399  	free(class);
400  
401  	J = xreallocarray(J, len[0] + 2, sizeof(*J));
402  	unravel(klist[i]);
403  	free(clist);
404  	free(klist);
405  
406  	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
407  	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
408  	check(f1, f2, flags);
409  	output(file1, f1, file2, f2, flags);
410  
411  closem:
412  	if (pr != NULL)
413  		stop_pr(pr);
414  	if (anychange) {
415  		status |= 1;
416  		if (rval == D_SAME)
417  			rval = D_DIFFER;
418  	}
419  	if (f1 != NULL)
420  		fclose(f1);
421  	if (f2 != NULL)
422  		fclose(f2);
423  
424  	return (rval);
425  }
426  
427  /*
428   * Check to see if the given files differ.
429   * Returns 0 if they are the same, 1 if different, and -1 on error.
430   * XXX - could use code from cmp(1) [faster]
431   */
432  static int
433  files_differ(FILE *f1, FILE *f2, int flags)
434  {
435  	char buf1[BUFSIZ], buf2[BUFSIZ];
436  	size_t i, j;
437  
438  	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
439  	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
440  		return (1);
441  
442  	if (stb1.st_dev == stb2.st_dev && stb1.st_ino == stb2.st_ino)
443  		return (0);
444  
445  	for (;;) {
446  		i = fread(buf1, 1, sizeof(buf1), f1);
447  		j = fread(buf2, 1, sizeof(buf2), f2);
448  		if ((!i && ferror(f1)) || (!j && ferror(f2)))
449  			return (-1);
450  		if (i != j)
451  			return (1);
452  		if (i == 0)
453  			return (0);
454  		if (memcmp(buf1, buf2, i) != 0)
455  			return (1);
456  	}
457  }
458  
459  static FILE *
460  opentemp(const char *f)
461  {
462  	char buf[BUFSIZ], tempfile[PATH_MAX];
463  	ssize_t nread;
464  	int ifd, ofd;
465  
466  	if (strcmp(f, "-") == 0)
467  		ifd = STDIN_FILENO;
468  	else if ((ifd = open(f, O_RDONLY, 0644)) == -1)
469  		return (NULL);
470  
471  	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
472  
473  	if ((ofd = mkstemp(tempfile)) == -1) {
474  		close(ifd);
475  		return (NULL);
476  	}
477  	unlink(tempfile);
478  	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
479  		if (write(ofd, buf, nread) != nread) {
480  			close(ifd);
481  			close(ofd);
482  			return (NULL);
483  		}
484  	}
485  	close(ifd);
486  	lseek(ofd, (off_t)0, SEEK_SET);
487  	return (fdopen(ofd, "r"));
488  }
489  
490  static bool
491  prepare(int i, FILE *fd, size_t filesize, int flags)
492  {
493  	struct line *p;
494  	unsigned h;
495  	size_t sz, j = 0;
496  	enum readhash r;
497  
498  	rewind(fd);
499  
500  	sz = MIN(filesize, SIZE_MAX) / 25;
501  	if (sz < 100)
502  		sz = 100;
503  
504  	p = xcalloc(sz + 3, sizeof(*p));
505  	while ((r = readhash(fd, flags, &h)) != RH_EOF)
506  		switch (r) {
507  		case RH_EOF: /* otherwise clang complains */
508  		case RH_BINARY:
509  			return (false);
510  		case RH_OK:
511  			if (j == sz) {
512  				sz = sz * 3 / 2;
513  				p = xreallocarray(p, sz + 3, sizeof(*p));
514  			}
515  			p[++j].value = h;
516  		}
517  
518  	len[i] = j;
519  	file[i] = p;
520  
521  	return (true);
522  }
523  
524  static void
525  prune(void)
526  {
527  	int i, j;
528  
529  	for (pref = 0; pref < len[0] && pref < len[1] &&
530  	    file[0][pref + 1].value == file[1][pref + 1].value;
531  	    pref++)
532  		;
533  	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
534  	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
535  	    suff++)
536  		;
537  	for (j = 0; j < 2; j++) {
538  		sfile[j] = file[j] + pref;
539  		slen[j] = len[j] - pref - suff;
540  		for (i = 0; i <= slen[j]; i++)
541  			sfile[j][i].serial = i;
542  	}
543  }
544  
545  static void
546  equiv(struct line *a, int n, struct line *b, int m, int *c)
547  {
548  	int i, j;
549  
550  	i = j = 1;
551  	while (i <= n && j <= m) {
552  		if (a[i].value < b[j].value)
553  			a[i++].value = 0;
554  		else if (a[i].value == b[j].value)
555  			a[i++].value = j;
556  		else
557  			j++;
558  	}
559  	while (i <= n)
560  		a[i++].value = 0;
561  	b[m + 1].value = 0;
562  	j = 0;
563  	while (++j <= m) {
564  		c[j] = -b[j].serial;
565  		while (b[j + 1].value == b[j].value) {
566  			j++;
567  			c[j] = b[j].serial;
568  		}
569  	}
570  	c[j] = -1;
571  }
572  
573  static int
574  stone(int *a, int n, int *b, int *c, int flags)
575  {
576  	int i, k, y, j, l;
577  	int oldc, tc, oldl, sq;
578  	unsigned numtries, bound;
579  
580  	if (flags & D_MINIMAL)
581  		bound = UINT_MAX;
582  	else {
583  		sq = sqrt(n);
584  		bound = MAX(256, sq);
585  	}
586  
587  	k = 0;
588  	c[0] = newcand(0, 0, 0);
589  	for (i = 1; i <= n; i++) {
590  		j = a[i];
591  		if (j == 0)
592  			continue;
593  		y = -b[j];
594  		oldl = 0;
595  		oldc = c[0];
596  		numtries = 0;
597  		do {
598  			if (y <= clist[oldc].y)
599  				continue;
600  			l = search(c, k, y);
601  			if (l != oldl + 1)
602  				oldc = c[l - 1];
603  			if (l <= k) {
604  				if (clist[c[l]].y <= y)
605  					continue;
606  				tc = c[l];
607  				c[l] = newcand(i, y, oldc);
608  				oldc = tc;
609  				oldl = l;
610  				numtries++;
611  			} else {
612  				c[l] = newcand(i, y, oldc);
613  				k++;
614  				break;
615  			}
616  		} while ((y = b[++j]) > 0 && numtries < bound);
617  	}
618  	return (k);
619  }
620  
621  static int
622  newcand(int x, int y, int pred)
623  {
624  	struct cand *q;
625  
626  	if (clen == clistlen) {
627  		clistlen = clistlen * 11 / 10;
628  		clist = xreallocarray(clist, clistlen, sizeof(*clist));
629  	}
630  	q = clist + clen;
631  	q->x = x;
632  	q->y = y;
633  	q->pred = pred;
634  	return (clen++);
635  }
636  
637  static int
638  search(int *c, int k, int y)
639  {
640  	int i, j, l, t;
641  
642  	if (clist[c[k]].y < y)	/* quick look for typical case */
643  		return (k + 1);
644  	i = 0;
645  	j = k + 1;
646  	for (;;) {
647  		l = (i + j) / 2;
648  		if (l <= i)
649  			break;
650  		t = clist[c[l]].y;
651  		if (t > y)
652  			j = l;
653  		else if (t < y)
654  			i = l;
655  		else
656  			return (l);
657  	}
658  	return (l + 1);
659  }
660  
661  static void
662  unravel(int p)
663  {
664  	struct cand *q;
665  	int i;
666  
667  	for (i = 0; i <= len[0]; i++)
668  		J[i] = i <= pref ? i :
669  		    i > len[0] - suff ? i + len[1] - len[0] : 0;
670  	for (q = clist + p; q->y != 0; q = clist + q->pred)
671  		J[q->x + pref] = q->y + pref;
672  }
673  
674  /*
675   * Check does double duty:
676   *  1. ferret out any fortuitous correspondences due to confounding by
677   *     hashing (which result in "jackpot")
678   *  2. collect random access indexes to the two files
679   */
680  static void
681  check(FILE *f1, FILE *f2, int flags)
682  {
683  	int i, j, /* jackpot, */ c, d;
684  	long ctold, ctnew;
685  
686  	rewind(f1);
687  	rewind(f2);
688  	j = 1;
689  	ixold[0] = ixnew[0] = 0;
690  	/* jackpot = 0; */
691  	ctold = ctnew = 0;
692  	for (i = 1; i <= len[0]; i++) {
693  		if (J[i] == 0) {
694  			ixold[i] = ctold += skipline(f1);
695  			continue;
696  		}
697  		while (j < J[i]) {
698  			ixnew[j] = ctnew += skipline(f2);
699  			j++;
700  		}
701  		if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS | D_IGNORECASE | D_STRIPCR)) {
702  			for (;;) {
703  				c = getc(f1);
704  				d = getc(f2);
705  				/*
706  				 * GNU diff ignores a missing newline
707  				 * in one file for -b or -w.
708  				 */
709  				if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS)) {
710  					if (c == EOF && d == '\n') {
711  						ctnew++;
712  						break;
713  					} else if (c == '\n' && d == EOF) {
714  						ctold++;
715  						break;
716  					}
717  				}
718  				ctold++;
719  				ctnew++;
720  				if (flags & D_STRIPCR && (c == '\r' || d == '\r')) {
721  					if (c == '\r') {
722  						if ((c = getc(f1)) == '\n') {
723  							ctold++;
724  						} else {
725  							ungetc(c, f1);
726  						}
727  					}
728  					if (d == '\r') {
729  						if ((d = getc(f2)) == '\n') {
730  							ctnew++;
731  						} else {
732  							ungetc(d, f2);
733  						}
734  					}
735  					break;
736  				}
737  				if ((flags & D_FOLDBLANKS) && isspace(c) &&
738  				    isspace(d)) {
739  					do {
740  						if (c == '\n')
741  							break;
742  						ctold++;
743  					} while (isspace(c = getc(f1)));
744  					do {
745  						if (d == '\n')
746  							break;
747  						ctnew++;
748  					} while (isspace(d = getc(f2)));
749  				} else if (flags & D_IGNOREBLANKS) {
750  					while (isspace(c) && c != '\n') {
751  						c = getc(f1);
752  						ctold++;
753  					}
754  					while (isspace(d) && d != '\n') {
755  						d = getc(f2);
756  						ctnew++;
757  					}
758  				}
759  				if (chrtran(c) != chrtran(d)) {
760  					/* jackpot++; */
761  					J[i] = 0;
762  					if (c != '\n' && c != EOF)
763  						ctold += skipline(f1);
764  					if (d != '\n' && c != EOF)
765  						ctnew += skipline(f2);
766  					break;
767  				}
768  				if (c == '\n' || c == EOF)
769  					break;
770  			}
771  		} else {
772  			for (;;) {
773  				ctold++;
774  				ctnew++;
775  				if ((c = getc(f1)) != (d = getc(f2))) {
776  					/* jackpot++; */
777  					J[i] = 0;
778  					if (c != '\n' && c != EOF)
779  						ctold += skipline(f1);
780  					if (d != '\n' && c != EOF)
781  						ctnew += skipline(f2);
782  					break;
783  				}
784  				if (c == '\n' || c == EOF)
785  					break;
786  			}
787  		}
788  		ixold[i] = ctold;
789  		ixnew[j] = ctnew;
790  		j++;
791  	}
792  	for (; j <= len[1]; j++) {
793  		ixnew[j] = ctnew += skipline(f2);
794  	}
795  	/*
796  	 * if (jackpot)
797  	 *	fprintf(stderr, "jackpot\n");
798  	 */
799  }
800  
801  /* shellsort CACM #201 */
802  static void
803  sort(struct line *a, int n)
804  {
805  	struct line *ai, *aim, w;
806  	int j, m = 0, k;
807  
808  	if (n == 0)
809  		return;
810  	for (j = 1; j <= n; j *= 2)
811  		m = 2 * j - 1;
812  	for (m /= 2; m != 0; m /= 2) {
813  		k = n - m;
814  		for (j = 1; j <= k; j++) {
815  			for (ai = &a[j]; ai > a; ai -= m) {
816  				aim = &ai[m];
817  				if (aim < ai)
818  					break;	/* wraparound */
819  				if (aim->value > ai[0].value ||
820  				    (aim->value == ai[0].value &&
821  					aim->serial > ai[0].serial))
822  					break;
823  				w.value = ai[0].value;
824  				ai[0].value = aim->value;
825  				aim->value = w.value;
826  				w.serial = ai[0].serial;
827  				ai[0].serial = aim->serial;
828  				aim->serial = w.serial;
829  			}
830  		}
831  	}
832  }
833  
834  static void
835  unsort(struct line *f, int l, int *b)
836  {
837  	int *a, i;
838  
839  	a = xcalloc(l + 1, sizeof(*a));
840  	for (i = 1; i <= l; i++)
841  		a[f[i].serial] = f[i].value;
842  	for (i = 1; i <= l; i++)
843  		b[i] = a[i];
844  	free(a);
845  }
846  
847  static int
848  skipline(FILE *f)
849  {
850  	int i, c;
851  
852  	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
853  		continue;
854  	return (i);
855  }
856  
857  static void
858  output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
859  {
860  	int i, j, m, i0, i1, j0, j1, nc;
861  
862  	rewind(f1);
863  	rewind(f2);
864  	m = len[0];
865  	J[0] = 0;
866  	J[m + 1] = len[1] + 1;
867  	if (diff_format != D_EDIT) {
868  		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
869  			while (i0 <= m && J[i0] == J[i0 - 1] + 1) {
870  				if (diff_format == D_SIDEBYSIDE && suppress_common != 1) {
871  					nc = fetch(ixold, i0, i0, f1, '\0', 1, flags);
872  					print_space(nc, (hw - nc) + (padding << 1) + 1, flags);
873  					fetch(ixnew, J[i0], J[i0], f2, '\0', 0, flags);
874  					printf("\n");
875  				}
876  				i0++;
877  			}
878  			j0 = J[i0 - 1] + 1;
879  			i1 = i0 - 1;
880  			while (i1 < m && J[i1 + 1] == 0)
881  				i1++;
882  			j1 = J[i1 + 1] - 1;
883  			J[i1] = j1;
884  
885  			/*
886  			 * When using side-by-side, lines from both of the files are
887  			 * printed. The algorithm used by diff(1) identifies the ranges
888  			 * in which two files differ.
889  			 * See the change() function below.
890  			 * The for loop below consumes the shorter range, whereas one of
891  			 * the while loops deals with the longer one.
892  			 */
893  			if (diff_format == D_SIDEBYSIDE) {
894  				for (i = i0, j = j0; i <= i1 && j <= j1; i++, j++)
895  					change(file1, f1, file2, f2, i, i, j, j, &flags);
896  
897  				while (i <= i1) {
898  					change(file1, f1, file2, f2, i, i, j + 1, j, &flags);
899  					i++;
900  				}
901  
902  				while (j <= j1) {
903  					change(file1, f1, file2, f2, i + 1, i, j, j, &flags);
904  					j++;
905  				}
906  			} else
907  				change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
908  		}
909  	} else {
910  		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
911  			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
912  				i0--;
913  			j0 = J[i0 + 1] - 1;
914  			i1 = i0 + 1;
915  			while (i1 > 1 && J[i1 - 1] == 0)
916  				i1--;
917  			j1 = J[i1 - 1] + 1;
918  			J[i1] = j1;
919  			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
920  		}
921  	}
922  	if (m == 0)
923  		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
924  	if (diff_format == D_IFDEF || diff_format == D_GFORMAT) {
925  		for (;;) {
926  #define	c i0
927  			if ((c = getc(f1)) == EOF)
928  				return;
929  			printf("%c", c);
930  		}
931  #undef c
932  	}
933  	if (anychange != 0) {
934  		if (diff_format == D_CONTEXT)
935  			dump_context_vec(f1, f2, flags);
936  		else if (diff_format == D_UNIFIED)
937  			dump_unified_vec(f1, f2, flags);
938  	}
939  }
940  
941  static void
942  range(int a, int b, const char *separator)
943  {
944  	printf("%d", a > b ? b : a);
945  	if (a < b)
946  		printf("%s%d", separator, b);
947  }
948  
949  static void
950  uni_range(int a, int b)
951  {
952  	if (a < b)
953  		printf("%d,%d", a, b - a + 1);
954  	else if (a == b)
955  		printf("%d", b);
956  	else
957  		printf("%d,0", b);
958  }
959  
960  static char *
961  preadline(int fd, size_t rlen, off_t off)
962  {
963  	char *line;
964  	ssize_t nr;
965  
966  	line = xmalloc(rlen + 1);
967  	if ((nr = pread(fd, line, rlen, off)) == -1)
968  		err(2, "preadline");
969  	if (nr > 0 && line[nr-1] == '\n')
970  		nr--;
971  	line[nr] = '\0';
972  	return (line);
973  }
974  
975  static bool
976  ignoreline_pattern(char *line)
977  {
978  	int ret;
979  
980  	ret = regexec(&ignore_re, line, 0, NULL, 0);
981  	return (ret == 0);	/* if it matched, it should be ignored. */
982  }
983  
984  static bool
985  ignoreline(char *line, bool skip_blanks)
986  {
987  
988  	if (skip_blanks && *line == '\0')
989  		return (true);
990  	if (ignore_pats != NULL && ignoreline_pattern(line))
991  		return (true);
992  	return (false);
993  }
994  
995  /*
996   * Indicate that there is a difference between lines a and b of the from file
997   * to get to lines c to d of the to file.  If a is greater then b then there
998   * are no lines in the from file involved and this means that there were
999   * lines appended (beginning at b).  If c is greater than d then there are
1000   * lines missing from the to file.
1001   */
1002  static void
1003  change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1004      int *pflags)
1005  {
1006  	static size_t max_context = 64;
1007  	long curpos;
1008  	int i, nc;
1009  	const char *walk;
1010  	bool skip_blanks, ignore;
1011  
1012  	skip_blanks = (*pflags & D_SKIPBLANKLINES);
1013  restart:
1014  	if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) &&
1015  	    a > b && c > d)
1016  		return;
1017  	if (ignore_pats != NULL || skip_blanks) {
1018  		char *line;
1019  		/*
1020  		 * All lines in the change, insert, or delete must match an ignore
1021  		 * pattern for the change to be ignored.
1022  		 */
1023  		if (a <= b) {		/* Changes and deletes. */
1024  			for (i = a; i <= b; i++) {
1025  				line = preadline(fileno(f1),
1026  				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1027  				ignore = ignoreline(line, skip_blanks);
1028  				free(line);
1029  				if (!ignore)
1030  					goto proceed;
1031  			}
1032  		}
1033  		if (a > b || c <= d) {	/* Changes and inserts. */
1034  			for (i = c; i <= d; i++) {
1035  				line = preadline(fileno(f2),
1036  				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1037  				ignore = ignoreline(line, skip_blanks);
1038  				free(line);
1039  				if (!ignore)
1040  					goto proceed;
1041  			}
1042  		}
1043  		return;
1044  	}
1045  proceed:
1046  	if (*pflags & D_HEADER && diff_format != D_BRIEF) {
1047  		printf("%s %s %s\n", diffargs, file1, file2);
1048  		*pflags &= ~D_HEADER;
1049  	}
1050  	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1051  		/*
1052  		 * Allocate change records as needed.
1053  		 */
1054  		if (context_vec_start == NULL ||
1055  		    context_vec_ptr == context_vec_end - 1) {
1056  			ptrdiff_t offset = -1;
1057  
1058  			if (context_vec_start != NULL)
1059  				offset = context_vec_ptr - context_vec_start;
1060  			max_context <<= 1;
1061  			context_vec_start = xreallocarray(context_vec_start,
1062  			    max_context, sizeof(*context_vec_start));
1063  			context_vec_end = context_vec_start + max_context;
1064  			context_vec_ptr = context_vec_start + offset;
1065  		}
1066  		if (anychange == 0) {
1067  			/*
1068  			 * Print the context/unidiff header first time through.
1069  			 */
1070  			print_header(file1, file2);
1071  			anychange = 1;
1072  		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1073  		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1074  			/*
1075  			 * If this change is more than 'diff_context' lines from the
1076  			 * previous change, dump the record and reset it.
1077  			 */
1078  			if (diff_format == D_CONTEXT)
1079  				dump_context_vec(f1, f2, *pflags);
1080  			else
1081  				dump_unified_vec(f1, f2, *pflags);
1082  		}
1083  		context_vec_ptr++;
1084  		context_vec_ptr->a = a;
1085  		context_vec_ptr->b = b;
1086  		context_vec_ptr->c = c;
1087  		context_vec_ptr->d = d;
1088  		return;
1089  	}
1090  	if (anychange == 0)
1091  		anychange = 1;
1092  	switch (diff_format) {
1093  	case D_BRIEF:
1094  		return;
1095  	case D_NORMAL:
1096  	case D_EDIT:
1097  		range(a, b, ",");
1098  		printf("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1099  		if (diff_format == D_NORMAL)
1100  			range(c, d, ",");
1101  		printf("\n");
1102  		break;
1103  	case D_REVERSE:
1104  		printf("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1105  		range(a, b, " ");
1106  		printf("\n");
1107  		break;
1108  	case D_NREVERSE:
1109  		if (a > b)
1110  			printf("a%d %d\n", b, d - c + 1);
1111  		else {
1112  			printf("d%d %d\n", a, b - a + 1);
1113  			if (!(c > d))
1114  				/* add changed lines */
1115  				printf("a%d %d\n", b, d - c + 1);
1116  		}
1117  		break;
1118  	}
1119  	if (diff_format == D_GFORMAT) {
1120  		curpos = ftell(f1);
1121  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1122  		nc = ixold[a > b ? b : a - 1] - curpos;
1123  		for (i = 0; i < nc; i++)
1124  			printf("%c", getc(f1));
1125  		for (walk = group_format; *walk != '\0'; walk++) {
1126  			if (*walk == '%') {
1127  				walk++;
1128  				switch (*walk) {
1129  				case '<':
1130  					fetch(ixold, a, b, f1, '<', 1, *pflags);
1131  					break;
1132  				case '>':
1133  					fetch(ixnew, c, d, f2, '>', 0, *pflags);
1134  					break;
1135  				default:
1136  					printf("%%%c", *walk);
1137  					break;
1138  				}
1139  				continue;
1140  			}
1141  			printf("%c", *walk);
1142  		}
1143  	}
1144  	if (diff_format == D_SIDEBYSIDE) {
1145  		if (color && a > b)
1146  			printf("\033[%sm", add_code);
1147  		else if (color && c > d)
1148  			printf("\033[%sm", del_code);
1149  		if (a > b) {
1150  			print_space(0, hw + padding , *pflags);
1151  		} else {
1152  			nc = fetch(ixold, a, b, f1, '\0', 1, *pflags);
1153  			print_space(nc, hw - nc + padding, *pflags);
1154  		}
1155  		if (color && a > b)
1156  			printf("\033[%sm", add_code);
1157  		else if (color && c > d)
1158  			printf("\033[%sm", del_code);
1159  		printf("%c", (a > b) ? '>' : ((c > d) ? '<' : '|'));
1160  		if (color && c > d)
1161  			printf("\033[m");
1162  		print_space(hw + padding + 1 , padding, *pflags);
1163  		fetch(ixnew, c, d, f2, '\0', 0, *pflags);
1164  		printf("\n");
1165  	}
1166  	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1167  		fetch(ixold, a, b, f1, '<', 1, *pflags);
1168  		if (a <= b && c <= d && diff_format == D_NORMAL)
1169  			printf("---\n");
1170  	}
1171  	if (diff_format != D_GFORMAT && diff_format != D_SIDEBYSIDE)
1172  		fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1173  	if (edoffset != 0 && diff_format == D_EDIT) {
1174  		/*
1175  		 * A non-zero edoffset value for D_EDIT indicates that the last line
1176  		 * printed was a bare dot (".") that has been escaped as ".." to
1177  		 * prevent ed(1) from misinterpreting it.  We have to add a
1178  		 * substitute command to change this back and restart where we left
1179  		 * off.
1180  		 */
1181  		printf(".\n");
1182  		printf("%ds/.//\n", a + edoffset - 1);
1183  		b = a + edoffset - 1;
1184  		a = b + 1;
1185  		c += edoffset;
1186  		goto restart;
1187  	}
1188  	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1189  		printf(".\n");
1190  	if (inifdef) {
1191  		printf("#endif /* %s */\n", ifdefname);
1192  		inifdef = 0;
1193  	}
1194  }
1195  
1196  static int
1197  fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1198  {
1199  	int i, j, c, lastc, col, nc, newcol;
1200  
1201  	edoffset = 0;
1202  	nc = 0;
1203  	/*
1204  	 * When doing #ifdef's, copy down to current line
1205  	 * if this is the first file, so that stuff makes it to output.
1206  	 */
1207  	if ((diff_format == D_IFDEF) && oldfile) {
1208  		long curpos = ftell(lb);
1209  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1210  		nc = f[a > b ? b : a - 1] - curpos;
1211  		for (i = 0; i < nc; i++)
1212  			printf("%c", getc(lb));
1213  	}
1214  	if (a > b)
1215  		return (0);
1216  	if (diff_format == D_IFDEF) {
1217  		if (inifdef) {
1218  			printf("#else /* %s%s */\n",
1219  			    oldfile == 1 ? "!" : "", ifdefname);
1220  		} else {
1221  			if (oldfile)
1222  				printf("#ifndef %s\n", ifdefname);
1223  			else
1224  				printf("#ifdef %s\n", ifdefname);
1225  		}
1226  		inifdef = 1 + oldfile;
1227  	}
1228  	for (i = a; i <= b; i++) {
1229  		fseek(lb, f[i - 1], SEEK_SET);
1230  		nc = f[i] - f[i - 1];
1231  		if (diff_format == D_SIDEBYSIDE && hw < nc)
1232  			nc = hw;
1233  		if (diff_format != D_IFDEF && diff_format != D_GFORMAT &&
1234  		    ch != '\0') {
1235  			if (color && (ch == '>' || ch == '+'))
1236  				printf("\033[%sm", add_code);
1237  			else if (color && (ch == '<' || ch == '-'))
1238  				printf("\033[%sm", del_code);
1239  			printf("%c", ch);
1240  			if (Tflag && (diff_format == D_NORMAL ||
1241  			    diff_format == D_CONTEXT ||
1242  			    diff_format == D_UNIFIED))
1243  				printf("\t");
1244  			else if (diff_format != D_UNIFIED)
1245  				printf(" ");
1246  		}
1247  		col = j = 0;
1248  		lastc = '\0';
1249  		while (j < nc && (hw == 0 || col < hw)) {
1250  			c = getc(lb);
1251  			if (flags & D_STRIPCR && c == '\r') {
1252  				if ((c = getc(lb)) == '\n')
1253  					j++;
1254  				else {
1255  					ungetc(c, lb);
1256  					c = '\r';
1257  				}
1258  			}
1259  			if (c == EOF) {
1260  				if (diff_format == D_EDIT ||
1261  				    diff_format == D_REVERSE ||
1262  				    diff_format == D_NREVERSE)
1263  					warnx("No newline at end of file");
1264  				else
1265  					printf("\n\\ No newline at end of file\n");
1266  				return (col);
1267  			}
1268  			/*
1269  			 * when using --side-by-side, col needs to be increased
1270  			 * in any case to keep the columns aligned
1271  			 */
1272  			if (c == '\t') {
1273  				if (flags & D_EXPANDTABS) {
1274  					newcol = ((col / tabsize) + 1) * tabsize;
1275  					do {
1276  						printf(" ");
1277  					} while (++col < newcol && col < hw);
1278  				} else {
1279  					if (diff_format == D_SIDEBYSIDE) {
1280  						if ((col + tabsize) > hw) {
1281  							printf("%*s", hw - col, "");
1282  							col = hw;
1283  						} else {
1284  							printf("\t");
1285  							col += tabsize - 1;
1286  						}
1287  					} else {
1288  						printf("\t");
1289  						col++;
1290  					}
1291  				}
1292  			} else {
1293  				if (diff_format == D_EDIT && j == 1 && c == '\n' &&
1294  				    lastc == '.') {
1295  					/*
1296  					 * Don't print a bare "." line since that will confuse
1297  					 * ed(1). Print ".." instead and set the, global variable
1298  					 * edoffset to an offset from which to restart. The
1299  					 * caller must check the value of edoffset
1300  					 */
1301  					printf(".\n");
1302  					edoffset = i - a + 1;
1303  					return (edoffset);
1304  				}
1305  				/* when side-by-side, do not print a newline */
1306  				if (diff_format != D_SIDEBYSIDE || c != '\n') {
1307  					if (color && c == '\n')
1308  						printf("\033[m%c", c);
1309  					else
1310  						printf("%c", c);
1311  					col++;
1312  				}
1313  			}
1314  
1315  			j++;
1316  			lastc = c;
1317  		}
1318  	}
1319  	if (color && diff_format == D_SIDEBYSIDE)
1320  		printf("\033[m");
1321  	return (col);
1322  }
1323  
1324  /*
1325   * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1326   */
1327  static enum readhash
1328  readhash(FILE *f, int flags, unsigned *hash)
1329  {
1330  	int i, t, space;
1331  	unsigned sum;
1332  
1333  	sum = 1;
1334  	space = 0;
1335  	for (i = 0;;) {
1336  		switch (t = getc(f)) {
1337  		case '\0':
1338  			if ((flags & D_FORCEASCII) == 0)
1339  				return (RH_BINARY);
1340  			goto hashchar;
1341  		case '\r':
1342  			if (flags & D_STRIPCR) {
1343  				t = getc(f);
1344  				if (t == '\n')
1345  					break;
1346  				ungetc(t, f);
1347  			}
1348  			/* FALLTHROUGH */
1349  		case '\t':
1350  		case '\v':
1351  		case '\f':
1352  		case ' ':
1353  			if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) != 0) {
1354  				space++;
1355  				continue;
1356  			}
1357  			/* FALLTHROUGH */
1358  		default:
1359  		hashchar:
1360  			if (space && (flags & D_IGNOREBLANKS) == 0) {
1361  				i++;
1362  				space = 0;
1363  			}
1364  			sum = sum * 127 + chrtran(t);
1365  			i++;
1366  			continue;
1367  		case EOF:
1368  			if (i == 0)
1369  				return (RH_EOF);
1370  			/* FALLTHROUGH */
1371  		case '\n':
1372  			break;
1373  		}
1374  		break;
1375  	}
1376  	*hash = sum;
1377  	return (RH_OK);
1378  }
1379  
1380  static int
1381  asciifile(FILE *f)
1382  {
1383  	unsigned char buf[BUFSIZ];
1384  	size_t cnt;
1385  
1386  	if (f == NULL)
1387  		return (1);
1388  
1389  	rewind(f);
1390  	cnt = fread(buf, 1, sizeof(buf), f);
1391  	return (memchr(buf, '\0', cnt) == NULL);
1392  }
1393  
1394  #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre) - 1) == 0)
1395  
1396  static char *
1397  match_function(const long *f, int pos, FILE *fp)
1398  {
1399  	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1400  	size_t nc;
1401  	int last = lastline;
1402  	const char *state = NULL;
1403  
1404  	lastline = pos;
1405  	for (; pos > last; pos--) {
1406  		fseek(fp, f[pos - 1], SEEK_SET);
1407  		nc = f[pos] - f[pos - 1];
1408  		if (nc >= sizeof(buf))
1409  			nc = sizeof(buf) - 1;
1410  		nc = fread(buf, 1, nc, fp);
1411  		if (nc == 0)
1412  			continue;
1413  		buf[nc] = '\0';
1414  		buf[strcspn(buf, "\n")] = '\0';
1415  		if (most_recent_pat != NULL) {
1416  			int ret = regexec(&most_recent_re, buf, 0, NULL, 0);
1417  
1418  			if (ret != 0)
1419  				continue;
1420  			strlcpy(lastbuf, buf, sizeof(lastbuf));
1421  			lastmatchline = pos;
1422  			return (lastbuf);
1423  		} else if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$'
1424  			|| buf[0] == '-' || buf[0] == '+') {
1425  			if (begins_with(buf, "private:")) {
1426  				if (!state)
1427  					state = " (private)";
1428  			} else if (begins_with(buf, "protected:")) {
1429  				if (!state)
1430  					state = " (protected)";
1431  			} else if (begins_with(buf, "public:")) {
1432  				if (!state)
1433  					state = " (public)";
1434  			} else {
1435  				strlcpy(lastbuf, buf, sizeof(lastbuf));
1436  				if (state)
1437  					strlcat(lastbuf, state, sizeof(lastbuf));
1438  				lastmatchline = pos;
1439  				return (lastbuf);
1440  			}
1441  		}
1442  	}
1443  	return (lastmatchline > 0 ? lastbuf : NULL);
1444  }
1445  
1446  /* dump accumulated "context" diff changes */
1447  static void
1448  dump_context_vec(FILE *f1, FILE *f2, int flags)
1449  {
1450  	struct context_vec *cvp = context_vec_start;
1451  	int lowa, upb, lowc, upd, do_output;
1452  	int a, b, c, d;
1453  	char ch, *f;
1454  
1455  	if (context_vec_start > context_vec_ptr)
1456  		return;
1457  
1458  	b = d = 0;		/* gcc */
1459  	lowa = MAX(1, cvp->a - diff_context);
1460  	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1461  	lowc = MAX(1, cvp->c - diff_context);
1462  	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1463  
1464  	printf("***************");
1465  	if (flags & (D_PROTOTYPE | D_MATCHLAST)) {
1466  		f = match_function(ixold, cvp->a - 1, f1);
1467  		if (f != NULL)
1468  			printf(" %s", f);
1469  	}
1470  	printf("\n*** ");
1471  	range(lowa, upb, ",");
1472  	printf(" ****\n");
1473  
1474  	/*
1475  	 * Output changes to the "old" file.  The first loop suppresses
1476  	 * output if there were no changes to the "old" file (we'll see
1477  	 * the "old" lines as context in the "new" list).
1478  	 */
1479  	do_output = 0;
1480  	for (; cvp <= context_vec_ptr; cvp++)
1481  		if (cvp->a <= cvp->b) {
1482  			cvp = context_vec_start;
1483  			do_output++;
1484  			break;
1485  		}
1486  	if (do_output) {
1487  		while (cvp <= context_vec_ptr) {
1488  			a = cvp->a;
1489  			b = cvp->b;
1490  			c = cvp->c;
1491  			d = cvp->d;
1492  
1493  			if (a <= b && c <= d)
1494  				ch = 'c';
1495  			else
1496  				ch = (a <= b) ? 'd' : 'a';
1497  
1498  			if (ch == 'a')
1499  				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1500  			else {
1501  				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1502  				fetch(ixold, a, b, f1,
1503  				    ch == 'c' ? '!' : '-', 0, flags);
1504  			}
1505  			lowa = b + 1;
1506  			cvp++;
1507  		}
1508  		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1509  	}
1510  	/* output changes to the "new" file */
1511  	printf("--- ");
1512  	range(lowc, upd, ",");
1513  	printf(" ----\n");
1514  
1515  	do_output = 0;
1516  	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1517  		if (cvp->c <= cvp->d) {
1518  			cvp = context_vec_start;
1519  			do_output++;
1520  			break;
1521  		}
1522  	if (do_output) {
1523  		while (cvp <= context_vec_ptr) {
1524  			a = cvp->a;
1525  			b = cvp->b;
1526  			c = cvp->c;
1527  			d = cvp->d;
1528  
1529  			if (a <= b && c <= d)
1530  				ch = 'c';
1531  			else
1532  				ch = (a <= b) ? 'd' : 'a';
1533  
1534  			if (ch == 'd')
1535  				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1536  			else {
1537  				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1538  				fetch(ixnew, c, d, f2,
1539  				    ch == 'c' ? '!' : '+', 0, flags);
1540  			}
1541  			lowc = d + 1;
1542  			cvp++;
1543  		}
1544  		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1545  	}
1546  	context_vec_ptr = context_vec_start - 1;
1547  }
1548  
1549  /* dump accumulated "unified" diff changes */
1550  static void
1551  dump_unified_vec(FILE *f1, FILE *f2, int flags)
1552  {
1553  	struct context_vec *cvp = context_vec_start;
1554  	int lowa, upb, lowc, upd;
1555  	int a, b, c, d;
1556  	char ch, *f;
1557  
1558  	if (context_vec_start > context_vec_ptr)
1559  		return;
1560  
1561  	b = d = 0;		/* gcc */
1562  	lowa = MAX(1, cvp->a - diff_context);
1563  	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1564  	lowc = MAX(1, cvp->c - diff_context);
1565  	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1566  
1567  	printf("@@ -");
1568  	uni_range(lowa, upb);
1569  	printf(" +");
1570  	uni_range(lowc, upd);
1571  	printf(" @@");
1572  	if (flags & (D_PROTOTYPE | D_MATCHLAST)) {
1573  		f = match_function(ixold, cvp->a - 1, f1);
1574  		if (f != NULL)
1575  			printf(" %s", f);
1576  	}
1577  	printf("\n");
1578  
1579  	/*
1580  	 * Output changes in "unified" diff format--the old and new lines
1581  	 * are printed together.
1582  	 */
1583  	for (; cvp <= context_vec_ptr; cvp++) {
1584  		a = cvp->a;
1585  		b = cvp->b;
1586  		c = cvp->c;
1587  		d = cvp->d;
1588  
1589  		/*
1590  		 * c: both new and old changes
1591  		 * d: only changes in the old file
1592  		 * a: only changes in the new file
1593  		 */
1594  		if (a <= b && c <= d)
1595  			ch = 'c';
1596  		else
1597  			ch = (a <= b) ? 'd' : 'a';
1598  
1599  		switch (ch) {
1600  		case 'c':
1601  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1602  			fetch(ixold, a, b, f1, '-', 0, flags);
1603  			fetch(ixnew, c, d, f2, '+', 0, flags);
1604  			break;
1605  		case 'd':
1606  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1607  			fetch(ixold, a, b, f1, '-', 0, flags);
1608  			break;
1609  		case 'a':
1610  			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1611  			fetch(ixnew, c, d, f2, '+', 0, flags);
1612  			break;
1613  		}
1614  		lowa = b + 1;
1615  		lowc = d + 1;
1616  	}
1617  	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1618  
1619  	context_vec_ptr = context_vec_start - 1;
1620  }
1621  
1622  static void
1623  print_header(const char *file1, const char *file2)
1624  {
1625  	const char *time_format;
1626  	char buf[256];
1627  	struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1628  	int nsec1 = stb1.st_mtim.tv_nsec;
1629  	int nsec2 = stb2.st_mtim.tv_nsec;
1630  
1631  	time_format = "%Y-%m-%d %H:%M:%S";
1632  
1633  	if (cflag)
1634  		time_format = "%c";
1635  	tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1636  	tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1637  	if (label[0] != NULL)
1638  		printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1639  		    label[0]);
1640  	else {
1641  		strftime(buf, sizeof(buf), time_format, tm_ptr1);
1642  		printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1643  		    file1, buf);
1644  		if (!cflag) {
1645  			strftime(buf, sizeof(buf), "%z", tm_ptr1);
1646  			printf(".%.9d %s", nsec1, buf);
1647  		}
1648  		printf("\n");
1649  	}
1650  	if (label[1] != NULL)
1651  		printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1652  		    label[1]);
1653  	else {
1654  		strftime(buf, sizeof(buf), time_format, tm_ptr2);
1655  		printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1656  		    file2, buf);
1657  		if (!cflag) {
1658  			strftime(buf, sizeof(buf), "%z", tm_ptr2);
1659  			printf(".%.9d %s", nsec2, buf);
1660  		}
1661  		printf("\n");
1662  	}
1663  }
1664  
1665  /*
1666   * Prints n number of space characters either by using tab
1667   * or single space characters.
1668   * nc is the preceding number of characters
1669   */
1670  static void
1671  print_space(int nc, int n, int flags) {
1672  	int i, col;
1673  
1674  	col = n;
1675  	if ((flags & D_EXPANDTABS) == 0) {
1676  		/* first tabstop may be closer than tabsize */
1677  		i = tabsize - (nc % tabsize);
1678  		while (col >= tabsize) {
1679  			printf("\t");
1680  			col -= i;
1681  			i = tabsize;
1682  		}
1683  	}
1684  	printf("%*s", col, "");
1685  }
1686