xref: /freebsd/usr.bin/diff/diffreg.c (revision 53de23f4d140becc3166e87665b0064f215a220e)
1  /*	$OpenBSD: diffreg.c,v 1.93 2019/06/28 13:35:00 deraadt Exp $	*/
2  
3  /*-
4   * SPDX-License-Identifier: BSD-4-Clause
5   *
6   * Copyright (C) Caldera International Inc.  2001-2002.
7   * All rights reserved.
8   *
9   * Redistribution and use in source and binary forms, with or without
10   * modification, are permitted provided that the following conditions
11   * are met:
12   * 1. Redistributions of source code and documentation must retain the above
13   *    copyright notice, this list of conditions and the following disclaimer.
14   * 2. Redistributions in binary form must reproduce the above copyright
15   *    notice, this list of conditions and the following disclaimer in the
16   *    documentation and/or other materials provided with the distribution.
17   * 3. All advertising materials mentioning features or use of this software
18   *    must display the following acknowledgement:
19   *	This product includes software developed or owned by Caldera
20   *	International, Inc.
21   * 4. Neither the name of Caldera International, Inc. nor the names of other
22   *    contributors may be used to endorse or promote products derived from
23   *    this software without specific prior written permission.
24   *
25   * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
26   * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
27   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
28   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29   * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
30   * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35   * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36   * POSSIBILITY OF SUCH DAMAGE.
37   */
38  /*-
39   * Copyright (c) 1991, 1993
40   *	The Regents of the University of California.  All rights reserved.
41   *
42   * Redistribution and use in source and binary forms, with or without
43   * modification, are permitted provided that the following conditions
44   * are met:
45   * 1. Redistributions of source code must retain the above copyright
46   *    notice, this list of conditions and the following disclaimer.
47   * 2. Redistributions in binary form must reproduce the above copyright
48   *    notice, this list of conditions and the following disclaimer in the
49   *    documentation and/or other materials provided with the distribution.
50   * 3. Neither the name of the University nor the names of its contributors
51   *    may be used to endorse or promote products derived from this software
52   *    without specific prior written permission.
53   *
54   * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57   * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64   * SUCH DAMAGE.
65   */
66  
67  #include <sys/capsicum.h>
68  #include <sys/stat.h>
69  
70  #include <capsicum_helpers.h>
71  #include <ctype.h>
72  #include <err.h>
73  #include <errno.h>
74  #include <fcntl.h>
75  #include <math.h>
76  #include <paths.h>
77  #include <regex.h>
78  #include <stdbool.h>
79  #include <stddef.h>
80  #include <stdint.h>
81  #include <stdio.h>
82  #include <stdlib.h>
83  #include <string.h>
84  
85  #include "pr.h"
86  #include "diff.h"
87  #include "xmalloc.h"
88  
89  /*
90   * diff - compare two files.
91   */
92  
93  /*
94   *	Uses an algorithm due to Harold Stone, which finds a pair of longest
95   *	identical subsequences in the two files.
96   *
97   *	The major goal is to generate the match vector J. J[i] is the index of
98   *	the line in file1 corresponding to line i file0. J[i] = 0 if there is no
99   *	such line in file1.
100   *
101   *	Lines are hashed so as to work in core. All potential matches are
102   *	located by sorting the lines of each file on the hash (called
103   *	``value''). In particular, this collects the equivalence classes in
104   *	file1 together. Subroutine equiv replaces the value of each line in
105   *	file0 by the index of the first element of its matching equivalence in
106   *	(the reordered) file1. To save space equiv squeezes file1 into a single
107   *	array member in which the equivalence classes are simply concatenated,
108   *	except that their first members are flagged by changing sign.
109   *
110   *	Next the indices that point into member are unsorted into array class
111   *	according to the original order of file0.
112   *
113   *	The cleverness lies in routine stone. This marches through the lines of
114   *	file0, developing a vector klist of "k-candidates". At step i
115   *	a k-candidate is a matched pair of lines x,y (x in file0 y in file1)
116   *	such that there is a common subsequence of length k between the first
117   *	i lines of file0 and the first y lines of file1, but there is no such
118   *	subsequence for any smaller y. x is the earliest possible mate to y that
119   *	occurs in such a subsequence.
120   *
121   *	Whenever any of the members of the equivalence class of lines in file1
122   *	matable to a line in file0 has serial number less than the y of some
123   *	k-candidate, that k-candidate with the smallest such y is replaced. The
124   *	new k-candidate is chained (via pred) to the current k-1 candidate so
125   *	that the actual subsequence can be recovered. When a member has serial
126   *	number greater that the y of all k-candidates, the klist is extended. At
127   *	the end, the longest subsequence is pulled out and placed in the array J
128   *	by unravel.
129   *
130   *	With J in hand, the matches there recorded are check'ed against reality
131   *	to assure that no spurious matches have crept in due to hashing. If they
132   *	have, they are broken, and "jackpot" is recorded -- a harmless matter
133   *	except that a true match for a spuriously mated line may now be
134   *	unnecessarily reported as a change.
135   *
136   *	Much of the complexity of the program comes simply from trying to
137   *	minimize core utilization and maximize the range of doable problems by
138   *	dynamically allocating what is needed and reusing what is not. The core
139   *	requirements for problems larger than somewhat are (in words)
140   *	2*length(file0) + length(file1) + 3*(number of k-candidates installed),
141   *	typically about 6n words for files of length n.
142   */
143  
144  struct cand {
145  	int	x;
146  	int	y;
147  	int	pred;
148  };
149  
150  static struct line {
151  	int	serial;
152  	int	value;
153  } *file[2];
154  
155  /*
156   * The following struct is used to record change information when
157   * doing a "context" or "unified" diff.  (see routine "change" to
158   * understand the highly mnemonic field names)
159   */
160  struct context_vec {
161  	int	a;		/* start line in old file */
162  	int	b;		/* end line in old file */
163  	int	c;		/* start line in new file */
164  	int	d;		/* end line in new file */
165  };
166  
167  enum readhash { RH_BINARY, RH_OK, RH_EOF };
168  
169  static FILE	*opentemp(const char *);
170  static void	 output(char *, FILE *, char *, FILE *, int);
171  static void	 check(FILE *, FILE *, int);
172  static void	 range(int, int, const char *);
173  static void	 uni_range(int, int);
174  static void	 dump_context_vec(FILE *, FILE *, int);
175  static void	 dump_unified_vec(FILE *, FILE *, int);
176  static bool	 prepare(int, FILE *, size_t, int);
177  static void	 prune(void);
178  static void	 equiv(struct line *, int, struct line *, int, int *);
179  static void	 unravel(int);
180  static void	 unsort(struct line *, int, int *);
181  static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
182  static void	 sort(struct line *, int);
183  static void	 print_header(const char *, const char *);
184  static void	 print_space(int, int, int);
185  static bool	 ignoreline_pattern(char *);
186  static bool	 ignoreline(char *, bool);
187  static int	 asciifile(FILE *);
188  static int	 fetch(long *, int, int, FILE *, int, int, int);
189  static int	 newcand(int, int, int);
190  static int	 search(int *, int, int);
191  static int	 skipline(FILE *);
192  static int	 stone(int *, int, int *, int *, int);
193  static enum readhash readhash(FILE *, int, unsigned *);
194  static int	 files_differ(FILE *, FILE *, int);
195  static char	*match_function(const long *, int, FILE *);
196  static char	*preadline(int, size_t, off_t);
197  
198  static int	 *J;			/* will be overlaid on class */
199  static int	 *class;		/* will be overlaid on file[0] */
200  static int	 *klist;		/* will be overlaid on file[0] after class */
201  static int	 *member;		/* will be overlaid on file[1] */
202  static int	 clen;
203  static int	 inifdef;		/* whether or not we are in a #ifdef block */
204  static int	 len[2];
205  static int	 pref, suff;	/* length of prefix and suffix */
206  static int	 slen[2];
207  static int	 anychange;
208  static int	 hw, lpad, rpad;	/* half width and padding */
209  static int	 edoffset;
210  static long	*ixnew;		/* will be overlaid on file[1] */
211  static long	*ixold;		/* will be overlaid on klist */
212  static struct cand *clist;	/* merely a free storage pot for candidates */
213  static int	 clistlen;		/* the length of clist */
214  static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
215  static int	(*chrtran)(int);	/* translation table for case-folding */
216  static struct context_vec *context_vec_start;
217  static struct context_vec *context_vec_end;
218  static struct context_vec *context_vec_ptr;
219  
220  #define FUNCTION_CONTEXT_SIZE	55
221  static char lastbuf[FUNCTION_CONTEXT_SIZE];
222  static int lastline;
223  static int lastmatchline;
224  
225  static int
226  clow2low(int c)
227  {
228  
229  	return (c);
230  }
231  
232  static int
233  cup2low(int c)
234  {
235  
236  	return (tolower(c));
237  }
238  
239  int
240  diffreg(char *file1, char *file2, int flags, int capsicum)
241  {
242  	FILE *f1, *f2;
243  	int i, rval;
244  	struct pr *pr = NULL;
245  	cap_rights_t rights_ro;
246  
247  	f1 = f2 = NULL;
248  	rval = D_SAME;
249  	anychange = 0;
250  	lastline = 0;
251  	lastmatchline = 0;
252  
253  	/*
254  	 * In side-by-side mode, we need to print the left column, a
255  	 * change marker surrounded by padding, and the right column.
256  	 *
257  	 * If expanding tabs, we don't care about alignment, so we simply
258  	 * subtract 3 from the width and divide by two.
259  	 *
260  	 * If not expanding tabs, we need to ensure that the right column
261  	 * is aligned to a tab stop.  We start with the same formula, then
262  	 * decrement until we reach a size that lets us tab-align the
263  	 * right column.  We then adjust the width down if necessary for
264  	 * the padding calculation to work.
265  	 *
266  	 * Left padding is half the space left over, rounded down; right
267  	 * padding is whatever is needed to match the width.
268  	 */
269  	if (diff_format == D_SIDEBYSIDE) {
270  		if (flags & D_EXPANDTABS) {
271  			if (width > 3) {
272  				hw = (width - 3) / 2;
273  			} else {
274  				/* not enough space */
275  				hw = 0;
276  			}
277  		} else if (width <= 3 || width <= tabsize) {
278  			/* not enough space */
279  			hw = 0;
280  		} else {
281  			hw = (width - 3) / 2;
282  			while (hw > 0 && roundup(hw + 3, tabsize) + hw > width)
283  				hw--;
284  			if (width - (roundup(hw + 3, tabsize) + hw) < tabsize)
285  				width = roundup(hw + 3, tabsize) + hw;
286  		}
287  		lpad = (width - hw * 2 - 1) / 2;
288  		rpad = (width - hw * 2 - 1) - lpad;
289  	}
290  
291  	if (flags & D_IGNORECASE)
292  		chrtran = cup2low;
293  	else
294  		chrtran = clow2low;
295  	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
296  		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
297  	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
298  		goto closem;
299  
300  	if (flags & D_EMPTY1)
301  		f1 = fopen(_PATH_DEVNULL, "r");
302  	else {
303  		if (!S_ISREG(stb1.st_mode)) {
304  			if ((f1 = opentemp(file1)) == NULL ||
305  			    fstat(fileno(f1), &stb1) == -1) {
306  				warn("%s", file1);
307  				rval = D_ERROR;
308  				status |= 2;
309  				goto closem;
310  			}
311  		} else if (strcmp(file1, "-") == 0)
312  			f1 = stdin;
313  		else
314  			f1 = fopen(file1, "r");
315  	}
316  	if (f1 == NULL) {
317  		warn("%s", file1);
318  		rval = D_ERROR;
319  		status |= 2;
320  		goto closem;
321  	}
322  
323  	if (flags & D_EMPTY2)
324  		f2 = fopen(_PATH_DEVNULL, "r");
325  	else {
326  		if (!S_ISREG(stb2.st_mode)) {
327  			if ((f2 = opentemp(file2)) == NULL ||
328  			    fstat(fileno(f2), &stb2) == -1) {
329  				warn("%s", file2);
330  				rval = D_ERROR;
331  				status |= 2;
332  				goto closem;
333  			}
334  		} else if (strcmp(file2, "-") == 0)
335  			f2 = stdin;
336  		else
337  			f2 = fopen(file2, "r");
338  	}
339  	if (f2 == NULL) {
340  		warn("%s", file2);
341  		rval = D_ERROR;
342  		status |= 2;
343  		goto closem;
344  	}
345  
346  	if (lflag)
347  		pr = start_pr(file1, file2);
348  
349  	if (capsicum) {
350  		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
351  		if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
352  			err(2, "unable to limit rights on: %s", file1);
353  		if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
354  			err(2, "unable to limit rights on: %s", file2);
355  		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
356  			/* stdin has already been limited */
357  			if (caph_limit_stderr() == -1)
358  				err(2, "unable to limit stderr");
359  			if (caph_limit_stdout() == -1)
360  				err(2, "unable to limit stdout");
361  		} else if (caph_limit_stdio() == -1)
362  				err(2, "unable to limit stdio");
363  
364  		caph_cache_catpages();
365  		caph_cache_tzdata();
366  		if (caph_enter() < 0)
367  			err(2, "unable to enter capability mode");
368  	}
369  
370  	switch (files_differ(f1, f2, flags)) {
371  	case 0:
372  		goto closem;
373  	case 1:
374  		break;
375  	default:
376  		/* error */
377  		rval = D_ERROR;
378  		status |= 2;
379  		goto closem;
380  	}
381  
382  	if (diff_format == D_BRIEF && ignore_pats == NULL &&
383  	    (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) == 0)
384  	{
385  		rval = D_DIFFER;
386  		status |= 1;
387  		goto closem;
388  	}
389  	if ((flags & D_FORCEASCII) != 0) {
390  		(void)prepare(0, f1, stb1.st_size, flags);
391  		(void)prepare(1, f2, stb2.st_size, flags);
392  	} else if (!asciifile(f1) || !asciifile(f2) ||
393  		    !prepare(0, f1, stb1.st_size, flags) ||
394  		    !prepare(1, f2, stb2.st_size, flags)) {
395  		rval = D_BINARY;
396  		status |= 1;
397  		goto closem;
398  	}
399  
400  	prune();
401  	sort(sfile[0], slen[0]);
402  	sort(sfile[1], slen[1]);
403  
404  	member = (int *)file[1];
405  	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
406  	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
407  
408  	class = (int *)file[0];
409  	unsort(sfile[0], slen[0], class);
410  	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
411  
412  	klist = xcalloc(slen[0] + 2, sizeof(*klist));
413  	clen = 0;
414  	clistlen = 100;
415  	clist = xcalloc(clistlen, sizeof(*clist));
416  	i = stone(class, slen[0], member, klist, flags);
417  	free(member);
418  	free(class);
419  
420  	J = xreallocarray(J, len[0] + 2, sizeof(*J));
421  	unravel(klist[i]);
422  	free(clist);
423  	free(klist);
424  
425  	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
426  	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
427  	check(f1, f2, flags);
428  	output(file1, f1, file2, f2, flags);
429  
430  closem:
431  	if (pr != NULL)
432  		stop_pr(pr);
433  	if (anychange) {
434  		status |= 1;
435  		if (rval == D_SAME)
436  			rval = D_DIFFER;
437  	}
438  	if (f1 != NULL)
439  		fclose(f1);
440  	if (f2 != NULL)
441  		fclose(f2);
442  
443  	return (rval);
444  }
445  
446  /*
447   * Check to see if the given files differ.
448   * Returns 0 if they are the same, 1 if different, and -1 on error.
449   * XXX - could use code from cmp(1) [faster]
450   */
451  static int
452  files_differ(FILE *f1, FILE *f2, int flags)
453  {
454  	char buf1[BUFSIZ], buf2[BUFSIZ];
455  	size_t i, j;
456  
457  	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
458  	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
459  		return (1);
460  
461  	if (stb1.st_dev == stb2.st_dev && stb1.st_ino == stb2.st_ino)
462  		return (0);
463  
464  	for (;;) {
465  		i = fread(buf1, 1, sizeof(buf1), f1);
466  		j = fread(buf2, 1, sizeof(buf2), f2);
467  		if ((!i && ferror(f1)) || (!j && ferror(f2)))
468  			return (-1);
469  		if (i != j)
470  			return (1);
471  		if (i == 0)
472  			return (0);
473  		if (memcmp(buf1, buf2, i) != 0)
474  			return (1);
475  	}
476  }
477  
478  static FILE *
479  opentemp(const char *f)
480  {
481  	char buf[BUFSIZ], tempfile[PATH_MAX];
482  	ssize_t nread;
483  	int ifd, ofd;
484  
485  	if (strcmp(f, "-") == 0)
486  		ifd = STDIN_FILENO;
487  	else if ((ifd = open(f, O_RDONLY, 0644)) == -1)
488  		return (NULL);
489  
490  	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
491  
492  	if ((ofd = mkstemp(tempfile)) == -1) {
493  		close(ifd);
494  		return (NULL);
495  	}
496  	unlink(tempfile);
497  	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
498  		if (write(ofd, buf, nread) != nread) {
499  			close(ifd);
500  			close(ofd);
501  			return (NULL);
502  		}
503  	}
504  	close(ifd);
505  	lseek(ofd, (off_t)0, SEEK_SET);
506  	return (fdopen(ofd, "r"));
507  }
508  
509  static bool
510  prepare(int i, FILE *fd, size_t filesize, int flags)
511  {
512  	struct line *p;
513  	unsigned h;
514  	size_t sz, j = 0;
515  	enum readhash r;
516  
517  	rewind(fd);
518  
519  	sz = MIN(filesize, SIZE_MAX) / 25;
520  	if (sz < 100)
521  		sz = 100;
522  
523  	p = xcalloc(sz + 3, sizeof(*p));
524  	while ((r = readhash(fd, flags, &h)) != RH_EOF)
525  		switch (r) {
526  		case RH_EOF: /* otherwise clang complains */
527  		case RH_BINARY:
528  			return (false);
529  		case RH_OK:
530  			if (j == sz) {
531  				sz = sz * 3 / 2;
532  				p = xreallocarray(p, sz + 3, sizeof(*p));
533  			}
534  			p[++j].value = h;
535  		}
536  
537  	len[i] = j;
538  	file[i] = p;
539  
540  	return (true);
541  }
542  
543  static void
544  prune(void)
545  {
546  	int i, j;
547  
548  	for (pref = 0; pref < len[0] && pref < len[1] &&
549  	    file[0][pref + 1].value == file[1][pref + 1].value;
550  	    pref++)
551  		;
552  	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
553  	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
554  	    suff++)
555  		;
556  	for (j = 0; j < 2; j++) {
557  		sfile[j] = file[j] + pref;
558  		slen[j] = len[j] - pref - suff;
559  		for (i = 0; i <= slen[j]; i++)
560  			sfile[j][i].serial = i;
561  	}
562  }
563  
564  static void
565  equiv(struct line *a, int n, struct line *b, int m, int *c)
566  {
567  	int i, j;
568  
569  	i = j = 1;
570  	while (i <= n && j <= m) {
571  		if (a[i].value < b[j].value)
572  			a[i++].value = 0;
573  		else if (a[i].value == b[j].value)
574  			a[i++].value = j;
575  		else
576  			j++;
577  	}
578  	while (i <= n)
579  		a[i++].value = 0;
580  	b[m + 1].value = 0;
581  	j = 0;
582  	while (++j <= m) {
583  		c[j] = -b[j].serial;
584  		while (b[j + 1].value == b[j].value) {
585  			j++;
586  			c[j] = b[j].serial;
587  		}
588  	}
589  	c[j] = -1;
590  }
591  
592  static int
593  stone(int *a, int n, int *b, int *c, int flags)
594  {
595  	int i, k, y, j, l;
596  	int oldc, tc, oldl, sq;
597  	unsigned numtries, bound;
598  
599  	if (flags & D_MINIMAL)
600  		bound = UINT_MAX;
601  	else {
602  		sq = sqrt(n);
603  		bound = MAX(256, sq);
604  	}
605  
606  	k = 0;
607  	c[0] = newcand(0, 0, 0);
608  	for (i = 1; i <= n; i++) {
609  		j = a[i];
610  		if (j == 0)
611  			continue;
612  		y = -b[j];
613  		oldl = 0;
614  		oldc = c[0];
615  		numtries = 0;
616  		do {
617  			if (y <= clist[oldc].y)
618  				continue;
619  			l = search(c, k, y);
620  			if (l != oldl + 1)
621  				oldc = c[l - 1];
622  			if (l <= k) {
623  				if (clist[c[l]].y <= y)
624  					continue;
625  				tc = c[l];
626  				c[l] = newcand(i, y, oldc);
627  				oldc = tc;
628  				oldl = l;
629  				numtries++;
630  			} else {
631  				c[l] = newcand(i, y, oldc);
632  				k++;
633  				break;
634  			}
635  		} while ((y = b[++j]) > 0 && numtries < bound);
636  	}
637  	return (k);
638  }
639  
640  static int
641  newcand(int x, int y, int pred)
642  {
643  	struct cand *q;
644  
645  	if (clen == clistlen) {
646  		clistlen = clistlen * 11 / 10;
647  		clist = xreallocarray(clist, clistlen, sizeof(*clist));
648  	}
649  	q = clist + clen;
650  	q->x = x;
651  	q->y = y;
652  	q->pred = pred;
653  	return (clen++);
654  }
655  
656  static int
657  search(int *c, int k, int y)
658  {
659  	int i, j, l, t;
660  
661  	if (clist[c[k]].y < y)	/* quick look for typical case */
662  		return (k + 1);
663  	i = 0;
664  	j = k + 1;
665  	for (;;) {
666  		l = (i + j) / 2;
667  		if (l <= i)
668  			break;
669  		t = clist[c[l]].y;
670  		if (t > y)
671  			j = l;
672  		else if (t < y)
673  			i = l;
674  		else
675  			return (l);
676  	}
677  	return (l + 1);
678  }
679  
680  static void
681  unravel(int p)
682  {
683  	struct cand *q;
684  	int i;
685  
686  	for (i = 0; i <= len[0]; i++)
687  		J[i] = i <= pref ? i :
688  		    i > len[0] - suff ? i + len[1] - len[0] : 0;
689  	for (q = clist + p; q->y != 0; q = clist + q->pred)
690  		J[q->x + pref] = q->y + pref;
691  }
692  
693  /*
694   * Check does double duty:
695   *  1. ferret out any fortuitous correspondences due to confounding by
696   *     hashing (which result in "jackpot")
697   *  2. collect random access indexes to the two files
698   */
699  static void
700  check(FILE *f1, FILE *f2, int flags)
701  {
702  	int i, j, /* jackpot, */ c, d;
703  	long ctold, ctnew;
704  
705  	rewind(f1);
706  	rewind(f2);
707  	j = 1;
708  	ixold[0] = ixnew[0] = 0;
709  	/* jackpot = 0; */
710  	ctold = ctnew = 0;
711  	for (i = 1; i <= len[0]; i++) {
712  		if (J[i] == 0) {
713  			ixold[i] = ctold += skipline(f1);
714  			continue;
715  		}
716  		while (j < J[i]) {
717  			ixnew[j] = ctnew += skipline(f2);
718  			j++;
719  		}
720  		if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS | D_IGNORECASE | D_STRIPCR)) {
721  			for (;;) {
722  				c = getc(f1);
723  				d = getc(f2);
724  				/*
725  				 * GNU diff ignores a missing newline
726  				 * in one file for -b or -w.
727  				 */
728  				if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS)) {
729  					if (c == EOF && d == '\n') {
730  						ctnew++;
731  						break;
732  					} else if (c == '\n' && d == EOF) {
733  						ctold++;
734  						break;
735  					}
736  				}
737  				ctold++;
738  				ctnew++;
739  				if (flags & D_STRIPCR && (c == '\r' || d == '\r')) {
740  					if (c == '\r') {
741  						if ((c = getc(f1)) == '\n') {
742  							ctold++;
743  						} else {
744  							ungetc(c, f1);
745  						}
746  					}
747  					if (d == '\r') {
748  						if ((d = getc(f2)) == '\n') {
749  							ctnew++;
750  						} else {
751  							ungetc(d, f2);
752  						}
753  					}
754  					break;
755  				}
756  				if ((flags & D_FOLDBLANKS) && isspace(c) &&
757  				    isspace(d)) {
758  					do {
759  						if (c == '\n')
760  							break;
761  						ctold++;
762  					} while (isspace(c = getc(f1)));
763  					do {
764  						if (d == '\n')
765  							break;
766  						ctnew++;
767  					} while (isspace(d = getc(f2)));
768  				} else if (flags & D_IGNOREBLANKS) {
769  					while (isspace(c) && c != '\n') {
770  						c = getc(f1);
771  						ctold++;
772  					}
773  					while (isspace(d) && d != '\n') {
774  						d = getc(f2);
775  						ctnew++;
776  					}
777  				}
778  				if (chrtran(c) != chrtran(d)) {
779  					/* jackpot++; */
780  					J[i] = 0;
781  					if (c != '\n' && c != EOF)
782  						ctold += skipline(f1);
783  					if (d != '\n' && c != EOF)
784  						ctnew += skipline(f2);
785  					break;
786  				}
787  				if (c == '\n' || c == EOF)
788  					break;
789  			}
790  		} else {
791  			for (;;) {
792  				ctold++;
793  				ctnew++;
794  				if ((c = getc(f1)) != (d = getc(f2))) {
795  					/* jackpot++; */
796  					J[i] = 0;
797  					if (c != '\n' && c != EOF)
798  						ctold += skipline(f1);
799  					if (d != '\n' && c != EOF)
800  						ctnew += skipline(f2);
801  					break;
802  				}
803  				if (c == '\n' || c == EOF)
804  					break;
805  			}
806  		}
807  		ixold[i] = ctold;
808  		ixnew[j] = ctnew;
809  		j++;
810  	}
811  	for (; j <= len[1]; j++) {
812  		ixnew[j] = ctnew += skipline(f2);
813  	}
814  	/*
815  	 * if (jackpot)
816  	 *	fprintf(stderr, "jackpot\n");
817  	 */
818  }
819  
820  /* shellsort CACM #201 */
821  static void
822  sort(struct line *a, int n)
823  {
824  	struct line *ai, *aim, w;
825  	int j, m = 0, k;
826  
827  	if (n == 0)
828  		return;
829  	for (j = 1; j <= n; j *= 2)
830  		m = 2 * j - 1;
831  	for (m /= 2; m != 0; m /= 2) {
832  		k = n - m;
833  		for (j = 1; j <= k; j++) {
834  			for (ai = &a[j]; ai > a; ai -= m) {
835  				aim = &ai[m];
836  				if (aim < ai)
837  					break;	/* wraparound */
838  				if (aim->value > ai[0].value ||
839  				    (aim->value == ai[0].value &&
840  					aim->serial > ai[0].serial))
841  					break;
842  				w.value = ai[0].value;
843  				ai[0].value = aim->value;
844  				aim->value = w.value;
845  				w.serial = ai[0].serial;
846  				ai[0].serial = aim->serial;
847  				aim->serial = w.serial;
848  			}
849  		}
850  	}
851  }
852  
853  static void
854  unsort(struct line *f, int l, int *b)
855  {
856  	int *a, i;
857  
858  	a = xcalloc(l + 1, sizeof(*a));
859  	for (i = 1; i <= l; i++)
860  		a[f[i].serial] = f[i].value;
861  	for (i = 1; i <= l; i++)
862  		b[i] = a[i];
863  	free(a);
864  }
865  
866  static int
867  skipline(FILE *f)
868  {
869  	int i, c;
870  
871  	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
872  		continue;
873  	return (i);
874  }
875  
876  static void
877  output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
878  {
879  	int i, j, m, i0, i1, j0, j1, nc;
880  
881  	rewind(f1);
882  	rewind(f2);
883  	m = len[0];
884  	J[0] = 0;
885  	J[m + 1] = len[1] + 1;
886  	if (diff_format != D_EDIT) {
887  		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
888  			while (i0 <= m && J[i0] == J[i0 - 1] + 1) {
889  				if (diff_format == D_SIDEBYSIDE && suppress_common != 1) {
890  					nc = fetch(ixold, i0, i0, f1, '\0', 1, flags);
891  					print_space(nc, hw - nc + lpad + 1 + rpad, flags);
892  					fetch(ixnew, J[i0], J[i0], f2, '\0', 0, flags);
893  					printf("\n");
894  				}
895  				i0++;
896  			}
897  			j0 = J[i0 - 1] + 1;
898  			i1 = i0 - 1;
899  			while (i1 < m && J[i1 + 1] == 0)
900  				i1++;
901  			j1 = J[i1 + 1] - 1;
902  			J[i1] = j1;
903  
904  			/*
905  			 * When using side-by-side, lines from both of the files are
906  			 * printed. The algorithm used by diff(1) identifies the ranges
907  			 * in which two files differ.
908  			 * See the change() function below.
909  			 * The for loop below consumes the shorter range, whereas one of
910  			 * the while loops deals with the longer one.
911  			 */
912  			if (diff_format == D_SIDEBYSIDE) {
913  				for (i = i0, j = j0; i <= i1 && j <= j1; i++, j++)
914  					change(file1, f1, file2, f2, i, i, j, j, &flags);
915  
916  				while (i <= i1) {
917  					change(file1, f1, file2, f2, i, i, j + 1, j, &flags);
918  					i++;
919  				}
920  
921  				while (j <= j1) {
922  					change(file1, f1, file2, f2, i + 1, i, j, j, &flags);
923  					j++;
924  				}
925  			} else
926  				change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
927  		}
928  	} else {
929  		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
930  			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
931  				i0--;
932  			j0 = J[i0 + 1] - 1;
933  			i1 = i0 + 1;
934  			while (i1 > 1 && J[i1 - 1] == 0)
935  				i1--;
936  			j1 = J[i1 - 1] + 1;
937  			J[i1] = j1;
938  			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
939  		}
940  	}
941  	if (m == 0)
942  		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
943  	if (diff_format == D_IFDEF || diff_format == D_GFORMAT) {
944  		for (;;) {
945  #define	c i0
946  			if ((c = getc(f1)) == EOF)
947  				return;
948  			printf("%c", c);
949  		}
950  #undef c
951  	}
952  	if (anychange != 0) {
953  		if (diff_format == D_CONTEXT)
954  			dump_context_vec(f1, f2, flags);
955  		else if (diff_format == D_UNIFIED)
956  			dump_unified_vec(f1, f2, flags);
957  	}
958  }
959  
960  static void
961  range(int a, int b, const char *separator)
962  {
963  	printf("%d", a > b ? b : a);
964  	if (a < b)
965  		printf("%s%d", separator, b);
966  }
967  
968  static void
969  uni_range(int a, int b)
970  {
971  	if (a < b)
972  		printf("%d,%d", a, b - a + 1);
973  	else if (a == b)
974  		printf("%d", b);
975  	else
976  		printf("%d,0", b);
977  }
978  
979  static char *
980  preadline(int fd, size_t rlen, off_t off)
981  {
982  	char *line;
983  	ssize_t nr;
984  
985  	line = xmalloc(rlen + 1);
986  	if ((nr = pread(fd, line, rlen, off)) == -1)
987  		err(2, "preadline");
988  	if (nr > 0 && line[nr-1] == '\n')
989  		nr--;
990  	line[nr] = '\0';
991  	return (line);
992  }
993  
994  static bool
995  ignoreline_pattern(char *line)
996  {
997  	int ret;
998  
999  	ret = regexec(&ignore_re, line, 0, NULL, 0);
1000  	return (ret == 0);	/* if it matched, it should be ignored. */
1001  }
1002  
1003  static bool
1004  ignoreline(char *line, bool skip_blanks)
1005  {
1006  
1007  	if (skip_blanks && *line == '\0')
1008  		return (true);
1009  	if (ignore_pats != NULL && ignoreline_pattern(line))
1010  		return (true);
1011  	return (false);
1012  }
1013  
1014  /*
1015   * Indicate that there is a difference between lines a and b of the from file
1016   * to get to lines c to d of the to file.  If a is greater then b then there
1017   * are no lines in the from file involved and this means that there were
1018   * lines appended (beginning at b).  If c is greater than d then there are
1019   * lines missing from the to file.
1020   */
1021  static void
1022  change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1023      int *pflags)
1024  {
1025  	static size_t max_context = 64;
1026  	long curpos;
1027  	int i, nc;
1028  	const char *walk;
1029  	bool skip_blanks, ignore;
1030  
1031  	skip_blanks = (*pflags & D_SKIPBLANKLINES);
1032  restart:
1033  	if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) &&
1034  	    a > b && c > d)
1035  		return;
1036  	if (ignore_pats != NULL || skip_blanks) {
1037  		char *line;
1038  		/*
1039  		 * All lines in the change, insert, or delete must match an ignore
1040  		 * pattern for the change to be ignored.
1041  		 */
1042  		if (a <= b) {		/* Changes and deletes. */
1043  			for (i = a; i <= b; i++) {
1044  				line = preadline(fileno(f1),
1045  				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1046  				ignore = ignoreline(line, skip_blanks);
1047  				free(line);
1048  				if (!ignore)
1049  					goto proceed;
1050  			}
1051  		}
1052  		if (a > b || c <= d) {	/* Changes and inserts. */
1053  			for (i = c; i <= d; i++) {
1054  				line = preadline(fileno(f2),
1055  				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1056  				ignore = ignoreline(line, skip_blanks);
1057  				free(line);
1058  				if (!ignore)
1059  					goto proceed;
1060  			}
1061  		}
1062  		return;
1063  	}
1064  proceed:
1065  	if (*pflags & D_HEADER && diff_format != D_BRIEF) {
1066  		printf("%s %s %s\n", diffargs, file1, file2);
1067  		*pflags &= ~D_HEADER;
1068  	}
1069  	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1070  		/*
1071  		 * Allocate change records as needed.
1072  		 */
1073  		if (context_vec_start == NULL ||
1074  		    context_vec_ptr == context_vec_end - 1) {
1075  			ptrdiff_t offset = -1;
1076  
1077  			if (context_vec_start != NULL)
1078  				offset = context_vec_ptr - context_vec_start;
1079  			max_context <<= 1;
1080  			context_vec_start = xreallocarray(context_vec_start,
1081  			    max_context, sizeof(*context_vec_start));
1082  			context_vec_end = context_vec_start + max_context;
1083  			context_vec_ptr = context_vec_start + offset;
1084  		}
1085  		if (anychange == 0) {
1086  			/*
1087  			 * Print the context/unidiff header first time through.
1088  			 */
1089  			print_header(file1, file2);
1090  			anychange = 1;
1091  		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1092  		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1093  			/*
1094  			 * If this change is more than 'diff_context' lines from the
1095  			 * previous change, dump the record and reset it.
1096  			 */
1097  			if (diff_format == D_CONTEXT)
1098  				dump_context_vec(f1, f2, *pflags);
1099  			else
1100  				dump_unified_vec(f1, f2, *pflags);
1101  		}
1102  		context_vec_ptr++;
1103  		context_vec_ptr->a = a;
1104  		context_vec_ptr->b = b;
1105  		context_vec_ptr->c = c;
1106  		context_vec_ptr->d = d;
1107  		return;
1108  	}
1109  	if (anychange == 0)
1110  		anychange = 1;
1111  	switch (diff_format) {
1112  	case D_BRIEF:
1113  		return;
1114  	case D_NORMAL:
1115  	case D_EDIT:
1116  		range(a, b, ",");
1117  		printf("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1118  		if (diff_format == D_NORMAL)
1119  			range(c, d, ",");
1120  		printf("\n");
1121  		break;
1122  	case D_REVERSE:
1123  		printf("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1124  		range(a, b, " ");
1125  		printf("\n");
1126  		break;
1127  	case D_NREVERSE:
1128  		if (a > b)
1129  			printf("a%d %d\n", b, d - c + 1);
1130  		else {
1131  			printf("d%d %d\n", a, b - a + 1);
1132  			if (!(c > d))
1133  				/* add changed lines */
1134  				printf("a%d %d\n", b, d - c + 1);
1135  		}
1136  		break;
1137  	}
1138  	if (diff_format == D_GFORMAT) {
1139  		curpos = ftell(f1);
1140  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1141  		nc = ixold[a > b ? b : a - 1] - curpos;
1142  		for (i = 0; i < nc; i++)
1143  			printf("%c", getc(f1));
1144  		for (walk = group_format; *walk != '\0'; walk++) {
1145  			if (*walk == '%') {
1146  				walk++;
1147  				switch (*walk) {
1148  				case '<':
1149  					fetch(ixold, a, b, f1, '<', 1, *pflags);
1150  					break;
1151  				case '>':
1152  					fetch(ixnew, c, d, f2, '>', 0, *pflags);
1153  					break;
1154  				default:
1155  					printf("%%%c", *walk);
1156  					break;
1157  				}
1158  				continue;
1159  			}
1160  			printf("%c", *walk);
1161  		}
1162  	}
1163  	if (diff_format == D_SIDEBYSIDE) {
1164  		if (color && a > b)
1165  			printf("\033[%sm", add_code);
1166  		else if (color && c > d)
1167  			printf("\033[%sm", del_code);
1168  		if (a > b) {
1169  			print_space(0, hw + lpad, *pflags);
1170  		} else {
1171  			nc = fetch(ixold, a, b, f1, '\0', 1, *pflags);
1172  			print_space(nc, hw - nc + lpad, *pflags);
1173  		}
1174  		if (color && a > b)
1175  			printf("\033[%sm", add_code);
1176  		else if (color && c > d)
1177  			printf("\033[%sm", del_code);
1178  		printf("%c", (a > b) ? '>' : ((c > d) ? '<' : '|'));
1179  		if (color && c > d)
1180  			printf("\033[m");
1181  		print_space(hw + lpad + 1, rpad, *pflags);
1182  		fetch(ixnew, c, d, f2, '\0', 0, *pflags);
1183  		printf("\n");
1184  	}
1185  	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1186  		fetch(ixold, a, b, f1, '<', 1, *pflags);
1187  		if (a <= b && c <= d && diff_format == D_NORMAL)
1188  			printf("---\n");
1189  	}
1190  	if (diff_format != D_GFORMAT && diff_format != D_SIDEBYSIDE)
1191  		fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1192  	if (edoffset != 0 && diff_format == D_EDIT) {
1193  		/*
1194  		 * A non-zero edoffset value for D_EDIT indicates that the last line
1195  		 * printed was a bare dot (".") that has been escaped as ".." to
1196  		 * prevent ed(1) from misinterpreting it.  We have to add a
1197  		 * substitute command to change this back and restart where we left
1198  		 * off.
1199  		 */
1200  		printf(".\n");
1201  		printf("%ds/.//\n", a + edoffset - 1);
1202  		b = a + edoffset - 1;
1203  		a = b + 1;
1204  		c += edoffset;
1205  		goto restart;
1206  	}
1207  	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1208  		printf(".\n");
1209  	if (inifdef) {
1210  		printf("#endif /* %s */\n", ifdefname);
1211  		inifdef = 0;
1212  	}
1213  }
1214  
1215  static int
1216  fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1217  {
1218  	int i, j, c, lastc, col, nc, newcol;
1219  
1220  	edoffset = 0;
1221  	nc = 0;
1222  	/*
1223  	 * When doing #ifdef's, copy down to current line
1224  	 * if this is the first file, so that stuff makes it to output.
1225  	 */
1226  	if ((diff_format == D_IFDEF) && oldfile) {
1227  		long curpos = ftell(lb);
1228  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1229  		nc = f[a > b ? b : a - 1] - curpos;
1230  		for (i = 0; i < nc; i++)
1231  			printf("%c", getc(lb));
1232  	}
1233  	if (a > b)
1234  		return (0);
1235  	if (diff_format == D_IFDEF) {
1236  		if (inifdef) {
1237  			printf("#else /* %s%s */\n",
1238  			    oldfile == 1 ? "!" : "", ifdefname);
1239  		} else {
1240  			if (oldfile)
1241  				printf("#ifndef %s\n", ifdefname);
1242  			else
1243  				printf("#ifdef %s\n", ifdefname);
1244  		}
1245  		inifdef = 1 + oldfile;
1246  	}
1247  	for (i = a; i <= b; i++) {
1248  		fseek(lb, f[i - 1], SEEK_SET);
1249  		nc = f[i] - f[i - 1];
1250  		if (diff_format == D_SIDEBYSIDE && hw < nc)
1251  			nc = hw;
1252  		if (diff_format != D_IFDEF && diff_format != D_GFORMAT &&
1253  		    ch != '\0') {
1254  			if (color && (ch == '>' || ch == '+'))
1255  				printf("\033[%sm", add_code);
1256  			else if (color && (ch == '<' || ch == '-'))
1257  				printf("\033[%sm", del_code);
1258  			printf("%c", ch);
1259  			if (Tflag && (diff_format == D_NORMAL ||
1260  			    diff_format == D_CONTEXT ||
1261  			    diff_format == D_UNIFIED))
1262  				printf("\t");
1263  			else if (diff_format != D_UNIFIED)
1264  				printf(" ");
1265  		}
1266  		col = j = 0;
1267  		lastc = '\0';
1268  		while (j < nc && (hw == 0 || col < hw)) {
1269  			c = getc(lb);
1270  			if (flags & D_STRIPCR && c == '\r') {
1271  				if ((c = getc(lb)) == '\n')
1272  					j++;
1273  				else {
1274  					ungetc(c, lb);
1275  					c = '\r';
1276  				}
1277  			}
1278  			if (c == EOF) {
1279  				if (diff_format == D_EDIT ||
1280  				    diff_format == D_REVERSE ||
1281  				    diff_format == D_NREVERSE)
1282  					warnx("No newline at end of file");
1283  				else
1284  					printf("\n\\ No newline at end of file\n");
1285  				return (col);
1286  			}
1287  			if (c == '\t') {
1288  				/*
1289  				 * Calculate where the tab would bring us.
1290  				 * If it would take us to the end of the
1291  				 * column, either clip it (if expanding
1292  				 * tabs) or return right away (if not).
1293  				 */
1294  				newcol = roundup(col + 1, tabsize);
1295  				if ((flags & D_EXPANDTABS) == 0) {
1296  					if (hw > 0 && newcol >= hw)
1297  						return (col);
1298  					printf("\t");
1299  				} else {
1300  					if (hw > 0 && newcol > hw)
1301  						newcol = hw;
1302  					printf("%*s", newcol - col, "");
1303  				}
1304  				col = newcol;
1305  			} else {
1306  				if (diff_format == D_EDIT && j == 1 && c == '\n' &&
1307  				    lastc == '.') {
1308  					/*
1309  					 * Don't print a bare "." line since that will confuse
1310  					 * ed(1). Print ".." instead and set the, global variable
1311  					 * edoffset to an offset from which to restart. The
1312  					 * caller must check the value of edoffset
1313  					 */
1314  					printf(".\n");
1315  					edoffset = i - a + 1;
1316  					return (edoffset);
1317  				}
1318  				/* when side-by-side, do not print a newline */
1319  				if (diff_format != D_SIDEBYSIDE || c != '\n') {
1320  					if (color && c == '\n')
1321  						printf("\033[m%c", c);
1322  					else
1323  						printf("%c", c);
1324  					col++;
1325  				}
1326  			}
1327  
1328  			j++;
1329  			lastc = c;
1330  		}
1331  	}
1332  	if (color && diff_format == D_SIDEBYSIDE)
1333  		printf("\033[m");
1334  	return (col);
1335  }
1336  
1337  /*
1338   * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1339   */
1340  static enum readhash
1341  readhash(FILE *f, int flags, unsigned *hash)
1342  {
1343  	int i, t, space;
1344  	unsigned sum;
1345  
1346  	sum = 1;
1347  	space = 0;
1348  	for (i = 0;;) {
1349  		switch (t = getc(f)) {
1350  		case '\0':
1351  			if ((flags & D_FORCEASCII) == 0)
1352  				return (RH_BINARY);
1353  			goto hashchar;
1354  		case '\r':
1355  			if (flags & D_STRIPCR) {
1356  				t = getc(f);
1357  				if (t == '\n')
1358  					break;
1359  				ungetc(t, f);
1360  			}
1361  			/* FALLTHROUGH */
1362  		case '\t':
1363  		case '\v':
1364  		case '\f':
1365  		case ' ':
1366  			if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) != 0) {
1367  				space++;
1368  				continue;
1369  			}
1370  			/* FALLTHROUGH */
1371  		default:
1372  		hashchar:
1373  			if (space && (flags & D_IGNOREBLANKS) == 0) {
1374  				i++;
1375  				space = 0;
1376  			}
1377  			sum = sum * 127 + chrtran(t);
1378  			i++;
1379  			continue;
1380  		case EOF:
1381  			if (i == 0)
1382  				return (RH_EOF);
1383  			/* FALLTHROUGH */
1384  		case '\n':
1385  			break;
1386  		}
1387  		break;
1388  	}
1389  	*hash = sum;
1390  	return (RH_OK);
1391  }
1392  
1393  static int
1394  asciifile(FILE *f)
1395  {
1396  	unsigned char buf[BUFSIZ];
1397  	size_t cnt;
1398  
1399  	if (f == NULL)
1400  		return (1);
1401  
1402  	rewind(f);
1403  	cnt = fread(buf, 1, sizeof(buf), f);
1404  	return (memchr(buf, '\0', cnt) == NULL);
1405  }
1406  
1407  #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre) - 1) == 0)
1408  
1409  static char *
1410  match_function(const long *f, int pos, FILE *fp)
1411  {
1412  	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1413  	size_t nc;
1414  	int last = lastline;
1415  	const char *state = NULL;
1416  
1417  	lastline = pos;
1418  	for (; pos > last; pos--) {
1419  		fseek(fp, f[pos - 1], SEEK_SET);
1420  		nc = f[pos] - f[pos - 1];
1421  		if (nc >= sizeof(buf))
1422  			nc = sizeof(buf) - 1;
1423  		nc = fread(buf, 1, nc, fp);
1424  		if (nc == 0)
1425  			continue;
1426  		buf[nc] = '\0';
1427  		buf[strcspn(buf, "\n")] = '\0';
1428  		if (most_recent_pat != NULL) {
1429  			int ret = regexec(&most_recent_re, buf, 0, NULL, 0);
1430  
1431  			if (ret != 0)
1432  				continue;
1433  			strlcpy(lastbuf, buf, sizeof(lastbuf));
1434  			lastmatchline = pos;
1435  			return (lastbuf);
1436  		} else if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$'
1437  			|| buf[0] == '-' || buf[0] == '+') {
1438  			if (begins_with(buf, "private:")) {
1439  				if (!state)
1440  					state = " (private)";
1441  			} else if (begins_with(buf, "protected:")) {
1442  				if (!state)
1443  					state = " (protected)";
1444  			} else if (begins_with(buf, "public:")) {
1445  				if (!state)
1446  					state = " (public)";
1447  			} else {
1448  				strlcpy(lastbuf, buf, sizeof(lastbuf));
1449  				if (state)
1450  					strlcat(lastbuf, state, sizeof(lastbuf));
1451  				lastmatchline = pos;
1452  				return (lastbuf);
1453  			}
1454  		}
1455  	}
1456  	return (lastmatchline > 0 ? lastbuf : NULL);
1457  }
1458  
1459  /* dump accumulated "context" diff changes */
1460  static void
1461  dump_context_vec(FILE *f1, FILE *f2, int flags)
1462  {
1463  	struct context_vec *cvp = context_vec_start;
1464  	int lowa, upb, lowc, upd, do_output;
1465  	int a, b, c, d;
1466  	char ch, *f;
1467  
1468  	if (context_vec_start > context_vec_ptr)
1469  		return;
1470  
1471  	b = d = 0;		/* gcc */
1472  	lowa = MAX(1, cvp->a - diff_context);
1473  	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1474  	lowc = MAX(1, cvp->c - diff_context);
1475  	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1476  
1477  	printf("***************");
1478  	if (flags & (D_PROTOTYPE | D_MATCHLAST)) {
1479  		f = match_function(ixold, cvp->a - 1, f1);
1480  		if (f != NULL)
1481  			printf(" %s", f);
1482  	}
1483  	printf("\n*** ");
1484  	range(lowa, upb, ",");
1485  	printf(" ****\n");
1486  
1487  	/*
1488  	 * Output changes to the "old" file.  The first loop suppresses
1489  	 * output if there were no changes to the "old" file (we'll see
1490  	 * the "old" lines as context in the "new" list).
1491  	 */
1492  	do_output = 0;
1493  	for (; cvp <= context_vec_ptr; cvp++)
1494  		if (cvp->a <= cvp->b) {
1495  			cvp = context_vec_start;
1496  			do_output++;
1497  			break;
1498  		}
1499  	if (do_output) {
1500  		while (cvp <= context_vec_ptr) {
1501  			a = cvp->a;
1502  			b = cvp->b;
1503  			c = cvp->c;
1504  			d = cvp->d;
1505  
1506  			if (a <= b && c <= d)
1507  				ch = 'c';
1508  			else
1509  				ch = (a <= b) ? 'd' : 'a';
1510  
1511  			if (ch == 'a')
1512  				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1513  			else {
1514  				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1515  				fetch(ixold, a, b, f1,
1516  				    ch == 'c' ? '!' : '-', 0, flags);
1517  			}
1518  			lowa = b + 1;
1519  			cvp++;
1520  		}
1521  		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1522  	}
1523  	/* output changes to the "new" file */
1524  	printf("--- ");
1525  	range(lowc, upd, ",");
1526  	printf(" ----\n");
1527  
1528  	do_output = 0;
1529  	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1530  		if (cvp->c <= cvp->d) {
1531  			cvp = context_vec_start;
1532  			do_output++;
1533  			break;
1534  		}
1535  	if (do_output) {
1536  		while (cvp <= context_vec_ptr) {
1537  			a = cvp->a;
1538  			b = cvp->b;
1539  			c = cvp->c;
1540  			d = cvp->d;
1541  
1542  			if (a <= b && c <= d)
1543  				ch = 'c';
1544  			else
1545  				ch = (a <= b) ? 'd' : 'a';
1546  
1547  			if (ch == 'd')
1548  				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1549  			else {
1550  				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1551  				fetch(ixnew, c, d, f2,
1552  				    ch == 'c' ? '!' : '+', 0, flags);
1553  			}
1554  			lowc = d + 1;
1555  			cvp++;
1556  		}
1557  		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1558  	}
1559  	context_vec_ptr = context_vec_start - 1;
1560  }
1561  
1562  /* dump accumulated "unified" diff changes */
1563  static void
1564  dump_unified_vec(FILE *f1, FILE *f2, int flags)
1565  {
1566  	struct context_vec *cvp = context_vec_start;
1567  	int lowa, upb, lowc, upd;
1568  	int a, b, c, d;
1569  	char ch, *f;
1570  
1571  	if (context_vec_start > context_vec_ptr)
1572  		return;
1573  
1574  	b = d = 0;		/* gcc */
1575  	lowa = MAX(1, cvp->a - diff_context);
1576  	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1577  	lowc = MAX(1, cvp->c - diff_context);
1578  	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1579  
1580  	printf("@@ -");
1581  	uni_range(lowa, upb);
1582  	printf(" +");
1583  	uni_range(lowc, upd);
1584  	printf(" @@");
1585  	if (flags & (D_PROTOTYPE | D_MATCHLAST)) {
1586  		f = match_function(ixold, cvp->a - 1, f1);
1587  		if (f != NULL)
1588  			printf(" %s", f);
1589  	}
1590  	printf("\n");
1591  
1592  	/*
1593  	 * Output changes in "unified" diff format--the old and new lines
1594  	 * are printed together.
1595  	 */
1596  	for (; cvp <= context_vec_ptr; cvp++) {
1597  		a = cvp->a;
1598  		b = cvp->b;
1599  		c = cvp->c;
1600  		d = cvp->d;
1601  
1602  		/*
1603  		 * c: both new and old changes
1604  		 * d: only changes in the old file
1605  		 * a: only changes in the new file
1606  		 */
1607  		if (a <= b && c <= d)
1608  			ch = 'c';
1609  		else
1610  			ch = (a <= b) ? 'd' : 'a';
1611  
1612  		switch (ch) {
1613  		case 'c':
1614  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1615  			fetch(ixold, a, b, f1, '-', 0, flags);
1616  			fetch(ixnew, c, d, f2, '+', 0, flags);
1617  			break;
1618  		case 'd':
1619  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1620  			fetch(ixold, a, b, f1, '-', 0, flags);
1621  			break;
1622  		case 'a':
1623  			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1624  			fetch(ixnew, c, d, f2, '+', 0, flags);
1625  			break;
1626  		}
1627  		lowa = b + 1;
1628  		lowc = d + 1;
1629  	}
1630  	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1631  
1632  	context_vec_ptr = context_vec_start - 1;
1633  }
1634  
1635  static void
1636  print_header(const char *file1, const char *file2)
1637  {
1638  	const char *time_format;
1639  	char buf[256];
1640  	struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1641  	int nsec1 = stb1.st_mtim.tv_nsec;
1642  	int nsec2 = stb2.st_mtim.tv_nsec;
1643  
1644  	time_format = "%Y-%m-%d %H:%M:%S";
1645  
1646  	if (cflag)
1647  		time_format = "%c";
1648  	tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1649  	tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1650  	if (label[0] != NULL)
1651  		printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1652  		    label[0]);
1653  	else {
1654  		strftime(buf, sizeof(buf), time_format, tm_ptr1);
1655  		printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1656  		    file1, buf);
1657  		if (!cflag) {
1658  			strftime(buf, sizeof(buf), "%z", tm_ptr1);
1659  			printf(".%.9d %s", nsec1, buf);
1660  		}
1661  		printf("\n");
1662  	}
1663  	if (label[1] != NULL)
1664  		printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1665  		    label[1]);
1666  	else {
1667  		strftime(buf, sizeof(buf), time_format, tm_ptr2);
1668  		printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1669  		    file2, buf);
1670  		if (!cflag) {
1671  			strftime(buf, sizeof(buf), "%z", tm_ptr2);
1672  			printf(".%.9d %s", nsec2, buf);
1673  		}
1674  		printf("\n");
1675  	}
1676  }
1677  
1678  /*
1679   * Prints n number of space characters either by using tab
1680   * or single space characters.
1681   * nc is the preceding number of characters
1682   */
1683  static void
1684  print_space(int nc, int n, int flags)
1685  {
1686  	int col, newcol, tabstop;
1687  
1688  	col = nc;
1689  	newcol = nc + n;
1690  	/* first, use tabs if allowed */
1691  	if ((flags & D_EXPANDTABS) == 0) {
1692  		while ((tabstop = roundup(col + 1, tabsize)) <= newcol) {
1693  			printf("\t");
1694  			col = tabstop;
1695  		}
1696  	}
1697  	/* finish with spaces */
1698  	printf("%*s", newcol - col, "");
1699  }
1700