xref: /freebsd/usr.bin/diff/diffreg.c (revision 2f35419fb26d1e45850696a8e732a12874bc1078)
1  /*	$OpenBSD: diffreg.c,v 1.93 2019/06/28 13:35:00 deraadt Exp $	*/
2  
3  /*-
4   * SPDX-License-Identifier: BSD-4-Clause
5   *
6   * Copyright (C) Caldera International Inc.  2001-2002.
7   * All rights reserved.
8   *
9   * Redistribution and use in source and binary forms, with or without
10   * modification, are permitted provided that the following conditions
11   * are met:
12   * 1. Redistributions of source code and documentation must retain the above
13   *    copyright notice, this list of conditions and the following disclaimer.
14   * 2. Redistributions in binary form must reproduce the above copyright
15   *    notice, this list of conditions and the following disclaimer in the
16   *    documentation and/or other materials provided with the distribution.
17   * 3. All advertising materials mentioning features or use of this software
18   *    must display the following acknowledgement:
19   *	This product includes software developed or owned by Caldera
20   *	International, Inc.
21   * 4. Neither the name of Caldera International, Inc. nor the names of other
22   *    contributors may be used to endorse or promote products derived from
23   *    this software without specific prior written permission.
24   *
25   * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
26   * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
27   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
28   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29   * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
30   * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35   * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36   * POSSIBILITY OF SUCH DAMAGE.
37   */
38  /*-
39   * Copyright (c) 1991, 1993
40   *	The Regents of the University of California.  All rights reserved.
41   *
42   * Redistribution and use in source and binary forms, with or without
43   * modification, are permitted provided that the following conditions
44   * are met:
45   * 1. Redistributions of source code must retain the above copyright
46   *    notice, this list of conditions and the following disclaimer.
47   * 2. Redistributions in binary form must reproduce the above copyright
48   *    notice, this list of conditions and the following disclaimer in the
49   *    documentation and/or other materials provided with the distribution.
50   * 3. Neither the name of the University nor the names of its contributors
51   *    may be used to endorse or promote products derived from this software
52   *    without specific prior written permission.
53   *
54   * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57   * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64   * SUCH DAMAGE.
65   */
66  
67  #include <sys/capsicum.h>
68  #include <sys/stat.h>
69  
70  #include <capsicum_helpers.h>
71  #include <ctype.h>
72  #include <err.h>
73  #include <errno.h>
74  #include <fcntl.h>
75  #include <limits.h>
76  #include <math.h>
77  #include <paths.h>
78  #include <regex.h>
79  #include <stdbool.h>
80  #include <stddef.h>
81  #include <stdint.h>
82  #include <stdio.h>
83  #include <stdlib.h>
84  #include <string.h>
85  
86  #include "pr.h"
87  #include "diff.h"
88  #include "xmalloc.h"
89  
90  /*
91   * diff - compare two files.
92   */
93  
94  /*
95   *	Uses an algorithm due to Harold Stone, which finds a pair of longest
96   *	identical subsequences in the two files.
97   *
98   *	The major goal is to generate the match vector J. J[i] is the index of
99   *	the line in file1 corresponding to line i file0. J[i] = 0 if there is no
100   *	such line in file1.
101   *
102   *	Lines are hashed so as to work in core. All potential matches are
103   *	located by sorting the lines of each file on the hash (called
104   *	``value''). In particular, this collects the equivalence classes in
105   *	file1 together. Subroutine equiv replaces the value of each line in
106   *	file0 by the index of the first element of its matching equivalence in
107   *	(the reordered) file1. To save space equiv squeezes file1 into a single
108   *	array member in which the equivalence classes are simply concatenated,
109   *	except that their first members are flagged by changing sign.
110   *
111   *	Next the indices that point into member are unsorted into array class
112   *	according to the original order of file0.
113   *
114   *	The cleverness lies in routine stone. This marches through the lines of
115   *	file0, developing a vector klist of "k-candidates". At step i
116   *	a k-candidate is a matched pair of lines x,y (x in file0 y in file1)
117   *	such that there is a common subsequence of length k between the first
118   *	i lines of file0 and the first y lines of file1, but there is no such
119   *	subsequence for any smaller y. x is the earliest possible mate to y that
120   *	occurs in such a subsequence.
121   *
122   *	Whenever any of the members of the equivalence class of lines in file1
123   *	matable to a line in file0 has serial number less than the y of some
124   *	k-candidate, that k-candidate with the smallest such y is replaced. The
125   *	new k-candidate is chained (via pred) to the current k-1 candidate so
126   *	that the actual subsequence can be recovered. When a member has serial
127   *	number greater that the y of all k-candidates, the klist is extended. At
128   *	the end, the longest subsequence is pulled out and placed in the array J
129   *	by unravel.
130   *
131   *	With J in hand, the matches there recorded are check'ed against reality
132   *	to assure that no spurious matches have crept in due to hashing. If they
133   *	have, they are broken, and "jackpot" is recorded -- a harmless matter
134   *	except that a true match for a spuriously mated line may now be
135   *	unnecessarily reported as a change.
136   *
137   *	Much of the complexity of the program comes simply from trying to
138   *	minimize core utilization and maximize the range of doable problems by
139   *	dynamically allocating what is needed and reusing what is not. The core
140   *	requirements for problems larger than somewhat are (in words)
141   *	2*length(file0) + length(file1) + 3*(number of k-candidates installed),
142   *	typically about 6n words for files of length n.
143   */
144  
145  struct cand {
146  	int	x;
147  	int	y;
148  	int	pred;
149  };
150  
151  static struct line {
152  	int	serial;
153  	int	value;
154  } *file[2];
155  
156  /*
157   * The following struct is used to record change information when
158   * doing a "context" or "unified" diff.  (see routine "change" to
159   * understand the highly mnemonic field names)
160   */
161  struct context_vec {
162  	int	a;		/* start line in old file */
163  	int	b;		/* end line in old file */
164  	int	c;		/* start line in new file */
165  	int	d;		/* end line in new file */
166  };
167  
168  enum readhash { RH_BINARY, RH_OK, RH_EOF };
169  
170  static int	 diffreg_stone(char *, char *, int, int);
171  static FILE	*opentemp(const char *);
172  static void	 output(char *, FILE *, char *, FILE *, int);
173  static void	 check(FILE *, FILE *, int);
174  static void	 range(int, int, const char *);
175  static void	 uni_range(int, int);
176  static void	 dump_context_vec(FILE *, FILE *, int);
177  static void	 dump_unified_vec(FILE *, FILE *, int);
178  static bool	 prepare(int, FILE *, size_t, int);
179  static void	 prune(void);
180  static void	 equiv(struct line *, int, struct line *, int, int *);
181  static void	 unravel(int);
182  static void	 unsort(struct line *, int, int *);
183  static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
184  static void	 sort(struct line *, int);
185  static void	 print_header(const char *, const char *);
186  static void	 print_space(int, int, int);
187  static bool	 ignoreline_pattern(char *);
188  static bool	 ignoreline(char *, bool);
189  static int	 asciifile(FILE *);
190  static int	 fetch(long *, int, int, FILE *, int, int, int);
191  static int	 newcand(int, int, int);
192  static int	 search(int *, int, int);
193  static int	 skipline(FILE *);
194  static int	 stone(int *, int, int *, int *, int);
195  static enum readhash readhash(FILE *, int, unsigned *);
196  static int	 files_differ(FILE *, FILE *, int);
197  static char	*match_function(const long *, int, FILE *);
198  static char	*preadline(int, size_t, off_t);
199  
200  static int	 *J;			/* will be overlaid on class */
201  static int	 *class;		/* will be overlaid on file[0] */
202  static int	 *klist;		/* will be overlaid on file[0] after class */
203  static int	 *member;		/* will be overlaid on file[1] */
204  static int	 clen;
205  static int	 inifdef;		/* whether or not we are in a #ifdef block */
206  static size_t	 len[2];		/* lengths of files in lines */
207  static size_t	 pref, suff;		/* lengths of prefix and suffix */
208  static size_t	 slen[2];		/* lengths of files minus pref / suff */
209  static int	 anychange;
210  static int	 hw, lpad,rpad;		/* half width and padding */
211  static int	 edoffset;
212  static long	*ixnew;		/* will be overlaid on file[1] */
213  static long	*ixold;		/* will be overlaid on klist */
214  static struct cand *clist;	/* merely a free storage pot for candidates */
215  static int	 clistlen;		/* the length of clist */
216  static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
217  static int	(*chrtran)(int);	/* translation table for case-folding */
218  static struct context_vec *context_vec_start;
219  static struct context_vec *context_vec_end;
220  static struct context_vec *context_vec_ptr;
221  
222  #define FUNCTION_CONTEXT_SIZE	55
223  static char lastbuf[FUNCTION_CONTEXT_SIZE];
224  static int lastline;
225  static int lastmatchline;
226  
227  int
228  diffreg(char *file1, char *file2, int flags, int capsicum)
229  {
230  	/*
231  	 * If we have set the algorithm with -A or --algorithm use that if we
232  	 * can and if not print an error.
233  	 */
234  	if (diff_algorithm_set) {
235  		if (diff_algorithm == D_DIFFMYERS ||
236  		    diff_algorithm == D_DIFFPATIENCE) {
237  			if (can_libdiff(flags))
238  				return diffreg_new(file1, file2, flags, capsicum);
239  			else
240  				errx(2, "cannot use Myers algorithm with selected options");
241  		} else {
242  			/* Fallback to using stone. */
243  			return diffreg_stone(file1, file2, flags, capsicum);
244  		}
245  	} else {
246  		if (can_libdiff(flags))
247  			return diffreg_new(file1, file2, flags, capsicum);
248  		else
249  			return diffreg_stone(file1, file2, flags, capsicum);
250  	}
251  }
252  
253  static int
254  clow2low(int c)
255  {
256  
257  	return (c);
258  }
259  
260  static int
261  cup2low(int c)
262  {
263  
264  	return (tolower(c));
265  }
266  
267  int
268  diffreg_stone(char *file1, char *file2, int flags, int capsicum)
269  {
270  	FILE *f1, *f2;
271  	int i, rval;
272  	struct pr *pr = NULL;
273  	cap_rights_t rights_ro;
274  
275  	f1 = f2 = NULL;
276  	rval = D_SAME;
277  	anychange = 0;
278  	lastline = 0;
279  	lastmatchline = 0;
280  
281  	/*
282  	 * In side-by-side mode, we need to print the left column, a
283  	 * change marker surrounded by padding, and the right column.
284  	 *
285  	 * If expanding tabs, we don't care about alignment, so we simply
286  	 * subtract 3 from the width and divide by two.
287  	 *
288  	 * If not expanding tabs, we need to ensure that the right column
289  	 * is aligned to a tab stop.  We start with the same formula, then
290  	 * decrement until we reach a size that lets us tab-align the
291  	 * right column.  We then adjust the width down if necessary for
292  	 * the padding calculation to work.
293  	 *
294  	 * Left padding is half the space left over, rounded down; right
295  	 * padding is whatever is needed to match the width.
296  	 */
297  	if (diff_format == D_SIDEBYSIDE) {
298  		if (flags & D_EXPANDTABS) {
299  			if (width > 3) {
300  				hw = (width - 3) / 2;
301  			} else {
302  				/* not enough space */
303  				hw = 0;
304  			}
305  		} else if (width <= 3 || width <= tabsize) {
306  			/* not enough space */
307  			hw = 0;
308  		} else {
309  			hw = (width - 3) / 2;
310  			while (hw > 0 && roundup(hw + 3, tabsize) + hw > width)
311  				hw--;
312  			if (width - (roundup(hw + 3, tabsize) + hw) < tabsize)
313  				width = roundup(hw + 3, tabsize) + hw;
314  		}
315  		lpad = (width - hw * 2 - 1) / 2;
316  		rpad = (width - hw * 2 - 1) - lpad;
317  	}
318  
319  	if (flags & D_IGNORECASE)
320  		chrtran = cup2low;
321  	else
322  		chrtran = clow2low;
323  	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
324  		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
325  	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
326  		goto closem;
327  
328  	if (flags & D_EMPTY1)
329  		f1 = fopen(_PATH_DEVNULL, "r");
330  	else {
331  		if (!S_ISREG(stb1.st_mode)) {
332  			if ((f1 = opentemp(file1)) == NULL ||
333  			    fstat(fileno(f1), &stb1) == -1) {
334  				warn("%s", file1);
335  				rval = D_ERROR;
336  				status |= 2;
337  				goto closem;
338  			}
339  		} else if (strcmp(file1, "-") == 0)
340  			f1 = stdin;
341  		else
342  			f1 = fopen(file1, "r");
343  	}
344  	if (f1 == NULL) {
345  		warn("%s", file1);
346  		rval = D_ERROR;
347  		status |= 2;
348  		goto closem;
349  	}
350  
351  	if (flags & D_EMPTY2)
352  		f2 = fopen(_PATH_DEVNULL, "r");
353  	else {
354  		if (!S_ISREG(stb2.st_mode)) {
355  			if ((f2 = opentemp(file2)) == NULL ||
356  			    fstat(fileno(f2), &stb2) == -1) {
357  				warn("%s", file2);
358  				rval = D_ERROR;
359  				status |= 2;
360  				goto closem;
361  			}
362  		} else if (strcmp(file2, "-") == 0)
363  			f2 = stdin;
364  		else
365  			f2 = fopen(file2, "r");
366  	}
367  	if (f2 == NULL) {
368  		warn("%s", file2);
369  		rval = D_ERROR;
370  		status |= 2;
371  		goto closem;
372  	}
373  
374  	if (lflag)
375  		pr = start_pr(file1, file2);
376  
377  	if (capsicum) {
378  		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
379  		if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
380  			err(2, "unable to limit rights on: %s", file1);
381  		if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
382  			err(2, "unable to limit rights on: %s", file2);
383  		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
384  			/* stdin has already been limited */
385  			if (caph_limit_stderr() == -1)
386  				err(2, "unable to limit stderr");
387  			if (caph_limit_stdout() == -1)
388  				err(2, "unable to limit stdout");
389  		} else if (caph_limit_stdio() == -1)
390  				err(2, "unable to limit stdio");
391  
392  		caph_cache_catpages();
393  		caph_cache_tzdata();
394  		if (caph_enter() < 0)
395  			err(2, "unable to enter capability mode");
396  	}
397  
398  	switch (files_differ(f1, f2, flags)) {
399  	case 0:
400  		goto closem;
401  	case 1:
402  		break;
403  	default:
404  		/* error */
405  		rval = D_ERROR;
406  		status |= 2;
407  		goto closem;
408  	}
409  
410  	if (diff_format == D_BRIEF && ignore_pats == NULL &&
411  	    (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|
412  	    D_SKIPBLANKLINES|D_STRIPCR)) == 0)
413  	{
414  		rval = D_DIFFER;
415  		status |= 1;
416  		goto closem;
417  	}
418  	if ((flags & D_FORCEASCII) != 0) {
419  		(void)prepare(0, f1, stb1.st_size, flags);
420  		(void)prepare(1, f2, stb2.st_size, flags);
421  	} else if (!asciifile(f1) || !asciifile(f2) ||
422  		    !prepare(0, f1, stb1.st_size, flags) ||
423  		    !prepare(1, f2, stb2.st_size, flags)) {
424  		rval = D_BINARY;
425  		status |= 1;
426  		goto closem;
427  	}
428  	if (len[0] > INT_MAX - 2)
429  		errc(1, EFBIG, "%s", file1);
430  	if (len[1] > INT_MAX - 2)
431  		errc(1, EFBIG, "%s", file2);
432  
433  	prune();
434  	sort(sfile[0], slen[0]);
435  	sort(sfile[1], slen[1]);
436  
437  	member = (int *)file[1];
438  	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
439  	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
440  
441  	class = (int *)file[0];
442  	unsort(sfile[0], slen[0], class);
443  	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
444  
445  	klist = xcalloc(slen[0] + 2, sizeof(*klist));
446  	clen = 0;
447  	clistlen = 100;
448  	clist = xcalloc(clistlen, sizeof(*clist));
449  	i = stone(class, slen[0], member, klist, flags);
450  	free(member);
451  	free(class);
452  
453  	J = xreallocarray(J, len[0] + 2, sizeof(*J));
454  	unravel(klist[i]);
455  	free(clist);
456  	free(klist);
457  
458  	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
459  	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
460  	check(f1, f2, flags);
461  	output(file1, f1, file2, f2, flags);
462  
463  closem:
464  	if (pr != NULL)
465  		stop_pr(pr);
466  	if (anychange) {
467  		status |= 1;
468  		if (rval == D_SAME)
469  			rval = D_DIFFER;
470  	}
471  	if (f1 != NULL)
472  		fclose(f1);
473  	if (f2 != NULL)
474  		fclose(f2);
475  
476  	return (rval);
477  }
478  
479  /*
480   * Check to see if the given files differ.
481   * Returns 0 if they are the same, 1 if different, and -1 on error.
482   * XXX - could use code from cmp(1) [faster]
483   */
484  static int
485  files_differ(FILE *f1, FILE *f2, int flags)
486  {
487  	char buf1[BUFSIZ], buf2[BUFSIZ];
488  	size_t i, j;
489  
490  	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
491  	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
492  		return (1);
493  
494  	if (stb1.st_dev == stb2.st_dev && stb1.st_ino == stb2.st_ino)
495  		return (0);
496  
497  	for (;;) {
498  		i = fread(buf1, 1, sizeof(buf1), f1);
499  		j = fread(buf2, 1, sizeof(buf2), f2);
500  		if ((!i && ferror(f1)) || (!j && ferror(f2)))
501  			return (-1);
502  		if (i != j)
503  			return (1);
504  		if (i == 0)
505  			return (0);
506  		if (memcmp(buf1, buf2, i) != 0)
507  			return (1);
508  	}
509  }
510  
511  static FILE *
512  opentemp(const char *f)
513  {
514  	char buf[BUFSIZ], tempfile[PATH_MAX];
515  	ssize_t nread;
516  	int ifd, ofd;
517  
518  	if (strcmp(f, "-") == 0)
519  		ifd = STDIN_FILENO;
520  	else if ((ifd = open(f, O_RDONLY, 0644)) == -1)
521  		return (NULL);
522  
523  	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
524  
525  	if ((ofd = mkstemp(tempfile)) == -1) {
526  		close(ifd);
527  		return (NULL);
528  	}
529  	unlink(tempfile);
530  	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
531  		if (write(ofd, buf, nread) != nread) {
532  			close(ifd);
533  			close(ofd);
534  			return (NULL);
535  		}
536  	}
537  	close(ifd);
538  	lseek(ofd, (off_t)0, SEEK_SET);
539  	return (fdopen(ofd, "r"));
540  }
541  
542  static bool
543  prepare(int i, FILE *fd, size_t filesize, int flags)
544  {
545  	struct line *p;
546  	unsigned h;
547  	size_t sz, j = 0;
548  	enum readhash r;
549  
550  	rewind(fd);
551  
552  	sz = MIN(filesize, SIZE_MAX) / 25;
553  	if (sz < 100)
554  		sz = 100;
555  
556  	p = xcalloc(sz + 3, sizeof(*p));
557  	while ((r = readhash(fd, flags, &h)) != RH_EOF) {
558  		if (r == RH_BINARY)
559  			return (false);
560  		if (j == SIZE_MAX)
561  			break;
562  		if (j == sz) {
563  			sz = sz * 3 / 2;
564  			p = xreallocarray(p, sz + 3, sizeof(*p));
565  		}
566  		p[++j].value = h;
567  	}
568  
569  	len[i] = j;
570  	file[i] = p;
571  
572  	return (true);
573  }
574  
575  static void
576  prune(void)
577  {
578  	size_t i, j;
579  
580  	for (pref = 0; pref < len[0] && pref < len[1] &&
581  	    file[0][pref + 1].value == file[1][pref + 1].value;
582  	    pref++)
583  		;
584  	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
585  	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
586  	    suff++)
587  		;
588  	for (j = 0; j < 2; j++) {
589  		sfile[j] = file[j] + pref;
590  		slen[j] = len[j] - pref - suff;
591  		for (i = 0; i <= slen[j]; i++)
592  			sfile[j][i].serial = i;
593  	}
594  }
595  
596  static void
597  equiv(struct line *a, int n, struct line *b, int m, int *c)
598  {
599  	int i, j;
600  
601  	i = j = 1;
602  	while (i <= n && j <= m) {
603  		if (a[i].value < b[j].value)
604  			a[i++].value = 0;
605  		else if (a[i].value == b[j].value)
606  			a[i++].value = j;
607  		else
608  			j++;
609  	}
610  	while (i <= n)
611  		a[i++].value = 0;
612  	b[m + 1].value = 0;
613  	j = 0;
614  	while (++j <= m) {
615  		c[j] = -b[j].serial;
616  		while (b[j + 1].value == b[j].value) {
617  			j++;
618  			c[j] = b[j].serial;
619  		}
620  	}
621  	c[j] = -1;
622  }
623  
624  static int
625  stone(int *a, int n, int *b, int *c, int flags)
626  {
627  	int i, k, y, j, l;
628  	int oldc, tc, oldl, sq;
629  	unsigned numtries, bound;
630  
631  	if (flags & D_MINIMAL)
632  		bound = UINT_MAX;
633  	else {
634  		sq = sqrt(n);
635  		bound = MAX(256, sq);
636  	}
637  
638  	k = 0;
639  	c[0] = newcand(0, 0, 0);
640  	for (i = 1; i <= n; i++) {
641  		j = a[i];
642  		if (j == 0)
643  			continue;
644  		y = -b[j];
645  		oldl = 0;
646  		oldc = c[0];
647  		numtries = 0;
648  		do {
649  			if (y <= clist[oldc].y)
650  				continue;
651  			l = search(c, k, y);
652  			if (l != oldl + 1)
653  				oldc = c[l - 1];
654  			if (l <= k) {
655  				if (clist[c[l]].y <= y)
656  					continue;
657  				tc = c[l];
658  				c[l] = newcand(i, y, oldc);
659  				oldc = tc;
660  				oldl = l;
661  				numtries++;
662  			} else {
663  				c[l] = newcand(i, y, oldc);
664  				k++;
665  				break;
666  			}
667  		} while ((y = b[++j]) > 0 && numtries < bound);
668  	}
669  	return (k);
670  }
671  
672  static int
673  newcand(int x, int y, int pred)
674  {
675  	struct cand *q;
676  
677  	if (clen == clistlen) {
678  		clistlen = clistlen * 11 / 10;
679  		clist = xreallocarray(clist, clistlen, sizeof(*clist));
680  	}
681  	q = clist + clen;
682  	q->x = x;
683  	q->y = y;
684  	q->pred = pred;
685  	return (clen++);
686  }
687  
688  static int
689  search(int *c, int k, int y)
690  {
691  	int i, j, l, t;
692  
693  	if (clist[c[k]].y < y)	/* quick look for typical case */
694  		return (k + 1);
695  	i = 0;
696  	j = k + 1;
697  	for (;;) {
698  		l = (i + j) / 2;
699  		if (l <= i)
700  			break;
701  		t = clist[c[l]].y;
702  		if (t > y)
703  			j = l;
704  		else if (t < y)
705  			i = l;
706  		else
707  			return (l);
708  	}
709  	return (l + 1);
710  }
711  
712  static void
713  unravel(int p)
714  {
715  	struct cand *q;
716  	size_t i;
717  
718  	for (i = 0; i <= len[0]; i++)
719  		J[i] = i <= pref ? i :
720  		    i > len[0] - suff ? i + len[1] - len[0] : 0;
721  	for (q = clist + p; q->y != 0; q = clist + q->pred)
722  		J[q->x + pref] = q->y + pref;
723  }
724  
725  /*
726   * Check does double duty:
727   *  1. ferret out any fortuitous correspondences due to confounding by
728   *     hashing (which result in "jackpot")
729   *  2. collect random access indexes to the two files
730   */
731  static void
732  check(FILE *f1, FILE *f2, int flags)
733  {
734  	int i, j, /* jackpot, */ c, d;
735  	long ctold, ctnew;
736  
737  	rewind(f1);
738  	rewind(f2);
739  	j = 1;
740  	ixold[0] = ixnew[0] = 0;
741  	/* jackpot = 0; */
742  	ctold = ctnew = 0;
743  	for (i = 1; i <= (int)len[0]; i++) {
744  		if (J[i] == 0) {
745  			ixold[i] = ctold += skipline(f1);
746  			continue;
747  		}
748  		while (j < J[i]) {
749  			ixnew[j] = ctnew += skipline(f2);
750  			j++;
751  		}
752  		if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS | D_IGNORECASE | D_STRIPCR)) {
753  			for (;;) {
754  				c = getc(f1);
755  				d = getc(f2);
756  				/*
757  				 * GNU diff ignores a missing newline
758  				 * in one file for -b or -w.
759  				 */
760  				if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS)) {
761  					if (c == EOF && isspace(d)) {
762  						ctnew++;
763  						break;
764  					} else if (isspace(c) && d == EOF) {
765  						ctold++;
766  						break;
767  					}
768  				}
769  				ctold++;
770  				ctnew++;
771  				if (flags & D_STRIPCR && (c == '\r' || d == '\r')) {
772  					if (c == '\r') {
773  						if ((c = getc(f1)) == '\n') {
774  							ctold++;
775  						} else {
776  							ungetc(c, f1);
777  						}
778  					}
779  					if (d == '\r') {
780  						if ((d = getc(f2)) == '\n') {
781  							ctnew++;
782  						} else {
783  							ungetc(d, f2);
784  						}
785  					}
786  					break;
787  				}
788  				if ((flags & D_FOLDBLANKS) && isspace(c) &&
789  				    isspace(d)) {
790  					do {
791  						if (c == '\n')
792  							break;
793  						ctold++;
794  					} while (isspace(c = getc(f1)));
795  					do {
796  						if (d == '\n')
797  							break;
798  						ctnew++;
799  					} while (isspace(d = getc(f2)));
800  				} else if (flags & D_IGNOREBLANKS) {
801  					while (isspace(c) && c != '\n') {
802  						c = getc(f1);
803  						ctold++;
804  					}
805  					while (isspace(d) && d != '\n') {
806  						d = getc(f2);
807  						ctnew++;
808  					}
809  				}
810  				if (chrtran(c) != chrtran(d)) {
811  					/* jackpot++; */
812  					J[i] = 0;
813  					if (c != '\n' && c != EOF)
814  						ctold += skipline(f1);
815  					if (d != '\n' && c != EOF)
816  						ctnew += skipline(f2);
817  					break;
818  				}
819  				if (c == '\n' || c == EOF)
820  					break;
821  			}
822  		} else {
823  			for (;;) {
824  				ctold++;
825  				ctnew++;
826  				if ((c = getc(f1)) != (d = getc(f2))) {
827  					/* jackpot++; */
828  					J[i] = 0;
829  					if (c != '\n' && c != EOF)
830  						ctold += skipline(f1);
831  					if (d != '\n' && c != EOF)
832  						ctnew += skipline(f2);
833  					break;
834  				}
835  				if (c == '\n' || c == EOF)
836  					break;
837  			}
838  		}
839  		ixold[i] = ctold;
840  		ixnew[j] = ctnew;
841  		j++;
842  	}
843  	for (; j <= (int)len[1]; j++) {
844  		ixnew[j] = ctnew += skipline(f2);
845  	}
846  	/*
847  	 * if (jackpot)
848  	 *	fprintf(stderr, "jackpot\n");
849  	 */
850  }
851  
852  /* shellsort CACM #201 */
853  static void
854  sort(struct line *a, int n)
855  {
856  	struct line *ai, *aim, w;
857  	int j, m = 0, k;
858  
859  	if (n == 0)
860  		return;
861  	for (j = 1; j <= n; j *= 2)
862  		m = 2 * j - 1;
863  	for (m /= 2; m != 0; m /= 2) {
864  		k = n - m;
865  		for (j = 1; j <= k; j++) {
866  			for (ai = &a[j]; ai > a; ai -= m) {
867  				aim = &ai[m];
868  				if (aim < ai)
869  					break;	/* wraparound */
870  				if (aim->value > ai[0].value ||
871  				    (aim->value == ai[0].value &&
872  					aim->serial > ai[0].serial))
873  					break;
874  				w.value = ai[0].value;
875  				ai[0].value = aim->value;
876  				aim->value = w.value;
877  				w.serial = ai[0].serial;
878  				ai[0].serial = aim->serial;
879  				aim->serial = w.serial;
880  			}
881  		}
882  	}
883  }
884  
885  static void
886  unsort(struct line *f, int l, int *b)
887  {
888  	int *a, i;
889  
890  	a = xcalloc(l + 1, sizeof(*a));
891  	for (i = 1; i <= l; i++)
892  		a[f[i].serial] = f[i].value;
893  	for (i = 1; i <= l; i++)
894  		b[i] = a[i];
895  	free(a);
896  }
897  
898  static int
899  skipline(FILE *f)
900  {
901  	int i, c;
902  
903  	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
904  		continue;
905  	return (i);
906  }
907  
908  static void
909  output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
910  {
911  	int i, j, m, i0, i1, j0, j1, nc;
912  
913  	rewind(f1);
914  	rewind(f2);
915  	m = len[0];
916  	J[0] = 0;
917  	J[m + 1] = len[1] + 1;
918  	if (diff_format != D_EDIT) {
919  		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
920  			while (i0 <= m && J[i0] == J[i0 - 1] + 1) {
921  				if (diff_format == D_SIDEBYSIDE && suppress_common != 1) {
922  					nc = fetch(ixold, i0, i0, f1, '\0', 1, flags);
923  					print_space(nc, hw - nc + lpad + 1 + rpad, flags);
924  					fetch(ixnew, J[i0], J[i0], f2, '\0', 0, flags);
925  					printf("\n");
926  				}
927  				i0++;
928  			}
929  			j0 = J[i0 - 1] + 1;
930  			i1 = i0 - 1;
931  			while (i1 < m && J[i1 + 1] == 0)
932  				i1++;
933  			j1 = J[i1 + 1] - 1;
934  			J[i1] = j1;
935  
936  			/*
937  			 * When using side-by-side, lines from both of the files are
938  			 * printed. The algorithm used by diff(1) identifies the ranges
939  			 * in which two files differ.
940  			 * See the change() function below.
941  			 * The for loop below consumes the shorter range, whereas one of
942  			 * the while loops deals with the longer one.
943  			 */
944  			if (diff_format == D_SIDEBYSIDE) {
945  				for (i = i0, j = j0; i <= i1 && j <= j1; i++, j++)
946  					change(file1, f1, file2, f2, i, i, j, j, &flags);
947  
948  				while (i <= i1) {
949  					change(file1, f1, file2, f2, i, i, j + 1, j, &flags);
950  					i++;
951  				}
952  
953  				while (j <= j1) {
954  					change(file1, f1, file2, f2, i + 1, i, j, j, &flags);
955  					j++;
956  				}
957  			} else
958  				change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
959  		}
960  	} else {
961  		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
962  			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
963  				i0--;
964  			j0 = J[i0 + 1] - 1;
965  			i1 = i0 + 1;
966  			while (i1 > 1 && J[i1 - 1] == 0)
967  				i1--;
968  			j1 = J[i1 - 1] + 1;
969  			J[i1] = j1;
970  			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
971  		}
972  	}
973  	if (m == 0)
974  		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
975  	if (diff_format == D_IFDEF || diff_format == D_GFORMAT) {
976  		for (;;) {
977  #define	c i0
978  			if ((c = getc(f1)) == EOF)
979  				return;
980  			printf("%c", c);
981  		}
982  #undef c
983  	}
984  	if (anychange != 0) {
985  		if (diff_format == D_CONTEXT)
986  			dump_context_vec(f1, f2, flags);
987  		else if (diff_format == D_UNIFIED)
988  			dump_unified_vec(f1, f2, flags);
989  	}
990  }
991  
992  static void
993  range(int a, int b, const char *separator)
994  {
995  	printf("%d", a > b ? b : a);
996  	if (a < b)
997  		printf("%s%d", separator, b);
998  }
999  
1000  static void
1001  uni_range(int a, int b)
1002  {
1003  	if (a < b)
1004  		printf("%d,%d", a, b - a + 1);
1005  	else if (a == b)
1006  		printf("%d", b);
1007  	else
1008  		printf("%d,0", b);
1009  }
1010  
1011  static char *
1012  preadline(int fd, size_t rlen, off_t off)
1013  {
1014  	char *line;
1015  	ssize_t nr;
1016  
1017  	line = xmalloc(rlen + 1);
1018  	if ((nr = pread(fd, line, rlen, off)) == -1)
1019  		err(2, "preadline");
1020  	if (nr > 0 && line[nr-1] == '\n')
1021  		nr--;
1022  	line[nr] = '\0';
1023  	return (line);
1024  }
1025  
1026  static bool
1027  ignoreline_pattern(char *line)
1028  {
1029  	int ret;
1030  
1031  	ret = regexec(&ignore_re, line, 0, NULL, 0);
1032  	return (ret == 0);	/* if it matched, it should be ignored. */
1033  }
1034  
1035  static bool
1036  ignoreline(char *line, bool skip_blanks)
1037  {
1038  
1039  	if (skip_blanks && *line == '\0')
1040  		return (true);
1041  	if (ignore_pats != NULL && ignoreline_pattern(line))
1042  		return (true);
1043  	return (false);
1044  }
1045  
1046  /*
1047   * Indicate that there is a difference between lines a and b of the from file
1048   * to get to lines c to d of the to file.  If a is greater then b then there
1049   * are no lines in the from file involved and this means that there were
1050   * lines appended (beginning at b).  If c is greater than d then there are
1051   * lines missing from the to file.
1052   */
1053  static void
1054  change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1055      int *pflags)
1056  {
1057  	static size_t max_context = 64;
1058  	long curpos;
1059  	int i, nc;
1060  	const char *walk;
1061  	bool skip_blanks, ignore;
1062  
1063  	skip_blanks = (*pflags & D_SKIPBLANKLINES);
1064  restart:
1065  	if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) &&
1066  	    a > b && c > d)
1067  		return;
1068  	if (ignore_pats != NULL || skip_blanks) {
1069  		char *line;
1070  		/*
1071  		 * All lines in the change, insert, or delete must match an ignore
1072  		 * pattern for the change to be ignored.
1073  		 */
1074  		if (a <= b) {		/* Changes and deletes. */
1075  			for (i = a; i <= b; i++) {
1076  				line = preadline(fileno(f1),
1077  				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1078  				ignore = ignoreline(line, skip_blanks);
1079  				free(line);
1080  				if (!ignore)
1081  					goto proceed;
1082  			}
1083  		}
1084  		if (a > b || c <= d) {	/* Changes and inserts. */
1085  			for (i = c; i <= d; i++) {
1086  				line = preadline(fileno(f2),
1087  				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1088  				ignore = ignoreline(line, skip_blanks);
1089  				free(line);
1090  				if (!ignore)
1091  					goto proceed;
1092  			}
1093  		}
1094  		return;
1095  	}
1096  proceed:
1097  	if (*pflags & D_HEADER && diff_format != D_BRIEF) {
1098  		printf("%s %s %s\n", diffargs, file1, file2);
1099  		*pflags &= ~D_HEADER;
1100  	}
1101  	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1102  		/*
1103  		 * Allocate change records as needed.
1104  		 */
1105  		if (context_vec_start == NULL ||
1106  		    context_vec_ptr == context_vec_end - 1) {
1107  			ptrdiff_t offset = -1;
1108  
1109  			if (context_vec_start != NULL)
1110  				offset = context_vec_ptr - context_vec_start;
1111  			max_context <<= 1;
1112  			context_vec_start = xreallocarray(context_vec_start,
1113  			    max_context, sizeof(*context_vec_start));
1114  			context_vec_end = context_vec_start + max_context;
1115  			context_vec_ptr = context_vec_start + offset;
1116  		}
1117  		if (anychange == 0) {
1118  			/*
1119  			 * Print the context/unidiff header first time through.
1120  			 */
1121  			print_header(file1, file2);
1122  			anychange = 1;
1123  		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1124  		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1125  			/*
1126  			 * If this change is more than 'diff_context' lines from the
1127  			 * previous change, dump the record and reset it.
1128  			 */
1129  			if (diff_format == D_CONTEXT)
1130  				dump_context_vec(f1, f2, *pflags);
1131  			else
1132  				dump_unified_vec(f1, f2, *pflags);
1133  		}
1134  		context_vec_ptr++;
1135  		context_vec_ptr->a = a;
1136  		context_vec_ptr->b = b;
1137  		context_vec_ptr->c = c;
1138  		context_vec_ptr->d = d;
1139  		return;
1140  	}
1141  	if (anychange == 0)
1142  		anychange = 1;
1143  	switch (diff_format) {
1144  	case D_BRIEF:
1145  		return;
1146  	case D_NORMAL:
1147  	case D_EDIT:
1148  		range(a, b, ",");
1149  		printf("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1150  		if (diff_format == D_NORMAL)
1151  			range(c, d, ",");
1152  		printf("\n");
1153  		break;
1154  	case D_REVERSE:
1155  		printf("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1156  		range(a, b, " ");
1157  		printf("\n");
1158  		break;
1159  	case D_NREVERSE:
1160  		if (a > b)
1161  			printf("a%d %d\n", b, d - c + 1);
1162  		else {
1163  			printf("d%d %d\n", a, b - a + 1);
1164  			if (!(c > d))
1165  				/* add changed lines */
1166  				printf("a%d %d\n", b, d - c + 1);
1167  		}
1168  		break;
1169  	}
1170  	if (diff_format == D_GFORMAT) {
1171  		curpos = ftell(f1);
1172  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1173  		nc = ixold[a > b ? b : a - 1] - curpos;
1174  		for (i = 0; i < nc; i++)
1175  			printf("%c", getc(f1));
1176  		for (walk = group_format; *walk != '\0'; walk++) {
1177  			if (*walk == '%') {
1178  				walk++;
1179  				switch (*walk) {
1180  				case '<':
1181  					fetch(ixold, a, b, f1, '<', 1, *pflags);
1182  					break;
1183  				case '>':
1184  					fetch(ixnew, c, d, f2, '>', 0, *pflags);
1185  					break;
1186  				default:
1187  					printf("%%%c", *walk);
1188  					break;
1189  				}
1190  				continue;
1191  			}
1192  			printf("%c", *walk);
1193  		}
1194  	}
1195  	if (diff_format == D_SIDEBYSIDE) {
1196  		if (color && a > b)
1197  			printf("\033[%sm", add_code);
1198  		else if (color && c > d)
1199  			printf("\033[%sm", del_code);
1200  		if (a > b) {
1201  			print_space(0, hw + lpad, *pflags);
1202  		} else {
1203  			nc = fetch(ixold, a, b, f1, '\0', 1, *pflags);
1204  			print_space(nc, hw - nc + lpad, *pflags);
1205  		}
1206  		if (color && a > b)
1207  			printf("\033[%sm", add_code);
1208  		else if (color && c > d)
1209  			printf("\033[%sm", del_code);
1210  		printf("%c", (a > b) ? '>' : ((c > d) ? '<' : '|'));
1211  		if (color && c > d)
1212  			printf("\033[m");
1213  		print_space(hw + lpad + 1, rpad, *pflags);
1214  		fetch(ixnew, c, d, f2, '\0', 0, *pflags);
1215  		printf("\n");
1216  	}
1217  	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1218  		fetch(ixold, a, b, f1, '<', 1, *pflags);
1219  		if (a <= b && c <= d && diff_format == D_NORMAL)
1220  			printf("---\n");
1221  	}
1222  	if (diff_format != D_GFORMAT && diff_format != D_SIDEBYSIDE)
1223  		fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1224  	if (edoffset != 0 && diff_format == D_EDIT) {
1225  		/*
1226  		 * A non-zero edoffset value for D_EDIT indicates that the last line
1227  		 * printed was a bare dot (".") that has been escaped as ".." to
1228  		 * prevent ed(1) from misinterpreting it.  We have to add a
1229  		 * substitute command to change this back and restart where we left
1230  		 * off.
1231  		 */
1232  		printf(".\n");
1233  		printf("%ds/.//\n", a + edoffset - 1);
1234  		b = a + edoffset - 1;
1235  		a = b + 1;
1236  		c += edoffset;
1237  		goto restart;
1238  	}
1239  	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1240  		printf(".\n");
1241  	if (inifdef) {
1242  		printf("#endif /* %s */\n", ifdefname);
1243  		inifdef = 0;
1244  	}
1245  }
1246  
1247  static int
1248  fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1249  {
1250  	int i, j, c, lastc, col, nc, newcol;
1251  
1252  	edoffset = 0;
1253  	nc = 0;
1254  	col = 0;
1255  	/*
1256  	 * When doing #ifdef's, copy down to current line
1257  	 * if this is the first file, so that stuff makes it to output.
1258  	 */
1259  	if ((diff_format == D_IFDEF) && oldfile) {
1260  		long curpos = ftell(lb);
1261  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1262  		nc = f[a > b ? b : a - 1] - curpos;
1263  		for (i = 0; i < nc; i++)
1264  			printf("%c", getc(lb));
1265  	}
1266  	if (a > b)
1267  		return (0);
1268  	if (diff_format == D_IFDEF) {
1269  		if (inifdef) {
1270  			printf("#else /* %s%s */\n",
1271  			    oldfile == 1 ? "!" : "", ifdefname);
1272  		} else {
1273  			if (oldfile)
1274  				printf("#ifndef %s\n", ifdefname);
1275  			else
1276  				printf("#ifdef %s\n", ifdefname);
1277  		}
1278  		inifdef = 1 + oldfile;
1279  	}
1280  	for (i = a; i <= b; i++) {
1281  		fseek(lb, f[i - 1], SEEK_SET);
1282  		nc = f[i] - f[i - 1];
1283  		if (diff_format == D_SIDEBYSIDE && hw < nc)
1284  			nc = hw;
1285  		if (diff_format != D_IFDEF && diff_format != D_GFORMAT &&
1286  		    ch != '\0') {
1287  			if (color && (ch == '>' || ch == '+'))
1288  				printf("\033[%sm", add_code);
1289  			else if (color && (ch == '<' || ch == '-'))
1290  				printf("\033[%sm", del_code);
1291  			printf("%c", ch);
1292  			if (Tflag && (diff_format == D_NORMAL ||
1293  			    diff_format == D_CONTEXT ||
1294  			    diff_format == D_UNIFIED))
1295  				printf("\t");
1296  			else if (diff_format != D_UNIFIED)
1297  				printf(" ");
1298  		}
1299  		col = j = 0;
1300  		lastc = '\0';
1301  		while (j < nc && (hw == 0 || col < hw)) {
1302  			c = getc(lb);
1303  			if (flags & D_STRIPCR && c == '\r') {
1304  				if ((c = getc(lb)) == '\n')
1305  					j++;
1306  				else {
1307  					ungetc(c, lb);
1308  					c = '\r';
1309  				}
1310  			}
1311  			if (c == EOF) {
1312  				if (diff_format == D_EDIT ||
1313  				    diff_format == D_REVERSE ||
1314  				    diff_format == D_NREVERSE)
1315  					warnx("No newline at end of file");
1316  				else
1317  					printf("\n\\ No newline at end of file\n");
1318  				return (col);
1319  			}
1320  			/*
1321  			 * when using --side-by-side, col needs to be increased
1322  			 * in any case to keep the columns aligned
1323  			 */
1324  			if (c == '\t') {
1325  				/*
1326  				 * Calculate where the tab would bring us.
1327  				 * If it would take us to the end of the
1328  				 * column, either clip it (if expanding
1329  				 * tabs) or return right away (if not).
1330  				 */
1331  				newcol = roundup(col + 1, tabsize);
1332  				if ((flags & D_EXPANDTABS) == 0) {
1333  					if (hw > 0 && newcol >= hw)
1334  						return (col);
1335  					printf("\t");
1336  				} else {
1337  					if (hw > 0 && newcol > hw)
1338  						newcol = hw;
1339  					printf("%*s", newcol - col, "");
1340  				}
1341  				col = newcol;
1342  			} else {
1343  				if (diff_format == D_EDIT && j == 1 && c == '\n' &&
1344  				    lastc == '.') {
1345  					/*
1346  					 * Don't print a bare "." line since that will confuse
1347  					 * ed(1). Print ".." instead and set the, global variable
1348  					 * edoffset to an offset from which to restart. The
1349  					 * caller must check the value of edoffset
1350  					 */
1351  					printf(".\n");
1352  					edoffset = i - a + 1;
1353  					return (edoffset);
1354  				}
1355  				/* when side-by-side, do not print a newline */
1356  				if (diff_format != D_SIDEBYSIDE || c != '\n') {
1357  					if (color && c == '\n')
1358  						printf("\033[m%c", c);
1359  					else
1360  						printf("%c", c);
1361  					col++;
1362  				}
1363  			}
1364  
1365  			j++;
1366  			lastc = c;
1367  		}
1368  	}
1369  	if (color && diff_format == D_SIDEBYSIDE)
1370  		printf("\033[m");
1371  	return (col);
1372  }
1373  
1374  /*
1375   * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1376   */
1377  static enum readhash
1378  readhash(FILE *f, int flags, unsigned *hash)
1379  {
1380  	int i, t, space;
1381  	unsigned sum;
1382  
1383  	sum = 1;
1384  	space = 0;
1385  	for (i = 0;;) {
1386  		switch (t = getc(f)) {
1387  		case '\0':
1388  			if ((flags & D_FORCEASCII) == 0)
1389  				return (RH_BINARY);
1390  			goto hashchar;
1391  		case '\r':
1392  			if (flags & D_STRIPCR) {
1393  				t = getc(f);
1394  				if (t == '\n')
1395  					break;
1396  				ungetc(t, f);
1397  			}
1398  			/* FALLTHROUGH */
1399  		case '\t':
1400  		case '\v':
1401  		case '\f':
1402  		case ' ':
1403  			if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) != 0) {
1404  				space++;
1405  				continue;
1406  			}
1407  			/* FALLTHROUGH */
1408  		default:
1409  		hashchar:
1410  			if (space && (flags & D_IGNOREBLANKS) == 0) {
1411  				i++;
1412  				space = 0;
1413  			}
1414  			sum = sum * 127 + chrtran(t);
1415  			i++;
1416  			continue;
1417  		case EOF:
1418  			if (i == 0)
1419  				return (RH_EOF);
1420  			/* FALLTHROUGH */
1421  		case '\n':
1422  			break;
1423  		}
1424  		break;
1425  	}
1426  	*hash = sum;
1427  	return (RH_OK);
1428  }
1429  
1430  static int
1431  asciifile(FILE *f)
1432  {
1433  	unsigned char buf[BUFSIZ];
1434  	size_t cnt;
1435  
1436  	if (f == NULL)
1437  		return (1);
1438  
1439  	rewind(f);
1440  	cnt = fread(buf, 1, sizeof(buf), f);
1441  	return (memchr(buf, '\0', cnt) == NULL);
1442  }
1443  
1444  #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre) - 1) == 0)
1445  
1446  static char *
1447  match_function(const long *f, int pos, FILE *fp)
1448  {
1449  	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1450  	size_t nc;
1451  	int last = lastline;
1452  	const char *state = NULL;
1453  
1454  	lastline = pos;
1455  	for (; pos > last; pos--) {
1456  		fseek(fp, f[pos - 1], SEEK_SET);
1457  		nc = f[pos] - f[pos - 1];
1458  		if (nc >= sizeof(buf))
1459  			nc = sizeof(buf) - 1;
1460  		nc = fread(buf, 1, nc, fp);
1461  		if (nc == 0)
1462  			continue;
1463  		buf[nc] = '\0';
1464  		buf[strcspn(buf, "\n")] = '\0';
1465  		if (most_recent_pat != NULL) {
1466  			int ret = regexec(&most_recent_re, buf, 0, NULL, 0);
1467  
1468  			if (ret != 0)
1469  				continue;
1470  			strlcpy(lastbuf, buf, sizeof(lastbuf));
1471  			lastmatchline = pos;
1472  			return (lastbuf);
1473  		} else if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$'
1474  			|| buf[0] == '-' || buf[0] == '+') {
1475  			if (begins_with(buf, "private:")) {
1476  				if (!state)
1477  					state = " (private)";
1478  			} else if (begins_with(buf, "protected:")) {
1479  				if (!state)
1480  					state = " (protected)";
1481  			} else if (begins_with(buf, "public:")) {
1482  				if (!state)
1483  					state = " (public)";
1484  			} else {
1485  				strlcpy(lastbuf, buf, sizeof(lastbuf));
1486  				if (state)
1487  					strlcat(lastbuf, state, sizeof(lastbuf));
1488  				lastmatchline = pos;
1489  				return (lastbuf);
1490  			}
1491  		}
1492  	}
1493  	return (lastmatchline > 0 ? lastbuf : NULL);
1494  }
1495  
1496  /* dump accumulated "context" diff changes */
1497  static void
1498  dump_context_vec(FILE *f1, FILE *f2, int flags)
1499  {
1500  	struct context_vec *cvp = context_vec_start;
1501  	int lowa, upb, lowc, upd, do_output;
1502  	int a, b, c, d;
1503  	char ch, *f;
1504  
1505  	if (context_vec_start > context_vec_ptr)
1506  		return;
1507  
1508  	b = d = 0;		/* gcc */
1509  	lowa = MAX(1, cvp->a - diff_context);
1510  	upb = MIN((int)len[0], context_vec_ptr->b + diff_context);
1511  	lowc = MAX(1, cvp->c - diff_context);
1512  	upd = MIN((int)len[1], context_vec_ptr->d + diff_context);
1513  
1514  	printf("***************");
1515  	if (flags & (D_PROTOTYPE | D_MATCHLAST)) {
1516  		f = match_function(ixold, cvp->a - 1, f1);
1517  		if (f != NULL)
1518  			printf(" %s", f);
1519  	}
1520  	printf("\n*** ");
1521  	range(lowa, upb, ",");
1522  	printf(" ****\n");
1523  
1524  	/*
1525  	 * Output changes to the "old" file.  The first loop suppresses
1526  	 * output if there were no changes to the "old" file (we'll see
1527  	 * the "old" lines as context in the "new" list).
1528  	 */
1529  	do_output = 0;
1530  	for (; cvp <= context_vec_ptr; cvp++)
1531  		if (cvp->a <= cvp->b) {
1532  			cvp = context_vec_start;
1533  			do_output++;
1534  			break;
1535  		}
1536  	if (do_output) {
1537  		while (cvp <= context_vec_ptr) {
1538  			a = cvp->a;
1539  			b = cvp->b;
1540  			c = cvp->c;
1541  			d = cvp->d;
1542  
1543  			if (a <= b && c <= d)
1544  				ch = 'c';
1545  			else
1546  				ch = (a <= b) ? 'd' : 'a';
1547  
1548  			if (ch == 'a')
1549  				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1550  			else {
1551  				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1552  				fetch(ixold, a, b, f1,
1553  				    ch == 'c' ? '!' : '-', 0, flags);
1554  			}
1555  			lowa = b + 1;
1556  			cvp++;
1557  		}
1558  		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1559  	}
1560  	/* output changes to the "new" file */
1561  	printf("--- ");
1562  	range(lowc, upd, ",");
1563  	printf(" ----\n");
1564  
1565  	do_output = 0;
1566  	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1567  		if (cvp->c <= cvp->d) {
1568  			cvp = context_vec_start;
1569  			do_output++;
1570  			break;
1571  		}
1572  	if (do_output) {
1573  		while (cvp <= context_vec_ptr) {
1574  			a = cvp->a;
1575  			b = cvp->b;
1576  			c = cvp->c;
1577  			d = cvp->d;
1578  
1579  			if (a <= b && c <= d)
1580  				ch = 'c';
1581  			else
1582  				ch = (a <= b) ? 'd' : 'a';
1583  
1584  			if (ch == 'd')
1585  				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1586  			else {
1587  				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1588  				fetch(ixnew, c, d, f2,
1589  				    ch == 'c' ? '!' : '+', 0, flags);
1590  			}
1591  			lowc = d + 1;
1592  			cvp++;
1593  		}
1594  		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1595  	}
1596  	context_vec_ptr = context_vec_start - 1;
1597  }
1598  
1599  /* dump accumulated "unified" diff changes */
1600  static void
1601  dump_unified_vec(FILE *f1, FILE *f2, int flags)
1602  {
1603  	struct context_vec *cvp = context_vec_start;
1604  	int lowa, upb, lowc, upd;
1605  	int a, b, c, d;
1606  	char ch, *f;
1607  
1608  	if (context_vec_start > context_vec_ptr)
1609  		return;
1610  
1611  	b = d = 0;		/* gcc */
1612  	lowa = MAX(1, cvp->a - diff_context);
1613  	upb = MIN((int)len[0], context_vec_ptr->b + diff_context);
1614  	lowc = MAX(1, cvp->c - diff_context);
1615  	upd = MIN((int)len[1], context_vec_ptr->d + diff_context);
1616  
1617  	printf("@@ -");
1618  	uni_range(lowa, upb);
1619  	printf(" +");
1620  	uni_range(lowc, upd);
1621  	printf(" @@");
1622  	if (flags & (D_PROTOTYPE | D_MATCHLAST)) {
1623  		f = match_function(ixold, cvp->a - 1, f1);
1624  		if (f != NULL)
1625  			printf(" %s", f);
1626  	}
1627  	printf("\n");
1628  
1629  	/*
1630  	 * Output changes in "unified" diff format--the old and new lines
1631  	 * are printed together.
1632  	 */
1633  	for (; cvp <= context_vec_ptr; cvp++) {
1634  		a = cvp->a;
1635  		b = cvp->b;
1636  		c = cvp->c;
1637  		d = cvp->d;
1638  
1639  		/*
1640  		 * c: both new and old changes
1641  		 * d: only changes in the old file
1642  		 * a: only changes in the new file
1643  		 */
1644  		if (a <= b && c <= d)
1645  			ch = 'c';
1646  		else
1647  			ch = (a <= b) ? 'd' : 'a';
1648  
1649  		switch (ch) {
1650  		case 'c':
1651  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1652  			fetch(ixold, a, b, f1, '-', 0, flags);
1653  			fetch(ixnew, c, d, f2, '+', 0, flags);
1654  			break;
1655  		case 'd':
1656  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1657  			fetch(ixold, a, b, f1, '-', 0, flags);
1658  			break;
1659  		case 'a':
1660  			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1661  			fetch(ixnew, c, d, f2, '+', 0, flags);
1662  			break;
1663  		}
1664  		lowa = b + 1;
1665  		lowc = d + 1;
1666  	}
1667  	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1668  
1669  	context_vec_ptr = context_vec_start - 1;
1670  }
1671  
1672  static void
1673  print_header(const char *file1, const char *file2)
1674  {
1675  	const char *time_format;
1676  	char buf[256];
1677  	struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1678  	int nsec1 = stb1.st_mtim.tv_nsec;
1679  	int nsec2 = stb2.st_mtim.tv_nsec;
1680  
1681  	time_format = "%Y-%m-%d %H:%M:%S";
1682  
1683  	if (cflag)
1684  		time_format = "%c";
1685  	tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1686  	tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1687  	if (label[0] != NULL)
1688  		printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1689  		    label[0]);
1690  	else {
1691  		strftime(buf, sizeof(buf), time_format, tm_ptr1);
1692  		printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1693  		    file1, buf);
1694  		if (!cflag) {
1695  			strftime(buf, sizeof(buf), "%z", tm_ptr1);
1696  			printf(".%.9d %s", nsec1, buf);
1697  		}
1698  		printf("\n");
1699  	}
1700  	if (label[1] != NULL)
1701  		printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1702  		    label[1]);
1703  	else {
1704  		strftime(buf, sizeof(buf), time_format, tm_ptr2);
1705  		printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1706  		    file2, buf);
1707  		if (!cflag) {
1708  			strftime(buf, sizeof(buf), "%z", tm_ptr2);
1709  			printf(".%.9d %s", nsec2, buf);
1710  		}
1711  		printf("\n");
1712  	}
1713  }
1714  
1715  /*
1716   * Prints n number of space characters either by using tab
1717   * or single space characters.
1718   * nc is the preceding number of characters
1719   */
1720  static void
1721  print_space(int nc, int n, int flags)
1722  {
1723  	int col, newcol, tabstop;
1724  
1725  	col = nc;
1726  	newcol = nc + n;
1727  	/* first, use tabs if allowed */
1728  	if ((flags & D_EXPANDTABS) == 0) {
1729  		while ((tabstop = roundup(col + 1, tabsize)) <= newcol) {
1730  			printf("\t");
1731  			col = tabstop;
1732  		}
1733  	}
1734  	/* finish with spaces */
1735  	printf("%*s", newcol - col, "");
1736  }
1737