xref: /freebsd/usr.bin/diff/diffreg.c (revision 1f73e6d88857c1c39c95fea98775b6897316c08b)
1  /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2  
3  /*
4   * Copyright (C) Caldera International Inc.  2001-2002.
5   * All rights reserved.
6   *
7   * Redistribution and use in source and binary forms, with or without
8   * modification, are permitted provided that the following conditions
9   * are met:
10   * 1. Redistributions of source code and documentation must retain the above
11   *    copyright notice, this list of conditions and the following disclaimer.
12   * 2. Redistributions in binary form must reproduce the above copyright
13   *    notice, this list of conditions and the following disclaimer in the
14   *    documentation and/or other materials provided with the distribution.
15   * 3. All advertising materials mentioning features or use of this software
16   *    must display the following acknowledgement:
17   *	This product includes software developed or owned by Caldera
18   *	International, Inc.
19   * 4. Neither the name of Caldera International, Inc. nor the names of other
20   *    contributors may be used to endorse or promote products derived from
21   *    this software without specific prior written permission.
22   *
23   * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24   * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27   * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28   * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33   * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34   * POSSIBILITY OF SUCH DAMAGE.
35   */
36  /*-
37   * Copyright (c) 1991, 1993
38   *	The Regents of the University of California.  All rights reserved.
39   *
40   * Redistribution and use in source and binary forms, with or without
41   * modification, are permitted provided that the following conditions
42   * are met:
43   * 1. Redistributions of source code must retain the above copyright
44   *    notice, this list of conditions and the following disclaimer.
45   * 2. Redistributions in binary form must reproduce the above copyright
46   *    notice, this list of conditions and the following disclaimer in the
47   *    documentation and/or other materials provided with the distribution.
48   * 3. Neither the name of the University nor the names of its contributors
49   *    may be used to endorse or promote products derived from this software
50   *    without specific prior written permission.
51   *
52   * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55   * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62   * SUCH DAMAGE.
63   *
64   *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65   */
66  
67  #include <sys/cdefs.h>
68  __FBSDID("$FreeBSD$");
69  
70  #include <sys/capsicum.h>
71  #include <sys/procdesc.h>
72  #include <sys/stat.h>
73  #include <sys/types.h>
74  #include <sys/event.h>
75  #include <sys/wait.h>
76  
77  #include <capsicum_helpers.h>
78  #include <ctype.h>
79  #include <err.h>
80  #include <errno.h>
81  #include <fcntl.h>
82  #include <paths.h>
83  #include <regex.h>
84  #include <stddef.h>
85  #include <stdint.h>
86  #include <stdio.h>
87  #include <stdlib.h>
88  #include <string.h>
89  #include <unistd.h>
90  #include <limits.h>
91  #include <signal.h>
92  
93  #include "diff.h"
94  #include "xmalloc.h"
95  
96  #define _PATH_PR "/usr/bin/pr"
97  
98  /*
99   * diff - compare two files.
100   */
101  
102  /*
103   *	Uses an algorithm due to Harold Stone, which finds
104   *	a pair of longest identical subsequences in the two
105   *	files.
106   *
107   *	The major goal is to generate the match vector J.
108   *	J[i] is the index of the line in file1 corresponding
109   *	to line i file0. J[i] = 0 if there is no
110   *	such line in file1.
111   *
112   *	Lines are hashed so as to work in core. All potential
113   *	matches are located by sorting the lines of each file
114   *	on the hash (called ``value''). In particular, this
115   *	collects the equivalence classes in file1 together.
116   *	Subroutine equiv replaces the value of each line in
117   *	file0 by the index of the first element of its
118   *	matching equivalence in (the reordered) file1.
119   *	To save space equiv squeezes file1 into a single
120   *	array member in which the equivalence classes
121   *	are simply concatenated, except that their first
122   *	members are flagged by changing sign.
123   *
124   *	Next the indices that point into member are unsorted into
125   *	array class according to the original order of file0.
126   *
127   *	The cleverness lies in routine stone. This marches
128   *	through the lines of file0, developing a vector klist
129   *	of "k-candidates". At step i a k-candidate is a matched
130   *	pair of lines x,y (x in file0 y in file1) such that
131   *	there is a common subsequence of length k
132   *	between the first i lines of file0 and the first y
133   *	lines of file1, but there is no such subsequence for
134   *	any smaller y. x is the earliest possible mate to y
135   *	that occurs in such a subsequence.
136   *
137   *	Whenever any of the members of the equivalence class of
138   *	lines in file1 matable to a line in file0 has serial number
139   *	less than the y of some k-candidate, that k-candidate
140   *	with the smallest such y is replaced. The new
141   *	k-candidate is chained (via pred) to the current
142   *	k-1 candidate so that the actual subsequence can
143   *	be recovered. When a member has serial number greater
144   *	that the y of all k-candidates, the klist is extended.
145   *	At the end, the longest subsequence is pulled out
146   *	and placed in the array J by unravel
147   *
148   *	With J in hand, the matches there recorded are
149   *	check'ed against reality to assure that no spurious
150   *	matches have crept in due to hashing. If they have,
151   *	they are broken, and "jackpot" is recorded--a harmless
152   *	matter except that a true match for a spuriously
153   *	mated line may now be unnecessarily reported as a change.
154   *
155   *	Much of the complexity of the program comes simply
156   *	from trying to minimize core utilization and
157   *	maximize the range of doable problems by dynamically
158   *	allocating what is needed and reusing what is not.
159   *	The core requirements for problems larger than somewhat
160   *	are (in words) 2*length(file0) + length(file1) +
161   *	3*(number of k-candidates installed),  typically about
162   *	6n words for files of length n.
163   */
164  
165  struct cand {
166  	int	x;
167  	int	y;
168  	int	pred;
169  };
170  
171  static struct line {
172  	int	serial;
173  	int	value;
174  } *file[2];
175  
176  /*
177   * The following struct is used to record change information when
178   * doing a "context" or "unified" diff.  (see routine "change" to
179   * understand the highly mnemonic field names)
180   */
181  struct context_vec {
182  	int	a;		/* start line in old file */
183  	int	b;		/* end line in old file */
184  	int	c;		/* start line in new file */
185  	int	d;		/* end line in new file */
186  };
187  
188  #define	diff_output	printf
189  static FILE	*opentemp(const char *);
190  static void	 output(char *, FILE *, char *, FILE *, int);
191  static void	 check(FILE *, FILE *, int);
192  static void	 range(int, int, const char *);
193  static void	 uni_range(int, int);
194  static void	 dump_context_vec(FILE *, FILE *, int);
195  static void	 dump_unified_vec(FILE *, FILE *, int);
196  static void	 prepare(int, FILE *, size_t, int);
197  static void	 prune(void);
198  static void	 equiv(struct line *, int, struct line *, int, int *);
199  static void	 unravel(int);
200  static void	 unsort(struct line *, int, int *);
201  static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
202  static void	 sort(struct line *, int);
203  static void	 print_header(const char *, const char *);
204  static int	 ignoreline(char *);
205  static int	 asciifile(FILE *);
206  static int	 fetch(long *, int, int, FILE *, int, int, int);
207  static int	 newcand(int, int, int);
208  static int	 search(int *, int, int);
209  static int	 skipline(FILE *);
210  static int	 isqrt(int);
211  static int	 stone(int *, int, int *, int *, int);
212  static int	 readhash(FILE *, int);
213  static int	 files_differ(FILE *, FILE *, int);
214  static char	*match_function(const long *, int, FILE *);
215  static char	*preadline(int, size_t, off_t);
216  
217  static int  *J;			/* will be overlaid on class */
218  static int  *class;		/* will be overlaid on file[0] */
219  static int  *klist;		/* will be overlaid on file[0] after class */
220  static int  *member;		/* will be overlaid on file[1] */
221  static int   clen;
222  static int   inifdef;		/* whether or not we are in a #ifdef block */
223  static int   len[2];
224  static int   pref, suff;	/* length of prefix and suffix */
225  static int   slen[2];
226  static int   anychange;
227  static long *ixnew;		/* will be overlaid on file[1] */
228  static long *ixold;		/* will be overlaid on klist */
229  static struct cand *clist;	/* merely a free storage pot for candidates */
230  static int   clistlen;		/* the length of clist */
231  static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
232  static u_char *chrtran;		/* translation table for case-folding */
233  static struct context_vec *context_vec_start;
234  static struct context_vec *context_vec_end;
235  static struct context_vec *context_vec_ptr;
236  
237  #define FUNCTION_CONTEXT_SIZE	55
238  static char lastbuf[FUNCTION_CONTEXT_SIZE];
239  static int lastline;
240  static int lastmatchline;
241  
242  
243  /*
244   * chrtran points to one of 2 translation tables: cup2low if folding upper to
245   * lower case clow2low if not folding case
246   */
247  static u_char clow2low[256] = {
248  	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
249  	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
250  	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
251  	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
252  	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
253  	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
254  	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
255  	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
256  	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
257  	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
258  	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
259  	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
260  	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
261  	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
262  	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
263  	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
264  	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
265  	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
266  	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
267  	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
268  	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
269  	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
270  	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
271  	0xfd, 0xfe, 0xff
272  };
273  
274  static u_char cup2low[256] = {
275  	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
276  	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
277  	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
278  	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
279  	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
280  	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
281  	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
282  	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
283  	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
284  	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
285  	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
286  	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
287  	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
288  	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
289  	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
290  	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
291  	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
292  	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
293  	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
294  	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
295  	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
296  	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
297  	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
298  	0xfd, 0xfe, 0xff
299  };
300  
301  int
302  diffreg(char *file1, char *file2, int flags, int capsicum)
303  {
304  	FILE *f1, *f2;
305  	int i, rval;
306  	int	ostdout = -1;
307  	int pr_pd, kq;
308  	struct kevent *e;
309  	cap_rights_t rights_ro;
310  
311  	e = NULL;
312  	kq = -1;
313  	f1 = f2 = NULL;
314  	rval = D_SAME;
315  	anychange = 0;
316  	lastline = 0;
317  	lastmatchline = 0;
318  	context_vec_ptr = context_vec_start - 1;
319  	if (flags & D_IGNORECASE)
320  		chrtran = cup2low;
321  	else
322  		chrtran = clow2low;
323  	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
324  		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
325  	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
326  		goto closem;
327  
328  	if (flags & D_EMPTY1)
329  		f1 = fopen(_PATH_DEVNULL, "r");
330  	else {
331  		if (!S_ISREG(stb1.st_mode)) {
332  			if ((f1 = opentemp(file1)) == NULL ||
333  			    fstat(fileno(f1), &stb1) < 0) {
334  				warn("%s", file1);
335  				status |= 2;
336  				goto closem;
337  			}
338  		} else if (strcmp(file1, "-") == 0)
339  			f1 = stdin;
340  		else
341  			f1 = fopen(file1, "r");
342  	}
343  	if (f1 == NULL) {
344  		warn("%s", file1);
345  		status |= 2;
346  		goto closem;
347  	}
348  
349  	if (flags & D_EMPTY2)
350  		f2 = fopen(_PATH_DEVNULL, "r");
351  	else {
352  		if (!S_ISREG(stb2.st_mode)) {
353  			if ((f2 = opentemp(file2)) == NULL ||
354  			    fstat(fileno(f2), &stb2) < 0) {
355  				warn("%s", file2);
356  				status |= 2;
357  				goto closem;
358  			}
359  		} else if (strcmp(file2, "-") == 0)
360  			f2 = stdin;
361  		else
362  			f2 = fopen(file2, "r");
363  	}
364  	if (f2 == NULL) {
365  		warn("%s", file2);
366  		status |= 2;
367  		goto closem;
368  	}
369  
370  	if (lflag) {
371  		/* redirect stdout to pr */
372  		int	 pfd[2];
373  		pid_t	pid;
374  		char	*header;
375  
376  		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
377  		signal(SIGPIPE, SIG_IGN);
378  		fflush(stdout);
379  		rewind(stdout);
380  		pipe(pfd);
381  		switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
382  		case -1:
383  			status |= 2;
384  			free(header);
385  			err(2, "No more processes");
386  		case 0:
387  			/* child */
388  			if (pfd[0] != STDIN_FILENO) {
389  				dup2(pfd[0], STDIN_FILENO);
390  				close(pfd[0]);
391  			}
392  			close(pfd[1]);
393  			execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
394  			_exit(127);
395  		default:
396  
397  			/* parent */
398  			if (pfd[1] != STDOUT_FILENO) {
399  				ostdout = dup(STDOUT_FILENO);
400  				dup2(pfd[1], STDOUT_FILENO);
401  				close(pfd[1]);
402  			}
403  			close(pfd[0]);
404  			rewind(stdout);
405  			free(header);
406  			kq = kqueue();
407  			if (kq == -1)
408  				err(2, "kqueue");
409  			e = xmalloc(sizeof(struct kevent));
410  			EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
411  			    NULL);
412  			if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
413  				err(2, "kevent");
414  		}
415  	}
416  
417  	if (capsicum) {
418  		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
419  		if (cap_rights_limit(fileno(f1), &rights_ro) < 0
420  		    && errno != ENOSYS)
421  			err(2, "unable to limit rights on: %s", file1);
422  		if (cap_rights_limit(fileno(f2), &rights_ro) < 0 &&
423  		    errno != ENOSYS)
424  			err(2, "unable to limit rights on: %s", file2);
425  		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
426  			/* stding has already been limited */
427  			if (caph_limit_stderr() == -1)
428  				err(2, "unable to limit stderr");
429  			if (caph_limit_stdout() == -1)
430  				err(2, "unable to limit stdout");
431  		} else if (caph_limit_stdio() == -1)
432  				err(2, "unable to limit stdio");
433  
434  		caph_cache_catpages();
435  		caph_cache_tzdata();
436  		if (cap_enter() < 0 && errno != ENOSYS)
437  			err(2, "unable to enter capability mode");
438  	}
439  
440  	switch (files_differ(f1, f2, flags)) {
441  	case 0:
442  		goto closem;
443  	case 1:
444  		break;
445  	default:
446  		/* error */
447  		status |= 2;
448  		goto closem;
449  	}
450  
451  	if ((flags & D_FORCEASCII) == 0 &&
452  	    (!asciifile(f1) || !asciifile(f2))) {
453  		rval = D_BINARY;
454  		status |= 1;
455  		goto closem;
456  	}
457  	prepare(0, f1, stb1.st_size, flags);
458  	prepare(1, f2, stb2.st_size, flags);
459  
460  	prune();
461  	sort(sfile[0], slen[0]);
462  	sort(sfile[1], slen[1]);
463  
464  	member = (int *)file[1];
465  	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
466  	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
467  
468  	class = (int *)file[0];
469  	unsort(sfile[0], slen[0], class);
470  	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
471  
472  	klist = xcalloc(slen[0] + 2, sizeof(*klist));
473  	clen = 0;
474  	clistlen = 100;
475  	clist = xcalloc(clistlen, sizeof(*clist));
476  	i = stone(class, slen[0], member, klist, flags);
477  	free(member);
478  	free(class);
479  
480  	J = xreallocarray(J, len[0] + 2, sizeof(*J));
481  	unravel(klist[i]);
482  	free(clist);
483  	free(klist);
484  
485  	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
486  	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
487  	check(f1, f2, flags);
488  	output(file1, f1, file2, f2, flags);
489  	if (ostdout != -1 && e != NULL) {
490  		/* close the pipe to pr and restore stdout */
491  		int wstatus;
492  
493  		fflush(stdout);
494  		if (ostdout != STDOUT_FILENO) {
495  			close(STDOUT_FILENO);
496  			dup2(ostdout, STDOUT_FILENO);
497  			close(ostdout);
498  		}
499  		if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
500  			err(2, "kevent");
501  		wstatus = e[0].data;
502  		close(kq);
503  		if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
504  			errx(2, "pr exited abnormally");
505  		else if (WIFSIGNALED(wstatus))
506  			errx(2, "pr killed by signal %d",
507  			    WTERMSIG(wstatus));
508  	}
509  
510  closem:
511  	if (anychange) {
512  		status |= 1;
513  		if (rval == D_SAME)
514  			rval = D_DIFFER;
515  	}
516  	if (f1 != NULL)
517  		fclose(f1);
518  	if (f2 != NULL)
519  		fclose(f2);
520  
521  	return (rval);
522  }
523  
524  /*
525   * Check to see if the given files differ.
526   * Returns 0 if they are the same, 1 if different, and -1 on error.
527   * XXX - could use code from cmp(1) [faster]
528   */
529  static int
530  files_differ(FILE *f1, FILE *f2, int flags)
531  {
532  	char buf1[BUFSIZ], buf2[BUFSIZ];
533  	size_t i, j;
534  
535  	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
536  	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
537  		return (1);
538  	for (;;) {
539  		i = fread(buf1, 1, sizeof(buf1), f1);
540  		j = fread(buf2, 1, sizeof(buf2), f2);
541  		if ((!i && ferror(f1)) || (!j && ferror(f2)))
542  			return (-1);
543  		if (i != j)
544  			return (1);
545  		if (i == 0)
546  			return (0);
547  		if (memcmp(buf1, buf2, i) != 0)
548  			return (1);
549  	}
550  }
551  
552  static FILE *
553  opentemp(const char *f)
554  {
555  	char buf[BUFSIZ], tempfile[PATH_MAX];
556  	ssize_t nread;
557  	int ifd, ofd;
558  
559  	if (strcmp(f, "-") == 0)
560  		ifd = STDIN_FILENO;
561  	else if ((ifd = open(f, O_RDONLY, 0644)) < 0)
562  		return (NULL);
563  
564  	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
565  
566  	if ((ofd = mkstemp(tempfile)) < 0) {
567  		close(ifd);
568  		return (NULL);
569  	}
570  	unlink(tempfile);
571  	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
572  		if (write(ofd, buf, nread) != nread) {
573  			close(ifd);
574  			close(ofd);
575  			return (NULL);
576  		}
577  	}
578  	close(ifd);
579  	lseek(ofd, (off_t)0, SEEK_SET);
580  	return (fdopen(ofd, "r"));
581  }
582  
583  char *
584  splice(char *dir, char *path)
585  {
586  	char *tail, *buf;
587  	size_t dirlen;
588  
589  	dirlen = strlen(dir);
590  	while (dirlen != 0 && dir[dirlen - 1] == '/')
591  	    dirlen--;
592  	if ((tail = strrchr(path, '/')) == NULL)
593  		tail = path;
594  	else
595  		tail++;
596  	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
597  	return (buf);
598  }
599  
600  static void
601  prepare(int i, FILE *fd, size_t filesize, int flags)
602  {
603  	struct line *p;
604  	int h;
605  	size_t sz, j;
606  
607  	rewind(fd);
608  
609  	sz = MIN(filesize, SIZE_MAX) / 25;
610  	if (sz < 100)
611  		sz = 100;
612  
613  	p = xcalloc(sz + 3, sizeof(*p));
614  	for (j = 0; (h = readhash(fd, flags));) {
615  		if (j == sz) {
616  			sz = sz * 3 / 2;
617  			p = xreallocarray(p, sz + 3, sizeof(*p));
618  		}
619  		p[++j].value = h;
620  	}
621  	len[i] = j;
622  	file[i] = p;
623  }
624  
625  static void
626  prune(void)
627  {
628  	int i, j;
629  
630  	for (pref = 0; pref < len[0] && pref < len[1] &&
631  	    file[0][pref + 1].value == file[1][pref + 1].value;
632  	    pref++)
633  		;
634  	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
635  	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
636  	    suff++)
637  		;
638  	for (j = 0; j < 2; j++) {
639  		sfile[j] = file[j] + pref;
640  		slen[j] = len[j] - pref - suff;
641  		for (i = 0; i <= slen[j]; i++)
642  			sfile[j][i].serial = i;
643  	}
644  }
645  
646  static void
647  equiv(struct line *a, int n, struct line *b, int m, int *c)
648  {
649  	int i, j;
650  
651  	i = j = 1;
652  	while (i <= n && j <= m) {
653  		if (a[i].value < b[j].value)
654  			a[i++].value = 0;
655  		else if (a[i].value == b[j].value)
656  			a[i++].value = j;
657  		else
658  			j++;
659  	}
660  	while (i <= n)
661  		a[i++].value = 0;
662  	b[m + 1].value = 0;
663  	j = 0;
664  	while (++j <= m) {
665  		c[j] = -b[j].serial;
666  		while (b[j + 1].value == b[j].value) {
667  			j++;
668  			c[j] = b[j].serial;
669  		}
670  	}
671  	c[j] = -1;
672  }
673  
674  /* Code taken from ping.c */
675  static int
676  isqrt(int n)
677  {
678  	int y, x = 1;
679  
680  	if (n == 0)
681  		return (0);
682  
683  	do { /* newton was a stinker */
684  		y = x;
685  		x = n / x;
686  		x += y;
687  		x /= 2;
688  	} while ((x - y) > 1 || (x - y) < -1);
689  
690  	return (x);
691  }
692  
693  static int
694  stone(int *a, int n, int *b, int *c, int flags)
695  {
696  	int i, k, y, j, l;
697  	int oldc, tc, oldl, sq;
698  	u_int numtries, bound;
699  
700  	if (flags & D_MINIMAL)
701  		bound = UINT_MAX;
702  	else {
703  		sq = isqrt(n);
704  		bound = MAX(256, sq);
705  	}
706  
707  	k = 0;
708  	c[0] = newcand(0, 0, 0);
709  	for (i = 1; i <= n; i++) {
710  		j = a[i];
711  		if (j == 0)
712  			continue;
713  		y = -b[j];
714  		oldl = 0;
715  		oldc = c[0];
716  		numtries = 0;
717  		do {
718  			if (y <= clist[oldc].y)
719  				continue;
720  			l = search(c, k, y);
721  			if (l != oldl + 1)
722  				oldc = c[l - 1];
723  			if (l <= k) {
724  				if (clist[c[l]].y <= y)
725  					continue;
726  				tc = c[l];
727  				c[l] = newcand(i, y, oldc);
728  				oldc = tc;
729  				oldl = l;
730  				numtries++;
731  			} else {
732  				c[l] = newcand(i, y, oldc);
733  				k++;
734  				break;
735  			}
736  		} while ((y = b[++j]) > 0 && numtries < bound);
737  	}
738  	return (k);
739  }
740  
741  static int
742  newcand(int x, int y, int pred)
743  {
744  	struct cand *q;
745  
746  	if (clen == clistlen) {
747  		clistlen = clistlen * 11 / 10;
748  		clist = xreallocarray(clist, clistlen, sizeof(*clist));
749  	}
750  	q = clist + clen;
751  	q->x = x;
752  	q->y = y;
753  	q->pred = pred;
754  	return (clen++);
755  }
756  
757  static int
758  search(int *c, int k, int y)
759  {
760  	int i, j, l, t;
761  
762  	if (clist[c[k]].y < y)	/* quick look for typical case */
763  		return (k + 1);
764  	i = 0;
765  	j = k + 1;
766  	for (;;) {
767  		l = (i + j) / 2;
768  		if (l <= i)
769  			break;
770  		t = clist[c[l]].y;
771  		if (t > y)
772  			j = l;
773  		else if (t < y)
774  			i = l;
775  		else
776  			return (l);
777  	}
778  	return (l + 1);
779  }
780  
781  static void
782  unravel(int p)
783  {
784  	struct cand *q;
785  	int i;
786  
787  	for (i = 0; i <= len[0]; i++)
788  		J[i] = i <= pref ? i :
789  		    i > len[0] - suff ? i + len[1] - len[0] : 0;
790  	for (q = clist + p; q->y != 0; q = clist + q->pred)
791  		J[q->x + pref] = q->y + pref;
792  }
793  
794  /*
795   * Check does double duty:
796   *  1.	ferret out any fortuitous correspondences due
797   *	to confounding by hashing (which result in "jackpot")
798   *  2.  collect random access indexes to the two files
799   */
800  static void
801  check(FILE *f1, FILE *f2, int flags)
802  {
803  	int i, j, jackpot, c, d;
804  	long ctold, ctnew;
805  
806  	rewind(f1);
807  	rewind(f2);
808  	j = 1;
809  	ixold[0] = ixnew[0] = 0;
810  	jackpot = 0;
811  	ctold = ctnew = 0;
812  	for (i = 1; i <= len[0]; i++) {
813  		if (J[i] == 0) {
814  			ixold[i] = ctold += skipline(f1);
815  			continue;
816  		}
817  		while (j < J[i]) {
818  			ixnew[j] = ctnew += skipline(f2);
819  			j++;
820  		}
821  		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
822  			for (;;) {
823  				c = getc(f1);
824  				d = getc(f2);
825  				/*
826  				 * GNU diff ignores a missing newline
827  				 * in one file for -b or -w.
828  				 */
829  				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
830  					if (c == EOF && d == '\n') {
831  						ctnew++;
832  						break;
833  					} else if (c == '\n' && d == EOF) {
834  						ctold++;
835  						break;
836  					}
837  				}
838  				ctold++;
839  				ctnew++;
840  				if (flags & D_STRIPCR) {
841  					if (c == '\r') {
842  						if ((c = getc(f1)) == '\n') {
843  							ctnew++;
844  							break;
845  						}
846  					}
847  					if (d == '\r') {
848  						if ((d = getc(f2)) == '\n') {
849  							ctold++;
850  							break;
851  						}
852  					}
853  				}
854  				if ((flags & D_FOLDBLANKS) && isspace(c) &&
855  				    isspace(d)) {
856  					do {
857  						if (c == '\n')
858  							break;
859  						ctold++;
860  					} while (isspace(c = getc(f1)));
861  					do {
862  						if (d == '\n')
863  							break;
864  						ctnew++;
865  					} while (isspace(d = getc(f2)));
866  				} else if ((flags & D_IGNOREBLANKS)) {
867  					while (isspace(c) && c != '\n') {
868  						c = getc(f1);
869  						ctold++;
870  					}
871  					while (isspace(d) && d != '\n') {
872  						d = getc(f2);
873  						ctnew++;
874  					}
875  				}
876  				if (chrtran[c] != chrtran[d]) {
877  					jackpot++;
878  					J[i] = 0;
879  					if (c != '\n' && c != EOF)
880  						ctold += skipline(f1);
881  					if (d != '\n' && c != EOF)
882  						ctnew += skipline(f2);
883  					break;
884  				}
885  				if (c == '\n' || c == EOF)
886  					break;
887  			}
888  		} else {
889  			for (;;) {
890  				ctold++;
891  				ctnew++;
892  				if ((c = getc(f1)) != (d = getc(f2))) {
893  					/* jackpot++; */
894  					J[i] = 0;
895  					if (c != '\n' && c != EOF)
896  						ctold += skipline(f1);
897  					if (d != '\n' && c != EOF)
898  						ctnew += skipline(f2);
899  					break;
900  				}
901  				if (c == '\n' || c == EOF)
902  					break;
903  			}
904  		}
905  		ixold[i] = ctold;
906  		ixnew[j] = ctnew;
907  		j++;
908  	}
909  	for (; j <= len[1]; j++) {
910  		ixnew[j] = ctnew += skipline(f2);
911  	}
912  	/*
913  	 * if (jackpot)
914  	 *	fprintf(stderr, "jackpot\n");
915  	 */
916  }
917  
918  /* shellsort CACM #201 */
919  static void
920  sort(struct line *a, int n)
921  {
922  	struct line *ai, *aim, w;
923  	int j, m = 0, k;
924  
925  	if (n == 0)
926  		return;
927  	for (j = 1; j <= n; j *= 2)
928  		m = 2 * j - 1;
929  	for (m /= 2; m != 0; m /= 2) {
930  		k = n - m;
931  		for (j = 1; j <= k; j++) {
932  			for (ai = &a[j]; ai > a; ai -= m) {
933  				aim = &ai[m];
934  				if (aim < ai)
935  					break;	/* wraparound */
936  				if (aim->value > ai[0].value ||
937  				    (aim->value == ai[0].value &&
938  					aim->serial > ai[0].serial))
939  					break;
940  				w.value = ai[0].value;
941  				ai[0].value = aim->value;
942  				aim->value = w.value;
943  				w.serial = ai[0].serial;
944  				ai[0].serial = aim->serial;
945  				aim->serial = w.serial;
946  			}
947  		}
948  	}
949  }
950  
951  static void
952  unsort(struct line *f, int l, int *b)
953  {
954  	int *a, i;
955  
956  	a = xcalloc(l + 1, sizeof(*a));
957  	for (i = 1; i <= l; i++)
958  		a[f[i].serial] = f[i].value;
959  	for (i = 1; i <= l; i++)
960  		b[i] = a[i];
961  	free(a);
962  }
963  
964  static int
965  skipline(FILE *f)
966  {
967  	int i, c;
968  
969  	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
970  		continue;
971  	return (i);
972  }
973  
974  static void
975  output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
976  {
977  	int m, i0, i1, j0, j1;
978  
979  	rewind(f1);
980  	rewind(f2);
981  	m = len[0];
982  	J[0] = 0;
983  	J[m + 1] = len[1] + 1;
984  	if (diff_format != D_EDIT) {
985  		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
986  			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
987  				i0++;
988  			j0 = J[i0 - 1] + 1;
989  			i1 = i0 - 1;
990  			while (i1 < m && J[i1 + 1] == 0)
991  				i1++;
992  			j1 = J[i1 + 1] - 1;
993  			J[i1] = j1;
994  			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
995  		}
996  	} else {
997  		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
998  			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
999  				i0--;
1000  			j0 = J[i0 + 1] - 1;
1001  			i1 = i0 + 1;
1002  			while (i1 > 1 && J[i1 - 1] == 0)
1003  				i1--;
1004  			j1 = J[i1 - 1] + 1;
1005  			J[i1] = j1;
1006  			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
1007  		}
1008  	}
1009  	if (m == 0)
1010  		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
1011  	if (diff_format == D_IFDEF || diff_format == D_GFORMAT) {
1012  		for (;;) {
1013  #define	c i0
1014  			if ((c = getc(f1)) == EOF)
1015  				return;
1016  			diff_output("%c", c);
1017  		}
1018  #undef c
1019  	}
1020  	if (anychange != 0) {
1021  		if (diff_format == D_CONTEXT)
1022  			dump_context_vec(f1, f2, flags);
1023  		else if (diff_format == D_UNIFIED)
1024  			dump_unified_vec(f1, f2, flags);
1025  	}
1026  }
1027  
1028  static void
1029  range(int a, int b, const char *separator)
1030  {
1031  	diff_output("%d", a > b ? b : a);
1032  	if (a < b)
1033  		diff_output("%s%d", separator, b);
1034  }
1035  
1036  static void
1037  uni_range(int a, int b)
1038  {
1039  	if (a < b)
1040  		diff_output("%d,%d", a, b - a + 1);
1041  	else if (a == b)
1042  		diff_output("%d", b);
1043  	else
1044  		diff_output("%d,0", b);
1045  }
1046  
1047  static char *
1048  preadline(int fd, size_t rlen, off_t off)
1049  {
1050  	char *line;
1051  	ssize_t nr;
1052  
1053  	line = xmalloc(rlen + 1);
1054  	if ((nr = pread(fd, line, rlen, off)) < 0)
1055  		err(2, "preadline");
1056  	if (nr > 0 && line[nr-1] == '\n')
1057  		nr--;
1058  	line[nr] = '\0';
1059  	return (line);
1060  }
1061  
1062  static int
1063  ignoreline(char *line)
1064  {
1065  	int ret;
1066  
1067  	ret = regexec(&ignore_re, line, 0, NULL, 0);
1068  	free(line);
1069  	return (ret == 0);	/* if it matched, it should be ignored. */
1070  }
1071  
1072  /*
1073   * Indicate that there is a difference between lines a and b of the from file
1074   * to get to lines c to d of the to file.  If a is greater then b then there
1075   * are no lines in the from file involved and this means that there were
1076   * lines appended (beginning at b).  If c is greater than d then there are
1077   * lines missing from the to file.
1078   */
1079  static void
1080  change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1081      int *pflags)
1082  {
1083  	static size_t max_context = 64;
1084  	long curpos;
1085  	int i, nc, f;
1086  	const char *walk;
1087  
1088  restart:
1089  	if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) &&
1090  	    a > b && c > d)
1091  		return;
1092  	if (ignore_pats != NULL) {
1093  		char *line;
1094  		/*
1095  		 * All lines in the change, insert, or delete must
1096  		 * match an ignore pattern for the change to be
1097  		 * ignored.
1098  		 */
1099  		if (a <= b) {		/* Changes and deletes. */
1100  			for (i = a; i <= b; i++) {
1101  				line = preadline(fileno(f1),
1102  				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1103  				if (!ignoreline(line))
1104  					goto proceed;
1105  			}
1106  		}
1107  		if (a > b || c <= d) {	/* Changes and inserts. */
1108  			for (i = c; i <= d; i++) {
1109  				line = preadline(fileno(f2),
1110  				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1111  				if (!ignoreline(line))
1112  					goto proceed;
1113  			}
1114  		}
1115  		return;
1116  	}
1117  proceed:
1118  	if (*pflags & D_HEADER) {
1119  		diff_output("%s %s %s\n", diffargs, file1, file2);
1120  		*pflags &= ~D_HEADER;
1121  	}
1122  	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1123  		/*
1124  		 * Allocate change records as needed.
1125  		 */
1126  		if (context_vec_ptr == context_vec_end - 1) {
1127  			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1128  			max_context <<= 1;
1129  			context_vec_start = xreallocarray(context_vec_start,
1130  			    max_context, sizeof(*context_vec_start));
1131  			context_vec_end = context_vec_start + max_context;
1132  			context_vec_ptr = context_vec_start + offset;
1133  		}
1134  		if (anychange == 0) {
1135  			/*
1136  			 * Print the context/unidiff header first time through.
1137  			 */
1138  			print_header(file1, file2);
1139  			anychange = 1;
1140  		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1141  		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1142  			/*
1143  			 * If this change is more than 'diff_context' lines from the
1144  			 * previous change, dump the record and reset it.
1145  			 */
1146  			if (diff_format == D_CONTEXT)
1147  				dump_context_vec(f1, f2, *pflags);
1148  			else
1149  				dump_unified_vec(f1, f2, *pflags);
1150  		}
1151  		context_vec_ptr++;
1152  		context_vec_ptr->a = a;
1153  		context_vec_ptr->b = b;
1154  		context_vec_ptr->c = c;
1155  		context_vec_ptr->d = d;
1156  		return;
1157  	}
1158  	if (anychange == 0)
1159  		anychange = 1;
1160  	switch (diff_format) {
1161  	case D_BRIEF:
1162  		return;
1163  	case D_NORMAL:
1164  	case D_EDIT:
1165  		range(a, b, ",");
1166  		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1167  		if (diff_format == D_NORMAL)
1168  			range(c, d, ",");
1169  		diff_output("\n");
1170  		break;
1171  	case D_REVERSE:
1172  		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1173  		range(a, b, " ");
1174  		diff_output("\n");
1175  		break;
1176  	case D_NREVERSE:
1177  		if (a > b)
1178  			diff_output("a%d %d\n", b, d - c + 1);
1179  		else {
1180  			diff_output("d%d %d\n", a, b - a + 1);
1181  			if (!(c > d))
1182  				/* add changed lines */
1183  				diff_output("a%d %d\n", b, d - c + 1);
1184  		}
1185  		break;
1186  	}
1187  	if (diff_format == D_GFORMAT) {
1188  		curpos = ftell(f1);
1189  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1190  		nc = ixold[a > b ? b : a - 1] - curpos;
1191  		for (i = 0; i < nc; i++)
1192  			diff_output("%c", getc(f1));
1193  		for (walk = group_format; *walk != '\0'; walk++) {
1194  			if (*walk == '%') {
1195  				walk++;
1196  				switch (*walk) {
1197  				case '<':
1198  					fetch(ixold, a, b, f1, '<', 1, *pflags);
1199  					break;
1200  				case '>':
1201  					fetch(ixnew, c, d, f2, '>', 0, *pflags);
1202  					break;
1203  				default:
1204  					diff_output("%%%c", *walk);
1205  					break;
1206  				}
1207  				continue;
1208  			}
1209  			diff_output("%c", *walk);
1210  		}
1211  	}
1212  	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1213  		fetch(ixold, a, b, f1, '<', 1, *pflags);
1214  		if (a <= b && c <= d && diff_format == D_NORMAL)
1215  			diff_output("---\n");
1216  	}
1217  	f = 0;
1218  	if (diff_format != D_GFORMAT)
1219  		f = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1220  	if (f != 0 && diff_format == D_EDIT) {
1221  		/*
1222  		 * A non-zero return value for D_EDIT indicates that the
1223  		 * last line printed was a bare dot (".") that has been
1224  		 * escaped as ".." to prevent ed(1) from misinterpreting
1225  		 * it.  We have to add a substitute command to change this
1226  		 * back and restart where we left off.
1227  		 */
1228  		diff_output(".\n");
1229  		diff_output("%ds/.//\n", a + f - 1);
1230  		b = a + f - 1;
1231  		a = b + 1;
1232  		c += f;
1233  		goto restart;
1234  	}
1235  	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1236  		diff_output(".\n");
1237  	if (inifdef) {
1238  		diff_output("#endif /* %s */\n", ifdefname);
1239  		inifdef = 0;
1240  	}
1241  }
1242  
1243  static int
1244  fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1245  {
1246  	int i, j, c, lastc, col, nc;
1247  	int	newcol;
1248  
1249  	/*
1250  	 * When doing #ifdef's, copy down to current line
1251  	 * if this is the first file, so that stuff makes it to output.
1252  	 */
1253  	if ((diff_format == D_IFDEF) && oldfile) {
1254  		long curpos = ftell(lb);
1255  		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1256  		nc = f[a > b ? b : a - 1] - curpos;
1257  		for (i = 0; i < nc; i++)
1258  			diff_output("%c", getc(lb));
1259  	}
1260  	if (a > b)
1261  		return (0);
1262  	if (diff_format == D_IFDEF) {
1263  		if (inifdef) {
1264  			diff_output("#else /* %s%s */\n",
1265  			    oldfile == 1 ? "!" : "", ifdefname);
1266  		} else {
1267  			if (oldfile)
1268  				diff_output("#ifndef %s\n", ifdefname);
1269  			else
1270  				diff_output("#ifdef %s\n", ifdefname);
1271  		}
1272  		inifdef = 1 + oldfile;
1273  	}
1274  	for (i = a; i <= b; i++) {
1275  		fseek(lb, f[i - 1], SEEK_SET);
1276  		nc = f[i] - f[i - 1];
1277  		if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) &&
1278  		    ch != '\0') {
1279  			diff_output("%c", ch);
1280  			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1281  			    || diff_format == D_UNIFIED))
1282  				diff_output("\t");
1283  			else if (diff_format != D_UNIFIED)
1284  				diff_output(" ");
1285  		}
1286  		col = 0;
1287  		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1288  			if ((c = getc(lb)) == EOF) {
1289  				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1290  				    diff_format == D_NREVERSE)
1291  					warnx("No newline at end of file");
1292  				else
1293  					diff_output("\n\\ No newline at end of "
1294  					    "file\n");
1295  				return (0);
1296  			}
1297  			if (c == '\t' && (flags & D_EXPANDTABS)) {
1298  				newcol = ((col/tabsize)+1)*tabsize;
1299  				do {
1300  					diff_output(" ");
1301  				} while (++col < newcol);
1302  			} else {
1303  				if (diff_format == D_EDIT && j == 1 && c == '\n'
1304  				    && lastc == '.') {
1305  					/*
1306  					 * Don't print a bare "." line
1307  					 * since that will confuse ed(1).
1308  					 * Print ".." instead and return,
1309  					 * giving the caller an offset
1310  					 * from which to restart.
1311  					 */
1312  					diff_output(".\n");
1313  					return (i - a + 1);
1314  				}
1315  				diff_output("%c", c);
1316  				col++;
1317  			}
1318  		}
1319  	}
1320  	return (0);
1321  }
1322  
1323  /*
1324   * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1325   */
1326  static int
1327  readhash(FILE *f, int flags)
1328  {
1329  	int i, t, space;
1330  	int sum;
1331  
1332  	sum = 1;
1333  	space = 0;
1334  	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1335  		if (flags & D_IGNORECASE)
1336  			for (i = 0; (t = getc(f)) != '\n'; i++) {
1337  				if (flags & D_STRIPCR && t == '\r') {
1338  					t = getc(f);
1339  					if (t == '\n')
1340  						break;
1341  					ungetc(t, f);
1342  				}
1343  				if (t == EOF) {
1344  					if (i == 0)
1345  						return (0);
1346  					break;
1347  				}
1348  				sum = sum * 127 + chrtran[t];
1349  			}
1350  		else
1351  			for (i = 0; (t = getc(f)) != '\n'; i++) {
1352  				if (flags & D_STRIPCR && t == '\r') {
1353  					t = getc(f);
1354  					if (t == '\n')
1355  						break;
1356  					ungetc(t, f);
1357  				}
1358  				if (t == EOF) {
1359  					if (i == 0)
1360  						return (0);
1361  					break;
1362  				}
1363  				sum = sum * 127 + t;
1364  			}
1365  	} else {
1366  		for (i = 0;;) {
1367  			switch (t = getc(f)) {
1368  			case '\r':
1369  			case '\t':
1370  			case '\v':
1371  			case '\f':
1372  			case ' ':
1373  				space++;
1374  				continue;
1375  			default:
1376  				if (space && (flags & D_IGNOREBLANKS) == 0) {
1377  					i++;
1378  					space = 0;
1379  				}
1380  				sum = sum * 127 + chrtran[t];
1381  				i++;
1382  				continue;
1383  			case EOF:
1384  				if (i == 0)
1385  					return (0);
1386  				/* FALLTHROUGH */
1387  			case '\n':
1388  				break;
1389  			}
1390  			break;
1391  		}
1392  	}
1393  	/*
1394  	 * There is a remote possibility that we end up with a zero sum.
1395  	 * Zero is used as an EOF marker, so return 1 instead.
1396  	 */
1397  	return (sum == 0 ? 1 : sum);
1398  }
1399  
1400  static int
1401  asciifile(FILE *f)
1402  {
1403  	unsigned char buf[BUFSIZ];
1404  	size_t cnt;
1405  
1406  	if (f == NULL)
1407  		return (1);
1408  
1409  	rewind(f);
1410  	cnt = fread(buf, 1, sizeof(buf), f);
1411  	return (memchr(buf, '\0', cnt) == NULL);
1412  }
1413  
1414  #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1415  
1416  static char *
1417  match_function(const long *f, int pos, FILE *fp)
1418  {
1419  	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1420  	size_t nc;
1421  	int last = lastline;
1422  	const char *state = NULL;
1423  
1424  	lastline = pos;
1425  	while (pos > last) {
1426  		fseek(fp, f[pos - 1], SEEK_SET);
1427  		nc = f[pos] - f[pos - 1];
1428  		if (nc >= sizeof(buf))
1429  			nc = sizeof(buf) - 1;
1430  		nc = fread(buf, 1, nc, fp);
1431  		if (nc > 0) {
1432  			buf[nc] = '\0';
1433  			buf[strcspn(buf, "\n")] = '\0';
1434  			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1435  				if (begins_with(buf, "private:")) {
1436  					if (!state)
1437  						state = " (private)";
1438  				} else if (begins_with(buf, "protected:")) {
1439  					if (!state)
1440  						state = " (protected)";
1441  				} else if (begins_with(buf, "public:")) {
1442  					if (!state)
1443  						state = " (public)";
1444  				} else {
1445  					strlcpy(lastbuf, buf, sizeof lastbuf);
1446  					if (state)
1447  						strlcat(lastbuf, state,
1448  						    sizeof lastbuf);
1449  					lastmatchline = pos;
1450  					return lastbuf;
1451  				}
1452  			}
1453  		}
1454  		pos--;
1455  	}
1456  	return lastmatchline > 0 ? lastbuf : NULL;
1457  }
1458  
1459  /* dump accumulated "context" diff changes */
1460  static void
1461  dump_context_vec(FILE *f1, FILE *f2, int flags)
1462  {
1463  	struct context_vec *cvp = context_vec_start;
1464  	int lowa, upb, lowc, upd, do_output;
1465  	int a, b, c, d;
1466  	char ch, *f;
1467  
1468  	if (context_vec_start > context_vec_ptr)
1469  		return;
1470  
1471  	b = d = 0;		/* gcc */
1472  	lowa = MAX(1, cvp->a - diff_context);
1473  	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1474  	lowc = MAX(1, cvp->c - diff_context);
1475  	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1476  
1477  	diff_output("***************");
1478  	if ((flags & D_PROTOTYPE)) {
1479  		f = match_function(ixold, lowa-1, f1);
1480  		if (f != NULL)
1481  			diff_output(" %s", f);
1482  	}
1483  	diff_output("\n*** ");
1484  	range(lowa, upb, ",");
1485  	diff_output(" ****\n");
1486  
1487  	/*
1488  	 * Output changes to the "old" file.  The first loop suppresses
1489  	 * output if there were no changes to the "old" file (we'll see
1490  	 * the "old" lines as context in the "new" list).
1491  	 */
1492  	do_output = 0;
1493  	for (; cvp <= context_vec_ptr; cvp++)
1494  		if (cvp->a <= cvp->b) {
1495  			cvp = context_vec_start;
1496  			do_output++;
1497  			break;
1498  		}
1499  	if (do_output) {
1500  		while (cvp <= context_vec_ptr) {
1501  			a = cvp->a;
1502  			b = cvp->b;
1503  			c = cvp->c;
1504  			d = cvp->d;
1505  
1506  			if (a <= b && c <= d)
1507  				ch = 'c';
1508  			else
1509  				ch = (a <= b) ? 'd' : 'a';
1510  
1511  			if (ch == 'a')
1512  				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1513  			else {
1514  				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1515  				fetch(ixold, a, b, f1,
1516  				    ch == 'c' ? '!' : '-', 0, flags);
1517  			}
1518  			lowa = b + 1;
1519  			cvp++;
1520  		}
1521  		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1522  	}
1523  	/* output changes to the "new" file */
1524  	diff_output("--- ");
1525  	range(lowc, upd, ",");
1526  	diff_output(" ----\n");
1527  
1528  	do_output = 0;
1529  	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1530  		if (cvp->c <= cvp->d) {
1531  			cvp = context_vec_start;
1532  			do_output++;
1533  			break;
1534  		}
1535  	if (do_output) {
1536  		while (cvp <= context_vec_ptr) {
1537  			a = cvp->a;
1538  			b = cvp->b;
1539  			c = cvp->c;
1540  			d = cvp->d;
1541  
1542  			if (a <= b && c <= d)
1543  				ch = 'c';
1544  			else
1545  				ch = (a <= b) ? 'd' : 'a';
1546  
1547  			if (ch == 'd')
1548  				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1549  			else {
1550  				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1551  				fetch(ixnew, c, d, f2,
1552  				    ch == 'c' ? '!' : '+', 0, flags);
1553  			}
1554  			lowc = d + 1;
1555  			cvp++;
1556  		}
1557  		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1558  	}
1559  	context_vec_ptr = context_vec_start - 1;
1560  }
1561  
1562  /* dump accumulated "unified" diff changes */
1563  static void
1564  dump_unified_vec(FILE *f1, FILE *f2, int flags)
1565  {
1566  	struct context_vec *cvp = context_vec_start;
1567  	int lowa, upb, lowc, upd;
1568  	int a, b, c, d;
1569  	char ch, *f;
1570  
1571  	if (context_vec_start > context_vec_ptr)
1572  		return;
1573  
1574  	b = d = 0;		/* gcc */
1575  	lowa = MAX(1, cvp->a - diff_context);
1576  	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1577  	lowc = MAX(1, cvp->c - diff_context);
1578  	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1579  
1580  	diff_output("@@ -");
1581  	uni_range(lowa, upb);
1582  	diff_output(" +");
1583  	uni_range(lowc, upd);
1584  	diff_output(" @@");
1585  	if ((flags & D_PROTOTYPE)) {
1586  		f = match_function(ixold, lowa-1, f1);
1587  		if (f != NULL)
1588  			diff_output(" %s", f);
1589  	}
1590  	diff_output("\n");
1591  
1592  	/*
1593  	 * Output changes in "unified" diff format--the old and new lines
1594  	 * are printed together.
1595  	 */
1596  	for (; cvp <= context_vec_ptr; cvp++) {
1597  		a = cvp->a;
1598  		b = cvp->b;
1599  		c = cvp->c;
1600  		d = cvp->d;
1601  
1602  		/*
1603  		 * c: both new and old changes
1604  		 * d: only changes in the old file
1605  		 * a: only changes in the new file
1606  		 */
1607  		if (a <= b && c <= d)
1608  			ch = 'c';
1609  		else
1610  			ch = (a <= b) ? 'd' : 'a';
1611  
1612  		switch (ch) {
1613  		case 'c':
1614  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1615  			fetch(ixold, a, b, f1, '-', 0, flags);
1616  			fetch(ixnew, c, d, f2, '+', 0, flags);
1617  			break;
1618  		case 'd':
1619  			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1620  			fetch(ixold, a, b, f1, '-', 0, flags);
1621  			break;
1622  		case 'a':
1623  			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1624  			fetch(ixnew, c, d, f2, '+', 0, flags);
1625  			break;
1626  		}
1627  		lowa = b + 1;
1628  		lowc = d + 1;
1629  	}
1630  	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1631  
1632  	context_vec_ptr = context_vec_start - 1;
1633  }
1634  
1635  static void
1636  print_header(const char *file1, const char *file2)
1637  {
1638  	const char *time_format;
1639  	char buf1[256];
1640  	char buf2[256];
1641  	char end1[10];
1642  	char end2[10];
1643  	struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1644  	int nsec1 = stb1.st_mtim.tv_nsec;
1645  	int nsec2 = stb2.st_mtim.tv_nsec;
1646  
1647  	time_format = "%Y-%m-%d %H:%M:%S";
1648  
1649  	if (cflag)
1650  		time_format = "%c";
1651  	tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1652  	tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1653  	strftime(buf1, 256, time_format, tm_ptr1);
1654  	strftime(buf2, 256, time_format, tm_ptr2);
1655  	if (!cflag) {
1656  		strftime(end1, 10, "%z", tm_ptr1);
1657  		strftime(end2, 10, "%z", tm_ptr2);
1658  		sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1659  		sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1660  	}
1661  	if (label[0] != NULL)
1662  		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1663  		    label[0]);
1664  	else
1665  		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1666  		    file1, buf1);
1667  	if (label[1] != NULL)
1668  		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1669  		    label[1]);
1670  	else
1671  		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1672  		    file2, buf2);
1673  }
1674