xref: /freebsd/usr.bin/diff/diffreg.c (revision 094fc1ed0f2627525c7b0342efcbad5be7a8546a)
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
69 
70 #include <sys/capsicum.h>
71 #include <sys/procdesc.h>
72 #include <sys/stat.h>
73 #include <sys/types.h>
74 #include <sys/event.h>
75 #include <sys/wait.h>
76 
77 #include <capsicum_helpers.h>
78 #include <ctype.h>
79 #include <err.h>
80 #include <errno.h>
81 #include <fcntl.h>
82 #include <paths.h>
83 #include <regex.h>
84 #include <stddef.h>
85 #include <stdint.h>
86 #include <stdio.h>
87 #include <stdlib.h>
88 #include <string.h>
89 #include <unistd.h>
90 #include <limits.h>
91 #include <signal.h>
92 
93 #include "diff.h"
94 #include "xmalloc.h"
95 
96 #define _PATH_PR "/usr/bin/pr"
97 
98 /*
99  * diff - compare two files.
100  */
101 
102 /*
103  *	Uses an algorithm due to Harold Stone, which finds
104  *	a pair of longest identical subsequences in the two
105  *	files.
106  *
107  *	The major goal is to generate the match vector J.
 *	J[i] is the index of the line in file1 corresponding
 *	to line i of file0. J[i] = 0 if there is no
110  *	such line in file1.
111  *
112  *	Lines are hashed so as to work in core. All potential
113  *	matches are located by sorting the lines of each file
114  *	on the hash (called ``value''). In particular, this
115  *	collects the equivalence classes in file1 together.
116  *	Subroutine equiv replaces the value of each line in
117  *	file0 by the index of the first element of its
118  *	matching equivalence in (the reordered) file1.
119  *	To save space equiv squeezes file1 into a single
120  *	array member in which the equivalence classes
121  *	are simply concatenated, except that their first
122  *	members are flagged by changing sign.
123  *
124  *	Next the indices that point into member are unsorted into
125  *	array class according to the original order of file0.
126  *
127  *	The cleverness lies in routine stone. This marches
128  *	through the lines of file0, developing a vector klist
129  *	of "k-candidates". At step i a k-candidate is a matched
130  *	pair of lines x,y (x in file0 y in file1) such that
131  *	there is a common subsequence of length k
132  *	between the first i lines of file0 and the first y
133  *	lines of file1, but there is no such subsequence for
134  *	any smaller y. x is the earliest possible mate to y
135  *	that occurs in such a subsequence.
136  *
137  *	Whenever any of the members of the equivalence class of
138  *	lines in file1 matable to a line in file0 has serial number
139  *	less than the y of some k-candidate, that k-candidate
140  *	with the smallest such y is replaced. The new
141  *	k-candidate is chained (via pred) to the current
142  *	k-1 candidate so that the actual subsequence can
 *	be recovered. When a member has serial number greater
 *	than the y of all k-candidates, the klist is extended.
 *	At the end, the longest subsequence is pulled out
 *	and placed in the array J by unravel.
147  *
148  *	With J in hand, the matches there recorded are
149  *	check'ed against reality to assure that no spurious
150  *	matches have crept in due to hashing. If they have,
151  *	they are broken, and "jackpot" is recorded--a harmless
152  *	matter except that a true match for a spuriously
153  *	mated line may now be unnecessarily reported as a change.
154  *
155  *	Much of the complexity of the program comes simply
156  *	from trying to minimize core utilization and
157  *	maximize the range of doable problems by dynamically
158  *	allocating what is needed and reusing what is not.
159  *	The core requirements for problems larger than somewhat
160  *	are (in words) 2*length(file0) + length(file1) +
161  *	3*(number of k-candidates installed),  typically about
162  *	6n words for files of length n.
163  */
164 
/*
 * One k-candidate: a matched pair of lines plus a link to the candidate
 * it extends.  Candidates are stored in the clist array and refer to one
 * another by array index.  x and y are relative to the pruned files;
 * unravel() adds the common-prefix length back in.
 */
struct cand {
	int	x;	/* line in file0 */
	int	y;	/* line in file1; 0 marks the end of a chain */
	int	pred;	/* clist index of the candidate this one extends */
};
170 
/*
 * Per-line record.  file[0] and file[1] are the arrays built by prepare()
 * for the two inputs; entries are used from index 1 upwards.
 */
static struct line {
	int	serial;	/* position within the pruned file, set by prune() */
	int	value;	/* line hash from readhash(); rewritten by equiv() */
} *file[2];
175 
/*
 * The following struct is used to record change information when
 * doing a "context" or "unified" diff.  Each entry describes one change
 * as a pair of inclusive line ranges (see routine "change" to
 * understand the highly mnemonic field names).
 */
struct context_vec {
	int	a;		/* start line in old file */
	int	b;		/* end line in old file */
	int	c;		/* start line in new file */
	int	d;		/* end line in new file */
};
187 
/* Output helper used by the printing routines in this file. */
#define	diff_output	printf

/* Forward declarations of the file-local helpers. */
static FILE	*opentemp(const char *);
static void	 output(char *, FILE *, char *, FILE *, int);
static void	 check(FILE *, FILE *, int);
static void	 range(int, int, const char *);
static void	 uni_range(int, int);
static void	 dump_context_vec(FILE *, FILE *, int);
static void	 dump_unified_vec(FILE *, FILE *, int);
static void	 prepare(int, FILE *, size_t, int);
static void	 prune(void);
static void	 equiv(struct line *, int, struct line *, int, int *);
static void	 unravel(int);
static void	 unsort(struct line *, int, int *);
static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
static void	 sort(struct line *, int);
static void	 print_header(const char *, const char *);
static int	 ignoreline(char *);
static int	 asciifile(FILE *);
static int	 fetch(long *, int, int, FILE *, int, int, int);
static int	 newcand(int, int, int);
static int	 search(int *, int, int);
static int	 skipline(FILE *);
static int	 isqrt(int);
static int	 stone(int *, int, int *, int *, int);
static int	 readhash(FILE *, int);
static int	 files_differ(FILE *, FILE *, int);
static char	*match_function(const long *, int, FILE *);
static char	*preadline(int, size_t, off_t);
216 
/*
 * Working state shared by the routines below.  diffreg() sets these up
 * afresh for every pair of files it compares.
 */
static int  *J;			/* match vector; allocated by diffreg() */
static int  *class;		/* will be overlaid on file[0] */
static int  *klist;		/* candidate index per length; allocated by diffreg() */
static int  *member;		/* will be overlaid on file[1] */
static int   clen;		/* number of clist entries in use */
static int   inifdef;		/* whether or not we are in a #ifdef block */
static int   len[2];		/* number of lines read from each file */
static int   pref, suff;	/* length of prefix and suffix */
static int   slen[2];		/* len[] minus the common prefix and suffix */
static int   anychange;		/* nonzero makes diffreg() report D_DIFFER */
static long *ixnew;		/* cumulative line-end offsets in file 2, set by check() */
static long *ixold;		/* cumulative line-end offsets in file 1, set by check() */
static struct cand *clist;	/* merely a free storage pot for candidates */
static int   clistlen;		/* the length of clist */
static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
static u_char *chrtran;		/* translation table for case-folding */
static struct context_vec *context_vec_start;
static struct context_vec *context_vec_end;
static struct context_vec *context_vec_ptr;	/* reset to start - 1 by diffreg() */

#define FUNCTION_CONTEXT_SIZE	55
static char lastbuf[FUNCTION_CONTEXT_SIZE];
static int lastline;		/* reset to 0 by diffreg() */
static int lastmatchline;	/* reset to 0 by diffreg() */
241 
242 
/*
 * chrtran points to one of 2 translation tables: cup2low if folding upper to
 * lower case, clow2low if not folding case.
 */
/* Identity table: every byte value translates to itself. */
static u_char clow2low[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
	0xfd, 0xfe, 0xff
};
273 
274 static u_char cup2low[256] = {
275 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
276 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
277 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
278 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
279 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
280 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
281 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
282 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
283 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
284 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
285 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
286 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
287 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
288 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
289 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
290 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
291 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
292 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
293 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
294 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
295 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
296 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
297 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
298 	0xfd, 0xfe, 0xff
299 };
300 
301 int
302 diffreg(char *file1, char *file2, int flags, int capsicum)
303 {
304 	FILE *f1, *f2;
305 	int i, rval;
306 	int	ostdout = -1;
307 	int pr_pd, kq;
308 	struct kevent *e;
309 	cap_rights_t rights_ro;
310 
311 	e = NULL;
312 	kq = -1;
313 	f1 = f2 = NULL;
314 	rval = D_SAME;
315 	anychange = 0;
316 	lastline = 0;
317 	lastmatchline = 0;
318 	context_vec_ptr = context_vec_start - 1;
319 	if (flags & D_IGNORECASE)
320 		chrtran = cup2low;
321 	else
322 		chrtran = clow2low;
323 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
324 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
325 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
326 		goto closem;
327 
328 	if (flags & D_EMPTY1)
329 		f1 = fopen(_PATH_DEVNULL, "r");
330 	else {
331 		if (!S_ISREG(stb1.st_mode)) {
332 			if ((f1 = opentemp(file1)) == NULL ||
333 			    fstat(fileno(f1), &stb1) < 0) {
334 				warn("%s", file1);
335 				status |= 2;
336 				goto closem;
337 			}
338 		} else if (strcmp(file1, "-") == 0)
339 			f1 = stdin;
340 		else
341 			f1 = fopen(file1, "r");
342 	}
343 	if (f1 == NULL) {
344 		warn("%s", file1);
345 		status |= 2;
346 		goto closem;
347 	}
348 
349 	if (flags & D_EMPTY2)
350 		f2 = fopen(_PATH_DEVNULL, "r");
351 	else {
352 		if (!S_ISREG(stb2.st_mode)) {
353 			if ((f2 = opentemp(file2)) == NULL ||
354 			    fstat(fileno(f2), &stb2) < 0) {
355 				warn("%s", file2);
356 				status |= 2;
357 				goto closem;
358 			}
359 		} else if (strcmp(file2, "-") == 0)
360 			f2 = stdin;
361 		else
362 			f2 = fopen(file2, "r");
363 	}
364 	if (f2 == NULL) {
365 		warn("%s", file2);
366 		status |= 2;
367 		goto closem;
368 	}
369 
370 	if (lflag) {
371 		/* redirect stdout to pr */
372 		int	 pfd[2];
373 		pid_t	pid;
374 		char	*header;
375 
376 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
377 		signal(SIGPIPE, SIG_IGN);
378 		fflush(stdout);
379 		rewind(stdout);
380 		pipe(pfd);
381 		switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
382 		case -1:
383 			status |= 2;
384 			free(header);
385 			err(2, "No more processes");
386 		case 0:
387 			/* child */
388 			if (pfd[0] != STDIN_FILENO) {
389 				dup2(pfd[0], STDIN_FILENO);
390 				close(pfd[0]);
391 			}
392 			close(pfd[1]);
393 			execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
394 			_exit(127);
395 		default:
396 
397 			/* parent */
398 			if (pfd[1] != STDOUT_FILENO) {
399 				ostdout = dup(STDOUT_FILENO);
400 				dup2(pfd[1], STDOUT_FILENO);
401 				close(pfd[1]);
402 			}
403 			close(pfd[0]);
404 			rewind(stdout);
405 			free(header);
406 			kq = kqueue();
407 			if (kq == -1)
408 				err(2, "kqueue");
409 			e = xmalloc(sizeof(struct kevent));
410 			EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
411 			    NULL);
412 			if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
413 				err(2, "kevent");
414 		}
415 	}
416 
417 	if (capsicum) {
418 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
419 		if (cap_rights_limit(fileno(f1), &rights_ro) < 0
420 		    && errno != ENOSYS)
421 			err(2, "unable to limit rights on: %s", file1);
422 		if (cap_rights_limit(fileno(f2), &rights_ro) < 0 &&
423 		    errno != ENOSYS)
424 			err(2, "unable to limit rights on: %s", file2);
425 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
426 			/* stding has already been limited */
427 			if (caph_limit_stderr() == -1)
428 				err(2, "unable to limit stderr");
429 			if (caph_limit_stdout() == -1)
430 				err(2, "unable to limit stdout");
431 		} else if (caph_limit_stdio() == -1)
432 				err(2, "unable to limit stdio");
433 
434 		caph_cache_catpages();
435 		caph_cache_tzdata();
436 		if (cap_enter() < 0 && errno != ENOSYS)
437 			err(2, "unable to enter capability mode");
438 	}
439 
440 	switch (files_differ(f1, f2, flags)) {
441 	case 0:
442 		goto closem;
443 	case 1:
444 		break;
445 	default:
446 		/* error */
447 		status |= 2;
448 		goto closem;
449 	}
450 
451 	if ((flags & D_FORCEASCII) == 0 &&
452 	    (!asciifile(f1) || !asciifile(f2))) {
453 		rval = D_BINARY;
454 		status |= 1;
455 		goto closem;
456 	}
457 	prepare(0, f1, stb1.st_size, flags);
458 	prepare(1, f2, stb2.st_size, flags);
459 
460 	prune();
461 	sort(sfile[0], slen[0]);
462 	sort(sfile[1], slen[1]);
463 
464 	member = (int *)file[1];
465 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
466 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
467 
468 	class = (int *)file[0];
469 	unsort(sfile[0], slen[0], class);
470 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
471 
472 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
473 	clen = 0;
474 	clistlen = 100;
475 	clist = xcalloc(clistlen, sizeof(*clist));
476 	i = stone(class, slen[0], member, klist, flags);
477 	free(member);
478 	free(class);
479 
480 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
481 	unravel(klist[i]);
482 	free(clist);
483 	free(klist);
484 
485 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
486 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
487 	check(f1, f2, flags);
488 	output(file1, f1, file2, f2, flags);
489 	if (ostdout != -1 && e != NULL) {
490 		/* close the pipe to pr and restore stdout */
491 		int wstatus;
492 
493 		fflush(stdout);
494 		if (ostdout != STDOUT_FILENO) {
495 			close(STDOUT_FILENO);
496 			dup2(ostdout, STDOUT_FILENO);
497 			close(ostdout);
498 		}
499 		if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
500 			err(2, "kevent");
501 		wstatus = e[0].data;
502 		close(kq);
503 		if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
504 			errx(2, "pr exited abnormally");
505 		else if (WIFSIGNALED(wstatus))
506 			errx(2, "pr killed by signal %d",
507 			    WTERMSIG(wstatus));
508 	}
509 
510 closem:
511 	if (anychange) {
512 		status |= 1;
513 		if (rval == D_SAME)
514 			rval = D_DIFFER;
515 	}
516 	if (f1 != NULL)
517 		fclose(f1);
518 	if (f2 != NULL)
519 		fclose(f2);
520 
521 	return (rval);
522 }
523 
524 /*
525  * Check to see if the given files differ.
526  * Returns 0 if they are the same, 1 if different, and -1 on error.
527  * XXX - could use code from cmp(1) [faster]
528  */
529 static int
530 files_differ(FILE *f1, FILE *f2, int flags)
531 {
532 	char buf1[BUFSIZ], buf2[BUFSIZ];
533 	size_t i, j;
534 
535 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
536 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
537 		return (1);
538 	for (;;) {
539 		i = fread(buf1, 1, sizeof(buf1), f1);
540 		j = fread(buf2, 1, sizeof(buf2), f2);
541 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
542 			return (-1);
543 		if (i != j)
544 			return (1);
545 		if (i == 0)
546 			return (0);
547 		if (memcmp(buf1, buf2, i) != 0)
548 			return (1);
549 	}
550 }
551 
552 static FILE *
553 opentemp(const char *f)
554 {
555 	char buf[BUFSIZ], tempfile[PATH_MAX];
556 	ssize_t nread;
557 	int ifd, ofd;
558 
559 	if (strcmp(f, "-") == 0)
560 		ifd = STDIN_FILENO;
561 	else if ((ifd = open(f, O_RDONLY, 0644)) < 0)
562 		return (NULL);
563 
564 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
565 
566 	if ((ofd = mkstemp(tempfile)) < 0) {
567 		close(ifd);
568 		return (NULL);
569 	}
570 	unlink(tempfile);
571 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
572 		if (write(ofd, buf, nread) != nread) {
573 			close(ifd);
574 			close(ofd);
575 			return (NULL);
576 		}
577 	}
578 	close(ifd);
579 	lseek(ofd, (off_t)0, SEEK_SET);
580 	return (fdopen(ofd, "r"));
581 }
582 
/*
 * Build "dir/base", where base is the last component of path and any
 * trailing slashes on dir are dropped.  The result is allocated by
 * xasprintf(); the caller owns it.
 */
char *
splice(char *dir, char *path)
{
	const char *slash, *base;
	char *joined;
	size_t n;

	/* Length of dir without its trailing slashes. */
	for (n = strlen(dir); n != 0 && dir[n - 1] == '/'; n--)
		continue;

	slash = strrchr(path, '/');
	base = (slash != NULL) ? slash + 1 : path;

	xasprintf(&joined, "%.*s/%s", (int)n, dir, base);
	return (joined);
}
599 
600 static void
601 prepare(int i, FILE *fd, size_t filesize, int flags)
602 {
603 	struct line *p;
604 	int h;
605 	size_t sz, j;
606 
607 	rewind(fd);
608 
609 	sz = MIN(filesize, SIZE_MAX) / 25;
610 	if (sz < 100)
611 		sz = 100;
612 
613 	p = xcalloc(sz + 3, sizeof(*p));
614 	for (j = 0; (h = readhash(fd, flags));) {
615 		if (j == sz) {
616 			sz = sz * 3 / 2;
617 			p = xreallocarray(p, sz + 3, sizeof(*p));
618 		}
619 		p[++j].value = h;
620 	}
621 	len[i] = j;
622 	file[i] = p;
623 }
624 
625 static void
626 prune(void)
627 {
628 	int i, j;
629 
630 	for (pref = 0; pref < len[0] && pref < len[1] &&
631 	    file[0][pref + 1].value == file[1][pref + 1].value;
632 	    pref++)
633 		;
634 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
635 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
636 	    suff++)
637 		;
638 	for (j = 0; j < 2; j++) {
639 		sfile[j] = file[j] + pref;
640 		slen[j] = len[j] - pref - suff;
641 		for (i = 0; i <= slen[j]; i++)
642 			sfile[j][i].serial = i;
643 	}
644 }
645 
646 static void
647 equiv(struct line *a, int n, struct line *b, int m, int *c)
648 {
649 	int i, j;
650 
651 	i = j = 1;
652 	while (i <= n && j <= m) {
653 		if (a[i].value < b[j].value)
654 			a[i++].value = 0;
655 		else if (a[i].value == b[j].value)
656 			a[i++].value = j;
657 		else
658 			j++;
659 	}
660 	while (i <= n)
661 		a[i++].value = 0;
662 	b[m + 1].value = 0;
663 	j = 0;
664 	while (++j <= m) {
665 		c[j] = -b[j].serial;
666 		while (b[j + 1].value == b[j].value) {
667 			j++;
668 			c[j] = b[j].serial;
669 		}
670 	}
671 	c[j] = -1;
672 }
673 
/*
 * Approximate integer square root by Newton iteration (code taken from
 * ping.c).  Iteration stops as soon as two successive estimates differ
 * by at most one, so the result may be off by one from the exact root.
 */
static int
isqrt(int n)
{
	int prev, guess;

	if (n == 0)
		return (0);

	guess = 1;
	for (;;) { /* newton was a stinker */
		prev = guess;
		guess = (n / prev + prev) / 2;
		if (guess - prev <= 1 && guess - prev >= -1)
			break;
	}

	return (guess);
}
692 
/*
 * Build the list of k-candidates for the longest common subsequence.
 *
 * a[1..n] holds, for each line of file0, an index into b (0 when the line
 * has no counterpart).  b lists file1 serial numbers grouped by
 * equivalence class, the first member of each class being negated.
 * c[l] receives the clist index of the current candidate for a common
 * subsequence of length l.  Unless D_MINIMAL is set in flags, the number
 * of candidate replacements tried for one line is capped at
 * max(256, isqrt(n)).
 *
 * Returns k, the highest index of c in use.
 */
static int
stone(int *a, int n, int *b, int *c, int flags)
{
	int i, k, y, j, l;
	int oldc, tc, oldl, sq;
	u_int numtries, bound;

	if (flags & D_MINIMAL)
		bound = UINT_MAX;
	else {
		sq = isqrt(n);
		bound = MAX(256, sq);
	}

	k = 0;
	/* Candidate 0 (y == 0) terminates every chain. */
	c[0] = newcand(0, 0, 0);
	for (i = 1; i <= n; i++) {
		j = a[i];
		if (j == 0)
			continue;
		/* The first member of a class is stored negated. */
		y = -b[j];
		oldl = 0;
		oldc = c[0];
		numtries = 0;
		/*
		 * Walk the members of the class.  Each "continue" jumps to
		 * the loop condition, which moves on to the next member.
		 */
		do {
			if (y <= clist[oldc].y)
				continue;
			l = search(c, k, y);
			if (l != oldl + 1)
				oldc = c[l - 1];
			if (l <= k) {
				/* Replace candidate l only if y is smaller. */
				if (clist[c[l]].y <= y)
					continue;
				tc = c[l];
				c[l] = newcand(i, y, oldc);
				oldc = tc;
				oldl = l;
				numtries++;
			} else {
				/* y is beyond every candidate: extend the list. */
				c[l] = newcand(i, y, oldc);
				k++;
				break;
			}
		} while ((y = b[++j]) > 0 && numtries < bound);
	}
	return (k);
}
740 
741 static int
742 newcand(int x, int y, int pred)
743 {
744 	struct cand *q;
745 
746 	if (clen == clistlen) {
747 		clistlen = clistlen * 11 / 10;
748 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
749 	}
750 	q = clist + clen;
751 	q->x = x;
752 	q->y = y;
753 	q->pred = pred;
754 	return (clen++);
755 }
756 
757 static int
758 search(int *c, int k, int y)
759 {
760 	int i, j, l, t;
761 
762 	if (clist[c[k]].y < y)	/* quick look for typical case */
763 		return (k + 1);
764 	i = 0;
765 	j = k + 1;
766 	for (;;) {
767 		l = (i + j) / 2;
768 		if (l <= i)
769 			break;
770 		t = clist[c[l]].y;
771 		if (t > y)
772 			j = l;
773 		else if (t < y)
774 			i = l;
775 		else
776 			return (l);
777 	}
778 	return (l + 1);
779 }
780 
781 static void
782 unravel(int p)
783 {
784 	struct cand *q;
785 	int i;
786 
787 	for (i = 0; i <= len[0]; i++)
788 		J[i] = i <= pref ? i :
789 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
790 	for (q = clist + p; q->y != 0; q = clist + q->pred)
791 		J[q->x + pref] = q->y + pref;
792 }
793 
794 /*
795  * Check does double duty:
796  *  1.	ferret out any fortuitous correspondences due
797  *	to confounding by hashing (which result in "jackpot")
798  *  2.  collect random access indexes to the two files
799  */
800 static void
801 check(FILE *f1, FILE *f2, int flags)
802 {
803 	int i, j, jackpot, c, d;
804 	long ctold, ctnew;
805 
806 	rewind(f1);
807 	rewind(f2);
808 	j = 1;
809 	ixold[0] = ixnew[0] = 0;
810 	jackpot = 0;
811 	ctold = ctnew = 0;
812 	for (i = 1; i <= len[0]; i++) {
813 		if (J[i] == 0) {
814 			ixold[i] = ctold += skipline(f1);
815 			continue;
816 		}
817 		while (j < J[i]) {
818 			ixnew[j] = ctnew += skipline(f2);
819 			j++;
820 		}
821 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
822 			for (;;) {
823 				c = getc(f1);
824 				d = getc(f2);
825 				/*
826 				 * GNU diff ignores a missing newline
827 				 * in one file for -b or -w.
828 				 */
829 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
830 					if (c == EOF && d == '\n') {
831 						ctnew++;
832 						break;
833 					} else if (c == '\n' && d == EOF) {
834 						ctold++;
835 						break;
836 					}
837 				}
838 				ctold++;
839 				ctnew++;
840 				if (flags & D_STRIPCR) {
841 					if (c == '\r') {
842 						if ((c = getc(f1)) == '\n') {
843 							ctnew++;
844 							break;
845 						}
846 					}
847 					if (d == '\r') {
848 						if ((d = getc(f2)) == '\n') {
849 							ctold++;
850 							break;
851 						}
852 					}
853 				}
854 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
855 				    isspace(d)) {
856 					do {
857 						if (c == '\n')
858 							break;
859 						ctold++;
860 					} while (isspace(c = getc(f1)));
861 					do {
862 						if (d == '\n')
863 							break;
864 						ctnew++;
865 					} while (isspace(d = getc(f2)));
866 				} else if ((flags & D_IGNOREBLANKS)) {
867 					while (isspace(c) && c != '\n') {
868 						c = getc(f1);
869 						ctold++;
870 					}
871 					while (isspace(d) && d != '\n') {
872 						d = getc(f2);
873 						ctnew++;
874 					}
875 				}
876 				if (chrtran[c] != chrtran[d]) {
877 					jackpot++;
878 					J[i] = 0;
879 					if (c != '\n' && c != EOF)
880 						ctold += skipline(f1);
881 					if (d != '\n' && c != EOF)
882 						ctnew += skipline(f2);
883 					break;
884 				}
885 				if (c == '\n' || c == EOF)
886 					break;
887 			}
888 		} else {
889 			for (;;) {
890 				ctold++;
891 				ctnew++;
892 				if ((c = getc(f1)) != (d = getc(f2))) {
893 					/* jackpot++; */
894 					J[i] = 0;
895 					if (c != '\n' && c != EOF)
896 						ctold += skipline(f1);
897 					if (d != '\n' && c != EOF)
898 						ctnew += skipline(f2);
899 					break;
900 				}
901 				if (c == '\n' || c == EOF)
902 					break;
903 			}
904 		}
905 		ixold[i] = ctold;
906 		ixnew[j] = ctnew;
907 		j++;
908 	}
909 	for (; j <= len[1]; j++) {
910 		ixnew[j] = ctnew += skipline(f2);
911 	}
912 	/*
913 	 * if (jackpot)
914 	 *	fprintf(stderr, "jackpot\n");
915 	 */
916 }
917 
/*
 * shellsort CACM #201
 *
 * Sorts a[1..n] in place into ascending order of value, ties being
 * ordered by ascending serial.  a[0] is not part of the data.
 */
static void
sort(struct line *a, int n)
{
	struct line *ai, *aim, w;
	int j, m = 0, k;

	if (n == 0)
		return;
	/* m = 2 * j - 1 for the largest power of two j not above n */
	for (j = 1; j <= n; j *= 2)
		m = 2 * j - 1;
	/* one pass per gap m, halving the gap each time */
	for (m /= 2; m != 0; m /= 2) {
		k = n - m;
		for (j = 1; j <= k; j++) {
			/* sift the pair (ai, ai + m) down towards a[1] */
			for (ai = &a[j]; ai > a; ai -= m) {
				aim = &ai[m];
				if (aim < ai)
					break;	/* wraparound */
				/* already in order: nothing further to move */
				if (aim->value > ai[0].value ||
				    (aim->value == ai[0].value &&
					aim->serial > ai[0].serial))
					break;
				/* swap the two entries field by field */
				w.value = ai[0].value;
				ai[0].value = aim->value;
				aim->value = w.value;
				w.serial = ai[0].serial;
				ai[0].serial = aim->serial;
				aim->serial = w.serial;
			}
		}
	}
}
950 
951 static void
952 unsort(struct line *f, int l, int *b)
953 {
954 	int *a, i;
955 
956 	a = xcalloc(l + 1, sizeof(*a));
957 	for (i = 1; i <= l; i++)
958 		a[f[i].serial] = f[i].value;
959 	for (i = 1; i <= l; i++)
960 		b[i] = a[i];
961 	free(a);
962 }
963 
/*
 * Consume the rest of the current line of f and return how many
 * characters were read, counting the terminating newline (or the end of
 * file) as one.
 */
static int
skipline(FILE *f)
{
	int count = 1;
	int ch;

	for (;;) {
		ch = getc(f);
		if (ch == '\n' || ch == EOF)
			break;
		count++;
	}
	return (count);
}
973 
/*
 * Walk the match vector J and hand every run of unmatched lines to
 * change().  The runs are visited front to back, except for D_EDIT
 * output where they are visited back to front.  Afterwards the rest of
 * file 1 is copied out for the D_IFDEF and D_GFORMAT formats, and any
 * pending context or unified hunks are dumped.
 */
static void
output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
{
	int m, i0, i1, j0, j1;

	rewind(f1);
	rewind(f2);
	m = len[0];
	/* sentinels so the scans below can look one entry past either end */
	J[0] = 0;
	J[m + 1] = len[1] + 1;
	if (diff_format != D_EDIT) {
		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
			/* skip lines matched to consecutive lines of file 2 */
			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
				i0++;
			j0 = J[i0 - 1] + 1;
			i1 = i0 - 1;
			/* extend over the unmatched lines that follow */
			while (i1 < m && J[i1 + 1] == 0)
				i1++;
			j1 = J[i1 + 1] - 1;
			J[i1] = j1;
			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
		}
	} else {
		/* same scan, running from the end of the files backwards */
		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
				i0--;
			j0 = J[i0 + 1] - 1;
			i1 = i0 + 1;
			while (i1 > 1 && J[i1 - 1] == 0)
				i1--;
			j1 = J[i1 - 1] + 1;
			J[i1] = j1;
			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
		}
	}
	/* file 1 has no lines: report all of file 2 as one change */
	if (m == 0)
		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
	if (diff_format == D_IFDEF || diff_format == D_GFORMAT) {
		/* copy what remains of file 1; i0 is reused as the character */
		for (;;) {
#define	c i0
			if ((c = getc(f1)) == EOF)
				return;
			diff_output("%c", c);
		}
#undef c
	}
	if (anychange != 0) {
		if (diff_format == D_CONTEXT)
			dump_context_vec(f1, f2, flags);
		else if (diff_format == D_UNIFIED)
			dump_unified_vec(f1, f2, flags);
	}
}
1027 
/*
 * Print a line range: "a<separator>b" when a is below b, otherwise just
 * the smaller of the two numbers.
 */
static void
range(int a, int b, const char *separator)
{
	if (a < b) {
		diff_output("%d", a);
		diff_output("%s%d", separator, b);
	} else
		diff_output("%d", b);
}
1035 
/*
 * Print a line range as "start,count": "a,n" for several lines, plain "b"
 * for a single line and "b,0" for an empty range (a above b).
 */
static void
uni_range(int a, int b)
{
	if (a > b) {
		diff_output("%d,0", b);
		return;
	}
	if (a == b) {
		diff_output("%d", b);
		return;
	}
	diff_output("%d,%d", a, b - a + 1);
}
1046 
/*
 * Read up to rlen bytes from descriptor fd at offset off into a freshly
 * allocated, NUL-terminated buffer.  A trailing newline, if present, is
 * dropped.  Exits through err(2, ...) if pread() fails.
 *
 * The returned buffer comes from xmalloc(); the caller is responsible for
 * releasing it.
 */
static char *
preadline(int fd, size_t rlen, off_t off)
{
	char *line;
	ssize_t got;

	line = xmalloc(rlen + 1);
	got = pread(fd, line, rlen, off);
	if (got < 0)
		err(2, "preadline");
	if (got > 0 && line[got - 1] == '\n')
		line[got - 1] = '\0';
	else
		line[got] = '\0';
	return (line);
}
1061 
1062 static int
1063 ignoreline(char *line)
1064 {
1065 	int ret;
1066 
1067 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1068 	free(line);
1069 	return (ret == 0);	/* if it matched, it should be ignored. */
1070 }
1071 
1072 /*
1073  * Indicate that there is a difference between lines a and b of the from file
1074  * to get to lines c to d of the to file.  If a is greater then b then there
1075  * are no lines in the from file involved and this means that there were
1076  * lines appended (beginning at b).  If c is greater than d then there are
1077  * lines missing from the to file.
1078  */
1079 static void
1080 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1081     int *pflags)
1082 {
1083 	static size_t max_context = 64;
1084 	long curpos;
1085 	int i, nc, f;
1086 	const char *walk;
1087 
1088 restart:
1089 	if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) &&
1090 	    a > b && c > d)
1091 		return;
1092 	if (ignore_pats != NULL) {
1093 		char *line;
1094 		/*
1095 		 * All lines in the change, insert, or delete must
1096 		 * match an ignore pattern for the change to be
1097 		 * ignored.
1098 		 */
1099 		if (a <= b) {		/* Changes and deletes. */
1100 			for (i = a; i <= b; i++) {
1101 				line = preadline(fileno(f1),
1102 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1103 				if (!ignoreline(line))
1104 					goto proceed;
1105 			}
1106 		}
1107 		if (a > b || c <= d) {	/* Changes and inserts. */
1108 			for (i = c; i <= d; i++) {
1109 				line = preadline(fileno(f2),
1110 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1111 				if (!ignoreline(line))
1112 					goto proceed;
1113 			}
1114 		}
1115 		return;
1116 	}
1117 proceed:
1118 	if (*pflags & D_HEADER && diff_format != D_BRIEF) {
1119 		diff_output("%s %s %s\n", diffargs, file1, file2);
1120 		*pflags &= ~D_HEADER;
1121 	}
1122 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1123 		/*
1124 		 * Allocate change records as needed.
1125 		 */
1126 		if (context_vec_ptr == context_vec_end - 1) {
1127 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1128 			max_context <<= 1;
1129 			context_vec_start = xreallocarray(context_vec_start,
1130 			    max_context, sizeof(*context_vec_start));
1131 			context_vec_end = context_vec_start + max_context;
1132 			context_vec_ptr = context_vec_start + offset;
1133 		}
1134 		if (anychange == 0) {
1135 			/*
1136 			 * Print the context/unidiff header first time through.
1137 			 */
1138 			print_header(file1, file2);
1139 			anychange = 1;
1140 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1141 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1142 			/*
1143 			 * If this change is more than 'diff_context' lines from the
1144 			 * previous change, dump the record and reset it.
1145 			 */
1146 			if (diff_format == D_CONTEXT)
1147 				dump_context_vec(f1, f2, *pflags);
1148 			else
1149 				dump_unified_vec(f1, f2, *pflags);
1150 		}
1151 		context_vec_ptr++;
1152 		context_vec_ptr->a = a;
1153 		context_vec_ptr->b = b;
1154 		context_vec_ptr->c = c;
1155 		context_vec_ptr->d = d;
1156 		return;
1157 	}
1158 	if (anychange == 0)
1159 		anychange = 1;
1160 	switch (diff_format) {
1161 	case D_BRIEF:
1162 		return;
1163 	case D_NORMAL:
1164 	case D_EDIT:
1165 		range(a, b, ",");
1166 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1167 		if (diff_format == D_NORMAL)
1168 			range(c, d, ",");
1169 		diff_output("\n");
1170 		break;
1171 	case D_REVERSE:
1172 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1173 		range(a, b, " ");
1174 		diff_output("\n");
1175 		break;
1176 	case D_NREVERSE:
1177 		if (a > b)
1178 			diff_output("a%d %d\n", b, d - c + 1);
1179 		else {
1180 			diff_output("d%d %d\n", a, b - a + 1);
1181 			if (!(c > d))
1182 				/* add changed lines */
1183 				diff_output("a%d %d\n", b, d - c + 1);
1184 		}
1185 		break;
1186 	}
1187 	if (diff_format == D_GFORMAT) {
1188 		curpos = ftell(f1);
1189 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1190 		nc = ixold[a > b ? b : a - 1] - curpos;
1191 		for (i = 0; i < nc; i++)
1192 			diff_output("%c", getc(f1));
1193 		for (walk = group_format; *walk != '\0'; walk++) {
1194 			if (*walk == '%') {
1195 				walk++;
1196 				switch (*walk) {
1197 				case '<':
1198 					fetch(ixold, a, b, f1, '<', 1, *pflags);
1199 					break;
1200 				case '>':
1201 					fetch(ixnew, c, d, f2, '>', 0, *pflags);
1202 					break;
1203 				default:
1204 					diff_output("%%%c", *walk);
1205 					break;
1206 				}
1207 				continue;
1208 			}
1209 			diff_output("%c", *walk);
1210 		}
1211 	}
1212 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1213 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1214 		if (a <= b && c <= d && diff_format == D_NORMAL)
1215 			diff_output("---\n");
1216 	}
1217 	f = 0;
1218 	if (diff_format != D_GFORMAT)
1219 		f = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1220 	if (f != 0 && diff_format == D_EDIT) {
1221 		/*
1222 		 * A non-zero return value for D_EDIT indicates that the
1223 		 * last line printed was a bare dot (".") that has been
1224 		 * escaped as ".." to prevent ed(1) from misinterpreting
1225 		 * it.  We have to add a substitute command to change this
1226 		 * back and restart where we left off.
1227 		 */
1228 		diff_output(".\n");
1229 		diff_output("%ds/.//\n", a + f - 1);
1230 		b = a + f - 1;
1231 		a = b + 1;
1232 		c += f;
1233 		goto restart;
1234 	}
1235 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1236 		diff_output(".\n");
1237 	if (inifdef) {
1238 		diff_output("#endif /* %s */\n", ifdefname);
1239 		inifdef = 0;
1240 	}
1241 }
1242 
1243 static int
1244 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1245 {
1246 	int i, j, c, lastc, col, nc;
1247 	int	newcol;
1248 
1249 	/*
1250 	 * When doing #ifdef's, copy down to current line
1251 	 * if this is the first file, so that stuff makes it to output.
1252 	 */
1253 	if ((diff_format == D_IFDEF) && oldfile) {
1254 		long curpos = ftell(lb);
1255 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1256 		nc = f[a > b ? b : a - 1] - curpos;
1257 		for (i = 0; i < nc; i++)
1258 			diff_output("%c", getc(lb));
1259 	}
1260 	if (a > b)
1261 		return (0);
1262 	if (diff_format == D_IFDEF) {
1263 		if (inifdef) {
1264 			diff_output("#else /* %s%s */\n",
1265 			    oldfile == 1 ? "!" : "", ifdefname);
1266 		} else {
1267 			if (oldfile)
1268 				diff_output("#ifndef %s\n", ifdefname);
1269 			else
1270 				diff_output("#ifdef %s\n", ifdefname);
1271 		}
1272 		inifdef = 1 + oldfile;
1273 	}
1274 	for (i = a; i <= b; i++) {
1275 		fseek(lb, f[i - 1], SEEK_SET);
1276 		nc = f[i] - f[i - 1];
1277 		if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) &&
1278 		    ch != '\0') {
1279 			diff_output("%c", ch);
1280 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1281 			    || diff_format == D_UNIFIED))
1282 				diff_output("\t");
1283 			else if (diff_format != D_UNIFIED)
1284 				diff_output(" ");
1285 		}
1286 		col = 0;
1287 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1288 			if ((c = getc(lb)) == EOF) {
1289 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1290 				    diff_format == D_NREVERSE)
1291 					warnx("No newline at end of file");
1292 				else
1293 					diff_output("\n\\ No newline at end of "
1294 					    "file\n");
1295 				return (0);
1296 			}
1297 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1298 				newcol = ((col/tabsize)+1)*tabsize;
1299 				do {
1300 					diff_output(" ");
1301 				} while (++col < newcol);
1302 			} else {
1303 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1304 				    && lastc == '.') {
1305 					/*
1306 					 * Don't print a bare "." line
1307 					 * since that will confuse ed(1).
1308 					 * Print ".." instead and return,
1309 					 * giving the caller an offset
1310 					 * from which to restart.
1311 					 */
1312 					diff_output(".\n");
1313 					return (i - a + 1);
1314 				}
1315 				diff_output("%c", c);
1316 				col++;
1317 			}
1318 		}
1319 	}
1320 	return (0);
1321 }
1322 
1323 /*
1324  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1325  */
1326 static int
1327 readhash(FILE *f, int flags)
1328 {
1329 	int i, t, space;
1330 	int sum;
1331 
1332 	sum = 1;
1333 	space = 0;
1334 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1335 		if (flags & D_IGNORECASE)
1336 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1337 				if (flags & D_STRIPCR && t == '\r') {
1338 					t = getc(f);
1339 					if (t == '\n')
1340 						break;
1341 					ungetc(t, f);
1342 				}
1343 				if (t == EOF) {
1344 					if (i == 0)
1345 						return (0);
1346 					break;
1347 				}
1348 				sum = sum * 127 + chrtran[t];
1349 			}
1350 		else
1351 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1352 				if (flags & D_STRIPCR && t == '\r') {
1353 					t = getc(f);
1354 					if (t == '\n')
1355 						break;
1356 					ungetc(t, f);
1357 				}
1358 				if (t == EOF) {
1359 					if (i == 0)
1360 						return (0);
1361 					break;
1362 				}
1363 				sum = sum * 127 + t;
1364 			}
1365 	} else {
1366 		for (i = 0;;) {
1367 			switch (t = getc(f)) {
1368 			case '\r':
1369 			case '\t':
1370 			case '\v':
1371 			case '\f':
1372 			case ' ':
1373 				space++;
1374 				continue;
1375 			default:
1376 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1377 					i++;
1378 					space = 0;
1379 				}
1380 				sum = sum * 127 + chrtran[t];
1381 				i++;
1382 				continue;
1383 			case EOF:
1384 				if (i == 0)
1385 					return (0);
1386 				/* FALLTHROUGH */
1387 			case '\n':
1388 				break;
1389 			}
1390 			break;
1391 		}
1392 	}
1393 	/*
1394 	 * There is a remote possibility that we end up with a zero sum.
1395 	 * Zero is used as an EOF marker, so return 1 instead.
1396 	 */
1397 	return (sum == 0 ? 1 : sum);
1398 }
1399 
/*
 * Decide whether f looks like a text file: rewind it, read the first
 * BUFSIZ bytes (or fewer) and look for a NUL byte.
 *
 * Returns 1 when f is NULL or no NUL byte was found, 0 otherwise.
 */
static int
asciifile(FILE *f)
{
	unsigned char chunk[BUFSIZ];
	size_t got, k;

	if (f == NULL)
		return (1);

	rewind(f);
	got = fread(chunk, 1, sizeof(chunk), f);
	for (k = 0; k < got; k++) {
		if (chunk[k] == '\0')
			return (0);
	}
	return (1);
}
1413 
1414 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1415 
1416 static char *
1417 match_function(const long *f, int pos, FILE *fp)
1418 {
1419 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1420 	size_t nc;
1421 	int last = lastline;
1422 	const char *state = NULL;
1423 
1424 	lastline = pos;
1425 	while (pos > last) {
1426 		fseek(fp, f[pos - 1], SEEK_SET);
1427 		nc = f[pos] - f[pos - 1];
1428 		if (nc >= sizeof(buf))
1429 			nc = sizeof(buf) - 1;
1430 		nc = fread(buf, 1, nc, fp);
1431 		if (nc > 0) {
1432 			buf[nc] = '\0';
1433 			buf[strcspn(buf, "\n")] = '\0';
1434 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1435 				if (begins_with(buf, "private:")) {
1436 					if (!state)
1437 						state = " (private)";
1438 				} else if (begins_with(buf, "protected:")) {
1439 					if (!state)
1440 						state = " (protected)";
1441 				} else if (begins_with(buf, "public:")) {
1442 					if (!state)
1443 						state = " (public)";
1444 				} else {
1445 					strlcpy(lastbuf, buf, sizeof lastbuf);
1446 					if (state)
1447 						strlcat(lastbuf, state,
1448 						    sizeof lastbuf);
1449 					lastmatchline = pos;
1450 					return lastbuf;
1451 				}
1452 			}
1453 		}
1454 		pos--;
1455 	}
1456 	return lastmatchline > 0 ? lastbuf : NULL;
1457 }
1458 
1459 /* dump accumulated "context" diff changes */
1460 static void
1461 dump_context_vec(FILE *f1, FILE *f2, int flags)
1462 {
1463 	struct context_vec *cvp = context_vec_start;
1464 	int lowa, upb, lowc, upd, do_output;
1465 	int a, b, c, d;
1466 	char ch, *f;
1467 
1468 	if (context_vec_start > context_vec_ptr)
1469 		return;
1470 
1471 	b = d = 0;		/* gcc */
1472 	lowa = MAX(1, cvp->a - diff_context);
1473 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1474 	lowc = MAX(1, cvp->c - diff_context);
1475 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1476 
1477 	diff_output("***************");
1478 	if ((flags & D_PROTOTYPE)) {
1479 		f = match_function(ixold, lowa-1, f1);
1480 		if (f != NULL)
1481 			diff_output(" %s", f);
1482 	}
1483 	diff_output("\n*** ");
1484 	range(lowa, upb, ",");
1485 	diff_output(" ****\n");
1486 
1487 	/*
1488 	 * Output changes to the "old" file.  The first loop suppresses
1489 	 * output if there were no changes to the "old" file (we'll see
1490 	 * the "old" lines as context in the "new" list).
1491 	 */
1492 	do_output = 0;
1493 	for (; cvp <= context_vec_ptr; cvp++)
1494 		if (cvp->a <= cvp->b) {
1495 			cvp = context_vec_start;
1496 			do_output++;
1497 			break;
1498 		}
1499 	if (do_output) {
1500 		while (cvp <= context_vec_ptr) {
1501 			a = cvp->a;
1502 			b = cvp->b;
1503 			c = cvp->c;
1504 			d = cvp->d;
1505 
1506 			if (a <= b && c <= d)
1507 				ch = 'c';
1508 			else
1509 				ch = (a <= b) ? 'd' : 'a';
1510 
1511 			if (ch == 'a')
1512 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1513 			else {
1514 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1515 				fetch(ixold, a, b, f1,
1516 				    ch == 'c' ? '!' : '-', 0, flags);
1517 			}
1518 			lowa = b + 1;
1519 			cvp++;
1520 		}
1521 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1522 	}
1523 	/* output changes to the "new" file */
1524 	diff_output("--- ");
1525 	range(lowc, upd, ",");
1526 	diff_output(" ----\n");
1527 
1528 	do_output = 0;
1529 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1530 		if (cvp->c <= cvp->d) {
1531 			cvp = context_vec_start;
1532 			do_output++;
1533 			break;
1534 		}
1535 	if (do_output) {
1536 		while (cvp <= context_vec_ptr) {
1537 			a = cvp->a;
1538 			b = cvp->b;
1539 			c = cvp->c;
1540 			d = cvp->d;
1541 
1542 			if (a <= b && c <= d)
1543 				ch = 'c';
1544 			else
1545 				ch = (a <= b) ? 'd' : 'a';
1546 
1547 			if (ch == 'd')
1548 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1549 			else {
1550 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1551 				fetch(ixnew, c, d, f2,
1552 				    ch == 'c' ? '!' : '+', 0, flags);
1553 			}
1554 			lowc = d + 1;
1555 			cvp++;
1556 		}
1557 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1558 	}
1559 	context_vec_ptr = context_vec_start - 1;
1560 }
1561 
1562 /* dump accumulated "unified" diff changes */
1563 static void
1564 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1565 {
1566 	struct context_vec *cvp = context_vec_start;
1567 	int lowa, upb, lowc, upd;
1568 	int a, b, c, d;
1569 	char ch, *f;
1570 
1571 	if (context_vec_start > context_vec_ptr)
1572 		return;
1573 
1574 	b = d = 0;		/* gcc */
1575 	lowa = MAX(1, cvp->a - diff_context);
1576 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1577 	lowc = MAX(1, cvp->c - diff_context);
1578 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1579 
1580 	diff_output("@@ -");
1581 	uni_range(lowa, upb);
1582 	diff_output(" +");
1583 	uni_range(lowc, upd);
1584 	diff_output(" @@");
1585 	if ((flags & D_PROTOTYPE)) {
1586 		f = match_function(ixold, lowa-1, f1);
1587 		if (f != NULL)
1588 			diff_output(" %s", f);
1589 	}
1590 	diff_output("\n");
1591 
1592 	/*
1593 	 * Output changes in "unified" diff format--the old and new lines
1594 	 * are printed together.
1595 	 */
1596 	for (; cvp <= context_vec_ptr; cvp++) {
1597 		a = cvp->a;
1598 		b = cvp->b;
1599 		c = cvp->c;
1600 		d = cvp->d;
1601 
1602 		/*
1603 		 * c: both new and old changes
1604 		 * d: only changes in the old file
1605 		 * a: only changes in the new file
1606 		 */
1607 		if (a <= b && c <= d)
1608 			ch = 'c';
1609 		else
1610 			ch = (a <= b) ? 'd' : 'a';
1611 
1612 		switch (ch) {
1613 		case 'c':
1614 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1615 			fetch(ixold, a, b, f1, '-', 0, flags);
1616 			fetch(ixnew, c, d, f2, '+', 0, flags);
1617 			break;
1618 		case 'd':
1619 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1620 			fetch(ixold, a, b, f1, '-', 0, flags);
1621 			break;
1622 		case 'a':
1623 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1624 			fetch(ixnew, c, d, f2, '+', 0, flags);
1625 			break;
1626 		}
1627 		lowa = b + 1;
1628 		lowc = d + 1;
1629 	}
1630 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1631 
1632 	context_vec_ptr = context_vec_start - 1;
1633 }
1634 
1635 static void
1636 print_header(const char *file1, const char *file2)
1637 {
1638 	const char *time_format;
1639 	char buf1[256];
1640 	char buf2[256];
1641 	char end1[10];
1642 	char end2[10];
1643 	struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1644 	int nsec1 = stb1.st_mtim.tv_nsec;
1645 	int nsec2 = stb2.st_mtim.tv_nsec;
1646 
1647 	time_format = "%Y-%m-%d %H:%M:%S";
1648 
1649 	if (cflag)
1650 		time_format = "%c";
1651 	tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1652 	tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1653 	strftime(buf1, 256, time_format, tm_ptr1);
1654 	strftime(buf2, 256, time_format, tm_ptr2);
1655 	if (!cflag) {
1656 		strftime(end1, 10, "%z", tm_ptr1);
1657 		strftime(end2, 10, "%z", tm_ptr2);
1658 		sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1659 		sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1660 	}
1661 	if (label[0] != NULL)
1662 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1663 		    label[0]);
1664 	else
1665 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1666 		    file1, buf1);
1667 	if (label[1] != NULL)
1668 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1669 		    label[1]);
1670 	else
1671 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1672 		    file2, buf2);
1673 }
1674