xref: /freebsd/usr.bin/diff/diffreg.c (revision cc65eb4e792a203a8b4a17b0e3af7831e5044eca)
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
69 
70 #include <sys/capsicum.h>
71 #include <sys/procdesc.h>
72 #include <sys/stat.h>
73 #include <sys/types.h>
74 #include <sys/event.h>
75 #include <sys/wait.h>
76 
77 #include <capsicum_helpers.h>
78 #include <ctype.h>
79 #include <err.h>
80 #include <errno.h>
81 #include <fcntl.h>
82 #include <paths.h>
83 #include <stddef.h>
84 #include <stdint.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <unistd.h>
89 #include <limits.h>
90 #include <signal.h>
91 
92 #include "diff.h"
93 #include "xmalloc.h"
94 
95 #define _PATH_PR "/usr/bin/pr"
96 
97 #ifdef ST_MTIM_NSEC
98 # define TIMESPEC_NS(timespec) ((timespec).ST_MTIM_NSEC)
99 #else
100 # define TIMESPEC_NS(timespec) 0
101 #endif
102 
103 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
104 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
105 
106 /*
107  * diff - compare two files.
108  */
109 
110 /*
111  *	Uses an algorithm due to Harold Stone, which finds
112  *	a pair of longest identical subsequences in the two
113  *	files.
114  *
115  *	The major goal is to generate the match vector J.
116  *	J[i] is the index of the line in file1 corresponding
117  *	to line i file0. J[i] = 0 if there is no
118  *	such line in file1.
119  *
120  *	Lines are hashed so as to work in core. All potential
121  *	matches are located by sorting the lines of each file
122  *	on the hash (called ``value''). In particular, this
123  *	collects the equivalence classes in file1 together.
124  *	Subroutine equiv replaces the value of each line in
125  *	file0 by the index of the first element of its
126  *	matching equivalence in (the reordered) file1.
127  *	To save space equiv squeezes file1 into a single
128  *	array member in which the equivalence classes
129  *	are simply concatenated, except that their first
130  *	members are flagged by changing sign.
131  *
132  *	Next the indices that point into member are unsorted into
133  *	array class according to the original order of file0.
134  *
135  *	The cleverness lies in routine stone. This marches
136  *	through the lines of file0, developing a vector klist
137  *	of "k-candidates". At step i a k-candidate is a matched
138  *	pair of lines x,y (x in file0 y in file1) such that
139  *	there is a common subsequence of length k
140  *	between the first i lines of file0 and the first y
141  *	lines of file1, but there is no such subsequence for
142  *	any smaller y. x is the earliest possible mate to y
143  *	that occurs in such a subsequence.
144  *
145  *	Whenever any of the members of the equivalence class of
146  *	lines in file1 matable to a line in file0 has serial number
147  *	less than the y of some k-candidate, that k-candidate
148  *	with the smallest such y is replaced. The new
149  *	k-candidate is chained (via pred) to the current
150  *	k-1 candidate so that the actual subsequence can
151  *	be recovered. When a member has serial number greater
152  *	that the y of all k-candidates, the klist is extended.
153  *	At the end, the longest subsequence is pulled out
154  *	and placed in the array J by unravel
155  *
156  *	With J in hand, the matches there recorded are
157  *	check'ed against reality to assure that no spurious
158  *	matches have crept in due to hashing. If they have,
159  *	they are broken, and "jackpot" is recorded--a harmless
160  *	matter except that a true match for a spuriously
161  *	mated line may now be unnecessarily reported as a change.
162  *
163  *	Much of the complexity of the program comes simply
164  *	from trying to minimize core utilization and
165  *	maximize the range of doable problems by dynamically
166  *	allocating what is needed and reusing what is not.
167  *	The core requirements for problems larger than somewhat
168  *	are (in words) 2*length(file0) + length(file1) +
169  *	3*(number of k-candidates installed),  typically about
170  *	6n words for files of length n.
171  */
172 
173 struct cand {
174 	int	x;
175 	int	y;
176 	int	pred;
177 };
178 
179 static struct line {
180 	int	serial;
181 	int	value;
182 } *file[2];
183 
184 /*
185  * The following struct is used to record change information when
186  * doing a "context" or "unified" diff.  (see routine "change" to
187  * understand the highly mnemonic field names)
188  */
189 struct context_vec {
190 	int	a;		/* start line in old file */
191 	int	b;		/* end line in old file */
192 	int	c;		/* start line in new file */
193 	int	d;		/* end line in new file */
194 };
195 
196 #define	diff_output	printf
197 static FILE	*opentemp(const char *);
198 static void	 output(char *, FILE *, char *, FILE *, int);
199 static void	 check(FILE *, FILE *, int);
200 static void	 range(int, int, const char *);
201 static void	 uni_range(int, int);
202 static void	 dump_context_vec(FILE *, FILE *, int);
203 static void	 dump_unified_vec(FILE *, FILE *, int);
204 static void	 prepare(int, FILE *, size_t, int);
205 static void	 prune(void);
206 static void	 equiv(struct line *, int, struct line *, int, int *);
207 static void	 unravel(int);
208 static void	 unsort(struct line *, int, int *);
209 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
210 static void	 sort(struct line *, int);
211 static void	 print_header(const char *, const char *);
212 static int	 ignoreline(char *);
213 static int	 asciifile(FILE *);
214 static int	 fetch(long *, int, int, FILE *, int, int, int);
215 static int	 newcand(int, int, int);
216 static int	 search(int *, int, int);
217 static int	 skipline(FILE *);
218 static int	 isqrt(int);
219 static int	 stone(int *, int, int *, int *, int);
220 static int	 readhash(FILE *, int);
221 static int	 files_differ(FILE *, FILE *, int);
222 static char	*match_function(const long *, int, FILE *);
223 static char	*preadline(int, size_t, off_t);
224 
225 static int  *J;			/* will be overlaid on class */
226 static int  *class;		/* will be overlaid on file[0] */
227 static int  *klist;		/* will be overlaid on file[0] after class */
228 static int  *member;		/* will be overlaid on file[1] */
229 static int   clen;
230 static int   inifdef;		/* whether or not we are in a #ifdef block */
231 static int   len[2];
232 static int   pref, suff;	/* length of prefix and suffix */
233 static int   slen[2];
234 static int   anychange;
235 static long *ixnew;		/* will be overlaid on file[1] */
236 static long *ixold;		/* will be overlaid on klist */
237 static struct cand *clist;	/* merely a free storage pot for candidates */
238 static int   clistlen;		/* the length of clist */
239 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
240 static u_char *chrtran;		/* translation table for case-folding */
241 static struct context_vec *context_vec_start;
242 static struct context_vec *context_vec_end;
243 static struct context_vec *context_vec_ptr;
244 
245 #define FUNCTION_CONTEXT_SIZE	55
246 static char lastbuf[FUNCTION_CONTEXT_SIZE];
247 static int lastline;
248 static int lastmatchline;
249 
250 
251 /*
252  * chrtran points to one of 2 translation tables: cup2low if folding upper to
253  * lower case clow2low if not folding case
254  */
255 static u_char clow2low[256] = {
256 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
257 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
258 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
259 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
260 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
261 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
262 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
263 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
264 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
265 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
266 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
267 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
268 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
269 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
270 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
271 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
272 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
273 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
274 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
275 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
276 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
277 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
278 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
279 	0xfd, 0xfe, 0xff
280 };
281 
282 static u_char cup2low[256] = {
283 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
284 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
285 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
286 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
287 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
288 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
289 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
290 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
291 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
292 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
293 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
294 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
295 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
296 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
297 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
298 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
299 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
300 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
301 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
302 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
303 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
304 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
305 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
306 	0xfd, 0xfe, 0xff
307 };
308 
309 int
310 diffreg(char *file1, char *file2, int flags, int capsicum)
311 {
312 	FILE *f1, *f2;
313 	int i, rval;
314 	int	ostdout = -1;
315 	int pr_pd, kq;
316 	struct kevent *e;
317 	cap_rights_t rights_ro;
318 
319 	e = NULL;
320 	kq = -1;
321 	f1 = f2 = NULL;
322 	rval = D_SAME;
323 	anychange = 0;
324 	lastline = 0;
325 	lastmatchline = 0;
326 	context_vec_ptr = context_vec_start - 1;
327 	if (flags & D_IGNORECASE)
328 		chrtran = cup2low;
329 	else
330 		chrtran = clow2low;
331 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
332 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
333 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
334 		goto closem;
335 
336 	if (flags & D_EMPTY1)
337 		f1 = fopen(_PATH_DEVNULL, "r");
338 	else {
339 		if (!S_ISREG(stb1.st_mode)) {
340 			if ((f1 = opentemp(file1)) == NULL ||
341 			    fstat(fileno(f1), &stb1) < 0) {
342 				warn("%s", file1);
343 				status |= 2;
344 				goto closem;
345 			}
346 		} else if (strcmp(file1, "-") == 0)
347 			f1 = stdin;
348 		else
349 			f1 = fopen(file1, "r");
350 	}
351 	if (f1 == NULL) {
352 		warn("%s", file1);
353 		status |= 2;
354 		goto closem;
355 	}
356 
357 	if (flags & D_EMPTY2)
358 		f2 = fopen(_PATH_DEVNULL, "r");
359 	else {
360 		if (!S_ISREG(stb2.st_mode)) {
361 			if ((f2 = opentemp(file2)) == NULL ||
362 			    fstat(fileno(f2), &stb2) < 0) {
363 				warn("%s", file2);
364 				status |= 2;
365 				goto closem;
366 			}
367 		} else if (strcmp(file2, "-") == 0)
368 			f2 = stdin;
369 		else
370 			f2 = fopen(file2, "r");
371 	}
372 	if (f2 == NULL) {
373 		warn("%s", file2);
374 		status |= 2;
375 		goto closem;
376 	}
377 
378 	if (lflag) {
379 		/* redirect stdout to pr */
380 		int	 pfd[2];
381 		pid_t	pid;
382 		char	*header;
383 
384 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
385 		signal(SIGPIPE, SIG_IGN);
386 		fflush(stdout);
387 		rewind(stdout);
388 		pipe(pfd);
389 		switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
390 		case -1:
391 			status |= 2;
392 			free(header);
393 			err(2, "No more processes");
394 		case 0:
395 			/* child */
396 			if (pfd[0] != STDIN_FILENO) {
397 				dup2(pfd[0], STDIN_FILENO);
398 				close(pfd[0]);
399 			}
400 			close(pfd[1]);
401 			execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
402 			_exit(127);
403 		default:
404 
405 			/* parent */
406 			if (pfd[1] != STDOUT_FILENO) {
407 				ostdout = dup(STDOUT_FILENO);
408 				dup2(pfd[1], STDOUT_FILENO);
409 				close(pfd[1]);
410 			}
411 			close(pfd[0]);
412 			rewind(stdout);
413 			free(header);
414 			kq = kqueue();
415 			if (kq == -1)
416 				err(2, "kqueue");
417 			e = xmalloc(sizeof(struct kevent));
418 			EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
419 			    NULL);
420 			if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
421 				err(2, "kevent");
422 		}
423 	}
424 
425 	if (capsicum) {
426 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
427 		if (cap_rights_limit(fileno(f1), &rights_ro) < 0
428 		    && errno != ENOSYS)
429 			err(2, "unable to limit rights on: %s", file1);
430 		if (cap_rights_limit(fileno(f2), &rights_ro) < 0 &&
431 		    errno != ENOSYS)
432 			err(2, "unable to limit rights on: %s", file2);
433 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
434 			/* stding has already been limited */
435 			if (caph_limit_stderr() == -1)
436 				err(2, "unable to limit stderr");
437 			if (caph_limit_stdout() == -1)
438 				err(2, "unable to limit stdout");
439 		} else if (caph_limit_stdio() == -1)
440 				err(2, "unable to limit stdio");
441 
442 		caph_cache_catpages();
443 		caph_cache_tzdata();
444 		if (cap_enter() < 0 && errno != ENOSYS)
445 			err(2, "unable to enter capability mode");
446 	}
447 
448 	switch (files_differ(f1, f2, flags)) {
449 	case 0:
450 		goto closem;
451 	case 1:
452 		break;
453 	default:
454 		/* error */
455 		status |= 2;
456 		goto closem;
457 	}
458 
459 	if ((flags & D_FORCEASCII) == 0 &&
460 	    (!asciifile(f1) || !asciifile(f2))) {
461 		rval = D_BINARY;
462 		status |= 1;
463 		goto closem;
464 	}
465 	prepare(0, f1, stb1.st_size, flags);
466 	prepare(1, f2, stb2.st_size, flags);
467 
468 	prune();
469 	sort(sfile[0], slen[0]);
470 	sort(sfile[1], slen[1]);
471 
472 	member = (int *)file[1];
473 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
474 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
475 
476 	class = (int *)file[0];
477 	unsort(sfile[0], slen[0], class);
478 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
479 
480 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
481 	clen = 0;
482 	clistlen = 100;
483 	clist = xcalloc(clistlen, sizeof(*clist));
484 	i = stone(class, slen[0], member, klist, flags);
485 	free(member);
486 	free(class);
487 
488 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
489 	unravel(klist[i]);
490 	free(clist);
491 	free(klist);
492 
493 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
494 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
495 	check(f1, f2, flags);
496 	output(file1, f1, file2, f2, flags);
497 	if (ostdout != -1 && e != NULL) {
498 		/* close the pipe to pr and restore stdout */
499 		int wstatus;
500 
501 		fflush(stdout);
502 		if (ostdout != STDOUT_FILENO) {
503 			close(STDOUT_FILENO);
504 			dup2(ostdout, STDOUT_FILENO);
505 			close(ostdout);
506 		}
507 		if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
508 			err(2, "kevent");
509 		wstatus = e[0].data;
510 		close(kq);
511 		if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
512 			errx(2, "pr exited abnormally");
513 		else if (WIFSIGNALED(wstatus))
514 			errx(2, "pr killed by signal %d",
515 			    WTERMSIG(wstatus));
516 	}
517 
518 closem:
519 	if (anychange) {
520 		status |= 1;
521 		if (rval == D_SAME)
522 			rval = D_DIFFER;
523 	}
524 	if (f1 != NULL)
525 		fclose(f1);
526 	if (f2 != NULL)
527 		fclose(f2);
528 
529 	return (rval);
530 }
531 
532 /*
533  * Check to see if the given files differ.
534  * Returns 0 if they are the same, 1 if different, and -1 on error.
535  * XXX - could use code from cmp(1) [faster]
536  */
537 static int
538 files_differ(FILE *f1, FILE *f2, int flags)
539 {
540 	char buf1[BUFSIZ], buf2[BUFSIZ];
541 	size_t i, j;
542 
543 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
544 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
545 		return (1);
546 	for (;;) {
547 		i = fread(buf1, 1, sizeof(buf1), f1);
548 		j = fread(buf2, 1, sizeof(buf2), f2);
549 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
550 			return (-1);
551 		if (i != j)
552 			return (1);
553 		if (i == 0)
554 			return (0);
555 		if (memcmp(buf1, buf2, i) != 0)
556 			return (1);
557 	}
558 }
559 
560 static FILE *
561 opentemp(const char *f)
562 {
563 	char buf[BUFSIZ], tempfile[PATH_MAX];
564 	ssize_t nread;
565 	int ifd, ofd;
566 
567 	if (strcmp(f, "-") == 0)
568 		ifd = STDIN_FILENO;
569 	else if ((ifd = open(f, O_RDONLY, 0644)) < 0)
570 		return (NULL);
571 
572 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
573 
574 	if ((ofd = mkstemp(tempfile)) < 0) {
575 		close(ifd);
576 		return (NULL);
577 	}
578 	unlink(tempfile);
579 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
580 		if (write(ofd, buf, nread) != nread) {
581 			close(ifd);
582 			close(ofd);
583 			return (NULL);
584 		}
585 	}
586 	close(ifd);
587 	lseek(ofd, (off_t)0, SEEK_SET);
588 	return (fdopen(ofd, "r"));
589 }
590 
591 char *
592 splice(char *dir, char *path)
593 {
594 	char *tail, *buf;
595 	size_t dirlen;
596 
597 	dirlen = strlen(dir);
598 	while (dirlen != 0 && dir[dirlen - 1] == '/')
599 	    dirlen--;
600 	if ((tail = strrchr(path, '/')) == NULL)
601 		tail = path;
602 	else
603 		tail++;
604 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
605 	return (buf);
606 }
607 
608 static void
609 prepare(int i, FILE *fd, size_t filesize, int flags)
610 {
611 	struct line *p;
612 	int h;
613 	size_t sz, j;
614 
615 	rewind(fd);
616 
617 	sz = ((unsigned long)filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
618 	if (sz < 100)
619 		sz = 100;
620 
621 	p = xcalloc(sz + 3, sizeof(*p));
622 	for (j = 0; (h = readhash(fd, flags));) {
623 		if (j == sz) {
624 			sz = sz * 3 / 2;
625 			p = xreallocarray(p, sz + 3, sizeof(*p));
626 		}
627 		p[++j].value = h;
628 	}
629 	len[i] = j;
630 	file[i] = p;
631 }
632 
633 static void
634 prune(void)
635 {
636 	int i, j;
637 
638 	for (pref = 0; pref < len[0] && pref < len[1] &&
639 	    file[0][pref + 1].value == file[1][pref + 1].value;
640 	    pref++)
641 		;
642 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
643 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
644 	    suff++)
645 		;
646 	for (j = 0; j < 2; j++) {
647 		sfile[j] = file[j] + pref;
648 		slen[j] = len[j] - pref - suff;
649 		for (i = 0; i <= slen[j]; i++)
650 			sfile[j][i].serial = i;
651 	}
652 }
653 
654 static void
655 equiv(struct line *a, int n, struct line *b, int m, int *c)
656 {
657 	int i, j;
658 
659 	i = j = 1;
660 	while (i <= n && j <= m) {
661 		if (a[i].value < b[j].value)
662 			a[i++].value = 0;
663 		else if (a[i].value == b[j].value)
664 			a[i++].value = j;
665 		else
666 			j++;
667 	}
668 	while (i <= n)
669 		a[i++].value = 0;
670 	b[m + 1].value = 0;
671 	j = 0;
672 	while (++j <= m) {
673 		c[j] = -b[j].serial;
674 		while (b[j + 1].value == b[j].value) {
675 			j++;
676 			c[j] = b[j].serial;
677 		}
678 	}
679 	c[j] = -1;
680 }
681 
682 /* Code taken from ping.c */
683 static int
684 isqrt(int n)
685 {
686 	int y, x = 1;
687 
688 	if (n == 0)
689 		return (0);
690 
691 	do { /* newton was a stinker */
692 		y = x;
693 		x = n / x;
694 		x += y;
695 		x /= 2;
696 	} while ((x - y) > 1 || (x - y) < -1);
697 
698 	return (x);
699 }
700 
701 static int
702 stone(int *a, int n, int *b, int *c, int flags)
703 {
704 	int i, k, y, j, l;
705 	int oldc, tc, oldl, sq;
706 	u_int numtries, bound;
707 
708 	if (flags & D_MINIMAL)
709 		bound = UINT_MAX;
710 	else {
711 		sq = isqrt(n);
712 		bound = MAXIMUM(256, sq);
713 	}
714 
715 	k = 0;
716 	c[0] = newcand(0, 0, 0);
717 	for (i = 1; i <= n; i++) {
718 		j = a[i];
719 		if (j == 0)
720 			continue;
721 		y = -b[j];
722 		oldl = 0;
723 		oldc = c[0];
724 		numtries = 0;
725 		do {
726 			if (y <= clist[oldc].y)
727 				continue;
728 			l = search(c, k, y);
729 			if (l != oldl + 1)
730 				oldc = c[l - 1];
731 			if (l <= k) {
732 				if (clist[c[l]].y <= y)
733 					continue;
734 				tc = c[l];
735 				c[l] = newcand(i, y, oldc);
736 				oldc = tc;
737 				oldl = l;
738 				numtries++;
739 			} else {
740 				c[l] = newcand(i, y, oldc);
741 				k++;
742 				break;
743 			}
744 		} while ((y = b[++j]) > 0 && numtries < bound);
745 	}
746 	return (k);
747 }
748 
749 static int
750 newcand(int x, int y, int pred)
751 {
752 	struct cand *q;
753 
754 	if (clen == clistlen) {
755 		clistlen = clistlen * 11 / 10;
756 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
757 	}
758 	q = clist + clen;
759 	q->x = x;
760 	q->y = y;
761 	q->pred = pred;
762 	return (clen++);
763 }
764 
765 static int
766 search(int *c, int k, int y)
767 {
768 	int i, j, l, t;
769 
770 	if (clist[c[k]].y < y)	/* quick look for typical case */
771 		return (k + 1);
772 	i = 0;
773 	j = k + 1;
774 	for (;;) {
775 		l = (i + j) / 2;
776 		if (l <= i)
777 			break;
778 		t = clist[c[l]].y;
779 		if (t > y)
780 			j = l;
781 		else if (t < y)
782 			i = l;
783 		else
784 			return (l);
785 	}
786 	return (l + 1);
787 }
788 
789 static void
790 unravel(int p)
791 {
792 	struct cand *q;
793 	int i;
794 
795 	for (i = 0; i <= len[0]; i++)
796 		J[i] = i <= pref ? i :
797 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
798 	for (q = clist + p; q->y != 0; q = clist + q->pred)
799 		J[q->x + pref] = q->y + pref;
800 }
801 
802 /*
803  * Check does double duty:
804  *  1.	ferret out any fortuitous correspondences due
805  *	to confounding by hashing (which result in "jackpot")
806  *  2.  collect random access indexes to the two files
807  */
808 static void
809 check(FILE *f1, FILE *f2, int flags)
810 {
811 	int i, j, jackpot, c, d;
812 	long ctold, ctnew;
813 
814 	rewind(f1);
815 	rewind(f2);
816 	j = 1;
817 	ixold[0] = ixnew[0] = 0;
818 	jackpot = 0;
819 	ctold = ctnew = 0;
820 	for (i = 1; i <= len[0]; i++) {
821 		if (J[i] == 0) {
822 			ixold[i] = ctold += skipline(f1);
823 			continue;
824 		}
825 		while (j < J[i]) {
826 			ixnew[j] = ctnew += skipline(f2);
827 			j++;
828 		}
829 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
830 			for (;;) {
831 				c = getc(f1);
832 				d = getc(f2);
833 				/*
834 				 * GNU diff ignores a missing newline
835 				 * in one file for -b or -w.
836 				 */
837 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
838 					if (c == EOF && d == '\n') {
839 						ctnew++;
840 						break;
841 					} else if (c == '\n' && d == EOF) {
842 						ctold++;
843 						break;
844 					}
845 				}
846 				ctold++;
847 				ctnew++;
848 				if (flags & D_STRIPCR) {
849 					if (c == '\r') {
850 						if ((c = getc(f1)) == '\n') {
851 							ctnew++;
852 							break;
853 						}
854 					}
855 					if (d == '\r') {
856 						if ((d = getc(f2)) == '\n') {
857 							ctold++;
858 							break;
859 						}
860 					}
861 				}
862 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
863 				    isspace(d)) {
864 					do {
865 						if (c == '\n')
866 							break;
867 						ctold++;
868 					} while (isspace(c = getc(f1)));
869 					do {
870 						if (d == '\n')
871 							break;
872 						ctnew++;
873 					} while (isspace(d = getc(f2)));
874 				} else if ((flags & D_IGNOREBLANKS)) {
875 					while (isspace(c) && c != '\n') {
876 						c = getc(f1);
877 						ctold++;
878 					}
879 					while (isspace(d) && d != '\n') {
880 						d = getc(f2);
881 						ctnew++;
882 					}
883 				}
884 				if (chrtran[c] != chrtran[d]) {
885 					jackpot++;
886 					J[i] = 0;
887 					if (c != '\n' && c != EOF)
888 						ctold += skipline(f1);
889 					if (d != '\n' && c != EOF)
890 						ctnew += skipline(f2);
891 					break;
892 				}
893 				if (c == '\n' || c == EOF)
894 					break;
895 			}
896 		} else {
897 			for (;;) {
898 				ctold++;
899 				ctnew++;
900 				if ((c = getc(f1)) != (d = getc(f2))) {
901 					/* jackpot++; */
902 					J[i] = 0;
903 					if (c != '\n' && c != EOF)
904 						ctold += skipline(f1);
905 					if (d != '\n' && c != EOF)
906 						ctnew += skipline(f2);
907 					break;
908 				}
909 				if (c == '\n' || c == EOF)
910 					break;
911 			}
912 		}
913 		ixold[i] = ctold;
914 		ixnew[j] = ctnew;
915 		j++;
916 	}
917 	for (; j <= len[1]; j++) {
918 		ixnew[j] = ctnew += skipline(f2);
919 	}
920 	/*
921 	 * if (jackpot)
922 	 *	fprintf(stderr, "jackpot\n");
923 	 */
924 }
925 
926 /* shellsort CACM #201 */
927 static void
928 sort(struct line *a, int n)
929 {
930 	struct line *ai, *aim, w;
931 	int j, m = 0, k;
932 
933 	if (n == 0)
934 		return;
935 	for (j = 1; j <= n; j *= 2)
936 		m = 2 * j - 1;
937 	for (m /= 2; m != 0; m /= 2) {
938 		k = n - m;
939 		for (j = 1; j <= k; j++) {
940 			for (ai = &a[j]; ai > a; ai -= m) {
941 				aim = &ai[m];
942 				if (aim < ai)
943 					break;	/* wraparound */
944 				if (aim->value > ai[0].value ||
945 				    (aim->value == ai[0].value &&
946 					aim->serial > ai[0].serial))
947 					break;
948 				w.value = ai[0].value;
949 				ai[0].value = aim->value;
950 				aim->value = w.value;
951 				w.serial = ai[0].serial;
952 				ai[0].serial = aim->serial;
953 				aim->serial = w.serial;
954 			}
955 		}
956 	}
957 }
958 
959 static void
960 unsort(struct line *f, int l, int *b)
961 {
962 	int *a, i;
963 
964 	a = xcalloc(l + 1, sizeof(*a));
965 	for (i = 1; i <= l; i++)
966 		a[f[i].serial] = f[i].value;
967 	for (i = 1; i <= l; i++)
968 		b[i] = a[i];
969 	free(a);
970 }
971 
972 static int
973 skipline(FILE *f)
974 {
975 	int i, c;
976 
977 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
978 		continue;
979 	return (i);
980 }
981 
982 static void
983 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
984 {
985 	int m, i0, i1, j0, j1;
986 
987 	rewind(f1);
988 	rewind(f2);
989 	m = len[0];
990 	J[0] = 0;
991 	J[m + 1] = len[1] + 1;
992 	if (diff_format != D_EDIT) {
993 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
994 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
995 				i0++;
996 			j0 = J[i0 - 1] + 1;
997 			i1 = i0 - 1;
998 			while (i1 < m && J[i1 + 1] == 0)
999 				i1++;
1000 			j1 = J[i1 + 1] - 1;
1001 			J[i1] = j1;
1002 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
1003 		}
1004 	} else {
1005 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
1006 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
1007 				i0--;
1008 			j0 = J[i0 + 1] - 1;
1009 			i1 = i0 + 1;
1010 			while (i1 > 1 && J[i1 - 1] == 0)
1011 				i1--;
1012 			j1 = J[i1 - 1] + 1;
1013 			J[i1] = j1;
1014 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
1015 		}
1016 	}
1017 	if (m == 0)
1018 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
1019 	if (diff_format == D_IFDEF) {
1020 		for (;;) {
1021 #define	c i0
1022 			if ((c = getc(f1)) == EOF)
1023 				return;
1024 			diff_output("%c", c);
1025 		}
1026 #undef c
1027 	}
1028 	if (anychange != 0) {
1029 		if (diff_format == D_CONTEXT)
1030 			dump_context_vec(f1, f2, flags);
1031 		else if (diff_format == D_UNIFIED)
1032 			dump_unified_vec(f1, f2, flags);
1033 	}
1034 }
1035 
1036 static void
1037 range(int a, int b, const char *separator)
1038 {
1039 	diff_output("%d", a > b ? b : a);
1040 	if (a < b)
1041 		diff_output("%s%d", separator, b);
1042 }
1043 
1044 static void
1045 uni_range(int a, int b)
1046 {
1047 	if (a < b)
1048 		diff_output("%d,%d", a, b - a + 1);
1049 	else if (a == b)
1050 		diff_output("%d", b);
1051 	else
1052 		diff_output("%d,0", b);
1053 }
1054 
1055 static char *
1056 preadline(int fd, size_t rlen, off_t off)
1057 {
1058 	char *line;
1059 	ssize_t nr;
1060 
1061 	line = xmalloc(rlen + 1);
1062 	if ((nr = pread(fd, line, rlen, off)) < 0)
1063 		err(2, "preadline");
1064 	if (nr > 0 && line[nr-1] == '\n')
1065 		nr--;
1066 	line[nr] = '\0';
1067 	return (line);
1068 }
1069 
1070 static int
1071 ignoreline(char *line)
1072 {
1073 	int ret;
1074 
1075 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1076 	free(line);
1077 	return (ret == 0);	/* if it matched, it should be ignored. */
1078 }
1079 
1080 /*
1081  * Indicate that there is a difference between lines a and b of the from file
1082  * to get to lines c to d of the to file.  If a is greater then b then there
1083  * are no lines in the from file involved and this means that there were
1084  * lines appended (beginning at b).  If c is greater than d then there are
1085  * lines missing from the to file.
1086  */
1087 static void
1088 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1089     int *pflags)
1090 {
1091 	static size_t max_context = 64;
1092 	int i;
1093 
1094 restart:
1095 	if (diff_format != D_IFDEF && a > b && c > d)
1096 		return;
1097 	if (ignore_pats != NULL) {
1098 		char *line;
1099 		/*
1100 		 * All lines in the change, insert, or delete must
1101 		 * match an ignore pattern for the change to be
1102 		 * ignored.
1103 		 */
1104 		if (a <= b) {		/* Changes and deletes. */
1105 			for (i = a; i <= b; i++) {
1106 				line = preadline(fileno(f1),
1107 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1108 				if (!ignoreline(line))
1109 					goto proceed;
1110 			}
1111 		}
1112 		if (a > b || c <= d) {	/* Changes and inserts. */
1113 			for (i = c; i <= d; i++) {
1114 				line = preadline(fileno(f2),
1115 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1116 				if (!ignoreline(line))
1117 					goto proceed;
1118 			}
1119 		}
1120 		return;
1121 	}
1122 proceed:
1123 	if (*pflags & D_HEADER) {
1124 		diff_output("%s %s %s\n", diffargs, file1, file2);
1125 		*pflags &= ~D_HEADER;
1126 	}
1127 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1128 		/*
1129 		 * Allocate change records as needed.
1130 		 */
1131 		if (context_vec_ptr == context_vec_end - 1) {
1132 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1133 			max_context <<= 1;
1134 			context_vec_start = xreallocarray(context_vec_start,
1135 			    max_context, sizeof(*context_vec_start));
1136 			context_vec_end = context_vec_start + max_context;
1137 			context_vec_ptr = context_vec_start + offset;
1138 		}
1139 		if (anychange == 0) {
1140 			/*
1141 			 * Print the context/unidiff header first time through.
1142 			 */
1143 			print_header(file1, file2);
1144 			anychange = 1;
1145 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1146 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1147 			/*
1148 			 * If this change is more than 'diff_context' lines from the
1149 			 * previous change, dump the record and reset it.
1150 			 */
1151 			if (diff_format == D_CONTEXT)
1152 				dump_context_vec(f1, f2, *pflags);
1153 			else
1154 				dump_unified_vec(f1, f2, *pflags);
1155 		}
1156 		context_vec_ptr++;
1157 		context_vec_ptr->a = a;
1158 		context_vec_ptr->b = b;
1159 		context_vec_ptr->c = c;
1160 		context_vec_ptr->d = d;
1161 		return;
1162 	}
1163 	if (anychange == 0)
1164 		anychange = 1;
1165 	switch (diff_format) {
1166 	case D_BRIEF:
1167 		return;
1168 	case D_NORMAL:
1169 	case D_EDIT:
1170 		range(a, b, ",");
1171 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1172 		if (diff_format == D_NORMAL)
1173 			range(c, d, ",");
1174 		diff_output("\n");
1175 		break;
1176 	case D_REVERSE:
1177 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1178 		range(a, b, " ");
1179 		diff_output("\n");
1180 		break;
1181 	case D_NREVERSE:
1182 		if (a > b)
1183 			diff_output("a%d %d\n", b, d - c + 1);
1184 		else {
1185 			diff_output("d%d %d\n", a, b - a + 1);
1186 			if (!(c > d))
1187 				/* add changed lines */
1188 				diff_output("a%d %d\n", b, d - c + 1);
1189 		}
1190 		break;
1191 	}
1192 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1193 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1194 		if (a <= b && c <= d && diff_format == D_NORMAL)
1195 			diff_output("---\n");
1196 	}
1197 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1198 	if (i != 0 && diff_format == D_EDIT) {
1199 		/*
1200 		 * A non-zero return value for D_EDIT indicates that the
1201 		 * last line printed was a bare dot (".") that has been
1202 		 * escaped as ".." to prevent ed(1) from misinterpreting
1203 		 * it.  We have to add a substitute command to change this
1204 		 * back and restart where we left off.
1205 		 */
1206 		diff_output(".\n");
1207 		diff_output("%ds/.//\n", a + i - 1);
1208 		b = a + i - 1;
1209 		a = b + 1;
1210 		c += i;
1211 		goto restart;
1212 	}
1213 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1214 		diff_output(".\n");
1215 	if (inifdef) {
1216 		diff_output("#endif /* %s */\n", ifdefname);
1217 		inifdef = 0;
1218 	}
1219 }
1220 
1221 static int
1222 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1223 {
1224 	int i, j, c, lastc, col, nc;
1225 	int	newcol;
1226 
1227 	/*
1228 	 * When doing #ifdef's, copy down to current line
1229 	 * if this is the first file, so that stuff makes it to output.
1230 	 */
1231 	if (diff_format == D_IFDEF && oldfile) {
1232 		long curpos = ftell(lb);
1233 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1234 		nc = f[a > b ? b : a - 1] - curpos;
1235 		for (i = 0; i < nc; i++)
1236 			diff_output("%c", getc(lb));
1237 	}
1238 	if (a > b)
1239 		return (0);
1240 	if (diff_format == D_IFDEF) {
1241 		if (inifdef) {
1242 			diff_output("#else /* %s%s */\n",
1243 			    oldfile == 1 ? "!" : "", ifdefname);
1244 		} else {
1245 			if (oldfile)
1246 				diff_output("#ifndef %s\n", ifdefname);
1247 			else
1248 				diff_output("#ifdef %s\n", ifdefname);
1249 		}
1250 		inifdef = 1 + oldfile;
1251 	}
1252 	for (i = a; i <= b; i++) {
1253 		fseek(lb, f[i - 1], SEEK_SET);
1254 		nc = f[i] - f[i - 1];
1255 		if (diff_format != D_IFDEF && ch != '\0') {
1256 			diff_output("%c", ch);
1257 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1258 			    || diff_format == D_UNIFIED))
1259 				diff_output("\t");
1260 			else if (diff_format != D_UNIFIED)
1261 				diff_output(" ");
1262 		}
1263 		col = 0;
1264 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1265 			if ((c = getc(lb)) == EOF) {
1266 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1267 				    diff_format == D_NREVERSE)
1268 					warnx("No newline at end of file");
1269 				else
1270 					diff_output("\n\\ No newline at end of "
1271 					    "file\n");
1272 				return (0);
1273 			}
1274 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1275 				newcol = ((col/tabsize)+1)*tabsize;
1276 				do {
1277 					diff_output(" ");
1278 				} while (++col < newcol);
1279 			} else {
1280 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1281 				    && lastc == '.') {
1282 					/*
1283 					 * Don't print a bare "." line
1284 					 * since that will confuse ed(1).
1285 					 * Print ".." instead and return,
1286 					 * giving the caller an offset
1287 					 * from which to restart.
1288 					 */
1289 					diff_output(".\n");
1290 					return (i - a + 1);
1291 				}
1292 				diff_output("%c", c);
1293 				col++;
1294 			}
1295 		}
1296 	}
1297 	return (0);
1298 }
1299 
1300 /*
1301  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1302  */
1303 static int
1304 readhash(FILE *f, int flags)
1305 {
1306 	int i, t, space;
1307 	int sum;
1308 
1309 	sum = 1;
1310 	space = 0;
1311 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1312 		if (flags & D_IGNORECASE)
1313 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1314 				if (flags & D_STRIPCR && t == '\r') {
1315 					t = getc(f);
1316 					if (t == '\n')
1317 						break;
1318 					ungetc(t, f);
1319 				}
1320 				if (t == EOF) {
1321 					if (i == 0)
1322 						return (0);
1323 					break;
1324 				}
1325 				sum = sum * 127 + chrtran[t];
1326 			}
1327 		else
1328 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1329 				if (flags & D_STRIPCR && t == '\r') {
1330 					t = getc(f);
1331 					if (t == '\n')
1332 						break;
1333 					ungetc(t, f);
1334 				}
1335 				if (t == EOF) {
1336 					if (i == 0)
1337 						return (0);
1338 					break;
1339 				}
1340 				sum = sum * 127 + t;
1341 			}
1342 	} else {
1343 		for (i = 0;;) {
1344 			switch (t = getc(f)) {
1345 			case '\r':
1346 			case '\t':
1347 			case '\v':
1348 			case '\f':
1349 			case ' ':
1350 				space++;
1351 				continue;
1352 			default:
1353 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1354 					i++;
1355 					space = 0;
1356 				}
1357 				sum = sum * 127 + chrtran[t];
1358 				i++;
1359 				continue;
1360 			case EOF:
1361 				if (i == 0)
1362 					return (0);
1363 				/* FALLTHROUGH */
1364 			case '\n':
1365 				break;
1366 			}
1367 			break;
1368 		}
1369 	}
1370 	/*
1371 	 * There is a remote possibility that we end up with a zero sum.
1372 	 * Zero is used as an EOF marker, so return 1 instead.
1373 	 */
1374 	return (sum == 0 ? 1 : sum);
1375 }
1376 
1377 static int
1378 asciifile(FILE *f)
1379 {
1380 	unsigned char buf[BUFSIZ];
1381 	size_t cnt;
1382 
1383 	if (f == NULL)
1384 		return (1);
1385 
1386 	rewind(f);
1387 	cnt = fread(buf, 1, sizeof(buf), f);
1388 	return (memchr(buf, '\0', cnt) == NULL);
1389 }
1390 
1391 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1392 
1393 static char *
1394 match_function(const long *f, int pos, FILE *fp)
1395 {
1396 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1397 	size_t nc;
1398 	int last = lastline;
1399 	const char *state = NULL;
1400 
1401 	lastline = pos;
1402 	while (pos > last) {
1403 		fseek(fp, f[pos - 1], SEEK_SET);
1404 		nc = f[pos] - f[pos - 1];
1405 		if (nc >= sizeof(buf))
1406 			nc = sizeof(buf) - 1;
1407 		nc = fread(buf, 1, nc, fp);
1408 		if (nc > 0) {
1409 			buf[nc] = '\0';
1410 			buf[strcspn(buf, "\n")] = '\0';
1411 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1412 				if (begins_with(buf, "private:")) {
1413 					if (!state)
1414 						state = " (private)";
1415 				} else if (begins_with(buf, "protected:")) {
1416 					if (!state)
1417 						state = " (protected)";
1418 				} else if (begins_with(buf, "public:")) {
1419 					if (!state)
1420 						state = " (public)";
1421 				} else {
1422 					strlcpy(lastbuf, buf, sizeof lastbuf);
1423 					if (state)
1424 						strlcat(lastbuf, state,
1425 						    sizeof lastbuf);
1426 					lastmatchline = pos;
1427 					return lastbuf;
1428 				}
1429 			}
1430 		}
1431 		pos--;
1432 	}
1433 	return lastmatchline > 0 ? lastbuf : NULL;
1434 }
1435 
1436 /* dump accumulated "context" diff changes */
1437 static void
1438 dump_context_vec(FILE *f1, FILE *f2, int flags)
1439 {
1440 	struct context_vec *cvp = context_vec_start;
1441 	int lowa, upb, lowc, upd, do_output;
1442 	int a, b, c, d;
1443 	char ch, *f;
1444 
1445 	if (context_vec_start > context_vec_ptr)
1446 		return;
1447 
1448 	b = d = 0;		/* gcc */
1449 	lowa = MAXIMUM(1, cvp->a - diff_context);
1450 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1451 	lowc = MAXIMUM(1, cvp->c - diff_context);
1452 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1453 
1454 	diff_output("***************");
1455 	if ((flags & D_PROTOTYPE)) {
1456 		f = match_function(ixold, lowa-1, f1);
1457 		if (f != NULL)
1458 			diff_output(" %s", f);
1459 	}
1460 	diff_output("\n*** ");
1461 	range(lowa, upb, ",");
1462 	diff_output(" ****\n");
1463 
1464 	/*
1465 	 * Output changes to the "old" file.  The first loop suppresses
1466 	 * output if there were no changes to the "old" file (we'll see
1467 	 * the "old" lines as context in the "new" list).
1468 	 */
1469 	do_output = 0;
1470 	for (; cvp <= context_vec_ptr; cvp++)
1471 		if (cvp->a <= cvp->b) {
1472 			cvp = context_vec_start;
1473 			do_output++;
1474 			break;
1475 		}
1476 	if (do_output) {
1477 		while (cvp <= context_vec_ptr) {
1478 			a = cvp->a;
1479 			b = cvp->b;
1480 			c = cvp->c;
1481 			d = cvp->d;
1482 
1483 			if (a <= b && c <= d)
1484 				ch = 'c';
1485 			else
1486 				ch = (a <= b) ? 'd' : 'a';
1487 
1488 			if (ch == 'a')
1489 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1490 			else {
1491 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1492 				fetch(ixold, a, b, f1,
1493 				    ch == 'c' ? '!' : '-', 0, flags);
1494 			}
1495 			lowa = b + 1;
1496 			cvp++;
1497 		}
1498 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1499 	}
1500 	/* output changes to the "new" file */
1501 	diff_output("--- ");
1502 	range(lowc, upd, ",");
1503 	diff_output(" ----\n");
1504 
1505 	do_output = 0;
1506 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1507 		if (cvp->c <= cvp->d) {
1508 			cvp = context_vec_start;
1509 			do_output++;
1510 			break;
1511 		}
1512 	if (do_output) {
1513 		while (cvp <= context_vec_ptr) {
1514 			a = cvp->a;
1515 			b = cvp->b;
1516 			c = cvp->c;
1517 			d = cvp->d;
1518 
1519 			if (a <= b && c <= d)
1520 				ch = 'c';
1521 			else
1522 				ch = (a <= b) ? 'd' : 'a';
1523 
1524 			if (ch == 'd')
1525 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1526 			else {
1527 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1528 				fetch(ixnew, c, d, f2,
1529 				    ch == 'c' ? '!' : '+', 0, flags);
1530 			}
1531 			lowc = d + 1;
1532 			cvp++;
1533 		}
1534 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1535 	}
1536 	context_vec_ptr = context_vec_start - 1;
1537 }
1538 
1539 /* dump accumulated "unified" diff changes */
1540 static void
1541 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1542 {
1543 	struct context_vec *cvp = context_vec_start;
1544 	int lowa, upb, lowc, upd;
1545 	int a, b, c, d;
1546 	char ch, *f;
1547 
1548 	if (context_vec_start > context_vec_ptr)
1549 		return;
1550 
1551 	b = d = 0;		/* gcc */
1552 	lowa = MAXIMUM(1, cvp->a - diff_context);
1553 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1554 	lowc = MAXIMUM(1, cvp->c - diff_context);
1555 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1556 
1557 	diff_output("@@ -");
1558 	uni_range(lowa, upb);
1559 	diff_output(" +");
1560 	uni_range(lowc, upd);
1561 	diff_output(" @@");
1562 	if ((flags & D_PROTOTYPE)) {
1563 		f = match_function(ixold, lowa-1, f1);
1564 		if (f != NULL)
1565 			diff_output(" %s", f);
1566 	}
1567 	diff_output("\n");
1568 
1569 	/*
1570 	 * Output changes in "unified" diff format--the old and new lines
1571 	 * are printed together.
1572 	 */
1573 	for (; cvp <= context_vec_ptr; cvp++) {
1574 		a = cvp->a;
1575 		b = cvp->b;
1576 		c = cvp->c;
1577 		d = cvp->d;
1578 
1579 		/*
1580 		 * c: both new and old changes
1581 		 * d: only changes in the old file
1582 		 * a: only changes in the new file
1583 		 */
1584 		if (a <= b && c <= d)
1585 			ch = 'c';
1586 		else
1587 			ch = (a <= b) ? 'd' : 'a';
1588 
1589 		switch (ch) {
1590 		case 'c':
1591 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1592 			fetch(ixold, a, b, f1, '-', 0, flags);
1593 			fetch(ixnew, c, d, f2, '+', 0, flags);
1594 			break;
1595 		case 'd':
1596 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1597 			fetch(ixold, a, b, f1, '-', 0, flags);
1598 			break;
1599 		case 'a':
1600 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1601 			fetch(ixnew, c, d, f2, '+', 0, flags);
1602 			break;
1603 		}
1604 		lowa = b + 1;
1605 		lowc = d + 1;
1606 	}
1607 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1608 
1609 	context_vec_ptr = context_vec_start - 1;
1610 }
1611 
1612 static void
1613 print_header(const char *file1, const char *file2)
1614 {
1615 	const char *time_format;
1616 	char buf1[256];
1617 	char buf2[256];
1618 	char end1[10];
1619 	char end2[10];
1620 	struct tm *tm_ptr1, *tm_ptr2;
1621 	int nsec1 = TIMESPEC_NS (stb1.st_mtime);
1622 	int nsec2 = TIMESPEC_NS (stb2.st_mtime);
1623 
1624 #ifdef ST_MTIM_NSEC
1625 		time_format = "%Y-%m-%d %H:%M:%S.%N";
1626 #else
1627 		time_format = "%Y-%m-%d %H:%M:%S";
1628 #endif
1629 
1630 	if (cflag)
1631 		time_format = "%c";
1632 	tm_ptr1 = localtime(&stb1.st_mtime);
1633 	tm_ptr2 = localtime(&stb2.st_mtime);
1634 	strftime(buf1, 256, time_format, tm_ptr1);
1635 	strftime(buf2, 256, time_format, tm_ptr2);
1636 	if (!cflag) {
1637 		strftime(end1, 10, "%z", tm_ptr1);
1638 		strftime(end2, 10, "%z", tm_ptr2);
1639 		sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1640 		sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1641 	}
1642 	if (label[0] != NULL)
1643 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1644 		    label[0]);
1645 	else
1646 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1647 		    file1, buf1);
1648 	if (label[1] != NULL)
1649 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1650 		    label[1]);
1651 	else
1652 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1653 		    file2, buf2);
1654 }
1655