xref: /freebsd/usr.bin/diff/diffreg.c (revision f5b9907c8614c435c231ad7aab21d54b74be92ab)
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
69 
70 #include <sys/capsicum.h>
71 #include <sys/procdesc.h>
72 #include <sys/stat.h>
73 #include <sys/types.h>
74 #include <sys/event.h>
75 #include <sys/wait.h>
76 
77 #include <capsicum_helpers.h>
78 #include <ctype.h>
79 #include <err.h>
80 #include <errno.h>
81 #include <fcntl.h>
82 #include <paths.h>
83 #include <stddef.h>
84 #include <stdint.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <unistd.h>
89 #include <limits.h>
90 #include <signal.h>
91 
92 #include "diff.h"
93 #include "xmalloc.h"
94 
95 #define _PATH_PR "/usr/bin/pr"
96 
97 #ifdef ST_MTIM_NSEC
98 # define TIMESPEC_NS(timespec) ((timespec).ST_MTIM_NSEC)
99 #else
100 # define TIMESPEC_NS(timespec) 0
101 #endif
102 
103 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
104 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
105 
106 /*
107  * diff - compare two files.
108  */
109 
110 /*
111  *	Uses an algorithm due to Harold Stone, which finds
112  *	a pair of longest identical subsequences in the two
113  *	files.
114  *
115  *	The major goal is to generate the match vector J.
116  *	J[i] is the index of the line in file1 corresponding
117  *	to line i file0. J[i] = 0 if there is no
118  *	such line in file1.
119  *
120  *	Lines are hashed so as to work in core. All potential
121  *	matches are located by sorting the lines of each file
122  *	on the hash (called ``value''). In particular, this
123  *	collects the equivalence classes in file1 together.
124  *	Subroutine equiv replaces the value of each line in
125  *	file0 by the index of the first element of its
126  *	matching equivalence in (the reordered) file1.
127  *	To save space equiv squeezes file1 into a single
128  *	array member in which the equivalence classes
129  *	are simply concatenated, except that their first
130  *	members are flagged by changing sign.
131  *
132  *	Next the indices that point into member are unsorted into
133  *	array class according to the original order of file0.
134  *
135  *	The cleverness lies in routine stone. This marches
136  *	through the lines of file0, developing a vector klist
137  *	of "k-candidates". At step i a k-candidate is a matched
138  *	pair of lines x,y (x in file0 y in file1) such that
139  *	there is a common subsequence of length k
140  *	between the first i lines of file0 and the first y
141  *	lines of file1, but there is no such subsequence for
142  *	any smaller y. x is the earliest possible mate to y
143  *	that occurs in such a subsequence.
144  *
145  *	Whenever any of the members of the equivalence class of
146  *	lines in file1 matable to a line in file0 has serial number
147  *	less than the y of some k-candidate, that k-candidate
148  *	with the smallest such y is replaced. The new
149  *	k-candidate is chained (via pred) to the current
150  *	k-1 candidate so that the actual subsequence can
151  *	be recovered. When a member has serial number greater
152  *	that the y of all k-candidates, the klist is extended.
153  *	At the end, the longest subsequence is pulled out
154  *	and placed in the array J by unravel
155  *
156  *	With J in hand, the matches there recorded are
157  *	check'ed against reality to assure that no spurious
158  *	matches have crept in due to hashing. If they have,
159  *	they are broken, and "jackpot" is recorded--a harmless
160  *	matter except that a true match for a spuriously
161  *	mated line may now be unnecessarily reported as a change.
162  *
163  *	Much of the complexity of the program comes simply
164  *	from trying to minimize core utilization and
165  *	maximize the range of doable problems by dynamically
166  *	allocating what is needed and reusing what is not.
167  *	The core requirements for problems larger than somewhat
168  *	are (in words) 2*length(file0) + length(file1) +
169  *	3*(number of k-candidates installed),  typically about
170  *	6n words for files of length n.
171  */
172 
173 struct cand {
174 	int	x;
175 	int	y;
176 	int	pred;
177 };
178 
179 static struct line {
180 	int	serial;
181 	int	value;
182 } *file[2];
183 
184 /*
185  * The following struct is used to record change information when
186  * doing a "context" or "unified" diff.  (see routine "change" to
187  * understand the highly mnemonic field names)
188  */
189 struct context_vec {
190 	int	a;		/* start line in old file */
191 	int	b;		/* end line in old file */
192 	int	c;		/* start line in new file */
193 	int	d;		/* end line in new file */
194 };
195 
196 #define	diff_output	printf
197 static FILE	*opentemp(const char *);
198 static void	 output(char *, FILE *, char *, FILE *, int);
199 static void	 check(FILE *, FILE *, int);
200 static void	 range(int, int, const char *);
201 static void	 uni_range(int, int);
202 static void	 dump_context_vec(FILE *, FILE *, int);
203 static void	 dump_unified_vec(FILE *, FILE *, int);
204 static void	 prepare(int, FILE *, size_t, int);
205 static void	 prune(void);
206 static void	 equiv(struct line *, int, struct line *, int, int *);
207 static void	 unravel(int);
208 static void	 unsort(struct line *, int, int *);
209 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
210 static void	 sort(struct line *, int);
211 static void	 print_header(const char *, const char *);
212 static int	 ignoreline(char *);
213 static int	 asciifile(FILE *);
214 static int	 fetch(long *, int, int, FILE *, int, int, int);
215 static int	 newcand(int, int, int);
216 static int	 search(int *, int, int);
217 static int	 skipline(FILE *);
218 static int	 isqrt(int);
219 static int	 stone(int *, int, int *, int *, int);
220 static int	 readhash(FILE *, int);
221 static int	 files_differ(FILE *, FILE *, int);
222 static char	*match_function(const long *, int, FILE *);
223 static char	*preadline(int, size_t, off_t);
224 
225 static int  *J;			/* will be overlaid on class */
226 static int  *class;		/* will be overlaid on file[0] */
227 static int  *klist;		/* will be overlaid on file[0] after class */
228 static int  *member;		/* will be overlaid on file[1] */
229 static int   clen;
230 static int   inifdef;		/* whether or not we are in a #ifdef block */
231 static int   len[2];
232 static int   pref, suff;	/* length of prefix and suffix */
233 static int   slen[2];
234 static int   anychange;
235 static long *ixnew;		/* will be overlaid on file[1] */
236 static long *ixold;		/* will be overlaid on klist */
237 static struct cand *clist;	/* merely a free storage pot for candidates */
238 static int   clistlen;		/* the length of clist */
239 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
240 static u_char *chrtran;		/* translation table for case-folding */
241 static struct context_vec *context_vec_start;
242 static struct context_vec *context_vec_end;
243 static struct context_vec *context_vec_ptr;
244 
245 #define FUNCTION_CONTEXT_SIZE	55
246 static char lastbuf[FUNCTION_CONTEXT_SIZE];
247 static int lastline;
248 static int lastmatchline;
249 
250 
251 /*
252  * chrtran points to one of 2 translation tables: cup2low if folding upper to
253  * lower case clow2low if not folding case
254  */
255 static u_char clow2low[256] = {
256 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
257 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
258 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
259 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
260 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
261 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
262 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
263 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
264 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
265 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
266 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
267 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
268 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
269 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
270 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
271 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
272 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
273 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
274 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
275 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
276 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
277 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
278 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
279 	0xfd, 0xfe, 0xff
280 };
281 
282 static u_char cup2low[256] = {
283 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
284 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
285 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
286 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
287 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
288 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
289 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
290 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
291 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
292 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
293 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
294 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
295 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
296 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
297 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
298 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
299 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
300 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
301 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
302 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
303 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
304 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
305 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
306 	0xfd, 0xfe, 0xff
307 };
308 
309 int
310 diffreg(char *file1, char *file2, int flags, int capsicum)
311 {
312 	FILE *f1, *f2;
313 	int i, rval;
314 	int	ostdout = -1;
315 	int pr_pd, kq;
316 	struct kevent *e;
317 	cap_rights_t rights_ro;
318 
319 	e = NULL;
320 	kq = -1;
321 	f1 = f2 = NULL;
322 	rval = D_SAME;
323 	anychange = 0;
324 	lastline = 0;
325 	lastmatchline = 0;
326 	context_vec_ptr = context_vec_start - 1;
327 	if (flags & D_IGNORECASE)
328 		chrtran = cup2low;
329 	else
330 		chrtran = clow2low;
331 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
332 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
333 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
334 		goto closem;
335 
336 	if (flags & D_EMPTY1)
337 		f1 = fopen(_PATH_DEVNULL, "r");
338 	else {
339 		if (!S_ISREG(stb1.st_mode)) {
340 			if ((f1 = opentemp(file1)) == NULL ||
341 			    fstat(fileno(f1), &stb1) < 0) {
342 				warn("%s", file1);
343 				status |= 2;
344 				goto closem;
345 			}
346 		} else if (strcmp(file1, "-") == 0)
347 			f1 = stdin;
348 		else
349 			f1 = fopen(file1, "r");
350 	}
351 	if (f1 == NULL) {
352 		warn("%s", file1);
353 		status |= 2;
354 		goto closem;
355 	}
356 
357 	if (flags & D_EMPTY2)
358 		f2 = fopen(_PATH_DEVNULL, "r");
359 	else {
360 		if (!S_ISREG(stb2.st_mode)) {
361 			if ((f2 = opentemp(file2)) == NULL ||
362 			    fstat(fileno(f2), &stb2) < 0) {
363 				warn("%s", file2);
364 				status |= 2;
365 				goto closem;
366 			}
367 		} else if (strcmp(file2, "-") == 0)
368 			f2 = stdin;
369 		else
370 			f2 = fopen(file2, "r");
371 	}
372 	if (f2 == NULL) {
373 		warn("%s", file2);
374 		status |= 2;
375 		goto closem;
376 	}
377 
378 	if (lflag) {
379 		/* redirect stdout to pr */
380 		int	 pfd[2];
381 		pid_t	pid;
382 		char	*header;
383 
384 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
385 		signal(SIGPIPE, SIG_IGN);
386 		fflush(stdout);
387 		rewind(stdout);
388 		pipe(pfd);
389 		switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
390 		case -1:
391 			status |= 2;
392 			free(header);
393 			err(2, "No more processes");
394 		case 0:
395 			/* child */
396 			if (pfd[0] != STDIN_FILENO) {
397 				dup2(pfd[0], STDIN_FILENO);
398 				close(pfd[0]);
399 			}
400 			close(pfd[1]);
401 			execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
402 			_exit(127);
403 		default:
404 
405 			/* parent */
406 			if (pfd[1] != STDOUT_FILENO) {
407 				ostdout = dup(STDOUT_FILENO);
408 				dup2(pfd[1], STDOUT_FILENO);
409 				close(pfd[1]);
410 			}
411 			close(pfd[0]);
412 			rewind(stdout);
413 			free(header);
414 			kq = kqueue();
415 			if (kq == -1)
416 				err(2, "kqueue");
417 			e = xmalloc(sizeof(struct kevent));
418 			EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
419 			    NULL);
420 			if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
421 				err(2, "kevent");
422 		}
423 	}
424 
425 	if (capsicum) {
426 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
427 		if (cap_rights_limit(fileno(f1), &rights_ro) < 0
428 		    && errno != ENOSYS)
429 			err(2, "unable to limit rights on: %s", file1);
430 		if (cap_rights_limit(fileno(f2), &rights_ro) < 0 &&
431 		    errno != ENOSYS)
432 			err(2, "unable to limit rights on: %s", file2);
433 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
434 			/* stding has already been limited */
435 			if (caph_limit_stderr() == -1)
436 				err(2, "unable to limit stderr");
437 			if (caph_limit_stdout() == -1)
438 				err(2, "unable to limit stdout");
439 		} else if (caph_limit_stdio() == -1)
440 				err(2, "unable to limit stdio");
441 
442 		caph_cache_catpages();
443 		if (cap_enter() < 0 && errno != ENOSYS)
444 			err(2, "unable to enter capability mode");
445 	}
446 
447 	switch (files_differ(f1, f2, flags)) {
448 	case 0:
449 		goto closem;
450 	case 1:
451 		break;
452 	default:
453 		/* error */
454 		status |= 2;
455 		goto closem;
456 	}
457 
458 	if ((flags & D_FORCEASCII) == 0 &&
459 	    (!asciifile(f1) || !asciifile(f2))) {
460 		rval = D_BINARY;
461 		status |= 1;
462 		goto closem;
463 	}
464 	prepare(0, f1, stb1.st_size, flags);
465 	prepare(1, f2, stb2.st_size, flags);
466 
467 	prune();
468 	sort(sfile[0], slen[0]);
469 	sort(sfile[1], slen[1]);
470 
471 	member = (int *)file[1];
472 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
473 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
474 
475 	class = (int *)file[0];
476 	unsort(sfile[0], slen[0], class);
477 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
478 
479 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
480 	clen = 0;
481 	clistlen = 100;
482 	clist = xcalloc(clistlen, sizeof(*clist));
483 	i = stone(class, slen[0], member, klist, flags);
484 	free(member);
485 	free(class);
486 
487 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
488 	unravel(klist[i]);
489 	free(clist);
490 	free(klist);
491 
492 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
493 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
494 	check(f1, f2, flags);
495 	output(file1, f1, file2, f2, flags);
496 	if (ostdout != -1 && e != NULL) {
497 		/* close the pipe to pr and restore stdout */
498 		int wstatus;
499 
500 		fflush(stdout);
501 		if (ostdout != STDOUT_FILENO) {
502 			close(STDOUT_FILENO);
503 			dup2(ostdout, STDOUT_FILENO);
504 			close(ostdout);
505 		}
506 		if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
507 			err(2, "kevent");
508 		wstatus = e[0].data;
509 		close(kq);
510 		if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
511 			errx(2, "pr exited abnormally");
512 		else if (WIFSIGNALED(wstatus))
513 			errx(2, "pr killed by signal %d",
514 			    WTERMSIG(wstatus));
515 	}
516 
517 closem:
518 	if (anychange) {
519 		status |= 1;
520 		if (rval == D_SAME)
521 			rval = D_DIFFER;
522 	}
523 	if (f1 != NULL)
524 		fclose(f1);
525 	if (f2 != NULL)
526 		fclose(f2);
527 
528 	return (rval);
529 }
530 
531 /*
532  * Check to see if the given files differ.
533  * Returns 0 if they are the same, 1 if different, and -1 on error.
534  * XXX - could use code from cmp(1) [faster]
535  */
536 static int
537 files_differ(FILE *f1, FILE *f2, int flags)
538 {
539 	char buf1[BUFSIZ], buf2[BUFSIZ];
540 	size_t i, j;
541 
542 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
543 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
544 		return (1);
545 	for (;;) {
546 		i = fread(buf1, 1, sizeof(buf1), f1);
547 		j = fread(buf2, 1, sizeof(buf2), f2);
548 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
549 			return (-1);
550 		if (i != j)
551 			return (1);
552 		if (i == 0)
553 			return (0);
554 		if (memcmp(buf1, buf2, i) != 0)
555 			return (1);
556 	}
557 }
558 
559 static FILE *
560 opentemp(const char *f)
561 {
562 	char buf[BUFSIZ], tempfile[PATH_MAX];
563 	ssize_t nread;
564 	int ifd, ofd;
565 
566 	if (strcmp(f, "-") == 0)
567 		ifd = STDIN_FILENO;
568 	else if ((ifd = open(f, O_RDONLY, 0644)) < 0)
569 		return (NULL);
570 
571 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
572 
573 	if ((ofd = mkstemp(tempfile)) < 0) {
574 		close(ifd);
575 		return (NULL);
576 	}
577 	unlink(tempfile);
578 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
579 		if (write(ofd, buf, nread) != nread) {
580 			close(ifd);
581 			close(ofd);
582 			return (NULL);
583 		}
584 	}
585 	close(ifd);
586 	lseek(ofd, (off_t)0, SEEK_SET);
587 	return (fdopen(ofd, "r"));
588 }
589 
590 char *
591 splice(char *dir, char *path)
592 {
593 	char *tail, *buf;
594 	size_t dirlen;
595 
596 	dirlen = strlen(dir);
597 	while (dirlen != 0 && dir[dirlen - 1] == '/')
598 	    dirlen--;
599 	if ((tail = strrchr(path, '/')) == NULL)
600 		tail = path;
601 	else
602 		tail++;
603 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
604 	return (buf);
605 }
606 
607 static void
608 prepare(int i, FILE *fd, size_t filesize, int flags)
609 {
610 	struct line *p;
611 	int h;
612 	size_t sz, j;
613 
614 	rewind(fd);
615 
616 	sz = ((unsigned long)filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
617 	if (sz < 100)
618 		sz = 100;
619 
620 	p = xcalloc(sz + 3, sizeof(*p));
621 	for (j = 0; (h = readhash(fd, flags));) {
622 		if (j == sz) {
623 			sz = sz * 3 / 2;
624 			p = xreallocarray(p, sz + 3, sizeof(*p));
625 		}
626 		p[++j].value = h;
627 	}
628 	len[i] = j;
629 	file[i] = p;
630 }
631 
632 static void
633 prune(void)
634 {
635 	int i, j;
636 
637 	for (pref = 0; pref < len[0] && pref < len[1] &&
638 	    file[0][pref + 1].value == file[1][pref + 1].value;
639 	    pref++)
640 		;
641 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
642 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
643 	    suff++)
644 		;
645 	for (j = 0; j < 2; j++) {
646 		sfile[j] = file[j] + pref;
647 		slen[j] = len[j] - pref - suff;
648 		for (i = 0; i <= slen[j]; i++)
649 			sfile[j][i].serial = i;
650 	}
651 }
652 
653 static void
654 equiv(struct line *a, int n, struct line *b, int m, int *c)
655 {
656 	int i, j;
657 
658 	i = j = 1;
659 	while (i <= n && j <= m) {
660 		if (a[i].value < b[j].value)
661 			a[i++].value = 0;
662 		else if (a[i].value == b[j].value)
663 			a[i++].value = j;
664 		else
665 			j++;
666 	}
667 	while (i <= n)
668 		a[i++].value = 0;
669 	b[m + 1].value = 0;
670 	j = 0;
671 	while (++j <= m) {
672 		c[j] = -b[j].serial;
673 		while (b[j + 1].value == b[j].value) {
674 			j++;
675 			c[j] = b[j].serial;
676 		}
677 	}
678 	c[j] = -1;
679 }
680 
681 /* Code taken from ping.c */
682 static int
683 isqrt(int n)
684 {
685 	int y, x = 1;
686 
687 	if (n == 0)
688 		return (0);
689 
690 	do { /* newton was a stinker */
691 		y = x;
692 		x = n / x;
693 		x += y;
694 		x /= 2;
695 	} while ((x - y) > 1 || (x - y) < -1);
696 
697 	return (x);
698 }
699 
700 static int
701 stone(int *a, int n, int *b, int *c, int flags)
702 {
703 	int i, k, y, j, l;
704 	int oldc, tc, oldl, sq;
705 	u_int numtries, bound;
706 
707 	if (flags & D_MINIMAL)
708 		bound = UINT_MAX;
709 	else {
710 		sq = isqrt(n);
711 		bound = MAXIMUM(256, sq);
712 	}
713 
714 	k = 0;
715 	c[0] = newcand(0, 0, 0);
716 	for (i = 1; i <= n; i++) {
717 		j = a[i];
718 		if (j == 0)
719 			continue;
720 		y = -b[j];
721 		oldl = 0;
722 		oldc = c[0];
723 		numtries = 0;
724 		do {
725 			if (y <= clist[oldc].y)
726 				continue;
727 			l = search(c, k, y);
728 			if (l != oldl + 1)
729 				oldc = c[l - 1];
730 			if (l <= k) {
731 				if (clist[c[l]].y <= y)
732 					continue;
733 				tc = c[l];
734 				c[l] = newcand(i, y, oldc);
735 				oldc = tc;
736 				oldl = l;
737 				numtries++;
738 			} else {
739 				c[l] = newcand(i, y, oldc);
740 				k++;
741 				break;
742 			}
743 		} while ((y = b[++j]) > 0 && numtries < bound);
744 	}
745 	return (k);
746 }
747 
748 static int
749 newcand(int x, int y, int pred)
750 {
751 	struct cand *q;
752 
753 	if (clen == clistlen) {
754 		clistlen = clistlen * 11 / 10;
755 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
756 	}
757 	q = clist + clen;
758 	q->x = x;
759 	q->y = y;
760 	q->pred = pred;
761 	return (clen++);
762 }
763 
764 static int
765 search(int *c, int k, int y)
766 {
767 	int i, j, l, t;
768 
769 	if (clist[c[k]].y < y)	/* quick look for typical case */
770 		return (k + 1);
771 	i = 0;
772 	j = k + 1;
773 	for (;;) {
774 		l = (i + j) / 2;
775 		if (l <= i)
776 			break;
777 		t = clist[c[l]].y;
778 		if (t > y)
779 			j = l;
780 		else if (t < y)
781 			i = l;
782 		else
783 			return (l);
784 	}
785 	return (l + 1);
786 }
787 
788 static void
789 unravel(int p)
790 {
791 	struct cand *q;
792 	int i;
793 
794 	for (i = 0; i <= len[0]; i++)
795 		J[i] = i <= pref ? i :
796 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
797 	for (q = clist + p; q->y != 0; q = clist + q->pred)
798 		J[q->x + pref] = q->y + pref;
799 }
800 
801 /*
802  * Check does double duty:
803  *  1.	ferret out any fortuitous correspondences due
804  *	to confounding by hashing (which result in "jackpot")
805  *  2.  collect random access indexes to the two files
806  */
807 static void
808 check(FILE *f1, FILE *f2, int flags)
809 {
810 	int i, j, jackpot, c, d;
811 	long ctold, ctnew;
812 
813 	rewind(f1);
814 	rewind(f2);
815 	j = 1;
816 	ixold[0] = ixnew[0] = 0;
817 	jackpot = 0;
818 	ctold = ctnew = 0;
819 	for (i = 1; i <= len[0]; i++) {
820 		if (J[i] == 0) {
821 			ixold[i] = ctold += skipline(f1);
822 			continue;
823 		}
824 		while (j < J[i]) {
825 			ixnew[j] = ctnew += skipline(f2);
826 			j++;
827 		}
828 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
829 			for (;;) {
830 				c = getc(f1);
831 				d = getc(f2);
832 				/*
833 				 * GNU diff ignores a missing newline
834 				 * in one file for -b or -w.
835 				 */
836 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
837 					if (c == EOF && d == '\n') {
838 						ctnew++;
839 						break;
840 					} else if (c == '\n' && d == EOF) {
841 						ctold++;
842 						break;
843 					}
844 				}
845 				ctold++;
846 				ctnew++;
847 				if (flags & D_STRIPCR) {
848 					if (c == '\r') {
849 						if ((c = getc(f1)) == '\n') {
850 							ctnew++;
851 							break;
852 						}
853 					}
854 					if (d == '\r') {
855 						if ((d = getc(f2)) == '\n') {
856 							ctold++;
857 							break;
858 						}
859 					}
860 				}
861 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
862 				    isspace(d)) {
863 					do {
864 						if (c == '\n')
865 							break;
866 						ctold++;
867 					} while (isspace(c = getc(f1)));
868 					do {
869 						if (d == '\n')
870 							break;
871 						ctnew++;
872 					} while (isspace(d = getc(f2)));
873 				} else if ((flags & D_IGNOREBLANKS)) {
874 					while (isspace(c) && c != '\n') {
875 						c = getc(f1);
876 						ctold++;
877 					}
878 					while (isspace(d) && d != '\n') {
879 						d = getc(f2);
880 						ctnew++;
881 					}
882 				}
883 				if (chrtran[c] != chrtran[d]) {
884 					jackpot++;
885 					J[i] = 0;
886 					if (c != '\n' && c != EOF)
887 						ctold += skipline(f1);
888 					if (d != '\n' && c != EOF)
889 						ctnew += skipline(f2);
890 					break;
891 				}
892 				if (c == '\n' || c == EOF)
893 					break;
894 			}
895 		} else {
896 			for (;;) {
897 				ctold++;
898 				ctnew++;
899 				if ((c = getc(f1)) != (d = getc(f2))) {
900 					/* jackpot++; */
901 					J[i] = 0;
902 					if (c != '\n' && c != EOF)
903 						ctold += skipline(f1);
904 					if (d != '\n' && c != EOF)
905 						ctnew += skipline(f2);
906 					break;
907 				}
908 				if (c == '\n' || c == EOF)
909 					break;
910 			}
911 		}
912 		ixold[i] = ctold;
913 		ixnew[j] = ctnew;
914 		j++;
915 	}
916 	for (; j <= len[1]; j++) {
917 		ixnew[j] = ctnew += skipline(f2);
918 	}
919 	/*
920 	 * if (jackpot)
921 	 *	fprintf(stderr, "jackpot\n");
922 	 */
923 }
924 
925 /* shellsort CACM #201 */
926 static void
927 sort(struct line *a, int n)
928 {
929 	struct line *ai, *aim, w;
930 	int j, m = 0, k;
931 
932 	if (n == 0)
933 		return;
934 	for (j = 1; j <= n; j *= 2)
935 		m = 2 * j - 1;
936 	for (m /= 2; m != 0; m /= 2) {
937 		k = n - m;
938 		for (j = 1; j <= k; j++) {
939 			for (ai = &a[j]; ai > a; ai -= m) {
940 				aim = &ai[m];
941 				if (aim < ai)
942 					break;	/* wraparound */
943 				if (aim->value > ai[0].value ||
944 				    (aim->value == ai[0].value &&
945 					aim->serial > ai[0].serial))
946 					break;
947 				w.value = ai[0].value;
948 				ai[0].value = aim->value;
949 				aim->value = w.value;
950 				w.serial = ai[0].serial;
951 				ai[0].serial = aim->serial;
952 				aim->serial = w.serial;
953 			}
954 		}
955 	}
956 }
957 
958 static void
959 unsort(struct line *f, int l, int *b)
960 {
961 	int *a, i;
962 
963 	a = xcalloc(l + 1, sizeof(*a));
964 	for (i = 1; i <= l; i++)
965 		a[f[i].serial] = f[i].value;
966 	for (i = 1; i <= l; i++)
967 		b[i] = a[i];
968 	free(a);
969 }
970 
971 static int
972 skipline(FILE *f)
973 {
974 	int i, c;
975 
976 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
977 		continue;
978 	return (i);
979 }
980 
981 static void
982 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
983 {
984 	int m, i0, i1, j0, j1;
985 
986 	rewind(f1);
987 	rewind(f2);
988 	m = len[0];
989 	J[0] = 0;
990 	J[m + 1] = len[1] + 1;
991 	if (diff_format != D_EDIT) {
992 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
993 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
994 				i0++;
995 			j0 = J[i0 - 1] + 1;
996 			i1 = i0 - 1;
997 			while (i1 < m && J[i1 + 1] == 0)
998 				i1++;
999 			j1 = J[i1 + 1] - 1;
1000 			J[i1] = j1;
1001 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
1002 		}
1003 	} else {
1004 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
1005 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
1006 				i0--;
1007 			j0 = J[i0 + 1] - 1;
1008 			i1 = i0 + 1;
1009 			while (i1 > 1 && J[i1 - 1] == 0)
1010 				i1--;
1011 			j1 = J[i1 - 1] + 1;
1012 			J[i1] = j1;
1013 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
1014 		}
1015 	}
1016 	if (m == 0)
1017 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
1018 	if (diff_format == D_IFDEF) {
1019 		for (;;) {
1020 #define	c i0
1021 			if ((c = getc(f1)) == EOF)
1022 				return;
1023 			diff_output("%c", c);
1024 		}
1025 #undef c
1026 	}
1027 	if (anychange != 0) {
1028 		if (diff_format == D_CONTEXT)
1029 			dump_context_vec(f1, f2, flags);
1030 		else if (diff_format == D_UNIFIED)
1031 			dump_unified_vec(f1, f2, flags);
1032 	}
1033 }
1034 
1035 static void
1036 range(int a, int b, const char *separator)
1037 {
1038 	diff_output("%d", a > b ? b : a);
1039 	if (a < b)
1040 		diff_output("%s%d", separator, b);
1041 }
1042 
1043 static void
1044 uni_range(int a, int b)
1045 {
1046 	if (a < b)
1047 		diff_output("%d,%d", a, b - a + 1);
1048 	else if (a == b)
1049 		diff_output("%d", b);
1050 	else
1051 		diff_output("%d,0", b);
1052 }
1053 
1054 static char *
1055 preadline(int fd, size_t rlen, off_t off)
1056 {
1057 	char *line;
1058 	ssize_t nr;
1059 
1060 	line = xmalloc(rlen + 1);
1061 	if ((nr = pread(fd, line, rlen, off)) < 0)
1062 		err(2, "preadline");
1063 	if (nr > 0 && line[nr-1] == '\n')
1064 		nr--;
1065 	line[nr] = '\0';
1066 	return (line);
1067 }
1068 
1069 static int
1070 ignoreline(char *line)
1071 {
1072 	int ret;
1073 
1074 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1075 	free(line);
1076 	return (ret == 0);	/* if it matched, it should be ignored. */
1077 }
1078 
1079 /*
1080  * Indicate that there is a difference between lines a and b of the from file
1081  * to get to lines c to d of the to file.  If a is greater then b then there
1082  * are no lines in the from file involved and this means that there were
1083  * lines appended (beginning at b).  If c is greater than d then there are
1084  * lines missing from the to file.
1085  */
1086 static void
1087 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1088     int *pflags)
1089 {
1090 	static size_t max_context = 64;
1091 	int i;
1092 
1093 restart:
1094 	if (diff_format != D_IFDEF && a > b && c > d)
1095 		return;
1096 	if (ignore_pats != NULL) {
1097 		char *line;
1098 		/*
1099 		 * All lines in the change, insert, or delete must
1100 		 * match an ignore pattern for the change to be
1101 		 * ignored.
1102 		 */
1103 		if (a <= b) {		/* Changes and deletes. */
1104 			for (i = a; i <= b; i++) {
1105 				line = preadline(fileno(f1),
1106 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1107 				if (!ignoreline(line))
1108 					goto proceed;
1109 			}
1110 		}
1111 		if (a > b || c <= d) {	/* Changes and inserts. */
1112 			for (i = c; i <= d; i++) {
1113 				line = preadline(fileno(f2),
1114 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1115 				if (!ignoreline(line))
1116 					goto proceed;
1117 			}
1118 		}
1119 		return;
1120 	}
1121 proceed:
1122 	if (*pflags & D_HEADER) {
1123 		diff_output("%s %s %s\n", diffargs, file1, file2);
1124 		*pflags &= ~D_HEADER;
1125 	}
1126 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1127 		/*
1128 		 * Allocate change records as needed.
1129 		 */
1130 		if (context_vec_ptr == context_vec_end - 1) {
1131 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1132 			max_context <<= 1;
1133 			context_vec_start = xreallocarray(context_vec_start,
1134 			    max_context, sizeof(*context_vec_start));
1135 			context_vec_end = context_vec_start + max_context;
1136 			context_vec_ptr = context_vec_start + offset;
1137 		}
1138 		if (anychange == 0) {
1139 			/*
1140 			 * Print the context/unidiff header first time through.
1141 			 */
1142 			print_header(file1, file2);
1143 			anychange = 1;
1144 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1145 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1146 			/*
1147 			 * If this change is more than 'diff_context' lines from the
1148 			 * previous change, dump the record and reset it.
1149 			 */
1150 			if (diff_format == D_CONTEXT)
1151 				dump_context_vec(f1, f2, *pflags);
1152 			else
1153 				dump_unified_vec(f1, f2, *pflags);
1154 		}
1155 		context_vec_ptr++;
1156 		context_vec_ptr->a = a;
1157 		context_vec_ptr->b = b;
1158 		context_vec_ptr->c = c;
1159 		context_vec_ptr->d = d;
1160 		return;
1161 	}
1162 	if (anychange == 0)
1163 		anychange = 1;
1164 	switch (diff_format) {
1165 	case D_BRIEF:
1166 		return;
1167 	case D_NORMAL:
1168 	case D_EDIT:
1169 		range(a, b, ",");
1170 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1171 		if (diff_format == D_NORMAL)
1172 			range(c, d, ",");
1173 		diff_output("\n");
1174 		break;
1175 	case D_REVERSE:
1176 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1177 		range(a, b, " ");
1178 		diff_output("\n");
1179 		break;
1180 	case D_NREVERSE:
1181 		if (a > b)
1182 			diff_output("a%d %d\n", b, d - c + 1);
1183 		else {
1184 			diff_output("d%d %d\n", a, b - a + 1);
1185 			if (!(c > d))
1186 				/* add changed lines */
1187 				diff_output("a%d %d\n", b, d - c + 1);
1188 		}
1189 		break;
1190 	}
1191 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1192 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1193 		if (a <= b && c <= d && diff_format == D_NORMAL)
1194 			diff_output("---\n");
1195 	}
1196 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1197 	if (i != 0 && diff_format == D_EDIT) {
1198 		/*
1199 		 * A non-zero return value for D_EDIT indicates that the
1200 		 * last line printed was a bare dot (".") that has been
1201 		 * escaped as ".." to prevent ed(1) from misinterpreting
1202 		 * it.  We have to add a substitute command to change this
1203 		 * back and restart where we left off.
1204 		 */
1205 		diff_output(".\n");
1206 		diff_output("%ds/.//\n", a + i - 1);
1207 		b = a + i - 1;
1208 		a = b + 1;
1209 		c += i;
1210 		goto restart;
1211 	}
1212 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1213 		diff_output(".\n");
1214 	if (inifdef) {
1215 		diff_output("#endif /* %s */\n", ifdefname);
1216 		inifdef = 0;
1217 	}
1218 }
1219 
1220 static int
1221 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1222 {
1223 	int i, j, c, lastc, col, nc;
1224 	int	newcol;
1225 
1226 	/*
1227 	 * When doing #ifdef's, copy down to current line
1228 	 * if this is the first file, so that stuff makes it to output.
1229 	 */
1230 	if (diff_format == D_IFDEF && oldfile) {
1231 		long curpos = ftell(lb);
1232 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1233 		nc = f[a > b ? b : a - 1] - curpos;
1234 		for (i = 0; i < nc; i++)
1235 			diff_output("%c", getc(lb));
1236 	}
1237 	if (a > b)
1238 		return (0);
1239 	if (diff_format == D_IFDEF) {
1240 		if (inifdef) {
1241 			diff_output("#else /* %s%s */\n",
1242 			    oldfile == 1 ? "!" : "", ifdefname);
1243 		} else {
1244 			if (oldfile)
1245 				diff_output("#ifndef %s\n", ifdefname);
1246 			else
1247 				diff_output("#ifdef %s\n", ifdefname);
1248 		}
1249 		inifdef = 1 + oldfile;
1250 	}
1251 	for (i = a; i <= b; i++) {
1252 		fseek(lb, f[i - 1], SEEK_SET);
1253 		nc = f[i] - f[i - 1];
1254 		if (diff_format != D_IFDEF && ch != '\0') {
1255 			diff_output("%c", ch);
1256 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1257 			    || diff_format == D_UNIFIED))
1258 				diff_output("\t");
1259 			else if (diff_format != D_UNIFIED)
1260 				diff_output(" ");
1261 		}
1262 		col = 0;
1263 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1264 			if ((c = getc(lb)) == EOF) {
1265 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1266 				    diff_format == D_NREVERSE)
1267 					warnx("No newline at end of file");
1268 				else
1269 					diff_output("\n\\ No newline at end of "
1270 					    "file\n");
1271 				return (0);
1272 			}
1273 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1274 				newcol = ((col/tabsize)+1)*tabsize;
1275 				do {
1276 					diff_output(" ");
1277 				} while (++col < newcol);
1278 			} else {
1279 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1280 				    && lastc == '.') {
1281 					/*
1282 					 * Don't print a bare "." line
1283 					 * since that will confuse ed(1).
1284 					 * Print ".." instead and return,
1285 					 * giving the caller an offset
1286 					 * from which to restart.
1287 					 */
1288 					diff_output(".\n");
1289 					return (i - a + 1);
1290 				}
1291 				diff_output("%c", c);
1292 				col++;
1293 			}
1294 		}
1295 	}
1296 	return (0);
1297 }
1298 
1299 /*
1300  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1301  */
1302 static int
1303 readhash(FILE *f, int flags)
1304 {
1305 	int i, t, space;
1306 	int sum;
1307 
1308 	sum = 1;
1309 	space = 0;
1310 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1311 		if (flags & D_IGNORECASE)
1312 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1313 				if (flags & D_STRIPCR && t == '\r') {
1314 					t = getc(f);
1315 					if (t == '\n')
1316 						break;
1317 					ungetc(t, f);
1318 				}
1319 				if (t == EOF) {
1320 					if (i == 0)
1321 						return (0);
1322 					break;
1323 				}
1324 				sum = sum * 127 + chrtran[t];
1325 			}
1326 		else
1327 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1328 				if (flags & D_STRIPCR && t == '\r') {
1329 					t = getc(f);
1330 					if (t == '\n')
1331 						break;
1332 					ungetc(t, f);
1333 				}
1334 				if (t == EOF) {
1335 					if (i == 0)
1336 						return (0);
1337 					break;
1338 				}
1339 				sum = sum * 127 + t;
1340 			}
1341 	} else {
1342 		for (i = 0;;) {
1343 			switch (t = getc(f)) {
1344 			case '\r':
1345 			case '\t':
1346 			case '\v':
1347 			case '\f':
1348 			case ' ':
1349 				space++;
1350 				continue;
1351 			default:
1352 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1353 					i++;
1354 					space = 0;
1355 				}
1356 				sum = sum * 127 + chrtran[t];
1357 				i++;
1358 				continue;
1359 			case EOF:
1360 				if (i == 0)
1361 					return (0);
1362 				/* FALLTHROUGH */
1363 			case '\n':
1364 				break;
1365 			}
1366 			break;
1367 		}
1368 	}
1369 	/*
1370 	 * There is a remote possibility that we end up with a zero sum.
1371 	 * Zero is used as an EOF marker, so return 1 instead.
1372 	 */
1373 	return (sum == 0 ? 1 : sum);
1374 }
1375 
1376 static int
1377 asciifile(FILE *f)
1378 {
1379 	unsigned char buf[BUFSIZ];
1380 	size_t cnt;
1381 
1382 	if (f == NULL)
1383 		return (1);
1384 
1385 	rewind(f);
1386 	cnt = fread(buf, 1, sizeof(buf), f);
1387 	return (memchr(buf, '\0', cnt) == NULL);
1388 }
1389 
1390 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1391 
1392 static char *
1393 match_function(const long *f, int pos, FILE *fp)
1394 {
1395 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1396 	size_t nc;
1397 	int last = lastline;
1398 	const char *state = NULL;
1399 
1400 	lastline = pos;
1401 	while (pos > last) {
1402 		fseek(fp, f[pos - 1], SEEK_SET);
1403 		nc = f[pos] - f[pos - 1];
1404 		if (nc >= sizeof(buf))
1405 			nc = sizeof(buf) - 1;
1406 		nc = fread(buf, 1, nc, fp);
1407 		if (nc > 0) {
1408 			buf[nc] = '\0';
1409 			buf[strcspn(buf, "\n")] = '\0';
1410 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1411 				if (begins_with(buf, "private:")) {
1412 					if (!state)
1413 						state = " (private)";
1414 				} else if (begins_with(buf, "protected:")) {
1415 					if (!state)
1416 						state = " (protected)";
1417 				} else if (begins_with(buf, "public:")) {
1418 					if (!state)
1419 						state = " (public)";
1420 				} else {
1421 					strlcpy(lastbuf, buf, sizeof lastbuf);
1422 					if (state)
1423 						strlcat(lastbuf, state,
1424 						    sizeof lastbuf);
1425 					lastmatchline = pos;
1426 					return lastbuf;
1427 				}
1428 			}
1429 		}
1430 		pos--;
1431 	}
1432 	return lastmatchline > 0 ? lastbuf : NULL;
1433 }
1434 
1435 /* dump accumulated "context" diff changes */
1436 static void
1437 dump_context_vec(FILE *f1, FILE *f2, int flags)
1438 {
1439 	struct context_vec *cvp = context_vec_start;
1440 	int lowa, upb, lowc, upd, do_output;
1441 	int a, b, c, d;
1442 	char ch, *f;
1443 
1444 	if (context_vec_start > context_vec_ptr)
1445 		return;
1446 
1447 	b = d = 0;		/* gcc */
1448 	lowa = MAXIMUM(1, cvp->a - diff_context);
1449 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1450 	lowc = MAXIMUM(1, cvp->c - diff_context);
1451 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1452 
1453 	diff_output("***************");
1454 	if ((flags & D_PROTOTYPE)) {
1455 		f = match_function(ixold, lowa-1, f1);
1456 		if (f != NULL)
1457 			diff_output(" %s", f);
1458 	}
1459 	diff_output("\n*** ");
1460 	range(lowa, upb, ",");
1461 	diff_output(" ****\n");
1462 
1463 	/*
1464 	 * Output changes to the "old" file.  The first loop suppresses
1465 	 * output if there were no changes to the "old" file (we'll see
1466 	 * the "old" lines as context in the "new" list).
1467 	 */
1468 	do_output = 0;
1469 	for (; cvp <= context_vec_ptr; cvp++)
1470 		if (cvp->a <= cvp->b) {
1471 			cvp = context_vec_start;
1472 			do_output++;
1473 			break;
1474 		}
1475 	if (do_output) {
1476 		while (cvp <= context_vec_ptr) {
1477 			a = cvp->a;
1478 			b = cvp->b;
1479 			c = cvp->c;
1480 			d = cvp->d;
1481 
1482 			if (a <= b && c <= d)
1483 				ch = 'c';
1484 			else
1485 				ch = (a <= b) ? 'd' : 'a';
1486 
1487 			if (ch == 'a')
1488 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1489 			else {
1490 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1491 				fetch(ixold, a, b, f1,
1492 				    ch == 'c' ? '!' : '-', 0, flags);
1493 			}
1494 			lowa = b + 1;
1495 			cvp++;
1496 		}
1497 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1498 	}
1499 	/* output changes to the "new" file */
1500 	diff_output("--- ");
1501 	range(lowc, upd, ",");
1502 	diff_output(" ----\n");
1503 
1504 	do_output = 0;
1505 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1506 		if (cvp->c <= cvp->d) {
1507 			cvp = context_vec_start;
1508 			do_output++;
1509 			break;
1510 		}
1511 	if (do_output) {
1512 		while (cvp <= context_vec_ptr) {
1513 			a = cvp->a;
1514 			b = cvp->b;
1515 			c = cvp->c;
1516 			d = cvp->d;
1517 
1518 			if (a <= b && c <= d)
1519 				ch = 'c';
1520 			else
1521 				ch = (a <= b) ? 'd' : 'a';
1522 
1523 			if (ch == 'd')
1524 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1525 			else {
1526 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1527 				fetch(ixnew, c, d, f2,
1528 				    ch == 'c' ? '!' : '+', 0, flags);
1529 			}
1530 			lowc = d + 1;
1531 			cvp++;
1532 		}
1533 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1534 	}
1535 	context_vec_ptr = context_vec_start - 1;
1536 }
1537 
1538 /* dump accumulated "unified" diff changes */
1539 static void
1540 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1541 {
1542 	struct context_vec *cvp = context_vec_start;
1543 	int lowa, upb, lowc, upd;
1544 	int a, b, c, d;
1545 	char ch, *f;
1546 
1547 	if (context_vec_start > context_vec_ptr)
1548 		return;
1549 
1550 	b = d = 0;		/* gcc */
1551 	lowa = MAXIMUM(1, cvp->a - diff_context);
1552 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1553 	lowc = MAXIMUM(1, cvp->c - diff_context);
1554 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1555 
1556 	diff_output("@@ -");
1557 	uni_range(lowa, upb);
1558 	diff_output(" +");
1559 	uni_range(lowc, upd);
1560 	diff_output(" @@");
1561 	if ((flags & D_PROTOTYPE)) {
1562 		f = match_function(ixold, lowa-1, f1);
1563 		if (f != NULL)
1564 			diff_output(" %s", f);
1565 	}
1566 	diff_output("\n");
1567 
1568 	/*
1569 	 * Output changes in "unified" diff format--the old and new lines
1570 	 * are printed together.
1571 	 */
1572 	for (; cvp <= context_vec_ptr; cvp++) {
1573 		a = cvp->a;
1574 		b = cvp->b;
1575 		c = cvp->c;
1576 		d = cvp->d;
1577 
1578 		/*
1579 		 * c: both new and old changes
1580 		 * d: only changes in the old file
1581 		 * a: only changes in the new file
1582 		 */
1583 		if (a <= b && c <= d)
1584 			ch = 'c';
1585 		else
1586 			ch = (a <= b) ? 'd' : 'a';
1587 
1588 		switch (ch) {
1589 		case 'c':
1590 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1591 			fetch(ixold, a, b, f1, '-', 0, flags);
1592 			fetch(ixnew, c, d, f2, '+', 0, flags);
1593 			break;
1594 		case 'd':
1595 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1596 			fetch(ixold, a, b, f1, '-', 0, flags);
1597 			break;
1598 		case 'a':
1599 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1600 			fetch(ixnew, c, d, f2, '+', 0, flags);
1601 			break;
1602 		}
1603 		lowa = b + 1;
1604 		lowc = d + 1;
1605 	}
1606 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1607 
1608 	context_vec_ptr = context_vec_start - 1;
1609 }
1610 
1611 static void
1612 print_header(const char *file1, const char *file2)
1613 {
1614 	const char *time_format;
1615 	char buf1[256];
1616 	char buf2[256];
1617 	char end1[10];
1618 	char end2[10];
1619 	struct tm *tm_ptr1, *tm_ptr2;
1620 	int nsec1 = TIMESPEC_NS (stb1.st_mtime);
1621 	int nsec2 = TIMESPEC_NS (stb2.st_mtime);
1622 
1623 #ifdef ST_MTIM_NSEC
1624 		time_format = "%Y-%m-%d %H:%M:%S.%N";
1625 #else
1626 		time_format = "%Y-%m-%d %H:%M:%S";
1627 #endif
1628 
1629 	if (cflag)
1630 		time_format = "%c";
1631 	tm_ptr1 = localtime(&stb1.st_mtime);
1632 	tm_ptr2 = localtime(&stb2.st_mtime);
1633 	strftime(buf1, 256, time_format, tm_ptr1);
1634 	strftime(buf2, 256, time_format, tm_ptr2);
1635 	if (!cflag) {
1636 		strftime(end1, 10, "%z", tm_ptr1);
1637 		strftime(end2, 10, "%z", tm_ptr2);
1638 		sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1639 		sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1640 	}
1641 	if (label[0] != NULL)
1642 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1643 		    label[0]);
1644 	else
1645 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1646 		    file1, buf1);
1647 	if (label[1] != NULL)
1648 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1649 		    label[1]);
1650 	else
1651 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1652 		    file2, buf2);
1653 }
1654