xref: /freebsd/usr.bin/diff/diffreg.c (revision a3422d96bd4c08d07bb6c1984c86578b67ee6a41)
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
69 
70 #include <sys/capsicum.h>
71 #include <sys/procdesc.h>
72 #include <sys/stat.h>
73 #include <sys/types.h>
74 #include <sys/event.h>
75 #include <sys/wait.h>
76 
77 #include <capsicum_helpers.h>
78 #include <ctype.h>
79 #include <err.h>
80 #include <errno.h>
81 #include <fcntl.h>
82 #include <paths.h>
83 #include <regex.h>
84 #include <stddef.h>
85 #include <stdint.h>
86 #include <stdio.h>
87 #include <stdlib.h>
88 #include <string.h>
89 #include <unistd.h>
90 #include <limits.h>
91 #include <signal.h>
92 
93 #include "diff.h"
94 #include "xmalloc.h"
95 
96 #define _PATH_PR "/usr/bin/pr"
97 
98 /*
99  * diff - compare two files.
100  */
101 
102 /*
103  *	Uses an algorithm due to Harold Stone, which finds
104  *	a pair of longest identical subsequences in the two
105  *	files.
106  *
107  *	The major goal is to generate the match vector J.
108  *	J[i] is the index of the line in file1 corresponding
109  *	to line i file0. J[i] = 0 if there is no
110  *	such line in file1.
111  *
112  *	Lines are hashed so as to work in core. All potential
113  *	matches are located by sorting the lines of each file
114  *	on the hash (called ``value''). In particular, this
115  *	collects the equivalence classes in file1 together.
116  *	Subroutine equiv replaces the value of each line in
117  *	file0 by the index of the first element of its
118  *	matching equivalence in (the reordered) file1.
119  *	To save space equiv squeezes file1 into a single
120  *	array member in which the equivalence classes
121  *	are simply concatenated, except that their first
122  *	members are flagged by changing sign.
123  *
124  *	Next the indices that point into member are unsorted into
125  *	array class according to the original order of file0.
126  *
127  *	The cleverness lies in routine stone. This marches
128  *	through the lines of file0, developing a vector klist
129  *	of "k-candidates". At step i a k-candidate is a matched
130  *	pair of lines x,y (x in file0 y in file1) such that
131  *	there is a common subsequence of length k
132  *	between the first i lines of file0 and the first y
133  *	lines of file1, but there is no such subsequence for
134  *	any smaller y. x is the earliest possible mate to y
135  *	that occurs in such a subsequence.
136  *
137  *	Whenever any of the members of the equivalence class of
138  *	lines in file1 matable to a line in file0 has serial number
139  *	less than the y of some k-candidate, that k-candidate
140  *	with the smallest such y is replaced. The new
141  *	k-candidate is chained (via pred) to the current
142  *	k-1 candidate so that the actual subsequence can
143  *	be recovered. When a member has serial number greater
144  *	that the y of all k-candidates, the klist is extended.
145  *	At the end, the longest subsequence is pulled out
146  *	and placed in the array J by unravel
147  *
148  *	With J in hand, the matches there recorded are
149  *	check'ed against reality to assure that no spurious
150  *	matches have crept in due to hashing. If they have,
151  *	they are broken, and "jackpot" is recorded--a harmless
152  *	matter except that a true match for a spuriously
153  *	mated line may now be unnecessarily reported as a change.
154  *
155  *	Much of the complexity of the program comes simply
156  *	from trying to minimize core utilization and
157  *	maximize the range of doable problems by dynamically
158  *	allocating what is needed and reusing what is not.
159  *	The core requirements for problems larger than somewhat
160  *	are (in words) 2*length(file0) + length(file1) +
161  *	3*(number of k-candidates installed),  typically about
162  *	6n words for files of length n.
163  */
164 
165 struct cand {
166 	int	x;
167 	int	y;
168 	int	pred;
169 };
170 
171 static struct line {
172 	int	serial;
173 	int	value;
174 } *file[2];
175 
176 /*
177  * The following struct is used to record change information when
178  * doing a "context" or "unified" diff.  (see routine "change" to
179  * understand the highly mnemonic field names)
180  */
181 struct context_vec {
182 	int	a;		/* start line in old file */
183 	int	b;		/* end line in old file */
184 	int	c;		/* start line in new file */
185 	int	d;		/* end line in new file */
186 };
187 
188 #define	diff_output	printf
189 static FILE	*opentemp(const char *);
190 static void	 output(char *, FILE *, char *, FILE *, int);
191 static void	 check(FILE *, FILE *, int);
192 static void	 range(int, int, const char *);
193 static void	 uni_range(int, int);
194 static void	 dump_context_vec(FILE *, FILE *, int);
195 static void	 dump_unified_vec(FILE *, FILE *, int);
196 static void	 prepare(int, FILE *, size_t, int);
197 static void	 prune(void);
198 static void	 equiv(struct line *, int, struct line *, int, int *);
199 static void	 unravel(int);
200 static void	 unsort(struct line *, int, int *);
201 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
202 static void	 sort(struct line *, int);
203 static void	 print_header(const char *, const char *);
204 static int	 ignoreline(char *);
205 static int	 asciifile(FILE *);
206 static int	 fetch(long *, int, int, FILE *, int, int, int);
207 static int	 newcand(int, int, int);
208 static int	 search(int *, int, int);
209 static int	 skipline(FILE *);
210 static int	 isqrt(int);
211 static int	 stone(int *, int, int *, int *, int);
212 static int	 readhash(FILE *, int);
213 static int	 files_differ(FILE *, FILE *, int);
214 static char	*match_function(const long *, int, FILE *);
215 static char	*preadline(int, size_t, off_t);
216 
217 static int  *J;			/* will be overlaid on class */
218 static int  *class;		/* will be overlaid on file[0] */
219 static int  *klist;		/* will be overlaid on file[0] after class */
220 static int  *member;		/* will be overlaid on file[1] */
221 static int   clen;
222 static int   inifdef;		/* whether or not we are in a #ifdef block */
223 static int   len[2];
224 static int   pref, suff;	/* length of prefix and suffix */
225 static int   slen[2];
226 static int   anychange;
227 static long *ixnew;		/* will be overlaid on file[1] */
228 static long *ixold;		/* will be overlaid on klist */
229 static struct cand *clist;	/* merely a free storage pot for candidates */
230 static int   clistlen;		/* the length of clist */
231 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
232 static u_char *chrtran;		/* translation table for case-folding */
233 static struct context_vec *context_vec_start;
234 static struct context_vec *context_vec_end;
235 static struct context_vec *context_vec_ptr;
236 
237 #define FUNCTION_CONTEXT_SIZE	55
238 static char lastbuf[FUNCTION_CONTEXT_SIZE];
239 static int lastline;
240 static int lastmatchline;
241 
242 
243 /*
244  * chrtran points to one of 2 translation tables: cup2low if folding upper to
245  * lower case clow2low if not folding case
246  */
247 static u_char clow2low[256] = {
248 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
249 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
250 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
251 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
252 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
253 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
254 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
255 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
256 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
257 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
258 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
259 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
260 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
261 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
262 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
263 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
264 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
265 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
266 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
267 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
268 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
269 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
270 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
271 	0xfd, 0xfe, 0xff
272 };
273 
274 static u_char cup2low[256] = {
275 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
276 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
277 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
278 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
279 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
280 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
281 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
282 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
283 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
284 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
285 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
286 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
287 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
288 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
289 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
290 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
291 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
292 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
293 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
294 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
295 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
296 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
297 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
298 	0xfd, 0xfe, 0xff
299 };
300 
301 int
302 diffreg(char *file1, char *file2, int flags, int capsicum)
303 {
304 	FILE *f1, *f2;
305 	int i, rval;
306 	int	ostdout = -1;
307 	int pr_pd, kq;
308 	struct kevent *e;
309 	cap_rights_t rights_ro;
310 
311 	e = NULL;
312 	kq = -1;
313 	f1 = f2 = NULL;
314 	rval = D_SAME;
315 	anychange = 0;
316 	lastline = 0;
317 	lastmatchline = 0;
318 	context_vec_ptr = context_vec_start - 1;
319 	if (flags & D_IGNORECASE)
320 		chrtran = cup2low;
321 	else
322 		chrtran = clow2low;
323 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
324 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
325 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
326 		goto closem;
327 
328 	if (flags & D_EMPTY1)
329 		f1 = fopen(_PATH_DEVNULL, "r");
330 	else {
331 		if (!S_ISREG(stb1.st_mode)) {
332 			if ((f1 = opentemp(file1)) == NULL ||
333 			    fstat(fileno(f1), &stb1) < 0) {
334 				warn("%s", file1);
335 				status |= 2;
336 				goto closem;
337 			}
338 		} else if (strcmp(file1, "-") == 0)
339 			f1 = stdin;
340 		else
341 			f1 = fopen(file1, "r");
342 	}
343 	if (f1 == NULL) {
344 		warn("%s", file1);
345 		status |= 2;
346 		goto closem;
347 	}
348 
349 	if (flags & D_EMPTY2)
350 		f2 = fopen(_PATH_DEVNULL, "r");
351 	else {
352 		if (!S_ISREG(stb2.st_mode)) {
353 			if ((f2 = opentemp(file2)) == NULL ||
354 			    fstat(fileno(f2), &stb2) < 0) {
355 				warn("%s", file2);
356 				status |= 2;
357 				goto closem;
358 			}
359 		} else if (strcmp(file2, "-") == 0)
360 			f2 = stdin;
361 		else
362 			f2 = fopen(file2, "r");
363 	}
364 	if (f2 == NULL) {
365 		warn("%s", file2);
366 		status |= 2;
367 		goto closem;
368 	}
369 
370 	if (lflag) {
371 		/* redirect stdout to pr */
372 		int	 pfd[2];
373 		pid_t	pid;
374 		char	*header;
375 
376 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
377 		signal(SIGPIPE, SIG_IGN);
378 		fflush(stdout);
379 		rewind(stdout);
380 		pipe(pfd);
381 		switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
382 		case -1:
383 			status |= 2;
384 			free(header);
385 			err(2, "No more processes");
386 		case 0:
387 			/* child */
388 			if (pfd[0] != STDIN_FILENO) {
389 				dup2(pfd[0], STDIN_FILENO);
390 				close(pfd[0]);
391 			}
392 			close(pfd[1]);
393 			execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
394 			_exit(127);
395 		default:
396 
397 			/* parent */
398 			if (pfd[1] != STDOUT_FILENO) {
399 				ostdout = dup(STDOUT_FILENO);
400 				dup2(pfd[1], STDOUT_FILENO);
401 				close(pfd[1]);
402 			}
403 			close(pfd[0]);
404 			rewind(stdout);
405 			free(header);
406 			kq = kqueue();
407 			if (kq == -1)
408 				err(2, "kqueue");
409 			e = xmalloc(sizeof(struct kevent));
410 			EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
411 			    NULL);
412 			if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
413 				err(2, "kevent");
414 		}
415 	}
416 
417 	if (capsicum) {
418 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
419 		if (cap_rights_limit(fileno(f1), &rights_ro) < 0
420 		    && errno != ENOSYS)
421 			err(2, "unable to limit rights on: %s", file1);
422 		if (cap_rights_limit(fileno(f2), &rights_ro) < 0 &&
423 		    errno != ENOSYS)
424 			err(2, "unable to limit rights on: %s", file2);
425 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
426 			/* stding has already been limited */
427 			if (caph_limit_stderr() == -1)
428 				err(2, "unable to limit stderr");
429 			if (caph_limit_stdout() == -1)
430 				err(2, "unable to limit stdout");
431 		} else if (caph_limit_stdio() == -1)
432 				err(2, "unable to limit stdio");
433 
434 		caph_cache_catpages();
435 		caph_cache_tzdata();
436 		if (cap_enter() < 0 && errno != ENOSYS)
437 			err(2, "unable to enter capability mode");
438 	}
439 
440 	switch (files_differ(f1, f2, flags)) {
441 	case 0:
442 		goto closem;
443 	case 1:
444 		break;
445 	default:
446 		/* error */
447 		status |= 2;
448 		goto closem;
449 	}
450 
451 	if ((flags & D_FORCEASCII) == 0 &&
452 	    (!asciifile(f1) || !asciifile(f2))) {
453 		rval = D_BINARY;
454 		status |= 1;
455 		goto closem;
456 	}
457 	prepare(0, f1, stb1.st_size, flags);
458 	prepare(1, f2, stb2.st_size, flags);
459 
460 	prune();
461 	sort(sfile[0], slen[0]);
462 	sort(sfile[1], slen[1]);
463 
464 	member = (int *)file[1];
465 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
466 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
467 
468 	class = (int *)file[0];
469 	unsort(sfile[0], slen[0], class);
470 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
471 
472 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
473 	clen = 0;
474 	clistlen = 100;
475 	clist = xcalloc(clistlen, sizeof(*clist));
476 	i = stone(class, slen[0], member, klist, flags);
477 	free(member);
478 	free(class);
479 
480 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
481 	unravel(klist[i]);
482 	free(clist);
483 	free(klist);
484 
485 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
486 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
487 	check(f1, f2, flags);
488 	output(file1, f1, file2, f2, flags);
489 	if (ostdout != -1 && e != NULL) {
490 		/* close the pipe to pr and restore stdout */
491 		int wstatus;
492 
493 		fflush(stdout);
494 		if (ostdout != STDOUT_FILENO) {
495 			close(STDOUT_FILENO);
496 			dup2(ostdout, STDOUT_FILENO);
497 			close(ostdout);
498 		}
499 		if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
500 			err(2, "kevent");
501 		wstatus = e[0].data;
502 		close(kq);
503 		if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
504 			errx(2, "pr exited abnormally");
505 		else if (WIFSIGNALED(wstatus))
506 			errx(2, "pr killed by signal %d",
507 			    WTERMSIG(wstatus));
508 	}
509 
510 closem:
511 	if (anychange) {
512 		status |= 1;
513 		if (rval == D_SAME)
514 			rval = D_DIFFER;
515 	}
516 	if (f1 != NULL)
517 		fclose(f1);
518 	if (f2 != NULL)
519 		fclose(f2);
520 
521 	return (rval);
522 }
523 
524 /*
525  * Check to see if the given files differ.
526  * Returns 0 if they are the same, 1 if different, and -1 on error.
527  * XXX - could use code from cmp(1) [faster]
528  */
529 static int
530 files_differ(FILE *f1, FILE *f2, int flags)
531 {
532 	char buf1[BUFSIZ], buf2[BUFSIZ];
533 	size_t i, j;
534 
535 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
536 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
537 		return (1);
538 	for (;;) {
539 		i = fread(buf1, 1, sizeof(buf1), f1);
540 		j = fread(buf2, 1, sizeof(buf2), f2);
541 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
542 			return (-1);
543 		if (i != j)
544 			return (1);
545 		if (i == 0)
546 			return (0);
547 		if (memcmp(buf1, buf2, i) != 0)
548 			return (1);
549 	}
550 }
551 
552 static FILE *
553 opentemp(const char *f)
554 {
555 	char buf[BUFSIZ], tempfile[PATH_MAX];
556 	ssize_t nread;
557 	int ifd, ofd;
558 
559 	if (strcmp(f, "-") == 0)
560 		ifd = STDIN_FILENO;
561 	else if ((ifd = open(f, O_RDONLY, 0644)) < 0)
562 		return (NULL);
563 
564 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
565 
566 	if ((ofd = mkstemp(tempfile)) < 0) {
567 		close(ifd);
568 		return (NULL);
569 	}
570 	unlink(tempfile);
571 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
572 		if (write(ofd, buf, nread) != nread) {
573 			close(ifd);
574 			close(ofd);
575 			return (NULL);
576 		}
577 	}
578 	close(ifd);
579 	lseek(ofd, (off_t)0, SEEK_SET);
580 	return (fdopen(ofd, "r"));
581 }
582 
583 char *
584 splice(char *dir, char *path)
585 {
586 	char *tail, *buf;
587 	size_t dirlen;
588 
589 	dirlen = strlen(dir);
590 	while (dirlen != 0 && dir[dirlen - 1] == '/')
591 	    dirlen--;
592 	if ((tail = strrchr(path, '/')) == NULL)
593 		tail = path;
594 	else
595 		tail++;
596 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
597 	return (buf);
598 }
599 
600 static void
601 prepare(int i, FILE *fd, size_t filesize, int flags)
602 {
603 	struct line *p;
604 	int h;
605 	size_t sz, j;
606 
607 	rewind(fd);
608 
609 	sz = MIN(filesize, SIZE_MAX) / 25;
610 	if (sz < 100)
611 		sz = 100;
612 
613 	p = xcalloc(sz + 3, sizeof(*p));
614 	for (j = 0; (h = readhash(fd, flags));) {
615 		if (j == sz) {
616 			sz = sz * 3 / 2;
617 			p = xreallocarray(p, sz + 3, sizeof(*p));
618 		}
619 		p[++j].value = h;
620 	}
621 	len[i] = j;
622 	file[i] = p;
623 }
624 
625 static void
626 prune(void)
627 {
628 	int i, j;
629 
630 	for (pref = 0; pref < len[0] && pref < len[1] &&
631 	    file[0][pref + 1].value == file[1][pref + 1].value;
632 	    pref++)
633 		;
634 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
635 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
636 	    suff++)
637 		;
638 	for (j = 0; j < 2; j++) {
639 		sfile[j] = file[j] + pref;
640 		slen[j] = len[j] - pref - suff;
641 		for (i = 0; i <= slen[j]; i++)
642 			sfile[j][i].serial = i;
643 	}
644 }
645 
646 static void
647 equiv(struct line *a, int n, struct line *b, int m, int *c)
648 {
649 	int i, j;
650 
651 	i = j = 1;
652 	while (i <= n && j <= m) {
653 		if (a[i].value < b[j].value)
654 			a[i++].value = 0;
655 		else if (a[i].value == b[j].value)
656 			a[i++].value = j;
657 		else
658 			j++;
659 	}
660 	while (i <= n)
661 		a[i++].value = 0;
662 	b[m + 1].value = 0;
663 	j = 0;
664 	while (++j <= m) {
665 		c[j] = -b[j].serial;
666 		while (b[j + 1].value == b[j].value) {
667 			j++;
668 			c[j] = b[j].serial;
669 		}
670 	}
671 	c[j] = -1;
672 }
673 
674 /* Code taken from ping.c */
675 static int
676 isqrt(int n)
677 {
678 	int y, x = 1;
679 
680 	if (n == 0)
681 		return (0);
682 
683 	do { /* newton was a stinker */
684 		y = x;
685 		x = n / x;
686 		x += y;
687 		x /= 2;
688 	} while ((x - y) > 1 || (x - y) < -1);
689 
690 	return (x);
691 }
692 
693 static int
694 stone(int *a, int n, int *b, int *c, int flags)
695 {
696 	int i, k, y, j, l;
697 	int oldc, tc, oldl, sq;
698 	u_int numtries, bound;
699 
700 	if (flags & D_MINIMAL)
701 		bound = UINT_MAX;
702 	else {
703 		sq = isqrt(n);
704 		bound = MAX(256, sq);
705 	}
706 
707 	k = 0;
708 	c[0] = newcand(0, 0, 0);
709 	for (i = 1; i <= n; i++) {
710 		j = a[i];
711 		if (j == 0)
712 			continue;
713 		y = -b[j];
714 		oldl = 0;
715 		oldc = c[0];
716 		numtries = 0;
717 		do {
718 			if (y <= clist[oldc].y)
719 				continue;
720 			l = search(c, k, y);
721 			if (l != oldl + 1)
722 				oldc = c[l - 1];
723 			if (l <= k) {
724 				if (clist[c[l]].y <= y)
725 					continue;
726 				tc = c[l];
727 				c[l] = newcand(i, y, oldc);
728 				oldc = tc;
729 				oldl = l;
730 				numtries++;
731 			} else {
732 				c[l] = newcand(i, y, oldc);
733 				k++;
734 				break;
735 			}
736 		} while ((y = b[++j]) > 0 && numtries < bound);
737 	}
738 	return (k);
739 }
740 
741 static int
742 newcand(int x, int y, int pred)
743 {
744 	struct cand *q;
745 
746 	if (clen == clistlen) {
747 		clistlen = clistlen * 11 / 10;
748 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
749 	}
750 	q = clist + clen;
751 	q->x = x;
752 	q->y = y;
753 	q->pred = pred;
754 	return (clen++);
755 }
756 
757 static int
758 search(int *c, int k, int y)
759 {
760 	int i, j, l, t;
761 
762 	if (clist[c[k]].y < y)	/* quick look for typical case */
763 		return (k + 1);
764 	i = 0;
765 	j = k + 1;
766 	for (;;) {
767 		l = (i + j) / 2;
768 		if (l <= i)
769 			break;
770 		t = clist[c[l]].y;
771 		if (t > y)
772 			j = l;
773 		else if (t < y)
774 			i = l;
775 		else
776 			return (l);
777 	}
778 	return (l + 1);
779 }
780 
781 static void
782 unravel(int p)
783 {
784 	struct cand *q;
785 	int i;
786 
787 	for (i = 0; i <= len[0]; i++)
788 		J[i] = i <= pref ? i :
789 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
790 	for (q = clist + p; q->y != 0; q = clist + q->pred)
791 		J[q->x + pref] = q->y + pref;
792 }
793 
794 /*
795  * Check does double duty:
796  *  1.	ferret out any fortuitous correspondences due
797  *	to confounding by hashing (which result in "jackpot")
798  *  2.  collect random access indexes to the two files
799  */
800 static void
801 check(FILE *f1, FILE *f2, int flags)
802 {
803 	int i, j, jackpot, c, d;
804 	long ctold, ctnew;
805 
806 	rewind(f1);
807 	rewind(f2);
808 	j = 1;
809 	ixold[0] = ixnew[0] = 0;
810 	jackpot = 0;
811 	ctold = ctnew = 0;
812 	for (i = 1; i <= len[0]; i++) {
813 		if (J[i] == 0) {
814 			ixold[i] = ctold += skipline(f1);
815 			continue;
816 		}
817 		while (j < J[i]) {
818 			ixnew[j] = ctnew += skipline(f2);
819 			j++;
820 		}
821 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
822 			for (;;) {
823 				c = getc(f1);
824 				d = getc(f2);
825 				/*
826 				 * GNU diff ignores a missing newline
827 				 * in one file for -b or -w.
828 				 */
829 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
830 					if (c == EOF && d == '\n') {
831 						ctnew++;
832 						break;
833 					} else if (c == '\n' && d == EOF) {
834 						ctold++;
835 						break;
836 					}
837 				}
838 				ctold++;
839 				ctnew++;
840 				if (flags & D_STRIPCR) {
841 					if (c == '\r') {
842 						if ((c = getc(f1)) == '\n') {
843 							ctnew++;
844 							break;
845 						}
846 					}
847 					if (d == '\r') {
848 						if ((d = getc(f2)) == '\n') {
849 							ctold++;
850 							break;
851 						}
852 					}
853 				}
854 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
855 				    isspace(d)) {
856 					do {
857 						if (c == '\n')
858 							break;
859 						ctold++;
860 					} while (isspace(c = getc(f1)));
861 					do {
862 						if (d == '\n')
863 							break;
864 						ctnew++;
865 					} while (isspace(d = getc(f2)));
866 				} else if ((flags & D_IGNOREBLANKS)) {
867 					while (isspace(c) && c != '\n') {
868 						c = getc(f1);
869 						ctold++;
870 					}
871 					while (isspace(d) && d != '\n') {
872 						d = getc(f2);
873 						ctnew++;
874 					}
875 				}
876 				if (chrtran[c] != chrtran[d]) {
877 					jackpot++;
878 					J[i] = 0;
879 					if (c != '\n' && c != EOF)
880 						ctold += skipline(f1);
881 					if (d != '\n' && c != EOF)
882 						ctnew += skipline(f2);
883 					break;
884 				}
885 				if (c == '\n' || c == EOF)
886 					break;
887 			}
888 		} else {
889 			for (;;) {
890 				ctold++;
891 				ctnew++;
892 				if ((c = getc(f1)) != (d = getc(f2))) {
893 					/* jackpot++; */
894 					J[i] = 0;
895 					if (c != '\n' && c != EOF)
896 						ctold += skipline(f1);
897 					if (d != '\n' && c != EOF)
898 						ctnew += skipline(f2);
899 					break;
900 				}
901 				if (c == '\n' || c == EOF)
902 					break;
903 			}
904 		}
905 		ixold[i] = ctold;
906 		ixnew[j] = ctnew;
907 		j++;
908 	}
909 	for (; j <= len[1]; j++) {
910 		ixnew[j] = ctnew += skipline(f2);
911 	}
912 	/*
913 	 * if (jackpot)
914 	 *	fprintf(stderr, "jackpot\n");
915 	 */
916 }
917 
918 /* shellsort CACM #201 */
919 static void
920 sort(struct line *a, int n)
921 {
922 	struct line *ai, *aim, w;
923 	int j, m = 0, k;
924 
925 	if (n == 0)
926 		return;
927 	for (j = 1; j <= n; j *= 2)
928 		m = 2 * j - 1;
929 	for (m /= 2; m != 0; m /= 2) {
930 		k = n - m;
931 		for (j = 1; j <= k; j++) {
932 			for (ai = &a[j]; ai > a; ai -= m) {
933 				aim = &ai[m];
934 				if (aim < ai)
935 					break;	/* wraparound */
936 				if (aim->value > ai[0].value ||
937 				    (aim->value == ai[0].value &&
938 					aim->serial > ai[0].serial))
939 					break;
940 				w.value = ai[0].value;
941 				ai[0].value = aim->value;
942 				aim->value = w.value;
943 				w.serial = ai[0].serial;
944 				ai[0].serial = aim->serial;
945 				aim->serial = w.serial;
946 			}
947 		}
948 	}
949 }
950 
951 static void
952 unsort(struct line *f, int l, int *b)
953 {
954 	int *a, i;
955 
956 	a = xcalloc(l + 1, sizeof(*a));
957 	for (i = 1; i <= l; i++)
958 		a[f[i].serial] = f[i].value;
959 	for (i = 1; i <= l; i++)
960 		b[i] = a[i];
961 	free(a);
962 }
963 
964 static int
965 skipline(FILE *f)
966 {
967 	int i, c;
968 
969 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
970 		continue;
971 	return (i);
972 }
973 
974 static void
975 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
976 {
977 	int m, i0, i1, j0, j1;
978 
979 	rewind(f1);
980 	rewind(f2);
981 	m = len[0];
982 	J[0] = 0;
983 	J[m + 1] = len[1] + 1;
984 	if (diff_format != D_EDIT) {
985 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
986 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
987 				i0++;
988 			j0 = J[i0 - 1] + 1;
989 			i1 = i0 - 1;
990 			while (i1 < m && J[i1 + 1] == 0)
991 				i1++;
992 			j1 = J[i1 + 1] - 1;
993 			J[i1] = j1;
994 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
995 		}
996 	} else {
997 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
998 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
999 				i0--;
1000 			j0 = J[i0 + 1] - 1;
1001 			i1 = i0 + 1;
1002 			while (i1 > 1 && J[i1 - 1] == 0)
1003 				i1--;
1004 			j1 = J[i1 - 1] + 1;
1005 			J[i1] = j1;
1006 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
1007 		}
1008 	}
1009 	if (m == 0)
1010 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
1011 	if (diff_format == D_IFDEF) {
1012 		for (;;) {
1013 #define	c i0
1014 			if ((c = getc(f1)) == EOF)
1015 				return;
1016 			diff_output("%c", c);
1017 		}
1018 #undef c
1019 	}
1020 	if (anychange != 0) {
1021 		if (diff_format == D_CONTEXT)
1022 			dump_context_vec(f1, f2, flags);
1023 		else if (diff_format == D_UNIFIED)
1024 			dump_unified_vec(f1, f2, flags);
1025 	}
1026 }
1027 
1028 static void
1029 range(int a, int b, const char *separator)
1030 {
1031 	diff_output("%d", a > b ? b : a);
1032 	if (a < b)
1033 		diff_output("%s%d", separator, b);
1034 }
1035 
1036 static void
1037 uni_range(int a, int b)
1038 {
1039 	if (a < b)
1040 		diff_output("%d,%d", a, b - a + 1);
1041 	else if (a == b)
1042 		diff_output("%d", b);
1043 	else
1044 		diff_output("%d,0", b);
1045 }
1046 
1047 static char *
1048 preadline(int fd, size_t rlen, off_t off)
1049 {
1050 	char *line;
1051 	ssize_t nr;
1052 
1053 	line = xmalloc(rlen + 1);
1054 	if ((nr = pread(fd, line, rlen, off)) < 0)
1055 		err(2, "preadline");
1056 	if (nr > 0 && line[nr-1] == '\n')
1057 		nr--;
1058 	line[nr] = '\0';
1059 	return (line);
1060 }
1061 
1062 static int
1063 ignoreline(char *line)
1064 {
1065 	int ret;
1066 
1067 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1068 	free(line);
1069 	return (ret == 0);	/* if it matched, it should be ignored. */
1070 }
1071 
1072 /*
1073  * Indicate that there is a difference between lines a and b of the from file
1074  * to get to lines c to d of the to file.  If a is greater then b then there
1075  * are no lines in the from file involved and this means that there were
1076  * lines appended (beginning at b).  If c is greater than d then there are
1077  * lines missing from the to file.
1078  */
1079 static void
1080 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1081     int *pflags)
1082 {
1083 	static size_t max_context = 64;
1084 	int i;
1085 
1086 restart:
1087 	if (diff_format != D_IFDEF && a > b && c > d)
1088 		return;
1089 	if (ignore_pats != NULL) {
1090 		char *line;
1091 		/*
1092 		 * All lines in the change, insert, or delete must
1093 		 * match an ignore pattern for the change to be
1094 		 * ignored.
1095 		 */
1096 		if (a <= b) {		/* Changes and deletes. */
1097 			for (i = a; i <= b; i++) {
1098 				line = preadline(fileno(f1),
1099 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1100 				if (!ignoreline(line))
1101 					goto proceed;
1102 			}
1103 		}
1104 		if (a > b || c <= d) {	/* Changes and inserts. */
1105 			for (i = c; i <= d; i++) {
1106 				line = preadline(fileno(f2),
1107 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1108 				if (!ignoreline(line))
1109 					goto proceed;
1110 			}
1111 		}
1112 		return;
1113 	}
1114 proceed:
1115 	if (*pflags & D_HEADER) {
1116 		diff_output("%s %s %s\n", diffargs, file1, file2);
1117 		*pflags &= ~D_HEADER;
1118 	}
1119 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1120 		/*
1121 		 * Allocate change records as needed.
1122 		 */
1123 		if (context_vec_ptr == context_vec_end - 1) {
1124 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1125 			max_context <<= 1;
1126 			context_vec_start = xreallocarray(context_vec_start,
1127 			    max_context, sizeof(*context_vec_start));
1128 			context_vec_end = context_vec_start + max_context;
1129 			context_vec_ptr = context_vec_start + offset;
1130 		}
1131 		if (anychange == 0) {
1132 			/*
1133 			 * Print the context/unidiff header first time through.
1134 			 */
1135 			print_header(file1, file2);
1136 			anychange = 1;
1137 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1138 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1139 			/*
1140 			 * If this change is more than 'diff_context' lines from the
1141 			 * previous change, dump the record and reset it.
1142 			 */
1143 			if (diff_format == D_CONTEXT)
1144 				dump_context_vec(f1, f2, *pflags);
1145 			else
1146 				dump_unified_vec(f1, f2, *pflags);
1147 		}
1148 		context_vec_ptr++;
1149 		context_vec_ptr->a = a;
1150 		context_vec_ptr->b = b;
1151 		context_vec_ptr->c = c;
1152 		context_vec_ptr->d = d;
1153 		return;
1154 	}
1155 	if (anychange == 0)
1156 		anychange = 1;
1157 	switch (diff_format) {
1158 	case D_BRIEF:
1159 		return;
1160 	case D_NORMAL:
1161 	case D_EDIT:
1162 		range(a, b, ",");
1163 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1164 		if (diff_format == D_NORMAL)
1165 			range(c, d, ",");
1166 		diff_output("\n");
1167 		break;
1168 	case D_REVERSE:
1169 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1170 		range(a, b, " ");
1171 		diff_output("\n");
1172 		break;
1173 	case D_NREVERSE:
1174 		if (a > b)
1175 			diff_output("a%d %d\n", b, d - c + 1);
1176 		else {
1177 			diff_output("d%d %d\n", a, b - a + 1);
1178 			if (!(c > d))
1179 				/* add changed lines */
1180 				diff_output("a%d %d\n", b, d - c + 1);
1181 		}
1182 		break;
1183 	}
1184 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1185 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1186 		if (a <= b && c <= d && diff_format == D_NORMAL)
1187 			diff_output("---\n");
1188 	}
1189 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1190 	if (i != 0 && diff_format == D_EDIT) {
1191 		/*
1192 		 * A non-zero return value for D_EDIT indicates that the
1193 		 * last line printed was a bare dot (".") that has been
1194 		 * escaped as ".." to prevent ed(1) from misinterpreting
1195 		 * it.  We have to add a substitute command to change this
1196 		 * back and restart where we left off.
1197 		 */
1198 		diff_output(".\n");
1199 		diff_output("%ds/.//\n", a + i - 1);
1200 		b = a + i - 1;
1201 		a = b + 1;
1202 		c += i;
1203 		goto restart;
1204 	}
1205 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1206 		diff_output(".\n");
1207 	if (inifdef) {
1208 		diff_output("#endif /* %s */\n", ifdefname);
1209 		inifdef = 0;
1210 	}
1211 }
1212 
1213 static int
1214 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1215 {
1216 	int i, j, c, lastc, col, nc;
1217 	int	newcol;
1218 
1219 	/*
1220 	 * When doing #ifdef's, copy down to current line
1221 	 * if this is the first file, so that stuff makes it to output.
1222 	 */
1223 	if (diff_format == D_IFDEF && oldfile) {
1224 		long curpos = ftell(lb);
1225 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1226 		nc = f[a > b ? b : a - 1] - curpos;
1227 		for (i = 0; i < nc; i++)
1228 			diff_output("%c", getc(lb));
1229 	}
1230 	if (a > b)
1231 		return (0);
1232 	if (diff_format == D_IFDEF) {
1233 		if (inifdef) {
1234 			diff_output("#else /* %s%s */\n",
1235 			    oldfile == 1 ? "!" : "", ifdefname);
1236 		} else {
1237 			if (oldfile)
1238 				diff_output("#ifndef %s\n", ifdefname);
1239 			else
1240 				diff_output("#ifdef %s\n", ifdefname);
1241 		}
1242 		inifdef = 1 + oldfile;
1243 	}
1244 	for (i = a; i <= b; i++) {
1245 		fseek(lb, f[i - 1], SEEK_SET);
1246 		nc = f[i] - f[i - 1];
1247 		if (diff_format != D_IFDEF && ch != '\0') {
1248 			diff_output("%c", ch);
1249 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1250 			    || diff_format == D_UNIFIED))
1251 				diff_output("\t");
1252 			else if (diff_format != D_UNIFIED)
1253 				diff_output(" ");
1254 		}
1255 		col = 0;
1256 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1257 			if ((c = getc(lb)) == EOF) {
1258 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1259 				    diff_format == D_NREVERSE)
1260 					warnx("No newline at end of file");
1261 				else
1262 					diff_output("\n\\ No newline at end of "
1263 					    "file\n");
1264 				return (0);
1265 			}
1266 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1267 				newcol = ((col/tabsize)+1)*tabsize;
1268 				do {
1269 					diff_output(" ");
1270 				} while (++col < newcol);
1271 			} else {
1272 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1273 				    && lastc == '.') {
1274 					/*
1275 					 * Don't print a bare "." line
1276 					 * since that will confuse ed(1).
1277 					 * Print ".." instead and return,
1278 					 * giving the caller an offset
1279 					 * from which to restart.
1280 					 */
1281 					diff_output(".\n");
1282 					return (i - a + 1);
1283 				}
1284 				diff_output("%c", c);
1285 				col++;
1286 			}
1287 		}
1288 	}
1289 	return (0);
1290 }
1291 
1292 /*
1293  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1294  */
1295 static int
1296 readhash(FILE *f, int flags)
1297 {
1298 	int i, t, space;
1299 	int sum;
1300 
1301 	sum = 1;
1302 	space = 0;
1303 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1304 		if (flags & D_IGNORECASE)
1305 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1306 				if (flags & D_STRIPCR && t == '\r') {
1307 					t = getc(f);
1308 					if (t == '\n')
1309 						break;
1310 					ungetc(t, f);
1311 				}
1312 				if (t == EOF) {
1313 					if (i == 0)
1314 						return (0);
1315 					break;
1316 				}
1317 				sum = sum * 127 + chrtran[t];
1318 			}
1319 		else
1320 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1321 				if (flags & D_STRIPCR && t == '\r') {
1322 					t = getc(f);
1323 					if (t == '\n')
1324 						break;
1325 					ungetc(t, f);
1326 				}
1327 				if (t == EOF) {
1328 					if (i == 0)
1329 						return (0);
1330 					break;
1331 				}
1332 				sum = sum * 127 + t;
1333 			}
1334 	} else {
1335 		for (i = 0;;) {
1336 			switch (t = getc(f)) {
1337 			case '\r':
1338 			case '\t':
1339 			case '\v':
1340 			case '\f':
1341 			case ' ':
1342 				space++;
1343 				continue;
1344 			default:
1345 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1346 					i++;
1347 					space = 0;
1348 				}
1349 				sum = sum * 127 + chrtran[t];
1350 				i++;
1351 				continue;
1352 			case EOF:
1353 				if (i == 0)
1354 					return (0);
1355 				/* FALLTHROUGH */
1356 			case '\n':
1357 				break;
1358 			}
1359 			break;
1360 		}
1361 	}
1362 	/*
1363 	 * There is a remote possibility that we end up with a zero sum.
1364 	 * Zero is used as an EOF marker, so return 1 instead.
1365 	 */
1366 	return (sum == 0 ? 1 : sum);
1367 }
1368 
1369 static int
1370 asciifile(FILE *f)
1371 {
1372 	unsigned char buf[BUFSIZ];
1373 	size_t cnt;
1374 
1375 	if (f == NULL)
1376 		return (1);
1377 
1378 	rewind(f);
1379 	cnt = fread(buf, 1, sizeof(buf), f);
1380 	return (memchr(buf, '\0', cnt) == NULL);
1381 }
1382 
1383 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1384 
1385 static char *
1386 match_function(const long *f, int pos, FILE *fp)
1387 {
1388 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1389 	size_t nc;
1390 	int last = lastline;
1391 	const char *state = NULL;
1392 
1393 	lastline = pos;
1394 	while (pos > last) {
1395 		fseek(fp, f[pos - 1], SEEK_SET);
1396 		nc = f[pos] - f[pos - 1];
1397 		if (nc >= sizeof(buf))
1398 			nc = sizeof(buf) - 1;
1399 		nc = fread(buf, 1, nc, fp);
1400 		if (nc > 0) {
1401 			buf[nc] = '\0';
1402 			buf[strcspn(buf, "\n")] = '\0';
1403 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1404 				if (begins_with(buf, "private:")) {
1405 					if (!state)
1406 						state = " (private)";
1407 				} else if (begins_with(buf, "protected:")) {
1408 					if (!state)
1409 						state = " (protected)";
1410 				} else if (begins_with(buf, "public:")) {
1411 					if (!state)
1412 						state = " (public)";
1413 				} else {
1414 					strlcpy(lastbuf, buf, sizeof lastbuf);
1415 					if (state)
1416 						strlcat(lastbuf, state,
1417 						    sizeof lastbuf);
1418 					lastmatchline = pos;
1419 					return lastbuf;
1420 				}
1421 			}
1422 		}
1423 		pos--;
1424 	}
1425 	return lastmatchline > 0 ? lastbuf : NULL;
1426 }
1427 
1428 /* dump accumulated "context" diff changes */
1429 static void
1430 dump_context_vec(FILE *f1, FILE *f2, int flags)
1431 {
1432 	struct context_vec *cvp = context_vec_start;
1433 	int lowa, upb, lowc, upd, do_output;
1434 	int a, b, c, d;
1435 	char ch, *f;
1436 
1437 	if (context_vec_start > context_vec_ptr)
1438 		return;
1439 
1440 	b = d = 0;		/* gcc */
1441 	lowa = MAX(1, cvp->a - diff_context);
1442 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1443 	lowc = MAX(1, cvp->c - diff_context);
1444 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1445 
1446 	diff_output("***************");
1447 	if ((flags & D_PROTOTYPE)) {
1448 		f = match_function(ixold, lowa-1, f1);
1449 		if (f != NULL)
1450 			diff_output(" %s", f);
1451 	}
1452 	diff_output("\n*** ");
1453 	range(lowa, upb, ",");
1454 	diff_output(" ****\n");
1455 
1456 	/*
1457 	 * Output changes to the "old" file.  The first loop suppresses
1458 	 * output if there were no changes to the "old" file (we'll see
1459 	 * the "old" lines as context in the "new" list).
1460 	 */
1461 	do_output = 0;
1462 	for (; cvp <= context_vec_ptr; cvp++)
1463 		if (cvp->a <= cvp->b) {
1464 			cvp = context_vec_start;
1465 			do_output++;
1466 			break;
1467 		}
1468 	if (do_output) {
1469 		while (cvp <= context_vec_ptr) {
1470 			a = cvp->a;
1471 			b = cvp->b;
1472 			c = cvp->c;
1473 			d = cvp->d;
1474 
1475 			if (a <= b && c <= d)
1476 				ch = 'c';
1477 			else
1478 				ch = (a <= b) ? 'd' : 'a';
1479 
1480 			if (ch == 'a')
1481 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1482 			else {
1483 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1484 				fetch(ixold, a, b, f1,
1485 				    ch == 'c' ? '!' : '-', 0, flags);
1486 			}
1487 			lowa = b + 1;
1488 			cvp++;
1489 		}
1490 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1491 	}
1492 	/* output changes to the "new" file */
1493 	diff_output("--- ");
1494 	range(lowc, upd, ",");
1495 	diff_output(" ----\n");
1496 
1497 	do_output = 0;
1498 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1499 		if (cvp->c <= cvp->d) {
1500 			cvp = context_vec_start;
1501 			do_output++;
1502 			break;
1503 		}
1504 	if (do_output) {
1505 		while (cvp <= context_vec_ptr) {
1506 			a = cvp->a;
1507 			b = cvp->b;
1508 			c = cvp->c;
1509 			d = cvp->d;
1510 
1511 			if (a <= b && c <= d)
1512 				ch = 'c';
1513 			else
1514 				ch = (a <= b) ? 'd' : 'a';
1515 
1516 			if (ch == 'd')
1517 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1518 			else {
1519 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1520 				fetch(ixnew, c, d, f2,
1521 				    ch == 'c' ? '!' : '+', 0, flags);
1522 			}
1523 			lowc = d + 1;
1524 			cvp++;
1525 		}
1526 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1527 	}
1528 	context_vec_ptr = context_vec_start - 1;
1529 }
1530 
1531 /* dump accumulated "unified" diff changes */
1532 static void
1533 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1534 {
1535 	struct context_vec *cvp = context_vec_start;
1536 	int lowa, upb, lowc, upd;
1537 	int a, b, c, d;
1538 	char ch, *f;
1539 
1540 	if (context_vec_start > context_vec_ptr)
1541 		return;
1542 
1543 	b = d = 0;		/* gcc */
1544 	lowa = MAX(1, cvp->a - diff_context);
1545 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1546 	lowc = MAX(1, cvp->c - diff_context);
1547 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1548 
1549 	diff_output("@@ -");
1550 	uni_range(lowa, upb);
1551 	diff_output(" +");
1552 	uni_range(lowc, upd);
1553 	diff_output(" @@");
1554 	if ((flags & D_PROTOTYPE)) {
1555 		f = match_function(ixold, lowa-1, f1);
1556 		if (f != NULL)
1557 			diff_output(" %s", f);
1558 	}
1559 	diff_output("\n");
1560 
1561 	/*
1562 	 * Output changes in "unified" diff format--the old and new lines
1563 	 * are printed together.
1564 	 */
1565 	for (; cvp <= context_vec_ptr; cvp++) {
1566 		a = cvp->a;
1567 		b = cvp->b;
1568 		c = cvp->c;
1569 		d = cvp->d;
1570 
1571 		/*
1572 		 * c: both new and old changes
1573 		 * d: only changes in the old file
1574 		 * a: only changes in the new file
1575 		 */
1576 		if (a <= b && c <= d)
1577 			ch = 'c';
1578 		else
1579 			ch = (a <= b) ? 'd' : 'a';
1580 
1581 		switch (ch) {
1582 		case 'c':
1583 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1584 			fetch(ixold, a, b, f1, '-', 0, flags);
1585 			fetch(ixnew, c, d, f2, '+', 0, flags);
1586 			break;
1587 		case 'd':
1588 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1589 			fetch(ixold, a, b, f1, '-', 0, flags);
1590 			break;
1591 		case 'a':
1592 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1593 			fetch(ixnew, c, d, f2, '+', 0, flags);
1594 			break;
1595 		}
1596 		lowa = b + 1;
1597 		lowc = d + 1;
1598 	}
1599 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1600 
1601 	context_vec_ptr = context_vec_start - 1;
1602 }
1603 
1604 static void
1605 print_header(const char *file1, const char *file2)
1606 {
1607 	const char *time_format;
1608 	char buf1[256];
1609 	char buf2[256];
1610 	char end1[10];
1611 	char end2[10];
1612 	struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1613 	int nsec1 = stb1.st_mtim.tv_nsec;
1614 	int nsec2 = stb2.st_mtim.tv_nsec;
1615 
1616 	time_format = "%Y-%m-%d %H:%M:%S";
1617 
1618 	if (cflag)
1619 		time_format = "%c";
1620 	tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1621 	tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1622 	strftime(buf1, 256, time_format, tm_ptr1);
1623 	strftime(buf2, 256, time_format, tm_ptr2);
1624 	if (!cflag) {
1625 		strftime(end1, 10, "%z", tm_ptr1);
1626 		strftime(end2, 10, "%z", tm_ptr2);
1627 		sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1628 		sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1629 	}
1630 	if (label[0] != NULL)
1631 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1632 		    label[0]);
1633 	else
1634 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1635 		    file1, buf1);
1636 	if (label[1] != NULL)
1637 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1638 		    label[1]);
1639 	else
1640 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1641 		    file2, buf2);
1642 }
1643