xref: /freebsd/usr.bin/diff/diffreg.c (revision 484482b8ad59a8c68aaa1f0551a983c10b983a22)
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
69 
70 #include <sys/capsicum.h>
71 #include <sys/procdesc.h>
72 #include <sys/stat.h>
73 #include <sys/types.h>
74 #include <sys/event.h>
75 #include <sys/wait.h>
76 
77 #include <capsicum_helpers.h>
78 #include <ctype.h>
79 #include <err.h>
80 #include <errno.h>
81 #include <fcntl.h>
82 #include <paths.h>
83 #include <stddef.h>
84 #include <stdint.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <unistd.h>
89 #include <limits.h>
90 #include <signal.h>
91 
92 #include "diff.h"
93 #include "xmalloc.h"
94 
95 #define _PATH_PR "/usr/bin/pr"
96 
97 #ifdef ST_MTIM_NSEC
98 # define TIMESPEC_NS(timespec) ((timespec).ST_MTIM_NSEC)
99 #else
100 # define TIMESPEC_NS(timespec) 0
101 #endif
102 
103 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
104 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
105 
106 /*
107  * diff - compare two files.
108  */
109 
110 /*
111  *	Uses an algorithm due to Harold Stone, which finds
112  *	a pair of longest identical subsequences in the two
113  *	files.
114  *
115  *	The major goal is to generate the match vector J.
116  *	J[i] is the index of the line in file1 corresponding
117  *	to line i file0. J[i] = 0 if there is no
118  *	such line in file1.
119  *
120  *	Lines are hashed so as to work in core. All potential
121  *	matches are located by sorting the lines of each file
122  *	on the hash (called ``value''). In particular, this
123  *	collects the equivalence classes in file1 together.
124  *	Subroutine equiv replaces the value of each line in
125  *	file0 by the index of the first element of its
126  *	matching equivalence in (the reordered) file1.
127  *	To save space equiv squeezes file1 into a single
128  *	array member in which the equivalence classes
129  *	are simply concatenated, except that their first
130  *	members are flagged by changing sign.
131  *
132  *	Next the indices that point into member are unsorted into
133  *	array class according to the original order of file0.
134  *
135  *	The cleverness lies in routine stone. This marches
136  *	through the lines of file0, developing a vector klist
137  *	of "k-candidates". At step i a k-candidate is a matched
138  *	pair of lines x,y (x in file0 y in file1) such that
139  *	there is a common subsequence of length k
140  *	between the first i lines of file0 and the first y
141  *	lines of file1, but there is no such subsequence for
142  *	any smaller y. x is the earliest possible mate to y
143  *	that occurs in such a subsequence.
144  *
145  *	Whenever any of the members of the equivalence class of
146  *	lines in file1 matable to a line in file0 has serial number
147  *	less than the y of some k-candidate, that k-candidate
148  *	with the smallest such y is replaced. The new
149  *	k-candidate is chained (via pred) to the current
150  *	k-1 candidate so that the actual subsequence can
151  *	be recovered. When a member has serial number greater
152  *	that the y of all k-candidates, the klist is extended.
153  *	At the end, the longest subsequence is pulled out
154  *	and placed in the array J by unravel
155  *
156  *	With J in hand, the matches there recorded are
157  *	check'ed against reality to assure that no spurious
158  *	matches have crept in due to hashing. If they have,
159  *	they are broken, and "jackpot" is recorded--a harmless
160  *	matter except that a true match for a spuriously
161  *	mated line may now be unnecessarily reported as a change.
162  *
163  *	Much of the complexity of the program comes simply
164  *	from trying to minimize core utilization and
165  *	maximize the range of doable problems by dynamically
166  *	allocating what is needed and reusing what is not.
167  *	The core requirements for problems larger than somewhat
168  *	are (in words) 2*length(file0) + length(file1) +
169  *	3*(number of k-candidates installed),  typically about
170  *	6n words for files of length n.
171  */
172 
173 struct cand {
174 	int	x;
175 	int	y;
176 	int	pred;
177 };
178 
179 static struct line {
180 	int	serial;
181 	int	value;
182 } *file[2];
183 
184 /*
185  * The following struct is used to record change information when
186  * doing a "context" or "unified" diff.  (see routine "change" to
187  * understand the highly mnemonic field names)
188  */
189 struct context_vec {
190 	int	a;		/* start line in old file */
191 	int	b;		/* end line in old file */
192 	int	c;		/* start line in new file */
193 	int	d;		/* end line in new file */
194 };
195 
196 #define	diff_output	printf
197 static void	 output(char *, FILE *, char *, FILE *, int);
198 static void	 check(FILE *, FILE *, int);
199 static void	 range(int, int, const char *);
200 static void	 uni_range(int, int);
201 static void	 dump_context_vec(FILE *, FILE *, int);
202 static void	 dump_unified_vec(FILE *, FILE *, int);
203 static void	 prepare(int, FILE *, size_t, int);
204 static void	 prune(void);
205 static void	 equiv(struct line *, int, struct line *, int, int *);
206 static void	 unravel(int);
207 static void	 unsort(struct line *, int, int *);
208 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
209 static void	 sort(struct line *, int);
210 static void	 print_header(const char *, const char *);
211 static int	 ignoreline(char *);
212 static int	 asciifile(FILE *);
213 static int	 fetch(long *, int, int, FILE *, int, int, int);
214 static int	 newcand(int, int, int);
215 static int	 search(int *, int, int);
216 static int	 skipline(FILE *);
217 static int	 isqrt(int);
218 static int	 stone(int *, int, int *, int *, int);
219 static int	 readhash(FILE *, int);
220 static int	 files_differ(FILE *, FILE *, int);
221 static char	*match_function(const long *, int, FILE *);
222 static char	*preadline(int, size_t, off_t);
223 
224 static int  *J;			/* will be overlaid on class */
225 static int  *class;		/* will be overlaid on file[0] */
226 static int  *klist;		/* will be overlaid on file[0] after class */
227 static int  *member;		/* will be overlaid on file[1] */
228 static int   clen;
229 static int   inifdef;		/* whether or not we are in a #ifdef block */
230 static int   len[2];
231 static int   pref, suff;	/* length of prefix and suffix */
232 static int   slen[2];
233 static int   anychange;
234 static long *ixnew;		/* will be overlaid on file[1] */
235 static long *ixold;		/* will be overlaid on klist */
236 static struct cand *clist;	/* merely a free storage pot for candidates */
237 static int   clistlen;		/* the length of clist */
238 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
239 static u_char *chrtran;		/* translation table for case-folding */
240 static struct context_vec *context_vec_start;
241 static struct context_vec *context_vec_end;
242 static struct context_vec *context_vec_ptr;
243 
244 #define FUNCTION_CONTEXT_SIZE	55
245 static char lastbuf[FUNCTION_CONTEXT_SIZE];
246 static int lastline;
247 static int lastmatchline;
248 
249 
250 /*
251  * chrtran points to one of 2 translation tables: cup2low if folding upper to
252  * lower case clow2low if not folding case
253  */
254 static u_char clow2low[256] = {
255 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
256 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
257 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
258 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
259 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
260 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
261 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
262 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
263 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
264 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
265 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
266 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
267 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
268 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
269 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
270 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
271 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
272 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
273 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
274 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
275 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
276 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
277 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
278 	0xfd, 0xfe, 0xff
279 };
280 
281 static u_char cup2low[256] = {
282 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
283 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
284 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
285 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
286 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
287 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
288 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
289 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
290 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
291 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
292 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
293 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
294 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
295 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
296 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
297 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
298 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
299 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
300 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
301 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
302 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
303 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
304 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
305 	0xfd, 0xfe, 0xff
306 };
307 
308 int
309 diffreg(char *file1, char *file2, int flags, int capsicum)
310 {
311 	FILE *f1, *f2;
312 	int i, rval;
313 	int	ostdout = -1;
314 	int pr_pd, kq;
315 	struct kevent *e;
316 	cap_rights_t rights_ro;
317 
318 	e = NULL;
319 	kq = -1;
320 	f1 = f2 = NULL;
321 	rval = D_SAME;
322 	anychange = 0;
323 	lastline = 0;
324 	lastmatchline = 0;
325 	context_vec_ptr = context_vec_start - 1;
326 	if (flags & D_IGNORECASE)
327 		chrtran = cup2low;
328 	else
329 		chrtran = clow2low;
330 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
331 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
332 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
333 		goto closem;
334 
335 	if (flags & D_EMPTY1)
336 		f1 = fopen(_PATH_DEVNULL, "r");
337 	else {
338 		if (strcmp(file1, "-") == 0)
339 			f1 = stdin;
340 		else
341 			f1 = fopen(file1, "r");
342 	}
343 	if (f1 == NULL) {
344 		warn("%s", file1);
345 		status |= 2;
346 		goto closem;
347 	}
348 
349 	if (flags & D_EMPTY2)
350 		f2 = fopen(_PATH_DEVNULL, "r");
351 	else {
352 		if (strcmp(file2, "-") == 0)
353 			f2 = stdin;
354 		else
355 			f2 = fopen(file2, "r");
356 	}
357 	if (f2 == NULL) {
358 		warn("%s", file2);
359 		status |= 2;
360 		goto closem;
361 	}
362 
363 	if (lflag) {
364 		/* redirect stdout to pr */
365 		int	 pfd[2];
366 		pid_t	pid;
367 		char	*header;
368 
369 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
370 		signal(SIGPIPE, SIG_IGN);
371 		fflush(stdout);
372 		rewind(stdout);
373 		pipe(pfd);
374 		switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
375 		case -1:
376 			status |= 2;
377 			free(header);
378 			err(2, "No more processes");
379 		case 0:
380 			/* child */
381 			if (pfd[0] != STDIN_FILENO) {
382 				dup2(pfd[0], STDIN_FILENO);
383 				close(pfd[0]);
384 			}
385 			close(pfd[1]);
386 			execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
387 			_exit(127);
388 		default:
389 
390 			/* parent */
391 			if (pfd[1] != STDOUT_FILENO) {
392 				ostdout = dup(STDOUT_FILENO);
393 				dup2(pfd[1], STDOUT_FILENO);
394 				close(pfd[1]);
395 			}
396 			close(pfd[0]);
397 			rewind(stdout);
398 			free(header);
399 			kq = kqueue();
400 			if (kq == -1)
401 				err(2, "kqueue");
402 			e = xmalloc(sizeof(struct kevent));
403 			EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
404 			    NULL);
405 			if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
406 				err(2, "kevent");
407 		}
408 	}
409 
410 	if (capsicum) {
411 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
412 		if (cap_rights_limit(fileno(f1), &rights_ro) < 0)
413 			err(2, "unable to limit rights on: %s", file1);
414 		if (cap_rights_limit(fileno(f2), &rights_ro) < 0)
415 			err(2, "unable to limit rights on: %s", file2);
416 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
417 			/* stding has already been limited */
418 			if (caph_limit_stderr() == -1)
419 				err(2, "unable to limit stderr");
420 			if (caph_limit_stdout() == -1)
421 				err(2, "unable to limit stdout");
422 		} else if (caph_limit_stdio() == -1)
423 				err(2, "unable to limit stdio");
424 
425 		caph_cache_catpages();
426 		if (cap_enter() < 0 && errno != ENOSYS)
427 			err(2, "unable to enter capability mode");
428 	}
429 
430 	switch (files_differ(f1, f2, flags)) {
431 	case 0:
432 		goto closem;
433 	case 1:
434 		break;
435 	default:
436 		/* error */
437 		status |= 2;
438 		goto closem;
439 	}
440 
441 	if ((flags & D_FORCEASCII) == 0 &&
442 	    (!asciifile(f1) || !asciifile(f2))) {
443 		rval = D_BINARY;
444 		status |= 1;
445 		goto closem;
446 	}
447 	prepare(0, f1, stb1.st_size, flags);
448 	prepare(1, f2, stb2.st_size, flags);
449 
450 	prune();
451 	sort(sfile[0], slen[0]);
452 	sort(sfile[1], slen[1]);
453 
454 	member = (int *)file[1];
455 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
456 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
457 
458 	class = (int *)file[0];
459 	unsort(sfile[0], slen[0], class);
460 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
461 
462 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
463 	clen = 0;
464 	clistlen = 100;
465 	clist = xcalloc(clistlen, sizeof(*clist));
466 	i = stone(class, slen[0], member, klist, flags);
467 	free(member);
468 	free(class);
469 
470 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
471 	unravel(klist[i]);
472 	free(clist);
473 	free(klist);
474 
475 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
476 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
477 	check(f1, f2, flags);
478 	output(file1, f1, file2, f2, flags);
479 	if (ostdout != -1 && e != NULL) {
480 		/* close the pipe to pr and restore stdout */
481 		int wstatus;
482 
483 		fflush(stdout);
484 		if (ostdout != STDOUT_FILENO) {
485 			close(STDOUT_FILENO);
486 			dup2(ostdout, STDOUT_FILENO);
487 			close(ostdout);
488 		}
489 		if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
490 			err(2, "kevent");
491 		wstatus = e[0].data;
492 		close(kq);
493 		if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
494 			errx(2, "pr exited abnormally");
495 		else if (WIFSIGNALED(wstatus))
496 			errx(2, "pr killed by signal %d",
497 			    WTERMSIG(wstatus));
498 	}
499 
500 closem:
501 	if (anychange) {
502 		status |= 1;
503 		if (rval == D_SAME)
504 			rval = D_DIFFER;
505 	}
506 	if (f1 != NULL)
507 		fclose(f1);
508 	if (f2 != NULL)
509 		fclose(f2);
510 
511 	return (rval);
512 }
513 
514 /*
515  * Check to see if the given files differ.
516  * Returns 0 if they are the same, 1 if different, and -1 on error.
517  * XXX - could use code from cmp(1) [faster]
518  */
519 static int
520 files_differ(FILE *f1, FILE *f2, int flags)
521 {
522 	char buf1[BUFSIZ], buf2[BUFSIZ];
523 	size_t i, j;
524 
525 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
526 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
527 		return (1);
528 	for (;;) {
529 		i = fread(buf1, 1, sizeof(buf1), f1);
530 		j = fread(buf2, 1, sizeof(buf2), f2);
531 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
532 			return (-1);
533 		if (i != j)
534 			return (1);
535 		if (i == 0)
536 			return (0);
537 		if (memcmp(buf1, buf2, i) != 0)
538 			return (1);
539 	}
540 }
541 
542 char *
543 splice(char *dir, char *path)
544 {
545 	char *tail, *buf;
546 	size_t dirlen;
547 
548 	dirlen = strlen(dir);
549 	while (dirlen != 0 && dir[dirlen - 1] == '/')
550 	    dirlen--;
551 	if ((tail = strrchr(path, '/')) == NULL)
552 		tail = path;
553 	else
554 		tail++;
555 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
556 	return (buf);
557 }
558 
559 static void
560 prepare(int i, FILE *fd, size_t filesize, int flags)
561 {
562 	struct line *p;
563 	int h;
564 	size_t sz, j;
565 
566 	rewind(fd);
567 
568 	sz = ((unsigned long)filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
569 	if (sz < 100)
570 		sz = 100;
571 
572 	p = xcalloc(sz + 3, sizeof(*p));
573 	for (j = 0; (h = readhash(fd, flags));) {
574 		if (j == sz) {
575 			sz = sz * 3 / 2;
576 			p = xreallocarray(p, sz + 3, sizeof(*p));
577 		}
578 		p[++j].value = h;
579 	}
580 	len[i] = j;
581 	file[i] = p;
582 }
583 
584 static void
585 prune(void)
586 {
587 	int i, j;
588 
589 	for (pref = 0; pref < len[0] && pref < len[1] &&
590 	    file[0][pref + 1].value == file[1][pref + 1].value;
591 	    pref++)
592 		;
593 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
594 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
595 	    suff++)
596 		;
597 	for (j = 0; j < 2; j++) {
598 		sfile[j] = file[j] + pref;
599 		slen[j] = len[j] - pref - suff;
600 		for (i = 0; i <= slen[j]; i++)
601 			sfile[j][i].serial = i;
602 	}
603 }
604 
605 static void
606 equiv(struct line *a, int n, struct line *b, int m, int *c)
607 {
608 	int i, j;
609 
610 	i = j = 1;
611 	while (i <= n && j <= m) {
612 		if (a[i].value < b[j].value)
613 			a[i++].value = 0;
614 		else if (a[i].value == b[j].value)
615 			a[i++].value = j;
616 		else
617 			j++;
618 	}
619 	while (i <= n)
620 		a[i++].value = 0;
621 	b[m + 1].value = 0;
622 	j = 0;
623 	while (++j <= m) {
624 		c[j] = -b[j].serial;
625 		while (b[j + 1].value == b[j].value) {
626 			j++;
627 			c[j] = b[j].serial;
628 		}
629 	}
630 	c[j] = -1;
631 }
632 
633 /* Code taken from ping.c */
634 static int
635 isqrt(int n)
636 {
637 	int y, x = 1;
638 
639 	if (n == 0)
640 		return (0);
641 
642 	do { /* newton was a stinker */
643 		y = x;
644 		x = n / x;
645 		x += y;
646 		x /= 2;
647 	} while ((x - y) > 1 || (x - y) < -1);
648 
649 	return (x);
650 }
651 
652 static int
653 stone(int *a, int n, int *b, int *c, int flags)
654 {
655 	int i, k, y, j, l;
656 	int oldc, tc, oldl, sq;
657 	u_int numtries, bound;
658 
659 	if (flags & D_MINIMAL)
660 		bound = UINT_MAX;
661 	else {
662 		sq = isqrt(n);
663 		bound = MAXIMUM(256, sq);
664 	}
665 
666 	k = 0;
667 	c[0] = newcand(0, 0, 0);
668 	for (i = 1; i <= n; i++) {
669 		j = a[i];
670 		if (j == 0)
671 			continue;
672 		y = -b[j];
673 		oldl = 0;
674 		oldc = c[0];
675 		numtries = 0;
676 		do {
677 			if (y <= clist[oldc].y)
678 				continue;
679 			l = search(c, k, y);
680 			if (l != oldl + 1)
681 				oldc = c[l - 1];
682 			if (l <= k) {
683 				if (clist[c[l]].y <= y)
684 					continue;
685 				tc = c[l];
686 				c[l] = newcand(i, y, oldc);
687 				oldc = tc;
688 				oldl = l;
689 				numtries++;
690 			} else {
691 				c[l] = newcand(i, y, oldc);
692 				k++;
693 				break;
694 			}
695 		} while ((y = b[++j]) > 0 && numtries < bound);
696 	}
697 	return (k);
698 }
699 
700 static int
701 newcand(int x, int y, int pred)
702 {
703 	struct cand *q;
704 
705 	if (clen == clistlen) {
706 		clistlen = clistlen * 11 / 10;
707 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
708 	}
709 	q = clist + clen;
710 	q->x = x;
711 	q->y = y;
712 	q->pred = pred;
713 	return (clen++);
714 }
715 
716 static int
717 search(int *c, int k, int y)
718 {
719 	int i, j, l, t;
720 
721 	if (clist[c[k]].y < y)	/* quick look for typical case */
722 		return (k + 1);
723 	i = 0;
724 	j = k + 1;
725 	for (;;) {
726 		l = (i + j) / 2;
727 		if (l <= i)
728 			break;
729 		t = clist[c[l]].y;
730 		if (t > y)
731 			j = l;
732 		else if (t < y)
733 			i = l;
734 		else
735 			return (l);
736 	}
737 	return (l + 1);
738 }
739 
740 static void
741 unravel(int p)
742 {
743 	struct cand *q;
744 	int i;
745 
746 	for (i = 0; i <= len[0]; i++)
747 		J[i] = i <= pref ? i :
748 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
749 	for (q = clist + p; q->y != 0; q = clist + q->pred)
750 		J[q->x + pref] = q->y + pref;
751 }
752 
753 /*
754  * Check does double duty:
755  *  1.	ferret out any fortuitous correspondences due
756  *	to confounding by hashing (which result in "jackpot")
757  *  2.  collect random access indexes to the two files
758  */
759 static void
760 check(FILE *f1, FILE *f2, int flags)
761 {
762 	int i, j, jackpot, c, d;
763 	long ctold, ctnew;
764 
765 	rewind(f1);
766 	rewind(f2);
767 	j = 1;
768 	ixold[0] = ixnew[0] = 0;
769 	jackpot = 0;
770 	ctold = ctnew = 0;
771 	for (i = 1; i <= len[0]; i++) {
772 		if (J[i] == 0) {
773 			ixold[i] = ctold += skipline(f1);
774 			continue;
775 		}
776 		while (j < J[i]) {
777 			ixnew[j] = ctnew += skipline(f2);
778 			j++;
779 		}
780 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
781 			for (;;) {
782 				c = getc(f1);
783 				d = getc(f2);
784 				/*
785 				 * GNU diff ignores a missing newline
786 				 * in one file for -b or -w.
787 				 */
788 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
789 					if (c == EOF && d == '\n') {
790 						ctnew++;
791 						break;
792 					} else if (c == '\n' && d == EOF) {
793 						ctold++;
794 						break;
795 					}
796 				}
797 				ctold++;
798 				ctnew++;
799 				if (flags & D_STRIPCR) {
800 					if (c == '\r') {
801 						if ((c = getc(f1)) == '\n') {
802 							ctnew++;
803 							break;
804 						}
805 					}
806 					if (d == '\r') {
807 						if ((d = getc(f2)) == '\n') {
808 							ctold++;
809 							break;
810 						}
811 					}
812 				}
813 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
814 				    isspace(d)) {
815 					do {
816 						if (c == '\n')
817 							break;
818 						ctold++;
819 					} while (isspace(c = getc(f1)));
820 					do {
821 						if (d == '\n')
822 							break;
823 						ctnew++;
824 					} while (isspace(d = getc(f2)));
825 				} else if ((flags & D_IGNOREBLANKS)) {
826 					while (isspace(c) && c != '\n') {
827 						c = getc(f1);
828 						ctold++;
829 					}
830 					while (isspace(d) && d != '\n') {
831 						d = getc(f2);
832 						ctnew++;
833 					}
834 				}
835 				if (chrtran[c] != chrtran[d]) {
836 					jackpot++;
837 					J[i] = 0;
838 					if (c != '\n' && c != EOF)
839 						ctold += skipline(f1);
840 					if (d != '\n' && c != EOF)
841 						ctnew += skipline(f2);
842 					break;
843 				}
844 				if (c == '\n' || c == EOF)
845 					break;
846 			}
847 		} else {
848 			for (;;) {
849 				ctold++;
850 				ctnew++;
851 				if ((c = getc(f1)) != (d = getc(f2))) {
852 					/* jackpot++; */
853 					J[i] = 0;
854 					if (c != '\n' && c != EOF)
855 						ctold += skipline(f1);
856 					if (d != '\n' && c != EOF)
857 						ctnew += skipline(f2);
858 					break;
859 				}
860 				if (c == '\n' || c == EOF)
861 					break;
862 			}
863 		}
864 		ixold[i] = ctold;
865 		ixnew[j] = ctnew;
866 		j++;
867 	}
868 	for (; j <= len[1]; j++) {
869 		ixnew[j] = ctnew += skipline(f2);
870 	}
871 	/*
872 	 * if (jackpot)
873 	 *	fprintf(stderr, "jackpot\n");
874 	 */
875 }
876 
877 /* shellsort CACM #201 */
878 static void
879 sort(struct line *a, int n)
880 {
881 	struct line *ai, *aim, w;
882 	int j, m = 0, k;
883 
884 	if (n == 0)
885 		return;
886 	for (j = 1; j <= n; j *= 2)
887 		m = 2 * j - 1;
888 	for (m /= 2; m != 0; m /= 2) {
889 		k = n - m;
890 		for (j = 1; j <= k; j++) {
891 			for (ai = &a[j]; ai > a; ai -= m) {
892 				aim = &ai[m];
893 				if (aim < ai)
894 					break;	/* wraparound */
895 				if (aim->value > ai[0].value ||
896 				    (aim->value == ai[0].value &&
897 					aim->serial > ai[0].serial))
898 					break;
899 				w.value = ai[0].value;
900 				ai[0].value = aim->value;
901 				aim->value = w.value;
902 				w.serial = ai[0].serial;
903 				ai[0].serial = aim->serial;
904 				aim->serial = w.serial;
905 			}
906 		}
907 	}
908 }
909 
910 static void
911 unsort(struct line *f, int l, int *b)
912 {
913 	int *a, i;
914 
915 	a = xcalloc(l + 1, sizeof(*a));
916 	for (i = 1; i <= l; i++)
917 		a[f[i].serial] = f[i].value;
918 	for (i = 1; i <= l; i++)
919 		b[i] = a[i];
920 	free(a);
921 }
922 
923 static int
924 skipline(FILE *f)
925 {
926 	int i, c;
927 
928 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
929 		continue;
930 	return (i);
931 }
932 
933 static void
934 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
935 {
936 	int m, i0, i1, j0, j1;
937 
938 	rewind(f1);
939 	rewind(f2);
940 	m = len[0];
941 	J[0] = 0;
942 	J[m + 1] = len[1] + 1;
943 	if (diff_format != D_EDIT) {
944 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
945 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
946 				i0++;
947 			j0 = J[i0 - 1] + 1;
948 			i1 = i0 - 1;
949 			while (i1 < m && J[i1 + 1] == 0)
950 				i1++;
951 			j1 = J[i1 + 1] - 1;
952 			J[i1] = j1;
953 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
954 		}
955 	} else {
956 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
957 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
958 				i0--;
959 			j0 = J[i0 + 1] - 1;
960 			i1 = i0 + 1;
961 			while (i1 > 1 && J[i1 - 1] == 0)
962 				i1--;
963 			j1 = J[i1 - 1] + 1;
964 			J[i1] = j1;
965 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
966 		}
967 	}
968 	if (m == 0)
969 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
970 	if (diff_format == D_IFDEF) {
971 		for (;;) {
972 #define	c i0
973 			if ((c = getc(f1)) == EOF)
974 				return;
975 			diff_output("%c", c);
976 		}
977 #undef c
978 	}
979 	if (anychange != 0) {
980 		if (diff_format == D_CONTEXT)
981 			dump_context_vec(f1, f2, flags);
982 		else if (diff_format == D_UNIFIED)
983 			dump_unified_vec(f1, f2, flags);
984 	}
985 }
986 
987 static void
988 range(int a, int b, const char *separator)
989 {
990 	diff_output("%d", a > b ? b : a);
991 	if (a < b)
992 		diff_output("%s%d", separator, b);
993 }
994 
995 static void
996 uni_range(int a, int b)
997 {
998 	if (a < b)
999 		diff_output("%d,%d", a, b - a + 1);
1000 	else if (a == b)
1001 		diff_output("%d", b);
1002 	else
1003 		diff_output("%d,0", b);
1004 }
1005 
1006 static char *
1007 preadline(int fd, size_t rlen, off_t off)
1008 {
1009 	char *line;
1010 	ssize_t nr;
1011 
1012 	line = xmalloc(rlen + 1);
1013 	if ((nr = pread(fd, line, rlen, off)) < 0)
1014 		err(2, "preadline");
1015 	if (nr > 0 && line[nr-1] == '\n')
1016 		nr--;
1017 	line[nr] = '\0';
1018 	return (line);
1019 }
1020 
1021 static int
1022 ignoreline(char *line)
1023 {
1024 	int ret;
1025 
1026 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1027 	free(line);
1028 	return (ret == 0);	/* if it matched, it should be ignored. */
1029 }
1030 
1031 /*
1032  * Indicate that there is a difference between lines a and b of the from file
1033  * to get to lines c to d of the to file.  If a is greater then b then there
1034  * are no lines in the from file involved and this means that there were
1035  * lines appended (beginning at b).  If c is greater than d then there are
1036  * lines missing from the to file.
1037  */
1038 static void
1039 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1040     int *pflags)
1041 {
1042 	static size_t max_context = 64;
1043 	int i;
1044 
1045 restart:
1046 	if (diff_format != D_IFDEF && a > b && c > d)
1047 		return;
1048 	if (ignore_pats != NULL) {
1049 		char *line;
1050 		/*
1051 		 * All lines in the change, insert, or delete must
1052 		 * match an ignore pattern for the change to be
1053 		 * ignored.
1054 		 */
1055 		if (a <= b) {		/* Changes and deletes. */
1056 			for (i = a; i <= b; i++) {
1057 				line = preadline(fileno(f1),
1058 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1059 				if (!ignoreline(line))
1060 					goto proceed;
1061 			}
1062 		}
1063 		if (a > b || c <= d) {	/* Changes and inserts. */
1064 			for (i = c; i <= d; i++) {
1065 				line = preadline(fileno(f2),
1066 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1067 				if (!ignoreline(line))
1068 					goto proceed;
1069 			}
1070 		}
1071 		return;
1072 	}
1073 proceed:
1074 	if (*pflags & D_HEADER) {
1075 		diff_output("%s %s %s\n", diffargs, file1, file2);
1076 		*pflags &= ~D_HEADER;
1077 	}
1078 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1079 		/*
1080 		 * Allocate change records as needed.
1081 		 */
1082 		if (context_vec_ptr == context_vec_end - 1) {
1083 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1084 			max_context <<= 1;
1085 			context_vec_start = xreallocarray(context_vec_start,
1086 			    max_context, sizeof(*context_vec_start));
1087 			context_vec_end = context_vec_start + max_context;
1088 			context_vec_ptr = context_vec_start + offset;
1089 		}
1090 		if (anychange == 0) {
1091 			/*
1092 			 * Print the context/unidiff header first time through.
1093 			 */
1094 			print_header(file1, file2);
1095 			anychange = 1;
1096 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1097 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1098 			/*
1099 			 * If this change is more than 'diff_context' lines from the
1100 			 * previous change, dump the record and reset it.
1101 			 */
1102 			if (diff_format == D_CONTEXT)
1103 				dump_context_vec(f1, f2, *pflags);
1104 			else
1105 				dump_unified_vec(f1, f2, *pflags);
1106 		}
1107 		context_vec_ptr++;
1108 		context_vec_ptr->a = a;
1109 		context_vec_ptr->b = b;
1110 		context_vec_ptr->c = c;
1111 		context_vec_ptr->d = d;
1112 		return;
1113 	}
1114 	if (anychange == 0)
1115 		anychange = 1;
1116 	switch (diff_format) {
1117 	case D_BRIEF:
1118 		return;
1119 	case D_NORMAL:
1120 	case D_EDIT:
1121 		range(a, b, ",");
1122 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1123 		if (diff_format == D_NORMAL)
1124 			range(c, d, ",");
1125 		diff_output("\n");
1126 		break;
1127 	case D_REVERSE:
1128 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1129 		range(a, b, " ");
1130 		diff_output("\n");
1131 		break;
1132 	case D_NREVERSE:
1133 		if (a > b)
1134 			diff_output("a%d %d\n", b, d - c + 1);
1135 		else {
1136 			diff_output("d%d %d\n", a, b - a + 1);
1137 			if (!(c > d))
1138 				/* add changed lines */
1139 				diff_output("a%d %d\n", b, d - c + 1);
1140 		}
1141 		break;
1142 	}
1143 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1144 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1145 		if (a <= b && c <= d && diff_format == D_NORMAL)
1146 			diff_output("---\n");
1147 	}
1148 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1149 	if (i != 0 && diff_format == D_EDIT) {
1150 		/*
1151 		 * A non-zero return value for D_EDIT indicates that the
1152 		 * last line printed was a bare dot (".") that has been
1153 		 * escaped as ".." to prevent ed(1) from misinterpreting
1154 		 * it.  We have to add a substitute command to change this
1155 		 * back and restart where we left off.
1156 		 */
1157 		diff_output(".\n");
1158 		diff_output("%ds/.//\n", a + i - 1);
1159 		b = a + i - 1;
1160 		a = b + 1;
1161 		c += i;
1162 		goto restart;
1163 	}
1164 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1165 		diff_output(".\n");
1166 	if (inifdef) {
1167 		diff_output("#endif /* %s */\n", ifdefname);
1168 		inifdef = 0;
1169 	}
1170 }
1171 
1172 static int
1173 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1174 {
1175 	int i, j, c, lastc, col, nc;
1176 	int	newcol;
1177 
1178 	/*
1179 	 * When doing #ifdef's, copy down to current line
1180 	 * if this is the first file, so that stuff makes it to output.
1181 	 */
1182 	if (diff_format == D_IFDEF && oldfile) {
1183 		long curpos = ftell(lb);
1184 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1185 		nc = f[a > b ? b : a - 1] - curpos;
1186 		for (i = 0; i < nc; i++)
1187 			diff_output("%c", getc(lb));
1188 	}
1189 	if (a > b)
1190 		return (0);
1191 	if (diff_format == D_IFDEF) {
1192 		if (inifdef) {
1193 			diff_output("#else /* %s%s */\n",
1194 			    oldfile == 1 ? "!" : "", ifdefname);
1195 		} else {
1196 			if (oldfile)
1197 				diff_output("#ifndef %s\n", ifdefname);
1198 			else
1199 				diff_output("#ifdef %s\n", ifdefname);
1200 		}
1201 		inifdef = 1 + oldfile;
1202 	}
1203 	for (i = a; i <= b; i++) {
1204 		fseek(lb, f[i - 1], SEEK_SET);
1205 		nc = f[i] - f[i - 1];
1206 		if (diff_format != D_IFDEF && ch != '\0') {
1207 			diff_output("%c", ch);
1208 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1209 			    || diff_format == D_UNIFIED))
1210 				diff_output("\t");
1211 			else if (diff_format != D_UNIFIED)
1212 				diff_output(" ");
1213 		}
1214 		col = 0;
1215 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1216 			if ((c = getc(lb)) == EOF) {
1217 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1218 				    diff_format == D_NREVERSE)
1219 					warnx("No newline at end of file");
1220 				else
1221 					diff_output("\n\\ No newline at end of "
1222 					    "file\n");
1223 				return (0);
1224 			}
1225 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1226 				newcol = ((col/tabsize)+1)*tabsize;
1227 				do {
1228 					diff_output(" ");
1229 				} while (++col < newcol);
1230 			} else {
1231 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1232 				    && lastc == '.') {
1233 					/*
1234 					 * Don't print a bare "." line
1235 					 * since that will confuse ed(1).
1236 					 * Print ".." instead and return,
1237 					 * giving the caller an offset
1238 					 * from which to restart.
1239 					 */
1240 					diff_output(".\n");
1241 					return (i - a + 1);
1242 				}
1243 				diff_output("%c", c);
1244 				col++;
1245 			}
1246 		}
1247 	}
1248 	return (0);
1249 }
1250 
1251 /*
1252  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1253  */
1254 static int
1255 readhash(FILE *f, int flags)
1256 {
1257 	int i, t, space;
1258 	int sum;
1259 
1260 	sum = 1;
1261 	space = 0;
1262 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1263 		if (flags & D_IGNORECASE)
1264 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1265 				if (flags & D_STRIPCR && t == '\r') {
1266 					t = getc(f);
1267 					if (t == '\n')
1268 						break;
1269 					ungetc(t, f);
1270 				}
1271 				if (t == EOF) {
1272 					if (i == 0)
1273 						return (0);
1274 					break;
1275 				}
1276 				sum = sum * 127 + chrtran[t];
1277 			}
1278 		else
1279 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1280 				if (flags & D_STRIPCR && t == '\r') {
1281 					t = getc(f);
1282 					if (t == '\n')
1283 						break;
1284 					ungetc(t, f);
1285 				}
1286 				if (t == EOF) {
1287 					if (i == 0)
1288 						return (0);
1289 					break;
1290 				}
1291 				sum = sum * 127 + t;
1292 			}
1293 	} else {
1294 		for (i = 0;;) {
1295 			switch (t = getc(f)) {
1296 			case '\r':
1297 			case '\t':
1298 			case '\v':
1299 			case '\f':
1300 			case ' ':
1301 				space++;
1302 				continue;
1303 			default:
1304 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1305 					i++;
1306 					space = 0;
1307 				}
1308 				sum = sum * 127 + chrtran[t];
1309 				i++;
1310 				continue;
1311 			case EOF:
1312 				if (i == 0)
1313 					return (0);
1314 				/* FALLTHROUGH */
1315 			case '\n':
1316 				break;
1317 			}
1318 			break;
1319 		}
1320 	}
1321 	/*
1322 	 * There is a remote possibility that we end up with a zero sum.
1323 	 * Zero is used as an EOF marker, so return 1 instead.
1324 	 */
1325 	return (sum == 0 ? 1 : sum);
1326 }
1327 
1328 static int
1329 asciifile(FILE *f)
1330 {
1331 	unsigned char buf[BUFSIZ];
1332 	size_t cnt;
1333 
1334 	if (f == NULL)
1335 		return (1);
1336 
1337 	rewind(f);
1338 	cnt = fread(buf, 1, sizeof(buf), f);
1339 	return (memchr(buf, '\0', cnt) == NULL);
1340 }
1341 
1342 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1343 
1344 static char *
1345 match_function(const long *f, int pos, FILE *fp)
1346 {
1347 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1348 	size_t nc;
1349 	int last = lastline;
1350 	const char *state = NULL;
1351 
1352 	lastline = pos;
1353 	while (pos > last) {
1354 		fseek(fp, f[pos - 1], SEEK_SET);
1355 		nc = f[pos] - f[pos - 1];
1356 		if (nc >= sizeof(buf))
1357 			nc = sizeof(buf) - 1;
1358 		nc = fread(buf, 1, nc, fp);
1359 		if (nc > 0) {
1360 			buf[nc] = '\0';
1361 			buf[strcspn(buf, "\n")] = '\0';
1362 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1363 				if (begins_with(buf, "private:")) {
1364 					if (!state)
1365 						state = " (private)";
1366 				} else if (begins_with(buf, "protected:")) {
1367 					if (!state)
1368 						state = " (protected)";
1369 				} else if (begins_with(buf, "public:")) {
1370 					if (!state)
1371 						state = " (public)";
1372 				} else {
1373 					strlcpy(lastbuf, buf, sizeof lastbuf);
1374 					if (state)
1375 						strlcat(lastbuf, state,
1376 						    sizeof lastbuf);
1377 					lastmatchline = pos;
1378 					return lastbuf;
1379 				}
1380 			}
1381 		}
1382 		pos--;
1383 	}
1384 	return lastmatchline > 0 ? lastbuf : NULL;
1385 }
1386 
1387 /* dump accumulated "context" diff changes */
1388 static void
1389 dump_context_vec(FILE *f1, FILE *f2, int flags)
1390 {
1391 	struct context_vec *cvp = context_vec_start;
1392 	int lowa, upb, lowc, upd, do_output;
1393 	int a, b, c, d;
1394 	char ch, *f;
1395 
1396 	if (context_vec_start > context_vec_ptr)
1397 		return;
1398 
1399 	b = d = 0;		/* gcc */
1400 	lowa = MAXIMUM(1, cvp->a - diff_context);
1401 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1402 	lowc = MAXIMUM(1, cvp->c - diff_context);
1403 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1404 
1405 	diff_output("***************");
1406 	if ((flags & D_PROTOTYPE)) {
1407 		f = match_function(ixold, lowa-1, f1);
1408 		if (f != NULL)
1409 			diff_output(" %s", f);
1410 	}
1411 	diff_output("\n*** ");
1412 	range(lowa, upb, ",");
1413 	diff_output(" ****\n");
1414 
1415 	/*
1416 	 * Output changes to the "old" file.  The first loop suppresses
1417 	 * output if there were no changes to the "old" file (we'll see
1418 	 * the "old" lines as context in the "new" list).
1419 	 */
1420 	do_output = 0;
1421 	for (; cvp <= context_vec_ptr; cvp++)
1422 		if (cvp->a <= cvp->b) {
1423 			cvp = context_vec_start;
1424 			do_output++;
1425 			break;
1426 		}
1427 	if (do_output) {
1428 		while (cvp <= context_vec_ptr) {
1429 			a = cvp->a;
1430 			b = cvp->b;
1431 			c = cvp->c;
1432 			d = cvp->d;
1433 
1434 			if (a <= b && c <= d)
1435 				ch = 'c';
1436 			else
1437 				ch = (a <= b) ? 'd' : 'a';
1438 
1439 			if (ch == 'a')
1440 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1441 			else {
1442 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1443 				fetch(ixold, a, b, f1,
1444 				    ch == 'c' ? '!' : '-', 0, flags);
1445 			}
1446 			lowa = b + 1;
1447 			cvp++;
1448 		}
1449 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1450 	}
1451 	/* output changes to the "new" file */
1452 	diff_output("--- ");
1453 	range(lowc, upd, ",");
1454 	diff_output(" ----\n");
1455 
1456 	do_output = 0;
1457 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1458 		if (cvp->c <= cvp->d) {
1459 			cvp = context_vec_start;
1460 			do_output++;
1461 			break;
1462 		}
1463 	if (do_output) {
1464 		while (cvp <= context_vec_ptr) {
1465 			a = cvp->a;
1466 			b = cvp->b;
1467 			c = cvp->c;
1468 			d = cvp->d;
1469 
1470 			if (a <= b && c <= d)
1471 				ch = 'c';
1472 			else
1473 				ch = (a <= b) ? 'd' : 'a';
1474 
1475 			if (ch == 'd')
1476 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1477 			else {
1478 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1479 				fetch(ixnew, c, d, f2,
1480 				    ch == 'c' ? '!' : '+', 0, flags);
1481 			}
1482 			lowc = d + 1;
1483 			cvp++;
1484 		}
1485 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1486 	}
1487 	context_vec_ptr = context_vec_start - 1;
1488 }
1489 
1490 /* dump accumulated "unified" diff changes */
1491 static void
1492 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1493 {
1494 	struct context_vec *cvp = context_vec_start;
1495 	int lowa, upb, lowc, upd;
1496 	int a, b, c, d;
1497 	char ch, *f;
1498 
1499 	if (context_vec_start > context_vec_ptr)
1500 		return;
1501 
1502 	b = d = 0;		/* gcc */
1503 	lowa = MAXIMUM(1, cvp->a - diff_context);
1504 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1505 	lowc = MAXIMUM(1, cvp->c - diff_context);
1506 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1507 
1508 	diff_output("@@ -");
1509 	uni_range(lowa, upb);
1510 	diff_output(" +");
1511 	uni_range(lowc, upd);
1512 	diff_output(" @@");
1513 	if ((flags & D_PROTOTYPE)) {
1514 		f = match_function(ixold, lowa-1, f1);
1515 		if (f != NULL)
1516 			diff_output(" %s", f);
1517 	}
1518 	diff_output("\n");
1519 
1520 	/*
1521 	 * Output changes in "unified" diff format--the old and new lines
1522 	 * are printed together.
1523 	 */
1524 	for (; cvp <= context_vec_ptr; cvp++) {
1525 		a = cvp->a;
1526 		b = cvp->b;
1527 		c = cvp->c;
1528 		d = cvp->d;
1529 
1530 		/*
1531 		 * c: both new and old changes
1532 		 * d: only changes in the old file
1533 		 * a: only changes in the new file
1534 		 */
1535 		if (a <= b && c <= d)
1536 			ch = 'c';
1537 		else
1538 			ch = (a <= b) ? 'd' : 'a';
1539 
1540 		switch (ch) {
1541 		case 'c':
1542 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1543 			fetch(ixold, a, b, f1, '-', 0, flags);
1544 			fetch(ixnew, c, d, f2, '+', 0, flags);
1545 			break;
1546 		case 'd':
1547 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1548 			fetch(ixold, a, b, f1, '-', 0, flags);
1549 			break;
1550 		case 'a':
1551 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1552 			fetch(ixnew, c, d, f2, '+', 0, flags);
1553 			break;
1554 		}
1555 		lowa = b + 1;
1556 		lowc = d + 1;
1557 	}
1558 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1559 
1560 	context_vec_ptr = context_vec_start - 1;
1561 }
1562 
1563 static void
1564 print_header(const char *file1, const char *file2)
1565 {
1566 	const char *time_format;
1567 	char buf1[256];
1568 	char buf2[256];
1569 	char end1[10];
1570 	char end2[10];
1571 	struct tm *tm_ptr1, *tm_ptr2;
1572 	int nsec1 = TIMESPEC_NS (stb1.st_mtime);
1573 	int nsec2 = TIMESPEC_NS (stb2.st_mtime);
1574 
1575 #ifdef ST_MTIM_NSEC
1576 		time_format = "%Y-%m-%d %H:%M:%S.%N";
1577 #else
1578 		time_format = "%Y-%m-%d %H:%M:%S";
1579 #endif
1580 
1581 	if (cflag)
1582 		time_format = "%c";
1583 	tm_ptr1 = localtime(&stb1.st_mtime);
1584 	tm_ptr2 = localtime(&stb2.st_mtime);
1585 	strftime(buf1, 256, time_format, tm_ptr1);
1586 	strftime(buf2, 256, time_format, tm_ptr2);
1587 	if (!cflag) {
1588 		strftime(end1, 10, "%z", tm_ptr1);
1589 		strftime(end2, 10, "%z", tm_ptr2);
1590 		sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1591 		sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1592 	}
1593 	if (label[0] != NULL)
1594 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1595 		    label[0]);
1596 	else
1597 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1598 		    file1, buf1);
1599 	if (label[1] != NULL)
1600 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1601 		    label[1]);
1602 	else
1603 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1604 		    file2, buf2);
1605 }
1606