xref: /freebsd/usr.bin/diff/diffreg.c (revision 595a52b7711bda663df3277ea82c690a0661bdcd)
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
69 
70 #include <sys/capsicum.h>
71 #include <sys/procdesc.h>
72 #include <sys/stat.h>
73 #include <sys/types.h>
74 #include <sys/event.h>
75 #include <sys/wait.h>
76 
77 #include <capsicum_helpers.h>
78 #include <ctype.h>
79 #include <err.h>
80 #include <errno.h>
81 #include <fcntl.h>
82 #include <paths.h>
83 #include <stddef.h>
84 #include <stdint.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <unistd.h>
89 #include <limits.h>
90 #include <signal.h>
91 
92 #include "diff.h"
93 #include "xmalloc.h"
94 
95 #define _PATH_PR "/usr/bin/pr"
96 
97 #ifdef ST_MTIM_NSEC
98 # define TIMESPEC_NS(timespec) ((timespec).ST_MTIM_NSEC)
99 #else
100 # define TIMESPEC_NS(timespec) 0
101 #endif
102 
103 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
104 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
105 
106 /*
107  * diff - compare two files.
108  */
109 
110 /*
111  *	Uses an algorithm due to Harold Stone, which finds
112  *	a pair of longest identical subsequences in the two
113  *	files.
114  *
115  *	The major goal is to generate the match vector J.
116  *	J[i] is the index of the line in file1 corresponding
117  *	to line i file0. J[i] = 0 if there is no
118  *	such line in file1.
119  *
120  *	Lines are hashed so as to work in core. All potential
121  *	matches are located by sorting the lines of each file
122  *	on the hash (called ``value''). In particular, this
123  *	collects the equivalence classes in file1 together.
124  *	Subroutine equiv replaces the value of each line in
125  *	file0 by the index of the first element of its
126  *	matching equivalence in (the reordered) file1.
127  *	To save space equiv squeezes file1 into a single
128  *	array member in which the equivalence classes
129  *	are simply concatenated, except that their first
130  *	members are flagged by changing sign.
131  *
132  *	Next the indices that point into member are unsorted into
133  *	array class according to the original order of file0.
134  *
135  *	The cleverness lies in routine stone. This marches
136  *	through the lines of file0, developing a vector klist
137  *	of "k-candidates". At step i a k-candidate is a matched
138  *	pair of lines x,y (x in file0 y in file1) such that
139  *	there is a common subsequence of length k
140  *	between the first i lines of file0 and the first y
141  *	lines of file1, but there is no such subsequence for
142  *	any smaller y. x is the earliest possible mate to y
143  *	that occurs in such a subsequence.
144  *
145  *	Whenever any of the members of the equivalence class of
146  *	lines in file1 matable to a line in file0 has serial number
147  *	less than the y of some k-candidate, that k-candidate
148  *	with the smallest such y is replaced. The new
149  *	k-candidate is chained (via pred) to the current
150  *	k-1 candidate so that the actual subsequence can
151  *	be recovered. When a member has serial number greater
152  *	that the y of all k-candidates, the klist is extended.
153  *	At the end, the longest subsequence is pulled out
154  *	and placed in the array J by unravel
155  *
156  *	With J in hand, the matches there recorded are
157  *	check'ed against reality to assure that no spurious
158  *	matches have crept in due to hashing. If they have,
159  *	they are broken, and "jackpot" is recorded--a harmless
160  *	matter except that a true match for a spuriously
161  *	mated line may now be unnecessarily reported as a change.
162  *
163  *	Much of the complexity of the program comes simply
164  *	from trying to minimize core utilization and
165  *	maximize the range of doable problems by dynamically
166  *	allocating what is needed and reusing what is not.
167  *	The core requirements for problems larger than somewhat
168  *	are (in words) 2*length(file0) + length(file1) +
169  *	3*(number of k-candidates installed),  typically about
170  *	6n words for files of length n.
171  */
172 
173 struct cand {
174 	int	x;
175 	int	y;
176 	int	pred;
177 };
178 
179 static struct line {
180 	int	serial;
181 	int	value;
182 } *file[2];
183 
184 /*
185  * The following struct is used to record change information when
186  * doing a "context" or "unified" diff.  (see routine "change" to
187  * understand the highly mnemonic field names)
188  */
189 struct context_vec {
190 	int	a;		/* start line in old file */
191 	int	b;		/* end line in old file */
192 	int	c;		/* start line in new file */
193 	int	d;		/* end line in new file */
194 };
195 
196 #define	diff_output	printf
197 static void	 output(char *, FILE *, char *, FILE *, int);
198 static void	 check(FILE *, FILE *, int);
199 static void	 range(int, int, const char *);
200 static void	 uni_range(int, int);
201 static void	 dump_context_vec(FILE *, FILE *, int);
202 static void	 dump_unified_vec(FILE *, FILE *, int);
203 static void	 prepare(int, FILE *, off_t, int);
204 static void	 prune(void);
205 static void	 equiv(struct line *, int, struct line *, int, int *);
206 static void	 unravel(int);
207 static void	 unsort(struct line *, int, int *);
208 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
209 static void	 sort(struct line *, int);
210 static void	 print_header(const char *, const char *);
211 static int	 ignoreline(char *);
212 static int	 asciifile(FILE *);
213 static int	 fetch(long *, int, int, FILE *, int, int, int);
214 static int	 newcand(int, int, int);
215 static int	 search(int *, int, int);
216 static int	 skipline(FILE *);
217 static int	 isqrt(int);
218 static int	 stone(int *, int, int *, int *, int);
219 static int	 readhash(FILE *, int);
220 static int	 files_differ(FILE *, FILE *, int);
221 static char	*match_function(const long *, int, FILE *);
222 static char	*preadline(int, size_t, off_t);
223 
224 static int  *J;			/* will be overlaid on class */
225 static int  *class;		/* will be overlaid on file[0] */
226 static int  *klist;		/* will be overlaid on file[0] after class */
227 static int  *member;		/* will be overlaid on file[1] */
228 static int   clen;
229 static int   inifdef;		/* whether or not we are in a #ifdef block */
230 static int   len[2];
231 static int   pref, suff;	/* length of prefix and suffix */
232 static int   slen[2];
233 static int   anychange;
234 static long *ixnew;		/* will be overlaid on file[1] */
235 static long *ixold;		/* will be overlaid on klist */
236 static struct cand *clist;	/* merely a free storage pot for candidates */
237 static int   clistlen;		/* the length of clist */
238 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
239 static u_char *chrtran;		/* translation table for case-folding */
240 static struct context_vec *context_vec_start;
241 static struct context_vec *context_vec_end;
242 static struct context_vec *context_vec_ptr;
243 
244 #define FUNCTION_CONTEXT_SIZE	55
245 static char lastbuf[FUNCTION_CONTEXT_SIZE];
246 static int lastline;
247 static int lastmatchline;
248 
249 
250 /*
251  * chrtran points to one of 2 translation tables: cup2low if folding upper to
252  * lower case clow2low if not folding case
253  */
254 static u_char clow2low[256] = {
255 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
256 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
257 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
258 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
259 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
260 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
261 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
262 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
263 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
264 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
265 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
266 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
267 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
268 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
269 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
270 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
271 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
272 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
273 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
274 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
275 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
276 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
277 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
278 	0xfd, 0xfe, 0xff
279 };
280 
281 static u_char cup2low[256] = {
282 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
283 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
284 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
285 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
286 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
287 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
288 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
289 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
290 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
291 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
292 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
293 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
294 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
295 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
296 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
297 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
298 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
299 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
300 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
301 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
302 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
303 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
304 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
305 	0xfd, 0xfe, 0xff
306 };
307 
308 int
309 diffreg(char *file1, char *file2, int flags, int capsicum)
310 {
311 	FILE *f1, *f2;
312 	int i, rval;
313 	int	ostdout = -1;
314 	int pr_pd, kq;
315 	struct kevent *e;
316 	cap_rights_t rights_ro;
317 
318 	f1 = f2 = NULL;
319 	rval = D_SAME;
320 	anychange = 0;
321 	lastline = 0;
322 	lastmatchline = 0;
323 	context_vec_ptr = context_vec_start - 1;
324 	if (flags & D_IGNORECASE)
325 		chrtran = cup2low;
326 	else
327 		chrtran = clow2low;
328 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
329 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
330 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
331 		goto closem;
332 
333 	if (flags & D_EMPTY1)
334 		f1 = fopen(_PATH_DEVNULL, "r");
335 	else {
336 		if (strcmp(file1, "-") == 0)
337 			f1 = stdin;
338 		else
339 			f1 = fopen(file1, "r");
340 	}
341 	if (f1 == NULL) {
342 		warn("%s", file1);
343 		status |= 2;
344 		goto closem;
345 	}
346 
347 	if (flags & D_EMPTY2)
348 		f2 = fopen(_PATH_DEVNULL, "r");
349 	else {
350 		if (strcmp(file2, "-") == 0)
351 			f2 = stdin;
352 		else
353 			f2 = fopen(file2, "r");
354 	}
355 	if (f2 == NULL) {
356 		warn("%s", file2);
357 		status |= 2;
358 		goto closem;
359 	}
360 
361 	if (lflag) {
362 		/* redirect stdout to pr */
363 		int	 pfd[2];
364 		pid_t	pid;
365 		char	*header;
366 
367 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
368 		signal(SIGPIPE, SIG_IGN);
369 		fflush(stdout);
370 		rewind(stdout);
371 		pipe(pfd);
372 		switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
373 		case -1:
374 			status |= 2;
375 			free(header);
376 			err(2, "No more processes");
377 		case 0:
378 			/* child */
379 			if (pfd[0] != STDIN_FILENO) {
380 				dup2(pfd[0], STDIN_FILENO);
381 				close(pfd[0]);
382 			}
383 			close(pfd[1]);
384 			execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
385 			_exit(127);
386 		default:
387 
388 			/* parent */
389 			if (pfd[1] != STDOUT_FILENO) {
390 				ostdout = dup(STDOUT_FILENO);
391 				dup2(pfd[1], STDOUT_FILENO);
392 				close(pfd[1]);
393 			}
394 			close(pfd[0]);
395 			rewind(stdout);
396 			free(header);
397 			kq = kqueue();
398 			if (kq == -1)
399 				err(2, "kqueue");
400 			e = xmalloc(sizeof(struct kevent));
401 			EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
402 			    NULL);
403 			if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
404 				err(2, "kevent");
405 		}
406 	}
407 
408 	if (capsicum) {
409 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
410 		if (cap_rights_limit(fileno(f1), &rights_ro) < 0)
411 			err(2, "unable to limit rights on: %s", file1);
412 		if (cap_rights_limit(fileno(f2), &rights_ro) < 0)
413 			err(2, "unable to limit rights on: %s", file2);
414 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
415 			/* stding has already been limited */
416 			if (caph_limit_stderr() == -1)
417 				err(2, "unable to limit stderr");
418 			if (caph_limit_stdout() == -1)
419 				err(2, "unable to limit stdout");
420 		} else if (caph_limit_stdio() == -1)
421 				err(2, "unable to limit stdio");
422 
423 		caph_cache_catpages();
424 		if (cap_enter() < 0 && errno != ENOSYS)
425 			err(2, "unable to enter capability mode");
426 	}
427 
428 	switch (files_differ(f1, f2, flags)) {
429 	case 0:
430 		goto closem;
431 	case 1:
432 		break;
433 	default:
434 		/* error */
435 		status |= 2;
436 		goto closem;
437 	}
438 
439 	if ((flags & D_FORCEASCII) == 0 &&
440 	    (!asciifile(f1) || !asciifile(f2))) {
441 		rval = D_BINARY;
442 		status |= 1;
443 		goto closem;
444 	}
445 	prepare(0, f1, stb1.st_size, flags);
446 	prepare(1, f2, stb2.st_size, flags);
447 
448 	prune();
449 	sort(sfile[0], slen[0]);
450 	sort(sfile[1], slen[1]);
451 
452 	member = (int *)file[1];
453 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
454 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
455 
456 	class = (int *)file[0];
457 	unsort(sfile[0], slen[0], class);
458 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
459 
460 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
461 	clen = 0;
462 	clistlen = 100;
463 	clist = xcalloc(clistlen, sizeof(*clist));
464 	i = stone(class, slen[0], member, klist, flags);
465 	free(member);
466 	free(class);
467 
468 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
469 	unravel(klist[i]);
470 	free(clist);
471 	free(klist);
472 
473 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
474 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
475 	check(f1, f2, flags);
476 	output(file1, f1, file2, f2, flags);
477 	if (ostdout != -1) {
478 		/* close the pipe to pr and restore stdout */
479 		int wstatus;
480 
481 		fflush(stdout);
482 		if (ostdout != STDOUT_FILENO) {
483 			close(STDOUT_FILENO);
484 			dup2(ostdout, STDOUT_FILENO);
485 			close(ostdout);
486 		}
487 		if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
488 			err(2, "kevent");
489 		wstatus = e[0].data;
490 		close(kq);
491 		if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
492 			errx(2, "pr exited abnormally");
493 		else if (WIFSIGNALED(wstatus))
494 			errx(2, "pr killed by signal %d",
495 			    WTERMSIG(wstatus));
496 	}
497 
498 closem:
499 	if (anychange) {
500 		status |= 1;
501 		if (rval == D_SAME)
502 			rval = D_DIFFER;
503 	}
504 	if (f1 != NULL)
505 		fclose(f1);
506 	if (f2 != NULL)
507 		fclose(f2);
508 
509 	return (rval);
510 }
511 
512 /*
513  * Check to see if the given files differ.
514  * Returns 0 if they are the same, 1 if different, and -1 on error.
515  * XXX - could use code from cmp(1) [faster]
516  */
517 static int
518 files_differ(FILE *f1, FILE *f2, int flags)
519 {
520 	char buf1[BUFSIZ], buf2[BUFSIZ];
521 	size_t i, j;
522 
523 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
524 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
525 		return (1);
526 	for (;;) {
527 		i = fread(buf1, 1, sizeof(buf1), f1);
528 		j = fread(buf2, 1, sizeof(buf2), f2);
529 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
530 			return (-1);
531 		if (i != j)
532 			return (1);
533 		if (i == 0)
534 			return (0);
535 		if (memcmp(buf1, buf2, i) != 0)
536 			return (1);
537 	}
538 }
539 
540 char *
541 splice(char *dir, char *path)
542 {
543 	char *tail, *buf;
544 	size_t dirlen;
545 
546 	dirlen = strlen(dir);
547 	while (dirlen != 0 && dir[dirlen - 1] == '/')
548 	    dirlen--;
549 	if ((tail = strrchr(path, '/')) == NULL)
550 		tail = path;
551 	else
552 		tail++;
553 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
554 	return (buf);
555 }
556 
557 static void
558 prepare(int i, FILE *fd, off_t filesize, int flags)
559 {
560 	struct line *p;
561 	int h;
562 	size_t sz, j;
563 
564 	rewind(fd);
565 
566 	sz = ((unsigned long)filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
567 	if (sz < 100)
568 		sz = 100;
569 
570 	p = xcalloc(sz + 3, sizeof(*p));
571 	for (j = 0; (h = readhash(fd, flags));) {
572 		if (j == sz) {
573 			sz = sz * 3 / 2;
574 			p = xreallocarray(p, sz + 3, sizeof(*p));
575 		}
576 		p[++j].value = h;
577 	}
578 	len[i] = j;
579 	file[i] = p;
580 }
581 
582 static void
583 prune(void)
584 {
585 	int i, j;
586 
587 	for (pref = 0; pref < len[0] && pref < len[1] &&
588 	    file[0][pref + 1].value == file[1][pref + 1].value;
589 	    pref++)
590 		;
591 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
592 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
593 	    suff++)
594 		;
595 	for (j = 0; j < 2; j++) {
596 		sfile[j] = file[j] + pref;
597 		slen[j] = len[j] - pref - suff;
598 		for (i = 0; i <= slen[j]; i++)
599 			sfile[j][i].serial = i;
600 	}
601 }
602 
603 static void
604 equiv(struct line *a, int n, struct line *b, int m, int *c)
605 {
606 	int i, j;
607 
608 	i = j = 1;
609 	while (i <= n && j <= m) {
610 		if (a[i].value < b[j].value)
611 			a[i++].value = 0;
612 		else if (a[i].value == b[j].value)
613 			a[i++].value = j;
614 		else
615 			j++;
616 	}
617 	while (i <= n)
618 		a[i++].value = 0;
619 	b[m + 1].value = 0;
620 	j = 0;
621 	while (++j <= m) {
622 		c[j] = -b[j].serial;
623 		while (b[j + 1].value == b[j].value) {
624 			j++;
625 			c[j] = b[j].serial;
626 		}
627 	}
628 	c[j] = -1;
629 }
630 
631 /* Code taken from ping.c */
632 static int
633 isqrt(int n)
634 {
635 	int y, x = 1;
636 
637 	if (n == 0)
638 		return (0);
639 
640 	do { /* newton was a stinker */
641 		y = x;
642 		x = n / x;
643 		x += y;
644 		x /= 2;
645 	} while ((x - y) > 1 || (x - y) < -1);
646 
647 	return (x);
648 }
649 
650 static int
651 stone(int *a, int n, int *b, int *c, int flags)
652 {
653 	int i, k, y, j, l;
654 	int oldc, tc, oldl, sq;
655 	u_int numtries, bound;
656 
657 	if (flags & D_MINIMAL)
658 		bound = UINT_MAX;
659 	else {
660 		sq = isqrt(n);
661 		bound = MAXIMUM(256, sq);
662 	}
663 
664 	k = 0;
665 	c[0] = newcand(0, 0, 0);
666 	for (i = 1; i <= n; i++) {
667 		j = a[i];
668 		if (j == 0)
669 			continue;
670 		y = -b[j];
671 		oldl = 0;
672 		oldc = c[0];
673 		numtries = 0;
674 		do {
675 			if (y <= clist[oldc].y)
676 				continue;
677 			l = search(c, k, y);
678 			if (l != oldl + 1)
679 				oldc = c[l - 1];
680 			if (l <= k) {
681 				if (clist[c[l]].y <= y)
682 					continue;
683 				tc = c[l];
684 				c[l] = newcand(i, y, oldc);
685 				oldc = tc;
686 				oldl = l;
687 				numtries++;
688 			} else {
689 				c[l] = newcand(i, y, oldc);
690 				k++;
691 				break;
692 			}
693 		} while ((y = b[++j]) > 0 && numtries < bound);
694 	}
695 	return (k);
696 }
697 
698 static int
699 newcand(int x, int y, int pred)
700 {
701 	struct cand *q;
702 
703 	if (clen == clistlen) {
704 		clistlen = clistlen * 11 / 10;
705 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
706 	}
707 	q = clist + clen;
708 	q->x = x;
709 	q->y = y;
710 	q->pred = pred;
711 	return (clen++);
712 }
713 
714 static int
715 search(int *c, int k, int y)
716 {
717 	int i, j, l, t;
718 
719 	if (clist[c[k]].y < y)	/* quick look for typical case */
720 		return (k + 1);
721 	i = 0;
722 	j = k + 1;
723 	for (;;) {
724 		l = (i + j) / 2;
725 		if (l <= i)
726 			break;
727 		t = clist[c[l]].y;
728 		if (t > y)
729 			j = l;
730 		else if (t < y)
731 			i = l;
732 		else
733 			return (l);
734 	}
735 	return (l + 1);
736 }
737 
738 static void
739 unravel(int p)
740 {
741 	struct cand *q;
742 	int i;
743 
744 	for (i = 0; i <= len[0]; i++)
745 		J[i] = i <= pref ? i :
746 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
747 	for (q = clist + p; q->y != 0; q = clist + q->pred)
748 		J[q->x + pref] = q->y + pref;
749 }
750 
751 /*
752  * Check does double duty:
753  *  1.	ferret out any fortuitous correspondences due
754  *	to confounding by hashing (which result in "jackpot")
755  *  2.  collect random access indexes to the two files
756  */
757 static void
758 check(FILE *f1, FILE *f2, int flags)
759 {
760 	int i, j, jackpot, c, d;
761 	long ctold, ctnew;
762 
763 	rewind(f1);
764 	rewind(f2);
765 	j = 1;
766 	ixold[0] = ixnew[0] = 0;
767 	jackpot = 0;
768 	ctold = ctnew = 0;
769 	for (i = 1; i <= len[0]; i++) {
770 		if (J[i] == 0) {
771 			ixold[i] = ctold += skipline(f1);
772 			continue;
773 		}
774 		while (j < J[i]) {
775 			ixnew[j] = ctnew += skipline(f2);
776 			j++;
777 		}
778 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
779 			for (;;) {
780 				c = getc(f1);
781 				d = getc(f2);
782 				/*
783 				 * GNU diff ignores a missing newline
784 				 * in one file for -b or -w.
785 				 */
786 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
787 					if (c == EOF && d == '\n') {
788 						ctnew++;
789 						break;
790 					} else if (c == '\n' && d == EOF) {
791 						ctold++;
792 						break;
793 					}
794 				}
795 				ctold++;
796 				ctnew++;
797 				if (flags & D_STRIPCR) {
798 					if (c == '\r') {
799 						if ((c = getc(f1)) == '\n') {
800 							ctnew++;
801 							break;
802 						}
803 					}
804 					if (d == '\r') {
805 						if ((d = getc(f2)) == '\n') {
806 							ctold++;
807 							break;
808 						}
809 					}
810 				}
811 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
812 				    isspace(d)) {
813 					do {
814 						if (c == '\n')
815 							break;
816 						ctold++;
817 					} while (isspace(c = getc(f1)));
818 					do {
819 						if (d == '\n')
820 							break;
821 						ctnew++;
822 					} while (isspace(d = getc(f2)));
823 				} else if ((flags & D_IGNOREBLANKS)) {
824 					while (isspace(c) && c != '\n') {
825 						c = getc(f1);
826 						ctold++;
827 					}
828 					while (isspace(d) && d != '\n') {
829 						d = getc(f2);
830 						ctnew++;
831 					}
832 				}
833 				if (chrtran[c] != chrtran[d]) {
834 					jackpot++;
835 					J[i] = 0;
836 					if (c != '\n' && c != EOF)
837 						ctold += skipline(f1);
838 					if (d != '\n' && c != EOF)
839 						ctnew += skipline(f2);
840 					break;
841 				}
842 				if (c == '\n' || c == EOF)
843 					break;
844 			}
845 		} else {
846 			for (;;) {
847 				ctold++;
848 				ctnew++;
849 				if ((c = getc(f1)) != (d = getc(f2))) {
850 					/* jackpot++; */
851 					J[i] = 0;
852 					if (c != '\n' && c != EOF)
853 						ctold += skipline(f1);
854 					if (d != '\n' && c != EOF)
855 						ctnew += skipline(f2);
856 					break;
857 				}
858 				if (c == '\n' || c == EOF)
859 					break;
860 			}
861 		}
862 		ixold[i] = ctold;
863 		ixnew[j] = ctnew;
864 		j++;
865 	}
866 	for (; j <= len[1]; j++) {
867 		ixnew[j] = ctnew += skipline(f2);
868 	}
869 	/*
870 	 * if (jackpot)
871 	 *	fprintf(stderr, "jackpot\n");
872 	 */
873 }
874 
875 /* shellsort CACM #201 */
876 static void
877 sort(struct line *a, int n)
878 {
879 	struct line *ai, *aim, w;
880 	int j, m = 0, k;
881 
882 	if (n == 0)
883 		return;
884 	for (j = 1; j <= n; j *= 2)
885 		m = 2 * j - 1;
886 	for (m /= 2; m != 0; m /= 2) {
887 		k = n - m;
888 		for (j = 1; j <= k; j++) {
889 			for (ai = &a[j]; ai > a; ai -= m) {
890 				aim = &ai[m];
891 				if (aim < ai)
892 					break;	/* wraparound */
893 				if (aim->value > ai[0].value ||
894 				    (aim->value == ai[0].value &&
895 					aim->serial > ai[0].serial))
896 					break;
897 				w.value = ai[0].value;
898 				ai[0].value = aim->value;
899 				aim->value = w.value;
900 				w.serial = ai[0].serial;
901 				ai[0].serial = aim->serial;
902 				aim->serial = w.serial;
903 			}
904 		}
905 	}
906 }
907 
908 static void
909 unsort(struct line *f, int l, int *b)
910 {
911 	int *a, i;
912 
913 	a = xcalloc(l + 1, sizeof(*a));
914 	for (i = 1; i <= l; i++)
915 		a[f[i].serial] = f[i].value;
916 	for (i = 1; i <= l; i++)
917 		b[i] = a[i];
918 	free(a);
919 }
920 
921 static int
922 skipline(FILE *f)
923 {
924 	int i, c;
925 
926 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
927 		continue;
928 	return (i);
929 }
930 
931 static void
932 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
933 {
934 	int m, i0, i1, j0, j1;
935 
936 	rewind(f1);
937 	rewind(f2);
938 	m = len[0];
939 	J[0] = 0;
940 	J[m + 1] = len[1] + 1;
941 	if (diff_format != D_EDIT) {
942 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
943 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
944 				i0++;
945 			j0 = J[i0 - 1] + 1;
946 			i1 = i0 - 1;
947 			while (i1 < m && J[i1 + 1] == 0)
948 				i1++;
949 			j1 = J[i1 + 1] - 1;
950 			J[i1] = j1;
951 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
952 		}
953 	} else {
954 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
955 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
956 				i0--;
957 			j0 = J[i0 + 1] - 1;
958 			i1 = i0 + 1;
959 			while (i1 > 1 && J[i1 - 1] == 0)
960 				i1--;
961 			j1 = J[i1 - 1] + 1;
962 			J[i1] = j1;
963 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
964 		}
965 	}
966 	if (m == 0)
967 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
968 	if (diff_format == D_IFDEF) {
969 		for (;;) {
970 #define	c i0
971 			if ((c = getc(f1)) == EOF)
972 				return;
973 			diff_output("%c", c);
974 		}
975 #undef c
976 	}
977 	if (anychange != 0) {
978 		if (diff_format == D_CONTEXT)
979 			dump_context_vec(f1, f2, flags);
980 		else if (diff_format == D_UNIFIED)
981 			dump_unified_vec(f1, f2, flags);
982 	}
983 }
984 
985 static void
986 range(int a, int b, const char *separator)
987 {
988 	diff_output("%d", a > b ? b : a);
989 	if (a < b)
990 		diff_output("%s%d", separator, b);
991 }
992 
993 static void
994 uni_range(int a, int b)
995 {
996 	if (a < b)
997 		diff_output("%d,%d", a, b - a + 1);
998 	else if (a == b)
999 		diff_output("%d", b);
1000 	else
1001 		diff_output("%d,0", b);
1002 }
1003 
1004 static char *
1005 preadline(int fd, size_t rlen, off_t off)
1006 {
1007 	char *line;
1008 	ssize_t nr;
1009 
1010 	line = xmalloc(rlen + 1);
1011 	if ((nr = pread(fd, line, rlen, off)) < 0)
1012 		err(2, "preadline");
1013 	if (nr > 0 && line[nr-1] == '\n')
1014 		nr--;
1015 	line[nr] = '\0';
1016 	return (line);
1017 }
1018 
1019 static int
1020 ignoreline(char *line)
1021 {
1022 	int ret;
1023 
1024 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1025 	free(line);
1026 	return (ret == 0);	/* if it matched, it should be ignored. */
1027 }
1028 
1029 /*
1030  * Indicate that there is a difference between lines a and b of the from file
1031  * to get to lines c to d of the to file.  If a is greater then b then there
1032  * are no lines in the from file involved and this means that there were
1033  * lines appended (beginning at b).  If c is greater than d then there are
1034  * lines missing from the to file.
1035  */
1036 static void
1037 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1038     int *pflags)
1039 {
1040 	static size_t max_context = 64;
1041 	int i;
1042 
1043 restart:
1044 	if (diff_format != D_IFDEF && a > b && c > d)
1045 		return;
1046 	if (ignore_pats != NULL) {
1047 		char *line;
1048 		/*
1049 		 * All lines in the change, insert, or delete must
1050 		 * match an ignore pattern for the change to be
1051 		 * ignored.
1052 		 */
1053 		if (a <= b) {		/* Changes and deletes. */
1054 			for (i = a; i <= b; i++) {
1055 				line = preadline(fileno(f1),
1056 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1057 				if (!ignoreline(line))
1058 					goto proceed;
1059 			}
1060 		}
1061 		if (a > b || c <= d) {	/* Changes and inserts. */
1062 			for (i = c; i <= d; i++) {
1063 				line = preadline(fileno(f2),
1064 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1065 				if (!ignoreline(line))
1066 					goto proceed;
1067 			}
1068 		}
1069 		return;
1070 	}
1071 proceed:
1072 	if (*pflags & D_HEADER) {
1073 		diff_output("%s %s %s\n", diffargs, file1, file2);
1074 		*pflags &= ~D_HEADER;
1075 	}
1076 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1077 		/*
1078 		 * Allocate change records as needed.
1079 		 */
1080 		if (context_vec_ptr == context_vec_end - 1) {
1081 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1082 			max_context <<= 1;
1083 			context_vec_start = xreallocarray(context_vec_start,
1084 			    max_context, sizeof(*context_vec_start));
1085 			context_vec_end = context_vec_start + max_context;
1086 			context_vec_ptr = context_vec_start + offset;
1087 		}
1088 		if (anychange == 0) {
1089 			/*
1090 			 * Print the context/unidiff header first time through.
1091 			 */
1092 			print_header(file1, file2);
1093 			anychange = 1;
1094 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1095 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1096 			/*
1097 			 * If this change is more than 'diff_context' lines from the
1098 			 * previous change, dump the record and reset it.
1099 			 */
1100 			if (diff_format == D_CONTEXT)
1101 				dump_context_vec(f1, f2, *pflags);
1102 			else
1103 				dump_unified_vec(f1, f2, *pflags);
1104 		}
1105 		context_vec_ptr++;
1106 		context_vec_ptr->a = a;
1107 		context_vec_ptr->b = b;
1108 		context_vec_ptr->c = c;
1109 		context_vec_ptr->d = d;
1110 		return;
1111 	}
1112 	if (anychange == 0)
1113 		anychange = 1;
1114 	switch (diff_format) {
1115 	case D_BRIEF:
1116 		return;
1117 	case D_NORMAL:
1118 	case D_EDIT:
1119 		range(a, b, ",");
1120 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1121 		if (diff_format == D_NORMAL)
1122 			range(c, d, ",");
1123 		diff_output("\n");
1124 		break;
1125 	case D_REVERSE:
1126 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1127 		range(a, b, " ");
1128 		diff_output("\n");
1129 		break;
1130 	case D_NREVERSE:
1131 		if (a > b)
1132 			diff_output("a%d %d\n", b, d - c + 1);
1133 		else {
1134 			diff_output("d%d %d\n", a, b - a + 1);
1135 			if (!(c > d))
1136 				/* add changed lines */
1137 				diff_output("a%d %d\n", b, d - c + 1);
1138 		}
1139 		break;
1140 	}
1141 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1142 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1143 		if (a <= b && c <= d && diff_format == D_NORMAL)
1144 			diff_output("---\n");
1145 	}
1146 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1147 	if (i != 0 && diff_format == D_EDIT) {
1148 		/*
1149 		 * A non-zero return value for D_EDIT indicates that the
1150 		 * last line printed was a bare dot (".") that has been
1151 		 * escaped as ".." to prevent ed(1) from misinterpreting
1152 		 * it.  We have to add a substitute command to change this
1153 		 * back and restart where we left off.
1154 		 */
1155 		diff_output(".\n");
1156 		diff_output("%ds/.//\n", a + i - 1);
1157 		b = a + i - 1;
1158 		a = b + 1;
1159 		c += i;
1160 		goto restart;
1161 	}
1162 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1163 		diff_output(".\n");
1164 	if (inifdef) {
1165 		diff_output("#endif /* %s */\n", ifdefname);
1166 		inifdef = 0;
1167 	}
1168 }
1169 
1170 static int
1171 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1172 {
1173 	int i, j, c, lastc, col, nc;
1174 	int	newcol;
1175 
1176 	/*
1177 	 * When doing #ifdef's, copy down to current line
1178 	 * if this is the first file, so that stuff makes it to output.
1179 	 */
1180 	if (diff_format == D_IFDEF && oldfile) {
1181 		long curpos = ftell(lb);
1182 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1183 		nc = f[a > b ? b : a - 1] - curpos;
1184 		for (i = 0; i < nc; i++)
1185 			diff_output("%c", getc(lb));
1186 	}
1187 	if (a > b)
1188 		return (0);
1189 	if (diff_format == D_IFDEF) {
1190 		if (inifdef) {
1191 			diff_output("#else /* %s%s */\n",
1192 			    oldfile == 1 ? "!" : "", ifdefname);
1193 		} else {
1194 			if (oldfile)
1195 				diff_output("#ifndef %s\n", ifdefname);
1196 			else
1197 				diff_output("#ifdef %s\n", ifdefname);
1198 		}
1199 		inifdef = 1 + oldfile;
1200 	}
1201 	for (i = a; i <= b; i++) {
1202 		fseek(lb, f[i - 1], SEEK_SET);
1203 		nc = f[i] - f[i - 1];
1204 		if (diff_format != D_IFDEF && ch != '\0') {
1205 			diff_output("%c", ch);
1206 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1207 			    || diff_format == D_UNIFIED))
1208 				diff_output("\t");
1209 			else if (diff_format != D_UNIFIED)
1210 				diff_output(" ");
1211 		}
1212 		col = 0;
1213 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1214 			if ((c = getc(lb)) == EOF) {
1215 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1216 				    diff_format == D_NREVERSE)
1217 					warnx("No newline at end of file");
1218 				else
1219 					diff_output("\n\\ No newline at end of "
1220 					    "file\n");
1221 				return (0);
1222 			}
1223 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1224 				newcol = ((col/tabsize)+1)*tabsize;
1225 				do {
1226 					diff_output(" ");
1227 				} while (++col < newcol);
1228 			} else {
1229 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1230 				    && lastc == '.') {
1231 					/*
1232 					 * Don't print a bare "." line
1233 					 * since that will confuse ed(1).
1234 					 * Print ".." instead and return,
1235 					 * giving the caller an offset
1236 					 * from which to restart.
1237 					 */
1238 					diff_output(".\n");
1239 					return (i - a + 1);
1240 				}
1241 				diff_output("%c", c);
1242 				col++;
1243 			}
1244 		}
1245 	}
1246 	return (0);
1247 }
1248 
1249 /*
1250  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1251  */
1252 static int
1253 readhash(FILE *f, int flags)
1254 {
1255 	int i, t, space;
1256 	int sum;
1257 
1258 	sum = 1;
1259 	space = 0;
1260 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1261 		if (flags & D_IGNORECASE)
1262 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1263 				if (flags & D_STRIPCR && t == '\r') {
1264 					t = getc(f);
1265 					if (t == '\n')
1266 						break;
1267 					ungetc(t, f);
1268 				}
1269 				if (t == EOF) {
1270 					if (i == 0)
1271 						return (0);
1272 					break;
1273 				}
1274 				sum = sum * 127 + chrtran[t];
1275 			}
1276 		else
1277 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1278 				if (flags & D_STRIPCR && t == '\r') {
1279 					t = getc(f);
1280 					if (t == '\n')
1281 						break;
1282 					ungetc(t, f);
1283 				}
1284 				if (t == EOF) {
1285 					if (i == 0)
1286 						return (0);
1287 					break;
1288 				}
1289 				sum = sum * 127 + t;
1290 			}
1291 	} else {
1292 		for (i = 0;;) {
1293 			switch (t = getc(f)) {
1294 			case '\r':
1295 			case '\t':
1296 			case '\v':
1297 			case '\f':
1298 			case ' ':
1299 				space++;
1300 				continue;
1301 			default:
1302 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1303 					i++;
1304 					space = 0;
1305 				}
1306 				sum = sum * 127 + chrtran[t];
1307 				i++;
1308 				continue;
1309 			case EOF:
1310 				if (i == 0)
1311 					return (0);
1312 				/* FALLTHROUGH */
1313 			case '\n':
1314 				break;
1315 			}
1316 			break;
1317 		}
1318 	}
1319 	/*
1320 	 * There is a remote possibility that we end up with a zero sum.
1321 	 * Zero is used as an EOF marker, so return 1 instead.
1322 	 */
1323 	return (sum == 0 ? 1 : sum);
1324 }
1325 
1326 static int
1327 asciifile(FILE *f)
1328 {
1329 	unsigned char buf[BUFSIZ];
1330 	size_t cnt;
1331 
1332 	if (f == NULL)
1333 		return (1);
1334 
1335 	rewind(f);
1336 	cnt = fread(buf, 1, sizeof(buf), f);
1337 	return (memchr(buf, '\0', cnt) == NULL);
1338 }
1339 
1340 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1341 
1342 static char *
1343 match_function(const long *f, int pos, FILE *fp)
1344 {
1345 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1346 	size_t nc;
1347 	int last = lastline;
1348 	const char *state = NULL;
1349 
1350 	lastline = pos;
1351 	while (pos > last) {
1352 		fseek(fp, f[pos - 1], SEEK_SET);
1353 		nc = f[pos] - f[pos - 1];
1354 		if (nc >= sizeof(buf))
1355 			nc = sizeof(buf) - 1;
1356 		nc = fread(buf, 1, nc, fp);
1357 		if (nc > 0) {
1358 			buf[nc] = '\0';
1359 			buf[strcspn(buf, "\n")] = '\0';
1360 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1361 				if (begins_with(buf, "private:")) {
1362 					if (!state)
1363 						state = " (private)";
1364 				} else if (begins_with(buf, "protected:")) {
1365 					if (!state)
1366 						state = " (protected)";
1367 				} else if (begins_with(buf, "public:")) {
1368 					if (!state)
1369 						state = " (public)";
1370 				} else {
1371 					strlcpy(lastbuf, buf, sizeof lastbuf);
1372 					if (state)
1373 						strlcat(lastbuf, state,
1374 						    sizeof lastbuf);
1375 					lastmatchline = pos;
1376 					return lastbuf;
1377 				}
1378 			}
1379 		}
1380 		pos--;
1381 	}
1382 	return lastmatchline > 0 ? lastbuf : NULL;
1383 }
1384 
1385 /* dump accumulated "context" diff changes */
1386 static void
1387 dump_context_vec(FILE *f1, FILE *f2, int flags)
1388 {
1389 	struct context_vec *cvp = context_vec_start;
1390 	int lowa, upb, lowc, upd, do_output;
1391 	int a, b, c, d;
1392 	char ch, *f;
1393 
1394 	if (context_vec_start > context_vec_ptr)
1395 		return;
1396 
1397 	b = d = 0;		/* gcc */
1398 	lowa = MAXIMUM(1, cvp->a - diff_context);
1399 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1400 	lowc = MAXIMUM(1, cvp->c - diff_context);
1401 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1402 
1403 	diff_output("***************");
1404 	if ((flags & D_PROTOTYPE)) {
1405 		f = match_function(ixold, lowa-1, f1);
1406 		if (f != NULL)
1407 			diff_output(" %s", f);
1408 	}
1409 	diff_output("\n*** ");
1410 	range(lowa, upb, ",");
1411 	diff_output(" ****\n");
1412 
1413 	/*
1414 	 * Output changes to the "old" file.  The first loop suppresses
1415 	 * output if there were no changes to the "old" file (we'll see
1416 	 * the "old" lines as context in the "new" list).
1417 	 */
1418 	do_output = 0;
1419 	for (; cvp <= context_vec_ptr; cvp++)
1420 		if (cvp->a <= cvp->b) {
1421 			cvp = context_vec_start;
1422 			do_output++;
1423 			break;
1424 		}
1425 	if (do_output) {
1426 		while (cvp <= context_vec_ptr) {
1427 			a = cvp->a;
1428 			b = cvp->b;
1429 			c = cvp->c;
1430 			d = cvp->d;
1431 
1432 			if (a <= b && c <= d)
1433 				ch = 'c';
1434 			else
1435 				ch = (a <= b) ? 'd' : 'a';
1436 
1437 			if (ch == 'a')
1438 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1439 			else {
1440 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1441 				fetch(ixold, a, b, f1,
1442 				    ch == 'c' ? '!' : '-', 0, flags);
1443 			}
1444 			lowa = b + 1;
1445 			cvp++;
1446 		}
1447 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1448 	}
1449 	/* output changes to the "new" file */
1450 	diff_output("--- ");
1451 	range(lowc, upd, ",");
1452 	diff_output(" ----\n");
1453 
1454 	do_output = 0;
1455 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1456 		if (cvp->c <= cvp->d) {
1457 			cvp = context_vec_start;
1458 			do_output++;
1459 			break;
1460 		}
1461 	if (do_output) {
1462 		while (cvp <= context_vec_ptr) {
1463 			a = cvp->a;
1464 			b = cvp->b;
1465 			c = cvp->c;
1466 			d = cvp->d;
1467 
1468 			if (a <= b && c <= d)
1469 				ch = 'c';
1470 			else
1471 				ch = (a <= b) ? 'd' : 'a';
1472 
1473 			if (ch == 'd')
1474 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1475 			else {
1476 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1477 				fetch(ixnew, c, d, f2,
1478 				    ch == 'c' ? '!' : '+', 0, flags);
1479 			}
1480 			lowc = d + 1;
1481 			cvp++;
1482 		}
1483 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1484 	}
1485 	context_vec_ptr = context_vec_start - 1;
1486 }
1487 
1488 /* dump accumulated "unified" diff changes */
1489 static void
1490 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1491 {
1492 	struct context_vec *cvp = context_vec_start;
1493 	int lowa, upb, lowc, upd;
1494 	int a, b, c, d;
1495 	char ch, *f;
1496 
1497 	if (context_vec_start > context_vec_ptr)
1498 		return;
1499 
1500 	b = d = 0;		/* gcc */
1501 	lowa = MAXIMUM(1, cvp->a - diff_context);
1502 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1503 	lowc = MAXIMUM(1, cvp->c - diff_context);
1504 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1505 
1506 	diff_output("@@ -");
1507 	uni_range(lowa, upb);
1508 	diff_output(" +");
1509 	uni_range(lowc, upd);
1510 	diff_output(" @@");
1511 	if ((flags & D_PROTOTYPE)) {
1512 		f = match_function(ixold, lowa-1, f1);
1513 		if (f != NULL)
1514 			diff_output(" %s", f);
1515 	}
1516 	diff_output("\n");
1517 
1518 	/*
1519 	 * Output changes in "unified" diff format--the old and new lines
1520 	 * are printed together.
1521 	 */
1522 	for (; cvp <= context_vec_ptr; cvp++) {
1523 		a = cvp->a;
1524 		b = cvp->b;
1525 		c = cvp->c;
1526 		d = cvp->d;
1527 
1528 		/*
1529 		 * c: both new and old changes
1530 		 * d: only changes in the old file
1531 		 * a: only changes in the new file
1532 		 */
1533 		if (a <= b && c <= d)
1534 			ch = 'c';
1535 		else
1536 			ch = (a <= b) ? 'd' : 'a';
1537 
1538 		switch (ch) {
1539 		case 'c':
1540 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1541 			fetch(ixold, a, b, f1, '-', 0, flags);
1542 			fetch(ixnew, c, d, f2, '+', 0, flags);
1543 			break;
1544 		case 'd':
1545 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1546 			fetch(ixold, a, b, f1, '-', 0, flags);
1547 			break;
1548 		case 'a':
1549 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1550 			fetch(ixnew, c, d, f2, '+', 0, flags);
1551 			break;
1552 		}
1553 		lowa = b + 1;
1554 		lowc = d + 1;
1555 	}
1556 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1557 
1558 	context_vec_ptr = context_vec_start - 1;
1559 }
1560 
1561 static void
1562 print_header(const char *file1, const char *file2)
1563 {
1564 	const char *time_format;
1565 	char buf1[256];
1566 	char buf2[256];
1567 	char end1[10];
1568 	char end2[10];
1569 	struct tm *tm_ptr1, *tm_ptr2;
1570 	int nsec1 = TIMESPEC_NS (stb1.st_mtime);
1571 	int nsec2 = TIMESPEC_NS (stb2.st_mtime);
1572 
1573 #ifdef ST_MTIM_NSEC
1574 		time_format = "%Y-%m-%d %H:%M:%S.%N";
1575 #else
1576 		time_format = "%Y-%m-%d %H:%M:%S";
1577 #endif
1578 
1579 	if (cflag)
1580 		time_format = "%c";
1581 	tm_ptr1 = localtime(&stb1.st_mtime);
1582 	tm_ptr2 = localtime(&stb2.st_mtime);
1583 	strftime(buf1, 256, time_format, tm_ptr1);
1584 	strftime(buf2, 256, time_format, tm_ptr2);
1585 	if (!cflag) {
1586 		strftime(end1, 10, "%z", tm_ptr1);
1587 		strftime(end2, 10, "%z", tm_ptr2);
1588 		sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1589 		sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1590 	}
1591 	if (label[0] != NULL)
1592 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1593 		    label[0]);
1594 	else
1595 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1596 		    file1, buf1);
1597 	if (label[1] != NULL)
1598 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1599 		    label[1]);
1600 	else
1601 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1602 		    file2, buf2);
1603 }
1604