xref: /freebsd/usr.bin/diff/diffreg.c (revision e4deb071bdf0b1c5e840a38d268e178539c763e3)
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
69 
70 #include <sys/capsicum.h>
71 #include <sys/procdesc.h>
72 #include <sys/stat.h>
73 #include <sys/types.h>
74 #include <sys/event.h>
75 #include <sys/wait.h>
76 
77 #include <capsicum_helpers.h>
78 #include <ctype.h>
79 #include <err.h>
80 #include <errno.h>
81 #include <fcntl.h>
82 #include <paths.h>
83 #include <stddef.h>
84 #include <stdint.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <unistd.h>
89 #include <limits.h>
90 #include <signal.h>
91 
92 #include "diff.h"
93 #include "xmalloc.h"
94 
95 #define _PATH_PR "/usr/bin/pr"
96 
97 #ifdef ST_MTIM_NSEC
98 # define TIMESPEC_NS(timespec) ((timespec).ST_MTIM_NSEC)
99 #else
100 # define TIMESPEC_NS(timespec) 0
101 #endif
102 
103 /*
104  * diff - compare two files.
105  */
106 
107 /*
108  *	Uses an algorithm due to Harold Stone, which finds
109  *	a pair of longest identical subsequences in the two
110  *	files.
111  *
112  *	The major goal is to generate the match vector J.
113  *	J[i] is the index of the line in file1 corresponding
114  *	to line i file0. J[i] = 0 if there is no
115  *	such line in file1.
116  *
117  *	Lines are hashed so as to work in core. All potential
118  *	matches are located by sorting the lines of each file
119  *	on the hash (called ``value''). In particular, this
120  *	collects the equivalence classes in file1 together.
121  *	Subroutine equiv replaces the value of each line in
122  *	file0 by the index of the first element of its
123  *	matching equivalence in (the reordered) file1.
124  *	To save space equiv squeezes file1 into a single
125  *	array member in which the equivalence classes
126  *	are simply concatenated, except that their first
127  *	members are flagged by changing sign.
128  *
129  *	Next the indices that point into member are unsorted into
130  *	array class according to the original order of file0.
131  *
132  *	The cleverness lies in routine stone. This marches
133  *	through the lines of file0, developing a vector klist
134  *	of "k-candidates". At step i a k-candidate is a matched
135  *	pair of lines x,y (x in file0 y in file1) such that
136  *	there is a common subsequence of length k
137  *	between the first i lines of file0 and the first y
138  *	lines of file1, but there is no such subsequence for
139  *	any smaller y. x is the earliest possible mate to y
140  *	that occurs in such a subsequence.
141  *
142  *	Whenever any of the members of the equivalence class of
143  *	lines in file1 matable to a line in file0 has serial number
144  *	less than the y of some k-candidate, that k-candidate
145  *	with the smallest such y is replaced. The new
146  *	k-candidate is chained (via pred) to the current
147  *	k-1 candidate so that the actual subsequence can
148  *	be recovered. When a member has serial number greater
149  *	that the y of all k-candidates, the klist is extended.
150  *	At the end, the longest subsequence is pulled out
151  *	and placed in the array J by unravel
152  *
153  *	With J in hand, the matches there recorded are
154  *	check'ed against reality to assure that no spurious
155  *	matches have crept in due to hashing. If they have,
156  *	they are broken, and "jackpot" is recorded--a harmless
157  *	matter except that a true match for a spuriously
158  *	mated line may now be unnecessarily reported as a change.
159  *
160  *	Much of the complexity of the program comes simply
161  *	from trying to minimize core utilization and
162  *	maximize the range of doable problems by dynamically
163  *	allocating what is needed and reusing what is not.
164  *	The core requirements for problems larger than somewhat
165  *	are (in words) 2*length(file0) + length(file1) +
166  *	3*(number of k-candidates installed),  typically about
167  *	6n words for files of length n.
168  */
169 
170 struct cand {
171 	int	x;
172 	int	y;
173 	int	pred;
174 };
175 
176 static struct line {
177 	int	serial;
178 	int	value;
179 } *file[2];
180 
181 /*
182  * The following struct is used to record change information when
183  * doing a "context" or "unified" diff.  (see routine "change" to
184  * understand the highly mnemonic field names)
185  */
186 struct context_vec {
187 	int	a;		/* start line in old file */
188 	int	b;		/* end line in old file */
189 	int	c;		/* start line in new file */
190 	int	d;		/* end line in new file */
191 };
192 
193 #define	diff_output	printf
194 static FILE	*opentemp(const char *);
195 static void	 output(char *, FILE *, char *, FILE *, int);
196 static void	 check(FILE *, FILE *, int);
197 static void	 range(int, int, const char *);
198 static void	 uni_range(int, int);
199 static void	 dump_context_vec(FILE *, FILE *, int);
200 static void	 dump_unified_vec(FILE *, FILE *, int);
201 static void	 prepare(int, FILE *, size_t, int);
202 static void	 prune(void);
203 static void	 equiv(struct line *, int, struct line *, int, int *);
204 static void	 unravel(int);
205 static void	 unsort(struct line *, int, int *);
206 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
207 static void	 sort(struct line *, int);
208 static void	 print_header(const char *, const char *);
209 static int	 ignoreline(char *);
210 static int	 asciifile(FILE *);
211 static int	 fetch(long *, int, int, FILE *, int, int, int);
212 static int	 newcand(int, int, int);
213 static int	 search(int *, int, int);
214 static int	 skipline(FILE *);
215 static int	 isqrt(int);
216 static int	 stone(int *, int, int *, int *, int);
217 static int	 readhash(FILE *, int);
218 static int	 files_differ(FILE *, FILE *, int);
219 static char	*match_function(const long *, int, FILE *);
220 static char	*preadline(int, size_t, off_t);
221 
222 static int  *J;			/* will be overlaid on class */
223 static int  *class;		/* will be overlaid on file[0] */
224 static int  *klist;		/* will be overlaid on file[0] after class */
225 static int  *member;		/* will be overlaid on file[1] */
226 static int   clen;
227 static int   inifdef;		/* whether or not we are in a #ifdef block */
228 static int   len[2];
229 static int   pref, suff;	/* length of prefix and suffix */
230 static int   slen[2];
231 static int   anychange;
232 static long *ixnew;		/* will be overlaid on file[1] */
233 static long *ixold;		/* will be overlaid on klist */
234 static struct cand *clist;	/* merely a free storage pot for candidates */
235 static int   clistlen;		/* the length of clist */
236 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
237 static u_char *chrtran;		/* translation table for case-folding */
238 static struct context_vec *context_vec_start;
239 static struct context_vec *context_vec_end;
240 static struct context_vec *context_vec_ptr;
241 
242 #define FUNCTION_CONTEXT_SIZE	55
243 static char lastbuf[FUNCTION_CONTEXT_SIZE];
244 static int lastline;
245 static int lastmatchline;
246 
247 
248 /*
249  * chrtran points to one of 2 translation tables: cup2low if folding upper to
250  * lower case clow2low if not folding case
251  */
252 static u_char clow2low[256] = {
253 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
254 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
255 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
256 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
257 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
258 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
259 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
260 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
261 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
262 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
263 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
264 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
265 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
266 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
267 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
268 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
269 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
270 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
271 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
272 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
273 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
274 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
275 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
276 	0xfd, 0xfe, 0xff
277 };
278 
279 static u_char cup2low[256] = {
280 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
281 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
282 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
283 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
284 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
285 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
286 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
287 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
288 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
289 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
290 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
291 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
292 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
293 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
294 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
295 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
296 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
297 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
298 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
299 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
300 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
301 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
302 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
303 	0xfd, 0xfe, 0xff
304 };
305 
306 int
307 diffreg(char *file1, char *file2, int flags, int capsicum)
308 {
309 	FILE *f1, *f2;
310 	int i, rval;
311 	int	ostdout = -1;
312 	int pr_pd, kq;
313 	struct kevent *e;
314 	cap_rights_t rights_ro;
315 
316 	e = NULL;
317 	kq = -1;
318 	f1 = f2 = NULL;
319 	rval = D_SAME;
320 	anychange = 0;
321 	lastline = 0;
322 	lastmatchline = 0;
323 	context_vec_ptr = context_vec_start - 1;
324 	if (flags & D_IGNORECASE)
325 		chrtran = cup2low;
326 	else
327 		chrtran = clow2low;
328 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
329 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
330 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
331 		goto closem;
332 
333 	if (flags & D_EMPTY1)
334 		f1 = fopen(_PATH_DEVNULL, "r");
335 	else {
336 		if (!S_ISREG(stb1.st_mode)) {
337 			if ((f1 = opentemp(file1)) == NULL ||
338 			    fstat(fileno(f1), &stb1) < 0) {
339 				warn("%s", file1);
340 				status |= 2;
341 				goto closem;
342 			}
343 		} else if (strcmp(file1, "-") == 0)
344 			f1 = stdin;
345 		else
346 			f1 = fopen(file1, "r");
347 	}
348 	if (f1 == NULL) {
349 		warn("%s", file1);
350 		status |= 2;
351 		goto closem;
352 	}
353 
354 	if (flags & D_EMPTY2)
355 		f2 = fopen(_PATH_DEVNULL, "r");
356 	else {
357 		if (!S_ISREG(stb2.st_mode)) {
358 			if ((f2 = opentemp(file2)) == NULL ||
359 			    fstat(fileno(f2), &stb2) < 0) {
360 				warn("%s", file2);
361 				status |= 2;
362 				goto closem;
363 			}
364 		} else if (strcmp(file2, "-") == 0)
365 			f2 = stdin;
366 		else
367 			f2 = fopen(file2, "r");
368 	}
369 	if (f2 == NULL) {
370 		warn("%s", file2);
371 		status |= 2;
372 		goto closem;
373 	}
374 
375 	if (lflag) {
376 		/* redirect stdout to pr */
377 		int	 pfd[2];
378 		pid_t	pid;
379 		char	*header;
380 
381 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
382 		signal(SIGPIPE, SIG_IGN);
383 		fflush(stdout);
384 		rewind(stdout);
385 		pipe(pfd);
386 		switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
387 		case -1:
388 			status |= 2;
389 			free(header);
390 			err(2, "No more processes");
391 		case 0:
392 			/* child */
393 			if (pfd[0] != STDIN_FILENO) {
394 				dup2(pfd[0], STDIN_FILENO);
395 				close(pfd[0]);
396 			}
397 			close(pfd[1]);
398 			execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
399 			_exit(127);
400 		default:
401 
402 			/* parent */
403 			if (pfd[1] != STDOUT_FILENO) {
404 				ostdout = dup(STDOUT_FILENO);
405 				dup2(pfd[1], STDOUT_FILENO);
406 				close(pfd[1]);
407 			}
408 			close(pfd[0]);
409 			rewind(stdout);
410 			free(header);
411 			kq = kqueue();
412 			if (kq == -1)
413 				err(2, "kqueue");
414 			e = xmalloc(sizeof(struct kevent));
415 			EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
416 			    NULL);
417 			if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
418 				err(2, "kevent");
419 		}
420 	}
421 
422 	if (capsicum) {
423 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
424 		if (cap_rights_limit(fileno(f1), &rights_ro) < 0
425 		    && errno != ENOSYS)
426 			err(2, "unable to limit rights on: %s", file1);
427 		if (cap_rights_limit(fileno(f2), &rights_ro) < 0 &&
428 		    errno != ENOSYS)
429 			err(2, "unable to limit rights on: %s", file2);
430 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
431 			/* stding has already been limited */
432 			if (caph_limit_stderr() == -1)
433 				err(2, "unable to limit stderr");
434 			if (caph_limit_stdout() == -1)
435 				err(2, "unable to limit stdout");
436 		} else if (caph_limit_stdio() == -1)
437 				err(2, "unable to limit stdio");
438 
439 		caph_cache_catpages();
440 		caph_cache_tzdata();
441 		if (cap_enter() < 0 && errno != ENOSYS)
442 			err(2, "unable to enter capability mode");
443 	}
444 
445 	switch (files_differ(f1, f2, flags)) {
446 	case 0:
447 		goto closem;
448 	case 1:
449 		break;
450 	default:
451 		/* error */
452 		status |= 2;
453 		goto closem;
454 	}
455 
456 	if ((flags & D_FORCEASCII) == 0 &&
457 	    (!asciifile(f1) || !asciifile(f2))) {
458 		rval = D_BINARY;
459 		status |= 1;
460 		goto closem;
461 	}
462 	prepare(0, f1, stb1.st_size, flags);
463 	prepare(1, f2, stb2.st_size, flags);
464 
465 	prune();
466 	sort(sfile[0], slen[0]);
467 	sort(sfile[1], slen[1]);
468 
469 	member = (int *)file[1];
470 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
471 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
472 
473 	class = (int *)file[0];
474 	unsort(sfile[0], slen[0], class);
475 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
476 
477 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
478 	clen = 0;
479 	clistlen = 100;
480 	clist = xcalloc(clistlen, sizeof(*clist));
481 	i = stone(class, slen[0], member, klist, flags);
482 	free(member);
483 	free(class);
484 
485 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
486 	unravel(klist[i]);
487 	free(clist);
488 	free(klist);
489 
490 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
491 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
492 	check(f1, f2, flags);
493 	output(file1, f1, file2, f2, flags);
494 	if (ostdout != -1 && e != NULL) {
495 		/* close the pipe to pr and restore stdout */
496 		int wstatus;
497 
498 		fflush(stdout);
499 		if (ostdout != STDOUT_FILENO) {
500 			close(STDOUT_FILENO);
501 			dup2(ostdout, STDOUT_FILENO);
502 			close(ostdout);
503 		}
504 		if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
505 			err(2, "kevent");
506 		wstatus = e[0].data;
507 		close(kq);
508 		if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
509 			errx(2, "pr exited abnormally");
510 		else if (WIFSIGNALED(wstatus))
511 			errx(2, "pr killed by signal %d",
512 			    WTERMSIG(wstatus));
513 	}
514 
515 closem:
516 	if (anychange) {
517 		status |= 1;
518 		if (rval == D_SAME)
519 			rval = D_DIFFER;
520 	}
521 	if (f1 != NULL)
522 		fclose(f1);
523 	if (f2 != NULL)
524 		fclose(f2);
525 
526 	return (rval);
527 }
528 
529 /*
530  * Check to see if the given files differ.
531  * Returns 0 if they are the same, 1 if different, and -1 on error.
532  * XXX - could use code from cmp(1) [faster]
533  */
534 static int
535 files_differ(FILE *f1, FILE *f2, int flags)
536 {
537 	char buf1[BUFSIZ], buf2[BUFSIZ];
538 	size_t i, j;
539 
540 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
541 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
542 		return (1);
543 	for (;;) {
544 		i = fread(buf1, 1, sizeof(buf1), f1);
545 		j = fread(buf2, 1, sizeof(buf2), f2);
546 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
547 			return (-1);
548 		if (i != j)
549 			return (1);
550 		if (i == 0)
551 			return (0);
552 		if (memcmp(buf1, buf2, i) != 0)
553 			return (1);
554 	}
555 }
556 
557 static FILE *
558 opentemp(const char *f)
559 {
560 	char buf[BUFSIZ], tempfile[PATH_MAX];
561 	ssize_t nread;
562 	int ifd, ofd;
563 
564 	if (strcmp(f, "-") == 0)
565 		ifd = STDIN_FILENO;
566 	else if ((ifd = open(f, O_RDONLY, 0644)) < 0)
567 		return (NULL);
568 
569 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
570 
571 	if ((ofd = mkstemp(tempfile)) < 0) {
572 		close(ifd);
573 		return (NULL);
574 	}
575 	unlink(tempfile);
576 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
577 		if (write(ofd, buf, nread) != nread) {
578 			close(ifd);
579 			close(ofd);
580 			return (NULL);
581 		}
582 	}
583 	close(ifd);
584 	lseek(ofd, (off_t)0, SEEK_SET);
585 	return (fdopen(ofd, "r"));
586 }
587 
588 char *
589 splice(char *dir, char *path)
590 {
591 	char *tail, *buf;
592 	size_t dirlen;
593 
594 	dirlen = strlen(dir);
595 	while (dirlen != 0 && dir[dirlen - 1] == '/')
596 	    dirlen--;
597 	if ((tail = strrchr(path, '/')) == NULL)
598 		tail = path;
599 	else
600 		tail++;
601 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
602 	return (buf);
603 }
604 
605 static void
606 prepare(int i, FILE *fd, size_t filesize, int flags)
607 {
608 	struct line *p;
609 	int h;
610 	size_t sz, j;
611 
612 	rewind(fd);
613 
614 	sz = MIN(filesize, SIZE_MAX) / 25;
615 	if (sz < 100)
616 		sz = 100;
617 
618 	p = xcalloc(sz + 3, sizeof(*p));
619 	for (j = 0; (h = readhash(fd, flags));) {
620 		if (j == sz) {
621 			sz = sz * 3 / 2;
622 			p = xreallocarray(p, sz + 3, sizeof(*p));
623 		}
624 		p[++j].value = h;
625 	}
626 	len[i] = j;
627 	file[i] = p;
628 }
629 
630 static void
631 prune(void)
632 {
633 	int i, j;
634 
635 	for (pref = 0; pref < len[0] && pref < len[1] &&
636 	    file[0][pref + 1].value == file[1][pref + 1].value;
637 	    pref++)
638 		;
639 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
640 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
641 	    suff++)
642 		;
643 	for (j = 0; j < 2; j++) {
644 		sfile[j] = file[j] + pref;
645 		slen[j] = len[j] - pref - suff;
646 		for (i = 0; i <= slen[j]; i++)
647 			sfile[j][i].serial = i;
648 	}
649 }
650 
651 static void
652 equiv(struct line *a, int n, struct line *b, int m, int *c)
653 {
654 	int i, j;
655 
656 	i = j = 1;
657 	while (i <= n && j <= m) {
658 		if (a[i].value < b[j].value)
659 			a[i++].value = 0;
660 		else if (a[i].value == b[j].value)
661 			a[i++].value = j;
662 		else
663 			j++;
664 	}
665 	while (i <= n)
666 		a[i++].value = 0;
667 	b[m + 1].value = 0;
668 	j = 0;
669 	while (++j <= m) {
670 		c[j] = -b[j].serial;
671 		while (b[j + 1].value == b[j].value) {
672 			j++;
673 			c[j] = b[j].serial;
674 		}
675 	}
676 	c[j] = -1;
677 }
678 
679 /* Code taken from ping.c */
680 static int
681 isqrt(int n)
682 {
683 	int y, x = 1;
684 
685 	if (n == 0)
686 		return (0);
687 
688 	do { /* newton was a stinker */
689 		y = x;
690 		x = n / x;
691 		x += y;
692 		x /= 2;
693 	} while ((x - y) > 1 || (x - y) < -1);
694 
695 	return (x);
696 }
697 
698 static int
699 stone(int *a, int n, int *b, int *c, int flags)
700 {
701 	int i, k, y, j, l;
702 	int oldc, tc, oldl, sq;
703 	u_int numtries, bound;
704 
705 	if (flags & D_MINIMAL)
706 		bound = UINT_MAX;
707 	else {
708 		sq = isqrt(n);
709 		bound = MAX(256, sq);
710 	}
711 
712 	k = 0;
713 	c[0] = newcand(0, 0, 0);
714 	for (i = 1; i <= n; i++) {
715 		j = a[i];
716 		if (j == 0)
717 			continue;
718 		y = -b[j];
719 		oldl = 0;
720 		oldc = c[0];
721 		numtries = 0;
722 		do {
723 			if (y <= clist[oldc].y)
724 				continue;
725 			l = search(c, k, y);
726 			if (l != oldl + 1)
727 				oldc = c[l - 1];
728 			if (l <= k) {
729 				if (clist[c[l]].y <= y)
730 					continue;
731 				tc = c[l];
732 				c[l] = newcand(i, y, oldc);
733 				oldc = tc;
734 				oldl = l;
735 				numtries++;
736 			} else {
737 				c[l] = newcand(i, y, oldc);
738 				k++;
739 				break;
740 			}
741 		} while ((y = b[++j]) > 0 && numtries < bound);
742 	}
743 	return (k);
744 }
745 
746 static int
747 newcand(int x, int y, int pred)
748 {
749 	struct cand *q;
750 
751 	if (clen == clistlen) {
752 		clistlen = clistlen * 11 / 10;
753 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
754 	}
755 	q = clist + clen;
756 	q->x = x;
757 	q->y = y;
758 	q->pred = pred;
759 	return (clen++);
760 }
761 
762 static int
763 search(int *c, int k, int y)
764 {
765 	int i, j, l, t;
766 
767 	if (clist[c[k]].y < y)	/* quick look for typical case */
768 		return (k + 1);
769 	i = 0;
770 	j = k + 1;
771 	for (;;) {
772 		l = (i + j) / 2;
773 		if (l <= i)
774 			break;
775 		t = clist[c[l]].y;
776 		if (t > y)
777 			j = l;
778 		else if (t < y)
779 			i = l;
780 		else
781 			return (l);
782 	}
783 	return (l + 1);
784 }
785 
786 static void
787 unravel(int p)
788 {
789 	struct cand *q;
790 	int i;
791 
792 	for (i = 0; i <= len[0]; i++)
793 		J[i] = i <= pref ? i :
794 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
795 	for (q = clist + p; q->y != 0; q = clist + q->pred)
796 		J[q->x + pref] = q->y + pref;
797 }
798 
799 /*
800  * Check does double duty:
801  *  1.	ferret out any fortuitous correspondences due
802  *	to confounding by hashing (which result in "jackpot")
803  *  2.  collect random access indexes to the two files
804  */
805 static void
806 check(FILE *f1, FILE *f2, int flags)
807 {
808 	int i, j, jackpot, c, d;
809 	long ctold, ctnew;
810 
811 	rewind(f1);
812 	rewind(f2);
813 	j = 1;
814 	ixold[0] = ixnew[0] = 0;
815 	jackpot = 0;
816 	ctold = ctnew = 0;
817 	for (i = 1; i <= len[0]; i++) {
818 		if (J[i] == 0) {
819 			ixold[i] = ctold += skipline(f1);
820 			continue;
821 		}
822 		while (j < J[i]) {
823 			ixnew[j] = ctnew += skipline(f2);
824 			j++;
825 		}
826 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
827 			for (;;) {
828 				c = getc(f1);
829 				d = getc(f2);
830 				/*
831 				 * GNU diff ignores a missing newline
832 				 * in one file for -b or -w.
833 				 */
834 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
835 					if (c == EOF && d == '\n') {
836 						ctnew++;
837 						break;
838 					} else if (c == '\n' && d == EOF) {
839 						ctold++;
840 						break;
841 					}
842 				}
843 				ctold++;
844 				ctnew++;
845 				if (flags & D_STRIPCR) {
846 					if (c == '\r') {
847 						if ((c = getc(f1)) == '\n') {
848 							ctnew++;
849 							break;
850 						}
851 					}
852 					if (d == '\r') {
853 						if ((d = getc(f2)) == '\n') {
854 							ctold++;
855 							break;
856 						}
857 					}
858 				}
859 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
860 				    isspace(d)) {
861 					do {
862 						if (c == '\n')
863 							break;
864 						ctold++;
865 					} while (isspace(c = getc(f1)));
866 					do {
867 						if (d == '\n')
868 							break;
869 						ctnew++;
870 					} while (isspace(d = getc(f2)));
871 				} else if ((flags & D_IGNOREBLANKS)) {
872 					while (isspace(c) && c != '\n') {
873 						c = getc(f1);
874 						ctold++;
875 					}
876 					while (isspace(d) && d != '\n') {
877 						d = getc(f2);
878 						ctnew++;
879 					}
880 				}
881 				if (chrtran[c] != chrtran[d]) {
882 					jackpot++;
883 					J[i] = 0;
884 					if (c != '\n' && c != EOF)
885 						ctold += skipline(f1);
886 					if (d != '\n' && c != EOF)
887 						ctnew += skipline(f2);
888 					break;
889 				}
890 				if (c == '\n' || c == EOF)
891 					break;
892 			}
893 		} else {
894 			for (;;) {
895 				ctold++;
896 				ctnew++;
897 				if ((c = getc(f1)) != (d = getc(f2))) {
898 					/* jackpot++; */
899 					J[i] = 0;
900 					if (c != '\n' && c != EOF)
901 						ctold += skipline(f1);
902 					if (d != '\n' && c != EOF)
903 						ctnew += skipline(f2);
904 					break;
905 				}
906 				if (c == '\n' || c == EOF)
907 					break;
908 			}
909 		}
910 		ixold[i] = ctold;
911 		ixnew[j] = ctnew;
912 		j++;
913 	}
914 	for (; j <= len[1]; j++) {
915 		ixnew[j] = ctnew += skipline(f2);
916 	}
917 	/*
918 	 * if (jackpot)
919 	 *	fprintf(stderr, "jackpot\n");
920 	 */
921 }
922 
923 /* shellsort CACM #201 */
924 static void
925 sort(struct line *a, int n)
926 {
927 	struct line *ai, *aim, w;
928 	int j, m = 0, k;
929 
930 	if (n == 0)
931 		return;
932 	for (j = 1; j <= n; j *= 2)
933 		m = 2 * j - 1;
934 	for (m /= 2; m != 0; m /= 2) {
935 		k = n - m;
936 		for (j = 1; j <= k; j++) {
937 			for (ai = &a[j]; ai > a; ai -= m) {
938 				aim = &ai[m];
939 				if (aim < ai)
940 					break;	/* wraparound */
941 				if (aim->value > ai[0].value ||
942 				    (aim->value == ai[0].value &&
943 					aim->serial > ai[0].serial))
944 					break;
945 				w.value = ai[0].value;
946 				ai[0].value = aim->value;
947 				aim->value = w.value;
948 				w.serial = ai[0].serial;
949 				ai[0].serial = aim->serial;
950 				aim->serial = w.serial;
951 			}
952 		}
953 	}
954 }
955 
956 static void
957 unsort(struct line *f, int l, int *b)
958 {
959 	int *a, i;
960 
961 	a = xcalloc(l + 1, sizeof(*a));
962 	for (i = 1; i <= l; i++)
963 		a[f[i].serial] = f[i].value;
964 	for (i = 1; i <= l; i++)
965 		b[i] = a[i];
966 	free(a);
967 }
968 
969 static int
970 skipline(FILE *f)
971 {
972 	int i, c;
973 
974 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
975 		continue;
976 	return (i);
977 }
978 
979 static void
980 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
981 {
982 	int m, i0, i1, j0, j1;
983 
984 	rewind(f1);
985 	rewind(f2);
986 	m = len[0];
987 	J[0] = 0;
988 	J[m + 1] = len[1] + 1;
989 	if (diff_format != D_EDIT) {
990 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
991 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
992 				i0++;
993 			j0 = J[i0 - 1] + 1;
994 			i1 = i0 - 1;
995 			while (i1 < m && J[i1 + 1] == 0)
996 				i1++;
997 			j1 = J[i1 + 1] - 1;
998 			J[i1] = j1;
999 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
1000 		}
1001 	} else {
1002 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
1003 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
1004 				i0--;
1005 			j0 = J[i0 + 1] - 1;
1006 			i1 = i0 + 1;
1007 			while (i1 > 1 && J[i1 - 1] == 0)
1008 				i1--;
1009 			j1 = J[i1 - 1] + 1;
1010 			J[i1] = j1;
1011 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
1012 		}
1013 	}
1014 	if (m == 0)
1015 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
1016 	if (diff_format == D_IFDEF) {
1017 		for (;;) {
1018 #define	c i0
1019 			if ((c = getc(f1)) == EOF)
1020 				return;
1021 			diff_output("%c", c);
1022 		}
1023 #undef c
1024 	}
1025 	if (anychange != 0) {
1026 		if (diff_format == D_CONTEXT)
1027 			dump_context_vec(f1, f2, flags);
1028 		else if (diff_format == D_UNIFIED)
1029 			dump_unified_vec(f1, f2, flags);
1030 	}
1031 }
1032 
1033 static void
1034 range(int a, int b, const char *separator)
1035 {
1036 	diff_output("%d", a > b ? b : a);
1037 	if (a < b)
1038 		diff_output("%s%d", separator, b);
1039 }
1040 
1041 static void
1042 uni_range(int a, int b)
1043 {
1044 	if (a < b)
1045 		diff_output("%d,%d", a, b - a + 1);
1046 	else if (a == b)
1047 		diff_output("%d", b);
1048 	else
1049 		diff_output("%d,0", b);
1050 }
1051 
1052 static char *
1053 preadline(int fd, size_t rlen, off_t off)
1054 {
1055 	char *line;
1056 	ssize_t nr;
1057 
1058 	line = xmalloc(rlen + 1);
1059 	if ((nr = pread(fd, line, rlen, off)) < 0)
1060 		err(2, "preadline");
1061 	if (nr > 0 && line[nr-1] == '\n')
1062 		nr--;
1063 	line[nr] = '\0';
1064 	return (line);
1065 }
1066 
1067 static int
1068 ignoreline(char *line)
1069 {
1070 	int ret;
1071 
1072 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1073 	free(line);
1074 	return (ret == 0);	/* if it matched, it should be ignored. */
1075 }
1076 
1077 /*
1078  * Indicate that there is a difference between lines a and b of the from file
1079  * to get to lines c to d of the to file.  If a is greater then b then there
1080  * are no lines in the from file involved and this means that there were
1081  * lines appended (beginning at b).  If c is greater than d then there are
1082  * lines missing from the to file.
1083  */
1084 static void
1085 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1086     int *pflags)
1087 {
1088 	static size_t max_context = 64;
1089 	int i;
1090 
1091 restart:
1092 	if (diff_format != D_IFDEF && a > b && c > d)
1093 		return;
1094 	if (ignore_pats != NULL) {
1095 		char *line;
1096 		/*
1097 		 * All lines in the change, insert, or delete must
1098 		 * match an ignore pattern for the change to be
1099 		 * ignored.
1100 		 */
1101 		if (a <= b) {		/* Changes and deletes. */
1102 			for (i = a; i <= b; i++) {
1103 				line = preadline(fileno(f1),
1104 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1105 				if (!ignoreline(line))
1106 					goto proceed;
1107 			}
1108 		}
1109 		if (a > b || c <= d) {	/* Changes and inserts. */
1110 			for (i = c; i <= d; i++) {
1111 				line = preadline(fileno(f2),
1112 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1113 				if (!ignoreline(line))
1114 					goto proceed;
1115 			}
1116 		}
1117 		return;
1118 	}
1119 proceed:
1120 	if (*pflags & D_HEADER) {
1121 		diff_output("%s %s %s\n", diffargs, file1, file2);
1122 		*pflags &= ~D_HEADER;
1123 	}
1124 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1125 		/*
1126 		 * Allocate change records as needed.
1127 		 */
1128 		if (context_vec_ptr == context_vec_end - 1) {
1129 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1130 			max_context <<= 1;
1131 			context_vec_start = xreallocarray(context_vec_start,
1132 			    max_context, sizeof(*context_vec_start));
1133 			context_vec_end = context_vec_start + max_context;
1134 			context_vec_ptr = context_vec_start + offset;
1135 		}
1136 		if (anychange == 0) {
1137 			/*
1138 			 * Print the context/unidiff header first time through.
1139 			 */
1140 			print_header(file1, file2);
1141 			anychange = 1;
1142 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1143 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1144 			/*
1145 			 * If this change is more than 'diff_context' lines from the
1146 			 * previous change, dump the record and reset it.
1147 			 */
1148 			if (diff_format == D_CONTEXT)
1149 				dump_context_vec(f1, f2, *pflags);
1150 			else
1151 				dump_unified_vec(f1, f2, *pflags);
1152 		}
1153 		context_vec_ptr++;
1154 		context_vec_ptr->a = a;
1155 		context_vec_ptr->b = b;
1156 		context_vec_ptr->c = c;
1157 		context_vec_ptr->d = d;
1158 		return;
1159 	}
1160 	if (anychange == 0)
1161 		anychange = 1;
1162 	switch (diff_format) {
1163 	case D_BRIEF:
1164 		return;
1165 	case D_NORMAL:
1166 	case D_EDIT:
1167 		range(a, b, ",");
1168 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1169 		if (diff_format == D_NORMAL)
1170 			range(c, d, ",");
1171 		diff_output("\n");
1172 		break;
1173 	case D_REVERSE:
1174 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1175 		range(a, b, " ");
1176 		diff_output("\n");
1177 		break;
1178 	case D_NREVERSE:
1179 		if (a > b)
1180 			diff_output("a%d %d\n", b, d - c + 1);
1181 		else {
1182 			diff_output("d%d %d\n", a, b - a + 1);
1183 			if (!(c > d))
1184 				/* add changed lines */
1185 				diff_output("a%d %d\n", b, d - c + 1);
1186 		}
1187 		break;
1188 	}
1189 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1190 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1191 		if (a <= b && c <= d && diff_format == D_NORMAL)
1192 			diff_output("---\n");
1193 	}
1194 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1195 	if (i != 0 && diff_format == D_EDIT) {
1196 		/*
1197 		 * A non-zero return value for D_EDIT indicates that the
1198 		 * last line printed was a bare dot (".") that has been
1199 		 * escaped as ".." to prevent ed(1) from misinterpreting
1200 		 * it.  We have to add a substitute command to change this
1201 		 * back and restart where we left off.
1202 		 */
1203 		diff_output(".\n");
1204 		diff_output("%ds/.//\n", a + i - 1);
1205 		b = a + i - 1;
1206 		a = b + 1;
1207 		c += i;
1208 		goto restart;
1209 	}
1210 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1211 		diff_output(".\n");
1212 	if (inifdef) {
1213 		diff_output("#endif /* %s */\n", ifdefname);
1214 		inifdef = 0;
1215 	}
1216 }
1217 
1218 static int
1219 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1220 {
1221 	int i, j, c, lastc, col, nc;
1222 	int	newcol;
1223 
1224 	/*
1225 	 * When doing #ifdef's, copy down to current line
1226 	 * if this is the first file, so that stuff makes it to output.
1227 	 */
1228 	if (diff_format == D_IFDEF && oldfile) {
1229 		long curpos = ftell(lb);
1230 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1231 		nc = f[a > b ? b : a - 1] - curpos;
1232 		for (i = 0; i < nc; i++)
1233 			diff_output("%c", getc(lb));
1234 	}
1235 	if (a > b)
1236 		return (0);
1237 	if (diff_format == D_IFDEF) {
1238 		if (inifdef) {
1239 			diff_output("#else /* %s%s */\n",
1240 			    oldfile == 1 ? "!" : "", ifdefname);
1241 		} else {
1242 			if (oldfile)
1243 				diff_output("#ifndef %s\n", ifdefname);
1244 			else
1245 				diff_output("#ifdef %s\n", ifdefname);
1246 		}
1247 		inifdef = 1 + oldfile;
1248 	}
1249 	for (i = a; i <= b; i++) {
1250 		fseek(lb, f[i - 1], SEEK_SET);
1251 		nc = f[i] - f[i - 1];
1252 		if (diff_format != D_IFDEF && ch != '\0') {
1253 			diff_output("%c", ch);
1254 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1255 			    || diff_format == D_UNIFIED))
1256 				diff_output("\t");
1257 			else if (diff_format != D_UNIFIED)
1258 				diff_output(" ");
1259 		}
1260 		col = 0;
1261 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1262 			if ((c = getc(lb)) == EOF) {
1263 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1264 				    diff_format == D_NREVERSE)
1265 					warnx("No newline at end of file");
1266 				else
1267 					diff_output("\n\\ No newline at end of "
1268 					    "file\n");
1269 				return (0);
1270 			}
1271 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1272 				newcol = ((col/tabsize)+1)*tabsize;
1273 				do {
1274 					diff_output(" ");
1275 				} while (++col < newcol);
1276 			} else {
1277 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1278 				    && lastc == '.') {
1279 					/*
1280 					 * Don't print a bare "." line
1281 					 * since that will confuse ed(1).
1282 					 * Print ".." instead and return,
1283 					 * giving the caller an offset
1284 					 * from which to restart.
1285 					 */
1286 					diff_output(".\n");
1287 					return (i - a + 1);
1288 				}
1289 				diff_output("%c", c);
1290 				col++;
1291 			}
1292 		}
1293 	}
1294 	return (0);
1295 }
1296 
1297 /*
1298  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1299  */
1300 static int
1301 readhash(FILE *f, int flags)
1302 {
1303 	int i, t, space;
1304 	int sum;
1305 
1306 	sum = 1;
1307 	space = 0;
1308 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1309 		if (flags & D_IGNORECASE)
1310 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1311 				if (flags & D_STRIPCR && t == '\r') {
1312 					t = getc(f);
1313 					if (t == '\n')
1314 						break;
1315 					ungetc(t, f);
1316 				}
1317 				if (t == EOF) {
1318 					if (i == 0)
1319 						return (0);
1320 					break;
1321 				}
1322 				sum = sum * 127 + chrtran[t];
1323 			}
1324 		else
1325 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1326 				if (flags & D_STRIPCR && t == '\r') {
1327 					t = getc(f);
1328 					if (t == '\n')
1329 						break;
1330 					ungetc(t, f);
1331 				}
1332 				if (t == EOF) {
1333 					if (i == 0)
1334 						return (0);
1335 					break;
1336 				}
1337 				sum = sum * 127 + t;
1338 			}
1339 	} else {
1340 		for (i = 0;;) {
1341 			switch (t = getc(f)) {
1342 			case '\r':
1343 			case '\t':
1344 			case '\v':
1345 			case '\f':
1346 			case ' ':
1347 				space++;
1348 				continue;
1349 			default:
1350 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1351 					i++;
1352 					space = 0;
1353 				}
1354 				sum = sum * 127 + chrtran[t];
1355 				i++;
1356 				continue;
1357 			case EOF:
1358 				if (i == 0)
1359 					return (0);
1360 				/* FALLTHROUGH */
1361 			case '\n':
1362 				break;
1363 			}
1364 			break;
1365 		}
1366 	}
1367 	/*
1368 	 * There is a remote possibility that we end up with a zero sum.
1369 	 * Zero is used as an EOF marker, so return 1 instead.
1370 	 */
1371 	return (sum == 0 ? 1 : sum);
1372 }
1373 
1374 static int
1375 asciifile(FILE *f)
1376 {
1377 	unsigned char buf[BUFSIZ];
1378 	size_t cnt;
1379 
1380 	if (f == NULL)
1381 		return (1);
1382 
1383 	rewind(f);
1384 	cnt = fread(buf, 1, sizeof(buf), f);
1385 	return (memchr(buf, '\0', cnt) == NULL);
1386 }
1387 
1388 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1389 
1390 static char *
1391 match_function(const long *f, int pos, FILE *fp)
1392 {
1393 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1394 	size_t nc;
1395 	int last = lastline;
1396 	const char *state = NULL;
1397 
1398 	lastline = pos;
1399 	while (pos > last) {
1400 		fseek(fp, f[pos - 1], SEEK_SET);
1401 		nc = f[pos] - f[pos - 1];
1402 		if (nc >= sizeof(buf))
1403 			nc = sizeof(buf) - 1;
1404 		nc = fread(buf, 1, nc, fp);
1405 		if (nc > 0) {
1406 			buf[nc] = '\0';
1407 			buf[strcspn(buf, "\n")] = '\0';
1408 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1409 				if (begins_with(buf, "private:")) {
1410 					if (!state)
1411 						state = " (private)";
1412 				} else if (begins_with(buf, "protected:")) {
1413 					if (!state)
1414 						state = " (protected)";
1415 				} else if (begins_with(buf, "public:")) {
1416 					if (!state)
1417 						state = " (public)";
1418 				} else {
1419 					strlcpy(lastbuf, buf, sizeof lastbuf);
1420 					if (state)
1421 						strlcat(lastbuf, state,
1422 						    sizeof lastbuf);
1423 					lastmatchline = pos;
1424 					return lastbuf;
1425 				}
1426 			}
1427 		}
1428 		pos--;
1429 	}
1430 	return lastmatchline > 0 ? lastbuf : NULL;
1431 }
1432 
1433 /* dump accumulated "context" diff changes */
1434 static void
1435 dump_context_vec(FILE *f1, FILE *f2, int flags)
1436 {
1437 	struct context_vec *cvp = context_vec_start;
1438 	int lowa, upb, lowc, upd, do_output;
1439 	int a, b, c, d;
1440 	char ch, *f;
1441 
1442 	if (context_vec_start > context_vec_ptr)
1443 		return;
1444 
1445 	b = d = 0;		/* gcc */
1446 	lowa = MAX(1, cvp->a - diff_context);
1447 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1448 	lowc = MAX(1, cvp->c - diff_context);
1449 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1450 
1451 	diff_output("***************");
1452 	if ((flags & D_PROTOTYPE)) {
1453 		f = match_function(ixold, lowa-1, f1);
1454 		if (f != NULL)
1455 			diff_output(" %s", f);
1456 	}
1457 	diff_output("\n*** ");
1458 	range(lowa, upb, ",");
1459 	diff_output(" ****\n");
1460 
1461 	/*
1462 	 * Output changes to the "old" file.  The first loop suppresses
1463 	 * output if there were no changes to the "old" file (we'll see
1464 	 * the "old" lines as context in the "new" list).
1465 	 */
1466 	do_output = 0;
1467 	for (; cvp <= context_vec_ptr; cvp++)
1468 		if (cvp->a <= cvp->b) {
1469 			cvp = context_vec_start;
1470 			do_output++;
1471 			break;
1472 		}
1473 	if (do_output) {
1474 		while (cvp <= context_vec_ptr) {
1475 			a = cvp->a;
1476 			b = cvp->b;
1477 			c = cvp->c;
1478 			d = cvp->d;
1479 
1480 			if (a <= b && c <= d)
1481 				ch = 'c';
1482 			else
1483 				ch = (a <= b) ? 'd' : 'a';
1484 
1485 			if (ch == 'a')
1486 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1487 			else {
1488 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1489 				fetch(ixold, a, b, f1,
1490 				    ch == 'c' ? '!' : '-', 0, flags);
1491 			}
1492 			lowa = b + 1;
1493 			cvp++;
1494 		}
1495 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1496 	}
1497 	/* output changes to the "new" file */
1498 	diff_output("--- ");
1499 	range(lowc, upd, ",");
1500 	diff_output(" ----\n");
1501 
1502 	do_output = 0;
1503 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1504 		if (cvp->c <= cvp->d) {
1505 			cvp = context_vec_start;
1506 			do_output++;
1507 			break;
1508 		}
1509 	if (do_output) {
1510 		while (cvp <= context_vec_ptr) {
1511 			a = cvp->a;
1512 			b = cvp->b;
1513 			c = cvp->c;
1514 			d = cvp->d;
1515 
1516 			if (a <= b && c <= d)
1517 				ch = 'c';
1518 			else
1519 				ch = (a <= b) ? 'd' : 'a';
1520 
1521 			if (ch == 'd')
1522 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1523 			else {
1524 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1525 				fetch(ixnew, c, d, f2,
1526 				    ch == 'c' ? '!' : '+', 0, flags);
1527 			}
1528 			lowc = d + 1;
1529 			cvp++;
1530 		}
1531 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1532 	}
1533 	context_vec_ptr = context_vec_start - 1;
1534 }
1535 
1536 /* dump accumulated "unified" diff changes */
1537 static void
1538 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1539 {
1540 	struct context_vec *cvp = context_vec_start;
1541 	int lowa, upb, lowc, upd;
1542 	int a, b, c, d;
1543 	char ch, *f;
1544 
1545 	if (context_vec_start > context_vec_ptr)
1546 		return;
1547 
1548 	b = d = 0;		/* gcc */
1549 	lowa = MAX(1, cvp->a - diff_context);
1550 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1551 	lowc = MAX(1, cvp->c - diff_context);
1552 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1553 
1554 	diff_output("@@ -");
1555 	uni_range(lowa, upb);
1556 	diff_output(" +");
1557 	uni_range(lowc, upd);
1558 	diff_output(" @@");
1559 	if ((flags & D_PROTOTYPE)) {
1560 		f = match_function(ixold, lowa-1, f1);
1561 		if (f != NULL)
1562 			diff_output(" %s", f);
1563 	}
1564 	diff_output("\n");
1565 
1566 	/*
1567 	 * Output changes in "unified" diff format--the old and new lines
1568 	 * are printed together.
1569 	 */
1570 	for (; cvp <= context_vec_ptr; cvp++) {
1571 		a = cvp->a;
1572 		b = cvp->b;
1573 		c = cvp->c;
1574 		d = cvp->d;
1575 
1576 		/*
1577 		 * c: both new and old changes
1578 		 * d: only changes in the old file
1579 		 * a: only changes in the new file
1580 		 */
1581 		if (a <= b && c <= d)
1582 			ch = 'c';
1583 		else
1584 			ch = (a <= b) ? 'd' : 'a';
1585 
1586 		switch (ch) {
1587 		case 'c':
1588 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1589 			fetch(ixold, a, b, f1, '-', 0, flags);
1590 			fetch(ixnew, c, d, f2, '+', 0, flags);
1591 			break;
1592 		case 'd':
1593 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1594 			fetch(ixold, a, b, f1, '-', 0, flags);
1595 			break;
1596 		case 'a':
1597 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1598 			fetch(ixnew, c, d, f2, '+', 0, flags);
1599 			break;
1600 		}
1601 		lowa = b + 1;
1602 		lowc = d + 1;
1603 	}
1604 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1605 
1606 	context_vec_ptr = context_vec_start - 1;
1607 }
1608 
1609 static void
1610 print_header(const char *file1, const char *file2)
1611 {
1612 	const char *time_format;
1613 	char buf1[256];
1614 	char buf2[256];
1615 	char end1[10];
1616 	char end2[10];
1617 	struct tm *tm_ptr1, *tm_ptr2;
1618 	int nsec1 = TIMESPEC_NS (stb1.st_mtime);
1619 	int nsec2 = TIMESPEC_NS (stb2.st_mtime);
1620 
1621 #ifdef ST_MTIM_NSEC
1622 		time_format = "%Y-%m-%d %H:%M:%S.%N";
1623 #else
1624 		time_format = "%Y-%m-%d %H:%M:%S";
1625 #endif
1626 
1627 	if (cflag)
1628 		time_format = "%c";
1629 	tm_ptr1 = localtime(&stb1.st_mtime);
1630 	tm_ptr2 = localtime(&stb2.st_mtime);
1631 	strftime(buf1, 256, time_format, tm_ptr1);
1632 	strftime(buf2, 256, time_format, tm_ptr2);
1633 	if (!cflag) {
1634 		strftime(end1, 10, "%z", tm_ptr1);
1635 		strftime(end2, 10, "%z", tm_ptr2);
1636 		sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1637 		sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1638 	}
1639 	if (label[0] != NULL)
1640 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1641 		    label[0]);
1642 	else
1643 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1644 		    file1, buf1);
1645 	if (label[1] != NULL)
1646 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1647 		    label[1]);
1648 	else
1649 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1650 		    file2, buf2);
1651 }
1652