xref: /illumos-gate/usr/src/cmd/diff3/diff3prog.c (revision 2aeafac3612e19716bf8164f89c3c9196342979c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <limits.h>
33 #include <sys/param.h>
34 #include <errno.h>
35 #
36 
37 /*
38  * diff3 - 3-way differential file comparison
39  */
40 
/*
 * diff3 [-ex3EX] d13 d23 f1 f2 f3 [m1 m3]
 *
 * d13 = diff report on f1 vs f3
 * d23 = diff report on f2 vs f3
 * f1, f2, f3 = the 3 files
 * m1, m3 = optional labels used (only with options E and X) to mark
 * changes in f1 that overlap with changes in f3; when they are not
 * given, the file names f1 and f3 are used as the labels instead.
 */
51 
/*
 * A range of line numbers within one file.
 *	from	first line in the range of changed lines
 *	to	last line of the range, plus one
 * For added lines, from == to == the line after the point of insertion.
 */
struct range {
	int from;
	int to;
};

/*
 * One change taken from a diff report: the affected range on the "old"
 * (left-hand) side and on the "new" (right-hand) side of the report.
 */
struct diff {
	struct range old;
	struct range new;
};
60 
61 #define	NC 4096
62 /*
63  * de is used to gather editing scripts,
64  * that are later spewed out in reverse order.
65  * its first element must be all zero
66  * the "new" component of de contains line positions
67  * or byte positions depending on when you look(!?)
68  */
69 static struct diff d13[NC];
70 static struct diff d23[NC];
71 
72 /*
73  * array overlap indicates which sections in de correspond to
74  * lines that are different in all three files.
75  */
76 
77 static struct diff de[NC];
78 static char overlap[NC];
79 static int  overlapcnt = 0;
80 
81 static char line[LINE_MAX+1];
82 static FILE *fp[3];
83 /*
84  *	the number of the last-read line in each file
85  *	is kept in cline[0-2]
86  */
87 static int cline[3];
88 /*
89  *	the latest known correspondence between line
90  *	numbers of the 3 files is stored in last[1-3]
91  */
92 static int last[4];
93 static int eflag;
94 static int oflag;	/* indicates whether to mark overlaps (-E or -X) */
95 static int debug  = 0;
96 /* markers for -E and -X: */
97 static char f1mark[8+MAXPATHLEN], f3mark[8+MAXPATHLEN];
98 		/* Need space for "<<<<<<< " or ">>>>>>> " plus filename   */
99 static int save_err;	/* saves errno */
100 
/*
 * Forward declarations of the file-local functions.
 *
 * trouble() and usage() take no arguments; they are declared with an
 * explicit (void) parameter list so that the declarations are real
 * prototypes and the compiler can reject calls that pass arguments.
 */
static int readin(char *name, struct diff *dd);
static int number(char **lc);
static int digit(int c);
static int getchange(FILE *b);
static int getaline(FILE *b);
static void merge(int m1, int m2);
static void separate(char *s);
static void change(int i, struct range *rold, int dup);
static void prange(struct range *rold);
static void keep(int i, struct range *rnew);
static int skip(int i, int from, char *pr);
static int duplicate(struct range *r1, struct range *r2);
static void repos(int nchar);
static void trouble(void);
static int edit(struct diff *diff, int dup, int j);
static void edscript(int n);
static void usage(void);
118 
119 int
120 main(int argc, char **argv)
121 {
122 	int i, m, n;
123 	eflag  = 0;
124 	oflag  = 0;
125 	if ((argc > 1) && (*argv[1] == '-')) {
126 		switch (argv[1][1]) {
127 		case 'e':
128 			eflag = 3;
129 			break;
130 		case '3':
131 			eflag = 2;
132 			break;
133 		case 'x':
134 			eflag = 1;
135 			break;
136 		case 'E':
137 			eflag = 3;
138 			oflag = 1;
139 			break;
140 		case 'X':
141 			oflag = eflag = 1;
142 			break;
143 		default:
144 			usage();
145 			break;
146 		}
147 		argv++;
148 		argc--;
149 	}
150 	if (argc < 6)
151 		usage();
152 	if (oflag) {
153 		(void) snprintf(f1mark, sizeof (f1mark), "<<<<<<< %s",
154 		    argc >= 7 ? argv[6] : argv[3]);
155 		(void) snprintf(f3mark, sizeof (f3mark), ">>>>>>> %s",
156 		    argc >= 8 ? argv[7] : argv[5]);
157 	}
158 
159 	m = readin(argv[1], d13);
160 	n = readin(argv[2], d23);
161 	for (i = 0; i <= 2; i++)
162 		if ((fp[i] = fopen(argv[i+3], "r")) == NULL) {
163 			save_err = errno;
164 			(void) fprintf(stderr, "diff3: can't open %s: ",
165 			    argv[i+3]);
166 			errno = save_err;
167 			perror("");
168 			exit(1);
169 		}
170 	merge(m, n);
171 	return (0);
172 }
173 
174 /*
175  * pick up the line numbers of all changes from
176  * one change file
177  * (this puts the numbers in a vector, which is not
178  * strictly necessary, since the vector is processed
179  * in one sequential pass. The vector could be optimized
180  * out of existence)
181  */
182 
183 static int
184 readin(char *name, struct diff *dd)
185 {
186 	int i;
187 	int a, b, c, d;
188 	char kind;
189 	char *p;
190 	if ((fp[0] = fopen(name, "r")) == NULL) {
191 		save_err = errno;
192 		(void) fprintf(stderr, "diff3: can't open %s: ", name);
193 		errno = save_err;
194 		perror("");
195 		exit(1);
196 	}
197 	for (i = 0; getchange(fp[0]); i++) {
198 		if (i >= NC) {
199 			(void) fprintf(stderr, "diff3: too many changes\n");
200 			exit(0);
201 		}
202 		p = line;
203 		a = b = number(&p);
204 		if (*p == ',') {
205 			p++;
206 			b = number(&p);
207 		}
208 		kind = *p++;
209 		c = d = number(&p);
210 		if (*p == ',') {
211 			p++;
212 			d = number(&p);
213 		}
214 		if (kind == 'a')
215 			a++;
216 		if (kind == 'd')
217 			c++;
218 		b++;
219 		d++;
220 		dd[i].old.from = a;
221 		dd[i].old.to = b;
222 		dd[i].new.from = c;
223 		dd[i].new.to = d;
224 	}
225 	dd[i].old.from = dd[i-1].old.to;
226 	dd[i].new.from = dd[i-1].new.to;
227 	(void) fclose(fp[0]);
228 	return (i);
229 }
230 
/*
 * Parse the unsigned decimal number at *lc and advance *lc past its
 * digits.  Returns 0, leaving *lc untouched, when *lc does not point
 * at a digit.
 *
 * A digit string too large for an int does not overflow: the result
 * saturates at INT_MAX - 1 and the remaining digits are still consumed.
 * The cap leaves room for a caller to add one to the value, as readin()
 * does when it converts to "last line + 1" form.
 */
static int
number(char **lc)
{
	const int limit = INT_MAX - 1;
	int nn = 0;

	while (**lc >= '0' && **lc <= '9') {
		int d = *(*lc)++ - '0';

		/* nn * 10 + d would exceed limit: clamp instead */
		if (nn > (limit - d) / 10)
			nn = limit;
		else
			nn = nn * 10 + d;
	}
	return (nn);
}
240 
/*
 * Return 1 if c is one of the characters '0' through '9', else 0.
 */
static int
digit(int c)
{
	if (c < '0')
		return (0);
	return (c <= '9');
}
246 
247 static int
248 getchange(FILE *b)
249 {
250 	while (getaline(b))
251 		if (digit(line[0]))
252 			return (1);
253 	return (0);
254 }
255 
256 static int
257 getaline(FILE *b)
258 {
259 	int i, c;
260 	for (i = 0; i < sizeof (line)-1; i++) {
261 		c = getc(b);
262 		if (c == EOF) {
263 			line[i] = 0;
264 			return (i);
265 		}
266 		line[i] = c;
267 		if (c == '\n') {
268 			line[++i] = 0;
269 			return (i);
270 		}
271 	}
272 	return (0);
273 }
274 
275 static void
276 merge(int m1, int m2)
277 {
278 	struct diff *d1, *d2, *d3;
279 	int dup;
280 	int j;
281 	int t1, t2;
282 	d1 = d13;
283 	d2 = d23;
284 	j = 0;
285 	for (; (t1 = d1 < d13+m1) | (t2 = d2 < d23+m2); ) {
286 		if (debug) {
287 			(void) printf("%d,%d=%d,%d %d,%d=%d,%d\n",
288 			    d1->old.from, d1->old.to,
289 			    d1->new.from, d1->new.to,
290 			    d2->old.from, d2->old.to,
291 			    d2->new.from, d2->new.to);
292 		}
293 
294 		/* first file is different from others */
295 		if (!t2 || t1 && d1->new.to < d2->new.from) {
296 			/* stuff peculiar to 1st file */
297 			if (eflag == 0) {
298 				separate("1");
299 				change(1, &d1->old, 0);
300 				keep(2, &d1->new);
301 				change(3, &d1->new, 0);
302 			}
303 			d1++;
304 			continue;
305 		}
306 
307 		/* second file is different from others */
308 		if (!t1 || t2 && d2->new.to < d1->new.from) {
309 			if (eflag == 0) {
310 				separate("2");
311 				keep(1, &d2->new);
312 				change(2, &d2->old, 0);
313 				change(3, &d2->new, 0);
314 			}
315 			d2++;
316 			continue;
317 		}
318 		/*
319 		 * merge overlapping changes in first file
320 		 * this happens after extension see below
321 		 */
322 		if (d1+1 < d13+m1 && d1->new.to >= d1[1].new.from) {
323 			d1[1].old.from = d1->old.from;
324 			d1[1].new.from = d1->new.from;
325 			d1++;
326 			continue;
327 		}
328 
329 		/* merge overlapping changes in second */
330 		if (d2+1 < d23+m2 && d2->new.to >= d2[1].new.from) {
331 			d2[1].old.from = d2->old.from;
332 			d2[1].new.from = d2->new.from;
333 			d2++;
334 			continue;
335 		}
336 
337 		/* stuff peculiar to third file or different in all */
338 		if (d1->new.from == d2->new.from && d1->new.to == d2->new.to) {
339 			dup = duplicate(&d1->old, &d2->old);
340 			/*
341 			 * dup = 0 means all files differ
342 			 * dup = 1 meands files 1&2 identical
343 			 */
344 			if (eflag == 0) {
345 				separate(dup?"3":"");
346 				change(1, &d1->old, dup);
347 				change(2, &d2->old, 0);
348 				d3 = d1->old.to > d1->old.from ? d1 : d2;
349 				change(3, &d3->new, 0);
350 			} else
351 				j = edit(d1, dup, j);
352 			d1++;
353 			d2++;
354 			continue;
355 		}
356 		/*
357 		 * overlapping changes from file1 & 2
358 		 * extend changes appropriately to
359 		 * make them coincide
360 		 */
361 		if (d1->new.from < d2->new.from) {
362 			d2->old.from -= d2->new.from-d1->new.from;
363 			d2->new.from = d1->new.from;
364 		} else if (d2->new.from < d1->new.from) {
365 			d1->old.from -= d1->new.from-d2->new.from;
366 			d1->new.from = d2->new.from;
367 		}
368 
369 		if (d1->new.to > d2->new.to) {
370 			d2->old.to += d1->new.to - d2->new.to;
371 			d2->new.to = d1->new.to;
372 		} else if (d2->new.to > d1->new.to) {
373 			d1->old.to += d2->new.to - d1->new.to;
374 			d1->new.to = d2->new.to;
375 		}
376 	}
377 	if (eflag) {
378 		edscript(j);
379 		if (j)
380 			(void) printf("w\nq\n");
381 	}
382 }
383 
/*
 * Print the line that introduces one difference: "====" followed
 * by the tag s and a newline.
 */
static void
separate(char *s)
{
	(void) fputs("====", stdout);
	(void) puts(s);
}
389 
390 /*
391  * the range of ines rold.from thru rold.to in file i
392  * is to be changed. it is to be printed only if
393  * it does not duplicate something to be printed later
394  */
395 static void
396 change(int i, struct range *rold, int dup)
397 {
398 	(void) printf("%d:", i);
399 	last[i] = rold->to;
400 	prange(rold);
401 	if (dup)
402 		return;
403 	if (debug)
404 		return;
405 	i--;
406 	(void) skip(i, rold->from, (char *)0);
407 	(void) skip(i, rold->to, "  ");
408 }
409 
410 /*
411  * print the range of line numbers, rold.from  thru rold.to
412  * as n1, n2 or n1
413  */
414 static void
415 prange(struct range *rold)
416 {
417 	if (rold->to <= rold->from)
418 		(void) printf("%da\n", rold->from-1);
419 	else {
420 		(void) printf("%d", rold->from);
421 		if (rold->to > rold->from+1)
422 			(void) printf(",%d", rold->to-1);
423 		(void) printf("c\n");
424 	}
425 }
426 
427 /*
428  * no difference was reported by diff between file 1(or 2)
429  * and file 3, and an artificial dummy difference (trange)
430  * must be ginned up to correspond to the change reported
431  * in the other file
432  */
433 static void
434 keep(int i, struct range *rnew)
435 {
436 	int delta;
437 	struct range trange;
438 	delta = last[3] - last[i];
439 	trange.from = rnew->from - delta;
440 	trange.to = rnew->to - delta;
441 	change(i, &trange, 1);
442 }
443 
444 /*
445  * skip to just befor line number from in file i
446  * if "pr" is nonzero, print all skipped stuff
447  * with string pr as a prefix
448  */
449 static int
450 skip(int i, int from, char *pr)
451 {
452 	int j, n;
453 	for (n = 0; cline[i] < from-1; n += j) {
454 		if ((j = getaline(fp[i])) == 0)
455 			trouble();
456 		if (pr)
457 			(void) printf("%s%s", pr, line);
458 		cline[i]++;
459 	}
460 	return (n);
461 }
462 
463 /*
464  * return 1 or 0 according as the old range
465  * (in file 1) contains exactly the same data
466  * as the new range (in file 2)
467  */
468 static int
469 duplicate(struct range *r1, struct range *r2)
470 {
471 	int c, d;
472 	int nchar;
473 	int nline;
474 	if (r1->to-r1->from != r2->to-r2->from)
475 		return (0);
476 	(void) skip(0, r1->from, (char *)0);
477 	(void) skip(1, r2->from, (char *)0);
478 	nchar = 0;
479 	for (nline = 0; nline < r1->to-r1->from; nline++) {
480 		do {
481 			c = getc(fp[0]);
482 			d = getc(fp[1]);
483 			if (c == -1 || d == -1)
484 				trouble();
485 			nchar++;
486 			if (c != d) {
487 				repos(nchar);
488 				return (0);
489 			}
490 		} while (c != '\n');
491 	}
492 	repos(nchar);
493 	return (1);
494 }
495 
496 static void
497 repos(int nchar)
498 {
499 	int i;
500 	for (i = 0; i < 2; i++)
501 		(void) fseek(fp[i], (long)-nchar, 1);
502 }
503 
/*
 * Report an internal inconsistency on stderr and terminate the
 * program with abort(); does not return.
 */
static void
trouble(void)
{
	(void) fputs("diff3: logic error\n", stderr);
	abort();
}
510 
511 /*
512  * collect an editing script for later regurgitation
513  */
514 static int
515 edit(struct diff *diff, int dup, int j)
516 {
517 	if (((dup+1)&eflag) == 0)
518 		return (j);
519 	j++;
520 	overlap[j] = !dup;
521 	if (!dup) overlapcnt++;
522 	de[j].old.from = diff->old.from;
523 	de[j].old.to = diff->old.to;
524 	de[j].new.from = de[j-1].new.to + skip(2, diff->new.from, (char *)0);
525 	de[j].new.to = de[j].new.from + skip(2, diff->new.to, (char *)0);
526 	return (j);
527 }
528 
529 /*		regurgitate */
530 static void
531 edscript(int n)
532 {
533 	int j, k;
534 	char	 block[BUFSIZ];
535 
536 	for (n = n; n > 0; n--) {
537 		if (!oflag || !overlap[n])
538 			prange(&de[n].old);
539 		else
540 			(void) printf("%da\n=======\n", de[n].old.to -1);
541 		(void) fseek(fp[2], (long)de[n].new.from, 0);
542 		for (k = de[n].new.to-de[n].new.from; k > 0; k -= j) {
543 			j = k > BUFSIZ?BUFSIZ:k;
544 			if (fread(block, 1, j, fp[2]) != j)
545 				trouble();
546 			(void) fwrite(block, 1, j, stdout);
547 		}
548 		if (!oflag || !overlap[n])
549 			(void) printf(".\n");
550 		else {
551 			(void) printf("%s\n.\n", f3mark);
552 			(void) printf("%da\n%s\n.\n", de[n].old.from-1, f1mark);
553 		}
554 	}
555 }
556 
/*
 * Print the command synopsis on stderr and exit with status 1.
 *
 * NOTE(review): the message names the optional marker arguments
 * "m1 m2", while the synopsis comment at the top of this file calls
 * them "m1 m3" -- confirm which spelling is intended.
 */
static void
usage(void)
{
	(void) fputs(
	    "\tusage: diff3prog [-ex3EX] d13 d23 f1 f2 f3 [m1 m2]\n",
	    stderr);
	exit(1);
}
564