xref: /illumos-gate/usr/src/cmd/bdiff/bdiff.c (revision 806838751b3ce15414781bffd4adfac166204c62)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #include <fatal.h>
32 #include <signal.h>
33 #include <sys/types.h>
34 #include <unistd.h>
35 #include <stdio.h>
36 #include <ctype.h>
37 #include <string.h>
38 #include <stdlib.h>
39 #include <wait.h>
40 
41 #define	ONSIG	16
42 
43 /*
44  *	This program segments two files into pieces of <= seglim lines
45  *	(which is passed as a third argument or defaulted to some number)
46  *	and then executes diff upon the pieces. The output of
47  *	'diff' is then processed to make it look as if 'diff' had
48  *	processed the files whole. The reason for all this is that seglim
49  *	is a reasonable upper limit on the size of files that diff can
50  *	process.
51  *	NOTE -- by segmenting the files in this manner, it cannot be
52  *	guaranteed that the 'diffing' of the segments will generate
53  *	a minimal set of differences.
54  *	This process is most definitely not equivalent to 'diffing'
55  *	the files whole, assuming 'diff' could handle such large files.
56  *
57  *	'diff' is executed by a child process, generated by forking,
58  *	and communicates with this program through pipes.
59  */
60 
61 static char Error[128];
62 
63 static int seglim;	/* limit of size of file segment to be generated */
64 
65 static char diff[]  =  "/usr/bin/diff";
66 static char tempskel[] = "/tmp/bdXXXXXX"; /* used to generate temp file names */
67 static char tempfile[32];
68 static char otmp[32], ntmp[32];
69 static int	fflags;
70 static int	fatal_num = 1;		/* exit number for fatal exit */
71 static offset_t	linenum;
72 static size_t obufsiz, nbufsiz, dbufsiz;
73 static char *readline(char **, size_t *, FILE *);
74 static void addgen(char **, size_t *, FILE *);
75 static void delgen(char **, size_t *, FILE *);
76 static void fixnum(char *);
77 static void fatal(char *);
78 static void setsig(void);
79 static void setsig1(int);
80 static char *satoi(char *, offset_t *);
81 static FILE *maket(char *);
82 
83 static char *prognam;
84 
85 int
86 main(int argc, char *argv[])
87 {
88 	FILE *poldfile, *pnewfile;
89 	char *oline, *nline, *diffline;
90 	char *olp, *nlp, *dp;
91 	int otcnt, ntcnt;
92 	pid_t i;
93 	int pfd[2];
94 	FILE *poldtemp, *pnewtemp, *pipeinp;
95 	int status;
96 
97 	prognam = argv[0];
98 	/*
99 	 * Set flags for 'fatal' so that it will clean up,
100 	 * produce a message, and terminate.
101 	 */
102 	fflags = FTLMSG | FTLCLN | FTLEXIT;
103 
104 	setsig();
105 
106 	if (argc < 3 || argc > 5)
107 		fatal("arg count");
108 
109 	if (strcmp(argv[1], "-") == 0 && strcmp(argv[2], "-") == 0)
110 		fatal("both files standard input");
111 	if (strcmp(argv[1], "-") == 0)
112 		poldfile = stdin;
113 	else
114 		if ((poldfile = fopen(argv[1], "r")) == NULL) {
115 			(void) snprintf(Error, sizeof (Error),
116 				"Can not open '%s'", argv[1]);
117 			fatal(Error);
118 		}
119 	if (strcmp(argv[2], "-") == 0)
120 		pnewfile = stdin;
121 	else
122 		if ((pnewfile = fopen(argv[2], "r")) == NULL) {
123 			(void) snprintf(Error, sizeof (Error),
124 				"Can not open '%s'", argv[2]);
125 			fatal(Error);
126 		}
127 
128 	seglim = 3500;
129 
130 	if (argc > 3) {
131 		if (argv[3][0] == '-' && argv[3][1] == 's')
132 			fflags &= ~FTLMSG;
133 		else {
134 			if ((seglim = atoi(argv[3])) == 0)
135 				fatal("non-numeric limit");
136 			if (argc == 5 && argv[4][0] == '-' &&
137 					argv[4][1] == 's')
138 				fflags &= ~FTLMSG;
139 		}
140 	}
141 
142 	linenum = 0;
143 
144 	/* Allocate the buffers and initialize their lengths */
145 
146 	obufsiz = BUFSIZ;
147 	nbufsiz = BUFSIZ;
148 	dbufsiz = BUFSIZ;
149 
150 	if ((oline = (char *)malloc(obufsiz)) == NULL ||
151 	    (nline = (char *)malloc(nbufsiz)) == NULL ||
152 	    (diffline = (char *)malloc(dbufsiz)) == NULL)
153 		fatal("Out of memory");
154 
155 	/*
156 	 * The following while-loop will prevent any lines
157 	 * common to the beginning of both files from being
158 	 * sent to 'diff'. Since the running time of 'diff' is
159 	 * non-linear, this will help improve performance.
160 	 * If, during this process, both files reach EOF, then
161 	 * the files are equal and the program will terminate.
162 	 * If either file reaches EOF before the other, the
163 	 * program will generate the appropriate 'diff' output
164 	 * itself, since this can be easily determined and will
165 	 * avoid executing 'diff' completely.
166 	 */
167 	for (;;) {
168 		olp = readline(&oline, &obufsiz, poldfile);
169 		nlp = readline(&nline, &nbufsiz, pnewfile);
170 
171 		if (!olp && !nlp)	/* EOF found on both:  files equal */
172 			return (0);
173 
174 		if (!olp) {
175 			/*
176 			 * The entire old file is a prefix of the
177 			 * new file. Generate the appropriate "append"
178 			 * 'diff'-like output, which is of the form:
179 			 * 		nan, n
180 			 * where 'n' represents a line-number.
181 			 */
182 			addgen(&nline, &nbufsiz, pnewfile);
183 		}
184 
185 		if (!nlp) {
186 			/*
187 			 * The entire new file is a prefix of the
188 			 * old file. Generate the appropriate "delete"
189 			 * 'diff'-like output, which is of the form:
190 			 * 		n, ndn
191 			 * where 'n' represents a line-number.
192 			 */
193 			delgen(&oline, &obufsiz, poldfile);
194 		}
195 
196 		if (strcmp(olp, nlp) == 0)
197 			linenum++;
198 		else
199 			break;
200 	}
201 
202 	/*
203 	 * Here, first 'linenum' lines are equal.
204 	 * The following while-loop segments both files into
205 	 * seglim segments, forks and executes 'diff' on the
206 	 * segments, and processes the resulting output of
207 	 * 'diff', which is read from a pipe.
208 	 */
209 	for (;;) {
210 		/* If both files are at EOF, everything is done. */
211 		if (!olp && !nlp)	/* finished */
212 			return (0);
213 
214 		if (!olp) {
215 			/*
216 			 * Generate appropriate "append"
217 			 * output without executing 'diff'.
218 			 */
219 			addgen(&nline, &nbufsiz, pnewfile);
220 		}
221 
222 		if (!nlp) {
223 			/*
224 			 * Generate appropriate "delete"
225 			 * output without executing 'diff'.
226 			 */
227 			delgen(&oline, &obufsiz, poldfile);
228 		}
229 
230 		/*
231 		 * Create a temporary file to hold a segment
232 		 * from the old file, and write it.
233 		 */
234 		poldtemp = maket(otmp);
235 		otcnt = 0;
236 		while (olp && otcnt < seglim) {
237 			(void) fputs(oline, poldtemp);
238 			if (ferror(poldtemp) != 0) {
239 				fflags |= FTLMSG;
240 				fatal("Can not write to temporary file");
241 			}
242 			olp = readline(&oline, &obufsiz, poldfile);
243 			otcnt++;
244 		}
245 		(void) fclose(poldtemp);
246 
247 		/*
248 		 * Create a temporary file to hold a segment
249 		 * from the new file, and write it.
250 		 */
251 		pnewtemp = maket(ntmp);
252 		ntcnt = 0;
253 		while (nlp && ntcnt < seglim) {
254 			(void) fputs(nline, pnewtemp);
255 			if (ferror(pnewtemp) != 0) {
256 				fflags |= FTLMSG;
257 				fatal("Can not write to temporary file");
258 			}
259 			nlp = readline(&nline, &nbufsiz, pnewfile);
260 			ntcnt++;
261 		}
262 		(void) fclose(pnewtemp);
263 
264 		/* Create pipes and fork.  */
265 		if ((pipe(pfd)) == -1)
266 			fatal("Can not create pipe");
267 		if ((i = fork()) < (pid_t)0) {
268 			(void) close(pfd[0]);
269 			(void) close(pfd[1]);
270 			fatal("Can not fork, try again");
271 		} else if (i == (pid_t)0) {	/* child process */
272 			(void) close(pfd[0]);
273 			(void) close(1);
274 			(void) dup(pfd[1]);
275 			(void) close(pfd[1]);
276 
277 			/* Execute 'diff' on the segment files. */
278 			(void) execlp(diff, diff, otmp, ntmp, 0);
279 
280 			/*
281 			 * Exit code here must be > 1.
282 			 * Parent process treats exit code of 1 from the child
283 			 * as non-error because the child process "diff" exits
284 			 * with a status of 1 when a difference is encountered.
285 			 * The error here is a true error--the parent process
286 			 * needs to detect it and exit with a non-zero status.
287 			 */
288 			(void) close(1);
289 			(void) snprintf(Error, sizeof (Error),
290 			    "Can not execute '%s'", diff);
291 			fatal_num = 2;
292 			fatal(Error);
293 		} else {			/* parent process */
294 			(void) close(pfd[1]);
295 			pipeinp = fdopen(pfd[0], "r");
296 
297 			/* Process 'diff' output. */
298 			while ((dp = readline(&diffline, &dbufsiz, pipeinp))) {
299 				if (isdigit(*dp))
300 					fixnum(diffline);
301 				else
302 					(void) printf("%s", diffline);
303 			}
304 
305 			(void) fclose(pipeinp);
306 
307 			/* EOF on pipe. */
308 			(void) wait(&status);
309 			if (status&~0x100) {
310 				(void) snprintf(Error, sizeof (Error),
311 				    "'%s' failed", diff);
312 				fatal(Error);
313 			}
314 		}
315 		linenum += seglim;
316 
317 		/* Remove temporary files. */
318 		(void) unlink(otmp);
319 		(void) unlink(ntmp);
320 	}
321 }
322 
323 /* Routine to save remainder of a file. */
324 static void
325 saverest(char **linep, size_t *bufsizp, FILE *iptr)
326 {
327 	char *lp;
328 	FILE *temptr;
329 
330 	temptr = maket(tempfile);
331 
332 	lp = *linep;
333 
334 	while (lp) {
335 		(void) fputs(*linep, temptr);
336 		linenum++;
337 		lp = readline(linep, bufsizp, iptr);
338 	}
339 	(void) fclose(temptr);
340 }
341 
342 /* Routine to write out data saved by 'saverest' and to remove the file. */
343 static void
344 putsave(char **linep, size_t *bufsizp, char type)
345 {
346 	FILE *temptr;
347 
348 	if ((temptr = fopen(tempfile, "r")) == NULL) {
349 		(void) snprintf(Error, sizeof (Error),
350 		    "Can not open tempfile ('%s')", tempfile); fatal(Error);
351 	}
352 
353 	while (readline(linep, bufsizp, temptr))
354 		(void) printf("%c %s", type, *linep);
355 
356 	(void) fclose(temptr);
357 
358 	(void) unlink(tempfile);
359 }
360 
361 static void
362 fixnum(char *lp)
363 {
364 	offset_t num;
365 
366 	while (*lp) {
367 		switch (*lp) {
368 
369 		case 'a':
370 		case 'c':
371 		case 'd':
372 		case ',':
373 		case '\n':
374 			(void) printf("%c", *lp);
375 			lp++;
376 			break;
377 
378 		default:
379 			lp = satoi(lp, &num);
380 			num += linenum;
381 			(void) printf("%lld", num);
382 		}
383 	}
384 }
385 
386 static void
387 addgen(char **lpp, size_t *bufsizp, FILE *fp)
388 {
389 	offset_t oldline;
390 	(void) printf("%llda%lld", linenum, linenum+1);
391 
392 	/* Save lines of new file. */
393 	oldline = linenum + 1;
394 	saverest(lpp, bufsizp, fp);
395 
396 	if (oldline < linenum)
397 		(void) printf(",%lld\n", linenum);
398 	else
399 		(void) printf("\n");
400 
401 	/* Output saved lines, as 'diff' would. */
402 	putsave(lpp, bufsizp, '>');
403 
404 	exit(0);
405 }
406 
407 static void
408 delgen(char **lpp, size_t *bufsizp, FILE *fp)
409 {
410 	offset_t savenum;
411 
412 	(void) printf("%lld", linenum+1);
413 	savenum = linenum;
414 
415 	/* Save lines of old file. */
416 	saverest(lpp, bufsizp, fp);
417 
418 	if (savenum +1 != linenum)
419 		(void) printf(",%lldd%lld\n", linenum, savenum);
420 	else
421 		(void) printf("d%lld\n", savenum);
422 
423 	/* Output saved lines, as 'diff' would.  */
424 	putsave(lpp, bufsizp, '<');
425 
426 	exit(0);
427 }
428 
429 static void
430 clean_up()
431 {
432 	(void) unlink(tempfile);
433 	(void) unlink(otmp);
434 	(void) unlink(ntmp);
435 }
436 
437 static FILE *
438 maket(char *file)
439 {
440 	FILE *iop;
441 	int fd;
442 
443 	(void) strcpy(file, tempskel);
444 	if ((fd = mkstemp(file)) == -1 ||
445 		(iop = fdopen(fd, "w+")) == NULL) {
446 		(void) snprintf(Error, sizeof (Error),
447 		    "Can not open/create temp file ('%s')", file);
448 		fatal(Error);
449 	}
450 	return (iop);
451 }
452 
453 static void
454 fatal(char *msg)
455 /*
456  *	General purpose error handler.
457  *
458  *	The argument to fatal is a pointer to an error message string.
459  *	The action of this routine is driven completely from
460  *	the "fflags" global word (see <fatal.h>).
461  *
462  *	The FTLMSG bit controls the writing of the error
463  *	message on file descriptor 2.  A newline is written
464  *	after the user supplied message.
465  *
466  *	If the FTLCLN bit is on, clean_up is called.
467  */
468 {
469 	if (fflags & FTLMSG)
470 		(void) fprintf(stderr, "%s: %s\n", prognam, msg);
471 	if (fflags & FTLCLN)
472 		clean_up();
473 	if (fflags & FTLEXIT)
474 		exit(fatal_num);
475 }
476 
477 static void
478 setsig()
479 /*
480  *	General-purpose signal setting routine.
481  *	All non-ignored, non-caught signals are caught.
482  *	If a signal other than hangup, interrupt, or quit is caught,
483  *	a "user-oriented" message is printed on file descriptor 2.
484  *	If hangup, interrupt or quit is caught, that signal
485  *	is set to ignore.
486  *	Termination is like that of "fatal",
487  *	via "clean_up()"
488  */
489 {
490 	void (*act)(int);
491 	int j;
492 
493 	for (j = 1; j < ONSIG; j++) {
494 		act = signal(j, setsig1);
495 		if (act == SIG_ERR)
496 			continue;
497 		if (act == SIG_DFL)
498 			continue;
499 		(void) signal(j, act);
500 	}
501 }
502 
/*
 * Signal handler installed by setsig(): ignore further deliveries of
 * 'sig', remove the temporary files and terminate with status 1.
 *
 * NOTE(review): exit() is called from signal context here; confirm that
 * this is acceptable before relying on it.
 */
static void
setsig1(int sig)
{
	(void) signal(sig, SIG_IGN);

	clean_up();

	exit(1);
}
511 
512 static char *
513 satoi(char *p, offset_t *ip)
514 {
515 	offset_t sum;
516 
517 	sum = 0;
518 	while (isdigit(*p))
519 		sum = sum * 10 + (*p++ - '0');
520 	*ip = sum;
521 	return (p);
522 }
523 
/*
 * Read a line of data from a file.  If the current buffer is not large enough
 * to contain the line, double the size of the buffer and continue reading.
 * Loop until either the entire line is read or until there is no more space
 * to be malloc'd.
 *
 * *bufferp is the (reallocatable) line buffer and *bufsizp its size, which
 * must be at least 2; both are updated when the buffer grows.  Returns the
 * buffer, or NULL when the first read yields nothing (end of file or
 * error).  Running out of memory, or a read error while extending a line,
 * is reported through fatal().
 */
static char *
readline(char **bufferp, size_t *bufsizp, FILE *filep)
{
	char *bufp;
	size_t newsize;		/* number of bytes to make buffer */
	size_t oldsize;

	/*
	 * Plant sentinels in the last two bytes.  fgets() stores '\0' in
	 * the last byte only if it filled the whole buffer, and '\n' in the
	 * byte before it only if the line ended exactly there.
	 */
	(*bufferp)[*bufsizp - 1] = '\t'; /* arbitrary non-zero character */
	(*bufferp)[*bufsizp - 2] = ' ';	/* arbitrary non-newline char */
	if (fgets(*bufferp, *bufsizp, filep) == NULL)
		return (NULL);

	/* Buffer full and no newline seen: the line continues. */
	while ((*bufferp)[*bufsizp - 1] == '\0' &&
	    (*bufferp)[*bufsizp - 2] != '\n' &&
	    strlen(*bufferp) == *bufsizp - 1) {
		oldsize = *bufsizp;
		if (oldsize > (size_t)-1 / 2)	/* doubling would wrap */
			fatal("Out of memory");
		newsize = 2 * oldsize;

		bufp = realloc(*bufferp, newsize);
		if (bufp == NULL)
			fatal("Out of memory");
		*bufferp = bufp;
		*bufsizp = newsize;

		(*bufferp)[newsize - 1] = '\t';
		(*bufferp)[newsize - 2] = ' ';

		/* Continue on top of the old terminating '\0'. */
		if (fgets(*bufferp + oldsize - 1, oldsize + 1, filep) ==
		    NULL) {
			/* EOF: the unterminated last line is complete. */
			if (feof(filep))
				break;
			fatal("Read error");
		}
	}
	return (*bufferp);
}
567