xref: /illumos-gate/usr/src/cmd/bdiff/bdiff.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <fatal.h>
34 #include <signal.h>
35 #include <sys/types.h>
36 #include <unistd.h>
37 #include <stdio.h>
38 #include <ctype.h>
39 #include <string.h>
40 #include <stdlib.h>
41 #include <wait.h>
42 
43 #define	ONSIG	16
44 
45 /*
46  *	This program segments two files into pieces of <= seglim lines
47  *	(which is passed as a third argument or defaulted to some number)
48  *	and then executes diff upon the pieces. The output of
49  *	'diff' is then processed to make it look as if 'diff' had
50  *	processed the files whole. The reason for all this is that seglim
51  *	is a reasonable upper limit on the size of files that diff can
52  *	process.
53  *	NOTE -- by segmenting the files in this manner, it cannot be
54  *	guaranteed that the 'diffing' of the segments will generate
55  *	a minimal set of differences.
56  *	This process is most definitely not equivalent to 'diffing'
57  *	the files whole, assuming 'diff' could handle such large files.
58  *
59  *	'diff' is executed by a child process, generated by forking,
60  *	and communicates with this program through pipes.
61  */
62 
63 static char Error[128];
64 
65 static int seglim;	/* limit of size of file segment to be generated */
66 
67 static char diff[]  =  "/usr/bin/diff";
68 static char tempskel[] = "/tmp/bdXXXXXX"; /* used to generate temp file names */
69 static char tempfile[32];
70 static char otmp[32], ntmp[32];
71 static int	fflags;
72 static int	fatal_num = 1;		/* exit number for fatal exit */
73 static offset_t	linenum;
74 static size_t obufsiz, nbufsiz, dbufsiz;
75 static char *readline(char **, size_t *, FILE *);
76 static void addgen(char **, size_t *, FILE *);
77 static void delgen(char **, size_t *, FILE *);
78 static void fixnum(char *);
79 static void fatal(char *);
80 static void setsig(void);
81 static void setsig1(int);
82 static char *satoi(char *, offset_t *);
83 static FILE *maket(char *);
84 
85 static char *prognam;
86 
87 int
88 main(int argc, char *argv[])
89 {
90 	FILE *poldfile, *pnewfile;
91 	char *oline, *nline, *diffline;
92 	char *olp, *nlp, *dp;
93 	int otcnt, ntcnt;
94 	pid_t i;
95 	int pfd[2];
96 	FILE *poldtemp, *pnewtemp, *pipeinp;
97 	int status;
98 
99 	prognam = argv[0];
100 	/*
101 	 * Set flags for 'fatal' so that it will clean up,
102 	 * produce a message, and terminate.
103 	 */
104 	fflags = FTLMSG | FTLCLN | FTLEXIT;
105 
106 	setsig();
107 
108 	if (argc < 3 || argc > 5)
109 		fatal("arg count");
110 
111 	if (strcmp(argv[1], "-") == 0 && strcmp(argv[2], "-") == 0)
112 		fatal("both files standard input");
113 	if (strcmp(argv[1], "-") == 0)
114 		poldfile = stdin;
115 	else
116 		if ((poldfile = fopen(argv[1], "r")) == NULL) {
117 			(void) snprintf(Error, sizeof (Error),
118 				"Can not open '%s'", argv[1]);
119 			fatal(Error);
120 		}
121 	if (strcmp(argv[2], "-") == 0)
122 		pnewfile = stdin;
123 	else
124 		if ((pnewfile = fopen(argv[2], "r")) == NULL) {
125 			(void) snprintf(Error, sizeof (Error),
126 				"Can not open '%s'", argv[2]);
127 			fatal(Error);
128 		}
129 
130 	seglim = 3500;
131 
132 	if (argc > 3) {
133 		if (argv[3][0] == '-' && argv[3][1] == 's')
134 			fflags &= ~FTLMSG;
135 		else {
136 			if ((seglim = atoi(argv[3])) == 0)
137 				fatal("non-numeric limit");
138 			if (argc == 5 && argv[4][0] == '-' &&
139 					argv[4][1] == 's')
140 				fflags &= ~FTLMSG;
141 		}
142 	}
143 
144 	linenum = 0;
145 
146 	/* Allocate the buffers and initialize their lengths */
147 
148 	obufsiz = BUFSIZ;
149 	nbufsiz = BUFSIZ;
150 	dbufsiz = BUFSIZ;
151 
152 	if ((oline = (char *)malloc(obufsiz)) == NULL ||
153 	    (nline = (char *)malloc(nbufsiz)) == NULL ||
154 	    (diffline = (char *)malloc(dbufsiz)) == NULL)
155 		fatal("Out of memory");
156 
157 	/*
158 	 * The following while-loop will prevent any lines
159 	 * common to the beginning of both files from being
160 	 * sent to 'diff'. Since the running time of 'diff' is
161 	 * non-linear, this will help improve performance.
162 	 * If, during this process, both files reach EOF, then
163 	 * the files are equal and the program will terminate.
164 	 * If either file reaches EOF before the other, the
165 	 * program will generate the appropriate 'diff' output
166 	 * itself, since this can be easily determined and will
167 	 * avoid executing 'diff' completely.
168 	 */
169 	for (;;) {
170 		olp = readline(&oline, &obufsiz, poldfile);
171 		nlp = readline(&nline, &nbufsiz, pnewfile);
172 
173 		if (!olp && !nlp)	/* EOF found on both:  files equal */
174 			return (0);
175 
176 		if (!olp) {
177 			/*
178 			 * The entire old file is a prefix of the
179 			 * new file. Generate the appropriate "append"
180 			 * 'diff'-like output, which is of the form:
181 			 * 		nan, n
182 			 * where 'n' represents a line-number.
183 			 */
184 			addgen(&nline, &nbufsiz, pnewfile);
185 		}
186 
187 		if (!nlp) {
188 			/*
189 			 * The entire new file is a prefix of the
190 			 * old file. Generate the appropriate "delete"
191 			 * 'diff'-like output, which is of the form:
192 			 * 		n, ndn
193 			 * where 'n' represents a line-number.
194 			 */
195 			delgen(&oline, &obufsiz, poldfile);
196 		}
197 
198 		if (strcmp(olp, nlp) == 0)
199 			linenum++;
200 		else
201 			break;
202 	}
203 
204 	/*
205 	 * Here, first 'linenum' lines are equal.
206 	 * The following while-loop segments both files into
207 	 * seglim segments, forks and executes 'diff' on the
208 	 * segments, and processes the resulting output of
209 	 * 'diff', which is read from a pipe.
210 	 */
211 	for (;;) {
212 		/* If both files are at EOF, everything is done. */
213 		if (!olp && !nlp)	/* finished */
214 			return (0);
215 
216 		if (!olp) {
217 			/*
218 			 * Generate appropriate "append"
219 			 * output without executing 'diff'.
220 			 */
221 			addgen(&nline, &nbufsiz, pnewfile);
222 		}
223 
224 		if (!nlp) {
225 			/*
226 			 * Generate appropriate "delete"
227 			 * output without executing 'diff'.
228 			 */
229 			delgen(&oline, &obufsiz, poldfile);
230 		}
231 
232 		/*
233 		 * Create a temporary file to hold a segment
234 		 * from the old file, and write it.
235 		 */
236 		poldtemp = maket(otmp);
237 		otcnt = 0;
238 		while (olp && otcnt < seglim) {
239 			(void) fputs(oline, poldtemp);
240 			if (ferror(poldtemp) != 0) {
241 				fflags |= FTLMSG;
242 				fatal("Can not write to temporary file");
243 			}
244 			olp = readline(&oline, &obufsiz, poldfile);
245 			otcnt++;
246 		}
247 		(void) fclose(poldtemp);
248 
249 		/*
250 		 * Create a temporary file to hold a segment
251 		 * from the new file, and write it.
252 		 */
253 		pnewtemp = maket(ntmp);
254 		ntcnt = 0;
255 		while (nlp && ntcnt < seglim) {
256 			(void) fputs(nline, pnewtemp);
257 			if (ferror(pnewtemp) != 0) {
258 				fflags |= FTLMSG;
259 				fatal("Can not write to temporary file");
260 			}
261 			nlp = readline(&nline, &nbufsiz, pnewfile);
262 			ntcnt++;
263 		}
264 		(void) fclose(pnewtemp);
265 
266 		/* Create pipes and fork.  */
267 		if ((pipe(pfd)) == -1)
268 			fatal("Can not create pipe");
269 		if ((i = fork()) < (pid_t)0) {
270 			(void) close(pfd[0]);
271 			(void) close(pfd[1]);
272 			fatal("Can not fork, try again");
273 		} else if (i == (pid_t)0) {	/* child process */
274 			(void) close(pfd[0]);
275 			(void) close(1);
276 			(void) dup(pfd[1]);
277 			(void) close(pfd[1]);
278 
279 			/* Execute 'diff' on the segment files. */
280 			(void) execlp(diff, diff, otmp, ntmp, 0);
281 
282 			/*
283 			 * Exit code here must be > 1.
284 			 * Parent process treats exit code of 1 from the child
285 			 * as non-error because the child process "diff" exits
286 			 * with a status of 1 when a difference is encountered.
287 			 * The error here is a true error--the parent process
288 			 * needs to detect it and exit with a non-zero status.
289 			 */
290 			(void) close(1);
291 			(void) snprintf(Error, sizeof (Error),
292 			    "Can not execute '%s'", diff);
293 			fatal_num = 2;
294 			fatal(Error);
295 		} else {			/* parent process */
296 			(void) close(pfd[1]);
297 			pipeinp = fdopen(pfd[0], "r");
298 
299 			/* Process 'diff' output. */
300 			while ((dp = readline(&diffline, &dbufsiz, pipeinp))) {
301 				if (isdigit(*dp))
302 					fixnum(diffline);
303 				else
304 					(void) printf("%s", diffline);
305 			}
306 
307 			(void) fclose(pipeinp);
308 
309 			/* EOF on pipe. */
310 			(void) wait(&status);
311 			if (status&~0x100) {
312 				(void) snprintf(Error, sizeof (Error),
313 				    "'%s' failed", diff);
314 				fatal(Error);
315 			}
316 		}
317 		linenum += seglim;
318 
319 		/* Remove temporary files. */
320 		(void) unlink(otmp);
321 		(void) unlink(ntmp);
322 	}
323 }
324 
325 /* Routine to save remainder of a file. */
326 static void
327 saverest(char **linep, size_t *bufsizp, FILE *iptr)
328 {
329 	char *lp;
330 	FILE *temptr;
331 
332 	temptr = maket(tempfile);
333 
334 	lp = *linep;
335 
336 	while (lp) {
337 		(void) fputs(*linep, temptr);
338 		linenum++;
339 		lp = readline(linep, bufsizp, iptr);
340 	}
341 	(void) fclose(temptr);
342 }
343 
344 /* Routine to write out data saved by 'saverest' and to remove the file. */
345 static void
346 putsave(char **linep, size_t *bufsizp, char type)
347 {
348 	FILE *temptr;
349 
350 	if ((temptr = fopen(tempfile, "r")) == NULL) {
351 		(void) snprintf(Error, sizeof (Error),
352 		    "Can not open tempfile ('%s')", tempfile); fatal(Error);
353 	}
354 
355 	while (readline(linep, bufsizp, temptr))
356 		(void) printf("%c %s", type, *linep);
357 
358 	(void) fclose(temptr);
359 
360 	(void) unlink(tempfile);
361 }
362 
363 static void
364 fixnum(char *lp)
365 {
366 	offset_t num;
367 
368 	while (*lp) {
369 		switch (*lp) {
370 
371 		case 'a':
372 		case 'c':
373 		case 'd':
374 		case ',':
375 		case '\n':
376 			(void) printf("%c", *lp);
377 			lp++;
378 			break;
379 
380 		default:
381 			lp = satoi(lp, &num);
382 			num += linenum;
383 			(void) printf("%lld", num);
384 		}
385 	}
386 }
387 
388 static void
389 addgen(char **lpp, size_t *bufsizp, FILE *fp)
390 {
391 	offset_t oldline;
392 	(void) printf("%llda%lld", linenum, linenum+1);
393 
394 	/* Save lines of new file. */
395 	oldline = linenum + 1;
396 	saverest(lpp, bufsizp, fp);
397 
398 	if (oldline < linenum)
399 		(void) printf(",%lld\n", linenum);
400 	else
401 		(void) printf("\n");
402 
403 	/* Output saved lines, as 'diff' would. */
404 	putsave(lpp, bufsizp, '>');
405 
406 	exit(0);
407 }
408 
409 static void
410 delgen(char **lpp, size_t *bufsizp, FILE *fp)
411 {
412 	offset_t savenum;
413 
414 	(void) printf("%lld", linenum+1);
415 	savenum = linenum;
416 
417 	/* Save lines of old file. */
418 	saverest(lpp, bufsizp, fp);
419 
420 	if (savenum +1 != linenum)
421 		(void) printf(",%lldd%lld\n", linenum, savenum);
422 	else
423 		(void) printf("d%lld\n", savenum);
424 
425 	/* Output saved lines, as 'diff' would.  */
426 	putsave(lpp, bufsizp, '<');
427 
428 	exit(0);
429 }
430 
431 static void
432 clean_up()
433 {
434 	(void) unlink(tempfile);
435 	(void) unlink(otmp);
436 	(void) unlink(ntmp);
437 }
438 
439 static FILE *
440 maket(char *file)
441 {
442 	FILE *iop;
443 	int fd;
444 
445 	(void) strcpy(file, tempskel);
446 	if ((fd = mkstemp(file)) == -1 ||
447 		(iop = fdopen(fd, "w+")) == NULL) {
448 		(void) snprintf(Error, sizeof (Error),
449 		    "Can not open/create temp file ('%s')", file);
450 		fatal(Error);
451 	}
452 	return (iop);
453 }
454 
455 static void
456 fatal(char *msg)
457 /*
458  *	General purpose error handler.
459  *
460  *	The argument to fatal is a pointer to an error message string.
461  *	The action of this routine is driven completely from
462  *	the "fflags" global word (see <fatal.h>).
463  *
464  *	The FTLMSG bit controls the writing of the error
465  *	message on file descriptor 2.  A newline is written
466  *	after the user supplied message.
467  *
468  *	If the FTLCLN bit is on, clean_up is called.
469  */
470 {
471 	if (fflags & FTLMSG)
472 		(void) fprintf(stderr, "%s: %s\n", prognam, msg);
473 	if (fflags & FTLCLN)
474 		clean_up();
475 	if (fflags & FTLEXIT)
476 		exit(fatal_num);
477 }
478 
479 static void
480 setsig()
481 /*
482  *	General-purpose signal setting routine.
483  *	All non-ignored, non-caught signals are caught.
484  *	If a signal other than hangup, interrupt, or quit is caught,
485  *	a "user-oriented" message is printed on file descriptor 2.
486  *	If hangup, interrupt or quit is caught, that signal
487  *	is set to ignore.
488  *	Termination is like that of "fatal",
489  *	via "clean_up()"
490  */
491 {
492 	void (*act)(int);
493 	int j;
494 
495 	for (j = 1; j < ONSIG; j++) {
496 		act = signal(j, setsig1);
497 		if (act == SIG_ERR)
498 			continue;
499 		if (act == SIG_DFL)
500 			continue;
501 		(void) signal(j, act);
502 	}
503 }
504 
/*
 * Signal handler installed by setsig().
 *
 * Ignores further deliveries of 'sig', removes the temporary files and
 * terminates with status 1.  _exit() is used rather than exit() because
 * exit() flushes stdio streams, which is not safe from a signal handler
 * that may have interrupted a stdio call.  Output still sitting in
 * stdio buffers is therefore discarded.
 */
static void
setsig1(int sig)
{

	(void) signal(sig, SIG_IGN);
	clean_up();
	_exit(1);
}
513 
514 static char *
515 satoi(char *p, offset_t *ip)
516 {
517 	offset_t sum;
518 
519 	sum = 0;
520 	while (isdigit(*p))
521 		sum = sum * 10 + (*p++ - '0');
522 	*ip = sum;
523 	return (p);
524 }
525 
/*
 * Read a line of data from a file.  If the current buffer is not large enough
 * to contain the line, double the size of the buffer and continue reading.
 * Loop until either the entire line is read or until there is no more space
 * to be malloc'd.
 *
 * bufferp	address of the caller's buffer pointer; updated when the
 *		buffer is reallocated.
 * bufsizp	address of the buffer's size; updated to match.
 * filep	stream to read from.
 *
 * Returns *bufferp when a line (or a final line lacking a newline) was
 * read, or NULL when the first fgets() returns NULL.  Running out of
 * memory, or a read error while extending a line, is reported through
 * fatal().
 */

static char *
readline(char **bufferp, size_t *bufsizp, FILE *filep)
{
	char *bufp;
	size_t newsize;		/* number of bytes to make buffer */
	size_t oldsize;

	/*
	 * Plant sentinels in the last two bytes.  If fgets() fills the
	 * buffer, the last byte becomes '\0'; the byte before it then
	 * tells whether the line ended exactly there.
	 */
	(*bufferp)[*bufsizp - 1] = '\t'; /* arbitrary non-zero character */
	(*bufferp)[*bufsizp - 2] = ' ';	/* arbitrary non-newline char */
	bufp = fgets(*bufferp, *bufsizp, filep);
	if (bufp == NULL)
		return (bufp);
	/*
	 * The strlen() test makes sure the buffer really is full and the
	 * terminator was not preceded by a '\0' read from the file.
	 */
	while ((*bufferp)[*bufsizp -1] == '\0' &&
	    (*bufferp)[*bufsizp - 2] != '\n' &&
	    strlen(*bufferp) == *bufsizp - 1) {
		newsize = 2 * (*bufsizp);
		bufp = (char *)realloc((void *)*bufferp, newsize);
		if (bufp == NULL)
			fatal("Out of memory");
		oldsize = *bufsizp;
		*bufsizp = newsize;
		*bufferp = bufp;
		(*bufferp)[*bufsizp - 1] = '\t';
		(*bufferp)[*bufsizp - 2] = ' ';
		/* Continue reading over the previous terminating '\0'. */
		bufp = fgets(*bufferp + oldsize -1, oldsize + 1, filep);
		if (bufp == NULL) {
			/*
			 * End of file right after a full buffer: what was
			 * read so far is the final line.  feof() is used
			 * instead of inspecting the FILE structure.
			 */
			if (feof(filep)) {
				bufp = *bufferp;
				break;
			} else
				fatal("Read error");
		} else
			bufp = *bufferp;
	}
	return (bufp);
}
569