1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
23 /* All Rights Reserved */
24
25
26 /*
27 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 #include <fatal.h>
32 #include <signal.h>
33 #include <sys/types.h>
34 #include <unistd.h>
35 #include <stdio.h>
36 #include <ctype.h>
37 #include <string.h>
38 #include <stdlib.h>
39 #include <wait.h>
40
41 #define ONSIG 16
42
/*
 * This program segments two files into pieces of <= seglim lines
 * (seglim is passed as the third argument, or defaults to 3500)
 * and then executes 'diff' upon the pieces. The output of
 * 'diff' is then processed to make it look as if 'diff' had
 * processed the files whole. The reason for all this is that seglim
 * is a reasonable upper limit on the size of files that 'diff' can
 * process.
 * NOTE -- by segmenting the files in this manner, it cannot be
 * guaranteed that the 'diffing' of the segments will generate
 * a minimal set of differences.
 * This process is most definitely not equivalent to 'diffing'
 * the files whole, assuming 'diff' could handle such large files.
 *
 * 'diff' is executed by a child process, created by forking,
 * and communicates with this program through a pipe.
 */
60
61 static char Error[128];
62
63 static int seglim; /* limit of size of file segment to be generated */
64
65 static char diff[] = "/usr/bin/diff";
66 static char tempskel[] = "/tmp/bdXXXXXX"; /* used to generate temp file names */
67 static char tempfile[32];
68 static char otmp[32], ntmp[32];
69 static int fflags;
70 static int fatal_num = 1; /* exit number for fatal exit */
71 static offset_t linenum;
72 static size_t obufsiz, nbufsiz, dbufsiz;
73 static char *readline(char **, size_t *, FILE *);
74 static void addgen(char **, size_t *, FILE *);
75 static void delgen(char **, size_t *, FILE *);
76 static void fixnum(char *);
77 static void fatal(char *);
78 static void setsig(void);
79 static void setsig1(int);
80 static char *satoi(char *, offset_t *);
81 static FILE *maket(char *);
82
83 static char *prognam;
84
85 int
main(int argc,char * argv[])86 main(int argc, char *argv[])
87 {
88 FILE *poldfile, *pnewfile;
89 char *oline, *nline, *diffline;
90 char *olp, *nlp, *dp;
91 int otcnt, ntcnt;
92 pid_t i;
93 int pfd[2];
94 FILE *poldtemp, *pnewtemp, *pipeinp;
95 int status;
96
97 prognam = argv[0];
98 /*
99 * Set flags for 'fatal' so that it will clean up,
100 * produce a message, and terminate.
101 */
102 fflags = FTLMSG | FTLCLN | FTLEXIT;
103
104 setsig();
105
106 if (argc < 3 || argc > 5)
107 fatal("arg count");
108
109 if (strcmp(argv[1], "-") == 0 && strcmp(argv[2], "-") == 0)
110 fatal("both files standard input");
111 if (strcmp(argv[1], "-") == 0)
112 poldfile = stdin;
113 else
114 if ((poldfile = fopen(argv[1], "r")) == NULL) {
115 (void) snprintf(Error, sizeof (Error),
116 "Can not open '%s'", argv[1]);
117 fatal(Error);
118 }
119 if (strcmp(argv[2], "-") == 0)
120 pnewfile = stdin;
121 else
122 if ((pnewfile = fopen(argv[2], "r")) == NULL) {
123 (void) snprintf(Error, sizeof (Error),
124 "Can not open '%s'", argv[2]);
125 fatal(Error);
126 }
127
128 seglim = 3500;
129
130 if (argc > 3) {
131 if (argv[3][0] == '-' && argv[3][1] == 's')
132 fflags &= ~FTLMSG;
133 else {
134 if ((seglim = atoi(argv[3])) == 0)
135 fatal("non-numeric limit");
136 if (argc == 5 && argv[4][0] == '-' &&
137 argv[4][1] == 's')
138 fflags &= ~FTLMSG;
139 }
140 }
141
142 linenum = 0;
143
144 /* Allocate the buffers and initialize their lengths */
145
146 obufsiz = BUFSIZ;
147 nbufsiz = BUFSIZ;
148 dbufsiz = BUFSIZ;
149
150 if ((oline = (char *)malloc(obufsiz)) == NULL ||
151 (nline = (char *)malloc(nbufsiz)) == NULL ||
152 (diffline = (char *)malloc(dbufsiz)) == NULL)
153 fatal("Out of memory");
154
155 /*
156 * The following while-loop will prevent any lines
157 * common to the beginning of both files from being
158 * sent to 'diff'. Since the running time of 'diff' is
159 * non-linear, this will help improve performance.
160 * If, during this process, both files reach EOF, then
161 * the files are equal and the program will terminate.
162 * If either file reaches EOF before the other, the
163 * program will generate the appropriate 'diff' output
164 * itself, since this can be easily determined and will
165 * avoid executing 'diff' completely.
166 */
167 for (;;) {
168 olp = readline(&oline, &obufsiz, poldfile);
169 nlp = readline(&nline, &nbufsiz, pnewfile);
170
171 if (!olp && !nlp) /* EOF found on both: files equal */
172 return (0);
173
174 if (!olp) {
175 /*
176 * The entire old file is a prefix of the
177 * new file. Generate the appropriate "append"
178 * 'diff'-like output, which is of the form:
179 * nan, n
180 * where 'n' represents a line-number.
181 */
182 addgen(&nline, &nbufsiz, pnewfile);
183 }
184
185 if (!nlp) {
186 /*
187 * The entire new file is a prefix of the
188 * old file. Generate the appropriate "delete"
189 * 'diff'-like output, which is of the form:
190 * n, ndn
191 * where 'n' represents a line-number.
192 */
193 delgen(&oline, &obufsiz, poldfile);
194 }
195
196 if (strcmp(olp, nlp) == 0)
197 linenum++;
198 else
199 break;
200 }
201
202 /*
203 * Here, first 'linenum' lines are equal.
204 * The following while-loop segments both files into
205 * seglim segments, forks and executes 'diff' on the
206 * segments, and processes the resulting output of
207 * 'diff', which is read from a pipe.
208 */
209 for (;;) {
210 /* If both files are at EOF, everything is done. */
211 if (!olp && !nlp) /* finished */
212 return (0);
213
214 if (!olp) {
215 /*
216 * Generate appropriate "append"
217 * output without executing 'diff'.
218 */
219 addgen(&nline, &nbufsiz, pnewfile);
220 }
221
222 if (!nlp) {
223 /*
224 * Generate appropriate "delete"
225 * output without executing 'diff'.
226 */
227 delgen(&oline, &obufsiz, poldfile);
228 }
229
230 /*
231 * Create a temporary file to hold a segment
232 * from the old file, and write it.
233 */
234 poldtemp = maket(otmp);
235 otcnt = 0;
236 while (olp && otcnt < seglim) {
237 (void) fputs(oline, poldtemp);
238 if (ferror(poldtemp) != 0) {
239 fflags |= FTLMSG;
240 fatal("Can not write to temporary file");
241 }
242 olp = readline(&oline, &obufsiz, poldfile);
243 otcnt++;
244 }
245 (void) fclose(poldtemp);
246
247 /*
248 * Create a temporary file to hold a segment
249 * from the new file, and write it.
250 */
251 pnewtemp = maket(ntmp);
252 ntcnt = 0;
253 while (nlp && ntcnt < seglim) {
254 (void) fputs(nline, pnewtemp);
255 if (ferror(pnewtemp) != 0) {
256 fflags |= FTLMSG;
257 fatal("Can not write to temporary file");
258 }
259 nlp = readline(&nline, &nbufsiz, pnewfile);
260 ntcnt++;
261 }
262 (void) fclose(pnewtemp);
263
264 /* Create pipes and fork. */
265 if ((pipe(pfd)) == -1)
266 fatal("Can not create pipe");
267 if ((i = fork()) < (pid_t)0) {
268 (void) close(pfd[0]);
269 (void) close(pfd[1]);
270 fatal("Can not fork, try again");
271 } else if (i == (pid_t)0) { /* child process */
272 (void) close(pfd[0]);
273 (void) close(1);
274 (void) dup(pfd[1]);
275 (void) close(pfd[1]);
276
277 /* Execute 'diff' on the segment files. */
278 (void) execlp(diff, diff, otmp, ntmp, 0);
279
280 /*
281 * Exit code here must be > 1.
282 * Parent process treats exit code of 1 from the child
283 * as non-error because the child process "diff" exits
284 * with a status of 1 when a difference is encountered.
285 * The error here is a true error--the parent process
286 * needs to detect it and exit with a non-zero status.
287 */
288 (void) close(1);
289 (void) snprintf(Error, sizeof (Error),
290 "Can not execute '%s'", diff);
291 fatal_num = 2;
292 fatal(Error);
293 } else { /* parent process */
294 (void) close(pfd[1]);
295 pipeinp = fdopen(pfd[0], "r");
296
297 /* Process 'diff' output. */
298 while ((dp = readline(&diffline, &dbufsiz, pipeinp))) {
299 if (isdigit(*dp))
300 fixnum(diffline);
301 else
302 (void) printf("%s", diffline);
303 }
304
305 (void) fclose(pipeinp);
306
307 /* EOF on pipe. */
308 (void) wait(&status);
309 if (status&~0x100) {
310 (void) snprintf(Error, sizeof (Error),
311 "'%s' failed", diff);
312 fatal(Error);
313 }
314 }
315 linenum += seglim;
316
317 /* Remove temporary files. */
318 (void) unlink(otmp);
319 (void) unlink(ntmp);
320 }
321 }
322
323 /* Routine to save remainder of a file. */
324 static void
saverest(char ** linep,size_t * bufsizp,FILE * iptr)325 saverest(char **linep, size_t *bufsizp, FILE *iptr)
326 {
327 char *lp;
328 FILE *temptr;
329
330 temptr = maket(tempfile);
331
332 lp = *linep;
333
334 while (lp) {
335 (void) fputs(*linep, temptr);
336 linenum++;
337 lp = readline(linep, bufsizp, iptr);
338 }
339 (void) fclose(temptr);
340 }
341
342 /* Routine to write out data saved by 'saverest' and to remove the file. */
343 static void
putsave(char ** linep,size_t * bufsizp,char type)344 putsave(char **linep, size_t *bufsizp, char type)
345 {
346 FILE *temptr;
347
348 if ((temptr = fopen(tempfile, "r")) == NULL) {
349 (void) snprintf(Error, sizeof (Error),
350 "Can not open tempfile ('%s')", tempfile); fatal(Error);
351 }
352
353 while (readline(linep, bufsizp, temptr))
354 (void) printf("%c %s", type, *linep);
355
356 (void) fclose(temptr);
357
358 (void) unlink(tempfile);
359 }
360
361 static void
fixnum(char * lp)362 fixnum(char *lp)
363 {
364 offset_t num;
365
366 while (*lp) {
367 switch (*lp) {
368
369 case 'a':
370 case 'c':
371 case 'd':
372 case ',':
373 case '\n':
374 (void) printf("%c", *lp);
375 lp++;
376 break;
377
378 default:
379 lp = satoi(lp, &num);
380 num += linenum;
381 (void) printf("%lld", num);
382 }
383 }
384 }
385
386 static void
addgen(char ** lpp,size_t * bufsizp,FILE * fp)387 addgen(char **lpp, size_t *bufsizp, FILE *fp)
388 {
389 offset_t oldline;
390 (void) printf("%llda%lld", linenum, linenum+1);
391
392 /* Save lines of new file. */
393 oldline = linenum + 1;
394 saverest(lpp, bufsizp, fp);
395
396 if (oldline < linenum)
397 (void) printf(",%lld\n", linenum);
398 else
399 (void) printf("\n");
400
401 /* Output saved lines, as 'diff' would. */
402 putsave(lpp, bufsizp, '>');
403
404 exit(0);
405 }
406
407 static void
delgen(char ** lpp,size_t * bufsizp,FILE * fp)408 delgen(char **lpp, size_t *bufsizp, FILE *fp)
409 {
410 offset_t savenum;
411
412 (void) printf("%lld", linenum+1);
413 savenum = linenum;
414
415 /* Save lines of old file. */
416 saverest(lpp, bufsizp, fp);
417
418 if (savenum +1 != linenum)
419 (void) printf(",%lldd%lld\n", linenum, savenum);
420 else
421 (void) printf("d%lld\n", savenum);
422
423 /* Output saved lines, as 'diff' would. */
424 putsave(lpp, bufsizp, '<');
425
426 exit(0);
427 }
428
429 static void
clean_up()430 clean_up()
431 {
432 (void) unlink(tempfile);
433 (void) unlink(otmp);
434 (void) unlink(ntmp);
435 }
436
437 static FILE *
maket(char * file)438 maket(char *file)
439 {
440 FILE *iop;
441 int fd;
442
443 (void) strcpy(file, tempskel);
444 if ((fd = mkstemp(file)) == -1 ||
445 (iop = fdopen(fd, "w+")) == NULL) {
446 (void) snprintf(Error, sizeof (Error),
447 "Can not open/create temp file ('%s')", file);
448 fatal(Error);
449 }
450 return (iop);
451 }
452
453 static void
fatal(char * msg)454 fatal(char *msg)
455 /*
456 * General purpose error handler.
457 *
458 * The argument to fatal is a pointer to an error message string.
459 * The action of this routine is driven completely from
460 * the "fflags" global word (see <fatal.h>).
461 *
462 * The FTLMSG bit controls the writing of the error
463 * message on file descriptor 2. A newline is written
464 * after the user supplied message.
465 *
466 * If the FTLCLN bit is on, clean_up is called.
467 */
468 {
469 if (fflags & FTLMSG)
470 (void) fprintf(stderr, "%s: %s\n", prognam, msg);
471 if (fflags & FTLCLN)
472 clean_up();
473 if (fflags & FTLEXIT)
474 exit(fatal_num);
475 }
476
477 static void
setsig()478 setsig()
479 /*
480 * General-purpose signal setting routine.
481 * All non-ignored, non-caught signals are caught.
482 * If a signal other than hangup, interrupt, or quit is caught,
483 * a "user-oriented" message is printed on file descriptor 2.
484 * If hangup, interrupt or quit is caught, that signal
485 * is set to ignore.
486 * Termination is like that of "fatal",
487 * via "clean_up()"
488 */
489 {
490 void (*act)(int);
491 int j;
492
493 for (j = 1; j < ONSIG; j++) {
494 act = signal(j, setsig1);
495 if (act == SIG_ERR)
496 continue;
497 if (act == SIG_DFL)
498 continue;
499 (void) signal(j, act);
500 }
501 }
502
/*
 * Signal handler installed by setsig().
 *
 * Ignores further deliveries of the same signal, removes the temporary
 * files, and terminates with status 1.
 *
 * Termination uses _exit() rather than exit(): exit() flushes stdio
 * buffers and runs atexit handlers, which is not async-signal-safe and
 * can misbehave if the signal interrupted a stdio call.  As a result,
 * output still buffered on stdout at the time of the signal is dropped.
 */
static void
setsig1(int sig)
{
	(void) signal(sig, SIG_IGN);
	clean_up();
	_exit(1);
}
511
/*
 * Parse the run of decimal digits at the start of 'p'.
 *
 * Stores the parsed value in *ip (0 when 'p' does not start with a
 * digit) and returns a pointer to the first character after the digits,
 * which is 'p' itself when there were none.  No sign, whitespace or
 * overflow handling is performed.
 */
static char *
satoi(char *p, offset_t *ip)
{
	offset_t sum = 0;

	/*
	 * The cast matters: passing a negative plain char to isdigit()
	 * is undefined behaviour.
	 */
	while (isdigit((unsigned char)*p)) {
		sum = sum * 10 + (*p - '0');
		p++;
	}
	*ip = sum;
	return (p);
}
523
/*
 * Read a line of data from a file.  If the current buffer is not large enough
 * to contain the line, double the size of the buffer and continue reading.
 * Loop until either the entire line is read or until there is no more space
 * to be malloc'd.
 *
 * bufferp/bufsizp describe a heap buffer owned by the caller; both are
 * updated when the buffer is reallocated.  *bufsizp must be at least 2.
 *
 * Returns NULL when the first fgets() returns NULL (nothing was read),
 * otherwise the (possibly moved) buffer holding the line.  Allocation
 * failure and read errors while extending a line go through fatal().
 *
 * End-of-file is detected with feof() rather than by inspecting the
 * stdio implementation's private FILE fields.
 */

static char *
readline(char **bufferp, size_t *bufsizp, FILE *filep)
{
	char *bufp;
	size_t newsize;	/* number of bytes to make buffer */
	size_t oldsize;

	/*
	 * Plant sentinels in the last two bytes.  If fgets() fills the
	 * buffer it overwrites the last byte with '\0'; the byte before it
	 * then tells whether the line happened to end exactly there.
	 */
	(*bufferp)[*bufsizp - 1] = '\t'; /* arbitrary non-zero character */
	(*bufferp)[*bufsizp - 2] = ' '; /* arbitrary non-newline char */
	bufp = fgets(*bufferp, *bufsizp, filep);
	if (bufp == NULL)
		return (bufp);

	/*
	 * Buffer is full and the line is not complete: grow and read more.
	 * The strlen() test confirms the text really extends to the final
	 * terminator.
	 */
	while ((*bufferp)[*bufsizp - 1] == '\0' &&
	    (*bufferp)[*bufsizp - 2] != '\n' &&
	    strlen(*bufferp) == *bufsizp - 1) {
		newsize = 2 * (*bufsizp);
		bufp = realloc(*bufferp, newsize);
		if (bufp == NULL)
			fatal("Out of memory");
		oldsize = *bufsizp;
		*bufsizp = newsize;
		*bufferp = bufp;
		(*bufferp)[*bufsizp - 1] = '\t';
		(*bufferp)[*bufsizp - 2] = ' ';

		/* Continue reading over the old terminating '\0'. */
		bufp = fgets(*bufferp + oldsize - 1, oldsize + 1, filep);
		if (bufp == NULL) {
			if (feof(filep)) {
				/* Last line had no newline: return it. */
				bufp = *bufferp;
				break;
			} else
				fatal("Read error");
		} else
			bufp = *bufferp;
	}
	return (bufp);
}
567