1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
23 /* All Rights Reserved */
24
25
26 /*
27 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 #pragma ident "%Z%%M% %I% %E% SMI"
32
33 #include <fatal.h>
34 #include <signal.h>
35 #include <sys/types.h>
36 #include <unistd.h>
37 #include <stdio.h>
38 #include <ctype.h>
39 #include <string.h>
40 #include <stdlib.h>
41 #include <wait.h>
42
43 #define ONSIG 16
44
45 /*
46 * This program segments two files into pieces of <= seglim lines
47 * (which is passed as a third argument or defaulted to some number)
48 * and then executes diff upon the pieces. The output of
49 * 'diff' is then processed to make it look as if 'diff' had
50 * processed the files whole. The reason for all this is that seglim
51 * is a reasonable upper limit on the size of files that diff can
52 * process.
53 * NOTE -- by segmenting the files in this manner, it cannot be
54 * guaranteed that the 'diffing' of the segments will generate
55 * a minimal set of differences.
56 * This process is most definitely not equivalent to 'diffing'
57 * the files whole, assuming 'diff' could handle such large files.
58 *
59 * 'diff' is executed by a child process, generated by forking,
60 * and communicates with this program through pipes.
61 */
62
/* Scratch buffer in which messages handed to fatal() are formatted. */
static char Error[128];

/* limit of size of file segment to be generated */
static int seglim;

/* Path of the program that is executed on each pair of segments. */
static char diff[] = "/usr/bin/diff";

/* used to generate temp file names */
static char tempskel[] = "/tmp/bdXXXXXX";

/* Temp file names; each is filled in from tempskel by maket(). */
static char tempfile[32];	/* used by saverest()/putsave() */
static char otmp[32];		/* segment of the old file */
static char ntmp[32];		/* segment of the new file */

/* Flag word (FTL* bits) that drives fatal(). */
static int fflags;

/* exit number for fatal exit */
static int fatal_num = 1;

/* Offset added to the line numbers that 'diff' reports for a segment. */
static offset_t linenum;

/* Current sizes of the old-line, new-line and diff-output line buffers. */
static size_t obufsiz;
static size_t nbufsiz;
static size_t dbufsiz;

/* argv[0], used as the prefix of error messages. */
static char *prognam;

static char *readline(char **bufferp, size_t *bufsizp, FILE *filep);
static void addgen(char **lpp, size_t *bufsizp, FILE *fp);
static void delgen(char **lpp, size_t *bufsizp, FILE *fp);
static void fixnum(char *lp);
static void fatal(char *msg);
static void setsig(void);
static void setsig1(int sig);
static char *satoi(char *p, offset_t *ip);
static FILE *maket(char *file);
86
87 int
main(int argc,char * argv[])88 main(int argc, char *argv[])
89 {
90 FILE *poldfile, *pnewfile;
91 char *oline, *nline, *diffline;
92 char *olp, *nlp, *dp;
93 int otcnt, ntcnt;
94 pid_t i;
95 int pfd[2];
96 FILE *poldtemp, *pnewtemp, *pipeinp;
97 int status;
98
99 prognam = argv[0];
100 /*
101 * Set flags for 'fatal' so that it will clean up,
102 * produce a message, and terminate.
103 */
104 fflags = FTLMSG | FTLCLN | FTLEXIT;
105
106 setsig();
107
108 if (argc < 3 || argc > 5)
109 fatal("arg count");
110
111 if (strcmp(argv[1], "-") == 0 && strcmp(argv[2], "-") == 0)
112 fatal("both files standard input");
113 if (strcmp(argv[1], "-") == 0)
114 poldfile = stdin;
115 else
116 if ((poldfile = fopen(argv[1], "r")) == NULL) {
117 (void) snprintf(Error, sizeof (Error),
118 "Can not open '%s'", argv[1]);
119 fatal(Error);
120 }
121 if (strcmp(argv[2], "-") == 0)
122 pnewfile = stdin;
123 else
124 if ((pnewfile = fopen(argv[2], "r")) == NULL) {
125 (void) snprintf(Error, sizeof (Error),
126 "Can not open '%s'", argv[2]);
127 fatal(Error);
128 }
129
130 seglim = 3500;
131
132 if (argc > 3) {
133 if (argv[3][0] == '-' && argv[3][1] == 's')
134 fflags &= ~FTLMSG;
135 else {
136 if ((seglim = atoi(argv[3])) == 0)
137 fatal("non-numeric limit");
138 if (argc == 5 && argv[4][0] == '-' &&
139 argv[4][1] == 's')
140 fflags &= ~FTLMSG;
141 }
142 }
143
144 linenum = 0;
145
146 /* Allocate the buffers and initialize their lengths */
147
148 obufsiz = BUFSIZ;
149 nbufsiz = BUFSIZ;
150 dbufsiz = BUFSIZ;
151
152 if ((oline = (char *)malloc(obufsiz)) == NULL ||
153 (nline = (char *)malloc(nbufsiz)) == NULL ||
154 (diffline = (char *)malloc(dbufsiz)) == NULL)
155 fatal("Out of memory");
156
157 /*
158 * The following while-loop will prevent any lines
159 * common to the beginning of both files from being
160 * sent to 'diff'. Since the running time of 'diff' is
161 * non-linear, this will help improve performance.
162 * If, during this process, both files reach EOF, then
163 * the files are equal and the program will terminate.
164 * If either file reaches EOF before the other, the
165 * program will generate the appropriate 'diff' output
166 * itself, since this can be easily determined and will
167 * avoid executing 'diff' completely.
168 */
169 for (;;) {
170 olp = readline(&oline, &obufsiz, poldfile);
171 nlp = readline(&nline, &nbufsiz, pnewfile);
172
173 if (!olp && !nlp) /* EOF found on both: files equal */
174 return (0);
175
176 if (!olp) {
177 /*
178 * The entire old file is a prefix of the
179 * new file. Generate the appropriate "append"
180 * 'diff'-like output, which is of the form:
181 * nan, n
182 * where 'n' represents a line-number.
183 */
184 addgen(&nline, &nbufsiz, pnewfile);
185 }
186
187 if (!nlp) {
188 /*
189 * The entire new file is a prefix of the
190 * old file. Generate the appropriate "delete"
191 * 'diff'-like output, which is of the form:
192 * n, ndn
193 * where 'n' represents a line-number.
194 */
195 delgen(&oline, &obufsiz, poldfile);
196 }
197
198 if (strcmp(olp, nlp) == 0)
199 linenum++;
200 else
201 break;
202 }
203
204 /*
205 * Here, first 'linenum' lines are equal.
206 * The following while-loop segments both files into
207 * seglim segments, forks and executes 'diff' on the
208 * segments, and processes the resulting output of
209 * 'diff', which is read from a pipe.
210 */
211 for (;;) {
212 /* If both files are at EOF, everything is done. */
213 if (!olp && !nlp) /* finished */
214 return (0);
215
216 if (!olp) {
217 /*
218 * Generate appropriate "append"
219 * output without executing 'diff'.
220 */
221 addgen(&nline, &nbufsiz, pnewfile);
222 }
223
224 if (!nlp) {
225 /*
226 * Generate appropriate "delete"
227 * output without executing 'diff'.
228 */
229 delgen(&oline, &obufsiz, poldfile);
230 }
231
232 /*
233 * Create a temporary file to hold a segment
234 * from the old file, and write it.
235 */
236 poldtemp = maket(otmp);
237 otcnt = 0;
238 while (olp && otcnt < seglim) {
239 (void) fputs(oline, poldtemp);
240 if (ferror(poldtemp) != 0) {
241 fflags |= FTLMSG;
242 fatal("Can not write to temporary file");
243 }
244 olp = readline(&oline, &obufsiz, poldfile);
245 otcnt++;
246 }
247 (void) fclose(poldtemp);
248
249 /*
250 * Create a temporary file to hold a segment
251 * from the new file, and write it.
252 */
253 pnewtemp = maket(ntmp);
254 ntcnt = 0;
255 while (nlp && ntcnt < seglim) {
256 (void) fputs(nline, pnewtemp);
257 if (ferror(pnewtemp) != 0) {
258 fflags |= FTLMSG;
259 fatal("Can not write to temporary file");
260 }
261 nlp = readline(&nline, &nbufsiz, pnewfile);
262 ntcnt++;
263 }
264 (void) fclose(pnewtemp);
265
266 /* Create pipes and fork. */
267 if ((pipe(pfd)) == -1)
268 fatal("Can not create pipe");
269 if ((i = fork()) < (pid_t)0) {
270 (void) close(pfd[0]);
271 (void) close(pfd[1]);
272 fatal("Can not fork, try again");
273 } else if (i == (pid_t)0) { /* child process */
274 (void) close(pfd[0]);
275 (void) close(1);
276 (void) dup(pfd[1]);
277 (void) close(pfd[1]);
278
279 /* Execute 'diff' on the segment files. */
280 (void) execlp(diff, diff, otmp, ntmp, 0);
281
282 /*
283 * Exit code here must be > 1.
284 * Parent process treats exit code of 1 from the child
285 * as non-error because the child process "diff" exits
286 * with a status of 1 when a difference is encountered.
287 * The error here is a true error--the parent process
288 * needs to detect it and exit with a non-zero status.
289 */
290 (void) close(1);
291 (void) snprintf(Error, sizeof (Error),
292 "Can not execute '%s'", diff);
293 fatal_num = 2;
294 fatal(Error);
295 } else { /* parent process */
296 (void) close(pfd[1]);
297 pipeinp = fdopen(pfd[0], "r");
298
299 /* Process 'diff' output. */
300 while ((dp = readline(&diffline, &dbufsiz, pipeinp))) {
301 if (isdigit(*dp))
302 fixnum(diffline);
303 else
304 (void) printf("%s", diffline);
305 }
306
307 (void) fclose(pipeinp);
308
309 /* EOF on pipe. */
310 (void) wait(&status);
311 if (status&~0x100) {
312 (void) snprintf(Error, sizeof (Error),
313 "'%s' failed", diff);
314 fatal(Error);
315 }
316 }
317 linenum += seglim;
318
319 /* Remove temporary files. */
320 (void) unlink(otmp);
321 (void) unlink(ntmp);
322 }
323 }
324
325 /* Routine to save remainder of a file. */
326 static void
saverest(char ** linep,size_t * bufsizp,FILE * iptr)327 saverest(char **linep, size_t *bufsizp, FILE *iptr)
328 {
329 char *lp;
330 FILE *temptr;
331
332 temptr = maket(tempfile);
333
334 lp = *linep;
335
336 while (lp) {
337 (void) fputs(*linep, temptr);
338 linenum++;
339 lp = readline(linep, bufsizp, iptr);
340 }
341 (void) fclose(temptr);
342 }
343
344 /* Routine to write out data saved by 'saverest' and to remove the file. */
345 static void
putsave(char ** linep,size_t * bufsizp,char type)346 putsave(char **linep, size_t *bufsizp, char type)
347 {
348 FILE *temptr;
349
350 if ((temptr = fopen(tempfile, "r")) == NULL) {
351 (void) snprintf(Error, sizeof (Error),
352 "Can not open tempfile ('%s')", tempfile); fatal(Error);
353 }
354
355 while (readline(linep, bufsizp, temptr))
356 (void) printf("%c %s", type, *linep);
357
358 (void) fclose(temptr);
359
360 (void) unlink(tempfile);
361 }
362
363 static void
fixnum(char * lp)364 fixnum(char *lp)
365 {
366 offset_t num;
367
368 while (*lp) {
369 switch (*lp) {
370
371 case 'a':
372 case 'c':
373 case 'd':
374 case ',':
375 case '\n':
376 (void) printf("%c", *lp);
377 lp++;
378 break;
379
380 default:
381 lp = satoi(lp, &num);
382 num += linenum;
383 (void) printf("%lld", num);
384 }
385 }
386 }
387
388 static void
addgen(char ** lpp,size_t * bufsizp,FILE * fp)389 addgen(char **lpp, size_t *bufsizp, FILE *fp)
390 {
391 offset_t oldline;
392 (void) printf("%llda%lld", linenum, linenum+1);
393
394 /* Save lines of new file. */
395 oldline = linenum + 1;
396 saverest(lpp, bufsizp, fp);
397
398 if (oldline < linenum)
399 (void) printf(",%lld\n", linenum);
400 else
401 (void) printf("\n");
402
403 /* Output saved lines, as 'diff' would. */
404 putsave(lpp, bufsizp, '>');
405
406 exit(0);
407 }
408
409 static void
delgen(char ** lpp,size_t * bufsizp,FILE * fp)410 delgen(char **lpp, size_t *bufsizp, FILE *fp)
411 {
412 offset_t savenum;
413
414 (void) printf("%lld", linenum+1);
415 savenum = linenum;
416
417 /* Save lines of old file. */
418 saverest(lpp, bufsizp, fp);
419
420 if (savenum +1 != linenum)
421 (void) printf(",%lldd%lld\n", linenum, savenum);
422 else
423 (void) printf("d%lld\n", savenum);
424
425 /* Output saved lines, as 'diff' would. */
426 putsave(lpp, bufsizp, '<');
427
428 exit(0);
429 }
430
431 static void
clean_up()432 clean_up()
433 {
434 (void) unlink(tempfile);
435 (void) unlink(otmp);
436 (void) unlink(ntmp);
437 }
438
439 static FILE *
maket(char * file)440 maket(char *file)
441 {
442 FILE *iop;
443 int fd;
444
445 (void) strcpy(file, tempskel);
446 if ((fd = mkstemp(file)) == -1 ||
447 (iop = fdopen(fd, "w+")) == NULL) {
448 (void) snprintf(Error, sizeof (Error),
449 "Can not open/create temp file ('%s')", file);
450 fatal(Error);
451 }
452 return (iop);
453 }
454
455 static void
fatal(char * msg)456 fatal(char *msg)
457 /*
458 * General purpose error handler.
459 *
460 * The argument to fatal is a pointer to an error message string.
461 * The action of this routine is driven completely from
462 * the "fflags" global word (see <fatal.h>).
463 *
464 * The FTLMSG bit controls the writing of the error
465 * message on file descriptor 2. A newline is written
466 * after the user supplied message.
467 *
468 * If the FTLCLN bit is on, clean_up is called.
469 */
470 {
471 if (fflags & FTLMSG)
472 (void) fprintf(stderr, "%s: %s\n", prognam, msg);
473 if (fflags & FTLCLN)
474 clean_up();
475 if (fflags & FTLEXIT)
476 exit(fatal_num);
477 }
478
479 static void
setsig()480 setsig()
481 /*
482 * General-purpose signal setting routine.
483 * All non-ignored, non-caught signals are caught.
484 * If a signal other than hangup, interrupt, or quit is caught,
485 * a "user-oriented" message is printed on file descriptor 2.
486 * If hangup, interrupt or quit is caught, that signal
487 * is set to ignore.
488 * Termination is like that of "fatal",
489 * via "clean_up()"
490 */
491 {
492 void (*act)(int);
493 int j;
494
495 for (j = 1; j < ONSIG; j++) {
496 act = signal(j, setsig1);
497 if (act == SIG_ERR)
498 continue;
499 if (act == SIG_DFL)
500 continue;
501 (void) signal(j, act);
502 }
503 }
504
/*
 * Signal handler installed by setsig().
 *
 * Sets the signal that arrived to "ignore" so that it cannot interrupt the
 * cleanup, removes the temporary files and exits with status 1.
 *
 * NOTE(review): exit() is not on the POSIX list of async-signal-safe
 * functions; confirm whether flushing stdio here is intended before
 * considering _exit().
 */
static void
setsig1(int sig)
{
	(void) signal(sig, SIG_IGN);

	clean_up();

	exit(1);
}
513
/*
 * Convert a leading run of decimal digits to a number.
 *
 * p	start of the text to parse
 * ip	receives the value of the digit run; 0 if 'p' does not start
 *	with a digit
 *
 * Returns a pointer to the first character after the digits, which is
 * 'p' itself when no digit was consumed.  No overflow check is made.
 */
static char *
satoi(char *p, offset_t *ip)
{
	offset_t sum = 0;

	/*
	 * The <ctype.h> functions require a value representable as
	 * unsigned char (or EOF); a plain char may be negative.
	 */
	while (isdigit((unsigned char)*p)) {
		sum = sum * 10 + (*p - '0');
		p++;
	}
	*ip = sum;
	return (p);
}
525
/*
 * Read a line of data from a file.  If the current buffer is not large enough
 * to contain the line, double the size of the buffer and continue reading.
 * Loop until either the entire line is read or until there is no more space
 * to be malloc'd.
 *
 * bufferp	address of the (malloc'd) line buffer; updated when the
 *		buffer is reallocated
 * bufsizp	address of the buffer's size; updated together with *bufferp
 * filep	stream to read from
 *
 * Returns *bufferp when a line (or a final partial line) was read, or NULL
 * when the very first fgets() returns NULL.  Allocation failure and a read
 * error while extending a line are reported through fatal().
 */

static char *
readline(char **bufferp, size_t *bufsizp, FILE *filep)
{
	char *bufp;
	size_t newsize;	/* number of bytes to make buffer */
	size_t oldsize;

	/*
	 * Plant sentinels in the last two bytes.  fgets() only overwrites
	 * the last byte (with the terminating NUL) when it fills the whole
	 * buffer, and the byte before it is a newline only if the line
	 * happened to end exactly there.
	 */
	(*bufferp)[*bufsizp - 1] = '\t';	/* arbitrary non-zero character */
	(*bufferp)[*bufsizp - 2] = ' ';		/* arbitrary non-newline char */
	bufp = fgets(*bufferp, (int)*bufsizp, filep);
	if (bufp == NULL)
		return (bufp);

	/* Buffer completely filled and no newline seen: the line continues. */
	while ((*bufferp)[*bufsizp - 1] == '\0' &&
	    (*bufferp)[*bufsizp - 2] != '\n' &&
	    strlen(*bufferp) == *bufsizp - 1) {
		newsize = 2 * (*bufsizp);
		bufp = (char *)realloc((void *)*bufferp, newsize);
		if (bufp == NULL)
			fatal("Out of memory");
		oldsize = *bufsizp;
		*bufsizp = newsize;
		*bufferp = bufp;
		(*bufferp)[*bufsizp - 1] = '\t';
		(*bufferp)[*bufsizp - 2] = ' ';

		/*
		 * Continue reading on top of the old terminating NUL; the
		 * remaining room is the new half plus that one byte.
		 */
		bufp = fgets(*bufferp + oldsize - 1, (int)(oldsize + 1),
		    filep);
		if (bufp == NULL) {
			/*
			 * Use the public feof() rather than peeking at the
			 * private fields of FILE.
			 */
			if (feof(filep)) {
				/* Last line had no newline: return it. */
				bufp = *bufferp;
				break;
			} else
				fatal("Read error");
		} else
			bufp = *bufferp;
	}
	return (bufp);
}
569