1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 23 /* All Rights Reserved */ 24 25 26 /* 27 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <fatal.h> 34 #include <signal.h> 35 #include <sys/types.h> 36 #include <unistd.h> 37 #include <stdio.h> 38 #include <ctype.h> 39 #include <string.h> 40 #include <stdlib.h> 41 #include <wait.h> 42 43 #define ONSIG 16 44 45 /* 46 * This program segments two files into pieces of <= seglim lines 47 * (which is passed as a third argument or defaulted to some number) 48 * and then executes diff upon the pieces. The output of 49 * 'diff' is then processed to make it look as if 'diff' had 50 * processed the files whole. The reason for all this is that seglim 51 * is a reasonable upper limit on the size of files that diff can 52 * process. 53 * NOTE -- by segmenting the files in this manner, it cannot be 54 * guaranteed that the 'diffing' of the segments will generate 55 * a minimal set of differences. 56 * This process is most definitely not equivalent to 'diffing' 57 * the files whole, assuming 'diff' could handle such large files. 58 * 59 * 'diff' is executed by a child process, generated by forking, 60 * and communicates with this program through pipes. 61 */ 62 63 static char Error[128]; 64 65 static int seglim; /* limit of size of file segment to be generated */ 66 67 static char diff[] = "/usr/bin/diff"; 68 static char tempskel[] = "/tmp/bdXXXXXX"; /* used to generate temp file names */ 69 static char tempfile[32]; 70 static char otmp[32], ntmp[32]; 71 static int fflags; 72 static int fatal_num = 1; /* exit number for fatal exit */ 73 static offset_t linenum; 74 static size_t obufsiz, nbufsiz, dbufsiz; 75 static char *readline(char **, size_t *, FILE *); 76 static void addgen(char **, size_t *, FILE *); 77 static void delgen(char **, size_t *, FILE *); 78 static void fixnum(char *); 79 static void fatal(char *); 80 static void setsig(void); 81 static void setsig1(int); 82 static char *satoi(char *, offset_t *); 83 static FILE *maket(char *); 84 85 static char *prognam; 86 87 int 88 main(int argc, char *argv[]) 89 { 90 FILE *poldfile, *pnewfile; 91 char *oline, *nline, *diffline; 92 char *olp, *nlp, *dp; 93 int otcnt, ntcnt; 94 pid_t i; 95 int pfd[2]; 96 FILE *poldtemp, *pnewtemp, *pipeinp; 97 int status; 98 99 prognam = argv[0]; 100 /* 101 * Set flags for 'fatal' so that it will clean up, 102 * produce a message, and terminate. 103 */ 104 fflags = FTLMSG | FTLCLN | FTLEXIT; 105 106 setsig(); 107 108 if (argc < 3 || argc > 5) 109 fatal("arg count"); 110 111 if (strcmp(argv[1], "-") == 0 && strcmp(argv[2], "-") == 0) 112 fatal("both files standard input"); 113 if (strcmp(argv[1], "-") == 0) 114 poldfile = stdin; 115 else 116 if ((poldfile = fopen(argv[1], "r")) == NULL) { 117 (void) snprintf(Error, sizeof (Error), 118 "Can not open '%s'", argv[1]); 119 fatal(Error); 120 } 121 if (strcmp(argv[2], "-") == 0) 122 pnewfile = stdin; 123 else 124 if ((pnewfile = fopen(argv[2], "r")) == NULL) { 125 (void) snprintf(Error, sizeof (Error), 126 "Can not open '%s'", argv[2]); 127 fatal(Error); 128 } 129 130 seglim = 3500; 131 132 if (argc > 3) { 133 if (argv[3][0] == '-' && argv[3][1] == 's') 134 fflags &= ~FTLMSG; 135 else { 136 if ((seglim = atoi(argv[3])) == 0) 137 fatal("non-numeric limit"); 138 if (argc == 5 && argv[4][0] == '-' && 139 argv[4][1] == 's') 140 fflags &= ~FTLMSG; 141 } 142 } 143 144 linenum = 0; 145 146 /* Allocate the buffers and initialize their lengths */ 147 148 obufsiz = BUFSIZ; 149 nbufsiz = BUFSIZ; 150 dbufsiz = BUFSIZ; 151 152 if ((oline = (char *)malloc(obufsiz)) == NULL || 153 (nline = (char *)malloc(nbufsiz)) == NULL || 154 (diffline = (char *)malloc(dbufsiz)) == NULL) 155 fatal("Out of memory"); 156 157 /* 158 * The following while-loop will prevent any lines 159 * common to the beginning of both files from being 160 * sent to 'diff'. Since the running time of 'diff' is 161 * non-linear, this will help improve performance. 162 * If, during this process, both files reach EOF, then 163 * the files are equal and the program will terminate. 164 * If either file reaches EOF before the other, the 165 * program will generate the appropriate 'diff' output 166 * itself, since this can be easily determined and will 167 * avoid executing 'diff' completely. 168 */ 169 for (;;) { 170 olp = readline(&oline, &obufsiz, poldfile); 171 nlp = readline(&nline, &nbufsiz, pnewfile); 172 173 if (!olp && !nlp) /* EOF found on both: files equal */ 174 return (0); 175 176 if (!olp) { 177 /* 178 * The entire old file is a prefix of the 179 * new file. Generate the appropriate "append" 180 * 'diff'-like output, which is of the form: 181 * nan, n 182 * where 'n' represents a line-number. 183 */ 184 addgen(&nline, &nbufsiz, pnewfile); 185 } 186 187 if (!nlp) { 188 /* 189 * The entire new file is a prefix of the 190 * old file. Generate the appropriate "delete" 191 * 'diff'-like output, which is of the form: 192 * n, ndn 193 * where 'n' represents a line-number. 194 */ 195 delgen(&oline, &obufsiz, poldfile); 196 } 197 198 if (strcmp(olp, nlp) == 0) 199 linenum++; 200 else 201 break; 202 } 203 204 /* 205 * Here, first 'linenum' lines are equal. 206 * The following while-loop segments both files into 207 * seglim segments, forks and executes 'diff' on the 208 * segments, and processes the resulting output of 209 * 'diff', which is read from a pipe. 210 */ 211 for (;;) { 212 /* If both files are at EOF, everything is done. */ 213 if (!olp && !nlp) /* finished */ 214 return (0); 215 216 if (!olp) { 217 /* 218 * Generate appropriate "append" 219 * output without executing 'diff'. 220 */ 221 addgen(&nline, &nbufsiz, pnewfile); 222 } 223 224 if (!nlp) { 225 /* 226 * Generate appropriate "delete" 227 * output without executing 'diff'. 228 */ 229 delgen(&oline, &obufsiz, poldfile); 230 } 231 232 /* 233 * Create a temporary file to hold a segment 234 * from the old file, and write it. 235 */ 236 poldtemp = maket(otmp); 237 otcnt = 0; 238 while (olp && otcnt < seglim) { 239 (void) fputs(oline, poldtemp); 240 if (ferror(poldtemp) != 0) { 241 fflags |= FTLMSG; 242 fatal("Can not write to temporary file"); 243 } 244 olp = readline(&oline, &obufsiz, poldfile); 245 otcnt++; 246 } 247 (void) fclose(poldtemp); 248 249 /* 250 * Create a temporary file to hold a segment 251 * from the new file, and write it. 252 */ 253 pnewtemp = maket(ntmp); 254 ntcnt = 0; 255 while (nlp && ntcnt < seglim) { 256 (void) fputs(nline, pnewtemp); 257 if (ferror(pnewtemp) != 0) { 258 fflags |= FTLMSG; 259 fatal("Can not write to temporary file"); 260 } 261 nlp = readline(&nline, &nbufsiz, pnewfile); 262 ntcnt++; 263 } 264 (void) fclose(pnewtemp); 265 266 /* Create pipes and fork. */ 267 if ((pipe(pfd)) == -1) 268 fatal("Can not create pipe"); 269 if ((i = fork()) < (pid_t)0) { 270 (void) close(pfd[0]); 271 (void) close(pfd[1]); 272 fatal("Can not fork, try again"); 273 } else if (i == (pid_t)0) { /* child process */ 274 (void) close(pfd[0]); 275 (void) close(1); 276 (void) dup(pfd[1]); 277 (void) close(pfd[1]); 278 279 /* Execute 'diff' on the segment files. */ 280 (void) execlp(diff, diff, otmp, ntmp, 0); 281 282 /* 283 * Exit code here must be > 1. 284 * Parent process treats exit code of 1 from the child 285 * as non-error because the child process "diff" exits 286 * with a status of 1 when a difference is encountered. 287 * The error here is a true error--the parent process 288 * needs to detect it and exit with a non-zero status. 289 */ 290 (void) close(1); 291 (void) snprintf(Error, sizeof (Error), 292 "Can not execute '%s'", diff); 293 fatal_num = 2; 294 fatal(Error); 295 } else { /* parent process */ 296 (void) close(pfd[1]); 297 pipeinp = fdopen(pfd[0], "r"); 298 299 /* Process 'diff' output. */ 300 while ((dp = readline(&diffline, &dbufsiz, pipeinp))) { 301 if (isdigit(*dp)) 302 fixnum(diffline); 303 else 304 (void) printf("%s", diffline); 305 } 306 307 (void) fclose(pipeinp); 308 309 /* EOF on pipe. */ 310 (void) wait(&status); 311 if (status&~0x100) { 312 (void) snprintf(Error, sizeof (Error), 313 "'%s' failed", diff); 314 fatal(Error); 315 } 316 } 317 linenum += seglim; 318 319 /* Remove temporary files. */ 320 (void) unlink(otmp); 321 (void) unlink(ntmp); 322 } 323 } 324 325 /* Routine to save remainder of a file. */ 326 static void 327 saverest(char **linep, size_t *bufsizp, FILE *iptr) 328 { 329 char *lp; 330 FILE *temptr; 331 332 temptr = maket(tempfile); 333 334 lp = *linep; 335 336 while (lp) { 337 (void) fputs(*linep, temptr); 338 linenum++; 339 lp = readline(linep, bufsizp, iptr); 340 } 341 (void) fclose(temptr); 342 } 343 344 /* Routine to write out data saved by 'saverest' and to remove the file. */ 345 static void 346 putsave(char **linep, size_t *bufsizp, char type) 347 { 348 FILE *temptr; 349 350 if ((temptr = fopen(tempfile, "r")) == NULL) { 351 (void) snprintf(Error, sizeof (Error), 352 "Can not open tempfile ('%s')", tempfile); fatal(Error); 353 } 354 355 while (readline(linep, bufsizp, temptr)) 356 (void) printf("%c %s", type, *linep); 357 358 (void) fclose(temptr); 359 360 (void) unlink(tempfile); 361 } 362 363 static void 364 fixnum(char *lp) 365 { 366 offset_t num; 367 368 while (*lp) { 369 switch (*lp) { 370 371 case 'a': 372 case 'c': 373 case 'd': 374 case ',': 375 case '\n': 376 (void) printf("%c", *lp); 377 lp++; 378 break; 379 380 default: 381 lp = satoi(lp, &num); 382 num += linenum; 383 (void) printf("%lld", num); 384 } 385 } 386 } 387 388 static void 389 addgen(char **lpp, size_t *bufsizp, FILE *fp) 390 { 391 offset_t oldline; 392 (void) printf("%llda%lld", linenum, linenum+1); 393 394 /* Save lines of new file. */ 395 oldline = linenum + 1; 396 saverest(lpp, bufsizp, fp); 397 398 if (oldline < linenum) 399 (void) printf(",%lld\n", linenum); 400 else 401 (void) printf("\n"); 402 403 /* Output saved lines, as 'diff' would. */ 404 putsave(lpp, bufsizp, '>'); 405 406 exit(0); 407 } 408 409 static void 410 delgen(char **lpp, size_t *bufsizp, FILE *fp) 411 { 412 offset_t savenum; 413 414 (void) printf("%lld", linenum+1); 415 savenum = linenum; 416 417 /* Save lines of old file. */ 418 saverest(lpp, bufsizp, fp); 419 420 if (savenum +1 != linenum) 421 (void) printf(",%lldd%lld\n", linenum, savenum); 422 else 423 (void) printf("d%lld\n", savenum); 424 425 /* Output saved lines, as 'diff' would. */ 426 putsave(lpp, bufsizp, '<'); 427 428 exit(0); 429 } 430 431 static void 432 clean_up() 433 { 434 (void) unlink(tempfile); 435 (void) unlink(otmp); 436 (void) unlink(ntmp); 437 } 438 439 static FILE * 440 maket(char *file) 441 { 442 FILE *iop; 443 int fd; 444 445 (void) strcpy(file, tempskel); 446 if ((fd = mkstemp(file)) == -1 || 447 (iop = fdopen(fd, "w+")) == NULL) { 448 (void) snprintf(Error, sizeof (Error), 449 "Can not open/create temp file ('%s')", file); 450 fatal(Error); 451 } 452 return (iop); 453 } 454 455 static void 456 fatal(char *msg) 457 /* 458 * General purpose error handler. 459 * 460 * The argument to fatal is a pointer to an error message string. 461 * The action of this routine is driven completely from 462 * the "fflags" global word (see <fatal.h>). 463 * 464 * The FTLMSG bit controls the writing of the error 465 * message on file descriptor 2. A newline is written 466 * after the user supplied message. 467 * 468 * If the FTLCLN bit is on, clean_up is called. 469 */ 470 { 471 if (fflags & FTLMSG) 472 (void) fprintf(stderr, "%s: %s\n", prognam, msg); 473 if (fflags & FTLCLN) 474 clean_up(); 475 if (fflags & FTLEXIT) 476 exit(fatal_num); 477 } 478 479 static void 480 setsig() 481 /* 482 * General-purpose signal setting routine. 483 * All non-ignored, non-caught signals are caught. 484 * If a signal other than hangup, interrupt, or quit is caught, 485 * a "user-oriented" message is printed on file descriptor 2. 486 * If hangup, interrupt or quit is caught, that signal 487 * is set to ignore. 488 * Termination is like that of "fatal", 489 * via "clean_up()" 490 */ 491 { 492 void (*act)(int); 493 int j; 494 495 for (j = 1; j < ONSIG; j++) { 496 act = signal(j, setsig1); 497 if (act == SIG_ERR) 498 continue; 499 if (act == SIG_DFL) 500 continue; 501 (void) signal(j, act); 502 } 503 } 504 505 static void 506 setsig1(int sig) 507 { 508 509 (void) signal(sig, SIG_IGN); 510 clean_up(); 511 exit(1); 512 } 513 514 static char * 515 satoi(char *p, offset_t *ip) 516 { 517 offset_t sum; 518 519 sum = 0; 520 while (isdigit(*p)) 521 sum = sum * 10 + (*p++ - '0'); 522 *ip = sum; 523 return (p); 524 } 525 526 /* 527 * Read a line of data from a file. If the current buffer is not large enough 528 * to contain the line, double the size of the buffer and continue reading. 529 * Loop until either the entire line is read or until there is no more space 530 * to be malloc'd. 531 */ 532 533 static char * 534 readline(char **bufferp, size_t *bufsizp, FILE *filep) 535 { 536 char *bufp; 537 size_t newsize; /* number of bytes to make buffer */ 538 size_t oldsize; 539 540 (*bufferp)[*bufsizp - 1] = '\t'; /* arbitrary non-zero character */ 541 (*bufferp)[*bufsizp - 2] = ' '; /* arbitrary non-newline char */ 542 bufp = fgets(*bufferp, *bufsizp, filep); 543 if (bufp == NULL) 544 return (bufp); 545 while ((*bufferp)[*bufsizp -1] == '\0' && 546 (*bufferp)[*bufsizp - 2] != '\n' && 547 strlen(*bufferp) == *bufsizp - 1) { 548 newsize = 2 * (*bufsizp); 549 bufp = (char *)realloc((void *)*bufferp, newsize); 550 if (bufp == NULL) 551 fatal("Out of memory"); 552 oldsize = *bufsizp; 553 *bufsizp = newsize; 554 *bufferp = bufp; 555 (*bufferp)[*bufsizp - 1] = '\t'; 556 (*bufferp)[*bufsizp - 2] = ' '; 557 bufp = fgets(*bufferp + oldsize -1, oldsize + 1, filep); 558 if (bufp == NULL) { 559 if (filep->_flag & _IOEOF) { 560 bufp = *bufferp; 561 break; 562 } else 563 fatal("Read error"); 564 } else 565 bufp = *bufferp; 566 } 567 return (bufp); 568 } 569