1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 23 /* All Rights Reserved */ 24 25 26 /* 27 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #include <fatal.h> 32 #include <signal.h> 33 #include <sys/types.h> 34 #include <unistd.h> 35 #include <stdio.h> 36 #include <ctype.h> 37 #include <string.h> 38 #include <stdlib.h> 39 #include <wait.h> 40 41 #define ONSIG 16 42 43 /* 44 * This program segments two files into pieces of <= seglim lines 45 * (which is passed as a third argument or defaulted to some number) 46 * and then executes diff upon the pieces. The output of 47 * 'diff' is then processed to make it look as if 'diff' had 48 * processed the files whole. The reason for all this is that seglim 49 * is a reasonable upper limit on the size of files that diff can 50 * process. 51 * NOTE -- by segmenting the files in this manner, it cannot be 52 * guaranteed that the 'diffing' of the segments will generate 53 * a minimal set of differences. 54 * This process is most definitely not equivalent to 'diffing' 55 * the files whole, assuming 'diff' could handle such large files. 56 * 57 * 'diff' is executed by a child process, generated by forking, 58 * and communicates with this program through pipes. 59 */ 60 61 static char Error[128]; 62 63 static int seglim; /* limit of size of file segment to be generated */ 64 65 static char diff[] = "/usr/bin/diff"; 66 static char tempskel[] = "/tmp/bdXXXXXX"; /* used to generate temp file names */ 67 static char tempfile[32]; 68 static char otmp[32], ntmp[32]; 69 static int fflags; 70 static int fatal_num = 1; /* exit number for fatal exit */ 71 static offset_t linenum; 72 static size_t obufsiz, nbufsiz, dbufsiz; 73 static char *readline(char **, size_t *, FILE *); 74 static void addgen(char **, size_t *, FILE *); 75 static void delgen(char **, size_t *, FILE *); 76 static void fixnum(char *); 77 static void fatal(char *); 78 static void setsig(void); 79 static void setsig1(int); 80 static char *satoi(char *, offset_t *); 81 static FILE *maket(char *); 82 83 static char *prognam; 84 85 int 86 main(int argc, char *argv[]) 87 { 88 FILE *poldfile, *pnewfile; 89 char *oline, *nline, *diffline; 90 char *olp, *nlp, *dp; 91 int otcnt, ntcnt; 92 pid_t i; 93 int pfd[2]; 94 FILE *poldtemp, *pnewtemp, *pipeinp; 95 int status; 96 97 prognam = argv[0]; 98 /* 99 * Set flags for 'fatal' so that it will clean up, 100 * produce a message, and terminate. 101 */ 102 fflags = FTLMSG | FTLCLN | FTLEXIT; 103 104 setsig(); 105 106 if (argc < 3 || argc > 5) 107 fatal("arg count"); 108 109 if (strcmp(argv[1], "-") == 0 && strcmp(argv[2], "-") == 0) 110 fatal("both files standard input"); 111 if (strcmp(argv[1], "-") == 0) 112 poldfile = stdin; 113 else 114 if ((poldfile = fopen(argv[1], "r")) == NULL) { 115 (void) snprintf(Error, sizeof (Error), 116 "Can not open '%s'", argv[1]); 117 fatal(Error); 118 } 119 if (strcmp(argv[2], "-") == 0) 120 pnewfile = stdin; 121 else 122 if ((pnewfile = fopen(argv[2], "r")) == NULL) { 123 (void) snprintf(Error, sizeof (Error), 124 "Can not open '%s'", argv[2]); 125 fatal(Error); 126 } 127 128 seglim = 3500; 129 130 if (argc > 3) { 131 if (argv[3][0] == '-' && argv[3][1] == 's') 132 fflags &= ~FTLMSG; 133 else { 134 if ((seglim = atoi(argv[3])) == 0) 135 fatal("non-numeric limit"); 136 if (argc == 5 && argv[4][0] == '-' && 137 argv[4][1] == 's') 138 fflags &= ~FTLMSG; 139 } 140 } 141 142 linenum = 0; 143 144 /* Allocate the buffers and initialize their lengths */ 145 146 obufsiz = BUFSIZ; 147 nbufsiz = BUFSIZ; 148 dbufsiz = BUFSIZ; 149 150 if ((oline = (char *)malloc(obufsiz)) == NULL || 151 (nline = (char *)malloc(nbufsiz)) == NULL || 152 (diffline = (char *)malloc(dbufsiz)) == NULL) 153 fatal("Out of memory"); 154 155 /* 156 * The following while-loop will prevent any lines 157 * common to the beginning of both files from being 158 * sent to 'diff'. Since the running time of 'diff' is 159 * non-linear, this will help improve performance. 160 * If, during this process, both files reach EOF, then 161 * the files are equal and the program will terminate. 162 * If either file reaches EOF before the other, the 163 * program will generate the appropriate 'diff' output 164 * itself, since this can be easily determined and will 165 * avoid executing 'diff' completely. 166 */ 167 for (;;) { 168 olp = readline(&oline, &obufsiz, poldfile); 169 nlp = readline(&nline, &nbufsiz, pnewfile); 170 171 if (!olp && !nlp) /* EOF found on both: files equal */ 172 return (0); 173 174 if (!olp) { 175 /* 176 * The entire old file is a prefix of the 177 * new file. Generate the appropriate "append" 178 * 'diff'-like output, which is of the form: 179 * nan, n 180 * where 'n' represents a line-number. 181 */ 182 addgen(&nline, &nbufsiz, pnewfile); 183 } 184 185 if (!nlp) { 186 /* 187 * The entire new file is a prefix of the 188 * old file. Generate the appropriate "delete" 189 * 'diff'-like output, which is of the form: 190 * n, ndn 191 * where 'n' represents a line-number. 192 */ 193 delgen(&oline, &obufsiz, poldfile); 194 } 195 196 if (strcmp(olp, nlp) == 0) 197 linenum++; 198 else 199 break; 200 } 201 202 /* 203 * Here, first 'linenum' lines are equal. 204 * The following while-loop segments both files into 205 * seglim segments, forks and executes 'diff' on the 206 * segments, and processes the resulting output of 207 * 'diff', which is read from a pipe. 208 */ 209 for (;;) { 210 /* If both files are at EOF, everything is done. */ 211 if (!olp && !nlp) /* finished */ 212 return (0); 213 214 if (!olp) { 215 /* 216 * Generate appropriate "append" 217 * output without executing 'diff'. 218 */ 219 addgen(&nline, &nbufsiz, pnewfile); 220 } 221 222 if (!nlp) { 223 /* 224 * Generate appropriate "delete" 225 * output without executing 'diff'. 226 */ 227 delgen(&oline, &obufsiz, poldfile); 228 } 229 230 /* 231 * Create a temporary file to hold a segment 232 * from the old file, and write it. 233 */ 234 poldtemp = maket(otmp); 235 otcnt = 0; 236 while (olp && otcnt < seglim) { 237 (void) fputs(oline, poldtemp); 238 if (ferror(poldtemp) != 0) { 239 fflags |= FTLMSG; 240 fatal("Can not write to temporary file"); 241 } 242 olp = readline(&oline, &obufsiz, poldfile); 243 otcnt++; 244 } 245 (void) fclose(poldtemp); 246 247 /* 248 * Create a temporary file to hold a segment 249 * from the new file, and write it. 250 */ 251 pnewtemp = maket(ntmp); 252 ntcnt = 0; 253 while (nlp && ntcnt < seglim) { 254 (void) fputs(nline, pnewtemp); 255 if (ferror(pnewtemp) != 0) { 256 fflags |= FTLMSG; 257 fatal("Can not write to temporary file"); 258 } 259 nlp = readline(&nline, &nbufsiz, pnewfile); 260 ntcnt++; 261 } 262 (void) fclose(pnewtemp); 263 264 /* Create pipes and fork. */ 265 if ((pipe(pfd)) == -1) 266 fatal("Can not create pipe"); 267 if ((i = fork()) < (pid_t)0) { 268 (void) close(pfd[0]); 269 (void) close(pfd[1]); 270 fatal("Can not fork, try again"); 271 } else if (i == (pid_t)0) { /* child process */ 272 (void) close(pfd[0]); 273 (void) close(1); 274 (void) dup(pfd[1]); 275 (void) close(pfd[1]); 276 277 /* Execute 'diff' on the segment files. */ 278 (void) execlp(diff, diff, otmp, ntmp, 0); 279 280 /* 281 * Exit code here must be > 1. 282 * Parent process treats exit code of 1 from the child 283 * as non-error because the child process "diff" exits 284 * with a status of 1 when a difference is encountered. 285 * The error here is a true error--the parent process 286 * needs to detect it and exit with a non-zero status. 287 */ 288 (void) close(1); 289 (void) snprintf(Error, sizeof (Error), 290 "Can not execute '%s'", diff); 291 fatal_num = 2; 292 fatal(Error); 293 } else { /* parent process */ 294 (void) close(pfd[1]); 295 pipeinp = fdopen(pfd[0], "r"); 296 297 /* Process 'diff' output. */ 298 while ((dp = readline(&diffline, &dbufsiz, pipeinp))) { 299 if (isdigit(*dp)) 300 fixnum(diffline); 301 else 302 (void) printf("%s", diffline); 303 } 304 305 (void) fclose(pipeinp); 306 307 /* EOF on pipe. */ 308 (void) wait(&status); 309 if (status&~0x100) { 310 (void) snprintf(Error, sizeof (Error), 311 "'%s' failed", diff); 312 fatal(Error); 313 } 314 } 315 linenum += seglim; 316 317 /* Remove temporary files. */ 318 (void) unlink(otmp); 319 (void) unlink(ntmp); 320 } 321 } 322 323 /* Routine to save remainder of a file. */ 324 static void 325 saverest(char **linep, size_t *bufsizp, FILE *iptr) 326 { 327 char *lp; 328 FILE *temptr; 329 330 temptr = maket(tempfile); 331 332 lp = *linep; 333 334 while (lp) { 335 (void) fputs(*linep, temptr); 336 linenum++; 337 lp = readline(linep, bufsizp, iptr); 338 } 339 (void) fclose(temptr); 340 } 341 342 /* Routine to write out data saved by 'saverest' and to remove the file. */ 343 static void 344 putsave(char **linep, size_t *bufsizp, char type) 345 { 346 FILE *temptr; 347 348 if ((temptr = fopen(tempfile, "r")) == NULL) { 349 (void) snprintf(Error, sizeof (Error), 350 "Can not open tempfile ('%s')", tempfile); fatal(Error); 351 } 352 353 while (readline(linep, bufsizp, temptr)) 354 (void) printf("%c %s", type, *linep); 355 356 (void) fclose(temptr); 357 358 (void) unlink(tempfile); 359 } 360 361 static void 362 fixnum(char *lp) 363 { 364 offset_t num; 365 366 while (*lp) { 367 switch (*lp) { 368 369 case 'a': 370 case 'c': 371 case 'd': 372 case ',': 373 case '\n': 374 (void) printf("%c", *lp); 375 lp++; 376 break; 377 378 default: 379 lp = satoi(lp, &num); 380 num += linenum; 381 (void) printf("%lld", num); 382 } 383 } 384 } 385 386 static void 387 addgen(char **lpp, size_t *bufsizp, FILE *fp) 388 { 389 offset_t oldline; 390 (void) printf("%llda%lld", linenum, linenum+1); 391 392 /* Save lines of new file. */ 393 oldline = linenum + 1; 394 saverest(lpp, bufsizp, fp); 395 396 if (oldline < linenum) 397 (void) printf(",%lld\n", linenum); 398 else 399 (void) printf("\n"); 400 401 /* Output saved lines, as 'diff' would. */ 402 putsave(lpp, bufsizp, '>'); 403 404 exit(0); 405 } 406 407 static void 408 delgen(char **lpp, size_t *bufsizp, FILE *fp) 409 { 410 offset_t savenum; 411 412 (void) printf("%lld", linenum+1); 413 savenum = linenum; 414 415 /* Save lines of old file. */ 416 saverest(lpp, bufsizp, fp); 417 418 if (savenum +1 != linenum) 419 (void) printf(",%lldd%lld\n", linenum, savenum); 420 else 421 (void) printf("d%lld\n", savenum); 422 423 /* Output saved lines, as 'diff' would. */ 424 putsave(lpp, bufsizp, '<'); 425 426 exit(0); 427 } 428 429 static void 430 clean_up() 431 { 432 (void) unlink(tempfile); 433 (void) unlink(otmp); 434 (void) unlink(ntmp); 435 } 436 437 static FILE * 438 maket(char *file) 439 { 440 FILE *iop; 441 int fd; 442 443 (void) strcpy(file, tempskel); 444 if ((fd = mkstemp(file)) == -1 || 445 (iop = fdopen(fd, "w+")) == NULL) { 446 (void) snprintf(Error, sizeof (Error), 447 "Can not open/create temp file ('%s')", file); 448 fatal(Error); 449 } 450 return (iop); 451 } 452 453 static void 454 fatal(char *msg) 455 /* 456 * General purpose error handler. 457 * 458 * The argument to fatal is a pointer to an error message string. 459 * The action of this routine is driven completely from 460 * the "fflags" global word (see <fatal.h>). 461 * 462 * The FTLMSG bit controls the writing of the error 463 * message on file descriptor 2. A newline is written 464 * after the user supplied message. 465 * 466 * If the FTLCLN bit is on, clean_up is called. 467 */ 468 { 469 if (fflags & FTLMSG) 470 (void) fprintf(stderr, "%s: %s\n", prognam, msg); 471 if (fflags & FTLCLN) 472 clean_up(); 473 if (fflags & FTLEXIT) 474 exit(fatal_num); 475 } 476 477 static void 478 setsig() 479 /* 480 * General-purpose signal setting routine. 481 * All non-ignored, non-caught signals are caught. 482 * If a signal other than hangup, interrupt, or quit is caught, 483 * a "user-oriented" message is printed on file descriptor 2. 484 * If hangup, interrupt or quit is caught, that signal 485 * is set to ignore. 486 * Termination is like that of "fatal", 487 * via "clean_up()" 488 */ 489 { 490 void (*act)(int); 491 int j; 492 493 for (j = 1; j < ONSIG; j++) { 494 act = signal(j, setsig1); 495 if (act == SIG_ERR) 496 continue; 497 if (act == SIG_DFL) 498 continue; 499 (void) signal(j, act); 500 } 501 } 502 503 static void 504 setsig1(int sig) 505 { 506 507 (void) signal(sig, SIG_IGN); 508 clean_up(); 509 exit(1); 510 } 511 512 static char * 513 satoi(char *p, offset_t *ip) 514 { 515 offset_t sum; 516 517 sum = 0; 518 while (isdigit(*p)) 519 sum = sum * 10 + (*p++ - '0'); 520 *ip = sum; 521 return (p); 522 } 523 524 /* 525 * Read a line of data from a file. If the current buffer is not large enough 526 * to contain the line, double the size of the buffer and continue reading. 527 * Loop until either the entire line is read or until there is no more space 528 * to be malloc'd. 529 */ 530 531 static char * 532 readline(char **bufferp, size_t *bufsizp, FILE *filep) 533 { 534 char *bufp; 535 size_t newsize; /* number of bytes to make buffer */ 536 size_t oldsize; 537 538 (*bufferp)[*bufsizp - 1] = '\t'; /* arbitrary non-zero character */ 539 (*bufferp)[*bufsizp - 2] = ' '; /* arbitrary non-newline char */ 540 bufp = fgets(*bufferp, *bufsizp, filep); 541 if (bufp == NULL) 542 return (bufp); 543 while ((*bufferp)[*bufsizp -1] == '\0' && 544 (*bufferp)[*bufsizp - 2] != '\n' && 545 strlen(*bufferp) == *bufsizp - 1) { 546 newsize = 2 * (*bufsizp); 547 bufp = (char *)realloc((void *)*bufferp, newsize); 548 if (bufp == NULL) 549 fatal("Out of memory"); 550 oldsize = *bufsizp; 551 *bufsizp = newsize; 552 *bufferp = bufp; 553 (*bufferp)[*bufsizp - 1] = '\t'; 554 (*bufferp)[*bufsizp - 2] = ' '; 555 bufp = fgets(*bufferp + oldsize -1, oldsize + 1, filep); 556 if (bufp == NULL) { 557 if (filep->_flag & _IOEOF) { 558 bufp = *bufferp; 559 break; 560 } else 561 fatal("Read error"); 562 } else 563 bufp = *bufferp; 564 } 565 return (bufp); 566 } 567