1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved 24 * 25 * module: 26 * anal.c 27 * 28 * purpose: 29 * routines to analyze the file trees and figure out what has changed 30 * and queue files for reconciliation. It also contains tree enumeration 31 * routines to for other purposes (pruning and link location). 32 * 33 * contents: 34 * 35 * change analysis: 36 * analyze .... (top level) analyze all files in the tree for changes 37 * summary .... print out change/reconciliation statistics for each base 38 * check_file . (static) look for changes and queue file for reconciliation 39 * check_changes (static) figure out if a particular file has changed 40 * queue_file . (static) add a file to the reconciliation list 41 * 42 * other tree enumeration functions: 43 * prune_file . (static) recursive descent and actual pruning 44 * prune ...... (top level) initiate pruning analysis for nonexistant files 45 * find_link .. look for other files to which a file may be a link 46 * link_update. propagate changed stat info to all other links 47 * same_name .. (static) figure out if two nodes describe same file 48 * 49 * misc: 50 * push_name .. maintain a running full pathname as we descend 51 * pop_name ... maintain a running full pathname as we pop back 52 * get_name ... return full pathname for the current file 53 * 54 * notes: 55 * analysis is limited to files that were evaluated in the previous 56 * pass ... since we don't have complete information about files that 57 * were not evaluated in the previous pass. 58 */ 59 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <strings.h> 63 64 #include "messages.h" 65 #include "filesync.h" 66 #include "database.h" 67 #include "debug.h" 68 69 /* 70 * routines: 71 */ 72 void push_name(const char *); 73 void pop_name(); 74 char *get_name(struct file *); 75 static errmask_t check_file(struct file *fp); 76 static diffmask_t check_changes(struct file *fp, int first, int second); 77 static int prune_file(struct file *fp); 78 static void queue_file(struct file *fp); 79 80 /* 81 * globals 82 */ 83 static struct file *changes; /* list of files to be reconciled */ 84 85 static long total_files; /* total number of files being considered */ 86 static long est_deletes; /* estimated number of files to be deleted */ 87 static long est_rmdirs; /* est rmdirs of non-empty directories */ 88 89 int inum_changes; /* LISTed directories whose I#s changed */ 90 91 /* 92 * routine: 93 * analyze 94 * 95 * purpose: 96 * top level routine for the analysis/reconciliation process 97 * 98 * parameters: 99 * none 100 * 101 * returns: 102 * error mask 103 * 104 * notes: 105 * a critical side effect of this routine is the creation of 106 * the reconciliation list, an ordered list of files that 107 * needed to be processed in the subsequent reconciliation pass 108 */ 109 errmask_t 110 analyze() 111 { struct base *bp; 112 struct file *fp; 113 int errs = 0; 114 int err; 115 int percentage; 116 bool_t aborted = FALSE; 117 char msgbuf[MAX_LINE]; 118 119 /* 120 * run through all bases and directories looking for files 121 * that have been renamed. This must be done before the 122 * difference analysis because a directory rename can introduce 123 * radical restructuring into a name-based tree. 124 */ 125 for (bp = bases; bp; bp = bp->b_next) { 126 for (fp = bp->b_files; fp; fp = fp->f_next) 127 if (fp->f_flags & F_EVALUATE) 128 errs |= find_renames(fp); 129 } 130 131 /* 132 * run through all bases and files looking for candidates 133 * note, however that we only descend into trees that have 134 * the evaluate flag turned on. As a result of new rules or 135 * restriction arguments, we may be deliberatly ignoring 136 * large amounts of the baseline. This means we won't do 137 * any stats to update the information in those nodes, and 138 * they will be written back just as they were. 139 * 140 * note that there is code to prune out baseline nodes for 141 * files that no longer exist, but that code is in reconcile 142 * and will never get a chance to run on nodes that aren't 143 * analyzed. 144 * 145 * we also want to run though all nodes with STAT errors 146 * so that we can put them on the reconciliation list. 147 */ 148 for (bp = bases; bp; bp = bp->b_next) { 149 for (fp = bp->b_files; fp; fp = fp->f_next) 150 if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) 151 errs |= check_file(fp); 152 } 153 154 /* 155 * my greatest fear is that someday, somehow, by messing with 156 * variables or baselines or who-knows-what, that someone will 157 * run a reconciliation against a large tree that doesn't correspond 158 * to the baseline, and I will infer that a bazillion files have 159 * been deleted and will propagate the slaughter before anyone 160 * can say somebody stop that maniac. 161 * 162 * in order to prevent such a possibility, we have a few different 163 * sanity checks. There is, of course, a tradeoff here between 164 * danger and irritation. The current set of heuristics for whether 165 * or not to generate a warning are (any of) 166 * 167 * at least CONFIRM_MIN files have been deleted AND 168 * CONFIRM_PCT of all files have been deleted 169 * 170 * the inode number on a LISTed directory has changed 171 * 172 * a non-empty directory has been deleted. 173 */ 174 msgbuf[0] = 0; 175 176 percentage = (est_deletes * 100) / (total_files ? total_files : 1); 177 if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT) 178 sprintf(msgbuf, gettext(WARN_deletes), est_deletes); 179 else if (inum_changes > 0) 180 sprintf(msgbuf, gettext(WARN_ichange), inum_changes); 181 else if (est_rmdirs) 182 sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs); 183 184 if (msgbuf[0]) 185 confirm(msgbuf); 186 187 /* 188 * TRICK: 189 * the change list contains both files that have changed 190 * (and probably warrant reconciliation) and files that 191 * we couldn't get up-to-date stat information on. The 192 * latter files should just be flagged as being in conflict 193 * so they can be reported in the summary. The same is 194 * true of all subsequent files if we abort reconciliation. 195 */ 196 for (fp = changes; fp; fp = fp->f_rnext) 197 if (aborted || (fp->f_flags & F_STAT_ERROR)) { 198 fp->f_flags |= F_CONFLICT; 199 /* if it isn't in the baseline yet, don't add it */ 200 if ((fp->f_flags & F_IN_BASELINE) == 0) 201 fp->f_flags |= F_REMOVE; 202 fp->f_problem = aborted ? PROB_aborted : PROB_restat; 203 (fp->f_base)->b_unresolved++; 204 errs |= ERR_UNRESOLVED; 205 if (opt_verbose) 206 fprintf(stdout, 207 gettext(aborted ? V_suppressed 208 : V_nostat), 209 fp->f_fullname); 210 } else { 211 err = reconcile(fp); 212 errs |= err; 213 if (opt_halt && (err & ERR_ABORT)) { 214 fprintf(stderr, gettext(ERR_abort_h)); 215 aborted = TRUE; 216 } 217 } 218 219 return (errs); 220 } 221 222 /* 223 * routine: 224 * prune_file 225 * 226 * purpose: 227 * to look for file entries that should be pruned from baseline 228 * prune the current file if it needs pruning, and recursively 229 * descend if it is a directory. 230 * 231 * parameters: 232 * pointer to file node 233 */ 234 static int 235 prune_file(struct file *fp) 236 { struct file *cp; 237 int prunes = 0; 238 239 /* if node hasn't been evaluated, mark it for removal */ 240 if ((fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) == 0) { 241 fp->f_flags |= F_REMOVE; 242 prunes++; 243 if (opt_debug & DBG_ANAL) 244 fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name); 245 } 246 247 /* now check our children */ 248 for (cp = fp->f_files; cp; cp = cp->f_next) 249 prunes += prune_file(cp); 250 251 return (prunes); 252 } 253 254 /* 255 * routine: 256 * prune 257 * 258 * purpose: 259 * to prune the baseline of entries that no longer correspond to 260 * existing rules. 261 * 262 * notes: 263 * This routine just calls prune_file on the top of each base tree. 264 */ 265 int 266 prune() 267 { struct base *bp; 268 struct file *fp; 269 int prunes = 0; 270 271 for (bp = bases; bp; bp = bp->b_next) { 272 for (fp = bp->b_files; fp; fp = fp->f_next) 273 prunes += prune_file(fp); 274 275 if ((bp->b_flags & F_EVALUATE) == 0) 276 bp->b_flags |= F_REMOVE; 277 } 278 279 return (prunes); 280 } 281 282 /* 283 * routine: 284 * summary 285 * 286 * purpose: 287 * to print out statics and conflict lists 288 */ 289 void 290 summary() 291 { struct base *bp; 292 struct file *fp; 293 extern bool_t need_super; 294 295 (void) fflush(stdout); 296 297 for (bp = bases; bp; bp = bp->b_next) { 298 299 /* see if this base was irrelevant */ 300 if ((bp->b_flags & F_EVALUATE) == 0) 301 continue; 302 303 /* print out a summary for this base */ 304 fprintf(stderr, gettext(SUM_hd), 305 bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles); 306 fprintf(stderr, gettext(SUM_dst), 307 bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc); 308 fprintf(stderr, gettext(SUM_src), 309 bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc); 310 if (bp->b_unresolved) 311 fprintf(stderr, gettext(SUM_unresolved), 312 bp->b_unresolved); 313 314 315 /* print out a list of unreconciled files for this base */ 316 for (fp = changes; fp; fp = fp->f_rnext) { 317 if (fp->f_base != bp) 318 continue; 319 if ((fp->f_flags & F_CONFLICT) == 0) 320 continue; 321 fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname, 322 fp->f_problem ? fp->f_problem : "???"); 323 } 324 325 fprintf(stderr, "\n"); 326 } 327 328 if (need_super) 329 fprintf(stderr, gettext(WARN_super)); 330 } 331 332 /* 333 * routine: 334 * check_file 335 * 336 * purpose: 337 * figure out if a file requires reconciliation and recursively 338 * descend into all sub-files and directories 339 * 340 * parameters: 341 * base pointer 342 * file pointer 343 * 344 * returns: 345 * error mask 346 * built up changes needed list 347 * updated statistics 348 * 349 * notes: 350 * this routine builds up a path name as it descends through 351 * the tree (see push_name, pop_name, get_name). 352 */ 353 static errmask_t 354 check_file(struct file *fp) 355 { struct file *cp; 356 int errs = 0; 357 358 if ((fp->f_flags & F_STAT_ERROR) == 0) { 359 /* see if the source has changed */ 360 fp->f_info[OPT_BASE].f_modtime = fp->f_s_modtime; 361 fp->f_info[OPT_BASE].f_ino = fp->f_s_inum; 362 fp->f_info[OPT_BASE].f_d_maj = fp->f_s_maj; 363 fp->f_info[OPT_BASE].f_d_min = fp->f_s_min; 364 fp->f_info[OPT_BASE].f_nlink = fp->f_s_nlink; 365 fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC); 366 367 /* see if the destination has changed */ 368 fp->f_info[OPT_BASE].f_modtime = fp->f_d_modtime; 369 fp->f_info[OPT_BASE].f_ino = fp->f_d_inum; 370 fp->f_info[OPT_BASE].f_d_maj = fp->f_d_maj; 371 fp->f_info[OPT_BASE].f_d_min = fp->f_d_min; 372 fp->f_info[OPT_BASE].f_nlink = fp->f_d_nlink; 373 fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST); 374 375 /* if nobody thinks the file exists, baseline needs pruning */ 376 if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) { 377 fp->f_srcdiffs |= D_DELETE; 378 fp->f_dstdiffs |= D_DELETE; 379 } 380 381 /* keep track of possible deletions to look for trouble */ 382 if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) { 383 est_deletes++; 384 385 /* see if file is (or has been) a non-empty directory */ 386 if (fp->f_files) 387 est_rmdirs++; 388 } 389 } 390 391 /* if we found differences, queue the file for reconciliation */ 392 if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) { 393 queue_file(fp); 394 395 if (opt_debug & DBG_ANAL) { 396 fprintf(stderr, "ANAL: src=%s", 397 showflags(diffmap, fp->f_srcdiffs)); 398 fprintf(stderr, " dst=%s", 399 showflags(diffmap, fp->f_dstdiffs)); 400 fprintf(stderr, " flgs=%s", 401 showflags(fileflags, fp->f_flags)); 402 fprintf(stderr, " name=%s\n", fp->f_fullname); 403 } 404 } 405 406 /* bump the total file count */ 407 fp->f_base->b_totfiles++; 408 total_files++; 409 410 /* if this is not a directory, we're done */ 411 if (fp->f_files == 0) 412 return (errs); 413 414 /* 415 * If this is a directory, we need to recursively analyze 416 * our children, but only children who have been evaluated. 417 * If a node has not been evaluated, then we don't have 418 * updated stat information and there is nothing to analyze. 419 * 420 * we also want to run though all nodes with STAT errors 421 * so that we can put them on the reconciliation list. 422 * If a directory is unreadable on one side, all files 423 * under that directory (ON BOTH SIDES) must be marked as 424 * blocked by stat errors. 425 */ 426 push_name(fp->f_name); 427 428 for (cp = fp->f_files; cp; cp = cp->f_next) { 429 if (fp->f_flags & F_STAT_ERROR) 430 cp->f_flags |= F_STAT_ERROR; 431 if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR)) 432 errs |= check_file(cp); 433 } 434 435 pop_name(); 436 437 return (errs); 438 } 439 440 /* 441 * routine: 442 * check_changes 443 * 444 * purpose: 445 * to figure out what has changed for a specific file 446 * 447 * parameters: 448 * file pointer 449 * the reference info 450 * the info to be checked for changes 451 * 452 * returns: 453 * diff mask 454 * 455 * notes: 456 * this routine doesn't pretend to understand what happened. 457 * it merely enumerates the ways in which the files differ. 458 */ 459 static diffmask_t 460 check_changes(struct file *fp, int ref, int new) 461 { struct fileinfo *rp, *np; 462 int mask = 0; 463 int type; 464 465 rp = &fp->f_info[ref]; 466 np = &fp->f_info[new]; 467 468 if (np->f_uid != rp->f_uid) 469 mask |= D_UID; 470 471 if (np->f_gid != rp->f_gid) 472 mask |= D_GID; 473 474 if (np->f_mode != rp->f_mode) 475 mask |= D_PROT; 476 477 type = np->f_type; 478 if (type != rp->f_type) { 479 if (type == 0) 480 mask |= D_DELETE; 481 else if (rp->f_type == 0) 482 mask |= D_CREATE; 483 else 484 mask |= D_TYPE; 485 } else if (type == S_IFBLK || type == S_IFCHR) { 486 /* 487 * for special files, we only look at the maj/min 488 */ 489 if (np->f_rd_maj != rp->f_rd_maj) 490 mask |= D_SIZE; 491 if (np->f_rd_min != rp->f_rd_min) 492 mask |= D_SIZE; 493 } else if (type != S_IFDIR) { 494 /* 495 * for directories, we don't look directly at 496 * the contents, so these fields don't mean 497 * anything. If the directories have changed 498 * in any interesting way, we'll find it by 499 * walking the tree. 500 */ 501 if (np->f_modtime > rp->f_modtime) 502 mask |= D_MTIME; 503 504 if (np->f_size != rp->f_size) 505 mask |= D_SIZE; 506 507 if (np->f_nlink != rp->f_nlink) 508 mask |= D_LINKS; 509 } 510 511 if (cmp_acls(rp, np) == 0) 512 mask |= D_FACLS; 513 514 return (mask); 515 } 516 517 /* 518 * routine: 519 * same_name 520 * 521 * purpose: 522 * to figure out whether or not two databsae nodes actually refer to 523 * the same file. 524 * 525 * parameters: 526 * pointers to two file description nodes 527 * which side we should check 528 * 529 * returns: 530 * TRUE/FALSE 531 * 532 * notes: 533 * if a single directory is specified in multiple base pairs, it 534 * is possible to have multiple nodes in the database describing 535 * the same file. This routine is supposed to detect those cases. 536 * 537 * what should be a trivial string comparison is complicated by 538 * the possibility that the two nodes might describe the same file 539 * from base directories at different depths. Thus, rather than 540 * comparing two strings, we really want to compare the concatenation 541 * of two pairs of strings. Unfortunately calling full_name would 542 * be awkward right now, so instead we have our own comparison 543 * routine that automatically skips from the first string to 544 * the second. 545 */ 546 static bool_t 547 same_name(struct file *f1, struct file *f2, side_t srcdst) 548 { 549 char *s1, *s2, *x1, *x2; 550 551 if (srcdst == OPT_SRC) { 552 s1 = (f1->f_base)->b_src_name; 553 s2 = (f2->f_base)->b_src_name; 554 } else { 555 s1 = (f1->f_base)->b_dst_name; 556 s2 = (f2->f_base)->b_dst_name; 557 } 558 x1 = f1->f_fullname; 559 x2 = f2->f_fullname; 560 561 /* 562 * Compare the two names, and if they differ before they end 563 * this is a non-match. If they both end at the same time, 564 * this is a match. 565 * 566 * The trick here is that each string is actually the logical 567 * concatenation of two strings, and we need to automatically 568 * wrap from the first to the second string in each pair. There 569 * is no requirement that the two (concatenated) strings be 570 * broken at the same point, so we have a slightly baroque 571 * comparsion loop. 572 */ 573 while (*s1 && *s1 == *s2) { 574 575 /* 576 * strings have been identical so far, so advance the 577 * pointers and continue the comparison. The trick 578 * is that when either string ends, we have to wrap 579 * over to its extension. 580 */ 581 s1++; s2++; 582 if (*s1 && *s2) 583 continue; 584 585 /* 586 * at least one of the strings has ended. 587 * there is an implicit slash between the string 588 * and its extension, and this has to be matched 589 * against the other string. 590 */ 591 if (*s1 != *s2) { 592 if (*s1 == 0 && *s2 == '/') 593 s2++; 594 else if (*s2 == 0 && *s1 == '/') 595 s1++; 596 else 597 /* the disagreement doesn't come at a slash */ 598 break; 599 } 600 601 /* 602 * if either string has ended, wrap to its extension 603 */ 604 if (*s1 == 0 && x1 != 0) { 605 s1 = x1; 606 x1 = 0; 607 } 608 if (*s2 == 0 && x2 != 0) { 609 s2 = x2; 610 x2 = 0; 611 } 612 } 613 614 return (*s1 == *s2); 615 } 616 617 /* 618 * routine: 619 * find_link 620 * 621 * purpose: 622 * to figure out if there is a file to which we should 623 * be creating a link (rather than making a copy) 624 * 625 * parameters: 626 * file node for the file to be created (that we hope is merely a link) 627 * which side is to be changed (src/dst) 628 * 629 * return: 630 * 0 no link is appropriate 631 * else pointer to file node for link referent 632 * 633 * notes: 634 * there are a few strange heuristics in this routine and I 635 * wouldn't bet my soul that I got all of them right. The general 636 * theory is that when a new file is created, we look to see if it 637 * is a link to another file on the changed side, and if it is, we 638 * find the corresponding file on the unchanged side. 639 * 640 * cases we want to be able to handle: 641 * 1. one or more links are created to a prexisting file 642 * 2. a preexisting only link is renamed 643 * 3. a rename of one of multiple links to a preexisting file 644 * 4. a single file is created with multiple links 645 */ 646 struct file * 647 find_link(struct file *fp, side_t srcdst) 648 { struct file *lp; 649 side_t chgside, tgtside; 650 struct fileinfo *chgp, *tgtp, *basp, *fcp, *ftp; 651 652 /* chg = side on which the change was noticed */ 653 /* tgt = side to which the change is to be propagated */ 654 chgside = (srcdst == OPT_SRC) ? OPT_DST : OPT_SRC; 655 tgtside = (srcdst == OPT_SRC) ? OPT_SRC : OPT_DST; 656 fcp = &fp->f_info[chgside]; 657 ftp = &fp->f_info[tgtside]; 658 659 /* 660 * cases 1 and 3 661 * 662 * When a new link is created, we should be able to find 663 * another file in the changed hierarchy that has the same 664 * I-node number. We expect it to be on the changed list 665 * because the link count will have gone up or because all 666 * of the copies are new. If we find one, then the new file 667 * on the receiving file should be a link to the corresponding 668 * existing file. 669 * 670 * case 4 671 * 672 * the first link will be dealt with as a copy, but all 673 * subsequent links should find an existing file analogous 674 * to one of the links on the changed side, and create 675 * corresponding links on the other side. 676 * 677 * in each of these cases, there should be multiple links 678 * on the changed side. If the linkcount on the changed 679 * side is one, we needn't bother searching for other links. 680 */ 681 if (fcp->f_nlink > 1) 682 for (lp = changes; lp; lp = lp->f_rnext) { 683 /* finding the same node doesn't count */ 684 if (fp == lp) 685 continue; 686 687 tgtp = &lp->f_info[tgtside]; 688 chgp = &lp->f_info[chgside]; 689 690 /* 691 * if the file doesn't already exist on the target side 692 * we cannot make a link to it 693 */ 694 if (tgtp->f_mode == 0) 695 continue; 696 697 /* 698 * if this is indeed a link, then the prospective file on 699 * the changed side will have the same dev/inum as the file 700 * we are looking for 701 */ 702 if (fcp->f_d_maj != chgp->f_d_maj) 703 continue; 704 if (fcp->f_d_min != chgp->f_d_min) 705 continue; 706 if (fcp->f_ino != chgp->f_ino) 707 continue; 708 709 /* 710 * if the target side is already a link to this file, 711 * then there is no new link to be created 712 * FIX: how does this interact with copies over links 713 */ 714 if ((ftp->f_d_maj == tgtp->f_d_maj) && 715 (ftp->f_d_min == tgtp->f_d_min) && 716 (ftp->f_ino == tgtp->f_ino)) 717 continue; 718 719 /* 720 * there is a pathological situation where a single file 721 * might appear under multiple base directories. This is 722 * damned awkward to detect in any other way, so we must 723 * check to see if we have just found another database 724 * instance for the same file (on the changed side). 725 */ 726 if ((fp->f_base != lp->f_base) && same_name(fp, lp, chgside)) 727 continue; 728 729 if (opt_debug & DBG_ANAL) 730 fprintf(stderr, "ANAL: FIND LINK %s and %s\n", 731 fp->f_fullname, lp->f_fullname); 732 733 return (lp); 734 } 735 736 /* 737 * case 2: a simple rename of the only link 738 * 739 * In this case, there may not be any other existing file on 740 * the changed side that has the same I-node number. There 741 * might, however, be a record of such a file in the baseline. 742 * If we can find an identical file with a different name that 743 * has recently disappeared, we have a likely rename. 744 */ 745 for (lp = changes; lp; lp = lp->f_rnext) { 746 747 /* finding the same node doesn't count */ 748 if (fp == lp) 749 continue; 750 751 tgtp = &lp->f_info[tgtside]; 752 chgp = &lp->f_info[chgside]; 753 754 /* 755 * if the file still exists on the changed side this is 756 * not a simple rename, and in fact the previous pass 757 * would have found it. 758 */ 759 if (chgp->f_mode != 0) 760 continue; 761 762 /* 763 * the inode number for the new link on the changed 764 * side must match the inode number for the old link 765 * from the baseline. 766 */ 767 if (fcp->f_d_maj != ((srcdst == OPT_SRC) ? lp->f_d_maj 768 : lp->f_s_maj)) 769 continue; 770 if (fcp->f_d_min != ((srcdst == OPT_SRC) ? lp->f_d_min 771 : lp->f_s_min)) 772 continue; 773 if (fcp->f_ino != ((srcdst == OPT_SRC) ? lp->f_d_inum 774 : lp->f_s_inum)) 775 continue; 776 777 /* finding a file we are already linked to doesn't help */ 778 if ((ftp->f_d_maj == tgtp->f_d_maj) && 779 (ftp->f_d_min == tgtp->f_d_min) && 780 (ftp->f_ino == tgtp->f_ino)) 781 continue; 782 783 /* 784 * there is a danger that we will confuse an 785 * inode reallocation with a rename. We should 786 * only consider this to be a rename if the 787 * new file is identical to the old one 788 */ 789 basp = &lp->f_info[OPT_BASE]; 790 if (fcp->f_type != basp->f_type) 791 continue; 792 if (fcp->f_size != basp->f_size) 793 continue; 794 if (fcp->f_mode != basp->f_mode) 795 continue; 796 if (fcp->f_uid != basp->f_uid) 797 continue; 798 if (fcp->f_gid != basp->f_gid) 799 continue; 800 801 if (opt_debug & DBG_ANAL) 802 fprintf(stderr, "ANAL: FIND RENAME %s and %s\n", 803 fp->f_fullname, lp->f_fullname); 804 805 return (lp); 806 } 807 808 return (0); 809 } 810 811 /* 812 * routine: 813 * has_other_links 814 * 815 * purpose: 816 * to determine whether or not there is more that one link to a 817 * particular file. We are willing to delete a link to a file 818 * that has changed if we will still have other links to it. 819 * The trick here is that we only care about links under our 820 * dominion. 821 * 822 * parameters: 823 * file pointer to node we are interested in 824 * which side we are looking to additional links on 825 * 826 * returns: 827 * TRUE if there are multiple links 828 * FALSE if this is the only one we know of 829 */ 830 bool_t 831 has_other_links(struct file *fp, side_t srcdst) 832 { struct file *lp; 833 struct fileinfo *fip, *lip; 834 835 fip = &fp->f_info[srcdst]; 836 837 /* if the link count is one, there couldn't be others */ 838 if (fip->f_nlink < 2) 839 return (FALSE); 840 841 /* look for any other files for the same inode */ 842 for (lp = changes; lp; lp = lp->f_rnext) { 843 /* finding the same node doesn't count */ 844 if (fp == lp) 845 continue; 846 847 lip = &lp->f_info[srcdst]; 848 849 /* 850 * file must still exist on this side 851 */ 852 if (lip->f_mode == 0) 853 continue; 854 855 /* 856 * if this is indeed a link, then the prospective file on 857 * the changed side will have the same dev/inum as the file 858 * we are looking for 859 */ 860 if (lip->f_d_maj != fip->f_d_maj) 861 continue; 862 if (lip->f_d_min != fip->f_d_min) 863 continue; 864 if (lip->f_ino != fip->f_ino) 865 continue; 866 867 /* 868 * we have found at least one other link 869 */ 870 return (TRUE); 871 } 872 873 return (FALSE); 874 } 875 876 /* 877 * routine: 878 * link_update 879 * 880 * purpose: 881 * to propoagate a stat change to all other file nodes that 882 * correspond to the same I-node on the changed side 883 * 884 * parameters: 885 * file pointer for the updated file 886 * which side was changed 887 * 888 * returns: 889 * void 890 * 891 * notes: 892 * if we have copied onto a file, we have copied onto all 893 * of its links, but since we do all stats before we do any 894 * copies, the stat information recently collected for links 895 * is no longer up-to-date, and this would result in incorrect 896 * reconciliation (redundant copies). 897 * 898 * There is an assumption here that all links to a changed 899 * file will be in the change list. This is true for almost 900 * all cases not involving restriction. If we do fail to 901 * update the baseline for a file that was off the change list, 902 * the worst that is likely to happen is that we will think 903 * it changed later (but will almost surely find that both 904 * copies agree). 905 */ 906 void 907 link_update(struct file *fp, side_t which) 908 { struct file *lp; 909 910 for (lp = changes; lp; lp = lp->f_rnext) { 911 /* finding the current entry doesn't count */ 912 if (lp == fp) 913 continue; 914 915 /* look for same i#, maj, min on changed side */ 916 if (lp->f_info[which].f_ino != fp->f_info[which].f_ino) 917 continue; 918 if (lp->f_info[which].f_d_maj != fp->f_info[which].f_d_maj) 919 continue; 920 if (lp->f_info[which].f_d_min != fp->f_info[which].f_d_min) 921 continue; 922 923 /* 924 * this appears to be another link to the same file 925 * so the updated stat information for one must be 926 * correct for the other. 927 */ 928 lp->f_info[which].f_type = fp->f_info[which].f_type; 929 lp->f_info[which].f_size = fp->f_info[which].f_size; 930 lp->f_info[which].f_mode = fp->f_info[which].f_mode; 931 lp->f_info[which].f_uid = fp->f_info[which].f_uid; 932 lp->f_info[which].f_gid = fp->f_info[which].f_gid; 933 lp->f_info[which].f_modtime = fp->f_info[which].f_modtime; 934 lp->f_info[which].f_modns = fp->f_info[which].f_modns; 935 lp->f_info[which].f_nlink = fp->f_info[which].f_nlink; 936 lp->f_info[which].f_rd_maj = fp->f_info[which].f_rd_maj; 937 lp->f_info[which].f_rd_min = fp->f_info[which].f_rd_min; 938 939 if (opt_debug & DBG_STAT) 940 fprintf(stderr, 941 "STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n", 942 lp->f_name, lp->f_info[which].f_modtime, 943 lp->f_info[which].f_modns); 944 } 945 } 946 947 /* 948 * routine: 949 * queue_file 950 * 951 * purpose: 952 * append a file to the list of needed reconciliations 953 * 954 * parameters: 955 * pointer to file 956 * 957 * notes: 958 * when a request is appended to the reconciliation list, 959 * we fill in the full name. We delayed this in hopes that 960 * it wouldn't be necessary (saving cycles and memory) 961 * 962 * There is some funny business with modification times. 963 * In general, we queue files in order of the latest modification 964 * time so that propagations preserve relative ordering. There 965 * are, however, a few important exceptions: 966 * 1. all directory creations happen at time zero, 967 * so that they are created before any files can 968 * be added to them. 969 * 2. all directory deletions happen at time infinity-depth, 970 * so that everything else can be removed before the 971 * directories themselves are removed. 972 * 3. all file deletions happen at time infinity-depth 973 * so that (in renames) the links will preceed the unlinks. 974 */ 975 static void 976 queue_file(struct file *fp) 977 { struct file **pp, *np; 978 979 #define TIME_ZERO 0L /* the earliest possible time */ 980 #define TIME_LONG 0x7FFFFFFF /* the latest possible time */ 981 982 /* 983 * figure out the modification time for sequencing purposes 984 */ 985 if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) { 986 /* 987 * deletions are performed last, and depth first 988 */ 989 fp->f_modtime = TIME_LONG - fp->f_depth; 990 } else if (fp->f_info[OPT_SRC].f_type != S_IFDIR && 991 fp->f_info[OPT_DST].f_type != S_IFDIR) { 992 /* 993 * for most files we use the latest mod time 994 */ 995 fp->f_modtime = fp->f_info[OPT_SRC].f_modtime; 996 fp->f_modns = fp->f_info[OPT_SRC].f_modns; 997 if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) { 998 fp->f_modtime = fp->f_info[OPT_DST].f_modtime; 999 fp->f_modns = fp->f_info[OPT_DST].f_modns; 1000 } 1001 } else { 1002 /* 1003 * new directory creations need to happen before anything 1004 * else and are automatically sequenced in traversal order 1005 */ 1006 fp->f_modtime = TIME_ZERO; 1007 } 1008 1009 /* 1010 * insertion is time ordered, and for equal times, 1011 * insertions is in (pre-order) traversal order 1012 */ 1013 for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) { 1014 if (fp->f_modtime > np->f_modtime) 1015 continue; 1016 if (fp->f_modtime < np->f_modtime) 1017 break; 1018 if (fp->f_modns < np->f_modns) 1019 break; 1020 } 1021 1022 fp->f_fullname = strdup(get_name(fp)); 1023 fp->f_rnext = np; 1024 *pp = fp; 1025 } 1026 1027 1028 /* 1029 * routines: 1030 * push_name/pop_name/get_name 1031 * 1032 * purpose: 1033 * maintain a name stack so we can form name of a particular file 1034 * as the concatenation of all of the names between it and the 1035 * (know to be fully qualified) base directory. 1036 * 1037 * notes: 1038 * we go to this trouble because most files never change and 1039 * so we don't need to associate full names with every one. 1040 * This stack is maintained during analysis, and if we decide 1041 * to add a file to the reconciliation list, we can use the 1042 * stack to generate a fully qualified name at that time. 1043 * 1044 * we compress out '/./' when we return a name. Given that the 1045 * stack was built by a tree walk, the only place a /./ should 1046 * appear is at the first level after the base ... but there 1047 * are legitimate ways for them to appear there. 1048 * 1049 * these names can get deep, so we dynamically size our name buffer 1050 */ 1051 static const char *namestack[ MAX_DEPTH + 1 ]; 1052 static int namedepth = 0; 1053 static int namelen = 0; 1054 1055 void 1056 push_name(const char *name) 1057 { 1058 namestack[ namedepth++ ] = name; 1059 namelen += 2 + strlen(name); 1060 1061 /* make sure we don't overflow our name stack */ 1062 if (namedepth >= MAX_DEPTH) { 1063 fprintf(stderr, gettext(ERR_deep), name); 1064 exit(ERR_OTHER); 1065 } 1066 } 1067 1068 void 1069 pop_name(void) 1070 { 1071 namelen -= 2 + strlen(namestack[--namedepth]); 1072 namestack[ namedepth ] = 0; 1073 1074 #ifdef DBG_ERRORS 1075 /* just a little sanity check here */ 1076 if (namedepth <= 0) { 1077 if (namedepth < 0) { 1078 fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n"); 1079 exit(ERR_OTHER); 1080 } else if (namelen != 0) { 1081 fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n"); 1082 exit(ERR_OTHER); 1083 } 1084 } 1085 #endif 1086 } 1087 1088 char 1089 *get_name(struct file *fp) 1090 { int i; 1091 static char *namebuf = 0; 1092 static int buflen = 0; 1093 1094 /* make sure we have an adequate buffer */ 1095 i = namelen + 1 + strlen(fp->f_name); 1096 if (buflen < i) { 1097 for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME); 1098 namebuf = (char *) realloc(namebuf, buflen); 1099 } 1100 1101 /* assemble the name */ 1102 namebuf[0] = 0; 1103 for (i = 0; i < namedepth; i++) { 1104 if (strcmp(namestack[i], ".")) { 1105 strcat(namebuf, namestack[i]); 1106 strcat(namebuf, "/"); 1107 } 1108 } 1109 1110 strcat(namebuf, fp->f_name); 1111 1112 return (namebuf); 1113 } 1114