1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved 24 * 25 * module: 26 * anal.c 27 * 28 * purpose: 29 * routines to analyze the file trees and figure out what has changed 30 * and queue files for reconciliation. It also contains tree enumeration 31 * routines to for other purposes (pruning and link location). 32 * 33 * contents: 34 * 35 * change analysis: 36 * analyze .... (top level) analyze all files in the tree for changes 37 * summary .... print out change/reconciliation statistics for each base 38 * check_file . (static) look for changes and queue file for reconciliation 39 * check_changes (static) figure out if a particular file has changed 40 * queue_file . (static) add a file to the reconciliation list 41 * 42 * other tree enumeration functions: 43 * prune_file . (static) recursive descent and actual pruning 44 * prune ...... (top level) initiate pruning analysis for nonexistant files 45 * find_link .. look for other files to which a file may be a link 46 * link_update. propagate changed stat info to all other links 47 * same_name .. (static) figure out if two nodes describe same file 48 * 49 * misc: 50 * push_name .. maintain a running full pathname as we descend 51 * pop_name ... maintain a running full pathname as we pop back 52 * get_name ... return full pathname for the current file 53 * 54 * notes: 55 * analysis is limited to files that were evaluated in the previous 56 * pass ... since we don't have complete information about files that 57 * were not evaluated in the previous pass. 58 */ 59 #pragma ident "%Z%%M% %I% %E% SMI" 60 61 #include <stdio.h> 62 #include <stdlib.h> 63 #include <strings.h> 64 65 #include "messages.h" 66 #include "filesync.h" 67 #include "database.h" 68 #include "debug.h" 69 70 /* 71 * routines: 72 */ 73 void push_name(const char *); 74 void pop_name(); 75 char *get_name(struct file *); 76 static errmask_t check_file(struct file *fp); 77 static diffmask_t check_changes(struct file *fp, int first, int second); 78 static int prune_file(struct file *fp); 79 static void queue_file(struct file *fp); 80 81 /* 82 * globals 83 */ 84 static struct file *changes; /* list of files to be reconciled */ 85 86 static long total_files; /* total number of files being considered */ 87 static long est_deletes; /* estimated number of files to be deleted */ 88 static long est_rmdirs; /* est rmdirs of non-empty directories */ 89 90 int inum_changes; /* LISTed directories whose I#s changed */ 91 92 /* 93 * routine: 94 * analyze 95 * 96 * purpose: 97 * top level routine for the analysis/reconciliation process 98 * 99 * parameters: 100 * none 101 * 102 * returns: 103 * error mask 104 * 105 * notes: 106 * a critical side effect of this routine is the creation of 107 * the reconciliation list, an ordered list of files that 108 * needed to be processed in the subsequent reconciliation pass 109 */ 110 errmask_t 111 analyze() 112 { struct base *bp; 113 struct file *fp; 114 int errs = 0; 115 int err; 116 int percentage; 117 bool_t aborted = FALSE; 118 char msgbuf[MAX_LINE]; 119 120 /* 121 * run through all bases and directories looking for files 122 * that have been renamed. This must be done before the 123 * difference analysis because a directory rename can introduce 124 * radical restructuring into a name-based tree. 125 */ 126 for (bp = bases; bp; bp = bp->b_next) { 127 for (fp = bp->b_files; fp; fp = fp->f_next) 128 if (fp->f_flags & F_EVALUATE) 129 errs |= find_renames(fp); 130 } 131 132 /* 133 * run through all bases and files looking for candidates 134 * note, however that we only descend into trees that have 135 * the evaluate flag turned on. As a result of new rules or 136 * restriction arguments, we may be deliberatly ignoring 137 * large amounts of the baseline. This means we won't do 138 * any stats to update the information in those nodes, and 139 * they will be written back just as they were. 140 * 141 * note that there is code to prune out baseline nodes for 142 * files that no longer exist, but that code is in reconcile 143 * and will never get a chance to run on nodes that aren't 144 * analyzed. 145 * 146 * we also want to run though all nodes with STAT errors 147 * so that we can put them on the reconciliation list. 148 */ 149 for (bp = bases; bp; bp = bp->b_next) { 150 for (fp = bp->b_files; fp; fp = fp->f_next) 151 if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) 152 errs |= check_file(fp); 153 } 154 155 /* 156 * my greatest fear is that someday, somehow, by messing with 157 * variables or baselines or who-knows-what, that someone will 158 * run a reconciliation against a large tree that doesn't correspond 159 * to the baseline, and I will infer that a bazillion files have 160 * been deleted and will propagate the slaughter before anyone 161 * can say somebody stop that maniac. 162 * 163 * in order to prevent such a possibility, we have a few different 164 * sanity checks. There is, of course, a tradeoff here between 165 * danger and irritation. The current set of heuristics for whether 166 * or not to generate a warning are (any of) 167 * 168 * at least CONFIRM_MIN files have been deleted AND 169 * CONFIRM_PCT of all files have been deleted 170 * 171 * the inode number on a LISTed directory has changed 172 * 173 * a non-empty directory has been deleted. 174 */ 175 msgbuf[0] = 0; 176 177 percentage = (est_deletes * 100) / (total_files ? total_files : 1); 178 if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT) 179 sprintf(msgbuf, gettext(WARN_deletes), est_deletes); 180 else if (inum_changes > 0) 181 sprintf(msgbuf, gettext(WARN_ichange), inum_changes); 182 else if (est_rmdirs) 183 sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs); 184 185 if (msgbuf[0]) 186 confirm(msgbuf); 187 188 /* 189 * TRICK: 190 * the change list contains both files that have changed 191 * (and probably warrant reconciliation) and files that 192 * we couldn't get up-to-date stat information on. The 193 * latter files should just be flagged as being in conflict 194 * so they can be reported in the summary. The same is 195 * true of all subsequent files if we abort reconciliation. 196 */ 197 for (fp = changes; fp; fp = fp->f_rnext) 198 if (aborted || (fp->f_flags & F_STAT_ERROR)) { 199 fp->f_flags |= F_CONFLICT; 200 /* if it isn't in the baseline yet, don't add it */ 201 if ((fp->f_flags & F_IN_BASELINE) == 0) 202 fp->f_flags |= F_REMOVE; 203 fp->f_problem = aborted ? PROB_aborted : PROB_restat; 204 (fp->f_base)->b_unresolved++; 205 errs |= ERR_UNRESOLVED; 206 if (opt_verbose) 207 fprintf(stdout, 208 gettext(aborted ? V_suppressed 209 : V_nostat), 210 fp->f_fullname); 211 } else { 212 err = reconcile(fp); 213 errs |= err; 214 if (opt_halt && (err & ERR_ABORT)) { 215 fprintf(stderr, gettext(ERR_abort_h)); 216 aborted = TRUE; 217 } 218 } 219 220 return (errs); 221 } 222 223 /* 224 * routine: 225 * prune_file 226 * 227 * purpose: 228 * to look for file entries that should be pruned from baseline 229 * prune the current file if it needs pruning, and recursively 230 * descend if it is a directory. 231 * 232 * parameters: 233 * pointer to file node 234 */ 235 static int 236 prune_file(struct file *fp) 237 { struct file *cp; 238 int prunes = 0; 239 240 /* if node hasn't been evaluated, mark it for removal */ 241 if ((fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) == 0) { 242 fp->f_flags |= F_REMOVE; 243 prunes++; 244 if (opt_debug & DBG_ANAL) 245 fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name); 246 } 247 248 /* now check our children */ 249 for (cp = fp->f_files; cp; cp = cp->f_next) 250 prunes += prune_file(cp); 251 252 return (prunes); 253 } 254 255 /* 256 * routine: 257 * prune 258 * 259 * purpose: 260 * to prune the baseline of entries that no longer correspond to 261 * existing rules. 262 * 263 * notes: 264 * This routine just calls prune_file on the top of each base tree. 265 */ 266 int 267 prune() 268 { struct base *bp; 269 struct file *fp; 270 int prunes = 0; 271 272 for (bp = bases; bp; bp = bp->b_next) { 273 for (fp = bp->b_files; fp; fp = fp->f_next) 274 prunes += prune_file(fp); 275 276 if ((bp->b_flags & F_EVALUATE) == 0) 277 bp->b_flags |= F_REMOVE; 278 } 279 280 return (prunes); 281 } 282 283 /* 284 * routine: 285 * summary 286 * 287 * purpose: 288 * to print out statics and conflict lists 289 */ 290 void 291 summary() 292 { struct base *bp; 293 struct file *fp; 294 extern bool_t need_super; 295 296 (void) fflush(stdout); 297 298 for (bp = bases; bp; bp = bp->b_next) { 299 300 /* see if this base was irrelevant */ 301 if ((bp->b_flags & F_EVALUATE) == 0) 302 continue; 303 304 /* print out a summary for this base */ 305 fprintf(stderr, gettext(SUM_hd), 306 bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles); 307 fprintf(stderr, gettext(SUM_dst), 308 bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc); 309 fprintf(stderr, gettext(SUM_src), 310 bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc); 311 if (bp->b_unresolved) 312 fprintf(stderr, gettext(SUM_unresolved), 313 bp->b_unresolved); 314 315 316 /* print out a list of unreconciled files for this base */ 317 for (fp = changes; fp; fp = fp->f_rnext) { 318 if (fp->f_base != bp) 319 continue; 320 if ((fp->f_flags & F_CONFLICT) == 0) 321 continue; 322 fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname, 323 fp->f_problem ? fp->f_problem : "???"); 324 } 325 326 fprintf(stderr, "\n"); 327 } 328 329 if (need_super) 330 fprintf(stderr, gettext(WARN_super)); 331 } 332 333 /* 334 * routine: 335 * check_file 336 * 337 * purpose: 338 * figure out if a file requires reconciliation and recursively 339 * descend into all sub-files and directories 340 * 341 * parameters: 342 * base pointer 343 * file pointer 344 * 345 * returns: 346 * error mask 347 * built up changes needed list 348 * updated statistics 349 * 350 * notes: 351 * this routine builds up a path name as it descends through 352 * the tree (see push_name, pop_name, get_name). 353 */ 354 static errmask_t 355 check_file(struct file *fp) 356 { struct file *cp; 357 int errs = 0; 358 359 if ((fp->f_flags & F_STAT_ERROR) == 0) { 360 /* see if the source has changed */ 361 fp->f_info[OPT_BASE].f_modtime = fp->f_s_modtime; 362 fp->f_info[OPT_BASE].f_ino = fp->f_s_inum; 363 fp->f_info[OPT_BASE].f_d_maj = fp->f_s_maj; 364 fp->f_info[OPT_BASE].f_d_min = fp->f_s_min; 365 fp->f_info[OPT_BASE].f_nlink = fp->f_s_nlink; 366 fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC); 367 368 /* see if the destination has changed */ 369 fp->f_info[OPT_BASE].f_modtime = fp->f_d_modtime; 370 fp->f_info[OPT_BASE].f_ino = fp->f_d_inum; 371 fp->f_info[OPT_BASE].f_d_maj = fp->f_d_maj; 372 fp->f_info[OPT_BASE].f_d_min = fp->f_d_min; 373 fp->f_info[OPT_BASE].f_nlink = fp->f_d_nlink; 374 fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST); 375 376 /* if nobody thinks the file exists, baseline needs pruning */ 377 if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) { 378 fp->f_srcdiffs |= D_DELETE; 379 fp->f_dstdiffs |= D_DELETE; 380 } 381 382 /* keep track of possible deletions to look for trouble */ 383 if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) { 384 est_deletes++; 385 386 /* see if file is (or has been) a non-empty directory */ 387 if (fp->f_files) 388 est_rmdirs++; 389 } 390 } 391 392 /* if we found differences, queue the file for reconciliation */ 393 if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) { 394 queue_file(fp); 395 396 if (opt_debug & DBG_ANAL) { 397 fprintf(stderr, "ANAL: src=%s", 398 showflags(diffmap, fp->f_srcdiffs)); 399 fprintf(stderr, " dst=%s", 400 showflags(diffmap, fp->f_dstdiffs)); 401 fprintf(stderr, " flgs=%s", 402 showflags(fileflags, fp->f_flags)); 403 fprintf(stderr, " name=%s\n", fp->f_fullname); 404 } 405 } 406 407 /* bump the total file count */ 408 fp->f_base->b_totfiles++; 409 total_files++; 410 411 /* if this is not a directory, we're done */ 412 if (fp->f_files == 0) 413 return (errs); 414 415 /* 416 * If this is a directory, we need to recursively analyze 417 * our children, but only children who have been evaluated. 418 * If a node has not been evaluated, then we don't have 419 * updated stat information and there is nothing to analyze. 420 * 421 * we also want to run though all nodes with STAT errors 422 * so that we can put them on the reconciliation list. 423 * If a directory is unreadable on one side, all files 424 * under that directory (ON BOTH SIDES) must be marked as 425 * blocked by stat errors. 426 */ 427 push_name(fp->f_name); 428 429 for (cp = fp->f_files; cp; cp = cp->f_next) { 430 if (fp->f_flags & F_STAT_ERROR) 431 cp->f_flags |= F_STAT_ERROR; 432 if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR)) 433 errs |= check_file(cp); 434 } 435 436 pop_name(); 437 438 return (errs); 439 } 440 441 /* 442 * routine: 443 * check_changes 444 * 445 * purpose: 446 * to figure out what has changed for a specific file 447 * 448 * parameters: 449 * file pointer 450 * the reference info 451 * the info to be checked for changes 452 * 453 * returns: 454 * diff mask 455 * 456 * notes: 457 * this routine doesn't pretend to understand what happened. 458 * it merely enumerates the ways in which the files differ. 459 */ 460 static diffmask_t 461 check_changes(struct file *fp, int ref, int new) 462 { struct fileinfo *rp, *np; 463 int mask = 0; 464 int type; 465 466 rp = &fp->f_info[ref]; 467 np = &fp->f_info[new]; 468 469 if (np->f_uid != rp->f_uid) 470 mask |= D_UID; 471 472 if (np->f_gid != rp->f_gid) 473 mask |= D_GID; 474 475 if (np->f_mode != rp->f_mode) 476 mask |= D_PROT; 477 478 type = np->f_type; 479 if (type != rp->f_type) { 480 if (type == 0) 481 mask |= D_DELETE; 482 else if (rp->f_type == 0) 483 mask |= D_CREATE; 484 else 485 mask |= D_TYPE; 486 } else if (type == S_IFBLK || type == S_IFCHR) { 487 /* 488 * for special files, we only look at the maj/min 489 */ 490 if (np->f_rd_maj != rp->f_rd_maj) 491 mask |= D_SIZE; 492 if (np->f_rd_min != rp->f_rd_min) 493 mask |= D_SIZE; 494 } else if (type != S_IFDIR) { 495 /* 496 * for directories, we don't look directly at 497 * the contents, so these fields don't mean 498 * anything. If the directories have changed 499 * in any interesting way, we'll find it by 500 * walking the tree. 501 */ 502 if (np->f_modtime > rp->f_modtime) 503 mask |= D_MTIME; 504 505 if (np->f_size != rp->f_size) 506 mask |= D_SIZE; 507 508 if (np->f_nlink != rp->f_nlink) 509 mask |= D_LINKS; 510 } 511 512 if (cmp_acls(rp, np) == 0) 513 mask |= D_FACLS; 514 515 return (mask); 516 } 517 518 /* 519 * routine: 520 * same_name 521 * 522 * purpose: 523 * to figure out whether or not two databsae nodes actually refer to 524 * the same file. 525 * 526 * parameters: 527 * pointers to two file description nodes 528 * which side we should check 529 * 530 * returns: 531 * TRUE/FALSE 532 * 533 * notes: 534 * if a single directory is specified in multiple base pairs, it 535 * is possible to have multiple nodes in the database describing 536 * the same file. This routine is supposed to detect those cases. 537 * 538 * what should be a trivial string comparison is complicated by 539 * the possibility that the two nodes might describe the same file 540 * from base directories at different depths. Thus, rather than 541 * comparing two strings, we really want to compare the concatenation 542 * of two pairs of strings. Unfortunately calling full_name would 543 * be awkward right now, so instead we have our own comparison 544 * routine that automatically skips from the first string to 545 * the second. 546 */ 547 static bool_t 548 same_name(struct file *f1, struct file *f2, side_t srcdst) 549 { 550 char *s1, *s2, *x1, *x2; 551 552 if (srcdst == OPT_SRC) { 553 s1 = (f1->f_base)->b_src_name; 554 s2 = (f2->f_base)->b_src_name; 555 } else { 556 s1 = (f1->f_base)->b_dst_name; 557 s2 = (f2->f_base)->b_dst_name; 558 } 559 x1 = f1->f_fullname; 560 x2 = f2->f_fullname; 561 562 /* 563 * Compare the two names, and if they differ before they end 564 * this is a non-match. If they both end at the same time, 565 * this is a match. 566 * 567 * The trick here is that each string is actually the logical 568 * concatenation of two strings, and we need to automatically 569 * wrap from the first to the second string in each pair. There 570 * is no requirement that the two (concatenated) strings be 571 * broken at the same point, so we have a slightly baroque 572 * comparsion loop. 573 */ 574 while (*s1 && *s1 == *s2) { 575 576 /* 577 * strings have been identical so far, so advance the 578 * pointers and continue the comparison. The trick 579 * is that when either string ends, we have to wrap 580 * over to its extension. 581 */ 582 s1++; s2++; 583 if (*s1 && *s2) 584 continue; 585 586 /* 587 * at least one of the strings has ended. 588 * there is an implicit slash between the string 589 * and its extension, and this has to be matched 590 * against the other string. 591 */ 592 if (*s1 != *s2) { 593 if (*s1 == 0 && *s2 == '/') 594 s2++; 595 else if (*s2 == 0 && *s1 == '/') 596 s1++; 597 else 598 /* the disagreement doesn't come at a slash */ 599 break; 600 } 601 602 /* 603 * if either string has ended, wrap to its extension 604 */ 605 if (*s1 == 0 && x1 != 0) { 606 s1 = x1; 607 x1 = 0; 608 } 609 if (*s2 == 0 && x2 != 0) { 610 s2 = x2; 611 x2 = 0; 612 } 613 } 614 615 return (*s1 == *s2); 616 } 617 618 /* 619 * routine: 620 * find_link 621 * 622 * purpose: 623 * to figure out if there is a file to which we should 624 * be creating a link (rather than making a copy) 625 * 626 * parameters: 627 * file node for the file to be created (that we hope is merely a link) 628 * which side is to be changed (src/dst) 629 * 630 * return: 631 * 0 no link is appropriate 632 * else pointer to file node for link referent 633 * 634 * notes: 635 * there are a few strange heuristics in this routine and I 636 * wouldn't bet my soul that I got all of them right. The general 637 * theory is that when a new file is created, we look to see if it 638 * is a link to another file on the changed side, and if it is, we 639 * find the corresponding file on the unchanged side. 640 * 641 * cases we want to be able to handle: 642 * 1. one or more links are created to a prexisting file 643 * 2. a preexisting only link is renamed 644 * 3. a rename of one of multiple links to a preexisting file 645 * 4. a single file is created with multiple links 646 */ 647 struct file * 648 find_link(struct file *fp, side_t srcdst) 649 { struct file *lp; 650 side_t chgside, tgtside; 651 struct fileinfo *chgp, *tgtp, *basp, *fcp, *ftp; 652 653 /* chg = side on which the change was noticed */ 654 /* tgt = side to which the change is to be propagated */ 655 chgside = (srcdst == OPT_SRC) ? OPT_DST : OPT_SRC; 656 tgtside = (srcdst == OPT_SRC) ? OPT_SRC : OPT_DST; 657 fcp = &fp->f_info[chgside]; 658 ftp = &fp->f_info[tgtside]; 659 660 /* 661 * cases 1 and 3 662 * 663 * When a new link is created, we should be able to find 664 * another file in the changed hierarchy that has the same 665 * I-node number. We expect it to be on the changed list 666 * because the link count will have gone up or because all 667 * of the copies are new. If we find one, then the new file 668 * on the receiving file should be a link to the corresponding 669 * existing file. 670 * 671 * case 4 672 * 673 * the first link will be dealt with as a copy, but all 674 * subsequent links should find an existing file analogous 675 * to one of the links on the changed side, and create 676 * corresponding links on the other side. 677 * 678 * in each of these cases, there should be multiple links 679 * on the changed side. If the linkcount on the changed 680 * side is one, we needn't bother searching for other links. 681 */ 682 if (fcp->f_nlink > 1) 683 for (lp = changes; lp; lp = lp->f_rnext) { 684 /* finding the same node doesn't count */ 685 if (fp == lp) 686 continue; 687 688 tgtp = &lp->f_info[tgtside]; 689 chgp = &lp->f_info[chgside]; 690 691 /* 692 * if the file doesn't already exist on the target side 693 * we cannot make a link to it 694 */ 695 if (tgtp->f_mode == 0) 696 continue; 697 698 /* 699 * if this is indeed a link, then the prospective file on 700 * the changed side will have the same dev/inum as the file 701 * we are looking for 702 */ 703 if (fcp->f_d_maj != chgp->f_d_maj) 704 continue; 705 if (fcp->f_d_min != chgp->f_d_min) 706 continue; 707 if (fcp->f_ino != chgp->f_ino) 708 continue; 709 710 /* 711 * if the target side is already a link to this file, 712 * then there is no new link to be created 713 * FIX: how does this interact with copies over links 714 */ 715 if ((ftp->f_d_maj == tgtp->f_d_maj) && 716 (ftp->f_d_min == tgtp->f_d_min) && 717 (ftp->f_ino == tgtp->f_ino)) 718 continue; 719 720 /* 721 * there is a pathological situation where a single file 722 * might appear under multiple base directories. This is 723 * damned awkward to detect in any other way, so we must 724 * check to see if we have just found another database 725 * instance for the same file (on the changed side). 726 */ 727 if ((fp->f_base != lp->f_base) && same_name(fp, lp, chgside)) 728 continue; 729 730 if (opt_debug & DBG_ANAL) 731 fprintf(stderr, "ANAL: FIND LINK %s and %s\n", 732 fp->f_fullname, lp->f_fullname); 733 734 return (lp); 735 } 736 737 /* 738 * case 2: a simple rename of the only link 739 * 740 * In this case, there may not be any other existing file on 741 * the changed side that has the same I-node number. There 742 * might, however, be a record of such a file in the baseline. 743 * If we can find an identical file with a different name that 744 * has recently disappeared, we have a likely rename. 745 */ 746 for (lp = changes; lp; lp = lp->f_rnext) { 747 748 /* finding the same node doesn't count */ 749 if (fp == lp) 750 continue; 751 752 tgtp = &lp->f_info[tgtside]; 753 chgp = &lp->f_info[chgside]; 754 755 /* 756 * if the file still exists on the changed side this is 757 * not a simple rename, and in fact the previous pass 758 * would have found it. 759 */ 760 if (chgp->f_mode != 0) 761 continue; 762 763 /* 764 * the inode number for the new link on the changed 765 * side must match the inode number for the old link 766 * from the baseline. 767 */ 768 if (fcp->f_d_maj != ((srcdst == OPT_SRC) ? lp->f_d_maj 769 : lp->f_s_maj)) 770 continue; 771 if (fcp->f_d_min != ((srcdst == OPT_SRC) ? lp->f_d_min 772 : lp->f_s_min)) 773 continue; 774 if (fcp->f_ino != ((srcdst == OPT_SRC) ? lp->f_d_inum 775 : lp->f_s_inum)) 776 continue; 777 778 /* finding a file we are already linked to doesn't help */ 779 if ((ftp->f_d_maj == tgtp->f_d_maj) && 780 (ftp->f_d_min == tgtp->f_d_min) && 781 (ftp->f_ino == tgtp->f_ino)) 782 continue; 783 784 /* 785 * there is a danger that we will confuse an 786 * inode reallocation with a rename. We should 787 * only consider this to be a rename if the 788 * new file is identical to the old one 789 */ 790 basp = &lp->f_info[OPT_BASE]; 791 if (fcp->f_type != basp->f_type) 792 continue; 793 if (fcp->f_size != basp->f_size) 794 continue; 795 if (fcp->f_mode != basp->f_mode) 796 continue; 797 if (fcp->f_uid != basp->f_uid) 798 continue; 799 if (fcp->f_gid != basp->f_gid) 800 continue; 801 802 if (opt_debug & DBG_ANAL) 803 fprintf(stderr, "ANAL: FIND RENAME %s and %s\n", 804 fp->f_fullname, lp->f_fullname); 805 806 return (lp); 807 } 808 809 return (0); 810 } 811 812 /* 813 * routine: 814 * has_other_links 815 * 816 * purpose: 817 * to determine whether or not there is more that one link to a 818 * particular file. We are willing to delete a link to a file 819 * that has changed if we will still have other links to it. 820 * The trick here is that we only care about links under our 821 * dominion. 822 * 823 * parameters: 824 * file pointer to node we are interested in 825 * which side we are looking to additional links on 826 * 827 * returns: 828 * TRUE if there are multiple links 829 * FALSE if this is the only one we know of 830 */ 831 bool_t 832 has_other_links(struct file *fp, side_t srcdst) 833 { struct file *lp; 834 struct fileinfo *fip, *lip; 835 836 fip = &fp->f_info[srcdst]; 837 838 /* if the link count is one, there couldn't be others */ 839 if (fip->f_nlink < 2) 840 return (FALSE); 841 842 /* look for any other files for the same inode */ 843 for (lp = changes; lp; lp = lp->f_rnext) { 844 /* finding the same node doesn't count */ 845 if (fp == lp) 846 continue; 847 848 lip = &lp->f_info[srcdst]; 849 850 /* 851 * file must still exist on this side 852 */ 853 if (lip->f_mode == 0) 854 continue; 855 856 /* 857 * if this is indeed a link, then the prospective file on 858 * the changed side will have the same dev/inum as the file 859 * we are looking for 860 */ 861 if (lip->f_d_maj != fip->f_d_maj) 862 continue; 863 if (lip->f_d_min != fip->f_d_min) 864 continue; 865 if (lip->f_ino != fip->f_ino) 866 continue; 867 868 /* 869 * we have found at least one other link 870 */ 871 return (TRUE); 872 } 873 874 return (FALSE); 875 } 876 877 /* 878 * routine: 879 * link_update 880 * 881 * purpose: 882 * to propoagate a stat change to all other file nodes that 883 * correspond to the same I-node on the changed side 884 * 885 * parameters: 886 * file pointer for the updated file 887 * which side was changed 888 * 889 * returns: 890 * void 891 * 892 * notes: 893 * if we have copied onto a file, we have copied onto all 894 * of its links, but since we do all stats before we do any 895 * copies, the stat information recently collected for links 896 * is no longer up-to-date, and this would result in incorrect 897 * reconciliation (redundant copies). 898 * 899 * There is an assumption here that all links to a changed 900 * file will be in the change list. This is true for almost 901 * all cases not involving restriction. If we do fail to 902 * update the baseline for a file that was off the change list, 903 * the worst that is likely to happen is that we will think 904 * it changed later (but will almost surely find that both 905 * copies agree). 906 */ 907 void 908 link_update(struct file *fp, side_t which) 909 { struct file *lp; 910 911 for (lp = changes; lp; lp = lp->f_rnext) { 912 /* finding the current entry doesn't count */ 913 if (lp == fp) 914 continue; 915 916 /* look for same i#, maj, min on changed side */ 917 if (lp->f_info[which].f_ino != fp->f_info[which].f_ino) 918 continue; 919 if (lp->f_info[which].f_d_maj != fp->f_info[which].f_d_maj) 920 continue; 921 if (lp->f_info[which].f_d_min != fp->f_info[which].f_d_min) 922 continue; 923 924 /* 925 * this appears to be another link to the same file 926 * so the updated stat information for one must be 927 * correct for the other. 928 */ 929 lp->f_info[which].f_type = fp->f_info[which].f_type; 930 lp->f_info[which].f_size = fp->f_info[which].f_size; 931 lp->f_info[which].f_mode = fp->f_info[which].f_mode; 932 lp->f_info[which].f_uid = fp->f_info[which].f_uid; 933 lp->f_info[which].f_gid = fp->f_info[which].f_gid; 934 lp->f_info[which].f_modtime = fp->f_info[which].f_modtime; 935 lp->f_info[which].f_modns = fp->f_info[which].f_modns; 936 lp->f_info[which].f_nlink = fp->f_info[which].f_nlink; 937 lp->f_info[which].f_rd_maj = fp->f_info[which].f_rd_maj; 938 lp->f_info[which].f_rd_min = fp->f_info[which].f_rd_min; 939 940 if (opt_debug & DBG_STAT) 941 fprintf(stderr, 942 "STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n", 943 lp->f_name, lp->f_info[which].f_modtime, 944 lp->f_info[which].f_modns); 945 } 946 } 947 948 /* 949 * routine: 950 * queue_file 951 * 952 * purpose: 953 * append a file to the list of needed reconciliations 954 * 955 * parameters: 956 * pointer to file 957 * 958 * notes: 959 * when a request is appended to the reconciliation list, 960 * we fill in the full name. We delayed this in hopes that 961 * it wouldn't be necessary (saving cycles and memory) 962 * 963 * There is some funny business with modification times. 964 * In general, we queue files in order of the latest modification 965 * time so that propagations preserve relative ordering. There 966 * are, however, a few important exceptions: 967 * 1. all directory creations happen at time zero, 968 * so that they are created before any files can 969 * be added to them. 970 * 2. all directory deletions happen at time infinity-depth, 971 * so that everything else can be removed before the 972 * directories themselves are removed. 973 * 3. all file deletions happen at time infinity-depth 974 * so that (in renames) the links will preceed the unlinks. 975 */ 976 static void 977 queue_file(struct file *fp) 978 { struct file **pp, *np; 979 980 #define TIME_ZERO 0L /* the earliest possible time */ 981 #define TIME_LONG 0x7FFFFFFF /* the latest possible time */ 982 983 /* 984 * figure out the modification time for sequencing purposes 985 */ 986 if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) { 987 /* 988 * deletions are performed last, and depth first 989 */ 990 fp->f_modtime = TIME_LONG - fp->f_depth; 991 } else if (fp->f_info[OPT_SRC].f_type != S_IFDIR && 992 fp->f_info[OPT_DST].f_type != S_IFDIR) { 993 /* 994 * for most files we use the latest mod time 995 */ 996 fp->f_modtime = fp->f_info[OPT_SRC].f_modtime; 997 fp->f_modns = fp->f_info[OPT_SRC].f_modns; 998 if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) { 999 fp->f_modtime = fp->f_info[OPT_DST].f_modtime; 1000 fp->f_modns = fp->f_info[OPT_DST].f_modns; 1001 } 1002 } else { 1003 /* 1004 * new directory creations need to happen before anything 1005 * else and are automatically sequenced in traversal order 1006 */ 1007 fp->f_modtime = TIME_ZERO; 1008 } 1009 1010 /* 1011 * insertion is time ordered, and for equal times, 1012 * insertions is in (pre-order) traversal order 1013 */ 1014 for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) { 1015 if (fp->f_modtime > np->f_modtime) 1016 continue; 1017 if (fp->f_modtime < np->f_modtime) 1018 break; 1019 if (fp->f_modns < np->f_modns) 1020 break; 1021 } 1022 1023 fp->f_fullname = strdup(get_name(fp)); 1024 fp->f_rnext = np; 1025 *pp = fp; 1026 } 1027 1028 1029 /* 1030 * routines: 1031 * push_name/pop_name/get_name 1032 * 1033 * purpose: 1034 * maintain a name stack so we can form name of a particular file 1035 * as the concatenation of all of the names between it and the 1036 * (know to be fully qualified) base directory. 1037 * 1038 * notes: 1039 * we go to this trouble because most files never change and 1040 * so we don't need to associate full names with every one. 1041 * This stack is maintained during analysis, and if we decide 1042 * to add a file to the reconciliation list, we can use the 1043 * stack to generate a fully qualified name at that time. 1044 * 1045 * we compress out '/./' when we return a name. Given that the 1046 * stack was built by a tree walk, the only place a /./ should 1047 * appear is at the first level after the base ... but there 1048 * are legitimate ways for them to appear there. 1049 * 1050 * these names can get deep, so we dynamically size our name buffer 1051 */ 1052 static const char *namestack[ MAX_DEPTH + 1 ]; 1053 static int namedepth = 0; 1054 static int namelen = 0; 1055 1056 void 1057 push_name(const char *name) 1058 { 1059 namestack[ namedepth++ ] = name; 1060 namelen += 2 + strlen(name); 1061 1062 /* make sure we don't overflow our name stack */ 1063 if (namedepth >= MAX_DEPTH) { 1064 fprintf(stderr, gettext(ERR_deep), name); 1065 exit(ERR_OTHER); 1066 } 1067 } 1068 1069 void 1070 pop_name(void) 1071 { 1072 namelen -= 2 + strlen(namestack[--namedepth]); 1073 namestack[ namedepth ] = 0; 1074 1075 #ifdef DBG_ERRORS 1076 /* just a little sanity check here */ 1077 if (namedepth <= 0) { 1078 if (namedepth < 0) { 1079 fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n"); 1080 exit(ERR_OTHER); 1081 } else if (namelen != 0) { 1082 fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n"); 1083 exit(ERR_OTHER); 1084 } 1085 } 1086 #endif 1087 } 1088 1089 char 1090 *get_name(struct file *fp) 1091 { int i; 1092 static char *namebuf = 0; 1093 static int buflen = 0; 1094 1095 /* make sure we have an adequate buffer */ 1096 i = namelen + 1 + strlen(fp->f_name); 1097 if (buflen < i) { 1098 for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME); 1099 namebuf = (char *) realloc(namebuf, buflen); 1100 } 1101 1102 /* assemble the name */ 1103 namebuf[0] = 0; 1104 for (i = 0; i < namedepth; i++) { 1105 if (strcmp(namestack[i], ".")) { 1106 strcat(namebuf, namestack[i]); 1107 strcat(namebuf, "/"); 1108 } 1109 } 1110 1111 strcat(namebuf, fp->f_name); 1112 1113 return (namebuf); 1114 } 1115