1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved
24 *
25 * module:
26 * anal.c
27 *
28 * purpose:
29 * routines to analyze the file trees and figure out what has changed
30 * and queue files for reconciliation. It also contains tree enumeration
31 * routines to for other purposes (pruning and link location).
32 *
33 * contents:
34 *
35 * change analysis:
36 * analyze .... (top level) analyze all files in the tree for changes
37 * summary .... print out change/reconciliation statistics for each base
38 * check_file . (static) look for changes and queue file for reconciliation
39 * check_changes (static) figure out if a particular file has changed
40 * queue_file . (static) add a file to the reconciliation list
41 *
42 * other tree enumeration functions:
43 * prune_file . (static) recursive descent and actual pruning
44 * prune ...... (top level) initiate pruning analysis for nonexistant files
45 * find_link .. look for other files to which a file may be a link
46 * link_update. propagate changed stat info to all other links
47 * same_name .. (static) figure out if two nodes describe same file
48 *
49 * misc:
50 * push_name .. maintain a running full pathname as we descend
51 * pop_name ... maintain a running full pathname as we pop back
52 * get_name ... return full pathname for the current file
53 *
54 * notes:
55 * analysis is limited to files that were evaluated in the previous
56 * pass ... since we don't have complete information about files that
57 * were not evaluated in the previous pass.
58 */
59
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <strings.h>
63
64 #include "messages.h"
65 #include "filesync.h"
66 #include "database.h"
67 #include "debug.h"
68
69 /*
70 * routines:
71 */
72 void push_name(const char *);
73 void pop_name();
74 char *get_name(struct file *);
75 static errmask_t check_file(struct file *fp);
76 static diffmask_t check_changes(struct file *fp, int first, int second);
77 static int prune_file(struct file *fp);
78 static void queue_file(struct file *fp);
79
80 /*
81 * globals
82 */
83 static struct file *changes; /* list of files to be reconciled */
84
85 static long total_files; /* total number of files being considered */
86 static long est_deletes; /* estimated number of files to be deleted */
87 static long est_rmdirs; /* est rmdirs of non-empty directories */
88
89 int inum_changes; /* LISTed directories whose I#s changed */
90
91 /*
92 * routine:
93 * analyze
94 *
95 * purpose:
96 * top level routine for the analysis/reconciliation process
97 *
98 * parameters:
99 * none
100 *
101 * returns:
102 * error mask
103 *
104 * notes:
105 * a critical side effect of this routine is the creation of
106 * the reconciliation list, an ordered list of files that
107 * needed to be processed in the subsequent reconciliation pass
108 */
109 errmask_t
analyze()110 analyze()
111 { struct base *bp;
112 struct file *fp;
113 int errs = 0;
114 int err;
115 int percentage;
116 bool_t aborted = FALSE;
117 char msgbuf[MAX_LINE];
118
119 /*
120 * run through all bases and directories looking for files
121 * that have been renamed. This must be done before the
122 * difference analysis because a directory rename can introduce
123 * radical restructuring into a name-based tree.
124 */
125 for (bp = bases; bp; bp = bp->b_next) {
126 for (fp = bp->b_files; fp; fp = fp->f_next)
127 if (fp->f_flags & F_EVALUATE)
128 errs |= find_renames(fp);
129 }
130
131 /*
132 * run through all bases and files looking for candidates
133 * note, however that we only descend into trees that have
134 * the evaluate flag turned on. As a result of new rules or
135 * restriction arguments, we may be deliberatly ignoring
136 * large amounts of the baseline. This means we won't do
137 * any stats to update the information in those nodes, and
138 * they will be written back just as they were.
139 *
140 * note that there is code to prune out baseline nodes for
141 * files that no longer exist, but that code is in reconcile
142 * and will never get a chance to run on nodes that aren't
143 * analyzed.
144 *
145 * we also want to run though all nodes with STAT errors
146 * so that we can put them on the reconciliation list.
147 */
148 for (bp = bases; bp; bp = bp->b_next) {
149 for (fp = bp->b_files; fp; fp = fp->f_next)
150 if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR))
151 errs |= check_file(fp);
152 }
153
154 /*
155 * my greatest fear is that someday, somehow, by messing with
156 * variables or baselines or who-knows-what, that someone will
157 * run a reconciliation against a large tree that doesn't correspond
158 * to the baseline, and I will infer that a bazillion files have
159 * been deleted and will propagate the slaughter before anyone
160 * can say somebody stop that maniac.
161 *
162 * in order to prevent such a possibility, we have a few different
163 * sanity checks. There is, of course, a tradeoff here between
164 * danger and irritation. The current set of heuristics for whether
165 * or not to generate a warning are (any of)
166 *
167 * at least CONFIRM_MIN files have been deleted AND
168 * CONFIRM_PCT of all files have been deleted
169 *
170 * the inode number on a LISTed directory has changed
171 *
172 * a non-empty directory has been deleted.
173 */
174 msgbuf[0] = 0;
175
176 percentage = (est_deletes * 100) / (total_files ? total_files : 1);
177 if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT)
178 sprintf(msgbuf, gettext(WARN_deletes), est_deletes);
179 else if (inum_changes > 0)
180 sprintf(msgbuf, gettext(WARN_ichange), inum_changes);
181 else if (est_rmdirs)
182 sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs);
183
184 if (msgbuf[0])
185 confirm(msgbuf);
186
187 /*
188 * TRICK:
189 * the change list contains both files that have changed
190 * (and probably warrant reconciliation) and files that
191 * we couldn't get up-to-date stat information on. The
192 * latter files should just be flagged as being in conflict
193 * so they can be reported in the summary. The same is
194 * true of all subsequent files if we abort reconciliation.
195 */
196 for (fp = changes; fp; fp = fp->f_rnext)
197 if (aborted || (fp->f_flags & F_STAT_ERROR)) {
198 fp->f_flags |= F_CONFLICT;
199 /* if it isn't in the baseline yet, don't add it */
200 if ((fp->f_flags & F_IN_BASELINE) == 0)
201 fp->f_flags |= F_REMOVE;
202 fp->f_problem = aborted ? PROB_aborted : PROB_restat;
203 (fp->f_base)->b_unresolved++;
204 errs |= ERR_UNRESOLVED;
205 if (opt_verbose)
206 fprintf(stdout,
207 gettext(aborted ? V_suppressed
208 : V_nostat),
209 fp->f_fullname);
210 } else {
211 err = reconcile(fp);
212 errs |= err;
213 if (opt_halt && (err & ERR_ABORT)) {
214 fprintf(stderr, gettext(ERR_abort_h));
215 aborted = TRUE;
216 }
217 }
218
219 return (errs);
220 }
221
222 /*
223 * routine:
224 * prune_file
225 *
226 * purpose:
227 * to look for file entries that should be pruned from baseline
228 * prune the current file if it needs pruning, and recursively
229 * descend if it is a directory.
230 *
231 * parameters:
232 * pointer to file node
233 */
234 static int
prune_file(struct file * fp)235 prune_file(struct file *fp)
236 { struct file *cp;
237 int prunes = 0;
238
239 /* if node hasn't been evaluated, mark it for removal */
240 if ((fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) == 0) {
241 fp->f_flags |= F_REMOVE;
242 prunes++;
243 if (opt_debug & DBG_ANAL)
244 fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name);
245 }
246
247 /* now check our children */
248 for (cp = fp->f_files; cp; cp = cp->f_next)
249 prunes += prune_file(cp);
250
251 return (prunes);
252 }
253
254 /*
255 * routine:
256 * prune
257 *
258 * purpose:
259 * to prune the baseline of entries that no longer correspond to
260 * existing rules.
261 *
262 * notes:
263 * This routine just calls prune_file on the top of each base tree.
264 */
265 int
prune()266 prune()
267 { struct base *bp;
268 struct file *fp;
269 int prunes = 0;
270
271 for (bp = bases; bp; bp = bp->b_next) {
272 for (fp = bp->b_files; fp; fp = fp->f_next)
273 prunes += prune_file(fp);
274
275 if ((bp->b_flags & F_EVALUATE) == 0)
276 bp->b_flags |= F_REMOVE;
277 }
278
279 return (prunes);
280 }
281
282 /*
283 * routine:
284 * summary
285 *
286 * purpose:
287 * to print out statics and conflict lists
288 */
289 void
summary()290 summary()
291 { struct base *bp;
292 struct file *fp;
293 extern bool_t need_super;
294
295 (void) fflush(stdout);
296
297 for (bp = bases; bp; bp = bp->b_next) {
298
299 /* see if this base was irrelevant */
300 if ((bp->b_flags & F_EVALUATE) == 0)
301 continue;
302
303 /* print out a summary for this base */
304 fprintf(stderr, gettext(SUM_hd),
305 bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles);
306 fprintf(stderr, gettext(SUM_dst),
307 bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc);
308 fprintf(stderr, gettext(SUM_src),
309 bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc);
310 if (bp->b_unresolved)
311 fprintf(stderr, gettext(SUM_unresolved),
312 bp->b_unresolved);
313
314
315 /* print out a list of unreconciled files for this base */
316 for (fp = changes; fp; fp = fp->f_rnext) {
317 if (fp->f_base != bp)
318 continue;
319 if ((fp->f_flags & F_CONFLICT) == 0)
320 continue;
321 fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname,
322 fp->f_problem ? fp->f_problem : "???");
323 }
324
325 fprintf(stderr, "\n");
326 }
327
328 if (need_super)
329 fprintf(stderr, gettext(WARN_super));
330 }
331
332 /*
333 * routine:
334 * check_file
335 *
336 * purpose:
337 * figure out if a file requires reconciliation and recursively
338 * descend into all sub-files and directories
339 *
340 * parameters:
341 * base pointer
342 * file pointer
343 *
344 * returns:
345 * error mask
346 * built up changes needed list
347 * updated statistics
348 *
349 * notes:
350 * this routine builds up a path name as it descends through
351 * the tree (see push_name, pop_name, get_name).
352 */
353 static errmask_t
check_file(struct file * fp)354 check_file(struct file *fp)
355 { struct file *cp;
356 int errs = 0;
357
358 if ((fp->f_flags & F_STAT_ERROR) == 0) {
359 /* see if the source has changed */
360 fp->f_info[OPT_BASE].f_modtime = fp->f_s_modtime;
361 fp->f_info[OPT_BASE].f_ino = fp->f_s_inum;
362 fp->f_info[OPT_BASE].f_d_maj = fp->f_s_maj;
363 fp->f_info[OPT_BASE].f_d_min = fp->f_s_min;
364 fp->f_info[OPT_BASE].f_nlink = fp->f_s_nlink;
365 fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC);
366
367 /* see if the destination has changed */
368 fp->f_info[OPT_BASE].f_modtime = fp->f_d_modtime;
369 fp->f_info[OPT_BASE].f_ino = fp->f_d_inum;
370 fp->f_info[OPT_BASE].f_d_maj = fp->f_d_maj;
371 fp->f_info[OPT_BASE].f_d_min = fp->f_d_min;
372 fp->f_info[OPT_BASE].f_nlink = fp->f_d_nlink;
373 fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST);
374
375 /* if nobody thinks the file exists, baseline needs pruning */
376 if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) {
377 fp->f_srcdiffs |= D_DELETE;
378 fp->f_dstdiffs |= D_DELETE;
379 }
380
381 /* keep track of possible deletions to look for trouble */
382 if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) {
383 est_deletes++;
384
385 /* see if file is (or has been) a non-empty directory */
386 if (fp->f_files)
387 est_rmdirs++;
388 }
389 }
390
391 /* if we found differences, queue the file for reconciliation */
392 if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) {
393 queue_file(fp);
394
395 if (opt_debug & DBG_ANAL) {
396 fprintf(stderr, "ANAL: src=%s",
397 showflags(diffmap, fp->f_srcdiffs));
398 fprintf(stderr, " dst=%s",
399 showflags(diffmap, fp->f_dstdiffs));
400 fprintf(stderr, " flgs=%s",
401 showflags(fileflags, fp->f_flags));
402 fprintf(stderr, " name=%s\n", fp->f_fullname);
403 }
404 }
405
406 /* bump the total file count */
407 fp->f_base->b_totfiles++;
408 total_files++;
409
410 /* if this is not a directory, we're done */
411 if (fp->f_files == 0)
412 return (errs);
413
414 /*
415 * If this is a directory, we need to recursively analyze
416 * our children, but only children who have been evaluated.
417 * If a node has not been evaluated, then we don't have
418 * updated stat information and there is nothing to analyze.
419 *
420 * we also want to run though all nodes with STAT errors
421 * so that we can put them on the reconciliation list.
422 * If a directory is unreadable on one side, all files
423 * under that directory (ON BOTH SIDES) must be marked as
424 * blocked by stat errors.
425 */
426 push_name(fp->f_name);
427
428 for (cp = fp->f_files; cp; cp = cp->f_next) {
429 if (fp->f_flags & F_STAT_ERROR)
430 cp->f_flags |= F_STAT_ERROR;
431 if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR))
432 errs |= check_file(cp);
433 }
434
435 pop_name();
436
437 return (errs);
438 }
439
440 /*
441 * routine:
442 * check_changes
443 *
444 * purpose:
445 * to figure out what has changed for a specific file
446 *
447 * parameters:
448 * file pointer
449 * the reference info
450 * the info to be checked for changes
451 *
452 * returns:
453 * diff mask
454 *
455 * notes:
456 * this routine doesn't pretend to understand what happened.
457 * it merely enumerates the ways in which the files differ.
458 */
459 static diffmask_t
check_changes(struct file * fp,int ref,int new)460 check_changes(struct file *fp, int ref, int new)
461 { struct fileinfo *rp, *np;
462 int mask = 0;
463 int type;
464
465 rp = &fp->f_info[ref];
466 np = &fp->f_info[new];
467
468 if (np->f_uid != rp->f_uid)
469 mask |= D_UID;
470
471 if (np->f_gid != rp->f_gid)
472 mask |= D_GID;
473
474 if (np->f_mode != rp->f_mode)
475 mask |= D_PROT;
476
477 type = np->f_type;
478 if (type != rp->f_type) {
479 if (type == 0)
480 mask |= D_DELETE;
481 else if (rp->f_type == 0)
482 mask |= D_CREATE;
483 else
484 mask |= D_TYPE;
485 } else if (type == S_IFBLK || type == S_IFCHR) {
486 /*
487 * for special files, we only look at the maj/min
488 */
489 if (np->f_rd_maj != rp->f_rd_maj)
490 mask |= D_SIZE;
491 if (np->f_rd_min != rp->f_rd_min)
492 mask |= D_SIZE;
493 } else if (type != S_IFDIR) {
494 /*
495 * for directories, we don't look directly at
496 * the contents, so these fields don't mean
497 * anything. If the directories have changed
498 * in any interesting way, we'll find it by
499 * walking the tree.
500 */
501 if (np->f_modtime > rp->f_modtime)
502 mask |= D_MTIME;
503
504 if (np->f_size != rp->f_size)
505 mask |= D_SIZE;
506
507 if (np->f_nlink != rp->f_nlink)
508 mask |= D_LINKS;
509 }
510
511 if (cmp_acls(rp, np) == 0)
512 mask |= D_FACLS;
513
514 return (mask);
515 }
516
517 /*
518 * routine:
519 * same_name
520 *
521 * purpose:
522 * to figure out whether or not two databsae nodes actually refer to
523 * the same file.
524 *
525 * parameters:
526 * pointers to two file description nodes
527 * which side we should check
528 *
529 * returns:
530 * TRUE/FALSE
531 *
532 * notes:
533 * if a single directory is specified in multiple base pairs, it
534 * is possible to have multiple nodes in the database describing
535 * the same file. This routine is supposed to detect those cases.
536 *
537 * what should be a trivial string comparison is complicated by
538 * the possibility that the two nodes might describe the same file
539 * from base directories at different depths. Thus, rather than
540 * comparing two strings, we really want to compare the concatenation
541 * of two pairs of strings. Unfortunately calling full_name would
542 * be awkward right now, so instead we have our own comparison
543 * routine that automatically skips from the first string to
544 * the second.
545 */
546 static bool_t
same_name(struct file * f1,struct file * f2,side_t srcdst)547 same_name(struct file *f1, struct file *f2, side_t srcdst)
548 {
549 char *s1, *s2, *x1, *x2;
550
551 if (srcdst == OPT_SRC) {
552 s1 = (f1->f_base)->b_src_name;
553 s2 = (f2->f_base)->b_src_name;
554 } else {
555 s1 = (f1->f_base)->b_dst_name;
556 s2 = (f2->f_base)->b_dst_name;
557 }
558 x1 = f1->f_fullname;
559 x2 = f2->f_fullname;
560
561 /*
562 * Compare the two names, and if they differ before they end
563 * this is a non-match. If they both end at the same time,
564 * this is a match.
565 *
566 * The trick here is that each string is actually the logical
567 * concatenation of two strings, and we need to automatically
568 * wrap from the first to the second string in each pair. There
569 * is no requirement that the two (concatenated) strings be
570 * broken at the same point, so we have a slightly baroque
571 * comparsion loop.
572 */
573 while (*s1 && *s1 == *s2) {
574
575 /*
576 * strings have been identical so far, so advance the
577 * pointers and continue the comparison. The trick
578 * is that when either string ends, we have to wrap
579 * over to its extension.
580 */
581 s1++; s2++;
582 if (*s1 && *s2)
583 continue;
584
585 /*
586 * at least one of the strings has ended.
587 * there is an implicit slash between the string
588 * and its extension, and this has to be matched
589 * against the other string.
590 */
591 if (*s1 != *s2) {
592 if (*s1 == 0 && *s2 == '/')
593 s2++;
594 else if (*s2 == 0 && *s1 == '/')
595 s1++;
596 else
597 /* the disagreement doesn't come at a slash */
598 break;
599 }
600
601 /*
602 * if either string has ended, wrap to its extension
603 */
604 if (*s1 == 0 && x1 != 0) {
605 s1 = x1;
606 x1 = 0;
607 }
608 if (*s2 == 0 && x2 != 0) {
609 s2 = x2;
610 x2 = 0;
611 }
612 }
613
614 return (*s1 == *s2);
615 }
616
617 /*
618 * routine:
619 * find_link
620 *
621 * purpose:
622 * to figure out if there is a file to which we should
623 * be creating a link (rather than making a copy)
624 *
625 * parameters:
626 * file node for the file to be created (that we hope is merely a link)
627 * which side is to be changed (src/dst)
628 *
629 * return:
630 * 0 no link is appropriate
631 * else pointer to file node for link referent
632 *
633 * notes:
634 * there are a few strange heuristics in this routine and I
635 * wouldn't bet my soul that I got all of them right. The general
636 * theory is that when a new file is created, we look to see if it
637 * is a link to another file on the changed side, and if it is, we
638 * find the corresponding file on the unchanged side.
639 *
640 * cases we want to be able to handle:
641 * 1. one or more links are created to a prexisting file
642 * 2. a preexisting only link is renamed
643 * 3. a rename of one of multiple links to a preexisting file
644 * 4. a single file is created with multiple links
645 */
646 struct file *
find_link(struct file * fp,side_t srcdst)647 find_link(struct file *fp, side_t srcdst)
648 { struct file *lp;
649 side_t chgside, tgtside;
650 struct fileinfo *chgp, *tgtp, *basp, *fcp, *ftp;
651
652 /* chg = side on which the change was noticed */
653 /* tgt = side to which the change is to be propagated */
654 chgside = (srcdst == OPT_SRC) ? OPT_DST : OPT_SRC;
655 tgtside = (srcdst == OPT_SRC) ? OPT_SRC : OPT_DST;
656 fcp = &fp->f_info[chgside];
657 ftp = &fp->f_info[tgtside];
658
659 /*
660 * cases 1 and 3
661 *
662 * When a new link is created, we should be able to find
663 * another file in the changed hierarchy that has the same
664 * I-node number. We expect it to be on the changed list
665 * because the link count will have gone up or because all
666 * of the copies are new. If we find one, then the new file
667 * on the receiving file should be a link to the corresponding
668 * existing file.
669 *
670 * case 4
671 *
672 * the first link will be dealt with as a copy, but all
673 * subsequent links should find an existing file analogous
674 * to one of the links on the changed side, and create
675 * corresponding links on the other side.
676 *
677 * in each of these cases, there should be multiple links
678 * on the changed side. If the linkcount on the changed
679 * side is one, we needn't bother searching for other links.
680 */
681 if (fcp->f_nlink > 1)
682 for (lp = changes; lp; lp = lp->f_rnext) {
683 /* finding the same node doesn't count */
684 if (fp == lp)
685 continue;
686
687 tgtp = &lp->f_info[tgtside];
688 chgp = &lp->f_info[chgside];
689
690 /*
691 * if the file doesn't already exist on the target side
692 * we cannot make a link to it
693 */
694 if (tgtp->f_mode == 0)
695 continue;
696
697 /*
698 * if this is indeed a link, then the prospective file on
699 * the changed side will have the same dev/inum as the file
700 * we are looking for
701 */
702 if (fcp->f_d_maj != chgp->f_d_maj)
703 continue;
704 if (fcp->f_d_min != chgp->f_d_min)
705 continue;
706 if (fcp->f_ino != chgp->f_ino)
707 continue;
708
709 /*
710 * if the target side is already a link to this file,
711 * then there is no new link to be created
712 * FIX: how does this interact with copies over links
713 */
714 if ((ftp->f_d_maj == tgtp->f_d_maj) &&
715 (ftp->f_d_min == tgtp->f_d_min) &&
716 (ftp->f_ino == tgtp->f_ino))
717 continue;
718
719 /*
720 * there is a pathological situation where a single file
721 * might appear under multiple base directories. This is
722 * damned awkward to detect in any other way, so we must
723 * check to see if we have just found another database
724 * instance for the same file (on the changed side).
725 */
726 if ((fp->f_base != lp->f_base) && same_name(fp, lp, chgside))
727 continue;
728
729 if (opt_debug & DBG_ANAL)
730 fprintf(stderr, "ANAL: FIND LINK %s and %s\n",
731 fp->f_fullname, lp->f_fullname);
732
733 return (lp);
734 }
735
736 /*
737 * case 2: a simple rename of the only link
738 *
739 * In this case, there may not be any other existing file on
740 * the changed side that has the same I-node number. There
741 * might, however, be a record of such a file in the baseline.
742 * If we can find an identical file with a different name that
743 * has recently disappeared, we have a likely rename.
744 */
745 for (lp = changes; lp; lp = lp->f_rnext) {
746
747 /* finding the same node doesn't count */
748 if (fp == lp)
749 continue;
750
751 tgtp = &lp->f_info[tgtside];
752 chgp = &lp->f_info[chgside];
753
754 /*
755 * if the file still exists on the changed side this is
756 * not a simple rename, and in fact the previous pass
757 * would have found it.
758 */
759 if (chgp->f_mode != 0)
760 continue;
761
762 /*
763 * the inode number for the new link on the changed
764 * side must match the inode number for the old link
765 * from the baseline.
766 */
767 if (fcp->f_d_maj != ((srcdst == OPT_SRC) ? lp->f_d_maj
768 : lp->f_s_maj))
769 continue;
770 if (fcp->f_d_min != ((srcdst == OPT_SRC) ? lp->f_d_min
771 : lp->f_s_min))
772 continue;
773 if (fcp->f_ino != ((srcdst == OPT_SRC) ? lp->f_d_inum
774 : lp->f_s_inum))
775 continue;
776
777 /* finding a file we are already linked to doesn't help */
778 if ((ftp->f_d_maj == tgtp->f_d_maj) &&
779 (ftp->f_d_min == tgtp->f_d_min) &&
780 (ftp->f_ino == tgtp->f_ino))
781 continue;
782
783 /*
784 * there is a danger that we will confuse an
785 * inode reallocation with a rename. We should
786 * only consider this to be a rename if the
787 * new file is identical to the old one
788 */
789 basp = &lp->f_info[OPT_BASE];
790 if (fcp->f_type != basp->f_type)
791 continue;
792 if (fcp->f_size != basp->f_size)
793 continue;
794 if (fcp->f_mode != basp->f_mode)
795 continue;
796 if (fcp->f_uid != basp->f_uid)
797 continue;
798 if (fcp->f_gid != basp->f_gid)
799 continue;
800
801 if (opt_debug & DBG_ANAL)
802 fprintf(stderr, "ANAL: FIND RENAME %s and %s\n",
803 fp->f_fullname, lp->f_fullname);
804
805 return (lp);
806 }
807
808 return (0);
809 }
810
811 /*
812 * routine:
813 * has_other_links
814 *
815 * purpose:
816 * to determine whether or not there is more that one link to a
817 * particular file. We are willing to delete a link to a file
818 * that has changed if we will still have other links to it.
819 * The trick here is that we only care about links under our
820 * dominion.
821 *
822 * parameters:
823 * file pointer to node we are interested in
824 * which side we are looking to additional links on
825 *
826 * returns:
827 * TRUE if there are multiple links
828 * FALSE if this is the only one we know of
829 */
830 bool_t
has_other_links(struct file * fp,side_t srcdst)831 has_other_links(struct file *fp, side_t srcdst)
832 { struct file *lp;
833 struct fileinfo *fip, *lip;
834
835 fip = &fp->f_info[srcdst];
836
837 /* if the link count is one, there couldn't be others */
838 if (fip->f_nlink < 2)
839 return (FALSE);
840
841 /* look for any other files for the same inode */
842 for (lp = changes; lp; lp = lp->f_rnext) {
843 /* finding the same node doesn't count */
844 if (fp == lp)
845 continue;
846
847 lip = &lp->f_info[srcdst];
848
849 /*
850 * file must still exist on this side
851 */
852 if (lip->f_mode == 0)
853 continue;
854
855 /*
856 * if this is indeed a link, then the prospective file on
857 * the changed side will have the same dev/inum as the file
858 * we are looking for
859 */
860 if (lip->f_d_maj != fip->f_d_maj)
861 continue;
862 if (lip->f_d_min != fip->f_d_min)
863 continue;
864 if (lip->f_ino != fip->f_ino)
865 continue;
866
867 /*
868 * we have found at least one other link
869 */
870 return (TRUE);
871 }
872
873 return (FALSE);
874 }
875
876 /*
877 * routine:
878 * link_update
879 *
880 * purpose:
881 * to propoagate a stat change to all other file nodes that
882 * correspond to the same I-node on the changed side
883 *
884 * parameters:
885 * file pointer for the updated file
886 * which side was changed
887 *
888 * returns:
889 * void
890 *
891 * notes:
892 * if we have copied onto a file, we have copied onto all
893 * of its links, but since we do all stats before we do any
894 * copies, the stat information recently collected for links
895 * is no longer up-to-date, and this would result in incorrect
896 * reconciliation (redundant copies).
897 *
898 * There is an assumption here that all links to a changed
899 * file will be in the change list. This is true for almost
900 * all cases not involving restriction. If we do fail to
901 * update the baseline for a file that was off the change list,
902 * the worst that is likely to happen is that we will think
903 * it changed later (but will almost surely find that both
904 * copies agree).
905 */
906 void
link_update(struct file * fp,side_t which)907 link_update(struct file *fp, side_t which)
908 { struct file *lp;
909
910 for (lp = changes; lp; lp = lp->f_rnext) {
911 /* finding the current entry doesn't count */
912 if (lp == fp)
913 continue;
914
915 /* look for same i#, maj, min on changed side */
916 if (lp->f_info[which].f_ino != fp->f_info[which].f_ino)
917 continue;
918 if (lp->f_info[which].f_d_maj != fp->f_info[which].f_d_maj)
919 continue;
920 if (lp->f_info[which].f_d_min != fp->f_info[which].f_d_min)
921 continue;
922
923 /*
924 * this appears to be another link to the same file
925 * so the updated stat information for one must be
926 * correct for the other.
927 */
928 lp->f_info[which].f_type = fp->f_info[which].f_type;
929 lp->f_info[which].f_size = fp->f_info[which].f_size;
930 lp->f_info[which].f_mode = fp->f_info[which].f_mode;
931 lp->f_info[which].f_uid = fp->f_info[which].f_uid;
932 lp->f_info[which].f_gid = fp->f_info[which].f_gid;
933 lp->f_info[which].f_modtime = fp->f_info[which].f_modtime;
934 lp->f_info[which].f_modns = fp->f_info[which].f_modns;
935 lp->f_info[which].f_nlink = fp->f_info[which].f_nlink;
936 lp->f_info[which].f_rd_maj = fp->f_info[which].f_rd_maj;
937 lp->f_info[which].f_rd_min = fp->f_info[which].f_rd_min;
938
939 if (opt_debug & DBG_STAT)
940 fprintf(stderr,
941 "STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n",
942 lp->f_name, lp->f_info[which].f_modtime,
943 lp->f_info[which].f_modns);
944 }
945 }
946
947 /*
948 * routine:
949 * queue_file
950 *
951 * purpose:
952 * append a file to the list of needed reconciliations
953 *
954 * parameters:
955 * pointer to file
956 *
957 * notes:
958 * when a request is appended to the reconciliation list,
959 * we fill in the full name. We delayed this in hopes that
960 * it wouldn't be necessary (saving cycles and memory)
961 *
962 * There is some funny business with modification times.
963 * In general, we queue files in order of the latest modification
964 * time so that propagations preserve relative ordering. There
965 * are, however, a few important exceptions:
966 * 1. all directory creations happen at time zero,
967 * so that they are created before any files can
968 * be added to them.
969 * 2. all directory deletions happen at time infinity-depth,
970 * so that everything else can be removed before the
971 * directories themselves are removed.
972 * 3. all file deletions happen at time infinity-depth
973 * so that (in renames) the links will preceed the unlinks.
974 */
975 static void
queue_file(struct file * fp)976 queue_file(struct file *fp)
977 { struct file **pp, *np;
978
979 #define TIME_ZERO 0L /* the earliest possible time */
980 #define TIME_LONG 0x7FFFFFFF /* the latest possible time */
981
982 /*
983 * figure out the modification time for sequencing purposes
984 */
985 if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) {
986 /*
987 * deletions are performed last, and depth first
988 */
989 fp->f_modtime = TIME_LONG - fp->f_depth;
990 } else if (fp->f_info[OPT_SRC].f_type != S_IFDIR &&
991 fp->f_info[OPT_DST].f_type != S_IFDIR) {
992 /*
993 * for most files we use the latest mod time
994 */
995 fp->f_modtime = fp->f_info[OPT_SRC].f_modtime;
996 fp->f_modns = fp->f_info[OPT_SRC].f_modns;
997 if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) {
998 fp->f_modtime = fp->f_info[OPT_DST].f_modtime;
999 fp->f_modns = fp->f_info[OPT_DST].f_modns;
1000 }
1001 } else {
1002 /*
1003 * new directory creations need to happen before anything
1004 * else and are automatically sequenced in traversal order
1005 */
1006 fp->f_modtime = TIME_ZERO;
1007 }
1008
1009 /*
1010 * insertion is time ordered, and for equal times,
1011 * insertions is in (pre-order) traversal order
1012 */
1013 for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) {
1014 if (fp->f_modtime > np->f_modtime)
1015 continue;
1016 if (fp->f_modtime < np->f_modtime)
1017 break;
1018 if (fp->f_modns < np->f_modns)
1019 break;
1020 }
1021
1022 fp->f_fullname = strdup(get_name(fp));
1023 fp->f_rnext = np;
1024 *pp = fp;
1025 }
1026
1027
1028 /*
1029 * routines:
1030 * push_name/pop_name/get_name
1031 *
1032 * purpose:
1033 * maintain a name stack so we can form name of a particular file
1034 * as the concatenation of all of the names between it and the
1035 * (know to be fully qualified) base directory.
1036 *
1037 * notes:
1038 * we go to this trouble because most files never change and
1039 * so we don't need to associate full names with every one.
1040 * This stack is maintained during analysis, and if we decide
1041 * to add a file to the reconciliation list, we can use the
1042 * stack to generate a fully qualified name at that time.
1043 *
1044 * we compress out '/./' when we return a name. Given that the
1045 * stack was built by a tree walk, the only place a /./ should
1046 * appear is at the first level after the base ... but there
1047 * are legitimate ways for them to appear there.
1048 *
1049 * these names can get deep, so we dynamically size our name buffer
1050 */
1051 static const char *namestack[ MAX_DEPTH + 1 ];
1052 static int namedepth = 0;
1053 static int namelen = 0;
1054
1055 void
push_name(const char * name)1056 push_name(const char *name)
1057 {
1058 namestack[ namedepth++ ] = name;
1059 namelen += 2 + strlen(name);
1060
1061 /* make sure we don't overflow our name stack */
1062 if (namedepth >= MAX_DEPTH) {
1063 fprintf(stderr, gettext(ERR_deep), name);
1064 exit(ERR_OTHER);
1065 }
1066 }
1067
1068 void
pop_name(void)1069 pop_name(void)
1070 {
1071 namelen -= 2 + strlen(namestack[--namedepth]);
1072 namestack[ namedepth ] = 0;
1073
1074 #ifdef DBG_ERRORS
1075 /* just a little sanity check here */
1076 if (namedepth <= 0) {
1077 if (namedepth < 0) {
1078 fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n");
1079 exit(ERR_OTHER);
1080 } else if (namelen != 0) {
1081 fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n");
1082 exit(ERR_OTHER);
1083 }
1084 }
1085 #endif
1086 }
1087
1088 char
get_name(struct file * fp)1089 *get_name(struct file *fp)
1090 { int i;
1091 static char *namebuf = 0;
1092 static int buflen = 0;
1093
1094 /* make sure we have an adequate buffer */
1095 i = namelen + 1 + strlen(fp->f_name);
1096 if (buflen < i) {
1097 for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME);
1098 namebuf = (char *) realloc(namebuf, buflen);
1099 }
1100
1101 /* assemble the name */
1102 namebuf[0] = 0;
1103 for (i = 0; i < namedepth; i++) {
1104 if (strcmp(namestack[i], ".")) {
1105 strcat(namebuf, namestack[i]);
1106 strcat(namebuf, "/");
1107 }
1108 }
1109
1110 strcat(namebuf, fp->f_name);
1111
1112 return (namebuf);
1113 }
1114