1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved
24 *
25 * module:
26 * anal.c
27 *
28 * purpose:
29 * routines to analyze the file trees and figure out what has changed
30 * and queue files for reconciliation. It also contains tree enumeration
31 * routines to for other purposes (pruning and link location).
32 *
33 * contents:
34 *
35 * change analysis:
36 * analyze .... (top level) analyze all files in the tree for changes
37 * summary .... print out change/reconciliation statistics for each base
38 * check_file . (static) look for changes and queue file for reconciliation
39 * check_changes (static) figure out if a particular file has changed
40 * queue_file . (static) add a file to the reconciliation list
41 *
42 * other tree enumeration functions:
43 * prune_file . (static) recursive descent and actual pruning
44 * prune ...... (top level) initiate pruning analysis for nonexistant files
45 * find_link .. look for other files to which a file may be a link
46 * link_update. propagate changed stat info to all other links
47 * same_name .. (static) figure out if two nodes describe same file
48 *
49 * misc:
50 * push_name .. maintain a running full pathname as we descend
51 * pop_name ... maintain a running full pathname as we pop back
52 * get_name ... return full pathname for the current file
53 *
54 * notes:
55 * analysis is limited to files that were evaluated in the previous
56 * pass ... since we don't have complete information about files that
57 * were not evaluated in the previous pass.
58 */
59 #pragma ident "%Z%%M% %I% %E% SMI"
60
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <strings.h>
64
65 #include "messages.h"
66 #include "filesync.h"
67 #include "database.h"
68 #include "debug.h"
69
70 /*
71 * routines:
72 */
73 void push_name(const char *);
74 void pop_name();
75 char *get_name(struct file *);
76 static errmask_t check_file(struct file *fp);
77 static diffmask_t check_changes(struct file *fp, int first, int second);
78 static int prune_file(struct file *fp);
79 static void queue_file(struct file *fp);
80
81 /*
82 * globals
83 */
84 static struct file *changes; /* list of files to be reconciled */
85
86 static long total_files; /* total number of files being considered */
87 static long est_deletes; /* estimated number of files to be deleted */
88 static long est_rmdirs; /* est rmdirs of non-empty directories */
89
90 int inum_changes; /* LISTed directories whose I#s changed */
91
92 /*
93 * routine:
94 * analyze
95 *
96 * purpose:
97 * top level routine for the analysis/reconciliation process
98 *
99 * parameters:
100 * none
101 *
102 * returns:
103 * error mask
104 *
105 * notes:
106 * a critical side effect of this routine is the creation of
107 * the reconciliation list, an ordered list of files that
108 * needed to be processed in the subsequent reconciliation pass
109 */
110 errmask_t
analyze()111 analyze()
112 { struct base *bp;
113 struct file *fp;
114 int errs = 0;
115 int err;
116 int percentage;
117 bool_t aborted = FALSE;
118 char msgbuf[MAX_LINE];
119
120 /*
121 * run through all bases and directories looking for files
122 * that have been renamed. This must be done before the
123 * difference analysis because a directory rename can introduce
124 * radical restructuring into a name-based tree.
125 */
126 for (bp = bases; bp; bp = bp->b_next) {
127 for (fp = bp->b_files; fp; fp = fp->f_next)
128 if (fp->f_flags & F_EVALUATE)
129 errs |= find_renames(fp);
130 }
131
132 /*
133 * run through all bases and files looking for candidates
134 * note, however that we only descend into trees that have
135 * the evaluate flag turned on. As a result of new rules or
136 * restriction arguments, we may be deliberatly ignoring
137 * large amounts of the baseline. This means we won't do
138 * any stats to update the information in those nodes, and
139 * they will be written back just as they were.
140 *
141 * note that there is code to prune out baseline nodes for
142 * files that no longer exist, but that code is in reconcile
143 * and will never get a chance to run on nodes that aren't
144 * analyzed.
145 *
146 * we also want to run though all nodes with STAT errors
147 * so that we can put them on the reconciliation list.
148 */
149 for (bp = bases; bp; bp = bp->b_next) {
150 for (fp = bp->b_files; fp; fp = fp->f_next)
151 if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR))
152 errs |= check_file(fp);
153 }
154
155 /*
156 * my greatest fear is that someday, somehow, by messing with
157 * variables or baselines or who-knows-what, that someone will
158 * run a reconciliation against a large tree that doesn't correspond
159 * to the baseline, and I will infer that a bazillion files have
160 * been deleted and will propagate the slaughter before anyone
161 * can say somebody stop that maniac.
162 *
163 * in order to prevent such a possibility, we have a few different
164 * sanity checks. There is, of course, a tradeoff here between
165 * danger and irritation. The current set of heuristics for whether
166 * or not to generate a warning are (any of)
167 *
168 * at least CONFIRM_MIN files have been deleted AND
169 * CONFIRM_PCT of all files have been deleted
170 *
171 * the inode number on a LISTed directory has changed
172 *
173 * a non-empty directory has been deleted.
174 */
175 msgbuf[0] = 0;
176
177 percentage = (est_deletes * 100) / (total_files ? total_files : 1);
178 if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT)
179 sprintf(msgbuf, gettext(WARN_deletes), est_deletes);
180 else if (inum_changes > 0)
181 sprintf(msgbuf, gettext(WARN_ichange), inum_changes);
182 else if (est_rmdirs)
183 sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs);
184
185 if (msgbuf[0])
186 confirm(msgbuf);
187
188 /*
189 * TRICK:
190 * the change list contains both files that have changed
191 * (and probably warrant reconciliation) and files that
192 * we couldn't get up-to-date stat information on. The
193 * latter files should just be flagged as being in conflict
194 * so they can be reported in the summary. The same is
195 * true of all subsequent files if we abort reconciliation.
196 */
197 for (fp = changes; fp; fp = fp->f_rnext)
198 if (aborted || (fp->f_flags & F_STAT_ERROR)) {
199 fp->f_flags |= F_CONFLICT;
200 /* if it isn't in the baseline yet, don't add it */
201 if ((fp->f_flags & F_IN_BASELINE) == 0)
202 fp->f_flags |= F_REMOVE;
203 fp->f_problem = aborted ? PROB_aborted : PROB_restat;
204 (fp->f_base)->b_unresolved++;
205 errs |= ERR_UNRESOLVED;
206 if (opt_verbose)
207 fprintf(stdout,
208 gettext(aborted ? V_suppressed
209 : V_nostat),
210 fp->f_fullname);
211 } else {
212 err = reconcile(fp);
213 errs |= err;
214 if (opt_halt && (err & ERR_ABORT)) {
215 fprintf(stderr, gettext(ERR_abort_h));
216 aborted = TRUE;
217 }
218 }
219
220 return (errs);
221 }
222
223 /*
224 * routine:
225 * prune_file
226 *
227 * purpose:
228 * to look for file entries that should be pruned from baseline
229 * prune the current file if it needs pruning, and recursively
230 * descend if it is a directory.
231 *
232 * parameters:
233 * pointer to file node
234 */
235 static int
prune_file(struct file * fp)236 prune_file(struct file *fp)
237 { struct file *cp;
238 int prunes = 0;
239
240 /* if node hasn't been evaluated, mark it for removal */
241 if ((fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) == 0) {
242 fp->f_flags |= F_REMOVE;
243 prunes++;
244 if (opt_debug & DBG_ANAL)
245 fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name);
246 }
247
248 /* now check our children */
249 for (cp = fp->f_files; cp; cp = cp->f_next)
250 prunes += prune_file(cp);
251
252 return (prunes);
253 }
254
255 /*
256 * routine:
257 * prune
258 *
259 * purpose:
260 * to prune the baseline of entries that no longer correspond to
261 * existing rules.
262 *
263 * notes:
264 * This routine just calls prune_file on the top of each base tree.
265 */
266 int
prune()267 prune()
268 { struct base *bp;
269 struct file *fp;
270 int prunes = 0;
271
272 for (bp = bases; bp; bp = bp->b_next) {
273 for (fp = bp->b_files; fp; fp = fp->f_next)
274 prunes += prune_file(fp);
275
276 if ((bp->b_flags & F_EVALUATE) == 0)
277 bp->b_flags |= F_REMOVE;
278 }
279
280 return (prunes);
281 }
282
283 /*
284 * routine:
285 * summary
286 *
287 * purpose:
288 * to print out statics and conflict lists
289 */
290 void
summary()291 summary()
292 { struct base *bp;
293 struct file *fp;
294 extern bool_t need_super;
295
296 (void) fflush(stdout);
297
298 for (bp = bases; bp; bp = bp->b_next) {
299
300 /* see if this base was irrelevant */
301 if ((bp->b_flags & F_EVALUATE) == 0)
302 continue;
303
304 /* print out a summary for this base */
305 fprintf(stderr, gettext(SUM_hd),
306 bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles);
307 fprintf(stderr, gettext(SUM_dst),
308 bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc);
309 fprintf(stderr, gettext(SUM_src),
310 bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc);
311 if (bp->b_unresolved)
312 fprintf(stderr, gettext(SUM_unresolved),
313 bp->b_unresolved);
314
315
316 /* print out a list of unreconciled files for this base */
317 for (fp = changes; fp; fp = fp->f_rnext) {
318 if (fp->f_base != bp)
319 continue;
320 if ((fp->f_flags & F_CONFLICT) == 0)
321 continue;
322 fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname,
323 fp->f_problem ? fp->f_problem : "???");
324 }
325
326 fprintf(stderr, "\n");
327 }
328
329 if (need_super)
330 fprintf(stderr, gettext(WARN_super));
331 }
332
333 /*
334 * routine:
335 * check_file
336 *
337 * purpose:
338 * figure out if a file requires reconciliation and recursively
339 * descend into all sub-files and directories
340 *
341 * parameters:
342 * base pointer
343 * file pointer
344 *
345 * returns:
346 * error mask
347 * built up changes needed list
348 * updated statistics
349 *
350 * notes:
351 * this routine builds up a path name as it descends through
352 * the tree (see push_name, pop_name, get_name).
353 */
354 static errmask_t
check_file(struct file * fp)355 check_file(struct file *fp)
356 { struct file *cp;
357 int errs = 0;
358
359 if ((fp->f_flags & F_STAT_ERROR) == 0) {
360 /* see if the source has changed */
361 fp->f_info[OPT_BASE].f_modtime = fp->f_s_modtime;
362 fp->f_info[OPT_BASE].f_ino = fp->f_s_inum;
363 fp->f_info[OPT_BASE].f_d_maj = fp->f_s_maj;
364 fp->f_info[OPT_BASE].f_d_min = fp->f_s_min;
365 fp->f_info[OPT_BASE].f_nlink = fp->f_s_nlink;
366 fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC);
367
368 /* see if the destination has changed */
369 fp->f_info[OPT_BASE].f_modtime = fp->f_d_modtime;
370 fp->f_info[OPT_BASE].f_ino = fp->f_d_inum;
371 fp->f_info[OPT_BASE].f_d_maj = fp->f_d_maj;
372 fp->f_info[OPT_BASE].f_d_min = fp->f_d_min;
373 fp->f_info[OPT_BASE].f_nlink = fp->f_d_nlink;
374 fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST);
375
376 /* if nobody thinks the file exists, baseline needs pruning */
377 if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) {
378 fp->f_srcdiffs |= D_DELETE;
379 fp->f_dstdiffs |= D_DELETE;
380 }
381
382 /* keep track of possible deletions to look for trouble */
383 if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) {
384 est_deletes++;
385
386 /* see if file is (or has been) a non-empty directory */
387 if (fp->f_files)
388 est_rmdirs++;
389 }
390 }
391
392 /* if we found differences, queue the file for reconciliation */
393 if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) {
394 queue_file(fp);
395
396 if (opt_debug & DBG_ANAL) {
397 fprintf(stderr, "ANAL: src=%s",
398 showflags(diffmap, fp->f_srcdiffs));
399 fprintf(stderr, " dst=%s",
400 showflags(diffmap, fp->f_dstdiffs));
401 fprintf(stderr, " flgs=%s",
402 showflags(fileflags, fp->f_flags));
403 fprintf(stderr, " name=%s\n", fp->f_fullname);
404 }
405 }
406
407 /* bump the total file count */
408 fp->f_base->b_totfiles++;
409 total_files++;
410
411 /* if this is not a directory, we're done */
412 if (fp->f_files == 0)
413 return (errs);
414
415 /*
416 * If this is a directory, we need to recursively analyze
417 * our children, but only children who have been evaluated.
418 * If a node has not been evaluated, then we don't have
419 * updated stat information and there is nothing to analyze.
420 *
421 * we also want to run though all nodes with STAT errors
422 * so that we can put them on the reconciliation list.
423 * If a directory is unreadable on one side, all files
424 * under that directory (ON BOTH SIDES) must be marked as
425 * blocked by stat errors.
426 */
427 push_name(fp->f_name);
428
429 for (cp = fp->f_files; cp; cp = cp->f_next) {
430 if (fp->f_flags & F_STAT_ERROR)
431 cp->f_flags |= F_STAT_ERROR;
432 if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR))
433 errs |= check_file(cp);
434 }
435
436 pop_name();
437
438 return (errs);
439 }
440
441 /*
442 * routine:
443 * check_changes
444 *
445 * purpose:
446 * to figure out what has changed for a specific file
447 *
448 * parameters:
449 * file pointer
450 * the reference info
451 * the info to be checked for changes
452 *
453 * returns:
454 * diff mask
455 *
456 * notes:
457 * this routine doesn't pretend to understand what happened.
458 * it merely enumerates the ways in which the files differ.
459 */
460 static diffmask_t
check_changes(struct file * fp,int ref,int new)461 check_changes(struct file *fp, int ref, int new)
462 { struct fileinfo *rp, *np;
463 int mask = 0;
464 int type;
465
466 rp = &fp->f_info[ref];
467 np = &fp->f_info[new];
468
469 if (np->f_uid != rp->f_uid)
470 mask |= D_UID;
471
472 if (np->f_gid != rp->f_gid)
473 mask |= D_GID;
474
475 if (np->f_mode != rp->f_mode)
476 mask |= D_PROT;
477
478 type = np->f_type;
479 if (type != rp->f_type) {
480 if (type == 0)
481 mask |= D_DELETE;
482 else if (rp->f_type == 0)
483 mask |= D_CREATE;
484 else
485 mask |= D_TYPE;
486 } else if (type == S_IFBLK || type == S_IFCHR) {
487 /*
488 * for special files, we only look at the maj/min
489 */
490 if (np->f_rd_maj != rp->f_rd_maj)
491 mask |= D_SIZE;
492 if (np->f_rd_min != rp->f_rd_min)
493 mask |= D_SIZE;
494 } else if (type != S_IFDIR) {
495 /*
496 * for directories, we don't look directly at
497 * the contents, so these fields don't mean
498 * anything. If the directories have changed
499 * in any interesting way, we'll find it by
500 * walking the tree.
501 */
502 if (np->f_modtime > rp->f_modtime)
503 mask |= D_MTIME;
504
505 if (np->f_size != rp->f_size)
506 mask |= D_SIZE;
507
508 if (np->f_nlink != rp->f_nlink)
509 mask |= D_LINKS;
510 }
511
512 if (cmp_acls(rp, np) == 0)
513 mask |= D_FACLS;
514
515 return (mask);
516 }
517
518 /*
519 * routine:
520 * same_name
521 *
522 * purpose:
523 * to figure out whether or not two databsae nodes actually refer to
524 * the same file.
525 *
526 * parameters:
527 * pointers to two file description nodes
528 * which side we should check
529 *
530 * returns:
531 * TRUE/FALSE
532 *
533 * notes:
534 * if a single directory is specified in multiple base pairs, it
535 * is possible to have multiple nodes in the database describing
536 * the same file. This routine is supposed to detect those cases.
537 *
538 * what should be a trivial string comparison is complicated by
539 * the possibility that the two nodes might describe the same file
540 * from base directories at different depths. Thus, rather than
541 * comparing two strings, we really want to compare the concatenation
542 * of two pairs of strings. Unfortunately calling full_name would
543 * be awkward right now, so instead we have our own comparison
544 * routine that automatically skips from the first string to
545 * the second.
546 */
547 static bool_t
same_name(struct file * f1,struct file * f2,side_t srcdst)548 same_name(struct file *f1, struct file *f2, side_t srcdst)
549 {
550 char *s1, *s2, *x1, *x2;
551
552 if (srcdst == OPT_SRC) {
553 s1 = (f1->f_base)->b_src_name;
554 s2 = (f2->f_base)->b_src_name;
555 } else {
556 s1 = (f1->f_base)->b_dst_name;
557 s2 = (f2->f_base)->b_dst_name;
558 }
559 x1 = f1->f_fullname;
560 x2 = f2->f_fullname;
561
562 /*
563 * Compare the two names, and if they differ before they end
564 * this is a non-match. If they both end at the same time,
565 * this is a match.
566 *
567 * The trick here is that each string is actually the logical
568 * concatenation of two strings, and we need to automatically
569 * wrap from the first to the second string in each pair. There
570 * is no requirement that the two (concatenated) strings be
571 * broken at the same point, so we have a slightly baroque
572 * comparsion loop.
573 */
574 while (*s1 && *s1 == *s2) {
575
576 /*
577 * strings have been identical so far, so advance the
578 * pointers and continue the comparison. The trick
579 * is that when either string ends, we have to wrap
580 * over to its extension.
581 */
582 s1++; s2++;
583 if (*s1 && *s2)
584 continue;
585
586 /*
587 * at least one of the strings has ended.
588 * there is an implicit slash between the string
589 * and its extension, and this has to be matched
590 * against the other string.
591 */
592 if (*s1 != *s2) {
593 if (*s1 == 0 && *s2 == '/')
594 s2++;
595 else if (*s2 == 0 && *s1 == '/')
596 s1++;
597 else
598 /* the disagreement doesn't come at a slash */
599 break;
600 }
601
602 /*
603 * if either string has ended, wrap to its extension
604 */
605 if (*s1 == 0 && x1 != 0) {
606 s1 = x1;
607 x1 = 0;
608 }
609 if (*s2 == 0 && x2 != 0) {
610 s2 = x2;
611 x2 = 0;
612 }
613 }
614
615 return (*s1 == *s2);
616 }
617
618 /*
619 * routine:
620 * find_link
621 *
622 * purpose:
623 * to figure out if there is a file to which we should
624 * be creating a link (rather than making a copy)
625 *
626 * parameters:
627 * file node for the file to be created (that we hope is merely a link)
628 * which side is to be changed (src/dst)
629 *
630 * return:
631 * 0 no link is appropriate
632 * else pointer to file node for link referent
633 *
634 * notes:
635 * there are a few strange heuristics in this routine and I
636 * wouldn't bet my soul that I got all of them right. The general
637 * theory is that when a new file is created, we look to see if it
638 * is a link to another file on the changed side, and if it is, we
639 * find the corresponding file on the unchanged side.
640 *
641 * cases we want to be able to handle:
642 * 1. one or more links are created to a prexisting file
643 * 2. a preexisting only link is renamed
644 * 3. a rename of one of multiple links to a preexisting file
645 * 4. a single file is created with multiple links
646 */
647 struct file *
find_link(struct file * fp,side_t srcdst)648 find_link(struct file *fp, side_t srcdst)
649 { struct file *lp;
650 side_t chgside, tgtside;
651 struct fileinfo *chgp, *tgtp, *basp, *fcp, *ftp;
652
653 /* chg = side on which the change was noticed */
654 /* tgt = side to which the change is to be propagated */
655 chgside = (srcdst == OPT_SRC) ? OPT_DST : OPT_SRC;
656 tgtside = (srcdst == OPT_SRC) ? OPT_SRC : OPT_DST;
657 fcp = &fp->f_info[chgside];
658 ftp = &fp->f_info[tgtside];
659
660 /*
661 * cases 1 and 3
662 *
663 * When a new link is created, we should be able to find
664 * another file in the changed hierarchy that has the same
665 * I-node number. We expect it to be on the changed list
666 * because the link count will have gone up or because all
667 * of the copies are new. If we find one, then the new file
668 * on the receiving file should be a link to the corresponding
669 * existing file.
670 *
671 * case 4
672 *
673 * the first link will be dealt with as a copy, but all
674 * subsequent links should find an existing file analogous
675 * to one of the links on the changed side, and create
676 * corresponding links on the other side.
677 *
678 * in each of these cases, there should be multiple links
679 * on the changed side. If the linkcount on the changed
680 * side is one, we needn't bother searching for other links.
681 */
682 if (fcp->f_nlink > 1)
683 for (lp = changes; lp; lp = lp->f_rnext) {
684 /* finding the same node doesn't count */
685 if (fp == lp)
686 continue;
687
688 tgtp = &lp->f_info[tgtside];
689 chgp = &lp->f_info[chgside];
690
691 /*
692 * if the file doesn't already exist on the target side
693 * we cannot make a link to it
694 */
695 if (tgtp->f_mode == 0)
696 continue;
697
698 /*
699 * if this is indeed a link, then the prospective file on
700 * the changed side will have the same dev/inum as the file
701 * we are looking for
702 */
703 if (fcp->f_d_maj != chgp->f_d_maj)
704 continue;
705 if (fcp->f_d_min != chgp->f_d_min)
706 continue;
707 if (fcp->f_ino != chgp->f_ino)
708 continue;
709
710 /*
711 * if the target side is already a link to this file,
712 * then there is no new link to be created
713 * FIX: how does this interact with copies over links
714 */
715 if ((ftp->f_d_maj == tgtp->f_d_maj) &&
716 (ftp->f_d_min == tgtp->f_d_min) &&
717 (ftp->f_ino == tgtp->f_ino))
718 continue;
719
720 /*
721 * there is a pathological situation where a single file
722 * might appear under multiple base directories. This is
723 * damned awkward to detect in any other way, so we must
724 * check to see if we have just found another database
725 * instance for the same file (on the changed side).
726 */
727 if ((fp->f_base != lp->f_base) && same_name(fp, lp, chgside))
728 continue;
729
730 if (opt_debug & DBG_ANAL)
731 fprintf(stderr, "ANAL: FIND LINK %s and %s\n",
732 fp->f_fullname, lp->f_fullname);
733
734 return (lp);
735 }
736
737 /*
738 * case 2: a simple rename of the only link
739 *
740 * In this case, there may not be any other existing file on
741 * the changed side that has the same I-node number. There
742 * might, however, be a record of such a file in the baseline.
743 * If we can find an identical file with a different name that
744 * has recently disappeared, we have a likely rename.
745 */
746 for (lp = changes; lp; lp = lp->f_rnext) {
747
748 /* finding the same node doesn't count */
749 if (fp == lp)
750 continue;
751
752 tgtp = &lp->f_info[tgtside];
753 chgp = &lp->f_info[chgside];
754
755 /*
756 * if the file still exists on the changed side this is
757 * not a simple rename, and in fact the previous pass
758 * would have found it.
759 */
760 if (chgp->f_mode != 0)
761 continue;
762
763 /*
764 * the inode number for the new link on the changed
765 * side must match the inode number for the old link
766 * from the baseline.
767 */
768 if (fcp->f_d_maj != ((srcdst == OPT_SRC) ? lp->f_d_maj
769 : lp->f_s_maj))
770 continue;
771 if (fcp->f_d_min != ((srcdst == OPT_SRC) ? lp->f_d_min
772 : lp->f_s_min))
773 continue;
774 if (fcp->f_ino != ((srcdst == OPT_SRC) ? lp->f_d_inum
775 : lp->f_s_inum))
776 continue;
777
778 /* finding a file we are already linked to doesn't help */
779 if ((ftp->f_d_maj == tgtp->f_d_maj) &&
780 (ftp->f_d_min == tgtp->f_d_min) &&
781 (ftp->f_ino == tgtp->f_ino))
782 continue;
783
784 /*
785 * there is a danger that we will confuse an
786 * inode reallocation with a rename. We should
787 * only consider this to be a rename if the
788 * new file is identical to the old one
789 */
790 basp = &lp->f_info[OPT_BASE];
791 if (fcp->f_type != basp->f_type)
792 continue;
793 if (fcp->f_size != basp->f_size)
794 continue;
795 if (fcp->f_mode != basp->f_mode)
796 continue;
797 if (fcp->f_uid != basp->f_uid)
798 continue;
799 if (fcp->f_gid != basp->f_gid)
800 continue;
801
802 if (opt_debug & DBG_ANAL)
803 fprintf(stderr, "ANAL: FIND RENAME %s and %s\n",
804 fp->f_fullname, lp->f_fullname);
805
806 return (lp);
807 }
808
809 return (0);
810 }
811
812 /*
813 * routine:
814 * has_other_links
815 *
816 * purpose:
817 * to determine whether or not there is more that one link to a
818 * particular file. We are willing to delete a link to a file
819 * that has changed if we will still have other links to it.
820 * The trick here is that we only care about links under our
821 * dominion.
822 *
823 * parameters:
824 * file pointer to node we are interested in
825 * which side we are looking to additional links on
826 *
827 * returns:
828 * TRUE if there are multiple links
829 * FALSE if this is the only one we know of
830 */
831 bool_t
has_other_links(struct file * fp,side_t srcdst)832 has_other_links(struct file *fp, side_t srcdst)
833 { struct file *lp;
834 struct fileinfo *fip, *lip;
835
836 fip = &fp->f_info[srcdst];
837
838 /* if the link count is one, there couldn't be others */
839 if (fip->f_nlink < 2)
840 return (FALSE);
841
842 /* look for any other files for the same inode */
843 for (lp = changes; lp; lp = lp->f_rnext) {
844 /* finding the same node doesn't count */
845 if (fp == lp)
846 continue;
847
848 lip = &lp->f_info[srcdst];
849
850 /*
851 * file must still exist on this side
852 */
853 if (lip->f_mode == 0)
854 continue;
855
856 /*
857 * if this is indeed a link, then the prospective file on
858 * the changed side will have the same dev/inum as the file
859 * we are looking for
860 */
861 if (lip->f_d_maj != fip->f_d_maj)
862 continue;
863 if (lip->f_d_min != fip->f_d_min)
864 continue;
865 if (lip->f_ino != fip->f_ino)
866 continue;
867
868 /*
869 * we have found at least one other link
870 */
871 return (TRUE);
872 }
873
874 return (FALSE);
875 }
876
877 /*
878 * routine:
879 * link_update
880 *
881 * purpose:
882 * to propoagate a stat change to all other file nodes that
883 * correspond to the same I-node on the changed side
884 *
885 * parameters:
886 * file pointer for the updated file
887 * which side was changed
888 *
889 * returns:
890 * void
891 *
892 * notes:
893 * if we have copied onto a file, we have copied onto all
894 * of its links, but since we do all stats before we do any
895 * copies, the stat information recently collected for links
896 * is no longer up-to-date, and this would result in incorrect
897 * reconciliation (redundant copies).
898 *
899 * There is an assumption here that all links to a changed
900 * file will be in the change list. This is true for almost
901 * all cases not involving restriction. If we do fail to
902 * update the baseline for a file that was off the change list,
903 * the worst that is likely to happen is that we will think
904 * it changed later (but will almost surely find that both
905 * copies agree).
906 */
907 void
link_update(struct file * fp,side_t which)908 link_update(struct file *fp, side_t which)
909 { struct file *lp;
910
911 for (lp = changes; lp; lp = lp->f_rnext) {
912 /* finding the current entry doesn't count */
913 if (lp == fp)
914 continue;
915
916 /* look for same i#, maj, min on changed side */
917 if (lp->f_info[which].f_ino != fp->f_info[which].f_ino)
918 continue;
919 if (lp->f_info[which].f_d_maj != fp->f_info[which].f_d_maj)
920 continue;
921 if (lp->f_info[which].f_d_min != fp->f_info[which].f_d_min)
922 continue;
923
924 /*
925 * this appears to be another link to the same file
926 * so the updated stat information for one must be
927 * correct for the other.
928 */
929 lp->f_info[which].f_type = fp->f_info[which].f_type;
930 lp->f_info[which].f_size = fp->f_info[which].f_size;
931 lp->f_info[which].f_mode = fp->f_info[which].f_mode;
932 lp->f_info[which].f_uid = fp->f_info[which].f_uid;
933 lp->f_info[which].f_gid = fp->f_info[which].f_gid;
934 lp->f_info[which].f_modtime = fp->f_info[which].f_modtime;
935 lp->f_info[which].f_modns = fp->f_info[which].f_modns;
936 lp->f_info[which].f_nlink = fp->f_info[which].f_nlink;
937 lp->f_info[which].f_rd_maj = fp->f_info[which].f_rd_maj;
938 lp->f_info[which].f_rd_min = fp->f_info[which].f_rd_min;
939
940 if (opt_debug & DBG_STAT)
941 fprintf(stderr,
942 "STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n",
943 lp->f_name, lp->f_info[which].f_modtime,
944 lp->f_info[which].f_modns);
945 }
946 }
947
948 /*
949 * routine:
950 * queue_file
951 *
952 * purpose:
953 * append a file to the list of needed reconciliations
954 *
955 * parameters:
956 * pointer to file
957 *
958 * notes:
959 * when a request is appended to the reconciliation list,
960 * we fill in the full name. We delayed this in hopes that
961 * it wouldn't be necessary (saving cycles and memory)
962 *
963 * There is some funny business with modification times.
964 * In general, we queue files in order of the latest modification
965 * time so that propagations preserve relative ordering. There
966 * are, however, a few important exceptions:
967 * 1. all directory creations happen at time zero,
968 * so that they are created before any files can
969 * be added to them.
970 * 2. all directory deletions happen at time infinity-depth,
971 * so that everything else can be removed before the
972 * directories themselves are removed.
973 * 3. all file deletions happen at time infinity-depth
974 * so that (in renames) the links will preceed the unlinks.
975 */
976 static void
queue_file(struct file * fp)977 queue_file(struct file *fp)
978 { struct file **pp, *np;
979
980 #define TIME_ZERO 0L /* the earliest possible time */
981 #define TIME_LONG 0x7FFFFFFF /* the latest possible time */
982
983 /*
984 * figure out the modification time for sequencing purposes
985 */
986 if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) {
987 /*
988 * deletions are performed last, and depth first
989 */
990 fp->f_modtime = TIME_LONG - fp->f_depth;
991 } else if (fp->f_info[OPT_SRC].f_type != S_IFDIR &&
992 fp->f_info[OPT_DST].f_type != S_IFDIR) {
993 /*
994 * for most files we use the latest mod time
995 */
996 fp->f_modtime = fp->f_info[OPT_SRC].f_modtime;
997 fp->f_modns = fp->f_info[OPT_SRC].f_modns;
998 if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) {
999 fp->f_modtime = fp->f_info[OPT_DST].f_modtime;
1000 fp->f_modns = fp->f_info[OPT_DST].f_modns;
1001 }
1002 } else {
1003 /*
1004 * new directory creations need to happen before anything
1005 * else and are automatically sequenced in traversal order
1006 */
1007 fp->f_modtime = TIME_ZERO;
1008 }
1009
1010 /*
1011 * insertion is time ordered, and for equal times,
1012 * insertions is in (pre-order) traversal order
1013 */
1014 for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) {
1015 if (fp->f_modtime > np->f_modtime)
1016 continue;
1017 if (fp->f_modtime < np->f_modtime)
1018 break;
1019 if (fp->f_modns < np->f_modns)
1020 break;
1021 }
1022
1023 fp->f_fullname = strdup(get_name(fp));
1024 fp->f_rnext = np;
1025 *pp = fp;
1026 }
1027
1028
1029 /*
1030 * routines:
1031 * push_name/pop_name/get_name
1032 *
1033 * purpose:
1034 * maintain a name stack so we can form name of a particular file
1035 * as the concatenation of all of the names between it and the
1036 * (know to be fully qualified) base directory.
1037 *
1038 * notes:
1039 * we go to this trouble because most files never change and
1040 * so we don't need to associate full names with every one.
1041 * This stack is maintained during analysis, and if we decide
1042 * to add a file to the reconciliation list, we can use the
1043 * stack to generate a fully qualified name at that time.
1044 *
1045 * we compress out '/./' when we return a name. Given that the
1046 * stack was built by a tree walk, the only place a /./ should
1047 * appear is at the first level after the base ... but there
1048 * are legitimate ways for them to appear there.
1049 *
1050 * these names can get deep, so we dynamically size our name buffer
1051 */
1052 static const char *namestack[ MAX_DEPTH + 1 ];
1053 static int namedepth = 0;
1054 static int namelen = 0;
1055
1056 void
push_name(const char * name)1057 push_name(const char *name)
1058 {
1059 namestack[ namedepth++ ] = name;
1060 namelen += 2 + strlen(name);
1061
1062 /* make sure we don't overflow our name stack */
1063 if (namedepth >= MAX_DEPTH) {
1064 fprintf(stderr, gettext(ERR_deep), name);
1065 exit(ERR_OTHER);
1066 }
1067 }
1068
1069 void
pop_name(void)1070 pop_name(void)
1071 {
1072 namelen -= 2 + strlen(namestack[--namedepth]);
1073 namestack[ namedepth ] = 0;
1074
1075 #ifdef DBG_ERRORS
1076 /* just a little sanity check here */
1077 if (namedepth <= 0) {
1078 if (namedepth < 0) {
1079 fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n");
1080 exit(ERR_OTHER);
1081 } else if (namelen != 0) {
1082 fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n");
1083 exit(ERR_OTHER);
1084 }
1085 }
1086 #endif
1087 }
1088
1089 char
get_name(struct file * fp)1090 *get_name(struct file *fp)
1091 { int i;
1092 static char *namebuf = 0;
1093 static int buflen = 0;
1094
1095 /* make sure we have an adequate buffer */
1096 i = namelen + 1 + strlen(fp->f_name);
1097 if (buflen < i) {
1098 for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME);
1099 namebuf = (char *) realloc(namebuf, buflen);
1100 }
1101
1102 /* assemble the name */
1103 namebuf[0] = 0;
1104 for (i = 0; i < namedepth; i++) {
1105 if (strcmp(namestack[i], ".")) {
1106 strcat(namebuf, namestack[i]);
1107 strcat(namebuf, "/");
1108 }
1109 }
1110
1111 strcat(namebuf, fp->f_name);
1112
1113 return (namebuf);
1114 }
1115