xref: /illumos-gate/usr/src/cmd/filesync/anal.c (revision 2a8bcb4efb45d99ac41c94a75c396b362c414f7f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 1995 Sun Microsystems, Inc.  All Rights Reserved
24  *
25  * module:
26  *	anal.c
27  *
28  * purpose:
29  *	routines to analyze the file trees and figure out what has changed
30  *	and queue files for reconciliation.  It also contains tree enumeration
31  *	routines to for other purposes (pruning and link location).
32  *
33  * contents:
34  *
35  *  change analysis:
36  *	analyze .... (top level) analyze all files in the tree for changes
37  *	summary .... print out change/reconciliation statistics for each base
38  *	check_file . (static) look for changes and queue file for reconciliation
39  *	check_changes (static) figure out if a particular file has changed
40  *	queue_file . (static) add a file to the reconciliation list
41  *
42  *  other tree enumeration functions:
43  *	prune_file . (static) recursive descent and actual pruning
44  *	prune ...... (top level) initiate pruning analysis for nonexistant files
45  *	find_link .. look for other files to which a file may be a link
46  *	link_update. propagate changed stat info to all other links
47  *	same_name .. (static) figure out if two nodes describe same file
48  *
49  *  misc:
50  *	push_name .. maintain a running full pathname as we descend
51  *	pop_name ... maintain a running full pathname as we pop back
52  *	get_name ... return full pathname for the current file
53  *
54  * notes:
55  *	analysis is limited to files that were evaluated in the previous
56  *	pass ... since we don't have complete information about files that
57  *	were not evaluated in the previous pass.
58  */
59 
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <strings.h>
63 
64 #include "messages.h"
65 #include "filesync.h"
66 #include "database.h"
67 #include "debug.h"
68 
69 /*
70  * routines:
71  */
72 void push_name(const char *);
73 void pop_name();
74 char *get_name(struct file *);
75 static errmask_t check_file(struct file *fp);
76 static diffmask_t check_changes(struct file *fp, int first, int second);
77 static int prune_file(struct file *fp);
78 static void queue_file(struct file *fp);
79 
80 /*
81  * globals
82  */
83 static struct file *changes;	/* list of files to be reconciled	*/
84 
85 static long total_files;	/* total number of files being considered  */
86 static long est_deletes;	/* estimated number of files to be deleted */
87 static long est_rmdirs;		/* est rmdirs of non-empty directories	   */
88 
89 int inum_changes;		/* LISTed directories whose I#s changed	   */
90 
91 /*
92  * routine:
93  *	analyze
94  *
95  * purpose:
96  *	top level routine for the analysis/reconciliation process
97  *
98  * parameters:
99  *	none
100  *
101  * returns:
102  *	error mask
103  *
104  * notes:
105  *	a critical side effect of this routine is the creation of
106  *	the reconciliation list, an ordered list of files that
107  *	needed to be processed in the subsequent reconciliation pass
108  */
109 errmask_t
analyze()110 analyze()
111 {	struct base *bp;
112 	struct file *fp;
113 	int errs = 0;
114 	int err;
115 	int percentage;
116 	bool_t aborted = FALSE;
117 	char msgbuf[MAX_LINE];
118 
119 	/*
120 	 * run through all bases and directories looking for files
121 	 * that have been renamed.  This must be done before the
122 	 * difference analysis because a directory rename can introduce
123 	 * radical restructuring into a name-based tree.
124 	 */
125 	for (bp = bases; bp; bp = bp->b_next) {
126 		for (fp = bp->b_files; fp; fp = fp->f_next)
127 			if (fp->f_flags & F_EVALUATE)
128 				errs |= find_renames(fp);
129 	}
130 
131 	/*
132 	 * run through all bases and files looking for candidates
133 	 * note, however that we only descend into trees that have
134 	 * the evaluate flag turned on.  As a result of new rules or
135 	 * restriction arguments, we may be deliberatly ignoring
136 	 * large amounts of the baseline.   This means we won't do
137 	 * any stats to update the information in those nodes, and
138 	 * they will be written back just as they were.
139 	 *
140 	 * note that there is code to prune out baseline nodes for
141 	 * files that no longer exist, but that code is in reconcile
142 	 * and will never get a chance to run on nodes that aren't
143 	 * analyzed.
144 	 *
145 	 * we also want to run though all nodes with STAT errors
146 	 * so that we can put them on the reconciliation list.
147 	 */
148 	for (bp = bases; bp; bp = bp->b_next) {
149 		for (fp = bp->b_files; fp; fp = fp->f_next)
150 			if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR))
151 				errs |= check_file(fp);
152 	}
153 
154 	/*
155 	 * my greatest fear is that someday, somehow, by messing with
156 	 * variables or baselines or who-knows-what, that someone will
157 	 * run a reconciliation against a large tree that doesn't correspond
158 	 * to the baseline, and I will infer that a bazillion files have
159 	 * been deleted and will propagate the slaughter before anyone
160 	 * can say somebody stop that maniac.
161 	 *
162 	 * in order to prevent such a possibility, we have a few different
163 	 * sanity checks.  There is, of course, a tradeoff here between
164 	 * danger and irritation.  The current set of heuristics for whether
165 	 * or not to generate a warning are (any of)
166 	 *
167 	 *	at least CONFIRM_MIN files have been deleted AND
168 	 *	CONFIRM_PCT of all files have been deleted
169 	 *
170 	 *	the inode number on a LISTed directory has changed
171 	 *
172 	 *	a non-empty directory has been deleted.
173 	 */
174 	msgbuf[0] = 0;
175 
176 	percentage = (est_deletes * 100) / (total_files ? total_files : 1);
177 	if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT)
178 		sprintf(msgbuf, gettext(WARN_deletes), est_deletes);
179 	else if (inum_changes > 0)
180 		sprintf(msgbuf, gettext(WARN_ichange), inum_changes);
181 	else if (est_rmdirs)
182 		sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs);
183 
184 	if (msgbuf[0])
185 		confirm(msgbuf);
186 
187 	/*
188 	 * TRICK:
189 	 *	the change list contains both files that have changed
190 	 *	(and probably warrant reconciliation) and files that
191 	 *	we couldn't get up-to-date stat information on.  The
192 	 *	latter files should just be flagged as being in conflict
193 	 *	so they can be reported in the summary.  The same is
194 	 *	true of all subsequent files if we abort reconciliation.
195 	 */
196 	for (fp = changes; fp; fp = fp->f_rnext)
197 		if (aborted || (fp->f_flags & F_STAT_ERROR)) {
198 			fp->f_flags |= F_CONFLICT;
199 			/* if it isn't in the baseline yet, don't add it */
200 			if ((fp->f_flags & F_IN_BASELINE) == 0)
201 				fp->f_flags |= F_REMOVE;
202 			fp->f_problem = aborted ? PROB_aborted : PROB_restat;
203 			(fp->f_base)->b_unresolved++;
204 			errs |= ERR_UNRESOLVED;
205 			if (opt_verbose)
206 				fprintf(stdout,
207 					gettext(aborted ? V_suppressed
208 							: V_nostat),
209 					fp->f_fullname);
210 		} else {
211 			err = reconcile(fp);
212 			errs |= err;
213 			if (opt_halt && (err & ERR_ABORT)) {
214 				fprintf(stderr, gettext(ERR_abort_h));
215 				aborted = TRUE;
216 			}
217 		}
218 
219 	return (errs);
220 }
221 
222 /*
223  * routine:
224  *	prune_file
225  *
226  * purpose:
227  *	to look for file entries that should be pruned from baseline
228  *	prune the current file if it needs pruning, and recursively
229  *	descend if it is a directory.
230  *
231  * parameters:
232  *	pointer to file node
233  */
234 static int
prune_file(struct file * fp)235 prune_file(struct file *fp)
236 {	struct file *cp;
237 	int prunes = 0;
238 
239 	/* if node hasn't been evaluated, mark it for removal	*/
240 	if ((fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) == 0) {
241 		fp->f_flags |= F_REMOVE;
242 		prunes++;
243 		if (opt_debug & DBG_ANAL)
244 			fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name);
245 	}
246 
247 	/* now check our children				*/
248 	for (cp = fp->f_files; cp; cp = cp->f_next)
249 		prunes += prune_file(cp);
250 
251 	return (prunes);
252 }
253 
254 /*
255  * routine:
256  *	prune
257  *
258  * purpose:
259  *	to prune the baseline of entries that no longer correspond to
260  *	existing rules.
261  *
262  * notes:
263  *	This routine just calls prune_file on the top of each base tree.
264  */
265 int
prune()266 prune()
267 {	struct base *bp;
268 	struct file *fp;
269 	int prunes = 0;
270 
271 	for (bp = bases; bp; bp = bp->b_next) {
272 		for (fp = bp->b_files; fp; fp = fp->f_next)
273 			prunes += prune_file(fp);
274 
275 		if ((bp->b_flags & F_EVALUATE) == 0)
276 			bp->b_flags |= F_REMOVE;
277 	}
278 
279 	return (prunes);
280 }
281 
282 /*
283  * routine:
284  *	summary
285  *
286  * purpose:
287  *	to print out statics and conflict lists
288  */
289 void
summary()290 summary()
291 {	struct base *bp;
292 	struct file *fp;
293 	extern bool_t need_super;
294 
295 	(void) fflush(stdout);
296 
297 	for (bp = bases; bp; bp = bp->b_next) {
298 
299 		/* see if this base was irrelevant	*/
300 		if ((bp->b_flags & F_EVALUATE) == 0)
301 			continue;
302 
303 		/* print out a summary for this base	*/
304 		fprintf(stderr, gettext(SUM_hd),
305 			bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles);
306 		fprintf(stderr, gettext(SUM_dst),
307 			bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc);
308 		fprintf(stderr, gettext(SUM_src),
309 			bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc);
310 		if (bp->b_unresolved)
311 			fprintf(stderr, gettext(SUM_unresolved),
312 				bp->b_unresolved);
313 
314 
315 		/* print out a list of unreconciled files for this base	*/
316 		for (fp = changes; fp; fp = fp->f_rnext) {
317 			if (fp->f_base != bp)
318 				continue;
319 			if ((fp->f_flags & F_CONFLICT) == 0)
320 				continue;
321 			fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname,
322 				fp->f_problem ? fp->f_problem : "???");
323 		}
324 
325 		fprintf(stderr, "\n");
326 	}
327 
328 	if (need_super)
329 		fprintf(stderr, gettext(WARN_super));
330 }
331 
332 /*
333  * routine:
334  *	check_file
335  *
336  * purpose:
337  *	figure out if a file requires reconciliation and recursively
338  *	descend into all sub-files and directories
339  *
340  * parameters:
341  *	base pointer
342  *	file pointer
343  *
344  * returns:
345  *	error mask
346  *	built up changes needed list
347  *	updated statistics
348  *
349  * notes:
350  *	this routine builds up a path name as it descends through
351  *	the tree (see push_name, pop_name, get_name).
352  */
353 static errmask_t
check_file(struct file * fp)354 check_file(struct file *fp)
355 {	struct file *cp;
356 	int errs = 0;
357 
358 	if ((fp->f_flags & F_STAT_ERROR) == 0) {
359 		/* see if the source has changed	*/
360 		fp->f_info[OPT_BASE].f_modtime	= fp->f_s_modtime;
361 		fp->f_info[OPT_BASE].f_ino	= fp->f_s_inum;
362 		fp->f_info[OPT_BASE].f_d_maj	= fp->f_s_maj;
363 		fp->f_info[OPT_BASE].f_d_min	= fp->f_s_min;
364 		fp->f_info[OPT_BASE].f_nlink	= fp->f_s_nlink;
365 		fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC);
366 
367 		/* see if the destination has changed	*/
368 		fp->f_info[OPT_BASE].f_modtime	= fp->f_d_modtime;
369 		fp->f_info[OPT_BASE].f_ino    	= fp->f_d_inum;
370 		fp->f_info[OPT_BASE].f_d_maj    = fp->f_d_maj;
371 		fp->f_info[OPT_BASE].f_d_min    = fp->f_d_min;
372 		fp->f_info[OPT_BASE].f_nlink	= fp->f_d_nlink;
373 		fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST);
374 
375 		/* if nobody thinks the file exists, baseline needs pruning */
376 		if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) {
377 			fp->f_srcdiffs |= D_DELETE;
378 			fp->f_dstdiffs |= D_DELETE;
379 		}
380 
381 		/* keep track of possible deletions to look for trouble	*/
382 		if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) {
383 			est_deletes++;
384 
385 			/* see if file is (or has been) a non-empty directory */
386 			if (fp->f_files)
387 				est_rmdirs++;
388 		}
389 	}
390 
391 	/* if we found differences, queue the file for reconciliation 	*/
392 	if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) {
393 		queue_file(fp);
394 
395 		if (opt_debug & DBG_ANAL) {
396 			fprintf(stderr, "ANAL: src=%s",
397 				showflags(diffmap, fp->f_srcdiffs));
398 			fprintf(stderr, " dst=%s",
399 				showflags(diffmap, fp->f_dstdiffs));
400 			fprintf(stderr, " flgs=%s",
401 				showflags(fileflags, fp->f_flags));
402 			fprintf(stderr, " name=%s\n", fp->f_fullname);
403 		}
404 	}
405 
406 	/* bump the total file count	*/
407 	fp->f_base->b_totfiles++;
408 	total_files++;
409 
410 	/* if this is not a directory, we're done	*/
411 	if (fp->f_files == 0)
412 		return (errs);
413 
414 	/*
415 	 * If this is a directory, we need to recursively analyze
416 	 * our children, but only children who have been evaluated.
417 	 * If a node has not been evaluated, then we don't have
418 	 * updated stat information and there is nothing to analyze.
419 	 *
420 	 * we also want to run though all nodes with STAT errors
421 	 * so that we can put them on the reconciliation list.
422 	 * If a directory is unreadable on one side, all files
423 	 * under that directory (ON BOTH SIDES) must be marked as
424 	 * blocked by stat errors.
425 	 */
426 	push_name(fp->f_name);
427 
428 	for (cp = fp->f_files; cp; cp = cp->f_next) {
429 		if (fp->f_flags & F_STAT_ERROR)
430 			cp->f_flags |= F_STAT_ERROR;
431 		if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR))
432 			errs |= check_file(cp);
433 	}
434 
435 	pop_name();
436 
437 	return (errs);
438 }
439 
440 /*
441  * routine:
442  *	check_changes
443  *
444  * purpose:
445  *	to figure out what has changed for a specific file
446  *
447  * parameters:
448  *	file pointer
449  *	the reference info
450  *	the info to be checked for changes
451  *
452  * returns:
453  *	diff mask
454  *
455  * notes:
456  *	this routine doesn't pretend to understand what happened.
457  *	it merely enumerates the ways in which the files differ.
458  */
459 static diffmask_t
check_changes(struct file * fp,int ref,int new)460 check_changes(struct file *fp, int ref, int new)
461 {	struct fileinfo *rp, *np;
462 	int mask = 0;
463 	int type;
464 
465 	rp = &fp->f_info[ref];
466 	np = &fp->f_info[new];
467 
468 	if (np->f_uid != rp->f_uid)
469 		mask |= D_UID;
470 
471 	if (np->f_gid != rp->f_gid)
472 		mask |= D_GID;
473 
474 	if (np->f_mode != rp->f_mode)
475 		mask |= D_PROT;
476 
477 	type = np->f_type;
478 	if (type != rp->f_type) {
479 		if (type == 0)
480 			mask |= D_DELETE;
481 		else if (rp->f_type == 0)
482 			mask |= D_CREATE;
483 		else
484 			mask |= D_TYPE;
485 	} else if (type == S_IFBLK || type == S_IFCHR) {
486 		/*
487 		 * for special files, we only look at the maj/min
488 		 */
489 		if (np->f_rd_maj != rp->f_rd_maj)
490 			mask |= D_SIZE;
491 		if (np->f_rd_min != rp->f_rd_min)
492 			mask |= D_SIZE;
493 	} else if (type != S_IFDIR) {
494 		/*
495 		 * for directories, we don't look directly at
496 		 * the contents, so these fields don't mean
497 		 * anything.  If the directories have changed
498 		 * in any interesting way, we'll find it by
499 		 * walking the tree.
500 		 */
501 		if (np->f_modtime > rp->f_modtime)
502 			mask |= D_MTIME;
503 
504 		if (np->f_size != rp->f_size)
505 			mask |= D_SIZE;
506 
507 		if (np->f_nlink != rp->f_nlink)
508 			mask |= D_LINKS;
509 	}
510 
511 	if (cmp_acls(rp, np) == 0)
512 		mask |= D_FACLS;
513 
514 	return (mask);
515 }
516 
517 /*
518  * routine:
519  *	same_name
520  *
521  * purpose:
522  *	to figure out whether or not two databsae nodes actually refer to
523  *	the same file.
524  *
525  * parameters:
526  *	pointers to two file description nodes
527  *	which side we should check
528  *
529  * returns:
530  *	TRUE/FALSE
531  *
532  * notes:
533  *	if a single directory is specified in multiple base pairs, it
534  *	is possible to have multiple nodes in the database describing
535  *	the same file.  This routine is supposed to detect those cases.
536  *
537  *	what should be a trivial string comparison is complicated by
538  *	the possibility that the two nodes might describe the same file
539  *	from base directories at different depths.  Thus, rather than
540  *	comparing two strings, we really want to compare the concatenation
541  *	of two pairs of strings.  Unfortunately calling full_name would
542  *	be awkward right now, so instead we have our own comparison
543  *	routine that automatically skips from the first string to
544  *	the second.
545  */
546 static bool_t
same_name(struct file * f1,struct file * f2,side_t srcdst)547 same_name(struct file *f1, struct file *f2, side_t srcdst)
548 {
549 	char *s1, *s2, *x1, *x2;
550 
551 	if (srcdst == OPT_SRC) {
552 		s1 = (f1->f_base)->b_src_name;
553 		s2 = (f2->f_base)->b_src_name;
554 	} else {
555 		s1 = (f1->f_base)->b_dst_name;
556 		s2 = (f2->f_base)->b_dst_name;
557 	}
558 	x1 = f1->f_fullname;
559 	x2 = f2->f_fullname;
560 
561 	/*
562 	 * Compare the two names, and if they differ before they end
563 	 * this is a non-match.  If they both end at the same time,
564 	 * this is a match.
565 	 *
566 	 * The trick here is that each string is actually the logical
567 	 * concatenation of two strings, and we need to automatically
568 	 * wrap from the first to the second string in each pair.  There
569 	 * is no requirement that the two (concatenated) strings be
570 	 * broken at the same point, so we have a slightly baroque
571 	 * comparsion loop.
572 	 */
573 	while (*s1 && *s1 == *s2) {
574 
575 		/*
576 		 * strings have been identical so far, so advance the
577 		 * pointers and continue the comparison.  The trick
578 		 * is that when either string ends, we have to wrap
579 		 * over to its extension.
580 		 */
581 		s1++; s2++;
582 		if (*s1 && *s2)
583 			continue;
584 
585 		/*
586 		 * at least one of the strings has ended.
587 		 * there is an implicit slash between the string
588 		 * and its extension, and this has to be matched
589 		 * against the other string.
590 		 */
591 		if (*s1 != *s2) {
592 			if (*s1 == 0 && *s2 == '/')
593 				s2++;
594 			else if (*s2 == 0 && *s1 == '/')
595 				s1++;
596 			else
597 				/* the disagreement doesn't come at a slash */
598 				break;
599 		}
600 
601 		/*
602 		 * if either string has ended, wrap to its extension
603 		 */
604 		if (*s1 == 0 && x1 != 0) {
605 			s1 = x1;
606 			x1 = 0;
607 		}
608 		if (*s2 == 0 && x2 != 0) {
609 			s2 = x2;
610 			x2 = 0;
611 		}
612 	}
613 
614 	return (*s1 == *s2);
615 }
616 
617 /*
618  * routine:
619  *	find_link
620  *
621  * purpose:
622  *	to figure out if there is a file to which we should
623  *	be creating a link (rather than making a copy)
624  *
625  * parameters:
626  *	file node for the file to be created (that we hope is merely a link)
627  *	which side is to be changed (src/dst)
628  *
629  * return:
630  *	0	no link is appropriate
631  *	else	pointer to file node for link referent
632  *
633  * notes:
634  *	there are a few strange heuristics in this routine and I
635  *	wouldn't bet my soul that I got all of them right.  The general
636  *	theory is that when a new file is created, we look to see if it
637  *	is a link to another file on the changed side, and if it is, we
638  *	find the corresponding file on the unchanged side.
639  *
640  *	cases we want to be able to handle:
641  *	    1.	one or more links are created to a prexisting file
642  *	    2.	a preexisting only link is renamed
643  *	    3.  a rename of one of multiple links to a preexisting file
644  *	    4.	a single file is created with multiple links
645  */
646 struct file *
find_link(struct file * fp,side_t srcdst)647 find_link(struct file *fp, side_t srcdst)
648 {	struct file *lp;
649 	side_t chgside, tgtside;
650 	struct fileinfo *chgp, *tgtp, *basp, *fcp, *ftp;
651 
652 	/* chg = side on which the change was noticed		*/
653 	/* tgt = side to which the change is to be propagated	*/
654 	chgside = (srcdst == OPT_SRC) ? OPT_DST : OPT_SRC;
655 	tgtside = (srcdst == OPT_SRC) ? OPT_SRC : OPT_DST;
656 	fcp = &fp->f_info[chgside];
657 	ftp = &fp->f_info[tgtside];
658 
659 	/*
660 	 * cases 1 and 3
661 	 *
662 	 * When a new link is created, we should be able to find
663 	 * another file in the changed hierarchy that has the same
664 	 * I-node number.  We expect it to be on the changed list
665 	 * because the link count will have gone up or because all
666 	 * of the copies are new.  If we find one, then the new file
667 	 * on the receiving file should be a link to the corresponding
668 	 * existing file.
669 	 *
670 	 * case 4
671 	 *
672 	 * the first link will be dealt with as a copy, but all
673 	 * subsequent links should find an existing file analogous
674 	 * to one of the links on the changed side, and create
675 	 * corresponding links on the other side.
676 	 *
677 	 * in each of these cases, there should be multiple links
678 	 * on the changed side.  If the linkcount on the changed
679 	 * side is one, we needn't bother searching for other links.
680 	 */
681 	if (fcp->f_nlink > 1)
682 	for (lp = changes; lp; lp = lp->f_rnext) {
683 		/* finding the same node doesn't count	*/
684 		if (fp == lp)
685 			continue;
686 
687 		tgtp = &lp->f_info[tgtside];
688 		chgp = &lp->f_info[chgside];
689 
690 		/*
691 		 * if the file doesn't already exist on the target side
692 		 * we cannot make a link to it
693 		 */
694 		if (tgtp->f_mode == 0)
695 			continue;
696 
697 		/*
698 		 * if this is indeed a link, then the prospective file on
699 		 * the changed side will have the same dev/inum as the file
700 		 * we are looking for
701 		 */
702 		if (fcp->f_d_maj != chgp->f_d_maj)
703 			continue;
704 		if (fcp->f_d_min != chgp->f_d_min)
705 			continue;
706 		if (fcp->f_ino != chgp->f_ino)
707 			continue;
708 
709 		/*
710 		 * if the target side is already a link to this file,
711 		 * then there is no new link to be created
712 		 * FIX: how does this interact with copies over links
713 		 */
714 		if ((ftp->f_d_maj == tgtp->f_d_maj) &&
715 		    (ftp->f_d_min == tgtp->f_d_min) &&
716 		    (ftp->f_ino   == tgtp->f_ino))
717 			continue;
718 
719 		/*
720 		 * there is a pathological situation where a single file
721 		 * might appear under multiple base directories.  This is
722 		 * damned awkward to detect in any other way, so we must
723 		 * check to see if we have just found another database
724 		 * instance for the same file (on the changed side).
725 		 */
726 		if ((fp->f_base != lp->f_base) && same_name(fp, lp, chgside))
727 			continue;
728 
729 		if (opt_debug & DBG_ANAL)
730 			fprintf(stderr, "ANAL: FIND LINK %s and %s\n",
731 				fp->f_fullname, lp->f_fullname);
732 
733 		return (lp);
734 	}
735 
736 	/*
737 	 * case 2: a simple rename of the only link
738 	 *
739 	 * In this case, there may not be any other existing file on
740 	 * the changed side that has the same I-node number.  There
741 	 * might, however, be a record of such a file in the baseline.
742 	 * If we can find an identical file with a different name that
743 	 * has recently disappeared, we have a likely rename.
744 	 */
745 	for (lp = changes; lp; lp = lp->f_rnext) {
746 
747 		/* finding the same node doesn't count			*/
748 		if (fp == lp)
749 			continue;
750 
751 		tgtp = &lp->f_info[tgtside];
752 		chgp = &lp->f_info[chgside];
753 
754 		/*
755 		 * if the file still exists on the changed side this is
756 		 * not a simple rename, and in fact the previous pass
757 		 * would have found it.
758 		 */
759 		if (chgp->f_mode != 0)
760 			continue;
761 
762 		/*
763 		 * the inode number for the new link on the changed
764 		 * side must match the inode number for the old link
765 		 * from the baseline.
766 		 */
767 		if (fcp->f_d_maj != ((srcdst == OPT_SRC) ? lp->f_d_maj
768 							: lp->f_s_maj))
769 			continue;
770 		if (fcp->f_d_min != ((srcdst == OPT_SRC) ? lp->f_d_min
771 							: lp->f_s_min))
772 			continue;
773 		if (fcp->f_ino != ((srcdst == OPT_SRC) ? lp->f_d_inum
774 							: lp->f_s_inum))
775 			continue;
776 
777 		/* finding a file we are already linked to doesn't help	*/
778 		if ((ftp->f_d_maj == tgtp->f_d_maj) &&
779 		    (ftp->f_d_min == tgtp->f_d_min) &&
780 		    (ftp->f_ino   == tgtp->f_ino))
781 			continue;
782 
783 		/*
784 		 * there is a danger that we will confuse an
785 		 * inode reallocation with a rename.  We should
786 		 * only consider this to be a rename if the
787 		 * new file is identical to the old one
788 		 */
789 		basp = &lp->f_info[OPT_BASE];
790 		if (fcp->f_type != basp->f_type)
791 			continue;
792 		if (fcp->f_size != basp->f_size)
793 			continue;
794 		if (fcp->f_mode != basp->f_mode)
795 			continue;
796 		if (fcp->f_uid != basp->f_uid)
797 			continue;
798 		if (fcp->f_gid != basp->f_gid)
799 			continue;
800 
801 		if (opt_debug & DBG_ANAL)
802 			fprintf(stderr, "ANAL: FIND RENAME %s and %s\n",
803 				fp->f_fullname, lp->f_fullname);
804 
805 		return (lp);
806 	}
807 
808 	return (0);
809 }
810 
811 /*
812  * routine:
813  *	has_other_links
814  *
815  * purpose:
816  *	to determine whether or not there is more that one link to a
817  *	particular file.  We are willing to delete a link to a file
818  *	that has changed if we will still have other links to it.
819  *	The trick here is that we only care about links under our
820  *	dominion.
821  *
822  * parameters:
823  *	file pointer to node we are interested in
824  *	which side we are looking to additional links on
825  *
826  * returns:
827  *	TRUE if there are multiple links
828  *	FALSE if this is the only one we know of
829  */
830 bool_t
has_other_links(struct file * fp,side_t srcdst)831 has_other_links(struct file *fp, side_t srcdst)
832 {	struct file *lp;
833 	struct fileinfo *fip, *lip;
834 
835 	fip = &fp->f_info[srcdst];
836 
837 	/* if the link count is one, there couldn't be others	*/
838 	if (fip->f_nlink < 2)
839 		return (FALSE);
840 
841 	/* look for any other files for the same inode		*/
842 	for (lp = changes; lp; lp = lp->f_rnext) {
843 		/* finding the same node doesn't count	*/
844 		if (fp == lp)
845 			continue;
846 
847 		lip = &lp->f_info[srcdst];
848 
849 		/*
850 		 * file must still exist on this side
851 		 */
852 		if (lip->f_mode == 0)
853 			continue;
854 
855 		/*
856 		 * if this is indeed a link, then the prospective file on
857 		 * the changed side will have the same dev/inum as the file
858 		 * we are looking for
859 		 */
860 		if (lip->f_d_maj != fip->f_d_maj)
861 			continue;
862 		if (lip->f_d_min != fip->f_d_min)
863 			continue;
864 		if (lip->f_ino != fip->f_ino)
865 			continue;
866 
867 		/*
868 		 * we have found at least one other link
869 		 */
870 		return (TRUE);
871 	}
872 
873 	return (FALSE);
874 }
875 
876 /*
877  * routine:
878  *	link_update
879  *
880  * purpose:
881  *	to propoagate a stat change to all other file nodes that
882  *	correspond to the same I-node on the changed side
883  *
884  * parameters:
885  *	file pointer for the updated file
886  *	which side was changed
887  *
888  * returns:
889  *	void
890  *
891  * notes:
892  *	if we have copied onto a file, we have copied onto all
893  *	of its links, but since we do all stats before we do any
894  *	copies, the stat information recently collected for links
895  *	is no longer up-to-date, and this would result in incorrect
896  *	reconciliation (redundant copies).
897  *
898  *	There is an assumption here that all links to a changed
899  *	file will be in the change list.  This is true for almost
900  *	all cases not involving restriction.  If we do fail to
901  *	update the baseline for a file that was off the change list,
902  *	the worst that is likely to happen is that we will think
903  *	it changed later (but will almost surely find that both
904  *	copies agree).
905  */
906 void
link_update(struct file * fp,side_t which)907 link_update(struct file *fp, side_t which)
908 {	struct file *lp;
909 
910 	for (lp = changes; lp; lp = lp->f_rnext) {
911 		/* finding the current entry doesn't count	*/
912 		if (lp == fp)
913 			continue;
914 
915 		/* look for same i#, maj, min on changed side	*/
916 		if (lp->f_info[which].f_ino != fp->f_info[which].f_ino)
917 			continue;
918 		if (lp->f_info[which].f_d_maj != fp->f_info[which].f_d_maj)
919 			continue;
920 		if (lp->f_info[which].f_d_min != fp->f_info[which].f_d_min)
921 			continue;
922 
923 		/*
924 		 * this appears to be another link to the same file
925 		 * so the updated stat information for one must be
926 		 * correct for the other.
927 		 */
928 		lp->f_info[which].f_type	= fp->f_info[which].f_type;
929 		lp->f_info[which].f_size	= fp->f_info[which].f_size;
930 		lp->f_info[which].f_mode	= fp->f_info[which].f_mode;
931 		lp->f_info[which].f_uid		= fp->f_info[which].f_uid;
932 		lp->f_info[which].f_gid		= fp->f_info[which].f_gid;
933 		lp->f_info[which].f_modtime	= fp->f_info[which].f_modtime;
934 		lp->f_info[which].f_modns	= fp->f_info[which].f_modns;
935 		lp->f_info[which].f_nlink	= fp->f_info[which].f_nlink;
936 		lp->f_info[which].f_rd_maj	= fp->f_info[which].f_rd_maj;
937 		lp->f_info[which].f_rd_min	= fp->f_info[which].f_rd_min;
938 
939 		if (opt_debug & DBG_STAT)
940 			fprintf(stderr,
941 				"STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n",
942 				lp->f_name, lp->f_info[which].f_modtime,
943 				lp->f_info[which].f_modns);
944 	}
945 }
946 
947 /*
948  * routine:
949  *	queue_file
950  *
951  * purpose:
952  *	append a file to the list of needed reconciliations
953  *
954  * parameters:
955  *	pointer to file
956  *
957  * notes:
958  *	when a request is appended to the reconciliation list,
959  *	we fill in the full name.  We delayed this in hopes that
960  *	it wouldn't be necessary (saving cycles and memory)
961  *
962  *	There is some funny business with modification times.
963  *	In general, we queue files in order of the latest modification
964  *	time so that propagations preserve relative ordering.  There
965  *	are, however, a few important exceptions:
966  *	    1.	all directory creations happen at time zero,
967  *		so that they are created before any files can
968  *		be added to them.
969  *	    2.	all directory deletions happen at time infinity-depth,
970  *		so that everything else can be removed before the
971  *		directories themselves are removed.
972  *	    3.	all file deletions happen at time infinity-depth
973  *		so that (in renames) the links will preceed the unlinks.
974  */
975 static void
queue_file(struct file * fp)976 queue_file(struct file *fp)
977 {	struct file **pp, *np;
978 
979 #define	TIME_ZERO	0L		/* the earliest possible time	*/
980 #define	TIME_LONG	0x7FFFFFFF	/* the latest possible time	*/
981 
982 	/*
983 	 * figure out the modification time for sequencing purposes
984 	 */
985 	if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) {
986 		/*
987 		 * deletions are performed last, and depth first
988 		 */
989 		fp->f_modtime = TIME_LONG - fp->f_depth;
990 	} else if (fp->f_info[OPT_SRC].f_type != S_IFDIR &&
991 	    fp->f_info[OPT_DST].f_type != S_IFDIR) {
992 		/*
993 		 * for most files we use the latest mod time
994 		 */
995 		fp->f_modtime = fp->f_info[OPT_SRC].f_modtime;
996 		fp->f_modns   = fp->f_info[OPT_SRC].f_modns;
997 		if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) {
998 			fp->f_modtime = fp->f_info[OPT_DST].f_modtime;
999 			fp->f_modns   = fp->f_info[OPT_DST].f_modns;
1000 		}
1001 	} else {
1002 		/*
1003 		 * new directory creations need to happen before anything
1004 		 * else and are automatically sequenced in traversal order
1005 		 */
1006 		fp->f_modtime = TIME_ZERO;
1007 	}
1008 
1009 	/*
1010 	 * insertion is time ordered, and for equal times,
1011 	 * insertions is in (pre-order) traversal order
1012 	 */
1013 	for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) {
1014 		if (fp->f_modtime > np->f_modtime)
1015 			continue;
1016 		if (fp->f_modtime < np->f_modtime)
1017 			break;
1018 		if (fp->f_modns < np->f_modns)
1019 			break;
1020 	}
1021 
1022 	fp->f_fullname = strdup(get_name(fp));
1023 	fp->f_rnext = np;
1024 	*pp = fp;
1025 }
1026 
1027 
1028 /*
1029  * routines:
1030  *	push_name/pop_name/get_name
1031  *
1032  * purpose:
1033  *	maintain a name stack so we can form name of a particular file
1034  *	as the concatenation of all of the names between it and the
1035  *	(know to be fully qualified) base directory.
1036  *
1037  * notes:
1038  *	we go to this trouble because most files never change and
1039  *	so we don't need to associate full names with every one.
1040  *	This stack is maintained during analysis, and if we decide
1041  *	to add a file to the reconciliation list, we can use the
1042  *	stack to generate a fully qualified name at that time.
1043  *
1044  *	we compress out '/./' when we return a name.  Given that the
1045  *	stack was built by a tree walk, the only place a /./ should
1046  *	appear is at the first level after the base ... but there
1047  *	are legitimate ways for them to appear there.
1048  *
1049  *	these names can get deep, so we dynamically size our name buffer
1050  */
1051 static const char *namestack[ MAX_DEPTH + 1 ];
1052 static int namedepth = 0;
1053 static int namelen = 0;
1054 
1055 void
push_name(const char * name)1056 push_name(const char *name)
1057 {
1058 	namestack[ namedepth++ ] = name;
1059 	namelen += 2 + strlen(name);
1060 
1061 	/* make sure we don't overflow our name stack	*/
1062 	if (namedepth >= MAX_DEPTH) {
1063 		fprintf(stderr, gettext(ERR_deep), name);
1064 		exit(ERR_OTHER);
1065 	}
1066 }
1067 
1068 void
pop_name(void)1069 pop_name(void)
1070 {
1071 	namelen -= 2 + strlen(namestack[--namedepth]);
1072 	namestack[ namedepth ] = 0;
1073 
1074 #ifdef	DBG_ERRORS
1075 	/* just a little sanity check here	*/
1076 	if (namedepth <= 0) {
1077 		if (namedepth < 0) {
1078 			fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n");
1079 			exit(ERR_OTHER);
1080 		} else if (namelen != 0) {
1081 			fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n");
1082 			exit(ERR_OTHER);
1083 		}
1084 	}
1085 #endif
1086 }
1087 
1088 char
get_name(struct file * fp)1089 *get_name(struct file *fp)
1090 {	int i;
1091 	static char *namebuf = 0;
1092 	static int buflen = 0;
1093 
1094 	/* make sure we have an adequate buffer	*/
1095 	i = namelen + 1 + strlen(fp->f_name);
1096 	if (buflen < i) {
1097 		for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME);
1098 		namebuf = (char *) realloc(namebuf, buflen);
1099 	}
1100 
1101 	/* assemble the name	*/
1102 	namebuf[0] = 0;
1103 	for (i = 0; i < namedepth; i++) {
1104 		if (strcmp(namestack[i], ".")) {
1105 			strcat(namebuf, namestack[i]);
1106 			strcat(namebuf, "/");
1107 		}
1108 	}
1109 
1110 	strcat(namebuf, fp->f_name);
1111 
1112 	return (namebuf);
1113 }
1114