cmd/filesync/anal.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1995 Sun Microsystems, Inc.  All Rights Reserved
 *
 * module:
 *	anal.c
 *
 * purpose:
 *	routines to analyze the file trees and figure out what has changed
 *	and queue files for reconciliation.  It also contains tree enumeration
 *	routines for other purposes (pruning and link location).
 *
 * contents:
 *
 *  change analysis:
 *	analyze .... (top level) analyze all files in the tree for changes
 *	summary .... print out change/reconciliation statistics for each base
 *	check_file . (static) look for changes and queue file for reconciliation
 *	check_changes (static) figure out if a particular file has changed
 *	queue_file . (static) add a file to the reconciliation list
 *
 *  other tree enumeration functions:
 *	prune_file . (static) recursive descent and actual pruning
 *	prune ...... (top level) initiate pruning analysis for nonexistent files
 *	find_link .. look for other files to which a file may be a link
 *	link_update. propagate changed stat info to all other links
 *	same_name .. (static) figure out if two nodes describe same file
 *
 *  misc:
 *	push_name .. maintain a running full pathname as we descend
 *	pop_name ... maintain a running full pathname as we pop back
 *	get_name ... return full pathname for the current file
 *
 * notes:
 *	analysis is limited to files that were evaluated in the previous
 *	pass ... since we don't have complete information about files that
 *	were not evaluated in the previous pass.
 */
#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <stdio.h>
#include <stdlib.h>
#include <strings.h>

#include "messages.h"
#include "filesync.h"
#include "database.h"
#include "debug.h"

/*
 * routines:
 *	forward declarations for functions that are defined later in
 *	this file, so that they can be used before their definitions.
 */
/* name stack maintenance: push/pop a path component, build a full name	*/
void push_name(const char *);
void pop_name();
char *get_name(struct file *);
/* recursive per-file change analysis (queues files via queue_file)	*/
static errmask_t check_file(struct file *fp);
/* compare two f_info[] slots; the definition names them ref and new	*/
static diffmask_t check_changes(struct file *fp, int first, int second);
/* recursive marking of un-evaluated nodes, returns the number marked	*/
static int prune_file(struct file *fp);
/* ordered insertion of a file into the reconciliation (changes) list	*/
static void queue_file(struct file *fp);

/*
 * globals
 *
 *	changes is the head of the reconciliation list.  Nodes are chained
 *	through f_rnext and are inserted (in sequencing-time order) only by
 *	queue_file.  It is walked by analyze, summary, find_link,
 *	has_other_links and link_update.
 *
 *	total_files, est_deletes and est_rmdirs are accumulated by
 *	check_file and are read by analyze to decide whether or not a
 *	warning should be passed to confirm().
 *
 *	inum_changes is only read (by analyze) in this file; it is not
 *	static, so it is presumably updated by another module.
 */
static struct file *changes;	/* list of files to be reconciled	*/

static long total_files;	/* total number of files being considered  */
static long est_deletes;	/* estimated number of files to be deleted */
static long est_rmdirs;		/* est rmdirs of non-empty directories	   */

int inum_changes;		/* LISTed directories whose I#s changed	   */

/*
 * routine:
 *	analyze
 *
 * purpose:
 *	top level routine for the analysis/reconciliation process
 *
 * parameters:
 *	none
 *
 * returns:
 *	error mask
 *
 * notes:
 *	a critical side effect of this routine is the creation of
 *	the reconciliation list, an ordered list of files that
 *	needed to be processed in the subsequent reconciliation pass
 */
errmask_t
analyze()
{	struct base *bp;
	struct file *fp;
	int errs = 0;
	int err;
	int percentage;
	bool_t aborted = FALSE;
	char msgbuf[MAX_LINE];

	/*
	 * run through all bases and directories looking for files
	 * that have been renamed.  This must be done before the
	 * difference analysis because a directory rename can introduce
	 * radical restructuring into a name-based tree.
	 */
	for (bp = bases; bp; bp = bp->b_next) {
		for (fp = bp->b_files; fp; fp = fp->f_next)
			if (fp->f_flags & F_EVALUATE)
				errs |= find_renames(fp);
	}

	/*
	 * run through all bases and files looking for candidates
	 * note, however that we only descend into trees that have
	 * the evaluate flag turned on.  As a result of new rules or
	 * restriction arguments, we may be deliberatly ignoring
	 * large amounts of the baseline.   This means we won't do
	 * any stats to update the information in those nodes, and
	 * they will be written back just as they were.
	 *
	 * note that there is code to prune out baseline nodes for
	 * files that no longer exist, but that code is in reconcile
	 * and will never get a chance to run on nodes that aren't
	 * analyzed.
	 *
	 * we also want to run though all nodes with STAT errors
	 * so that we can put them on the reconciliation list.
	 */
	for (bp = bases; bp; bp = bp->b_next) {
		for (fp = bp->b_files; fp; fp = fp->f_next)
			if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR))
				errs |= check_file(fp);
	}

	/*
	 * my greatest fear is that someday, somehow, by messing with
	 * variables or baselines or who-knows-what, that someone will
	 * run a reconciliation against a large tree that doesn't correspond
	 * to the baseline, and I will infer that a bazillion files have
	 * been deleted and will propagate the slaughter before anyone
	 * can say somebody stop that maniac.
	 *
	 * in order to prevent such a possibility, we have a few different
	 * sanity checks.  There is, of course, a tradeoff here between
	 * danger and irritation.  The current set of heuristics for whether
	 * or not to generate a warning are (any of)
	 *
	 *	at least CONFIRM_MIN files have been deleted AND
	 *	CONFIRM_PCT of all files have been deleted
	 *
	 *	the inode number on a LISTed directory has changed
	 *
	 *	a non-empty directory has been deleted.
	 */
	msgbuf[0] = 0;

	percentage = (est_deletes * 100) / (total_files ? total_files : 1);
	if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT)
		sprintf(msgbuf, gettext(WARN_deletes), est_deletes);
	else if (inum_changes > 0)
		sprintf(msgbuf, gettext(WARN_ichange), inum_changes);
	else if (est_rmdirs)
		sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs);

	if (msgbuf[0])
		confirm(msgbuf);

	/*
	 * TRICK:
	 *	the change list contains both files that have changed
	 *	(and probably warrant reconciliation) and files that
	 *	we couldn't get up-to-date stat information on.  The
	 *	latter files should just be flagged as being in conflict
	 *	so they can be reported in the summary.  The same is
	 *	true of all subsequent files if we abort reconciliation.
	 */
	for (fp = changes; fp; fp = fp->f_rnext)
		if (aborted || (fp->f_flags & F_STAT_ERROR)) {
			fp->f_flags |= F_CONFLICT;
			/* if it isn't in the baseline yet, don't add it */
			if ((fp->f_flags & F_IN_BASELINE) == 0)
				fp->f_flags |= F_REMOVE;
			fp->f_problem = aborted ? PROB_aborted : PROB_restat;
			(fp->f_base)->b_unresolved++;
			errs |= ERR_UNRESOLVED;
			if (opt_verbose)
				fprintf(stdout,
					gettext(aborted ? V_suppressed
							: V_nostat),
					fp->f_fullname);
		} else {
			err = reconcile(fp);
			errs |= err;
			if (opt_halt && (err & ERR_ABORT)) {
				fprintf(stderr, gettext(ERR_abort_h));
				aborted = TRUE;
			}
		}

	return (errs);
}

/*
 * routine:
 *	prune_file
 *
 * purpose:
 *	to flag, for removal from the baseline, a node that was neither
 *	evaluated nor blocked by a stat error, and then to do the same
 *	(recursively) for everything on the node's f_files list.
 *
 * parameters:
 *	pointer to file node
 *
 * returns:
 *	number of nodes (this one and those below it) flagged F_REMOVE
 *
 * notes:
 *	the descent continues below a node even if that node itself
 *	was flagged.
 */
static int
prune_file(struct file *fp)
{
	struct file *child;
	int count;

	if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) {
		/* this node was looked at, so it stays		*/
		count = 0;
	} else {
		/* nobody evaluated this node, so it goes	*/
		fp->f_flags |= F_REMOVE;
		count = 1;
		if (opt_debug & DBG_ANAL)
			fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name);
	}

	/* add in whatever gets pruned beneath us		*/
	child = fp->f_files;
	while (child != 0) {
		count += prune_file(child);
		child = child->f_next;
	}

	return (count);
}

/*
 * routine:
 *	prune
 *
 * purpose:
 *	to prune the baseline of entries that no longer correspond to
 *	existing rules.
 *
 * parameters:
 *	none
 *
 * returns:
 *	number of file nodes that prune_file flagged for removal
 *	(bases that get flagged are not included in the count)
 *
 * notes:
 *	This routine calls prune_file on every top level file of each
 *	base, and then flags the base itself with F_REMOVE if the base
 *	does not have F_EVALUATE set.
 */
int
prune()
{
	struct base *bp;
	struct file *top;
	int total = 0;

	bp = bases;
	while (bp != 0) {
		/* prune each tree that hangs off of this base	*/
		top = bp->b_files;
		while (top != 0) {
			total += prune_file(top);
			top = top->f_next;
		}

		/* a base that was never evaluated goes too	*/
		if (!(bp->b_flags & F_EVALUATE))
			bp->b_flags |= F_REMOVE;

		bp = bp->b_next;
	}

	return (total);
}

/*
 * routine:
 *	summary
 *
 * purpose:
 *	to print out statics and conflict lists
 */
void
summary()
{	struct base *bp;
	struct file *fp;
	extern bool_t need_super;

	(void) fflush(stdout);

	for (bp = bases; bp; bp = bp->b_next) {

		/* see if this base was irrelevant	*/
		if ((bp->b_flags & F_EVALUATE) == 0)
			continue;

		/* print out a summary for this base	*/
		fprintf(stderr, gettext(SUM_hd),
			bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles);
		fprintf(stderr, gettext(SUM_dst),
			bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc);
		fprintf(stderr, gettext(SUM_src),
			bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc);
		if (bp->b_unresolved)
			fprintf(stderr, gettext(SUM_unresolved),
				bp->b_unresolved);


		/* print out a list of unreconciled files for this base	*/
		for (fp = changes; fp; fp = fp->f_rnext) {
			if (fp->f_base != bp)
				continue;
			if ((fp->f_flags & F_CONFLICT) == 0)
				continue;
			fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname,
				fp->f_problem ? fp->f_problem : "???");
		}

		fprintf(stderr, "\n");
	}

	if (need_super)
		fprintf(stderr, gettext(WARN_super));
}

/*
 * routine:
 *	check_file
 *
 * purpose:
 *	figure out if a file requires reconciliation and recursively
 *	descend into all sub-files and directories
 *
 * parameters:
 *	base pointer
 *	file pointer
 *
 * returns:
 *	error mask
 *	built up changes needed list
 *	updated statistics
 *
 * notes:
 *	this routine builds up a path name as it descends through
 *	the tree (see push_name, pop_name, get_name).
 */
static errmask_t
check_file(struct file *fp)
{	struct file *cp;
	int errs = 0;

	if ((fp->f_flags & F_STAT_ERROR) == 0) {
		/* see if the source has changed	*/
		fp->f_info[OPT_BASE].f_modtime	= fp->f_s_modtime;
		fp->f_info[OPT_BASE].f_ino	= fp->f_s_inum;
		fp->f_info[OPT_BASE].f_d_maj	= fp->f_s_maj;
		fp->f_info[OPT_BASE].f_d_min	= fp->f_s_min;
		fp->f_info[OPT_BASE].f_nlink	= fp->f_s_nlink;
		fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC);

		/* see if the destination has changed	*/
		fp->f_info[OPT_BASE].f_modtime	= fp->f_d_modtime;
		fp->f_info[OPT_BASE].f_ino    	= fp->f_d_inum;
		fp->f_info[OPT_BASE].f_d_maj    = fp->f_d_maj;
		fp->f_info[OPT_BASE].f_d_min    = fp->f_d_min;
		fp->f_info[OPT_BASE].f_nlink	= fp->f_d_nlink;
		fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST);

		/* if nobody thinks the file exists, baseline needs pruning */
		if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) {
			fp->f_srcdiffs |= D_DELETE;
			fp->f_dstdiffs |= D_DELETE;
		}

		/* keep track of possible deletions to look for trouble	*/
		if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) {
			est_deletes++;

			/* see if file is (or has been) a non-empty directory */
			if (fp->f_files)
				est_rmdirs++;
		}
	}

	/* if we found differences, queue the file for reconciliation 	*/
	if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) {
		queue_file(fp);

		if (opt_debug & DBG_ANAL) {
			fprintf(stderr, "ANAL: src=%s",
				showflags(diffmap, fp->f_srcdiffs));
			fprintf(stderr, " dst=%s",
				showflags(diffmap, fp->f_dstdiffs));
			fprintf(stderr, " flgs=%s",
				showflags(fileflags, fp->f_flags));
			fprintf(stderr, " name=%s\n", fp->f_fullname);
		}
	}

	/* bump the total file count	*/
	fp->f_base->b_totfiles++;
	total_files++;

	/* if this is not a directory, we're done	*/
	if (fp->f_files == 0)
		return (errs);

	/*
	 * If this is a directory, we need to recursively analyze
	 * our children, but only children who have been evaluated.
	 * If a node has not been evaluated, then we don't have
	 * updated stat information and there is nothing to analyze.
	 *
	 * we also want to run though all nodes with STAT errors
	 * so that we can put them on the reconciliation list.
	 * If a directory is unreadable on one side, all files
	 * under that directory (ON BOTH SIDES) must be marked as
	 * blocked by stat errors.
	 */
	push_name(fp->f_name);

	for (cp = fp->f_files; cp; cp = cp->f_next) {
		if (fp->f_flags & F_STAT_ERROR)
			cp->f_flags |= F_STAT_ERROR;
		if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR))
			errs |= check_file(cp);
	}

	pop_name();

	return (errs);
}

/*
 * routine:
 *	check_changes
 *
 * purpose:
 *	to figure out what has changed for a specific file
 *
 * parameters:
 *	file pointer
 *	index (into f_info) of the reference info
 *	index (into f_info) of the info to be checked for changes
 *
 * returns:
 *	diff mask
 *
 * notes:
 *	this routine doesn't pretend to understand what happened.
 *	it merely enumerates the ways in which the files differ.
 *
 *	ownership, protection and ACLs are always compared.  What
 *	else is compared depends on the file type:
 *	    types differ ...... report a delete, a create or a type change
 *	    block/char device . report a maj/min change as D_SIZE
 *	    directory ......... nothing more
 *	    anything else ..... mod time (only if newer), size, link count
 */
static diffmask_t
check_changes(struct file *fp, int ref, int new)
{
	struct fileinfo *was = &fp->f_info[ref];
	struct fileinfo *now = &fp->f_info[new];
	int diffs = 0;
	int type = now->f_type;

	/* ownership and protection apply to everything		*/
	if (now->f_uid != was->f_uid)
		diffs |= D_UID;
	if (now->f_gid != was->f_gid)
		diffs |= D_GID;
	if (now->f_mode != was->f_mode)
		diffs |= D_PROT;

	if (type != was->f_type) {
		/* a type of zero means that there is no file	*/
		if (type == 0)
			diffs |= D_DELETE;
		else if (was->f_type == 0)
			diffs |= D_CREATE;
		else
			diffs |= D_TYPE;
	} else {
		switch (type) {
		case S_IFBLK:
		case S_IFCHR:
			/*
			 * for special files, we only look at the maj/min
			 */
			if (now->f_rd_maj != was->f_rd_maj ||
			    now->f_rd_min != was->f_rd_min)
				diffs |= D_SIZE;
			break;

		case S_IFDIR:
			/*
			 * for directories, we don't look directly at
			 * the contents, so the remaining fields don't
			 * mean anything.  If the directories have changed
			 * in any interesting way, we'll find it by
			 * walking the tree.
			 */
			break;

		default:
			/* only a newer time counts as a change	*/
			if (now->f_modtime > was->f_modtime)
				diffs |= D_MTIME;
			if (now->f_size != was->f_size)
				diffs |= D_SIZE;
			if (now->f_nlink != was->f_nlink)
				diffs |= D_LINKS;
			break;
		}
	}

	/* a zero return from cmp_acls is reported as an ACL change	*/
	if (!cmp_acls(was, now))
		diffs |= D_FACLS;

	return (diffs);
}

/*
 * routine:
 *	same_name
 *
 * purpose:
 *	to figure out whether or not two database nodes actually refer to
 *	the same file.
 *
 * parameters:
 *	pointers to two file description nodes
 *	which side we should check (OPT_SRC selects each base's b_src_name,
 *	anything else selects b_dst_name)
 *
 * returns:
 *	TRUE/FALSE (the result of comparing the characters at which
 *	the two walks stopped)
 *
 * notes:
 *	if a single directory is specified in multiple base pairs, it
 *	is possible to have multiple nodes in the database describing
 *	the same file.  This routine is supposed to detect those cases.
 *
 *	what should be a trivial string comparison is complicated by
 *	the possibility that the two nodes might describe the same file
 *	from base directories at different depths.  Thus, rather than
 *	comparing two strings, we really want to compare the concatenation
 *	of two pairs of strings.  Unfortunately calling full_name would
 *	be awkward right now, so instead we have our own comparison
 *	routine that automatically skips from the first string to
 *	the second.
 *
 *	for each node, the first string is the base name for the chosen
 *	side and the second string (the extension) is the node's
 *	f_fullname.
 */
static bool_t
same_name(struct file *f1, struct file *f2, side_t srcdst)
{
	char *s1, *s2, *x1, *x2;

	/* s1/s2 walk the current strings, x1/x2 are the pending extensions */
	if (srcdst == OPT_SRC) {
		s1 = (f1->f_base)->b_src_name;
		s2 = (f2->f_base)->b_src_name;
	} else {
		s1 = (f1->f_base)->b_dst_name;
		s2 = (f2->f_base)->b_dst_name;
	}
	x1 = f1->f_fullname;
	x2 = f2->f_fullname;

	/*
	 * Compare the two names, and if they differ before they end
	 * this is a non-match.  If they both end at the same time,
	 * this is a match.
	 *
	 * The trick here is that each string is actually the logical
	 * concatenation of two strings, and we need to automatically
	 * wrap from the first to the second string in each pair.  There
	 * is no requirement that the two (concatenated) strings be
	 * broken at the same point, so we have a slightly baroque
	 * comparison loop.
	 */
	while (*s1 && *s1 == *s2) {

		/*
		 * strings have been identical so far, so advance the
		 * pointers and continue the comparison.  The trick
		 * is that when either string ends, we have to wrap
		 * over to its extension.
		 */
		s1++; s2++;
		if (*s1 && *s2)
			continue;

		/*
		 * at least one of the strings has ended.
		 * there is an implicit slash between the string
		 * and its extension, and this has to be matched
		 * against the other string.
		 */
		if (*s1 != *s2) {
			if (*s1 == 0 && *s2 == '/')
				s2++;
			else if (*s2 == 0 && *s1 == '/')
				s1++;
			else
				/* the disagreement doesn't come at a slash */
				break;
		}

		/*
		 * if either string has ended, wrap to its extension
		 * (the extension pointer is zeroed so that we can
		 * only wrap once per name)
		 */
		if (*s1 == 0 && x1 != 0) {
			s1 = x1;
			x1 = 0;
		}
		if (*s2 == 0 && x2 != 0) {
			s2 = x2;
			x2 = 0;
		}
	}

	/* a match leaves both pointers on a terminating null	*/
	return (*s1 == *s2);
}

/*
 * routine:
 *	find_link
 *
 * purpose:
 *	to figure out if there is a file to which we should
 *	be creating a link (rather than making a copy)
 *
 * parameters:
 *	file node for the file to be created (that we hope is merely a link)
 *	which side is to be changed (src/dst)
 *
 * return:
 *	0	no link is appropriate
 *	else	pointer to file node for link referent
 *
 * notes:
 *	there are a few strange heuristics in this routine and I
 *	wouldn't bet my soul that I got all of them right.  The general
 *	theory is that when a new file is created, we look to see if it
 *	is a link to another file on the changed side, and if it is, we
 *	find the corresponding file on the unchanged side.
 *
 *	cases we want to be able to handle:
 *	    1.	one or more links are created to a preexisting file
 *	    2.	a preexisting only link is renamed
 *	    3.  a rename of one of multiple links to a preexisting file
 *	    4.	a single file is created with multiple links
 *
 *	only files on the change list are considered, and the first
 *	acceptable candidate (in list order) is the one returned.
 */
struct file *
find_link(struct file *fp, side_t srcdst)
{
	struct file *cand;
	struct fileinfo *my_chg, *my_tgt;	/* our info, by side	*/
	struct fileinfo *c_chg, *c_tgt, *c_base; /* candidate's info	*/
	side_t chgside, tgtside;

	/* chg = side on which the change was noticed		*/
	/* tgt = side to which the change is to be propagated	*/
	if (srcdst == OPT_SRC) {
		chgside = OPT_DST;
		tgtside = OPT_SRC;
	} else {
		chgside = OPT_SRC;
		tgtside = OPT_DST;
	}
	my_chg = &fp->f_info[chgside];
	my_tgt = &fp->f_info[tgtside];

	/*
	 * cases 1 and 3
	 *
	 * When a new link is created, we should be able to find
	 * another file in the changed hierarchy that has the same
	 * I-node number.  We expect it to be on the changed list
	 * because the link count will have gone up or because all
	 * of the copies are new.  If we find one, then the new file
	 * on the receiving side should be a link to the corresponding
	 * existing file.
	 *
	 * case 4
	 *
	 * the first link will be dealt with as a copy, but all
	 * subsequent links should find an existing file analogous
	 * to one of the links on the changed side, and create
	 * corresponding links on the other side.
	 *
	 * in each of these cases, there should be multiple links
	 * on the changed side.  If the linkcount on the changed
	 * side is one, we needn't bother searching for other links.
	 */
	if (my_chg->f_nlink > 1) {
		for (cand = changes; cand != 0; cand = cand->f_rnext) {
			/* finding the same node doesn't count	*/
			if (cand == fp)
				continue;

			c_tgt = &cand->f_info[tgtside];
			c_chg = &cand->f_info[chgside];

			/*
			 * if the file doesn't already exist on the
			 * target side we cannot make a link to it
			 */
			if (c_tgt->f_mode == 0)
				continue;

			/*
			 * if this is indeed a link, then the prospective
			 * file on the changed side will have the same
			 * dev/inum as the file we are looking for
			 */
			if (my_chg->f_d_maj != c_chg->f_d_maj ||
			    my_chg->f_d_min != c_chg->f_d_min ||
			    my_chg->f_ino != c_chg->f_ino)
				continue;

			/*
			 * if the target side is already a link to this
			 * file, then there is no new link to be created
			 * FIX: how does this interact with copies over links
			 */
			if (my_tgt->f_d_maj == c_tgt->f_d_maj &&
			    my_tgt->f_d_min == c_tgt->f_d_min &&
			    my_tgt->f_ino == c_tgt->f_ino)
				continue;

			/*
			 * there is a pathological situation where a single
			 * file might appear under multiple base directories.
			 * This is damned awkward to detect in any other way,
			 * so we must check to see if we have just found
			 * another database instance for the same file (on
			 * the changed side).
			 */
			if (fp->f_base != cand->f_base &&
			    same_name(fp, cand, chgside))
				continue;

			if (opt_debug & DBG_ANAL)
				fprintf(stderr, "ANAL: FIND LINK %s and %s\n",
					fp->f_fullname, cand->f_fullname);

			return (cand);
		}
	}

	/*
	 * case 2: a simple rename of the only link
	 *
	 * In this case, there may not be any other existing file on
	 * the changed side that has the same I-node number.  There
	 * might, however, be a record of such a file in the baseline.
	 * If we can find an identical file with a different name that
	 * has recently disappeared, we have a likely rename.
	 */
	for (cand = changes; cand != 0; cand = cand->f_rnext) {
		/* finding the same node doesn't count			*/
		if (cand == fp)
			continue;

		c_tgt = &cand->f_info[tgtside];
		c_chg = &cand->f_info[chgside];

		/*
		 * if the file still exists on the changed side this is
		 * not a simple rename, and in fact the previous pass
		 * would have found it.
		 */
		if (c_chg->f_mode != 0)
			continue;

		/*
		 * the inode number for the new link on the changed
		 * side must match the inode number for the old link
		 * from the baseline (the values the baseline recorded
		 * for whichever side it is that changed).
		 */
		if (srcdst == OPT_SRC) {
			if (my_chg->f_d_maj != cand->f_d_maj ||
			    my_chg->f_d_min != cand->f_d_min ||
			    my_chg->f_ino != cand->f_d_inum)
				continue;
		} else {
			if (my_chg->f_d_maj != cand->f_s_maj ||
			    my_chg->f_d_min != cand->f_s_min ||
			    my_chg->f_ino != cand->f_s_inum)
				continue;
		}

		/* finding a file we are already linked to doesn't help	*/
		if (my_tgt->f_d_maj == c_tgt->f_d_maj &&
		    my_tgt->f_d_min == c_tgt->f_d_min &&
		    my_tgt->f_ino == c_tgt->f_ino)
			continue;

		/*
		 * there is a danger that we will confuse an
		 * inode reallocation with a rename.  We should
		 * only consider this to be a rename if the
		 * new file is identical to the old one
		 */
		c_base = &cand->f_info[OPT_BASE];
		if (my_chg->f_type != c_base->f_type ||
		    my_chg->f_size != c_base->f_size ||
		    my_chg->f_mode != c_base->f_mode ||
		    my_chg->f_uid != c_base->f_uid ||
		    my_chg->f_gid != c_base->f_gid)
			continue;

		if (opt_debug & DBG_ANAL)
			fprintf(stderr, "ANAL: FIND RENAME %s and %s\n",
				fp->f_fullname, cand->f_fullname);

		return (cand);
	}

	return (0);
}

/*
 * routine:
 *	has_other_links
 *
 * purpose:
 *	to determine whether or not there is more than one link to a
 *	particular file.  We are willing to delete a link to a file
 *	that has changed if we will still have other links to it.
 *	The trick here is that we only care about links under our
 *	dominion.
 *
 * parameters:
 *	file pointer to node we are interested in
 *	which side we are looking for additional links on
 *
 * returns:
 *	TRUE if there are multiple links
 *	FALSE if this is the only one we know of
 *
 * notes:
 *	only files on the change list are examined.
 */
bool_t
has_other_links(struct file *fp, side_t srcdst)
{
	struct fileinfo *mine = &fp->f_info[srcdst];
	struct fileinfo *theirs;
	struct file *cand;

	/* if the link count is one, there couldn't be others	*/
	if (mine->f_nlink < 2)
		return (FALSE);

	/* look for any other file that is the same inode	*/
	for (cand = changes; cand != 0; cand = cand->f_rnext) {
		/* finding the same node doesn't count		*/
		if (cand == fp)
			continue;

		theirs = &cand->f_info[srcdst];

		/*
		 * another link is a file that still exists on this
		 * side (non-zero mode) and has our dev/inum
		 */
		if (theirs->f_mode != 0 &&
		    theirs->f_d_maj == mine->f_d_maj &&
		    theirs->f_d_min == mine->f_d_min &&
		    theirs->f_ino == mine->f_ino)
			return (TRUE);
	}

	return (FALSE);
}

/*
 * routine:
 *	link_update
 *
 * purpose:
 *	to propagate a stat change to all other file nodes that
 *	correspond to the same I-node on the changed side
 *
 * parameters:
 *	file pointer for the updated file
 *	which side was changed
 *
 * returns:
 *	void
 *
 * notes:
 *	if we have copied onto a file, we have copied onto all
 *	of its links, but since we do all stats before we do any
 *	copies, the stat information recently collected for links
 *	is no longer up-to-date, and this would result in incorrect
 *	reconciliation (redundant copies).
 *
 *	There is an assumption here that all links to a changed
 *	file will be in the change list.  This is true for almost
 *	all cases not involving restriction.  If we do fail to
 *	update the baseline for a file that was off the change list,
 *	the worst that is likely to happen is that we will think
 *	it changed later (but will almost surely find that both
 *	copies agree).
 */
void
link_update(struct file *fp, side_t which)
{
	struct fileinfo *from = &fp->f_info[which];
	struct fileinfo *to;
	struct file *lp;

	for (lp = changes; lp != 0; lp = lp->f_rnext) {
		/* finding the current entry doesn't count	*/
		if (lp == fp)
			continue;

		to = &lp->f_info[which];

		/* look for same i#, maj, min on changed side	*/
		if (to->f_ino != from->f_ino ||
		    to->f_d_maj != from->f_d_maj ||
		    to->f_d_min != from->f_d_min)
			continue;

		/*
		 * this appears to be another link to the same file
		 * so the updated stat information for one must be
		 * correct for the other.
		 */
		to->f_type = from->f_type;
		to->f_size = from->f_size;
		to->f_mode = from->f_mode;
		to->f_uid = from->f_uid;
		to->f_gid = from->f_gid;
		to->f_modtime = from->f_modtime;
		to->f_modns = from->f_modns;
		to->f_nlink = from->f_nlink;
		to->f_rd_maj = from->f_rd_maj;
		to->f_rd_min = from->f_rd_min;

		if (opt_debug & DBG_STAT)
			fprintf(stderr,
				"STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n",
				lp->f_name, to->f_modtime, to->f_modns);
	}
}

/*
 * routine:
 *	queue_file
 *
 * purpose:
 *	append a file to the list of needed reconciliations
 *
 * parameters:
 *	pointer to file
 *
 * notes:
 *	when a request is appended to the reconciliation list,
 *	we fill in the full name.  We delayed this in hopes that
 *	it wouldn't be necessary (saving cycles and memory)
 *
 *	There is some funny business with modification times.
 *	In general, we queue files in order of the latest modification
 *	time so that propagations preserve relative ordering.  There
 *	are, however, a few important exceptions:
 *	    1.	all directory creations happen at time zero,
 *		so that they are created before any files can
 *		be added to them.
 *	    2.	all directory deletions happen at time infinity-depth,
 *		so that everything else can be removed before the
 *		directories themselves are removed.
 *	    3.	all file deletions happen at time infinity-depth
 *		so that (in renames) the links will preceed the unlinks.
 */
static void
queue_file(struct file *fp)
{	struct file **pp, *np;

#define	TIME_ZERO	0L		/* the earliest possible time	*/
#define	TIME_LONG	0x7FFFFFFF	/* the latest possible time	*/

	/*
	 * figure out the modification time for sequencing purposes
	 */
	if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) {
		/*
		 * deletions are performed last, and depth first
		 */
		fp->f_modtime = TIME_LONG - fp->f_depth;
	} else if (fp->f_info[OPT_SRC].f_type != S_IFDIR &&
	    fp->f_info[OPT_DST].f_type != S_IFDIR) {
		/*
		 * for most files we use the latest mod time
		 */
		fp->f_modtime = fp->f_info[OPT_SRC].f_modtime;
		fp->f_modns   = fp->f_info[OPT_SRC].f_modns;
		if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) {
			fp->f_modtime = fp->f_info[OPT_DST].f_modtime;
			fp->f_modns   = fp->f_info[OPT_DST].f_modns;
		}
	} else {
		/*
		 * new directory creations need to happen before anything
		 * else and are automatically sequenced in traversal order
		 */
		fp->f_modtime = TIME_ZERO;
	}

	/*
	 * insertion is time ordered, and for equal times,
	 * insertions is in (pre-order) traversal order
	 */
	for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) {
		if (fp->f_modtime > np->f_modtime)
			continue;
		if (fp->f_modtime < np->f_modtime)
			break;
		if (fp->f_modns < np->f_modns)
			break;
	}

	fp->f_fullname = strdup(get_name(fp));
	fp->f_rnext = np;
	*pp = fp;
}


/*
 * routines:
 *	push_name/pop_name/get_name
 *
 * purpose:
 *	maintain a name stack so we can form name of a particular file
 *	as the concatenation of all of the names between it and the
 *	(known to be fully qualified) base directory.
 *
 * notes:
 *	we go to this trouble because most files never change and
 *	so we don't need to associate full names with every one.
 *	This stack is maintained during analysis, and if we decide
 *	to add a file to the reconciliation list, we can use the
 *	stack to generate a fully qualified name at that time.
 *
 *	we compress out '/./' when we return a name.  Given that the
 *	stack was built by a tree walk, the only place a /./ should
 *	appear is at the first level after the base ... but there
 *	are legitimate ways for them to appear there.
 *
 *	these names can get deep, so we dynamically size our name buffer
 *
 *	the stack holds the pointers it was given (the names are not
 *	copied).  namelen is kept at the sum, over all stacked names,
 *	of the name length plus two, and get_name uses it to size
 *	its buffer.
 */
static const char *namestack[ MAX_DEPTH + 1 ];
static int namedepth = 0;	/* number of names currently stacked	*/
static int namelen = 0;		/* sum of (strlen + 2) of stacked names	*/

/*
 * push one more directory name onto the name stack, and exit
 * (with ERR_OTHER) if that makes the stack MAX_DEPTH deep.
 * The pointer is stored as-is, so the name must remain valid
 * until the matching pop_name.
 */
void
push_name(const char *name)
{
	namestack[namedepth] = name;
	namedepth += 1;

	/* a name, a separator, and one to spare		*/
	namelen += strlen(name) + 2;

	/* as long as there is still room, we are done	*/
	if (namedepth < MAX_DEPTH)
		return;

	fprintf(stderr, gettext(ERR_deep), name);
	exit(ERR_OTHER);
}

/*
 * remove the most recently pushed name from the name stack,
 * giving back the length that push_name charged for it.
 */
void
pop_name(void)
{
	namedepth--;
	namelen -= strlen(namestack[namedepth]) + 2;
	namestack[namedepth] = 0;

#ifdef	DBG_ERRORS
	/* sanity: depth can't go negative, and an empty stack has no length */
	if (namedepth < 0) {
		fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n");
		exit(ERR_OTHER);
	}
	if (namedepth == 0 && namelen != 0) {
		fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n");
		exit(ERR_OTHER);
	}
#endif
}

char
*get_name(struct file *fp)
{	int i;
	static char *namebuf = 0;
	static int buflen = 0;

	/* make sure we have an adequate buffer	*/
	i = namelen + 1 + strlen(fp->f_name);
	if (buflen < i) {
		for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME);
		namebuf = (char *) realloc(namebuf, buflen);
	}

	/* assemble the name	*/
	namebuf[0] = 0;
	for (i = 0; i < namedepth; i++) {
		if (strcmp(namestack[i], ".")) {
			strcat(namebuf, namestack[i]);
			strcat(namebuf, "/");
		}
	}

	strcat(namebuf, fp->f_name);

	return (namebuf);
}