xref: /freebsd/sys/fs/unionfs/union_vnops.c (revision 729362425c09cf6b362366aabc6fb547eee8035a)
1 /*
2  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
3  * Copyright (c) 1992, 1993, 1994, 1995
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Jan-Simon Pendry.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)union_vnops.c	8.32 (Berkeley) 6/23/95
38  * $FreeBSD$
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/fcntl.h>
44 #include <sys/stat.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <sys/mount.h>
48 #include <sys/namei.h>
49 #include <sys/malloc.h>
50 #include <sys/bio.h>
51 #include <sys/buf.h>
52 #include <sys/lock.h>
53 #include <sys/sysctl.h>
54 #include <fs/unionfs/union.h>
55 
56 #include <vm/vm.h>
57 #include <vm/vnode_pager.h>
58 
59 #include <vm/vm_page.h>
60 #include <vm/vm_object.h>
61 
/*
 * Debug knob read by the UDEBUG() tracing macros.  When the code is
 * built with UDEBUG_ENABLED the sysctl is exported read-write so tracing
 * can be toggled at run-time; otherwise it is exported read-only.
 */
int uniondebug = 0;

#if UDEBUG_ENABLED
SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, "");
#else
SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, "");
#endif
69 
/*
 * Forward declarations for the vnode operations implemented in this file.
 *
 * NOTE(review): union_write() is declared (and defined below) with a
 * struct vop_read_args argument rather than vop_write_args.  This works
 * only because the two argument structures share the same layout and the
 * op is installed in the vop vector through a cast -- confirm before
 * changing either structure.
 */
static int	union_access(struct vop_access_args *ap);
static int	union_advlock(struct vop_advlock_args *ap);
static int	union_close(struct vop_close_args *ap);
static int	union_create(struct vop_create_args *ap);
static int	union_createvobject(struct vop_createvobject_args *ap);
static int	union_destroyvobject(struct vop_destroyvobject_args *ap);
static int	union_fsync(struct vop_fsync_args *ap);
static int	union_getattr(struct vop_getattr_args *ap);
static int	union_getvobject(struct vop_getvobject_args *ap);
static int	union_inactive(struct vop_inactive_args *ap);
static int	union_ioctl(struct vop_ioctl_args *ap);
static int	union_lease(struct vop_lease_args *ap);
static int	union_link(struct vop_link_args *ap);
static int	union_lookup(struct vop_lookup_args *ap);
static int	union_lookup1(struct vnode *udvp, struct vnode **dvp,
				   struct vnode **vpp,
				   struct componentname *cnp);
static int	union_mkdir(struct vop_mkdir_args *ap);
static int	union_mknod(struct vop_mknod_args *ap);
static int	union_open(struct vop_open_args *ap);
static int	union_pathconf(struct vop_pathconf_args *ap);
static int	union_print(struct vop_print_args *ap);
static int	union_read(struct vop_read_args *ap);
static int	union_readdir(struct vop_readdir_args *ap);
static int	union_readlink(struct vop_readlink_args *ap);
static int	union_getwritemount(struct vop_getwritemount_args *ap);
static int	union_reclaim(struct vop_reclaim_args *ap);
static int	union_remove(struct vop_remove_args *ap);
static int	union_rename(struct vop_rename_args *ap);
static int	union_revoke(struct vop_revoke_args *ap);
static int	union_rmdir(struct vop_rmdir_args *ap);
static int	union_poll(struct vop_poll_args *ap);
static int	union_setattr(struct vop_setattr_args *ap);
static int	union_strategy(struct vop_strategy_args *ap);
static int	union_symlink(struct vop_symlink_args *ap);
static int	union_whiteout(struct vop_whiteout_args *ap);
static int	union_write(struct vop_read_args *ap);
107 
/*
 * Return the upper-layer vnode of a union node, referenced and
 * exclusively locked, or NULL if there is no upper layer.  The caller
 * must release both the lock and the reference with
 * union_unlock_upper() when done.
 */
static __inline
struct vnode *
union_lock_upper(struct union_node *un, struct thread *td)
{
	struct vnode *uppervp;

	if ((uppervp = un->un_uppervp) != NULL) {
		VREF(uppervp);
		/* LK_CANRECURSE: the caller may already hold this lock. */
		vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
	}
	KASSERT((uppervp == NULL || vrefcnt(uppervp) > 0), ("uppervp usecount is 0"));
	return(uppervp);
}
121 
/*
 * Release the lock and reference obtained from union_lock_upper().
 */
static __inline
void
union_unlock_upper(struct vnode *uppervp, struct thread *td)
{
	vput(uppervp);
}
128 
/*
 * Return the backing vnode of a union node -- the upper layer if one
 * exists, otherwise the lower layer -- referenced and exclusively
 * locked.  Returns NULL only if both layers are absent.  Release with
 * union_unlock_other().
 */
static __inline
struct vnode *
union_lock_other(struct union_node *un, struct thread *td)
{
	struct vnode *vp;

	if (un->un_uppervp != NULL) {
		vp = union_lock_upper(un, td);
	} else if ((vp = un->un_lowervp) != NULL) {
		VREF(vp);
		vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
	}
	return(vp);
}
143 
/*
 * Release the lock and reference obtained from union_lock_other().
 */
static __inline
void
union_unlock_other(struct vnode *vp, struct thread *td)
{
	vput(vp);
}
150 
/*
 *	union_lookup1:
 *
 *	Lookup helper used by union_lookup() for a single (upper or lower)
 *	layer.
 *
 *	udvp	must be exclusively locked on call and will remain
 *		exclusively locked on return.  This is the mount point
 *		for our filesystem.
 *
 *	pdvp	Our base directory, locked and referenced.
 *		The passed directory will be dereferenced and unlocked on
 *		return and a new one will be returned which is locked and
 *		referenced in the same variable.
 *
 *	vpp	is filled in with the result if no error occurred,
 *		locked and ref'd.
 *
 *		If an error is returned, *vpp is set to NULLVP.  If no
 *		error occurs, *vpp is returned with a reference and an
 *		exclusive lock.
 */

static int
union_lookup1(udvp, pdvp, vpp, cnp)
	struct vnode *udvp;
	struct vnode **pdvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	int error;
	struct thread *td = cnp->cn_thread;
	struct vnode *dvp = *pdvp;
	struct vnode *tdvp;
	struct mount *mp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			dvp = dvp->v_mount->mnt_vnodecovered;
			VREF(dvp);
			vput(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
		}
	}

	/*
	 * Set return dvp to be the upperdvp parent directory.
	 */
	*pdvp = dvp;

	/*
	 * If the VOP_LOOKUP() call generates an error, tdvp is invalid and
	 * no changes will have been made to dvp, so we are set to return.
	 */

	error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error) {
		UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags));
		*vpp = NULL;
		return (error);
	}

	/*
	 * The parent directory will have been unlocked, unless lookup
	 * found the last component or if dvp == tdvp (tdvp must be locked).
	 *
	 * We want our dvp to remain locked and ref'd.  We also want tdvp
	 * to remain locked and ref'd.
	 */
	UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags));

	if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0)
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 *
	 * We use dvp as a temporary variable here, it is no longer related
	 * to the dvp above.  However, we have to ensure that both *pdvp and
	 * tdvp are locked on return.
	 */

	dvp = tdvp;
	while (
	    dvp != udvp &&
	    (dvp->v_type == VDIR) &&
	    (mp = dvp->v_mountedhere)
	) {
		int relock_pdvp = 0;

		/*
		 * If the mount point cannot be busied it may be in the
		 * middle of an unmount; re-evaluate the loop condition
		 * rather than descending into it.
		 */
		if (vfs_busy(mp, 0, 0, td))
			continue;

		if (dvp == *pdvp)
			relock_pdvp = 1;
		vput(dvp);
		dvp = NULL;
		error = VFS_ROOT(mp, &dvp);

		vfs_unbusy(mp, td);

		if (relock_pdvp)
			vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, td);

		if (error) {
			*vpp = NULL;
			return (error);
		}
	}
	*vpp = dvp;
	return (0);
}
275 
276 static int
277 union_lookup(ap)
278 	struct vop_lookup_args /* {
279 		struct vnodeop_desc *a_desc;
280 		struct vnode *a_dvp;
281 		struct vnode **a_vpp;
282 		struct componentname *a_cnp;
283 	} */ *ap;
284 {
285 	int error;
286 	int uerror, lerror;
287 	struct vnode *uppervp, *lowervp;
288 	struct vnode *upperdvp, *lowerdvp;
289 	struct vnode *dvp = ap->a_dvp;		/* starting dir */
290 	struct union_node *dun = VTOUNION(dvp);	/* associated union node */
291 	struct componentname *cnp = ap->a_cnp;
292 	struct thread *td = cnp->cn_thread;
293 	int lockparent = cnp->cn_flags & LOCKPARENT;
294 	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
295 	struct ucred *saved_cred = NULL;
296 	int iswhiteout;
297 	struct vattr va;
298 
299 	*ap->a_vpp = NULLVP;
300 
301 	/*
302 	 * Disallow write attempts to the filesystem mounted read-only.
303 	 */
304 	if ((cnp->cn_flags & ISLASTCN) &&
305 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
306 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
307 		return (EROFS);
308 	}
309 
310 	/*
311 	 * For any lookups we do, always return with the parent locked.
312 	 */
313 	cnp->cn_flags |= LOCKPARENT;
314 
315 	lowerdvp = dun->un_lowervp;
316 	uppervp = NULLVP;
317 	lowervp = NULLVP;
318 	iswhiteout = 0;
319 
320 	uerror = ENOENT;
321 	lerror = ENOENT;
322 
323 	/*
324 	 * Get a private lock on uppervp and a reference, effectively
325 	 * taking it out of the union_node's control.
326 	 *
327 	 * We must lock upperdvp while holding our lock on dvp
328 	 * to avoid a deadlock.
329 	 */
330 	upperdvp = union_lock_upper(dun, td);
331 
332 	/*
333 	 * Do the lookup in the upper level.
334 	 * If that level consumes additional pathnames,
335 	 * then assume that something special is going
336 	 * on and just return that vnode.
337 	 */
338 	if (upperdvp != NULLVP) {
339 		/*
340 		 * We do not have to worry about the DOTDOT case, we've
341 		 * already unlocked dvp.
342 		 */
343 		UDEBUG(("A %p\n", upperdvp));
344 
345 		/*
346 		 * Do the lookup.   We must supply a locked and referenced
347 		 * upperdvp to the function and will get a new locked and
348 		 * referenced upperdvp back, with the old having been
349 		 * dereferenced.
350 		 *
351 		 * If an error is returned, uppervp will be NULLVP.  If no
352 		 * error occurs, uppervp will be the locked and referenced.
353 		 * Return vnode, or possibly NULL, depending on what is being
354 		 * requested.  It is possible that the returned uppervp
355 		 * will be the same as upperdvp.
356 		 */
357 		uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp);
358 		UDEBUG((
359 		    "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n",
360 		    uerror,
361 		    upperdvp,
362 		    vrefcnt(upperdvp),
363 		    VOP_ISLOCKED(upperdvp, NULL),
364 		    uppervp,
365 		    (uppervp ? vrefcnt(uppervp) : -99),
366 		    (uppervp ? VOP_ISLOCKED(uppervp, NULL) : -99)
367 		));
368 
369 		/*
370 		 * Disallow write attempts to the filesystem mounted read-only.
371 		 */
372 		if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) &&
373 		    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
374 		    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) {
375 			error = EROFS;
376 			goto out;
377 		}
378 
379 		/*
380 		 * Special case: If cn_consume != 0 then skip out.  The result
381 		 * of the lookup is transfered to our return variable.  If
382 		 * an error occured we have to throw away the results.
383 		 */
384 
385 		if (cnp->cn_consume != 0) {
386 			if ((error = uerror) == 0) {
387 				*ap->a_vpp = uppervp;
388 				uppervp = NULL;
389 			}
390 			goto out;
391 		}
392 
393 		/*
394 		 * Calculate whiteout, fall through.
395 		 */
396 
397 		if (uerror == ENOENT || uerror == EJUSTRETURN) {
398 			if (cnp->cn_flags & ISWHITEOUT) {
399 				iswhiteout = 1;
400 			} else if (lowerdvp != NULLVP) {
401 				int terror;
402 
403 				terror = VOP_GETATTR(upperdvp, &va,
404 					cnp->cn_cred, cnp->cn_thread);
405 				if (terror == 0 && (va.va_flags & OPAQUE))
406 					iswhiteout = 1;
407 			}
408 		}
409 	}
410 
411 	/*
412 	 * In a similar way to the upper layer, do the lookup
413 	 * in the lower layer.   This time, if there is some
414 	 * component magic going on, then vput whatever we got
415 	 * back from the upper layer and return the lower vnode
416 	 * instead.
417 	 */
418 
419 	if (lowerdvp != NULLVP && !iswhiteout) {
420 		int nameiop;
421 
422 		UDEBUG(("B %p\n", lowerdvp));
423 
424 		/*
425 		 * Force only LOOKUPs on the lower node, since
426 		 * we won't be making changes to it anyway.
427 		 */
428 		nameiop = cnp->cn_nameiop;
429 		cnp->cn_nameiop = LOOKUP;
430 		if (um->um_op == UNMNT_BELOW) {
431 			saved_cred = cnp->cn_cred;
432 			cnp->cn_cred = um->um_cred;
433 		}
434 
435 		/*
436 		 * We shouldn't have to worry about locking interactions
437 		 * between the lower layer and our union layer (w.r.t.
438 		 * `..' processing) because we don't futz with lowervp
439 		 * locks in the union-node instantiation code path.
440 		 *
441 		 * union_lookup1() requires lowervp to be locked on entry,
442 		 * and it will be unlocked on return.  The ref count will
443 		 * not change.  On return lowervp doesn't represent anything
444 		 * to us so we NULL it out.
445 		 */
446 		VREF(lowerdvp);
447 		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, td);
448 		lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp);
449 		if (lowerdvp == lowervp)
450 			vrele(lowerdvp);
451 		else
452 			vput(lowerdvp);
453 		lowerdvp = NULL;	/* lowerdvp invalid after vput */
454 
455 		if (um->um_op == UNMNT_BELOW)
456 			cnp->cn_cred = saved_cred;
457 		cnp->cn_nameiop = nameiop;
458 
459 		if (cnp->cn_consume != 0 || lerror == EACCES) {
460 			if ((error = lerror) == 0) {
461 				*ap->a_vpp = lowervp;
462 				lowervp = NULL;
463 			}
464 			goto out;
465 		}
466 	} else {
467 		UDEBUG(("C %p\n", lowerdvp));
468 		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
469 			if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) {
470 				VREF(lowervp);
471 				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, td);
472 				lerror = 0;
473 			}
474 		}
475 	}
476 
477 	/*
478 	 * Ok.  Now we have uerror, uppervp, upperdvp, lerror, and lowervp.
479 	 *
480 	 * 1. If both layers returned an error, select the upper layer.
481 	 *
482 	 * 2. If the upper layer failed and the bottom layer succeeded,
483 	 *    two subcases occur:
484 	 *
485 	 *	a.  The bottom vnode is not a directory, in which case
486 	 *	    just return a new union vnode referencing an
487 	 *	    empty top layer and the existing bottom layer.
488 	 *
489 	 *	b.  The bottom vnode is a directory, in which case
490 	 *	    create a new directory in the top layer and
491 	 *	    and fall through to case 3.
492 	 *
493 	 * 3. If the top layer succeeded, then return a new union
494 	 *    vnode referencing whatever the new top layer and
495 	 *    whatever the bottom layer returned.
496 	 */
497 
498 	/* case 1. */
499 	if ((uerror != 0) && (lerror != 0)) {
500 		error = uerror;
501 		goto out;
502 	}
503 
504 	/* case 2. */
505 	if (uerror != 0 /* && (lerror == 0) */ ) {
506 		if (lowervp->v_type == VDIR) { /* case 2b. */
507 			KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL"));
508 			/*
509 			 * Oops, uppervp has a problem, we may have to shadow.
510 			 */
511 			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
512 			if (uerror) {
513 				error = uerror;
514 				goto out;
515 			}
516 		}
517 	}
518 
519 	/*
520 	 * Must call union_allocvp() with both the upper and lower vnodes
521 	 * referenced and the upper vnode locked.   ap->a_vpp is returned
522 	 * referenced and locked.  lowervp, uppervp, and upperdvp are
523 	 * absorbed by union_allocvp() whether it succeeds or fails.
524 	 *
525 	 * upperdvp is the parent directory of uppervp which may be
526 	 * different, depending on the path, from dvp->un_uppervp.  That's
527 	 * why it is a separate argument.  Note that it must be unlocked.
528 	 *
529 	 * dvp must be locked on entry to the call and will be locked on
530 	 * return.
531 	 */
532 
533 	if (uppervp && uppervp != upperdvp)
534 		VOP_UNLOCK(uppervp, 0, td);
535 	if (lowervp)
536 		VOP_UNLOCK(lowervp, 0, td);
537 	if (upperdvp)
538 		VOP_UNLOCK(upperdvp, 0, td);
539 
540 	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
541 			      uppervp, lowervp, 1);
542 
543 	UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99));
544 
545 	uppervp = NULL;
546 	upperdvp = NULL;
547 	lowervp = NULL;
548 
549 	/*
550 	 *	Termination Code
551 	 *
552 	 *	- put away any extra junk laying around.  Note that lowervp
553 	 *	  (if not NULL) will never be the same as *ap->a_vp and
554 	 *	  neither will uppervp, because when we set that state we
555 	 *	  NULL-out lowervp or uppervp.  On the otherhand, upperdvp
556 	 *	  may match uppervp or *ap->a_vpp.
557 	 *
558 	 *	- relock/unlock dvp if appropriate.
559 	 */
560 
561 out:
562 	if (upperdvp) {
563 		if (upperdvp == uppervp || upperdvp == *ap->a_vpp)
564 			vrele(upperdvp);
565 		else
566 			vput(upperdvp);
567 	}
568 
569 	if (uppervp)
570 		vput(uppervp);
571 
572 	if (lowervp)
573 		vput(lowervp);
574 
575 	/*
576 	 * Restore LOCKPARENT state
577 	 */
578 
579 	if (!lockparent)
580 		cnp->cn_flags &= ~LOCKPARENT;
581 
582 	UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp,
583 		((*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99),
584 		lowervp, uppervp));
585 
586 	/*
587 	 * dvp lock state, determine whether to relock dvp.  dvp is expected
588 	 * to be locked on return if:
589 	 *
590 	 *	- there was an error (except not EJUSTRETURN), or
591 	 *	- we hit the last component and lockparent is true
592 	 *
593 	 * dvp_is_locked is the current state of the dvp lock, not counting
594 	 * the possibility that *ap->a_vpp == dvp (in which case it is locked
595 	 * anyway).  Note that *ap->a_vpp == dvp only if no error occured.
596 	 */
597 
598 	if (*ap->a_vpp != dvp) {
599 		if ((error == 0 || error == EJUSTRETURN) &&
600 		    (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)) {
601 			VOP_UNLOCK(dvp, 0, td);
602 		}
603 	}
604 
605 	/*
606 	 * Diagnostics
607 	 */
608 
609 #ifdef DIAGNOSTIC
610 	if (cnp->cn_namelen == 1 &&
611 	    cnp->cn_nameptr[0] == '.' &&
612 	    *ap->a_vpp != dvp) {
613 		panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp);
614 	}
615 #endif
616 
617 	return (error);
618 }
619 
620 /*
621  * 	union_create:
622  *
623  * a_dvp is locked on entry and remains locked on return.  a_vpp is returned
624  * locked if no error occurs, otherwise it is garbage.
625  */
626 
627 static int
628 union_create(ap)
629 	struct vop_create_args /* {
630 		struct vnode *a_dvp;
631 		struct vnode **a_vpp;
632 		struct componentname *a_cnp;
633 		struct vattr *a_vap;
634 	} */ *ap;
635 {
636 	struct union_node *dun = VTOUNION(ap->a_dvp);
637 	struct componentname *cnp = ap->a_cnp;
638 	struct thread *td = cnp->cn_thread;
639 	struct vnode *dvp;
640 	int error = EROFS;
641 
642 	if ((dvp = union_lock_upper(dun, td)) != NULL) {
643 		struct vnode *vp;
644 		struct mount *mp;
645 
646 		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
647 		if (error == 0) {
648 			mp = ap->a_dvp->v_mount;
649 			VOP_UNLOCK(vp, 0, td);
650 			UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vrefcnt(vp)));
651 			error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
652 				cnp, vp, NULLVP, 1);
653 			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp)));
654 		}
655 		union_unlock_upper(dvp, td);
656 	}
657 	return (error);
658 }
659 
660 static int
661 union_whiteout(ap)
662 	struct vop_whiteout_args /* {
663 		struct vnode *a_dvp;
664 		struct componentname *a_cnp;
665 		int a_flags;
666 	} */ *ap;
667 {
668 	struct union_node *un = VTOUNION(ap->a_dvp);
669 	struct componentname *cnp = ap->a_cnp;
670 	struct vnode *uppervp;
671 	int error = EOPNOTSUPP;
672 
673 	if ((uppervp = union_lock_upper(un, cnp->cn_thread)) != NULLVP) {
674 		error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags);
675 		union_unlock_upper(uppervp, cnp->cn_thread);
676 	}
677 	return(error);
678 }
679 
680 /*
681  * 	union_mknod:
682  *
683  *	a_dvp is locked on entry and should remain locked on return.
684  *	a_vpp is garbagre whether an error occurs or not.
685  */
686 
687 static int
688 union_mknod(ap)
689 	struct vop_mknod_args /* {
690 		struct vnode *a_dvp;
691 		struct vnode **a_vpp;
692 		struct componentname *a_cnp;
693 		struct vattr *a_vap;
694 	} */ *ap;
695 {
696 	struct union_node *dun = VTOUNION(ap->a_dvp);
697 	struct componentname *cnp = ap->a_cnp;
698 	struct vnode *dvp;
699 	int error = EROFS;
700 
701 	if ((dvp = union_lock_upper(dun, cnp->cn_thread)) != NULL) {
702 		error = VOP_MKNOD(dvp, ap->a_vpp, cnp, ap->a_vap);
703 		union_unlock_upper(dvp, cnp->cn_thread);
704 	}
705 	return (error);
706 }
707 
/*
 *	union_open:
 *
 *	run open VOP.  When opening the underlying vnode we have to mimic
 *	vn_open().  What we *really* need to do to avoid screwups if the
 *	open semantics change is to call vn_open().  For example, ufs blows
 *	up if you open a file but do not vmio it prior to writing.
 */

static int
union_open(ap)
	struct vop_open_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *tvp;
	int mode = ap->a_mode;
	struct ucred *cred = ap->a_cred;
	struct thread *td = ap->a_td;
	int error = 0;
	int tvpisupper = 1;

	/*
	 * If there is an existing upper vp then simply open that.
	 * The upper vp takes precedence over the lower vp.  When opening
	 * a lower vp for writing copy it to the uppervp and then open the
	 * uppervp.
	 *
	 * At the end of this section tvp will be left locked.
	 */
	if ((tvp = union_lock_upper(un, td)) == NULLVP) {
		/*
		 * If the lower vnode is being opened for writing, then
		 * copy the file contents to the upper vnode and open that,
		 * otherwise can simply open the lower vnode.
		 */
		tvp = un->un_lowervp;
		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
			/*
			 * With O_TRUNC the contents are about to be
			 * discarded, so skip copying the file data up.
			 */
			int docopy = !(mode & O_TRUNC);
			error = union_copyup(un, docopy, cred, td);
			/* May still be NULLVP if the copyup failed. */
			tvp = union_lock_upper(un, td);
		} else {
			/* Count lower-layer opens; union_close() balances. */
			un->un_openl++;
			VREF(tvp);
			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td);
			tvpisupper = 0;
		}
	}

	/*
	 * We are holding the correct vnode, open it.
	 */

	if (error == 0)
		error = VOP_OPEN(tvp, mode, cred, td);

	/*
	 * This is absolutely necessary or UFS will blow up.
	 */
	if (error == 0 && vn_canvmio(tvp) == TRUE) {
		error = vfs_object_create(tvp, td, cred);
	}

	/*
	 * Release any locks held.
	 */
	if (tvpisupper) {
		if (tvp)
			union_unlock_upper(tvp, td);
	} else {
		vput(tvp);
	}
	return (error);
}
787 
/*
 *	union_close:
 *
 *	It is unclear whether a_vp is passed locked or unlocked.  Whatever
 *	the case we do not change it.
 */

static int
union_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;

	/*
	 * No upper layer: the close goes to the lower layer and balances
	 * the un_openl count taken in union_open().
	 */
	if ((vp = un->un_uppervp) == NULLVP) {
#ifdef UNION_DIAGNOSTIC
		if (un->un_openl <= 0)
			panic("union: un_openl cnt");
#endif
		--un->un_openl;
		vp = un->un_lowervp;
	}
	ap->a_vp = vp;
	/* Re-dispatch the close directly on the chosen layer's vnode. */
	return (VCALL(vp, VOFFSET(vop_close), ap));
}
818 
/*
 * Check access permission on the union vnode.
 * The access check being enforced is to check
 * against both the underlying vnode, and any
 * copied vnode.  This ensures that no additional
 * file permissions are given away simply because
 * the user caused an implicit file copy.
 */
static int
union_access(ap)
	struct vop_access_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct thread *td = ap->a_td;
	int error = EACCES;
	struct vnode *vp;

	/*
	 * Disallow write attempts on filesystems mounted read-only.
	 */
	if ((ap->a_mode & VWRITE) &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (ap->a_vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}

	/* When an upper layer exists, its verdict is final. */
	if ((vp = union_lock_upper(un, td)) != NULLVP) {
		ap->a_vp = vp;
		error = VCALL(vp, VOFFSET(vop_access), ap);
		union_unlock_upper(vp, td);
		return(error);
	}

	if ((vp = un->un_lowervp) != NULLVP) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		ap->a_vp = vp;

		/*
		 * Remove VWRITE from a_mode if our mount point is RW, because
		 * we want to allow writes and lowervp may be read-only.
		 */
		if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ap->a_mode &= ~VWRITE;

		error = VCALL(vp, VOFFSET(vop_access), ap);
		if (error == 0) {
			struct union_mount *um;

			um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount);

			/*
			 * For UNMNT_BELOW mounts, re-check with the mount
			 * owner's credentials as well.
			 */
			if (um->um_op == UNMNT_BELOW) {
				ap->a_cred = um->um_cred;
				error = VCALL(vp, VOFFSET(vop_access), ap);
			}
		}
		VOP_UNLOCK(vp, 0, td);
	}
	return(error);
}
890 
/*
 * We handle getattr only to change the fsid and
 * track object sizes
 *
 * It's not clear whether VOP_GETATTR is to be
 * called with the vnode locked or not.  stat() calls
 * it with (vp) locked, and fstat() calls it with
 * (vp) unlocked.
 *
 * Because of this we cannot use our normal locking functions
 * if we do not intend to lock the main a_vp node.  At the moment
 * we are running without any specific locking at all, but beware
 * to any programmer that care must be taken if locking is added
 * to this function.
 */

static int
union_getattr(ap)
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	int error;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;
	struct vattr *vap;
	struct vattr va;

	/*
	 * Some programs walk the filesystem hierarchy by counting
	 * links to directories to avoid stat'ing all the time.
	 * This means the link count on directories needs to be "correct".
	 * The only way to do that is to call getattr on both layers
	 * and fix up the link count.  The link count will not necessarily
	 * be accurate but will be large enough to defeat the tree walkers.
	 */

	vap = ap->a_vap;

	/* Upper layer attributes, if present, go straight to the caller. */
	if ((vp = un->un_uppervp) != NULLVP) {
		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		/* XXX isn't this dangerous without a lock? */
		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
	}

	/*
	 * Decide whether the lower layer must be consulted: always when
	 * there is no upper layer; for directories with both layers the
	 * lower attributes go into the scratch "va" so only the link
	 * count is merged below.
	 */
	if (vp == NULLVP) {
		vp = un->un_lowervp;
	} else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) {
		vp = un->un_lowervp;
		vap = &va;
	} else {
		vp = NULLVP;
	}

	if (vp != NULLVP) {
		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		/* XXX isn't this dangerous without a lock? */
		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
	}

	/* Merge the directory link counts from both layers. */
	if ((vap != ap->a_vap) && (vap->va_type == VDIR))
		ap->a_vap->va_nlink += vap->va_nlink;
	return (0);
}
962 
/*
 *	union_setattr:
 *
 *	Attributes can only be changed in the upper layer; a lower-only
 *	regular file is copied up first.
 */
static int
union_setattr(ap)
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct thread *td = ap->a_td;
	struct vattr *vap = ap->a_vap;
	struct vnode *uppervp;
	int error;

	/*
	 * Disallow write attempts on filesystems mounted read-only.
	 */
	if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	     vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	     vap->va_mtime.tv_sec != VNOVAL ||
	     vap->va_mode != (mode_t)VNOVAL)) {
		return (EROFS);
	}

	/*
	 * Handle case of truncating lower object to zero size
	 * by creating a zero length upper object.  This is to
	 * handle the case of open with O_TRUNC and O_CREAT.
	 */
	if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) {
		/* Skip copying the data when truncating to zero length. */
		error = union_copyup(un, (ap->a_vap->va_size != 0),
			    ap->a_cred, ap->a_td);
		if (error)
			return (error);
	}

	/*
	 * Try to set attributes in upper layer,
	 * otherwise return read-only filesystem error.
	 */
	error = EROFS;
	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
		error = VOP_SETATTR(un->un_uppervp, ap->a_vap,
					ap->a_cred, ap->a_td);
		if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
			union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
		union_unlock_upper(uppervp, td);
	}
	return (error);
}
1015 
1016 static int
1017 union_read(ap)
1018 	struct vop_read_args /* {
1019 		struct vnode *a_vp;
1020 		struct uio *a_uio;
1021 		int  a_ioflag;
1022 		struct ucred *a_cred;
1023 	} */ *ap;
1024 {
1025 	struct union_node *un = VTOUNION(ap->a_vp);
1026 	struct thread *td = ap->a_uio->uio_td;
1027 	struct vnode *uvp;
1028 	int error;
1029 
1030 	uvp = union_lock_other(un, td);
1031 	KASSERT(uvp != NULL, ("union_read: backing vnode missing!"));
1032 
1033 	error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1034 	union_unlock_other(uvp, td);
1035 
1036 	/*
1037 	 * XXX
1038 	 * Perhaps the size of the underlying object has changed under
1039 	 * our feet.  Take advantage of the offset information present
1040 	 * in the uio structure.
1041 	 */
1042 	if (error == 0) {
1043 		struct union_node *un = VTOUNION(ap->a_vp);
1044 		off_t cur = ap->a_uio->uio_offset;
1045 
1046 		if (uvp == un->un_uppervp) {
1047 			if (cur > un->un_uppersz)
1048 				union_newsize(ap->a_vp, cur, VNOVAL);
1049 		} else {
1050 			if (cur > un->un_lowersz)
1051 				union_newsize(ap->a_vp, VNOVAL, cur);
1052 		}
1053 	}
1054 	return (error);
1055 }
1056 
1057 static int
1058 union_write(ap)
1059 	struct vop_read_args /* {
1060 		struct vnode *a_vp;
1061 		struct uio *a_uio;
1062 		int  a_ioflag;
1063 		struct ucred *a_cred;
1064 	} */ *ap;
1065 {
1066 	struct union_node *un = VTOUNION(ap->a_vp);
1067 	struct thread *td = ap->a_uio->uio_td;
1068 	struct vnode *uppervp;
1069 	int error;
1070 
1071 	if ((uppervp = union_lock_upper(un, td)) == NULLVP)
1072 		panic("union: missing upper layer in write");
1073 
1074 	error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1075 
1076 	/*
1077 	 * The size of the underlying object may be changed by the
1078 	 * write.
1079 	 */
1080 	if (error == 0) {
1081 		off_t cur = ap->a_uio->uio_offset;
1082 
1083 		if (cur > un->un_uppersz)
1084 			union_newsize(ap->a_vp, cur, VNOVAL);
1085 	}
1086 	union_unlock_upper(uppervp, td);
1087 	return (error);
1088 }
1089 
1090 static int
1091 union_lease(ap)
1092 	struct vop_lease_args /* {
1093 		struct vnode *a_vp;
1094 		struct thread *a_td;
1095 		struct ucred *a_cred;
1096 		int a_flag;
1097 	} */ *ap;
1098 {
1099 	struct vnode *ovp = OTHERVP(ap->a_vp);
1100 
1101 	ap->a_vp = ovp;
1102 	return (VCALL(ovp, VOFFSET(vop_lease), ap));
1103 }
1104 
1105 static int
1106 union_ioctl(ap)
1107 	struct vop_ioctl_args /* {
1108 		struct vnode *a_vp;
1109 		u_long  a_command;
1110 		caddr_t  a_data;
1111 		int  a_fflag;
1112 		struct ucred *a_cred;
1113 		struct thread *a_td;
1114 	} */ *ap;
1115 {
1116 	struct vnode *ovp = OTHERVP(ap->a_vp);
1117 
1118 	ap->a_vp = ovp;
1119 	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1120 }
1121 
1122 static int
1123 union_poll(ap)
1124 	struct vop_poll_args /* {
1125 		struct vnode *a_vp;
1126 		int  a_events;
1127 		struct ucred *a_cred;
1128 		struct thread *a_td;
1129 	} */ *ap;
1130 {
1131 	struct vnode *ovp = OTHERVP(ap->a_vp);
1132 
1133 	ap->a_vp = ovp;
1134 	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1135 }
1136 
1137 static int
1138 union_revoke(ap)
1139 	struct vop_revoke_args /* {
1140 		struct vnode *a_vp;
1141 		int a_flags;
1142 		struct thread *a_td;
1143 	} */ *ap;
1144 {
1145 	struct vnode *vp = ap->a_vp;
1146 
1147 	if (UPPERVP(vp))
1148 		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1149 	if (LOWERVP(vp))
1150 		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1151 	vgone(vp);
1152 	return (0);
1153 }
1154 
1155 static int
1156 union_fsync(ap)
1157 	struct vop_fsync_args /* {
1158 		struct vnode *a_vp;
1159 		struct ucred *a_cred;
1160 		int  a_waitfor;
1161 		struct thread *a_td;
1162 	} */ *ap;
1163 {
1164 	int error = 0;
1165 	struct thread *td = ap->a_td;
1166 	struct vnode *targetvp;
1167 	struct union_node *un = VTOUNION(ap->a_vp);
1168 
1169 	if ((targetvp = union_lock_other(un, td)) != NULLVP) {
1170 		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, td);
1171 		union_unlock_other(targetvp, td);
1172 	}
1173 
1174 	return (error);
1175 }
1176 
1177 /*
1178  *	union_remove:
1179  *
1180  *	Remove the specified cnp.  The dvp and vp are passed to us locked
1181  *	and must remain locked on return.
1182  */
1183 
static int
union_remove(ap)
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_thread;
	struct vnode *uppervp;
	struct vnode *upperdvp;
	int error;

	/* Removal is always performed in the upper layer. */
	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
		panic("union remove: null upper vnode");

	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
		/*
		 * The object exists in the upper layer: remove it there,
		 * setting DOWHITEOUT first if union_dowhiteout() says a
		 * whiteout is needed (presumably to keep a lower-layer
		 * object with the same name hidden).
		 */
		if (union_dowhiteout(un, cnp->cn_cred, td))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(upperdvp, uppervp, cnp);
#if 0
		/* XXX */
		if (!error)
			union_removed_upper(un);
#endif
		union_unlock_upper(uppervp, td);
	} else {
		/*
		 * No upper object: the target only exists in the lower
		 * (read-only) layer, so mask it by creating a whiteout
		 * entry in the upper directory instead.
		 */
		error = union_mkwhiteout(
			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
			    upperdvp, ap->a_cnp, un->un_path);
	}
	union_unlock_upper(upperdvp, td);
	return (error);
}
1221 
1222 /*
1223  *	union_link:
1224  *
1225  *	tdvp and vp will be locked on entry.
1226  *	tdvp and vp should remain locked on return.
1227  */
1228 
1229 static int
1230 union_link(ap)
1231 	struct vop_link_args /* {
1232 		struct vnode *a_tdvp;
1233 		struct vnode *a_vp;
1234 		struct componentname *a_cnp;
1235 	} */ *ap;
1236 {
1237 	struct componentname *cnp = ap->a_cnp;
1238 	struct thread *td = cnp->cn_thread;
1239 	struct union_node *dun = VTOUNION(ap->a_tdvp);
1240 	struct vnode *vp;
1241 	struct vnode *tdvp;
1242 	int error = 0;
1243 
1244 	if (ap->a_tdvp->v_op != ap->a_vp->v_op) {
1245 		vp = ap->a_vp;
1246 	} else {
1247 		struct union_node *tun = VTOUNION(ap->a_vp);
1248 
1249 		if (tun->un_uppervp == NULLVP) {
1250 #if 0
1251 			if (dun->un_uppervp == tun->un_dirvp) {
1252 				if (dun->un_flags & UN_ULOCK) {
1253 					dun->un_flags &= ~UN_ULOCK;
1254 					VOP_UNLOCK(dun->un_uppervp, 0, td);
1255 				}
1256 			}
1257 #endif
1258 			error = union_copyup(tun, 1, cnp->cn_cred, td);
1259 #if 0
1260 			if (dun->un_uppervp == tun->un_dirvp) {
1261 				vn_lock(dun->un_uppervp,
1262 					    LK_EXCLUSIVE | LK_RETRY, td);
1263 				dun->un_flags |= UN_ULOCK;
1264 			}
1265 #endif
1266 			if (error)
1267 				return (error);
1268 		}
1269 		vp = tun->un_uppervp;
1270 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1271 	}
1272 
1273 	/*
1274 	 * Make sure upper is locked, then unlock the union directory we were
1275 	 * called with to avoid a deadlock while we are calling VOP_LINK() on
1276 	 * the upper (with tdvp locked and vp not locked).  Our ap->a_tdvp
1277 	 * is expected to be locked on return.
1278 	 */
1279 
1280 	if ((tdvp = union_lock_upper(dun, td)) == NULLVP)
1281 		return (EROFS);
1282 
1283 	VOP_UNLOCK(ap->a_tdvp, 0, td);		/* unlock calling node */
1284 	error = VOP_LINK(tdvp, vp, cnp);	/* call link on upper */
1285 
1286 	/*
1287 	 * Unlock tun->un_uppervp if we locked it above.
1288 	 */
1289 	if (ap->a_tdvp->v_op == ap->a_vp->v_op)
1290 		VOP_UNLOCK(vp, 0, td);
1291 	/*
1292 	 * We have to unlock tdvp prior to relocking our calling node in
1293 	 * order to avoid a deadlock.  We also have to unlock ap->a_vp
1294 	 * before relocking the directory, but then we have to relock
1295 	 * ap->a_vp as our caller expects.
1296 	 */
1297 	VOP_UNLOCK(ap->a_vp, 0, td);
1298 	union_unlock_upper(tdvp, td);
1299 	vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, td);
1300 	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, td);
1301 	return (error);
1302 }
1303 
static int
union_rename(ap)
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap;
{
	int error;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Figure out what fdvp to pass to our upper or lower vnode.  If we
	 * replace the fdvp, release the original one and ref the new one.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}
		fdvp = un->un_uppervp;
		VREF(fdvp);
		vrele(ap->a_fdvp);
	}

	/*
	 * Figure out what fvp to pass to our upper or lower vnode.  If we
	 * replace the fvp, release the original one and ref the new one.
	 */

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
#if 0
		struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount);
#endif

		if (un->un_uppervp == NULLVP) {
			switch(fvp->v_type) {
			case VREG:
				/*
				 * Regular file only in the lower layer:
				 * copy it up so the rename can proceed
				 * entirely in the upper layer.
				 */
				vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread);
				error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_thread);
				VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_thread);
				if (error)
					goto bad;
				break;
			case VDIR:
				/*
				 * XXX not yet.
				 *
				 * There is only one way to rename a directory
				 * based in the lowervp, and that is to copy
				 * the entire directory hierarchy.  Otherwise
				 * it would not last across a reboot.
				 */
#if 0
				vrele(fvp);
				fvp = NULL;
				vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread);
				error = union_mkshadow(um, fdvp,
					    ap->a_fcnp, &un->un_uppervp);
				VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_thread);
				if (un->un_uppervp)
					VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_thread);
				if (error)
					goto bad;
				break;
#endif
			default:
				error = EXDEV;
				goto bad;
			}
		}

		/*
		 * If there is a lower-layer object under the old name,
		 * request a whiteout so it does not reappear after the
		 * rename.
		 */
		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;
		fvp = un->un_uppervp;
		VREF(fvp);
		vrele(ap->a_fvp);
	}

	/*
	 * Figure out what tdvp (destination directory) to pass to the
	 * lower level.  If we replace it with uppervp, we need to vput the
	 * old one.  The exclusive lock is transfered to what we will pass
	 * down in the VOP_RENAME() and we replace uppervp with a simple
	 * reference.
	 */

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);

		if (un->un_uppervp == NULLVP) {
			/*
			 * This should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		/*
		 * New tdvp is a lock and reference on uppervp.
		 * Put away the old tdvp.
		 */
		tdvp = union_lock_upper(un, ap->a_tcnp->cn_thread);
		vput(ap->a_tdvp);
	}

	/*
	 * Figure out what tvp (destination file) to pass to the
	 * lower level.
	 *
	 * If the uppervp file does not exist, put away the (wrong)
	 * file and change tvp to NULL.
	 */

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		tvp = union_lock_upper(un, ap->a_tcnp->cn_thread);
		vput(ap->a_tvp);
		/* note: tvp may be NULL */
	}

	/*
	 * VOP_RENAME() releases/vputs prior to returning, so we have no
	 * cleanup to do.
	 */

	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));

	/*
	 * Error.  We still have to release / vput the various elements.
	 *
	 * NOTE(review): the cleanup below assumes fdvp/fvp hold plain
	 * references and tdvp/tvp are locked+referenced; verify that
	 * every 'goto bad' path actually leaves them in that state.
	 */

bad:
	vrele(fdvp);
	if (fvp)
		vrele(fvp);
	vput(tdvp);
	if (tvp != NULLVP) {
		if (tvp != tdvp)
			vput(tvp);
		else
			vrele(tvp);
	}
	return (error);
}
1468 
static int
union_mkdir(ap)
	struct vop_mkdir_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_thread;
	struct vnode *upperdvp;
	int error = EROFS;

	/* Directories can only be created in the upper layer. */
	if ((upperdvp = union_lock_upper(dun, td)) != NULLVP) {
		struct vnode *vp;

		error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap);
		union_unlock_upper(upperdvp, td);

		if (error == 0) {
			/*
			 * Wrap the new upper directory vnode in a union
			 * vnode to hand back to the caller.  The new vnode
			 * must be unlocked before union_allocvp() consumes
			 * it as the uppervp.
			 */
			VOP_UNLOCK(vp, 0, td);
			UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vrefcnt(vp)));
			error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount,
				ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1);
			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp)));
		}
	}
	return (error);
}
1500 
static int
union_rmdir(ap)
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_thread;
	struct vnode *upperdvp;
	struct vnode *uppervp;
	int error;

	/* Directory removal, like file removal, happens in the upper layer. */
	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
		panic("union rmdir: null upper vnode");

	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
		/*
		 * Remove the upper-layer directory, setting DOWHITEOUT
		 * first if union_dowhiteout() says a whiteout is needed
		 * (presumably to keep a same-named lower directory hidden).
		 */
		if (union_dowhiteout(un, cnp->cn_cred, td))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp);
		union_unlock_upper(uppervp, td);
	} else {
		/*
		 * Lower-layer-only directory: cannot be removed, so mask
		 * it with a whiteout entry in the upper directory.
		 */
		error = union_mkwhiteout(
			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
			    dun->un_uppervp, ap->a_cnp, un->un_path);
	}
	union_unlock_upper(upperdvp, td);
	return (error);
}
1533 
1534 /*
1535  *	union_symlink:
1536  *
1537  *	dvp is locked on entry and remains locked on return.  a_vpp is garbage
1538  *	(unused).
1539  */
1540 
1541 static int
1542 union_symlink(ap)
1543 	struct vop_symlink_args /* {
1544 		struct vnode *a_dvp;
1545 		struct vnode **a_vpp;
1546 		struct componentname *a_cnp;
1547 		struct vattr *a_vap;
1548 		char *a_target;
1549 	} */ *ap;
1550 {
1551 	struct union_node *dun = VTOUNION(ap->a_dvp);
1552 	struct componentname *cnp = ap->a_cnp;
1553 	struct thread *td = cnp->cn_thread;
1554 	struct vnode *dvp;
1555 	int error = EROFS;
1556 
1557 	if ((dvp = union_lock_upper(dun, td)) != NULLVP) {
1558 		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1559 			    ap->a_target);
1560 		union_unlock_upper(dvp, td);
1561 	}
1562 	return (error);
1563 }
1564 
1565 /*
1566  * union_readdir ()works in concert with getdirentries() and
1567  * readdir(3) to provide a list of entries in the unioned
1568  * directories.  getdirentries()  is responsible for walking
1569  * down the union stack.  readdir(3) is responsible for
1570  * eliminating duplicate names from the returned data stream.
1571  */
1572 static int
1573 union_readdir(ap)
1574 	struct vop_readdir_args /* {
1575 		struct vnode *a_vp;
1576 		struct uio *a_uio;
1577 		struct ucred *a_cred;
1578 		int *a_eofflag;
1579 		u_long *a_cookies;
1580 		int a_ncookies;
1581 	} */ *ap;
1582 {
1583 	struct union_node *un = VTOUNION(ap->a_vp);
1584 	struct thread *td = ap->a_uio->uio_td;
1585 	struct vnode *uvp;
1586 	int error = 0;
1587 
1588 	if ((uvp = union_lock_upper(un, td)) != NULLVP) {
1589 		ap->a_vp = uvp;
1590 		error = VCALL(uvp, VOFFSET(vop_readdir), ap);
1591 		union_unlock_upper(uvp, td);
1592 	}
1593 	return(error);
1594 }
1595 
1596 static int
1597 union_readlink(ap)
1598 	struct vop_readlink_args /* {
1599 		struct vnode *a_vp;
1600 		struct uio *a_uio;
1601 		struct ucred *a_cred;
1602 	} */ *ap;
1603 {
1604 	int error;
1605 	struct union_node *un = VTOUNION(ap->a_vp);
1606 	struct uio *uio = ap->a_uio;
1607 	struct thread *td = uio->uio_td;
1608 	struct vnode *vp;
1609 
1610 	vp = union_lock_other(un, td);
1611 	KASSERT(vp != NULL, ("union_readlink: backing vnode missing!"));
1612 
1613 	ap->a_vp = vp;
1614 	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1615 	union_unlock_other(vp, td);
1616 
1617 	return (error);
1618 }
1619 
1620 static int
1621 union_getwritemount(ap)
1622 	struct vop_getwritemount_args /* {
1623 		struct vnode *a_vp;
1624 		struct mount **a_mpp;
1625 	} */ *ap;
1626 {
1627 	struct vnode *vp = ap->a_vp;
1628 	struct vnode *uvp = UPPERVP(vp);
1629 
1630 	if (uvp == NULL) {
1631 		VI_LOCK(vp);
1632 		if (vp->v_iflag & VI_FREE) {
1633 			VI_UNLOCK(vp);
1634 			return (EOPNOTSUPP);
1635 		}
1636 		VI_UNLOCK(vp);
1637 		return (EACCES);
1638 	}
1639 	return(VOP_GETWRITEMOUNT(uvp, ap->a_mpp));
1640 }
1641 
1642 /*
1643  *	union_inactive:
1644  *
1645  *	Called with the vnode locked.  We are expected to unlock the vnode.
1646  */
1647 
static int
union_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = ap->a_td;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 */

	/*
	 * Release the NULL-terminated array of cached directory
	 * vnodes (un_dircache), if one was built.
	 */
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free (un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

#if 0
	if ((un->un_flags & UN_ULOCK) && un->un_uppervp) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, 0, td);
	}
#endif

	/* We were called locked and must unlock (see comment above). */
	VOP_UNLOCK(vp, 0, td);

	/* A node not kept in the union cache has no further use. */
	if ((un->un_flags & UN_CACHED) == 0)
		vgone(vp);

	return (0);
}
1689 
static int
union_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	/* Tear down the union_node state attached to this vnode. */
	union_freevp(ap->a_vp);

	return (0);
}
1700 
1701 /*
1702  * unionvp do not hold a VM object and there is no need to create one for
1703  * upper or lower vp because it is done in the union_open()
1704  */
1705 static int
1706 union_createvobject(ap)
1707 	struct vop_createvobject_args /* {
1708 		struct vnode *vp;
1709 		struct ucred *cred;
1710 		struct thread *td;
1711 	} */ *ap;
1712 {
1713 	struct vnode *vp = ap->a_vp;
1714 
1715 	vp->v_vflag |= VV_OBJBUF;
1716 	return (0);
1717 }
1718 
1719 /*
1720  * We have nothing to destroy and this operation shouldn't be bypassed.
1721  */
1722 static int
1723 union_destroyvobject(ap)
1724 	struct vop_destroyvobject_args /* {
1725 		struct vnode *vp;
1726 	} */ *ap;
1727 {
1728 	struct vnode *vp = ap->a_vp;
1729 
1730 	vp->v_vflag &= ~VV_OBJBUF;
1731 	return (0);
1732 }
1733 
1734 /*
1735  * Get VM object from the upper or lower vp
1736  */
1737 static int
1738 union_getvobject(ap)
1739 	struct vop_getvobject_args /* {
1740 		struct vnode *vp;
1741 		struct vm_object **objpp;
1742 	} */ *ap;
1743 {
1744 	struct vnode *ovp = OTHERVP(ap->a_vp);
1745 
1746 	if (ovp == NULL)
1747 		return EINVAL;
1748 	return (VOP_GETVOBJECT(ovp, ap->a_objpp));
1749 }
1750 
1751 static int
1752 union_print(ap)
1753 	struct vop_print_args /* {
1754 		struct vnode *a_vp;
1755 	} */ *ap;
1756 {
1757 	struct vnode *vp = ap->a_vp;
1758 
1759 	printf("\tvp=%p, uppervp=%p, lowervp=%p\n",
1760 	       vp, UPPERVP(vp), LOWERVP(vp));
1761 	if (UPPERVP(vp) != NULLVP)
1762 		vprint("union: upper", UPPERVP(vp));
1763 	if (LOWERVP(vp) != NULLVP)
1764 		vprint("union: lower", LOWERVP(vp));
1765 
1766 	return (0);
1767 }
1768 
1769 static int
1770 union_pathconf(ap)
1771 	struct vop_pathconf_args /* {
1772 		struct vnode *a_vp;
1773 		int a_name;
1774 		int *a_retval;
1775 	} */ *ap;
1776 {
1777 	int error;
1778 	struct thread *td = curthread;		/* XXX */
1779 	struct union_node *un = VTOUNION(ap->a_vp);
1780 	struct vnode *vp;
1781 
1782 	vp = union_lock_other(un, td);
1783 	KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!"));
1784 
1785 	ap->a_vp = vp;
1786 	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1787 	union_unlock_other(vp, td);
1788 
1789 	return (error);
1790 }
1791 
1792 static int
1793 union_advlock(ap)
1794 	struct vop_advlock_args /* {
1795 		struct vnode *a_vp;
1796 		caddr_t  a_id;
1797 		int  a_op;
1798 		struct flock *a_fl;
1799 		int  a_flags;
1800 	} */ *ap;
1801 {
1802 	register struct vnode *ovp = OTHERVP(ap->a_vp);
1803 
1804 	ap->a_vp = ovp;
1805 	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1806 }
1807 
1808 
1809 /*
1810  * XXX - vop_strategy must be hand coded because it has no
1811  * YYY - and it is not coherent with anything
1812  *
1813  * vnode in its arguments.
1814  * This goes away with a merged VM/buffer cache.
1815  */
1816 static int
1817 union_strategy(ap)
1818 	struct vop_strategy_args /* {
1819 		struct vnode *a_vp;
1820 		struct buf *a_bp;
1821 	} */ *ap;
1822 {
1823 	struct buf *bp = ap->a_bp;
1824 	struct vnode *othervp = OTHERVP(bp->b_vp);
1825 
1826 #ifdef DIAGNOSTIC
1827 	if (othervp == NULLVP)
1828 		panic("union_strategy: nil vp");
1829 	if ((bp->b_iocmd == BIO_WRITE) &&
1830 	    (othervp == LOWERVP(bp->b_vp)))
1831 		panic("union_strategy: writing to lowervp");
1832 #endif
1833 	return (VOP_STRATEGY(othervp, bp));
1834 }
1835 
1836 /*
1837  * Global vfs data structures
1838  */
vop_t **union_vnodeop_p;

/*
 * Vnode operations vector for union mounts.  Any operation not
 * listed explicitly is handled by vop_defaultop via vop_default_desc.
 */
static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_defaultop },
	{ &vop_access_desc,		(vop_t *) union_access },
	{ &vop_advlock_desc,		(vop_t *) union_advlock },
	{ &vop_bmap_desc,		(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,		(vop_t *) union_close },
	{ &vop_create_desc,		(vop_t *) union_create },
	{ &vop_createvobject_desc,	(vop_t *) union_createvobject },
	{ &vop_destroyvobject_desc,	(vop_t *) union_destroyvobject },
	{ &vop_fsync_desc,		(vop_t *) union_fsync },
	{ &vop_getattr_desc,		(vop_t *) union_getattr },
	{ &vop_getvobject_desc,		(vop_t *) union_getvobject },
	{ &vop_inactive_desc,		(vop_t *) union_inactive },
	{ &vop_ioctl_desc,		(vop_t *) union_ioctl },
	{ &vop_lease_desc,		(vop_t *) union_lease },
	{ &vop_link_desc,		(vop_t *) union_link },
	{ &vop_lookup_desc,		(vop_t *) union_lookup },
	{ &vop_mkdir_desc,		(vop_t *) union_mkdir },
	{ &vop_mknod_desc,		(vop_t *) union_mknod },
	{ &vop_open_desc,		(vop_t *) union_open },
	{ &vop_pathconf_desc,		(vop_t *) union_pathconf },
	{ &vop_poll_desc,		(vop_t *) union_poll },
	{ &vop_print_desc,		(vop_t *) union_print },
	{ &vop_read_desc,		(vop_t *) union_read },
	{ &vop_readdir_desc,		(vop_t *) union_readdir },
	{ &vop_readlink_desc,		(vop_t *) union_readlink },
	{ &vop_getwritemount_desc,	(vop_t *) union_getwritemount },
	{ &vop_reclaim_desc,		(vop_t *) union_reclaim },
	{ &vop_remove_desc,		(vop_t *) union_remove },
	{ &vop_rename_desc,		(vop_t *) union_rename },
	{ &vop_revoke_desc,		(vop_t *) union_revoke },
	{ &vop_rmdir_desc,		(vop_t *) union_rmdir },
	{ &vop_setattr_desc,		(vop_t *) union_setattr },
	{ &vop_strategy_desc,		(vop_t *) union_strategy },
	{ &vop_symlink_desc,		(vop_t *) union_symlink },
	{ &vop_whiteout_desc,		(vop_t *) union_whiteout },
	{ &vop_write_desc,		(vop_t *) union_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };

/* Register the vector with the VFS at module load time. */
VNODEOP_SET(union_vnodeop_opv_desc);
1883