xref: /freebsd/sys/fs/unionfs/union_vnops.c (revision 7660b554bc59a07be0431c17e0e33815818baa69)
1 /*
2  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
3  * Copyright (c) 1992, 1993, 1994, 1995
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Jan-Simon Pendry.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)union_vnops.c	8.32 (Berkeley) 6/23/95
38  * $FreeBSD$
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/fcntl.h>
44 #include <sys/stat.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <sys/mount.h>
48 #include <sys/namei.h>
49 #include <sys/malloc.h>
50 #include <sys/bio.h>
51 #include <sys/buf.h>
52 #include <sys/lock.h>
53 #include <sys/sysctl.h>
54 #include <fs/unionfs/union.h>
55 
56 #include <vm/vm.h>
57 #include <vm/vnode_pager.h>
58 
59 #include <vm/vm_page.h>
60 #include <vm/vm_object.h>
61 
62 int uniondebug = 0;
63 
64 #if UDEBUG_ENABLED
65 SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, "");
66 #else
67 SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, "");
68 #endif
69 
/*
 * Forward declarations for the file-local functions defined below,
 * kept in alphabetical order.
 *
 * NOTE(review): union_write() is declared with struct vop_read_args to
 * match its definition in this file; confirm whether vop_write_args was
 * intended before changing either side.
 */
static int	union_access(struct vop_access_args *ap);
static int	union_advlock(struct vop_advlock_args *ap);
static int	union_close(struct vop_close_args *ap);
static int	union_create(struct vop_create_args *ap);
static int	union_createvobject(struct vop_createvobject_args *ap);
static int	union_destroyvobject(struct vop_destroyvobject_args *ap);
static int	union_fsync(struct vop_fsync_args *ap);
static int	union_getattr(struct vop_getattr_args *ap);
static int	union_getvobject(struct vop_getvobject_args *ap);
static int	union_getwritemount(struct vop_getwritemount_args *ap);
static int	union_inactive(struct vop_inactive_args *ap);
static int	union_ioctl(struct vop_ioctl_args *ap);
static int	union_lease(struct vop_lease_args *ap);
static int	union_link(struct vop_link_args *ap);
static int	union_lookup(struct vop_lookup_args *ap);
static int	union_lookup1(struct vnode *udvp, struct vnode **pdvp,
		    struct vnode **vpp, struct componentname *cnp);
static int	union_mkdir(struct vop_mkdir_args *ap);
static int	union_mknod(struct vop_mknod_args *ap);
static int	union_open(struct vop_open_args *ap);
static int	union_pathconf(struct vop_pathconf_args *ap);
static int	union_poll(struct vop_poll_args *ap);
static int	union_print(struct vop_print_args *ap);
static int	union_read(struct vop_read_args *ap);
static int	union_readdir(struct vop_readdir_args *ap);
static int	union_readlink(struct vop_readlink_args *ap);
static int	union_reclaim(struct vop_reclaim_args *ap);
static int	union_remove(struct vop_remove_args *ap);
static int	union_rename(struct vop_rename_args *ap);
static int	union_revoke(struct vop_revoke_args *ap);
static int	union_rmdir(struct vop_rmdir_args *ap);
static int	union_setattr(struct vop_setattr_args *ap);
static int	union_strategy(struct vop_strategy_args *ap);
static int	union_symlink(struct vop_symlink_args *ap);
static int	union_whiteout(struct vop_whiteout_args *ap);
static int	union_write(struct vop_read_args *ap);
107 
108 static __inline
109 struct vnode *
110 union_lock_upper(struct union_node *un, struct thread *td)
111 {
112 	struct vnode *uppervp;
113 
114 	if ((uppervp = un->un_uppervp) != NULL) {
115 		VREF(uppervp);
116 		vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
117 	}
118 	KASSERT((uppervp == NULL || vrefcnt(uppervp) > 0), ("uppervp usecount is 0"));
119 	return(uppervp);
120 }
121 
/*
 * Counterpart of union_lock_upper(): hand the vnode to vput().
 * `td' is accepted for symmetry with the lock routine and is not used.
 */
static __inline void
union_unlock_upper(struct vnode *uppervp, struct thread *td)
{

	vput(uppervp);
}
128 
129 static __inline
130 struct vnode *
131 union_lock_other(struct union_node *un, struct thread *td)
132 {
133 	struct vnode *vp;
134 
135 	if (un->un_uppervp != NULL) {
136 		vp = union_lock_upper(un, td);
137 	} else if ((vp = un->un_lowervp) != NULL) {
138 		VREF(vp);
139 		vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
140 	}
141 	return(vp);
142 }
143 
/*
 * Counterpart of union_lock_other(): hand the vnode to vput().
 * `td' is accepted for symmetry with the lock routine and is not used.
 */
static __inline void
union_unlock_other(struct vnode *vp, struct thread *td)
{

	vput(vp);
}
150 
151 /*
152  *	union_lookup:
153  *
154  *	udvp	must be exclusively locked on call and will remain
155  *		exclusively locked on return.  This is the mount point
156  *		for our filesystem.
157  *
158  *	dvp	Our base directory, locked and referenced.
159  *		The passed dvp will be dereferenced and unlocked on return
160  *		and a new dvp will be returned which is locked and
161  *		referenced in the same variable.
162  *
163  *	vpp	is filled in with the result if no error occured,
164  *		locked and ref'd.
165  *
166  *		If an error is returned, *vpp is set to NULLVP.  If no
167  *		error occurs, *vpp is returned with a reference and an
168  *		exclusive lock.
169  */
170 
171 static int
172 union_lookup1(udvp, pdvp, vpp, cnp)
173 	struct vnode *udvp;
174 	struct vnode **pdvp;
175 	struct vnode **vpp;
176 	struct componentname *cnp;
177 {
178 	int error;
179 	struct thread *td = cnp->cn_thread;
180 	struct vnode *dvp = *pdvp;
181 	struct vnode *tdvp;
182 	struct mount *mp;
183 
184 	/*
185 	 * If stepping up the directory tree, check for going
186 	 * back across the mount point, in which case do what
187 	 * lookup would do by stepping back down the mount
188 	 * hierarchy.
189 	 */
190 	if (cnp->cn_flags & ISDOTDOT) {
191 		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
192 			/*
193 			 * Don't do the NOCROSSMOUNT check
194 			 * at this level.  By definition,
195 			 * union fs deals with namespaces, not
196 			 * filesystems.
197 			 */
198 			tdvp = dvp;
199 			dvp = dvp->v_mount->mnt_vnodecovered;
200 			VREF(dvp);
201 			vput(tdvp);
202 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
203 		}
204 	}
205 
206 	/*
207 	 * Set return dvp to be the upperdvp 'parent directory.
208 	 */
209 	*pdvp = dvp;
210 
211 	/*
212 	 * If the VOP_LOOKUP() call generates an error, tdvp is invalid and
213 	 * no changes will have been made to dvp, so we are set to return.
214 	 */
215 
216         error = VOP_LOOKUP(dvp, &tdvp, cnp);
217 	if (error) {
218 		UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags));
219 		*vpp = NULL;
220 		return (error);
221 	}
222 
223 	/*
224 	 * The parent directory will have been unlocked, unless lookup
225 	 * found the last component or if dvp == tdvp (tdvp must be locked).
226 	 *
227 	 * We want our dvp to remain locked and ref'd.  We also want tdvp
228 	 * to remain locked and ref'd.
229 	 */
230 	UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags));
231 
232 	if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0)
233 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
234 
235 	/*
236 	 * Lastly check if the current node is a mount point in
237 	 * which case walk up the mount hierarchy making sure not to
238 	 * bump into the root of the mount tree (ie. dvp != udvp).
239 	 *
240 	 * We use dvp as a temporary variable here, it is no longer related
241 	 * to the dvp above.  However, we have to ensure that both *pdvp and
242 	 * tdvp are locked on return.
243 	 */
244 
245 	dvp = tdvp;
246 	while (
247 	    dvp != udvp &&
248 	    (dvp->v_type == VDIR) &&
249 	    (mp = dvp->v_mountedhere)
250 	) {
251 		int relock_pdvp = 0;
252 
253 		if (vfs_busy(mp, 0, 0, td))
254 			continue;
255 
256 		if (dvp == *pdvp)
257 			relock_pdvp = 1;
258 		vput(dvp);
259 		dvp = NULL;
260 		error = VFS_ROOT(mp, &dvp);
261 
262 		vfs_unbusy(mp, td);
263 
264 		if (relock_pdvp)
265 			vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, td);
266 
267 		if (error) {
268 			*vpp = NULL;
269 			return (error);
270 		}
271 	}
272 	*vpp = dvp;
273 	return (0);
274 }
275 
276 static int
277 union_lookup(ap)
278 	struct vop_lookup_args /* {
279 		struct vnodeop_desc *a_desc;
280 		struct vnode *a_dvp;
281 		struct vnode **a_vpp;
282 		struct componentname *a_cnp;
283 	} */ *ap;
284 {
285 	int error;
286 	int uerror, lerror;
287 	struct vnode *uppervp, *lowervp;
288 	struct vnode *upperdvp, *lowerdvp;
289 	struct vnode *dvp = ap->a_dvp;		/* starting dir */
290 	struct union_node *dun = VTOUNION(dvp);	/* associated union node */
291 	struct componentname *cnp = ap->a_cnp;
292 	struct thread *td = cnp->cn_thread;
293 	int lockparent = cnp->cn_flags & LOCKPARENT;
294 	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
295 	struct ucred *saved_cred = NULL;
296 	int iswhiteout;
297 	struct vattr va;
298 
299 	*ap->a_vpp = NULLVP;
300 
301 	/*
302 	 * Disallow write attempts to the filesystem mounted read-only.
303 	 */
304 	if ((cnp->cn_flags & ISLASTCN) &&
305 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
306 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
307 		return (EROFS);
308 	}
309 
310 	/*
311 	 * For any lookups we do, always return with the parent locked.
312 	 */
313 	cnp->cn_flags |= LOCKPARENT;
314 
315 	lowerdvp = dun->un_lowervp;
316 	uppervp = NULLVP;
317 	lowervp = NULLVP;
318 	iswhiteout = 0;
319 
320 	uerror = ENOENT;
321 	lerror = ENOENT;
322 
323 	/*
324 	 * Get a private lock on uppervp and a reference, effectively
325 	 * taking it out of the union_node's control.
326 	 *
327 	 * We must lock upperdvp while holding our lock on dvp
328 	 * to avoid a deadlock.
329 	 */
330 	upperdvp = union_lock_upper(dun, td);
331 
332 	/*
333 	 * Do the lookup in the upper level.
334 	 * If that level consumes additional pathnames,
335 	 * then assume that something special is going
336 	 * on and just return that vnode.
337 	 */
338 	if (upperdvp != NULLVP) {
339 		/*
340 		 * We do not have to worry about the DOTDOT case, we've
341 		 * already unlocked dvp.
342 		 */
343 		UDEBUG(("A %p\n", upperdvp));
344 
345 		/*
346 		 * Do the lookup.   We must supply a locked and referenced
347 		 * upperdvp to the function and will get a new locked and
348 		 * referenced upperdvp back, with the old having been
349 		 * dereferenced.
350 		 *
351 		 * If an error is returned, uppervp will be NULLVP.  If no
352 		 * error occurs, uppervp will be the locked and referenced.
353 		 * Return vnode, or possibly NULL, depending on what is being
354 		 * requested.  It is possible that the returned uppervp
355 		 * will be the same as upperdvp.
356 		 */
357 		uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp);
358 		UDEBUG((
359 		    "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n",
360 		    uerror,
361 		    upperdvp,
362 		    vrefcnt(upperdvp),
363 		    VOP_ISLOCKED(upperdvp, NULL),
364 		    uppervp,
365 		    (uppervp ? vrefcnt(uppervp) : -99),
366 		    (uppervp ? VOP_ISLOCKED(uppervp, NULL) : -99)
367 		));
368 
369 		/*
370 		 * Disallow write attempts to the filesystem mounted read-only.
371 		 */
372 		if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) &&
373 		    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
374 		    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) {
375 			error = EROFS;
376 			goto out;
377 		}
378 
379 		/*
380 		 * Special case: If cn_consume != 0 then skip out.  The result
381 		 * of the lookup is transfered to our return variable.  If
382 		 * an error occured we have to throw away the results.
383 		 */
384 
385 		if (cnp->cn_consume != 0) {
386 			if ((error = uerror) == 0) {
387 				*ap->a_vpp = uppervp;
388 				uppervp = NULL;
389 			}
390 			goto out;
391 		}
392 
393 		/*
394 		 * Calculate whiteout, fall through.
395 		 */
396 
397 		if (uerror == ENOENT || uerror == EJUSTRETURN) {
398 			if (cnp->cn_flags & ISWHITEOUT) {
399 				iswhiteout = 1;
400 			} else if (lowerdvp != NULLVP) {
401 				int terror;
402 
403 				terror = VOP_GETATTR(upperdvp, &va,
404 					cnp->cn_cred, cnp->cn_thread);
405 				if (terror == 0 && (va.va_flags & OPAQUE))
406 					iswhiteout = 1;
407 			}
408 		}
409 	}
410 
411 	/*
412 	 * In a similar way to the upper layer, do the lookup
413 	 * in the lower layer.   This time, if there is some
414 	 * component magic going on, then vput whatever we got
415 	 * back from the upper layer and return the lower vnode
416 	 * instead.
417 	 */
418 
419 	if (lowerdvp != NULLVP && !iswhiteout) {
420 		int nameiop;
421 
422 		UDEBUG(("B %p\n", lowerdvp));
423 
424 		/*
425 		 * Force only LOOKUPs on the lower node, since
426 		 * we won't be making changes to it anyway.
427 		 */
428 		nameiop = cnp->cn_nameiop;
429 		cnp->cn_nameiop = LOOKUP;
430 		if (um->um_op == UNMNT_BELOW) {
431 			saved_cred = cnp->cn_cred;
432 			cnp->cn_cred = um->um_cred;
433 		}
434 
435 		/*
436 		 * We shouldn't have to worry about locking interactions
437 		 * between the lower layer and our union layer (w.r.t.
438 		 * `..' processing) because we don't futz with lowervp
439 		 * locks in the union-node instantiation code path.
440 		 *
441 		 * union_lookup1() requires lowervp to be locked on entry,
442 		 * and it will be unlocked on return.  The ref count will
443 		 * not change.  On return lowervp doesn't represent anything
444 		 * to us so we NULL it out.
445 		 */
446 		VREF(lowerdvp);
447 		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, td);
448 		lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp);
449 		if (lowerdvp == lowervp)
450 			vrele(lowerdvp);
451 		else
452 			vput(lowerdvp);
453 		lowerdvp = NULL;	/* lowerdvp invalid after vput */
454 
455 		if (um->um_op == UNMNT_BELOW)
456 			cnp->cn_cred = saved_cred;
457 		cnp->cn_nameiop = nameiop;
458 
459 		if (cnp->cn_consume != 0 || lerror == EACCES) {
460 			if ((error = lerror) == 0) {
461 				*ap->a_vpp = lowervp;
462 				lowervp = NULL;
463 			}
464 			goto out;
465 		}
466 	} else {
467 		UDEBUG(("C %p\n", lowerdvp));
468 		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
469 			if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) {
470 				VREF(lowervp);
471 				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, td);
472 				lerror = 0;
473 			}
474 		}
475 	}
476 
477 	/*
478 	 * Ok.  Now we have uerror, uppervp, upperdvp, lerror, and lowervp.
479 	 *
480 	 * 1. If both layers returned an error, select the upper layer.
481 	 *
482 	 * 2. If the upper layer failed and the bottom layer succeeded,
483 	 *    two subcases occur:
484 	 *
485 	 *	a.  The bottom vnode is not a directory, in which case
486 	 *	    just return a new union vnode referencing an
487 	 *	    empty top layer and the existing bottom layer.
488 	 *
489 	 *	b.  The bottom vnode is a directory, in which case
490 	 *	    create a new directory in the top layer and
491 	 *	    and fall through to case 3.
492 	 *
493 	 * 3. If the top layer succeeded, then return a new union
494 	 *    vnode referencing whatever the new top layer and
495 	 *    whatever the bottom layer returned.
496 	 */
497 
498 	/* case 1. */
499 	if ((uerror != 0) && (lerror != 0)) {
500 		error = uerror;
501 		goto out;
502 	}
503 
504 	/* case 2. */
505 	if (uerror != 0 /* && (lerror == 0) */ ) {
506 		if (lowervp->v_type == VDIR) { /* case 2b. */
507 			KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL"));
508 			/*
509 			 * Oops, uppervp has a problem, we may have to shadow.
510 			 */
511 			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
512 			if (uerror) {
513 				error = uerror;
514 				goto out;
515 			}
516 		}
517 	}
518 
519 	/*
520 	 * Must call union_allocvp() with both the upper and lower vnodes
521 	 * referenced and the upper vnode locked.   ap->a_vpp is returned
522 	 * referenced and locked.  lowervp, uppervp, and upperdvp are
523 	 * absorbed by union_allocvp() whether it succeeds or fails.
524 	 *
525 	 * upperdvp is the parent directory of uppervp which may be
526 	 * different, depending on the path, from dvp->un_uppervp.  That's
527 	 * why it is a separate argument.  Note that it must be unlocked.
528 	 *
529 	 * dvp must be locked on entry to the call and will be locked on
530 	 * return.
531 	 */
532 
533 	if (uppervp && uppervp != upperdvp)
534 		VOP_UNLOCK(uppervp, 0, td);
535 	if (lowervp)
536 		VOP_UNLOCK(lowervp, 0, td);
537 	if (upperdvp)
538 		VOP_UNLOCK(upperdvp, 0, td);
539 
540 	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
541 			      uppervp, lowervp, 1);
542 
543 	UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99));
544 
545 	uppervp = NULL;
546 	upperdvp = NULL;
547 	lowervp = NULL;
548 
549 	/*
550 	 *	Termination Code
551 	 *
552 	 *	- put away any extra junk laying around.  Note that lowervp
553 	 *	  (if not NULL) will never be the same as *ap->a_vp and
554 	 *	  neither will uppervp, because when we set that state we
555 	 *	  NULL-out lowervp or uppervp.  On the otherhand, upperdvp
556 	 *	  may match uppervp or *ap->a_vpp.
557 	 *
558 	 *	- relock/unlock dvp if appropriate.
559 	 */
560 
561 out:
562 	if (upperdvp) {
563 		if (upperdvp == uppervp || upperdvp == *ap->a_vpp)
564 			vrele(upperdvp);
565 		else
566 			vput(upperdvp);
567 	}
568 
569 	if (uppervp)
570 		vput(uppervp);
571 
572 	if (lowervp)
573 		vput(lowervp);
574 
575 	/*
576 	 * Restore LOCKPARENT state
577 	 */
578 
579 	if (!lockparent)
580 		cnp->cn_flags &= ~LOCKPARENT;
581 
582 	UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp,
583 		((*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99),
584 		lowervp, uppervp));
585 
586 	/*
587 	 * dvp lock state, determine whether to relock dvp.  dvp is expected
588 	 * to be locked on return if:
589 	 *
590 	 *	- there was an error (except not EJUSTRETURN), or
591 	 *	- we hit the last component and lockparent is true
592 	 *
593 	 * dvp_is_locked is the current state of the dvp lock, not counting
594 	 * the possibility that *ap->a_vpp == dvp (in which case it is locked
595 	 * anyway).  Note that *ap->a_vpp == dvp only if no error occured.
596 	 */
597 
598 	if (*ap->a_vpp != dvp) {
599 		if ((error == 0 || error == EJUSTRETURN) &&
600 		    (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)) {
601 			VOP_UNLOCK(dvp, 0, td);
602 		}
603 	}
604 
605 	/*
606 	 * Diagnostics
607 	 */
608 
609 #ifdef DIAGNOSTIC
610 	if (cnp->cn_namelen == 1 &&
611 	    cnp->cn_nameptr[0] == '.' &&
612 	    *ap->a_vpp != dvp) {
613 		panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp);
614 	}
615 #endif
616 
617 	return (error);
618 }
619 
620 /*
621  * 	union_create:
622  *
623  * a_dvp is locked on entry and remains locked on return.  a_vpp is returned
624  * locked if no error occurs, otherwise it is garbage.
625  */
626 
627 static int
628 union_create(ap)
629 	struct vop_create_args /* {
630 		struct vnode *a_dvp;
631 		struct vnode **a_vpp;
632 		struct componentname *a_cnp;
633 		struct vattr *a_vap;
634 	} */ *ap;
635 {
636 	struct union_node *dun = VTOUNION(ap->a_dvp);
637 	struct componentname *cnp = ap->a_cnp;
638 	struct thread *td = cnp->cn_thread;
639 	struct vnode *dvp;
640 	int error = EROFS;
641 
642 	if ((dvp = union_lock_upper(dun, td)) != NULL) {
643 		struct vnode *vp;
644 		struct mount *mp;
645 
646 		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
647 		if (error == 0) {
648 			mp = ap->a_dvp->v_mount;
649 			VOP_UNLOCK(vp, 0, td);
650 			UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vrefcnt(vp)));
651 			error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
652 				cnp, vp, NULLVP, 1);
653 			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp)));
654 		}
655 		union_unlock_upper(dvp, td);
656 	}
657 	return (error);
658 }
659 
660 static int
661 union_whiteout(ap)
662 	struct vop_whiteout_args /* {
663 		struct vnode *a_dvp;
664 		struct componentname *a_cnp;
665 		int a_flags;
666 	} */ *ap;
667 {
668 	struct union_node *un = VTOUNION(ap->a_dvp);
669 	struct componentname *cnp = ap->a_cnp;
670 	struct vnode *uppervp;
671 	int error;
672 
673 	switch (ap->a_flags) {
674 	case CREATE:
675 	case DELETE:
676 		uppervp = union_lock_upper(un, cnp->cn_thread);
677 		if (uppervp != NULLVP) {
678 			error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags);
679 			union_unlock_upper(uppervp, cnp->cn_thread);
680 		} else
681 			error = EOPNOTSUPP;
682 		break;
683 	case LOOKUP:
684 		error = EOPNOTSUPP;
685 		break;
686 	default:
687 		panic("union_whiteout: unknown op");
688 	}
689 	return (error);
690 }
691 
692 /*
693  * 	union_mknod:
694  *
695  *	a_dvp is locked on entry and should remain locked on return.
696  *	a_vpp is garbagre whether an error occurs or not.
697  */
698 
699 static int
700 union_mknod(ap)
701 	struct vop_mknod_args /* {
702 		struct vnode *a_dvp;
703 		struct vnode **a_vpp;
704 		struct componentname *a_cnp;
705 		struct vattr *a_vap;
706 	} */ *ap;
707 {
708 	struct union_node *dun = VTOUNION(ap->a_dvp);
709 	struct componentname *cnp = ap->a_cnp;
710 	struct vnode *dvp;
711 	int error = EROFS;
712 
713 	if ((dvp = union_lock_upper(dun, cnp->cn_thread)) != NULL) {
714 		error = VOP_MKNOD(dvp, ap->a_vpp, cnp, ap->a_vap);
715 		union_unlock_upper(dvp, cnp->cn_thread);
716 	}
717 	return (error);
718 }
719 
720 /*
721  *	union_open:
722  *
723  *	run open VOP.  When opening the underlying vnode we have to mimic
724  *	vn_open().  What we *really* need to do to avoid screwups if the
725  *	open semantics change is to call vn_open().  For example, ufs blows
726  *	up if you open a file but do not vmio it prior to writing.
727  */
728 
729 static int
730 union_open(ap)
731 	struct vop_open_args /* {
732 		struct vnodeop_desc *a_desc;
733 		struct vnode *a_vp;
734 		int a_mode;
735 		struct ucred *a_cred;
736 		struct thread *a_td;
737 	} */ *ap;
738 {
739 	struct union_node *un = VTOUNION(ap->a_vp);
740 	struct vnode *tvp;
741 	int mode = ap->a_mode;
742 	struct ucred *cred = ap->a_cred;
743 	struct thread *td = ap->a_td;
744 	int error = 0;
745 	int tvpisupper = 1;
746 
747 	/*
748 	 * If there is an existing upper vp then simply open that.
749 	 * The upper vp takes precedence over the lower vp.  When opening
750 	 * a lower vp for writing copy it to the uppervp and then open the
751 	 * uppervp.
752 	 *
753 	 * At the end of this section tvp will be left locked.
754 	 */
755 	if ((tvp = union_lock_upper(un, td)) == NULLVP) {
756 		/*
757 		 * If the lower vnode is being opened for writing, then
758 		 * copy the file contents to the upper vnode and open that,
759 		 * otherwise can simply open the lower vnode.
760 		 */
761 		tvp = un->un_lowervp;
762 		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
763 			int docopy = !(mode & O_TRUNC);
764 			error = union_copyup(un, docopy, cred, td);
765 			tvp = union_lock_upper(un, td);
766 		} else {
767 			un->un_openl++;
768 			VREF(tvp);
769 			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td);
770 			tvpisupper = 0;
771 		}
772 	}
773 
774 	/*
775 	 * We are holding the correct vnode, open it.
776 	 */
777 
778 	if (error == 0)
779 		error = VOP_OPEN(tvp, mode, cred, td, -1);
780 
781 	/*
782 	 * This is absolutely necessary or UFS will blow up.
783 	 */
784         if (error == 0 && vn_canvmio(tvp) == TRUE) {
785                 error = vfs_object_create(tvp, td, cred);
786         }
787 
788 	/*
789 	 * Release any locks held.
790 	 */
791 	if (tvpisupper) {
792 		if (tvp)
793 			union_unlock_upper(tvp, td);
794 	} else {
795 		vput(tvp);
796 	}
797 	return (error);
798 }
799 
800 /*
801  *	union_close:
802  *
803  *	It is unclear whether a_vp is passed locked or unlocked.  Whatever
804  *	the case we do not change it.
805  */
806 
807 static int
808 union_close(ap)
809 	struct vop_close_args /* {
810 		struct vnode *a_vp;
811 		int  a_fflag;
812 		struct ucred *a_cred;
813 		struct thread *a_td;
814 	} */ *ap;
815 {
816 	struct union_node *un = VTOUNION(ap->a_vp);
817 	struct vnode *vp;
818 
819 	if ((vp = un->un_uppervp) == NULLVP) {
820 #ifdef UNION_DIAGNOSTIC
821 		if (un->un_openl <= 0)
822 			panic("union: un_openl cnt");
823 #endif
824 		--un->un_openl;
825 		vp = un->un_lowervp;
826 	}
827 	ap->a_vp = vp;
828 	return (VCALL(vp, VOFFSET(vop_close), ap));
829 }
830 
831 /*
832  * Check access permission on the union vnode.
833  * The access check being enforced is to check
834  * against both the underlying vnode, and any
835  * copied vnode.  This ensures that no additional
836  * file permissions are given away simply because
837  * the user caused an implicit file copy.
838  */
839 static int
840 union_access(ap)
841 	struct vop_access_args /* {
842 		struct vnodeop_desc *a_desc;
843 		struct vnode *a_vp;
844 		int a_mode;
845 		struct ucred *a_cred;
846 		struct thread *a_td;
847 	} */ *ap;
848 {
849 	struct union_node *un = VTOUNION(ap->a_vp);
850 	struct thread *td = ap->a_td;
851 	int error = EACCES;
852 	struct vnode *vp;
853 
854 	/*
855 	 * Disallow write attempts on filesystems mounted read-only.
856 	 */
857 	if ((ap->a_mode & VWRITE) &&
858 	    (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) {
859 		switch (ap->a_vp->v_type) {
860 		case VREG:
861 		case VDIR:
862 		case VLNK:
863 			return (EROFS);
864 		default:
865 			break;
866 		}
867 	}
868 
869 	if ((vp = union_lock_upper(un, td)) != NULLVP) {
870 		ap->a_vp = vp;
871 		error = VCALL(vp, VOFFSET(vop_access), ap);
872 		union_unlock_upper(vp, td);
873 		return(error);
874 	}
875 
876 	if ((vp = un->un_lowervp) != NULLVP) {
877 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
878 		ap->a_vp = vp;
879 
880 		/*
881 		 * Remove VWRITE from a_mode if our mount point is RW, because
882 		 * we want to allow writes and lowervp may be read-only.
883 		 */
884 		if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0)
885 			ap->a_mode &= ~VWRITE;
886 
887 		error = VCALL(vp, VOFFSET(vop_access), ap);
888 		if (error == 0) {
889 			struct union_mount *um;
890 
891 			um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount);
892 
893 			if (um->um_op == UNMNT_BELOW) {
894 				ap->a_cred = um->um_cred;
895 				error = VCALL(vp, VOFFSET(vop_access), ap);
896 			}
897 		}
898 		VOP_UNLOCK(vp, 0, td);
899 	}
900 	return(error);
901 }
902 
903 /*
904  * We handle getattr only to change the fsid and
905  * track object sizes
906  *
907  * It's not clear whether VOP_GETATTR is to be
908  * called with the vnode locked or not.  stat() calls
909  * it with (vp) locked, and fstat() calls it with
910  * (vp) unlocked.
911  *
912  * Because of this we cannot use our normal locking functions
913  * if we do not intend to lock the main a_vp node.  At the moment
914  * we are running without any specific locking at all, but beware
915  * to any programmer that care must be taken if locking is added
916  * to this function.
917  */
918 
919 static int
920 union_getattr(ap)
921 	struct vop_getattr_args /* {
922 		struct vnode *a_vp;
923 		struct vattr *a_vap;
924 		struct ucred *a_cred;
925 		struct thread *a_td;
926 	} */ *ap;
927 {
928 	int error;
929 	struct union_node *un = VTOUNION(ap->a_vp);
930 	struct vnode *vp;
931 	struct vattr *vap;
932 	struct vattr va;
933 
934 	/*
935 	 * Some programs walk the filesystem hierarchy by counting
936 	 * links to directories to avoid stat'ing all the time.
937 	 * This means the link count on directories needs to be "correct".
938 	 * The only way to do that is to call getattr on both layers
939 	 * and fix up the link count.  The link count will not necessarily
940 	 * be accurate but will be large enough to defeat the tree walkers.
941 	 */
942 
943 	vap = ap->a_vap;
944 
945 	if ((vp = un->un_uppervp) != NULLVP) {
946 		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td);
947 		if (error)
948 			return (error);
949 		/* XXX isn't this dangerous without a lock? */
950 		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
951 	}
952 
953 	if (vp == NULLVP) {
954 		vp = un->un_lowervp;
955 	} else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) {
956 		vp = un->un_lowervp;
957 		vap = &va;
958 	} else {
959 		vp = NULLVP;
960 	}
961 
962 	if (vp != NULLVP) {
963 		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td);
964 		if (error)
965 			return (error);
966 		/* XXX isn't this dangerous without a lock? */
967 		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
968 	}
969 
970 	if ((vap != ap->a_vap) && (vap->va_type == VDIR))
971 		ap->a_vap->va_nlink += vap->va_nlink;
972 	return (0);
973 }
974 
975 static int
976 union_setattr(ap)
977 	struct vop_setattr_args /* {
978 		struct vnode *a_vp;
979 		struct vattr *a_vap;
980 		struct ucred *a_cred;
981 		struct thread *a_td;
982 	} */ *ap;
983 {
984 	struct union_node *un = VTOUNION(ap->a_vp);
985 	struct thread *td = ap->a_td;
986 	struct vattr *vap = ap->a_vap;
987 	struct vnode *uppervp;
988 	int error;
989 
990 	/*
991 	 * Disallow write attempts on filesystems mounted read-only.
992 	 */
993 	if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
994 	    (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
995 	     vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
996 	     vap->va_mtime.tv_sec != VNOVAL ||
997 	     vap->va_mode != (mode_t)VNOVAL)) {
998 		return (EROFS);
999 	}
1000 
1001 	/*
1002 	 * Handle case of truncating lower object to zero size
1003 	 * by creating a zero length upper object.  This is to
1004 	 * handle the case of open with O_TRUNC and O_CREAT.
1005 	 */
1006 	if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) {
1007 		error = union_copyup(un, (ap->a_vap->va_size != 0),
1008 			    ap->a_cred, ap->a_td);
1009 		if (error)
1010 			return (error);
1011 	}
1012 
1013 	/*
1014 	 * Try to set attributes in upper layer,
1015 	 * otherwise return read-only filesystem error.
1016 	 */
1017 	error = EROFS;
1018 	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
1019 		error = VOP_SETATTR(un->un_uppervp, ap->a_vap,
1020 					ap->a_cred, ap->a_td);
1021 		if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
1022 			union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
1023 		union_unlock_upper(uppervp, td);
1024 	}
1025 	return (error);
1026 }
1027 
1028 static int
1029 union_read(ap)
1030 	struct vop_read_args /* {
1031 		struct vnode *a_vp;
1032 		struct uio *a_uio;
1033 		int  a_ioflag;
1034 		struct ucred *a_cred;
1035 	} */ *ap;
1036 {
1037 	struct union_node *un = VTOUNION(ap->a_vp);
1038 	struct thread *td = ap->a_uio->uio_td;
1039 	struct vnode *uvp;
1040 	int error;
1041 
1042 	uvp = union_lock_other(un, td);
1043 	KASSERT(uvp != NULL, ("union_read: backing vnode missing!"));
1044 
1045 	error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1046 	union_unlock_other(uvp, td);
1047 
1048 	/*
1049 	 * XXX
1050 	 * Perhaps the size of the underlying object has changed under
1051 	 * our feet.  Take advantage of the offset information present
1052 	 * in the uio structure.
1053 	 */
1054 	if (error == 0) {
1055 		struct union_node *un = VTOUNION(ap->a_vp);
1056 		off_t cur = ap->a_uio->uio_offset;
1057 
1058 		if (uvp == un->un_uppervp) {
1059 			if (cur > un->un_uppersz)
1060 				union_newsize(ap->a_vp, cur, VNOVAL);
1061 		} else {
1062 			if (cur > un->un_lowersz)
1063 				union_newsize(ap->a_vp, VNOVAL, cur);
1064 		}
1065 	}
1066 	return (error);
1067 }
1068 
1069 static int
1070 union_write(ap)
1071 	struct vop_read_args /* {
1072 		struct vnode *a_vp;
1073 		struct uio *a_uio;
1074 		int  a_ioflag;
1075 		struct ucred *a_cred;
1076 	} */ *ap;
1077 {
1078 	struct union_node *un = VTOUNION(ap->a_vp);
1079 	struct thread *td = ap->a_uio->uio_td;
1080 	struct vnode *uppervp;
1081 	int error;
1082 
1083 	if ((uppervp = union_lock_upper(un, td)) == NULLVP)
1084 		panic("union: missing upper layer in write");
1085 
1086 	error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1087 
1088 	/*
1089 	 * The size of the underlying object may be changed by the
1090 	 * write.
1091 	 */
1092 	if (error == 0) {
1093 		off_t cur = ap->a_uio->uio_offset;
1094 
1095 		if (cur > un->un_uppersz)
1096 			union_newsize(ap->a_vp, cur, VNOVAL);
1097 	}
1098 	union_unlock_upper(uppervp, td);
1099 	return (error);
1100 }
1101 
1102 static int
1103 union_lease(ap)
1104 	struct vop_lease_args /* {
1105 		struct vnode *a_vp;
1106 		struct thread *a_td;
1107 		struct ucred *a_cred;
1108 		int a_flag;
1109 	} */ *ap;
1110 {
1111 	struct vnode *ovp = OTHERVP(ap->a_vp);
1112 
1113 	ap->a_vp = ovp;
1114 	return (VCALL(ovp, VOFFSET(vop_lease), ap));
1115 }
1116 
1117 static int
1118 union_ioctl(ap)
1119 	struct vop_ioctl_args /* {
1120 		struct vnode *a_vp;
1121 		u_long  a_command;
1122 		caddr_t  a_data;
1123 		int  a_fflag;
1124 		struct ucred *a_cred;
1125 		struct thread *a_td;
1126 	} */ *ap;
1127 {
1128 	struct vnode *ovp = OTHERVP(ap->a_vp);
1129 
1130 	ap->a_vp = ovp;
1131 	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1132 }
1133 
1134 static int
1135 union_poll(ap)
1136 	struct vop_poll_args /* {
1137 		struct vnode *a_vp;
1138 		int  a_events;
1139 		struct ucred *a_cred;
1140 		struct thread *a_td;
1141 	} */ *ap;
1142 {
1143 	struct vnode *ovp = OTHERVP(ap->a_vp);
1144 
1145 	ap->a_vp = ovp;
1146 	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1147 }
1148 
1149 static int
1150 union_revoke(ap)
1151 	struct vop_revoke_args /* {
1152 		struct vnode *a_vp;
1153 		int a_flags;
1154 		struct thread *a_td;
1155 	} */ *ap;
1156 {
1157 	struct vnode *vp = ap->a_vp;
1158 
1159 	if (UPPERVP(vp))
1160 		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1161 	if (LOWERVP(vp))
1162 		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1163 	vgone(vp);
1164 	return (0);
1165 }
1166 
1167 static int
1168 union_fsync(ap)
1169 	struct vop_fsync_args /* {
1170 		struct vnode *a_vp;
1171 		struct ucred *a_cred;
1172 		int  a_waitfor;
1173 		struct thread *a_td;
1174 	} */ *ap;
1175 {
1176 	int error = 0;
1177 	struct thread *td = ap->a_td;
1178 	struct vnode *targetvp;
1179 	struct union_node *un = VTOUNION(ap->a_vp);
1180 
1181 	if ((targetvp = union_lock_other(un, td)) != NULLVP) {
1182 		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, td);
1183 		union_unlock_other(targetvp, td);
1184 	}
1185 
1186 	return (error);
1187 }
1188 
1189 /*
1190  *	union_remove:
1191  *
1192  *	Remove the specified cnp.  The dvp and vp are passed to us locked
1193  *	and must remain locked on return.
1194  */
1195 
1196 static int
1197 union_remove(ap)
1198 	struct vop_remove_args /* {
1199 		struct vnode *a_dvp;
1200 		struct vnode *a_vp;
1201 		struct componentname *a_cnp;
1202 	} */ *ap;
1203 {
1204 	struct union_node *dun = VTOUNION(ap->a_dvp);
1205 	struct union_node *un = VTOUNION(ap->a_vp);
1206 	struct componentname *cnp = ap->a_cnp;
1207 	struct thread *td = cnp->cn_thread;
1208 	struct vnode *uppervp;
1209 	struct vnode *upperdvp;
1210 	int error;
1211 
1212 	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
1213 		panic("union remove: null upper vnode");
1214 
1215 	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
1216 		if (union_dowhiteout(un, cnp->cn_cred, td))
1217 			cnp->cn_flags |= DOWHITEOUT;
1218 		error = VOP_REMOVE(upperdvp, uppervp, cnp);
1219 #if 0
1220 		/* XXX */
1221 		if (!error)
1222 			union_removed_upper(un);
1223 #endif
1224 		union_unlock_upper(uppervp, td);
1225 	} else {
1226 		error = union_mkwhiteout(
1227 			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
1228 			    upperdvp, ap->a_cnp, un->un_path);
1229 	}
1230 	union_unlock_upper(upperdvp, td);
1231 	return (error);
1232 }
1233 
1234 /*
1235  *	union_link:
1236  *
1237  *	tdvp and vp will be locked on entry.
1238  *	tdvp and vp should remain locked on return.
1239  */
1240 
1241 static int
1242 union_link(ap)
1243 	struct vop_link_args /* {
1244 		struct vnode *a_tdvp;
1245 		struct vnode *a_vp;
1246 		struct componentname *a_cnp;
1247 	} */ *ap;
1248 {
1249 	struct componentname *cnp = ap->a_cnp;
1250 	struct thread *td = cnp->cn_thread;
1251 	struct union_node *dun = VTOUNION(ap->a_tdvp);
1252 	struct vnode *vp;
1253 	struct vnode *tdvp;
1254 	int error = 0;
1255 
1256 	if (ap->a_tdvp->v_op != ap->a_vp->v_op) {
1257 		vp = ap->a_vp;
1258 	} else {
1259 		struct union_node *tun = VTOUNION(ap->a_vp);
1260 
1261 		if (tun->un_uppervp == NULLVP) {
1262 #if 0
1263 			if (dun->un_uppervp == tun->un_dirvp) {
1264 				if (dun->un_flags & UN_ULOCK) {
1265 					dun->un_flags &= ~UN_ULOCK;
1266 					VOP_UNLOCK(dun->un_uppervp, 0, td);
1267 				}
1268 			}
1269 #endif
1270 			error = union_copyup(tun, 1, cnp->cn_cred, td);
1271 #if 0
1272 			if (dun->un_uppervp == tun->un_dirvp) {
1273 				vn_lock(dun->un_uppervp,
1274 					    LK_EXCLUSIVE | LK_RETRY, td);
1275 				dun->un_flags |= UN_ULOCK;
1276 			}
1277 #endif
1278 			if (error)
1279 				return (error);
1280 		}
1281 		vp = tun->un_uppervp;
1282 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1283 	}
1284 
1285 	/*
1286 	 * Make sure upper is locked, then unlock the union directory we were
1287 	 * called with to avoid a deadlock while we are calling VOP_LINK() on
1288 	 * the upper (with tdvp locked and vp not locked).  Our ap->a_tdvp
1289 	 * is expected to be locked on return.
1290 	 */
1291 
1292 	if ((tdvp = union_lock_upper(dun, td)) == NULLVP)
1293 		return (EROFS);
1294 
1295 	VOP_UNLOCK(ap->a_tdvp, 0, td);		/* unlock calling node */
1296 	error = VOP_LINK(tdvp, vp, cnp);	/* call link on upper */
1297 
1298 	/*
1299 	 * Unlock tun->un_uppervp if we locked it above.
1300 	 */
1301 	if (ap->a_tdvp->v_op == ap->a_vp->v_op)
1302 		VOP_UNLOCK(vp, 0, td);
1303 	/*
1304 	 * We have to unlock tdvp prior to relocking our calling node in
1305 	 * order to avoid a deadlock.  We also have to unlock ap->a_vp
1306 	 * before relocking the directory, but then we have to relock
1307 	 * ap->a_vp as our caller expects.
1308 	 */
1309 	VOP_UNLOCK(ap->a_vp, 0, td);
1310 	union_unlock_upper(tdvp, td);
1311 	vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, td);
1312 	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, td);
1313 	return (error);
1314 }
1315 
1316 static int
1317 union_rename(ap)
1318 	struct vop_rename_args  /* {
1319 		struct vnode *a_fdvp;
1320 		struct vnode *a_fvp;
1321 		struct componentname *a_fcnp;
1322 		struct vnode *a_tdvp;
1323 		struct vnode *a_tvp;
1324 		struct componentname *a_tcnp;
1325 	} */ *ap;
1326 {
1327 	int error;
1328 	struct vnode *fdvp = ap->a_fdvp;
1329 	struct vnode *fvp = ap->a_fvp;
1330 	struct vnode *tdvp = ap->a_tdvp;
1331 	struct vnode *tvp = ap->a_tvp;
1332 
1333 	/*
1334 	 * Figure out what fdvp to pass to our upper or lower vnode.  If we
1335 	 * replace the fdvp, release the original one and ref the new one.
1336 	 */
1337 
1338 	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
1339 		struct union_node *un = VTOUNION(fdvp);
1340 		if (un->un_uppervp == NULLVP) {
1341 			/*
1342 			 * this should never happen in normal
1343 			 * operation but might if there was
1344 			 * a problem creating the top-level shadow
1345 			 * directory.
1346 			 */
1347 			error = EXDEV;
1348 			goto bad;
1349 		}
1350 		fdvp = un->un_uppervp;
1351 		VREF(fdvp);
1352 		vrele(ap->a_fdvp);
1353 	}
1354 
1355 	/*
1356 	 * Figure out what fvp to pass to our upper or lower vnode.  If we
1357 	 * replace the fvp, release the original one and ref the new one.
1358 	 */
1359 
1360 	if (fvp->v_op == union_vnodeop_p) {	/* always true */
1361 		struct union_node *un = VTOUNION(fvp);
1362 #if 0
1363 		struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount);
1364 #endif
1365 
1366 		if (un->un_uppervp == NULLVP) {
1367 			switch(fvp->v_type) {
1368 			case VREG:
1369 				vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread);
1370 				error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_thread);
1371 				VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_thread);
1372 				if (error)
1373 					goto bad;
1374 				break;
1375 			case VDIR:
1376 				/*
1377 				 * XXX not yet.
1378 				 *
1379 				 * There is only one way to rename a directory
1380 				 * based in the lowervp, and that is to copy
1381 				 * the entire directory hierarchy.  Otherwise
1382 				 * it would not last across a reboot.
1383 				 */
1384 #if 0
1385 				vrele(fvp);
1386 				fvp = NULL;
1387 				vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread);
1388 				error = union_mkshadow(um, fdvp,
1389 					    ap->a_fcnp, &un->un_uppervp);
1390 				VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_thread);
1391 				if (un->un_uppervp)
1392 					VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_thread);
1393 				if (error)
1394 					goto bad;
1395 				break;
1396 #endif
1397 			default:
1398 				error = EXDEV;
1399 				goto bad;
1400 			}
1401 		}
1402 
1403 		if (un->un_lowervp != NULLVP)
1404 			ap->a_fcnp->cn_flags |= DOWHITEOUT;
1405 		fvp = un->un_uppervp;
1406 		VREF(fvp);
1407 		vrele(ap->a_fvp);
1408 	}
1409 
1410 	/*
1411 	 * Figure out what tdvp (destination directory) to pass to the
1412 	 * lower level.  If we replace it with uppervp, we need to vput the
1413 	 * old one.  The exclusive lock is transfered to what we will pass
1414 	 * down in the VOP_RENAME() and we replace uppervp with a simple
1415 	 * reference.
1416 	 */
1417 
1418 	if (tdvp->v_op == union_vnodeop_p) {
1419 		struct union_node *un = VTOUNION(tdvp);
1420 
1421 		if (un->un_uppervp == NULLVP) {
1422 			/*
1423 			 * This should never happen in normal
1424 			 * operation but might if there was
1425 			 * a problem creating the top-level shadow
1426 			 * directory.
1427 			 */
1428 			error = EXDEV;
1429 			goto bad;
1430 		}
1431 
1432 		/*
1433 		 * New tdvp is a lock and reference on uppervp.
1434 		 * Put away the old tdvp.
1435 		 */
1436 		tdvp = union_lock_upper(un, ap->a_tcnp->cn_thread);
1437 		vput(ap->a_tdvp);
1438 	}
1439 
1440 	/*
1441 	 * Figure out what tvp (destination file) to pass to the
1442 	 * lower level.
1443 	 *
1444 	 * If the uppervp file does not exist, put away the (wrong)
1445 	 * file and change tvp to NULL.
1446 	 */
1447 
1448 	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
1449 		struct union_node *un = VTOUNION(tvp);
1450 
1451 		tvp = union_lock_upper(un, ap->a_tcnp->cn_thread);
1452 		vput(ap->a_tvp);
1453 		/* note: tvp may be NULL */
1454 	}
1455 
1456 	/*
1457 	 * VOP_RENAME() releases/vputs prior to returning, so we have no
1458 	 * cleanup to do.
1459 	 */
1460 
1461 	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
1462 
1463 	/*
1464 	 * Error.  We still have to release / vput the various elements.
1465 	 */
1466 
1467 bad:
1468 	vrele(fdvp);
1469 	if (fvp)
1470 		vrele(fvp);
1471 	vput(tdvp);
1472 	if (tvp != NULLVP) {
1473 		if (tvp != tdvp)
1474 			vput(tvp);
1475 		else
1476 			vrele(tvp);
1477 	}
1478 	return (error);
1479 }
1480 
1481 static int
1482 union_mkdir(ap)
1483 	struct vop_mkdir_args /* {
1484 		struct vnode *a_dvp;
1485 		struct vnode **a_vpp;
1486 		struct componentname *a_cnp;
1487 		struct vattr *a_vap;
1488 	} */ *ap;
1489 {
1490 	struct union_node *dun = VTOUNION(ap->a_dvp);
1491 	struct componentname *cnp = ap->a_cnp;
1492 	struct thread *td = cnp->cn_thread;
1493 	struct vnode *upperdvp;
1494 	int error = EROFS;
1495 
1496 	if ((upperdvp = union_lock_upper(dun, td)) != NULLVP) {
1497 		struct vnode *vp;
1498 
1499 		error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap);
1500 		union_unlock_upper(upperdvp, td);
1501 
1502 		if (error == 0) {
1503 			VOP_UNLOCK(vp, 0, td);
1504 			UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vrefcnt(vp)));
1505 			error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount,
1506 				ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1);
1507 			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp)));
1508 		}
1509 	}
1510 	return (error);
1511 }
1512 
1513 static int
1514 union_rmdir(ap)
1515 	struct vop_rmdir_args /* {
1516 		struct vnode *a_dvp;
1517 		struct vnode *a_vp;
1518 		struct componentname *a_cnp;
1519 	} */ *ap;
1520 {
1521 	struct union_node *dun = VTOUNION(ap->a_dvp);
1522 	struct union_node *un = VTOUNION(ap->a_vp);
1523 	struct componentname *cnp = ap->a_cnp;
1524 	struct thread *td = cnp->cn_thread;
1525 	struct vnode *upperdvp;
1526 	struct vnode *uppervp;
1527 	int error;
1528 
1529 	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
1530 		panic("union rmdir: null upper vnode");
1531 
1532 	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
1533 		if (union_dowhiteout(un, cnp->cn_cred, td))
1534 			cnp->cn_flags |= DOWHITEOUT;
1535 		error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp);
1536 		union_unlock_upper(uppervp, td);
1537 	} else {
1538 		error = union_mkwhiteout(
1539 			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
1540 			    dun->un_uppervp, ap->a_cnp, un->un_path);
1541 	}
1542 	union_unlock_upper(upperdvp, td);
1543 	return (error);
1544 }
1545 
1546 /*
1547  *	union_symlink:
1548  *
1549  *	dvp is locked on entry and remains locked on return.  a_vpp is garbage
1550  *	(unused).
1551  */
1552 
1553 static int
1554 union_symlink(ap)
1555 	struct vop_symlink_args /* {
1556 		struct vnode *a_dvp;
1557 		struct vnode **a_vpp;
1558 		struct componentname *a_cnp;
1559 		struct vattr *a_vap;
1560 		char *a_target;
1561 	} */ *ap;
1562 {
1563 	struct union_node *dun = VTOUNION(ap->a_dvp);
1564 	struct componentname *cnp = ap->a_cnp;
1565 	struct thread *td = cnp->cn_thread;
1566 	struct vnode *dvp;
1567 	int error = EROFS;
1568 
1569 	if ((dvp = union_lock_upper(dun, td)) != NULLVP) {
1570 		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1571 			    ap->a_target);
1572 		union_unlock_upper(dvp, td);
1573 	}
1574 	return (error);
1575 }
1576 
/*
 * union_readdir() works in concert with getdirentries() and
 * readdir(3) to provide a list of entries in the unioned
 * directories.  getdirentries() is responsible for walking
 * down the union stack.  readdir(3) is responsible for
 * eliminating duplicate names from the returned data stream.
 */
1584 static int
1585 union_readdir(ap)
1586 	struct vop_readdir_args /* {
1587 		struct vnode *a_vp;
1588 		struct uio *a_uio;
1589 		struct ucred *a_cred;
1590 		int *a_eofflag;
1591 		u_long *a_cookies;
1592 		int a_ncookies;
1593 	} */ *ap;
1594 {
1595 	struct union_node *un = VTOUNION(ap->a_vp);
1596 	struct thread *td = ap->a_uio->uio_td;
1597 	struct vnode *uvp;
1598 	int error = 0;
1599 
1600 	if ((uvp = union_lock_upper(un, td)) != NULLVP) {
1601 		ap->a_vp = uvp;
1602 		error = VCALL(uvp, VOFFSET(vop_readdir), ap);
1603 		union_unlock_upper(uvp, td);
1604 	}
1605 	return(error);
1606 }
1607 
1608 static int
1609 union_readlink(ap)
1610 	struct vop_readlink_args /* {
1611 		struct vnode *a_vp;
1612 		struct uio *a_uio;
1613 		struct ucred *a_cred;
1614 	} */ *ap;
1615 {
1616 	int error;
1617 	struct union_node *un = VTOUNION(ap->a_vp);
1618 	struct uio *uio = ap->a_uio;
1619 	struct thread *td = uio->uio_td;
1620 	struct vnode *vp;
1621 
1622 	vp = union_lock_other(un, td);
1623 	KASSERT(vp != NULL, ("union_readlink: backing vnode missing!"));
1624 
1625 	ap->a_vp = vp;
1626 	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1627 	union_unlock_other(vp, td);
1628 
1629 	return (error);
1630 }
1631 
1632 static int
1633 union_getwritemount(ap)
1634 	struct vop_getwritemount_args /* {
1635 		struct vnode *a_vp;
1636 		struct mount **a_mpp;
1637 	} */ *ap;
1638 {
1639 	struct vnode *vp = ap->a_vp;
1640 	struct vnode *uvp = UPPERVP(vp);
1641 
1642 	if (uvp == NULL) {
1643 		VI_LOCK(vp);
1644 		if (vp->v_iflag & VI_FREE) {
1645 			VI_UNLOCK(vp);
1646 			return (EOPNOTSUPP);
1647 		}
1648 		VI_UNLOCK(vp);
1649 		return (EACCES);
1650 	}
1651 	return(VOP_GETWRITEMOUNT(uvp, ap->a_mpp));
1652 }
1653 
1654 /*
1655  *	union_inactive:
1656  *
1657  *	Called with the vnode locked.  We are expected to unlock the vnode.
1658  */
1659 
1660 static int
1661 union_inactive(ap)
1662 	struct vop_inactive_args /* {
1663 		struct vnode *a_vp;
1664 		struct thread *a_td;
1665 	} */ *ap;
1666 {
1667 	struct vnode *vp = ap->a_vp;
1668 	struct thread *td = ap->a_td;
1669 	struct union_node *un = VTOUNION(vp);
1670 
1671 	/*
1672 	 * Do nothing (and _don't_ bypass).
1673 	 * Wait to vrele lowervp until reclaim,
1674 	 * so that until then our union_node is in the
1675 	 * cache and reusable.
1676 	 *
1677 	 */
1678 
1679 	if (un->un_dircache != NULL)
1680 		union_dircache_free(un);
1681 
1682 #if 0
1683 	if ((un->un_flags & UN_ULOCK) && un->un_uppervp) {
1684 		un->un_flags &= ~UN_ULOCK;
1685 		VOP_UNLOCK(un->un_uppervp, 0, td);
1686 	}
1687 #endif
1688 
1689 	VOP_UNLOCK(vp, 0, td);
1690 
1691 	if ((un->un_flags & UN_CACHED) == 0)
1692 		vgone(vp);
1693 
1694 	return (0);
1695 }
1696 
1697 static int
1698 union_reclaim(ap)
1699 	struct vop_reclaim_args /* {
1700 		struct vnode *a_vp;
1701 	} */ *ap;
1702 {
1703 	union_freevp(ap->a_vp);
1704 
1705 	return (0);
1706 }
1707 
/*
 * A union vnode does not hold a VM object of its own, and there is no need
 * to create one for the upper or lower vp here because that is done in
 * union_open().
 */
1712 static int
1713 union_createvobject(ap)
1714 	struct vop_createvobject_args /* {
1715 		struct vnode *vp;
1716 		struct ucred *cred;
1717 		struct thread *td;
1718 	} */ *ap;
1719 {
1720 	struct vnode *vp = ap->a_vp;
1721 
1722 	vp->v_vflag |= VV_OBJBUF;
1723 	return (0);
1724 }
1725 
1726 /*
1727  * We have nothing to destroy and this operation shouldn't be bypassed.
1728  */
1729 static int
1730 union_destroyvobject(ap)
1731 	struct vop_destroyvobject_args /* {
1732 		struct vnode *vp;
1733 	} */ *ap;
1734 {
1735 	struct vnode *vp = ap->a_vp;
1736 
1737 	vp->v_vflag &= ~VV_OBJBUF;
1738 	return (0);
1739 }
1740 
1741 /*
1742  * Get VM object from the upper or lower vp
1743  */
1744 static int
1745 union_getvobject(ap)
1746 	struct vop_getvobject_args /* {
1747 		struct vnode *vp;
1748 		struct vm_object **objpp;
1749 	} */ *ap;
1750 {
1751 	struct vnode *ovp = OTHERVP(ap->a_vp);
1752 
1753 	if (ovp == NULL)
1754 		return EINVAL;
1755 	return (VOP_GETVOBJECT(ovp, ap->a_objpp));
1756 }
1757 
1758 static int
1759 union_print(ap)
1760 	struct vop_print_args /* {
1761 		struct vnode *a_vp;
1762 	} */ *ap;
1763 {
1764 	struct vnode *vp = ap->a_vp;
1765 
1766 	printf("\tvp=%p, uppervp=%p, lowervp=%p\n",
1767 	       vp, UPPERVP(vp), LOWERVP(vp));
1768 	if (UPPERVP(vp) != NULLVP)
1769 		vprint("union: upper", UPPERVP(vp));
1770 	if (LOWERVP(vp) != NULLVP)
1771 		vprint("union: lower", LOWERVP(vp));
1772 
1773 	return (0);
1774 }
1775 
1776 static int
1777 union_pathconf(ap)
1778 	struct vop_pathconf_args /* {
1779 		struct vnode *a_vp;
1780 		int a_name;
1781 		int *a_retval;
1782 	} */ *ap;
1783 {
1784 	int error;
1785 	struct thread *td = curthread;		/* XXX */
1786 	struct union_node *un = VTOUNION(ap->a_vp);
1787 	struct vnode *vp;
1788 
1789 	vp = union_lock_other(un, td);
1790 	KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!"));
1791 
1792 	ap->a_vp = vp;
1793 	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1794 	union_unlock_other(vp, td);
1795 
1796 	return (error);
1797 }
1798 
1799 static int
1800 union_advlock(ap)
1801 	struct vop_advlock_args /* {
1802 		struct vnode *a_vp;
1803 		caddr_t  a_id;
1804 		int  a_op;
1805 		struct flock *a_fl;
1806 		int  a_flags;
1807 	} */ *ap;
1808 {
1809 	register struct vnode *ovp = OTHERVP(ap->a_vp);
1810 
1811 	ap->a_vp = ovp;
1812 	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1813 }
1814 
1815 
/*
 * XXX - vop_strategy must be hand coded because it has no
 * vnode in its arguments.
 * YYY - and it is not coherent with anything.
 *
 * This goes away with a merged VM/buffer cache.
 */
1823 static int
1824 union_strategy(ap)
1825 	struct vop_strategy_args /* {
1826 		struct vnode *a_vp;
1827 		struct buf *a_bp;
1828 	} */ *ap;
1829 {
1830 	struct buf *bp = ap->a_bp;
1831 	struct vnode *othervp = OTHERVP(bp->b_vp);
1832 
1833 	KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)",
1834 	    __func__, ap->a_vp, ap->a_bp->b_vp));
1835 #ifdef DIAGNOSTIC
1836 	if (othervp == NULLVP)
1837 		panic("union_strategy: nil vp");
1838 	if ((bp->b_iocmd == BIO_WRITE) &&
1839 	    (othervp == LOWERVP(bp->b_vp)))
1840 		panic("union_strategy: writing to lowervp");
1841 #endif
1842 	return (VOP_STRATEGY(othervp, bp));
1843 }
1844 
1845 /*
1846  * Global vfs data structures
1847  */
1848 vop_t **union_vnodeop_p;
1849 static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
1850 	{ &vop_default_desc,		(vop_t *) vop_defaultop },
1851 	{ &vop_access_desc,		(vop_t *) union_access },
1852 	{ &vop_advlock_desc,		(vop_t *) union_advlock },
1853 	{ &vop_bmap_desc,		(vop_t *) vop_eopnotsupp },
1854 	{ &vop_close_desc,		(vop_t *) union_close },
1855 	{ &vop_create_desc,		(vop_t *) union_create },
1856 	{ &vop_createvobject_desc,	(vop_t *) union_createvobject },
1857 	{ &vop_destroyvobject_desc,	(vop_t *) union_destroyvobject },
1858 	{ &vop_fsync_desc,		(vop_t *) union_fsync },
1859 	{ &vop_getattr_desc,		(vop_t *) union_getattr },
1860 	{ &vop_getvobject_desc,		(vop_t *) union_getvobject },
1861 	{ &vop_inactive_desc,		(vop_t *) union_inactive },
1862 	{ &vop_ioctl_desc,		(vop_t *) union_ioctl },
1863 	{ &vop_lease_desc,		(vop_t *) union_lease },
1864 	{ &vop_link_desc,		(vop_t *) union_link },
1865 	{ &vop_lookup_desc,		(vop_t *) union_lookup },
1866 	{ &vop_mkdir_desc,		(vop_t *) union_mkdir },
1867 	{ &vop_mknod_desc,		(vop_t *) union_mknod },
1868 	{ &vop_open_desc,		(vop_t *) union_open },
1869 	{ &vop_pathconf_desc,		(vop_t *) union_pathconf },
1870 	{ &vop_poll_desc,		(vop_t *) union_poll },
1871 	{ &vop_print_desc,		(vop_t *) union_print },
1872 	{ &vop_read_desc,		(vop_t *) union_read },
1873 	{ &vop_readdir_desc,		(vop_t *) union_readdir },
1874 	{ &vop_readlink_desc,		(vop_t *) union_readlink },
1875 	{ &vop_getwritemount_desc,	(vop_t *) union_getwritemount },
1876 	{ &vop_reclaim_desc,		(vop_t *) union_reclaim },
1877 	{ &vop_remove_desc,		(vop_t *) union_remove },
1878 	{ &vop_rename_desc,		(vop_t *) union_rename },
1879 	{ &vop_revoke_desc,		(vop_t *) union_revoke },
1880 	{ &vop_rmdir_desc,		(vop_t *) union_rmdir },
1881 	{ &vop_setattr_desc,		(vop_t *) union_setattr },
1882 	{ &vop_strategy_desc,		(vop_t *) union_strategy },
1883 	{ &vop_symlink_desc,		(vop_t *) union_symlink },
1884 	{ &vop_whiteout_desc,		(vop_t *) union_whiteout },
1885 	{ &vop_write_desc,		(vop_t *) union_write },
1886 	{ NULL, NULL }
1887 };
1888 static struct vnodeopv_desc union_vnodeop_opv_desc =
1889 	{ &union_vnodeop_p, union_vnodeop_entries };
1890 
1891 VNODEOP_SET(union_vnodeop_opv_desc);
1892