xref: /freebsd/sys/fs/unionfs/union_subr.c (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1 /*-
2  * Copyright (c) 1994 Jan-Simon Pendry
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6  * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Jan-Simon Pendry.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36  * $FreeBSD$
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/mutex.h>
44 #include <sys/malloc.h>
45 #include <sys/mount.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/vnode.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/filedesc.h>
52 #include <sys/stat.h>
53 #include <sys/resourcevar.h>
54 
55 #ifdef MAC
56 #include <sys/mac.h>
57 #endif
58 
59 #include <vm/uma.h>
60 
61 #include <fs/unionfs/union.h>
62 
/* malloc(9) type used in this file for struct unionfs_node allocations. */
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
/* malloc(9) type used in this file for the un_path name copies. */
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
65 
/*
 * Filesystem initialization hook.
 *
 * Only emits a debug trace; vfsp is not examined.  Always returns 0.
 */
int
unionfs_init(struct vfsconf *vfsp)
{
	/* printed during system boot */
	UNIONFSDEBUG("unionfs_init\n");

	return (0);
}
75 
/*
 * Filesystem teardown hook.
 *
 * There is nothing to release here; vfsp is not examined.
 * Always returns 0.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{

	return (0);
}
84 
85 /*
86  * Make a new or get existing unionfs node.
87  *
88  * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
89  * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
90  * you should not lock plurality simultaneously.
91  */
92 int
93 unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
94 		struct vnode *lowervp, struct vnode *dvp,
95 		struct vnode **vpp, struct componentname *cnp,
96 		struct thread *td)
97 {
98 	struct unionfs_mount *ump;
99 	struct unionfs_node *unp;
100 	struct vnode   *vp;
101 	int		error;
102 	int		lkflags;
103 	char	       *path;
104 
105 	ump = MOUNTTOUNIONFSMOUNT(mp);
106 	lkflags = (cnp ? cnp->cn_lkflags : 0);
107 	path = (cnp ? cnp->cn_nameptr : NULL);
108 
109 	if (uppervp == NULLVP && lowervp == NULLVP)
110 		panic("unionfs_nodeget: upper and lower is null");
111 
112 	/* If it has no ISLASTCN flag, path check is skipped. */
113 	if (cnp && !(cnp->cn_flags & ISLASTCN))
114 		path = NULL;
115 
116 	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
117 	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
118 		if (dvp == NULLVP)
119 			return (EINVAL);
120 	}
121 
122 	/*
123 	 * Do the MALLOC before the getnewvnode since doing so afterward
124 	 * might cause a bogus v_data pointer to get dereferenced elsewhere
125 	 * if MALLOC should block.
126 	 */
127 	MALLOC(unp, struct unionfs_node *, sizeof(struct unionfs_node),
128 	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
129 
130 	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
131 	if (error != 0) {
132 		FREE(unp, M_UNIONFSNODE);
133 		return (error);
134 	}
135 	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
136 	if (error != 0) {
137 		FREE(unp, M_UNIONFSNODE);
138 		return (error);
139 	}
140 	if (dvp != NULLVP)
141 		vref(dvp);
142 	if (uppervp != NULLVP)
143 		vref(uppervp);
144 	if (lowervp != NULLVP)
145 		vref(lowervp);
146 
147 	unp->un_vnode = vp;
148 	unp->un_uppervp = uppervp;
149 	unp->un_lowervp = lowervp;
150 	unp->un_dvp = dvp;
151 	if (uppervp != NULLVP)
152 		vp->v_vnlock = uppervp->v_vnlock;
153 	else
154 		vp->v_vnlock = lowervp->v_vnlock;
155 
156 	if (path != NULL) {
157 		unp->un_path = (char *)
158 		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
159 		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
160 		unp->un_path[cnp->cn_namelen] = '\0';
161 	}
162 	vp->v_type = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
163 	vp->v_data = unp;
164 
165 	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
166 	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
167 		vp->v_vflag |= VV_ROOT;
168 
169 	if (lkflags & LK_TYPE_MASK)
170 		vn_lock(vp, lkflags | LK_RETRY);
171 
172 	*vpp = vp;
173 
174 	return (0);
175 }
176 
177 /*
178  * Clean up the unionfs node.
179  */
180 void
181 unionfs_noderem(struct vnode *vp, struct thread *td)
182 {
183 	int		vfslocked;
184 	struct unionfs_node *unp;
185 	struct unionfs_node_status *unsp, *unsp_tmp;
186 	struct vnode   *lvp;
187 	struct vnode   *uvp;
188 
189 	/*
190 	 * Use the interlock to protect the clearing of v_data to
191 	 * prevent faults in unionfs_lock().
192 	 */
193 	VI_LOCK(vp);
194 	unp = VTOUNIONFS(vp);
195 	lvp = unp->un_lowervp;
196 	uvp = unp->un_uppervp;
197 	unp->un_lowervp = unp->un_uppervp = NULLVP;
198 
199 	vp->v_vnlock = &(vp->v_lock);
200 	vp->v_data = NULL;
201 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp), td);
202 	if (lvp != NULLVP)
203 		VOP_UNLOCK(lvp, 0);
204 	if (uvp != NULLVP)
205 		VOP_UNLOCK(uvp, 0);
206 	vp->v_object = NULL;
207 
208 	if (lvp != NULLVP) {
209 		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
210 		vrele(lvp);
211 		VFS_UNLOCK_GIANT(vfslocked);
212 	}
213 	if (uvp != NULLVP) {
214 		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
215 		vrele(uvp);
216 		VFS_UNLOCK_GIANT(vfslocked);
217 	}
218 	if (unp->un_dvp != NULLVP) {
219 		vfslocked = VFS_LOCK_GIANT(unp->un_dvp->v_mount);
220 		vrele(unp->un_dvp);
221 		VFS_UNLOCK_GIANT(vfslocked);
222 		unp->un_dvp = NULLVP;
223 	}
224 	if (unp->un_path) {
225 		free(unp->un_path, M_UNIONFSPATH);
226 		unp->un_path = NULL;
227 	}
228 
229 	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
230 		LIST_REMOVE(unsp, uns_list);
231 		free(unsp, M_TEMP);
232 	}
233 	FREE(unp, M_UNIONFSNODE);
234 }
235 
236 /*
237  * Get the unionfs node status.
238  * You need exclusive lock this vnode.
239  */
240 void
241 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
242 			struct unionfs_node_status **unspp)
243 {
244 	struct unionfs_node_status *unsp;
245 
246 	KASSERT(NULL != unspp, ("null pointer"));
247 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
248 
249 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
250 		if (unsp->uns_tid == td->td_tid) {
251 			*unspp = unsp;
252 			return;
253 		}
254 	}
255 
256 	/* create a new unionfs node status */
257 	MALLOC(unsp, struct unionfs_node_status *,
258 	    sizeof(struct unionfs_node_status), M_TEMP, M_WAITOK | M_ZERO);
259 
260 	unsp->uns_tid = td->td_tid;
261 	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
262 
263 	*unspp = unsp;
264 }
265 
266 /*
267  * Remove the unionfs node status, if you can.
268  * You need exclusive lock this vnode.
269  */
270 void
271 unionfs_tryrem_node_status(struct unionfs_node *unp, struct thread *td,
272 			   struct unionfs_node_status *unsp)
273 {
274 	KASSERT(NULL != unsp, ("null pointer"));
275 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
276 
277 	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
278 		return;
279 
280 	LIST_REMOVE(unsp, uns_list);
281 	free(unsp, M_TEMP);
282 }
283 
284 /*
285  * Create upper node attr.
286  */
287 void
288 unionfs_create_uppervattr_core(struct unionfs_mount *ump,
289 			       struct vattr *lva,
290 			       struct vattr *uva,
291 			       struct thread *td)
292 {
293 	VATTR_NULL(uva);
294 	uva->va_type = lva->va_type;
295 	uva->va_atime = lva->va_atime;
296 	uva->va_mtime = lva->va_mtime;
297 	uva->va_ctime = lva->va_ctime;
298 
299 	switch (ump->um_copymode) {
300 	case UNIONFS_TRANSPARENT:
301 		uva->va_mode = lva->va_mode;
302 		uva->va_uid = lva->va_uid;
303 		uva->va_gid = lva->va_gid;
304 		break;
305 	case UNIONFS_MASQUERADE:
306 		if (ump->um_uid == lva->va_uid) {
307 			uva->va_mode = lva->va_mode & 077077;
308 			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
309 			uva->va_uid = lva->va_uid;
310 			uva->va_gid = lva->va_gid;
311 		} else {
312 			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
313 			uva->va_uid = ump->um_uid;
314 			uva->va_gid = ump->um_gid;
315 		}
316 		break;
317 	default:		/* UNIONFS_TRADITIONAL */
318 		FILEDESC_SLOCK(td->td_proc->p_fd);
319 		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
320 		FILEDESC_SUNLOCK(td->td_proc->p_fd);
321 		uva->va_uid = ump->um_uid;
322 		uva->va_gid = ump->um_gid;
323 		break;
324 	}
325 }
326 
327 /*
328  * Create upper node attr.
329  */
330 int
331 unionfs_create_uppervattr(struct unionfs_mount *ump,
332 			  struct vnode *lvp,
333 			  struct vattr *uva,
334 			  struct ucred *cred,
335 			  struct thread *td)
336 {
337 	int		error;
338 	struct vattr	lva;
339 
340 	if ((error = VOP_GETATTR(lvp, &lva, cred, td)))
341 		return (error);
342 
343 	unionfs_create_uppervattr_core(ump, &lva, uva, td);
344 
345 	return (error);
346 }
347 
348 /*
349  * relookup
350  *
351  * dvp should be locked on entry and will be locked on return.
352  *
353  * If an error is returned, *vpp will be invalid, otherwise it will hold a
354  * locked, referenced vnode. If *vpp == dvp then remember that only one
355  * LK_EXCLUSIVE lock is held.
356  */
357 static int
358 unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
359 		 struct componentname *cnp, struct componentname *cn,
360 		 struct thread *td, char *path, int pathlen, u_long nameiop)
361 {
362 	int	error;
363 
364 	cn->cn_namelen = pathlen;
365 	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
366 	bcopy(path, cn->cn_pnbuf, pathlen);
367 	cn->cn_pnbuf[pathlen] = '\0';
368 
369 	cn->cn_nameiop = nameiop;
370 	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
371 	cn->cn_lkflags = LK_EXCLUSIVE;
372 	cn->cn_thread = td;
373 	cn->cn_cred = cnp->cn_cred;
374 
375 	cn->cn_nameptr = cn->cn_pnbuf;
376 	cn->cn_consume = cnp->cn_consume;
377 
378 	if (nameiop == DELETE)
379 		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
380 	else if (RENAME == nameiop)
381 		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
382 
383 	vref(dvp);
384 	VOP_UNLOCK(dvp, 0);
385 
386 	if ((error = relookup(dvp, vpp, cn))) {
387 		uma_zfree(namei_zone, cn->cn_pnbuf);
388 		cn->cn_flags &= ~HASBUF;
389 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
390 	} else
391 		vrele(dvp);
392 
393 	return (error);
394 }
395 
396 /*
397  * relookup for CREATE namei operation.
398  *
399  * dvp is unionfs vnode. dvp should be locked.
400  *
401  * If it called 'unionfs_copyfile' function by unionfs_link etc,
402  * VOP_LOOKUP information is broken.
403  * So it need relookup in order to create link etc.
404  */
405 int
406 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
407 			    struct thread *td)
408 {
409 	int	error;
410 	struct vnode *udvp;
411 	struct vnode *vp;
412 	struct componentname cn;
413 
414 	udvp = UNIONFSVPTOUPPERVP(dvp);
415 	vp = NULLVP;
416 
417 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
418 	    strlen(cnp->cn_nameptr), CREATE);
419 	if (error)
420 		return (error);
421 
422 	if (vp != NULLVP) {
423 		if (udvp == vp)
424 			vrele(vp);
425 		else
426 			vput(vp);
427 
428 		error = EEXIST;
429 	}
430 
431 	if (cn.cn_flags & HASBUF) {
432 		uma_zfree(namei_zone, cn.cn_pnbuf);
433 		cn.cn_flags &= ~HASBUF;
434 	}
435 
436 	if (!error) {
437 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
438 		cnp->cn_flags = cn.cn_flags;
439 	}
440 
441 	return (error);
442 }
443 
444 /*
445  * relookup for DELETE namei operation.
446  *
447  * dvp is unionfs vnode. dvp should be locked.
448  */
449 int
450 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
451 			    struct thread *td)
452 {
453 	int	error;
454 	struct vnode *udvp;
455 	struct vnode *vp;
456 	struct componentname cn;
457 
458 	udvp = UNIONFSVPTOUPPERVP(dvp);
459 	vp = NULLVP;
460 
461 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
462 	    strlen(cnp->cn_nameptr), DELETE);
463 	if (error)
464 		return (error);
465 
466 	if (vp == NULLVP)
467 		error = ENOENT;
468 	else {
469 		if (udvp == vp)
470 			vrele(vp);
471 		else
472 			vput(vp);
473 	}
474 
475 	if (cn.cn_flags & HASBUF) {
476 		uma_zfree(namei_zone, cn.cn_pnbuf);
477 		cn.cn_flags &= ~HASBUF;
478 	}
479 
480 	if (!error) {
481 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
482 		cnp->cn_flags = cn.cn_flags;
483 	}
484 
485 	return (error);
486 }
487 
488 /*
489  * relookup for RENAME namei operation.
490  *
491  * dvp is unionfs vnode. dvp should be locked.
492  */
493 int
494 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
495 			    struct thread *td)
496 {
497 	int error;
498 	struct vnode *udvp;
499 	struct vnode *vp;
500 	struct componentname cn;
501 
502 	udvp = UNIONFSVPTOUPPERVP(dvp);
503 	vp = NULLVP;
504 
505 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
506 	    strlen(cnp->cn_nameptr), RENAME);
507 	if (error)
508 		return (error);
509 
510 	if (vp != NULLVP) {
511 		if (udvp == vp)
512 			vrele(vp);
513 		else
514 			vput(vp);
515 	}
516 
517 	if (cn.cn_flags & HASBUF) {
518 		uma_zfree(namei_zone, cn.cn_pnbuf);
519 		cn.cn_flags &= ~HASBUF;
520 	}
521 
522 	if (!error) {
523 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
524 		cnp->cn_flags = cn.cn_flags;
525 	}
526 
527 	return (error);
528 
529 }
530 
531 /*
532  * Update the unionfs_node.
533  *
534  * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
535  * uvp's lock and lower's lock will be unlocked.
536  */
537 static void
538 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
539 		    struct thread *td)
540 {
541 	int		count, lockcnt;
542 	struct vnode   *vp;
543 	struct vnode   *lvp;
544 
545 	vp = UNIONFSTOV(unp);
546 	lvp = unp->un_lowervp;
547 
548 	/*
549 	 * lock update
550 	 */
551 	VI_LOCK(vp);
552 	unp->un_uppervp = uvp;
553 	vp->v_vnlock = uvp->v_vnlock;
554 	lockcnt = lvp->v_vnlock->lk_exclusivecount;
555 	if (lockcnt <= 0)
556 		panic("unionfs: no exclusive lock");
557 	VI_UNLOCK(vp);
558 	for (count = 1; count < lockcnt; count++)
559 		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
560 }
561 
562 /*
563  * Create a new shadow dir.
564  *
565  * udvp should be locked on entry and will be locked on return.
566  *
567  * If no error returned, unp will be updated.
568  */
569 int
570 unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
571 		    struct unionfs_node *unp, struct componentname *cnp,
572 		    struct thread *td)
573 {
574 	int		error;
575 	struct vnode   *lvp;
576 	struct vnode   *uvp;
577 	struct vattr	va;
578 	struct vattr	lva;
579 	struct componentname cn;
580 	struct mount   *mp;
581 	struct ucred   *cred;
582 	struct ucred   *credbk;
583 	struct uidinfo *rootinfo;
584 
585 	if (unp->un_uppervp != NULLVP)
586 		return (EEXIST);
587 
588 	lvp = unp->un_lowervp;
589 	uvp = NULLVP;
590 	credbk = cnp->cn_cred;
591 
592 	/* Authority change to root */
593 	rootinfo = uifind((uid_t)0);
594 	cred = crdup(cnp->cn_cred);
595 	chgproccnt(cred->cr_ruidinfo, 1, 0);
596 	change_euid(cred, rootinfo);
597 	change_ruid(cred, rootinfo);
598 	change_svuid(cred, (uid_t)0);
599 	uifree(rootinfo);
600 	cnp->cn_cred = cred;
601 
602 	memset(&cn, 0, sizeof(cn));
603 
604 	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred, td)))
605 		goto unionfs_mkshadowdir_abort;
606 
607 	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
608 		goto unionfs_mkshadowdir_abort;
609 	if (uvp != NULLVP) {
610 		if (udvp == uvp)
611 			vrele(uvp);
612 		else
613 			vput(uvp);
614 
615 		error = EEXIST;
616 		goto unionfs_mkshadowdir_free_out;
617 	}
618 
619 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
620 		goto unionfs_mkshadowdir_free_out;
621 	if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) {
622 		vn_finished_write(mp);
623 		goto unionfs_mkshadowdir_free_out;
624 	}
625 	unionfs_create_uppervattr_core(ump, &lva, &va, td);
626 
627 	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
628 
629 	if (!error) {
630 		unionfs_node_update(unp, uvp, td);
631 
632 		/*
633 		 * XXX The bug which cannot set uid/gid was corrected.
634 		 * Ignore errors.
635 		 */
636 		va.va_type = VNON;
637 		VOP_SETATTR(uvp, &va, cn.cn_cred, td);
638 	}
639 	vn_finished_write(mp);
640 
641 unionfs_mkshadowdir_free_out:
642 	if (cn.cn_flags & HASBUF) {
643 		uma_zfree(namei_zone, cn.cn_pnbuf);
644 		cn.cn_flags &= ~HASBUF;
645 	}
646 
647 unionfs_mkshadowdir_abort:
648 	cnp->cn_cred = credbk;
649 	chgproccnt(cred->cr_ruidinfo, -1, 0);
650 	crfree(cred);
651 
652 	return (error);
653 }
654 
655 /*
656  * Create a new whiteout.
657  *
658  * dvp should be locked on entry and will be locked on return.
659  */
660 int
661 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
662 		   struct thread *td, char *path)
663 {
664 	int		error;
665 	struct vnode   *wvp;
666 	struct componentname cn;
667 	struct mount   *mp;
668 
669 	if (path == NULL)
670 		path = cnp->cn_nameptr;
671 
672 	wvp = NULLVP;
673 	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
674 		return (error);
675 	if (wvp != NULLVP) {
676 		if (cn.cn_flags & HASBUF) {
677 			uma_zfree(namei_zone, cn.cn_pnbuf);
678 			cn.cn_flags &= ~HASBUF;
679 		}
680 		if (dvp == wvp)
681 			vrele(wvp);
682 		else
683 			vput(wvp);
684 
685 		return (EEXIST);
686 	}
687 
688 	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
689 		goto unionfs_mkwhiteout_free_out;
690 	if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE)))
691 		error = VOP_WHITEOUT(dvp, &cn, CREATE);
692 
693 	vn_finished_write(mp);
694 
695 unionfs_mkwhiteout_free_out:
696 	if (cn.cn_flags & HASBUF) {
697 		uma_zfree(namei_zone, cn.cn_pnbuf);
698 		cn.cn_flags &= ~HASBUF;
699 	}
700 
701 	return (error);
702 }
703 
704 /*
705  * Create a new vnode for create a new shadow file.
706  *
707  * If an error is returned, *vpp will be invalid, otherwise it will hold a
708  * locked, referenced and opened vnode.
709  *
710  * unp is never updated.
711  */
712 static int
713 unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
714 			   struct unionfs_node *unp, struct vattr *uvap,
715 			   struct thread *td)
716 {
717 	struct unionfs_mount *ump;
718 	struct vnode   *vp;
719 	struct vnode   *lvp;
720 	struct ucred   *cred;
721 	struct vattr	lva;
722 	int		fmode;
723 	int		error;
724 	struct componentname cn;
725 
726 	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
727 	vp = NULLVP;
728 	lvp = unp->un_lowervp;
729 	cred = td->td_ucred;
730 	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
731 	error = 0;
732 
733 	if ((error = VOP_GETATTR(lvp, &lva, cred, td)) != 0)
734 		return (error);
735 	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
736 
737 	if (unp->un_path == NULL)
738 		panic("unionfs: un_path is null");
739 
740 	cn.cn_namelen = strlen(unp->un_path);
741 	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
742 	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
743 	cn.cn_nameiop = CREATE;
744 	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
745 	cn.cn_lkflags = LK_EXCLUSIVE;
746 	cn.cn_thread = td;
747 	cn.cn_cred = cred;
748 	cn.cn_nameptr = cn.cn_pnbuf;
749 	cn.cn_consume = 0;
750 
751 	vref(udvp);
752 	if ((error = relookup(udvp, &vp, &cn)) != 0)
753 		goto unionfs_vn_create_on_upper_free_out2;
754 	vrele(udvp);
755 
756 	if (vp != NULLVP) {
757 		if (vp == udvp)
758 			vrele(vp);
759 		else
760 			vput(vp);
761 		error = EEXIST;
762 		goto unionfs_vn_create_on_upper_free_out1;
763 	}
764 
765 	if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0)
766 		goto unionfs_vn_create_on_upper_free_out1;
767 
768 	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
769 		goto unionfs_vn_create_on_upper_free_out1;
770 
771 	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
772 		vput(vp);
773 		goto unionfs_vn_create_on_upper_free_out1;
774 	}
775 	vp->v_writecount++;
776 	*vpp = vp;
777 
778 unionfs_vn_create_on_upper_free_out1:
779 	VOP_UNLOCK(udvp, 0);
780 
781 unionfs_vn_create_on_upper_free_out2:
782 	if (cn.cn_flags & HASBUF) {
783 		uma_zfree(namei_zone, cn.cn_pnbuf);
784 		cn.cn_flags &= ~HASBUF;
785 	}
786 
787 	return (error);
788 }
789 
790 /*
791  * Copy from lvp to uvp.
792  *
793  * lvp and uvp should be locked and opened on entry and will be locked and
794  * opened on return.
795  */
796 static int
797 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
798 		      struct ucred *cred, struct thread *td)
799 {
800 	int		error;
801 	off_t		offset;
802 	int		count;
803 	int		bufoffset;
804 	char           *buf;
805 	struct uio	uio;
806 	struct iovec	iov;
807 
808 	error = 0;
809 	memset(&uio, 0, sizeof(uio));
810 
811 	uio.uio_td = td;
812 	uio.uio_segflg = UIO_SYSSPACE;
813 	uio.uio_offset = 0;
814 
815 	if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0)
816 		return (error);
817 	if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0)
818 		return (error);
819 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
820 
821 	while (error == 0) {
822 		offset = uio.uio_offset;
823 
824 		uio.uio_iov = &iov;
825 		uio.uio_iovcnt = 1;
826 		iov.iov_base = buf;
827 		iov.iov_len = MAXBSIZE;
828 		uio.uio_resid = iov.iov_len;
829 		uio.uio_rw = UIO_READ;
830 
831 		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
832 			break;
833 		if ((count = MAXBSIZE - uio.uio_resid) == 0)
834 			break;
835 
836 		bufoffset = 0;
837 		while (bufoffset < count) {
838 			uio.uio_iov = &iov;
839 			uio.uio_iovcnt = 1;
840 			iov.iov_base = buf + bufoffset;
841 			iov.iov_len = count - bufoffset;
842 			uio.uio_offset = offset + bufoffset;
843 			uio.uio_resid = iov.iov_len;
844 			uio.uio_rw = UIO_WRITE;
845 
846 			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
847 				break;
848 
849 			bufoffset += (count - bufoffset) - uio.uio_resid;
850 		}
851 
852 		uio.uio_offset = offset + bufoffset;
853 	}
854 
855 	free(buf, M_TEMP);
856 
857 	return (error);
858 }
859 
860 /*
861  * Copy file from lower to upper.
862  *
863  * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
864  * docopy.
865  *
866  * If no error returned, unp will be updated.
867  */
868 int
869 unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
870 		 struct thread *td)
871 {
872 	int		error;
873 	struct mount   *mp;
874 	struct vnode   *udvp;
875 	struct vnode   *lvp;
876 	struct vnode   *uvp;
877 	struct vattr	uva;
878 
879 	lvp = unp->un_lowervp;
880 	uvp = NULLVP;
881 
882 	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
883 		return (EROFS);
884 	if (unp->un_dvp == NULLVP)
885 		return (EINVAL);
886 	if (unp->un_uppervp != NULLVP)
887 		return (EEXIST);
888 	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
889 	if (udvp == NULLVP)
890 		return (EROFS);
891 	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
892 		return (EROFS);
893 
894 	error = VOP_ACCESS(lvp, VREAD, cred, td);
895 	if (error != 0)
896 		return (error);
897 
898 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
899 		return (error);
900 	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
901 	if (error != 0) {
902 		vn_finished_write(mp);
903 		return (error);
904 	}
905 
906 	if (docopy != 0) {
907 		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
908 		if (error == 0) {
909 			error = unionfs_copyfile_core(lvp, uvp, cred, td);
910 			VOP_CLOSE(lvp, FREAD, cred, td);
911 		}
912 	}
913 	VOP_CLOSE(uvp, FWRITE, cred, td);
914 	uvp->v_writecount--;
915 
916 	vn_finished_write(mp);
917 
918 	if (error == 0) {
919 		/* Reset the attributes. Ignore errors. */
920 		uva.va_type = VNON;
921 		VOP_SETATTR(uvp, &uva, cred, td);
922 	}
923 
924 	unionfs_node_update(unp, uvp, td);
925 
926 	return (error);
927 }
928 
929 /*
930  * It checks whether vp can rmdir. (check empty)
931  *
932  * vp is unionfs vnode.
933  * vp should be locked.
934  */
935 int
936 unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
937 {
938 	int		error;
939 	int		eofflag;
940 	int		lookuperr;
941 	struct vnode   *uvp;
942 	struct vnode   *lvp;
943 	struct vnode   *tvp;
944 	struct vattr	va;
945 	struct componentname cn;
946 	/*
947 	 * The size of buf needs to be larger than DIRBLKSIZ.
948 	 */
949 	char		buf[256 * 6];
950 	struct dirent  *dp;
951 	struct dirent  *edp;
952 	struct uio	uio;
953 	struct iovec	iov;
954 
955 	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
956 
957 	eofflag = 0;
958 	uvp = UNIONFSVPTOUPPERVP(vp);
959 	lvp = UNIONFSVPTOLOWERVP(vp);
960 
961 	/* check opaque */
962 	if ((error = VOP_GETATTR(uvp, &va, cred, td)) != 0)
963 		return (error);
964 	if (va.va_flags & OPAQUE)
965 		return (0);
966 
967 	/* open vnode */
968 #ifdef MAC
969 	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
970 		return (error);
971 #endif
972 	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
973 		return (error);
974 	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
975 		return (error);
976 
977 	uio.uio_rw = UIO_READ;
978 	uio.uio_segflg = UIO_SYSSPACE;
979 	uio.uio_td = td;
980 	uio.uio_offset = 0;
981 
982 #ifdef MAC
983 	error = mac_vnode_check_readdir(td->td_ucred, lvp);
984 #endif
985 	while (!error && !eofflag) {
986 		iov.iov_base = buf;
987 		iov.iov_len = sizeof(buf);
988 		uio.uio_iov = &iov;
989 		uio.uio_iovcnt = 1;
990 		uio.uio_resid = iov.iov_len;
991 
992 		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
993 		if (error)
994 			break;
995 
996 		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
997 		for (dp = (struct dirent*)buf; !error && dp < edp;
998 		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
999 			if (dp->d_type == DT_WHT ||
1000 			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1001 			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1002 				continue;
1003 
1004 			cn.cn_namelen = dp->d_namlen;
1005 			cn.cn_pnbuf = NULL;
1006 			cn.cn_nameptr = dp->d_name;
1007 			cn.cn_nameiop = LOOKUP;
1008 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1009 			cn.cn_lkflags = LK_EXCLUSIVE;
1010 			cn.cn_thread = td;
1011 			cn.cn_cred = cred;
1012 			cn.cn_consume = 0;
1013 
1014 			/*
1015 			 * check entry in lower.
1016 			 * Sometimes, readdir function returns
1017 			 * wrong entry.
1018 			 */
1019 			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1020 
1021 			if (!lookuperr)
1022 				vput(tvp);
1023 			else
1024 				continue; /* skip entry */
1025 
1026 			/*
1027 			 * check entry
1028 			 * If it has no exist/whiteout entry in upper,
1029 			 * directory is not empty.
1030 			 */
1031 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1032 			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1033 
1034 			if (!lookuperr)
1035 				vput(tvp);
1036 
1037 			/* ignore exist or whiteout entry */
1038 			if (!lookuperr ||
1039 			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1040 				continue;
1041 
1042 			error = ENOTEMPTY;
1043 		}
1044 	}
1045 
1046 	/* close vnode */
1047 	VOP_CLOSE(vp, FREAD, cred, td);
1048 
1049 	return (error);
1050 }
1051 
1052 #ifdef DIAGNOSTIC
1053 
1054 struct vnode   *
1055 unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1056 {
1057 	struct unionfs_node *unp;
1058 
1059 	unp = VTOUNIONFS(vp);
1060 
1061 #ifdef notyet
1062 	if (vp->v_op != unionfs_vnodeop_p) {
1063 		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1064 #ifdef KDB
1065 		kdb_enter(KDB_WHY_UNIONFS,
1066 		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1067 #endif
1068 		panic("unionfs_checkuppervp");
1069 	};
1070 #endif
1071 	return (unp->un_uppervp);
1072 }
1073 
1074 struct vnode   *
1075 unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1076 {
1077 	struct unionfs_node *unp;
1078 
1079 	unp = VTOUNIONFS(vp);
1080 
1081 #ifdef notyet
1082 	if (vp->v_op != unionfs_vnodeop_p) {
1083 		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1084 #ifdef KDB
1085 		kdb_enter(KDB_WHY_UNIONFS,
1086 		    "unionfs_checklowervp: on non-unionfs-node.\n");
1087 #endif
1088 		panic("unionfs_checklowervp");
1089 	};
1090 #endif
1091 	return (unp->un_lowervp);
1092 }
1093 #endif
1094