xref: /freebsd/sys/fs/unionfs/union_subr.c (revision db612abe8df3355d1eb23bb3b50fdd97bc21e979)
1 /*-
2  * Copyright (c) 1994 Jan-Simon Pendry
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6  * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Jan-Simon Pendry.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36  * $FreeBSD$
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/mutex.h>
44 #include <sys/malloc.h>
45 #include <sys/mount.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/vnode.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/filedesc.h>
52 #include <sys/stat.h>
53 #include <sys/resourcevar.h>
54 
55 #ifdef MAC
56 #include <sys/mac.h>
57 #endif
58 
59 #include <vm/uma.h>
60 
61 #include <fs/unionfs/union.h>
62 
/*
 * malloc(9) types for this file: M_UNIONFSNODE tags the per-vnode
 * struct unionfs_node allocations, M_UNIONFSPATH tags the saved
 * component-name strings (un_path).
 */
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
65 
/*
 * Filesystem-type initialization hook.
 *
 * Only emits a debug trace through UNIONFSDEBUG; vfsp is not examined.
 * Always returns 0.
 */
int
unionfs_init(struct vfsconf *vfsp)
{
	int		status;

	status = 0;
	UNIONFSDEBUG("unionfs_init\n");	/* printed during system boot */

	return (status);
}
75 
/*
 * Filesystem-type teardown hook.
 *
 * There is nothing to release here; vfsp is ignored and 0 is always
 * returned.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{
	int		status;

	status = 0;

	return (status);
}
84 
85 /*
86  * Make a new or get existing unionfs node.
87  *
88  * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
89  * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
90  * you should not lock plurality simultaneously.
91  */
92 int
93 unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
94 		struct vnode *lowervp, struct vnode *dvp,
95 		struct vnode **vpp, struct componentname *cnp,
96 		struct thread *td)
97 {
98 	struct unionfs_mount *ump;
99 	struct unionfs_node *unp;
100 	struct vnode   *vp;
101 	int		error;
102 	int		lkflags;
103 	char	       *path;
104 
105 	ump = MOUNTTOUNIONFSMOUNT(mp);
106 	lkflags = (cnp ? cnp->cn_lkflags : 0);
107 	path = (cnp ? cnp->cn_nameptr : NULL);
108 
109 	if (uppervp == NULLVP && lowervp == NULLVP)
110 		panic("unionfs_nodeget: upper and lower is null");
111 
112 	/* If it has no ISLASTCN flag, path check is skipped. */
113 	if (cnp && !(cnp->cn_flags & ISLASTCN))
114 		path = NULL;
115 
116 	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
117 	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
118 		if (dvp == NULLVP)
119 			return (EINVAL);
120 	}
121 
122 	/*
123 	 * Do the MALLOC before the getnewvnode since doing so afterward
124 	 * might cause a bogus v_data pointer to get dereferenced elsewhere
125 	 * if MALLOC should block.
126 	 */
127 	MALLOC(unp, struct unionfs_node *, sizeof(struct unionfs_node),
128 	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
129 
130 	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
131 	if (error != 0) {
132 		FREE(unp, M_UNIONFSNODE);
133 		return (error);
134 	}
135 	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
136 	if (error != 0) {
137 		FREE(unp, M_UNIONFSNODE);
138 		return (error);
139 	}
140 	if (dvp != NULLVP)
141 		vref(dvp);
142 	if (uppervp != NULLVP)
143 		vref(uppervp);
144 	if (lowervp != NULLVP)
145 		vref(lowervp);
146 
147 	unp->un_vnode = vp;
148 	unp->un_uppervp = uppervp;
149 	unp->un_lowervp = lowervp;
150 	unp->un_dvp = dvp;
151 	if (uppervp != NULLVP)
152 		vp->v_vnlock = uppervp->v_vnlock;
153 	else
154 		vp->v_vnlock = lowervp->v_vnlock;
155 
156 	if (path != NULL) {
157 		unp->un_path = (char *)
158 		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
159 		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
160 		unp->un_path[cnp->cn_namelen] = '\0';
161 	}
162 	vp->v_type = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
163 	vp->v_data = unp;
164 
165 	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
166 	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
167 		vp->v_vflag |= VV_ROOT;
168 
169 	if (lkflags & LK_TYPE_MASK)
170 		vn_lock(vp, lkflags | LK_RETRY);
171 
172 	*vpp = vp;
173 
174 	return (0);
175 }
176 
177 /*
178  * Clean up the unionfs node.
179  */
180 void
181 unionfs_noderem(struct vnode *vp, struct thread *td)
182 {
183 	int		vfslocked;
184 	struct unionfs_node *unp;
185 	struct unionfs_node_status *unsp, *unsp_tmp;
186 	struct vnode   *lvp;
187 	struct vnode   *uvp;
188 
189 	/*
190 	 * Use the interlock to protect the clearing of v_data to
191 	 * prevent faults in unionfs_lock().
192 	 */
193 	VI_LOCK(vp);
194 	unp = VTOUNIONFS(vp);
195 	lvp = unp->un_lowervp;
196 	uvp = unp->un_uppervp;
197 	unp->un_lowervp = unp->un_uppervp = NULLVP;
198 
199 	vp->v_vnlock = &(vp->v_lock);
200 	vp->v_data = NULL;
201 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp));
202 	if (lvp != NULLVP)
203 		VOP_UNLOCK(lvp, 0);
204 	if (uvp != NULLVP)
205 		VOP_UNLOCK(uvp, 0);
206 	vp->v_object = NULL;
207 
208 	if (lvp != NULLVP) {
209 		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
210 		vrele(lvp);
211 		VFS_UNLOCK_GIANT(vfslocked);
212 	}
213 	if (uvp != NULLVP) {
214 		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
215 		vrele(uvp);
216 		VFS_UNLOCK_GIANT(vfslocked);
217 	}
218 	if (unp->un_dvp != NULLVP) {
219 		vfslocked = VFS_LOCK_GIANT(unp->un_dvp->v_mount);
220 		vrele(unp->un_dvp);
221 		VFS_UNLOCK_GIANT(vfslocked);
222 		unp->un_dvp = NULLVP;
223 	}
224 	if (unp->un_path) {
225 		free(unp->un_path, M_UNIONFSPATH);
226 		unp->un_path = NULL;
227 	}
228 
229 	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
230 		LIST_REMOVE(unsp, uns_list);
231 		free(unsp, M_TEMP);
232 	}
233 	FREE(unp, M_UNIONFSNODE);
234 }
235 
236 /*
237  * Get the unionfs node status.
238  * You need exclusive lock this vnode.
239  */
240 void
241 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
242 			struct unionfs_node_status **unspp)
243 {
244 	struct unionfs_node_status *unsp;
245 
246 	KASSERT(NULL != unspp, ("null pointer"));
247 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
248 
249 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
250 		if (unsp->uns_tid == td->td_tid) {
251 			*unspp = unsp;
252 			return;
253 		}
254 	}
255 
256 	/* create a new unionfs node status */
257 	MALLOC(unsp, struct unionfs_node_status *,
258 	    sizeof(struct unionfs_node_status), M_TEMP, M_WAITOK | M_ZERO);
259 
260 	unsp->uns_tid = td->td_tid;
261 	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
262 
263 	*unspp = unsp;
264 }
265 
266 /*
267  * Remove the unionfs node status, if you can.
268  * You need exclusive lock this vnode.
269  */
270 void
271 unionfs_tryrem_node_status(struct unionfs_node *unp, struct thread *td,
272 			   struct unionfs_node_status *unsp)
273 {
274 	KASSERT(NULL != unsp, ("null pointer"));
275 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
276 
277 	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
278 		return;
279 
280 	LIST_REMOVE(unsp, uns_list);
281 	free(unsp, M_TEMP);
282 }
283 
284 /*
285  * Create upper node attr.
286  */
287 void
288 unionfs_create_uppervattr_core(struct unionfs_mount *ump,
289 			       struct vattr *lva,
290 			       struct vattr *uva,
291 			       struct thread *td)
292 {
293 	VATTR_NULL(uva);
294 	uva->va_type = lva->va_type;
295 	uva->va_atime = lva->va_atime;
296 	uva->va_mtime = lva->va_mtime;
297 	uva->va_ctime = lva->va_ctime;
298 
299 	switch (ump->um_copymode) {
300 	case UNIONFS_TRANSPARENT:
301 		uva->va_mode = lva->va_mode;
302 		uva->va_uid = lva->va_uid;
303 		uva->va_gid = lva->va_gid;
304 		break;
305 	case UNIONFS_MASQUERADE:
306 		if (ump->um_uid == lva->va_uid) {
307 			uva->va_mode = lva->va_mode & 077077;
308 			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
309 			uva->va_uid = lva->va_uid;
310 			uva->va_gid = lva->va_gid;
311 		} else {
312 			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
313 			uva->va_uid = ump->um_uid;
314 			uva->va_gid = ump->um_gid;
315 		}
316 		break;
317 	default:		/* UNIONFS_TRADITIONAL */
318 		FILEDESC_SLOCK(td->td_proc->p_fd);
319 		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
320 		FILEDESC_SUNLOCK(td->td_proc->p_fd);
321 		uva->va_uid = ump->um_uid;
322 		uva->va_gid = ump->um_gid;
323 		break;
324 	}
325 }
326 
327 /*
328  * Create upper node attr.
329  */
330 int
331 unionfs_create_uppervattr(struct unionfs_mount *ump,
332 			  struct vnode *lvp,
333 			  struct vattr *uva,
334 			  struct ucred *cred,
335 			  struct thread *td)
336 {
337 	int		error;
338 	struct vattr	lva;
339 
340 	if ((error = VOP_GETATTR(lvp, &lva, cred, td)))
341 		return (error);
342 
343 	unionfs_create_uppervattr_core(ump, &lva, uva, td);
344 
345 	return (error);
346 }
347 
348 /*
349  * relookup
350  *
351  * dvp should be locked on entry and will be locked on return.
352  *
353  * If an error is returned, *vpp will be invalid, otherwise it will hold a
354  * locked, referenced vnode. If *vpp == dvp then remember that only one
355  * LK_EXCLUSIVE lock is held.
356  */
357 static int
358 unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
359 		 struct componentname *cnp, struct componentname *cn,
360 		 struct thread *td, char *path, int pathlen, u_long nameiop)
361 {
362 	int	error;
363 
364 	cn->cn_namelen = pathlen;
365 	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
366 	bcopy(path, cn->cn_pnbuf, pathlen);
367 	cn->cn_pnbuf[pathlen] = '\0';
368 
369 	cn->cn_nameiop = nameiop;
370 	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
371 	cn->cn_lkflags = LK_EXCLUSIVE;
372 	cn->cn_thread = td;
373 	cn->cn_cred = cnp->cn_cred;
374 
375 	cn->cn_nameptr = cn->cn_pnbuf;
376 	cn->cn_consume = cnp->cn_consume;
377 
378 	if (nameiop == DELETE)
379 		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
380 	else if (RENAME == nameiop)
381 		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
382 
383 	vref(dvp);
384 	VOP_UNLOCK(dvp, 0);
385 
386 	if ((error = relookup(dvp, vpp, cn))) {
387 		uma_zfree(namei_zone, cn->cn_pnbuf);
388 		cn->cn_flags &= ~HASBUF;
389 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
390 	} else
391 		vrele(dvp);
392 
393 	return (error);
394 }
395 
396 /*
397  * relookup for CREATE namei operation.
398  *
399  * dvp is unionfs vnode. dvp should be locked.
400  *
401  * If it called 'unionfs_copyfile' function by unionfs_link etc,
402  * VOP_LOOKUP information is broken.
403  * So it need relookup in order to create link etc.
404  */
405 int
406 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
407 			    struct thread *td)
408 {
409 	int	error;
410 	struct vnode *udvp;
411 	struct vnode *vp;
412 	struct componentname cn;
413 
414 	udvp = UNIONFSVPTOUPPERVP(dvp);
415 	vp = NULLVP;
416 
417 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
418 	    strlen(cnp->cn_nameptr), CREATE);
419 	if (error)
420 		return (error);
421 
422 	if (vp != NULLVP) {
423 		if (udvp == vp)
424 			vrele(vp);
425 		else
426 			vput(vp);
427 
428 		error = EEXIST;
429 	}
430 
431 	if (cn.cn_flags & HASBUF) {
432 		uma_zfree(namei_zone, cn.cn_pnbuf);
433 		cn.cn_flags &= ~HASBUF;
434 	}
435 
436 	if (!error) {
437 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
438 		cnp->cn_flags = cn.cn_flags;
439 	}
440 
441 	return (error);
442 }
443 
444 /*
445  * relookup for DELETE namei operation.
446  *
447  * dvp is unionfs vnode. dvp should be locked.
448  */
449 int
450 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
451 			    struct thread *td)
452 {
453 	int	error;
454 	struct vnode *udvp;
455 	struct vnode *vp;
456 	struct componentname cn;
457 
458 	udvp = UNIONFSVPTOUPPERVP(dvp);
459 	vp = NULLVP;
460 
461 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
462 	    strlen(cnp->cn_nameptr), DELETE);
463 	if (error)
464 		return (error);
465 
466 	if (vp == NULLVP)
467 		error = ENOENT;
468 	else {
469 		if (udvp == vp)
470 			vrele(vp);
471 		else
472 			vput(vp);
473 	}
474 
475 	if (cn.cn_flags & HASBUF) {
476 		uma_zfree(namei_zone, cn.cn_pnbuf);
477 		cn.cn_flags &= ~HASBUF;
478 	}
479 
480 	if (!error) {
481 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
482 		cnp->cn_flags = cn.cn_flags;
483 	}
484 
485 	return (error);
486 }
487 
488 /*
489  * relookup for RENAME namei operation.
490  *
491  * dvp is unionfs vnode. dvp should be locked.
492  */
493 int
494 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
495 			    struct thread *td)
496 {
497 	int error;
498 	struct vnode *udvp;
499 	struct vnode *vp;
500 	struct componentname cn;
501 
502 	udvp = UNIONFSVPTOUPPERVP(dvp);
503 	vp = NULLVP;
504 
505 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
506 	    strlen(cnp->cn_nameptr), RENAME);
507 	if (error)
508 		return (error);
509 
510 	if (vp != NULLVP) {
511 		if (udvp == vp)
512 			vrele(vp);
513 		else
514 			vput(vp);
515 	}
516 
517 	if (cn.cn_flags & HASBUF) {
518 		uma_zfree(namei_zone, cn.cn_pnbuf);
519 		cn.cn_flags &= ~HASBUF;
520 	}
521 
522 	if (!error) {
523 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
524 		cnp->cn_flags = cn.cn_flags;
525 	}
526 
527 	return (error);
528 
529 }
530 
531 /*
532  * Update the unionfs_node.
533  *
534  * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
535  * uvp's lock and lower's lock will be unlocked.
536  */
537 static void
538 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
539 		    struct thread *td)
540 {
541 	unsigned	count, lockrec;
542 	struct vnode   *vp;
543 	struct vnode   *lvp;
544 
545 	vp = UNIONFSTOV(unp);
546 	lvp = unp->un_lowervp;
547 	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
548 
549 	/*
550 	 * lock update
551 	 */
552 	VI_LOCK(vp);
553 	unp->un_uppervp = uvp;
554 	vp->v_vnlock = uvp->v_vnlock;
555 	VI_UNLOCK(vp);
556 	lockrec = lvp->v_vnlock->lk_recurse;
557 	for (count = 0; count < lockrec; count++)
558 		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
559 }
560 
561 /*
562  * Create a new shadow dir.
563  *
564  * udvp should be locked on entry and will be locked on return.
565  *
566  * If no error returned, unp will be updated.
567  */
568 int
569 unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
570 		    struct unionfs_node *unp, struct componentname *cnp,
571 		    struct thread *td)
572 {
573 	int		error;
574 	struct vnode   *lvp;
575 	struct vnode   *uvp;
576 	struct vattr	va;
577 	struct vattr	lva;
578 	struct componentname cn;
579 	struct mount   *mp;
580 	struct ucred   *cred;
581 	struct ucred   *credbk;
582 	struct uidinfo *rootinfo;
583 
584 	if (unp->un_uppervp != NULLVP)
585 		return (EEXIST);
586 
587 	lvp = unp->un_lowervp;
588 	uvp = NULLVP;
589 	credbk = cnp->cn_cred;
590 
591 	/* Authority change to root */
592 	rootinfo = uifind((uid_t)0);
593 	cred = crdup(cnp->cn_cred);
594 	chgproccnt(cred->cr_ruidinfo, 1, 0);
595 	change_euid(cred, rootinfo);
596 	change_ruid(cred, rootinfo);
597 	change_svuid(cred, (uid_t)0);
598 	uifree(rootinfo);
599 	cnp->cn_cred = cred;
600 
601 	memset(&cn, 0, sizeof(cn));
602 
603 	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred, td)))
604 		goto unionfs_mkshadowdir_abort;
605 
606 	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
607 		goto unionfs_mkshadowdir_abort;
608 	if (uvp != NULLVP) {
609 		if (udvp == uvp)
610 			vrele(uvp);
611 		else
612 			vput(uvp);
613 
614 		error = EEXIST;
615 		goto unionfs_mkshadowdir_free_out;
616 	}
617 
618 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
619 		goto unionfs_mkshadowdir_free_out;
620 	if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) {
621 		vn_finished_write(mp);
622 		goto unionfs_mkshadowdir_free_out;
623 	}
624 	unionfs_create_uppervattr_core(ump, &lva, &va, td);
625 
626 	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
627 
628 	if (!error) {
629 		unionfs_node_update(unp, uvp, td);
630 
631 		/*
632 		 * XXX The bug which cannot set uid/gid was corrected.
633 		 * Ignore errors.
634 		 */
635 		va.va_type = VNON;
636 		VOP_SETATTR(uvp, &va, cn.cn_cred, td);
637 	}
638 	vn_finished_write(mp);
639 
640 unionfs_mkshadowdir_free_out:
641 	if (cn.cn_flags & HASBUF) {
642 		uma_zfree(namei_zone, cn.cn_pnbuf);
643 		cn.cn_flags &= ~HASBUF;
644 	}
645 
646 unionfs_mkshadowdir_abort:
647 	cnp->cn_cred = credbk;
648 	chgproccnt(cred->cr_ruidinfo, -1, 0);
649 	crfree(cred);
650 
651 	return (error);
652 }
653 
654 /*
655  * Create a new whiteout.
656  *
657  * dvp should be locked on entry and will be locked on return.
658  */
659 int
660 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
661 		   struct thread *td, char *path)
662 {
663 	int		error;
664 	struct vnode   *wvp;
665 	struct componentname cn;
666 	struct mount   *mp;
667 
668 	if (path == NULL)
669 		path = cnp->cn_nameptr;
670 
671 	wvp = NULLVP;
672 	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
673 		return (error);
674 	if (wvp != NULLVP) {
675 		if (cn.cn_flags & HASBUF) {
676 			uma_zfree(namei_zone, cn.cn_pnbuf);
677 			cn.cn_flags &= ~HASBUF;
678 		}
679 		if (dvp == wvp)
680 			vrele(wvp);
681 		else
682 			vput(wvp);
683 
684 		return (EEXIST);
685 	}
686 
687 	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
688 		goto unionfs_mkwhiteout_free_out;
689 	if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE)))
690 		error = VOP_WHITEOUT(dvp, &cn, CREATE);
691 
692 	vn_finished_write(mp);
693 
694 unionfs_mkwhiteout_free_out:
695 	if (cn.cn_flags & HASBUF) {
696 		uma_zfree(namei_zone, cn.cn_pnbuf);
697 		cn.cn_flags &= ~HASBUF;
698 	}
699 
700 	return (error);
701 }
702 
703 /*
704  * Create a new vnode for create a new shadow file.
705  *
706  * If an error is returned, *vpp will be invalid, otherwise it will hold a
707  * locked, referenced and opened vnode.
708  *
709  * unp is never updated.
710  */
711 static int
712 unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
713 			   struct unionfs_node *unp, struct vattr *uvap,
714 			   struct thread *td)
715 {
716 	struct unionfs_mount *ump;
717 	struct vnode   *vp;
718 	struct vnode   *lvp;
719 	struct ucred   *cred;
720 	struct vattr	lva;
721 	int		fmode;
722 	int		error;
723 	struct componentname cn;
724 
725 	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
726 	vp = NULLVP;
727 	lvp = unp->un_lowervp;
728 	cred = td->td_ucred;
729 	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
730 	error = 0;
731 
732 	if ((error = VOP_GETATTR(lvp, &lva, cred, td)) != 0)
733 		return (error);
734 	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
735 
736 	if (unp->un_path == NULL)
737 		panic("unionfs: un_path is null");
738 
739 	cn.cn_namelen = strlen(unp->un_path);
740 	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
741 	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
742 	cn.cn_nameiop = CREATE;
743 	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
744 	cn.cn_lkflags = LK_EXCLUSIVE;
745 	cn.cn_thread = td;
746 	cn.cn_cred = cred;
747 	cn.cn_nameptr = cn.cn_pnbuf;
748 	cn.cn_consume = 0;
749 
750 	vref(udvp);
751 	if ((error = relookup(udvp, &vp, &cn)) != 0)
752 		goto unionfs_vn_create_on_upper_free_out2;
753 	vrele(udvp);
754 
755 	if (vp != NULLVP) {
756 		if (vp == udvp)
757 			vrele(vp);
758 		else
759 			vput(vp);
760 		error = EEXIST;
761 		goto unionfs_vn_create_on_upper_free_out1;
762 	}
763 
764 	if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0)
765 		goto unionfs_vn_create_on_upper_free_out1;
766 
767 	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
768 		goto unionfs_vn_create_on_upper_free_out1;
769 
770 	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
771 		vput(vp);
772 		goto unionfs_vn_create_on_upper_free_out1;
773 	}
774 	vp->v_writecount++;
775 	*vpp = vp;
776 
777 unionfs_vn_create_on_upper_free_out1:
778 	VOP_UNLOCK(udvp, 0);
779 
780 unionfs_vn_create_on_upper_free_out2:
781 	if (cn.cn_flags & HASBUF) {
782 		uma_zfree(namei_zone, cn.cn_pnbuf);
783 		cn.cn_flags &= ~HASBUF;
784 	}
785 
786 	return (error);
787 }
788 
789 /*
790  * Copy from lvp to uvp.
791  *
792  * lvp and uvp should be locked and opened on entry and will be locked and
793  * opened on return.
794  */
795 static int
796 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
797 		      struct ucred *cred, struct thread *td)
798 {
799 	int		error;
800 	off_t		offset;
801 	int		count;
802 	int		bufoffset;
803 	char           *buf;
804 	struct uio	uio;
805 	struct iovec	iov;
806 
807 	error = 0;
808 	memset(&uio, 0, sizeof(uio));
809 
810 	uio.uio_td = td;
811 	uio.uio_segflg = UIO_SYSSPACE;
812 	uio.uio_offset = 0;
813 
814 	if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0)
815 		return (error);
816 	if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0)
817 		return (error);
818 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
819 
820 	while (error == 0) {
821 		offset = uio.uio_offset;
822 
823 		uio.uio_iov = &iov;
824 		uio.uio_iovcnt = 1;
825 		iov.iov_base = buf;
826 		iov.iov_len = MAXBSIZE;
827 		uio.uio_resid = iov.iov_len;
828 		uio.uio_rw = UIO_READ;
829 
830 		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
831 			break;
832 		if ((count = MAXBSIZE - uio.uio_resid) == 0)
833 			break;
834 
835 		bufoffset = 0;
836 		while (bufoffset < count) {
837 			uio.uio_iov = &iov;
838 			uio.uio_iovcnt = 1;
839 			iov.iov_base = buf + bufoffset;
840 			iov.iov_len = count - bufoffset;
841 			uio.uio_offset = offset + bufoffset;
842 			uio.uio_resid = iov.iov_len;
843 			uio.uio_rw = UIO_WRITE;
844 
845 			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
846 				break;
847 
848 			bufoffset += (count - bufoffset) - uio.uio_resid;
849 		}
850 
851 		uio.uio_offset = offset + bufoffset;
852 	}
853 
854 	free(buf, M_TEMP);
855 
856 	return (error);
857 }
858 
859 /*
860  * Copy file from lower to upper.
861  *
862  * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
863  * docopy.
864  *
865  * If no error returned, unp will be updated.
866  */
867 int
868 unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
869 		 struct thread *td)
870 {
871 	int		error;
872 	struct mount   *mp;
873 	struct vnode   *udvp;
874 	struct vnode   *lvp;
875 	struct vnode   *uvp;
876 	struct vattr	uva;
877 
878 	lvp = unp->un_lowervp;
879 	uvp = NULLVP;
880 
881 	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
882 		return (EROFS);
883 	if (unp->un_dvp == NULLVP)
884 		return (EINVAL);
885 	if (unp->un_uppervp != NULLVP)
886 		return (EEXIST);
887 	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
888 	if (udvp == NULLVP)
889 		return (EROFS);
890 	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
891 		return (EROFS);
892 
893 	error = VOP_ACCESS(lvp, VREAD, cred, td);
894 	if (error != 0)
895 		return (error);
896 
897 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
898 		return (error);
899 	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
900 	if (error != 0) {
901 		vn_finished_write(mp);
902 		return (error);
903 	}
904 
905 	if (docopy != 0) {
906 		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
907 		if (error == 0) {
908 			error = unionfs_copyfile_core(lvp, uvp, cred, td);
909 			VOP_CLOSE(lvp, FREAD, cred, td);
910 		}
911 	}
912 	VOP_CLOSE(uvp, FWRITE, cred, td);
913 	uvp->v_writecount--;
914 
915 	vn_finished_write(mp);
916 
917 	if (error == 0) {
918 		/* Reset the attributes. Ignore errors. */
919 		uva.va_type = VNON;
920 		VOP_SETATTR(uvp, &uva, cred, td);
921 	}
922 
923 	unionfs_node_update(unp, uvp, td);
924 
925 	return (error);
926 }
927 
928 /*
929  * It checks whether vp can rmdir. (check empty)
930  *
931  * vp is unionfs vnode.
932  * vp should be locked.
933  */
934 int
935 unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
936 {
937 	int		error;
938 	int		eofflag;
939 	int		lookuperr;
940 	struct vnode   *uvp;
941 	struct vnode   *lvp;
942 	struct vnode   *tvp;
943 	struct vattr	va;
944 	struct componentname cn;
945 	/*
946 	 * The size of buf needs to be larger than DIRBLKSIZ.
947 	 */
948 	char		buf[256 * 6];
949 	struct dirent  *dp;
950 	struct dirent  *edp;
951 	struct uio	uio;
952 	struct iovec	iov;
953 
954 	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
955 
956 	eofflag = 0;
957 	uvp = UNIONFSVPTOUPPERVP(vp);
958 	lvp = UNIONFSVPTOLOWERVP(vp);
959 
960 	/* check opaque */
961 	if ((error = VOP_GETATTR(uvp, &va, cred, td)) != 0)
962 		return (error);
963 	if (va.va_flags & OPAQUE)
964 		return (0);
965 
966 	/* open vnode */
967 #ifdef MAC
968 	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
969 		return (error);
970 #endif
971 	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
972 		return (error);
973 	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
974 		return (error);
975 
976 	uio.uio_rw = UIO_READ;
977 	uio.uio_segflg = UIO_SYSSPACE;
978 	uio.uio_td = td;
979 	uio.uio_offset = 0;
980 
981 #ifdef MAC
982 	error = mac_vnode_check_readdir(td->td_ucred, lvp);
983 #endif
984 	while (!error && !eofflag) {
985 		iov.iov_base = buf;
986 		iov.iov_len = sizeof(buf);
987 		uio.uio_iov = &iov;
988 		uio.uio_iovcnt = 1;
989 		uio.uio_resid = iov.iov_len;
990 
991 		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
992 		if (error)
993 			break;
994 
995 		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
996 		for (dp = (struct dirent*)buf; !error && dp < edp;
997 		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
998 			if (dp->d_type == DT_WHT ||
999 			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1000 			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1001 				continue;
1002 
1003 			cn.cn_namelen = dp->d_namlen;
1004 			cn.cn_pnbuf = NULL;
1005 			cn.cn_nameptr = dp->d_name;
1006 			cn.cn_nameiop = LOOKUP;
1007 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1008 			cn.cn_lkflags = LK_EXCLUSIVE;
1009 			cn.cn_thread = td;
1010 			cn.cn_cred = cred;
1011 			cn.cn_consume = 0;
1012 
1013 			/*
1014 			 * check entry in lower.
1015 			 * Sometimes, readdir function returns
1016 			 * wrong entry.
1017 			 */
1018 			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1019 
1020 			if (!lookuperr)
1021 				vput(tvp);
1022 			else
1023 				continue; /* skip entry */
1024 
1025 			/*
1026 			 * check entry
1027 			 * If it has no exist/whiteout entry in upper,
1028 			 * directory is not empty.
1029 			 */
1030 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1031 			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1032 
1033 			if (!lookuperr)
1034 				vput(tvp);
1035 
1036 			/* ignore exist or whiteout entry */
1037 			if (!lookuperr ||
1038 			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1039 				continue;
1040 
1041 			error = ENOTEMPTY;
1042 		}
1043 	}
1044 
1045 	/* close vnode */
1046 	VOP_CLOSE(vp, FREAD, cred, td);
1047 
1048 	return (error);
1049 }
1050 
1051 #ifdef DIAGNOSTIC
1052 
1053 struct vnode   *
1054 unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1055 {
1056 	struct unionfs_node *unp;
1057 
1058 	unp = VTOUNIONFS(vp);
1059 
1060 #ifdef notyet
1061 	if (vp->v_op != unionfs_vnodeop_p) {
1062 		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1063 #ifdef KDB
1064 		kdb_enter(KDB_WHY_UNIONFS,
1065 		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1066 #endif
1067 		panic("unionfs_checkuppervp");
1068 	};
1069 #endif
1070 	return (unp->un_uppervp);
1071 }
1072 
1073 struct vnode   *
1074 unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1075 {
1076 	struct unionfs_node *unp;
1077 
1078 	unp = VTOUNIONFS(vp);
1079 
1080 #ifdef notyet
1081 	if (vp->v_op != unionfs_vnodeop_p) {
1082 		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1083 #ifdef KDB
1084 		kdb_enter(KDB_WHY_UNIONFS,
1085 		    "unionfs_checklowervp: on non-unionfs-node.\n");
1086 #endif
1087 		panic("unionfs_checklowervp");
1088 	};
1089 #endif
1090 	return (unp->un_lowervp);
1091 }
1092 #endif
1093