xref: /freebsd/sys/fs/unionfs/union_subr.c (revision f0a75d274af375d15b97b830966b99a02b7db911)
1 /*-
2  * Copyright (c) 1994 Jan-Simon Pendry
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6  * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Jan-Simon Pendry.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36  * $FreeBSD$
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/mutex.h>
44 #include <sys/malloc.h>
45 #include <sys/mount.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/vnode.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/filedesc.h>
52 #include <sys/stat.h>
53 #include <sys/resourcevar.h>
54 
55 #ifdef MAC
56 #include <sys/mac.h>
57 #endif
58 
59 #include <vm/uma.h>
60 
61 #include <fs/unionfs/union.h>
62 
63 #define	NUNIONFSNODECACHE 32
64 
65 #define	UNIONFS_NHASH(upper, lower) \
66 	(&unionfs_node_hashtbl[(((uintptr_t)upper + (uintptr_t)lower) >> 8) & unionfs_node_hash])
67 
68 static LIST_HEAD(unionfs_node_hashhead, unionfs_node) *unionfs_node_hashtbl;
69 static u_long	unionfs_node_hash;
70 struct mtx	unionfs_hashmtx;
71 
72 static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
73 MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
74 MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
75 
76 /*
77  * Initialize cache headers
78  */
79 int
80 unionfs_init(struct vfsconf *vfsp)
81 {
82 	UNIONFSDEBUG("unionfs_init\n");	/* printed during system boot */
83 	unionfs_node_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH, &unionfs_node_hash);
84 	mtx_init(&unionfs_hashmtx, "unionfs", NULL, MTX_DEF);
85 
86 	return (0);
87 }
88 
89 /*
90  * Destroy cache headers
91  */
92 int
93 unionfs_uninit(struct vfsconf *vfsp)
94 {
95 	mtx_destroy(&unionfs_hashmtx);
96 	free(unionfs_node_hashtbl, M_UNIONFSHASH);
97 	return (0);
98 }
99 
100 /*
101  * Return a VREF'ed alias for unionfs vnode if already exists, else 0.
102  */
103 static struct vnode *
104 unionfs_hashget(struct mount *mp, struct vnode *uppervp,
105 		struct vnode *lowervp, struct vnode *dvp, char *path,
106 		int lkflags, struct thread *td)
107 {
108 	struct unionfs_node_hashhead *hd;
109 	struct unionfs_node *unp;
110 	struct vnode   *vp;
111 	int error;
112 
113 	if (lkflags & LK_TYPE_MASK)
114 		lkflags |= LK_RETRY;
115 	hd = UNIONFS_NHASH(uppervp, lowervp);
116 
117 	mtx_lock(&unionfs_hashmtx);
118 	LIST_FOREACH(unp, hd, un_hash) {
119 		if (unp->un_uppervp == uppervp &&
120 		    unp->un_lowervp == lowervp &&
121 		    unp->un_dvp == dvp &&
122 		    UNIONFSTOV(unp)->v_mount == mp &&
123 		    (!path || !(unp->un_path) || !strcmp(unp->un_path, path))) {
124 			vp = UNIONFSTOV(unp);
125 			VI_LOCK(vp);
126 			mtx_unlock(&unionfs_hashmtx);
127 			/*
128 			 * We need to clear the OWEINACT flag here as this
129 			 * may lead vget() to try to lock our vnode which is
130 			 * already locked via vp.
131 			 */
132 			vp->v_iflag &= ~VI_OWEINACT;
133 			error = vget(vp, LK_INTERLOCK, td);
134 			if (error != 0)
135 				panic("unionfs_hashget: vget error %d", error);
136 			if (lkflags & LK_TYPE_MASK)
137 				vn_lock(vp, lkflags, td);
138 			return (vp);
139 		}
140 	}
141 
142 	mtx_unlock(&unionfs_hashmtx);
143 
144 	return (NULLVP);
145 }
146 
147 /*
148  * Act like unionfs_hashget, but add passed unionfs_node to hash if no existing
149  * node found.
150  */
151 static struct vnode *
152 unionfs_hashins(struct mount *mp, struct unionfs_node *uncp,
153 		char *path, int lkflags, struct thread *td)
154 {
155 	struct unionfs_node_hashhead *hd;
156 	struct unionfs_node *unp;
157 	struct vnode   *vp;
158 	int error;
159 
160 	if (lkflags & LK_TYPE_MASK)
161 		lkflags |= LK_RETRY;
162 	hd = UNIONFS_NHASH(uncp->un_uppervp, uncp->un_lowervp);
163 
164 	mtx_lock(&unionfs_hashmtx);
165 	LIST_FOREACH(unp, hd, un_hash) {
166 		if (unp->un_uppervp == uncp->un_uppervp &&
167 		    unp->un_lowervp == uncp->un_lowervp &&
168 		    unp->un_dvp == uncp->un_dvp &&
169 		    UNIONFSTOV(unp)->v_mount == mp &&
170 		    (!path || !(unp->un_path) || !strcmp(unp->un_path, path))) {
171 			vp = UNIONFSTOV(unp);
172 			VI_LOCK(vp);
173 			mtx_unlock(&unionfs_hashmtx);
174 			vp->v_iflag &= ~VI_OWEINACT;
175 			error = vget(vp, LK_INTERLOCK, td);
176 			if (error)
177 				panic("unionfs_hashins: vget error %d", error);
178 			if (lkflags & LK_TYPE_MASK)
179 				vn_lock(vp, lkflags, td);
180 			return (vp);
181 		}
182 	}
183 
184 	LIST_INSERT_HEAD(hd, uncp, un_hash);
185 	uncp->un_flag |= UNIONFS_CACHED;
186 	mtx_unlock(&unionfs_hashmtx);
187 
188 	return (NULLVP);
189 }
190 
/*
 * Make a new unionfs node or return the existing one.
 *
 * uppervp and lowervp should be unlocked: the unionfs vnode shares its
 * lock with uppervp (or with lowervp when there is no upper), so locking
 * the unionfs vnode locks that vnode as well.  To prevent deadlock, do not
 * hold more than one of these locks at the same time.
 *
 * mp is the unionfs mount.  At least one of uppervp/lowervp must be
 * non-NULL, otherwise this panics.  dvp is stored as the node's un_dvp.
 * cnp, when given, supplies the lock flags and the name saved in un_path.
 *
 * Returns 0 with the vnode in *vpp (locked only when cnp->cn_lkflags
 * requests a lock type).  Returns EINVAL when dvp is NULL and the pair is
 * not the mount's (upper root, lower root) pair; errors from getnewvnode()
 * and insmntque() are passed through.
 */
int
unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
		struct vnode *lowervp, struct vnode *dvp,
		struct vnode **vpp, struct componentname *cnp,
		struct thread *td)
{
	struct unionfs_mount *ump;
	struct unionfs_node *unp;
	struct vnode   *vp;
	int		error;
	int		lkflags;
	char	       *path;

	ump = MOUNTTOUNIONFSMOUNT(mp);
	lkflags = (cnp ? cnp->cn_lkflags : 0);
	path = (cnp ? cnp->cn_nameptr : "");

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("unionfs_nodeget: upper and lower is null");

	/* If it has no ISLASTCN flag, path check is skipped. */
	if (!cnp || !(cnp->cn_flags & ISLASTCN))
		path = NULL;

	/* Lookup the hash first. */
	*vpp = unionfs_hashget(mp, uppervp, lowervp, dvp, path, lkflags, td);
	if (*vpp != NULLVP)
		return (0);

	/* Only the mount's own root pair may be created without a parent. */
	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
		if (dvp == NULLVP)
			return (EINVAL);
	}

	/*
	 * Do the MALLOC before the getnewvnode since doing so afterward
	 * might cause a bogus v_data pointer to get dereferenced elsewhere
	 * if MALLOC should block.
	 */
	MALLOC(unp, struct unionfs_node *, sizeof(struct unionfs_node),
	    M_UNIONFSNODE, M_WAITOK | M_ZERO);

	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
	if (error) {
		FREE(unp, M_UNIONFSNODE);
		return (error);
	}
	/*
	 * NOTE(review): on insmntque() failure only unp is freed here; vp is
	 * presumably disposed of by insmntque() itself -- confirm.
	 */
	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
	if (error != 0) {
		FREE(unp, M_UNIONFSNODE);
		return (error);
	}
	/* The node holds its own reference on every vnode it points at. */
	if (dvp != NULLVP)
		vref(dvp);
	if (uppervp != NULLVP)
		vref(uppervp);
	if (lowervp != NULLVP)
		vref(lowervp);

	unp->un_vnode = vp;
	unp->un_uppervp = uppervp;
	unp->un_lowervp = lowervp;
	unp->un_dvp = dvp;
	/* Share the lock of the upper vnode, or of the lower if no upper. */
	if (uppervp != NULLVP)
		vp->v_vnlock = uppervp->v_vnlock;
	else
		vp->v_vnlock = lowervp->v_vnlock;

	/* Keep a private NUL-terminated copy of the component name. */
	if (cnp) {
		unp->un_path = (char *)
		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK | M_ZERO);
		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
		unp->un_path[cnp->cn_namelen] = '\0';
	}
	vp->v_type = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
	vp->v_data = unp;

	/* The pair of both mount roots is the root of the unionfs mount. */
	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
		vp->v_vflag |= VV_ROOT;

	/*
	 * Publish the node.  A non-NULL result means an equivalent node was
	 * hashed in the meantime: hand that one back, undo the references
	 * taken above, detach them from unp and release the new vnode.
	 */
	*vpp = unionfs_hashins(mp, unp, path, lkflags, td);
	if (*vpp != NULLVP) {
		if (dvp != NULLVP)
			vrele(dvp);
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);

		unp->un_uppervp = NULLVP;
		unp->un_lowervp = NULLVP;
		unp->un_dvp = NULLVP;
		vrele(vp);

		return (0);
	}

	if (lkflags & LK_TYPE_MASK)
		vn_lock(vp, lkflags | LK_RETRY, td);

	*vpp = vp;

	return (0);
}
304 
/*
 * Remove node from hash and destroy it.
 *
 * Detaches the unionfs_node from vp, gives vp back its own lock, unlocks
 * and releases the upper, lower and parent vnodes, and frees the saved
 * path, all per-thread status records and the node itself.
 */
void
unionfs_hashrem(struct vnode *vp, struct thread *td)
{
	int		vfslocked;
	struct unionfs_node *unp;
	struct unionfs_node_status *unsp, *unsp_tmp;
	struct vnode   *lvp;
	struct vnode   *uvp;

	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in unionfs_lock().
	 */
	VI_LOCK(vp);
	unp = VTOUNIONFS(vp);
	lvp = unp->un_lowervp;
	uvp = unp->un_uppervp;
	unp->un_lowervp = unp->un_uppervp = NULLVP;

	/* Stop sharing the upper/lower lock: point vp at its own v_lock. */
	vp->v_vnlock = &(vp->v_lock);
	vp->v_data = NULL;
	/* Lock vp's own lock exclusively; LK_INTERLOCK hands over the interlock. */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp), td);
	if (lvp != NULLVP)
		VOP_UNLOCK(lvp, 0, td);
	if (uvp != NULLVP)
		VOP_UNLOCK(uvp, 0, td);

	/* Unhash the node if it is currently on a hash chain. */
	mtx_lock(&unionfs_hashmtx);
	if (unp->un_flag & UNIONFS_CACHED) {
		LIST_REMOVE(unp, un_hash);
		unp->un_flag &= ~UNIONFS_CACHED;
	}
	mtx_unlock(&unionfs_hashmtx);
	vp->v_object = NULL;

	/* Drop the node's references, each wrapped in VFS_LOCK_GIANT for its mount. */
	if (lvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
		vrele(lvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (uvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
		vrele(uvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (unp->un_dvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(unp->un_dvp->v_mount);
		vrele(unp->un_dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		unp->un_dvp = NULLVP;
	}
	if (unp->un_path) {
		free(unp->un_path, M_UNIONFSPATH);
		unp->un_path = NULL;
	}

	/* Free every remaining per-thread status record. */
	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
		LIST_REMOVE(unsp, uns_list);
		free(unsp, M_TEMP);
	}
	FREE(unp, M_UNIONFSNODE);
}
370 
371 /*
372  * Get the unionfs node status.
373  * You need exclusive lock this vnode.
374  */
375 void
376 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
377 			struct unionfs_node_status **unspp)
378 {
379 	struct unionfs_node_status *unsp;
380 
381 	KASSERT(NULL != unspp, ("null pointer"));
382 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
383 
384 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
385 		if (unsp->uns_tid == td->td_tid) {
386 			*unspp = unsp;
387 			return;
388 		}
389 	}
390 
391 	/* create a new unionfs node status */
392 	MALLOC(unsp, struct unionfs_node_status *,
393 	    sizeof(struct unionfs_node_status), M_TEMP, M_WAITOK | M_ZERO);
394 
395 	unsp->uns_tid = td->td_tid;
396 	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
397 
398 	*unspp = unsp;
399 }
400 
401 /*
402  * Remove the unionfs node status, if you can.
403  * You need exclusive lock this vnode.
404  */
405 void
406 unionfs_tryrem_node_status(struct unionfs_node *unp, struct thread *td,
407 			   struct unionfs_node_status *unsp)
408 {
409 	KASSERT(NULL != unsp, ("null pointer"));
410 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
411 
412 	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
413 		return;
414 
415 	LIST_REMOVE(unsp, uns_list);
416 	free(unsp, M_TEMP);
417 }
418 
419 /*
420  * Create upper node attr.
421  */
422 void
423 unionfs_create_uppervattr_core(struct unionfs_mount *ump,
424 			       struct vattr *lva,
425 			       struct vattr *uva,
426 			       struct thread *td)
427 {
428 	VATTR_NULL(uva);
429 	uva->va_type = lva->va_type;
430 	uva->va_atime = lva->va_atime;
431 	uva->va_mtime = lva->va_mtime;
432 	uva->va_ctime = lva->va_ctime;
433 
434 	switch (ump->um_copymode) {
435 	case UNIONFS_TRANSPARENT:
436 		uva->va_mode = lva->va_mode;
437 		uva->va_uid = lva->va_uid;
438 		uva->va_gid = lva->va_gid;
439 		break;
440 	case UNIONFS_MASQUERADE:
441 		if (ump->um_uid == lva->va_uid) {
442 			uva->va_mode = lva->va_mode & 077077;
443 			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
444 			uva->va_uid = lva->va_uid;
445 			uva->va_gid = lva->va_gid;
446 		} else {
447 			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
448 			uva->va_uid = ump->um_uid;
449 			uva->va_gid = ump->um_gid;
450 		}
451 		break;
452 	default:		/* UNIONFS_TRADITIONAL */
453 		FILEDESC_SLOCK(td->td_proc->p_fd);
454 		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
455 		FILEDESC_SUNLOCK(td->td_proc->p_fd);
456 		uva->va_uid = ump->um_uid;
457 		uva->va_gid = ump->um_gid;
458 		break;
459 	}
460 }
461 
462 /*
463  * Create upper node attr.
464  */
465 int
466 unionfs_create_uppervattr(struct unionfs_mount *ump,
467 			  struct vnode *lvp,
468 			  struct vattr *uva,
469 			  struct ucred *cred,
470 			  struct thread *td)
471 {
472 	int		error;
473 	struct vattr	lva;
474 
475 	if ((error = VOP_GETATTR(lvp, &lva, cred, td)))
476 		return (error);
477 
478 	unionfs_create_uppervattr_core(ump, &lva, uva, td);
479 
480 	return (error);
481 }
482 
483 /*
484  * relookup
485  *
486  * dvp should be locked on entry and will be locked on return.
487  *
488  * If an error is returned, *vpp will be invalid, otherwise it will hold a
489  * locked, referenced vnode. If *vpp == dvp then remember that only one
490  * LK_EXCLUSIVE lock is held.
491  */
492 static int
493 unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
494 		 struct componentname *cnp, struct componentname *cn,
495 		 struct thread *td, char *path, int pathlen, u_long nameiop)
496 {
497 	int	error;
498 
499 	cn->cn_namelen = pathlen;
500 	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
501 	bcopy(path, cn->cn_pnbuf, pathlen);
502 	cn->cn_pnbuf[pathlen] = '\0';
503 
504 	cn->cn_nameiop = nameiop;
505 	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
506 	cn->cn_lkflags = LK_EXCLUSIVE;
507 	cn->cn_thread = td;
508 	cn->cn_cred = cnp->cn_cred;
509 
510 	cn->cn_nameptr = cn->cn_pnbuf;
511 	cn->cn_consume = cnp->cn_consume;
512 
513 	if (nameiop == DELETE)
514 		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
515 	else if (RENAME == nameiop)
516 		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
517 
518 	vref(dvp);
519 	VOP_UNLOCK(dvp, 0, td);
520 
521 	if ((error = relookup(dvp, vpp, cn))) {
522 		uma_zfree(namei_zone, cn->cn_pnbuf);
523 		cn->cn_flags &= ~HASBUF;
524 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
525 	} else
526 		vrele(dvp);
527 
528 	return (error);
529 }
530 
531 /*
532  * relookup for CREATE namei operation.
533  *
534  * dvp is unionfs vnode. dvp should be locked.
535  *
536  * If it called 'unionfs_copyfile' function by unionfs_link etc,
537  * VOP_LOOKUP information is broken.
538  * So it need relookup in order to create link etc.
539  */
540 int
541 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
542 			    struct thread *td)
543 {
544 	int	error;
545 	struct vnode *udvp;
546 	struct vnode *vp;
547 	struct componentname cn;
548 
549 	udvp = UNIONFSVPTOUPPERVP(dvp);
550 	vp = NULLVP;
551 
552 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
553 	    strlen(cnp->cn_nameptr), CREATE);
554 	if (error)
555 		return (error);
556 
557 	if (vp != NULLVP) {
558 		if (udvp == vp)
559 			vrele(vp);
560 		else
561 			vput(vp);
562 
563 		error = EEXIST;
564 	}
565 
566 	if (cn.cn_flags & HASBUF) {
567 		uma_zfree(namei_zone, cn.cn_pnbuf);
568 		cn.cn_flags &= ~HASBUF;
569 	}
570 
571 	if (!error) {
572 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
573 		cnp->cn_flags = cn.cn_flags;
574 	}
575 
576 	return (error);
577 }
578 
579 /*
580  * relookup for DELETE namei operation.
581  *
582  * dvp is unionfs vnode. dvp should be locked.
583  */
584 int
585 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
586 			    struct thread *td)
587 {
588 	int	error;
589 	struct vnode *udvp;
590 	struct vnode *vp;
591 	struct componentname cn;
592 
593 	udvp = UNIONFSVPTOUPPERVP(dvp);
594 	vp = NULLVP;
595 
596 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
597 	    strlen(cnp->cn_nameptr), DELETE);
598 	if (error)
599 		return (error);
600 
601 	if (vp == NULLVP)
602 		error = ENOENT;
603 	else {
604 		if (udvp == vp)
605 			vrele(vp);
606 		else
607 			vput(vp);
608 	}
609 
610 	if (cn.cn_flags & HASBUF) {
611 		uma_zfree(namei_zone, cn.cn_pnbuf);
612 		cn.cn_flags &= ~HASBUF;
613 	}
614 
615 	if (!error) {
616 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
617 		cnp->cn_flags = cn.cn_flags;
618 	}
619 
620 	return (error);
621 }
622 
623 /*
624  * relookup for RENAME namei operation.
625  *
626  * dvp is unionfs vnode. dvp should be locked.
627  */
628 int
629 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
630 			    struct thread *td)
631 {
632 	int error;
633 	struct vnode *udvp;
634 	struct vnode *vp;
635 	struct componentname cn;
636 
637 	udvp = UNIONFSVPTOUPPERVP(dvp);
638 	vp = NULLVP;
639 
640 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
641 	    strlen(cnp->cn_nameptr), RENAME);
642 	if (error)
643 		return (error);
644 
645 	if (vp != NULLVP) {
646 		if (udvp == vp)
647 			vrele(vp);
648 		else
649 			vput(vp);
650 	}
651 
652 	if (cn.cn_flags & HASBUF) {
653 		uma_zfree(namei_zone, cn.cn_pnbuf);
654 		cn.cn_flags &= ~HASBUF;
655 	}
656 
657 	if (!error) {
658 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
659 		cnp->cn_flags = cn.cn_flags;
660 	}
661 
662 	return (error);
663 
664 }
665 
/*
 * Update the unionfs_node.
 *
 * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
 * uvp's lock and lower's lock will be unlocked.
 *
 * NOTE(review): no unlock of the lower vnode is visible in this function;
 * confirm where (or whether) the lower lock is actually released.
 *
 * Panics if the lower vnode's lock is not held exclusively.
 */
static void
unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
		    struct thread *td)
{
	int		count, lockcnt;
	struct vnode   *vp;
	struct vnode   *lvp;

	vp = UNIONFSTOV(unp);
	lvp = unp->un_lowervp;

	/*
	 * lock update
	 *
	 * Under the interlock, install uvp as the upper vnode, make the
	 * unionfs vnode share uvp's lock, and sample how many times the
	 * lower lock is held exclusively.
	 */
	VI_LOCK(vp);
	unp->un_uppervp = uvp;
	vp->v_vnlock = uvp->v_vnlock;
	lockcnt = lvp->v_vnlock->lk_exclusivecount;
	if (lockcnt <= 0)
		panic("unionfs: no exclusive lock");
	VI_UNLOCK(vp);
	/*
	 * uvp arrives locked once; take it recursively lockcnt - 1 more
	 * times so that its hold count equals the one sampled from the
	 * lower lock.
	 */
	for (count = 1; count < lockcnt; count++)
		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);

	/*
	 * cache update
	 *
	 * The hash key includes the upper vnode, so move the node to the
	 * chain selected by (uvp, lvp).
	 */
	mtx_lock(&unionfs_hashmtx);
	if (unp->un_flag & UNIONFS_CACHED)
		LIST_REMOVE(unp, un_hash);
	LIST_INSERT_HEAD(UNIONFS_NHASH(uvp, lvp), unp, un_hash);
	unp->un_flag |= UNIONFS_CACHED;
	mtx_unlock(&unionfs_hashmtx);
}
706 
/*
 * Create a new shadow dir.
 *
 * Creates, in the upper directory udvp, a directory named by cnp that
 * shadows the lower directory of unp.  The work is done with a temporary
 * credential whose effective, real and saved uid are 0; cnp->cn_cred is
 * restored before returning.
 *
 * udvp should be locked on entry and will be locked on return.
 *
 * If no error returned, unp will be updated.
 *
 * Returns EEXIST if unp already has an upper vnode or the name already
 * exists in udvp; otherwise 0 or the first error from VOP_GETATTR(),
 * unionfs_relookup(), vn_start_write(), VOP_LEASE() or VOP_MKDIR().
 */
int
unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
		    struct unionfs_node *unp, struct componentname *cnp,
		    struct thread *td)
{
	int		error;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	va;
	struct vattr	lva;
	struct componentname cn;
	struct mount   *mp;
	struct ucred   *cred;
	struct ucred   *credbk;
	struct uidinfo *rootinfo;

	if (unp->un_uppervp != NULLVP)
		return (EEXIST);

	lvp = unp->un_lowervp;
	uvp = NULLVP;
	credbk = cnp->cn_cred;	/* restored at unionfs_mkshadowdir_abort */

	/* Authority change to root */
	rootinfo = uifind((uid_t)0);
	cred = crdup(cnp->cn_cred);
	/*
	 * NOTE(review): this +1 is undone by the -1 at the abort label;
	 * presumably it balances process-count accounting done by
	 * change_ruid() -- confirm.
	 */
	chgproccnt(cred->cr_ruidinfo, 1, 0);
	change_euid(cred, rootinfo);
	change_ruid(cred, rootinfo);
	change_svuid(cred, (uid_t)0);
	uifree(rootinfo);
	cnp->cn_cred = cred;

	/* Zero cn so that its cn_flags never hold stale stack contents. */
	memset(&cn, 0, sizeof(cn));

	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred, td)))
		goto unionfs_mkshadowdir_abort;

	/* unionfs_relookup() frees cn's name buffer itself when it fails. */
	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
		goto unionfs_mkshadowdir_abort;
	if (uvp != NULLVP) {
		/* Something with this name already exists in the upper dir. */
		if (udvp == uvp)
			vrele(uvp);
		else
			vput(uvp);

		error = EEXIST;
		goto unionfs_mkshadowdir_free_out;
	}

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
		goto unionfs_mkshadowdir_free_out;
	if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) {
		vn_finished_write(mp);
		goto unionfs_mkshadowdir_free_out;
	}
	/* Derive the new directory's attributes from the lower directory. */
	unionfs_create_uppervattr_core(ump, &lva, &va, td);

	error = VOP_MKDIR(udvp, &uvp, &cn, &va);

	if (!error) {
		/* Attach the new directory to the node as its upper vnode. */
		unionfs_node_update(unp, uvp, td);

		/*
		 * XXX The bug which cannot set uid/gid was corrected.
		 * Ignore errors.
		 */
		va.va_type = VNON;
		VOP_SETATTR(uvp, &va, cn.cn_cred, td);
	}
	vn_finished_write(mp);

unionfs_mkshadowdir_free_out:
	/* Release the name buffer obtained by unionfs_relookup(). */
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

unionfs_mkshadowdir_abort:
	/* Put the caller's credential back and drop the temporary one. */
	cnp->cn_cred = credbk;
	chgproccnt(cred->cr_ruidinfo, -1, 0);
	crfree(cred);

	return (error);
}
799 
800 /*
801  * Create a new whiteout.
802  *
803  * dvp should be locked on entry and will be locked on return.
804  */
805 int
806 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
807 		   struct thread *td, char *path)
808 {
809 	int		error;
810 	struct vnode   *wvp;
811 	struct componentname cn;
812 	struct mount   *mp;
813 
814 	if (path == NULL)
815 		path = cnp->cn_nameptr;
816 
817 	wvp = NULLVP;
818 	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
819 		return (error);
820 	if (wvp != NULLVP) {
821 		if (cn.cn_flags & HASBUF) {
822 			uma_zfree(namei_zone, cn.cn_pnbuf);
823 			cn.cn_flags &= ~HASBUF;
824 		}
825 		if (dvp == wvp)
826 			vrele(wvp);
827 		else
828 			vput(wvp);
829 
830 		return (EEXIST);
831 	}
832 
833 	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
834 		goto unionfs_mkwhiteout_free_out;
835 	if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE)))
836 		error = VOP_WHITEOUT(dvp, &cn, CREATE);
837 
838 	vn_finished_write(mp);
839 
840 unionfs_mkwhiteout_free_out:
841 	if (cn.cn_flags & HASBUF) {
842 		uma_zfree(namei_zone, cn.cn_pnbuf);
843 		cn.cn_flags &= ~HASBUF;
844 	}
845 
846 	return (error);
847 }
848 
/*
 * Create a new vnode for a new shadow file.
 *
 * The file is created in the upper directory udvp under the name saved in
 * unp->un_path, with attributes derived from the lower vnode of unp (these
 * are also returned in *uvap), and is then opened for writing.
 *
 * If an error is returned, *vpp will be invalid, otherwise it will hold a
 * locked, referenced and opened vnode.
 *
 * unp is never updated.
 *
 * Returns EEXIST if the name already exists in udvp; panics if un_path is
 * NULL.
 */
static int
unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
			   struct unionfs_node *unp, struct vattr *uvap,
			   struct thread *td)
{
	struct unionfs_mount *ump;
	struct vnode   *vp;
	struct vnode   *lvp;
	struct ucred   *cred;
	struct vattr	lva;
	int		fmode;
	int		error;
	struct componentname cn;

	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
	vp = NULLVP;
	lvp = unp->un_lowervp;
	cred = td->td_ucred;
	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
	error = 0;

	/* Attributes of the new file are derived from the lower file. */
	if ((error = VOP_GETATTR(lvp, &lva, cred, td)) != 0)
		return (error);
	unionfs_create_uppervattr_core(ump, &lva, uvap, td);

	if (unp->un_path == NULL)
		panic("unionfs: un_path is null");

	/* Hand-build a CREATE componentname for the saved name. */
	cn.cn_namelen = strlen(unp->un_path);
	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);	/* +1: copy the NUL */
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
	cn.cn_lkflags = LK_EXCLUSIVE;
	cn.cn_thread = td;
	cn.cn_cred = cred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_consume = 0;

	/*
	 * NOTE(review): the failure path skips both vrele() and VOP_UNLOCK()
	 * on udvp, so relookup() presumably consumes the reference and leaves
	 * udvp unlocked when it fails -- confirm.
	 */
	vref(udvp);
	if ((error = relookup(udvp, &vp, &cn)) != 0)
		goto unionfs_vn_create_on_upper_free_out2;
	vrele(udvp);

	if (vp != NULLVP) {
		/* The name already exists in the upper directory. */
		if (vp == udvp)
			vrele(vp);
		else
			vput(vp);
		error = EEXIST;
		goto unionfs_vn_create_on_upper_free_out1;
	}

	if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0)
		goto unionfs_vn_create_on_upper_free_out1;

	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
		goto unionfs_vn_create_on_upper_free_out1;

	if ((error = VOP_OPEN(vp, fmode, cred, td, -1)) != 0) {
		vput(vp);
		goto unionfs_vn_create_on_upper_free_out1;
	}
	/* Account for the write open; unionfs_copyfile() decrements it. */
	vp->v_writecount++;
	*vpp = vp;

unionfs_vn_create_on_upper_free_out1:
	/* udvp is unlocked on every path that got past relookup(). */
	VOP_UNLOCK(udvp, 0, td);

unionfs_vn_create_on_upper_free_out2:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	return (error);
}
934 
935 /*
936  * Copy from lvp to uvp.
937  *
938  * lvp and uvp should be locked and opened on entry and will be locked and
939  * opened on return.
940  */
941 static int
942 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
943 		      struct ucred *cred, struct thread *td)
944 {
945 	int		error;
946 	off_t		offset;
947 	int		count;
948 	int		bufoffset;
949 	char           *buf;
950 	struct uio	uio;
951 	struct iovec	iov;
952 
953 	error = 0;
954 	memset(&uio, 0, sizeof(uio));
955 
956 	uio.uio_td = td;
957 	uio.uio_segflg = UIO_SYSSPACE;
958 	uio.uio_offset = 0;
959 
960 	if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0)
961 		return (error);
962 	if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0)
963 		return (error);
964 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
965 
966 	while (error == 0) {
967 		offset = uio.uio_offset;
968 
969 		uio.uio_iov = &iov;
970 		uio.uio_iovcnt = 1;
971 		iov.iov_base = buf;
972 		iov.iov_len = MAXBSIZE;
973 		uio.uio_resid = iov.iov_len;
974 		uio.uio_rw = UIO_READ;
975 
976 		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
977 			break;
978 		if ((count = MAXBSIZE - uio.uio_resid) == 0)
979 			break;
980 
981 		bufoffset = 0;
982 		while (bufoffset < count) {
983 			uio.uio_iov = &iov;
984 			uio.uio_iovcnt = 1;
985 			iov.iov_base = buf + bufoffset;
986 			iov.iov_len = count - bufoffset;
987 			uio.uio_offset = offset + bufoffset;
988 			uio.uio_resid = iov.iov_len;
989 			uio.uio_rw = UIO_WRITE;
990 
991 			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
992 				break;
993 
994 			bufoffset += (count - bufoffset) - uio.uio_resid;
995 		}
996 
997 		uio.uio_offset = offset + bufoffset;
998 	}
999 
1000 	free(buf, M_TEMP);
1001 
1002 	return (error);
1003 }
1004 
/*
 * Copy file from lower to upper.
 *
 * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
 * docopy.
 *
 * If no error returned, unp will be updated.
 * NOTE(review): once the upper file has been created, unionfs_node_update()
 * is called even when the content copy failed, so unp is also updated on
 * that error path -- confirm this is intended.
 *
 * Returns EROFS if the unionfs mount or the upper directory's mount is
 * read-only or the parent has no upper vnode, EINVAL if unp has no parent
 * (un_dvp), EEXIST if unp already has an upper vnode; otherwise 0 or the
 * first error from VOP_ACCESS(), vn_start_write(), the upper file
 * creation, VOP_OPEN() on the lower file or the copy itself.
 */
int
unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
		 struct thread *td)
{
	int		error;
	struct mount   *mp;
	struct vnode   *udvp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	uva;

	lvp = unp->un_lowervp;
	uvp = NULLVP;

	/* Preconditions: writable mount, a parent with an upper dir, no upper yet. */
	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (unp->un_dvp == NULLVP)
		return (EINVAL);
	if (unp->un_uppervp != NULLVP)
		return (EEXIST);
	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
	if (udvp == NULLVP)
		return (EROFS);
	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);

	/* The caller must be allowed to read the lower file. */
	error = VOP_ACCESS(lvp, VREAD, cred, td);
	if (error != 0)
		return (error);

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	/* On success uvp is locked, referenced and opened for writing. */
	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
	if (error != 0) {
		vn_finished_write(mp);
		return (error);
	}

	if (docopy != 0) {
		error = VOP_OPEN(lvp, FREAD, cred, td, -1);
		if (error == 0) {
			error = unionfs_copyfile_core(lvp, uvp, cred, td);
			VOP_CLOSE(lvp, FREAD, cred, td);
		}
	}
	/* Undo the write open done by unionfs_vn_create_on_upper(). */
	VOP_CLOSE(uvp, FWRITE, cred, td);
	uvp->v_writecount--;

	vn_finished_write(mp);

	if (error == 0) {
		/* Reset the attributes. Ignore errors. */
		uva.va_type = VNON;
		VOP_SETATTR(uvp, &uva, cred, td);
	}

	/* Attach uvp to the node (done regardless of error, see above). */
	unionfs_node_update(unp, uvp, td);

	return (error);
}
1073 
1074 /*
1075  * It checks whether vp can rmdir. (check empty)
1076  *
1077  * vp is unionfs vnode.
1078  * vp should be locked.
1079  */
1080 int
1081 unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
1082 {
1083 	int		error;
1084 	int		eofflag;
1085 	int		lookuperr;
1086 	struct vnode   *uvp;
1087 	struct vnode   *lvp;
1088 	struct vnode   *tvp;
1089 	struct vattr	va;
1090 	struct componentname cn;
1091 	/*
1092 	 * The size of buf needs to be larger than DIRBLKSIZ.
1093 	 */
1094 	char		buf[256 * 6];
1095 	struct dirent  *dp;
1096 	struct dirent  *edp;
1097 	struct uio	uio;
1098 	struct iovec	iov;
1099 
1100 	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
1101 
1102 	eofflag = 0;
1103 	uvp = UNIONFSVPTOUPPERVP(vp);
1104 	lvp = UNIONFSVPTOLOWERVP(vp);
1105 
1106 	/* check opaque */
1107 	if ((error = VOP_GETATTR(uvp, &va, cred, td)) != 0)
1108 		return (error);
1109 	if (va.va_flags & OPAQUE)
1110 		return (0);
1111 
1112 	/* open vnode */
1113 	if ((error = VOP_OPEN(vp, FREAD, cred, td, -1)) != 0)
1114 		return (error);
1115 
1116 	uio.uio_rw = UIO_READ;
1117 	uio.uio_segflg = UIO_SYSSPACE;
1118 	uio.uio_td = td;
1119 	uio.uio_offset = 0;
1120 
1121 #ifdef MAC
1122 	error = mac_check_vnode_readdir(td->td_ucred, lvp);
1123 #endif
1124 	while (!error && !eofflag) {
1125 		iov.iov_base = buf;
1126 		iov.iov_len = sizeof(buf);
1127 		uio.uio_iov = &iov;
1128 		uio.uio_iovcnt = 1;
1129 		uio.uio_resid = iov.iov_len;
1130 
1131 		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
1132 		if (error)
1133 			break;
1134 
1135 		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
1136 		for (dp = (struct dirent*)buf; !error && dp < edp;
1137 		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
1138 			if (dp->d_type == DT_WHT ||
1139 			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1140 			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1141 				continue;
1142 
1143 			cn.cn_namelen = dp->d_namlen;
1144 			cn.cn_pnbuf = NULL;
1145 			cn.cn_nameptr = dp->d_name;
1146 			cn.cn_nameiop = LOOKUP;
1147 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1148 			cn.cn_lkflags = LK_EXCLUSIVE;
1149 			cn.cn_thread = td;
1150 			cn.cn_cred = cred;
1151 			cn.cn_consume = 0;
1152 
1153 			/*
1154 			 * check entry in lower.
1155 			 * Sometimes, readdir function returns
1156 			 * wrong entry.
1157 			 */
1158 			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1159 
1160 			if (!lookuperr)
1161 				vput(tvp);
1162 			else
1163 				continue; /* skip entry */
1164 
1165 			/*
1166 			 * check entry
1167 			 * If it has no exist/whiteout entry in upper,
1168 			 * directory is not empty.
1169 			 */
1170 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1171 			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1172 
1173 			if (!lookuperr)
1174 				vput(tvp);
1175 
1176 			/* ignore exist or whiteout entry */
1177 			if (!lookuperr ||
1178 			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1179 				continue;
1180 
1181 			error = ENOTEMPTY;
1182 		}
1183 	}
1184 
1185 	/* close vnode */
1186 	VOP_CLOSE(vp, FREAD, cred, td);
1187 
1188 	return (error);
1189 }
1190 
1191 #ifdef DIAGNOSTIC
1192 
1193 struct vnode   *
1194 unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1195 {
1196 	struct unionfs_node *unp;
1197 
1198 	unp = VTOUNIONFS(vp);
1199 
1200 #ifdef notyet
1201 	if (vp->v_op != unionfs_vnodeop_p) {
1202 		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1203 #ifdef KDB
1204 		kdb_enter("unionfs_checkuppervp: on non-unionfs-node.\n");
1205 #endif
1206 		panic("unionfs_checkuppervp");
1207 	};
1208 #endif
1209 	return (unp->un_uppervp);
1210 }
1211 
1212 struct vnode   *
1213 unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1214 {
1215 	struct unionfs_node *unp;
1216 
1217 	unp = VTOUNIONFS(vp);
1218 
1219 #ifdef notyet
1220 	if (vp->v_op != unionfs_vnodeop_p) {
1221 		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1222 #ifdef KDB
1223 		kdb_enter("unionfs_checklowervp: on non-unionfs-node.\n");
1224 #endif
1225 		panic("unionfs_checklowervp");
1226 	};
1227 #endif
1228 	return (unp->un_lowervp);
1229 }
1230 #endif
1231