xref: /freebsd/sys/fs/unionfs/union_subr.c (revision f856af0466c076beef4ea9b15d088e1119a945b8)
1 /*-
2  * Copyright (c) 1994 Jan-Simon Pendry
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6  * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Jan-Simon Pendry.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36  * $FreeBSD$
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/mutex.h>
44 #include <sys/malloc.h>
45 #include <sys/mount.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/vnode.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/filedesc.h>
52 #include <sys/stat.h>
53 #include <sys/resourcevar.h>
54 
55 #ifdef MAC
56 #include <sys/mac.h>
57 #endif
58 
59 #include <vm/uma.h>
60 
61 #include <fs/unionfs/union.h>
62 
63 #define	NUNIONFSNODECACHE 32
64 
65 #define	UNIONFS_NHASH(upper, lower) \
66 	(&unionfs_node_hashtbl[(((uintptr_t)upper + (uintptr_t)lower) >> 8) & unionfs_node_hash])
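/*
 * Hash buckets are selected from the upper and lower vnode addresses: their
 * sum is shifted right by eight bits to drop the low-order bits (which vary
 * little between allocations) and masked into the table.
 */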
67 
68 static LIST_HEAD(unionfs_node_hashhead, unionfs_node) *unionfs_node_hashtbl;
69 static u_long	unionfs_node_hash;
70 struct mtx	unionfs_hashmtx;
71 
72 static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
73 MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
74 MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
75 
76 /*
77  * Initialize cache headers
78  */
79 int
80 unionfs_init(struct vfsconf *vfsp)
81 {
82 	UNIONFSDEBUG("unionfs_init\n");	/* printed during system boot */
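	/*
	 * hashinit() sizes the bucket array from NUNIONFSNODECACHE and
	 * stores the corresponding index mask in unionfs_node_hash; the
	 * mutex below guards all hash chain manipulation.
	 */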
83 	unionfs_node_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH, &unionfs_node_hash);
84 	mtx_init(&unionfs_hashmtx, "unionfs", NULL, MTX_DEF);
85 
86 	return (0);
87 }
88 
89 /*
90  * Destroy cache headers
91  */
92 int
93 unionfs_uninit(struct vfsconf *vfsp)
94 {
95 	mtx_destroy(&unionfs_hashmtx);
96 	free(unionfs_node_hashtbl, M_UNIONFSHASH);
97 	return (0);
98 }
99 
100 /*
101  * Return a VREF'ed alias for the unionfs vnode if one already exists, else NULLVP.
102  */
103 static struct vnode *
104 unionfs_hashget(struct mount *mp, struct vnode *uppervp,
105 		struct vnode *lowervp, struct vnode *dvp, char *path,
106 		int lkflags, struct thread *td)
107 {
108 	struct unionfs_node_hashhead *hd;
109 	struct unionfs_node *unp;
110 	struct vnode   *vp;
111 	int error;
112 
113 	if (lkflags & LK_TYPE_MASK)
114 		lkflags |= LK_RETRY;
115 	hd = UNIONFS_NHASH(uppervp, lowervp);
116 
117 	mtx_lock(&unionfs_hashmtx);
118 	LIST_FOREACH(unp, hd, un_hash) {
119 		if (unp->un_uppervp == uppervp &&
120 		    unp->un_lowervp == lowervp &&
121 		    unp->un_dvp == dvp &&
122 		    UNIONFSTOV(unp)->v_mount == mp &&
123 		    (!path || !(unp->un_path) || !strcmp(unp->un_path, path))) {
124 			vp = UNIONFSTOV(unp);
125 			VI_LOCK(vp);
126 			mtx_unlock(&unionfs_hashmtx);
127 			/*
128 			 * We need to clear the OWEINACT flag here as this
129 			 * may lead vget() to try to lock our vnode which is
130 			 * already locked via vp.
131 			 */
132 			vp->v_iflag &= ~VI_OWEINACT;
133 			error = vget(vp, LK_INTERLOCK, td);
134 			if (error != 0)
135 				panic("unionfs_hashget: vget error %d", error);
136 			if (lkflags & LK_TYPE_MASK)
137 				vn_lock(vp, lkflags, td);
138 			return (vp);
139 		}
140 	}
141 
142 	mtx_unlock(&unionfs_hashmtx);
143 
144 	return (NULLVP);
145 }
146 
147 /*
148  * Behave like unionfs_hashget(), but insert the passed unionfs_node into the
149  * hash if no existing node is found.
150  */
151 static struct vnode *
152 unionfs_hashins(struct mount *mp, struct unionfs_node *uncp,
153 		char *path, int lkflags, struct thread *td)
154 {
155 	struct unionfs_node_hashhead *hd;
156 	struct unionfs_node *unp;
157 	struct vnode   *vp;
158 	int error;
159 
160 	if (lkflags & LK_TYPE_MASK)
161 		lkflags |= LK_RETRY;
162 	hd = UNIONFS_NHASH(uncp->un_uppervp, uncp->un_lowervp);
163 
164 	mtx_lock(&unionfs_hashmtx);
165 	LIST_FOREACH(unp, hd, un_hash) {
166 		if (unp->un_uppervp == uncp->un_uppervp &&
167 		    unp->un_lowervp == uncp->un_lowervp &&
168 		    unp->un_dvp == uncp->un_dvp &&
169 		    UNIONFSTOV(unp)->v_mount == mp &&
170 		    (!path || !(unp->un_path) || !strcmp(unp->un_path, path))) {
171 			vp = UNIONFSTOV(unp);
172 			VI_LOCK(vp);
173 			mtx_unlock(&unionfs_hashmtx);
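			/* See unionfs_hashget() for why VI_OWEINACT is cleared. */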
174 			vp->v_iflag &= ~VI_OWEINACT;
175 			error = vget(vp, LK_INTERLOCK, td);
176 			if (error)
177 				panic("unionfs_hashins: vget error %d", error);
178 			if (lkflags & LK_TYPE_MASK)
179 				vn_lock(vp, lkflags, td);
180 			return (vp);
181 		}
182 	}
183 
184 	LIST_INSERT_HEAD(hd, uncp, un_hash);
185 	uncp->un_flag |= UNIONFS_CACHED;
186 	mtx_unlock(&unionfs_hashmtx);
187 
188 	return (NULLVP);
189 }
190 
191 /*
192  * Create a new unionfs node or return an existing one.
193  *
194  * uppervp and lowervp should be unlocked on entry.  Since locking the new
195  * unionfs vnode also locks uppervp or lowervp, holding more than one of
196  * these locks at the same time can deadlock.
197  */
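/*
 * A rough usage sketch (callers include the unionfs mount and lookup paths):
 * build a unionfs vnode over an upper/lower pair with
 *
 *	error = unionfs_nodeget(mp, uppervp, lowervp, dvp, &vp, cnp, td);
 *
 * and on success vp is the (possibly pre-existing) unionfs vnode.
 */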
198 int
199 unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
200 		struct vnode *lowervp, struct vnode *dvp,
201 		struct vnode **vpp, struct componentname *cnp,
202 		struct thread *td)
203 {
204 	struct unionfs_mount *ump;
205 	struct unionfs_node *unp;
206 	struct vnode   *vp;
207 	int		error;
208 	int		lkflags;
209 	char	       *path;
210 
211 	ump = MOUNTTOUNIONFSMOUNT(mp);
212 	lkflags = (cnp ? cnp->cn_lkflags : 0);
213 	path = (cnp ? cnp->cn_nameptr : "");
214 
215 	if (uppervp == NULLVP && lowervp == NULLVP)
216 		panic("unionfs_nodeget: upper and lower are both null");
217 
218 	/* If the ISLASTCN flag is not set, the path check is skipped. */
219 	if (!cnp || !(cnp->cn_flags & ISLASTCN))
220 		path = NULL;
221 
222 	/* Lookup the hash first. */
223 	*vpp = unionfs_hashget(mp, uppervp, lowervp, dvp, path, lkflags, td);
224 	if (*vpp != NULLVP)
225 		return (0);
226 
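	/*
	 * Unless this is the unionfs root (both layers matching the mount's
	 * own upper and lower root vnodes), a parent directory vnode is
	 * required.
	 */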
227 	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
228 	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
229 		if (dvp == NULLVP)
230 			return (EINVAL);
231 	}
232 
233 	/*
234 	 * Do the MALLOC before the getnewvnode since doing so afterward
235 	 * might cause a bogus v_data pointer to get dereferenced elsewhere
236 	 * if MALLOC should block.
237 	 */
238 	MALLOC(unp, struct unionfs_node *, sizeof(struct unionfs_node),
239 	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
240 
241 	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
242 	if (error) {
243 		FREE(unp, M_UNIONFSNODE);
244 		return (error);
245 	}
246 	if (dvp != NULLVP)
247 		vref(dvp);
248 	if (uppervp != NULLVP)
249 		vref(uppervp);
250 	if (lowervp != NULLVP)
251 		vref(lowervp);
252 
253 	unp->un_vnode = vp;
254 	unp->un_uppervp = uppervp;
255 	unp->un_lowervp = lowervp;
256 	unp->un_dvp = dvp;
257 	if (uppervp != NULLVP)
258 		vp->v_vnlock = uppervp->v_vnlock;
259 	else
260 		vp->v_vnlock = lowervp->v_vnlock;
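	/*
	 * The unionfs vnode shares its lock with the preferred underlying
	 * vnode: the upper vnode when present, otherwise the lower vnode.
	 */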
261 
262 	if (cnp) {
263 		unp->un_path = (char *)
264 		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK | M_ZERO);
265 		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
266 		unp->un_path[cnp->cn_namelen] = '\0';
267 	}
268 	vp->v_type = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
269 	vp->v_data = unp;
270 
271 	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
272 	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
273 		vp->v_vflag |= VV_ROOT;
274 
275 	*vpp = unionfs_hashins(mp, unp, path, lkflags, td);
276 	if (*vpp != NULLVP) {
277 		if (dvp != NULLVP)
278 			vrele(dvp);
279 		if (uppervp != NULLVP)
280 			vrele(uppervp);
281 		if (lowervp != NULLVP)
282 			vrele(lowervp);
283 
284 		unp->un_uppervp = NULLVP;
285 		unp->un_lowervp = NULLVP;
286 		unp->un_dvp = NULLVP;
287 		vrele(vp);
288 
289 		return (0);
290 	}
291 
292 	if (lkflags & LK_TYPE_MASK)
293 		vn_lock(vp, lkflags | LK_RETRY, td);
294 
295 	*vpp = vp;
296 
297 	return (0);
298 }
299 
300 /*
301  * Remove node from hash.
302  */
303 void
304 unionfs_hashrem(struct vnode *vp, struct thread *td)
305 {
306 	int		vfslocked;
307 	struct unionfs_node *unp;
308 	struct unionfs_node_status *unsp, *unsp_tmp;
309 	struct vnode   *lvp;
310 	struct vnode   *uvp;
311 
312 	/*
313 	 * Use the interlock to protect the clearing of v_data to
314 	 * prevent faults in unionfs_lock().
315 	 */
316 	VI_LOCK(vp);
317 	unp = VTOUNIONFS(vp);
318 	lvp = unp->un_lowervp;
319 	uvp = unp->un_uppervp;
320 	unp->un_lowervp = unp->un_uppervp = NULLVP;
321 
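	/*
	 * Switch the vnode back to its private lock, take that lock
	 * exclusively, and then release the locks still held on the
	 * underlying vnodes.
	 */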
322 	vp->v_vnlock = &(vp->v_lock);
323 	vp->v_data = NULL;
324 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp), td);
325 	if (lvp != NULLVP)
326 		VOP_UNLOCK(lvp, 0, td);
327 	if (uvp != NULLVP)
328 		VOP_UNLOCK(uvp, 0, td);
329 
330 	mtx_lock(&unionfs_hashmtx);
331 	if (unp->un_flag & UNIONFS_CACHED) {
332 		LIST_REMOVE(unp, un_hash);
333 		unp->un_flag &= ~UNIONFS_CACHED;
334 	}
335 	mtx_unlock(&unionfs_hashmtx);
336 	vp->v_object = NULL;
337 
338 	if (lvp != NULLVP) {
339 		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
340 		vrele(lvp);
341 		VFS_UNLOCK_GIANT(vfslocked);
342 	}
343 	if (uvp != NULLVP) {
344 		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
345 		vrele(uvp);
346 		VFS_UNLOCK_GIANT(vfslocked);
347 	}
348 	if (unp->un_dvp != NULLVP) {
349 		vfslocked = VFS_LOCK_GIANT(unp->un_dvp->v_mount);
350 		vrele(unp->un_dvp);
351 		VFS_UNLOCK_GIANT(vfslocked);
352 		unp->un_dvp = NULLVP;
353 	}
354 	if (unp->un_path) {
355 		free(unp->un_path, M_UNIONFSPATH);
356 		unp->un_path = NULL;
357 	}
358 
359 	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
360 		LIST_REMOVE(unsp, uns_list);
361 		free(unsp, M_TEMP);
362 	}
363 	FREE(unp, M_UNIONFSNODE);
364 }
365 
366 /*
367  * Get the unionfs node status.
368  * The vnode must be exclusively locked.
369  */
370 void
371 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
372 			struct unionfs_node_status **unspp)
373 {
374 	struct unionfs_node_status *unsp;
375 
376 	KASSERT(NULL != unspp, ("null pointer"));
377 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
378 
379 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
380 		if (unsp->uns_tid == td->td_tid) {
381 			*unspp = unsp;
382 			return;
383 		}
384 	}
385 
386 	/* create a new unionfs node status */
387 	MALLOC(unsp, struct unionfs_node_status *,
388 	    sizeof(struct unionfs_node_status), M_TEMP, M_WAITOK | M_ZERO);
389 
390 	unsp->uns_tid = td->td_tid;
391 	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
392 
393 	*unspp = unsp;
394 }
395 
396 /*
397  * Remove the unionfs node status if it is no longer in use.
398  * The vnode must be exclusively locked.
399  */
400 void
401 unionfs_tryrem_node_status(struct unionfs_node *unp, struct thread *td,
402 			   struct unionfs_node_status *unsp)
403 {
404 	KASSERT(NULL != unsp, ("null pointer"));
405 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_tryrem_node_status");
406 
407 	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
408 		return;
409 
410 	LIST_REMOVE(unsp, uns_list);
411 	free(unsp, M_TEMP);
412 }
413 
414 /*
415  * Derive the attributes for a new upper layer node from the lower node's.
416  */
417 void
418 unionfs_create_uppervattr_core(struct unionfs_mount *ump,
419 			       struct vattr *lva,
420 			       struct vattr *uva,
421 			       struct thread *td)
422 {
423 	VATTR_NULL(uva);
424 	uva->va_type = lva->va_type;
425 	uva->va_atime = lva->va_atime;
426 	uva->va_mtime = lva->va_mtime;
427 	uva->va_ctime = lva->va_ctime;
428 
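	/*
	 * Mode and ownership depend on the mount's copy policy: transparent
	 * copies them from the lower node; masquerade applies the configured
	 * udir/ufile modes and uid/gid except for nodes already owned by the
	 * configured uid; traditional applies the process umask and the
	 * configured uid/gid.
	 */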
429 	switch (ump->um_copymode) {
430 	case UNIONFS_TRANSPARENT:
431 		uva->va_mode = lva->va_mode;
432 		uva->va_uid = lva->va_uid;
433 		uva->va_gid = lva->va_gid;
434 		break;
435 	case UNIONFS_MASQUERADE:
436 		if (ump->um_uid == lva->va_uid) {
437 			uva->va_mode = lva->va_mode & 077077;
438 			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
439 			uva->va_uid = lva->va_uid;
440 			uva->va_gid = lva->va_gid;
441 		} else {
442 			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
443 			uva->va_uid = ump->um_uid;
444 			uva->va_gid = ump->um_gid;
445 		}
446 		break;
447 	default:		/* UNIONFS_TRADITIONAL */
448 		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
449 		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
450 		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
451 		uva->va_uid = ump->um_uid;
452 		uva->va_gid = ump->um_gid;
453 		break;
454 	}
455 }
456 
457 /*
458  * Fetch the lower vnode's attributes and derive the upper node's from them.
459  */
460 int
461 unionfs_create_uppervattr(struct unionfs_mount *ump,
462 			  struct vnode *lvp,
463 			  struct vattr *uva,
464 			  struct ucred *cred,
465 			  struct thread *td)
466 {
467 	int		error;
468 	struct vattr	lva;
469 
470 	if ((error = VOP_GETATTR(lvp, &lva, cred, td)))
471 		return (error);
472 
473 	unionfs_create_uppervattr_core(ump, &lva, uva, td);
474 
475 	return (error);
476 }
477 
478 /*
479  * relookup
480  *
481  * dvp should be locked on entry and will be locked on return.
482  *
483  * If an error is returned, *vpp will be invalid, otherwise it will hold a
484  * locked, referenced vnode. If *vpp == dvp then remember that only one
485  * LK_EXCLUSIVE lock is held.
486  */
487 static int
488 unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
489 		 struct componentname *cnp, struct componentname *cn,
490 		 struct thread *td, char *path, int pathlen, u_long nameiop)
491 {
492 	int	error;
493 
494 	cn->cn_namelen = pathlen;
495 	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
496 	bcopy(path, cn->cn_pnbuf, pathlen);
497 	cn->cn_pnbuf[pathlen] = '\0';
498 
499 	cn->cn_nameiop = nameiop;
500 	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
501 	cn->cn_lkflags = LK_EXCLUSIVE;
502 	cn->cn_thread = td;
503 	cn->cn_cred = cnp->cn_cred;
504 
505 	cn->cn_nameptr = cn->cn_pnbuf;
506 	cn->cn_consume = cnp->cn_consume;
507 
508 	if (nameiop == DELETE)
509 		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
510 	else if (RENAME == nameiop)
511 		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
512 
513 	vref(dvp);
514 	VOP_UNLOCK(dvp, 0, td);
515 
516 	if ((error = relookup(dvp, vpp, cn))) {
517 		uma_zfree(namei_zone, cn->cn_pnbuf);
518 		cn->cn_flags &= ~HASBUF;
519 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
520 	} else
521 		vrele(dvp);
522 
523 	return (error);
524 }
525 
526 /*
527  * relookup for CREATE namei operation.
528  *
529  * dvp is a unionfs vnode and should be locked.
530  *
531  * When unionfs_copyfile() has been called (e.g. from unionfs_link()), the
532  * lookup state left by the earlier VOP_LOOKUP is stale, so the name must
533  * be looked up again before the link or similar operation can be created.
534  */
535 int
536 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
537 			    struct thread *td)
538 {
539 	int	error;
540 	struct vnode *udvp;
541 	struct vnode *vp;
542 	struct componentname cn;
543 
544 	udvp = UNIONFSVPTOUPPERVP(dvp);
545 	vp = NULLVP;
546 
547 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
548 	    strlen(cnp->cn_nameptr), CREATE);
549 	if (error)
550 		return (error);
551 
552 	if (vp != NULLVP) {
553 		if (udvp == vp)
554 			vrele(vp);
555 		else
556 			vput(vp);
557 
558 		error = EEXIST;
559 	}
560 
561 	if (cn.cn_flags & HASBUF) {
562 		uma_zfree(namei_zone, cn.cn_pnbuf);
563 		cn.cn_flags &= ~HASBUF;
564 	}
565 
566 	if (!error) {
567 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
568 		cnp->cn_flags = cn.cn_flags;
569 	}
570 
571 	return (error);
572 }
573 
574 /*
575  * relookup for DELETE namei operation.
576  *
577  * dvp is a unionfs vnode and should be locked.
578  */
579 int
580 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
581 			    struct thread *td)
582 {
583 	int	error;
584 	struct vnode *udvp;
585 	struct vnode *vp;
586 	struct componentname cn;
587 
588 	udvp = UNIONFSVPTOUPPERVP(dvp);
589 	vp = NULLVP;
590 
591 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
592 	    strlen(cnp->cn_nameptr), DELETE);
593 	if (error)
594 		return (error);
595 
596 	if (vp == NULLVP)
597 		error = ENOENT;
598 	else {
599 		if (udvp == vp)
600 			vrele(vp);
601 		else
602 			vput(vp);
603 	}
604 
605 	if (cn.cn_flags & HASBUF) {
606 		uma_zfree(namei_zone, cn.cn_pnbuf);
607 		cn.cn_flags &= ~HASBUF;
608 	}
609 
610 	if (!error) {
611 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
612 		cnp->cn_flags = cn.cn_flags;
613 	}
614 
615 	return (error);
616 }
617 
618 /*
619  * relookup for RENAME namei operation.
620  *
621  * dvp is a unionfs vnode and should be locked.
622  */
623 int
624 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
625 			    struct thread *td)
626 {
627 	int error;
628 	struct vnode *udvp;
629 	struct vnode *vp;
630 	struct componentname cn;
631 
632 	udvp = UNIONFSVPTOUPPERVP(dvp);
633 	vp = NULLVP;
634 
635 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
636 	    strlen(cnp->cn_nameptr), RENAME);
637 	if (error)
638 		return (error);
639 
640 	if (vp != NULLVP) {
641 		if (udvp == vp)
642 			vrele(vp);
643 		else
644 			vput(vp);
645 	}
646 
647 	if (cn.cn_flags & HASBUF) {
648 		uma_zfree(namei_zone, cn.cn_pnbuf);
649 		cn.cn_flags &= ~HASBUF;
650 	}
651 
652 	if (!error) {
653 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
654 		cnp->cn_flags = cn.cn_flags;
655 	}
656 
657 	return (error);
658 
659 }
660 
661 /*
662  * Update the unionfs_node.
663  *
664  * uvp is the new, locked upper vnode.  The unionfs vnode's lock is switched
665  * over to uvp's lock, and the lower vnode's lock is released.
666  */
667 static void
668 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
669 		    struct thread *td)
670 {
671 	int		count, lockcnt;
672 	struct vnode   *vp;
673 	struct vnode   *lvp;
674 
675 	vp = UNIONFSTOV(unp);
676 	lvp = unp->un_lowervp;
677 
678 	/*
679 	 * lock update
680 	 */
681 	VI_LOCK(vp);
682 	unp->un_uppervp = uvp;
683 	vp->v_vnlock = uvp->v_vnlock;
684 	lockcnt = lvp->v_vnlock->lk_exclusivecount;
685 	if (lockcnt <= 0)
686 		panic("unionfs: no exclusive lock");
687 	VI_UNLOCK(vp);
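	/*
	 * The lower vnode's lock may be held recursively; acquire the upper
	 * vnode's lock enough additional times that its recursion count
	 * matches the count held on the lower vnode's lock.
	 */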
688 	for (count = 1; count < lockcnt; count++)
689 		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
690 
691 	/*
692 	 * cache update
693 	 */
694 	mtx_lock(&unionfs_hashmtx);
695 	if (unp->un_flag & UNIONFS_CACHED)
696 		LIST_REMOVE(unp, un_hash);
697 	LIST_INSERT_HEAD(UNIONFS_NHASH(uvp, lvp), unp, un_hash);
698 	unp->un_flag |= UNIONFS_CACHED;
699 	mtx_unlock(&unionfs_hashmtx);
700 }
701 
702 /*
703  * Create a new shadow dir.
704  *
705  * udvp should be locked on entry and will be locked on return.
706  *
707  * If no error is returned, unp will be updated.
708  */
709 int
710 unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
711 		    struct unionfs_node *unp, struct componentname *cnp,
712 		    struct thread *td)
713 {
714 	int		error;
715 	struct vnode   *lvp;
716 	struct vnode   *uvp;
717 	struct vattr	va;
718 	struct vattr	lva;
719 	struct componentname cn;
720 	struct mount   *mp;
721 	struct ucred   *cred;
722 	struct ucred   *credbk;
723 	struct uidinfo *rootinfo;
724 
725 	if (unp->un_uppervp != NULLVP)
726 		return (EEXIST);
727 
728 	lvp = unp->un_lowervp;
729 	uvp = NULLVP;
730 	credbk = cnp->cn_cred;
731 
732 	/* Temporarily assume root credentials. */
733 	rootinfo = uifind((uid_t)0);
734 	cred = crdup(cnp->cn_cred);
735 	chgproccnt(cred->cr_ruidinfo, 1, 0);
736 	change_euid(cred, rootinfo);
737 	change_ruid(cred, rootinfo);
738 	change_svuid(cred, (uid_t)0);
739 	uifree(rootinfo);
740 	cnp->cn_cred = cred;
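	/*
	 * The shadow directory is created with root credentials; the desired
	 * ownership and mode are applied afterwards with VOP_SETATTR().
	 */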
741 
742 	memset(&cn, 0, sizeof(cn));
743 
744 	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred, td)))
745 		goto unionfs_mkshadowdir_abort;
746 
747 	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
748 		goto unionfs_mkshadowdir_abort;
749 	if (uvp != NULLVP) {
750 		if (udvp == uvp)
751 			vrele(uvp);
752 		else
753 			vput(uvp);
754 
755 		error = EEXIST;
756 		goto unionfs_mkshadowdir_free_out;
757 	}
758 
759 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
760 		goto unionfs_mkshadowdir_free_out;
761 	if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) {
762 		vn_finished_write(mp);
763 		goto unionfs_mkshadowdir_free_out;
764 	}
765 	unionfs_create_uppervattr_core(ump, &lva, &va, td);
766 
767 	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
768 
769 	if (!error) {
770 		unionfs_node_update(unp, uvp, td);
771 
772 		/*
773 		 * XXX VOP_MKDIR() cannot set the uid/gid, so set them here
774 		 * with VOP_SETATTR() and ignore any errors.
775 		 */
776 		va.va_type = VNON;
777 		VOP_SETATTR(uvp, &va, cn.cn_cred, td);
778 	}
779 	vn_finished_write(mp);
780 
781 unionfs_mkshadowdir_free_out:
782 	if (cn.cn_flags & HASBUF) {
783 		uma_zfree(namei_zone, cn.cn_pnbuf);
784 		cn.cn_flags &= ~HASBUF;
785 	}
786 
787 unionfs_mkshadowdir_abort:
788 	cnp->cn_cred = credbk;
789 	chgproccnt(cred->cr_ruidinfo, -1, 0);
790 	crfree(cred);
791 
792 	return (error);
793 }
794 
795 /*
796  * Create a new whiteout.
797  *
798  * dvp should be locked on entry and will be locked on return.
799  */
800 int
801 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
802 		   struct thread *td, char *path)
803 {
804 	int		error;
805 	struct vnode   *wvp;
806 	struct componentname cn;
807 	struct mount   *mp;
808 
809 	if (path == NULL)
810 		path = cnp->cn_nameptr;
811 
812 	wvp = NULLVP;
813 	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
814 		return (error);
815 	if (wvp != NULLVP) {
816 		if (cn.cn_flags & HASBUF) {
817 			uma_zfree(namei_zone, cn.cn_pnbuf);
818 			cn.cn_flags &= ~HASBUF;
819 		}
820 		if (dvp == wvp)
821 			vrele(wvp);
822 		else
823 			vput(wvp);
824 
825 		return (EEXIST);
826 	}
827 
828 	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
829 		goto unionfs_mkwhiteout_free_out;
830 	if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE)))
831 		error = VOP_WHITEOUT(dvp, &cn, CREATE);
832 
833 	vn_finished_write(mp);
834 
835 unionfs_mkwhiteout_free_out:
836 	if (cn.cn_flags & HASBUF) {
837 		uma_zfree(namei_zone, cn.cn_pnbuf);
838 		cn.cn_flags &= ~HASBUF;
839 	}
840 
841 	return (error);
842 }
843 
844 /*
845  * Create and open a new vnode on the upper layer for a new shadow file.
846  *
847  * If an error is returned, *vpp will be invalid, otherwise it will hold a
848  * locked, referenced and opened vnode.
849  *
850  * unp is never updated.
851  */
852 static int
853 unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
854 			   struct unionfs_node *unp, struct vattr *uvap,
855 			   struct thread *td)
856 {
857 	struct unionfs_mount *ump;
858 	struct vnode   *vp;
859 	struct vnode   *lvp;
860 	struct ucred   *cred;
861 	struct vattr	lva;
862 	int		fmode;
863 	int		error;
864 	struct componentname cn;
865 
866 	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
867 	vp = NULLVP;
868 	lvp = unp->un_lowervp;
869 	cred = td->td_ucred;
870 	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
871 	error = 0;
872 
873 	if ((error = VOP_GETATTR(lvp, &lva, cred, td)) != 0)
874 		return (error);
875 	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
876 
877 	if (unp->un_path == NULL)
878 		panic("unionfs: un_path is null");
879 
880 	cn.cn_namelen = strlen(unp->un_path);
881 	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
882 	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
883 	cn.cn_nameiop = CREATE;
884 	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
885 	cn.cn_lkflags = LK_EXCLUSIVE;
886 	cn.cn_thread = td;
887 	cn.cn_cred = cred;
888 	cn.cn_nameptr = cn.cn_pnbuf;
889 	cn.cn_consume = 0;
890 
891 	vref(udvp);
892 	if ((error = relookup(udvp, &vp, &cn)) != 0)
893 		goto unionfs_vn_create_on_upper_free_out2;
894 	vrele(udvp);
895 
896 	if (vp != NULLVP) {
897 		if (vp == udvp)
898 			vrele(vp);
899 		else
900 			vput(vp);
901 		error = EEXIST;
902 		goto unionfs_vn_create_on_upper_free_out1;
903 	}
904 
905 	if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0)
906 		goto unionfs_vn_create_on_upper_free_out1;
907 
908 	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
909 		goto unionfs_vn_create_on_upper_free_out1;
910 
911 	if ((error = VOP_OPEN(vp, fmode, cred, td, -1)) != 0) {
912 		vput(vp);
913 		goto unionfs_vn_create_on_upper_free_out1;
914 	}
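	/*
	 * Account for the write open by hand; the caller, unionfs_copyfile(),
	 * drops this count again after VOP_CLOSE().
	 */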
915 	vp->v_writecount++;
916 	*vpp = vp;
917 
918 unionfs_vn_create_on_upper_free_out1:
919 	VOP_UNLOCK(udvp, 0, td);
920 
921 unionfs_vn_create_on_upper_free_out2:
922 	if (cn.cn_flags & HASBUF) {
923 		uma_zfree(namei_zone, cn.cn_pnbuf);
924 		cn.cn_flags &= ~HASBUF;
925 	}
926 
927 	return (error);
928 }
929 
930 /*
931  * Copy from lvp to uvp.
932  *
933  * lvp and uvp should be locked and opened on entry and will be locked and
934  * opened on return.
935  */
936 static int
937 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
938 		      struct ucred *cred, struct thread *td)
939 {
940 	int		error;
941 	off_t		offset;
942 	int		count;
943 	int		bufoffset;
944 	char           *buf;
945 	struct uio	uio;
946 	struct iovec	iov;
947 
948 	error = 0;
949 	memset(&uio, 0, sizeof(uio));
950 
951 	uio.uio_td = td;
952 	uio.uio_segflg = UIO_SYSSPACE;
953 	uio.uio_offset = 0;
954 
955 	if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0)
956 		return (error);
957 	if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0)
958 		return (error);
959 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
960 
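	/*
	 * Copy in MAXBSIZE chunks: read one chunk from the lower vnode,
	 * then write it to the upper vnode, retrying short writes until the
	 * whole chunk is written or an error occurs.
	 */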
961 	while (error == 0) {
962 		offset = uio.uio_offset;
963 
964 		uio.uio_iov = &iov;
965 		uio.uio_iovcnt = 1;
966 		iov.iov_base = buf;
967 		iov.iov_len = MAXBSIZE;
968 		uio.uio_resid = iov.iov_len;
969 		uio.uio_rw = UIO_READ;
970 
971 		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
972 			break;
973 		if ((count = MAXBSIZE - uio.uio_resid) == 0)
974 			break;
975 
976 		bufoffset = 0;
977 		while (bufoffset < count) {
978 			uio.uio_iov = &iov;
979 			uio.uio_iovcnt = 1;
980 			iov.iov_base = buf + bufoffset;
981 			iov.iov_len = count - bufoffset;
982 			uio.uio_offset = offset + bufoffset;
983 			uio.uio_resid = iov.iov_len;
984 			uio.uio_rw = UIO_WRITE;
985 
986 			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
987 				break;
988 
989 			bufoffset += (count - bufoffset) - uio.uio_resid;
990 		}
991 
992 		uio.uio_offset = offset + bufoffset;
993 	}
994 
995 	free(buf, M_TEMP);
996 
997 	return (error);
998 }
999 
1000 /*
1001  * Copy file from lower to upper.
1002  *
1003  * Pass a non-zero docopy to copy the file contents as well; pass zero to
1004  * create the upper vnode without copying the data.
1005  *
1006  * If no error is returned, unp will be updated.
1007  */
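/*
 * A typical use (sketch): copy a file up before it is modified through the
 * union, e.g.
 *
 *	if (unp->un_uppervp == NULLVP)
 *		error = unionfs_copyfile(unp, 1, cred, td);
 */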
1008 int
1009 unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
1010 		 struct thread *td)
1011 {
1012 	int		error;
1013 	struct mount   *mp;
1014 	struct vnode   *udvp;
1015 	struct vnode   *lvp;
1016 	struct vnode   *uvp;
1017 	struct vattr	uva;
1018 
1019 	lvp = unp->un_lowervp;
1020 	uvp = NULLVP;
1021 
1022 	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
1023 		return (EROFS);
1024 	if (unp->un_dvp == NULLVP)
1025 		return (EINVAL);
1026 	if (unp->un_uppervp != NULLVP)
1027 		return (EEXIST);
1028 	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
1029 	if (udvp == NULLVP)
1030 		return (EROFS);
1031 	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
1032 		return (EROFS);
1033 
1034 	error = VOP_ACCESS(lvp, VREAD, cred, td);
1035 	if (error != 0)
1036 		return (error);
1037 
1038 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
1039 		return (error);
1040 	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
1041 	if (error != 0) {
1042 		vn_finished_write(mp);
1043 		return (error);
1044 	}
1045 
1046 	if (docopy != 0) {
1047 		error = VOP_OPEN(lvp, FREAD, cred, td, -1);
1048 		if (error == 0) {
1049 			error = unionfs_copyfile_core(lvp, uvp, cred, td);
1050 			VOP_CLOSE(lvp, FREAD, cred, td);
1051 		}
1052 	}
1053 	VOP_CLOSE(uvp, FWRITE, cred, td);
1054 	uvp->v_writecount--;
1055 
1056 	vn_finished_write(mp);
1057 
1058 	if (error == 0) {
1059 		/* Reset the attributes. Ignore errors. */
1060 		uva.va_type = VNON;
1061 		VOP_SETATTR(uvp, &uva, cred, td);
1062 	}
1063 
1064 	unionfs_node_update(unp, uvp, td);
1065 
1066 	return (error);
1067 }
1068 
1069 /*
1070  * Check whether vp may be removed with rmdir, i.e. whether the merged
1071  * directory is empty.
1072  *
1073  * vp is a unionfs vnode and should be locked.
1074  */
1075 int
1076 unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
1077 {
1078 	int		error;
1079 	int		eofflag;
1080 	int		lookuperr;
1081 	struct vnode   *uvp;
1082 	struct vnode   *lvp;
1083 	struct vnode   *tvp;
1084 	struct vattr	va;
1085 	struct componentname cn;
1086 	/*
1087 	 * The size of buf needs to be larger than DIRBLKSIZ.
1088 	 */
1089 	char		buf[256 * 6];
1090 	struct dirent  *dp;
1091 	struct dirent  *edp;
1092 	struct uio	uio;
1093 	struct iovec	iov;
1094 
1095 	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
1096 
1097 	eofflag = 0;
1098 	uvp = UNIONFSVPTOUPPERVP(vp);
1099 	lvp = UNIONFSVPTOLOWERVP(vp);
1100 
1101 	/* check opaque */
1102 	if ((error = VOP_GETATTR(uvp, &va, cred, td)) != 0)
1103 		return (error);
1104 	if (va.va_flags & OPAQUE)
1105 		return (0);
1106 
1107 	/* open vnode */
1108 	if ((error = VOP_OPEN(vp, FREAD, cred, td, -1)) != 0)
1109 		return (error);
1110 
1111 	uio.uio_rw = UIO_READ;
1112 	uio.uio_segflg = UIO_SYSSPACE;
1113 	uio.uio_td = td;
1114 	uio.uio_offset = 0;
1115 
1116 #ifdef MAC
1117 	error = mac_check_vnode_readdir(td->td_ucred, lvp);
1118 #endif
1119 	while (!error && !eofflag) {
1120 		iov.iov_base = buf;
1121 		iov.iov_len = sizeof(buf);
1122 		uio.uio_iov = &iov;
1123 		uio.uio_iovcnt = 1;
1124 		uio.uio_resid = iov.iov_len;
1125 
1126 		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
1127 		if (error)
1128 			break;
1129 
1130 		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
1131 		for (dp = (struct dirent*)buf; !error && dp < edp;
1132 		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
1133 			if (dp->d_type == DT_WHT ||
1134 			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1135 			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1136 				continue;
1137 
1138 			cn.cn_namelen = dp->d_namlen;
1139 			cn.cn_pnbuf = NULL;
1140 			cn.cn_nameptr = dp->d_name;
1141 			cn.cn_nameiop = LOOKUP;
1142 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1143 			cn.cn_lkflags = LK_EXCLUSIVE;
1144 			cn.cn_thread = td;
1145 			cn.cn_cred = cred;
1146 			cn.cn_consume = 0;
1147 
1148 			/*
1149 			 * Verify that the entry really exists in the
1150 			 * lower layer; VOP_READDIR() sometimes returns
1151 			 * stale entries.
1152 			 */
1153 			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1154 
1155 			if (!lookuperr)
1156 				vput(tvp);
1157 			else
1158 				continue; /* skip entry */
1159 
1160 			/*
1161 			 * Check the upper layer: if the entry neither
1162 			 * exists nor is whited out there, the merged
1163 			 * directory is not empty.
1164 			 */
1165 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1166 			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1167 
1168 			if (!lookuperr)
1169 				vput(tvp);
1170 
1171 			/* skip entries that exist or are whited out in the upper layer */
1172 			if (!lookuperr ||
1173 			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1174 				continue;
1175 
1176 			error = ENOTEMPTY;
1177 		}
1178 	}
1179 
1180 	/* close vnode */
1181 	VOP_CLOSE(vp, FREAD, cred, td);
1182 
1183 	return (error);
1184 }
1185 
1186 #ifdef DIAGNOSTIC
1187 
1188 struct vnode   *
1189 unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1190 {
1191 	struct unionfs_node *unp;
1192 
1193 	unp = VTOUNIONFS(vp);
1194 
1195 #ifdef notyet
1196 	if (vp->v_op != unionfs_vnodeop_p) {
1197 		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1198 #ifdef KDB
1199 		kdb_enter("unionfs_checkuppervp: on non-unionfs-node.\n");
1200 #endif
1201 		panic("unionfs_checkuppervp");
1202 	};
1203 #endif
1204 	return (unp->un_uppervp);
1205 }
1206 
1207 struct vnode   *
1208 unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1209 {
1210 	struct unionfs_node *unp;
1211 
1212 	unp = VTOUNIONFS(vp);
1213 
1214 #ifdef notyet
1215 	if (vp->v_op != unionfs_vnodeop_p) {
1216 		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1217 #ifdef KDB
1218 		kdb_enter("unionfs_checklowervp: on non-unionfs-node.\n");
1219 #endif
1220 		panic("unionfs_checklowervp");
1221 	};
1222 #endif
1223 	return (unp->un_lowervp);
1224 }
1225 #endif
1226