xref: /freebsd/sys/fs/unionfs/union_subr.c (revision a10cee30c94cf5944826d2a495e9cdf339dfbcc8)
1 /*-
2  * Copyright (c) 1994 Jan-Simon Pendry
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6  * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Jan-Simon Pendry.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36  * $FreeBSD$
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/mutex.h>
44 #include <sys/malloc.h>
45 #include <sys/mount.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/vnode.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/filedesc.h>
52 #include <sys/stat.h>
53 #include <sys/resourcevar.h>
54 
55 #include <security/mac/mac_framework.h>
56 
57 #include <vm/uma.h>
58 
59 #include <fs/unionfs/union.h>
60 
61 #define NUNIONFSNODECACHE 16
62 
63 static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
64 MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
65 MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
66 
67 /*
68  * Initialize
69  */
70 int
71 unionfs_init(struct vfsconf *vfsp)
72 {
73 	UNIONFSDEBUG("unionfs_init\n");	/* printed during system boot */
74 	return (0);
75 }
76 
77 /*
78  * Uninitialize
79  */
80 int
81 unionfs_uninit(struct vfsconf *vfsp)
82 {
83 	return (0);
84 }
85 
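/*
 * Return the hash chain within dvp's per-directory unionfs node cache for
 * the given name.  The bucket is chosen by summing the bytes of the name;
 * a NULL path hashes to bucket zero.
 */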
86 static struct unionfs_node_hashhead *
87 unionfs_get_hashhead(struct vnode *dvp, char *path)
88 {
89 	int		count;
90 	char		hash;
91 	struct unionfs_node *unp;
92 
93 	hash = 0;
94 	unp = VTOUNIONFS(dvp);
95 	if (path != NULL) {
96 		for (count = 0; path[count]; count++)
97 			hash += path[count];
98 	}
99 
100 	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
101 }
102 
103 /*
104  * Get the cached vnode.
105  */
106 static struct vnode *
107 unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
108 			struct vnode *dvp, char *path)
109 {
110 	struct unionfs_node_hashhead *hd;
111 	struct unionfs_node *unp;
112 	struct vnode   *vp;
113 
114 	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
115 	    ("unionfs_get_cached_vnode: v_type != VDIR"));
116 	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
117 	    ("unionfs_get_cached_vnode: v_type != VDIR"));
118 
119 	VI_LOCK(dvp);
120 	hd = unionfs_get_hashhead(dvp, path);
121 	LIST_FOREACH(unp, hd, un_hash) {
122 		if (!strcmp(unp->un_path, path)) {
123 			vp = UNIONFSTOV(unp);
124 			VI_LOCK_FLAGS(vp, MTX_DUPOK);
125 			VI_UNLOCK(dvp);
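			/*
			 * The vnode is about to be reused, so cancel any
			 * owed inactivation; refuse vnodes that are doomed
			 * or currently being inactivated.
			 */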
126 			vp->v_iflag &= ~VI_OWEINACT;
127 			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
128 				VI_UNLOCK(vp);
129 				vp = NULLVP;
130 			} else
131 				VI_UNLOCK(vp);
132 			return (vp);
133 		}
134 	}
135 	VI_UNLOCK(dvp);
136 
137 	return (NULLVP);
138 }
139 
140 /*
141  * Add the new vnode to the cache.
142  */
143 static struct vnode *
144 unionfs_ins_cached_vnode(struct unionfs_node *uncp,
145 			struct vnode *dvp, char *path)
146 {
147 	struct unionfs_node_hashhead *hd;
148 	struct unionfs_node *unp;
149 	struct vnode   *vp;
150 
151 	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
152 	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
153 	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
154 	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
155 
156 	VI_LOCK(dvp);
157 	hd = unionfs_get_hashhead(dvp, path);
158 	LIST_FOREACH(unp, hd, un_hash) {
159 		if (!strcmp(unp->un_path, path)) {
160 			vp = UNIONFSTOV(unp);
161 			VI_LOCK_FLAGS(vp, MTX_DUPOK);
162 			vp->v_iflag &= ~VI_OWEINACT;
163 			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
164 				LIST_INSERT_HEAD(hd, uncp, un_hash);
165 				VI_UNLOCK(vp);
166 				vp = NULLVP;
167 			} else
168 				VI_UNLOCK(vp);
169 			VI_UNLOCK(dvp);
170 			return (vp);
171 		}
172 	}
173 
174 	LIST_INSERT_HEAD(hd, uncp, un_hash);
175 	VI_UNLOCK(dvp);
176 
177 	return (NULLVP);
178 }
179 
180 /*
181  * Remove the vnode from the cache.
182  */
183 static void
184 unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
185 {
186 	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
187 	KASSERT((dvp != NULLVP),
188 	    ("unionfs_rem_cached_vnode: null parent vnode"));
189 	KASSERT((unp->un_hash.le_prev != NULL),
190 	    ("unionfs_rem_cached_vnode: null hash"));
191 
192 	VI_LOCK(dvp);
193 	LIST_REMOVE(unp, un_hash);
194 	unp->un_hash.le_next = NULL;
195 	unp->un_hash.le_prev = NULL;
196 	VI_UNLOCK(dvp);
197 }
198 
199 /*
200  * Make a new or get existing unionfs node.
201  *
202  * uppervp and lowervp should be unlocked, because locking the new unionfs
203  * vnode also locks uppervp or lowervp behind it.  To avoid deadlock, do not
204  * hold locks on several of these vnodes at the same time.
205  */
206 int
207 unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
208 		struct vnode *lowervp, struct vnode *dvp,
209 		struct vnode **vpp, struct componentname *cnp,
210 		struct thread *td)
211 {
212 	struct unionfs_mount *ump;
213 	struct unionfs_node *unp;
214 	struct vnode   *vp;
215 	int		error;
216 	int		lkflags;
217 	enum vtype	vt;
218 	char	       *path;
219 
220 	ump = MOUNTTOUNIONFSMOUNT(mp);
221 	lkflags = (cnp ? cnp->cn_lkflags : 0);
222 	path = (cnp ? cnp->cn_nameptr : NULL);
223 	*vpp = NULLVP;
224 
225 	if (uppervp == NULLVP && lowervp == NULLVP)
226 		panic("unionfs_nodeget: upper and lower is null");
227 
228 	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
229 
230 	/* Skip the path check unless ISLASTCN is set. */
231 	if (cnp && !(cnp->cn_flags & ISLASTCN))
232 		path = NULL;
233 
234 	/* check the cache */
235 	if (path != NULL && dvp != NULLVP && vt == VDIR) {
236 		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
237 		if (vp != NULLVP) {
238 			vref(vp);
239 			*vpp = vp;
240 			goto unionfs_nodeget_out;
241 		}
242 	}
243 
244 	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
245 	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
246 		/* dvp may be NULLVP only for the root vnode. */
247 		if (dvp == NULLVP)
248 			return (EINVAL);
249 	}
250 
251 	/*
252 	 * Do the MALLOC before the getnewvnode since doing so afterward
253 	 * might cause a bogus v_data pointer to get dereferenced elsewhere
254 	 * if MALLOC should block.
255 	 */
256 	unp = malloc(sizeof(struct unionfs_node),
257 	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
258 
259 	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
260 	if (error != 0) {
261 		free(unp, M_UNIONFSNODE);
262 		return (error);
263 	}
264 	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
265 	if (error != 0) {
266 		free(unp, M_UNIONFSNODE);
267 		return (error);
268 	}
269 	if (dvp != NULLVP)
270 		vref(dvp);
271 	if (uppervp != NULLVP)
272 		vref(uppervp);
273 	if (lowervp != NULLVP)
274 		vref(lowervp);
275 
276 	if (vt == VDIR)
277 		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
278 		    &(unp->un_hashmask));
279 
280 	unp->un_vnode = vp;
281 	unp->un_uppervp = uppervp;
282 	unp->un_lowervp = lowervp;
283 	unp->un_dvp = dvp;
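	/*
	 * Share the lock of the preferred underlying vnode (upper if present,
	 * otherwise lower) so that locking the unionfs vnode also locks the
	 * layer it operates on.
	 */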
284 	if (uppervp != NULLVP)
285 		vp->v_vnlock = uppervp->v_vnlock;
286 	else
287 		vp->v_vnlock = lowervp->v_vnlock;
288 
289 	if (path != NULL) {
290 		unp->un_path = (char *)
291 		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
292 		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
293 		unp->un_path[cnp->cn_namelen] = '\0';
294 	}
295 	vp->v_type = vt;
296 	vp->v_data = unp;
297 
298 	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
299 	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
300 		vp->v_vflag |= VV_ROOT;
301 
302 	if (path != NULL && dvp != NULLVP && vt == VDIR)
303 		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
304 	if ((*vpp) != NULLVP) {
305 		if (dvp != NULLVP)
306 			vrele(dvp);
307 		if (uppervp != NULLVP)
308 			vrele(uppervp);
309 		if (lowervp != NULLVP)
310 			vrele(lowervp);
311 
312 		unp->un_uppervp = NULLVP;
313 		unp->un_lowervp = NULLVP;
314 		unp->un_dvp = NULLVP;
315 		vrele(vp);
316 		vp = *vpp;
317 		vref(vp);
318 	} else
319 		*vpp = vp;
320 
321 unionfs_nodeget_out:
322 	if (lkflags & LK_TYPE_MASK)
323 		vn_lock(vp, lkflags | LK_RETRY);
324 
325 	return (0);
326 }
327 
328 /*
329  * Clean up the unionfs node.
330  */
331 void
332 unionfs_noderem(struct vnode *vp, struct thread *td)
333 {
334 	int		vfslocked;
335 	int		count;
336 	struct unionfs_node *unp, *unp_t1, *unp_t2;
337 	struct unionfs_node_hashhead *hd;
338 	struct unionfs_node_status *unsp, *unsp_tmp;
339 	struct vnode   *lvp;
340 	struct vnode   *uvp;
341 	struct vnode   *dvp;
342 
343 	/*
344 	 * Use the interlock to protect the clearing of v_data to
345 	 * prevent faults in unionfs_lock().
346 	 */
347 	VI_LOCK(vp);
348 	unp = VTOUNIONFS(vp);
349 	lvp = unp->un_lowervp;
350 	uvp = unp->un_uppervp;
351 	dvp = unp->un_dvp;
352 	unp->un_lowervp = unp->un_uppervp = NULLVP;
353 	vp->v_vnlock = &(vp->v_lock);
354 	vp->v_data = NULL;
355 	vp->v_object = NULL;
356 	VI_UNLOCK(vp);
357 
358 	if (lvp != NULLVP)
359 		VOP_UNLOCK(lvp, LK_RELEASE);
360 	if (uvp != NULLVP)
361 		VOP_UNLOCK(uvp, LK_RELEASE);
362 
363 	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
364 		unionfs_rem_cached_vnode(unp, dvp);
365 
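	/*
	 * Acquire vp's private v_lock (which v_vnlock was pointed back at
	 * above) exclusively before releasing the underlying vnodes.
	 */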
366 	if (lockmgr(vp->v_vnlock, LK_EXCLUSIVE, VI_MTX(vp)) != 0)
367 		panic("unionfs_noderem: failed to acquire lock for deletion");
368 
369 	if (lvp != NULLVP) {
370 		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
371 		vrele(lvp);
372 		VFS_UNLOCK_GIANT(vfslocked);
373 	}
374 	if (uvp != NULLVP) {
375 		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
376 		vrele(uvp);
377 		VFS_UNLOCK_GIANT(vfslocked);
378 	}
379 	if (dvp != NULLVP) {
380 		vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
381 		vrele(dvp);
382 		VFS_UNLOCK_GIANT(vfslocked);
383 		unp->un_dvp = NULLVP;
384 	}
385 	if (unp->un_path != NULL) {
386 		free(unp->un_path, M_UNIONFSPATH);
387 		unp->un_path = NULL;
388 	}
389 
390 	if (unp->un_hashtbl != NULL) {
391 		for (count = 0; count <= unp->un_hashmask; count++) {
392 			hd = unp->un_hashtbl + count;
393 			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
394 				LIST_REMOVE(unp_t1, un_hash);
395 				unp_t1->un_hash.le_next = NULL;
396 				unp_t1->un_hash.le_prev = NULL;
397 			}
398 		}
399 		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
400 	}
401 
402 	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
403 		LIST_REMOVE(unsp, uns_list);
404 		free(unsp, M_TEMP);
405 	}
406 	free(unp, M_UNIONFSNODE);
407 }
408 
409 /*
410  * Get the unionfs node status.
411  * The vnode must be exclusively locked.
412  */
413 void
414 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
415 			struct unionfs_node_status **unspp)
416 {
417 	struct unionfs_node_status *unsp;
418 	pid_t pid = td->td_proc->p_pid;
419 
420 	KASSERT(NULL != unspp, ("null pointer"));
421 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
422 
423 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
424 		if (unsp->uns_pid == pid) {
425 			*unspp = unsp;
426 			return;
427 		}
428 	}
429 
430 	/* create a new unionfs node status */
431 	unsp = malloc(sizeof(struct unionfs_node_status),
432 	    M_TEMP, M_WAITOK | M_ZERO);
433 
434 	unsp->uns_pid = pid;
435 	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
436 
437 	*unspp = unsp;
438 }
439 
440 /*
441  * Remove the unionfs node status if it is no longer in use.
442  * The vnode must be exclusively locked.
443  */
444 void
445 unionfs_tryrem_node_status(struct unionfs_node *unp,
446 			   struct unionfs_node_status *unsp)
447 {
448 	KASSERT(NULL != unsp, ("null pointer"));
449 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_tryrem_node_status");
450 
451 	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
452 		return;
453 
454 	LIST_REMOVE(unsp, uns_list);
455 	free(unsp, M_TEMP);
456 }
457 
458 /*
459  * Create upper node attr.
460  */
461 void
462 unionfs_create_uppervattr_core(struct unionfs_mount *ump,
463 			       struct vattr *lva,
464 			       struct vattr *uva,
465 			       struct thread *td)
466 {
467 	VATTR_NULL(uva);
468 	uva->va_type = lva->va_type;
469 	uva->va_atime = lva->va_atime;
470 	uva->va_mtime = lva->va_mtime;
471 	uva->va_ctime = lva->va_ctime;
472 
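	/*
	 * Choose the mode and ownership of the shadow copy according to the
	 * mount's copy policy.
	 */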
473 	switch (ump->um_copymode) {
474 	case UNIONFS_TRANSPARENT:
475 		uva->va_mode = lva->va_mode;
476 		uva->va_uid = lva->va_uid;
477 		uva->va_gid = lva->va_gid;
478 		break;
479 	case UNIONFS_MASQUERADE:
480 		if (ump->um_uid == lva->va_uid) {
481 			uva->va_mode = lva->va_mode & 077077;
482 			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
483 			uva->va_uid = lva->va_uid;
484 			uva->va_gid = lva->va_gid;
485 		} else {
486 			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
487 			uva->va_uid = ump->um_uid;
488 			uva->va_gid = ump->um_gid;
489 		}
490 		break;
491 	default:		/* UNIONFS_TRADITIONAL */
492 		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
493 		uva->va_uid = ump->um_uid;
494 		uva->va_gid = ump->um_gid;
495 		break;
496 	}
497 }
498 
499 /*
500  * Create upper node attr.
501  */
502 int
503 unionfs_create_uppervattr(struct unionfs_mount *ump,
504 			  struct vnode *lvp,
505 			  struct vattr *uva,
506 			  struct ucred *cred,
507 			  struct thread *td)
508 {
509 	int		error;
510 	struct vattr	lva;
511 
512 	if ((error = VOP_GETATTR(lvp, &lva, cred)))
513 		return (error);
514 
515 	unionfs_create_uppervattr_core(ump, &lva, uva, td);
516 
517 	return (error);
518 }
519 
520 /*
521  * relookup
522  *
523  * dvp should be locked on entry and will be locked on return.
524  *
525  * If an error is returned, *vpp will be invalid, otherwise it will hold a
526  * locked, referenced vnode. If *vpp == dvp then remember that only one
527  * LK_EXCLUSIVE lock is held.
528  */
529 int
530 unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
531 		 struct componentname *cnp, struct componentname *cn,
532 		 struct thread *td, char *path, int pathlen, u_long nameiop)
533 {
534 	int	error;
535 
536 	cn->cn_namelen = pathlen;
537 	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
538 	bcopy(path, cn->cn_pnbuf, pathlen);
539 	cn->cn_pnbuf[pathlen] = '\0';
540 
541 	cn->cn_nameiop = nameiop;
542 	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
543 	cn->cn_lkflags = LK_EXCLUSIVE;
544 	cn->cn_thread = td;
545 	cn->cn_cred = cnp->cn_cred;
546 
547 	cn->cn_nameptr = cn->cn_pnbuf;
548 	cn->cn_consume = cnp->cn_consume;
549 
550 	if (nameiop == DELETE)
551 		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
552 	else if (RENAME == nameiop)
553 		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
554 
555 	vref(dvp);
556 	VOP_UNLOCK(dvp, LK_RELEASE);
557 
558 	if ((error = relookup(dvp, vpp, cn))) {
559 		uma_zfree(namei_zone, cn->cn_pnbuf);
560 		cn->cn_flags &= ~HASBUF;
561 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
562 	} else
563 		vrele(dvp);
564 
565 	return (error);
566 }
567 
568 /*
569  * relookup for CREATE namei operation.
570  *
571  * dvp is the unionfs vnode and should be locked.
572  *
573  * When unionfs_link() and similar operations have called unionfs_copyfile(),
574  * the lookup state saved by VOP_LOOKUP() is no longer valid, so the name
575  * must be looked up again before the link (or similar) can be created.
576  */
577 int
578 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
579 			    struct thread *td)
580 {
581 	int	error;
582 	struct vnode *udvp;
583 	struct vnode *vp;
584 	struct componentname cn;
585 
586 	udvp = UNIONFSVPTOUPPERVP(dvp);
587 	vp = NULLVP;
588 
589 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
590 	    strlen(cnp->cn_nameptr), CREATE);
591 	if (error)
592 		return (error);
593 
594 	if (vp != NULLVP) {
595 		if (udvp == vp)
596 			vrele(vp);
597 		else
598 			vput(vp);
599 
600 		error = EEXIST;
601 	}
602 
603 	if (cn.cn_flags & HASBUF) {
604 		uma_zfree(namei_zone, cn.cn_pnbuf);
605 		cn.cn_flags &= ~HASBUF;
606 	}
607 
608 	if (!error) {
609 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
610 		cnp->cn_flags = cn.cn_flags;
611 	}
612 
613 	return (error);
614 }
615 
616 /*
617  * relookup for DELETE namei operation.
618  *
619  * dvp is the unionfs vnode and should be locked.
620  */
621 int
622 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
623 			    struct thread *td)
624 {
625 	int	error;
626 	struct vnode *udvp;
627 	struct vnode *vp;
628 	struct componentname cn;
629 
630 	udvp = UNIONFSVPTOUPPERVP(dvp);
631 	vp = NULLVP;
632 
633 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
634 	    strlen(cnp->cn_nameptr), DELETE);
635 	if (error)
636 		return (error);
637 
638 	if (vp == NULLVP)
639 		error = ENOENT;
640 	else {
641 		if (udvp == vp)
642 			vrele(vp);
643 		else
644 			vput(vp);
645 	}
646 
647 	if (cn.cn_flags & HASBUF) {
648 		uma_zfree(namei_zone, cn.cn_pnbuf);
649 		cn.cn_flags &= ~HASBUF;
650 	}
651 
652 	if (!error) {
653 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
654 		cnp->cn_flags = cn.cn_flags;
655 	}
656 
657 	return (error);
658 }
659 
660 /*
661  * relookup for RENAME namei operation.
662  *
663  * dvp is the unionfs vnode and should be locked.
664  */
665 int
666 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
667 			    struct thread *td)
668 {
669 	int error;
670 	struct vnode *udvp;
671 	struct vnode *vp;
672 	struct componentname cn;
673 
674 	udvp = UNIONFSVPTOUPPERVP(dvp);
675 	vp = NULLVP;
676 
677 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
678 	    strlen(cnp->cn_nameptr), RENAME);
679 	if (error)
680 		return (error);
681 
682 	if (vp != NULLVP) {
683 		if (udvp == vp)
684 			vrele(vp);
685 		else
686 			vput(vp);
687 	}
688 
689 	if (cn.cn_flags & HASBUF) {
690 		uma_zfree(namei_zone, cn.cn_pnbuf);
691 		cn.cn_flags &= ~HASBUF;
692 	}
693 
694 	if (!error) {
695 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
696 		cnp->cn_flags = cn.cn_flags;
697 	}
698 
699 	return (error);
700 
701 }
702 
703 /*
704  * Update the unionfs_node.
705  *
706  * uvp is the new, locked upper vnode.  The unionfs vnode's lock is switched
707  * over to uvp's lock, and the lower vnode's lock is released.
708  */
709 static void
710 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
711 		    struct thread *td)
712 {
713 	unsigned	count, lockrec;
714 	struct vnode   *vp;
715 	struct vnode   *lvp;
716 	struct vnode   *dvp;
717 
718 	vp = UNIONFSTOV(unp);
719 	lvp = unp->un_lowervp;
720 	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
721 	dvp = unp->un_dvp;
722 
723 	/*
724 	 * lock update
725 	 */
726 	VI_LOCK(vp);
727 	unp->un_uppervp = uvp;
728 	vp->v_vnlock = uvp->v_vnlock;
729 	VI_UNLOCK(vp);
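	/*
	 * Replay any recursive lock holds on the lower vnode against the
	 * upper vnode, which now backs the unionfs vnode's lock.
	 */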
730 	lockrec = lvp->v_vnlock->lk_recurse;
731 	for (count = 0; count < lockrec; count++)
732 		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
733 
734 	/*
735 	 * cache update
736 	 */
737 	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
738 		struct unionfs_node_hashhead *hd;
739 
740 		VI_LOCK(dvp);
741 		hd = unionfs_get_hashhead(dvp, unp->un_path);
742 		LIST_REMOVE(unp, un_hash);
743 		LIST_INSERT_HEAD(hd, unp, un_hash);
744 		VI_UNLOCK(dvp);
745 	}
746 }
747 
748 /*
749  * Create a new shadow dir.
750  *
751  * udvp should be locked on entry and will be locked on return.
752  *
753  * If no error is returned, unp will have been updated.
754  */
755 int
756 unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
757 		    struct unionfs_node *unp, struct componentname *cnp,
758 		    struct thread *td)
759 {
760 	int		error;
761 	struct vnode   *lvp;
762 	struct vnode   *uvp;
763 	struct vattr	va;
764 	struct vattr	lva;
765 	struct componentname cn;
766 	struct mount   *mp;
767 	struct ucred   *cred;
768 	struct ucred   *credbk;
769 	struct uidinfo *rootinfo;
770 
771 	if (unp->un_uppervp != NULLVP)
772 		return (EEXIST);
773 
774 	lvp = unp->un_lowervp;
775 	uvp = NULLVP;
776 	credbk = cnp->cn_cred;
777 
778 	/* Temporarily switch credentials to root. */
779 	rootinfo = uifind((uid_t)0);
780 	cred = crdup(cnp->cn_cred);
781 	/*
782 	 * The calls to chgproccnt() are needed to compensate for change_ruid()
783 	 * calling chgproccnt().
784 	 */
785 	chgproccnt(cred->cr_ruidinfo, 1, 0);
786 	change_euid(cred, rootinfo);
787 	change_ruid(cred, rootinfo);
788 	change_svuid(cred, (uid_t)0);
789 	uifree(rootinfo);
790 	cnp->cn_cred = cred;
791 
792 	memset(&cn, 0, sizeof(cn));
793 
794 	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
795 		goto unionfs_mkshadowdir_abort;
796 
797 	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
798 		goto unionfs_mkshadowdir_abort;
799 	if (uvp != NULLVP) {
800 		if (udvp == uvp)
801 			vrele(uvp);
802 		else
803 			vput(uvp);
804 
805 		error = EEXIST;
806 		goto unionfs_mkshadowdir_free_out;
807 	}
808 
809 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
810 		goto unionfs_mkshadowdir_free_out;
811 	unionfs_create_uppervattr_core(ump, &lva, &va, td);
812 
813 	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
814 
815 	if (!error) {
816 		unionfs_node_update(unp, uvp, td);
817 
818 		/*
819 		 * XXX Apply the uid/gid that VOP_MKDIR() itself could not set.
820 		 * Ignore any errors.
821 		 */
822 		va.va_type = VNON;
823 		VOP_SETATTR(uvp, &va, cn.cn_cred);
824 	}
825 	vn_finished_write(mp);
826 
827 unionfs_mkshadowdir_free_out:
828 	if (cn.cn_flags & HASBUF) {
829 		uma_zfree(namei_zone, cn.cn_pnbuf);
830 		cn.cn_flags &= ~HASBUF;
831 	}
832 
833 unionfs_mkshadowdir_abort:
834 	cnp->cn_cred = credbk;
835 	chgproccnt(cred->cr_ruidinfo, -1, 0);
836 	crfree(cred);
837 
838 	return (error);
839 }
840 
841 /*
842  * Create a new whiteout.
843  *
844  * dvp should be locked on entry and will be locked on return.
845  */
846 int
847 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
848 		   struct thread *td, char *path)
849 {
850 	int		error;
851 	struct vnode   *wvp;
852 	struct componentname cn;
853 	struct mount   *mp;
854 
855 	if (path == NULL)
856 		path = cnp->cn_nameptr;
857 
858 	wvp = NULLVP;
859 	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
860 		return (error);
861 	if (wvp != NULLVP) {
862 		if (cn.cn_flags & HASBUF) {
863 			uma_zfree(namei_zone, cn.cn_pnbuf);
864 			cn.cn_flags &= ~HASBUF;
865 		}
866 		if (dvp == wvp)
867 			vrele(wvp);
868 		else
869 			vput(wvp);
870 
871 		return (EEXIST);
872 	}
873 
874 	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
875 		goto unionfs_mkwhiteout_free_out;
876 	error = VOP_WHITEOUT(dvp, &cn, CREATE);
877 
878 	vn_finished_write(mp);
879 
880 unionfs_mkwhiteout_free_out:
881 	if (cn.cn_flags & HASBUF) {
882 		uma_zfree(namei_zone, cn.cn_pnbuf);
883 		cn.cn_flags &= ~HASBUF;
884 	}
885 
886 	return (error);
887 }
888 
889 /*
890  * Create a new vnode to be used for a new shadow file.
891  *
892  * If an error is returned, *vpp will be invalid, otherwise it will hold a
893  * locked, referenced and opened vnode.
894  *
895  * unp is never updated.
896  */
897 static int
898 unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
899 			   struct unionfs_node *unp, struct vattr *uvap,
900 			   struct thread *td)
901 {
902 	struct unionfs_mount *ump;
903 	struct vnode   *vp;
904 	struct vnode   *lvp;
905 	struct ucred   *cred;
906 	struct vattr	lva;
907 	int		fmode;
908 	int		error;
909 	struct componentname cn;
910 
911 	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
912 	vp = NULLVP;
913 	lvp = unp->un_lowervp;
914 	cred = td->td_ucred;
915 	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
916 	error = 0;
917 
918 	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
919 		return (error);
920 	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
921 
922 	if (unp->un_path == NULL)
923 		panic("unionfs: un_path is null");
924 
925 	cn.cn_namelen = strlen(unp->un_path);
926 	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
927 	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
928 	cn.cn_nameiop = CREATE;
929 	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
930 	cn.cn_lkflags = LK_EXCLUSIVE;
931 	cn.cn_thread = td;
932 	cn.cn_cred = cred;
933 	cn.cn_nameptr = cn.cn_pnbuf;
934 	cn.cn_consume = 0;
935 
936 	vref(udvp);
937 	if ((error = relookup(udvp, &vp, &cn)) != 0)
938 		goto unionfs_vn_create_on_upper_free_out2;
939 	vrele(udvp);
940 
941 	if (vp != NULLVP) {
942 		if (vp == udvp)
943 			vrele(vp);
944 		else
945 			vput(vp);
946 		error = EEXIST;
947 		goto unionfs_vn_create_on_upper_free_out1;
948 	}
949 
950 	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
951 		goto unionfs_vn_create_on_upper_free_out1;
952 
953 	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
954 		vput(vp);
955 		goto unionfs_vn_create_on_upper_free_out1;
956 	}
957 	vp->v_writecount++;
958 	CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",  __func__, vp,
959 	    vp->v_writecount);
960 	*vpp = vp;
961 
962 unionfs_vn_create_on_upper_free_out1:
963 	VOP_UNLOCK(udvp, LK_RELEASE);
964 
965 unionfs_vn_create_on_upper_free_out2:
966 	if (cn.cn_flags & HASBUF) {
967 		uma_zfree(namei_zone, cn.cn_pnbuf);
968 		cn.cn_flags &= ~HASBUF;
969 	}
970 
971 	return (error);
972 }
973 
974 /*
975  * Copy from lvp to uvp.
976  *
977  * lvp and uvp should be locked and opened on entry and will be locked and
978  * opened on return.
979  */
980 static int
981 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
982 		      struct ucred *cred, struct thread *td)
983 {
984 	int		error;
985 	off_t		offset;
986 	int		count;
987 	int		bufoffset;
988 	char           *buf;
989 	struct uio	uio;
990 	struct iovec	iov;
991 
992 	error = 0;
993 	memset(&uio, 0, sizeof(uio));
994 
995 	uio.uio_td = td;
996 	uio.uio_segflg = UIO_SYSSPACE;
997 	uio.uio_offset = 0;
998 
999 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
1000 
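	/*
	 * Copy the file in MAXBSIZE chunks: read a chunk from the lower
	 * vnode, then write it to the upper vnode, retrying short writes,
	 * until EOF or an error.
	 */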
1001 	while (error == 0) {
1002 		offset = uio.uio_offset;
1003 
1004 		uio.uio_iov = &iov;
1005 		uio.uio_iovcnt = 1;
1006 		iov.iov_base = buf;
1007 		iov.iov_len = MAXBSIZE;
1008 		uio.uio_resid = iov.iov_len;
1009 		uio.uio_rw = UIO_READ;
1010 
1011 		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
1012 			break;
1013 		if ((count = MAXBSIZE - uio.uio_resid) == 0)
1014 			break;
1015 
1016 		bufoffset = 0;
1017 		while (bufoffset < count) {
1018 			uio.uio_iov = &iov;
1019 			uio.uio_iovcnt = 1;
1020 			iov.iov_base = buf + bufoffset;
1021 			iov.iov_len = count - bufoffset;
1022 			uio.uio_offset = offset + bufoffset;
1023 			uio.uio_resid = iov.iov_len;
1024 			uio.uio_rw = UIO_WRITE;
1025 
1026 			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
1027 				break;
1028 
1029 			bufoffset += (count - bufoffset) - uio.uio_resid;
1030 		}
1031 
1032 		uio.uio_offset = offset + bufoffset;
1033 	}
1034 
1035 	free(buf, M_TEMP);
1036 
1037 	return (error);
1038 }
1039 
1040 /*
1041  * Copy file from lower to upper.
1042  *
1043  * Pass a non-zero docopy to copy the file contents as well; pass zero to
1044  * create the upper file without copying the data.
1045  *
1046  * If no error is returned, unp will have been updated.
1047  */
1048 int
1049 unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
1050 		 struct thread *td)
1051 {
1052 	int		error;
1053 	struct mount   *mp;
1054 	struct vnode   *udvp;
1055 	struct vnode   *lvp;
1056 	struct vnode   *uvp;
1057 	struct vattr	uva;
1058 
1059 	lvp = unp->un_lowervp;
1060 	uvp = NULLVP;
1061 
1062 	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
1063 		return (EROFS);
1064 	if (unp->un_dvp == NULLVP)
1065 		return (EINVAL);
1066 	if (unp->un_uppervp != NULLVP)
1067 		return (EEXIST);
1068 	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
1069 	if (udvp == NULLVP)
1070 		return (EROFS);
1071 	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
1072 		return (EROFS);
1073 
1074 	error = VOP_ACCESS(lvp, VREAD, cred, td);
1075 	if (error != 0)
1076 		return (error);
1077 
1078 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
1079 		return (error);
1080 	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
1081 	if (error != 0) {
1082 		vn_finished_write(mp);
1083 		return (error);
1084 	}
1085 
1086 	if (docopy != 0) {
1087 		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
1088 		if (error == 0) {
1089 			error = unionfs_copyfile_core(lvp, uvp, cred, td);
1090 			VOP_CLOSE(lvp, FREAD, cred, td);
1091 		}
1092 	}
1093 	VOP_CLOSE(uvp, FWRITE, cred, td);
1094 	uvp->v_writecount--;
1095 	CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, uvp,
1096 	    uvp->v_writecount);
1097 
1098 	vn_finished_write(mp);
1099 
1100 	if (error == 0) {
1101 		/* Reset the attributes. Ignore errors. */
1102 		uva.va_type = VNON;
1103 		VOP_SETATTR(uvp, &uva, cred);
1104 	}
1105 
1106 	unionfs_node_update(unp, uvp, td);
1107 
1108 	return (error);
1109 }
1110 
1111 /*
1112  * Check whether vp may be removed with rmdir(), i.e. whether the
1113  * merged directory is empty.
1114  *
1115  * vp is a unionfs vnode and must be locked.
1116  */
1117 int
1118 unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
1119 {
1120 	int		error;
1121 	int		eofflag;
1122 	int		lookuperr;
1123 	struct vnode   *uvp;
1124 	struct vnode   *lvp;
1125 	struct vnode   *tvp;
1126 	struct vattr	va;
1127 	struct componentname cn;
1128 	/*
1129 	 * The size of buf needs to be larger than DIRBLKSIZ.
1130 	 */
1131 	char		buf[256 * 6];
1132 	struct dirent  *dp;
1133 	struct dirent  *edp;
1134 	struct uio	uio;
1135 	struct iovec	iov;
1136 
1137 	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
1138 
1139 	eofflag = 0;
1140 	uvp = UNIONFSVPTOUPPERVP(vp);
1141 	lvp = UNIONFSVPTOLOWERVP(vp);
1142 
1143 	/* check opaque */
1144 	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
1145 		return (error);
1146 	if (va.va_flags & OPAQUE)
1147 		return (0);
1148 
1149 	/* open vnode */
1150 #ifdef MAC
1151 	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
1152 		return (error);
1153 #endif
1154 	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
1155 		return (error);
1156 	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
1157 		return (error);
1158 
1159 	uio.uio_rw = UIO_READ;
1160 	uio.uio_segflg = UIO_SYSSPACE;
1161 	uio.uio_td = td;
1162 	uio.uio_offset = 0;
1163 
1164 #ifdef MAC
1165 	error = mac_vnode_check_readdir(td->td_ucred, lvp);
1166 #endif
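	/*
	 * Scan the lower directory.  Every real entry found there must have
	 * a matching entry or a whiteout in the upper layer, otherwise the
	 * merged directory is not empty.
	 */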
1167 	while (!error && !eofflag) {
1168 		iov.iov_base = buf;
1169 		iov.iov_len = sizeof(buf);
1170 		uio.uio_iov = &iov;
1171 		uio.uio_iovcnt = 1;
1172 		uio.uio_resid = iov.iov_len;
1173 
1174 		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
1175 		if (error != 0)
1176 			break;
1177 		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
1178 #ifdef DIAGNOSTIC
1179 			panic("bad readdir response from lower FS.");
1180 #endif
1181 			break;
1182 		}
1183 
1184 		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
1185 		for (dp = (struct dirent*)buf; !error && dp < edp;
1186 		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
1187 			if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
1188 			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1189 			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1190 				continue;
1191 
1192 			cn.cn_namelen = dp->d_namlen;
1193 			cn.cn_pnbuf = NULL;
1194 			cn.cn_nameptr = dp->d_name;
1195 			cn.cn_nameiop = LOOKUP;
1196 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1197 			cn.cn_lkflags = LK_EXCLUSIVE;
1198 			cn.cn_thread = td;
1199 			cn.cn_cred = cred;
1200 			cn.cn_consume = 0;
1201 
1202 			/*
1203 			 * Check that the entry really exists in the
1204 			 * lower layer; VOP_READDIR() sometimes returns
1205 			 * stale entries.
1206 			 */
1207 			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1208 
1209 			if (!lookuperr)
1210 				vput(tvp);
1211 			else
1212 				continue; /* skip entry */
1213 
1214 			/*
1215 			 * Check the upper layer.  If the name neither
1216 			 * exists there nor is covered by a whiteout,
1217 			 * the merged directory is not empty.
1218 			 */
1219 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1220 			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1221 
1222 			if (!lookuperr)
1223 				vput(tvp);
1224 
1225 			/* Ignore names that exist in the upper layer or are whited out. */
1226 			if (!lookuperr ||
1227 			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1228 				continue;
1229 
1230 			error = ENOTEMPTY;
1231 		}
1232 	}
1233 
1234 	/* close vnode */
1235 	VOP_CLOSE(vp, FREAD, cred, td);
1236 
1237 	return (error);
1238 }
1239 
1240 #ifdef DIAGNOSTIC
1241 
1242 struct vnode   *
1243 unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1244 {
1245 	struct unionfs_node *unp;
1246 
1247 	unp = VTOUNIONFS(vp);
1248 
1249 #ifdef notyet
1250 	if (vp->v_op != unionfs_vnodeop_p) {
1251 		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1252 #ifdef KDB
1253 		kdb_enter(KDB_WHY_UNIONFS,
1254 		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1255 #endif
1256 		panic("unionfs_checkuppervp");
1257 	};
1258 #endif
1259 	return (unp->un_uppervp);
1260 }
1261 
1262 struct vnode   *
1263 unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1264 {
1265 	struct unionfs_node *unp;
1266 
1267 	unp = VTOUNIONFS(vp);
1268 
1269 #ifdef notyet
1270 	if (vp->v_op != unionfs_vnodeop_p) {
1271 		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1272 #ifdef KDB
1273 		kdb_enter(KDB_WHY_UNIONFS,
1274 		    "unionfs_checklowervp: on non-unionfs-node.\n");
1275 #endif
1276 		panic("unionfs_checklowervp");
1277 	};
1278 #endif
1279 	return (unp->un_lowervp);
1280 }
1281 #endif
1282