xref: /freebsd/sys/fs/unionfs/union_subr.c (revision eb9da1ada8b6b2c74378a5c17029ec5a7fb199e6)
1 /*-
2  * Copyright (c) 1994 Jan-Simon Pendry
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6  * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Jan-Simon Pendry.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36  * $FreeBSD$
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/mutex.h>
44 #include <sys/malloc.h>
45 #include <sys/mount.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/vnode.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/filedesc.h>
52 #include <sys/stat.h>
53 #include <sys/resourcevar.h>
54 
55 #include <security/mac/mac_framework.h>
56 
57 #include <vm/uma.h>
58 
59 #include <fs/unionfs/union.h>
60 
/*
 * Element count handed to hashinit() when a directory node's child
 * cache is created in unionfs_nodeget().
 */
#define NUNIONFSNODECACHE 16

/*
 * Malloc types used in this file: per-directory hash tables, the
 * unionfs_node structures, and the saved component names (un_path).
 */
static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
66 
/*
 * Filesystem-type initialization hook.
 *
 * Nothing is set up here; the function only emits a debug trace and
 * always reports success (0).  vfsp is not used.
 */
int
unionfs_init(struct vfsconf *vfsp)
{

	/* This trace shows up while the system is booting. */
	UNIONFSDEBUG("unionfs_init\n");

	return (0);
}
76 
/*
 * Filesystem-type teardown hook.
 *
 * There is nothing to release, so this always returns 0.  vfsp is not
 * used.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{

	return (0);
}
85 
86 static struct unionfs_node_hashhead *
87 unionfs_get_hashhead(struct vnode *dvp, char *path)
88 {
89 	int		count;
90 	char		hash;
91 	struct unionfs_node *unp;
92 
93 	hash = 0;
94 	unp = VTOUNIONFS(dvp);
95 	if (path != NULL) {
96 		for (count = 0; path[count]; count++)
97 			hash += path[count];
98 	}
99 
100 	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
101 }
102 
103 /*
104  * Get the cached vnode.
105  */
106 static struct vnode *
107 unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
108 			struct vnode *dvp, char *path)
109 {
110 	struct unionfs_node_hashhead *hd;
111 	struct unionfs_node *unp;
112 	struct vnode   *vp;
113 
114 	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
115 	    ("unionfs_get_cached_vnode: v_type != VDIR"));
116 	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
117 	    ("unionfs_get_cached_vnode: v_type != VDIR"));
118 
119 	VI_LOCK(dvp);
120 	hd = unionfs_get_hashhead(dvp, path);
121 	LIST_FOREACH(unp, hd, un_hash) {
122 		if (!strcmp(unp->un_path, path)) {
123 			vp = UNIONFSTOV(unp);
124 			VI_LOCK_FLAGS(vp, MTX_DUPOK);
125 			VI_UNLOCK(dvp);
126 			vp->v_iflag &= ~VI_OWEINACT;
127 			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
128 				VI_UNLOCK(vp);
129 				vp = NULLVP;
130 			} else
131 				VI_UNLOCK(vp);
132 			return (vp);
133 		}
134 	}
135 	VI_UNLOCK(dvp);
136 
137 	return (NULLVP);
138 }
139 
140 /*
141  * Add the new vnode into cache.
142  */
143 static struct vnode *
144 unionfs_ins_cached_vnode(struct unionfs_node *uncp,
145 			struct vnode *dvp, char *path)
146 {
147 	struct unionfs_node_hashhead *hd;
148 	struct unionfs_node *unp;
149 	struct vnode   *vp;
150 
151 	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
152 	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
153 	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
154 	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
155 
156 	VI_LOCK(dvp);
157 	hd = unionfs_get_hashhead(dvp, path);
158 	LIST_FOREACH(unp, hd, un_hash) {
159 		if (!strcmp(unp->un_path, path)) {
160 			vp = UNIONFSTOV(unp);
161 			VI_LOCK_FLAGS(vp, MTX_DUPOK);
162 			vp->v_iflag &= ~VI_OWEINACT;
163 			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
164 				LIST_INSERT_HEAD(hd, uncp, un_hash);
165 				VI_UNLOCK(vp);
166 				vp = NULLVP;
167 			} else
168 				VI_UNLOCK(vp);
169 			VI_UNLOCK(dvp);
170 			return (vp);
171 		}
172 	}
173 
174 	LIST_INSERT_HEAD(hd, uncp, un_hash);
175 	VI_UNLOCK(dvp);
176 
177 	return (NULLVP);
178 }
179 
180 /*
181  * Remove the vnode.
182  */
183 static void
184 unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
185 {
186 	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
187 	KASSERT((dvp != NULLVP),
188 	    ("unionfs_rem_cached_vnode: null parent vnode"));
189 	KASSERT((unp->un_hash.le_prev != NULL),
190 	    ("unionfs_rem_cached_vnode: null hash"));
191 
192 	VI_LOCK(dvp);
193 	LIST_REMOVE(unp, un_hash);
194 	unp->un_hash.le_next = NULL;
195 	unp->un_hash.le_prev = NULL;
196 	VI_UNLOCK(dvp);
197 }
198 
199 /*
200  * Make a new or get existing unionfs node.
201  *
202  * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
203  * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
204  * you should not lock plurality simultaneously.
205  */
206 int
207 unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
208 		struct vnode *lowervp, struct vnode *dvp,
209 		struct vnode **vpp, struct componentname *cnp,
210 		struct thread *td)
211 {
212 	struct unionfs_mount *ump;
213 	struct unionfs_node *unp;
214 	struct vnode   *vp;
215 	int		error;
216 	int		lkflags;
217 	enum vtype	vt;
218 	char	       *path;
219 
220 	ump = MOUNTTOUNIONFSMOUNT(mp);
221 	lkflags = (cnp ? cnp->cn_lkflags : 0);
222 	path = (cnp ? cnp->cn_nameptr : NULL);
223 	*vpp = NULLVP;
224 
225 	if (uppervp == NULLVP && lowervp == NULLVP)
226 		panic("unionfs_nodeget: upper and lower is null");
227 
228 	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
229 
230 	/* If it has no ISLASTCN flag, path check is skipped. */
231 	if (cnp && !(cnp->cn_flags & ISLASTCN))
232 		path = NULL;
233 
234 	/* check the cache */
235 	if (path != NULL && dvp != NULLVP && vt == VDIR) {
236 		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
237 		if (vp != NULLVP) {
238 			vref(vp);
239 			*vpp = vp;
240 			goto unionfs_nodeget_out;
241 		}
242 	}
243 
244 	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
245 	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
246 		/* dvp will be NULLVP only in case of root vnode. */
247 		if (dvp == NULLVP)
248 			return (EINVAL);
249 	}
250 	unp = malloc(sizeof(struct unionfs_node),
251 	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
252 
253 	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
254 	if (error != 0) {
255 		free(unp, M_UNIONFSNODE);
256 		return (error);
257 	}
258 	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
259 	if (error != 0) {
260 		free(unp, M_UNIONFSNODE);
261 		return (error);
262 	}
263 	if (dvp != NULLVP)
264 		vref(dvp);
265 	if (uppervp != NULLVP)
266 		vref(uppervp);
267 	if (lowervp != NULLVP)
268 		vref(lowervp);
269 
270 	if (vt == VDIR)
271 		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
272 		    &(unp->un_hashmask));
273 
274 	unp->un_vnode = vp;
275 	unp->un_uppervp = uppervp;
276 	unp->un_lowervp = lowervp;
277 	unp->un_dvp = dvp;
278 	if (uppervp != NULLVP)
279 		vp->v_vnlock = uppervp->v_vnlock;
280 	else
281 		vp->v_vnlock = lowervp->v_vnlock;
282 
283 	if (path != NULL) {
284 		unp->un_path = (char *)
285 		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
286 		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
287 		unp->un_path[cnp->cn_namelen] = '\0';
288 	}
289 	vp->v_type = vt;
290 	vp->v_data = unp;
291 
292 	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
293 	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
294 		vp->v_vflag |= VV_ROOT;
295 
296 	if (path != NULL && dvp != NULLVP && vt == VDIR)
297 		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
298 	if ((*vpp) != NULLVP) {
299 		if (dvp != NULLVP)
300 			vrele(dvp);
301 		if (uppervp != NULLVP)
302 			vrele(uppervp);
303 		if (lowervp != NULLVP)
304 			vrele(lowervp);
305 
306 		unp->un_uppervp = NULLVP;
307 		unp->un_lowervp = NULLVP;
308 		unp->un_dvp = NULLVP;
309 		vrele(vp);
310 		vp = *vpp;
311 		vref(vp);
312 	} else
313 		*vpp = vp;
314 
315 unionfs_nodeget_out:
316 	if (lkflags & LK_TYPE_MASK)
317 		vn_lock(vp, lkflags | LK_RETRY);
318 
319 	return (0);
320 }
321 
322 /*
323  * Clean up the unionfs node.
324  */
325 void
326 unionfs_noderem(struct vnode *vp, struct thread *td)
327 {
328 	int		count;
329 	struct unionfs_node *unp, *unp_t1, *unp_t2;
330 	struct unionfs_node_hashhead *hd;
331 	struct unionfs_node_status *unsp, *unsp_tmp;
332 	struct vnode   *lvp;
333 	struct vnode   *uvp;
334 	struct vnode   *dvp;
335 
336 	/*
337 	 * Use the interlock to protect the clearing of v_data to
338 	 * prevent faults in unionfs_lock().
339 	 */
340 	VI_LOCK(vp);
341 	unp = VTOUNIONFS(vp);
342 	lvp = unp->un_lowervp;
343 	uvp = unp->un_uppervp;
344 	dvp = unp->un_dvp;
345 	unp->un_lowervp = unp->un_uppervp = NULLVP;
346 	vp->v_vnlock = &(vp->v_lock);
347 	vp->v_data = NULL;
348 	vp->v_object = NULL;
349 	VI_UNLOCK(vp);
350 
351 	if (lvp != NULLVP)
352 		VOP_UNLOCK(lvp, LK_RELEASE);
353 	if (uvp != NULLVP)
354 		VOP_UNLOCK(uvp, LK_RELEASE);
355 
356 	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
357 		unionfs_rem_cached_vnode(unp, dvp);
358 
359 	if (lockmgr(vp->v_vnlock, LK_EXCLUSIVE, VI_MTX(vp)) != 0)
360 		panic("the lock for deletion is unacquirable.");
361 
362 	if (lvp != NULLVP)
363 		vrele(lvp);
364 	if (uvp != NULLVP)
365 		vrele(uvp);
366 	if (dvp != NULLVP) {
367 		vrele(dvp);
368 		unp->un_dvp = NULLVP;
369 	}
370 	if (unp->un_path != NULL) {
371 		free(unp->un_path, M_UNIONFSPATH);
372 		unp->un_path = NULL;
373 	}
374 
375 	if (unp->un_hashtbl != NULL) {
376 		for (count = 0; count <= unp->un_hashmask; count++) {
377 			hd = unp->un_hashtbl + count;
378 			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
379 				LIST_REMOVE(unp_t1, un_hash);
380 				unp_t1->un_hash.le_next = NULL;
381 				unp_t1->un_hash.le_prev = NULL;
382 			}
383 		}
384 		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
385 	}
386 
387 	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
388 		LIST_REMOVE(unsp, uns_list);
389 		free(unsp, M_TEMP);
390 	}
391 	free(unp, M_UNIONFSNODE);
392 }
393 
394 /*
395  * Get the unionfs node status.
396  * You need exclusive lock this vnode.
397  */
398 void
399 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
400 			struct unionfs_node_status **unspp)
401 {
402 	struct unionfs_node_status *unsp;
403 	pid_t pid = td->td_proc->p_pid;
404 
405 	KASSERT(NULL != unspp, ("null pointer"));
406 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
407 
408 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
409 		if (unsp->uns_pid == pid) {
410 			*unspp = unsp;
411 			return;
412 		}
413 	}
414 
415 	/* create a new unionfs node status */
416 	unsp = malloc(sizeof(struct unionfs_node_status),
417 	    M_TEMP, M_WAITOK | M_ZERO);
418 
419 	unsp->uns_pid = pid;
420 	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
421 
422 	*unspp = unsp;
423 }
424 
425 /*
426  * Remove the unionfs node status, if you can.
427  * You need exclusive lock this vnode.
428  */
429 void
430 unionfs_tryrem_node_status(struct unionfs_node *unp,
431 			   struct unionfs_node_status *unsp)
432 {
433 	KASSERT(NULL != unsp, ("null pointer"));
434 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
435 
436 	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
437 		return;
438 
439 	LIST_REMOVE(unsp, uns_list);
440 	free(unsp, M_TEMP);
441 }
442 
443 /*
444  * Create upper node attr.
445  */
446 void
447 unionfs_create_uppervattr_core(struct unionfs_mount *ump,
448 			       struct vattr *lva,
449 			       struct vattr *uva,
450 			       struct thread *td)
451 {
452 	VATTR_NULL(uva);
453 	uva->va_type = lva->va_type;
454 	uva->va_atime = lva->va_atime;
455 	uva->va_mtime = lva->va_mtime;
456 	uva->va_ctime = lva->va_ctime;
457 
458 	switch (ump->um_copymode) {
459 	case UNIONFS_TRANSPARENT:
460 		uva->va_mode = lva->va_mode;
461 		uva->va_uid = lva->va_uid;
462 		uva->va_gid = lva->va_gid;
463 		break;
464 	case UNIONFS_MASQUERADE:
465 		if (ump->um_uid == lva->va_uid) {
466 			uva->va_mode = lva->va_mode & 077077;
467 			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
468 			uva->va_uid = lva->va_uid;
469 			uva->va_gid = lva->va_gid;
470 		} else {
471 			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
472 			uva->va_uid = ump->um_uid;
473 			uva->va_gid = ump->um_gid;
474 		}
475 		break;
476 	default:		/* UNIONFS_TRADITIONAL */
477 		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
478 		uva->va_uid = ump->um_uid;
479 		uva->va_gid = ump->um_gid;
480 		break;
481 	}
482 }
483 
484 /*
485  * Create upper node attr.
486  */
487 int
488 unionfs_create_uppervattr(struct unionfs_mount *ump,
489 			  struct vnode *lvp,
490 			  struct vattr *uva,
491 			  struct ucred *cred,
492 			  struct thread *td)
493 {
494 	int		error;
495 	struct vattr	lva;
496 
497 	if ((error = VOP_GETATTR(lvp, &lva, cred)))
498 		return (error);
499 
500 	unionfs_create_uppervattr_core(ump, &lva, uva, td);
501 
502 	return (error);
503 }
504 
505 /*
506  * relookup
507  *
508  * dvp should be locked on entry and will be locked on return.
509  *
510  * If an error is returned, *vpp will be invalid, otherwise it will hold a
511  * locked, referenced vnode. If *vpp == dvp then remember that only one
512  * LK_EXCLUSIVE lock is held.
513  */
514 int
515 unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
516 		 struct componentname *cnp, struct componentname *cn,
517 		 struct thread *td, char *path, int pathlen, u_long nameiop)
518 {
519 	int	error;
520 
521 	cn->cn_namelen = pathlen;
522 	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
523 	bcopy(path, cn->cn_pnbuf, pathlen);
524 	cn->cn_pnbuf[pathlen] = '\0';
525 
526 	cn->cn_nameiop = nameiop;
527 	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
528 	cn->cn_lkflags = LK_EXCLUSIVE;
529 	cn->cn_thread = td;
530 	cn->cn_cred = cnp->cn_cred;
531 
532 	cn->cn_nameptr = cn->cn_pnbuf;
533 
534 	if (nameiop == DELETE)
535 		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
536 	else if (RENAME == nameiop)
537 		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
538 	else if (nameiop == CREATE)
539 		cn->cn_flags |= NOCACHE;
540 
541 	vref(dvp);
542 	VOP_UNLOCK(dvp, LK_RELEASE);
543 
544 	if ((error = relookup(dvp, vpp, cn))) {
545 		uma_zfree(namei_zone, cn->cn_pnbuf);
546 		cn->cn_flags &= ~HASBUF;
547 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
548 	} else
549 		vrele(dvp);
550 
551 	return (error);
552 }
553 
554 /*
555  * relookup for CREATE namei operation.
556  *
557  * dvp is unionfs vnode. dvp should be locked.
558  *
559  * If it called 'unionfs_copyfile' function by unionfs_link etc,
560  * VOP_LOOKUP information is broken.
561  * So it need relookup in order to create link etc.
562  */
563 int
564 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
565 			    struct thread *td)
566 {
567 	int	error;
568 	struct vnode *udvp;
569 	struct vnode *vp;
570 	struct componentname cn;
571 
572 	udvp = UNIONFSVPTOUPPERVP(dvp);
573 	vp = NULLVP;
574 
575 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
576 	    strlen(cnp->cn_nameptr), CREATE);
577 	if (error)
578 		return (error);
579 
580 	if (vp != NULLVP) {
581 		if (udvp == vp)
582 			vrele(vp);
583 		else
584 			vput(vp);
585 
586 		error = EEXIST;
587 	}
588 
589 	if (cn.cn_flags & HASBUF) {
590 		uma_zfree(namei_zone, cn.cn_pnbuf);
591 		cn.cn_flags &= ~HASBUF;
592 	}
593 
594 	if (!error) {
595 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
596 		cnp->cn_flags = cn.cn_flags;
597 	}
598 
599 	return (error);
600 }
601 
602 /*
603  * relookup for DELETE namei operation.
604  *
605  * dvp is unionfs vnode. dvp should be locked.
606  */
607 int
608 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
609 			    struct thread *td)
610 {
611 	int	error;
612 	struct vnode *udvp;
613 	struct vnode *vp;
614 	struct componentname cn;
615 
616 	udvp = UNIONFSVPTOUPPERVP(dvp);
617 	vp = NULLVP;
618 
619 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
620 	    strlen(cnp->cn_nameptr), DELETE);
621 	if (error)
622 		return (error);
623 
624 	if (vp == NULLVP)
625 		error = ENOENT;
626 	else {
627 		if (udvp == vp)
628 			vrele(vp);
629 		else
630 			vput(vp);
631 	}
632 
633 	if (cn.cn_flags & HASBUF) {
634 		uma_zfree(namei_zone, cn.cn_pnbuf);
635 		cn.cn_flags &= ~HASBUF;
636 	}
637 
638 	if (!error) {
639 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
640 		cnp->cn_flags = cn.cn_flags;
641 	}
642 
643 	return (error);
644 }
645 
646 /*
647  * relookup for RENAME namei operation.
648  *
649  * dvp is unionfs vnode. dvp should be locked.
650  */
651 int
652 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
653 			    struct thread *td)
654 {
655 	int error;
656 	struct vnode *udvp;
657 	struct vnode *vp;
658 	struct componentname cn;
659 
660 	udvp = UNIONFSVPTOUPPERVP(dvp);
661 	vp = NULLVP;
662 
663 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
664 	    strlen(cnp->cn_nameptr), RENAME);
665 	if (error)
666 		return (error);
667 
668 	if (vp != NULLVP) {
669 		if (udvp == vp)
670 			vrele(vp);
671 		else
672 			vput(vp);
673 	}
674 
675 	if (cn.cn_flags & HASBUF) {
676 		uma_zfree(namei_zone, cn.cn_pnbuf);
677 		cn.cn_flags &= ~HASBUF;
678 	}
679 
680 	if (!error) {
681 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
682 		cnp->cn_flags = cn.cn_flags;
683 	}
684 
685 	return (error);
686 
687 }
688 
689 /*
690  * Update the unionfs_node.
691  *
692  * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
693  * uvp's lock and lower's lock will be unlocked.
694  */
695 static void
696 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
697 		    struct thread *td)
698 {
699 	unsigned	count, lockrec;
700 	struct vnode   *vp;
701 	struct vnode   *lvp;
702 	struct vnode   *dvp;
703 
704 	vp = UNIONFSTOV(unp);
705 	lvp = unp->un_lowervp;
706 	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
707 	dvp = unp->un_dvp;
708 
709 	/*
710 	 * lock update
711 	 */
712 	VI_LOCK(vp);
713 	unp->un_uppervp = uvp;
714 	vp->v_vnlock = uvp->v_vnlock;
715 	VI_UNLOCK(vp);
716 	lockrec = lvp->v_vnlock->lk_recurse;
717 	for (count = 0; count < lockrec; count++)
718 		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
719 
720 	/*
721 	 * cache update
722 	 */
723 	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
724 		static struct unionfs_node_hashhead *hd;
725 
726 		VI_LOCK(dvp);
727 		hd = unionfs_get_hashhead(dvp, unp->un_path);
728 		LIST_REMOVE(unp, un_hash);
729 		LIST_INSERT_HEAD(hd, unp, un_hash);
730 		VI_UNLOCK(dvp);
731 	}
732 }
733 
734 /*
735  * Create a new shadow dir.
736  *
737  * udvp should be locked on entry and will be locked on return.
738  *
739  * If no error returned, unp will be updated.
740  */
741 int
742 unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
743 		    struct unionfs_node *unp, struct componentname *cnp,
744 		    struct thread *td)
745 {
746 	int		error;
747 	struct vnode   *lvp;
748 	struct vnode   *uvp;
749 	struct vattr	va;
750 	struct vattr	lva;
751 	struct componentname cn;
752 	struct mount   *mp;
753 	struct ucred   *cred;
754 	struct ucred   *credbk;
755 	struct uidinfo *rootinfo;
756 
757 	if (unp->un_uppervp != NULLVP)
758 		return (EEXIST);
759 
760 	lvp = unp->un_lowervp;
761 	uvp = NULLVP;
762 	credbk = cnp->cn_cred;
763 
764 	/* Authority change to root */
765 	rootinfo = uifind((uid_t)0);
766 	cred = crdup(cnp->cn_cred);
767 	/*
768 	 * The calls to chgproccnt() are needed to compensate for change_ruid()
769 	 * calling chgproccnt().
770 	 */
771 	chgproccnt(cred->cr_ruidinfo, 1, 0);
772 	change_euid(cred, rootinfo);
773 	change_ruid(cred, rootinfo);
774 	change_svuid(cred, (uid_t)0);
775 	uifree(rootinfo);
776 	cnp->cn_cred = cred;
777 
778 	memset(&cn, 0, sizeof(cn));
779 
780 	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
781 		goto unionfs_mkshadowdir_abort;
782 
783 	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
784 		goto unionfs_mkshadowdir_abort;
785 	if (uvp != NULLVP) {
786 		if (udvp == uvp)
787 			vrele(uvp);
788 		else
789 			vput(uvp);
790 
791 		error = EEXIST;
792 		goto unionfs_mkshadowdir_free_out;
793 	}
794 
795 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
796 		goto unionfs_mkshadowdir_free_out;
797 	unionfs_create_uppervattr_core(ump, &lva, &va, td);
798 
799 	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
800 
801 	if (!error) {
802 		unionfs_node_update(unp, uvp, td);
803 
804 		/*
805 		 * XXX The bug which cannot set uid/gid was corrected.
806 		 * Ignore errors.
807 		 */
808 		va.va_type = VNON;
809 		VOP_SETATTR(uvp, &va, cn.cn_cred);
810 	}
811 	vn_finished_write(mp);
812 
813 unionfs_mkshadowdir_free_out:
814 	if (cn.cn_flags & HASBUF) {
815 		uma_zfree(namei_zone, cn.cn_pnbuf);
816 		cn.cn_flags &= ~HASBUF;
817 	}
818 
819 unionfs_mkshadowdir_abort:
820 	cnp->cn_cred = credbk;
821 	chgproccnt(cred->cr_ruidinfo, -1, 0);
822 	crfree(cred);
823 
824 	return (error);
825 }
826 
827 /*
828  * Create a new whiteout.
829  *
830  * dvp should be locked on entry and will be locked on return.
831  */
832 int
833 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
834 		   struct thread *td, char *path)
835 {
836 	int		error;
837 	struct vnode   *wvp;
838 	struct componentname cn;
839 	struct mount   *mp;
840 
841 	if (path == NULL)
842 		path = cnp->cn_nameptr;
843 
844 	wvp = NULLVP;
845 	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
846 		return (error);
847 	if (wvp != NULLVP) {
848 		if (cn.cn_flags & HASBUF) {
849 			uma_zfree(namei_zone, cn.cn_pnbuf);
850 			cn.cn_flags &= ~HASBUF;
851 		}
852 		if (dvp == wvp)
853 			vrele(wvp);
854 		else
855 			vput(wvp);
856 
857 		return (EEXIST);
858 	}
859 
860 	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
861 		goto unionfs_mkwhiteout_free_out;
862 	error = VOP_WHITEOUT(dvp, &cn, CREATE);
863 
864 	vn_finished_write(mp);
865 
866 unionfs_mkwhiteout_free_out:
867 	if (cn.cn_flags & HASBUF) {
868 		uma_zfree(namei_zone, cn.cn_pnbuf);
869 		cn.cn_flags &= ~HASBUF;
870 	}
871 
872 	return (error);
873 }
874 
875 /*
876  * Create a new vnode for create a new shadow file.
877  *
878  * If an error is returned, *vpp will be invalid, otherwise it will hold a
879  * locked, referenced and opened vnode.
880  *
881  * unp is never updated.
882  */
883 static int
884 unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
885 			   struct unionfs_node *unp, struct vattr *uvap,
886 			   struct thread *td)
887 {
888 	struct unionfs_mount *ump;
889 	struct vnode   *vp;
890 	struct vnode   *lvp;
891 	struct ucred   *cred;
892 	struct vattr	lva;
893 	int		fmode;
894 	int		error;
895 	struct componentname cn;
896 
897 	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
898 	vp = NULLVP;
899 	lvp = unp->un_lowervp;
900 	cred = td->td_ucred;
901 	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
902 	error = 0;
903 
904 	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
905 		return (error);
906 	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
907 
908 	if (unp->un_path == NULL)
909 		panic("unionfs: un_path is null");
910 
911 	cn.cn_namelen = strlen(unp->un_path);
912 	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
913 	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
914 	cn.cn_nameiop = CREATE;
915 	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
916 	cn.cn_lkflags = LK_EXCLUSIVE;
917 	cn.cn_thread = td;
918 	cn.cn_cred = cred;
919 	cn.cn_nameptr = cn.cn_pnbuf;
920 
921 	vref(udvp);
922 	if ((error = relookup(udvp, &vp, &cn)) != 0)
923 		goto unionfs_vn_create_on_upper_free_out2;
924 	vrele(udvp);
925 
926 	if (vp != NULLVP) {
927 		if (vp == udvp)
928 			vrele(vp);
929 		else
930 			vput(vp);
931 		error = EEXIST;
932 		goto unionfs_vn_create_on_upper_free_out1;
933 	}
934 
935 	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
936 		goto unionfs_vn_create_on_upper_free_out1;
937 
938 	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
939 		vput(vp);
940 		goto unionfs_vn_create_on_upper_free_out1;
941 	}
942 	VOP_ADD_WRITECOUNT(vp, 1);
943 	CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",  __func__, vp,
944 	    vp->v_writecount);
945 	*vpp = vp;
946 
947 unionfs_vn_create_on_upper_free_out1:
948 	VOP_UNLOCK(udvp, LK_RELEASE);
949 
950 unionfs_vn_create_on_upper_free_out2:
951 	if (cn.cn_flags & HASBUF) {
952 		uma_zfree(namei_zone, cn.cn_pnbuf);
953 		cn.cn_flags &= ~HASBUF;
954 	}
955 
956 	return (error);
957 }
958 
959 /*
960  * Copy from lvp to uvp.
961  *
962  * lvp and uvp should be locked and opened on entry and will be locked and
963  * opened on return.
964  */
965 static int
966 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
967 		      struct ucred *cred, struct thread *td)
968 {
969 	int		error;
970 	off_t		offset;
971 	int		count;
972 	int		bufoffset;
973 	char           *buf;
974 	struct uio	uio;
975 	struct iovec	iov;
976 
977 	error = 0;
978 	memset(&uio, 0, sizeof(uio));
979 
980 	uio.uio_td = td;
981 	uio.uio_segflg = UIO_SYSSPACE;
982 	uio.uio_offset = 0;
983 
984 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
985 
986 	while (error == 0) {
987 		offset = uio.uio_offset;
988 
989 		uio.uio_iov = &iov;
990 		uio.uio_iovcnt = 1;
991 		iov.iov_base = buf;
992 		iov.iov_len = MAXBSIZE;
993 		uio.uio_resid = iov.iov_len;
994 		uio.uio_rw = UIO_READ;
995 
996 		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
997 			break;
998 		if ((count = MAXBSIZE - uio.uio_resid) == 0)
999 			break;
1000 
1001 		bufoffset = 0;
1002 		while (bufoffset < count) {
1003 			uio.uio_iov = &iov;
1004 			uio.uio_iovcnt = 1;
1005 			iov.iov_base = buf + bufoffset;
1006 			iov.iov_len = count - bufoffset;
1007 			uio.uio_offset = offset + bufoffset;
1008 			uio.uio_resid = iov.iov_len;
1009 			uio.uio_rw = UIO_WRITE;
1010 
1011 			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
1012 				break;
1013 
1014 			bufoffset += (count - bufoffset) - uio.uio_resid;
1015 		}
1016 
1017 		uio.uio_offset = offset + bufoffset;
1018 	}
1019 
1020 	free(buf, M_TEMP);
1021 
1022 	return (error);
1023 }
1024 
1025 /*
1026  * Copy file from lower to upper.
1027  *
1028  * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
1029  * docopy.
1030  *
1031  * If no error returned, unp will be updated.
1032  */
1033 int
1034 unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
1035 		 struct thread *td)
1036 {
1037 	int		error;
1038 	struct mount   *mp;
1039 	struct vnode   *udvp;
1040 	struct vnode   *lvp;
1041 	struct vnode   *uvp;
1042 	struct vattr	uva;
1043 
1044 	lvp = unp->un_lowervp;
1045 	uvp = NULLVP;
1046 
1047 	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
1048 		return (EROFS);
1049 	if (unp->un_dvp == NULLVP)
1050 		return (EINVAL);
1051 	if (unp->un_uppervp != NULLVP)
1052 		return (EEXIST);
1053 	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
1054 	if (udvp == NULLVP)
1055 		return (EROFS);
1056 	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
1057 		return (EROFS);
1058 
1059 	error = VOP_ACCESS(lvp, VREAD, cred, td);
1060 	if (error != 0)
1061 		return (error);
1062 
1063 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
1064 		return (error);
1065 	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
1066 	if (error != 0) {
1067 		vn_finished_write(mp);
1068 		return (error);
1069 	}
1070 
1071 	if (docopy != 0) {
1072 		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
1073 		if (error == 0) {
1074 			error = unionfs_copyfile_core(lvp, uvp, cred, td);
1075 			VOP_CLOSE(lvp, FREAD, cred, td);
1076 		}
1077 	}
1078 	VOP_CLOSE(uvp, FWRITE, cred, td);
1079 	VOP_ADD_WRITECOUNT(uvp, -1);
1080 	CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, uvp,
1081 	    uvp->v_writecount);
1082 
1083 	vn_finished_write(mp);
1084 
1085 	if (error == 0) {
1086 		/* Reset the attributes. Ignore errors. */
1087 		uva.va_type = VNON;
1088 		VOP_SETATTR(uvp, &uva, cred);
1089 	}
1090 
1091 	unionfs_node_update(unp, uvp, td);
1092 
1093 	return (error);
1094 }
1095 
1096 /*
1097  * It checks whether vp can rmdir. (check empty)
1098  *
1099  * vp is unionfs vnode.
1100  * vp should be locked.
1101  */
1102 int
1103 unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
1104 {
1105 	int		error;
1106 	int		eofflag;
1107 	int		lookuperr;
1108 	struct vnode   *uvp;
1109 	struct vnode   *lvp;
1110 	struct vnode   *tvp;
1111 	struct vattr	va;
1112 	struct componentname cn;
1113 	/*
1114 	 * The size of buf needs to be larger than DIRBLKSIZ.
1115 	 */
1116 	char		buf[256 * 6];
1117 	struct dirent  *dp;
1118 	struct dirent  *edp;
1119 	struct uio	uio;
1120 	struct iovec	iov;
1121 
1122 	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
1123 
1124 	eofflag = 0;
1125 	uvp = UNIONFSVPTOUPPERVP(vp);
1126 	lvp = UNIONFSVPTOLOWERVP(vp);
1127 
1128 	/* check opaque */
1129 	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
1130 		return (error);
1131 	if (va.va_flags & OPAQUE)
1132 		return (0);
1133 
1134 	/* open vnode */
1135 #ifdef MAC
1136 	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
1137 		return (error);
1138 #endif
1139 	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
1140 		return (error);
1141 	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
1142 		return (error);
1143 
1144 	uio.uio_rw = UIO_READ;
1145 	uio.uio_segflg = UIO_SYSSPACE;
1146 	uio.uio_td = td;
1147 	uio.uio_offset = 0;
1148 
1149 #ifdef MAC
1150 	error = mac_vnode_check_readdir(td->td_ucred, lvp);
1151 #endif
1152 	while (!error && !eofflag) {
1153 		iov.iov_base = buf;
1154 		iov.iov_len = sizeof(buf);
1155 		uio.uio_iov = &iov;
1156 		uio.uio_iovcnt = 1;
1157 		uio.uio_resid = iov.iov_len;
1158 
1159 		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
1160 		if (error != 0)
1161 			break;
1162 		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
1163 #ifdef DIAGNOSTIC
1164 			panic("bad readdir response from lower FS.");
1165 #endif
1166 			break;
1167 		}
1168 
1169 		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
1170 		for (dp = (struct dirent*)buf; !error && dp < edp;
1171 		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
1172 			if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
1173 			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1174 			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1175 				continue;
1176 
1177 			cn.cn_namelen = dp->d_namlen;
1178 			cn.cn_pnbuf = NULL;
1179 			cn.cn_nameptr = dp->d_name;
1180 			cn.cn_nameiop = LOOKUP;
1181 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1182 			cn.cn_lkflags = LK_EXCLUSIVE;
1183 			cn.cn_thread = td;
1184 			cn.cn_cred = cred;
1185 
1186 			/*
1187 			 * check entry in lower.
1188 			 * Sometimes, readdir function returns
1189 			 * wrong entry.
1190 			 */
1191 			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1192 
1193 			if (!lookuperr)
1194 				vput(tvp);
1195 			else
1196 				continue; /* skip entry */
1197 
1198 			/*
1199 			 * check entry
1200 			 * If it has no exist/whiteout entry in upper,
1201 			 * directory is not empty.
1202 			 */
1203 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1204 			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1205 
1206 			if (!lookuperr)
1207 				vput(tvp);
1208 
1209 			/* ignore exist or whiteout entry */
1210 			if (!lookuperr ||
1211 			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1212 				continue;
1213 
1214 			error = ENOTEMPTY;
1215 		}
1216 	}
1217 
1218 	/* close vnode */
1219 	VOP_CLOSE(vp, FREAD, cred, td);
1220 
1221 	return (error);
1222 }
1223 
1224 #ifdef DIAGNOSTIC
1225 
1226 struct vnode   *
1227 unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1228 {
1229 	struct unionfs_node *unp;
1230 
1231 	unp = VTOUNIONFS(vp);
1232 
1233 #ifdef notyet
1234 	if (vp->v_op != unionfs_vnodeop_p) {
1235 		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1236 #ifdef KDB
1237 		kdb_enter(KDB_WHY_UNIONFS,
1238 		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1239 #endif
1240 		panic("unionfs_checkuppervp");
1241 	}
1242 #endif
1243 	return (unp->un_uppervp);
1244 }
1245 
1246 struct vnode   *
1247 unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1248 {
1249 	struct unionfs_node *unp;
1250 
1251 	unp = VTOUNIONFS(vp);
1252 
1253 #ifdef notyet
1254 	if (vp->v_op != unionfs_vnodeop_p) {
1255 		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1256 #ifdef KDB
1257 		kdb_enter(KDB_WHY_UNIONFS,
1258 		    "unionfs_checklowervp: on non-unionfs-node.\n");
1259 #endif
1260 		panic("unionfs_checklowervp");
1261 	}
1262 #endif
1263 	return (unp->un_lowervp);
1264 }
1265 #endif
1266