xref: /freebsd/sys/fs/unionfs/union_subr.c (revision 6683132d54bd6d589889e43dabdc53d35e38a028)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1994 Jan-Simon Pendry
5  * Copyright (c) 1994
6  *	The Regents of the University of California.  All rights reserved.
7  * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
8  * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Jan-Simon Pendry.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
38  * $FreeBSD$
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/ktr.h>
45 #include <sys/lock.h>
46 #include <sys/mutex.h>
47 #include <sys/malloc.h>
48 #include <sys/mount.h>
49 #include <sys/namei.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/dirent.h>
53 #include <sys/fcntl.h>
54 #include <sys/filedesc.h>
55 #include <sys/stat.h>
56 #include <sys/resourcevar.h>
57 
58 #include <security/mac/mac_framework.h>
59 
60 #include <vm/uma.h>
61 
62 #include <fs/unionfs/union.h>
63 
64 #define NUNIONFSNODECACHE 16
65 
66 static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
67 MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
68 MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
69 
70 /*
71  * Initialize
72  */
int
unionfs_init(struct vfsconf *vfsp)
{
	/* No global state to set up; just note initialization at boot. */
	UNIONFSDEBUG("unionfs_init\n");
	return (0);
}
79 
80 /*
81  * Uninitialize
82  */
int
unionfs_uninit(struct vfsconf *vfsp)
{
	/* Nothing to tear down; unionfs keeps no module-global state. */
	return (0);
}
88 
89 static struct unionfs_node_hashhead *
90 unionfs_get_hashhead(struct vnode *dvp, char *path)
91 {
92 	int		count;
93 	char		hash;
94 	struct unionfs_node *unp;
95 
96 	hash = 0;
97 	unp = VTOUNIONFS(dvp);
98 	if (path != NULL) {
99 		for (count = 0; path[count]; count++)
100 			hash += path[count];
101 	}
102 
103 	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
104 }
105 
106 /*
107  * Get the cached vnode.
108  */
static struct vnode *
unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
			struct vnode *dvp, char *path)
{
	struct unionfs_node_hashhead *hd;
	struct unionfs_node *unp;
	struct vnode   *vp;

	/* Only directories are cached; both layers must be VDIR (or absent). */
	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
	    ("unionfs_get_cached_vnode: v_type != VDIR"));
	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
	    ("unionfs_get_cached_vnode: v_type != VDIR"));

	/* The parent's interlock protects its hash table of children. */
	VI_LOCK(dvp);
	hd = unionfs_get_hashhead(dvp, path);
	LIST_FOREACH(unp, hd, un_hash) {
		if (!strcmp(unp->un_path, path)) {
			vp = UNIONFSTOV(unp);
			/*
			 * MTX_DUPOK: vp's interlock is acquired while
			 * dvp's interlock (same lock class) is still held.
			 */
			VI_LOCK_FLAGS(vp, MTX_DUPOK);
			VI_UNLOCK(dvp);
			vp->v_iflag &= ~VI_OWEINACT;
			/* A doomed or inactivating vnode is not reusable. */
			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
				VI_UNLOCK(vp);
				vp = NULLVP;
			} else
				VI_UNLOCK(vp);
			/* NOTE: vp is returned unreferenced; caller vref()s. */
			return (vp);
		}
	}
	VI_UNLOCK(dvp);

	return (NULLVP);
}
142 
143 /*
144  * Add the new vnode into cache.
145  */
static struct vnode *
unionfs_ins_cached_vnode(struct unionfs_node *uncp,
			struct vnode *dvp, char *path)
{
	struct unionfs_node_hashhead *hd;
	struct unionfs_node *unp;
	struct vnode   *vp;

	/* Only directories are cached; both layers must be VDIR (or absent). */
	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
	    ("unionfs_ins_cached_vnode: v_type != VDIR"));

	/* The parent's interlock protects its hash table of children. */
	VI_LOCK(dvp);
	hd = unionfs_get_hashhead(dvp, path);
	LIST_FOREACH(unp, hd, un_hash) {
		if (!strcmp(unp->un_path, path)) {
			/* An equivalent node already exists; inspect it. */
			vp = UNIONFSTOV(unp);
			VI_LOCK_FLAGS(vp, MTX_DUPOK);
			vp->v_iflag &= ~VI_OWEINACT;
			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
				/*
				 * The existing entry is dying: insert the
				 * new node anyway and return NULLVP so the
				 * caller keeps using its own vnode.
				 */
				LIST_INSERT_HEAD(hd, uncp, un_hash);
				VI_UNLOCK(vp);
				vp = NULLVP;
			} else
				VI_UNLOCK(vp);
			VI_UNLOCK(dvp);
			/* Non-NULL: caller lost the race; use this vnode. */
			return (vp);
		}
	}

	/* No collision: the new node goes into the cache. */
	LIST_INSERT_HEAD(hd, uncp, un_hash);
	VI_UNLOCK(dvp);

	return (NULLVP);
}
182 
183 /*
184  * Remove the vnode.
185  */
static void
unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
{
	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
	KASSERT((dvp != NULLVP),
	    ("unionfs_rem_cached_vnode: null parent vnode"));
	KASSERT((unp->un_hash.le_prev != NULL),
	    ("unionfs_rem_cached_vnode: null hash"));

	/* The parent's interlock protects its hash table of children. */
	VI_LOCK(dvp);
	LIST_REMOVE(unp, un_hash);
	/*
	 * Null the link fields so "is this node hashed?" can be tested
	 * later (see the le_prev check in unionfs_noderem()).
	 */
	unp->un_hash.le_next = NULL;
	unp->un_hash.le_prev = NULL;
	VI_UNLOCK(dvp);
}
201 
202 /*
203  * Make a new or get existing unionfs node.
204  *
205  * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
206  * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
207  * you should not lock plurality simultaneously.
208  */
int
unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
		struct vnode *lowervp, struct vnode *dvp,
		struct vnode **vpp, struct componentname *cnp,
		struct thread *td)
{
	struct unionfs_mount *ump;
	struct unionfs_node *unp;
	struct vnode   *vp;
	int		error;
	int		lkflags;
	enum vtype	vt;
	char	       *path;

	ump = MOUNTTOUNIONFSMOUNT(mp);
	lkflags = (cnp ? cnp->cn_lkflags : 0);
	path = (cnp ? cnp->cn_nameptr : NULL);
	*vpp = NULLVP;

	/* At least one layer must back the new node. */
	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("unionfs_nodeget: upper and lower is null");

	/* The upper layer takes precedence when both are present. */
	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);

	/* If it has no ISLASTCN flag, path check is skipped. */
	if (cnp && !(cnp->cn_flags & ISLASTCN))
		path = NULL;

	/* Directories are cached under their parent; try that first. */
	if (path != NULL && dvp != NULLVP && vt == VDIR) {
		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
		if (vp != NULLVP) {
			vref(vp);
			*vpp = vp;
			goto unionfs_nodeget_out;
		}
	}

	/*
	 * Only the root vnode (whose layer vnodes match the mount's own
	 * upper/lower vnodes) may be created without a parent.
	 */
	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
		/* dvp will be NULLVP only in case of root vnode. */
		if (dvp == NULLVP)
			return (EINVAL);
	}
	unp = malloc(sizeof(struct unionfs_node),
	    M_UNIONFSNODE, M_WAITOK | M_ZERO);

	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	/* Take a reference on every vnode the new node will point at. */
	if (dvp != NULLVP)
		vref(dvp);
	if (uppervp != NULLVP)
		vref(uppervp);
	if (lowervp != NULLVP)
		vref(lowervp);

	/* Directories carry a cache of their unionfs children. */
	if (vt == VDIR)
		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
		    &(unp->un_hashmask));

	unp->un_vnode = vp;
	unp->un_uppervp = uppervp;
	unp->un_lowervp = lowervp;
	unp->un_dvp = dvp;
	/* Share the preferred (upper, else lower) layer's vnode lock. */
	if (uppervp != NULLVP)
		vp->v_vnlock = uppervp->v_vnlock;
	else
		vp->v_vnlock = lowervp->v_vnlock;

	if (path != NULL) {
		unp->un_path = (char *)
		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
		unp->un_path[cnp->cn_namelen] = '\0';
	}
	vp->v_type = vt;
	vp->v_data = unp;

	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
		vp->v_vflag |= VV_ROOT;

	/*
	 * Insert into the parent's cache.  A non-NULL result means a
	 * concurrent thread created an equivalent node first: undo all
	 * the references taken above, drop our vnode, and switch to
	 * the winner's vnode instead.
	 */
	if (path != NULL && dvp != NULLVP && vt == VDIR)
		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
	if ((*vpp) != NULLVP) {
		if (dvp != NULLVP)
			vrele(dvp);
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);

		unp->un_uppervp = NULLVP;
		unp->un_lowervp = NULLVP;
		unp->un_dvp = NULLVP;
		vrele(vp);
		vp = *vpp;
		vref(vp);
	} else
		*vpp = vp;

unionfs_nodeget_out:
	/* Honor the caller's requested lock mode, if any. */
	if (lkflags & LK_TYPE_MASK)
		vn_lock(vp, lkflags | LK_RETRY);

	return (0);
}
324 
325 /*
326  * Clean up the unionfs node.
327  */
void
unionfs_noderem(struct vnode *vp, struct thread *td)
{
	int		count;
	struct unionfs_node *unp, *unp_t1, *unp_t2;
	struct unionfs_node_hashhead *hd;
	struct unionfs_node_status *unsp, *unsp_tmp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vnode   *dvp;

	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in unionfs_lock().
	 */
	VI_LOCK(vp);
	unp = VTOUNIONFS(vp);
	lvp = unp->un_lowervp;
	uvp = unp->un_uppervp;
	dvp = unp->un_dvp;
	unp->un_lowervp = unp->un_uppervp = NULLVP;
	/* Detach from the shared layer lock back to vp's private lock. */
	vp->v_vnlock = &(vp->v_lock);
	vp->v_data = NULL;
	vp->v_object = NULL;
	VI_UNLOCK(vp);

	/*
	 * vp's lock was shared with a layer vnode (see unionfs_nodeget),
	 * so the caller's hold on vp is really a hold on that layer's
	 * lock; release it now that v_vnlock points at v_lock.
	 */
	if (lvp != NULLVP)
		VOP_UNLOCK(lvp, LK_RELEASE);
	if (uvp != NULLVP)
		VOP_UNLOCK(uvp, LK_RELEASE);

	/* Non-NULL le_prev means the node is still hashed under dvp. */
	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
		unionfs_rem_cached_vnode(unp, dvp);

	/*
	 * Take vp's own lock for the remainder of the teardown.  The
	 * interlock argument is unused here since LK_INTERLOCK is not
	 * passed in the flags.
	 */
	if (lockmgr(vp->v_vnlock, LK_EXCLUSIVE, VI_MTX(vp)) != 0)
		panic("the lock for deletion is unacquirable.");

	/* Drop the references taken in unionfs_nodeget(). */
	if (lvp != NULLVP)
		vrele(lvp);
	if (uvp != NULLVP)
		vrele(uvp);
	if (dvp != NULLVP) {
		vrele(dvp);
		unp->un_dvp = NULLVP;
	}
	if (unp->un_path != NULL) {
		free(unp->un_path, M_UNIONFSPATH);
		unp->un_path = NULL;
	}

	/* Empty and destroy this directory's child-cache hash table. */
	if (unp->un_hashtbl != NULL) {
		for (count = 0; count <= unp->un_hashmask; count++) {
			hd = unp->un_hashtbl + count;
			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
				LIST_REMOVE(unp_t1, un_hash);
				unp_t1->un_hash.le_next = NULL;
				unp_t1->un_hash.le_prev = NULL;
			}
		}
		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
	}

	/* Release any remaining per-process status records. */
	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
		LIST_REMOVE(unsp, uns_list);
		free(unsp, M_TEMP);
	}
	free(unp, M_UNIONFSNODE);
}
396 
397 /*
398  * Get the unionfs node status.
399  * You need exclusive lock this vnode.
400  */
401 void
402 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
403 			struct unionfs_node_status **unspp)
404 {
405 	struct unionfs_node_status *unsp;
406 	pid_t pid = td->td_proc->p_pid;
407 
408 	KASSERT(NULL != unspp, ("null pointer"));
409 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
410 
411 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
412 		if (unsp->uns_pid == pid) {
413 			*unspp = unsp;
414 			return;
415 		}
416 	}
417 
418 	/* create a new unionfs node status */
419 	unsp = malloc(sizeof(struct unionfs_node_status),
420 	    M_TEMP, M_WAITOK | M_ZERO);
421 
422 	unsp->uns_pid = pid;
423 	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
424 
425 	*unspp = unsp;
426 }
427 
428 /*
429  * Remove the unionfs node status, if you can.
430  * You need exclusive lock this vnode.
431  */
432 void
433 unionfs_tryrem_node_status(struct unionfs_node *unp,
434 			   struct unionfs_node_status *unsp)
435 {
436 	KASSERT(NULL != unsp, ("null pointer"));
437 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
438 
439 	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
440 		return;
441 
442 	LIST_REMOVE(unsp, uns_list);
443 	free(unsp, M_TEMP);
444 }
445 
446 /*
447  * Create upper node attr.
448  */
449 void
450 unionfs_create_uppervattr_core(struct unionfs_mount *ump,
451 			       struct vattr *lva,
452 			       struct vattr *uva,
453 			       struct thread *td)
454 {
455 	VATTR_NULL(uva);
456 	uva->va_type = lva->va_type;
457 	uva->va_atime = lva->va_atime;
458 	uva->va_mtime = lva->va_mtime;
459 	uva->va_ctime = lva->va_ctime;
460 
461 	switch (ump->um_copymode) {
462 	case UNIONFS_TRANSPARENT:
463 		uva->va_mode = lva->va_mode;
464 		uva->va_uid = lva->va_uid;
465 		uva->va_gid = lva->va_gid;
466 		break;
467 	case UNIONFS_MASQUERADE:
468 		if (ump->um_uid == lva->va_uid) {
469 			uva->va_mode = lva->va_mode & 077077;
470 			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
471 			uva->va_uid = lva->va_uid;
472 			uva->va_gid = lva->va_gid;
473 		} else {
474 			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
475 			uva->va_uid = ump->um_uid;
476 			uva->va_gid = ump->um_gid;
477 		}
478 		break;
479 	default:		/* UNIONFS_TRADITIONAL */
480 		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
481 		uva->va_uid = ump->um_uid;
482 		uva->va_gid = ump->um_gid;
483 		break;
484 	}
485 }
486 
487 /*
488  * Create upper node attr.
489  */
490 int
491 unionfs_create_uppervattr(struct unionfs_mount *ump,
492 			  struct vnode *lvp,
493 			  struct vattr *uva,
494 			  struct ucred *cred,
495 			  struct thread *td)
496 {
497 	int		error;
498 	struct vattr	lva;
499 
500 	if ((error = VOP_GETATTR(lvp, &lva, cred)))
501 		return (error);
502 
503 	unionfs_create_uppervattr_core(ump, &lva, uva, td);
504 
505 	return (error);
506 }
507 
508 /*
509  * relookup
510  *
511  * dvp should be locked on entry and will be locked on return.
512  *
513  * If an error is returned, *vpp will be invalid, otherwise it will hold a
514  * locked, referenced vnode. If *vpp == dvp then remember that only one
515  * LK_EXCLUSIVE lock is held.
516  */
int
unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
		 struct componentname *cnp, struct componentname *cn,
		 struct thread *td, char *path, int pathlen, u_long nameiop)
{
	int	error;

	/* Build a private componentname with its own NUL-terminated
	 * name buffer from the namei zone. */
	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(path, cn->cn_pnbuf, pathlen);
	cn->cn_pnbuf[pathlen] = '\0';

	cn->cn_nameiop = nameiop;
	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
	cn->cn_lkflags = LK_EXCLUSIVE;
	cn->cn_thread = td;
	cn->cn_cred = cnp->cn_cred;

	cn->cn_nameptr = cn->cn_pnbuf;

	/* Carry over only the flags relevant to each operation. */
	if (nameiop == DELETE)
		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
	else if (RENAME == nameiop)
		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
	else if (nameiop == CREATE)
		cn->cn_flags |= NOCACHE;

	/* relookup() needs dvp unlocked; keep an extra reference on it
	 * across the call. */
	vref(dvp);
	VOP_UNLOCK(dvp, LK_RELEASE);

	if ((error = relookup(dvp, vpp, cn))) {
		/* Failure: free our name buffer and restore dvp's lock. */
		uma_zfree(namei_zone, cn->cn_pnbuf);
		cn->cn_flags &= ~HASBUF;
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	} else
		/* Success: drop the extra reference taken above. */
		vrele(dvp);

	return (error);
}
556 
557 /*
558  * relookup for CREATE namei operation.
559  *
560  * dvp is unionfs vnode. dvp should be locked.
561  *
562  * If it called 'unionfs_copyfile' function by unionfs_link etc,
563  * VOP_LOOKUP information is broken.
564  * So it need relookup in order to create link etc.
565  */
566 int
567 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
568 			    struct thread *td)
569 {
570 	int	error;
571 	struct vnode *udvp;
572 	struct vnode *vp;
573 	struct componentname cn;
574 
575 	udvp = UNIONFSVPTOUPPERVP(dvp);
576 	vp = NULLVP;
577 
578 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
579 	    strlen(cnp->cn_nameptr), CREATE);
580 	if (error)
581 		return (error);
582 
583 	if (vp != NULLVP) {
584 		if (udvp == vp)
585 			vrele(vp);
586 		else
587 			vput(vp);
588 
589 		error = EEXIST;
590 	}
591 
592 	if (cn.cn_flags & HASBUF) {
593 		uma_zfree(namei_zone, cn.cn_pnbuf);
594 		cn.cn_flags &= ~HASBUF;
595 	}
596 
597 	if (!error) {
598 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
599 		cnp->cn_flags = cn.cn_flags;
600 	}
601 
602 	return (error);
603 }
604 
605 /*
606  * relookup for DELETE namei operation.
607  *
608  * dvp is unionfs vnode. dvp should be locked.
609  */
610 int
611 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
612 			    struct thread *td)
613 {
614 	int	error;
615 	struct vnode *udvp;
616 	struct vnode *vp;
617 	struct componentname cn;
618 
619 	udvp = UNIONFSVPTOUPPERVP(dvp);
620 	vp = NULLVP;
621 
622 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
623 	    strlen(cnp->cn_nameptr), DELETE);
624 	if (error)
625 		return (error);
626 
627 	if (vp == NULLVP)
628 		error = ENOENT;
629 	else {
630 		if (udvp == vp)
631 			vrele(vp);
632 		else
633 			vput(vp);
634 	}
635 
636 	if (cn.cn_flags & HASBUF) {
637 		uma_zfree(namei_zone, cn.cn_pnbuf);
638 		cn.cn_flags &= ~HASBUF;
639 	}
640 
641 	if (!error) {
642 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
643 		cnp->cn_flags = cn.cn_flags;
644 	}
645 
646 	return (error);
647 }
648 
649 /*
650  * relookup for RENAME namei operation.
651  *
652  * dvp is unionfs vnode. dvp should be locked.
653  */
654 int
655 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
656 			    struct thread *td)
657 {
658 	int error;
659 	struct vnode *udvp;
660 	struct vnode *vp;
661 	struct componentname cn;
662 
663 	udvp = UNIONFSVPTOUPPERVP(dvp);
664 	vp = NULLVP;
665 
666 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
667 	    strlen(cnp->cn_nameptr), RENAME);
668 	if (error)
669 		return (error);
670 
671 	if (vp != NULLVP) {
672 		if (udvp == vp)
673 			vrele(vp);
674 		else
675 			vput(vp);
676 	}
677 
678 	if (cn.cn_flags & HASBUF) {
679 		uma_zfree(namei_zone, cn.cn_pnbuf);
680 		cn.cn_flags &= ~HASBUF;
681 	}
682 
683 	if (!error) {
684 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
685 		cnp->cn_flags = cn.cn_flags;
686 	}
687 
688 	return (error);
689 
690 }
691 
692 /*
693  * Update the unionfs_node.
694  *
695  * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
696  * uvp's lock and lower's lock will be unlocked.
697  */
698 static void
699 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
700 		    struct thread *td)
701 {
702 	unsigned	count, lockrec;
703 	struct vnode   *vp;
704 	struct vnode   *lvp;
705 	struct vnode   *dvp;
706 
707 	vp = UNIONFSTOV(unp);
708 	lvp = unp->un_lowervp;
709 	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
710 	dvp = unp->un_dvp;
711 
712 	/*
713 	 * lock update
714 	 */
715 	VI_LOCK(vp);
716 	unp->un_uppervp = uvp;
717 	vp->v_vnlock = uvp->v_vnlock;
718 	VI_UNLOCK(vp);
719 	lockrec = lvp->v_vnlock->lk_recurse;
720 	for (count = 0; count < lockrec; count++)
721 		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
722 
723 	/*
724 	 * cache update
725 	 */
726 	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
727 		static struct unionfs_node_hashhead *hd;
728 
729 		VI_LOCK(dvp);
730 		hd = unionfs_get_hashhead(dvp, unp->un_path);
731 		LIST_REMOVE(unp, un_hash);
732 		LIST_INSERT_HEAD(hd, unp, un_hash);
733 		VI_UNLOCK(dvp);
734 	}
735 }
736 
737 /*
738  * Create a new shadow dir.
739  *
740  * udvp should be locked on entry and will be locked on return.
741  *
742  * If no error returned, unp will be updated.
743  */
int
unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
		    struct unionfs_node *unp, struct componentname *cnp,
		    struct thread *td)
{
	int		error;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	va;
	struct vattr	lva;
	struct componentname cn;
	struct mount   *mp;
	struct ucred   *cred;
	struct ucred   *credbk;
	struct uidinfo *rootinfo;

	/* Nothing to shadow if the upper layer already exists. */
	if (unp->un_uppervp != NULLVP)
		return (EEXIST);

	lvp = unp->un_lowervp;
	uvp = NULLVP;
	credbk = cnp->cn_cred;

	/* Authority change to root */
	rootinfo = uifind((uid_t)0);
	cred = crdup(cnp->cn_cred);
	/*
	 * The calls to chgproccnt() are needed to compensate for change_ruid()
	 * calling chgproccnt().
	 */
	chgproccnt(cred->cr_ruidinfo, 1, 0);
	change_euid(cred, rootinfo);
	change_ruid(cred, rootinfo);
	change_svuid(cred, (uid_t)0);
	uifree(rootinfo);
	cnp->cn_cred = cred;

	memset(&cn, 0, sizeof(cn));

	/* Base the shadow directory's attributes on the lower one's. */
	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
		goto unionfs_mkshadowdir_abort;

	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
		goto unionfs_mkshadowdir_abort;
	if (uvp != NULLVP) {
		/* The upper name appeared in the meantime. */
		if (udvp == uvp)
			vrele(uvp);
		else
			vput(uvp);

		error = EEXIST;
		goto unionfs_mkshadowdir_free_out;
	}

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
		goto unionfs_mkshadowdir_free_out;
	unionfs_create_uppervattr_core(ump, &lva, &va, td);

	error = VOP_MKDIR(udvp, &uvp, &cn, &va);

	if (!error) {
		/* Switch the node over to the freshly made upper vnode. */
		unionfs_node_update(unp, uvp, td);

		/*
		 * XXX The bug which cannot set uid/gid was corrected.
		 * Ignore errors.
		 */
		va.va_type = VNON;
		VOP_SETATTR(uvp, &va, cn.cn_cred);
	}
	vn_finished_write(mp);

unionfs_mkshadowdir_free_out:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

unionfs_mkshadowdir_abort:
	/* Restore the caller's credential and drop our root clone. */
	cnp->cn_cred = credbk;
	chgproccnt(cred->cr_ruidinfo, -1, 0);
	crfree(cred);

	return (error);
}
829 
830 /*
831  * Create a new whiteout.
832  *
833  * dvp should be locked on entry and will be locked on return.
834  */
int
unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
		   struct thread *td, char *path)
{
	int		error;
	struct vnode   *wvp;
	struct componentname cn;
	struct mount   *mp;

	/* Default to whiting out the name currently being looked up. */
	if (path == NULL)
		path = cnp->cn_nameptr;

	wvp = NULLVP;
	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
		return (error);
	if (wvp != NULLVP) {
		/* A real entry already exists; a whiteout would be wrong. */
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (dvp == wvp)
			vrele(wvp);
		else
			vput(wvp);

		return (EEXIST);
	}

	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
		goto unionfs_mkwhiteout_free_out;
	error = VOP_WHITEOUT(dvp, &cn, CREATE);

	vn_finished_write(mp);

unionfs_mkwhiteout_free_out:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	return (error);
}
877 
878 /*
879  * Create a new vnode for create a new shadow file.
880  *
881  * If an error is returned, *vpp will be invalid, otherwise it will hold a
882  * locked, referenced and opened vnode.
883  *
884  * unp is never updated.
885  */
static int
unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
			   struct unionfs_node *unp, struct vattr *uvap,
			   struct thread *td)
{
	struct unionfs_mount *ump;
	struct vnode   *vp;
	struct vnode   *lvp;
	struct ucred   *cred;
	struct vattr	lva;
	int		fmode;
	int		error;
	struct componentname cn;

	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
	vp = NULLVP;
	lvp = unp->un_lowervp;
	cred = td->td_ucred;
	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
	error = 0;

	/* Derive the upper file's attributes from the lower file's. */
	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
		return (error);
	unionfs_create_uppervattr_core(ump, &lva, uvap, td);

	if (unp->un_path == NULL)
		panic("unionfs: un_path is null");

	/* Hand-build a componentname to relook up un_path in udvp. */
	cn.cn_namelen = strlen(unp->un_path);
	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
	cn.cn_lkflags = LK_EXCLUSIVE;
	cn.cn_thread = td;
	cn.cn_cred = cred;
	cn.cn_nameptr = cn.cn_pnbuf;

	vref(udvp);
	if ((error = relookup(udvp, &vp, &cn)) != 0)
		goto unionfs_vn_create_on_upper_free_out2;
	vrele(udvp);

	if (vp != NULLVP) {
		/* An upper vnode already exists: nothing for us to create. */
		if (vp == udvp)
			vrele(vp);
		else
			vput(vp);
		error = EEXIST;
		goto unionfs_vn_create_on_upper_free_out1;
	}

	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
		goto unionfs_vn_create_on_upper_free_out1;

	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
		vput(vp);
		goto unionfs_vn_create_on_upper_free_out1;
	}
	/* Account for the write we are about to do to the new file. */
	error = VOP_ADD_WRITECOUNT(vp, 1);
	CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",  __func__, vp,
	    vp->v_writecount);
	if (error == 0) {
		*vpp = vp;
	} else {
		VOP_CLOSE(vp, fmode, cred, td);
	}

unionfs_vn_create_on_upper_free_out1:
	/* udvp was relocked by relookup() (LOCKPARENT); release it. */
	VOP_UNLOCK(udvp, LK_RELEASE);

unionfs_vn_create_on_upper_free_out2:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	return (error);
}
965 
966 /*
967  * Copy from lvp to uvp.
968  *
969  * lvp and uvp should be locked and opened on entry and will be locked and
970  * opened on return.
971  */
static int
unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
		      struct ucred *cred, struct thread *td)
{
	int		error;
	off_t		offset;
	int		count;
	int		bufoffset;
	char           *buf;
	struct uio	uio;
	struct iovec	iov;

	error = 0;
	memset(&uio, 0, sizeof(uio));

	uio.uio_td = td;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* Copy in MAXBSIZE chunks until a read returns no data. */
	while (error == 0) {
		offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;

		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
			break;
		/* count = bytes the read actually produced; 0 means EOF. */
		if ((count = MAXBSIZE - uio.uio_resid) == 0)
			break;

		/* Write the chunk out, resuming after partial writes. */
		bufoffset = 0;
		while (bufoffset < count) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf + bufoffset;
			iov.iov_len = count - bufoffset;
			uio.uio_offset = offset + bufoffset;
			uio.uio_resid = iov.iov_len;
			uio.uio_rw = UIO_WRITE;

			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
				break;

			bufoffset += (count - bufoffset) - uio.uio_resid;
		}

		/* Next read starts where the writes actually ended. */
		uio.uio_offset = offset + bufoffset;
	}

	free(buf, M_TEMP);

	return (error);
}
1031 
1032 /*
1033  * Copy file from lower to upper.
1034  *
1035  * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
1036  * docopy.
1037  *
1038  * If no error returned, unp will be updated.
1039  */
int
unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
		 struct thread *td)
{
	int		error;
	struct mount   *mp;
	struct vnode   *udvp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	uva;

	lvp = unp->un_lowervp;
	uvp = NULLVP;

	/* Copy-up needs a writable union and an upper parent directory. */
	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (unp->un_dvp == NULLVP)
		return (EINVAL);
	if (unp->un_uppervp != NULLVP)
		return (EEXIST);
	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
	if (udvp == NULLVP)
		return (EROFS);
	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);

	/* The caller must at least be able to read the lower file. */
	error = VOP_ACCESS(lvp, VREAD, cred, td);
	if (error != 0)
		return (error);

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	/* Create and open the upper replica (locked and opened on success). */
	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
	if (error != 0) {
		vn_finished_write(mp);
		return (error);
	}

	if (docopy != 0) {
		/* Duplicate the lower file's contents into the replica. */
		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
		if (error == 0) {
			error = unionfs_copyfile_core(lvp, uvp, cred, td);
			VOP_CLOSE(lvp, FREAD, cred, td);
		}
	}
	VOP_CLOSE(uvp, FWRITE, cred, td);
	VOP_ADD_WRITECOUNT_CHECKED(uvp, -1);
	CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, uvp,
	    uvp->v_writecount);

	vn_finished_write(mp);

	if (error == 0) {
		/* Reset the attributes. Ignore errors. */
		uva.va_type = VNON;
		VOP_SETATTR(uvp, &uva, cred);
	}

	/* Switch the node to the new upper vnode even on copy error. */
	unionfs_node_update(unp, uvp, td);

	return (error);
}
1102 
1103 /*
1104  * It checks whether vp can rmdir. (check empty)
1105  *
1106  * vp is unionfs vnode.
1107  * vp should be locked.
1108  */
int
unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	int		error;
	int		eofflag;
	int		lookuperr;
	struct vnode   *uvp;
	struct vnode   *lvp;
	struct vnode   *tvp;
	struct vattr	va;
	struct componentname cn;
	/*
	 * The size of buf needs to be larger than DIRBLKSIZ.
	 */
	char		buf[256 * 6];
	struct dirent  *dp;
	struct dirent  *edp;
	struct uio	uio;
	struct iovec	iov;

	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");

	eofflag = 0;
	uvp = UNIONFSVPTOUPPERVP(vp);
	lvp = UNIONFSVPTOLOWERVP(vp);

	/* check opaque: an opaque upper dir hides the lower entirely. */
	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
		return (error);
	if (va.va_flags & OPAQUE)
		return (0);

	/* open vnode */
#ifdef MAC
	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
		return (error);
#endif
	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
		return (error);
	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
		return (error);

	uio.uio_rw = UIO_READ;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_td = td;
	uio.uio_offset = 0;

#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, lvp);
#endif
	/* Scan the lower directory for entries not masked by the upper. */
	while (!error && !eofflag) {
		iov.iov_base = buf;
		iov.iov_len = sizeof(buf);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_resid = iov.iov_len;

		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
		if (error != 0)
			break;
		/* No data and no EOF: the lower filesystem misbehaved. */
		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
#ifdef DIAGNOSTIC
			panic("bad readdir response from lower FS.");
#endif
			break;
		}

		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
		for (dp = (struct dirent*)buf; !error && dp < edp;
		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
			/* Skip whiteouts, holes, "." and "..". */
			if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
				continue;

			cn.cn_namelen = dp->d_namlen;
			cn.cn_pnbuf = NULL;
			cn.cn_nameptr = dp->d_name;
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			cn.cn_lkflags = LK_EXCLUSIVE;
			cn.cn_thread = td;
			cn.cn_cred = cred;

			/*
			 * check entry in lower.
			 * Sometimes, readdir function returns
			 * wrong entry.
			 */
			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);
			else
				continue; /* skip entry */

			/*
			 * check entry
			 * If it has no exist/whiteout entry in upper,
			 * directory is not empty.
			 */
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);

			/* ignore exist or whiteout entry */
			if (!lookuperr ||
			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
				continue;

			error = ENOTEMPTY;
		}
	}

	/* close vnode */
	VOP_CLOSE(vp, FREAD, cred, td);

	return (error);
}
1230 
1231 #ifdef DIAGNOSTIC
1232 
/*
 * DIAGNOSTIC helper: return the upper vnode of a unionfs vnode.
 * "fil" and "lno" identify the calling site but are currently unused.
 */
struct vnode   *
unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
{
	struct unionfs_node *unp;

	unp = VTOUNIONFS(vp);

#ifdef notyet
	/* Disabled sanity check: verify vp really is a unionfs vnode. */
	if (vp->v_op != unionfs_vnodeop_p) {
		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
#ifdef KDB
		kdb_enter(KDB_WHY_UNIONFS,
		    "unionfs_checkuppervp: on non-unionfs-node.\n");
#endif
		panic("unionfs_checkuppervp");
	}
#endif
	return (unp->un_uppervp);
}
1252 
/*
 * DIAGNOSTIC helper: return the lower vnode of a unionfs vnode.
 * "fil" and "lno" identify the calling site but are currently unused.
 */
struct vnode   *
unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
{
	struct unionfs_node *unp;

	unp = VTOUNIONFS(vp);

#ifdef notyet
	/* Disabled sanity check: verify vp really is a unionfs vnode. */
	if (vp->v_op != unionfs_vnodeop_p) {
		printf("unionfs_checklowervp: on non-unionfs-node.\n");
#ifdef KDB
		kdb_enter(KDB_WHY_UNIONFS,
		    "unionfs_checklowervp: on non-unionfs-node.\n");
#endif
		panic("unionfs_checklowervp");
	}
#endif
	return (unp->un_lowervp);
}
1272 #endif
1273