xref: /freebsd/sys/fs/unionfs/union_subr.c (revision da5069e1f7daaef1e7157876d6044de6f3a08ce2)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1994 Jan-Simon Pendry
5  * Copyright (c) 1994
6  *	The Regents of the University of California.  All rights reserved.
7  * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
8  * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Jan-Simon Pendry.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
38  * $FreeBSD$
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/mutex.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/namei.h>
49 #include <sys/proc.h>
50 #include <sys/vnode.h>
51 #include <sys/dirent.h>
52 #include <sys/fcntl.h>
53 #include <sys/filedesc.h>
54 #include <sys/stat.h>
55 #include <sys/resourcevar.h>
56 
57 #include <security/mac/mac_framework.h>
58 
59 #include <vm/uma.h>
60 
61 #include <fs/unionfs/union.h>
62 
/*
 * Element count handed to hashinit() when the per-directory cache of child
 * unionfs nodes is created in unionfs_nodeget().
 */
#define NUNIONFSNODECACHE 16

/*
 * malloc(9) types used by unionfs.  The hash table type is private to this
 * file; the node and path types have external linkage.
 */
static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
68 
/*
 * Initialize unionfs.
 *
 * There is no state to set up; the function only emits a debug trace and
 * always returns 0.  vfsp is unused.
 */
int
unionfs_init(struct vfsconf *vfsp)
{
	int	error = 0;

	/* printed during system boot */
	UNIONFSDEBUG("unionfs_init\n");

	return (error);
}
78 
/*
 * Uninitialize unionfs.
 *
 * Nothing was set up by unionfs_init(), so there is nothing to tear down.
 * Always returns 0.  vfsp is unused.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{
	int	error = 0;

	return (error);
}
87 
88 static struct unionfs_node_hashhead *
89 unionfs_get_hashhead(struct vnode *dvp, char *path)
90 {
91 	int		count;
92 	char		hash;
93 	struct unionfs_node *unp;
94 
95 	hash = 0;
96 	unp = VTOUNIONFS(dvp);
97 	if (path != NULL) {
98 		for (count = 0; path[count]; count++)
99 			hash += path[count];
100 	}
101 
102 	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
103 }
104 
105 /*
106  * Get the cached vnode.
107  */
108 static struct vnode *
109 unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
110 			struct vnode *dvp, char *path)
111 {
112 	struct unionfs_node_hashhead *hd;
113 	struct unionfs_node *unp;
114 	struct vnode   *vp;
115 
116 	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
117 	    ("unionfs_get_cached_vnode: v_type != VDIR"));
118 	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
119 	    ("unionfs_get_cached_vnode: v_type != VDIR"));
120 
121 	VI_LOCK(dvp);
122 	hd = unionfs_get_hashhead(dvp, path);
123 	LIST_FOREACH(unp, hd, un_hash) {
124 		if (!strcmp(unp->un_path, path)) {
125 			vp = UNIONFSTOV(unp);
126 			VI_LOCK_FLAGS(vp, MTX_DUPOK);
127 			VI_UNLOCK(dvp);
128 			vp->v_iflag &= ~VI_OWEINACT;
129 			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
130 				VI_UNLOCK(vp);
131 				vp = NULLVP;
132 			} else
133 				VI_UNLOCK(vp);
134 			return (vp);
135 		}
136 	}
137 	VI_UNLOCK(dvp);
138 
139 	return (NULLVP);
140 }
141 
142 /*
143  * Add the new vnode into cache.
144  */
145 static struct vnode *
146 unionfs_ins_cached_vnode(struct unionfs_node *uncp,
147 			struct vnode *dvp, char *path)
148 {
149 	struct unionfs_node_hashhead *hd;
150 	struct unionfs_node *unp;
151 	struct vnode   *vp;
152 
153 	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
154 	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
155 	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
156 	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
157 
158 	VI_LOCK(dvp);
159 	hd = unionfs_get_hashhead(dvp, path);
160 	LIST_FOREACH(unp, hd, un_hash) {
161 		if (!strcmp(unp->un_path, path)) {
162 			vp = UNIONFSTOV(unp);
163 			VI_LOCK_FLAGS(vp, MTX_DUPOK);
164 			vp->v_iflag &= ~VI_OWEINACT;
165 			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
166 				LIST_INSERT_HEAD(hd, uncp, un_hash);
167 				VI_UNLOCK(vp);
168 				vp = NULLVP;
169 			} else
170 				VI_UNLOCK(vp);
171 			VI_UNLOCK(dvp);
172 			return (vp);
173 		}
174 	}
175 
176 	LIST_INSERT_HEAD(hd, uncp, un_hash);
177 	VI_UNLOCK(dvp);
178 
179 	return (NULLVP);
180 }
181 
182 /*
183  * Remove the vnode.
184  */
185 static void
186 unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
187 {
188 	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
189 	KASSERT((dvp != NULLVP),
190 	    ("unionfs_rem_cached_vnode: null parent vnode"));
191 	KASSERT((unp->un_hash.le_prev != NULL),
192 	    ("unionfs_rem_cached_vnode: null hash"));
193 
194 	VI_LOCK(dvp);
195 	LIST_REMOVE(unp, un_hash);
196 	unp->un_hash.le_next = NULL;
197 	unp->un_hash.le_prev = NULL;
198 	VI_UNLOCK(dvp);
199 }
200 
/*
 * Make a new or get existing unionfs node.
 *
 * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
 * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
 * you should not lock plurality simultaneously.
 *
 * mp is the unionfs mount.  At least one of uppervp/lowervp must be
 * non-NULL (the function panics otherwise).  dvp is the parent unionfs
 * vnode; it may be NULLVP only when uppervp and lowervp are exactly the
 * mount's upper and lower root vnodes, otherwise EINVAL is returned.
 * cnp, if given, supplies the component name used for the directory cache
 * and the lock flags applied to the result.  td is unused.
 *
 * Returns 0 and stores the resulting vnode in *vpp, or an errno from
 * getnewvnode()/insmntque() (or EINVAL) with *vpp left as NULLVP.
 */
int
unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
		struct vnode *lowervp, struct vnode *dvp,
		struct vnode **vpp, struct componentname *cnp,
		struct thread *td)
{
	struct unionfs_mount *ump;
	struct unionfs_node *unp;
	struct vnode   *vp;
	int		error;
	int		lkflags;
	enum vtype	vt;
	char	       *path;

	ump = MOUNTTOUNIONFSMOUNT(mp);
	lkflags = (cnp ? cnp->cn_lkflags : 0);
	path = (cnp ? cnp->cn_nameptr : NULL);
	*vpp = NULLVP;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("unionfs_nodeget: upper and lower is null");

	/* The new node takes its type from the upper layer when present. */
	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);

	/* If it has no ISLASTCN flag, path check is skipped. */
	if (cnp && !(cnp->cn_flags & ISLASTCN))
		path = NULL;

	/* check the cache (only directories are cached, keyed by name) */
	if (path != NULL && dvp != NULLVP && vt == VDIR) {
		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
		if (vp != NULLVP) {
			vref(vp);
			*vpp = vp;
			goto unionfs_nodeget_out;
		}
	}

	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
		/* dvp will be NULLVP only in case of root vnode. */
		if (dvp == NULLVP)
			return (EINVAL);
	}
	unp = malloc(sizeof(struct unionfs_node),
	    M_UNIONFSNODE, M_WAITOK | M_ZERO);

	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
	if (error != 0) {
		/*
		 * NOTE(review): vp is not released here; presumably
		 * insmntque() disposes of it on failure -- confirm.
		 */
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	/* The node keeps its own references on the parent and both layers. */
	if (dvp != NULLVP)
		vref(dvp);
	if (uppervp != NULLVP)
		vref(uppervp);
	if (lowervp != NULLVP)
		vref(lowervp);

	/* Directories get a hash table for caching their children. */
	if (vt == VDIR)
		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
		    &(unp->un_hashmask));

	unp->un_vnode = vp;
	unp->un_uppervp = uppervp;
	unp->un_lowervp = lowervp;
	unp->un_dvp = dvp;
	/* Share the lock of the upper vnode, or the lower one if no upper. */
	if (uppervp != NULLVP)
		vp->v_vnlock = uppervp->v_vnlock;
	else
		vp->v_vnlock = lowervp->v_vnlock;

	/* Keep a private, NUL-terminated copy of the component name. */
	if (path != NULL) {
		unp->un_path = (char *)
		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
		unp->un_path[cnp->cn_namelen] = '\0';
	}
	vp->v_type = vt;
	vp->v_data = unp;

	/* The pair of mount root vnodes identifies the unionfs root. */
	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
		vp->v_vflag |= VV_ROOT;

	if (path != NULL && dvp != NULLVP && vt == VDIR)
		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
	if ((*vpp) != NULLVP) {
		/*
		 * Another live vnode with this name was already cached:
		 * drop the references taken above, detach the layers from
		 * the fresh node, release it and use the cached one.
		 */
		if (dvp != NULLVP)
			vrele(dvp);
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);

		unp->un_uppervp = NULLVP;
		unp->un_lowervp = NULLVP;
		unp->un_dvp = NULLVP;
		vrele(vp);
		vp = *vpp;
		vref(vp);
	} else
		*vpp = vp;

unionfs_nodeget_out:
	/* Lock the result only if the caller asked for a lock type. */
	if (lkflags & LK_TYPE_MASK)
		vn_lock(vp, lkflags | LK_RETRY);

	return (0);
}
323 
/*
 * Clean up the unionfs node.
 *
 * Detaches the unionfs private data from vp, switches vp back to its own
 * lock, unlocks and releases the upper/lower/parent vnodes, removes the
 * node from its parent's cache, empties and destroys its own child cache,
 * frees all per-process status records and finally frees the node itself.
 * td is unused.
 */
void
unionfs_noderem(struct vnode *vp, struct thread *td)
{
	int		count;
	struct unionfs_node *unp, *unp_t1, *unp_t2;
	struct unionfs_node_hashhead *hd;
	struct unionfs_node_status *unsp, *unsp_tmp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vnode   *dvp;

	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in unionfs_lock().
	 */
	VI_LOCK(vp);
	unp = VTOUNIONFS(vp);
	lvp = unp->un_lowervp;
	uvp = unp->un_uppervp;
	dvp = unp->un_dvp;
	unp->un_lowervp = unp->un_uppervp = NULLVP;
	/* From here on vp uses its embedded lock instead of a layer's. */
	vp->v_vnlock = &(vp->v_lock);
	vp->v_data = NULL;
	vp->v_object = NULL;
	VI_UNLOCK(vp);

	if (lvp != NULLVP)
		VOP_UNLOCK(lvp, LK_RELEASE);
	if (uvp != NULLVP)
		VOP_UNLOCK(uvp, LK_RELEASE);

	/* A NULL le_prev means the node is not (or no longer) hashed. */
	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
		unionfs_rem_cached_vnode(unp, dvp);

	/* Take vp's own lock now that the layer locks were dropped. */
	if (lockmgr(vp->v_vnlock, LK_EXCLUSIVE, VI_MTX(vp)) != 0)
		panic("the lock for deletion is unacquirable.");

	if (lvp != NULLVP)
		vrele(lvp);
	if (uvp != NULLVP)
		vrele(uvp);
	if (dvp != NULLVP) {
		vrele(dvp);
		unp->un_dvp = NULLVP;
	}
	if (unp->un_path != NULL) {
		free(unp->un_path, M_UNIONFSPATH);
		unp->un_path = NULL;
	}

	if (unp->un_hashtbl != NULL) {
		/*
		 * Unhash every cached child and clear its links so the
		 * child later sees itself as "not hashed".
		 */
		for (count = 0; count <= unp->un_hashmask; count++) {
			hd = unp->un_hashtbl + count;
			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
				LIST_REMOVE(unp_t1, un_hash);
				unp_t1->un_hash.le_next = NULL;
				unp_t1->un_hash.le_prev = NULL;
			}
		}
		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
	}

	/* Free all per-process status records still attached. */
	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
		LIST_REMOVE(unsp, uns_list);
		free(unsp, M_TEMP);
	}
	free(unp, M_UNIONFSNODE);
}
395 
396 /*
397  * Get the unionfs node status.
398  * You need exclusive lock this vnode.
399  */
400 void
401 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
402 			struct unionfs_node_status **unspp)
403 {
404 	struct unionfs_node_status *unsp;
405 	pid_t pid = td->td_proc->p_pid;
406 
407 	KASSERT(NULL != unspp, ("null pointer"));
408 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
409 
410 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
411 		if (unsp->uns_pid == pid) {
412 			*unspp = unsp;
413 			return;
414 		}
415 	}
416 
417 	/* create a new unionfs node status */
418 	unsp = malloc(sizeof(struct unionfs_node_status),
419 	    M_TEMP, M_WAITOK | M_ZERO);
420 
421 	unsp->uns_pid = pid;
422 	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
423 
424 	*unspp = unsp;
425 }
426 
427 /*
428  * Remove the unionfs node status, if you can.
429  * You need exclusive lock this vnode.
430  */
431 void
432 unionfs_tryrem_node_status(struct unionfs_node *unp,
433 			   struct unionfs_node_status *unsp)
434 {
435 	KASSERT(NULL != unsp, ("null pointer"));
436 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
437 
438 	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
439 		return;
440 
441 	LIST_REMOVE(unsp, uns_list);
442 	free(unsp, M_TEMP);
443 }
444 
445 /*
446  * Create upper node attr.
447  */
448 void
449 unionfs_create_uppervattr_core(struct unionfs_mount *ump,
450 			       struct vattr *lva,
451 			       struct vattr *uva,
452 			       struct thread *td)
453 {
454 	VATTR_NULL(uva);
455 	uva->va_type = lva->va_type;
456 	uva->va_atime = lva->va_atime;
457 	uva->va_mtime = lva->va_mtime;
458 	uva->va_ctime = lva->va_ctime;
459 
460 	switch (ump->um_copymode) {
461 	case UNIONFS_TRANSPARENT:
462 		uva->va_mode = lva->va_mode;
463 		uva->va_uid = lva->va_uid;
464 		uva->va_gid = lva->va_gid;
465 		break;
466 	case UNIONFS_MASQUERADE:
467 		if (ump->um_uid == lva->va_uid) {
468 			uva->va_mode = lva->va_mode & 077077;
469 			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
470 			uva->va_uid = lva->va_uid;
471 			uva->va_gid = lva->va_gid;
472 		} else {
473 			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
474 			uva->va_uid = ump->um_uid;
475 			uva->va_gid = ump->um_gid;
476 		}
477 		break;
478 	default:		/* UNIONFS_TRADITIONAL */
479 		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
480 		uva->va_uid = ump->um_uid;
481 		uva->va_gid = ump->um_gid;
482 		break;
483 	}
484 }
485 
486 /*
487  * Create upper node attr.
488  */
489 int
490 unionfs_create_uppervattr(struct unionfs_mount *ump,
491 			  struct vnode *lvp,
492 			  struct vattr *uva,
493 			  struct ucred *cred,
494 			  struct thread *td)
495 {
496 	int		error;
497 	struct vattr	lva;
498 
499 	if ((error = VOP_GETATTR(lvp, &lva, cred)))
500 		return (error);
501 
502 	unionfs_create_uppervattr_core(ump, &lva, uva, td);
503 
504 	return (error);
505 }
506 
507 /*
508  * relookup
509  *
510  * dvp should be locked on entry and will be locked on return.
511  *
512  * If an error is returned, *vpp will be invalid, otherwise it will hold a
513  * locked, referenced vnode. If *vpp == dvp then remember that only one
514  * LK_EXCLUSIVE lock is held.
515  */
516 int
517 unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
518 		 struct componentname *cnp, struct componentname *cn,
519 		 struct thread *td, char *path, int pathlen, u_long nameiop)
520 {
521 	int	error;
522 
523 	cn->cn_namelen = pathlen;
524 	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
525 	bcopy(path, cn->cn_pnbuf, pathlen);
526 	cn->cn_pnbuf[pathlen] = '\0';
527 
528 	cn->cn_nameiop = nameiop;
529 	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
530 	cn->cn_lkflags = LK_EXCLUSIVE;
531 	cn->cn_thread = td;
532 	cn->cn_cred = cnp->cn_cred;
533 
534 	cn->cn_nameptr = cn->cn_pnbuf;
535 
536 	if (nameiop == DELETE)
537 		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
538 	else if (RENAME == nameiop)
539 		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
540 	else if (nameiop == CREATE)
541 		cn->cn_flags |= NOCACHE;
542 
543 	vref(dvp);
544 	VOP_UNLOCK(dvp, LK_RELEASE);
545 
546 	if ((error = relookup(dvp, vpp, cn))) {
547 		uma_zfree(namei_zone, cn->cn_pnbuf);
548 		cn->cn_flags &= ~HASBUF;
549 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
550 	} else
551 		vrele(dvp);
552 
553 	return (error);
554 }
555 
556 /*
557  * relookup for CREATE namei operation.
558  *
559  * dvp is unionfs vnode. dvp should be locked.
560  *
561  * If it called 'unionfs_copyfile' function by unionfs_link etc,
562  * VOP_LOOKUP information is broken.
563  * So it need relookup in order to create link etc.
564  */
565 int
566 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
567 			    struct thread *td)
568 {
569 	int	error;
570 	struct vnode *udvp;
571 	struct vnode *vp;
572 	struct componentname cn;
573 
574 	udvp = UNIONFSVPTOUPPERVP(dvp);
575 	vp = NULLVP;
576 
577 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
578 	    strlen(cnp->cn_nameptr), CREATE);
579 	if (error)
580 		return (error);
581 
582 	if (vp != NULLVP) {
583 		if (udvp == vp)
584 			vrele(vp);
585 		else
586 			vput(vp);
587 
588 		error = EEXIST;
589 	}
590 
591 	if (cn.cn_flags & HASBUF) {
592 		uma_zfree(namei_zone, cn.cn_pnbuf);
593 		cn.cn_flags &= ~HASBUF;
594 	}
595 
596 	if (!error) {
597 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
598 		cnp->cn_flags = cn.cn_flags;
599 	}
600 
601 	return (error);
602 }
603 
604 /*
605  * relookup for DELETE namei operation.
606  *
607  * dvp is unionfs vnode. dvp should be locked.
608  */
609 int
610 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
611 			    struct thread *td)
612 {
613 	int	error;
614 	struct vnode *udvp;
615 	struct vnode *vp;
616 	struct componentname cn;
617 
618 	udvp = UNIONFSVPTOUPPERVP(dvp);
619 	vp = NULLVP;
620 
621 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
622 	    strlen(cnp->cn_nameptr), DELETE);
623 	if (error)
624 		return (error);
625 
626 	if (vp == NULLVP)
627 		error = ENOENT;
628 	else {
629 		if (udvp == vp)
630 			vrele(vp);
631 		else
632 			vput(vp);
633 	}
634 
635 	if (cn.cn_flags & HASBUF) {
636 		uma_zfree(namei_zone, cn.cn_pnbuf);
637 		cn.cn_flags &= ~HASBUF;
638 	}
639 
640 	if (!error) {
641 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
642 		cnp->cn_flags = cn.cn_flags;
643 	}
644 
645 	return (error);
646 }
647 
648 /*
649  * relookup for RENAME namei operation.
650  *
651  * dvp is unionfs vnode. dvp should be locked.
652  */
653 int
654 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
655 			    struct thread *td)
656 {
657 	int error;
658 	struct vnode *udvp;
659 	struct vnode *vp;
660 	struct componentname cn;
661 
662 	udvp = UNIONFSVPTOUPPERVP(dvp);
663 	vp = NULLVP;
664 
665 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
666 	    strlen(cnp->cn_nameptr), RENAME);
667 	if (error)
668 		return (error);
669 
670 	if (vp != NULLVP) {
671 		if (udvp == vp)
672 			vrele(vp);
673 		else
674 			vput(vp);
675 	}
676 
677 	if (cn.cn_flags & HASBUF) {
678 		uma_zfree(namei_zone, cn.cn_pnbuf);
679 		cn.cn_flags &= ~HASBUF;
680 	}
681 
682 	if (!error) {
683 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
684 		cnp->cn_flags = cn.cn_flags;
685 	}
686 
687 	return (error);
688 
689 }
690 
691 /*
692  * Update the unionfs_node.
693  *
694  * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
695  * uvp's lock and lower's lock will be unlocked.
696  */
697 static void
698 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
699 		    struct thread *td)
700 {
701 	unsigned	count, lockrec;
702 	struct vnode   *vp;
703 	struct vnode   *lvp;
704 	struct vnode   *dvp;
705 
706 	vp = UNIONFSTOV(unp);
707 	lvp = unp->un_lowervp;
708 	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
709 	dvp = unp->un_dvp;
710 
711 	/*
712 	 * lock update
713 	 */
714 	VI_LOCK(vp);
715 	unp->un_uppervp = uvp;
716 	vp->v_vnlock = uvp->v_vnlock;
717 	VI_UNLOCK(vp);
718 	lockrec = lvp->v_vnlock->lk_recurse;
719 	for (count = 0; count < lockrec; count++)
720 		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
721 
722 	/*
723 	 * cache update
724 	 */
725 	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
726 		static struct unionfs_node_hashhead *hd;
727 
728 		VI_LOCK(dvp);
729 		hd = unionfs_get_hashhead(dvp, unp->un_path);
730 		LIST_REMOVE(unp, un_hash);
731 		LIST_INSERT_HEAD(hd, unp, un_hash);
732 		VI_UNLOCK(dvp);
733 	}
734 }
735 
736 /*
737  * Create a new shadow dir.
738  *
739  * udvp should be locked on entry and will be locked on return.
740  *
741  * If no error returned, unp will be updated.
742  */
743 int
744 unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
745 		    struct unionfs_node *unp, struct componentname *cnp,
746 		    struct thread *td)
747 {
748 	int		error;
749 	struct vnode   *lvp;
750 	struct vnode   *uvp;
751 	struct vattr	va;
752 	struct vattr	lva;
753 	struct componentname cn;
754 	struct mount   *mp;
755 	struct ucred   *cred;
756 	struct ucred   *credbk;
757 	struct uidinfo *rootinfo;
758 
759 	if (unp->un_uppervp != NULLVP)
760 		return (EEXIST);
761 
762 	lvp = unp->un_lowervp;
763 	uvp = NULLVP;
764 	credbk = cnp->cn_cred;
765 
766 	/* Authority change to root */
767 	rootinfo = uifind((uid_t)0);
768 	cred = crdup(cnp->cn_cred);
769 	/*
770 	 * The calls to chgproccnt() are needed to compensate for change_ruid()
771 	 * calling chgproccnt().
772 	 */
773 	chgproccnt(cred->cr_ruidinfo, 1, 0);
774 	change_euid(cred, rootinfo);
775 	change_ruid(cred, rootinfo);
776 	change_svuid(cred, (uid_t)0);
777 	uifree(rootinfo);
778 	cnp->cn_cred = cred;
779 
780 	memset(&cn, 0, sizeof(cn));
781 
782 	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
783 		goto unionfs_mkshadowdir_abort;
784 
785 	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
786 		goto unionfs_mkshadowdir_abort;
787 	if (uvp != NULLVP) {
788 		if (udvp == uvp)
789 			vrele(uvp);
790 		else
791 			vput(uvp);
792 
793 		error = EEXIST;
794 		goto unionfs_mkshadowdir_free_out;
795 	}
796 
797 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
798 		goto unionfs_mkshadowdir_free_out;
799 	unionfs_create_uppervattr_core(ump, &lva, &va, td);
800 
801 	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
802 
803 	if (!error) {
804 		unionfs_node_update(unp, uvp, td);
805 
806 		/*
807 		 * XXX The bug which cannot set uid/gid was corrected.
808 		 * Ignore errors.
809 		 */
810 		va.va_type = VNON;
811 		VOP_SETATTR(uvp, &va, cn.cn_cred);
812 	}
813 	vn_finished_write(mp);
814 
815 unionfs_mkshadowdir_free_out:
816 	if (cn.cn_flags & HASBUF) {
817 		uma_zfree(namei_zone, cn.cn_pnbuf);
818 		cn.cn_flags &= ~HASBUF;
819 	}
820 
821 unionfs_mkshadowdir_abort:
822 	cnp->cn_cred = credbk;
823 	chgproccnt(cred->cr_ruidinfo, -1, 0);
824 	crfree(cred);
825 
826 	return (error);
827 }
828 
829 /*
830  * Create a new whiteout.
831  *
832  * dvp should be locked on entry and will be locked on return.
833  */
834 int
835 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
836 		   struct thread *td, char *path)
837 {
838 	int		error;
839 	struct vnode   *wvp;
840 	struct componentname cn;
841 	struct mount   *mp;
842 
843 	if (path == NULL)
844 		path = cnp->cn_nameptr;
845 
846 	wvp = NULLVP;
847 	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
848 		return (error);
849 	if (wvp != NULLVP) {
850 		if (cn.cn_flags & HASBUF) {
851 			uma_zfree(namei_zone, cn.cn_pnbuf);
852 			cn.cn_flags &= ~HASBUF;
853 		}
854 		if (dvp == wvp)
855 			vrele(wvp);
856 		else
857 			vput(wvp);
858 
859 		return (EEXIST);
860 	}
861 
862 	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
863 		goto unionfs_mkwhiteout_free_out;
864 	error = VOP_WHITEOUT(dvp, &cn, CREATE);
865 
866 	vn_finished_write(mp);
867 
868 unionfs_mkwhiteout_free_out:
869 	if (cn.cn_flags & HASBUF) {
870 		uma_zfree(namei_zone, cn.cn_pnbuf);
871 		cn.cn_flags &= ~HASBUF;
872 	}
873 
874 	return (error);
875 }
876 
/*
 * Create a new vnode for create a new shadow file.
 *
 * If an error is returned, *vpp will be invalid, otherwise it will hold a
 * locked, referenced and opened vnode.
 *
 * unp is never updated.
 *
 * The file is created in the upper directory udvp under unp's cached name
 * (un_path, which must not be NULL) with attributes derived from the lower
 * vnode; those attributes are also returned to the caller through uvap.
 * The new file is opened for writing with td's credentials and its write
 * count is raised by one.  Returns EEXIST if the name already exists.
 */
static int
unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
			   struct unionfs_node *unp, struct vattr *uvap,
			   struct thread *td)
{
	struct unionfs_mount *ump;
	struct vnode   *vp;
	struct vnode   *lvp;
	struct ucred   *cred;
	struct vattr	lva;
	int		fmode;
	int		error;
	struct componentname cn;

	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
	vp = NULLVP;
	lvp = unp->un_lowervp;
	cred = td->td_ucred;
	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
	error = 0;

	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
		return (error);
	unionfs_create_uppervattr_core(ump, &lva, uvap, td);

	if (unp->un_path == NULL)
		panic("unionfs: un_path is null");

	/* Build a CREATE lookup for the cached name in a private buffer. */
	cn.cn_namelen = strlen(unp->un_path);
	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
	cn.cn_lkflags = LK_EXCLUSIVE;
	cn.cn_thread = td;
	cn.cn_cred = cred;
	cn.cn_nameptr = cn.cn_pnbuf;

	/*
	 * NOTE(review): on failure the extra reference is not dropped here
	 * and udvp is not unlocked; presumably relookup() disposes of both
	 * on its error path -- confirm.
	 */
	vref(udvp);
	if ((error = relookup(udvp, &vp, &cn)) != 0)
		goto unionfs_vn_create_on_upper_free_out2;
	vrele(udvp);

	if (vp != NULLVP) {
		/* The name already exists in the upper directory. */
		if (vp == udvp)
			vrele(vp);
		else
			vput(vp);
		error = EEXIST;
		goto unionfs_vn_create_on_upper_free_out1;
	}

	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
		goto unionfs_vn_create_on_upper_free_out1;

	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
		vput(vp);
		goto unionfs_vn_create_on_upper_free_out1;
	}
	VOP_ADD_WRITECOUNT(vp, 1);
	CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",  __func__, vp,
	    vp->v_writecount);
	*vpp = vp;

unionfs_vn_create_on_upper_free_out1:
	/* Reached on success as well: udvp is unlocked in either case. */
	VOP_UNLOCK(udvp, LK_RELEASE);

unionfs_vn_create_on_upper_free_out2:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	return (error);
}
960 
961 /*
962  * Copy from lvp to uvp.
963  *
964  * lvp and uvp should be locked and opened on entry and will be locked and
965  * opened on return.
966  */
967 static int
968 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
969 		      struct ucred *cred, struct thread *td)
970 {
971 	int		error;
972 	off_t		offset;
973 	int		count;
974 	int		bufoffset;
975 	char           *buf;
976 	struct uio	uio;
977 	struct iovec	iov;
978 
979 	error = 0;
980 	memset(&uio, 0, sizeof(uio));
981 
982 	uio.uio_td = td;
983 	uio.uio_segflg = UIO_SYSSPACE;
984 	uio.uio_offset = 0;
985 
986 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
987 
988 	while (error == 0) {
989 		offset = uio.uio_offset;
990 
991 		uio.uio_iov = &iov;
992 		uio.uio_iovcnt = 1;
993 		iov.iov_base = buf;
994 		iov.iov_len = MAXBSIZE;
995 		uio.uio_resid = iov.iov_len;
996 		uio.uio_rw = UIO_READ;
997 
998 		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
999 			break;
1000 		if ((count = MAXBSIZE - uio.uio_resid) == 0)
1001 			break;
1002 
1003 		bufoffset = 0;
1004 		while (bufoffset < count) {
1005 			uio.uio_iov = &iov;
1006 			uio.uio_iovcnt = 1;
1007 			iov.iov_base = buf + bufoffset;
1008 			iov.iov_len = count - bufoffset;
1009 			uio.uio_offset = offset + bufoffset;
1010 			uio.uio_resid = iov.iov_len;
1011 			uio.uio_rw = UIO_WRITE;
1012 
1013 			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
1014 				break;
1015 
1016 			bufoffset += (count - bufoffset) - uio.uio_resid;
1017 		}
1018 
1019 		uio.uio_offset = offset + bufoffset;
1020 	}
1021 
1022 	free(buf, M_TEMP);
1023 
1024 	return (error);
1025 }
1026 
1027 /*
1028  * Copy file from lower to upper.
1029  *
1030  * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
1031  * docopy.
1032  *
1033  * If no error returned, unp will be updated.
1034  */
1035 int
1036 unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
1037 		 struct thread *td)
1038 {
1039 	int		error;
1040 	struct mount   *mp;
1041 	struct vnode   *udvp;
1042 	struct vnode   *lvp;
1043 	struct vnode   *uvp;
1044 	struct vattr	uva;
1045 
1046 	lvp = unp->un_lowervp;
1047 	uvp = NULLVP;
1048 
1049 	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
1050 		return (EROFS);
1051 	if (unp->un_dvp == NULLVP)
1052 		return (EINVAL);
1053 	if (unp->un_uppervp != NULLVP)
1054 		return (EEXIST);
1055 	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
1056 	if (udvp == NULLVP)
1057 		return (EROFS);
1058 	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
1059 		return (EROFS);
1060 
1061 	error = VOP_ACCESS(lvp, VREAD, cred, td);
1062 	if (error != 0)
1063 		return (error);
1064 
1065 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
1066 		return (error);
1067 	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
1068 	if (error != 0) {
1069 		vn_finished_write(mp);
1070 		return (error);
1071 	}
1072 
1073 	if (docopy != 0) {
1074 		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
1075 		if (error == 0) {
1076 			error = unionfs_copyfile_core(lvp, uvp, cred, td);
1077 			VOP_CLOSE(lvp, FREAD, cred, td);
1078 		}
1079 	}
1080 	VOP_CLOSE(uvp, FWRITE, cred, td);
1081 	VOP_ADD_WRITECOUNT(uvp, -1);
1082 	CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, uvp,
1083 	    uvp->v_writecount);
1084 
1085 	vn_finished_write(mp);
1086 
1087 	if (error == 0) {
1088 		/* Reset the attributes. Ignore errors. */
1089 		uva.va_type = VNON;
1090 		VOP_SETATTR(uvp, &uva, cred);
1091 	}
1092 
1093 	unionfs_node_update(unp, uvp, td);
1094 
1095 	return (error);
1096 }
1097 
/*
 * It checks whether vp can rmdir. (check empty)
 *
 * vp is unionfs vnode.
 * vp should be locked.
 *
 * An opaque upper directory is accepted immediately.  Otherwise every
 * entry of the lower directory (except whiteouts, unused slots, "." and
 * "..") that can really be looked up in the lower layer must either exist
 * in the upper directory or be covered by a whiteout there.
 *
 * Returns 0 if the directory may be removed, ENOTEMPTY if a lower entry is
 * still visible, or an error from the attribute, access, open or readdir
 * calls.
 */
int
unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	int		error;
	int		eofflag;
	int		lookuperr;
	struct vnode   *uvp;
	struct vnode   *lvp;
	struct vnode   *tvp;
	struct vattr	va;
	struct componentname cn;
	/*
	 * The size of buf needs to be larger than DIRBLKSIZ.
	 */
	char		buf[256 * 6];
	struct dirent  *dp;
	struct dirent  *edp;
	struct uio	uio;
	struct iovec	iov;

	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");

	eofflag = 0;
	uvp = UNIONFSVPTOUPPERVP(vp);
	lvp = UNIONFSVPTOLOWERVP(vp);

	/* check opaque: nothing of the lower layer shows through */
	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
		return (error);
	if (va.va_flags & OPAQUE)
		return (0);

	/* open vnode */
#ifdef MAC
	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
		return (error);
#endif
	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
		return (error);
	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
		return (error);

	uio.uio_rw = UIO_READ;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_td = td;
	uio.uio_offset = 0;

#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, lvp);
#endif
	/* Scan the lower directory one buffer at a time. */
	while (!error && !eofflag) {
		iov.iov_base = buf;
		iov.iov_len = sizeof(buf);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_resid = iov.iov_len;

		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
		if (error != 0)
			break;
		/* Not at EOF but nothing returned: give up on the scan. */
		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
#ifdef DIAGNOSTIC
			panic("bad readdir response from lower FS.");
#endif
			break;
		}

		/* edp marks the end of the valid data in buf. */
		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
		for (dp = (struct dirent*)buf; !error && dp < edp;
		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
			if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
				continue;

			/* The name is used in place; no buffer is attached. */
			cn.cn_namelen = dp->d_namlen;
			cn.cn_pnbuf = NULL;
			cn.cn_nameptr = dp->d_name;
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			cn.cn_lkflags = LK_EXCLUSIVE;
			cn.cn_thread = td;
			cn.cn_cred = cred;

			/*
			 * check entry in lower.
			 * Sometimes, readdir function returns
			 * wrong entry.
			 */
			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);
			else
				continue; /* skip entry */

			/*
			 * check entry
			 * If it has no exist/whiteout entry in upper,
			 * directory is not empty.
			 */
			/* Reset the flags: the upper lookup reports ISWHITEOUT here. */
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);

			/* ignore exist or whiteout entry */
			if (!lookuperr ||
			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
				continue;

			/* Terminates both loops through their !error tests. */
			error = ENOTEMPTY;
		}
	}

	/* close vnode */
	VOP_CLOSE(vp, FREAD, cred, td);

	return (error);
}
1225 
1226 #ifdef DIAGNOSTIC
1227 
1228 struct vnode   *
1229 unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1230 {
1231 	struct unionfs_node *unp;
1232 
1233 	unp = VTOUNIONFS(vp);
1234 
1235 #ifdef notyet
1236 	if (vp->v_op != unionfs_vnodeop_p) {
1237 		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1238 #ifdef KDB
1239 		kdb_enter(KDB_WHY_UNIONFS,
1240 		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1241 #endif
1242 		panic("unionfs_checkuppervp");
1243 	}
1244 #endif
1245 	return (unp->un_uppervp);
1246 }
1247 
1248 struct vnode   *
1249 unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1250 {
1251 	struct unionfs_node *unp;
1252 
1253 	unp = VTOUNIONFS(vp);
1254 
1255 #ifdef notyet
1256 	if (vp->v_op != unionfs_vnodeop_p) {
1257 		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1258 #ifdef KDB
1259 		kdb_enter(KDB_WHY_UNIONFS,
1260 		    "unionfs_checklowervp: on non-unionfs-node.\n");
1261 #endif
1262 		panic("unionfs_checklowervp");
1263 	}
1264 #endif
1265 	return (unp->un_lowervp);
1266 }
1267 #endif
1268