xref: /freebsd/sys/fs/unionfs/union_subr.c (revision 39beb93c3f8bdbf72a61fda42300b5ebed7390c8)
1 /*-
2  * Copyright (c) 1994 Jan-Simon Pendry
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6  * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Jan-Simon Pendry.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36  * $FreeBSD$
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/mutex.h>
44 #include <sys/malloc.h>
45 #include <sys/mount.h>
46 #include <sys/namei.h>
47 #include <sys/proc.h>
48 #include <sys/vnode.h>
49 #include <sys/dirent.h>
50 #include <sys/fcntl.h>
51 #include <sys/filedesc.h>
52 #include <sys/stat.h>
53 #include <sys/resourcevar.h>
54 
55 #ifdef MAC
56 #include <sys/mac.h>
57 #endif
58 
59 #include <vm/uma.h>
60 
61 #include <fs/unionfs/union.h>
62 
/*
 * Element count handed to hashinit() when the child-name cache of a
 * directory node is created (see unionfs_nodeget()).
 */
#define NUNIONFSNODECACHE 16

/*
 * malloc(9) types used by unionfs:
 *   M_UNIONFSHASH - per-directory hash tables (private to this file)
 *   M_UNIONFSNODE - struct unionfs_node, the vnode private data
 *   M_UNIONFSPATH - the un_path name string of a node
 */
static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
68 
/*
 * Initialize the unionfs file system type.
 *
 * Nothing needs to be set up here; the function only emits a debug
 * message and always returns 0.  vfsp is unused.
 */
int
unionfs_init(struct vfsconf *vfsp)
{
	UNIONFSDEBUG("unionfs_init\n");	/* printed during system boot */
	return (0);
}
78 
/*
 * Uninitialize the unionfs file system type.
 *
 * There is nothing to tear down; always returns 0.  vfsp is unused.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{
	return (0);
}
87 
88 static struct unionfs_node_hashhead *
89 unionfs_get_hashhead(struct vnode *dvp, char *path)
90 {
91 	int		count;
92 	char		hash;
93 	struct unionfs_node *unp;
94 
95 	hash = 0;
96 	unp = VTOUNIONFS(dvp);
97 	if (path != NULL) {
98 		for (count = 0; path[count]; count++)
99 			hash += path[count];
100 	}
101 
102 	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
103 }
104 
105 /*
106  * Get the cached vnode.
107  */
108 static struct vnode *
109 unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
110 			struct vnode *dvp, char *path)
111 {
112 	struct unionfs_node_hashhead *hd;
113 	struct unionfs_node *unp;
114 	struct vnode   *vp;
115 
116 	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
117 	    ("unionfs_get_cached_vnode: v_type != VDIR"));
118 	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
119 	    ("unionfs_get_cached_vnode: v_type != VDIR"));
120 
121 	VI_LOCK(dvp);
122 	hd = unionfs_get_hashhead(dvp, path);
123 	LIST_FOREACH(unp, hd, un_hash) {
124 		if (!strcmp(unp->un_path, path)) {
125 			vp = UNIONFSTOV(unp);
126 			VI_LOCK_FLAGS(vp, MTX_DUPOK);
127 			VI_UNLOCK(dvp);
128 			vp->v_iflag &= ~VI_OWEINACT;
129 			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
130 				VI_UNLOCK(vp);
131 				vp = NULLVP;
132 			} else
133 				VI_UNLOCK(vp);
134 			return (vp);
135 		}
136 	}
137 	VI_UNLOCK(dvp);
138 
139 	return (NULLVP);
140 }
141 
142 /*
143  * Add the new vnode into cache.
144  */
145 static struct vnode *
146 unionfs_ins_cached_vnode(struct unionfs_node *uncp,
147 			struct vnode *dvp, char *path)
148 {
149 	struct unionfs_node_hashhead *hd;
150 	struct unionfs_node *unp;
151 	struct vnode   *vp;
152 
153 	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
154 	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
155 	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
156 	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
157 
158 	VI_LOCK(dvp);
159 	hd = unionfs_get_hashhead(dvp, path);
160 	LIST_FOREACH(unp, hd, un_hash) {
161 		if (!strcmp(unp->un_path, path)) {
162 			vp = UNIONFSTOV(unp);
163 			VI_LOCK_FLAGS(vp, MTX_DUPOK);
164 			vp->v_iflag &= ~VI_OWEINACT;
165 			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
166 				LIST_INSERT_HEAD(hd, uncp, un_hash);
167 				VI_UNLOCK(vp);
168 				vp = NULLVP;
169 			} else
170 				VI_UNLOCK(vp);
171 			VI_UNLOCK(dvp);
172 			return (vp);
173 		}
174 	}
175 
176 	LIST_INSERT_HEAD(hd, uncp, un_hash);
177 	VI_UNLOCK(dvp);
178 
179 	return (NULLVP);
180 }
181 
182 /*
183  * Remove the vnode.
184  */
185 static void
186 unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
187 {
188 	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
189 	KASSERT((dvp != NULLVP),
190 	    ("unionfs_rem_cached_vnode: null parent vnode"));
191 	KASSERT((unp->un_hash.le_prev != NULL),
192 	    ("unionfs_rem_cached_vnode: null hash"));
193 
194 	VI_LOCK(dvp);
195 	LIST_REMOVE(unp, un_hash);
196 	unp->un_hash.le_next = NULL;
197 	unp->un_hash.le_prev = NULL;
198 	VI_UNLOCK(dvp);
199 }
200 
/*
 * Make a new or get existing unionfs node.
 *
 * mp       unionfs mount the node belongs to
 * uppervp  upper layer vnode, or NULLVP
 * lowervp  lower layer vnode, or NULLVP (at least one of the two must be
 *          given, otherwise this function panics)
 * dvp      unionfs vnode of the parent directory; NULLVP is accepted only
 *          when uppervp/lowervp are the roots recorded in the mount
 * vpp      receives the resulting unionfs vnode (referenced)
 * cnp      name of the node inside dvp, may be NULL
 * td       unused here
 *
 * Returns 0 on success, EINVAL for a non-root node without a parent, or
 * the error from getnewvnode()/insmntque().  When cnp carries a lock type
 * in cn_lkflags the returned vnode is also locked with those flags.
 *
 * uppervp and lowervp should be unlocked: the new unionfs vnode shares its
 * lock with uppervp (or with lowervp when there is no upper), so locking
 * the new vnode locks that vnode too.  To avoid deadlock, do not hold more
 * than one of them locked at the same time.
 */
int
unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
		struct vnode *lowervp, struct vnode *dvp,
		struct vnode **vpp, struct componentname *cnp,
		struct thread *td)
{
	struct unionfs_mount *ump;
	struct unionfs_node *unp;
	struct vnode   *vp;
	int		error;
	int		lkflags;
	enum vtype	vt;
	char	       *path;

	ump = MOUNTTOUNIONFSMOUNT(mp);
	lkflags = (cnp ? cnp->cn_lkflags : 0);
	path = (cnp ? cnp->cn_nameptr : NULL);
	*vpp = NULLVP;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("unionfs_nodeget: upper and lower is null");

	/* The upper layer, when present, decides the vnode type. */
	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);

	/* If it has no ISLASTCN flag, path check is skipped. */
	if (cnp && !(cnp->cn_flags & ISLASTCN))
		path = NULL;

	/* check the cache (only named directories with a parent are cached) */
	if (path != NULL && dvp != NULLVP && vt == VDIR) {
		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
		if (vp != NULLVP) {
			vref(vp);
			*vpp = vp;
			goto unionfs_nodeget_out;
		}
	}

	/*
	 * Unless both layers are exactly the mount's root vnodes, a parent
	 * directory is mandatory.
	 */
	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
		/* dvp will be NULLVP only in case of root vnode. */
		if (dvp == NULLVP)
			return (EINVAL);
	}

	/*
	 * Do the MALLOC before the getnewvnode since doing so afterward
	 * might cause a bogus v_data pointer to get dereferenced elsewhere
	 * if MALLOC should block.
	 */
	unp = malloc(sizeof(struct unionfs_node),
	    M_UNIONFSNODE, M_WAITOK | M_ZERO);

	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	/* v_data is not set yet, so on failure only unp has to be freed here. */
	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	/* The node keeps its own references on the parent and both layers. */
	if (dvp != NULLVP)
		vref(dvp);
	if (uppervp != NULLVP)
		vref(uppervp);
	if (lowervp != NULLVP)
		vref(lowervp);

	/* Directories get a hash table for caching their children. */
	if (vt == VDIR)
		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
		    &(unp->un_hashmask));

	unp->un_vnode = vp;
	unp->un_uppervp = uppervp;
	unp->un_lowervp = lowervp;
	unp->un_dvp = dvp;
	/* Share the vnode lock with the upper layer, else with the lower. */
	if (uppervp != NULLVP)
		vp->v_vnlock = uppervp->v_vnlock;
	else
		vp->v_vnlock = lowervp->v_vnlock;

	/* Keep a private NUL-terminated copy of the component name. */
	if (path != NULL) {
		unp->un_path = (char *)
		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
		unp->un_path[cnp->cn_namelen] = '\0';
	}
	vp->v_type = vt;
	vp->v_data = unp;

	/* Both layers being the mount's roots marks the unionfs root. */
	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
		vp->v_vflag |= VV_ROOT;

	if (path != NULL && dvp != NULLVP && vt == VDIR)
		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
	if ((*vpp) != NULLVP) {
		/*
		 * A node with this name was cached in the meantime.  Undo
		 * the references taken above, detach the new node from the
		 * layers, release the new vnode and hand out the cached one.
		 */
		if (dvp != NULLVP)
			vrele(dvp);
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);

		unp->un_uppervp = NULLVP;
		unp->un_lowervp = NULLVP;
		unp->un_dvp = NULLVP;
		vrele(vp);
		vp = *vpp;
		vref(vp);
	} else
		*vpp = vp;

unionfs_nodeget_out:
	/* Lock the result only when the caller asked for a lock type. */
	if (lkflags & LK_TYPE_MASK)
		vn_lock(vp, lkflags | LK_RETRY);

	return (0);
}
329 
/*
 * Clean up the unionfs node.
 *
 * Detaches the private unionfs_node from vp, switches vp back to its own
 * lock, releases the references held on the lower, upper and parent
 * vnodes and frees everything hanging off the node (name, child hash
 * table, per-process status records) and finally the node itself.
 * td is unused here.
 */
void
unionfs_noderem(struct vnode *vp, struct thread *td)
{
	int		vfslocked;
	int		count;
	struct unionfs_node *unp, *unp_t1, *unp_t2;
	struct unionfs_node_hashhead *hd;
	struct unionfs_node_status *unsp, *unsp_tmp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vnode   *dvp;

	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in unionfs_lock().
	 */
	VI_LOCK(vp);
	unp = VTOUNIONFS(vp);
	lvp = unp->un_lowervp;
	uvp = unp->un_uppervp;
	dvp = unp->un_dvp;
	unp->un_lowervp = unp->un_uppervp = NULLVP;

	/*
	 * From here on vp uses its embedded lock instead of the one borrowed
	 * from a layer vnode.  The lockmgr() call is passed the vnode
	 * interlock together with LK_INTERLOCK.
	 */
	vp->v_vnlock = &(vp->v_lock);
	vp->v_data = NULL;
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp));
	/* Drop the locks of the layer vnodes. */
	if (lvp != NULLVP)
		VOP_UNLOCK(lvp, 0);
	if (uvp != NULLVP)
		VOP_UNLOCK(uvp, 0);
	vp->v_object = NULL;

	/* A non-NULL le_prev means the node is linked in the parent's cache. */
	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
		unionfs_rem_cached_vnode(unp, dvp);

	/* Release the references taken when the node was created. */
	if (lvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
		vrele(lvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (uvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
		vrele(uvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (dvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
		vrele(dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		unp->un_dvp = NULLVP;
	}
	if (unp->un_path != NULL) {
		free(unp->un_path, M_UNIONFSPATH);
		unp->un_path = NULL;
	}

	/*
	 * Unlink every child still cached under this directory (marking it
	 * as "not cached" via NULL link pointers) and destroy the table.
	 */
	if (unp->un_hashtbl != NULL) {
		for (count = 0; count <= unp->un_hashmask; count++) {
			hd = unp->un_hashtbl + count;
			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
				LIST_REMOVE(unp_t1, un_hash);
				unp_t1->un_hash.le_next = NULL;
				unp_t1->un_hash.le_prev = NULL;
			}
		}
		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
	}

	/* Free the per-process status records, then the node itself. */
	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
		LIST_REMOVE(unsp, uns_list);
		free(unsp, M_TEMP);
	}
	free(unp, M_UNIONFSNODE);
}
407 
408 /*
409  * Get the unionfs node status.
410  * You need exclusive lock this vnode.
411  */
412 void
413 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
414 			struct unionfs_node_status **unspp)
415 {
416 	struct unionfs_node_status *unsp;
417 	pid_t pid = td->td_proc->p_pid;
418 
419 	KASSERT(NULL != unspp, ("null pointer"));
420 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
421 
422 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
423 		if (unsp->uns_pid == pid) {
424 			*unspp = unsp;
425 			return;
426 		}
427 	}
428 
429 	/* create a new unionfs node status */
430 	unsp = malloc(sizeof(struct unionfs_node_status),
431 	    M_TEMP, M_WAITOK | M_ZERO);
432 
433 	unsp->uns_pid = pid;
434 	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
435 
436 	*unspp = unsp;
437 }
438 
439 /*
440  * Remove the unionfs node status, if you can.
441  * You need exclusive lock this vnode.
442  */
443 void
444 unionfs_tryrem_node_status(struct unionfs_node *unp,
445 			   struct unionfs_node_status *unsp)
446 {
447 	KASSERT(NULL != unsp, ("null pointer"));
448 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
449 
450 	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
451 		return;
452 
453 	LIST_REMOVE(unsp, uns_list);
454 	free(unsp, M_TEMP);
455 }
456 
457 /*
458  * Create upper node attr.
459  */
460 void
461 unionfs_create_uppervattr_core(struct unionfs_mount *ump,
462 			       struct vattr *lva,
463 			       struct vattr *uva,
464 			       struct thread *td)
465 {
466 	VATTR_NULL(uva);
467 	uva->va_type = lva->va_type;
468 	uva->va_atime = lva->va_atime;
469 	uva->va_mtime = lva->va_mtime;
470 	uva->va_ctime = lva->va_ctime;
471 
472 	switch (ump->um_copymode) {
473 	case UNIONFS_TRANSPARENT:
474 		uva->va_mode = lva->va_mode;
475 		uva->va_uid = lva->va_uid;
476 		uva->va_gid = lva->va_gid;
477 		break;
478 	case UNIONFS_MASQUERADE:
479 		if (ump->um_uid == lva->va_uid) {
480 			uva->va_mode = lva->va_mode & 077077;
481 			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
482 			uva->va_uid = lva->va_uid;
483 			uva->va_gid = lva->va_gid;
484 		} else {
485 			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
486 			uva->va_uid = ump->um_uid;
487 			uva->va_gid = ump->um_gid;
488 		}
489 		break;
490 	default:		/* UNIONFS_TRADITIONAL */
491 		FILEDESC_SLOCK(td->td_proc->p_fd);
492 		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
493 		FILEDESC_SUNLOCK(td->td_proc->p_fd);
494 		uva->va_uid = ump->um_uid;
495 		uva->va_gid = ump->um_gid;
496 		break;
497 	}
498 }
499 
500 /*
501  * Create upper node attr.
502  */
503 int
504 unionfs_create_uppervattr(struct unionfs_mount *ump,
505 			  struct vnode *lvp,
506 			  struct vattr *uva,
507 			  struct ucred *cred,
508 			  struct thread *td)
509 {
510 	int		error;
511 	struct vattr	lva;
512 
513 	if ((error = VOP_GETATTR(lvp, &lva, cred)))
514 		return (error);
515 
516 	unionfs_create_uppervattr_core(ump, &lva, uva, td);
517 
518 	return (error);
519 }
520 
521 /*
522  * relookup
523  *
524  * dvp should be locked on entry and will be locked on return.
525  *
526  * If an error is returned, *vpp will be invalid, otherwise it will hold a
527  * locked, referenced vnode. If *vpp == dvp then remember that only one
528  * LK_EXCLUSIVE lock is held.
529  */
530 int
531 unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
532 		 struct componentname *cnp, struct componentname *cn,
533 		 struct thread *td, char *path, int pathlen, u_long nameiop)
534 {
535 	int	error;
536 
537 	cn->cn_namelen = pathlen;
538 	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
539 	bcopy(path, cn->cn_pnbuf, pathlen);
540 	cn->cn_pnbuf[pathlen] = '\0';
541 
542 	cn->cn_nameiop = nameiop;
543 	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
544 	cn->cn_lkflags = LK_EXCLUSIVE;
545 	cn->cn_thread = td;
546 	cn->cn_cred = cnp->cn_cred;
547 
548 	cn->cn_nameptr = cn->cn_pnbuf;
549 	cn->cn_consume = cnp->cn_consume;
550 
551 	if (nameiop == DELETE)
552 		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
553 	else if (RENAME == nameiop)
554 		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
555 
556 	vref(dvp);
557 	VOP_UNLOCK(dvp, 0);
558 
559 	if ((error = relookup(dvp, vpp, cn))) {
560 		uma_zfree(namei_zone, cn->cn_pnbuf);
561 		cn->cn_flags &= ~HASBUF;
562 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
563 	} else
564 		vrele(dvp);
565 
566 	return (error);
567 }
568 
569 /*
570  * relookup for CREATE namei operation.
571  *
572  * dvp is unionfs vnode. dvp should be locked.
573  *
574  * If it called 'unionfs_copyfile' function by unionfs_link etc,
575  * VOP_LOOKUP information is broken.
576  * So it need relookup in order to create link etc.
577  */
578 int
579 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
580 			    struct thread *td)
581 {
582 	int	error;
583 	struct vnode *udvp;
584 	struct vnode *vp;
585 	struct componentname cn;
586 
587 	udvp = UNIONFSVPTOUPPERVP(dvp);
588 	vp = NULLVP;
589 
590 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
591 	    strlen(cnp->cn_nameptr), CREATE);
592 	if (error)
593 		return (error);
594 
595 	if (vp != NULLVP) {
596 		if (udvp == vp)
597 			vrele(vp);
598 		else
599 			vput(vp);
600 
601 		error = EEXIST;
602 	}
603 
604 	if (cn.cn_flags & HASBUF) {
605 		uma_zfree(namei_zone, cn.cn_pnbuf);
606 		cn.cn_flags &= ~HASBUF;
607 	}
608 
609 	if (!error) {
610 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
611 		cnp->cn_flags = cn.cn_flags;
612 	}
613 
614 	return (error);
615 }
616 
617 /*
618  * relookup for DELETE namei operation.
619  *
620  * dvp is unionfs vnode. dvp should be locked.
621  */
622 int
623 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
624 			    struct thread *td)
625 {
626 	int	error;
627 	struct vnode *udvp;
628 	struct vnode *vp;
629 	struct componentname cn;
630 
631 	udvp = UNIONFSVPTOUPPERVP(dvp);
632 	vp = NULLVP;
633 
634 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
635 	    strlen(cnp->cn_nameptr), DELETE);
636 	if (error)
637 		return (error);
638 
639 	if (vp == NULLVP)
640 		error = ENOENT;
641 	else {
642 		if (udvp == vp)
643 			vrele(vp);
644 		else
645 			vput(vp);
646 	}
647 
648 	if (cn.cn_flags & HASBUF) {
649 		uma_zfree(namei_zone, cn.cn_pnbuf);
650 		cn.cn_flags &= ~HASBUF;
651 	}
652 
653 	if (!error) {
654 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
655 		cnp->cn_flags = cn.cn_flags;
656 	}
657 
658 	return (error);
659 }
660 
661 /*
662  * relookup for RENAME namei operation.
663  *
664  * dvp is unionfs vnode. dvp should be locked.
665  */
666 int
667 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
668 			    struct thread *td)
669 {
670 	int error;
671 	struct vnode *udvp;
672 	struct vnode *vp;
673 	struct componentname cn;
674 
675 	udvp = UNIONFSVPTOUPPERVP(dvp);
676 	vp = NULLVP;
677 
678 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
679 	    strlen(cnp->cn_nameptr), RENAME);
680 	if (error)
681 		return (error);
682 
683 	if (vp != NULLVP) {
684 		if (udvp == vp)
685 			vrele(vp);
686 		else
687 			vput(vp);
688 	}
689 
690 	if (cn.cn_flags & HASBUF) {
691 		uma_zfree(namei_zone, cn.cn_pnbuf);
692 		cn.cn_flags &= ~HASBUF;
693 	}
694 
695 	if (!error) {
696 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
697 		cnp->cn_flags = cn.cn_flags;
698 	}
699 
700 	return (error);
701 
702 }
703 
704 /*
705  * Update the unionfs_node.
706  *
707  * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
708  * uvp's lock and lower's lock will be unlocked.
709  */
710 static void
711 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
712 		    struct thread *td)
713 {
714 	unsigned	count, lockrec;
715 	struct vnode   *vp;
716 	struct vnode   *lvp;
717 	struct vnode   *dvp;
718 
719 	vp = UNIONFSTOV(unp);
720 	lvp = unp->un_lowervp;
721 	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
722 	dvp = unp->un_dvp;
723 
724 	/*
725 	 * lock update
726 	 */
727 	VI_LOCK(vp);
728 	unp->un_uppervp = uvp;
729 	vp->v_vnlock = uvp->v_vnlock;
730 	VI_UNLOCK(vp);
731 	lockrec = lvp->v_vnlock->lk_recurse;
732 	for (count = 0; count < lockrec; count++)
733 		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
734 
735 	/*
736 	 * cache update
737 	 */
738 	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
739 		static struct unionfs_node_hashhead *hd;
740 
741 		VI_LOCK(dvp);
742 		hd = unionfs_get_hashhead(dvp, unp->un_path);
743 		LIST_REMOVE(unp, un_hash);
744 		LIST_INSERT_HEAD(hd, unp, un_hash);
745 		VI_UNLOCK(dvp);
746 	}
747 }
748 
/*
 * Create a new shadow dir, i.e. the upper layer counterpart of a
 * directory that so far exists only in the lower layer.
 *
 * ump   unionfs mount (supplies the copy mode for the new attributes)
 * udvp  upper layer parent directory
 * unp   node of the directory to shadow
 * cnp   name of the directory; its cn_cred is temporarily replaced and
 *       restored before returning
 * td    calling thread
 *
 * udvp should be locked on entry and will be locked on return.
 *
 * Returns EEXIST when the node already has an upper vnode or the name
 * already exists in udvp, otherwise 0 or the error of the failing step.
 *
 * If no error returned, unp will be updated.
 */
int
unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
		    struct unionfs_node *unp, struct componentname *cnp,
		    struct thread *td)
{
	int		error;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	va;
	struct vattr	lva;
	struct componentname cn;
	struct mount   *mp;
	struct ucred   *cred;
	struct ucred   *credbk;
	struct uidinfo *rootinfo;

	if (unp->un_uppervp != NULLVP)
		return (EEXIST);

	lvp = unp->un_lowervp;
	uvp = NULLVP;
	credbk = cnp->cn_cred;

	/*
	 * Authority change to root: work on a private copy of the caller's
	 * credential whose effective, real and saved uid are set to 0.
	 * NOTE(review): the chgproccnt(+1) here and the matching -1 at the
	 * end presumably balance the per-uid process accounting done by
	 * change_ruid()/crfree() - confirm.
	 */
	rootinfo = uifind((uid_t)0);
	cred = crdup(cnp->cn_cred);
	chgproccnt(cred->cr_ruidinfo, 1, 0);
	change_euid(cred, rootinfo);
	change_ruid(cred, rootinfo);
	change_svuid(cred, (uid_t)0);
	uifree(rootinfo);
	cnp->cn_cred = cred;

	/* Zeroed so that the HASBUF test at the exit label is always valid. */
	memset(&cn, 0, sizeof(cn));

	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
		goto unionfs_mkshadowdir_abort;

	/* cn inherits the root credential through cnp->cn_cred. */
	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
		goto unionfs_mkshadowdir_abort;
	if (uvp != NULLVP) {
		/* The name already exists in the upper directory. */
		if (udvp == uvp)
			vrele(uvp);
		else
			vput(uvp);

		error = EEXIST;
		goto unionfs_mkshadowdir_free_out;
	}

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
		goto unionfs_mkshadowdir_free_out;
	if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) {
		vn_finished_write(mp);
		goto unionfs_mkshadowdir_free_out;
	}
	unionfs_create_uppervattr_core(ump, &lva, &va, td);

	error = VOP_MKDIR(udvp, &uvp, &cn, &va);

	if (!error) {
		/* Attach the new directory to the node as its upper vnode. */
		unionfs_node_update(unp, uvp, td);

		/*
		 * XXX The bug which cannot set uid/gid was corrected.
		 * Ignore errors.
		 */
		va.va_type = VNON;
		VOP_SETATTR(uvp, &va, cn.cn_cred);
	}
	vn_finished_write(mp);

unionfs_mkshadowdir_free_out:
	/* Free the name buffer allocated by unionfs_relookup(), if any. */
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

unionfs_mkshadowdir_abort:
	/* Give the caller its credential back and drop the root copy. */
	cnp->cn_cred = credbk;
	chgproccnt(cred->cr_ruidinfo, -1, 0);
	crfree(cred);

	return (error);
}
841 
842 /*
843  * Create a new whiteout.
844  *
845  * dvp should be locked on entry and will be locked on return.
846  */
847 int
848 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
849 		   struct thread *td, char *path)
850 {
851 	int		error;
852 	struct vnode   *wvp;
853 	struct componentname cn;
854 	struct mount   *mp;
855 
856 	if (path == NULL)
857 		path = cnp->cn_nameptr;
858 
859 	wvp = NULLVP;
860 	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
861 		return (error);
862 	if (wvp != NULLVP) {
863 		if (cn.cn_flags & HASBUF) {
864 			uma_zfree(namei_zone, cn.cn_pnbuf);
865 			cn.cn_flags &= ~HASBUF;
866 		}
867 		if (dvp == wvp)
868 			vrele(wvp);
869 		else
870 			vput(wvp);
871 
872 		return (EEXIST);
873 	}
874 
875 	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
876 		goto unionfs_mkwhiteout_free_out;
877 	if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE)))
878 		error = VOP_WHITEOUT(dvp, &cn, CREATE);
879 
880 	vn_finished_write(mp);
881 
882 unionfs_mkwhiteout_free_out:
883 	if (cn.cn_flags & HASBUF) {
884 		uma_zfree(namei_zone, cn.cn_pnbuf);
885 		cn.cn_flags &= ~HASBUF;
886 	}
887 
888 	return (error);
889 }
890 
/*
 * Create a new vnode for create a new shadow file.
 *
 * vpp   receives the new upper layer vnode
 * udvp  upper layer parent directory
 * unp   node whose lower file is to be shadowed; its un_path supplies
 *       the name and must not be NULL (panics otherwise)
 * uvap  receives the attributes used for the new file
 * td    calling thread; its credential is used for all operations
 *
 * If an error is returned, *vpp will be invalid, otherwise it will hold a
 * locked, referenced and opened vnode (opened for writing, with its
 * v_writecount incremented).  Returns EEXIST when the name already exists
 * in udvp.
 *
 * unp is never updated.
 */
static int
unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
			   struct unionfs_node *unp, struct vattr *uvap,
			   struct thread *td)
{
	struct unionfs_mount *ump;
	struct vnode   *vp;
	struct vnode   *lvp;
	struct ucred   *cred;
	struct vattr	lva;
	int		fmode;
	int		error;
	struct componentname cn;

	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
	vp = NULLVP;
	lvp = unp->un_lowervp;
	cred = td->td_ucred;
	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
	error = 0;

	/* Derive the attributes of the new file from the lower file. */
	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
		return (error);
	unionfs_create_uppervattr_core(ump, &lva, uvap, td);

	if (unp->un_path == NULL)
		panic("unionfs: un_path is null");

	/* Build a CREATE lookup for the node's name (copied incl. the NUL). */
	cn.cn_namelen = strlen(unp->un_path);
	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
	cn.cn_lkflags = LK_EXCLUSIVE;
	cn.cn_thread = td;
	cn.cn_cred = cred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_consume = 0;

	/*
	 * NOTE(review): the error path below skips both the vrele() and the
	 * unlock of udvp, so relookup() is presumably expected to have
	 * released udvp itself when it fails - confirm against relookup().
	 */
	vref(udvp);
	if ((error = relookup(udvp, &vp, &cn)) != 0)
		goto unionfs_vn_create_on_upper_free_out2;
	vrele(udvp);

	if (vp != NULLVP) {
		/* The name already exists in the upper directory. */
		if (vp == udvp)
			vrele(vp);
		else
			vput(vp);
		error = EEXIST;
		goto unionfs_vn_create_on_upper_free_out1;
	}

	if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0)
		goto unionfs_vn_create_on_upper_free_out1;

	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
		goto unionfs_vn_create_on_upper_free_out1;

	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
		vput(vp);
		goto unionfs_vn_create_on_upper_free_out1;
	}
	/* Account for the writer opened above. */
	vp->v_writecount++;
	*vpp = vp;

unionfs_vn_create_on_upper_free_out1:
	VOP_UNLOCK(udvp, 0);

unionfs_vn_create_on_upper_free_out2:
	/* Free the name buffer allocated above. */
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	return (error);
}
976 
977 /*
978  * Copy from lvp to uvp.
979  *
980  * lvp and uvp should be locked and opened on entry and will be locked and
981  * opened on return.
982  */
983 static int
984 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
985 		      struct ucred *cred, struct thread *td)
986 {
987 	int		error;
988 	off_t		offset;
989 	int		count;
990 	int		bufoffset;
991 	char           *buf;
992 	struct uio	uio;
993 	struct iovec	iov;
994 
995 	error = 0;
996 	memset(&uio, 0, sizeof(uio));
997 
998 	uio.uio_td = td;
999 	uio.uio_segflg = UIO_SYSSPACE;
1000 	uio.uio_offset = 0;
1001 
1002 	if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0)
1003 		return (error);
1004 	if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0)
1005 		return (error);
1006 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
1007 
1008 	while (error == 0) {
1009 		offset = uio.uio_offset;
1010 
1011 		uio.uio_iov = &iov;
1012 		uio.uio_iovcnt = 1;
1013 		iov.iov_base = buf;
1014 		iov.iov_len = MAXBSIZE;
1015 		uio.uio_resid = iov.iov_len;
1016 		uio.uio_rw = UIO_READ;
1017 
1018 		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
1019 			break;
1020 		if ((count = MAXBSIZE - uio.uio_resid) == 0)
1021 			break;
1022 
1023 		bufoffset = 0;
1024 		while (bufoffset < count) {
1025 			uio.uio_iov = &iov;
1026 			uio.uio_iovcnt = 1;
1027 			iov.iov_base = buf + bufoffset;
1028 			iov.iov_len = count - bufoffset;
1029 			uio.uio_offset = offset + bufoffset;
1030 			uio.uio_resid = iov.iov_len;
1031 			uio.uio_rw = UIO_WRITE;
1032 
1033 			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
1034 				break;
1035 
1036 			bufoffset += (count - bufoffset) - uio.uio_resid;
1037 		}
1038 
1039 		uio.uio_offset = offset + bufoffset;
1040 	}
1041 
1042 	free(buf, M_TEMP);
1043 
1044 	return (error);
1045 }
1046 
/*
 * Copy file from lower to upper.
 *
 * unp     node that has a lower file but no upper file yet
 * docopy  if you need copy of the contents, set 1 to docopy; otherwise,
 *         set 0 to docopy (only the empty upper file is created then)
 * cred    credential used for the access check, the copy and SETATTR
 * td      calling thread
 *
 * Returns EROFS when the unionfs mount or the upper parent's mount is
 * read-only or the parent has no upper directory, EINVAL when the node
 * has no parent, EEXIST when an upper file already exists, otherwise 0
 * or the error of the failing step.
 *
 * NOTE(review): the original comment states "If no error returned, unp
 * will be updated", but once the upper file has been created
 * unionfs_node_update() is called even when the copy failed - confirm
 * whether that is intended.
 */
int
unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
		 struct thread *td)
{
	int		error;
	struct mount   *mp;
	struct vnode   *udvp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	uva;

	lvp = unp->un_lowervp;
	uvp = NULLVP;

	/* Preconditions: writable mounts, a parent, and no upper file yet. */
	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (unp->un_dvp == NULLVP)
		return (EINVAL);
	if (unp->un_uppervp != NULLVP)
		return (EEXIST);
	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
	if (udvp == NULLVP)
		return (EROFS);
	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);

	/* The caller must be allowed to read the lower file. */
	error = VOP_ACCESS(lvp, VREAD, cred, td);
	if (error != 0)
		return (error);

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	/* On success uvp is locked, referenced and opened for writing. */
	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
	if (error != 0) {
		vn_finished_write(mp);
		return (error);
	}

	if (docopy != 0) {
		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
		if (error == 0) {
			error = unionfs_copyfile_core(lvp, uvp, cred, td);
			VOP_CLOSE(lvp, FREAD, cred, td);
		}
	}
	/* Undo the open and the writer count taken when uvp was created. */
	VOP_CLOSE(uvp, FWRITE, cred, td);
	uvp->v_writecount--;

	vn_finished_write(mp);

	if (error == 0) {
		/* Reset the attributes. Ignore errors. */
		uva.va_type = VNON;
		VOP_SETATTR(uvp, &uva, cred);
	}

	/* Attach uvp to the node as its upper vnode (see NOTE above). */
	unionfs_node_update(unp, uvp, td);

	return (error);
}
1115 
/*
 * It checks whether vp can rmdir. (check empty)
 *
 * The lower directory is read entry by entry.  Every entry other than
 * ".", ".." and whiteouts that can really be looked up in the lower
 * directory must either exist in the upper directory or be covered by a
 * whiteout there; otherwise the directory counts as not empty.
 *
 * Returns 0 when the directory may be removed (immediately so when the
 * upper directory is opaque), ENOTEMPTY when a visible lower entry is
 * left, or the error of a failing attribute/access/open/readdir call.
 *
 * vp is unionfs vnode.
 * vp should be locked.
 */
int
unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	int		error;
	int		eofflag;
	int		lookuperr;
	struct vnode   *uvp;
	struct vnode   *lvp;
	struct vnode   *tvp;
	struct vattr	va;
	struct componentname cn;
	/*
	 * The size of buf needs to be larger than DIRBLKSIZ.
	 */
	char		buf[256 * 6];
	struct dirent  *dp;
	struct dirent  *edp;
	struct uio	uio;
	struct iovec	iov;

	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");

	eofflag = 0;
	uvp = UNIONFSVPTOUPPERVP(vp);
	lvp = UNIONFSVPTOLOWERVP(vp);

	/* check opaque: an opaque upper directory hides the whole lower one */
	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
		return (error);
	if (va.va_flags & OPAQUE)
		return (0);

	/* open vnode */
#ifdef MAC
	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
		return (error);
#endif
	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
		return (error);
	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
		return (error);

	uio.uio_rw = UIO_READ;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_td = td;
	uio.uio_offset = 0;

#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, lvp);
#endif
	/* Read the lower directory one buffer at a time. */
	while (!error && !eofflag) {
		iov.iov_base = buf;
		iov.iov_len = sizeof(buf);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_resid = iov.iov_len;

		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
		if (error != 0)
			break;
		/* Nothing was returned although EOF was not signalled. */
		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
#ifdef DIAGNOSTIC
			panic("bad readdir response from lower FS.");
#endif
			break;
		}

		/* edp marks the end of the data actually returned. */
		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
		for (dp = (struct dirent*)buf; !error && dp < edp;
		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
			/* Whiteouts, "." and ".." never make it non-empty. */
			if (dp->d_type == DT_WHT ||
			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
				continue;

			cn.cn_namelen = dp->d_namlen;
			cn.cn_pnbuf = NULL;
			cn.cn_nameptr = dp->d_name;
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			cn.cn_lkflags = LK_EXCLUSIVE;
			cn.cn_thread = td;
			cn.cn_cred = cred;
			cn.cn_consume = 0;

			/*
			 * check entry in lower.
			 * Sometimes, readdir function returns
			 * wrong entry.
			 */
			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);
			else
				continue; /* skip entry */

			/*
			 * check entry
			 * If it has no exist/whiteout entry in upper,
			 * directory is not empty.
			 */
			/* Reset the flags, which the lookup above may have altered. */
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);

			/* ignore exist or whiteout entry */
			if (!lookuperr ||
			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
				continue;

			/* Setting error also terminates both loops. */
			error = ENOTEMPTY;
		}
	}

	/* close vnode */
	VOP_CLOSE(vp, FREAD, cred, td);

	return (error);
}
1244 
1245 #ifdef DIAGNOSTIC
1246 
1247 struct vnode   *
1248 unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1249 {
1250 	struct unionfs_node *unp;
1251 
1252 	unp = VTOUNIONFS(vp);
1253 
1254 #ifdef notyet
1255 	if (vp->v_op != unionfs_vnodeop_p) {
1256 		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1257 #ifdef KDB
1258 		kdb_enter(KDB_WHY_UNIONFS,
1259 		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1260 #endif
1261 		panic("unionfs_checkuppervp");
1262 	};
1263 #endif
1264 	return (unp->un_uppervp);
1265 }
1266 
1267 struct vnode   *
1268 unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1269 {
1270 	struct unionfs_node *unp;
1271 
1272 	unp = VTOUNIONFS(vp);
1273 
1274 #ifdef notyet
1275 	if (vp->v_op != unionfs_vnodeop_p) {
1276 		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1277 #ifdef KDB
1278 		kdb_enter(KDB_WHY_UNIONFS,
1279 		    "unionfs_checklowervp: on non-unionfs-node.\n");
1280 #endif
1281 		panic("unionfs_checklowervp");
1282 	};
1283 #endif
1284 	return (unp->un_lowervp);
1285 }
1286 #endif
1287