xref: /freebsd/sys/fs/tmpfs/tmpfs_vnops.c (revision 61898cde69374d5a9994e2074605bc4101aff72d)
1 /*	$NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
5  *
6  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11  * 2005 program.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * tmpfs vnode interface.
37  */
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/dirent.h>
44 #include <sys/fcntl.h>
45 #include <sys/limits.h>
46 #include <sys/lockf.h>
47 #include <sys/lock.h>
48 #include <sys/mount.h>
49 #include <sys/namei.h>
50 #include <sys/priv.h>
51 #include <sys/proc.h>
52 #include <sys/rwlock.h>
53 #include <sys/sched.h>
54 #include <sys/stat.h>
55 #include <sys/sysctl.h>
56 #include <sys/unistd.h>
57 #include <sys/vnode.h>
58 #include <sys/smr.h>
59 
60 #include <vm/vm.h>
61 #include <vm/vm_param.h>
62 #include <vm/vm_object.h>
63 
64 #include <fs/tmpfs/tmpfs_vnops.h>
65 #include <fs/tmpfs/tmpfs.h>
66 
/* NOTE(review): presumably declares the _vfs_tmpfs sysctl node that is defined in another file -- confirm. */
67 SYSCTL_DECL(_vfs_tmpfs);
68 VFS_SMR_DECLARE;
69 
/*
 * Running total of lock-acquisition passes made by tmpfs_rename_relock().
 * Exported read-only (CTLFLAG_RD) as the "rename_restarts" OID below
 * _vfs_tmpfs; __DEVOLATILE strips the volatile qualifier for the sysctl
 * macro's pointer argument.
 */
70 static volatile int tmpfs_rename_restarts;
71 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
72     __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
73     "Times rename had to restart due to lock contention");
74 
/*
 * Allocation callback handed to vn_vget_ino_gen(): forwards the opaque
 * argument to tmpfs_alloc_vp() and reports its result unchanged.
 */
static int
tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
    struct vnode **rvp)
{
	int error;

	error = tmpfs_alloc_vp(mp, arg, lkflags, rvp);
	return (error);
}
82 
83 static int
84 tmpfs_lookup1(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
85 {
86 	struct tmpfs_dirent *de;
87 	struct tmpfs_node *dnode, *pnode;
88 	struct tmpfs_mount *tm;
89 	int error;
90 
91 	dnode = VP_TO_TMPFS_DIR(dvp);
92 	*vpp = NULLVP;
93 
94 	/* Check accessibility of requested node as a first step. */
95 	error = vn_dir_check_exec(dvp, cnp);
96 	if (error != 0)
97 		goto out;
98 
99 	/* We cannot be requesting the parent directory of the root node. */
100 	MPASS(IMPLIES(dnode->tn_type == VDIR &&
101 	    dnode->tn_dir.tn_parent == dnode,
102 	    !(cnp->cn_flags & ISDOTDOT)));
103 
104 	TMPFS_ASSERT_LOCKED(dnode);
105 	if (dnode->tn_dir.tn_parent == NULL) {
106 		error = ENOENT;
107 		goto out;
108 	}
109 	if (cnp->cn_flags & ISDOTDOT) {
110 		tm = VFS_TO_TMPFS(dvp->v_mount);
111 		pnode = dnode->tn_dir.tn_parent;
112 		tmpfs_ref_node(pnode);
113 		error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc,
114 		    pnode, cnp->cn_lkflags, vpp);
115 		tmpfs_free_node(tm, pnode);
116 		if (error != 0)
117 			goto out;
118 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
119 		VREF(dvp);
120 		*vpp = dvp;
121 		error = 0;
122 	} else {
123 		de = tmpfs_dir_lookup(dnode, NULL, cnp);
124 		if (de != NULL && de->td_node == NULL)
125 			cnp->cn_flags |= ISWHITEOUT;
126 		if (de == NULL || de->td_node == NULL) {
127 			/*
128 			 * The entry was not found in the directory.
129 			 * This is OK if we are creating or renaming an
130 			 * entry and are working on the last component of
131 			 * the path name.
132 			 */
133 			if ((cnp->cn_flags & ISLASTCN) &&
134 			    (cnp->cn_nameiop == CREATE || \
135 			    cnp->cn_nameiop == RENAME ||
136 			    (cnp->cn_nameiop == DELETE &&
137 			    cnp->cn_flags & DOWHITEOUT &&
138 			    cnp->cn_flags & ISWHITEOUT))) {
139 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
140 				    cnp->cn_thread);
141 				if (error != 0)
142 					goto out;
143 
144 				/*
145 				 * Keep the component name in the buffer for
146 				 * future uses.
147 				 */
148 				cnp->cn_flags |= SAVENAME;
149 
150 				error = EJUSTRETURN;
151 			} else
152 				error = ENOENT;
153 		} else {
154 			struct tmpfs_node *tnode;
155 
156 			/*
157 			 * The entry was found, so get its associated
158 			 * tmpfs_node.
159 			 */
160 			tnode = de->td_node;
161 
162 			/*
163 			 * If we are not at the last path component and
164 			 * found a non-directory or non-link entry (which
165 			 * may itself be pointing to a directory), raise
166 			 * an error.
167 			 */
168 			if ((tnode->tn_type != VDIR &&
169 			    tnode->tn_type != VLNK) &&
170 			    !(cnp->cn_flags & ISLASTCN)) {
171 				error = ENOTDIR;
172 				goto out;
173 			}
174 
175 			/*
176 			 * If we are deleting or renaming the entry, keep
177 			 * track of its tmpfs_dirent so that it can be
178 			 * easily deleted later.
179 			 */
180 			if ((cnp->cn_flags & ISLASTCN) &&
181 			    (cnp->cn_nameiop == DELETE ||
182 			    cnp->cn_nameiop == RENAME)) {
183 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
184 				    cnp->cn_thread);
185 				if (error != 0)
186 					goto out;
187 
188 				/* Allocate a new vnode on the matching entry. */
189 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
190 				    cnp->cn_lkflags, vpp);
191 				if (error != 0)
192 					goto out;
193 
194 				if ((dnode->tn_mode & S_ISTXT) &&
195 				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred,
196 				  cnp->cn_thread) && VOP_ACCESS(*vpp, VADMIN,
197 				  cnp->cn_cred, cnp->cn_thread)) {
198 					error = EPERM;
199 					vput(*vpp);
200 					*vpp = NULL;
201 					goto out;
202 				}
203 				cnp->cn_flags |= SAVENAME;
204 			} else {
205 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
206 				    cnp->cn_lkflags, vpp);
207 				if (error != 0)
208 					goto out;
209 			}
210 		}
211 	}
212 
213 	/*
214 	 * Store the result of this lookup in the cache.  Avoid this if the
215 	 * request was for creation, as it does not improve timings on
216 	 * emprical tests.
217 	 */
218 	if ((cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
219 		cache_enter(dvp, *vpp, cnp);
220 
221 out:
222 	/*
223 	 * If there were no errors, *vpp cannot be null and it must be
224 	 * locked.
225 	 */
226 	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp)));
227 
228 	return (error);
229 }
230 
231 static int
232 tmpfs_cached_lookup(struct vop_cachedlookup_args *v)
233 {
234 
235 	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
236 }
237 
238 static int
239 tmpfs_lookup(struct vop_lookup_args *v)
240 {
241 
242 	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
243 }
244 
245 static int
246 tmpfs_create(struct vop_create_args *v)
247 {
248 	struct vnode *dvp = v->a_dvp;
249 	struct vnode **vpp = v->a_vpp;
250 	struct componentname *cnp = v->a_cnp;
251 	struct vattr *vap = v->a_vap;
252 	int error;
253 
254 	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
255 
256 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
257 	if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
258 		cache_enter(dvp, *vpp, cnp);
259 	return (error);
260 }
261 
262 static int
263 tmpfs_mknod(struct vop_mknod_args *v)
264 {
265 	struct vnode *dvp = v->a_dvp;
266 	struct vnode **vpp = v->a_vpp;
267 	struct componentname *cnp = v->a_cnp;
268 	struct vattr *vap = v->a_vap;
269 
270 	if (vap->va_type != VBLK && vap->va_type != VCHR &&
271 	    vap->va_type != VFIFO)
272 		return EINVAL;
273 
274 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
275 }
276 
277 static int
278 tmpfs_open(struct vop_open_args *v)
279 {
280 	struct vnode *vp = v->a_vp;
281 	int mode = v->a_mode;
282 
283 	int error;
284 	struct tmpfs_node *node;
285 
286 	MPASS(VOP_ISLOCKED(vp));
287 
288 	node = VP_TO_TMPFS_NODE(vp);
289 
290 	/* The file is still active but all its names have been removed
291 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
292 	 * it is about to die. */
293 	if (node->tn_links < 1)
294 		return (ENOENT);
295 
296 	/* If the file is marked append-only, deny write requests. */
297 	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
298 		error = EPERM;
299 	else {
300 		error = 0;
301 		/* For regular files, the call below is nop. */
302 		KASSERT(vp->v_type != VREG || (node->tn_reg.tn_aobj->flags &
303 		    OBJ_DEAD) == 0, ("dead object"));
304 		vnode_create_vobject(vp, node->tn_size, v->a_td);
305 	}
306 
307 	MPASS(VOP_ISLOCKED(vp));
308 	return error;
309 }
310 
311 static int
312 tmpfs_close(struct vop_close_args *v)
313 {
314 	struct vnode *vp = v->a_vp;
315 
316 	/* Update node times. */
317 	tmpfs_update(vp);
318 
319 	return (0);
320 }
321 
322 /*
323  * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
324  * the comment above cache_fplookup for details.
325  */
326 int
327 tmpfs_fplookup_vexec(struct vop_fplookup_vexec_args *v)
328 {
329 	struct vnode *vp;
330 	struct tmpfs_node *node;
331 	struct ucred *cred;
332 	mode_t all_x, mode;
333 
334 	vp = v->a_vp;
335 	node = VP_TO_TMPFS_NODE_SMR(vp);
336 	if (__predict_false(node == NULL))
337 		return (EAGAIN);
338 
339 	all_x = S_IXUSR | S_IXGRP | S_IXOTH;
340 	mode = atomic_load_short(&node->tn_mode);
341 	if (__predict_true((mode & all_x) == all_x))
342 		return (0);
343 
344 	cred = v->a_cred;
345 	return (vaccess_vexec_smr(mode, node->tn_uid, node->tn_gid, cred));
346 }
347 
348 int
349 tmpfs_access(struct vop_access_args *v)
350 {
351 	struct vnode *vp = v->a_vp;
352 	accmode_t accmode = v->a_accmode;
353 	struct ucred *cred = v->a_cred;
354 	mode_t all_x = S_IXUSR | S_IXGRP | S_IXOTH;
355 	int error;
356 	struct tmpfs_node *node;
357 
358 	MPASS(VOP_ISLOCKED(vp));
359 
360 	node = VP_TO_TMPFS_NODE(vp);
361 
362 	/*
363 	 * Common case path lookup.
364 	 */
365 	if (__predict_true(accmode == VEXEC && (node->tn_mode & all_x) == all_x))
366 		return (0);
367 
368 	switch (vp->v_type) {
369 	case VDIR:
370 		/* FALLTHROUGH */
371 	case VLNK:
372 		/* FALLTHROUGH */
373 	case VREG:
374 		if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
375 			error = EROFS;
376 			goto out;
377 		}
378 		break;
379 
380 	case VBLK:
381 		/* FALLTHROUGH */
382 	case VCHR:
383 		/* FALLTHROUGH */
384 	case VSOCK:
385 		/* FALLTHROUGH */
386 	case VFIFO:
387 		break;
388 
389 	default:
390 		error = EINVAL;
391 		goto out;
392 	}
393 
394 	if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
395 		error = EPERM;
396 		goto out;
397 	}
398 
399 	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid,
400 	    node->tn_gid, accmode, cred, NULL);
401 
402 out:
403 	MPASS(VOP_ISLOCKED(vp));
404 
405 	return error;
406 }
407 
408 int
409 tmpfs_getattr(struct vop_getattr_args *v)
410 {
411 	struct vnode *vp = v->a_vp;
412 	struct vattr *vap = v->a_vap;
413 	vm_object_t obj;
414 	struct tmpfs_node *node;
415 
416 	node = VP_TO_TMPFS_NODE(vp);
417 
418 	tmpfs_update_getattr(vp);
419 
420 	vap->va_type = vp->v_type;
421 	vap->va_mode = node->tn_mode;
422 	vap->va_nlink = node->tn_links;
423 	vap->va_uid = node->tn_uid;
424 	vap->va_gid = node->tn_gid;
425 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
426 	vap->va_fileid = node->tn_id;
427 	vap->va_size = node->tn_size;
428 	vap->va_blocksize = PAGE_SIZE;
429 	vap->va_atime = node->tn_atime;
430 	vap->va_mtime = node->tn_mtime;
431 	vap->va_ctime = node->tn_ctime;
432 	vap->va_birthtime = node->tn_birthtime;
433 	vap->va_gen = node->tn_gen;
434 	vap->va_flags = node->tn_flags;
435 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
436 		node->tn_rdev : NODEV;
437 	if (vp->v_type == VREG) {
438 		obj = node->tn_reg.tn_aobj;
439 		vap->va_bytes = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
440 	} else
441 		vap->va_bytes = node->tn_size;
442 	vap->va_filerev = 0;
443 
444 	return 0;
445 }
446 
447 int
448 tmpfs_setattr(struct vop_setattr_args *v)
449 {
450 	struct vnode *vp = v->a_vp;
451 	struct vattr *vap = v->a_vap;
452 	struct ucred *cred = v->a_cred;
453 	struct thread *td = curthread;
454 
455 	int error;
456 
457 	MPASS(VOP_ISLOCKED(vp));
458 	ASSERT_VOP_IN_SEQC(vp);
459 
460 	error = 0;
461 
462 	/* Abort if any unsettable attribute is given. */
463 	if (vap->va_type != VNON ||
464 	    vap->va_nlink != VNOVAL ||
465 	    vap->va_fsid != VNOVAL ||
466 	    vap->va_fileid != VNOVAL ||
467 	    vap->va_blocksize != VNOVAL ||
468 	    vap->va_gen != VNOVAL ||
469 	    vap->va_rdev != VNOVAL ||
470 	    vap->va_bytes != VNOVAL)
471 		error = EINVAL;
472 
473 	if (error == 0 && (vap->va_flags != VNOVAL))
474 		error = tmpfs_chflags(vp, vap->va_flags, cred, td);
475 
476 	if (error == 0 && (vap->va_size != VNOVAL))
477 		error = tmpfs_chsize(vp, vap->va_size, cred, td);
478 
479 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
480 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
481 
482 	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
483 		error = tmpfs_chmod(vp, vap->va_mode, cred, td);
484 
485 	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
486 	    vap->va_atime.tv_nsec != VNOVAL) ||
487 	    (vap->va_mtime.tv_sec != VNOVAL &&
488 	    vap->va_mtime.tv_nsec != VNOVAL) ||
489 	    (vap->va_birthtime.tv_sec != VNOVAL &&
490 	    vap->va_birthtime.tv_nsec != VNOVAL)))
491 		error = tmpfs_chtimes(vp, vap, cred, td);
492 
493 	/* Update the node times.  We give preference to the error codes
494 	 * generated by this function rather than the ones that may arise
495 	 * from tmpfs_update. */
496 	tmpfs_update(vp);
497 
498 	MPASS(VOP_ISLOCKED(vp));
499 
500 	return error;
501 }
502 
503 static int
504 tmpfs_read(struct vop_read_args *v)
505 {
506 	struct vnode *vp;
507 	struct uio *uio;
508 	struct tmpfs_node *node;
509 
510 	vp = v->a_vp;
511 	if (vp->v_type != VREG)
512 		return (EISDIR);
513 	uio = v->a_uio;
514 	if (uio->uio_offset < 0)
515 		return (EINVAL);
516 	node = VP_TO_TMPFS_NODE(vp);
517 	tmpfs_set_status(VFS_TO_TMPFS(vp->v_mount), node, TMPFS_NODE_ACCESSED);
518 	return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio));
519 }
520 
521 static int
522 tmpfs_write(struct vop_write_args *v)
523 {
524 	struct vnode *vp;
525 	struct uio *uio;
526 	struct tmpfs_node *node;
527 	off_t oldsize;
528 	int error, ioflag;
529 	mode_t newmode;
530 
531 	vp = v->a_vp;
532 	uio = v->a_uio;
533 	ioflag = v->a_ioflag;
534 	error = 0;
535 	node = VP_TO_TMPFS_NODE(vp);
536 	oldsize = node->tn_size;
537 
538 	if (uio->uio_offset < 0 || vp->v_type != VREG)
539 		return (EINVAL);
540 	if (uio->uio_resid == 0)
541 		return (0);
542 	if (ioflag & IO_APPEND)
543 		uio->uio_offset = node->tn_size;
544 	if (uio->uio_offset + uio->uio_resid >
545 	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
546 		return (EFBIG);
547 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
548 		return (EFBIG);
549 	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
550 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
551 		    FALSE);
552 		if (error != 0)
553 			goto out;
554 	}
555 
556 	error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio);
557 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
558 	    TMPFS_NODE_CHANGED;
559 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
560 		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID)) {
561 			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
562 			vn_seqc_write_begin(vp);
563 			atomic_store_short(&node->tn_mode, newmode);
564 			vn_seqc_write_end(vp);
565 		}
566 	}
567 	if (error != 0)
568 		(void)tmpfs_reg_resize(vp, oldsize, TRUE);
569 
570 out:
571 	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
572 	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
573 
574 	return (error);
575 }
576 
577 static int
578 tmpfs_fsync(struct vop_fsync_args *v)
579 {
580 	struct vnode *vp = v->a_vp;
581 
582 	MPASS(VOP_ISLOCKED(vp));
583 
584 	tmpfs_check_mtime(vp);
585 	tmpfs_update(vp);
586 
587 	return 0;
588 }
589 
590 static int
591 tmpfs_remove(struct vop_remove_args *v)
592 {
593 	struct vnode *dvp = v->a_dvp;
594 	struct vnode *vp = v->a_vp;
595 
596 	int error;
597 	struct tmpfs_dirent *de;
598 	struct tmpfs_mount *tmp;
599 	struct tmpfs_node *dnode;
600 	struct tmpfs_node *node;
601 
602 	MPASS(VOP_ISLOCKED(dvp));
603 	MPASS(VOP_ISLOCKED(vp));
604 
605 	if (vp->v_type == VDIR) {
606 		error = EISDIR;
607 		goto out;
608 	}
609 
610 	dnode = VP_TO_TMPFS_DIR(dvp);
611 	node = VP_TO_TMPFS_NODE(vp);
612 	tmp = VFS_TO_TMPFS(vp->v_mount);
613 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
614 	MPASS(de != NULL);
615 
616 	/* Files marked as immutable or append-only cannot be deleted. */
617 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
618 	    (dnode->tn_flags & APPEND)) {
619 		error = EPERM;
620 		goto out;
621 	}
622 
623 	/* Remove the entry from the directory; as it is a file, we do not
624 	 * have to change the number of hard links of the directory. */
625 	tmpfs_dir_detach(dvp, de);
626 	if (v->a_cnp->cn_flags & DOWHITEOUT)
627 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
628 
629 	/* Free the directory entry we just deleted.  Note that the node
630 	 * referred by it will not be removed until the vnode is really
631 	 * reclaimed. */
632 	tmpfs_free_dirent(tmp, de);
633 
634 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED;
635 	error = 0;
636 
637 out:
638 
639 	return error;
640 }
641 
642 static int
643 tmpfs_link(struct vop_link_args *v)
644 {
645 	struct vnode *dvp = v->a_tdvp;
646 	struct vnode *vp = v->a_vp;
647 	struct componentname *cnp = v->a_cnp;
648 
649 	int error;
650 	struct tmpfs_dirent *de;
651 	struct tmpfs_node *node;
652 
653 	MPASS(VOP_ISLOCKED(dvp));
654 	MPASS(cnp->cn_flags & HASBUF);
655 	MPASS(dvp != vp); /* XXX When can this be false? */
656 	node = VP_TO_TMPFS_NODE(vp);
657 
658 	/* Ensure that we do not overflow the maximum number of links imposed
659 	 * by the system. */
660 	MPASS(node->tn_links <= TMPFS_LINK_MAX);
661 	if (node->tn_links == TMPFS_LINK_MAX) {
662 		error = EMLINK;
663 		goto out;
664 	}
665 
666 	/* We cannot create links of files marked immutable or append-only. */
667 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
668 		error = EPERM;
669 		goto out;
670 	}
671 
672 	/* Allocate a new directory entry to represent the node. */
673 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
674 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
675 	if (error != 0)
676 		goto out;
677 
678 	/* Insert the new directory entry into the appropriate directory. */
679 	if (cnp->cn_flags & ISWHITEOUT)
680 		tmpfs_dir_whiteout_remove(dvp, cnp);
681 	tmpfs_dir_attach(dvp, de);
682 
683 	/* vp link count has changed, so update node times. */
684 	node->tn_status |= TMPFS_NODE_CHANGED;
685 	tmpfs_update(vp);
686 
687 	error = 0;
688 
689 out:
690 	return error;
691 }
692 
693 /*
694  * We acquire all but fdvp locks using non-blocking acquisitions.  If we
695  * fail to acquire any lock in the path we will drop all held locks,
696  * acquire the new lock in a blocking fashion, and then release it and
697  * restart the rename.  This acquire/release step ensures that we do not
698  * spin on a lock waiting for release.  On error release all vnode locks
699  * and decrement references the way tmpfs_rename() would do.
700  */
/*
 * On entry tdvp, and *tvpp when it is non-NULL and distinct from tdvp, are
 * unlocked before the lock sequence is retried starting from fdvp.
 *
 * Returns 0 with fdvp and tdvp exclusively locked, *fvpp replaced by the
 * freshly looked-up (and unlocked) source vnode, and *tvpp either NULL or
 * the freshly looked-up, exclusively locked target vnode.  On failure the
 * references on fdvp, *fvpp, tdvp and *tvpp (if non-NULL) are dropped with
 * vrele() and an errno value is returned.  In both cases the number of
 * passes through the relock loop is added to tmpfs_rename_restarts.
 */
701 static int
702 tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
703     struct vnode *tdvp, struct vnode **tvpp,
704     struct componentname *fcnp, struct componentname *tcnp)
705 {
706 	struct vnode *nvp;
707 	struct mount *mp;
708 	struct tmpfs_dirent *de;
709 	int error, restarts = 0;
710 
	/* Start from a state in which none of the four vnodes is locked. */
711 	VOP_UNLOCK(tdvp);
712 	if (*tvpp != NULL && *tvpp != tdvp)
713 		VOP_UNLOCK(*tvpp);
714 	mp = fdvp->v_mount;
715 
716 relock:
	/* Every pass, including the first one, is counted as a restart. */
717 	restarts += 1;
718 	error = vn_lock(fdvp, LK_EXCLUSIVE);
719 	if (error)
720 		goto releout;
721 	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		/*
		 * tdvp is contended: wait for it without holding fdvp, then
		 * drop it again and retry the whole sequence.
		 */
722 		VOP_UNLOCK(fdvp);
723 		error = vn_lock(tdvp, LK_EXCLUSIVE);
724 		if (error)
725 			goto releout;
726 		VOP_UNLOCK(tdvp);
727 		goto relock;
728 	}
729 	/*
730 	 * Re-resolve fvp to be certain it still exists and fetch the
731 	 * correct vnode.
732 	 */
733 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
734 	if (de == NULL) {
735 		VOP_UNLOCK(fdvp);
736 		VOP_UNLOCK(tdvp);
		/* "." and ".." yield EINVAL; any other missing name ENOENT. */
737 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
738 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
739 			error = EINVAL;
740 		else
741 			error = ENOENT;
742 		goto releout;
743 	}
744 	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
745 	if (error != 0) {
746 		VOP_UNLOCK(fdvp);
747 		VOP_UNLOCK(tdvp);
748 		if (error != EBUSY)
749 			goto releout;
		/*
		 * Non-blocking attempt failed with EBUSY: retry blocking with
		 * both directories unlocked.
		 * NOTE(review): de was looked up under the directory locks
		 * released just above -- confirm it cannot be freed before
		 * this use.
		 */
750 		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
751 		if (error != 0)
752 			goto releout;
753 		VOP_UNLOCK(nvp);
754 		/*
755 		 * Concurrent rename race.
756 		 */
757 		if (nvp == tdvp) {
758 			vrele(nvp);
759 			error = EINVAL;
760 			goto releout;
761 		}
		/* Adopt the new source vnode (keeping its reference) and retry. */
762 		vrele(*fvpp);
763 		*fvpp = nvp;
764 		goto relock;
765 	}
	/* Swap in the re-resolved source vnode; it is returned unlocked. */
766 	vrele(*fvpp);
767 	*fvpp = nvp;
768 	VOP_UNLOCK(*fvpp);
769 	/*
770 	 * Re-resolve tvp and acquire the vnode lock if present.
771 	 */
772 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
773 	/*
774 	 * If tvp disappeared we just carry on.
775 	 */
776 	if (de == NULL && *tvpp != NULL) {
777 		vrele(*tvpp);
778 		*tvpp = NULL;
779 	}
780 	/*
781 	 * Get the tvp ino if the lookup succeeded.  We may have to restart
782 	 * if the non-blocking acquire fails.
783 	 */
784 	if (de != NULL) {
785 		nvp = NULL;
786 		error = tmpfs_alloc_vp(mp, de->td_node,
787 		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
		/*
		 * The old target reference is dropped and *tvpp takes whatever
		 * the non-blocking attempt stored in nvp.
		 */
788 		if (*tvpp != NULL)
789 			vrele(*tvpp);
790 		*tvpp = nvp;
791 		if (error != 0) {
792 			VOP_UNLOCK(fdvp);
793 			VOP_UNLOCK(tdvp);
794 			if (error != EBUSY)
795 				goto releout;
			/*
			 * NOTE(review): the vnode obtained by the blocking call
			 * below is only unlocked; it is neither stored in *tvpp
			 * nor vrele()d before "goto relock" / "goto releout" --
			 * confirm its reference is not leaked.
			 */
796 			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
797 			    &nvp);
798 			if (error != 0)
799 				goto releout;
800 			VOP_UNLOCK(nvp);
801 			/*
802 			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
803 			 */
804 			if (nvp == fdvp) {
805 				error = ENOTEMPTY;
806 				goto releout;
807 			}
808 			goto relock;
809 		}
810 	}
811 	tmpfs_rename_restarts += restarts;
812 
813 	return (0);
814 
815 releout:
	/* All locks are already released here; only references remain. */
816 	vrele(fdvp);
817 	vrele(*fvpp);
818 	vrele(tdvp);
819 	if (*tvpp != NULL)
820 		vrele(*tvpp);
821 	tmpfs_rename_restarts += restarts;
822 
823 	return (error);
824 }
825 
/*
 * Rename the entry a_fcnp (vnode a_fvp) in directory a_fdvp to the name
 * a_tcnp in directory a_tdvp, replacing a_tvp when it is non-NULL.
 *
 * tdvp, and tvp when present, are asserted to be locked on entry; fdvp is
 * locked here when it differs from both.  Before returning, tdvp and tvp
 * are released with vput() (vrele() for tdvp when tdvp == tvp) and fdvp
 * and fvp with vrele().  The exception is a failing
 * tmpfs_rename_relock(), whose error is returned directly.
 *
 * Returns 0 on success, or EXDEV, EINVAL, ENOENT, EPERM, ENOTEMPTY,
 * ENOTDIR, EISDIR, or an error from tmpfs_rename_relock().
 */
826 static int
827 tmpfs_rename(struct vop_rename_args *v)
828 {
829 	struct vnode *fdvp = v->a_fdvp;
830 	struct vnode *fvp = v->a_fvp;
831 	struct componentname *fcnp = v->a_fcnp;
832 	struct vnode *tdvp = v->a_tdvp;
833 	struct vnode *tvp = v->a_tvp;
834 	struct componentname *tcnp = v->a_tcnp;
835 	char *newname;
836 	struct tmpfs_dirent *de;
837 	struct tmpfs_mount *tmp;
838 	struct tmpfs_node *fdnode;
839 	struct tmpfs_node *fnode;
840 	struct tmpfs_node *tnode;
841 	struct tmpfs_node *tdnode;
842 	int error;
843 	bool want_seqc_end;
844 
845 	MPASS(VOP_ISLOCKED(tdvp));
846 	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp)));
847 	MPASS(fcnp->cn_flags & HASBUF);
848 	MPASS(tcnp->cn_flags & HASBUF);
849 
	/* Set once the vn_seqc_write_begin() calls below have been made. */
850 	want_seqc_end = false;
851 
852 	/*
853 	 * Disallow cross-device renames.
854 	 * XXX Why isn't this done by the caller?
855 	 */
856 	if (fvp->v_mount != tdvp->v_mount ||
857 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
858 		error = EXDEV;
859 		goto out;
860 	}
861 
862 	/* If source and target are the same file, there is nothing to do. */
863 	if (fvp == tvp) {
864 		error = 0;
865 		goto out;
866 	}
867 
868 	/*
869 	 * If we need to move the directory between entries, lock the
870 	 * source so that we can safely operate on it.
871 	 */
872 	if (fdvp != tdvp && fdvp != tvp) {
873 		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
			/*
			 * Contended: tmpfs_rename_relock() re-acquires the
			 * locks and may replace fvp and tvp.  On failure it
			 * has already dropped the vnode references, hence the
			 * direct return.
			 */
874 			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
875 			    fcnp, tcnp);
876 			if (error != 0)
877 				return (error);
878 			ASSERT_VOP_ELOCKED(fdvp,
879 			    "tmpfs_rename: fdvp not locked");
880 			ASSERT_VOP_ELOCKED(tdvp,
881 			    "tmpfs_rename: tdvp not locked");
882 			if (tvp != NULL)
883 				ASSERT_VOP_ELOCKED(tvp,
884 				    "tmpfs_rename: tvp not locked");
			/* The re-resolved vnodes may now be one and the same. */
885 			if (fvp == tvp) {
886 				error = 0;
887 				goto out_locked;
888 			}
889 		}
890 	}
891 
892 	if (tvp != NULL)
893 		vn_seqc_write_begin(tvp);
894 	vn_seqc_write_begin(tdvp);
895 	vn_seqc_write_begin(fvp);
896 	vn_seqc_write_begin(fdvp);
897 	want_seqc_end = true;
898 
899 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
900 	tdnode = VP_TO_TMPFS_DIR(tdvp);
901 	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
902 	fdnode = VP_TO_TMPFS_DIR(fdvp);
903 	fnode = VP_TO_TMPFS_NODE(fvp);
904 	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);
905 
906 	/*
907 	 * Entry can disappear before we lock fdvp,
908 	 * also avoid manipulating '.' and '..' entries.
909 	 */
910 	if (de == NULL) {
911 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
912 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
913 			error = EINVAL;
914 		else
915 			error = ENOENT;
916 		goto out_locked;
917 	}
918 	MPASS(de->td_node == fnode);
919 
920 	/*
921 	 * If re-naming a directory to another preexisting directory
922 	 * ensure that the target directory is empty so that its
923 	 * removal causes no side effects.
924 	 * Kern_rename guarantees the destination to be a directory
925 	 * if the source is one.
926 	 */
927 	if (tvp != NULL) {
928 		MPASS(tnode != NULL);
929 
		/* Flags on the target or its directory may forbid replacement. */
930 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
931 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
932 			error = EPERM;
933 			goto out_locked;
934 		}
935 
936 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
937 			if (tnode->tn_size > 0) {
938 				error = ENOTEMPTY;
939 				goto out_locked;
940 			}
941 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
942 			error = ENOTDIR;
943 			goto out_locked;
944 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
945 			error = EISDIR;
946 			goto out_locked;
947 		} else {
948 			MPASS(fnode->tn_type != VDIR &&
949 				tnode->tn_type != VDIR);
950 		}
951 	}
952 
	/* The same flag checks apply to the source and its directory. */
953 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
954 	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
955 		error = EPERM;
956 		goto out_locked;
957 	}
958 
959 	/*
960 	 * Ensure that we have enough memory to hold the new name, if it
961 	 * has to be changed.
962 	 */
963 	if (fcnp->cn_namelen != tcnp->cn_namelen ||
964 	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
965 		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
966 	} else
967 		newname = NULL;
968 
969 	/*
970 	 * If the node is being moved to another directory, we have to do
971 	 * the move.
972 	 */
973 	if (fdnode != tdnode) {
974 		/*
975 		 * In case we are moving a directory, we have to adjust its
976 		 * parent to point to the new parent.
977 		 */
978 		if (de->td_node->tn_type == VDIR) {
979 			struct tmpfs_node *n;
980 
981 			/*
982 			 * Ensure the target directory is not a child of the
983 			 * directory being moved.  Otherwise, we'd end up
984 			 * with stale nodes.
985 			 */
986 			n = tdnode;
987 			/*
988 			 * TMPFS_LOCK guarantees that no nodes are freed while
989 			 * traversing the list. Nodes can only be marked as
990 			 * removed: tn_parent == NULL.
991 			 */
992 			TMPFS_LOCK(tmp);
993 			TMPFS_NODE_LOCK(n);
			/*
			 * Walk from tdnode towards the root, which is the node
			 * that is its own parent, holding one node lock at a
			 * time.
			 */
994 			while (n != n->tn_dir.tn_parent) {
995 				struct tmpfs_node *parent;
996 
997 				if (n == fnode) {
998 					TMPFS_NODE_UNLOCK(n);
999 					TMPFS_UNLOCK(tmp);
1000 					error = EINVAL;
1001 					if (newname != NULL)
1002 						    free(newname, M_TMPFSNAME);
1003 					goto out_locked;
1004 				}
1005 				parent = n->tn_dir.tn_parent;
1006 				TMPFS_NODE_UNLOCK(n);
1007 				if (parent == NULL) {
1008 					n = NULL;
1009 					break;
1010 				}
1011 				TMPFS_NODE_LOCK(parent);
1012 				if (parent->tn_dir.tn_parent == NULL) {
1013 					TMPFS_NODE_UNLOCK(parent);
1014 					n = NULL;
1015 					break;
1016 				}
1017 				n = parent;
1018 			}
1019 			TMPFS_UNLOCK(tmp);
			/* n == NULL: a removed node was met during the walk. */
1020 			if (n == NULL) {
1021 				error = EINVAL;
1022 				if (newname != NULL)
1023 					    free(newname, M_TMPFSNAME);
1024 				goto out_locked;
1025 			}
1026 			TMPFS_NODE_UNLOCK(n);
1027 
1028 			/* Adjust the parent pointer. */
1029 			TMPFS_VALIDATE_DIR(fnode);
1030 			TMPFS_NODE_LOCK(de->td_node);
1031 			de->td_node->tn_dir.tn_parent = tdnode;
1032 			TMPFS_NODE_UNLOCK(de->td_node);
1033 
1034 			/*
1035 			 * As a result of changing the target of the '..'
1036 			 * entry, the link count of the source and target
1037 			 * directories has to be adjusted.
1038 			 */
1039 			TMPFS_NODE_LOCK(tdnode);
1040 			TMPFS_ASSERT_LOCKED(tdnode);
1041 			tdnode->tn_links++;
1042 			TMPFS_NODE_UNLOCK(tdnode);
1043 
1044 			TMPFS_NODE_LOCK(fdnode);
1045 			TMPFS_ASSERT_LOCKED(fdnode);
1046 			fdnode->tn_links--;
1047 			TMPFS_NODE_UNLOCK(fdnode);
1048 		}
1049 	}
1050 
1051 	/*
1052 	 * Do the move: just remove the entry from the source directory
1053 	 * and insert it into the target one.
1054 	 */
1055 	tmpfs_dir_detach(fdvp, de);
1056 
1057 	if (fcnp->cn_flags & DOWHITEOUT)
1058 		tmpfs_dir_whiteout_add(fdvp, fcnp);
1059 	if (tcnp->cn_flags & ISWHITEOUT)
1060 		tmpfs_dir_whiteout_remove(tdvp, tcnp);
1061 
1062 	/*
1063 	 * If the name has changed, we need to make it effective by changing
1064 	 * it in the directory entry.
1065 	 */
1066 	if (newname != NULL) {
1067 		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
1068 
		/* The entry takes ownership of the newname buffer. */
1069 		free(de->ud.td_name, M_TMPFSNAME);
1070 		de->ud.td_name = newname;
1071 		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);
1072 
1073 		fnode->tn_status |= TMPFS_NODE_CHANGED;
1074 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1075 	}
1076 
1077 	/*
1078 	 * If we are overwriting an entry, we have to remove the old one
1079 	 * from the target directory.
1080 	 */
1081 	if (tvp != NULL) {
1082 		struct tmpfs_dirent *tde;
1083 
1084 		/* Remove the old entry from the target directory. */
1085 		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
1086 		tmpfs_dir_detach(tdvp, tde);
1087 
1088 		/*
1089 		 * Free the directory entry we just deleted.  Note that the
1090 		 * node referred by it will not be removed until the vnode is
1091 		 * really reclaimed.
1092 		 */
1093 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
1094 	}
1095 
1096 	tmpfs_dir_attach(tdvp, de);
1097 
	/* Drop name-cache state for the vnodes whose names just changed. */
1098 	if (tmpfs_use_nc(fvp)) {
1099 		cache_purge(fvp);
1100 		if (tvp != NULL)
1101 			cache_purge(tvp);
1102 		cache_purge_negative(tdvp);
1103 	}
1104 
1105 	error = 0;
1106 
1107 out_locked:
	/* Same condition as the one under which fdvp was locked above. */
1108 	if (fdvp != tdvp && fdvp != tvp)
1109 		VOP_UNLOCK(fdvp);
1110 
1111 out:
1112 	if (want_seqc_end) {
1113 		if (tvp != NULL)
1114 			vn_seqc_write_end(tvp);
1115 		vn_seqc_write_end(tdvp);
1116 		vn_seqc_write_end(fvp);
1117 		vn_seqc_write_end(fdvp);
1118 	}
1119 
1120 	/*
1121 	 * Release target nodes.
1122 	 * XXX: I don't understand when tdvp can be the same as tvp, but
1123 	 * other code takes care of this...
1124 	 */
1125 	if (tdvp == tvp)
1126 		vrele(tdvp);
1127 	else
1128 		vput(tdvp);
1129 	if (tvp != NULL)
1130 		vput(tvp);
1131 
1132 	/* Release source nodes. */
1133 	vrele(fdvp);
1134 	vrele(fvp);
1135 
1136 	return (error);
1137 }
1138 
1139 static int
1140 tmpfs_mkdir(struct vop_mkdir_args *v)
1141 {
1142 	struct vnode *dvp = v->a_dvp;
1143 	struct vnode **vpp = v->a_vpp;
1144 	struct componentname *cnp = v->a_cnp;
1145 	struct vattr *vap = v->a_vap;
1146 
1147 	MPASS(vap->va_type == VDIR);
1148 
1149 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
1150 }
1151 
1152 static int
1153 tmpfs_rmdir(struct vop_rmdir_args *v)
1154 {
1155 	struct vnode *dvp = v->a_dvp;
1156 	struct vnode *vp = v->a_vp;
1157 
1158 	int error;
1159 	struct tmpfs_dirent *de;
1160 	struct tmpfs_mount *tmp;
1161 	struct tmpfs_node *dnode;
1162 	struct tmpfs_node *node;
1163 
1164 	MPASS(VOP_ISLOCKED(dvp));
1165 	MPASS(VOP_ISLOCKED(vp));
1166 
1167 	tmp = VFS_TO_TMPFS(dvp->v_mount);
1168 	dnode = VP_TO_TMPFS_DIR(dvp);
1169 	node = VP_TO_TMPFS_DIR(vp);
1170 
1171 	/* Directories with more than two entries ('.' and '..') cannot be
1172 	 * removed. */
1173 	 if (node->tn_size > 0) {
1174 		 error = ENOTEMPTY;
1175 		 goto out;
1176 	 }
1177 
1178 	if ((dnode->tn_flags & APPEND)
1179 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1180 		error = EPERM;
1181 		goto out;
1182 	}
1183 
1184 	/* This invariant holds only if we are not trying to remove "..".
1185 	  * We checked for that above so this is safe now. */
1186 	MPASS(node->tn_dir.tn_parent == dnode);
1187 
1188 	/* Get the directory entry associated with node (vp).  This was
1189 	 * filled by tmpfs_lookup while looking up the entry. */
1190 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
1191 	MPASS(TMPFS_DIRENT_MATCHES(de,
1192 	    v->a_cnp->cn_nameptr,
1193 	    v->a_cnp->cn_namelen));
1194 
1195 	/* Check flags to see if we are allowed to remove the directory. */
1196 	if ((dnode->tn_flags & APPEND) != 0 ||
1197 	    (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0) {
1198 		error = EPERM;
1199 		goto out;
1200 	}
1201 
1202 
1203 	/* Detach the directory entry from the directory (dnode). */
1204 	tmpfs_dir_detach(dvp, de);
1205 	if (v->a_cnp->cn_flags & DOWHITEOUT)
1206 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
1207 
1208 	/* No vnode should be allocated for this entry from this point */
1209 	TMPFS_NODE_LOCK(node);
1210 	node->tn_links--;
1211 	node->tn_dir.tn_parent = NULL;
1212 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
1213 	    TMPFS_NODE_MODIFIED;
1214 
1215 	TMPFS_NODE_UNLOCK(node);
1216 
1217 	TMPFS_NODE_LOCK(dnode);
1218 	dnode->tn_links--;
1219 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
1220 	    TMPFS_NODE_MODIFIED;
1221 	TMPFS_NODE_UNLOCK(dnode);
1222 
1223 	if (tmpfs_use_nc(dvp)) {
1224 		cache_purge(dvp);
1225 		cache_purge(vp);
1226 	}
1227 
1228 	/* Free the directory entry we just deleted.  Note that the node
1229 	 * referred by it will not be removed until the vnode is really
1230 	 * reclaimed. */
1231 	tmpfs_free_dirent(tmp, de);
1232 
1233 	/* Release the deleted vnode (will destroy the node, notify
1234 	 * interested parties and clean it from the cache). */
1235 
1236 	dnode->tn_status |= TMPFS_NODE_CHANGED;
1237 	tmpfs_update(dvp);
1238 
1239 	error = 0;
1240 
1241 out:
1242 	return error;
1243 }
1244 
1245 static int
1246 tmpfs_symlink(struct vop_symlink_args *v)
1247 {
1248 	struct vnode *dvp = v->a_dvp;
1249 	struct vnode **vpp = v->a_vpp;
1250 	struct componentname *cnp = v->a_cnp;
1251 	struct vattr *vap = v->a_vap;
1252 	const char *target = v->a_target;
1253 
1254 #ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
1255 	MPASS(vap->va_type == VLNK);
1256 #else
1257 	vap->va_type = VLNK;
1258 #endif
1259 
1260 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
1261 }
1262 
1263 static int
1264 tmpfs_readdir(struct vop_readdir_args *va)
1265 {
1266 	struct vnode *vp;
1267 	struct uio *uio;
1268 	struct tmpfs_mount *tm;
1269 	struct tmpfs_node *node;
1270 	u_long **cookies;
1271 	int *eofflag, *ncookies;
1272 	ssize_t startresid;
1273 	int error, maxcookies;
1274 
1275 	vp = va->a_vp;
1276 	uio = va->a_uio;
1277 	eofflag = va->a_eofflag;
1278 	cookies = va->a_cookies;
1279 	ncookies = va->a_ncookies;
1280 
1281 	/* This operation only makes sense on directory nodes. */
1282 	if (vp->v_type != VDIR)
1283 		return ENOTDIR;
1284 
1285 	maxcookies = 0;
1286 	node = VP_TO_TMPFS_DIR(vp);
1287 	tm = VFS_TO_TMPFS(vp->v_mount);
1288 
1289 	startresid = uio->uio_resid;
1290 
1291 	/* Allocate cookies for NFS and compat modules. */
1292 	if (cookies != NULL && ncookies != NULL) {
1293 		maxcookies = howmany(node->tn_size,
1294 		    sizeof(struct tmpfs_dirent)) + 2;
1295 		*cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
1296 		    M_WAITOK);
1297 		*ncookies = 0;
1298 	}
1299 
1300 	if (cookies == NULL)
1301 		error = tmpfs_dir_getdents(tm, node, uio, 0, NULL, NULL);
1302 	else
1303 		error = tmpfs_dir_getdents(tm, node, uio, maxcookies, *cookies,
1304 		    ncookies);
1305 
1306 	/* Buffer was filled without hitting EOF. */
1307 	if (error == EJUSTRETURN)
1308 		error = (uio->uio_resid != startresid) ? 0 : EINVAL;
1309 
1310 	if (error != 0 && cookies != NULL && ncookies != NULL) {
1311 		free(*cookies, M_TEMP);
1312 		*cookies = NULL;
1313 		*ncookies = 0;
1314 	}
1315 
1316 	if (eofflag != NULL)
1317 		*eofflag =
1318 		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1319 
1320 	return error;
1321 }
1322 
1323 static int
1324 tmpfs_readlink(struct vop_readlink_args *v)
1325 {
1326 	struct vnode *vp = v->a_vp;
1327 	struct uio *uio = v->a_uio;
1328 
1329 	int error;
1330 	struct tmpfs_node *node;
1331 
1332 	MPASS(uio->uio_offset == 0);
1333 	MPASS(vp->v_type == VLNK);
1334 
1335 	node = VP_TO_TMPFS_NODE(vp);
1336 
1337 	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
1338 	    uio);
1339 	tmpfs_set_status(VFS_TO_TMPFS(vp->v_mount), node, TMPFS_NODE_ACCESSED);
1340 
1341 	return (error);
1342 }
1343 
1344 static int
1345 tmpfs_inactive(struct vop_inactive_args *v)
1346 {
1347 	struct vnode *vp;
1348 	struct tmpfs_node *node;
1349 
1350 	vp = v->a_vp;
1351 	node = VP_TO_TMPFS_NODE(vp);
1352 	if (node->tn_links == 0)
1353 		vrecycle(vp);
1354 	else
1355 		tmpfs_check_mtime(vp);
1356 	return (0);
1357 }
1358 
1359 static int
1360 tmpfs_need_inactive(struct vop_need_inactive_args *ap)
1361 {
1362 	struct vnode *vp;
1363 	struct tmpfs_node *node;
1364 	struct vm_object *obj;
1365 
1366 	vp = ap->a_vp;
1367 	node = VP_TO_TMPFS_NODE(vp);
1368 	if (node->tn_links == 0)
1369 		goto need;
1370 	if (vp->v_type == VREG) {
1371 		obj = vp->v_object;
1372 		if (obj->generation != obj->cleangeneration)
1373 			goto need;
1374 	}
1375 	return (0);
1376 need:
1377 	return (1);
1378 }
1379 
1380 int
1381 tmpfs_reclaim(struct vop_reclaim_args *v)
1382 {
1383 	struct vnode *vp = v->a_vp;
1384 
1385 	struct tmpfs_mount *tmp;
1386 	struct tmpfs_node *node;
1387 
1388 	node = VP_TO_TMPFS_NODE(vp);
1389 	tmp = VFS_TO_TMPFS(vp->v_mount);
1390 
1391 	if (vp->v_type == VREG)
1392 		tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
1393 	vp->v_object = NULL;
1394 	if (tmpfs_use_nc(vp))
1395 		cache_purge(vp);
1396 
1397 	TMPFS_NODE_LOCK(node);
1398 	tmpfs_free_vp(vp);
1399 
1400 	/* If the node referenced by this vnode was deleted by the user,
1401 	 * we must free its associated data structures (now that the vnode
1402 	 * is being reclaimed). */
1403 	if (node->tn_links == 0 &&
1404 	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
1405 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1406 		TMPFS_NODE_UNLOCK(node);
1407 		tmpfs_free_node(tmp, node);
1408 	} else
1409 		TMPFS_NODE_UNLOCK(node);
1410 
1411 	MPASS(vp->v_data == NULL);
1412 	return 0;
1413 }
1414 
1415 int
1416 tmpfs_print(struct vop_print_args *v)
1417 {
1418 	struct vnode *vp = v->a_vp;
1419 
1420 	struct tmpfs_node *node;
1421 
1422 	node = VP_TO_TMPFS_NODE(vp);
1423 
1424 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %jd\n",
1425 	    node, node->tn_flags, (uintmax_t)node->tn_links);
1426 	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
1427 	    node->tn_mode, node->tn_uid, node->tn_gid,
1428 	    (intmax_t)node->tn_size, node->tn_status);
1429 
1430 	if (vp->v_type == VFIFO)
1431 		fifo_printinfo(vp);
1432 
1433 	printf("\n");
1434 
1435 	return 0;
1436 }
1437 
1438 int
1439 tmpfs_pathconf(struct vop_pathconf_args *v)
1440 {
1441 	struct vnode *vp = v->a_vp;
1442 	int name = v->a_name;
1443 	long *retval = v->a_retval;
1444 
1445 	int error;
1446 
1447 	error = 0;
1448 
1449 	switch (name) {
1450 	case _PC_LINK_MAX:
1451 		*retval = TMPFS_LINK_MAX;
1452 		break;
1453 
1454 	case _PC_NAME_MAX:
1455 		*retval = NAME_MAX;
1456 		break;
1457 
1458 	case _PC_PIPE_BUF:
1459 		if (vp->v_type == VDIR || vp->v_type == VFIFO)
1460 			*retval = PIPE_BUF;
1461 		else
1462 			error = EINVAL;
1463 		break;
1464 
1465 	case _PC_CHOWN_RESTRICTED:
1466 		*retval = 1;
1467 		break;
1468 
1469 	case _PC_NO_TRUNC:
1470 		*retval = 1;
1471 		break;
1472 
1473 	case _PC_SYNC_IO:
1474 		*retval = 1;
1475 		break;
1476 
1477 	case _PC_FILESIZEBITS:
1478 		*retval = 64;
1479 		break;
1480 
1481 	default:
1482 		error = vop_stdpathconf(v);
1483 	}
1484 
1485 	return error;
1486 }
1487 
1488 static int
1489 tmpfs_vptofh(struct vop_vptofh_args *ap)
1490 /*
1491 vop_vptofh {
1492 	IN struct vnode *a_vp;
1493 	IN struct fid *a_fhp;
1494 };
1495 */
1496 {
1497 	struct tmpfs_fid_data tfd;
1498 	struct tmpfs_node *node;
1499 	struct fid *fhp;
1500 
1501 	node = VP_TO_TMPFS_NODE(ap->a_vp);
1502 	fhp = ap->a_fhp;
1503 	fhp->fid_len = sizeof(tfd);
1504 
1505 	/*
1506 	 * Copy into fid_data from the stack to avoid unaligned pointer use.
1507 	 * See the comment in sys/mount.h on struct fid for details.
1508 	 */
1509 	tfd.tfd_id = node->tn_id;
1510 	tfd.tfd_gen = node->tn_gen;
1511 	memcpy(fhp->fid_data, &tfd, fhp->fid_len);
1512 
1513 	return (0);
1514 }
1515 
1516 static int
1517 tmpfs_whiteout(struct vop_whiteout_args *ap)
1518 {
1519 	struct vnode *dvp = ap->a_dvp;
1520 	struct componentname *cnp = ap->a_cnp;
1521 	struct tmpfs_dirent *de;
1522 
1523 	switch (ap->a_flags) {
1524 	case LOOKUP:
1525 		return (0);
1526 	case CREATE:
1527 		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
1528 		if (de != NULL)
1529 			return (de->td_node == NULL ? 0 : EEXIST);
1530 		return (tmpfs_dir_whiteout_add(dvp, cnp));
1531 	case DELETE:
1532 		tmpfs_dir_whiteout_remove(dvp, cnp);
1533 		return (0);
1534 	default:
1535 		panic("tmpfs_whiteout: unknown op");
1536 	}
1537 }
1538 
1539 static int
1540 tmpfs_vptocnp_dir(struct tmpfs_node *tn, struct tmpfs_node *tnp,
1541     struct tmpfs_dirent **pde)
1542 {
1543 	struct tmpfs_dir_cursor dc;
1544 	struct tmpfs_dirent *de;
1545 
1546 	for (de = tmpfs_dir_first(tnp, &dc); de != NULL;
1547 	     de = tmpfs_dir_next(tnp, &dc)) {
1548 		if (de->td_node == tn) {
1549 			*pde = de;
1550 			return (0);
1551 		}
1552 	}
1553 	return (ENOENT);
1554 }
1555 
1556 static int
1557 tmpfs_vptocnp_fill(struct vnode *vp, struct tmpfs_node *tn,
1558     struct tmpfs_node *tnp, char *buf, size_t *buflen, struct vnode **dvp)
1559 {
1560 	struct tmpfs_dirent *de;
1561 	int error, i;
1562 
1563 	error = vn_vget_ino_gen(vp, tmpfs_vn_get_ino_alloc, tnp, LK_SHARED,
1564 	    dvp);
1565 	if (error != 0)
1566 		return (error);
1567 	error = tmpfs_vptocnp_dir(tn, tnp, &de);
1568 	if (error == 0) {
1569 		i = *buflen;
1570 		i -= de->td_namelen;
1571 		if (i < 0) {
1572 			error = ENOMEM;
1573 		} else {
1574 			bcopy(de->ud.td_name, buf + i, de->td_namelen);
1575 			*buflen = i;
1576 		}
1577 	}
1578 	if (error == 0) {
1579 		if (vp != *dvp)
1580 			VOP_UNLOCK(*dvp);
1581 	} else {
1582 		if (vp != *dvp)
1583 			vput(*dvp);
1584 		else
1585 			vrele(vp);
1586 	}
1587 	return (error);
1588 }
1589 
1590 static int
1591 tmpfs_vptocnp(struct vop_vptocnp_args *ap)
1592 {
1593 	struct vnode *vp, **dvp;
1594 	struct tmpfs_node *tn, *tnp, *tnp1;
1595 	struct tmpfs_dirent *de;
1596 	struct tmpfs_mount *tm;
1597 	char *buf;
1598 	size_t *buflen;
1599 	int error;
1600 
1601 	vp = ap->a_vp;
1602 	dvp = ap->a_vpp;
1603 	buf = ap->a_buf;
1604 	buflen = ap->a_buflen;
1605 
1606 	tm = VFS_TO_TMPFS(vp->v_mount);
1607 	tn = VP_TO_TMPFS_NODE(vp);
1608 	if (tn->tn_type == VDIR) {
1609 		tnp = tn->tn_dir.tn_parent;
1610 		if (tnp == NULL)
1611 			return (ENOENT);
1612 		tmpfs_ref_node(tnp);
1613 		error = tmpfs_vptocnp_fill(vp, tn, tn->tn_dir.tn_parent, buf,
1614 		    buflen, dvp);
1615 		tmpfs_free_node(tm, tnp);
1616 		return (error);
1617 	}
1618 restart:
1619 	TMPFS_LOCK(tm);
1620 	LIST_FOREACH_SAFE(tnp, &tm->tm_nodes_used, tn_entries, tnp1) {
1621 		if (tnp->tn_type != VDIR)
1622 			continue;
1623 		TMPFS_NODE_LOCK(tnp);
1624 		tmpfs_ref_node_locked(tnp);
1625 
1626 		/*
1627 		 * tn_vnode cannot be instantiated while we hold the
1628 		 * node lock, so the directory cannot be changed while
1629 		 * we iterate over it.  Do this to avoid instantiating
1630 		 * vnode for directories which cannot point to our
1631 		 * node.
1632 		 */
1633 		error = tnp->tn_vnode == NULL ? tmpfs_vptocnp_dir(tn, tnp,
1634 		    &de) : 0;
1635 
1636 		if (error == 0) {
1637 			TMPFS_NODE_UNLOCK(tnp);
1638 			TMPFS_UNLOCK(tm);
1639 			error = tmpfs_vptocnp_fill(vp, tn, tnp, buf, buflen,
1640 			    dvp);
1641 			if (error == 0) {
1642 				tmpfs_free_node(tm, tnp);
1643 				return (0);
1644 			}
1645 			if (VN_IS_DOOMED(vp)) {
1646 				tmpfs_free_node(tm, tnp);
1647 				return (ENOENT);
1648 			}
1649 			TMPFS_LOCK(tm);
1650 			TMPFS_NODE_LOCK(tnp);
1651 		}
1652 		if (tmpfs_free_node_locked(tm, tnp, false)) {
1653 			goto restart;
1654 		} else {
1655 			KASSERT(tnp->tn_refcount > 0,
1656 			    ("node %p refcount zero", tnp));
1657 			tnp1 = LIST_NEXT(tnp, tn_entries);
1658 			TMPFS_NODE_UNLOCK(tnp);
1659 		}
1660 	}
1661 	TMPFS_UNLOCK(tm);
1662 	return (ENOENT);
1663 }
1664 
1665 /*
1666  * Vnode operations vector used for files stored in a tmpfs file system.
1667  */
1668 struct vop_vector tmpfs_vnodeop_entries = {
1669 	.vop_default =			&default_vnodeops,
1670 	.vop_lookup =			vfs_cache_lookup,
1671 	.vop_cachedlookup =		tmpfs_cached_lookup,
1672 	.vop_create =			tmpfs_create,
1673 	.vop_mknod =			tmpfs_mknod,
1674 	.vop_open =			tmpfs_open,
1675 	.vop_close =			tmpfs_close,
1676 	.vop_fplookup_vexec =		tmpfs_fplookup_vexec,
1677 	.vop_access =			tmpfs_access,
1678 	.vop_getattr =			tmpfs_getattr,
1679 	.vop_setattr =			tmpfs_setattr,
1680 	.vop_read =			tmpfs_read,
1681 	.vop_write =			tmpfs_write,
1682 	.vop_fsync =			tmpfs_fsync,
1683 	.vop_remove =			tmpfs_remove,
1684 	.vop_link =			tmpfs_link,
1685 	.vop_rename =			tmpfs_rename,
1686 	.vop_mkdir =			tmpfs_mkdir,
1687 	.vop_rmdir =			tmpfs_rmdir,
1688 	.vop_symlink =			tmpfs_symlink,
1689 	.vop_readdir =			tmpfs_readdir,
1690 	.vop_readlink =			tmpfs_readlink,
1691 	.vop_inactive =			tmpfs_inactive,
1692 	.vop_need_inactive =		tmpfs_need_inactive,
1693 	.vop_reclaim =			tmpfs_reclaim,
1694 	.vop_print =			tmpfs_print,
1695 	.vop_pathconf =			tmpfs_pathconf,
1696 	.vop_vptofh =			tmpfs_vptofh,
1697 	.vop_whiteout =			tmpfs_whiteout,
1698 	.vop_bmap =			VOP_EOPNOTSUPP,
1699 	.vop_vptocnp =			tmpfs_vptocnp,
1700 	.vop_lock1 =			vop_lock,
1701 	.vop_unlock = 			vop_unlock,
1702 	.vop_islocked = 		vop_islocked,
1703 };
1704 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_entries);
1705 
1706 /*
1707  * Same vector for mounts which do not use namecache.
1708  */
1709 struct vop_vector tmpfs_vnodeop_nonc_entries = {
1710 	.vop_default =			&tmpfs_vnodeop_entries,
1711 	.vop_lookup =			tmpfs_lookup,
1712 };
1713 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_nonc_entries);
1714