xref: /freebsd/sys/fs/tmpfs/tmpfs_vnops.c (revision 6132212808e8dccedc9e5d85fea4390c2f38059a)
1 /*	$NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
5  *
6  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11  * 2005 program.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * tmpfs vnode interface.
37  */
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/dirent.h>
44 #include <sys/fcntl.h>
45 #include <sys/file.h>
46 #include <sys/limits.h>
47 #include <sys/lockf.h>
48 #include <sys/lock.h>
49 #include <sys/mount.h>
50 #include <sys/namei.h>
51 #include <sys/priv.h>
52 #include <sys/proc.h>
53 #include <sys/rwlock.h>
54 #include <sys/sched.h>
55 #include <sys/stat.h>
56 #include <sys/sysctl.h>
57 #include <sys/unistd.h>
58 #include <sys/vnode.h>
59 #include <sys/smr.h>
60 #include <security/audit/audit.h>
61 #include <security/mac/mac_framework.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_param.h>
65 #include <vm/vm_object.h>
66 
67 #include <fs/tmpfs/tmpfs_vnops.h>
68 #include <fs/tmpfs/tmpfs.h>
69 
/*
 * Declarations for the _vfs_tmpfs sysctl node used below.
 * NOTE(review): VFS_SMR_DECLARE presumably brings in the state used by the
 * vfs_smr_enter()/vfs_smr_exit() calls in this file -- confirm.
 */
SYSCTL_DECL(_vfs_tmpfs);
VFS_SMR_DECLARE;

/*
 * Running total of relock iterations accumulated by tmpfs_rename_relock().
 * Exported read-only as the "rename_restarts" sysctl under _vfs_tmpfs; the
 * volatile qualifier is stripped only for the sysctl registration.
 */
static volatile int tmpfs_rename_restarts;
SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
    __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
    "Times rename had to restart due to lock contention");
77 
/*
 * Allocation callback handed to vn_vget_ino_gen() by tmpfs_lookup1().
 * Forwards all arguments unchanged to tmpfs_alloc_vp(); 'arg' is the
 * tmpfs node for which a vnode is wanted.
 */
static int
tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
    struct vnode **rvp)
{
	int rc;

	rc = tmpfs_alloc_vp(mp, arg, lkflags, rvp);
	return (rc);
}
85 
/*
 * Resolve the single path component described by cnp inside directory dvp.
 *
 * Shared implementation behind tmpfs_lookup() and tmpfs_cached_lookup().
 *
 * Returns 0 with *vpp set to the resulting vnode; the trailing assertion
 * requires that vnode to be non-NULL and locked exactly when 0 is
 * returned.  Returns EJUSTRETURN when the entry is absent (or is a
 * whiteout) but the operation is one that may create it and the caller has
 * write access to dvp.  Any other non-zero value is an errno.
 */
static int
tmpfs_lookup1(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct tmpfs_dirent *de;
	struct tmpfs_node *dnode, *pnode;
	struct tmpfs_mount *tm;
	int error;

	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULLVP;

	/* Check accessibility of requested node as a first step. */
	error = vn_dir_check_exec(dvp, cnp);
	if (error != 0)
		goto out;

	/* We cannot be requesting the parent directory of the root node. */
	MPASS(IMPLIES(dnode->tn_type == VDIR &&
	    dnode->tn_dir.tn_parent == dnode,
	    !(cnp->cn_flags & ISDOTDOT)));

	TMPFS_ASSERT_LOCKED(dnode);
	/* A directory whose parent pointer is NULL is treated as gone. */
	if (dnode->tn_dir.tn_parent == NULL) {
		error = ENOENT;
		goto out;
	}
	if (cnp->cn_flags & ISDOTDOT) {
		/*
		 * "..": hold a reference on the parent node across
		 * vn_vget_ino_gen() and drop it again once the call returns.
		 */
		tm = VFS_TO_TMPFS(dvp->v_mount);
		pnode = dnode->tn_dir.tn_parent;
		tmpfs_ref_node(pnode);
		error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc,
		    pnode, cnp->cn_lkflags, vpp);
		tmpfs_free_node(tm, pnode);
		if (error != 0)
			goto out;
	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		/* ".": the result is the directory itself, with an extra ref. */
		VREF(dvp);
		*vpp = dvp;
		error = 0;
	} else {
		de = tmpfs_dir_lookup(dnode, NULL, cnp);
		/* A dirent without a node is a whiteout entry. */
		if (de != NULL && de->td_node == NULL)
			cnp->cn_flags |= ISWHITEOUT;
		if (de == NULL || de->td_node == NULL) {
			/*
			 * The entry was not found in the directory.
			 * This is OK if we are creating or renaming an
			 * entry and are working on the last component of
			 * the path name.
			 */
			if ((cnp->cn_flags & ISLASTCN) &&
			    (cnp->cn_nameiop == CREATE || \
			    cnp->cn_nameiop == RENAME ||
			    (cnp->cn_nameiop == DELETE &&
			    cnp->cn_flags & DOWHITEOUT &&
			    cnp->cn_flags & ISWHITEOUT))) {
				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
				    cnp->cn_thread);
				if (error != 0)
					goto out;

				/*
				 * Keep the component name in the buffer for
				 * future uses.
				 */
				cnp->cn_flags |= SAVENAME;

				error = EJUSTRETURN;
			} else
				error = ENOENT;
		} else {
			struct tmpfs_node *tnode;

			/*
			 * The entry was found, so get its associated
			 * tmpfs_node.
			 */
			tnode = de->td_node;

			/*
			 * If we are not at the last path component and
			 * found a non-directory or non-link entry (which
			 * may itself be pointing to a directory), raise
			 * an error.
			 */
			if ((tnode->tn_type != VDIR &&
			    tnode->tn_type != VLNK) &&
			    !(cnp->cn_flags & ISLASTCN)) {
				error = ENOTDIR;
				goto out;
			}

			/*
			 * If we are deleting or renaming the entry, keep
			 * track of its tmpfs_dirent so that it can be
			 * easily deleted later.
			 */
			if ((cnp->cn_flags & ISLASTCN) &&
			    (cnp->cn_nameiop == DELETE ||
			    cnp->cn_nameiop == RENAME)) {
				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
				    cnp->cn_thread);
				if (error != 0)
					goto out;

				/* Allocate a new vnode on the matching entry. */
				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
				    cnp->cn_lkflags, vpp);
				if (error != 0)
					goto out;

				/*
				 * Sticky directory: refuse unless the caller
				 * has VADMIN on the directory or on the entry.
				 * VOP_ACCESS() returns non-zero on denial, so
				 * both calls failing means neither applies.
				 */
				if ((dnode->tn_mode & S_ISTXT) &&
				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred,
				  cnp->cn_thread) && VOP_ACCESS(*vpp, VADMIN,
				  cnp->cn_cred, cnp->cn_thread)) {
					error = EPERM;
					vput(*vpp);
					*vpp = NULL;
					goto out;
				}
				cnp->cn_flags |= SAVENAME;
			} else {
				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
				    cnp->cn_lkflags, vpp);
				if (error != 0)
					goto out;
			}
		}
	}

	/*
	 * Store the result of this lookup in the cache.  Avoid this if the
	 * request was for creation, as it does not improve timings on
	 * empirical tests.
	 *
	 * Note that this point is also reached with error set to ENOENT or
	 * EJUSTRETURN and *vpp still NULLVP; only the paths that jump to
	 * 'out' skip the cache.
	 */
	if ((cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
		cache_enter(dvp, *vpp, cnp);

out:
	/*
	 * If there were no errors, *vpp cannot be null and it must be
	 * locked.
	 */
	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp)));

	return (error);
}
233 
234 static int
235 tmpfs_cached_lookup(struct vop_cachedlookup_args *v)
236 {
237 
238 	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
239 }
240 
241 static int
242 tmpfs_lookup(struct vop_lookup_args *v)
243 {
244 
245 	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
246 }
247 
248 static int
249 tmpfs_create(struct vop_create_args *v)
250 {
251 	struct vnode *dvp = v->a_dvp;
252 	struct vnode **vpp = v->a_vpp;
253 	struct componentname *cnp = v->a_cnp;
254 	struct vattr *vap = v->a_vap;
255 	int error;
256 
257 	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
258 
259 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
260 	if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
261 		cache_enter(dvp, *vpp, cnp);
262 	return (error);
263 }
264 
265 static int
266 tmpfs_mknod(struct vop_mknod_args *v)
267 {
268 	struct vnode *dvp = v->a_dvp;
269 	struct vnode **vpp = v->a_vpp;
270 	struct componentname *cnp = v->a_cnp;
271 	struct vattr *vap = v->a_vap;
272 
273 	if (vap->va_type != VBLK && vap->va_type != VCHR &&
274 	    vap->va_type != VFIFO)
275 		return EINVAL;
276 
277 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
278 }
279 
/*
 * File operations vector that tmpfs_open() passes to finit_vnode() for
 * files referring to regular tmpfs vnodes.  It is only defined here; no
 * initialization is visible in this part of the file.
 */
struct fileops tmpfs_fnops;
281 
282 static int
283 tmpfs_open(struct vop_open_args *v)
284 {
285 	struct vnode *vp;
286 	struct tmpfs_node *node;
287 	struct file *fp;
288 	int error, mode;
289 
290 	vp = v->a_vp;
291 	mode = v->a_mode;
292 	node = VP_TO_TMPFS_NODE(vp);
293 
294 	/*
295 	 * The file is still active but all its names have been removed
296 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
297 	 * it is about to die.
298 	 */
299 	if (node->tn_links < 1)
300 		return (ENOENT);
301 
302 	/* If the file is marked append-only, deny write requests. */
303 	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
304 		error = EPERM;
305 	else {
306 		error = 0;
307 		/* For regular files, the call below is nop. */
308 		KASSERT(vp->v_type != VREG || (node->tn_reg.tn_aobj->flags &
309 		    OBJ_DEAD) == 0, ("dead object"));
310 		vnode_create_vobject(vp, node->tn_size, v->a_td);
311 	}
312 
313 	fp = v->a_fp;
314 	if (error == 0 && fp != NULL && vp->v_type == VREG) {
315 		tmpfs_ref_node(node);
316 		finit_vnode(fp, mode, node, &tmpfs_fnops);
317 	}
318 
319 	return (error);
320 }
321 
322 static int
323 tmpfs_close(struct vop_close_args *v)
324 {
325 	struct vnode *vp = v->a_vp;
326 
327 	/* Update node times. */
328 	tmpfs_update(vp);
329 
330 	return (0);
331 }
332 
333 int
334 tmpfs_fo_close(struct file *fp, struct thread *td)
335 {
336 	struct tmpfs_node *node;
337 
338 	node = fp->f_data;
339 	if (node != NULL) {
340 		MPASS(node->tn_type == VREG);
341 		tmpfs_free_node(node->tn_reg.tn_tmp, node);
342 	}
343 	return (vnops.fo_close(fp, td));
344 }
345 
346 /*
347  * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
348  * the comment above cache_fplookup for details.
349  */
350 int
351 tmpfs_fplookup_vexec(struct vop_fplookup_vexec_args *v)
352 {
353 	struct vnode *vp;
354 	struct tmpfs_node *node;
355 	struct ucred *cred;
356 	mode_t all_x, mode;
357 
358 	vp = v->a_vp;
359 	node = VP_TO_TMPFS_NODE_SMR(vp);
360 	if (__predict_false(node == NULL))
361 		return (EAGAIN);
362 
363 	all_x = S_IXUSR | S_IXGRP | S_IXOTH;
364 	mode = atomic_load_short(&node->tn_mode);
365 	if (__predict_true((mode & all_x) == all_x))
366 		return (0);
367 
368 	cred = v->a_cred;
369 	return (vaccess_vexec_smr(mode, node->tn_uid, node->tn_gid, cred));
370 }
371 
372 int
373 tmpfs_access(struct vop_access_args *v)
374 {
375 	struct vnode *vp = v->a_vp;
376 	accmode_t accmode = v->a_accmode;
377 	struct ucred *cred = v->a_cred;
378 	mode_t all_x = S_IXUSR | S_IXGRP | S_IXOTH;
379 	int error;
380 	struct tmpfs_node *node;
381 
382 	MPASS(VOP_ISLOCKED(vp));
383 
384 	node = VP_TO_TMPFS_NODE(vp);
385 
386 	/*
387 	 * Common case path lookup.
388 	 */
389 	if (__predict_true(accmode == VEXEC && (node->tn_mode & all_x) == all_x))
390 		return (0);
391 
392 	switch (vp->v_type) {
393 	case VDIR:
394 		/* FALLTHROUGH */
395 	case VLNK:
396 		/* FALLTHROUGH */
397 	case VREG:
398 		if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
399 			error = EROFS;
400 			goto out;
401 		}
402 		break;
403 
404 	case VBLK:
405 		/* FALLTHROUGH */
406 	case VCHR:
407 		/* FALLTHROUGH */
408 	case VSOCK:
409 		/* FALLTHROUGH */
410 	case VFIFO:
411 		break;
412 
413 	default:
414 		error = EINVAL;
415 		goto out;
416 	}
417 
418 	if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
419 		error = EPERM;
420 		goto out;
421 	}
422 
423 	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
424 	    accmode, cred);
425 
426 out:
427 	MPASS(VOP_ISLOCKED(vp));
428 
429 	return error;
430 }
431 
432 int
433 tmpfs_stat(struct vop_stat_args *v)
434 {
435 	struct vnode *vp = v->a_vp;
436 	struct stat *sb = v->a_sb;
437 	vm_object_t obj;
438 	struct tmpfs_node *node;
439 	int error;
440 
441 	node = VP_TO_TMPFS_NODE(vp);
442 
443 	tmpfs_update_getattr(vp);
444 
445 	error = vop_stat_helper_pre(v);
446 	if (__predict_false(error))
447 		return (error);
448 
449 	sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
450 	sb->st_ino = node->tn_id;
451 	sb->st_mode = node->tn_mode | VTTOIF(vp->v_type);
452 	sb->st_nlink = node->tn_links;
453 	sb->st_uid = node->tn_uid;
454 	sb->st_gid = node->tn_gid;
455 	sb->st_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
456 		node->tn_rdev : NODEV;
457 	sb->st_size = node->tn_size;
458 	sb->st_atim.tv_sec = node->tn_atime.tv_sec;
459 	sb->st_atim.tv_nsec = node->tn_atime.tv_nsec;
460 	sb->st_mtim.tv_sec = node->tn_mtime.tv_sec;
461 	sb->st_mtim.tv_nsec = node->tn_mtime.tv_nsec;
462 	sb->st_ctim.tv_sec = node->tn_ctime.tv_sec;
463 	sb->st_ctim.tv_nsec = node->tn_ctime.tv_nsec;
464 	sb->st_birthtim.tv_sec = node->tn_birthtime.tv_sec;
465 	sb->st_birthtim.tv_nsec = node->tn_birthtime.tv_nsec;
466 	sb->st_blksize = PAGE_SIZE;
467 	sb->st_flags = node->tn_flags;
468 	sb->st_gen = node->tn_gen;
469 	if (vp->v_type == VREG) {
470 		obj = node->tn_reg.tn_aobj;
471 		sb->st_blocks = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
472 	} else
473 		sb->st_blocks = node->tn_size;
474 	sb->st_blocks /= S_BLKSIZE;
475 	return (vop_stat_helper_post(v, error));
476 }
477 
478 int
479 tmpfs_getattr(struct vop_getattr_args *v)
480 {
481 	struct vnode *vp = v->a_vp;
482 	struct vattr *vap = v->a_vap;
483 	vm_object_t obj;
484 	struct tmpfs_node *node;
485 
486 	node = VP_TO_TMPFS_NODE(vp);
487 
488 	tmpfs_update_getattr(vp);
489 
490 	vap->va_type = vp->v_type;
491 	vap->va_mode = node->tn_mode;
492 	vap->va_nlink = node->tn_links;
493 	vap->va_uid = node->tn_uid;
494 	vap->va_gid = node->tn_gid;
495 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
496 	vap->va_fileid = node->tn_id;
497 	vap->va_size = node->tn_size;
498 	vap->va_blocksize = PAGE_SIZE;
499 	vap->va_atime = node->tn_atime;
500 	vap->va_mtime = node->tn_mtime;
501 	vap->va_ctime = node->tn_ctime;
502 	vap->va_birthtime = node->tn_birthtime;
503 	vap->va_gen = node->tn_gen;
504 	vap->va_flags = node->tn_flags;
505 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
506 		node->tn_rdev : NODEV;
507 	if (vp->v_type == VREG) {
508 		obj = node->tn_reg.tn_aobj;
509 		vap->va_bytes = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
510 	} else
511 		vap->va_bytes = node->tn_size;
512 	vap->va_filerev = 0;
513 
514 	return 0;
515 }
516 
517 int
518 tmpfs_setattr(struct vop_setattr_args *v)
519 {
520 	struct vnode *vp = v->a_vp;
521 	struct vattr *vap = v->a_vap;
522 	struct ucred *cred = v->a_cred;
523 	struct thread *td = curthread;
524 
525 	int error;
526 
527 	MPASS(VOP_ISLOCKED(vp));
528 	ASSERT_VOP_IN_SEQC(vp);
529 
530 	error = 0;
531 
532 	/* Abort if any unsettable attribute is given. */
533 	if (vap->va_type != VNON ||
534 	    vap->va_nlink != VNOVAL ||
535 	    vap->va_fsid != VNOVAL ||
536 	    vap->va_fileid != VNOVAL ||
537 	    vap->va_blocksize != VNOVAL ||
538 	    vap->va_gen != VNOVAL ||
539 	    vap->va_rdev != VNOVAL ||
540 	    vap->va_bytes != VNOVAL)
541 		error = EINVAL;
542 
543 	if (error == 0 && (vap->va_flags != VNOVAL))
544 		error = tmpfs_chflags(vp, vap->va_flags, cred, td);
545 
546 	if (error == 0 && (vap->va_size != VNOVAL))
547 		error = tmpfs_chsize(vp, vap->va_size, cred, td);
548 
549 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
550 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
551 
552 	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
553 		error = tmpfs_chmod(vp, vap->va_mode, cred, td);
554 
555 	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
556 	    vap->va_atime.tv_nsec != VNOVAL) ||
557 	    (vap->va_mtime.tv_sec != VNOVAL &&
558 	    vap->va_mtime.tv_nsec != VNOVAL) ||
559 	    (vap->va_birthtime.tv_sec != VNOVAL &&
560 	    vap->va_birthtime.tv_nsec != VNOVAL)))
561 		error = tmpfs_chtimes(vp, vap, cred, td);
562 
563 	/* Update the node times.  We give preference to the error codes
564 	 * generated by this function rather than the ones that may arise
565 	 * from tmpfs_update. */
566 	tmpfs_update(vp);
567 
568 	MPASS(VOP_ISLOCKED(vp));
569 
570 	return error;
571 }
572 
573 static int
574 tmpfs_read(struct vop_read_args *v)
575 {
576 	struct vnode *vp;
577 	struct uio *uio;
578 	struct tmpfs_node *node;
579 
580 	vp = v->a_vp;
581 	if (vp->v_type != VREG)
582 		return (EISDIR);
583 	uio = v->a_uio;
584 	if (uio->uio_offset < 0)
585 		return (EINVAL);
586 	node = VP_TO_TMPFS_NODE(vp);
587 	tmpfs_set_status(VFS_TO_TMPFS(vp->v_mount), node, TMPFS_NODE_ACCESSED);
588 	return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio));
589 }
590 
591 static int
592 tmpfs_read_pgcache(struct vop_read_pgcache_args *v)
593 {
594 	struct vnode *vp;
595 	struct tmpfs_node *node;
596 	vm_object_t object;
597 	off_t size;
598 	int error;
599 
600 	vp = v->a_vp;
601 	MPASS((vp->v_irflag & VIRF_PGREAD) != 0);
602 
603 	if (v->a_uio->uio_offset < 0)
604 		return (EINVAL);
605 
606 	error = EJUSTRETURN;
607 	vfs_smr_enter();
608 
609 	node = VP_TO_TMPFS_NODE_SMR(vp);
610 	if (node == NULL)
611 		goto out_smr;
612 	MPASS(node->tn_type == VREG);
613 	MPASS(node->tn_refcount >= 1);
614 	object = node->tn_reg.tn_aobj;
615 	if (object == NULL)
616 		goto out_smr;
617 
618 	MPASS((object->flags & (OBJ_ANON | OBJ_DEAD | OBJ_TMPFS_NODE)) ==
619 	    OBJ_TMPFS_NODE);
620 	if (!VN_IS_DOOMED(vp)) {
621 		/* size cannot become shorter due to rangelock. */
622 		size = node->tn_size;
623 		vfs_smr_exit();
624 		error = uiomove_object(object, size, v->a_uio);
625 		return (error);
626 	}
627 out_smr:
628 	vfs_smr_exit();
629 	return (error);
630 }
631 
632 static int
633 tmpfs_write(struct vop_write_args *v)
634 {
635 	struct vnode *vp;
636 	struct uio *uio;
637 	struct tmpfs_node *node;
638 	off_t oldsize;
639 	int error, ioflag;
640 	mode_t newmode;
641 
642 	vp = v->a_vp;
643 	uio = v->a_uio;
644 	ioflag = v->a_ioflag;
645 	error = 0;
646 	node = VP_TO_TMPFS_NODE(vp);
647 	oldsize = node->tn_size;
648 
649 	if (uio->uio_offset < 0 || vp->v_type != VREG)
650 		return (EINVAL);
651 	if (uio->uio_resid == 0)
652 		return (0);
653 	if (ioflag & IO_APPEND)
654 		uio->uio_offset = node->tn_size;
655 	if (uio->uio_offset + uio->uio_resid >
656 	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
657 		return (EFBIG);
658 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
659 		return (EFBIG);
660 	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
661 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
662 		    FALSE);
663 		if (error != 0)
664 			goto out;
665 	}
666 
667 	error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio);
668 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
669 	    TMPFS_NODE_CHANGED;
670 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
671 		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID)) {
672 			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
673 			vn_seqc_write_begin(vp);
674 			atomic_store_short(&node->tn_mode, newmode);
675 			vn_seqc_write_end(vp);
676 		}
677 	}
678 	if (error != 0)
679 		(void)tmpfs_reg_resize(vp, oldsize, TRUE);
680 
681 out:
682 	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
683 	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
684 
685 	return (error);
686 }
687 
688 static int
689 tmpfs_fsync(struct vop_fsync_args *v)
690 {
691 	struct vnode *vp = v->a_vp;
692 
693 	MPASS(VOP_ISLOCKED(vp));
694 
695 	tmpfs_check_mtime(vp);
696 	tmpfs_update(vp);
697 
698 	return 0;
699 }
700 
701 static int
702 tmpfs_remove(struct vop_remove_args *v)
703 {
704 	struct vnode *dvp = v->a_dvp;
705 	struct vnode *vp = v->a_vp;
706 
707 	int error;
708 	struct tmpfs_dirent *de;
709 	struct tmpfs_mount *tmp;
710 	struct tmpfs_node *dnode;
711 	struct tmpfs_node *node;
712 
713 	MPASS(VOP_ISLOCKED(dvp));
714 	MPASS(VOP_ISLOCKED(vp));
715 
716 	if (vp->v_type == VDIR) {
717 		error = EISDIR;
718 		goto out;
719 	}
720 
721 	dnode = VP_TO_TMPFS_DIR(dvp);
722 	node = VP_TO_TMPFS_NODE(vp);
723 	tmp = VFS_TO_TMPFS(vp->v_mount);
724 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
725 	MPASS(de != NULL);
726 
727 	/* Files marked as immutable or append-only cannot be deleted. */
728 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
729 	    (dnode->tn_flags & APPEND)) {
730 		error = EPERM;
731 		goto out;
732 	}
733 
734 	/* Remove the entry from the directory; as it is a file, we do not
735 	 * have to change the number of hard links of the directory. */
736 	tmpfs_dir_detach(dvp, de);
737 	if (v->a_cnp->cn_flags & DOWHITEOUT)
738 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
739 
740 	/* Free the directory entry we just deleted.  Note that the node
741 	 * referred by it will not be removed until the vnode is really
742 	 * reclaimed. */
743 	tmpfs_free_dirent(tmp, de);
744 
745 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED;
746 	error = 0;
747 
748 out:
749 
750 	return error;
751 }
752 
753 static int
754 tmpfs_link(struct vop_link_args *v)
755 {
756 	struct vnode *dvp = v->a_tdvp;
757 	struct vnode *vp = v->a_vp;
758 	struct componentname *cnp = v->a_cnp;
759 
760 	int error;
761 	struct tmpfs_dirent *de;
762 	struct tmpfs_node *node;
763 
764 	MPASS(VOP_ISLOCKED(dvp));
765 	MPASS(cnp->cn_flags & HASBUF);
766 	MPASS(dvp != vp); /* XXX When can this be false? */
767 	node = VP_TO_TMPFS_NODE(vp);
768 
769 	/* Ensure that we do not overflow the maximum number of links imposed
770 	 * by the system. */
771 	MPASS(node->tn_links <= TMPFS_LINK_MAX);
772 	if (node->tn_links == TMPFS_LINK_MAX) {
773 		error = EMLINK;
774 		goto out;
775 	}
776 
777 	/* We cannot create links of files marked immutable or append-only. */
778 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
779 		error = EPERM;
780 		goto out;
781 	}
782 
783 	/* Allocate a new directory entry to represent the node. */
784 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
785 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
786 	if (error != 0)
787 		goto out;
788 
789 	/* Insert the new directory entry into the appropriate directory. */
790 	if (cnp->cn_flags & ISWHITEOUT)
791 		tmpfs_dir_whiteout_remove(dvp, cnp);
792 	tmpfs_dir_attach(dvp, de);
793 
794 	/* vp link count has changed, so update node times. */
795 	node->tn_status |= TMPFS_NODE_CHANGED;
796 	tmpfs_update(vp);
797 
798 	error = 0;
799 
800 out:
801 	return error;
802 }
803 
/*
 * We acquire all but fdvp locks using non-blocking acquisitions.  If we
 * fail to acquire any lock in the path we will drop all held locks,
 * acquire the new lock in a blocking fashion, and then release it and
 * restart the rename.  This acquire/release step ensures that we do not
 * spin on a lock waiting for release.  On error release all vnode locks
 * and decrement references the way tmpfs_rename() would do.
 *
 * Called by tmpfs_rename() after its non-blocking attempt to lock fdvp
 * failed.  On entry tdvp (and *tvpp, if any) are locked; they are unlocked
 * first and everything is re-acquired starting from fdvp.
 *
 * fvpp and tvpp are in/out: the source and target entries are looked up
 * again under the directory locks, and the vnodes they point to may be
 * replaced (the old reference is dropped with vrele()).  *tvpp becomes
 * NULL if the target entry no longer exists.
 *
 * Returns 0 with fdvp and tdvp locked (and *tvpp locked when non-NULL;
 * *fvpp is returned unlocked).  On failure returns an errno after
 * releasing the references on fdvp, *fvpp, tdvp and *tvpp; every path
 * that reaches 'releout' has already dropped the directory locks.
 * The number of passes through 'relock' is added to
 * tmpfs_rename_restarts in both cases.
 */
static int
tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
    struct vnode *tdvp, struct vnode **tvpp,
    struct componentname *fcnp, struct componentname *tcnp)
{
	struct vnode *nvp;
	struct mount *mp;
	struct tmpfs_dirent *de;
	int error, restarts = 0;

	/* Start from a clean slate: nothing locked. */
	VOP_UNLOCK(tdvp);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK(*tvpp);
	mp = fdvp->v_mount;

relock:
	restarts += 1;
	/* fdvp is the only lock taken in a blocking fashion up front. */
	error = vn_lock(fdvp, LK_EXCLUSIVE);
	if (error)
		goto releout;
	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		/*
		 * Contended: drop fdvp, wait for tdvp to become available,
		 * release it again and retry from the top.
		 */
		VOP_UNLOCK(fdvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto releout;
		VOP_UNLOCK(tdvp);
		goto relock;
	}
	/*
	 * Re-resolve fvp to be certain it still exists and fetch the
	 * correct vnode.
	 */
	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
	if (de == NULL) {
		VOP_UNLOCK(fdvp);
		VOP_UNLOCK(tdvp);
		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
			error = EINVAL;
		else
			error = ENOENT;
		goto releout;
	}
	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
	if (error != 0) {
		VOP_UNLOCK(fdvp);
		VOP_UNLOCK(tdvp);
		if (error != EBUSY)
			goto releout;
		/*
		 * NOTE(review): 'de' is dereferenced here after both
		 * directory locks were dropped -- confirm something else
		 * keeps the dirent and its node alive at this point.
		 */
		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
		if (error != 0)
			goto releout;
		VOP_UNLOCK(nvp);
		/*
		 * Concurrent rename race.
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = EINVAL;
			goto releout;
		}
		/* Adopt the freshly resolved source vnode and start over. */
		vrele(*fvpp);
		*fvpp = nvp;
		goto relock;
	}
	/* Source resolved without blocking: adopt it and leave it unlocked. */
	vrele(*fvpp);
	*fvpp = nvp;
	VOP_UNLOCK(*fvpp);
	/*
	 * Re-resolve tvp and acquire the vnode lock if present.
	 */
	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
	/*
	 * If tvp disappeared we just carry on.
	 */
	if (de == NULL && *tvpp != NULL) {
		vrele(*tvpp);
		*tvpp = NULL;
	}
	/*
	 * Get the tvp ino if the lookup succeeded.  We may have to restart
	 * if the non-blocking acquire fails.
	 */
	if (de != NULL) {
		nvp = NULL;
		error = tmpfs_alloc_vp(mp, de->td_node,
		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
		/* Whatever nvp now holds (possibly NULL) replaces *tvpp. */
		if (*tvpp != NULL)
			vrele(*tvpp);
		*tvpp = nvp;
		if (error != 0) {
			VOP_UNLOCK(fdvp);
			VOP_UNLOCK(tdvp);
			if (error != EBUSY)
				goto releout;
			/*
			 * NOTE(review): unlike the fvp path above, the vnode
			 * obtained by this blocking call is neither stored
			 * in *tvpp nor released before 'goto relock' or
			 * 'goto releout' -- confirm whether its reference is
			 * dropped elsewhere.  'de' is also used here after
			 * the directory locks were dropped.
			 */
			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
			    &nvp);
			if (error != 0)
				goto releout;
			VOP_UNLOCK(nvp);
			/*
			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
			 */
			if (nvp == fdvp) {
				error = ENOTEMPTY;
				goto releout;
			}
			goto relock;
		}
	}
	tmpfs_rename_restarts += restarts;

	return (0);

releout:
	/* All locks are already dropped here; only references remain. */
	vrele(fdvp);
	vrele(*fvpp);
	vrele(tdvp);
	if (*tvpp != NULL)
		vrele(*tvpp);
	tmpfs_rename_restarts += restarts;

	return (error);
}
936 
937 static int
938 tmpfs_rename(struct vop_rename_args *v)
939 {
940 	struct vnode *fdvp = v->a_fdvp;
941 	struct vnode *fvp = v->a_fvp;
942 	struct componentname *fcnp = v->a_fcnp;
943 	struct vnode *tdvp = v->a_tdvp;
944 	struct vnode *tvp = v->a_tvp;
945 	struct componentname *tcnp = v->a_tcnp;
946 	char *newname;
947 	struct tmpfs_dirent *de;
948 	struct tmpfs_mount *tmp;
949 	struct tmpfs_node *fdnode;
950 	struct tmpfs_node *fnode;
951 	struct tmpfs_node *tnode;
952 	struct tmpfs_node *tdnode;
953 	int error;
954 	bool want_seqc_end;
955 
956 	MPASS(VOP_ISLOCKED(tdvp));
957 	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp)));
958 	MPASS(fcnp->cn_flags & HASBUF);
959 	MPASS(tcnp->cn_flags & HASBUF);
960 
961 	want_seqc_end = false;
962 
963 	/*
964 	 * Disallow cross-device renames.
965 	 * XXX Why isn't this done by the caller?
966 	 */
967 	if (fvp->v_mount != tdvp->v_mount ||
968 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
969 		error = EXDEV;
970 		goto out;
971 	}
972 
973 	/* If source and target are the same file, there is nothing to do. */
974 	if (fvp == tvp) {
975 		error = 0;
976 		goto out;
977 	}
978 
979 	/*
980 	 * If we need to move the directory between entries, lock the
981 	 * source so that we can safely operate on it.
982 	 */
983 	if (fdvp != tdvp && fdvp != tvp) {
984 		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
985 			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
986 			    fcnp, tcnp);
987 			if (error != 0)
988 				return (error);
989 			ASSERT_VOP_ELOCKED(fdvp,
990 			    "tmpfs_rename: fdvp not locked");
991 			ASSERT_VOP_ELOCKED(tdvp,
992 			    "tmpfs_rename: tdvp not locked");
993 			if (tvp != NULL)
994 				ASSERT_VOP_ELOCKED(tvp,
995 				    "tmpfs_rename: tvp not locked");
996 			if (fvp == tvp) {
997 				error = 0;
998 				goto out_locked;
999 			}
1000 		}
1001 	}
1002 
1003 	if (tvp != NULL)
1004 		vn_seqc_write_begin(tvp);
1005 	vn_seqc_write_begin(tdvp);
1006 	vn_seqc_write_begin(fvp);
1007 	vn_seqc_write_begin(fdvp);
1008 	want_seqc_end = true;
1009 
1010 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
1011 	tdnode = VP_TO_TMPFS_DIR(tdvp);
1012 	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
1013 	fdnode = VP_TO_TMPFS_DIR(fdvp);
1014 	fnode = VP_TO_TMPFS_NODE(fvp);
1015 	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);
1016 
1017 	/*
1018 	 * Entry can disappear before we lock fdvp,
1019 	 * also avoid manipulating '.' and '..' entries.
1020 	 */
1021 	if (de == NULL) {
1022 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
1023 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
1024 			error = EINVAL;
1025 		else
1026 			error = ENOENT;
1027 		goto out_locked;
1028 	}
1029 	MPASS(de->td_node == fnode);
1030 
1031 	/*
1032 	 * If re-naming a directory to another preexisting directory
1033 	 * ensure that the target directory is empty so that its
1034 	 * removal causes no side effects.
1035 	 * Kern_rename guarantees the destination to be a directory
1036 	 * if the source is one.
1037 	 */
1038 	if (tvp != NULL) {
1039 		MPASS(tnode != NULL);
1040 
1041 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1042 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
1043 			error = EPERM;
1044 			goto out_locked;
1045 		}
1046 
1047 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
1048 			if (tnode->tn_size > 0) {
1049 				error = ENOTEMPTY;
1050 				goto out_locked;
1051 			}
1052 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
1053 			error = ENOTDIR;
1054 			goto out_locked;
1055 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
1056 			error = EISDIR;
1057 			goto out_locked;
1058 		} else {
1059 			MPASS(fnode->tn_type != VDIR &&
1060 				tnode->tn_type != VDIR);
1061 		}
1062 	}
1063 
1064 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
1065 	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
1066 		error = EPERM;
1067 		goto out_locked;
1068 	}
1069 
1070 	/*
1071 	 * Ensure that we have enough memory to hold the new name, if it
1072 	 * has to be changed.
1073 	 */
1074 	if (fcnp->cn_namelen != tcnp->cn_namelen ||
1075 	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
1076 		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
1077 	} else
1078 		newname = NULL;
1079 
1080 	/*
1081 	 * If the node is being moved to another directory, we have to do
1082 	 * the move.
1083 	 */
1084 	if (fdnode != tdnode) {
1085 		/*
1086 		 * In case we are moving a directory, we have to adjust its
1087 		 * parent to point to the new parent.
1088 		 */
1089 		if (de->td_node->tn_type == VDIR) {
1090 			struct tmpfs_node *n;
1091 
1092 			/*
1093 			 * Ensure the target directory is not a child of the
1094 			 * directory being moved.  Otherwise, we'd end up
1095 			 * with stale nodes.
1096 			 */
1097 			n = tdnode;
1098 			/*
1099 			 * TMPFS_LOCK guaranties that no nodes are freed while
1100 			 * traversing the list. Nodes can only be marked as
1101 			 * removed: tn_parent == NULL.
1102 			 */
1103 			TMPFS_LOCK(tmp);
1104 			TMPFS_NODE_LOCK(n);
1105 			while (n != n->tn_dir.tn_parent) {
1106 				struct tmpfs_node *parent;
1107 
1108 				if (n == fnode) {
1109 					TMPFS_NODE_UNLOCK(n);
1110 					TMPFS_UNLOCK(tmp);
1111 					error = EINVAL;
1112 					if (newname != NULL)
1113 						    free(newname, M_TMPFSNAME);
1114 					goto out_locked;
1115 				}
1116 				parent = n->tn_dir.tn_parent;
1117 				TMPFS_NODE_UNLOCK(n);
1118 				if (parent == NULL) {
1119 					n = NULL;
1120 					break;
1121 				}
1122 				TMPFS_NODE_LOCK(parent);
1123 				if (parent->tn_dir.tn_parent == NULL) {
1124 					TMPFS_NODE_UNLOCK(parent);
1125 					n = NULL;
1126 					break;
1127 				}
1128 				n = parent;
1129 			}
1130 			TMPFS_UNLOCK(tmp);
1131 			if (n == NULL) {
1132 				error = EINVAL;
1133 				if (newname != NULL)
1134 					    free(newname, M_TMPFSNAME);
1135 				goto out_locked;
1136 			}
1137 			TMPFS_NODE_UNLOCK(n);
1138 
1139 			/* Adjust the parent pointer. */
1140 			TMPFS_VALIDATE_DIR(fnode);
1141 			TMPFS_NODE_LOCK(de->td_node);
1142 			de->td_node->tn_dir.tn_parent = tdnode;
1143 			TMPFS_NODE_UNLOCK(de->td_node);
1144 
1145 			/*
1146 			 * As a result of changing the target of the '..'
1147 			 * entry, the link count of the source and target
1148 			 * directories has to be adjusted.
1149 			 */
1150 			TMPFS_NODE_LOCK(tdnode);
1151 			TMPFS_ASSERT_LOCKED(tdnode);
1152 			tdnode->tn_links++;
1153 			TMPFS_NODE_UNLOCK(tdnode);
1154 
1155 			TMPFS_NODE_LOCK(fdnode);
1156 			TMPFS_ASSERT_LOCKED(fdnode);
1157 			fdnode->tn_links--;
1158 			TMPFS_NODE_UNLOCK(fdnode);
1159 		}
1160 	}
1161 
1162 	/*
1163 	 * Do the move: just remove the entry from the source directory
1164 	 * and insert it into the target one.
1165 	 */
1166 	tmpfs_dir_detach(fdvp, de);
1167 
1168 	if (fcnp->cn_flags & DOWHITEOUT)
1169 		tmpfs_dir_whiteout_add(fdvp, fcnp);
1170 	if (tcnp->cn_flags & ISWHITEOUT)
1171 		tmpfs_dir_whiteout_remove(tdvp, tcnp);
1172 
1173 	/*
1174 	 * If the name has changed, we need to make it effective by changing
1175 	 * it in the directory entry.
1176 	 */
1177 	if (newname != NULL) {
1178 		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
1179 
1180 		free(de->ud.td_name, M_TMPFSNAME);
1181 		de->ud.td_name = newname;
1182 		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);
1183 
1184 		fnode->tn_status |= TMPFS_NODE_CHANGED;
1185 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1186 	}
1187 
1188 	/*
1189 	 * If we are overwriting an entry, we have to remove the old one
1190 	 * from the target directory.
1191 	 */
1192 	if (tvp != NULL) {
1193 		struct tmpfs_dirent *tde;
1194 
1195 		/* Remove the old entry from the target directory. */
1196 		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
1197 		tmpfs_dir_detach(tdvp, tde);
1198 
1199 		/*
1200 		 * Free the directory entry we just deleted.  Note that the
1201 		 * node referred by it will not be removed until the vnode is
1202 		 * really reclaimed.
1203 		 */
1204 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
1205 	}
1206 
1207 	tmpfs_dir_attach(tdvp, de);
1208 
1209 	if (tmpfs_use_nc(fvp)) {
1210 		cache_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp);
1211 	}
1212 
1213 	error = 0;
1214 
1215 out_locked:
1216 	if (fdvp != tdvp && fdvp != tvp)
1217 		VOP_UNLOCK(fdvp);
1218 
1219 out:
1220 	if (want_seqc_end) {
1221 		if (tvp != NULL)
1222 			vn_seqc_write_end(tvp);
1223 		vn_seqc_write_end(tdvp);
1224 		vn_seqc_write_end(fvp);
1225 		vn_seqc_write_end(fdvp);
1226 	}
1227 
1228 	/*
1229 	 * Release target nodes.
1230 	 * XXX: I don't understand when tdvp can be the same as tvp, but
1231 	 * other code takes care of this...
1232 	 */
1233 	if (tdvp == tvp)
1234 		vrele(tdvp);
1235 	else
1236 		vput(tdvp);
1237 	if (tvp != NULL)
1238 		vput(tvp);
1239 
1240 	/* Release source nodes. */
1241 	vrele(fdvp);
1242 	vrele(fvp);
1243 
1244 	return (error);
1245 }
1246 
1247 static int
1248 tmpfs_mkdir(struct vop_mkdir_args *v)
1249 {
1250 	struct vnode *dvp = v->a_dvp;
1251 	struct vnode **vpp = v->a_vpp;
1252 	struct componentname *cnp = v->a_cnp;
1253 	struct vattr *vap = v->a_vap;
1254 
1255 	MPASS(vap->va_type == VDIR);
1256 
1257 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
1258 }
1259 
1260 static int
1261 tmpfs_rmdir(struct vop_rmdir_args *v)
1262 {
1263 	struct vnode *dvp = v->a_dvp;
1264 	struct vnode *vp = v->a_vp;
1265 
1266 	int error;
1267 	struct tmpfs_dirent *de;
1268 	struct tmpfs_mount *tmp;
1269 	struct tmpfs_node *dnode;
1270 	struct tmpfs_node *node;
1271 
1272 	MPASS(VOP_ISLOCKED(dvp));
1273 	MPASS(VOP_ISLOCKED(vp));
1274 
1275 	tmp = VFS_TO_TMPFS(dvp->v_mount);
1276 	dnode = VP_TO_TMPFS_DIR(dvp);
1277 	node = VP_TO_TMPFS_DIR(vp);
1278 
1279 	/* Directories with more than two entries ('.' and '..') cannot be
1280 	 * removed. */
1281 	 if (node->tn_size > 0) {
1282 		 error = ENOTEMPTY;
1283 		 goto out;
1284 	 }
1285 
1286 	if ((dnode->tn_flags & APPEND)
1287 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1288 		error = EPERM;
1289 		goto out;
1290 	}
1291 
1292 	/* This invariant holds only if we are not trying to remove "..".
1293 	  * We checked for that above so this is safe now. */
1294 	MPASS(node->tn_dir.tn_parent == dnode);
1295 
1296 	/* Get the directory entry associated with node (vp).  This was
1297 	 * filled by tmpfs_lookup while looking up the entry. */
1298 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
1299 	MPASS(TMPFS_DIRENT_MATCHES(de,
1300 	    v->a_cnp->cn_nameptr,
1301 	    v->a_cnp->cn_namelen));
1302 
1303 	/* Check flags to see if we are allowed to remove the directory. */
1304 	if ((dnode->tn_flags & APPEND) != 0 ||
1305 	    (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0) {
1306 		error = EPERM;
1307 		goto out;
1308 	}
1309 
1310 	/* Detach the directory entry from the directory (dnode). */
1311 	tmpfs_dir_detach(dvp, de);
1312 	if (v->a_cnp->cn_flags & DOWHITEOUT)
1313 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
1314 
1315 	/* No vnode should be allocated for this entry from this point */
1316 	TMPFS_NODE_LOCK(node);
1317 	node->tn_links--;
1318 	node->tn_dir.tn_parent = NULL;
1319 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
1320 	    TMPFS_NODE_MODIFIED;
1321 
1322 	TMPFS_NODE_UNLOCK(node);
1323 
1324 	TMPFS_NODE_LOCK(dnode);
1325 	dnode->tn_links--;
1326 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
1327 	    TMPFS_NODE_MODIFIED;
1328 	TMPFS_NODE_UNLOCK(dnode);
1329 
1330 	if (tmpfs_use_nc(dvp)) {
1331 		cache_purge(dvp);
1332 		cache_purge(vp);
1333 	}
1334 
1335 	/* Free the directory entry we just deleted.  Note that the node
1336 	 * referred by it will not be removed until the vnode is really
1337 	 * reclaimed. */
1338 	tmpfs_free_dirent(tmp, de);
1339 
1340 	/* Release the deleted vnode (will destroy the node, notify
1341 	 * interested parties and clean it from the cache). */
1342 
1343 	dnode->tn_status |= TMPFS_NODE_CHANGED;
1344 	tmpfs_update(dvp);
1345 
1346 	error = 0;
1347 
1348 out:
1349 	return error;
1350 }
1351 
1352 static int
1353 tmpfs_symlink(struct vop_symlink_args *v)
1354 {
1355 	struct vnode *dvp = v->a_dvp;
1356 	struct vnode **vpp = v->a_vpp;
1357 	struct componentname *cnp = v->a_cnp;
1358 	struct vattr *vap = v->a_vap;
1359 	const char *target = v->a_target;
1360 
1361 #ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
1362 	MPASS(vap->va_type == VLNK);
1363 #else
1364 	vap->va_type = VLNK;
1365 #endif
1366 
1367 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
1368 }
1369 
1370 static int
1371 tmpfs_readdir(struct vop_readdir_args *va)
1372 {
1373 	struct vnode *vp;
1374 	struct uio *uio;
1375 	struct tmpfs_mount *tm;
1376 	struct tmpfs_node *node;
1377 	u_long **cookies;
1378 	int *eofflag, *ncookies;
1379 	ssize_t startresid;
1380 	int error, maxcookies;
1381 
1382 	vp = va->a_vp;
1383 	uio = va->a_uio;
1384 	eofflag = va->a_eofflag;
1385 	cookies = va->a_cookies;
1386 	ncookies = va->a_ncookies;
1387 
1388 	/* This operation only makes sense on directory nodes. */
1389 	if (vp->v_type != VDIR)
1390 		return ENOTDIR;
1391 
1392 	maxcookies = 0;
1393 	node = VP_TO_TMPFS_DIR(vp);
1394 	tm = VFS_TO_TMPFS(vp->v_mount);
1395 
1396 	startresid = uio->uio_resid;
1397 
1398 	/* Allocate cookies for NFS and compat modules. */
1399 	if (cookies != NULL && ncookies != NULL) {
1400 		maxcookies = howmany(node->tn_size,
1401 		    sizeof(struct tmpfs_dirent)) + 2;
1402 		*cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
1403 		    M_WAITOK);
1404 		*ncookies = 0;
1405 	}
1406 
1407 	if (cookies == NULL)
1408 		error = tmpfs_dir_getdents(tm, node, uio, 0, NULL, NULL);
1409 	else
1410 		error = tmpfs_dir_getdents(tm, node, uio, maxcookies, *cookies,
1411 		    ncookies);
1412 
1413 	/* Buffer was filled without hitting EOF. */
1414 	if (error == EJUSTRETURN)
1415 		error = (uio->uio_resid != startresid) ? 0 : EINVAL;
1416 
1417 	if (error != 0 && cookies != NULL && ncookies != NULL) {
1418 		free(*cookies, M_TEMP);
1419 		*cookies = NULL;
1420 		*ncookies = 0;
1421 	}
1422 
1423 	if (eofflag != NULL)
1424 		*eofflag =
1425 		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1426 
1427 	return error;
1428 }
1429 
1430 static int
1431 tmpfs_readlink(struct vop_readlink_args *v)
1432 {
1433 	struct vnode *vp = v->a_vp;
1434 	struct uio *uio = v->a_uio;
1435 
1436 	int error;
1437 	struct tmpfs_node *node;
1438 
1439 	MPASS(uio->uio_offset == 0);
1440 	MPASS(vp->v_type == VLNK);
1441 
1442 	node = VP_TO_TMPFS_NODE(vp);
1443 
1444 	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
1445 	    uio);
1446 	tmpfs_set_status(VFS_TO_TMPFS(vp->v_mount), node, TMPFS_NODE_ACCESSED);
1447 
1448 	return (error);
1449 }
1450 
1451 static int
1452 tmpfs_inactive(struct vop_inactive_args *v)
1453 {
1454 	struct vnode *vp;
1455 	struct tmpfs_node *node;
1456 
1457 	vp = v->a_vp;
1458 	node = VP_TO_TMPFS_NODE(vp);
1459 	if (node->tn_links == 0)
1460 		vrecycle(vp);
1461 	else
1462 		tmpfs_check_mtime(vp);
1463 	return (0);
1464 }
1465 
1466 static int
1467 tmpfs_need_inactive(struct vop_need_inactive_args *ap)
1468 {
1469 	struct vnode *vp;
1470 	struct tmpfs_node *node;
1471 	struct vm_object *obj;
1472 
1473 	vp = ap->a_vp;
1474 	node = VP_TO_TMPFS_NODE(vp);
1475 	if (node->tn_links == 0)
1476 		goto need;
1477 	if (vp->v_type == VREG) {
1478 		obj = vp->v_object;
1479 		if (obj->generation != obj->cleangeneration)
1480 			goto need;
1481 	}
1482 	return (0);
1483 need:
1484 	return (1);
1485 }
1486 
1487 int
1488 tmpfs_reclaim(struct vop_reclaim_args *v)
1489 {
1490 	struct vnode *vp = v->a_vp;
1491 
1492 	struct tmpfs_mount *tmp;
1493 	struct tmpfs_node *node;
1494 
1495 	node = VP_TO_TMPFS_NODE(vp);
1496 	tmp = VFS_TO_TMPFS(vp->v_mount);
1497 
1498 	if (vp->v_type == VREG)
1499 		tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
1500 	vp->v_object = NULL;
1501 
1502 	TMPFS_NODE_LOCK(node);
1503 	tmpfs_free_vp(vp);
1504 
1505 	/* If the node referenced by this vnode was deleted by the user,
1506 	 * we must free its associated data structures (now that the vnode
1507 	 * is being reclaimed). */
1508 	if (node->tn_links == 0 &&
1509 	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
1510 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1511 		TMPFS_NODE_UNLOCK(node);
1512 		tmpfs_free_node(tmp, node);
1513 	} else
1514 		TMPFS_NODE_UNLOCK(node);
1515 
1516 	MPASS(vp->v_data == NULL);
1517 	return 0;
1518 }
1519 
1520 int
1521 tmpfs_print(struct vop_print_args *v)
1522 {
1523 	struct vnode *vp = v->a_vp;
1524 
1525 	struct tmpfs_node *node;
1526 
1527 	node = VP_TO_TMPFS_NODE(vp);
1528 
1529 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %jd\n",
1530 	    node, node->tn_flags, (uintmax_t)node->tn_links);
1531 	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
1532 	    node->tn_mode, node->tn_uid, node->tn_gid,
1533 	    (intmax_t)node->tn_size, node->tn_status);
1534 
1535 	if (vp->v_type == VFIFO)
1536 		fifo_printinfo(vp);
1537 
1538 	printf("\n");
1539 
1540 	return 0;
1541 }
1542 
1543 int
1544 tmpfs_pathconf(struct vop_pathconf_args *v)
1545 {
1546 	struct vnode *vp = v->a_vp;
1547 	int name = v->a_name;
1548 	long *retval = v->a_retval;
1549 
1550 	int error;
1551 
1552 	error = 0;
1553 
1554 	switch (name) {
1555 	case _PC_LINK_MAX:
1556 		*retval = TMPFS_LINK_MAX;
1557 		break;
1558 
1559 	case _PC_NAME_MAX:
1560 		*retval = NAME_MAX;
1561 		break;
1562 
1563 	case _PC_PIPE_BUF:
1564 		if (vp->v_type == VDIR || vp->v_type == VFIFO)
1565 			*retval = PIPE_BUF;
1566 		else
1567 			error = EINVAL;
1568 		break;
1569 
1570 	case _PC_CHOWN_RESTRICTED:
1571 		*retval = 1;
1572 		break;
1573 
1574 	case _PC_NO_TRUNC:
1575 		*retval = 1;
1576 		break;
1577 
1578 	case _PC_SYNC_IO:
1579 		*retval = 1;
1580 		break;
1581 
1582 	case _PC_FILESIZEBITS:
1583 		*retval = 64;
1584 		break;
1585 
1586 	default:
1587 		error = vop_stdpathconf(v);
1588 	}
1589 
1590 	return error;
1591 }
1592 
1593 static int
1594 tmpfs_vptofh(struct vop_vptofh_args *ap)
1595 /*
1596 vop_vptofh {
1597 	IN struct vnode *a_vp;
1598 	IN struct fid *a_fhp;
1599 };
1600 */
1601 {
1602 	struct tmpfs_fid_data tfd;
1603 	struct tmpfs_node *node;
1604 	struct fid *fhp;
1605 
1606 	node = VP_TO_TMPFS_NODE(ap->a_vp);
1607 	fhp = ap->a_fhp;
1608 	fhp->fid_len = sizeof(tfd);
1609 
1610 	/*
1611 	 * Copy into fid_data from the stack to avoid unaligned pointer use.
1612 	 * See the comment in sys/mount.h on struct fid for details.
1613 	 */
1614 	tfd.tfd_id = node->tn_id;
1615 	tfd.tfd_gen = node->tn_gen;
1616 	memcpy(fhp->fid_data, &tfd, fhp->fid_len);
1617 
1618 	return (0);
1619 }
1620 
1621 static int
1622 tmpfs_whiteout(struct vop_whiteout_args *ap)
1623 {
1624 	struct vnode *dvp = ap->a_dvp;
1625 	struct componentname *cnp = ap->a_cnp;
1626 	struct tmpfs_dirent *de;
1627 
1628 	switch (ap->a_flags) {
1629 	case LOOKUP:
1630 		return (0);
1631 	case CREATE:
1632 		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
1633 		if (de != NULL)
1634 			return (de->td_node == NULL ? 0 : EEXIST);
1635 		return (tmpfs_dir_whiteout_add(dvp, cnp));
1636 	case DELETE:
1637 		tmpfs_dir_whiteout_remove(dvp, cnp);
1638 		return (0);
1639 	default:
1640 		panic("tmpfs_whiteout: unknown op");
1641 	}
1642 }
1643 
1644 static int
1645 tmpfs_vptocnp_dir(struct tmpfs_node *tn, struct tmpfs_node *tnp,
1646     struct tmpfs_dirent **pde)
1647 {
1648 	struct tmpfs_dir_cursor dc;
1649 	struct tmpfs_dirent *de;
1650 
1651 	for (de = tmpfs_dir_first(tnp, &dc); de != NULL;
1652 	     de = tmpfs_dir_next(tnp, &dc)) {
1653 		if (de->td_node == tn) {
1654 			*pde = de;
1655 			return (0);
1656 		}
1657 	}
1658 	return (ENOENT);
1659 }
1660 
1661 static int
1662 tmpfs_vptocnp_fill(struct vnode *vp, struct tmpfs_node *tn,
1663     struct tmpfs_node *tnp, char *buf, size_t *buflen, struct vnode **dvp)
1664 {
1665 	struct tmpfs_dirent *de;
1666 	int error, i;
1667 
1668 	error = vn_vget_ino_gen(vp, tmpfs_vn_get_ino_alloc, tnp, LK_SHARED,
1669 	    dvp);
1670 	if (error != 0)
1671 		return (error);
1672 	error = tmpfs_vptocnp_dir(tn, tnp, &de);
1673 	if (error == 0) {
1674 		i = *buflen;
1675 		i -= de->td_namelen;
1676 		if (i < 0) {
1677 			error = ENOMEM;
1678 		} else {
1679 			bcopy(de->ud.td_name, buf + i, de->td_namelen);
1680 			*buflen = i;
1681 		}
1682 	}
1683 	if (error == 0) {
1684 		if (vp != *dvp)
1685 			VOP_UNLOCK(*dvp);
1686 	} else {
1687 		if (vp != *dvp)
1688 			vput(*dvp);
1689 		else
1690 			vrele(vp);
1691 	}
1692 	return (error);
1693 }
1694 
1695 static int
1696 tmpfs_vptocnp(struct vop_vptocnp_args *ap)
1697 {
1698 	struct vnode *vp, **dvp;
1699 	struct tmpfs_node *tn, *tnp, *tnp1;
1700 	struct tmpfs_dirent *de;
1701 	struct tmpfs_mount *tm;
1702 	char *buf;
1703 	size_t *buflen;
1704 	int error;
1705 
1706 	vp = ap->a_vp;
1707 	dvp = ap->a_vpp;
1708 	buf = ap->a_buf;
1709 	buflen = ap->a_buflen;
1710 
1711 	tm = VFS_TO_TMPFS(vp->v_mount);
1712 	tn = VP_TO_TMPFS_NODE(vp);
1713 	if (tn->tn_type == VDIR) {
1714 		tnp = tn->tn_dir.tn_parent;
1715 		if (tnp == NULL)
1716 			return (ENOENT);
1717 		tmpfs_ref_node(tnp);
1718 		error = tmpfs_vptocnp_fill(vp, tn, tn->tn_dir.tn_parent, buf,
1719 		    buflen, dvp);
1720 		tmpfs_free_node(tm, tnp);
1721 		return (error);
1722 	}
1723 restart:
1724 	TMPFS_LOCK(tm);
1725 	LIST_FOREACH_SAFE(tnp, &tm->tm_nodes_used, tn_entries, tnp1) {
1726 		if (tnp->tn_type != VDIR)
1727 			continue;
1728 		TMPFS_NODE_LOCK(tnp);
1729 		tmpfs_ref_node(tnp);
1730 
1731 		/*
1732 		 * tn_vnode cannot be instantiated while we hold the
1733 		 * node lock, so the directory cannot be changed while
1734 		 * we iterate over it.  Do this to avoid instantiating
1735 		 * vnode for directories which cannot point to our
1736 		 * node.
1737 		 */
1738 		error = tnp->tn_vnode == NULL ? tmpfs_vptocnp_dir(tn, tnp,
1739 		    &de) : 0;
1740 
1741 		if (error == 0) {
1742 			TMPFS_NODE_UNLOCK(tnp);
1743 			TMPFS_UNLOCK(tm);
1744 			error = tmpfs_vptocnp_fill(vp, tn, tnp, buf, buflen,
1745 			    dvp);
1746 			if (error == 0) {
1747 				tmpfs_free_node(tm, tnp);
1748 				return (0);
1749 			}
1750 			if (VN_IS_DOOMED(vp)) {
1751 				tmpfs_free_node(tm, tnp);
1752 				return (ENOENT);
1753 			}
1754 			TMPFS_LOCK(tm);
1755 			TMPFS_NODE_LOCK(tnp);
1756 		}
1757 		if (tmpfs_free_node_locked(tm, tnp, false)) {
1758 			goto restart;
1759 		} else {
1760 			KASSERT(tnp->tn_refcount > 0,
1761 			    ("node %p refcount zero", tnp));
1762 			tnp1 = LIST_NEXT(tnp, tn_entries);
1763 			TMPFS_NODE_UNLOCK(tnp);
1764 		}
1765 	}
1766 	TMPFS_UNLOCK(tm);
1767 	return (ENOENT);
1768 }
1769 
1770 /*
1771  * Vnode operations vector used for files stored in a tmpfs file system.
1772  */
1773 struct vop_vector tmpfs_vnodeop_entries = {
1774 	.vop_default =			&default_vnodeops,
1775 	.vop_lookup =			vfs_cache_lookup,
1776 	.vop_cachedlookup =		tmpfs_cached_lookup,
1777 	.vop_create =			tmpfs_create,
1778 	.vop_mknod =			tmpfs_mknod,
1779 	.vop_open =			tmpfs_open,
1780 	.vop_close =			tmpfs_close,
1781 	.vop_fplookup_vexec =		tmpfs_fplookup_vexec,
1782 	.vop_access =			tmpfs_access,
1783 	.vop_stat =			tmpfs_stat,
1784 	.vop_getattr =			tmpfs_getattr,
1785 	.vop_setattr =			tmpfs_setattr,
1786 	.vop_read =			tmpfs_read,
1787 	.vop_read_pgcache =		tmpfs_read_pgcache,
1788 	.vop_write =			tmpfs_write,
1789 	.vop_fsync =			tmpfs_fsync,
1790 	.vop_remove =			tmpfs_remove,
1791 	.vop_link =			tmpfs_link,
1792 	.vop_rename =			tmpfs_rename,
1793 	.vop_mkdir =			tmpfs_mkdir,
1794 	.vop_rmdir =			tmpfs_rmdir,
1795 	.vop_symlink =			tmpfs_symlink,
1796 	.vop_readdir =			tmpfs_readdir,
1797 	.vop_readlink =			tmpfs_readlink,
1798 	.vop_inactive =			tmpfs_inactive,
1799 	.vop_need_inactive =		tmpfs_need_inactive,
1800 	.vop_reclaim =			tmpfs_reclaim,
1801 	.vop_print =			tmpfs_print,
1802 	.vop_pathconf =			tmpfs_pathconf,
1803 	.vop_vptofh =			tmpfs_vptofh,
1804 	.vop_whiteout =			tmpfs_whiteout,
1805 	.vop_bmap =			VOP_EOPNOTSUPP,
1806 	.vop_vptocnp =			tmpfs_vptocnp,
1807 	.vop_lock1 =			vop_lock,
1808 	.vop_unlock = 			vop_unlock,
1809 	.vop_islocked = 		vop_islocked,
1810 };
1811 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_entries);
1812 
1813 /*
1814  * Same vector for mounts which do not use namecache.
1815  */
1816 struct vop_vector tmpfs_vnodeop_nonc_entries = {
1817 	.vop_default =			&tmpfs_vnodeop_entries,
1818 	.vop_lookup =			tmpfs_lookup,
1819 };
1820 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_nonc_entries);
1821