xref: /freebsd/sys/fs/tmpfs/tmpfs_vnops.c (revision 1106035d5bec5d667e553508a88e1012a89b67d3)
1 /*	$NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
5  *
6  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11  * 2005 program.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * tmpfs vnode interface.
37  */
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/dirent.h>
44 #include <sys/fcntl.h>
45 #include <sys/limits.h>
46 #include <sys/lockf.h>
47 #include <sys/lock.h>
48 #include <sys/mount.h>
49 #include <sys/namei.h>
50 #include <sys/priv.h>
51 #include <sys/proc.h>
52 #include <sys/rwlock.h>
53 #include <sys/sched.h>
54 #include <sys/stat.h>
55 #include <sys/sysctl.h>
56 #include <sys/unistd.h>
57 #include <sys/vnode.h>
58 #include <sys/smr.h>
59 #include <security/audit/audit.h>
60 #include <security/mac/mac_framework.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_param.h>
64 #include <vm/vm_object.h>
65 
66 #include <fs/tmpfs/tmpfs_vnops.h>
67 #include <fs/tmpfs/tmpfs.h>
68 
/*
 * Declare the _vfs_tmpfs sysctl node so that entries can be attached to it
 * from this file.  NOTE(review): the node itself is presumably defined in
 * another tmpfs source file -- confirm.
 */
69 SYSCTL_DECL(_vfs_tmpfs);
/*
 * NOTE(review): presumably brings the VFS SMR state into scope for the
 * lockless lookup helper tmpfs_fplookup_vexec() -- confirm against the
 * definition of VFS_SMR_DECLARE.
 */
70 VFS_SMR_DECLARE;
71 
/*
 * Counter of rename lock-acquisition passes, accumulated by
 * tmpfs_rename_relock().  It is exported read-only (CTLFLAG_RD) as the
 * "rename_restarts" entry under _vfs_tmpfs; __DEVOLATILE strips the
 * volatile qualifier for the sysctl pointer argument.  The counter is
 * updated with a plain "+=", so concurrent renames may lose increments
 * (statistics only).
 */
72 static volatile int tmpfs_rename_restarts;
73 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
74     __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
75     "Times rename had to restart due to lock contention");
76 
/*
 * Allocation callback handed to vn_vget_ino_gen() by tmpfs_lookup1().
 *
 * The opaque argument is the tmpfs node whose vnode is wanted; the work is
 * delegated unchanged to tmpfs_alloc_vp().  Returns whatever
 * tmpfs_alloc_vp() returns, with the resulting vnode stored in *rvp.
 */
static int
tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
    struct vnode **rvp)
{
	int error;

	error = tmpfs_alloc_vp(mp, arg, lkflags, rvp);
	return (error);
}
84 
85 static int
86 tmpfs_lookup1(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
87 {
88 	struct tmpfs_dirent *de;
89 	struct tmpfs_node *dnode, *pnode;
90 	struct tmpfs_mount *tm;
91 	int error;
92 
93 	dnode = VP_TO_TMPFS_DIR(dvp);
94 	*vpp = NULLVP;
95 
96 	/* Check accessibility of requested node as a first step. */
97 	error = vn_dir_check_exec(dvp, cnp);
98 	if (error != 0)
99 		goto out;
100 
101 	/* We cannot be requesting the parent directory of the root node. */
102 	MPASS(IMPLIES(dnode->tn_type == VDIR &&
103 	    dnode->tn_dir.tn_parent == dnode,
104 	    !(cnp->cn_flags & ISDOTDOT)));
105 
106 	TMPFS_ASSERT_LOCKED(dnode);
107 	if (dnode->tn_dir.tn_parent == NULL) {
108 		error = ENOENT;
109 		goto out;
110 	}
111 	if (cnp->cn_flags & ISDOTDOT) {
112 		tm = VFS_TO_TMPFS(dvp->v_mount);
113 		pnode = dnode->tn_dir.tn_parent;
114 		tmpfs_ref_node(pnode);
115 		error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc,
116 		    pnode, cnp->cn_lkflags, vpp);
117 		tmpfs_free_node(tm, pnode);
118 		if (error != 0)
119 			goto out;
120 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
121 		VREF(dvp);
122 		*vpp = dvp;
123 		error = 0;
124 	} else {
125 		de = tmpfs_dir_lookup(dnode, NULL, cnp);
126 		if (de != NULL && de->td_node == NULL)
127 			cnp->cn_flags |= ISWHITEOUT;
128 		if (de == NULL || de->td_node == NULL) {
129 			/*
130 			 * The entry was not found in the directory.
131 			 * This is OK if we are creating or renaming an
132 			 * entry and are working on the last component of
133 			 * the path name.
134 			 */
135 			if ((cnp->cn_flags & ISLASTCN) &&
136 			    (cnp->cn_nameiop == CREATE || \
137 			    cnp->cn_nameiop == RENAME ||
138 			    (cnp->cn_nameiop == DELETE &&
139 			    cnp->cn_flags & DOWHITEOUT &&
140 			    cnp->cn_flags & ISWHITEOUT))) {
141 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
142 				    cnp->cn_thread);
143 				if (error != 0)
144 					goto out;
145 
146 				/*
147 				 * Keep the component name in the buffer for
148 				 * future uses.
149 				 */
150 				cnp->cn_flags |= SAVENAME;
151 
152 				error = EJUSTRETURN;
153 			} else
154 				error = ENOENT;
155 		} else {
156 			struct tmpfs_node *tnode;
157 
158 			/*
159 			 * The entry was found, so get its associated
160 			 * tmpfs_node.
161 			 */
162 			tnode = de->td_node;
163 
164 			/*
165 			 * If we are not at the last path component and
166 			 * found a non-directory or non-link entry (which
167 			 * may itself be pointing to a directory), raise
168 			 * an error.
169 			 */
170 			if ((tnode->tn_type != VDIR &&
171 			    tnode->tn_type != VLNK) &&
172 			    !(cnp->cn_flags & ISLASTCN)) {
173 				error = ENOTDIR;
174 				goto out;
175 			}
176 
177 			/*
178 			 * If we are deleting or renaming the entry, keep
179 			 * track of its tmpfs_dirent so that it can be
180 			 * easily deleted later.
181 			 */
182 			if ((cnp->cn_flags & ISLASTCN) &&
183 			    (cnp->cn_nameiop == DELETE ||
184 			    cnp->cn_nameiop == RENAME)) {
185 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
186 				    cnp->cn_thread);
187 				if (error != 0)
188 					goto out;
189 
190 				/* Allocate a new vnode on the matching entry. */
191 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
192 				    cnp->cn_lkflags, vpp);
193 				if (error != 0)
194 					goto out;
195 
196 				if ((dnode->tn_mode & S_ISTXT) &&
197 				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred,
198 				  cnp->cn_thread) && VOP_ACCESS(*vpp, VADMIN,
199 				  cnp->cn_cred, cnp->cn_thread)) {
200 					error = EPERM;
201 					vput(*vpp);
202 					*vpp = NULL;
203 					goto out;
204 				}
205 				cnp->cn_flags |= SAVENAME;
206 			} else {
207 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
208 				    cnp->cn_lkflags, vpp);
209 				if (error != 0)
210 					goto out;
211 			}
212 		}
213 	}
214 
215 	/*
216 	 * Store the result of this lookup in the cache.  Avoid this if the
217 	 * request was for creation, as it does not improve timings on
218 	 * emprical tests.
219 	 */
220 	if ((cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
221 		cache_enter(dvp, *vpp, cnp);
222 
223 out:
224 	/*
225 	 * If there were no errors, *vpp cannot be null and it must be
226 	 * locked.
227 	 */
228 	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp)));
229 
230 	return (error);
231 }
232 
233 static int
234 tmpfs_cached_lookup(struct vop_cachedlookup_args *v)
235 {
236 
237 	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
238 }
239 
240 static int
241 tmpfs_lookup(struct vop_lookup_args *v)
242 {
243 
244 	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
245 }
246 
247 static int
248 tmpfs_create(struct vop_create_args *v)
249 {
250 	struct vnode *dvp = v->a_dvp;
251 	struct vnode **vpp = v->a_vpp;
252 	struct componentname *cnp = v->a_cnp;
253 	struct vattr *vap = v->a_vap;
254 	int error;
255 
256 	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
257 
258 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
259 	if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
260 		cache_enter(dvp, *vpp, cnp);
261 	return (error);
262 }
263 
264 static int
265 tmpfs_mknod(struct vop_mknod_args *v)
266 {
267 	struct vnode *dvp = v->a_dvp;
268 	struct vnode **vpp = v->a_vpp;
269 	struct componentname *cnp = v->a_cnp;
270 	struct vattr *vap = v->a_vap;
271 
272 	if (vap->va_type != VBLK && vap->va_type != VCHR &&
273 	    vap->va_type != VFIFO)
274 		return EINVAL;
275 
276 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
277 }
278 
279 static int
280 tmpfs_open(struct vop_open_args *v)
281 {
282 	struct vnode *vp = v->a_vp;
283 	int mode = v->a_mode;
284 
285 	int error;
286 	struct tmpfs_node *node;
287 
288 	MPASS(VOP_ISLOCKED(vp));
289 
290 	node = VP_TO_TMPFS_NODE(vp);
291 
292 	/* The file is still active but all its names have been removed
293 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
294 	 * it is about to die. */
295 	if (node->tn_links < 1)
296 		return (ENOENT);
297 
298 	/* If the file is marked append-only, deny write requests. */
299 	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
300 		error = EPERM;
301 	else {
302 		error = 0;
303 		/* For regular files, the call below is nop. */
304 		KASSERT(vp->v_type != VREG || (node->tn_reg.tn_aobj->flags &
305 		    OBJ_DEAD) == 0, ("dead object"));
306 		vnode_create_vobject(vp, node->tn_size, v->a_td);
307 	}
308 
309 	MPASS(VOP_ISLOCKED(vp));
310 	return error;
311 }
312 
313 static int
314 tmpfs_close(struct vop_close_args *v)
315 {
316 	struct vnode *vp = v->a_vp;
317 
318 	/* Update node times. */
319 	tmpfs_update(vp);
320 
321 	return (0);
322 }
323 
324 /*
325  * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
326  * the comment above cache_fplookup for details.
327  */
328 int
329 tmpfs_fplookup_vexec(struct vop_fplookup_vexec_args *v)
330 {
331 	struct vnode *vp;
332 	struct tmpfs_node *node;
333 	struct ucred *cred;
334 	mode_t all_x, mode;
335 
336 	vp = v->a_vp;
337 	node = VP_TO_TMPFS_NODE_SMR(vp);
338 	if (__predict_false(node == NULL))
339 		return (EAGAIN);
340 
341 	all_x = S_IXUSR | S_IXGRP | S_IXOTH;
342 	mode = atomic_load_short(&node->tn_mode);
343 	if (__predict_true((mode & all_x) == all_x))
344 		return (0);
345 
346 	cred = v->a_cred;
347 	return (vaccess_vexec_smr(mode, node->tn_uid, node->tn_gid, cred));
348 }
349 
350 int
351 tmpfs_access(struct vop_access_args *v)
352 {
353 	struct vnode *vp = v->a_vp;
354 	accmode_t accmode = v->a_accmode;
355 	struct ucred *cred = v->a_cred;
356 	mode_t all_x = S_IXUSR | S_IXGRP | S_IXOTH;
357 	int error;
358 	struct tmpfs_node *node;
359 
360 	MPASS(VOP_ISLOCKED(vp));
361 
362 	node = VP_TO_TMPFS_NODE(vp);
363 
364 	/*
365 	 * Common case path lookup.
366 	 */
367 	if (__predict_true(accmode == VEXEC && (node->tn_mode & all_x) == all_x))
368 		return (0);
369 
370 	switch (vp->v_type) {
371 	case VDIR:
372 		/* FALLTHROUGH */
373 	case VLNK:
374 		/* FALLTHROUGH */
375 	case VREG:
376 		if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
377 			error = EROFS;
378 			goto out;
379 		}
380 		break;
381 
382 	case VBLK:
383 		/* FALLTHROUGH */
384 	case VCHR:
385 		/* FALLTHROUGH */
386 	case VSOCK:
387 		/* FALLTHROUGH */
388 	case VFIFO:
389 		break;
390 
391 	default:
392 		error = EINVAL;
393 		goto out;
394 	}
395 
396 	if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
397 		error = EPERM;
398 		goto out;
399 	}
400 
401 	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
402 	    accmode, cred);
403 
404 out:
405 	MPASS(VOP_ISLOCKED(vp));
406 
407 	return error;
408 }
409 
410 int
411 tmpfs_stat(struct vop_stat_args *v)
412 {
413 	struct vnode *vp = v->a_vp;
414 	struct stat *sb = v->a_sb;
415 	vm_object_t obj;
416 	struct tmpfs_node *node;
417 	int error;
418 
419 	node = VP_TO_TMPFS_NODE(vp);
420 
421 	tmpfs_update_getattr(vp);
422 
423 	error = vop_stat_helper_pre(v);
424 	if (__predict_false(error))
425 		return (error);
426 
427 	sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
428 	sb->st_ino = node->tn_id;
429 	sb->st_mode = node->tn_mode | VTTOIF(vp->v_type);
430 	sb->st_nlink = node->tn_links;
431 	sb->st_uid = node->tn_uid;
432 	sb->st_gid = node->tn_gid;
433 	sb->st_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
434 		node->tn_rdev : NODEV;
435 	sb->st_size = node->tn_size;
436 	sb->st_atim.tv_sec = node->tn_atime.tv_sec;
437 	sb->st_atim.tv_nsec = node->tn_atime.tv_nsec;
438 	sb->st_mtim.tv_sec = node->tn_mtime.tv_sec;
439 	sb->st_mtim.tv_nsec = node->tn_mtime.tv_nsec;
440 	sb->st_ctim.tv_sec = node->tn_ctime.tv_sec;
441 	sb->st_ctim.tv_nsec = node->tn_ctime.tv_nsec;
442 	sb->st_birthtim.tv_sec = node->tn_birthtime.tv_sec;
443 	sb->st_birthtim.tv_nsec = node->tn_birthtime.tv_nsec;
444 	sb->st_blksize = PAGE_SIZE;
445 	sb->st_flags = node->tn_flags;
446 	sb->st_gen = node->tn_gen;
447 	if (vp->v_type == VREG) {
448 		obj = node->tn_reg.tn_aobj;
449 		sb->st_blocks = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
450 	} else
451 		sb->st_blocks = node->tn_size;
452 	sb->st_blocks /= S_BLKSIZE;
453 	return (vop_stat_helper_post(v, error));
454 }
455 
456 int
457 tmpfs_getattr(struct vop_getattr_args *v)
458 {
459 	struct vnode *vp = v->a_vp;
460 	struct vattr *vap = v->a_vap;
461 	vm_object_t obj;
462 	struct tmpfs_node *node;
463 
464 	node = VP_TO_TMPFS_NODE(vp);
465 
466 	tmpfs_update_getattr(vp);
467 
468 	vap->va_type = vp->v_type;
469 	vap->va_mode = node->tn_mode;
470 	vap->va_nlink = node->tn_links;
471 	vap->va_uid = node->tn_uid;
472 	vap->va_gid = node->tn_gid;
473 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
474 	vap->va_fileid = node->tn_id;
475 	vap->va_size = node->tn_size;
476 	vap->va_blocksize = PAGE_SIZE;
477 	vap->va_atime = node->tn_atime;
478 	vap->va_mtime = node->tn_mtime;
479 	vap->va_ctime = node->tn_ctime;
480 	vap->va_birthtime = node->tn_birthtime;
481 	vap->va_gen = node->tn_gen;
482 	vap->va_flags = node->tn_flags;
483 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
484 		node->tn_rdev : NODEV;
485 	if (vp->v_type == VREG) {
486 		obj = node->tn_reg.tn_aobj;
487 		vap->va_bytes = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
488 	} else
489 		vap->va_bytes = node->tn_size;
490 	vap->va_filerev = 0;
491 
492 	return 0;
493 }
494 
495 int
496 tmpfs_setattr(struct vop_setattr_args *v)
497 {
498 	struct vnode *vp = v->a_vp;
499 	struct vattr *vap = v->a_vap;
500 	struct ucred *cred = v->a_cred;
501 	struct thread *td = curthread;
502 
503 	int error;
504 
505 	MPASS(VOP_ISLOCKED(vp));
506 	ASSERT_VOP_IN_SEQC(vp);
507 
508 	error = 0;
509 
510 	/* Abort if any unsettable attribute is given. */
511 	if (vap->va_type != VNON ||
512 	    vap->va_nlink != VNOVAL ||
513 	    vap->va_fsid != VNOVAL ||
514 	    vap->va_fileid != VNOVAL ||
515 	    vap->va_blocksize != VNOVAL ||
516 	    vap->va_gen != VNOVAL ||
517 	    vap->va_rdev != VNOVAL ||
518 	    vap->va_bytes != VNOVAL)
519 		error = EINVAL;
520 
521 	if (error == 0 && (vap->va_flags != VNOVAL))
522 		error = tmpfs_chflags(vp, vap->va_flags, cred, td);
523 
524 	if (error == 0 && (vap->va_size != VNOVAL))
525 		error = tmpfs_chsize(vp, vap->va_size, cred, td);
526 
527 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
528 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
529 
530 	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
531 		error = tmpfs_chmod(vp, vap->va_mode, cred, td);
532 
533 	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
534 	    vap->va_atime.tv_nsec != VNOVAL) ||
535 	    (vap->va_mtime.tv_sec != VNOVAL &&
536 	    vap->va_mtime.tv_nsec != VNOVAL) ||
537 	    (vap->va_birthtime.tv_sec != VNOVAL &&
538 	    vap->va_birthtime.tv_nsec != VNOVAL)))
539 		error = tmpfs_chtimes(vp, vap, cred, td);
540 
541 	/* Update the node times.  We give preference to the error codes
542 	 * generated by this function rather than the ones that may arise
543 	 * from tmpfs_update. */
544 	tmpfs_update(vp);
545 
546 	MPASS(VOP_ISLOCKED(vp));
547 
548 	return error;
549 }
550 
551 static int
552 tmpfs_read(struct vop_read_args *v)
553 {
554 	struct vnode *vp;
555 	struct uio *uio;
556 	struct tmpfs_node *node;
557 
558 	vp = v->a_vp;
559 	if (vp->v_type != VREG)
560 		return (EISDIR);
561 	uio = v->a_uio;
562 	if (uio->uio_offset < 0)
563 		return (EINVAL);
564 	node = VP_TO_TMPFS_NODE(vp);
565 	tmpfs_set_status(VFS_TO_TMPFS(vp->v_mount), node, TMPFS_NODE_ACCESSED);
566 	return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio));
567 }
568 
569 static int
570 tmpfs_write(struct vop_write_args *v)
571 {
572 	struct vnode *vp;
573 	struct uio *uio;
574 	struct tmpfs_node *node;
575 	off_t oldsize;
576 	int error, ioflag;
577 	mode_t newmode;
578 
579 	vp = v->a_vp;
580 	uio = v->a_uio;
581 	ioflag = v->a_ioflag;
582 	error = 0;
583 	node = VP_TO_TMPFS_NODE(vp);
584 	oldsize = node->tn_size;
585 
586 	if (uio->uio_offset < 0 || vp->v_type != VREG)
587 		return (EINVAL);
588 	if (uio->uio_resid == 0)
589 		return (0);
590 	if (ioflag & IO_APPEND)
591 		uio->uio_offset = node->tn_size;
592 	if (uio->uio_offset + uio->uio_resid >
593 	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
594 		return (EFBIG);
595 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
596 		return (EFBIG);
597 	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
598 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
599 		    FALSE);
600 		if (error != 0)
601 			goto out;
602 	}
603 
604 	error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio);
605 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
606 	    TMPFS_NODE_CHANGED;
607 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
608 		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID)) {
609 			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
610 			vn_seqc_write_begin(vp);
611 			atomic_store_short(&node->tn_mode, newmode);
612 			vn_seqc_write_end(vp);
613 		}
614 	}
615 	if (error != 0)
616 		(void)tmpfs_reg_resize(vp, oldsize, TRUE);
617 
618 out:
619 	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
620 	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
621 
622 	return (error);
623 }
624 
625 static int
626 tmpfs_fsync(struct vop_fsync_args *v)
627 {
628 	struct vnode *vp = v->a_vp;
629 
630 	MPASS(VOP_ISLOCKED(vp));
631 
632 	tmpfs_check_mtime(vp);
633 	tmpfs_update(vp);
634 
635 	return 0;
636 }
637 
638 static int
639 tmpfs_remove(struct vop_remove_args *v)
640 {
641 	struct vnode *dvp = v->a_dvp;
642 	struct vnode *vp = v->a_vp;
643 
644 	int error;
645 	struct tmpfs_dirent *de;
646 	struct tmpfs_mount *tmp;
647 	struct tmpfs_node *dnode;
648 	struct tmpfs_node *node;
649 
650 	MPASS(VOP_ISLOCKED(dvp));
651 	MPASS(VOP_ISLOCKED(vp));
652 
653 	if (vp->v_type == VDIR) {
654 		error = EISDIR;
655 		goto out;
656 	}
657 
658 	dnode = VP_TO_TMPFS_DIR(dvp);
659 	node = VP_TO_TMPFS_NODE(vp);
660 	tmp = VFS_TO_TMPFS(vp->v_mount);
661 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
662 	MPASS(de != NULL);
663 
664 	/* Files marked as immutable or append-only cannot be deleted. */
665 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
666 	    (dnode->tn_flags & APPEND)) {
667 		error = EPERM;
668 		goto out;
669 	}
670 
671 	/* Remove the entry from the directory; as it is a file, we do not
672 	 * have to change the number of hard links of the directory. */
673 	tmpfs_dir_detach(dvp, de);
674 	if (v->a_cnp->cn_flags & DOWHITEOUT)
675 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
676 
677 	/* Free the directory entry we just deleted.  Note that the node
678 	 * referred by it will not be removed until the vnode is really
679 	 * reclaimed. */
680 	tmpfs_free_dirent(tmp, de);
681 
682 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED;
683 	error = 0;
684 
685 out:
686 
687 	return error;
688 }
689 
690 static int
691 tmpfs_link(struct vop_link_args *v)
692 {
693 	struct vnode *dvp = v->a_tdvp;
694 	struct vnode *vp = v->a_vp;
695 	struct componentname *cnp = v->a_cnp;
696 
697 	int error;
698 	struct tmpfs_dirent *de;
699 	struct tmpfs_node *node;
700 
701 	MPASS(VOP_ISLOCKED(dvp));
702 	MPASS(cnp->cn_flags & HASBUF);
703 	MPASS(dvp != vp); /* XXX When can this be false? */
704 	node = VP_TO_TMPFS_NODE(vp);
705 
706 	/* Ensure that we do not overflow the maximum number of links imposed
707 	 * by the system. */
708 	MPASS(node->tn_links <= TMPFS_LINK_MAX);
709 	if (node->tn_links == TMPFS_LINK_MAX) {
710 		error = EMLINK;
711 		goto out;
712 	}
713 
714 	/* We cannot create links of files marked immutable or append-only. */
715 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
716 		error = EPERM;
717 		goto out;
718 	}
719 
720 	/* Allocate a new directory entry to represent the node. */
721 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
722 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
723 	if (error != 0)
724 		goto out;
725 
726 	/* Insert the new directory entry into the appropriate directory. */
727 	if (cnp->cn_flags & ISWHITEOUT)
728 		tmpfs_dir_whiteout_remove(dvp, cnp);
729 	tmpfs_dir_attach(dvp, de);
730 
731 	/* vp link count has changed, so update node times. */
732 	node->tn_status |= TMPFS_NODE_CHANGED;
733 	tmpfs_update(vp);
734 
735 	error = 0;
736 
737 out:
738 	return error;
739 }
740 
741 /*
742  * We acquire all but fdvp locks using non-blocking acquisitions.  If we
743  * fail to acquire any lock in the path we will drop all held locks,
744  * acquire the new lock in a blocking fashion, and then release it and
745  * restart the rename.  This acquire/release step ensures that we do not
746  * spin on a lock waiting for release.  On error release all vnode locks
747  * and decrement references the way tmpfs_rename() would do.
 *
 * On entry tdvp is locked, and so is *tvpp when it is non-NULL and
 * distinct from tdvp; both are unlocked before anything else is done.
 *
 * On success (return 0) fdvp and tdvp are locked exclusively, *tvpp is
 * either NULL or a locked vnode, and *fvpp is referenced but unlocked.
 * *fvpp and *tvpp may have been replaced by the vnodes the names resolve
 * to now, with the caller's previous references released via vrele().
 *
 * On failure an errno is returned (EINVAL, ENOENT, ENOTEMPTY, or whatever
 * vn_lock()/tmpfs_alloc_vp() reported), no vnode lock is held, and fdvp,
 * *fvpp, tdvp and a non-NULL *tvpp have each been vrele()d, so the caller
 * must not release them again.
 *
 * The number of passes through the relock loop is added to the
 * tmpfs_rename_restarts counter on both the success and failure paths.
748  */
749 static int
750 tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
751     struct vnode *tdvp, struct vnode **tvpp,
752     struct componentname *fcnp, struct componentname *tcnp)
753 {
754 	struct vnode *nvp;
755 	struct mount *mp;
756 	struct tmpfs_dirent *de;
757 	int error, restarts = 0;
758 
	/* Drop the target locks held on entry; they are re-taken below. */
759 	VOP_UNLOCK(tdvp);
760 	if (*tvpp != NULL && *tvpp != tdvp)
761 		VOP_UNLOCK(*tvpp);
762 	mp = fdvp->v_mount;
763 
764 relock:
	/* Every pass is counted, including the very first one. */
765 	restarts += 1;
	/* fdvp is the only lock taken in blocking mode while none is held. */
766 	error = vn_lock(fdvp, LK_EXCLUSIVE);
767 	if (error)
768 		goto releout;
769 	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		/*
		 * tdvp is contended: release fdvp, wait for tdvp with no
		 * other lock held, release it again and start over.
		 */
770 		VOP_UNLOCK(fdvp);
771 		error = vn_lock(tdvp, LK_EXCLUSIVE);
772 		if (error)
773 			goto releout;
774 		VOP_UNLOCK(tdvp);
775 		goto relock;
776 	}
777 	/*
778 	 * Re-resolve fvp to be certain it still exists and fetch the
779 	 * correct vnode.
780 	 */
781 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
782 	if (de == NULL) {
783 		VOP_UNLOCK(fdvp);
784 		VOP_UNLOCK(tdvp);
		/* "." and ".." are not valid rename sources. */
785 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
786 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
787 			error = EINVAL;
788 		else
789 			error = ENOENT;
790 		goto releout;
791 	}
792 	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
793 	if (error != 0) {
794 		VOP_UNLOCK(fdvp);
795 		VOP_UNLOCK(tdvp);
		/* Only EBUSY (lock contention) is worth a retry. */
796 		if (error != EBUSY)
797 			goto releout;
		/*
		 * Wait for the source vnode's lock with no directory lock
		 * held, then give the lock up again and retry.
		 * NOTE(review): de->td_node is used here after fdvp was
		 * unlocked; confirm the entry cannot be freed in between.
		 */
798 		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
799 		if (error != 0)
800 			goto releout;
801 		VOP_UNLOCK(nvp);
802 		/*
803 		 * Concurrent rename race.
804 		 */
805 		if (nvp == tdvp) {
806 			vrele(nvp);
807 			error = EINVAL;
808 			goto releout;
809 		}
		/* Swap the caller's fvp reference for the fresh vnode. */
810 		vrele(*fvpp);
811 		*fvpp = nvp;
812 		goto relock;
813 	}
	/* Source resolved: hand it back referenced but unlocked. */
814 	vrele(*fvpp);
815 	*fvpp = nvp;
816 	VOP_UNLOCK(*fvpp);
817 	/*
818 	 * Re-resolve tvp and acquire the vnode lock if present.
819 	 */
820 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
821 	/*
822 	 * If tvp disappeared we just carry on.
823 	 */
824 	if (de == NULL && *tvpp != NULL) {
825 		vrele(*tvpp);
826 		*tvpp = NULL;
827 	}
828 	/*
829 	 * Get the tvp ino if the lookup succeeded.  We may have to restart
830 	 * if the non-blocking acquire fails.
831 	 */
832 	if (de != NULL) {
833 		nvp = NULL;
834 		error = tmpfs_alloc_vp(mp, de->td_node,
835 		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
		/*
		 * Replace the caller's tvp reference with the new vnode.
		 * NOTE(review): on failure this presumably stores NULL,
		 * assuming tmpfs_alloc_vp() leaves nvp untouched -- confirm.
		 */
836 		if (*tvpp != NULL)
837 			vrele(*tvpp);
838 		*tvpp = nvp;
839 		if (error != 0) {
840 			VOP_UNLOCK(fdvp);
841 			VOP_UNLOCK(tdvp);
842 			if (error != EBUSY)
843 				goto releout;
844 			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
845 			    &nvp);
846 			if (error != 0)
847 				goto releout;
			/*
			 * NOTE(review): the vnode returned by the blocking
			 * tmpfs_alloc_vp() above is unlocked but, unlike in
			 * the fvp path, neither stored in *tvpp nor
			 * vrele()d before "goto releout"/"goto relock" --
			 * looks like a possible vnode reference leak;
			 * confirm.
			 */
848 			VOP_UNLOCK(nvp);
849 			/*
850 			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
851 			 */
852 			if (nvp == fdvp) {
853 				error = ENOTEMPTY;
854 				goto releout;
855 			}
856 			goto relock;
857 		}
858 	}
859 	tmpfs_rename_restarts += restarts;
860 
861 	return (0);
862 
863 releout:
	/*
	 * All locks are already dropped on every path reaching this label;
	 * release the references tmpfs_rename() would have released.
	 */
864 	vrele(fdvp);
865 	vrele(*fvpp);
866 	vrele(tdvp);
867 	if (*tvpp != NULL)
868 		vrele(*tvpp);
869 	tmpfs_rename_restarts += restarts;
870 
871 	return (error);
872 }
873 
/*
 * VOP_RENAME: move the entry fcnp (vnode a_fvp) in directory a_fdvp to the
 * name tcnp in directory a_tdvp, replacing a_tvp if it exists.
 *
 * On entry tdvp is locked, and tvp is locked when non-NULL (asserted
 * below).  fdvp is locked here only when it differs from both tdvp and
 * tvp.
 *
 * On return all four vnodes have been released: tdvp and tvp via
 * vput()/vrele(), fdvp and fvp via vrele().  The one exception is a
 * failing tmpfs_rename_relock(), which has already dropped every lock and
 * reference itself, so its error is returned directly.
 *
 * Returns 0 on success (including the no-op case fvp == tvp), or EXDEV,
 * EINVAL, ENOENT, EPERM, ENOTEMPTY, ENOTDIR, EISDIR, or an error from
 * tmpfs_rename_relock().
 */
874 static int
875 tmpfs_rename(struct vop_rename_args *v)
876 {
877 	struct vnode *fdvp = v->a_fdvp;
878 	struct vnode *fvp = v->a_fvp;
879 	struct componentname *fcnp = v->a_fcnp;
880 	struct vnode *tdvp = v->a_tdvp;
881 	struct vnode *tvp = v->a_tvp;
882 	struct componentname *tcnp = v->a_tcnp;
883 	char *newname;
884 	struct tmpfs_dirent *de;
885 	struct tmpfs_mount *tmp;
886 	struct tmpfs_node *fdnode;
887 	struct tmpfs_node *fnode;
888 	struct tmpfs_node *tnode;
889 	struct tmpfs_node *tdnode;
890 	int error;
891 	bool want_seqc_end;
892 
893 	MPASS(VOP_ISLOCKED(tdvp));
894 	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp)));
895 	MPASS(fcnp->cn_flags & HASBUF);
896 	MPASS(tcnp->cn_flags & HASBUF);
897 
	/* Set once the seqc write sections are open; checked at "out". */
898 	want_seqc_end = false;
899 
900 	/*
901 	 * Disallow cross-device renames.
902 	 * XXX Why isn't this done by the caller?
903 	 */
904 	if (fvp->v_mount != tdvp->v_mount ||
905 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
906 		error = EXDEV;
907 		goto out;
908 	}
909 
910 	/* If source and target are the same file, there is nothing to do. */
911 	if (fvp == tvp) {
912 		error = 0;
913 		goto out;
914 	}
915 
916 	/*
917 	 * If we need to move the directory between entries, lock the
918 	 * source so that we can safely operate on it.
919 	 */
920 	if (fdvp != tdvp && fdvp != tvp) {
		/*
		 * Try without sleeping first; on contention fall back to
		 * tmpfs_rename_relock(), which may replace fvp and tvp.
		 */
921 		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
922 			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
923 			    fcnp, tcnp);
			/* The helper already released everything on failure. */
924 			if (error != 0)
925 				return (error);
926 			ASSERT_VOP_ELOCKED(fdvp,
927 			    "tmpfs_rename: fdvp not locked");
928 			ASSERT_VOP_ELOCKED(tdvp,
929 			    "tmpfs_rename: tdvp not locked");
930 			if (tvp != NULL)
931 				ASSERT_VOP_ELOCKED(tvp,
932 				    "tmpfs_rename: tvp not locked");
			/* The re-resolved names may now be the same file. */
933 			if (fvp == tvp) {
934 				error = 0;
935 				goto out_locked;
936 			}
937 		}
938 	}
939 
	/*
	 * Open a seqc write section on every vnode involved; the matching
	 * vn_seqc_write_end() calls are issued at "out".
	 */
940 	if (tvp != NULL)
941 		vn_seqc_write_begin(tvp);
942 	vn_seqc_write_begin(tdvp);
943 	vn_seqc_write_begin(fvp);
944 	vn_seqc_write_begin(fdvp);
945 	want_seqc_end = true;
946 
947 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
948 	tdnode = VP_TO_TMPFS_DIR(tdvp);
949 	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
950 	fdnode = VP_TO_TMPFS_DIR(fdvp);
951 	fnode = VP_TO_TMPFS_NODE(fvp);
952 	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);
953 
954 	/*
955 	 * Entry can disappear before we lock fdvp,
956 	 * also avoid manipulating '.' and '..' entries.
957 	 */
958 	if (de == NULL) {
959 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
960 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
961 			error = EINVAL;
962 		else
963 			error = ENOENT;
964 		goto out_locked;
965 	}
966 	MPASS(de->td_node == fnode);
967 
968 	/*
969 	 * If re-naming a directory to another preexisting directory
970 	 * ensure that the target directory is empty so that its
971 	 * removal causes no side effects.
972 	 * Kern_rename guarantees the destination to be a directory
973 	 * if the source is one.
974 	 */
975 	if (tvp != NULL) {
976 		MPASS(tnode != NULL);
977 
		/* A protected target, or target directory, blocks the rename. */
978 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
979 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
980 			error = EPERM;
981 			goto out_locked;
982 		}
983 
984 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
985 			if (tnode->tn_size > 0) {
986 				error = ENOTEMPTY;
987 				goto out_locked;
988 			}
989 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
990 			error = ENOTDIR;
991 			goto out_locked;
992 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
993 			error = EISDIR;
994 			goto out_locked;
995 		} else {
996 			MPASS(fnode->tn_type != VDIR &&
997 				tnode->tn_type != VDIR);
998 		}
999 	}
1000 
	/* The same protection flags apply to the source and its directory. */
1001 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
1002 	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
1003 		error = EPERM;
1004 		goto out_locked;
1005 	}
1006 
1007 	/*
1008 	 * Ensure that we have enough memory to hold the new name, if it
1009 	 * has to be changed.
1010 	 */
	/*
	 * The buffer is obtained with M_WAITOK before any directory is
	 * modified; every later error path frees it before bailing out.
	 */
1011 	if (fcnp->cn_namelen != tcnp->cn_namelen ||
1012 	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
1013 		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
1014 	} else
1015 		newname = NULL;
1016 
1017 	/*
1018 	 * If the node is being moved to another directory, we have to do
1019 	 * the move.
1020 	 */
1021 	if (fdnode != tdnode) {
1022 		/*
1023 		 * In case we are moving a directory, we have to adjust its
1024 		 * parent to point to the new parent.
1025 		 */
1026 		if (de->td_node->tn_type == VDIR) {
1027 			struct tmpfs_node *n;
1028 
1029 			/*
1030 			 * Ensure the target directory is not a child of the
1031 			 * directory being moved.  Otherwise, we'd end up
1032 			 * with stale nodes.
1033 			 */
1034 			n = tdnode;
1035 			/*
1036 			 * TMPFS_LOCK guaranties that no nodes are freed while
1037 			 * traversing the list. Nodes can only be marked as
1038 			 * removed: tn_parent == NULL.
1039 			 */
			/*
			 * Walk from tdnode up through tn_parent until the
			 * node that is its own parent, holding one node
			 * lock at a time.  Meeting fnode on the way, or a
			 * node whose parent is NULL, fails with EINVAL.
			 */
1040 			TMPFS_LOCK(tmp);
1041 			TMPFS_NODE_LOCK(n);
1042 			while (n != n->tn_dir.tn_parent) {
1043 				struct tmpfs_node *parent;
1044 
1045 				if (n == fnode) {
1046 					TMPFS_NODE_UNLOCK(n);
1047 					TMPFS_UNLOCK(tmp);
1048 					error = EINVAL;
1049 					if (newname != NULL)
1050 						    free(newname, M_TMPFSNAME);
1051 					goto out_locked;
1052 				}
1053 				parent = n->tn_dir.tn_parent;
1054 				TMPFS_NODE_UNLOCK(n);
1055 				if (parent == NULL) {
1056 					n = NULL;
1057 					break;
1058 				}
1059 				TMPFS_NODE_LOCK(parent);
1060 				if (parent->tn_dir.tn_parent == NULL) {
1061 					TMPFS_NODE_UNLOCK(parent);
1062 					n = NULL;
1063 					break;
1064 				}
1065 				n = parent;
1066 			}
1067 			TMPFS_UNLOCK(tmp);
			/* n == NULL: the walk ran into a removed node. */
1068 			if (n == NULL) {
1069 				error = EINVAL;
1070 				if (newname != NULL)
1071 					    free(newname, M_TMPFSNAME);
1072 				goto out_locked;
1073 			}
			/* The loop ended with the topmost node still locked. */
1074 			TMPFS_NODE_UNLOCK(n);
1075 
1076 			/* Adjust the parent pointer. */
1077 			TMPFS_VALIDATE_DIR(fnode);
1078 			TMPFS_NODE_LOCK(de->td_node);
1079 			de->td_node->tn_dir.tn_parent = tdnode;
1080 			TMPFS_NODE_UNLOCK(de->td_node);
1081 
1082 			/*
1083 			 * As a result of changing the target of the '..'
1084 			 * entry, the link count of the source and target
1085 			 * directories has to be adjusted.
1086 			 */
1087 			TMPFS_NODE_LOCK(tdnode);
1088 			TMPFS_ASSERT_LOCKED(tdnode);
1089 			tdnode->tn_links++;
1090 			TMPFS_NODE_UNLOCK(tdnode);
1091 
1092 			TMPFS_NODE_LOCK(fdnode);
1093 			TMPFS_ASSERT_LOCKED(fdnode);
1094 			fdnode->tn_links--;
1095 			TMPFS_NODE_UNLOCK(fdnode);
1096 		}
1097 	}
1098 
	/* No error path below this point; the rename is now carried out. */
1099 	/*
1100 	 * Do the move: just remove the entry from the source directory
1101 	 * and insert it into the target one.
1102 	 */
1103 	tmpfs_dir_detach(fdvp, de);
1104 
	/* Keep whiteouts in step with the entry that moved. */
1105 	if (fcnp->cn_flags & DOWHITEOUT)
1106 		tmpfs_dir_whiteout_add(fdvp, fcnp);
1107 	if (tcnp->cn_flags & ISWHITEOUT)
1108 		tmpfs_dir_whiteout_remove(tdvp, tcnp);
1109 
1110 	/*
1111 	 * If the name has changed, we need to make it effective by changing
1112 	 * it in the directory entry.
1113 	 */
1114 	if (newname != NULL) {
1115 		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
1116 
		/*
		 * Swap in the preallocated buffer.  NOTE(review):
		 * tmpfs_dirent_init() presumably copies the new name into
		 * it and updates the entry's name metadata -- confirm.
		 */
1117 		free(de->ud.td_name, M_TMPFSNAME);
1118 		de->ud.td_name = newname;
1119 		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);
1120 
1121 		fnode->tn_status |= TMPFS_NODE_CHANGED;
1122 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1123 	}
1124 
1125 	/*
1126 	 * If we are overwriting an entry, we have to remove the old one
1127 	 * from the target directory.
1128 	 */
1129 	if (tvp != NULL) {
1130 		struct tmpfs_dirent *tde;
1131 
1132 		/* Remove the old entry from the target directory. */
1133 		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
1134 		tmpfs_dir_detach(tdvp, tde);
1135 
1136 		/*
1137 		 * Free the directory entry we just deleted.  Note that the
1138 		 * node referred by it will not be removed until the vnode is
1139 		 * really reclaimed.
1140 		 */
1141 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
1142 	}
1143 
1144 	tmpfs_dir_attach(tdvp, de);
1145 
	/* Drop name cache entries made stale by the rename. */
1146 	if (tmpfs_use_nc(fvp)) {
1147 		cache_purge(fvp);
1148 		if (tvp != NULL)
1149 			cache_purge(tvp);
1150 		cache_purge_negative(tdvp);
1151 	}
1152 
1153 	error = 0;
1154 
1155 out_locked:
	/* Mirror of the locking condition above: unlock fdvp if we took it. */
1156 	if (fdvp != tdvp && fdvp != tvp)
1157 		VOP_UNLOCK(fdvp);
1158 
1159 out:
1160 	if (want_seqc_end) {
1161 		if (tvp != NULL)
1162 			vn_seqc_write_end(tvp);
1163 		vn_seqc_write_end(tdvp);
1164 		vn_seqc_write_end(fvp);
1165 		vn_seqc_write_end(fdvp);
1166 	}
1167 
1168 	/*
1169 	 * Release target nodes.
1170 	 * XXX: I don't understand when tdvp can be the same as tvp, but
1171 	 * other code takes care of this...
1172 	 */
	/*
	 * When tdvp == tvp only a reference is dropped here; the vput(tvp)
	 * below is then the single unlock.
	 */
1173 	if (tdvp == tvp)
1174 		vrele(tdvp);
1175 	else
1176 		vput(tdvp);
1177 	if (tvp != NULL)
1178 		vput(tvp);
1179 
1180 	/* Release source nodes. */
1181 	vrele(fdvp);
1182 	vrele(fvp);
1183 
1184 	return (error);
1185 }
1186 
1187 static int
1188 tmpfs_mkdir(struct vop_mkdir_args *v)
1189 {
1190 	struct vnode *dvp = v->a_dvp;
1191 	struct vnode **vpp = v->a_vpp;
1192 	struct componentname *cnp = v->a_cnp;
1193 	struct vattr *vap = v->a_vap;
1194 
1195 	MPASS(vap->va_type == VDIR);
1196 
1197 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
1198 }
1199 
1200 static int
1201 tmpfs_rmdir(struct vop_rmdir_args *v)
1202 {
1203 	struct vnode *dvp = v->a_dvp;
1204 	struct vnode *vp = v->a_vp;
1205 
1206 	int error;
1207 	struct tmpfs_dirent *de;
1208 	struct tmpfs_mount *tmp;
1209 	struct tmpfs_node *dnode;
1210 	struct tmpfs_node *node;
1211 
1212 	MPASS(VOP_ISLOCKED(dvp));
1213 	MPASS(VOP_ISLOCKED(vp));
1214 
1215 	tmp = VFS_TO_TMPFS(dvp->v_mount);
1216 	dnode = VP_TO_TMPFS_DIR(dvp);
1217 	node = VP_TO_TMPFS_DIR(vp);
1218 
1219 	/* Directories with more than two entries ('.' and '..') cannot be
1220 	 * removed. */
1221 	 if (node->tn_size > 0) {
1222 		 error = ENOTEMPTY;
1223 		 goto out;
1224 	 }
1225 
1226 	if ((dnode->tn_flags & APPEND)
1227 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1228 		error = EPERM;
1229 		goto out;
1230 	}
1231 
1232 	/* This invariant holds only if we are not trying to remove "..".
1233 	  * We checked for that above so this is safe now. */
1234 	MPASS(node->tn_dir.tn_parent == dnode);
1235 
1236 	/* Get the directory entry associated with node (vp).  This was
1237 	 * filled by tmpfs_lookup while looking up the entry. */
1238 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
1239 	MPASS(TMPFS_DIRENT_MATCHES(de,
1240 	    v->a_cnp->cn_nameptr,
1241 	    v->a_cnp->cn_namelen));
1242 
1243 	/* Check flags to see if we are allowed to remove the directory. */
1244 	if ((dnode->tn_flags & APPEND) != 0 ||
1245 	    (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0) {
1246 		error = EPERM;
1247 		goto out;
1248 	}
1249 
1250 
1251 	/* Detach the directory entry from the directory (dnode). */
1252 	tmpfs_dir_detach(dvp, de);
1253 	if (v->a_cnp->cn_flags & DOWHITEOUT)
1254 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
1255 
1256 	/* No vnode should be allocated for this entry from this point */
1257 	TMPFS_NODE_LOCK(node);
1258 	node->tn_links--;
1259 	node->tn_dir.tn_parent = NULL;
1260 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
1261 	    TMPFS_NODE_MODIFIED;
1262 
1263 	TMPFS_NODE_UNLOCK(node);
1264 
1265 	TMPFS_NODE_LOCK(dnode);
1266 	dnode->tn_links--;
1267 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
1268 	    TMPFS_NODE_MODIFIED;
1269 	TMPFS_NODE_UNLOCK(dnode);
1270 
1271 	if (tmpfs_use_nc(dvp)) {
1272 		cache_purge(dvp);
1273 		cache_purge(vp);
1274 	}
1275 
1276 	/* Free the directory entry we just deleted.  Note that the node
1277 	 * referred by it will not be removed until the vnode is really
1278 	 * reclaimed. */
1279 	tmpfs_free_dirent(tmp, de);
1280 
1281 	/* Release the deleted vnode (will destroy the node, notify
1282 	 * interested parties and clean it from the cache). */
1283 
1284 	dnode->tn_status |= TMPFS_NODE_CHANGED;
1285 	tmpfs_update(dvp);
1286 
1287 	error = 0;
1288 
1289 out:
1290 	return error;
1291 }
1292 
1293 static int
1294 tmpfs_symlink(struct vop_symlink_args *v)
1295 {
1296 	struct vnode *dvp = v->a_dvp;
1297 	struct vnode **vpp = v->a_vpp;
1298 	struct componentname *cnp = v->a_cnp;
1299 	struct vattr *vap = v->a_vap;
1300 	const char *target = v->a_target;
1301 
1302 #ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
1303 	MPASS(vap->va_type == VLNK);
1304 #else
1305 	vap->va_type = VLNK;
1306 #endif
1307 
1308 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
1309 }
1310 
1311 static int
1312 tmpfs_readdir(struct vop_readdir_args *va)
1313 {
1314 	struct vnode *vp;
1315 	struct uio *uio;
1316 	struct tmpfs_mount *tm;
1317 	struct tmpfs_node *node;
1318 	u_long **cookies;
1319 	int *eofflag, *ncookies;
1320 	ssize_t startresid;
1321 	int error, maxcookies;
1322 
1323 	vp = va->a_vp;
1324 	uio = va->a_uio;
1325 	eofflag = va->a_eofflag;
1326 	cookies = va->a_cookies;
1327 	ncookies = va->a_ncookies;
1328 
1329 	/* This operation only makes sense on directory nodes. */
1330 	if (vp->v_type != VDIR)
1331 		return ENOTDIR;
1332 
1333 	maxcookies = 0;
1334 	node = VP_TO_TMPFS_DIR(vp);
1335 	tm = VFS_TO_TMPFS(vp->v_mount);
1336 
1337 	startresid = uio->uio_resid;
1338 
1339 	/* Allocate cookies for NFS and compat modules. */
1340 	if (cookies != NULL && ncookies != NULL) {
1341 		maxcookies = howmany(node->tn_size,
1342 		    sizeof(struct tmpfs_dirent)) + 2;
1343 		*cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
1344 		    M_WAITOK);
1345 		*ncookies = 0;
1346 	}
1347 
1348 	if (cookies == NULL)
1349 		error = tmpfs_dir_getdents(tm, node, uio, 0, NULL, NULL);
1350 	else
1351 		error = tmpfs_dir_getdents(tm, node, uio, maxcookies, *cookies,
1352 		    ncookies);
1353 
1354 	/* Buffer was filled without hitting EOF. */
1355 	if (error == EJUSTRETURN)
1356 		error = (uio->uio_resid != startresid) ? 0 : EINVAL;
1357 
1358 	if (error != 0 && cookies != NULL && ncookies != NULL) {
1359 		free(*cookies, M_TEMP);
1360 		*cookies = NULL;
1361 		*ncookies = 0;
1362 	}
1363 
1364 	if (eofflag != NULL)
1365 		*eofflag =
1366 		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1367 
1368 	return error;
1369 }
1370 
1371 static int
1372 tmpfs_readlink(struct vop_readlink_args *v)
1373 {
1374 	struct vnode *vp = v->a_vp;
1375 	struct uio *uio = v->a_uio;
1376 
1377 	int error;
1378 	struct tmpfs_node *node;
1379 
1380 	MPASS(uio->uio_offset == 0);
1381 	MPASS(vp->v_type == VLNK);
1382 
1383 	node = VP_TO_TMPFS_NODE(vp);
1384 
1385 	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
1386 	    uio);
1387 	tmpfs_set_status(VFS_TO_TMPFS(vp->v_mount), node, TMPFS_NODE_ACCESSED);
1388 
1389 	return (error);
1390 }
1391 
1392 static int
1393 tmpfs_inactive(struct vop_inactive_args *v)
1394 {
1395 	struct vnode *vp;
1396 	struct tmpfs_node *node;
1397 
1398 	vp = v->a_vp;
1399 	node = VP_TO_TMPFS_NODE(vp);
1400 	if (node->tn_links == 0)
1401 		vrecycle(vp);
1402 	else
1403 		tmpfs_check_mtime(vp);
1404 	return (0);
1405 }
1406 
1407 static int
1408 tmpfs_need_inactive(struct vop_need_inactive_args *ap)
1409 {
1410 	struct vnode *vp;
1411 	struct tmpfs_node *node;
1412 	struct vm_object *obj;
1413 
1414 	vp = ap->a_vp;
1415 	node = VP_TO_TMPFS_NODE(vp);
1416 	if (node->tn_links == 0)
1417 		goto need;
1418 	if (vp->v_type == VREG) {
1419 		obj = vp->v_object;
1420 		if (obj->generation != obj->cleangeneration)
1421 			goto need;
1422 	}
1423 	return (0);
1424 need:
1425 	return (1);
1426 }
1427 
1428 int
1429 tmpfs_reclaim(struct vop_reclaim_args *v)
1430 {
1431 	struct vnode *vp = v->a_vp;
1432 
1433 	struct tmpfs_mount *tmp;
1434 	struct tmpfs_node *node;
1435 
1436 	node = VP_TO_TMPFS_NODE(vp);
1437 	tmp = VFS_TO_TMPFS(vp->v_mount);
1438 
1439 	if (vp->v_type == VREG)
1440 		tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
1441 	vp->v_object = NULL;
1442 	if (tmpfs_use_nc(vp))
1443 		cache_purge(vp);
1444 
1445 	TMPFS_NODE_LOCK(node);
1446 	tmpfs_free_vp(vp);
1447 
1448 	/* If the node referenced by this vnode was deleted by the user,
1449 	 * we must free its associated data structures (now that the vnode
1450 	 * is being reclaimed). */
1451 	if (node->tn_links == 0 &&
1452 	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
1453 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1454 		TMPFS_NODE_UNLOCK(node);
1455 		tmpfs_free_node(tmp, node);
1456 	} else
1457 		TMPFS_NODE_UNLOCK(node);
1458 
1459 	MPASS(vp->v_data == NULL);
1460 	return 0;
1461 }
1462 
1463 int
1464 tmpfs_print(struct vop_print_args *v)
1465 {
1466 	struct vnode *vp = v->a_vp;
1467 
1468 	struct tmpfs_node *node;
1469 
1470 	node = VP_TO_TMPFS_NODE(vp);
1471 
1472 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %jd\n",
1473 	    node, node->tn_flags, (uintmax_t)node->tn_links);
1474 	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
1475 	    node->tn_mode, node->tn_uid, node->tn_gid,
1476 	    (intmax_t)node->tn_size, node->tn_status);
1477 
1478 	if (vp->v_type == VFIFO)
1479 		fifo_printinfo(vp);
1480 
1481 	printf("\n");
1482 
1483 	return 0;
1484 }
1485 
1486 int
1487 tmpfs_pathconf(struct vop_pathconf_args *v)
1488 {
1489 	struct vnode *vp = v->a_vp;
1490 	int name = v->a_name;
1491 	long *retval = v->a_retval;
1492 
1493 	int error;
1494 
1495 	error = 0;
1496 
1497 	switch (name) {
1498 	case _PC_LINK_MAX:
1499 		*retval = TMPFS_LINK_MAX;
1500 		break;
1501 
1502 	case _PC_NAME_MAX:
1503 		*retval = NAME_MAX;
1504 		break;
1505 
1506 	case _PC_PIPE_BUF:
1507 		if (vp->v_type == VDIR || vp->v_type == VFIFO)
1508 			*retval = PIPE_BUF;
1509 		else
1510 			error = EINVAL;
1511 		break;
1512 
1513 	case _PC_CHOWN_RESTRICTED:
1514 		*retval = 1;
1515 		break;
1516 
1517 	case _PC_NO_TRUNC:
1518 		*retval = 1;
1519 		break;
1520 
1521 	case _PC_SYNC_IO:
1522 		*retval = 1;
1523 		break;
1524 
1525 	case _PC_FILESIZEBITS:
1526 		*retval = 64;
1527 		break;
1528 
1529 	default:
1530 		error = vop_stdpathconf(v);
1531 	}
1532 
1533 	return error;
1534 }
1535 
1536 static int
1537 tmpfs_vptofh(struct vop_vptofh_args *ap)
1538 /*
1539 vop_vptofh {
1540 	IN struct vnode *a_vp;
1541 	IN struct fid *a_fhp;
1542 };
1543 */
1544 {
1545 	struct tmpfs_fid_data tfd;
1546 	struct tmpfs_node *node;
1547 	struct fid *fhp;
1548 
1549 	node = VP_TO_TMPFS_NODE(ap->a_vp);
1550 	fhp = ap->a_fhp;
1551 	fhp->fid_len = sizeof(tfd);
1552 
1553 	/*
1554 	 * Copy into fid_data from the stack to avoid unaligned pointer use.
1555 	 * See the comment in sys/mount.h on struct fid for details.
1556 	 */
1557 	tfd.tfd_id = node->tn_id;
1558 	tfd.tfd_gen = node->tn_gen;
1559 	memcpy(fhp->fid_data, &tfd, fhp->fid_len);
1560 
1561 	return (0);
1562 }
1563 
1564 static int
1565 tmpfs_whiteout(struct vop_whiteout_args *ap)
1566 {
1567 	struct vnode *dvp = ap->a_dvp;
1568 	struct componentname *cnp = ap->a_cnp;
1569 	struct tmpfs_dirent *de;
1570 
1571 	switch (ap->a_flags) {
1572 	case LOOKUP:
1573 		return (0);
1574 	case CREATE:
1575 		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
1576 		if (de != NULL)
1577 			return (de->td_node == NULL ? 0 : EEXIST);
1578 		return (tmpfs_dir_whiteout_add(dvp, cnp));
1579 	case DELETE:
1580 		tmpfs_dir_whiteout_remove(dvp, cnp);
1581 		return (0);
1582 	default:
1583 		panic("tmpfs_whiteout: unknown op");
1584 	}
1585 }
1586 
1587 static int
1588 tmpfs_vptocnp_dir(struct tmpfs_node *tn, struct tmpfs_node *tnp,
1589     struct tmpfs_dirent **pde)
1590 {
1591 	struct tmpfs_dir_cursor dc;
1592 	struct tmpfs_dirent *de;
1593 
1594 	for (de = tmpfs_dir_first(tnp, &dc); de != NULL;
1595 	     de = tmpfs_dir_next(tnp, &dc)) {
1596 		if (de->td_node == tn) {
1597 			*pde = de;
1598 			return (0);
1599 		}
1600 	}
1601 	return (ENOENT);
1602 }
1603 
1604 static int
1605 tmpfs_vptocnp_fill(struct vnode *vp, struct tmpfs_node *tn,
1606     struct tmpfs_node *tnp, char *buf, size_t *buflen, struct vnode **dvp)
1607 {
1608 	struct tmpfs_dirent *de;
1609 	int error, i;
1610 
1611 	error = vn_vget_ino_gen(vp, tmpfs_vn_get_ino_alloc, tnp, LK_SHARED,
1612 	    dvp);
1613 	if (error != 0)
1614 		return (error);
1615 	error = tmpfs_vptocnp_dir(tn, tnp, &de);
1616 	if (error == 0) {
1617 		i = *buflen;
1618 		i -= de->td_namelen;
1619 		if (i < 0) {
1620 			error = ENOMEM;
1621 		} else {
1622 			bcopy(de->ud.td_name, buf + i, de->td_namelen);
1623 			*buflen = i;
1624 		}
1625 	}
1626 	if (error == 0) {
1627 		if (vp != *dvp)
1628 			VOP_UNLOCK(*dvp);
1629 	} else {
1630 		if (vp != *dvp)
1631 			vput(*dvp);
1632 		else
1633 			vrele(vp);
1634 	}
1635 	return (error);
1636 }
1637 
1638 static int
1639 tmpfs_vptocnp(struct vop_vptocnp_args *ap)
1640 {
1641 	struct vnode *vp, **dvp;
1642 	struct tmpfs_node *tn, *tnp, *tnp1;
1643 	struct tmpfs_dirent *de;
1644 	struct tmpfs_mount *tm;
1645 	char *buf;
1646 	size_t *buflen;
1647 	int error;
1648 
1649 	vp = ap->a_vp;
1650 	dvp = ap->a_vpp;
1651 	buf = ap->a_buf;
1652 	buflen = ap->a_buflen;
1653 
1654 	tm = VFS_TO_TMPFS(vp->v_mount);
1655 	tn = VP_TO_TMPFS_NODE(vp);
1656 	if (tn->tn_type == VDIR) {
1657 		tnp = tn->tn_dir.tn_parent;
1658 		if (tnp == NULL)
1659 			return (ENOENT);
1660 		tmpfs_ref_node(tnp);
1661 		error = tmpfs_vptocnp_fill(vp, tn, tn->tn_dir.tn_parent, buf,
1662 		    buflen, dvp);
1663 		tmpfs_free_node(tm, tnp);
1664 		return (error);
1665 	}
1666 restart:
1667 	TMPFS_LOCK(tm);
1668 	LIST_FOREACH_SAFE(tnp, &tm->tm_nodes_used, tn_entries, tnp1) {
1669 		if (tnp->tn_type != VDIR)
1670 			continue;
1671 		TMPFS_NODE_LOCK(tnp);
1672 		tmpfs_ref_node_locked(tnp);
1673 
1674 		/*
1675 		 * tn_vnode cannot be instantiated while we hold the
1676 		 * node lock, so the directory cannot be changed while
1677 		 * we iterate over it.  Do this to avoid instantiating
1678 		 * vnode for directories which cannot point to our
1679 		 * node.
1680 		 */
1681 		error = tnp->tn_vnode == NULL ? tmpfs_vptocnp_dir(tn, tnp,
1682 		    &de) : 0;
1683 
1684 		if (error == 0) {
1685 			TMPFS_NODE_UNLOCK(tnp);
1686 			TMPFS_UNLOCK(tm);
1687 			error = tmpfs_vptocnp_fill(vp, tn, tnp, buf, buflen,
1688 			    dvp);
1689 			if (error == 0) {
1690 				tmpfs_free_node(tm, tnp);
1691 				return (0);
1692 			}
1693 			if (VN_IS_DOOMED(vp)) {
1694 				tmpfs_free_node(tm, tnp);
1695 				return (ENOENT);
1696 			}
1697 			TMPFS_LOCK(tm);
1698 			TMPFS_NODE_LOCK(tnp);
1699 		}
1700 		if (tmpfs_free_node_locked(tm, tnp, false)) {
1701 			goto restart;
1702 		} else {
1703 			KASSERT(tnp->tn_refcount > 0,
1704 			    ("node %p refcount zero", tnp));
1705 			tnp1 = LIST_NEXT(tnp, tn_entries);
1706 			TMPFS_NODE_UNLOCK(tnp);
1707 		}
1708 	}
1709 	TMPFS_UNLOCK(tm);
1710 	return (ENOENT);
1711 }
1712 
1713 /*
1714  * Vnode operations vector used for files stored in a tmpfs file system.
1715  */
1716 struct vop_vector tmpfs_vnodeop_entries = {
1717 	.vop_default =			&default_vnodeops,
1718 	.vop_lookup =			vfs_cache_lookup,
1719 	.vop_cachedlookup =		tmpfs_cached_lookup,
1720 	.vop_create =			tmpfs_create,
1721 	.vop_mknod =			tmpfs_mknod,
1722 	.vop_open =			tmpfs_open,
1723 	.vop_close =			tmpfs_close,
1724 	.vop_fplookup_vexec =		tmpfs_fplookup_vexec,
1725 	.vop_access =			tmpfs_access,
1726 	.vop_stat =			tmpfs_stat,
1727 	.vop_getattr =			tmpfs_getattr,
1728 	.vop_setattr =			tmpfs_setattr,
1729 	.vop_read =			tmpfs_read,
1730 	.vop_write =			tmpfs_write,
1731 	.vop_fsync =			tmpfs_fsync,
1732 	.vop_remove =			tmpfs_remove,
1733 	.vop_link =			tmpfs_link,
1734 	.vop_rename =			tmpfs_rename,
1735 	.vop_mkdir =			tmpfs_mkdir,
1736 	.vop_rmdir =			tmpfs_rmdir,
1737 	.vop_symlink =			tmpfs_symlink,
1738 	.vop_readdir =			tmpfs_readdir,
1739 	.vop_readlink =			tmpfs_readlink,
1740 	.vop_inactive =			tmpfs_inactive,
1741 	.vop_need_inactive =		tmpfs_need_inactive,
1742 	.vop_reclaim =			tmpfs_reclaim,
1743 	.vop_print =			tmpfs_print,
1744 	.vop_pathconf =			tmpfs_pathconf,
1745 	.vop_vptofh =			tmpfs_vptofh,
1746 	.vop_whiteout =			tmpfs_whiteout,
1747 	.vop_bmap =			VOP_EOPNOTSUPP,
1748 	.vop_vptocnp =			tmpfs_vptocnp,
1749 	.vop_lock1 =			vop_lock,
1750 	.vop_unlock = 			vop_unlock,
1751 	.vop_islocked = 		vop_islocked,
1752 };
1753 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_entries);
1754 
1755 /*
1756  * Same vector for mounts which do not use namecache.
1757  */
1758 struct vop_vector tmpfs_vnodeop_nonc_entries = {
1759 	.vop_default =			&tmpfs_vnodeop_entries,
1760 	.vop_lookup =			tmpfs_lookup,
1761 };
1762 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_nonc_entries);
1763