xref: /freebsd/sys/fs/tmpfs/tmpfs_subr.c (revision 7c72c0822b946626909f5a538d66a43d63c778dc)
1ad3638eeSXin LI /*	$NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $	*/
2d1fa59e9SXin LI 
3e08d5567SXin LI /*-
4b61a5730SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
5d63027b6SPedro F. Giffuni  *
6d1fa59e9SXin LI  * Copyright (c) 2005 The NetBSD Foundation, Inc.
7d1fa59e9SXin LI  * All rights reserved.
8d1fa59e9SXin LI  *
9d1fa59e9SXin LI  * This code is derived from software contributed to The NetBSD Foundation
10d1fa59e9SXin LI  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11d1fa59e9SXin LI  * 2005 program.
12d1fa59e9SXin LI  *
13d1fa59e9SXin LI  * Redistribution and use in source and binary forms, with or without
14d1fa59e9SXin LI  * modification, are permitted provided that the following conditions
15d1fa59e9SXin LI  * are met:
16d1fa59e9SXin LI  * 1. Redistributions of source code must retain the above copyright
17d1fa59e9SXin LI  *    notice, this list of conditions and the following disclaimer.
18d1fa59e9SXin LI  * 2. Redistributions in binary form must reproduce the above copyright
19d1fa59e9SXin LI  *    notice, this list of conditions and the following disclaimer in the
20d1fa59e9SXin LI  *    documentation and/or other materials provided with the distribution.
21d1fa59e9SXin LI  *
22d1fa59e9SXin LI  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23d1fa59e9SXin LI  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24d1fa59e9SXin LI  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25d1fa59e9SXin LI  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26d1fa59e9SXin LI  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27d1fa59e9SXin LI  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28d1fa59e9SXin LI  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29d1fa59e9SXin LI  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30d1fa59e9SXin LI  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31d1fa59e9SXin LI  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32d1fa59e9SXin LI  * POSSIBILITY OF SUCH DAMAGE.
33d1fa59e9SXin LI  */
34d1fa59e9SXin LI 
35d1fa59e9SXin LI /*
36d1fa59e9SXin LI  * Efficient memory file system supporting functions.
37d1fa59e9SXin LI  */
38fdafd315SWarner Losh 
39d1fa59e9SXin LI #include <sys/param.h>
406d2e2df7SMark Johnston #include <sys/systm.h>
41135beaf6SGleb Smirnoff #include <sys/dirent.h>
424fd5efe7SGleb Kurtsou #include <sys/fnv_hash.h>
4389f6b863SAttilio Rao #include <sys/lock.h>
44135beaf6SGleb Smirnoff #include <sys/limits.h>
45135beaf6SGleb Smirnoff #include <sys/mount.h>
46d1fa59e9SXin LI #include <sys/namei.h>
47d1fa59e9SXin LI #include <sys/priv.h>
48d1fa59e9SXin LI #include <sys/proc.h>
49d1b06863SMark Murray #include <sys/random.h>
504601f5f5SKonstantin Belousov #include <sys/refcount.h>
5189f6b863SAttilio Rao #include <sys/rwlock.h>
52081e36e7SKonstantin Belousov #include <sys/smr.h>
53d1fa59e9SXin LI #include <sys/stat.h>
54db94ad12SGleb Kurtsou #include <sys/sysctl.h>
5528bc23abSKonstantin Belousov #include <sys/user.h>
56d1fa59e9SXin LI #include <sys/vnode.h>
57d1fa59e9SXin LI #include <sys/vmmeter.h>
58d1fa59e9SXin LI 
59d1fa59e9SXin LI #include <vm/vm.h>
601c771f92SKonstantin Belousov #include <vm/vm_param.h>
61d1fa59e9SXin LI #include <vm/vm_object.h>
62d1fa59e9SXin LI #include <vm/vm_page.h>
632971897dSAlan Cox #include <vm/vm_pageout.h>
64d1fa59e9SXin LI #include <vm/vm_pager.h>
65d1fa59e9SXin LI #include <vm/vm_extern.h>
66135beaf6SGleb Smirnoff #include <vm/swap_pager.h>
676bb132baSBrooks Davis #include <vm/uma.h>
68d1fa59e9SXin LI 
69d1fa59e9SXin LI #include <fs/tmpfs/tmpfs.h>
70d1fa59e9SXin LI #include <fs/tmpfs/tmpfs_fifoops.h>
71d1fa59e9SXin LI #include <fs/tmpfs/tmpfs_vnops.h>
72d1fa59e9SXin LI 
737029da5cSPawel Biernacki SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
747029da5cSPawel Biernacki     "tmpfs file system");
75db94ad12SGleb Kurtsou 
76da7aa277SGleb Kurtsou static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED;
7763659234SMike Karels static long tmpfs_pages_avail_init;
7863659234SMike Karels static int tmpfs_mem_percent = TMPFS_MEM_PERCENT;
7963659234SMike Karels static void tmpfs_set_reserve_from_percent(void);
80da7aa277SGleb Kurtsou 
817c58c37eSMateusz Guzik MALLOC_DEFINE(M_TMPFSDIR, "tmpfs dir", "tmpfs dirent structure");
82a51c8071SKonstantin Belousov static uma_zone_t tmpfs_node_pool;
83172ffe70SMateusz Guzik VFS_SMR_DECLARE;
84a51c8071SKonstantin Belousov 
8528bc23abSKonstantin Belousov int tmpfs_pager_type = -1;
8628bc23abSKonstantin Belousov 
8728bc23abSKonstantin Belousov static vm_object_t
8828bc23abSKonstantin Belousov tmpfs_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
8928bc23abSKonstantin Belousov     vm_ooffset_t offset, struct ucred *cred)
9028bc23abSKonstantin Belousov {
9128bc23abSKonstantin Belousov 	vm_object_t object;
9228bc23abSKonstantin Belousov 
9328bc23abSKonstantin Belousov 	MPASS(handle == NULL);
9428bc23abSKonstantin Belousov 	MPASS(offset == 0);
9528bc23abSKonstantin Belousov 	object = vm_object_allocate_dyn(tmpfs_pager_type, size,
9628bc23abSKonstantin Belousov 	    OBJ_COLORED | OBJ_SWAP);
9728bc23abSKonstantin Belousov 	if (!swap_pager_init_object(object, NULL, NULL, size, 0)) {
9828bc23abSKonstantin Belousov 		vm_object_deallocate(object);
9928bc23abSKonstantin Belousov 		object = NULL;
10028bc23abSKonstantin Belousov 	}
10128bc23abSKonstantin Belousov 	return (object);
10228bc23abSKonstantin Belousov }
10328bc23abSKonstantin Belousov 
104eec2e4efSMateusz Guzik /*
105eec2e4efSMateusz Guzik  * Make sure tmpfs vnodes with writable mappings can be found on the lazy list.
106eec2e4efSMateusz Guzik  *
107eec2e4efSMateusz Guzik  * This allows for periodic mtime updates while only scanning vnodes which are
108eec2e4efSMateusz Guzik  * plausibly dirty, see tmpfs_update_mtime_lazy.
109eec2e4efSMateusz Guzik  */
110eec2e4efSMateusz Guzik static void
111eec2e4efSMateusz Guzik tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old,
112eec2e4efSMateusz Guzik     vm_offset_t new)
113eec2e4efSMateusz Guzik {
114eec2e4efSMateusz Guzik 	struct vnode *vp;
115eec2e4efSMateusz Guzik 
116eec2e4efSMateusz Guzik 	VM_OBJECT_ASSERT_WLOCKED(object);
117eec2e4efSMateusz Guzik 
118d9dc64f1SKonstantin Belousov 	vp = VM_TO_TMPFS_VP(object);
119eec2e4efSMateusz Guzik 
120eec2e4efSMateusz Guzik 	/*
121eec2e4efSMateusz Guzik 	 * Forced unmount?
122eec2e4efSMateusz Guzik 	 */
12346811949SKonstantin Belousov 	if (vp == NULL || vp->v_object == NULL) {
124eec2e4efSMateusz Guzik 		KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
1250f01fb01SKonstantin Belousov 		    ("object %p with OBJ_TMPFS_VREF but without vnode",
1260f01fb01SKonstantin Belousov 		    object));
127eec2e4efSMateusz Guzik 		VM_OBJECT_WUNLOCK(object);
128eec2e4efSMateusz Guzik 		return;
129eec2e4efSMateusz Guzik 	}
130eec2e4efSMateusz Guzik 
131eec2e4efSMateusz Guzik 	if (old == 0) {
132eec2e4efSMateusz Guzik 		VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp,
133eec2e4efSMateusz Guzik 		    ("object without writable mappings has a reference"));
134eec2e4efSMateusz Guzik 		VNPASS(vp->v_usecount > 0, vp);
135eec2e4efSMateusz Guzik 	} else {
136eec2e4efSMateusz Guzik 		VNASSERT((object->flags & OBJ_TMPFS_VREF) != 0, vp,
1370f01fb01SKonstantin Belousov 		    ("object with writable mappings does not "
1380f01fb01SKonstantin Belousov 		    "have a reference"));
139eec2e4efSMateusz Guzik 	}
140eec2e4efSMateusz Guzik 
141eec2e4efSMateusz Guzik 	if (old == new) {
142eec2e4efSMateusz Guzik 		VM_OBJECT_WUNLOCK(object);
143eec2e4efSMateusz Guzik 		return;
144eec2e4efSMateusz Guzik 	}
145eec2e4efSMateusz Guzik 
146eec2e4efSMateusz Guzik 	if (new == 0) {
147eec2e4efSMateusz Guzik 		vm_object_clear_flag(object, OBJ_TMPFS_VREF);
148eec2e4efSMateusz Guzik 		VM_OBJECT_WUNLOCK(object);
149eec2e4efSMateusz Guzik 		vrele(vp);
150eec2e4efSMateusz Guzik 	} else {
151eec2e4efSMateusz Guzik 		if ((object->flags & OBJ_TMPFS_VREF) == 0) {
152eec2e4efSMateusz Guzik 			vref(vp);
153eec2e4efSMateusz Guzik 			vlazy(vp);
154eec2e4efSMateusz Guzik 			vm_object_set_flag(object, OBJ_TMPFS_VREF);
155eec2e4efSMateusz Guzik 		}
156eec2e4efSMateusz Guzik 		VM_OBJECT_WUNLOCK(object);
157eec2e4efSMateusz Guzik 	}
158eec2e4efSMateusz Guzik }
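/*
 * A minimal sketch of the consumer side (assumed shape, not the actual
 * tmpfs_update_mtime_lazy code): the guarantee above lets the periodic
 * mtime sync walk only the lazy list, roughly
 *
 *	MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, filter_cb, NULL) {
 *		if (vp->v_type == VREG && vm_object_mightbedirty(vp->v_object))
 *			tmpfs_check_mtime(vp);
 *	}
 *
 * where filter_cb and the exact checks are assumptions; the point is that
 * tmpfs vnodes with writable mappings are guaranteed to appear on the list.
 */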
159eec2e4efSMateusz Guzik 
160eec2e4efSMateusz Guzik static void
161eec2e4efSMateusz Guzik tmpfs_pager_update_writecount(vm_object_t object, vm_offset_t start,
162eec2e4efSMateusz Guzik     vm_offset_t end)
163eec2e4efSMateusz Guzik {
164eec2e4efSMateusz Guzik 	vm_offset_t new, old;
165eec2e4efSMateusz Guzik 
166eec2e4efSMateusz Guzik 	VM_OBJECT_WLOCK(object);
167eec2e4efSMateusz Guzik 	KASSERT((object->flags & OBJ_ANON) == 0,
168eec2e4efSMateusz Guzik 	    ("%s: object %p with OBJ_ANON", __func__, object));
169eec2e4efSMateusz Guzik 	old = object->un_pager.swp.writemappings;
170eec2e4efSMateusz Guzik 	object->un_pager.swp.writemappings += (vm_ooffset_t)end - start;
171eec2e4efSMateusz Guzik 	new = object->un_pager.swp.writemappings;
172eec2e4efSMateusz Guzik 	tmpfs_pager_writecount_recalc(object, old, new);
173eec2e4efSMateusz Guzik 	VM_OBJECT_ASSERT_UNLOCKED(object);
174eec2e4efSMateusz Guzik }
175eec2e4efSMateusz Guzik 
176eec2e4efSMateusz Guzik static void
177eec2e4efSMateusz Guzik tmpfs_pager_release_writecount(vm_object_t object, vm_offset_t start,
178eec2e4efSMateusz Guzik     vm_offset_t end)
179eec2e4efSMateusz Guzik {
180eec2e4efSMateusz Guzik 	vm_offset_t new, old;
181eec2e4efSMateusz Guzik 
182eec2e4efSMateusz Guzik 	VM_OBJECT_WLOCK(object);
183eec2e4efSMateusz Guzik 	KASSERT((object->flags & OBJ_ANON) == 0,
184eec2e4efSMateusz Guzik 	    ("%s: object %p with OBJ_ANON", __func__, object));
185eec2e4efSMateusz Guzik 	old = object->un_pager.swp.writemappings;
1866ada4e8aSKonstantin Belousov 	KASSERT(old >= (vm_ooffset_t)end - start,
1876ada4e8aSKonstantin Belousov 	    ("tmpfs obj %p writecount %jx dec %jx", object, (uintmax_t)old,
1886ada4e8aSKonstantin Belousov 	    (uintmax_t)((vm_ooffset_t)end - start)));
189eec2e4efSMateusz Guzik 	object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start;
190eec2e4efSMateusz Guzik 	new = object->un_pager.swp.writemappings;
191eec2e4efSMateusz Guzik 	tmpfs_pager_writecount_recalc(object, old, new);
192eec2e4efSMateusz Guzik 	VM_OBJECT_ASSERT_UNLOCKED(object);
193eec2e4efSMateusz Guzik }
194eec2e4efSMateusz Guzik 
19528bc23abSKonstantin Belousov static void
19628bc23abSKonstantin Belousov tmpfs_pager_getvp(vm_object_t object, struct vnode **vpp, bool *vp_heldp)
19728bc23abSKonstantin Belousov {
19828bc23abSKonstantin Belousov 	struct vnode *vp;
19928bc23abSKonstantin Belousov 
20028bc23abSKonstantin Belousov 	/*
20128bc23abSKonstantin Belousov 	 * A tmpfs VREG node whose vnode was reclaimed still has an object
202d9dc64f1SKonstantin Belousov 	 * of tmpfs_pager_type.  In this case there is no v_writecount to adjust.
20328bc23abSKonstantin Belousov 	 */
20428bc23abSKonstantin Belousov 	if (vp_heldp != NULL)
20528bc23abSKonstantin Belousov 		VM_OBJECT_RLOCK(object);
20628bc23abSKonstantin Belousov 	else
20728bc23abSKonstantin Belousov 		VM_OBJECT_ASSERT_LOCKED(object);
20828bc23abSKonstantin Belousov 	if ((object->flags & OBJ_TMPFS) != 0) {
209d9dc64f1SKonstantin Belousov 		vp = VM_TO_TMPFS_VP(object);
21028bc23abSKonstantin Belousov 		if (vp != NULL) {
21128bc23abSKonstantin Belousov 			*vpp = vp;
21228bc23abSKonstantin Belousov 			if (vp_heldp != NULL) {
21328bc23abSKonstantin Belousov 				vhold(vp);
21428bc23abSKonstantin Belousov 				*vp_heldp = true;
21528bc23abSKonstantin Belousov 			}
21628bc23abSKonstantin Belousov 		}
21728bc23abSKonstantin Belousov 	}
21828bc23abSKonstantin Belousov 	if (vp_heldp != NULL)
21928bc23abSKonstantin Belousov 		VM_OBJECT_RUNLOCK(object);
22028bc23abSKonstantin Belousov }
22128bc23abSKonstantin Belousov 
22237aea264SKonstantin Belousov static void
22337aea264SKonstantin Belousov tmpfs_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size)
22437aea264SKonstantin Belousov {
22537aea264SKonstantin Belousov 	struct tmpfs_node *node;
22637aea264SKonstantin Belousov 	struct tmpfs_mount *tm;
22737aea264SKonstantin Belousov 	vm_size_t c;
22837aea264SKonstantin Belousov 
22937aea264SKonstantin Belousov 	swap_pager_freespace(obj, start, size, &c);
23037aea264SKonstantin Belousov 	if ((obj->flags & OBJ_TMPFS) == 0 || c == 0)
23137aea264SKonstantin Belousov 		return;
23237aea264SKonstantin Belousov 
23337aea264SKonstantin Belousov 	node = obj->un_pager.swp.swp_priv;
23437aea264SKonstantin Belousov 	MPASS(node->tn_type == VREG);
23537aea264SKonstantin Belousov 	tm = node->tn_reg.tn_tmp;
23637aea264SKonstantin Belousov 
23737aea264SKonstantin Belousov 	KASSERT(tm->tm_pages_used >= c,
23837aea264SKonstantin Belousov 	    ("tmpfs tm %p pages %jd free %jd", tm,
23937aea264SKonstantin Belousov 	    (uintmax_t)tm->tm_pages_used, (uintmax_t)c));
24037aea264SKonstantin Belousov 	atomic_add_long(&tm->tm_pages_used, -c);
24137aea264SKonstantin Belousov 	KASSERT(node->tn_reg.tn_pages >= c,
24237aea264SKonstantin Belousov 	    ("tmpfs node %p pages %jd free %jd", node,
24337aea264SKonstantin Belousov 	    (uintmax_t)node->tn_reg.tn_pages, (uintmax_t)c));
24437aea264SKonstantin Belousov 	node->tn_reg.tn_pages -= c;
24537aea264SKonstantin Belousov }
24637aea264SKonstantin Belousov 
24737aea264SKonstantin Belousov static void
24837aea264SKonstantin Belousov tmpfs_page_inserted(vm_object_t obj, vm_page_t m)
24937aea264SKonstantin Belousov {
25037aea264SKonstantin Belousov 	struct tmpfs_node *node;
25137aea264SKonstantin Belousov 	struct tmpfs_mount *tm;
25237aea264SKonstantin Belousov 
25337aea264SKonstantin Belousov 	if ((obj->flags & OBJ_TMPFS) == 0)
25437aea264SKonstantin Belousov 		return;
25537aea264SKonstantin Belousov 
25637aea264SKonstantin Belousov 	node = obj->un_pager.swp.swp_priv;
25737aea264SKonstantin Belousov 	MPASS(node->tn_type == VREG);
25837aea264SKonstantin Belousov 	tm = node->tn_reg.tn_tmp;
25937aea264SKonstantin Belousov 
26037aea264SKonstantin Belousov 	if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) {
26137aea264SKonstantin Belousov 		atomic_add_long(&tm->tm_pages_used, 1);
26237aea264SKonstantin Belousov 		node->tn_reg.tn_pages += 1;
26337aea264SKonstantin Belousov 	}
26437aea264SKonstantin Belousov }
26537aea264SKonstantin Belousov 
26637aea264SKonstantin Belousov static void
26737aea264SKonstantin Belousov tmpfs_page_removed(vm_object_t obj, vm_page_t m)
26837aea264SKonstantin Belousov {
26937aea264SKonstantin Belousov 	struct tmpfs_node *node;
27037aea264SKonstantin Belousov 	struct tmpfs_mount *tm;
27137aea264SKonstantin Belousov 
27237aea264SKonstantin Belousov 	if ((obj->flags & OBJ_TMPFS) == 0)
27337aea264SKonstantin Belousov 		return;
27437aea264SKonstantin Belousov 
27537aea264SKonstantin Belousov 	node = obj->un_pager.swp.swp_priv;
27637aea264SKonstantin Belousov 	MPASS(node->tn_type == VREG);
27737aea264SKonstantin Belousov 	tm = node->tn_reg.tn_tmp;
27837aea264SKonstantin Belousov 
27937aea264SKonstantin Belousov 	if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) {
28037aea264SKonstantin Belousov 		KASSERT(tm->tm_pages_used >= 1,
28137aea264SKonstantin Belousov 		    ("tmpfs tm %p pages %jd free 1", tm,
28237aea264SKonstantin Belousov 		    (uintmax_t)tm->tm_pages_used));
28337aea264SKonstantin Belousov 		atomic_add_long(&tm->tm_pages_used, -1);
28437aea264SKonstantin Belousov 		KASSERT(node->tn_reg.tn_pages >= 1,
28537aea264SKonstantin Belousov 		    ("tmpfs node %p pages %jd free 1", node,
28637aea264SKonstantin Belousov 		    (uintmax_t)node->tn_reg.tn_pages));
28737aea264SKonstantin Belousov 		node->tn_reg.tn_pages -= 1;
28837aea264SKonstantin Belousov 	}
28937aea264SKonstantin Belousov }
29037aea264SKonstantin Belousov 
29137aea264SKonstantin Belousov static boolean_t
29237aea264SKonstantin Belousov tmpfs_can_alloc_page(vm_object_t obj, vm_pindex_t pindex)
29337aea264SKonstantin Belousov {
29437aea264SKonstantin Belousov 	struct tmpfs_mount *tm;
29537aea264SKonstantin Belousov 
29637aea264SKonstantin Belousov 	tm = VM_TO_TMPFS_MP(obj);
29737aea264SKonstantin Belousov 	if (tm == NULL || vm_pager_has_page(obj, pindex, NULL, NULL) ||
29837aea264SKonstantin Belousov 	    tm->tm_pages_max == 0)
29937aea264SKonstantin Belousov 		return (true);
300ed19c098SMike Karels 	if (tm->tm_pages_max == ULONG_MAX)
301ed19c098SMike Karels 		return (tmpfs_mem_avail() >= 1);
30237aea264SKonstantin Belousov 	return (tm->tm_pages_max > atomic_load_long(&tm->tm_pages_used));
30337aea264SKonstantin Belousov }
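/*
 * Worked example with illustrative numbers: for a mount created with a
 * 100 MiB size limit on a 4 KiB page system, tm_pages_max == 25600; once
 * tm_pages_used reaches 25600 this returns false and the new page is not
 * allocated.  With tm_pages_max == 0 the check always passes, and with
 * ULONG_MAX only the global tmpfs_mem_avail() reserve is consulted.
 */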
30437aea264SKonstantin Belousov 
30528bc23abSKonstantin Belousov struct pagerops tmpfs_pager_ops = {
30628bc23abSKonstantin Belousov 	.pgo_kvme_type = KVME_TYPE_VNODE,
30728bc23abSKonstantin Belousov 	.pgo_alloc = tmpfs_pager_alloc,
30828bc23abSKonstantin Belousov 	.pgo_set_writeable_dirty = vm_object_set_writeable_dirty_,
309eec2e4efSMateusz Guzik 	.pgo_update_writecount = tmpfs_pager_update_writecount,
310eec2e4efSMateusz Guzik 	.pgo_release_writecount = tmpfs_pager_release_writecount,
31128bc23abSKonstantin Belousov 	.pgo_mightbedirty = vm_object_mightbedirty_,
31228bc23abSKonstantin Belousov 	.pgo_getvp = tmpfs_pager_getvp,
31337aea264SKonstantin Belousov 	.pgo_freespace = tmpfs_pager_freespace,
31437aea264SKonstantin Belousov 	.pgo_page_inserted = tmpfs_page_inserted,
31537aea264SKonstantin Belousov 	.pgo_page_removed = tmpfs_page_removed,
31637aea264SKonstantin Belousov 	.pgo_can_alloc_page = tmpfs_can_alloc_page,
31728bc23abSKonstantin Belousov };
31828bc23abSKonstantin Belousov 
319a51c8071SKonstantin Belousov static int
320a51c8071SKonstantin Belousov tmpfs_node_ctor(void *mem, int size, void *arg, int flags)
321a51c8071SKonstantin Belousov {
322a51c8071SKonstantin Belousov 	struct tmpfs_node *node;
323a51c8071SKonstantin Belousov 
324a51c8071SKonstantin Belousov 	node = mem;
325a51c8071SKonstantin Belousov 	node->tn_gen++;
326a51c8071SKonstantin Belousov 	node->tn_size = 0;
327a51c8071SKonstantin Belousov 	node->tn_status = 0;
328016b7c7eSKonstantin Belousov 	node->tn_accessed = false;
329a51c8071SKonstantin Belousov 	node->tn_flags = 0;
330a51c8071SKonstantin Belousov 	node->tn_links = 0;
331a51c8071SKonstantin Belousov 	node->tn_vnode = NULL;
332a51c8071SKonstantin Belousov 	node->tn_vpstate = 0;
333a51c8071SKonstantin Belousov 	return (0);
334a51c8071SKonstantin Belousov }
335a51c8071SKonstantin Belousov 
336a51c8071SKonstantin Belousov static void
337a51c8071SKonstantin Belousov tmpfs_node_dtor(void *mem, int size, void *arg)
338a51c8071SKonstantin Belousov {
339a51c8071SKonstantin Belousov 	struct tmpfs_node *node;
340a51c8071SKonstantin Belousov 
341a51c8071SKonstantin Belousov 	node = mem;
342a51c8071SKonstantin Belousov 	node->tn_type = VNON;
343a51c8071SKonstantin Belousov }
344a51c8071SKonstantin Belousov 
345a51c8071SKonstantin Belousov static int
346a51c8071SKonstantin Belousov tmpfs_node_init(void *mem, int size, int flags)
347a51c8071SKonstantin Belousov {
348a51c8071SKonstantin Belousov 	struct tmpfs_node *node;
349a51c8071SKonstantin Belousov 
350a51c8071SKonstantin Belousov 	node = mem;
351a51c8071SKonstantin Belousov 	node->tn_id = 0;
3526bd3f23aSRyan Libby 	mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF | MTX_NEW);
353a51c8071SKonstantin Belousov 	node->tn_gen = arc4random();
354a51c8071SKonstantin Belousov 	return (0);
355a51c8071SKonstantin Belousov }
356a51c8071SKonstantin Belousov 
357a51c8071SKonstantin Belousov static void
358a51c8071SKonstantin Belousov tmpfs_node_fini(void *mem, int size)
359a51c8071SKonstantin Belousov {
360a51c8071SKonstantin Belousov 	struct tmpfs_node *node;
361a51c8071SKonstantin Belousov 
362a51c8071SKonstantin Belousov 	node = mem;
363a51c8071SKonstantin Belousov 	mtx_destroy(&node->tn_interlock);
364a51c8071SKonstantin Belousov }
365a51c8071SKonstantin Belousov 
36628bc23abSKonstantin Belousov int
367a51c8071SKonstantin Belousov tmpfs_subr_init(void)
368a51c8071SKonstantin Belousov {
36928bc23abSKonstantin Belousov 	tmpfs_pager_type = vm_pager_alloc_dyn_type(&tmpfs_pager_ops,
37028bc23abSKonstantin Belousov 	    OBJT_SWAP);
37128bc23abSKonstantin Belousov 	if (tmpfs_pager_type == -1)
37228bc23abSKonstantin Belousov 		return (EINVAL);
373a51c8071SKonstantin Belousov 	tmpfs_node_pool = uma_zcreate("TMPFS node",
374a51c8071SKonstantin Belousov 	    sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor,
375a51c8071SKonstantin Belousov 	    tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0);
376172ffe70SMateusz Guzik 	VFS_SMR_ZONE_SET(tmpfs_node_pool);
37763659234SMike Karels 
37863659234SMike Karels 	tmpfs_pages_avail_init = tmpfs_mem_avail();
37963659234SMike Karels 	tmpfs_set_reserve_from_percent();
38028bc23abSKonstantin Belousov 	return (0);
381a51c8071SKonstantin Belousov }
382a51c8071SKonstantin Belousov 
383a51c8071SKonstantin Belousov void
384a51c8071SKonstantin Belousov tmpfs_subr_uninit(void)
385a51c8071SKonstantin Belousov {
38628bc23abSKonstantin Belousov 	if (tmpfs_pager_type != -1)
38728bc23abSKonstantin Belousov 		vm_pager_free_dyn_type(tmpfs_pager_type);
38828bc23abSKonstantin Belousov 	tmpfs_pager_type = -1;
389a51c8071SKonstantin Belousov 	uma_zdestroy(tmpfs_node_pool);
390a51c8071SKonstantin Belousov }
391a51c8071SKonstantin Belousov 
392da7aa277SGleb Kurtsou static int
393da7aa277SGleb Kurtsou sysctl_mem_reserved(SYSCTL_HANDLER_ARGS)
394da7aa277SGleb Kurtsou {
395da7aa277SGleb Kurtsou 	int error;
396da7aa277SGleb Kurtsou 	long pages, bytes;
397da7aa277SGleb Kurtsou 
398da7aa277SGleb Kurtsou 	pages = *(long *)arg1;
399da7aa277SGleb Kurtsou 	bytes = pages * PAGE_SIZE;
400da7aa277SGleb Kurtsou 
401da7aa277SGleb Kurtsou 	error = sysctl_handle_long(oidp, &bytes, 0, req);
402da7aa277SGleb Kurtsou 	if (error || !req->newptr)
403da7aa277SGleb Kurtsou 		return (error);
404da7aa277SGleb Kurtsou 
405da7aa277SGleb Kurtsou 	pages = bytes / PAGE_SIZE;
406da7aa277SGleb Kurtsou 	if (pages < TMPFS_PAGES_MINRESERVED)
407da7aa277SGleb Kurtsou 		return (EINVAL);
408da7aa277SGleb Kurtsou 
409da7aa277SGleb Kurtsou 	*(long *)arg1 = pages;
410da7aa277SGleb Kurtsou 	return (0);
411da7aa277SGleb Kurtsou }
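/*
 * Worked example (assuming PAGE_SIZE == 4096): writing 1073741824 bytes
 * (1 GiB) to vfs.tmpfs.memory_reserved stores 1073741824 / 4096 == 262144
 * pages in tmpfs_pages_reserved; a value below
 * TMPFS_PAGES_MINRESERVED * PAGE_SIZE bytes is rejected with EINVAL.
 */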
412da7aa277SGleb Kurtsou 
4132a829749SMateusz Guzik SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved,
4142a829749SMateusz Guzik     CTLTYPE_LONG | CTLFLAG_MPSAFE | CTLFLAG_RW, &tmpfs_pages_reserved, 0,
4152a829749SMateusz Guzik     sysctl_mem_reserved, "L",
416f8439900SGleb Kurtsou     "Amount of available memory and swap below which tmpfs growth stops");
417da7aa277SGleb Kurtsou 
41863659234SMike Karels static int
41963659234SMike Karels sysctl_mem_percent(SYSCTL_HANDLER_ARGS)
42063659234SMike Karels {
42163659234SMike Karels 	int error, percent;
42263659234SMike Karels 
42363659234SMike Karels 	percent = *(int *)arg1;
42463659234SMike Karels 	error = sysctl_handle_int(oidp, &percent, 0, req);
42563659234SMike Karels 	if (error || !req->newptr)
42663659234SMike Karels 		return (error);
42763659234SMike Karels 
42863659234SMike Karels 	if ((unsigned) percent > 100)
42963659234SMike Karels 		return (EINVAL);
43063659234SMike Karels 
4313cded059SJessica Clarke 	*(int *)arg1 = percent;
43263659234SMike Karels 	tmpfs_set_reserve_from_percent();
43363659234SMike Karels 	return (0);
43463659234SMike Karels }
43563659234SMike Karels 
43663659234SMike Karels static void
43763659234SMike Karels tmpfs_set_reserve_from_percent(void)
43863659234SMike Karels {
43963659234SMike Karels 	size_t reserved;
44063659234SMike Karels 
44163659234SMike Karels 	reserved = tmpfs_pages_avail_init * (100 - tmpfs_mem_percent) / 100;
44263659234SMike Karels 	tmpfs_pages_reserved = max(reserved, TMPFS_PAGES_MINRESERVED);
44363659234SMike Karels }
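/*
 * Worked example (illustrative numbers, 4 KiB pages): with
 * tmpfs_pages_avail_init == 2097152 (8 GiB of memory plus swap at boot)
 * and tmpfs_mem_percent == 95, reserved = 2097152 * (100 - 95) / 100 ==
 * 104857 pages (about 410 MiB), so tmpfs stops growing once available
 * memory and swap fall to that reserve.
 */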
44463659234SMike Karels 
44563659234SMike Karels SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_percent,
446*7c72c082SKa Ho Ng     CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, &tmpfs_mem_percent, 0,
44763659234SMike Karels     sysctl_mem_percent, "I",
44863659234SMike Karels     "Percent of available memory that can be used if no size limit");
44963659234SMike Karels 
4504fd5efe7SGleb Kurtsou static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a,
4514fd5efe7SGleb Kurtsou     struct tmpfs_dirent *b);
4524fd5efe7SGleb Kurtsou RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
4534fd5efe7SGleb Kurtsou 
454da7aa277SGleb Kurtsou size_t
455da7aa277SGleb Kurtsou tmpfs_mem_avail(void)
456da7aa277SGleb Kurtsou {
457f9cc8410SEric van Gyzen 	size_t avail;
458f9cc8410SEric van Gyzen 	long reserved;
459da7aa277SGleb Kurtsou 
460f9cc8410SEric van Gyzen 	avail = swap_pager_avail + vm_free_count();
461f9cc8410SEric van Gyzen 	reserved = atomic_load_long(&tmpfs_pages_reserved);
462f9cc8410SEric van Gyzen 	if (__predict_false(avail < reserved))
463f9cc8410SEric van Gyzen 		return (0);
464f9cc8410SEric van Gyzen 	return (avail - reserved);
465da7aa277SGleb Kurtsou }
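/*
 * For example (illustrative numbers): with 300000 pages of free memory
 * plus available swap and tmpfs_pages_reserved == 104857, this reports
 * 195143 pages; at or below the reserve it reports 0 and the callers
 * refuse further tmpfs growth.
 */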
466da7aa277SGleb Kurtsou 
467da7aa277SGleb Kurtsou size_t
468da7aa277SGleb Kurtsou tmpfs_pages_used(struct tmpfs_mount *tmp)
469da7aa277SGleb Kurtsou {
470da7aa277SGleb Kurtsou 	const size_t node_size = sizeof(struct tmpfs_node) +
471da7aa277SGleb Kurtsou 	    sizeof(struct tmpfs_dirent);
472da7aa277SGleb Kurtsou 	size_t meta_pages;
473da7aa277SGleb Kurtsou 
474da7aa277SGleb Kurtsou 	meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size,
475da7aa277SGleb Kurtsou 	    PAGE_SIZE);
476da7aa277SGleb Kurtsou 	return (meta_pages + tmp->tm_pages_used);
477da7aa277SGleb Kurtsou }
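/*
 * Worked example (the structure sizes below are assumptions, they vary by
 * architecture): if sizeof(struct tmpfs_node) + sizeof(struct tmpfs_dirent)
 * were 320 bytes and tm_nodes_inuse were 10000, the metadata charge would
 * be howmany(3200000, 4096) == 782 pages on top of tm_pages_used.
 */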
478da7aa277SGleb Kurtsou 
47956242a4cSFedor Uporov bool
480da7aa277SGleb Kurtsou tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages)
481da7aa277SGleb Kurtsou {
482da7aa277SGleb Kurtsou 	if (tmpfs_mem_avail() < req_pages)
4837f055843SKonstantin Belousov 		return (false);
484da7aa277SGleb Kurtsou 
485ed2159c9SMateusz Guzik 	if (tmp->tm_pages_max != ULONG_MAX &&
486da7aa277SGleb Kurtsou 	    tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp))
4877f055843SKonstantin Belousov 		return (false);
488da7aa277SGleb Kurtsou 
4897f055843SKonstantin Belousov 	return (true);
490da7aa277SGleb Kurtsou }
491da7aa277SGleb Kurtsou 
492399be910SKa Ho Ng static int
493399be910SKa Ho Ng tmpfs_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
494399be910SKa Ho Ng     int end, boolean_t ignerr)
495399be910SKa Ho Ng {
496bb1dc6cfSDoug Moore 	int error;
497399be910SKa Ho Ng 
498bb1dc6cfSDoug Moore 	error = vm_page_grab_zero_partial(object, idx, base, end);
499bb1dc6cfSDoug Moore 	if (ignerr)
500399be910SKa Ho Ng 		error = 0;
501399be910SKa Ho Ng 	return (error);
502399be910SKa Ho Ng }
503399be910SKa Ho Ng 
50464c25043SKonstantin Belousov void
50564c25043SKonstantin Belousov tmpfs_ref_node(struct tmpfs_node *node)
50664c25043SKonstantin Belousov {
5074601f5f5SKonstantin Belousov #ifdef INVARIANTS
5084601f5f5SKonstantin Belousov 	u_int old;
50964c25043SKonstantin Belousov 
5104601f5f5SKonstantin Belousov 	old =
5114601f5f5SKonstantin Belousov #endif
5124601f5f5SKonstantin Belousov 	refcount_acquire(&node->tn_refcount);
5134601f5f5SKonstantin Belousov #ifdef INVARIANTS
5144601f5f5SKonstantin Belousov 	KASSERT(old > 0, ("node %p zero refcount", node));
5154601f5f5SKonstantin Belousov #endif
51664c25043SKonstantin Belousov }
51764c25043SKonstantin Belousov 
518d1fa59e9SXin LI /*
519d1fa59e9SXin LI  * Allocates a new node of type 'type' inside the 'tmp' mount point, with
520d1fa59e9SXin LI  * its owner set to 'uid', its group to 'gid', and its mode set to
521d1fa59e9SXin LI  * 'mode'.
522d1fa59e9SXin LI  *
523d1fa59e9SXin LI  * If the node type is set to 'VDIR', then the parent parameter must point
524d1fa59e9SXin LI  * to the parent directory of the node being created.  It may only be NULL
525d1fa59e9SXin LI  * while allocating the root node.
526d1fa59e9SXin LI  *
527d1fa59e9SXin LI  * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
528d1fa59e9SXin LI  * specifies the device the node represents.
529d1fa59e9SXin LI  *
530d1fa59e9SXin LI  * If the node type is set to 'VLNK', then the parameter target specifies
531d1fa59e9SXin LI  * the file name of the target file for the symbolic link that is being
532d1fa59e9SXin LI  * created.
533d1fa59e9SXin LI  *
534d1fa59e9SXin LI  * Note that new nodes are allocated from the tmpfs_node_pool UMA zone as
535d1fa59e9SXin LI  * long as there is enough space (node and page limits) to create them.
537d1fa59e9SXin LI  *
538d1fa59e9SXin LI  * Returns zero on success or an appropriate error code on failure.
539d1fa59e9SXin LI  */
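/*
 * Illustrative call for a regular file (argument values are an example,
 * not copied from an actual caller): target must be NULL for anything but
 * VLNK and rdev must be VNOVAL for anything but VBLK/VCHR, so a typical
 * invocation is
 *
 *	error = tmpfs_alloc_node(mp, tmp, VREG, cred->cr_uid, cred->cr_gid,
 *	    mode, dnode, NULL, VNOVAL, &node);
 */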
540d1fa59e9SXin LI int
541ba8cc6d7SMateusz Guzik tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, __enum_uint8(vtype) type,
542d1fa59e9SXin LI     uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
5431493c2eeSBrooks Davis     const char *target, dev_t rdev, struct tmpfs_node **node)
544d1fa59e9SXin LI {
545d1fa59e9SXin LI 	struct tmpfs_node *nnode;
546618029afSMateusz Guzik 	char *symlink;
547618029afSMateusz Guzik 	char symlink_smr;
548d1fa59e9SXin LI 
549d1fa59e9SXin LI 	/* If the root directory of the 'tmp' file system is not yet
550d1fa59e9SXin LI 	 * allocated, this must be the request to do it. */
551d1fa59e9SXin LI 	MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
552d1fa59e9SXin LI 
553b918ee2cSKonstantin Belousov 	MPASS((type == VLNK) ^ (target == NULL));
554b918ee2cSKonstantin Belousov 	MPASS((type == VBLK || type == VCHR) ^ (rdev == VNOVAL));
555d1fa59e9SXin LI 
556189ee6beSJaakko Heinonen 	if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
5577adb1776SXin LI 		return (ENOSPC);
5587f055843SKonstantin Belousov 	if (!tmpfs_pages_check_avail(tmp, 1))
559da7aa277SGleb Kurtsou 		return (ENOSPC);
560d1fa59e9SXin LI 
5614cda7f7eSKonstantin Belousov 	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
5624cda7f7eSKonstantin Belousov 		/*
5634cda7f7eSKonstantin Belousov 		 * When a new tmpfs node is created for fully
5644cda7f7eSKonstantin Belousov 		 * constructed mount point, there must be a parent
5654cda7f7eSKonstantin Belousov 		 * node, whose vnode is locked exclusively.  As a
5664cda7f7eSKonstantin Belousov 		 * consequence, if the unmount is executing in
5674cda7f7eSKonstantin Belousov 		 * parallel, vflush() cannot reclaim the parent vnode.
5684cda7f7eSKonstantin Belousov 		 * Due to this, the check for MNTK_UNMOUNT flag is not
5694cda7f7eSKonstantin Belousov 		 * racy: if we did not see MNTK_UNMOUNT flag, then tmp
5704cda7f7eSKonstantin Belousov 		 * cannot be destroyed until node construction is
5714cda7f7eSKonstantin Belousov 		 * finished and the parent vnode unlocked.
5724cda7f7eSKonstantin Belousov 		 *
5734cda7f7eSKonstantin Belousov 		 * Tmpfs does not need to instantiate new nodes during
5744cda7f7eSKonstantin Belousov 		 * unmount.
5754cda7f7eSKonstantin Belousov 		 */
5764cda7f7eSKonstantin Belousov 		return (EBUSY);
5774cda7f7eSKonstantin Belousov 	}
578ae265753SKonstantin Belousov 	if ((mp->mnt_kern_flag & MNT_RDONLY) != 0)
579ae265753SKonstantin Belousov 		return (EROFS);
5804cda7f7eSKonstantin Belousov 
581172ffe70SMateusz Guzik 	nnode = uma_zalloc_smr(tmpfs_node_pool, M_WAITOK);
582d1fa59e9SXin LI 
583d1fa59e9SXin LI 	/* Generic initialization. */
584d1fa59e9SXin LI 	nnode->tn_type = type;
5858d5892eeSXin LI 	vfs_timestamp(&nnode->tn_atime);
586d1fa59e9SXin LI 	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
587d1fa59e9SXin LI 	    nnode->tn_atime;
588d1fa59e9SXin LI 	nnode->tn_uid = uid;
589d1fa59e9SXin LI 	nnode->tn_gid = gid;
590d1fa59e9SXin LI 	nnode->tn_mode = mode;
59130e0cf49SMateusz Guzik 	nnode->tn_id = alloc_unr64(&tmp->tm_ino_unr);
59264c25043SKonstantin Belousov 	nnode->tn_refcount = 1;
59356242a4cSFedor Uporov 	LIST_INIT(&nnode->tn_extattrs);
594d1fa59e9SXin LI 
595d1fa59e9SXin LI 	/* Type-specific initialization. */
596d1fa59e9SXin LI 	switch (nnode->tn_type) {
597d1fa59e9SXin LI 	case VBLK:
598d1fa59e9SXin LI 	case VCHR:
599d1fa59e9SXin LI 		nnode->tn_rdev = rdev;
600d1fa59e9SXin LI 		break;
601d1fa59e9SXin LI 
602d1fa59e9SXin LI 	case VDIR:
6034fd5efe7SGleb Kurtsou 		RB_INIT(&nnode->tn_dir.tn_dirhead);
6044fd5efe7SGleb Kurtsou 		LIST_INIT(&nnode->tn_dir.tn_dupindex);
6057871e52bSXin LI 		MPASS(parent != nnode);
6067871e52bSXin LI 		MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
607d1fa59e9SXin LI 		nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
608d1fa59e9SXin LI 		nnode->tn_dir.tn_readdir_lastn = 0;
609d1fa59e9SXin LI 		nnode->tn_dir.tn_readdir_lastp = NULL;
6108fa5e0f2SJason A. Harmening 		nnode->tn_dir.tn_wht_size = 0;
611d1fa59e9SXin LI 		nnode->tn_links++;
61282cf92d4SXin LI 		TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
613d1fa59e9SXin LI 		nnode->tn_dir.tn_parent->tn_links++;
61482cf92d4SXin LI 		TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent);
615d1fa59e9SXin LI 		break;
616d1fa59e9SXin LI 
617d1fa59e9SXin LI 	case VFIFO:
618d1fa59e9SXin LI 		/* FALLTHROUGH */
619d1fa59e9SXin LI 	case VSOCK:
620d1fa59e9SXin LI 		break;
621d1fa59e9SXin LI 
622d1fa59e9SXin LI 	case VLNK:
623d1fa59e9SXin LI 		MPASS(strlen(target) < MAXPATHLEN);
624d1fa59e9SXin LI 		nnode->tn_size = strlen(target);
625618029afSMateusz Guzik 
626618029afSMateusz Guzik 		symlink = NULL;
627618029afSMateusz Guzik 		if (!tmp->tm_nonc) {
6280f01fb01SKonstantin Belousov 			symlink = cache_symlink_alloc(nnode->tn_size + 1,
6290f01fb01SKonstantin Belousov 			    M_WAITOK);
630618029afSMateusz Guzik 			symlink_smr = true;
631618029afSMateusz Guzik 		}
632618029afSMateusz Guzik 		if (symlink == NULL) {
6330f01fb01SKonstantin Belousov 			symlink = malloc(nnode->tn_size + 1, M_TMPFSNAME,
6340f01fb01SKonstantin Belousov 			    M_WAITOK);
635618029afSMateusz Guzik 			symlink_smr = false;
636618029afSMateusz Guzik 		}
637618029afSMateusz Guzik 		memcpy(symlink, target, nnode->tn_size + 1);
638618029afSMateusz Guzik 
639618029afSMateusz Guzik 		/*
640618029afSMateusz Guzik 		 * Allow safe symlink resolving for lockless lookup.
641618029afSMateusz Guzik 		 * tmpfs_fplookup_symlink references this comment.
642618029afSMateusz Guzik 		 *
643618029afSMateusz Guzik 		 * 1. nnode is not yet visible to the world
644618029afSMateusz Guzik 		 * 2. both tn_link_target and tn_link_smr get populated
645618029afSMateusz Guzik 		 * 3. release fence publishes their content
6460f01fb01SKonstantin Belousov 		 * 4. tn_link_target content is immutable until node
6470f01fb01SKonstantin Belousov 		 *    destruction, where the pointer gets set to NULL
648618029afSMateusz Guzik 		 * 5. tn_link_smr is never changed once set
649618029afSMateusz Guzik 		 *
6500f01fb01SKonstantin Belousov 		 * As a result it is sufficient to issue load consume
6510f01fb01SKonstantin Belousov 		 * on the node pointer to also get the above content
6520f01fb01SKonstantin Belousov 		 * in a stable manner.  Worst case tn_link_smr flag
6530f01fb01SKonstantin Belousov 		 * may be set to true despite being stale, while the
6540f01fb01SKonstantin Belousov 		 * target buffer is already cleared out.
655618029afSMateusz Guzik 		 */
656cc96f92aSMateusz Guzik 		atomic_store_ptr(&nnode->tn_link_target, symlink);
657618029afSMateusz Guzik 		atomic_store_char((char *)&nnode->tn_link_smr, symlink_smr);
658618029afSMateusz Guzik 		atomic_thread_fence_rel();
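		/*
		 * Reader-side sketch (hypothetical code; the real consumer
		 * is tmpfs_fplookup_symlink): a lockless reader pairs with
		 * the release fence above roughly as
		 *
		 *	node = atomic_load_consume_ptr(&vp->v_data);
		 *	if (atomic_load_char(&node->tn_link_smr))
		 *		symlink = node->tn_link_target;
		 *
		 * and must handle tn_link_smr reading as true while the
		 * target was already cleared, per the worst case above.
		 */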
659d1fa59e9SXin LI 		break;
660d1fa59e9SXin LI 
661d1fa59e9SXin LI 	case VREG:
66280bca63cSKonstantin Belousov 		nnode->tn_reg.tn_aobj =
66328bc23abSKonstantin Belousov 		    vm_pager_allocate(tmpfs_pager_type, NULL, 0,
6644b8365d7SKonstantin Belousov 		    VM_PROT_DEFAULT, 0,
6653364c323SKonstantin Belousov 		    NULL /* XXXKIB - tmpfs needs swap reservation */);
666d9dc64f1SKonstantin Belousov 		nnode->tn_reg.tn_aobj->un_pager.swp.swp_priv = nnode;
667d9dc64f1SKonstantin Belousov 		vm_object_set_flag(nnode->tn_reg.tn_aobj, OBJ_TMPFS);
668081e36e7SKonstantin Belousov 		nnode->tn_reg.tn_tmp = tmp;
66937aea264SKonstantin Belousov 		nnode->tn_reg.tn_pages = 0;
670d1fa59e9SXin LI 		break;
671d1fa59e9SXin LI 
672d1fa59e9SXin LI 	default:
673bba7ed20SKonstantin Belousov 		panic("tmpfs_alloc_node: type %p %d", nnode,
674bba7ed20SKonstantin Belousov 		    (int)nnode->tn_type);
675d1fa59e9SXin LI 	}
676d1fa59e9SXin LI 
677d1fa59e9SXin LI 	TMPFS_LOCK(tmp);
678d1fa59e9SXin LI 	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
67964c25043SKonstantin Belousov 	nnode->tn_attached = true;
680d1fa59e9SXin LI 	tmp->tm_nodes_inuse++;
68164c25043SKonstantin Belousov 	tmp->tm_refcount++;
682d1fa59e9SXin LI 	TMPFS_UNLOCK(tmp);
683d1fa59e9SXin LI 
684d1fa59e9SXin LI 	*node = nnode;
685bba7ed20SKonstantin Belousov 	return (0);
686d1fa59e9SXin LI }
687d1fa59e9SXin LI 
688d1fa59e9SXin LI /*
689d1fa59e9SXin LI  * Destroys the node pointed to by node from the file system 'tmp'.
690bba7ed20SKonstantin Belousov  * If the node references a directory, no entries are allowed.
691d1fa59e9SXin LI  */
692d1fa59e9SXin LI void
693d1fa59e9SXin LI tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
694d1fa59e9SXin LI {
6954601f5f5SKonstantin Belousov 	if (refcount_release_if_not_last(&node->tn_refcount))
6964601f5f5SKonstantin Belousov 		return;
6970ae6383dSXin LI 
698d1fa59e9SXin LI 	TMPFS_LOCK(tmp);
69964c25043SKonstantin Belousov 	TMPFS_NODE_LOCK(node);
70064c25043SKonstantin Belousov 	if (!tmpfs_free_node_locked(tmp, node, false)) {
70164c25043SKonstantin Belousov 		TMPFS_NODE_UNLOCK(node);
70264c25043SKonstantin Belousov 		TMPFS_UNLOCK(tmp);
70364c25043SKonstantin Belousov 	}
70464c25043SKonstantin Belousov }
70564c25043SKonstantin Belousov 
70664c25043SKonstantin Belousov bool
70764c25043SKonstantin Belousov tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node,
70864c25043SKonstantin Belousov     bool detach)
70964c25043SKonstantin Belousov {
71056242a4cSFedor Uporov 	struct tmpfs_extattr *ea;
71164c25043SKonstantin Belousov 	vm_object_t uobj;
712618029afSMateusz Guzik 	char *symlink;
7134601f5f5SKonstantin Belousov 	bool last;
71464c25043SKonstantin Belousov 
71564c25043SKonstantin Belousov 	TMPFS_MP_ASSERT_LOCKED(tmp);
71664c25043SKonstantin Belousov 	TMPFS_NODE_ASSERT_LOCKED(node);
71764c25043SKonstantin Belousov 
7184601f5f5SKonstantin Belousov 	last = refcount_release(&node->tn_refcount);
7194601f5f5SKonstantin Belousov 	if (node->tn_attached && (detach || last)) {
72064c25043SKonstantin Belousov 		MPASS(tmp->tm_nodes_inuse > 0);
721d1fa59e9SXin LI 		tmp->tm_nodes_inuse--;
72264c25043SKonstantin Belousov 		LIST_REMOVE(node, tn_entries);
72364c25043SKonstantin Belousov 		node->tn_attached = false;
72464c25043SKonstantin Belousov 	}
7254601f5f5SKonstantin Belousov 	if (!last)
72664c25043SKonstantin Belousov 		return (false);
72764c25043SKonstantin Belousov 
728f4aa6452SMateusz Guzik 	TMPFS_NODE_UNLOCK(node);
729f4aa6452SMateusz Guzik 
73064c25043SKonstantin Belousov #ifdef INVARIANTS
73164c25043SKonstantin Belousov 	MPASS(node->tn_vnode == NULL);
73264c25043SKonstantin Belousov 	MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0);
733d1fa59e9SXin LI 
734f4aa6452SMateusz Guzik 	/*
7350f01fb01SKonstantin Belousov 	 * Make sure this is a node type we can deal with. Everything
7360f01fb01SKonstantin Belousov 	 * is explicitly enumerated without the 'default' clause so
7370f01fb01SKonstantin Belousov 	 * the compiler can throw an error in case a new type is
7380f01fb01SKonstantin Belousov 	 * added.
739f4aa6452SMateusz Guzik 	 */
740d1fa59e9SXin LI 	switch (node->tn_type) {
741d1fa59e9SXin LI 	case VBLK:
742d1fa59e9SXin LI 	case VCHR:
743d1fa59e9SXin LI 	case VDIR:
744d1fa59e9SXin LI 	case VFIFO:
745d1fa59e9SXin LI 	case VSOCK:
746d1fa59e9SXin LI 	case VLNK:
747f4aa6452SMateusz Guzik 	case VREG:
748f4aa6452SMateusz Guzik 		break;
749f4aa6452SMateusz Guzik 	case VNON:
750f4aa6452SMateusz Guzik 	case VBAD:
751f4aa6452SMateusz Guzik 	case VMARKER:
7520f01fb01SKonstantin Belousov 		panic("%s: bad type %d for node %p", __func__,
7530f01fb01SKonstantin Belousov 		    (int)node->tn_type, node);
754f4aa6452SMateusz Guzik 	}
755f4aa6452SMateusz Guzik #endif
756f4aa6452SMateusz Guzik 
75756242a4cSFedor Uporov 	while ((ea = LIST_FIRST(&node->tn_extattrs)) != NULL) {
75856242a4cSFedor Uporov 		LIST_REMOVE(ea, ea_extattrs);
75956242a4cSFedor Uporov 		tmpfs_extattr_free(ea);
76056242a4cSFedor Uporov 	}
76156242a4cSFedor Uporov 
762f4aa6452SMateusz Guzik 	switch (node->tn_type) {
763f4aa6452SMateusz Guzik 	case VREG:
764f4aa6452SMateusz Guzik 		uobj = node->tn_reg.tn_aobj;
76537aea264SKonstantin Belousov 		node->tn_reg.tn_aobj = NULL;
76637aea264SKonstantin Belousov 		if (uobj != NULL) {
76737aea264SKonstantin Belousov 			VM_OBJECT_WLOCK(uobj);
76837aea264SKonstantin Belousov 			KASSERT((uobj->flags & OBJ_TMPFS) != 0,
76937aea264SKonstantin Belousov 			    ("tmpfs node %p uobj %p not tmpfs", node, uobj));
77037aea264SKonstantin Belousov 			vm_object_clear_flag(uobj, OBJ_TMPFS);
77137aea264SKonstantin Belousov 			KASSERT(tmp->tm_pages_used >= node->tn_reg.tn_pages,
77237aea264SKonstantin Belousov 			    ("tmpfs tmp %p node %p pages %jd free %jd", tmp,
77337aea264SKonstantin Belousov 			    node, (uintmax_t)tmp->tm_pages_used,
77437aea264SKonstantin Belousov 			    (uintmax_t)node->tn_reg.tn_pages));
77537aea264SKonstantin Belousov 			atomic_add_long(&tmp->tm_pages_used,
77637aea264SKonstantin Belousov 			    -node->tn_reg.tn_pages);
77737aea264SKonstantin Belousov 			VM_OBJECT_WUNLOCK(uobj);
77837aea264SKonstantin Belousov 		}
779f4aa6452SMateusz Guzik 		tmpfs_free_tmp(tmp);
78037aea264SKonstantin Belousov 
78137aea264SKonstantin Belousov 		/*
78237aea264SKonstantin Belousov 		 * vm_object_deallocate() must not be called while
78337aea264SKonstantin Belousov 		 * owning tm_allnode_lock, because deallocate might
78437aea264SKonstantin Belousov 		 * sleep.  Call it after tmpfs_free_tmp() does the
78537aea264SKonstantin Belousov 		 * unlock.
78637aea264SKonstantin Belousov 		 */
787d9dc64f1SKonstantin Belousov 		if (uobj != NULL)
788f4aa6452SMateusz Guzik 			vm_object_deallocate(uobj);
78937aea264SKonstantin Belousov 
790f4aa6452SMateusz Guzik 		break;
791f4aa6452SMateusz Guzik 	case VLNK:
792f4aa6452SMateusz Guzik 		tmpfs_free_tmp(tmp);
793f4aa6452SMateusz Guzik 
794618029afSMateusz Guzik 		symlink = node->tn_link_target;
795cc96f92aSMateusz Guzik 		atomic_store_ptr(&node->tn_link_target, NULL);
796618029afSMateusz Guzik 		if (atomic_load_char(&node->tn_link_smr)) {
797618029afSMateusz Guzik 			cache_symlink_free(symlink, node->tn_size + 1);
798618029afSMateusz Guzik 		} else {
799618029afSMateusz Guzik 			free(symlink, M_TMPFSNAME);
800618029afSMateusz Guzik 		}
801d1fa59e9SXin LI 		break;
802d1fa59e9SXin LI 	default:
803f4aa6452SMateusz Guzik 		tmpfs_free_tmp(tmp);
804f4aa6452SMateusz Guzik 		break;
805d1fa59e9SXin LI 	}
806d1fa59e9SXin LI 
807172ffe70SMateusz Guzik 	uma_zfree_smr(tmpfs_node_pool, node);
80864c25043SKonstantin Belousov 	return (true);
809d1fa59e9SXin LI }
810d1fa59e9SXin LI 
8114fd5efe7SGleb Kurtsou static __inline uint32_t
8124fd5efe7SGleb Kurtsou tmpfs_dirent_hash(const char *name, u_int len)
8134fd5efe7SGleb Kurtsou {
8144fd5efe7SGleb Kurtsou 	uint32_t hash;
8154fd5efe7SGleb Kurtsou 
8164fd5efe7SGleb Kurtsou 	hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK;
8174fd5efe7SGleb Kurtsou #ifdef TMPFS_DEBUG_DIRCOOKIE_DUP
8184fd5efe7SGleb Kurtsou 	hash &= 0xf;
8194fd5efe7SGleb Kurtsou #endif
8204fd5efe7SGleb Kurtsou 	if (hash < TMPFS_DIRCOOKIE_MIN)
8214fd5efe7SGleb Kurtsou 		hash += TMPFS_DIRCOOKIE_MIN;
8224fd5efe7SGleb Kurtsou 
8234fd5efe7SGleb Kurtsou 	return (hash);
8244fd5efe7SGleb Kurtsou }
8254fd5efe7SGleb Kurtsou 
8264fd5efe7SGleb Kurtsou static __inline off_t
8274fd5efe7SGleb Kurtsou tmpfs_dirent_cookie(struct tmpfs_dirent *de)
8284fd5efe7SGleb Kurtsou {
82962dca316SBryan Drewery 	if (de == NULL)
83062dca316SBryan Drewery 		return (TMPFS_DIRCOOKIE_EOF);
83162dca316SBryan Drewery 
8324fd5efe7SGleb Kurtsou 	MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN);
8334fd5efe7SGleb Kurtsou 
8344fd5efe7SGleb Kurtsou 	return (de->td_cookie);
8354fd5efe7SGleb Kurtsou }
8364fd5efe7SGleb Kurtsou 
8374fd5efe7SGleb Kurtsou static __inline boolean_t
8384fd5efe7SGleb Kurtsou tmpfs_dirent_dup(struct tmpfs_dirent *de)
8394fd5efe7SGleb Kurtsou {
8404fd5efe7SGleb Kurtsou 	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0);
8414fd5efe7SGleb Kurtsou }
8424fd5efe7SGleb Kurtsou 
8434fd5efe7SGleb Kurtsou static __inline boolean_t
8444fd5efe7SGleb Kurtsou tmpfs_dirent_duphead(struct tmpfs_dirent *de)
8454fd5efe7SGleb Kurtsou {
8464fd5efe7SGleb Kurtsou 	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0);
8474fd5efe7SGleb Kurtsou }
8484fd5efe7SGleb Kurtsou 
8494fd5efe7SGleb Kurtsou void
8504fd5efe7SGleb Kurtsou tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen)
8514fd5efe7SGleb Kurtsou {
8524fd5efe7SGleb Kurtsou 	de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen);
8534fd5efe7SGleb Kurtsou 	memcpy(de->ud.td_name, name, namelen);
8544fd5efe7SGleb Kurtsou 	de->td_namelen = namelen;
8554fd5efe7SGleb Kurtsou }
8564fd5efe7SGleb Kurtsou 
857d1fa59e9SXin LI /*
858d1fa59e9SXin LI  * Allocates a new directory entry for the node node with a name of name.
859d1fa59e9SXin LI  * The new directory entry is returned in *de.
860d1fa59e9SXin LI  *
861d1fa59e9SXin LI  * The link count of node is increased by one to reflect the new object
862d1fa59e9SXin LI  * referencing it.
863d1fa59e9SXin LI  *
864d1fa59e9SXin LI  * Returns zero on success or an appropriate error code on failure.
865d1fa59e9SXin LI  */
866d1fa59e9SXin LI int
867d1fa59e9SXin LI tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
8684fd5efe7SGleb Kurtsou     const char *name, u_int len, struct tmpfs_dirent **de)
869d1fa59e9SXin LI {
870d1fa59e9SXin LI 	struct tmpfs_dirent *nde;
871d1fa59e9SXin LI 
8727c58c37eSMateusz Guzik 	nde = malloc(sizeof(*nde), M_TMPFSDIR, M_WAITOK);
873d1fa59e9SXin LI 	nde->td_node = node;
8744fd5efe7SGleb Kurtsou 	if (name != NULL) {
8754fd5efe7SGleb Kurtsou 		nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
8764fd5efe7SGleb Kurtsou 		tmpfs_dirent_init(nde, name, len);
8774fd5efe7SGleb Kurtsou 	} else
8784fd5efe7SGleb Kurtsou 		nde->td_namelen = 0;
87999d57a6bSEd Schouten 	if (node != NULL)
880d1fa59e9SXin LI 		node->tn_links++;
881d1fa59e9SXin LI 
882d1fa59e9SXin LI 	*de = nde;
883d1fa59e9SXin LI 
884c12118f6SKa Ho Ng 	return (0);
885d1fa59e9SXin LI }
886d1fa59e9SXin LI 
887d1fa59e9SXin LI /*
888d1fa59e9SXin LI  * Frees a directory entry.  It is the caller's responsibility to destroy
889d1fa59e9SXin LI  * the node referenced by it if needed.
890d1fa59e9SXin LI  *
891d1fa59e9SXin LI  * The link count of node is decreased by one to reflect the removal of an
892d1fa59e9SXin LI  * object that referenced it.  This only happens if 'node_exists' is true;
893d1fa59e9SXin LI  * otherwise the function will not access the node referred to by the
894d1fa59e9SXin LI  * directory entry, as it may already have been released from the outside.
895d1fa59e9SXin LI  */
896d1fa59e9SXin LI void
8974fd5efe7SGleb Kurtsou tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de)
898d1fa59e9SXin LI {
899d1fa59e9SXin LI 	struct tmpfs_node *node;
900d1fa59e9SXin LI 
901d1fa59e9SXin LI 	node = de->td_node;
90299d57a6bSEd Schouten 	if (node != NULL) {
903d1fa59e9SXin LI 		MPASS(node->tn_links > 0);
904d1fa59e9SXin LI 		node->tn_links--;
905d1fa59e9SXin LI 	}
9064fd5efe7SGleb Kurtsou 	if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL)
9074fd5efe7SGleb Kurtsou 		free(de->ud.td_name, M_TMPFSNAME);
9087c58c37eSMateusz Guzik 	free(de, M_TMPFSDIR);
909d1fa59e9SXin LI }
910d1fa59e9SXin LI 
911158cc900SKonstantin Belousov void
912158cc900SKonstantin Belousov tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj)
913158cc900SKonstantin Belousov {
914eec2e4efSMateusz Guzik 	bool want_vrele;
915158cc900SKonstantin Belousov 
91655781cb9SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject");
917158cc900SKonstantin Belousov 	if (vp->v_type != VREG || obj == NULL)
918158cc900SKonstantin Belousov 		return;
919158cc900SKonstantin Belousov 
920158cc900SKonstantin Belousov 	VM_OBJECT_WLOCK(obj);
921158cc900SKonstantin Belousov 	VI_LOCK(vp);
92246811949SKonstantin Belousov 	vp->v_object = NULL;
92346811949SKonstantin Belousov 
924eec2e4efSMateusz Guzik 	/*
925eec2e4efSMateusz Guzik 	 * May be going through forced unmount.
926eec2e4efSMateusz Guzik 	 */
927eec2e4efSMateusz Guzik 	want_vrele = false;
928eec2e4efSMateusz Guzik 	if ((obj->flags & OBJ_TMPFS_VREF) != 0) {
929eec2e4efSMateusz Guzik 		vm_object_clear_flag(obj, OBJ_TMPFS_VREF);
930eec2e4efSMateusz Guzik 		want_vrele = true;
931eec2e4efSMateusz Guzik 	}
932eec2e4efSMateusz Guzik 
9333c93d227SKonstantin Belousov 	if (vp->v_writecount < 0)
9343c93d227SKonstantin Belousov 		vp->v_writecount = 0;
935158cc900SKonstantin Belousov 	VI_UNLOCK(vp);
936158cc900SKonstantin Belousov 	VM_OBJECT_WUNLOCK(obj);
937eec2e4efSMateusz Guzik 	if (want_vrele) {
938eec2e4efSMateusz Guzik 		vrele(vp);
939eec2e4efSMateusz Guzik 	}
940158cc900SKonstantin Belousov }
941158cc900SKonstantin Belousov 
942158cc900SKonstantin Belousov /*
943d1fa59e9SXin LI  * Allocates a new vnode for the node node or returns a new reference to
944d1fa59e9SXin LI  * an existing one if the node had already a vnode referencing it.  The
945d1fa59e9SXin LI  * resulting locked vnode is returned in *vpp.
946d1fa59e9SXin LI  *
947d1fa59e9SXin LI  * Returns zero on success or an appropriate error code on failure.
948d1fa59e9SXin LI  */
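/*
 * A typical call from a lookup path (illustrative; the lock flag is an
 * example) is
 *
 *	error = tmpfs_alloc_vp(dvp->v_mount, de->td_node, LK_EXCLUSIVE, &vp);
 */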
949d1fa59e9SXin LI int
9500ae6383dSXin LI tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
951dfd233edSAttilio Rao     struct vnode **vpp)
952d1fa59e9SXin LI {
953d1fa59e9SXin LI 	struct vnode *vp;
9541abe3656SMateusz Guzik 	enum vgetstate vs;
95564c25043SKonstantin Belousov 	struct tmpfs_mount *tm;
9566f2af3fcSKonstantin Belousov 	vm_object_t object;
9576f2af3fcSKonstantin Belousov 	int error;
958d1fa59e9SXin LI 
9596f2af3fcSKonstantin Belousov 	error = 0;
96064c25043SKonstantin Belousov 	tm = VFS_TO_TMPFS(mp);
9610ae6383dSXin LI 	TMPFS_NODE_LOCK(node);
9624601f5f5SKonstantin Belousov 	tmpfs_ref_node(node);
96364c25043SKonstantin Belousov loop:
96464c25043SKonstantin Belousov 	TMPFS_NODE_ASSERT_LOCKED(node);
965fb755714SXin LI 	if ((vp = node->tn_vnode) != NULL) {
96682cf92d4SXin LI 		MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
9678239a7a8SKonstantin Belousov 		if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) ||
968abd80ddbSMateusz Guzik 		    (VN_IS_DOOMED(vp) &&
9698239a7a8SKonstantin Belousov 		     (lkflag & LK_NOWAIT) != 0)) {
9708239a7a8SKonstantin Belousov 			TMPFS_NODE_UNLOCK(node);
9718239a7a8SKonstantin Belousov 			error = ENOENT;
9728239a7a8SKonstantin Belousov 			vp = NULL;
9738239a7a8SKonstantin Belousov 			goto out;
9748239a7a8SKonstantin Belousov 		}
975abd80ddbSMateusz Guzik 		if (VN_IS_DOOMED(vp)) {
9768239a7a8SKonstantin Belousov 			node->tn_vpstate |= TMPFS_VNODE_WRECLAIM;
9778239a7a8SKonstantin Belousov 			while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) {
9788239a7a8SKonstantin Belousov 				msleep(&node->tn_vnode, TMPFS_NODE_MTX(node),
9798239a7a8SKonstantin Belousov 				    0, "tmpfsE", 0);
9808239a7a8SKonstantin Belousov 			}
98164c25043SKonstantin Belousov 			goto loop;
9828239a7a8SKonstantin Belousov 		}
9831abe3656SMateusz Guzik 		vs = vget_prep(vp);
9840ae6383dSXin LI 		TMPFS_NODE_UNLOCK(node);
9851abe3656SMateusz Guzik 		error = vget_finish(vp, lkflag, vs);
98664c25043SKonstantin Belousov 		if (error == ENOENT) {
98764c25043SKonstantin Belousov 			TMPFS_NODE_LOCK(node);
9888239a7a8SKonstantin Belousov 			goto loop;
98964c25043SKonstantin Belousov 		}
990ca846258SGleb Kurtsou 		if (error != 0) {
991ca846258SGleb Kurtsou 			vp = NULL;
992ca846258SGleb Kurtsou 			goto out;
993ca846258SGleb Kurtsou 		}
994d1fa59e9SXin LI 
995d1fa59e9SXin LI 		/*
996d1fa59e9SXin LI 		 * Make sure the vnode is still there after
997d1fa59e9SXin LI 		 * getting the interlock to avoid racing a free.
998d1fa59e9SXin LI 		 */
999439d942bSMateusz Guzik 		if (node->tn_vnode != vp) {
1000d1fa59e9SXin LI 			vput(vp);
100164c25043SKonstantin Belousov 			TMPFS_NODE_LOCK(node);
1002d1fa59e9SXin LI 			goto loop;
1003d1fa59e9SXin LI 		}
1004d1fa59e9SXin LI 
1005d1fa59e9SXin LI 		goto out;
1006d1fa59e9SXin LI 	}
1007d1fa59e9SXin LI 
100882cf92d4SXin LI 	if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) ||
100982cf92d4SXin LI 	    (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) {
101082cf92d4SXin LI 		TMPFS_NODE_UNLOCK(node);
101182cf92d4SXin LI 		error = ENOENT;
101282cf92d4SXin LI 		vp = NULL;
101382cf92d4SXin LI 		goto out;
101482cf92d4SXin LI 	}
101582cf92d4SXin LI 
1016d1fa59e9SXin LI 	/*
1017d1fa59e9SXin LI 	 * otherwise lock the vp list while we call getnewvnode
1018d1fa59e9SXin LI 	 * since that can block.
1019d1fa59e9SXin LI 	 */
1020d1fa59e9SXin LI 	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
1021d1fa59e9SXin LI 		node->tn_vpstate |= TMPFS_VNODE_WANT;
1022fb755714SXin LI 		error = msleep((caddr_t) &node->tn_vpstate,
102364c25043SKonstantin Belousov 		    TMPFS_NODE_MTX(node), 0, "tmpfs_alloc_vp", 0);
102464c25043SKonstantin Belousov 		if (error != 0)
102564c25043SKonstantin Belousov 			goto out;
1026fb755714SXin LI 		goto loop;
1027fb755714SXin LI 	} else
1028d1fa59e9SXin LI 		node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;
1029fb755714SXin LI 
1030d1fa59e9SXin LI 	TMPFS_NODE_UNLOCK(node);
1031d1fa59e9SXin LI 
1032d1fa59e9SXin LI 	/* Get a new vnode and associate it with our node. */
103300ac6a98SKonstantin Belousov 	error = getnewvnode("tmpfs", mp, VFS_TO_TMPFS(mp)->tm_nonc ?
103400ac6a98SKonstantin Belousov 	    &tmpfs_vnodeop_nonc_entries : &tmpfs_vnodeop_entries, &vp);
1035d1fa59e9SXin LI 	if (error != 0)
1036d1fa59e9SXin LI 		goto unlock;
1037d1fa59e9SXin LI 	MPASS(vp != NULL);
1038d1fa59e9SXin LI 
1039fd63693dSKonstantin Belousov 	/* lkflag is ignored, the lock is exclusive */
1040c8b29d12SMateusz Guzik 	(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1041d1fa59e9SXin LI 
1042d1fa59e9SXin LI 	vp->v_data = node;
1043d1fa59e9SXin LI 	vp->v_type = node->tn_type;
1044d1fa59e9SXin LI 
1045d1fa59e9SXin LI 	/* Type-specific initialization. */
1046d1fa59e9SXin LI 	switch (node->tn_type) {
1047d1fa59e9SXin LI 	case VBLK:
1048d1fa59e9SXin LI 		/* FALLTHROUGH */
1049d1fa59e9SXin LI 	case VCHR:
1050fb755714SXin LI 		/* FALLTHROUGH */
1051d1fa59e9SXin LI 	case VLNK:
1052d1fa59e9SXin LI 		/* FALLTHROUGH */
1053d1fa59e9SXin LI 	case VSOCK:
1054d1fa59e9SXin LI 		break;
1055fb755714SXin LI 	case VFIFO:
1056fb755714SXin LI 		vp->v_op = &tmpfs_fifoop_entries;
1057fb755714SXin LI 		break;
10586f2af3fcSKonstantin Belousov 	case VREG:
10596f2af3fcSKonstantin Belousov 		object = node->tn_reg.tn_aobj;
10606f2af3fcSKonstantin Belousov 		VM_OBJECT_WLOCK(object);
1061eec2e4efSMateusz Guzik 		KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
1062eec2e4efSMateusz Guzik 		    ("%s: object %p with OBJ_TMPFS_VREF but without vnode",
1063eec2e4efSMateusz Guzik 		    __func__, object));
10646f2af3fcSKonstantin Belousov 		VI_LOCK(vp);
10656f2af3fcSKonstantin Belousov 		KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
10666f2af3fcSKonstantin Belousov 		vp->v_object = object;
10670f613ab8SKonstantin Belousov 		vn_irflag_set_locked(vp, (tm->tm_pgread ? VIRF_PGREAD : 0) |
10680f613ab8SKonstantin Belousov 		    VIRF_TEXT_REF);
10696f2af3fcSKonstantin Belousov 		VI_UNLOCK(vp);
107058d7ac11SKonstantin Belousov 		VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp,
107158d7ac11SKonstantin Belousov 		    ("leaked OBJ_TMPFS_VREF"));
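		/*
		 * If the backing object already has writeable mappings,
		 * keep a use reference on the vnode and record that fact
		 * with OBJ_TMPFS_VREF so the reference can be dropped when
		 * the writeable mappings go away.
		 */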
107258d7ac11SKonstantin Belousov 		if (object->un_pager.swp.writemappings > 0) {
107358d7ac11SKonstantin Belousov 			vrefact(vp);
107458d7ac11SKonstantin Belousov 			vlazy(vp);
107558d7ac11SKonstantin Belousov 			vm_object_set_flag(object, OBJ_TMPFS_VREF);
107658d7ac11SKonstantin Belousov 		}
10776f2af3fcSKonstantin Belousov 		VM_OBJECT_WUNLOCK(object);
10786f2af3fcSKonstantin Belousov 		break;
10797871e52bSXin LI 	case VDIR:
108082cf92d4SXin LI 		MPASS(node->tn_dir.tn_parent != NULL);
10817871e52bSXin LI 		if (node->tn_dir.tn_parent == node)
10827871e52bSXin LI 			vp->v_vflag |= VV_ROOT;
10837871e52bSXin LI 		break;
1084d1fa59e9SXin LI 
1085d1fa59e9SXin LI 	default:
10861fa8f5f0SXin LI 		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
1087d1fa59e9SXin LI 	}
108860c5c866SKonstantin Belousov 	if (vp->v_type != VFIFO)
108960c5c866SKonstantin Belousov 		VN_LOCK_ASHARE(vp);
1090d1fa59e9SXin LI 
109166c5fbcaSKonstantin Belousov 	error = insmntque1(vp, mp);
10925ccdfdabSMateusz Guzik 	if (error != 0) {
109366c5fbcaSKonstantin Belousov 		/* Need to clear v_object for insmntque failure. */
109466c5fbcaSKonstantin Belousov 		tmpfs_destroy_vobject(vp, vp->v_object);
109566c5fbcaSKonstantin Belousov 		vp->v_object = NULL;
109666c5fbcaSKonstantin Belousov 		vp->v_data = NULL;
109766c5fbcaSKonstantin Belousov 		vp->v_op = &dead_vnodeops;
109866c5fbcaSKonstantin Belousov 		vgone(vp);
109966c5fbcaSKonstantin Belousov 		vput(vp);
11000ae6383dSXin LI 		vp = NULL;
1101829f0bcbSMateusz Guzik 	} else {
1102829f0bcbSMateusz Guzik 		vn_set_state(vp, VSTATE_CONSTRUCTED);
11035ccdfdabSMateusz Guzik 	}
1104d1fa59e9SXin LI 
1105d1fa59e9SXin LI unlock:
1106d1fa59e9SXin LI 	TMPFS_NODE_LOCK(node);
11070ae6383dSXin LI 
1108fb755714SXin LI 	MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
1109d1fa59e9SXin LI 	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
11100ae6383dSXin LI 	node->tn_vnode = vp;
1111d1fa59e9SXin LI 
1112d1fa59e9SXin LI 	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
1113d1fa59e9SXin LI 		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
1114d1fa59e9SXin LI 		TMPFS_NODE_UNLOCK(node);
1115d1fa59e9SXin LI 		wakeup((caddr_t) &node->tn_vpstate);
11168d5892eeSXin LI 	} else
1117d1fa59e9SXin LI 		TMPFS_NODE_UNLOCK(node);
1118d1fa59e9SXin LI 
1119d1fa59e9SXin LI out:
112064c25043SKonstantin Belousov 	if (error == 0) {
1121d1fa59e9SXin LI 		*vpp = vp;
1122d1fa59e9SXin LI 
11230ae6383dSXin LI #ifdef INVARIANTS
11249ff2fbdfSKonstantin Belousov 		MPASS(*vpp != NULL);
11259ff2fbdfSKonstantin Belousov 		ASSERT_VOP_LOCKED(*vpp, __func__);
11260ae6383dSXin LI 		TMPFS_NODE_LOCK(node);
1127d1fa59e9SXin LI 		MPASS(*vpp == node->tn_vnode);
11280ae6383dSXin LI 		TMPFS_NODE_UNLOCK(node);
11290ae6383dSXin LI #endif
113064c25043SKonstantin Belousov 	}
113164c25043SKonstantin Belousov 	tmpfs_free_node(tm, node);
1132d1fa59e9SXin LI 
1133bba7ed20SKonstantin Belousov 	return (error);
1134d1fa59e9SXin LI }
1135d1fa59e9SXin LI 
1136d1fa59e9SXin LI /*
1137d1fa59e9SXin LI  * Destroys the association between the vnode vp and the node it
1138d1fa59e9SXin LI  * references.
1139d1fa59e9SXin LI  */
1140d1fa59e9SXin LI void
1141d1fa59e9SXin LI tmpfs_free_vp(struct vnode *vp)
1142d1fa59e9SXin LI {
1143d1fa59e9SXin LI 	struct tmpfs_node *node;
1144d1fa59e9SXin LI 
1145d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
1146d1fa59e9SXin LI 
1147d2ca06cdSKonstantin Belousov 	TMPFS_NODE_ASSERT_LOCKED(node);
1148d1fa59e9SXin LI 	node->tn_vnode = NULL;
11498239a7a8SKonstantin Belousov 	if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0)
11508239a7a8SKonstantin Belousov 		wakeup(&node->tn_vnode);
11518239a7a8SKonstantin Belousov 	node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM;
1152d1fa59e9SXin LI 	vp->v_data = NULL;
1153d1fa59e9SXin LI }
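
/*
 * Illustrative sketch only (not part of this file): a reclaim-style caller
 * is expected to invoke tmpfs_free_vp() with the node interlock held,
 * roughly:
 *
 *	TMPFS_NODE_LOCK(node);
 *	tmpfs_free_vp(vp);
 *	TMPFS_NODE_UNLOCK(node);
 *
 * The exact sequence used by the real reclaim path is an assumption here,
 * not a quotation of it.
 */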
1154d1fa59e9SXin LI 
1155d1fa59e9SXin LI /*
1156d1fa59e9SXin LI  * Allocates a new file of type 'type' and adds it to the parent directory
1157d1fa59e9SXin LI  * 'dvp'; this addition is done using the component name given in 'cnp'.
1158d1fa59e9SXin LI  * The ownership of the new file is automatically assigned based on the
1159d1fa59e9SXin LI  * credentials of the caller (through 'cnp'), the group is set based on
1160d1fa59e9SXin LI  * the parent directory and the mode is determined from the 'vap' argument.
1161d1fa59e9SXin LI  * If successful, *vpp holds a vnode to the newly created file and zero
1162d1fa59e9SXin LI  * is returned.  Otherwise *vpp is NULL and the function returns an
1163d1fa59e9SXin LI  * appropriate error code.
1164d1fa59e9SXin LI  */
1165d1fa59e9SXin LI int
1166d1fa59e9SXin LI tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
11671493c2eeSBrooks Davis     struct componentname *cnp, const char *target)
1168d1fa59e9SXin LI {
1169d1fa59e9SXin LI 	int error;
1170d1fa59e9SXin LI 	struct tmpfs_dirent *de;
1171d1fa59e9SXin LI 	struct tmpfs_mount *tmp;
1172d1fa59e9SXin LI 	struct tmpfs_node *dnode;
1173d1fa59e9SXin LI 	struct tmpfs_node *node;
1174d1fa59e9SXin LI 	struct tmpfs_node *parent;
1175d1fa59e9SXin LI 
1176e7e6c820SKonstantin Belousov 	ASSERT_VOP_ELOCKED(dvp, "tmpfs_alloc_file");
1177d1fa59e9SXin LI 
1178d1fa59e9SXin LI 	tmp = VFS_TO_TMPFS(dvp->v_mount);
1179d1fa59e9SXin LI 	dnode = VP_TO_TMPFS_DIR(dvp);
1180d1fa59e9SXin LI 	*vpp = NULL;
1181d1fa59e9SXin LI 
1182d1fa59e9SXin LI 	/* If the entry we are creating is a directory, its parent gains a
1183d1fa59e9SXin LI 	 * new link, so make sure that the parent's link count cannot
1184d1fa59e9SXin LI 	 * overflow. */
1185d1fa59e9SXin LI 	if (vap->va_type == VDIR) {
1186d1fa59e9SXin LI 		/* Ensure that we do not overflow the maximum number of links
1187d1fa59e9SXin LI 		 * imposed by the system. */
118835b1a3abSJohn Baldwin 		MPASS(dnode->tn_links <= TMPFS_LINK_MAX);
118935b1a3abSJohn Baldwin 		if (dnode->tn_links == TMPFS_LINK_MAX) {
11907a41bc2fSKonstantin Belousov 			return (EMLINK);
1191d1fa59e9SXin LI 		}
1192d1fa59e9SXin LI 
1193d1fa59e9SXin LI 		parent = dnode;
11947871e52bSXin LI 		MPASS(parent != NULL);
1195d1fa59e9SXin LI 	} else
1196d1fa59e9SXin LI 		parent = NULL;
1197d1fa59e9SXin LI 
1198d1fa59e9SXin LI 	/* Allocate a node that represents the new file. */
11994cda7f7eSKonstantin Belousov 	error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type,
1200bba7ed20SKonstantin Belousov 	    cnp->cn_cred->cr_uid, dnode->tn_gid, vap->va_mode, parent,
1201bba7ed20SKonstantin Belousov 	    target, vap->va_rdev, &node);
1202d1fa59e9SXin LI 	if (error != 0)
12037a41bc2fSKonstantin Belousov 		return (error);
1204d1fa59e9SXin LI 
1205d1fa59e9SXin LI 	/* Allocate a directory entry that points to the new file. */
1206d1fa59e9SXin LI 	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
1207d1fa59e9SXin LI 	    &de);
1208d1fa59e9SXin LI 	if (error != 0) {
1209d1fa59e9SXin LI 		tmpfs_free_node(tmp, node);
12107a41bc2fSKonstantin Belousov 		return (error);
1211d1fa59e9SXin LI 	}
1212d1fa59e9SXin LI 
1213d1fa59e9SXin LI 	/* Allocate a vnode for the new file. */
1214dfd233edSAttilio Rao 	error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp);
1215d1fa59e9SXin LI 	if (error != 0) {
12164fd5efe7SGleb Kurtsou 		tmpfs_free_dirent(tmp, de);
1217d1fa59e9SXin LI 		tmpfs_free_node(tmp, node);
12187a41bc2fSKonstantin Belousov 		return (error);
1219d1fa59e9SXin LI 	}
1220d1fa59e9SXin LI 
1221d1fa59e9SXin LI 	/* Now that all required items are allocated, we can proceed to
1222d1fa59e9SXin LI 	 * insert the new node into the directory, an operation that
1223d1fa59e9SXin LI 	 * cannot fail. */
122499d57a6bSEd Schouten 	if (cnp->cn_flags & ISWHITEOUT)
122599d57a6bSEd Schouten 		tmpfs_dir_whiteout_remove(dvp, cnp);
1226d1fa59e9SXin LI 	tmpfs_dir_attach(dvp, de);
12277a41bc2fSKonstantin Belousov 	return (0);
1228d1fa59e9SXin LI }
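
/*
 * Usage sketch (illustrative, not a quotation of tmpfs_vnops.c): a
 * create-style VOP can be implemented almost entirely in terms of
 * tmpfs_alloc_file().  The handler below is hypothetical.
 *
 *	static int
 *	example_create(struct vop_create_args *v)
 *	{
 *		return (tmpfs_alloc_file(v->a_dvp, v->a_vpp, v->a_vap,
 *		    v->a_cnp, NULL));
 *	}
 */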
1229d1fa59e9SXin LI 
12301c07d69bSKonstantin Belousov struct tmpfs_dirent *
12314fd5efe7SGleb Kurtsou tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
12324fd5efe7SGleb Kurtsou {
12334fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de;
12344fd5efe7SGleb Kurtsou 
12354fd5efe7SGleb Kurtsou 	de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead);
12364fd5efe7SGleb Kurtsou 	dc->tdc_tree = de;
12374fd5efe7SGleb Kurtsou 	if (de != NULL && tmpfs_dirent_duphead(de))
12384fd5efe7SGleb Kurtsou 		de = LIST_FIRST(&de->ud.td_duphead);
12394fd5efe7SGleb Kurtsou 	dc->tdc_current = de;
12404fd5efe7SGleb Kurtsou 
12414fd5efe7SGleb Kurtsou 	return (dc->tdc_current);
12424fd5efe7SGleb Kurtsou }
12434fd5efe7SGleb Kurtsou 
12441c07d69bSKonstantin Belousov struct tmpfs_dirent *
12454fd5efe7SGleb Kurtsou tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
12464fd5efe7SGleb Kurtsou {
12474fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de;
12484fd5efe7SGleb Kurtsou 
12494fd5efe7SGleb Kurtsou 	MPASS(dc->tdc_tree != NULL);
12504fd5efe7SGleb Kurtsou 	if (tmpfs_dirent_dup(dc->tdc_current)) {
12514fd5efe7SGleb Kurtsou 		dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries);
12524fd5efe7SGleb Kurtsou 		if (dc->tdc_current != NULL)
12534fd5efe7SGleb Kurtsou 			return (dc->tdc_current);
12544fd5efe7SGleb Kurtsou 	}
12554fd5efe7SGleb Kurtsou 	dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir,
12564fd5efe7SGleb Kurtsou 	    &dnode->tn_dir.tn_dirhead, dc->tdc_tree);
12574fd5efe7SGleb Kurtsou 	if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) {
12584fd5efe7SGleb Kurtsou 		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
12594fd5efe7SGleb Kurtsou 		MPASS(dc->tdc_current != NULL);
12604fd5efe7SGleb Kurtsou 	}
12614fd5efe7SGleb Kurtsou 
12624fd5efe7SGleb Kurtsou 	return (dc->tdc_current);
12634fd5efe7SGleb Kurtsou }
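
/*
 * Illustrative sketch (hypothetical helper, not part of this file): walking
 * every entry of a directory with the cursor API above.  The caller is
 * assumed to hold the directory vnode lock; whiteout entries show up with
 * de->td_node == NULL.
 *
 *	static void
 *	example_dir_walk(struct tmpfs_node *dnode)
 *	{
 *		struct tmpfs_dir_cursor dc;
 *		struct tmpfs_dirent *de;
 *
 *		for (de = tmpfs_dir_first(dnode, &dc); de != NULL;
 *		    de = tmpfs_dir_next(dnode, &dc))
 *			printf("%.*s\n", (int)de->td_namelen, de->ud.td_name);
 *	}
 */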
12644fd5efe7SGleb Kurtsou 
12654fd5efe7SGleb Kurtsou /* Lookup directory entry in RB-Tree. Function may return duphead entry. */
12664fd5efe7SGleb Kurtsou static struct tmpfs_dirent *
12674fd5efe7SGleb Kurtsou tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash)
12684fd5efe7SGleb Kurtsou {
12694fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de, dekey;
12704fd5efe7SGleb Kurtsou 
12714fd5efe7SGleb Kurtsou 	dekey.td_hash = hash;
12724fd5efe7SGleb Kurtsou 	de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey);
12734fd5efe7SGleb Kurtsou 	return (de);
12744fd5efe7SGleb Kurtsou }
12754fd5efe7SGleb Kurtsou 
12764fd5efe7SGleb Kurtsou /* Lookup directory entry by cookie, initialize directory cursor accordingly. */
12774fd5efe7SGleb Kurtsou static struct tmpfs_dirent *
12784fd5efe7SGleb Kurtsou tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
12794fd5efe7SGleb Kurtsou     struct tmpfs_dir_cursor *dc)
12804fd5efe7SGleb Kurtsou {
12814fd5efe7SGleb Kurtsou 	struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead;
12824fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de, dekey;
12834fd5efe7SGleb Kurtsou 
12844fd5efe7SGleb Kurtsou 	MPASS(cookie >= TMPFS_DIRCOOKIE_MIN);
12854fd5efe7SGleb Kurtsou 
12864fd5efe7SGleb Kurtsou 	if (cookie == node->tn_dir.tn_readdir_lastn &&
12874fd5efe7SGleb Kurtsou 	    (de = node->tn_dir.tn_readdir_lastp) != NULL) {
12884fd5efe7SGleb Kurtsou 		/* Protect against possible race, tn_readdir_last[pn]
12894fd5efe7SGleb Kurtsou 		 * may be updated with only shared vnode lock held. */
12904fd5efe7SGleb Kurtsou 		if (cookie == tmpfs_dirent_cookie(de))
12914fd5efe7SGleb Kurtsou 			goto out;
12924fd5efe7SGleb Kurtsou 	}
12934fd5efe7SGleb Kurtsou 
12944fd5efe7SGleb Kurtsou 	if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) {
12954fd5efe7SGleb Kurtsou 		LIST_FOREACH(de, &node->tn_dir.tn_dupindex,
12964fd5efe7SGleb Kurtsou 		    uh.td_dup.index_entries) {
12974fd5efe7SGleb Kurtsou 			MPASS(tmpfs_dirent_dup(de));
12984fd5efe7SGleb Kurtsou 			if (de->td_cookie == cookie)
12994fd5efe7SGleb Kurtsou 				goto out;
13004fd5efe7SGleb Kurtsou 			/* dupindex list is sorted. */
13014fd5efe7SGleb Kurtsou 			if (de->td_cookie < cookie) {
13024fd5efe7SGleb Kurtsou 				de = NULL;
13034fd5efe7SGleb Kurtsou 				goto out;
13044fd5efe7SGleb Kurtsou 			}
13054fd5efe7SGleb Kurtsou 		}
13064fd5efe7SGleb Kurtsou 		MPASS(de == NULL);
13074fd5efe7SGleb Kurtsou 		goto out;
13084fd5efe7SGleb Kurtsou 	}
13094fd5efe7SGleb Kurtsou 
131015ad3e51SKonstantin Belousov 	if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) {
131115ad3e51SKonstantin Belousov 		de = NULL;
131215ad3e51SKonstantin Belousov 	} else {
13134fd5efe7SGleb Kurtsou 		dekey.td_hash = cookie;
13144fd5efe7SGleb Kurtsou 		/* Recover if direntry for cookie was removed */
13154fd5efe7SGleb Kurtsou 		de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
131615ad3e51SKonstantin Belousov 	}
13174fd5efe7SGleb Kurtsou 	dc->tdc_tree = de;
13184fd5efe7SGleb Kurtsou 	dc->tdc_current = de;
13194fd5efe7SGleb Kurtsou 	if (de != NULL && tmpfs_dirent_duphead(de)) {
13204fd5efe7SGleb Kurtsou 		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
13214fd5efe7SGleb Kurtsou 		MPASS(dc->tdc_current != NULL);
13224fd5efe7SGleb Kurtsou 	}
13234fd5efe7SGleb Kurtsou 	return (dc->tdc_current);
13244fd5efe7SGleb Kurtsou 
13254fd5efe7SGleb Kurtsou out:
13264fd5efe7SGleb Kurtsou 	dc->tdc_tree = de;
13274fd5efe7SGleb Kurtsou 	dc->tdc_current = de;
13284fd5efe7SGleb Kurtsou 	if (de != NULL && tmpfs_dirent_dup(de))
13294fd5efe7SGleb Kurtsou 		dc->tdc_tree = tmpfs_dir_xlookup_hash(node,
13304fd5efe7SGleb Kurtsou 		    de->td_hash);
13314fd5efe7SGleb Kurtsou 	return (dc->tdc_current);
13324fd5efe7SGleb Kurtsou }
13334fd5efe7SGleb Kurtsou 
13344fd5efe7SGleb Kurtsou /*
13354fd5efe7SGleb Kurtsou  * Looks for a directory entry in the directory represented by node.
13364fd5efe7SGleb Kurtsou  * 'cnp' describes the name of the entry to look for.  Note that the .
13374fd5efe7SGleb Kurtsou  * and .. components are not allowed as they do not physically exist
13384fd5efe7SGleb Kurtsou  * within directories.
13394fd5efe7SGleb Kurtsou  *
13404fd5efe7SGleb Kurtsou  * Returns a pointer to the entry when found, otherwise NULL.
13414fd5efe7SGleb Kurtsou  */
13424fd5efe7SGleb Kurtsou struct tmpfs_dirent *
13434fd5efe7SGleb Kurtsou tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
13444fd5efe7SGleb Kurtsou     struct componentname *cnp)
13454fd5efe7SGleb Kurtsou {
13464fd5efe7SGleb Kurtsou 	struct tmpfs_dir_duphead *duphead;
13474fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de;
13484fd5efe7SGleb Kurtsou 	uint32_t hash;
13494fd5efe7SGleb Kurtsou 
13504fd5efe7SGleb Kurtsou 	MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
13514fd5efe7SGleb Kurtsou 	MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
13524fd5efe7SGleb Kurtsou 	    cnp->cn_nameptr[1] == '.')));
13534fd5efe7SGleb Kurtsou 	TMPFS_VALIDATE_DIR(node);
13544fd5efe7SGleb Kurtsou 
13554fd5efe7SGleb Kurtsou 	hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen);
13564fd5efe7SGleb Kurtsou 	de = tmpfs_dir_xlookup_hash(node, hash);
13574fd5efe7SGleb Kurtsou 	if (de != NULL && tmpfs_dirent_duphead(de)) {
13584fd5efe7SGleb Kurtsou 		duphead = &de->ud.td_duphead;
13594fd5efe7SGleb Kurtsou 		LIST_FOREACH(de, duphead, uh.td_dup.entries) {
13604fd5efe7SGleb Kurtsou 			if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
13614fd5efe7SGleb Kurtsou 			    cnp->cn_namelen))
13624fd5efe7SGleb Kurtsou 				break;
13634fd5efe7SGleb Kurtsou 		}
13644fd5efe7SGleb Kurtsou 	} else if (de != NULL) {
13654fd5efe7SGleb Kurtsou 		if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
13664fd5efe7SGleb Kurtsou 		    cnp->cn_namelen))
13674fd5efe7SGleb Kurtsou 			de = NULL;
13684fd5efe7SGleb Kurtsou 	}
13694fd5efe7SGleb Kurtsou 	if (de != NULL && f != NULL && de->td_node != f)
13704fd5efe7SGleb Kurtsou 		de = NULL;
13714fd5efe7SGleb Kurtsou 
13724fd5efe7SGleb Kurtsou 	return (de);
13734fd5efe7SGleb Kurtsou }
13744fd5efe7SGleb Kurtsou 
13754fd5efe7SGleb Kurtsou /*
13764fd5efe7SGleb Kurtsou  * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex
13774fd5efe7SGleb Kurtsou  * list, allocate new cookie value.
13784fd5efe7SGleb Kurtsou  */
13794fd5efe7SGleb Kurtsou static void
13804fd5efe7SGleb Kurtsou tmpfs_dir_attach_dup(struct tmpfs_node *dnode,
13814fd5efe7SGleb Kurtsou     struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde)
13824fd5efe7SGleb Kurtsou {
13834fd5efe7SGleb Kurtsou 	struct tmpfs_dir_duphead *dupindex;
13844fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de, *pde;
13854fd5efe7SGleb Kurtsou 
13864fd5efe7SGleb Kurtsou 	dupindex = &dnode->tn_dir.tn_dupindex;
13874fd5efe7SGleb Kurtsou 	de = LIST_FIRST(dupindex);
13884fd5efe7SGleb Kurtsou 	if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) {
13894fd5efe7SGleb Kurtsou 		if (de == NULL)
13904fd5efe7SGleb Kurtsou 			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
13914fd5efe7SGleb Kurtsou 		else
13924fd5efe7SGleb Kurtsou 			nde->td_cookie = de->td_cookie + 1;
13934fd5efe7SGleb Kurtsou 		MPASS(tmpfs_dirent_dup(nde));
13944fd5efe7SGleb Kurtsou 		LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries);
13954fd5efe7SGleb Kurtsou 		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
13964fd5efe7SGleb Kurtsou 		return;
13974fd5efe7SGleb Kurtsou 	}
13984fd5efe7SGleb Kurtsou 
13994fd5efe7SGleb Kurtsou 	/*
14004fd5efe7SGleb Kurtsou 	 * Cookie numbers are near exhaustion.  Scan the dupindex list for
14014fd5efe7SGleb Kurtsou 	 * unused numbers.  The dupindex list is sorted in descending order;
14024fd5efe7SGleb Kurtsou 	 * keep it that way after inserting nde.
14034fd5efe7SGleb Kurtsou 	 */
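	/*
	 * For example (numbers illustrative; real duplicate cookies also
	 * carry the TMPFS_DIRCOOKIE_DUP flag): with dupindex cookies
	 * DUP_MAX, 7, 5, 4 the scan below stops at the first gap, gives the
	 * new entry cookie 8 and links it between DUP_MAX and 7.
	 */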
14044fd5efe7SGleb Kurtsou 	while (1) {
14054fd5efe7SGleb Kurtsou 		pde = de;
14064fd5efe7SGleb Kurtsou 		de = LIST_NEXT(de, uh.td_dup.index_entries);
14074fd5efe7SGleb Kurtsou 		if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) {
14084fd5efe7SGleb Kurtsou 			/*
14094fd5efe7SGleb Kurtsou 			 * Last element of the index doesn't have minimal cookie
14104fd5efe7SGleb Kurtsou 			 * value, use it.
14114fd5efe7SGleb Kurtsou 			 */
14124fd5efe7SGleb Kurtsou 			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
14134fd5efe7SGleb Kurtsou 			LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries);
14144fd5efe7SGleb Kurtsou 			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
14154fd5efe7SGleb Kurtsou 			return;
14164fd5efe7SGleb Kurtsou 		} else if (de == NULL) {
14174fd5efe7SGleb Kurtsou 			/*
14184fd5efe7SGleb Kurtsou 			 * We are so lucky to have 2^30 hash duplicates in a
14194fd5efe7SGleb Kurtsou 			 * single directory :)  Return the largest possible
14204fd5efe7SGleb Kurtsou 			 * cookie value.  It should be fine except for
14214fd5efe7SGleb Kurtsou 			 * possible issues with VOP_READDIR restart.
14224fd5efe7SGleb Kurtsou 			 */
14234fd5efe7SGleb Kurtsou 			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX;
14244fd5efe7SGleb Kurtsou 			LIST_INSERT_HEAD(dupindex, nde,
14254fd5efe7SGleb Kurtsou 			    uh.td_dup.index_entries);
14264fd5efe7SGleb Kurtsou 			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
14274fd5efe7SGleb Kurtsou 			return;
14284fd5efe7SGleb Kurtsou 		}
14294fd5efe7SGleb Kurtsou 		if (de->td_cookie + 1 == pde->td_cookie ||
14304fd5efe7SGleb Kurtsou 		    de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX)
14314fd5efe7SGleb Kurtsou 			continue;	/* No hole or invalid cookie. */
14324fd5efe7SGleb Kurtsou 		nde->td_cookie = de->td_cookie + 1;
14334fd5efe7SGleb Kurtsou 		MPASS(tmpfs_dirent_dup(nde));
14344fd5efe7SGleb Kurtsou 		MPASS(pde->td_cookie > nde->td_cookie);
14354fd5efe7SGleb Kurtsou 		MPASS(nde->td_cookie > de->td_cookie);
14364fd5efe7SGleb Kurtsou 		LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries);
14374fd5efe7SGleb Kurtsou 		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
14384fd5efe7SGleb Kurtsou 		return;
143974b8d63dSPedro F. Giffuni 	}
14404fd5efe7SGleb Kurtsou }
14414fd5efe7SGleb Kurtsou 
1442d1fa59e9SXin LI /*
1443d1fa59e9SXin LI  * Attaches the directory entry de to the directory represented by vp.
1444d1fa59e9SXin LI  * Note that this does not change the link count of the node pointed to by
1445d1fa59e9SXin LI  * the directory entry, as this is done by tmpfs_alloc_dirent.
1446d1fa59e9SXin LI  */
1447d1fa59e9SXin LI void
1448d1fa59e9SXin LI tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
1449d1fa59e9SXin LI {
1450d1fa59e9SXin LI 	struct tmpfs_node *dnode;
14514fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *xde, *nde;
1452d1fa59e9SXin LI 
1453fb755714SXin LI 	ASSERT_VOP_ELOCKED(vp, __func__);
14544fd5efe7SGleb Kurtsou 	MPASS(de->td_namelen > 0);
14554fd5efe7SGleb Kurtsou 	MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN);
14564fd5efe7SGleb Kurtsou 	MPASS(de->td_cookie == de->td_hash);
14574fd5efe7SGleb Kurtsou 
1458d1fa59e9SXin LI 	dnode = VP_TO_TMPFS_DIR(vp);
14594fd5efe7SGleb Kurtsou 	dnode->tn_dir.tn_readdir_lastn = 0;
14604fd5efe7SGleb Kurtsou 	dnode->tn_dir.tn_readdir_lastp = NULL;
14614fd5efe7SGleb Kurtsou 
14624fd5efe7SGleb Kurtsou 	MPASS(!tmpfs_dirent_dup(de));
14634fd5efe7SGleb Kurtsou 	xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
14644fd5efe7SGleb Kurtsou 	if (xde != NULL && tmpfs_dirent_duphead(xde))
14654fd5efe7SGleb Kurtsou 		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
14664fd5efe7SGleb Kurtsou 	else if (xde != NULL) {
14674fd5efe7SGleb Kurtsou 		/*
14684fd5efe7SGleb Kurtsou 		 * Allocate new duphead. Swap xde with duphead to avoid
14694fd5efe7SGleb Kurtsou 		 * adding/removing elements with the same hash.
14704fd5efe7SGleb Kurtsou 		 */
14714fd5efe7SGleb Kurtsou 		MPASS(!tmpfs_dirent_dup(xde));
14724fd5efe7SGleb Kurtsou 		tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0,
14734fd5efe7SGleb Kurtsou 		    &nde);
14744fd5efe7SGleb Kurtsou 		/* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */
14754fd5efe7SGleb Kurtsou 		memcpy(nde, xde, sizeof(*xde));
14764fd5efe7SGleb Kurtsou 		xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD;
14774fd5efe7SGleb Kurtsou 		LIST_INIT(&xde->ud.td_duphead);
14784fd5efe7SGleb Kurtsou 		xde->td_namelen = 0;
14794fd5efe7SGleb Kurtsou 		xde->td_node = NULL;
14804fd5efe7SGleb Kurtsou 		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde);
14814fd5efe7SGleb Kurtsou 		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
14824fd5efe7SGleb Kurtsou 	}
1483d1fa59e9SXin LI 	dnode->tn_size += sizeof(struct tmpfs_dirent);
1484016b7c7eSKonstantin Belousov 	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1485016b7c7eSKonstantin Belousov 	dnode->tn_accessed = true;
1486e0a60ae1SKonstantin Belousov 	tmpfs_update(vp);
1487d1fa59e9SXin LI }
1488d1fa59e9SXin LI 
1489d1fa59e9SXin LI /*
1490d1fa59e9SXin LI  * Detaches the directory entry de from the directory represented by vp.
1491d1fa59e9SXin LI  * Note that this does not change the link count of the node pointed to by
1492d1fa59e9SXin LI  * the directory entry, as this is done by tmpfs_free_dirent.
1493d1fa59e9SXin LI  */
1494d1fa59e9SXin LI void
1495d1fa59e9SXin LI tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
1496d1fa59e9SXin LI {
14974fd5efe7SGleb Kurtsou 	struct tmpfs_mount *tmp;
14984fd5efe7SGleb Kurtsou 	struct tmpfs_dir *head;
1499d1fa59e9SXin LI 	struct tmpfs_node *dnode;
15004fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *xde;
1501d1fa59e9SXin LI 
1502fb755714SXin LI 	ASSERT_VOP_ELOCKED(vp, __func__);
1503d1fa59e9SXin LI 
15044fd5efe7SGleb Kurtsou 	dnode = VP_TO_TMPFS_DIR(vp);
15054fd5efe7SGleb Kurtsou 	head = &dnode->tn_dir.tn_dirhead;
1506d1fa59e9SXin LI 	dnode->tn_dir.tn_readdir_lastn = 0;
1507d1fa59e9SXin LI 	dnode->tn_dir.tn_readdir_lastp = NULL;
1508d1fa59e9SXin LI 
15094fd5efe7SGleb Kurtsou 	if (tmpfs_dirent_dup(de)) {
15104fd5efe7SGleb Kurtsou 		/* Remove duphead if de was last entry. */
15114fd5efe7SGleb Kurtsou 		if (LIST_NEXT(de, uh.td_dup.entries) == NULL) {
15124fd5efe7SGleb Kurtsou 			xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash);
15134fd5efe7SGleb Kurtsou 			MPASS(tmpfs_dirent_duphead(xde));
15144fd5efe7SGleb Kurtsou 		} else
15154fd5efe7SGleb Kurtsou 			xde = NULL;
15164fd5efe7SGleb Kurtsou 		LIST_REMOVE(de, uh.td_dup.entries);
15174fd5efe7SGleb Kurtsou 		LIST_REMOVE(de, uh.td_dup.index_entries);
15184fd5efe7SGleb Kurtsou 		if (xde != NULL) {
15194fd5efe7SGleb Kurtsou 			if (LIST_EMPTY(&xde->ud.td_duphead)) {
15204fd5efe7SGleb Kurtsou 				RB_REMOVE(tmpfs_dir, head, xde);
15214fd5efe7SGleb Kurtsou 				tmp = VFS_TO_TMPFS(vp->v_mount);
15224fd5efe7SGleb Kurtsou 				MPASS(xde->td_node == NULL);
15234fd5efe7SGleb Kurtsou 				tmpfs_free_dirent(tmp, xde);
15244fd5efe7SGleb Kurtsou 			}
15254fd5efe7SGleb Kurtsou 		}
152685512850SKonstantin Belousov 		de->td_cookie = de->td_hash;
15274fd5efe7SGleb Kurtsou 	} else
15284fd5efe7SGleb Kurtsou 		RB_REMOVE(tmpfs_dir, head, de);
15294fd5efe7SGleb Kurtsou 
1530d1fa59e9SXin LI 	dnode->tn_size -= sizeof(struct tmpfs_dirent);
1531016b7c7eSKonstantin Belousov 	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1532016b7c7eSKonstantin Belousov 	dnode->tn_accessed = true;
1533e0a60ae1SKonstantin Belousov 	tmpfs_update(vp);
1534d1fa59e9SXin LI }
1535d1fa59e9SXin LI 
15364fd5efe7SGleb Kurtsou void
15374fd5efe7SGleb Kurtsou tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode)
1538d1fa59e9SXin LI {
15394fd5efe7SGleb Kurtsou 	struct tmpfs_dirent *de, *dde, *nde;
1540d1fa59e9SXin LI 
15414fd5efe7SGleb Kurtsou 	RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) {
15424fd5efe7SGleb Kurtsou 		RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
15434fd5efe7SGleb Kurtsou 		/* Node may already be destroyed. */
15444fd5efe7SGleb Kurtsou 		de->td_node = NULL;
15454fd5efe7SGleb Kurtsou 		if (tmpfs_dirent_duphead(de)) {
15464fd5efe7SGleb Kurtsou 			while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) {
15474fd5efe7SGleb Kurtsou 				LIST_REMOVE(dde, uh.td_dup.entries);
15484fd5efe7SGleb Kurtsou 				dde->td_node = NULL;
15494fd5efe7SGleb Kurtsou 				tmpfs_free_dirent(tmp, dde);
1550d1fa59e9SXin LI 			}
1551d1fa59e9SXin LI 		}
15524fd5efe7SGleb Kurtsou 		tmpfs_free_dirent(tmp, de);
15534fd5efe7SGleb Kurtsou 	}
1554d1fa59e9SXin LI }
1555d1fa59e9SXin LI 
1556d1fa59e9SXin LI /*
1557d1fa59e9SXin LI  * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
1558d1fa59e9SXin LI  * directory and returns it in the uio space.  The function returns 0
1559d1fa59e9SXin LI  * on success, EJUSTRETURN if there was not enough space in the uio
1560d1fa59e9SXin LI  * structure to hold the directory entry, or an appropriate error code
1561d1fa59e9SXin LI  * if another error happens.
1562d1fa59e9SXin LI  */
15634fd5efe7SGleb Kurtsou static int
1564e1cdc30fSKonstantin Belousov tmpfs_dir_getdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node,
1565e1cdc30fSKonstantin Belousov     struct uio *uio)
1566d1fa59e9SXin LI {
1567d1fa59e9SXin LI 	int error;
1568d1fa59e9SXin LI 	struct dirent dent;
1569d1fa59e9SXin LI 
1570d1fa59e9SXin LI 	TMPFS_VALIDATE_DIR(node);
1571d1fa59e9SXin LI 	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
1572d1fa59e9SXin LI 
1573d1fa59e9SXin LI 	dent.d_fileno = node->tn_id;
157490f580b9SMark Johnston 	dent.d_off = TMPFS_DIRCOOKIE_DOTDOT;
1575d1fa59e9SXin LI 	dent.d_type = DT_DIR;
1576d1fa59e9SXin LI 	dent.d_namlen = 1;
1577d1fa59e9SXin LI 	dent.d_name[0] = '.';
1578d1fa59e9SXin LI 	dent.d_reclen = GENERIC_DIRSIZ(&dent);
15796d2e2df7SMark Johnston 	dirent_terminate(&dent);
1580d1fa59e9SXin LI 
1581d1fa59e9SXin LI 	if (dent.d_reclen > uio->uio_resid)
15824fd5efe7SGleb Kurtsou 		error = EJUSTRETURN;
15834fd5efe7SGleb Kurtsou 	else
1584d1fa59e9SXin LI 		error = uiomove(&dent, dent.d_reclen, uio);
1585d1fa59e9SXin LI 
1586016b7c7eSKonstantin Belousov 	tmpfs_set_accessed(tm, node);
1587d1fa59e9SXin LI 
15885dc11286SKonstantin Belousov 	return (error);
1589d1fa59e9SXin LI }
1590d1fa59e9SXin LI 
1591d1fa59e9SXin LI /*
1592d1fa59e9SXin LI  * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
1593d1fa59e9SXin LI  * directory and returns it in the uio space.  The function returns 0
1594d1fa59e9SXin LI  * on success, EJUSTRETURN if there was not enough space in the uio
1595d1fa59e9SXin LI  * structure to hold the directory entry, or an appropriate error code
1596d1fa59e9SXin LI  * if another error happens.
1597d1fa59e9SXin LI  */
15984fd5efe7SGleb Kurtsou static int
1599e1cdc30fSKonstantin Belousov tmpfs_dir_getdotdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node,
160090f580b9SMark Johnston     struct uio *uio, off_t next)
1601d1fa59e9SXin LI {
1602c5dac63cSKonstantin Belousov 	struct tmpfs_node *parent;
1603d1fa59e9SXin LI 	struct dirent dent;
1604c5dac63cSKonstantin Belousov 	int error;
1605d1fa59e9SXin LI 
1606d1fa59e9SXin LI 	TMPFS_VALIDATE_DIR(node);
1607d1fa59e9SXin LI 	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
1608d1fa59e9SXin LI 
160982cf92d4SXin LI 	/*
161082cf92d4SXin LI 	 * Return ENOENT if the current node is already removed.
161182cf92d4SXin LI 	 */
161282cf92d4SXin LI 	TMPFS_ASSERT_LOCKED(node);
1613c5dac63cSKonstantin Belousov 	parent = node->tn_dir.tn_parent;
1614c5dac63cSKonstantin Belousov 	if (parent == NULL)
161582cf92d4SXin LI 		return (ENOENT);
161682cf92d4SXin LI 
1617c5dac63cSKonstantin Belousov 	dent.d_fileno = parent->tn_id;
161890f580b9SMark Johnston 	dent.d_off = next;
1619d1fa59e9SXin LI 	dent.d_type = DT_DIR;
1620d1fa59e9SXin LI 	dent.d_namlen = 2;
1621d1fa59e9SXin LI 	dent.d_name[0] = '.';
1622d1fa59e9SXin LI 	dent.d_name[1] = '.';
1623d1fa59e9SXin LI 	dent.d_reclen = GENERIC_DIRSIZ(&dent);
16246d2e2df7SMark Johnston 	dirent_terminate(&dent);
1625d1fa59e9SXin LI 
1626d1fa59e9SXin LI 	if (dent.d_reclen > uio->uio_resid)
16274fd5efe7SGleb Kurtsou 		error = EJUSTRETURN;
1628d1fa59e9SXin LI 	else
16294fd5efe7SGleb Kurtsou 		error = uiomove(&dent, dent.d_reclen, uio);
1630d1fa59e9SXin LI 
1631016b7c7eSKonstantin Belousov 	tmpfs_set_accessed(tm, node);
1632d1fa59e9SXin LI 
16335dc11286SKonstantin Belousov 	return (error);
1634d1fa59e9SXin LI }
1635d1fa59e9SXin LI 
1636d1fa59e9SXin LI /*
1637d1fa59e9SXin LI  * Helper function for tmpfs_readdir.  Returns as many directory entries
1638d1fa59e9SXin LI  * as can fit in the uio space.  The read starts at uio->uio_offset.
1639d1fa59e9SXin LI  * The function returns 0 on success, EJUSTRETURN if there was not
1640d1fa59e9SXin LI  * enough space in the uio structure to hold a directory entry, or an
1641d1fa59e9SXin LI  * appropriate error code if another error happens.
1642d1fa59e9SXin LI  */
1643d1fa59e9SXin LI int
1644e1cdc30fSKonstantin Belousov tmpfs_dir_getdents(struct tmpfs_mount *tm, struct tmpfs_node *node,
1645b214fcceSAlan Somers     struct uio *uio, int maxcookies, uint64_t *cookies, int *ncookies)
1646d1fa59e9SXin LI {
16474fd5efe7SGleb Kurtsou 	struct tmpfs_dir_cursor dc;
164890f580b9SMark Johnston 	struct tmpfs_dirent *de, *nde;
16494fd5efe7SGleb Kurtsou 	off_t off;
16504fd5efe7SGleb Kurtsou 	int error;
1651d1fa59e9SXin LI 
1652d1fa59e9SXin LI 	TMPFS_VALIDATE_DIR(node);
1653d1fa59e9SXin LI 
16544fd5efe7SGleb Kurtsou 	off = 0;
165562dca316SBryan Drewery 
165662dca316SBryan Drewery 	/*
165762dca316SBryan Drewery 	 * Lookup the node from the current offset.  The starting offset of
165862dca316SBryan Drewery 	 * 0 will lookup both '.' and '..', and then the first real entry,
165962dca316SBryan Drewery 	 * or EOF if there are none.  Then find all entries for the dir that
166062dca316SBryan Drewery 	 * fit into the buffer.  Once no more entries are found (de == NULL),
166162dca316SBryan Drewery 	 * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next
166262dca316SBryan Drewery 	 * call to return 0.
166362dca316SBryan Drewery 	 */
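	/*
	 * In cookie terms, a full scan therefore advances uio_offset through
	 * TMPFS_DIRCOOKIE_DOT, TMPFS_DIRCOOKIE_DOTDOT, the per-entry cookies
	 * derived from the name hashes (duplicates get their own cookie
	 * range), and finally TMPFS_DIRCOOKIE_EOF.
	 */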
16644fd5efe7SGleb Kurtsou 	switch (uio->uio_offset) {
16654fd5efe7SGleb Kurtsou 	case TMPFS_DIRCOOKIE_DOT:
1666e1cdc30fSKonstantin Belousov 		error = tmpfs_dir_getdotdent(tm, node, uio);
16674fd5efe7SGleb Kurtsou 		if (error != 0)
16684fd5efe7SGleb Kurtsou 			return (error);
166990f580b9SMark Johnston 		uio->uio_offset = off = TMPFS_DIRCOOKIE_DOTDOT;
1670ac09d109SBryan Drewery 		if (cookies != NULL)
167190f580b9SMark Johnston 			cookies[(*ncookies)++] = off;
1672504bde01SBryan Drewery 		/* FALLTHROUGH */
16734fd5efe7SGleb Kurtsou 	case TMPFS_DIRCOOKIE_DOTDOT:
167490f580b9SMark Johnston 		de = tmpfs_dir_first(node, &dc);
167590f580b9SMark Johnston 		off = tmpfs_dirent_cookie(de);
167690f580b9SMark Johnston 		error = tmpfs_dir_getdotdotdent(tm, node, uio, off);
16774fd5efe7SGleb Kurtsou 		if (error != 0)
16784fd5efe7SGleb Kurtsou 			return (error);
167990f580b9SMark Johnston 		uio->uio_offset = off;
1680ac09d109SBryan Drewery 		if (cookies != NULL)
168190f580b9SMark Johnston 			cookies[(*ncookies)++] = off;
168262dca316SBryan Drewery 		/* EOF. */
16834fd5efe7SGleb Kurtsou 		if (de == NULL)
16844fd5efe7SGleb Kurtsou 			return (0);
16854fd5efe7SGleb Kurtsou 		break;
16864fd5efe7SGleb Kurtsou 	case TMPFS_DIRCOOKIE_EOF:
16874fd5efe7SGleb Kurtsou 		return (0);
16884fd5efe7SGleb Kurtsou 	default:
16894fd5efe7SGleb Kurtsou 		de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc);
16904fd5efe7SGleb Kurtsou 		if (de == NULL)
16914fd5efe7SGleb Kurtsou 			return (EINVAL);
1692ac09d109SBryan Drewery 		if (cookies != NULL)
16934fd5efe7SGleb Kurtsou 			off = tmpfs_dirent_cookie(de);
1694d1fa59e9SXin LI 	}
1695d1fa59e9SXin LI 
169690f580b9SMark Johnston 	/*
169690f580b9SMark Johnston 	 * Read as many entries as possible; i.e., until we reach the end of the
169890f580b9SMark Johnston 	 * directory or we exhaust uio space.
169990f580b9SMark Johnston 	 */
1700d1fa59e9SXin LI 	do {
1701d1fa59e9SXin LI 		struct dirent d;
1702d1fa59e9SXin LI 
170390f580b9SMark Johnston 		/*
170490f580b9SMark Johnston 		 * Create a dirent structure representing the current tmpfs_node
170590f580b9SMark Johnston 		 * and fill it.
170690f580b9SMark Johnston 		 */
170799d57a6bSEd Schouten 		if (de->td_node == NULL) {
170899d57a6bSEd Schouten 			d.d_fileno = 1;
170999d57a6bSEd Schouten 			d.d_type = DT_WHT;
171099d57a6bSEd Schouten 		} else {
1711d1fa59e9SXin LI 			d.d_fileno = de->td_node->tn_id;
1712d1fa59e9SXin LI 			switch (de->td_node->tn_type) {
1713d1fa59e9SXin LI 			case VBLK:
1714d1fa59e9SXin LI 				d.d_type = DT_BLK;
1715d1fa59e9SXin LI 				break;
1716d1fa59e9SXin LI 
1717d1fa59e9SXin LI 			case VCHR:
1718d1fa59e9SXin LI 				d.d_type = DT_CHR;
1719d1fa59e9SXin LI 				break;
1720d1fa59e9SXin LI 
1721d1fa59e9SXin LI 			case VDIR:
1722d1fa59e9SXin LI 				d.d_type = DT_DIR;
1723d1fa59e9SXin LI 				break;
1724d1fa59e9SXin LI 
1725d1fa59e9SXin LI 			case VFIFO:
1726d1fa59e9SXin LI 				d.d_type = DT_FIFO;
1727d1fa59e9SXin LI 				break;
1728d1fa59e9SXin LI 
1729d1fa59e9SXin LI 			case VLNK:
1730d1fa59e9SXin LI 				d.d_type = DT_LNK;
1731d1fa59e9SXin LI 				break;
1732d1fa59e9SXin LI 
1733d1fa59e9SXin LI 			case VREG:
1734d1fa59e9SXin LI 				d.d_type = DT_REG;
1735d1fa59e9SXin LI 				break;
1736d1fa59e9SXin LI 
1737d1fa59e9SXin LI 			case VSOCK:
1738d1fa59e9SXin LI 				d.d_type = DT_SOCK;
1739d1fa59e9SXin LI 				break;
1740d1fa59e9SXin LI 
1741d1fa59e9SXin LI 			default:
17421fa8f5f0SXin LI 				panic("tmpfs_dir_getdents: type %p %d",
17431fa8f5f0SXin LI 				    de->td_node, (int)de->td_node->tn_type);
1744d1fa59e9SXin LI 			}
174599d57a6bSEd Schouten 		}
1746d1fa59e9SXin LI 		d.d_namlen = de->td_namelen;
1747d1fa59e9SXin LI 		MPASS(de->td_namelen < sizeof(d.d_name));
17484fd5efe7SGleb Kurtsou 		(void)memcpy(d.d_name, de->ud.td_name, de->td_namelen);
1749d1fa59e9SXin LI 		d.d_reclen = GENERIC_DIRSIZ(&d);
1750d1fa59e9SXin LI 
175190f580b9SMark Johnston 		/*
175290f580b9SMark Johnston 		 * Stop reading if the directory entry we are processing is bigger
175390f580b9SMark Johnston 		 * than the amount of data that can be returned.
175490f580b9SMark Johnston 		 */
1755d1fa59e9SXin LI 		if (d.d_reclen > uio->uio_resid) {
17564fd5efe7SGleb Kurtsou 			error = EJUSTRETURN;
1757d1fa59e9SXin LI 			break;
1758d1fa59e9SXin LI 		}
1759d1fa59e9SXin LI 
176090f580b9SMark Johnston 		nde = tmpfs_dir_next(node, &dc);
176190f580b9SMark Johnston 		d.d_off = tmpfs_dirent_cookie(nde);
176290f580b9SMark Johnston 		dirent_terminate(&d);
176390f580b9SMark Johnston 
176490f580b9SMark Johnston 		/*
176590f580b9SMark Johnston 		 * Copy the new dirent structure into the output buffer and
176690f580b9SMark Johnston 		 * advance pointers.
176790f580b9SMark Johnston 		 */
1768d1fa59e9SXin LI 		error = uiomove(&d, d.d_reclen, uio);
17699fb9c623SKonstantin Belousov 		if (error == 0) {
177090f580b9SMark Johnston 			de = nde;
1771ac09d109SBryan Drewery 			if (cookies != NULL) {
17724fd5efe7SGleb Kurtsou 				off = tmpfs_dirent_cookie(de);
1773ac09d109SBryan Drewery 				MPASS(*ncookies < maxcookies);
17744fd5efe7SGleb Kurtsou 				cookies[(*ncookies)++] = off;
17754fd5efe7SGleb Kurtsou 			}
17769fb9c623SKonstantin Belousov 		}
1777d1fa59e9SXin LI 	} while (error == 0 && uio->uio_resid > 0 && de != NULL);
1778d1fa59e9SXin LI 
1779ac09d109SBryan Drewery 	/* Skip setting off when using cookies as it is already done above. */
1780ac09d109SBryan Drewery 	if (cookies == NULL)
17814fd5efe7SGleb Kurtsou 		off = tmpfs_dirent_cookie(de);
1782d1fa59e9SXin LI 
1783ac09d109SBryan Drewery 	/* Update the offset and cache. */
17844fd5efe7SGleb Kurtsou 	uio->uio_offset = off;
17854fd5efe7SGleb Kurtsou 	node->tn_dir.tn_readdir_lastn = off;
17864fd5efe7SGleb Kurtsou 	node->tn_dir.tn_readdir_lastp = de;
17874fd5efe7SGleb Kurtsou 
1788016b7c7eSKonstantin Belousov 	tmpfs_set_accessed(tm, node);
1789016b7c7eSKonstantin Belousov 	return (error);
1790d1fa59e9SXin LI }
1791d1fa59e9SXin LI 
179299d57a6bSEd Schouten int
179399d57a6bSEd Schouten tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp)
179499d57a6bSEd Schouten {
179599d57a6bSEd Schouten 	struct tmpfs_dirent *de;
17968fa5e0f2SJason A. Harmening 	struct tmpfs_node *dnode;
179799d57a6bSEd Schouten 	int error;
179899d57a6bSEd Schouten 
179999d57a6bSEd Schouten 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL,
180099d57a6bSEd Schouten 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
180199d57a6bSEd Schouten 	if (error != 0)
180299d57a6bSEd Schouten 		return (error);
18038fa5e0f2SJason A. Harmening 	dnode = VP_TO_TMPFS_DIR(dvp);
180499d57a6bSEd Schouten 	tmpfs_dir_attach(dvp, de);
18058fa5e0f2SJason A. Harmening 	dnode->tn_dir.tn_wht_size += sizeof(*de);
180699d57a6bSEd Schouten 	return (0);
180799d57a6bSEd Schouten }
180899d57a6bSEd Schouten 
180999d57a6bSEd Schouten void
181099d57a6bSEd Schouten tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp)
181199d57a6bSEd Schouten {
181299d57a6bSEd Schouten 	struct tmpfs_dirent *de;
18138fa5e0f2SJason A. Harmening 	struct tmpfs_node *dnode;
181499d57a6bSEd Schouten 
18158fa5e0f2SJason A. Harmening 	dnode = VP_TO_TMPFS_DIR(dvp);
18168fa5e0f2SJason A. Harmening 	de = tmpfs_dir_lookup(dnode, NULL, cnp);
181799d57a6bSEd Schouten 	MPASS(de != NULL && de->td_node == NULL);
18188fa5e0f2SJason A. Harmening 	MPASS(dnode->tn_dir.tn_wht_size >= sizeof(*de));
18198fa5e0f2SJason A. Harmening 	dnode->tn_dir.tn_wht_size -= sizeof(*de);
182099d57a6bSEd Schouten 	tmpfs_dir_detach(dvp, de);
18214fd5efe7SGleb Kurtsou 	tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
182299d57a6bSEd Schouten }
182399d57a6bSEd Schouten 
1824d1fa59e9SXin LI /*
18258fa5e0f2SJason A. Harmening  * Frees any dirents still associated with the directory represented
18268fa5e0f2SJason A. Harmening  * by dvp in preparation for the removal of the directory.  This is
18278fa5e0f2SJason A. Harmening  * required when removing a directory which contains only whiteout
18288fa5e0f2SJason A. Harmening  * entries.
18298fa5e0f2SJason A. Harmening  */
18308fa5e0f2SJason A. Harmening void
18318fa5e0f2SJason A. Harmening tmpfs_dir_clear_whiteouts(struct vnode *dvp)
18328fa5e0f2SJason A. Harmening {
18338fa5e0f2SJason A. Harmening 	struct tmpfs_dir_cursor dc;
18348fa5e0f2SJason A. Harmening 	struct tmpfs_dirent *de;
18358fa5e0f2SJason A. Harmening 	struct tmpfs_node *dnode;
18368fa5e0f2SJason A. Harmening 
18378fa5e0f2SJason A. Harmening 	dnode = VP_TO_TMPFS_DIR(dvp);
18388fa5e0f2SJason A. Harmening 
18398fa5e0f2SJason A. Harmening 	while ((de = tmpfs_dir_first(dnode, &dc)) != NULL) {
18408fa5e0f2SJason A. Harmening 		KASSERT(de->td_node == NULL, ("%s: non-whiteout dirent %p",
18418fa5e0f2SJason A. Harmening 		    __func__, de));
18428fa5e0f2SJason A. Harmening 		dnode->tn_dir.tn_wht_size -= sizeof(*de);
18438fa5e0f2SJason A. Harmening 		tmpfs_dir_detach(dvp, de);
18448fa5e0f2SJason A. Harmening 		tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
18458fa5e0f2SJason A. Harmening 	}
18468fa5e0f2SJason A. Harmening 	MPASS(dnode->tn_size == 0);
18478fa5e0f2SJason A. Harmening 	MPASS(dnode->tn_dir.tn_wht_size == 0);
18488fa5e0f2SJason A. Harmening }
18498fa5e0f2SJason A. Harmening 
18508fa5e0f2SJason A. Harmening /*
18514673c751SAlan Cox  * Resizes the aobj associated with the regular file pointed to by 'vp' to the
18524673c751SAlan Cox  * size 'newsize'.  'vp' must point to a vnode that represents a regular file.
18534673c751SAlan Cox  * 'newsize' must be positive.
1854d1fa59e9SXin LI  *
1855d1fa59e9SXin LI  * Returns zero on success or an appropriate error code on failure.
1856d1fa59e9SXin LI  */
1857d1fa59e9SXin LI int
18580b05cac3SAlan Cox tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
1859d1fa59e9SXin LI {
1860d1fa59e9SXin LI 	struct tmpfs_node *node;
1861b10d1d5dSAlan Cox 	vm_object_t uobj;
18622971897dSAlan Cox 	vm_pindex_t idx, newpages, oldpages;
1863d1fa59e9SXin LI 	off_t oldsize;
1864399be910SKa Ho Ng 	int base, error;
1865d1fa59e9SXin LI 
1866d1fa59e9SXin LI 	MPASS(vp->v_type == VREG);
1867d1fa59e9SXin LI 	MPASS(newsize >= 0);
1868d1fa59e9SXin LI 
1869d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
1870b10d1d5dSAlan Cox 	uobj = node->tn_reg.tn_aobj;
1871d1fa59e9SXin LI 
18724673c751SAlan Cox 	/*
18734673c751SAlan Cox 	 * Convert the old and new sizes to the number of pages needed to
1874d1fa59e9SXin LI 	 * store them.  It may happen that we do not need to do anything
1875d1fa59e9SXin LI 	 * because the last allocated page can accommodate the change on
18764673c751SAlan Cox 	 * its own.
18774673c751SAlan Cox 	 */
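	/*
	 * For example, with 4 KB pages (an assumption for illustration),
	 * growing from 5000 to 6000 bytes keeps oldpages == newpages == 2,
	 * so only tn_size changes via the fast path below, while growing to
	 * 10000 bytes requires a third page.
	 */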
1878d1fa59e9SXin LI 	oldsize = node->tn_size;
1879b10d1d5dSAlan Cox 	oldpages = OFF_TO_IDX(oldsize + PAGE_MASK);
1880b10d1d5dSAlan Cox 	MPASS(oldpages == uobj->size);
1881b10d1d5dSAlan Cox 	newpages = OFF_TO_IDX(newsize + PAGE_MASK);
1882e3e10c39SMateusz Guzik 
1883e3e10c39SMateusz Guzik 	if (__predict_true(newpages == oldpages && newsize >= oldsize)) {
1884e3e10c39SMateusz Guzik 		node->tn_size = newsize;
1885e3e10c39SMateusz Guzik 		return (0);
1886e3e10c39SMateusz Guzik 	}
1887e3e10c39SMateusz Guzik 
188889f6b863SAttilio Rao 	VM_OBJECT_WLOCK(uobj);
1889d1fa59e9SXin LI 	if (newsize < oldsize) {
1890d1fa59e9SXin LI 		/*
18912971897dSAlan Cox 		 * Zero the truncated part of the last page.
18922971897dSAlan Cox 		 */
18932971897dSAlan Cox 		base = newsize & PAGE_MASK;
18942971897dSAlan Cox 		if (base != 0) {
18952971897dSAlan Cox 			idx = OFF_TO_IDX(newsize);
1896399be910SKa Ho Ng 			error = tmpfs_partial_page_invalidate(uobj, idx, base,
1897399be910SKa Ho Ng 			    PAGE_SIZE, ignerr);
1898399be910SKa Ho Ng 			if (error != 0) {
1899d6e13f3bSJeff Roberson 				VM_OBJECT_WUNLOCK(uobj);
1900399be910SKa Ho Ng 				return (error);
19012971897dSAlan Cox 			}
19022971897dSAlan Cox 		}
19032971897dSAlan Cox 
19042971897dSAlan Cox 		/*
19054673c751SAlan Cox 		 * Release any swap space and free any whole pages.
1906d1fa59e9SXin LI 		 */
190784242cf6SMark Johnston 		if (newpages < oldpages)
19086bbee8e2SAlan Cox 			vm_object_page_remove(uobj, newpages, 0, 0);
1909d1fa59e9SXin LI 	}
1910b10d1d5dSAlan Cox 	uobj->size = newpages;
191189f6b863SAttilio Rao 	VM_OBJECT_WUNLOCK(uobj);
19122971897dSAlan Cox 
19132971897dSAlan Cox 	node->tn_size = newsize;
19144673c751SAlan Cox 	return (0);
1915d1fa59e9SXin LI }
1916d1fa59e9SXin LI 
19178d7cd10bSKa Ho Ng /*
19188d7cd10bSKa Ho Ng  * Punch hole in the aobj associated with the regular file pointed to by 'vp'.
19198d7cd10bSKa Ho Ng  * Requests completely beyond the end-of-file are converted to a no-op.
19208d7cd10bSKa Ho Ng  *
19218d7cd10bSKa Ho Ng  * Returns 0 on success or error code from tmpfs_partial_page_invalidate() on
19228d7cd10bSKa Ho Ng  * failure.
19238d7cd10bSKa Ho Ng  */
19248d7cd10bSKa Ho Ng int
19258d7cd10bSKa Ho Ng tmpfs_reg_punch_hole(struct vnode *vp, off_t *offset, off_t *length)
19268d7cd10bSKa Ho Ng {
19278d7cd10bSKa Ho Ng 	struct tmpfs_node *node;
19288d7cd10bSKa Ho Ng 	vm_object_t object;
19298d7cd10bSKa Ho Ng 	vm_pindex_t pistart, pi, piend;
19308d7cd10bSKa Ho Ng 	int startofs, endofs, end;
19318d7cd10bSKa Ho Ng 	off_t off, len;
19328d7cd10bSKa Ho Ng 	int error;
19338d7cd10bSKa Ho Ng 
19348d7cd10bSKa Ho Ng 	KASSERT(*length <= OFF_MAX - *offset, ("%s: offset + length overflows",
19358d7cd10bSKa Ho Ng 	    __func__));
19368d7cd10bSKa Ho Ng 	node = VP_TO_TMPFS_NODE(vp);
19378d7cd10bSKa Ho Ng 	KASSERT(node->tn_type == VREG, ("%s: node is not regular file",
19388d7cd10bSKa Ho Ng 	    __func__));
19398d7cd10bSKa Ho Ng 	object = node->tn_reg.tn_aobj;
19408d7cd10bSKa Ho Ng 	off = *offset;
19418d7cd10bSKa Ho Ng 	len = omin(node->tn_size - off, *length);
19428d7cd10bSKa Ho Ng 	startofs = off & PAGE_MASK;
19438d7cd10bSKa Ho Ng 	endofs = (off + len) & PAGE_MASK;
19448d7cd10bSKa Ho Ng 	pistart = OFF_TO_IDX(off);
19458d7cd10bSKa Ho Ng 	piend = OFF_TO_IDX(off + len);
19468d7cd10bSKa Ho Ng 	pi = OFF_TO_IDX((vm_ooffset_t)off + PAGE_MASK);
19478d7cd10bSKa Ho Ng 	error = 0;
19488d7cd10bSKa Ho Ng 
19498d7cd10bSKa Ho Ng 	/* Handle the case when offset is on or beyond file size. */
19508d7cd10bSKa Ho Ng 	if (len <= 0) {
19518d7cd10bSKa Ho Ng 		*length = 0;
19528d7cd10bSKa Ho Ng 		return (0);
19538d7cd10bSKa Ho Ng 	}
19548d7cd10bSKa Ho Ng 
19558d7cd10bSKa Ho Ng 	VM_OBJECT_WLOCK(object);
19568d7cd10bSKa Ho Ng 
19578d7cd10bSKa Ho Ng 	/*
19588d7cd10bSKa Ho Ng 	 * If there is a partial page at the beginning of the hole-punching
19598d7cd10bSKa Ho Ng 	 * request, fill the partial page with zeroes.
19608d7cd10bSKa Ho Ng 	 */
19618d7cd10bSKa Ho Ng 	if (startofs != 0) {
19628d7cd10bSKa Ho Ng 		end = pistart != piend ? PAGE_SIZE : endofs;
19638d7cd10bSKa Ho Ng 		error = tmpfs_partial_page_invalidate(object, pistart, startofs,
19648d7cd10bSKa Ho Ng 		    end, FALSE);
19658d7cd10bSKa Ho Ng 		if (error != 0)
19668d7cd10bSKa Ho Ng 			goto out;
19678d7cd10bSKa Ho Ng 		off += end - startofs;
19688d7cd10bSKa Ho Ng 		len -= end - startofs;
19698d7cd10bSKa Ho Ng 	}
19708d7cd10bSKa Ho Ng 
19718d7cd10bSKa Ho Ng 	/*
19728d7cd10bSKa Ho Ng 	 * Toss away the full pages in the affected area.
19738d7cd10bSKa Ho Ng 	 */
19748d7cd10bSKa Ho Ng 	if (pi < piend) {
19758d7cd10bSKa Ho Ng 		vm_object_page_remove(object, pi, piend, 0);
19768d7cd10bSKa Ho Ng 		off += IDX_TO_OFF(piend - pi);
19778d7cd10bSKa Ho Ng 		len -= IDX_TO_OFF(piend - pi);
19788d7cd10bSKa Ho Ng 	}
19798d7cd10bSKa Ho Ng 
19808d7cd10bSKa Ho Ng 	/*
19818d7cd10bSKa Ho Ng 	 * If there is a partial page at the end of the hole-punching request,
19828d7cd10bSKa Ho Ng 	 * fill the partial page with zeroes.
19838d7cd10bSKa Ho Ng 	 */
19848d7cd10bSKa Ho Ng 	if (endofs != 0 && pistart != piend) {
19858d7cd10bSKa Ho Ng 		error = tmpfs_partial_page_invalidate(object, piend, 0, endofs,
19868d7cd10bSKa Ho Ng 		    FALSE);
19878d7cd10bSKa Ho Ng 		if (error != 0)
19888d7cd10bSKa Ho Ng 			goto out;
19898d7cd10bSKa Ho Ng 		off += endofs;
19908d7cd10bSKa Ho Ng 		len -= endofs;
19918d7cd10bSKa Ho Ng 	}
19928d7cd10bSKa Ho Ng 
19938d7cd10bSKa Ho Ng out:
19948d7cd10bSKa Ho Ng 	VM_OBJECT_WUNLOCK(object);
19958d7cd10bSKa Ho Ng 	*offset = off;
19968d7cd10bSKa Ho Ng 	*length = len;
19978d7cd10bSKa Ho Ng 	return (error);
19988d7cd10bSKa Ho Ng }
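
/*
 * Worked example (4 KB pages assumed for illustration): punching a hole at
 * *offset = 1000 with *length = 10000 in a sufficiently large file zeroes
 * bytes 1000-4095 of page 0, removes page 1 entirely, zeroes bytes 0-2807
 * of page 2, and returns with *offset advanced to 11000 and *length reduced
 * to 0.
 */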
19998d7cd10bSKa Ho Ng 
2000f40cb1c6SKonstantin Belousov void
2001f40cb1c6SKonstantin Belousov tmpfs_check_mtime(struct vnode *vp)
2002f40cb1c6SKonstantin Belousov {
2003f40cb1c6SKonstantin Belousov 	struct tmpfs_node *node;
2004f40cb1c6SKonstantin Belousov 	struct vm_object *obj;
2005f40cb1c6SKonstantin Belousov 
2006f40cb1c6SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "check_mtime");
2007f40cb1c6SKonstantin Belousov 	if (vp->v_type != VREG)
2008f40cb1c6SKonstantin Belousov 		return;
2009f40cb1c6SKonstantin Belousov 	obj = vp->v_object;
201028bc23abSKonstantin Belousov 	KASSERT(obj->type == tmpfs_pager_type &&
20114b8365d7SKonstantin Belousov 	    (obj->flags & (OBJ_SWAP | OBJ_TMPFS)) ==
20124b8365d7SKonstantin Belousov 	    (OBJ_SWAP | OBJ_TMPFS), ("non-tmpfs obj"));
2013f40cb1c6SKonstantin Belousov 	/* unlocked read */
201467d0e293SJeff Roberson 	if (obj->generation != obj->cleangeneration) {
2015f40cb1c6SKonstantin Belousov 		VM_OBJECT_WLOCK(obj);
201667d0e293SJeff Roberson 		if (obj->generation != obj->cleangeneration) {
201767d0e293SJeff Roberson 			obj->cleangeneration = obj->generation;
2018f40cb1c6SKonstantin Belousov 			node = VP_TO_TMPFS_NODE(vp);
2019311d39f2SKonstantin Belousov 			node->tn_status |= TMPFS_NODE_MODIFIED |
2020311d39f2SKonstantin Belousov 			    TMPFS_NODE_CHANGED;
2021f40cb1c6SKonstantin Belousov 		}
2022f40cb1c6SKonstantin Belousov 		VM_OBJECT_WUNLOCK(obj);
2023f40cb1c6SKonstantin Belousov 	}
2024f40cb1c6SKonstantin Belousov }
2025f40cb1c6SKonstantin Belousov 
2026d1fa59e9SXin LI /*
2027d1fa59e9SXin LI  * Change flags of the given vnode.
2028d1fa59e9SXin LI  * Caller should execute tmpfs_update on vp after a successful execution.
2029d1fa59e9SXin LI  * The vnode must be locked on entry and remain locked on exit.
2030d1fa59e9SXin LI  */
2031d1fa59e9SXin LI int
2032b4b2596bSPawel Jakub Dawidek tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred,
20330f01fb01SKonstantin Belousov     struct thread *td)
2034d1fa59e9SXin LI {
2035d1fa59e9SXin LI 	int error;
2036d1fa59e9SXin LI 	struct tmpfs_node *node;
2037d1fa59e9SXin LI 
2038305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chflags");
2039d1fa59e9SXin LI 
2040d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
2041d1fa59e9SXin LI 
20423b5f179dSKenneth D. Merry 	if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK |
20433b5f179dSKenneth D. Merry 	    UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP |
20443b5f179dSKenneth D. Merry 	    UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
20453b5f179dSKenneth D. Merry 	    UF_SPARSE | UF_SYSTEM)) != 0)
2046587fdb53SJaakko Heinonen 		return (EOPNOTSUPP);
2047587fdb53SJaakko Heinonen 
2048d1fa59e9SXin LI 	/* Disallow this operation if the file system is mounted read-only. */
2049d1fa59e9SXin LI 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
205023f90714SKonstantin Belousov 		return (EROFS);
2051d1fa59e9SXin LI 
2052d1fa59e9SXin LI 	/*
2053d1fa59e9SXin LI 	 * Callers may only modify the file flags on objects they
2054d1fa59e9SXin LI 	 * have VADMIN rights for.
2055d1fa59e9SXin LI 	 */
20560f01fb01SKonstantin Belousov 	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
2057d1fa59e9SXin LI 		return (error);
2058d1fa59e9SXin LI 	/*
2059d1fa59e9SXin LI 	 * Unprivileged processes are not permitted to unset system
2060d1fa59e9SXin LI 	 * flags, or modify flags if any system flags are set.
2061d1fa59e9SXin LI 	 */
2062cc426dd3SMateusz Guzik 	if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) {
2063587fdb53SJaakko Heinonen 		if (node->tn_flags &
2064587fdb53SJaakko Heinonen 		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
2065d1fa59e9SXin LI 			error = securelevel_gt(cred, 0);
2066d1fa59e9SXin LI 			if (error)
2067d1fa59e9SXin LI 				return (error);
2068d1fa59e9SXin LI 		}
2069d1fa59e9SXin LI 	} else {
2070587fdb53SJaakko Heinonen 		if (node->tn_flags &
2071587fdb53SJaakko Heinonen 		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
2072587fdb53SJaakko Heinonen 		    ((flags ^ node->tn_flags) & SF_SETTABLE))
2073d1fa59e9SXin LI 			return (EPERM);
2074d1fa59e9SXin LI 	}
2075587fdb53SJaakko Heinonen 	node->tn_flags = flags;
2076d1fa59e9SXin LI 	node->tn_status |= TMPFS_NODE_CHANGED;
2077d1fa59e9SXin LI 
2078305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chflags2");
2079d1fa59e9SXin LI 
2080305b4229SKonstantin Belousov 	return (0);
2081d1fa59e9SXin LI }
2082d1fa59e9SXin LI 
2083d1fa59e9SXin LI /*
2084d1fa59e9SXin LI  * Change access mode on the given vnode.
2085d1fa59e9SXin LI  * Caller should execute tmpfs_update on vp after a successful execution.
2086d1fa59e9SXin LI  * The vnode must be locked on entry and remain locked on exit.
2087d1fa59e9SXin LI  */
2088d1fa59e9SXin LI int
20890f01fb01SKonstantin Belousov tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred,
20900f01fb01SKonstantin Belousov     struct thread *td)
2091d1fa59e9SXin LI {
2092d1fa59e9SXin LI 	int error;
2093d1fa59e9SXin LI 	struct tmpfs_node *node;
2094172ffe70SMateusz Guzik 	mode_t newmode;
2095d1fa59e9SXin LI 
2096305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chmod");
2097172ffe70SMateusz Guzik 	ASSERT_VOP_IN_SEQC(vp);
2098d1fa59e9SXin LI 
2099d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
2100d1fa59e9SXin LI 
2101d1fa59e9SXin LI 	/* Disallow this operation if the file system is mounted read-only. */
2102d1fa59e9SXin LI 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
2103c12118f6SKa Ho Ng 		return (EROFS);
2104d1fa59e9SXin LI 
2105d1fa59e9SXin LI 	/* Immutable or append-only files cannot be modified, either. */
2106d1fa59e9SXin LI 	if (node->tn_flags & (IMMUTABLE | APPEND))
2107c12118f6SKa Ho Ng 		return (EPERM);
2108d1fa59e9SXin LI 
2109d1fa59e9SXin LI 	/*
2110d1fa59e9SXin LI 	 * To modify the permissions on a file, must possess VADMIN
2111d1fa59e9SXin LI 	 * for that file.
2112d1fa59e9SXin LI 	 */
21130f01fb01SKonstantin Belousov 	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
2114d1fa59e9SXin LI 		return (error);
2115d1fa59e9SXin LI 
2116d1fa59e9SXin LI 	/*
2117d1fa59e9SXin LI 	 * Privileged processes may set the sticky bit on non-directories,
2118d1fa59e9SXin LI 	 * as well as set the setgid bit on a file with a group that the
2119d1fa59e9SXin LI 	 * process is not a member of.
2120d1fa59e9SXin LI 	 */
2121d1fa59e9SXin LI 	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
2122cc426dd3SMateusz Guzik 		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
2123d1fa59e9SXin LI 			return (EFTYPE);
2124d1fa59e9SXin LI 	}
2125d1fa59e9SXin LI 	if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) {
2126cc426dd3SMateusz Guzik 		error = priv_check_cred(cred, PRIV_VFS_SETGID);
2127d1fa59e9SXin LI 		if (error)
2128d1fa59e9SXin LI 			return (error);
2129d1fa59e9SXin LI 	}
2130d1fa59e9SXin LI 
2131172ffe70SMateusz Guzik 	newmode = node->tn_mode & ~ALLPERMS;
2132172ffe70SMateusz Guzik 	newmode |= mode & ALLPERMS;
2133172ffe70SMateusz Guzik 	atomic_store_short(&node->tn_mode, newmode);
2134d1fa59e9SXin LI 
2135d1fa59e9SXin LI 	node->tn_status |= TMPFS_NODE_CHANGED;
2136d1fa59e9SXin LI 
2137305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chmod2");
2138d1fa59e9SXin LI 
2139305b4229SKonstantin Belousov 	return (0);
2140d1fa59e9SXin LI }
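
/*
 * Illustrative sketch only, not part of the original source: the mode
 * update above replaces just the ALLPERMS (07777) portion of tn_mode and
 * preserves any bits outside it.  The helper name below is an assumption
 * made for this example.
 */
#if 0	/* example only; never compiled into the kernel */
#include <sys/stat.h>

#ifndef ALLPERMS
#define	ALLPERMS	(S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO)
#endif

static mode_t
merge_mode(mode_t oldmode, mode_t requested)
{
	/* Keep bits outside ALLPERMS, take permission bits from the request. */
	return ((oldmode & ~ALLPERMS) | (requested & ALLPERMS));
}
#endif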
2141d1fa59e9SXin LI 
2142d1fa59e9SXin LI /*
2143d1fa59e9SXin LI  * Change ownership of the given vnode.  At least one of uid or gid must
2144d1fa59e9SXin LI  * be different than VNOVAL.  If one is set to that value, the attribute
2145d1fa59e9SXin LI  * is unchanged.
2146d1fa59e9SXin LI  * Caller should execute tmpfs_update on vp after a successful execution.
2147d1fa59e9SXin LI  * The vnode must be locked on entry and remain locked on exit.
2148d1fa59e9SXin LI  */
2149d1fa59e9SXin LI int
2150d1fa59e9SXin LI tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
21510f01fb01SKonstantin Belousov     struct thread *td)
2152d1fa59e9SXin LI {
2153d1fa59e9SXin LI 	int error;
2154d1fa59e9SXin LI 	struct tmpfs_node *node;
2155d1fa59e9SXin LI 	uid_t ouid;
2156d1fa59e9SXin LI 	gid_t ogid;
2157172ffe70SMateusz Guzik 	mode_t newmode;
2158d1fa59e9SXin LI 
2159305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chown");
2160172ffe70SMateusz Guzik 	ASSERT_VOP_IN_SEQC(vp);
2161d1fa59e9SXin LI 
2162d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
2163d1fa59e9SXin LI 
2164d1fa59e9SXin LI 	/* Assign default values if they are unknown. */
2165d1fa59e9SXin LI 	MPASS(uid != VNOVAL || gid != VNOVAL);
2166d1fa59e9SXin LI 	if (uid == VNOVAL)
2167d1fa59e9SXin LI 		uid = node->tn_uid;
2168d1fa59e9SXin LI 	if (gid == VNOVAL)
2169d1fa59e9SXin LI 		gid = node->tn_gid;
2170d1fa59e9SXin LI 	MPASS(uid != VNOVAL && gid != VNOVAL);
2171d1fa59e9SXin LI 
2172d1fa59e9SXin LI 	/* Disallow this operation if the file system is mounted read-only. */
2173d1fa59e9SXin LI 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
217423f90714SKonstantin Belousov 		return (EROFS);
2175d1fa59e9SXin LI 
2176d1fa59e9SXin LI 	/* Immutable or append-only files cannot be modified, either. */
2177d1fa59e9SXin LI 	if (node->tn_flags & (IMMUTABLE | APPEND))
217823f90714SKonstantin Belousov 		return (EPERM);
2179d1fa59e9SXin LI 
2180d1fa59e9SXin LI 	/*
2181d1fa59e9SXin LI 	 * To modify the ownership of a file, the caller must possess
2182d1fa59e9SXin LI 	 * VADMIN for that file.
2183d1fa59e9SXin LI 	 */
21840f01fb01SKonstantin Belousov 	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
2185d1fa59e9SXin LI 		return (error);
2186d1fa59e9SXin LI 
2187d1fa59e9SXin LI 	/*
2188d1fa59e9SXin LI 	 * To change the owner of a file, or change the group of a file to a
2189d1fa59e9SXin LI 	 * group of which we are not a member, the caller must have
2190d1fa59e9SXin LI 	 * privilege.
2191d1fa59e9SXin LI 	 */
2192d1fa59e9SXin LI 	if ((uid != node->tn_uid ||
2193d1fa59e9SXin LI 	    (gid != node->tn_gid && !groupmember(gid, cred))) &&
2194cc426dd3SMateusz Guzik 	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
2195d1fa59e9SXin LI 		return (error);
2196d1fa59e9SXin LI 
2197d1fa59e9SXin LI 	ogid = node->tn_gid;
2198d1fa59e9SXin LI 	ouid = node->tn_uid;
2199d1fa59e9SXin LI 
2200d1fa59e9SXin LI 	node->tn_uid = uid;
2201d1fa59e9SXin LI 	node->tn_gid = gid;
2202d1fa59e9SXin LI 
2203d1fa59e9SXin LI 	node->tn_status |= TMPFS_NODE_CHANGED;
2204d1fa59e9SXin LI 
22050f01fb01SKonstantin Belousov 	if ((node->tn_mode & (S_ISUID | S_ISGID)) != 0 &&
22060f01fb01SKonstantin Belousov 	    (ouid != uid || ogid != gid)) {
2207172ffe70SMateusz Guzik 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
2208172ffe70SMateusz Guzik 			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
2209172ffe70SMateusz Guzik 			atomic_store_short(&node->tn_mode, newmode);
2210172ffe70SMateusz Guzik 		}
2211d1fa59e9SXin LI 	}
2212d1fa59e9SXin LI 
2213305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chown2");
2214d1fa59e9SXin LI 
2215305b4229SKonstantin Belousov 	return (0);
2216d1fa59e9SXin LI }
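
/*
 * Illustrative sketch only, not part of the original source: the
 * setuid/setgid stripping rule applied above, as a pure function.
 * "may_retain_sugid" stands for a successful PRIV_VFS_RETAINSUGID check;
 * the name and the helper itself are assumptions made for this example.
 */
#if 0	/* example only; never compiled into the kernel */
#include <sys/types.h>
#include <sys/stat.h>
#include <stdbool.h>

static mode_t
chown_strip_sugid(mode_t mode, uid_t ouid, uid_t nuid, gid_t ogid,
    gid_t ngid, bool may_retain_sugid)
{
	/*
	 * Ownership changed on a set-id file: drop the set-id bits unless
	 * the caller is allowed to retain them.
	 */
	if ((mode & (S_ISUID | S_ISGID)) != 0 &&
	    (ouid != nuid || ogid != ngid) && !may_retain_sugid)
		mode &= ~(S_ISUID | S_ISGID);
	return (mode);
}
#endif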
2217d1fa59e9SXin LI 
2218d1fa59e9SXin LI /*
2219d1fa59e9SXin LI  * Change size of the given vnode.
2220d1fa59e9SXin LI  * Caller should execute tmpfs_update on vp after a successful execution.
2221d1fa59e9SXin LI  * The vnode must be locked on entry and remain locked on exit.
2222d1fa59e9SXin LI  */
2223d1fa59e9SXin LI int
2224d1fa59e9SXin LI tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
22250f01fb01SKonstantin Belousov     struct thread *td)
2226d1fa59e9SXin LI {
2227d1fa59e9SXin LI 	int error;
2228d1fa59e9SXin LI 	struct tmpfs_node *node;
2229d1fa59e9SXin LI 
2230305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chsize");
2231d1fa59e9SXin LI 
2232d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
2233d1fa59e9SXin LI 
2234d1fa59e9SXin LI 	/* Decide whether this is a valid operation based on the file type. */
2235d1fa59e9SXin LI 	error = 0;
2236d1fa59e9SXin LI 	switch (vp->v_type) {
2237d1fa59e9SXin LI 	case VDIR:
223823f90714SKonstantin Belousov 		return (EISDIR);
2239d1fa59e9SXin LI 
2240d1fa59e9SXin LI 	case VREG:
2241d1fa59e9SXin LI 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
224223f90714SKonstantin Belousov 			return (EROFS);
2243d1fa59e9SXin LI 		break;
2244d1fa59e9SXin LI 
2245d1fa59e9SXin LI 	case VBLK:
2246d1fa59e9SXin LI 		/* FALLTHROUGH */
2247d1fa59e9SXin LI 	case VCHR:
2248d1fa59e9SXin LI 		/* FALLTHROUGH */
2249d1fa59e9SXin LI 	case VFIFO:
225023f90714SKonstantin Belousov 		/*
225123f90714SKonstantin Belousov 		 * Allow modifications of special files even if the file
2252d1fa59e9SXin LI 		 * system is mounted read-only (we are not modifying the
225323f90714SKonstantin Belousov 		 * files themselves, but the objects they represent).
225423f90714SKonstantin Belousov 		 */
225523f90714SKonstantin Belousov 		return (0);
2256d1fa59e9SXin LI 
2257d1fa59e9SXin LI 	default:
2258d1fa59e9SXin LI 		/* Anything else is unsupported. */
225923f90714SKonstantin Belousov 		return (EOPNOTSUPP);
2260d1fa59e9SXin LI 	}
2261d1fa59e9SXin LI 
2262d1fa59e9SXin LI 	/* Immutable or append-only files cannot be modified, either. */
2263d1fa59e9SXin LI 	if (node->tn_flags & (IMMUTABLE | APPEND))
226423f90714SKonstantin Belousov 		return (EPERM);
2265d1fa59e9SXin LI 
2266b5b16659SKonstantin Belousov 	error = vn_rlimit_trunc(size, td);
2267b5b16659SKonstantin Belousov 	if (error != 0)
2268b5b16659SKonstantin Belousov 		return (error);
2269b5b16659SKonstantin Belousov 
2270d1fa59e9SXin LI 	error = tmpfs_truncate(vp, size);
227123f90714SKonstantin Belousov 	/*
227223f90714SKonstantin Belousov 	 * tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
227323f90714SKonstantin Belousov 	 * for us, as well as update tn_status; no need to do that here.
227423f90714SKonstantin Belousov 	 */
2275d1fa59e9SXin LI 
2276305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chsize2");
2277d1fa59e9SXin LI 
2278305b4229SKonstantin Belousov 	return (error);
2279d1fa59e9SXin LI }
2280d1fa59e9SXin LI 
2281d1fa59e9SXin LI /*
2282d1fa59e9SXin LI  * Change access and modification times of the given vnode.
2283d1fa59e9SXin LI  * Caller should execute tmpfs_update on vp after a successful execution.
2284d1fa59e9SXin LI  * The vnode must be locked on entry and remain locked on exit.
2285d1fa59e9SXin LI  */
2286d1fa59e9SXin LI int
22877b81a399SKonstantin Belousov tmpfs_chtimes(struct vnode *vp, struct vattr *vap,
22880f01fb01SKonstantin Belousov     struct ucred *cred, struct thread *td)
2289d1fa59e9SXin LI {
2290d1fa59e9SXin LI 	int error;
2291d1fa59e9SXin LI 	struct tmpfs_node *node;
2292d1fa59e9SXin LI 
2293305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chtimes");
2294d1fa59e9SXin LI 
2295d1fa59e9SXin LI 	node = VP_TO_TMPFS_NODE(vp);
2296d1fa59e9SXin LI 
2297d1fa59e9SXin LI 	/* Disallow this operation if the file system is mounted read-only. */
2298d1fa59e9SXin LI 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
229923f90714SKonstantin Belousov 		return (EROFS);
2300d1fa59e9SXin LI 
2301d1fa59e9SXin LI 	/* Immutable or append-only files cannot be modified, either. */
2302d1fa59e9SXin LI 	if (node->tn_flags & (IMMUTABLE | APPEND))
230323f90714SKonstantin Belousov 		return (EPERM);
2304d1fa59e9SXin LI 
23050f01fb01SKonstantin Belousov 	error = vn_utimes_perm(vp, vap, cred, td);
23067b81a399SKonstantin Belousov 	if (error != 0)
23079b258fcaSXin LI 		return (error);
2308d1fa59e9SXin LI 
2309382353e2SChristian Brueffer 	if (vap->va_atime.tv_sec != VNOVAL)
2310016b7c7eSKonstantin Belousov 		node->tn_accessed = true;
2311382353e2SChristian Brueffer 	if (vap->va_mtime.tv_sec != VNOVAL)
2312d1fa59e9SXin LI 		node->tn_status |= TMPFS_NODE_MODIFIED;
2313382353e2SChristian Brueffer 	if (vap->va_birthtime.tv_sec != VNOVAL)
2314d1fa59e9SXin LI 		node->tn_status |= TMPFS_NODE_MODIFIED;
23157b81a399SKonstantin Belousov 	tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime);
2316382353e2SChristian Brueffer 	if (vap->va_birthtime.tv_sec != VNOVAL)
23177b81a399SKonstantin Belousov 		node->tn_birthtime = vap->va_birthtime;
2318305b4229SKonstantin Belousov 	ASSERT_VOP_ELOCKED(vp, "chtimes2");
2319d1fa59e9SXin LI 
2320305b4229SKonstantin Belousov 	return (0);
2321d1fa59e9SXin LI }
2322d1fa59e9SXin LI 
2323d1fa59e9SXin LI void
2324e1cdc30fSKonstantin Belousov tmpfs_set_status(struct tmpfs_mount *tm, struct tmpfs_node *node, int status)
23255dc11286SKonstantin Belousov {
23265dc11286SKonstantin Belousov 
2327e1cdc30fSKonstantin Belousov 	if ((node->tn_status & status) == status || tm->tm_ronly)
23285dc11286SKonstantin Belousov 		return;
23295dc11286SKonstantin Belousov 	TMPFS_NODE_LOCK(node);
23305dc11286SKonstantin Belousov 	node->tn_status |= status;
23315dc11286SKonstantin Belousov 	TMPFS_NODE_UNLOCK(node);
23325dc11286SKonstantin Belousov }
23335dc11286SKonstantin Belousov 
2334016b7c7eSKonstantin Belousov void
2335016b7c7eSKonstantin Belousov tmpfs_set_accessed(struct tmpfs_mount *tm, struct tmpfs_node *node)
2336016b7c7eSKonstantin Belousov {
2337016b7c7eSKonstantin Belousov 	if (node->tn_accessed || tm->tm_ronly)
2338016b7c7eSKonstantin Belousov 		return;
2339016b7c7eSKonstantin Belousov 	atomic_store_8(&node->tn_accessed, true);
2340016b7c7eSKonstantin Belousov }
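
/*
 * Illustrative sketch only, not part of the original source: the pattern
 * used by tmpfs_set_status() and tmpfs_set_accessed() above -- an
 * unlocked read skips the common "already marked" case, and the lock is
 * taken only when a bit actually needs to be set.  The structure and
 * function names below are assumptions made for this example.
 */
#if 0	/* example only; never compiled into the kernel */
#include <pthread.h>

struct mark {
	pthread_mutex_t	lock;
	int		status;
};

static void
mark_set(struct mark *m, int bits)
{
	/*
	 * Racy read: if the bits already appear set (by us or by anyone
	 * else), there is nothing to do; otherwise fall back to the lock.
	 */
	if ((m->status & bits) == bits)
		return;
	pthread_mutex_lock(&m->lock);
	m->status |= bits;
	pthread_mutex_unlock(&m->lock);
}
#endif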
2341016b7c7eSKonstantin Belousov 
23425dc11286SKonstantin Belousov /* Sync timestamps */
23433b622fc8SMateusz Guzik void
tmpfs_itimes(struct vnode * vp,const struct timespec * acc,const struct timespec * mod)23443b622fc8SMateusz Guzik tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
2345d1fa59e9SXin LI     const struct timespec *mod)
2346d1fa59e9SXin LI {
23473b622fc8SMateusz Guzik 	struct tmpfs_node *node;
2348d1fa59e9SXin LI 	struct timespec now;
2349d1fa59e9SXin LI 
23503b622fc8SMateusz Guzik 	ASSERT_VOP_LOCKED(vp, "tmpfs_itimes");
23513b622fc8SMateusz Guzik 	node = VP_TO_TMPFS_NODE(vp);
2352d1fa59e9SXin LI 
2353016b7c7eSKonstantin Belousov 	if (!node->tn_accessed &&
2354016b7c7eSKonstantin Belousov 	    (node->tn_status & (TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)) == 0)
2355d1fa59e9SXin LI 		return;
2356d1fa59e9SXin LI 
2357b746bf08SXin LI 	vfs_timestamp(&now);
23583b622fc8SMateusz Guzik 	TMPFS_NODE_LOCK(node);
2359016b7c7eSKonstantin Belousov 	if (node->tn_accessed) {
2360d1fa59e9SXin LI 		if (acc == NULL)
2361d1fa59e9SXin LI 			 acc = &now;
2362d1fa59e9SXin LI 		node->tn_atime = *acc;
2363d1fa59e9SXin LI 	}
2364d1fa59e9SXin LI 	if (node->tn_status & TMPFS_NODE_MODIFIED) {
2365d1fa59e9SXin LI 		if (mod == NULL)
2366d1fa59e9SXin LI 			mod = &now;
2367d1fa59e9SXin LI 		node->tn_mtime = *mod;
2368d1fa59e9SXin LI 	}
23695dc11286SKonstantin Belousov 	if (node->tn_status & TMPFS_NODE_CHANGED)
2370d1fa59e9SXin LI 		node->tn_ctime = now;
2371016b7c7eSKonstantin Belousov 	node->tn_status &= ~(TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
2372016b7c7eSKonstantin Belousov 	node->tn_accessed = false;
23735dc11286SKonstantin Belousov 	TMPFS_NODE_UNLOCK(node);
23745dc11286SKonstantin Belousov 
2375d1b06863SMark Murray 	/* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */
237619fa89e9SMark Murray 	random_harvest_queue(node, sizeof(*node), RANDOM_FS_ATIME);
2377d1fa59e9SXin LI }
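
/*
 * Illustrative sketch only, not part of the original source: the lazy
 * timestamp scheme realized by tmpfs_itimes() above -- accesses and
 * modifications only set cheap dirty flags, and a later sync converts
 * them into real timestamps with a single clock read.  The structure and
 * function below are assumptions made for this example.
 */
#if 0	/* example only; never compiled into the kernel */
#include <stdbool.h>
#include <time.h>

struct lazy_times {
	bool		accessed;	/* atime needs updating */
	bool		modified;	/* mtime needs updating */
	bool		changed;	/* ctime needs updating */
	struct timespec	atime, mtime, ctime;
};

static void
lazy_times_sync(struct lazy_times *lt)
{
	struct timespec now;

	if (!lt->accessed && !lt->modified && !lt->changed)
		return;
	clock_gettime(CLOCK_REALTIME, &now);
	if (lt->accessed)
		lt->atime = now;
	if (lt->modified)
		lt->mtime = now;
	if (lt->changed)
		lt->ctime = now;
	lt->accessed = lt->modified = lt->changed = false;
}
#endif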
2378d1fa59e9SXin LI 
2379d1fa59e9SXin LI int
2380d1fa59e9SXin LI tmpfs_truncate(struct vnode *vp, off_t length)
2381d1fa59e9SXin LI {
2382d1fa59e9SXin LI 	struct tmpfs_node *node;
2383860399ebSKonstantin Belousov 	int error;
2384d1fa59e9SXin LI 
2385860399ebSKonstantin Belousov 	if (length < 0)
2386860399ebSKonstantin Belousov 		return (EINVAL);
2387d1fa59e9SXin LI 	if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
2388d1fa59e9SXin LI 		return (EFBIG);
2389d1fa59e9SXin LI 
2390860399ebSKonstantin Belousov 	node = VP_TO_TMPFS_NODE(vp);
2391860399ebSKonstantin Belousov 	error = node->tn_size == length ? 0 : tmpfs_reg_resize(vp, length,
2392860399ebSKonstantin Belousov 	    FALSE);
23935dc11286SKonstantin Belousov 	if (error == 0)
2394d1fa59e9SXin LI 		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
2395d1fa59e9SXin LI 	tmpfs_update(vp);
2396d1fa59e9SXin LI 
23975dc11286SKonstantin Belousov 	return (error);
2398d1fa59e9SXin LI }
23994fd5efe7SGleb Kurtsou 
24004fd5efe7SGleb Kurtsou static __inline int
24014fd5efe7SGleb Kurtsou tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
24024fd5efe7SGleb Kurtsou {
24034fd5efe7SGleb Kurtsou 	if (a->td_hash > b->td_hash)
24044fd5efe7SGleb Kurtsou 		return (1);
24054fd5efe7SGleb Kurtsou 	else if (a->td_hash < b->td_hash)
24064fd5efe7SGleb Kurtsou 		return (-1);
24074fd5efe7SGleb Kurtsou 	return (0);
24084fd5efe7SGleb Kurtsou }
24094fd5efe7SGleb Kurtsou 
24104fd5efe7SGleb Kurtsou RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
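
/*
 * Illustrative sketch only, not part of the original source: the same
 * <sys/tree.h> idiom in a stand-alone program -- a red-black tree keyed
 * on a precomputed name hash, with a three-way comparator like
 * tmpfs_dirtree_cmp() above.  All names below are assumptions made for
 * this example; on non-BSD systems the header is typically provided by
 * libbsd.
 */
#if 0	/* example only; never compiled into the kernel */
#include <sys/tree.h>
#include <stdint.h>
#include <stdio.h>

struct hnode {
	uint32_t	hash;
	RB_ENTRY(hnode)	entry;
};

static int
hnode_cmp(struct hnode *a, struct hnode *b)
{
	if (a->hash > b->hash)
		return (1);
	if (a->hash < b->hash)
		return (-1);
	return (0);
}

RB_HEAD(htree, hnode);
RB_GENERATE_STATIC(htree, hnode, entry, hnode_cmp);

int
main(void)
{
	struct htree head = RB_INITIALIZER(&head);
	struct hnode a = { .hash = 42 }, key = { .hash = 42 };

	RB_INSERT(htree, &head, &a);
	printf("found: %d\n", RB_FIND(htree, &head, &key) != NULL);
	return (0);
}
#endif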
2411