1d1fa59e9SXin LI /* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */ 2d1fa59e9SXin LI 3e08d5567SXin LI /*- 4d63027b6SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-NetBSD 5d63027b6SPedro F. Giffuni * 6d1fa59e9SXin LI * Copyright (c) 2005 The NetBSD Foundation, Inc. 7d1fa59e9SXin LI * All rights reserved. 8d1fa59e9SXin LI * 9d1fa59e9SXin LI * This code is derived from software contributed to The NetBSD Foundation 10d1fa59e9SXin LI * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 11d1fa59e9SXin LI * 2005 program. 12d1fa59e9SXin LI * 13d1fa59e9SXin LI * Redistribution and use in source and binary forms, with or without 14d1fa59e9SXin LI * modification, are permitted provided that the following conditions 15d1fa59e9SXin LI * are met: 16d1fa59e9SXin LI * 1. Redistributions of source code must retain the above copyright 17d1fa59e9SXin LI * notice, this list of conditions and the following disclaimer. 18d1fa59e9SXin LI * 2. Redistributions in binary form must reproduce the above copyright 19d1fa59e9SXin LI * notice, this list of conditions and the following disclaimer in the 20d1fa59e9SXin LI * documentation and/or other materials provided with the distribution. 21d1fa59e9SXin LI * 22d1fa59e9SXin LI * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23d1fa59e9SXin LI * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24d1fa59e9SXin LI * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25d1fa59e9SXin LI * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26d1fa59e9SXin LI * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27d1fa59e9SXin LI * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28d1fa59e9SXin LI * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29d1fa59e9SXin LI * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30d1fa59e9SXin LI * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31d1fa59e9SXin LI * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32d1fa59e9SXin LI * POSSIBILITY OF SUCH DAMAGE. 33d1fa59e9SXin LI */ 34d1fa59e9SXin LI 35d1fa59e9SXin LI /* 36d1fa59e9SXin LI * Efficient memory file system. 37d1fa59e9SXin LI * 383544b0f6SKonstantin Belousov * tmpfs is a file system that uses FreeBSD's virtual memory 393544b0f6SKonstantin Belousov * sub-system to store file data and metadata in an efficient way. 403544b0f6SKonstantin Belousov * This means that it does not follow the structure of an on-disk file 413544b0f6SKonstantin Belousov * system because it simply does not need to. Instead, it uses 42d1fa59e9SXin LI * memory-specific data structures and algorithms to automatically 43d1fa59e9SXin LI * allocate and release resources. 
 */

#include "opt_tmpfs.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dirent.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_param.h>

#include <fs/tmpfs/tmpfs.h>

/*
 * Default permission for root node
 */
#define TMPFS_DEFAULT_ROOT_MODE	(S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)

MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");
MALLOC_DEFINE(M_TMPFSNAME, "tmpfs name", "tmpfs file names");

/* VFS operation implementations; wired into tmpfs_vfsops at end of file. */
static int	tmpfs_mount(struct mount *);
static int	tmpfs_unmount(struct mount *, int);
static int	tmpfs_root(struct mount *, int flags, struct vnode **);
static int	tmpfs_fhtovp(struct mount *, struct fid *, int,
		    struct vnode **);
static int	tmpfs_statfs(struct mount *, struct statfs *);

/* Mount options accepted at initial mount time. */
static const char *tmpfs_opts[] = {
	"from", "size", "maxfilesize", "inodes", "uid", "gid", "mode", "export",
	"union", "nonc", "nomtime", NULL
};

/* Subset of options accepted on MNT_UPDATE remounts. */
static const char *tmpfs_updateopts[] = {
	"from", "export", "nomtime", "size", NULL
};

/*
 * Handle updates of time from writes to mmaped regions, if allowed.
 * Use MNT_VNODE_FOREACH_ALL instead of MNT_VNODE_FOREACH_LAZY, since
 * unmap of the tmpfs-backed vnode does not call vinactive(), because
 * the vm object type is OBJT_SWAP.  If lazy, only handle delayed
 * update of mtime due to the writes to mapped files.
 */
static void
tmpfs_update_mtime(struct mount *mp, bool lazy)
{
	struct vnode *vp, *mvp;
	struct vm_object *obj;

	/* Nothing to do if mtime updates from mmap writes are disabled. */
	if (VFS_TO_TMPFS(mp)->tm_nomtime)
		return;
	/*
	 * The iterator hands us each vnode with its interlock held;
	 * every path below must release it (VI_UNLOCK or vget with
	 * LK_INTERLOCK).
	 */
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_type != VREG) {
			VI_UNLOCK(vp);
			continue;
		}
		obj = vp->v_object;
		KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) ==
		    (OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj"));

		/*
		 * In lazy case, do unlocked read, avoid taking vnode
		 * lock if not needed.  Lost update will be handled on
		 * the next call.
		 * For non-lazy case, we must flush all pending
		 * metadata changes now.
		 */
		if (!lazy || obj->generation != obj->cleangeneration) {
			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK,
			    curthread) != 0)
				continue;
			tmpfs_check_mtime(vp);
			if (!lazy)
				tmpfs_update(vp);
			vput(vp);
		} else {
			VI_UNLOCK(vp);
			continue;
		}
	}
}

/* Out-parameter for tmpfs_check_rw_maps_cb(). */
struct tmpfs_check_rw_maps_arg {
	bool found;
};

/*
 * tmpfs_all_rw_maps() callback: record that at least one writable
 * mapping exists and return true to terminate the scan early.
 */
static bool
tmpfs_check_rw_maps_cb(struct mount *mp __unused, vm_map_t map __unused,
    vm_map_entry_t entry __unused, void *arg)
{
	struct tmpfs_check_rw_maps_arg *a;

	a = arg;
	a->found = true;
	return (true);
}

/*
 * Revoke write permissions from all mappings of regular files
 * belonging to the specified tmpfs mount.
 */
static bool
tmpfs_revoke_rw_maps_cb(struct mount *mp __unused, vm_map_t map,
    vm_map_entry_t entry, void *arg __unused)
{

	/*
	 * XXXKIB: might be invalidate the mapping
	 * instead ?  The process is not going to be
	 * happy in any case.
	 */
	entry->max_protection &= ~VM_PROT_WRITE;
	if ((entry->protection & VM_PROT_WRITE) != 0) {
		entry->protection &= ~VM_PROT_WRITE;
		/* Downgrade already-installed hardware mappings as well. */
		pmap_protect(map->pmap, entry->start, entry->end,
		    entry->protection);
	}
	/* false: keep scanning remaining entries. */
	return (false);
}

/*
 * Walk the map entries of every normal process and invoke cb on each
 * entry that maps a tmpfs regular-file object belonging to mount mp
 * with write permission.  A cb returning true terminates the walk.
 */
static void
tmpfs_all_rw_maps(struct mount *mp, bool (*cb)(struct mount *mp, vm_map_t,
    vm_map_entry_t, void *), void *cb_arg)
{
	struct proc *p;
	struct vmspace *vm;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	struct vnode *vp;
	int gen;
	bool terminate;

	terminate = false;
	sx_slock(&allproc_lock);
again:
	/* Snapshot the generation to detect races with fork/exit below. */
	gen = allproc_gen;
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC |
		    P_SYSTEM | P_WEXIT)) != 0) {
			PROC_UNLOCK(p);
			continue;
		}
		/*
		 * Hold the process and its vmspace so both stay valid
		 * while allproc_lock is dropped; released via PRELE()
		 * and vmspace_free() below.
		 */
		vm = vmspace_acquire_ref(p);
		_PHOLD_LITE(p);
		PROC_UNLOCK(p);
		if (vm == NULL) {
			PRELE(p);
			continue;
		}
		sx_sunlock(&allproc_lock);
		map = &vm->vm_map;

		vm_map_lock(map);
		if (map->busy)
			vm_map_wait_busy(map);
		VM_MAP_ENTRY_FOREACH(entry, map) {
			/*
			 * Skip guard, submap and COW entries, and
			 * entries that can never gain write access.
			 */
			if ((entry->eflags & (MAP_ENTRY_GUARD |
			    MAP_ENTRY_IS_SUB_MAP | MAP_ENTRY_COW)) != 0 ||
			    (entry->max_protection & VM_PROT_WRITE) == 0)
				continue;
			object = entry->object.vm_object;
			if (object == NULL || object->type != OBJT_SWAP ||
			    (object->flags & OBJ_TMPFS_NODE) == 0)
				continue;
			/*
			 * No need to dig into shadow chain, mapping
			 * of the object not at top is readonly.
			 */
			VM_OBJECT_RLOCK(object);
			if (object->type == OBJT_DEAD) {
				VM_OBJECT_RUNLOCK(object);
				continue;
			}
			MPASS(object->ref_count > 1);
			if ((object->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) !=
			    (OBJ_TMPFS_NODE | OBJ_TMPFS)) {
				VM_OBJECT_RUNLOCK(object);
				continue;
			}
			/* Only entries backed by vnodes of this mount. */
			vp = object->un_pager.swp.swp_tmpfs;
			if (vp->v_mount != mp) {
				VM_OBJECT_RUNLOCK(object);
				continue;
			}

			terminate = cb(mp, map, entry, cb_arg);
			VM_OBJECT_RUNLOCK(object);
			if (terminate)
				break;
		}
		vm_map_unlock(map);

		vmspace_free(vm);
		sx_slock(&allproc_lock);
		PRELE(p);
		if (terminate)
			break;
	}
	/* The process list changed under us; rescan from the start. */
	if (!terminate && gen != allproc_gen)
		goto again;
	sx_sunlock(&allproc_lock);
}

/*
 * Return true if any process holds a writable mapping of a regular
 * file on mount mp.
 */
static bool
tmpfs_check_rw_maps(struct mount *mp)
{
	struct tmpfs_check_rw_maps_arg ca;

	ca.found = false;
	tmpfs_all_rw_maps(mp, tmpfs_check_rw_maps_cb, &ca);
	return (ca.found);
}

/*
 * Remount a read-write tmpfs mount read-only: suspend writers, refuse
 * (unless forced) while writable mappings exist, then repeatedly
 * revoke write access and flush vnodes until no new writable mappings
 * appear.
 */
static int
tmpfs_rw_to_ro(struct mount *mp)
{
	int error, flags;
	bool forced;

	forced = (mp->mnt_flag & MNT_FORCE) != 0;
	flags = WRITECLOSE | (forced ? FORCECLOSE : 0);

	if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
		return (error);
	error = vfs_write_suspend_umnt(mp);
	if (error != 0)
		return (error);
	if (!forced && tmpfs_check_rw_maps(mp)) {
		error = EBUSY;
		goto out;
	}
	VFS_TO_TMPFS(mp)->tm_ronly = 1;
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_RDONLY;
	MNT_IUNLOCK(mp);
	for (;;) {
		tmpfs_all_rw_maps(mp, tmpfs_revoke_rw_maps_cb, NULL);
		tmpfs_update_mtime(mp, false);
		error = vflush(mp, 0, flags, curthread);
		if (error != 0) {
			/* Roll back to read-write on failure. */
			VFS_TO_TMPFS(mp)->tm_ronly = 0;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);
			goto out;
		}
		/* Done once no writable mapping reappeared meanwhile. */
		if (!tmpfs_check_rw_maps(mp))
			break;
	}
out:
	vfs_write_resume(mp, 0);
	return (error);
}

/*
 * Mount (or update) a tmpfs instance: parse options, size the file
 * system, allocate the per-mount structure and create the root node.
 */
static int
tmpfs_mount(struct mount *mp)
{
	/* How many nodes one page's worth of metadata can describe. */
	const size_t nodes_per_page = howmany(PAGE_SIZE,
	    sizeof(struct tmpfs_dirent) + sizeof(struct tmpfs_node));
	struct tmpfs_mount *tmp;
	struct tmpfs_node *root;
	int error;
	bool nomtime, nonc;
	/* Size counters. */
	u_quad_t pages;
	off_t nodes_max, size_max, maxfilesize;

	/* Root node attributes. */
	uid_t root_uid;
	gid_t root_gid;
	mode_t root_mode;

	struct vattr va;

	if (vfs_filteropt(mp->mnt_optnew, tmpfs_opts))
		return (EINVAL);

	if (mp->mnt_flag & MNT_UPDATE) {
		/* Only support update mounts for certain options. */
		if (vfs_filteropt(mp->mnt_optnew, tmpfs_updateopts) != 0)
			return (EOPNOTSUPP);
		tmp = VFS_TO_TMPFS(mp);
		if (vfs_getopt_size(mp->mnt_optnew, "size", &size_max) == 0) {
			/*
			 * On-the-fly resizing is not supported (yet).  We still
			 * need to have "size" listed as "supported", otherwise
			 * trying to update fs that is listed in fstab with size
			 * parameter, say trying to change rw to ro or vice
			 * versa, would cause vfs_filteropt() to bail.
			 */
			if (size_max != tmp->tm_size_max)
				return (EOPNOTSUPP);
		}
		if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) &&
		    !tmp->tm_ronly) {
			/* RW -> RO */
			return (tmpfs_rw_to_ro(mp));
		} else if (!vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) &&
		    tmp->tm_ronly) {
			/* RO -> RW */
			tmp->tm_ronly = 0;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);
		}
		tmp->tm_nomtime = vfs_getopt(mp->mnt_optnew, "nomtime", NULL,
		    0) == 0;
		return (0);
	}

	/* Inherit default root attributes from the covered vnode. */
	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred);
	VOP_UNLOCK(mp->mnt_vnodecovered);
	if (error)
		return (error);

	/* Only root may override uid/gid/mode of the root node. */
	if (mp->mnt_cred->cr_ruid != 0 ||
	    vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1)
		root_gid = va.va_gid;
	if (mp->mnt_cred->cr_ruid != 0 ||
	    vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1)
		root_uid = va.va_uid;
	if (mp->mnt_cred->cr_ruid != 0 ||
	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
		root_mode = va.va_mode;
	if (vfs_getopt_size(mp->mnt_optnew, "inodes", &nodes_max) != 0)
		nodes_max = 0;
	if (vfs_getopt_size(mp->mnt_optnew, "size", &size_max) != 0)
		size_max = 0;
	if (vfs_getopt_size(mp->mnt_optnew, "maxfilesize", &maxfilesize) != 0)
		maxfilesize = 0;
	nonc = vfs_getopt(mp->mnt_optnew, "nonc", NULL, NULL) == 0;
	nomtime = vfs_getopt(mp->mnt_optnew, "nomtime", NULL, NULL) == 0;

	/* Do not allow mounts if we do not have enough memory to preserve
	 * the minimum reserved pages. */
	if (tmpfs_mem_avail() < TMPFS_PAGES_MINRESERVED)
		return (ENOSPC);

	/* Get the maximum number of memory pages this file system is
	 * allowed to use, based on the maximum size the user passed in
	 * the mount structure.  A value of zero is treated as if the
	 * maximum available space was requested. */
	if (size_max == 0 || size_max > OFF_MAX - PAGE_SIZE ||
	    (SIZE_MAX < OFF_MAX && size_max / PAGE_SIZE >= SIZE_MAX))
		pages = SIZE_MAX;
	else {
		size_max = roundup(size_max, PAGE_SIZE);
		pages = howmany(size_max, PAGE_SIZE);
	}
	MPASS(pages > 0);

	/*
	 * Derive the node limit from the page limit when "inodes" was
	 * absent or too small; clamp to INT_MAX, guarding the multiply
	 * against overflow.
	 */
	if (nodes_max <= 3) {
		if (pages < INT_MAX / nodes_per_page)
			nodes_max = pages * nodes_per_page;
		else
			nodes_max = INT_MAX;
	}
	if (nodes_max > INT_MAX)
		nodes_max = INT_MAX;
	MPASS(nodes_max >= 3);

	/* Allocate the tmpfs mount structure and fill it. */
	tmp = (struct tmpfs_mount *)malloc(sizeof(struct tmpfs_mount),
	    M_TMPFSMNT, M_WAITOK | M_ZERO);

	mtx_init(&tmp->tm_allnode_lock, "tmpfs allnode lock", NULL, MTX_DEF);
	tmp->tm_nodes_max = nodes_max;
	tmp->tm_nodes_inuse = 0;
	tmp->tm_refcount = 1;
	tmp->tm_maxfilesize = maxfilesize > 0 ? maxfilesize : OFF_MAX;
	LIST_INIT(&tmp->tm_nodes_used);

	tmp->tm_size_max = size_max;
	tmp->tm_pages_max = pages;
	tmp->tm_pages_used = 0;
	/* Inode numbers start at 2; the root node takes the first one. */
	new_unrhdr64(&tmp->tm_ino_unr, 2);
	tmp->tm_ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
	tmp->tm_nonc = nonc;
	tmp->tm_nomtime = nomtime;

	/* Allocate the root node.
 */
	error = tmpfs_alloc_node(mp, tmp, VDIR, root_uid, root_gid,
	    root_mode & ALLPERMS, NULL, NULL, VNOVAL, &root);

	if (error != 0 || root == NULL) {
		free(tmp, M_TMPFSMNT);
		return (error);
	}
	KASSERT(root->tn_id == 2,
	    ("tmpfs root with invalid ino: %ju", (uintmax_t)root->tn_id));
	tmp->tm_root = root;

	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
	    MNTK_TEXT_REFS | MNTK_NOMSYNC;
	MNT_IUNLOCK(mp);

	mp->mnt_data = tmp;
	mp->mnt_stat.f_namemax = MAXNAMLEN;
	vfs_getnewfsid(mp);
	vfs_mountedfrom(mp, "tmpfs");

	return 0;
}

/*
 * Unmount: suspend writers, flush all vnodes (retrying for forced
 * unmounts), then destroy every remaining node and release the
 * per-mount structure.
 */
/* ARGSUSED2 */
static int
tmpfs_unmount(struct mount *mp, int mntflags)
{
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	int error, flags;

	flags = (mntflags & MNT_FORCE) != 0 ? FORCECLOSE : 0;
	tmp = VFS_TO_TMPFS(mp);

	/* Stop writers */
	error = vfs_write_suspend_umnt(mp);
	if (error != 0)
		return (error);
	/*
	 * At this point, nodes cannot be destroyed by any other
	 * thread because write suspension is started.
	 */

	for (;;) {
		error = vflush(mp, 0, flags, curthread);
		if (error != 0) {
			vfs_write_resume(mp, VR_START_WRITE);
			return (error);
		}
		MNT_ILOCK(mp);
		if (mp->mnt_nvnodelistsize == 0) {
			MNT_IUNLOCK(mp);
			break;
		}
		MNT_IUNLOCK(mp);
		/* Non-forced unmount gives up if vnodes remain in use. */
		if ((mntflags & MNT_FORCE) == 0) {
			vfs_write_resume(mp, VR_START_WRITE);
			return (EBUSY);
		}
	}

	TMPFS_LOCK(tmp);
	while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) {
		TMPFS_NODE_LOCK(node);
		if (node->tn_type == VDIR)
			tmpfs_dir_destroy(tmp, node);
		/*
		 * tmpfs_free_node_locked() drops the tmpfs lock when it
		 * frees the node; reacquire it, otherwise just drop the
		 * per-node lock it left held.
		 */
		if (tmpfs_free_node_locked(tmp, node, true))
			TMPFS_LOCK(tmp);
		else
			TMPFS_NODE_UNLOCK(node);
	}

	mp->mnt_data = NULL;
	tmpfs_free_tmp(tmp);
	vfs_write_resume(mp, VR_START_WRITE);

	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);

	return (0);
}

/*
 * Drop one reference on the per-mount structure; free it when the
 * count reaches zero.  Called with the tmpfs lock held; always
 * releases it.
 */
void
tmpfs_free_tmp(struct tmpfs_mount *tmp)
{

	MPASS(tmp->tm_refcount > 0);
	tmp->tm_refcount--;
	if (tmp->tm_refcount > 0) {
		TMPFS_UNLOCK(tmp);
		return;
	}
	TMPFS_UNLOCK(tmp);

	mtx_destroy(&tmp->tm_allnode_lock);
	MPASS(tmp->tm_pages_used == 0);
	MPASS(tmp->tm_nodes_inuse == 0);

	free(tmp, M_TMPFSMNT);
}

/* Return a locked, referenced vnode for the file system root. */
static int
tmpfs_root(struct mount *mp, int flags, struct vnode **vpp)
{
	int error;

	error = tmpfs_alloc_vp(mp, VFS_TO_TMPFS(mp)->tm_root, flags, vpp);
	if (error == 0)
		(*vpp)->v_vflag |= VV_ROOT;
	return (error);
}

/*
 * Translate an NFS file handle into a vnode by matching the encoded
 * (inode id, generation) pair against the live node list.
 */
static int
tmpfs_fhtovp(struct mount *mp, struct fid *fhp, int flags,
    struct vnode **vpp)
{
	struct tmpfs_fid_data tfd;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	int error;

	if (fhp->fid_len != sizeof(tfd))
		return (EINVAL);

	/*
	 * Copy from fid_data onto the stack to avoid unaligned pointer use.
	 * See the comment in sys/mount.h on struct fid for details.
	 */
	memcpy(&tfd, fhp->fid_data, fhp->fid_len);

	tmp = VFS_TO_TMPFS(mp);

	if (tfd.tfd_id >= tmp->tm_nodes_max)
		return (EINVAL);

	TMPFS_LOCK(tmp);
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		if (node->tn_id == tfd.tfd_id &&
		    node->tn_gen == tfd.tfd_gen) {
			/* Pin the node before dropping the tmpfs lock. */
			tmpfs_ref_node(node);
			break;
		}
	}
	TMPFS_UNLOCK(tmp);

	if (node != NULL) {
		error = tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp);
		tmpfs_free_node(tmp, node);
	} else
		error = EINVAL;
	return (error);
}

/* Report file system statistics in units of PAGE_SIZE blocks. */
/* ARGSUSED2 */
static int
tmpfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct tmpfs_mount *tmp;
	size_t used;

	tmp = VFS_TO_TMPFS(mp);

sbp->f_iosize = PAGE_SIZE; 616d1fa59e9SXin LI sbp->f_bsize = PAGE_SIZE; 617d1fa59e9SXin LI 618da7aa277SGleb Kurtsou used = tmpfs_pages_used(tmp); 619ed2159c9SMateusz Guzik if (tmp->tm_pages_max != ULONG_MAX) 620da7aa277SGleb Kurtsou sbp->f_blocks = tmp->tm_pages_max; 621da7aa277SGleb Kurtsou else 622da7aa277SGleb Kurtsou sbp->f_blocks = used + tmpfs_mem_avail(); 623da7aa277SGleb Kurtsou if (sbp->f_blocks <= used) 624da7aa277SGleb Kurtsou sbp->f_bavail = 0; 625da7aa277SGleb Kurtsou else 626da7aa277SGleb Kurtsou sbp->f_bavail = sbp->f_blocks - used; 627da7aa277SGleb Kurtsou sbp->f_bfree = sbp->f_bavail; 628da7aa277SGleb Kurtsou used = tmp->tm_nodes_inuse; 629da7aa277SGleb Kurtsou sbp->f_files = tmp->tm_nodes_max; 630da7aa277SGleb Kurtsou if (sbp->f_files <= used) 631da7aa277SGleb Kurtsou sbp->f_ffree = 0; 632da7aa277SGleb Kurtsou else 633da7aa277SGleb Kurtsou sbp->f_ffree = sbp->f_files - used; 634d1fa59e9SXin LI /* sbp->f_owner = tmp->tn_uid; */ 635d1fa59e9SXin LI 636d1fa59e9SXin LI return 0; 637d1fa59e9SXin LI } 638d1fa59e9SXin LI 6394cda7f7eSKonstantin Belousov static int 6404cda7f7eSKonstantin Belousov tmpfs_sync(struct mount *mp, int waitfor) 6414cda7f7eSKonstantin Belousov { 6424cda7f7eSKonstantin Belousov 6434cda7f7eSKonstantin Belousov if (waitfor == MNT_SUSPEND) { 6444cda7f7eSKonstantin Belousov MNT_ILOCK(mp); 6454cda7f7eSKonstantin Belousov mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED; 6464cda7f7eSKonstantin Belousov MNT_IUNLOCK(mp); 647f40cb1c6SKonstantin Belousov } else if (waitfor == MNT_LAZY) { 6485c4ce6faSKonstantin Belousov tmpfs_update_mtime(mp, true); 6494cda7f7eSKonstantin Belousov } 6504cda7f7eSKonstantin Belousov return (0); 6514cda7f7eSKonstantin Belousov } 6524cda7f7eSKonstantin Belousov 653a51c8071SKonstantin Belousov static int 654a51c8071SKonstantin Belousov tmpfs_init(struct vfsconf *conf) 655a51c8071SKonstantin Belousov { 656a51c8071SKonstantin Belousov tmpfs_subr_init(); 657a51c8071SKonstantin Belousov return (0); 
658a51c8071SKonstantin Belousov } 659a51c8071SKonstantin Belousov 660a51c8071SKonstantin Belousov static int 661a51c8071SKonstantin Belousov tmpfs_uninit(struct vfsconf *conf) 662a51c8071SKonstantin Belousov { 663a51c8071SKonstantin Belousov tmpfs_subr_uninit(); 664a51c8071SKonstantin Belousov return (0); 665a51c8071SKonstantin Belousov } 666a51c8071SKonstantin Belousov 6675f34e93cSMark Johnston /* 668d1fa59e9SXin LI * tmpfs vfs operations. 669d1fa59e9SXin LI */ 670d1fa59e9SXin LI struct vfsops tmpfs_vfsops = { 671d1fa59e9SXin LI .vfs_mount = tmpfs_mount, 672d1fa59e9SXin LI .vfs_unmount = tmpfs_unmount, 6737682d0beSMateusz Guzik .vfs_root = vfs_cache_root, 6747682d0beSMateusz Guzik .vfs_cachedroot = tmpfs_root, 675d1fa59e9SXin LI .vfs_statfs = tmpfs_statfs, 676d1fa59e9SXin LI .vfs_fhtovp = tmpfs_fhtovp, 6774cda7f7eSKonstantin Belousov .vfs_sync = tmpfs_sync, 678a51c8071SKonstantin Belousov .vfs_init = tmpfs_init, 679a51c8071SKonstantin Belousov .vfs_uninit = tmpfs_uninit, 680d1fa59e9SXin LI }; 6812454886eSXin LI VFS_SET(tmpfs_vfsops, tmpfs, VFCF_JAIL); 682