/*	$NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $	*/

/*-
 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
 *
 * Copyright (c) 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system.
 *
 * tmpfs is a file system that uses FreeBSD's virtual memory
 * sub-system to store file data and metadata in an efficient way.
 * This means that it does not follow the structure of an on-disk file
 * system because it simply does not need to.  Instead, it uses
 * memory-specific data structures and algorithms to automatically
 * allocate and release resources.
 */

#include "opt_tmpfs.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dirent.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <fs/tmpfs/tmpfs.h>

/*
 * Default permission for root node
 */
#define TMPFS_DEFAULT_ROOT_MODE	(S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)

MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");
MALLOC_DEFINE(M_TMPFSNAME, "tmpfs name", "tmpfs file names");

static int	tmpfs_mount(struct mount *);
static int	tmpfs_unmount(struct mount *, int);
static int	tmpfs_root(struct mount *, int flags, struct vnode **);
static int	tmpfs_fhtovp(struct mount *, struct fid *, int,
		    struct vnode **);
static int	tmpfs_statfs(struct mount *, struct statfs *);
static void	tmpfs_susp_clean(struct mount *);

static const char *tmpfs_opts[] = {
	"from", "size", "maxfilesize", "inodes", "uid", "gid", "mode", "export",
	"union", "nonc", "nomtime", NULL
};

static const char *tmpfs_updateopts[] = {
	"from", "export", "nomtime", "size", NULL
};

/*
 * Handle updates of time from writes to mmapped regions, if allowed.
 * Use MNT_VNODE_FOREACH_ALL instead of MNT_VNODE_FOREACH_LAZY, since
 * unmap of the tmpfs-backed vnode does not call vinactive(), because
 * the vm object type is OBJT_SWAP.  If lazy, only handle the delayed
 * update of mtime due to writes to mapped files.
 */
static void
tmpfs_update_mtime(struct mount *mp, bool lazy)
{
	struct vnode *vp, *mvp;
	struct vm_object *obj;

	if (VFS_TO_TMPFS(mp)->tm_nomtime)
		return;
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_type != VREG) {
			VI_UNLOCK(vp);
			continue;
		}
		obj = vp->v_object;
		KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) ==
		    (OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj"));

		/*
		 * In the lazy case, do an unlocked read and avoid
		 * taking the vnode lock if not needed.  A lost update
		 * will be handled on the next call.
		 * In the non-lazy case, we must flush all pending
		 * metadata changes now.
		 */
		if (!lazy || obj->generation != obj->cleangeneration) {
			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK,
			    curthread) != 0)
				continue;
			tmpfs_check_mtime(vp);
			if (!lazy)
				tmpfs_update(vp);
			vput(vp);
		} else {
			VI_UNLOCK(vp);
			continue;
		}
	}
}

struct tmpfs_check_rw_maps_arg {
	bool found;
};

/*
 * Callback for tmpfs_all_rw_maps(): record that a writeable mapping
 * was found and terminate the scan.
 */
static bool
tmpfs_check_rw_maps_cb(struct mount *mp __unused, vm_map_t map __unused,
    vm_map_entry_t entry __unused, void *arg)
{
	struct tmpfs_check_rw_maps_arg *a;

	a = arg;
	a->found = true;
	return (true);
}

/*
 * Revoke write permissions from all mappings of regular files
 * belonging to the specified tmpfs mount.
 */
static bool
tmpfs_revoke_rw_maps_cb(struct mount *mp __unused, vm_map_t map,
    vm_map_entry_t entry, void *arg __unused)
{

	/*
	 * XXXKIB: might it be better to invalidate the mapping
	 * instead?  The process is not going to be
	 * happy in any case.
	 */
	entry->max_protection &= ~VM_PROT_WRITE;
	if ((entry->protection & VM_PROT_WRITE) != 0) {
		entry->protection &= ~VM_PROT_WRITE;
		pmap_protect(map->pmap, entry->start, entry->end,
		    entry->protection);
	}
	return (false);
}

/*
 * Call cb() for every writeable mapping of a regular file belonging
 * to the given tmpfs mount, in every process address space, until
 * cb() requests termination by returning true.
 */
static void
tmpfs_all_rw_maps(struct mount *mp, bool (*cb)(struct mount *mp, vm_map_t,
    vm_map_entry_t, void *), void *cb_arg)
{
	struct proc *p;
	struct vmspace *vm;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	struct vnode *vp;
	int gen;
	bool terminate;

	terminate = false;
	sx_slock(&allproc_lock);
again:
	gen = allproc_gen;
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC |
		    P_SYSTEM | P_WEXIT)) != 0) {
			PROC_UNLOCK(p);
			continue;
		}
		vm = vmspace_acquire_ref(p);
		_PHOLD_LITE(p);
		PROC_UNLOCK(p);
		if (vm == NULL) {
			PRELE(p);
			continue;
		}
		sx_sunlock(&allproc_lock);
		map = &vm->vm_map;

		vm_map_lock(map);
		if (map->busy)
			vm_map_wait_busy(map);
		VM_MAP_ENTRY_FOREACH(entry, map) {
			if ((entry->eflags & (MAP_ENTRY_GUARD |
			    MAP_ENTRY_IS_SUB_MAP | MAP_ENTRY_COW)) != 0 ||
			    (entry->max_protection & VM_PROT_WRITE) == 0)
				continue;
			object = entry->object.vm_object;
			if (object == NULL || object->type != OBJT_SWAP ||
			    (object->flags & OBJ_TMPFS_NODE) == 0)
				continue;
			/*
			 * No need to dig into the shadow chain:
			 * mappings of an object that is not at the
			 * top of the chain are read-only.
			 */

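			/*
			 * The object may be terminated concurrently;
			 * recheck its type under the object lock.
			 */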
			VM_OBJECT_RLOCK(object);
			if (object->type == OBJT_DEAD) {
				VM_OBJECT_RUNLOCK(object);
				continue;
			}
			MPASS(object->ref_count > 1);
			if ((object->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) !=
			    (OBJ_TMPFS_NODE | OBJ_TMPFS)) {
				VM_OBJECT_RUNLOCK(object);
				continue;
			}
			vp = object->un_pager.swp.swp_tmpfs;
			if (vp->v_mount != mp) {
				VM_OBJECT_RUNLOCK(object);
				continue;
			}

			terminate = cb(mp, map, entry, cb_arg);
			VM_OBJECT_RUNLOCK(object);
			if (terminate)
				break;
		}
		vm_map_unlock(map);

		vmspace_free(vm);
		sx_slock(&allproc_lock);
		PRELE(p);
		if (terminate)
			break;
	}
	if (!terminate && gen != allproc_gen)
		goto again;
	sx_sunlock(&allproc_lock);
}

/*
 * Return true if any process has a writeable mapping of a regular
 * file belonging to the given tmpfs mount.
 */
static bool
tmpfs_check_rw_maps(struct mount *mp)
{
	struct tmpfs_check_rw_maps_arg ca;

	ca.found = false;
	tmpfs_all_rw_maps(mp, tmpfs_check_rw_maps_cb, &ca);
	return (ca.found);
}

static int
tmpfs_rw_to_ro(struct mount *mp)
{
	int error, flags;
	bool forced;

	forced = (mp->mnt_flag & MNT_FORCE) != 0;
	flags = WRITECLOSE | (forced ? FORCECLOSE : 0);
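	/*
	 * To go read-only: suspend writers, refuse if writeable
	 * mappings exist (unless forced), then repeatedly revoke
	 * write access from existing mappings and flush vnodes until
	 * no writeable mappings remain.
	 */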

	if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
		return (error);
	error = vfs_write_suspend_umnt(mp);
	if (error != 0)
		return (error);
	if (!forced && tmpfs_check_rw_maps(mp)) {
		error = EBUSY;
		goto out;
	}
	VFS_TO_TMPFS(mp)->tm_ronly = 1;
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_RDONLY;
	MNT_IUNLOCK(mp);
	for (;;) {
		tmpfs_all_rw_maps(mp, tmpfs_revoke_rw_maps_cb, NULL);
		tmpfs_update_mtime(mp, false);
		error = vflush(mp, 0, flags, curthread);
		if (error != 0) {
			VFS_TO_TMPFS(mp)->tm_ronly = 0;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);
			goto out;
		}
		if (!tmpfs_check_rw_maps(mp))
			break;
	}
out:
	vfs_write_resume(mp, 0);
	return (error);
}

static int
tmpfs_mount(struct mount *mp)
{
	const size_t nodes_per_page = howmany(PAGE_SIZE,
	    sizeof(struct tmpfs_dirent) + sizeof(struct tmpfs_node));
	struct tmpfs_mount *tmp;
	struct tmpfs_node *root;
	int error;
	bool nomtime, nonc;
	/* Size counters. */
	u_quad_t pages;
	off_t nodes_max, size_max, maxfilesize;

	/* Root node attributes. */
	uid_t root_uid;
	gid_t root_gid;
	mode_t root_mode;

	struct vattr va;

	if (vfs_filteropt(mp->mnt_optnew, tmpfs_opts))
		return (EINVAL);

	if (mp->mnt_flag & MNT_UPDATE) {
		/* Only support update mounts for certain options. */
		if (vfs_filteropt(mp->mnt_optnew, tmpfs_updateopts) != 0)
			return (EOPNOTSUPP);
		tmp = VFS_TO_TMPFS(mp);
		if (vfs_getopt_size(mp->mnt_optnew, "size", &size_max) == 0) {
			/*
			 * On-the-fly resizing is not supported (yet).  We
			 * still need to have "size" listed as "supported";
			 * otherwise, trying to update a file system that is
			 * listed in fstab with a size parameter, say when
			 * changing rw to ro or vice versa, would cause
			 * vfs_filteropt() to bail.
			 */
			if (size_max != tmp->tm_size_max)
				return (EOPNOTSUPP);
		}
		if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) &&
		    !tmp->tm_ronly) {
			/* RW -> RO */
			return (tmpfs_rw_to_ro(mp));
		} else if (!vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) &&
		    tmp->tm_ronly) {
			/* RO -> RW */
			tmp->tm_ronly = 0;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);
		}
		tmp->tm_nomtime = vfs_getopt(mp->mnt_optnew, "nomtime", NULL,
		    0) == 0;
		return (0);
	}

	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred);
	VOP_UNLOCK(mp->mnt_vnodecovered);
	if (error)
		return (error);

	if (mp->mnt_cred->cr_ruid != 0 ||
	    vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1)
		root_gid = va.va_gid;
	if (mp->mnt_cred->cr_ruid != 0 ||
	    vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1)
		root_uid = va.va_uid;
	if (mp->mnt_cred->cr_ruid != 0 ||
	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
		root_mode = va.va_mode;
	if (vfs_getopt_size(mp->mnt_optnew, "inodes", &nodes_max) != 0)
		nodes_max = 0;
	if (vfs_getopt_size(mp->mnt_optnew, "size", &size_max) != 0)
		size_max = 0;
	if (vfs_getopt_size(mp->mnt_optnew, "maxfilesize", &maxfilesize) != 0)
		maxfilesize = 0;
	nonc = vfs_getopt(mp->mnt_optnew, "nonc", NULL, NULL) == 0;
	nomtime = vfs_getopt(mp->mnt_optnew, "nomtime", NULL, NULL) == 0;

	/* Do not allow mounts if we do not have enough memory to preserve
	 * the minimum reserved pages. */
	if (tmpfs_mem_avail() < TMPFS_PAGES_MINRESERVED)
		return (ENOSPC);

	/* Get the maximum number of memory pages this file system is
	 * allowed to use, based on the maximum size the user passed in
	 * the mount structure.  A value of zero is treated as if the
	 * maximum available space was requested. */
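	/*
	 * For example, with 4 KB pages a "size=1g" mount gives
	 * pages = howmany(1G, PAGE_SIZE) = 262144.
	 */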
	if (size_max == 0 || size_max > OFF_MAX - PAGE_SIZE ||
	    (SIZE_MAX < OFF_MAX && size_max / PAGE_SIZE >= SIZE_MAX))
		pages = SIZE_MAX;
	else {
		size_max = roundup(size_max, PAGE_SIZE);
		pages = howmany(size_max, PAGE_SIZE);
	}
	MPASS(pages > 0);

	if (nodes_max <= 3) {
		if (pages < INT_MAX / nodes_per_page)
			nodes_max = pages * nodes_per_page;
		else
			nodes_max = INT_MAX;
	}
	if (nodes_max > INT_MAX)
		nodes_max = INT_MAX;
	MPASS(nodes_max >= 3);

	/* Allocate the tmpfs mount structure and fill it. */
	tmp = (struct tmpfs_mount *)malloc(sizeof(struct tmpfs_mount),
	    M_TMPFSMNT, M_WAITOK | M_ZERO);

	mtx_init(&tmp->tm_allnode_lock, "tmpfs allnode lock", NULL, MTX_DEF);
	tmp->tm_nodes_max = nodes_max;
	tmp->tm_nodes_inuse = 0;
	tmp->tm_refcount = 1;
	tmp->tm_maxfilesize = maxfilesize > 0 ? maxfilesize : OFF_MAX;
	LIST_INIT(&tmp->tm_nodes_used);

	tmp->tm_size_max = size_max;
	tmp->tm_pages_max = pages;
	tmp->tm_pages_used = 0;
	new_unrhdr64(&tmp->tm_ino_unr, 2);
	tmp->tm_ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
	tmp->tm_nonc = nonc;
	tmp->tm_nomtime = nomtime;

	/* Allocate the root node. */
	error = tmpfs_alloc_node(mp, tmp, VDIR, root_uid, root_gid,
	    root_mode & ALLPERMS, NULL, NULL, VNOVAL, &root);

	if (error != 0 || root == NULL) {
		free(tmp, M_TMPFSMNT);
		return (error);
	}
	KASSERT(root->tn_id == 2,
	    ("tmpfs root with invalid ino: %ju", (uintmax_t)root->tn_id));
	tmp->tm_root = root;

	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
	    MNTK_TEXT_REFS | MNTK_NOMSYNC;
	MNT_IUNLOCK(mp);

	mp->mnt_data = tmp;
	mp->mnt_stat.f_namemax = MAXNAMLEN;
	vfs_getnewfsid(mp);
	vfs_mountedfrom(mp, "tmpfs");

	return (0);
}

/* ARGSUSED2 */
static int
tmpfs_unmount(struct mount *mp, int mntflags)
{
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	int error, flags;

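	/* With MNT_FORCE, have vflush() reclaim even active vnodes. */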
	flags = (mntflags & MNT_FORCE) != 0 ? FORCECLOSE : 0;
	tmp = VFS_TO_TMPFS(mp);

	/* Stop writers */
	error = vfs_write_suspend_umnt(mp);
	if (error != 0)
		return (error);
	/*
	 * At this point, nodes cannot be destroyed by any other
	 * thread because write suspension is started.
	 */

	for (;;) {
		error = vflush(mp, 0, flags, curthread);
		if (error != 0) {
			vfs_write_resume(mp, VR_START_WRITE);
			return (error);
		}
		MNT_ILOCK(mp);
		if (mp->mnt_nvnodelistsize == 0) {
			MNT_IUNLOCK(mp);
			break;
		}
		MNT_IUNLOCK(mp);
		if ((mntflags & MNT_FORCE) == 0) {
			vfs_write_resume(mp, VR_START_WRITE);
			return (EBUSY);
		}
	}

	TMPFS_LOCK(tmp);
	while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) {
		TMPFS_NODE_LOCK(node);
		if (node->tn_type == VDIR)
			tmpfs_dir_destroy(tmp, node);
		if (tmpfs_free_node_locked(tmp, node, true))
			TMPFS_LOCK(tmp);
		else
			TMPFS_NODE_UNLOCK(node);
	}

	mp->mnt_data = NULL;
	tmpfs_free_tmp(tmp);
	vfs_write_resume(mp, VR_START_WRITE);

	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);

	return (0);
}

void
tmpfs_free_tmp(struct tmpfs_mount *tmp)
{

	MPASS(tmp->tm_refcount > 0);
	tmp->tm_refcount--;
	if (tmp->tm_refcount > 0) {
		TMPFS_UNLOCK(tmp);
		return;
	}
	TMPFS_UNLOCK(tmp);

	mtx_destroy(&tmp->tm_allnode_lock);
	MPASS(tmp->tm_pages_used == 0);
	MPASS(tmp->tm_nodes_inuse == 0);

	free(tmp, M_TMPFSMNT);
}

static int
tmpfs_root(struct mount *mp, int flags, struct vnode **vpp)
{
	int error;

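	/*
	 * The root node is allocated at mount time and stays around
	 * until unmount, so it can simply be wrapped in a vnode here.
	 */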
	error = tmpfs_alloc_vp(mp, VFS_TO_TMPFS(mp)->tm_root, flags, vpp);
	if (error == 0)
		(*vpp)->v_vflag |= VV_ROOT;
	return (error);
}

static int
tmpfs_fhtovp(struct mount *mp, struct fid *fhp, int flags,
    struct vnode **vpp)
{
	struct tmpfs_fid *tfhp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	int error;

	tmp = VFS_TO_TMPFS(mp);

	tfhp = (struct tmpfs_fid *)fhp;
	if (tfhp->tf_len != sizeof(struct tmpfs_fid))
		return (EINVAL);

	if (tfhp->tf_id >= tmp->tm_nodes_max)
		return (EINVAL);

	TMPFS_LOCK(tmp);
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		if (node->tn_id == tfhp->tf_id &&
		    node->tn_gen == tfhp->tf_gen) {
			tmpfs_ref_node(node);
			break;
		}
	}
	TMPFS_UNLOCK(tmp);

	if (node != NULL) {
		error = tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp);
		tmpfs_free_node(tmp, node);
	} else
		error = EINVAL;
	return (error);
}

/* ARGSUSED2 */
static int
tmpfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct tmpfs_mount *tmp;
	size_t used;

	tmp = VFS_TO_TMPFS(mp);

	sbp->f_iosize = PAGE_SIZE;
	sbp->f_bsize = PAGE_SIZE;

	used = tmpfs_pages_used(tmp);
	if (tmp->tm_pages_max != ULONG_MAX)
		sbp->f_blocks = tmp->tm_pages_max;
	else
		sbp->f_blocks = used + tmpfs_mem_avail();
	if (sbp->f_blocks <= used)
		sbp->f_bavail = 0;
	else
		sbp->f_bavail = sbp->f_blocks - used;
	sbp->f_bfree = sbp->f_bavail;
	used = tmp->tm_nodes_inuse;
	sbp->f_files = tmp->tm_nodes_max;
	if (sbp->f_files <= used)
		sbp->f_ffree = 0;
	else
		sbp->f_ffree = sbp->f_files - used;
	/* sbp->f_owner = tmp->tn_uid; */

	return (0);
}

static int
tmpfs_sync(struct mount *mp, int waitfor)
{

	if (waitfor == MNT_SUSPEND) {
		MNT_ILOCK(mp);
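		/*
		 * tmpfs keeps no dirty data that must reach stable
		 * storage, so the suspension can be granted right away.
		 */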
		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
		MNT_IUNLOCK(mp);
	} else if (waitfor == MNT_LAZY) {
		tmpfs_update_mtime(mp, true);
	}
	return (0);
}

/*
 * The presence of a susp_clean method tells the VFS to track writes.
 */
static void
tmpfs_susp_clean(struct mount *mp __unused)
{
}

static int
tmpfs_init(struct vfsconf *conf)
{
	tmpfs_subr_init();
	return (0);
}

static int
tmpfs_uninit(struct vfsconf *conf)
{
	tmpfs_subr_uninit();
	return (0);
}

/*
 * tmpfs vfs operations.
 */
struct vfsops tmpfs_vfsops = {
	.vfs_mount =		tmpfs_mount,
	.vfs_unmount =		tmpfs_unmount,
	.vfs_root =		vfs_cache_root,
	.vfs_cachedroot =	tmpfs_root,
	.vfs_statfs =		tmpfs_statfs,
	.vfs_fhtovp =		tmpfs_fhtovp,
	.vfs_sync =		tmpfs_sync,
	.vfs_susp_clean =	tmpfs_susp_clean,
	.vfs_init =		tmpfs_init,
	.vfs_uninit =		tmpfs_uninit,
};
VFS_SET(tmpfs_vfsops, tmpfs, VFCF_JAIL);