xref: /freebsd/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1 // SPDX-License-Identifier: BSD-2-Clause
2 /*
3  * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/systm.h>
32 #include <sys/malloc.h>
33 #include <sys/mount.h>
34 #include <sys/cred.h>
35 #include <sys/vfs.h>
36 #include <sys/priv.h>
37 #include <sys/libkern.h>
38 
39 #include <sys/mutex.h>
40 #include <sys/vnode.h>
41 #include <sys/taskq.h>
42 
43 #include <sys/ccompat.h>
44 
45 MALLOC_DECLARE(M_MOUNT);
46 
/*
 * Solaris-compatible helper: append mount option "name" (with optional
 * string value "arg") to vfsp's mnt_opt list; "flags" is accepted for
 * interface compatibility and ignored.
 *
 * May be entered with or without the MNT interlock held; the caller's
 * lock state is restored on return.  The interlock must be dropped
 * around each M_WAITOK allocation (sleeping while holding a mutex is
 * forbidden), so the mnt_opt list head may be created concurrently —
 * hence the re-check after re-acquiring the lock.
 */
void
vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
    int flags __unused)
{
	struct vfsopt *opt;
	size_t namesize;
	int locked;

	/* Remember whether the caller already owns the MNT interlock. */
	if (!(locked = mtx_owned(MNT_MTX(vfsp))))
		MNT_ILOCK(vfsp);

	if (vfsp->mnt_opt == NULL) {
		void *opts;

		/*
		 * Drop the interlock for the sleeping allocation, then
		 * re-check: another thread may have installed the list
		 * head while we slept.
		 */
		MNT_IUNLOCK(vfsp);
		opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
		MNT_ILOCK(vfsp);
		if (vfsp->mnt_opt == NULL) {
			vfsp->mnt_opt = opts;
			TAILQ_INIT(vfsp->mnt_opt);
		} else {
			/* Lost the race; discard our allocation. */
			free(opts, M_MOUNT);
		}
	}

	/* Build the new option entry without holding the interlock. */
	MNT_IUNLOCK(vfsp);

	opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK);
	namesize = strlen(name) + 1;
	opt->name = malloc(namesize, M_MOUNT, M_WAITOK);
	strlcpy(opt->name, name, namesize);
	/* pos -1: not sourced from an nmount(2) option vector. */
	opt->pos = -1;
	opt->seen = 1;
	if (arg == NULL) {
		/* Boolean option: present, but carries no value. */
		opt->value = NULL;
		opt->len = 0;
	} else {
		/* Copy the value including its terminating NUL. */
		opt->len = strlen(arg) + 1;
		opt->value = malloc(opt->len, M_MOUNT, M_WAITOK);
		memcpy(opt->value, arg, opt->len);
	}

	MNT_ILOCK(vfsp);
	TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link);
	/* Restore the lock state the caller expects. */
	if (!locked)
		MNT_IUNLOCK(vfsp);
}
94 
95 void
vfs_clearmntopt(vfs_t * vfsp,const char * name)96 vfs_clearmntopt(vfs_t *vfsp, const char *name)
97 {
98 	int locked;
99 
100 	if (!(locked = mtx_owned(MNT_MTX(vfsp))))
101 		MNT_ILOCK(vfsp);
102 	vfs_deleteopt(vfsp->mnt_opt, name);
103 	if (!locked)
104 		MNT_IUNLOCK(vfsp);
105 }
106 
107 int
vfs_optionisset(const vfs_t * vfsp,const char * opt,char ** argp)108 vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
109 {
110 	struct vfsoptlist *opts = vfsp->mnt_optnew;
111 	int error;
112 
113 	if (opts == NULL)
114 		return (0);
115 	error = vfs_getopt(opts, opt, (void **)argp, NULL);
116 	return (error != 0 ? 0 : 1);
117 }
118 
/*
 * Mount a snapshot file system of type "fstype" at path "fspath" on top of
 * the directory vnode *vpp, which must be exclusively locked on entry.
 * "fspec" becomes the "from" mount option, "fsflags" contributes
 * MNT_UPDATEMASK flags, and "parent_vfsp" supplies jail credentials to
 * clone where supported.
 *
 * On success, returns 0 and replaces *vpp with the exclusively locked root
 * vnode of the new mount.  On failure, returns an errno value; the covered
 * vnode has been released via vput() and *vpp is NULL.
 */
int
mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
    char *fspec, int fsflags, vfs_t *parent_vfsp)
{
	struct vfsconf *vfsp;
	struct mount *mp;
	vnode_t *vp, *mvp;
	int error;

	ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");

	vp = *vpp;
	*vpp = NULL;
	error = 0;

	/*
	 * Be ultra-paranoid about making sure the type and fspath
	 * variables will fit in our mp buffers, including the
	 * terminating NUL.
	 */
	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
		error = ENAMETOOLONG;
	/* Resolve the filesystem type, kldload'ing its module if needed. */
	if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
		error = ENODEV;
	if (error == 0 && vp->v_type != VDIR)
		error = ENOTDIR;
	/*
	 * We need vnode lock to protect v_mountedhere and vnode interlock
	 * to protect v_iflag.
	 */
	if (error == 0) {
		VI_LOCK(vp);
		/* Refuse if a mount is in progress or already covers vp. */
		if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
			vp->v_iflag |= VI_MOUNT;
		else
			error = EBUSY;
		VI_UNLOCK(vp);
	}
	if (error != 0) {
		vput(vp);
		return (error);
	}
	/* Announce an impending change to vp while we still hold its lock. */
	vn_seqc_write_begin(vp);
	VOP_UNLOCK(vp);

	/*
	 * Allocate and initialize the filesystem.
	 * We don't want regular user that triggered snapshot mount to be able
	 * to unmount it, so pass credentials of the parent mount.
	 */
	mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred);

	/* Stage the "from" option on mnt_optnew, where VFS_MOUNT reads it. */
	mp->mnt_optnew = NULL;
	vfs_setmntopt(mp, "from", fspec, 0);
	mp->mnt_optnew = mp->mnt_opt;
	mp->mnt_opt = NULL;

	/*
	 * Set the mount level flags.
	 */
	mp->mnt_flag = fsflags & MNT_UPDATEMASK;
	/*
	 * Snapshots are always read-only.
	 */
	mp->mnt_flag |= MNT_RDONLY;
	/*
	 * We don't want snapshots to allow access to vulnerable setuid
	 * programs, so we turn off setuid when mounting snapshots.
	 */
	mp->mnt_flag |= MNT_NOSUID;
	/*
	 * We don't want snapshots to be visible in regular
	 * mount(8) and df(1) output.
	 */
	mp->mnt_flag |= MNT_IGNORE;

	error = VFS_MOUNT(mp);
	if (error != 0) {
		/*
		 * Clear VI_MOUNT and decrement the use count "atomically",
		 * under the vnode lock.  This is not strictly required,
		 * but makes it easier to reason about the life-cycle and
		 * ownership of the covered vnode.
		 */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VI_LOCK(vp);
		vp->v_iflag &= ~VI_MOUNT;
		VI_UNLOCK(vp);
		/* Close the seqc write section opened above, then unwind. */
		vn_seqc_write_end(vp);
		vput(vp);
		vfs_unbusy(mp);
		vfs_freeopts(mp->mnt_optnew);
		/* Detach vp so vfs_mount_destroy() does not touch it. */
		mp->mnt_vnodecovered = NULL;
		vfs_mount_destroy(mp);
		return (error);
	}

	/* Promote the options VFS_MOUNT consumed to the active list. */
	if (mp->mnt_opt != NULL)
		vfs_freeopts(mp->mnt_opt);
	mp->mnt_opt = mp->mnt_optnew;
	/* Best-effort statfs cache fill; failure here is not fatal. */
	(void) VFS_STATFS(mp, &mp->mnt_stat);

#ifdef VFS_SUPPORTS_EXJAIL_CLONE
	/*
	 * Clone the mnt_exjail credentials of the parent, as required.
	 */
	vfs_exjail_clone(parent_vfsp, mp);
#endif

	/*
	 * Prevent external consumers of mount options from reading
	 * mnt_optnew.
	 */
	mp->mnt_optnew = NULL;

	/* Re-lock vp to publish the mount on v_mountedhere. */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef FREEBSD_NAMECACHE
	cache_purge(vp);
#endif
	VI_LOCK(vp);
	vp->v_iflag &= ~VI_MOUNT;
#ifdef VIRF_MOUNTPOINT
	vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
#endif
	vp->v_mountedhere = mp;
	VI_UNLOCK(vp);
	/* Put the new filesystem on the mount list. */
	mtx_lock(&mountlist_mtx);
	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	vfs_event_signal(NULL, VQ_MOUNT, 0);
	/* A freshly mounted filesystem must be able to produce its root. */
	if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
		panic("mount: lost mount");
	vn_seqc_write_end(vp);
	VOP_UNLOCK(vp);
	vfs_op_exit(mp);
	vfs_unbusy(mp);
	/* Hand the locked root vnode of the snapshot back to the caller. */
	*vpp = mvp;
	return (0);
}
259 
260 static void
vrele_task_runner(void * vp)261 vrele_task_runner(void *vp)
262 {
263 	vrele((vnode_t *)vp);
264 }
265 
266 /*
267  * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
268  * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
269  * the file system as a result of releasing the vnode. Note, file systems
270  * already have to handle the race where the vnode is incremented before the
271  * inactive routine is called and does its locking.
272  *
273  * Warning: Excessive use of this routine can lead to performance problems.
274  * This is because taskqs throttle back allocation if too many are created.
275  */
276 void
vn_rele_async(vnode_t * vp,taskq_t * taskq)277 vn_rele_async(vnode_t *vp, taskq_t *taskq)
278 {
279 	VERIFY3U(vp->v_usecount, >, 0);
280 	if (refcount_release_if_not_last(&vp->v_usecount))
281 		return;
282 	VERIFY3U(taskq_dispatch((taskq_t *)taskq, vrele_task_runner, vp,
283 	    TQ_SLEEP), !=, 0);
284 }
285