/*
 * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/kernel.h>
30 #include <sys/systm.h>
31 #include <sys/malloc.h>
32 #include <sys/mount.h>
33 #include <sys/cred.h>
34 #include <sys/vfs.h>
35 #include <sys/priv.h>
36 #include <sys/libkern.h>
37
38 #include <sys/mutex.h>
39 #include <sys/vnode.h>
40 #include <sys/taskq.h>
41
42 #include <sys/ccompat.h>
43
44 MALLOC_DECLARE(M_MOUNT);
45
46 void
vfs_setmntopt(vfs_t * vfsp,const char * name,const char * arg,int flags __unused)47 vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
48 int flags __unused)
49 {
50 struct vfsopt *opt;
51 size_t namesize;
52 int locked;
53
54 if (!(locked = mtx_owned(MNT_MTX(vfsp))))
55 MNT_ILOCK(vfsp);
56
57 if (vfsp->mnt_opt == NULL) {
58 void *opts;
59
60 MNT_IUNLOCK(vfsp);
61 opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
62 MNT_ILOCK(vfsp);
63 if (vfsp->mnt_opt == NULL) {
64 vfsp->mnt_opt = opts;
65 TAILQ_INIT(vfsp->mnt_opt);
66 } else {
67 free(opts, M_MOUNT);
68 }
69 }
70
71 MNT_IUNLOCK(vfsp);
72
73 opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK);
74 namesize = strlen(name) + 1;
75 opt->name = malloc(namesize, M_MOUNT, M_WAITOK);
76 strlcpy(opt->name, name, namesize);
77 opt->pos = -1;
78 opt->seen = 1;
79 if (arg == NULL) {
80 opt->value = NULL;
81 opt->len = 0;
82 } else {
83 opt->len = strlen(arg) + 1;
84 opt->value = malloc(opt->len, M_MOUNT, M_WAITOK);
85 memcpy(opt->value, arg, opt->len);
86 }
87
88 MNT_ILOCK(vfsp);
89 TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link);
90 if (!locked)
91 MNT_IUNLOCK(vfsp);
92 }
93
94 void
vfs_clearmntopt(vfs_t * vfsp,const char * name)95 vfs_clearmntopt(vfs_t *vfsp, const char *name)
96 {
97 int locked;
98
99 if (!(locked = mtx_owned(MNT_MTX(vfsp))))
100 MNT_ILOCK(vfsp);
101 vfs_deleteopt(vfsp->mnt_opt, name);
102 if (!locked)
103 MNT_IUNLOCK(vfsp);
104 }
105
106 int
vfs_optionisset(const vfs_t * vfsp,const char * opt,char ** argp)107 vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
108 {
109 struct vfsoptlist *opts = vfsp->mnt_optnew;
110 int error;
111
112 if (opts == NULL)
113 return (0);
114 error = vfs_getopt(opts, opt, (void **)argp, NULL);
115 return (error != 0 ? 0 : 1);
116 }
117
118 int
mount_snapshot(kthread_t * td,vnode_t ** vpp,const char * fstype,char * fspath,char * fspec,int fsflags,vfs_t * parent_vfsp)119 mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
120 char *fspec, int fsflags, vfs_t *parent_vfsp)
121 {
122 struct vfsconf *vfsp;
123 struct mount *mp;
124 vnode_t *vp, *mvp;
125 int error;
126
127 ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");
128
129 vp = *vpp;
130 *vpp = NULL;
131 error = 0;
132
133 /*
134 * Be ultra-paranoid about making sure the type and fspath
135 * variables will fit in our mp buffers, including the
136 * terminating NUL.
137 */
138 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
139 error = ENAMETOOLONG;
140 if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
141 error = ENODEV;
142 if (error == 0 && vp->v_type != VDIR)
143 error = ENOTDIR;
144 /*
145 * We need vnode lock to protect v_mountedhere and vnode interlock
146 * to protect v_iflag.
147 */
148 if (error == 0) {
149 VI_LOCK(vp);
150 if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
151 vp->v_iflag |= VI_MOUNT;
152 else
153 error = EBUSY;
154 VI_UNLOCK(vp);
155 }
156 if (error != 0) {
157 vput(vp);
158 return (error);
159 }
160 vn_seqc_write_begin(vp);
161 VOP_UNLOCK(vp);
162
163 /*
164 * Allocate and initialize the filesystem.
165 * We don't want regular user that triggered snapshot mount to be able
166 * to unmount it, so pass credentials of the parent mount.
167 */
168 mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred);
169
170 mp->mnt_optnew = NULL;
171 vfs_setmntopt(mp, "from", fspec, 0);
172 mp->mnt_optnew = mp->mnt_opt;
173 mp->mnt_opt = NULL;
174
175 /*
176 * Set the mount level flags.
177 */
178 mp->mnt_flag = fsflags & MNT_UPDATEMASK;
179 /*
180 * Snapshots are always read-only.
181 */
182 mp->mnt_flag |= MNT_RDONLY;
183 /*
184 * We don't want snapshots to allow access to vulnerable setuid
185 * programs, so we turn off setuid when mounting snapshots.
186 */
187 mp->mnt_flag |= MNT_NOSUID;
188 /*
189 * We don't want snapshots to be visible in regular
190 * mount(8) and df(1) output.
191 */
192 mp->mnt_flag |= MNT_IGNORE;
193
194 error = VFS_MOUNT(mp);
195 if (error != 0) {
196 /*
197 * Clear VI_MOUNT and decrement the use count "atomically",
198 * under the vnode lock. This is not strictly required,
199 * but makes it easier to reason about the life-cycle and
200 * ownership of the covered vnode.
201 */
202 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
203 VI_LOCK(vp);
204 vp->v_iflag &= ~VI_MOUNT;
205 VI_UNLOCK(vp);
206 vn_seqc_write_end(vp);
207 vput(vp);
208 vfs_unbusy(mp);
209 vfs_freeopts(mp->mnt_optnew);
210 mp->mnt_vnodecovered = NULL;
211 vfs_mount_destroy(mp);
212 return (error);
213 }
214
215 if (mp->mnt_opt != NULL)
216 vfs_freeopts(mp->mnt_opt);
217 mp->mnt_opt = mp->mnt_optnew;
218 (void) VFS_STATFS(mp, &mp->mnt_stat);
219
220 #ifdef VFS_SUPPORTS_EXJAIL_CLONE
221 /*
222 * Clone the mnt_exjail credentials of the parent, as required.
223 */
224 vfs_exjail_clone(parent_vfsp, mp);
225 #endif
226
227 /*
228 * Prevent external consumers of mount options from reading
229 * mnt_optnew.
230 */
231 mp->mnt_optnew = NULL;
232
233 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
234 #ifdef FREEBSD_NAMECACHE
235 cache_purge(vp);
236 #endif
237 VI_LOCK(vp);
238 vp->v_iflag &= ~VI_MOUNT;
239 #ifdef VIRF_MOUNTPOINT
240 vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
241 #endif
242 vp->v_mountedhere = mp;
243 VI_UNLOCK(vp);
244 /* Put the new filesystem on the mount list. */
245 mtx_lock(&mountlist_mtx);
246 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
247 mtx_unlock(&mountlist_mtx);
248 vfs_event_signal(NULL, VQ_MOUNT, 0);
249 if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
250 panic("mount: lost mount");
251 vn_seqc_write_end(vp);
252 VOP_UNLOCK(vp);
253 vfs_op_exit(mp);
254 vfs_unbusy(mp);
255 *vpp = mvp;
256 return (0);
257 }
258
259 /*
260 * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
261 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
262 * the file system as a result of releasing the vnode. Note, file systems
263 * already have to handle the race where the vnode is incremented before the
264 * inactive routine is called and does its locking.
265 *
266 * Warning: Excessive use of this routine can lead to performance problems.
267 * This is because taskqs throttle back allocation if too many are created.
268 */
269 void
vn_rele_async(vnode_t * vp,taskq_t * taskq)270 vn_rele_async(vnode_t *vp, taskq_t *taskq)
271 {
272 VERIFY3U(vp->v_usecount, >, 0);
273 if (refcount_release_if_not_last(&vp->v_usecount))
274 return;
275 VERIFY3U(taskq_dispatch((taskq_t *)taskq,
276 (task_func_t *)vrele, vp, TQ_SLEEP), !=, 0);
277 }
278