xref: /freebsd/sys/ufs/ffs/ffs_suspend.c (revision cdc58367265a2bd6e8f913db2bdc591699ee229f)
1 /*-
2  * Copyright (c) 2012 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/buf.h>
38 #include <sys/ioccom.h>
39 #include <sys/mount.h>
40 #include <sys/vnode.h>
41 #include <sys/conf.h>
42 #include <sys/jail.h>
43 #include <sys/sx.h>
44 
45 #include <security/mac/mac_framework.h>
46 
47 #include <ufs/ufs/extattr.h>
48 #include <ufs/ufs/quota.h>
49 #include <ufs/ufs/ufsmount.h>
50 #include <ufs/ufs/inode.h>
51 
52 #include <ufs/ffs/fs.h>
53 #include <ufs/ffs/ffs_extern.h>
54 
55 static d_open_t ffs_susp_open;
56 static d_write_t ffs_susp_rdwr;
57 static d_ioctl_t ffs_susp_ioctl;
58 
59 static struct cdevsw ffs_susp_cdevsw = {
60 	.d_version =	D_VERSION,
61 	.d_open =	ffs_susp_open,
62 	.d_read =	ffs_susp_rdwr,
63 	.d_write =	ffs_susp_rdwr,
64 	.d_ioctl =	ffs_susp_ioctl,
65 	.d_name =	"ffs_susp",
66 };
67 
68 static struct cdev *ffs_susp_dev;
69 static struct sx ffs_susp_lock;
70 
71 static int
72 ffs_susp_suspended(struct mount *mp)
73 {
74 	struct ufsmount *ump;
75 
76 	sx_assert(&ffs_susp_lock, SA_LOCKED);
77 
78 	ump = VFSTOUFS(mp);
79 	if (ump->um_writesuspended)
80 		return (1);
81 	return (0);
82 }
83 
84 static int
85 ffs_susp_open(struct cdev *dev __unused, int flags __unused,
86     int fmt __unused, struct thread *td __unused)
87 {
88 
89 	return (0);
90 }
91 
92 static int
93 ffs_susp_rdwr(struct cdev *dev, struct uio *uio, int ioflag)
94 {
95 	int error, i;
96 	struct vnode *devvp;
97 	struct mount *mp;
98 	struct ufsmount *ump;
99 	struct buf *bp;
100 	void *base;
101 	size_t len;
102 	ssize_t cnt;
103 	struct fs *fs;
104 
105 	sx_slock(&ffs_susp_lock);
106 
107 	error = devfs_get_cdevpriv((void **)&mp);
108 	if (error != 0) {
109 		sx_sunlock(&ffs_susp_lock);
110 		return (ENXIO);
111 	}
112 
113 	ump = VFSTOUFS(mp);
114 	devvp = ump->um_devvp;
115 	fs = ump->um_fs;
116 
117 	if (ffs_susp_suspended(mp) == 0) {
118 		sx_sunlock(&ffs_susp_lock);
119 		return (ENXIO);
120 	}
121 
122 	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
123 	    ("neither UIO_READ or UIO_WRITE"));
124 	KASSERT(uio->uio_segflg == UIO_USERSPACE,
125 	    ("uio->uio_segflg != UIO_USERSPACE"));
126 
127 	cnt = uio->uio_resid;
128 
129 	for (i = 0; i < uio->uio_iovcnt; i++) {
130 		while (uio->uio_iov[i].iov_len) {
131 			base = uio->uio_iov[i].iov_base;
132 			len = uio->uio_iov[i].iov_len;
133 			if (len > fs->fs_bsize)
134 				len = fs->fs_bsize;
135 			if (fragoff(fs, uio->uio_offset) != 0 ||
136 			    fragoff(fs, len) != 0) {
137 				error = EINVAL;
138 				goto out;
139 			}
140 			error = bread(devvp, btodb(uio->uio_offset), len,
141 			    NOCRED, &bp);
142 			if (error != 0)
143 				goto out;
144 			if (uio->uio_rw == UIO_WRITE) {
145 				error = copyin(base, bp->b_data, len);
146 				if (error != 0) {
147 					bp->b_flags |= B_INVAL | B_NOCACHE;
148 					brelse(bp);
149 					goto out;
150 				}
151 				error = bwrite(bp);
152 				if (error != 0)
153 					goto out;
154 			} else {
155 				error = copyout(bp->b_data, base, len);
156 				brelse(bp);
157 				if (error != 0)
158 					goto out;
159 			}
160 			uio->uio_iov[i].iov_base =
161 			    (char *)uio->uio_iov[i].iov_base + len;
162 			uio->uio_iov[i].iov_len -= len;
163 			uio->uio_resid -= len;
164 			uio->uio_offset += len;
165 		}
166 	}
167 
168 out:
169 	sx_sunlock(&ffs_susp_lock);
170 
171 	if (uio->uio_resid < cnt)
172 		return (0);
173 
174 	return (error);
175 }
176 
177 static int
178 ffs_susp_suspend(struct mount *mp)
179 {
180 	struct fs *fs;
181 	struct ufsmount *ump;
182 	int error;
183 
184 	sx_assert(&ffs_susp_lock, SA_XLOCKED);
185 
186 	if (!ffs_own_mount(mp))
187 		return (EINVAL);
188 	if (ffs_susp_suspended(mp))
189 		return (EBUSY);
190 
191 	ump = VFSTOUFS(mp);
192 	fs = ump->um_fs;
193 
194 	/*
195 	 * Make sure the calling thread is permitted to access the mounted
196 	 * device.  The permissions can change after we unlock the vnode;
197 	 * it's harmless.
198 	 */
199 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
200 	error = VOP_ACCESS(ump->um_devvp, VREAD | VWRITE,
201 	    curthread->td_ucred, curthread);
202 	VOP_UNLOCK(ump->um_devvp, 0);
203 	if (error != 0)
204 		return (error);
205 #ifdef MAC
206 	if (mac_mount_check_stat(curthread->td_ucred, mp) != 0)
207 		return (EPERM);
208 #endif
209 
210 	if ((error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT)) != 0)
211 		return (error);
212 
213 	ump->um_writesuspended = 1;
214 
215 	return (0);
216 }
217 
218 static void
219 ffs_susp_dtor(void *data)
220 {
221 	struct fs *fs;
222 	struct ufsmount *ump;
223 	struct mount *mp;
224 	int error;
225 
226 	sx_xlock(&ffs_susp_lock);
227 
228 	mp = (struct mount *)data;
229 	ump = VFSTOUFS(mp);
230 	fs = ump->um_fs;
231 
232 	if (ffs_susp_suspended(mp) == 0) {
233 		sx_xunlock(&ffs_susp_lock);
234 		return;
235 	}
236 
237 	KASSERT((mp->mnt_kern_flag & MNTK_SUSPEND) != 0,
238 	    ("MNTK_SUSPEND not set"));
239 
240 	error = ffs_reload(mp, curthread, 1);
241 	if (error != 0)
242 		panic("failed to unsuspend writes on %s", fs->fs_fsmnt);
243 
244 	/*
245 	 * XXX: The status is kept per-process; the vfs_write_resume() routine
246 	 * 	asserts that the resuming thread is the same one that called
247 	 * 	vfs_write_suspend().  The cdevpriv data, however, is attached
248 	 * 	to the file descriptor, e.g. is inherited during fork.  Thus,
249 	 * 	it's possible that the resuming process will be different from
250 	 * 	the one that started the suspension.
251 	 *
252 	 * 	Work around by fooling the check in vfs_write_resume().
253 	 */
254 	mp->mnt_susp_owner = curthread;
255 
256 	vfs_write_resume(mp, 0);
257 	vfs_unbusy(mp);
258 	ump->um_writesuspended = 0;
259 
260 	sx_xunlock(&ffs_susp_lock);
261 }
262 
263 static int
264 ffs_susp_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
265     struct thread *td)
266 {
267 	struct mount *mp;
268 	fsid_t *fsidp;
269 	int error;
270 
271 	/*
272 	 * No suspend inside the jail.  Allowing it would require making
273 	 * sure that e.g. the devfs ruleset for that jail permits access
274 	 * to the devvp.
275 	 */
276 	if (jailed(td->td_ucred))
277 		return (EPERM);
278 
279 	sx_xlock(&ffs_susp_lock);
280 
281 	switch (cmd) {
282 	case UFSSUSPEND:
283 		fsidp = (fsid_t *)addr;
284 		mp = vfs_getvfs(fsidp);
285 		if (mp == NULL) {
286 			error = ENOENT;
287 			break;
288 		}
289 		error = vfs_busy(mp, 0);
290 		vfs_rel(mp);
291 		if (error != 0)
292 			break;
293 		error = ffs_susp_suspend(mp);
294 		if (error != 0) {
295 			vfs_unbusy(mp);
296 			break;
297 		}
298 		error = devfs_set_cdevpriv(mp, ffs_susp_dtor);
299 		KASSERT(error == 0, ("devfs_set_cdevpriv failed"));
300 		break;
301 	case UFSRESUME:
302 		error = devfs_get_cdevpriv((void **)&mp);
303 		if (error != 0)
304 			break;
305 		/*
306 		 * This calls ffs_susp_dtor, which in turn unsuspends the fs.
307 		 * The dtor expects to be called without lock held, because
308 		 * sometimes it's called from here, and sometimes due to the
309 		 * file being closed or process exiting.
310 		 */
311 		sx_xunlock(&ffs_susp_lock);
312 		devfs_clear_cdevpriv();
313 		return (0);
314 	default:
315 		error = ENXIO;
316 		break;
317 	}
318 
319 	sx_xunlock(&ffs_susp_lock);
320 
321 	return (error);
322 }
323 
324 void
325 ffs_susp_initialize(void)
326 {
327 
328 	sx_init(&ffs_susp_lock, "ffs_susp");
329 	ffs_susp_dev = make_dev(&ffs_susp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
330 	    "ufssuspend");
331 }
332 
333 void
334 ffs_susp_uninitialize(void)
335 {
336 
337 	destroy_dev(ffs_susp_dev);
338 	sx_destroy(&ffs_susp_lock);
339 }
340