1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $FreeBSD$ 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 #include <sys/systm.h> 37 #include <sys/buf.h> 38 #include <sys/conf.h> 39 #include <sys/filedesc.h> 40 #include <sys/ioccom.h> 41 #include <sys/jail.h> 42 #include <sys/mount.h> 43 #include <sys/sx.h> 44 #include <sys/vnode.h> 45 46 #include <security/mac/mac_framework.h> 47 48 #include <ufs/ufs/extattr.h> 49 #include <ufs/ufs/quota.h> 50 #include <ufs/ufs/ufsmount.h> 51 #include <ufs/ufs/inode.h> 52 53 #include <ufs/ffs/fs.h> 54 #include <ufs/ffs/ffs_extern.h> 55 56 static d_open_t ffs_susp_open; 57 static d_write_t ffs_susp_rdwr; 58 static d_ioctl_t ffs_susp_ioctl; 59 60 static struct cdevsw ffs_susp_cdevsw = { 61 .d_version = D_VERSION, 62 .d_open = ffs_susp_open, 63 .d_read = ffs_susp_rdwr, 64 .d_write = ffs_susp_rdwr, 65 .d_ioctl = ffs_susp_ioctl, 66 .d_name = "ffs_susp", 67 }; 68 69 static struct cdev *ffs_susp_dev; 70 static struct sx ffs_susp_lock; 71 72 static int 73 ffs_susp_suspended(struct mount *mp) 74 { 75 struct ufsmount *ump; 76 77 sx_assert(&ffs_susp_lock, SA_LOCKED); 78 79 ump = VFSTOUFS(mp); 80 if ((ump->um_flags & UM_WRITESUSPENDED) != 0) 81 return (1); 82 return (0); 83 } 84 85 static int 86 ffs_susp_open(struct cdev *dev __unused, int flags __unused, 87 int fmt __unused, struct thread *td __unused) 88 { 89 90 return (0); 91 } 92 93 static int 94 ffs_susp_rdwr(struct cdev *dev, struct uio *uio, int ioflag) 95 { 96 int error, i; 97 struct vnode *devvp; 98 struct mount *mp; 99 struct ufsmount *ump; 100 struct buf *bp; 101 void *base; 102 size_t len; 103 ssize_t cnt; 104 struct fs *fs; 105 106 sx_slock(&ffs_susp_lock); 107 108 error = devfs_get_cdevpriv((void **)&mp); 109 if (error != 0) { 110 sx_sunlock(&ffs_susp_lock); 111 return (ENXIO); 112 } 113 114 ump = VFSTOUFS(mp); 115 devvp = ump->um_devvp; 116 fs = ump->um_fs; 117 118 if (ffs_susp_suspended(mp) == 0) { 119 sx_sunlock(&ffs_susp_lock); 120 return (ENXIO); 121 } 122 123 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 124 ("neither UIO_READ or UIO_WRITE")); 125 KASSERT(uio->uio_segflg == UIO_USERSPACE, 126 ("uio->uio_segflg != UIO_USERSPACE")); 127 128 cnt = uio->uio_resid; 129 130 for (i = 0; i < uio->uio_iovcnt; i++) { 131 while (uio->uio_iov[i].iov_len) { 132 base = uio->uio_iov[i].iov_base; 133 len = uio->uio_iov[i].iov_len; 134 if (len > fs->fs_bsize) 135 len = fs->fs_bsize; 136 if (fragoff(fs, uio->uio_offset) != 0 || 137 fragoff(fs, len) != 0) { 138 error = EINVAL; 139 goto out; 140 } 141 error = bread(devvp, btodb(uio->uio_offset), len, 142 NOCRED, &bp); 143 if (error != 0) 144 goto out; 145 if (uio->uio_rw == UIO_WRITE) { 146 error = copyin(base, bp->b_data, len); 147 if (error != 0) { 148 bp->b_flags |= B_INVAL | B_NOCACHE; 149 brelse(bp); 150 goto out; 151 } 152 error = bwrite(bp); 153 if (error != 0) 154 goto out; 155 } else { 156 error = copyout(bp->b_data, base, len); 157 brelse(bp); 158 if (error != 0) 159 goto out; 160 } 161 uio->uio_iov[i].iov_base = 162 (char *)uio->uio_iov[i].iov_base + len; 163 uio->uio_iov[i].iov_len -= len; 164 uio->uio_resid -= len; 165 uio->uio_offset += len; 166 } 167 } 168 169 out: 170 sx_sunlock(&ffs_susp_lock); 171 172 if (uio->uio_resid < cnt) 173 return (0); 174 175 return (error); 176 } 177 178 static int 179 ffs_susp_suspend(struct mount *mp) 180 { 181 struct ufsmount *ump; 182 int error; 183 184 sx_assert(&ffs_susp_lock, SA_XLOCKED); 185 186 if (!ffs_own_mount(mp)) 187 return (EINVAL); 188 if (ffs_susp_suspended(mp)) 189 return (EBUSY); 190 191 ump = VFSTOUFS(mp); 192 193 /* 194 * Make sure the calling thread is permitted to access the mounted 195 * device. The permissions can change after we unlock the vnode; 196 * it's harmless. 197 */ 198 vn_lock(ump->um_odevvp, LK_EXCLUSIVE | LK_RETRY); 199 error = VOP_ACCESS(ump->um_odevvp, VREAD | VWRITE, 200 curthread->td_ucred, curthread); 201 VOP_UNLOCK(ump->um_odevvp); 202 if (error != 0) 203 return (error); 204 #ifdef MAC 205 if (mac_mount_check_stat(curthread->td_ucred, mp) != 0) 206 return (EPERM); 207 #endif 208 209 if ((error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT)) != 0) 210 return (error); 211 212 UFS_LOCK(ump); 213 ump->um_flags |= UM_WRITESUSPENDED; 214 UFS_UNLOCK(ump); 215 216 return (0); 217 } 218 219 static void 220 ffs_susp_unsuspend(struct mount *mp) 221 { 222 struct ufsmount *ump; 223 224 sx_assert(&ffs_susp_lock, SA_XLOCKED); 225 226 /* 227 * XXX: The status is kept per-process; the vfs_write_resume() routine 228 * asserts that the resuming thread is the same one that called 229 * vfs_write_suspend(). The cdevpriv data, however, is attached 230 * to the file descriptor, e.g. is inherited during fork. Thus, 231 * it's possible that the resuming process will be different from 232 * the one that started the suspension. 233 * 234 * Work around by fooling the check in vfs_write_resume(). 235 */ 236 mp->mnt_susp_owner = curthread; 237 238 vfs_write_resume(mp, 0); 239 ump = VFSTOUFS(mp); 240 UFS_LOCK(ump); 241 ump->um_flags &= ~UM_WRITESUSPENDED; 242 UFS_UNLOCK(ump); 243 vfs_unbusy(mp); 244 } 245 246 static void 247 ffs_susp_dtor(void *data) 248 { 249 struct fs *fs; 250 struct ufsmount *ump; 251 struct mount *mp; 252 int error; 253 254 sx_xlock(&ffs_susp_lock); 255 256 mp = (struct mount *)data; 257 ump = VFSTOUFS(mp); 258 fs = ump->um_fs; 259 260 if (ffs_susp_suspended(mp) == 0) { 261 sx_xunlock(&ffs_susp_lock); 262 return; 263 } 264 265 KASSERT((mp->mnt_kern_flag & MNTK_SUSPEND) != 0, 266 ("MNTK_SUSPEND not set")); 267 268 error = ffs_reload(mp, FFSR_FORCE | FFSR_UNSUSPEND); 269 if (error != 0) 270 panic("failed to unsuspend writes on %s", fs->fs_fsmnt); 271 272 ffs_susp_unsuspend(mp); 273 sx_xunlock(&ffs_susp_lock); 274 } 275 276 static int 277 ffs_susp_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 278 struct thread *td) 279 { 280 struct mount *mp; 281 fsid_t *fsidp; 282 int error; 283 284 /* 285 * No suspend inside the jail. Allowing it would require making 286 * sure that e.g. the devfs ruleset for that jail permits access 287 * to the devvp. 288 */ 289 if (jailed(td->td_ucred)) 290 return (EPERM); 291 292 sx_xlock(&ffs_susp_lock); 293 294 switch (cmd) { 295 case UFSSUSPEND: 296 fsidp = (fsid_t *)addr; 297 mp = vfs_getvfs(fsidp); 298 if (mp == NULL) { 299 error = ENOENT; 300 break; 301 } 302 error = vfs_busy(mp, 0); 303 vfs_rel(mp); 304 if (error != 0) 305 break; 306 307 /* 308 * Require single-thread curproc so that the check is not racey. 309 * XXXKIB: might consider to singlethread curproc instead. 310 */ 311 error = curproc->p_numthreads > 1 ? EDEADLK : 312 descrip_check_write_mp(curproc->p_fd, mp); 313 if (error != 0) { 314 vfs_unbusy(mp); 315 break; 316 } 317 318 error = ffs_susp_suspend(mp); 319 if (error != 0) { 320 vfs_unbusy(mp); 321 break; 322 } 323 error = devfs_set_cdevpriv(mp, ffs_susp_dtor); 324 if (error != 0) 325 ffs_susp_unsuspend(mp); 326 break; 327 case UFSRESUME: 328 error = devfs_get_cdevpriv((void **)&mp); 329 if (error != 0) 330 break; 331 /* 332 * This calls ffs_susp_dtor, which in turn unsuspends the fs. 333 * The dtor expects to be called without lock held, because 334 * sometimes it's called from here, and sometimes due to the 335 * file being closed or process exiting. 336 */ 337 sx_xunlock(&ffs_susp_lock); 338 devfs_clear_cdevpriv(); 339 return (0); 340 default: 341 error = ENXIO; 342 break; 343 } 344 345 sx_xunlock(&ffs_susp_lock); 346 347 return (error); 348 } 349 350 void 351 ffs_susp_initialize(void) 352 { 353 354 sx_init(&ffs_susp_lock, "ffs_susp"); 355 ffs_susp_dev = make_dev(&ffs_susp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 356 "ufssuspend"); 357 } 358 359 void 360 ffs_susp_uninitialize(void) 361 { 362 363 destroy_dev(ffs_susp_dev); 364 sx_destroy(&ffs_susp_lock); 365 } 366