1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/filedesc.h> 36 #include <sys/ioccom.h> 37 #include <sys/jail.h> 38 #include <sys/mount.h> 39 #include <sys/sx.h> 40 #include <sys/vnode.h> 41 42 #include <security/mac/mac_framework.h> 43 44 #include <ufs/ufs/extattr.h> 45 #include <ufs/ufs/quota.h> 46 #include <ufs/ufs/ufsmount.h> 47 #include <ufs/ufs/inode.h> 48 49 #include <ufs/ffs/fs.h> 50 #include <ufs/ffs/ffs_extern.h> 51 52 static d_open_t ffs_susp_open; 53 static d_write_t ffs_susp_rdwr; 54 static d_ioctl_t ffs_susp_ioctl; 55 56 static struct cdevsw ffs_susp_cdevsw = { 57 .d_version = D_VERSION, 58 .d_open = ffs_susp_open, 59 .d_read = ffs_susp_rdwr, 60 .d_write = ffs_susp_rdwr, 61 .d_ioctl = ffs_susp_ioctl, 62 .d_name = "ffs_susp", 63 }; 64 65 static struct cdev *ffs_susp_dev; 66 static struct sx ffs_susp_lock; 67 68 static int 69 ffs_susp_suspended(struct mount *mp) 70 { 71 struct ufsmount *ump; 72 73 sx_assert(&ffs_susp_lock, SA_LOCKED); 74 75 ump = VFSTOUFS(mp); 76 if ((ump->um_flags & UM_WRITESUSPENDED) != 0) 77 return (1); 78 return (0); 79 } 80 81 static int 82 ffs_susp_open(struct cdev *dev __unused, int flags __unused, 83 int fmt __unused, struct thread *td __unused) 84 { 85 86 return (0); 87 } 88 89 static int 90 ffs_susp_rdwr(struct cdev *dev, struct uio *uio, int ioflag) 91 { 92 int error, i; 93 struct vnode *devvp; 94 struct mount *mp; 95 struct ufsmount *ump; 96 struct buf *bp; 97 void *base; 98 size_t len; 99 ssize_t cnt; 100 struct fs *fs; 101 102 sx_slock(&ffs_susp_lock); 103 104 error = devfs_get_cdevpriv((void **)&mp); 105 if (error != 0) { 106 sx_sunlock(&ffs_susp_lock); 107 return (ENXIO); 108 } 109 110 ump = VFSTOUFS(mp); 111 devvp = ump->um_devvp; 112 fs = ump->um_fs; 113 114 if (ffs_susp_suspended(mp) == 0) { 115 sx_sunlock(&ffs_susp_lock); 116 return (ENXIO); 117 } 118 119 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 120 ("neither UIO_READ or UIO_WRITE")); 121 KASSERT(uio->uio_segflg == UIO_USERSPACE, 122 ("uio->uio_segflg != UIO_USERSPACE")); 123 124 cnt = uio->uio_resid; 125 126 for (i = 0; i < uio->uio_iovcnt; i++) { 127 while (uio->uio_iov[i].iov_len) { 128 base = uio->uio_iov[i].iov_base; 129 len = uio->uio_iov[i].iov_len; 130 if (len > fs->fs_bsize) 131 len = fs->fs_bsize; 132 if (fragoff(fs, uio->uio_offset) != 0 || 133 fragoff(fs, len) != 0) { 134 error = EINVAL; 135 goto out; 136 } 137 error = bread(devvp, btodb(uio->uio_offset), len, 138 NOCRED, &bp); 139 if (error != 0) 140 goto out; 141 switch (uio->uio_rw) { 142 case UIO_WRITE: 143 error = copyin(base, bp->b_data, len); 144 if (error != 0) { 145 bp->b_flags |= B_INVAL | B_NOCACHE; 146 brelse(bp); 147 goto out; 148 } 149 error = bwrite(bp); 150 if (error != 0) 151 goto out; 152 break; 153 case UIO_READ: 154 error = copyout(bp->b_data, base, len); 155 brelse(bp); 156 if (error != 0) 157 goto out; 158 break; 159 } 160 uio->uio_iov[i].iov_base = 161 (char *)uio->uio_iov[i].iov_base + len; 162 uio->uio_iov[i].iov_len -= len; 163 uio->uio_resid -= len; 164 uio->uio_offset += len; 165 } 166 } 167 168 out: 169 sx_sunlock(&ffs_susp_lock); 170 171 if (uio->uio_resid < cnt) 172 return (0); 173 174 return (error); 175 } 176 177 static int 178 ffs_susp_suspend(struct mount *mp) 179 { 180 struct ufsmount *ump; 181 int error; 182 183 sx_assert(&ffs_susp_lock, SA_XLOCKED); 184 185 if (!ffs_own_mount(mp)) 186 return (EINVAL); 187 if (ffs_susp_suspended(mp)) 188 return (EBUSY); 189 190 ump = VFSTOUFS(mp); 191 192 /* 193 * Make sure the calling thread is permitted to access the mounted 194 * device. The permissions can change after we unlock the vnode; 195 * it's harmless. 196 */ 197 vn_lock(ump->um_odevvp, LK_EXCLUSIVE | LK_RETRY); 198 error = VOP_ACCESS(ump->um_odevvp, VREAD | VWRITE, 199 curthread->td_ucred, curthread); 200 VOP_UNLOCK(ump->um_odevvp); 201 if (error != 0) 202 return (error); 203 #ifdef MAC 204 if (mac_mount_check_stat(curthread->td_ucred, mp) != 0) 205 return (EPERM); 206 #endif 207 208 if ((error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT)) != 0) 209 return (error); 210 211 UFS_LOCK(ump); 212 ump->um_flags |= UM_WRITESUSPENDED; 213 UFS_UNLOCK(ump); 214 215 return (0); 216 } 217 218 static void 219 ffs_susp_unsuspend(struct mount *mp) 220 { 221 struct ufsmount *ump; 222 223 sx_assert(&ffs_susp_lock, SA_XLOCKED); 224 225 /* 226 * XXX: The status is kept per-process; the vfs_write_resume() routine 227 * asserts that the resuming thread is the same one that called 228 * vfs_write_suspend(). The cdevpriv data, however, is attached 229 * to the file descriptor, e.g. is inherited during fork. Thus, 230 * it's possible that the resuming process will be different from 231 * the one that started the suspension. 232 * 233 * Work around by fooling the check in vfs_write_resume(). 234 */ 235 mp->mnt_susp_owner = curthread; 236 237 vfs_write_resume(mp, 0); 238 ump = VFSTOUFS(mp); 239 UFS_LOCK(ump); 240 ump->um_flags &= ~UM_WRITESUSPENDED; 241 UFS_UNLOCK(ump); 242 vfs_unbusy(mp); 243 } 244 245 static void 246 ffs_susp_dtor(void *data) 247 { 248 struct fs *fs; 249 struct ufsmount *ump; 250 struct mount *mp; 251 int error; 252 253 sx_xlock(&ffs_susp_lock); 254 255 mp = (struct mount *)data; 256 ump = VFSTOUFS(mp); 257 fs = ump->um_fs; 258 259 if (ffs_susp_suspended(mp) == 0) { 260 sx_xunlock(&ffs_susp_lock); 261 return; 262 } 263 264 KASSERT((mp->mnt_kern_flag & MNTK_SUSPEND) != 0, 265 ("MNTK_SUSPEND not set")); 266 267 error = ffs_reload(mp, FFSR_FORCE | FFSR_UNSUSPEND); 268 if (error != 0) 269 panic("failed to unsuspend writes on %s", fs->fs_fsmnt); 270 271 ffs_susp_unsuspend(mp); 272 sx_xunlock(&ffs_susp_lock); 273 } 274 275 static int 276 ffs_susp_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 277 struct thread *td) 278 { 279 struct mount *mp; 280 fsid_t *fsidp; 281 int error; 282 283 /* 284 * No suspend inside the jail. Allowing it would require making 285 * sure that e.g. the devfs ruleset for that jail permits access 286 * to the devvp. 287 */ 288 if (jailed(td->td_ucred)) 289 return (EPERM); 290 291 sx_xlock(&ffs_susp_lock); 292 293 switch (cmd) { 294 case UFSSUSPEND: 295 fsidp = (fsid_t *)addr; 296 mp = vfs_getvfs(fsidp); 297 if (mp == NULL) { 298 error = ENOENT; 299 break; 300 } 301 error = vfs_busy(mp, 0); 302 vfs_rel(mp); 303 if (error != 0) 304 break; 305 306 /* 307 * Require single-thread curproc so that the check is not racey. 308 * XXXKIB: might consider to singlethread curproc instead. 309 */ 310 error = curproc->p_numthreads > 1 ? EDEADLK : 311 descrip_check_write_mp(curproc->p_fd, mp); 312 if (error != 0) { 313 vfs_unbusy(mp); 314 break; 315 } 316 317 error = ffs_susp_suspend(mp); 318 if (error != 0) { 319 vfs_unbusy(mp); 320 break; 321 } 322 error = devfs_set_cdevpriv(mp, ffs_susp_dtor); 323 if (error != 0) 324 ffs_susp_unsuspend(mp); 325 break; 326 case UFSRESUME: 327 error = devfs_get_cdevpriv((void **)&mp); 328 if (error != 0) 329 break; 330 /* 331 * This calls ffs_susp_dtor, which in turn unsuspends the fs. 332 * The dtor expects to be called without lock held, because 333 * sometimes it's called from here, and sometimes due to the 334 * file being closed or process exiting. 335 */ 336 sx_xunlock(&ffs_susp_lock); 337 devfs_clear_cdevpriv(); 338 return (0); 339 default: 340 error = ENXIO; 341 break; 342 } 343 344 sx_xunlock(&ffs_susp_lock); 345 346 return (error); 347 } 348 349 void 350 ffs_susp_initialize(void) 351 { 352 353 sx_init(&ffs_susp_lock, "ffs_susp"); 354 ffs_susp_dev = make_dev(&ffs_susp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 355 "ufssuspend"); 356 } 357 358 void 359 ffs_susp_uninitialize(void) 360 { 361 362 destroy_dev(ffs_susp_dev); 363 sx_destroy(&ffs_susp_lock); 364 } 365