1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/filedesc.h> 36 #include <sys/ioccom.h> 37 #include <sys/jail.h> 38 #include <sys/mount.h> 39 #include <sys/sx.h> 40 #include <sys/vnode.h> 41 42 #include <security/mac/mac_framework.h> 43 44 #include <ufs/ufs/extattr.h> 45 #include <ufs/ufs/quota.h> 46 #include <ufs/ufs/ufsmount.h> 47 #include <ufs/ufs/inode.h> 48 49 #include <ufs/ffs/fs.h> 50 #include <ufs/ffs/ffs_extern.h> 51 52 static d_open_t ffs_susp_open; 53 static d_write_t ffs_susp_rdwr; 54 static d_ioctl_t ffs_susp_ioctl; 55 56 static struct cdevsw ffs_susp_cdevsw = { 57 .d_version = D_VERSION, 58 .d_open = ffs_susp_open, 59 .d_read = ffs_susp_rdwr, 60 .d_write = ffs_susp_rdwr, 61 .d_ioctl = ffs_susp_ioctl, 62 .d_name = "ffs_susp", 63 }; 64 65 static struct cdev *ffs_susp_dev; 66 static struct sx ffs_susp_lock; 67 68 static int 69 ffs_susp_suspended(struct mount *mp) 70 { 71 struct ufsmount *ump; 72 73 sx_assert(&ffs_susp_lock, SA_LOCKED); 74 75 ump = VFSTOUFS(mp); 76 if ((ump->um_flags & UM_WRITESUSPENDED) != 0) 77 return (1); 78 return (0); 79 } 80 81 static int 82 ffs_susp_open(struct cdev *dev __unused, int flags __unused, 83 int fmt __unused, struct thread *td __unused) 84 { 85 86 return (0); 87 } 88 89 static int 90 ffs_susp_rdwr(struct cdev *dev, struct uio *uio, int ioflag) 91 { 92 int error, i; 93 struct vnode *devvp; 94 struct mount *mp; 95 struct ufsmount *ump; 96 struct buf *bp; 97 void *base; 98 size_t len; 99 ssize_t cnt; 100 struct fs *fs; 101 102 sx_slock(&ffs_susp_lock); 103 104 error = devfs_get_cdevpriv((void **)&mp); 105 if (error != 0) { 106 sx_sunlock(&ffs_susp_lock); 107 return (ENXIO); 108 } 109 110 ump = VFSTOUFS(mp); 111 devvp = ump->um_devvp; 112 fs = ump->um_fs; 113 114 if (ffs_susp_suspended(mp) == 0) { 115 sx_sunlock(&ffs_susp_lock); 116 return (ENXIO); 117 } 118 119 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 120 ("neither UIO_READ or UIO_WRITE")); 121 KASSERT(uio->uio_segflg == UIO_USERSPACE, 122 ("uio->uio_segflg != UIO_USERSPACE")); 123 124 cnt = uio->uio_resid; 125 126 for (i = 0; i < uio->uio_iovcnt; i++) { 127 while (uio->uio_iov[i].iov_len) { 128 base = uio->uio_iov[i].iov_base; 129 len = uio->uio_iov[i].iov_len; 130 if (len > fs->fs_bsize) 131 len = fs->fs_bsize; 132 if (fragoff(fs, uio->uio_offset) != 0 || 133 fragoff(fs, len) != 0) { 134 error = EINVAL; 135 goto out; 136 } 137 error = bread(devvp, btodb(uio->uio_offset), len, 138 NOCRED, &bp); 139 if (error != 0) 140 goto out; 141 if (uio->uio_rw == UIO_WRITE) { 142 error = copyin(base, bp->b_data, len); 143 if (error != 0) { 144 bp->b_flags |= B_INVAL | B_NOCACHE; 145 brelse(bp); 146 goto out; 147 } 148 error = bwrite(bp); 149 if (error != 0) 150 goto out; 151 } else { 152 error = copyout(bp->b_data, base, len); 153 brelse(bp); 154 if (error != 0) 155 goto out; 156 } 157 uio->uio_iov[i].iov_base = 158 (char *)uio->uio_iov[i].iov_base + len; 159 uio->uio_iov[i].iov_len -= len; 160 uio->uio_resid -= len; 161 uio->uio_offset += len; 162 } 163 } 164 165 out: 166 sx_sunlock(&ffs_susp_lock); 167 168 if (uio->uio_resid < cnt) 169 return (0); 170 171 return (error); 172 } 173 174 static int 175 ffs_susp_suspend(struct mount *mp) 176 { 177 struct ufsmount *ump; 178 int error; 179 180 sx_assert(&ffs_susp_lock, SA_XLOCKED); 181 182 if (!ffs_own_mount(mp)) 183 return (EINVAL); 184 if (ffs_susp_suspended(mp)) 185 return (EBUSY); 186 187 ump = VFSTOUFS(mp); 188 189 /* 190 * Make sure the calling thread is permitted to access the mounted 191 * device. The permissions can change after we unlock the vnode; 192 * it's harmless. 193 */ 194 vn_lock(ump->um_odevvp, LK_EXCLUSIVE | LK_RETRY); 195 error = VOP_ACCESS(ump->um_odevvp, VREAD | VWRITE, 196 curthread->td_ucred, curthread); 197 VOP_UNLOCK(ump->um_odevvp); 198 if (error != 0) 199 return (error); 200 #ifdef MAC 201 if (mac_mount_check_stat(curthread->td_ucred, mp) != 0) 202 return (EPERM); 203 #endif 204 205 if ((error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT)) != 0) 206 return (error); 207 208 UFS_LOCK(ump); 209 ump->um_flags |= UM_WRITESUSPENDED; 210 UFS_UNLOCK(ump); 211 212 return (0); 213 } 214 215 static void 216 ffs_susp_unsuspend(struct mount *mp) 217 { 218 struct ufsmount *ump; 219 220 sx_assert(&ffs_susp_lock, SA_XLOCKED); 221 222 /* 223 * XXX: The status is kept per-process; the vfs_write_resume() routine 224 * asserts that the resuming thread is the same one that called 225 * vfs_write_suspend(). The cdevpriv data, however, is attached 226 * to the file descriptor, e.g. is inherited during fork. Thus, 227 * it's possible that the resuming process will be different from 228 * the one that started the suspension. 229 * 230 * Work around by fooling the check in vfs_write_resume(). 231 */ 232 mp->mnt_susp_owner = curthread; 233 234 vfs_write_resume(mp, 0); 235 ump = VFSTOUFS(mp); 236 UFS_LOCK(ump); 237 ump->um_flags &= ~UM_WRITESUSPENDED; 238 UFS_UNLOCK(ump); 239 vfs_unbusy(mp); 240 } 241 242 static void 243 ffs_susp_dtor(void *data) 244 { 245 struct fs *fs; 246 struct ufsmount *ump; 247 struct mount *mp; 248 int error; 249 250 sx_xlock(&ffs_susp_lock); 251 252 mp = (struct mount *)data; 253 ump = VFSTOUFS(mp); 254 fs = ump->um_fs; 255 256 if (ffs_susp_suspended(mp) == 0) { 257 sx_xunlock(&ffs_susp_lock); 258 return; 259 } 260 261 KASSERT((mp->mnt_kern_flag & MNTK_SUSPEND) != 0, 262 ("MNTK_SUSPEND not set")); 263 264 error = ffs_reload(mp, FFSR_FORCE | FFSR_UNSUSPEND); 265 if (error != 0) 266 panic("failed to unsuspend writes on %s", fs->fs_fsmnt); 267 268 ffs_susp_unsuspend(mp); 269 sx_xunlock(&ffs_susp_lock); 270 } 271 272 static int 273 ffs_susp_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 274 struct thread *td) 275 { 276 struct mount *mp; 277 fsid_t *fsidp; 278 int error; 279 280 /* 281 * No suspend inside the jail. Allowing it would require making 282 * sure that e.g. the devfs ruleset for that jail permits access 283 * to the devvp. 284 */ 285 if (jailed(td->td_ucred)) 286 return (EPERM); 287 288 sx_xlock(&ffs_susp_lock); 289 290 switch (cmd) { 291 case UFSSUSPEND: 292 fsidp = (fsid_t *)addr; 293 mp = vfs_getvfs(fsidp); 294 if (mp == NULL) { 295 error = ENOENT; 296 break; 297 } 298 error = vfs_busy(mp, 0); 299 vfs_rel(mp); 300 if (error != 0) 301 break; 302 303 /* 304 * Require single-thread curproc so that the check is not racey. 305 * XXXKIB: might consider to singlethread curproc instead. 306 */ 307 error = curproc->p_numthreads > 1 ? EDEADLK : 308 descrip_check_write_mp(curproc->p_fd, mp); 309 if (error != 0) { 310 vfs_unbusy(mp); 311 break; 312 } 313 314 error = ffs_susp_suspend(mp); 315 if (error != 0) { 316 vfs_unbusy(mp); 317 break; 318 } 319 error = devfs_set_cdevpriv(mp, ffs_susp_dtor); 320 if (error != 0) 321 ffs_susp_unsuspend(mp); 322 break; 323 case UFSRESUME: 324 error = devfs_get_cdevpriv((void **)&mp); 325 if (error != 0) 326 break; 327 /* 328 * This calls ffs_susp_dtor, which in turn unsuspends the fs. 329 * The dtor expects to be called without lock held, because 330 * sometimes it's called from here, and sometimes due to the 331 * file being closed or process exiting. 332 */ 333 sx_xunlock(&ffs_susp_lock); 334 devfs_clear_cdevpriv(); 335 return (0); 336 default: 337 error = ENXIO; 338 break; 339 } 340 341 sx_xunlock(&ffs_susp_lock); 342 343 return (error); 344 } 345 346 void 347 ffs_susp_initialize(void) 348 { 349 350 sx_init(&ffs_susp_lock, "ffs_susp"); 351 ffs_susp_dev = make_dev(&ffs_susp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 352 "ufssuspend"); 353 } 354 355 void 356 ffs_susp_uninitialize(void) 357 { 358 359 destroy_dev(ffs_susp_dev); 360 sx_destroy(&ffs_susp_lock); 361 } 362