1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * 6 * This software was developed by Edward Tomasz Napierala under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/systm.h> 35 #include <sys/buf.h> 36 #include <sys/conf.h> 37 #include <sys/filedesc.h> 38 #include <sys/ioccom.h> 39 #include <sys/jail.h> 40 #include <sys/mount.h> 41 #include <sys/sx.h> 42 #include <sys/vnode.h> 43 44 #include <security/mac/mac_framework.h> 45 46 #include <ufs/ufs/extattr.h> 47 #include <ufs/ufs/quota.h> 48 #include <ufs/ufs/ufsmount.h> 49 #include <ufs/ufs/inode.h> 50 51 #include <ufs/ffs/fs.h> 52 #include <ufs/ffs/ffs_extern.h> 53 54 static d_open_t ffs_susp_open; 55 static d_write_t ffs_susp_rdwr; 56 static d_ioctl_t ffs_susp_ioctl; 57 58 static struct cdevsw ffs_susp_cdevsw = { 59 .d_version = D_VERSION, 60 .d_open = ffs_susp_open, 61 .d_read = ffs_susp_rdwr, 62 .d_write = ffs_susp_rdwr, 63 .d_ioctl = ffs_susp_ioctl, 64 .d_name = "ffs_susp", 65 }; 66 67 static struct cdev *ffs_susp_dev; 68 static struct sx ffs_susp_lock; 69 70 static int 71 ffs_susp_suspended(struct mount *mp) 72 { 73 struct ufsmount *ump; 74 75 sx_assert(&ffs_susp_lock, SA_LOCKED); 76 77 ump = VFSTOUFS(mp); 78 if ((ump->um_flags & UM_WRITESUSPENDED) != 0) 79 return (1); 80 return (0); 81 } 82 83 static int 84 ffs_susp_open(struct cdev *dev __unused, int flags __unused, 85 int fmt __unused, struct thread *td __unused) 86 { 87 88 return (0); 89 } 90 91 static int 92 ffs_susp_rdwr(struct cdev *dev, struct uio *uio, int ioflag) 93 { 94 int error, i; 95 struct vnode *devvp; 96 struct mount *mp; 97 struct ufsmount *ump; 98 struct buf *bp; 99 void *base; 100 size_t len; 101 ssize_t cnt; 102 struct fs *fs; 103 104 sx_slock(&ffs_susp_lock); 105 106 error = devfs_get_cdevpriv((void **)&mp); 107 if (error != 0) { 108 sx_sunlock(&ffs_susp_lock); 109 return (ENXIO); 110 } 111 112 ump = VFSTOUFS(mp); 113 devvp = ump->um_devvp; 114 fs = ump->um_fs; 115 116 if (ffs_susp_suspended(mp) == 0) { 117 sx_sunlock(&ffs_susp_lock); 118 return (ENXIO); 119 } 120 121 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, 122 ("neither UIO_READ or UIO_WRITE")); 123 KASSERT(uio->uio_segflg == UIO_USERSPACE, 124 ("uio->uio_segflg != UIO_USERSPACE")); 125 126 cnt = uio->uio_resid; 127 128 for (i = 0; i < uio->uio_iovcnt; i++) { 129 while (uio->uio_iov[i].iov_len) { 130 base = uio->uio_iov[i].iov_base; 131 len = uio->uio_iov[i].iov_len; 132 if (len > fs->fs_bsize) 133 len = fs->fs_bsize; 134 if (fragoff(fs, uio->uio_offset) != 0 || 135 fragoff(fs, len) != 0) { 136 error = EINVAL; 137 goto out; 138 } 139 error = bread(devvp, btodb(uio->uio_offset), len, 140 NOCRED, &bp); 141 if (error != 0) 142 goto out; 143 if (uio->uio_rw == UIO_WRITE) { 144 error = copyin(base, bp->b_data, len); 145 if (error != 0) { 146 bp->b_flags |= B_INVAL | B_NOCACHE; 147 brelse(bp); 148 goto out; 149 } 150 error = bwrite(bp); 151 if (error != 0) 152 goto out; 153 } else { 154 error = copyout(bp->b_data, base, len); 155 brelse(bp); 156 if (error != 0) 157 goto out; 158 } 159 uio->uio_iov[i].iov_base = 160 (char *)uio->uio_iov[i].iov_base + len; 161 uio->uio_iov[i].iov_len -= len; 162 uio->uio_resid -= len; 163 uio->uio_offset += len; 164 } 165 } 166 167 out: 168 sx_sunlock(&ffs_susp_lock); 169 170 if (uio->uio_resid < cnt) 171 return (0); 172 173 return (error); 174 } 175 176 static int 177 ffs_susp_suspend(struct mount *mp) 178 { 179 struct ufsmount *ump; 180 int error; 181 182 sx_assert(&ffs_susp_lock, SA_XLOCKED); 183 184 if (!ffs_own_mount(mp)) 185 return (EINVAL); 186 if (ffs_susp_suspended(mp)) 187 return (EBUSY); 188 189 ump = VFSTOUFS(mp); 190 191 /* 192 * Make sure the calling thread is permitted to access the mounted 193 * device. The permissions can change after we unlock the vnode; 194 * it's harmless. 195 */ 196 vn_lock(ump->um_odevvp, LK_EXCLUSIVE | LK_RETRY); 197 error = VOP_ACCESS(ump->um_odevvp, VREAD | VWRITE, 198 curthread->td_ucred, curthread); 199 VOP_UNLOCK(ump->um_odevvp); 200 if (error != 0) 201 return (error); 202 #ifdef MAC 203 if (mac_mount_check_stat(curthread->td_ucred, mp) != 0) 204 return (EPERM); 205 #endif 206 207 if ((error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT)) != 0) 208 return (error); 209 210 UFS_LOCK(ump); 211 ump->um_flags |= UM_WRITESUSPENDED; 212 UFS_UNLOCK(ump); 213 214 return (0); 215 } 216 217 static void 218 ffs_susp_unsuspend(struct mount *mp) 219 { 220 struct ufsmount *ump; 221 222 sx_assert(&ffs_susp_lock, SA_XLOCKED); 223 224 /* 225 * XXX: The status is kept per-process; the vfs_write_resume() routine 226 * asserts that the resuming thread is the same one that called 227 * vfs_write_suspend(). The cdevpriv data, however, is attached 228 * to the file descriptor, e.g. is inherited during fork. Thus, 229 * it's possible that the resuming process will be different from 230 * the one that started the suspension. 231 * 232 * Work around by fooling the check in vfs_write_resume(). 233 */ 234 mp->mnt_susp_owner = curthread; 235 236 vfs_write_resume(mp, 0); 237 ump = VFSTOUFS(mp); 238 UFS_LOCK(ump); 239 ump->um_flags &= ~UM_WRITESUSPENDED; 240 UFS_UNLOCK(ump); 241 vfs_unbusy(mp); 242 } 243 244 static void 245 ffs_susp_dtor(void *data) 246 { 247 struct fs *fs; 248 struct ufsmount *ump; 249 struct mount *mp; 250 int error; 251 252 sx_xlock(&ffs_susp_lock); 253 254 mp = (struct mount *)data; 255 ump = VFSTOUFS(mp); 256 fs = ump->um_fs; 257 258 if (ffs_susp_suspended(mp) == 0) { 259 sx_xunlock(&ffs_susp_lock); 260 return; 261 } 262 263 KASSERT((mp->mnt_kern_flag & MNTK_SUSPEND) != 0, 264 ("MNTK_SUSPEND not set")); 265 266 error = ffs_reload(mp, FFSR_FORCE | FFSR_UNSUSPEND); 267 if (error != 0) 268 panic("failed to unsuspend writes on %s", fs->fs_fsmnt); 269 270 ffs_susp_unsuspend(mp); 271 sx_xunlock(&ffs_susp_lock); 272 } 273 274 static int 275 ffs_susp_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 276 struct thread *td) 277 { 278 struct mount *mp; 279 fsid_t *fsidp; 280 int error; 281 282 /* 283 * No suspend inside the jail. Allowing it would require making 284 * sure that e.g. the devfs ruleset for that jail permits access 285 * to the devvp. 286 */ 287 if (jailed(td->td_ucred)) 288 return (EPERM); 289 290 sx_xlock(&ffs_susp_lock); 291 292 switch (cmd) { 293 case UFSSUSPEND: 294 fsidp = (fsid_t *)addr; 295 mp = vfs_getvfs(fsidp); 296 if (mp == NULL) { 297 error = ENOENT; 298 break; 299 } 300 error = vfs_busy(mp, 0); 301 vfs_rel(mp); 302 if (error != 0) 303 break; 304 305 /* 306 * Require single-thread curproc so that the check is not racey. 307 * XXXKIB: might consider to singlethread curproc instead. 308 */ 309 error = curproc->p_numthreads > 1 ? EDEADLK : 310 descrip_check_write_mp(curproc->p_fd, mp); 311 if (error != 0) { 312 vfs_unbusy(mp); 313 break; 314 } 315 316 error = ffs_susp_suspend(mp); 317 if (error != 0) { 318 vfs_unbusy(mp); 319 break; 320 } 321 error = devfs_set_cdevpriv(mp, ffs_susp_dtor); 322 if (error != 0) 323 ffs_susp_unsuspend(mp); 324 break; 325 case UFSRESUME: 326 error = devfs_get_cdevpriv((void **)&mp); 327 if (error != 0) 328 break; 329 /* 330 * This calls ffs_susp_dtor, which in turn unsuspends the fs. 331 * The dtor expects to be called without lock held, because 332 * sometimes it's called from here, and sometimes due to the 333 * file being closed or process exiting. 334 */ 335 sx_xunlock(&ffs_susp_lock); 336 devfs_clear_cdevpriv(); 337 return (0); 338 default: 339 error = ENXIO; 340 break; 341 } 342 343 sx_xunlock(&ffs_susp_lock); 344 345 return (error); 346 } 347 348 void 349 ffs_susp_initialize(void) 350 { 351 352 sx_init(&ffs_susp_lock, "ffs_susp"); 353 ffs_susp_dev = make_dev(&ffs_susp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 354 "ufssuspend"); 355 } 356 357 void 358 ffs_susp_uninitialize(void) 359 { 360 361 destroy_dev(ffs_susp_dev); 362 sx_destroy(&ffs_susp_lock); 363 } 364