1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2004 Poul-Henning Kamp 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #define EXTERR_CATEGORY EXTERR_CAT_GEOMVFS 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/bio.h> 33 #include <sys/exterrvar.h> 34 #include <sys/kernel.h> 35 #include <sys/lock.h> 36 #include <sys/malloc.h> 37 #include <sys/mutex.h> 38 #include <sys/sbuf.h> 39 #include <sys/vnode.h> 40 #include <sys/mount.h> 41 42 #include <geom/geom.h> 43 #include <geom/geom_vfs.h> 44 45 /* 46 * subroutines for use by filesystems. 47 * 48 * XXX: should maybe live somewhere else ? 49 */ 50 #include <sys/buf.h> 51 52 struct g_vfs_softc { 53 struct mtx sc_mtx; 54 struct bufobj *sc_bo; 55 struct g_event *sc_event; 56 int sc_active; 57 bool sc_orphaned; 58 int sc_enxio_active; 59 int sc_enxio_reported; 60 }; 61 62 static struct buf_ops __g_vfs_bufops = { 63 .bop_name = "GEOM_VFS", 64 .bop_write = bufwrite, 65 .bop_strategy = g_vfs_strategy, 66 .bop_sync = bufsync, 67 .bop_bdflush = bufbdflush 68 }; 69 70 struct buf_ops *g_vfs_bufops = &__g_vfs_bufops; 71 72 static g_orphan_t g_vfs_orphan; 73 74 static struct g_class g_vfs_class = { 75 .name = "VFS", 76 .version = G_VERSION, 77 .orphan = g_vfs_orphan, 78 }; 79 80 DECLARE_GEOM_CLASS(g_vfs_class, g_vfs); 81 82 static void 83 g_vfs_destroy(void *arg, int flags __unused) 84 { 85 struct g_consumer *cp; 86 87 g_topology_assert(); 88 cp = arg; 89 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 90 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 91 g_detach(cp); 92 if (cp->geom->softc == NULL) 93 g_wither_geom(cp->geom, ENXIO); 94 } 95 96 static void 97 g_vfs_done(struct bio *bip) 98 { 99 struct g_consumer *cp; 100 struct g_event *event; 101 struct g_vfs_softc *sc; 102 struct buf *bp; 103 int destroy; 104 struct mount *mp; 105 struct vnode *vp; 106 struct cdev *cdevp; 107 108 /* 109 * Collect statistics on synchronous and asynchronous read 110 * and write counts for disks that have associated filesystems. 111 */ 112 bp = bip->bio_caller2; 113 vp = bp->b_vp; 114 if (vp != NULL) { 115 /* 116 * If not a disk vnode, use its associated mount point 117 * otherwise use the mountpoint associated with the disk. 118 */ 119 VI_LOCK(vp); 120 if (vp->v_type != VCHR || 121 (cdevp = vp->v_rdev) == NULL || 122 cdevp->si_devsw == NULL || 123 (cdevp->si_devsw->d_flags & D_DISK) == 0) 124 mp = vp->v_mount; 125 else 126 mp = cdevp->si_mountpt; 127 if (mp != NULL) { 128 if (bp->b_iocmd == BIO_READ) { 129 if (BUF_DISOWNED(bp)) 130 mp->mnt_stat.f_asyncreads++; 131 else 132 mp->mnt_stat.f_syncreads++; 133 } else if (bp->b_iocmd == BIO_WRITE) { 134 if (BUF_DISOWNED(bp)) 135 mp->mnt_stat.f_asyncwrites++; 136 else 137 mp->mnt_stat.f_syncwrites++; 138 } 139 } 140 VI_UNLOCK(vp); 141 } 142 143 cp = bip->bio_from; 144 sc = cp->geom->softc; 145 if (bip->bio_error != 0 && bip->bio_error != EOPNOTSUPP) { 146 if ((bp->b_xflags & BX_CVTENXIO) != 0) { 147 if (atomic_cmpset_int(&sc->sc_enxio_active, 0, 1)) 148 printf("g_vfs_done(): %s converting all errors to ENXIO\n", 149 bip->bio_to->name); 150 } 151 if (sc->sc_enxio_active) 152 bip->bio_error = ENXIO; 153 if (bip->bio_error != ENXIO || 154 atomic_cmpset_int(&sc->sc_enxio_reported, 0, 1)) { 155 g_print_bio("g_vfs_done():", bip, "error = %d%s", 156 bip->bio_error, 157 bip->bio_error != ENXIO ? "" : 158 " suppressing further ENXIO"); 159 } 160 } 161 bp->b_ioflags = bip->bio_flags; 162 if (bip->bio_error) 163 bp->b_ioflags |= BIO_ERROR; 164 if ((bp->b_ioflags & BIO_EXTERR) != 0) 165 bp->b_exterr = bip->bio_exterr; 166 else 167 bp->b_error = bip->bio_error; 168 bp->b_resid = bp->b_bcount - bip->bio_completed; 169 g_destroy_bio(bip); 170 171 mtx_lock(&sc->sc_mtx); 172 destroy = ((--sc->sc_active) == 0 && sc->sc_orphaned); 173 if (destroy) { 174 event = sc->sc_event; 175 sc->sc_event = NULL; 176 } else 177 event = NULL; 178 mtx_unlock(&sc->sc_mtx); 179 if (destroy) 180 g_post_event_ep(g_vfs_destroy, cp, event, NULL); 181 182 bufdone(bp); 183 } 184 185 void 186 g_vfs_strategy(struct bufobj *bo, struct buf *bp) 187 { 188 struct g_vfs_softc *sc; 189 struct g_consumer *cp; 190 struct bio *bip; 191 192 cp = bo->bo_private; 193 sc = cp->geom->softc; 194 195 /* 196 * If the provider has orphaned us, just return ENXIO. 197 */ 198 mtx_lock(&sc->sc_mtx); 199 if (sc->sc_orphaned || sc->sc_enxio_active) { 200 mtx_unlock(&sc->sc_mtx); 201 bp->b_error = ENXIO; 202 bp->b_ioflags |= BIO_ERROR; 203 EXTERROR_KE(&bp->b_exterr, ENXIO, 204 "orphaned or enxio active"); 205 bufdone(bp); 206 return; 207 } 208 sc->sc_active++; 209 mtx_unlock(&sc->sc_mtx); 210 211 bip = g_alloc_bio(); 212 bip->bio_cmd = bp->b_iocmd; 213 bip->bio_offset = bp->b_iooffset; 214 bip->bio_length = bp->b_bcount; 215 bdata2bio(bp, bip); 216 if ((bp->b_flags & B_BARRIER) != 0) { 217 bip->bio_flags |= BIO_ORDERED; 218 bp->b_flags &= ~B_BARRIER; 219 } 220 if (bp->b_iocmd == BIO_SPEEDUP) 221 bip->bio_flags |= bp->b_ioflags; 222 bip->bio_done = g_vfs_done; 223 bip->bio_caller2 = bp; 224 #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) 225 buf_track(bp, __func__); 226 bip->bio_track_bp = bp; 227 #endif 228 g_io_request(bip, cp); 229 } 230 231 static void 232 g_vfs_orphan(struct g_consumer *cp) 233 { 234 struct g_geom *gp; 235 struct g_event *event; 236 struct g_vfs_softc *sc; 237 int destroy; 238 239 g_topology_assert(); 240 241 gp = cp->geom; 242 g_trace(G_T_TOPOLOGY, "g_vfs_orphan(%p(%s))", cp, gp->name); 243 sc = gp->softc; 244 if (sc == NULL) 245 return; 246 event = g_alloc_event(M_WAITOK); 247 mtx_lock(&sc->sc_mtx); 248 KASSERT(sc->sc_event == NULL, ("g_vfs %p already has an event", sc)); 249 sc->sc_orphaned = true; 250 destroy = (sc->sc_active == 0); 251 if (!destroy) { 252 sc->sc_event = event; 253 event = NULL; 254 } 255 mtx_unlock(&sc->sc_mtx); 256 if (destroy) { 257 g_free(event); 258 g_vfs_destroy(cp, 0); 259 } 260 261 /* 262 * Do not destroy the geom. Filesystem will do that during unmount. 263 */ 264 } 265 266 int 267 g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname, int wr) 268 { 269 struct g_geom *gp; 270 struct g_provider *pp; 271 struct g_consumer *cp; 272 struct g_vfs_softc *sc; 273 struct bufobj *bo; 274 int error; 275 276 g_topology_assert(); 277 278 *cpp = NULL; 279 bo = &vp->v_bufobj; 280 if (bo->bo_private != vp) 281 return (EBUSY); 282 283 pp = g_dev_getprovider(vp->v_rdev); 284 if (pp == NULL) 285 return (ENOENT); 286 gp = g_new_geomf(&g_vfs_class, "%s.%s", fsname, pp->name); 287 sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); 288 mtx_init(&sc->sc_mtx, "g_vfs", NULL, MTX_DEF); 289 sc->sc_bo = bo; 290 gp->softc = sc; 291 cp = g_new_consumer(gp); 292 error = g_attach(cp, pp); 293 if (error) { 294 g_wither_geom(gp, ENXIO); 295 return (error); 296 } 297 error = g_access(cp, 1, wr, wr); 298 if (error) { 299 g_wither_geom(gp, ENXIO); 300 return (error); 301 } 302 /* 303 * Mediasize might not be set until first access (see g_disk_access()), 304 * That's why we check it here and not earlier. 305 */ 306 if (pp->mediasize == 0) { 307 (void)g_access(cp, -1, -wr, -wr); 308 g_wither_geom(gp, ENXIO); 309 return (ENXIO); 310 } 311 vnode_create_disk_vobject(vp, pp->mediasize, curthread); 312 *cpp = cp; 313 cp->private = vp; 314 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 315 bo->bo_ops = g_vfs_bufops; 316 bo->bo_private = cp; 317 bo->bo_bsize = pp->sectorsize; 318 319 return (error); 320 } 321 322 void 323 g_vfs_close(struct g_consumer *cp) 324 { 325 struct g_geom *gp; 326 struct g_vfs_softc *sc; 327 328 g_topology_assert(); 329 330 gp = cp->geom; 331 sc = gp->softc; 332 bufobj_invalbuf(sc->sc_bo, V_SAVE, 0, 0); 333 sc->sc_bo->bo_private = cp->private; 334 gp->softc = NULL; 335 mtx_destroy(&sc->sc_mtx); 336 if (!sc->sc_orphaned || cp->provider == NULL) 337 g_wither_geom_close(gp, ENXIO); 338 KASSERT(sc->sc_event == NULL, ("g_vfs %p event is non-NULL", sc)); 339 g_free(sc); 340 } 341