/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2004 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#define	EXTERR_CATEGORY	EXTERR_CAT_GEOMVFS
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/exterrvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/vnode.h>
#include <sys/mount.h>

#include <geom/geom.h>
#include <geom/geom_vfs.h>

/*
 * Subroutines for use by filesystems.
 *
 * XXX: should maybe live somewhere else?
 */
#include <sys/buf.h>

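/*
 * Per-geom state.  sc_mtx protects sc_active, sc_orphaned and
 * sc_event: sc_active counts bios in flight and sc_orphaned records
 * that the provider has orphaned our consumer.  The sc_enxio_* fields
 * are manipulated atomically and implement the BX_CVTENXIO policy of
 * turning every subsequent I/O error into ENXIO.
 */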
struct g_vfs_softc {
	struct mtx	 sc_mtx;
	struct bufobj	*sc_bo;
	struct g_event	*sc_event;
	int		 sc_active;
	bool		 sc_orphaned;
	int		 sc_enxio_active;
	int		 sc_enxio_reported;
};

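/*
 * Buffer operations installed on the device vnode's bufobj by
 * g_vfs_open().  Only the strategy method is GEOM-specific; the
 * others are the stock buffer-cache implementations.
 */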
static struct buf_ops __g_vfs_bufops = {
	.bop_name =	"GEOM_VFS",
	.bop_write =	bufwrite,
	.bop_strategy =	g_vfs_strategy,
	.bop_sync =	bufsync,
	.bop_bdflush =	bufbdflush
};

struct buf_ops *g_vfs_bufops = &__g_vfs_bufops;

static g_orphan_t g_vfs_orphan;

static struct g_class g_vfs_class = {
	.name =		"VFS",
	.version =	G_VERSION,
	.orphan =	g_vfs_orphan,
};

DECLARE_GEOM_CLASS(g_vfs_class, g_vfs);

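/*
 * Run from the event queue (or called directly by g_vfs_orphan()) to
 * release the consumer: drop whatever access counts are still held
 * and detach from the provider.  The geom is withered only if
 * g_vfs_close() has already run and cleared the softc; otherwise the
 * filesystem is expected to destroy it at unmount time.
 */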
static void
g_vfs_destroy(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();
	cp = arg;
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	g_detach(cp);
	if (cp->geom->softc == NULL)
		g_wither_geom(cp->geom, ENXIO);
}

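/*
 * Completion handler for bios issued by g_vfs_strategy().  Account
 * the I/O in the mount point statistics, apply the ENXIO conversion
 * policy, copy the completion status back into the buf and, when the
 * last outstanding bio on an orphaned consumer finishes, post the
 * g_vfs_destroy() event.
 */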
static void
g_vfs_done(struct bio *bip)
{
	struct g_consumer *cp;
	struct g_event *event;
	struct g_vfs_softc *sc;
	struct buf *bp;
	int destroy;
	struct mount *mp;
	struct vnode *vp;
	struct cdev *cdevp;

	/*
	 * Collect statistics on synchronous and asynchronous read
	 * and write counts for disks that have associated filesystems.
	 */
	bp = bip->bio_caller2;
	vp = bp->b_vp;
	if (vp != NULL) {
		/*
		 * If not a disk vnode, use its associated mount point;
		 * otherwise use the mount point associated with the disk.
		 */
		VI_LOCK(vp);
		if (vp->v_type != VCHR ||
		    (cdevp = vp->v_rdev) == NULL ||
		    cdevp->si_devsw == NULL ||
		    (cdevp->si_devsw->d_flags & D_DISK) == 0)
			mp = vp->v_mount;
		else
			mp = cdevp->si_mountpt;
		if (mp != NULL) {
			if (bp->b_iocmd == BIO_READ) {
				if (BUF_DISOWNED(bp))
					mp->mnt_stat.f_asyncreads++;
				else
					mp->mnt_stat.f_syncreads++;
			} else if (bp->b_iocmd == BIO_WRITE) {
				if (BUF_DISOWNED(bp))
					mp->mnt_stat.f_asyncwrites++;
				else
					mp->mnt_stat.f_syncwrites++;
			}
		}
		VI_UNLOCK(vp);
	}

	cp = bip->bio_from;
	sc = cp->geom->softc;
	if (bip->bio_error != 0 && bip->bio_error != EOPNOTSUPP) {
		if ((bp->b_xflags & BX_CVTENXIO) != 0) {
			if (atomic_cmpset_int(&sc->sc_enxio_active, 0, 1))
				printf("g_vfs_done(): %s converting all errors to ENXIO\n",
				    bip->bio_to->name);
		}
		if (sc->sc_enxio_active)
			bip->bio_error = ENXIO;
		if (bip->bio_error != ENXIO ||
		    atomic_cmpset_int(&sc->sc_enxio_reported, 0, 1)) {
			g_print_bio("g_vfs_done():", bip, "error = %d%s",
			    bip->bio_error,
			    bip->bio_error != ENXIO ? "" :
			    " suppressing further ENXIO");
		}
	}
	bp->b_ioflags = bip->bio_flags;
	if (bip->bio_error)
		bp->b_ioflags |= BIO_ERROR;
	if ((bp->b_ioflags & BIO_EXTERR) != 0)
		bp->b_exterr = bip->bio_exterr;
	else
		bp->b_error = bip->bio_error;
	bp->b_resid = bp->b_bcount - bip->bio_completed;
	g_destroy_bio(bip);

	mtx_lock(&sc->sc_mtx);
	destroy = ((--sc->sc_active) == 0 && sc->sc_orphaned);
	if (destroy) {
		event = sc->sc_event;
		sc->sc_event = NULL;
	} else
		event = NULL;
	mtx_unlock(&sc->sc_mtx);
	if (destroy)
		g_post_event_ep(g_vfs_destroy, cp, event, NULL);

	bufdone(bp);
}

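/*
 * The bufobj strategy method: translate the buf into a bio and pass
 * it down to the consumer attached to the underlying provider.  Bufs
 * arriving after the provider has orphaned us (or once ENXIO
 * conversion is active) are failed immediately with ENXIO.
 */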
void
g_vfs_strategy(struct bufobj *bo, struct buf *bp)
{
	struct g_vfs_softc *sc;
	struct g_consumer *cp;
	struct bio *bip;

	cp = bo->bo_private;
	sc = cp->geom->softc;

	/*
	 * If the provider has orphaned us, just return ENXIO.
	 */
	mtx_lock(&sc->sc_mtx);
	if (sc->sc_orphaned || sc->sc_enxio_active) {
		mtx_unlock(&sc->sc_mtx);
		bp->b_error = ENXIO;
		bp->b_ioflags |= BIO_ERROR;
		EXTERROR_KE(&bp->b_exterr, ENXIO,
		    "orphaned or enxio active");
		bufdone(bp);
		return;
	}
	sc->sc_active++;
	mtx_unlock(&sc->sc_mtx);

	bip = g_alloc_bio();
	bip->bio_cmd = bp->b_iocmd;
	bip->bio_offset = bp->b_iooffset;
	bip->bio_length = bp->b_bcount;
	bdata2bio(bp, bip);
	if ((bp->b_flags & B_BARRIER) != 0) {
		bip->bio_flags |= BIO_ORDERED;
		bp->b_flags &= ~B_BARRIER;
	}
	if (bp->b_iocmd == BIO_SPEEDUP)
		bip->bio_flags |= bp->b_ioflags;
	bip->bio_done = g_vfs_done;
	bip->bio_caller2 = bp;
#if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
	buf_track(bp, __func__);
	bip->bio_track_bp = bp;
#endif
	g_io_request(bip, cp);
}

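/*
 * Called by GEOM when the underlying provider goes away.  Mark the
 * softc orphaned so that new I/O fails with ENXIO.  If no bios are in
 * flight the consumer is torn down immediately; otherwise a
 * pre-allocated event is stashed for g_vfs_done() to post once the
 * last bio completes.
 */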
static void
g_vfs_orphan(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct g_event *event;
	struct g_vfs_softc *sc;
	int destroy;

	g_topology_assert();

	gp = cp->geom;
	g_trace(G_T_TOPOLOGY, "g_vfs_orphan(%p(%s))", cp, gp->name);
	sc = gp->softc;
	if (sc == NULL)
		return;
	event = g_alloc_event(M_WAITOK);
	mtx_lock(&sc->sc_mtx);
	KASSERT(sc->sc_event == NULL, ("g_vfs %p already has an event", sc));
	sc->sc_orphaned = true;
	destroy = (sc->sc_active == 0);
	if (!destroy) {
		sc->sc_event = event;
		event = NULL;
	}
	mtx_unlock(&sc->sc_mtx);
	if (destroy) {
		g_free(event);
		g_vfs_destroy(cp, 0);
	}

	/*
	 * Do not destroy the geom.  The filesystem will do that during
	 * unmount.
	 */
}

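/*
 * Open the GEOM provider backing the disk device vnode vp for use by
 * a filesystem, requesting write (and exclusive) access when wr is
 * nonzero.  On success a "VFS" geom and consumer are created, the
 * vnode's bufobj is rerouted through g_vfs_bufops and *cpp is set to
 * the new consumer.
 *
 * Typical mount-time usage, as a sketch with error handling elided
 * ("myfs" and the variable names are placeholders):
 *
 *	g_topology_lock();
 *	error = g_vfs_open(devvp, &cp, "myfs", ronly ? 0 : 1);
 *	g_topology_unlock();
 *	...
 *	g_topology_lock();
 *	g_vfs_close(cp);
 *	g_topology_unlock();
 */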
int
g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname,
    int wr)
{
	struct g_geom *gp;
	struct g_provider *pp;
	struct g_consumer *cp;
	struct g_vfs_softc *sc;
	struct bufobj *bo;
	int error;

	g_topology_assert();

	*cpp = NULL;
	bo = &vp->v_bufobj;
	if (bo->bo_private != vp)
		return (EBUSY);

	pp = g_dev_getprovider(vp->v_rdev);
	if (pp == NULL)
		return (ENOENT);
	gp = g_new_geomf(&g_vfs_class, "%s.%s", fsname, pp->name);
	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
	mtx_init(&sc->sc_mtx, "g_vfs", NULL, MTX_DEF);
	sc->sc_bo = bo;
	gp->softc = sc;
	cp = g_new_consumer(gp);
	error = g_attach(cp, pp);
	if (error) {
		g_wither_geom(gp, ENXIO);
		return (error);
	}
	error = g_access(cp, 1, wr, wr);
	if (error) {
		g_wither_geom(gp, ENXIO);
		return (error);
	}
	/*
	 * Mediasize might not be set until the first access (see
	 * g_disk_access()); that's why we check it here and not earlier.
	 */
	if (pp->mediasize == 0) {
		(void)g_access(cp, -1, -wr, -wr);
		g_wither_geom(gp, ENXIO);
		return (ENXIO);
	}
	vnode_create_disk_vobject(vp, pp->mediasize, curthread);
	*cpp = cp;
	cp->private = vp;
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	bo->bo_ops = g_vfs_bufops;
	bo->bo_private = cp;
	bo->bo_bsize = pp->sectorsize;

	return (error);
}

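/*
 * Undo g_vfs_open(): flush and invalidate the buffers, hand the
 * bufobj back to the vnode and free the softc.  If the consumer was
 * orphaned and g_vfs_destroy() has not yet detached it, the geom is
 * left for g_vfs_destroy() to wither; otherwise it is closed and
 * withered here.
 */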
void
g_vfs_close(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct g_vfs_softc *sc;

	g_topology_assert();

	gp = cp->geom;
	sc = gp->softc;
	bufobj_invalbuf(sc->sc_bo, V_SAVE, 0, 0);
	sc->sc_bo->bo_private = cp->private;
	gp->softc = NULL;
	mtx_destroy(&sc->sc_mtx);
	if (!sc->sc_orphaned || cp->provider == NULL)
		g_wither_geom_close(gp, ENXIO);
	KASSERT(sc->sc_event == NULL, ("g_vfs %p event is non-NULL", sc));
	g_free(sc);
}