xref: /freebsd/sys/geom/geom_vfs.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2004 Poul-Henning Kamp
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/bio.h>
32 #include <sys/kernel.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/mutex.h>
36 #include <sys/sbuf.h>
37 #include <sys/vnode.h>
38 #include <sys/mount.h>
39 
40 #include <geom/geom.h>
41 #include <geom/geom_vfs.h>
42 
43 /*
44  * subroutines for use by filesystems.
45  *
46  * XXX: should maybe live somewhere else ?
47  */
48 #include <sys/buf.h>
49 
50 struct g_vfs_softc {
51 	struct mtx	 sc_mtx;
52 	struct bufobj	*sc_bo;
53 	struct g_event	*sc_event;
54 	int		 sc_active;
55 	bool		 sc_orphaned;
56 	int		 sc_enxio_active;
57 	int		 sc_enxio_reported;
58 };
59 
60 static struct buf_ops __g_vfs_bufops = {
61 	.bop_name =	"GEOM_VFS",
62 	.bop_write =	bufwrite,
63 	.bop_strategy =	g_vfs_strategy,
64 	.bop_sync =	bufsync,
65 	.bop_bdflush =	bufbdflush
66 };
67 
68 struct buf_ops *g_vfs_bufops = &__g_vfs_bufops;
69 
70 static g_orphan_t g_vfs_orphan;
71 
72 static struct g_class g_vfs_class = {
73 	.name =		"VFS",
74 	.version =	G_VERSION,
75 	.orphan =	g_vfs_orphan,
76 };
77 
78 DECLARE_GEOM_CLASS(g_vfs_class, g_vfs);
79 
80 static void
81 g_vfs_destroy(void *arg, int flags __unused)
82 {
83 	struct g_consumer *cp;
84 
85 	g_topology_assert();
86 	cp = arg;
87 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
88 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
89 	g_detach(cp);
90 	if (cp->geom->softc == NULL)
91 		g_wither_geom(cp->geom, ENXIO);
92 }
93 
94 static void
95 g_vfs_done(struct bio *bip)
96 {
97 	struct g_consumer *cp;
98 	struct g_event *event;
99 	struct g_vfs_softc *sc;
100 	struct buf *bp;
101 	int destroy;
102 	struct mount *mp;
103 	struct vnode *vp;
104 	struct cdev *cdevp;
105 
106 	/*
107 	 * Collect statistics on synchronous and asynchronous read
108 	 * and write counts for disks that have associated filesystems.
109 	 */
110 	bp = bip->bio_caller2;
111 	vp = bp->b_vp;
112 	if (vp != NULL) {
113 		/*
114 		 * If not a disk vnode, use its associated mount point
115 		 * otherwise use the mountpoint associated with the disk.
116 		 */
117 		VI_LOCK(vp);
118 		if (vp->v_type != VCHR ||
119 		    (cdevp = vp->v_rdev) == NULL ||
120 		    cdevp->si_devsw == NULL ||
121 		    (cdevp->si_devsw->d_flags & D_DISK) == 0)
122 			mp = vp->v_mount;
123 		else
124 			mp = cdevp->si_mountpt;
125 		if (mp != NULL) {
126 			if (bp->b_iocmd == BIO_READ) {
127 				if (BUF_DISOWNED(bp))
128 					mp->mnt_stat.f_asyncreads++;
129 				else
130 					mp->mnt_stat.f_syncreads++;
131 			} else if (bp->b_iocmd == BIO_WRITE) {
132 				if (BUF_DISOWNED(bp))
133 					mp->mnt_stat.f_asyncwrites++;
134 				else
135 					mp->mnt_stat.f_syncwrites++;
136 			}
137 		}
138 		VI_UNLOCK(vp);
139 	}
140 
141 	cp = bip->bio_from;
142 	sc = cp->geom->softc;
143 	if (bip->bio_error != 0 && bip->bio_error != EOPNOTSUPP) {
144 		if ((bp->b_xflags & BX_CVTENXIO) != 0) {
145 			if (atomic_cmpset_int(&sc->sc_enxio_active, 0, 1))
146 				printf("g_vfs_done(): %s converting all errors to ENXIO\n",
147 				    bip->bio_to->name);
148 		}
149 		if (sc->sc_enxio_active)
150 			bip->bio_error = ENXIO;
151 		if (bip->bio_error != ENXIO ||
152 		    atomic_cmpset_int(&sc->sc_enxio_reported, 0, 1)) {
153 			g_print_bio("g_vfs_done():", bip, "error = %d%s",
154 			    bip->bio_error,
155 			    bip->bio_error != ENXIO ? "" :
156 			    " supressing further ENXIO");
157 		}
158 	}
159 	bp->b_error = bip->bio_error;
160 	bp->b_ioflags = bip->bio_flags;
161 	if (bip->bio_error)
162 		bp->b_ioflags |= BIO_ERROR;
163 	bp->b_resid = bp->b_bcount - bip->bio_completed;
164 	g_destroy_bio(bip);
165 
166 	mtx_lock(&sc->sc_mtx);
167 	destroy = ((--sc->sc_active) == 0 && sc->sc_orphaned);
168 	if (destroy) {
169 		event = sc->sc_event;
170 		sc->sc_event = NULL;
171 	} else
172 		event = NULL;
173 	mtx_unlock(&sc->sc_mtx);
174 	if (destroy)
175 		g_post_event_ep(g_vfs_destroy, cp, event, NULL);
176 
177 	bufdone(bp);
178 }
179 
180 void
181 g_vfs_strategy(struct bufobj *bo, struct buf *bp)
182 {
183 	struct g_vfs_softc *sc;
184 	struct g_consumer *cp;
185 	struct bio *bip;
186 
187 	cp = bo->bo_private;
188 	sc = cp->geom->softc;
189 
190 	/*
191 	 * If the provider has orphaned us, just return ENXIO.
192 	 */
193 	mtx_lock(&sc->sc_mtx);
194 	if (sc->sc_orphaned || sc->sc_enxio_active) {
195 		mtx_unlock(&sc->sc_mtx);
196 		bp->b_error = ENXIO;
197 		bp->b_ioflags |= BIO_ERROR;
198 		bufdone(bp);
199 		return;
200 	}
201 	sc->sc_active++;
202 	mtx_unlock(&sc->sc_mtx);
203 
204 	bip = g_alloc_bio();
205 	bip->bio_cmd = bp->b_iocmd;
206 	bip->bio_offset = bp->b_iooffset;
207 	bip->bio_length = bp->b_bcount;
208 	bdata2bio(bp, bip);
209 	if ((bp->b_flags & B_BARRIER) != 0) {
210 		bip->bio_flags |= BIO_ORDERED;
211 		bp->b_flags &= ~B_BARRIER;
212 	}
213 	if (bp->b_iocmd == BIO_SPEEDUP)
214 		bip->bio_flags |= bp->b_ioflags;
215 	bip->bio_done = g_vfs_done;
216 	bip->bio_caller2 = bp;
217 #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
218 	buf_track(bp, __func__);
219 	bip->bio_track_bp = bp;
220 #endif
221 	g_io_request(bip, cp);
222 }
223 
224 static void
225 g_vfs_orphan(struct g_consumer *cp)
226 {
227 	struct g_geom *gp;
228 	struct g_event *event;
229 	struct g_vfs_softc *sc;
230 	int destroy;
231 
232 	g_topology_assert();
233 
234 	gp = cp->geom;
235 	g_trace(G_T_TOPOLOGY, "g_vfs_orphan(%p(%s))", cp, gp->name);
236 	sc = gp->softc;
237 	if (sc == NULL)
238 		return;
239 	event = g_alloc_event(M_WAITOK);
240 	mtx_lock(&sc->sc_mtx);
241 	KASSERT(sc->sc_event == NULL, ("g_vfs %p already has an event", sc));
242 	sc->sc_orphaned = true;
243 	destroy = (sc->sc_active == 0);
244 	if (!destroy) {
245 		sc->sc_event = event;
246 		event = NULL;
247 	}
248 	mtx_unlock(&sc->sc_mtx);
249 	if (destroy) {
250 		g_free(event);
251 		g_vfs_destroy(cp, 0);
252 	}
253 
254 	/*
255 	 * Do not destroy the geom.  Filesystem will do that during unmount.
256 	 */
257 }
258 
259 int
260 g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname, int wr)
261 {
262 	struct g_geom *gp;
263 	struct g_provider *pp;
264 	struct g_consumer *cp;
265 	struct g_vfs_softc *sc;
266 	struct bufobj *bo;
267 	int error;
268 
269 	g_topology_assert();
270 
271 	*cpp = NULL;
272 	bo = &vp->v_bufobj;
273 	if (bo->bo_private != vp)
274 		return (EBUSY);
275 
276 	pp = g_dev_getprovider(vp->v_rdev);
277 	if (pp == NULL)
278 		return (ENOENT);
279 	gp = g_new_geomf(&g_vfs_class, "%s.%s", fsname, pp->name);
280 	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
281 	mtx_init(&sc->sc_mtx, "g_vfs", NULL, MTX_DEF);
282 	sc->sc_bo = bo;
283 	gp->softc = sc;
284 	cp = g_new_consumer(gp);
285 	error = g_attach(cp, pp);
286 	if (error) {
287 		g_wither_geom(gp, ENXIO);
288 		return (error);
289 	}
290 	error = g_access(cp, 1, wr, wr);
291 	if (error) {
292 		g_wither_geom(gp, ENXIO);
293 		return (error);
294 	}
295 	/*
296 	 * Mediasize might not be set until first access (see g_disk_access()),
297 	 * That's why we check it here and not earlier.
298 	 */
299 	if (pp->mediasize == 0) {
300 		(void)g_access(cp, -1, -wr, -wr);
301 		g_wither_geom(gp, ENXIO);
302 		return (ENXIO);
303 	}
304 	vnode_create_disk_vobject(vp, pp->mediasize, curthread);
305 	*cpp = cp;
306 	cp->private = vp;
307 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
308 	bo->bo_ops = g_vfs_bufops;
309 	bo->bo_private = cp;
310 	bo->bo_bsize = pp->sectorsize;
311 
312 	return (error);
313 }
314 
315 void
316 g_vfs_close(struct g_consumer *cp)
317 {
318 	struct g_geom *gp;
319 	struct g_vfs_softc *sc;
320 
321 	g_topology_assert();
322 
323 	gp = cp->geom;
324 	sc = gp->softc;
325 	bufobj_invalbuf(sc->sc_bo, V_SAVE, 0, 0);
326 	sc->sc_bo->bo_private = cp->private;
327 	gp->softc = NULL;
328 	mtx_destroy(&sc->sc_mtx);
329 	if (!sc->sc_orphaned || cp->provider == NULL)
330 		g_wither_geom_close(gp, ENXIO);
331 	KASSERT(sc->sc_event == NULL, ("g_vfs %p event is non-NULL", sc));
332 	g_free(sc);
333 }
334