xref: /freebsd/sys/geom/vinum/geom_vinum_volume.c (revision 721351876cd4d3a8a700f62d2061331fa951a488)
1 /*-
2  * Copyright (c) 2004 Lukas Ertl
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/bio.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/kthread.h>
35 #include <sys/libkern.h>
36 #include <sys/lock.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/mutex.h>
40 #include <sys/systm.h>
41 
42 #include <geom/geom.h>
43 #include <geom/vinum/geom_vinum_var.h>
44 #include <geom/vinum/geom_vinum.h>
45 
/* Handlers dispatched by gv_vol_worker(); both are defined later in this file. */
static void	gv_vol_completed_request(struct gv_volume *v, struct bio *bp);
static void	gv_vol_normal_request(struct gv_volume *v, struct bio *bp);
48 
49 static void
50 gv_volume_orphan(struct g_consumer *cp)
51 {
52 	struct g_geom *gp;
53 	struct gv_volume *v;
54 	int error;
55 
56 	g_topology_assert();
57 	gp = cp->geom;
58 	g_trace(G_T_TOPOLOGY, "gv_volume_orphan(%s)", gp->name);
59 	if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
60 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
61 	error = cp->provider->error;
62 	if (error == 0)
63 		error = ENXIO;
64 	g_detach(cp);
65 	g_destroy_consumer(cp);
66 	if (!LIST_EMPTY(&gp->consumer))
67 		return;
68 	v = gp->softc;
69 	if (v != NULL) {
70 		gv_kill_vol_thread(v);
71 		v->geom = NULL;
72 	}
73 	gp->softc = NULL;
74 	g_wither_geom(gp, error);
75 }
76 
77 /* We end up here after the requests to our plexes are done. */
78 static void
79 gv_volume_done(struct bio *bp)
80 {
81 	struct gv_volume *v;
82 
83 	v = bp->bio_from->geom->softc;
84 	bp->bio_cflags |= GV_BIO_DONE;
85 	mtx_lock(&v->bqueue_mtx);
86 	bioq_insert_tail(v->bqueue, bp);
87 	wakeup(v);
88 	mtx_unlock(&v->bqueue_mtx);
89 }
90 
91 static void
92 gv_volume_start(struct bio *bp)
93 {
94 	struct gv_volume *v;
95 
96 	switch(bp->bio_cmd) {
97 	case BIO_READ:
98 	case BIO_WRITE:
99 	case BIO_DELETE:
100 		break;
101 	case BIO_GETATTR:
102 	default:
103 		g_io_deliver(bp, EOPNOTSUPP);
104 		return;
105 	}
106 
107 	v = bp->bio_to->geom->softc;
108 	if (v->state != GV_VOL_UP) {
109 		g_io_deliver(bp, ENXIO);
110 		return;
111 	}
112 
113 	mtx_lock(&v->bqueue_mtx);
114 	bioq_disksort(v->bqueue, bp);
115 	wakeup(v);
116 	mtx_unlock(&v->bqueue_mtx);
117 }
118 
119 static void
120 gv_vol_worker(void *arg)
121 {
122 	struct bio *bp;
123 	struct gv_volume *v;
124 
125 	v = arg;
126 	KASSERT(v != NULL, ("NULL v"));
127 	mtx_lock(&v->bqueue_mtx);
128 	for (;;) {
129 		/* We were signaled to exit. */
130 		if (v->flags & GV_VOL_THREAD_DIE)
131 			break;
132 
133 		/* Take the first BIO from our queue. */
134 		bp = bioq_takefirst(v->bqueue);
135 		if (bp == NULL) {
136 			msleep(v, &v->bqueue_mtx, PRIBIO, "-", hz/10);
137 			continue;
138 		}
139 		mtx_unlock(&v->bqueue_mtx);
140 
141 		if (bp->bio_cflags & GV_BIO_DONE)
142 			gv_vol_completed_request(v, bp);
143 		else
144 			gv_vol_normal_request(v, bp);
145 
146 		mtx_lock(&v->bqueue_mtx);
147 	}
148 	mtx_unlock(&v->bqueue_mtx);
149 	v->flags |= GV_VOL_THREAD_DEAD;
150 	wakeup(v);
151 
152 	kproc_exit(ENXIO);
153 }
154 
155 static void
156 gv_vol_completed_request(struct gv_volume *v, struct bio *bp)
157 {
158 	struct bio *pbp;
159 	struct g_geom *gp;
160 	struct g_consumer *cp, *cp2;
161 
162 	pbp = bp->bio_parent;
163 
164 	if (pbp->bio_error == 0)
165 		pbp->bio_error = bp->bio_error;
166 
167 	switch (pbp->bio_cmd) {
168 	case BIO_READ:
169 		if (bp->bio_error == 0)
170 			break;
171 
172 		if (pbp->bio_cflags & GV_BIO_RETRY)
173 			break;
174 
175 		/* Check if we have another plex left. */
176 		cp = bp->bio_from;
177 		gp = cp->geom;
178 		cp2 = LIST_NEXT(cp, consumer);
179 		if (cp2 == NULL)
180 			break;
181 
182 		if (LIST_NEXT(cp2, consumer) == NULL)
183 			pbp->bio_cflags |= GV_BIO_RETRY;
184 
185 		g_destroy_bio(bp);
186 		pbp->bio_children--;
187 		mtx_lock(&v->bqueue_mtx);
188 		bioq_disksort(v->bqueue, pbp);
189 		mtx_unlock(&v->bqueue_mtx);
190 		return;
191 
192 	case BIO_WRITE:
193 	case BIO_DELETE:
194 		/* Remember if this write request succeeded. */
195 		if (bp->bio_error == 0)
196 			pbp->bio_cflags |= GV_BIO_SUCCEED;
197 		break;
198 	}
199 
200 	/* When the original request is finished, we deliver it. */
201 	pbp->bio_inbed++;
202 	if (pbp->bio_inbed == pbp->bio_children) {
203 		if (pbp->bio_cflags & GV_BIO_SUCCEED)
204 			pbp->bio_error = 0;
205 		pbp->bio_completed = bp->bio_length;
206 		g_io_deliver(pbp, pbp->bio_error);
207 	}
208 
209 	g_destroy_bio(bp);
210 }
211 
212 static void
213 gv_vol_normal_request(struct gv_volume *v, struct bio *bp)
214 {
215 	struct bio_queue_head queue;
216 	struct g_geom *gp;
217 	struct gv_plex *p, *lp;
218 	struct bio *cbp;
219 
220 	gp = v->geom;
221 
222 	switch (bp->bio_cmd) {
223 	case BIO_READ:
224 		cbp = g_clone_bio(bp);
225 		if (cbp == NULL) {
226 			g_io_deliver(bp, ENOMEM);
227 			return;
228 		}
229 		cbp->bio_done = gv_volume_done;
230 		/*
231 		 * Try to find a good plex where we can send the request to.
232 		 * The plex either has to be up, or it's a degraded RAID5 plex.
233 		 */
234 		lp = v->last_read_plex;
235 		if (lp == NULL)
236 			lp = LIST_FIRST(&v->plexes);
237 		p = LIST_NEXT(lp, in_volume);
238 		do {
239 			if (p == NULL)
240 				p = LIST_FIRST(&v->plexes);
241 			if ((p->state > GV_PLEX_DEGRADED) ||
242 			    (p->state >= GV_PLEX_DEGRADED &&
243 			    p->org == GV_PLEX_RAID5))
244 				break;
245 			p = LIST_NEXT(p, in_volume);
246 		} while (p != lp);
247 
248 		if (p == NULL ||
249 		    (p->org == GV_PLEX_RAID5 && p->state < GV_PLEX_DEGRADED) ||
250 		    (p->org != GV_PLEX_RAID5 && p->state <= GV_PLEX_DEGRADED)) {
251 			g_destroy_bio(cbp);
252 			bp->bio_children--;
253 			g_io_deliver(bp, ENXIO);
254 			return;
255 		}
256 		g_io_request(cbp, p->consumer);
257 		v->last_read_plex = p;
258 
259 		break;
260 
261 	case BIO_WRITE:
262 	case BIO_DELETE:
263 		bioq_init(&queue);
264 		LIST_FOREACH(p, &v->plexes, in_volume) {
265 			if (p->state < GV_PLEX_DEGRADED)
266 				continue;
267 			cbp = g_clone_bio(bp);
268 			if (cbp == NULL) {
269 				for (cbp = bioq_first(&queue); cbp != NULL;
270 				    cbp = bioq_first(&queue)) {
271 					bioq_remove(&queue, cbp);
272 					g_destroy_bio(cbp);
273 				}
274 				if (bp->bio_error == 0)
275 					bp->bio_error = ENOMEM;
276 				g_io_deliver(bp, bp->bio_error);
277 				return;
278 			}
279 			bioq_insert_tail(&queue, cbp);
280 			cbp->bio_done = gv_volume_done;
281 			cbp->bio_caller1 = p->consumer;
282 		}
283 		/* Fire off all sub-requests. */
284 		for (cbp = bioq_first(&queue); cbp != NULL;
285 		     cbp = bioq_first(&queue)) {
286 			bioq_remove(&queue, cbp);
287 			g_io_request(cbp, cbp->bio_caller1);
288 		}
289 		break;
290 	}
291 }
292 
293 static int
294 gv_volume_access(struct g_provider *pp, int dr, int dw, int de)
295 {
296 	struct g_geom *gp;
297 	struct g_consumer *cp, *cp2;
298 	int error;
299 
300 	gp = pp->geom;
301 
302 	error = ENXIO;
303 	LIST_FOREACH(cp, &gp->consumer, consumer) {
304 		error = g_access(cp, dr, dw, de);
305 		if (error) {
306 			LIST_FOREACH(cp2, &gp->consumer, consumer) {
307 				if (cp == cp2)
308 					break;
309 				g_access(cp2, -dr, -dw, -de);
310 			}
311 			return (error);
312 		}
313 	}
314 	return (error);
315 }
316 
317 static struct g_geom *
318 gv_volume_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
319 {
320 	struct g_geom *gp;
321 	struct g_provider *pp2;
322 	struct g_consumer *cp, *ocp;
323 	struct gv_softc *sc;
324 	struct gv_volume *v;
325 	struct gv_plex *p;
326 	int error, first;
327 
328 	g_trace(G_T_TOPOLOGY, "gv_volume_taste(%s, %s)", mp->name, pp->name);
329 	g_topology_assert();
330 
331 	/* First, find the VINUM class and its associated geom. */
332 	gp = find_vinum_geom();
333 	if (gp == NULL)
334 		return (NULL);
335 
336 	sc = gp->softc;
337 	KASSERT(sc != NULL, ("gv_volume_taste: NULL sc"));
338 
339 	gp = pp->geom;
340 
341 	/* We only want to attach to plexes. */
342 	if (strcmp(gp->class->name, "VINUMPLEX"))
343 		return (NULL);
344 
345 	first = 0;
346 	p = gp->softc;
347 
348 	/* Let's see if the volume this plex wants is already configured. */
349 	v = gv_find_vol(sc, p->volume);
350 	if (v == NULL)
351 		return (NULL);
352 	if (v->geom == NULL) {
353 		gp = g_new_geomf(mp, "%s", p->volume);
354 		gp->start = gv_volume_start;
355 		gp->orphan = gv_volume_orphan;
356 		gp->access = gv_volume_access;
357 		gp->softc = v;
358 		first++;
359 	} else
360 		gp = v->geom;
361 
362 	/* Create bio queue, queue mutex, and worker thread, if necessary. */
363 	if (v->bqueue == NULL) {
364 		v->bqueue = g_malloc(sizeof(struct bio_queue_head),
365 		    M_WAITOK | M_ZERO);
366 		bioq_init(v->bqueue);
367 	}
368 	if (mtx_initialized(&v->bqueue_mtx) == 0)
369 		mtx_init(&v->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
370 
371 	if (!(v->flags & GV_VOL_THREAD_ACTIVE)) {
372 		kproc_create(gv_vol_worker, v, NULL, 0, 0, "gv_v %s",
373 		    v->name);
374 		v->flags |= GV_VOL_THREAD_ACTIVE;
375 	}
376 
377 	/*
378 	 * Create a new consumer and attach it to the plex geom.  Since this
379 	 * volume might already have a plex attached, we need to adjust the
380 	 * access counts of the new consumer.
381 	 */
382 	ocp = LIST_FIRST(&gp->consumer);
383 	cp = g_new_consumer(gp);
384 	g_attach(cp, pp);
385 	if ((ocp != NULL) && (ocp->acr > 0 || ocp->acw > 0 || ocp->ace > 0)) {
386 		error = g_access(cp, ocp->acr, ocp->acw, ocp->ace);
387 		if (error) {
388 			printf("GEOM_VINUM: failed g_access %s -> %s; "
389 			    "errno %d\n", v->name, p->name, error);
390 			g_detach(cp);
391 			g_destroy_consumer(cp);
392 			if (first)
393 				g_destroy_geom(gp);
394 			return (NULL);
395 		}
396 	}
397 
398 	p->consumer = cp;
399 
400 	if (p->vol_sc != v) {
401 		p->vol_sc = v;
402 		v->plexcount++;
403 		LIST_INSERT_HEAD(&v->plexes, p, in_volume);
404 	}
405 
406 	/* We need to setup a new VINUMVOLUME geom. */
407 	if (first) {
408 		pp2 = g_new_providerf(gp, "gvinum/%s", v->name);
409 		pp2->mediasize = pp->mediasize;
410 		pp2->sectorsize = pp->sectorsize;
411 		g_error_provider(pp2, 0);
412 		v->size = pp2->mediasize;
413 		v->geom = gp;
414 		return (gp);
415 	}
416 
417 	return (NULL);
418 }
419 
420 static int
421 gv_volume_destroy_geom(struct gctl_req *req, struct g_class *mp,
422     struct g_geom *gp)
423 {
424 	struct gv_volume *v;
425 
426 	g_trace(G_T_TOPOLOGY, "gv_volume_destroy_geom: %s", gp->name);
427 	g_topology_assert();
428 
429 	v = gp->softc;
430 	gv_kill_vol_thread(v);
431 	g_wither_geom(gp, ENXIO);
432 	return (0);
433 }
434 
435 #define	VINUMVOLUME_CLASS_NAME "VINUMVOLUME"
436 
437 static struct g_class g_vinum_volume_class = {
438 	.name = VINUMVOLUME_CLASS_NAME,
439 	.version = G_VERSION,
440 	.taste = gv_volume_taste,
441 	.destroy_geom = gv_volume_destroy_geom,
442 };
443 
444 DECLARE_GEOM_CLASS(g_vinum_volume_class, g_vinum_volume);
445