/*-
 * Copyright (c) 2004 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static void gv_plex_completed_request(struct gv_plex *, struct bio *);
static void gv_plex_normal_request(struct gv_plex *, struct bio *);
static void gv_plex_worker(void *);
static int gv_check_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);
static int gv_normal_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);

/* XXX: is this the place to catch dying subdisks? */
static void
gv_plex_orphan(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct gv_plex *p;
	int error;

	g_topology_assert();
	gp = cp->geom;
	g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name);

	if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	error = cp->provider->error;
	if (error == 0)
		error = ENXIO;
	g_detach(cp);
	g_destroy_consumer(cp);
	if (!LIST_EMPTY(&gp->consumer))
		return;

	p = gp->softc;
	if (p != NULL) {
		gv_kill_plex_thread(p);
		p->geom = NULL;
		p->provider = NULL;
		p->consumer = NULL;
	}
	gp->softc = NULL;
	g_wither_geom(gp, error);
}

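/*
 * Completion callback for cloned sub-requests that need additional
 * post-processing by the plex worker thread.  Flag the bio as done,
 * queue it on the plex bio queue and wake up the worker.
 */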
void
gv_plex_done(struct bio *bp)
{
	struct gv_plex *p;

	p = bp->bio_from->geom->softc;
	bp->bio_cflags |= GV_BIO_DONE;
	mtx_lock(&p->bqueue_mtx);
	bioq_insert_tail(p->bqueue, bp);
	wakeup(p);
	mtx_unlock(&p->bqueue_mtx);
}

/* Find the subdisk this request maps to and build a cloned bio for it. */
static int
gv_plexbuffer(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff, off_t bcount)
{
	struct g_geom *gp;
	struct gv_sd *s;
	struct bio *cbp, *pbp;
	int i, sdno;
	off_t len_left, real_len, real_off;
	off_t stripeend, stripeno, stripestart;

	if (p == NULL || LIST_EMPTY(&p->subdisks))
		return (ENXIO);

	s = NULL;
	gp = bp->bio_to->geom;

	/*
	 * We only handle concatenated and striped plexes here.  RAID5 plexes
	 * are handled in build_raid5_request().
	 */
	switch (p->org) {
	case GV_PLEX_CONCAT:
		/*
		 * Find the subdisk where this request starts.  The subdisks in
		 * this list must be ordered by plex_offset.
		 */
		LIST_FOREACH(s, &p->subdisks, in_plex) {
			if (s->plex_offset <= boff &&
			    s->plex_offset + s->size > boff)
				break;
		}
		/* Subdisk not found. */
		if (s == NULL)
			return (ENXIO);

		/* Calculate corresponding offsets on disk. */
		real_off = boff - s->plex_offset;
		len_left = s->size - real_off;
		real_len = (bcount > len_left) ? len_left : bcount;
		break;

	case GV_PLEX_STRIPED:
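		/*
		 * Stripes are laid out round-robin across the subdisks:
		 * stripe n lives on subdisk (n % sdcount), at offset
		 * (n / sdcount) * stripesize within that subdisk.  E.g.,
		 * with a stripe size of 256k and 3 subdisks, plex offset
		 * 700k falls into stripe 2, which resides on subdisk 2
		 * at subdisk offset 188k.
		 */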
		/* The number of the stripe where the request starts. */
		stripeno = boff / p->stripesize;

		/* The number of the subdisk where the stripe resides. */
		sdno = stripeno % p->sdcount;

		/* Find the right subdisk. */
		i = 0;
		LIST_FOREACH(s, &p->subdisks, in_plex) {
			if (i == sdno)
				break;
			i++;
		}

		/* Subdisk not found. */
		if (s == NULL)
			return (ENXIO);

		/* The offset of the stripe from the start of the subdisk. */
		stripestart = (stripeno / p->sdcount) *
		    p->stripesize;

		/* The offset at the end of the stripe. */
		stripeend = stripestart + p->stripesize;

		/* The offset of the request on this subdisk. */
		real_off = boff - (stripeno * p->stripesize) +
		    stripestart;

		/* The length left in this stripe. */
		len_left = stripeend - real_off;

		real_len = (bcount <= len_left) ? bcount : len_left;
		break;

	default:
		return (EINVAL);
	}

	/* Now check if we can handle the request on this subdisk. */
	switch (s->state) {
	case GV_SD_UP:
		/* If the subdisk is up, just continue. */
		break;

	case GV_SD_STALE:
		if (!(bp->bio_cflags & GV_BIO_SYNCREQ))
			return (ENXIO);

		G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
		break;

	case GV_SD_INITIALIZING:
		if (bp->bio_cmd == BIO_READ)
			return (ENXIO);
		break;

	default:
		/* All other subdisk states mean it's not accessible. */
		return (ENXIO);
	}

	/* Clone the bio and adjust the offsets and sizes. */
	cbp = g_clone_bio(bp);
	if (cbp == NULL)
		return (ENOMEM);
	cbp->bio_offset = real_off;
	cbp->bio_length = real_len;
	cbp->bio_data = addr;
	cbp->bio_done = g_std_done;
	cbp->bio_caller2 = s->consumer;
	if ((bp->bio_cflags & GV_BIO_SYNCREQ)) {
		cbp->bio_cflags |= GV_BIO_SYNCREQ;
		cbp->bio_done = gv_plex_done;
	}

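	/*
	 * Chain the new sub-request off bp->bio_driver1; the sub-requests
	 * are linked via bio_caller1 and are later walked and issued by
	 * gv_plex_normal_request().
	 */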
	if (bp->bio_driver1 == NULL) {
		bp->bio_driver1 = cbp;
	} else {
		pbp = bp->bio_driver1;
		while (pbp->bio_caller1 != NULL)
			pbp = pbp->bio_caller1;
		pbp->bio_caller1 = cbp;
	}

	return (0);
}

static void
gv_plex_start(struct bio *bp)
{
	struct gv_plex *p;

	switch(bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_GETATTR:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}

	/*
	 * We cannot handle this request if too many of our subdisks are
	 * inaccessible.
	 */
	p = bp->bio_to->geom->softc;
	if ((p->state < GV_PLEX_DEGRADED) &&
	    !(bp->bio_cflags & GV_BIO_SYNCREQ)) {
		g_io_deliver(bp, ENXIO);
		return;
	}

	mtx_lock(&p->bqueue_mtx);
	bioq_disksort(p->bqueue, bp);
	wakeup(p);
	mtx_unlock(&p->bqueue_mtx);
}

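/*
 * Main worker loop of a plex.  It picks up bios queued by gv_plex_start()
 * and gv_plex_done() and dispatches them: completed sub-requests are
 * post-processed, sub-requests that were held back due to a stripe
 * collision are re-issued or parked again, and new requests are broken
 * up into sub-requests for the subdisks.
 */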
static void
gv_plex_worker(void *arg)
{
	struct bio *bp;
	struct gv_plex *p;
	struct gv_sd *s;

	p = arg;
	KASSERT(p != NULL, ("NULL p"));

	mtx_lock(&p->bqueue_mtx);
	for (;;) {
		/* We were signaled to exit. */
		if (p->flags & GV_PLEX_THREAD_DIE)
			break;

		/* Take the first BIO from our queue. */
		bp = bioq_takefirst(p->bqueue);
		if (bp == NULL) {
			msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz/10);
			continue;
		}
		mtx_unlock(&p->bqueue_mtx);

		/* A completed request. */
		if (bp->bio_cflags & GV_BIO_DONE) {
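			/*
			 * For synchronization and rebuild requests, account
			 * the completed length in the subdisk and bring the
			 * subdisk up once it has been written completely.
			 */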
			if (bp->bio_cflags & GV_BIO_SYNCREQ ||
			    bp->bio_cflags & GV_BIO_REBUILD) {
				s = bp->bio_to->private;
				if (bp->bio_error == 0)
					s->initialized += bp->bio_length;
				if (s->initialized >= s->size) {
					g_topology_lock();
					gv_set_sd_state(s, GV_SD_UP,
					    GV_SETSTATE_CONFIG);
					g_topology_unlock();
					s->initialized = 0;
				}
			}

			if (bp->bio_cflags & GV_BIO_SYNCREQ)
				g_std_done(bp);
			else
				gv_plex_completed_request(p, bp);
		/*
		 * A sub-request that was held back because it interfered with
		 * another sub-request.
		 */
		} else if (bp->bio_cflags & GV_BIO_ONHOLD) {
			/* Is it still locked out? */
			if (gv_stripe_active(p, bp)) {
				/* Park the bio on the waiting queue. */
				mtx_lock(&p->bqueue_mtx);
				bioq_disksort(p->wqueue, bp);
				mtx_unlock(&p->bqueue_mtx);
			} else {
				bp->bio_cflags &= ~GV_BIO_ONHOLD;
				g_io_request(bp, bp->bio_caller2);
			}

		/* A normal request to this plex. */
		} else
			gv_plex_normal_request(p, bp);

		mtx_lock(&p->bqueue_mtx);
	}
	mtx_unlock(&p->bqueue_mtx);
	p->flags |= GV_PLEX_THREAD_DEAD;
	wakeup(p);

	kproc_exit(ENXIO);
}

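/*
 * Handle the parity of a regular RAID5 write once all other sub-requests
 * of a stripe have completed: XOR the held-back data write into the
 * parity buffer and issue it; once that has come back, write out the
 * updated parity.  Returns 1 when there is nothing left to issue.
 */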
static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *cbp, *pbp;
	int finished, i;

	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		cbp = wp->parity;
		for (i = 0; i < wp->length; i++)
			cbp->bio_data[i] ^= pbp->bio_data[i];
		g_io_request(pbp, pbp->bio_caller2);
		finished = 0;

	} else if (wp->parity != NULL) {
		cbp = wp->parity;
		wp->parity = NULL;
		g_io_request(cbp, cbp->bio_caller2);
		finished = 0;
	}

	return (finished);
}

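/*
 * Handle the parity of a RAID5 parity check request (GV_BIO_CHECK):
 * compare the on-disk parity with the parity computed from the data
 * stripes.  On a mismatch, EAGAIN is flagged on the original request,
 * and the parity is rewritten if GV_BIO_PARITY was also set.  Returns
 * 1 when the packet is finished.
 */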
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *pbp;
	int err, finished, i;

	err = 0;
	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		g_io_request(pbp, pbp->bio_caller2);
		finished = 0;

	} else if (wp->parity != NULL) {
		pbp = wp->parity;
		wp->parity = NULL;

		/* Check if the parity is correct. */
		for (i = 0; i < wp->length; i++) {
			if (bp->bio_data[i] != pbp->bio_data[i]) {
				err = 1;
				break;
			}
		}

		/* The parity is not correct... */
		if (err) {
			bp->bio_parent->bio_error = EAGAIN;

			/* ... but we rebuild it if we were asked to. */
			if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
				g_io_request(pbp, pbp->bio_caller2);
				finished = 0;
			}
		}

		/*
		 * Clean up the BIO we would have used for rebuilding the
		 * parity.
		 */
		if (finished) {
			bp->bio_parent->bio_inbed++;
			g_destroy_bio(pbp);
		}

	}

	return (finished);
}

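/*
 * Handle a completed sub-request that was handed back to us by
 * gv_plex_done().  For RAID5 requests this does the per-packet
 * bookkeeping (XOR accumulation, parity handling, releasing parked
 * bios); in any case the completion is propagated to the original
 * request, which is delivered once all of its children have come in.
 */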
void
gv_plex_completed_request(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp;
	int i;

	wp = bp->bio_driver1;

	switch (bp->bio_parent->bio_cmd) {
	case BIO_READ:
		if (wp == NULL)
			break;

		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp == bp) {
				TAILQ_REMOVE(&wp->bits, bq, queue);
				g_free(bq);
				for (i = 0; i < wp->length; i++)
					wp->data[i] ^= bp->bio_data[i];
				break;
			}
		}
		if (TAILQ_EMPTY(&wp->bits)) {
			bp->bio_parent->bio_completed += wp->length;
			if (wp->lockbase != -1) {
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				mtx_lock(&p->bqueue_mtx);
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					bioq_disksort(p->bqueue, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
				mtx_unlock(&p->bqueue_mtx);
			}
			g_free(wp);
		}

		break;

	case BIO_WRITE:
		if (wp == NULL)
			break;

		/* Check if we need to handle parity data. */
		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp == bp) {
				TAILQ_REMOVE(&wp->bits, bq, queue);
				g_free(bq);
				cbp = wp->parity;
				if (cbp != NULL) {
					for (i = 0; i < wp->length; i++)
						cbp->bio_data[i] ^=
						    bp->bio_data[i];
				}
				break;
			}
		}

		/* Handle parity data. */
		if (TAILQ_EMPTY(&wp->bits)) {
			if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
				i = gv_check_parity(p, bp, wp);
			else
				i = gv_normal_parity(p, bp, wp);

			/* All of our sub-requests have finished. */
			if (i) {
				bp->bio_parent->bio_completed += wp->length;
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				mtx_lock(&p->bqueue_mtx);
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					bioq_disksort(p->bqueue, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
				mtx_unlock(&p->bqueue_mtx);
				g_free(wp);
			}
		}

		break;
	}

	pbp = bp->bio_parent;
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;

	/* When the original request is finished, we deliver it. */
	pbp->bio_inbed++;
	if (pbp->bio_inbed == pbp->bio_children)
		g_io_deliver(pbp, pbp->bio_error);

	/* Clean up what we allocated. */
	if (bp->bio_cflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);
}

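/*
 * Handle a request that came in through gv_plex_start().  The request
 * is split along subdisk (or RAID5 stripe) boundaries into a chain of
 * cloned sub-requests hanging off bio_driver1/bio_caller1, which are
 * then issued to the subdisk consumers.  Sub-requests that would
 * collide with an active RAID5 stripe are parked on the waiting queue.
 */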
void
gv_plex_normal_request(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp, *wp2;
	caddr_t addr;
	off_t bcount, boff;
	int err;

	bcount = bp->bio_length;
	addr = bp->bio_data;
	boff = bp->bio_offset;

	/* Walk over the whole length of the request; we might split it up. */
	while (bcount > 0) {
		wp = NULL;

		/*
		 * RAID5 plexes need special treatment, as a single write
		 * request involves several read/write sub-requests.
		 */
		if (p->org == GV_PLEX_RAID5) {
			wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
			wp->bio = bp;
			TAILQ_INIT(&wp->bits);

			if (bp->bio_cflags & GV_BIO_REBUILD)
				err = gv_rebuild_raid5(p, wp, bp, addr,
				    boff, bcount);
			else if (bp->bio_cflags & GV_BIO_CHECK)
				err = gv_check_raid5(p, wp, bp, addr,
				    boff, bcount);
			else
				err = gv_build_raid5_req(p, wp, bp, addr,
				    boff, bcount);

			/*
			 * Building the sub-request failed; we probably need to
			 * clean up a lot.
			 */
			if (err) {
				G_VINUM_LOGREQ(0, bp, "plex request failed.");
				TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
					TAILQ_REMOVE(&wp->bits, bq, queue);
					g_free(bq);
				}
				if (wp->waiting != NULL) {
					if (wp->waiting->bio_cflags &
					    GV_BIO_MALLOC)
						g_free(wp->waiting->bio_data);
					g_destroy_bio(wp->waiting);
				}
				if (wp->parity != NULL) {
					if (wp->parity->bio_cflags &
					    GV_BIO_MALLOC)
						g_free(wp->parity->bio_data);
					g_destroy_bio(wp->parity);
				}
				g_free(wp);

				TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
					if (wp->bio == bp) {
						TAILQ_REMOVE(&p->packets, wp,
						    list);
						TAILQ_FOREACH_SAFE(bq,
						    &wp->bits, queue, bq2) {
							TAILQ_REMOVE(&wp->bits,
							    bq, queue);
							g_free(bq);
						}
						g_free(wp);
					}
				}

				cbp = bp->bio_driver1;
				while (cbp != NULL) {
					pbp = cbp->bio_caller1;
					if (cbp->bio_cflags & GV_BIO_MALLOC)
						g_free(cbp->bio_data);
					g_destroy_bio(cbp);
					cbp = pbp;
				}

				g_io_deliver(bp, err);
				return;
			}

			if (TAILQ_EMPTY(&wp->bits))
				g_free(wp);
			else if (wp->lockbase != -1)
				TAILQ_INSERT_TAIL(&p->packets, wp, list);

		/*
		 * Requests to concatenated and striped plexes go straight
		 * through.
		 */
		} else {
			err = gv_plexbuffer(p, bp, addr, boff, bcount);

			/* Building the sub-request failed. */
			if (err) {
				G_VINUM_LOGREQ(0, bp, "plex request failed.");
				cbp = bp->bio_driver1;
				while (cbp != NULL) {
					pbp = cbp->bio_caller1;
					g_destroy_bio(cbp);
					cbp = pbp;
				}
				g_io_deliver(bp, err);
				return;
			}
		}

		/* Abuse bio_caller1 as linked list. */
		pbp = bp->bio_driver1;
		while (pbp->bio_caller1 != NULL)
			pbp = pbp->bio_caller1;
		bcount -= pbp->bio_length;
		addr += pbp->bio_length;
		boff += pbp->bio_length;
	}

	/* Fire off all sub-requests. */
	pbp = bp->bio_driver1;
	while (pbp != NULL) {
		/*
		 * RAID5 sub-requests need to be issued in the correct order;
		 * otherwise we trip over the parity, as it might be
		 * overwritten by another sub-request.
		 */
		if (pbp->bio_driver1 != NULL &&
		    gv_stripe_active(p, pbp)) {
			/* Park the bio on the waiting queue. */
			pbp->bio_cflags |= GV_BIO_ONHOLD;
			mtx_lock(&p->bqueue_mtx);
			bioq_disksort(p->wqueue, pbp);
			mtx_unlock(&p->bqueue_mtx);
		} else
			g_io_request(pbp, pbp->bio_caller2);
		pbp = pbp->bio_caller1;
	}
}

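/*
 * Pass an access request down to all attached consumers.  A RAID5 plex
 * additionally needs read access whenever it is opened for writing,
 * since its writes have to read back data and parity.
 */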
static int
gv_plex_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct gv_plex *p;
	struct g_geom *gp;
	struct g_consumer *cp, *cp2;
	int error;

	gp = pp->geom;
	p = gp->softc;
	KASSERT(p != NULL, ("NULL p"));

	if (p->org == GV_PLEX_RAID5) {
		if (dw > 0 && dr == 0)
			dr = 1;
		else if (dw < 0 && dr == 0)
			dr = -1;
	}

	LIST_FOREACH(cp, &gp->consumer, consumer) {
		error = g_access(cp, dr, dw, de);
		if (error) {
			LIST_FOREACH(cp2, &gp->consumer, consumer) {
				if (cp == cp2)
					break;
				g_access(cp2, -dr, -dw, -de);
			}
			return (error);
		}
	}
	return (0);
}

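/*
 * Taste a provider offered by a VINUMDRIVE subdisk.  The subdisk is
 * attached to its plex; if the plex geom already exists, only a new
 * consumer is added, otherwise the geom, its bio queues and the worker
 * thread are created along with a provider for the outside world.
 */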
static struct g_geom *
gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_geom *gp;
	struct g_consumer *cp, *cp2;
	struct g_provider *pp2;
	struct gv_plex *p;
	struct gv_sd *s;
	struct gv_softc *sc;
	int error;

	g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name);
	g_topology_assert();

	/* We only want to attach to subdisks. */
	if (strcmp(pp->geom->class->name, "VINUMDRIVE"))
		return (NULL);

	/* Find the VINUM class and its associated geom. */
	gp = find_vinum_geom();
	if (gp == NULL)
		return (NULL);
	sc = gp->softc;
	KASSERT(sc != NULL, ("gv_plex_taste: NULL sc"));

	/* Find out which subdisk the offered provider corresponds to. */
	s = pp->private;
	KASSERT(s != NULL, ("gv_plex_taste: NULL s"));

	/* Now find the plex this subdisk belongs to. */
	p = gv_find_plex(sc, s->plex);
	if (p == NULL) {
		G_VINUM_DEBUG(0, "%s: NULL p for '%s'", __func__, s->name);
		return (NULL);
	}

	/*
	 * Add this subdisk to this plex.  Since we trust the on-disk
	 * configuration, we don't check the given value (should we?).
	 * XXX: shouldn't be done here
	 */
	gv_sd_to_plex(p, s, 0);

	/* Now check if there's already a geom for this plex. */
	gp = p->geom;

	/* Yes, there is already a geom, so we just add the consumer. */
	if (gp != NULL) {
		cp2 = LIST_FIRST(&gp->consumer);
		/* Need to attach a new consumer to this subdisk. */
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error) {
			G_VINUM_DEBUG(0, "unable to attach consumer to %s",
			    pp->name);
			g_destroy_consumer(cp);
			return (NULL);
		}
		/* Adjust the access counts of the new consumer. */
		if ((cp2 != NULL) && (cp2->acr || cp2->acw || cp2->ace)) {
			error = g_access(cp, cp2->acr, cp2->acw, cp2->ace);
			if (error) {
				G_VINUM_DEBUG(0, "unable to set access counts"
				    " for consumer on %s", pp->name);
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
		}
		s->consumer = cp;

		/* Adjust the size of the providers this plex has. */
		LIST_FOREACH(pp2, &gp->provider, provider)
			pp2->mediasize = p->size;

		/* Update the size of the volume this plex is attached to. */
		if (p->vol_sc != NULL)
			gv_update_vol_size(p->vol_sc, p->size);

		/*
		 * If necessary, create bio queues, queue mutex and a worker
		 * thread.
		 */
		if (p->bqueue == NULL) {
			p->bqueue = g_malloc(sizeof(struct bio_queue_head),
			    M_WAITOK | M_ZERO);
			bioq_init(p->bqueue);
		}
		if (p->wqueue == NULL) {
			p->wqueue = g_malloc(sizeof(struct bio_queue_head),
			    M_WAITOK | M_ZERO);
			bioq_init(p->wqueue);
		}
		if (mtx_initialized(&p->bqueue_mtx) == 0)
			mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
		if (!(p->flags & GV_PLEX_THREAD_ACTIVE)) {
			kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
			    p->name);
			p->flags |= GV_PLEX_THREAD_ACTIVE;
		}

		return (NULL);

	/* We need to create a new geom. */
	} else {
		gp = g_new_geomf(mp, "%s", p->name);
		gp->start = gv_plex_start;
		gp->orphan = gv_plex_orphan;
		gp->access = gv_plex_access;
		gp->softc = p;
		p->geom = gp;

		TAILQ_INIT(&p->packets);
		p->bqueue = g_malloc(sizeof(struct bio_queue_head),
		    M_WAITOK | M_ZERO);
		bioq_init(p->bqueue);
		p->wqueue = g_malloc(sizeof(struct bio_queue_head),
		    M_WAITOK | M_ZERO);
		bioq_init(p->wqueue);
		mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
		kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
		    p->name);
		p->flags |= GV_PLEX_THREAD_ACTIVE;

		/* Attach a consumer to this provider. */
		cp = g_new_consumer(gp);
		g_attach(cp, pp);
		s->consumer = cp;

		/* Create a provider for the outside world. */
		pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name);
		pp2->mediasize = p->size;
		pp2->sectorsize = pp->sectorsize;
		p->provider = pp2;
		g_error_provider(pp2, 0);
		return (gp);
	}
}

static int
gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{
	struct gv_plex *p;

	g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name);
	g_topology_assert();

	p = gp->softc;

	KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name));

	/*
	 * Check whether the plex worker thread is still active and signal it
	 * to self-destruct.
	 */
	gv_kill_plex_thread(p);
	/* g_free(sc); */
	g_wither_geom(gp, ENXIO);
	return (0);
}

#define	VINUMPLEX_CLASS_NAME "VINUMPLEX"

static struct g_class g_vinum_plex_class = {
	.name = VINUMPLEX_CLASS_NAME,
	.version = G_VERSION,
	.taste = gv_plex_taste,
	.destroy_geom = gv_plex_destroy_geom,
};

DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex);