xref: /freebsd/sys/geom/geom_ccd.c (revision d8a0fe102c0cfdfcd5b818f850eff09d8536c9bc)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 2003 Poul-Henning Kamp.
5  * Copyright (c) 1995 Jason R. Thorpe.
6  * Copyright (c) 1990, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  * All rights reserved.
9  * Copyright (c) 1988 University of Utah.
10  *
11  * This code is derived from software contributed to Berkeley by
12  * the Systems Programming Group of the University of Utah Computer
13  * Science Department.
14  *
15  * Redistribution and use in source and binary forms, with or without
16  * modification, are permitted provided that the following conditions
17  * are met:
18  * 1. Redistributions of source code must retain the above copyright
19  *    notice, this list of conditions and the following disclaimer.
20  * 2. Redistributions in binary form must reproduce the above copyright
21  *    notice, this list of conditions and the following disclaimer in the
22  *    documentation and/or other materials provided with the distribution.
23  * 3. All advertising materials mentioning features or use of this software
24  *    must display the following acknowledgement:
25  *	This product includes software developed for the NetBSD Project
26  *	by Jason R. Thorpe.
27  * 4. The names of the authors may not be used to endorse or promote products
28  *    derived from this software without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
31  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
33  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
34  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
35  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
36  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
37  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
38  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  * Dynamic configuration and disklabel support by:
43  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
44  *	Numerical Aerodynamic Simulation Facility
45  *	Mail Stop 258-6
46  *	NASA Ames Research Center
47  *	Moffett Field, CA 94035
48  *
49  * from: Utah $Hdr: cd.c 1.6 90/11/28$
50  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
51  *	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
52  */
53 
54 #include <sys/cdefs.h>
55 __FBSDID("$FreeBSD$");
56 
57 #include <sys/param.h>
58 #include <sys/systm.h>
59 #include <sys/kernel.h>
60 #include <sys/module.h>
61 #include <sys/bio.h>
62 #include <sys/malloc.h>
63 #include <sys/sbuf.h>
64 #include <geom/geom.h>
65 
/*
 * Number of blocks to leave untouched in front of a component partition.
 * This is to avoid violating its disklabel area when it starts at the
 * beginning of the slice.  The default below applies only when CCD_OFFSET
 * has not already been defined.
 */
#if !defined(CCD_OFFSET)
#define CCD_OFFSET 16
#endif
74 
/* sc_flags: configuration bits kept in struct ccd_s. */
#define CCDF_UNIFORM	0x02	/* use LCCD of sizes for uniform interleave */
#define CCDF_MIRROR	0x04	/* use mirroring */
#define CCDF_NO_OFFSET	0x08	/* do not leave CCD_OFFSET blocks in front */
#define CCDF_LINUX	0x10	/* use Linux compatibility mode */

/*
 * Mask of user-settable ccd flags.  Only these bits are reported back
 * by the "list" verb; CCDF_NO_OFFSET and CCDF_LINUX are not part of it.
 */
#define CCDF_USERMASK	(CCDF_UNIFORM|CCDF_MIRROR)
83 
/*
 * Interleave description table.
 * Computed by ccdinterleave() when a ccd is configured, to speed
 * irregular-interleave lookups.
 * The idea is that we interleave in "groups".  First we interleave
 * evenly over all component disks up to the size of the smallest
 * component (the first group), then we interleave evenly over all
 * remaining disks up to the size of the next-smallest (second group),
 * and so on.
 *
 * Each table entry describes the interleave characteristics of one
 * of these groups.  For example if a concatenated disk consisted of
 * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at
 * DEV_BSIZE (1), the table would have three entries:
 *
 *	ndisk	startblk	startoff	dev
 *	3	0		0		0, 1, 2
 *	2	9		3		0, 2
 *	1	13		5		2
 *	0	-		-		-
 *
 * which says that the first nine blocks (0-8) are interleaved over
 * 3 disks (0, 1, 2) starting at block offset 0 on any component disk,
 * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting
 * at component block 3, and the remaining blocks (13-14) are on disk
 * 2 starting at offset 5.
 *
 * The table is terminated by an entry whose ii_ndisk is 0.  When the
 * interleave is non-zero, ii_startblk and ii_startoff are expressed in
 * units of the interleave (sc_ileave blocks) rather than single blocks.
 */
struct ccdiinfo {
	int	ii_ndisk;	/* # of disks range is interleaved over */
	daddr_t	ii_startblk;	/* starting scaled block # for range */
	daddr_t	ii_startoff;	/* starting component offset (block #) */
	int	*ii_index;	/* ordered list of components in range */
};
116 
/*
 * Component info table.
 * Describes a single component of a concatenated disk.  One entry per
 * component is allocated in g_ccd_create() and sized by ccdinit().
 */
struct ccdcinfo {
	daddr_t		ci_size; 		/* usable size in DEV_BSIZE blocks */
	struct g_provider *ci_provider;		/* provider backing the component */
	struct g_consumer *ci_consumer;		/* consumer attached to it */
};
126 
/*
 * A concatenated disk is described by this structure.  It is stored as
 * the softc of the ccd geom and released with g_ccd_freesc().
 */

struct ccd_s {
	LIST_ENTRY(ccd_s) list;			/* NOTE(review): not used in this file */

	int		 sc_unit;		/* logical unit number */
	int		 sc_flags;		/* CCDF_* flags */
	daddr_t		 sc_size;		/* size of ccd (DEV_BSIZE blocks) */
	int		 sc_ileave;		/* interleave in blocks, 0 = concat */
	u_int		 sc_ndisks;		/* number of components */
	struct ccdcinfo	 *sc_cinfo;		/* component info, sc_ndisks entries */
	struct ccdiinfo	 *sc_itable;		/* interleave table */
	u_int32_t	 sc_secsize;		/* # bytes per sector (largest component) */
	int		 sc_pick;		/* side of mirror picked for reads */
	daddr_t		 sc_blk[2];		/* mirror localization: next block per side */
	u_int32_t	 sc_offset;		/* actual offset used (0 or CCD_OFFSET) */
};
146 
/* Forward declarations for functions referenced before their definition. */
static g_start_t g_ccd_start;
static void ccdiodone(struct bio *bp);
static void ccdinterleave(struct ccd_s *);
static int ccdinit(struct gctl_req *req, struct ccd_s *);
static int ccdbuffer(struct bio **ret, struct ccd_s *,
		      struct bio *, daddr_t, caddr_t, long);
153 
/*
 * Orphan method of the CCD class.  Deliberately a no-op: the consumer
 * passed in is left untouched.
 */
static void
g_ccd_orphan(struct g_consumer *cp)
{
	/*
	 * XXX: We don't do anything here.  It is not obvious
	 * XXX: what DTRT would be, so we do what the previous
	 * XXX: code did: ignore it and let the user cope.
	 */
}
163 
164 static int
165 g_ccd_access(struct g_provider *pp, int dr, int dw, int de)
166 {
167 	struct g_geom *gp;
168 	struct g_consumer *cp1, *cp2;
169 	int error;
170 
171 	de += dr;
172 	de += dw;
173 
174 	gp = pp->geom;
175 	error = ENXIO;
176 	LIST_FOREACH(cp1, &gp->consumer, consumer) {
177 		error = g_access(cp1, dr, dw, de);
178 		if (error) {
179 			LIST_FOREACH(cp2, &gp->consumer, consumer) {
180 				if (cp1 == cp2)
181 					break;
182 				g_access(cp2, -dr, -dw, -de);
183 			}
184 			break;
185 		}
186 	}
187 	return (error);
188 }
189 
190 /*
191  * Free the softc and its substructures.
192  */
193 static void
194 g_ccd_freesc(struct ccd_s *sc)
195 {
196 	struct ccdiinfo *ii;
197 
198 	g_free(sc->sc_cinfo);
199 	if (sc->sc_itable != NULL) {
200 		for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++)
201 			if (ii->ii_index != NULL)
202 				g_free(ii->ii_index);
203 		g_free(sc->sc_itable);
204 	}
205 	g_free(sc);
206 }
207 
208 
209 static int
210 ccdinit(struct gctl_req *req, struct ccd_s *cs)
211 {
212 	struct ccdcinfo *ci;
213 	daddr_t size;
214 	int ix;
215 	daddr_t minsize;
216 	int maxsecsize;
217 	off_t mediasize;
218 	u_int sectorsize;
219 
220 	cs->sc_size = 0;
221 
222 	maxsecsize = 0;
223 	minsize = 0;
224 
225 	if (cs->sc_flags & CCDF_LINUX) {
226 		cs->sc_offset = 0;
227 		cs->sc_ileave *= 2;
228 		if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2)
229 			gctl_error(req, "Mirror mode for Linux raids is "
230 			                "only supported with 2 devices");
231 	} else {
232 		if (cs->sc_flags & CCDF_NO_OFFSET)
233 			cs->sc_offset = 0;
234 		else
235 			cs->sc_offset = CCD_OFFSET;
236 
237 	}
238 	for (ix = 0; ix < cs->sc_ndisks; ix++) {
239 		ci = &cs->sc_cinfo[ix];
240 
241 		mediasize = ci->ci_provider->mediasize;
242 		sectorsize = ci->ci_provider->sectorsize;
243 		if (sectorsize > maxsecsize)
244 			maxsecsize = sectorsize;
245 		size = mediasize / DEV_BSIZE - cs->sc_offset;
246 
247 		/* Truncate to interleave boundary */
248 
249 		if (cs->sc_ileave > 1)
250 			size -= size % cs->sc_ileave;
251 
252 		if (size == 0) {
253 			gctl_error(req, "Component %s has effective size zero",
254 			    ci->ci_provider->name);
255 			return(ENODEV);
256 		}
257 
258 		if (minsize == 0 || size < minsize)
259 			minsize = size;
260 		ci->ci_size = size;
261 		cs->sc_size += size;
262 	}
263 
264 	/*
265 	 * Don't allow the interleave to be smaller than
266 	 * the biggest component sector.
267 	 */
268 	if ((cs->sc_ileave > 0) &&
269 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
270 		gctl_error(req, "Interleave to small for sector size");
271 		return(EINVAL);
272 	}
273 
274 	/*
275 	 * If uniform interleave is desired set all sizes to that of
276 	 * the smallest component.  This will guarantee that a single
277 	 * interleave table is generated.
278 	 *
279 	 * Lost space must be taken into account when calculating the
280 	 * overall size.  Half the space is lost when CCDF_MIRROR is
281 	 * specified.
282 	 */
283 	if (cs->sc_flags & CCDF_UNIFORM) {
284 		for (ix = 0; ix < cs->sc_ndisks; ix++) {
285 			ci = &cs->sc_cinfo[ix];
286 			ci->ci_size = minsize;
287 		}
288 		cs->sc_size = cs->sc_ndisks * minsize;
289 	}
290 
291 	if (cs->sc_flags & CCDF_MIRROR) {
292 		/*
293 		 * Check to see if an even number of components
294 		 * have been specified.  The interleave must also
295 		 * be non-zero in order for us to be able to
296 		 * guarantee the topology.
297 		 */
298 		if (cs->sc_ndisks % 2) {
299 			gctl_error(req,
300 			      "Mirroring requires an even number of disks");
301 			return(EINVAL);
302 		}
303 		if (cs->sc_ileave == 0) {
304 			gctl_error(req,
305 			     "An interleave must be specified when mirroring");
306 			return(EINVAL);
307 		}
308 		cs->sc_size = (cs->sc_ndisks/2) * minsize;
309 	}
310 
311 	/*
312 	 * Construct the interleave table.
313 	 */
314 	ccdinterleave(cs);
315 
316 	/*
317 	 * Create pseudo-geometry based on 1MB cylinders.  It's
318 	 * pretty close.
319 	 */
320 	cs->sc_secsize = maxsecsize;
321 
322 	return (0);
323 }
324 
325 static void
326 ccdinterleave(struct ccd_s *cs)
327 {
328 	struct ccdcinfo *ci, *smallci;
329 	struct ccdiinfo *ii;
330 	daddr_t bn, lbn;
331 	int ix;
332 	daddr_t size;
333 
334 
335 	/*
336 	 * Allocate an interleave table.  The worst case occurs when each
337 	 * of N disks is of a different size, resulting in N interleave
338 	 * tables.
339 	 *
340 	 * Chances are this is too big, but we don't care.
341 	 */
342 	size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo);
343 	cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO);
344 
345 	/*
346 	 * Trivial case: no interleave (actually interleave of disk size).
347 	 * Each table entry represents a single component in its entirety.
348 	 *
349 	 * An interleave of 0 may not be used with a mirror setup.
350 	 */
351 	if (cs->sc_ileave == 0) {
352 		bn = 0;
353 		ii = cs->sc_itable;
354 
355 		for (ix = 0; ix < cs->sc_ndisks; ix++) {
356 			/* Allocate space for ii_index. */
357 			ii->ii_index = g_malloc(sizeof(int), M_WAITOK);
358 			ii->ii_ndisk = 1;
359 			ii->ii_startblk = bn;
360 			ii->ii_startoff = 0;
361 			ii->ii_index[0] = ix;
362 			bn += cs->sc_cinfo[ix].ci_size;
363 			ii++;
364 		}
365 		ii->ii_ndisk = 0;
366 		return;
367 	}
368 
369 	/*
370 	 * The following isn't fast or pretty; it doesn't have to be.
371 	 */
372 	size = 0;
373 	bn = lbn = 0;
374 	for (ii = cs->sc_itable; ; ii++) {
375 		/*
376 		 * Allocate space for ii_index.  We might allocate more then
377 		 * we use.
378 		 */
379 		ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks),
380 		    M_WAITOK);
381 
382 		/*
383 		 * Locate the smallest of the remaining components
384 		 */
385 		smallci = NULL;
386 		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks];
387 		    ci++) {
388 			if (ci->ci_size > size &&
389 			    (smallci == NULL ||
390 			     ci->ci_size < smallci->ci_size)) {
391 				smallci = ci;
392 			}
393 		}
394 
395 		/*
396 		 * Nobody left, all done
397 		 */
398 		if (smallci == NULL) {
399 			ii->ii_ndisk = 0;
400 			g_free(ii->ii_index);
401 			ii->ii_index = NULL;
402 			break;
403 		}
404 
405 		/*
406 		 * Record starting logical block using an sc_ileave blocksize.
407 		 */
408 		ii->ii_startblk = bn / cs->sc_ileave;
409 
410 		/*
411 		 * Record starting component block using an sc_ileave
412 		 * blocksize.  This value is relative to the beginning of
413 		 * a component disk.
414 		 */
415 		ii->ii_startoff = lbn;
416 
417 		/*
418 		 * Determine how many disks take part in this interleave
419 		 * and record their indices.
420 		 */
421 		ix = 0;
422 		for (ci = cs->sc_cinfo;
423 		    ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) {
424 			if (ci->ci_size >= smallci->ci_size) {
425 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
426 			}
427 		}
428 		ii->ii_ndisk = ix;
429 		bn += ix * (smallci->ci_size - size);
430 		lbn = smallci->ci_size / cs->sc_ileave;
431 		size = smallci->ci_size;
432 	}
433 }
434 
435 static void
436 g_ccd_start(struct bio *bp)
437 {
438 	long bcount, rcount;
439 	struct bio *cbp[2];
440 	caddr_t addr;
441 	daddr_t bn;
442 	int err;
443 	struct ccd_s *cs;
444 
445 	cs = bp->bio_to->geom->softc;
446 
447 	/*
448 	 * Block all GETATTR requests, we wouldn't know which of our
449 	 * subdevices we should ship it off to.
450 	 * XXX: this may not be the right policy.
451 	 */
452 	if(bp->bio_cmd == BIO_GETATTR) {
453 		g_io_deliver(bp, EINVAL);
454 		return;
455 	}
456 
457 	/*
458 	 * Translate the partition-relative block number to an absolute.
459 	 */
460 	bn = bp->bio_offset / cs->sc_secsize;
461 
462 	/*
463 	 * Allocate component buffers and fire off the requests
464 	 */
465 	addr = bp->bio_data;
466 	for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
467 		err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
468 		if (err) {
469 			bp->bio_completed += bcount;
470 			if (bp->bio_error == 0)
471 				bp->bio_error = err;
472 			if (bp->bio_completed == bp->bio_length)
473 				g_io_deliver(bp, bp->bio_error);
474 			return;
475 		}
476 		rcount = cbp[0]->bio_length;
477 
478 		if (cs->sc_flags & CCDF_MIRROR) {
479 			/*
480 			 * Mirroring.  Writes go to both disks, reads are
481 			 * taken from whichever disk seems most appropriate.
482 			 *
483 			 * We attempt to localize reads to the disk whos arm
484 			 * is nearest the read request.  We ignore seeks due
485 			 * to writes when making this determination and we
486 			 * also try to avoid hogging.
487 			 */
488 			if (cbp[0]->bio_cmd != BIO_READ) {
489 				g_io_request(cbp[0], cbp[0]->bio_from);
490 				g_io_request(cbp[1], cbp[1]->bio_from);
491 			} else {
492 				int pick = cs->sc_pick;
493 				daddr_t range = cs->sc_size / 16;
494 
495 				if (bn < cs->sc_blk[pick] - range ||
496 				    bn > cs->sc_blk[pick] + range
497 				) {
498 					cs->sc_pick = pick = 1 - pick;
499 				}
500 				cs->sc_blk[pick] = bn + btodb(rcount);
501 				g_io_request(cbp[pick], cbp[pick]->bio_from);
502 			}
503 		} else {
504 			/*
505 			 * Not mirroring
506 			 */
507 			g_io_request(cbp[0], cbp[0]->bio_from);
508 		}
509 		bn += btodb(rcount);
510 		addr += rcount;
511 	}
512 }
513 
514 /*
515  * Build a component buffer header.
516  */
517 static int
518 ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
519 {
520 	struct ccdcinfo *ci, *ci2 = NULL;
521 	struct bio *cbp;
522 	daddr_t cbn, cboff;
523 	off_t cbc;
524 
525 	/*
526 	 * Determine which component bn falls in.
527 	 */
528 	cbn = bn;
529 	cboff = 0;
530 
531 	if (cs->sc_ileave == 0) {
532 		/*
533 		 * Serially concatenated and neither a mirror nor a parity
534 		 * config.  This is a special case.
535 		 */
536 		daddr_t sblk;
537 
538 		sblk = 0;
539 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
540 			sblk += ci->ci_size;
541 		cbn -= sblk;
542 	} else {
543 		struct ccdiinfo *ii;
544 		int ccdisk, off;
545 
546 		/*
547 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
548 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
549 		 * to cbn.
550 		 */
551 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
552 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
553 
554 		/*
555 		 * Figure out which interleave table to use.
556 		 */
557 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
558 			if (ii->ii_startblk > cbn)
559 				break;
560 		}
561 		ii--;
562 
563 		/*
564 		 * off is the logical superblock relative to the beginning
565 		 * of this interleave block.
566 		 */
567 		off = cbn - ii->ii_startblk;
568 
569 		/*
570 		 * We must calculate which disk component to use (ccdisk),
571 		 * and recalculate cbn to be the superblock relative to
572 		 * the beginning of the component.  This is typically done by
573 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
574 		 * must typically be divided by the number of components in
575 		 * this interleave array to be properly convert it from a
576 		 * CCD-relative logical superblock number to a
577 		 * component-relative superblock number.
578 		 */
579 		if (ii->ii_ndisk == 1) {
580 			/*
581 			 * When we have just one disk, it can't be a mirror
582 			 * or a parity config.
583 			 */
584 			ccdisk = ii->ii_index[0];
585 			cbn = ii->ii_startoff + off;
586 		} else {
587 			if (cs->sc_flags & CCDF_MIRROR) {
588 				/*
589 				 * We have forced a uniform mapping, resulting
590 				 * in a single interleave array.  We double
591 				 * up on the first half of the available
592 				 * components and our mirror is in the second
593 				 * half.  This only works with a single
594 				 * interleave array because doubling up
595 				 * doubles the number of sectors, so there
596 				 * cannot be another interleave array because
597 				 * the next interleave array's calculations
598 				 * would be off.
599 				 */
600 				int ndisk2 = ii->ii_ndisk / 2;
601 				ccdisk = ii->ii_index[off % ndisk2];
602 				cbn = ii->ii_startoff + off / ndisk2;
603 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
604 			} else {
605 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
606 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
607 			}
608 		}
609 
610 		ci = &cs->sc_cinfo[ccdisk];
611 
612 		/*
613 		 * Convert cbn from a superblock to a normal block so it
614 		 * can be used to calculate (along with cboff) the normal
615 		 * block index into this particular disk.
616 		 */
617 		cbn *= cs->sc_ileave;
618 	}
619 
620 	/*
621 	 * Fill in the component buf structure.
622 	 */
623 	cbp = g_clone_bio(bp);
624 	if (cbp == NULL)
625 		return (ENOMEM);
626 	cbp->bio_done = g_std_done;
627 	cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset);
628 	cbp->bio_data = addr;
629 	if (cs->sc_ileave == 0)
630               cbc = dbtob((off_t)(ci->ci_size - cbn));
631 	else
632               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
633 	cbp->bio_length = (cbc < bcount) ? cbc : bcount;
634 
635 	cbp->bio_from = ci->ci_consumer;
636 	cb[0] = cbp;
637 
638 	if (cs->sc_flags & CCDF_MIRROR) {
639 		cbp = g_clone_bio(bp);
640 		if (cbp == NULL)
641 			return (ENOMEM);
642 		cbp->bio_done = cb[0]->bio_done = ccdiodone;
643 		cbp->bio_offset = cb[0]->bio_offset;
644 		cbp->bio_data = cb[0]->bio_data;
645 		cbp->bio_length = cb[0]->bio_length;
646 		cbp->bio_from = ci2->ci_consumer;
647 		cbp->bio_caller1 = cb[0];
648 		cb[0]->bio_caller1 = cbp;
649 		cb[1] = cbp;
650 	}
651 	return (0);
652 }
653 
654 /*
655  * Called only for mirrored operations.
656  */
657 static void
658 ccdiodone(struct bio *cbp)
659 {
660 	struct bio *mbp, *pbp;
661 
662 	mbp = cbp->bio_caller1;
663 	pbp = cbp->bio_parent;
664 
665 	if (pbp->bio_cmd == BIO_READ) {
666 		if (cbp->bio_error == 0) {
667 			/* We will not be needing the partner bio */
668 			if (mbp != NULL) {
669 				pbp->bio_inbed++;
670 				g_destroy_bio(mbp);
671 			}
672 			g_std_done(cbp);
673 			return;
674 		}
675 		if (mbp != NULL) {
676 			/* Try partner the bio instead */
677 			mbp->bio_caller1 = NULL;
678 			pbp->bio_inbed++;
679 			g_destroy_bio(cbp);
680 			g_io_request(mbp, mbp->bio_from);
681 			/*
682 			 * XXX: If this comes back OK, we should actually
683 			 * try to write the good data on the failed mirror
684 			 */
685 			return;
686 		}
687 		g_std_done(cbp);
688 		return;
689 	}
690 	if (mbp != NULL) {
691 		mbp->bio_caller1 = NULL;
692 		pbp->bio_inbed++;
693 		if (cbp->bio_error != 0 && pbp->bio_error == 0)
694 			pbp->bio_error = cbp->bio_error;
695 		g_destroy_bio(cbp);
696 		return;
697 	}
698 	g_std_done(cbp);
699 }
700 
701 static void
702 g_ccd_create(struct gctl_req *req, struct g_class *mp)
703 {
704 	int *unit, *ileave, *nprovider;
705 	struct g_geom *gp;
706 	struct g_consumer *cp;
707 	struct g_provider *pp;
708 	struct ccd_s *sc;
709 	struct sbuf *sb;
710 	char buf[20];
711 	int i, error;
712 
713 	g_topology_assert();
714 	unit = gctl_get_paraml(req, "unit", sizeof (*unit));
715 	if (unit == NULL) {
716 		gctl_error(req, "unit parameter not given");
717 		return;
718 	}
719 	ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave));
720 	if (ileave == NULL) {
721 		gctl_error(req, "ileave parameter not given");
722 		return;
723 	}
724 	nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider));
725 	if (nprovider == NULL) {
726 		gctl_error(req, "nprovider parameter not given");
727 		return;
728 	}
729 
730 	/* Check for duplicate unit */
731 	LIST_FOREACH(gp, &mp->geom, geom) {
732 		sc = gp->softc;
733 		if (sc != NULL && sc->sc_unit == *unit) {
734 			gctl_error(req, "Unit %d already configured", *unit);
735 			return;
736 		}
737 	}
738 
739 	if (*nprovider <= 0) {
740 		gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider);
741 		return;
742 	}
743 
744 	/* Check all providers are valid */
745 	for (i = 0; i < *nprovider; i++) {
746 		sprintf(buf, "provider%d", i);
747 		pp = gctl_get_provider(req, buf);
748 		if (pp == NULL)
749 			return;
750 	}
751 
752 	gp = g_new_geomf(mp, "ccd%d", *unit);
753 	sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
754 	gp->softc = sc;
755 	sc->sc_ndisks = *nprovider;
756 
757 	/* Allocate space for the component info. */
758 	sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo),
759 	    M_WAITOK | M_ZERO);
760 
761 	/* Create consumers and attach to all providers */
762 	for (i = 0; i < *nprovider; i++) {
763 		sprintf(buf, "provider%d", i);
764 		pp = gctl_get_provider(req, buf);
765 		cp = g_new_consumer(gp);
766 		error = g_attach(cp, pp);
767 		KASSERT(error == 0, ("attach to %s failed", pp->name));
768 		sc->sc_cinfo[i].ci_consumer = cp;
769 		sc->sc_cinfo[i].ci_provider = pp;
770 	}
771 
772 	sc->sc_unit = *unit;
773 	sc->sc_ileave = *ileave;
774 
775 	if (gctl_get_param(req, "no_offset", NULL))
776 		sc->sc_flags |= CCDF_NO_OFFSET;
777 	if (gctl_get_param(req, "linux", NULL))
778 		sc->sc_flags |= CCDF_LINUX;
779 
780 	if (gctl_get_param(req, "uniform", NULL))
781 		sc->sc_flags |= CCDF_UNIFORM;
782 	if (gctl_get_param(req, "mirror", NULL))
783 		sc->sc_flags |= CCDF_MIRROR;
784 
785 	if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) {
786 		printf("%s: disabling mirror, interleave is 0\n", gp->name);
787 		sc->sc_flags &= ~(CCDF_MIRROR);
788 	}
789 
790 	if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) {
791 		printf("%s: mirror/parity forces uniform flag\n", gp->name);
792 		sc->sc_flags |= CCDF_UNIFORM;
793 	}
794 
795 	error = ccdinit(req, sc);
796 	if (error != 0) {
797 		g_ccd_freesc(sc);
798 		gp->softc = NULL;
799 		g_wither_geom(gp, ENXIO);
800 		return;
801 	}
802 
803 	pp = g_new_providerf(gp, "%s", gp->name);
804 	pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize;
805 	pp->sectorsize = sc->sc_secsize;
806 	g_error_provider(pp, 0);
807 
808 	sb = sbuf_new_auto();
809 	sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider);
810 	for (i = 0; i < *nprovider; i++) {
811 		sbuf_printf(sb, "%s%s",
812 		    i == 0 ? "(" : ", ",
813 		    sc->sc_cinfo[i].ci_provider->name);
814 	}
815 	sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE);
816 	if (sc->sc_ileave != 0)
817 		sbuf_printf(sb, "interleaved at %d blocks\n",
818 			sc->sc_ileave);
819 	else
820 		sbuf_printf(sb, "concatenated\n");
821 	sbuf_finish(sb);
822 	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
823 	sbuf_delete(sb);
824 }
825 
826 static int
827 g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
828 {
829 	struct g_provider *pp;
830 	struct ccd_s *sc;
831 
832 	g_topology_assert();
833 	sc = gp->softc;
834 	pp = LIST_FIRST(&gp->provider);
835 	if (sc == NULL || pp == NULL)
836 		return (EBUSY);
837 	if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
838 		gctl_error(req, "%s is open(r%dw%de%d)", gp->name,
839 		    pp->acr, pp->acw, pp->ace);
840 		return (EBUSY);
841 	}
842 	g_ccd_freesc(sc);
843 	gp->softc = NULL;
844 	g_wither_geom(gp, ENXIO);
845 	return (0);
846 }
847 
848 static void
849 g_ccd_list(struct gctl_req *req, struct g_class *mp)
850 {
851 	struct sbuf *sb;
852 	struct ccd_s *cs;
853 	struct g_geom *gp;
854 	int i, unit, *up;
855 
856 	up = gctl_get_paraml(req, "unit", sizeof (*up));
857 	if (up == NULL) {
858 		gctl_error(req, "unit parameter not given");
859 		return;
860 	}
861 	unit = *up;
862 	sb = sbuf_new_auto();
863 	LIST_FOREACH(gp, &mp->geom, geom) {
864 		cs = gp->softc;
865 		if (cs == NULL || (unit >= 0 && unit != cs->sc_unit))
866 			continue;
867 		sbuf_printf(sb, "ccd%d\t\t%d\t%d\t",
868 		    cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK);
869 
870 		for (i = 0; i < cs->sc_ndisks; ++i) {
871 			sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ",
872 			    cs->sc_cinfo[i].ci_provider->name);
873 		}
874 		sbuf_printf(sb, "\n");
875 	}
876 	sbuf_finish(sb);
877 	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
878 	sbuf_delete(sb);
879 }
880 
881 static void
882 g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
883 {
884 	struct g_geom *gp;
885 
886 	g_topology_assert();
887 	if (!strcmp(verb, "create geom")) {
888 		g_ccd_create(req, mp);
889 	} else if (!strcmp(verb, "destroy geom")) {
890 		gp = gctl_get_geom(req, mp, "geom");
891 		if (gp != NULL)
892 		g_ccd_destroy_geom(req, mp, gp);
893 	} else if (!strcmp(verb, "list")) {
894 		g_ccd_list(req, mp);
895 	} else {
896 		gctl_error(req, "unknown verb");
897 	}
898 }
899 
/*
 * Class descriptor for "CCD".  Wires the methods defined in this file
 * (control requests, destroy, start, orphan, access) into the class and
 * registers it via DECLARE_GEOM_CLASS.
 */
static struct g_class g_ccd_class = {
	.name = "CCD",
	.version = G_VERSION,
	.ctlreq = g_ccd_config,
	.destroy_geom = g_ccd_destroy_geom,
	.start = g_ccd_start,
	.orphan = g_ccd_orphan,
	.access = g_ccd_access,
};

DECLARE_GEOM_CLASS(g_ccd_class, g_ccd);
911