xref: /freebsd/sys/geom/geom_ccd.c (revision c243e4902be8df1e643c76b5f18b68bb77cc5268)
1 /*-
2  * Copyright (c) 2003 Poul-Henning Kamp.
3  * Copyright (c) 1995 Jason R. Thorpe.
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * All rights reserved.
7  * Copyright (c) 1988 University of Utah.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * the Systems Programming Group of the University of Utah Computer
11  * Science Department.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed for the NetBSD Project
24  *	by Jason R. Thorpe.
25  * 4. The names of the authors may not be used to endorse or promote products
26  *    derived from this software without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
29  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
30  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
31  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
32  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
35  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
36  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  * Dynamic configuration and disklabel support by:
41  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
42  *	Numerical Aerodynamic Simulation Facility
43  *	Mail Stop 258-6
44  *	NASA Ames Research Center
45  *	Moffett Field, CA 94035
46  *
47  * from: Utah $Hdr: cd.c 1.6 90/11/28$
48  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
49  *	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
50  */
51 
52 #include <sys/cdefs.h>
53 __FBSDID("$FreeBSD$");
54 
55 #include <sys/param.h>
56 #include <sys/systm.h>
57 #include <sys/kernel.h>
58 #include <sys/module.h>
59 #include <sys/bio.h>
60 #include <sys/malloc.h>
61 #include <sys/sbuf.h>
62 #include <geom/geom.h>
63 
/*
 * Number of DEV_BSIZE blocks to leave untouched in front of a component
 * partition.  This is to avoid violating its disklabel area when it
 * starts at the beginning of the slice.
 *
 * ccdinit() copies this value into sc_offset unless CCDF_NO_OFFSET or
 * CCDF_LINUX is set, in which case no blocks are skipped.  The value can
 * be overridden at build time by defining CCD_OFFSET beforehand.
 */
#if !defined(CCD_OFFSET)
#define CCD_OFFSET 16
#endif
72 
/*
 * Bits kept in ccd_s.sc_flags.  g_ccd_create() sets them from the
 * "uniform", "mirror", "no_offset" and "linux" request parameters.
 */
#define CCDF_UNIFORM	0x02	/* size all components as the smallest one */
#define CCDF_MIRROR	0x04	/* use mirroring */
#define CCDF_NO_OFFSET	0x08	/* do not leave space in front */
#define CCDF_LINUX	0x10	/* use Linux compatibility mode */

/*
 * Mask of user-settable ccd flags.  g_ccd_list() reports sc_flags
 * filtered through this mask.
 */
#define CCDF_USERMASK	(CCDF_UNIFORM|CCDF_MIRROR)
81 
/*
 * Interleave description table.
 * Computed by ccdinterleave() when a ccd is configured, to speed up
 * irregular-interleave lookups.
 * The idea is that we interleave in "groups".  First we interleave
 * evenly over all component disks up to the size of the smallest
 * component (the first group), then we interleave evenly over all
 * remaining disks up to the size of the next-smallest (second group),
 * and so on.
 *
 * Each table entry describes the interleave characteristics of one
 * of these groups.  For example if a concatenated disk consisted of
 * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at
 * DEV_BSIZE (1), the table would have three entries:
 *
 *	ndisk	startblk	startoff	dev
 *	3	0		0		0, 1, 2
 *	2	9		3		0, 2
 *	1	13		5		2
 *	0	-		-		-
 *
 * which says that the first nine blocks (0-8) are interleaved over
 * 3 disks (0, 1, 2) starting at block offset 0 on any component disk,
 * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting
 * at component block 3, and the remaining blocks (13-14) are on disk
 * 2 starting at offset 5.
 *
 * The table is terminated by an entry whose ii_ndisk is 0.  With a
 * non-zero interleave, ii_startblk and ii_startoff are counted in units
 * of sc_ileave blocks; with an interleave of 0 there is one entry per
 * component and ii_startblk is a plain block number.
 */
struct ccdiinfo {
	int	ii_ndisk;	/* # of disks range is interleaved over */
	daddr_t	ii_startblk;	/* starting scaled block # for range */
	daddr_t	ii_startoff;	/* starting component offset (block #) */
	int	*ii_index;	/* ordered list of components in range */
};
114 
/*
 * Component info table.
 * Describes a single component of a concatenated disk.
 *
 * ci_size is filled in by ccdinit(): the provider's media size in
 * DEV_BSIZE blocks, minus sc_offset, truncated to a multiple of the
 * interleave (and forced to the smallest component's size when
 * CCDF_UNIFORM is set).
 */
struct ccdcinfo {
	daddr_t		ci_size; 		/* usable size, DEV_BSIZE blocks */
	struct g_provider *ci_provider;		/* provider backing the component */
	struct g_consumer *ci_consumer;		/* consumer attached to ci_provider */
};
124 
/*
 * A concatenated disk is described by this structure.
 *
 * One instance is allocated per geom by g_ccd_create() and hung off
 * gp->softc; g_ccd_freesc() releases it together with sc_cinfo and
 * sc_itable.
 */

struct ccd_s {
	LIST_ENTRY(ccd_s) list;			/* list linkage */

	int		 sc_unit;		/* logical unit number */
	int		 sc_flags;		/* CCDF_* flags */
	daddr_t		 sc_size;		/* size of ccd, DEV_BSIZE blocks */
	int		 sc_ileave;		/* interleave in blocks, 0 = concat */
	u_int		 sc_ndisks;		/* number of components */
	struct ccdcinfo	 *sc_cinfo;		/* component info, sc_ndisks long */
	struct ccdiinfo	 *sc_itable;		/* interleave table */
	u_int32_t	 sc_secsize;		/* # bytes per sector */
	int		 sc_pick;		/* side of mirror picked for reads */
	daddr_t		 sc_blk[2];		/* mirror localization: block after
						 * the last read on each side */
	u_int32_t	 sc_offset;		/* actual offset used, in blocks */
};
144 
/*
 * Forward declarations for routines that are used before they are
 * defined below.
 */
static g_start_t g_ccd_start;
static void ccdiodone(struct bio *bp);
static void ccdinterleave(struct ccd_s *);
static int ccdinit(struct gctl_req *req, struct ccd_s *);
static int ccdbuffer(struct bio **ret, struct ccd_s *,
		      struct bio *, daddr_t, caddr_t, long);
151 
/*
 * GEOM orphan method for the class (installed as .orphan in g_ccd_class).
 * Intentionally a no-op: nothing is torn down when cp is orphaned.
 */
static void
g_ccd_orphan(struct g_consumer *cp)
{
	/*
	 * XXX: We don't do anything here.  It is not obvious
	 * XXX: what DTRT would be, so we do what the previous
	 * XXX: code did: ignore it and let the user cope.
	 */
}
161 
162 static int
163 g_ccd_access(struct g_provider *pp, int dr, int dw, int de)
164 {
165 	struct g_geom *gp;
166 	struct g_consumer *cp1, *cp2;
167 	int error;
168 
169 	de += dr;
170 	de += dw;
171 
172 	gp = pp->geom;
173 	error = ENXIO;
174 	LIST_FOREACH(cp1, &gp->consumer, consumer) {
175 		error = g_access(cp1, dr, dw, de);
176 		if (error) {
177 			LIST_FOREACH(cp2, &gp->consumer, consumer) {
178 				if (cp1 == cp2)
179 					break;
180 				g_access(cp2, -dr, -dw, -de);
181 			}
182 			break;
183 		}
184 	}
185 	return (error);
186 }
187 
188 /*
189  * Free the softc and its substructures.
190  */
191 static void
192 g_ccd_freesc(struct ccd_s *sc)
193 {
194 	struct ccdiinfo *ii;
195 
196 	g_free(sc->sc_cinfo);
197 	if (sc->sc_itable != NULL) {
198 		for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++)
199 			if (ii->ii_index != NULL)
200 				g_free(ii->ii_index);
201 		g_free(sc->sc_itable);
202 	}
203 	g_free(sc);
204 }
205 
206 
207 static int
208 ccdinit(struct gctl_req *req, struct ccd_s *cs)
209 {
210 	struct ccdcinfo *ci;
211 	daddr_t size;
212 	int ix;
213 	daddr_t minsize;
214 	int maxsecsize;
215 	off_t mediasize;
216 	u_int sectorsize;
217 
218 	cs->sc_size = 0;
219 
220 	maxsecsize = 0;
221 	minsize = 0;
222 
223 	if (cs->sc_flags & CCDF_LINUX) {
224 		cs->sc_offset = 0;
225 		cs->sc_ileave *= 2;
226 		if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2)
227 			gctl_error(req, "Mirror mode for Linux raids is "
228 			                "only supported with 2 devices");
229 	} else {
230 		if (cs->sc_flags & CCDF_NO_OFFSET)
231 			cs->sc_offset = 0;
232 		else
233 			cs->sc_offset = CCD_OFFSET;
234 
235 	}
236 	for (ix = 0; ix < cs->sc_ndisks; ix++) {
237 		ci = &cs->sc_cinfo[ix];
238 
239 		mediasize = ci->ci_provider->mediasize;
240 		sectorsize = ci->ci_provider->sectorsize;
241 		if (sectorsize > maxsecsize)
242 			maxsecsize = sectorsize;
243 		size = mediasize / DEV_BSIZE - cs->sc_offset;
244 
245 		/* Truncate to interleave boundary */
246 
247 		if (cs->sc_ileave > 1)
248 			size -= size % cs->sc_ileave;
249 
250 		if (size == 0) {
251 			gctl_error(req, "Component %s has effective size zero",
252 			    ci->ci_provider->name);
253 			return(ENODEV);
254 		}
255 
256 		if (minsize == 0 || size < minsize)
257 			minsize = size;
258 		ci->ci_size = size;
259 		cs->sc_size += size;
260 	}
261 
262 	/*
263 	 * Don't allow the interleave to be smaller than
264 	 * the biggest component sector.
265 	 */
266 	if ((cs->sc_ileave > 0) &&
267 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
268 		gctl_error(req, "Interleave to small for sector size");
269 		return(EINVAL);
270 	}
271 
272 	/*
273 	 * If uniform interleave is desired set all sizes to that of
274 	 * the smallest component.  This will guarentee that a single
275 	 * interleave table is generated.
276 	 *
277 	 * Lost space must be taken into account when calculating the
278 	 * overall size.  Half the space is lost when CCDF_MIRROR is
279 	 * specified.
280 	 */
281 	if (cs->sc_flags & CCDF_UNIFORM) {
282 		for (ix = 0; ix < cs->sc_ndisks; ix++) {
283 			ci = &cs->sc_cinfo[ix];
284 			ci->ci_size = minsize;
285 		}
286 		cs->sc_size = cs->sc_ndisks * minsize;
287 	}
288 
289 	if (cs->sc_flags & CCDF_MIRROR) {
290 		/*
291 		 * Check to see if an even number of components
292 		 * have been specified.  The interleave must also
293 		 * be non-zero in order for us to be able to
294 		 * guarentee the topology.
295 		 */
296 		if (cs->sc_ndisks % 2) {
297 			gctl_error(req,
298 			      "Mirroring requires an even number of disks");
299 			return(EINVAL);
300 		}
301 		if (cs->sc_ileave == 0) {
302 			gctl_error(req,
303 			     "An interleave must be specified when mirroring");
304 			return(EINVAL);
305 		}
306 		cs->sc_size = (cs->sc_ndisks/2) * minsize;
307 	}
308 
309 	/*
310 	 * Construct the interleave table.
311 	 */
312 	ccdinterleave(cs);
313 
314 	/*
315 	 * Create pseudo-geometry based on 1MB cylinders.  It's
316 	 * pretty close.
317 	 */
318 	cs->sc_secsize = maxsecsize;
319 
320 	return (0);
321 }
322 
323 static void
324 ccdinterleave(struct ccd_s *cs)
325 {
326 	struct ccdcinfo *ci, *smallci;
327 	struct ccdiinfo *ii;
328 	daddr_t bn, lbn;
329 	int ix;
330 	daddr_t size;
331 
332 
333 	/*
334 	 * Allocate an interleave table.  The worst case occurs when each
335 	 * of N disks is of a different size, resulting in N interleave
336 	 * tables.
337 	 *
338 	 * Chances are this is too big, but we don't care.
339 	 */
340 	size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo);
341 	cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO);
342 
343 	/*
344 	 * Trivial case: no interleave (actually interleave of disk size).
345 	 * Each table entry represents a single component in its entirety.
346 	 *
347 	 * An interleave of 0 may not be used with a mirror setup.
348 	 */
349 	if (cs->sc_ileave == 0) {
350 		bn = 0;
351 		ii = cs->sc_itable;
352 
353 		for (ix = 0; ix < cs->sc_ndisks; ix++) {
354 			/* Allocate space for ii_index. */
355 			ii->ii_index = g_malloc(sizeof(int), M_WAITOK);
356 			ii->ii_ndisk = 1;
357 			ii->ii_startblk = bn;
358 			ii->ii_startoff = 0;
359 			ii->ii_index[0] = ix;
360 			bn += cs->sc_cinfo[ix].ci_size;
361 			ii++;
362 		}
363 		ii->ii_ndisk = 0;
364 		return;
365 	}
366 
367 	/*
368 	 * The following isn't fast or pretty; it doesn't have to be.
369 	 */
370 	size = 0;
371 	bn = lbn = 0;
372 	for (ii = cs->sc_itable; ; ii++) {
373 		/*
374 		 * Allocate space for ii_index.  We might allocate more then
375 		 * we use.
376 		 */
377 		ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks),
378 		    M_WAITOK);
379 
380 		/*
381 		 * Locate the smallest of the remaining components
382 		 */
383 		smallci = NULL;
384 		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks];
385 		    ci++) {
386 			if (ci->ci_size > size &&
387 			    (smallci == NULL ||
388 			     ci->ci_size < smallci->ci_size)) {
389 				smallci = ci;
390 			}
391 		}
392 
393 		/*
394 		 * Nobody left, all done
395 		 */
396 		if (smallci == NULL) {
397 			ii->ii_ndisk = 0;
398 			g_free(ii->ii_index);
399 			ii->ii_index = NULL;
400 			break;
401 		}
402 
403 		/*
404 		 * Record starting logical block using an sc_ileave blocksize.
405 		 */
406 		ii->ii_startblk = bn / cs->sc_ileave;
407 
408 		/*
409 		 * Record starting component block using an sc_ileave
410 		 * blocksize.  This value is relative to the beginning of
411 		 * a component disk.
412 		 */
413 		ii->ii_startoff = lbn;
414 
415 		/*
416 		 * Determine how many disks take part in this interleave
417 		 * and record their indices.
418 		 */
419 		ix = 0;
420 		for (ci = cs->sc_cinfo;
421 		    ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) {
422 			if (ci->ci_size >= smallci->ci_size) {
423 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
424 			}
425 		}
426 		ii->ii_ndisk = ix;
427 		bn += ix * (smallci->ci_size - size);
428 		lbn = smallci->ci_size / cs->sc_ileave;
429 		size = smallci->ci_size;
430 	}
431 }
432 
433 static void
434 g_ccd_start(struct bio *bp)
435 {
436 	long bcount, rcount;
437 	struct bio *cbp[2];
438 	caddr_t addr;
439 	daddr_t bn;
440 	int err;
441 	struct ccd_s *cs;
442 
443 	cs = bp->bio_to->geom->softc;
444 
445 	/*
446 	 * Block all GETATTR requests, we wouldn't know which of our
447 	 * subdevices we should ship it off to.
448 	 * XXX: this may not be the right policy.
449 	 */
450 	if(bp->bio_cmd == BIO_GETATTR) {
451 		g_io_deliver(bp, EINVAL);
452 		return;
453 	}
454 
455 	/*
456 	 * Translate the partition-relative block number to an absolute.
457 	 */
458 	bn = bp->bio_offset / cs->sc_secsize;
459 
460 	/*
461 	 * Allocate component buffers and fire off the requests
462 	 */
463 	addr = bp->bio_data;
464 	for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
465 		err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
466 		if (err) {
467 			bp->bio_completed += bcount;
468 			if (bp->bio_error == 0)
469 				bp->bio_error = err;
470 			if (bp->bio_completed == bp->bio_length)
471 				g_io_deliver(bp, bp->bio_error);
472 			return;
473 		}
474 		rcount = cbp[0]->bio_length;
475 
476 		if (cs->sc_flags & CCDF_MIRROR) {
477 			/*
478 			 * Mirroring.  Writes go to both disks, reads are
479 			 * taken from whichever disk seems most appropriate.
480 			 *
481 			 * We attempt to localize reads to the disk whos arm
482 			 * is nearest the read request.  We ignore seeks due
483 			 * to writes when making this determination and we
484 			 * also try to avoid hogging.
485 			 */
486 			if (cbp[0]->bio_cmd != BIO_READ) {
487 				g_io_request(cbp[0], cbp[0]->bio_from);
488 				g_io_request(cbp[1], cbp[1]->bio_from);
489 			} else {
490 				int pick = cs->sc_pick;
491 				daddr_t range = cs->sc_size / 16;
492 
493 				if (bn < cs->sc_blk[pick] - range ||
494 				    bn > cs->sc_blk[pick] + range
495 				) {
496 					cs->sc_pick = pick = 1 - pick;
497 				}
498 				cs->sc_blk[pick] = bn + btodb(rcount);
499 				g_io_request(cbp[pick], cbp[pick]->bio_from);
500 			}
501 		} else {
502 			/*
503 			 * Not mirroring
504 			 */
505 			g_io_request(cbp[0], cbp[0]->bio_from);
506 		}
507 		bn += btodb(rcount);
508 		addr += rcount;
509 	}
510 }
511 
512 /*
513  * Build a component buffer header.
514  */
515 static int
516 ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
517 {
518 	struct ccdcinfo *ci, *ci2 = NULL;
519 	struct bio *cbp;
520 	daddr_t cbn, cboff;
521 	off_t cbc;
522 
523 	/*
524 	 * Determine which component bn falls in.
525 	 */
526 	cbn = bn;
527 	cboff = 0;
528 
529 	if (cs->sc_ileave == 0) {
530 		/*
531 		 * Serially concatenated and neither a mirror nor a parity
532 		 * config.  This is a special case.
533 		 */
534 		daddr_t sblk;
535 
536 		sblk = 0;
537 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
538 			sblk += ci->ci_size;
539 		cbn -= sblk;
540 	} else {
541 		struct ccdiinfo *ii;
542 		int ccdisk, off;
543 
544 		/*
545 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
546 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
547 		 * to cbn.
548 		 */
549 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
550 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
551 
552 		/*
553 		 * Figure out which interleave table to use.
554 		 */
555 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
556 			if (ii->ii_startblk > cbn)
557 				break;
558 		}
559 		ii--;
560 
561 		/*
562 		 * off is the logical superblock relative to the beginning
563 		 * of this interleave block.
564 		 */
565 		off = cbn - ii->ii_startblk;
566 
567 		/*
568 		 * We must calculate which disk component to use (ccdisk),
569 		 * and recalculate cbn to be the superblock relative to
570 		 * the beginning of the component.  This is typically done by
571 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
572 		 * must typically be divided by the number of components in
573 		 * this interleave array to be properly convert it from a
574 		 * CCD-relative logical superblock number to a
575 		 * component-relative superblock number.
576 		 */
577 		if (ii->ii_ndisk == 1) {
578 			/*
579 			 * When we have just one disk, it can't be a mirror
580 			 * or a parity config.
581 			 */
582 			ccdisk = ii->ii_index[0];
583 			cbn = ii->ii_startoff + off;
584 		} else {
585 			if (cs->sc_flags & CCDF_MIRROR) {
586 				/*
587 				 * We have forced a uniform mapping, resulting
588 				 * in a single interleave array.  We double
589 				 * up on the first half of the available
590 				 * components and our mirror is in the second
591 				 * half.  This only works with a single
592 				 * interleave array because doubling up
593 				 * doubles the number of sectors, so there
594 				 * cannot be another interleave array because
595 				 * the next interleave array's calculations
596 				 * would be off.
597 				 */
598 				int ndisk2 = ii->ii_ndisk / 2;
599 				ccdisk = ii->ii_index[off % ndisk2];
600 				cbn = ii->ii_startoff + off / ndisk2;
601 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
602 			} else {
603 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
604 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
605 			}
606 		}
607 
608 		ci = &cs->sc_cinfo[ccdisk];
609 
610 		/*
611 		 * Convert cbn from a superblock to a normal block so it
612 		 * can be used to calculate (along with cboff) the normal
613 		 * block index into this particular disk.
614 		 */
615 		cbn *= cs->sc_ileave;
616 	}
617 
618 	/*
619 	 * Fill in the component buf structure.
620 	 */
621 	cbp = g_clone_bio(bp);
622 	if (cbp == NULL)
623 		return (ENOMEM);
624 	cbp->bio_done = g_std_done;
625 	cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset);
626 	cbp->bio_data = addr;
627 	if (cs->sc_ileave == 0)
628               cbc = dbtob((off_t)(ci->ci_size - cbn));
629 	else
630               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
631 	cbp->bio_length = (cbc < bcount) ? cbc : bcount;
632 
633 	cbp->bio_from = ci->ci_consumer;
634 	cb[0] = cbp;
635 
636 	if (cs->sc_flags & CCDF_MIRROR) {
637 		cbp = g_clone_bio(bp);
638 		if (cbp == NULL)
639 			return (ENOMEM);
640 		cbp->bio_done = cb[0]->bio_done = ccdiodone;
641 		cbp->bio_offset = cb[0]->bio_offset;
642 		cbp->bio_data = cb[0]->bio_data;
643 		cbp->bio_length = cb[0]->bio_length;
644 		cbp->bio_from = ci2->ci_consumer;
645 		cbp->bio_caller1 = cb[0];
646 		cb[0]->bio_caller1 = cbp;
647 		cb[1] = cbp;
648 	}
649 	return (0);
650 }
651 
652 /*
653  * Called only for mirrored operations.
654  */
655 static void
656 ccdiodone(struct bio *cbp)
657 {
658 	struct bio *mbp, *pbp;
659 
660 	mbp = cbp->bio_caller1;
661 	pbp = cbp->bio_parent;
662 
663 	if (pbp->bio_cmd == BIO_READ) {
664 		if (cbp->bio_error == 0) {
665 			/* We will not be needing the partner bio */
666 			if (mbp != NULL) {
667 				pbp->bio_inbed++;
668 				g_destroy_bio(mbp);
669 			}
670 			g_std_done(cbp);
671 			return;
672 		}
673 		if (mbp != NULL) {
674 			/* Try partner the bio instead */
675 			mbp->bio_caller1 = NULL;
676 			pbp->bio_inbed++;
677 			g_destroy_bio(cbp);
678 			g_io_request(mbp, mbp->bio_from);
679 			/*
680 			 * XXX: If this comes back OK, we should actually
681 			 * try to write the good data on the failed mirror
682 			 */
683 			return;
684 		}
685 		g_std_done(cbp);
686 		return;
687 	}
688 	if (mbp != NULL) {
689 		mbp->bio_caller1 = NULL;
690 		pbp->bio_inbed++;
691 		if (cbp->bio_error != 0 && pbp->bio_error == 0)
692 			pbp->bio_error = cbp->bio_error;
693 		g_destroy_bio(cbp);
694 		return;
695 	}
696 	g_std_done(cbp);
697 }
698 
699 static void
700 g_ccd_create(struct gctl_req *req, struct g_class *mp)
701 {
702 	int *unit, *ileave, *nprovider;
703 	struct g_geom *gp;
704 	struct g_consumer *cp;
705 	struct g_provider *pp;
706 	struct ccd_s *sc;
707 	struct sbuf *sb;
708 	char buf[20];
709 	int i, error;
710 
711 	g_topology_assert();
712 	unit = gctl_get_paraml(req, "unit", sizeof (*unit));
713 	if (unit == NULL) {
714 		gctl_error(req, "unit parameter not given");
715 		return;
716 	}
717 	ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave));
718 	if (ileave == NULL) {
719 		gctl_error(req, "ileave parameter not given");
720 		return;
721 	}
722 	nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider));
723 	if (nprovider == NULL) {
724 		gctl_error(req, "nprovider parameter not given");
725 		return;
726 	}
727 
728 	/* Check for duplicate unit */
729 	LIST_FOREACH(gp, &mp->geom, geom) {
730 		sc = gp->softc;
731 		if (sc != NULL && sc->sc_unit == *unit) {
732 			gctl_error(req, "Unit %d already configured", *unit);
733 			return;
734 		}
735 	}
736 
737 	if (*nprovider <= 0) {
738 		gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider);
739 		return;
740 	}
741 
742 	/* Check all providers are valid */
743 	for (i = 0; i < *nprovider; i++) {
744 		sprintf(buf, "provider%d", i);
745 		pp = gctl_get_provider(req, buf);
746 		if (pp == NULL)
747 			return;
748 	}
749 
750 	gp = g_new_geomf(mp, "ccd%d", *unit);
751 	sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
752 	gp->softc = sc;
753 	sc->sc_ndisks = *nprovider;
754 
755 	/* Allocate space for the component info. */
756 	sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo),
757 	    M_WAITOK | M_ZERO);
758 
759 	/* Create consumers and attach to all providers */
760 	for (i = 0; i < *nprovider; i++) {
761 		sprintf(buf, "provider%d", i);
762 		pp = gctl_get_provider(req, buf);
763 		cp = g_new_consumer(gp);
764 		error = g_attach(cp, pp);
765 		KASSERT(error == 0, ("attach to %s failed", pp->name));
766 		sc->sc_cinfo[i].ci_consumer = cp;
767 		sc->sc_cinfo[i].ci_provider = pp;
768 	}
769 
770 	sc->sc_unit = *unit;
771 	sc->sc_ileave = *ileave;
772 
773 	if (gctl_get_param(req, "no_offset", NULL))
774 		sc->sc_flags |= CCDF_NO_OFFSET;
775 	if (gctl_get_param(req, "linux", NULL))
776 		sc->sc_flags |= CCDF_LINUX;
777 
778 	if (gctl_get_param(req, "uniform", NULL))
779 		sc->sc_flags |= CCDF_UNIFORM;
780 	if (gctl_get_param(req, "mirror", NULL))
781 		sc->sc_flags |= CCDF_MIRROR;
782 
783 	if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) {
784 		printf("%s: disabling mirror, interleave is 0\n", gp->name);
785 		sc->sc_flags &= ~(CCDF_MIRROR);
786 	}
787 
788 	if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) {
789 		printf("%s: mirror/parity forces uniform flag\n", gp->name);
790 		sc->sc_flags |= CCDF_UNIFORM;
791 	}
792 
793 	error = ccdinit(req, sc);
794 	if (error != 0) {
795 		g_ccd_freesc(sc);
796 		gp->softc = NULL;
797 		g_wither_geom(gp, ENXIO);
798 		return;
799 	}
800 
801 	pp = g_new_providerf(gp, "%s", gp->name);
802 	pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize;
803 	pp->sectorsize = sc->sc_secsize;
804 	g_error_provider(pp, 0);
805 
806 	sb = sbuf_new_auto();
807 	sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider);
808 	for (i = 0; i < *nprovider; i++) {
809 		sbuf_printf(sb, "%s%s",
810 		    i == 0 ? "(" : ", ",
811 		    sc->sc_cinfo[i].ci_provider->name);
812 	}
813 	sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE);
814 	if (sc->sc_ileave != 0)
815 		sbuf_printf(sb, "interleaved at %d blocks\n",
816 			sc->sc_ileave);
817 	else
818 		sbuf_printf(sb, "concatenated\n");
819 	sbuf_finish(sb);
820 	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
821 	sbuf_delete(sb);
822 }
823 
824 static int
825 g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
826 {
827 	struct g_provider *pp;
828 	struct ccd_s *sc;
829 
830 	g_topology_assert();
831 	sc = gp->softc;
832 	pp = LIST_FIRST(&gp->provider);
833 	if (sc == NULL || pp == NULL)
834 		return (EBUSY);
835 	if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
836 		gctl_error(req, "%s is open(r%dw%de%d)", gp->name,
837 		    pp->acr, pp->acw, pp->ace);
838 		return (EBUSY);
839 	}
840 	g_ccd_freesc(sc);
841 	gp->softc = NULL;
842 	g_wither_geom(gp, ENXIO);
843 	return (0);
844 }
845 
846 static void
847 g_ccd_list(struct gctl_req *req, struct g_class *mp)
848 {
849 	struct sbuf *sb;
850 	struct ccd_s *cs;
851 	struct g_geom *gp;
852 	int i, unit, *up;
853 
854 	up = gctl_get_paraml(req, "unit", sizeof (*up));
855 	if (up == NULL) {
856 		gctl_error(req, "unit parameter not given");
857 		return;
858 	}
859 	unit = *up;
860 	sb = sbuf_new_auto();
861 	LIST_FOREACH(gp, &mp->geom, geom) {
862 		cs = gp->softc;
863 		if (cs == NULL || (unit >= 0 && unit != cs->sc_unit))
864 			continue;
865 		sbuf_printf(sb, "ccd%d\t\t%d\t%d\t",
866 		    cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK);
867 
868 		for (i = 0; i < cs->sc_ndisks; ++i) {
869 			sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ",
870 			    cs->sc_cinfo[i].ci_provider->name);
871 		}
872 		sbuf_printf(sb, "\n");
873 	}
874 	sbuf_finish(sb);
875 	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
876 	sbuf_delete(sb);
877 }
878 
879 static void
880 g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
881 {
882 	struct g_geom *gp;
883 
884 	g_topology_assert();
885 	if (!strcmp(verb, "create geom")) {
886 		g_ccd_create(req, mp);
887 	} else if (!strcmp(verb, "destroy geom")) {
888 		gp = gctl_get_geom(req, mp, "geom");
889 		if (gp != NULL)
890 		g_ccd_destroy_geom(req, mp, gp);
891 	} else if (!strcmp(verb, "list")) {
892 		g_ccd_list(req, mp);
893 	} else {
894 		gctl_error(req, "unknown verb");
895 	}
896 }
897 
/*
 * Class descriptor: names the class "CCD" and wires up the methods
 * implemented in this file.
 */
static struct g_class g_ccd_class = {
	.name = "CCD",
	.version = G_VERSION,
	.ctlreq = g_ccd_config,		/* create/destroy/list verbs */
	.destroy_geom = g_ccd_destroy_geom,
	.start = g_ccd_start,		/* I/O entry point */
	.orphan = g_ccd_orphan,		/* no-op */
	.access = g_ccd_access,		/* forwards access counts */
};

/* Register the class with GEOM. */
DECLARE_GEOM_CLASS(g_ccd_class, g_ccd);
909