xref: /freebsd/sys/geom/geom_ccd.c (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1 /*-
2  * Copyright (c) 2003 Poul-Henning Kamp.
3  * Copyright (c) 1995 Jason R. Thorpe.
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * All rights reserved.
7  * Copyright (c) 1988 University of Utah.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * the Systems Programming Group of the University of Utah Computer
11  * Science Department.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed for the NetBSD Project
24  *	by Jason R. Thorpe.
25  * 4. The names of the authors may not be used to endorse or promote products
26  *    derived from this software without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
29  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
30  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
31  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
32  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
35  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
36  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  * Dynamic configuration and disklabel support by:
41  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
42  *	Numerical Aerodynamic Simulation Facility
43  *	Mail Stop 258-6
44  *	NASA Ames Research Center
45  *	Moffett Field, CA 94035
46  *
47  * from: Utah $Hdr: cd.c 1.6 90/11/28$
48  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
49  *	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
50  */
51 
52 #include <sys/cdefs.h>
53 __FBSDID("$FreeBSD$");
54 
55 #include <sys/param.h>
56 #include <sys/systm.h>
57 #include <sys/kernel.h>
58 #include <sys/module.h>
59 #include <sys/bio.h>
60 #include <sys/malloc.h>
61 #include <geom/geom.h>
62 
/*
 * Number of blocks to leave untouched at the front of each component
 * partition.  Skipping them keeps the ccd from overwriting the
 * component's disklabel area when the component starts at the
 * beginning of its slice.  May be overridden at build time.
 */
#ifndef CCD_OFFSET
#define CCD_OFFSET 16
#endif
71 
/* Bits stored in ccd_s.sc_flags. */
#define CCDF_UNIFORM	0x02	/* size every component like the smallest */
#define CCDF_MIRROR	0x04	/* second half of components mirrors first */
#define CCDF_NO_OFFSET	0x08	/* do not reserve CCD_OFFSET blocks in front */
#define CCDF_LINUX	0x10	/* Linux compatibility mode */

/* The subset of sc_flags that is user-settable. */
#define CCDF_USERMASK	(CCDF_UNIFORM | CCDF_MIRROR)
80 
81 /*
82  * Interleave description table.
 * Computed when the ccd is configured, to speed irregular-interleave lookups.
84  * The idea is that we interleave in "groups".  First we interleave
85  * evenly over all component disks up to the size of the smallest
86  * component (the first group), then we interleave evenly over all
87  * remaining disks up to the size of the next-smallest (second group),
88  * and so on.
89  *
90  * Each table entry describes the interleave characteristics of one
91  * of these groups.  For example if a concatenated disk consisted of
92  * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at
93  * DEV_BSIZE (1), the table would have three entries:
94  *
95  *	ndisk	startblk	startoff	dev
96  *	3	0		0		0, 1, 2
97  *	2	9		3		0, 2
98  *	1	13		5		2
99  *	0	-		-		-
100  *
101  * which says that the first nine blocks (0-8) are interleaved over
102  * 3 disks (0, 1, 2) starting at block offset 0 on any component disk,
103  * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting
104  * at component block 3, and the remaining blocks (13-14) are on disk
105  * 2 starting at offset 5.
106  */
107 struct ccdiinfo {
108 	int	ii_ndisk;	/* # of disks range is interleaved over */
109 	daddr_t	ii_startblk;	/* starting scaled block # for range */
110 	daddr_t	ii_startoff;	/* starting component offset (block #) */
111 	int	*ii_index;	/* ordered list of components in range */
112 };
113 
114 /*
115  * Component info table.
116  * Describes a single component of a concatenated disk.
117  */
118 struct ccdcinfo {
119 	daddr_t		ci_size; 		/* size */
120 	struct g_provider *ci_provider;		/* provider */
121 	struct g_consumer *ci_consumer;		/* consumer */
122 };
123 
124 /*
125  * A concatenated disk is described by this structure.
126  */
127 
128 struct ccd_s {
129 	LIST_ENTRY(ccd_s) list;
130 
131 	int		 sc_unit;		/* logical unit number */
132 	int		 sc_flags;		/* flags */
133 	daddr_t		 sc_size;		/* size of ccd */
134 	int		 sc_ileave;		/* interleave */
135 	u_int		 sc_ndisks;		/* number of components */
136 	struct ccdcinfo	 *sc_cinfo;		/* component info */
137 	struct ccdiinfo	 *sc_itable;		/* interleave table */
138 	u_int32_t	 sc_secsize;		/* # bytes per sector */
139 	int		 sc_pick;		/* side of mirror picked */
140 	daddr_t		 sc_blk[2];		/* mirror localization */
141 	u_int32_t	 sc_offset;		/* actual offset used */
142 };
143 
144 static g_start_t g_ccd_start;
145 static void ccdiodone(struct bio *bp);
146 static void ccdinterleave(struct ccd_s *);
147 static int ccdinit(struct gctl_req *req, struct ccd_s *);
148 static int ccdbuffer(struct bio **ret, struct ccd_s *,
149 		      struct bio *, daddr_t, caddr_t, long);
150 
/*
 * Orphan method of the CCD class.
 *
 * Deliberately a no-op: it is not obvious what the right reaction to
 * a component going away would be, so, like the code this replaced,
 * the event is ignored and the user is left to cope.
 */
static void
g_ccd_orphan(struct g_consumer *cp)
{
	/* XXX: intentionally empty, see above. */
}
160 
161 static int
162 g_ccd_access(struct g_provider *pp, int dr, int dw, int de)
163 {
164 	struct g_geom *gp;
165 	struct g_consumer *cp1, *cp2;
166 	int error;
167 
168 	de += dr;
169 	de += dw;
170 
171 	gp = pp->geom;
172 	error = ENXIO;
173 	LIST_FOREACH(cp1, &gp->consumer, consumer) {
174 		error = g_access(cp1, dr, dw, de);
175 		if (error) {
176 			LIST_FOREACH(cp2, &gp->consumer, consumer) {
177 				if (cp1 == cp2)
178 					break;
179 				g_access(cp2, -dr, -dw, -de);
180 			}
181 			break;
182 		}
183 	}
184 	return (error);
185 }
186 
187 /*
188  * Free the softc and its substructures.
189  */
190 static void
191 g_ccd_freesc(struct ccd_s *sc)
192 {
193 	struct ccdiinfo *ii;
194 
195 	g_free(sc->sc_cinfo);
196 	if (sc->sc_itable != NULL) {
197 		for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++)
198 			if (ii->ii_index != NULL)
199 				g_free(ii->ii_index);
200 		g_free(sc->sc_itable);
201 	}
202 	g_free(sc);
203 }
204 
205 
206 static int
207 ccdinit(struct gctl_req *req, struct ccd_s *cs)
208 {
209 	struct ccdcinfo *ci;
210 	daddr_t size;
211 	int ix;
212 	daddr_t minsize;
213 	int maxsecsize;
214 	off_t mediasize;
215 	u_int sectorsize;
216 
217 	cs->sc_size = 0;
218 
219 	maxsecsize = 0;
220 	minsize = 0;
221 
222 	if (cs->sc_flags & CCDF_LINUX) {
223 		cs->sc_offset = 0;
224 		cs->sc_ileave *= 2;
225 		if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2)
226 			gctl_error(req, "Mirror mode for Linux raids is "
227 			                "only supported with 2 devices");
228 	} else {
229 		if (cs->sc_flags & CCDF_NO_OFFSET)
230 			cs->sc_offset = 0;
231 		else
232 			cs->sc_offset = CCD_OFFSET;
233 
234 	}
235 	for (ix = 0; ix < cs->sc_ndisks; ix++) {
236 		ci = &cs->sc_cinfo[ix];
237 
238 		mediasize = ci->ci_provider->mediasize;
239 		sectorsize = ci->ci_provider->sectorsize;
240 		if (sectorsize > maxsecsize)
241 			maxsecsize = sectorsize;
242 		size = mediasize / DEV_BSIZE - cs->sc_offset;
243 
244 		/* Truncate to interleave boundary */
245 
246 		if (cs->sc_ileave > 1)
247 			size -= size % cs->sc_ileave;
248 
249 		if (size == 0) {
250 			gctl_error(req, "Component %s has effective size zero",
251 			    ci->ci_provider->name);
252 			return(ENODEV);
253 		}
254 
255 		if (minsize == 0 || size < minsize)
256 			minsize = size;
257 		ci->ci_size = size;
258 		cs->sc_size += size;
259 	}
260 
261 	/*
262 	 * Don't allow the interleave to be smaller than
263 	 * the biggest component sector.
264 	 */
265 	if ((cs->sc_ileave > 0) &&
266 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
267 		gctl_error(req, "Interleave to small for sector size");
268 		return(EINVAL);
269 	}
270 
271 	/*
272 	 * If uniform interleave is desired set all sizes to that of
273 	 * the smallest component.  This will guarentee that a single
274 	 * interleave table is generated.
275 	 *
276 	 * Lost space must be taken into account when calculating the
277 	 * overall size.  Half the space is lost when CCDF_MIRROR is
278 	 * specified.
279 	 */
280 	if (cs->sc_flags & CCDF_UNIFORM) {
281 		for (ix = 0; ix < cs->sc_ndisks; ix++) {
282 			ci = &cs->sc_cinfo[ix];
283 			ci->ci_size = minsize;
284 		}
285 		cs->sc_size = cs->sc_ndisks * minsize;
286 	}
287 
288 	if (cs->sc_flags & CCDF_MIRROR) {
289 		/*
290 		 * Check to see if an even number of components
291 		 * have been specified.  The interleave must also
292 		 * be non-zero in order for us to be able to
293 		 * guarentee the topology.
294 		 */
295 		if (cs->sc_ndisks % 2) {
296 			gctl_error(req,
297 			      "Mirroring requires an even number of disks");
298 			return(EINVAL);
299 		}
300 		if (cs->sc_ileave == 0) {
301 			gctl_error(req,
302 			     "An interleave must be specified when mirroring");
303 			return(EINVAL);
304 		}
305 		cs->sc_size = (cs->sc_ndisks/2) * minsize;
306 	}
307 
308 	/*
309 	 * Construct the interleave table.
310 	 */
311 	ccdinterleave(cs);
312 
313 	/*
314 	 * Create pseudo-geometry based on 1MB cylinders.  It's
315 	 * pretty close.
316 	 */
317 	cs->sc_secsize = maxsecsize;
318 
319 	return (0);
320 }
321 
322 static void
323 ccdinterleave(struct ccd_s *cs)
324 {
325 	struct ccdcinfo *ci, *smallci;
326 	struct ccdiinfo *ii;
327 	daddr_t bn, lbn;
328 	int ix;
329 	daddr_t size;
330 
331 
332 	/*
333 	 * Allocate an interleave table.  The worst case occurs when each
334 	 * of N disks is of a different size, resulting in N interleave
335 	 * tables.
336 	 *
337 	 * Chances are this is too big, but we don't care.
338 	 */
339 	size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo);
340 	cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO);
341 
342 	/*
343 	 * Trivial case: no interleave (actually interleave of disk size).
344 	 * Each table entry represents a single component in its entirety.
345 	 *
346 	 * An interleave of 0 may not be used with a mirror setup.
347 	 */
348 	if (cs->sc_ileave == 0) {
349 		bn = 0;
350 		ii = cs->sc_itable;
351 
352 		for (ix = 0; ix < cs->sc_ndisks; ix++) {
353 			/* Allocate space for ii_index. */
354 			ii->ii_index = g_malloc(sizeof(int), M_WAITOK);
355 			ii->ii_ndisk = 1;
356 			ii->ii_startblk = bn;
357 			ii->ii_startoff = 0;
358 			ii->ii_index[0] = ix;
359 			bn += cs->sc_cinfo[ix].ci_size;
360 			ii++;
361 		}
362 		ii->ii_ndisk = 0;
363 		return;
364 	}
365 
366 	/*
367 	 * The following isn't fast or pretty; it doesn't have to be.
368 	 */
369 	size = 0;
370 	bn = lbn = 0;
371 	for (ii = cs->sc_itable; ; ii++) {
372 		/*
373 		 * Allocate space for ii_index.  We might allocate more then
374 		 * we use.
375 		 */
376 		ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks),
377 		    M_WAITOK);
378 
379 		/*
380 		 * Locate the smallest of the remaining components
381 		 */
382 		smallci = NULL;
383 		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks];
384 		    ci++) {
385 			if (ci->ci_size > size &&
386 			    (smallci == NULL ||
387 			     ci->ci_size < smallci->ci_size)) {
388 				smallci = ci;
389 			}
390 		}
391 
392 		/*
393 		 * Nobody left, all done
394 		 */
395 		if (smallci == NULL) {
396 			ii->ii_ndisk = 0;
397 			g_free(ii->ii_index);
398 			ii->ii_index = NULL;
399 			break;
400 		}
401 
402 		/*
403 		 * Record starting logical block using an sc_ileave blocksize.
404 		 */
405 		ii->ii_startblk = bn / cs->sc_ileave;
406 
407 		/*
408 		 * Record starting component block using an sc_ileave
409 		 * blocksize.  This value is relative to the beginning of
410 		 * a component disk.
411 		 */
412 		ii->ii_startoff = lbn;
413 
414 		/*
415 		 * Determine how many disks take part in this interleave
416 		 * and record their indices.
417 		 */
418 		ix = 0;
419 		for (ci = cs->sc_cinfo;
420 		    ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) {
421 			if (ci->ci_size >= smallci->ci_size) {
422 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
423 			}
424 		}
425 		ii->ii_ndisk = ix;
426 		bn += ix * (smallci->ci_size - size);
427 		lbn = smallci->ci_size / cs->sc_ileave;
428 		size = smallci->ci_size;
429 	}
430 }
431 
432 static void
433 g_ccd_start(struct bio *bp)
434 {
435 	long bcount, rcount;
436 	struct bio *cbp[2];
437 	caddr_t addr;
438 	daddr_t bn;
439 	int err;
440 	struct ccd_s *cs;
441 
442 	cs = bp->bio_to->geom->softc;
443 
444 	/*
445 	 * Block all GETATTR requests, we wouldn't know which of our
446 	 * subdevices we should ship it off to.
447 	 * XXX: this may not be the right policy.
448 	 */
449 	if(bp->bio_cmd == BIO_GETATTR) {
450 		g_io_deliver(bp, EINVAL);
451 		return;
452 	}
453 
454 	/*
455 	 * Translate the partition-relative block number to an absolute.
456 	 */
457 	bn = bp->bio_offset / cs->sc_secsize;
458 
459 	/*
460 	 * Allocate component buffers and fire off the requests
461 	 */
462 	addr = bp->bio_data;
463 	for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
464 		err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
465 		if (err) {
466 			bp->bio_completed += bcount;
467 			if (bp->bio_error == 0)
468 				bp->bio_error = err;
469 			if (bp->bio_completed == bp->bio_length)
470 				g_io_deliver(bp, bp->bio_error);
471 			return;
472 		}
473 		rcount = cbp[0]->bio_length;
474 
475 		if (cs->sc_flags & CCDF_MIRROR) {
476 			/*
477 			 * Mirroring.  Writes go to both disks, reads are
478 			 * taken from whichever disk seems most appropriate.
479 			 *
480 			 * We attempt to localize reads to the disk whos arm
481 			 * is nearest the read request.  We ignore seeks due
482 			 * to writes when making this determination and we
483 			 * also try to avoid hogging.
484 			 */
485 			if (cbp[0]->bio_cmd != BIO_READ) {
486 				g_io_request(cbp[0], cbp[0]->bio_from);
487 				g_io_request(cbp[1], cbp[1]->bio_from);
488 			} else {
489 				int pick = cs->sc_pick;
490 				daddr_t range = cs->sc_size / 16;
491 
492 				if (bn < cs->sc_blk[pick] - range ||
493 				    bn > cs->sc_blk[pick] + range
494 				) {
495 					cs->sc_pick = pick = 1 - pick;
496 				}
497 				cs->sc_blk[pick] = bn + btodb(rcount);
498 				g_io_request(cbp[pick], cbp[pick]->bio_from);
499 			}
500 		} else {
501 			/*
502 			 * Not mirroring
503 			 */
504 			g_io_request(cbp[0], cbp[0]->bio_from);
505 		}
506 		bn += btodb(rcount);
507 		addr += rcount;
508 	}
509 }
510 
511 /*
512  * Build a component buffer header.
513  */
514 static int
515 ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
516 {
517 	struct ccdcinfo *ci, *ci2 = NULL;
518 	struct bio *cbp;
519 	daddr_t cbn, cboff;
520 	off_t cbc;
521 
522 	/*
523 	 * Determine which component bn falls in.
524 	 */
525 	cbn = bn;
526 	cboff = 0;
527 
528 	if (cs->sc_ileave == 0) {
529 		/*
530 		 * Serially concatenated and neither a mirror nor a parity
531 		 * config.  This is a special case.
532 		 */
533 		daddr_t sblk;
534 
535 		sblk = 0;
536 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
537 			sblk += ci->ci_size;
538 		cbn -= sblk;
539 	} else {
540 		struct ccdiinfo *ii;
541 		int ccdisk, off;
542 
543 		/*
544 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
545 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
546 		 * to cbn.
547 		 */
548 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
549 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
550 
551 		/*
552 		 * Figure out which interleave table to use.
553 		 */
554 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
555 			if (ii->ii_startblk > cbn)
556 				break;
557 		}
558 		ii--;
559 
560 		/*
561 		 * off is the logical superblock relative to the beginning
562 		 * of this interleave block.
563 		 */
564 		off = cbn - ii->ii_startblk;
565 
566 		/*
567 		 * We must calculate which disk component to use (ccdisk),
568 		 * and recalculate cbn to be the superblock relative to
569 		 * the beginning of the component.  This is typically done by
570 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
571 		 * must typically be divided by the number of components in
572 		 * this interleave array to be properly convert it from a
573 		 * CCD-relative logical superblock number to a
574 		 * component-relative superblock number.
575 		 */
576 		if (ii->ii_ndisk == 1) {
577 			/*
578 			 * When we have just one disk, it can't be a mirror
579 			 * or a parity config.
580 			 */
581 			ccdisk = ii->ii_index[0];
582 			cbn = ii->ii_startoff + off;
583 		} else {
584 			if (cs->sc_flags & CCDF_MIRROR) {
585 				/*
586 				 * We have forced a uniform mapping, resulting
587 				 * in a single interleave array.  We double
588 				 * up on the first half of the available
589 				 * components and our mirror is in the second
590 				 * half.  This only works with a single
591 				 * interleave array because doubling up
592 				 * doubles the number of sectors, so there
593 				 * cannot be another interleave array because
594 				 * the next interleave array's calculations
595 				 * would be off.
596 				 */
597 				int ndisk2 = ii->ii_ndisk / 2;
598 				ccdisk = ii->ii_index[off % ndisk2];
599 				cbn = ii->ii_startoff + off / ndisk2;
600 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
601 			} else {
602 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
603 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
604 			}
605 		}
606 
607 		ci = &cs->sc_cinfo[ccdisk];
608 
609 		/*
610 		 * Convert cbn from a superblock to a normal block so it
611 		 * can be used to calculate (along with cboff) the normal
612 		 * block index into this particular disk.
613 		 */
614 		cbn *= cs->sc_ileave;
615 	}
616 
617 	/*
618 	 * Fill in the component buf structure.
619 	 */
620 	cbp = g_clone_bio(bp);
621 	if (cbp == NULL)
622 		return (ENOMEM);
623 	cbp->bio_done = g_std_done;
624 	cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset);
625 	cbp->bio_data = addr;
626 	if (cs->sc_ileave == 0)
627               cbc = dbtob((off_t)(ci->ci_size - cbn));
628 	else
629               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
630 	cbp->bio_length = (cbc < bcount) ? cbc : bcount;
631 
632 	cbp->bio_from = ci->ci_consumer;
633 	cb[0] = cbp;
634 
635 	if (cs->sc_flags & CCDF_MIRROR) {
636 		cbp = g_clone_bio(bp);
637 		if (cbp == NULL)
638 			return (ENOMEM);
639 		cbp->bio_done = cb[0]->bio_done = ccdiodone;
640 		cbp->bio_offset = cb[0]->bio_offset;
641 		cbp->bio_data = cb[0]->bio_data;
642 		cbp->bio_length = cb[0]->bio_length;
643 		cbp->bio_from = ci2->ci_consumer;
644 		cbp->bio_caller1 = cb[0];
645 		cb[0]->bio_caller1 = cbp;
646 		cb[1] = cbp;
647 	}
648 	return (0);
649 }
650 
651 /*
652  * Called only for mirrored operations.
653  */
654 static void
655 ccdiodone(struct bio *cbp)
656 {
657 	struct bio *mbp, *pbp;
658 
659 	mbp = cbp->bio_caller1;
660 	pbp = cbp->bio_parent;
661 
662 	if (pbp->bio_cmd == BIO_READ) {
663 		if (cbp->bio_error == 0) {
664 			/* We will not be needing the partner bio */
665 			if (mbp != NULL) {
666 				pbp->bio_inbed++;
667 				g_destroy_bio(mbp);
668 			}
669 			g_std_done(cbp);
670 			return;
671 		}
672 		if (mbp != NULL) {
673 			/* Try partner the bio instead */
674 			mbp->bio_caller1 = NULL;
675 			pbp->bio_inbed++;
676 			g_destroy_bio(cbp);
677 			g_io_request(mbp, mbp->bio_from);
678 			/*
679 			 * XXX: If this comes back OK, we should actually
680 			 * try to write the good data on the failed mirror
681 			 */
682 			return;
683 		}
684 		g_std_done(cbp);
685 		return;
686 	}
687 	if (mbp != NULL) {
688 		mbp->bio_caller1 = NULL;
689 		pbp->bio_inbed++;
690 		if (cbp->bio_error != 0 && pbp->bio_error == 0)
691 			pbp->bio_error = cbp->bio_error;
692 		g_destroy_bio(cbp);
693 		return;
694 	}
695 	g_std_done(cbp);
696 }
697 
698 static void
699 g_ccd_create(struct gctl_req *req, struct g_class *mp)
700 {
701 	int *unit, *ileave, *nprovider;
702 	struct g_geom *gp;
703 	struct g_consumer *cp;
704 	struct g_provider *pp;
705 	struct ccd_s *sc;
706 	struct sbuf *sb;
707 	char buf[20];
708 	int i, error;
709 
710 	g_topology_assert();
711 	unit = gctl_get_paraml(req, "unit", sizeof (*unit));
712 	ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave));
713 	nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider));
714 
715 	/* Check for duplicate unit */
716 	LIST_FOREACH(gp, &mp->geom, geom) {
717 		sc = gp->softc;
718 		if (sc != NULL && sc->sc_unit == *unit) {
719 			gctl_error(req, "Unit %d already configured", *unit);
720 			return;
721 		}
722 	}
723 
724 	if (*nprovider <= 0) {
725 		gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider);
726 		return;
727 	}
728 
729 	/* Check all providers are valid */
730 	for (i = 0; i < *nprovider; i++) {
731 		sprintf(buf, "provider%d", i);
732 		pp = gctl_get_provider(req, buf);
733 		if (pp == NULL)
734 			return;
735 	}
736 
737 	gp = g_new_geomf(mp, "ccd%d", *unit);
738 	sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
739 	gp->softc = sc;
740 	sc->sc_ndisks = *nprovider;
741 
742 	/* Allocate space for the component info. */
743 	sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo),
744 	    M_WAITOK | M_ZERO);
745 
746 	/* Create consumers and attach to all providers */
747 	for (i = 0; i < *nprovider; i++) {
748 		sprintf(buf, "provider%d", i);
749 		pp = gctl_get_provider(req, buf);
750 		cp = g_new_consumer(gp);
751 		error = g_attach(cp, pp);
752 		KASSERT(error == 0, ("attach to %s failed", pp->name));
753 		sc->sc_cinfo[i].ci_consumer = cp;
754 		sc->sc_cinfo[i].ci_provider = pp;
755 	}
756 
757 	sc->sc_unit = *unit;
758 	sc->sc_ileave = *ileave;
759 
760 	if (gctl_get_param(req, "no_offset", NULL))
761 		sc->sc_flags |= CCDF_NO_OFFSET;
762 	if (gctl_get_param(req, "linux", NULL))
763 		sc->sc_flags |= CCDF_LINUX;
764 
765 	if (gctl_get_param(req, "uniform", NULL))
766 		sc->sc_flags |= CCDF_UNIFORM;
767 	if (gctl_get_param(req, "mirror", NULL))
768 		sc->sc_flags |= CCDF_MIRROR;
769 
770 	if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) {
771 		printf("%s: disabling mirror, interleave is 0\n", gp->name);
772 		sc->sc_flags &= ~(CCDF_MIRROR);
773 	}
774 
775 	if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) {
776 		printf("%s: mirror/parity forces uniform flag\n", gp->name);
777 		sc->sc_flags |= CCDF_UNIFORM;
778 	}
779 
780 	error = ccdinit(req, sc);
781 	if (error != 0) {
782 		g_ccd_freesc(sc);
783 		gp->softc = NULL;
784 		g_wither_geom(gp, ENXIO);
785 		return;
786 	}
787 
788 	pp = g_new_providerf(gp, "%s", gp->name);
789 	pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize;
790 	pp->sectorsize = sc->sc_secsize;
791 	g_error_provider(pp, 0);
792 
793 	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
794 	sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider);
795 	for (i = 0; i < *nprovider; i++) {
796 		sbuf_printf(sb, "%s%s",
797 		    i == 0 ? "(" : ", ",
798 		    sc->sc_cinfo[i].ci_provider->name);
799 	}
800 	sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE);
801 	if (sc->sc_ileave != 0)
802 		sbuf_printf(sb, "interleaved at %d blocks\n",
803 			sc->sc_ileave);
804 	else
805 		sbuf_printf(sb, "concatenated\n");
806 	sbuf_finish(sb);
807 	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
808 	sbuf_delete(sb);
809 }
810 
811 static int
812 g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
813 {
814 	struct g_provider *pp;
815 	struct ccd_s *sc;
816 
817 	g_topology_assert();
818 	sc = gp->softc;
819 	pp = LIST_FIRST(&gp->provider);
820 	if (sc == NULL || pp == NULL)
821 		return (EBUSY);
822 	if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
823 		gctl_error(req, "%s is open(r%dw%de%d)", gp->name,
824 		    pp->acr, pp->acw, pp->ace);
825 		return (EBUSY);
826 	}
827 	g_ccd_freesc(sc);
828 	gp->softc = NULL;
829 	g_wither_geom(gp, ENXIO);
830 	return (0);
831 }
832 
833 static void
834 g_ccd_list(struct gctl_req *req, struct g_class *mp)
835 {
836 	struct sbuf *sb;
837 	struct ccd_s *cs;
838 	struct g_geom *gp;
839 	int i, unit, *up;
840 
841 	up = gctl_get_paraml(req, "unit", sizeof (int));
842 	unit = *up;
843 	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
844 	LIST_FOREACH(gp, &mp->geom, geom) {
845 		cs = gp->softc;
846 		if (cs == NULL || (unit >= 0 && unit != cs->sc_unit))
847 			continue;
848 		sbuf_printf(sb, "ccd%d\t\t%d\t%d\t",
849 		    cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK);
850 
851 		for (i = 0; i < cs->sc_ndisks; ++i) {
852 			sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ",
853 			    cs->sc_cinfo[i].ci_provider->name);
854 		}
855 		sbuf_printf(sb, "\n");
856 	}
857 	sbuf_finish(sb);
858 	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
859 	sbuf_delete(sb);
860 }
861 
862 static void
863 g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
864 {
865 	struct g_geom *gp;
866 
867 	g_topology_assert();
868 	if (!strcmp(verb, "create geom")) {
869 		g_ccd_create(req, mp);
870 	} else if (!strcmp(verb, "destroy geom")) {
871 		gp = gctl_get_geom(req, mp, "geom");
872 		if (gp != NULL)
873 		g_ccd_destroy_geom(req, mp, gp);
874 	} else if (!strcmp(verb, "list")) {
875 		g_ccd_list(req, mp);
876 	} else {
877 		gctl_error(req, "unknown verb");
878 	}
879 }
880 
881 static struct g_class g_ccd_class = {
882 	.name = "CCD",
883 	.version = G_VERSION,
884 	.ctlreq = g_ccd_config,
885 	.destroy_geom = g_ccd_destroy_geom,
886 	.start = g_ccd_start,
887 	.orphan = g_ccd_orphan,
888 	.access = g_ccd_access,
889 };
890 
891 DECLARE_GEOM_CLASS(g_ccd_class, g_ccd);
892