xref: /freebsd/sys/geom/geom_ccd.c (revision 41063f9380135800856262feb57fb6c5aa6e799f)
1 /*-
2  * Copyright (c) 2003 Poul-Henning Kamp.
3  * Copyright (c) 1995 Jason R. Thorpe.
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * All rights reserved.
7  * Copyright (c) 1988 University of Utah.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * the Systems Programming Group of the University of Utah Computer
11  * Science Department.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed for the NetBSD Project
24  *	by Jason R. Thorpe.
25  * 4. The names of the authors may not be used to endorse or promote products
26  *    derived from this software without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
29  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
30  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
31  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
32  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
35  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
36  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  * Dynamic configuration and disklabel support by:
41  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
42  *	Numerical Aerodynamic Simulation Facility
43  *	Mail Stop 258-6
44  *	NASA Ames Research Center
45  *	Moffett Field, CA 94035
46  *
47  * from: Utah $Hdr: cd.c 1.6 90/11/28$
48  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
49  *	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
50  */
51 
52 #include <sys/cdefs.h>
53 __FBSDID("$FreeBSD$");
54 
55 #include <sys/param.h>
56 #include <sys/systm.h>
57 #include <sys/kernel.h>
58 #include <sys/module.h>
59 #include <sys/bio.h>
60 #include <sys/malloc.h>
61 #include <geom/geom.h>
62 
/*
 * Number of blocks to leave untouched in front of a component partition.
 * This is to avoid violating its disklabel area when it starts at the
 * beginning of the slice.  The value may be overridden at build time.
 */
#ifndef CCD_OFFSET
#define CCD_OFFSET 16
#endif

/* Bits kept in ccd_s.sc_flags. */
#define CCDF_UNIFORM	0x02	/* size every component like the smallest */
#define CCDF_MIRROR	0x04	/* use mirroring */

/* The subset of the flag bits that the user is allowed to set. */
#define CCDF_USERMASK	(CCDF_UNIFORM | CCDF_MIRROR)
78 
79 /*
80  * Interleave description table.
81  * Computed at boot time to speed irregular-interleave lookups.
82  * The idea is that we interleave in "groups".  First we interleave
83  * evenly over all component disks up to the size of the smallest
84  * component (the first group), then we interleave evenly over all
85  * remaining disks up to the size of the next-smallest (second group),
86  * and so on.
87  *
88  * Each table entry describes the interleave characteristics of one
89  * of these groups.  For example if a concatenated disk consisted of
90  * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at
91  * DEV_BSIZE (1), the table would have three entries:
92  *
93  *	ndisk	startblk	startoff	dev
94  *	3	0		0		0, 1, 2
95  *	2	9		3		0, 2
96  *	1	13		5		2
97  *	0	-		-		-
98  *
99  * which says that the first nine blocks (0-8) are interleaved over
100  * 3 disks (0, 1, 2) starting at block offset 0 on any component disk,
101  * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting
102  * at component block 3, and the remaining blocks (13-14) are on disk
103  * 2 starting at offset 5.
104  */
105 struct ccdiinfo {
106 	int	ii_ndisk;	/* # of disks range is interleaved over */
107 	daddr_t	ii_startblk;	/* starting scaled block # for range */
108 	daddr_t	ii_startoff;	/* starting component offset (block #) */
109 	int	*ii_index;	/* ordered list of components in range */
110 };
111 
112 /*
113  * Component info table.
114  * Describes a single component of a concatenated disk.
115  */
116 struct ccdcinfo {
117 	daddr_t		ci_size; 		/* size */
118 	struct g_provider *ci_provider;		/* provider */
119 	struct g_consumer *ci_consumer;		/* consumer */
120 };
121 
122 /*
123  * A concatenated disk is described by this structure.
124  */
125 
126 struct ccd_s {
127 	LIST_ENTRY(ccd_s) list;
128 
129 	int		 sc_unit;		/* logical unit number */
130 	int		 sc_flags;		/* flags */
131 	daddr_t		 sc_size;		/* size of ccd */
132 	int		 sc_ileave;		/* interleave */
133 	u_int		 sc_ndisks;		/* number of components */
134 	struct ccdcinfo	 *sc_cinfo;		/* component info */
135 	struct ccdiinfo	 *sc_itable;		/* interleave table */
136 	u_int32_t	 sc_secsize;		/* # bytes per sector */
137 	int		 sc_pick;		/* side of mirror picked */
138 	daddr_t		 sc_blk[2];		/* mirror localization */
139 };
140 
141 static g_start_t g_ccd_start;
142 static void ccdiodone(struct bio *bp);
143 static void ccdinterleave(struct ccd_s *);
144 static int ccdinit(struct gctl_req *req, struct ccd_s *);
145 static int ccdbuffer(struct bio **ret, struct ccd_s *,
146 		      struct bio *, daddr_t, caddr_t, long);
147 
/*
 * Orphan method for our consumers.  Intentionally a no-op.
 *
 * XXX: It is not obvious what the right reaction would be, so this
 * XXX: keeps the behaviour of the previous code: ignore the event and
 * XXX: let the user cope.
 */
static void
g_ccd_orphan(struct g_consumer *cp)
{
}
157 
158 static int
159 g_ccd_access(struct g_provider *pp, int dr, int dw, int de)
160 {
161 	struct g_geom *gp;
162 	struct g_consumer *cp1, *cp2;
163 	int error;
164 
165 	de += dr;
166 	de += dw;
167 
168 	gp = pp->geom;
169 	error = ENXIO;
170 	LIST_FOREACH(cp1, &gp->consumer, consumer) {
171 		error = g_access(cp1, dr, dw, de);
172 		if (error) {
173 			LIST_FOREACH(cp2, &gp->consumer, consumer) {
174 				if (cp1 == cp2)
175 					break;
176 				g_access(cp2, -dr, -dw, -de);
177 			}
178 			break;
179 		}
180 	}
181 	return (error);
182 }
183 
184 /*
185  * Free the softc and its substructures.
186  */
187 static void
188 g_ccd_freesc(struct ccd_s *sc)
189 {
190 	struct ccdiinfo *ii;
191 
192 	g_free(sc->sc_cinfo);
193 	if (sc->sc_itable != NULL) {
194 		for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++)
195 			if (ii->ii_index != NULL)
196 				g_free(ii->ii_index);
197 		g_free(sc->sc_itable);
198 	}
199 	g_free(sc);
200 }
201 
202 
203 static int
204 ccdinit(struct gctl_req *req, struct ccd_s *cs)
205 {
206 	struct ccdcinfo *ci;
207 	daddr_t size;
208 	int ix;
209 	daddr_t minsize;
210 	int maxsecsize;
211 	off_t mediasize;
212 	u_int sectorsize;
213 
214 	cs->sc_size = 0;
215 
216 	maxsecsize = 0;
217 	minsize = 0;
218 	for (ix = 0; ix < cs->sc_ndisks; ix++) {
219 		ci = &cs->sc_cinfo[ix];
220 
221 		mediasize = ci->ci_provider->mediasize;
222 		sectorsize = ci->ci_provider->sectorsize;
223 		if (sectorsize > maxsecsize)
224 			maxsecsize = sectorsize;
225 		size = mediasize / DEV_BSIZE - CCD_OFFSET;
226 
227 		/* Truncate to interleave boundary */
228 
229 		if (cs->sc_ileave > 1)
230 			size -= size % cs->sc_ileave;
231 
232 		if (size == 0) {
233 			gctl_error(req, "Component %s has effective size zero",
234 			    ci->ci_provider->name);
235 			return(ENODEV);
236 		}
237 
238 		if (minsize == 0 || size < minsize)
239 			minsize = size;
240 		ci->ci_size = size;
241 		cs->sc_size += size;
242 	}
243 
244 	/*
245 	 * Don't allow the interleave to be smaller than
246 	 * the biggest component sector.
247 	 */
248 	if ((cs->sc_ileave > 0) &&
249 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
250 		gctl_error(req, "Interleave to small for sector size");
251 		return(EINVAL);
252 	}
253 
254 	/*
255 	 * If uniform interleave is desired set all sizes to that of
256 	 * the smallest component.  This will guarentee that a single
257 	 * interleave table is generated.
258 	 *
259 	 * Lost space must be taken into account when calculating the
260 	 * overall size.  Half the space is lost when CCDF_MIRROR is
261 	 * specified.
262 	 */
263 	if (cs->sc_flags & CCDF_UNIFORM) {
264 		for (ix = 0; ix < cs->sc_ndisks; ix++) {
265 			ci = &cs->sc_cinfo[ix];
266 			ci->ci_size = minsize;
267 		}
268 		cs->sc_size = cs->sc_ndisks * minsize;
269 	}
270 
271 	if (cs->sc_flags & CCDF_MIRROR) {
272 		/*
273 		 * Check to see if an even number of components
274 		 * have been specified.  The interleave must also
275 		 * be non-zero in order for us to be able to
276 		 * guarentee the topology.
277 		 */
278 		if (cs->sc_ndisks % 2) {
279 			gctl_error(req,
280 			      "Mirroring requires an even number of disks");
281 			return(EINVAL);
282 		}
283 		if (cs->sc_ileave == 0) {
284 			gctl_error(req,
285 			     "An interleave must be specified when mirroring");
286 			return(EINVAL);
287 		}
288 		cs->sc_size = (cs->sc_ndisks/2) * minsize;
289 	}
290 
291 	/*
292 	 * Construct the interleave table.
293 	 */
294 	ccdinterleave(cs);
295 
296 	/*
297 	 * Create pseudo-geometry based on 1MB cylinders.  It's
298 	 * pretty close.
299 	 */
300 	cs->sc_secsize = maxsecsize;
301 
302 	return (0);
303 }
304 
305 static void
306 ccdinterleave(struct ccd_s *cs)
307 {
308 	struct ccdcinfo *ci, *smallci;
309 	struct ccdiinfo *ii;
310 	daddr_t bn, lbn;
311 	int ix;
312 	daddr_t size;
313 
314 
315 	/*
316 	 * Allocate an interleave table.  The worst case occurs when each
317 	 * of N disks is of a different size, resulting in N interleave
318 	 * tables.
319 	 *
320 	 * Chances are this is too big, but we don't care.
321 	 */
322 	size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo);
323 	cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO);
324 
325 	/*
326 	 * Trivial case: no interleave (actually interleave of disk size).
327 	 * Each table entry represents a single component in its entirety.
328 	 *
329 	 * An interleave of 0 may not be used with a mirror setup.
330 	 */
331 	if (cs->sc_ileave == 0) {
332 		bn = 0;
333 		ii = cs->sc_itable;
334 
335 		for (ix = 0; ix < cs->sc_ndisks; ix++) {
336 			/* Allocate space for ii_index. */
337 			ii->ii_index = g_malloc(sizeof(int), M_WAITOK);
338 			ii->ii_ndisk = 1;
339 			ii->ii_startblk = bn;
340 			ii->ii_startoff = 0;
341 			ii->ii_index[0] = ix;
342 			bn += cs->sc_cinfo[ix].ci_size;
343 			ii++;
344 		}
345 		ii->ii_ndisk = 0;
346 		return;
347 	}
348 
349 	/*
350 	 * The following isn't fast or pretty; it doesn't have to be.
351 	 */
352 	size = 0;
353 	bn = lbn = 0;
354 	for (ii = cs->sc_itable; ; ii++) {
355 		/*
356 		 * Allocate space for ii_index.  We might allocate more then
357 		 * we use.
358 		 */
359 		ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks),
360 		    M_WAITOK);
361 
362 		/*
363 		 * Locate the smallest of the remaining components
364 		 */
365 		smallci = NULL;
366 		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks];
367 		    ci++) {
368 			if (ci->ci_size > size &&
369 			    (smallci == NULL ||
370 			     ci->ci_size < smallci->ci_size)) {
371 				smallci = ci;
372 			}
373 		}
374 
375 		/*
376 		 * Nobody left, all done
377 		 */
378 		if (smallci == NULL) {
379 			ii->ii_ndisk = 0;
380 			g_free(ii->ii_index);
381 			ii->ii_index = NULL;
382 			break;
383 		}
384 
385 		/*
386 		 * Record starting logical block using an sc_ileave blocksize.
387 		 */
388 		ii->ii_startblk = bn / cs->sc_ileave;
389 
390 		/*
391 		 * Record starting component block using an sc_ileave
392 		 * blocksize.  This value is relative to the beginning of
393 		 * a component disk.
394 		 */
395 		ii->ii_startoff = lbn;
396 
397 		/*
398 		 * Determine how many disks take part in this interleave
399 		 * and record their indices.
400 		 */
401 		ix = 0;
402 		for (ci = cs->sc_cinfo;
403 		    ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) {
404 			if (ci->ci_size >= smallci->ci_size) {
405 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
406 			}
407 		}
408 		ii->ii_ndisk = ix;
409 		bn += ix * (smallci->ci_size - size);
410 		lbn = smallci->ci_size / cs->sc_ileave;
411 		size = smallci->ci_size;
412 	}
413 }
414 
415 static void
416 g_ccd_start(struct bio *bp)
417 {
418 	long bcount, rcount;
419 	struct bio *cbp[2];
420 	caddr_t addr;
421 	daddr_t bn;
422 	int err;
423 	struct ccd_s *cs;
424 
425 	cs = bp->bio_to->geom->softc;
426 
427 	/*
428 	 * Block all GETATTR requests, we wouldn't know which of our
429 	 * subdevices we should ship it off to.
430 	 * XXX: this may not be the right policy.
431 	 */
432 	if(bp->bio_cmd == BIO_GETATTR) {
433 		g_io_deliver(bp, EINVAL);
434 		return;
435 	}
436 
437 	/*
438 	 * Translate the partition-relative block number to an absolute.
439 	 */
440 	bn = bp->bio_offset / cs->sc_secsize;
441 
442 	/*
443 	 * Allocate component buffers and fire off the requests
444 	 */
445 	addr = bp->bio_data;
446 	for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
447 		err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
448 		if (err) {
449 			bp->bio_completed += bcount;
450 			if (bp->bio_error == 0)
451 				bp->bio_error = err;
452 			if (bp->bio_completed == bp->bio_length)
453 				g_io_deliver(bp, bp->bio_error);
454 			return;
455 		}
456 		rcount = cbp[0]->bio_length;
457 
458 		if (cs->sc_flags & CCDF_MIRROR) {
459 			/*
460 			 * Mirroring.  Writes go to both disks, reads are
461 			 * taken from whichever disk seems most appropriate.
462 			 *
463 			 * We attempt to localize reads to the disk whos arm
464 			 * is nearest the read request.  We ignore seeks due
465 			 * to writes when making this determination and we
466 			 * also try to avoid hogging.
467 			 */
468 			if (cbp[0]->bio_cmd != BIO_READ) {
469 				g_io_request(cbp[0], cbp[0]->bio_from);
470 				g_io_request(cbp[1], cbp[1]->bio_from);
471 			} else {
472 				int pick = cs->sc_pick;
473 				daddr_t range = cs->sc_size / 16;
474 
475 				if (bn < cs->sc_blk[pick] - range ||
476 				    bn > cs->sc_blk[pick] + range
477 				) {
478 					cs->sc_pick = pick = 1 - pick;
479 				}
480 				cs->sc_blk[pick] = bn + btodb(rcount);
481 				g_io_request(cbp[pick], cbp[pick]->bio_from);
482 			}
483 		} else {
484 			/*
485 			 * Not mirroring
486 			 */
487 			g_io_request(cbp[0], cbp[0]->bio_from);
488 		}
489 		bn += btodb(rcount);
490 		addr += rcount;
491 	}
492 }
493 
494 /*
495  * Build a component buffer header.
496  */
497 static int
498 ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
499 {
500 	struct ccdcinfo *ci, *ci2 = NULL;
501 	struct bio *cbp;
502 	daddr_t cbn, cboff;
503 	off_t cbc;
504 
505 	/*
506 	 * Determine which component bn falls in.
507 	 */
508 	cbn = bn;
509 	cboff = 0;
510 
511 	if (cs->sc_ileave == 0) {
512 		/*
513 		 * Serially concatenated and neither a mirror nor a parity
514 		 * config.  This is a special case.
515 		 */
516 		daddr_t sblk;
517 
518 		sblk = 0;
519 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
520 			sblk += ci->ci_size;
521 		cbn -= sblk;
522 	} else {
523 		struct ccdiinfo *ii;
524 		int ccdisk, off;
525 
526 		/*
527 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
528 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
529 		 * to cbn.
530 		 */
531 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
532 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
533 
534 		/*
535 		 * Figure out which interleave table to use.
536 		 */
537 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
538 			if (ii->ii_startblk > cbn)
539 				break;
540 		}
541 		ii--;
542 
543 		/*
544 		 * off is the logical superblock relative to the beginning
545 		 * of this interleave block.
546 		 */
547 		off = cbn - ii->ii_startblk;
548 
549 		/*
550 		 * We must calculate which disk component to use (ccdisk),
551 		 * and recalculate cbn to be the superblock relative to
552 		 * the beginning of the component.  This is typically done by
553 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
554 		 * must typically be divided by the number of components in
555 		 * this interleave array to be properly convert it from a
556 		 * CCD-relative logical superblock number to a
557 		 * component-relative superblock number.
558 		 */
559 		if (ii->ii_ndisk == 1) {
560 			/*
561 			 * When we have just one disk, it can't be a mirror
562 			 * or a parity config.
563 			 */
564 			ccdisk = ii->ii_index[0];
565 			cbn = ii->ii_startoff + off;
566 		} else {
567 			if (cs->sc_flags & CCDF_MIRROR) {
568 				/*
569 				 * We have forced a uniform mapping, resulting
570 				 * in a single interleave array.  We double
571 				 * up on the first half of the available
572 				 * components and our mirror is in the second
573 				 * half.  This only works with a single
574 				 * interleave array because doubling up
575 				 * doubles the number of sectors, so there
576 				 * cannot be another interleave array because
577 				 * the next interleave array's calculations
578 				 * would be off.
579 				 */
580 				int ndisk2 = ii->ii_ndisk / 2;
581 				ccdisk = ii->ii_index[off % ndisk2];
582 				cbn = ii->ii_startoff + off / ndisk2;
583 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
584 			} else {
585 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
586 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
587 			}
588 		}
589 
590 		ci = &cs->sc_cinfo[ccdisk];
591 
592 		/*
593 		 * Convert cbn from a superblock to a normal block so it
594 		 * can be used to calculate (along with cboff) the normal
595 		 * block index into this particular disk.
596 		 */
597 		cbn *= cs->sc_ileave;
598 	}
599 
600 	/*
601 	 * Fill in the component buf structure.
602 	 */
603 	cbp = g_clone_bio(bp);
604 	if (cbp == NULL)
605 		return (ENOMEM);
606 	cbp->bio_done = g_std_done;
607 	cbp->bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
608 	cbp->bio_data = addr;
609 	if (cs->sc_ileave == 0)
610               cbc = dbtob((off_t)(ci->ci_size - cbn));
611 	else
612               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
613 	cbp->bio_length = (cbc < bcount) ? cbc : bcount;
614 
615 	cbp->bio_from = ci->ci_consumer;
616 	cb[0] = cbp;
617 
618 	if (cs->sc_flags & CCDF_MIRROR) {
619 		cbp = g_clone_bio(bp);
620 		if (cbp == NULL)
621 			return (ENOMEM);
622 		cbp->bio_done = cb[0]->bio_done = ccdiodone;
623 		cbp->bio_offset = cb[0]->bio_offset;
624 		cbp->bio_data = cb[0]->bio_data;
625 		cbp->bio_length = cb[0]->bio_length;
626 		cbp->bio_from = ci2->ci_consumer;
627 		cbp->bio_caller1 = cb[0];
628 		cb[0]->bio_caller1 = cbp;
629 		cb[1] = cbp;
630 	}
631 	return (0);
632 }
633 
634 /*
635  * Called only for mirrored operations.
636  */
637 static void
638 ccdiodone(struct bio *cbp)
639 {
640 	struct bio *mbp, *pbp;
641 
642 	mbp = cbp->bio_caller1;
643 	pbp = cbp->bio_parent;
644 
645 	if (pbp->bio_cmd == BIO_READ) {
646 		if (cbp->bio_error == 0) {
647 			/* We will not be needing the partner bio */
648 			if (mbp != NULL) {
649 				pbp->bio_inbed++;
650 				g_destroy_bio(mbp);
651 			}
652 			g_std_done(cbp);
653 			return;
654 		}
655 		if (mbp != NULL) {
656 			/* Try partner the bio instead */
657 			mbp->bio_caller1 = NULL;
658 			pbp->bio_inbed++;
659 			g_destroy_bio(cbp);
660 			g_io_request(mbp, mbp->bio_from);
661 			/*
662 			 * XXX: If this comes back OK, we should actually
663 			 * try to write the good data on the failed mirror
664 			 */
665 			return;
666 		}
667 		g_std_done(cbp);
668 		return;
669 	}
670 	if (mbp != NULL) {
671 		mbp->bio_caller1 = NULL;
672 		pbp->bio_inbed++;
673 		if (cbp->bio_error != 0 && pbp->bio_error == 0)
674 			pbp->bio_error = cbp->bio_error;
675 		g_destroy_bio(cbp);
676 		return;
677 	}
678 	g_std_done(cbp);
679 }
680 
681 static void
682 g_ccd_create(struct gctl_req *req, struct g_class *mp)
683 {
684 	int *unit, *ileave, *nprovider;
685 	struct g_geom *gp;
686 	struct g_consumer *cp;
687 	struct g_provider *pp;
688 	struct ccd_s *sc;
689 	struct sbuf *sb;
690 	char buf[20];
691 	int i, error;
692 
693 	g_topology_assert();
694 	unit = gctl_get_paraml(req, "unit", sizeof (*unit));
695 	ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave));
696 	nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider));
697 
698 	/* Check for duplicate unit */
699 	LIST_FOREACH(gp, &mp->geom, geom) {
700 		sc = gp->softc;
701 		if (sc != NULL && sc->sc_unit == *unit) {
702 			gctl_error(req, "Unit %d already configured", *unit);
703 			return;
704 		}
705 	}
706 
707 	if (*nprovider <= 0) {
708 		gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider);
709 		return;
710 	}
711 
712 	/* Check all providers are valid */
713 	for (i = 0; i < *nprovider; i++) {
714 		sprintf(buf, "provider%d", i);
715 		pp = gctl_get_provider(req, buf);
716 		if (pp == NULL)
717 			return;
718 	}
719 
720 	gp = g_new_geomf(mp, "ccd%d", *unit);
721 	sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
722 	gp->softc = sc;
723 	sc->sc_ndisks = *nprovider;
724 
725 	/* Allocate space for the component info. */
726 	sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo),
727 	    M_WAITOK | M_ZERO);
728 
729 	/* Create consumers and attach to all providers */
730 	for (i = 0; i < *nprovider; i++) {
731 		sprintf(buf, "provider%d", i);
732 		pp = gctl_get_provider(req, buf);
733 		cp = g_new_consumer(gp);
734 		error = g_attach(cp, pp);
735 		KASSERT(error == 0, ("attach to %s failed", pp->name));
736 		sc->sc_cinfo[i].ci_consumer = cp;
737 		sc->sc_cinfo[i].ci_provider = pp;
738 	}
739 
740 	sc->sc_unit = *unit;
741 	sc->sc_ileave = *ileave;
742 
743 	if (gctl_get_param(req, "uniform", NULL))
744 		sc->sc_flags |= CCDF_UNIFORM;
745 	if (gctl_get_param(req, "mirror", NULL))
746 		sc->sc_flags |= CCDF_MIRROR;
747 
748 	if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) {
749 		printf("%s: disabling mirror, interleave is 0\n", gp->name);
750 		sc->sc_flags &= ~(CCDF_MIRROR);
751 	}
752 
753 	if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) {
754 		printf("%s: mirror/parity forces uniform flag\n", gp->name);
755 		sc->sc_flags |= CCDF_UNIFORM;
756 	}
757 
758 	error = ccdinit(req, sc);
759 	if (error != 0) {
760 		g_ccd_freesc(sc);
761 		gp->softc = NULL;
762 		g_wither_geom(gp, ENXIO);
763 		return;
764 	}
765 
766 	pp = g_new_providerf(gp, "%s", gp->name);
767 	pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize;
768 	pp->sectorsize = sc->sc_secsize;
769 	g_error_provider(pp, 0);
770 
771 	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
772 	sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider);
773 	for (i = 0; i < *nprovider; i++) {
774 		sbuf_printf(sb, "%s%s",
775 		    i == 0 ? "(" : ", ",
776 		    sc->sc_cinfo[i].ci_provider->name);
777 	}
778 	sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE);
779 	if (sc->sc_ileave != 0)
780 		sbuf_printf(sb, "interleaved at %d blocks\n",
781 			sc->sc_ileave);
782 	else
783 		sbuf_printf(sb, "concatenated\n");
784 	sbuf_finish(sb);
785 	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
786 	sbuf_delete(sb);
787 }
788 
789 static int
790 g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
791 {
792 	struct g_provider *pp;
793 	struct ccd_s *sc;
794 
795 	g_topology_assert();
796 	sc = gp->softc;
797 	pp = LIST_FIRST(&gp->provider);
798 	if (sc == NULL || pp == NULL)
799 		return (EBUSY);
800 	if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
801 		gctl_error(req, "%s is open(r%dw%de%d)", gp->name,
802 		    pp->acr, pp->acw, pp->ace);
803 		return (EBUSY);
804 	}
805 	g_ccd_freesc(sc);
806 	gp->softc = NULL;
807 	g_wither_geom(gp, ENXIO);
808 	return (0);
809 }
810 
811 static void
812 g_ccd_list(struct gctl_req *req, struct g_class *mp)
813 {
814 	struct sbuf *sb;
815 	struct ccd_s *cs;
816 	struct g_geom *gp;
817 	int i, unit, *up;
818 
819 	up = gctl_get_paraml(req, "unit", sizeof (int));
820 	unit = *up;
821 	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
822 	LIST_FOREACH(gp, &mp->geom, geom) {
823 		cs = gp->softc;
824 		if (cs == NULL || (unit >= 0 && unit != cs->sc_unit))
825 			continue;
826 		sbuf_printf(sb, "ccd%d\t\t%d\t%d\t",
827 		    cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK);
828 
829 		for (i = 0; i < cs->sc_ndisks; ++i) {
830 			sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ",
831 			    cs->sc_cinfo[i].ci_provider->name);
832 		}
833 		sbuf_printf(sb, "\n");
834 	}
835 	sbuf_finish(sb);
836 	gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
837 	sbuf_delete(sb);
838 }
839 
/*
 * Control-request entry point for the class: dispatch on the verb.
 *
 * "create geom" and "list" are passed straight to their handlers;
 * "destroy geom" first resolves the "geom" parameter and does nothing
 * further if that lookup yields no geom.  Any other verb is reported as
 * an error on the request.
 */
static void
g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
{
	struct g_geom *target;

	g_topology_assert();

	if (strcmp(verb, "create geom") == 0) {
		g_ccd_create(req, mp);
		return;
	}

	if (strcmp(verb, "destroy geom") == 0) {
		target = gctl_get_geom(req, mp, "geom");
		if (target != NULL)
			g_ccd_destroy_geom(req, mp, target);
		return;
	}

	if (strcmp(verb, "list") == 0) {
		g_ccd_list(req, mp);
		return;
	}

	gctl_error(req, "unknown verb");
}
858 
859 static struct g_class g_ccd_class = {
860 	.name = "CCD",
861 	.version = G_VERSION,
862 	.ctlreq = g_ccd_config,
863 	.destroy_geom = g_ccd_destroy_geom,
864 	.start = g_ccd_start,
865 	.orphan = g_ccd_orphan,
866 	.access = g_ccd_access,
867 };
868 
869 DECLARE_GEOM_CLASS(g_ccd_class, g_ccd);
870