xref: /freebsd/sys/geom/geom_ccd.c (revision 5521ff5a4d1929056e7ffc982fac3341ca54df7c)
1 /* $FreeBSD$ */
2 
3 /*	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $	*/
4 
5 /*
6  * Copyright (c) 1995 Jason R. Thorpe.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed for the NetBSD Project
20  *	by Jason R. Thorpe.
21  * 4. The name of the author may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 /*
38  * Copyright (c) 1988 University of Utah.
39  * Copyright (c) 1990, 1993
40  *	The Regents of the University of California.  All rights reserved.
41  *
42  * This code is derived from software contributed to Berkeley by
43  * the Systems Programming Group of the University of Utah Computer
44  * Science Department.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  * 3. All advertising materials mentioning features or use of this software
55  *    must display the following acknowledgement:
56  *	This product includes software developed by the University of
57  *	California, Berkeley and its contributors.
58  * 4. Neither the name of the University nor the names of its contributors
59  *    may be used to endorse or promote products derived from this software
60  *    without specific prior written permission.
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
72  * SUCH DAMAGE.
73  *
74  * from: Utah $Hdr: cd.c 1.6 90/11/28$
75  *
76  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
77  */
78 
79 /*
80  * "Concatenated" disk driver.
81  *
82  * Dynamic configuration and disklabel support by:
83  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
84  *	Numerical Aerodynamic Simulation Facility
85  *	Mail Stop 258-6
86  *	NASA Ames Research Center
87  *	Moffett Field, CA 94035
88  */
89 
90 #include "ccd.h"
91 
92 #include <sys/param.h>
93 #include <sys/systm.h>
94 #include <sys/kernel.h>
95 #include <sys/module.h>
96 #include <sys/proc.h>
97 #include <sys/bio.h>
98 #include <sys/malloc.h>
99 #include <sys/namei.h>
100 #include <sys/conf.h>
101 #include <sys/stat.h>
102 #include <sys/sysctl.h>
103 #include <sys/disklabel.h>
104 #include <ufs/ffs/fs.h>
105 #include <sys/devicestat.h>
106 #include <sys/fcntl.h>
107 #include <sys/vnode.h>
108 
109 #include <sys/ccdvar.h>
110 
/* Building with CCDDEBUG defined switches on the DEBUG-only code below. */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
/* Bits in ccddebug; each one gates a class of diagnostic printf()s. */
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
/* Every class is enabled initially; the value is exported read/write. */
static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
    CCDB_VNODE;
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
/*
 * NOTE(review): DEBUG is undefined again right here, so the later
 * "#ifdef DEBUG" sections in this file are compiled out unless DEBUG is
 * re-defined further down -- confirm this is intentional.
 */
#undef DEBUG
#endif
126 
127 #define	ccdunit(x)	dkunit(x)
128 #define ccdpart(x)	dkpart(x)
129 
130 /*
131    This is how mirroring works (only writes are special):
132 
133    When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
134    linked together by the cb_mirror field.  "cb_pflags &
135    CCDPF_MIRROR_DONE" is set to 0 on both of them.
136 
137    When a component returns to ccdiodone(), it checks if "cb_pflags &
138    CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
139    flag and returns.  If it is, it means its partner has already
140    returned, so it will go to the regular cleanup.
141 
142  */
143 
/*
 * Per-component I/O descriptor.  ccdbuffer() fills one in for each request
 * that is handed to a component device.
 *
 * cb_buf has to stay the first member: ccdiodone() receives a pointer to
 * the embedded struct bio and casts it straight back to a struct ccdbuf.
 */
struct ccdbuf {
	struct bio	cb_buf;		/* new I/O buf */
	struct bio	*cb_obp;	/* ptr. to original I/O buf */
	struct ccdbuf	*cb_freenext;	/* free list link */
	int		cb_unit;	/* target unit */
	int		cb_comp;	/* target component */
	int		cb_pflags;	/* mirror/parity status flag */
	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
};
153 
154 /* bits in cb_pflags */
155 #define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */
156 
157 #define CCDLABELDEV(dev)	\
158 	(makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
159 
160 static d_open_t ccdopen;
161 static d_close_t ccdclose;
162 static d_strategy_t ccdstrategy;
163 static d_ioctl_t ccdioctl;
164 static d_dump_t ccddump;
165 static d_psize_t ccdsize;
166 
167 #define NCCDFREEHIWAT	16
168 
169 #define CDEV_MAJOR 74
170 
/*
 * Device switch for ccd units.  The initializer is positional, so the
 * order of the entries must follow the member order of struct cdevsw;
 * the leading comments name the slot each entry fills.
 */
static struct cdevsw ccd_cdevsw = {
	/* open */	ccdopen,
	/* close */	ccdclose,
	/* read */	physread,
	/* write */	physwrite,
	/* ioctl */	ccdioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	ccdstrategy,
	/* name */	"ccd",
	/* maj */	CDEV_MAJOR,
	/* dump */	ccddump,
	/* psize */	ccdsize,
	/* flags */	D_DISK,
};
186 
187 /* called during module initialization */
188 static	void ccdattach __P((void));
189 static	int ccd_modevent __P((module_t, int, void *));
190 
191 /* called by biodone() at interrupt time */
192 static	void ccdiodone __P((struct bio *bp));
193 
194 static	void ccdstart __P((struct ccd_softc *, struct bio *));
195 static	void ccdinterleave __P((struct ccd_softc *, int));
196 static	void ccdintr __P((struct ccd_softc *, struct bio *));
197 static	int ccdinit __P((struct ccddevice *, char **, struct proc *));
198 static	int ccdlookup __P((char *, struct proc *p, struct vnode **));
199 static	void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *,
200 		struct bio *, daddr_t, caddr_t, long));
201 static	void ccdgetdisklabel __P((dev_t));
202 static	void ccdmakedisklabel __P((struct ccd_softc *));
203 static	int ccdlock __P((struct ccd_softc *));
204 static	void ccdunlock __P((struct ccd_softc *));
205 
206 #ifdef DEBUG
207 static	void printiinfo __P((struct ccdiinfo *));
208 #endif
209 
/* Non-private for the benefit of libkvm. */
struct	ccd_softc *ccd_softc;	/* per-unit state array, set up in ccdattach() */
struct	ccddevice *ccddevs;	/* per-unit config array, set up in ccdattach() */
struct	ccdbuf *ccdfreebufs;	/* head of the free list of struct ccdbuf */
static	int numccdfreebufs;	/* number of entries on ccdfreebufs */
static	int numccd = 0;		/* number of units; stays 0 if attach fails */
216 
217 /*
218  * getccdbuf() -	Allocate and zero a ccd buffer.
219  *
220  *	This routine is called at splbio().
221  */
222 
223 static __inline
224 struct ccdbuf *
225 getccdbuf(struct ccdbuf *cpy)
226 {
227 	struct ccdbuf *cbp;
228 
229 	/*
230 	 * Allocate from freelist or malloc as necessary
231 	 */
232 	if ((cbp = ccdfreebufs) != NULL) {
233 		ccdfreebufs = cbp->cb_freenext;
234 		--numccdfreebufs;
235 	} else {
236 		cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
237 	}
238 
239 	/*
240 	 * Used by mirroring code
241 	 */
242 	if (cpy)
243 		bcopy(cpy, cbp, sizeof(struct ccdbuf));
244 	else
245 		bzero(cbp, sizeof(struct ccdbuf));
246 
247 	/*
248 	 * independant struct bio initialization
249 	 */
250 
251 	return(cbp);
252 }
253 
254 /*
255  * putccdbuf() -	Free a ccd buffer.
256  *
257  *	This routine is called at splbio().
258  */
259 
260 static __inline
261 void
262 putccdbuf(struct ccdbuf *cbp)
263 {
264 
265 	if (numccdfreebufs < NCCDFREEHIWAT) {
266 		cbp->cb_freenext = ccdfreebufs;
267 		ccdfreebufs = cbp;
268 		++numccdfreebufs;
269 	} else {
270 		free((caddr_t)cbp, M_DEVBUF);
271 	}
272 }
273 
274 
/*
 * Number of blocks to leave untouched at the front of a component
 * partition.  This avoids overwriting the component's disklabel area
 * when the partition starts at the beginning of the slice.
 */
280 #if !defined(CCD_OFFSET)
281 #define CCD_OFFSET 16
282 #endif
283 
284 static void
285 ccd_clone(void *arg, char *name, int namelen, dev_t *dev)
286 {
287 	int i, u;
288 	char *s;
289 
290 	if (*dev != NODEV)
291 		return;
292 	i = dev_stdclone(name, &s, "ccd", &u);
293 	if (i != 2)
294 		return;
295 	if (u >= numccd)
296 		return;
297 	if (*s < 'a' || *s > 'h')
298 		return;
299 	if (s[1] != '\0')
300 		return;
301 	*dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a',
302 		UID_ROOT, GID_OPERATOR, 0640, name);
303 }
304 
305 /*
306  * Called by main() during pseudo-device attachment.  All we need
307  * to do is allocate enough space for devices to be configured later, and
308  * add devsw entries.
309  */
310 static void
311 ccdattach()
312 {
313 	int i;
314 	int num = NCCD;
315 
316 	if (num > 1)
317 		printf("ccd0-%d: Concatenated disk drivers\n", num-1);
318 	else
319 		printf("ccd0: Concatenated disk driver\n");
320 
321 	ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc),
322 	    M_DEVBUF, M_NOWAIT);
323 	ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice),
324 	    M_DEVBUF, M_NOWAIT);
325 	if ((ccd_softc == NULL) || (ccddevs == NULL)) {
326 		printf("WARNING: no memory for concatenated disks\n");
327 		if (ccd_softc != NULL)
328 			free(ccd_softc, M_DEVBUF);
329 		if (ccddevs != NULL)
330 			free(ccddevs, M_DEVBUF);
331 		return;
332 	}
333 	numccd = num;
334 	bzero(ccd_softc, num * sizeof(struct ccd_softc));
335 	bzero(ccddevs, num * sizeof(struct ccddevice));
336 
337 	cdevsw_add(&ccd_cdevsw);
338 	/* XXX: is this necessary? */
339 	for (i = 0; i < numccd; ++i)
340 		ccddevs[i].ccd_dk = -1;
341 	EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000);
342 }
343 
344 static int
345 ccd_modevent(mod, type, data)
346 	module_t mod;
347 	int type;
348 	void *data;
349 {
350 	int error = 0;
351 
352 	switch (type) {
353 	case MOD_LOAD:
354 		ccdattach();
355 		break;
356 
357 	case MOD_UNLOAD:
358 		printf("ccd0: Unload not supported!\n");
359 		error = EOPNOTSUPP;
360 		break;
361 
362 	default:	/* MOD_SHUTDOWN etc */
363 		break;
364 	}
365 	return (error);
366 }
367 
368 DEV_MODULE(ccd, ccd_modevent, NULL);
369 
370 static int
371 ccdinit(ccd, cpaths, p)
372 	struct ccddevice *ccd;
373 	char **cpaths;
374 	struct proc *p;
375 {
376 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
377 	struct ccdcinfo *ci = NULL;	/* XXX */
378 	size_t size;
379 	int ix;
380 	struct vnode *vp;
381 	size_t minsize;
382 	int maxsecsize;
383 	struct partinfo dpart;
384 	struct ccdgeom *ccg = &cs->sc_geom;
385 	char tmppath[MAXPATHLEN];
386 	int error = 0;
387 
388 #ifdef DEBUG
389 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
390 		printf("ccdinit: unit %d\n", ccd->ccd_unit);
391 #endif
392 
393 	cs->sc_size = 0;
394 	cs->sc_ileave = ccd->ccd_interleave;
395 	cs->sc_nccdisks = ccd->ccd_ndev;
396 
397 	/* Allocate space for the component info. */
398 	cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
399 	    M_DEVBUF, M_WAITOK);
400 
401 	/*
402 	 * Verify that each component piece exists and record
403 	 * relevant information about it.
404 	 */
405 	maxsecsize = 0;
406 	minsize = 0;
407 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
408 		vp = ccd->ccd_vpp[ix];
409 		ci = &cs->sc_cinfo[ix];
410 		ci->ci_vp = vp;
411 
412 		/*
413 		 * Copy in the pathname of the component.
414 		 */
415 		bzero(tmppath, sizeof(tmppath));	/* sanity */
416 		if ((error = copyinstr(cpaths[ix], tmppath,
417 		    MAXPATHLEN, &ci->ci_pathlen)) != 0) {
418 #ifdef DEBUG
419 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
420 				printf("ccd%d: can't copy path, error = %d\n",
421 				    ccd->ccd_unit, error);
422 #endif
423 			goto fail;
424 		}
425 		ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
426 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
427 
428 		ci->ci_dev = vn_todev(vp);
429 
430 		/*
431 		 * Get partition information for the component.
432 		 */
433 		if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
434 		    FREAD, p->p_ucred, p)) != 0) {
435 #ifdef DEBUG
436 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
437 				 printf("ccd%d: %s: ioctl failed, error = %d\n",
438 				     ccd->ccd_unit, ci->ci_path, error);
439 #endif
440 			goto fail;
441 		}
442 		if (dpart.part->p_fstype == FS_BSDFFS) {
443 			maxsecsize =
444 			    ((dpart.disklab->d_secsize > maxsecsize) ?
445 			    dpart.disklab->d_secsize : maxsecsize);
446 			size = dpart.part->p_size - CCD_OFFSET;
447 		} else {
448 #ifdef DEBUG
449 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
450 				printf("ccd%d: %s: incorrect partition type\n",
451 				    ccd->ccd_unit, ci->ci_path);
452 #endif
453 			error = EFTYPE;
454 			goto fail;
455 		}
456 
457 		/*
458 		 * Calculate the size, truncating to an interleave
459 		 * boundary if necessary.
460 		 */
461 
462 		if (cs->sc_ileave > 1)
463 			size -= size % cs->sc_ileave;
464 
465 		if (size == 0) {
466 #ifdef DEBUG
467 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
468 				printf("ccd%d: %s: size == 0\n",
469 				    ccd->ccd_unit, ci->ci_path);
470 #endif
471 			error = ENODEV;
472 			goto fail;
473 		}
474 
475 		if (minsize == 0 || size < minsize)
476 			minsize = size;
477 		ci->ci_size = size;
478 		cs->sc_size += size;
479 	}
480 
481 	/*
482 	 * Don't allow the interleave to be smaller than
483 	 * the biggest component sector.
484 	 */
485 	if ((cs->sc_ileave > 0) &&
486 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
487 #ifdef DEBUG
488 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
489 			printf("ccd%d: interleave must be at least %d\n",
490 			    ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
491 #endif
492 		error = EINVAL;
493 		goto fail;
494 	}
495 
496 	/*
497 	 * If uniform interleave is desired set all sizes to that of
498 	 * the smallest component.  This will guarentee that a single
499 	 * interleave table is generated.
500 	 *
501 	 * Lost space must be taken into account when calculating the
502 	 * overall size.  Half the space is lost when CCDF_MIRROR is
503 	 * specified.  One disk is lost when CCDF_PARITY is specified.
504 	 */
505 	if (ccd->ccd_flags & CCDF_UNIFORM) {
506 		for (ci = cs->sc_cinfo;
507 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
508 			ci->ci_size = minsize;
509 		}
510 		if (ccd->ccd_flags & CCDF_MIRROR) {
511 			/*
512 			 * Check to see if an even number of components
513 			 * have been specified.  The interleave must also
514 			 * be non-zero in order for us to be able to
515 			 * guarentee the topology.
516 			 */
517 			if (cs->sc_nccdisks % 2) {
518 				printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
519 				error = EINVAL;
520 				goto fail;
521 			}
522 			if (cs->sc_ileave == 0) {
523 				printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
524 				error = EINVAL;
525 				goto fail;
526 			}
527 			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
528 		} else if (ccd->ccd_flags & CCDF_PARITY) {
529 			cs->sc_size = (cs->sc_nccdisks-1) * minsize;
530 		} else {
531 			if (cs->sc_ileave == 0) {
532 				printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
533 				error = EINVAL;
534 				goto fail;
535 			}
536 			cs->sc_size = cs->sc_nccdisks * minsize;
537 		}
538 	}
539 
540 	/*
541 	 * Construct the interleave table.
542 	 */
543 	ccdinterleave(cs, ccd->ccd_unit);
544 
545 	/*
546 	 * Create pseudo-geometry based on 1MB cylinders.  It's
547 	 * pretty close.
548 	 */
549 	ccg->ccg_secsize = maxsecsize;
550 	ccg->ccg_ntracks = 1;
551 	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
552 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
553 
554 	/*
555 	 * Add an devstat entry for this device.
556 	 */
557 	devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
558 			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
559 			  DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
560 			  DEVSTAT_PRIORITY_ARRAY);
561 
562 	cs->sc_flags |= CCDF_INITED;
563 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
564 	cs->sc_unit = ccd->ccd_unit;
565 	return (0);
566 fail:
567 	while (ci > cs->sc_cinfo) {
568 		ci--;
569 		free(ci->ci_path, M_DEVBUF);
570 	}
571 	free(cs->sc_cinfo, M_DEVBUF);
572 	return (error);
573 }
574 
/*
 * ccdinterleave() - Build the interleave table cs->sc_itable.
 *
 *	cs	softc whose sc_cinfo[], sc_nccdisks and sc_ileave are
 *		already filled in
 *	unit	unit number (not referenced in the body)
 *
 * The table is an array of struct ccdiinfo terminated by an entry whose
 * ii_ndisk is 0.  With no interleave there is one entry per component.
 * Otherwise the components are walked in order of increasing size, and
 * each pass emits one entry listing every component that is at least as
 * large as the smallest one not yet accounted for.
 */
static void
ccdinterleave(cs, unit)
	struct ccd_softc *cs;
	int unit;
{
	struct ccdcinfo *ci, *smallci;
	struct ccdiinfo *ii;
	daddr_t bn, lbn;
	int ix;
	u_long size;

#ifdef DEBUG
	if (ccddebug & CCDB_INIT)
		printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave);
#endif

	/*
	 * Allocate an interleave table.  The worst case occurs when each
	 * of N disks is of a different size, resulting in N interleave
	 * tables.
	 *
	 * Chances are this is too big, but we don't care.
	 * The extra slot holds the terminating entry; M_ZERO leaves every
	 * field, including ii_ndisk, at 0 until it is filled in.
	 */
	size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
	cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF,
	    M_WAITOK | M_ZERO);

	/*
	 * Trivial case: no interleave (actually interleave of disk size).
	 * Each table entry represents a single component in its entirety.
	 *
	 * An interleave of 0 may not be used with a mirror or parity setup.
	 */
	if (cs->sc_ileave == 0) {
		bn = 0;
		ii = cs->sc_itable;

		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
			/* Allocate space for ii_index. */
			ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
			ii->ii_ndisk = 1;
			ii->ii_startblk = bn;	/* running sum of sizes */
			ii->ii_startoff = 0;
			ii->ii_index[0] = ix;
			bn += cs->sc_cinfo[ix].ci_size;
			ii++;
		}
		ii->ii_ndisk = 0;	/* terminator */
#ifdef DEBUG
		if (ccddebug & CCDB_INIT)
			printiinfo(cs->sc_itable);
#endif
		return;
	}

	/*
	 * The following isn't fast or pretty; it doesn't have to be.
	 * From here on `size' is reused as the component size already
	 * covered by the previous table entries.
	 */
	size = 0;
	bn = lbn = 0;
	for (ii = cs->sc_itable; ; ii++) {
		/*
		 * Allocate space for ii_index.  We might allocate more than
		 * we use.
		 */
		ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
		    M_DEVBUF, M_WAITOK);

		/*
		 * Locate the smallest of the remaining components, i.e. the
		 * smallest one that is strictly larger than `size'.
		 */
		smallci = NULL;
		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
		    ci++) {
			if (ci->ci_size > size &&
			    (smallci == NULL ||
			     ci->ci_size < smallci->ci_size)) {
				smallci = ci;
			}
		}

		/*
		 * Nobody left, all done
		 */
		if (smallci == NULL) {
			ii->ii_ndisk = 0;
			break;
		}

		/*
		 * Record starting logical block using an sc_ileave blocksize.
		 */
		ii->ii_startblk = bn / cs->sc_ileave;

		/*
		 * Record starting component block using an sc_ileave
		 * blocksize.  This value is relative to the beginning of
		 * a component disk.
		 */
		ii->ii_startoff = lbn;

		/*
		 * Determine how many disks take part in this interleave
		 * and record their indices.
		 */
		ix = 0;
		for (ci = cs->sc_cinfo;
		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
			if (ci->ci_size >= smallci->ci_size) {
				ii->ii_index[ix++] = ci - cs->sc_cinfo;
			}
		}
		ii->ii_ndisk = ix;
		/* Each participating disk contributes the newly covered span. */
		bn += ix * (smallci->ci_size - size);
		lbn = smallci->ci_size / cs->sc_ileave;
		size = smallci->ci_size;
	}
#ifdef DEBUG
	if (ccddebug & CCDB_INIT)
		printiinfo(cs->sc_itable);
#endif
}
697 
698 /* ARGSUSED */
699 static int
700 ccdopen(dev, flags, fmt, p)
701 	dev_t dev;
702 	int flags, fmt;
703 	struct proc *p;
704 {
705 	int unit = ccdunit(dev);
706 	struct ccd_softc *cs;
707 	struct disklabel *lp;
708 	int error = 0, part, pmask;
709 
710 #ifdef DEBUG
711 	if (ccddebug & CCDB_FOLLOW)
712 		printf("ccdopen(%p, %x)\n", dev, flags);
713 #endif
714 	if (unit >= numccd)
715 		return (ENXIO);
716 	cs = &ccd_softc[unit];
717 
718 	if ((error = ccdlock(cs)) != 0)
719 		return (error);
720 
721 	lp = &cs->sc_label;
722 
723 	part = ccdpart(dev);
724 	pmask = (1 << part);
725 
726 	/*
727 	 * If we're initialized, check to see if there are any other
728 	 * open partitions.  If not, then it's safe to update
729 	 * the in-core disklabel.
730 	 */
731 	if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
732 		ccdgetdisklabel(dev);
733 
734 	/* Check that the partition exists. */
735 	if (part != RAW_PART && ((part >= lp->d_npartitions) ||
736 	    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
737 		error = ENXIO;
738 		goto done;
739 	}
740 
741 	cs->sc_openmask |= pmask;
742  done:
743 	ccdunlock(cs);
744 	return (0);
745 }
746 
747 /* ARGSUSED */
748 static int
749 ccdclose(dev, flags, fmt, p)
750 	dev_t dev;
751 	int flags, fmt;
752 	struct proc *p;
753 {
754 	int unit = ccdunit(dev);
755 	struct ccd_softc *cs;
756 	int error = 0, part;
757 
758 #ifdef DEBUG
759 	if (ccddebug & CCDB_FOLLOW)
760 		printf("ccdclose(%p, %x)\n", dev, flags);
761 #endif
762 
763 	if (unit >= numccd)
764 		return (ENXIO);
765 	cs = &ccd_softc[unit];
766 
767 	if ((error = ccdlock(cs)) != 0)
768 		return (error);
769 
770 	part = ccdpart(dev);
771 
772 	/* ...that much closer to allowing unconfiguration... */
773 	cs->sc_openmask &= ~(1 << part);
774 	ccdunlock(cs);
775 	return (0);
776 }
777 
778 static void
779 ccdstrategy(bp)
780 	struct bio *bp;
781 {
782 	int unit = ccdunit(bp->bio_dev);
783 	struct ccd_softc *cs = &ccd_softc[unit];
784 	int s;
785 	int wlabel;
786 	struct disklabel *lp;
787 
788 #ifdef DEBUG
789 	if (ccddebug & CCDB_FOLLOW)
790 		printf("ccdstrategy(%p): unit %d\n", bp, unit);
791 #endif
792 	if ((cs->sc_flags & CCDF_INITED) == 0) {
793 		biofinish(bp, NULL, ENXIO);
794 		return;
795 	}
796 
797 	/* If it's a nil transfer, wake up the top half now. */
798 	if (bp->bio_bcount == 0) {
799 		biodone(bp);
800 		return;
801 	}
802 
803 	lp = &cs->sc_label;
804 
805 	/*
806 	 * Do bounds checking and adjust transfer.  If there's an
807 	 * error, the bounds check will flag that for us.
808 	 */
809 	wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
810 	if (ccdpart(bp->bio_dev) != RAW_PART) {
811 		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
812 			biodone(bp);
813 			return;
814 		}
815 	} else {
816 		int pbn;        /* in sc_secsize chunks */
817 		long sz;        /* in sc_secsize chunks */
818 
819 		pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
820 		sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize);
821 
822 		/*
823 		 * If out of bounds return an error. If at the EOF point,
824 		 * simply read or write less.
825 		 */
826 
827 		if (pbn < 0 || pbn >= cs->sc_size) {
828 			bp->bio_resid = bp->bio_bcount;
829 			if (pbn != cs->sc_size)
830 				biofinish(bp, NULL, EINVAL);
831 			else
832 				biodone(bp);
833 			return;
834 		}
835 
836 		/*
837 		 * If the request crosses EOF, truncate the request.
838 		 */
839 		if (pbn + sz > cs->sc_size) {
840 			bp->bio_bcount = (cs->sc_size - pbn) *
841 			    cs->sc_geom.ccg_secsize;
842 		}
843 	}
844 
845 	bp->bio_resid = bp->bio_bcount;
846 
847 	/*
848 	 * "Start" the unit.
849 	 */
850 	s = splbio();
851 	ccdstart(cs, bp);
852 	splx(s);
853 	return;
854 }
855 
856 static void
857 ccdstart(cs, bp)
858 	struct ccd_softc *cs;
859 	struct bio *bp;
860 {
861 	long bcount, rcount;
862 	struct ccdbuf *cbp[4];
863 	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
864 	caddr_t addr;
865 	daddr_t bn;
866 	struct partition *pp;
867 
868 #ifdef DEBUG
869 	if (ccddebug & CCDB_FOLLOW)
870 		printf("ccdstart(%p, %p)\n", cs, bp);
871 #endif
872 
873 	/* Record the transaction start  */
874 	devstat_start_transaction(&cs->device_stats);
875 
876 	/*
877 	 * Translate the partition-relative block number to an absolute.
878 	 */
879 	bn = bp->bio_blkno;
880 	if (ccdpart(bp->bio_dev) != RAW_PART) {
881 		pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)];
882 		bn += pp->p_offset;
883 	}
884 
885 	/*
886 	 * Allocate component buffers and fire off the requests
887 	 */
888 	addr = bp->bio_data;
889 	for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) {
890 		ccdbuffer(cbp, cs, bp, bn, addr, bcount);
891 		rcount = cbp[0]->cb_buf.bio_bcount;
892 
893 		if (cs->sc_cflags & CCDF_MIRROR) {
894 			/*
895 			 * Mirroring.  Writes go to both disks, reads are
896 			 * taken from whichever disk seems most appropriate.
897 			 *
898 			 * We attempt to localize reads to the disk whos arm
899 			 * is nearest the read request.  We ignore seeks due
900 			 * to writes when making this determination and we
901 			 * also try to avoid hogging.
902 			 */
903 			if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) {
904 				BIO_STRATEGY(&cbp[0]->cb_buf, 0);
905 				BIO_STRATEGY(&cbp[1]->cb_buf, 0);
906 			} else {
907 				int pick = cs->sc_pick;
908 				daddr_t range = cs->sc_size / 16;
909 
910 				if (bn < cs->sc_blk[pick] - range ||
911 				    bn > cs->sc_blk[pick] + range
912 				) {
913 					cs->sc_pick = pick = 1 - pick;
914 				}
915 				cs->sc_blk[pick] = bn + btodb(rcount);
916 				BIO_STRATEGY(&cbp[pick]->cb_buf, 0);
917 			}
918 		} else {
919 			/*
920 			 * Not mirroring
921 			 */
922 			BIO_STRATEGY(&cbp[0]->cb_buf, 0);
923 		}
924 		bn += btodb(rcount);
925 		addr += rcount;
926 	}
927 }
928 
/*
 * Build a component buffer header.
 *
 *	cb	out: cb[0] receives the component buffer; for a mirrored
 *		unit cb[1] receives the buffer for the mirror partner
 *	cs	softc of the unit
 *	bp	original request
 *	bn	starting block of this piece within the ccd
 *	addr	data address for this piece
 *	bcount	bytes of the original request still to be covered
 *
 * The buffer describes at most the part of the request that fits in the
 * current component (no interleave) or the current interleave stripe;
 * the caller reads the actual length back from cb[0]->cb_buf.bio_bcount.
 */
static void
ccdbuffer(cb, cs, bp, bn, addr, bcount)
	struct ccdbuf **cb;
	struct ccd_softc *cs;
	struct bio *bp;
	daddr_t bn;
	caddr_t addr;
	long bcount;
{
	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
	struct ccdbuf *cbp;
	daddr_t cbn, cboff;
	off_t cbc;

#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		printf("ccdbuffer(%p, %p, %d, %p, %ld)\n",
		       cs, bp, bn, addr, bcount);
#endif
	/*
	 * Determine which component bn falls in.
	 */
	cbn = bn;
	cboff = 0;

	if (cs->sc_ileave == 0) {
		/*
		 * Serially concatenated and neither a mirror nor a parity
		 * config.  This is a special case.
		 */
		daddr_t sblk;

		/* Skip whole components until the one containing cbn. */
		sblk = 0;
		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
			sblk += ci->ci_size;
		cbn -= sblk;
	} else {
		struct ccdiinfo *ii;
		int ccdisk, off;

		/*
		 * Calculate cbn, the logical superblock (sc_ileave chunks),
		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
		 * to cbn.
		 */
		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */

		/*
		 * Figure out which interleave table to use: stop at the
		 * first entry that starts beyond cbn (or at the terminator)
		 * and step back one.
		 */
		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
			if (ii->ii_startblk > cbn)
				break;
		}
		ii--;

		/*
		 * off is the logical superblock relative to the beginning
		 * of this interleave block.
		 */
		off = cbn - ii->ii_startblk;

		/*
		 * We must calculate which disk component to use (ccdisk),
		 * and recalculate cbn to be the superblock relative to
		 * the beginning of the component.  This is typically done by
		 * adding 'off' and ii->ii_startoff together.  However, 'off'
		 * must typically be divided by the number of components in
		 * this interleave array to properly convert it from a
		 * CCD-relative logical superblock number to a
		 * component-relative superblock number.
		 */
		if (ii->ii_ndisk == 1) {
			/*
			 * When we have just one disk, it can't be a mirror
			 * or a parity config.
			 */
			ccdisk = ii->ii_index[0];
			cbn = ii->ii_startoff + off;
		} else {
			if (cs->sc_cflags & CCDF_MIRROR) {
				/*
				 * We have forced a uniform mapping, resulting
				 * in a single interleave array.  We double
				 * up on the first half of the available
				 * components and our mirror is in the second
				 * half.  This only works with a single
				 * interleave array because doubling up
				 * doubles the number of sectors, so there
				 * cannot be another interleave array because
				 * the next interleave array's calculations
				 * would be off.
				 */
				int ndisk2 = ii->ii_ndisk / 2;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
			} else if (cs->sc_cflags & CCDF_PARITY) {
				/*
				 * XXX not implemented yet
				 */
				int ndisk2 = ii->ii_ndisk - 1;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				if (cbn % ii->ii_ndisk <= ccdisk)
					ccdisk++;
			} else {
				ccdisk = ii->ii_index[off % ii->ii_ndisk];
				cbn = ii->ii_startoff + off / ii->ii_ndisk;
			}
		}

		ci = &cs->sc_cinfo[ccdisk];

		/*
		 * Convert cbn from a superblock to a normal block so it
		 * can be used to calculate (along with cboff) the normal
		 * block index into this particular disk.
		 */
		cbn *= cs->sc_ileave;
	}

	/*
	 * Fill in the component buf structure.  CCD_OFFSET is added to
	 * step over the blocks reserved at the front of each component.
	 */
	cbp = getccdbuf(NULL);
	cbp->cb_buf.bio_cmd = bp->bio_cmd;
	cbp->cb_buf.bio_done = ccdiodone;
	cbp->cb_buf.bio_dev = ci->ci_dev;		/* XXX */
	cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET;
	cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
	cbp->cb_buf.bio_data = addr;
	/*
	 * cbc is the number of bytes left before the end of the component
	 * (no interleave) or of the current interleave stripe; the piece
	 * is clipped to whichever of cbc and bcount is smaller.
	 */
	if (cs->sc_ileave == 0)
              cbc = dbtob((off_t)(ci->ci_size - cbn));
	else
              cbc = dbtob((off_t)(cs->sc_ileave - cboff));
	cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount;
	/* Stash the byte count in bio_caller1 as well. */
 	cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount;

	/*
	 * context for ccdiodone
	 */
	cbp->cb_obp = bp;
	cbp->cb_unit = cs - ccd_softc;
	cbp->cb_comp = ci - cs->sc_cinfo;

#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		printf(" dev %p(u%ld): cbp %p bn %d addr %p bcnt %ld\n",
		       ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp,
		       cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data,
		       cbp->cb_buf.bio_bcount);
#endif
	cb[0] = cbp;

	/*
	 * Note: both I/O's setup when reading from mirror, but only one
	 * will be executed.
	 */
	if (cs->sc_cflags & CCDF_MIRROR) {
		/* mirror, setup second I/O as a copy aimed at the partner */
		cbp = getccdbuf(cb[0]);
		cbp->cb_buf.bio_dev = ci2->ci_dev;
		cbp->cb_comp = ci2 - cs->sc_cinfo;
		cb[1] = cbp;
		/* link together the ccdbuf's and clear "mirror done" flag */
		cb[0]->cb_mirror = cb[1];
		cb[1]->cb_mirror = cb[0];
		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
	}
}
1105 
1106 static void
1107 ccdintr(cs, bp)
1108 	struct ccd_softc *cs;
1109 	struct bio *bp;
1110 {
1111 #ifdef DEBUG
1112 	if (ccddebug & CCDB_FOLLOW)
1113 		printf("ccdintr(%p, %p)\n", cs, bp);
1114 #endif
1115 	/*
1116 	 * Request is done for better or worse, wakeup the top half.
1117 	 */
1118 	if (bp->bio_flags & BIO_ERROR)
1119 		bp->bio_resid = bp->bio_bcount;
1120 	biofinish(bp, &cs->device_stats, 0);
1121 }
1122 
1123 /*
1124  * Called at interrupt time.
1125  * Mark the component as done and if all components are done,
1126  * take a ccd interrupt.
1127  */
1128 static void
1129 ccdiodone(ibp)
1130 	struct bio *ibp;
1131 {
1132 	struct ccdbuf *cbp = (struct ccdbuf *)ibp;
1133 	struct bio *bp = cbp->cb_obp;
1134 	int unit = cbp->cb_unit;
1135 	int count, s;
1136 
1137 	s = splbio();
1138 #ifdef DEBUG
1139 	if (ccddebug & CCDB_FOLLOW)
1140 		printf("ccdiodone(%p)\n", cbp);
1141 	if (ccddebug & CCDB_IO) {
1142 		printf("ccdiodone: bp %p bcount %ld resid %ld\n",
1143 		       bp, bp->bio_bcount, bp->bio_resid);
1144 		printf(" dev %p(u%d), cbp %p bn %d addr %p bcnt %ld\n",
1145 		       cbp->cb_buf.bio_dev, cbp->cb_comp, cbp,
1146 		       cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data,
1147 		       cbp->cb_buf.bio_bcount);
1148 	}
1149 #endif
1150 	/*
1151 	 * If an error occured, report it.  If this is a mirrored
1152 	 * configuration and the first of two possible reads, do not
1153 	 * set the error in the bp yet because the second read may
1154 	 * succeed.
1155 	 */
1156 
1157 	if (cbp->cb_buf.bio_flags & BIO_ERROR) {
1158 		const char *msg = "";
1159 
1160 		if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1161 		    (cbp->cb_buf.bio_cmd == BIO_READ) &&
1162 		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1163 			/*
1164 			 * We will try our read on the other disk down
1165 			 * below, also reverse the default pick so if we
1166 			 * are doing a scan we do not keep hitting the
1167 			 * bad disk first.
1168 			 */
1169 			struct ccd_softc *cs = &ccd_softc[unit];
1170 
1171 			msg = ", trying other disk";
1172 			cs->sc_pick = 1 - cs->sc_pick;
1173 			cs->sc_blk[cs->sc_pick] = bp->bio_blkno;
1174 		} else {
1175 			bp->bio_flags |= BIO_ERROR;
1176 			bp->bio_error = cbp->cb_buf.bio_error ?
1177 			    cbp->cb_buf.bio_error : EIO;
1178 		}
1179 		printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
1180 		       unit, bp->bio_error, cbp->cb_comp,
1181 		       (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg);
1182 	}
1183 
1184 	/*
1185 	 * Process mirror.  If we are writing, I/O has been initiated on both
1186 	 * buffers and we fall through only after both are finished.
1187 	 *
1188 	 * If we are reading only one I/O is initiated at a time.  If an
1189 	 * error occurs we initiate the second I/O and return, otherwise
1190 	 * we free the second I/O without initiating it.
1191 	 */
1192 
1193 	if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1194 		if (cbp->cb_buf.bio_cmd == BIO_WRITE) {
1195 			/*
1196 			 * When writing, handshake with the second buffer
1197 			 * to determine when both are done.  If both are not
1198 			 * done, return here.
1199 			 */
1200 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1201 				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1202 				putccdbuf(cbp);
1203 				splx(s);
1204 				return;
1205 			}
1206 		} else {
1207 			/*
1208 			 * When reading, either dispose of the second buffer
1209 			 * or initiate I/O on the second buffer if an error
1210 			 * occured with this one.
1211 			 */
1212 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1213 				if (cbp->cb_buf.bio_flags & BIO_ERROR) {
1214 					cbp->cb_mirror->cb_pflags |=
1215 					    CCDPF_MIRROR_DONE;
1216 					BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0);
1217 					putccdbuf(cbp);
1218 					splx(s);
1219 					return;
1220 				} else {
1221 					putccdbuf(cbp->cb_mirror);
1222 					/* fall through */
1223 				}
1224 			}
1225 		}
1226 	}
1227 
1228 	/*
1229 	 * use bio_caller1 to determine how big the original request was rather
1230 	 * then bio_bcount, because bio_bcount may have been truncated for EOF.
1231 	 *
1232 	 * XXX We check for an error, but we do not test the resid for an
1233 	 * aligned EOF condition.  This may result in character & block
1234 	 * device access not recognizing EOF properly when read or written
1235 	 * sequentially, but will not effect filesystems.
1236 	 */
1237 	count = (long)cbp->cb_buf.bio_caller1;
1238 	putccdbuf(cbp);
1239 
1240 	/*
1241 	 * If all done, "interrupt".
1242 	 */
1243 	bp->bio_resid -= count;
1244 	if (bp->bio_resid < 0)
1245 		panic("ccdiodone: count");
1246 	if (bp->bio_resid == 0)
1247 		ccdintr(&ccd_softc[unit], bp);
1248 	splx(s);
1249 }
1250 
1251 static int
1252 ccdioctl(dev, cmd, data, flag, p)
1253 	dev_t dev;
1254 	u_long cmd;
1255 	caddr_t data;
1256 	int flag;
1257 	struct proc *p;
1258 {
1259 	int unit = ccdunit(dev);
1260 	int i, j, lookedup = 0, error = 0;
1261 	int part, pmask, s;
1262 	struct ccd_softc *cs;
1263 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1264 	struct ccddevice ccd;
1265 	char **cpp;
1266 	struct vnode **vpp;
1267 
1268 	if (unit >= numccd)
1269 		return (ENXIO);
1270 	cs = &ccd_softc[unit];
1271 
1272 	bzero(&ccd, sizeof(ccd));
1273 
1274 	switch (cmd) {
1275 	case CCDIOCSET:
1276 		if (cs->sc_flags & CCDF_INITED)
1277 			return (EBUSY);
1278 
1279 		if ((flag & FWRITE) == 0)
1280 			return (EBADF);
1281 
1282 		if ((error = ccdlock(cs)) != 0)
1283 			return (error);
1284 
1285 		/* Fill in some important bits. */
1286 		ccd.ccd_unit = unit;
1287 		ccd.ccd_interleave = ccio->ccio_ileave;
1288 		if (ccd.ccd_interleave == 0 &&
1289 		    ((ccio->ccio_flags & CCDF_MIRROR) ||
1290 		     (ccio->ccio_flags & CCDF_PARITY))) {
1291 			printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1292 			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1293 		}
1294 		if ((ccio->ccio_flags & CCDF_MIRROR) &&
1295 		    (ccio->ccio_flags & CCDF_PARITY)) {
1296 			printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1297 			ccio->ccio_flags &= ~CCDF_PARITY;
1298 		}
1299 		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1300 		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
1301 			printf("ccd%d: mirror/parity forces uniform flag\n",
1302 			       unit);
1303 			ccio->ccio_flags |= CCDF_UNIFORM;
1304 		}
1305 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1306 
1307 		/*
1308 		 * Allocate space for and copy in the array of
1309 		 * componet pathnames and device numbers.
1310 		 */
1311 		cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1312 		    M_DEVBUF, M_WAITOK);
1313 		vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1314 		    M_DEVBUF, M_WAITOK);
1315 
1316 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1317 		    ccio->ccio_ndisks * sizeof(char **));
1318 		if (error) {
1319 			free(vpp, M_DEVBUF);
1320 			free(cpp, M_DEVBUF);
1321 			ccdunlock(cs);
1322 			return (error);
1323 		}
1324 
1325 #ifdef DEBUG
1326 		if (ccddebug & CCDB_INIT)
1327 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1328 				printf("ccdioctl: component %d: %p\n",
1329 				    i, cpp[i]);
1330 #endif
1331 
1332 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1333 #ifdef DEBUG
1334 			if (ccddebug & CCDB_INIT)
1335 				printf("ccdioctl: lookedup = %d\n", lookedup);
1336 #endif
1337 			if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) {
1338 				for (j = 0; j < lookedup; ++j)
1339 					(void)vn_close(vpp[j], FREAD|FWRITE,
1340 					    p->p_ucred, p);
1341 				free(vpp, M_DEVBUF);
1342 				free(cpp, M_DEVBUF);
1343 				ccdunlock(cs);
1344 				return (error);
1345 			}
1346 			++lookedup;
1347 		}
1348 		ccd.ccd_cpp = cpp;
1349 		ccd.ccd_vpp = vpp;
1350 		ccd.ccd_ndev = ccio->ccio_ndisks;
1351 
1352 		/*
1353 		 * Initialize the ccd.  Fills in the softc for us.
1354 		 */
1355 		if ((error = ccdinit(&ccd, cpp, p)) != 0) {
1356 			for (j = 0; j < lookedup; ++j)
1357 				(void)vn_close(vpp[j], FREAD|FWRITE,
1358 				    p->p_ucred, p);
1359 			bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1360 			free(vpp, M_DEVBUF);
1361 			free(cpp, M_DEVBUF);
1362 			ccdunlock(cs);
1363 			return (error);
1364 		}
1365 
1366 		/*
1367 		 * The ccd has been successfully initialized, so
1368 		 * we can place it into the array and read the disklabel.
1369 		 */
1370 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1371 		ccio->ccio_unit = unit;
1372 		ccio->ccio_size = cs->sc_size;
1373 		ccdgetdisklabel(dev);
1374 
1375 		ccdunlock(cs);
1376 
1377 		break;
1378 
1379 	case CCDIOCCLR:
1380 		if ((cs->sc_flags & CCDF_INITED) == 0)
1381 			return (ENXIO);
1382 
1383 		if ((flag & FWRITE) == 0)
1384 			return (EBADF);
1385 
1386 		if ((error = ccdlock(cs)) != 0)
1387 			return (error);
1388 
1389 		/* Don't unconfigure if any other partitions are open */
1390 		part = ccdpart(dev);
1391 		pmask = (1 << part);
1392 		if ((cs->sc_openmask & ~pmask)) {
1393 			ccdunlock(cs);
1394 			return (EBUSY);
1395 		}
1396 
1397 		/*
1398 		 * Free ccd_softc information and clear entry.
1399 		 */
1400 
1401 		/* Close the components and free their pathnames. */
1402 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1403 			/*
1404 			 * XXX: this close could potentially fail and
1405 			 * cause Bad Things.  Maybe we need to force
1406 			 * the close to happen?
1407 			 */
1408 #ifdef DEBUG
1409 			if (ccddebug & CCDB_VNODE)
1410 				vprint("CCDIOCCLR: vnode info",
1411 				    cs->sc_cinfo[i].ci_vp);
1412 #endif
1413 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
1414 			    p->p_ucred, p);
1415 			free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1416 		}
1417 
1418 		/* Free interleave index. */
1419 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1420 			free(cs->sc_itable[i].ii_index, M_DEVBUF);
1421 
1422 		/* Free component info and interleave table. */
1423 		free(cs->sc_cinfo, M_DEVBUF);
1424 		free(cs->sc_itable, M_DEVBUF);
1425 		cs->sc_flags &= ~CCDF_INITED;
1426 
1427 		/*
1428 		 * Free ccddevice information and clear entry.
1429 		 */
1430 		free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1431 		free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1432 		ccd.ccd_dk = -1;
1433 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1434 
1435 		/*
1436 		 * And remove the devstat entry.
1437 		 */
1438 		devstat_remove_entry(&cs->device_stats);
1439 
1440 		/* This must be atomic. */
1441 		s = splhigh();
1442 		ccdunlock(cs);
1443 		bzero(cs, sizeof(struct ccd_softc));
1444 		splx(s);
1445 
1446 		break;
1447 
1448 	case DIOCGDINFO:
1449 		if ((cs->sc_flags & CCDF_INITED) == 0)
1450 			return (ENXIO);
1451 
1452 		*(struct disklabel *)data = cs->sc_label;
1453 		break;
1454 
1455 	case DIOCGPART:
1456 		if ((cs->sc_flags & CCDF_INITED) == 0)
1457 			return (ENXIO);
1458 
1459 		((struct partinfo *)data)->disklab = &cs->sc_label;
1460 		((struct partinfo *)data)->part =
1461 		    &cs->sc_label.d_partitions[ccdpart(dev)];
1462 		break;
1463 
1464 	case DIOCWDINFO:
1465 	case DIOCSDINFO:
1466 		if ((cs->sc_flags & CCDF_INITED) == 0)
1467 			return (ENXIO);
1468 
1469 		if ((flag & FWRITE) == 0)
1470 			return (EBADF);
1471 
1472 		if ((error = ccdlock(cs)) != 0)
1473 			return (error);
1474 
1475 		cs->sc_flags |= CCDF_LABELLING;
1476 
1477 		error = setdisklabel(&cs->sc_label,
1478 		    (struct disklabel *)data, 0);
1479 		if (error == 0) {
1480 			if (cmd == DIOCWDINFO)
1481 				error = writedisklabel(CCDLABELDEV(dev),
1482 				    &cs->sc_label);
1483 		}
1484 
1485 		cs->sc_flags &= ~CCDF_LABELLING;
1486 
1487 		ccdunlock(cs);
1488 
1489 		if (error)
1490 			return (error);
1491 		break;
1492 
1493 	case DIOCWLABEL:
1494 		if ((cs->sc_flags & CCDF_INITED) == 0)
1495 			return (ENXIO);
1496 
1497 		if ((flag & FWRITE) == 0)
1498 			return (EBADF);
1499 		if (*(int *)data != 0)
1500 			cs->sc_flags |= CCDF_WLABEL;
1501 		else
1502 			cs->sc_flags &= ~CCDF_WLABEL;
1503 		break;
1504 
1505 	default:
1506 		return (ENOTTY);
1507 	}
1508 
1509 	return (0);
1510 }
1511 
1512 static int
1513 ccdsize(dev)
1514 	dev_t dev;
1515 {
1516 	struct ccd_softc *cs;
1517 	int part, size;
1518 
1519 	if (ccdopen(dev, 0, S_IFCHR, curproc))
1520 		return (-1);
1521 
1522 	cs = &ccd_softc[ccdunit(dev)];
1523 	part = ccdpart(dev);
1524 
1525 	if ((cs->sc_flags & CCDF_INITED) == 0)
1526 		return (-1);
1527 
1528 	if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1529 		size = -1;
1530 	else
1531 		size = cs->sc_label.d_partitions[part].p_size;
1532 
1533 	if (ccdclose(dev, 0, S_IFCHR, curproc))
1534 		return (-1);
1535 
1536 	return (size);
1537 }
1538 
/*
 * Dump entry point.  Dumping to a ccd is not implemented, so every
 * request is refused with ENXIO; dev is ignored.
 */
static int
ccddump(dev)
	dev_t dev;
{
	int error = ENXIO;

	return (error);
}
1547 
1548 /*
1549  * Lookup the provided name in the filesystem.  If the file exists,
1550  * is a valid block device, and isn't being used by anyone else,
1551  * set *vpp to the file's vnode.
1552  */
1553 static int
1554 ccdlookup(path, p, vpp)
1555 	char *path;
1556 	struct proc *p;
1557 	struct vnode **vpp;	/* result */
1558 {
1559 	struct nameidata nd;
1560 	struct vnode *vp;
1561 	int error, flags;
1562 
1563 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p);
1564 	flags = FREAD | FWRITE;
1565 	if ((error = vn_open(&nd, &flags, 0)) != 0) {
1566 #ifdef DEBUG
1567 		if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1568 			printf("ccdlookup: vn_open error = %d\n", error);
1569 #endif
1570 		return (error);
1571 	}
1572 	vp = nd.ni_vp;
1573 
1574 	if (vp->v_usecount > 1) {
1575 		error = EBUSY;
1576 		goto bad;
1577 	}
1578 
1579 	if (!vn_isdisk(vp, &error))
1580 		goto bad;
1581 
1582 #ifdef DEBUG
1583 	if (ccddebug & CCDB_VNODE)
1584 		vprint("ccdlookup: vnode info", vp);
1585 #endif
1586 
1587 	VOP_UNLOCK(vp, 0, p);
1588 	NDFREE(&nd, NDF_ONLY_PNBUF);
1589 	*vpp = vp;
1590 	return (0);
1591 bad:
1592 	VOP_UNLOCK(vp, 0, p);
1593 	NDFREE(&nd, NDF_ONLY_PNBUF);
1594 	/* vn_close does vrele() for vp */
1595 	(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1596 	return (error);
1597 }
1598 
1599 /*
1600  * Read the disklabel from the ccd.  If one is not present, fake one
1601  * up.
1602  */
1603 static void
1604 ccdgetdisklabel(dev)
1605 	dev_t dev;
1606 {
1607 	int unit = ccdunit(dev);
1608 	struct ccd_softc *cs = &ccd_softc[unit];
1609 	char *errstring;
1610 	struct disklabel *lp = &cs->sc_label;
1611 	struct ccdgeom *ccg = &cs->sc_geom;
1612 
1613 	bzero(lp, sizeof(*lp));
1614 
1615 	lp->d_secperunit = cs->sc_size;
1616 	lp->d_secsize = ccg->ccg_secsize;
1617 	lp->d_nsectors = ccg->ccg_nsectors;
1618 	lp->d_ntracks = ccg->ccg_ntracks;
1619 	lp->d_ncylinders = ccg->ccg_ncylinders;
1620 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1621 
1622 	strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1623 	lp->d_type = DTYPE_CCD;
1624 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1625 	lp->d_rpm = 3600;
1626 	lp->d_interleave = 1;
1627 	lp->d_flags = 0;
1628 
1629 	lp->d_partitions[RAW_PART].p_offset = 0;
1630 	lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1631 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1632 	lp->d_npartitions = RAW_PART + 1;
1633 
1634 	lp->d_bbsize = BBSIZE;				/* XXX */
1635 	lp->d_sbsize = SBSIZE;				/* XXX */
1636 
1637 	lp->d_magic = DISKMAGIC;
1638 	lp->d_magic2 = DISKMAGIC;
1639 	lp->d_checksum = dkcksum(&cs->sc_label);
1640 
1641 	/*
1642 	 * Call the generic disklabel extraction routine.
1643 	 */
1644 	errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label);
1645 	if (errstring != NULL)
1646 		ccdmakedisklabel(cs);
1647 
1648 #ifdef DEBUG
1649 	/* It's actually extremely common to have unlabeled ccds. */
1650 	if (ccddebug & CCDB_LABEL)
1651 		if (errstring != NULL)
1652 			printf("ccd%d: %s\n", unit, errstring);
1653 #endif
1654 }
1655 
1656 /*
1657  * Take care of things one might want to take care of in the event
1658  * that a disklabel isn't present.
1659  */
1660 static void
1661 ccdmakedisklabel(cs)
1662 	struct ccd_softc *cs;
1663 {
1664 	struct disklabel *lp = &cs->sc_label;
1665 
1666 	/*
1667 	 * For historical reasons, if there's no disklabel present
1668 	 * the raw partition must be marked FS_BSDFFS.
1669 	 */
1670 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1671 
1672 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1673 }
1674 
1675 /*
1676  * Wait interruptibly for an exclusive lock.
1677  *
1678  * XXX
1679  * Several drivers do this; it should be abstracted and made MP-safe.
1680  */
1681 static int
1682 ccdlock(cs)
1683 	struct ccd_softc *cs;
1684 {
1685 	int error;
1686 
1687 	while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1688 		cs->sc_flags |= CCDF_WANTED;
1689 		if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0)
1690 			return (error);
1691 	}
1692 	cs->sc_flags |= CCDF_LOCKED;
1693 	return (0);
1694 }
1695 
1696 /*
1697  * Unlock and wake up any waiters.
1698  */
1699 static void
1700 ccdunlock(cs)
1701 	struct ccd_softc *cs;
1702 {
1703 
1704 	cs->sc_flags &= ~CCDF_LOCKED;
1705 	if ((cs->sc_flags & CCDF_WANTED) != 0) {
1706 		cs->sc_flags &= ~CCDF_WANTED;
1707 		wakeup(cs);
1708 	}
1709 }
1710 
1711 #ifdef DEBUG
1712 static void
1713 printiinfo(ii)
1714 	struct ccdiinfo *ii;
1715 {
1716 	int ix, i;
1717 
1718 	for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1719 		printf(" itab[%d]: #dk %d sblk %d soff %d",
1720 		       ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
1721 		for (i = 0; i < ii->ii_ndisk; i++)
1722 			printf(" %d", ii->ii_index[i]);
1723 		printf("\n");
1724 	}
1725 }
1726 #endif
1727