xref: /freebsd/sys/geom/geom_ccd.c (revision 5129159789cc9d7bc514e4546b88e3427695002d)
1 /* $FreeBSD$ */
2 
3 /*	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $	*/
4 
5 /*
6  * Copyright (c) 1995 Jason R. Thorpe.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed for the NetBSD Project
20  *	by Jason R. Thorpe.
21  * 4. The name of the author may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 /*
38  * Copyright (c) 1988 University of Utah.
39  * Copyright (c) 1990, 1993
40  *	The Regents of the University of California.  All rights reserved.
41  *
42  * This code is derived from software contributed to Berkeley by
43  * the Systems Programming Group of the University of Utah Computer
44  * Science Department.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  * 3. All advertising materials mentioning features or use of this software
55  *    must display the following acknowledgement:
56  *	This product includes software developed by the University of
57  *	California, Berkeley and its contributors.
58  * 4. Neither the name of the University nor the names of its contributors
59  *    may be used to endorse or promote products derived from this software
60  *    without specific prior written permission.
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
72  * SUCH DAMAGE.
73  *
74  * from: Utah $Hdr: cd.c 1.6 90/11/28$
75  *
76  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
77  */
78 
79 /*
80  * "Concatenated" disk driver.
81  *
82  * Dynamic configuration and disklabel support by:
83  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
84  *	Numerical Aerodynamic Simulation Facility
85  *	Mail Stop 258-6
86  *	NASA Ames Research Center
87  *	Moffett Field, CA 94035
88  */
89 
90 #include "ccd.h"
91 #if NCCD > 0
92 
93 #include <sys/param.h>
94 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/module.h>
97 #include <sys/proc.h>
98 #include <sys/buf.h>
99 #include <sys/malloc.h>
100 #include <sys/namei.h>
101 #include <sys/conf.h>
102 #include <sys/stat.h>
103 #include <sys/sysctl.h>
104 #include <sys/disklabel.h>
105 #include <ufs/ffs/fs.h>
106 #include <sys/devicestat.h>
107 #include <sys/fcntl.h>
108 #include <sys/vnode.h>
109 
110 #include <sys/ccdvar.h>
111 
112 #include <vm/vm_zone.h>
113 
114 #if defined(CCDDEBUG) && !defined(DEBUG)
115 #define DEBUG
116 #endif
117 
118 #ifdef DEBUG
119 #define CCDB_FOLLOW	0x01
120 #define CCDB_INIT	0x02
121 #define CCDB_IO		0x04
122 #define CCDB_LABEL	0x08
123 #define CCDB_VNODE	0x10
124 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
125     CCDB_VNODE;
126 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
127 #undef DEBUG
128 #endif
129 
130 #define	ccdunit(x)	dkunit(x)
131 #define ccdpart(x)	dkpart(x)
132 
133 /*
134    This is how mirroring works (only writes are special):
135 
136    When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
137    linked together by the cb_mirror field.  "cb_pflags &
138    CCDPF_MIRROR_DONE" is set to 0 on both of them.
139 
140    When a component returns to ccdiodone(), it checks if "cb_pflags &
141    CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
142    flag and returns.  If it is, it means its partner has already
143    returned, so it will go to the regular cleanup.
144 
145  */
146 
147 struct ccdbuf {
148 	struct buf	cb_buf;		/* new I/O buf */
149 	struct buf	*cb_obp;	/* ptr. to original I/O buf */
150 	struct ccdbuf	*cb_freenext;	/* free list link */
151 	int		cb_unit;	/* target unit */
152 	int		cb_comp;	/* target component */
153 	int		cb_pflags;	/* mirror/parity status flag */
154 	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
155 };
156 
157 /* bits in cb_pflags */
158 #define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */
159 
160 #define CCDLABELDEV(dev)	\
161 	(makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
162 
163 static d_open_t ccdopen;
164 static d_close_t ccdclose;
165 static d_strategy_t ccdstrategy;
166 static d_ioctl_t ccdioctl;
167 static d_dump_t ccddump;
168 static d_psize_t ccdsize;
169 
170 #define NCCDFREEHIWAT	16
171 
172 #define CDEV_MAJOR 74
173 #define BDEV_MAJOR 21
174 
175 static struct cdevsw ccd_cdevsw = {
176 	/* open */	ccdopen,
177 	/* close */	ccdclose,
178 	/* read */	physread,
179 	/* write */	physwrite,
180 	/* ioctl */	ccdioctl,
181 	/* poll */	nopoll,
182 	/* mmap */	nommap,
183 	/* strategy */	ccdstrategy,
184 	/* name */	"ccd",
185 	/* maj */	CDEV_MAJOR,
186 	/* dump */	ccddump,
187 	/* psize */	ccdsize,
188 	/* flags */	D_DISK,
189 	/* bmaj */	BDEV_MAJOR
190 };
191 
192 /* called during module initialization */
193 static	void ccdattach __P((void));
194 static	int ccd_modevent __P((module_t, int, void *));
195 
196 /* called by biodone() at interrupt time */
197 static	void ccdiodone __P((struct ccdbuf *cbp));
198 
199 static	void ccdstart __P((struct ccd_softc *, struct buf *));
200 static	void ccdinterleave __P((struct ccd_softc *, int));
201 static	void ccdintr __P((struct ccd_softc *, struct buf *));
202 static	int ccdinit __P((struct ccddevice *, char **, struct proc *));
203 static	int ccdlookup __P((char *, struct proc *p, struct vnode **));
204 static	void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *,
205 		struct buf *, daddr_t, caddr_t, long));
206 static	void ccdgetdisklabel __P((dev_t));
207 static	void ccdmakedisklabel __P((struct ccd_softc *));
208 static	int ccdlock __P((struct ccd_softc *));
209 static	void ccdunlock __P((struct ccd_softc *));
210 
211 #ifdef DEBUG
212 static	void printiinfo __P((struct ccdiinfo *));
213 #endif
214 
215 /* Non-private for the benefit of libkvm. */
216 struct	ccd_softc *ccd_softc;
217 struct	ccddevice *ccddevs;
218 struct	ccdbuf *ccdfreebufs;
219 static	int numccdfreebufs;
220 static	int numccd = 0;
221 
222 /*
223  * getccdbuf() -	Allocate and zero a ccd buffer.
224  *
225  *	This routine is called at splbio().
226  */
227 
228 static __inline
229 struct ccdbuf *
230 getccdbuf(struct ccdbuf *cpy)
231 {
232 	struct ccdbuf *cbp;
233 
234 	/*
235 	 * Allocate from freelist or malloc as necessary
236 	 */
237 	if ((cbp = ccdfreebufs) != NULL) {
238 		ccdfreebufs = cbp->cb_freenext;
239 		--numccdfreebufs;
240 	} else {
241 		cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
242 	}
243 
244 	/*
245 	 * Used by mirroring code
246 	 */
247 	if (cpy)
248 		bcopy(cpy, cbp, sizeof(struct ccdbuf));
249 	else
250 		bzero(cbp, sizeof(struct ccdbuf));
251 
252 	/*
253 	 * independant struct buf initialization
254 	 */
255 	LIST_INIT(&cbp->cb_buf.b_dep);
256 	BUF_LOCKINIT(&cbp->cb_buf);
257 	BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
258 	BUF_KERNPROC(&cbp->cb_buf);
259 
260 	return(cbp);
261 }
262 
263 /*
264  * putccdbuf() -	Free a ccd buffer.
265  *
266  *	This routine is called at splbio().
267  */
268 
269 static __inline
270 void
271 putccdbuf(struct ccdbuf *cbp)
272 {
273 	BUF_UNLOCK(&cbp->cb_buf);
274 	BUF_LOCKFREE(&cbp->cb_buf);
275 
276 	if (numccdfreebufs < NCCDFREEHIWAT) {
277 		cbp->cb_freenext = ccdfreebufs;
278 		ccdfreebufs = cbp;
279 		++numccdfreebufs;
280 	} else {
281 		free((caddr_t)cbp, M_DEVBUF);
282 	}
283 }
284 
285 
286 /*
287  * Number of blocks to leave untouched in front of a component partition.
288  * This is to avoid violating its disklabel area when it starts at the
289  * beginning of the slice.
290  */
291 #if !defined(CCD_OFFSET)
292 #define CCD_OFFSET 16
293 #endif
294 
295 /*
296  * Called by main() during pseudo-device attachment.  All we need
297  * to do is allocate enough space for devices to be configured later, and
298  * add devsw entries.
299  */
300 static void
301 ccdattach()
302 {
303 	int i;
304 	int num = NCCD;
305 
306 	if (num > 1)
307 		printf("ccd0-%d: Concatenated disk drivers\n", num-1);
308 	else
309 		printf("ccd0: Concatenated disk driver\n");
310 
311 	ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc),
312 	    M_DEVBUF, M_NOWAIT);
313 	ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice),
314 	    M_DEVBUF, M_NOWAIT);
315 	if ((ccd_softc == NULL) || (ccddevs == NULL)) {
316 		printf("WARNING: no memory for concatenated disks\n");
317 		if (ccd_softc != NULL)
318 			free(ccd_softc, M_DEVBUF);
319 		if (ccddevs != NULL)
320 			free(ccddevs, M_DEVBUF);
321 		return;
322 	}
323 	numccd = num;
324 	bzero(ccd_softc, num * sizeof(struct ccd_softc));
325 	bzero(ccddevs, num * sizeof(struct ccddevice));
326 
327 	cdevsw_add(&ccd_cdevsw);
328 	/* XXX: is this necessary? */
329 	for (i = 0; i < numccd; ++i)
330 		ccddevs[i].ccd_dk = -1;
331 }
332 
333 static int
334 ccd_modevent(mod, type, data)
335 	module_t mod;
336 	int type;
337 	void *data;
338 {
339 	int error = 0;
340 
341 	switch (type) {
342 	case MOD_LOAD:
343 		ccdattach();
344 		break;
345 
346 	case MOD_UNLOAD:
347 		printf("ccd0: Unload not supported!\n");
348 		error = EOPNOTSUPP;
349 		break;
350 
351 	default:	/* MOD_SHUTDOWN etc */
352 		break;
353 	}
354 	return (error);
355 }
356 
357 DEV_MODULE(ccd, ccd_modevent, NULL);
358 
359 static int
360 ccdinit(ccd, cpaths, p)
361 	struct ccddevice *ccd;
362 	char **cpaths;
363 	struct proc *p;
364 {
365 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
366 	struct ccdcinfo *ci = NULL;	/* XXX */
367 	size_t size;
368 	int ix;
369 	struct vnode *vp;
370 	size_t minsize;
371 	int maxsecsize;
372 	struct partinfo dpart;
373 	struct ccdgeom *ccg = &cs->sc_geom;
374 	char tmppath[MAXPATHLEN];
375 	int error = 0;
376 
377 #ifdef DEBUG
378 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
379 		printf("ccdinit: unit %d\n", ccd->ccd_unit);
380 #endif
381 
382 	cs->sc_size = 0;
383 	cs->sc_ileave = ccd->ccd_interleave;
384 	cs->sc_nccdisks = ccd->ccd_ndev;
385 
386 	/* Allocate space for the component info. */
387 	cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
388 	    M_DEVBUF, M_WAITOK);
389 
390 	/*
391 	 * Verify that each component piece exists and record
392 	 * relevant information about it.
393 	 */
394 	maxsecsize = 0;
395 	minsize = 0;
396 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
397 		vp = ccd->ccd_vpp[ix];
398 		ci = &cs->sc_cinfo[ix];
399 		ci->ci_vp = vp;
400 
401 		/*
402 		 * Copy in the pathname of the component.
403 		 */
404 		bzero(tmppath, sizeof(tmppath));	/* sanity */
405 		if ((error = copyinstr(cpaths[ix], tmppath,
406 		    MAXPATHLEN, &ci->ci_pathlen)) != 0) {
407 #ifdef DEBUG
408 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
409 				printf("ccd%d: can't copy path, error = %d\n",
410 				    ccd->ccd_unit, error);
411 #endif
412 			goto fail;
413 		}
414 		ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
415 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
416 
417 		ci->ci_dev = vn_todev(vp);
418 
419 		/*
420 		 * Get partition information for the component.
421 		 */
422 		if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
423 		    FREAD, p->p_ucred, p)) != 0) {
424 #ifdef DEBUG
425 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
426 				 printf("ccd%d: %s: ioctl failed, error = %d\n",
427 				     ccd->ccd_unit, ci->ci_path, error);
428 #endif
429 			goto fail;
430 		}
431 		if (dpart.part->p_fstype == FS_BSDFFS) {
432 			maxsecsize =
433 			    ((dpart.disklab->d_secsize > maxsecsize) ?
434 			    dpart.disklab->d_secsize : maxsecsize);
435 			size = dpart.part->p_size - CCD_OFFSET;
436 		} else {
437 #ifdef DEBUG
438 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
439 				printf("ccd%d: %s: incorrect partition type\n",
440 				    ccd->ccd_unit, ci->ci_path);
441 #endif
442 			error = EFTYPE;
443 			goto fail;
444 		}
445 
446 		/*
447 		 * Calculate the size, truncating to an interleave
448 		 * boundary if necessary.
449 		 */
450 
451 		if (cs->sc_ileave > 1)
452 			size -= size % cs->sc_ileave;
453 
454 		if (size == 0) {
455 #ifdef DEBUG
456 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
457 				printf("ccd%d: %s: size == 0\n",
458 				    ccd->ccd_unit, ci->ci_path);
459 #endif
460 			error = ENODEV;
461 			goto fail;
462 		}
463 
464 		if (minsize == 0 || size < minsize)
465 			minsize = size;
466 		ci->ci_size = size;
467 		cs->sc_size += size;
468 	}
469 
470 	/*
471 	 * Don't allow the interleave to be smaller than
472 	 * the biggest component sector.
473 	 */
474 	if ((cs->sc_ileave > 0) &&
475 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
476 #ifdef DEBUG
477 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
478 			printf("ccd%d: interleave must be at least %d\n",
479 			    ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
480 #endif
481 		error = EINVAL;
482 		goto fail;
483 	}
484 
485 	/*
486 	 * If uniform interleave is desired set all sizes to that of
487 	 * the smallest component.  This will guarentee that a single
488 	 * interleave table is generated.
489 	 *
490 	 * Lost space must be taken into account when calculating the
491 	 * overall size.  Half the space is lost when CCDF_MIRROR is
492 	 * specified.  One disk is lost when CCDF_PARITY is specified.
493 	 */
494 	if (ccd->ccd_flags & CCDF_UNIFORM) {
495 		for (ci = cs->sc_cinfo;
496 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
497 			ci->ci_size = minsize;
498 		}
499 		if (ccd->ccd_flags & CCDF_MIRROR) {
500 			/*
501 			 * Check to see if an even number of components
502 			 * have been specified.  The interleave must also
503 			 * be non-zero in order for us to be able to
504 			 * guarentee the topology.
505 			 */
506 			if (cs->sc_nccdisks % 2) {
507 				printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
508 				error = EINVAL;
509 				goto fail;
510 			}
511 			if (cs->sc_ileave == 0) {
512 				printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
513 				error = EINVAL;
514 				goto fail;
515 			}
516 			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
517 		} else if (ccd->ccd_flags & CCDF_PARITY) {
518 			cs->sc_size = (cs->sc_nccdisks-1) * minsize;
519 		} else {
520 			if (cs->sc_ileave == 0) {
521 				printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
522 				error = EINVAL;
523 				goto fail;
524 			}
525 			cs->sc_size = cs->sc_nccdisks * minsize;
526 		}
527 	}
528 
529 	/*
530 	 * Construct the interleave table.
531 	 */
532 	ccdinterleave(cs, ccd->ccd_unit);
533 
534 	/*
535 	 * Create pseudo-geometry based on 1MB cylinders.  It's
536 	 * pretty close.
537 	 */
538 	ccg->ccg_secsize = maxsecsize;
539 	ccg->ccg_ntracks = 1;
540 	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
541 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
542 
543 	/*
544 	 * Add an devstat entry for this device.
545 	 */
546 	devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
547 			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
548 			  DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
549 			  DEVSTAT_PRIORITY_ARRAY);
550 
551 	cs->sc_flags |= CCDF_INITED;
552 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
553 	cs->sc_unit = ccd->ccd_unit;
554 	return (0);
555 fail:
556 	while (ci > cs->sc_cinfo) {
557 		ci--;
558 		free(ci->ci_path, M_DEVBUF);
559 	}
560 	free(cs->sc_cinfo, M_DEVBUF);
561 	return (error);
562 }
563 
564 static void
565 ccdinterleave(cs, unit)
566 	struct ccd_softc *cs;
567 	int unit;
568 {
569 	struct ccdcinfo *ci, *smallci;
570 	struct ccdiinfo *ii;
571 	daddr_t bn, lbn;
572 	int ix;
573 	u_long size;
574 
575 #ifdef DEBUG
576 	if (ccddebug & CCDB_INIT)
577 		printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
578 #endif
579 
580 	/*
581 	 * Allocate an interleave table.  The worst case occurs when each
582 	 * of N disks is of a different size, resulting in N interleave
583 	 * tables.
584 	 *
585 	 * Chances are this is too big, but we don't care.
586 	 */
587 	size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
588 	cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
589 	bzero((caddr_t)cs->sc_itable, size);
590 
591 	/*
592 	 * Trivial case: no interleave (actually interleave of disk size).
593 	 * Each table entry represents a single component in its entirety.
594 	 *
595 	 * An interleave of 0 may not be used with a mirror or parity setup.
596 	 */
597 	if (cs->sc_ileave == 0) {
598 		bn = 0;
599 		ii = cs->sc_itable;
600 
601 		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
602 			/* Allocate space for ii_index. */
603 			ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
604 			ii->ii_ndisk = 1;
605 			ii->ii_startblk = bn;
606 			ii->ii_startoff = 0;
607 			ii->ii_index[0] = ix;
608 			bn += cs->sc_cinfo[ix].ci_size;
609 			ii++;
610 		}
611 		ii->ii_ndisk = 0;
612 #ifdef DEBUG
613 		if (ccddebug & CCDB_INIT)
614 			printiinfo(cs->sc_itable);
615 #endif
616 		return;
617 	}
618 
619 	/*
620 	 * The following isn't fast or pretty; it doesn't have to be.
621 	 */
622 	size = 0;
623 	bn = lbn = 0;
624 	for (ii = cs->sc_itable; ; ii++) {
625 		/*
626 		 * Allocate space for ii_index.  We might allocate more then
627 		 * we use.
628 		 */
629 		ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
630 		    M_DEVBUF, M_WAITOK);
631 
632 		/*
633 		 * Locate the smallest of the remaining components
634 		 */
635 		smallci = NULL;
636 		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
637 		    ci++) {
638 			if (ci->ci_size > size &&
639 			    (smallci == NULL ||
640 			     ci->ci_size < smallci->ci_size)) {
641 				smallci = ci;
642 			}
643 		}
644 
645 		/*
646 		 * Nobody left, all done
647 		 */
648 		if (smallci == NULL) {
649 			ii->ii_ndisk = 0;
650 			break;
651 		}
652 
653 		/*
654 		 * Record starting logical block using an sc_ileave blocksize.
655 		 */
656 		ii->ii_startblk = bn / cs->sc_ileave;
657 
658 		/*
659 		 * Record starting comopnent block using an sc_ileave
660 		 * blocksize.  This value is relative to the beginning of
661 		 * a component disk.
662 		 */
663 		ii->ii_startoff = lbn;
664 
665 		/*
666 		 * Determine how many disks take part in this interleave
667 		 * and record their indices.
668 		 */
669 		ix = 0;
670 		for (ci = cs->sc_cinfo;
671 		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
672 			if (ci->ci_size >= smallci->ci_size) {
673 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
674 			}
675 		}
676 		ii->ii_ndisk = ix;
677 		bn += ix * (smallci->ci_size - size);
678 		lbn = smallci->ci_size / cs->sc_ileave;
679 		size = smallci->ci_size;
680 	}
681 #ifdef DEBUG
682 	if (ccddebug & CCDB_INIT)
683 		printiinfo(cs->sc_itable);
684 #endif
685 }
686 
687 /* ARGSUSED */
688 static int
689 ccdopen(dev, flags, fmt, p)
690 	dev_t dev;
691 	int flags, fmt;
692 	struct proc *p;
693 {
694 	int unit = ccdunit(dev);
695 	struct ccd_softc *cs;
696 	struct disklabel *lp;
697 	int error = 0, part, pmask;
698 
699 #ifdef DEBUG
700 	if (ccddebug & CCDB_FOLLOW)
701 		printf("ccdopen(%x, %x)\n", dev, flags);
702 #endif
703 	if (unit >= numccd)
704 		return (ENXIO);
705 	cs = &ccd_softc[unit];
706 
707 	if ((error = ccdlock(cs)) != 0)
708 		return (error);
709 
710 	lp = &cs->sc_label;
711 
712 	part = ccdpart(dev);
713 	pmask = (1 << part);
714 
715 	/*
716 	 * If we're initialized, check to see if there are any other
717 	 * open partitions.  If not, then it's safe to update
718 	 * the in-core disklabel.
719 	 */
720 	if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
721 		ccdgetdisklabel(dev);
722 
723 	/* Check that the partition exists. */
724 	if (part != RAW_PART && ((part >= lp->d_npartitions) ||
725 	    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
726 		error = ENXIO;
727 		goto done;
728 	}
729 
730 	/* Prevent our unit from being unconfigured while open. */
731 	switch (fmt) {
732 	case S_IFCHR:
733 		cs->sc_copenmask |= pmask;
734 		break;
735 
736 	case S_IFBLK:
737 		cs->sc_bopenmask |= pmask;
738 		break;
739 	}
740 	cs->sc_openmask =
741 	    cs->sc_copenmask | cs->sc_bopenmask;
742 
743  done:
744 	ccdunlock(cs);
745 	return (0);
746 }
747 
748 /* ARGSUSED */
749 static int
750 ccdclose(dev, flags, fmt, p)
751 	dev_t dev;
752 	int flags, fmt;
753 	struct proc *p;
754 {
755 	int unit = ccdunit(dev);
756 	struct ccd_softc *cs;
757 	int error = 0, part;
758 
759 #ifdef DEBUG
760 	if (ccddebug & CCDB_FOLLOW)
761 		printf("ccdclose(%x, %x)\n", dev, flags);
762 #endif
763 
764 	if (unit >= numccd)
765 		return (ENXIO);
766 	cs = &ccd_softc[unit];
767 
768 	if ((error = ccdlock(cs)) != 0)
769 		return (error);
770 
771 	part = ccdpart(dev);
772 
773 	/* ...that much closer to allowing unconfiguration... */
774 	switch (fmt) {
775 	case S_IFCHR:
776 		cs->sc_copenmask &= ~(1 << part);
777 		break;
778 
779 	case S_IFBLK:
780 		cs->sc_bopenmask &= ~(1 << part);
781 		break;
782 	}
783 	cs->sc_openmask =
784 	    cs->sc_copenmask | cs->sc_bopenmask;
785 
786 	ccdunlock(cs);
787 	return (0);
788 }
789 
790 static void
791 ccdstrategy(bp)
792 	struct buf *bp;
793 {
794 	int unit = ccdunit(bp->b_dev);
795 	struct ccd_softc *cs = &ccd_softc[unit];
796 	int s;
797 	int wlabel;
798 	struct disklabel *lp;
799 
800 #ifdef DEBUG
801 	if (ccddebug & CCDB_FOLLOW)
802 		printf("ccdstrategy(%x): unit %d\n", bp, unit);
803 #endif
804 	if ((cs->sc_flags & CCDF_INITED) == 0) {
805 		bp->b_error = ENXIO;
806 		bp->b_flags |= B_ERROR;
807 		goto done;
808 	}
809 
810 	/* If it's a nil transfer, wake up the top half now. */
811 	if (bp->b_bcount == 0)
812 		goto done;
813 
814 	lp = &cs->sc_label;
815 
816 	/*
817 	 * Do bounds checking and adjust transfer.  If there's an
818 	 * error, the bounds check will flag that for us.
819 	 */
820 	wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
821 	if (ccdpart(bp->b_dev) != RAW_PART) {
822 		if (bounds_check_with_label(bp, lp, wlabel) <= 0)
823 			goto done;
824 	} else {
825 		int pbn;        /* in sc_secsize chunks */
826 		long sz;        /* in sc_secsize chunks */
827 
828 		pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
829 		sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
830 
831 		/*
832 		 * If out of bounds return an error. If at the EOF point,
833 		 * simply read or write less.
834 		 */
835 
836 		if (pbn < 0 || pbn >= cs->sc_size) {
837 			bp->b_resid = bp->b_bcount;
838 			if (pbn != cs->sc_size) {
839 				bp->b_error = EINVAL;
840 				bp->b_flags |= B_ERROR | B_INVAL;
841 			}
842 			goto done;
843 		}
844 
845 		/*
846 		 * If the request crosses EOF, truncate the request.
847 		 */
848 		if (pbn + sz > cs->sc_size) {
849 			bp->b_bcount = (cs->sc_size - pbn) *
850 			    cs->sc_geom.ccg_secsize;
851 		}
852 	}
853 
854 	bp->b_resid = bp->b_bcount;
855 
856 	/*
857 	 * "Start" the unit.
858 	 */
859 	s = splbio();
860 	ccdstart(cs, bp);
861 	splx(s);
862 	return;
863 done:
864 	biodone(bp);
865 }
866 
867 static void
868 ccdstart(cs, bp)
869 	struct ccd_softc *cs;
870 	struct buf *bp;
871 {
872 	long bcount, rcount;
873 	struct ccdbuf *cbp[4];
874 	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
875 	caddr_t addr;
876 	daddr_t bn;
877 	struct partition *pp;
878 
879 #ifdef DEBUG
880 	if (ccddebug & CCDB_FOLLOW)
881 		printf("ccdstart(%x, %x)\n", cs, bp);
882 #endif
883 
884 	/* Record the transaction start  */
885 	devstat_start_transaction(&cs->device_stats);
886 
887 	/*
888 	 * Translate the partition-relative block number to an absolute.
889 	 */
890 	bn = bp->b_blkno;
891 	if (ccdpart(bp->b_dev) != RAW_PART) {
892 		pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)];
893 		bn += pp->p_offset;
894 	}
895 
896 	/*
897 	 * Allocate component buffers and fire off the requests
898 	 */
899 	addr = bp->b_data;
900 	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
901 		ccdbuffer(cbp, cs, bp, bn, addr, bcount);
902 		rcount = cbp[0]->cb_buf.b_bcount;
903 
904 		if (cs->sc_cflags & CCDF_MIRROR) {
905 			/*
906 			 * Mirroring.  Writes go to both disks, reads are
907 			 * taken from whichever disk seems most appropriate.
908 			 *
909 			 * We attempt to localize reads to the disk whos arm
910 			 * is nearest the read request.  We ignore seeks due
911 			 * to writes when making this determination and we
912 			 * also try to avoid hogging.
913 			 */
914 			if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) {
915 				cbp[0]->cb_buf.b_vp->v_numoutput++;
916 				cbp[1]->cb_buf.b_vp->v_numoutput++;
917 				VOP_STRATEGY(cbp[0]->cb_buf.b_vp,
918 				    &cbp[0]->cb_buf);
919 				VOP_STRATEGY(cbp[1]->cb_buf.b_vp,
920 				    &cbp[1]->cb_buf);
921 			} else {
922 				int pick = cs->sc_pick;
923 				daddr_t range = cs->sc_size / 16;
924 
925 				if (bn < cs->sc_blk[pick] - range ||
926 				    bn > cs->sc_blk[pick] + range
927 				) {
928 					cs->sc_pick = pick = 1 - pick;
929 				}
930 				cs->sc_blk[pick] = bn + btodb(rcount);
931 				VOP_STRATEGY(cbp[pick]->cb_buf.b_vp,
932 				    &cbp[pick]->cb_buf);
933 			}
934 		} else {
935 			/*
936 			 * Not mirroring
937 			 */
938 			if ((cbp[0]->cb_buf.b_flags & B_READ) == 0)
939 				cbp[0]->cb_buf.b_vp->v_numoutput++;
940 			VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf);
941 		}
942 		bn += btodb(rcount);
943 		addr += rcount;
944 	}
945 }
946 
947 /*
948  * Build a component buffer header.
949  */
950 static void
951 ccdbuffer(cb, cs, bp, bn, addr, bcount)
952 	struct ccdbuf **cb;
953 	struct ccd_softc *cs;
954 	struct buf *bp;
955 	daddr_t bn;
956 	caddr_t addr;
957 	long bcount;
958 {
959 	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
960 	struct ccdbuf *cbp;
961 	daddr_t cbn, cboff;
962 	off_t cbc;
963 
964 #ifdef DEBUG
965 	if (ccddebug & CCDB_IO)
966 		printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
967 		       cs, bp, bn, addr, bcount);
968 #endif
969 	/*
970 	 * Determine which component bn falls in.
971 	 */
972 	cbn = bn;
973 	cboff = 0;
974 
975 	if (cs->sc_ileave == 0) {
976 		/*
977 		 * Serially concatenated and neither a mirror nor a parity
978 		 * config.  This is a special case.
979 		 */
980 		daddr_t sblk;
981 
982 		sblk = 0;
983 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
984 			sblk += ci->ci_size;
985 		cbn -= sblk;
986 	} else {
987 		struct ccdiinfo *ii;
988 		int ccdisk, off;
989 
990 		/*
991 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
992 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
993 		 * to cbn.
994 		 */
995 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
996 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
997 
998 		/*
999 		 * Figure out which interleave table to use.
1000 		 */
1001 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
1002 			if (ii->ii_startblk > cbn)
1003 				break;
1004 		}
1005 		ii--;
1006 
1007 		/*
1008 		 * off is the logical superblock relative to the beginning
1009 		 * of this interleave block.
1010 		 */
1011 		off = cbn - ii->ii_startblk;
1012 
1013 		/*
1014 		 * We must calculate which disk component to use (ccdisk),
1015 		 * and recalculate cbn to be the superblock relative to
1016 		 * the beginning of the component.  This is typically done by
1017 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
1018 		 * must typically be divided by the number of components in
1019 		 * this interleave array to be properly convert it from a
1020 		 * CCD-relative logical superblock number to a
1021 		 * component-relative superblock number.
1022 		 */
1023 		if (ii->ii_ndisk == 1) {
1024 			/*
1025 			 * When we have just one disk, it can't be a mirror
1026 			 * or a parity config.
1027 			 */
1028 			ccdisk = ii->ii_index[0];
1029 			cbn = ii->ii_startoff + off;
1030 		} else {
1031 			if (cs->sc_cflags & CCDF_MIRROR) {
1032 				/*
1033 				 * We have forced a uniform mapping, resulting
1034 				 * in a single interleave array.  We double
1035 				 * up on the first half of the available
1036 				 * components and our mirror is in the second
1037 				 * half.  This only works with a single
1038 				 * interleave array because doubling up
1039 				 * doubles the number of sectors, so there
1040 				 * cannot be another interleave array because
1041 				 * the next interleave array's calculations
1042 				 * would be off.
1043 				 */
1044 				int ndisk2 = ii->ii_ndisk / 2;
1045 				ccdisk = ii->ii_index[off % ndisk2];
1046 				cbn = ii->ii_startoff + off / ndisk2;
1047 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1048 			} else if (cs->sc_cflags & CCDF_PARITY) {
1049 				/*
1050 				 * XXX not implemented yet
1051 				 */
1052 				int ndisk2 = ii->ii_ndisk - 1;
1053 				ccdisk = ii->ii_index[off % ndisk2];
1054 				cbn = ii->ii_startoff + off / ndisk2;
1055 				if (cbn % ii->ii_ndisk <= ccdisk)
1056 					ccdisk++;
1057 			} else {
1058 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
1059 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
1060 			}
1061 		}
1062 
1063 		ci = &cs->sc_cinfo[ccdisk];
1064 
1065 		/*
1066 		 * Convert cbn from a superblock to a normal block so it
1067 		 * can be used to calculate (along with cboff) the normal
1068 		 * block index into this particular disk.
1069 		 */
1070 		cbn *= cs->sc_ileave;
1071 	}
1072 
1073 	/*
1074 	 * Fill in the component buf structure.
1075 	 */
1076 	cbp = getccdbuf(NULL);
1077 	cbp->cb_buf.b_flags = bp->b_flags | B_CALL;
1078 	cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone;
1079 	cbp->cb_buf.b_dev = ci->ci_dev;		/* XXX */
1080 	cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET;
1081 	cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET);
1082 	cbp->cb_buf.b_data = addr;
1083 	cbp->cb_buf.b_vp = ci->ci_vp;
1084 	if (cs->sc_ileave == 0)
1085               cbc = dbtob((off_t)(ci->ci_size - cbn));
1086 	else
1087               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1088 	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1089  	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1090 
1091 	/*
1092 	 * context for ccdiodone
1093 	 */
1094 	cbp->cb_obp = bp;
1095 	cbp->cb_unit = cs - ccd_softc;
1096 	cbp->cb_comp = ci - cs->sc_cinfo;
1097 
1098 #ifdef DEBUG
1099 	if (ccddebug & CCDB_IO)
1100 		printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
1101 		       ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno,
1102 		       cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1103 #endif
1104 	cb[0] = cbp;
1105 
1106 	/*
1107 	 * Note: both I/O's setup when reading from mirror, but only one
1108 	 * will be executed.
1109 	 */
1110 	if (cs->sc_cflags & CCDF_MIRROR) {
1111 		/* mirror, setup second I/O */
1112 		cbp = getccdbuf(cb[0]);
1113 		cbp->cb_buf.b_dev = ci2->ci_dev;
1114 		cbp->cb_buf.b_vp = ci2->ci_vp;
1115 		cbp->cb_comp = ci2 - cs->sc_cinfo;
1116 		cb[1] = cbp;
1117 		/* link together the ccdbuf's and clear "mirror done" flag */
1118 		cb[0]->cb_mirror = cb[1];
1119 		cb[1]->cb_mirror = cb[0];
1120 		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1121 		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1122 	}
1123 }
1124 
1125 static void
1126 ccdintr(cs, bp)
1127 	struct ccd_softc *cs;
1128 	struct buf *bp;
1129 {
1130 #ifdef DEBUG
1131 	if (ccddebug & CCDB_FOLLOW)
1132 		printf("ccdintr(%x, %x)\n", cs, bp);
1133 #endif
1134 	/*
1135 	 * Request is done for better or worse, wakeup the top half.
1136 	 */
1137 	if (bp->b_flags & B_ERROR)
1138 		bp->b_resid = bp->b_bcount;
1139 	devstat_end_transaction_buf(&cs->device_stats, bp);
1140 	biodone(bp);
1141 }
1142 
1143 /*
1144  * Called at interrupt time.
1145  * Mark the component as done and if all components are done,
1146  * take a ccd interrupt.
1147  */
1148 static void
1149 ccdiodone(cbp)
1150 	struct ccdbuf *cbp;
1151 {
1152 	struct buf *bp = cbp->cb_obp;
1153 	int unit = cbp->cb_unit;
1154 	int count, s;
1155 
1156 	s = splbio();
1157 #ifdef DEBUG
1158 	if (ccddebug & CCDB_FOLLOW)
1159 		printf("ccdiodone(%x)\n", cbp);
1160 	if (ccddebug & CCDB_IO) {
1161 		printf("ccdiodone: bp %x bcount %d resid %d\n",
1162 		       bp, bp->b_bcount, bp->b_resid);
1163 		printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
1164 		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1165 		       cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
1166 		       cbp->cb_buf.b_bcount);
1167 	}
1168 #endif
1169 	/*
1170 	 * If an error occured, report it.  If this is a mirrored
1171 	 * configuration and the first of two possible reads, do not
1172 	 * set the error in the bp yet because the second read may
1173 	 * succeed.
1174 	 */
1175 
1176 	if (cbp->cb_buf.b_flags & B_ERROR) {
1177 		const char *msg = "";
1178 
1179 		if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1180 		    (cbp->cb_buf.b_flags & B_READ) &&
1181 		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1182 			/*
1183 			 * We will try our read on the other disk down
1184 			 * below, also reverse the default pick so if we
1185 			 * are doing a scan we do not keep hitting the
1186 			 * bad disk first.
1187 			 */
1188 			struct ccd_softc *cs = &ccd_softc[unit];
1189 
1190 			msg = ", trying other disk";
1191 			cs->sc_pick = 1 - cs->sc_pick;
1192 			cs->sc_blk[cs->sc_pick] = bp->b_blkno;
1193 		} else {
1194 			bp->b_flags |= B_ERROR;
1195 			bp->b_error = cbp->cb_buf.b_error ?
1196 			    cbp->cb_buf.b_error : EIO;
1197 		}
1198 		printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
1199 		       unit, bp->b_error, cbp->cb_comp,
1200 		       (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg);
1201 	}
1202 
1203 	/*
1204 	 * Process mirror.  If we are writing, I/O has been initiated on both
1205 	 * buffers and we fall through only after both are finished.
1206 	 *
1207 	 * If we are reading only one I/O is initiated at a time.  If an
1208 	 * error occurs we initiate the second I/O and return, otherwise
1209 	 * we free the second I/O without initiating it.
1210 	 */
1211 
1212 	if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1213 		if ((cbp->cb_buf.b_flags & B_READ) == 0) {
1214 			/*
1215 			 * When writing, handshake with the second buffer
1216 			 * to determine when both are done.  If both are not
1217 			 * done, return here.
1218 			 */
1219 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1220 				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1221 				putccdbuf(cbp);
1222 				splx(s);
1223 				return;
1224 			}
1225 		} else {
1226 			/*
1227 			 * When reading, either dispose of the second buffer
1228 			 * or initiate I/O on the second buffer if an error
1229 			 * occured with this one.
1230 			 */
1231 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1232 				if (cbp->cb_buf.b_flags & B_ERROR) {
1233 					cbp->cb_mirror->cb_pflags |=
1234 					    CCDPF_MIRROR_DONE;
1235 					VOP_STRATEGY(
1236 					    cbp->cb_mirror->cb_buf.b_vp,
1237 					    &cbp->cb_mirror->cb_buf
1238 					);
1239 					putccdbuf(cbp);
1240 					splx(s);
1241 					return;
1242 				} else {
1243 					putccdbuf(cbp->cb_mirror);
1244 					/* fall through */
1245 				}
1246 			}
1247 		}
1248 	}
1249 
1250 	/*
1251 	 * use b_bufsize to determine how big the original request was rather
1252 	 * then b_bcount, because b_bcount may have been truncated for EOF.
1253 	 *
1254 	 * XXX We check for an error, but we do not test the resid for an
1255 	 * aligned EOF condition.  This may result in character & block
1256 	 * device access not recognizing EOF properly when read or written
1257 	 * sequentially, but will not effect filesystems.
1258 	 */
1259 	count = cbp->cb_buf.b_bufsize;
1260 	putccdbuf(cbp);
1261 
1262 	/*
1263 	 * If all done, "interrupt".
1264 	 */
1265 	bp->b_resid -= count;
1266 	if (bp->b_resid < 0)
1267 		panic("ccdiodone: count");
1268 	if (bp->b_resid == 0)
1269 		ccdintr(&ccd_softc[unit], bp);
1270 	splx(s);
1271 }
1272 
1273 static int
1274 ccdioctl(dev, cmd, data, flag, p)
1275 	dev_t dev;
1276 	u_long cmd;
1277 	caddr_t data;
1278 	int flag;
1279 	struct proc *p;
1280 {
1281 	int unit = ccdunit(dev);
1282 	int i, j, lookedup = 0, error = 0;
1283 	int part, pmask, s;
1284 	struct ccd_softc *cs;
1285 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1286 	struct ccddevice ccd;
1287 	char **cpp;
1288 	struct vnode **vpp;
1289 
1290 	if (unit >= numccd)
1291 		return (ENXIO);
1292 	cs = &ccd_softc[unit];
1293 
1294 	bzero(&ccd, sizeof(ccd));
1295 
1296 	switch (cmd) {
1297 	case CCDIOCSET:
1298 		if (cs->sc_flags & CCDF_INITED)
1299 			return (EBUSY);
1300 
1301 		if ((flag & FWRITE) == 0)
1302 			return (EBADF);
1303 
1304 		if ((error = ccdlock(cs)) != 0)
1305 			return (error);
1306 
1307 		/* Fill in some important bits. */
1308 		ccd.ccd_unit = unit;
1309 		ccd.ccd_interleave = ccio->ccio_ileave;
1310 		if (ccd.ccd_interleave == 0 &&
1311 		    ((ccio->ccio_flags & CCDF_MIRROR) ||
1312 		     (ccio->ccio_flags & CCDF_PARITY))) {
1313 			printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1314 			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1315 		}
1316 		if ((ccio->ccio_flags & CCDF_MIRROR) &&
1317 		    (ccio->ccio_flags & CCDF_PARITY)) {
1318 			printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1319 			ccio->ccio_flags &= ~CCDF_PARITY;
1320 		}
1321 		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1322 		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
1323 			printf("ccd%d: mirror/parity forces uniform flag\n",
1324 			       unit);
1325 			ccio->ccio_flags |= CCDF_UNIFORM;
1326 		}
1327 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1328 
1329 		/*
1330 		 * Allocate space for and copy in the array of
1331 		 * componet pathnames and device numbers.
1332 		 */
1333 		cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1334 		    M_DEVBUF, M_WAITOK);
1335 		vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1336 		    M_DEVBUF, M_WAITOK);
1337 
1338 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1339 		    ccio->ccio_ndisks * sizeof(char **));
1340 		if (error) {
1341 			free(vpp, M_DEVBUF);
1342 			free(cpp, M_DEVBUF);
1343 			ccdunlock(cs);
1344 			return (error);
1345 		}
1346 
1347 #ifdef DEBUG
1348 		if (ccddebug & CCDB_INIT)
1349 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1350 				printf("ccdioctl: component %d: 0x%x\n",
1351 				    i, cpp[i]);
1352 #endif
1353 
1354 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1355 #ifdef DEBUG
1356 			if (ccddebug & CCDB_INIT)
1357 				printf("ccdioctl: lookedup = %d\n", lookedup);
1358 #endif
1359 			if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) {
1360 				for (j = 0; j < lookedup; ++j)
1361 					(void)vn_close(vpp[j], FREAD|FWRITE,
1362 					    p->p_ucred, p);
1363 				free(vpp, M_DEVBUF);
1364 				free(cpp, M_DEVBUF);
1365 				ccdunlock(cs);
1366 				return (error);
1367 			}
1368 			++lookedup;
1369 		}
1370 		ccd.ccd_cpp = cpp;
1371 		ccd.ccd_vpp = vpp;
1372 		ccd.ccd_ndev = ccio->ccio_ndisks;
1373 
1374 		/*
1375 		 * Initialize the ccd.  Fills in the softc for us.
1376 		 */
1377 		if ((error = ccdinit(&ccd, cpp, p)) != 0) {
1378 			for (j = 0; j < lookedup; ++j)
1379 				(void)vn_close(vpp[j], FREAD|FWRITE,
1380 				    p->p_ucred, p);
1381 			bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1382 			free(vpp, M_DEVBUF);
1383 			free(cpp, M_DEVBUF);
1384 			ccdunlock(cs);
1385 			return (error);
1386 		}
1387 
1388 		/*
1389 		 * The ccd has been successfully initialized, so
1390 		 * we can place it into the array and read the disklabel.
1391 		 */
1392 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1393 		ccio->ccio_unit = unit;
1394 		ccio->ccio_size = cs->sc_size;
1395 		ccdgetdisklabel(dev);
1396 
1397 		ccdunlock(cs);
1398 
1399 		break;
1400 
1401 	case CCDIOCCLR:
1402 		if ((cs->sc_flags & CCDF_INITED) == 0)
1403 			return (ENXIO);
1404 
1405 		if ((flag & FWRITE) == 0)
1406 			return (EBADF);
1407 
1408 		if ((error = ccdlock(cs)) != 0)
1409 			return (error);
1410 
1411 		/*
1412 		 * Don't unconfigure if any other partitions are open
1413 		 * or if both the character and block flavors of this
1414 		 * partition are open.
1415 		 */
1416 		part = ccdpart(dev);
1417 		pmask = (1 << part);
1418 		if ((cs->sc_openmask & ~pmask) ||
1419 		    ((cs->sc_bopenmask & pmask) &&
1420 		    (cs->sc_copenmask & pmask))) {
1421 			ccdunlock(cs);
1422 			return (EBUSY);
1423 		}
1424 
1425 		/*
1426 		 * Free ccd_softc information and clear entry.
1427 		 */
1428 
1429 		/* Close the components and free their pathnames. */
1430 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1431 			/*
1432 			 * XXX: this close could potentially fail and
1433 			 * cause Bad Things.  Maybe we need to force
1434 			 * the close to happen?
1435 			 */
1436 #ifdef DEBUG
1437 			if (ccddebug & CCDB_VNODE)
1438 				vprint("CCDIOCCLR: vnode info",
1439 				    cs->sc_cinfo[i].ci_vp);
1440 #endif
1441 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
1442 			    p->p_ucred, p);
1443 			free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1444 		}
1445 
1446 		/* Free interleave index. */
1447 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1448 			free(cs->sc_itable[i].ii_index, M_DEVBUF);
1449 
1450 		/* Free component info and interleave table. */
1451 		free(cs->sc_cinfo, M_DEVBUF);
1452 		free(cs->sc_itable, M_DEVBUF);
1453 		cs->sc_flags &= ~CCDF_INITED;
1454 
1455 		/*
1456 		 * Free ccddevice information and clear entry.
1457 		 */
1458 		free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1459 		free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1460 		ccd.ccd_dk = -1;
1461 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1462 
1463 		/*
1464 		 * And remove the devstat entry.
1465 		 */
1466 		devstat_remove_entry(&cs->device_stats);
1467 
1468 		/* This must be atomic. */
1469 		s = splhigh();
1470 		ccdunlock(cs);
1471 		bzero(cs, sizeof(struct ccd_softc));
1472 		splx(s);
1473 
1474 		break;
1475 
1476 	case DIOCGDINFO:
1477 		if ((cs->sc_flags & CCDF_INITED) == 0)
1478 			return (ENXIO);
1479 
1480 		*(struct disklabel *)data = cs->sc_label;
1481 		break;
1482 
1483 	case DIOCGPART:
1484 		if ((cs->sc_flags & CCDF_INITED) == 0)
1485 			return (ENXIO);
1486 
1487 		((struct partinfo *)data)->disklab = &cs->sc_label;
1488 		((struct partinfo *)data)->part =
1489 		    &cs->sc_label.d_partitions[ccdpart(dev)];
1490 		break;
1491 
1492 	case DIOCWDINFO:
1493 	case DIOCSDINFO:
1494 		if ((cs->sc_flags & CCDF_INITED) == 0)
1495 			return (ENXIO);
1496 
1497 		if ((flag & FWRITE) == 0)
1498 			return (EBADF);
1499 
1500 		if ((error = ccdlock(cs)) != 0)
1501 			return (error);
1502 
1503 		cs->sc_flags |= CCDF_LABELLING;
1504 
1505 		error = setdisklabel(&cs->sc_label,
1506 		    (struct disklabel *)data, 0);
1507 		if (error == 0) {
1508 			if (cmd == DIOCWDINFO)
1509 				error = writedisklabel(CCDLABELDEV(dev),
1510 				    &cs->sc_label);
1511 		}
1512 
1513 		cs->sc_flags &= ~CCDF_LABELLING;
1514 
1515 		ccdunlock(cs);
1516 
1517 		if (error)
1518 			return (error);
1519 		break;
1520 
1521 	case DIOCWLABEL:
1522 		if ((cs->sc_flags & CCDF_INITED) == 0)
1523 			return (ENXIO);
1524 
1525 		if ((flag & FWRITE) == 0)
1526 			return (EBADF);
1527 		if (*(int *)data != 0)
1528 			cs->sc_flags |= CCDF_WLABEL;
1529 		else
1530 			cs->sc_flags &= ~CCDF_WLABEL;
1531 		break;
1532 
1533 	default:
1534 		return (ENOTTY);
1535 	}
1536 
1537 	return (0);
1538 }
1539 
1540 static int
1541 ccdsize(dev)
1542 	dev_t dev;
1543 {
1544 	struct ccd_softc *cs;
1545 	int part, size;
1546 
1547 	if (ccdopen(dev, 0, S_IFBLK, curproc))
1548 		return (-1);
1549 
1550 	cs = &ccd_softc[ccdunit(dev)];
1551 	part = ccdpart(dev);
1552 
1553 	if ((cs->sc_flags & CCDF_INITED) == 0)
1554 		return (-1);
1555 
1556 	if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1557 		size = -1;
1558 	else
1559 		size = cs->sc_label.d_partitions[part].p_size;
1560 
1561 	if (ccdclose(dev, 0, S_IFBLK, curproc))
1562 		return (-1);
1563 
1564 	return (size);
1565 }
1566 
/*
 * Dump entry point.  Dumping is not implemented for this device, so
 * the request is always refused with ENXIO; dev is ignored.
 */
static int
ccddump(dev)
	dev_t dev;
{
	int error = ENXIO;	/* Not implemented. */

	return (error);
}
1575 
1576 /*
1577  * Lookup the provided name in the filesystem.  If the file exists,
1578  * is a valid block device, and isn't being used by anyone else,
1579  * set *vpp to the file's vnode.
1580  */
1581 static int
1582 ccdlookup(path, p, vpp)
1583 	char *path;
1584 	struct proc *p;
1585 	struct vnode **vpp;	/* result */
1586 {
1587 	struct nameidata nd;
1588 	struct vnode *vp;
1589 	struct vattr va;
1590 	int error;
1591 
1592 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p);
1593 	if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
1594 #ifdef DEBUG
1595 		if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1596 			printf("ccdlookup: vn_open error = %d\n", error);
1597 #endif
1598 		return (error);
1599 	}
1600 	vp = nd.ni_vp;
1601 
1602 	if (vp->v_usecount > 1) {
1603 		error = EBUSY;
1604 		goto bad;
1605 	}
1606 
1607 	if (!vn_isdisk(vp)) {
1608 		error = ENOTBLK;
1609 		goto bad;
1610 	}
1611 
1612 #ifdef DEBUG
1613 	if (ccddebug & CCDB_VNODE)
1614 		vprint("ccdlookup: vnode info", vp);
1615 #endif
1616 
1617 	VOP_UNLOCK(vp, 0, p);
1618 	NDFREE(&nd, NDF_ONLY_PNBUF);
1619 	*vpp = vp;
1620 	return (0);
1621 bad:
1622 	VOP_UNLOCK(vp, 0, p);
1623 	NDFREE(&nd, NDF_ONLY_PNBUF);
1624 	/* vn_close does vrele() for vp */
1625 	(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1626 	return (error);
1627 }
1628 
1629 /*
1630  * Read the disklabel from the ccd.  If one is not present, fake one
1631  * up.
1632  */
1633 static void
1634 ccdgetdisklabel(dev)
1635 	dev_t dev;
1636 {
1637 	int unit = ccdunit(dev);
1638 	struct ccd_softc *cs = &ccd_softc[unit];
1639 	char *errstring;
1640 	struct disklabel *lp = &cs->sc_label;
1641 	struct ccdgeom *ccg = &cs->sc_geom;
1642 
1643 	bzero(lp, sizeof(*lp));
1644 
1645 	lp->d_secperunit = cs->sc_size;
1646 	lp->d_secsize = ccg->ccg_secsize;
1647 	lp->d_nsectors = ccg->ccg_nsectors;
1648 	lp->d_ntracks = ccg->ccg_ntracks;
1649 	lp->d_ncylinders = ccg->ccg_ncylinders;
1650 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1651 
1652 	strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1653 	lp->d_type = DTYPE_CCD;
1654 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1655 	lp->d_rpm = 3600;
1656 	lp->d_interleave = 1;
1657 	lp->d_flags = 0;
1658 
1659 	lp->d_partitions[RAW_PART].p_offset = 0;
1660 	lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1661 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1662 	lp->d_npartitions = RAW_PART + 1;
1663 
1664 	lp->d_bbsize = BBSIZE;				/* XXX */
1665 	lp->d_sbsize = SBSIZE;				/* XXX */
1666 
1667 	lp->d_magic = DISKMAGIC;
1668 	lp->d_magic2 = DISKMAGIC;
1669 	lp->d_checksum = dkcksum(&cs->sc_label);
1670 
1671 	/*
1672 	 * Call the generic disklabel extraction routine.
1673 	 */
1674 	errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label);
1675 	if (errstring != NULL)
1676 		ccdmakedisklabel(cs);
1677 
1678 #ifdef DEBUG
1679 	/* It's actually extremely common to have unlabeled ccds. */
1680 	if (ccddebug & CCDB_LABEL)
1681 		if (errstring != NULL)
1682 			printf("ccd%d: %s\n", unit, errstring);
1683 #endif
1684 }
1685 
1686 /*
1687  * Take care of things one might want to take care of in the event
1688  * that a disklabel isn't present.
1689  */
1690 static void
1691 ccdmakedisklabel(cs)
1692 	struct ccd_softc *cs;
1693 {
1694 	struct disklabel *lp = &cs->sc_label;
1695 
1696 	/*
1697 	 * For historical reasons, if there's no disklabel present
1698 	 * the raw partition must be marked FS_BSDFFS.
1699 	 */
1700 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1701 
1702 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1703 }
1704 
1705 /*
1706  * Wait interruptibly for an exclusive lock.
1707  *
1708  * XXX
1709  * Several drivers do this; it should be abstracted and made MP-safe.
1710  */
1711 static int
1712 ccdlock(cs)
1713 	struct ccd_softc *cs;
1714 {
1715 	int error;
1716 
1717 	while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1718 		cs->sc_flags |= CCDF_WANTED;
1719 		if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0)
1720 			return (error);
1721 	}
1722 	cs->sc_flags |= CCDF_LOCKED;
1723 	return (0);
1724 }
1725 
1726 /*
1727  * Unlock and wake up any waiters.
1728  */
1729 static void
1730 ccdunlock(cs)
1731 	struct ccd_softc *cs;
1732 {
1733 
1734 	cs->sc_flags &= ~CCDF_LOCKED;
1735 	if ((cs->sc_flags & CCDF_WANTED) != 0) {
1736 		cs->sc_flags &= ~CCDF_WANTED;
1737 		wakeup(cs);
1738 	}
1739 }
1740 
#ifdef DEBUG
/*
 * Debug helper: print every entry of the interleave table starting at
 * "ii".  The table is terminated by an entry whose ii_ndisk is zero.
 */
static void
printiinfo(ii)
	struct ccdiinfo *ii;
{
	int entry, slot;

	for (entry = 0; ii[entry].ii_ndisk != 0; entry++) {
		struct ccdiinfo *cur = &ii[entry];

		printf(" itab[%d]: #dk %d sblk %d soff %d",
		       entry, cur->ii_ndisk, cur->ii_startblk,
		       cur->ii_startoff);

		/* Component indices taking part in this entry. */
		slot = 0;
		while (slot < cur->ii_ndisk) {
			printf(" %d", cur->ii_index[slot]);
			slot++;
		}
		printf("\n");
	}
}
#endif
1757 
1758 #endif /* NCCD > 0 */
1759 
1760 /* Local Variables: */
1761 /* c-argdecl-indent: 8 */
1762 /* c-continued-statement-offset: 8 */
1763 /* c-indent-level: 8 */
1764 /* End: */
1765