xref: /freebsd/sys/geom/geom_ccd.c (revision 4cf49a43559ed9fdad601bdcccd2c55963008675)
1 /* $FreeBSD$ */
2 
3 /*	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $	*/
4 
5 /*
6  * Copyright (c) 1995 Jason R. Thorpe.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed for the NetBSD Project
20  *	by Jason R. Thorpe.
21  * 4. The name of the author may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 /*
38  * Copyright (c) 1988 University of Utah.
39  * Copyright (c) 1990, 1993
40  *	The Regents of the University of California.  All rights reserved.
41  *
42  * This code is derived from software contributed to Berkeley by
43  * the Systems Programming Group of the University of Utah Computer
44  * Science Department.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  * 3. All advertising materials mentioning features or use of this software
55  *    must display the following acknowledgement:
56  *	This product includes software developed by the University of
57  *	California, Berkeley and its contributors.
58  * 4. Neither the name of the University nor the names of its contributors
59  *    may be used to endorse or promote products derived from this software
60  *    without specific prior written permission.
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
72  * SUCH DAMAGE.
73  *
74  * from: Utah $Hdr: cd.c 1.6 90/11/28$
75  *
76  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
77  */
78 
79 /*
80  * "Concatenated" disk driver.
81  *
82  * Dynamic configuration and disklabel support by:
83  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
84  *	Numerical Aerodynamic Simulation Facility
85  *	Mail Stop 258-6
86  *	NASA Ames Research Center
87  *	Moffett Field, CA 94035
88  */
89 
90 #include "ccd.h"
91 #if NCCD > 0
92 
93 #include <sys/param.h>
94 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/module.h>
97 #include <sys/proc.h>
98 #include <sys/buf.h>
99 #include <sys/malloc.h>
100 #include <sys/namei.h>
101 #include <sys/conf.h>
102 #include <sys/stat.h>
103 #include <sys/sysctl.h>
104 #include <sys/disklabel.h>
105 #include <ufs/ffs/fs.h>
106 #include <sys/devicestat.h>
107 #include <sys/fcntl.h>
108 #include <sys/vnode.h>
109 
110 #include <sys/ccdvar.h>
111 
/*
 * Debugging support.  Building with CCDDEBUG turns on DEBUG for this
 * file unless DEBUG was already defined.
 */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#if defined(DEBUG)
/* Bit flags tested against ccddebug by the debug printf()s. */
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
/* All categories are enabled by default; adjustable through sysctl. */
static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
    CCDB_VNODE;
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
/*
 * NOTE: DEBUG is switched back off right here, so every later
 * "#ifdef DEBUG" section in this file is compiled out even when
 * CCDDEBUG was requested; only the flag variable and its sysctl
 * survive.
 */
#undef DEBUG
#endif

/*
 * Unit and partition of a ccd dev_t, obtained through the generic
 * dkunit()/dkpart() helpers.
 */
#define	ccdunit(x)	dkunit(x)
#define ccdpart(x)	dkpart(x)
130 
131 /*
132    This is how mirroring works (only writes are special):
133 
134    When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
135    linked together by the cb_mirror field.  "cb_pflags &
136    CCDPF_MIRROR_DONE" is set to 0 on both of them.
137 
138    When a component returns to ccdiodone(), it checks if "cb_pflags &
139    CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
140    flag and returns.  If it is, it means its partner has already
141    returned, so it will go to the regular cleanup.
142 
143  */
144 
145 struct ccdbuf {
146 	struct buf	cb_buf;		/* new I/O buf */
147 	struct buf	*cb_obp;	/* ptr. to original I/O buf */
148 	struct ccdbuf	*cb_freenext;	/* free list link */
149 	int		cb_unit;	/* target unit */
150 	int		cb_comp;	/* target component */
151 	int		cb_pflags;	/* mirror/parity status flag */
152 	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
153 };
154 
155 /* bits in cb_pflags */
156 #define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */
157 
158 #define CCDLABELDEV(dev)	\
159 	(makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
160 
161 static d_open_t ccdopen;
162 static d_close_t ccdclose;
163 static d_strategy_t ccdstrategy;
164 static d_ioctl_t ccdioctl;
165 static d_dump_t ccddump;
166 static d_psize_t ccdsize;
167 
168 #define NCCDFREEHIWAT	16
169 
170 #define CDEV_MAJOR 74
171 #define BDEV_MAJOR 21
172 
173 static struct cdevsw ccd_cdevsw = {
174 	/* open */	ccdopen,
175 	/* close */	ccdclose,
176 	/* read */	physread,
177 	/* write */	physwrite,
178 	/* ioctl */	ccdioctl,
179 	/* poll */	nopoll,
180 	/* mmap */	nommap,
181 	/* strategy */	ccdstrategy,
182 	/* name */	"ccd",
183 	/* maj */	CDEV_MAJOR,
184 	/* dump */	ccddump,
185 	/* psize */	ccdsize,
186 	/* flags */	D_DISK,
187 	/* bmaj */	BDEV_MAJOR
188 };
189 
190 /* called during module initialization */
191 static	void ccdattach __P((void));
192 static	int ccd_modevent __P((module_t, int, void *));
193 
194 /* called by biodone() at interrupt time */
195 static	void ccdiodone __P((struct ccdbuf *cbp));
196 
197 static	void ccdstart __P((struct ccd_softc *, struct buf *));
198 static	void ccdinterleave __P((struct ccd_softc *, int));
199 static	void ccdintr __P((struct ccd_softc *, struct buf *));
200 static	int ccdinit __P((struct ccddevice *, char **, struct proc *));
201 static	int ccdlookup __P((char *, struct proc *p, struct vnode **));
202 static	void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *,
203 		struct buf *, daddr_t, caddr_t, long));
204 static	void ccdgetdisklabel __P((dev_t));
205 static	void ccdmakedisklabel __P((struct ccd_softc *));
206 static	int ccdlock __P((struct ccd_softc *));
207 static	void ccdunlock __P((struct ccd_softc *));
208 
209 #ifdef DEBUG
210 static	void printiinfo __P((struct ccdiinfo *));
211 #endif
212 
213 /* Non-private for the benefit of libkvm. */
214 struct	ccd_softc *ccd_softc;
215 struct	ccddevice *ccddevs;
216 struct	ccdbuf *ccdfreebufs;
217 static	int numccdfreebufs;
218 static	int numccd = 0;
219 
220 /*
221  * getccdbuf() -	Allocate and zero a ccd buffer.
222  *
223  *	This routine is called at splbio().
224  */
225 
226 static __inline
227 struct ccdbuf *
228 getccdbuf(struct ccdbuf *cpy)
229 {
230 	struct ccdbuf *cbp;
231 
232 	/*
233 	 * Allocate from freelist or malloc as necessary
234 	 */
235 	if ((cbp = ccdfreebufs) != NULL) {
236 		ccdfreebufs = cbp->cb_freenext;
237 		--numccdfreebufs;
238 	} else {
239 		cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
240 	}
241 
242 	/*
243 	 * Used by mirroring code
244 	 */
245 	if (cpy)
246 		bcopy(cpy, cbp, sizeof(struct ccdbuf));
247 	else
248 		bzero(cbp, sizeof(struct ccdbuf));
249 
250 	/*
251 	 * independant struct buf initialization
252 	 */
253 	LIST_INIT(&cbp->cb_buf.b_dep);
254 	BUF_LOCKINIT(&cbp->cb_buf);
255 	BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
256 	BUF_KERNPROC(&cbp->cb_buf);
257 
258 	return(cbp);
259 }
260 
261 /*
262  * putccdbuf() -	Free a ccd buffer.
263  *
264  *	This routine is called at splbio().
265  */
266 
267 static __inline
268 void
269 putccdbuf(struct ccdbuf *cbp)
270 {
271 	BUF_UNLOCK(&cbp->cb_buf);
272 	BUF_LOCKFREE(&cbp->cb_buf);
273 
274 	if (numccdfreebufs < NCCDFREEHIWAT) {
275 		cbp->cb_freenext = ccdfreebufs;
276 		ccdfreebufs = cbp;
277 		++numccdfreebufs;
278 	} else {
279 		free((caddr_t)cbp, M_DEVBUF);
280 	}
281 }
282 
283 
/*
 * Number of blocks to leave untouched at the front of every component
 * partition, so that the component's disklabel area is not overwritten
 * when the partition starts at the beginning of its slice.  May be
 * overridden at build time.
 */
#ifndef CCD_OFFSET
#define CCD_OFFSET 16
#endif
292 
293 /*
294  * Called by main() during pseudo-device attachment.  All we need
295  * to do is allocate enough space for devices to be configured later, and
296  * add devsw entries.
297  */
298 static void
299 ccdattach()
300 {
301 	int i;
302 	int num = NCCD;
303 
304 	if (num > 1)
305 		printf("ccd0-%d: Concatenated disk drivers\n", num-1);
306 	else
307 		printf("ccd0: Concatenated disk driver\n");
308 
309 	ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc),
310 	    M_DEVBUF, M_NOWAIT);
311 	ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice),
312 	    M_DEVBUF, M_NOWAIT);
313 	if ((ccd_softc == NULL) || (ccddevs == NULL)) {
314 		printf("WARNING: no memory for concatenated disks\n");
315 		if (ccd_softc != NULL)
316 			free(ccd_softc, M_DEVBUF);
317 		if (ccddevs != NULL)
318 			free(ccddevs, M_DEVBUF);
319 		return;
320 	}
321 	numccd = num;
322 	bzero(ccd_softc, num * sizeof(struct ccd_softc));
323 	bzero(ccddevs, num * sizeof(struct ccddevice));
324 
325 	/* XXX: is this necessary? */
326 	for (i = 0; i < numccd; ++i)
327 		ccddevs[i].ccd_dk = -1;
328 }
329 
330 static int
331 ccd_modevent(mod, type, data)
332 	module_t mod;
333 	int type;
334 	void *data;
335 {
336 	int error = 0;
337 
338 	switch (type) {
339 	case MOD_LOAD:
340 		ccdattach();
341 		break;
342 
343 	case MOD_UNLOAD:
344 		printf("ccd0: Unload not supported!\n");
345 		error = EOPNOTSUPP;
346 		break;
347 
348 	default:	/* MOD_SHUTDOWN etc */
349 		break;
350 	}
351 	return (error);
352 }
353 
354 DEV_MODULE(ccd, CDEV_MAJOR, BDEV_MAJOR, ccd_cdevsw, ccd_modevent, NULL);
355 
356 static int
357 ccdinit(ccd, cpaths, p)
358 	struct ccddevice *ccd;
359 	char **cpaths;
360 	struct proc *p;
361 {
362 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
363 	struct ccdcinfo *ci = NULL;	/* XXX */
364 	size_t size;
365 	int ix;
366 	struct vnode *vp;
367 	size_t minsize;
368 	int maxsecsize;
369 	struct partinfo dpart;
370 	struct ccdgeom *ccg = &cs->sc_geom;
371 	char tmppath[MAXPATHLEN];
372 	int error = 0;
373 
374 #ifdef DEBUG
375 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
376 		printf("ccdinit: unit %d\n", ccd->ccd_unit);
377 #endif
378 
379 	cs->sc_size = 0;
380 	cs->sc_ileave = ccd->ccd_interleave;
381 	cs->sc_nccdisks = ccd->ccd_ndev;
382 
383 	/* Allocate space for the component info. */
384 	cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
385 	    M_DEVBUF, M_WAITOK);
386 
387 	/*
388 	 * Verify that each component piece exists and record
389 	 * relevant information about it.
390 	 */
391 	maxsecsize = 0;
392 	minsize = 0;
393 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
394 		vp = ccd->ccd_vpp[ix];
395 		ci = &cs->sc_cinfo[ix];
396 		ci->ci_vp = vp;
397 
398 		/*
399 		 * Copy in the pathname of the component.
400 		 */
401 		bzero(tmppath, sizeof(tmppath));	/* sanity */
402 		if ((error = copyinstr(cpaths[ix], tmppath,
403 		    MAXPATHLEN, &ci->ci_pathlen)) != 0) {
404 #ifdef DEBUG
405 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
406 				printf("ccd%d: can't copy path, error = %d\n",
407 				    ccd->ccd_unit, error);
408 #endif
409 			goto fail;
410 		}
411 		ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
412 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
413 
414 		ci->ci_dev = vn_todev(vp);
415 
416 		/*
417 		 * Get partition information for the component.
418 		 */
419 		if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
420 		    FREAD, p->p_ucred, p)) != 0) {
421 #ifdef DEBUG
422 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
423 				 printf("ccd%d: %s: ioctl failed, error = %d\n",
424 				     ccd->ccd_unit, ci->ci_path, error);
425 #endif
426 			goto fail;
427 		}
428 		if (dpart.part->p_fstype == FS_BSDFFS) {
429 			maxsecsize =
430 			    ((dpart.disklab->d_secsize > maxsecsize) ?
431 			    dpart.disklab->d_secsize : maxsecsize);
432 			size = dpart.part->p_size - CCD_OFFSET;
433 		} else {
434 #ifdef DEBUG
435 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
436 				printf("ccd%d: %s: incorrect partition type\n",
437 				    ccd->ccd_unit, ci->ci_path);
438 #endif
439 			error = EFTYPE;
440 			goto fail;
441 		}
442 
443 		/*
444 		 * Calculate the size, truncating to an interleave
445 		 * boundary if necessary.
446 		 */
447 
448 		if (cs->sc_ileave > 1)
449 			size -= size % cs->sc_ileave;
450 
451 		if (size == 0) {
452 #ifdef DEBUG
453 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
454 				printf("ccd%d: %s: size == 0\n",
455 				    ccd->ccd_unit, ci->ci_path);
456 #endif
457 			error = ENODEV;
458 			goto fail;
459 		}
460 
461 		if (minsize == 0 || size < minsize)
462 			minsize = size;
463 		ci->ci_size = size;
464 		cs->sc_size += size;
465 	}
466 
467 	/*
468 	 * Don't allow the interleave to be smaller than
469 	 * the biggest component sector.
470 	 */
471 	if ((cs->sc_ileave > 0) &&
472 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
473 #ifdef DEBUG
474 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
475 			printf("ccd%d: interleave must be at least %d\n",
476 			    ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
477 #endif
478 		error = EINVAL;
479 		goto fail;
480 	}
481 
482 	/*
483 	 * If uniform interleave is desired set all sizes to that of
484 	 * the smallest component.  This will guarentee that a single
485 	 * interleave table is generated.
486 	 *
487 	 * Lost space must be taken into account when calculating the
488 	 * overall size.  Half the space is lost when CCDF_MIRROR is
489 	 * specified.  One disk is lost when CCDF_PARITY is specified.
490 	 */
491 	if (ccd->ccd_flags & CCDF_UNIFORM) {
492 		for (ci = cs->sc_cinfo;
493 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
494 			ci->ci_size = minsize;
495 		}
496 		if (ccd->ccd_flags & CCDF_MIRROR) {
497 			/*
498 			 * Check to see if an even number of components
499 			 * have been specified.  The interleave must also
500 			 * be non-zero in order for us to be able to
501 			 * guarentee the topology.
502 			 */
503 			if (cs->sc_nccdisks % 2) {
504 				printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
505 				error = EINVAL;
506 				goto fail;
507 			}
508 			if (cs->sc_ileave == 0) {
509 				printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
510 				error = EINVAL;
511 				goto fail;
512 			}
513 			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
514 		} else if (ccd->ccd_flags & CCDF_PARITY) {
515 			cs->sc_size = (cs->sc_nccdisks-1) * minsize;
516 		} else {
517 			if (cs->sc_ileave == 0) {
518 				printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
519 				error = EINVAL;
520 				goto fail;
521 			}
522 			cs->sc_size = cs->sc_nccdisks * minsize;
523 		}
524 	}
525 
526 	/*
527 	 * Construct the interleave table.
528 	 */
529 	ccdinterleave(cs, ccd->ccd_unit);
530 
531 	/*
532 	 * Create pseudo-geometry based on 1MB cylinders.  It's
533 	 * pretty close.
534 	 */
535 	ccg->ccg_secsize = maxsecsize;
536 	ccg->ccg_ntracks = 1;
537 	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
538 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
539 
540 	/*
541 	 * Add an devstat entry for this device.
542 	 */
543 	devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
544 			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
545 			  DEVSTAT_TYPE_ASC0 |DEVSTAT_TYPE_IF_OTHER,
546 			  DEVSTAT_PRIORITY_CCD);
547 
548 	cs->sc_flags |= CCDF_INITED;
549 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
550 	cs->sc_unit = ccd->ccd_unit;
551 	return (0);
552 fail:
553 	while (ci > cs->sc_cinfo) {
554 		ci--;
555 		free(ci->ci_path, M_DEVBUF);
556 	}
557 	free(cs->sc_cinfo, M_DEVBUF);
558 	return (error);
559 }
560 
561 static void
562 ccdinterleave(cs, unit)
563 	struct ccd_softc *cs;
564 	int unit;
565 {
566 	struct ccdcinfo *ci, *smallci;
567 	struct ccdiinfo *ii;
568 	daddr_t bn, lbn;
569 	int ix;
570 	u_long size;
571 
572 #ifdef DEBUG
573 	if (ccddebug & CCDB_INIT)
574 		printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
575 #endif
576 
577 	/*
578 	 * Allocate an interleave table.  The worst case occurs when each
579 	 * of N disks is of a different size, resulting in N interleave
580 	 * tables.
581 	 *
582 	 * Chances are this is too big, but we don't care.
583 	 */
584 	size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
585 	cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
586 	bzero((caddr_t)cs->sc_itable, size);
587 
588 	/*
589 	 * Trivial case: no interleave (actually interleave of disk size).
590 	 * Each table entry represents a single component in its entirety.
591 	 *
592 	 * An interleave of 0 may not be used with a mirror or parity setup.
593 	 */
594 	if (cs->sc_ileave == 0) {
595 		bn = 0;
596 		ii = cs->sc_itable;
597 
598 		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
599 			/* Allocate space for ii_index. */
600 			ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
601 			ii->ii_ndisk = 1;
602 			ii->ii_startblk = bn;
603 			ii->ii_startoff = 0;
604 			ii->ii_index[0] = ix;
605 			bn += cs->sc_cinfo[ix].ci_size;
606 			ii++;
607 		}
608 		ii->ii_ndisk = 0;
609 #ifdef DEBUG
610 		if (ccddebug & CCDB_INIT)
611 			printiinfo(cs->sc_itable);
612 #endif
613 		return;
614 	}
615 
616 	/*
617 	 * The following isn't fast or pretty; it doesn't have to be.
618 	 */
619 	size = 0;
620 	bn = lbn = 0;
621 	for (ii = cs->sc_itable; ; ii++) {
622 		/*
623 		 * Allocate space for ii_index.  We might allocate more then
624 		 * we use.
625 		 */
626 		ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
627 		    M_DEVBUF, M_WAITOK);
628 
629 		/*
630 		 * Locate the smallest of the remaining components
631 		 */
632 		smallci = NULL;
633 		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
634 		    ci++) {
635 			if (ci->ci_size > size &&
636 			    (smallci == NULL ||
637 			     ci->ci_size < smallci->ci_size)) {
638 				smallci = ci;
639 			}
640 		}
641 
642 		/*
643 		 * Nobody left, all done
644 		 */
645 		if (smallci == NULL) {
646 			ii->ii_ndisk = 0;
647 			break;
648 		}
649 
650 		/*
651 		 * Record starting logical block using an sc_ileave blocksize.
652 		 */
653 		ii->ii_startblk = bn / cs->sc_ileave;
654 
655 		/*
656 		 * Record starting comopnent block using an sc_ileave
657 		 * blocksize.  This value is relative to the beginning of
658 		 * a component disk.
659 		 */
660 		ii->ii_startoff = lbn;
661 
662 		/*
663 		 * Determine how many disks take part in this interleave
664 		 * and record their indices.
665 		 */
666 		ix = 0;
667 		for (ci = cs->sc_cinfo;
668 		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
669 			if (ci->ci_size >= smallci->ci_size) {
670 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
671 			}
672 		}
673 		ii->ii_ndisk = ix;
674 		bn += ix * (smallci->ci_size - size);
675 		lbn = smallci->ci_size / cs->sc_ileave;
676 		size = smallci->ci_size;
677 	}
678 #ifdef DEBUG
679 	if (ccddebug & CCDB_INIT)
680 		printiinfo(cs->sc_itable);
681 #endif
682 }
683 
684 /* ARGSUSED */
685 static int
686 ccdopen(dev, flags, fmt, p)
687 	dev_t dev;
688 	int flags, fmt;
689 	struct proc *p;
690 {
691 	int unit = ccdunit(dev);
692 	struct ccd_softc *cs;
693 	struct disklabel *lp;
694 	int error = 0, part, pmask;
695 
696 #ifdef DEBUG
697 	if (ccddebug & CCDB_FOLLOW)
698 		printf("ccdopen(%x, %x)\n", dev, flags);
699 #endif
700 	if (unit >= numccd)
701 		return (ENXIO);
702 	cs = &ccd_softc[unit];
703 
704 	if ((error = ccdlock(cs)) != 0)
705 		return (error);
706 
707 	lp = &cs->sc_label;
708 
709 	part = ccdpart(dev);
710 	pmask = (1 << part);
711 
712 	/*
713 	 * If we're initialized, check to see if there are any other
714 	 * open partitions.  If not, then it's safe to update
715 	 * the in-core disklabel.
716 	 */
717 	if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
718 		ccdgetdisklabel(dev);
719 
720 	/* Check that the partition exists. */
721 	if (part != RAW_PART && ((part >= lp->d_npartitions) ||
722 	    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
723 		error = ENXIO;
724 		goto done;
725 	}
726 
727 	/* Prevent our unit from being unconfigured while open. */
728 	switch (fmt) {
729 	case S_IFCHR:
730 		cs->sc_copenmask |= pmask;
731 		break;
732 
733 	case S_IFBLK:
734 		cs->sc_bopenmask |= pmask;
735 		break;
736 	}
737 	cs->sc_openmask =
738 	    cs->sc_copenmask | cs->sc_bopenmask;
739 
740  done:
741 	ccdunlock(cs);
742 	return (0);
743 }
744 
745 /* ARGSUSED */
746 static int
747 ccdclose(dev, flags, fmt, p)
748 	dev_t dev;
749 	int flags, fmt;
750 	struct proc *p;
751 {
752 	int unit = ccdunit(dev);
753 	struct ccd_softc *cs;
754 	int error = 0, part;
755 
756 #ifdef DEBUG
757 	if (ccddebug & CCDB_FOLLOW)
758 		printf("ccdclose(%x, %x)\n", dev, flags);
759 #endif
760 
761 	if (unit >= numccd)
762 		return (ENXIO);
763 	cs = &ccd_softc[unit];
764 
765 	if ((error = ccdlock(cs)) != 0)
766 		return (error);
767 
768 	part = ccdpart(dev);
769 
770 	/* ...that much closer to allowing unconfiguration... */
771 	switch (fmt) {
772 	case S_IFCHR:
773 		cs->sc_copenmask &= ~(1 << part);
774 		break;
775 
776 	case S_IFBLK:
777 		cs->sc_bopenmask &= ~(1 << part);
778 		break;
779 	}
780 	cs->sc_openmask =
781 	    cs->sc_copenmask | cs->sc_bopenmask;
782 
783 	ccdunlock(cs);
784 	return (0);
785 }
786 
787 static void
788 ccdstrategy(bp)
789 	struct buf *bp;
790 {
791 	int unit = ccdunit(bp->b_dev);
792 	struct ccd_softc *cs = &ccd_softc[unit];
793 	int s;
794 	int wlabel;
795 	struct disklabel *lp;
796 
797 #ifdef DEBUG
798 	if (ccddebug & CCDB_FOLLOW)
799 		printf("ccdstrategy(%x): unit %d\n", bp, unit);
800 #endif
801 	if ((cs->sc_flags & CCDF_INITED) == 0) {
802 		bp->b_error = ENXIO;
803 		bp->b_flags |= B_ERROR;
804 		goto done;
805 	}
806 
807 	/* If it's a nil transfer, wake up the top half now. */
808 	if (bp->b_bcount == 0)
809 		goto done;
810 
811 	lp = &cs->sc_label;
812 
813 	/*
814 	 * Do bounds checking and adjust transfer.  If there's an
815 	 * error, the bounds check will flag that for us.
816 	 */
817 	wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
818 	if (ccdpart(bp->b_dev) != RAW_PART) {
819 		if (bounds_check_with_label(bp, lp, wlabel) <= 0)
820 			goto done;
821 	} else {
822 		int pbn;        /* in sc_secsize chunks */
823 		long sz;        /* in sc_secsize chunks */
824 
825 		pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
826 		sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
827 
828 		/*
829 		 * If out of bounds return an error. If at the EOF point,
830 		 * simply read or write less.
831 		 */
832 
833 		if (pbn < 0 || pbn >= cs->sc_size) {
834 			bp->b_resid = bp->b_bcount;
835 			if (pbn != cs->sc_size) {
836 				bp->b_error = EINVAL;
837 				bp->b_flags |= B_ERROR | B_INVAL;
838 			}
839 			goto done;
840 		}
841 
842 		/*
843 		 * If the request crosses EOF, truncate the request.
844 		 */
845 		if (pbn + sz > cs->sc_size) {
846 			bp->b_bcount = (cs->sc_size - pbn) *
847 			    cs->sc_geom.ccg_secsize;
848 		}
849 	}
850 
851 	bp->b_resid = bp->b_bcount;
852 
853 	/*
854 	 * "Start" the unit.
855 	 */
856 	s = splbio();
857 	ccdstart(cs, bp);
858 	splx(s);
859 	return;
860 done:
861 	biodone(bp);
862 }
863 
864 static void
865 ccdstart(cs, bp)
866 	struct ccd_softc *cs;
867 	struct buf *bp;
868 {
869 	long bcount, rcount;
870 	struct ccdbuf *cbp[4];
871 	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
872 	caddr_t addr;
873 	daddr_t bn;
874 	struct partition *pp;
875 
876 #ifdef DEBUG
877 	if (ccddebug & CCDB_FOLLOW)
878 		printf("ccdstart(%x, %x)\n", cs, bp);
879 #endif
880 
881 	/* Record the transaction start  */
882 	devstat_start_transaction(&cs->device_stats);
883 
884 	/*
885 	 * Translate the partition-relative block number to an absolute.
886 	 */
887 	bn = bp->b_blkno;
888 	if (ccdpart(bp->b_dev) != RAW_PART) {
889 		pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)];
890 		bn += pp->p_offset;
891 	}
892 
893 	/*
894 	 * Allocate component buffers and fire off the requests
895 	 */
896 	addr = bp->b_data;
897 	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
898 		ccdbuffer(cbp, cs, bp, bn, addr, bcount);
899 		rcount = cbp[0]->cb_buf.b_bcount;
900 
901 		if (cs->sc_cflags & CCDF_MIRROR) {
902 			/*
903 			 * Mirroring.  Writes go to both disks, reads are
904 			 * taken from whichever disk seems most appropriate.
905 			 *
906 			 * We attempt to localize reads to the disk whos arm
907 			 * is nearest the read request.  We ignore seeks due
908 			 * to writes when making this determination and we
909 			 * also try to avoid hogging.
910 			 */
911 			if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) {
912 				cbp[0]->cb_buf.b_vp->v_numoutput++;
913 				cbp[1]->cb_buf.b_vp->v_numoutput++;
914 				VOP_STRATEGY(cbp[0]->cb_buf.b_vp,
915 				    &cbp[0]->cb_buf);
916 				VOP_STRATEGY(cbp[1]->cb_buf.b_vp,
917 				    &cbp[1]->cb_buf);
918 			} else {
919 				int pick = cs->sc_pick;
920 				daddr_t range = cs->sc_size / 16;
921 
922 				if (bn < cs->sc_blk[pick] - range ||
923 				    bn > cs->sc_blk[pick] + range
924 				) {
925 					cs->sc_pick = pick = 1 - pick;
926 				}
927 				cs->sc_blk[pick] = bn + btodb(rcount);
928 				VOP_STRATEGY(cbp[pick]->cb_buf.b_vp,
929 				    &cbp[pick]->cb_buf);
930 			}
931 		} else {
932 			/*
933 			 * Not mirroring
934 			 */
935 			if ((cbp[0]->cb_buf.b_flags & B_READ) == 0)
936 				cbp[0]->cb_buf.b_vp->v_numoutput++;
937 			VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf);
938 		}
939 		bn += btodb(rcount);
940 		addr += rcount;
941 	}
942 }
943 
944 /*
945  * Build a component buffer header.
946  */
947 static void
948 ccdbuffer(cb, cs, bp, bn, addr, bcount)
949 	struct ccdbuf **cb;
950 	struct ccd_softc *cs;
951 	struct buf *bp;
952 	daddr_t bn;
953 	caddr_t addr;
954 	long bcount;
955 {
956 	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
957 	struct ccdbuf *cbp;
958 	daddr_t cbn, cboff;
959 	off_t cbc;
960 
961 #ifdef DEBUG
962 	if (ccddebug & CCDB_IO)
963 		printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
964 		       cs, bp, bn, addr, bcount);
965 #endif
966 	/*
967 	 * Determine which component bn falls in.
968 	 */
969 	cbn = bn;
970 	cboff = 0;
971 
972 	if (cs->sc_ileave == 0) {
973 		/*
974 		 * Serially concatenated and neither a mirror nor a parity
975 		 * config.  This is a special case.
976 		 */
977 		daddr_t sblk;
978 
979 		sblk = 0;
980 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
981 			sblk += ci->ci_size;
982 		cbn -= sblk;
983 	} else {
984 		struct ccdiinfo *ii;
985 		int ccdisk, off;
986 
987 		/*
988 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
989 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
990 		 * to cbn.
991 		 */
992 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
993 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
994 
995 		/*
996 		 * Figure out which interleave table to use.
997 		 */
998 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
999 			if (ii->ii_startblk > cbn)
1000 				break;
1001 		}
1002 		ii--;
1003 
1004 		/*
1005 		 * off is the logical superblock relative to the beginning
1006 		 * of this interleave block.
1007 		 */
1008 		off = cbn - ii->ii_startblk;
1009 
1010 		/*
1011 		 * We must calculate which disk component to use (ccdisk),
1012 		 * and recalculate cbn to be the superblock relative to
1013 		 * the beginning of the component.  This is typically done by
1014 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
1015 		 * must typically be divided by the number of components in
1016 		 * this interleave array to be properly convert it from a
1017 		 * CCD-relative logical superblock number to a
1018 		 * component-relative superblock number.
1019 		 */
1020 		if (ii->ii_ndisk == 1) {
1021 			/*
1022 			 * When we have just one disk, it can't be a mirror
1023 			 * or a parity config.
1024 			 */
1025 			ccdisk = ii->ii_index[0];
1026 			cbn = ii->ii_startoff + off;
1027 		} else {
1028 			if (cs->sc_cflags & CCDF_MIRROR) {
1029 				/*
1030 				 * We have forced a uniform mapping, resulting
1031 				 * in a single interleave array.  We double
1032 				 * up on the first half of the available
1033 				 * components and our mirror is in the second
1034 				 * half.  This only works with a single
1035 				 * interleave array because doubling up
1036 				 * doubles the number of sectors, so there
1037 				 * cannot be another interleave array because
1038 				 * the next interleave array's calculations
1039 				 * would be off.
1040 				 */
1041 				int ndisk2 = ii->ii_ndisk / 2;
1042 				ccdisk = ii->ii_index[off % ndisk2];
1043 				cbn = ii->ii_startoff + off / ndisk2;
1044 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1045 			} else if (cs->sc_cflags & CCDF_PARITY) {
1046 				/*
1047 				 * XXX not implemented yet
1048 				 */
1049 				int ndisk2 = ii->ii_ndisk - 1;
1050 				ccdisk = ii->ii_index[off % ndisk2];
1051 				cbn = ii->ii_startoff + off / ndisk2;
1052 				if (cbn % ii->ii_ndisk <= ccdisk)
1053 					ccdisk++;
1054 			} else {
1055 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
1056 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
1057 			}
1058 		}
1059 
1060 		ci = &cs->sc_cinfo[ccdisk];
1061 
1062 		/*
1063 		 * Convert cbn from a superblock to a normal block so it
1064 		 * can be used to calculate (along with cboff) the normal
1065 		 * block index into this particular disk.
1066 		 */
1067 		cbn *= cs->sc_ileave;
1068 	}
1069 
1070 	/*
1071 	 * Fill in the component buf structure.
1072 	 */
1073 	cbp = getccdbuf(NULL);
1074 	cbp->cb_buf.b_flags = bp->b_flags | B_CALL;
1075 	cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone;
1076 	cbp->cb_buf.b_dev = ci->ci_dev;		/* XXX */
1077 	cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET;
1078 	cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET);
1079 	cbp->cb_buf.b_data = addr;
1080 	cbp->cb_buf.b_vp = ci->ci_vp;
1081 	if (cs->sc_ileave == 0)
1082               cbc = dbtob((off_t)(ci->ci_size - cbn));
1083 	else
1084               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1085 	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1086  	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1087 
1088 	/*
1089 	 * context for ccdiodone
1090 	 */
1091 	cbp->cb_obp = bp;
1092 	cbp->cb_unit = cs - ccd_softc;
1093 	cbp->cb_comp = ci - cs->sc_cinfo;
1094 
1095 #ifdef DEBUG
1096 	if (ccddebug & CCDB_IO)
1097 		printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
1098 		       ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno,
1099 		       cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1100 #endif
1101 	cb[0] = cbp;
1102 
1103 	/*
1104 	 * Note: both I/O's setup when reading from mirror, but only one
1105 	 * will be executed.
1106 	 */
1107 	if (cs->sc_cflags & CCDF_MIRROR) {
1108 		/* mirror, setup second I/O */
1109 		cbp = getccdbuf(cb[0]);
1110 		cbp->cb_buf.b_dev = ci2->ci_dev;
1111 		cbp->cb_buf.b_vp = ci2->ci_vp;
1112 		cbp->cb_comp = ci2 - cs->sc_cinfo;
1113 		cb[1] = cbp;
1114 		/* link together the ccdbuf's and clear "mirror done" flag */
1115 		cb[0]->cb_mirror = cb[1];
1116 		cb[1]->cb_mirror = cb[0];
1117 		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1118 		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1119 	}
1120 }
1121 
1122 static void
1123 ccdintr(cs, bp)
1124 	struct ccd_softc *cs;
1125 	struct buf *bp;
1126 {
1127 #ifdef DEBUG
1128 	if (ccddebug & CCDB_FOLLOW)
1129 		printf("ccdintr(%x, %x)\n", cs, bp);
1130 #endif
1131 	/*
1132 	 * Request is done for better or worse, wakeup the top half.
1133 	 */
1134 	if (bp->b_flags & B_ERROR)
1135 		bp->b_resid = bp->b_bcount;
1136 	devstat_end_transaction_buf(&cs->device_stats, bp);
1137 	biodone(bp);
1138 }
1139 
1140 /*
1141  * Called at interrupt time.
1142  * Mark the component as done and if all components are done,
1143  * take a ccd interrupt.
1144  */
1145 static void
1146 ccdiodone(cbp)
1147 	struct ccdbuf *cbp;
1148 {
1149 	struct buf *bp = cbp->cb_obp;
1150 	int unit = cbp->cb_unit;
1151 	int count, s;
1152 
1153 	s = splbio();
1154 #ifdef DEBUG
1155 	if (ccddebug & CCDB_FOLLOW)
1156 		printf("ccdiodone(%x)\n", cbp);
1157 	if (ccddebug & CCDB_IO) {
1158 		printf("ccdiodone: bp %x bcount %d resid %d\n",
1159 		       bp, bp->b_bcount, bp->b_resid);
1160 		printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
1161 		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1162 		       cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
1163 		       cbp->cb_buf.b_bcount);
1164 	}
1165 #endif
1166 	/*
1167 	 * If an error occured, report it.  If this is a mirrored
1168 	 * configuration and the first of two possible reads, do not
1169 	 * set the error in the bp yet because the second read may
1170 	 * succeed.
1171 	 */
1172 
1173 	if (cbp->cb_buf.b_flags & B_ERROR) {
1174 		const char *msg = "";
1175 
1176 		if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1177 		    (cbp->cb_buf.b_flags & B_READ) &&
1178 		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1179 			/*
1180 			 * We will try our read on the other disk down
1181 			 * below, also reverse the default pick so if we
1182 			 * are doing a scan we do not keep hitting the
1183 			 * bad disk first.
1184 			 */
1185 			struct ccd_softc *cs = &ccd_softc[unit];
1186 
1187 			msg = ", trying other disk";
1188 			cs->sc_pick = 1 - cs->sc_pick;
1189 			cs->sc_blk[cs->sc_pick] = bp->b_blkno;
1190 		} else {
1191 			bp->b_flags |= B_ERROR;
1192 			bp->b_error = cbp->cb_buf.b_error ?
1193 			    cbp->cb_buf.b_error : EIO;
1194 		}
1195 		printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
1196 		       unit, bp->b_error, cbp->cb_comp,
1197 		       (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg);
1198 	}
1199 
1200 	/*
1201 	 * Process mirror.  If we are writing, I/O has been initiated on both
1202 	 * buffers and we fall through only after both are finished.
1203 	 *
1204 	 * If we are reading only one I/O is initiated at a time.  If an
1205 	 * error occurs we initiate the second I/O and return, otherwise
1206 	 * we free the second I/O without initiating it.
1207 	 */
1208 
1209 	if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1210 		if ((cbp->cb_buf.b_flags & B_READ) == 0) {
1211 			/*
1212 			 * When writing, handshake with the second buffer
1213 			 * to determine when both are done.  If both are not
1214 			 * done, return here.
1215 			 */
1216 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1217 				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1218 				putccdbuf(cbp);
1219 				splx(s);
1220 				return;
1221 			}
1222 		} else {
1223 			/*
1224 			 * When reading, either dispose of the second buffer
1225 			 * or initiate I/O on the second buffer if an error
1226 			 * occured with this one.
1227 			 */
1228 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1229 				if (cbp->cb_buf.b_flags & B_ERROR) {
1230 					cbp->cb_mirror->cb_pflags |=
1231 					    CCDPF_MIRROR_DONE;
1232 					VOP_STRATEGY(
1233 					    cbp->cb_mirror->cb_buf.b_vp,
1234 					    &cbp->cb_mirror->cb_buf
1235 					);
1236 					putccdbuf(cbp);
1237 					splx(s);
1238 					return;
1239 				} else {
1240 					putccdbuf(cbp->cb_mirror);
1241 					/* fall through */
1242 				}
1243 			}
1244 		}
1245 	}
1246 
1247 	/*
1248 	 * use b_bufsize to determine how big the original request was rather
1249 	 * then b_bcount, because b_bcount may have been truncated for EOF.
1250 	 *
1251 	 * XXX We check for an error, but we do not test the resid for an
1252 	 * aligned EOF condition.  This may result in character & block
1253 	 * device access not recognizing EOF properly when read or written
1254 	 * sequentially, but will not effect filesystems.
1255 	 */
1256 	count = cbp->cb_buf.b_bufsize;
1257 	putccdbuf(cbp);
1258 
1259 	/*
1260 	 * If all done, "interrupt".
1261 	 */
1262 	bp->b_resid -= count;
1263 	if (bp->b_resid < 0)
1264 		panic("ccdiodone: count");
1265 	if (bp->b_resid == 0)
1266 		ccdintr(&ccd_softc[unit], bp);
1267 	splx(s);
1268 }
1269 
1270 static int
1271 ccdioctl(dev, cmd, data, flag, p)
1272 	dev_t dev;
1273 	u_long cmd;
1274 	caddr_t data;
1275 	int flag;
1276 	struct proc *p;
1277 {
1278 	int unit = ccdunit(dev);
1279 	int i, j, lookedup = 0, error = 0;
1280 	int part, pmask, s;
1281 	struct ccd_softc *cs;
1282 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1283 	struct ccddevice ccd;
1284 	char **cpp;
1285 	struct vnode **vpp;
1286 
1287 	if (unit >= numccd)
1288 		return (ENXIO);
1289 	cs = &ccd_softc[unit];
1290 
1291 	bzero(&ccd, sizeof(ccd));
1292 
1293 	switch (cmd) {
1294 	case CCDIOCSET:
1295 		if (cs->sc_flags & CCDF_INITED)
1296 			return (EBUSY);
1297 
1298 		if ((flag & FWRITE) == 0)
1299 			return (EBADF);
1300 
1301 		if ((error = ccdlock(cs)) != 0)
1302 			return (error);
1303 
1304 		/* Fill in some important bits. */
1305 		ccd.ccd_unit = unit;
1306 		ccd.ccd_interleave = ccio->ccio_ileave;
1307 		if (ccd.ccd_interleave == 0 &&
1308 		    ((ccio->ccio_flags & CCDF_MIRROR) ||
1309 		     (ccio->ccio_flags & CCDF_PARITY))) {
1310 			printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1311 			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1312 		}
1313 		if ((ccio->ccio_flags & CCDF_MIRROR) &&
1314 		    (ccio->ccio_flags & CCDF_PARITY)) {
1315 			printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1316 			ccio->ccio_flags &= ~CCDF_PARITY;
1317 		}
1318 		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1319 		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
1320 			printf("ccd%d: mirror/parity forces uniform flag\n",
1321 			       unit);
1322 			ccio->ccio_flags |= CCDF_UNIFORM;
1323 		}
1324 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1325 
1326 		/*
1327 		 * Allocate space for and copy in the array of
1328 		 * componet pathnames and device numbers.
1329 		 */
1330 		cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1331 		    M_DEVBUF, M_WAITOK);
1332 		vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1333 		    M_DEVBUF, M_WAITOK);
1334 
1335 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1336 		    ccio->ccio_ndisks * sizeof(char **));
1337 		if (error) {
1338 			free(vpp, M_DEVBUF);
1339 			free(cpp, M_DEVBUF);
1340 			ccdunlock(cs);
1341 			return (error);
1342 		}
1343 
1344 #ifdef DEBUG
1345 		if (ccddebug & CCDB_INIT)
1346 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1347 				printf("ccdioctl: component %d: 0x%x\n",
1348 				    i, cpp[i]);
1349 #endif
1350 
1351 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1352 #ifdef DEBUG
1353 			if (ccddebug & CCDB_INIT)
1354 				printf("ccdioctl: lookedup = %d\n", lookedup);
1355 #endif
1356 			if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) {
1357 				for (j = 0; j < lookedup; ++j)
1358 					(void)vn_close(vpp[j], FREAD|FWRITE,
1359 					    p->p_ucred, p);
1360 				free(vpp, M_DEVBUF);
1361 				free(cpp, M_DEVBUF);
1362 				ccdunlock(cs);
1363 				return (error);
1364 			}
1365 			++lookedup;
1366 		}
1367 		ccd.ccd_cpp = cpp;
1368 		ccd.ccd_vpp = vpp;
1369 		ccd.ccd_ndev = ccio->ccio_ndisks;
1370 
1371 		/*
1372 		 * Initialize the ccd.  Fills in the softc for us.
1373 		 */
1374 		if ((error = ccdinit(&ccd, cpp, p)) != 0) {
1375 			for (j = 0; j < lookedup; ++j)
1376 				(void)vn_close(vpp[j], FREAD|FWRITE,
1377 				    p->p_ucred, p);
1378 			bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1379 			free(vpp, M_DEVBUF);
1380 			free(cpp, M_DEVBUF);
1381 			ccdunlock(cs);
1382 			return (error);
1383 		}
1384 
1385 		/*
1386 		 * The ccd has been successfully initialized, so
1387 		 * we can place it into the array and read the disklabel.
1388 		 */
1389 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1390 		ccio->ccio_unit = unit;
1391 		ccio->ccio_size = cs->sc_size;
1392 		ccdgetdisklabel(dev);
1393 
1394 		ccdunlock(cs);
1395 
1396 		break;
1397 
1398 	case CCDIOCCLR:
1399 		if ((cs->sc_flags & CCDF_INITED) == 0)
1400 			return (ENXIO);
1401 
1402 		if ((flag & FWRITE) == 0)
1403 			return (EBADF);
1404 
1405 		if ((error = ccdlock(cs)) != 0)
1406 			return (error);
1407 
1408 		/*
1409 		 * Don't unconfigure if any other partitions are open
1410 		 * or if both the character and block flavors of this
1411 		 * partition are open.
1412 		 */
1413 		part = ccdpart(dev);
1414 		pmask = (1 << part);
1415 		if ((cs->sc_openmask & ~pmask) ||
1416 		    ((cs->sc_bopenmask & pmask) &&
1417 		    (cs->sc_copenmask & pmask))) {
1418 			ccdunlock(cs);
1419 			return (EBUSY);
1420 		}
1421 
1422 		/*
1423 		 * Free ccd_softc information and clear entry.
1424 		 */
1425 
1426 		/* Close the components and free their pathnames. */
1427 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1428 			/*
1429 			 * XXX: this close could potentially fail and
1430 			 * cause Bad Things.  Maybe we need to force
1431 			 * the close to happen?
1432 			 */
1433 #ifdef DEBUG
1434 			if (ccddebug & CCDB_VNODE)
1435 				vprint("CCDIOCCLR: vnode info",
1436 				    cs->sc_cinfo[i].ci_vp);
1437 #endif
1438 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
1439 			    p->p_ucred, p);
1440 			free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1441 		}
1442 
1443 		/* Free interleave index. */
1444 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1445 			free(cs->sc_itable[i].ii_index, M_DEVBUF);
1446 
1447 		/* Free component info and interleave table. */
1448 		free(cs->sc_cinfo, M_DEVBUF);
1449 		free(cs->sc_itable, M_DEVBUF);
1450 		cs->sc_flags &= ~CCDF_INITED;
1451 
1452 		/*
1453 		 * Free ccddevice information and clear entry.
1454 		 */
1455 		free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1456 		free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1457 		ccd.ccd_dk = -1;
1458 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1459 
1460 		/*
1461 		 * And remove the devstat entry.
1462 		 */
1463 		devstat_remove_entry(&cs->device_stats);
1464 
1465 		/* This must be atomic. */
1466 		s = splhigh();
1467 		ccdunlock(cs);
1468 		bzero(cs, sizeof(struct ccd_softc));
1469 		splx(s);
1470 
1471 		break;
1472 
1473 	case DIOCGDINFO:
1474 		if ((cs->sc_flags & CCDF_INITED) == 0)
1475 			return (ENXIO);
1476 
1477 		*(struct disklabel *)data = cs->sc_label;
1478 		break;
1479 
1480 	case DIOCGPART:
1481 		if ((cs->sc_flags & CCDF_INITED) == 0)
1482 			return (ENXIO);
1483 
1484 		((struct partinfo *)data)->disklab = &cs->sc_label;
1485 		((struct partinfo *)data)->part =
1486 		    &cs->sc_label.d_partitions[ccdpart(dev)];
1487 		break;
1488 
1489 	case DIOCWDINFO:
1490 	case DIOCSDINFO:
1491 		if ((cs->sc_flags & CCDF_INITED) == 0)
1492 			return (ENXIO);
1493 
1494 		if ((flag & FWRITE) == 0)
1495 			return (EBADF);
1496 
1497 		if ((error = ccdlock(cs)) != 0)
1498 			return (error);
1499 
1500 		cs->sc_flags |= CCDF_LABELLING;
1501 
1502 		error = setdisklabel(&cs->sc_label,
1503 		    (struct disklabel *)data, 0);
1504 		if (error == 0) {
1505 			if (cmd == DIOCWDINFO)
1506 				error = writedisklabel(CCDLABELDEV(dev),
1507 				    &cs->sc_label);
1508 		}
1509 
1510 		cs->sc_flags &= ~CCDF_LABELLING;
1511 
1512 		ccdunlock(cs);
1513 
1514 		if (error)
1515 			return (error);
1516 		break;
1517 
1518 	case DIOCWLABEL:
1519 		if ((cs->sc_flags & CCDF_INITED) == 0)
1520 			return (ENXIO);
1521 
1522 		if ((flag & FWRITE) == 0)
1523 			return (EBADF);
1524 		if (*(int *)data != 0)
1525 			cs->sc_flags |= CCDF_WLABEL;
1526 		else
1527 			cs->sc_flags &= ~CCDF_WLABEL;
1528 		break;
1529 
1530 	default:
1531 		return (ENOTTY);
1532 	}
1533 
1534 	return (0);
1535 }
1536 
1537 static int
1538 ccdsize(dev)
1539 	dev_t dev;
1540 {
1541 	struct ccd_softc *cs;
1542 	int part, size;
1543 
1544 	if (ccdopen(dev, 0, S_IFBLK, curproc))
1545 		return (-1);
1546 
1547 	cs = &ccd_softc[ccdunit(dev)];
1548 	part = ccdpart(dev);
1549 
1550 	if ((cs->sc_flags & CCDF_INITED) == 0)
1551 		return (-1);
1552 
1553 	if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1554 		size = -1;
1555 	else
1556 		size = cs->sc_label.d_partitions[part].p_size;
1557 
1558 	if (ccdclose(dev, 0, S_IFBLK, curproc))
1559 		return (-1);
1560 
1561 	return (size);
1562 }
1563 
/*
 * Dump entry point.  Dumping to a ccd is not supported, so every call
 * fails with ENXIO regardless of `dev'.
 */
static int
ccddump(dev)
	dev_t dev;
{
	/* Not implemented. */
	return (ENXIO);
}
1572 
1573 /*
1574  * Lookup the provided name in the filesystem.  If the file exists,
1575  * is a valid block device, and isn't being used by anyone else,
1576  * set *vpp to the file's vnode.
1577  */
1578 static int
1579 ccdlookup(path, p, vpp)
1580 	char *path;
1581 	struct proc *p;
1582 	struct vnode **vpp;	/* result */
1583 {
1584 	struct nameidata nd;
1585 	struct vnode *vp;
1586 	struct vattr va;
1587 	int error;
1588 
1589 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p);
1590 	if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
1591 #ifdef DEBUG
1592 		if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1593 			printf("ccdlookup: vn_open error = %d\n", error);
1594 #endif
1595 		return (error);
1596 	}
1597 	vp = nd.ni_vp;
1598 
1599 	if (vp->v_usecount > 1) {
1600 		VOP_UNLOCK(vp, 0, p);
1601 		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1602 		return (EBUSY);
1603 	}
1604 
1605 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1606 #ifdef DEBUG
1607 		if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1608 			printf("ccdlookup: getattr error = %d\n", error);
1609 #endif
1610 		VOP_UNLOCK(vp, 0, p);
1611 		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1612 		return (error);
1613 	}
1614 
1615 	/* XXX: eventually we should handle VREG, too. */
1616 	if (va.va_type != VBLK) {
1617 		VOP_UNLOCK(vp, 0, p);
1618 		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1619 		return (ENOTBLK);
1620 	}
1621 
1622 #ifdef DEBUG
1623 	if (ccddebug & CCDB_VNODE)
1624 		vprint("ccdlookup: vnode info", vp);
1625 #endif
1626 
1627 	VOP_UNLOCK(vp, 0, p);
1628 	*vpp = vp;
1629 	return (0);
1630 }
1631 
1632 /*
1633  * Read the disklabel from the ccd.  If one is not present, fake one
1634  * up.
1635  */
1636 static void
1637 ccdgetdisklabel(dev)
1638 	dev_t dev;
1639 {
1640 	int unit = ccdunit(dev);
1641 	struct ccd_softc *cs = &ccd_softc[unit];
1642 	char *errstring;
1643 	struct disklabel *lp = &cs->sc_label;
1644 	struct ccdgeom *ccg = &cs->sc_geom;
1645 
1646 	bzero(lp, sizeof(*lp));
1647 
1648 	lp->d_secperunit = cs->sc_size;
1649 	lp->d_secsize = ccg->ccg_secsize;
1650 	lp->d_nsectors = ccg->ccg_nsectors;
1651 	lp->d_ntracks = ccg->ccg_ntracks;
1652 	lp->d_ncylinders = ccg->ccg_ncylinders;
1653 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1654 
1655 	strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1656 	lp->d_type = DTYPE_CCD;
1657 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1658 	lp->d_rpm = 3600;
1659 	lp->d_interleave = 1;
1660 	lp->d_flags = 0;
1661 
1662 	lp->d_partitions[RAW_PART].p_offset = 0;
1663 	lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1664 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1665 	lp->d_npartitions = RAW_PART + 1;
1666 
1667 	lp->d_bbsize = BBSIZE;				/* XXX */
1668 	lp->d_sbsize = SBSIZE;				/* XXX */
1669 
1670 	lp->d_magic = DISKMAGIC;
1671 	lp->d_magic2 = DISKMAGIC;
1672 	lp->d_checksum = dkcksum(&cs->sc_label);
1673 
1674 	/*
1675 	 * Call the generic disklabel extraction routine.
1676 	 */
1677 	errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label);
1678 	if (errstring != NULL)
1679 		ccdmakedisklabel(cs);
1680 
1681 #ifdef DEBUG
1682 	/* It's actually extremely common to have unlabeled ccds. */
1683 	if (ccddebug & CCDB_LABEL)
1684 		if (errstring != NULL)
1685 			printf("ccd%d: %s\n", unit, errstring);
1686 #endif
1687 }
1688 
1689 /*
1690  * Take care of things one might want to take care of in the event
1691  * that a disklabel isn't present.
1692  */
1693 static void
1694 ccdmakedisklabel(cs)
1695 	struct ccd_softc *cs;
1696 {
1697 	struct disklabel *lp = &cs->sc_label;
1698 
1699 	/*
1700 	 * For historical reasons, if there's no disklabel present
1701 	 * the raw partition must be marked FS_BSDFFS.
1702 	 */
1703 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1704 
1705 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1706 }
1707 
1708 /*
1709  * Wait interruptibly for an exclusive lock.
1710  *
1711  * XXX
1712  * Several drivers do this; it should be abstracted and made MP-safe.
1713  */
1714 static int
1715 ccdlock(cs)
1716 	struct ccd_softc *cs;
1717 {
1718 	int error;
1719 
1720 	while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1721 		cs->sc_flags |= CCDF_WANTED;
1722 		if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0)
1723 			return (error);
1724 	}
1725 	cs->sc_flags |= CCDF_LOCKED;
1726 	return (0);
1727 }
1728 
1729 /*
1730  * Unlock and wake up any waiters.
1731  */
1732 static void
1733 ccdunlock(cs)
1734 	struct ccd_softc *cs;
1735 {
1736 
1737 	cs->sc_flags &= ~CCDF_LOCKED;
1738 	if ((cs->sc_flags & CCDF_WANTED) != 0) {
1739 		cs->sc_flags &= ~CCDF_WANTED;
1740 		wakeup(cs);
1741 	}
1742 }
1743 
1744 #ifdef DEBUG
1745 static void
1746 printiinfo(ii)
1747 	struct ccdiinfo *ii;
1748 {
1749 	int ix, i;
1750 
1751 	for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1752 		printf(" itab[%d]: #dk %d sblk %d soff %d",
1753 		       ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
1754 		for (i = 0; i < ii->ii_ndisk; i++)
1755 			printf(" %d", ii->ii_index[i]);
1756 		printf("\n");
1757 	}
1758 }
1759 #endif
1760 
1761 #endif /* NCCD > 0 */
1762 
1763 /* Local Variables: */
1764 /* c-argdecl-indent: 8 */
1765 /* c-continued-statement-offset: 8 */
1766 /* c-indent-level: 8 */
1767 /* End: */
1768