xref: /freebsd/sys/geom/geom_ccd.c (revision 23f282aa31e9b6fceacd449020e936e98d6f2298)
1 /* $FreeBSD$ */
2 
3 /*	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $	*/
4 
5 /*
6  * Copyright (c) 1995 Jason R. Thorpe.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed for the NetBSD Project
20  *	by Jason R. Thorpe.
21  * 4. The name of the author may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 /*
38  * Copyright (c) 1988 University of Utah.
39  * Copyright (c) 1990, 1993
40  *	The Regents of the University of California.  All rights reserved.
41  *
42  * This code is derived from software contributed to Berkeley by
43  * the Systems Programming Group of the University of Utah Computer
44  * Science Department.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  * 3. All advertising materials mentioning features or use of this software
55  *    must display the following acknowledgement:
56  *	This product includes software developed by the University of
57  *	California, Berkeley and its contributors.
58  * 4. Neither the name of the University nor the names of its contributors
59  *    may be used to endorse or promote products derived from this software
60  *    without specific prior written permission.
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
72  * SUCH DAMAGE.
73  *
74  * from: Utah $Hdr: cd.c 1.6 90/11/28$
75  *
76  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
77  */
78 
79 /*
80  * "Concatenated" disk driver.
81  *
82  * Dynamic configuration and disklabel support by:
83  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
84  *	Numerical Aerodynamic Simulation Facility
85  *	Mail Stop 258-6
86  *	NASA Ames Research Center
87  *	Moffett Field, CA 94035
88  */
89 
90 #include "ccd.h"
91 
92 #include <sys/param.h>
93 #include <sys/systm.h>
94 #include <sys/kernel.h>
95 #include <sys/module.h>
96 #include <sys/proc.h>
97 #include <sys/buf.h>
98 #include <sys/malloc.h>
99 #include <sys/namei.h>
100 #include <sys/conf.h>
101 #include <sys/stat.h>
102 #include <sys/sysctl.h>
103 #include <sys/disklabel.h>
104 #include <ufs/ffs/fs.h>
105 #include <sys/devicestat.h>
106 #include <sys/fcntl.h>
107 #include <sys/vnode.h>
108 
109 #include <sys/ccdvar.h>
110 
111 
/* Building with CCDDEBUG turns on DEBUG for the section just below. */
#ifdef CCDDEBUG
#ifndef DEBUG
#define DEBUG
#endif
#endif

#if defined(DEBUG)
/* Trace categories; OR them together in ccddebug to select output. */
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
static int ccddebug =
    CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | CCDB_VNODE;
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
/*
 * DEBUG is dropped again right here, so later "#ifdef DEBUG" sections
 * in this file are compiled out unless something re-defines it.
 */
#undef DEBUG
#endif

/* Unit and partition numbers are taken from the dev_t by dkunit()/dkpart(). */
#define	ccdunit(x)	dkunit(x)
#define ccdpart(x)	dkpart(x)
130 
131 /*
132    This is how mirroring works (only writes are special):
133 
134    When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
135    linked together by the cb_mirror field.  "cb_pflags &
136    CCDPF_MIRROR_DONE" is set to 0 on both of them.
137 
138    When a component returns to ccdiodone(), it checks if "cb_pflags &
139    CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
140    flag and returns.  If it is, it means its partner has already
141    returned, so it will go to the regular cleanup.
142 
143  */
144 
145 struct ccdbuf {
146 	struct bio	cb_buf;		/* new I/O buf */
147 	struct bio	*cb_obp;	/* ptr. to original I/O buf */
148 	struct ccdbuf	*cb_freenext;	/* free list link */
149 	int		cb_unit;	/* target unit */
150 	int		cb_comp;	/* target component */
151 	int		cb_pflags;	/* mirror/parity status flag */
152 	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
153 };
154 
/*
 * cb_pflags bits.
 * CCDPF_MIRROR_DONE: when set, the mirror counterpart has already finished.
 */
#define CCDPF_MIRROR_DONE 0x01

/* dev_t of the raw partition (slice 0) of the unit that dev belongs to. */
#define CCDLABELDEV(dev)						\
	(makedev(major((dev)),						\
	    dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
160 
161 static d_open_t ccdopen;
162 static d_close_t ccdclose;
163 static d_strategy_t ccdstrategy;
164 static d_ioctl_t ccdioctl;
165 static d_dump_t ccddump;
166 static d_psize_t ccdsize;
167 
168 #define NCCDFREEHIWAT	16
169 
170 #define CDEV_MAJOR 74
171 #define BDEV_MAJOR 21
172 
173 static struct cdevsw ccd_cdevsw = {
174 	/* open */	ccdopen,
175 	/* close */	ccdclose,
176 	/* read */	physread,
177 	/* write */	physwrite,
178 	/* ioctl */	ccdioctl,
179 	/* poll */	nopoll,
180 	/* mmap */	nommap,
181 	/* strategy */	ccdstrategy,
182 	/* name */	"ccd",
183 	/* maj */	CDEV_MAJOR,
184 	/* dump */	ccddump,
185 	/* psize */	ccdsize,
186 	/* flags */	D_DISK,
187 	/* bmaj */	BDEV_MAJOR
188 };
189 
190 /* called during module initialization */
191 static	void ccdattach __P((void));
192 static	int ccd_modevent __P((module_t, int, void *));
193 
194 /* called by biodone() at interrupt time */
195 static	void ccdiodone __P((struct bio *bp));
196 
197 static	void ccdstart __P((struct ccd_softc *, struct bio *));
198 static	void ccdinterleave __P((struct ccd_softc *, int));
199 static	void ccdintr __P((struct ccd_softc *, struct bio *));
200 static	int ccdinit __P((struct ccddevice *, char **, struct proc *));
201 static	int ccdlookup __P((char *, struct proc *p, struct vnode **));
202 static	void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *,
203 		struct bio *, daddr_t, caddr_t, long));
204 static	void ccdgetdisklabel __P((dev_t));
205 static	void ccdmakedisklabel __P((struct ccd_softc *));
206 static	int ccdlock __P((struct ccd_softc *));
207 static	void ccdunlock __P((struct ccd_softc *));
208 
209 #ifdef DEBUG
210 static	void printiinfo __P((struct ccdiinfo *));
211 #endif
212 
213 /* Non-private for the benefit of libkvm. */
214 struct	ccd_softc *ccd_softc;
215 struct	ccddevice *ccddevs;
216 struct	ccdbuf *ccdfreebufs;
217 static	int numccdfreebufs;
218 static	int numccd = 0;
219 
220 /*
221  * getccdbuf() -	Allocate and zero a ccd buffer.
222  *
223  *	This routine is called at splbio().
224  */
225 
226 static __inline
227 struct ccdbuf *
228 getccdbuf(struct ccdbuf *cpy)
229 {
230 	struct ccdbuf *cbp;
231 
232 	/*
233 	 * Allocate from freelist or malloc as necessary
234 	 */
235 	if ((cbp = ccdfreebufs) != NULL) {
236 		ccdfreebufs = cbp->cb_freenext;
237 		--numccdfreebufs;
238 	} else {
239 		cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
240 	}
241 
242 	/*
243 	 * Used by mirroring code
244 	 */
245 	if (cpy)
246 		bcopy(cpy, cbp, sizeof(struct ccdbuf));
247 	else
248 		bzero(cbp, sizeof(struct ccdbuf));
249 
250 	/*
251 	 * independant struct bio initialization
252 	 */
253 
254 	return(cbp);
255 }
256 
257 /*
258  * putccdbuf() -	Free a ccd buffer.
259  *
260  *	This routine is called at splbio().
261  */
262 
263 static __inline
264 void
265 putccdbuf(struct ccdbuf *cbp)
266 {
267 
268 	if (numccdfreebufs < NCCDFREEHIWAT) {
269 		cbp->cb_freenext = ccdfreebufs;
270 		ccdfreebufs = cbp;
271 		++numccdfreebufs;
272 	} else {
273 		free((caddr_t)cbp, M_DEVBUF);
274 	}
275 }
276 
277 
/*
 * Number of blocks to leave untouched at the front of each component
 * partition, so that the component's disklabel area is not overwritten
 * when the partition starts at the beginning of its slice.  May be
 * overridden at build time.
 */
#ifndef CCD_OFFSET
#define CCD_OFFSET 16
#endif
286 
287 /*
288  * Called by main() during pseudo-device attachment.  All we need
289  * to do is allocate enough space for devices to be configured later, and
290  * add devsw entries.
291  */
292 static void
293 ccdattach()
294 {
295 	int i;
296 	int num = NCCD;
297 
298 	if (num > 1)
299 		printf("ccd0-%d: Concatenated disk drivers\n", num-1);
300 	else
301 		printf("ccd0: Concatenated disk driver\n");
302 
303 	ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc),
304 	    M_DEVBUF, M_NOWAIT);
305 	ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice),
306 	    M_DEVBUF, M_NOWAIT);
307 	if ((ccd_softc == NULL) || (ccddevs == NULL)) {
308 		printf("WARNING: no memory for concatenated disks\n");
309 		if (ccd_softc != NULL)
310 			free(ccd_softc, M_DEVBUF);
311 		if (ccddevs != NULL)
312 			free(ccddevs, M_DEVBUF);
313 		return;
314 	}
315 	numccd = num;
316 	bzero(ccd_softc, num * sizeof(struct ccd_softc));
317 	bzero(ccddevs, num * sizeof(struct ccddevice));
318 
319 	cdevsw_add(&ccd_cdevsw);
320 	/* XXX: is this necessary? */
321 	for (i = 0; i < numccd; ++i)
322 		ccddevs[i].ccd_dk = -1;
323 }
324 
325 static int
326 ccd_modevent(mod, type, data)
327 	module_t mod;
328 	int type;
329 	void *data;
330 {
331 	int error = 0;
332 
333 	switch (type) {
334 	case MOD_LOAD:
335 		ccdattach();
336 		break;
337 
338 	case MOD_UNLOAD:
339 		printf("ccd0: Unload not supported!\n");
340 		error = EOPNOTSUPP;
341 		break;
342 
343 	default:	/* MOD_SHUTDOWN etc */
344 		break;
345 	}
346 	return (error);
347 }
348 
349 DEV_MODULE(ccd, ccd_modevent, NULL);
350 
351 static int
352 ccdinit(ccd, cpaths, p)
353 	struct ccddevice *ccd;
354 	char **cpaths;
355 	struct proc *p;
356 {
357 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
358 	struct ccdcinfo *ci = NULL;	/* XXX */
359 	size_t size;
360 	int ix;
361 	struct vnode *vp;
362 	size_t minsize;
363 	int maxsecsize;
364 	struct partinfo dpart;
365 	struct ccdgeom *ccg = &cs->sc_geom;
366 	char tmppath[MAXPATHLEN];
367 	int error = 0;
368 
369 #ifdef DEBUG
370 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
371 		printf("ccdinit: unit %d\n", ccd->ccd_unit);
372 #endif
373 
374 	cs->sc_size = 0;
375 	cs->sc_ileave = ccd->ccd_interleave;
376 	cs->sc_nccdisks = ccd->ccd_ndev;
377 
378 	/* Allocate space for the component info. */
379 	cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
380 	    M_DEVBUF, M_WAITOK);
381 
382 	/*
383 	 * Verify that each component piece exists and record
384 	 * relevant information about it.
385 	 */
386 	maxsecsize = 0;
387 	minsize = 0;
388 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
389 		vp = ccd->ccd_vpp[ix];
390 		ci = &cs->sc_cinfo[ix];
391 		ci->ci_vp = vp;
392 
393 		/*
394 		 * Copy in the pathname of the component.
395 		 */
396 		bzero(tmppath, sizeof(tmppath));	/* sanity */
397 		if ((error = copyinstr(cpaths[ix], tmppath,
398 		    MAXPATHLEN, &ci->ci_pathlen)) != 0) {
399 #ifdef DEBUG
400 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
401 				printf("ccd%d: can't copy path, error = %d\n",
402 				    ccd->ccd_unit, error);
403 #endif
404 			goto fail;
405 		}
406 		ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
407 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
408 
409 		ci->ci_dev = vn_todev(vp);
410 
411 		/*
412 		 * Get partition information for the component.
413 		 */
414 		if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
415 		    FREAD, p->p_ucred, p)) != 0) {
416 #ifdef DEBUG
417 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
418 				 printf("ccd%d: %s: ioctl failed, error = %d\n",
419 				     ccd->ccd_unit, ci->ci_path, error);
420 #endif
421 			goto fail;
422 		}
423 		if (dpart.part->p_fstype == FS_BSDFFS) {
424 			maxsecsize =
425 			    ((dpart.disklab->d_secsize > maxsecsize) ?
426 			    dpart.disklab->d_secsize : maxsecsize);
427 			size = dpart.part->p_size - CCD_OFFSET;
428 		} else {
429 #ifdef DEBUG
430 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
431 				printf("ccd%d: %s: incorrect partition type\n",
432 				    ccd->ccd_unit, ci->ci_path);
433 #endif
434 			error = EFTYPE;
435 			goto fail;
436 		}
437 
438 		/*
439 		 * Calculate the size, truncating to an interleave
440 		 * boundary if necessary.
441 		 */
442 
443 		if (cs->sc_ileave > 1)
444 			size -= size % cs->sc_ileave;
445 
446 		if (size == 0) {
447 #ifdef DEBUG
448 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
449 				printf("ccd%d: %s: size == 0\n",
450 				    ccd->ccd_unit, ci->ci_path);
451 #endif
452 			error = ENODEV;
453 			goto fail;
454 		}
455 
456 		if (minsize == 0 || size < minsize)
457 			minsize = size;
458 		ci->ci_size = size;
459 		cs->sc_size += size;
460 	}
461 
462 	/*
463 	 * Don't allow the interleave to be smaller than
464 	 * the biggest component sector.
465 	 */
466 	if ((cs->sc_ileave > 0) &&
467 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
468 #ifdef DEBUG
469 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
470 			printf("ccd%d: interleave must be at least %d\n",
471 			    ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
472 #endif
473 		error = EINVAL;
474 		goto fail;
475 	}
476 
477 	/*
478 	 * If uniform interleave is desired set all sizes to that of
479 	 * the smallest component.  This will guarentee that a single
480 	 * interleave table is generated.
481 	 *
482 	 * Lost space must be taken into account when calculating the
483 	 * overall size.  Half the space is lost when CCDF_MIRROR is
484 	 * specified.  One disk is lost when CCDF_PARITY is specified.
485 	 */
486 	if (ccd->ccd_flags & CCDF_UNIFORM) {
487 		for (ci = cs->sc_cinfo;
488 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
489 			ci->ci_size = minsize;
490 		}
491 		if (ccd->ccd_flags & CCDF_MIRROR) {
492 			/*
493 			 * Check to see if an even number of components
494 			 * have been specified.  The interleave must also
495 			 * be non-zero in order for us to be able to
496 			 * guarentee the topology.
497 			 */
498 			if (cs->sc_nccdisks % 2) {
499 				printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
500 				error = EINVAL;
501 				goto fail;
502 			}
503 			if (cs->sc_ileave == 0) {
504 				printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
505 				error = EINVAL;
506 				goto fail;
507 			}
508 			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
509 		} else if (ccd->ccd_flags & CCDF_PARITY) {
510 			cs->sc_size = (cs->sc_nccdisks-1) * minsize;
511 		} else {
512 			if (cs->sc_ileave == 0) {
513 				printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
514 				error = EINVAL;
515 				goto fail;
516 			}
517 			cs->sc_size = cs->sc_nccdisks * minsize;
518 		}
519 	}
520 
521 	/*
522 	 * Construct the interleave table.
523 	 */
524 	ccdinterleave(cs, ccd->ccd_unit);
525 
526 	/*
527 	 * Create pseudo-geometry based on 1MB cylinders.  It's
528 	 * pretty close.
529 	 */
530 	ccg->ccg_secsize = maxsecsize;
531 	ccg->ccg_ntracks = 1;
532 	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
533 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
534 
535 	/*
536 	 * Add an devstat entry for this device.
537 	 */
538 	devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
539 			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
540 			  DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
541 			  DEVSTAT_PRIORITY_ARRAY);
542 
543 	cs->sc_flags |= CCDF_INITED;
544 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
545 	cs->sc_unit = ccd->ccd_unit;
546 	return (0);
547 fail:
548 	while (ci > cs->sc_cinfo) {
549 		ci--;
550 		free(ci->ci_path, M_DEVBUF);
551 	}
552 	free(cs->sc_cinfo, M_DEVBUF);
553 	return (error);
554 }
555 
556 static void
557 ccdinterleave(cs, unit)
558 	struct ccd_softc *cs;
559 	int unit;
560 {
561 	struct ccdcinfo *ci, *smallci;
562 	struct ccdiinfo *ii;
563 	daddr_t bn, lbn;
564 	int ix;
565 	u_long size;
566 
567 #ifdef DEBUG
568 	if (ccddebug & CCDB_INIT)
569 		printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
570 #endif
571 
572 	/*
573 	 * Allocate an interleave table.  The worst case occurs when each
574 	 * of N disks is of a different size, resulting in N interleave
575 	 * tables.
576 	 *
577 	 * Chances are this is too big, but we don't care.
578 	 */
579 	size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
580 	cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
581 	bzero((caddr_t)cs->sc_itable, size);
582 
583 	/*
584 	 * Trivial case: no interleave (actually interleave of disk size).
585 	 * Each table entry represents a single component in its entirety.
586 	 *
587 	 * An interleave of 0 may not be used with a mirror or parity setup.
588 	 */
589 	if (cs->sc_ileave == 0) {
590 		bn = 0;
591 		ii = cs->sc_itable;
592 
593 		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
594 			/* Allocate space for ii_index. */
595 			ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
596 			ii->ii_ndisk = 1;
597 			ii->ii_startblk = bn;
598 			ii->ii_startoff = 0;
599 			ii->ii_index[0] = ix;
600 			bn += cs->sc_cinfo[ix].ci_size;
601 			ii++;
602 		}
603 		ii->ii_ndisk = 0;
604 #ifdef DEBUG
605 		if (ccddebug & CCDB_INIT)
606 			printiinfo(cs->sc_itable);
607 #endif
608 		return;
609 	}
610 
611 	/*
612 	 * The following isn't fast or pretty; it doesn't have to be.
613 	 */
614 	size = 0;
615 	bn = lbn = 0;
616 	for (ii = cs->sc_itable; ; ii++) {
617 		/*
618 		 * Allocate space for ii_index.  We might allocate more then
619 		 * we use.
620 		 */
621 		ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
622 		    M_DEVBUF, M_WAITOK);
623 
624 		/*
625 		 * Locate the smallest of the remaining components
626 		 */
627 		smallci = NULL;
628 		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
629 		    ci++) {
630 			if (ci->ci_size > size &&
631 			    (smallci == NULL ||
632 			     ci->ci_size < smallci->ci_size)) {
633 				smallci = ci;
634 			}
635 		}
636 
637 		/*
638 		 * Nobody left, all done
639 		 */
640 		if (smallci == NULL) {
641 			ii->ii_ndisk = 0;
642 			break;
643 		}
644 
645 		/*
646 		 * Record starting logical block using an sc_ileave blocksize.
647 		 */
648 		ii->ii_startblk = bn / cs->sc_ileave;
649 
650 		/*
651 		 * Record starting comopnent block using an sc_ileave
652 		 * blocksize.  This value is relative to the beginning of
653 		 * a component disk.
654 		 */
655 		ii->ii_startoff = lbn;
656 
657 		/*
658 		 * Determine how many disks take part in this interleave
659 		 * and record their indices.
660 		 */
661 		ix = 0;
662 		for (ci = cs->sc_cinfo;
663 		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
664 			if (ci->ci_size >= smallci->ci_size) {
665 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
666 			}
667 		}
668 		ii->ii_ndisk = ix;
669 		bn += ix * (smallci->ci_size - size);
670 		lbn = smallci->ci_size / cs->sc_ileave;
671 		size = smallci->ci_size;
672 	}
673 #ifdef DEBUG
674 	if (ccddebug & CCDB_INIT)
675 		printiinfo(cs->sc_itable);
676 #endif
677 }
678 
679 /* ARGSUSED */
680 static int
681 ccdopen(dev, flags, fmt, p)
682 	dev_t dev;
683 	int flags, fmt;
684 	struct proc *p;
685 {
686 	int unit = ccdunit(dev);
687 	struct ccd_softc *cs;
688 	struct disklabel *lp;
689 	int error = 0, part, pmask;
690 
691 #ifdef DEBUG
692 	if (ccddebug & CCDB_FOLLOW)
693 		printf("ccdopen(%x, %x)\n", dev, flags);
694 #endif
695 	if (unit >= numccd)
696 		return (ENXIO);
697 	cs = &ccd_softc[unit];
698 
699 	if ((error = ccdlock(cs)) != 0)
700 		return (error);
701 
702 	lp = &cs->sc_label;
703 
704 	part = ccdpart(dev);
705 	pmask = (1 << part);
706 
707 	/*
708 	 * If we're initialized, check to see if there are any other
709 	 * open partitions.  If not, then it's safe to update
710 	 * the in-core disklabel.
711 	 */
712 	if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
713 		ccdgetdisklabel(dev);
714 
715 	/* Check that the partition exists. */
716 	if (part != RAW_PART && ((part >= lp->d_npartitions) ||
717 	    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
718 		error = ENXIO;
719 		goto done;
720 	}
721 
722 	cs->sc_openmask |= pmask;
723  done:
724 	ccdunlock(cs);
725 	return (0);
726 }
727 
728 /* ARGSUSED */
729 static int
730 ccdclose(dev, flags, fmt, p)
731 	dev_t dev;
732 	int flags, fmt;
733 	struct proc *p;
734 {
735 	int unit = ccdunit(dev);
736 	struct ccd_softc *cs;
737 	int error = 0, part;
738 
739 #ifdef DEBUG
740 	if (ccddebug & CCDB_FOLLOW)
741 		printf("ccdclose(%x, %x)\n", dev, flags);
742 #endif
743 
744 	if (unit >= numccd)
745 		return (ENXIO);
746 	cs = &ccd_softc[unit];
747 
748 	if ((error = ccdlock(cs)) != 0)
749 		return (error);
750 
751 	part = ccdpart(dev);
752 
753 	/* ...that much closer to allowing unconfiguration... */
754 	cs->sc_openmask &= ~(1 << part);
755 	ccdunlock(cs);
756 	return (0);
757 }
758 
759 static void
760 ccdstrategy(bp)
761 	struct bio *bp;
762 {
763 	int unit = ccdunit(bp->bio_dev);
764 	struct ccd_softc *cs = &ccd_softc[unit];
765 	int s;
766 	int wlabel;
767 	struct disklabel *lp;
768 
769 #ifdef DEBUG
770 	if (ccddebug & CCDB_FOLLOW)
771 		printf("ccdstrategy(%x): unit %d\n", bp, unit);
772 #endif
773 	if ((cs->sc_flags & CCDF_INITED) == 0) {
774 		bp->bio_error = ENXIO;
775 		bp->bio_flags |= BIO_ERROR;
776 		goto done;
777 	}
778 
779 	/* If it's a nil transfer, wake up the top half now. */
780 	if (bp->bio_bcount == 0)
781 		goto done;
782 
783 	lp = &cs->sc_label;
784 
785 	/*
786 	 * Do bounds checking and adjust transfer.  If there's an
787 	 * error, the bounds check will flag that for us.
788 	 */
789 	wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
790 	if (ccdpart(bp->bio_dev) != RAW_PART) {
791 		if (bounds_check_with_label(bp, lp, wlabel) <= 0)
792 			goto done;
793 	} else {
794 		int pbn;        /* in sc_secsize chunks */
795 		long sz;        /* in sc_secsize chunks */
796 
797 		pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
798 		sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize);
799 
800 		/*
801 		 * If out of bounds return an error. If at the EOF point,
802 		 * simply read or write less.
803 		 */
804 
805 		if (pbn < 0 || pbn >= cs->sc_size) {
806 			bp->bio_resid = bp->bio_bcount;
807 			if (pbn != cs->sc_size) {
808 				bp->bio_error = EINVAL;
809 				bp->bio_flags |= BIO_ERROR;
810 			}
811 			goto done;
812 		}
813 
814 		/*
815 		 * If the request crosses EOF, truncate the request.
816 		 */
817 		if (pbn + sz > cs->sc_size) {
818 			bp->bio_bcount = (cs->sc_size - pbn) *
819 			    cs->sc_geom.ccg_secsize;
820 		}
821 	}
822 
823 	bp->bio_resid = bp->bio_bcount;
824 
825 	/*
826 	 * "Start" the unit.
827 	 */
828 	s = splbio();
829 	ccdstart(cs, bp);
830 	splx(s);
831 	return;
832 done:
833 	biodone(bp);
834 }
835 
836 static void
837 ccdstart(cs, bp)
838 	struct ccd_softc *cs;
839 	struct bio *bp;
840 {
841 	long bcount, rcount;
842 	struct ccdbuf *cbp[4];
843 	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
844 	caddr_t addr;
845 	daddr_t bn;
846 	struct partition *pp;
847 
848 #ifdef DEBUG
849 	if (ccddebug & CCDB_FOLLOW)
850 		printf("ccdstart(%x, %x)\n", cs, bp);
851 #endif
852 
853 	/* Record the transaction start  */
854 	devstat_start_transaction(&cs->device_stats);
855 
856 	/*
857 	 * Translate the partition-relative block number to an absolute.
858 	 */
859 	bn = bp->bio_blkno;
860 	if (ccdpart(bp->bio_dev) != RAW_PART) {
861 		pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)];
862 		bn += pp->p_offset;
863 	}
864 
865 	/*
866 	 * Allocate component buffers and fire off the requests
867 	 */
868 	addr = bp->bio_data;
869 	for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) {
870 		ccdbuffer(cbp, cs, bp, bn, addr, bcount);
871 		rcount = cbp[0]->cb_buf.bio_bcount;
872 
873 		if (cs->sc_cflags & CCDF_MIRROR) {
874 			/*
875 			 * Mirroring.  Writes go to both disks, reads are
876 			 * taken from whichever disk seems most appropriate.
877 			 *
878 			 * We attempt to localize reads to the disk whos arm
879 			 * is nearest the read request.  We ignore seeks due
880 			 * to writes when making this determination and we
881 			 * also try to avoid hogging.
882 			 */
883 			if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) {
884 				BIO_STRATEGY(&cbp[0]->cb_buf, 0);
885 				BIO_STRATEGY(&cbp[1]->cb_buf, 0);
886 			} else {
887 				int pick = cs->sc_pick;
888 				daddr_t range = cs->sc_size / 16;
889 
890 				if (bn < cs->sc_blk[pick] - range ||
891 				    bn > cs->sc_blk[pick] + range
892 				) {
893 					cs->sc_pick = pick = 1 - pick;
894 				}
895 				cs->sc_blk[pick] = bn + btodb(rcount);
896 				BIO_STRATEGY(&cbp[pick]->cb_buf, 0);
897 			}
898 		} else {
899 			/*
900 			 * Not mirroring
901 			 */
902 			BIO_STRATEGY(&cbp[0]->cb_buf, 0);
903 		}
904 		bn += btodb(rcount);
905 		addr += rcount;
906 	}
907 }
908 
909 /*
910  * Build a component buffer header.
911  */
912 static void
913 ccdbuffer(cb, cs, bp, bn, addr, bcount)
914 	struct ccdbuf **cb;
915 	struct ccd_softc *cs;
916 	struct bio *bp;
917 	daddr_t bn;
918 	caddr_t addr;
919 	long bcount;
920 {
921 	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
922 	struct ccdbuf *cbp;
923 	daddr_t cbn, cboff;
924 	off_t cbc;
925 
926 #ifdef DEBUG
927 	if (ccddebug & CCDB_IO)
928 		printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
929 		       cs, bp, bn, addr, bcount);
930 #endif
931 	/*
932 	 * Determine which component bn falls in.
933 	 */
934 	cbn = bn;
935 	cboff = 0;
936 
937 	if (cs->sc_ileave == 0) {
938 		/*
939 		 * Serially concatenated and neither a mirror nor a parity
940 		 * config.  This is a special case.
941 		 */
942 		daddr_t sblk;
943 
944 		sblk = 0;
945 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
946 			sblk += ci->ci_size;
947 		cbn -= sblk;
948 	} else {
949 		struct ccdiinfo *ii;
950 		int ccdisk, off;
951 
952 		/*
953 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
954 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
955 		 * to cbn.
956 		 */
957 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
958 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
959 
960 		/*
961 		 * Figure out which interleave table to use.
962 		 */
963 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
964 			if (ii->ii_startblk > cbn)
965 				break;
966 		}
967 		ii--;
968 
969 		/*
970 		 * off is the logical superblock relative to the beginning
971 		 * of this interleave block.
972 		 */
973 		off = cbn - ii->ii_startblk;
974 
975 		/*
976 		 * We must calculate which disk component to use (ccdisk),
977 		 * and recalculate cbn to be the superblock relative to
978 		 * the beginning of the component.  This is typically done by
979 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
980 		 * must typically be divided by the number of components in
981 		 * this interleave array to be properly convert it from a
982 		 * CCD-relative logical superblock number to a
983 		 * component-relative superblock number.
984 		 */
985 		if (ii->ii_ndisk == 1) {
986 			/*
987 			 * When we have just one disk, it can't be a mirror
988 			 * or a parity config.
989 			 */
990 			ccdisk = ii->ii_index[0];
991 			cbn = ii->ii_startoff + off;
992 		} else {
993 			if (cs->sc_cflags & CCDF_MIRROR) {
994 				/*
995 				 * We have forced a uniform mapping, resulting
996 				 * in a single interleave array.  We double
997 				 * up on the first half of the available
998 				 * components and our mirror is in the second
999 				 * half.  This only works with a single
1000 				 * interleave array because doubling up
1001 				 * doubles the number of sectors, so there
1002 				 * cannot be another interleave array because
1003 				 * the next interleave array's calculations
1004 				 * would be off.
1005 				 */
1006 				int ndisk2 = ii->ii_ndisk / 2;
1007 				ccdisk = ii->ii_index[off % ndisk2];
1008 				cbn = ii->ii_startoff + off / ndisk2;
1009 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1010 			} else if (cs->sc_cflags & CCDF_PARITY) {
1011 				/*
1012 				 * XXX not implemented yet
1013 				 */
1014 				int ndisk2 = ii->ii_ndisk - 1;
1015 				ccdisk = ii->ii_index[off % ndisk2];
1016 				cbn = ii->ii_startoff + off / ndisk2;
1017 				if (cbn % ii->ii_ndisk <= ccdisk)
1018 					ccdisk++;
1019 			} else {
1020 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
1021 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
1022 			}
1023 		}
1024 
1025 		ci = &cs->sc_cinfo[ccdisk];
1026 
1027 		/*
1028 		 * Convert cbn from a superblock to a normal block so it
1029 		 * can be used to calculate (along with cboff) the normal
1030 		 * block index into this particular disk.
1031 		 */
1032 		cbn *= cs->sc_ileave;
1033 	}
1034 
1035 	/*
1036 	 * Fill in the component buf structure.
1037 	 */
1038 	cbp = getccdbuf(NULL);
1039 	cbp->cb_buf.bio_cmd = bp->bio_cmd;
1040 	cbp->cb_buf.bio_done = ccdiodone;
1041 	cbp->cb_buf.bio_dev = ci->ci_dev;		/* XXX */
1042 	cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET;
1043 	cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
1044 	cbp->cb_buf.bio_data = addr;
1045 	if (cs->sc_ileave == 0)
1046               cbc = dbtob((off_t)(ci->ci_size - cbn));
1047 	else
1048               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1049 	cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount;
1050  	cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount;
1051 
1052 	/*
1053 	 * context for ccdiodone
1054 	 */
1055 	cbp->cb_obp = bp;
1056 	cbp->cb_unit = cs - ccd_softc;
1057 	cbp->cb_comp = ci - cs->sc_cinfo;
1058 
1059 #ifdef DEBUG
1060 	if (ccddebug & CCDB_IO)
1061 		printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
1062 		       ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.bio_blkno,
1063 		       cbp->cb_buf.bio_data, cbp->cb_buf.bio_bcount);
1064 #endif
1065 	cb[0] = cbp;
1066 
1067 	/*
1068 	 * Note: both I/O's setup when reading from mirror, but only one
1069 	 * will be executed.
1070 	 */
1071 	if (cs->sc_cflags & CCDF_MIRROR) {
1072 		/* mirror, setup second I/O */
1073 		cbp = getccdbuf(cb[0]);
1074 		cbp->cb_buf.bio_dev = ci2->ci_dev;
1075 		cbp->cb_comp = ci2 - cs->sc_cinfo;
1076 		cb[1] = cbp;
1077 		/* link together the ccdbuf's and clear "mirror done" flag */
1078 		cb[0]->cb_mirror = cb[1];
1079 		cb[1]->cb_mirror = cb[0];
1080 		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1081 		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1082 	}
1083 }
1084 
1085 static void
1086 ccdintr(cs, bp)
1087 	struct ccd_softc *cs;
1088 	struct bio *bp;
1089 {
1090 #ifdef DEBUG
1091 	if (ccddebug & CCDB_FOLLOW)
1092 		printf("ccdintr(%x, %x)\n", cs, bp);
1093 #endif
1094 	/*
1095 	 * Request is done for better or worse, wakeup the top half.
1096 	 */
1097 	if (bp->bio_flags & BIO_ERROR)
1098 		bp->bio_resid = bp->bio_bcount;
1099 	devstat_end_transaction_bio(&cs->device_stats, bp);
1100 	biodone(bp);
1101 }
1102 
1103 /*
1104  * Called at interrupt time.
1105  * Mark the component as done and if all components are done,
1106  * take a ccd interrupt.
1107  */
1108 static void
1109 ccdiodone(ibp)
1110 	struct bio *ibp;
1111 {
1112 	struct ccdbuf *cbp = (struct ccdbuf *)ibp;
1113 	struct bio *bp = cbp->cb_obp;
1114 	int unit = cbp->cb_unit;
1115 	int count, s;
1116 
1117 	s = splbio();
1118 #ifdef DEBUG
1119 	if (ccddebug & CCDB_FOLLOW)
1120 		printf("ccdiodone(%x)\n", cbp);
1121 	if (ccddebug & CCDB_IO) {
1122 		printf("ccdiodone: bp %x bcount %d resid %d\n",
1123 		       bp, bp->bio_bcount, bp->bio_resid);
1124 		printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
1125 		       cbp->cb_buf.bio_dev, cbp->cb_comp, cbp,
1126 		       cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data,
1127 		       cbp->cb_buf.bio_bcount);
1128 	}
1129 #endif
1130 	/*
1131 	 * If an error occured, report it.  If this is a mirrored
1132 	 * configuration and the first of two possible reads, do not
1133 	 * set the error in the bp yet because the second read may
1134 	 * succeed.
1135 	 */
1136 
1137 	if (cbp->cb_buf.bio_flags & BIO_ERROR) {
1138 		const char *msg = "";
1139 
1140 		if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1141 		    (cbp->cb_buf.bio_cmd == BIO_READ) &&
1142 		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1143 			/*
1144 			 * We will try our read on the other disk down
1145 			 * below, also reverse the default pick so if we
1146 			 * are doing a scan we do not keep hitting the
1147 			 * bad disk first.
1148 			 */
1149 			struct ccd_softc *cs = &ccd_softc[unit];
1150 
1151 			msg = ", trying other disk";
1152 			cs->sc_pick = 1 - cs->sc_pick;
1153 			cs->sc_blk[cs->sc_pick] = bp->bio_blkno;
1154 		} else {
1155 			bp->bio_flags |= BIO_ERROR;
1156 			bp->bio_error = cbp->cb_buf.bio_error ?
1157 			    cbp->cb_buf.bio_error : EIO;
1158 		}
1159 		printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
1160 		       unit, bp->bio_error, cbp->cb_comp,
1161 		       (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg);
1162 	}
1163 
1164 	/*
1165 	 * Process mirror.  If we are writing, I/O has been initiated on both
1166 	 * buffers and we fall through only after both are finished.
1167 	 *
1168 	 * If we are reading only one I/O is initiated at a time.  If an
1169 	 * error occurs we initiate the second I/O and return, otherwise
1170 	 * we free the second I/O without initiating it.
1171 	 */
1172 
1173 	if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1174 		if (cbp->cb_buf.bio_cmd == BIO_WRITE) {
1175 			/*
1176 			 * When writing, handshake with the second buffer
1177 			 * to determine when both are done.  If both are not
1178 			 * done, return here.
1179 			 */
1180 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1181 				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1182 				putccdbuf(cbp);
1183 				splx(s);
1184 				return;
1185 			}
1186 		} else {
1187 			/*
1188 			 * When reading, either dispose of the second buffer
1189 			 * or initiate I/O on the second buffer if an error
1190 			 * occured with this one.
1191 			 */
1192 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1193 				if (cbp->cb_buf.bio_flags & BIO_ERROR) {
1194 					cbp->cb_mirror->cb_pflags |=
1195 					    CCDPF_MIRROR_DONE;
1196 					BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0);
1197 					putccdbuf(cbp);
1198 					splx(s);
1199 					return;
1200 				} else {
1201 					putccdbuf(cbp->cb_mirror);
1202 					/* fall through */
1203 				}
1204 			}
1205 		}
1206 	}
1207 
1208 	/*
1209 	 * use bio_caller1 to determine how big the original request was rather
1210 	 * then bio_bcount, because bio_bcount may have been truncated for EOF.
1211 	 *
1212 	 * XXX We check for an error, but we do not test the resid for an
1213 	 * aligned EOF condition.  This may result in character & block
1214 	 * device access not recognizing EOF properly when read or written
1215 	 * sequentially, but will not effect filesystems.
1216 	 */
1217 	count = (long)cbp->cb_buf.bio_caller1;
1218 	putccdbuf(cbp);
1219 
1220 	/*
1221 	 * If all done, "interrupt".
1222 	 */
1223 	bp->bio_resid -= count;
1224 	if (bp->bio_resid < 0)
1225 		panic("ccdiodone: count");
1226 	if (bp->bio_resid == 0)
1227 		ccdintr(&ccd_softc[unit], bp);
1228 	splx(s);
1229 }
1230 
1231 static int
1232 ccdioctl(dev, cmd, data, flag, p)
1233 	dev_t dev;
1234 	u_long cmd;
1235 	caddr_t data;
1236 	int flag;
1237 	struct proc *p;
1238 {
1239 	int unit = ccdunit(dev);
1240 	int i, j, lookedup = 0, error = 0;
1241 	int part, pmask, s;
1242 	struct ccd_softc *cs;
1243 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1244 	struct ccddevice ccd;
1245 	char **cpp;
1246 	struct vnode **vpp;
1247 
1248 	if (unit >= numccd)
1249 		return (ENXIO);
1250 	cs = &ccd_softc[unit];
1251 
1252 	bzero(&ccd, sizeof(ccd));
1253 
1254 	switch (cmd) {
1255 	case CCDIOCSET:
1256 		if (cs->sc_flags & CCDF_INITED)
1257 			return (EBUSY);
1258 
1259 		if ((flag & FWRITE) == 0)
1260 			return (EBADF);
1261 
1262 		if ((error = ccdlock(cs)) != 0)
1263 			return (error);
1264 
1265 		/* Fill in some important bits. */
1266 		ccd.ccd_unit = unit;
1267 		ccd.ccd_interleave = ccio->ccio_ileave;
1268 		if (ccd.ccd_interleave == 0 &&
1269 		    ((ccio->ccio_flags & CCDF_MIRROR) ||
1270 		     (ccio->ccio_flags & CCDF_PARITY))) {
1271 			printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1272 			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1273 		}
1274 		if ((ccio->ccio_flags & CCDF_MIRROR) &&
1275 		    (ccio->ccio_flags & CCDF_PARITY)) {
1276 			printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1277 			ccio->ccio_flags &= ~CCDF_PARITY;
1278 		}
1279 		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1280 		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
1281 			printf("ccd%d: mirror/parity forces uniform flag\n",
1282 			       unit);
1283 			ccio->ccio_flags |= CCDF_UNIFORM;
1284 		}
1285 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1286 
1287 		/*
1288 		 * Allocate space for and copy in the array of
1289 		 * componet pathnames and device numbers.
1290 		 */
1291 		cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1292 		    M_DEVBUF, M_WAITOK);
1293 		vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1294 		    M_DEVBUF, M_WAITOK);
1295 
1296 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1297 		    ccio->ccio_ndisks * sizeof(char **));
1298 		if (error) {
1299 			free(vpp, M_DEVBUF);
1300 			free(cpp, M_DEVBUF);
1301 			ccdunlock(cs);
1302 			return (error);
1303 		}
1304 
1305 #ifdef DEBUG
1306 		if (ccddebug & CCDB_INIT)
1307 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1308 				printf("ccdioctl: component %d: 0x%x\n",
1309 				    i, cpp[i]);
1310 #endif
1311 
1312 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1313 #ifdef DEBUG
1314 			if (ccddebug & CCDB_INIT)
1315 				printf("ccdioctl: lookedup = %d\n", lookedup);
1316 #endif
1317 			if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) {
1318 				for (j = 0; j < lookedup; ++j)
1319 					(void)vn_close(vpp[j], FREAD|FWRITE,
1320 					    p->p_ucred, p);
1321 				free(vpp, M_DEVBUF);
1322 				free(cpp, M_DEVBUF);
1323 				ccdunlock(cs);
1324 				return (error);
1325 			}
1326 			++lookedup;
1327 		}
1328 		ccd.ccd_cpp = cpp;
1329 		ccd.ccd_vpp = vpp;
1330 		ccd.ccd_ndev = ccio->ccio_ndisks;
1331 
1332 		/*
1333 		 * Initialize the ccd.  Fills in the softc for us.
1334 		 */
1335 		if ((error = ccdinit(&ccd, cpp, p)) != 0) {
1336 			for (j = 0; j < lookedup; ++j)
1337 				(void)vn_close(vpp[j], FREAD|FWRITE,
1338 				    p->p_ucred, p);
1339 			bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1340 			free(vpp, M_DEVBUF);
1341 			free(cpp, M_DEVBUF);
1342 			ccdunlock(cs);
1343 			return (error);
1344 		}
1345 
1346 		/*
1347 		 * The ccd has been successfully initialized, so
1348 		 * we can place it into the array and read the disklabel.
1349 		 */
1350 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1351 		ccio->ccio_unit = unit;
1352 		ccio->ccio_size = cs->sc_size;
1353 		ccdgetdisklabel(dev);
1354 
1355 		ccdunlock(cs);
1356 
1357 		break;
1358 
1359 	case CCDIOCCLR:
1360 		if ((cs->sc_flags & CCDF_INITED) == 0)
1361 			return (ENXIO);
1362 
1363 		if ((flag & FWRITE) == 0)
1364 			return (EBADF);
1365 
1366 		if ((error = ccdlock(cs)) != 0)
1367 			return (error);
1368 
1369 		/* Don't unconfigure if any other partitions are open */
1370 		part = ccdpart(dev);
1371 		pmask = (1 << part);
1372 		if ((cs->sc_openmask & ~pmask)) {
1373 			ccdunlock(cs);
1374 			return (EBUSY);
1375 		}
1376 
1377 		/*
1378 		 * Free ccd_softc information and clear entry.
1379 		 */
1380 
1381 		/* Close the components and free their pathnames. */
1382 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1383 			/*
1384 			 * XXX: this close could potentially fail and
1385 			 * cause Bad Things.  Maybe we need to force
1386 			 * the close to happen?
1387 			 */
1388 #ifdef DEBUG
1389 			if (ccddebug & CCDB_VNODE)
1390 				vprint("CCDIOCCLR: vnode info",
1391 				    cs->sc_cinfo[i].ci_vp);
1392 #endif
1393 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
1394 			    p->p_ucred, p);
1395 			free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1396 		}
1397 
1398 		/* Free interleave index. */
1399 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1400 			free(cs->sc_itable[i].ii_index, M_DEVBUF);
1401 
1402 		/* Free component info and interleave table. */
1403 		free(cs->sc_cinfo, M_DEVBUF);
1404 		free(cs->sc_itable, M_DEVBUF);
1405 		cs->sc_flags &= ~CCDF_INITED;
1406 
1407 		/*
1408 		 * Free ccddevice information and clear entry.
1409 		 */
1410 		free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1411 		free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1412 		ccd.ccd_dk = -1;
1413 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1414 
1415 		/*
1416 		 * And remove the devstat entry.
1417 		 */
1418 		devstat_remove_entry(&cs->device_stats);
1419 
1420 		/* This must be atomic. */
1421 		s = splhigh();
1422 		ccdunlock(cs);
1423 		bzero(cs, sizeof(struct ccd_softc));
1424 		splx(s);
1425 
1426 		break;
1427 
1428 	case DIOCGDINFO:
1429 		if ((cs->sc_flags & CCDF_INITED) == 0)
1430 			return (ENXIO);
1431 
1432 		*(struct disklabel *)data = cs->sc_label;
1433 		break;
1434 
1435 	case DIOCGPART:
1436 		if ((cs->sc_flags & CCDF_INITED) == 0)
1437 			return (ENXIO);
1438 
1439 		((struct partinfo *)data)->disklab = &cs->sc_label;
1440 		((struct partinfo *)data)->part =
1441 		    &cs->sc_label.d_partitions[ccdpart(dev)];
1442 		break;
1443 
1444 	case DIOCWDINFO:
1445 	case DIOCSDINFO:
1446 		if ((cs->sc_flags & CCDF_INITED) == 0)
1447 			return (ENXIO);
1448 
1449 		if ((flag & FWRITE) == 0)
1450 			return (EBADF);
1451 
1452 		if ((error = ccdlock(cs)) != 0)
1453 			return (error);
1454 
1455 		cs->sc_flags |= CCDF_LABELLING;
1456 
1457 		error = setdisklabel(&cs->sc_label,
1458 		    (struct disklabel *)data, 0);
1459 		if (error == 0) {
1460 			if (cmd == DIOCWDINFO)
1461 				error = writedisklabel(CCDLABELDEV(dev),
1462 				    &cs->sc_label);
1463 		}
1464 
1465 		cs->sc_flags &= ~CCDF_LABELLING;
1466 
1467 		ccdunlock(cs);
1468 
1469 		if (error)
1470 			return (error);
1471 		break;
1472 
1473 	case DIOCWLABEL:
1474 		if ((cs->sc_flags & CCDF_INITED) == 0)
1475 			return (ENXIO);
1476 
1477 		if ((flag & FWRITE) == 0)
1478 			return (EBADF);
1479 		if (*(int *)data != 0)
1480 			cs->sc_flags |= CCDF_WLABEL;
1481 		else
1482 			cs->sc_flags &= ~CCDF_WLABEL;
1483 		break;
1484 
1485 	default:
1486 		return (ENOTTY);
1487 	}
1488 
1489 	return (0);
1490 }
1491 
1492 static int
1493 ccdsize(dev)
1494 	dev_t dev;
1495 {
1496 	struct ccd_softc *cs;
1497 	int part, size;
1498 
1499 	if (ccdopen(dev, 0, S_IFCHR, curproc))
1500 		return (-1);
1501 
1502 	cs = &ccd_softc[ccdunit(dev)];
1503 	part = ccdpart(dev);
1504 
1505 	if ((cs->sc_flags & CCDF_INITED) == 0)
1506 		return (-1);
1507 
1508 	if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1509 		size = -1;
1510 	else
1511 		size = cs->sc_label.d_partitions[part].p_size;
1512 
1513 	if (ccdclose(dev, 0, S_IFCHR, curproc))
1514 		return (-1);
1515 
1516 	return (size);
1517 }
1518 
/*
 * Dump entry point.  Dumping to a ccd is not implemented, so every
 * request is refused with ENXIO regardless of dev.
 */
static int
ccddump(dev)
	dev_t dev;
{
	int status = ENXIO;	/* Not implemented. */

	return (status);
}
1527 
1528 /*
1529  * Lookup the provided name in the filesystem.  If the file exists,
1530  * is a valid block device, and isn't being used by anyone else,
1531  * set *vpp to the file's vnode.
1532  */
1533 static int
1534 ccdlookup(path, p, vpp)
1535 	char *path;
1536 	struct proc *p;
1537 	struct vnode **vpp;	/* result */
1538 {
1539 	struct nameidata nd;
1540 	struct vnode *vp;
1541 	int error;
1542 
1543 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p);
1544 	if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
1545 #ifdef DEBUG
1546 		if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1547 			printf("ccdlookup: vn_open error = %d\n", error);
1548 #endif
1549 		return (error);
1550 	}
1551 	vp = nd.ni_vp;
1552 
1553 	if (vp->v_usecount > 1) {
1554 		error = EBUSY;
1555 		goto bad;
1556 	}
1557 
1558 	if (!vn_isdisk(vp, &error))
1559 		goto bad;
1560 
1561 #ifdef DEBUG
1562 	if (ccddebug & CCDB_VNODE)
1563 		vprint("ccdlookup: vnode info", vp);
1564 #endif
1565 
1566 	VOP_UNLOCK(vp, 0, p);
1567 	NDFREE(&nd, NDF_ONLY_PNBUF);
1568 	*vpp = vp;
1569 	return (0);
1570 bad:
1571 	VOP_UNLOCK(vp, 0, p);
1572 	NDFREE(&nd, NDF_ONLY_PNBUF);
1573 	/* vn_close does vrele() for vp */
1574 	(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1575 	return (error);
1576 }
1577 
1578 /*
1579  * Read the disklabel from the ccd.  If one is not present, fake one
1580  * up.
1581  */
1582 static void
1583 ccdgetdisklabel(dev)
1584 	dev_t dev;
1585 {
1586 	int unit = ccdunit(dev);
1587 	struct ccd_softc *cs = &ccd_softc[unit];
1588 	char *errstring;
1589 	struct disklabel *lp = &cs->sc_label;
1590 	struct ccdgeom *ccg = &cs->sc_geom;
1591 
1592 	bzero(lp, sizeof(*lp));
1593 
1594 	lp->d_secperunit = cs->sc_size;
1595 	lp->d_secsize = ccg->ccg_secsize;
1596 	lp->d_nsectors = ccg->ccg_nsectors;
1597 	lp->d_ntracks = ccg->ccg_ntracks;
1598 	lp->d_ncylinders = ccg->ccg_ncylinders;
1599 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1600 
1601 	strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1602 	lp->d_type = DTYPE_CCD;
1603 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1604 	lp->d_rpm = 3600;
1605 	lp->d_interleave = 1;
1606 	lp->d_flags = 0;
1607 
1608 	lp->d_partitions[RAW_PART].p_offset = 0;
1609 	lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1610 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1611 	lp->d_npartitions = RAW_PART + 1;
1612 
1613 	lp->d_bbsize = BBSIZE;				/* XXX */
1614 	lp->d_sbsize = SBSIZE;				/* XXX */
1615 
1616 	lp->d_magic = DISKMAGIC;
1617 	lp->d_magic2 = DISKMAGIC;
1618 	lp->d_checksum = dkcksum(&cs->sc_label);
1619 
1620 	/*
1621 	 * Call the generic disklabel extraction routine.
1622 	 */
1623 	errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label);
1624 	if (errstring != NULL)
1625 		ccdmakedisklabel(cs);
1626 
1627 #ifdef DEBUG
1628 	/* It's actually extremely common to have unlabeled ccds. */
1629 	if (ccddebug & CCDB_LABEL)
1630 		if (errstring != NULL)
1631 			printf("ccd%d: %s\n", unit, errstring);
1632 #endif
1633 }
1634 
1635 /*
1636  * Take care of things one might want to take care of in the event
1637  * that a disklabel isn't present.
1638  */
1639 static void
1640 ccdmakedisklabel(cs)
1641 	struct ccd_softc *cs;
1642 {
1643 	struct disklabel *lp = &cs->sc_label;
1644 
1645 	/*
1646 	 * For historical reasons, if there's no disklabel present
1647 	 * the raw partition must be marked FS_BSDFFS.
1648 	 */
1649 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1650 
1651 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1652 }
1653 
1654 /*
1655  * Wait interruptibly for an exclusive lock.
1656  *
1657  * XXX
1658  * Several drivers do this; it should be abstracted and made MP-safe.
1659  */
1660 static int
1661 ccdlock(cs)
1662 	struct ccd_softc *cs;
1663 {
1664 	int error;
1665 
1666 	while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1667 		cs->sc_flags |= CCDF_WANTED;
1668 		if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0)
1669 			return (error);
1670 	}
1671 	cs->sc_flags |= CCDF_LOCKED;
1672 	return (0);
1673 }
1674 
1675 /*
1676  * Unlock and wake up any waiters.
1677  */
1678 static void
1679 ccdunlock(cs)
1680 	struct ccd_softc *cs;
1681 {
1682 
1683 	cs->sc_flags &= ~CCDF_LOCKED;
1684 	if ((cs->sc_flags & CCDF_WANTED) != 0) {
1685 		cs->sc_flags &= ~CCDF_WANTED;
1686 		wakeup(cs);
1687 	}
1688 }
1689 
1690 #ifdef DEBUG
1691 static void
1692 printiinfo(ii)
1693 	struct ccdiinfo *ii;
1694 {
1695 	int ix, i;
1696 
1697 	for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1698 		printf(" itab[%d]: #dk %d sblk %d soff %d",
1699 		       ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
1700 		for (i = 0; i < ii->ii_ndisk; i++)
1701 			printf(" %d", ii->ii_index[i]);
1702 		printf("\n");
1703 	}
1704 }
1705 #endif
1706 
1707 
1708 /* Local Variables: */
1709 /* c-argdecl-indent: 8 */
1710 /* c-continued-statement-offset: 8 */
1711 /* c-indent-level: 8 */
1712 /* End: */
1713