1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 
41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 
73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include "ccd.h" 91 #if NCCD > 0 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/module.h> 97 #include <sys/proc.h> 98 #include <sys/buf.h> 99 #include <sys/malloc.h> 100 #include <sys/namei.h> 101 #include <sys/conf.h> 102 #include <sys/stat.h> 103 #include <sys/sysctl.h> 104 #include <sys/disklabel.h> 105 #include <ufs/ffs/fs.h> 106 #include <sys/devicestat.h> 107 #include <sys/fcntl.h> 108 #include <sys/vnode.h> 109 110 #include <sys/ccdvar.h> 111 112 #include <vm/vm_zone.h> 113 114 #if defined(CCDDEBUG) && !defined(DEBUG) 115 #define DEBUG 116 #endif 117 118 #ifdef DEBUG 119 #define CCDB_FOLLOW 0x01 120 #define CCDB_INIT 0x02 121 #define CCDB_IO 0x04 122 #define CCDB_LABEL 0x08 123 #define CCDB_VNODE 0x10 124 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 125 CCDB_VNODE; 126 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 127 #undef DEBUG 128 #endif 129 130 #define ccdunit(x) dkunit(x) 131 #define ccdpart(x) dkpart(x) 132 133 /* 134 This is how mirroring works (only writes are special): 135 136 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 137 linked together by the cb_mirror field. "cb_pflags & 138 CCDPF_MIRROR_DONE" is set to 0 on both of them. 139 140 When a component returns to ccdiodone(), it checks if "cb_pflags & 141 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 142 flag and returns. If it is, it means its partner has already 143 returned, so it will go to the regular cleanup. 
144 145 */ 146 147 struct ccdbuf { 148 struct buf cb_buf; /* new I/O buf */ 149 struct buf *cb_obp; /* ptr. to original I/O buf */ 150 struct ccdbuf *cb_freenext; /* free list link */ 151 int cb_unit; /* target unit */ 152 int cb_comp; /* target component */ 153 int cb_pflags; /* mirror/parity status flag */ 154 struct ccdbuf *cb_mirror; /* mirror counterpart */ 155 }; 156 157 /* bits in cb_pflags */ 158 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 159 160 #define CCDLABELDEV(dev) \ 161 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 162 163 static d_open_t ccdopen; 164 static d_close_t ccdclose; 165 static d_strategy_t ccdstrategy; 166 static d_ioctl_t ccdioctl; 167 static d_dump_t ccddump; 168 static d_psize_t ccdsize; 169 170 #define NCCDFREEHIWAT 16 171 172 #define CDEV_MAJOR 74 173 #define BDEV_MAJOR 21 174 175 static struct cdevsw ccd_cdevsw = { 176 /* open */ ccdopen, 177 /* close */ ccdclose, 178 /* read */ physread, 179 /* write */ physwrite, 180 /* ioctl */ ccdioctl, 181 /* poll */ nopoll, 182 /* mmap */ nommap, 183 /* strategy */ ccdstrategy, 184 /* name */ "ccd", 185 /* maj */ CDEV_MAJOR, 186 /* dump */ ccddump, 187 /* psize */ ccdsize, 188 /* flags */ D_DISK, 189 /* bmaj */ BDEV_MAJOR 190 }; 191 192 /* called during module initialization */ 193 static void ccdattach __P((void)); 194 static int ccd_modevent __P((module_t, int, void *)); 195 196 /* called by biodone() at interrupt time */ 197 static void ccdiodone __P((struct ccdbuf *cbp)); 198 199 static void ccdstart __P((struct ccd_softc *, struct buf *)); 200 static void ccdinterleave __P((struct ccd_softc *, int)); 201 static void ccdintr __P((struct ccd_softc *, struct buf *)); 202 static int ccdinit __P((struct ccddevice *, char **, struct proc *)); 203 static int ccdlookup __P((char *, struct proc *p, struct vnode **)); 204 static void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *, 205 struct buf *, daddr_t, caddr_t, long)); 206 static void 
ccdgetdisklabel __P((dev_t)); 207 static void ccdmakedisklabel __P((struct ccd_softc *)); 208 static int ccdlock __P((struct ccd_softc *)); 209 static void ccdunlock __P((struct ccd_softc *)); 210 211 #ifdef DEBUG 212 static void printiinfo __P((struct ccdiinfo *)); 213 #endif 214 215 /* Non-private for the benefit of libkvm. */ 216 struct ccd_softc *ccd_softc; 217 struct ccddevice *ccddevs; 218 struct ccdbuf *ccdfreebufs; 219 static int numccdfreebufs; 220 static int numccd = 0; 221 222 /* 223 * getccdbuf() - Allocate and zero a ccd buffer. 224 * 225 * This routine is called at splbio(). 226 */ 227 228 static __inline 229 struct ccdbuf * 230 getccdbuf(struct ccdbuf *cpy) 231 { 232 struct ccdbuf *cbp; 233 234 /* 235 * Allocate from freelist or malloc as necessary 236 */ 237 if ((cbp = ccdfreebufs) != NULL) { 238 ccdfreebufs = cbp->cb_freenext; 239 --numccdfreebufs; 240 } else { 241 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 242 } 243 244 /* 245 * Used by mirroring code 246 */ 247 if (cpy) 248 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 249 else 250 bzero(cbp, sizeof(struct ccdbuf)); 251 252 /* 253 * independant struct buf initialization 254 */ 255 LIST_INIT(&cbp->cb_buf.b_dep); 256 BUF_LOCKINIT(&cbp->cb_buf); 257 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 258 BUF_KERNPROC(&cbp->cb_buf); 259 260 return(cbp); 261 } 262 263 /* 264 * putccdbuf() - Free a ccd buffer. 265 * 266 * This routine is called at splbio(). 267 */ 268 269 static __inline 270 void 271 putccdbuf(struct ccdbuf *cbp) 272 { 273 BUF_UNLOCK(&cbp->cb_buf); 274 BUF_LOCKFREE(&cbp->cb_buf); 275 276 if (numccdfreebufs < NCCDFREEHIWAT) { 277 cbp->cb_freenext = ccdfreebufs; 278 ccdfreebufs = cbp; 279 ++numccdfreebufs; 280 } else { 281 free((caddr_t)cbp, M_DEVBUF); 282 } 283 } 284 285 286 /* 287 * Number of blocks to untouched in front of a component partition. 288 * This is to avoid violating its disklabel area when it starts at the 289 * beginning of the slice. 
290 */ 291 #if !defined(CCD_OFFSET) 292 #define CCD_OFFSET 16 293 #endif 294 295 /* 296 * Called by main() during pseudo-device attachment. All we need 297 * to do is allocate enough space for devices to be configured later, and 298 * add devsw entries. 299 */ 300 static void 301 ccdattach() 302 { 303 int i; 304 int num = NCCD; 305 306 if (num > 1) 307 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 308 else 309 printf("ccd0: Concatenated disk driver\n"); 310 311 ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc), 312 M_DEVBUF, M_NOWAIT); 313 ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice), 314 M_DEVBUF, M_NOWAIT); 315 if ((ccd_softc == NULL) || (ccddevs == NULL)) { 316 printf("WARNING: no memory for concatenated disks\n"); 317 if (ccd_softc != NULL) 318 free(ccd_softc, M_DEVBUF); 319 if (ccddevs != NULL) 320 free(ccddevs, M_DEVBUF); 321 return; 322 } 323 numccd = num; 324 bzero(ccd_softc, num * sizeof(struct ccd_softc)); 325 bzero(ccddevs, num * sizeof(struct ccddevice)); 326 327 cdevsw_add(&ccd_cdevsw); 328 /* XXX: is this necessary? 
*/ 329 for (i = 0; i < numccd; ++i) 330 ccddevs[i].ccd_dk = -1; 331 } 332 333 static int 334 ccd_modevent(mod, type, data) 335 module_t mod; 336 int type; 337 void *data; 338 { 339 int error = 0; 340 341 switch (type) { 342 case MOD_LOAD: 343 ccdattach(); 344 break; 345 346 case MOD_UNLOAD: 347 printf("ccd0: Unload not supported!\n"); 348 error = EOPNOTSUPP; 349 break; 350 351 default: /* MOD_SHUTDOWN etc */ 352 break; 353 } 354 return (error); 355 } 356 357 DEV_MODULE(ccd, ccd_modevent, NULL); 358 359 static int 360 ccdinit(ccd, cpaths, p) 361 struct ccddevice *ccd; 362 char **cpaths; 363 struct proc *p; 364 { 365 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 366 struct ccdcinfo *ci = NULL; /* XXX */ 367 size_t size; 368 int ix; 369 struct vnode *vp; 370 size_t minsize; 371 int maxsecsize; 372 struct partinfo dpart; 373 struct ccdgeom *ccg = &cs->sc_geom; 374 char tmppath[MAXPATHLEN]; 375 int error = 0; 376 377 #ifdef DEBUG 378 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 379 printf("ccdinit: unit %d\n", ccd->ccd_unit); 380 #endif 381 382 cs->sc_size = 0; 383 cs->sc_ileave = ccd->ccd_interleave; 384 cs->sc_nccdisks = ccd->ccd_ndev; 385 386 /* Allocate space for the component info. */ 387 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 388 M_DEVBUF, M_WAITOK); 389 390 /* 391 * Verify that each component piece exists and record 392 * relevant information about it. 393 */ 394 maxsecsize = 0; 395 minsize = 0; 396 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 397 vp = ccd->ccd_vpp[ix]; 398 ci = &cs->sc_cinfo[ix]; 399 ci->ci_vp = vp; 400 401 /* 402 * Copy in the pathname of the component. 
403 */ 404 bzero(tmppath, sizeof(tmppath)); /* sanity */ 405 if ((error = copyinstr(cpaths[ix], tmppath, 406 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 407 #ifdef DEBUG 408 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 409 printf("ccd%d: can't copy path, error = %d\n", 410 ccd->ccd_unit, error); 411 #endif 412 goto fail; 413 } 414 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 415 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 416 417 ci->ci_dev = vn_todev(vp); 418 419 /* 420 * Get partition information for the component. 421 */ 422 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 423 FREAD, p->p_ucred, p)) != 0) { 424 #ifdef DEBUG 425 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 426 printf("ccd%d: %s: ioctl failed, error = %d\n", 427 ccd->ccd_unit, ci->ci_path, error); 428 #endif 429 goto fail; 430 } 431 if (dpart.part->p_fstype == FS_BSDFFS) { 432 maxsecsize = 433 ((dpart.disklab->d_secsize > maxsecsize) ? 434 dpart.disklab->d_secsize : maxsecsize); 435 size = dpart.part->p_size - CCD_OFFSET; 436 } else { 437 #ifdef DEBUG 438 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 439 printf("ccd%d: %s: incorrect partition type\n", 440 ccd->ccd_unit, ci->ci_path); 441 #endif 442 error = EFTYPE; 443 goto fail; 444 } 445 446 /* 447 * Calculate the size, truncating to an interleave 448 * boundary if necessary. 449 */ 450 451 if (cs->sc_ileave > 1) 452 size -= size % cs->sc_ileave; 453 454 if (size == 0) { 455 #ifdef DEBUG 456 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 457 printf("ccd%d: %s: size == 0\n", 458 ccd->ccd_unit, ci->ci_path); 459 #endif 460 error = ENODEV; 461 goto fail; 462 } 463 464 if (minsize == 0 || size < minsize) 465 minsize = size; 466 ci->ci_size = size; 467 cs->sc_size += size; 468 } 469 470 /* 471 * Don't allow the interleave to be smaller than 472 * the biggest component sector. 
473 */ 474 if ((cs->sc_ileave > 0) && 475 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 476 #ifdef DEBUG 477 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 478 printf("ccd%d: interleave must be at least %d\n", 479 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 480 #endif 481 error = EINVAL; 482 goto fail; 483 } 484 485 /* 486 * If uniform interleave is desired set all sizes to that of 487 * the smallest component. This will guarentee that a single 488 * interleave table is generated. 489 * 490 * Lost space must be taken into account when calculating the 491 * overall size. Half the space is lost when CCDF_MIRROR is 492 * specified. One disk is lost when CCDF_PARITY is specified. 493 */ 494 if (ccd->ccd_flags & CCDF_UNIFORM) { 495 for (ci = cs->sc_cinfo; 496 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 497 ci->ci_size = minsize; 498 } 499 if (ccd->ccd_flags & CCDF_MIRROR) { 500 /* 501 * Check to see if an even number of components 502 * have been specified. The interleave must also 503 * be non-zero in order for us to be able to 504 * guarentee the topology. 505 */ 506 if (cs->sc_nccdisks % 2) { 507 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 508 error = EINVAL; 509 goto fail; 510 } 511 if (cs->sc_ileave == 0) { 512 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 513 error = EINVAL; 514 goto fail; 515 } 516 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 517 } else if (ccd->ccd_flags & CCDF_PARITY) { 518 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 519 } else { 520 if (cs->sc_ileave == 0) { 521 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 522 error = EINVAL; 523 goto fail; 524 } 525 cs->sc_size = cs->sc_nccdisks * minsize; 526 } 527 } 528 529 /* 530 * Construct the interleave table. 531 */ 532 ccdinterleave(cs, ccd->ccd_unit); 533 534 /* 535 * Create pseudo-geometry based on 1MB cylinders. It's 536 * pretty close. 
537 */ 538 ccg->ccg_secsize = maxsecsize; 539 ccg->ccg_ntracks = 1; 540 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 541 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 542 543 /* 544 * Add an devstat entry for this device. 545 */ 546 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 547 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 548 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 549 DEVSTAT_PRIORITY_ARRAY); 550 551 cs->sc_flags |= CCDF_INITED; 552 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 553 cs->sc_unit = ccd->ccd_unit; 554 return (0); 555 fail: 556 while (ci > cs->sc_cinfo) { 557 ci--; 558 free(ci->ci_path, M_DEVBUF); 559 } 560 free(cs->sc_cinfo, M_DEVBUF); 561 return (error); 562 } 563 564 static void 565 ccdinterleave(cs, unit) 566 struct ccd_softc *cs; 567 int unit; 568 { 569 struct ccdcinfo *ci, *smallci; 570 struct ccdiinfo *ii; 571 daddr_t bn, lbn; 572 int ix; 573 u_long size; 574 575 #ifdef DEBUG 576 if (ccddebug & CCDB_INIT) 577 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 578 #endif 579 580 /* 581 * Allocate an interleave table. The worst case occurs when each 582 * of N disks is of a different size, resulting in N interleave 583 * tables. 584 * 585 * Chances are this is too big, but we don't care. 586 */ 587 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 588 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 589 bzero((caddr_t)cs->sc_itable, size); 590 591 /* 592 * Trivial case: no interleave (actually interleave of disk size). 593 * Each table entry represents a single component in its entirety. 594 * 595 * An interleave of 0 may not be used with a mirror or parity setup. 596 */ 597 if (cs->sc_ileave == 0) { 598 bn = 0; 599 ii = cs->sc_itable; 600 601 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 602 /* Allocate space for ii_index. 
*/ 603 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 604 ii->ii_ndisk = 1; 605 ii->ii_startblk = bn; 606 ii->ii_startoff = 0; 607 ii->ii_index[0] = ix; 608 bn += cs->sc_cinfo[ix].ci_size; 609 ii++; 610 } 611 ii->ii_ndisk = 0; 612 #ifdef DEBUG 613 if (ccddebug & CCDB_INIT) 614 printiinfo(cs->sc_itable); 615 #endif 616 return; 617 } 618 619 /* 620 * The following isn't fast or pretty; it doesn't have to be. 621 */ 622 size = 0; 623 bn = lbn = 0; 624 for (ii = cs->sc_itable; ; ii++) { 625 /* 626 * Allocate space for ii_index. We might allocate more then 627 * we use. 628 */ 629 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 630 M_DEVBUF, M_WAITOK); 631 632 /* 633 * Locate the smallest of the remaining components 634 */ 635 smallci = NULL; 636 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 637 ci++) { 638 if (ci->ci_size > size && 639 (smallci == NULL || 640 ci->ci_size < smallci->ci_size)) { 641 smallci = ci; 642 } 643 } 644 645 /* 646 * Nobody left, all done 647 */ 648 if (smallci == NULL) { 649 ii->ii_ndisk = 0; 650 break; 651 } 652 653 /* 654 * Record starting logical block using an sc_ileave blocksize. 655 */ 656 ii->ii_startblk = bn / cs->sc_ileave; 657 658 /* 659 * Record starting comopnent block using an sc_ileave 660 * blocksize. This value is relative to the beginning of 661 * a component disk. 662 */ 663 ii->ii_startoff = lbn; 664 665 /* 666 * Determine how many disks take part in this interleave 667 * and record their indices. 
668 */ 669 ix = 0; 670 for (ci = cs->sc_cinfo; 671 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 672 if (ci->ci_size >= smallci->ci_size) { 673 ii->ii_index[ix++] = ci - cs->sc_cinfo; 674 } 675 } 676 ii->ii_ndisk = ix; 677 bn += ix * (smallci->ci_size - size); 678 lbn = smallci->ci_size / cs->sc_ileave; 679 size = smallci->ci_size; 680 } 681 #ifdef DEBUG 682 if (ccddebug & CCDB_INIT) 683 printiinfo(cs->sc_itable); 684 #endif 685 } 686 687 /* ARGSUSED */ 688 static int 689 ccdopen(dev, flags, fmt, p) 690 dev_t dev; 691 int flags, fmt; 692 struct proc *p; 693 { 694 int unit = ccdunit(dev); 695 struct ccd_softc *cs; 696 struct disklabel *lp; 697 int error = 0, part, pmask; 698 699 #ifdef DEBUG 700 if (ccddebug & CCDB_FOLLOW) 701 printf("ccdopen(%x, %x)\n", dev, flags); 702 #endif 703 if (unit >= numccd) 704 return (ENXIO); 705 cs = &ccd_softc[unit]; 706 707 if ((error = ccdlock(cs)) != 0) 708 return (error); 709 710 lp = &cs->sc_label; 711 712 part = ccdpart(dev); 713 pmask = (1 << part); 714 715 /* 716 * If we're initialized, check to see if there are any other 717 * open partitions. If not, then it's safe to update 718 * the in-core disklabel. 719 */ 720 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 721 ccdgetdisklabel(dev); 722 723 /* Check that the partition exists. */ 724 if (part != RAW_PART && ((part >= lp->d_npartitions) || 725 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 726 error = ENXIO; 727 goto done; 728 } 729 730 /* Prevent our unit from being unconfigured while open. 
*/ 731 switch (fmt) { 732 case S_IFCHR: 733 cs->sc_copenmask |= pmask; 734 break; 735 736 case S_IFBLK: 737 cs->sc_bopenmask |= pmask; 738 break; 739 } 740 cs->sc_openmask = 741 cs->sc_copenmask | cs->sc_bopenmask; 742 743 done: 744 ccdunlock(cs); 745 return (0); 746 } 747 748 /* ARGSUSED */ 749 static int 750 ccdclose(dev, flags, fmt, p) 751 dev_t dev; 752 int flags, fmt; 753 struct proc *p; 754 { 755 int unit = ccdunit(dev); 756 struct ccd_softc *cs; 757 int error = 0, part; 758 759 #ifdef DEBUG 760 if (ccddebug & CCDB_FOLLOW) 761 printf("ccdclose(%x, %x)\n", dev, flags); 762 #endif 763 764 if (unit >= numccd) 765 return (ENXIO); 766 cs = &ccd_softc[unit]; 767 768 if ((error = ccdlock(cs)) != 0) 769 return (error); 770 771 part = ccdpart(dev); 772 773 /* ...that much closer to allowing unconfiguration... */ 774 switch (fmt) { 775 case S_IFCHR: 776 cs->sc_copenmask &= ~(1 << part); 777 break; 778 779 case S_IFBLK: 780 cs->sc_bopenmask &= ~(1 << part); 781 break; 782 } 783 cs->sc_openmask = 784 cs->sc_copenmask | cs->sc_bopenmask; 785 786 ccdunlock(cs); 787 return (0); 788 } 789 790 static void 791 ccdstrategy(bp) 792 struct buf *bp; 793 { 794 int unit = ccdunit(bp->b_dev); 795 struct ccd_softc *cs = &ccd_softc[unit]; 796 int s; 797 int wlabel; 798 struct disklabel *lp; 799 800 #ifdef DEBUG 801 if (ccddebug & CCDB_FOLLOW) 802 printf("ccdstrategy(%x): unit %d\n", bp, unit); 803 #endif 804 if ((cs->sc_flags & CCDF_INITED) == 0) { 805 bp->b_error = ENXIO; 806 bp->b_flags |= B_ERROR; 807 goto done; 808 } 809 810 /* If it's a nil transfer, wake up the top half now. */ 811 if (bp->b_bcount == 0) 812 goto done; 813 814 lp = &cs->sc_label; 815 816 /* 817 * Do bounds checking and adjust transfer. If there's an 818 * error, the bounds check will flag that for us. 
819 */ 820 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 821 if (ccdpart(bp->b_dev) != RAW_PART) { 822 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 823 goto done; 824 } else { 825 int pbn; /* in sc_secsize chunks */ 826 long sz; /* in sc_secsize chunks */ 827 828 pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 829 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 830 831 /* 832 * If out of bounds return an error. If at the EOF point, 833 * simply read or write less. 834 */ 835 836 if (pbn < 0 || pbn >= cs->sc_size) { 837 bp->b_resid = bp->b_bcount; 838 if (pbn != cs->sc_size) { 839 bp->b_error = EINVAL; 840 bp->b_flags |= B_ERROR | B_INVAL; 841 } 842 goto done; 843 } 844 845 /* 846 * If the request crosses EOF, truncate the request. 847 */ 848 if (pbn + sz > cs->sc_size) { 849 bp->b_bcount = (cs->sc_size - pbn) * 850 cs->sc_geom.ccg_secsize; 851 } 852 } 853 854 bp->b_resid = bp->b_bcount; 855 856 /* 857 * "Start" the unit. 858 */ 859 s = splbio(); 860 ccdstart(cs, bp); 861 splx(s); 862 return; 863 done: 864 biodone(bp); 865 } 866 867 static void 868 ccdstart(cs, bp) 869 struct ccd_softc *cs; 870 struct buf *bp; 871 { 872 long bcount, rcount; 873 struct ccdbuf *cbp[4]; 874 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 875 caddr_t addr; 876 daddr_t bn; 877 struct partition *pp; 878 879 #ifdef DEBUG 880 if (ccddebug & CCDB_FOLLOW) 881 printf("ccdstart(%x, %x)\n", cs, bp); 882 #endif 883 884 /* Record the transaction start */ 885 devstat_start_transaction(&cs->device_stats); 886 887 /* 888 * Translate the partition-relative block number to an absolute. 
889 */ 890 bn = bp->b_blkno; 891 if (ccdpart(bp->b_dev) != RAW_PART) { 892 pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)]; 893 bn += pp->p_offset; 894 } 895 896 /* 897 * Allocate component buffers and fire off the requests 898 */ 899 addr = bp->b_data; 900 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 901 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 902 rcount = cbp[0]->cb_buf.b_bcount; 903 904 if (cs->sc_cflags & CCDF_MIRROR) { 905 /* 906 * Mirroring. Writes go to both disks, reads are 907 * taken from whichever disk seems most appropriate. 908 * 909 * We attempt to localize reads to the disk whos arm 910 * is nearest the read request. We ignore seeks due 911 * to writes when making this determination and we 912 * also try to avoid hogging. 913 */ 914 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) { 915 cbp[0]->cb_buf.b_vp->v_numoutput++; 916 cbp[1]->cb_buf.b_vp->v_numoutput++; 917 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, 918 &cbp[0]->cb_buf); 919 VOP_STRATEGY(cbp[1]->cb_buf.b_vp, 920 &cbp[1]->cb_buf); 921 } else { 922 int pick = cs->sc_pick; 923 daddr_t range = cs->sc_size / 16; 924 925 if (bn < cs->sc_blk[pick] - range || 926 bn > cs->sc_blk[pick] + range 927 ) { 928 cs->sc_pick = pick = 1 - pick; 929 } 930 cs->sc_blk[pick] = bn + btodb(rcount); 931 VOP_STRATEGY(cbp[pick]->cb_buf.b_vp, 932 &cbp[pick]->cb_buf); 933 } 934 } else { 935 /* 936 * Not mirroring 937 */ 938 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) 939 cbp[0]->cb_buf.b_vp->v_numoutput++; 940 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf); 941 } 942 bn += btodb(rcount); 943 addr += rcount; 944 } 945 } 946 947 /* 948 * Build a component buffer header. 
 */
/*
 * ccdbuffer() - map the start of a request onto one component.
 *
 *	cb	out: cb[0] receives the component buffer; when the unit has
 *		CCDF_MIRROR set, cb[1] receives a copy aimed at the mirror
 *		component and the two are linked through cb_mirror
 *	cs	unit the request is for
 *	bp	original request; its flags are inherited and it is
 *		remembered in cb_obp for ccdiodone()
 *	bn	starting block of this piece, in DEV_BSIZE units
 *	addr	data address for this piece
 *	bcount	bytes of the original request still to be mapped
 *
 * The buffer's b_bcount is the smaller of 'bcount' and the bytes left in
 * the current component (interleave 0) or interleave stripe, so the
 * caller advances by cb[0]->cb_buf.b_bcount and calls again.
 */
static void
ccdbuffer(cb, cs, bp, bn, addr, bcount)
	struct ccdbuf **cb;
	struct ccd_softc *cs;
	struct buf *bp;
	daddr_t bn;
	caddr_t addr;
	long bcount;
{
	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
	struct ccdbuf *cbp;
	daddr_t cbn, cboff;
	off_t cbc;

#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
		       cs, bp, bn, addr, bcount);
#endif
	/*
	 * Determine which component bn falls in.
	 */
	cbn = bn;
	cboff = 0;

	if (cs->sc_ileave == 0) {
		/*
		 * Serially concatenated and neither a mirror nor a parity
		 * config.  This is a special case.
		 */
		daddr_t sblk;

		sblk = 0;
		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
			sblk += ci->ci_size;
		cbn -= sblk;
	} else {
		struct ccdiinfo *ii;
		int ccdisk, off;

		/*
		 * Calculate cbn, the logical superblock (sc_ileave chunks),
		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
		 * to cbn.
		 */
		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */

		/*
		 * Figure out which interleave table to use.
		 */
		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
			if (ii->ii_startblk > cbn)
				break;
		}
		/* Step back to the last entry that starts at or before cbn. */
		ii--;

		/*
		 * off is the logical superblock relative to the beginning
		 * of this interleave block.
		 */
		off = cbn - ii->ii_startblk;

		/*
		 * We must calculate which disk component to use (ccdisk),
		 * and recalculate cbn to be the superblock relative to
		 * the beginning of the component.  This is typically done by
		 * adding 'off' and ii->ii_startoff together.  However, 'off'
		 * must typically be divided by the number of components in
		 * this interleave array to properly convert it from a
		 * CCD-relative logical superblock number to a
		 * component-relative superblock number.
		 */
		if (ii->ii_ndisk == 1) {
			/*
			 * When we have just one disk, it can't be a mirror
			 * or a parity config.
			 */
			ccdisk = ii->ii_index[0];
			cbn = ii->ii_startoff + off;
		} else {
			if (cs->sc_cflags & CCDF_MIRROR) {
				/*
				 * We have forced a uniform mapping, resulting
				 * in a single interleave array.  We double
				 * up on the first half of the available
				 * components and our mirror is in the second
				 * half.  This only works with a single
				 * interleave array because doubling up
				 * doubles the number of sectors, so there
				 * cannot be another interleave array because
				 * the next interleave array's calculations
				 * would be off.
				 */
				int ndisk2 = ii->ii_ndisk / 2;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
			} else if (cs->sc_cflags & CCDF_PARITY) {
				/*
				 * XXX not implemented yet
				 */
				int ndisk2 = ii->ii_ndisk - 1;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				if (cbn % ii->ii_ndisk <= ccdisk)
					ccdisk++;
			} else {
				ccdisk = ii->ii_index[off % ii->ii_ndisk];
				cbn = ii->ii_startoff + off / ii->ii_ndisk;
			}
		}

		ci = &cs->sc_cinfo[ccdisk];

		/*
		 * Convert cbn from a superblock to a normal block so it
		 * can be used to calculate (along with cboff) the normal
		 * block index into this particular disk.
		 */
		cbn *= cs->sc_ileave;
	}

	/*
	 * Fill in the component buf structure.
	 */
	cbp = getccdbuf(NULL);
	cbp->cb_buf.b_flags = bp->b_flags | B_CALL;
	cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone;
	cbp->cb_buf.b_dev = ci->ci_dev;		/* XXX */
	/* CCD_OFFSET skips the blocks reserved at the front of a component. */
	cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET;
	cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET);
	cbp->cb_buf.b_data = addr;
	cbp->cb_buf.b_vp = ci->ci_vp;
	/* cbc: bytes left before this component or stripe runs out. */
	if (cs->sc_ileave == 0)
		cbc = dbtob((off_t)(ci->ci_size - cbn));
	else
		cbc = dbtob((off_t)(cs->sc_ileave - cboff));
	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;

	/*
	 * context for ccdiodone
	 */
	cbp->cb_obp = bp;
	cbp->cb_unit = cs - ccd_softc;
	cbp->cb_comp = ci - cs->sc_cinfo;

#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
		       ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno,
		       cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
#endif
	cb[0] = cbp;

	/*
	 * Note: both I/O's setup when reading from mirror, but only one
	 * will be executed.
	 */
	if (cs->sc_cflags & CCDF_MIRROR) {
		/* mirror, setup second I/O */
		cbp = getccdbuf(cb[0]);
		cbp->cb_buf.b_dev = ci2->ci_dev;
		cbp->cb_buf.b_vp = ci2->ci_vp;
		cbp->cb_comp = ci2 - cs->sc_cinfo;
		cb[1] = cbp;
		/* link together the ccdbuf's and clear "mirror done" flag */
		cb[0]->cb_mirror = cb[1];
		cb[1]->cb_mirror = cb[0];
		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
	}
}

/*
 * Complete the original request 'bp' on unit 'cs'.  When the request
 * carries B_ERROR the whole of it is reported as not transferred.  The
 * devstat transaction is then closed and the request is handed to
 * biodone().  Called from ccdiodone() once b_resid has reached zero.
 */
static void
ccdintr(cs, bp)
	struct ccd_softc *cs;
	struct buf *bp;
{
#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdintr(%x, %x)\n", cs, bp);
#endif
	/*
	 * Request is done for better or worse, wakeup the top half.
	 */
	if (bp->b_flags & B_ERROR)
		bp->b_resid = bp->b_bcount;
	devstat_end_transaction_buf(&cs->device_stats, bp);
	biodone(bp);
}

/*
 * Called at interrupt time.
 * Mark the component as done and if all components are done,
 * take a ccd interrupt.
 *
 * 'cbp' is the component buffer that has just finished; it is installed
 * as b_iodone in ccdbuffer().  The buffer is always released here, and
 * the original request (cb_obp) is completed through ccdintr() once its
 * b_resid has been counted down to zero.
 */
static void
ccdiodone(cbp)
	struct ccdbuf *cbp;
{
	struct buf *bp = cbp->cb_obp;
	int unit = cbp->cb_unit;
	int count, s;

	s = splbio();
#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdiodone(%x)\n", cbp);
	if (ccddebug & CCDB_IO) {
		printf("ccdiodone: bp %x bcount %d resid %d\n",
		       bp, bp->b_bcount, bp->b_resid);
		printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
		       cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
		       cbp->cb_buf.b_bcount);
	}
#endif
	/*
	 * If an error occurred, report it.  If this is a mirrored
	 * configuration and the first of two possible reads, do not
	 * set the error in the bp yet because the second read may
	 * succeed.
	 */

	if (cbp->cb_buf.b_flags & B_ERROR) {
		const char *msg = "";

		if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
		    (cbp->cb_buf.b_flags & B_READ) &&
		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
			/*
			 * We will try our read on the other disk down
			 * below, also reverse the default pick so if we
			 * are doing a scan we do not keep hitting the
			 * bad disk first.
			 */
			struct ccd_softc *cs = &ccd_softc[unit];

			msg = ", trying other disk";
			cs->sc_pick = 1 - cs->sc_pick;
			cs->sc_blk[cs->sc_pick] = bp->b_blkno;
		} else {
			bp->b_flags |= B_ERROR;
			bp->b_error = cbp->cb_buf.b_error ?
			    cbp->cb_buf.b_error : EIO;
		}
		printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
		       unit, bp->b_error, cbp->cb_comp,
		       (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg);
	}

	/*
	 * Process mirror.  If we are writing, I/O has been initiated on both
	 * buffers and we fall through only after both are finished.
	 *
	 * If we are reading only one I/O is initiated at a time.  If an
	 * error occurs we initiate the second I/O and return, otherwise
	 * we free the second I/O without initiating it.
	 */

	if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
		if ((cbp->cb_buf.b_flags & B_READ) == 0) {
			/*
			 * When writing, handshake with the second buffer
			 * to determine when both are done.  If both are not
			 * done, return here.
			 */
			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
				putccdbuf(cbp);
				splx(s);
				return;
			}
		} else {
			/*
			 * When reading, either dispose of the second buffer
			 * or initiate I/O on the second buffer if an error
			 * occurred with this one.
			 */
			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
				if (cbp->cb_buf.b_flags & B_ERROR) {
					cbp->cb_mirror->cb_pflags |=
					    CCDPF_MIRROR_DONE;
					VOP_STRATEGY(
					    cbp->cb_mirror->cb_buf.b_vp,
					    &cbp->cb_mirror->cb_buf
					);
					putccdbuf(cbp);
					splx(s);
					return;
				} else {
					putccdbuf(cbp->cb_mirror);
					/* fall through */
				}
			}
		}
	}

	/*
	 * use b_bufsize to determine how big the original request was rather
	 * than b_bcount, because b_bcount may have been truncated for EOF.
	 *
	 * XXX We check for an error, but we do not test the resid for an
	 * aligned EOF condition.  This may result in character & block
	 * device access not recognizing EOF properly when read or written
	 * sequentially, but will not affect filesystems.
	 */
	count = cbp->cb_buf.b_bufsize;
	putccdbuf(cbp);

	/*
	 * If all done, "interrupt".
	 */
	bp->b_resid -= count;
	if (bp->b_resid < 0)
		panic("ccdiodone: count");
	if (bp->b_resid == 0)
		ccdintr(&ccd_softc[unit], bp);
	splx(s);
}

static int
ccdioctl(dev, cmd, data, flag, p)
	dev_t dev;
	u_long cmd;
	caddr_t data;
	int flag;
	struct proc *p;
{
	int unit = ccdunit(dev);
	int i, j, lookedup = 0, error = 0;
	int part, pmask, s;
	struct ccd_softc *cs;
	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
	struct ccddevice ccd;
	char **cpp;
	struct vnode **vpp;

	if (unit >= numccd)
		return (ENXIO);
	cs = &ccd_softc[unit];

	bzero(&ccd, sizeof(ccd));

	switch (cmd) {
	case CCDIOCSET:
		if (cs->sc_flags & CCDF_INITED)
			return (EBUSY);

		if ((flag & FWRITE) == 0)
			return (EBADF);

		if ((error = ccdlock(cs)) != 0)
			return (error);

		/* Fill in some important bits. */
		ccd.ccd_unit = unit;
		ccd.ccd_interleave = ccio->ccio_ileave;
		if (ccd.ccd_interleave == 0 &&
		    ((ccio->ccio_flags & CCDF_MIRROR) ||
		     (ccio->ccio_flags & CCDF_PARITY))) {
			printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
		}
		if ((ccio->ccio_flags & CCDF_MIRROR) &&
		    (ccio->ccio_flags & CCDF_PARITY)) {
			printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
			ccio->ccio_flags &= ~CCDF_PARITY;
		}
		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
			printf("ccd%d: mirror/parity forces uniform flag\n",
			       unit);
			ccio->ccio_flags |= CCDF_UNIFORM;
		}
		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;

		/*
		 * Allocate space for and copy in the array of
		 * componet pathnames and device numbers.
1332 */ 1333 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1334 M_DEVBUF, M_WAITOK); 1335 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1336 M_DEVBUF, M_WAITOK); 1337 1338 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1339 ccio->ccio_ndisks * sizeof(char **)); 1340 if (error) { 1341 free(vpp, M_DEVBUF); 1342 free(cpp, M_DEVBUF); 1343 ccdunlock(cs); 1344 return (error); 1345 } 1346 1347 #ifdef DEBUG 1348 if (ccddebug & CCDB_INIT) 1349 for (i = 0; i < ccio->ccio_ndisks; ++i) 1350 printf("ccdioctl: component %d: 0x%x\n", 1351 i, cpp[i]); 1352 #endif 1353 1354 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1355 #ifdef DEBUG 1356 if (ccddebug & CCDB_INIT) 1357 printf("ccdioctl: lookedup = %d\n", lookedup); 1358 #endif 1359 if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) { 1360 for (j = 0; j < lookedup; ++j) 1361 (void)vn_close(vpp[j], FREAD|FWRITE, 1362 p->p_ucred, p); 1363 free(vpp, M_DEVBUF); 1364 free(cpp, M_DEVBUF); 1365 ccdunlock(cs); 1366 return (error); 1367 } 1368 ++lookedup; 1369 } 1370 ccd.ccd_cpp = cpp; 1371 ccd.ccd_vpp = vpp; 1372 ccd.ccd_ndev = ccio->ccio_ndisks; 1373 1374 /* 1375 * Initialize the ccd. Fills in the softc for us. 1376 */ 1377 if ((error = ccdinit(&ccd, cpp, p)) != 0) { 1378 for (j = 0; j < lookedup; ++j) 1379 (void)vn_close(vpp[j], FREAD|FWRITE, 1380 p->p_ucred, p); 1381 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1382 free(vpp, M_DEVBUF); 1383 free(cpp, M_DEVBUF); 1384 ccdunlock(cs); 1385 return (error); 1386 } 1387 1388 /* 1389 * The ccd has been successfully initialized, so 1390 * we can place it into the array and read the disklabel. 
1391 */ 1392 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1393 ccio->ccio_unit = unit; 1394 ccio->ccio_size = cs->sc_size; 1395 ccdgetdisklabel(dev); 1396 1397 ccdunlock(cs); 1398 1399 break; 1400 1401 case CCDIOCCLR: 1402 if ((cs->sc_flags & CCDF_INITED) == 0) 1403 return (ENXIO); 1404 1405 if ((flag & FWRITE) == 0) 1406 return (EBADF); 1407 1408 if ((error = ccdlock(cs)) != 0) 1409 return (error); 1410 1411 /* 1412 * Don't unconfigure if any other partitions are open 1413 * or if both the character and block flavors of this 1414 * partition are open. 1415 */ 1416 part = ccdpart(dev); 1417 pmask = (1 << part); 1418 if ((cs->sc_openmask & ~pmask) || 1419 ((cs->sc_bopenmask & pmask) && 1420 (cs->sc_copenmask & pmask))) { 1421 ccdunlock(cs); 1422 return (EBUSY); 1423 } 1424 1425 /* 1426 * Free ccd_softc information and clear entry. 1427 */ 1428 1429 /* Close the components and free their pathnames. */ 1430 for (i = 0; i < cs->sc_nccdisks; ++i) { 1431 /* 1432 * XXX: this close could potentially fail and 1433 * cause Bad Things. Maybe we need to force 1434 * the close to happen? 1435 */ 1436 #ifdef DEBUG 1437 if (ccddebug & CCDB_VNODE) 1438 vprint("CCDIOCCLR: vnode info", 1439 cs->sc_cinfo[i].ci_vp); 1440 #endif 1441 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1442 p->p_ucred, p); 1443 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1444 } 1445 1446 /* Free interleave index. */ 1447 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1448 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1449 1450 /* Free component info and interleave table. */ 1451 free(cs->sc_cinfo, M_DEVBUF); 1452 free(cs->sc_itable, M_DEVBUF); 1453 cs->sc_flags &= ~CCDF_INITED; 1454 1455 /* 1456 * Free ccddevice information and clear entry. 1457 */ 1458 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1459 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1460 ccd.ccd_dk = -1; 1461 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1462 1463 /* 1464 * And remove the devstat entry. 
1465 */ 1466 devstat_remove_entry(&cs->device_stats); 1467 1468 /* This must be atomic. */ 1469 s = splhigh(); 1470 ccdunlock(cs); 1471 bzero(cs, sizeof(struct ccd_softc)); 1472 splx(s); 1473 1474 break; 1475 1476 case DIOCGDINFO: 1477 if ((cs->sc_flags & CCDF_INITED) == 0) 1478 return (ENXIO); 1479 1480 *(struct disklabel *)data = cs->sc_label; 1481 break; 1482 1483 case DIOCGPART: 1484 if ((cs->sc_flags & CCDF_INITED) == 0) 1485 return (ENXIO); 1486 1487 ((struct partinfo *)data)->disklab = &cs->sc_label; 1488 ((struct partinfo *)data)->part = 1489 &cs->sc_label.d_partitions[ccdpart(dev)]; 1490 break; 1491 1492 case DIOCWDINFO: 1493 case DIOCSDINFO: 1494 if ((cs->sc_flags & CCDF_INITED) == 0) 1495 return (ENXIO); 1496 1497 if ((flag & FWRITE) == 0) 1498 return (EBADF); 1499 1500 if ((error = ccdlock(cs)) != 0) 1501 return (error); 1502 1503 cs->sc_flags |= CCDF_LABELLING; 1504 1505 error = setdisklabel(&cs->sc_label, 1506 (struct disklabel *)data, 0); 1507 if (error == 0) { 1508 if (cmd == DIOCWDINFO) 1509 error = writedisklabel(CCDLABELDEV(dev), 1510 &cs->sc_label); 1511 } 1512 1513 cs->sc_flags &= ~CCDF_LABELLING; 1514 1515 ccdunlock(cs); 1516 1517 if (error) 1518 return (error); 1519 break; 1520 1521 case DIOCWLABEL: 1522 if ((cs->sc_flags & CCDF_INITED) == 0) 1523 return (ENXIO); 1524 1525 if ((flag & FWRITE) == 0) 1526 return (EBADF); 1527 if (*(int *)data != 0) 1528 cs->sc_flags |= CCDF_WLABEL; 1529 else 1530 cs->sc_flags &= ~CCDF_WLABEL; 1531 break; 1532 1533 default: 1534 return (ENOTTY); 1535 } 1536 1537 return (0); 1538 } 1539 1540 static int 1541 ccdsize(dev) 1542 dev_t dev; 1543 { 1544 struct ccd_softc *cs; 1545 int part, size; 1546 1547 if (ccdopen(dev, 0, S_IFBLK, curproc)) 1548 return (-1); 1549 1550 cs = &ccd_softc[ccdunit(dev)]; 1551 part = ccdpart(dev); 1552 1553 if ((cs->sc_flags & CCDF_INITED) == 0) 1554 return (-1); 1555 1556 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1557 size = -1; 1558 else 1559 size = 
cs->sc_label.d_partitions[part].p_size; 1560 1561 if (ccdclose(dev, 0, S_IFBLK, curproc)) 1562 return (-1); 1563 1564 return (size); 1565 } 1566 1567 static int 1568 ccddump(dev) 1569 dev_t dev; 1570 { 1571 1572 /* Not implemented. */ 1573 return ENXIO; 1574 } 1575 1576 /* 1577 * Lookup the provided name in the filesystem. If the file exists, 1578 * is a valid block device, and isn't being used by anyone else, 1579 * set *vpp to the file's vnode. 1580 */ 1581 static int 1582 ccdlookup(path, p, vpp) 1583 char *path; 1584 struct proc *p; 1585 struct vnode **vpp; /* result */ 1586 { 1587 struct nameidata nd; 1588 struct vnode *vp; 1589 struct vattr va; 1590 int error; 1591 1592 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p); 1593 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { 1594 #ifdef DEBUG 1595 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1596 printf("ccdlookup: vn_open error = %d\n", error); 1597 #endif 1598 return (error); 1599 } 1600 vp = nd.ni_vp; 1601 1602 if (vp->v_usecount > 1) { 1603 error = EBUSY; 1604 goto bad; 1605 } 1606 1607 if (!vn_isdisk(vp)) { 1608 error = ENOTBLK; 1609 goto bad; 1610 } 1611 1612 #ifdef DEBUG 1613 if (ccddebug & CCDB_VNODE) 1614 vprint("ccdlookup: vnode info", vp); 1615 #endif 1616 1617 VOP_UNLOCK(vp, 0, p); 1618 NDFREE(&nd, NDF_ONLY_PNBUF); 1619 *vpp = vp; 1620 return (0); 1621 bad: 1622 VOP_UNLOCK(vp, 0, p); 1623 NDFREE(&nd, NDF_ONLY_PNBUF); 1624 /* vn_close does vrele() for vp */ 1625 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); 1626 return (error); 1627 } 1628 1629 /* 1630 * Read the disklabel from the ccd. If one is not present, fake one 1631 * up. 
1632 */ 1633 static void 1634 ccdgetdisklabel(dev) 1635 dev_t dev; 1636 { 1637 int unit = ccdunit(dev); 1638 struct ccd_softc *cs = &ccd_softc[unit]; 1639 char *errstring; 1640 struct disklabel *lp = &cs->sc_label; 1641 struct ccdgeom *ccg = &cs->sc_geom; 1642 1643 bzero(lp, sizeof(*lp)); 1644 1645 lp->d_secperunit = cs->sc_size; 1646 lp->d_secsize = ccg->ccg_secsize; 1647 lp->d_nsectors = ccg->ccg_nsectors; 1648 lp->d_ntracks = ccg->ccg_ntracks; 1649 lp->d_ncylinders = ccg->ccg_ncylinders; 1650 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1651 1652 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1653 lp->d_type = DTYPE_CCD; 1654 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1655 lp->d_rpm = 3600; 1656 lp->d_interleave = 1; 1657 lp->d_flags = 0; 1658 1659 lp->d_partitions[RAW_PART].p_offset = 0; 1660 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1661 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1662 lp->d_npartitions = RAW_PART + 1; 1663 1664 lp->d_bbsize = BBSIZE; /* XXX */ 1665 lp->d_sbsize = SBSIZE; /* XXX */ 1666 1667 lp->d_magic = DISKMAGIC; 1668 lp->d_magic2 = DISKMAGIC; 1669 lp->d_checksum = dkcksum(&cs->sc_label); 1670 1671 /* 1672 * Call the generic disklabel extraction routine. 1673 */ 1674 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1675 if (errstring != NULL) 1676 ccdmakedisklabel(cs); 1677 1678 #ifdef DEBUG 1679 /* It's actually extremely common to have unlabeled ccds. */ 1680 if (ccddebug & CCDB_LABEL) 1681 if (errstring != NULL) 1682 printf("ccd%d: %s\n", unit, errstring); 1683 #endif 1684 } 1685 1686 /* 1687 * Take care of things one might want to take care of in the event 1688 * that a disklabel isn't present. 1689 */ 1690 static void 1691 ccdmakedisklabel(cs) 1692 struct ccd_softc *cs; 1693 { 1694 struct disklabel *lp = &cs->sc_label; 1695 1696 /* 1697 * For historical reasons, if there's no disklabel present 1698 * the raw partition must be marked FS_BSDFFS. 
1699 */ 1700 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1701 1702 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1703 } 1704 1705 /* 1706 * Wait interruptibly for an exclusive lock. 1707 * 1708 * XXX 1709 * Several drivers do this; it should be abstracted and made MP-safe. 1710 */ 1711 static int 1712 ccdlock(cs) 1713 struct ccd_softc *cs; 1714 { 1715 int error; 1716 1717 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1718 cs->sc_flags |= CCDF_WANTED; 1719 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1720 return (error); 1721 } 1722 cs->sc_flags |= CCDF_LOCKED; 1723 return (0); 1724 } 1725 1726 /* 1727 * Unlock and wake up any waiters. 1728 */ 1729 static void 1730 ccdunlock(cs) 1731 struct ccd_softc *cs; 1732 { 1733 1734 cs->sc_flags &= ~CCDF_LOCKED; 1735 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1736 cs->sc_flags &= ~CCDF_WANTED; 1737 wakeup(cs); 1738 } 1739 } 1740 1741 #ifdef DEBUG 1742 static void 1743 printiinfo(ii) 1744 struct ccdiinfo *ii; 1745 { 1746 int ix, i; 1747 1748 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1749 printf(" itab[%d]: #dk %d sblk %d soff %d", 1750 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1751 for (i = 0; i < ii->ii_ndisk; i++) 1752 printf(" %d", ii->ii_index[i]); 1753 printf("\n"); 1754 } 1755 } 1756 #endif 1757 1758 #endif /* NCCD > 0 */ 1759 1760 /* Local Variables: */ 1761 /* c-argdecl-indent: 8 */ 1762 /* c-continued-statement-offset: 8 */ 1763 /* c-indent-level: 8 */ 1764 /* End: */ 1765