1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include "ccd.h" 91 #if NCCD > 0 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/module.h> 97 #include <sys/proc.h> 98 #include <sys/buf.h> 99 #include <sys/malloc.h> 100 #include <sys/namei.h> 101 #include <sys/conf.h> 102 #include <sys/stat.h> 103 #include <sys/sysctl.h> 104 #include <sys/disklabel.h> 105 #include <ufs/ffs/fs.h> 106 #include <sys/devicestat.h> 107 #include <sys/fcntl.h> 108 #include <sys/vnode.h> 109 110 #include <sys/ccdvar.h> 111 112 #if defined(CCDDEBUG) && !defined(DEBUG) 113 #define DEBUG 114 #endif 115 116 #ifdef DEBUG 117 #define CCDB_FOLLOW 0x01 118 #define CCDB_INIT 0x02 119 #define CCDB_IO 0x04 120 #define CCDB_LABEL 0x08 121 #define CCDB_VNODE 0x10 122 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 123 CCDB_VNODE; 124 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 125 #undef DEBUG 126 #endif 127 128 #define ccdunit(x) dkunit(x) 129 #define ccdpart(x) dkpart(x) 130 131 /* 132 This is how mirroring works (only writes are special): 133 134 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 135 linked together by the cb_mirror field. "cb_pflags & 136 CCDPF_MIRROR_DONE" is set to 0 on both of them. 137 138 When a component returns to ccdiodone(), it checks if "cb_pflags & 139 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 140 flag and returns. If it is, it means its partner has already 141 returned, so it will go to the regular cleanup. 142 143 */ 144 145 struct ccdbuf { 146 struct buf cb_buf; /* new I/O buf */ 147 struct buf *cb_obp; /* ptr. to original I/O buf */ 148 struct ccdbuf *cb_freenext; /* free list link */ 149 int cb_unit; /* target unit */ 150 int cb_comp; /* target component */ 151 int cb_pflags; /* mirror/parity status flag */ 152 struct ccdbuf *cb_mirror; /* mirror counterpart */ 153 }; 154 155 /* bits in cb_pflags */ 156 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 157 158 #define CCDLABELDEV(dev) \ 159 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 160 161 static d_open_t ccdopen; 162 static d_close_t ccdclose; 163 static d_strategy_t ccdstrategy; 164 static d_ioctl_t ccdioctl; 165 static d_dump_t ccddump; 166 static d_psize_t ccdsize; 167 168 #define NCCDFREEHIWAT 16 169 170 #define CDEV_MAJOR 74 171 #define BDEV_MAJOR 21 172 173 static struct cdevsw ccd_cdevsw = { 174 /* open */ ccdopen, 175 /* close */ ccdclose, 176 /* read */ physread, 177 /* write */ physwrite, 178 /* ioctl */ ccdioctl, 179 /* poll */ nopoll, 180 /* mmap */ nommap, 181 /* strategy */ ccdstrategy, 182 /* name */ "ccd", 183 /* maj */ CDEV_MAJOR, 184 /* dump */ ccddump, 185 /* psize */ ccdsize, 186 /* flags */ D_DISK, 187 /* bmaj */ BDEV_MAJOR 188 }; 189 190 /* called during module initialization */ 191 static void ccdattach __P((void)); 192 static int ccd_modevent __P((module_t, int, void *)); 193 194 /* called by biodone() at interrupt time */ 195 static void ccdiodone __P((struct ccdbuf *cbp)); 196 197 static void ccdstart __P((struct ccd_softc *, struct buf *)); 198 static void ccdinterleave __P((struct ccd_softc *, int)); 199 static void ccdintr __P((struct ccd_softc *, struct buf *)); 200 static int ccdinit __P((struct ccddevice *, char **, struct proc *)); 201 static int ccdlookup __P((char *, struct proc *p, struct vnode **)); 202 static void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *, 203 struct buf *, daddr_t, caddr_t, long)); 204 static void ccdgetdisklabel __P((dev_t)); 205 static void ccdmakedisklabel __P((struct ccd_softc *)); 206 static int ccdlock __P((struct ccd_softc *)); 207 static void ccdunlock __P((struct ccd_softc *)); 208 209 #ifdef DEBUG 210 static void printiinfo __P((struct ccdiinfo *)); 211 #endif 212 213 /* Non-private for the benefit of libkvm. */ 214 struct ccd_softc *ccd_softc; 215 struct ccddevice *ccddevs; 216 struct ccdbuf *ccdfreebufs; 217 static int numccdfreebufs; 218 static int numccd = 0; 219 220 /* 221 * getccdbuf() - Allocate and zero a ccd buffer. 222 * 223 * This routine is called at splbio(). 224 */ 225 226 static __inline 227 struct ccdbuf * 228 getccdbuf(struct ccdbuf *cpy) 229 { 230 struct ccdbuf *cbp; 231 232 /* 233 * Allocate from freelist or malloc as necessary 234 */ 235 if ((cbp = ccdfreebufs) != NULL) { 236 ccdfreebufs = cbp->cb_freenext; 237 --numccdfreebufs; 238 } else { 239 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 240 } 241 242 /* 243 * Used by mirroring code 244 */ 245 if (cpy) 246 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 247 else 248 bzero(cbp, sizeof(struct ccdbuf)); 249 250 /* 251 * independant struct buf initialization 252 */ 253 LIST_INIT(&cbp->cb_buf.b_dep); 254 BUF_LOCKINIT(&cbp->cb_buf); 255 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 256 BUF_KERNPROC(&cbp->cb_buf); 257 258 return(cbp); 259 } 260 261 /* 262 * putccdbuf() - Free a ccd buffer. 263 * 264 * This routine is called at splbio(). 265 */ 266 267 static __inline 268 void 269 putccdbuf(struct ccdbuf *cbp) 270 { 271 BUF_UNLOCK(&cbp->cb_buf); 272 BUF_LOCKFREE(&cbp->cb_buf); 273 274 if (numccdfreebufs < NCCDFREEHIWAT) { 275 cbp->cb_freenext = ccdfreebufs; 276 ccdfreebufs = cbp; 277 ++numccdfreebufs; 278 } else { 279 free((caddr_t)cbp, M_DEVBUF); 280 } 281 } 282 283 284 /* 285 * Number of blocks to untouched in front of a component partition. 286 * This is to avoid violating its disklabel area when it starts at the 287 * beginning of the slice. 288 */ 289 #if !defined(CCD_OFFSET) 290 #define CCD_OFFSET 16 291 #endif 292 293 /* 294 * Called by main() during pseudo-device attachment. All we need 295 * to do is allocate enough space for devices to be configured later, and 296 * add devsw entries. 297 */ 298 static void 299 ccdattach() 300 { 301 int i; 302 int num = NCCD; 303 304 if (num > 1) 305 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 306 else 307 printf("ccd0: Concatenated disk driver\n"); 308 309 ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc), 310 M_DEVBUF, M_NOWAIT); 311 ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice), 312 M_DEVBUF, M_NOWAIT); 313 if ((ccd_softc == NULL) || (ccddevs == NULL)) { 314 printf("WARNING: no memory for concatenated disks\n"); 315 if (ccd_softc != NULL) 316 free(ccd_softc, M_DEVBUF); 317 if (ccddevs != NULL) 318 free(ccddevs, M_DEVBUF); 319 return; 320 } 321 numccd = num; 322 bzero(ccd_softc, num * sizeof(struct ccd_softc)); 323 bzero(ccddevs, num * sizeof(struct ccddevice)); 324 325 /* XXX: is this necessary? */ 326 for (i = 0; i < numccd; ++i) 327 ccddevs[i].ccd_dk = -1; 328 } 329 330 static int 331 ccd_modevent(mod, type, data) 332 module_t mod; 333 int type; 334 void *data; 335 { 336 int error = 0; 337 338 switch (type) { 339 case MOD_LOAD: 340 ccdattach(); 341 break; 342 343 case MOD_UNLOAD: 344 printf("ccd0: Unload not supported!\n"); 345 error = EOPNOTSUPP; 346 break; 347 348 default: /* MOD_SHUTDOWN etc */ 349 break; 350 } 351 return (error); 352 } 353 354 DEV_MODULE(ccd, CDEV_MAJOR, BDEV_MAJOR, ccd_cdevsw, ccd_modevent, NULL); 355 356 static int 357 ccdinit(ccd, cpaths, p) 358 struct ccddevice *ccd; 359 char **cpaths; 360 struct proc *p; 361 { 362 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 363 struct ccdcinfo *ci = NULL; /* XXX */ 364 size_t size; 365 int ix; 366 struct vnode *vp; 367 size_t minsize; 368 int maxsecsize; 369 struct partinfo dpart; 370 struct ccdgeom *ccg = &cs->sc_geom; 371 char tmppath[MAXPATHLEN]; 372 int error = 0; 373 374 #ifdef DEBUG 375 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 376 printf("ccdinit: unit %d\n", ccd->ccd_unit); 377 #endif 378 379 cs->sc_size = 0; 380 cs->sc_ileave = ccd->ccd_interleave; 381 cs->sc_nccdisks = ccd->ccd_ndev; 382 383 /* Allocate space for the component info. */ 384 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 385 M_DEVBUF, M_WAITOK); 386 387 /* 388 * Verify that each component piece exists and record 389 * relevant information about it. 390 */ 391 maxsecsize = 0; 392 minsize = 0; 393 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 394 vp = ccd->ccd_vpp[ix]; 395 ci = &cs->sc_cinfo[ix]; 396 ci->ci_vp = vp; 397 398 /* 399 * Copy in the pathname of the component. 400 */ 401 bzero(tmppath, sizeof(tmppath)); /* sanity */ 402 if ((error = copyinstr(cpaths[ix], tmppath, 403 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 404 #ifdef DEBUG 405 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 406 printf("ccd%d: can't copy path, error = %d\n", 407 ccd->ccd_unit, error); 408 #endif 409 goto fail; 410 } 411 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 412 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 413 414 ci->ci_dev = vn_todev(vp); 415 416 /* 417 * Get partition information for the component. 418 */ 419 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 420 FREAD, p->p_ucred, p)) != 0) { 421 #ifdef DEBUG 422 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 423 printf("ccd%d: %s: ioctl failed, error = %d\n", 424 ccd->ccd_unit, ci->ci_path, error); 425 #endif 426 goto fail; 427 } 428 if (dpart.part->p_fstype == FS_BSDFFS) { 429 maxsecsize = 430 ((dpart.disklab->d_secsize > maxsecsize) ? 431 dpart.disklab->d_secsize : maxsecsize); 432 size = dpart.part->p_size - CCD_OFFSET; 433 } else { 434 #ifdef DEBUG 435 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 436 printf("ccd%d: %s: incorrect partition type\n", 437 ccd->ccd_unit, ci->ci_path); 438 #endif 439 error = EFTYPE; 440 goto fail; 441 } 442 443 /* 444 * Calculate the size, truncating to an interleave 445 * boundary if necessary. 446 */ 447 448 if (cs->sc_ileave > 1) 449 size -= size % cs->sc_ileave; 450 451 if (size == 0) { 452 #ifdef DEBUG 453 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 454 printf("ccd%d: %s: size == 0\n", 455 ccd->ccd_unit, ci->ci_path); 456 #endif 457 error = ENODEV; 458 goto fail; 459 } 460 461 if (minsize == 0 || size < minsize) 462 minsize = size; 463 ci->ci_size = size; 464 cs->sc_size += size; 465 } 466 467 /* 468 * Don't allow the interleave to be smaller than 469 * the biggest component sector. 470 */ 471 if ((cs->sc_ileave > 0) && 472 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 473 #ifdef DEBUG 474 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 475 printf("ccd%d: interleave must be at least %d\n", 476 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 477 #endif 478 error = EINVAL; 479 goto fail; 480 } 481 482 /* 483 * If uniform interleave is desired set all sizes to that of 484 * the smallest component. This will guarentee that a single 485 * interleave table is generated. 486 * 487 * Lost space must be taken into account when calculating the 488 * overall size. Half the space is lost when CCDF_MIRROR is 489 * specified. One disk is lost when CCDF_PARITY is specified. 490 */ 491 if (ccd->ccd_flags & CCDF_UNIFORM) { 492 for (ci = cs->sc_cinfo; 493 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 494 ci->ci_size = minsize; 495 } 496 if (ccd->ccd_flags & CCDF_MIRROR) { 497 /* 498 * Check to see if an even number of components 499 * have been specified. The interleave must also 500 * be non-zero in order for us to be able to 501 * guarentee the topology. 502 */ 503 if (cs->sc_nccdisks % 2) { 504 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 505 error = EINVAL; 506 goto fail; 507 } 508 if (cs->sc_ileave == 0) { 509 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 510 error = EINVAL; 511 goto fail; 512 } 513 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 514 } else if (ccd->ccd_flags & CCDF_PARITY) { 515 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 516 } else { 517 if (cs->sc_ileave == 0) { 518 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 519 error = EINVAL; 520 goto fail; 521 } 522 cs->sc_size = cs->sc_nccdisks * minsize; 523 } 524 } 525 526 /* 527 * Construct the interleave table. 528 */ 529 ccdinterleave(cs, ccd->ccd_unit); 530 531 /* 532 * Create pseudo-geometry based on 1MB cylinders. It's 533 * pretty close. 534 */ 535 ccg->ccg_secsize = maxsecsize; 536 ccg->ccg_ntracks = 1; 537 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 538 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 539 540 /* 541 * Add an devstat entry for this device. 542 */ 543 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 544 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 545 DEVSTAT_TYPE_ASC0 |DEVSTAT_TYPE_IF_OTHER, 546 DEVSTAT_PRIORITY_CCD); 547 548 cs->sc_flags |= CCDF_INITED; 549 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 550 cs->sc_unit = ccd->ccd_unit; 551 return (0); 552 fail: 553 while (ci > cs->sc_cinfo) { 554 ci--; 555 free(ci->ci_path, M_DEVBUF); 556 } 557 free(cs->sc_cinfo, M_DEVBUF); 558 return (error); 559 } 560 561 static void 562 ccdinterleave(cs, unit) 563 struct ccd_softc *cs; 564 int unit; 565 { 566 struct ccdcinfo *ci, *smallci; 567 struct ccdiinfo *ii; 568 daddr_t bn, lbn; 569 int ix; 570 u_long size; 571 572 #ifdef DEBUG 573 if (ccddebug & CCDB_INIT) 574 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 575 #endif 576 577 /* 578 * Allocate an interleave table. The worst case occurs when each 579 * of N disks is of a different size, resulting in N interleave 580 * tables. 581 * 582 * Chances are this is too big, but we don't care. 583 */ 584 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 585 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 586 bzero((caddr_t)cs->sc_itable, size); 587 588 /* 589 * Trivial case: no interleave (actually interleave of disk size). 590 * Each table entry represents a single component in its entirety. 591 * 592 * An interleave of 0 may not be used with a mirror or parity setup. 593 */ 594 if (cs->sc_ileave == 0) { 595 bn = 0; 596 ii = cs->sc_itable; 597 598 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 599 /* Allocate space for ii_index. */ 600 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 601 ii->ii_ndisk = 1; 602 ii->ii_startblk = bn; 603 ii->ii_startoff = 0; 604 ii->ii_index[0] = ix; 605 bn += cs->sc_cinfo[ix].ci_size; 606 ii++; 607 } 608 ii->ii_ndisk = 0; 609 #ifdef DEBUG 610 if (ccddebug & CCDB_INIT) 611 printiinfo(cs->sc_itable); 612 #endif 613 return; 614 } 615 616 /* 617 * The following isn't fast or pretty; it doesn't have to be. 618 */ 619 size = 0; 620 bn = lbn = 0; 621 for (ii = cs->sc_itable; ; ii++) { 622 /* 623 * Allocate space for ii_index. We might allocate more then 624 * we use. 625 */ 626 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 627 M_DEVBUF, M_WAITOK); 628 629 /* 630 * Locate the smallest of the remaining components 631 */ 632 smallci = NULL; 633 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 634 ci++) { 635 if (ci->ci_size > size && 636 (smallci == NULL || 637 ci->ci_size < smallci->ci_size)) { 638 smallci = ci; 639 } 640 } 641 642 /* 643 * Nobody left, all done 644 */ 645 if (smallci == NULL) { 646 ii->ii_ndisk = 0; 647 break; 648 } 649 650 /* 651 * Record starting logical block using an sc_ileave blocksize. 652 */ 653 ii->ii_startblk = bn / cs->sc_ileave; 654 655 /* 656 * Record starting comopnent block using an sc_ileave 657 * blocksize. This value is relative to the beginning of 658 * a component disk. 659 */ 660 ii->ii_startoff = lbn; 661 662 /* 663 * Determine how many disks take part in this interleave 664 * and record their indices. 665 */ 666 ix = 0; 667 for (ci = cs->sc_cinfo; 668 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 669 if (ci->ci_size >= smallci->ci_size) { 670 ii->ii_index[ix++] = ci - cs->sc_cinfo; 671 } 672 } 673 ii->ii_ndisk = ix; 674 bn += ix * (smallci->ci_size - size); 675 lbn = smallci->ci_size / cs->sc_ileave; 676 size = smallci->ci_size; 677 } 678 #ifdef DEBUG 679 if (ccddebug & CCDB_INIT) 680 printiinfo(cs->sc_itable); 681 #endif 682 } 683 684 /* ARGSUSED */ 685 static int 686 ccdopen(dev, flags, fmt, p) 687 dev_t dev; 688 int flags, fmt; 689 struct proc *p; 690 { 691 int unit = ccdunit(dev); 692 struct ccd_softc *cs; 693 struct disklabel *lp; 694 int error = 0, part, pmask; 695 696 #ifdef DEBUG 697 if (ccddebug & CCDB_FOLLOW) 698 printf("ccdopen(%x, %x)\n", dev, flags); 699 #endif 700 if (unit >= numccd) 701 return (ENXIO); 702 cs = &ccd_softc[unit]; 703 704 if ((error = ccdlock(cs)) != 0) 705 return (error); 706 707 lp = &cs->sc_label; 708 709 part = ccdpart(dev); 710 pmask = (1 << part); 711 712 /* 713 * If we're initialized, check to see if there are any other 714 * open partitions. If not, then it's safe to update 715 * the in-core disklabel. 716 */ 717 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 718 ccdgetdisklabel(dev); 719 720 /* Check that the partition exists. */ 721 if (part != RAW_PART && ((part >= lp->d_npartitions) || 722 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 723 error = ENXIO; 724 goto done; 725 } 726 727 /* Prevent our unit from being unconfigured while open. */ 728 switch (fmt) { 729 case S_IFCHR: 730 cs->sc_copenmask |= pmask; 731 break; 732 733 case S_IFBLK: 734 cs->sc_bopenmask |= pmask; 735 break; 736 } 737 cs->sc_openmask = 738 cs->sc_copenmask | cs->sc_bopenmask; 739 740 done: 741 ccdunlock(cs); 742 return (0); 743 } 744 745 /* ARGSUSED */ 746 static int 747 ccdclose(dev, flags, fmt, p) 748 dev_t dev; 749 int flags, fmt; 750 struct proc *p; 751 { 752 int unit = ccdunit(dev); 753 struct ccd_softc *cs; 754 int error = 0, part; 755 756 #ifdef DEBUG 757 if (ccddebug & CCDB_FOLLOW) 758 printf("ccdclose(%x, %x)\n", dev, flags); 759 #endif 760 761 if (unit >= numccd) 762 return (ENXIO); 763 cs = &ccd_softc[unit]; 764 765 if ((error = ccdlock(cs)) != 0) 766 return (error); 767 768 part = ccdpart(dev); 769 770 /* ...that much closer to allowing unconfiguration... */ 771 switch (fmt) { 772 case S_IFCHR: 773 cs->sc_copenmask &= ~(1 << part); 774 break; 775 776 case S_IFBLK: 777 cs->sc_bopenmask &= ~(1 << part); 778 break; 779 } 780 cs->sc_openmask = 781 cs->sc_copenmask | cs->sc_bopenmask; 782 783 ccdunlock(cs); 784 return (0); 785 } 786 787 static void 788 ccdstrategy(bp) 789 struct buf *bp; 790 { 791 int unit = ccdunit(bp->b_dev); 792 struct ccd_softc *cs = &ccd_softc[unit]; 793 int s; 794 int wlabel; 795 struct disklabel *lp; 796 797 #ifdef DEBUG 798 if (ccddebug & CCDB_FOLLOW) 799 printf("ccdstrategy(%x): unit %d\n", bp, unit); 800 #endif 801 if ((cs->sc_flags & CCDF_INITED) == 0) { 802 bp->b_error = ENXIO; 803 bp->b_flags |= B_ERROR; 804 goto done; 805 } 806 807 /* If it's a nil transfer, wake up the top half now. */ 808 if (bp->b_bcount == 0) 809 goto done; 810 811 lp = &cs->sc_label; 812 813 /* 814 * Do bounds checking and adjust transfer. If there's an 815 * error, the bounds check will flag that for us. 816 */ 817 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 818 if (ccdpart(bp->b_dev) != RAW_PART) { 819 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 820 goto done; 821 } else { 822 int pbn; /* in sc_secsize chunks */ 823 long sz; /* in sc_secsize chunks */ 824 825 pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 826 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 827 828 /* 829 * If out of bounds return an error. If at the EOF point, 830 * simply read or write less. 831 */ 832 833 if (pbn < 0 || pbn >= cs->sc_size) { 834 bp->b_resid = bp->b_bcount; 835 if (pbn != cs->sc_size) { 836 bp->b_error = EINVAL; 837 bp->b_flags |= B_ERROR | B_INVAL; 838 } 839 goto done; 840 } 841 842 /* 843 * If the request crosses EOF, truncate the request. 844 */ 845 if (pbn + sz > cs->sc_size) { 846 bp->b_bcount = (cs->sc_size - pbn) * 847 cs->sc_geom.ccg_secsize; 848 } 849 } 850 851 bp->b_resid = bp->b_bcount; 852 853 /* 854 * "Start" the unit. 855 */ 856 s = splbio(); 857 ccdstart(cs, bp); 858 splx(s); 859 return; 860 done: 861 biodone(bp); 862 } 863 864 static void 865 ccdstart(cs, bp) 866 struct ccd_softc *cs; 867 struct buf *bp; 868 { 869 long bcount, rcount; 870 struct ccdbuf *cbp[4]; 871 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 872 caddr_t addr; 873 daddr_t bn; 874 struct partition *pp; 875 876 #ifdef DEBUG 877 if (ccddebug & CCDB_FOLLOW) 878 printf("ccdstart(%x, %x)\n", cs, bp); 879 #endif 880 881 /* Record the transaction start */ 882 devstat_start_transaction(&cs->device_stats); 883 884 /* 885 * Translate the partition-relative block number to an absolute. 886 */ 887 bn = bp->b_blkno; 888 if (ccdpart(bp->b_dev) != RAW_PART) { 889 pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)]; 890 bn += pp->p_offset; 891 } 892 893 /* 894 * Allocate component buffers and fire off the requests 895 */ 896 addr = bp->b_data; 897 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 898 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 899 rcount = cbp[0]->cb_buf.b_bcount; 900 901 if (cs->sc_cflags & CCDF_MIRROR) { 902 /* 903 * Mirroring. Writes go to both disks, reads are 904 * taken from whichever disk seems most appropriate. 905 * 906 * We attempt to localize reads to the disk whos arm 907 * is nearest the read request. We ignore seeks due 908 * to writes when making this determination and we 909 * also try to avoid hogging. 910 */ 911 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) { 912 cbp[0]->cb_buf.b_vp->v_numoutput++; 913 cbp[1]->cb_buf.b_vp->v_numoutput++; 914 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, 915 &cbp[0]->cb_buf); 916 VOP_STRATEGY(cbp[1]->cb_buf.b_vp, 917 &cbp[1]->cb_buf); 918 } else { 919 int pick = cs->sc_pick; 920 daddr_t range = cs->sc_size / 16; 921 922 if (bn < cs->sc_blk[pick] - range || 923 bn > cs->sc_blk[pick] + range 924 ) { 925 cs->sc_pick = pick = 1 - pick; 926 } 927 cs->sc_blk[pick] = bn + btodb(rcount); 928 VOP_STRATEGY(cbp[pick]->cb_buf.b_vp, 929 &cbp[pick]->cb_buf); 930 } 931 } else { 932 /* 933 * Not mirroring 934 */ 935 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) 936 cbp[0]->cb_buf.b_vp->v_numoutput++; 937 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf); 938 } 939 bn += btodb(rcount); 940 addr += rcount; 941 } 942 } 943 944 /* 945 * Build a component buffer header. 946 */ 947 static void 948 ccdbuffer(cb, cs, bp, bn, addr, bcount) 949 struct ccdbuf **cb; 950 struct ccd_softc *cs; 951 struct buf *bp; 952 daddr_t bn; 953 caddr_t addr; 954 long bcount; 955 { 956 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 957 struct ccdbuf *cbp; 958 daddr_t cbn, cboff; 959 off_t cbc; 960 961 #ifdef DEBUG 962 if (ccddebug & CCDB_IO) 963 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 964 cs, bp, bn, addr, bcount); 965 #endif 966 /* 967 * Determine which component bn falls in. 968 */ 969 cbn = bn; 970 cboff = 0; 971 972 if (cs->sc_ileave == 0) { 973 /* 974 * Serially concatenated and neither a mirror nor a parity 975 * config. This is a special case. 976 */ 977 daddr_t sblk; 978 979 sblk = 0; 980 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 981 sblk += ci->ci_size; 982 cbn -= sblk; 983 } else { 984 struct ccdiinfo *ii; 985 int ccdisk, off; 986 987 /* 988 * Calculate cbn, the logical superblock (sc_ileave chunks), 989 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 990 * to cbn. 991 */ 992 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 993 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 994 995 /* 996 * Figure out which interleave table to use. 997 */ 998 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 999 if (ii->ii_startblk > cbn) 1000 break; 1001 } 1002 ii--; 1003 1004 /* 1005 * off is the logical superblock relative to the beginning 1006 * of this interleave block. 1007 */ 1008 off = cbn - ii->ii_startblk; 1009 1010 /* 1011 * We must calculate which disk component to use (ccdisk), 1012 * and recalculate cbn to be the superblock relative to 1013 * the beginning of the component. This is typically done by 1014 * adding 'off' and ii->ii_startoff together. However, 'off' 1015 * must typically be divided by the number of components in 1016 * this interleave array to be properly convert it from a 1017 * CCD-relative logical superblock number to a 1018 * component-relative superblock number. 1019 */ 1020 if (ii->ii_ndisk == 1) { 1021 /* 1022 * When we have just one disk, it can't be a mirror 1023 * or a parity config. 1024 */ 1025 ccdisk = ii->ii_index[0]; 1026 cbn = ii->ii_startoff + off; 1027 } else { 1028 if (cs->sc_cflags & CCDF_MIRROR) { 1029 /* 1030 * We have forced a uniform mapping, resulting 1031 * in a single interleave array. We double 1032 * up on the first half of the available 1033 * components and our mirror is in the second 1034 * half. This only works with a single 1035 * interleave array because doubling up 1036 * doubles the number of sectors, so there 1037 * cannot be another interleave array because 1038 * the next interleave array's calculations 1039 * would be off. 1040 */ 1041 int ndisk2 = ii->ii_ndisk / 2; 1042 ccdisk = ii->ii_index[off % ndisk2]; 1043 cbn = ii->ii_startoff + off / ndisk2; 1044 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1045 } else if (cs->sc_cflags & CCDF_PARITY) { 1046 /* 1047 * XXX not implemented yet 1048 */ 1049 int ndisk2 = ii->ii_ndisk - 1; 1050 ccdisk = ii->ii_index[off % ndisk2]; 1051 cbn = ii->ii_startoff + off / ndisk2; 1052 if (cbn % ii->ii_ndisk <= ccdisk) 1053 ccdisk++; 1054 } else { 1055 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1056 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1057 } 1058 } 1059 1060 ci = &cs->sc_cinfo[ccdisk]; 1061 1062 /* 1063 * Convert cbn from a superblock to a normal block so it 1064 * can be used to calculate (along with cboff) the normal 1065 * block index into this particular disk. 1066 */ 1067 cbn *= cs->sc_ileave; 1068 } 1069 1070 /* 1071 * Fill in the component buf structure. 1072 */ 1073 cbp = getccdbuf(NULL); 1074 cbp->cb_buf.b_flags = bp->b_flags | B_CALL; 1075 cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone; 1076 cbp->cb_buf.b_dev = ci->ci_dev; /* XXX */ 1077 cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET; 1078 cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET); 1079 cbp->cb_buf.b_data = addr; 1080 cbp->cb_buf.b_vp = ci->ci_vp; 1081 if (cs->sc_ileave == 0) 1082 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1083 else 1084 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1085 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1086 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1087 1088 /* 1089 * context for ccdiodone 1090 */ 1091 cbp->cb_obp = bp; 1092 cbp->cb_unit = cs - ccd_softc; 1093 cbp->cb_comp = ci - cs->sc_cinfo; 1094 1095 #ifdef DEBUG 1096 if (ccddebug & CCDB_IO) 1097 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n", 1098 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno, 1099 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1100 #endif 1101 cb[0] = cbp; 1102 1103 /* 1104 * Note: both I/O's setup when reading from mirror, but only one 1105 * will be executed. 1106 */ 1107 if (cs->sc_cflags & CCDF_MIRROR) { 1108 /* mirror, setup second I/O */ 1109 cbp = getccdbuf(cb[0]); 1110 cbp->cb_buf.b_dev = ci2->ci_dev; 1111 cbp->cb_buf.b_vp = ci2->ci_vp; 1112 cbp->cb_comp = ci2 - cs->sc_cinfo; 1113 cb[1] = cbp; 1114 /* link together the ccdbuf's and clear "mirror done" flag */ 1115 cb[0]->cb_mirror = cb[1]; 1116 cb[1]->cb_mirror = cb[0]; 1117 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1118 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1119 } 1120 } 1121 1122 static void 1123 ccdintr(cs, bp) 1124 struct ccd_softc *cs; 1125 struct buf *bp; 1126 { 1127 #ifdef DEBUG 1128 if (ccddebug & CCDB_FOLLOW) 1129 printf("ccdintr(%x, %x)\n", cs, bp); 1130 #endif 1131 /* 1132 * Request is done for better or worse, wakeup the top half. 1133 */ 1134 if (bp->b_flags & B_ERROR) 1135 bp->b_resid = bp->b_bcount; 1136 devstat_end_transaction_buf(&cs->device_stats, bp); 1137 biodone(bp); 1138 } 1139 1140 /* 1141 * Called at interrupt time. 1142 * Mark the component as done and if all components are done, 1143 * take a ccd interrupt. 1144 */ 1145 static void 1146 ccdiodone(cbp) 1147 struct ccdbuf *cbp; 1148 { 1149 struct buf *bp = cbp->cb_obp; 1150 int unit = cbp->cb_unit; 1151 int count, s; 1152 1153 s = splbio(); 1154 #ifdef DEBUG 1155 if (ccddebug & CCDB_FOLLOW) 1156 printf("ccdiodone(%x)\n", cbp); 1157 if (ccddebug & CCDB_IO) { 1158 printf("ccdiodone: bp %x bcount %d resid %d\n", 1159 bp, bp->b_bcount, bp->b_resid); 1160 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n", 1161 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1162 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 1163 cbp->cb_buf.b_bcount); 1164 } 1165 #endif 1166 /* 1167 * If an error occured, report it. If this is a mirrored 1168 * configuration and the first of two possible reads, do not 1169 * set the error in the bp yet because the second read may 1170 * succeed. 1171 */ 1172 1173 if (cbp->cb_buf.b_flags & B_ERROR) { 1174 const char *msg = ""; 1175 1176 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1177 (cbp->cb_buf.b_flags & B_READ) && 1178 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1179 /* 1180 * We will try our read on the other disk down 1181 * below, also reverse the default pick so if we 1182 * are doing a scan we do not keep hitting the 1183 * bad disk first. 1184 */ 1185 struct ccd_softc *cs = &ccd_softc[unit]; 1186 1187 msg = ", trying other disk"; 1188 cs->sc_pick = 1 - cs->sc_pick; 1189 cs->sc_blk[cs->sc_pick] = bp->b_blkno; 1190 } else { 1191 bp->b_flags |= B_ERROR; 1192 bp->b_error = cbp->cb_buf.b_error ? 1193 cbp->cb_buf.b_error : EIO; 1194 } 1195 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1196 unit, bp->b_error, cbp->cb_comp, 1197 (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg); 1198 } 1199 1200 /* 1201 * Process mirror. If we are writing, I/O has been initiated on both 1202 * buffers and we fall through only after both are finished. 1203 * 1204 * If we are reading only one I/O is initiated at a time. If an 1205 * error occurs we initiate the second I/O and return, otherwise 1206 * we free the second I/O without initiating it. 1207 */ 1208 1209 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1210 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 1211 /* 1212 * When writing, handshake with the second buffer 1213 * to determine when both are done. If both are not 1214 * done, return here. 1215 */ 1216 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1217 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1218 putccdbuf(cbp); 1219 splx(s); 1220 return; 1221 } 1222 } else { 1223 /* 1224 * When reading, either dispose of the second buffer 1225 * or initiate I/O on the second buffer if an error 1226 * occured with this one. 1227 */ 1228 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1229 if (cbp->cb_buf.b_flags & B_ERROR) { 1230 cbp->cb_mirror->cb_pflags |= 1231 CCDPF_MIRROR_DONE; 1232 VOP_STRATEGY( 1233 cbp->cb_mirror->cb_buf.b_vp, 1234 &cbp->cb_mirror->cb_buf 1235 ); 1236 putccdbuf(cbp); 1237 splx(s); 1238 return; 1239 } else { 1240 putccdbuf(cbp->cb_mirror); 1241 /* fall through */ 1242 } 1243 } 1244 } 1245 } 1246 1247 /* 1248 * use b_bufsize to determine how big the original request was rather 1249 * then b_bcount, because b_bcount may have been truncated for EOF. 1250 * 1251 * XXX We check for an error, but we do not test the resid for an 1252 * aligned EOF condition. This may result in character & block 1253 * device access not recognizing EOF properly when read or written 1254 * sequentially, but will not effect filesystems. 1255 */ 1256 count = cbp->cb_buf.b_bufsize; 1257 putccdbuf(cbp); 1258 1259 /* 1260 * If all done, "interrupt". 1261 */ 1262 bp->b_resid -= count; 1263 if (bp->b_resid < 0) 1264 panic("ccdiodone: count"); 1265 if (bp->b_resid == 0) 1266 ccdintr(&ccd_softc[unit], bp); 1267 splx(s); 1268 } 1269 1270 static int 1271 ccdioctl(dev, cmd, data, flag, p) 1272 dev_t dev; 1273 u_long cmd; 1274 caddr_t data; 1275 int flag; 1276 struct proc *p; 1277 { 1278 int unit = ccdunit(dev); 1279 int i, j, lookedup = 0, error = 0; 1280 int part, pmask, s; 1281 struct ccd_softc *cs; 1282 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1283 struct ccddevice ccd; 1284 char **cpp; 1285 struct vnode **vpp; 1286 1287 if (unit >= numccd) 1288 return (ENXIO); 1289 cs = &ccd_softc[unit]; 1290 1291 bzero(&ccd, sizeof(ccd)); 1292 1293 switch (cmd) { 1294 case CCDIOCSET: 1295 if (cs->sc_flags & CCDF_INITED) 1296 return (EBUSY); 1297 1298 if ((flag & FWRITE) == 0) 1299 return (EBADF); 1300 1301 if ((error = ccdlock(cs)) != 0) 1302 return (error); 1303 1304 /* Fill in some important bits. */ 1305 ccd.ccd_unit = unit; 1306 ccd.ccd_interleave = ccio->ccio_ileave; 1307 if (ccd.ccd_interleave == 0 && 1308 ((ccio->ccio_flags & CCDF_MIRROR) || 1309 (ccio->ccio_flags & CCDF_PARITY))) { 1310 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1311 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1312 } 1313 if ((ccio->ccio_flags & CCDF_MIRROR) && 1314 (ccio->ccio_flags & CCDF_PARITY)) { 1315 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1316 ccio->ccio_flags &= ~CCDF_PARITY; 1317 } 1318 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1319 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1320 printf("ccd%d: mirror/parity forces uniform flag\n", 1321 unit); 1322 ccio->ccio_flags |= CCDF_UNIFORM; 1323 } 1324 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1325 1326 /* 1327 * Allocate space for and copy in the array of 1328 * componet pathnames and device numbers. 1329 */ 1330 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1331 M_DEVBUF, M_WAITOK); 1332 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1333 M_DEVBUF, M_WAITOK); 1334 1335 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1336 ccio->ccio_ndisks * sizeof(char **)); 1337 if (error) { 1338 free(vpp, M_DEVBUF); 1339 free(cpp, M_DEVBUF); 1340 ccdunlock(cs); 1341 return (error); 1342 } 1343 1344 #ifdef DEBUG 1345 if (ccddebug & CCDB_INIT) 1346 for (i = 0; i < ccio->ccio_ndisks; ++i) 1347 printf("ccdioctl: component %d: 0x%x\n", 1348 i, cpp[i]); 1349 #endif 1350 1351 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1352 #ifdef DEBUG 1353 if (ccddebug & CCDB_INIT) 1354 printf("ccdioctl: lookedup = %d\n", lookedup); 1355 #endif 1356 if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) { 1357 for (j = 0; j < lookedup; ++j) 1358 (void)vn_close(vpp[j], FREAD|FWRITE, 1359 p->p_ucred, p); 1360 free(vpp, M_DEVBUF); 1361 free(cpp, M_DEVBUF); 1362 ccdunlock(cs); 1363 return (error); 1364 } 1365 ++lookedup; 1366 } 1367 ccd.ccd_cpp = cpp; 1368 ccd.ccd_vpp = vpp; 1369 ccd.ccd_ndev = ccio->ccio_ndisks; 1370 1371 /* 1372 * Initialize the ccd. Fills in the softc for us. 1373 */ 1374 if ((error = ccdinit(&ccd, cpp, p)) != 0) { 1375 for (j = 0; j < lookedup; ++j) 1376 (void)vn_close(vpp[j], FREAD|FWRITE, 1377 p->p_ucred, p); 1378 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1379 free(vpp, M_DEVBUF); 1380 free(cpp, M_DEVBUF); 1381 ccdunlock(cs); 1382 return (error); 1383 } 1384 1385 /* 1386 * The ccd has been successfully initialized, so 1387 * we can place it into the array and read the disklabel. 1388 */ 1389 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1390 ccio->ccio_unit = unit; 1391 ccio->ccio_size = cs->sc_size; 1392 ccdgetdisklabel(dev); 1393 1394 ccdunlock(cs); 1395 1396 break; 1397 1398 case CCDIOCCLR: 1399 if ((cs->sc_flags & CCDF_INITED) == 0) 1400 return (ENXIO); 1401 1402 if ((flag & FWRITE) == 0) 1403 return (EBADF); 1404 1405 if ((error = ccdlock(cs)) != 0) 1406 return (error); 1407 1408 /* 1409 * Don't unconfigure if any other partitions are open 1410 * or if both the character and block flavors of this 1411 * partition are open. 1412 */ 1413 part = ccdpart(dev); 1414 pmask = (1 << part); 1415 if ((cs->sc_openmask & ~pmask) || 1416 ((cs->sc_bopenmask & pmask) && 1417 (cs->sc_copenmask & pmask))) { 1418 ccdunlock(cs); 1419 return (EBUSY); 1420 } 1421 1422 /* 1423 * Free ccd_softc information and clear entry. 1424 */ 1425 1426 /* Close the components and free their pathnames. */ 1427 for (i = 0; i < cs->sc_nccdisks; ++i) { 1428 /* 1429 * XXX: this close could potentially fail and 1430 * cause Bad Things. Maybe we need to force 1431 * the close to happen? 1432 */ 1433 #ifdef DEBUG 1434 if (ccddebug & CCDB_VNODE) 1435 vprint("CCDIOCCLR: vnode info", 1436 cs->sc_cinfo[i].ci_vp); 1437 #endif 1438 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1439 p->p_ucred, p); 1440 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1441 } 1442 1443 /* Free interleave index. */ 1444 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1445 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1446 1447 /* Free component info and interleave table. */ 1448 free(cs->sc_cinfo, M_DEVBUF); 1449 free(cs->sc_itable, M_DEVBUF); 1450 cs->sc_flags &= ~CCDF_INITED; 1451 1452 /* 1453 * Free ccddevice information and clear entry. 1454 */ 1455 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1456 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1457 ccd.ccd_dk = -1; 1458 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1459 1460 /* 1461 * And remove the devstat entry. 1462 */ 1463 devstat_remove_entry(&cs->device_stats); 1464 1465 /* This must be atomic. */ 1466 s = splhigh(); 1467 ccdunlock(cs); 1468 bzero(cs, sizeof(struct ccd_softc)); 1469 splx(s); 1470 1471 break; 1472 1473 case DIOCGDINFO: 1474 if ((cs->sc_flags & CCDF_INITED) == 0) 1475 return (ENXIO); 1476 1477 *(struct disklabel *)data = cs->sc_label; 1478 break; 1479 1480 case DIOCGPART: 1481 if ((cs->sc_flags & CCDF_INITED) == 0) 1482 return (ENXIO); 1483 1484 ((struct partinfo *)data)->disklab = &cs->sc_label; 1485 ((struct partinfo *)data)->part = 1486 &cs->sc_label.d_partitions[ccdpart(dev)]; 1487 break; 1488 1489 case DIOCWDINFO: 1490 case DIOCSDINFO: 1491 if ((cs->sc_flags & CCDF_INITED) == 0) 1492 return (ENXIO); 1493 1494 if ((flag & FWRITE) == 0) 1495 return (EBADF); 1496 1497 if ((error = ccdlock(cs)) != 0) 1498 return (error); 1499 1500 cs->sc_flags |= CCDF_LABELLING; 1501 1502 error = setdisklabel(&cs->sc_label, 1503 (struct disklabel *)data, 0); 1504 if (error == 0) { 1505 if (cmd == DIOCWDINFO) 1506 error = writedisklabel(CCDLABELDEV(dev), 1507 &cs->sc_label); 1508 } 1509 1510 cs->sc_flags &= ~CCDF_LABELLING; 1511 1512 ccdunlock(cs); 1513 1514 if (error) 1515 return (error); 1516 break; 1517 1518 case DIOCWLABEL: 1519 if ((cs->sc_flags & CCDF_INITED) == 0) 1520 return (ENXIO); 1521 1522 if ((flag & FWRITE) == 0) 1523 return (EBADF); 1524 if (*(int *)data != 0) 1525 cs->sc_flags |= CCDF_WLABEL; 1526 else 1527 cs->sc_flags &= ~CCDF_WLABEL; 1528 break; 1529 1530 default: 1531 return (ENOTTY); 1532 } 1533 1534 return (0); 1535 } 1536 1537 static int 1538 ccdsize(dev) 1539 dev_t dev; 1540 { 1541 struct ccd_softc *cs; 1542 int part, size; 1543 1544 if (ccdopen(dev, 0, S_IFBLK, curproc)) 1545 return (-1); 1546 1547 cs = &ccd_softc[ccdunit(dev)]; 1548 part = ccdpart(dev); 1549 1550 if ((cs->sc_flags & CCDF_INITED) == 0) 1551 return (-1); 1552 1553 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1554 size = -1; 1555 else 1556 size = cs->sc_label.d_partitions[part].p_size; 1557 1558 if (ccdclose(dev, 0, S_IFBLK, curproc)) 1559 return (-1); 1560 1561 return (size); 1562 } 1563 1564 static int 1565 ccddump(dev) 1566 dev_t dev; 1567 { 1568 1569 /* Not implemented. */ 1570 return ENXIO; 1571 } 1572 1573 /* 1574 * Lookup the provided name in the filesystem. If the file exists, 1575 * is a valid block device, and isn't being used by anyone else, 1576 * set *vpp to the file's vnode. 1577 */ 1578 static int 1579 ccdlookup(path, p, vpp) 1580 char *path; 1581 struct proc *p; 1582 struct vnode **vpp; /* result */ 1583 { 1584 struct nameidata nd; 1585 struct vnode *vp; 1586 struct vattr va; 1587 int error; 1588 1589 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p); 1590 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { 1591 #ifdef DEBUG 1592 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1593 printf("ccdlookup: vn_open error = %d\n", error); 1594 #endif 1595 return (error); 1596 } 1597 vp = nd.ni_vp; 1598 1599 if (vp->v_usecount > 1) { 1600 VOP_UNLOCK(vp, 0, p); 1601 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); 1602 return (EBUSY); 1603 } 1604 1605 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) { 1606 #ifdef DEBUG 1607 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1608 printf("ccdlookup: getattr error = %d\n", error); 1609 #endif 1610 VOP_UNLOCK(vp, 0, p); 1611 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); 1612 return (error); 1613 } 1614 1615 /* XXX: eventually we should handle VREG, too. */ 1616 if (va.va_type != VBLK) { 1617 VOP_UNLOCK(vp, 0, p); 1618 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); 1619 return (ENOTBLK); 1620 } 1621 1622 #ifdef DEBUG 1623 if (ccddebug & CCDB_VNODE) 1624 vprint("ccdlookup: vnode info", vp); 1625 #endif 1626 1627 VOP_UNLOCK(vp, 0, p); 1628 *vpp = vp; 1629 return (0); 1630 } 1631 1632 /* 1633 * Read the disklabel from the ccd. If one is not present, fake one 1634 * up. 1635 */ 1636 static void 1637 ccdgetdisklabel(dev) 1638 dev_t dev; 1639 { 1640 int unit = ccdunit(dev); 1641 struct ccd_softc *cs = &ccd_softc[unit]; 1642 char *errstring; 1643 struct disklabel *lp = &cs->sc_label; 1644 struct ccdgeom *ccg = &cs->sc_geom; 1645 1646 bzero(lp, sizeof(*lp)); 1647 1648 lp->d_secperunit = cs->sc_size; 1649 lp->d_secsize = ccg->ccg_secsize; 1650 lp->d_nsectors = ccg->ccg_nsectors; 1651 lp->d_ntracks = ccg->ccg_ntracks; 1652 lp->d_ncylinders = ccg->ccg_ncylinders; 1653 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1654 1655 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1656 lp->d_type = DTYPE_CCD; 1657 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1658 lp->d_rpm = 3600; 1659 lp->d_interleave = 1; 1660 lp->d_flags = 0; 1661 1662 lp->d_partitions[RAW_PART].p_offset = 0; 1663 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1664 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1665 lp->d_npartitions = RAW_PART + 1; 1666 1667 lp->d_bbsize = BBSIZE; /* XXX */ 1668 lp->d_sbsize = SBSIZE; /* XXX */ 1669 1670 lp->d_magic = DISKMAGIC; 1671 lp->d_magic2 = DISKMAGIC; 1672 lp->d_checksum = dkcksum(&cs->sc_label); 1673 1674 /* 1675 * Call the generic disklabel extraction routine. 1676 */ 1677 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1678 if (errstring != NULL) 1679 ccdmakedisklabel(cs); 1680 1681 #ifdef DEBUG 1682 /* It's actually extremely common to have unlabeled ccds. */ 1683 if (ccddebug & CCDB_LABEL) 1684 if (errstring != NULL) 1685 printf("ccd%d: %s\n", unit, errstring); 1686 #endif 1687 } 1688 1689 /* 1690 * Take care of things one might want to take care of in the event 1691 * that a disklabel isn't present. 1692 */ 1693 static void 1694 ccdmakedisklabel(cs) 1695 struct ccd_softc *cs; 1696 { 1697 struct disklabel *lp = &cs->sc_label; 1698 1699 /* 1700 * For historical reasons, if there's no disklabel present 1701 * the raw partition must be marked FS_BSDFFS. 1702 */ 1703 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1704 1705 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1706 } 1707 1708 /* 1709 * Wait interruptibly for an exclusive lock. 1710 * 1711 * XXX 1712 * Several drivers do this; it should be abstracted and made MP-safe. 1713 */ 1714 static int 1715 ccdlock(cs) 1716 struct ccd_softc *cs; 1717 { 1718 int error; 1719 1720 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1721 cs->sc_flags |= CCDF_WANTED; 1722 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1723 return (error); 1724 } 1725 cs->sc_flags |= CCDF_LOCKED; 1726 return (0); 1727 } 1728 1729 /* 1730 * Unlock and wake up any waiters. 1731 */ 1732 static void 1733 ccdunlock(cs) 1734 struct ccd_softc *cs; 1735 { 1736 1737 cs->sc_flags &= ~CCDF_LOCKED; 1738 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1739 cs->sc_flags &= ~CCDF_WANTED; 1740 wakeup(cs); 1741 } 1742 } 1743 1744 #ifdef DEBUG 1745 static void 1746 printiinfo(ii) 1747 struct ccdiinfo *ii; 1748 { 1749 int ix, i; 1750 1751 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1752 printf(" itab[%d]: #dk %d sblk %d soff %d", 1753 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1754 for (i = 0; i < ii->ii_ndisk; i++) 1755 printf(" %d", ii->ii_index[i]); 1756 printf("\n"); 1757 } 1758 } 1759 #endif 1760 1761 #endif /* NCCD > 0 */ 1762 1763 /* Local Variables: */ 1764 /* c-argdecl-indent: 8 */ 1765 /* c-continued-statement-offset: 8 */ 1766 /* c-indent-level: 8 */ 1767 /* End: */ 1768