1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include "ccd.h" 91 92 #include <sys/param.h> 93 #include <sys/systm.h> 94 #include <sys/kernel.h> 95 #include <sys/module.h> 96 #include <sys/proc.h> 97 #include <sys/buf.h> 98 #include <sys/malloc.h> 99 #include <sys/namei.h> 100 #include <sys/conf.h> 101 #include <sys/stat.h> 102 #include <sys/sysctl.h> 103 #include <sys/disklabel.h> 104 #include <ufs/ffs/fs.h> 105 #include <sys/devicestat.h> 106 #include <sys/fcntl.h> 107 #include <sys/vnode.h> 108 109 #include <sys/ccdvar.h> 110 111 112 #if defined(CCDDEBUG) && !defined(DEBUG) 113 #define DEBUG 114 #endif 115 116 #ifdef DEBUG 117 #define CCDB_FOLLOW 0x01 118 #define CCDB_INIT 0x02 119 #define CCDB_IO 0x04 120 #define CCDB_LABEL 0x08 121 #define CCDB_VNODE 0x10 122 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 123 CCDB_VNODE; 124 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 125 #undef DEBUG 126 #endif 127 128 #define ccdunit(x) dkunit(x) 129 #define ccdpart(x) dkpart(x) 130 131 /* 132 This is how mirroring works (only writes are special): 133 134 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 135 linked together by the cb_mirror field. "cb_pflags & 136 CCDPF_MIRROR_DONE" is set to 0 on both of them. 137 138 When a component returns to ccdiodone(), it checks if "cb_pflags & 139 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 140 flag and returns. If it is, it means its partner has already 141 returned, so it will go to the regular cleanup. 142 143 */ 144 145 struct ccdbuf { 146 struct bio cb_buf; /* new I/O buf */ 147 struct bio *cb_obp; /* ptr. to original I/O buf */ 148 struct ccdbuf *cb_freenext; /* free list link */ 149 int cb_unit; /* target unit */ 150 int cb_comp; /* target component */ 151 int cb_pflags; /* mirror/parity status flag */ 152 struct ccdbuf *cb_mirror; /* mirror counterpart */ 153 }; 154 155 /* bits in cb_pflags */ 156 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 157 158 #define CCDLABELDEV(dev) \ 159 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 160 161 static d_open_t ccdopen; 162 static d_close_t ccdclose; 163 static d_strategy_t ccdstrategy; 164 static d_ioctl_t ccdioctl; 165 static d_dump_t ccddump; 166 static d_psize_t ccdsize; 167 168 #define NCCDFREEHIWAT 16 169 170 #define CDEV_MAJOR 74 171 #define BDEV_MAJOR 21 172 173 static struct cdevsw ccd_cdevsw = { 174 /* open */ ccdopen, 175 /* close */ ccdclose, 176 /* read */ physread, 177 /* write */ physwrite, 178 /* ioctl */ ccdioctl, 179 /* poll */ nopoll, 180 /* mmap */ nommap, 181 /* strategy */ ccdstrategy, 182 /* name */ "ccd", 183 /* maj */ CDEV_MAJOR, 184 /* dump */ ccddump, 185 /* psize */ ccdsize, 186 /* flags */ D_DISK, 187 /* bmaj */ BDEV_MAJOR 188 }; 189 190 /* called during module initialization */ 191 static void ccdattach __P((void)); 192 static int ccd_modevent __P((module_t, int, void *)); 193 194 /* called by biodone() at interrupt time */ 195 static void ccdiodone __P((struct bio *bp)); 196 197 static void ccdstart __P((struct ccd_softc *, struct bio *)); 198 static void ccdinterleave __P((struct ccd_softc *, int)); 199 static void ccdintr __P((struct ccd_softc *, struct bio *)); 200 static int ccdinit __P((struct ccddevice *, char **, struct proc *)); 201 static int ccdlookup __P((char *, struct proc *p, struct vnode **)); 202 static void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *, 203 struct bio *, daddr_t, caddr_t, long)); 204 static void ccdgetdisklabel __P((dev_t)); 205 static void ccdmakedisklabel __P((struct ccd_softc *)); 206 static int ccdlock __P((struct ccd_softc *)); 207 static void ccdunlock __P((struct ccd_softc *)); 208 209 #ifdef DEBUG 210 static void printiinfo __P((struct ccdiinfo *)); 211 #endif 212 213 /* Non-private for the benefit of libkvm. */ 214 struct ccd_softc *ccd_softc; 215 struct ccddevice *ccddevs; 216 struct ccdbuf *ccdfreebufs; 217 static int numccdfreebufs; 218 static int numccd = 0; 219 220 /* 221 * getccdbuf() - Allocate and zero a ccd buffer. 222 * 223 * This routine is called at splbio(). 224 */ 225 226 static __inline 227 struct ccdbuf * 228 getccdbuf(struct ccdbuf *cpy) 229 { 230 struct ccdbuf *cbp; 231 232 /* 233 * Allocate from freelist or malloc as necessary 234 */ 235 if ((cbp = ccdfreebufs) != NULL) { 236 ccdfreebufs = cbp->cb_freenext; 237 --numccdfreebufs; 238 } else { 239 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 240 } 241 242 /* 243 * Used by mirroring code 244 */ 245 if (cpy) 246 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 247 else 248 bzero(cbp, sizeof(struct ccdbuf)); 249 250 /* 251 * independant struct bio initialization 252 */ 253 254 return(cbp); 255 } 256 257 /* 258 * putccdbuf() - Free a ccd buffer. 259 * 260 * This routine is called at splbio(). 261 */ 262 263 static __inline 264 void 265 putccdbuf(struct ccdbuf *cbp) 266 { 267 268 if (numccdfreebufs < NCCDFREEHIWAT) { 269 cbp->cb_freenext = ccdfreebufs; 270 ccdfreebufs = cbp; 271 ++numccdfreebufs; 272 } else { 273 free((caddr_t)cbp, M_DEVBUF); 274 } 275 } 276 277 278 /* 279 * Number of blocks to untouched in front of a component partition. 280 * This is to avoid violating its disklabel area when it starts at the 281 * beginning of the slice. 282 */ 283 #if !defined(CCD_OFFSET) 284 #define CCD_OFFSET 16 285 #endif 286 287 /* 288 * Called by main() during pseudo-device attachment. All we need 289 * to do is allocate enough space for devices to be configured later, and 290 * add devsw entries. 291 */ 292 static void 293 ccdattach() 294 { 295 int i; 296 int num = NCCD; 297 298 if (num > 1) 299 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 300 else 301 printf("ccd0: Concatenated disk driver\n"); 302 303 ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc), 304 M_DEVBUF, M_NOWAIT); 305 ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice), 306 M_DEVBUF, M_NOWAIT); 307 if ((ccd_softc == NULL) || (ccddevs == NULL)) { 308 printf("WARNING: no memory for concatenated disks\n"); 309 if (ccd_softc != NULL) 310 free(ccd_softc, M_DEVBUF); 311 if (ccddevs != NULL) 312 free(ccddevs, M_DEVBUF); 313 return; 314 } 315 numccd = num; 316 bzero(ccd_softc, num * sizeof(struct ccd_softc)); 317 bzero(ccddevs, num * sizeof(struct ccddevice)); 318 319 cdevsw_add(&ccd_cdevsw); 320 /* XXX: is this necessary? */ 321 for (i = 0; i < numccd; ++i) 322 ccddevs[i].ccd_dk = -1; 323 } 324 325 static int 326 ccd_modevent(mod, type, data) 327 module_t mod; 328 int type; 329 void *data; 330 { 331 int error = 0; 332 333 switch (type) { 334 case MOD_LOAD: 335 ccdattach(); 336 break; 337 338 case MOD_UNLOAD: 339 printf("ccd0: Unload not supported!\n"); 340 error = EOPNOTSUPP; 341 break; 342 343 default: /* MOD_SHUTDOWN etc */ 344 break; 345 } 346 return (error); 347 } 348 349 DEV_MODULE(ccd, ccd_modevent, NULL); 350 351 static int 352 ccdinit(ccd, cpaths, p) 353 struct ccddevice *ccd; 354 char **cpaths; 355 struct proc *p; 356 { 357 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 358 struct ccdcinfo *ci = NULL; /* XXX */ 359 size_t size; 360 int ix; 361 struct vnode *vp; 362 size_t minsize; 363 int maxsecsize; 364 struct partinfo dpart; 365 struct ccdgeom *ccg = &cs->sc_geom; 366 char tmppath[MAXPATHLEN]; 367 int error = 0; 368 369 #ifdef DEBUG 370 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 371 printf("ccdinit: unit %d\n", ccd->ccd_unit); 372 #endif 373 374 cs->sc_size = 0; 375 cs->sc_ileave = ccd->ccd_interleave; 376 cs->sc_nccdisks = ccd->ccd_ndev; 377 378 /* Allocate space for the component info. */ 379 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 380 M_DEVBUF, M_WAITOK); 381 382 /* 383 * Verify that each component piece exists and record 384 * relevant information about it. 385 */ 386 maxsecsize = 0; 387 minsize = 0; 388 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 389 vp = ccd->ccd_vpp[ix]; 390 ci = &cs->sc_cinfo[ix]; 391 ci->ci_vp = vp; 392 393 /* 394 * Copy in the pathname of the component. 395 */ 396 bzero(tmppath, sizeof(tmppath)); /* sanity */ 397 if ((error = copyinstr(cpaths[ix], tmppath, 398 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 399 #ifdef DEBUG 400 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 401 printf("ccd%d: can't copy path, error = %d\n", 402 ccd->ccd_unit, error); 403 #endif 404 goto fail; 405 } 406 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 407 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 408 409 ci->ci_dev = vn_todev(vp); 410 411 /* 412 * Get partition information for the component. 413 */ 414 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 415 FREAD, p->p_ucred, p)) != 0) { 416 #ifdef DEBUG 417 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 418 printf("ccd%d: %s: ioctl failed, error = %d\n", 419 ccd->ccd_unit, ci->ci_path, error); 420 #endif 421 goto fail; 422 } 423 if (dpart.part->p_fstype == FS_BSDFFS) { 424 maxsecsize = 425 ((dpart.disklab->d_secsize > maxsecsize) ? 426 dpart.disklab->d_secsize : maxsecsize); 427 size = dpart.part->p_size - CCD_OFFSET; 428 } else { 429 #ifdef DEBUG 430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 431 printf("ccd%d: %s: incorrect partition type\n", 432 ccd->ccd_unit, ci->ci_path); 433 #endif 434 error = EFTYPE; 435 goto fail; 436 } 437 438 /* 439 * Calculate the size, truncating to an interleave 440 * boundary if necessary. 441 */ 442 443 if (cs->sc_ileave > 1) 444 size -= size % cs->sc_ileave; 445 446 if (size == 0) { 447 #ifdef DEBUG 448 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 449 printf("ccd%d: %s: size == 0\n", 450 ccd->ccd_unit, ci->ci_path); 451 #endif 452 error = ENODEV; 453 goto fail; 454 } 455 456 if (minsize == 0 || size < minsize) 457 minsize = size; 458 ci->ci_size = size; 459 cs->sc_size += size; 460 } 461 462 /* 463 * Don't allow the interleave to be smaller than 464 * the biggest component sector. 465 */ 466 if ((cs->sc_ileave > 0) && 467 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 468 #ifdef DEBUG 469 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 470 printf("ccd%d: interleave must be at least %d\n", 471 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 472 #endif 473 error = EINVAL; 474 goto fail; 475 } 476 477 /* 478 * If uniform interleave is desired set all sizes to that of 479 * the smallest component. This will guarentee that a single 480 * interleave table is generated. 481 * 482 * Lost space must be taken into account when calculating the 483 * overall size. Half the space is lost when CCDF_MIRROR is 484 * specified. One disk is lost when CCDF_PARITY is specified. 485 */ 486 if (ccd->ccd_flags & CCDF_UNIFORM) { 487 for (ci = cs->sc_cinfo; 488 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 489 ci->ci_size = minsize; 490 } 491 if (ccd->ccd_flags & CCDF_MIRROR) { 492 /* 493 * Check to see if an even number of components 494 * have been specified. The interleave must also 495 * be non-zero in order for us to be able to 496 * guarentee the topology. 497 */ 498 if (cs->sc_nccdisks % 2) { 499 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 500 error = EINVAL; 501 goto fail; 502 } 503 if (cs->sc_ileave == 0) { 504 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 505 error = EINVAL; 506 goto fail; 507 } 508 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 509 } else if (ccd->ccd_flags & CCDF_PARITY) { 510 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 511 } else { 512 if (cs->sc_ileave == 0) { 513 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 514 error = EINVAL; 515 goto fail; 516 } 517 cs->sc_size = cs->sc_nccdisks * minsize; 518 } 519 } 520 521 /* 522 * Construct the interleave table. 523 */ 524 ccdinterleave(cs, ccd->ccd_unit); 525 526 /* 527 * Create pseudo-geometry based on 1MB cylinders. It's 528 * pretty close. 529 */ 530 ccg->ccg_secsize = maxsecsize; 531 ccg->ccg_ntracks = 1; 532 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 533 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 534 535 /* 536 * Add an devstat entry for this device. 537 */ 538 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 539 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 540 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 541 DEVSTAT_PRIORITY_ARRAY); 542 543 cs->sc_flags |= CCDF_INITED; 544 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 545 cs->sc_unit = ccd->ccd_unit; 546 return (0); 547 fail: 548 while (ci > cs->sc_cinfo) { 549 ci--; 550 free(ci->ci_path, M_DEVBUF); 551 } 552 free(cs->sc_cinfo, M_DEVBUF); 553 return (error); 554 } 555 556 static void 557 ccdinterleave(cs, unit) 558 struct ccd_softc *cs; 559 int unit; 560 { 561 struct ccdcinfo *ci, *smallci; 562 struct ccdiinfo *ii; 563 daddr_t bn, lbn; 564 int ix; 565 u_long size; 566 567 #ifdef DEBUG 568 if (ccddebug & CCDB_INIT) 569 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 570 #endif 571 572 /* 573 * Allocate an interleave table. The worst case occurs when each 574 * of N disks is of a different size, resulting in N interleave 575 * tables. 576 * 577 * Chances are this is too big, but we don't care. 578 */ 579 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 580 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 581 bzero((caddr_t)cs->sc_itable, size); 582 583 /* 584 * Trivial case: no interleave (actually interleave of disk size). 585 * Each table entry represents a single component in its entirety. 586 * 587 * An interleave of 0 may not be used with a mirror or parity setup. 588 */ 589 if (cs->sc_ileave == 0) { 590 bn = 0; 591 ii = cs->sc_itable; 592 593 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 594 /* Allocate space for ii_index. */ 595 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 596 ii->ii_ndisk = 1; 597 ii->ii_startblk = bn; 598 ii->ii_startoff = 0; 599 ii->ii_index[0] = ix; 600 bn += cs->sc_cinfo[ix].ci_size; 601 ii++; 602 } 603 ii->ii_ndisk = 0; 604 #ifdef DEBUG 605 if (ccddebug & CCDB_INIT) 606 printiinfo(cs->sc_itable); 607 #endif 608 return; 609 } 610 611 /* 612 * The following isn't fast or pretty; it doesn't have to be. 613 */ 614 size = 0; 615 bn = lbn = 0; 616 for (ii = cs->sc_itable; ; ii++) { 617 /* 618 * Allocate space for ii_index. We might allocate more then 619 * we use. 620 */ 621 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 622 M_DEVBUF, M_WAITOK); 623 624 /* 625 * Locate the smallest of the remaining components 626 */ 627 smallci = NULL; 628 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 629 ci++) { 630 if (ci->ci_size > size && 631 (smallci == NULL || 632 ci->ci_size < smallci->ci_size)) { 633 smallci = ci; 634 } 635 } 636 637 /* 638 * Nobody left, all done 639 */ 640 if (smallci == NULL) { 641 ii->ii_ndisk = 0; 642 break; 643 } 644 645 /* 646 * Record starting logical block using an sc_ileave blocksize. 647 */ 648 ii->ii_startblk = bn / cs->sc_ileave; 649 650 /* 651 * Record starting comopnent block using an sc_ileave 652 * blocksize. This value is relative to the beginning of 653 * a component disk. 654 */ 655 ii->ii_startoff = lbn; 656 657 /* 658 * Determine how many disks take part in this interleave 659 * and record their indices. 660 */ 661 ix = 0; 662 for (ci = cs->sc_cinfo; 663 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 664 if (ci->ci_size >= smallci->ci_size) { 665 ii->ii_index[ix++] = ci - cs->sc_cinfo; 666 } 667 } 668 ii->ii_ndisk = ix; 669 bn += ix * (smallci->ci_size - size); 670 lbn = smallci->ci_size / cs->sc_ileave; 671 size = smallci->ci_size; 672 } 673 #ifdef DEBUG 674 if (ccddebug & CCDB_INIT) 675 printiinfo(cs->sc_itable); 676 #endif 677 } 678 679 /* ARGSUSED */ 680 static int 681 ccdopen(dev, flags, fmt, p) 682 dev_t dev; 683 int flags, fmt; 684 struct proc *p; 685 { 686 int unit = ccdunit(dev); 687 struct ccd_softc *cs; 688 struct disklabel *lp; 689 int error = 0, part, pmask; 690 691 #ifdef DEBUG 692 if (ccddebug & CCDB_FOLLOW) 693 printf("ccdopen(%x, %x)\n", dev, flags); 694 #endif 695 if (unit >= numccd) 696 return (ENXIO); 697 cs = &ccd_softc[unit]; 698 699 if ((error = ccdlock(cs)) != 0) 700 return (error); 701 702 lp = &cs->sc_label; 703 704 part = ccdpart(dev); 705 pmask = (1 << part); 706 707 /* 708 * If we're initialized, check to see if there are any other 709 * open partitions. If not, then it's safe to update 710 * the in-core disklabel. 711 */ 712 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 713 ccdgetdisklabel(dev); 714 715 /* Check that the partition exists. */ 716 if (part != RAW_PART && ((part >= lp->d_npartitions) || 717 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 718 error = ENXIO; 719 goto done; 720 } 721 722 cs->sc_openmask |= pmask; 723 done: 724 ccdunlock(cs); 725 return (0); 726 } 727 728 /* ARGSUSED */ 729 static int 730 ccdclose(dev, flags, fmt, p) 731 dev_t dev; 732 int flags, fmt; 733 struct proc *p; 734 { 735 int unit = ccdunit(dev); 736 struct ccd_softc *cs; 737 int error = 0, part; 738 739 #ifdef DEBUG 740 if (ccddebug & CCDB_FOLLOW) 741 printf("ccdclose(%x, %x)\n", dev, flags); 742 #endif 743 744 if (unit >= numccd) 745 return (ENXIO); 746 cs = &ccd_softc[unit]; 747 748 if ((error = ccdlock(cs)) != 0) 749 return (error); 750 751 part = ccdpart(dev); 752 753 /* ...that much closer to allowing unconfiguration... */ 754 cs->sc_openmask &= ~(1 << part); 755 ccdunlock(cs); 756 return (0); 757 } 758 759 static void 760 ccdstrategy(bp) 761 struct bio *bp; 762 { 763 int unit = ccdunit(bp->bio_dev); 764 struct ccd_softc *cs = &ccd_softc[unit]; 765 int s; 766 int wlabel; 767 struct disklabel *lp; 768 769 #ifdef DEBUG 770 if (ccddebug & CCDB_FOLLOW) 771 printf("ccdstrategy(%x): unit %d\n", bp, unit); 772 #endif 773 if ((cs->sc_flags & CCDF_INITED) == 0) { 774 bp->bio_error = ENXIO; 775 bp->bio_flags |= BIO_ERROR; 776 goto done; 777 } 778 779 /* If it's a nil transfer, wake up the top half now. */ 780 if (bp->bio_bcount == 0) 781 goto done; 782 783 lp = &cs->sc_label; 784 785 /* 786 * Do bounds checking and adjust transfer. If there's an 787 * error, the bounds check will flag that for us. 788 */ 789 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 790 if (ccdpart(bp->bio_dev) != RAW_PART) { 791 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 792 goto done; 793 } else { 794 int pbn; /* in sc_secsize chunks */ 795 long sz; /* in sc_secsize chunks */ 796 797 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 798 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 799 800 /* 801 * If out of bounds return an error. If at the EOF point, 802 * simply read or write less. 803 */ 804 805 if (pbn < 0 || pbn >= cs->sc_size) { 806 bp->bio_resid = bp->bio_bcount; 807 if (pbn != cs->sc_size) { 808 bp->bio_error = EINVAL; 809 bp->bio_flags |= BIO_ERROR; 810 } 811 goto done; 812 } 813 814 /* 815 * If the request crosses EOF, truncate the request. 816 */ 817 if (pbn + sz > cs->sc_size) { 818 bp->bio_bcount = (cs->sc_size - pbn) * 819 cs->sc_geom.ccg_secsize; 820 } 821 } 822 823 bp->bio_resid = bp->bio_bcount; 824 825 /* 826 * "Start" the unit. 827 */ 828 s = splbio(); 829 ccdstart(cs, bp); 830 splx(s); 831 return; 832 done: 833 biodone(bp); 834 } 835 836 static void 837 ccdstart(cs, bp) 838 struct ccd_softc *cs; 839 struct bio *bp; 840 { 841 long bcount, rcount; 842 struct ccdbuf *cbp[4]; 843 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 844 caddr_t addr; 845 daddr_t bn; 846 struct partition *pp; 847 848 #ifdef DEBUG 849 if (ccddebug & CCDB_FOLLOW) 850 printf("ccdstart(%x, %x)\n", cs, bp); 851 #endif 852 853 /* Record the transaction start */ 854 devstat_start_transaction(&cs->device_stats); 855 856 /* 857 * Translate the partition-relative block number to an absolute. 858 */ 859 bn = bp->bio_blkno; 860 if (ccdpart(bp->bio_dev) != RAW_PART) { 861 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; 862 bn += pp->p_offset; 863 } 864 865 /* 866 * Allocate component buffers and fire off the requests 867 */ 868 addr = bp->bio_data; 869 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 870 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 871 rcount = cbp[0]->cb_buf.bio_bcount; 872 873 if (cs->sc_cflags & CCDF_MIRROR) { 874 /* 875 * Mirroring. Writes go to both disks, reads are 876 * taken from whichever disk seems most appropriate. 877 * 878 * We attempt to localize reads to the disk whos arm 879 * is nearest the read request. We ignore seeks due 880 * to writes when making this determination and we 881 * also try to avoid hogging. 882 */ 883 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 884 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 885 BIO_STRATEGY(&cbp[1]->cb_buf, 0); 886 } else { 887 int pick = cs->sc_pick; 888 daddr_t range = cs->sc_size / 16; 889 890 if (bn < cs->sc_blk[pick] - range || 891 bn > cs->sc_blk[pick] + range 892 ) { 893 cs->sc_pick = pick = 1 - pick; 894 } 895 cs->sc_blk[pick] = bn + btodb(rcount); 896 BIO_STRATEGY(&cbp[pick]->cb_buf, 0); 897 } 898 } else { 899 /* 900 * Not mirroring 901 */ 902 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 903 } 904 bn += btodb(rcount); 905 addr += rcount; 906 } 907 } 908 909 /* 910 * Build a component buffer header. 911 */ 912 static void 913 ccdbuffer(cb, cs, bp, bn, addr, bcount) 914 struct ccdbuf **cb; 915 struct ccd_softc *cs; 916 struct bio *bp; 917 daddr_t bn; 918 caddr_t addr; 919 long bcount; 920 { 921 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 922 struct ccdbuf *cbp; 923 daddr_t cbn, cboff; 924 off_t cbc; 925 926 #ifdef DEBUG 927 if (ccddebug & CCDB_IO) 928 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 929 cs, bp, bn, addr, bcount); 930 #endif 931 /* 932 * Determine which component bn falls in. 933 */ 934 cbn = bn; 935 cboff = 0; 936 937 if (cs->sc_ileave == 0) { 938 /* 939 * Serially concatenated and neither a mirror nor a parity 940 * config. This is a special case. 941 */ 942 daddr_t sblk; 943 944 sblk = 0; 945 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 946 sblk += ci->ci_size; 947 cbn -= sblk; 948 } else { 949 struct ccdiinfo *ii; 950 int ccdisk, off; 951 952 /* 953 * Calculate cbn, the logical superblock (sc_ileave chunks), 954 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 955 * to cbn. 956 */ 957 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 958 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 959 960 /* 961 * Figure out which interleave table to use. 962 */ 963 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 964 if (ii->ii_startblk > cbn) 965 break; 966 } 967 ii--; 968 969 /* 970 * off is the logical superblock relative to the beginning 971 * of this interleave block. 972 */ 973 off = cbn - ii->ii_startblk; 974 975 /* 976 * We must calculate which disk component to use (ccdisk), 977 * and recalculate cbn to be the superblock relative to 978 * the beginning of the component. This is typically done by 979 * adding 'off' and ii->ii_startoff together. However, 'off' 980 * must typically be divided by the number of components in 981 * this interleave array to be properly convert it from a 982 * CCD-relative logical superblock number to a 983 * component-relative superblock number. 984 */ 985 if (ii->ii_ndisk == 1) { 986 /* 987 * When we have just one disk, it can't be a mirror 988 * or a parity config. 989 */ 990 ccdisk = ii->ii_index[0]; 991 cbn = ii->ii_startoff + off; 992 } else { 993 if (cs->sc_cflags & CCDF_MIRROR) { 994 /* 995 * We have forced a uniform mapping, resulting 996 * in a single interleave array. We double 997 * up on the first half of the available 998 * components and our mirror is in the second 999 * half. This only works with a single 1000 * interleave array because doubling up 1001 * doubles the number of sectors, so there 1002 * cannot be another interleave array because 1003 * the next interleave array's calculations 1004 * would be off. 1005 */ 1006 int ndisk2 = ii->ii_ndisk / 2; 1007 ccdisk = ii->ii_index[off % ndisk2]; 1008 cbn = ii->ii_startoff + off / ndisk2; 1009 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1010 } else if (cs->sc_cflags & CCDF_PARITY) { 1011 /* 1012 * XXX not implemented yet 1013 */ 1014 int ndisk2 = ii->ii_ndisk - 1; 1015 ccdisk = ii->ii_index[off % ndisk2]; 1016 cbn = ii->ii_startoff + off / ndisk2; 1017 if (cbn % ii->ii_ndisk <= ccdisk) 1018 ccdisk++; 1019 } else { 1020 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1021 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1022 } 1023 } 1024 1025 ci = &cs->sc_cinfo[ccdisk]; 1026 1027 /* 1028 * Convert cbn from a superblock to a normal block so it 1029 * can be used to calculate (along with cboff) the normal 1030 * block index into this particular disk. 1031 */ 1032 cbn *= cs->sc_ileave; 1033 } 1034 1035 /* 1036 * Fill in the component buf structure. 1037 */ 1038 cbp = getccdbuf(NULL); 1039 cbp->cb_buf.bio_cmd = bp->bio_cmd; 1040 cbp->cb_buf.bio_done = ccdiodone; 1041 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ 1042 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; 1043 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1044 cbp->cb_buf.bio_data = addr; 1045 if (cs->sc_ileave == 0) 1046 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1047 else 1048 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1049 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount; 1050 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 1051 1052 /* 1053 * context for ccdiodone 1054 */ 1055 cbp->cb_obp = bp; 1056 cbp->cb_unit = cs - ccd_softc; 1057 cbp->cb_comp = ci - cs->sc_cinfo; 1058 1059 #ifdef DEBUG 1060 if (ccddebug & CCDB_IO) 1061 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n", 1062 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.bio_blkno, 1063 cbp->cb_buf.bio_data, cbp->cb_buf.bio_bcount); 1064 #endif 1065 cb[0] = cbp; 1066 1067 /* 1068 * Note: both I/O's setup when reading from mirror, but only one 1069 * will be executed. 1070 */ 1071 if (cs->sc_cflags & CCDF_MIRROR) { 1072 /* mirror, setup second I/O */ 1073 cbp = getccdbuf(cb[0]); 1074 cbp->cb_buf.bio_dev = ci2->ci_dev; 1075 cbp->cb_comp = ci2 - cs->sc_cinfo; 1076 cb[1] = cbp; 1077 /* link together the ccdbuf's and clear "mirror done" flag */ 1078 cb[0]->cb_mirror = cb[1]; 1079 cb[1]->cb_mirror = cb[0]; 1080 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1081 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1082 } 1083 } 1084 1085 static void 1086 ccdintr(cs, bp) 1087 struct ccd_softc *cs; 1088 struct bio *bp; 1089 { 1090 #ifdef DEBUG 1091 if (ccddebug & CCDB_FOLLOW) 1092 printf("ccdintr(%x, %x)\n", cs, bp); 1093 #endif 1094 /* 1095 * Request is done for better or worse, wakeup the top half. 1096 */ 1097 if (bp->bio_flags & BIO_ERROR) 1098 bp->bio_resid = bp->bio_bcount; 1099 devstat_end_transaction_bio(&cs->device_stats, bp); 1100 biodone(bp); 1101 } 1102 1103 /* 1104 * Called at interrupt time. 1105 * Mark the component as done and if all components are done, 1106 * take a ccd interrupt. 1107 */ 1108 static void 1109 ccdiodone(ibp) 1110 struct bio *ibp; 1111 { 1112 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 1113 struct bio *bp = cbp->cb_obp; 1114 int unit = cbp->cb_unit; 1115 int count, s; 1116 1117 s = splbio(); 1118 #ifdef DEBUG 1119 if (ccddebug & CCDB_FOLLOW) 1120 printf("ccdiodone(%x)\n", cbp); 1121 if (ccddebug & CCDB_IO) { 1122 printf("ccdiodone: bp %x bcount %d resid %d\n", 1123 bp, bp->bio_bcount, bp->bio_resid); 1124 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n", 1125 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, 1126 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1127 cbp->cb_buf.bio_bcount); 1128 } 1129 #endif 1130 /* 1131 * If an error occured, report it. If this is a mirrored 1132 * configuration and the first of two possible reads, do not 1133 * set the error in the bp yet because the second read may 1134 * succeed. 1135 */ 1136 1137 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1138 const char *msg = ""; 1139 1140 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1141 (cbp->cb_buf.bio_cmd == BIO_READ) && 1142 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1143 /* 1144 * We will try our read on the other disk down 1145 * below, also reverse the default pick so if we 1146 * are doing a scan we do not keep hitting the 1147 * bad disk first. 1148 */ 1149 struct ccd_softc *cs = &ccd_softc[unit]; 1150 1151 msg = ", trying other disk"; 1152 cs->sc_pick = 1 - cs->sc_pick; 1153 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 1154 } else { 1155 bp->bio_flags |= BIO_ERROR; 1156 bp->bio_error = cbp->cb_buf.bio_error ? 1157 cbp->cb_buf.bio_error : EIO; 1158 } 1159 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1160 unit, bp->bio_error, cbp->cb_comp, 1161 (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg); 1162 } 1163 1164 /* 1165 * Process mirror. If we are writing, I/O has been initiated on both 1166 * buffers and we fall through only after both are finished. 1167 * 1168 * If we are reading only one I/O is initiated at a time. If an 1169 * error occurs we initiate the second I/O and return, otherwise 1170 * we free the second I/O without initiating it. 1171 */ 1172 1173 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1174 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 1175 /* 1176 * When writing, handshake with the second buffer 1177 * to determine when both are done. If both are not 1178 * done, return here. 1179 */ 1180 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1181 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1182 putccdbuf(cbp); 1183 splx(s); 1184 return; 1185 } 1186 } else { 1187 /* 1188 * When reading, either dispose of the second buffer 1189 * or initiate I/O on the second buffer if an error 1190 * occured with this one. 1191 */ 1192 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1193 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1194 cbp->cb_mirror->cb_pflags |= 1195 CCDPF_MIRROR_DONE; 1196 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0); 1197 putccdbuf(cbp); 1198 splx(s); 1199 return; 1200 } else { 1201 putccdbuf(cbp->cb_mirror); 1202 /* fall through */ 1203 } 1204 } 1205 } 1206 } 1207 1208 /* 1209 * use bio_caller1 to determine how big the original request was rather 1210 * then bio_bcount, because bio_bcount may have been truncated for EOF. 1211 * 1212 * XXX We check for an error, but we do not test the resid for an 1213 * aligned EOF condition. This may result in character & block 1214 * device access not recognizing EOF properly when read or written 1215 * sequentially, but will not effect filesystems. 1216 */ 1217 count = (long)cbp->cb_buf.bio_caller1; 1218 putccdbuf(cbp); 1219 1220 /* 1221 * If all done, "interrupt". 1222 */ 1223 bp->bio_resid -= count; 1224 if (bp->bio_resid < 0) 1225 panic("ccdiodone: count"); 1226 if (bp->bio_resid == 0) 1227 ccdintr(&ccd_softc[unit], bp); 1228 splx(s); 1229 } 1230 1231 static int 1232 ccdioctl(dev, cmd, data, flag, p) 1233 dev_t dev; 1234 u_long cmd; 1235 caddr_t data; 1236 int flag; 1237 struct proc *p; 1238 { 1239 int unit = ccdunit(dev); 1240 int i, j, lookedup = 0, error = 0; 1241 int part, pmask, s; 1242 struct ccd_softc *cs; 1243 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1244 struct ccddevice ccd; 1245 char **cpp; 1246 struct vnode **vpp; 1247 1248 if (unit >= numccd) 1249 return (ENXIO); 1250 cs = &ccd_softc[unit]; 1251 1252 bzero(&ccd, sizeof(ccd)); 1253 1254 switch (cmd) { 1255 case CCDIOCSET: 1256 if (cs->sc_flags & CCDF_INITED) 1257 return (EBUSY); 1258 1259 if ((flag & FWRITE) == 0) 1260 return (EBADF); 1261 1262 if ((error = ccdlock(cs)) != 0) 1263 return (error); 1264 1265 /* Fill in some important bits. */ 1266 ccd.ccd_unit = unit; 1267 ccd.ccd_interleave = ccio->ccio_ileave; 1268 if (ccd.ccd_interleave == 0 && 1269 ((ccio->ccio_flags & CCDF_MIRROR) || 1270 (ccio->ccio_flags & CCDF_PARITY))) { 1271 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1272 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1273 } 1274 if ((ccio->ccio_flags & CCDF_MIRROR) && 1275 (ccio->ccio_flags & CCDF_PARITY)) { 1276 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1277 ccio->ccio_flags &= ~CCDF_PARITY; 1278 } 1279 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1280 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1281 printf("ccd%d: mirror/parity forces uniform flag\n", 1282 unit); 1283 ccio->ccio_flags |= CCDF_UNIFORM; 1284 } 1285 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1286 1287 /* 1288 * Allocate space for and copy in the array of 1289 * componet pathnames and device numbers. 1290 */ 1291 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1292 M_DEVBUF, M_WAITOK); 1293 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1294 M_DEVBUF, M_WAITOK); 1295 1296 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1297 ccio->ccio_ndisks * sizeof(char **)); 1298 if (error) { 1299 free(vpp, M_DEVBUF); 1300 free(cpp, M_DEVBUF); 1301 ccdunlock(cs); 1302 return (error); 1303 } 1304 1305 #ifdef DEBUG 1306 if (ccddebug & CCDB_INIT) 1307 for (i = 0; i < ccio->ccio_ndisks; ++i) 1308 printf("ccdioctl: component %d: 0x%x\n", 1309 i, cpp[i]); 1310 #endif 1311 1312 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1313 #ifdef DEBUG 1314 if (ccddebug & CCDB_INIT) 1315 printf("ccdioctl: lookedup = %d\n", lookedup); 1316 #endif 1317 if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) { 1318 for (j = 0; j < lookedup; ++j) 1319 (void)vn_close(vpp[j], FREAD|FWRITE, 1320 p->p_ucred, p); 1321 free(vpp, M_DEVBUF); 1322 free(cpp, M_DEVBUF); 1323 ccdunlock(cs); 1324 return (error); 1325 } 1326 ++lookedup; 1327 } 1328 ccd.ccd_cpp = cpp; 1329 ccd.ccd_vpp = vpp; 1330 ccd.ccd_ndev = ccio->ccio_ndisks; 1331 1332 /* 1333 * Initialize the ccd. Fills in the softc for us. 1334 */ 1335 if ((error = ccdinit(&ccd, cpp, p)) != 0) { 1336 for (j = 0; j < lookedup; ++j) 1337 (void)vn_close(vpp[j], FREAD|FWRITE, 1338 p->p_ucred, p); 1339 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1340 free(vpp, M_DEVBUF); 1341 free(cpp, M_DEVBUF); 1342 ccdunlock(cs); 1343 return (error); 1344 } 1345 1346 /* 1347 * The ccd has been successfully initialized, so 1348 * we can place it into the array and read the disklabel. 1349 */ 1350 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1351 ccio->ccio_unit = unit; 1352 ccio->ccio_size = cs->sc_size; 1353 ccdgetdisklabel(dev); 1354 1355 ccdunlock(cs); 1356 1357 break; 1358 1359 case CCDIOCCLR: 1360 if ((cs->sc_flags & CCDF_INITED) == 0) 1361 return (ENXIO); 1362 1363 if ((flag & FWRITE) == 0) 1364 return (EBADF); 1365 1366 if ((error = ccdlock(cs)) != 0) 1367 return (error); 1368 1369 /* Don't unconfigure if any other partitions are open */ 1370 part = ccdpart(dev); 1371 pmask = (1 << part); 1372 if ((cs->sc_openmask & ~pmask)) { 1373 ccdunlock(cs); 1374 return (EBUSY); 1375 } 1376 1377 /* 1378 * Free ccd_softc information and clear entry. 1379 */ 1380 1381 /* Close the components and free their pathnames. */ 1382 for (i = 0; i < cs->sc_nccdisks; ++i) { 1383 /* 1384 * XXX: this close could potentially fail and 1385 * cause Bad Things. Maybe we need to force 1386 * the close to happen? 1387 */ 1388 #ifdef DEBUG 1389 if (ccddebug & CCDB_VNODE) 1390 vprint("CCDIOCCLR: vnode info", 1391 cs->sc_cinfo[i].ci_vp); 1392 #endif 1393 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1394 p->p_ucred, p); 1395 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1396 } 1397 1398 /* Free interleave index. */ 1399 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1400 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1401 1402 /* Free component info and interleave table. */ 1403 free(cs->sc_cinfo, M_DEVBUF); 1404 free(cs->sc_itable, M_DEVBUF); 1405 cs->sc_flags &= ~CCDF_INITED; 1406 1407 /* 1408 * Free ccddevice information and clear entry. 1409 */ 1410 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1411 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1412 ccd.ccd_dk = -1; 1413 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1414 1415 /* 1416 * And remove the devstat entry. 1417 */ 1418 devstat_remove_entry(&cs->device_stats); 1419 1420 /* This must be atomic. */ 1421 s = splhigh(); 1422 ccdunlock(cs); 1423 bzero(cs, sizeof(struct ccd_softc)); 1424 splx(s); 1425 1426 break; 1427 1428 case DIOCGDINFO: 1429 if ((cs->sc_flags & CCDF_INITED) == 0) 1430 return (ENXIO); 1431 1432 *(struct disklabel *)data = cs->sc_label; 1433 break; 1434 1435 case DIOCGPART: 1436 if ((cs->sc_flags & CCDF_INITED) == 0) 1437 return (ENXIO); 1438 1439 ((struct partinfo *)data)->disklab = &cs->sc_label; 1440 ((struct partinfo *)data)->part = 1441 &cs->sc_label.d_partitions[ccdpart(dev)]; 1442 break; 1443 1444 case DIOCWDINFO: 1445 case DIOCSDINFO: 1446 if ((cs->sc_flags & CCDF_INITED) == 0) 1447 return (ENXIO); 1448 1449 if ((flag & FWRITE) == 0) 1450 return (EBADF); 1451 1452 if ((error = ccdlock(cs)) != 0) 1453 return (error); 1454 1455 cs->sc_flags |= CCDF_LABELLING; 1456 1457 error = setdisklabel(&cs->sc_label, 1458 (struct disklabel *)data, 0); 1459 if (error == 0) { 1460 if (cmd == DIOCWDINFO) 1461 error = writedisklabel(CCDLABELDEV(dev), 1462 &cs->sc_label); 1463 } 1464 1465 cs->sc_flags &= ~CCDF_LABELLING; 1466 1467 ccdunlock(cs); 1468 1469 if (error) 1470 return (error); 1471 break; 1472 1473 case DIOCWLABEL: 1474 if ((cs->sc_flags & CCDF_INITED) == 0) 1475 return (ENXIO); 1476 1477 if ((flag & FWRITE) == 0) 1478 return (EBADF); 1479 if (*(int *)data != 0) 1480 cs->sc_flags |= CCDF_WLABEL; 1481 else 1482 cs->sc_flags &= ~CCDF_WLABEL; 1483 break; 1484 1485 default: 1486 return (ENOTTY); 1487 } 1488 1489 return (0); 1490 } 1491 1492 static int 1493 ccdsize(dev) 1494 dev_t dev; 1495 { 1496 struct ccd_softc *cs; 1497 int part, size; 1498 1499 if (ccdopen(dev, 0, S_IFCHR, curproc)) 1500 return (-1); 1501 1502 cs = &ccd_softc[ccdunit(dev)]; 1503 part = ccdpart(dev); 1504 1505 if ((cs->sc_flags & CCDF_INITED) == 0) 1506 return (-1); 1507 1508 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1509 size = -1; 1510 else 1511 size = cs->sc_label.d_partitions[part].p_size; 1512 1513 if (ccdclose(dev, 0, S_IFCHR, curproc)) 1514 return (-1); 1515 1516 return (size); 1517 } 1518 1519 static int 1520 ccddump(dev) 1521 dev_t dev; 1522 { 1523 1524 /* Not implemented. */ 1525 return ENXIO; 1526 } 1527 1528 /* 1529 * Lookup the provided name in the filesystem. If the file exists, 1530 * is a valid block device, and isn't being used by anyone else, 1531 * set *vpp to the file's vnode. 1532 */ 1533 static int 1534 ccdlookup(path, p, vpp) 1535 char *path; 1536 struct proc *p; 1537 struct vnode **vpp; /* result */ 1538 { 1539 struct nameidata nd; 1540 struct vnode *vp; 1541 int error; 1542 1543 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p); 1544 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { 1545 #ifdef DEBUG 1546 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1547 printf("ccdlookup: vn_open error = %d\n", error); 1548 #endif 1549 return (error); 1550 } 1551 vp = nd.ni_vp; 1552 1553 if (vp->v_usecount > 1) { 1554 error = EBUSY; 1555 goto bad; 1556 } 1557 1558 if (!vn_isdisk(vp, &error)) 1559 goto bad; 1560 1561 #ifdef DEBUG 1562 if (ccddebug & CCDB_VNODE) 1563 vprint("ccdlookup: vnode info", vp); 1564 #endif 1565 1566 VOP_UNLOCK(vp, 0, p); 1567 NDFREE(&nd, NDF_ONLY_PNBUF); 1568 *vpp = vp; 1569 return (0); 1570 bad: 1571 VOP_UNLOCK(vp, 0, p); 1572 NDFREE(&nd, NDF_ONLY_PNBUF); 1573 /* vn_close does vrele() for vp */ 1574 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); 1575 return (error); 1576 } 1577 1578 /* 1579 * Read the disklabel from the ccd. If one is not present, fake one 1580 * up. 1581 */ 1582 static void 1583 ccdgetdisklabel(dev) 1584 dev_t dev; 1585 { 1586 int unit = ccdunit(dev); 1587 struct ccd_softc *cs = &ccd_softc[unit]; 1588 char *errstring; 1589 struct disklabel *lp = &cs->sc_label; 1590 struct ccdgeom *ccg = &cs->sc_geom; 1591 1592 bzero(lp, sizeof(*lp)); 1593 1594 lp->d_secperunit = cs->sc_size; 1595 lp->d_secsize = ccg->ccg_secsize; 1596 lp->d_nsectors = ccg->ccg_nsectors; 1597 lp->d_ntracks = ccg->ccg_ntracks; 1598 lp->d_ncylinders = ccg->ccg_ncylinders; 1599 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1600 1601 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1602 lp->d_type = DTYPE_CCD; 1603 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1604 lp->d_rpm = 3600; 1605 lp->d_interleave = 1; 1606 lp->d_flags = 0; 1607 1608 lp->d_partitions[RAW_PART].p_offset = 0; 1609 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1610 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1611 lp->d_npartitions = RAW_PART + 1; 1612 1613 lp->d_bbsize = BBSIZE; /* XXX */ 1614 lp->d_sbsize = SBSIZE; /* XXX */ 1615 1616 lp->d_magic = DISKMAGIC; 1617 lp->d_magic2 = DISKMAGIC; 1618 lp->d_checksum = dkcksum(&cs->sc_label); 1619 1620 /* 1621 * Call the generic disklabel extraction routine. 1622 */ 1623 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1624 if (errstring != NULL) 1625 ccdmakedisklabel(cs); 1626 1627 #ifdef DEBUG 1628 /* It's actually extremely common to have unlabeled ccds. */ 1629 if (ccddebug & CCDB_LABEL) 1630 if (errstring != NULL) 1631 printf("ccd%d: %s\n", unit, errstring); 1632 #endif 1633 } 1634 1635 /* 1636 * Take care of things one might want to take care of in the event 1637 * that a disklabel isn't present. 1638 */ 1639 static void 1640 ccdmakedisklabel(cs) 1641 struct ccd_softc *cs; 1642 { 1643 struct disklabel *lp = &cs->sc_label; 1644 1645 /* 1646 * For historical reasons, if there's no disklabel present 1647 * the raw partition must be marked FS_BSDFFS. 1648 */ 1649 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1650 1651 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1652 } 1653 1654 /* 1655 * Wait interruptibly for an exclusive lock. 1656 * 1657 * XXX 1658 * Several drivers do this; it should be abstracted and made MP-safe. 1659 */ 1660 static int 1661 ccdlock(cs) 1662 struct ccd_softc *cs; 1663 { 1664 int error; 1665 1666 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1667 cs->sc_flags |= CCDF_WANTED; 1668 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1669 return (error); 1670 } 1671 cs->sc_flags |= CCDF_LOCKED; 1672 return (0); 1673 } 1674 1675 /* 1676 * Unlock and wake up any waiters. 1677 */ 1678 static void 1679 ccdunlock(cs) 1680 struct ccd_softc *cs; 1681 { 1682 1683 cs->sc_flags &= ~CCDF_LOCKED; 1684 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1685 cs->sc_flags &= ~CCDF_WANTED; 1686 wakeup(cs); 1687 } 1688 } 1689 1690 #ifdef DEBUG 1691 static void 1692 printiinfo(ii) 1693 struct ccdiinfo *ii; 1694 { 1695 int ix, i; 1696 1697 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1698 printf(" itab[%d]: #dk %d sblk %d soff %d", 1699 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1700 for (i = 0; i < ii->ii_ndisk; i++) 1701 printf(" %d", ii->ii_index[i]); 1702 printf("\n"); 1703 } 1704 } 1705 #endif 1706 1707 1708 /* Local Variables: */ 1709 /* c-argdecl-indent: 8 */ 1710 /* c-continued-statement-offset: 8 */ 1711 /* c-indent-level: 8 */ 1712 /* End: */ 1713