1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include "ccd.h" 91 92 #include <sys/param.h> 93 #include <sys/systm.h> 94 #include <sys/kernel.h> 95 #include <sys/module.h> 96 #include <sys/proc.h> 97 #include <sys/bio.h> 98 #include <sys/malloc.h> 99 #include <sys/namei.h> 100 #include <sys/conf.h> 101 #include <sys/stat.h> 102 #include <sys/sysctl.h> 103 #include <sys/disklabel.h> 104 #include <ufs/ffs/fs.h> 105 #include <sys/devicestat.h> 106 #include <sys/fcntl.h> 107 #include <sys/vnode.h> 108 109 #include <sys/ccdvar.h> 110 111 #if defined(CCDDEBUG) && !defined(DEBUG) 112 #define DEBUG 113 #endif 114 115 #ifdef DEBUG 116 #define CCDB_FOLLOW 0x01 117 #define CCDB_INIT 0x02 118 #define CCDB_IO 0x04 119 #define CCDB_LABEL 0x08 120 #define CCDB_VNODE 0x10 121 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 122 CCDB_VNODE; 123 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 124 #undef DEBUG 125 #endif 126 127 #define ccdunit(x) dkunit(x) 128 #define ccdpart(x) dkpart(x) 129 130 /* 131 This is how mirroring works (only writes are special): 132 133 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 134 linked together by the cb_mirror field. "cb_pflags & 135 CCDPF_MIRROR_DONE" is set to 0 on both of them. 136 137 When a component returns to ccdiodone(), it checks if "cb_pflags & 138 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 139 flag and returns. If it is, it means its partner has already 140 returned, so it will go to the regular cleanup. 141 142 */ 143 144 struct ccdbuf { 145 struct bio cb_buf; /* new I/O buf */ 146 struct bio *cb_obp; /* ptr. to original I/O buf */ 147 struct ccdbuf *cb_freenext; /* free list link */ 148 int cb_unit; /* target unit */ 149 int cb_comp; /* target component */ 150 int cb_pflags; /* mirror/parity status flag */ 151 struct ccdbuf *cb_mirror; /* mirror counterpart */ 152 }; 153 154 /* bits in cb_pflags */ 155 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 156 157 #define CCDLABELDEV(dev) \ 158 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 159 160 static d_open_t ccdopen; 161 static d_close_t ccdclose; 162 static d_strategy_t ccdstrategy; 163 static d_ioctl_t ccdioctl; 164 static d_dump_t ccddump; 165 static d_psize_t ccdsize; 166 167 #define NCCDFREEHIWAT 16 168 169 #define CDEV_MAJOR 74 170 171 static struct cdevsw ccd_cdevsw = { 172 /* open */ ccdopen, 173 /* close */ ccdclose, 174 /* read */ physread, 175 /* write */ physwrite, 176 /* ioctl */ ccdioctl, 177 /* poll */ nopoll, 178 /* mmap */ nommap, 179 /* strategy */ ccdstrategy, 180 /* name */ "ccd", 181 /* maj */ CDEV_MAJOR, 182 /* dump */ ccddump, 183 /* psize */ ccdsize, 184 /* flags */ D_DISK, 185 }; 186 187 /* called during module initialization */ 188 static void ccdattach __P((void)); 189 static int ccd_modevent __P((module_t, int, void *)); 190 191 /* called by biodone() at interrupt time */ 192 static void ccdiodone __P((struct bio *bp)); 193 194 static void ccdstart __P((struct ccd_softc *, struct bio *)); 195 static void ccdinterleave __P((struct ccd_softc *, int)); 196 static void ccdintr __P((struct ccd_softc *, struct bio *)); 197 static int ccdinit __P((struct ccddevice *, char **, struct proc *)); 198 static int ccdlookup __P((char *, struct proc *p, struct vnode **)); 199 static void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *, 200 struct bio *, daddr_t, caddr_t, long)); 201 static void ccdgetdisklabel __P((dev_t)); 202 static void ccdmakedisklabel __P((struct ccd_softc *)); 203 static int ccdlock __P((struct ccd_softc *)); 204 static void ccdunlock __P((struct ccd_softc *)); 205 206 #ifdef DEBUG 207 static void printiinfo __P((struct ccdiinfo *)); 208 #endif 209 210 /* Non-private for the benefit of libkvm. */ 211 struct ccd_softc *ccd_softc; 212 struct ccddevice *ccddevs; 213 struct ccdbuf *ccdfreebufs; 214 static int numccdfreebufs; 215 static int numccd = 0; 216 217 /* 218 * getccdbuf() - Allocate and zero a ccd buffer. 219 * 220 * This routine is called at splbio(). 221 */ 222 223 static __inline 224 struct ccdbuf * 225 getccdbuf(struct ccdbuf *cpy) 226 { 227 struct ccdbuf *cbp; 228 229 /* 230 * Allocate from freelist or malloc as necessary 231 */ 232 if ((cbp = ccdfreebufs) != NULL) { 233 ccdfreebufs = cbp->cb_freenext; 234 --numccdfreebufs; 235 } else { 236 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 237 } 238 239 /* 240 * Used by mirroring code 241 */ 242 if (cpy) 243 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 244 else 245 bzero(cbp, sizeof(struct ccdbuf)); 246 247 /* 248 * independant struct bio initialization 249 */ 250 251 return(cbp); 252 } 253 254 /* 255 * putccdbuf() - Free a ccd buffer. 256 * 257 * This routine is called at splbio(). 258 */ 259 260 static __inline 261 void 262 putccdbuf(struct ccdbuf *cbp) 263 { 264 265 if (numccdfreebufs < NCCDFREEHIWAT) { 266 cbp->cb_freenext = ccdfreebufs; 267 ccdfreebufs = cbp; 268 ++numccdfreebufs; 269 } else { 270 free((caddr_t)cbp, M_DEVBUF); 271 } 272 } 273 274 275 /* 276 * Number of blocks to untouched in front of a component partition. 277 * This is to avoid violating its disklabel area when it starts at the 278 * beginning of the slice. 279 */ 280 #if !defined(CCD_OFFSET) 281 #define CCD_OFFSET 16 282 #endif 283 284 static void 285 ccd_clone(void *arg, char *name, int namelen, dev_t *dev) 286 { 287 int i, u; 288 char *s; 289 290 if (*dev != NODEV) 291 return; 292 i = dev_stdclone(name, &s, "ccd", &u); 293 if (i != 2) 294 return; 295 if (u >= numccd) 296 return; 297 if (*s < 'a' || *s > 'h') 298 return; 299 if (s[1] != '\0') 300 return; 301 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', 302 UID_ROOT, GID_OPERATOR, 0640, name); 303 } 304 305 /* 306 * Called by main() during pseudo-device attachment. All we need 307 * to do is allocate enough space for devices to be configured later, and 308 * add devsw entries. 309 */ 310 static void 311 ccdattach() 312 { 313 int i; 314 int num = NCCD; 315 316 if (num > 1) 317 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 318 else 319 printf("ccd0: Concatenated disk driver\n"); 320 321 ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc), 322 M_DEVBUF, M_NOWAIT); 323 ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice), 324 M_DEVBUF, M_NOWAIT); 325 if ((ccd_softc == NULL) || (ccddevs == NULL)) { 326 printf("WARNING: no memory for concatenated disks\n"); 327 if (ccd_softc != NULL) 328 free(ccd_softc, M_DEVBUF); 329 if (ccddevs != NULL) 330 free(ccddevs, M_DEVBUF); 331 return; 332 } 333 numccd = num; 334 bzero(ccd_softc, num * sizeof(struct ccd_softc)); 335 bzero(ccddevs, num * sizeof(struct ccddevice)); 336 337 cdevsw_add(&ccd_cdevsw); 338 /* XXX: is this necessary? */ 339 for (i = 0; i < numccd; ++i) 340 ccddevs[i].ccd_dk = -1; 341 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); 342 } 343 344 static int 345 ccd_modevent(mod, type, data) 346 module_t mod; 347 int type; 348 void *data; 349 { 350 int error = 0; 351 352 switch (type) { 353 case MOD_LOAD: 354 ccdattach(); 355 break; 356 357 case MOD_UNLOAD: 358 printf("ccd0: Unload not supported!\n"); 359 error = EOPNOTSUPP; 360 break; 361 362 default: /* MOD_SHUTDOWN etc */ 363 break; 364 } 365 return (error); 366 } 367 368 DEV_MODULE(ccd, ccd_modevent, NULL); 369 370 static int 371 ccdinit(ccd, cpaths, p) 372 struct ccddevice *ccd; 373 char **cpaths; 374 struct proc *p; 375 { 376 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 377 struct ccdcinfo *ci = NULL; /* XXX */ 378 size_t size; 379 int ix; 380 struct vnode *vp; 381 size_t minsize; 382 int maxsecsize; 383 struct partinfo dpart; 384 struct ccdgeom *ccg = &cs->sc_geom; 385 char tmppath[MAXPATHLEN]; 386 int error = 0; 387 388 #ifdef DEBUG 389 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 390 printf("ccdinit: unit %d\n", ccd->ccd_unit); 391 #endif 392 393 cs->sc_size = 0; 394 cs->sc_ileave = ccd->ccd_interleave; 395 cs->sc_nccdisks = ccd->ccd_ndev; 396 397 /* Allocate space for the component info. */ 398 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 399 M_DEVBUF, M_WAITOK); 400 401 /* 402 * Verify that each component piece exists and record 403 * relevant information about it. 404 */ 405 maxsecsize = 0; 406 minsize = 0; 407 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 408 vp = ccd->ccd_vpp[ix]; 409 ci = &cs->sc_cinfo[ix]; 410 ci->ci_vp = vp; 411 412 /* 413 * Copy in the pathname of the component. 414 */ 415 bzero(tmppath, sizeof(tmppath)); /* sanity */ 416 if ((error = copyinstr(cpaths[ix], tmppath, 417 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 418 #ifdef DEBUG 419 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 420 printf("ccd%d: can't copy path, error = %d\n", 421 ccd->ccd_unit, error); 422 #endif 423 goto fail; 424 } 425 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 426 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 427 428 ci->ci_dev = vn_todev(vp); 429 430 /* 431 * Get partition information for the component. 432 */ 433 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 434 FREAD, p->p_ucred, p)) != 0) { 435 #ifdef DEBUG 436 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 437 printf("ccd%d: %s: ioctl failed, error = %d\n", 438 ccd->ccd_unit, ci->ci_path, error); 439 #endif 440 goto fail; 441 } 442 if (dpart.part->p_fstype == FS_BSDFFS) { 443 maxsecsize = 444 ((dpart.disklab->d_secsize > maxsecsize) ? 445 dpart.disklab->d_secsize : maxsecsize); 446 size = dpart.part->p_size - CCD_OFFSET; 447 } else { 448 #ifdef DEBUG 449 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 450 printf("ccd%d: %s: incorrect partition type\n", 451 ccd->ccd_unit, ci->ci_path); 452 #endif 453 error = EFTYPE; 454 goto fail; 455 } 456 457 /* 458 * Calculate the size, truncating to an interleave 459 * boundary if necessary. 460 */ 461 462 if (cs->sc_ileave > 1) 463 size -= size % cs->sc_ileave; 464 465 if (size == 0) { 466 #ifdef DEBUG 467 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 468 printf("ccd%d: %s: size == 0\n", 469 ccd->ccd_unit, ci->ci_path); 470 #endif 471 error = ENODEV; 472 goto fail; 473 } 474 475 if (minsize == 0 || size < minsize) 476 minsize = size; 477 ci->ci_size = size; 478 cs->sc_size += size; 479 } 480 481 /* 482 * Don't allow the interleave to be smaller than 483 * the biggest component sector. 484 */ 485 if ((cs->sc_ileave > 0) && 486 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 487 #ifdef DEBUG 488 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 489 printf("ccd%d: interleave must be at least %d\n", 490 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 491 #endif 492 error = EINVAL; 493 goto fail; 494 } 495 496 /* 497 * If uniform interleave is desired set all sizes to that of 498 * the smallest component. This will guarentee that a single 499 * interleave table is generated. 500 * 501 * Lost space must be taken into account when calculating the 502 * overall size. Half the space is lost when CCDF_MIRROR is 503 * specified. One disk is lost when CCDF_PARITY is specified. 504 */ 505 if (ccd->ccd_flags & CCDF_UNIFORM) { 506 for (ci = cs->sc_cinfo; 507 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 508 ci->ci_size = minsize; 509 } 510 if (ccd->ccd_flags & CCDF_MIRROR) { 511 /* 512 * Check to see if an even number of components 513 * have been specified. The interleave must also 514 * be non-zero in order for us to be able to 515 * guarentee the topology. 516 */ 517 if (cs->sc_nccdisks % 2) { 518 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 519 error = EINVAL; 520 goto fail; 521 } 522 if (cs->sc_ileave == 0) { 523 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 524 error = EINVAL; 525 goto fail; 526 } 527 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 528 } else if (ccd->ccd_flags & CCDF_PARITY) { 529 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 530 } else { 531 if (cs->sc_ileave == 0) { 532 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 533 error = EINVAL; 534 goto fail; 535 } 536 cs->sc_size = cs->sc_nccdisks * minsize; 537 } 538 } 539 540 /* 541 * Construct the interleave table. 542 */ 543 ccdinterleave(cs, ccd->ccd_unit); 544 545 /* 546 * Create pseudo-geometry based on 1MB cylinders. It's 547 * pretty close. 548 */ 549 ccg->ccg_secsize = maxsecsize; 550 ccg->ccg_ntracks = 1; 551 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 552 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 553 554 /* 555 * Add an devstat entry for this device. 556 */ 557 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 558 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 559 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 560 DEVSTAT_PRIORITY_ARRAY); 561 562 cs->sc_flags |= CCDF_INITED; 563 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 564 cs->sc_unit = ccd->ccd_unit; 565 return (0); 566 fail: 567 while (ci > cs->sc_cinfo) { 568 ci--; 569 free(ci->ci_path, M_DEVBUF); 570 } 571 free(cs->sc_cinfo, M_DEVBUF); 572 return (error); 573 } 574 575 static void 576 ccdinterleave(cs, unit) 577 struct ccd_softc *cs; 578 int unit; 579 { 580 struct ccdcinfo *ci, *smallci; 581 struct ccdiinfo *ii; 582 daddr_t bn, lbn; 583 int ix; 584 u_long size; 585 586 #ifdef DEBUG 587 if (ccddebug & CCDB_INIT) 588 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 589 #endif 590 591 /* 592 * Allocate an interleave table. The worst case occurs when each 593 * of N disks is of a different size, resulting in N interleave 594 * tables. 595 * 596 * Chances are this is too big, but we don't care. 597 */ 598 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 599 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, 600 M_WAITOK | M_ZERO); 601 602 /* 603 * Trivial case: no interleave (actually interleave of disk size). 604 * Each table entry represents a single component in its entirety. 605 * 606 * An interleave of 0 may not be used with a mirror or parity setup. 607 */ 608 if (cs->sc_ileave == 0) { 609 bn = 0; 610 ii = cs->sc_itable; 611 612 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 613 /* Allocate space for ii_index. */ 614 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 615 ii->ii_ndisk = 1; 616 ii->ii_startblk = bn; 617 ii->ii_startoff = 0; 618 ii->ii_index[0] = ix; 619 bn += cs->sc_cinfo[ix].ci_size; 620 ii++; 621 } 622 ii->ii_ndisk = 0; 623 #ifdef DEBUG 624 if (ccddebug & CCDB_INIT) 625 printiinfo(cs->sc_itable); 626 #endif 627 return; 628 } 629 630 /* 631 * The following isn't fast or pretty; it doesn't have to be. 632 */ 633 size = 0; 634 bn = lbn = 0; 635 for (ii = cs->sc_itable; ; ii++) { 636 /* 637 * Allocate space for ii_index. We might allocate more then 638 * we use. 639 */ 640 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 641 M_DEVBUF, M_WAITOK); 642 643 /* 644 * Locate the smallest of the remaining components 645 */ 646 smallci = NULL; 647 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 648 ci++) { 649 if (ci->ci_size > size && 650 (smallci == NULL || 651 ci->ci_size < smallci->ci_size)) { 652 smallci = ci; 653 } 654 } 655 656 /* 657 * Nobody left, all done 658 */ 659 if (smallci == NULL) { 660 ii->ii_ndisk = 0; 661 break; 662 } 663 664 /* 665 * Record starting logical block using an sc_ileave blocksize. 666 */ 667 ii->ii_startblk = bn / cs->sc_ileave; 668 669 /* 670 * Record starting comopnent block using an sc_ileave 671 * blocksize. This value is relative to the beginning of 672 * a component disk. 673 */ 674 ii->ii_startoff = lbn; 675 676 /* 677 * Determine how many disks take part in this interleave 678 * and record their indices. 679 */ 680 ix = 0; 681 for (ci = cs->sc_cinfo; 682 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 683 if (ci->ci_size >= smallci->ci_size) { 684 ii->ii_index[ix++] = ci - cs->sc_cinfo; 685 } 686 } 687 ii->ii_ndisk = ix; 688 bn += ix * (smallci->ci_size - size); 689 lbn = smallci->ci_size / cs->sc_ileave; 690 size = smallci->ci_size; 691 } 692 #ifdef DEBUG 693 if (ccddebug & CCDB_INIT) 694 printiinfo(cs->sc_itable); 695 #endif 696 } 697 698 /* ARGSUSED */ 699 static int 700 ccdopen(dev, flags, fmt, p) 701 dev_t dev; 702 int flags, fmt; 703 struct proc *p; 704 { 705 int unit = ccdunit(dev); 706 struct ccd_softc *cs; 707 struct disklabel *lp; 708 int error = 0, part, pmask; 709 710 #ifdef DEBUG 711 if (ccddebug & CCDB_FOLLOW) 712 printf("ccdopen(%p, %x)\n", dev, flags); 713 #endif 714 if (unit >= numccd) 715 return (ENXIO); 716 cs = &ccd_softc[unit]; 717 718 if ((error = ccdlock(cs)) != 0) 719 return (error); 720 721 lp = &cs->sc_label; 722 723 part = ccdpart(dev); 724 pmask = (1 << part); 725 726 /* 727 * If we're initialized, check to see if there are any other 728 * open partitions. If not, then it's safe to update 729 * the in-core disklabel. 730 */ 731 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 732 ccdgetdisklabel(dev); 733 734 /* Check that the partition exists. */ 735 if (part != RAW_PART && ((part >= lp->d_npartitions) || 736 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 737 error = ENXIO; 738 goto done; 739 } 740 741 cs->sc_openmask |= pmask; 742 done: 743 ccdunlock(cs); 744 return (0); 745 } 746 747 /* ARGSUSED */ 748 static int 749 ccdclose(dev, flags, fmt, p) 750 dev_t dev; 751 int flags, fmt; 752 struct proc *p; 753 { 754 int unit = ccdunit(dev); 755 struct ccd_softc *cs; 756 int error = 0, part; 757 758 #ifdef DEBUG 759 if (ccddebug & CCDB_FOLLOW) 760 printf("ccdclose(%p, %x)\n", dev, flags); 761 #endif 762 763 if (unit >= numccd) 764 return (ENXIO); 765 cs = &ccd_softc[unit]; 766 767 if ((error = ccdlock(cs)) != 0) 768 return (error); 769 770 part = ccdpart(dev); 771 772 /* ...that much closer to allowing unconfiguration... */ 773 cs->sc_openmask &= ~(1 << part); 774 ccdunlock(cs); 775 return (0); 776 } 777 778 static void 779 ccdstrategy(bp) 780 struct bio *bp; 781 { 782 int unit = ccdunit(bp->bio_dev); 783 struct ccd_softc *cs = &ccd_softc[unit]; 784 int s; 785 int wlabel; 786 struct disklabel *lp; 787 788 #ifdef DEBUG 789 if (ccddebug & CCDB_FOLLOW) 790 printf("ccdstrategy(%p): unit %d\n", bp, unit); 791 #endif 792 if ((cs->sc_flags & CCDF_INITED) == 0) { 793 biofinish(bp, NULL, ENXIO); 794 return; 795 } 796 797 /* If it's a nil transfer, wake up the top half now. */ 798 if (bp->bio_bcount == 0) { 799 biodone(bp); 800 return; 801 } 802 803 lp = &cs->sc_label; 804 805 /* 806 * Do bounds checking and adjust transfer. If there's an 807 * error, the bounds check will flag that for us. 808 */ 809 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 810 if (ccdpart(bp->bio_dev) != RAW_PART) { 811 if (bounds_check_with_label(bp, lp, wlabel) <= 0) { 812 biodone(bp); 813 return; 814 } 815 } else { 816 int pbn; /* in sc_secsize chunks */ 817 long sz; /* in sc_secsize chunks */ 818 819 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 820 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 821 822 /* 823 * If out of bounds return an error. If at the EOF point, 824 * simply read or write less. 825 */ 826 827 if (pbn < 0 || pbn >= cs->sc_size) { 828 bp->bio_resid = bp->bio_bcount; 829 if (pbn != cs->sc_size) 830 biofinish(bp, NULL, EINVAL); 831 else 832 biodone(bp); 833 return; 834 } 835 836 /* 837 * If the request crosses EOF, truncate the request. 838 */ 839 if (pbn + sz > cs->sc_size) { 840 bp->bio_bcount = (cs->sc_size - pbn) * 841 cs->sc_geom.ccg_secsize; 842 } 843 } 844 845 bp->bio_resid = bp->bio_bcount; 846 847 /* 848 * "Start" the unit. 849 */ 850 s = splbio(); 851 ccdstart(cs, bp); 852 splx(s); 853 return; 854 } 855 856 static void 857 ccdstart(cs, bp) 858 struct ccd_softc *cs; 859 struct bio *bp; 860 { 861 long bcount, rcount; 862 struct ccdbuf *cbp[4]; 863 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 864 caddr_t addr; 865 daddr_t bn; 866 struct partition *pp; 867 868 #ifdef DEBUG 869 if (ccddebug & CCDB_FOLLOW) 870 printf("ccdstart(%p, %p)\n", cs, bp); 871 #endif 872 873 /* Record the transaction start */ 874 devstat_start_transaction(&cs->device_stats); 875 876 /* 877 * Translate the partition-relative block number to an absolute. 878 */ 879 bn = bp->bio_blkno; 880 if (ccdpart(bp->bio_dev) != RAW_PART) { 881 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; 882 bn += pp->p_offset; 883 } 884 885 /* 886 * Allocate component buffers and fire off the requests 887 */ 888 addr = bp->bio_data; 889 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 890 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 891 rcount = cbp[0]->cb_buf.bio_bcount; 892 893 if (cs->sc_cflags & CCDF_MIRROR) { 894 /* 895 * Mirroring. Writes go to both disks, reads are 896 * taken from whichever disk seems most appropriate. 897 * 898 * We attempt to localize reads to the disk whos arm 899 * is nearest the read request. We ignore seeks due 900 * to writes when making this determination and we 901 * also try to avoid hogging. 902 */ 903 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 904 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 905 BIO_STRATEGY(&cbp[1]->cb_buf, 0); 906 } else { 907 int pick = cs->sc_pick; 908 daddr_t range = cs->sc_size / 16; 909 910 if (bn < cs->sc_blk[pick] - range || 911 bn > cs->sc_blk[pick] + range 912 ) { 913 cs->sc_pick = pick = 1 - pick; 914 } 915 cs->sc_blk[pick] = bn + btodb(rcount); 916 BIO_STRATEGY(&cbp[pick]->cb_buf, 0); 917 } 918 } else { 919 /* 920 * Not mirroring 921 */ 922 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 923 } 924 bn += btodb(rcount); 925 addr += rcount; 926 } 927 } 928 929 /* 930 * Build a component buffer header. 931 */ 932 static void 933 ccdbuffer(cb, cs, bp, bn, addr, bcount) 934 struct ccdbuf **cb; 935 struct ccd_softc *cs; 936 struct bio *bp; 937 daddr_t bn; 938 caddr_t addr; 939 long bcount; 940 { 941 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 942 struct ccdbuf *cbp; 943 daddr_t cbn, cboff; 944 off_t cbc; 945 946 #ifdef DEBUG 947 if (ccddebug & CCDB_IO) 948 printf("ccdbuffer(%p, %p, %d, %p, %ld)\n", 949 cs, bp, bn, addr, bcount); 950 #endif 951 /* 952 * Determine which component bn falls in. 953 */ 954 cbn = bn; 955 cboff = 0; 956 957 if (cs->sc_ileave == 0) { 958 /* 959 * Serially concatenated and neither a mirror nor a parity 960 * config. This is a special case. 961 */ 962 daddr_t sblk; 963 964 sblk = 0; 965 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 966 sblk += ci->ci_size; 967 cbn -= sblk; 968 } else { 969 struct ccdiinfo *ii; 970 int ccdisk, off; 971 972 /* 973 * Calculate cbn, the logical superblock (sc_ileave chunks), 974 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 975 * to cbn. 976 */ 977 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 978 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 979 980 /* 981 * Figure out which interleave table to use. 982 */ 983 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 984 if (ii->ii_startblk > cbn) 985 break; 986 } 987 ii--; 988 989 /* 990 * off is the logical superblock relative to the beginning 991 * of this interleave block. 992 */ 993 off = cbn - ii->ii_startblk; 994 995 /* 996 * We must calculate which disk component to use (ccdisk), 997 * and recalculate cbn to be the superblock relative to 998 * the beginning of the component. This is typically done by 999 * adding 'off' and ii->ii_startoff together. However, 'off' 1000 * must typically be divided by the number of components in 1001 * this interleave array to be properly convert it from a 1002 * CCD-relative logical superblock number to a 1003 * component-relative superblock number. 1004 */ 1005 if (ii->ii_ndisk == 1) { 1006 /* 1007 * When we have just one disk, it can't be a mirror 1008 * or a parity config. 1009 */ 1010 ccdisk = ii->ii_index[0]; 1011 cbn = ii->ii_startoff + off; 1012 } else { 1013 if (cs->sc_cflags & CCDF_MIRROR) { 1014 /* 1015 * We have forced a uniform mapping, resulting 1016 * in a single interleave array. We double 1017 * up on the first half of the available 1018 * components and our mirror is in the second 1019 * half. This only works with a single 1020 * interleave array because doubling up 1021 * doubles the number of sectors, so there 1022 * cannot be another interleave array because 1023 * the next interleave array's calculations 1024 * would be off. 1025 */ 1026 int ndisk2 = ii->ii_ndisk / 2; 1027 ccdisk = ii->ii_index[off % ndisk2]; 1028 cbn = ii->ii_startoff + off / ndisk2; 1029 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1030 } else if (cs->sc_cflags & CCDF_PARITY) { 1031 /* 1032 * XXX not implemented yet 1033 */ 1034 int ndisk2 = ii->ii_ndisk - 1; 1035 ccdisk = ii->ii_index[off % ndisk2]; 1036 cbn = ii->ii_startoff + off / ndisk2; 1037 if (cbn % ii->ii_ndisk <= ccdisk) 1038 ccdisk++; 1039 } else { 1040 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1041 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1042 } 1043 } 1044 1045 ci = &cs->sc_cinfo[ccdisk]; 1046 1047 /* 1048 * Convert cbn from a superblock to a normal block so it 1049 * can be used to calculate (along with cboff) the normal 1050 * block index into this particular disk. 1051 */ 1052 cbn *= cs->sc_ileave; 1053 } 1054 1055 /* 1056 * Fill in the component buf structure. 1057 */ 1058 cbp = getccdbuf(NULL); 1059 cbp->cb_buf.bio_cmd = bp->bio_cmd; 1060 cbp->cb_buf.bio_done = ccdiodone; 1061 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ 1062 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; 1063 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1064 cbp->cb_buf.bio_data = addr; 1065 if (cs->sc_ileave == 0) 1066 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1067 else 1068 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1069 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount; 1070 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 1071 1072 /* 1073 * context for ccdiodone 1074 */ 1075 cbp->cb_obp = bp; 1076 cbp->cb_unit = cs - ccd_softc; 1077 cbp->cb_comp = ci - cs->sc_cinfo; 1078 1079 #ifdef DEBUG 1080 if (ccddebug & CCDB_IO) 1081 printf(" dev %p(u%ld): cbp %p bn %d addr %p bcnt %ld\n", 1082 ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp, 1083 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1084 cbp->cb_buf.bio_bcount); 1085 #endif 1086 cb[0] = cbp; 1087 1088 /* 1089 * Note: both I/O's setup when reading from mirror, but only one 1090 * will be executed. 1091 */ 1092 if (cs->sc_cflags & CCDF_MIRROR) { 1093 /* mirror, setup second I/O */ 1094 cbp = getccdbuf(cb[0]); 1095 cbp->cb_buf.bio_dev = ci2->ci_dev; 1096 cbp->cb_comp = ci2 - cs->sc_cinfo; 1097 cb[1] = cbp; 1098 /* link together the ccdbuf's and clear "mirror done" flag */ 1099 cb[0]->cb_mirror = cb[1]; 1100 cb[1]->cb_mirror = cb[0]; 1101 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1102 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1103 } 1104 } 1105 1106 static void 1107 ccdintr(cs, bp) 1108 struct ccd_softc *cs; 1109 struct bio *bp; 1110 { 1111 #ifdef DEBUG 1112 if (ccddebug & CCDB_FOLLOW) 1113 printf("ccdintr(%p, %p)\n", cs, bp); 1114 #endif 1115 /* 1116 * Request is done for better or worse, wakeup the top half. 1117 */ 1118 if (bp->bio_flags & BIO_ERROR) 1119 bp->bio_resid = bp->bio_bcount; 1120 biofinish(bp, &cs->device_stats, 0); 1121 } 1122 1123 /* 1124 * Called at interrupt time. 1125 * Mark the component as done and if all components are done, 1126 * take a ccd interrupt. 1127 */ 1128 static void 1129 ccdiodone(ibp) 1130 struct bio *ibp; 1131 { 1132 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 1133 struct bio *bp = cbp->cb_obp; 1134 int unit = cbp->cb_unit; 1135 int count, s; 1136 1137 s = splbio(); 1138 #ifdef DEBUG 1139 if (ccddebug & CCDB_FOLLOW) 1140 printf("ccdiodone(%p)\n", cbp); 1141 if (ccddebug & CCDB_IO) { 1142 printf("ccdiodone: bp %p bcount %ld resid %ld\n", 1143 bp, bp->bio_bcount, bp->bio_resid); 1144 printf(" dev %p(u%d), cbp %p bn %d addr %p bcnt %ld\n", 1145 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, 1146 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1147 cbp->cb_buf.bio_bcount); 1148 } 1149 #endif 1150 /* 1151 * If an error occured, report it. If this is a mirrored 1152 * configuration and the first of two possible reads, do not 1153 * set the error in the bp yet because the second read may 1154 * succeed. 1155 */ 1156 1157 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1158 const char *msg = ""; 1159 1160 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1161 (cbp->cb_buf.bio_cmd == BIO_READ) && 1162 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1163 /* 1164 * We will try our read on the other disk down 1165 * below, also reverse the default pick so if we 1166 * are doing a scan we do not keep hitting the 1167 * bad disk first. 1168 */ 1169 struct ccd_softc *cs = &ccd_softc[unit]; 1170 1171 msg = ", trying other disk"; 1172 cs->sc_pick = 1 - cs->sc_pick; 1173 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 1174 } else { 1175 bp->bio_flags |= BIO_ERROR; 1176 bp->bio_error = cbp->cb_buf.bio_error ? 1177 cbp->cb_buf.bio_error : EIO; 1178 } 1179 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1180 unit, bp->bio_error, cbp->cb_comp, 1181 (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg); 1182 } 1183 1184 /* 1185 * Process mirror. If we are writing, I/O has been initiated on both 1186 * buffers and we fall through only after both are finished. 1187 * 1188 * If we are reading only one I/O is initiated at a time. If an 1189 * error occurs we initiate the second I/O and return, otherwise 1190 * we free the second I/O without initiating it. 1191 */ 1192 1193 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1194 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 1195 /* 1196 * When writing, handshake with the second buffer 1197 * to determine when both are done. If both are not 1198 * done, return here. 1199 */ 1200 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1201 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1202 putccdbuf(cbp); 1203 splx(s); 1204 return; 1205 } 1206 } else { 1207 /* 1208 * When reading, either dispose of the second buffer 1209 * or initiate I/O on the second buffer if an error 1210 * occured with this one. 1211 */ 1212 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1213 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1214 cbp->cb_mirror->cb_pflags |= 1215 CCDPF_MIRROR_DONE; 1216 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0); 1217 putccdbuf(cbp); 1218 splx(s); 1219 return; 1220 } else { 1221 putccdbuf(cbp->cb_mirror); 1222 /* fall through */ 1223 } 1224 } 1225 } 1226 } 1227 1228 /* 1229 * use bio_caller1 to determine how big the original request was rather 1230 * then bio_bcount, because bio_bcount may have been truncated for EOF. 1231 * 1232 * XXX We check for an error, but we do not test the resid for an 1233 * aligned EOF condition. This may result in character & block 1234 * device access not recognizing EOF properly when read or written 1235 * sequentially, but will not effect filesystems. 1236 */ 1237 count = (long)cbp->cb_buf.bio_caller1; 1238 putccdbuf(cbp); 1239 1240 /* 1241 * If all done, "interrupt". 1242 */ 1243 bp->bio_resid -= count; 1244 if (bp->bio_resid < 0) 1245 panic("ccdiodone: count"); 1246 if (bp->bio_resid == 0) 1247 ccdintr(&ccd_softc[unit], bp); 1248 splx(s); 1249 } 1250 1251 static int 1252 ccdioctl(dev, cmd, data, flag, p) 1253 dev_t dev; 1254 u_long cmd; 1255 caddr_t data; 1256 int flag; 1257 struct proc *p; 1258 { 1259 int unit = ccdunit(dev); 1260 int i, j, lookedup = 0, error = 0; 1261 int part, pmask, s; 1262 struct ccd_softc *cs; 1263 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1264 struct ccddevice ccd; 1265 char **cpp; 1266 struct vnode **vpp; 1267 1268 if (unit >= numccd) 1269 return (ENXIO); 1270 cs = &ccd_softc[unit]; 1271 1272 bzero(&ccd, sizeof(ccd)); 1273 1274 switch (cmd) { 1275 case CCDIOCSET: 1276 if (cs->sc_flags & CCDF_INITED) 1277 return (EBUSY); 1278 1279 if ((flag & FWRITE) == 0) 1280 return (EBADF); 1281 1282 if ((error = ccdlock(cs)) != 0) 1283 return (error); 1284 1285 /* Fill in some important bits. */ 1286 ccd.ccd_unit = unit; 1287 ccd.ccd_interleave = ccio->ccio_ileave; 1288 if (ccd.ccd_interleave == 0 && 1289 ((ccio->ccio_flags & CCDF_MIRROR) || 1290 (ccio->ccio_flags & CCDF_PARITY))) { 1291 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1292 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1293 } 1294 if ((ccio->ccio_flags & CCDF_MIRROR) && 1295 (ccio->ccio_flags & CCDF_PARITY)) { 1296 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1297 ccio->ccio_flags &= ~CCDF_PARITY; 1298 } 1299 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1300 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1301 printf("ccd%d: mirror/parity forces uniform flag\n", 1302 unit); 1303 ccio->ccio_flags |= CCDF_UNIFORM; 1304 } 1305 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1306 1307 /* 1308 * Allocate space for and copy in the array of 1309 * componet pathnames and device numbers. 1310 */ 1311 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1312 M_DEVBUF, M_WAITOK); 1313 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1314 M_DEVBUF, M_WAITOK); 1315 1316 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1317 ccio->ccio_ndisks * sizeof(char **)); 1318 if (error) { 1319 free(vpp, M_DEVBUF); 1320 free(cpp, M_DEVBUF); 1321 ccdunlock(cs); 1322 return (error); 1323 } 1324 1325 #ifdef DEBUG 1326 if (ccddebug & CCDB_INIT) 1327 for (i = 0; i < ccio->ccio_ndisks; ++i) 1328 printf("ccdioctl: component %d: %p\n", 1329 i, cpp[i]); 1330 #endif 1331 1332 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1333 #ifdef DEBUG 1334 if (ccddebug & CCDB_INIT) 1335 printf("ccdioctl: lookedup = %d\n", lookedup); 1336 #endif 1337 if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) { 1338 for (j = 0; j < lookedup; ++j) 1339 (void)vn_close(vpp[j], FREAD|FWRITE, 1340 p->p_ucred, p); 1341 free(vpp, M_DEVBUF); 1342 free(cpp, M_DEVBUF); 1343 ccdunlock(cs); 1344 return (error); 1345 } 1346 ++lookedup; 1347 } 1348 ccd.ccd_cpp = cpp; 1349 ccd.ccd_vpp = vpp; 1350 ccd.ccd_ndev = ccio->ccio_ndisks; 1351 1352 /* 1353 * Initialize the ccd. Fills in the softc for us. 1354 */ 1355 if ((error = ccdinit(&ccd, cpp, p)) != 0) { 1356 for (j = 0; j < lookedup; ++j) 1357 (void)vn_close(vpp[j], FREAD|FWRITE, 1358 p->p_ucred, p); 1359 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1360 free(vpp, M_DEVBUF); 1361 free(cpp, M_DEVBUF); 1362 ccdunlock(cs); 1363 return (error); 1364 } 1365 1366 /* 1367 * The ccd has been successfully initialized, so 1368 * we can place it into the array and read the disklabel. 1369 */ 1370 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1371 ccio->ccio_unit = unit; 1372 ccio->ccio_size = cs->sc_size; 1373 ccdgetdisklabel(dev); 1374 1375 ccdunlock(cs); 1376 1377 break; 1378 1379 case CCDIOCCLR: 1380 if ((cs->sc_flags & CCDF_INITED) == 0) 1381 return (ENXIO); 1382 1383 if ((flag & FWRITE) == 0) 1384 return (EBADF); 1385 1386 if ((error = ccdlock(cs)) != 0) 1387 return (error); 1388 1389 /* Don't unconfigure if any other partitions are open */ 1390 part = ccdpart(dev); 1391 pmask = (1 << part); 1392 if ((cs->sc_openmask & ~pmask)) { 1393 ccdunlock(cs); 1394 return (EBUSY); 1395 } 1396 1397 /* 1398 * Free ccd_softc information and clear entry. 1399 */ 1400 1401 /* Close the components and free their pathnames. */ 1402 for (i = 0; i < cs->sc_nccdisks; ++i) { 1403 /* 1404 * XXX: this close could potentially fail and 1405 * cause Bad Things. Maybe we need to force 1406 * the close to happen? 1407 */ 1408 #ifdef DEBUG 1409 if (ccddebug & CCDB_VNODE) 1410 vprint("CCDIOCCLR: vnode info", 1411 cs->sc_cinfo[i].ci_vp); 1412 #endif 1413 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1414 p->p_ucred, p); 1415 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1416 } 1417 1418 /* Free interleave index. */ 1419 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1420 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1421 1422 /* Free component info and interleave table. */ 1423 free(cs->sc_cinfo, M_DEVBUF); 1424 free(cs->sc_itable, M_DEVBUF); 1425 cs->sc_flags &= ~CCDF_INITED; 1426 1427 /* 1428 * Free ccddevice information and clear entry. 1429 */ 1430 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1431 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1432 ccd.ccd_dk = -1; 1433 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1434 1435 /* 1436 * And remove the devstat entry. 1437 */ 1438 devstat_remove_entry(&cs->device_stats); 1439 1440 /* This must be atomic. */ 1441 s = splhigh(); 1442 ccdunlock(cs); 1443 bzero(cs, sizeof(struct ccd_softc)); 1444 splx(s); 1445 1446 break; 1447 1448 case DIOCGDINFO: 1449 if ((cs->sc_flags & CCDF_INITED) == 0) 1450 return (ENXIO); 1451 1452 *(struct disklabel *)data = cs->sc_label; 1453 break; 1454 1455 case DIOCGPART: 1456 if ((cs->sc_flags & CCDF_INITED) == 0) 1457 return (ENXIO); 1458 1459 ((struct partinfo *)data)->disklab = &cs->sc_label; 1460 ((struct partinfo *)data)->part = 1461 &cs->sc_label.d_partitions[ccdpart(dev)]; 1462 break; 1463 1464 case DIOCWDINFO: 1465 case DIOCSDINFO: 1466 if ((cs->sc_flags & CCDF_INITED) == 0) 1467 return (ENXIO); 1468 1469 if ((flag & FWRITE) == 0) 1470 return (EBADF); 1471 1472 if ((error = ccdlock(cs)) != 0) 1473 return (error); 1474 1475 cs->sc_flags |= CCDF_LABELLING; 1476 1477 error = setdisklabel(&cs->sc_label, 1478 (struct disklabel *)data, 0); 1479 if (error == 0) { 1480 if (cmd == DIOCWDINFO) 1481 error = writedisklabel(CCDLABELDEV(dev), 1482 &cs->sc_label); 1483 } 1484 1485 cs->sc_flags &= ~CCDF_LABELLING; 1486 1487 ccdunlock(cs); 1488 1489 if (error) 1490 return (error); 1491 break; 1492 1493 case DIOCWLABEL: 1494 if ((cs->sc_flags & CCDF_INITED) == 0) 1495 return (ENXIO); 1496 1497 if ((flag & FWRITE) == 0) 1498 return (EBADF); 1499 if (*(int *)data != 0) 1500 cs->sc_flags |= CCDF_WLABEL; 1501 else 1502 cs->sc_flags &= ~CCDF_WLABEL; 1503 break; 1504 1505 default: 1506 return (ENOTTY); 1507 } 1508 1509 return (0); 1510 } 1511 1512 static int 1513 ccdsize(dev) 1514 dev_t dev; 1515 { 1516 struct ccd_softc *cs; 1517 int part, size; 1518 1519 if (ccdopen(dev, 0, S_IFCHR, curproc)) 1520 return (-1); 1521 1522 cs = &ccd_softc[ccdunit(dev)]; 1523 part = ccdpart(dev); 1524 1525 if ((cs->sc_flags & CCDF_INITED) == 0) 1526 return (-1); 1527 1528 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1529 size = -1; 1530 else 1531 size = cs->sc_label.d_partitions[part].p_size; 1532 1533 if (ccdclose(dev, 0, S_IFCHR, curproc)) 1534 return (-1); 1535 1536 return (size); 1537 } 1538 1539 static int 1540 ccddump(dev) 1541 dev_t dev; 1542 { 1543 1544 /* Not implemented. */ 1545 return ENXIO; 1546 } 1547 1548 /* 1549 * Lookup the provided name in the filesystem. If the file exists, 1550 * is a valid block device, and isn't being used by anyone else, 1551 * set *vpp to the file's vnode. 1552 */ 1553 static int 1554 ccdlookup(path, p, vpp) 1555 char *path; 1556 struct proc *p; 1557 struct vnode **vpp; /* result */ 1558 { 1559 struct nameidata nd; 1560 struct vnode *vp; 1561 int error, flags; 1562 1563 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p); 1564 flags = FREAD | FWRITE; 1565 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1566 #ifdef DEBUG 1567 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1568 printf("ccdlookup: vn_open error = %d\n", error); 1569 #endif 1570 return (error); 1571 } 1572 vp = nd.ni_vp; 1573 1574 if (vp->v_usecount > 1) { 1575 error = EBUSY; 1576 goto bad; 1577 } 1578 1579 if (!vn_isdisk(vp, &error)) 1580 goto bad; 1581 1582 #ifdef DEBUG 1583 if (ccddebug & CCDB_VNODE) 1584 vprint("ccdlookup: vnode info", vp); 1585 #endif 1586 1587 VOP_UNLOCK(vp, 0, p); 1588 NDFREE(&nd, NDF_ONLY_PNBUF); 1589 *vpp = vp; 1590 return (0); 1591 bad: 1592 VOP_UNLOCK(vp, 0, p); 1593 NDFREE(&nd, NDF_ONLY_PNBUF); 1594 /* vn_close does vrele() for vp */ 1595 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); 1596 return (error); 1597 } 1598 1599 /* 1600 * Read the disklabel from the ccd. If one is not present, fake one 1601 * up. 1602 */ 1603 static void 1604 ccdgetdisklabel(dev) 1605 dev_t dev; 1606 { 1607 int unit = ccdunit(dev); 1608 struct ccd_softc *cs = &ccd_softc[unit]; 1609 char *errstring; 1610 struct disklabel *lp = &cs->sc_label; 1611 struct ccdgeom *ccg = &cs->sc_geom; 1612 1613 bzero(lp, sizeof(*lp)); 1614 1615 lp->d_secperunit = cs->sc_size; 1616 lp->d_secsize = ccg->ccg_secsize; 1617 lp->d_nsectors = ccg->ccg_nsectors; 1618 lp->d_ntracks = ccg->ccg_ntracks; 1619 lp->d_ncylinders = ccg->ccg_ncylinders; 1620 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1621 1622 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1623 lp->d_type = DTYPE_CCD; 1624 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1625 lp->d_rpm = 3600; 1626 lp->d_interleave = 1; 1627 lp->d_flags = 0; 1628 1629 lp->d_partitions[RAW_PART].p_offset = 0; 1630 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1631 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1632 lp->d_npartitions = RAW_PART + 1; 1633 1634 lp->d_bbsize = BBSIZE; /* XXX */ 1635 lp->d_sbsize = SBSIZE; /* XXX */ 1636 1637 lp->d_magic = DISKMAGIC; 1638 lp->d_magic2 = DISKMAGIC; 1639 lp->d_checksum = dkcksum(&cs->sc_label); 1640 1641 /* 1642 * Call the generic disklabel extraction routine. 1643 */ 1644 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1645 if (errstring != NULL) 1646 ccdmakedisklabel(cs); 1647 1648 #ifdef DEBUG 1649 /* It's actually extremely common to have unlabeled ccds. */ 1650 if (ccddebug & CCDB_LABEL) 1651 if (errstring != NULL) 1652 printf("ccd%d: %s\n", unit, errstring); 1653 #endif 1654 } 1655 1656 /* 1657 * Take care of things one might want to take care of in the event 1658 * that a disklabel isn't present. 1659 */ 1660 static void 1661 ccdmakedisklabel(cs) 1662 struct ccd_softc *cs; 1663 { 1664 struct disklabel *lp = &cs->sc_label; 1665 1666 /* 1667 * For historical reasons, if there's no disklabel present 1668 * the raw partition must be marked FS_BSDFFS. 1669 */ 1670 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1671 1672 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1673 } 1674 1675 /* 1676 * Wait interruptibly for an exclusive lock. 1677 * 1678 * XXX 1679 * Several drivers do this; it should be abstracted and made MP-safe. 1680 */ 1681 static int 1682 ccdlock(cs) 1683 struct ccd_softc *cs; 1684 { 1685 int error; 1686 1687 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1688 cs->sc_flags |= CCDF_WANTED; 1689 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1690 return (error); 1691 } 1692 cs->sc_flags |= CCDF_LOCKED; 1693 return (0); 1694 } 1695 1696 /* 1697 * Unlock and wake up any waiters. 1698 */ 1699 static void 1700 ccdunlock(cs) 1701 struct ccd_softc *cs; 1702 { 1703 1704 cs->sc_flags &= ~CCDF_LOCKED; 1705 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1706 cs->sc_flags &= ~CCDF_WANTED; 1707 wakeup(cs); 1708 } 1709 } 1710 1711 #ifdef DEBUG 1712 static void 1713 printiinfo(ii) 1714 struct ccdiinfo *ii; 1715 { 1716 int ix, i; 1717 1718 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1719 printf(" itab[%d]: #dk %d sblk %d soff %d", 1720 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1721 for (i = 0; i < ii->ii_ndisk; i++) 1722 printf(" %d", ii->ii_index[i]); 1723 printf("\n"); 1724 } 1725 } 1726 #endif 1727