1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 
41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 
73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include "ccd.h" 91 92 #include <sys/param.h> 93 #include <sys/systm.h> 94 #include <sys/kernel.h> 95 #include <sys/module.h> 96 #include <sys/proc.h> 97 #include <sys/bio.h> 98 #include <sys/malloc.h> 99 #include <sys/namei.h> 100 #include <sys/conf.h> 101 #include <sys/stat.h> 102 #include <sys/sysctl.h> 103 #include <sys/disklabel.h> 104 #include <ufs/ffs/fs.h> 105 #include <sys/devicestat.h> 106 #include <sys/fcntl.h> 107 #include <sys/vnode.h> 108 109 #include <sys/ccdvar.h> 110 111 #if defined(CCDDEBUG) && !defined(DEBUG) 112 #define DEBUG 113 #endif 114 115 #ifdef DEBUG 116 #define CCDB_FOLLOW 0x01 117 #define CCDB_INIT 0x02 118 #define CCDB_IO 0x04 119 #define CCDB_LABEL 0x08 120 #define CCDB_VNODE 0x10 121 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 122 CCDB_VNODE; 123 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 124 #undef DEBUG 125 #endif 126 127 #define ccdunit(x) dkunit(x) 128 #define ccdpart(x) dkpart(x) 129 130 /* 131 This is how mirroring works (only writes are special): 132 133 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 134 linked together by the cb_mirror field. "cb_pflags & 135 CCDPF_MIRROR_DONE" is set to 0 on both of them. 136 137 When a component returns to ccdiodone(), it checks if "cb_pflags & 138 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 139 flag and returns. If it is, it means its partner has already 140 returned, so it will go to the regular cleanup. 141 142 */ 143 144 struct ccdbuf { 145 struct bio cb_buf; /* new I/O buf */ 146 struct bio *cb_obp; /* ptr. 
to original I/O buf */ 147 struct ccdbuf *cb_freenext; /* free list link */ 148 int cb_unit; /* target unit */ 149 int cb_comp; /* target component */ 150 int cb_pflags; /* mirror/parity status flag */ 151 struct ccdbuf *cb_mirror; /* mirror counterpart */ 152 }; 153 154 /* bits in cb_pflags */ 155 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 156 157 #define CCDLABELDEV(dev) \ 158 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 159 160 static d_open_t ccdopen; 161 static d_close_t ccdclose; 162 static d_strategy_t ccdstrategy; 163 static d_ioctl_t ccdioctl; 164 static d_dump_t ccddump; 165 static d_psize_t ccdsize; 166 167 #define NCCDFREEHIWAT 16 168 169 #define CDEV_MAJOR 74 170 #define BDEV_MAJOR 21 171 172 static struct cdevsw ccd_cdevsw = { 173 /* open */ ccdopen, 174 /* close */ ccdclose, 175 /* read */ physread, 176 /* write */ physwrite, 177 /* ioctl */ ccdioctl, 178 /* poll */ nopoll, 179 /* mmap */ nommap, 180 /* strategy */ ccdstrategy, 181 /* name */ "ccd", 182 /* maj */ CDEV_MAJOR, 183 /* dump */ ccddump, 184 /* psize */ ccdsize, 185 /* flags */ D_DISK, 186 /* bmaj */ BDEV_MAJOR 187 }; 188 189 /* called during module initialization */ 190 static void ccdattach __P((void)); 191 static int ccd_modevent __P((module_t, int, void *)); 192 193 /* called by biodone() at interrupt time */ 194 static void ccdiodone __P((struct bio *bp)); 195 196 static void ccdstart __P((struct ccd_softc *, struct bio *)); 197 static void ccdinterleave __P((struct ccd_softc *, int)); 198 static void ccdintr __P((struct ccd_softc *, struct bio *)); 199 static int ccdinit __P((struct ccddevice *, char **, struct proc *)); 200 static int ccdlookup __P((char *, struct proc *p, struct vnode **)); 201 static void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *, 202 struct bio *, daddr_t, caddr_t, long)); 203 static void ccdgetdisklabel __P((dev_t)); 204 static void ccdmakedisklabel __P((struct ccd_softc *)); 205 static int ccdlock 
__P((struct ccd_softc *)); 206 static void ccdunlock __P((struct ccd_softc *)); 207 208 #ifdef DEBUG 209 static void printiinfo __P((struct ccdiinfo *)); 210 #endif 211 212 /* Non-private for the benefit of libkvm. */ 213 struct ccd_softc *ccd_softc; 214 struct ccddevice *ccddevs; 215 struct ccdbuf *ccdfreebufs; 216 static int numccdfreebufs; 217 static int numccd = 0; 218 219 /* 220 * getccdbuf() - Allocate and zero a ccd buffer. 221 * 222 * This routine is called at splbio(). 223 */ 224 225 static __inline 226 struct ccdbuf * 227 getccdbuf(struct ccdbuf *cpy) 228 { 229 struct ccdbuf *cbp; 230 231 /* 232 * Allocate from freelist or malloc as necessary 233 */ 234 if ((cbp = ccdfreebufs) != NULL) { 235 ccdfreebufs = cbp->cb_freenext; 236 --numccdfreebufs; 237 } else { 238 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 239 } 240 241 /* 242 * Used by mirroring code 243 */ 244 if (cpy) 245 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 246 else 247 bzero(cbp, sizeof(struct ccdbuf)); 248 249 /* 250 * independant struct bio initialization 251 */ 252 253 return(cbp); 254 } 255 256 /* 257 * putccdbuf() - Free a ccd buffer. 258 * 259 * This routine is called at splbio(). 260 */ 261 262 static __inline 263 void 264 putccdbuf(struct ccdbuf *cbp) 265 { 266 267 if (numccdfreebufs < NCCDFREEHIWAT) { 268 cbp->cb_freenext = ccdfreebufs; 269 ccdfreebufs = cbp; 270 ++numccdfreebufs; 271 } else { 272 free((caddr_t)cbp, M_DEVBUF); 273 } 274 } 275 276 277 /* 278 * Number of blocks to untouched in front of a component partition. 279 * This is to avoid violating its disklabel area when it starts at the 280 * beginning of the slice. 
281 */ 282 #if !defined(CCD_OFFSET) 283 #define CCD_OFFSET 16 284 #endif 285 286 static void 287 ccd_clone(void *arg, char *name, int namelen, dev_t *dev) 288 { 289 int i, u; 290 char *s; 291 292 if (*dev != NODEV) 293 return; 294 i = dev_stdclone(name, &s, "ccd", &u); 295 if (i != 2) 296 return; 297 if (u >= numccd) 298 return; 299 if (*s <= 'a' || *s >= 'h') 300 return; 301 if (s[1] != '\0') 302 return; 303 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', 304 UID_ROOT, GID_OPERATOR, 0640, name); 305 } 306 307 /* 308 * Called by main() during pseudo-device attachment. All we need 309 * to do is allocate enough space for devices to be configured later, and 310 * add devsw entries. 311 */ 312 static void 313 ccdattach() 314 { 315 int i; 316 int num = NCCD; 317 318 if (num > 1) 319 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 320 else 321 printf("ccd0: Concatenated disk driver\n"); 322 323 ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc), 324 M_DEVBUF, M_NOWAIT); 325 ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice), 326 M_DEVBUF, M_NOWAIT); 327 if ((ccd_softc == NULL) || (ccddevs == NULL)) { 328 printf("WARNING: no memory for concatenated disks\n"); 329 if (ccd_softc != NULL) 330 free(ccd_softc, M_DEVBUF); 331 if (ccddevs != NULL) 332 free(ccddevs, M_DEVBUF); 333 return; 334 } 335 numccd = num; 336 bzero(ccd_softc, num * sizeof(struct ccd_softc)); 337 bzero(ccddevs, num * sizeof(struct ccddevice)); 338 339 cdevsw_add(&ccd_cdevsw); 340 /* XXX: is this necessary? 
*/ 341 for (i = 0; i < numccd; ++i) 342 ccddevs[i].ccd_dk = -1; 343 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); 344 } 345 346 static int 347 ccd_modevent(mod, type, data) 348 module_t mod; 349 int type; 350 void *data; 351 { 352 int error = 0; 353 354 switch (type) { 355 case MOD_LOAD: 356 ccdattach(); 357 break; 358 359 case MOD_UNLOAD: 360 printf("ccd0: Unload not supported!\n"); 361 error = EOPNOTSUPP; 362 break; 363 364 default: /* MOD_SHUTDOWN etc */ 365 break; 366 } 367 return (error); 368 } 369 370 DEV_MODULE(ccd, ccd_modevent, NULL); 371 372 static int 373 ccdinit(ccd, cpaths, p) 374 struct ccddevice *ccd; 375 char **cpaths; 376 struct proc *p; 377 { 378 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 379 struct ccdcinfo *ci = NULL; /* XXX */ 380 size_t size; 381 int ix; 382 struct vnode *vp; 383 size_t minsize; 384 int maxsecsize; 385 struct partinfo dpart; 386 struct ccdgeom *ccg = &cs->sc_geom; 387 char tmppath[MAXPATHLEN]; 388 int error = 0; 389 390 #ifdef DEBUG 391 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 392 printf("ccdinit: unit %d\n", ccd->ccd_unit); 393 #endif 394 395 cs->sc_size = 0; 396 cs->sc_ileave = ccd->ccd_interleave; 397 cs->sc_nccdisks = ccd->ccd_ndev; 398 399 /* Allocate space for the component info. */ 400 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 401 M_DEVBUF, M_WAITOK); 402 403 /* 404 * Verify that each component piece exists and record 405 * relevant information about it. 406 */ 407 maxsecsize = 0; 408 minsize = 0; 409 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 410 vp = ccd->ccd_vpp[ix]; 411 ci = &cs->sc_cinfo[ix]; 412 ci->ci_vp = vp; 413 414 /* 415 * Copy in the pathname of the component. 
416 */ 417 bzero(tmppath, sizeof(tmppath)); /* sanity */ 418 if ((error = copyinstr(cpaths[ix], tmppath, 419 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 420 #ifdef DEBUG 421 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 422 printf("ccd%d: can't copy path, error = %d\n", 423 ccd->ccd_unit, error); 424 #endif 425 goto fail; 426 } 427 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 428 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 429 430 ci->ci_dev = vn_todev(vp); 431 432 /* 433 * Get partition information for the component. 434 */ 435 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 436 FREAD, p->p_ucred, p)) != 0) { 437 #ifdef DEBUG 438 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 439 printf("ccd%d: %s: ioctl failed, error = %d\n", 440 ccd->ccd_unit, ci->ci_path, error); 441 #endif 442 goto fail; 443 } 444 if (dpart.part->p_fstype == FS_BSDFFS) { 445 maxsecsize = 446 ((dpart.disklab->d_secsize > maxsecsize) ? 447 dpart.disklab->d_secsize : maxsecsize); 448 size = dpart.part->p_size - CCD_OFFSET; 449 } else { 450 #ifdef DEBUG 451 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 452 printf("ccd%d: %s: incorrect partition type\n", 453 ccd->ccd_unit, ci->ci_path); 454 #endif 455 error = EFTYPE; 456 goto fail; 457 } 458 459 /* 460 * Calculate the size, truncating to an interleave 461 * boundary if necessary. 462 */ 463 464 if (cs->sc_ileave > 1) 465 size -= size % cs->sc_ileave; 466 467 if (size == 0) { 468 #ifdef DEBUG 469 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 470 printf("ccd%d: %s: size == 0\n", 471 ccd->ccd_unit, ci->ci_path); 472 #endif 473 error = ENODEV; 474 goto fail; 475 } 476 477 if (minsize == 0 || size < minsize) 478 minsize = size; 479 ci->ci_size = size; 480 cs->sc_size += size; 481 } 482 483 /* 484 * Don't allow the interleave to be smaller than 485 * the biggest component sector. 
486 */ 487 if ((cs->sc_ileave > 0) && 488 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 489 #ifdef DEBUG 490 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 491 printf("ccd%d: interleave must be at least %d\n", 492 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 493 #endif 494 error = EINVAL; 495 goto fail; 496 } 497 498 /* 499 * If uniform interleave is desired set all sizes to that of 500 * the smallest component. This will guarentee that a single 501 * interleave table is generated. 502 * 503 * Lost space must be taken into account when calculating the 504 * overall size. Half the space is lost when CCDF_MIRROR is 505 * specified. One disk is lost when CCDF_PARITY is specified. 506 */ 507 if (ccd->ccd_flags & CCDF_UNIFORM) { 508 for (ci = cs->sc_cinfo; 509 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 510 ci->ci_size = minsize; 511 } 512 if (ccd->ccd_flags & CCDF_MIRROR) { 513 /* 514 * Check to see if an even number of components 515 * have been specified. The interleave must also 516 * be non-zero in order for us to be able to 517 * guarentee the topology. 518 */ 519 if (cs->sc_nccdisks % 2) { 520 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 521 error = EINVAL; 522 goto fail; 523 } 524 if (cs->sc_ileave == 0) { 525 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 526 error = EINVAL; 527 goto fail; 528 } 529 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 530 } else if (ccd->ccd_flags & CCDF_PARITY) { 531 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 532 } else { 533 if (cs->sc_ileave == 0) { 534 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 535 error = EINVAL; 536 goto fail; 537 } 538 cs->sc_size = cs->sc_nccdisks * minsize; 539 } 540 } 541 542 /* 543 * Construct the interleave table. 544 */ 545 ccdinterleave(cs, ccd->ccd_unit); 546 547 /* 548 * Create pseudo-geometry based on 1MB cylinders. It's 549 * pretty close. 
550 */ 551 ccg->ccg_secsize = maxsecsize; 552 ccg->ccg_ntracks = 1; 553 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 554 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 555 556 /* 557 * Add an devstat entry for this device. 558 */ 559 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 560 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 561 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 562 DEVSTAT_PRIORITY_ARRAY); 563 564 cs->sc_flags |= CCDF_INITED; 565 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 566 cs->sc_unit = ccd->ccd_unit; 567 return (0); 568 fail: 569 while (ci > cs->sc_cinfo) { 570 ci--; 571 free(ci->ci_path, M_DEVBUF); 572 } 573 free(cs->sc_cinfo, M_DEVBUF); 574 return (error); 575 } 576 577 static void 578 ccdinterleave(cs, unit) 579 struct ccd_softc *cs; 580 int unit; 581 { 582 struct ccdcinfo *ci, *smallci; 583 struct ccdiinfo *ii; 584 daddr_t bn, lbn; 585 int ix; 586 u_long size; 587 588 #ifdef DEBUG 589 if (ccddebug & CCDB_INIT) 590 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 591 #endif 592 593 /* 594 * Allocate an interleave table. The worst case occurs when each 595 * of N disks is of a different size, resulting in N interleave 596 * tables. 597 * 598 * Chances are this is too big, but we don't care. 599 */ 600 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 601 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 602 bzero((caddr_t)cs->sc_itable, size); 603 604 /* 605 * Trivial case: no interleave (actually interleave of disk size). 606 * Each table entry represents a single component in its entirety. 607 * 608 * An interleave of 0 may not be used with a mirror or parity setup. 609 */ 610 if (cs->sc_ileave == 0) { 611 bn = 0; 612 ii = cs->sc_itable; 613 614 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 615 /* Allocate space for ii_index. 
*/
			ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
			ii->ii_ndisk = 1;
			ii->ii_startblk = bn;
			ii->ii_startoff = 0;
			ii->ii_index[0] = ix;
			bn += cs->sc_cinfo[ix].ci_size;
			ii++;
		}
		/* A zero ii_ndisk entry terminates the table. */
		ii->ii_ndisk = 0;
#ifdef DEBUG
		if (ccddebug & CCDB_INIT)
			printiinfo(cs->sc_itable);
#endif
		return;
	}

	/*
	 * The following isn't fast or pretty; it doesn't have to be.
	 *
	 * Each pass emits one table entry: `size' is the component size
	 * already covered by earlier entries, `bn' the running logical
	 * block count and `lbn' the per-component starting offset in
	 * interleave units.
	 */
	size = 0;
	bn = lbn = 0;
	for (ii = cs->sc_itable; ; ii++) {
		/*
		 * Allocate space for ii_index.  We might allocate more than
		 * we use.
		 */
		ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
		    M_DEVBUF, M_WAITOK);

		/*
		 * Locate the smallest of the remaining components, i.e.
		 * the smallest one that is still larger than `size'.
		 */
		smallci = NULL;
		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
		    ci++) {
			if (ci->ci_size > size &&
			    (smallci == NULL ||
			    ci->ci_size < smallci->ci_size)) {
				smallci = ci;
			}
		}

		/*
		 * Nobody left, all done
		 */
		if (smallci == NULL) {
			ii->ii_ndisk = 0;
			break;
		}

		/*
		 * Record starting logical block using an sc_ileave blocksize.
		 */
		ii->ii_startblk = bn / cs->sc_ileave;

		/*
		 * Record starting component block using an sc_ileave
		 * blocksize.  This value is relative to the beginning of
		 * a component disk.
		 */
		ii->ii_startoff = lbn;

		/*
		 * Determine how many disks take part in this interleave
		 * and record their indices.
681 */ 682 ix = 0; 683 for (ci = cs->sc_cinfo; 684 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 685 if (ci->ci_size >= smallci->ci_size) { 686 ii->ii_index[ix++] = ci - cs->sc_cinfo; 687 } 688 } 689 ii->ii_ndisk = ix; 690 bn += ix * (smallci->ci_size - size); 691 lbn = smallci->ci_size / cs->sc_ileave; 692 size = smallci->ci_size; 693 } 694 #ifdef DEBUG 695 if (ccddebug & CCDB_INIT) 696 printiinfo(cs->sc_itable); 697 #endif 698 } 699 700 /* ARGSUSED */ 701 static int 702 ccdopen(dev, flags, fmt, p) 703 dev_t dev; 704 int flags, fmt; 705 struct proc *p; 706 { 707 int unit = ccdunit(dev); 708 struct ccd_softc *cs; 709 struct disklabel *lp; 710 int error = 0, part, pmask; 711 712 #ifdef DEBUG 713 if (ccddebug & CCDB_FOLLOW) 714 printf("ccdopen(%x, %x)\n", dev, flags); 715 #endif 716 if (unit >= numccd) 717 return (ENXIO); 718 cs = &ccd_softc[unit]; 719 720 if ((error = ccdlock(cs)) != 0) 721 return (error); 722 723 lp = &cs->sc_label; 724 725 part = ccdpart(dev); 726 pmask = (1 << part); 727 728 /* 729 * If we're initialized, check to see if there are any other 730 * open partitions. If not, then it's safe to update 731 * the in-core disklabel. 732 */ 733 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 734 ccdgetdisklabel(dev); 735 736 /* Check that the partition exists. 
*/ 737 if (part != RAW_PART && ((part >= lp->d_npartitions) || 738 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 739 error = ENXIO; 740 goto done; 741 } 742 743 cs->sc_openmask |= pmask; 744 done: 745 ccdunlock(cs); 746 return (0); 747 } 748 749 /* ARGSUSED */ 750 static int 751 ccdclose(dev, flags, fmt, p) 752 dev_t dev; 753 int flags, fmt; 754 struct proc *p; 755 { 756 int unit = ccdunit(dev); 757 struct ccd_softc *cs; 758 int error = 0, part; 759 760 #ifdef DEBUG 761 if (ccddebug & CCDB_FOLLOW) 762 printf("ccdclose(%x, %x)\n", dev, flags); 763 #endif 764 765 if (unit >= numccd) 766 return (ENXIO); 767 cs = &ccd_softc[unit]; 768 769 if ((error = ccdlock(cs)) != 0) 770 return (error); 771 772 part = ccdpart(dev); 773 774 /* ...that much closer to allowing unconfiguration... */ 775 cs->sc_openmask &= ~(1 << part); 776 ccdunlock(cs); 777 return (0); 778 } 779 780 static void 781 ccdstrategy(bp) 782 struct bio *bp; 783 { 784 int unit = ccdunit(bp->bio_dev); 785 struct ccd_softc *cs = &ccd_softc[unit]; 786 int s; 787 int wlabel; 788 struct disklabel *lp; 789 790 #ifdef DEBUG 791 if (ccddebug & CCDB_FOLLOW) 792 printf("ccdstrategy(%x): unit %d\n", bp, unit); 793 #endif 794 if ((cs->sc_flags & CCDF_INITED) == 0) { 795 bp->bio_error = ENXIO; 796 bp->bio_flags |= BIO_ERROR; 797 goto done; 798 } 799 800 /* If it's a nil transfer, wake up the top half now. */ 801 if (bp->bio_bcount == 0) 802 goto done; 803 804 lp = &cs->sc_label; 805 806 /* 807 * Do bounds checking and adjust transfer. If there's an 808 * error, the bounds check will flag that for us. 
809 */ 810 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 811 if (ccdpart(bp->bio_dev) != RAW_PART) { 812 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 813 goto done; 814 } else { 815 int pbn; /* in sc_secsize chunks */ 816 long sz; /* in sc_secsize chunks */ 817 818 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 819 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 820 821 /* 822 * If out of bounds return an error. If at the EOF point, 823 * simply read or write less. 824 */ 825 826 if (pbn < 0 || pbn >= cs->sc_size) { 827 bp->bio_resid = bp->bio_bcount; 828 if (pbn != cs->sc_size) { 829 bp->bio_error = EINVAL; 830 bp->bio_flags |= BIO_ERROR; 831 } 832 goto done; 833 } 834 835 /* 836 * If the request crosses EOF, truncate the request. 837 */ 838 if (pbn + sz > cs->sc_size) { 839 bp->bio_bcount = (cs->sc_size - pbn) * 840 cs->sc_geom.ccg_secsize; 841 } 842 } 843 844 bp->bio_resid = bp->bio_bcount; 845 846 /* 847 * "Start" the unit. 848 */ 849 s = splbio(); 850 ccdstart(cs, bp); 851 splx(s); 852 return; 853 done: 854 biodone(bp); 855 } 856 857 static void 858 ccdstart(cs, bp) 859 struct ccd_softc *cs; 860 struct bio *bp; 861 { 862 long bcount, rcount; 863 struct ccdbuf *cbp[4]; 864 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 865 caddr_t addr; 866 daddr_t bn; 867 struct partition *pp; 868 869 #ifdef DEBUG 870 if (ccddebug & CCDB_FOLLOW) 871 printf("ccdstart(%x, %x)\n", cs, bp); 872 #endif 873 874 /* Record the transaction start */ 875 devstat_start_transaction(&cs->device_stats); 876 877 /* 878 * Translate the partition-relative block number to an absolute. 
879 */ 880 bn = bp->bio_blkno; 881 if (ccdpart(bp->bio_dev) != RAW_PART) { 882 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; 883 bn += pp->p_offset; 884 } 885 886 /* 887 * Allocate component buffers and fire off the requests 888 */ 889 addr = bp->bio_data; 890 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 891 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 892 rcount = cbp[0]->cb_buf.bio_bcount; 893 894 if (cs->sc_cflags & CCDF_MIRROR) { 895 /* 896 * Mirroring. Writes go to both disks, reads are 897 * taken from whichever disk seems most appropriate. 898 * 899 * We attempt to localize reads to the disk whos arm 900 * is nearest the read request. We ignore seeks due 901 * to writes when making this determination and we 902 * also try to avoid hogging. 903 */ 904 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 905 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 906 BIO_STRATEGY(&cbp[1]->cb_buf, 0); 907 } else { 908 int pick = cs->sc_pick; 909 daddr_t range = cs->sc_size / 16; 910 911 if (bn < cs->sc_blk[pick] - range || 912 bn > cs->sc_blk[pick] + range 913 ) { 914 cs->sc_pick = pick = 1 - pick; 915 } 916 cs->sc_blk[pick] = bn + btodb(rcount); 917 BIO_STRATEGY(&cbp[pick]->cb_buf, 0); 918 } 919 } else { 920 /* 921 * Not mirroring 922 */ 923 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 924 } 925 bn += btodb(rcount); 926 addr += rcount; 927 } 928 } 929 930 /* 931 * Build a component buffer header. 932 */ 933 static void 934 ccdbuffer(cb, cs, bp, bn, addr, bcount) 935 struct ccdbuf **cb; 936 struct ccd_softc *cs; 937 struct bio *bp; 938 daddr_t bn; 939 caddr_t addr; 940 long bcount; 941 { 942 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 943 struct ccdbuf *cbp; 944 daddr_t cbn, cboff; 945 off_t cbc; 946 947 #ifdef DEBUG 948 if (ccddebug & CCDB_IO) 949 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 950 cs, bp, bn, addr, bcount); 951 #endif 952 /* 953 * Determine which component bn falls in. 
*/
	cbn = bn;
	cboff = 0;

	if (cs->sc_ileave == 0) {
		/*
		 * Serially concatenated and neither a mirror nor a parity
		 * config.  This is a special case.
		 */
		daddr_t	sblk;

		/* Walk the components until the one containing cbn. */
		sblk = 0;
		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
			sblk += ci->ci_size;
		cbn -= sblk;
	} else {
		struct ccdiinfo *ii;
		int ccdisk, off;

		/*
		 * Calculate cbn, the logical superblock (sc_ileave chunks),
		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
		 * to cbn.
		 */
		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */

		/*
		 * Figure out which interleave table to use: stop at the
		 * first entry that starts beyond cbn (or at the
		 * terminator) and step back one entry.
		 */
		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
			if (ii->ii_startblk > cbn)
				break;
		}
		ii--;

		/*
		 * off is the logical superblock relative to the beginning
		 * of this interleave block.
		 */
		off = cbn - ii->ii_startblk;

		/*
		 * We must calculate which disk component to use (ccdisk),
		 * and recalculate cbn to be the superblock relative to
		 * the beginning of the component.  This is typically done by
		 * adding 'off' and ii->ii_startoff together.  However, 'off'
		 * must typically be divided by the number of components in
		 * this interleave array to properly convert it from a
		 * CCD-relative logical superblock number to a
		 * component-relative superblock number.
		 */
		if (ii->ii_ndisk == 1) {
			/*
			 * When we have just one disk, it can't be a mirror
			 * or a parity config.
			 */
			ccdisk = ii->ii_index[0];
			cbn = ii->ii_startoff + off;
		} else {
			if (cs->sc_cflags & CCDF_MIRROR) {
				/*
				 * We have forced a uniform mapping, resulting
				 * in a single interleave array.  We double
				 * up on the first half of the available
				 * components and our mirror is in the second
				 * half.  This only works with a single
				 * interleave array because doubling up
				 * doubles the number of sectors, so there
				 * cannot be another interleave array because
				 * the next interleave array's calculations
				 * would be off.
				 */
				int ndisk2 = ii->ii_ndisk / 2;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
			} else if (cs->sc_cflags & CCDF_PARITY) {
				/*
				 * XXX not implemented yet
				 */
				int ndisk2 = ii->ii_ndisk - 1;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				if (cbn % ii->ii_ndisk <= ccdisk)
					ccdisk++;
			} else {
				ccdisk = ii->ii_index[off % ii->ii_ndisk];
				cbn = ii->ii_startoff + off / ii->ii_ndisk;
			}
		}

		ci = &cs->sc_cinfo[ccdisk];

		/*
		 * Convert cbn from a superblock to a normal block so it
		 * can be used to calculate (along with cboff) the normal
		 * block index into this particular disk.
		 */
		cbn *= cs->sc_ileave;
	}

	/*
	 * Fill in the component buf structure.  Component blocks are
	 * shifted by CCD_OFFSET.
	 */
	cbp = getccdbuf(NULL);
	cbp->cb_buf.bio_cmd = bp->bio_cmd;
	cbp->cb_buf.bio_done = ccdiodone;
	cbp->cb_buf.bio_dev = ci->ci_dev;		/* XXX */
	cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET;
	cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
	cbp->cb_buf.bio_data = addr;
	/*
	 * cbc is the byte count left before the end of the component
	 * (no interleave) or the end of the interleave chunk.
	 */
	if (cs->sc_ileave == 0)
		cbc = dbtob((off_t)(ci->ci_size - cbn));
	else
		cbc = dbtob((off_t)(cs->sc_ileave - cboff));
	cbp->cb_buf.bio_bcount = (cbc < bcount) ?
cbc : bcount; 1071 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 1072 1073 /* 1074 * context for ccdiodone 1075 */ 1076 cbp->cb_obp = bp; 1077 cbp->cb_unit = cs - ccd_softc; 1078 cbp->cb_comp = ci - cs->sc_cinfo; 1079 1080 #ifdef DEBUG 1081 if (ccddebug & CCDB_IO) 1082 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n", 1083 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.bio_blkno, 1084 cbp->cb_buf.bio_data, cbp->cb_buf.bio_bcount); 1085 #endif 1086 cb[0] = cbp; 1087 1088 /* 1089 * Note: both I/O's setup when reading from mirror, but only one 1090 * will be executed. 1091 */ 1092 if (cs->sc_cflags & CCDF_MIRROR) { 1093 /* mirror, setup second I/O */ 1094 cbp = getccdbuf(cb[0]); 1095 cbp->cb_buf.bio_dev = ci2->ci_dev; 1096 cbp->cb_comp = ci2 - cs->sc_cinfo; 1097 cb[1] = cbp; 1098 /* link together the ccdbuf's and clear "mirror done" flag */ 1099 cb[0]->cb_mirror = cb[1]; 1100 cb[1]->cb_mirror = cb[0]; 1101 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1102 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1103 } 1104 } 1105 1106 static void 1107 ccdintr(cs, bp) 1108 struct ccd_softc *cs; 1109 struct bio *bp; 1110 { 1111 #ifdef DEBUG 1112 if (ccddebug & CCDB_FOLLOW) 1113 printf("ccdintr(%x, %x)\n", cs, bp); 1114 #endif 1115 /* 1116 * Request is done for better or worse, wakeup the top half. 1117 */ 1118 if (bp->bio_flags & BIO_ERROR) 1119 bp->bio_resid = bp->bio_bcount; 1120 devstat_end_transaction_bio(&cs->device_stats, bp); 1121 biodone(bp); 1122 } 1123 1124 /* 1125 * Called at interrupt time. 1126 * Mark the component as done and if all components are done, 1127 * take a ccd interrupt. 
1128 */ 1129 static void 1130 ccdiodone(ibp) 1131 struct bio *ibp; 1132 { 1133 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 1134 struct bio *bp = cbp->cb_obp; 1135 int unit = cbp->cb_unit; 1136 int count, s; 1137 1138 s = splbio(); 1139 #ifdef DEBUG 1140 if (ccddebug & CCDB_FOLLOW) 1141 printf("ccdiodone(%x)\n", cbp); 1142 if (ccddebug & CCDB_IO) { 1143 printf("ccdiodone: bp %x bcount %d resid %d\n", 1144 bp, bp->bio_bcount, bp->bio_resid); 1145 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n", 1146 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, 1147 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1148 cbp->cb_buf.bio_bcount); 1149 } 1150 #endif 1151 /* 1152 * If an error occured, report it. If this is a mirrored 1153 * configuration and the first of two possible reads, do not 1154 * set the error in the bp yet because the second read may 1155 * succeed. 1156 */ 1157 1158 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1159 const char *msg = ""; 1160 1161 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1162 (cbp->cb_buf.bio_cmd == BIO_READ) && 1163 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1164 /* 1165 * We will try our read on the other disk down 1166 * below, also reverse the default pick so if we 1167 * are doing a scan we do not keep hitting the 1168 * bad disk first. 1169 */ 1170 struct ccd_softc *cs = &ccd_softc[unit]; 1171 1172 msg = ", trying other disk"; 1173 cs->sc_pick = 1 - cs->sc_pick; 1174 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 1175 } else { 1176 bp->bio_flags |= BIO_ERROR; 1177 bp->bio_error = cbp->cb_buf.bio_error ? 1178 cbp->cb_buf.bio_error : EIO; 1179 } 1180 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1181 unit, bp->bio_error, cbp->cb_comp, 1182 (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg); 1183 } 1184 1185 /* 1186 * Process mirror. If we are writing, I/O has been initiated on both 1187 * buffers and we fall through only after both are finished. 1188 * 1189 * If we are reading only one I/O is initiated at a time. 
If an 1190 * error occurs we initiate the second I/O and return, otherwise 1191 * we free the second I/O without initiating it. 1192 */ 1193 1194 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1195 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 1196 /* 1197 * When writing, handshake with the second buffer 1198 * to determine when both are done. If both are not 1199 * done, return here. 1200 */ 1201 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1202 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1203 putccdbuf(cbp); 1204 splx(s); 1205 return; 1206 } 1207 } else { 1208 /* 1209 * When reading, either dispose of the second buffer 1210 * or initiate I/O on the second buffer if an error 1211 * occured with this one. 1212 */ 1213 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1214 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1215 cbp->cb_mirror->cb_pflags |= 1216 CCDPF_MIRROR_DONE; 1217 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0); 1218 putccdbuf(cbp); 1219 splx(s); 1220 return; 1221 } else { 1222 putccdbuf(cbp->cb_mirror); 1223 /* fall through */ 1224 } 1225 } 1226 } 1227 } 1228 1229 /* 1230 * use bio_caller1 to determine how big the original request was rather 1231 * then bio_bcount, because bio_bcount may have been truncated for EOF. 1232 * 1233 * XXX We check for an error, but we do not test the resid for an 1234 * aligned EOF condition. This may result in character & block 1235 * device access not recognizing EOF properly when read or written 1236 * sequentially, but will not effect filesystems. 1237 */ 1238 count = (long)cbp->cb_buf.bio_caller1; 1239 putccdbuf(cbp); 1240 1241 /* 1242 * If all done, "interrupt". 
1243 */ 1244 bp->bio_resid -= count; 1245 if (bp->bio_resid < 0) 1246 panic("ccdiodone: count"); 1247 if (bp->bio_resid == 0) 1248 ccdintr(&ccd_softc[unit], bp); 1249 splx(s); 1250 } 1251 1252 static int 1253 ccdioctl(dev, cmd, data, flag, p) 1254 dev_t dev; 1255 u_long cmd; 1256 caddr_t data; 1257 int flag; 1258 struct proc *p; 1259 { 1260 int unit = ccdunit(dev); 1261 int i, j, lookedup = 0, error = 0; 1262 int part, pmask, s; 1263 struct ccd_softc *cs; 1264 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1265 struct ccddevice ccd; 1266 char **cpp; 1267 struct vnode **vpp; 1268 1269 if (unit >= numccd) 1270 return (ENXIO); 1271 cs = &ccd_softc[unit]; 1272 1273 bzero(&ccd, sizeof(ccd)); 1274 1275 switch (cmd) { 1276 case CCDIOCSET: 1277 if (cs->sc_flags & CCDF_INITED) 1278 return (EBUSY); 1279 1280 if ((flag & FWRITE) == 0) 1281 return (EBADF); 1282 1283 if ((error = ccdlock(cs)) != 0) 1284 return (error); 1285 1286 /* Fill in some important bits. */ 1287 ccd.ccd_unit = unit; 1288 ccd.ccd_interleave = ccio->ccio_ileave; 1289 if (ccd.ccd_interleave == 0 && 1290 ((ccio->ccio_flags & CCDF_MIRROR) || 1291 (ccio->ccio_flags & CCDF_PARITY))) { 1292 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1293 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1294 } 1295 if ((ccio->ccio_flags & CCDF_MIRROR) && 1296 (ccio->ccio_flags & CCDF_PARITY)) { 1297 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1298 ccio->ccio_flags &= ~CCDF_PARITY; 1299 } 1300 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1301 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1302 printf("ccd%d: mirror/parity forces uniform flag\n", 1303 unit); 1304 ccio->ccio_flags |= CCDF_UNIFORM; 1305 } 1306 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1307 1308 /* 1309 * Allocate space for and copy in the array of 1310 * componet pathnames and device numbers. 
1311 */ 1312 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1313 M_DEVBUF, M_WAITOK); 1314 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1315 M_DEVBUF, M_WAITOK); 1316 1317 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1318 ccio->ccio_ndisks * sizeof(char **)); 1319 if (error) { 1320 free(vpp, M_DEVBUF); 1321 free(cpp, M_DEVBUF); 1322 ccdunlock(cs); 1323 return (error); 1324 } 1325 1326 #ifdef DEBUG 1327 if (ccddebug & CCDB_INIT) 1328 for (i = 0; i < ccio->ccio_ndisks; ++i) 1329 printf("ccdioctl: component %d: 0x%x\n", 1330 i, cpp[i]); 1331 #endif 1332 1333 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1334 #ifdef DEBUG 1335 if (ccddebug & CCDB_INIT) 1336 printf("ccdioctl: lookedup = %d\n", lookedup); 1337 #endif 1338 if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) { 1339 for (j = 0; j < lookedup; ++j) 1340 (void)vn_close(vpp[j], FREAD|FWRITE, 1341 p->p_ucred, p); 1342 free(vpp, M_DEVBUF); 1343 free(cpp, M_DEVBUF); 1344 ccdunlock(cs); 1345 return (error); 1346 } 1347 ++lookedup; 1348 } 1349 ccd.ccd_cpp = cpp; 1350 ccd.ccd_vpp = vpp; 1351 ccd.ccd_ndev = ccio->ccio_ndisks; 1352 1353 /* 1354 * Initialize the ccd. Fills in the softc for us. 1355 */ 1356 if ((error = ccdinit(&ccd, cpp, p)) != 0) { 1357 for (j = 0; j < lookedup; ++j) 1358 (void)vn_close(vpp[j], FREAD|FWRITE, 1359 p->p_ucred, p); 1360 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1361 free(vpp, M_DEVBUF); 1362 free(cpp, M_DEVBUF); 1363 ccdunlock(cs); 1364 return (error); 1365 } 1366 1367 /* 1368 * The ccd has been successfully initialized, so 1369 * we can place it into the array and read the disklabel. 
1370 */ 1371 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1372 ccio->ccio_unit = unit; 1373 ccio->ccio_size = cs->sc_size; 1374 ccdgetdisklabel(dev); 1375 1376 ccdunlock(cs); 1377 1378 break; 1379 1380 case CCDIOCCLR: 1381 if ((cs->sc_flags & CCDF_INITED) == 0) 1382 return (ENXIO); 1383 1384 if ((flag & FWRITE) == 0) 1385 return (EBADF); 1386 1387 if ((error = ccdlock(cs)) != 0) 1388 return (error); 1389 1390 /* Don't unconfigure if any other partitions are open */ 1391 part = ccdpart(dev); 1392 pmask = (1 << part); 1393 if ((cs->sc_openmask & ~pmask)) { 1394 ccdunlock(cs); 1395 return (EBUSY); 1396 } 1397 1398 /* 1399 * Free ccd_softc information and clear entry. 1400 */ 1401 1402 /* Close the components and free their pathnames. */ 1403 for (i = 0; i < cs->sc_nccdisks; ++i) { 1404 /* 1405 * XXX: this close could potentially fail and 1406 * cause Bad Things. Maybe we need to force 1407 * the close to happen? 1408 */ 1409 #ifdef DEBUG 1410 if (ccddebug & CCDB_VNODE) 1411 vprint("CCDIOCCLR: vnode info", 1412 cs->sc_cinfo[i].ci_vp); 1413 #endif 1414 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1415 p->p_ucred, p); 1416 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1417 } 1418 1419 /* Free interleave index. */ 1420 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1421 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1422 1423 /* Free component info and interleave table. */ 1424 free(cs->sc_cinfo, M_DEVBUF); 1425 free(cs->sc_itable, M_DEVBUF); 1426 cs->sc_flags &= ~CCDF_INITED; 1427 1428 /* 1429 * Free ccddevice information and clear entry. 1430 */ 1431 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1432 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1433 ccd.ccd_dk = -1; 1434 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1435 1436 /* 1437 * And remove the devstat entry. 1438 */ 1439 devstat_remove_entry(&cs->device_stats); 1440 1441 /* This must be atomic. 
*/ 1442 s = splhigh(); 1443 ccdunlock(cs); 1444 bzero(cs, sizeof(struct ccd_softc)); 1445 splx(s); 1446 1447 break; 1448 1449 case DIOCGDINFO: 1450 if ((cs->sc_flags & CCDF_INITED) == 0) 1451 return (ENXIO); 1452 1453 *(struct disklabel *)data = cs->sc_label; 1454 break; 1455 1456 case DIOCGPART: 1457 if ((cs->sc_flags & CCDF_INITED) == 0) 1458 return (ENXIO); 1459 1460 ((struct partinfo *)data)->disklab = &cs->sc_label; 1461 ((struct partinfo *)data)->part = 1462 &cs->sc_label.d_partitions[ccdpart(dev)]; 1463 break; 1464 1465 case DIOCWDINFO: 1466 case DIOCSDINFO: 1467 if ((cs->sc_flags & CCDF_INITED) == 0) 1468 return (ENXIO); 1469 1470 if ((flag & FWRITE) == 0) 1471 return (EBADF); 1472 1473 if ((error = ccdlock(cs)) != 0) 1474 return (error); 1475 1476 cs->sc_flags |= CCDF_LABELLING; 1477 1478 error = setdisklabel(&cs->sc_label, 1479 (struct disklabel *)data, 0); 1480 if (error == 0) { 1481 if (cmd == DIOCWDINFO) 1482 error = writedisklabel(CCDLABELDEV(dev), 1483 &cs->sc_label); 1484 } 1485 1486 cs->sc_flags &= ~CCDF_LABELLING; 1487 1488 ccdunlock(cs); 1489 1490 if (error) 1491 return (error); 1492 break; 1493 1494 case DIOCWLABEL: 1495 if ((cs->sc_flags & CCDF_INITED) == 0) 1496 return (ENXIO); 1497 1498 if ((flag & FWRITE) == 0) 1499 return (EBADF); 1500 if (*(int *)data != 0) 1501 cs->sc_flags |= CCDF_WLABEL; 1502 else 1503 cs->sc_flags &= ~CCDF_WLABEL; 1504 break; 1505 1506 default: 1507 return (ENOTTY); 1508 } 1509 1510 return (0); 1511 } 1512 1513 static int 1514 ccdsize(dev) 1515 dev_t dev; 1516 { 1517 struct ccd_softc *cs; 1518 int part, size; 1519 1520 if (ccdopen(dev, 0, S_IFCHR, curproc)) 1521 return (-1); 1522 1523 cs = &ccd_softc[ccdunit(dev)]; 1524 part = ccdpart(dev); 1525 1526 if ((cs->sc_flags & CCDF_INITED) == 0) 1527 return (-1); 1528 1529 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1530 size = -1; 1531 else 1532 size = cs->sc_label.d_partitions[part].p_size; 1533 1534 if (ccdclose(dev, 0, S_IFCHR, curproc)) 1535 return (-1); 
1536 1537 return (size); 1538 } 1539 1540 static int 1541 ccddump(dev) 1542 dev_t dev; 1543 { 1544 1545 /* Not implemented. */ 1546 return ENXIO; 1547 } 1548 1549 /* 1550 * Lookup the provided name in the filesystem. If the file exists, 1551 * is a valid block device, and isn't being used by anyone else, 1552 * set *vpp to the file's vnode. 1553 */ 1554 static int 1555 ccdlookup(path, p, vpp) 1556 char *path; 1557 struct proc *p; 1558 struct vnode **vpp; /* result */ 1559 { 1560 struct nameidata nd; 1561 struct vnode *vp; 1562 int error, flags; 1563 1564 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p); 1565 flags = FREAD | FWRITE; 1566 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1567 #ifdef DEBUG 1568 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1569 printf("ccdlookup: vn_open error = %d\n", error); 1570 #endif 1571 return (error); 1572 } 1573 vp = nd.ni_vp; 1574 1575 if (vp->v_usecount > 1) { 1576 error = EBUSY; 1577 goto bad; 1578 } 1579 1580 if (!vn_isdisk(vp, &error)) 1581 goto bad; 1582 1583 #ifdef DEBUG 1584 if (ccddebug & CCDB_VNODE) 1585 vprint("ccdlookup: vnode info", vp); 1586 #endif 1587 1588 VOP_UNLOCK(vp, 0, p); 1589 NDFREE(&nd, NDF_ONLY_PNBUF); 1590 *vpp = vp; 1591 return (0); 1592 bad: 1593 VOP_UNLOCK(vp, 0, p); 1594 NDFREE(&nd, NDF_ONLY_PNBUF); 1595 /* vn_close does vrele() for vp */ 1596 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); 1597 return (error); 1598 } 1599 1600 /* 1601 * Read the disklabel from the ccd. If one is not present, fake one 1602 * up. 
1603 */ 1604 static void 1605 ccdgetdisklabel(dev) 1606 dev_t dev; 1607 { 1608 int unit = ccdunit(dev); 1609 struct ccd_softc *cs = &ccd_softc[unit]; 1610 char *errstring; 1611 struct disklabel *lp = &cs->sc_label; 1612 struct ccdgeom *ccg = &cs->sc_geom; 1613 1614 bzero(lp, sizeof(*lp)); 1615 1616 lp->d_secperunit = cs->sc_size; 1617 lp->d_secsize = ccg->ccg_secsize; 1618 lp->d_nsectors = ccg->ccg_nsectors; 1619 lp->d_ntracks = ccg->ccg_ntracks; 1620 lp->d_ncylinders = ccg->ccg_ncylinders; 1621 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1622 1623 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1624 lp->d_type = DTYPE_CCD; 1625 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1626 lp->d_rpm = 3600; 1627 lp->d_interleave = 1; 1628 lp->d_flags = 0; 1629 1630 lp->d_partitions[RAW_PART].p_offset = 0; 1631 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1632 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1633 lp->d_npartitions = RAW_PART + 1; 1634 1635 lp->d_bbsize = BBSIZE; /* XXX */ 1636 lp->d_sbsize = SBSIZE; /* XXX */ 1637 1638 lp->d_magic = DISKMAGIC; 1639 lp->d_magic2 = DISKMAGIC; 1640 lp->d_checksum = dkcksum(&cs->sc_label); 1641 1642 /* 1643 * Call the generic disklabel extraction routine. 1644 */ 1645 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1646 if (errstring != NULL) 1647 ccdmakedisklabel(cs); 1648 1649 #ifdef DEBUG 1650 /* It's actually extremely common to have unlabeled ccds. */ 1651 if (ccddebug & CCDB_LABEL) 1652 if (errstring != NULL) 1653 printf("ccd%d: %s\n", unit, errstring); 1654 #endif 1655 } 1656 1657 /* 1658 * Take care of things one might want to take care of in the event 1659 * that a disklabel isn't present. 1660 */ 1661 static void 1662 ccdmakedisklabel(cs) 1663 struct ccd_softc *cs; 1664 { 1665 struct disklabel *lp = &cs->sc_label; 1666 1667 /* 1668 * For historical reasons, if there's no disklabel present 1669 * the raw partition must be marked FS_BSDFFS. 
1670 */ 1671 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1672 1673 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1674 } 1675 1676 /* 1677 * Wait interruptibly for an exclusive lock. 1678 * 1679 * XXX 1680 * Several drivers do this; it should be abstracted and made MP-safe. 1681 */ 1682 static int 1683 ccdlock(cs) 1684 struct ccd_softc *cs; 1685 { 1686 int error; 1687 1688 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1689 cs->sc_flags |= CCDF_WANTED; 1690 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1691 return (error); 1692 } 1693 cs->sc_flags |= CCDF_LOCKED; 1694 return (0); 1695 } 1696 1697 /* 1698 * Unlock and wake up any waiters. 1699 */ 1700 static void 1701 ccdunlock(cs) 1702 struct ccd_softc *cs; 1703 { 1704 1705 cs->sc_flags &= ~CCDF_LOCKED; 1706 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1707 cs->sc_flags &= ~CCDF_WANTED; 1708 wakeup(cs); 1709 } 1710 } 1711 1712 #ifdef DEBUG 1713 static void 1714 printiinfo(ii) 1715 struct ccdiinfo *ii; 1716 { 1717 int ix, i; 1718 1719 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1720 printf(" itab[%d]: #dk %d sblk %d soff %d", 1721 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1722 for (i = 0; i < ii->ii_ndisk; i++) 1723 printf(" %d", ii->ii_index[i]); 1724 printf("\n"); 1725 } 1726 } 1727 #endif 1728