1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include <sys/param.h> 91 #include <sys/systm.h> 92 #include <sys/kernel.h> 93 #include <sys/module.h> 94 #include <sys/proc.h> 95 #include <sys/bio.h> 96 #include <sys/malloc.h> 97 #include <sys/namei.h> 98 #include <sys/conf.h> 99 #include <sys/stat.h> 100 #include <sys/sysctl.h> 101 #include <sys/disklabel.h> 102 #include <ufs/ffs/fs.h> 103 #include <sys/devicestat.h> 104 #include <sys/fcntl.h> 105 #include <sys/vnode.h> 106 107 #include <sys/ccdvar.h> 108 109 MALLOC_DEFINE(M_CCD, "CCD driver", "Concatenated Disk driver"); 110 111 #if defined(CCDDEBUG) && !defined(DEBUG) 112 #define DEBUG 113 #endif 114 115 #ifdef DEBUG 116 #define CCDB_FOLLOW 0x01 117 #define CCDB_INIT 0x02 118 #define CCDB_IO 0x04 119 #define CCDB_LABEL 0x08 120 #define CCDB_VNODE 0x10 121 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 122 CCDB_VNODE; 123 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 124 #endif 125 126 #define ccdunit(x) dkunit(x) 127 #define ccdpart(x) dkpart(x) 128 129 /* 130 This is how mirroring works (only writes are special): 131 132 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 133 linked together by the cb_mirror field. "cb_pflags & 134 CCDPF_MIRROR_DONE" is set to 0 on both of them. 135 136 When a component returns to ccdiodone(), it checks if "cb_pflags & 137 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 138 flag and returns. If it is, it means its partner has already 139 returned, so it will go to the regular cleanup. 140 141 */ 142 143 struct ccdbuf { 144 struct bio cb_buf; /* new I/O buf */ 145 struct bio *cb_obp; /* ptr. to original I/O buf */ 146 struct ccdbuf *cb_freenext; /* free list link */ 147 int cb_unit; /* target unit */ 148 int cb_comp; /* target component */ 149 int cb_pflags; /* mirror/parity status flag */ 150 struct ccdbuf *cb_mirror; /* mirror counterpart */ 151 }; 152 153 /* bits in cb_pflags */ 154 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 155 156 #define CCDLABELDEV(dev) \ 157 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 158 159 /* convinient macros for often-used statements */ 160 #define IS_ALLOCATED(unit) (ccdfind(unit) != NULL) 161 #define IS_INITED(cs) (((cs)->sc_flags & CCDF_INITED) != 0) 162 163 static d_open_t ccdopen; 164 static d_close_t ccdclose; 165 static d_strategy_t ccdstrategy; 166 static d_ioctl_t ccdioctl; 167 static d_dump_t ccddump; 168 static d_psize_t ccdsize; 169 170 #define NCCDFREEHIWAT 16 171 172 #define CDEV_MAJOR 74 173 174 static struct cdevsw ccd_cdevsw = { 175 /* open */ ccdopen, 176 /* close */ ccdclose, 177 /* read */ physread, 178 /* write */ physwrite, 179 /* ioctl */ ccdioctl, 180 /* poll */ nopoll, 181 /* mmap */ nommap, 182 /* strategy */ ccdstrategy, 183 /* name */ "ccd", 184 /* maj */ CDEV_MAJOR, 185 /* dump */ ccddump, 186 /* psize */ ccdsize, 187 /* flags */ D_DISK, 188 }; 189 static LIST_HEAD(, ccd_s) ccd_softc_list = LIST_HEAD_INITIALIZER(&ccd_softc_list); 190 191 static struct ccd_s *ccdfind(int); 192 static struct ccd_s *ccdnew(int); 193 static int ccddestroy(struct ccd_s *, struct proc *); 194 195 /* called during module initialization */ 196 static void ccdattach(void); 197 static int ccd_modevent(module_t, int, void *); 198 199 /* called by biodone() at interrupt time */ 200 static void ccdiodone(struct bio *bp); 201 202 static void ccdstart(struct ccd_s *, struct bio *); 203 static void ccdinterleave(struct ccd_s *, int); 204 static void ccdintr(struct ccd_s *, struct bio *); 205 static int ccdinit(struct ccd_s *, char **, struct proc *); 206 static int ccdlookup(char *, struct proc *p, struct vnode **); 207 static void ccdbuffer(struct ccdbuf **ret, struct ccd_s *, 208 struct bio *, daddr_t, caddr_t, long); 209 static void ccdgetdisklabel(dev_t); 210 static void ccdmakedisklabel(struct ccd_s *); 211 static int ccdlock(struct ccd_s *); 212 static void ccdunlock(struct ccd_s *); 213 214 #ifdef DEBUG 215 static void printiinfo(struct ccdiinfo *); 216 #endif 217 218 /* Non-private for the benefit of libkvm. */ 219 struct ccdbuf *ccdfreebufs; 220 static int numccdfreebufs; 221 222 /* 223 * getccdbuf() - Allocate and zero a ccd buffer. 224 * 225 * This routine is called at splbio(). 226 */ 227 228 static __inline 229 struct ccdbuf * 230 getccdbuf(struct ccdbuf *cpy) 231 { 232 struct ccdbuf *cbp; 233 234 /* 235 * Allocate from freelist or malloc as necessary 236 */ 237 if ((cbp = ccdfreebufs) != NULL) { 238 ccdfreebufs = cbp->cb_freenext; 239 --numccdfreebufs; 240 } else { 241 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 242 } 243 244 /* 245 * Used by mirroring code 246 */ 247 if (cpy) 248 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 249 else 250 bzero(cbp, sizeof(struct ccdbuf)); 251 252 /* 253 * independant struct bio initialization 254 */ 255 256 return(cbp); 257 } 258 259 /* 260 * putccdbuf() - Free a ccd buffer. 261 * 262 * This routine is called at splbio(). 263 */ 264 265 static __inline 266 void 267 putccdbuf(struct ccdbuf *cbp) 268 { 269 270 if (numccdfreebufs < NCCDFREEHIWAT) { 271 cbp->cb_freenext = ccdfreebufs; 272 ccdfreebufs = cbp; 273 ++numccdfreebufs; 274 } else { 275 free((caddr_t)cbp, M_DEVBUF); 276 } 277 } 278 279 280 /* 281 * Number of blocks to untouched in front of a component partition. 282 * This is to avoid violating its disklabel area when it starts at the 283 * beginning of the slice. 284 */ 285 #if !defined(CCD_OFFSET) 286 #define CCD_OFFSET 16 287 #endif 288 289 static struct ccd_s * 290 ccdfind(int unit) 291 { 292 struct ccd_s *sc = NULL; 293 294 /* XXX: LOCK(unique unit numbers) */ 295 LIST_FOREACH(sc, &ccd_softc_list, list) { 296 if (sc->sc_unit == unit) 297 break; 298 } 299 /* XXX: UNLOCK(unique unit numbers) */ 300 return ((sc == NULL) || (sc->sc_unit != unit) ? NULL : sc); 301 } 302 303 static struct ccd_s * 304 ccdnew(int unit) 305 { 306 struct ccd_s *sc; 307 308 /* XXX: LOCK(unique unit numbers) */ 309 if (IS_ALLOCATED(unit) || unit > DKMAXUNIT) 310 return (NULL); 311 312 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO); 313 sc->sc_unit = unit; 314 LIST_INSERT_HEAD(&ccd_softc_list, sc, list); 315 /* XXX: UNLOCK(unique unit numbers) */ 316 return (sc); 317 } 318 319 static int 320 ccddestroy(struct ccd_s *sc, struct proc *p) 321 { 322 323 /* XXX: LOCK(unique unit numbers) */ 324 LIST_REMOVE(sc, list); 325 /* XXX: UNLOCK(unique unit numbers) */ 326 FREE(sc, M_CCD); 327 return (0); 328 } 329 330 static void 331 ccd_clone(void *arg, char *name, int namelen, dev_t *dev) 332 { 333 int i, u; 334 char *s; 335 336 if (*dev != NODEV) 337 return; 338 i = dev_stdclone(name, &s, "ccd", &u); 339 if (i != 2) 340 return; 341 if (*s < 'a' || *s > 'h') 342 return; 343 if (s[1] != '\0') 344 return; 345 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', 346 UID_ROOT, GID_OPERATOR, 0640, name); 347 } 348 349 /* 350 * Called by main() during pseudo-device attachment. All we need 351 * to do is to add devsw entries. 352 */ 353 static void 354 ccdattach() 355 { 356 357 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); 358 } 359 360 static int 361 ccd_modevent(module_t mod, int type, void *data) 362 { 363 int error = 0; 364 365 switch (type) { 366 case MOD_LOAD: 367 ccdattach(); 368 break; 369 370 case MOD_UNLOAD: 371 printf("ccd0: Unload not supported!\n"); 372 error = EOPNOTSUPP; 373 break; 374 375 default: /* MOD_SHUTDOWN etc */ 376 break; 377 } 378 return (error); 379 } 380 381 DEV_MODULE(ccd, ccd_modevent, NULL); 382 383 static int 384 ccdinit(struct ccd_s *cs, char **cpaths, struct proc *p) 385 { 386 struct ccdcinfo *ci = NULL; /* XXX */ 387 size_t size; 388 int ix; 389 struct vnode *vp; 390 size_t minsize; 391 int maxsecsize; 392 struct partinfo dpart; 393 struct ccdgeom *ccg = &cs->sc_geom; 394 char tmppath[MAXPATHLEN]; 395 int error = 0; 396 397 #ifdef DEBUG 398 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 399 printf("ccdinit: unit %d\n", cs->sc_unit); 400 #endif 401 402 cs->sc_size = 0; 403 404 /* Allocate space for the component info. */ 405 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 406 M_DEVBUF, M_WAITOK); 407 408 /* 409 * Verify that each component piece exists and record 410 * relevant information about it. 411 */ 412 maxsecsize = 0; 413 minsize = 0; 414 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 415 vp = cs->sc_vpp[ix]; 416 ci = &cs->sc_cinfo[ix]; 417 ci->ci_vp = vp; 418 419 /* 420 * Copy in the pathname of the component. 421 */ 422 bzero(tmppath, sizeof(tmppath)); /* sanity */ 423 if ((error = copyinstr(cpaths[ix], tmppath, 424 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 425 #ifdef DEBUG 426 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 427 printf("ccd%d: can't copy path, error = %d\n", 428 cs->sc_unit, error); 429 #endif 430 goto fail; 431 } 432 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 433 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 434 435 ci->ci_dev = vn_todev(vp); 436 437 /* 438 * Get partition information for the component. 439 */ 440 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 441 FREAD, p->p_ucred, p)) != 0) { 442 #ifdef DEBUG 443 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 444 printf("ccd%d: %s: ioctl failed, error = %d\n", 445 cs->sc_unit, ci->ci_path, error); 446 #endif 447 goto fail; 448 } 449 if (dpart.part->p_fstype == FS_BSDFFS) { 450 maxsecsize = 451 ((dpart.disklab->d_secsize > maxsecsize) ? 452 dpart.disklab->d_secsize : maxsecsize); 453 size = dpart.part->p_size - CCD_OFFSET; 454 } else { 455 #ifdef DEBUG 456 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 457 printf("ccd%d: %s: incorrect partition type\n", 458 cs->sc_unit, ci->ci_path); 459 #endif 460 error = EFTYPE; 461 goto fail; 462 } 463 464 /* 465 * Calculate the size, truncating to an interleave 466 * boundary if necessary. 467 */ 468 469 if (cs->sc_ileave > 1) 470 size -= size % cs->sc_ileave; 471 472 if (size == 0) { 473 #ifdef DEBUG 474 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 475 printf("ccd%d: %s: size == 0\n", 476 cs->sc_unit, ci->ci_path); 477 #endif 478 error = ENODEV; 479 goto fail; 480 } 481 482 if (minsize == 0 || size < minsize) 483 minsize = size; 484 ci->ci_size = size; 485 cs->sc_size += size; 486 } 487 488 /* 489 * Don't allow the interleave to be smaller than 490 * the biggest component sector. 491 */ 492 if ((cs->sc_ileave > 0) && 493 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 494 #ifdef DEBUG 495 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 496 printf("ccd%d: interleave must be at least %d\n", 497 cs->sc_unit, (maxsecsize / DEV_BSIZE)); 498 #endif 499 error = EINVAL; 500 goto fail; 501 } 502 503 /* 504 * If uniform interleave is desired set all sizes to that of 505 * the smallest component. This will guarentee that a single 506 * interleave table is generated. 507 * 508 * Lost space must be taken into account when calculating the 509 * overall size. Half the space is lost when CCDF_MIRROR is 510 * specified. One disk is lost when CCDF_PARITY is specified. 511 */ 512 if (cs->sc_flags & CCDF_UNIFORM) { 513 for (ci = cs->sc_cinfo; 514 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 515 ci->ci_size = minsize; 516 } 517 if (cs->sc_flags & CCDF_MIRROR) { 518 /* 519 * Check to see if an even number of components 520 * have been specified. The interleave must also 521 * be non-zero in order for us to be able to 522 * guarentee the topology. 523 */ 524 if (cs->sc_nccdisks % 2) { 525 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit ); 526 error = EINVAL; 527 goto fail; 528 } 529 if (cs->sc_ileave == 0) { 530 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit); 531 error = EINVAL; 532 goto fail; 533 } 534 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 535 } else if (cs->sc_flags & CCDF_PARITY) { 536 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 537 } else { 538 if (cs->sc_ileave == 0) { 539 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit); 540 error = EINVAL; 541 goto fail; 542 } 543 cs->sc_size = cs->sc_nccdisks * minsize; 544 } 545 } 546 547 /* 548 * Construct the interleave table. 549 */ 550 ccdinterleave(cs, cs->sc_unit); 551 552 /* 553 * Create pseudo-geometry based on 1MB cylinders. It's 554 * pretty close. 555 */ 556 ccg->ccg_secsize = maxsecsize; 557 ccg->ccg_ntracks = 1; 558 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 559 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 560 561 /* 562 * Add an devstat entry for this device. 563 */ 564 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit, 565 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 566 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 567 DEVSTAT_PRIORITY_ARRAY); 568 569 cs->sc_flags |= CCDF_INITED; 570 cs->sc_cflags = cs->sc_flags; /* So we can find out later... */ 571 return (0); 572 fail: 573 while (ci > cs->sc_cinfo) { 574 ci--; 575 free(ci->ci_path, M_DEVBUF); 576 } 577 free(cs->sc_cinfo, M_DEVBUF); 578 return (error); 579 } 580 581 static void 582 ccdinterleave(struct ccd_s *cs, int unit) 583 { 584 struct ccdcinfo *ci, *smallci; 585 struct ccdiinfo *ii; 586 daddr_t bn, lbn; 587 int ix; 588 u_long size; 589 590 #ifdef DEBUG 591 if (ccddebug & CCDB_INIT) 592 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 593 #endif 594 595 /* 596 * Allocate an interleave table. The worst case occurs when each 597 * of N disks is of a different size, resulting in N interleave 598 * tables. 599 * 600 * Chances are this is too big, but we don't care. 601 */ 602 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 603 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, 604 M_WAITOK | M_ZERO); 605 606 /* 607 * Trivial case: no interleave (actually interleave of disk size). 608 * Each table entry represents a single component in its entirety. 609 * 610 * An interleave of 0 may not be used with a mirror or parity setup. 611 */ 612 if (cs->sc_ileave == 0) { 613 bn = 0; 614 ii = cs->sc_itable; 615 616 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 617 /* Allocate space for ii_index. */ 618 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 619 ii->ii_ndisk = 1; 620 ii->ii_startblk = bn; 621 ii->ii_startoff = 0; 622 ii->ii_index[0] = ix; 623 bn += cs->sc_cinfo[ix].ci_size; 624 ii++; 625 } 626 ii->ii_ndisk = 0; 627 #ifdef DEBUG 628 if (ccddebug & CCDB_INIT) 629 printiinfo(cs->sc_itable); 630 #endif 631 return; 632 } 633 634 /* 635 * The following isn't fast or pretty; it doesn't have to be. 636 */ 637 size = 0; 638 bn = lbn = 0; 639 for (ii = cs->sc_itable; ; ii++) { 640 /* 641 * Allocate space for ii_index. We might allocate more then 642 * we use. 643 */ 644 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 645 M_DEVBUF, M_WAITOK); 646 647 /* 648 * Locate the smallest of the remaining components 649 */ 650 smallci = NULL; 651 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 652 ci++) { 653 if (ci->ci_size > size && 654 (smallci == NULL || 655 ci->ci_size < smallci->ci_size)) { 656 smallci = ci; 657 } 658 } 659 660 /* 661 * Nobody left, all done 662 */ 663 if (smallci == NULL) { 664 ii->ii_ndisk = 0; 665 break; 666 } 667 668 /* 669 * Record starting logical block using an sc_ileave blocksize. 670 */ 671 ii->ii_startblk = bn / cs->sc_ileave; 672 673 /* 674 * Record starting comopnent block using an sc_ileave 675 * blocksize. This value is relative to the beginning of 676 * a component disk. 677 */ 678 ii->ii_startoff = lbn; 679 680 /* 681 * Determine how many disks take part in this interleave 682 * and record their indices. 683 */ 684 ix = 0; 685 for (ci = cs->sc_cinfo; 686 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 687 if (ci->ci_size >= smallci->ci_size) { 688 ii->ii_index[ix++] = ci - cs->sc_cinfo; 689 } 690 } 691 ii->ii_ndisk = ix; 692 bn += ix * (smallci->ci_size - size); 693 lbn = smallci->ci_size / cs->sc_ileave; 694 size = smallci->ci_size; 695 } 696 #ifdef DEBUG 697 if (ccddebug & CCDB_INIT) 698 printiinfo(cs->sc_itable); 699 #endif 700 } 701 702 /* ARGSUSED */ 703 static int 704 ccdopen(dev_t dev, int flags, int fmt, struct proc *p) 705 { 706 int unit = ccdunit(dev); 707 struct ccd_s *cs; 708 struct disklabel *lp; 709 int error = 0, part, pmask; 710 711 #ifdef DEBUG 712 if (ccddebug & CCDB_FOLLOW) 713 printf("ccdopen(%p, %x)\n", dev, flags); 714 #endif 715 716 cs = IS_ALLOCATED(unit) ? ccdfind(unit) : ccdnew(unit); 717 718 if ((error = ccdlock(cs)) != 0) 719 return (error); 720 721 lp = &cs->sc_label; 722 723 part = ccdpart(dev); 724 pmask = (1 << part); 725 726 /* 727 * If we're initialized, check to see if there are any other 728 * open partitions. If not, then it's safe to update 729 * the in-core disklabel. 730 */ 731 if (IS_INITED(cs) && (cs->sc_openmask == 0)) 732 ccdgetdisklabel(dev); 733 734 /* Check that the partition exists. */ 735 if (part != RAW_PART && ((part >= lp->d_npartitions) || 736 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 737 error = ENXIO; 738 goto done; 739 } 740 741 cs->sc_openmask |= pmask; 742 done: 743 ccdunlock(cs); 744 return (0); 745 } 746 747 /* ARGSUSED */ 748 static int 749 ccdclose(dev_t dev, int flags, int fmt, struct proc *p) 750 { 751 int unit = ccdunit(dev); 752 struct ccd_s *cs; 753 int error = 0, part; 754 755 #ifdef DEBUG 756 if (ccddebug & CCDB_FOLLOW) 757 printf("ccdclose(%p, %x)\n", dev, flags); 758 #endif 759 760 if (!IS_ALLOCATED(unit)) 761 return (ENXIO); 762 cs = ccdfind(unit); 763 764 if ((error = ccdlock(cs)) != 0) 765 return (error); 766 767 part = ccdpart(dev); 768 769 /* ...that much closer to allowing unconfiguration... */ 770 cs->sc_openmask &= ~(1 << part); 771 /* collect "garbage" if possible */ 772 if (!IS_INITED(cs) && (cs->sc_flags & CCDF_WANTED) == 0) 773 ccddestroy(cs, p); 774 else 775 ccdunlock(cs); 776 return (0); 777 } 778 779 static void 780 ccdstrategy(struct bio *bp) 781 { 782 int unit = ccdunit(bp->bio_dev); 783 struct ccd_s *cs = ccdfind(unit); 784 int s; 785 int wlabel; 786 struct disklabel *lp; 787 788 #ifdef DEBUG 789 if (ccddebug & CCDB_FOLLOW) 790 printf("ccdstrategy(%p): unit %d\n", bp, unit); 791 #endif 792 if (!IS_INITED(cs)) { 793 biofinish(bp, NULL, ENXIO); 794 return; 795 } 796 797 /* If it's a nil transfer, wake up the top half now. */ 798 if (bp->bio_bcount == 0) { 799 biodone(bp); 800 return; 801 } 802 803 lp = &cs->sc_label; 804 805 /* 806 * Do bounds checking and adjust transfer. If there's an 807 * error, the bounds check will flag that for us. 808 */ 809 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 810 if (ccdpart(bp->bio_dev) != RAW_PART) { 811 if (bounds_check_with_label(bp, lp, wlabel) <= 0) { 812 biodone(bp); 813 return; 814 } 815 } else { 816 int pbn; /* in sc_secsize chunks */ 817 long sz; /* in sc_secsize chunks */ 818 819 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 820 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 821 822 /* 823 * If out of bounds return an error. If at the EOF point, 824 * simply read or write less. 825 */ 826 827 if (pbn < 0 || pbn >= cs->sc_size) { 828 bp->bio_resid = bp->bio_bcount; 829 if (pbn != cs->sc_size) 830 biofinish(bp, NULL, EINVAL); 831 else 832 biodone(bp); 833 return; 834 } 835 836 /* 837 * If the request crosses EOF, truncate the request. 838 */ 839 if (pbn + sz > cs->sc_size) { 840 bp->bio_bcount = (cs->sc_size - pbn) * 841 cs->sc_geom.ccg_secsize; 842 } 843 } 844 845 bp->bio_resid = bp->bio_bcount; 846 847 /* 848 * "Start" the unit. 849 */ 850 s = splbio(); 851 ccdstart(cs, bp); 852 splx(s); 853 return; 854 } 855 856 static void 857 ccdstart(struct ccd_s *cs, struct bio *bp) 858 { 859 long bcount, rcount; 860 struct ccdbuf *cbp[4]; 861 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 862 caddr_t addr; 863 daddr_t bn; 864 struct partition *pp; 865 866 #ifdef DEBUG 867 if (ccddebug & CCDB_FOLLOW) 868 printf("ccdstart(%p, %p)\n", cs, bp); 869 #endif 870 871 /* Record the transaction start */ 872 devstat_start_transaction(&cs->device_stats); 873 874 /* 875 * Translate the partition-relative block number to an absolute. 876 */ 877 bn = bp->bio_blkno; 878 if (ccdpart(bp->bio_dev) != RAW_PART) { 879 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; 880 bn += pp->p_offset; 881 } 882 883 /* 884 * Allocate component buffers and fire off the requests 885 */ 886 addr = bp->bio_data; 887 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 888 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 889 rcount = cbp[0]->cb_buf.bio_bcount; 890 891 if (cs->sc_cflags & CCDF_MIRROR) { 892 /* 893 * Mirroring. Writes go to both disks, reads are 894 * taken from whichever disk seems most appropriate. 895 * 896 * We attempt to localize reads to the disk whos arm 897 * is nearest the read request. We ignore seeks due 898 * to writes when making this determination and we 899 * also try to avoid hogging. 900 */ 901 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 902 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 903 BIO_STRATEGY(&cbp[1]->cb_buf, 0); 904 } else { 905 int pick = cs->sc_pick; 906 daddr_t range = cs->sc_size / 16; 907 908 if (bn < cs->sc_blk[pick] - range || 909 bn > cs->sc_blk[pick] + range 910 ) { 911 cs->sc_pick = pick = 1 - pick; 912 } 913 cs->sc_blk[pick] = bn + btodb(rcount); 914 BIO_STRATEGY(&cbp[pick]->cb_buf, 0); 915 } 916 } else { 917 /* 918 * Not mirroring 919 */ 920 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 921 } 922 bn += btodb(rcount); 923 addr += rcount; 924 } 925 } 926 927 /* 928 * Build a component buffer header. 929 */ 930 static void 931 ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) 932 { 933 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 934 struct ccdbuf *cbp; 935 daddr_t cbn, cboff; 936 off_t cbc; 937 938 #ifdef DEBUG 939 if (ccddebug & CCDB_IO) 940 printf("ccdbuffer(%p, %p, %d, %p, %ld)\n", 941 cs, bp, bn, addr, bcount); 942 #endif 943 /* 944 * Determine which component bn falls in. 945 */ 946 cbn = bn; 947 cboff = 0; 948 949 if (cs->sc_ileave == 0) { 950 /* 951 * Serially concatenated and neither a mirror nor a parity 952 * config. This is a special case. 953 */ 954 daddr_t sblk; 955 956 sblk = 0; 957 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 958 sblk += ci->ci_size; 959 cbn -= sblk; 960 } else { 961 struct ccdiinfo *ii; 962 int ccdisk, off; 963 964 /* 965 * Calculate cbn, the logical superblock (sc_ileave chunks), 966 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 967 * to cbn. 968 */ 969 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 970 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 971 972 /* 973 * Figure out which interleave table to use. 974 */ 975 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 976 if (ii->ii_startblk > cbn) 977 break; 978 } 979 ii--; 980 981 /* 982 * off is the logical superblock relative to the beginning 983 * of this interleave block. 984 */ 985 off = cbn - ii->ii_startblk; 986 987 /* 988 * We must calculate which disk component to use (ccdisk), 989 * and recalculate cbn to be the superblock relative to 990 * the beginning of the component. This is typically done by 991 * adding 'off' and ii->ii_startoff together. However, 'off' 992 * must typically be divided by the number of components in 993 * this interleave array to be properly convert it from a 994 * CCD-relative logical superblock number to a 995 * component-relative superblock number. 996 */ 997 if (ii->ii_ndisk == 1) { 998 /* 999 * When we have just one disk, it can't be a mirror 1000 * or a parity config. 1001 */ 1002 ccdisk = ii->ii_index[0]; 1003 cbn = ii->ii_startoff + off; 1004 } else { 1005 if (cs->sc_cflags & CCDF_MIRROR) { 1006 /* 1007 * We have forced a uniform mapping, resulting 1008 * in a single interleave array. We double 1009 * up on the first half of the available 1010 * components and our mirror is in the second 1011 * half. This only works with a single 1012 * interleave array because doubling up 1013 * doubles the number of sectors, so there 1014 * cannot be another interleave array because 1015 * the next interleave array's calculations 1016 * would be off. 1017 */ 1018 int ndisk2 = ii->ii_ndisk / 2; 1019 ccdisk = ii->ii_index[off % ndisk2]; 1020 cbn = ii->ii_startoff + off / ndisk2; 1021 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1022 } else if (cs->sc_cflags & CCDF_PARITY) { 1023 /* 1024 * XXX not implemented yet 1025 */ 1026 int ndisk2 = ii->ii_ndisk - 1; 1027 ccdisk = ii->ii_index[off % ndisk2]; 1028 cbn = ii->ii_startoff + off / ndisk2; 1029 if (cbn % ii->ii_ndisk <= ccdisk) 1030 ccdisk++; 1031 } else { 1032 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1033 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1034 } 1035 } 1036 1037 ci = &cs->sc_cinfo[ccdisk]; 1038 1039 /* 1040 * Convert cbn from a superblock to a normal block so it 1041 * can be used to calculate (along with cboff) the normal 1042 * block index into this particular disk. 1043 */ 1044 cbn *= cs->sc_ileave; 1045 } 1046 1047 /* 1048 * Fill in the component buf structure. 1049 */ 1050 cbp = getccdbuf(NULL); 1051 cbp->cb_buf.bio_cmd = bp->bio_cmd; 1052 cbp->cb_buf.bio_done = ccdiodone; 1053 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ 1054 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; 1055 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1056 cbp->cb_buf.bio_data = addr; 1057 if (cs->sc_ileave == 0) 1058 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1059 else 1060 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1061 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount; 1062 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 1063 1064 /* 1065 * context for ccdiodone 1066 */ 1067 cbp->cb_obp = bp; 1068 cbp->cb_unit = cs->sc_unit; 1069 cbp->cb_comp = ci - cs->sc_cinfo; 1070 1071 #ifdef DEBUG 1072 if (ccddebug & CCDB_IO) 1073 printf(" dev %p(u%ld): cbp %p bn %d addr %p bcnt %ld\n", 1074 ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp, 1075 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1076 cbp->cb_buf.bio_bcount); 1077 #endif 1078 cb[0] = cbp; 1079 1080 /* 1081 * Note: both I/O's setup when reading from mirror, but only one 1082 * will be executed. 1083 */ 1084 if (cs->sc_cflags & CCDF_MIRROR) { 1085 /* mirror, setup second I/O */ 1086 cbp = getccdbuf(cb[0]); 1087 cbp->cb_buf.bio_dev = ci2->ci_dev; 1088 cbp->cb_comp = ci2 - cs->sc_cinfo; 1089 cb[1] = cbp; 1090 /* link together the ccdbuf's and clear "mirror done" flag */ 1091 cb[0]->cb_mirror = cb[1]; 1092 cb[1]->cb_mirror = cb[0]; 1093 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1094 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1095 } 1096 } 1097 1098 static void 1099 ccdintr(struct ccd_s *cs, struct bio *bp) 1100 { 1101 #ifdef DEBUG 1102 if (ccddebug & CCDB_FOLLOW) 1103 printf("ccdintr(%p, %p)\n", cs, bp); 1104 #endif 1105 /* 1106 * Request is done for better or worse, wakeup the top half. 1107 */ 1108 if (bp->bio_flags & BIO_ERROR) 1109 bp->bio_resid = bp->bio_bcount; 1110 biofinish(bp, &cs->device_stats, 0); 1111 } 1112 1113 /* 1114 * Called at interrupt time. 1115 * Mark the component as done and if all components are done, 1116 * take a ccd interrupt. 1117 */ 1118 static void 1119 ccdiodone(struct bio *ibp) 1120 { 1121 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 1122 struct bio *bp = cbp->cb_obp; 1123 int unit = cbp->cb_unit; 1124 int count, s; 1125 1126 s = splbio(); 1127 #ifdef DEBUG 1128 if (ccddebug & CCDB_FOLLOW) 1129 printf("ccdiodone(%p)\n", cbp); 1130 if (ccddebug & CCDB_IO) { 1131 printf("ccdiodone: bp %p bcount %ld resid %ld\n", 1132 bp, bp->bio_bcount, bp->bio_resid); 1133 printf(" dev %p(u%d), cbp %p bn %d addr %p bcnt %ld\n", 1134 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, 1135 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1136 cbp->cb_buf.bio_bcount); 1137 } 1138 #endif 1139 /* 1140 * If an error occured, report it. If this is a mirrored 1141 * configuration and the first of two possible reads, do not 1142 * set the error in the bp yet because the second read may 1143 * succeed. 1144 */ 1145 1146 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1147 const char *msg = ""; 1148 1149 if ((ccdfind(unit)->sc_cflags & CCDF_MIRROR) && 1150 (cbp->cb_buf.bio_cmd == BIO_READ) && 1151 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1152 /* 1153 * We will try our read on the other disk down 1154 * below, also reverse the default pick so if we 1155 * are doing a scan we do not keep hitting the 1156 * bad disk first. 1157 */ 1158 struct ccd_s *cs = ccdfind(unit); 1159 1160 msg = ", trying other disk"; 1161 cs->sc_pick = 1 - cs->sc_pick; 1162 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 1163 } else { 1164 bp->bio_flags |= BIO_ERROR; 1165 bp->bio_error = cbp->cb_buf.bio_error ? 1166 cbp->cb_buf.bio_error : EIO; 1167 } 1168 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1169 unit, bp->bio_error, cbp->cb_comp, 1170 (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg); 1171 } 1172 1173 /* 1174 * Process mirror. If we are writing, I/O has been initiated on both 1175 * buffers and we fall through only after both are finished. 1176 * 1177 * If we are reading only one I/O is initiated at a time. If an 1178 * error occurs we initiate the second I/O and return, otherwise 1179 * we free the second I/O without initiating it. 1180 */ 1181 1182 if (ccdfind(unit)->sc_cflags & CCDF_MIRROR) { 1183 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 1184 /* 1185 * When writing, handshake with the second buffer 1186 * to determine when both are done. If both are not 1187 * done, return here. 1188 */ 1189 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1190 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1191 putccdbuf(cbp); 1192 splx(s); 1193 return; 1194 } 1195 } else { 1196 /* 1197 * When reading, either dispose of the second buffer 1198 * or initiate I/O on the second buffer if an error 1199 * occured with this one. 1200 */ 1201 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1202 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1203 cbp->cb_mirror->cb_pflags |= 1204 CCDPF_MIRROR_DONE; 1205 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0); 1206 putccdbuf(cbp); 1207 splx(s); 1208 return; 1209 } else { 1210 putccdbuf(cbp->cb_mirror); 1211 /* fall through */ 1212 } 1213 } 1214 } 1215 } 1216 1217 /* 1218 * use bio_caller1 to determine how big the original request was rather 1219 * then bio_bcount, because bio_bcount may have been truncated for EOF. 1220 * 1221 * XXX We check for an error, but we do not test the resid for an 1222 * aligned EOF condition. This may result in character & block 1223 * device access not recognizing EOF properly when read or written 1224 * sequentially, but will not effect filesystems. 1225 */ 1226 count = (long)cbp->cb_buf.bio_caller1; 1227 putccdbuf(cbp); 1228 1229 /* 1230 * If all done, "interrupt". 1231 */ 1232 bp->bio_resid -= count; 1233 if (bp->bio_resid < 0) 1234 panic("ccdiodone: count"); 1235 if (bp->bio_resid == 0) 1236 ccdintr(ccdfind(unit), bp); 1237 splx(s); 1238 } 1239 1240 static int 1241 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) 1242 { 1243 int unit = ccdunit(dev); 1244 int i, j, lookedup = 0, error = 0; 1245 int part, pmask, s; 1246 struct ccd_s *cs; 1247 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1248 char **cpp; 1249 struct vnode **vpp; 1250 1251 if (!IS_ALLOCATED(unit)) 1252 return (ENXIO); 1253 cs = ccdfind(unit); 1254 1255 switch (cmd) { 1256 case CCDIOCSET: 1257 if (IS_INITED(cs)) 1258 return (EBUSY); 1259 1260 if ((flag & FWRITE) == 0) 1261 return (EBADF); 1262 1263 if ((error = ccdlock(cs)) != 0) 1264 return (error); 1265 1266 /* Fill in some important bits. */ 1267 cs->sc_ileave = ccio->ccio_ileave; 1268 if (cs->sc_ileave == 0 && 1269 ((ccio->ccio_flags & CCDF_MIRROR) || 1270 (ccio->ccio_flags & CCDF_PARITY))) { 1271 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1272 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1273 } 1274 if ((ccio->ccio_flags & CCDF_MIRROR) && 1275 (ccio->ccio_flags & CCDF_PARITY)) { 1276 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1277 ccio->ccio_flags &= ~CCDF_PARITY; 1278 } 1279 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1280 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1281 printf("ccd%d: mirror/parity forces uniform flag\n", 1282 unit); 1283 ccio->ccio_flags |= CCDF_UNIFORM; 1284 } 1285 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1286 1287 /* 1288 * Allocate space for and copy in the array of 1289 * componet pathnames and device numbers. 1290 */ 1291 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1292 M_DEVBUF, M_WAITOK); 1293 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1294 M_DEVBUF, M_WAITOK); 1295 1296 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1297 ccio->ccio_ndisks * sizeof(char **)); 1298 if (error) { 1299 free(vpp, M_DEVBUF); 1300 free(cpp, M_DEVBUF); 1301 ccdunlock(cs); 1302 return (error); 1303 } 1304 1305 #ifdef DEBUG 1306 if (ccddebug & CCDB_INIT) 1307 for (i = 0; i < ccio->ccio_ndisks; ++i) 1308 printf("ccdioctl: component %d: %p\n", 1309 i, cpp[i]); 1310 #endif 1311 1312 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1313 #ifdef DEBUG 1314 if (ccddebug & CCDB_INIT) 1315 printf("ccdioctl: lookedup = %d\n", lookedup); 1316 #endif 1317 if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) { 1318 for (j = 0; j < lookedup; ++j) 1319 (void)vn_close(vpp[j], FREAD|FWRITE, 1320 p->p_ucred, p); 1321 free(vpp, M_DEVBUF); 1322 free(cpp, M_DEVBUF); 1323 ccdunlock(cs); 1324 return (error); 1325 } 1326 ++lookedup; 1327 } 1328 cs->sc_vpp = vpp; 1329 cs->sc_nccdisks = ccio->ccio_ndisks; 1330 1331 /* 1332 * Initialize the ccd. Fills in the softc for us. 1333 */ 1334 if ((error = ccdinit(cs, cpp, p)) != 0) { 1335 for (j = 0; j < lookedup; ++j) 1336 (void)vn_close(vpp[j], FREAD|FWRITE, 1337 p->p_ucred, p); 1338 /* 1339 * We can't ccddestroy() cs just yet, because nothing 1340 * prevents user-level app to do another ioctl() 1341 * without closing the device first, therefore 1342 * declare unit null and void and let ccdclose() 1343 * destroy it when it is safe to do so. 1344 */ 1345 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1346 free(vpp, M_DEVBUF); 1347 free(cpp, M_DEVBUF); 1348 ccdunlock(cs); 1349 return (error); 1350 } 1351 1352 /* 1353 * The ccd has been successfully initialized, so 1354 * we can place it into the array and read the disklabel. 1355 */ 1356 ccio->ccio_unit = unit; 1357 ccio->ccio_size = cs->sc_size; 1358 ccdgetdisklabel(dev); 1359 1360 ccdunlock(cs); 1361 1362 break; 1363 1364 case CCDIOCCLR: 1365 if (!IS_INITED(cs)) 1366 return (ENXIO); 1367 1368 if ((flag & FWRITE) == 0) 1369 return (EBADF); 1370 1371 if ((error = ccdlock(cs)) != 0) 1372 return (error); 1373 1374 /* Don't unconfigure if any other partitions are open */ 1375 part = ccdpart(dev); 1376 pmask = (1 << part); 1377 if ((cs->sc_openmask & ~pmask)) { 1378 ccdunlock(cs); 1379 return (EBUSY); 1380 } 1381 1382 /* Declare unit null and void (reset all flags) */ 1383 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1384 1385 /* Close the components and free their pathnames. */ 1386 for (i = 0; i < cs->sc_nccdisks; ++i) { 1387 /* 1388 * XXX: this close could potentially fail and 1389 * cause Bad Things. Maybe we need to force 1390 * the close to happen? 1391 */ 1392 #ifdef DEBUG 1393 if (ccddebug & CCDB_VNODE) 1394 vprint("CCDIOCCLR: vnode info", 1395 cs->sc_cinfo[i].ci_vp); 1396 #endif 1397 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1398 p->p_ucred, p); 1399 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1400 } 1401 1402 /* Free interleave index. */ 1403 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1404 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1405 1406 /* Free component info and interleave table. */ 1407 free(cs->sc_cinfo, M_DEVBUF); 1408 free(cs->sc_itable, M_DEVBUF); 1409 free(cs->sc_vpp, M_DEVBUF); 1410 1411 /* And remove the devstat entry. */ 1412 devstat_remove_entry(&cs->device_stats); 1413 1414 /* This must be atomic. */ 1415 s = splhigh(); 1416 ccdunlock(cs); 1417 splx(s); 1418 1419 break; 1420 1421 case CCDCONFINFO: 1422 { 1423 int ninit = 0; 1424 struct ccdconf *conf = (struct ccdconf *)data; 1425 struct ccd_s *tmpcs; 1426 struct ccd_s *ubuf = conf->buffer; 1427 1428 /* XXX: LOCK(unique unit numbers) */ 1429 LIST_FOREACH(tmpcs, &ccd_softc_list, list) 1430 if (IS_INITED(tmpcs)) 1431 ninit++; 1432 1433 if (conf->size == 0) { 1434 conf->size = sizeof(struct ccd_s) * ninit; 1435 break; 1436 } else if ((conf->size / sizeof(struct ccd_s) != ninit) || 1437 (conf->size % sizeof(struct ccd_s) != 0)) { 1438 /* XXX: UNLOCK(unique unit numbers) */ 1439 return (EINVAL); 1440 } 1441 1442 ubuf += ninit; 1443 LIST_FOREACH(tmpcs, &ccd_softc_list, list) { 1444 if (!IS_INITED(tmpcs)) 1445 continue; 1446 error = copyout(tmpcs, --ubuf, 1447 sizeof(struct ccd_s)); 1448 if (error != 0) 1449 /* XXX: UNLOCK(unique unit numbers) */ 1450 return (error); 1451 } 1452 /* XXX: UNLOCK(unique unit numbers) */ 1453 } 1454 break; 1455 1456 case CCDCPPINFO: 1457 if (!IS_INITED(cs)) 1458 return (ENXIO); 1459 1460 { 1461 int len = 0; 1462 struct ccdcpps *cpps = (struct ccdcpps *)data; 1463 char *ubuf = cpps->buffer; 1464 1465 1466 for (i = 0; i < cs->sc_nccdisks; ++i) 1467 len += cs->sc_cinfo[i].ci_pathlen; 1468 1469 if (cpps->size == 0) { 1470 cpps->size = len; 1471 break; 1472 } else if (cpps->size != len) { 1473 return (EINVAL); 1474 } 1475 1476 for (i = 0; i < cs->sc_nccdisks; ++i) { 1477 len = cs->sc_cinfo[i].ci_pathlen; 1478 error = copyout(cs->sc_cinfo[i].ci_path, ubuf, 1479 len); 1480 if (error != 0) 1481 return (error); 1482 ubuf += len; 1483 } 1484 } 1485 break; 1486 1487 case DIOCGDINFO: 1488 if (!IS_INITED(cs)) 1489 return (ENXIO); 1490 1491 *(struct disklabel *)data = cs->sc_label; 1492 break; 1493 1494 case DIOCGPART: 1495 if (!IS_INITED(cs)) 1496 return (ENXIO); 1497 1498 ((struct partinfo *)data)->disklab = &cs->sc_label; 1499 ((struct partinfo *)data)->part = 1500 &cs->sc_label.d_partitions[ccdpart(dev)]; 1501 break; 1502 1503 case DIOCWDINFO: 1504 case DIOCSDINFO: 1505 if (!IS_INITED(cs)) 1506 return (ENXIO); 1507 1508 if ((flag & FWRITE) == 0) 1509 return (EBADF); 1510 1511 if ((error = ccdlock(cs)) != 0) 1512 return (error); 1513 1514 cs->sc_flags |= CCDF_LABELLING; 1515 1516 error = setdisklabel(&cs->sc_label, 1517 (struct disklabel *)data, 0); 1518 if (error == 0) { 1519 if (cmd == DIOCWDINFO) 1520 error = writedisklabel(CCDLABELDEV(dev), 1521 &cs->sc_label); 1522 } 1523 1524 cs->sc_flags &= ~CCDF_LABELLING; 1525 1526 ccdunlock(cs); 1527 1528 if (error) 1529 return (error); 1530 break; 1531 1532 case DIOCWLABEL: 1533 if (!IS_INITED(cs)) 1534 return (ENXIO); 1535 1536 if ((flag & FWRITE) == 0) 1537 return (EBADF); 1538 if (*(int *)data != 0) 1539 cs->sc_flags |= CCDF_WLABEL; 1540 else 1541 cs->sc_flags &= ~CCDF_WLABEL; 1542 break; 1543 1544 default: 1545 return (ENOTTY); 1546 } 1547 1548 return (0); 1549 } 1550 1551 static int 1552 ccdsize(dev_t dev) 1553 { 1554 struct ccd_s *cs; 1555 int part, size; 1556 1557 if (ccdopen(dev, 0, S_IFCHR, curproc)) 1558 return (-1); 1559 1560 cs = ccdfind(ccdunit(dev)); 1561 part = ccdpart(dev); 1562 1563 if (!IS_INITED(cs)) 1564 return (-1); 1565 1566 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1567 size = -1; 1568 else 1569 size = cs->sc_label.d_partitions[part].p_size; 1570 1571 if (ccdclose(dev, 0, S_IFCHR, curproc)) 1572 return (-1); 1573 1574 return (size); 1575 } 1576 1577 static int 1578 ccddump(dev_t dev) 1579 { 1580 1581 /* Not implemented. */ 1582 return ENXIO; 1583 } 1584 1585 /* 1586 * Lookup the provided name in the filesystem. If the file exists, 1587 * is a valid block device, and isn't being used by anyone else, 1588 * set *vpp to the file's vnode. 1589 */ 1590 static int 1591 ccdlookup(char *path, struct proc *p, struct vnode **vpp) 1592 { 1593 struct nameidata nd; 1594 struct vnode *vp; 1595 int error, flags; 1596 1597 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p); 1598 flags = FREAD | FWRITE; 1599 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1600 #ifdef DEBUG 1601 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 1602 printf("ccdlookup: vn_open error = %d\n", error); 1603 #endif 1604 return (error); 1605 } 1606 vp = nd.ni_vp; 1607 1608 if (vp->v_usecount > 1) { 1609 error = EBUSY; 1610 goto bad; 1611 } 1612 1613 if (!vn_isdisk(vp, &error)) 1614 goto bad; 1615 1616 #ifdef DEBUG 1617 if (ccddebug & CCDB_VNODE) 1618 vprint("ccdlookup: vnode info", vp); 1619 #endif 1620 1621 VOP_UNLOCK(vp, 0, p); 1622 NDFREE(&nd, NDF_ONLY_PNBUF); 1623 *vpp = vp; 1624 return (0); 1625 bad: 1626 VOP_UNLOCK(vp, 0, p); 1627 NDFREE(&nd, NDF_ONLY_PNBUF); 1628 /* vn_close does vrele() for vp */ 1629 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); 1630 return (error); 1631 } 1632 1633 /* 1634 * Read the disklabel from the ccd. If one is not present, fake one 1635 * up. 1636 */ 1637 static void 1638 ccdgetdisklabel(dev_t dev) 1639 { 1640 int unit = ccdunit(dev); 1641 struct ccd_s *cs = ccdfind(unit); 1642 char *errstring; 1643 struct disklabel *lp = &cs->sc_label; 1644 struct ccdgeom *ccg = &cs->sc_geom; 1645 1646 bzero(lp, sizeof(*lp)); 1647 1648 lp->d_secperunit = cs->sc_size; 1649 lp->d_secsize = ccg->ccg_secsize; 1650 lp->d_nsectors = ccg->ccg_nsectors; 1651 lp->d_ntracks = ccg->ccg_ntracks; 1652 lp->d_ncylinders = ccg->ccg_ncylinders; 1653 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1654 1655 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1656 lp->d_type = DTYPE_CCD; 1657 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1658 lp->d_rpm = 3600; 1659 lp->d_interleave = 1; 1660 lp->d_flags = 0; 1661 1662 lp->d_partitions[RAW_PART].p_offset = 0; 1663 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1664 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1665 lp->d_npartitions = RAW_PART + 1; 1666 1667 lp->d_bbsize = BBSIZE; /* XXX */ 1668 lp->d_sbsize = SBSIZE; /* XXX */ 1669 1670 lp->d_magic = DISKMAGIC; 1671 lp->d_magic2 = DISKMAGIC; 1672 lp->d_checksum = dkcksum(&cs->sc_label); 1673 1674 /* 1675 * Call the generic disklabel extraction routine. 1676 */ 1677 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1678 if (errstring != NULL) 1679 ccdmakedisklabel(cs); 1680 1681 #ifdef DEBUG 1682 /* It's actually extremely common to have unlabeled ccds. */ 1683 if (ccddebug & CCDB_LABEL) 1684 if (errstring != NULL) 1685 printf("ccd%d: %s\n", unit, errstring); 1686 #endif 1687 } 1688 1689 /* 1690 * Take care of things one might want to take care of in the event 1691 * that a disklabel isn't present. 1692 */ 1693 static void 1694 ccdmakedisklabel(struct ccd_s *cs) 1695 { 1696 struct disklabel *lp = &cs->sc_label; 1697 1698 /* 1699 * For historical reasons, if there's no disklabel present 1700 * the raw partition must be marked FS_BSDFFS. 1701 */ 1702 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1703 1704 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1705 } 1706 1707 /* 1708 * Wait interruptibly for an exclusive lock. 1709 * 1710 * XXX 1711 * Several drivers do this; it should be abstracted and made MP-safe. 1712 */ 1713 static int 1714 ccdlock(struct ccd_s *cs) 1715 { 1716 int error; 1717 1718 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1719 cs->sc_flags |= CCDF_WANTED; 1720 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1721 return (error); 1722 } 1723 cs->sc_flags |= CCDF_LOCKED; 1724 return (0); 1725 } 1726 1727 /* 1728 * Unlock and wake up any waiters. 1729 */ 1730 static void 1731 ccdunlock(struct ccd_s *cs) 1732 { 1733 1734 cs->sc_flags &= ~CCDF_LOCKED; 1735 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1736 cs->sc_flags &= ~CCDF_WANTED; 1737 wakeup(cs); 1738 } 1739 } 1740 1741 #ifdef DEBUG 1742 static void 1743 printiinfo(struct ccdiinfo *ii) 1744 { 1745 int ix, i; 1746 1747 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1748 printf(" itab[%d]: #dk %d sblk %d soff %d", 1749 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1750 for (i = 0; i < ii->ii_ndisk; i++) 1751 printf(" %d", ii->ii_index[i]); 1752 printf("\n"); 1753 } 1754 } 1755 #endif 1756