1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 
41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 
73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include <sys/param.h> 91 #include <sys/systm.h> 92 #include <sys/kernel.h> 93 #include <sys/module.h> 94 #include <sys/proc.h> 95 #include <sys/bio.h> 96 #include <sys/malloc.h> 97 #include <sys/namei.h> 98 #include <sys/conf.h> 99 #include <sys/stat.h> 100 #include <sys/sysctl.h> 101 #include <sys/disklabel.h> 102 #include <ufs/ffs/fs.h> 103 #include <sys/devicestat.h> 104 #include <sys/fcntl.h> 105 #include <sys/vnode.h> 106 107 #include <sys/ccdvar.h> 108 109 MALLOC_DEFINE(M_CCD, "CCD driver", "Concatenated Disk driver"); 110 111 #if defined(CCDDEBUG) && !defined(DEBUG) 112 #define DEBUG 113 #endif 114 115 #ifdef DEBUG 116 #define CCDB_FOLLOW 0x01 117 #define CCDB_INIT 0x02 118 #define CCDB_IO 0x04 119 #define CCDB_LABEL 0x08 120 #define CCDB_VNODE 0x10 121 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 122 CCDB_VNODE; 123 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 124 #endif 125 126 #define ccdunit(x) dkunit(x) 127 #define ccdpart(x) dkpart(x) 128 129 /* 130 This is how mirroring works (only writes are special): 131 132 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 133 linked together by the cb_mirror field. "cb_pflags & 134 CCDPF_MIRROR_DONE" is set to 0 on both of them. 135 136 When a component returns to ccdiodone(), it checks if "cb_pflags & 137 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 138 flag and returns. If it is, it means its partner has already 139 returned, so it will go to the regular cleanup. 
140 141 */ 142 143 struct ccdbuf { 144 struct bio cb_buf; /* new I/O buf */ 145 struct bio *cb_obp; /* ptr. to original I/O buf */ 146 struct ccdbuf *cb_freenext; /* free list link */ 147 int cb_unit; /* target unit */ 148 int cb_comp; /* target component */ 149 int cb_pflags; /* mirror/parity status flag */ 150 struct ccdbuf *cb_mirror; /* mirror counterpart */ 151 }; 152 153 /* bits in cb_pflags */ 154 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 155 156 #define CCDLABELDEV(dev) \ 157 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 158 159 /* convinient macros for often-used statements */ 160 #define IS_ALLOCATED(unit) (ccdfind(unit) != NULL) 161 #define IS_INITED(cs) (((cs)->sc_flags & CCDF_INITED) != 0) 162 163 static d_open_t ccdopen; 164 static d_close_t ccdclose; 165 static d_strategy_t ccdstrategy; 166 static d_ioctl_t ccdioctl; 167 static d_dump_t ccddump; 168 static d_psize_t ccdsize; 169 170 #define NCCDFREEHIWAT 16 171 172 #define CDEV_MAJOR 74 173 174 static struct cdevsw ccd_cdevsw = { 175 /* open */ ccdopen, 176 /* close */ ccdclose, 177 /* read */ physread, 178 /* write */ physwrite, 179 /* ioctl */ ccdioctl, 180 /* poll */ nopoll, 181 /* mmap */ nommap, 182 /* strategy */ ccdstrategy, 183 /* name */ "ccd", 184 /* maj */ CDEV_MAJOR, 185 /* dump */ ccddump, 186 /* psize */ ccdsize, 187 /* flags */ D_DISK, 188 }; 189 static LIST_HEAD(, ccd_s) ccd_softc_list = LIST_HEAD_INITIALIZER(&ccd_softc_list); 190 191 static struct ccd_s *ccdfind(int); 192 static struct ccd_s *ccdnew(int); 193 static int ccddestroy(struct ccd_s *, struct proc *); 194 195 /* called during module initialization */ 196 static void ccdattach(void); 197 static int ccd_modevent(module_t, int, void *); 198 199 /* called by biodone() at interrupt time */ 200 static void ccdiodone(struct bio *bp); 201 202 static void ccdstart(struct ccd_s *, struct bio *); 203 static void ccdinterleave(struct ccd_s *, int); 204 static void ccdintr(struct ccd_s *, 
struct bio *); 205 static int ccdinit(struct ccd_s *, char **, struct thread *); 206 static int ccdlookup(char *, struct thread *p, struct vnode **); 207 static void ccdbuffer(struct ccdbuf **ret, struct ccd_s *, 208 struct bio *, daddr_t, caddr_t, long); 209 static void ccdgetdisklabel(dev_t); 210 static void ccdmakedisklabel(struct ccd_s *); 211 static int ccdlock(struct ccd_s *); 212 static void ccdunlock(struct ccd_s *); 213 214 #ifdef DEBUG 215 static void printiinfo(struct ccdiinfo *); 216 #endif 217 218 /* Non-private for the benefit of libkvm. */ 219 struct ccdbuf *ccdfreebufs; 220 static int numccdfreebufs; 221 222 /* 223 * getccdbuf() - Allocate and zero a ccd buffer. 224 * 225 * This routine is called at splbio(). 226 */ 227 228 static __inline 229 struct ccdbuf * 230 getccdbuf(struct ccdbuf *cpy) 231 { 232 struct ccdbuf *cbp; 233 234 /* 235 * Allocate from freelist or malloc as necessary 236 */ 237 if ((cbp = ccdfreebufs) != NULL) { 238 ccdfreebufs = cbp->cb_freenext; 239 --numccdfreebufs; 240 } else { 241 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 242 } 243 244 /* 245 * Used by mirroring code 246 */ 247 if (cpy) 248 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 249 else 250 bzero(cbp, sizeof(struct ccdbuf)); 251 252 /* 253 * independant struct bio initialization 254 */ 255 256 return(cbp); 257 } 258 259 /* 260 * putccdbuf() - Free a ccd buffer. 261 * 262 * This routine is called at splbio(). 263 */ 264 265 static __inline 266 void 267 putccdbuf(struct ccdbuf *cbp) 268 { 269 270 if (numccdfreebufs < NCCDFREEHIWAT) { 271 cbp->cb_freenext = ccdfreebufs; 272 ccdfreebufs = cbp; 273 ++numccdfreebufs; 274 } else { 275 free((caddr_t)cbp, M_DEVBUF); 276 } 277 } 278 279 280 /* 281 * Number of blocks to untouched in front of a component partition. 282 * This is to avoid violating its disklabel area when it starts at the 283 * beginning of the slice. 
284 */ 285 #if !defined(CCD_OFFSET) 286 #define CCD_OFFSET 16 287 #endif 288 289 static struct ccd_s * 290 ccdfind(int unit) 291 { 292 struct ccd_s *sc = NULL; 293 294 /* XXX: LOCK(unique unit numbers) */ 295 LIST_FOREACH(sc, &ccd_softc_list, list) { 296 if (sc->sc_unit == unit) 297 break; 298 } 299 /* XXX: UNLOCK(unique unit numbers) */ 300 return ((sc == NULL) || (sc->sc_unit != unit) ? NULL : sc); 301 } 302 303 static struct ccd_s * 304 ccdnew(int unit) 305 { 306 struct ccd_s *sc; 307 308 /* XXX: LOCK(unique unit numbers) */ 309 if (IS_ALLOCATED(unit) || unit > DKMAXUNIT) 310 return (NULL); 311 312 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO); 313 sc->sc_unit = unit; 314 LIST_INSERT_HEAD(&ccd_softc_list, sc, list); 315 /* XXX: UNLOCK(unique unit numbers) */ 316 return (sc); 317 } 318 319 static int 320 ccddestroy(struct ccd_s *sc, struct proc *p) 321 { 322 323 /* XXX: LOCK(unique unit numbers) */ 324 LIST_REMOVE(sc, list); 325 /* XXX: UNLOCK(unique unit numbers) */ 326 FREE(sc, M_CCD); 327 return (0); 328 } 329 330 static void 331 ccd_clone(void *arg, char *name, int namelen, dev_t *dev) 332 { 333 int i, u; 334 char *s; 335 336 if (*dev != NODEV) 337 return; 338 i = dev_stdclone(name, &s, "ccd", &u); 339 if (i != 2) 340 return; 341 if (*s < 'a' || *s > 'h') 342 return; 343 if (s[1] != '\0') 344 return; 345 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', 346 UID_ROOT, GID_OPERATOR, 0640, name); 347 } 348 349 /* 350 * Called by main() during pseudo-device attachment. All we need 351 * to do is to add devsw entries. 
352 */ 353 static void 354 ccdattach() 355 { 356 357 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); 358 } 359 360 static int 361 ccd_modevent(module_t mod, int type, void *data) 362 { 363 int error = 0; 364 365 switch (type) { 366 case MOD_LOAD: 367 ccdattach(); 368 break; 369 370 case MOD_UNLOAD: 371 printf("ccd0: Unload not supported!\n"); 372 error = EOPNOTSUPP; 373 break; 374 375 default: /* MOD_SHUTDOWN etc */ 376 break; 377 } 378 return (error); 379 } 380 381 DEV_MODULE(ccd, ccd_modevent, NULL); 382 383 static int 384 ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td) 385 { 386 struct ccdcinfo *ci = NULL; /* XXX */ 387 size_t size; 388 int ix; 389 struct vnode *vp; 390 size_t minsize; 391 int maxsecsize; 392 struct partinfo dpart; 393 struct ccdgeom *ccg = &cs->sc_geom; 394 char tmppath[MAXPATHLEN]; 395 int error = 0; 396 397 #ifdef DEBUG 398 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 399 printf("ccdinit: unit %d\n", cs->sc_unit); 400 #endif 401 402 cs->sc_size = 0; 403 404 /* Allocate space for the component info. */ 405 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 406 M_DEVBUF, M_WAITOK); 407 408 /* 409 * Verify that each component piece exists and record 410 * relevant information about it. 411 */ 412 maxsecsize = 0; 413 minsize = 0; 414 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 415 vp = cs->sc_vpp[ix]; 416 ci = &cs->sc_cinfo[ix]; 417 ci->ci_vp = vp; 418 419 /* 420 * Copy in the pathname of the component. 421 */ 422 bzero(tmppath, sizeof(tmppath)); /* sanity */ 423 if ((error = copyinstr(cpaths[ix], tmppath, 424 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 425 #ifdef DEBUG 426 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 427 printf("ccd%d: can't copy path, error = %d\n", 428 cs->sc_unit, error); 429 #endif 430 goto fail; 431 } 432 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 433 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 434 435 ci->ci_dev = vn_todev(vp); 436 437 /* 438 * Get partition information for the component. 
439 */ 440 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 441 FREAD, td->td_proc->p_ucred, td)) != 0) { 442 #ifdef DEBUG 443 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 444 printf("ccd%d: %s: ioctl failed, error = %d\n", 445 cs->sc_unit, ci->ci_path, error); 446 #endif 447 goto fail; 448 } 449 if (dpart.part->p_fstype == FS_BSDFFS) { 450 maxsecsize = 451 ((dpart.disklab->d_secsize > maxsecsize) ? 452 dpart.disklab->d_secsize : maxsecsize); 453 size = dpart.part->p_size - CCD_OFFSET; 454 } else { 455 #ifdef DEBUG 456 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 457 printf("ccd%d: %s: incorrect partition type\n", 458 cs->sc_unit, ci->ci_path); 459 #endif 460 error = EFTYPE; 461 goto fail; 462 } 463 464 /* 465 * Calculate the size, truncating to an interleave 466 * boundary if necessary. 467 */ 468 469 if (cs->sc_ileave > 1) 470 size -= size % cs->sc_ileave; 471 472 if (size == 0) { 473 #ifdef DEBUG 474 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 475 printf("ccd%d: %s: size == 0\n", 476 cs->sc_unit, ci->ci_path); 477 #endif 478 error = ENODEV; 479 goto fail; 480 } 481 482 if (minsize == 0 || size < minsize) 483 minsize = size; 484 ci->ci_size = size; 485 cs->sc_size += size; 486 } 487 488 /* 489 * Don't allow the interleave to be smaller than 490 * the biggest component sector. 491 */ 492 if ((cs->sc_ileave > 0) && 493 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 494 #ifdef DEBUG 495 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 496 printf("ccd%d: interleave must be at least %d\n", 497 cs->sc_unit, (maxsecsize / DEV_BSIZE)); 498 #endif 499 error = EINVAL; 500 goto fail; 501 } 502 503 /* 504 * If uniform interleave is desired set all sizes to that of 505 * the smallest component. This will guarentee that a single 506 * interleave table is generated. 507 * 508 * Lost space must be taken into account when calculating the 509 * overall size. Half the space is lost when CCDF_MIRROR is 510 * specified. One disk is lost when CCDF_PARITY is specified. 
511 */ 512 if (cs->sc_flags & CCDF_UNIFORM) { 513 for (ci = cs->sc_cinfo; 514 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 515 ci->ci_size = minsize; 516 } 517 if (cs->sc_flags & CCDF_MIRROR) { 518 /* 519 * Check to see if an even number of components 520 * have been specified. The interleave must also 521 * be non-zero in order for us to be able to 522 * guarentee the topology. 523 */ 524 if (cs->sc_nccdisks % 2) { 525 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit ); 526 error = EINVAL; 527 goto fail; 528 } 529 if (cs->sc_ileave == 0) { 530 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit); 531 error = EINVAL; 532 goto fail; 533 } 534 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 535 } else if (cs->sc_flags & CCDF_PARITY) { 536 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 537 } else { 538 if (cs->sc_ileave == 0) { 539 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit); 540 error = EINVAL; 541 goto fail; 542 } 543 cs->sc_size = cs->sc_nccdisks * minsize; 544 } 545 } 546 547 /* 548 * Construct the interleave table. 549 */ 550 ccdinterleave(cs, cs->sc_unit); 551 552 /* 553 * Create pseudo-geometry based on 1MB cylinders. It's 554 * pretty close. 555 */ 556 ccg->ccg_secsize = maxsecsize; 557 ccg->ccg_ntracks = 1; 558 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 559 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 560 561 /* 562 * Add an devstat entry for this device. 563 */ 564 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit, 565 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 566 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 567 DEVSTAT_PRIORITY_ARRAY); 568 569 cs->sc_flags |= CCDF_INITED; 570 cs->sc_cflags = cs->sc_flags; /* So we can find out later... 
*/ 571 return (0); 572 fail: 573 while (ci > cs->sc_cinfo) { 574 ci--; 575 free(ci->ci_path, M_DEVBUF); 576 } 577 free(cs->sc_cinfo, M_DEVBUF); 578 return (error); 579 } 580 581 static void 582 ccdinterleave(struct ccd_s *cs, int unit) 583 { 584 struct ccdcinfo *ci, *smallci; 585 struct ccdiinfo *ii; 586 daddr_t bn, lbn; 587 int ix; 588 u_long size; 589 590 #ifdef DEBUG 591 if (ccddebug & CCDB_INIT) 592 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 593 #endif 594 595 /* 596 * Allocate an interleave table. The worst case occurs when each 597 * of N disks is of a different size, resulting in N interleave 598 * tables. 599 * 600 * Chances are this is too big, but we don't care. 601 */ 602 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 603 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, 604 M_WAITOK | M_ZERO); 605 606 /* 607 * Trivial case: no interleave (actually interleave of disk size). 608 * Each table entry represents a single component in its entirety. 609 * 610 * An interleave of 0 may not be used with a mirror or parity setup. 611 */ 612 if (cs->sc_ileave == 0) { 613 bn = 0; 614 ii = cs->sc_itable; 615 616 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 617 /* Allocate space for ii_index. */ 618 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 619 ii->ii_ndisk = 1; 620 ii->ii_startblk = bn; 621 ii->ii_startoff = 0; 622 ii->ii_index[0] = ix; 623 bn += cs->sc_cinfo[ix].ci_size; 624 ii++; 625 } 626 ii->ii_ndisk = 0; 627 #ifdef DEBUG 628 if (ccddebug & CCDB_INIT) 629 printiinfo(cs->sc_itable); 630 #endif 631 return; 632 } 633 634 /* 635 * The following isn't fast or pretty; it doesn't have to be. 636 */ 637 size = 0; 638 bn = lbn = 0; 639 for (ii = cs->sc_itable; ; ii++) { 640 /* 641 * Allocate space for ii_index. We might allocate more then 642 * we use. 
643 */ 644 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 645 M_DEVBUF, M_WAITOK); 646 647 /* 648 * Locate the smallest of the remaining components 649 */ 650 smallci = NULL; 651 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 652 ci++) { 653 if (ci->ci_size > size && 654 (smallci == NULL || 655 ci->ci_size < smallci->ci_size)) { 656 smallci = ci; 657 } 658 } 659 660 /* 661 * Nobody left, all done 662 */ 663 if (smallci == NULL) { 664 ii->ii_ndisk = 0; 665 break; 666 } 667 668 /* 669 * Record starting logical block using an sc_ileave blocksize. 670 */ 671 ii->ii_startblk = bn / cs->sc_ileave; 672 673 /* 674 * Record starting comopnent block using an sc_ileave 675 * blocksize. This value is relative to the beginning of 676 * a component disk. 677 */ 678 ii->ii_startoff = lbn; 679 680 /* 681 * Determine how many disks take part in this interleave 682 * and record their indices. 683 */ 684 ix = 0; 685 for (ci = cs->sc_cinfo; 686 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 687 if (ci->ci_size >= smallci->ci_size) { 688 ii->ii_index[ix++] = ci - cs->sc_cinfo; 689 } 690 } 691 ii->ii_ndisk = ix; 692 bn += ix * (smallci->ci_size - size); 693 lbn = smallci->ci_size / cs->sc_ileave; 694 size = smallci->ci_size; 695 } 696 #ifdef DEBUG 697 if (ccddebug & CCDB_INIT) 698 printiinfo(cs->sc_itable); 699 #endif 700 } 701 702 /* ARGSUSED */ 703 static int 704 ccdopen(dev_t dev, int flags, int fmt, struct thread *td) 705 { 706 int unit = ccdunit(dev); 707 struct ccd_s *cs; 708 struct disklabel *lp; 709 int error = 0, part, pmask; 710 711 #ifdef DEBUG 712 if (ccddebug & CCDB_FOLLOW) 713 printf("ccdopen(%p, %x)\n", dev, flags); 714 #endif 715 716 cs = IS_ALLOCATED(unit) ? ccdfind(unit) : ccdnew(unit); 717 718 if ((error = ccdlock(cs)) != 0) 719 return (error); 720 721 lp = &cs->sc_label; 722 723 part = ccdpart(dev); 724 pmask = (1 << part); 725 726 /* 727 * If we're initialized, check to see if there are any other 728 * open partitions. 
If not, then it's safe to update 729 * the in-core disklabel. 730 */ 731 if (IS_INITED(cs) && (cs->sc_openmask == 0)) 732 ccdgetdisklabel(dev); 733 734 /* Check that the partition exists. */ 735 if (part != RAW_PART && ((part >= lp->d_npartitions) || 736 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 737 error = ENXIO; 738 goto done; 739 } 740 741 cs->sc_openmask |= pmask; 742 done: 743 ccdunlock(cs); 744 return (0); 745 } 746 747 /* ARGSUSED */ 748 static int 749 ccdclose(dev_t dev, int flags, int fmt, struct thread *td) 750 { 751 int unit = ccdunit(dev); 752 struct ccd_s *cs; 753 int error = 0, part; 754 755 #ifdef DEBUG 756 if (ccddebug & CCDB_FOLLOW) 757 printf("ccdclose(%p, %x)\n", dev, flags); 758 #endif 759 760 if (!IS_ALLOCATED(unit)) 761 return (ENXIO); 762 cs = ccdfind(unit); 763 764 if ((error = ccdlock(cs)) != 0) 765 return (error); 766 767 part = ccdpart(dev); 768 769 /* ...that much closer to allowing unconfiguration... */ 770 cs->sc_openmask &= ~(1 << part); 771 /* collect "garbage" if possible */ 772 if (!IS_INITED(cs) && (cs->sc_flags & CCDF_WANTED) == 0) 773 ccddestroy(cs, td->td_proc); 774 else 775 ccdunlock(cs); 776 return (0); 777 } 778 779 static void 780 ccdstrategy(struct bio *bp) 781 { 782 int unit = ccdunit(bp->bio_dev); 783 struct ccd_s *cs = ccdfind(unit); 784 int s; 785 int wlabel; 786 struct disklabel *lp; 787 788 #ifdef DEBUG 789 if (ccddebug & CCDB_FOLLOW) 790 printf("ccdstrategy(%p): unit %d\n", bp, unit); 791 #endif 792 if (!IS_INITED(cs)) { 793 biofinish(bp, NULL, ENXIO); 794 return; 795 } 796 797 /* If it's a nil transfer, wake up the top half now. */ 798 if (bp->bio_bcount == 0) { 799 biodone(bp); 800 return; 801 } 802 803 lp = &cs->sc_label; 804 805 /* 806 * Do bounds checking and adjust transfer. If there's an 807 * error, the bounds check will flag that for us. 
*/
	wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
	if (ccdpart(bp->bio_dev) != RAW_PART) {
		/* Labelled partition: the generic label check does the work. */
		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
			biodone(bp);
			return;
		}
	} else {
		/* Raw partition: check against the size of the whole ccd. */
		int pbn;	/* in sc_secsize chunks */
		long sz;	/* in sc_secsize chunks */

		pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
		sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize);

		/*
		 * If out of bounds return an error. If at the EOF point,
		 * simply read or write less.
		 */

		if (pbn < 0 || pbn >= cs->sc_size) {
			bp->bio_resid = bp->bio_bcount;
			/* Exactly at EOF is not an error, just no data. */
			if (pbn != cs->sc_size)
				biofinish(bp, NULL, EINVAL);
			else
				biodone(bp);
			return;
		}

		/*
		 * If the request crosses EOF, truncate the request.
		 */
		if (pbn + sz > cs->sc_size) {
			bp->bio_bcount = (cs->sc_size - pbn) * 
			    cs->sc_geom.ccg_secsize;
		}
	}

	/* Nothing transferred yet; ccdiodone() counts this down. */
	bp->bio_resid = bp->bio_bcount;

	/*
	 * "Start" the unit.
	 */
	s = splbio();
	ccdstart(cs, bp);
	splx(s);
	return;
}

/*
 * ccdstart() - Split the request bp into per-component transfers and
 * issue them.
 *
 * Each pass of the loop asks ccdbuffer() for the next component buffer
 * (two buffers when CCDF_MIRROR is set), hands it to BIO_STRATEGY and
 * advances the block number and data pointer by the amount covered.
 * ccdstrategy() calls this at splbio().
 */
static void
ccdstart(struct ccd_s *cs, struct bio *bp)
{
	long bcount, rcount;
	struct ccdbuf *cbp[4];
	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
	caddr_t addr;
	daddr_t bn;
	struct partition *pp;

#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdstart(%p, %p)\n", cs, bp);
#endif

	/* Record the transaction start  */
	devstat_start_transaction(&cs->device_stats);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	bn = bp->bio_blkno;
	if (ccdpart(bp->bio_dev) != RAW_PART) {
		pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)];
		bn += pp->p_offset;
	}

	/*
	 * Allocate component buffers and fire off the requests
	 */
	addr = bp->bio_data;
	for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) {
		ccdbuffer(cbp, cs, bp, bn, addr, bcount);
		/* Number of bytes this component transfer covers. */
		rcount = cbp[0]->cb_buf.bio_bcount;

		if (cs->sc_cflags & CCDF_MIRROR) {
			/*
			 * Mirroring.  Writes go to both disks, reads are
			 * taken from whichever disk seems most appropriate.
			 *
			 * We attempt to localize reads to the disk whose arm
			 * is nearest the read request.  We ignore seeks due
			 * to writes when making this determination and we
			 * also try to avoid hogging.
			 */
			if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) {
				BIO_STRATEGY(&cbp[0]->cb_buf, 0);
				BIO_STRATEGY(&cbp[1]->cb_buf, 0);
			} else {
				int pick = cs->sc_pick;
				daddr_t range = cs->sc_size / 16;

				/*
				 * Switch sides when the request lies more
				 * than 1/16 of the ccd size away from the
				 * block recorded for the current pick.
				 */
				if (bn < cs->sc_blk[pick] - range ||
				    bn > cs->sc_blk[pick] + range
				) {
					cs->sc_pick = pick = 1 - pick;
				}
				/* Remember where this read will end. */
				cs->sc_blk[pick] = bn + btodb(rcount);
				BIO_STRATEGY(&cbp[pick]->cb_buf, 0);
			}
		} else {
			/*
			 * Not mirroring
			 */
			BIO_STRATEGY(&cbp[0]->cb_buf, 0);
		}
		bn += btodb(rcount);
		addr += rcount;
	}
}

/*
 * Build a component buffer header.
 *
 *	cb	output array; cb[0] receives the component buffer and,
 *		when CCDF_MIRROR is set, cb[1] receives its mirror twin.
 *	cs	softc of the ccd unit.
 *	bp	original request the component buffer belongs to.
 *	bn	ccd block number the transfer starts at.
 *	addr	data address for the transfer.
 *	bcount	number of bytes still to be transferred; the component
 *		buffer covers at most this many bytes.
 */
static void
ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
{
	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
	struct ccdbuf *cbp;
	daddr_t cbn, cboff;
	off_t cbc;

#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		printf("ccdbuffer(%p, %p, %d, %p, %ld)\n",
		       cs, bp, bn, addr, bcount);
#endif
	/*
	 * Determine which component bn falls in.
	 */
	cbn = bn;
	cboff = 0;

	if (cs->sc_ileave == 0) {
		/*
		 * Serially concatenated and neither a mirror nor a parity
		 * config.  This is a special case.
		 */
		daddr_t sblk;

		/* Skip whole components until bn falls inside one. */
		sblk = 0;
		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
			sblk += ci->ci_size;
		cbn -= sblk;
	} else {
		struct ccdiinfo *ii;
		int ccdisk, off;

		/*
		 * Calculate cbn, the logical superblock (sc_ileave chunks),
		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
		 * to cbn.
		 */
		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */

		/*
		 * Figure out which interleave table to use.
		 */
		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
			if (ii->ii_startblk > cbn)
				break;
		}
		/* The loop stops one entry past the one containing cbn. */
		ii--;

		/*
		 * off is the logical superblock relative to the beginning 
		 * of this interleave block.  
		 */
		off = cbn - ii->ii_startblk;

		/*
		 * We must calculate which disk component to use (ccdisk),
		 * and recalculate cbn to be the superblock relative to
		 * the beginning of the component.  This is typically done by
		 * adding 'off' and ii->ii_startoff together.  However, 'off'
		 * must typically be divided by the number of components in
		 * this interleave array to properly convert it from a
		 * CCD-relative logical superblock number to a 
		 * component-relative superblock number.
		 */
		if (ii->ii_ndisk == 1) {
			/*
			 * When we have just one disk, it can't be a mirror
			 * or a parity config.
			 */
			ccdisk = ii->ii_index[0];
			cbn = ii->ii_startoff + off;
		} else {
			if (cs->sc_cflags & CCDF_MIRROR) {
				/*
				 * We have forced a uniform mapping, resulting
				 * in a single interleave array.  We double
				 * up on the first half of the available
				 * components and our mirror is in the second
				 * half.  This only works with a single
				 * interleave array because doubling up
				 * doubles the number of sectors, so there
				 * cannot be another interleave array because
				 * the next interleave array's calculations
				 * would be off.
				 */
				int ndisk2 = ii->ii_ndisk / 2;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
			} else if (cs->sc_cflags & CCDF_PARITY) {
				/* 
				 * XXX not implemented yet
				 */
				int ndisk2 = ii->ii_ndisk - 1;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				if (cbn % ii->ii_ndisk <= ccdisk)
					ccdisk++;
			} else {
				ccdisk = ii->ii_index[off % ii->ii_ndisk];
				cbn = ii->ii_startoff + off / ii->ii_ndisk;
			}
		}

		ci = &cs->sc_cinfo[ccdisk];

		/*
		 * Convert cbn from a superblock to a normal block so it
		 * can be used to calculate (along with cboff) the normal
		 * block index into this particular disk.
		 */
		cbn *= cs->sc_ileave;
	}

	/*
	 * Fill in the component buf structure.
	 */
	cbp = getccdbuf(NULL);
	cbp->cb_buf.bio_cmd = bp->bio_cmd;
	cbp->cb_buf.bio_done = ccdiodone;
	cbp->cb_buf.bio_dev = ci->ci_dev;		/* XXX */
	/* CCD_OFFSET blocks at the front of the component are skipped. */
	cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET;
	cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
	cbp->cb_buf.bio_data = addr;
	/*
	 * cbc is the number of bytes left in the component (no
	 * interleave) or in the current interleave chunk.
	 */
	if (cs->sc_ileave == 0)
              cbc = dbtob((off_t)(ci->ci_size - cbn));
	else
              cbc = dbtob((off_t)(cs->sc_ileave - cboff));
	cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount;
	/* Stash the requested size; ccdiodone() reads it back. */
	cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount;

	/*
	 * context for ccdiodone
	 */
	cbp->cb_obp = bp;
	cbp->cb_unit = cs->sc_unit;
	cbp->cb_comp = ci - cs->sc_cinfo;

#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		printf(" dev %p(u%ld): cbp %p bn %d addr %p bcnt %ld\n",
		       ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp,
		       cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data,
		       cbp->cb_buf.bio_bcount);
#endif
	cb[0] = cbp;

	/*
	 * Note: both I/O's setup when reading from mirror, but only one
	 * will be executed.
	 */
	if (cs->sc_cflags & CCDF_MIRROR) {
		/* mirror, setup second I/O */
		cbp = getccdbuf(cb[0]);
		cbp->cb_buf.bio_dev = ci2->ci_dev;
		cbp->cb_comp = ci2 - cs->sc_cinfo;
		cb[1] = cbp;
		/* link together the ccdbuf's and clear "mirror done" flag */
		cb[0]->cb_mirror = cb[1];
		cb[1]->cb_mirror = cb[0];
		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
	}
}

/*
 * ccdintr() - Complete the original request bp.
 *
 * When bp carries BIO_ERROR its residual is reset to the full byte
 * count before the request is finished through biofinish(), which is
 * also given the unit's devstat record.
 */
static void
ccdintr(struct ccd_s *cs, struct bio *bp)
{
#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdintr(%p, %p)\n", cs, bp);
#endif
	/*
	 * Request is done for better or worse, wakeup the top half.
	 */
	if (bp->bio_flags & BIO_ERROR)
		bp->bio_resid = bp->bio_bcount;
	biofinish(bp, &cs->device_stats, 0);
}

/*
 * Called at interrupt time.
 * Mark the component as done and if all components are done,
 * take a ccd interrupt.
1117 */ 1118 static void 1119 ccdiodone(struct bio *ibp) 1120 { 1121 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 1122 struct bio *bp = cbp->cb_obp; 1123 int unit = cbp->cb_unit; 1124 int count, s; 1125 1126 s = splbio(); 1127 #ifdef DEBUG 1128 if (ccddebug & CCDB_FOLLOW) 1129 printf("ccdiodone(%p)\n", cbp); 1130 if (ccddebug & CCDB_IO) { 1131 printf("ccdiodone: bp %p bcount %ld resid %ld\n", 1132 bp, bp->bio_bcount, bp->bio_resid); 1133 printf(" dev %p(u%d), cbp %p bn %d addr %p bcnt %ld\n", 1134 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, 1135 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1136 cbp->cb_buf.bio_bcount); 1137 } 1138 #endif 1139 /* 1140 * If an error occured, report it. If this is a mirrored 1141 * configuration and the first of two possible reads, do not 1142 * set the error in the bp yet because the second read may 1143 * succeed. 1144 */ 1145 1146 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1147 const char *msg = ""; 1148 1149 if ((ccdfind(unit)->sc_cflags & CCDF_MIRROR) && 1150 (cbp->cb_buf.bio_cmd == BIO_READ) && 1151 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1152 /* 1153 * We will try our read on the other disk down 1154 * below, also reverse the default pick so if we 1155 * are doing a scan we do not keep hitting the 1156 * bad disk first. 1157 */ 1158 struct ccd_s *cs = ccdfind(unit); 1159 1160 msg = ", trying other disk"; 1161 cs->sc_pick = 1 - cs->sc_pick; 1162 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 1163 } else { 1164 bp->bio_flags |= BIO_ERROR; 1165 bp->bio_error = cbp->cb_buf.bio_error ? 1166 cbp->cb_buf.bio_error : EIO; 1167 } 1168 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1169 unit, bp->bio_error, cbp->cb_comp, 1170 (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg); 1171 } 1172 1173 /* 1174 * Process mirror. If we are writing, I/O has been initiated on both 1175 * buffers and we fall through only after both are finished. 1176 * 1177 * If we are reading only one I/O is initiated at a time. 
If an 1178 * error occurs we initiate the second I/O and return, otherwise 1179 * we free the second I/O without initiating it. 1180 */ 1181 1182 if (ccdfind(unit)->sc_cflags & CCDF_MIRROR) { 1183 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 1184 /* 1185 * When writing, handshake with the second buffer 1186 * to determine when both are done. If both are not 1187 * done, return here. 1188 */ 1189 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1190 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1191 putccdbuf(cbp); 1192 splx(s); 1193 return; 1194 } 1195 } else { 1196 /* 1197 * When reading, either dispose of the second buffer 1198 * or initiate I/O on the second buffer if an error 1199 * occured with this one. 1200 */ 1201 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1202 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1203 cbp->cb_mirror->cb_pflags |= 1204 CCDPF_MIRROR_DONE; 1205 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0); 1206 putccdbuf(cbp); 1207 splx(s); 1208 return; 1209 } else { 1210 putccdbuf(cbp->cb_mirror); 1211 /* fall through */ 1212 } 1213 } 1214 } 1215 } 1216 1217 /* 1218 * use bio_caller1 to determine how big the original request was rather 1219 * then bio_bcount, because bio_bcount may have been truncated for EOF. 1220 * 1221 * XXX We check for an error, but we do not test the resid for an 1222 * aligned EOF condition. This may result in character & block 1223 * device access not recognizing EOF properly when read or written 1224 * sequentially, but will not effect filesystems. 1225 */ 1226 count = (long)cbp->cb_buf.bio_caller1; 1227 putccdbuf(cbp); 1228 1229 /* 1230 * If all done, "interrupt". 
1231 */ 1232 bp->bio_resid -= count; 1233 if (bp->bio_resid < 0) 1234 panic("ccdiodone: count"); 1235 if (bp->bio_resid == 0) 1236 ccdintr(ccdfind(unit), bp); 1237 splx(s); 1238 } 1239 1240 static int 1241 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td) 1242 { 1243 int unit = ccdunit(dev); 1244 int i, j, lookedup = 0, error = 0; 1245 int part, pmask, s; 1246 struct ccd_s *cs; 1247 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1248 char **cpp; 1249 struct vnode **vpp; 1250 1251 if (!IS_ALLOCATED(unit)) 1252 return (ENXIO); 1253 cs = ccdfind(unit); 1254 1255 switch (cmd) { 1256 case CCDIOCSET: 1257 if (IS_INITED(cs)) 1258 return (EBUSY); 1259 1260 if ((flag & FWRITE) == 0) 1261 return (EBADF); 1262 1263 if ((error = ccdlock(cs)) != 0) 1264 return (error); 1265 1266 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1267 return (EINVAL); 1268 1269 /* Fill in some important bits. */ 1270 cs->sc_ileave = ccio->ccio_ileave; 1271 if (cs->sc_ileave == 0 && 1272 ((ccio->ccio_flags & CCDF_MIRROR) || 1273 (ccio->ccio_flags & CCDF_PARITY))) { 1274 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1275 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1276 } 1277 if ((ccio->ccio_flags & CCDF_MIRROR) && 1278 (ccio->ccio_flags & CCDF_PARITY)) { 1279 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1280 ccio->ccio_flags &= ~CCDF_PARITY; 1281 } 1282 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1283 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1284 printf("ccd%d: mirror/parity forces uniform flag\n", 1285 unit); 1286 ccio->ccio_flags |= CCDF_UNIFORM; 1287 } 1288 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1289 1290 /* 1291 * Allocate space for and copy in the array of 1292 * componet pathnames and device numbers. 
1293 */ 1294 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1295 M_DEVBUF, M_WAITOK); 1296 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1297 M_DEVBUF, M_WAITOK); 1298 1299 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1300 ccio->ccio_ndisks * sizeof(char **)); 1301 if (error) { 1302 free(vpp, M_DEVBUF); 1303 free(cpp, M_DEVBUF); 1304 ccdunlock(cs); 1305 return (error); 1306 } 1307 1308 #ifdef DEBUG 1309 if (ccddebug & CCDB_INIT) 1310 for (i = 0; i < ccio->ccio_ndisks; ++i) 1311 printf("ccdioctl: component %d: %p\n", 1312 i, cpp[i]); 1313 #endif 1314 1315 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1316 #ifdef DEBUG 1317 if (ccddebug & CCDB_INIT) 1318 printf("ccdioctl: lookedup = %d\n", lookedup); 1319 #endif 1320 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1321 for (j = 0; j < lookedup; ++j) 1322 (void)vn_close(vpp[j], FREAD|FWRITE, 1323 td->td_proc->p_ucred, td); 1324 free(vpp, M_DEVBUF); 1325 free(cpp, M_DEVBUF); 1326 ccdunlock(cs); 1327 return (error); 1328 } 1329 ++lookedup; 1330 } 1331 cs->sc_vpp = vpp; 1332 cs->sc_nccdisks = ccio->ccio_ndisks; 1333 1334 /* 1335 * Initialize the ccd. Fills in the softc for us. 1336 */ 1337 if ((error = ccdinit(cs, cpp, td)) != 0) { 1338 for (j = 0; j < lookedup; ++j) 1339 (void)vn_close(vpp[j], FREAD|FWRITE, 1340 td->td_proc->p_ucred, td); 1341 /* 1342 * We can't ccddestroy() cs just yet, because nothing 1343 * prevents user-level app to do another ioctl() 1344 * without closing the device first, therefore 1345 * declare unit null and void and let ccdclose() 1346 * destroy it when it is safe to do so. 1347 */ 1348 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1349 free(vpp, M_DEVBUF); 1350 free(cpp, M_DEVBUF); 1351 ccdunlock(cs); 1352 return (error); 1353 } 1354 1355 /* 1356 * The ccd has been successfully initialized, so 1357 * we can place it into the array and read the disklabel. 
1358 */ 1359 ccio->ccio_unit = unit; 1360 ccio->ccio_size = cs->sc_size; 1361 ccdgetdisklabel(dev); 1362 1363 ccdunlock(cs); 1364 1365 break; 1366 1367 case CCDIOCCLR: 1368 if (!IS_INITED(cs)) 1369 return (ENXIO); 1370 1371 if ((flag & FWRITE) == 0) 1372 return (EBADF); 1373 1374 if ((error = ccdlock(cs)) != 0) 1375 return (error); 1376 1377 /* Don't unconfigure if any other partitions are open */ 1378 part = ccdpart(dev); 1379 pmask = (1 << part); 1380 if ((cs->sc_openmask & ~pmask)) { 1381 ccdunlock(cs); 1382 return (EBUSY); 1383 } 1384 1385 /* Declare unit null and void (reset all flags) */ 1386 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1387 1388 /* Close the components and free their pathnames. */ 1389 for (i = 0; i < cs->sc_nccdisks; ++i) { 1390 /* 1391 * XXX: this close could potentially fail and 1392 * cause Bad Things. Maybe we need to force 1393 * the close to happen? 1394 */ 1395 #ifdef DEBUG 1396 if (ccddebug & CCDB_VNODE) 1397 vprint("CCDIOCCLR: vnode info", 1398 cs->sc_cinfo[i].ci_vp); 1399 #endif 1400 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1401 td->td_proc->p_ucred, td); 1402 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1403 } 1404 1405 /* Free interleave index. */ 1406 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1407 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1408 1409 /* Free component info and interleave table. */ 1410 free(cs->sc_cinfo, M_DEVBUF); 1411 free(cs->sc_itable, M_DEVBUF); 1412 free(cs->sc_vpp, M_DEVBUF); 1413 1414 /* And remove the devstat entry. */ 1415 devstat_remove_entry(&cs->device_stats); 1416 1417 /* This must be atomic. 
*/ 1418 s = splhigh(); 1419 ccdunlock(cs); 1420 splx(s); 1421 1422 break; 1423 1424 case CCDCONFINFO: 1425 { 1426 int ninit = 0; 1427 struct ccdconf *conf = (struct ccdconf *)data; 1428 struct ccd_s *tmpcs; 1429 struct ccd_s *ubuf = conf->buffer; 1430 1431 /* XXX: LOCK(unique unit numbers) */ 1432 LIST_FOREACH(tmpcs, &ccd_softc_list, list) 1433 if (IS_INITED(tmpcs)) 1434 ninit++; 1435 1436 if (conf->size == 0) { 1437 conf->size = sizeof(struct ccd_s) * ninit; 1438 break; 1439 } else if ((conf->size / sizeof(struct ccd_s) != ninit) || 1440 (conf->size % sizeof(struct ccd_s) != 0)) { 1441 /* XXX: UNLOCK(unique unit numbers) */ 1442 return (EINVAL); 1443 } 1444 1445 ubuf += ninit; 1446 LIST_FOREACH(tmpcs, &ccd_softc_list, list) { 1447 if (!IS_INITED(tmpcs)) 1448 continue; 1449 error = copyout(tmpcs, --ubuf, 1450 sizeof(struct ccd_s)); 1451 if (error != 0) 1452 /* XXX: UNLOCK(unique unit numbers) */ 1453 return (error); 1454 } 1455 /* XXX: UNLOCK(unique unit numbers) */ 1456 } 1457 break; 1458 1459 case CCDCPPINFO: 1460 if (!IS_INITED(cs)) 1461 return (ENXIO); 1462 1463 { 1464 int len = 0; 1465 struct ccdcpps *cpps = (struct ccdcpps *)data; 1466 char *ubuf = cpps->buffer; 1467 1468 1469 for (i = 0; i < cs->sc_nccdisks; ++i) 1470 len += cs->sc_cinfo[i].ci_pathlen; 1471 1472 if (cpps->size == 0) { 1473 cpps->size = len; 1474 break; 1475 } else if (cpps->size != len) { 1476 return (EINVAL); 1477 } 1478 1479 for (i = 0; i < cs->sc_nccdisks; ++i) { 1480 len = cs->sc_cinfo[i].ci_pathlen; 1481 error = copyout(cs->sc_cinfo[i].ci_path, ubuf, 1482 len); 1483 if (error != 0) 1484 return (error); 1485 ubuf += len; 1486 } 1487 } 1488 break; 1489 1490 case DIOCGDINFO: 1491 if (!IS_INITED(cs)) 1492 return (ENXIO); 1493 1494 *(struct disklabel *)data = cs->sc_label; 1495 break; 1496 1497 case DIOCGPART: 1498 if (!IS_INITED(cs)) 1499 return (ENXIO); 1500 1501 ((struct partinfo *)data)->disklab = &cs->sc_label; 1502 ((struct partinfo *)data)->part = 1503 
&cs->sc_label.d_partitions[ccdpart(dev)]; 1504 break; 1505 1506 case DIOCWDINFO: 1507 case DIOCSDINFO: 1508 if (!IS_INITED(cs)) 1509 return (ENXIO); 1510 1511 if ((flag & FWRITE) == 0) 1512 return (EBADF); 1513 1514 if ((error = ccdlock(cs)) != 0) 1515 return (error); 1516 1517 cs->sc_flags |= CCDF_LABELLING; 1518 1519 error = setdisklabel(&cs->sc_label, 1520 (struct disklabel *)data, 0); 1521 if (error == 0) { 1522 if (cmd == DIOCWDINFO) 1523 error = writedisklabel(CCDLABELDEV(dev), 1524 &cs->sc_label); 1525 } 1526 1527 cs->sc_flags &= ~CCDF_LABELLING; 1528 1529 ccdunlock(cs); 1530 1531 if (error) 1532 return (error); 1533 break; 1534 1535 case DIOCWLABEL: 1536 if (!IS_INITED(cs)) 1537 return (ENXIO); 1538 1539 if ((flag & FWRITE) == 0) 1540 return (EBADF); 1541 if (*(int *)data != 0) 1542 cs->sc_flags |= CCDF_WLABEL; 1543 else 1544 cs->sc_flags &= ~CCDF_WLABEL; 1545 break; 1546 1547 default: 1548 return (ENOTTY); 1549 } 1550 1551 return (0); 1552 } 1553 1554 static int 1555 ccdsize(dev_t dev) 1556 { 1557 struct ccd_s *cs; 1558 int part, size; 1559 1560 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1561 return (-1); 1562 1563 cs = ccdfind(ccdunit(dev)); 1564 part = ccdpart(dev); 1565 1566 if (!IS_INITED(cs)) 1567 return (-1); 1568 1569 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1570 size = -1; 1571 else 1572 size = cs->sc_label.d_partitions[part].p_size; 1573 1574 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1575 return (-1); 1576 1577 return (size); 1578 } 1579 1580 static int 1581 ccddump(dev_t dev) 1582 { 1583 1584 /* Not implemented. */ 1585 return ENXIO; 1586 } 1587 1588 /* 1589 * Lookup the provided name in the filesystem. If the file exists, 1590 * is a valid block device, and isn't being used by anyone else, 1591 * set *vpp to the file's vnode. 
1592 */ 1593 static int 1594 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1595 { 1596 struct nameidata nd; 1597 struct vnode *vp; 1598 int error, flags; 1599 1600 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td); 1601 flags = FREAD | FWRITE; 1602 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1603 #ifdef DEBUG 1604 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 1605 printf("ccdlookup: vn_open error = %d\n", error); 1606 #endif 1607 return (error); 1608 } 1609 vp = nd.ni_vp; 1610 1611 if (vp->v_usecount > 1) { 1612 error = EBUSY; 1613 goto bad; 1614 } 1615 1616 if (!vn_isdisk(vp, &error)) 1617 goto bad; 1618 1619 #ifdef DEBUG 1620 if (ccddebug & CCDB_VNODE) 1621 vprint("ccdlookup: vnode info", vp); 1622 #endif 1623 1624 VOP_UNLOCK(vp, 0, td); 1625 NDFREE(&nd, NDF_ONLY_PNBUF); 1626 *vpp = vp; 1627 return (0); 1628 bad: 1629 VOP_UNLOCK(vp, 0, td); 1630 NDFREE(&nd, NDF_ONLY_PNBUF); 1631 /* vn_close does vrele() for vp */ 1632 (void)vn_close(vp, FREAD|FWRITE, td->td_proc->p_ucred, td); 1633 return (error); 1634 } 1635 1636 /* 1637 * Read the disklabel from the ccd. If one is not present, fake one 1638 * up. 
1639 */ 1640 static void 1641 ccdgetdisklabel(dev_t dev) 1642 { 1643 int unit = ccdunit(dev); 1644 struct ccd_s *cs = ccdfind(unit); 1645 char *errstring; 1646 struct disklabel *lp = &cs->sc_label; 1647 struct ccdgeom *ccg = &cs->sc_geom; 1648 1649 bzero(lp, sizeof(*lp)); 1650 1651 lp->d_secperunit = cs->sc_size; 1652 lp->d_secsize = ccg->ccg_secsize; 1653 lp->d_nsectors = ccg->ccg_nsectors; 1654 lp->d_ntracks = ccg->ccg_ntracks; 1655 lp->d_ncylinders = ccg->ccg_ncylinders; 1656 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1657 1658 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1659 lp->d_type = DTYPE_CCD; 1660 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1661 lp->d_rpm = 3600; 1662 lp->d_interleave = 1; 1663 lp->d_flags = 0; 1664 1665 lp->d_partitions[RAW_PART].p_offset = 0; 1666 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1667 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1668 lp->d_npartitions = RAW_PART + 1; 1669 1670 lp->d_bbsize = BBSIZE; /* XXX */ 1671 lp->d_sbsize = SBSIZE; /* XXX */ 1672 1673 lp->d_magic = DISKMAGIC; 1674 lp->d_magic2 = DISKMAGIC; 1675 lp->d_checksum = dkcksum(&cs->sc_label); 1676 1677 /* 1678 * Call the generic disklabel extraction routine. 1679 */ 1680 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1681 if (errstring != NULL) 1682 ccdmakedisklabel(cs); 1683 1684 #ifdef DEBUG 1685 /* It's actually extremely common to have unlabeled ccds. */ 1686 if (ccddebug & CCDB_LABEL) 1687 if (errstring != NULL) 1688 printf("ccd%d: %s\n", unit, errstring); 1689 #endif 1690 } 1691 1692 /* 1693 * Take care of things one might want to take care of in the event 1694 * that a disklabel isn't present. 1695 */ 1696 static void 1697 ccdmakedisklabel(struct ccd_s *cs) 1698 { 1699 struct disklabel *lp = &cs->sc_label; 1700 1701 /* 1702 * For historical reasons, if there's no disklabel present 1703 * the raw partition must be marked FS_BSDFFS. 
1704 */ 1705 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1706 1707 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1708 } 1709 1710 /* 1711 * Wait interruptibly for an exclusive lock. 1712 * 1713 * XXX 1714 * Several drivers do this; it should be abstracted and made MP-safe. 1715 */ 1716 static int 1717 ccdlock(struct ccd_s *cs) 1718 { 1719 int error; 1720 1721 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1722 cs->sc_flags |= CCDF_WANTED; 1723 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1724 return (error); 1725 } 1726 cs->sc_flags |= CCDF_LOCKED; 1727 return (0); 1728 } 1729 1730 /* 1731 * Unlock and wake up any waiters. 1732 */ 1733 static void 1734 ccdunlock(struct ccd_s *cs) 1735 { 1736 1737 cs->sc_flags &= ~CCDF_LOCKED; 1738 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1739 cs->sc_flags &= ~CCDF_WANTED; 1740 wakeup(cs); 1741 } 1742 } 1743 1744 #ifdef DEBUG 1745 static void 1746 printiinfo(struct ccdiinfo *ii) 1747 { 1748 int ix, i; 1749 1750 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1751 printf(" itab[%d]: #dk %d sblk %d soff %d", 1752 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1753 for (i = 0; i < ii->ii_ndisk; i++) 1754 printf(" %d", ii->ii_index[i]); 1755 printf("\n"); 1756 } 1757 } 1758 #endif 1759