1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include <sys/param.h> 91 #include <sys/systm.h> 92 #include <sys/kernel.h> 93 #include <sys/module.h> 94 #include <sys/proc.h> 95 #include <sys/bio.h> 96 #include <sys/malloc.h> 97 #include <sys/namei.h> 98 #include <sys/conf.h> 99 #include <sys/stat.h> 100 #include <sys/sysctl.h> 101 #include <sys/disklabel.h> 102 #include <ufs/ffs/fs.h> 103 #include <sys/devicestat.h> 104 #include <sys/fcntl.h> 105 #include <sys/vnode.h> 106 107 #include <sys/ccdvar.h> 108 109 MALLOC_DEFINE(M_CCD, "CCD driver", "Concatenated Disk driver"); 110 111 #if defined(CCDDEBUG) && !defined(DEBUG) 112 #define DEBUG 113 #endif 114 115 #ifdef DEBUG 116 #define CCDB_FOLLOW 0x01 117 #define CCDB_INIT 0x02 118 #define CCDB_IO 0x04 119 #define CCDB_LABEL 0x08 120 #define CCDB_VNODE 0x10 121 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 122 CCDB_VNODE; 123 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 124 #endif 125 126 #define ccdunit(x) dkunit(x) 127 #define ccdpart(x) dkpart(x) 128 129 /* 130 This is how mirroring works (only writes are special): 131 132 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 133 linked together by the cb_mirror field. "cb_pflags & 134 CCDPF_MIRROR_DONE" is set to 0 on both of them. 135 136 When a component returns to ccdiodone(), it checks if "cb_pflags & 137 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 138 flag and returns. If it is, it means its partner has already 139 returned, so it will go to the regular cleanup. 140 141 */ 142 143 struct ccdbuf { 144 struct bio cb_buf; /* new I/O buf */ 145 struct bio *cb_obp; /* ptr. to original I/O buf */ 146 struct ccdbuf *cb_freenext; /* free list link */ 147 int cb_unit; /* target unit */ 148 int cb_comp; /* target component */ 149 int cb_pflags; /* mirror/parity status flag */ 150 struct ccdbuf *cb_mirror; /* mirror counterpart */ 151 }; 152 153 /* bits in cb_pflags */ 154 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 155 156 #define CCDLABELDEV(dev) \ 157 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 158 159 /* convinient macros for often-used statements */ 160 #define IS_ALLOCATED(unit) (ccdfind(unit) != NULL) 161 #define IS_INITED(cs) (((cs)->sc_flags & CCDF_INITED) != 0) 162 163 static d_open_t ccdopen; 164 static d_close_t ccdclose; 165 static d_strategy_t ccdstrategy; 166 static d_ioctl_t ccdioctl; 167 static d_dump_t ccddump; 168 static d_psize_t ccdsize; 169 170 #define NCCDFREEHIWAT 16 171 172 #define CDEV_MAJOR 74 173 174 static struct cdevsw ccd_cdevsw = { 175 /* open */ ccdopen, 176 /* close */ ccdclose, 177 /* read */ physread, 178 /* write */ physwrite, 179 /* ioctl */ ccdioctl, 180 /* poll */ nopoll, 181 /* mmap */ nommap, 182 /* strategy */ ccdstrategy, 183 /* name */ "ccd", 184 /* maj */ CDEV_MAJOR, 185 /* dump */ ccddump, 186 /* psize */ ccdsize, 187 /* flags */ D_DISK, 188 }; 189 static LIST_HEAD(, ccd_s) ccd_softc_list = LIST_HEAD_INITIALIZER(&ccd_softc_list); 190 191 static struct ccd_s *ccdfind(int); 192 static struct ccd_s *ccdnew(int); 193 static int ccddestroy(struct ccd_s *, struct proc *); 194 195 /* called during module initialization */ 196 static void ccdattach(void); 197 static int ccd_modevent(module_t, int, void *); 198 199 /* called by biodone() at interrupt time */ 200 static void ccdiodone(struct bio *bp); 201 202 static void ccdstart(struct ccd_s *, struct bio *); 203 static void ccdinterleave(struct ccd_s *, int); 204 static void ccdintr(struct ccd_s *, struct bio *); 205 static int ccdinit(struct ccd_s *, char **, struct thread *); 206 static int ccdlookup(char *, struct thread *p, struct vnode **); 207 static void ccdbuffer(struct ccdbuf **ret, struct ccd_s *, 208 struct bio *, daddr_t, caddr_t, long); 209 static void ccdgetdisklabel(dev_t); 210 static void ccdmakedisklabel(struct ccd_s *); 211 static int ccdlock(struct ccd_s *); 212 static void ccdunlock(struct ccd_s *); 213 214 #ifdef DEBUG 215 static void printiinfo(struct ccdiinfo *); 216 #endif 217 218 /* Non-private for the benefit of libkvm. */ 219 struct ccdbuf *ccdfreebufs; 220 static int numccdfreebufs; 221 222 /* 223 * getccdbuf() - Allocate and zero a ccd buffer. 224 * 225 * This routine is called at splbio(). 226 */ 227 228 static __inline 229 struct ccdbuf * 230 getccdbuf(struct ccdbuf *cpy) 231 { 232 struct ccdbuf *cbp; 233 234 /* 235 * Allocate from freelist or malloc as necessary 236 */ 237 if ((cbp = ccdfreebufs) != NULL) { 238 ccdfreebufs = cbp->cb_freenext; 239 --numccdfreebufs; 240 } else { 241 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 242 } 243 244 /* 245 * Used by mirroring code 246 */ 247 if (cpy) 248 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 249 else 250 bzero(cbp, sizeof(struct ccdbuf)); 251 252 /* 253 * independant struct bio initialization 254 */ 255 256 return(cbp); 257 } 258 259 /* 260 * putccdbuf() - Free a ccd buffer. 261 * 262 * This routine is called at splbio(). 263 */ 264 265 static __inline 266 void 267 putccdbuf(struct ccdbuf *cbp) 268 { 269 270 if (numccdfreebufs < NCCDFREEHIWAT) { 271 cbp->cb_freenext = ccdfreebufs; 272 ccdfreebufs = cbp; 273 ++numccdfreebufs; 274 } else { 275 free((caddr_t)cbp, M_DEVBUF); 276 } 277 } 278 279 280 /* 281 * Number of blocks to untouched in front of a component partition. 282 * This is to avoid violating its disklabel area when it starts at the 283 * beginning of the slice. 284 */ 285 #if !defined(CCD_OFFSET) 286 #define CCD_OFFSET 16 287 #endif 288 289 static struct ccd_s * 290 ccdfind(int unit) 291 { 292 struct ccd_s *sc = NULL; 293 294 /* XXX: LOCK(unique unit numbers) */ 295 LIST_FOREACH(sc, &ccd_softc_list, list) { 296 if (sc->sc_unit == unit) 297 break; 298 } 299 /* XXX: UNLOCK(unique unit numbers) */ 300 return ((sc == NULL) || (sc->sc_unit != unit) ? NULL : sc); 301 } 302 303 static struct ccd_s * 304 ccdnew(int unit) 305 { 306 struct ccd_s *sc; 307 308 /* XXX: LOCK(unique unit numbers) */ 309 if (IS_ALLOCATED(unit) || unit > DKMAXUNIT) 310 return (NULL); 311 312 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO); 313 sc->sc_unit = unit; 314 LIST_INSERT_HEAD(&ccd_softc_list, sc, list); 315 /* XXX: UNLOCK(unique unit numbers) */ 316 return (sc); 317 } 318 319 static int 320 ccddestroy(struct ccd_s *sc, struct proc *p) 321 { 322 323 /* XXX: LOCK(unique unit numbers) */ 324 LIST_REMOVE(sc, list); 325 /* XXX: UNLOCK(unique unit numbers) */ 326 FREE(sc, M_CCD); 327 return (0); 328 } 329 330 static void 331 ccd_clone(void *arg, char *name, int namelen, dev_t *dev) 332 { 333 int i, u; 334 char *s; 335 336 if (*dev != NODEV) 337 return; 338 i = dev_stdclone(name, &s, "ccd", &u); 339 if (i != 2) 340 return; 341 if (*s < 'a' || *s > 'h') 342 return; 343 if (s[1] != '\0') 344 return; 345 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', 346 UID_ROOT, GID_OPERATOR, 0640, name); 347 } 348 349 /* 350 * Called by main() during pseudo-device attachment. All we need 351 * to do is to add devsw entries. 352 */ 353 static void 354 ccdattach() 355 { 356 357 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); 358 } 359 360 static int 361 ccd_modevent(module_t mod, int type, void *data) 362 { 363 int error = 0; 364 365 switch (type) { 366 case MOD_LOAD: 367 ccdattach(); 368 break; 369 370 case MOD_UNLOAD: 371 printf("ccd0: Unload not supported!\n"); 372 error = EOPNOTSUPP; 373 break; 374 375 case MOD_SHUTDOWN: 376 break; 377 378 default: 379 error = EOPNOTSUPP; 380 } 381 return (error); 382 } 383 384 DEV_MODULE(ccd, ccd_modevent, NULL); 385 386 static int 387 ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td) 388 { 389 struct ccdcinfo *ci = NULL; /* XXX */ 390 size_t size; 391 int ix; 392 struct vnode *vp; 393 size_t minsize; 394 int maxsecsize; 395 struct partinfo dpart; 396 struct ccdgeom *ccg = &cs->sc_geom; 397 char tmppath[MAXPATHLEN]; 398 int error = 0; 399 400 #ifdef DEBUG 401 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 402 printf("ccdinit: unit %d\n", cs->sc_unit); 403 #endif 404 405 cs->sc_size = 0; 406 407 /* Allocate space for the component info. */ 408 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 409 M_DEVBUF, M_WAITOK); 410 411 /* 412 * Verify that each component piece exists and record 413 * relevant information about it. 414 */ 415 maxsecsize = 0; 416 minsize = 0; 417 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 418 vp = cs->sc_vpp[ix]; 419 ci = &cs->sc_cinfo[ix]; 420 ci->ci_vp = vp; 421 422 /* 423 * Copy in the pathname of the component. 424 */ 425 bzero(tmppath, sizeof(tmppath)); /* sanity */ 426 if ((error = copyinstr(cpaths[ix], tmppath, 427 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 428 #ifdef DEBUG 429 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 430 printf("ccd%d: can't copy path, error = %d\n", 431 cs->sc_unit, error); 432 #endif 433 goto fail; 434 } 435 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 436 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 437 438 ci->ci_dev = vn_todev(vp); 439 440 /* 441 * Get partition information for the component. 442 */ 443 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 444 FREAD, td->td_proc->p_ucred, td)) != 0) { 445 #ifdef DEBUG 446 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 447 printf("ccd%d: %s: ioctl failed, error = %d\n", 448 cs->sc_unit, ci->ci_path, error); 449 #endif 450 goto fail; 451 } 452 if (dpart.part->p_fstype == FS_BSDFFS) { 453 maxsecsize = 454 ((dpart.disklab->d_secsize > maxsecsize) ? 455 dpart.disklab->d_secsize : maxsecsize); 456 size = dpart.part->p_size - CCD_OFFSET; 457 } else { 458 #ifdef DEBUG 459 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 460 printf("ccd%d: %s: incorrect partition type\n", 461 cs->sc_unit, ci->ci_path); 462 #endif 463 error = EFTYPE; 464 goto fail; 465 } 466 467 /* 468 * Calculate the size, truncating to an interleave 469 * boundary if necessary. 470 */ 471 472 if (cs->sc_ileave > 1) 473 size -= size % cs->sc_ileave; 474 475 if (size == 0) { 476 #ifdef DEBUG 477 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 478 printf("ccd%d: %s: size == 0\n", 479 cs->sc_unit, ci->ci_path); 480 #endif 481 error = ENODEV; 482 goto fail; 483 } 484 485 if (minsize == 0 || size < minsize) 486 minsize = size; 487 ci->ci_size = size; 488 cs->sc_size += size; 489 } 490 491 /* 492 * Don't allow the interleave to be smaller than 493 * the biggest component sector. 494 */ 495 if ((cs->sc_ileave > 0) && 496 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 497 #ifdef DEBUG 498 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 499 printf("ccd%d: interleave must be at least %d\n", 500 cs->sc_unit, (maxsecsize / DEV_BSIZE)); 501 #endif 502 error = EINVAL; 503 goto fail; 504 } 505 506 /* 507 * If uniform interleave is desired set all sizes to that of 508 * the smallest component. This will guarentee that a single 509 * interleave table is generated. 510 * 511 * Lost space must be taken into account when calculating the 512 * overall size. Half the space is lost when CCDF_MIRROR is 513 * specified. One disk is lost when CCDF_PARITY is specified. 514 */ 515 if (cs->sc_flags & CCDF_UNIFORM) { 516 for (ci = cs->sc_cinfo; 517 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 518 ci->ci_size = minsize; 519 } 520 if (cs->sc_flags & CCDF_MIRROR) { 521 /* 522 * Check to see if an even number of components 523 * have been specified. The interleave must also 524 * be non-zero in order for us to be able to 525 * guarentee the topology. 526 */ 527 if (cs->sc_nccdisks % 2) { 528 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit ); 529 error = EINVAL; 530 goto fail; 531 } 532 if (cs->sc_ileave == 0) { 533 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit); 534 error = EINVAL; 535 goto fail; 536 } 537 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 538 } else if (cs->sc_flags & CCDF_PARITY) { 539 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 540 } else { 541 if (cs->sc_ileave == 0) { 542 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit); 543 error = EINVAL; 544 goto fail; 545 } 546 cs->sc_size = cs->sc_nccdisks * minsize; 547 } 548 } 549 550 /* 551 * Construct the interleave table. 552 */ 553 ccdinterleave(cs, cs->sc_unit); 554 555 /* 556 * Create pseudo-geometry based on 1MB cylinders. It's 557 * pretty close. 558 */ 559 ccg->ccg_secsize = maxsecsize; 560 ccg->ccg_ntracks = 1; 561 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 562 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 563 564 /* 565 * Add an devstat entry for this device. 566 */ 567 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit, 568 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 569 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 570 DEVSTAT_PRIORITY_ARRAY); 571 572 cs->sc_flags |= CCDF_INITED; 573 cs->sc_cflags = cs->sc_flags; /* So we can find out later... */ 574 return (0); 575 fail: 576 while (ci > cs->sc_cinfo) { 577 ci--; 578 free(ci->ci_path, M_DEVBUF); 579 } 580 free(cs->sc_cinfo, M_DEVBUF); 581 return (error); 582 } 583 584 static void 585 ccdinterleave(struct ccd_s *cs, int unit) 586 { 587 struct ccdcinfo *ci, *smallci; 588 struct ccdiinfo *ii; 589 daddr_t bn, lbn; 590 int ix; 591 u_long size; 592 593 #ifdef DEBUG 594 if (ccddebug & CCDB_INIT) 595 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 596 #endif 597 598 /* 599 * Allocate an interleave table. The worst case occurs when each 600 * of N disks is of a different size, resulting in N interleave 601 * tables. 602 * 603 * Chances are this is too big, but we don't care. 604 */ 605 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 606 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, 607 M_WAITOK | M_ZERO); 608 609 /* 610 * Trivial case: no interleave (actually interleave of disk size). 611 * Each table entry represents a single component in its entirety. 612 * 613 * An interleave of 0 may not be used with a mirror or parity setup. 614 */ 615 if (cs->sc_ileave == 0) { 616 bn = 0; 617 ii = cs->sc_itable; 618 619 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 620 /* Allocate space for ii_index. */ 621 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 622 ii->ii_ndisk = 1; 623 ii->ii_startblk = bn; 624 ii->ii_startoff = 0; 625 ii->ii_index[0] = ix; 626 bn += cs->sc_cinfo[ix].ci_size; 627 ii++; 628 } 629 ii->ii_ndisk = 0; 630 #ifdef DEBUG 631 if (ccddebug & CCDB_INIT) 632 printiinfo(cs->sc_itable); 633 #endif 634 return; 635 } 636 637 /* 638 * The following isn't fast or pretty; it doesn't have to be. 639 */ 640 size = 0; 641 bn = lbn = 0; 642 for (ii = cs->sc_itable; ; ii++) { 643 /* 644 * Allocate space for ii_index. We might allocate more then 645 * we use. 646 */ 647 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 648 M_DEVBUF, M_WAITOK); 649 650 /* 651 * Locate the smallest of the remaining components 652 */ 653 smallci = NULL; 654 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 655 ci++) { 656 if (ci->ci_size > size && 657 (smallci == NULL || 658 ci->ci_size < smallci->ci_size)) { 659 smallci = ci; 660 } 661 } 662 663 /* 664 * Nobody left, all done 665 */ 666 if (smallci == NULL) { 667 ii->ii_ndisk = 0; 668 break; 669 } 670 671 /* 672 * Record starting logical block using an sc_ileave blocksize. 673 */ 674 ii->ii_startblk = bn / cs->sc_ileave; 675 676 /* 677 * Record starting comopnent block using an sc_ileave 678 * blocksize. This value is relative to the beginning of 679 * a component disk. 680 */ 681 ii->ii_startoff = lbn; 682 683 /* 684 * Determine how many disks take part in this interleave 685 * and record their indices. 686 */ 687 ix = 0; 688 for (ci = cs->sc_cinfo; 689 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 690 if (ci->ci_size >= smallci->ci_size) { 691 ii->ii_index[ix++] = ci - cs->sc_cinfo; 692 } 693 } 694 ii->ii_ndisk = ix; 695 bn += ix * (smallci->ci_size - size); 696 lbn = smallci->ci_size / cs->sc_ileave; 697 size = smallci->ci_size; 698 } 699 #ifdef DEBUG 700 if (ccddebug & CCDB_INIT) 701 printiinfo(cs->sc_itable); 702 #endif 703 } 704 705 /* ARGSUSED */ 706 static int 707 ccdopen(dev_t dev, int flags, int fmt, struct thread *td) 708 { 709 int unit = ccdunit(dev); 710 struct ccd_s *cs; 711 struct disklabel *lp; 712 int error = 0, part, pmask; 713 714 #ifdef DEBUG 715 if (ccddebug & CCDB_FOLLOW) 716 printf("ccdopen(%p, %x)\n", dev, flags); 717 #endif 718 719 cs = IS_ALLOCATED(unit) ? ccdfind(unit) : ccdnew(unit); 720 721 if ((error = ccdlock(cs)) != 0) 722 return (error); 723 724 lp = &cs->sc_label; 725 726 part = ccdpart(dev); 727 pmask = (1 << part); 728 729 /* 730 * If we're initialized, check to see if there are any other 731 * open partitions. If not, then it's safe to update 732 * the in-core disklabel. 733 */ 734 if (IS_INITED(cs) && (cs->sc_openmask == 0)) 735 ccdgetdisklabel(dev); 736 737 /* Check that the partition exists. */ 738 if (part != RAW_PART && ((part >= lp->d_npartitions) || 739 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 740 error = ENXIO; 741 goto done; 742 } 743 744 cs->sc_openmask |= pmask; 745 done: 746 ccdunlock(cs); 747 return (0); 748 } 749 750 /* ARGSUSED */ 751 static int 752 ccdclose(dev_t dev, int flags, int fmt, struct thread *td) 753 { 754 int unit = ccdunit(dev); 755 struct ccd_s *cs; 756 int error = 0, part; 757 758 #ifdef DEBUG 759 if (ccddebug & CCDB_FOLLOW) 760 printf("ccdclose(%p, %x)\n", dev, flags); 761 #endif 762 763 if (!IS_ALLOCATED(unit)) 764 return (ENXIO); 765 cs = ccdfind(unit); 766 767 if ((error = ccdlock(cs)) != 0) 768 return (error); 769 770 part = ccdpart(dev); 771 772 /* ...that much closer to allowing unconfiguration... */ 773 cs->sc_openmask &= ~(1 << part); 774 /* collect "garbage" if possible */ 775 if (!IS_INITED(cs) && (cs->sc_flags & CCDF_WANTED) == 0) 776 ccddestroy(cs, td->td_proc); 777 else 778 ccdunlock(cs); 779 return (0); 780 } 781 782 static void 783 ccdstrategy(struct bio *bp) 784 { 785 int unit = ccdunit(bp->bio_dev); 786 struct ccd_s *cs = ccdfind(unit); 787 int s; 788 int wlabel; 789 struct disklabel *lp; 790 791 #ifdef DEBUG 792 if (ccddebug & CCDB_FOLLOW) 793 printf("ccdstrategy(%p): unit %d\n", bp, unit); 794 #endif 795 if (!IS_INITED(cs)) { 796 biofinish(bp, NULL, ENXIO); 797 return; 798 } 799 800 /* If it's a nil transfer, wake up the top half now. */ 801 if (bp->bio_bcount == 0) { 802 biodone(bp); 803 return; 804 } 805 806 lp = &cs->sc_label; 807 808 /* 809 * Do bounds checking and adjust transfer. If there's an 810 * error, the bounds check will flag that for us. 811 */ 812 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 813 if (ccdpart(bp->bio_dev) != RAW_PART) { 814 if (bounds_check_with_label(bp, lp, wlabel) <= 0) { 815 biodone(bp); 816 return; 817 } 818 } else { 819 int pbn; /* in sc_secsize chunks */ 820 long sz; /* in sc_secsize chunks */ 821 822 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 823 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 824 825 /* 826 * If out of bounds return an error. If at the EOF point, 827 * simply read or write less. 828 */ 829 830 if (pbn < 0 || pbn >= cs->sc_size) { 831 bp->bio_resid = bp->bio_bcount; 832 if (pbn != cs->sc_size) 833 biofinish(bp, NULL, EINVAL); 834 else 835 biodone(bp); 836 return; 837 } 838 839 /* 840 * If the request crosses EOF, truncate the request. 841 */ 842 if (pbn + sz > cs->sc_size) { 843 bp->bio_bcount = (cs->sc_size - pbn) * 844 cs->sc_geom.ccg_secsize; 845 } 846 } 847 848 bp->bio_resid = bp->bio_bcount; 849 850 /* 851 * "Start" the unit. 852 */ 853 s = splbio(); 854 ccdstart(cs, bp); 855 splx(s); 856 return; 857 } 858 859 static void 860 ccdstart(struct ccd_s *cs, struct bio *bp) 861 { 862 long bcount, rcount; 863 struct ccdbuf *cbp[4]; 864 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 865 caddr_t addr; 866 daddr_t bn; 867 struct partition *pp; 868 869 #ifdef DEBUG 870 if (ccddebug & CCDB_FOLLOW) 871 printf("ccdstart(%p, %p)\n", cs, bp); 872 #endif 873 874 /* Record the transaction start */ 875 devstat_start_transaction(&cs->device_stats); 876 877 /* 878 * Translate the partition-relative block number to an absolute. 879 */ 880 bn = bp->bio_blkno; 881 if (ccdpart(bp->bio_dev) != RAW_PART) { 882 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; 883 bn += pp->p_offset; 884 } 885 886 /* 887 * Allocate component buffers and fire off the requests 888 */ 889 addr = bp->bio_data; 890 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 891 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 892 rcount = cbp[0]->cb_buf.bio_bcount; 893 894 if (cs->sc_cflags & CCDF_MIRROR) { 895 /* 896 * Mirroring. Writes go to both disks, reads are 897 * taken from whichever disk seems most appropriate. 898 * 899 * We attempt to localize reads to the disk whos arm 900 * is nearest the read request. We ignore seeks due 901 * to writes when making this determination and we 902 * also try to avoid hogging. 903 */ 904 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 905 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 906 BIO_STRATEGY(&cbp[1]->cb_buf, 0); 907 } else { 908 int pick = cs->sc_pick; 909 daddr_t range = cs->sc_size / 16; 910 911 if (bn < cs->sc_blk[pick] - range || 912 bn > cs->sc_blk[pick] + range 913 ) { 914 cs->sc_pick = pick = 1 - pick; 915 } 916 cs->sc_blk[pick] = bn + btodb(rcount); 917 BIO_STRATEGY(&cbp[pick]->cb_buf, 0); 918 } 919 } else { 920 /* 921 * Not mirroring 922 */ 923 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 924 } 925 bn += btodb(rcount); 926 addr += rcount; 927 } 928 } 929 930 /* 931 * Build a component buffer header. 932 */ 933 static void 934 ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) 935 { 936 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 937 struct ccdbuf *cbp; 938 daddr_t cbn, cboff; 939 off_t cbc; 940 941 #ifdef DEBUG 942 if (ccddebug & CCDB_IO) 943 printf("ccdbuffer(%p, %p, %d, %p, %ld)\n", 944 cs, bp, bn, addr, bcount); 945 #endif 946 /* 947 * Determine which component bn falls in. 948 */ 949 cbn = bn; 950 cboff = 0; 951 952 if (cs->sc_ileave == 0) { 953 /* 954 * Serially concatenated and neither a mirror nor a parity 955 * config. This is a special case. 956 */ 957 daddr_t sblk; 958 959 sblk = 0; 960 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 961 sblk += ci->ci_size; 962 cbn -= sblk; 963 } else { 964 struct ccdiinfo *ii; 965 int ccdisk, off; 966 967 /* 968 * Calculate cbn, the logical superblock (sc_ileave chunks), 969 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 970 * to cbn. 971 */ 972 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 973 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 974 975 /* 976 * Figure out which interleave table to use. 977 */ 978 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 979 if (ii->ii_startblk > cbn) 980 break; 981 } 982 ii--; 983 984 /* 985 * off is the logical superblock relative to the beginning 986 * of this interleave block. 987 */ 988 off = cbn - ii->ii_startblk; 989 990 /* 991 * We must calculate which disk component to use (ccdisk), 992 * and recalculate cbn to be the superblock relative to 993 * the beginning of the component. This is typically done by 994 * adding 'off' and ii->ii_startoff together. However, 'off' 995 * must typically be divided by the number of components in 996 * this interleave array to be properly convert it from a 997 * CCD-relative logical superblock number to a 998 * component-relative superblock number. 999 */ 1000 if (ii->ii_ndisk == 1) { 1001 /* 1002 * When we have just one disk, it can't be a mirror 1003 * or a parity config. 1004 */ 1005 ccdisk = ii->ii_index[0]; 1006 cbn = ii->ii_startoff + off; 1007 } else { 1008 if (cs->sc_cflags & CCDF_MIRROR) { 1009 /* 1010 * We have forced a uniform mapping, resulting 1011 * in a single interleave array. We double 1012 * up on the first half of the available 1013 * components and our mirror is in the second 1014 * half. This only works with a single 1015 * interleave array because doubling up 1016 * doubles the number of sectors, so there 1017 * cannot be another interleave array because 1018 * the next interleave array's calculations 1019 * would be off. 1020 */ 1021 int ndisk2 = ii->ii_ndisk / 2; 1022 ccdisk = ii->ii_index[off % ndisk2]; 1023 cbn = ii->ii_startoff + off / ndisk2; 1024 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1025 } else if (cs->sc_cflags & CCDF_PARITY) { 1026 /* 1027 * XXX not implemented yet 1028 */ 1029 int ndisk2 = ii->ii_ndisk - 1; 1030 ccdisk = ii->ii_index[off % ndisk2]; 1031 cbn = ii->ii_startoff + off / ndisk2; 1032 if (cbn % ii->ii_ndisk <= ccdisk) 1033 ccdisk++; 1034 } else { 1035 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1036 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1037 } 1038 } 1039 1040 ci = &cs->sc_cinfo[ccdisk]; 1041 1042 /* 1043 * Convert cbn from a superblock to a normal block so it 1044 * can be used to calculate (along with cboff) the normal 1045 * block index into this particular disk. 1046 */ 1047 cbn *= cs->sc_ileave; 1048 } 1049 1050 /* 1051 * Fill in the component buf structure. 1052 */ 1053 cbp = getccdbuf(NULL); 1054 cbp->cb_buf.bio_cmd = bp->bio_cmd; 1055 cbp->cb_buf.bio_done = ccdiodone; 1056 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ 1057 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; 1058 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1059 cbp->cb_buf.bio_data = addr; 1060 if (cs->sc_ileave == 0) 1061 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1062 else 1063 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1064 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount; 1065 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 1066 1067 /* 1068 * context for ccdiodone 1069 */ 1070 cbp->cb_obp = bp; 1071 cbp->cb_unit = cs->sc_unit; 1072 cbp->cb_comp = ci - cs->sc_cinfo; 1073 1074 #ifdef DEBUG 1075 if (ccddebug & CCDB_IO) 1076 printf(" dev %p(u%ld): cbp %p bn %d addr %p bcnt %ld\n", 1077 ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp, 1078 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1079 cbp->cb_buf.bio_bcount); 1080 #endif 1081 cb[0] = cbp; 1082 1083 /* 1084 * Note: both I/O's setup when reading from mirror, but only one 1085 * will be executed. 1086 */ 1087 if (cs->sc_cflags & CCDF_MIRROR) { 1088 /* mirror, setup second I/O */ 1089 cbp = getccdbuf(cb[0]); 1090 cbp->cb_buf.bio_dev = ci2->ci_dev; 1091 cbp->cb_comp = ci2 - cs->sc_cinfo; 1092 cb[1] = cbp; 1093 /* link together the ccdbuf's and clear "mirror done" flag */ 1094 cb[0]->cb_mirror = cb[1]; 1095 cb[1]->cb_mirror = cb[0]; 1096 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1097 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1098 } 1099 } 1100 1101 static void 1102 ccdintr(struct ccd_s *cs, struct bio *bp) 1103 { 1104 #ifdef DEBUG 1105 if (ccddebug & CCDB_FOLLOW) 1106 printf("ccdintr(%p, %p)\n", cs, bp); 1107 #endif 1108 /* 1109 * Request is done for better or worse, wakeup the top half. 1110 */ 1111 if (bp->bio_flags & BIO_ERROR) 1112 bp->bio_resid = bp->bio_bcount; 1113 biofinish(bp, &cs->device_stats, 0); 1114 } 1115 1116 /* 1117 * Called at interrupt time. 1118 * Mark the component as done and if all components are done, 1119 * take a ccd interrupt. 1120 */ 1121 static void 1122 ccdiodone(struct bio *ibp) 1123 { 1124 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 1125 struct bio *bp = cbp->cb_obp; 1126 int unit = cbp->cb_unit; 1127 int count, s; 1128 1129 s = splbio(); 1130 #ifdef DEBUG 1131 if (ccddebug & CCDB_FOLLOW) 1132 printf("ccdiodone(%p)\n", cbp); 1133 if (ccddebug & CCDB_IO) { 1134 printf("ccdiodone: bp %p bcount %ld resid %ld\n", 1135 bp, bp->bio_bcount, bp->bio_resid); 1136 printf(" dev %p(u%d), cbp %p bn %d addr %p bcnt %ld\n", 1137 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, 1138 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1139 cbp->cb_buf.bio_bcount); 1140 } 1141 #endif 1142 /* 1143 * If an error occured, report it. If this is a mirrored 1144 * configuration and the first of two possible reads, do not 1145 * set the error in the bp yet because the second read may 1146 * succeed. 1147 */ 1148 1149 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1150 const char *msg = ""; 1151 1152 if ((ccdfind(unit)->sc_cflags & CCDF_MIRROR) && 1153 (cbp->cb_buf.bio_cmd == BIO_READ) && 1154 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1155 /* 1156 * We will try our read on the other disk down 1157 * below, also reverse the default pick so if we 1158 * are doing a scan we do not keep hitting the 1159 * bad disk first. 1160 */ 1161 struct ccd_s *cs = ccdfind(unit); 1162 1163 msg = ", trying other disk"; 1164 cs->sc_pick = 1 - cs->sc_pick; 1165 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 1166 } else { 1167 bp->bio_flags |= BIO_ERROR; 1168 bp->bio_error = cbp->cb_buf.bio_error ? 1169 cbp->cb_buf.bio_error : EIO; 1170 } 1171 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1172 unit, bp->bio_error, cbp->cb_comp, 1173 (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg); 1174 } 1175 1176 /* 1177 * Process mirror. If we are writing, I/O has been initiated on both 1178 * buffers and we fall through only after both are finished. 1179 * 1180 * If we are reading only one I/O is initiated at a time. If an 1181 * error occurs we initiate the second I/O and return, otherwise 1182 * we free the second I/O without initiating it. 1183 */ 1184 1185 if (ccdfind(unit)->sc_cflags & CCDF_MIRROR) { 1186 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 1187 /* 1188 * When writing, handshake with the second buffer 1189 * to determine when both are done. If both are not 1190 * done, return here. 1191 */ 1192 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1193 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1194 putccdbuf(cbp); 1195 splx(s); 1196 return; 1197 } 1198 } else { 1199 /* 1200 * When reading, either dispose of the second buffer 1201 * or initiate I/O on the second buffer if an error 1202 * occured with this one. 1203 */ 1204 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1205 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1206 cbp->cb_mirror->cb_pflags |= 1207 CCDPF_MIRROR_DONE; 1208 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0); 1209 putccdbuf(cbp); 1210 splx(s); 1211 return; 1212 } else { 1213 putccdbuf(cbp->cb_mirror); 1214 /* fall through */ 1215 } 1216 } 1217 } 1218 } 1219 1220 /* 1221 * use bio_caller1 to determine how big the original request was rather 1222 * then bio_bcount, because bio_bcount may have been truncated for EOF. 1223 * 1224 * XXX We check for an error, but we do not test the resid for an 1225 * aligned EOF condition. This may result in character & block 1226 * device access not recognizing EOF properly when read or written 1227 * sequentially, but will not effect filesystems. 1228 */ 1229 count = (long)cbp->cb_buf.bio_caller1; 1230 putccdbuf(cbp); 1231 1232 /* 1233 * If all done, "interrupt". 1234 */ 1235 bp->bio_resid -= count; 1236 if (bp->bio_resid < 0) 1237 panic("ccdiodone: count"); 1238 if (bp->bio_resid == 0) 1239 ccdintr(ccdfind(unit), bp); 1240 splx(s); 1241 } 1242 1243 static int 1244 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td) 1245 { 1246 int unit = ccdunit(dev); 1247 int i, j, lookedup = 0, error = 0; 1248 int part, pmask, s; 1249 struct ccd_s *cs; 1250 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1251 char **cpp; 1252 struct vnode **vpp; 1253 1254 if (!IS_ALLOCATED(unit)) 1255 return (ENXIO); 1256 cs = ccdfind(unit); 1257 1258 switch (cmd) { 1259 case CCDIOCSET: 1260 if (IS_INITED(cs)) 1261 return (EBUSY); 1262 1263 if ((flag & FWRITE) == 0) 1264 return (EBADF); 1265 1266 if ((error = ccdlock(cs)) != 0) 1267 return (error); 1268 1269 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1270 return (EINVAL); 1271 1272 /* Fill in some important bits. */ 1273 cs->sc_ileave = ccio->ccio_ileave; 1274 if (cs->sc_ileave == 0 && 1275 ((ccio->ccio_flags & CCDF_MIRROR) || 1276 (ccio->ccio_flags & CCDF_PARITY))) { 1277 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1278 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1279 } 1280 if ((ccio->ccio_flags & CCDF_MIRROR) && 1281 (ccio->ccio_flags & CCDF_PARITY)) { 1282 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1283 ccio->ccio_flags &= ~CCDF_PARITY; 1284 } 1285 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1286 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1287 printf("ccd%d: mirror/parity forces uniform flag\n", 1288 unit); 1289 ccio->ccio_flags |= CCDF_UNIFORM; 1290 } 1291 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1292 1293 /* 1294 * Allocate space for and copy in the array of 1295 * componet pathnames and device numbers. 1296 */ 1297 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1298 M_DEVBUF, M_WAITOK); 1299 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1300 M_DEVBUF, M_WAITOK); 1301 1302 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1303 ccio->ccio_ndisks * sizeof(char **)); 1304 if (error) { 1305 free(vpp, M_DEVBUF); 1306 free(cpp, M_DEVBUF); 1307 ccdunlock(cs); 1308 return (error); 1309 } 1310 1311 #ifdef DEBUG 1312 if (ccddebug & CCDB_INIT) 1313 for (i = 0; i < ccio->ccio_ndisks; ++i) 1314 printf("ccdioctl: component %d: %p\n", 1315 i, cpp[i]); 1316 #endif 1317 1318 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1319 #ifdef DEBUG 1320 if (ccddebug & CCDB_INIT) 1321 printf("ccdioctl: lookedup = %d\n", lookedup); 1322 #endif 1323 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1324 for (j = 0; j < lookedup; ++j) 1325 (void)vn_close(vpp[j], FREAD|FWRITE, 1326 td->td_proc->p_ucred, td); 1327 free(vpp, M_DEVBUF); 1328 free(cpp, M_DEVBUF); 1329 ccdunlock(cs); 1330 return (error); 1331 } 1332 ++lookedup; 1333 } 1334 cs->sc_vpp = vpp; 1335 cs->sc_nccdisks = ccio->ccio_ndisks; 1336 1337 /* 1338 * Initialize the ccd. Fills in the softc for us. 1339 */ 1340 if ((error = ccdinit(cs, cpp, td)) != 0) { 1341 for (j = 0; j < lookedup; ++j) 1342 (void)vn_close(vpp[j], FREAD|FWRITE, 1343 td->td_proc->p_ucred, td); 1344 /* 1345 * We can't ccddestroy() cs just yet, because nothing 1346 * prevents user-level app to do another ioctl() 1347 * without closing the device first, therefore 1348 * declare unit null and void and let ccdclose() 1349 * destroy it when it is safe to do so. 1350 */ 1351 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1352 free(vpp, M_DEVBUF); 1353 free(cpp, M_DEVBUF); 1354 ccdunlock(cs); 1355 return (error); 1356 } 1357 1358 /* 1359 * The ccd has been successfully initialized, so 1360 * we can place it into the array and read the disklabel. 1361 */ 1362 ccio->ccio_unit = unit; 1363 ccio->ccio_size = cs->sc_size; 1364 ccdgetdisklabel(dev); 1365 1366 ccdunlock(cs); 1367 1368 break; 1369 1370 case CCDIOCCLR: 1371 if (!IS_INITED(cs)) 1372 return (ENXIO); 1373 1374 if ((flag & FWRITE) == 0) 1375 return (EBADF); 1376 1377 if ((error = ccdlock(cs)) != 0) 1378 return (error); 1379 1380 /* Don't unconfigure if any other partitions are open */ 1381 part = ccdpart(dev); 1382 pmask = (1 << part); 1383 if ((cs->sc_openmask & ~pmask)) { 1384 ccdunlock(cs); 1385 return (EBUSY); 1386 } 1387 1388 /* Declare unit null and void (reset all flags) */ 1389 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1390 1391 /* Close the components and free their pathnames. */ 1392 for (i = 0; i < cs->sc_nccdisks; ++i) { 1393 /* 1394 * XXX: this close could potentially fail and 1395 * cause Bad Things. Maybe we need to force 1396 * the close to happen? 1397 */ 1398 #ifdef DEBUG 1399 if (ccddebug & CCDB_VNODE) 1400 vprint("CCDIOCCLR: vnode info", 1401 cs->sc_cinfo[i].ci_vp); 1402 #endif 1403 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1404 td->td_proc->p_ucred, td); 1405 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1406 } 1407 1408 /* Free interleave index. */ 1409 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1410 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1411 1412 /* Free component info and interleave table. */ 1413 free(cs->sc_cinfo, M_DEVBUF); 1414 free(cs->sc_itable, M_DEVBUF); 1415 free(cs->sc_vpp, M_DEVBUF); 1416 1417 /* And remove the devstat entry. */ 1418 devstat_remove_entry(&cs->device_stats); 1419 1420 /* This must be atomic. */ 1421 s = splhigh(); 1422 ccdunlock(cs); 1423 splx(s); 1424 1425 break; 1426 1427 case CCDCONFINFO: 1428 { 1429 int ninit = 0; 1430 struct ccdconf *conf = (struct ccdconf *)data; 1431 struct ccd_s *tmpcs; 1432 struct ccd_s *ubuf = conf->buffer; 1433 1434 /* XXX: LOCK(unique unit numbers) */ 1435 LIST_FOREACH(tmpcs, &ccd_softc_list, list) 1436 if (IS_INITED(tmpcs)) 1437 ninit++; 1438 1439 if (conf->size == 0) { 1440 conf->size = sizeof(struct ccd_s) * ninit; 1441 break; 1442 } else if ((conf->size / sizeof(struct ccd_s) != ninit) || 1443 (conf->size % sizeof(struct ccd_s) != 0)) { 1444 /* XXX: UNLOCK(unique unit numbers) */ 1445 return (EINVAL); 1446 } 1447 1448 ubuf += ninit; 1449 LIST_FOREACH(tmpcs, &ccd_softc_list, list) { 1450 if (!IS_INITED(tmpcs)) 1451 continue; 1452 error = copyout(tmpcs, --ubuf, 1453 sizeof(struct ccd_s)); 1454 if (error != 0) 1455 /* XXX: UNLOCK(unique unit numbers) */ 1456 return (error); 1457 } 1458 /* XXX: UNLOCK(unique unit numbers) */ 1459 } 1460 break; 1461 1462 case CCDCPPINFO: 1463 if (!IS_INITED(cs)) 1464 return (ENXIO); 1465 1466 { 1467 int len = 0; 1468 struct ccdcpps *cpps = (struct ccdcpps *)data; 1469 char *ubuf = cpps->buffer; 1470 1471 1472 for (i = 0; i < cs->sc_nccdisks; ++i) 1473 len += cs->sc_cinfo[i].ci_pathlen; 1474 1475 if (cpps->size == 0) { 1476 cpps->size = len; 1477 break; 1478 } else if (cpps->size != len) { 1479 return (EINVAL); 1480 } 1481 1482 for (i = 0; i < cs->sc_nccdisks; ++i) { 1483 len = cs->sc_cinfo[i].ci_pathlen; 1484 error = copyout(cs->sc_cinfo[i].ci_path, ubuf, 1485 len); 1486 if (error != 0) 1487 return (error); 1488 ubuf += len; 1489 } 1490 } 1491 break; 1492 1493 case DIOCGDINFO: 1494 if (!IS_INITED(cs)) 1495 return (ENXIO); 1496 1497 *(struct disklabel *)data = cs->sc_label; 1498 break; 1499 1500 case DIOCGPART: 1501 if (!IS_INITED(cs)) 1502 return (ENXIO); 1503 1504 ((struct partinfo *)data)->disklab = &cs->sc_label; 1505 ((struct partinfo *)data)->part = 1506 &cs->sc_label.d_partitions[ccdpart(dev)]; 1507 break; 1508 1509 case DIOCWDINFO: 1510 case DIOCSDINFO: 1511 if (!IS_INITED(cs)) 1512 return (ENXIO); 1513 1514 if ((flag & FWRITE) == 0) 1515 return (EBADF); 1516 1517 if ((error = ccdlock(cs)) != 0) 1518 return (error); 1519 1520 cs->sc_flags |= CCDF_LABELLING; 1521 1522 error = setdisklabel(&cs->sc_label, 1523 (struct disklabel *)data, 0); 1524 if (error == 0) { 1525 if (cmd == DIOCWDINFO) 1526 error = writedisklabel(CCDLABELDEV(dev), 1527 &cs->sc_label); 1528 } 1529 1530 cs->sc_flags &= ~CCDF_LABELLING; 1531 1532 ccdunlock(cs); 1533 1534 if (error) 1535 return (error); 1536 break; 1537 1538 case DIOCWLABEL: 1539 if (!IS_INITED(cs)) 1540 return (ENXIO); 1541 1542 if ((flag & FWRITE) == 0) 1543 return (EBADF); 1544 if (*(int *)data != 0) 1545 cs->sc_flags |= CCDF_WLABEL; 1546 else 1547 cs->sc_flags &= ~CCDF_WLABEL; 1548 break; 1549 1550 default: 1551 return (ENOTTY); 1552 } 1553 1554 return (0); 1555 } 1556 1557 static int 1558 ccdsize(dev_t dev) 1559 { 1560 struct ccd_s *cs; 1561 int part, size; 1562 1563 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1564 return (-1); 1565 1566 cs = ccdfind(ccdunit(dev)); 1567 part = ccdpart(dev); 1568 1569 if (!IS_INITED(cs)) 1570 return (-1); 1571 1572 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1573 size = -1; 1574 else 1575 size = cs->sc_label.d_partitions[part].p_size; 1576 1577 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1578 return (-1); 1579 1580 return (size); 1581 } 1582 1583 static int 1584 ccddump(dev_t dev) 1585 { 1586 1587 /* Not implemented. */ 1588 return ENXIO; 1589 } 1590 1591 /* 1592 * Lookup the provided name in the filesystem. If the file exists, 1593 * is a valid block device, and isn't being used by anyone else, 1594 * set *vpp to the file's vnode. 1595 */ 1596 static int 1597 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1598 { 1599 struct nameidata nd; 1600 struct vnode *vp; 1601 int error, flags; 1602 1603 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td); 1604 flags = FREAD | FWRITE; 1605 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1606 #ifdef DEBUG 1607 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 1608 printf("ccdlookup: vn_open error = %d\n", error); 1609 #endif 1610 return (error); 1611 } 1612 vp = nd.ni_vp; 1613 1614 if (vp->v_usecount > 1) { 1615 error = EBUSY; 1616 goto bad; 1617 } 1618 1619 if (!vn_isdisk(vp, &error)) 1620 goto bad; 1621 1622 #ifdef DEBUG 1623 if (ccddebug & CCDB_VNODE) 1624 vprint("ccdlookup: vnode info", vp); 1625 #endif 1626 1627 VOP_UNLOCK(vp, 0, td); 1628 NDFREE(&nd, NDF_ONLY_PNBUF); 1629 *vpp = vp; 1630 return (0); 1631 bad: 1632 VOP_UNLOCK(vp, 0, td); 1633 NDFREE(&nd, NDF_ONLY_PNBUF); 1634 /* vn_close does vrele() for vp */ 1635 (void)vn_close(vp, FREAD|FWRITE, td->td_proc->p_ucred, td); 1636 return (error); 1637 } 1638 1639 /* 1640 * Read the disklabel from the ccd. If one is not present, fake one 1641 * up. 1642 */ 1643 static void 1644 ccdgetdisklabel(dev_t dev) 1645 { 1646 int unit = ccdunit(dev); 1647 struct ccd_s *cs = ccdfind(unit); 1648 char *errstring; 1649 struct disklabel *lp = &cs->sc_label; 1650 struct ccdgeom *ccg = &cs->sc_geom; 1651 1652 bzero(lp, sizeof(*lp)); 1653 1654 lp->d_secperunit = cs->sc_size; 1655 lp->d_secsize = ccg->ccg_secsize; 1656 lp->d_nsectors = ccg->ccg_nsectors; 1657 lp->d_ntracks = ccg->ccg_ntracks; 1658 lp->d_ncylinders = ccg->ccg_ncylinders; 1659 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1660 1661 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1662 lp->d_type = DTYPE_CCD; 1663 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1664 lp->d_rpm = 3600; 1665 lp->d_interleave = 1; 1666 lp->d_flags = 0; 1667 1668 lp->d_partitions[RAW_PART].p_offset = 0; 1669 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1670 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1671 lp->d_npartitions = RAW_PART + 1; 1672 1673 lp->d_bbsize = BBSIZE; /* XXX */ 1674 lp->d_sbsize = SBSIZE; /* XXX */ 1675 1676 lp->d_magic = DISKMAGIC; 1677 lp->d_magic2 = DISKMAGIC; 1678 lp->d_checksum = dkcksum(&cs->sc_label); 1679 1680 /* 1681 * Call the generic disklabel extraction routine. 1682 */ 1683 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1684 if (errstring != NULL) 1685 ccdmakedisklabel(cs); 1686 1687 #ifdef DEBUG 1688 /* It's actually extremely common to have unlabeled ccds. */ 1689 if (ccddebug & CCDB_LABEL) 1690 if (errstring != NULL) 1691 printf("ccd%d: %s\n", unit, errstring); 1692 #endif 1693 } 1694 1695 /* 1696 * Take care of things one might want to take care of in the event 1697 * that a disklabel isn't present. 1698 */ 1699 static void 1700 ccdmakedisklabel(struct ccd_s *cs) 1701 { 1702 struct disklabel *lp = &cs->sc_label; 1703 1704 /* 1705 * For historical reasons, if there's no disklabel present 1706 * the raw partition must be marked FS_BSDFFS. 1707 */ 1708 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1709 1710 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1711 } 1712 1713 /* 1714 * Wait interruptibly for an exclusive lock. 1715 * 1716 * XXX 1717 * Several drivers do this; it should be abstracted and made MP-safe. 1718 */ 1719 static int 1720 ccdlock(struct ccd_s *cs) 1721 { 1722 int error; 1723 1724 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1725 cs->sc_flags |= CCDF_WANTED; 1726 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1727 return (error); 1728 } 1729 cs->sc_flags |= CCDF_LOCKED; 1730 return (0); 1731 } 1732 1733 /* 1734 * Unlock and wake up any waiters. 1735 */ 1736 static void 1737 ccdunlock(struct ccd_s *cs) 1738 { 1739 1740 cs->sc_flags &= ~CCDF_LOCKED; 1741 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1742 cs->sc_flags &= ~CCDF_WANTED; 1743 wakeup(cs); 1744 } 1745 } 1746 1747 #ifdef DEBUG 1748 static void 1749 printiinfo(struct ccdiinfo *ii) 1750 { 1751 int ix, i; 1752 1753 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1754 printf(" itab[%d]: #dk %d sblk %d soff %d", 1755 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1756 for (i = 0; i < ii->ii_ndisk; i++) 1757 printf(" %d", ii->ii_index[i]); 1758 printf("\n"); 1759 } 1760 } 1761 #endif 1762