1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include <sys/param.h> 91 #include <sys/systm.h> 92 #include <sys/kernel.h> 93 #include <sys/module.h> 94 #include <sys/proc.h> 95 #include <sys/bio.h> 96 #include <sys/malloc.h> 97 #include <sys/namei.h> 98 #include <sys/conf.h> 99 #include <sys/stat.h> 100 #include <sys/sysctl.h> 101 #include <sys/disklabel.h> 102 #include <ufs/ffs/fs.h> 103 #include <sys/devicestat.h> 104 #include <sys/fcntl.h> 105 #include <sys/vnode.h> 106 107 #include <sys/ccdvar.h> 108 109 MALLOC_DEFINE(M_CCD, "CCD driver", "Concatenated Disk driver"); 110 111 #if defined(CCDDEBUG) && !defined(DEBUG) 112 #define DEBUG 113 #endif 114 115 #ifdef DEBUG 116 #define CCDB_FOLLOW 0x01 117 #define CCDB_INIT 0x02 118 #define CCDB_IO 0x04 119 #define CCDB_LABEL 0x08 120 #define CCDB_VNODE 0x10 121 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 122 CCDB_VNODE; 123 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 124 #endif 125 126 #define ccdunit(x) dkunit(x) 127 #define ccdpart(x) dkpart(x) 128 129 /* 130 This is how mirroring works (only writes are special): 131 132 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 133 linked together by the cb_mirror field. "cb_pflags & 134 CCDPF_MIRROR_DONE" is set to 0 on both of them. 135 136 When a component returns to ccdiodone(), it checks if "cb_pflags & 137 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 138 flag and returns. If it is, it means its partner has already 139 returned, so it will go to the regular cleanup. 140 141 */ 142 143 struct ccdbuf { 144 struct bio cb_buf; /* new I/O buf */ 145 struct bio *cb_obp; /* ptr. to original I/O buf */ 146 struct ccdbuf *cb_freenext; /* free list link */ 147 int cb_unit; /* target unit */ 148 int cb_comp; /* target component */ 149 int cb_pflags; /* mirror/parity status flag */ 150 struct ccdbuf *cb_mirror; /* mirror counterpart */ 151 }; 152 153 /* bits in cb_pflags */ 154 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 155 156 #define CCDLABELDEV(dev) \ 157 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 158 159 /* convinient macros for often-used statements */ 160 #define IS_ALLOCATED(unit) (ccdfind(unit) != NULL) 161 #define IS_INITED(cs) (((cs)->sc_flags & CCDF_INITED) != 0) 162 163 static d_open_t ccdopen; 164 static d_close_t ccdclose; 165 static d_strategy_t ccdstrategy; 166 static d_ioctl_t ccdioctl; 167 static d_dump_t ccddump; 168 static d_psize_t ccdsize; 169 170 #define NCCDFREEHIWAT 16 171 172 #define CDEV_MAJOR 74 173 174 static struct cdevsw ccd_cdevsw = { 175 /* open */ ccdopen, 176 /* close */ ccdclose, 177 /* read */ physread, 178 /* write */ physwrite, 179 /* ioctl */ ccdioctl, 180 /* poll */ nopoll, 181 /* mmap */ nommap, 182 /* strategy */ ccdstrategy, 183 /* name */ "ccd", 184 /* maj */ CDEV_MAJOR, 185 /* dump */ ccddump, 186 /* psize */ ccdsize, 187 /* flags */ D_DISK, 188 }; 189 static LIST_HEAD(, ccd_s) ccd_softc_list = LIST_HEAD_INITIALIZER(&ccd_softc_list); 190 191 static struct ccd_s *ccdfind(int); 192 static struct ccd_s *ccdnew(int); 193 static int ccddestroy(struct ccd_s *, struct proc *); 194 195 /* called during module initialization */ 196 static void ccdattach(void); 197 static int ccd_modevent(module_t, int, void *); 198 199 /* called by biodone() at interrupt time */ 200 static void ccdiodone(struct bio *bp); 201 202 static void ccdstart(struct ccd_s *, struct bio *); 203 static void ccdinterleave(struct ccd_s *, int); 204 static void ccdintr(struct ccd_s *, struct bio *); 205 static int ccdinit(struct ccd_s *, char **, struct thread *); 206 static int ccdlookup(char *, struct thread *p, struct vnode **); 207 static void ccdbuffer(struct ccdbuf **ret, struct ccd_s *, 208 struct bio *, daddr_t, caddr_t, long); 209 static void ccdgetdisklabel(dev_t); 210 static void ccdmakedisklabel(struct ccd_s *); 211 static int ccdlock(struct ccd_s *); 212 static void ccdunlock(struct ccd_s *); 213 214 #ifdef DEBUG 215 static void printiinfo(struct ccdiinfo *); 216 #endif 217 218 /* Non-private for the benefit of libkvm. */ 219 struct ccdbuf *ccdfreebufs; 220 static int numccdfreebufs; 221 222 /* 223 * getccdbuf() - Allocate and zero a ccd buffer. 224 * 225 * This routine is called at splbio(). 226 */ 227 228 static __inline 229 struct ccdbuf * 230 getccdbuf(struct ccdbuf *cpy) 231 { 232 struct ccdbuf *cbp; 233 234 /* 235 * Allocate from freelist or malloc as necessary 236 */ 237 if ((cbp = ccdfreebufs) != NULL) { 238 ccdfreebufs = cbp->cb_freenext; 239 --numccdfreebufs; 240 } else { 241 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 242 } 243 244 /* 245 * Used by mirroring code 246 */ 247 if (cpy) 248 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 249 else 250 bzero(cbp, sizeof(struct ccdbuf)); 251 252 /* 253 * independant struct bio initialization 254 */ 255 256 return(cbp); 257 } 258 259 /* 260 * putccdbuf() - Free a ccd buffer. 261 * 262 * This routine is called at splbio(). 263 */ 264 265 static __inline 266 void 267 putccdbuf(struct ccdbuf *cbp) 268 { 269 270 if (numccdfreebufs < NCCDFREEHIWAT) { 271 cbp->cb_freenext = ccdfreebufs; 272 ccdfreebufs = cbp; 273 ++numccdfreebufs; 274 } else { 275 free((caddr_t)cbp, M_DEVBUF); 276 } 277 } 278 279 280 /* 281 * Number of blocks to untouched in front of a component partition. 282 * This is to avoid violating its disklabel area when it starts at the 283 * beginning of the slice. 284 */ 285 #if !defined(CCD_OFFSET) 286 #define CCD_OFFSET 16 287 #endif 288 289 static struct ccd_s * 290 ccdfind(int unit) 291 { 292 struct ccd_s *sc = NULL; 293 294 /* XXX: LOCK(unique unit numbers) */ 295 LIST_FOREACH(sc, &ccd_softc_list, list) { 296 if (sc->sc_unit == unit) 297 break; 298 } 299 /* XXX: UNLOCK(unique unit numbers) */ 300 return ((sc == NULL) || (sc->sc_unit != unit) ? NULL : sc); 301 } 302 303 static struct ccd_s * 304 ccdnew(int unit) 305 { 306 struct ccd_s *sc; 307 308 /* XXX: LOCK(unique unit numbers) */ 309 if (IS_ALLOCATED(unit) || unit > DKMAXUNIT) 310 return (NULL); 311 312 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO); 313 sc->sc_unit = unit; 314 LIST_INSERT_HEAD(&ccd_softc_list, sc, list); 315 /* XXX: UNLOCK(unique unit numbers) */ 316 return (sc); 317 } 318 319 static int 320 ccddestroy(struct ccd_s *sc, struct proc *p) 321 { 322 323 /* XXX: LOCK(unique unit numbers) */ 324 LIST_REMOVE(sc, list); 325 /* XXX: UNLOCK(unique unit numbers) */ 326 FREE(sc, M_CCD); 327 return (0); 328 } 329 330 static void 331 ccd_clone(void *arg, char *name, int namelen, dev_t *dev) 332 { 333 int i, u; 334 char *s; 335 336 if (*dev != NODEV) 337 return; 338 i = dev_stdclone(name, &s, "ccd", &u); 339 if (i != 2) 340 return; 341 if (*s < 'a' || *s > 'h') 342 return; 343 if (s[1] != '\0') 344 return; 345 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', 346 UID_ROOT, GID_OPERATOR, 0640, name); 347 } 348 349 /* 350 * Called by main() during pseudo-device attachment. All we need 351 * to do is to add devsw entries. 352 */ 353 static void 354 ccdattach() 355 { 356 357 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); 358 } 359 360 static int 361 ccd_modevent(module_t mod, int type, void *data) 362 { 363 int error = 0; 364 365 switch (type) { 366 case MOD_LOAD: 367 ccdattach(); 368 break; 369 370 case MOD_UNLOAD: 371 printf("ccd0: Unload not supported!\n"); 372 error = EOPNOTSUPP; 373 break; 374 375 case MOD_SHUTDOWN: 376 break; 377 378 default: 379 error = EOPNOTSUPP; 380 } 381 return (error); 382 } 383 384 DEV_MODULE(ccd, ccd_modevent, NULL); 385 386 static int 387 ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td) 388 { 389 struct ccdcinfo *ci = NULL; /* XXX */ 390 size_t size; 391 int ix; 392 struct vnode *vp; 393 size_t minsize; 394 int maxsecsize; 395 struct partinfo dpart; 396 struct ccdgeom *ccg = &cs->sc_geom; 397 char *tmppath = NULL; 398 int error = 0; 399 400 #ifdef DEBUG 401 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 402 printf("ccdinit: unit %d\n", cs->sc_unit); 403 #endif 404 405 cs->sc_size = 0; 406 407 /* Allocate space for the component info. */ 408 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 409 M_DEVBUF, M_WAITOK); 410 411 /* 412 * Verify that each component piece exists and record 413 * relevant information about it. 414 */ 415 maxsecsize = 0; 416 minsize = 0; 417 tmppath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK); 418 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 419 vp = cs->sc_vpp[ix]; 420 ci = &cs->sc_cinfo[ix]; 421 ci->ci_vp = vp; 422 423 /* 424 * Copy in the pathname of the component. 425 */ 426 if ((error = copyinstr(cpaths[ix], tmppath, 427 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 428 #ifdef DEBUG 429 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 430 printf("ccd%d: can't copy path, error = %d\n", 431 cs->sc_unit, error); 432 #endif 433 goto fail; 434 } 435 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 436 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 437 438 ci->ci_dev = vn_todev(vp); 439 440 /* 441 * Get partition information for the component. 442 */ 443 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 444 FREAD, td->td_proc->p_ucred, td)) != 0) { 445 #ifdef DEBUG 446 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 447 printf("ccd%d: %s: ioctl failed, error = %d\n", 448 cs->sc_unit, ci->ci_path, error); 449 #endif 450 goto fail; 451 } 452 if (dpart.part->p_fstype == FS_BSDFFS) { 453 maxsecsize = 454 ((dpart.disklab->d_secsize > maxsecsize) ? 455 dpart.disklab->d_secsize : maxsecsize); 456 size = dpart.part->p_size - CCD_OFFSET; 457 } else { 458 #ifdef DEBUG 459 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 460 printf("ccd%d: %s: incorrect partition type\n", 461 cs->sc_unit, ci->ci_path); 462 #endif 463 error = EFTYPE; 464 goto fail; 465 } 466 467 /* 468 * Calculate the size, truncating to an interleave 469 * boundary if necessary. 470 */ 471 472 if (cs->sc_ileave > 1) 473 size -= size % cs->sc_ileave; 474 475 if (size == 0) { 476 #ifdef DEBUG 477 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 478 printf("ccd%d: %s: size == 0\n", 479 cs->sc_unit, ci->ci_path); 480 #endif 481 error = ENODEV; 482 goto fail; 483 } 484 485 if (minsize == 0 || size < minsize) 486 minsize = size; 487 ci->ci_size = size; 488 cs->sc_size += size; 489 } 490 491 free(tmppath, M_DEVBUF); 492 tmppath = NULL; 493 494 /* 495 * Don't allow the interleave to be smaller than 496 * the biggest component sector. 497 */ 498 if ((cs->sc_ileave > 0) && 499 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 500 #ifdef DEBUG 501 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 502 printf("ccd%d: interleave must be at least %d\n", 503 cs->sc_unit, (maxsecsize / DEV_BSIZE)); 504 #endif 505 error = EINVAL; 506 goto fail; 507 } 508 509 /* 510 * If uniform interleave is desired set all sizes to that of 511 * the smallest component. This will guarentee that a single 512 * interleave table is generated. 513 * 514 * Lost space must be taken into account when calculating the 515 * overall size. Half the space is lost when CCDF_MIRROR is 516 * specified. One disk is lost when CCDF_PARITY is specified. 517 */ 518 if (cs->sc_flags & CCDF_UNIFORM) { 519 for (ci = cs->sc_cinfo; 520 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 521 ci->ci_size = minsize; 522 } 523 if (cs->sc_flags & CCDF_MIRROR) { 524 /* 525 * Check to see if an even number of components 526 * have been specified. The interleave must also 527 * be non-zero in order for us to be able to 528 * guarentee the topology. 529 */ 530 if (cs->sc_nccdisks % 2) { 531 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit ); 532 error = EINVAL; 533 goto fail; 534 } 535 if (cs->sc_ileave == 0) { 536 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit); 537 error = EINVAL; 538 goto fail; 539 } 540 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 541 } else if (cs->sc_flags & CCDF_PARITY) { 542 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 543 } else { 544 if (cs->sc_ileave == 0) { 545 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit); 546 error = EINVAL; 547 goto fail; 548 } 549 cs->sc_size = cs->sc_nccdisks * minsize; 550 } 551 } 552 553 /* 554 * Construct the interleave table. 555 */ 556 ccdinterleave(cs, cs->sc_unit); 557 558 /* 559 * Create pseudo-geometry based on 1MB cylinders. It's 560 * pretty close. 561 */ 562 ccg->ccg_secsize = maxsecsize; 563 ccg->ccg_ntracks = 1; 564 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 565 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 566 567 /* 568 * Add an devstat entry for this device. 569 */ 570 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit, 571 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 572 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 573 DEVSTAT_PRIORITY_ARRAY); 574 575 cs->sc_flags |= CCDF_INITED; 576 cs->sc_cflags = cs->sc_flags; /* So we can find out later... */ 577 return (0); 578 fail: 579 while (ci > cs->sc_cinfo) { 580 ci--; 581 free(ci->ci_path, M_DEVBUF); 582 } 583 if (tmppath != NULL) 584 free(tmppath, M_DEVBUF); 585 free(cs->sc_cinfo, M_DEVBUF); 586 return (error); 587 } 588 589 static void 590 ccdinterleave(struct ccd_s *cs, int unit) 591 { 592 struct ccdcinfo *ci, *smallci; 593 struct ccdiinfo *ii; 594 daddr_t bn, lbn; 595 int ix; 596 u_long size; 597 598 #ifdef DEBUG 599 if (ccddebug & CCDB_INIT) 600 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 601 #endif 602 603 /* 604 * Allocate an interleave table. The worst case occurs when each 605 * of N disks is of a different size, resulting in N interleave 606 * tables. 607 * 608 * Chances are this is too big, but we don't care. 609 */ 610 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 611 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, 612 M_WAITOK | M_ZERO); 613 614 /* 615 * Trivial case: no interleave (actually interleave of disk size). 616 * Each table entry represents a single component in its entirety. 617 * 618 * An interleave of 0 may not be used with a mirror or parity setup. 619 */ 620 if (cs->sc_ileave == 0) { 621 bn = 0; 622 ii = cs->sc_itable; 623 624 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 625 /* Allocate space for ii_index. */ 626 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 627 ii->ii_ndisk = 1; 628 ii->ii_startblk = bn; 629 ii->ii_startoff = 0; 630 ii->ii_index[0] = ix; 631 bn += cs->sc_cinfo[ix].ci_size; 632 ii++; 633 } 634 ii->ii_ndisk = 0; 635 #ifdef DEBUG 636 if (ccddebug & CCDB_INIT) 637 printiinfo(cs->sc_itable); 638 #endif 639 return; 640 } 641 642 /* 643 * The following isn't fast or pretty; it doesn't have to be. 644 */ 645 size = 0; 646 bn = lbn = 0; 647 for (ii = cs->sc_itable; ; ii++) { 648 /* 649 * Allocate space for ii_index. We might allocate more then 650 * we use. 651 */ 652 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 653 M_DEVBUF, M_WAITOK); 654 655 /* 656 * Locate the smallest of the remaining components 657 */ 658 smallci = NULL; 659 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 660 ci++) { 661 if (ci->ci_size > size && 662 (smallci == NULL || 663 ci->ci_size < smallci->ci_size)) { 664 smallci = ci; 665 } 666 } 667 668 /* 669 * Nobody left, all done 670 */ 671 if (smallci == NULL) { 672 ii->ii_ndisk = 0; 673 break; 674 } 675 676 /* 677 * Record starting logical block using an sc_ileave blocksize. 678 */ 679 ii->ii_startblk = bn / cs->sc_ileave; 680 681 /* 682 * Record starting comopnent block using an sc_ileave 683 * blocksize. This value is relative to the beginning of 684 * a component disk. 685 */ 686 ii->ii_startoff = lbn; 687 688 /* 689 * Determine how many disks take part in this interleave 690 * and record their indices. 691 */ 692 ix = 0; 693 for (ci = cs->sc_cinfo; 694 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 695 if (ci->ci_size >= smallci->ci_size) { 696 ii->ii_index[ix++] = ci - cs->sc_cinfo; 697 } 698 } 699 ii->ii_ndisk = ix; 700 bn += ix * (smallci->ci_size - size); 701 lbn = smallci->ci_size / cs->sc_ileave; 702 size = smallci->ci_size; 703 } 704 #ifdef DEBUG 705 if (ccddebug & CCDB_INIT) 706 printiinfo(cs->sc_itable); 707 #endif 708 } 709 710 /* ARGSUSED */ 711 static int 712 ccdopen(dev_t dev, int flags, int fmt, struct thread *td) 713 { 714 int unit = ccdunit(dev); 715 struct ccd_s *cs; 716 struct disklabel *lp; 717 int error = 0, part, pmask; 718 719 #ifdef DEBUG 720 if (ccddebug & CCDB_FOLLOW) 721 printf("ccdopen(%p, %x)\n", dev, flags); 722 #endif 723 724 cs = IS_ALLOCATED(unit) ? ccdfind(unit) : ccdnew(unit); 725 726 if ((error = ccdlock(cs)) != 0) 727 return (error); 728 729 lp = &cs->sc_label; 730 731 part = ccdpart(dev); 732 pmask = (1 << part); 733 734 /* 735 * If we're initialized, check to see if there are any other 736 * open partitions. If not, then it's safe to update 737 * the in-core disklabel. 738 */ 739 if (IS_INITED(cs) && (cs->sc_openmask == 0)) 740 ccdgetdisklabel(dev); 741 742 /* Check that the partition exists. */ 743 if (part != RAW_PART && ((part >= lp->d_npartitions) || 744 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 745 error = ENXIO; 746 goto done; 747 } 748 749 cs->sc_openmask |= pmask; 750 done: 751 ccdunlock(cs); 752 return (0); 753 } 754 755 /* ARGSUSED */ 756 static int 757 ccdclose(dev_t dev, int flags, int fmt, struct thread *td) 758 { 759 int unit = ccdunit(dev); 760 struct ccd_s *cs; 761 int error = 0, part; 762 763 #ifdef DEBUG 764 if (ccddebug & CCDB_FOLLOW) 765 printf("ccdclose(%p, %x)\n", dev, flags); 766 #endif 767 768 if (!IS_ALLOCATED(unit)) 769 return (ENXIO); 770 cs = ccdfind(unit); 771 772 if ((error = ccdlock(cs)) != 0) 773 return (error); 774 775 part = ccdpart(dev); 776 777 /* ...that much closer to allowing unconfiguration... */ 778 cs->sc_openmask &= ~(1 << part); 779 /* collect "garbage" if possible */ 780 if (!IS_INITED(cs) && (cs->sc_flags & CCDF_WANTED) == 0) 781 ccddestroy(cs, td->td_proc); 782 else 783 ccdunlock(cs); 784 return (0); 785 } 786 787 static void 788 ccdstrategy(struct bio *bp) 789 { 790 int unit = ccdunit(bp->bio_dev); 791 struct ccd_s *cs = ccdfind(unit); 792 int s; 793 int wlabel; 794 struct disklabel *lp; 795 796 #ifdef DEBUG 797 if (ccddebug & CCDB_FOLLOW) 798 printf("ccdstrategy(%p): unit %d\n", bp, unit); 799 #endif 800 if (!IS_INITED(cs)) { 801 biofinish(bp, NULL, ENXIO); 802 return; 803 } 804 805 /* If it's a nil transfer, wake up the top half now. */ 806 if (bp->bio_bcount == 0) { 807 biodone(bp); 808 return; 809 } 810 811 lp = &cs->sc_label; 812 813 /* 814 * Do bounds checking and adjust transfer. If there's an 815 * error, the bounds check will flag that for us. 816 */ 817 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 818 if (ccdpart(bp->bio_dev) != RAW_PART) { 819 if (bounds_check_with_label(bp, lp, wlabel) <= 0) { 820 biodone(bp); 821 return; 822 } 823 } else { 824 int pbn; /* in sc_secsize chunks */ 825 long sz; /* in sc_secsize chunks */ 826 827 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 828 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 829 830 /* 831 * If out of bounds return an error. If at the EOF point, 832 * simply read or write less. 833 */ 834 835 if (pbn < 0 || pbn >= cs->sc_size) { 836 bp->bio_resid = bp->bio_bcount; 837 if (pbn != cs->sc_size) 838 biofinish(bp, NULL, EINVAL); 839 else 840 biodone(bp); 841 return; 842 } 843 844 /* 845 * If the request crosses EOF, truncate the request. 846 */ 847 if (pbn + sz > cs->sc_size) { 848 bp->bio_bcount = (cs->sc_size - pbn) * 849 cs->sc_geom.ccg_secsize; 850 } 851 } 852 853 bp->bio_resid = bp->bio_bcount; 854 855 /* 856 * "Start" the unit. 857 */ 858 s = splbio(); 859 ccdstart(cs, bp); 860 splx(s); 861 return; 862 } 863 864 static void 865 ccdstart(struct ccd_s *cs, struct bio *bp) 866 { 867 long bcount, rcount; 868 struct ccdbuf *cbp[4]; 869 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 870 caddr_t addr; 871 daddr_t bn; 872 struct partition *pp; 873 874 #ifdef DEBUG 875 if (ccddebug & CCDB_FOLLOW) 876 printf("ccdstart(%p, %p)\n", cs, bp); 877 #endif 878 879 /* Record the transaction start */ 880 devstat_start_transaction(&cs->device_stats); 881 882 /* 883 * Translate the partition-relative block number to an absolute. 884 */ 885 bn = bp->bio_blkno; 886 if (ccdpart(bp->bio_dev) != RAW_PART) { 887 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; 888 bn += pp->p_offset; 889 } 890 891 /* 892 * Allocate component buffers and fire off the requests 893 */ 894 addr = bp->bio_data; 895 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 896 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 897 rcount = cbp[0]->cb_buf.bio_bcount; 898 899 if (cs->sc_cflags & CCDF_MIRROR) { 900 /* 901 * Mirroring. Writes go to both disks, reads are 902 * taken from whichever disk seems most appropriate. 903 * 904 * We attempt to localize reads to the disk whos arm 905 * is nearest the read request. We ignore seeks due 906 * to writes when making this determination and we 907 * also try to avoid hogging. 908 */ 909 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 910 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 911 BIO_STRATEGY(&cbp[1]->cb_buf, 0); 912 } else { 913 int pick = cs->sc_pick; 914 daddr_t range = cs->sc_size / 16; 915 916 if (bn < cs->sc_blk[pick] - range || 917 bn > cs->sc_blk[pick] + range 918 ) { 919 cs->sc_pick = pick = 1 - pick; 920 } 921 cs->sc_blk[pick] = bn + btodb(rcount); 922 BIO_STRATEGY(&cbp[pick]->cb_buf, 0); 923 } 924 } else { 925 /* 926 * Not mirroring 927 */ 928 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 929 } 930 bn += btodb(rcount); 931 addr += rcount; 932 } 933 } 934 935 /* 936 * Build a component buffer header. 937 */ 938 static void 939 ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) 940 { 941 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 942 struct ccdbuf *cbp; 943 daddr_t cbn, cboff; 944 off_t cbc; 945 946 #ifdef DEBUG 947 if (ccddebug & CCDB_IO) 948 printf("ccdbuffer(%p, %p, %d, %p, %ld)\n", 949 cs, bp, bn, addr, bcount); 950 #endif 951 /* 952 * Determine which component bn falls in. 953 */ 954 cbn = bn; 955 cboff = 0; 956 957 if (cs->sc_ileave == 0) { 958 /* 959 * Serially concatenated and neither a mirror nor a parity 960 * config. This is a special case. 961 */ 962 daddr_t sblk; 963 964 sblk = 0; 965 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 966 sblk += ci->ci_size; 967 cbn -= sblk; 968 } else { 969 struct ccdiinfo *ii; 970 int ccdisk, off; 971 972 /* 973 * Calculate cbn, the logical superblock (sc_ileave chunks), 974 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 975 * to cbn. 976 */ 977 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 978 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 979 980 /* 981 * Figure out which interleave table to use. 982 */ 983 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 984 if (ii->ii_startblk > cbn) 985 break; 986 } 987 ii--; 988 989 /* 990 * off is the logical superblock relative to the beginning 991 * of this interleave block. 992 */ 993 off = cbn - ii->ii_startblk; 994 995 /* 996 * We must calculate which disk component to use (ccdisk), 997 * and recalculate cbn to be the superblock relative to 998 * the beginning of the component. This is typically done by 999 * adding 'off' and ii->ii_startoff together. However, 'off' 1000 * must typically be divided by the number of components in 1001 * this interleave array to be properly convert it from a 1002 * CCD-relative logical superblock number to a 1003 * component-relative superblock number. 1004 */ 1005 if (ii->ii_ndisk == 1) { 1006 /* 1007 * When we have just one disk, it can't be a mirror 1008 * or a parity config. 1009 */ 1010 ccdisk = ii->ii_index[0]; 1011 cbn = ii->ii_startoff + off; 1012 } else { 1013 if (cs->sc_cflags & CCDF_MIRROR) { 1014 /* 1015 * We have forced a uniform mapping, resulting 1016 * in a single interleave array. We double 1017 * up on the first half of the available 1018 * components and our mirror is in the second 1019 * half. This only works with a single 1020 * interleave array because doubling up 1021 * doubles the number of sectors, so there 1022 * cannot be another interleave array because 1023 * the next interleave array's calculations 1024 * would be off. 1025 */ 1026 int ndisk2 = ii->ii_ndisk / 2; 1027 ccdisk = ii->ii_index[off % ndisk2]; 1028 cbn = ii->ii_startoff + off / ndisk2; 1029 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1030 } else if (cs->sc_cflags & CCDF_PARITY) { 1031 /* 1032 * XXX not implemented yet 1033 */ 1034 int ndisk2 = ii->ii_ndisk - 1; 1035 ccdisk = ii->ii_index[off % ndisk2]; 1036 cbn = ii->ii_startoff + off / ndisk2; 1037 if (cbn % ii->ii_ndisk <= ccdisk) 1038 ccdisk++; 1039 } else { 1040 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1041 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1042 } 1043 } 1044 1045 ci = &cs->sc_cinfo[ccdisk]; 1046 1047 /* 1048 * Convert cbn from a superblock to a normal block so it 1049 * can be used to calculate (along with cboff) the normal 1050 * block index into this particular disk. 1051 */ 1052 cbn *= cs->sc_ileave; 1053 } 1054 1055 /* 1056 * Fill in the component buf structure. 1057 */ 1058 cbp = getccdbuf(NULL); 1059 cbp->cb_buf.bio_cmd = bp->bio_cmd; 1060 cbp->cb_buf.bio_done = ccdiodone; 1061 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ 1062 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; 1063 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1064 cbp->cb_buf.bio_data = addr; 1065 if (cs->sc_ileave == 0) 1066 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1067 else 1068 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1069 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount; 1070 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 1071 1072 /* 1073 * context for ccdiodone 1074 */ 1075 cbp->cb_obp = bp; 1076 cbp->cb_unit = cs->sc_unit; 1077 cbp->cb_comp = ci - cs->sc_cinfo; 1078 1079 #ifdef DEBUG 1080 if (ccddebug & CCDB_IO) 1081 printf(" dev %p(u%ld): cbp %p bn %d addr %p bcnt %ld\n", 1082 ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp, 1083 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1084 cbp->cb_buf.bio_bcount); 1085 #endif 1086 cb[0] = cbp; 1087 1088 /* 1089 * Note: both I/O's setup when reading from mirror, but only one 1090 * will be executed. 1091 */ 1092 if (cs->sc_cflags & CCDF_MIRROR) { 1093 /* mirror, setup second I/O */ 1094 cbp = getccdbuf(cb[0]); 1095 cbp->cb_buf.bio_dev = ci2->ci_dev; 1096 cbp->cb_comp = ci2 - cs->sc_cinfo; 1097 cb[1] = cbp; 1098 /* link together the ccdbuf's and clear "mirror done" flag */ 1099 cb[0]->cb_mirror = cb[1]; 1100 cb[1]->cb_mirror = cb[0]; 1101 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1102 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1103 } 1104 } 1105 1106 static void 1107 ccdintr(struct ccd_s *cs, struct bio *bp) 1108 { 1109 #ifdef DEBUG 1110 if (ccddebug & CCDB_FOLLOW) 1111 printf("ccdintr(%p, %p)\n", cs, bp); 1112 #endif 1113 /* 1114 * Request is done for better or worse, wakeup the top half. 1115 */ 1116 if (bp->bio_flags & BIO_ERROR) 1117 bp->bio_resid = bp->bio_bcount; 1118 biofinish(bp, &cs->device_stats, 0); 1119 } 1120 1121 /* 1122 * Called at interrupt time. 1123 * Mark the component as done and if all components are done, 1124 * take a ccd interrupt. 1125 */ 1126 static void 1127 ccdiodone(struct bio *ibp) 1128 { 1129 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 1130 struct bio *bp = cbp->cb_obp; 1131 int unit = cbp->cb_unit; 1132 int count, s; 1133 1134 s = splbio(); 1135 #ifdef DEBUG 1136 if (ccddebug & CCDB_FOLLOW) 1137 printf("ccdiodone(%p)\n", cbp); 1138 if (ccddebug & CCDB_IO) { 1139 printf("ccdiodone: bp %p bcount %ld resid %ld\n", 1140 bp, bp->bio_bcount, bp->bio_resid); 1141 printf(" dev %p(u%d), cbp %p bn %d addr %p bcnt %ld\n", 1142 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, 1143 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1144 cbp->cb_buf.bio_bcount); 1145 } 1146 #endif 1147 /* 1148 * If an error occured, report it. If this is a mirrored 1149 * configuration and the first of two possible reads, do not 1150 * set the error in the bp yet because the second read may 1151 * succeed. 1152 */ 1153 1154 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1155 const char *msg = ""; 1156 1157 if ((ccdfind(unit)->sc_cflags & CCDF_MIRROR) && 1158 (cbp->cb_buf.bio_cmd == BIO_READ) && 1159 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1160 /* 1161 * We will try our read on the other disk down 1162 * below, also reverse the default pick so if we 1163 * are doing a scan we do not keep hitting the 1164 * bad disk first. 1165 */ 1166 struct ccd_s *cs = ccdfind(unit); 1167 1168 msg = ", trying other disk"; 1169 cs->sc_pick = 1 - cs->sc_pick; 1170 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 1171 } else { 1172 bp->bio_flags |= BIO_ERROR; 1173 bp->bio_error = cbp->cb_buf.bio_error ? 1174 cbp->cb_buf.bio_error : EIO; 1175 } 1176 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1177 unit, bp->bio_error, cbp->cb_comp, 1178 (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg); 1179 } 1180 1181 /* 1182 * Process mirror. If we are writing, I/O has been initiated on both 1183 * buffers and we fall through only after both are finished. 1184 * 1185 * If we are reading only one I/O is initiated at a time. If an 1186 * error occurs we initiate the second I/O and return, otherwise 1187 * we free the second I/O without initiating it. 1188 */ 1189 1190 if (ccdfind(unit)->sc_cflags & CCDF_MIRROR) { 1191 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 1192 /* 1193 * When writing, handshake with the second buffer 1194 * to determine when both are done. If both are not 1195 * done, return here. 1196 */ 1197 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1198 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1199 putccdbuf(cbp); 1200 splx(s); 1201 return; 1202 } 1203 } else { 1204 /* 1205 * When reading, either dispose of the second buffer 1206 * or initiate I/O on the second buffer if an error 1207 * occured with this one. 1208 */ 1209 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1210 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1211 cbp->cb_mirror->cb_pflags |= 1212 CCDPF_MIRROR_DONE; 1213 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0); 1214 putccdbuf(cbp); 1215 splx(s); 1216 return; 1217 } else { 1218 putccdbuf(cbp->cb_mirror); 1219 /* fall through */ 1220 } 1221 } 1222 } 1223 } 1224 1225 /* 1226 * use bio_caller1 to determine how big the original request was rather 1227 * then bio_bcount, because bio_bcount may have been truncated for EOF. 1228 * 1229 * XXX We check for an error, but we do not test the resid for an 1230 * aligned EOF condition. This may result in character & block 1231 * device access not recognizing EOF properly when read or written 1232 * sequentially, but will not effect filesystems. 1233 */ 1234 count = (long)cbp->cb_buf.bio_caller1; 1235 putccdbuf(cbp); 1236 1237 /* 1238 * If all done, "interrupt". 1239 */ 1240 bp->bio_resid -= count; 1241 if (bp->bio_resid < 0) 1242 panic("ccdiodone: count"); 1243 if (bp->bio_resid == 0) 1244 ccdintr(ccdfind(unit), bp); 1245 splx(s); 1246 } 1247 1248 static int 1249 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td) 1250 { 1251 int unit = ccdunit(dev); 1252 int i, j, lookedup = 0, error = 0; 1253 int part, pmask, s; 1254 struct ccd_s *cs; 1255 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1256 char **cpp; 1257 struct vnode **vpp; 1258 1259 if (!IS_ALLOCATED(unit)) 1260 return (ENXIO); 1261 cs = ccdfind(unit); 1262 1263 switch (cmd) { 1264 case CCDIOCSET: 1265 if (IS_INITED(cs)) 1266 return (EBUSY); 1267 1268 if ((flag & FWRITE) == 0) 1269 return (EBADF); 1270 1271 if ((error = ccdlock(cs)) != 0) 1272 return (error); 1273 1274 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1275 return (EINVAL); 1276 1277 /* Fill in some important bits. */ 1278 cs->sc_ileave = ccio->ccio_ileave; 1279 if (cs->sc_ileave == 0 && 1280 ((ccio->ccio_flags & CCDF_MIRROR) || 1281 (ccio->ccio_flags & CCDF_PARITY))) { 1282 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1283 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1284 } 1285 if ((ccio->ccio_flags & CCDF_MIRROR) && 1286 (ccio->ccio_flags & CCDF_PARITY)) { 1287 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1288 ccio->ccio_flags &= ~CCDF_PARITY; 1289 } 1290 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1291 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1292 printf("ccd%d: mirror/parity forces uniform flag\n", 1293 unit); 1294 ccio->ccio_flags |= CCDF_UNIFORM; 1295 } 1296 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1297 1298 /* 1299 * Allocate space for and copy in the array of 1300 * componet pathnames and device numbers. 1301 */ 1302 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1303 M_DEVBUF, M_WAITOK); 1304 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1305 M_DEVBUF, M_WAITOK); 1306 1307 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1308 ccio->ccio_ndisks * sizeof(char **)); 1309 if (error) { 1310 free(vpp, M_DEVBUF); 1311 free(cpp, M_DEVBUF); 1312 ccdunlock(cs); 1313 return (error); 1314 } 1315 1316 #ifdef DEBUG 1317 if (ccddebug & CCDB_INIT) 1318 for (i = 0; i < ccio->ccio_ndisks; ++i) 1319 printf("ccdioctl: component %d: %p\n", 1320 i, cpp[i]); 1321 #endif 1322 1323 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1324 #ifdef DEBUG 1325 if (ccddebug & CCDB_INIT) 1326 printf("ccdioctl: lookedup = %d\n", lookedup); 1327 #endif 1328 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1329 for (j = 0; j < lookedup; ++j) 1330 (void)vn_close(vpp[j], FREAD|FWRITE, 1331 td->td_proc->p_ucred, td); 1332 free(vpp, M_DEVBUF); 1333 free(cpp, M_DEVBUF); 1334 ccdunlock(cs); 1335 return (error); 1336 } 1337 ++lookedup; 1338 } 1339 cs->sc_vpp = vpp; 1340 cs->sc_nccdisks = ccio->ccio_ndisks; 1341 1342 /* 1343 * Initialize the ccd. Fills in the softc for us. 1344 */ 1345 if ((error = ccdinit(cs, cpp, td)) != 0) { 1346 for (j = 0; j < lookedup; ++j) 1347 (void)vn_close(vpp[j], FREAD|FWRITE, 1348 td->td_proc->p_ucred, td); 1349 /* 1350 * We can't ccddestroy() cs just yet, because nothing 1351 * prevents user-level app to do another ioctl() 1352 * without closing the device first, therefore 1353 * declare unit null and void and let ccdclose() 1354 * destroy it when it is safe to do so. 1355 */ 1356 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1357 free(vpp, M_DEVBUF); 1358 free(cpp, M_DEVBUF); 1359 ccdunlock(cs); 1360 return (error); 1361 } 1362 1363 /* 1364 * The ccd has been successfully initialized, so 1365 * we can place it into the array and read the disklabel. 1366 */ 1367 ccio->ccio_unit = unit; 1368 ccio->ccio_size = cs->sc_size; 1369 ccdgetdisklabel(dev); 1370 1371 ccdunlock(cs); 1372 1373 break; 1374 1375 case CCDIOCCLR: 1376 if (!IS_INITED(cs)) 1377 return (ENXIO); 1378 1379 if ((flag & FWRITE) == 0) 1380 return (EBADF); 1381 1382 if ((error = ccdlock(cs)) != 0) 1383 return (error); 1384 1385 /* Don't unconfigure if any other partitions are open */ 1386 part = ccdpart(dev); 1387 pmask = (1 << part); 1388 if ((cs->sc_openmask & ~pmask)) { 1389 ccdunlock(cs); 1390 return (EBUSY); 1391 } 1392 1393 /* Declare unit null and void (reset all flags) */ 1394 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1395 1396 /* Close the components and free their pathnames. */ 1397 for (i = 0; i < cs->sc_nccdisks; ++i) { 1398 /* 1399 * XXX: this close could potentially fail and 1400 * cause Bad Things. Maybe we need to force 1401 * the close to happen? 1402 */ 1403 #ifdef DEBUG 1404 if (ccddebug & CCDB_VNODE) 1405 vprint("CCDIOCCLR: vnode info", 1406 cs->sc_cinfo[i].ci_vp); 1407 #endif 1408 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1409 td->td_proc->p_ucred, td); 1410 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1411 } 1412 1413 /* Free interleave index. */ 1414 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1415 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1416 1417 /* Free component info and interleave table. */ 1418 free(cs->sc_cinfo, M_DEVBUF); 1419 free(cs->sc_itable, M_DEVBUF); 1420 free(cs->sc_vpp, M_DEVBUF); 1421 1422 /* And remove the devstat entry. */ 1423 devstat_remove_entry(&cs->device_stats); 1424 1425 /* This must be atomic. */ 1426 s = splhigh(); 1427 ccdunlock(cs); 1428 splx(s); 1429 1430 break; 1431 1432 case CCDCONFINFO: 1433 { 1434 int ninit = 0; 1435 struct ccdconf *conf = (struct ccdconf *)data; 1436 struct ccd_s *tmpcs; 1437 struct ccd_s *ubuf = conf->buffer; 1438 1439 /* XXX: LOCK(unique unit numbers) */ 1440 LIST_FOREACH(tmpcs, &ccd_softc_list, list) 1441 if (IS_INITED(tmpcs)) 1442 ninit++; 1443 1444 if (conf->size == 0) { 1445 conf->size = sizeof(struct ccd_s) * ninit; 1446 break; 1447 } else if ((conf->size / sizeof(struct ccd_s) != ninit) || 1448 (conf->size % sizeof(struct ccd_s) != 0)) { 1449 /* XXX: UNLOCK(unique unit numbers) */ 1450 return (EINVAL); 1451 } 1452 1453 ubuf += ninit; 1454 LIST_FOREACH(tmpcs, &ccd_softc_list, list) { 1455 if (!IS_INITED(tmpcs)) 1456 continue; 1457 error = copyout(tmpcs, --ubuf, 1458 sizeof(struct ccd_s)); 1459 if (error != 0) 1460 /* XXX: UNLOCK(unique unit numbers) */ 1461 return (error); 1462 } 1463 /* XXX: UNLOCK(unique unit numbers) */ 1464 } 1465 break; 1466 1467 case CCDCPPINFO: 1468 if (!IS_INITED(cs)) 1469 return (ENXIO); 1470 1471 { 1472 int len = 0; 1473 struct ccdcpps *cpps = (struct ccdcpps *)data; 1474 char *ubuf = cpps->buffer; 1475 1476 1477 for (i = 0; i < cs->sc_nccdisks; ++i) 1478 len += cs->sc_cinfo[i].ci_pathlen; 1479 1480 if (cpps->size == 0) { 1481 cpps->size = len; 1482 break; 1483 } else if (cpps->size != len) { 1484 return (EINVAL); 1485 } 1486 1487 for (i = 0; i < cs->sc_nccdisks; ++i) { 1488 len = cs->sc_cinfo[i].ci_pathlen; 1489 error = copyout(cs->sc_cinfo[i].ci_path, ubuf, 1490 len); 1491 if (error != 0) 1492 return (error); 1493 ubuf += len; 1494 } 1495 } 1496 break; 1497 1498 case DIOCGDINFO: 1499 if (!IS_INITED(cs)) 1500 return (ENXIO); 1501 1502 *(struct disklabel *)data = cs->sc_label; 1503 break; 1504 1505 case DIOCGPART: 1506 if (!IS_INITED(cs)) 1507 return (ENXIO); 1508 1509 ((struct partinfo *)data)->disklab = &cs->sc_label; 1510 ((struct partinfo *)data)->part = 1511 &cs->sc_label.d_partitions[ccdpart(dev)]; 1512 break; 1513 1514 case DIOCWDINFO: 1515 case DIOCSDINFO: 1516 if (!IS_INITED(cs)) 1517 return (ENXIO); 1518 1519 if ((flag & FWRITE) == 0) 1520 return (EBADF); 1521 1522 if ((error = ccdlock(cs)) != 0) 1523 return (error); 1524 1525 cs->sc_flags |= CCDF_LABELLING; 1526 1527 error = setdisklabel(&cs->sc_label, 1528 (struct disklabel *)data, 0); 1529 if (error == 0) { 1530 if (cmd == DIOCWDINFO) 1531 error = writedisklabel(CCDLABELDEV(dev), 1532 &cs->sc_label); 1533 } 1534 1535 cs->sc_flags &= ~CCDF_LABELLING; 1536 1537 ccdunlock(cs); 1538 1539 if (error) 1540 return (error); 1541 break; 1542 1543 case DIOCWLABEL: 1544 if (!IS_INITED(cs)) 1545 return (ENXIO); 1546 1547 if ((flag & FWRITE) == 0) 1548 return (EBADF); 1549 if (*(int *)data != 0) 1550 cs->sc_flags |= CCDF_WLABEL; 1551 else 1552 cs->sc_flags &= ~CCDF_WLABEL; 1553 break; 1554 1555 default: 1556 return (ENOTTY); 1557 } 1558 1559 return (0); 1560 } 1561 1562 static int 1563 ccdsize(dev_t dev) 1564 { 1565 struct ccd_s *cs; 1566 int part, size; 1567 1568 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1569 return (-1); 1570 1571 cs = ccdfind(ccdunit(dev)); 1572 part = ccdpart(dev); 1573 1574 if (!IS_INITED(cs)) 1575 return (-1); 1576 1577 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1578 size = -1; 1579 else 1580 size = cs->sc_label.d_partitions[part].p_size; 1581 1582 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1583 return (-1); 1584 1585 return (size); 1586 } 1587 1588 static int 1589 ccddump(dev_t dev) 1590 { 1591 1592 /* Not implemented. */ 1593 return ENXIO; 1594 } 1595 1596 /* 1597 * Lookup the provided name in the filesystem. If the file exists, 1598 * is a valid block device, and isn't being used by anyone else, 1599 * set *vpp to the file's vnode. 1600 */ 1601 static int 1602 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1603 { 1604 struct nameidata nd; 1605 struct vnode *vp; 1606 int error, flags; 1607 1608 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td); 1609 flags = FREAD | FWRITE; 1610 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1611 #ifdef DEBUG 1612 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 1613 printf("ccdlookup: vn_open error = %d\n", error); 1614 #endif 1615 return (error); 1616 } 1617 vp = nd.ni_vp; 1618 1619 if (vp->v_usecount > 1) { 1620 error = EBUSY; 1621 goto bad; 1622 } 1623 1624 if (!vn_isdisk(vp, &error)) 1625 goto bad; 1626 1627 #ifdef DEBUG 1628 if (ccddebug & CCDB_VNODE) 1629 vprint("ccdlookup: vnode info", vp); 1630 #endif 1631 1632 VOP_UNLOCK(vp, 0, td); 1633 NDFREE(&nd, NDF_ONLY_PNBUF); 1634 *vpp = vp; 1635 return (0); 1636 bad: 1637 VOP_UNLOCK(vp, 0, td); 1638 NDFREE(&nd, NDF_ONLY_PNBUF); 1639 /* vn_close does vrele() for vp */ 1640 (void)vn_close(vp, FREAD|FWRITE, td->td_proc->p_ucred, td); 1641 return (error); 1642 } 1643 1644 /* 1645 * Read the disklabel from the ccd. If one is not present, fake one 1646 * up. 1647 */ 1648 static void 1649 ccdgetdisklabel(dev_t dev) 1650 { 1651 int unit = ccdunit(dev); 1652 struct ccd_s *cs = ccdfind(unit); 1653 char *errstring; 1654 struct disklabel *lp = &cs->sc_label; 1655 struct ccdgeom *ccg = &cs->sc_geom; 1656 1657 bzero(lp, sizeof(*lp)); 1658 1659 lp->d_secperunit = cs->sc_size; 1660 lp->d_secsize = ccg->ccg_secsize; 1661 lp->d_nsectors = ccg->ccg_nsectors; 1662 lp->d_ntracks = ccg->ccg_ntracks; 1663 lp->d_ncylinders = ccg->ccg_ncylinders; 1664 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1665 1666 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1667 lp->d_type = DTYPE_CCD; 1668 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1669 lp->d_rpm = 3600; 1670 lp->d_interleave = 1; 1671 lp->d_flags = 0; 1672 1673 lp->d_partitions[RAW_PART].p_offset = 0; 1674 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1675 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1676 lp->d_npartitions = RAW_PART + 1; 1677 1678 lp->d_bbsize = BBSIZE; /* XXX */ 1679 lp->d_sbsize = SBSIZE; /* XXX */ 1680 1681 lp->d_magic = DISKMAGIC; 1682 lp->d_magic2 = DISKMAGIC; 1683 lp->d_checksum = dkcksum(&cs->sc_label); 1684 1685 /* 1686 * Call the generic disklabel extraction routine. 1687 */ 1688 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1689 if (errstring != NULL) 1690 ccdmakedisklabel(cs); 1691 1692 #ifdef DEBUG 1693 /* It's actually extremely common to have unlabeled ccds. */ 1694 if (ccddebug & CCDB_LABEL) 1695 if (errstring != NULL) 1696 printf("ccd%d: %s\n", unit, errstring); 1697 #endif 1698 } 1699 1700 /* 1701 * Take care of things one might want to take care of in the event 1702 * that a disklabel isn't present. 1703 */ 1704 static void 1705 ccdmakedisklabel(struct ccd_s *cs) 1706 { 1707 struct disklabel *lp = &cs->sc_label; 1708 1709 /* 1710 * For historical reasons, if there's no disklabel present 1711 * the raw partition must be marked FS_BSDFFS. 1712 */ 1713 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1714 1715 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1716 } 1717 1718 /* 1719 * Wait interruptibly for an exclusive lock. 1720 * 1721 * XXX 1722 * Several drivers do this; it should be abstracted and made MP-safe. 1723 */ 1724 static int 1725 ccdlock(struct ccd_s *cs) 1726 { 1727 int error; 1728 1729 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1730 cs->sc_flags |= CCDF_WANTED; 1731 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1732 return (error); 1733 } 1734 cs->sc_flags |= CCDF_LOCKED; 1735 return (0); 1736 } 1737 1738 /* 1739 * Unlock and wake up any waiters. 1740 */ 1741 static void 1742 ccdunlock(struct ccd_s *cs) 1743 { 1744 1745 cs->sc_flags &= ~CCDF_LOCKED; 1746 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1747 cs->sc_flags &= ~CCDF_WANTED; 1748 wakeup(cs); 1749 } 1750 } 1751 1752 #ifdef DEBUG 1753 static void 1754 printiinfo(struct ccdiinfo *ii) 1755 { 1756 int ix, i; 1757 1758 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1759 printf(" itab[%d]: #dk %d sblk %d soff %d", 1760 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1761 for (i = 0; i < ii->ii_ndisk; i++) 1762 printf(" %d", ii->ii_index[i]); 1763 printf("\n"); 1764 } 1765 } 1766 #endif 1767