1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include <sys/param.h> 91 #include <sys/systm.h> 92 #include <sys/kernel.h> 93 #include <sys/module.h> 94 #include <sys/proc.h> 95 #include <sys/bio.h> 96 #include <sys/malloc.h> 97 #include <sys/namei.h> 98 #include <sys/conf.h> 99 #include <sys/stat.h> 100 #include <sys/stdint.h> 101 #include <sys/sysctl.h> 102 #include <sys/disk.h> 103 #include <sys/disklabel.h> 104 #include <sys/devicestat.h> 105 #include <sys/fcntl.h> 106 #include <sys/vnode.h> 107 108 #include <sys/ccdvar.h> 109 110 MALLOC_DEFINE(M_CCD, "CCD driver", "Concatenated Disk driver"); 111 112 #if defined(CCDDEBUG) && !defined(DEBUG) 113 #define DEBUG 114 #endif 115 116 #ifdef DEBUG 117 #define CCDB_FOLLOW 0x01 118 #define CCDB_INIT 0x02 119 #define CCDB_IO 0x04 120 #define CCDB_LABEL 0x08 121 #define CCDB_VNODE 0x10 122 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 123 CCDB_VNODE; 124 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 125 #endif 126 127 #define ccdunit(x) dkunit(x) 128 #define ccdpart(x) dkpart(x) 129 130 /* 131 This is how mirroring works (only writes are special): 132 133 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 134 linked together by the cb_mirror field. "cb_pflags & 135 CCDPF_MIRROR_DONE" is set to 0 on both of them. 136 137 When a component returns to ccdiodone(), it checks if "cb_pflags & 138 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 139 flag and returns. If it is, it means its partner has already 140 returned, so it will go to the regular cleanup. 141 142 */ 143 144 struct ccdbuf { 145 struct bio cb_buf; /* new I/O buf */ 146 struct bio *cb_obp; /* ptr. to original I/O buf */ 147 struct ccdbuf *cb_freenext; /* free list link */ 148 int cb_unit; /* target unit */ 149 int cb_comp; /* target component */ 150 int cb_pflags; /* mirror/parity status flag */ 151 struct ccdbuf *cb_mirror; /* mirror counterpart */ 152 }; 153 154 /* bits in cb_pflags */ 155 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 156 157 #define CCDLABELDEV(dev) \ 158 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 159 160 /* convinient macros for often-used statements */ 161 #define IS_ALLOCATED(unit) (ccdfind(unit) != NULL) 162 #define IS_INITED(cs) (((cs)->sc_flags & CCDF_INITED) != 0) 163 164 static d_open_t ccdopen; 165 static d_close_t ccdclose; 166 static d_strategy_t ccdstrategy; 167 static d_ioctl_t ccdioctl; 168 static d_psize_t ccdsize; 169 170 #define NCCDFREEHIWAT 16 171 172 #define CDEV_MAJOR 74 173 174 static struct cdevsw ccd_cdevsw = { 175 /* open */ ccdopen, 176 /* close */ ccdclose, 177 /* read */ physread, 178 /* write */ physwrite, 179 /* ioctl */ ccdioctl, 180 /* poll */ nopoll, 181 /* mmap */ nommap, 182 /* strategy */ ccdstrategy, 183 /* name */ "ccd", 184 /* maj */ CDEV_MAJOR, 185 /* dump */ nodump, 186 /* psize */ ccdsize, 187 /* flags */ D_DISK, 188 }; 189 static LIST_HEAD(, ccd_s) ccd_softc_list = LIST_HEAD_INITIALIZER(&ccd_softc_list); 190 191 static struct ccd_s *ccdfind(int); 192 static struct ccd_s *ccdnew(int); 193 static int ccddestroy(struct ccd_s *, struct proc *); 194 195 /* called during module initialization */ 196 static void ccdattach(void); 197 static int ccd_modevent(module_t, int, void *); 198 199 /* called by biodone() at interrupt time */ 200 static void ccdiodone(struct bio *bp); 201 202 static void ccdstart(struct ccd_s *, struct bio *); 203 static void ccdinterleave(struct ccd_s *, int); 204 static void ccdintr(struct ccd_s *, struct bio *); 205 static int ccdinit(struct ccd_s *, char **, struct thread *); 206 static int ccdlookup(char *, struct thread *p, struct vnode **); 207 static void ccdbuffer(struct ccdbuf **ret, struct ccd_s *, 208 struct bio *, daddr_t, caddr_t, long); 209 static void ccdgetdisklabel(dev_t); 210 static void ccdmakedisklabel(struct ccd_s *); 211 static int ccdlock(struct ccd_s *); 212 static void ccdunlock(struct ccd_s *); 213 214 #ifdef DEBUG 215 static void printiinfo(struct ccdiinfo *); 216 #endif 217 218 /* Non-private for the benefit of libkvm. */ 219 struct ccdbuf *ccdfreebufs; 220 static int numccdfreebufs; 221 222 /* 223 * getccdbuf() - Allocate and zero a ccd buffer. 224 * 225 * This routine is called at splbio(). 226 */ 227 228 static __inline 229 struct ccdbuf * 230 getccdbuf(struct ccdbuf *cpy) 231 { 232 struct ccdbuf *cbp; 233 234 /* 235 * Allocate from freelist or malloc as necessary 236 */ 237 if ((cbp = ccdfreebufs) != NULL) { 238 ccdfreebufs = cbp->cb_freenext; 239 --numccdfreebufs; 240 } else { 241 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 242 } 243 244 /* 245 * Used by mirroring code 246 */ 247 if (cpy) 248 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 249 else 250 bzero(cbp, sizeof(struct ccdbuf)); 251 252 /* 253 * independant struct bio initialization 254 */ 255 256 return(cbp); 257 } 258 259 /* 260 * putccdbuf() - Free a ccd buffer. 261 * 262 * This routine is called at splbio(). 263 */ 264 265 static __inline 266 void 267 putccdbuf(struct ccdbuf *cbp) 268 { 269 270 if (numccdfreebufs < NCCDFREEHIWAT) { 271 cbp->cb_freenext = ccdfreebufs; 272 ccdfreebufs = cbp; 273 ++numccdfreebufs; 274 } else { 275 free((caddr_t)cbp, M_DEVBUF); 276 } 277 } 278 279 280 /* 281 * Number of blocks to untouched in front of a component partition. 282 * This is to avoid violating its disklabel area when it starts at the 283 * beginning of the slice. 284 */ 285 #if !defined(CCD_OFFSET) 286 #define CCD_OFFSET 16 287 #endif 288 289 static struct ccd_s * 290 ccdfind(int unit) 291 { 292 struct ccd_s *sc = NULL; 293 294 /* XXX: LOCK(unique unit numbers) */ 295 LIST_FOREACH(sc, &ccd_softc_list, list) { 296 if (sc->sc_unit == unit) 297 break; 298 } 299 /* XXX: UNLOCK(unique unit numbers) */ 300 return ((sc == NULL) || (sc->sc_unit != unit) ? NULL : sc); 301 } 302 303 static struct ccd_s * 304 ccdnew(int unit) 305 { 306 struct ccd_s *sc; 307 308 /* XXX: LOCK(unique unit numbers) */ 309 if (IS_ALLOCATED(unit) || unit > DKMAXUNIT) 310 return (NULL); 311 312 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO); 313 sc->sc_unit = unit; 314 LIST_INSERT_HEAD(&ccd_softc_list, sc, list); 315 /* XXX: UNLOCK(unique unit numbers) */ 316 return (sc); 317 } 318 319 static int 320 ccddestroy(struct ccd_s *sc, struct proc *p) 321 { 322 323 /* XXX: LOCK(unique unit numbers) */ 324 LIST_REMOVE(sc, list); 325 /* XXX: UNLOCK(unique unit numbers) */ 326 FREE(sc, M_CCD); 327 return (0); 328 } 329 330 static void 331 ccd_clone(void *arg, char *name, int namelen, dev_t *dev) 332 { 333 int i, u; 334 char *s; 335 336 if (*dev != NODEV) 337 return; 338 i = dev_stdclone(name, &s, "ccd", &u); 339 if (i != 2) 340 return; 341 if (*s < 'a' || *s > 'h') 342 return; 343 if (s[1] != '\0') 344 return; 345 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', 346 UID_ROOT, GID_OPERATOR, 0640, name); 347 } 348 349 /* 350 * Called by main() during pseudo-device attachment. All we need 351 * to do is to add devsw entries. 352 */ 353 static void 354 ccdattach() 355 { 356 357 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); 358 } 359 360 static int 361 ccd_modevent(module_t mod, int type, void *data) 362 { 363 int error = 0; 364 365 switch (type) { 366 case MOD_LOAD: 367 ccdattach(); 368 break; 369 370 case MOD_UNLOAD: 371 printf("ccd0: Unload not supported!\n"); 372 error = EOPNOTSUPP; 373 break; 374 375 case MOD_SHUTDOWN: 376 break; 377 378 default: 379 error = EOPNOTSUPP; 380 } 381 return (error); 382 } 383 384 DEV_MODULE(ccd, ccd_modevent, NULL); 385 386 static int 387 ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td) 388 { 389 struct ccdcinfo *ci = NULL; /* XXX */ 390 size_t size; 391 int ix; 392 struct vnode *vp; 393 size_t minsize; 394 int maxsecsize; 395 struct ccdgeom *ccg = &cs->sc_geom; 396 char *tmppath = NULL; 397 int error = 0; 398 off_t mediasize; 399 u_int sectorsize; 400 401 #ifdef DEBUG 402 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 403 printf("ccdinit: unit %d\n", cs->sc_unit); 404 #endif 405 406 cs->sc_size = 0; 407 408 /* Allocate space for the component info. */ 409 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 410 M_DEVBUF, M_WAITOK); 411 412 /* 413 * Verify that each component piece exists and record 414 * relevant information about it. 415 */ 416 maxsecsize = 0; 417 minsize = 0; 418 tmppath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK); 419 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 420 vp = cs->sc_vpp[ix]; 421 ci = &cs->sc_cinfo[ix]; 422 ci->ci_vp = vp; 423 424 /* 425 * Copy in the pathname of the component. 426 */ 427 if ((error = copyinstr(cpaths[ix], tmppath, 428 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 429 #ifdef DEBUG 430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 431 printf("ccd%d: can't copy path, error = %d\n", 432 cs->sc_unit, error); 433 #endif 434 goto fail; 435 } 436 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 437 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 438 439 ci->ci_dev = vn_todev(vp); 440 441 /* 442 * Get partition information for the component. 443 */ 444 error = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize, 445 FREAD, td->td_ucred, td); 446 if (error != 0) { 447 #ifdef DEBUG 448 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 449 printf("ccd%d: %s: ioctl failed, error = %d\n", 450 cs->sc_unit, ci->ci_path, error); 451 #endif 452 goto fail; 453 } 454 /* 455 * Get partition information for the component. 456 */ 457 error = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)§orsize, 458 FREAD, td->td_ucred, td); 459 if (error != 0) { 460 #ifdef DEBUG 461 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 462 printf("ccd%d: %s: ioctl failed, error = %d\n", 463 cs->sc_unit, ci->ci_path, error); 464 #endif 465 goto fail; 466 } 467 if (sectorsize > maxsecsize) 468 maxsecsize = sectorsize; 469 size = mediasize / DEV_BSIZE - CCD_OFFSET; 470 471 /* 472 * Calculate the size, truncating to an interleave 473 * boundary if necessary. 474 */ 475 476 if (cs->sc_ileave > 1) 477 size -= size % cs->sc_ileave; 478 479 if (size == 0) { 480 #ifdef DEBUG 481 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 482 printf("ccd%d: %s: size == 0\n", 483 cs->sc_unit, ci->ci_path); 484 #endif 485 error = ENODEV; 486 goto fail; 487 } 488 489 if (minsize == 0 || size < minsize) 490 minsize = size; 491 ci->ci_size = size; 492 cs->sc_size += size; 493 } 494 495 free(tmppath, M_DEVBUF); 496 tmppath = NULL; 497 498 /* 499 * Don't allow the interleave to be smaller than 500 * the biggest component sector. 501 */ 502 if ((cs->sc_ileave > 0) && 503 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 504 #ifdef DEBUG 505 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 506 printf("ccd%d: interleave must be at least %d\n", 507 cs->sc_unit, (maxsecsize / DEV_BSIZE)); 508 #endif 509 error = EINVAL; 510 goto fail; 511 } 512 513 /* 514 * If uniform interleave is desired set all sizes to that of 515 * the smallest component. This will guarentee that a single 516 * interleave table is generated. 517 * 518 * Lost space must be taken into account when calculating the 519 * overall size. Half the space is lost when CCDF_MIRROR is 520 * specified. One disk is lost when CCDF_PARITY is specified. 521 */ 522 if (cs->sc_flags & CCDF_UNIFORM) { 523 for (ci = cs->sc_cinfo; 524 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 525 ci->ci_size = minsize; 526 } 527 if (cs->sc_flags & CCDF_MIRROR) { 528 /* 529 * Check to see if an even number of components 530 * have been specified. The interleave must also 531 * be non-zero in order for us to be able to 532 * guarentee the topology. 533 */ 534 if (cs->sc_nccdisks % 2) { 535 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit ); 536 error = EINVAL; 537 goto fail; 538 } 539 if (cs->sc_ileave == 0) { 540 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit); 541 error = EINVAL; 542 goto fail; 543 } 544 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 545 } else { 546 if (cs->sc_ileave == 0) { 547 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit); 548 error = EINVAL; 549 goto fail; 550 } 551 cs->sc_size = cs->sc_nccdisks * minsize; 552 } 553 } 554 555 /* 556 * Construct the interleave table. 557 */ 558 ccdinterleave(cs, cs->sc_unit); 559 560 /* 561 * Create pseudo-geometry based on 1MB cylinders. It's 562 * pretty close. 563 */ 564 ccg->ccg_secsize = maxsecsize; 565 ccg->ccg_ntracks = 1; 566 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 567 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 568 569 /* 570 * Add a devstat entry for this device. 571 */ 572 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit, 573 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 574 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 575 DEVSTAT_PRIORITY_ARRAY); 576 577 cs->sc_flags |= CCDF_INITED; 578 cs->sc_cflags = cs->sc_flags; /* So we can find out later... */ 579 return (0); 580 fail: 581 while (ci > cs->sc_cinfo) { 582 ci--; 583 free(ci->ci_path, M_DEVBUF); 584 } 585 if (tmppath != NULL) 586 free(tmppath, M_DEVBUF); 587 free(cs->sc_cinfo, M_DEVBUF); 588 return (error); 589 } 590 591 static void 592 ccdinterleave(struct ccd_s *cs, int unit) 593 { 594 struct ccdcinfo *ci, *smallci; 595 struct ccdiinfo *ii; 596 daddr_t bn, lbn; 597 int ix; 598 u_long size; 599 600 #ifdef DEBUG 601 if (ccddebug & CCDB_INIT) 602 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 603 #endif 604 605 /* 606 * Allocate an interleave table. The worst case occurs when each 607 * of N disks is of a different size, resulting in N interleave 608 * tables. 609 * 610 * Chances are this is too big, but we don't care. 611 */ 612 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 613 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, 614 M_WAITOK | M_ZERO); 615 616 /* 617 * Trivial case: no interleave (actually interleave of disk size). 618 * Each table entry represents a single component in its entirety. 619 * 620 * An interleave of 0 may not be used with a mirror or parity setup. 621 */ 622 if (cs->sc_ileave == 0) { 623 bn = 0; 624 ii = cs->sc_itable; 625 626 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 627 /* Allocate space for ii_index. */ 628 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 629 ii->ii_ndisk = 1; 630 ii->ii_startblk = bn; 631 ii->ii_startoff = 0; 632 ii->ii_index[0] = ix; 633 bn += cs->sc_cinfo[ix].ci_size; 634 ii++; 635 } 636 ii->ii_ndisk = 0; 637 #ifdef DEBUG 638 if (ccddebug & CCDB_INIT) 639 printiinfo(cs->sc_itable); 640 #endif 641 return; 642 } 643 644 /* 645 * The following isn't fast or pretty; it doesn't have to be. 646 */ 647 size = 0; 648 bn = lbn = 0; 649 for (ii = cs->sc_itable; ; ii++) { 650 /* 651 * Allocate space for ii_index. We might allocate more then 652 * we use. 653 */ 654 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 655 M_DEVBUF, M_WAITOK); 656 657 /* 658 * Locate the smallest of the remaining components 659 */ 660 smallci = NULL; 661 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 662 ci++) { 663 if (ci->ci_size > size && 664 (smallci == NULL || 665 ci->ci_size < smallci->ci_size)) { 666 smallci = ci; 667 } 668 } 669 670 /* 671 * Nobody left, all done 672 */ 673 if (smallci == NULL) { 674 ii->ii_ndisk = 0; 675 break; 676 } 677 678 /* 679 * Record starting logical block using an sc_ileave blocksize. 680 */ 681 ii->ii_startblk = bn / cs->sc_ileave; 682 683 /* 684 * Record starting comopnent block using an sc_ileave 685 * blocksize. This value is relative to the beginning of 686 * a component disk. 687 */ 688 ii->ii_startoff = lbn; 689 690 /* 691 * Determine how many disks take part in this interleave 692 * and record their indices. 693 */ 694 ix = 0; 695 for (ci = cs->sc_cinfo; 696 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 697 if (ci->ci_size >= smallci->ci_size) { 698 ii->ii_index[ix++] = ci - cs->sc_cinfo; 699 } 700 } 701 ii->ii_ndisk = ix; 702 bn += ix * (smallci->ci_size - size); 703 lbn = smallci->ci_size / cs->sc_ileave; 704 size = smallci->ci_size; 705 } 706 #ifdef DEBUG 707 if (ccddebug & CCDB_INIT) 708 printiinfo(cs->sc_itable); 709 #endif 710 } 711 712 /* ARGSUSED */ 713 static int 714 ccdopen(dev_t dev, int flags, int fmt, struct thread *td) 715 { 716 int unit = ccdunit(dev); 717 struct ccd_s *cs; 718 struct disklabel *lp; 719 int error = 0, part, pmask; 720 721 #ifdef DEBUG 722 if (ccddebug & CCDB_FOLLOW) 723 printf("ccdopen(%p, %x)\n", dev, flags); 724 #endif 725 726 cs = IS_ALLOCATED(unit) ? ccdfind(unit) : ccdnew(unit); 727 728 if ((error = ccdlock(cs)) != 0) 729 return (error); 730 731 lp = &cs->sc_label; 732 733 part = ccdpart(dev); 734 pmask = (1 << part); 735 736 /* 737 * If we're initialized, check to see if there are any other 738 * open partitions. If not, then it's safe to update 739 * the in-core disklabel. 740 */ 741 if (IS_INITED(cs) && (cs->sc_openmask == 0)) 742 ccdgetdisklabel(dev); 743 744 /* Check that the partition exists. */ 745 if (part != RAW_PART && ((part >= lp->d_npartitions) || 746 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 747 error = ENXIO; 748 goto done; 749 } 750 751 cs->sc_openmask |= pmask; 752 done: 753 ccdunlock(cs); 754 return (0); 755 } 756 757 /* ARGSUSED */ 758 static int 759 ccdclose(dev_t dev, int flags, int fmt, struct thread *td) 760 { 761 int unit = ccdunit(dev); 762 struct ccd_s *cs; 763 int error = 0, part; 764 765 #ifdef DEBUG 766 if (ccddebug & CCDB_FOLLOW) 767 printf("ccdclose(%p, %x)\n", dev, flags); 768 #endif 769 770 if (!IS_ALLOCATED(unit)) 771 return (ENXIO); 772 cs = ccdfind(unit); 773 774 if ((error = ccdlock(cs)) != 0) 775 return (error); 776 777 part = ccdpart(dev); 778 779 /* ...that much closer to allowing unconfiguration... */ 780 cs->sc_openmask &= ~(1 << part); 781 /* collect "garbage" if possible */ 782 if (!IS_INITED(cs) && (cs->sc_flags & CCDF_WANTED) == 0) 783 ccddestroy(cs, td->td_proc); 784 else 785 ccdunlock(cs); 786 return (0); 787 } 788 789 static void 790 ccdstrategy(struct bio *bp) 791 { 792 int unit = ccdunit(bp->bio_dev); 793 struct ccd_s *cs = ccdfind(unit); 794 int s; 795 int wlabel; 796 struct disklabel *lp; 797 798 #ifdef DEBUG 799 if (ccddebug & CCDB_FOLLOW) 800 printf("ccdstrategy(%p): unit %d\n", bp, unit); 801 #endif 802 if (!IS_INITED(cs)) { 803 biofinish(bp, NULL, ENXIO); 804 return; 805 } 806 807 /* If it's a nil transfer, wake up the top half now. */ 808 if (bp->bio_bcount == 0) { 809 biodone(bp); 810 return; 811 } 812 813 lp = &cs->sc_label; 814 815 /* 816 * Do bounds checking and adjust transfer. If there's an 817 * error, the bounds check will flag that for us. 818 */ 819 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 820 if (ccdpart(bp->bio_dev) != RAW_PART) { 821 if (bounds_check_with_label(bp, lp, wlabel) <= 0) { 822 biodone(bp); 823 return; 824 } 825 } else { 826 int pbn; /* in sc_secsize chunks */ 827 long sz; /* in sc_secsize chunks */ 828 829 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 830 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 831 832 /* 833 * If out of bounds return an error. If at the EOF point, 834 * simply read or write less. 835 */ 836 837 if (pbn < 0 || pbn >= cs->sc_size) { 838 bp->bio_resid = bp->bio_bcount; 839 if (pbn != cs->sc_size) 840 biofinish(bp, NULL, EINVAL); 841 else 842 biodone(bp); 843 return; 844 } 845 846 /* 847 * If the request crosses EOF, truncate the request. 848 */ 849 if (pbn + sz > cs->sc_size) { 850 bp->bio_bcount = (cs->sc_size - pbn) * 851 cs->sc_geom.ccg_secsize; 852 } 853 } 854 855 bp->bio_resid = bp->bio_bcount; 856 857 /* 858 * "Start" the unit. 859 */ 860 s = splbio(); 861 ccdstart(cs, bp); 862 splx(s); 863 return; 864 } 865 866 static void 867 ccdstart(struct ccd_s *cs, struct bio *bp) 868 { 869 long bcount, rcount; 870 struct ccdbuf *cbp[4]; 871 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 872 caddr_t addr; 873 daddr_t bn; 874 struct partition *pp; 875 876 #ifdef DEBUG 877 if (ccddebug & CCDB_FOLLOW) 878 printf("ccdstart(%p, %p)\n", cs, bp); 879 #endif 880 881 /* Record the transaction start */ 882 devstat_start_transaction(&cs->device_stats); 883 884 /* 885 * Translate the partition-relative block number to an absolute. 886 */ 887 bn = bp->bio_blkno; 888 if (ccdpart(bp->bio_dev) != RAW_PART) { 889 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; 890 bn += pp->p_offset; 891 } 892 893 /* 894 * Allocate component buffers and fire off the requests 895 */ 896 addr = bp->bio_data; 897 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 898 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 899 rcount = cbp[0]->cb_buf.bio_bcount; 900 901 if (cs->sc_cflags & CCDF_MIRROR) { 902 /* 903 * Mirroring. Writes go to both disks, reads are 904 * taken from whichever disk seems most appropriate. 905 * 906 * We attempt to localize reads to the disk whos arm 907 * is nearest the read request. We ignore seeks due 908 * to writes when making this determination and we 909 * also try to avoid hogging. 910 */ 911 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 912 BIO_STRATEGY(&cbp[0]->cb_buf); 913 BIO_STRATEGY(&cbp[1]->cb_buf); 914 } else { 915 int pick = cs->sc_pick; 916 daddr_t range = cs->sc_size / 16; 917 918 if (bn < cs->sc_blk[pick] - range || 919 bn > cs->sc_blk[pick] + range 920 ) { 921 cs->sc_pick = pick = 1 - pick; 922 } 923 cs->sc_blk[pick] = bn + btodb(rcount); 924 BIO_STRATEGY(&cbp[pick]->cb_buf); 925 } 926 } else { 927 /* 928 * Not mirroring 929 */ 930 BIO_STRATEGY(&cbp[0]->cb_buf); 931 } 932 bn += btodb(rcount); 933 addr += rcount; 934 } 935 } 936 937 /* 938 * Build a component buffer header. 939 */ 940 static void 941 ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) 942 { 943 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 944 struct ccdbuf *cbp; 945 daddr_t cbn, cboff; 946 off_t cbc; 947 948 #ifdef DEBUG 949 if (ccddebug & CCDB_IO) 950 printf("ccdbuffer(%p, %p, %lld, %p, %ld)\n", 951 (void *)cs, (void *)bp, (long long)bn, (void *)addr, 952 bcount); 953 #endif 954 /* 955 * Determine which component bn falls in. 956 */ 957 cbn = bn; 958 cboff = 0; 959 960 if (cs->sc_ileave == 0) { 961 /* 962 * Serially concatenated and neither a mirror nor a parity 963 * config. This is a special case. 964 */ 965 daddr_t sblk; 966 967 sblk = 0; 968 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 969 sblk += ci->ci_size; 970 cbn -= sblk; 971 } else { 972 struct ccdiinfo *ii; 973 int ccdisk, off; 974 975 /* 976 * Calculate cbn, the logical superblock (sc_ileave chunks), 977 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 978 * to cbn. 979 */ 980 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 981 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 982 983 /* 984 * Figure out which interleave table to use. 985 */ 986 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 987 if (ii->ii_startblk > cbn) 988 break; 989 } 990 ii--; 991 992 /* 993 * off is the logical superblock relative to the beginning 994 * of this interleave block. 995 */ 996 off = cbn - ii->ii_startblk; 997 998 /* 999 * We must calculate which disk component to use (ccdisk), 1000 * and recalculate cbn to be the superblock relative to 1001 * the beginning of the component. This is typically done by 1002 * adding 'off' and ii->ii_startoff together. However, 'off' 1003 * must typically be divided by the number of components in 1004 * this interleave array to be properly convert it from a 1005 * CCD-relative logical superblock number to a 1006 * component-relative superblock number. 1007 */ 1008 if (ii->ii_ndisk == 1) { 1009 /* 1010 * When we have just one disk, it can't be a mirror 1011 * or a parity config. 1012 */ 1013 ccdisk = ii->ii_index[0]; 1014 cbn = ii->ii_startoff + off; 1015 } else { 1016 if (cs->sc_cflags & CCDF_MIRROR) { 1017 /* 1018 * We have forced a uniform mapping, resulting 1019 * in a single interleave array. We double 1020 * up on the first half of the available 1021 * components and our mirror is in the second 1022 * half. This only works with a single 1023 * interleave array because doubling up 1024 * doubles the number of sectors, so there 1025 * cannot be another interleave array because 1026 * the next interleave array's calculations 1027 * would be off. 1028 */ 1029 int ndisk2 = ii->ii_ndisk / 2; 1030 ccdisk = ii->ii_index[off % ndisk2]; 1031 cbn = ii->ii_startoff + off / ndisk2; 1032 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1033 } else { 1034 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1035 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1036 } 1037 } 1038 1039 ci = &cs->sc_cinfo[ccdisk]; 1040 1041 /* 1042 * Convert cbn from a superblock to a normal block so it 1043 * can be used to calculate (along with cboff) the normal 1044 * block index into this particular disk. 1045 */ 1046 cbn *= cs->sc_ileave; 1047 } 1048 1049 /* 1050 * Fill in the component buf structure. 1051 */ 1052 cbp = getccdbuf(NULL); 1053 cbp->cb_buf.bio_cmd = bp->bio_cmd; 1054 cbp->cb_buf.bio_done = ccdiodone; 1055 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ 1056 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; 1057 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1058 cbp->cb_buf.bio_data = addr; 1059 if (cs->sc_ileave == 0) 1060 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1061 else 1062 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1063 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount; 1064 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 1065 1066 /* 1067 * context for ccdiodone 1068 */ 1069 cbp->cb_obp = bp; 1070 cbp->cb_unit = cs->sc_unit; 1071 cbp->cb_comp = ci - cs->sc_cinfo; 1072 1073 #ifdef DEBUG 1074 if (ccddebug & CCDB_IO) 1075 printf(" dev %p(u%ld): cbp %p bn %jd addr %p bcnt %ld\n", 1076 ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp, 1077 (intmax_t)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1078 cbp->cb_buf.bio_bcount); 1079 #endif 1080 cb[0] = cbp; 1081 1082 /* 1083 * Note: both I/O's setup when reading from mirror, but only one 1084 * will be executed. 1085 */ 1086 if (cs->sc_cflags & CCDF_MIRROR) { 1087 /* mirror, setup second I/O */ 1088 cbp = getccdbuf(cb[0]); 1089 cbp->cb_buf.bio_dev = ci2->ci_dev; 1090 cbp->cb_comp = ci2 - cs->sc_cinfo; 1091 cb[1] = cbp; 1092 /* link together the ccdbuf's and clear "mirror done" flag */ 1093 cb[0]->cb_mirror = cb[1]; 1094 cb[1]->cb_mirror = cb[0]; 1095 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1096 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1097 } 1098 } 1099 1100 static void 1101 ccdintr(struct ccd_s *cs, struct bio *bp) 1102 { 1103 #ifdef DEBUG 1104 if (ccddebug & CCDB_FOLLOW) 1105 printf("ccdintr(%p, %p)\n", cs, bp); 1106 #endif 1107 /* 1108 * Request is done for better or worse, wakeup the top half. 1109 */ 1110 if (bp->bio_flags & BIO_ERROR) 1111 bp->bio_resid = bp->bio_bcount; 1112 biofinish(bp, &cs->device_stats, 0); 1113 } 1114 1115 /* 1116 * Called at interrupt time. 1117 * Mark the component as done and if all components are done, 1118 * take a ccd interrupt. 1119 */ 1120 static void 1121 ccdiodone(struct bio *ibp) 1122 { 1123 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 1124 struct bio *bp = cbp->cb_obp; 1125 int unit = cbp->cb_unit; 1126 int count, s; 1127 1128 s = splbio(); 1129 #ifdef DEBUG 1130 if (ccddebug & CCDB_FOLLOW) 1131 printf("ccdiodone(%p)\n", cbp); 1132 if (ccddebug & CCDB_IO) { 1133 printf("ccdiodone: bp %p bcount %ld resid %ld\n", 1134 bp, bp->bio_bcount, bp->bio_resid); 1135 printf(" dev %p(u%d), cbp %p bn %jd addr %p bcnt %ld\n", 1136 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, 1137 (intmax_t)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1138 cbp->cb_buf.bio_bcount); 1139 } 1140 #endif 1141 /* 1142 * If an error occured, report it. If this is a mirrored 1143 * configuration and the first of two possible reads, do not 1144 * set the error in the bp yet because the second read may 1145 * succeed. 1146 */ 1147 1148 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1149 const char *msg = ""; 1150 1151 if ((ccdfind(unit)->sc_cflags & CCDF_MIRROR) && 1152 (cbp->cb_buf.bio_cmd == BIO_READ) && 1153 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1154 /* 1155 * We will try our read on the other disk down 1156 * below, also reverse the default pick so if we 1157 * are doing a scan we do not keep hitting the 1158 * bad disk first. 1159 */ 1160 struct ccd_s *cs = ccdfind(unit); 1161 1162 msg = ", trying other disk"; 1163 cs->sc_pick = 1 - cs->sc_pick; 1164 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 1165 } else { 1166 bp->bio_flags |= BIO_ERROR; 1167 bp->bio_error = cbp->cb_buf.bio_error ? 1168 cbp->cb_buf.bio_error : EIO; 1169 } 1170 printf("ccd%d: error %d on component %d block %jd " 1171 "(ccd block %jd)%s\n", unit, bp->bio_error, cbp->cb_comp, 1172 (intmax_t)cbp->cb_buf.bio_blkno, (intmax_t)bp->bio_blkno, 1173 msg); 1174 } 1175 1176 /* 1177 * Process mirror. If we are writing, I/O has been initiated on both 1178 * buffers and we fall through only after both are finished. 1179 * 1180 * If we are reading only one I/O is initiated at a time. If an 1181 * error occurs we initiate the second I/O and return, otherwise 1182 * we free the second I/O without initiating it. 1183 */ 1184 1185 if (ccdfind(unit)->sc_cflags & CCDF_MIRROR) { 1186 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 1187 /* 1188 * When writing, handshake with the second buffer 1189 * to determine when both are done. If both are not 1190 * done, return here. 1191 */ 1192 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1193 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1194 putccdbuf(cbp); 1195 splx(s); 1196 return; 1197 } 1198 } else { 1199 /* 1200 * When reading, either dispose of the second buffer 1201 * or initiate I/O on the second buffer if an error 1202 * occured with this one. 1203 */ 1204 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1205 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1206 cbp->cb_mirror->cb_pflags |= 1207 CCDPF_MIRROR_DONE; 1208 BIO_STRATEGY(&cbp->cb_mirror->cb_buf); 1209 putccdbuf(cbp); 1210 splx(s); 1211 return; 1212 } else { 1213 putccdbuf(cbp->cb_mirror); 1214 /* fall through */ 1215 } 1216 } 1217 } 1218 } 1219 1220 /* 1221 * use bio_caller1 to determine how big the original request was rather 1222 * then bio_bcount, because bio_bcount may have been truncated for EOF. 1223 * 1224 * XXX We check for an error, but we do not test the resid for an 1225 * aligned EOF condition. This may result in character & block 1226 * device access not recognizing EOF properly when read or written 1227 * sequentially, but will not effect filesystems. 1228 */ 1229 count = (long)cbp->cb_buf.bio_caller1; 1230 putccdbuf(cbp); 1231 1232 /* 1233 * If all done, "interrupt". 1234 */ 1235 bp->bio_resid -= count; 1236 if (bp->bio_resid < 0) 1237 panic("ccdiodone: count"); 1238 if (bp->bio_resid == 0) 1239 ccdintr(ccdfind(unit), bp); 1240 splx(s); 1241 } 1242 1243 static int 1244 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td) 1245 { 1246 int unit = ccdunit(dev); 1247 int i, j, lookedup = 0, error = 0; 1248 int part, pmask, s; 1249 struct ccd_s *cs; 1250 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1251 char **cpp; 1252 struct vnode **vpp; 1253 1254 if (!IS_ALLOCATED(unit)) 1255 return (ENXIO); 1256 cs = ccdfind(unit); 1257 1258 switch (cmd) { 1259 case CCDIOCSET: 1260 if (IS_INITED(cs)) 1261 return (EBUSY); 1262 1263 if ((flag & FWRITE) == 0) 1264 return (EBADF); 1265 1266 if ((error = ccdlock(cs)) != 0) 1267 return (error); 1268 1269 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1270 return (EINVAL); 1271 1272 /* Fill in some important bits. */ 1273 cs->sc_ileave = ccio->ccio_ileave; 1274 if (cs->sc_ileave == 0 && (ccio->ccio_flags & CCDF_MIRROR)) { 1275 printf("ccd%d: disabling mirror, interleave is 0\n", 1276 unit); 1277 ccio->ccio_flags &= ~(CCDF_MIRROR); 1278 } 1279 if ((ccio->ccio_flags & CCDF_MIRROR) && 1280 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1281 printf("ccd%d: mirror/parity forces uniform flag\n", 1282 unit); 1283 ccio->ccio_flags |= CCDF_UNIFORM; 1284 } 1285 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1286 1287 /* 1288 * Allocate space for and copy in the array of 1289 * componet pathnames and device numbers. 1290 */ 1291 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1292 M_DEVBUF, M_WAITOK); 1293 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1294 M_DEVBUF, M_WAITOK); 1295 1296 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1297 ccio->ccio_ndisks * sizeof(char **)); 1298 if (error) { 1299 free(vpp, M_DEVBUF); 1300 free(cpp, M_DEVBUF); 1301 ccdunlock(cs); 1302 return (error); 1303 } 1304 1305 #ifdef DEBUG 1306 if (ccddebug & CCDB_INIT) 1307 for (i = 0; i < ccio->ccio_ndisks; ++i) 1308 printf("ccdioctl: component %d: %p\n", 1309 i, cpp[i]); 1310 #endif 1311 1312 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1313 #ifdef DEBUG 1314 if (ccddebug & CCDB_INIT) 1315 printf("ccdioctl: lookedup = %d\n", lookedup); 1316 #endif 1317 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1318 for (j = 0; j < lookedup; ++j) 1319 (void)vn_close(vpp[j], FREAD|FWRITE, 1320 td->td_ucred, td); 1321 free(vpp, M_DEVBUF); 1322 free(cpp, M_DEVBUF); 1323 ccdunlock(cs); 1324 return (error); 1325 } 1326 ++lookedup; 1327 } 1328 cs->sc_vpp = vpp; 1329 cs->sc_nccdisks = ccio->ccio_ndisks; 1330 1331 /* 1332 * Initialize the ccd. Fills in the softc for us. 1333 */ 1334 if ((error = ccdinit(cs, cpp, td)) != 0) { 1335 for (j = 0; j < lookedup; ++j) 1336 (void)vn_close(vpp[j], FREAD|FWRITE, 1337 td->td_ucred, td); 1338 /* 1339 * We can't ccddestroy() cs just yet, because nothing 1340 * prevents user-level app to do another ioctl() 1341 * without closing the device first, therefore 1342 * declare unit null and void and let ccdclose() 1343 * destroy it when it is safe to do so. 1344 */ 1345 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1346 free(vpp, M_DEVBUF); 1347 free(cpp, M_DEVBUF); 1348 ccdunlock(cs); 1349 return (error); 1350 } 1351 1352 /* 1353 * The ccd has been successfully initialized, so 1354 * we can place it into the array and read the disklabel. 1355 */ 1356 ccio->ccio_unit = unit; 1357 ccio->ccio_size = cs->sc_size; 1358 ccdgetdisklabel(dev); 1359 1360 ccdunlock(cs); 1361 1362 break; 1363 1364 case CCDIOCCLR: 1365 if (!IS_INITED(cs)) 1366 return (ENXIO); 1367 1368 if ((flag & FWRITE) == 0) 1369 return (EBADF); 1370 1371 if ((error = ccdlock(cs)) != 0) 1372 return (error); 1373 1374 /* Don't unconfigure if any other partitions are open */ 1375 part = ccdpart(dev); 1376 pmask = (1 << part); 1377 if ((cs->sc_openmask & ~pmask)) { 1378 ccdunlock(cs); 1379 return (EBUSY); 1380 } 1381 1382 /* Declare unit null and void (reset all flags) */ 1383 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1384 1385 /* Close the components and free their pathnames. */ 1386 for (i = 0; i < cs->sc_nccdisks; ++i) { 1387 /* 1388 * XXX: this close could potentially fail and 1389 * cause Bad Things. Maybe we need to force 1390 * the close to happen? 1391 */ 1392 #ifdef DEBUG 1393 if (ccddebug & CCDB_VNODE) 1394 vprint("CCDIOCCLR: vnode info", 1395 cs->sc_cinfo[i].ci_vp); 1396 #endif 1397 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1398 td->td_ucred, td); 1399 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1400 } 1401 1402 /* Free interleave index. */ 1403 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1404 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1405 1406 /* Free component info and interleave table. */ 1407 free(cs->sc_cinfo, M_DEVBUF); 1408 free(cs->sc_itable, M_DEVBUF); 1409 free(cs->sc_vpp, M_DEVBUF); 1410 1411 /* And remove the devstat entry. */ 1412 devstat_remove_entry(&cs->device_stats); 1413 1414 /* This must be atomic. */ 1415 s = splhigh(); 1416 ccdunlock(cs); 1417 splx(s); 1418 1419 break; 1420 1421 case CCDCONFINFO: 1422 { 1423 int ninit = 0; 1424 struct ccdconf *conf = (struct ccdconf *)data; 1425 struct ccd_s *tmpcs; 1426 struct ccd_s *ubuf = conf->buffer; 1427 1428 /* XXX: LOCK(unique unit numbers) */ 1429 LIST_FOREACH(tmpcs, &ccd_softc_list, list) 1430 if (IS_INITED(tmpcs)) 1431 ninit++; 1432 1433 if (conf->size == 0) { 1434 conf->size = sizeof(struct ccd_s) * ninit; 1435 break; 1436 } else if ((conf->size / sizeof(struct ccd_s) != ninit) || 1437 (conf->size % sizeof(struct ccd_s) != 0)) { 1438 /* XXX: UNLOCK(unique unit numbers) */ 1439 return (EINVAL); 1440 } 1441 1442 ubuf += ninit; 1443 LIST_FOREACH(tmpcs, &ccd_softc_list, list) { 1444 if (!IS_INITED(tmpcs)) 1445 continue; 1446 error = copyout(tmpcs, --ubuf, 1447 sizeof(struct ccd_s)); 1448 if (error != 0) 1449 /* XXX: UNLOCK(unique unit numbers) */ 1450 return (error); 1451 } 1452 /* XXX: UNLOCK(unique unit numbers) */ 1453 } 1454 break; 1455 1456 case CCDCPPINFO: 1457 if (!IS_INITED(cs)) 1458 return (ENXIO); 1459 1460 { 1461 int len = 0; 1462 struct ccdcpps *cpps = (struct ccdcpps *)data; 1463 char *ubuf = cpps->buffer; 1464 1465 1466 for (i = 0; i < cs->sc_nccdisks; ++i) 1467 len += cs->sc_cinfo[i].ci_pathlen; 1468 1469 if (cpps->size == 0) { 1470 cpps->size = len; 1471 break; 1472 } else if (cpps->size != len) { 1473 return (EINVAL); 1474 } 1475 1476 for (i = 0; i < cs->sc_nccdisks; ++i) { 1477 len = cs->sc_cinfo[i].ci_pathlen; 1478 error = copyout(cs->sc_cinfo[i].ci_path, ubuf, 1479 len); 1480 if (error != 0) 1481 return (error); 1482 ubuf += len; 1483 } 1484 } 1485 break; 1486 1487 case DIOCGDINFO: 1488 if (!IS_INITED(cs)) 1489 return (ENXIO); 1490 1491 *(struct disklabel *)data = cs->sc_label; 1492 break; 1493 1494 case DIOCWDINFO: 1495 case DIOCSDINFO: 1496 if (!IS_INITED(cs)) 1497 return (ENXIO); 1498 1499 if ((flag & FWRITE) == 0) 1500 return (EBADF); 1501 1502 if ((error = ccdlock(cs)) != 0) 1503 return (error); 1504 1505 cs->sc_flags |= CCDF_LABELLING; 1506 1507 error = setdisklabel(&cs->sc_label, 1508 (struct disklabel *)data, 0); 1509 if (error == 0) { 1510 if (cmd == DIOCWDINFO) 1511 error = writedisklabel(CCDLABELDEV(dev), 1512 &cs->sc_label); 1513 } 1514 1515 cs->sc_flags &= ~CCDF_LABELLING; 1516 1517 ccdunlock(cs); 1518 1519 if (error) 1520 return (error); 1521 break; 1522 1523 case DIOCWLABEL: 1524 if (!IS_INITED(cs)) 1525 return (ENXIO); 1526 1527 if ((flag & FWRITE) == 0) 1528 return (EBADF); 1529 if (*(int *)data != 0) 1530 cs->sc_flags |= CCDF_WLABEL; 1531 else 1532 cs->sc_flags &= ~CCDF_WLABEL; 1533 break; 1534 1535 default: 1536 return (ENOTTY); 1537 } 1538 1539 return (0); 1540 } 1541 1542 static int 1543 ccdsize(dev_t dev) 1544 { 1545 struct ccd_s *cs; 1546 int part, size; 1547 1548 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1549 return (-1); 1550 1551 cs = ccdfind(ccdunit(dev)); 1552 part = ccdpart(dev); 1553 1554 if (!IS_INITED(cs)) 1555 return (-1); 1556 1557 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1558 size = -1; 1559 else 1560 size = cs->sc_label.d_partitions[part].p_size; 1561 1562 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1563 return (-1); 1564 1565 return (size); 1566 } 1567 1568 /* 1569 * Lookup the provided name in the filesystem. If the file exists, 1570 * is a valid block device, and isn't being used by anyone else, 1571 * set *vpp to the file's vnode. 1572 */ 1573 static int 1574 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1575 { 1576 struct nameidata nd; 1577 struct vnode *vp; 1578 int error, flags; 1579 1580 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td); 1581 flags = FREAD | FWRITE; 1582 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1583 #ifdef DEBUG 1584 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 1585 printf("ccdlookup: vn_open error = %d\n", error); 1586 #endif 1587 return (error); 1588 } 1589 vp = nd.ni_vp; 1590 1591 if (vrefcnt(vp) > 1) { 1592 error = EBUSY; 1593 goto bad; 1594 } 1595 1596 if (!vn_isdisk(vp, &error)) 1597 goto bad; 1598 1599 #ifdef DEBUG 1600 if (ccddebug & CCDB_VNODE) 1601 vprint("ccdlookup: vnode info", vp); 1602 #endif 1603 1604 VOP_UNLOCK(vp, 0, td); 1605 NDFREE(&nd, NDF_ONLY_PNBUF); 1606 *vpp = vp; 1607 return (0); 1608 bad: 1609 VOP_UNLOCK(vp, 0, td); 1610 NDFREE(&nd, NDF_ONLY_PNBUF); 1611 /* vn_close does vrele() for vp */ 1612 (void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 1613 return (error); 1614 } 1615 1616 /* 1617 * Read the disklabel from the ccd. If one is not present, fake one 1618 * up. 1619 */ 1620 static void 1621 ccdgetdisklabel(dev_t dev) 1622 { 1623 int unit = ccdunit(dev); 1624 struct ccd_s *cs = ccdfind(unit); 1625 char *errstring; 1626 struct disklabel *lp = &cs->sc_label; 1627 struct ccdgeom *ccg = &cs->sc_geom; 1628 1629 bzero(lp, sizeof(*lp)); 1630 1631 lp->d_secperunit = cs->sc_size; 1632 lp->d_secsize = ccg->ccg_secsize; 1633 lp->d_nsectors = ccg->ccg_nsectors; 1634 lp->d_ntracks = ccg->ccg_ntracks; 1635 lp->d_ncylinders = ccg->ccg_ncylinders; 1636 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1637 1638 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1639 lp->d_type = DTYPE_CCD; 1640 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1641 lp->d_rpm = 3600; 1642 lp->d_interleave = 1; 1643 lp->d_flags = 0; 1644 1645 lp->d_partitions[RAW_PART].p_offset = 0; 1646 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1647 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1648 lp->d_npartitions = RAW_PART + 1; 1649 1650 lp->d_bbsize = BBSIZE; /* XXX */ 1651 lp->d_sbsize = 0; 1652 1653 lp->d_magic = DISKMAGIC; 1654 lp->d_magic2 = DISKMAGIC; 1655 lp->d_checksum = dkcksum(&cs->sc_label); 1656 1657 /* 1658 * Call the generic disklabel extraction routine. 1659 */ 1660 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1661 if (errstring != NULL) 1662 ccdmakedisklabel(cs); 1663 1664 #ifdef DEBUG 1665 /* It's actually extremely common to have unlabeled ccds. */ 1666 if (ccddebug & CCDB_LABEL) 1667 if (errstring != NULL) 1668 printf("ccd%d: %s\n", unit, errstring); 1669 #endif 1670 } 1671 1672 /* 1673 * Take care of things one might want to take care of in the event 1674 * that a disklabel isn't present. 1675 */ 1676 static void 1677 ccdmakedisklabel(struct ccd_s *cs) 1678 { 1679 struct disklabel *lp = &cs->sc_label; 1680 1681 /* 1682 * For historical reasons, if there's no disklabel present 1683 * the raw partition must be marked FS_BSDFFS. 1684 */ 1685 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1686 1687 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1688 } 1689 1690 /* 1691 * Wait interruptibly for an exclusive lock. 1692 * 1693 * XXX 1694 * Several drivers do this; it should be abstracted and made MP-safe. 1695 */ 1696 static int 1697 ccdlock(struct ccd_s *cs) 1698 { 1699 int error; 1700 1701 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1702 cs->sc_flags |= CCDF_WANTED; 1703 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1704 return (error); 1705 } 1706 cs->sc_flags |= CCDF_LOCKED; 1707 return (0); 1708 } 1709 1710 /* 1711 * Unlock and wake up any waiters. 1712 */ 1713 static void 1714 ccdunlock(struct ccd_s *cs) 1715 { 1716 1717 cs->sc_flags &= ~CCDF_LOCKED; 1718 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1719 cs->sc_flags &= ~CCDF_WANTED; 1720 wakeup(cs); 1721 } 1722 } 1723 1724 #ifdef DEBUG 1725 static void 1726 printiinfo(struct ccdiinfo *ii) 1727 { 1728 int ix, i; 1729 1730 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1731 printf(" itab[%d]: #dk %d sblk %lld soff %lld", 1732 ix, ii->ii_ndisk, (long long)ii->ii_startblk, 1733 (long long)ii->ii_startoff); 1734 for (i = 0; i < ii->ii_ndisk; i++) 1735 printf(" %d", ii->ii_index[i]); 1736 printf("\n"); 1737 } 1738 } 1739 #endif 1740