1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 
41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 
73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include <sys/param.h> 91 #include <sys/systm.h> 92 #include <sys/kernel.h> 93 #include <sys/module.h> 94 #include <sys/proc.h> 95 #include <sys/bio.h> 96 #include <sys/malloc.h> 97 #include <sys/namei.h> 98 #include <sys/conf.h> 99 #include <sys/stat.h> 100 #include <sys/stdint.h> 101 #include <sys/sysctl.h> 102 #include <sys/disk.h> 103 #include <sys/disklabel.h> 104 #include <sys/devicestat.h> 105 #include <sys/fcntl.h> 106 #include <sys/vnode.h> 107 108 #include <sys/ccdvar.h> 109 110 MALLOC_DEFINE(M_CCD, "CCD driver", "Concatenated Disk driver"); 111 112 #if defined(CCDDEBUG) && !defined(DEBUG) 113 #define DEBUG 114 #endif 115 116 #ifdef DEBUG 117 #define CCDB_FOLLOW 0x01 118 #define CCDB_INIT 0x02 119 #define CCDB_IO 0x04 120 #define CCDB_LABEL 0x08 121 #define CCDB_VNODE 0x10 122 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 123 CCDB_VNODE; 124 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 125 #endif 126 127 #define ccdunit(x) dkunit(x) 128 #define ccdpart(x) dkpart(x) 129 130 /* 131 This is how mirroring works (only writes are special): 132 133 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 134 linked together by the cb_mirror field. "cb_pflags & 135 CCDPF_MIRROR_DONE" is set to 0 on both of them. 136 137 When a component returns to ccdiodone(), it checks if "cb_pflags & 138 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 139 flag and returns. If it is, it means its partner has already 140 returned, so it will go to the regular cleanup. 
*/

/*
 * Per-component I/O request.  One of these is built by ccdbuffer() for
 * every piece of an original request.  cb_buf is the first member, and
 * ccdiodone() relies on that: it casts the struct bio pointer it is
 * handed back to a struct ccdbuf pointer.
 */
struct ccdbuf {
	struct bio	cb_buf;		/* new I/O buf */
	struct bio	*cb_obp;	/* ptr. to original I/O buf */
	struct ccdbuf	*cb_freenext;	/* free list link */
	int		cb_unit;	/* target unit */
	int		cb_comp;	/* target component */
	int		cb_pflags;	/* mirror/parity status flag */
	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
};

/* bits in cb_pflags */
#define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */

/* Device number of the raw partition of the unit that 'dev' belongs to. */
#define CCDLABELDEV(dev)	\
	(makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART)))

/* convenient macros for often-used statements */
#define IS_ALLOCATED(unit)	(ccdfind(unit) != NULL)
#define IS_INITED(cs)		(((cs)->sc_flags & CCDF_INITED) != 0)

static d_open_t ccdopen;
static d_close_t ccdclose;
static d_strategy_t ccdstrategy;
static d_ioctl_t ccdioctl;
static d_psize_t ccdsize;

/* Upper bound on the number of ccdbufs kept cached on the free list. */
#define NCCDFREEHIWAT	16

#define CDEV_MAJOR 74

static struct cdevsw ccd_cdevsw = {
	/* open */	ccdopen,
	/* close */	ccdclose,
	/* read */	physread,
	/* write */	physwrite,
	/* ioctl */	ccdioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	ccdstrategy,
	/* name */	"ccd",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	ccdsize,
	/* flags */	D_DISK,
};
/* All allocated units; searched by ccdfind(), extended by ccdnew(). */
static LIST_HEAD(, ccd_s) ccd_softc_list = LIST_HEAD_INITIALIZER(&ccd_softc_list);

static struct ccd_s *ccdfind(int);
static struct ccd_s *ccdnew(int);
static int ccddestroy(struct ccd_s *, struct proc *);

/* called during module initialization */
static void ccdattach(void);
static int ccd_modevent(module_t, int, void *);

/* called by biodone() at interrupt time */
static void ccdiodone(struct bio *bp);

static void ccdstart(struct ccd_s *, struct bio *);
static void ccdinterleave(struct ccd_s *, int);
static void ccdintr(struct ccd_s *, struct bio *);
static int ccdinit(struct ccd_s *, char **, struct thread *);
static int ccdlookup(char *, struct thread *p, struct vnode **);
static void ccdbuffer(struct ccdbuf **ret, struct ccd_s *,
		struct bio *, daddr_t, caddr_t, long);
static void ccdgetdisklabel(dev_t);
static void ccdmakedisklabel(struct ccd_s *);
static int ccdlock(struct ccd_s *);
static void ccdunlock(struct ccd_s *);

#ifdef DEBUG
static void printiinfo(struct ccdiinfo *);
#endif

/* Non-private for the benefit of libkvm. */
struct ccdbuf *ccdfreebufs;
static int numccdfreebufs;

/*
 * getccdbuf() - Allocate a ccd buffer.
 *
 *	The buffer is taken from the ccdfreebufs free list when one is
 *	cached there, otherwise it is malloc()ed with M_WAITOK.  If 'cpy'
 *	is non-NULL the new buffer is a byte-for-byte copy of *cpy (this
 *	is how the mirror twin is made); otherwise it is zeroed.
 *
 *	This routine is called at splbio().
 */

static __inline
struct ccdbuf *
getccdbuf(struct ccdbuf *cpy)
{
	struct ccdbuf *cbp;

	/*
	 * Allocate from freelist or malloc as necessary
	 */
	if ((cbp = ccdfreebufs) != NULL) {
		ccdfreebufs = cbp->cb_freenext;
		--numccdfreebufs;
	} else {
		cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
	}

	/*
	 * Used by mirroring code
	 */
	if (cpy)
		bcopy(cpy, cbp, sizeof(struct ccdbuf));
	else
		bzero(cbp, sizeof(struct ccdbuf));

	/*
	 * independent struct bio initialization
	 * (nothing is done here at present)
	 */

	return(cbp);
}

/*
 * putccdbuf() - Free a ccd buffer.
 *
 *	The buffer is pushed onto the ccdfreebufs free list while fewer
 *	than NCCDFREEHIWAT buffers are cached; beyond that it is handed
 *	back to free().
 *
 *	This routine is called at splbio().
 */

static __inline
void
putccdbuf(struct ccdbuf *cbp)
{

	if (numccdfreebufs < NCCDFREEHIWAT) {
		cbp->cb_freenext = ccdfreebufs;
		ccdfreebufs = cbp;
		++numccdfreebufs;
	} else {
		free((caddr_t)cbp, M_DEVBUF);
	}
}


/*
 * Number of blocks to leave untouched in front of a component partition.
 * This is to avoid violating its disklabel area when it starts at the
 * beginning of the slice.
284 */ 285 #if !defined(CCD_OFFSET) 286 #define CCD_OFFSET 16 287 #endif 288 289 static struct ccd_s * 290 ccdfind(int unit) 291 { 292 struct ccd_s *sc = NULL; 293 294 /* XXX: LOCK(unique unit numbers) */ 295 LIST_FOREACH(sc, &ccd_softc_list, list) { 296 if (sc->sc_unit == unit) 297 break; 298 } 299 /* XXX: UNLOCK(unique unit numbers) */ 300 return ((sc == NULL) || (sc->sc_unit != unit) ? NULL : sc); 301 } 302 303 static struct ccd_s * 304 ccdnew(int unit) 305 { 306 struct ccd_s *sc; 307 308 /* XXX: LOCK(unique unit numbers) */ 309 if (IS_ALLOCATED(unit) || unit > DKMAXUNIT) 310 return (NULL); 311 312 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO); 313 sc->sc_unit = unit; 314 LIST_INSERT_HEAD(&ccd_softc_list, sc, list); 315 /* XXX: UNLOCK(unique unit numbers) */ 316 return (sc); 317 } 318 319 static int 320 ccddestroy(struct ccd_s *sc, struct proc *p) 321 { 322 323 /* XXX: LOCK(unique unit numbers) */ 324 LIST_REMOVE(sc, list); 325 /* XXX: UNLOCK(unique unit numbers) */ 326 FREE(sc, M_CCD); 327 return (0); 328 } 329 330 static void 331 ccd_clone(void *arg, char *name, int namelen, dev_t *dev) 332 { 333 int i, u; 334 char *s; 335 336 if (*dev != NODEV) 337 return; 338 i = dev_stdclone(name, &s, "ccd", &u); 339 if (i != 2) 340 return; 341 if (*s < 'a' || *s > 'h') 342 return; 343 if (s[1] != '\0') 344 return; 345 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', 346 UID_ROOT, GID_OPERATOR, 0640, name); 347 } 348 349 /* 350 * Called by main() during pseudo-device attachment. All we need 351 * to do is to add devsw entries. 
352 */ 353 static void 354 ccdattach() 355 { 356 357 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); 358 } 359 360 static int 361 ccd_modevent(module_t mod, int type, void *data) 362 { 363 int error = 0; 364 365 switch (type) { 366 case MOD_LOAD: 367 ccdattach(); 368 break; 369 370 case MOD_UNLOAD: 371 printf("ccd0: Unload not supported!\n"); 372 error = EOPNOTSUPP; 373 break; 374 375 case MOD_SHUTDOWN: 376 break; 377 378 default: 379 error = EOPNOTSUPP; 380 } 381 return (error); 382 } 383 384 DEV_MODULE(ccd, ccd_modevent, NULL); 385 386 static int 387 ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td) 388 { 389 struct ccdcinfo *ci = NULL; /* XXX */ 390 size_t size; 391 int ix; 392 struct vnode *vp; 393 size_t minsize; 394 int maxsecsize; 395 struct ccdgeom *ccg = &cs->sc_geom; 396 char *tmppath = NULL; 397 int error = 0; 398 off_t mediasize; 399 u_int sectorsize; 400 401 #ifdef DEBUG 402 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 403 printf("ccdinit: unit %d\n", cs->sc_unit); 404 #endif 405 406 cs->sc_size = 0; 407 408 /* Allocate space for the component info. */ 409 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 410 M_DEVBUF, M_WAITOK); 411 412 /* 413 * Verify that each component piece exists and record 414 * relevant information about it. 415 */ 416 maxsecsize = 0; 417 minsize = 0; 418 tmppath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK); 419 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 420 vp = cs->sc_vpp[ix]; 421 ci = &cs->sc_cinfo[ix]; 422 ci->ci_vp = vp; 423 424 /* 425 * Copy in the pathname of the component. 
426 */ 427 if ((error = copyinstr(cpaths[ix], tmppath, 428 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 429 #ifdef DEBUG 430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 431 printf("ccd%d: can't copy path, error = %d\n", 432 cs->sc_unit, error); 433 #endif 434 goto fail; 435 } 436 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 437 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 438 439 ci->ci_dev = vn_todev(vp); 440 441 /* 442 * Get partition information for the component. 443 */ 444 error = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize, 445 FREAD, td->td_ucred, td); 446 if (error != 0) { 447 #ifdef DEBUG 448 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 449 printf("ccd%d: %s: ioctl failed, error = %d\n", 450 cs->sc_unit, ci->ci_path, error); 451 #endif 452 goto fail; 453 } 454 /* 455 * Get partition information for the component. 456 */ 457 error = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)§orsize, 458 FREAD, td->td_ucred, td); 459 if (error != 0) { 460 #ifdef DEBUG 461 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 462 printf("ccd%d: %s: ioctl failed, error = %d\n", 463 cs->sc_unit, ci->ci_path, error); 464 #endif 465 goto fail; 466 } 467 if (sectorsize > maxsecsize) 468 maxsecsize = sectorsize; 469 size = mediasize / DEV_BSIZE - CCD_OFFSET; 470 471 /* 472 * Calculate the size, truncating to an interleave 473 * boundary if necessary. 474 */ 475 476 if (cs->sc_ileave > 1) 477 size -= size % cs->sc_ileave; 478 479 if (size == 0) { 480 #ifdef DEBUG 481 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 482 printf("ccd%d: %s: size == 0\n", 483 cs->sc_unit, ci->ci_path); 484 #endif 485 error = ENODEV; 486 goto fail; 487 } 488 489 if (minsize == 0 || size < minsize) 490 minsize = size; 491 ci->ci_size = size; 492 cs->sc_size += size; 493 } 494 495 free(tmppath, M_DEVBUF); 496 tmppath = NULL; 497 498 /* 499 * Don't allow the interleave to be smaller than 500 * the biggest component sector. 
501 */ 502 if ((cs->sc_ileave > 0) && 503 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 504 #ifdef DEBUG 505 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 506 printf("ccd%d: interleave must be at least %d\n", 507 cs->sc_unit, (maxsecsize / DEV_BSIZE)); 508 #endif 509 error = EINVAL; 510 goto fail; 511 } 512 513 /* 514 * If uniform interleave is desired set all sizes to that of 515 * the smallest component. This will guarentee that a single 516 * interleave table is generated. 517 * 518 * Lost space must be taken into account when calculating the 519 * overall size. Half the space is lost when CCDF_MIRROR is 520 * specified. One disk is lost when CCDF_PARITY is specified. 521 */ 522 if (cs->sc_flags & CCDF_UNIFORM) { 523 for (ci = cs->sc_cinfo; 524 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 525 ci->ci_size = minsize; 526 } 527 if (cs->sc_flags & CCDF_MIRROR) { 528 /* 529 * Check to see if an even number of components 530 * have been specified. The interleave must also 531 * be non-zero in order for us to be able to 532 * guarentee the topology. 533 */ 534 if (cs->sc_nccdisks % 2) { 535 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit ); 536 error = EINVAL; 537 goto fail; 538 } 539 if (cs->sc_ileave == 0) { 540 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit); 541 error = EINVAL; 542 goto fail; 543 } 544 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 545 } else if (cs->sc_flags & CCDF_PARITY) { 546 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 547 } else { 548 if (cs->sc_ileave == 0) { 549 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit); 550 error = EINVAL; 551 goto fail; 552 } 553 cs->sc_size = cs->sc_nccdisks * minsize; 554 } 555 } 556 557 /* 558 * Construct the interleave table. 559 */ 560 ccdinterleave(cs, cs->sc_unit); 561 562 /* 563 * Create pseudo-geometry based on 1MB cylinders. It's 564 * pretty close. 
565 */ 566 ccg->ccg_secsize = maxsecsize; 567 ccg->ccg_ntracks = 1; 568 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 569 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 570 571 /* 572 * Add an devstat entry for this device. 573 */ 574 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit, 575 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 576 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 577 DEVSTAT_PRIORITY_ARRAY); 578 579 cs->sc_flags |= CCDF_INITED; 580 cs->sc_cflags = cs->sc_flags; /* So we can find out later... */ 581 return (0); 582 fail: 583 while (ci > cs->sc_cinfo) { 584 ci--; 585 free(ci->ci_path, M_DEVBUF); 586 } 587 if (tmppath != NULL) 588 free(tmppath, M_DEVBUF); 589 free(cs->sc_cinfo, M_DEVBUF); 590 return (error); 591 } 592 593 static void 594 ccdinterleave(struct ccd_s *cs, int unit) 595 { 596 struct ccdcinfo *ci, *smallci; 597 struct ccdiinfo *ii; 598 daddr_t bn, lbn; 599 int ix; 600 u_long size; 601 602 #ifdef DEBUG 603 if (ccddebug & CCDB_INIT) 604 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 605 #endif 606 607 /* 608 * Allocate an interleave table. The worst case occurs when each 609 * of N disks is of a different size, resulting in N interleave 610 * tables. 611 * 612 * Chances are this is too big, but we don't care. 613 */ 614 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 615 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, 616 M_WAITOK | M_ZERO); 617 618 /* 619 * Trivial case: no interleave (actually interleave of disk size). 620 * Each table entry represents a single component in its entirety. 621 * 622 * An interleave of 0 may not be used with a mirror or parity setup. 623 */ 624 if (cs->sc_ileave == 0) { 625 bn = 0; 626 ii = cs->sc_itable; 627 628 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 629 /* Allocate space for ii_index. 
*/ 630 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 631 ii->ii_ndisk = 1; 632 ii->ii_startblk = bn; 633 ii->ii_startoff = 0; 634 ii->ii_index[0] = ix; 635 bn += cs->sc_cinfo[ix].ci_size; 636 ii++; 637 } 638 ii->ii_ndisk = 0; 639 #ifdef DEBUG 640 if (ccddebug & CCDB_INIT) 641 printiinfo(cs->sc_itable); 642 #endif 643 return; 644 } 645 646 /* 647 * The following isn't fast or pretty; it doesn't have to be. 648 */ 649 size = 0; 650 bn = lbn = 0; 651 for (ii = cs->sc_itable; ; ii++) { 652 /* 653 * Allocate space for ii_index. We might allocate more then 654 * we use. 655 */ 656 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 657 M_DEVBUF, M_WAITOK); 658 659 /* 660 * Locate the smallest of the remaining components 661 */ 662 smallci = NULL; 663 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 664 ci++) { 665 if (ci->ci_size > size && 666 (smallci == NULL || 667 ci->ci_size < smallci->ci_size)) { 668 smallci = ci; 669 } 670 } 671 672 /* 673 * Nobody left, all done 674 */ 675 if (smallci == NULL) { 676 ii->ii_ndisk = 0; 677 break; 678 } 679 680 /* 681 * Record starting logical block using an sc_ileave blocksize. 682 */ 683 ii->ii_startblk = bn / cs->sc_ileave; 684 685 /* 686 * Record starting comopnent block using an sc_ileave 687 * blocksize. This value is relative to the beginning of 688 * a component disk. 689 */ 690 ii->ii_startoff = lbn; 691 692 /* 693 * Determine how many disks take part in this interleave 694 * and record their indices. 
695 */ 696 ix = 0; 697 for (ci = cs->sc_cinfo; 698 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 699 if (ci->ci_size >= smallci->ci_size) { 700 ii->ii_index[ix++] = ci - cs->sc_cinfo; 701 } 702 } 703 ii->ii_ndisk = ix; 704 bn += ix * (smallci->ci_size - size); 705 lbn = smallci->ci_size / cs->sc_ileave; 706 size = smallci->ci_size; 707 } 708 #ifdef DEBUG 709 if (ccddebug & CCDB_INIT) 710 printiinfo(cs->sc_itable); 711 #endif 712 } 713 714 /* ARGSUSED */ 715 static int 716 ccdopen(dev_t dev, int flags, int fmt, struct thread *td) 717 { 718 int unit = ccdunit(dev); 719 struct ccd_s *cs; 720 struct disklabel *lp; 721 int error = 0, part, pmask; 722 723 #ifdef DEBUG 724 if (ccddebug & CCDB_FOLLOW) 725 printf("ccdopen(%p, %x)\n", dev, flags); 726 #endif 727 728 cs = IS_ALLOCATED(unit) ? ccdfind(unit) : ccdnew(unit); 729 730 if ((error = ccdlock(cs)) != 0) 731 return (error); 732 733 lp = &cs->sc_label; 734 735 part = ccdpart(dev); 736 pmask = (1 << part); 737 738 /* 739 * If we're initialized, check to see if there are any other 740 * open partitions. If not, then it's safe to update 741 * the in-core disklabel. 742 */ 743 if (IS_INITED(cs) && (cs->sc_openmask == 0)) 744 ccdgetdisklabel(dev); 745 746 /* Check that the partition exists. 
*/ 747 if (part != RAW_PART && ((part >= lp->d_npartitions) || 748 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 749 error = ENXIO; 750 goto done; 751 } 752 753 cs->sc_openmask |= pmask; 754 done: 755 ccdunlock(cs); 756 return (0); 757 } 758 759 /* ARGSUSED */ 760 static int 761 ccdclose(dev_t dev, int flags, int fmt, struct thread *td) 762 { 763 int unit = ccdunit(dev); 764 struct ccd_s *cs; 765 int error = 0, part; 766 767 #ifdef DEBUG 768 if (ccddebug & CCDB_FOLLOW) 769 printf("ccdclose(%p, %x)\n", dev, flags); 770 #endif 771 772 if (!IS_ALLOCATED(unit)) 773 return (ENXIO); 774 cs = ccdfind(unit); 775 776 if ((error = ccdlock(cs)) != 0) 777 return (error); 778 779 part = ccdpart(dev); 780 781 /* ...that much closer to allowing unconfiguration... */ 782 cs->sc_openmask &= ~(1 << part); 783 /* collect "garbage" if possible */ 784 if (!IS_INITED(cs) && (cs->sc_flags & CCDF_WANTED) == 0) 785 ccddestroy(cs, td->td_proc); 786 else 787 ccdunlock(cs); 788 return (0); 789 } 790 791 static void 792 ccdstrategy(struct bio *bp) 793 { 794 int unit = ccdunit(bp->bio_dev); 795 struct ccd_s *cs = ccdfind(unit); 796 int s; 797 int wlabel; 798 struct disklabel *lp; 799 800 #ifdef DEBUG 801 if (ccddebug & CCDB_FOLLOW) 802 printf("ccdstrategy(%p): unit %d\n", bp, unit); 803 #endif 804 if (!IS_INITED(cs)) { 805 biofinish(bp, NULL, ENXIO); 806 return; 807 } 808 809 /* If it's a nil transfer, wake up the top half now. */ 810 if (bp->bio_bcount == 0) { 811 biodone(bp); 812 return; 813 } 814 815 lp = &cs->sc_label; 816 817 /* 818 * Do bounds checking and adjust transfer. If there's an 819 * error, the bounds check will flag that for us. 
820 */ 821 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 822 if (ccdpart(bp->bio_dev) != RAW_PART) { 823 if (bounds_check_with_label(bp, lp, wlabel) <= 0) { 824 biodone(bp); 825 return; 826 } 827 } else { 828 int pbn; /* in sc_secsize chunks */ 829 long sz; /* in sc_secsize chunks */ 830 831 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 832 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 833 834 /* 835 * If out of bounds return an error. If at the EOF point, 836 * simply read or write less. 837 */ 838 839 if (pbn < 0 || pbn >= cs->sc_size) { 840 bp->bio_resid = bp->bio_bcount; 841 if (pbn != cs->sc_size) 842 biofinish(bp, NULL, EINVAL); 843 else 844 biodone(bp); 845 return; 846 } 847 848 /* 849 * If the request crosses EOF, truncate the request. 850 */ 851 if (pbn + sz > cs->sc_size) { 852 bp->bio_bcount = (cs->sc_size - pbn) * 853 cs->sc_geom.ccg_secsize; 854 } 855 } 856 857 bp->bio_resid = bp->bio_bcount; 858 859 /* 860 * "Start" the unit. 861 */ 862 s = splbio(); 863 ccdstart(cs, bp); 864 splx(s); 865 return; 866 } 867 868 static void 869 ccdstart(struct ccd_s *cs, struct bio *bp) 870 { 871 long bcount, rcount; 872 struct ccdbuf *cbp[4]; 873 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 874 caddr_t addr; 875 daddr_t bn; 876 struct partition *pp; 877 878 #ifdef DEBUG 879 if (ccddebug & CCDB_FOLLOW) 880 printf("ccdstart(%p, %p)\n", cs, bp); 881 #endif 882 883 /* Record the transaction start */ 884 devstat_start_transaction(&cs->device_stats); 885 886 /* 887 * Translate the partition-relative block number to an absolute. 
888 */ 889 bn = bp->bio_blkno; 890 if (ccdpart(bp->bio_dev) != RAW_PART) { 891 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; 892 bn += pp->p_offset; 893 } 894 895 /* 896 * Allocate component buffers and fire off the requests 897 */ 898 addr = bp->bio_data; 899 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 900 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 901 rcount = cbp[0]->cb_buf.bio_bcount; 902 903 if (cs->sc_cflags & CCDF_MIRROR) { 904 /* 905 * Mirroring. Writes go to both disks, reads are 906 * taken from whichever disk seems most appropriate. 907 * 908 * We attempt to localize reads to the disk whos arm 909 * is nearest the read request. We ignore seeks due 910 * to writes when making this determination and we 911 * also try to avoid hogging. 912 */ 913 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 914 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 915 BIO_STRATEGY(&cbp[1]->cb_buf, 0); 916 } else { 917 int pick = cs->sc_pick; 918 daddr_t range = cs->sc_size / 16; 919 920 if (bn < cs->sc_blk[pick] - range || 921 bn > cs->sc_blk[pick] + range 922 ) { 923 cs->sc_pick = pick = 1 - pick; 924 } 925 cs->sc_blk[pick] = bn + btodb(rcount); 926 BIO_STRATEGY(&cbp[pick]->cb_buf, 0); 927 } 928 } else { 929 /* 930 * Not mirroring 931 */ 932 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 933 } 934 bn += btodb(rcount); 935 addr += rcount; 936 } 937 } 938 939 /* 940 * Build a component buffer header. 941 */ 942 static void 943 ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) 944 { 945 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 946 struct ccdbuf *cbp; 947 daddr_t cbn, cboff; 948 off_t cbc; 949 950 #ifdef DEBUG 951 if (ccddebug & CCDB_IO) 952 printf("ccdbuffer(%p, %p, %lld, %p, %ld)\n", 953 (void *)cs, (void *)bp, (long long)bn, (void *)addr, 954 bcount); 955 #endif 956 /* 957 * Determine which component bn falls in. 
958 */ 959 cbn = bn; 960 cboff = 0; 961 962 if (cs->sc_ileave == 0) { 963 /* 964 * Serially concatenated and neither a mirror nor a parity 965 * config. This is a special case. 966 */ 967 daddr_t sblk; 968 969 sblk = 0; 970 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 971 sblk += ci->ci_size; 972 cbn -= sblk; 973 } else { 974 struct ccdiinfo *ii; 975 int ccdisk, off; 976 977 /* 978 * Calculate cbn, the logical superblock (sc_ileave chunks), 979 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 980 * to cbn. 981 */ 982 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 983 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 984 985 /* 986 * Figure out which interleave table to use. 987 */ 988 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 989 if (ii->ii_startblk > cbn) 990 break; 991 } 992 ii--; 993 994 /* 995 * off is the logical superblock relative to the beginning 996 * of this interleave block. 997 */ 998 off = cbn - ii->ii_startblk; 999 1000 /* 1001 * We must calculate which disk component to use (ccdisk), 1002 * and recalculate cbn to be the superblock relative to 1003 * the beginning of the component. This is typically done by 1004 * adding 'off' and ii->ii_startoff together. However, 'off' 1005 * must typically be divided by the number of components in 1006 * this interleave array to be properly convert it from a 1007 * CCD-relative logical superblock number to a 1008 * component-relative superblock number. 1009 */ 1010 if (ii->ii_ndisk == 1) { 1011 /* 1012 * When we have just one disk, it can't be a mirror 1013 * or a parity config. 1014 */ 1015 ccdisk = ii->ii_index[0]; 1016 cbn = ii->ii_startoff + off; 1017 } else { 1018 if (cs->sc_cflags & CCDF_MIRROR) { 1019 /* 1020 * We have forced a uniform mapping, resulting 1021 * in a single interleave array. We double 1022 * up on the first half of the available 1023 * components and our mirror is in the second 1024 * half. 
This only works with a single 1025 * interleave array because doubling up 1026 * doubles the number of sectors, so there 1027 * cannot be another interleave array because 1028 * the next interleave array's calculations 1029 * would be off. 1030 */ 1031 int ndisk2 = ii->ii_ndisk / 2; 1032 ccdisk = ii->ii_index[off % ndisk2]; 1033 cbn = ii->ii_startoff + off / ndisk2; 1034 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1035 } else if (cs->sc_cflags & CCDF_PARITY) { 1036 /* 1037 * XXX not implemented yet 1038 */ 1039 int ndisk2 = ii->ii_ndisk - 1; 1040 ccdisk = ii->ii_index[off % ndisk2]; 1041 cbn = ii->ii_startoff + off / ndisk2; 1042 if (cbn % ii->ii_ndisk <= ccdisk) 1043 ccdisk++; 1044 } else { 1045 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1046 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1047 } 1048 } 1049 1050 ci = &cs->sc_cinfo[ccdisk]; 1051 1052 /* 1053 * Convert cbn from a superblock to a normal block so it 1054 * can be used to calculate (along with cboff) the normal 1055 * block index into this particular disk. 1056 */ 1057 cbn *= cs->sc_ileave; 1058 } 1059 1060 /* 1061 * Fill in the component buf structure. 1062 */ 1063 cbp = getccdbuf(NULL); 1064 cbp->cb_buf.bio_cmd = bp->bio_cmd; 1065 cbp->cb_buf.bio_done = ccdiodone; 1066 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ 1067 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; 1068 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1069 cbp->cb_buf.bio_data = addr; 1070 if (cs->sc_ileave == 0) 1071 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1072 else 1073 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1074 cbp->cb_buf.bio_bcount = (cbc < bcount) ? 
cbc : bcount; 1075 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 1076 1077 /* 1078 * context for ccdiodone 1079 */ 1080 cbp->cb_obp = bp; 1081 cbp->cb_unit = cs->sc_unit; 1082 cbp->cb_comp = ci - cs->sc_cinfo; 1083 1084 #ifdef DEBUG 1085 if (ccddebug & CCDB_IO) 1086 printf(" dev %p(u%ld): cbp %p bn %jd addr %p bcnt %ld\n", 1087 ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp, 1088 (intmax_t)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1089 cbp->cb_buf.bio_bcount); 1090 #endif 1091 cb[0] = cbp; 1092 1093 /* 1094 * Note: both I/O's setup when reading from mirror, but only one 1095 * will be executed. 1096 */ 1097 if (cs->sc_cflags & CCDF_MIRROR) { 1098 /* mirror, setup second I/O */ 1099 cbp = getccdbuf(cb[0]); 1100 cbp->cb_buf.bio_dev = ci2->ci_dev; 1101 cbp->cb_comp = ci2 - cs->sc_cinfo; 1102 cb[1] = cbp; 1103 /* link together the ccdbuf's and clear "mirror done" flag */ 1104 cb[0]->cb_mirror = cb[1]; 1105 cb[1]->cb_mirror = cb[0]; 1106 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1107 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1108 } 1109 } 1110 1111 static void 1112 ccdintr(struct ccd_s *cs, struct bio *bp) 1113 { 1114 #ifdef DEBUG 1115 if (ccddebug & CCDB_FOLLOW) 1116 printf("ccdintr(%p, %p)\n", cs, bp); 1117 #endif 1118 /* 1119 * Request is done for better or worse, wakeup the top half. 1120 */ 1121 if (bp->bio_flags & BIO_ERROR) 1122 bp->bio_resid = bp->bio_bcount; 1123 biofinish(bp, &cs->device_stats, 0); 1124 } 1125 1126 /* 1127 * Called at interrupt time. 1128 * Mark the component as done and if all components are done, 1129 * take a ccd interrupt. 
 */
static void
ccdiodone(struct bio *ibp)
{
	/*
	 * ibp is the cb_buf member of a struct ccdbuf; cb_buf is the first
	 * member, so the pointer can be cast back to the enclosing ccdbuf.
	 */
	struct ccdbuf *cbp = (struct ccdbuf *)ibp;
	struct bio *bp = cbp->cb_obp;		/* the original request */
	int unit = cbp->cb_unit;
	int count, s;

	s = splbio();
#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdiodone(%p)\n", cbp);
	if (ccddebug & CCDB_IO) {
		printf("ccdiodone: bp %p bcount %ld resid %ld\n",
		       bp, bp->bio_bcount, bp->bio_resid);
		printf(" dev %p(u%d), cbp %p bn %jd addr %p bcnt %ld\n",
		       cbp->cb_buf.bio_dev, cbp->cb_comp, cbp,
		       (intmax_t)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data,
		       cbp->cb_buf.bio_bcount);
	}
#endif
	/*
	 * If an error occurred, report it.  If this is a mirrored
	 * configuration and the first of two possible reads, do not
	 * set the error in the bp yet because the second read may
	 * succeed.
	 */

	if (cbp->cb_buf.bio_flags & BIO_ERROR) {
		const char *msg = "";

		if ((ccdfind(unit)->sc_cflags & CCDF_MIRROR) &&
		    (cbp->cb_buf.bio_cmd == BIO_READ) &&
		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
			/*
			 * We will try our read on the other disk down
			 * below, also reverse the default pick so if we
			 * are doing a scan we do not keep hitting the
			 * bad disk first.
			 */
			struct ccd_s *cs = ccdfind(unit);

			msg = ", trying other disk";
			cs->sc_pick = 1 - cs->sc_pick;
			cs->sc_blk[cs->sc_pick] = bp->bio_blkno;
		} else {
			/* Propagate the error to the original request. */
			bp->bio_flags |= BIO_ERROR;
			bp->bio_error = cbp->cb_buf.bio_error ?
			    cbp->cb_buf.bio_error : EIO;
		}
		/*
		 * Note that the error number printed is the one stored in
		 * the original request, which the retry branch above does
		 * not set.
		 */
		printf("ccd%d: error %d on component %d block %jd "
		    "(ccd block %jd)%s\n", unit, bp->bio_error, cbp->cb_comp,
		    (intmax_t)cbp->cb_buf.bio_blkno, (intmax_t)bp->bio_blkno,
		    msg);
	}

	/*
	 * Process mirror.  If we are writing, I/O has been initiated on both
	 * buffers and we fall through only after both are finished.
	 *
	 * If we are reading only one I/O is initiated at a time.  If an
	 * error occurs we initiate the second I/O and return, otherwise
	 * we free the second I/O without initiating it.
	 */

	if (ccdfind(unit)->sc_cflags & CCDF_MIRROR) {
		if (cbp->cb_buf.bio_cmd == BIO_WRITE) {
			/*
			 * When writing, handshake with the second buffer
			 * to determine when both are done.  If both are not
			 * done, return here.
			 */
			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
				/* First to finish: tell the twin, then go. */
				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
				putccdbuf(cbp);
				splx(s);
				return;
			}
		} else {
			/*
			 * When reading, either dispose of the second buffer
			 * or initiate I/O on the second buffer if an error
			 * occurred with this one.
			 */
			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
				if (cbp->cb_buf.bio_flags & BIO_ERROR) {
					/*
					 * Mark the twin so that its own
					 * completion is treated as final.
					 */
					cbp->cb_mirror->cb_pflags |=
					    CCDPF_MIRROR_DONE;
					BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0);
					putccdbuf(cbp);
					splx(s);
					return;
				} else {
					putccdbuf(cbp->cb_mirror);
					/* fall through */
				}
			}
		}
	}

	/*
	 * use bio_caller1 to determine how big the original request was rather
	 * than bio_bcount, because bio_bcount may have been truncated for EOF.
	 *
	 * XXX We check for an error, but we do not test the resid for an
	 * aligned EOF condition.  This may result in character & block
	 * device access not recognizing EOF properly when read or written
	 * sequentially, but will not affect filesystems.
	 */
	count = (long)cbp->cb_buf.bio_caller1;
	putccdbuf(cbp);

	/*
	 * If all done, "interrupt".
	 */
	bp->bio_resid -= count;
	if (bp->bio_resid < 0)
		panic("ccdiodone: count");
	if (bp->bio_resid == 0)
		ccdintr(ccdfind(unit), bp);
	splx(s);
}

static int
ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
	int unit = ccdunit(dev);
	int i, j, lookedup = 0, error = 0;
	int part, pmask, s;
	struct ccd_s *cs;
	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
	char **cpp;
	struct vnode **vpp;

	if (!IS_ALLOCATED(unit))
		return (ENXIO);
	cs = ccdfind(unit);

	switch (cmd) {
	case CCDIOCSET:
		if (IS_INITED(cs))
			return (EBUSY);

		if ((flag & FWRITE) == 0)
			return (EBADF);

		if ((error = ccdlock(cs)) != 0)
			return (error);

		/*
		 * NOTE(review): this returns after a successful ccdlock()
		 * without a matching ccdunlock() -- confirm whether the
		 * lock is leaked on this path.
		 */
		if (ccio->ccio_ndisks > CCD_MAXNDISKS)
			return (EINVAL);

		/* Fill in some important bits. */
		cs->sc_ileave = ccio->ccio_ileave;
		if (cs->sc_ileave == 0 &&
		    ((ccio->ccio_flags & CCDF_MIRROR) ||
		     (ccio->ccio_flags & CCDF_PARITY))) {
			printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
		}
		if ((ccio->ccio_flags & CCDF_MIRROR) &&
		    (ccio->ccio_flags & CCDF_PARITY)) {
			printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
			ccio->ccio_flags &= ~CCDF_PARITY;
		}
		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
			printf("ccd%d: mirror/parity forces uniform flag\n",
			       unit);
			ccio->ccio_flags |= CCDF_UNIFORM;
		}
		cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK;

		/*
		 * Allocate space for and copy in the array of
		 * componet pathnames and device numbers.
1307 */ 1308 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1309 M_DEVBUF, M_WAITOK); 1310 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1311 M_DEVBUF, M_WAITOK); 1312 1313 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1314 ccio->ccio_ndisks * sizeof(char **)); 1315 if (error) { 1316 free(vpp, M_DEVBUF); 1317 free(cpp, M_DEVBUF); 1318 ccdunlock(cs); 1319 return (error); 1320 } 1321 1322 #ifdef DEBUG 1323 if (ccddebug & CCDB_INIT) 1324 for (i = 0; i < ccio->ccio_ndisks; ++i) 1325 printf("ccdioctl: component %d: %p\n", 1326 i, cpp[i]); 1327 #endif 1328 1329 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1330 #ifdef DEBUG 1331 if (ccddebug & CCDB_INIT) 1332 printf("ccdioctl: lookedup = %d\n", lookedup); 1333 #endif 1334 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1335 for (j = 0; j < lookedup; ++j) 1336 (void)vn_close(vpp[j], FREAD|FWRITE, 1337 td->td_ucred, td); 1338 free(vpp, M_DEVBUF); 1339 free(cpp, M_DEVBUF); 1340 ccdunlock(cs); 1341 return (error); 1342 } 1343 ++lookedup; 1344 } 1345 cs->sc_vpp = vpp; 1346 cs->sc_nccdisks = ccio->ccio_ndisks; 1347 1348 /* 1349 * Initialize the ccd. Fills in the softc for us. 1350 */ 1351 if ((error = ccdinit(cs, cpp, td)) != 0) { 1352 for (j = 0; j < lookedup; ++j) 1353 (void)vn_close(vpp[j], FREAD|FWRITE, 1354 td->td_ucred, td); 1355 /* 1356 * We can't ccddestroy() cs just yet, because nothing 1357 * prevents user-level app to do another ioctl() 1358 * without closing the device first, therefore 1359 * declare unit null and void and let ccdclose() 1360 * destroy it when it is safe to do so. 1361 */ 1362 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1363 free(vpp, M_DEVBUF); 1364 free(cpp, M_DEVBUF); 1365 ccdunlock(cs); 1366 return (error); 1367 } 1368 1369 /* 1370 * The ccd has been successfully initialized, so 1371 * we can place it into the array and read the disklabel. 
1372 */ 1373 ccio->ccio_unit = unit; 1374 ccio->ccio_size = cs->sc_size; 1375 ccdgetdisklabel(dev); 1376 1377 ccdunlock(cs); 1378 1379 break; 1380 1381 case CCDIOCCLR: 1382 if (!IS_INITED(cs)) 1383 return (ENXIO); 1384 1385 if ((flag & FWRITE) == 0) 1386 return (EBADF); 1387 1388 if ((error = ccdlock(cs)) != 0) 1389 return (error); 1390 1391 /* Don't unconfigure if any other partitions are open */ 1392 part = ccdpart(dev); 1393 pmask = (1 << part); 1394 if ((cs->sc_openmask & ~pmask)) { 1395 ccdunlock(cs); 1396 return (EBUSY); 1397 } 1398 1399 /* Declare unit null and void (reset all flags) */ 1400 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1401 1402 /* Close the components and free their pathnames. */ 1403 for (i = 0; i < cs->sc_nccdisks; ++i) { 1404 /* 1405 * XXX: this close could potentially fail and 1406 * cause Bad Things. Maybe we need to force 1407 * the close to happen? 1408 */ 1409 #ifdef DEBUG 1410 if (ccddebug & CCDB_VNODE) 1411 vprint("CCDIOCCLR: vnode info", 1412 cs->sc_cinfo[i].ci_vp); 1413 #endif 1414 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1415 td->td_ucred, td); 1416 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1417 } 1418 1419 /* Free interleave index. */ 1420 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1421 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1422 1423 /* Free component info and interleave table. */ 1424 free(cs->sc_cinfo, M_DEVBUF); 1425 free(cs->sc_itable, M_DEVBUF); 1426 free(cs->sc_vpp, M_DEVBUF); 1427 1428 /* And remove the devstat entry. */ 1429 devstat_remove_entry(&cs->device_stats); 1430 1431 /* This must be atomic. 
*/ 1432 s = splhigh(); 1433 ccdunlock(cs); 1434 splx(s); 1435 1436 break; 1437 1438 case CCDCONFINFO: 1439 { 1440 int ninit = 0; 1441 struct ccdconf *conf = (struct ccdconf *)data; 1442 struct ccd_s *tmpcs; 1443 struct ccd_s *ubuf = conf->buffer; 1444 1445 /* XXX: LOCK(unique unit numbers) */ 1446 LIST_FOREACH(tmpcs, &ccd_softc_list, list) 1447 if (IS_INITED(tmpcs)) 1448 ninit++; 1449 1450 if (conf->size == 0) { 1451 conf->size = sizeof(struct ccd_s) * ninit; 1452 break; 1453 } else if ((conf->size / sizeof(struct ccd_s) != ninit) || 1454 (conf->size % sizeof(struct ccd_s) != 0)) { 1455 /* XXX: UNLOCK(unique unit numbers) */ 1456 return (EINVAL); 1457 } 1458 1459 ubuf += ninit; 1460 LIST_FOREACH(tmpcs, &ccd_softc_list, list) { 1461 if (!IS_INITED(tmpcs)) 1462 continue; 1463 error = copyout(tmpcs, --ubuf, 1464 sizeof(struct ccd_s)); 1465 if (error != 0) 1466 /* XXX: UNLOCK(unique unit numbers) */ 1467 return (error); 1468 } 1469 /* XXX: UNLOCK(unique unit numbers) */ 1470 } 1471 break; 1472 1473 case CCDCPPINFO: 1474 if (!IS_INITED(cs)) 1475 return (ENXIO); 1476 1477 { 1478 int len = 0; 1479 struct ccdcpps *cpps = (struct ccdcpps *)data; 1480 char *ubuf = cpps->buffer; 1481 1482 1483 for (i = 0; i < cs->sc_nccdisks; ++i) 1484 len += cs->sc_cinfo[i].ci_pathlen; 1485 1486 if (cpps->size == 0) { 1487 cpps->size = len; 1488 break; 1489 } else if (cpps->size != len) { 1490 return (EINVAL); 1491 } 1492 1493 for (i = 0; i < cs->sc_nccdisks; ++i) { 1494 len = cs->sc_cinfo[i].ci_pathlen; 1495 error = copyout(cs->sc_cinfo[i].ci_path, ubuf, 1496 len); 1497 if (error != 0) 1498 return (error); 1499 ubuf += len; 1500 } 1501 } 1502 break; 1503 1504 case DIOCGDINFO: 1505 if (!IS_INITED(cs)) 1506 return (ENXIO); 1507 1508 *(struct disklabel *)data = cs->sc_label; 1509 break; 1510 1511 case DIOCWDINFO: 1512 case DIOCSDINFO: 1513 if (!IS_INITED(cs)) 1514 return (ENXIO); 1515 1516 if ((flag & FWRITE) == 0) 1517 return (EBADF); 1518 1519 if ((error = ccdlock(cs)) != 0) 1520 return 
(error); 1521 1522 cs->sc_flags |= CCDF_LABELLING; 1523 1524 error = setdisklabel(&cs->sc_label, 1525 (struct disklabel *)data, 0); 1526 if (error == 0) { 1527 if (cmd == DIOCWDINFO) 1528 error = writedisklabel(CCDLABELDEV(dev), 1529 &cs->sc_label); 1530 } 1531 1532 cs->sc_flags &= ~CCDF_LABELLING; 1533 1534 ccdunlock(cs); 1535 1536 if (error) 1537 return (error); 1538 break; 1539 1540 case DIOCWLABEL: 1541 if (!IS_INITED(cs)) 1542 return (ENXIO); 1543 1544 if ((flag & FWRITE) == 0) 1545 return (EBADF); 1546 if (*(int *)data != 0) 1547 cs->sc_flags |= CCDF_WLABEL; 1548 else 1549 cs->sc_flags &= ~CCDF_WLABEL; 1550 break; 1551 1552 default: 1553 return (ENOTTY); 1554 } 1555 1556 return (0); 1557 } 1558 1559 static int 1560 ccdsize(dev_t dev) 1561 { 1562 struct ccd_s *cs; 1563 int part, size; 1564 1565 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1566 return (-1); 1567 1568 cs = ccdfind(ccdunit(dev)); 1569 part = ccdpart(dev); 1570 1571 if (!IS_INITED(cs)) 1572 return (-1); 1573 1574 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1575 size = -1; 1576 else 1577 size = cs->sc_label.d_partitions[part].p_size; 1578 1579 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1580 return (-1); 1581 1582 return (size); 1583 } 1584 1585 /* 1586 * Lookup the provided name in the filesystem. If the file exists, 1587 * is a valid block device, and isn't being used by anyone else, 1588 * set *vpp to the file's vnode. 
1589 */ 1590 static int 1591 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1592 { 1593 struct nameidata nd; 1594 struct vnode *vp; 1595 int error, flags; 1596 1597 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td); 1598 flags = FREAD | FWRITE; 1599 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1600 #ifdef DEBUG 1601 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 1602 printf("ccdlookup: vn_open error = %d\n", error); 1603 #endif 1604 return (error); 1605 } 1606 vp = nd.ni_vp; 1607 1608 if (vrefcnt(vp) > 1) { 1609 error = EBUSY; 1610 goto bad; 1611 } 1612 1613 if (!vn_isdisk(vp, &error)) 1614 goto bad; 1615 1616 #ifdef DEBUG 1617 if (ccddebug & CCDB_VNODE) 1618 vprint("ccdlookup: vnode info", vp); 1619 #endif 1620 1621 VOP_UNLOCK(vp, 0, td); 1622 NDFREE(&nd, NDF_ONLY_PNBUF); 1623 *vpp = vp; 1624 return (0); 1625 bad: 1626 VOP_UNLOCK(vp, 0, td); 1627 NDFREE(&nd, NDF_ONLY_PNBUF); 1628 /* vn_close does vrele() for vp */ 1629 (void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 1630 return (error); 1631 } 1632 1633 /* 1634 * Read the disklabel from the ccd. If one is not present, fake one 1635 * up. 
1636 */ 1637 static void 1638 ccdgetdisklabel(dev_t dev) 1639 { 1640 int unit = ccdunit(dev); 1641 struct ccd_s *cs = ccdfind(unit); 1642 char *errstring; 1643 struct disklabel *lp = &cs->sc_label; 1644 struct ccdgeom *ccg = &cs->sc_geom; 1645 1646 bzero(lp, sizeof(*lp)); 1647 1648 lp->d_secperunit = cs->sc_size; 1649 lp->d_secsize = ccg->ccg_secsize; 1650 lp->d_nsectors = ccg->ccg_nsectors; 1651 lp->d_ntracks = ccg->ccg_ntracks; 1652 lp->d_ncylinders = ccg->ccg_ncylinders; 1653 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1654 1655 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1656 lp->d_type = DTYPE_CCD; 1657 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1658 lp->d_rpm = 3600; 1659 lp->d_interleave = 1; 1660 lp->d_flags = 0; 1661 1662 lp->d_partitions[RAW_PART].p_offset = 0; 1663 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1664 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1665 lp->d_npartitions = RAW_PART + 1; 1666 1667 lp->d_bbsize = BBSIZE; /* XXX */ 1668 lp->d_sbsize = 0; 1669 1670 lp->d_magic = DISKMAGIC; 1671 lp->d_magic2 = DISKMAGIC; 1672 lp->d_checksum = dkcksum(&cs->sc_label); 1673 1674 /* 1675 * Call the generic disklabel extraction routine. 1676 */ 1677 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1678 if (errstring != NULL) 1679 ccdmakedisklabel(cs); 1680 1681 #ifdef DEBUG 1682 /* It's actually extremely common to have unlabeled ccds. */ 1683 if (ccddebug & CCDB_LABEL) 1684 if (errstring != NULL) 1685 printf("ccd%d: %s\n", unit, errstring); 1686 #endif 1687 } 1688 1689 /* 1690 * Take care of things one might want to take care of in the event 1691 * that a disklabel isn't present. 1692 */ 1693 static void 1694 ccdmakedisklabel(struct ccd_s *cs) 1695 { 1696 struct disklabel *lp = &cs->sc_label; 1697 1698 /* 1699 * For historical reasons, if there's no disklabel present 1700 * the raw partition must be marked FS_BSDFFS. 
1701 */ 1702 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1703 1704 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1705 } 1706 1707 /* 1708 * Wait interruptibly for an exclusive lock. 1709 * 1710 * XXX 1711 * Several drivers do this; it should be abstracted and made MP-safe. 1712 */ 1713 static int 1714 ccdlock(struct ccd_s *cs) 1715 { 1716 int error; 1717 1718 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1719 cs->sc_flags |= CCDF_WANTED; 1720 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1721 return (error); 1722 } 1723 cs->sc_flags |= CCDF_LOCKED; 1724 return (0); 1725 } 1726 1727 /* 1728 * Unlock and wake up any waiters. 1729 */ 1730 static void 1731 ccdunlock(struct ccd_s *cs) 1732 { 1733 1734 cs->sc_flags &= ~CCDF_LOCKED; 1735 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1736 cs->sc_flags &= ~CCDF_WANTED; 1737 wakeup(cs); 1738 } 1739 } 1740 1741 #ifdef DEBUG 1742 static void 1743 printiinfo(struct ccdiinfo *ii) 1744 { 1745 int ix, i; 1746 1747 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1748 printf(" itab[%d]: #dk %d sblk %lld soff %lld", 1749 ix, ii->ii_ndisk, (long long)ii->ii_startblk, 1750 (long long)ii->ii_startoff); 1751 for (i = 0; i < ii->ii_ndisk; i++) 1752 printf(" %d", ii->ii_index[i]); 1753 printf("\n"); 1754 } 1755 } 1756 #endif 1757