/* $FreeBSD$ */

/*	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $	*/

/*
 * Copyright (c) 1995 Jason R. Thorpe.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project
 *	by Jason R. Thorpe.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include <sys/param.h> 91 #include <sys/systm.h> 92 #include <sys/kernel.h> 93 #include <sys/module.h> 94 #include <sys/proc.h> 95 #include <sys/bio.h> 96 #include <sys/malloc.h> 97 #include <sys/namei.h> 98 #include <sys/conf.h> 99 #include <sys/stat.h> 100 #include <sys/sysctl.h> 101 #include <sys/disk.h> 102 #include <sys/devicestat.h> 103 #include <sys/fcntl.h> 104 #include <sys/vnode.h> 105 106 #include <sys/ccdvar.h> 107 108 MALLOC_DEFINE(M_CCD, "CCD driver", "Concatenated Disk driver"); 109 110 #if defined(CCDDEBUG) && !defined(DEBUG) 111 #define DEBUG 112 #endif 113 114 #ifdef DEBUG 115 #define CCDB_FOLLOW 0x01 116 #define CCDB_INIT 0x02 117 #define CCDB_IO 0x04 118 #define CCDB_LABEL 0x08 119 #define CCDB_VNODE 0x10 120 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 121 CCDB_VNODE; 122 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 123 #endif 124 125 #define ccdunit(x) dkunit(x) 126 #define ccdpart(x) dkpart(x) 127 128 /* 129 This is how mirroring works (only writes are special): 130 131 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 132 linked together by the cb_mirror field. "cb_pflags & 133 CCDPF_MIRROR_DONE" is set to 0 on both of them. 134 135 When a component returns to ccdiodone(), it checks if "cb_pflags & 136 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 137 flag and returns. If it is, it means its partner has already 138 returned, so it will go to the regular cleanup. 139 140 */ 141 142 struct ccdbuf { 143 struct bio cb_buf; /* new I/O buf */ 144 struct bio *cb_obp; /* ptr. 
to original I/O buf */ 145 struct ccdbuf *cb_freenext; /* free list link */ 146 int cb_unit; /* target unit */ 147 int cb_comp; /* target component */ 148 int cb_pflags; /* mirror/parity status flag */ 149 struct ccdbuf *cb_mirror; /* mirror counterpart */ 150 }; 151 152 /* bits in cb_pflags */ 153 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 154 155 #define CCDLABELDEV(dev) \ 156 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 157 158 /* convinient macros for often-used statements */ 159 #define IS_ALLOCATED(unit) (ccdfind(unit) != NULL) 160 #define IS_INITED(cs) (((cs)->sc_flags & CCDF_INITED) != 0) 161 162 static d_open_t ccdopen; 163 static d_close_t ccdclose; 164 static d_strategy_t ccdstrategy; 165 static d_ioctl_t ccdioctl; 166 static d_psize_t ccdsize; 167 168 #define NCCDFREEHIWAT 16 169 170 #define CDEV_MAJOR 74 171 172 static struct cdevsw ccd_cdevsw = { 173 /* open */ ccdopen, 174 /* close */ ccdclose, 175 /* read */ physread, 176 /* write */ physwrite, 177 /* ioctl */ ccdioctl, 178 /* poll */ nopoll, 179 /* mmap */ nommap, 180 /* strategy */ ccdstrategy, 181 /* name */ "ccd", 182 /* maj */ CDEV_MAJOR, 183 /* dump */ nodump, 184 /* psize */ ccdsize, 185 /* flags */ D_DISK, 186 }; 187 static LIST_HEAD(, ccd_s) ccd_softc_list = LIST_HEAD_INITIALIZER(&ccd_softc_list); 188 189 static struct ccd_s *ccdfind(int); 190 static struct ccd_s *ccdnew(int); 191 static int ccddestroy(struct ccd_s *, struct proc *); 192 193 /* called during module initialization */ 194 static void ccdattach(void); 195 static int ccd_modevent(module_t, int, void *); 196 197 /* called by biodone() at interrupt time */ 198 static void ccdiodone(struct bio *bp); 199 200 static void ccdstart(struct ccd_s *, struct bio *); 201 static void ccdinterleave(struct ccd_s *, int); 202 static void ccdintr(struct ccd_s *, struct bio *); 203 static int ccdinit(struct ccd_s *, char **, struct thread *); 204 static int ccdlookup(char *, struct thread *p, struct 
vnode **); 205 static void ccdbuffer(struct ccdbuf **ret, struct ccd_s *, 206 struct bio *, daddr_t, caddr_t, long); 207 static void ccdgetdisklabel(dev_t); 208 static void ccdmakedisklabel(struct ccd_s *); 209 static int ccdlock(struct ccd_s *); 210 static void ccdunlock(struct ccd_s *); 211 212 #ifdef DEBUG 213 static void printiinfo(struct ccdiinfo *); 214 #endif 215 216 /* Non-private for the benefit of libkvm. */ 217 struct ccdbuf *ccdfreebufs; 218 static int numccdfreebufs; 219 220 /* 221 * getccdbuf() - Allocate and zero a ccd buffer. 222 * 223 * This routine is called at splbio(). 224 */ 225 226 static __inline 227 struct ccdbuf * 228 getccdbuf(struct ccdbuf *cpy) 229 { 230 struct ccdbuf *cbp; 231 232 /* 233 * Allocate from freelist or malloc as necessary 234 */ 235 if ((cbp = ccdfreebufs) != NULL) { 236 ccdfreebufs = cbp->cb_freenext; 237 --numccdfreebufs; 238 } else { 239 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 240 } 241 242 /* 243 * Used by mirroring code 244 */ 245 if (cpy) 246 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 247 else 248 bzero(cbp, sizeof(struct ccdbuf)); 249 250 /* 251 * independant struct bio initialization 252 */ 253 254 return(cbp); 255 } 256 257 /* 258 * putccdbuf() - Free a ccd buffer. 259 * 260 * This routine is called at splbio(). 261 */ 262 263 static __inline 264 void 265 putccdbuf(struct ccdbuf *cbp) 266 { 267 268 if (numccdfreebufs < NCCDFREEHIWAT) { 269 cbp->cb_freenext = ccdfreebufs; 270 ccdfreebufs = cbp; 271 ++numccdfreebufs; 272 } else { 273 free((caddr_t)cbp, M_DEVBUF); 274 } 275 } 276 277 278 /* 279 * Number of blocks to untouched in front of a component partition. 280 * This is to avoid violating its disklabel area when it starts at the 281 * beginning of the slice. 
282 */ 283 #if !defined(CCD_OFFSET) 284 #define CCD_OFFSET 16 285 #endif 286 287 static struct ccd_s * 288 ccdfind(int unit) 289 { 290 struct ccd_s *sc = NULL; 291 292 /* XXX: LOCK(unique unit numbers) */ 293 LIST_FOREACH(sc, &ccd_softc_list, list) { 294 if (sc->sc_unit == unit) 295 break; 296 } 297 /* XXX: UNLOCK(unique unit numbers) */ 298 return ((sc == NULL) || (sc->sc_unit != unit) ? NULL : sc); 299 } 300 301 static struct ccd_s * 302 ccdnew(int unit) 303 { 304 struct ccd_s *sc; 305 306 /* XXX: LOCK(unique unit numbers) */ 307 if (IS_ALLOCATED(unit) || unit > DKMAXUNIT) 308 return (NULL); 309 310 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO); 311 sc->sc_unit = unit; 312 LIST_INSERT_HEAD(&ccd_softc_list, sc, list); 313 /* XXX: UNLOCK(unique unit numbers) */ 314 return (sc); 315 } 316 317 static int 318 ccddestroy(struct ccd_s *sc, struct proc *p) 319 { 320 321 /* XXX: LOCK(unique unit numbers) */ 322 LIST_REMOVE(sc, list); 323 /* XXX: UNLOCK(unique unit numbers) */ 324 FREE(sc, M_CCD); 325 return (0); 326 } 327 328 static void 329 ccd_clone(void *arg, char *name, int namelen, dev_t *dev) 330 { 331 int i, u; 332 char *s; 333 334 if (*dev != NODEV) 335 return; 336 i = dev_stdclone(name, &s, "ccd", &u); 337 if (i != 2) 338 return; 339 if (*s < 'a' || *s > 'h') 340 return; 341 if (s[1] != '\0') 342 return; 343 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', 344 UID_ROOT, GID_OPERATOR, 0640, name); 345 } 346 347 /* 348 * Called by main() during pseudo-device attachment. All we need 349 * to do is to add devsw entries. 
350 */ 351 static void 352 ccdattach() 353 { 354 355 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); 356 } 357 358 static int 359 ccd_modevent(module_t mod, int type, void *data) 360 { 361 int error = 0; 362 363 switch (type) { 364 case MOD_LOAD: 365 ccdattach(); 366 break; 367 368 case MOD_UNLOAD: 369 printf("ccd0: Unload not supported!\n"); 370 error = EOPNOTSUPP; 371 break; 372 373 case MOD_SHUTDOWN: 374 break; 375 376 default: 377 error = EOPNOTSUPP; 378 } 379 return (error); 380 } 381 382 DEV_MODULE(ccd, ccd_modevent, NULL); 383 384 static int 385 ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td) 386 { 387 struct ccdcinfo *ci = NULL; /* XXX */ 388 size_t size; 389 int ix; 390 struct vnode *vp; 391 size_t minsize; 392 int maxsecsize; 393 struct ccdgeom *ccg = &cs->sc_geom; 394 char *tmppath = NULL; 395 int error = 0; 396 off_t mediasize; 397 u_int sectorsize; 398 399 #ifdef DEBUG 400 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 401 printf("ccdinit: unit %d\n", cs->sc_unit); 402 #endif 403 404 cs->sc_size = 0; 405 406 /* Allocate space for the component info. */ 407 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 408 M_DEVBUF, M_WAITOK); 409 410 /* 411 * Verify that each component piece exists and record 412 * relevant information about it. 413 */ 414 maxsecsize = 0; 415 minsize = 0; 416 tmppath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK); 417 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 418 vp = cs->sc_vpp[ix]; 419 ci = &cs->sc_cinfo[ix]; 420 ci->ci_vp = vp; 421 422 /* 423 * Copy in the pathname of the component. 
424 */ 425 if ((error = copyinstr(cpaths[ix], tmppath, 426 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 427 #ifdef DEBUG 428 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 429 printf("ccd%d: can't copy path, error = %d\n", 430 cs->sc_unit, error); 431 #endif 432 goto fail; 433 } 434 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 435 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 436 437 ci->ci_dev = vn_todev(vp); 438 439 /* 440 * Get partition information for the component. 441 */ 442 error = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize, 443 FREAD, td->td_ucred, td); 444 if (error != 0) { 445 #ifdef DEBUG 446 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 447 printf("ccd%d: %s: ioctl failed, error = %d\n", 448 cs->sc_unit, ci->ci_path, error); 449 #endif 450 goto fail; 451 } 452 /* 453 * Get partition information for the component. 454 */ 455 error = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)§orsize, 456 FREAD, td->td_ucred, td); 457 if (error != 0) { 458 #ifdef DEBUG 459 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 460 printf("ccd%d: %s: ioctl failed, error = %d\n", 461 cs->sc_unit, ci->ci_path, error); 462 #endif 463 goto fail; 464 } 465 if (sectorsize > maxsecsize) 466 maxsecsize = sectorsize; 467 size = mediasize / DEV_BSIZE - CCD_OFFSET; 468 469 /* 470 * Calculate the size, truncating to an interleave 471 * boundary if necessary. 472 */ 473 474 if (cs->sc_ileave > 1) 475 size -= size % cs->sc_ileave; 476 477 if (size == 0) { 478 #ifdef DEBUG 479 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 480 printf("ccd%d: %s: size == 0\n", 481 cs->sc_unit, ci->ci_path); 482 #endif 483 error = ENODEV; 484 goto fail; 485 } 486 487 if (minsize == 0 || size < minsize) 488 minsize = size; 489 ci->ci_size = size; 490 cs->sc_size += size; 491 } 492 493 free(tmppath, M_DEVBUF); 494 tmppath = NULL; 495 496 /* 497 * Don't allow the interleave to be smaller than 498 * the biggest component sector. 
499 */ 500 if ((cs->sc_ileave > 0) && 501 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 502 #ifdef DEBUG 503 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 504 printf("ccd%d: interleave must be at least %d\n", 505 cs->sc_unit, (maxsecsize / DEV_BSIZE)); 506 #endif 507 error = EINVAL; 508 goto fail; 509 } 510 511 /* 512 * If uniform interleave is desired set all sizes to that of 513 * the smallest component. This will guarentee that a single 514 * interleave table is generated. 515 * 516 * Lost space must be taken into account when calculating the 517 * overall size. Half the space is lost when CCDF_MIRROR is 518 * specified. One disk is lost when CCDF_PARITY is specified. 519 */ 520 if (cs->sc_flags & CCDF_UNIFORM) { 521 for (ci = cs->sc_cinfo; 522 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 523 ci->ci_size = minsize; 524 } 525 if (cs->sc_flags & CCDF_MIRROR) { 526 /* 527 * Check to see if an even number of components 528 * have been specified. The interleave must also 529 * be non-zero in order for us to be able to 530 * guarentee the topology. 531 */ 532 if (cs->sc_nccdisks % 2) { 533 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit ); 534 error = EINVAL; 535 goto fail; 536 } 537 if (cs->sc_ileave == 0) { 538 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit); 539 error = EINVAL; 540 goto fail; 541 } 542 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 543 } else if (cs->sc_flags & CCDF_PARITY) { 544 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 545 } else { 546 if (cs->sc_ileave == 0) { 547 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit); 548 error = EINVAL; 549 goto fail; 550 } 551 cs->sc_size = cs->sc_nccdisks * minsize; 552 } 553 } 554 555 /* 556 * Construct the interleave table. 557 */ 558 ccdinterleave(cs, cs->sc_unit); 559 560 /* 561 * Create pseudo-geometry based on 1MB cylinders. It's 562 * pretty close. 
563 */ 564 ccg->ccg_secsize = maxsecsize; 565 ccg->ccg_ntracks = 1; 566 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 567 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 568 569 /* 570 * Add an devstat entry for this device. 571 */ 572 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit, 573 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 574 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 575 DEVSTAT_PRIORITY_ARRAY); 576 577 cs->sc_flags |= CCDF_INITED; 578 cs->sc_cflags = cs->sc_flags; /* So we can find out later... */ 579 return (0); 580 fail: 581 while (ci > cs->sc_cinfo) { 582 ci--; 583 free(ci->ci_path, M_DEVBUF); 584 } 585 if (tmppath != NULL) 586 free(tmppath, M_DEVBUF); 587 free(cs->sc_cinfo, M_DEVBUF); 588 return (error); 589 } 590 591 static void 592 ccdinterleave(struct ccd_s *cs, int unit) 593 { 594 struct ccdcinfo *ci, *smallci; 595 struct ccdiinfo *ii; 596 daddr_t bn, lbn; 597 int ix; 598 u_long size; 599 600 #ifdef DEBUG 601 if (ccddebug & CCDB_INIT) 602 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 603 #endif 604 605 /* 606 * Allocate an interleave table. The worst case occurs when each 607 * of N disks is of a different size, resulting in N interleave 608 * tables. 609 * 610 * Chances are this is too big, but we don't care. 611 */ 612 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 613 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, 614 M_WAITOK | M_ZERO); 615 616 /* 617 * Trivial case: no interleave (actually interleave of disk size). 618 * Each table entry represents a single component in its entirety. 619 * 620 * An interleave of 0 may not be used with a mirror or parity setup. 621 */ 622 if (cs->sc_ileave == 0) { 623 bn = 0; 624 ii = cs->sc_itable; 625 626 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 627 /* Allocate space for ii_index. 
*/ 628 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 629 ii->ii_ndisk = 1; 630 ii->ii_startblk = bn; 631 ii->ii_startoff = 0; 632 ii->ii_index[0] = ix; 633 bn += cs->sc_cinfo[ix].ci_size; 634 ii++; 635 } 636 ii->ii_ndisk = 0; 637 #ifdef DEBUG 638 if (ccddebug & CCDB_INIT) 639 printiinfo(cs->sc_itable); 640 #endif 641 return; 642 } 643 644 /* 645 * The following isn't fast or pretty; it doesn't have to be. 646 */ 647 size = 0; 648 bn = lbn = 0; 649 for (ii = cs->sc_itable; ; ii++) { 650 /* 651 * Allocate space for ii_index. We might allocate more then 652 * we use. 653 */ 654 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 655 M_DEVBUF, M_WAITOK); 656 657 /* 658 * Locate the smallest of the remaining components 659 */ 660 smallci = NULL; 661 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 662 ci++) { 663 if (ci->ci_size > size && 664 (smallci == NULL || 665 ci->ci_size < smallci->ci_size)) { 666 smallci = ci; 667 } 668 } 669 670 /* 671 * Nobody left, all done 672 */ 673 if (smallci == NULL) { 674 ii->ii_ndisk = 0; 675 break; 676 } 677 678 /* 679 * Record starting logical block using an sc_ileave blocksize. 680 */ 681 ii->ii_startblk = bn / cs->sc_ileave; 682 683 /* 684 * Record starting comopnent block using an sc_ileave 685 * blocksize. This value is relative to the beginning of 686 * a component disk. 687 */ 688 ii->ii_startoff = lbn; 689 690 /* 691 * Determine how many disks take part in this interleave 692 * and record their indices. 
693 */ 694 ix = 0; 695 for (ci = cs->sc_cinfo; 696 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 697 if (ci->ci_size >= smallci->ci_size) { 698 ii->ii_index[ix++] = ci - cs->sc_cinfo; 699 } 700 } 701 ii->ii_ndisk = ix; 702 bn += ix * (smallci->ci_size - size); 703 lbn = smallci->ci_size / cs->sc_ileave; 704 size = smallci->ci_size; 705 } 706 #ifdef DEBUG 707 if (ccddebug & CCDB_INIT) 708 printiinfo(cs->sc_itable); 709 #endif 710 } 711 712 /* ARGSUSED */ 713 static int 714 ccdopen(dev_t dev, int flags, int fmt, struct thread *td) 715 { 716 int unit = ccdunit(dev); 717 struct ccd_s *cs; 718 struct disklabel *lp; 719 int error = 0, part, pmask; 720 721 #ifdef DEBUG 722 if (ccddebug & CCDB_FOLLOW) 723 printf("ccdopen(%p, %x)\n", dev, flags); 724 #endif 725 726 cs = IS_ALLOCATED(unit) ? ccdfind(unit) : ccdnew(unit); 727 728 if ((error = ccdlock(cs)) != 0) 729 return (error); 730 731 lp = &cs->sc_label; 732 733 part = ccdpart(dev); 734 pmask = (1 << part); 735 736 /* 737 * If we're initialized, check to see if there are any other 738 * open partitions. If not, then it's safe to update 739 * the in-core disklabel. 740 */ 741 if (IS_INITED(cs) && (cs->sc_openmask == 0)) 742 ccdgetdisklabel(dev); 743 744 /* Check that the partition exists. 
*/ 745 if (part != RAW_PART && ((part >= lp->d_npartitions) || 746 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 747 error = ENXIO; 748 goto done; 749 } 750 751 cs->sc_openmask |= pmask; 752 done: 753 ccdunlock(cs); 754 return (0); 755 } 756 757 /* ARGSUSED */ 758 static int 759 ccdclose(dev_t dev, int flags, int fmt, struct thread *td) 760 { 761 int unit = ccdunit(dev); 762 struct ccd_s *cs; 763 int error = 0, part; 764 765 #ifdef DEBUG 766 if (ccddebug & CCDB_FOLLOW) 767 printf("ccdclose(%p, %x)\n", dev, flags); 768 #endif 769 770 if (!IS_ALLOCATED(unit)) 771 return (ENXIO); 772 cs = ccdfind(unit); 773 774 if ((error = ccdlock(cs)) != 0) 775 return (error); 776 777 part = ccdpart(dev); 778 779 /* ...that much closer to allowing unconfiguration... */ 780 cs->sc_openmask &= ~(1 << part); 781 /* collect "garbage" if possible */ 782 if (!IS_INITED(cs) && (cs->sc_flags & CCDF_WANTED) == 0) 783 ccddestroy(cs, td->td_proc); 784 else 785 ccdunlock(cs); 786 return (0); 787 } 788 789 static void 790 ccdstrategy(struct bio *bp) 791 { 792 int unit = ccdunit(bp->bio_dev); 793 struct ccd_s *cs = ccdfind(unit); 794 int s; 795 int wlabel; 796 struct disklabel *lp; 797 798 #ifdef DEBUG 799 if (ccddebug & CCDB_FOLLOW) 800 printf("ccdstrategy(%p): unit %d\n", bp, unit); 801 #endif 802 if (!IS_INITED(cs)) { 803 biofinish(bp, NULL, ENXIO); 804 return; 805 } 806 807 /* If it's a nil transfer, wake up the top half now. */ 808 if (bp->bio_bcount == 0) { 809 biodone(bp); 810 return; 811 } 812 813 lp = &cs->sc_label; 814 815 /* 816 * Do bounds checking and adjust transfer. If there's an 817 * error, the bounds check will flag that for us. 
818 */ 819 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 820 if (ccdpart(bp->bio_dev) != RAW_PART) { 821 if (bounds_check_with_label(bp, lp, wlabel) <= 0) { 822 biodone(bp); 823 return; 824 } 825 } else { 826 int pbn; /* in sc_secsize chunks */ 827 long sz; /* in sc_secsize chunks */ 828 829 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 830 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 831 832 /* 833 * If out of bounds return an error. If at the EOF point, 834 * simply read or write less. 835 */ 836 837 if (pbn < 0 || pbn >= cs->sc_size) { 838 bp->bio_resid = bp->bio_bcount; 839 if (pbn != cs->sc_size) 840 biofinish(bp, NULL, EINVAL); 841 else 842 biodone(bp); 843 return; 844 } 845 846 /* 847 * If the request crosses EOF, truncate the request. 848 */ 849 if (pbn + sz > cs->sc_size) { 850 bp->bio_bcount = (cs->sc_size - pbn) * 851 cs->sc_geom.ccg_secsize; 852 } 853 } 854 855 bp->bio_resid = bp->bio_bcount; 856 857 /* 858 * "Start" the unit. 859 */ 860 s = splbio(); 861 ccdstart(cs, bp); 862 splx(s); 863 return; 864 } 865 866 static void 867 ccdstart(struct ccd_s *cs, struct bio *bp) 868 { 869 long bcount, rcount; 870 struct ccdbuf *cbp[4]; 871 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 872 caddr_t addr; 873 daddr_t bn; 874 struct partition *pp; 875 876 #ifdef DEBUG 877 if (ccddebug & CCDB_FOLLOW) 878 printf("ccdstart(%p, %p)\n", cs, bp); 879 #endif 880 881 /* Record the transaction start */ 882 devstat_start_transaction(&cs->device_stats); 883 884 /* 885 * Translate the partition-relative block number to an absolute. 
886 */ 887 bn = bp->bio_blkno; 888 if (ccdpart(bp->bio_dev) != RAW_PART) { 889 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; 890 bn += pp->p_offset; 891 } 892 893 /* 894 * Allocate component buffers and fire off the requests 895 */ 896 addr = bp->bio_data; 897 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 898 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 899 rcount = cbp[0]->cb_buf.bio_bcount; 900 901 if (cs->sc_cflags & CCDF_MIRROR) { 902 /* 903 * Mirroring. Writes go to both disks, reads are 904 * taken from whichever disk seems most appropriate. 905 * 906 * We attempt to localize reads to the disk whos arm 907 * is nearest the read request. We ignore seeks due 908 * to writes when making this determination and we 909 * also try to avoid hogging. 910 */ 911 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 912 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 913 BIO_STRATEGY(&cbp[1]->cb_buf, 0); 914 } else { 915 int pick = cs->sc_pick; 916 daddr_t range = cs->sc_size / 16; 917 918 if (bn < cs->sc_blk[pick] - range || 919 bn > cs->sc_blk[pick] + range 920 ) { 921 cs->sc_pick = pick = 1 - pick; 922 } 923 cs->sc_blk[pick] = bn + btodb(rcount); 924 BIO_STRATEGY(&cbp[pick]->cb_buf, 0); 925 } 926 } else { 927 /* 928 * Not mirroring 929 */ 930 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 931 } 932 bn += btodb(rcount); 933 addr += rcount; 934 } 935 } 936 937 /* 938 * Build a component buffer header. 939 */ 940 static void 941 ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) 942 { 943 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 944 struct ccdbuf *cbp; 945 daddr_t cbn, cboff; 946 off_t cbc; 947 948 #ifdef DEBUG 949 if (ccddebug & CCDB_IO) 950 printf("ccdbuffer(%p, %p, %lld, %p, %ld)\n", 951 (void *)cs, (void *)bp, (long long)bn, (void *)addr, 952 bcount); 953 #endif 954 /* 955 * Determine which component bn falls in. 
956 */ 957 cbn = bn; 958 cboff = 0; 959 960 if (cs->sc_ileave == 0) { 961 /* 962 * Serially concatenated and neither a mirror nor a parity 963 * config. This is a special case. 964 */ 965 daddr_t sblk; 966 967 sblk = 0; 968 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 969 sblk += ci->ci_size; 970 cbn -= sblk; 971 } else { 972 struct ccdiinfo *ii; 973 int ccdisk, off; 974 975 /* 976 * Calculate cbn, the logical superblock (sc_ileave chunks), 977 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 978 * to cbn. 979 */ 980 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 981 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 982 983 /* 984 * Figure out which interleave table to use. 985 */ 986 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 987 if (ii->ii_startblk > cbn) 988 break; 989 } 990 ii--; 991 992 /* 993 * off is the logical superblock relative to the beginning 994 * of this interleave block. 995 */ 996 off = cbn - ii->ii_startblk; 997 998 /* 999 * We must calculate which disk component to use (ccdisk), 1000 * and recalculate cbn to be the superblock relative to 1001 * the beginning of the component. This is typically done by 1002 * adding 'off' and ii->ii_startoff together. However, 'off' 1003 * must typically be divided by the number of components in 1004 * this interleave array to be properly convert it from a 1005 * CCD-relative logical superblock number to a 1006 * component-relative superblock number. 1007 */ 1008 if (ii->ii_ndisk == 1) { 1009 /* 1010 * When we have just one disk, it can't be a mirror 1011 * or a parity config. 1012 */ 1013 ccdisk = ii->ii_index[0]; 1014 cbn = ii->ii_startoff + off; 1015 } else { 1016 if (cs->sc_cflags & CCDF_MIRROR) { 1017 /* 1018 * We have forced a uniform mapping, resulting 1019 * in a single interleave array. We double 1020 * up on the first half of the available 1021 * components and our mirror is in the second 1022 * half. 
This only works with a single 1023 * interleave array because doubling up 1024 * doubles the number of sectors, so there 1025 * cannot be another interleave array because 1026 * the next interleave array's calculations 1027 * would be off. 1028 */ 1029 int ndisk2 = ii->ii_ndisk / 2; 1030 ccdisk = ii->ii_index[off % ndisk2]; 1031 cbn = ii->ii_startoff + off / ndisk2; 1032 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1033 } else if (cs->sc_cflags & CCDF_PARITY) { 1034 /* 1035 * XXX not implemented yet 1036 */ 1037 int ndisk2 = ii->ii_ndisk - 1; 1038 ccdisk = ii->ii_index[off % ndisk2]; 1039 cbn = ii->ii_startoff + off / ndisk2; 1040 if (cbn % ii->ii_ndisk <= ccdisk) 1041 ccdisk++; 1042 } else { 1043 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1044 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1045 } 1046 } 1047 1048 ci = &cs->sc_cinfo[ccdisk]; 1049 1050 /* 1051 * Convert cbn from a superblock to a normal block so it 1052 * can be used to calculate (along with cboff) the normal 1053 * block index into this particular disk. 1054 */ 1055 cbn *= cs->sc_ileave; 1056 } 1057 1058 /* 1059 * Fill in the component buf structure. 1060 */ 1061 cbp = getccdbuf(NULL); 1062 cbp->cb_buf.bio_cmd = bp->bio_cmd; 1063 cbp->cb_buf.bio_done = ccdiodone; 1064 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ 1065 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; 1066 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1067 cbp->cb_buf.bio_data = addr; 1068 if (cs->sc_ileave == 0) 1069 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1070 else 1071 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1072 cbp->cb_buf.bio_bcount = (cbc < bcount) ? 
cbc : bcount; 1073 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 1074 1075 /* 1076 * context for ccdiodone 1077 */ 1078 cbp->cb_obp = bp; 1079 cbp->cb_unit = cs->sc_unit; 1080 cbp->cb_comp = ci - cs->sc_cinfo; 1081 1082 #ifdef DEBUG 1083 if (ccddebug & CCDB_IO) 1084 printf(" dev %p(u%ld): cbp %p bn %lld addr %p bcnt %ld\n", 1085 ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp, 1086 (long long)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1087 cbp->cb_buf.bio_bcount); 1088 #endif 1089 cb[0] = cbp; 1090 1091 /* 1092 * Note: both I/O's setup when reading from mirror, but only one 1093 * will be executed. 1094 */ 1095 if (cs->sc_cflags & CCDF_MIRROR) { 1096 /* mirror, setup second I/O */ 1097 cbp = getccdbuf(cb[0]); 1098 cbp->cb_buf.bio_dev = ci2->ci_dev; 1099 cbp->cb_comp = ci2 - cs->sc_cinfo; 1100 cb[1] = cbp; 1101 /* link together the ccdbuf's and clear "mirror done" flag */ 1102 cb[0]->cb_mirror = cb[1]; 1103 cb[1]->cb_mirror = cb[0]; 1104 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1105 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1106 } 1107 } 1108 1109 static void 1110 ccdintr(struct ccd_s *cs, struct bio *bp) 1111 { 1112 #ifdef DEBUG 1113 if (ccddebug & CCDB_FOLLOW) 1114 printf("ccdintr(%p, %p)\n", cs, bp); 1115 #endif 1116 /* 1117 * Request is done for better or worse, wakeup the top half. 1118 */ 1119 if (bp->bio_flags & BIO_ERROR) 1120 bp->bio_resid = bp->bio_bcount; 1121 biofinish(bp, &cs->device_stats, 0); 1122 } 1123 1124 /* 1125 * Called at interrupt time. 1126 * Mark the component as done and if all components are done, 1127 * take a ccd interrupt. 
1128 */ 1129 static void 1130 ccdiodone(struct bio *ibp) 1131 { 1132 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 1133 struct bio *bp = cbp->cb_obp; 1134 int unit = cbp->cb_unit; 1135 int count, s; 1136 1137 s = splbio(); 1138 #ifdef DEBUG 1139 if (ccddebug & CCDB_FOLLOW) 1140 printf("ccdiodone(%p)\n", cbp); 1141 if (ccddebug & CCDB_IO) { 1142 printf("ccdiodone: bp %p bcount %ld resid %ld\n", 1143 bp, bp->bio_bcount, bp->bio_resid); 1144 printf(" dev %p(u%d), cbp %p bn %lld addr %p bcnt %ld\n", 1145 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, 1146 (long long)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1147 cbp->cb_buf.bio_bcount); 1148 } 1149 #endif 1150 /* 1151 * If an error occured, report it. If this is a mirrored 1152 * configuration and the first of two possible reads, do not 1153 * set the error in the bp yet because the second read may 1154 * succeed. 1155 */ 1156 1157 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1158 const char *msg = ""; 1159 1160 if ((ccdfind(unit)->sc_cflags & CCDF_MIRROR) && 1161 (cbp->cb_buf.bio_cmd == BIO_READ) && 1162 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1163 /* 1164 * We will try our read on the other disk down 1165 * below, also reverse the default pick so if we 1166 * are doing a scan we do not keep hitting the 1167 * bad disk first. 1168 */ 1169 struct ccd_s *cs = ccdfind(unit); 1170 1171 msg = ", trying other disk"; 1172 cs->sc_pick = 1 - cs->sc_pick; 1173 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 1174 } else { 1175 bp->bio_flags |= BIO_ERROR; 1176 bp->bio_error = cbp->cb_buf.bio_error ? 1177 cbp->cb_buf.bio_error : EIO; 1178 } 1179 printf("ccd%d: error %d on component %d block %d (ccd block %lld)%s\n", 1180 unit, bp->bio_error, cbp->cb_comp, 1181 (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg); 1182 } 1183 1184 /* 1185 * Process mirror. If we are writing, I/O has been initiated on both 1186 * buffers and we fall through only after both are finished. 1187 * 1188 * If we are reading only one I/O is initiated at a time. 
If an 1189 * error occurs we initiate the second I/O and return, otherwise 1190 * we free the second I/O without initiating it. 1191 */ 1192 1193 if (ccdfind(unit)->sc_cflags & CCDF_MIRROR) { 1194 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 1195 /* 1196 * When writing, handshake with the second buffer 1197 * to determine when both are done. If both are not 1198 * done, return here. 1199 */ 1200 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1201 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1202 putccdbuf(cbp); 1203 splx(s); 1204 return; 1205 } 1206 } else { 1207 /* 1208 * When reading, either dispose of the second buffer 1209 * or initiate I/O on the second buffer if an error 1210 * occured with this one. 1211 */ 1212 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1213 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1214 cbp->cb_mirror->cb_pflags |= 1215 CCDPF_MIRROR_DONE; 1216 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0); 1217 putccdbuf(cbp); 1218 splx(s); 1219 return; 1220 } else { 1221 putccdbuf(cbp->cb_mirror); 1222 /* fall through */ 1223 } 1224 } 1225 } 1226 } 1227 1228 /* 1229 * use bio_caller1 to determine how big the original request was rather 1230 * then bio_bcount, because bio_bcount may have been truncated for EOF. 1231 * 1232 * XXX We check for an error, but we do not test the resid for an 1233 * aligned EOF condition. This may result in character & block 1234 * device access not recognizing EOF properly when read or written 1235 * sequentially, but will not effect filesystems. 1236 */ 1237 count = (long)cbp->cb_buf.bio_caller1; 1238 putccdbuf(cbp); 1239 1240 /* 1241 * If all done, "interrupt". 
1242 */ 1243 bp->bio_resid -= count; 1244 if (bp->bio_resid < 0) 1245 panic("ccdiodone: count"); 1246 if (bp->bio_resid == 0) 1247 ccdintr(ccdfind(unit), bp); 1248 splx(s); 1249 } 1250 1251 static int 1252 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td) 1253 { 1254 int unit = ccdunit(dev); 1255 int i, j, lookedup = 0, error = 0; 1256 int part, pmask, s; 1257 struct ccd_s *cs; 1258 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1259 char **cpp; 1260 struct vnode **vpp; 1261 1262 if (!IS_ALLOCATED(unit)) 1263 return (ENXIO); 1264 cs = ccdfind(unit); 1265 1266 switch (cmd) { 1267 case CCDIOCSET: 1268 if (IS_INITED(cs)) 1269 return (EBUSY); 1270 1271 if ((flag & FWRITE) == 0) 1272 return (EBADF); 1273 1274 if ((error = ccdlock(cs)) != 0) 1275 return (error); 1276 1277 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1278 return (EINVAL); 1279 1280 /* Fill in some important bits. */ 1281 cs->sc_ileave = ccio->ccio_ileave; 1282 if (cs->sc_ileave == 0 && 1283 ((ccio->ccio_flags & CCDF_MIRROR) || 1284 (ccio->ccio_flags & CCDF_PARITY))) { 1285 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1286 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1287 } 1288 if ((ccio->ccio_flags & CCDF_MIRROR) && 1289 (ccio->ccio_flags & CCDF_PARITY)) { 1290 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1291 ccio->ccio_flags &= ~CCDF_PARITY; 1292 } 1293 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1294 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1295 printf("ccd%d: mirror/parity forces uniform flag\n", 1296 unit); 1297 ccio->ccio_flags |= CCDF_UNIFORM; 1298 } 1299 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1300 1301 /* 1302 * Allocate space for and copy in the array of 1303 * componet pathnames and device numbers. 
1304 */ 1305 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1306 M_DEVBUF, M_WAITOK); 1307 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1308 M_DEVBUF, M_WAITOK); 1309 1310 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1311 ccio->ccio_ndisks * sizeof(char **)); 1312 if (error) { 1313 free(vpp, M_DEVBUF); 1314 free(cpp, M_DEVBUF); 1315 ccdunlock(cs); 1316 return (error); 1317 } 1318 1319 #ifdef DEBUG 1320 if (ccddebug & CCDB_INIT) 1321 for (i = 0; i < ccio->ccio_ndisks; ++i) 1322 printf("ccdioctl: component %d: %p\n", 1323 i, cpp[i]); 1324 #endif 1325 1326 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1327 #ifdef DEBUG 1328 if (ccddebug & CCDB_INIT) 1329 printf("ccdioctl: lookedup = %d\n", lookedup); 1330 #endif 1331 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1332 for (j = 0; j < lookedup; ++j) 1333 (void)vn_close(vpp[j], FREAD|FWRITE, 1334 td->td_ucred, td); 1335 free(vpp, M_DEVBUF); 1336 free(cpp, M_DEVBUF); 1337 ccdunlock(cs); 1338 return (error); 1339 } 1340 ++lookedup; 1341 } 1342 cs->sc_vpp = vpp; 1343 cs->sc_nccdisks = ccio->ccio_ndisks; 1344 1345 /* 1346 * Initialize the ccd. Fills in the softc for us. 1347 */ 1348 if ((error = ccdinit(cs, cpp, td)) != 0) { 1349 for (j = 0; j < lookedup; ++j) 1350 (void)vn_close(vpp[j], FREAD|FWRITE, 1351 td->td_ucred, td); 1352 /* 1353 * We can't ccddestroy() cs just yet, because nothing 1354 * prevents user-level app to do another ioctl() 1355 * without closing the device first, therefore 1356 * declare unit null and void and let ccdclose() 1357 * destroy it when it is safe to do so. 1358 */ 1359 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1360 free(vpp, M_DEVBUF); 1361 free(cpp, M_DEVBUF); 1362 ccdunlock(cs); 1363 return (error); 1364 } 1365 1366 /* 1367 * The ccd has been successfully initialized, so 1368 * we can place it into the array and read the disklabel. 
1369 */ 1370 ccio->ccio_unit = unit; 1371 ccio->ccio_size = cs->sc_size; 1372 ccdgetdisklabel(dev); 1373 1374 ccdunlock(cs); 1375 1376 break; 1377 1378 case CCDIOCCLR: 1379 if (!IS_INITED(cs)) 1380 return (ENXIO); 1381 1382 if ((flag & FWRITE) == 0) 1383 return (EBADF); 1384 1385 if ((error = ccdlock(cs)) != 0) 1386 return (error); 1387 1388 /* Don't unconfigure if any other partitions are open */ 1389 part = ccdpart(dev); 1390 pmask = (1 << part); 1391 if ((cs->sc_openmask & ~pmask)) { 1392 ccdunlock(cs); 1393 return (EBUSY); 1394 } 1395 1396 /* Declare unit null and void (reset all flags) */ 1397 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1398 1399 /* Close the components and free their pathnames. */ 1400 for (i = 0; i < cs->sc_nccdisks; ++i) { 1401 /* 1402 * XXX: this close could potentially fail and 1403 * cause Bad Things. Maybe we need to force 1404 * the close to happen? 1405 */ 1406 #ifdef DEBUG 1407 if (ccddebug & CCDB_VNODE) 1408 vprint("CCDIOCCLR: vnode info", 1409 cs->sc_cinfo[i].ci_vp); 1410 #endif 1411 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1412 td->td_ucred, td); 1413 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1414 } 1415 1416 /* Free interleave index. */ 1417 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1418 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1419 1420 /* Free component info and interleave table. */ 1421 free(cs->sc_cinfo, M_DEVBUF); 1422 free(cs->sc_itable, M_DEVBUF); 1423 free(cs->sc_vpp, M_DEVBUF); 1424 1425 /* And remove the devstat entry. */ 1426 devstat_remove_entry(&cs->device_stats); 1427 1428 /* This must be atomic. 
*/ 1429 s = splhigh(); 1430 ccdunlock(cs); 1431 splx(s); 1432 1433 break; 1434 1435 case CCDCONFINFO: 1436 { 1437 int ninit = 0; 1438 struct ccdconf *conf = (struct ccdconf *)data; 1439 struct ccd_s *tmpcs; 1440 struct ccd_s *ubuf = conf->buffer; 1441 1442 /* XXX: LOCK(unique unit numbers) */ 1443 LIST_FOREACH(tmpcs, &ccd_softc_list, list) 1444 if (IS_INITED(tmpcs)) 1445 ninit++; 1446 1447 if (conf->size == 0) { 1448 conf->size = sizeof(struct ccd_s) * ninit; 1449 break; 1450 } else if ((conf->size / sizeof(struct ccd_s) != ninit) || 1451 (conf->size % sizeof(struct ccd_s) != 0)) { 1452 /* XXX: UNLOCK(unique unit numbers) */ 1453 return (EINVAL); 1454 } 1455 1456 ubuf += ninit; 1457 LIST_FOREACH(tmpcs, &ccd_softc_list, list) { 1458 if (!IS_INITED(tmpcs)) 1459 continue; 1460 error = copyout(tmpcs, --ubuf, 1461 sizeof(struct ccd_s)); 1462 if (error != 0) 1463 /* XXX: UNLOCK(unique unit numbers) */ 1464 return (error); 1465 } 1466 /* XXX: UNLOCK(unique unit numbers) */ 1467 } 1468 break; 1469 1470 case CCDCPPINFO: 1471 if (!IS_INITED(cs)) 1472 return (ENXIO); 1473 1474 { 1475 int len = 0; 1476 struct ccdcpps *cpps = (struct ccdcpps *)data; 1477 char *ubuf = cpps->buffer; 1478 1479 1480 for (i = 0; i < cs->sc_nccdisks; ++i) 1481 len += cs->sc_cinfo[i].ci_pathlen; 1482 1483 if (cpps->size == 0) { 1484 cpps->size = len; 1485 break; 1486 } else if (cpps->size != len) { 1487 return (EINVAL); 1488 } 1489 1490 for (i = 0; i < cs->sc_nccdisks; ++i) { 1491 len = cs->sc_cinfo[i].ci_pathlen; 1492 error = copyout(cs->sc_cinfo[i].ci_path, ubuf, 1493 len); 1494 if (error != 0) 1495 return (error); 1496 ubuf += len; 1497 } 1498 } 1499 break; 1500 1501 case DIOCGDINFO: 1502 if (!IS_INITED(cs)) 1503 return (ENXIO); 1504 1505 *(struct disklabel *)data = cs->sc_label; 1506 break; 1507 1508 case DIOCWDINFO: 1509 case DIOCSDINFO: 1510 if (!IS_INITED(cs)) 1511 return (ENXIO); 1512 1513 if ((flag & FWRITE) == 0) 1514 return (EBADF); 1515 1516 if ((error = ccdlock(cs)) != 0) 1517 return 
(error); 1518 1519 cs->sc_flags |= CCDF_LABELLING; 1520 1521 error = setdisklabel(&cs->sc_label, 1522 (struct disklabel *)data, 0); 1523 if (error == 0) { 1524 if (cmd == DIOCWDINFO) 1525 error = writedisklabel(CCDLABELDEV(dev), 1526 &cs->sc_label); 1527 } 1528 1529 cs->sc_flags &= ~CCDF_LABELLING; 1530 1531 ccdunlock(cs); 1532 1533 if (error) 1534 return (error); 1535 break; 1536 1537 case DIOCWLABEL: 1538 if (!IS_INITED(cs)) 1539 return (ENXIO); 1540 1541 if ((flag & FWRITE) == 0) 1542 return (EBADF); 1543 if (*(int *)data != 0) 1544 cs->sc_flags |= CCDF_WLABEL; 1545 else 1546 cs->sc_flags &= ~CCDF_WLABEL; 1547 break; 1548 1549 default: 1550 return (ENOTTY); 1551 } 1552 1553 return (0); 1554 } 1555 1556 static int 1557 ccdsize(dev_t dev) 1558 { 1559 struct ccd_s *cs; 1560 int part, size; 1561 1562 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1563 return (-1); 1564 1565 cs = ccdfind(ccdunit(dev)); 1566 part = ccdpart(dev); 1567 1568 if (!IS_INITED(cs)) 1569 return (-1); 1570 1571 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1572 size = -1; 1573 else 1574 size = cs->sc_label.d_partitions[part].p_size; 1575 1576 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1577 return (-1); 1578 1579 return (size); 1580 } 1581 1582 /* 1583 * Lookup the provided name in the filesystem. If the file exists, 1584 * is a valid block device, and isn't being used by anyone else, 1585 * set *vpp to the file's vnode. 
1586 */ 1587 static int 1588 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1589 { 1590 struct nameidata nd; 1591 struct vnode *vp; 1592 int error, flags; 1593 1594 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td); 1595 flags = FREAD | FWRITE; 1596 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1597 #ifdef DEBUG 1598 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 1599 printf("ccdlookup: vn_open error = %d\n", error); 1600 #endif 1601 return (error); 1602 } 1603 vp = nd.ni_vp; 1604 1605 if (vp->v_usecount > 1) { 1606 error = EBUSY; 1607 goto bad; 1608 } 1609 1610 if (!vn_isdisk(vp, &error)) 1611 goto bad; 1612 1613 #ifdef DEBUG 1614 if (ccddebug & CCDB_VNODE) 1615 vprint("ccdlookup: vnode info", vp); 1616 #endif 1617 1618 VOP_UNLOCK(vp, 0, td); 1619 NDFREE(&nd, NDF_ONLY_PNBUF); 1620 *vpp = vp; 1621 return (0); 1622 bad: 1623 VOP_UNLOCK(vp, 0, td); 1624 NDFREE(&nd, NDF_ONLY_PNBUF); 1625 /* vn_close does vrele() for vp */ 1626 (void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 1627 return (error); 1628 } 1629 1630 /* 1631 * Read the disklabel from the ccd. If one is not present, fake one 1632 * up. 
1633 */ 1634 static void 1635 ccdgetdisklabel(dev_t dev) 1636 { 1637 int unit = ccdunit(dev); 1638 struct ccd_s *cs = ccdfind(unit); 1639 char *errstring; 1640 struct disklabel *lp = &cs->sc_label; 1641 struct ccdgeom *ccg = &cs->sc_geom; 1642 1643 bzero(lp, sizeof(*lp)); 1644 1645 lp->d_secperunit = cs->sc_size; 1646 lp->d_secsize = ccg->ccg_secsize; 1647 lp->d_nsectors = ccg->ccg_nsectors; 1648 lp->d_ntracks = ccg->ccg_ntracks; 1649 lp->d_ncylinders = ccg->ccg_ncylinders; 1650 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1651 1652 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1653 lp->d_type = DTYPE_CCD; 1654 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1655 lp->d_rpm = 3600; 1656 lp->d_interleave = 1; 1657 lp->d_flags = 0; 1658 1659 lp->d_partitions[RAW_PART].p_offset = 0; 1660 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1661 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1662 lp->d_npartitions = RAW_PART + 1; 1663 1664 lp->d_bbsize = BBSIZE; /* XXX */ 1665 lp->d_sbsize = 0; 1666 1667 lp->d_magic = DISKMAGIC; 1668 lp->d_magic2 = DISKMAGIC; 1669 lp->d_checksum = dkcksum(&cs->sc_label); 1670 1671 /* 1672 * Call the generic disklabel extraction routine. 1673 */ 1674 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1675 if (errstring != NULL) 1676 ccdmakedisklabel(cs); 1677 1678 #ifdef DEBUG 1679 /* It's actually extremely common to have unlabeled ccds. */ 1680 if (ccddebug & CCDB_LABEL) 1681 if (errstring != NULL) 1682 printf("ccd%d: %s\n", unit, errstring); 1683 #endif 1684 } 1685 1686 /* 1687 * Take care of things one might want to take care of in the event 1688 * that a disklabel isn't present. 1689 */ 1690 static void 1691 ccdmakedisklabel(struct ccd_s *cs) 1692 { 1693 struct disklabel *lp = &cs->sc_label; 1694 1695 /* 1696 * For historical reasons, if there's no disklabel present 1697 * the raw partition must be marked FS_BSDFFS. 
1698 */ 1699 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1700 1701 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1702 } 1703 1704 /* 1705 * Wait interruptibly for an exclusive lock. 1706 * 1707 * XXX 1708 * Several drivers do this; it should be abstracted and made MP-safe. 1709 */ 1710 static int 1711 ccdlock(struct ccd_s *cs) 1712 { 1713 int error; 1714 1715 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1716 cs->sc_flags |= CCDF_WANTED; 1717 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1718 return (error); 1719 } 1720 cs->sc_flags |= CCDF_LOCKED; 1721 return (0); 1722 } 1723 1724 /* 1725 * Unlock and wake up any waiters. 1726 */ 1727 static void 1728 ccdunlock(struct ccd_s *cs) 1729 { 1730 1731 cs->sc_flags &= ~CCDF_LOCKED; 1732 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1733 cs->sc_flags &= ~CCDF_WANTED; 1734 wakeup(cs); 1735 } 1736 } 1737 1738 #ifdef DEBUG 1739 static void 1740 printiinfo(struct ccdiinfo *ii) 1741 { 1742 int ix, i; 1743 1744 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1745 printf(" itab[%d]: #dk %d sblk %lld soff %lld", 1746 ix, ii->ii_ndisk, (long long)ii->ii_startblk, 1747 (long long)ii->ii_startoff); 1748 for (i = 0; i < ii->ii_ndisk; i++) 1749 printf(" %d", ii->ii_index[i]); 1750 printf("\n"); 1751 } 1752 } 1753 #endif 1754