1 /* $FreeBSD$ */ 2 3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 4 5 /* 6 * Copyright (c) 1995 Jason R. Thorpe. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project 20 * by Jason R. Thorpe. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Copyright (c) 1988 University of Utah. 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. All advertising materials mentioning features or use of this software 55 * must display the following acknowledgement: 56 * This product includes software developed by the University of 57 * California, Berkeley and its contributors. 58 * 4. Neither the name of the University nor the names of its contributors 59 * may be used to endorse or promote products derived from this software 60 * without specific prior written permission. 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 72 * SUCH DAMAGE. 73 * 74 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 75 * 76 * @(#)cd.c 8.2 (Berkeley) 11/16/93 77 */ 78 79 /* 80 * "Concatenated" disk driver. 81 * 82 * Dynamic configuration and disklabel support by: 83 * Jason R. Thorpe <thorpej@nas.nasa.gov> 84 * Numerical Aerodynamic Simulation Facility 85 * Mail Stop 258-6 86 * NASA Ames Research Center 87 * Moffett Field, CA 94035 88 */ 89 90 #include <sys/param.h> 91 #include <sys/systm.h> 92 #include <sys/kernel.h> 93 #include <sys/module.h> 94 #include <sys/proc.h> 95 #include <sys/bio.h> 96 #include <sys/malloc.h> 97 #include <sys/namei.h> 98 #include <sys/conf.h> 99 #include <sys/stat.h> 100 #include <sys/sysctl.h> 101 #include <sys/disk.h> 102 #include <ufs/ffs/fs.h> 103 #include <sys/devicestat.h> 104 #include <sys/fcntl.h> 105 #include <sys/vnode.h> 106 107 #include <sys/ccdvar.h> 108 109 MALLOC_DEFINE(M_CCD, "CCD driver", "Concatenated Disk driver"); 110 111 #if defined(CCDDEBUG) && !defined(DEBUG) 112 #define DEBUG 113 #endif 114 115 #ifdef DEBUG 116 #define CCDB_FOLLOW 0x01 117 #define CCDB_INIT 0x02 118 #define CCDB_IO 0x04 119 #define CCDB_LABEL 0x08 120 #define CCDB_VNODE 0x10 121 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 122 CCDB_VNODE; 123 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 124 #endif 125 126 #define ccdunit(x) dkunit(x) 127 #define ccdpart(x) dkpart(x) 128 129 /* 130 This is how mirroring works (only writes are special): 131 132 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 133 linked together by the cb_mirror field. "cb_pflags & 134 CCDPF_MIRROR_DONE" is set to 0 on both of them. 135 136 When a component returns to ccdiodone(), it checks if "cb_pflags & 137 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 138 flag and returns. If it is, it means its partner has already 139 returned, so it will go to the regular cleanup. 140 141 */ 142 143 struct ccdbuf { 144 struct bio cb_buf; /* new I/O buf */ 145 struct bio *cb_obp; /* ptr. to original I/O buf */ 146 struct ccdbuf *cb_freenext; /* free list link */ 147 int cb_unit; /* target unit */ 148 int cb_comp; /* target component */ 149 int cb_pflags; /* mirror/parity status flag */ 150 struct ccdbuf *cb_mirror; /* mirror counterpart */ 151 }; 152 153 /* bits in cb_pflags */ 154 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 155 156 #define CCDLABELDEV(dev) \ 157 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 158 159 /* convinient macros for often-used statements */ 160 #define IS_ALLOCATED(unit) (ccdfind(unit) != NULL) 161 #define IS_INITED(cs) (((cs)->sc_flags & CCDF_INITED) != 0) 162 163 static d_open_t ccdopen; 164 static d_close_t ccdclose; 165 static d_strategy_t ccdstrategy; 166 static d_ioctl_t ccdioctl; 167 static d_psize_t ccdsize; 168 169 #define NCCDFREEHIWAT 16 170 171 #define CDEV_MAJOR 74 172 173 static struct cdevsw ccd_cdevsw = { 174 /* open */ ccdopen, 175 /* close */ ccdclose, 176 /* read */ physread, 177 /* write */ physwrite, 178 /* ioctl */ ccdioctl, 179 /* poll */ nopoll, 180 /* mmap */ nommap, 181 /* strategy */ ccdstrategy, 182 /* name */ "ccd", 183 /* maj */ CDEV_MAJOR, 184 /* dump */ nodump, 185 /* psize */ ccdsize, 186 /* flags */ D_DISK, 187 }; 188 static LIST_HEAD(, ccd_s) ccd_softc_list = LIST_HEAD_INITIALIZER(&ccd_softc_list); 189 190 static struct ccd_s *ccdfind(int); 191 static struct ccd_s *ccdnew(int); 192 static int ccddestroy(struct ccd_s *, struct proc *); 193 194 /* called during module initialization */ 195 static void ccdattach(void); 196 static int ccd_modevent(module_t, int, void *); 197 198 /* called by biodone() at interrupt time */ 199 static void ccdiodone(struct bio *bp); 200 201 static void ccdstart(struct ccd_s *, struct bio *); 202 static void ccdinterleave(struct ccd_s *, int); 203 static void ccdintr(struct ccd_s *, struct bio *); 204 static int ccdinit(struct ccd_s *, char **, struct thread *); 205 static int ccdlookup(char *, struct thread *p, struct vnode **); 206 static void ccdbuffer(struct ccdbuf **ret, struct ccd_s *, 207 struct bio *, daddr_t, caddr_t, long); 208 static void ccdgetdisklabel(dev_t); 209 static void ccdmakedisklabel(struct ccd_s *); 210 static int ccdlock(struct ccd_s *); 211 static void ccdunlock(struct ccd_s *); 212 213 #ifdef DEBUG 214 static void printiinfo(struct ccdiinfo *); 215 #endif 216 217 /* Non-private for the benefit of libkvm. */ 218 struct ccdbuf *ccdfreebufs; 219 static int numccdfreebufs; 220 221 /* 222 * getccdbuf() - Allocate and zero a ccd buffer. 223 * 224 * This routine is called at splbio(). 225 */ 226 227 static __inline 228 struct ccdbuf * 229 getccdbuf(struct ccdbuf *cpy) 230 { 231 struct ccdbuf *cbp; 232 233 /* 234 * Allocate from freelist or malloc as necessary 235 */ 236 if ((cbp = ccdfreebufs) != NULL) { 237 ccdfreebufs = cbp->cb_freenext; 238 --numccdfreebufs; 239 } else { 240 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 241 } 242 243 /* 244 * Used by mirroring code 245 */ 246 if (cpy) 247 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 248 else 249 bzero(cbp, sizeof(struct ccdbuf)); 250 251 /* 252 * independant struct bio initialization 253 */ 254 255 return(cbp); 256 } 257 258 /* 259 * putccdbuf() - Free a ccd buffer. 260 * 261 * This routine is called at splbio(). 262 */ 263 264 static __inline 265 void 266 putccdbuf(struct ccdbuf *cbp) 267 { 268 269 if (numccdfreebufs < NCCDFREEHIWAT) { 270 cbp->cb_freenext = ccdfreebufs; 271 ccdfreebufs = cbp; 272 ++numccdfreebufs; 273 } else { 274 free((caddr_t)cbp, M_DEVBUF); 275 } 276 } 277 278 279 /* 280 * Number of blocks to untouched in front of a component partition. 281 * This is to avoid violating its disklabel area when it starts at the 282 * beginning of the slice. 283 */ 284 #if !defined(CCD_OFFSET) 285 #define CCD_OFFSET 16 286 #endif 287 288 static struct ccd_s * 289 ccdfind(int unit) 290 { 291 struct ccd_s *sc = NULL; 292 293 /* XXX: LOCK(unique unit numbers) */ 294 LIST_FOREACH(sc, &ccd_softc_list, list) { 295 if (sc->sc_unit == unit) 296 break; 297 } 298 /* XXX: UNLOCK(unique unit numbers) */ 299 return ((sc == NULL) || (sc->sc_unit != unit) ? NULL : sc); 300 } 301 302 static struct ccd_s * 303 ccdnew(int unit) 304 { 305 struct ccd_s *sc; 306 307 /* XXX: LOCK(unique unit numbers) */ 308 if (IS_ALLOCATED(unit) || unit > DKMAXUNIT) 309 return (NULL); 310 311 MALLOC(sc, struct ccd_s *, sizeof(*sc), M_CCD, M_WAITOK | M_ZERO); 312 sc->sc_unit = unit; 313 LIST_INSERT_HEAD(&ccd_softc_list, sc, list); 314 /* XXX: UNLOCK(unique unit numbers) */ 315 return (sc); 316 } 317 318 static int 319 ccddestroy(struct ccd_s *sc, struct proc *p) 320 { 321 322 /* XXX: LOCK(unique unit numbers) */ 323 LIST_REMOVE(sc, list); 324 /* XXX: UNLOCK(unique unit numbers) */ 325 FREE(sc, M_CCD); 326 return (0); 327 } 328 329 static void 330 ccd_clone(void *arg, char *name, int namelen, dev_t *dev) 331 { 332 int i, u; 333 char *s; 334 335 if (*dev != NODEV) 336 return; 337 i = dev_stdclone(name, &s, "ccd", &u); 338 if (i != 2) 339 return; 340 if (*s < 'a' || *s > 'h') 341 return; 342 if (s[1] != '\0') 343 return; 344 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', 345 UID_ROOT, GID_OPERATOR, 0640, name); 346 } 347 348 /* 349 * Called by main() during pseudo-device attachment. All we need 350 * to do is to add devsw entries. 351 */ 352 static void 353 ccdattach() 354 { 355 356 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); 357 } 358 359 static int 360 ccd_modevent(module_t mod, int type, void *data) 361 { 362 int error = 0; 363 364 switch (type) { 365 case MOD_LOAD: 366 ccdattach(); 367 break; 368 369 case MOD_UNLOAD: 370 printf("ccd0: Unload not supported!\n"); 371 error = EOPNOTSUPP; 372 break; 373 374 case MOD_SHUTDOWN: 375 break; 376 377 default: 378 error = EOPNOTSUPP; 379 } 380 return (error); 381 } 382 383 DEV_MODULE(ccd, ccd_modevent, NULL); 384 385 static int 386 ccdinit(struct ccd_s *cs, char **cpaths, struct thread *td) 387 { 388 struct ccdcinfo *ci = NULL; /* XXX */ 389 size_t size; 390 int ix; 391 struct vnode *vp; 392 size_t minsize; 393 int maxsecsize; 394 struct ccdgeom *ccg = &cs->sc_geom; 395 char *tmppath = NULL; 396 int error = 0; 397 off_t mediasize; 398 u_int sectorsize; 399 400 #ifdef DEBUG 401 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 402 printf("ccdinit: unit %d\n", cs->sc_unit); 403 #endif 404 405 cs->sc_size = 0; 406 407 /* Allocate space for the component info. */ 408 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 409 M_DEVBUF, M_WAITOK); 410 411 /* 412 * Verify that each component piece exists and record 413 * relevant information about it. 414 */ 415 maxsecsize = 0; 416 minsize = 0; 417 tmppath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK); 418 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 419 vp = cs->sc_vpp[ix]; 420 ci = &cs->sc_cinfo[ix]; 421 ci->ci_vp = vp; 422 423 /* 424 * Copy in the pathname of the component. 425 */ 426 if ((error = copyinstr(cpaths[ix], tmppath, 427 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 428 #ifdef DEBUG 429 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 430 printf("ccd%d: can't copy path, error = %d\n", 431 cs->sc_unit, error); 432 #endif 433 goto fail; 434 } 435 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 436 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 437 438 ci->ci_dev = vn_todev(vp); 439 440 /* 441 * Get partition information for the component. 442 */ 443 error = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize, 444 FREAD, td->td_ucred, td); 445 if (error != 0) { 446 #ifdef DEBUG 447 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 448 printf("ccd%d: %s: ioctl failed, error = %d\n", 449 cs->sc_unit, ci->ci_path, error); 450 #endif 451 goto fail; 452 } 453 /* 454 * Get partition information for the component. 455 */ 456 error = VOP_IOCTL(vp, DIOCGSECTORSIZE, (caddr_t)§orsize, 457 FREAD, td->td_ucred, td); 458 if (error != 0) { 459 #ifdef DEBUG 460 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 461 printf("ccd%d: %s: ioctl failed, error = %d\n", 462 cs->sc_unit, ci->ci_path, error); 463 #endif 464 goto fail; 465 } 466 if (sectorsize > maxsecsize) 467 maxsecsize = sectorsize; 468 size = mediasize / DEV_BSIZE - CCD_OFFSET; 469 470 /* 471 * Calculate the size, truncating to an interleave 472 * boundary if necessary. 473 */ 474 475 if (cs->sc_ileave > 1) 476 size -= size % cs->sc_ileave; 477 478 if (size == 0) { 479 #ifdef DEBUG 480 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 481 printf("ccd%d: %s: size == 0\n", 482 cs->sc_unit, ci->ci_path); 483 #endif 484 error = ENODEV; 485 goto fail; 486 } 487 488 if (minsize == 0 || size < minsize) 489 minsize = size; 490 ci->ci_size = size; 491 cs->sc_size += size; 492 } 493 494 free(tmppath, M_DEVBUF); 495 tmppath = NULL; 496 497 /* 498 * Don't allow the interleave to be smaller than 499 * the biggest component sector. 500 */ 501 if ((cs->sc_ileave > 0) && 502 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 503 #ifdef DEBUG 504 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 505 printf("ccd%d: interleave must be at least %d\n", 506 cs->sc_unit, (maxsecsize / DEV_BSIZE)); 507 #endif 508 error = EINVAL; 509 goto fail; 510 } 511 512 /* 513 * If uniform interleave is desired set all sizes to that of 514 * the smallest component. This will guarentee that a single 515 * interleave table is generated. 516 * 517 * Lost space must be taken into account when calculating the 518 * overall size. Half the space is lost when CCDF_MIRROR is 519 * specified. One disk is lost when CCDF_PARITY is specified. 520 */ 521 if (cs->sc_flags & CCDF_UNIFORM) { 522 for (ci = cs->sc_cinfo; 523 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 524 ci->ci_size = minsize; 525 } 526 if (cs->sc_flags & CCDF_MIRROR) { 527 /* 528 * Check to see if an even number of components 529 * have been specified. The interleave must also 530 * be non-zero in order for us to be able to 531 * guarentee the topology. 532 */ 533 if (cs->sc_nccdisks % 2) { 534 printf("ccd%d: mirroring requires an even number of disks\n", cs->sc_unit ); 535 error = EINVAL; 536 goto fail; 537 } 538 if (cs->sc_ileave == 0) { 539 printf("ccd%d: an interleave must be specified when mirroring\n", cs->sc_unit); 540 error = EINVAL; 541 goto fail; 542 } 543 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 544 } else if (cs->sc_flags & CCDF_PARITY) { 545 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 546 } else { 547 if (cs->sc_ileave == 0) { 548 printf("ccd%d: an interleave must be specified when using parity\n", cs->sc_unit); 549 error = EINVAL; 550 goto fail; 551 } 552 cs->sc_size = cs->sc_nccdisks * minsize; 553 } 554 } 555 556 /* 557 * Construct the interleave table. 558 */ 559 ccdinterleave(cs, cs->sc_unit); 560 561 /* 562 * Create pseudo-geometry based on 1MB cylinders. It's 563 * pretty close. 564 */ 565 ccg->ccg_secsize = maxsecsize; 566 ccg->ccg_ntracks = 1; 567 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 568 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 569 570 /* 571 * Add an devstat entry for this device. 572 */ 573 devstat_add_entry(&cs->device_stats, "ccd", cs->sc_unit, 574 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 575 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 576 DEVSTAT_PRIORITY_ARRAY); 577 578 cs->sc_flags |= CCDF_INITED; 579 cs->sc_cflags = cs->sc_flags; /* So we can find out later... */ 580 return (0); 581 fail: 582 while (ci > cs->sc_cinfo) { 583 ci--; 584 free(ci->ci_path, M_DEVBUF); 585 } 586 if (tmppath != NULL) 587 free(tmppath, M_DEVBUF); 588 free(cs->sc_cinfo, M_DEVBUF); 589 return (error); 590 } 591 592 static void 593 ccdinterleave(struct ccd_s *cs, int unit) 594 { 595 struct ccdcinfo *ci, *smallci; 596 struct ccdiinfo *ii; 597 daddr_t bn, lbn; 598 int ix; 599 u_long size; 600 601 #ifdef DEBUG 602 if (ccddebug & CCDB_INIT) 603 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 604 #endif 605 606 /* 607 * Allocate an interleave table. The worst case occurs when each 608 * of N disks is of a different size, resulting in N interleave 609 * tables. 610 * 611 * Chances are this is too big, but we don't care. 612 */ 613 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 614 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, 615 M_WAITOK | M_ZERO); 616 617 /* 618 * Trivial case: no interleave (actually interleave of disk size). 619 * Each table entry represents a single component in its entirety. 620 * 621 * An interleave of 0 may not be used with a mirror or parity setup. 622 */ 623 if (cs->sc_ileave == 0) { 624 bn = 0; 625 ii = cs->sc_itable; 626 627 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 628 /* Allocate space for ii_index. */ 629 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 630 ii->ii_ndisk = 1; 631 ii->ii_startblk = bn; 632 ii->ii_startoff = 0; 633 ii->ii_index[0] = ix; 634 bn += cs->sc_cinfo[ix].ci_size; 635 ii++; 636 } 637 ii->ii_ndisk = 0; 638 #ifdef DEBUG 639 if (ccddebug & CCDB_INIT) 640 printiinfo(cs->sc_itable); 641 #endif 642 return; 643 } 644 645 /* 646 * The following isn't fast or pretty; it doesn't have to be. 647 */ 648 size = 0; 649 bn = lbn = 0; 650 for (ii = cs->sc_itable; ; ii++) { 651 /* 652 * Allocate space for ii_index. We might allocate more then 653 * we use. 654 */ 655 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 656 M_DEVBUF, M_WAITOK); 657 658 /* 659 * Locate the smallest of the remaining components 660 */ 661 smallci = NULL; 662 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 663 ci++) { 664 if (ci->ci_size > size && 665 (smallci == NULL || 666 ci->ci_size < smallci->ci_size)) { 667 smallci = ci; 668 } 669 } 670 671 /* 672 * Nobody left, all done 673 */ 674 if (smallci == NULL) { 675 ii->ii_ndisk = 0; 676 break; 677 } 678 679 /* 680 * Record starting logical block using an sc_ileave blocksize. 681 */ 682 ii->ii_startblk = bn / cs->sc_ileave; 683 684 /* 685 * Record starting comopnent block using an sc_ileave 686 * blocksize. This value is relative to the beginning of 687 * a component disk. 688 */ 689 ii->ii_startoff = lbn; 690 691 /* 692 * Determine how many disks take part in this interleave 693 * and record their indices. 694 */ 695 ix = 0; 696 for (ci = cs->sc_cinfo; 697 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 698 if (ci->ci_size >= smallci->ci_size) { 699 ii->ii_index[ix++] = ci - cs->sc_cinfo; 700 } 701 } 702 ii->ii_ndisk = ix; 703 bn += ix * (smallci->ci_size - size); 704 lbn = smallci->ci_size / cs->sc_ileave; 705 size = smallci->ci_size; 706 } 707 #ifdef DEBUG 708 if (ccddebug & CCDB_INIT) 709 printiinfo(cs->sc_itable); 710 #endif 711 } 712 713 /* ARGSUSED */ 714 static int 715 ccdopen(dev_t dev, int flags, int fmt, struct thread *td) 716 { 717 int unit = ccdunit(dev); 718 struct ccd_s *cs; 719 struct disklabel *lp; 720 int error = 0, part, pmask; 721 722 #ifdef DEBUG 723 if (ccddebug & CCDB_FOLLOW) 724 printf("ccdopen(%p, %x)\n", dev, flags); 725 #endif 726 727 cs = IS_ALLOCATED(unit) ? ccdfind(unit) : ccdnew(unit); 728 729 if ((error = ccdlock(cs)) != 0) 730 return (error); 731 732 lp = &cs->sc_label; 733 734 part = ccdpart(dev); 735 pmask = (1 << part); 736 737 /* 738 * If we're initialized, check to see if there are any other 739 * open partitions. If not, then it's safe to update 740 * the in-core disklabel. 741 */ 742 if (IS_INITED(cs) && (cs->sc_openmask == 0)) 743 ccdgetdisklabel(dev); 744 745 /* Check that the partition exists. */ 746 if (part != RAW_PART && ((part >= lp->d_npartitions) || 747 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 748 error = ENXIO; 749 goto done; 750 } 751 752 cs->sc_openmask |= pmask; 753 done: 754 ccdunlock(cs); 755 return (0); 756 } 757 758 /* ARGSUSED */ 759 static int 760 ccdclose(dev_t dev, int flags, int fmt, struct thread *td) 761 { 762 int unit = ccdunit(dev); 763 struct ccd_s *cs; 764 int error = 0, part; 765 766 #ifdef DEBUG 767 if (ccddebug & CCDB_FOLLOW) 768 printf("ccdclose(%p, %x)\n", dev, flags); 769 #endif 770 771 if (!IS_ALLOCATED(unit)) 772 return (ENXIO); 773 cs = ccdfind(unit); 774 775 if ((error = ccdlock(cs)) != 0) 776 return (error); 777 778 part = ccdpart(dev); 779 780 /* ...that much closer to allowing unconfiguration... */ 781 cs->sc_openmask &= ~(1 << part); 782 /* collect "garbage" if possible */ 783 if (!IS_INITED(cs) && (cs->sc_flags & CCDF_WANTED) == 0) 784 ccddestroy(cs, td->td_proc); 785 else 786 ccdunlock(cs); 787 return (0); 788 } 789 790 static void 791 ccdstrategy(struct bio *bp) 792 { 793 int unit = ccdunit(bp->bio_dev); 794 struct ccd_s *cs = ccdfind(unit); 795 int s; 796 int wlabel; 797 struct disklabel *lp; 798 799 #ifdef DEBUG 800 if (ccddebug & CCDB_FOLLOW) 801 printf("ccdstrategy(%p): unit %d\n", bp, unit); 802 #endif 803 if (!IS_INITED(cs)) { 804 biofinish(bp, NULL, ENXIO); 805 return; 806 } 807 808 /* If it's a nil transfer, wake up the top half now. */ 809 if (bp->bio_bcount == 0) { 810 biodone(bp); 811 return; 812 } 813 814 lp = &cs->sc_label; 815 816 /* 817 * Do bounds checking and adjust transfer. If there's an 818 * error, the bounds check will flag that for us. 819 */ 820 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 821 if (ccdpart(bp->bio_dev) != RAW_PART) { 822 if (bounds_check_with_label(bp, lp, wlabel) <= 0) { 823 biodone(bp); 824 return; 825 } 826 } else { 827 int pbn; /* in sc_secsize chunks */ 828 long sz; /* in sc_secsize chunks */ 829 830 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 831 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); 832 833 /* 834 * If out of bounds return an error. If at the EOF point, 835 * simply read or write less. 836 */ 837 838 if (pbn < 0 || pbn >= cs->sc_size) { 839 bp->bio_resid = bp->bio_bcount; 840 if (pbn != cs->sc_size) 841 biofinish(bp, NULL, EINVAL); 842 else 843 biodone(bp); 844 return; 845 } 846 847 /* 848 * If the request crosses EOF, truncate the request. 849 */ 850 if (pbn + sz > cs->sc_size) { 851 bp->bio_bcount = (cs->sc_size - pbn) * 852 cs->sc_geom.ccg_secsize; 853 } 854 } 855 856 bp->bio_resid = bp->bio_bcount; 857 858 /* 859 * "Start" the unit. 860 */ 861 s = splbio(); 862 ccdstart(cs, bp); 863 splx(s); 864 return; 865 } 866 867 static void 868 ccdstart(struct ccd_s *cs, struct bio *bp) 869 { 870 long bcount, rcount; 871 struct ccdbuf *cbp[4]; 872 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 873 caddr_t addr; 874 daddr_t bn; 875 struct partition *pp; 876 877 #ifdef DEBUG 878 if (ccddebug & CCDB_FOLLOW) 879 printf("ccdstart(%p, %p)\n", cs, bp); 880 #endif 881 882 /* Record the transaction start */ 883 devstat_start_transaction(&cs->device_stats); 884 885 /* 886 * Translate the partition-relative block number to an absolute. 887 */ 888 bn = bp->bio_blkno; 889 if (ccdpart(bp->bio_dev) != RAW_PART) { 890 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; 891 bn += pp->p_offset; 892 } 893 894 /* 895 * Allocate component buffers and fire off the requests 896 */ 897 addr = bp->bio_data; 898 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { 899 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 900 rcount = cbp[0]->cb_buf.bio_bcount; 901 902 if (cs->sc_cflags & CCDF_MIRROR) { 903 /* 904 * Mirroring. Writes go to both disks, reads are 905 * taken from whichever disk seems most appropriate. 906 * 907 * We attempt to localize reads to the disk whos arm 908 * is nearest the read request. We ignore seeks due 909 * to writes when making this determination and we 910 * also try to avoid hogging. 911 */ 912 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { 913 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 914 BIO_STRATEGY(&cbp[1]->cb_buf, 0); 915 } else { 916 int pick = cs->sc_pick; 917 daddr_t range = cs->sc_size / 16; 918 919 if (bn < cs->sc_blk[pick] - range || 920 bn > cs->sc_blk[pick] + range 921 ) { 922 cs->sc_pick = pick = 1 - pick; 923 } 924 cs->sc_blk[pick] = bn + btodb(rcount); 925 BIO_STRATEGY(&cbp[pick]->cb_buf, 0); 926 } 927 } else { 928 /* 929 * Not mirroring 930 */ 931 BIO_STRATEGY(&cbp[0]->cb_buf, 0); 932 } 933 bn += btodb(rcount); 934 addr += rcount; 935 } 936 } 937 938 /* 939 * Build a component buffer header. 940 */ 941 static void 942 ccdbuffer(struct ccdbuf **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) 943 { 944 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 945 struct ccdbuf *cbp; 946 daddr_t cbn, cboff; 947 off_t cbc; 948 949 #ifdef DEBUG 950 if (ccddebug & CCDB_IO) 951 printf("ccdbuffer(%p, %p, %d, %p, %ld)\n", 952 cs, bp, bn, addr, bcount); 953 #endif 954 /* 955 * Determine which component bn falls in. 956 */ 957 cbn = bn; 958 cboff = 0; 959 960 if (cs->sc_ileave == 0) { 961 /* 962 * Serially concatenated and neither a mirror nor a parity 963 * config. This is a special case. 964 */ 965 daddr_t sblk; 966 967 sblk = 0; 968 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 969 sblk += ci->ci_size; 970 cbn -= sblk; 971 } else { 972 struct ccdiinfo *ii; 973 int ccdisk, off; 974 975 /* 976 * Calculate cbn, the logical superblock (sc_ileave chunks), 977 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 978 * to cbn. 979 */ 980 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 981 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 982 983 /* 984 * Figure out which interleave table to use. 985 */ 986 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 987 if (ii->ii_startblk > cbn) 988 break; 989 } 990 ii--; 991 992 /* 993 * off is the logical superblock relative to the beginning 994 * of this interleave block. 995 */ 996 off = cbn - ii->ii_startblk; 997 998 /* 999 * We must calculate which disk component to use (ccdisk), 1000 * and recalculate cbn to be the superblock relative to 1001 * the beginning of the component. This is typically done by 1002 * adding 'off' and ii->ii_startoff together. However, 'off' 1003 * must typically be divided by the number of components in 1004 * this interleave array to be properly convert it from a 1005 * CCD-relative logical superblock number to a 1006 * component-relative superblock number. 1007 */ 1008 if (ii->ii_ndisk == 1) { 1009 /* 1010 * When we have just one disk, it can't be a mirror 1011 * or a parity config. 1012 */ 1013 ccdisk = ii->ii_index[0]; 1014 cbn = ii->ii_startoff + off; 1015 } else { 1016 if (cs->sc_cflags & CCDF_MIRROR) { 1017 /* 1018 * We have forced a uniform mapping, resulting 1019 * in a single interleave array. We double 1020 * up on the first half of the available 1021 * components and our mirror is in the second 1022 * half. This only works with a single 1023 * interleave array because doubling up 1024 * doubles the number of sectors, so there 1025 * cannot be another interleave array because 1026 * the next interleave array's calculations 1027 * would be off. 1028 */ 1029 int ndisk2 = ii->ii_ndisk / 2; 1030 ccdisk = ii->ii_index[off % ndisk2]; 1031 cbn = ii->ii_startoff + off / ndisk2; 1032 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1033 } else if (cs->sc_cflags & CCDF_PARITY) { 1034 /* 1035 * XXX not implemented yet 1036 */ 1037 int ndisk2 = ii->ii_ndisk - 1; 1038 ccdisk = ii->ii_index[off % ndisk2]; 1039 cbn = ii->ii_startoff + off / ndisk2; 1040 if (cbn % ii->ii_ndisk <= ccdisk) 1041 ccdisk++; 1042 } else { 1043 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1044 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1045 } 1046 } 1047 1048 ci = &cs->sc_cinfo[ccdisk]; 1049 1050 /* 1051 * Convert cbn from a superblock to a normal block so it 1052 * can be used to calculate (along with cboff) the normal 1053 * block index into this particular disk. 1054 */ 1055 cbn *= cs->sc_ileave; 1056 } 1057 1058 /* 1059 * Fill in the component buf structure. 1060 */ 1061 cbp = getccdbuf(NULL); 1062 cbp->cb_buf.bio_cmd = bp->bio_cmd; 1063 cbp->cb_buf.bio_done = ccdiodone; 1064 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ 1065 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; 1066 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1067 cbp->cb_buf.bio_data = addr; 1068 if (cs->sc_ileave == 0) 1069 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1070 else 1071 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1072 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount; 1073 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; 1074 1075 /* 1076 * context for ccdiodone 1077 */ 1078 cbp->cb_obp = bp; 1079 cbp->cb_unit = cs->sc_unit; 1080 cbp->cb_comp = ci - cs->sc_cinfo; 1081 1082 #ifdef DEBUG 1083 if (ccddebug & CCDB_IO) 1084 printf(" dev %p(u%ld): cbp %p bn %lld addr %p bcnt %ld\n", 1085 ci->ci_dev, (unsigned long)(ci-cs->sc_cinfo), cbp, 1086 (long long)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1087 cbp->cb_buf.bio_bcount); 1088 #endif 1089 cb[0] = cbp; 1090 1091 /* 1092 * Note: both I/O's setup when reading from mirror, but only one 1093 * will be executed. 1094 */ 1095 if (cs->sc_cflags & CCDF_MIRROR) { 1096 /* mirror, setup second I/O */ 1097 cbp = getccdbuf(cb[0]); 1098 cbp->cb_buf.bio_dev = ci2->ci_dev; 1099 cbp->cb_comp = ci2 - cs->sc_cinfo; 1100 cb[1] = cbp; 1101 /* link together the ccdbuf's and clear "mirror done" flag */ 1102 cb[0]->cb_mirror = cb[1]; 1103 cb[1]->cb_mirror = cb[0]; 1104 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1105 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1106 } 1107 } 1108 1109 static void 1110 ccdintr(struct ccd_s *cs, struct bio *bp) 1111 { 1112 #ifdef DEBUG 1113 if (ccddebug & CCDB_FOLLOW) 1114 printf("ccdintr(%p, %p)\n", cs, bp); 1115 #endif 1116 /* 1117 * Request is done for better or worse, wakeup the top half. 1118 */ 1119 if (bp->bio_flags & BIO_ERROR) 1120 bp->bio_resid = bp->bio_bcount; 1121 biofinish(bp, &cs->device_stats, 0); 1122 } 1123 1124 /* 1125 * Called at interrupt time. 1126 * Mark the component as done and if all components are done, 1127 * take a ccd interrupt. 1128 */ 1129 static void 1130 ccdiodone(struct bio *ibp) 1131 { 1132 struct ccdbuf *cbp = (struct ccdbuf *)ibp; 1133 struct bio *bp = cbp->cb_obp; 1134 int unit = cbp->cb_unit; 1135 int count, s; 1136 1137 s = splbio(); 1138 #ifdef DEBUG 1139 if (ccddebug & CCDB_FOLLOW) 1140 printf("ccdiodone(%p)\n", cbp); 1141 if (ccddebug & CCDB_IO) { 1142 printf("ccdiodone: bp %p bcount %ld resid %ld\n", 1143 bp, bp->bio_bcount, bp->bio_resid); 1144 printf(" dev %p(u%d), cbp %p bn %lld addr %p bcnt %ld\n", 1145 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, 1146 (long long)cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, 1147 cbp->cb_buf.bio_bcount); 1148 } 1149 #endif 1150 /* 1151 * If an error occured, report it. If this is a mirrored 1152 * configuration and the first of two possible reads, do not 1153 * set the error in the bp yet because the second read may 1154 * succeed. 1155 */ 1156 1157 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1158 const char *msg = ""; 1159 1160 if ((ccdfind(unit)->sc_cflags & CCDF_MIRROR) && 1161 (cbp->cb_buf.bio_cmd == BIO_READ) && 1162 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1163 /* 1164 * We will try our read on the other disk down 1165 * below, also reverse the default pick so if we 1166 * are doing a scan we do not keep hitting the 1167 * bad disk first. 1168 */ 1169 struct ccd_s *cs = ccdfind(unit); 1170 1171 msg = ", trying other disk"; 1172 cs->sc_pick = 1 - cs->sc_pick; 1173 cs->sc_blk[cs->sc_pick] = bp->bio_blkno; 1174 } else { 1175 bp->bio_flags |= BIO_ERROR; 1176 bp->bio_error = cbp->cb_buf.bio_error ? 1177 cbp->cb_buf.bio_error : EIO; 1178 } 1179 printf("ccd%d: error %d on component %d block %d (ccd block %lld)%s\n", 1180 unit, bp->bio_error, cbp->cb_comp, 1181 (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg); 1182 } 1183 1184 /* 1185 * Process mirror. If we are writing, I/O has been initiated on both 1186 * buffers and we fall through only after both are finished. 1187 * 1188 * If we are reading only one I/O is initiated at a time. If an 1189 * error occurs we initiate the second I/O and return, otherwise 1190 * we free the second I/O without initiating it. 1191 */ 1192 1193 if (ccdfind(unit)->sc_cflags & CCDF_MIRROR) { 1194 if (cbp->cb_buf.bio_cmd == BIO_WRITE) { 1195 /* 1196 * When writing, handshake with the second buffer 1197 * to determine when both are done. If both are not 1198 * done, return here. 1199 */ 1200 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1201 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1202 putccdbuf(cbp); 1203 splx(s); 1204 return; 1205 } 1206 } else { 1207 /* 1208 * When reading, either dispose of the second buffer 1209 * or initiate I/O on the second buffer if an error 1210 * occured with this one. 1211 */ 1212 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1213 if (cbp->cb_buf.bio_flags & BIO_ERROR) { 1214 cbp->cb_mirror->cb_pflags |= 1215 CCDPF_MIRROR_DONE; 1216 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0); 1217 putccdbuf(cbp); 1218 splx(s); 1219 return; 1220 } else { 1221 putccdbuf(cbp->cb_mirror); 1222 /* fall through */ 1223 } 1224 } 1225 } 1226 } 1227 1228 /* 1229 * use bio_caller1 to determine how big the original request was rather 1230 * then bio_bcount, because bio_bcount may have been truncated for EOF. 1231 * 1232 * XXX We check for an error, but we do not test the resid for an 1233 * aligned EOF condition. This may result in character & block 1234 * device access not recognizing EOF properly when read or written 1235 * sequentially, but will not effect filesystems. 1236 */ 1237 count = (long)cbp->cb_buf.bio_caller1; 1238 putccdbuf(cbp); 1239 1240 /* 1241 * If all done, "interrupt". 1242 */ 1243 bp->bio_resid -= count; 1244 if (bp->bio_resid < 0) 1245 panic("ccdiodone: count"); 1246 if (bp->bio_resid == 0) 1247 ccdintr(ccdfind(unit), bp); 1248 splx(s); 1249 } 1250 1251 static int 1252 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td) 1253 { 1254 int unit = ccdunit(dev); 1255 int i, j, lookedup = 0, error = 0; 1256 int part, pmask, s; 1257 struct ccd_s *cs; 1258 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1259 char **cpp; 1260 struct vnode **vpp; 1261 1262 if (!IS_ALLOCATED(unit)) 1263 return (ENXIO); 1264 cs = ccdfind(unit); 1265 1266 switch (cmd) { 1267 case CCDIOCSET: 1268 if (IS_INITED(cs)) 1269 return (EBUSY); 1270 1271 if ((flag & FWRITE) == 0) 1272 return (EBADF); 1273 1274 if ((error = ccdlock(cs)) != 0) 1275 return (error); 1276 1277 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1278 return (EINVAL); 1279 1280 /* Fill in some important bits. */ 1281 cs->sc_ileave = ccio->ccio_ileave; 1282 if (cs->sc_ileave == 0 && 1283 ((ccio->ccio_flags & CCDF_MIRROR) || 1284 (ccio->ccio_flags & CCDF_PARITY))) { 1285 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1286 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1287 } 1288 if ((ccio->ccio_flags & CCDF_MIRROR) && 1289 (ccio->ccio_flags & CCDF_PARITY)) { 1290 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1291 ccio->ccio_flags &= ~CCDF_PARITY; 1292 } 1293 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1294 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1295 printf("ccd%d: mirror/parity forces uniform flag\n", 1296 unit); 1297 ccio->ccio_flags |= CCDF_UNIFORM; 1298 } 1299 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1300 1301 /* 1302 * Allocate space for and copy in the array of 1303 * componet pathnames and device numbers. 1304 */ 1305 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1306 M_DEVBUF, M_WAITOK); 1307 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1308 M_DEVBUF, M_WAITOK); 1309 1310 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1311 ccio->ccio_ndisks * sizeof(char **)); 1312 if (error) { 1313 free(vpp, M_DEVBUF); 1314 free(cpp, M_DEVBUF); 1315 ccdunlock(cs); 1316 return (error); 1317 } 1318 1319 #ifdef DEBUG 1320 if (ccddebug & CCDB_INIT) 1321 for (i = 0; i < ccio->ccio_ndisks; ++i) 1322 printf("ccdioctl: component %d: %p\n", 1323 i, cpp[i]); 1324 #endif 1325 1326 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1327 #ifdef DEBUG 1328 if (ccddebug & CCDB_INIT) 1329 printf("ccdioctl: lookedup = %d\n", lookedup); 1330 #endif 1331 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1332 for (j = 0; j < lookedup; ++j) 1333 (void)vn_close(vpp[j], FREAD|FWRITE, 1334 td->td_ucred, td); 1335 free(vpp, M_DEVBUF); 1336 free(cpp, M_DEVBUF); 1337 ccdunlock(cs); 1338 return (error); 1339 } 1340 ++lookedup; 1341 } 1342 cs->sc_vpp = vpp; 1343 cs->sc_nccdisks = ccio->ccio_ndisks; 1344 1345 /* 1346 * Initialize the ccd. Fills in the softc for us. 1347 */ 1348 if ((error = ccdinit(cs, cpp, td)) != 0) { 1349 for (j = 0; j < lookedup; ++j) 1350 (void)vn_close(vpp[j], FREAD|FWRITE, 1351 td->td_ucred, td); 1352 /* 1353 * We can't ccddestroy() cs just yet, because nothing 1354 * prevents user-level app to do another ioctl() 1355 * without closing the device first, therefore 1356 * declare unit null and void and let ccdclose() 1357 * destroy it when it is safe to do so. 1358 */ 1359 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1360 free(vpp, M_DEVBUF); 1361 free(cpp, M_DEVBUF); 1362 ccdunlock(cs); 1363 return (error); 1364 } 1365 1366 /* 1367 * The ccd has been successfully initialized, so 1368 * we can place it into the array and read the disklabel. 1369 */ 1370 ccio->ccio_unit = unit; 1371 ccio->ccio_size = cs->sc_size; 1372 ccdgetdisklabel(dev); 1373 1374 ccdunlock(cs); 1375 1376 break; 1377 1378 case CCDIOCCLR: 1379 if (!IS_INITED(cs)) 1380 return (ENXIO); 1381 1382 if ((flag & FWRITE) == 0) 1383 return (EBADF); 1384 1385 if ((error = ccdlock(cs)) != 0) 1386 return (error); 1387 1388 /* Don't unconfigure if any other partitions are open */ 1389 part = ccdpart(dev); 1390 pmask = (1 << part); 1391 if ((cs->sc_openmask & ~pmask)) { 1392 ccdunlock(cs); 1393 return (EBUSY); 1394 } 1395 1396 /* Declare unit null and void (reset all flags) */ 1397 cs->sc_flags &= (CCDF_WANTED | CCDF_LOCKED); 1398 1399 /* Close the components and free their pathnames. */ 1400 for (i = 0; i < cs->sc_nccdisks; ++i) { 1401 /* 1402 * XXX: this close could potentially fail and 1403 * cause Bad Things. Maybe we need to force 1404 * the close to happen? 1405 */ 1406 #ifdef DEBUG 1407 if (ccddebug & CCDB_VNODE) 1408 vprint("CCDIOCCLR: vnode info", 1409 cs->sc_cinfo[i].ci_vp); 1410 #endif 1411 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1412 td->td_ucred, td); 1413 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1414 } 1415 1416 /* Free interleave index. */ 1417 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1418 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1419 1420 /* Free component info and interleave table. */ 1421 free(cs->sc_cinfo, M_DEVBUF); 1422 free(cs->sc_itable, M_DEVBUF); 1423 free(cs->sc_vpp, M_DEVBUF); 1424 1425 /* And remove the devstat entry. */ 1426 devstat_remove_entry(&cs->device_stats); 1427 1428 /* This must be atomic. */ 1429 s = splhigh(); 1430 ccdunlock(cs); 1431 splx(s); 1432 1433 break; 1434 1435 case CCDCONFINFO: 1436 { 1437 int ninit = 0; 1438 struct ccdconf *conf = (struct ccdconf *)data; 1439 struct ccd_s *tmpcs; 1440 struct ccd_s *ubuf = conf->buffer; 1441 1442 /* XXX: LOCK(unique unit numbers) */ 1443 LIST_FOREACH(tmpcs, &ccd_softc_list, list) 1444 if (IS_INITED(tmpcs)) 1445 ninit++; 1446 1447 if (conf->size == 0) { 1448 conf->size = sizeof(struct ccd_s) * ninit; 1449 break; 1450 } else if ((conf->size / sizeof(struct ccd_s) != ninit) || 1451 (conf->size % sizeof(struct ccd_s) != 0)) { 1452 /* XXX: UNLOCK(unique unit numbers) */ 1453 return (EINVAL); 1454 } 1455 1456 ubuf += ninit; 1457 LIST_FOREACH(tmpcs, &ccd_softc_list, list) { 1458 if (!IS_INITED(tmpcs)) 1459 continue; 1460 error = copyout(tmpcs, --ubuf, 1461 sizeof(struct ccd_s)); 1462 if (error != 0) 1463 /* XXX: UNLOCK(unique unit numbers) */ 1464 return (error); 1465 } 1466 /* XXX: UNLOCK(unique unit numbers) */ 1467 } 1468 break; 1469 1470 case CCDCPPINFO: 1471 if (!IS_INITED(cs)) 1472 return (ENXIO); 1473 1474 { 1475 int len = 0; 1476 struct ccdcpps *cpps = (struct ccdcpps *)data; 1477 char *ubuf = cpps->buffer; 1478 1479 1480 for (i = 0; i < cs->sc_nccdisks; ++i) 1481 len += cs->sc_cinfo[i].ci_pathlen; 1482 1483 if (cpps->size == 0) { 1484 cpps->size = len; 1485 break; 1486 } else if (cpps->size != len) { 1487 return (EINVAL); 1488 } 1489 1490 for (i = 0; i < cs->sc_nccdisks; ++i) { 1491 len = cs->sc_cinfo[i].ci_pathlen; 1492 error = copyout(cs->sc_cinfo[i].ci_path, ubuf, 1493 len); 1494 if (error != 0) 1495 return (error); 1496 ubuf += len; 1497 } 1498 } 1499 break; 1500 1501 case DIOCGDINFO: 1502 if (!IS_INITED(cs)) 1503 return (ENXIO); 1504 1505 *(struct disklabel *)data = cs->sc_label; 1506 break; 1507 1508 case DIOCWDINFO: 1509 case DIOCSDINFO: 1510 if (!IS_INITED(cs)) 1511 return (ENXIO); 1512 1513 if ((flag & FWRITE) == 0) 1514 return (EBADF); 1515 1516 if ((error = ccdlock(cs)) != 0) 1517 return (error); 1518 1519 cs->sc_flags |= CCDF_LABELLING; 1520 1521 error = setdisklabel(&cs->sc_label, 1522 (struct disklabel *)data, 0); 1523 if (error == 0) { 1524 if (cmd == DIOCWDINFO) 1525 error = writedisklabel(CCDLABELDEV(dev), 1526 &cs->sc_label); 1527 } 1528 1529 cs->sc_flags &= ~CCDF_LABELLING; 1530 1531 ccdunlock(cs); 1532 1533 if (error) 1534 return (error); 1535 break; 1536 1537 case DIOCWLABEL: 1538 if (!IS_INITED(cs)) 1539 return (ENXIO); 1540 1541 if ((flag & FWRITE) == 0) 1542 return (EBADF); 1543 if (*(int *)data != 0) 1544 cs->sc_flags |= CCDF_WLABEL; 1545 else 1546 cs->sc_flags &= ~CCDF_WLABEL; 1547 break; 1548 1549 default: 1550 return (ENOTTY); 1551 } 1552 1553 return (0); 1554 } 1555 1556 static int 1557 ccdsize(dev_t dev) 1558 { 1559 struct ccd_s *cs; 1560 int part, size; 1561 1562 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1563 return (-1); 1564 1565 cs = ccdfind(ccdunit(dev)); 1566 part = ccdpart(dev); 1567 1568 if (!IS_INITED(cs)) 1569 return (-1); 1570 1571 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1572 size = -1; 1573 else 1574 size = cs->sc_label.d_partitions[part].p_size; 1575 1576 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1577 return (-1); 1578 1579 return (size); 1580 } 1581 1582 /* 1583 * Lookup the provided name in the filesystem. If the file exists, 1584 * is a valid block device, and isn't being used by anyone else, 1585 * set *vpp to the file's vnode. 1586 */ 1587 static int 1588 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1589 { 1590 struct nameidata nd; 1591 struct vnode *vp; 1592 int error, flags; 1593 1594 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td); 1595 flags = FREAD | FWRITE; 1596 if ((error = vn_open(&nd, &flags, 0)) != 0) { 1597 #ifdef DEBUG 1598 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 1599 printf("ccdlookup: vn_open error = %d\n", error); 1600 #endif 1601 return (error); 1602 } 1603 vp = nd.ni_vp; 1604 1605 if (vp->v_usecount > 1) { 1606 error = EBUSY; 1607 goto bad; 1608 } 1609 1610 if (!vn_isdisk(vp, &error)) 1611 goto bad; 1612 1613 #ifdef DEBUG 1614 if (ccddebug & CCDB_VNODE) 1615 vprint("ccdlookup: vnode info", vp); 1616 #endif 1617 1618 VOP_UNLOCK(vp, 0, td); 1619 NDFREE(&nd, NDF_ONLY_PNBUF); 1620 *vpp = vp; 1621 return (0); 1622 bad: 1623 VOP_UNLOCK(vp, 0, td); 1624 NDFREE(&nd, NDF_ONLY_PNBUF); 1625 /* vn_close does vrele() for vp */ 1626 (void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 1627 return (error); 1628 } 1629 1630 /* 1631 * Read the disklabel from the ccd. If one is not present, fake one 1632 * up. 1633 */ 1634 static void 1635 ccdgetdisklabel(dev_t dev) 1636 { 1637 int unit = ccdunit(dev); 1638 struct ccd_s *cs = ccdfind(unit); 1639 char *errstring; 1640 struct disklabel *lp = &cs->sc_label; 1641 struct ccdgeom *ccg = &cs->sc_geom; 1642 1643 bzero(lp, sizeof(*lp)); 1644 1645 lp->d_secperunit = cs->sc_size; 1646 lp->d_secsize = ccg->ccg_secsize; 1647 lp->d_nsectors = ccg->ccg_nsectors; 1648 lp->d_ntracks = ccg->ccg_ntracks; 1649 lp->d_ncylinders = ccg->ccg_ncylinders; 1650 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1651 1652 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1653 lp->d_type = DTYPE_CCD; 1654 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1655 lp->d_rpm = 3600; 1656 lp->d_interleave = 1; 1657 lp->d_flags = 0; 1658 1659 lp->d_partitions[RAW_PART].p_offset = 0; 1660 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1661 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1662 lp->d_npartitions = RAW_PART + 1; 1663 1664 lp->d_bbsize = BBSIZE; /* XXX */ 1665 lp->d_sbsize = SBSIZE; /* XXX */ 1666 1667 lp->d_magic = DISKMAGIC; 1668 lp->d_magic2 = DISKMAGIC; 1669 lp->d_checksum = dkcksum(&cs->sc_label); 1670 1671 /* 1672 * Call the generic disklabel extraction routine. 1673 */ 1674 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1675 if (errstring != NULL) 1676 ccdmakedisklabel(cs); 1677 1678 #ifdef DEBUG 1679 /* It's actually extremely common to have unlabeled ccds. */ 1680 if (ccddebug & CCDB_LABEL) 1681 if (errstring != NULL) 1682 printf("ccd%d: %s\n", unit, errstring); 1683 #endif 1684 } 1685 1686 /* 1687 * Take care of things one might want to take care of in the event 1688 * that a disklabel isn't present. 1689 */ 1690 static void 1691 ccdmakedisklabel(struct ccd_s *cs) 1692 { 1693 struct disklabel *lp = &cs->sc_label; 1694 1695 /* 1696 * For historical reasons, if there's no disklabel present 1697 * the raw partition must be marked FS_BSDFFS. 1698 */ 1699 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1700 1701 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1702 } 1703 1704 /* 1705 * Wait interruptibly for an exclusive lock. 1706 * 1707 * XXX 1708 * Several drivers do this; it should be abstracted and made MP-safe. 1709 */ 1710 static int 1711 ccdlock(struct ccd_s *cs) 1712 { 1713 int error; 1714 1715 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1716 cs->sc_flags |= CCDF_WANTED; 1717 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) 1718 return (error); 1719 } 1720 cs->sc_flags |= CCDF_LOCKED; 1721 return (0); 1722 } 1723 1724 /* 1725 * Unlock and wake up any waiters. 1726 */ 1727 static void 1728 ccdunlock(struct ccd_s *cs) 1729 { 1730 1731 cs->sc_flags &= ~CCDF_LOCKED; 1732 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1733 cs->sc_flags &= ~CCDF_WANTED; 1734 wakeup(cs); 1735 } 1736 } 1737 1738 #ifdef DEBUG 1739 static void 1740 printiinfo(struct ccdiinfo *ii) 1741 { 1742 int ix, i; 1743 1744 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1745 printf(" itab[%d]: #dk %d sblk %d soff %d", 1746 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1747 for (i = 0; i < ii->ii_ndisk; i++) 1748 printf(" %d", ii->ii_index[i]); 1749 printf("\n"); 1750 } 1751 } 1752 #endif 1753