1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause AND BSD-3-Clause) 3 * 4 * Copyright (c) 2003 Poul-Henning Kamp. 5 * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jason R. Thorpe. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 33 */ 34 35 /*- 36 * Copyright (c) 1988 University of Utah. 37 * Copyright (c) 1990, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * This code is derived from software contributed to Berkeley by 41 * the Systems Programming Group of the University of Utah Computer 42 * Science Department. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. Neither the name of the University nor the names of its contributors 53 * may be used to endorse or promote products derived from this software 54 * without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 66 * SUCH DAMAGE. 67 * 68 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 69 * 70 * @(#)cd.c 8.2 (Berkeley) 11/16/93 71 */ 72 73 /* 74 * Dynamic configuration and disklabel support by: 75 * Jason R. Thorpe <thorpej@nas.nasa.gov> 76 * Numerical Aerodynamic Simulation Facility 77 * Mail Stop 258-6 78 * NASA Ames Research Center 79 * Moffett Field, CA 94035 80 */ 81 82 #include <sys/cdefs.h> 83 #include <sys/param.h> 84 #include <sys/systm.h> 85 #include <sys/kernel.h> 86 #include <sys/module.h> 87 #include <sys/bio.h> 88 #include <sys/malloc.h> 89 #include <sys/sbuf.h> 90 #include <geom/geom.h> 91 92 /* 93 * Number of blocks to untouched in front of a component partition. 94 * This is to avoid violating its disklabel area when it starts at the 95 * beginning of the slice. 96 */ 97 #if !defined(CCD_OFFSET) 98 #define CCD_OFFSET 16 99 #endif 100 101 /* sc_flags */ 102 #define CCDF_UNIFORM 0x02 /* use LCCD of sizes for uniform interleave */ 103 #define CCDF_MIRROR 0x04 /* use mirroring */ 104 #define CCDF_NO_OFFSET 0x08 /* do not leave space in front */ 105 #define CCDF_LINUX 0x10 /* use Linux compatibility mode */ 106 107 /* Mask of user-settable ccd flags. */ 108 #define CCDF_USERMASK (CCDF_UNIFORM|CCDF_MIRROR) 109 110 /* 111 * Interleave description table. 112 * Computed at boot time to speed irregular-interleave lookups. 113 * The idea is that we interleave in "groups". First we interleave 114 * evenly over all component disks up to the size of the smallest 115 * component (the first group), then we interleave evenly over all 116 * remaining disks up to the size of the next-smallest (second group), 117 * and so on. 118 * 119 * Each table entry describes the interleave characteristics of one 120 * of these groups. For example if a concatenated disk consisted of 121 * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at 122 * DEV_BSIZE (1), the table would have three entries: 123 * 124 * ndisk startblk startoff dev 125 * 3 0 0 0, 1, 2 126 * 2 9 3 0, 2 127 * 1 13 5 2 128 * 0 - - - 129 * 130 * which says that the first nine blocks (0-8) are interleaved over 131 * 3 disks (0, 1, 2) starting at block offset 0 on any component disk, 132 * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting 133 * at component block 3, and the remaining blocks (13-14) are on disk 134 * 2 starting at offset 5. 135 */ 136 struct ccdiinfo { 137 int ii_ndisk; /* # of disks range is interleaved over */ 138 daddr_t ii_startblk; /* starting scaled block # for range */ 139 daddr_t ii_startoff; /* starting component offset (block #) */ 140 int *ii_index; /* ordered list of components in range */ 141 }; 142 143 /* 144 * Component info table. 145 * Describes a single component of a concatenated disk. 146 */ 147 struct ccdcinfo { 148 daddr_t ci_size; /* size */ 149 struct g_provider *ci_provider; /* provider */ 150 struct g_consumer *ci_consumer; /* consumer */ 151 }; 152 153 /* 154 * A concatenated disk is described by this structure. 155 */ 156 157 struct ccd_s { 158 LIST_ENTRY(ccd_s) list; 159 160 int sc_unit; /* logical unit number */ 161 int sc_flags; /* flags */ 162 daddr_t sc_size; /* size of ccd */ 163 int sc_ileave; /* interleave */ 164 u_int sc_ndisks; /* number of components */ 165 struct ccdcinfo *sc_cinfo; /* component info */ 166 struct ccdiinfo *sc_itable; /* interleave table */ 167 uint32_t sc_secsize; /* # bytes per sector */ 168 int sc_pick; /* side of mirror picked */ 169 daddr_t sc_blk[2]; /* mirror localization */ 170 uint32_t sc_offset; /* actual offset used */ 171 }; 172 173 static g_start_t g_ccd_start; 174 static void ccdiodone(struct bio *bp); 175 static void ccdinterleave(struct ccd_s *); 176 static int ccdinit(struct gctl_req *req, struct ccd_s *); 177 static int ccdbuffer(struct bio **ret, struct ccd_s *, 178 struct bio *, daddr_t, caddr_t, long); 179 180 static void 181 g_ccd_orphan(struct g_consumer *cp) 182 { 183 /* 184 * XXX: We don't do anything here. It is not obvious 185 * XXX: what DTRT would be, so we do what the previous 186 * XXX: code did: ignore it and let the user cope. 187 */ 188 } 189 190 static int 191 g_ccd_access(struct g_provider *pp, int dr, int dw, int de) 192 { 193 struct g_geom *gp; 194 struct g_consumer *cp1, *cp2; 195 int error; 196 197 de += dr; 198 de += dw; 199 200 gp = pp->geom; 201 error = ENXIO; 202 LIST_FOREACH(cp1, &gp->consumer, consumer) { 203 error = g_access(cp1, dr, dw, de); 204 if (error) { 205 LIST_FOREACH(cp2, &gp->consumer, consumer) { 206 if (cp1 == cp2) 207 break; 208 g_access(cp2, -dr, -dw, -de); 209 } 210 break; 211 } 212 } 213 return (error); 214 } 215 216 /* 217 * Free the softc and its substructures. 218 */ 219 static void 220 g_ccd_freesc(struct ccd_s *sc) 221 { 222 struct ccdiinfo *ii; 223 224 g_free(sc->sc_cinfo); 225 if (sc->sc_itable != NULL) { 226 for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++) 227 g_free(ii->ii_index); 228 g_free(sc->sc_itable); 229 } 230 g_free(sc); 231 } 232 233 static int 234 ccdinit(struct gctl_req *req, struct ccd_s *cs) 235 { 236 struct ccdcinfo *ci; 237 daddr_t size; 238 int ix; 239 daddr_t minsize; 240 int maxsecsize; 241 off_t mediasize; 242 u_int sectorsize; 243 244 cs->sc_size = 0; 245 246 maxsecsize = 0; 247 minsize = 0; 248 249 if (cs->sc_flags & CCDF_LINUX) { 250 cs->sc_offset = 0; 251 cs->sc_ileave *= 2; 252 if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2) 253 gctl_error(req, "Mirror mode for Linux raids is " 254 "only supported with 2 devices"); 255 } else { 256 if (cs->sc_flags & CCDF_NO_OFFSET) 257 cs->sc_offset = 0; 258 else 259 cs->sc_offset = CCD_OFFSET; 260 } 261 for (ix = 0; ix < cs->sc_ndisks; ix++) { 262 ci = &cs->sc_cinfo[ix]; 263 264 mediasize = ci->ci_provider->mediasize; 265 sectorsize = ci->ci_provider->sectorsize; 266 if (sectorsize > maxsecsize) 267 maxsecsize = sectorsize; 268 size = mediasize / DEV_BSIZE - cs->sc_offset; 269 270 /* Truncate to interleave boundary */ 271 272 if (cs->sc_ileave > 1) 273 size -= size % cs->sc_ileave; 274 275 if (size == 0) { 276 gctl_error(req, "Component %s has effective size zero", 277 ci->ci_provider->name); 278 return(ENODEV); 279 } 280 281 if (minsize == 0 || size < minsize) 282 minsize = size; 283 ci->ci_size = size; 284 cs->sc_size += size; 285 } 286 287 /* 288 * Don't allow the interleave to be smaller than 289 * the biggest component sector. 290 */ 291 if ((cs->sc_ileave > 0) && 292 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 293 gctl_error(req, "Interleave to small for sector size"); 294 return(EINVAL); 295 } 296 297 /* 298 * If uniform interleave is desired set all sizes to that of 299 * the smallest component. This will guarantee that a single 300 * interleave table is generated. 301 * 302 * Lost space must be taken into account when calculating the 303 * overall size. Half the space is lost when CCDF_MIRROR is 304 * specified. 305 */ 306 if (cs->sc_flags & CCDF_UNIFORM) { 307 for (ix = 0; ix < cs->sc_ndisks; ix++) { 308 ci = &cs->sc_cinfo[ix]; 309 ci->ci_size = minsize; 310 } 311 cs->sc_size = cs->sc_ndisks * minsize; 312 } 313 314 if (cs->sc_flags & CCDF_MIRROR) { 315 /* 316 * Check to see if an even number of components 317 * have been specified. The interleave must also 318 * be non-zero in order for us to be able to 319 * guarantee the topology. 320 */ 321 if (cs->sc_ndisks % 2) { 322 gctl_error(req, 323 "Mirroring requires an even number of disks"); 324 return(EINVAL); 325 } 326 if (cs->sc_ileave == 0) { 327 gctl_error(req, 328 "An interleave must be specified when mirroring"); 329 return(EINVAL); 330 } 331 cs->sc_size = (cs->sc_ndisks/2) * minsize; 332 } 333 334 /* 335 * Construct the interleave table. 336 */ 337 ccdinterleave(cs); 338 339 /* 340 * Create pseudo-geometry based on 1MB cylinders. It's 341 * pretty close. 342 */ 343 cs->sc_secsize = maxsecsize; 344 345 return (0); 346 } 347 348 static void 349 ccdinterleave(struct ccd_s *cs) 350 { 351 struct ccdcinfo *ci, *smallci; 352 struct ccdiinfo *ii; 353 daddr_t bn, lbn; 354 int ix; 355 daddr_t size; 356 357 /* 358 * Allocate an interleave table. The worst case occurs when each 359 * of N disks is of a different size, resulting in N interleave 360 * tables. 361 * 362 * Chances are this is too big, but we don't care. 363 */ 364 size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo); 365 cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO); 366 367 /* 368 * Trivial case: no interleave (actually interleave of disk size). 369 * Each table entry represents a single component in its entirety. 370 * 371 * An interleave of 0 may not be used with a mirror setup. 372 */ 373 if (cs->sc_ileave == 0) { 374 bn = 0; 375 ii = cs->sc_itable; 376 377 for (ix = 0; ix < cs->sc_ndisks; ix++) { 378 /* Allocate space for ii_index. */ 379 ii->ii_index = g_malloc(sizeof(int), M_WAITOK); 380 ii->ii_ndisk = 1; 381 ii->ii_startblk = bn; 382 ii->ii_startoff = 0; 383 ii->ii_index[0] = ix; 384 bn += cs->sc_cinfo[ix].ci_size; 385 ii++; 386 } 387 ii->ii_ndisk = 0; 388 return; 389 } 390 391 /* 392 * The following isn't fast or pretty; it doesn't have to be. 393 */ 394 size = 0; 395 bn = lbn = 0; 396 for (ii = cs->sc_itable; ; ii++) { 397 /* 398 * Allocate space for ii_index. We might allocate more then 399 * we use. 400 */ 401 ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks), 402 M_WAITOK); 403 404 /* 405 * Locate the smallest of the remaining components 406 */ 407 smallci = NULL; 408 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks]; 409 ci++) { 410 if (ci->ci_size > size && 411 (smallci == NULL || 412 ci->ci_size < smallci->ci_size)) { 413 smallci = ci; 414 } 415 } 416 417 /* 418 * Nobody left, all done 419 */ 420 if (smallci == NULL) { 421 ii->ii_ndisk = 0; 422 g_free(ii->ii_index); 423 ii->ii_index = NULL; 424 break; 425 } 426 427 /* 428 * Record starting logical block using an sc_ileave blocksize. 429 */ 430 ii->ii_startblk = bn / cs->sc_ileave; 431 432 /* 433 * Record starting component block using an sc_ileave 434 * blocksize. This value is relative to the beginning of 435 * a component disk. 436 */ 437 ii->ii_startoff = lbn; 438 439 /* 440 * Determine how many disks take part in this interleave 441 * and record their indices. 442 */ 443 ix = 0; 444 for (ci = cs->sc_cinfo; 445 ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) { 446 if (ci->ci_size >= smallci->ci_size) { 447 ii->ii_index[ix++] = ci - cs->sc_cinfo; 448 } 449 } 450 ii->ii_ndisk = ix; 451 bn += ix * (smallci->ci_size - size); 452 lbn = smallci->ci_size / cs->sc_ileave; 453 size = smallci->ci_size; 454 } 455 } 456 457 static void 458 g_ccd_start(struct bio *bp) 459 { 460 long bcount, rcount; 461 struct bio *cbp[2]; 462 caddr_t addr; 463 daddr_t bn; 464 int err; 465 struct ccd_s *cs; 466 467 cs = bp->bio_to->geom->softc; 468 469 /* 470 * Block all GETATTR requests, we wouldn't know which of our 471 * subdevices we should ship it off to. 472 * XXX: this may not be the right policy. 473 */ 474 if(bp->bio_cmd == BIO_GETATTR) { 475 g_io_deliver(bp, EINVAL); 476 return; 477 } 478 479 /* 480 * Translate the partition-relative block number to an absolute. 481 */ 482 bn = bp->bio_offset / cs->sc_secsize; 483 484 /* 485 * Allocate component buffers and fire off the requests 486 */ 487 addr = bp->bio_data; 488 for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) { 489 err = ccdbuffer(cbp, cs, bp, bn, addr, bcount); 490 if (err) { 491 bp->bio_completed += bcount; 492 if (bp->bio_error == 0) 493 bp->bio_error = err; 494 if (bp->bio_completed == bp->bio_length) 495 g_io_deliver(bp, bp->bio_error); 496 return; 497 } 498 rcount = cbp[0]->bio_length; 499 500 if (cs->sc_flags & CCDF_MIRROR) { 501 /* 502 * Mirroring. Writes go to both disks, reads are 503 * taken from whichever disk seems most appropriate. 504 * 505 * We attempt to localize reads to the disk whos arm 506 * is nearest the read request. We ignore seeks due 507 * to writes when making this determination and we 508 * also try to avoid hogging. 509 */ 510 if (cbp[0]->bio_cmd != BIO_READ) { 511 g_io_request(cbp[0], cbp[0]->bio_from); 512 g_io_request(cbp[1], cbp[1]->bio_from); 513 } else { 514 int pick = cs->sc_pick; 515 daddr_t range = cs->sc_size / 16; 516 517 if (bn < cs->sc_blk[pick] - range || 518 bn > cs->sc_blk[pick] + range 519 ) { 520 cs->sc_pick = pick = 1 - pick; 521 } 522 cs->sc_blk[pick] = bn + btodb(rcount); 523 g_io_request(cbp[pick], cbp[pick]->bio_from); 524 } 525 } else { 526 /* 527 * Not mirroring 528 */ 529 g_io_request(cbp[0], cbp[0]->bio_from); 530 } 531 bn += btodb(rcount); 532 addr += rcount; 533 } 534 } 535 536 /* 537 * Build a component buffer header. 538 */ 539 static int 540 ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount) 541 { 542 struct ccdcinfo *ci, *ci2 = NULL; 543 struct bio *cbp; 544 daddr_t cbn, cboff; 545 off_t cbc; 546 547 /* 548 * Determine which component bn falls in. 549 */ 550 cbn = bn; 551 cboff = 0; 552 553 if (cs->sc_ileave == 0) { 554 /* 555 * Serially concatenated and neither a mirror nor a parity 556 * config. This is a special case. 557 */ 558 daddr_t sblk; 559 560 sblk = 0; 561 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 562 sblk += ci->ci_size; 563 cbn -= sblk; 564 } else { 565 struct ccdiinfo *ii; 566 int ccdisk, off; 567 568 /* 569 * Calculate cbn, the logical superblock (sc_ileave chunks), 570 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 571 * to cbn. 572 */ 573 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 574 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 575 576 /* 577 * Figure out which interleave table to use. 578 */ 579 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 580 if (ii->ii_startblk > cbn) 581 break; 582 } 583 ii--; 584 585 /* 586 * off is the logical superblock relative to the beginning 587 * of this interleave block. 588 */ 589 off = cbn - ii->ii_startblk; 590 591 /* 592 * We must calculate which disk component to use (ccdisk), 593 * and recalculate cbn to be the superblock relative to 594 * the beginning of the component. This is typically done by 595 * adding 'off' and ii->ii_startoff together. However, 'off' 596 * must typically be divided by the number of components in 597 * this interleave array to be properly convert it from a 598 * CCD-relative logical superblock number to a 599 * component-relative superblock number. 600 */ 601 if (ii->ii_ndisk == 1) { 602 /* 603 * When we have just one disk, it can't be a mirror 604 * or a parity config. 605 */ 606 ccdisk = ii->ii_index[0]; 607 cbn = ii->ii_startoff + off; 608 } else { 609 if (cs->sc_flags & CCDF_MIRROR) { 610 /* 611 * We have forced a uniform mapping, resulting 612 * in a single interleave array. We double 613 * up on the first half of the available 614 * components and our mirror is in the second 615 * half. This only works with a single 616 * interleave array because doubling up 617 * doubles the number of sectors, so there 618 * cannot be another interleave array because 619 * the next interleave array's calculations 620 * would be off. 621 */ 622 int ndisk2 = ii->ii_ndisk / 2; 623 ccdisk = ii->ii_index[off % ndisk2]; 624 cbn = ii->ii_startoff + off / ndisk2; 625 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 626 } else { 627 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 628 cbn = ii->ii_startoff + off / ii->ii_ndisk; 629 } 630 } 631 632 ci = &cs->sc_cinfo[ccdisk]; 633 634 /* 635 * Convert cbn from a superblock to a normal block so it 636 * can be used to calculate (along with cboff) the normal 637 * block index into this particular disk. 638 */ 639 cbn *= cs->sc_ileave; 640 } 641 642 /* 643 * Fill in the component buf structure. 644 */ 645 cbp = g_clone_bio(bp); 646 if (cbp == NULL) 647 return (ENOMEM); 648 cbp->bio_done = g_std_done; 649 cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset); 650 cbp->bio_data = addr; 651 if (cs->sc_ileave == 0) 652 cbc = dbtob((off_t)(ci->ci_size - cbn)); 653 else 654 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 655 cbp->bio_length = (cbc < bcount) ? cbc : bcount; 656 657 cbp->bio_from = ci->ci_consumer; 658 cb[0] = cbp; 659 660 if (cs->sc_flags & CCDF_MIRROR) { 661 cbp = g_clone_bio(bp); 662 if (cbp == NULL) 663 return (ENOMEM); 664 cbp->bio_done = cb[0]->bio_done = ccdiodone; 665 cbp->bio_offset = cb[0]->bio_offset; 666 cbp->bio_data = cb[0]->bio_data; 667 cbp->bio_length = cb[0]->bio_length; 668 cbp->bio_from = ci2->ci_consumer; 669 cbp->bio_caller1 = cb[0]; 670 cb[0]->bio_caller1 = cbp; 671 cb[1] = cbp; 672 } 673 return (0); 674 } 675 676 /* 677 * Called only for mirrored operations. 678 */ 679 static void 680 ccdiodone(struct bio *cbp) 681 { 682 struct bio *mbp, *pbp; 683 684 mbp = cbp->bio_caller1; 685 pbp = cbp->bio_parent; 686 687 if (pbp->bio_cmd == BIO_READ) { 688 if (cbp->bio_error == 0) { 689 /* We will not be needing the partner bio */ 690 if (mbp != NULL) { 691 pbp->bio_inbed++; 692 g_destroy_bio(mbp); 693 } 694 g_std_done(cbp); 695 return; 696 } 697 if (mbp != NULL) { 698 /* Try partner the bio instead */ 699 mbp->bio_caller1 = NULL; 700 pbp->bio_inbed++; 701 g_destroy_bio(cbp); 702 g_io_request(mbp, mbp->bio_from); 703 /* 704 * XXX: If this comes back OK, we should actually 705 * try to write the good data on the failed mirror 706 */ 707 return; 708 } 709 g_std_done(cbp); 710 return; 711 } 712 if (mbp != NULL) { 713 mbp->bio_caller1 = NULL; 714 pbp->bio_inbed++; 715 if (cbp->bio_error != 0 && pbp->bio_error == 0) 716 pbp->bio_error = cbp->bio_error; 717 g_destroy_bio(cbp); 718 return; 719 } 720 g_std_done(cbp); 721 } 722 723 static void 724 g_ccd_create(struct gctl_req *req, struct g_class *mp) 725 { 726 int *unit, *ileave, *nprovider; 727 struct g_geom *gp; 728 struct g_consumer *cp; 729 struct g_provider *pp; 730 struct ccd_s *sc; 731 struct sbuf *sb; 732 char buf[20]; 733 int i, error; 734 735 g_topology_assert(); 736 unit = gctl_get_paraml(req, "unit", sizeof (*unit)); 737 if (unit == NULL) { 738 gctl_error(req, "unit parameter not given"); 739 return; 740 } 741 ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave)); 742 if (ileave == NULL) { 743 gctl_error(req, "ileave parameter not given"); 744 return; 745 } 746 nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider)); 747 if (nprovider == NULL) { 748 gctl_error(req, "nprovider parameter not given"); 749 return; 750 } 751 752 /* Check for duplicate unit */ 753 LIST_FOREACH(gp, &mp->geom, geom) { 754 sc = gp->softc; 755 if (sc != NULL && sc->sc_unit == *unit) { 756 gctl_error(req, "Unit %d already configured", *unit); 757 return; 758 } 759 } 760 761 if (*nprovider <= 0) { 762 gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider); 763 return; 764 } 765 766 /* Check all providers are valid */ 767 for (i = 0; i < *nprovider; i++) { 768 snprintf(buf, sizeof(buf), "provider%d", i); 769 pp = gctl_get_provider(req, buf); 770 if (pp == NULL) 771 return; 772 } 773 774 gp = g_new_geomf(mp, "ccd%d", *unit); 775 sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO); 776 gp->softc = sc; 777 sc->sc_ndisks = *nprovider; 778 779 /* Allocate space for the component info. */ 780 sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo), 781 M_WAITOK | M_ZERO); 782 783 /* Create consumers and attach to all providers */ 784 for (i = 0; i < *nprovider; i++) { 785 snprintf(buf, sizeof(buf), "provider%d", i); 786 pp = gctl_get_provider(req, buf); 787 cp = g_new_consumer(gp); 788 error = g_attach(cp, pp); 789 KASSERT(error == 0, ("attach to %s failed", pp->name)); 790 sc->sc_cinfo[i].ci_consumer = cp; 791 sc->sc_cinfo[i].ci_provider = pp; 792 } 793 794 sc->sc_unit = *unit; 795 sc->sc_ileave = *ileave; 796 797 if (gctl_get_param(req, "no_offset", NULL)) 798 sc->sc_flags |= CCDF_NO_OFFSET; 799 if (gctl_get_param(req, "linux", NULL)) 800 sc->sc_flags |= CCDF_LINUX; 801 802 if (gctl_get_param(req, "uniform", NULL)) 803 sc->sc_flags |= CCDF_UNIFORM; 804 if (gctl_get_param(req, "mirror", NULL)) 805 sc->sc_flags |= CCDF_MIRROR; 806 807 if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) { 808 printf("%s: disabling mirror, interleave is 0\n", gp->name); 809 sc->sc_flags &= ~(CCDF_MIRROR); 810 } 811 812 if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) { 813 printf("%s: mirror/parity forces uniform flag\n", gp->name); 814 sc->sc_flags |= CCDF_UNIFORM; 815 } 816 817 error = ccdinit(req, sc); 818 if (error != 0) { 819 g_ccd_freesc(sc); 820 gp->softc = NULL; 821 g_wither_geom(gp, ENXIO); 822 return; 823 } 824 825 pp = g_new_providerf(gp, "%s", gp->name); 826 pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize; 827 pp->sectorsize = sc->sc_secsize; 828 g_error_provider(pp, 0); 829 830 sb = sbuf_new_auto(); 831 sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider); 832 for (i = 0; i < *nprovider; i++) { 833 sbuf_printf(sb, "%s%s", 834 i == 0 ? "(" : ", ", 835 sc->sc_cinfo[i].ci_provider->name); 836 } 837 sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE); 838 if (sc->sc_ileave != 0) 839 sbuf_printf(sb, "interleaved at %d blocks\n", 840 sc->sc_ileave); 841 else 842 sbuf_printf(sb, "concatenated\n"); 843 sbuf_finish(sb); 844 gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); 845 sbuf_delete(sb); 846 } 847 848 static int 849 g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) 850 { 851 struct g_provider *pp; 852 struct ccd_s *sc; 853 854 g_topology_assert(); 855 sc = gp->softc; 856 pp = LIST_FIRST(&gp->provider); 857 if (sc == NULL || pp == NULL) 858 return (EBUSY); 859 if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) { 860 gctl_error(req, "%s is open(r%dw%de%d)", gp->name, 861 pp->acr, pp->acw, pp->ace); 862 return (EBUSY); 863 } 864 g_ccd_freesc(sc); 865 gp->softc = NULL; 866 g_wither_geom(gp, ENXIO); 867 return (0); 868 } 869 870 static void 871 g_ccd_list(struct gctl_req *req, struct g_class *mp) 872 { 873 struct sbuf *sb; 874 struct ccd_s *cs; 875 struct g_geom *gp; 876 int i, unit, *up; 877 878 up = gctl_get_paraml(req, "unit", sizeof (*up)); 879 if (up == NULL) { 880 gctl_error(req, "unit parameter not given"); 881 return; 882 } 883 unit = *up; 884 sb = sbuf_new_auto(); 885 LIST_FOREACH(gp, &mp->geom, geom) { 886 cs = gp->softc; 887 if (cs == NULL || (unit >= 0 && unit != cs->sc_unit)) 888 continue; 889 sbuf_printf(sb, "ccd%d\t\t%d\t%d\t", 890 cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK); 891 892 for (i = 0; i < cs->sc_ndisks; ++i) { 893 sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ", 894 cs->sc_cinfo[i].ci_provider->name); 895 } 896 sbuf_printf(sb, "\n"); 897 } 898 sbuf_finish(sb); 899 gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); 900 sbuf_delete(sb); 901 } 902 903 static void 904 g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb) 905 { 906 struct g_geom *gp; 907 908 g_topology_assert(); 909 if (!strcmp(verb, "create geom")) { 910 g_ccd_create(req, mp); 911 } else if (!strcmp(verb, "destroy geom")) { 912 gp = gctl_get_geom(req, mp, "geom"); 913 if (gp != NULL) 914 g_ccd_destroy_geom(req, mp, gp); 915 } else if (!strcmp(verb, "list")) { 916 g_ccd_list(req, mp); 917 } else { 918 gctl_error(req, "unknown verb"); 919 } 920 } 921 922 static struct g_class g_ccd_class = { 923 .name = "CCD", 924 .version = G_VERSION, 925 .ctlreq = g_ccd_config, 926 .destroy_geom = g_ccd_destroy_geom, 927 .start = g_ccd_start, 928 .orphan = g_ccd_orphan, 929 .access = g_ccd_access, 930 }; 931 932 DECLARE_GEOM_CLASS(g_ccd_class, g_ccd); 933 MODULE_VERSION(geom_ccd, 0); 934