1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #include <sys/module.h> 36 #include <sys/lock.h> 37 #include <sys/mutex.h> 38 #include <sys/bio.h> 39 #include <sys/sbuf.h> 40 #include <sys/sysctl.h> 41 #include <sys/malloc.h> 42 #include <vm/uma.h> 43 #include <geom/geom.h> 44 #include <geom/geom_dbg.h> 45 #include <geom/stripe/g_stripe.h> 46 47 FEATURE(geom_stripe, "GEOM striping support"); 48 49 static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); 50 51 static uma_zone_t g_stripe_zone; 52 53 static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); 54 static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, 55 struct g_geom *gp); 56 57 static g_taste_t g_stripe_taste; 58 static g_ctl_req_t g_stripe_config; 59 static g_dumpconf_t g_stripe_dumpconf; 60 static g_init_t g_stripe_init; 61 static g_fini_t g_stripe_fini; 62 63 struct g_class g_stripe_class = { 64 .name = G_STRIPE_CLASS_NAME, 65 .version = G_VERSION, 66 .ctlreq = g_stripe_config, 67 .taste = g_stripe_taste, 68 .destroy_geom = g_stripe_destroy_geom, 69 .init = g_stripe_init, 70 .fini = g_stripe_fini 71 }; 72 73 SYSCTL_DECL(_kern_geom); 74 static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 75 "GEOM_STRIPE stuff"); 76 static u_int g_stripe_debug = 0; 77 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, 78 "Debug level"); 79 static int g_stripe_fast = 0; 80 SYSCTL_INT(_kern_geom_stripe, OID_AUTO, fast, 81 CTLFLAG_RWTUN, &g_stripe_fast, 0, 82 "Fast, but memory-consuming, mode"); 83 static u_long g_stripe_maxmem; 84 SYSCTL_ULONG(_kern_geom_stripe, OID_AUTO, maxmem, 85 CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &g_stripe_maxmem, 0, 86 "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); 87 static u_int g_stripe_fast_failed = 0; 88 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, 89 &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); 90 91 /* 92 * Greatest Common Divisor. 93 */ 94 static u_int 95 gcd(u_int a, u_int b) 96 { 97 u_int c; 98 99 while (b != 0) { 100 c = a; 101 a = b; 102 b = (c % b); 103 } 104 return (a); 105 } 106 107 /* 108 * Least Common Multiple. 109 */ 110 static u_int 111 lcm(u_int a, u_int b) 112 { 113 114 return ((a * b) / gcd(a, b)); 115 } 116 117 static void 118 g_stripe_init(struct g_class *mp __unused) 119 { 120 121 g_stripe_maxmem = maxphys * 100; 122 TUNABLE_ULONG_FETCH("kern.geom.stripe.maxmem,", &g_stripe_maxmem); 123 g_stripe_zone = uma_zcreate("g_stripe_zone", maxphys, NULL, NULL, 124 NULL, NULL, 0, 0); 125 g_stripe_maxmem -= g_stripe_maxmem % maxphys; 126 uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / maxphys); 127 } 128 129 static void 130 g_stripe_fini(struct g_class *mp __unused) 131 { 132 133 uma_zdestroy(g_stripe_zone); 134 } 135 136 /* 137 * Return the number of valid disks. 138 */ 139 static u_int 140 g_stripe_nvalid(struct g_stripe_softc *sc) 141 { 142 u_int i, no; 143 144 no = 0; 145 for (i = 0; i < sc->sc_ndisks; i++) { 146 if (sc->sc_disks[i] != NULL) 147 no++; 148 } 149 150 return (no); 151 } 152 153 static void 154 g_stripe_remove_disk(struct g_consumer *cp) 155 { 156 struct g_stripe_softc *sc; 157 158 g_topology_assert(); 159 KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); 160 sc = (struct g_stripe_softc *)cp->geom->softc; 161 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 162 163 if (cp->private == NULL) { 164 G_STRIPE_DEBUG(0, "Disk %s removed from %s.", 165 cp->provider->name, sc->sc_name); 166 cp->private = (void *)(uintptr_t)-1; 167 } 168 169 if (sc->sc_provider != NULL) { 170 G_STRIPE_DEBUG(0, "Device %s deactivated.", 171 sc->sc_provider->name); 172 g_wither_provider(sc->sc_provider, ENXIO); 173 sc->sc_provider = NULL; 174 } 175 176 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 177 return; 178 sc->sc_disks[cp->index] = NULL; 179 cp->index = 0; 180 g_detach(cp); 181 g_destroy_consumer(cp); 182 /* If there are no valid disks anymore, remove device. */ 183 if (LIST_EMPTY(&sc->sc_geom->consumer)) 184 g_stripe_destroy(sc, 1); 185 } 186 187 static void 188 g_stripe_orphan(struct g_consumer *cp) 189 { 190 struct g_stripe_softc *sc; 191 struct g_geom *gp; 192 193 g_topology_assert(); 194 gp = cp->geom; 195 sc = gp->softc; 196 if (sc == NULL) 197 return; 198 199 g_stripe_remove_disk(cp); 200 } 201 202 static int 203 g_stripe_access(struct g_provider *pp, int dr, int dw, int de) 204 { 205 struct g_consumer *cp1, *cp2, *tmp; 206 struct g_stripe_softc *sc __diagused; 207 struct g_geom *gp; 208 int error; 209 210 g_topology_assert(); 211 gp = pp->geom; 212 sc = gp->softc; 213 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 214 215 /* On first open, grab an extra "exclusive" bit */ 216 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) 217 de++; 218 /* ... and let go of it on last close */ 219 if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) 220 de--; 221 222 LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { 223 error = g_access(cp1, dr, dw, de); 224 if (error != 0) 225 goto fail; 226 if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && 227 cp1->private != NULL) { 228 g_stripe_remove_disk(cp1); /* May destroy geom. */ 229 } 230 } 231 return (0); 232 233 fail: 234 LIST_FOREACH(cp2, &gp->consumer, consumer) { 235 if (cp1 == cp2) 236 break; 237 g_access(cp2, -dr, -dw, -de); 238 } 239 return (error); 240 } 241 242 static void 243 g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, 244 off_t length, int mode) 245 { 246 off_t stripesize; 247 size_t len; 248 249 stripesize = sc->sc_stripesize; 250 len = (size_t)(stripesize - (offset & (stripesize - 1))); 251 do { 252 bcopy(src, dst, len); 253 if (mode) { 254 dst += len + stripesize * (sc->sc_ndisks - 1); 255 src += len; 256 } else { 257 dst += len; 258 src += len + stripesize * (sc->sc_ndisks - 1); 259 } 260 length -= len; 261 KASSERT(length >= 0, 262 ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).", 263 (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length)); 264 if (length > stripesize) 265 len = stripesize; 266 else 267 len = length; 268 } while (length > 0); 269 } 270 271 static void 272 g_stripe_done(struct bio *bp) 273 { 274 struct g_stripe_softc *sc; 275 struct bio *pbp; 276 277 pbp = bp->bio_parent; 278 sc = pbp->bio_to->geom->softc; 279 if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { 280 g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, 281 bp->bio_length, 1); 282 bp->bio_data = bp->bio_caller1; 283 bp->bio_caller1 = NULL; 284 } 285 mtx_lock(&sc->sc_lock); 286 if (pbp->bio_error == 0) 287 pbp->bio_error = bp->bio_error; 288 pbp->bio_completed += bp->bio_completed; 289 pbp->bio_inbed++; 290 if (pbp->bio_children == pbp->bio_inbed) { 291 mtx_unlock(&sc->sc_lock); 292 if (pbp->bio_driver1 != NULL) 293 uma_zfree(g_stripe_zone, pbp->bio_driver1); 294 if (bp->bio_cmd == BIO_SPEEDUP) 295 pbp->bio_completed = pbp->bio_length; 296 g_io_deliver(pbp, pbp->bio_error); 297 } else 298 mtx_unlock(&sc->sc_lock); 299 g_destroy_bio(bp); 300 } 301 302 static int 303 g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) 304 { 305 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 306 struct g_stripe_softc *sc; 307 char *addr, *data = NULL; 308 struct bio *cbp; 309 off_t stripesize; 310 u_int nparts = 0; 311 int error; 312 313 sc = bp->bio_to->geom->softc; 314 315 addr = bp->bio_data; 316 stripesize = sc->sc_stripesize; 317 318 cbp = g_clone_bio(bp); 319 if (cbp == NULL) { 320 error = ENOMEM; 321 goto failure; 322 } 323 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 324 nparts++; 325 /* 326 * Fill in the component buf structure. 327 */ 328 cbp->bio_done = g_stripe_done; 329 cbp->bio_offset = offset; 330 cbp->bio_data = addr; 331 cbp->bio_caller1 = NULL; 332 cbp->bio_length = length; 333 cbp->bio_caller2 = sc->sc_disks[no]; 334 335 /* offset -= offset % stripesize; */ 336 offset -= offset & (stripesize - 1); 337 addr += length; 338 length = bp->bio_length - length; 339 for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { 340 if (no > sc->sc_ndisks - 1) { 341 no = 0; 342 offset += stripesize; 343 } 344 if (nparts >= sc->sc_ndisks) { 345 cbp = TAILQ_NEXT(cbp, bio_queue); 346 if (cbp == NULL) 347 cbp = TAILQ_FIRST(&queue); 348 nparts++; 349 /* 350 * Update bio structure. 351 */ 352 /* 353 * MIN() is in case when 354 * (bp->bio_length % sc->sc_stripesize) != 0. 355 */ 356 cbp->bio_length += MIN(stripesize, length); 357 if (cbp->bio_caller1 == NULL) { 358 cbp->bio_caller1 = cbp->bio_data; 359 cbp->bio_data = NULL; 360 if (data == NULL) { 361 data = uma_zalloc(g_stripe_zone, 362 M_NOWAIT); 363 if (data == NULL) { 364 error = ENOMEM; 365 goto failure; 366 } 367 } 368 } 369 } else { 370 cbp = g_clone_bio(bp); 371 if (cbp == NULL) { 372 error = ENOMEM; 373 goto failure; 374 } 375 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 376 nparts++; 377 /* 378 * Fill in the component buf structure. 379 */ 380 cbp->bio_done = g_stripe_done; 381 cbp->bio_offset = offset; 382 cbp->bio_data = addr; 383 cbp->bio_caller1 = NULL; 384 /* 385 * MIN() is in case when 386 * (bp->bio_length % sc->sc_stripesize) != 0. 387 */ 388 cbp->bio_length = MIN(stripesize, length); 389 cbp->bio_caller2 = sc->sc_disks[no]; 390 } 391 } 392 if (data != NULL) 393 bp->bio_driver1 = data; 394 /* 395 * Fire off all allocated requests! 396 */ 397 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 398 struct g_consumer *cp; 399 400 TAILQ_REMOVE(&queue, cbp, bio_queue); 401 cp = cbp->bio_caller2; 402 cbp->bio_caller2 = NULL; 403 cbp->bio_to = cp->provider; 404 if (cbp->bio_caller1 != NULL) { 405 cbp->bio_data = data; 406 if (bp->bio_cmd == BIO_WRITE) { 407 g_stripe_copy(sc, cbp->bio_caller1, data, 408 cbp->bio_offset, cbp->bio_length, 0); 409 } 410 data += cbp->bio_length; 411 } 412 G_STRIPE_LOGREQ(cbp, "Sending request."); 413 g_io_request(cbp, cp); 414 } 415 return (0); 416 failure: 417 if (data != NULL) 418 uma_zfree(g_stripe_zone, data); 419 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 420 TAILQ_REMOVE(&queue, cbp, bio_queue); 421 if (cbp->bio_caller1 != NULL) { 422 cbp->bio_data = cbp->bio_caller1; 423 cbp->bio_caller1 = NULL; 424 } 425 bp->bio_children--; 426 g_destroy_bio(cbp); 427 } 428 return (error); 429 } 430 431 static int 432 g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) 433 { 434 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 435 struct g_stripe_softc *sc; 436 off_t stripesize; 437 struct bio *cbp; 438 char *addr; 439 int error; 440 441 sc = bp->bio_to->geom->softc; 442 443 stripesize = sc->sc_stripesize; 444 445 cbp = g_clone_bio(bp); 446 if (cbp == NULL) { 447 error = ENOMEM; 448 goto failure; 449 } 450 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 451 /* 452 * Fill in the component buf structure. 453 */ 454 if (bp->bio_length == length) 455 cbp->bio_done = g_std_done; /* Optimized lockless case. */ 456 else 457 cbp->bio_done = g_stripe_done; 458 cbp->bio_offset = offset; 459 cbp->bio_length = length; 460 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 461 bp->bio_ma_n = round_page(bp->bio_ma_offset + 462 bp->bio_length) / PAGE_SIZE; 463 addr = NULL; 464 } else 465 addr = bp->bio_data; 466 cbp->bio_caller2 = sc->sc_disks[no]; 467 468 /* offset -= offset % stripesize; */ 469 offset -= offset & (stripesize - 1); 470 if (bp->bio_cmd != BIO_DELETE) 471 addr += length; 472 length = bp->bio_length - length; 473 for (no++; length > 0; no++, length -= stripesize) { 474 if (no > sc->sc_ndisks - 1) { 475 no = 0; 476 offset += stripesize; 477 } 478 cbp = g_clone_bio(bp); 479 if (cbp == NULL) { 480 error = ENOMEM; 481 goto failure; 482 } 483 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 484 485 /* 486 * Fill in the component buf structure. 487 */ 488 cbp->bio_done = g_stripe_done; 489 cbp->bio_offset = offset; 490 /* 491 * MIN() is in case when 492 * (bp->bio_length % sc->sc_stripesize) != 0. 493 */ 494 cbp->bio_length = MIN(stripesize, length); 495 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 496 cbp->bio_ma_offset += (uintptr_t)addr; 497 cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; 498 cbp->bio_ma_offset %= PAGE_SIZE; 499 cbp->bio_ma_n = round_page(cbp->bio_ma_offset + 500 cbp->bio_length) / PAGE_SIZE; 501 } else 502 cbp->bio_data = addr; 503 504 cbp->bio_caller2 = sc->sc_disks[no]; 505 506 if (bp->bio_cmd != BIO_DELETE) 507 addr += stripesize; 508 } 509 /* 510 * Fire off all allocated requests! 511 */ 512 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 513 struct g_consumer *cp; 514 515 TAILQ_REMOVE(&queue, cbp, bio_queue); 516 cp = cbp->bio_caller2; 517 cbp->bio_caller2 = NULL; 518 cbp->bio_to = cp->provider; 519 G_STRIPE_LOGREQ(cbp, "Sending request."); 520 g_io_request(cbp, cp); 521 } 522 return (0); 523 failure: 524 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 525 TAILQ_REMOVE(&queue, cbp, bio_queue); 526 bp->bio_children--; 527 g_destroy_bio(cbp); 528 } 529 return (error); 530 } 531 532 static void 533 g_stripe_pushdown(struct g_stripe_softc *sc, struct bio *bp) 534 { 535 struct bio_queue_head queue; 536 struct g_consumer *cp; 537 struct bio *cbp; 538 u_int no; 539 540 bioq_init(&queue); 541 for (no = 0; no < sc->sc_ndisks; no++) { 542 cbp = g_clone_bio(bp); 543 if (cbp == NULL) { 544 for (cbp = bioq_first(&queue); cbp != NULL; 545 cbp = bioq_first(&queue)) { 546 bioq_remove(&queue, cbp); 547 g_destroy_bio(cbp); 548 } 549 if (bp->bio_error == 0) 550 bp->bio_error = ENOMEM; 551 g_io_deliver(bp, bp->bio_error); 552 return; 553 } 554 bioq_insert_tail(&queue, cbp); 555 cbp->bio_done = g_stripe_done; 556 cbp->bio_caller2 = sc->sc_disks[no]; 557 cbp->bio_to = sc->sc_disks[no]->provider; 558 } 559 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 560 bioq_remove(&queue, cbp); 561 G_STRIPE_LOGREQ(cbp, "Sending request."); 562 cp = cbp->bio_caller2; 563 cbp->bio_caller2 = NULL; 564 g_io_request(cbp, cp); 565 } 566 } 567 568 static void 569 g_stripe_start(struct bio *bp) 570 { 571 off_t offset, start, length, nstripe, stripesize; 572 struct g_stripe_softc *sc; 573 u_int no; 574 int error, fast = 0; 575 576 sc = bp->bio_to->geom->softc; 577 /* 578 * If sc == NULL, provider's error should be set and g_stripe_start() 579 * should not be called at all. 580 */ 581 KASSERT(sc != NULL, 582 ("Provider's error should be set (error=%d)(device=%s).", 583 bp->bio_to->error, bp->bio_to->name)); 584 585 G_STRIPE_LOGREQ(bp, "Request received."); 586 587 switch (bp->bio_cmd) { 588 case BIO_READ: 589 case BIO_WRITE: 590 case BIO_DELETE: 591 break; 592 case BIO_SPEEDUP: 593 case BIO_FLUSH: 594 g_stripe_pushdown(sc, bp); 595 return; 596 case BIO_GETATTR: 597 /* To which provider it should be delivered? */ 598 default: 599 g_io_deliver(bp, EOPNOTSUPP); 600 return; 601 } 602 603 stripesize = sc->sc_stripesize; 604 605 /* 606 * Calculations are quite messy, but fast I hope. 607 */ 608 609 /* Stripe number. */ 610 /* nstripe = bp->bio_offset / stripesize; */ 611 nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; 612 /* Disk number. */ 613 no = nstripe % sc->sc_ndisks; 614 /* Start position in stripe. */ 615 /* start = bp->bio_offset % stripesize; */ 616 start = bp->bio_offset & (stripesize - 1); 617 /* Start position in disk. */ 618 /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ 619 offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; 620 /* Length of data to operate. */ 621 length = MIN(bp->bio_length, stripesize - start); 622 623 /* 624 * Do use "fast" mode when: 625 * 1. "Fast" mode is ON. 626 * and 627 * 2. Request size is less than or equal to maxphys, 628 * which should always be true. 629 * and 630 * 3. Request size is bigger than stripesize * ndisks. If it isn't, 631 * there will be no need to send more than one I/O request to 632 * a provider, so there is nothing to optmize. 633 * and 634 * 4. Request is not unmapped. 635 * and 636 * 5. It is not a BIO_DELETE. 637 */ 638 if (g_stripe_fast && bp->bio_length <= maxphys && 639 bp->bio_length >= stripesize * sc->sc_ndisks && 640 (bp->bio_flags & BIO_UNMAPPED) == 0 && 641 bp->bio_cmd != BIO_DELETE) { 642 fast = 1; 643 } 644 error = 0; 645 if (fast) { 646 error = g_stripe_start_fast(bp, no, offset, length); 647 if (error != 0) 648 g_stripe_fast_failed++; 649 } 650 /* 651 * Do use "economic" when: 652 * 1. "Economic" mode is ON. 653 * or 654 * 2. "Fast" mode failed. It can only fail if there is no memory. 655 */ 656 if (!fast || error != 0) 657 error = g_stripe_start_economic(bp, no, offset, length); 658 if (error != 0) { 659 if (bp->bio_error == 0) 660 bp->bio_error = error; 661 g_io_deliver(bp, bp->bio_error); 662 } 663 } 664 665 static void 666 g_stripe_check_and_run(struct g_stripe_softc *sc) 667 { 668 struct g_provider *dp; 669 off_t mediasize, ms; 670 u_int no, sectorsize = 0; 671 672 g_topology_assert(); 673 if (g_stripe_nvalid(sc) != sc->sc_ndisks) 674 return; 675 676 sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", 677 sc->sc_name); 678 sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; 679 if (g_stripe_fast == 0) 680 sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; 681 /* 682 * Find the smallest disk. 683 */ 684 mediasize = sc->sc_disks[0]->provider->mediasize; 685 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 686 mediasize -= sc->sc_disks[0]->provider->sectorsize; 687 mediasize -= mediasize % sc->sc_stripesize; 688 sectorsize = sc->sc_disks[0]->provider->sectorsize; 689 for (no = 1; no < sc->sc_ndisks; no++) { 690 dp = sc->sc_disks[no]->provider; 691 ms = dp->mediasize; 692 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 693 ms -= dp->sectorsize; 694 ms -= ms % sc->sc_stripesize; 695 if (ms < mediasize) 696 mediasize = ms; 697 sectorsize = lcm(sectorsize, dp->sectorsize); 698 699 /* A provider underneath us doesn't support unmapped */ 700 if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { 701 G_STRIPE_DEBUG(1, "Cancelling unmapped " 702 "because of %s.", dp->name); 703 sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; 704 } 705 } 706 sc->sc_provider->sectorsize = sectorsize; 707 sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; 708 sc->sc_provider->stripesize = sc->sc_stripesize; 709 sc->sc_provider->stripeoffset = 0; 710 g_error_provider(sc->sc_provider, 0); 711 712 G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); 713 } 714 715 static int 716 g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) 717 { 718 struct g_provider *pp; 719 u_char *buf; 720 int error; 721 722 g_topology_assert(); 723 724 error = g_access(cp, 1, 0, 0); 725 if (error != 0) 726 return (error); 727 pp = cp->provider; 728 g_topology_unlock(); 729 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 730 &error); 731 g_topology_lock(); 732 g_access(cp, -1, 0, 0); 733 if (buf == NULL) 734 return (error); 735 736 /* Decode metadata. */ 737 stripe_metadata_decode(buf, md); 738 g_free(buf); 739 740 return (0); 741 } 742 743 /* 744 * Add disk to given device. 745 */ 746 static int 747 g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) 748 { 749 struct g_consumer *cp, *fcp; 750 struct g_geom *gp; 751 int error; 752 753 g_topology_assert(); 754 /* Metadata corrupted? */ 755 if (no >= sc->sc_ndisks) 756 return (EINVAL); 757 758 /* Check if disk is not already attached. */ 759 if (sc->sc_disks[no] != NULL) 760 return (EEXIST); 761 762 gp = sc->sc_geom; 763 fcp = LIST_FIRST(&gp->consumer); 764 765 cp = g_new_consumer(gp); 766 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 767 cp->private = NULL; 768 cp->index = no; 769 error = g_attach(cp, pp); 770 if (error != 0) { 771 g_destroy_consumer(cp); 772 return (error); 773 } 774 775 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { 776 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 777 if (error != 0) { 778 g_detach(cp); 779 g_destroy_consumer(cp); 780 return (error); 781 } 782 } 783 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { 784 struct g_stripe_metadata md; 785 786 /* Reread metadata. */ 787 error = g_stripe_read_metadata(cp, &md); 788 if (error != 0) 789 goto fail; 790 791 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || 792 strcmp(md.md_name, sc->sc_name) != 0 || 793 md.md_id != sc->sc_id) { 794 G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); 795 goto fail; 796 } 797 } 798 799 sc->sc_disks[no] = cp; 800 G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); 801 g_stripe_check_and_run(sc); 802 803 return (0); 804 fail: 805 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) 806 g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); 807 g_detach(cp); 808 g_destroy_consumer(cp); 809 return (error); 810 } 811 812 static struct g_geom * 813 g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, 814 u_int type) 815 { 816 struct g_stripe_softc *sc; 817 struct g_geom *gp; 818 u_int no; 819 820 g_topology_assert(); 821 G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, 822 md->md_id); 823 824 /* Two disks is minimum. */ 825 if (md->md_all < 2) { 826 G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); 827 return (NULL); 828 } 829 #if 0 830 /* Stripe size have to be grater than or equal to sector size. */ 831 if (md->md_stripesize < sectorsize) { 832 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 833 return (NULL); 834 } 835 #endif 836 /* Stripe size have to be power of 2. */ 837 if (!powerof2(md->md_stripesize)) { 838 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 839 return (NULL); 840 } 841 842 /* Check for duplicate unit */ 843 LIST_FOREACH(gp, &mp->geom, geom) { 844 sc = gp->softc; 845 if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { 846 G_STRIPE_DEBUG(0, "Device %s already configured.", 847 sc->sc_name); 848 return (NULL); 849 } 850 } 851 gp = g_new_geomf(mp, "%s", md->md_name); 852 sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); 853 gp->start = g_stripe_start; 854 gp->spoiled = g_stripe_orphan; 855 gp->orphan = g_stripe_orphan; 856 gp->access = g_stripe_access; 857 gp->dumpconf = g_stripe_dumpconf; 858 859 sc->sc_id = md->md_id; 860 sc->sc_stripesize = md->md_stripesize; 861 sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); 862 sc->sc_ndisks = md->md_all; 863 sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, 864 M_STRIPE, M_WAITOK | M_ZERO); 865 for (no = 0; no < sc->sc_ndisks; no++) 866 sc->sc_disks[no] = NULL; 867 sc->sc_type = type; 868 mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); 869 870 gp->softc = sc; 871 sc->sc_geom = gp; 872 sc->sc_provider = NULL; 873 874 G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 875 876 return (gp); 877 } 878 879 static int 880 g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) 881 { 882 struct g_provider *pp; 883 struct g_consumer *cp, *cp1; 884 struct g_geom *gp; 885 886 g_topology_assert(); 887 888 if (sc == NULL) 889 return (ENXIO); 890 891 pp = sc->sc_provider; 892 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 893 if (force) { 894 G_STRIPE_DEBUG(0, "Device %s is still open, so it " 895 "can't be definitely removed.", pp->name); 896 } else { 897 G_STRIPE_DEBUG(1, 898 "Device %s is still open (r%dw%de%d).", pp->name, 899 pp->acr, pp->acw, pp->ace); 900 return (EBUSY); 901 } 902 } 903 904 gp = sc->sc_geom; 905 LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { 906 g_stripe_remove_disk(cp); 907 if (cp1 == NULL) 908 return (0); /* Recursion happened. */ 909 } 910 if (!LIST_EMPTY(&gp->consumer)) 911 return (EINPROGRESS); 912 913 gp->softc = NULL; 914 KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", 915 gp->name)); 916 free(sc->sc_disks, M_STRIPE); 917 mtx_destroy(&sc->sc_lock); 918 free(sc, M_STRIPE); 919 G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); 920 g_wither_geom(gp, ENXIO); 921 return (0); 922 } 923 924 static int 925 g_stripe_destroy_geom(struct gctl_req *req __unused, 926 struct g_class *mp __unused, struct g_geom *gp) 927 { 928 struct g_stripe_softc *sc; 929 930 sc = gp->softc; 931 return (g_stripe_destroy(sc, 0)); 932 } 933 934 static struct g_geom * 935 g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 936 { 937 struct g_stripe_metadata md; 938 struct g_stripe_softc *sc; 939 struct g_consumer *cp; 940 struct g_geom *gp; 941 int error; 942 943 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 944 g_topology_assert(); 945 946 /* Skip providers that are already open for writing. */ 947 if (pp->acw > 0) 948 return (NULL); 949 950 G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); 951 952 gp = g_new_geomf(mp, "stripe:taste"); 953 gp->start = g_stripe_start; 954 gp->access = g_stripe_access; 955 gp->orphan = g_stripe_orphan; 956 cp = g_new_consumer(gp); 957 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 958 error = g_attach(cp, pp); 959 if (error == 0) { 960 error = g_stripe_read_metadata(cp, &md); 961 g_detach(cp); 962 } 963 g_destroy_consumer(cp); 964 g_destroy_geom(gp); 965 if (error != 0) 966 return (NULL); 967 gp = NULL; 968 969 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) 970 return (NULL); 971 if (md.md_version > G_STRIPE_VERSION) { 972 printf("geom_stripe.ko module is too old to handle %s.\n", 973 pp->name); 974 return (NULL); 975 } 976 /* 977 * Backward compatibility: 978 */ 979 /* There was no md_provider field in earlier versions of metadata. */ 980 if (md.md_version < 2) 981 bzero(md.md_provider, sizeof(md.md_provider)); 982 /* There was no md_provsize field in earlier versions of metadata. */ 983 if (md.md_version < 3) 984 md.md_provsize = pp->mediasize; 985 986 if (md.md_provider[0] != '\0' && 987 !g_compare_names(md.md_provider, pp->name)) 988 return (NULL); 989 if (md.md_provsize != pp->mediasize) 990 return (NULL); 991 992 /* 993 * Let's check if device already exists. 994 */ 995 sc = NULL; 996 LIST_FOREACH(gp, &mp->geom, geom) { 997 sc = gp->softc; 998 if (sc == NULL) 999 continue; 1000 if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) 1001 continue; 1002 if (strcmp(md.md_name, sc->sc_name) != 0) 1003 continue; 1004 if (md.md_id != sc->sc_id) 1005 continue; 1006 break; 1007 } 1008 if (gp != NULL) { 1009 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1010 error = g_stripe_add_disk(sc, pp, md.md_no); 1011 if (error != 0) { 1012 G_STRIPE_DEBUG(0, 1013 "Cannot add disk %s to %s (error=%d).", pp->name, 1014 gp->name, error); 1015 return (NULL); 1016 } 1017 } else { 1018 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); 1019 if (gp == NULL) { 1020 G_STRIPE_DEBUG(0, "Cannot create device %s.", 1021 md.md_name); 1022 return (NULL); 1023 } 1024 sc = gp->softc; 1025 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1026 error = g_stripe_add_disk(sc, pp, md.md_no); 1027 if (error != 0) { 1028 G_STRIPE_DEBUG(0, 1029 "Cannot add disk %s to %s (error=%d).", pp->name, 1030 gp->name, error); 1031 g_stripe_destroy(sc, 1); 1032 return (NULL); 1033 } 1034 } 1035 1036 return (gp); 1037 } 1038 1039 static void 1040 g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) 1041 { 1042 u_int attached, no; 1043 struct g_stripe_metadata md; 1044 struct g_provider *pp; 1045 struct g_stripe_softc *sc; 1046 struct g_geom *gp; 1047 struct sbuf *sb; 1048 off_t *stripesize; 1049 const char *name; 1050 char param[16]; 1051 int *nargs; 1052 1053 g_topology_assert(); 1054 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1055 if (nargs == NULL) { 1056 gctl_error(req, "No '%s' argument.", "nargs"); 1057 return; 1058 } 1059 if (*nargs <= 2) { 1060 gctl_error(req, "Too few arguments."); 1061 return; 1062 } 1063 1064 strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); 1065 md.md_version = G_STRIPE_VERSION; 1066 name = gctl_get_asciiparam(req, "arg0"); 1067 if (name == NULL) { 1068 gctl_error(req, "No 'arg%u' argument.", 0); 1069 return; 1070 } 1071 strlcpy(md.md_name, name, sizeof(md.md_name)); 1072 md.md_id = arc4random(); 1073 md.md_no = 0; 1074 md.md_all = *nargs - 1; 1075 stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); 1076 if (stripesize == NULL) { 1077 gctl_error(req, "No '%s' argument.", "stripesize"); 1078 return; 1079 } 1080 md.md_stripesize = (uint32_t)*stripesize; 1081 bzero(md.md_provider, sizeof(md.md_provider)); 1082 /* This field is not important here. */ 1083 md.md_provsize = 0; 1084 1085 /* Check all providers are valid */ 1086 for (no = 1; no < *nargs; no++) { 1087 snprintf(param, sizeof(param), "arg%u", no); 1088 pp = gctl_get_provider(req, param); 1089 if (pp == NULL) 1090 return; 1091 } 1092 1093 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); 1094 if (gp == NULL) { 1095 gctl_error(req, "Can't configure %s.", md.md_name); 1096 return; 1097 } 1098 1099 sc = gp->softc; 1100 sb = sbuf_new_auto(); 1101 sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); 1102 for (attached = 0, no = 1; no < *nargs; no++) { 1103 snprintf(param, sizeof(param), "arg%u", no); 1104 pp = gctl_get_provider(req, param); 1105 if (pp == NULL) { 1106 name = gctl_get_asciiparam(req, param); 1107 MPASS(name != NULL); 1108 sbuf_printf(sb, " %s", name); 1109 continue; 1110 } 1111 if (g_stripe_add_disk(sc, pp, no - 1) != 0) { 1112 G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", 1113 no, pp->name, gp->name); 1114 sbuf_printf(sb, " %s", pp->name); 1115 continue; 1116 } 1117 attached++; 1118 } 1119 sbuf_finish(sb); 1120 if (md.md_all != attached) { 1121 g_stripe_destroy(gp->softc, 1); 1122 gctl_error(req, "%s", sbuf_data(sb)); 1123 } 1124 sbuf_delete(sb); 1125 } 1126 1127 static struct g_stripe_softc * 1128 g_stripe_find_device(struct g_class *mp, const char *name) 1129 { 1130 struct g_stripe_softc *sc; 1131 struct g_geom *gp; 1132 1133 LIST_FOREACH(gp, &mp->geom, geom) { 1134 sc = gp->softc; 1135 if (sc == NULL) 1136 continue; 1137 if (strcmp(sc->sc_name, name) == 0) 1138 return (sc); 1139 } 1140 return (NULL); 1141 } 1142 1143 static void 1144 g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) 1145 { 1146 struct g_stripe_softc *sc; 1147 int *force, *nargs, error; 1148 const char *name; 1149 char param[16]; 1150 u_int i; 1151 1152 g_topology_assert(); 1153 1154 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1155 if (nargs == NULL) { 1156 gctl_error(req, "No '%s' argument.", "nargs"); 1157 return; 1158 } 1159 if (*nargs <= 0) { 1160 gctl_error(req, "Missing device(s)."); 1161 return; 1162 } 1163 force = gctl_get_paraml(req, "force", sizeof(*force)); 1164 if (force == NULL) { 1165 gctl_error(req, "No '%s' argument.", "force"); 1166 return; 1167 } 1168 1169 for (i = 0; i < (u_int)*nargs; i++) { 1170 snprintf(param, sizeof(param), "arg%u", i); 1171 name = gctl_get_asciiparam(req, param); 1172 if (name == NULL) { 1173 gctl_error(req, "No 'arg%u' argument.", i); 1174 return; 1175 } 1176 sc = g_stripe_find_device(mp, name); 1177 if (sc == NULL) { 1178 gctl_error(req, "No such device: %s.", name); 1179 return; 1180 } 1181 error = g_stripe_destroy(sc, *force); 1182 if (error != 0) { 1183 gctl_error(req, "Cannot destroy device %s (error=%d).", 1184 sc->sc_name, error); 1185 return; 1186 } 1187 } 1188 } 1189 1190 static void 1191 g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) 1192 { 1193 uint32_t *version; 1194 1195 g_topology_assert(); 1196 1197 version = gctl_get_paraml(req, "version", sizeof(*version)); 1198 if (version == NULL) { 1199 gctl_error(req, "No '%s' argument.", "version"); 1200 return; 1201 } 1202 if (*version != G_STRIPE_VERSION) { 1203 gctl_error(req, "Userland and kernel parts are out of sync."); 1204 return; 1205 } 1206 1207 if (strcmp(verb, "create") == 0) { 1208 g_stripe_ctl_create(req, mp); 1209 return; 1210 } else if (strcmp(verb, "destroy") == 0 || 1211 strcmp(verb, "stop") == 0) { 1212 g_stripe_ctl_destroy(req, mp); 1213 return; 1214 } 1215 1216 gctl_error(req, "Unknown verb."); 1217 } 1218 1219 static void 1220 g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1221 struct g_consumer *cp, struct g_provider *pp) 1222 { 1223 struct g_stripe_softc *sc; 1224 1225 sc = gp->softc; 1226 if (sc == NULL) 1227 return; 1228 if (pp != NULL) { 1229 /* Nothing here. */ 1230 } else if (cp != NULL) { 1231 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 1232 (u_int)cp->index); 1233 } else { 1234 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 1235 sbuf_printf(sb, "%s<Stripesize>%ju</Stripesize>\n", indent, 1236 (uintmax_t)sc->sc_stripesize); 1237 sbuf_printf(sb, "%s<Type>", indent); 1238 switch (sc->sc_type) { 1239 case G_STRIPE_TYPE_AUTOMATIC: 1240 sbuf_cat(sb, "AUTOMATIC"); 1241 break; 1242 case G_STRIPE_TYPE_MANUAL: 1243 sbuf_cat(sb, "MANUAL"); 1244 break; 1245 default: 1246 sbuf_cat(sb, "UNKNOWN"); 1247 break; 1248 } 1249 sbuf_cat(sb, "</Type>\n"); 1250 sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n", 1251 indent, sc->sc_ndisks, g_stripe_nvalid(sc)); 1252 sbuf_printf(sb, "%s<State>", indent); 1253 if (sc->sc_provider != NULL && sc->sc_provider->error == 0) 1254 sbuf_cat(sb, "UP"); 1255 else 1256 sbuf_cat(sb, "DOWN"); 1257 sbuf_cat(sb, "</State>\n"); 1258 } 1259 } 1260 1261 DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); 1262 MODULE_VERSION(geom_stripe, 0); 1263