1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #include <sys/module.h> 36 #include <sys/lock.h> 37 #include <sys/mutex.h> 38 #include <sys/bio.h> 39 #include <sys/sbuf.h> 40 #include <sys/sysctl.h> 41 #include <sys/malloc.h> 42 #include <vm/uma.h> 43 #include <geom/geom.h> 44 #include <geom/geom_dbg.h> 45 #include <geom/stripe/g_stripe.h> 46 47 FEATURE(geom_stripe, "GEOM striping support"); 48 49 static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); 50 51 static uma_zone_t g_stripe_zone; 52 53 static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); 54 static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, 55 struct g_geom *gp); 56 57 static g_taste_t g_stripe_taste; 58 static g_ctl_req_t g_stripe_config; 59 static g_dumpconf_t g_stripe_dumpconf; 60 static g_init_t g_stripe_init; 61 static g_fini_t g_stripe_fini; 62 63 struct g_class g_stripe_class = { 64 .name = G_STRIPE_CLASS_NAME, 65 .version = G_VERSION, 66 .ctlreq = g_stripe_config, 67 .taste = g_stripe_taste, 68 .destroy_geom = g_stripe_destroy_geom, 69 .init = g_stripe_init, 70 .fini = g_stripe_fini 71 }; 72 73 SYSCTL_DECL(_kern_geom); 74 static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 75 "GEOM_STRIPE stuff"); 76 static u_int g_stripe_debug = 0; 77 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, 78 "Debug level"); 79 static int g_stripe_fast = 0; 80 SYSCTL_INT(_kern_geom_stripe, OID_AUTO, fast, 81 CTLFLAG_RWTUN, &g_stripe_fast, 0, 82 "Fast, but memory-consuming, mode"); 83 static u_long g_stripe_maxmem; 84 SYSCTL_ULONG(_kern_geom_stripe, OID_AUTO, maxmem, 85 CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &g_stripe_maxmem, 0, 86 "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); 87 static u_int g_stripe_fast_failed = 0; 88 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, 89 &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); 90 91 /* 92 * Greatest Common Divisor. 93 */ 94 static u_int 95 gcd(u_int a, u_int b) 96 { 97 u_int c; 98 99 while (b != 0) { 100 c = a; 101 a = b; 102 b = (c % b); 103 } 104 return (a); 105 } 106 107 /* 108 * Least Common Multiple. 109 */ 110 static u_int 111 lcm(u_int a, u_int b) 112 { 113 114 return ((a * b) / gcd(a, b)); 115 } 116 117 static void 118 g_stripe_init(struct g_class *mp __unused) 119 { 120 121 g_stripe_maxmem = maxphys * 100; 122 TUNABLE_ULONG_FETCH("kern.geom.stripe.maxmem,", &g_stripe_maxmem); 123 g_stripe_zone = uma_zcreate("g_stripe_zone", maxphys, NULL, NULL, 124 NULL, NULL, 0, 0); 125 g_stripe_maxmem -= g_stripe_maxmem % maxphys; 126 uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / maxphys); 127 } 128 129 static void 130 g_stripe_fini(struct g_class *mp __unused) 131 { 132 133 uma_zdestroy(g_stripe_zone); 134 } 135 136 /* 137 * Return the number of valid disks. 138 */ 139 static u_int 140 g_stripe_nvalid(struct g_stripe_softc *sc) 141 { 142 u_int i, no; 143 144 no = 0; 145 for (i = 0; i < sc->sc_ndisks; i++) { 146 if (sc->sc_disks[i] != NULL) 147 no++; 148 } 149 150 return (no); 151 } 152 153 static void 154 g_stripe_remove_disk(struct g_consumer *cp) 155 { 156 struct g_stripe_softc *sc; 157 158 g_topology_assert(); 159 KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); 160 sc = (struct g_stripe_softc *)cp->geom->softc; 161 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 162 163 if (cp->private == NULL) { 164 G_STRIPE_DEBUG(0, "Disk %s removed from %s.", 165 cp->provider->name, sc->sc_name); 166 cp->private = (void *)(uintptr_t)-1; 167 } 168 169 if (sc->sc_provider != NULL) { 170 G_STRIPE_DEBUG(0, "Device %s deactivated.", 171 sc->sc_provider->name); 172 g_wither_provider(sc->sc_provider, ENXIO); 173 sc->sc_provider = NULL; 174 } 175 176 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 177 return; 178 sc->sc_disks[cp->index] = NULL; 179 cp->index = 0; 180 g_detach(cp); 181 g_destroy_consumer(cp); 182 /* If there are no valid disks anymore, remove device. */ 183 if (LIST_EMPTY(&sc->sc_geom->consumer)) 184 g_stripe_destroy(sc, 1); 185 } 186 187 static void 188 g_stripe_orphan(struct g_consumer *cp) 189 { 190 struct g_stripe_softc *sc; 191 struct g_geom *gp; 192 193 g_topology_assert(); 194 gp = cp->geom; 195 sc = gp->softc; 196 if (sc == NULL) 197 return; 198 199 g_stripe_remove_disk(cp); 200 } 201 202 static int 203 g_stripe_access(struct g_provider *pp, int dr, int dw, int de) 204 { 205 struct g_consumer *cp1, *cp2, *tmp; 206 struct g_stripe_softc *sc; 207 struct g_geom *gp; 208 int error; 209 210 g_topology_assert(); 211 gp = pp->geom; 212 sc = gp->softc; 213 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 214 215 /* On first open, grab an extra "exclusive" bit */ 216 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) 217 de++; 218 /* ... and let go of it on last close */ 219 if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) 220 de--; 221 222 LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { 223 error = g_access(cp1, dr, dw, de); 224 if (error != 0) 225 goto fail; 226 if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && 227 cp1->private != NULL) { 228 g_stripe_remove_disk(cp1); /* May destroy geom. */ 229 } 230 } 231 return (0); 232 233 fail: 234 LIST_FOREACH(cp2, &gp->consumer, consumer) { 235 if (cp1 == cp2) 236 break; 237 g_access(cp2, -dr, -dw, -de); 238 } 239 return (error); 240 } 241 242 static void 243 g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, 244 off_t length, int mode) 245 { 246 off_t stripesize; 247 size_t len; 248 249 stripesize = sc->sc_stripesize; 250 len = (size_t)(stripesize - (offset & (stripesize - 1))); 251 do { 252 bcopy(src, dst, len); 253 if (mode) { 254 dst += len + stripesize * (sc->sc_ndisks - 1); 255 src += len; 256 } else { 257 dst += len; 258 src += len + stripesize * (sc->sc_ndisks - 1); 259 } 260 length -= len; 261 KASSERT(length >= 0, 262 ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).", 263 (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length)); 264 if (length > stripesize) 265 len = stripesize; 266 else 267 len = length; 268 } while (length > 0); 269 } 270 271 static void 272 g_stripe_done(struct bio *bp) 273 { 274 struct g_stripe_softc *sc; 275 struct bio *pbp; 276 277 pbp = bp->bio_parent; 278 sc = pbp->bio_to->geom->softc; 279 if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { 280 g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, 281 bp->bio_length, 1); 282 bp->bio_data = bp->bio_caller1; 283 bp->bio_caller1 = NULL; 284 } 285 mtx_lock(&sc->sc_lock); 286 if (pbp->bio_error == 0) 287 pbp->bio_error = bp->bio_error; 288 pbp->bio_completed += bp->bio_completed; 289 pbp->bio_inbed++; 290 if (pbp->bio_children == pbp->bio_inbed) { 291 mtx_unlock(&sc->sc_lock); 292 if (pbp->bio_driver1 != NULL) 293 uma_zfree(g_stripe_zone, pbp->bio_driver1); 294 if (bp->bio_cmd == BIO_SPEEDUP) 295 pbp->bio_completed = pbp->bio_length; 296 g_io_deliver(pbp, pbp->bio_error); 297 } else 298 mtx_unlock(&sc->sc_lock); 299 g_destroy_bio(bp); 300 } 301 302 static int 303 g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) 304 { 305 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 306 struct g_stripe_softc *sc; 307 char *addr, *data = NULL; 308 struct bio *cbp; 309 off_t stripesize; 310 u_int nparts = 0; 311 int error; 312 313 sc = bp->bio_to->geom->softc; 314 315 addr = bp->bio_data; 316 stripesize = sc->sc_stripesize; 317 318 cbp = g_clone_bio(bp); 319 if (cbp == NULL) { 320 error = ENOMEM; 321 goto failure; 322 } 323 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 324 nparts++; 325 /* 326 * Fill in the component buf structure. 327 */ 328 cbp->bio_done = g_stripe_done; 329 cbp->bio_offset = offset; 330 cbp->bio_data = addr; 331 cbp->bio_caller1 = NULL; 332 cbp->bio_length = length; 333 cbp->bio_caller2 = sc->sc_disks[no]; 334 335 /* offset -= offset % stripesize; */ 336 offset -= offset & (stripesize - 1); 337 addr += length; 338 length = bp->bio_length - length; 339 for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { 340 if (no > sc->sc_ndisks - 1) { 341 no = 0; 342 offset += stripesize; 343 } 344 if (nparts >= sc->sc_ndisks) { 345 cbp = TAILQ_NEXT(cbp, bio_queue); 346 if (cbp == NULL) 347 cbp = TAILQ_FIRST(&queue); 348 nparts++; 349 /* 350 * Update bio structure. 351 */ 352 /* 353 * MIN() is in case when 354 * (bp->bio_length % sc->sc_stripesize) != 0. 355 */ 356 cbp->bio_length += MIN(stripesize, length); 357 if (cbp->bio_caller1 == NULL) { 358 cbp->bio_caller1 = cbp->bio_data; 359 cbp->bio_data = NULL; 360 if (data == NULL) { 361 data = uma_zalloc(g_stripe_zone, 362 M_NOWAIT); 363 if (data == NULL) { 364 error = ENOMEM; 365 goto failure; 366 } 367 } 368 } 369 } else { 370 cbp = g_clone_bio(bp); 371 if (cbp == NULL) { 372 error = ENOMEM; 373 goto failure; 374 } 375 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 376 nparts++; 377 /* 378 * Fill in the component buf structure. 379 */ 380 cbp->bio_done = g_stripe_done; 381 cbp->bio_offset = offset; 382 cbp->bio_data = addr; 383 cbp->bio_caller1 = NULL; 384 /* 385 * MIN() is in case when 386 * (bp->bio_length % sc->sc_stripesize) != 0. 387 */ 388 cbp->bio_length = MIN(stripesize, length); 389 cbp->bio_caller2 = sc->sc_disks[no]; 390 } 391 } 392 if (data != NULL) 393 bp->bio_driver1 = data; 394 /* 395 * Fire off all allocated requests! 396 */ 397 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 398 struct g_consumer *cp; 399 400 TAILQ_REMOVE(&queue, cbp, bio_queue); 401 cp = cbp->bio_caller2; 402 cbp->bio_caller2 = NULL; 403 cbp->bio_to = cp->provider; 404 if (cbp->bio_caller1 != NULL) { 405 cbp->bio_data = data; 406 if (bp->bio_cmd == BIO_WRITE) { 407 g_stripe_copy(sc, cbp->bio_caller1, data, 408 cbp->bio_offset, cbp->bio_length, 0); 409 } 410 data += cbp->bio_length; 411 } 412 G_STRIPE_LOGREQ(cbp, "Sending request."); 413 g_io_request(cbp, cp); 414 } 415 return (0); 416 failure: 417 if (data != NULL) 418 uma_zfree(g_stripe_zone, data); 419 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 420 TAILQ_REMOVE(&queue, cbp, bio_queue); 421 if (cbp->bio_caller1 != NULL) { 422 cbp->bio_data = cbp->bio_caller1; 423 cbp->bio_caller1 = NULL; 424 } 425 bp->bio_children--; 426 g_destroy_bio(cbp); 427 } 428 return (error); 429 } 430 431 static int 432 g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) 433 { 434 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 435 struct g_stripe_softc *sc; 436 off_t stripesize; 437 struct bio *cbp; 438 char *addr; 439 int error; 440 441 sc = bp->bio_to->geom->softc; 442 443 stripesize = sc->sc_stripesize; 444 445 cbp = g_clone_bio(bp); 446 if (cbp == NULL) { 447 error = ENOMEM; 448 goto failure; 449 } 450 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 451 /* 452 * Fill in the component buf structure. 453 */ 454 if (bp->bio_length == length) 455 cbp->bio_done = g_std_done; /* Optimized lockless case. */ 456 else 457 cbp->bio_done = g_stripe_done; 458 cbp->bio_offset = offset; 459 cbp->bio_length = length; 460 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 461 bp->bio_ma_n = round_page(bp->bio_ma_offset + 462 bp->bio_length) / PAGE_SIZE; 463 addr = NULL; 464 } else 465 addr = bp->bio_data; 466 cbp->bio_caller2 = sc->sc_disks[no]; 467 468 /* offset -= offset % stripesize; */ 469 offset -= offset & (stripesize - 1); 470 if (bp->bio_cmd != BIO_DELETE) 471 addr += length; 472 length = bp->bio_length - length; 473 for (no++; length > 0; no++, length -= stripesize) { 474 if (no > sc->sc_ndisks - 1) { 475 no = 0; 476 offset += stripesize; 477 } 478 cbp = g_clone_bio(bp); 479 if (cbp == NULL) { 480 error = ENOMEM; 481 goto failure; 482 } 483 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 484 485 /* 486 * Fill in the component buf structure. 487 */ 488 cbp->bio_done = g_stripe_done; 489 cbp->bio_offset = offset; 490 /* 491 * MIN() is in case when 492 * (bp->bio_length % sc->sc_stripesize) != 0. 493 */ 494 cbp->bio_length = MIN(stripesize, length); 495 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 496 cbp->bio_ma_offset += (uintptr_t)addr; 497 cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; 498 cbp->bio_ma_offset %= PAGE_SIZE; 499 cbp->bio_ma_n = round_page(cbp->bio_ma_offset + 500 cbp->bio_length) / PAGE_SIZE; 501 } else 502 cbp->bio_data = addr; 503 504 cbp->bio_caller2 = sc->sc_disks[no]; 505 506 if (bp->bio_cmd != BIO_DELETE) 507 addr += stripesize; 508 } 509 /* 510 * Fire off all allocated requests! 511 */ 512 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 513 struct g_consumer *cp; 514 515 TAILQ_REMOVE(&queue, cbp, bio_queue); 516 cp = cbp->bio_caller2; 517 cbp->bio_caller2 = NULL; 518 cbp->bio_to = cp->provider; 519 G_STRIPE_LOGREQ(cbp, "Sending request."); 520 g_io_request(cbp, cp); 521 } 522 return (0); 523 failure: 524 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 525 TAILQ_REMOVE(&queue, cbp, bio_queue); 526 bp->bio_children--; 527 g_destroy_bio(cbp); 528 } 529 return (error); 530 } 531 532 static void 533 g_stripe_pushdown(struct g_stripe_softc *sc, struct bio *bp) 534 { 535 struct bio_queue_head queue; 536 struct g_consumer *cp; 537 struct bio *cbp; 538 u_int no; 539 540 bioq_init(&queue); 541 for (no = 0; no < sc->sc_ndisks; no++) { 542 cbp = g_clone_bio(bp); 543 if (cbp == NULL) { 544 for (cbp = bioq_first(&queue); cbp != NULL; 545 cbp = bioq_first(&queue)) { 546 bioq_remove(&queue, cbp); 547 g_destroy_bio(cbp); 548 } 549 if (bp->bio_error == 0) 550 bp->bio_error = ENOMEM; 551 g_io_deliver(bp, bp->bio_error); 552 return; 553 } 554 bioq_insert_tail(&queue, cbp); 555 cbp->bio_done = g_stripe_done; 556 cbp->bio_caller2 = sc->sc_disks[no]; 557 cbp->bio_to = sc->sc_disks[no]->provider; 558 } 559 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 560 bioq_remove(&queue, cbp); 561 G_STRIPE_LOGREQ(cbp, "Sending request."); 562 cp = cbp->bio_caller2; 563 cbp->bio_caller2 = NULL; 564 g_io_request(cbp, cp); 565 } 566 } 567 568 static void 569 g_stripe_start(struct bio *bp) 570 { 571 off_t offset, start, length, nstripe, stripesize; 572 struct g_stripe_softc *sc; 573 u_int no; 574 int error, fast = 0; 575 576 sc = bp->bio_to->geom->softc; 577 /* 578 * If sc == NULL, provider's error should be set and g_stripe_start() 579 * should not be called at all. 580 */ 581 KASSERT(sc != NULL, 582 ("Provider's error should be set (error=%d)(device=%s).", 583 bp->bio_to->error, bp->bio_to->name)); 584 585 G_STRIPE_LOGREQ(bp, "Request received."); 586 587 switch (bp->bio_cmd) { 588 case BIO_READ: 589 case BIO_WRITE: 590 case BIO_DELETE: 591 break; 592 case BIO_SPEEDUP: 593 case BIO_FLUSH: 594 g_stripe_pushdown(sc, bp); 595 return; 596 case BIO_GETATTR: 597 /* To which provider it should be delivered? */ 598 default: 599 g_io_deliver(bp, EOPNOTSUPP); 600 return; 601 } 602 603 stripesize = sc->sc_stripesize; 604 605 /* 606 * Calculations are quite messy, but fast I hope. 607 */ 608 609 /* Stripe number. */ 610 /* nstripe = bp->bio_offset / stripesize; */ 611 nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; 612 /* Disk number. */ 613 no = nstripe % sc->sc_ndisks; 614 /* Start position in stripe. */ 615 /* start = bp->bio_offset % stripesize; */ 616 start = bp->bio_offset & (stripesize - 1); 617 /* Start position in disk. */ 618 /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ 619 offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; 620 /* Length of data to operate. */ 621 length = MIN(bp->bio_length, stripesize - start); 622 623 /* 624 * Do use "fast" mode when: 625 * 1. "Fast" mode is ON. 626 * and 627 * 2. Request size is less than or equal to maxphys, 628 * which should always be true. 629 * and 630 * 3. Request size is bigger than stripesize * ndisks. If it isn't, 631 * there will be no need to send more than one I/O request to 632 * a provider, so there is nothing to optmize. 633 * and 634 * 4. Request is not unmapped. 635 * and 636 * 5. It is not a BIO_DELETE. 637 */ 638 if (g_stripe_fast && bp->bio_length <= maxphys && 639 bp->bio_length >= stripesize * sc->sc_ndisks && 640 (bp->bio_flags & BIO_UNMAPPED) == 0 && 641 bp->bio_cmd != BIO_DELETE) { 642 fast = 1; 643 } 644 error = 0; 645 if (fast) { 646 error = g_stripe_start_fast(bp, no, offset, length); 647 if (error != 0) 648 g_stripe_fast_failed++; 649 } 650 /* 651 * Do use "economic" when: 652 * 1. "Economic" mode is ON. 653 * or 654 * 2. "Fast" mode failed. It can only fail if there is no memory. 655 */ 656 if (!fast || error != 0) 657 error = g_stripe_start_economic(bp, no, offset, length); 658 if (error != 0) { 659 if (bp->bio_error == 0) 660 bp->bio_error = error; 661 g_io_deliver(bp, bp->bio_error); 662 } 663 } 664 665 static void 666 g_stripe_check_and_run(struct g_stripe_softc *sc) 667 { 668 struct g_provider *dp; 669 off_t mediasize, ms; 670 u_int no, sectorsize = 0; 671 672 g_topology_assert(); 673 if (g_stripe_nvalid(sc) != sc->sc_ndisks) 674 return; 675 676 sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", 677 sc->sc_name); 678 sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; 679 if (g_stripe_fast == 0) 680 sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; 681 /* 682 * Find the smallest disk. 683 */ 684 mediasize = sc->sc_disks[0]->provider->mediasize; 685 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 686 mediasize -= sc->sc_disks[0]->provider->sectorsize; 687 mediasize -= mediasize % sc->sc_stripesize; 688 sectorsize = sc->sc_disks[0]->provider->sectorsize; 689 for (no = 1; no < sc->sc_ndisks; no++) { 690 dp = sc->sc_disks[no]->provider; 691 ms = dp->mediasize; 692 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 693 ms -= dp->sectorsize; 694 ms -= ms % sc->sc_stripesize; 695 if (ms < mediasize) 696 mediasize = ms; 697 sectorsize = lcm(sectorsize, dp->sectorsize); 698 699 /* A provider underneath us doesn't support unmapped */ 700 if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { 701 G_STRIPE_DEBUG(1, "Cancelling unmapped " 702 "because of %s.", dp->name); 703 sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; 704 } 705 } 706 sc->sc_provider->sectorsize = sectorsize; 707 sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; 708 sc->sc_provider->stripesize = sc->sc_stripesize; 709 sc->sc_provider->stripeoffset = 0; 710 g_error_provider(sc->sc_provider, 0); 711 712 G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); 713 } 714 715 static int 716 g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) 717 { 718 struct g_provider *pp; 719 u_char *buf; 720 int error; 721 722 g_topology_assert(); 723 724 error = g_access(cp, 1, 0, 0); 725 if (error != 0) 726 return (error); 727 pp = cp->provider; 728 g_topology_unlock(); 729 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 730 &error); 731 g_topology_lock(); 732 g_access(cp, -1, 0, 0); 733 if (buf == NULL) 734 return (error); 735 736 /* Decode metadata. */ 737 stripe_metadata_decode(buf, md); 738 g_free(buf); 739 740 return (0); 741 } 742 743 /* 744 * Add disk to given device. 745 */ 746 static int 747 g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) 748 { 749 struct g_consumer *cp, *fcp; 750 struct g_geom *gp; 751 int error; 752 753 g_topology_assert(); 754 /* Metadata corrupted? */ 755 if (no >= sc->sc_ndisks) 756 return (EINVAL); 757 758 /* Check if disk is not already attached. */ 759 if (sc->sc_disks[no] != NULL) 760 return (EEXIST); 761 762 gp = sc->sc_geom; 763 fcp = LIST_FIRST(&gp->consumer); 764 765 cp = g_new_consumer(gp); 766 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 767 cp->private = NULL; 768 cp->index = no; 769 error = g_attach(cp, pp); 770 if (error != 0) { 771 g_destroy_consumer(cp); 772 return (error); 773 } 774 775 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { 776 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 777 if (error != 0) { 778 g_detach(cp); 779 g_destroy_consumer(cp); 780 return (error); 781 } 782 } 783 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { 784 struct g_stripe_metadata md; 785 786 /* Reread metadata. */ 787 error = g_stripe_read_metadata(cp, &md); 788 if (error != 0) 789 goto fail; 790 791 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || 792 strcmp(md.md_name, sc->sc_name) != 0 || 793 md.md_id != sc->sc_id) { 794 G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); 795 goto fail; 796 } 797 } 798 799 sc->sc_disks[no] = cp; 800 G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); 801 g_stripe_check_and_run(sc); 802 803 return (0); 804 fail: 805 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) 806 g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); 807 g_detach(cp); 808 g_destroy_consumer(cp); 809 return (error); 810 } 811 812 static struct g_geom * 813 g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, 814 u_int type) 815 { 816 struct g_stripe_softc *sc; 817 struct g_geom *gp; 818 u_int no; 819 820 g_topology_assert(); 821 G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, 822 md->md_id); 823 824 /* Two disks is minimum. */ 825 if (md->md_all < 2) { 826 G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); 827 return (NULL); 828 } 829 #if 0 830 /* Stripe size have to be grater than or equal to sector size. */ 831 if (md->md_stripesize < sectorsize) { 832 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 833 return (NULL); 834 } 835 #endif 836 /* Stripe size have to be power of 2. */ 837 if (!powerof2(md->md_stripesize)) { 838 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 839 return (NULL); 840 } 841 842 /* Check for duplicate unit */ 843 LIST_FOREACH(gp, &mp->geom, geom) { 844 sc = gp->softc; 845 if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { 846 G_STRIPE_DEBUG(0, "Device %s already configured.", 847 sc->sc_name); 848 return (NULL); 849 } 850 } 851 gp = g_new_geomf(mp, "%s", md->md_name); 852 sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); 853 gp->start = g_stripe_start; 854 gp->spoiled = g_stripe_orphan; 855 gp->orphan = g_stripe_orphan; 856 gp->access = g_stripe_access; 857 gp->dumpconf = g_stripe_dumpconf; 858 859 sc->sc_id = md->md_id; 860 sc->sc_stripesize = md->md_stripesize; 861 sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); 862 sc->sc_ndisks = md->md_all; 863 sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, 864 M_STRIPE, M_WAITOK | M_ZERO); 865 for (no = 0; no < sc->sc_ndisks; no++) 866 sc->sc_disks[no] = NULL; 867 sc->sc_type = type; 868 mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); 869 870 gp->softc = sc; 871 sc->sc_geom = gp; 872 sc->sc_provider = NULL; 873 874 G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 875 876 return (gp); 877 } 878 879 static int 880 g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) 881 { 882 struct g_provider *pp; 883 struct g_consumer *cp, *cp1; 884 struct g_geom *gp; 885 886 g_topology_assert(); 887 888 if (sc == NULL) 889 return (ENXIO); 890 891 pp = sc->sc_provider; 892 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 893 if (force) { 894 G_STRIPE_DEBUG(0, "Device %s is still open, so it " 895 "can't be definitely removed.", pp->name); 896 } else { 897 G_STRIPE_DEBUG(1, 898 "Device %s is still open (r%dw%de%d).", pp->name, 899 pp->acr, pp->acw, pp->ace); 900 return (EBUSY); 901 } 902 } 903 904 gp = sc->sc_geom; 905 LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { 906 g_stripe_remove_disk(cp); 907 if (cp1 == NULL) 908 return (0); /* Recursion happened. */ 909 } 910 if (!LIST_EMPTY(&gp->consumer)) 911 return (EINPROGRESS); 912 913 gp->softc = NULL; 914 KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", 915 gp->name)); 916 free(sc->sc_disks, M_STRIPE); 917 mtx_destroy(&sc->sc_lock); 918 free(sc, M_STRIPE); 919 G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); 920 g_wither_geom(gp, ENXIO); 921 return (0); 922 } 923 924 static int 925 g_stripe_destroy_geom(struct gctl_req *req __unused, 926 struct g_class *mp __unused, struct g_geom *gp) 927 { 928 struct g_stripe_softc *sc; 929 930 sc = gp->softc; 931 return (g_stripe_destroy(sc, 0)); 932 } 933 934 static struct g_geom * 935 g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 936 { 937 struct g_stripe_metadata md; 938 struct g_stripe_softc *sc; 939 struct g_consumer *cp; 940 struct g_geom *gp; 941 int error; 942 943 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 944 g_topology_assert(); 945 946 /* Skip providers that are already open for writing. */ 947 if (pp->acw > 0) 948 return (NULL); 949 950 G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); 951 952 gp = g_new_geomf(mp, "stripe:taste"); 953 gp->start = g_stripe_start; 954 gp->access = g_stripe_access; 955 gp->orphan = g_stripe_orphan; 956 cp = g_new_consumer(gp); 957 error = g_attach(cp, pp); 958 if (error == 0) { 959 error = g_stripe_read_metadata(cp, &md); 960 g_detach(cp); 961 } 962 g_destroy_consumer(cp); 963 g_destroy_geom(gp); 964 if (error != 0) 965 return (NULL); 966 gp = NULL; 967 968 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) 969 return (NULL); 970 if (md.md_version > G_STRIPE_VERSION) { 971 printf("geom_stripe.ko module is too old to handle %s.\n", 972 pp->name); 973 return (NULL); 974 } 975 /* 976 * Backward compatibility: 977 */ 978 /* There was no md_provider field in earlier versions of metadata. */ 979 if (md.md_version < 2) 980 bzero(md.md_provider, sizeof(md.md_provider)); 981 /* There was no md_provsize field in earlier versions of metadata. */ 982 if (md.md_version < 3) 983 md.md_provsize = pp->mediasize; 984 985 if (md.md_provider[0] != '\0' && 986 !g_compare_names(md.md_provider, pp->name)) 987 return (NULL); 988 if (md.md_provsize != pp->mediasize) 989 return (NULL); 990 991 /* 992 * Let's check if device already exists. 993 */ 994 sc = NULL; 995 LIST_FOREACH(gp, &mp->geom, geom) { 996 sc = gp->softc; 997 if (sc == NULL) 998 continue; 999 if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) 1000 continue; 1001 if (strcmp(md.md_name, sc->sc_name) != 0) 1002 continue; 1003 if (md.md_id != sc->sc_id) 1004 continue; 1005 break; 1006 } 1007 if (gp != NULL) { 1008 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1009 error = g_stripe_add_disk(sc, pp, md.md_no); 1010 if (error != 0) { 1011 G_STRIPE_DEBUG(0, 1012 "Cannot add disk %s to %s (error=%d).", pp->name, 1013 gp->name, error); 1014 return (NULL); 1015 } 1016 } else { 1017 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); 1018 if (gp == NULL) { 1019 G_STRIPE_DEBUG(0, "Cannot create device %s.", 1020 md.md_name); 1021 return (NULL); 1022 } 1023 sc = gp->softc; 1024 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1025 error = g_stripe_add_disk(sc, pp, md.md_no); 1026 if (error != 0) { 1027 G_STRIPE_DEBUG(0, 1028 "Cannot add disk %s to %s (error=%d).", pp->name, 1029 gp->name, error); 1030 g_stripe_destroy(sc, 1); 1031 return (NULL); 1032 } 1033 } 1034 1035 return (gp); 1036 } 1037 1038 static void 1039 g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) 1040 { 1041 u_int attached, no; 1042 struct g_stripe_metadata md; 1043 struct g_provider *pp; 1044 struct g_stripe_softc *sc; 1045 struct g_geom *gp; 1046 struct sbuf *sb; 1047 off_t *stripesize; 1048 const char *name; 1049 char param[16]; 1050 int *nargs; 1051 1052 g_topology_assert(); 1053 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1054 if (nargs == NULL) { 1055 gctl_error(req, "No '%s' argument.", "nargs"); 1056 return; 1057 } 1058 if (*nargs <= 2) { 1059 gctl_error(req, "Too few arguments."); 1060 return; 1061 } 1062 1063 strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); 1064 md.md_version = G_STRIPE_VERSION; 1065 name = gctl_get_asciiparam(req, "arg0"); 1066 if (name == NULL) { 1067 gctl_error(req, "No 'arg%u' argument.", 0); 1068 return; 1069 } 1070 strlcpy(md.md_name, name, sizeof(md.md_name)); 1071 md.md_id = arc4random(); 1072 md.md_no = 0; 1073 md.md_all = *nargs - 1; 1074 stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); 1075 if (stripesize == NULL) { 1076 gctl_error(req, "No '%s' argument.", "stripesize"); 1077 return; 1078 } 1079 md.md_stripesize = (uint32_t)*stripesize; 1080 bzero(md.md_provider, sizeof(md.md_provider)); 1081 /* This field is not important here. */ 1082 md.md_provsize = 0; 1083 1084 /* Check all providers are valid */ 1085 for (no = 1; no < *nargs; no++) { 1086 snprintf(param, sizeof(param), "arg%u", no); 1087 pp = gctl_get_provider(req, param); 1088 if (pp == NULL) 1089 return; 1090 } 1091 1092 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); 1093 if (gp == NULL) { 1094 gctl_error(req, "Can't configure %s.", md.md_name); 1095 return; 1096 } 1097 1098 sc = gp->softc; 1099 sb = sbuf_new_auto(); 1100 sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); 1101 for (attached = 0, no = 1; no < *nargs; no++) { 1102 snprintf(param, sizeof(param), "arg%u", no); 1103 pp = gctl_get_provider(req, param); 1104 if (pp == NULL) { 1105 name = gctl_get_asciiparam(req, param); 1106 MPASS(name != NULL); 1107 sbuf_printf(sb, " %s", name); 1108 continue; 1109 } 1110 if (g_stripe_add_disk(sc, pp, no - 1) != 0) { 1111 G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", 1112 no, pp->name, gp->name); 1113 sbuf_printf(sb, " %s", pp->name); 1114 continue; 1115 } 1116 attached++; 1117 } 1118 sbuf_finish(sb); 1119 if (md.md_all != attached) { 1120 g_stripe_destroy(gp->softc, 1); 1121 gctl_error(req, "%s", sbuf_data(sb)); 1122 } 1123 sbuf_delete(sb); 1124 } 1125 1126 static struct g_stripe_softc * 1127 g_stripe_find_device(struct g_class *mp, const char *name) 1128 { 1129 struct g_stripe_softc *sc; 1130 struct g_geom *gp; 1131 1132 LIST_FOREACH(gp, &mp->geom, geom) { 1133 sc = gp->softc; 1134 if (sc == NULL) 1135 continue; 1136 if (strcmp(sc->sc_name, name) == 0) 1137 return (sc); 1138 } 1139 return (NULL); 1140 } 1141 1142 static void 1143 g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) 1144 { 1145 struct g_stripe_softc *sc; 1146 int *force, *nargs, error; 1147 const char *name; 1148 char param[16]; 1149 u_int i; 1150 1151 g_topology_assert(); 1152 1153 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1154 if (nargs == NULL) { 1155 gctl_error(req, "No '%s' argument.", "nargs"); 1156 return; 1157 } 1158 if (*nargs <= 0) { 1159 gctl_error(req, "Missing device(s)."); 1160 return; 1161 } 1162 force = gctl_get_paraml(req, "force", sizeof(*force)); 1163 if (force == NULL) { 1164 gctl_error(req, "No '%s' argument.", "force"); 1165 return; 1166 } 1167 1168 for (i = 0; i < (u_int)*nargs; i++) { 1169 snprintf(param, sizeof(param), "arg%u", i); 1170 name = gctl_get_asciiparam(req, param); 1171 if (name == NULL) { 1172 gctl_error(req, "No 'arg%u' argument.", i); 1173 return; 1174 } 1175 sc = g_stripe_find_device(mp, name); 1176 if (sc == NULL) { 1177 gctl_error(req, "No such device: %s.", name); 1178 return; 1179 } 1180 error = g_stripe_destroy(sc, *force); 1181 if (error != 0) { 1182 gctl_error(req, "Cannot destroy device %s (error=%d).", 1183 sc->sc_name, error); 1184 return; 1185 } 1186 } 1187 } 1188 1189 static void 1190 g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) 1191 { 1192 uint32_t *version; 1193 1194 g_topology_assert(); 1195 1196 version = gctl_get_paraml(req, "version", sizeof(*version)); 1197 if (version == NULL) { 1198 gctl_error(req, "No '%s' argument.", "version"); 1199 return; 1200 } 1201 if (*version != G_STRIPE_VERSION) { 1202 gctl_error(req, "Userland and kernel parts are out of sync."); 1203 return; 1204 } 1205 1206 if (strcmp(verb, "create") == 0) { 1207 g_stripe_ctl_create(req, mp); 1208 return; 1209 } else if (strcmp(verb, "destroy") == 0 || 1210 strcmp(verb, "stop") == 0) { 1211 g_stripe_ctl_destroy(req, mp); 1212 return; 1213 } 1214 1215 gctl_error(req, "Unknown verb."); 1216 } 1217 1218 static void 1219 g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1220 struct g_consumer *cp, struct g_provider *pp) 1221 { 1222 struct g_stripe_softc *sc; 1223 1224 sc = gp->softc; 1225 if (sc == NULL) 1226 return; 1227 if (pp != NULL) { 1228 /* Nothing here. */ 1229 } else if (cp != NULL) { 1230 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 1231 (u_int)cp->index); 1232 } else { 1233 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 1234 sbuf_printf(sb, "%s<Stripesize>%ju</Stripesize>\n", indent, 1235 (uintmax_t)sc->sc_stripesize); 1236 sbuf_printf(sb, "%s<Type>", indent); 1237 switch (sc->sc_type) { 1238 case G_STRIPE_TYPE_AUTOMATIC: 1239 sbuf_cat(sb, "AUTOMATIC"); 1240 break; 1241 case G_STRIPE_TYPE_MANUAL: 1242 sbuf_cat(sb, "MANUAL"); 1243 break; 1244 default: 1245 sbuf_cat(sb, "UNKNOWN"); 1246 break; 1247 } 1248 sbuf_cat(sb, "</Type>\n"); 1249 sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n", 1250 indent, sc->sc_ndisks, g_stripe_nvalid(sc)); 1251 sbuf_printf(sb, "%s<State>", indent); 1252 if (sc->sc_provider != NULL && sc->sc_provider->error == 0) 1253 sbuf_cat(sb, "UP"); 1254 else 1255 sbuf_cat(sb, "DOWN"); 1256 sbuf_cat(sb, "</State>\n"); 1257 } 1258 } 1259 1260 DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); 1261 MODULE_VERSION(geom_stripe, 0); 1262