1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/kernel.h> 33 #include <sys/module.h> 34 #include <sys/lock.h> 35 #include <sys/mutex.h> 36 #include <sys/bio.h> 37 #include <sys/sbuf.h> 38 #include <sys/sysctl.h> 39 #include <sys/malloc.h> 40 #include <vm/uma.h> 41 #include <geom/geom.h> 42 #include <geom/geom_dbg.h> 43 #include <geom/stripe/g_stripe.h> 44 45 FEATURE(geom_stripe, "GEOM striping support"); 46 47 static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); 48 49 static uma_zone_t g_stripe_zone; 50 51 static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); 52 static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, 53 struct g_geom *gp); 54 55 static g_taste_t g_stripe_taste; 56 static g_ctl_req_t g_stripe_config; 57 static g_dumpconf_t g_stripe_dumpconf; 58 static g_init_t g_stripe_init; 59 static g_fini_t g_stripe_fini; 60 61 struct g_class g_stripe_class = { 62 .name = G_STRIPE_CLASS_NAME, 63 .version = G_VERSION, 64 .ctlreq = g_stripe_config, 65 .taste = g_stripe_taste, 66 .destroy_geom = g_stripe_destroy_geom, 67 .init = g_stripe_init, 68 .fini = g_stripe_fini 69 }; 70 71 SYSCTL_DECL(_kern_geom); 72 static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 73 "GEOM_STRIPE stuff"); 74 static u_int g_stripe_debug = 0; 75 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, 76 "Debug level"); 77 static int g_stripe_fast = 0; 78 SYSCTL_INT(_kern_geom_stripe, OID_AUTO, fast, 79 CTLFLAG_RWTUN, &g_stripe_fast, 0, 80 "Fast, but memory-consuming, mode"); 81 static u_long g_stripe_maxmem; 82 SYSCTL_ULONG(_kern_geom_stripe, OID_AUTO, maxmem, 83 CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &g_stripe_maxmem, 0, 84 "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); 85 static u_int g_stripe_fast_failed = 0; 86 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, 87 &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); 88 89 /* 90 * Greatest Common Divisor. 91 */ 92 static u_int 93 gcd(u_int a, u_int b) 94 { 95 u_int c; 96 97 while (b != 0) { 98 c = a; 99 a = b; 100 b = (c % b); 101 } 102 return (a); 103 } 104 105 /* 106 * Least Common Multiple. 107 */ 108 static u_int 109 lcm(u_int a, u_int b) 110 { 111 112 return ((a * b) / gcd(a, b)); 113 } 114 115 static void 116 g_stripe_init(struct g_class *mp __unused) 117 { 118 119 g_stripe_maxmem = maxphys * 100; 120 TUNABLE_ULONG_FETCH("kern.geom.stripe.maxmem,", &g_stripe_maxmem); 121 g_stripe_zone = uma_zcreate("g_stripe_zone", maxphys, NULL, NULL, 122 NULL, NULL, 0, 0); 123 g_stripe_maxmem -= g_stripe_maxmem % maxphys; 124 uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / maxphys); 125 } 126 127 static void 128 g_stripe_fini(struct g_class *mp __unused) 129 { 130 131 uma_zdestroy(g_stripe_zone); 132 } 133 134 /* 135 * Return the number of valid disks. 136 */ 137 static u_int 138 g_stripe_nvalid(struct g_stripe_softc *sc) 139 { 140 u_int i, no; 141 142 no = 0; 143 for (i = 0; i < sc->sc_ndisks; i++) { 144 if (sc->sc_disks[i] != NULL) 145 no++; 146 } 147 148 return (no); 149 } 150 151 static void 152 g_stripe_remove_disk(struct g_consumer *cp) 153 { 154 struct g_stripe_softc *sc; 155 156 g_topology_assert(); 157 KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); 158 sc = (struct g_stripe_softc *)cp->geom->softc; 159 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 160 161 if (cp->private == NULL) { 162 G_STRIPE_DEBUG(0, "Disk %s removed from %s.", 163 cp->provider->name, sc->sc_name); 164 cp->private = (void *)(uintptr_t)-1; 165 } 166 167 if (sc->sc_provider != NULL) { 168 G_STRIPE_DEBUG(0, "Device %s deactivated.", 169 sc->sc_provider->name); 170 g_wither_provider(sc->sc_provider, ENXIO); 171 sc->sc_provider = NULL; 172 } 173 174 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 175 return; 176 sc->sc_disks[cp->index] = NULL; 177 cp->index = 0; 178 g_detach(cp); 179 g_destroy_consumer(cp); 180 /* If there are no valid disks anymore, remove device. */ 181 if (LIST_EMPTY(&sc->sc_geom->consumer)) 182 g_stripe_destroy(sc, 1); 183 } 184 185 static void 186 g_stripe_orphan(struct g_consumer *cp) 187 { 188 struct g_stripe_softc *sc; 189 struct g_geom *gp; 190 191 g_topology_assert(); 192 gp = cp->geom; 193 sc = gp->softc; 194 if (sc == NULL) 195 return; 196 197 g_stripe_remove_disk(cp); 198 } 199 200 static int 201 g_stripe_access(struct g_provider *pp, int dr, int dw, int de) 202 { 203 struct g_consumer *cp1, *cp2, *tmp; 204 struct g_stripe_softc *sc __diagused; 205 struct g_geom *gp; 206 int error; 207 208 g_topology_assert(); 209 gp = pp->geom; 210 sc = gp->softc; 211 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 212 213 /* On first open, grab an extra "exclusive" bit */ 214 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) 215 de++; 216 /* ... and let go of it on last close */ 217 if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) 218 de--; 219 220 LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { 221 error = g_access(cp1, dr, dw, de); 222 if (error != 0) 223 goto fail; 224 if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && 225 cp1->private != NULL) { 226 g_stripe_remove_disk(cp1); /* May destroy geom. */ 227 } 228 } 229 return (0); 230 231 fail: 232 LIST_FOREACH(cp2, &gp->consumer, consumer) { 233 if (cp1 == cp2) 234 break; 235 g_access(cp2, -dr, -dw, -de); 236 } 237 return (error); 238 } 239 240 static void 241 g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, 242 off_t length, int mode) 243 { 244 off_t stripesize; 245 size_t len; 246 247 stripesize = sc->sc_stripesize; 248 len = (size_t)(stripesize - (offset & (stripesize - 1))); 249 do { 250 bcopy(src, dst, len); 251 if (mode) { 252 dst += len + stripesize * (sc->sc_ndisks - 1); 253 src += len; 254 } else { 255 dst += len; 256 src += len + stripesize * (sc->sc_ndisks - 1); 257 } 258 length -= len; 259 KASSERT(length >= 0, 260 ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).", 261 (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length)); 262 if (length > stripesize) 263 len = stripesize; 264 else 265 len = length; 266 } while (length > 0); 267 } 268 269 static void 270 g_stripe_done(struct bio *bp) 271 { 272 struct g_stripe_softc *sc; 273 struct bio *pbp; 274 275 pbp = bp->bio_parent; 276 sc = pbp->bio_to->geom->softc; 277 if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { 278 g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, 279 bp->bio_length, 1); 280 bp->bio_data = bp->bio_caller1; 281 bp->bio_caller1 = NULL; 282 } 283 mtx_lock(&sc->sc_lock); 284 if (pbp->bio_error == 0) 285 pbp->bio_error = bp->bio_error; 286 pbp->bio_completed += bp->bio_completed; 287 pbp->bio_inbed++; 288 if (pbp->bio_children == pbp->bio_inbed) { 289 mtx_unlock(&sc->sc_lock); 290 if (pbp->bio_driver1 != NULL) 291 uma_zfree(g_stripe_zone, pbp->bio_driver1); 292 if (bp->bio_cmd == BIO_SPEEDUP) 293 pbp->bio_completed = pbp->bio_length; 294 g_io_deliver(pbp, pbp->bio_error); 295 } else 296 mtx_unlock(&sc->sc_lock); 297 g_destroy_bio(bp); 298 } 299 300 static int 301 g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) 302 { 303 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 304 struct g_stripe_softc *sc; 305 char *addr, *data = NULL; 306 struct bio *cbp; 307 off_t stripesize; 308 u_int nparts = 0; 309 int error; 310 311 sc = bp->bio_to->geom->softc; 312 313 addr = bp->bio_data; 314 stripesize = sc->sc_stripesize; 315 316 cbp = g_clone_bio(bp); 317 if (cbp == NULL) { 318 error = ENOMEM; 319 goto failure; 320 } 321 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 322 nparts++; 323 /* 324 * Fill in the component buf structure. 325 */ 326 cbp->bio_done = g_stripe_done; 327 cbp->bio_offset = offset; 328 cbp->bio_data = addr; 329 cbp->bio_caller1 = NULL; 330 cbp->bio_length = length; 331 cbp->bio_caller2 = sc->sc_disks[no]; 332 333 /* offset -= offset % stripesize; */ 334 offset -= offset & (stripesize - 1); 335 addr += length; 336 length = bp->bio_length - length; 337 for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { 338 if (no > sc->sc_ndisks - 1) { 339 no = 0; 340 offset += stripesize; 341 } 342 if (nparts >= sc->sc_ndisks) { 343 cbp = TAILQ_NEXT(cbp, bio_queue); 344 if (cbp == NULL) 345 cbp = TAILQ_FIRST(&queue); 346 nparts++; 347 /* 348 * Update bio structure. 349 */ 350 /* 351 * MIN() is in case when 352 * (bp->bio_length % sc->sc_stripesize) != 0. 353 */ 354 cbp->bio_length += MIN(stripesize, length); 355 if (cbp->bio_caller1 == NULL) { 356 cbp->bio_caller1 = cbp->bio_data; 357 cbp->bio_data = NULL; 358 if (data == NULL) { 359 data = uma_zalloc(g_stripe_zone, 360 M_NOWAIT); 361 if (data == NULL) { 362 error = ENOMEM; 363 goto failure; 364 } 365 } 366 } 367 } else { 368 cbp = g_clone_bio(bp); 369 if (cbp == NULL) { 370 error = ENOMEM; 371 goto failure; 372 } 373 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 374 nparts++; 375 /* 376 * Fill in the component buf structure. 377 */ 378 cbp->bio_done = g_stripe_done; 379 cbp->bio_offset = offset; 380 cbp->bio_data = addr; 381 cbp->bio_caller1 = NULL; 382 /* 383 * MIN() is in case when 384 * (bp->bio_length % sc->sc_stripesize) != 0. 385 */ 386 cbp->bio_length = MIN(stripesize, length); 387 cbp->bio_caller2 = sc->sc_disks[no]; 388 } 389 } 390 if (data != NULL) 391 bp->bio_driver1 = data; 392 /* 393 * Fire off all allocated requests! 394 */ 395 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 396 struct g_consumer *cp; 397 398 TAILQ_REMOVE(&queue, cbp, bio_queue); 399 cp = cbp->bio_caller2; 400 cbp->bio_caller2 = NULL; 401 cbp->bio_to = cp->provider; 402 if (cbp->bio_caller1 != NULL) { 403 cbp->bio_data = data; 404 if (bp->bio_cmd == BIO_WRITE) { 405 g_stripe_copy(sc, cbp->bio_caller1, data, 406 cbp->bio_offset, cbp->bio_length, 0); 407 } 408 data += cbp->bio_length; 409 } 410 G_STRIPE_LOGREQ(cbp, "Sending request."); 411 g_io_request(cbp, cp); 412 } 413 return (0); 414 failure: 415 if (data != NULL) 416 uma_zfree(g_stripe_zone, data); 417 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 418 TAILQ_REMOVE(&queue, cbp, bio_queue); 419 if (cbp->bio_caller1 != NULL) { 420 cbp->bio_data = cbp->bio_caller1; 421 cbp->bio_caller1 = NULL; 422 } 423 bp->bio_children--; 424 g_destroy_bio(cbp); 425 } 426 return (error); 427 } 428 429 static int 430 g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) 431 { 432 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 433 struct g_stripe_softc *sc; 434 off_t stripesize; 435 struct bio *cbp; 436 char *addr; 437 int error; 438 439 sc = bp->bio_to->geom->softc; 440 441 stripesize = sc->sc_stripesize; 442 443 cbp = g_clone_bio(bp); 444 if (cbp == NULL) { 445 error = ENOMEM; 446 goto failure; 447 } 448 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 449 /* 450 * Fill in the component buf structure. 451 */ 452 if (bp->bio_length == length) 453 cbp->bio_done = g_std_done; /* Optimized lockless case. */ 454 else 455 cbp->bio_done = g_stripe_done; 456 cbp->bio_offset = offset; 457 cbp->bio_length = length; 458 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 459 bp->bio_ma_n = round_page(bp->bio_ma_offset + 460 bp->bio_length) / PAGE_SIZE; 461 addr = NULL; 462 } else 463 addr = bp->bio_data; 464 cbp->bio_caller2 = sc->sc_disks[no]; 465 466 /* offset -= offset % stripesize; */ 467 offset -= offset & (stripesize - 1); 468 if (bp->bio_cmd != BIO_DELETE) 469 addr += length; 470 length = bp->bio_length - length; 471 for (no++; length > 0; no++, length -= stripesize) { 472 if (no > sc->sc_ndisks - 1) { 473 no = 0; 474 offset += stripesize; 475 } 476 cbp = g_clone_bio(bp); 477 if (cbp == NULL) { 478 error = ENOMEM; 479 goto failure; 480 } 481 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 482 483 /* 484 * Fill in the component buf structure. 485 */ 486 cbp->bio_done = g_stripe_done; 487 cbp->bio_offset = offset; 488 /* 489 * MIN() is in case when 490 * (bp->bio_length % sc->sc_stripesize) != 0. 491 */ 492 cbp->bio_length = MIN(stripesize, length); 493 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 494 cbp->bio_ma_offset += (uintptr_t)addr; 495 cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; 496 cbp->bio_ma_offset %= PAGE_SIZE; 497 cbp->bio_ma_n = round_page(cbp->bio_ma_offset + 498 cbp->bio_length) / PAGE_SIZE; 499 } else 500 cbp->bio_data = addr; 501 502 cbp->bio_caller2 = sc->sc_disks[no]; 503 504 if (bp->bio_cmd != BIO_DELETE) 505 addr += stripesize; 506 } 507 /* 508 * Fire off all allocated requests! 509 */ 510 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 511 struct g_consumer *cp; 512 513 TAILQ_REMOVE(&queue, cbp, bio_queue); 514 cp = cbp->bio_caller2; 515 cbp->bio_caller2 = NULL; 516 cbp->bio_to = cp->provider; 517 G_STRIPE_LOGREQ(cbp, "Sending request."); 518 g_io_request(cbp, cp); 519 } 520 return (0); 521 failure: 522 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 523 TAILQ_REMOVE(&queue, cbp, bio_queue); 524 bp->bio_children--; 525 g_destroy_bio(cbp); 526 } 527 return (error); 528 } 529 530 static void 531 g_stripe_pushdown(struct g_stripe_softc *sc, struct bio *bp) 532 { 533 struct bio_queue_head queue; 534 struct g_consumer *cp; 535 struct bio *cbp; 536 u_int no; 537 538 bioq_init(&queue); 539 for (no = 0; no < sc->sc_ndisks; no++) { 540 cbp = g_clone_bio(bp); 541 if (cbp == NULL) { 542 for (cbp = bioq_first(&queue); cbp != NULL; 543 cbp = bioq_first(&queue)) { 544 bioq_remove(&queue, cbp); 545 g_destroy_bio(cbp); 546 } 547 if (bp->bio_error == 0) 548 bp->bio_error = ENOMEM; 549 g_io_deliver(bp, bp->bio_error); 550 return; 551 } 552 bioq_insert_tail(&queue, cbp); 553 cbp->bio_done = g_stripe_done; 554 cbp->bio_caller2 = sc->sc_disks[no]; 555 cbp->bio_to = sc->sc_disks[no]->provider; 556 } 557 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 558 bioq_remove(&queue, cbp); 559 G_STRIPE_LOGREQ(cbp, "Sending request."); 560 cp = cbp->bio_caller2; 561 cbp->bio_caller2 = NULL; 562 g_io_request(cbp, cp); 563 } 564 } 565 566 static void 567 g_stripe_start(struct bio *bp) 568 { 569 off_t offset, start, length, nstripe, stripesize; 570 struct g_stripe_softc *sc; 571 u_int no; 572 int error, fast = 0; 573 574 sc = bp->bio_to->geom->softc; 575 /* 576 * If sc == NULL, provider's error should be set and g_stripe_start() 577 * should not be called at all. 578 */ 579 KASSERT(sc != NULL, 580 ("Provider's error should be set (error=%d)(device=%s).", 581 bp->bio_to->error, bp->bio_to->name)); 582 583 G_STRIPE_LOGREQ(bp, "Request received."); 584 585 switch (bp->bio_cmd) { 586 case BIO_READ: 587 case BIO_WRITE: 588 case BIO_DELETE: 589 break; 590 case BIO_SPEEDUP: 591 case BIO_FLUSH: 592 g_stripe_pushdown(sc, bp); 593 return; 594 case BIO_GETATTR: 595 /* To which provider it should be delivered? */ 596 default: 597 g_io_deliver(bp, EOPNOTSUPP); 598 return; 599 } 600 601 stripesize = sc->sc_stripesize; 602 603 /* 604 * Calculations are quite messy, but fast I hope. 605 */ 606 607 /* Stripe number. */ 608 /* nstripe = bp->bio_offset / stripesize; */ 609 nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; 610 /* Disk number. */ 611 no = nstripe % sc->sc_ndisks; 612 /* Start position in stripe. */ 613 /* start = bp->bio_offset % stripesize; */ 614 start = bp->bio_offset & (stripesize - 1); 615 /* Start position in disk. */ 616 /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ 617 offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; 618 /* Length of data to operate. */ 619 length = MIN(bp->bio_length, stripesize - start); 620 621 /* 622 * Do use "fast" mode when: 623 * 1. "Fast" mode is ON. 624 * and 625 * 2. Request size is less than or equal to maxphys, 626 * which should always be true. 627 * and 628 * 3. Request size is bigger than stripesize * ndisks. If it isn't, 629 * there will be no need to send more than one I/O request to 630 * a provider, so there is nothing to optmize. 631 * and 632 * 4. Request is not unmapped. 633 * and 634 * 5. It is not a BIO_DELETE. 635 */ 636 if (g_stripe_fast && bp->bio_length <= maxphys && 637 bp->bio_length >= stripesize * sc->sc_ndisks && 638 (bp->bio_flags & BIO_UNMAPPED) == 0 && 639 bp->bio_cmd != BIO_DELETE) { 640 fast = 1; 641 } 642 error = 0; 643 if (fast) { 644 error = g_stripe_start_fast(bp, no, offset, length); 645 if (error != 0) 646 g_stripe_fast_failed++; 647 } 648 /* 649 * Do use "economic" when: 650 * 1. "Economic" mode is ON. 651 * or 652 * 2. "Fast" mode failed. It can only fail if there is no memory. 653 */ 654 if (!fast || error != 0) 655 error = g_stripe_start_economic(bp, no, offset, length); 656 if (error != 0) { 657 if (bp->bio_error == 0) 658 bp->bio_error = error; 659 g_io_deliver(bp, bp->bio_error); 660 } 661 } 662 663 static void 664 g_stripe_check_and_run(struct g_stripe_softc *sc) 665 { 666 struct g_provider *dp; 667 off_t mediasize, ms; 668 u_int no, sectorsize = 0; 669 670 g_topology_assert(); 671 if (g_stripe_nvalid(sc) != sc->sc_ndisks) 672 return; 673 674 sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", 675 sc->sc_name); 676 sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; 677 if (g_stripe_fast == 0) 678 sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; 679 /* 680 * Find the smallest disk. 681 */ 682 mediasize = sc->sc_disks[0]->provider->mediasize; 683 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 684 mediasize -= sc->sc_disks[0]->provider->sectorsize; 685 mediasize -= mediasize % sc->sc_stripesize; 686 sectorsize = sc->sc_disks[0]->provider->sectorsize; 687 for (no = 1; no < sc->sc_ndisks; no++) { 688 dp = sc->sc_disks[no]->provider; 689 ms = dp->mediasize; 690 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 691 ms -= dp->sectorsize; 692 ms -= ms % sc->sc_stripesize; 693 if (ms < mediasize) 694 mediasize = ms; 695 sectorsize = lcm(sectorsize, dp->sectorsize); 696 697 /* A provider underneath us doesn't support unmapped */ 698 if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { 699 G_STRIPE_DEBUG(1, "Cancelling unmapped " 700 "because of %s.", dp->name); 701 sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; 702 } 703 } 704 sc->sc_provider->sectorsize = sectorsize; 705 sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; 706 sc->sc_provider->stripesize = sc->sc_stripesize; 707 sc->sc_provider->stripeoffset = 0; 708 g_error_provider(sc->sc_provider, 0); 709 710 G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); 711 } 712 713 static int 714 g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) 715 { 716 struct g_provider *pp; 717 u_char *buf; 718 int error; 719 720 g_topology_assert(); 721 722 error = g_access(cp, 1, 0, 0); 723 if (error != 0) 724 return (error); 725 pp = cp->provider; 726 g_topology_unlock(); 727 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 728 &error); 729 g_topology_lock(); 730 g_access(cp, -1, 0, 0); 731 if (buf == NULL) 732 return (error); 733 734 /* Decode metadata. */ 735 stripe_metadata_decode(buf, md); 736 g_free(buf); 737 738 return (0); 739 } 740 741 /* 742 * Add disk to given device. 743 */ 744 static int 745 g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) 746 { 747 struct g_consumer *cp, *fcp; 748 struct g_geom *gp; 749 int error; 750 751 g_topology_assert(); 752 /* Metadata corrupted? */ 753 if (no >= sc->sc_ndisks) 754 return (EINVAL); 755 756 /* Check if disk is not already attached. */ 757 if (sc->sc_disks[no] != NULL) 758 return (EEXIST); 759 760 gp = sc->sc_geom; 761 fcp = LIST_FIRST(&gp->consumer); 762 763 cp = g_new_consumer(gp); 764 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 765 cp->private = NULL; 766 cp->index = no; 767 error = g_attach(cp, pp); 768 if (error != 0) { 769 g_destroy_consumer(cp); 770 return (error); 771 } 772 773 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { 774 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 775 if (error != 0) { 776 g_detach(cp); 777 g_destroy_consumer(cp); 778 return (error); 779 } 780 } 781 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { 782 struct g_stripe_metadata md; 783 784 /* Reread metadata. */ 785 error = g_stripe_read_metadata(cp, &md); 786 if (error != 0) 787 goto fail; 788 789 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || 790 strcmp(md.md_name, sc->sc_name) != 0 || 791 md.md_id != sc->sc_id) { 792 G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); 793 goto fail; 794 } 795 } 796 797 sc->sc_disks[no] = cp; 798 G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); 799 g_stripe_check_and_run(sc); 800 801 return (0); 802 fail: 803 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) 804 g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); 805 g_detach(cp); 806 g_destroy_consumer(cp); 807 return (error); 808 } 809 810 static struct g_geom * 811 g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, 812 u_int type) 813 { 814 struct g_stripe_softc *sc; 815 struct g_geom *gp; 816 u_int no; 817 818 g_topology_assert(); 819 G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, 820 md->md_id); 821 822 /* Two disks is minimum. */ 823 if (md->md_all < 2) { 824 G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); 825 return (NULL); 826 } 827 #if 0 828 /* Stripe size have to be grater than or equal to sector size. */ 829 if (md->md_stripesize < sectorsize) { 830 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 831 return (NULL); 832 } 833 #endif 834 /* Stripe size have to be power of 2. */ 835 if (!powerof2(md->md_stripesize)) { 836 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 837 return (NULL); 838 } 839 840 /* Check for duplicate unit */ 841 LIST_FOREACH(gp, &mp->geom, geom) { 842 sc = gp->softc; 843 if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { 844 G_STRIPE_DEBUG(0, "Device %s already configured.", 845 sc->sc_name); 846 return (NULL); 847 } 848 } 849 gp = g_new_geomf(mp, "%s", md->md_name); 850 sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); 851 gp->start = g_stripe_start; 852 gp->spoiled = g_stripe_orphan; 853 gp->orphan = g_stripe_orphan; 854 gp->access = g_stripe_access; 855 gp->dumpconf = g_stripe_dumpconf; 856 857 sc->sc_id = md->md_id; 858 sc->sc_stripesize = md->md_stripesize; 859 sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); 860 sc->sc_ndisks = md->md_all; 861 sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, 862 M_STRIPE, M_WAITOK | M_ZERO); 863 for (no = 0; no < sc->sc_ndisks; no++) 864 sc->sc_disks[no] = NULL; 865 sc->sc_type = type; 866 mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); 867 868 gp->softc = sc; 869 sc->sc_geom = gp; 870 sc->sc_provider = NULL; 871 872 G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 873 874 return (gp); 875 } 876 877 static int 878 g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) 879 { 880 struct g_provider *pp; 881 struct g_consumer *cp, *cp1; 882 struct g_geom *gp; 883 884 g_topology_assert(); 885 886 if (sc == NULL) 887 return (ENXIO); 888 889 pp = sc->sc_provider; 890 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 891 if (force) { 892 G_STRIPE_DEBUG(0, "Device %s is still open, so it " 893 "can't be definitely removed.", pp->name); 894 } else { 895 G_STRIPE_DEBUG(1, 896 "Device %s is still open (r%dw%de%d).", pp->name, 897 pp->acr, pp->acw, pp->ace); 898 return (EBUSY); 899 } 900 } 901 902 gp = sc->sc_geom; 903 LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { 904 g_stripe_remove_disk(cp); 905 if (cp1 == NULL) 906 return (0); /* Recursion happened. */ 907 } 908 if (!LIST_EMPTY(&gp->consumer)) 909 return (EINPROGRESS); 910 911 gp->softc = NULL; 912 KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", 913 gp->name)); 914 free(sc->sc_disks, M_STRIPE); 915 mtx_destroy(&sc->sc_lock); 916 free(sc, M_STRIPE); 917 G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); 918 g_wither_geom(gp, ENXIO); 919 return (0); 920 } 921 922 static int 923 g_stripe_destroy_geom(struct gctl_req *req __unused, 924 struct g_class *mp __unused, struct g_geom *gp) 925 { 926 struct g_stripe_softc *sc; 927 928 sc = gp->softc; 929 return (g_stripe_destroy(sc, 0)); 930 } 931 932 static struct g_geom * 933 g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 934 { 935 struct g_stripe_metadata md; 936 struct g_stripe_softc *sc; 937 struct g_consumer *cp; 938 struct g_geom *gp; 939 int error; 940 941 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 942 g_topology_assert(); 943 944 /* Skip providers that are already open for writing. */ 945 if (pp->acw > 0) 946 return (NULL); 947 948 G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); 949 950 gp = g_new_geomf(mp, "stripe:taste"); 951 gp->start = g_stripe_start; 952 gp->access = g_stripe_access; 953 gp->orphan = g_stripe_orphan; 954 cp = g_new_consumer(gp); 955 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 956 error = g_attach(cp, pp); 957 if (error == 0) { 958 error = g_stripe_read_metadata(cp, &md); 959 g_detach(cp); 960 } 961 g_destroy_consumer(cp); 962 g_destroy_geom(gp); 963 if (error != 0) 964 return (NULL); 965 gp = NULL; 966 967 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) 968 return (NULL); 969 if (md.md_version > G_STRIPE_VERSION) { 970 printf("geom_stripe.ko module is too old to handle %s.\n", 971 pp->name); 972 return (NULL); 973 } 974 /* 975 * Backward compatibility: 976 */ 977 /* There was no md_provider field in earlier versions of metadata. */ 978 if (md.md_version < 2) 979 bzero(md.md_provider, sizeof(md.md_provider)); 980 /* There was no md_provsize field in earlier versions of metadata. */ 981 if (md.md_version < 3) 982 md.md_provsize = pp->mediasize; 983 984 if (md.md_provider[0] != '\0' && 985 !g_compare_names(md.md_provider, pp->name)) 986 return (NULL); 987 if (md.md_provsize != pp->mediasize) 988 return (NULL); 989 990 /* 991 * Let's check if device already exists. 992 */ 993 sc = NULL; 994 LIST_FOREACH(gp, &mp->geom, geom) { 995 sc = gp->softc; 996 if (sc == NULL) 997 continue; 998 if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) 999 continue; 1000 if (strcmp(md.md_name, sc->sc_name) != 0) 1001 continue; 1002 if (md.md_id != sc->sc_id) 1003 continue; 1004 break; 1005 } 1006 if (gp != NULL) { 1007 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1008 error = g_stripe_add_disk(sc, pp, md.md_no); 1009 if (error != 0) { 1010 G_STRIPE_DEBUG(0, 1011 "Cannot add disk %s to %s (error=%d).", pp->name, 1012 gp->name, error); 1013 return (NULL); 1014 } 1015 } else { 1016 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); 1017 if (gp == NULL) { 1018 G_STRIPE_DEBUG(0, "Cannot create device %s.", 1019 md.md_name); 1020 return (NULL); 1021 } 1022 sc = gp->softc; 1023 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1024 error = g_stripe_add_disk(sc, pp, md.md_no); 1025 if (error != 0) { 1026 G_STRIPE_DEBUG(0, 1027 "Cannot add disk %s to %s (error=%d).", pp->name, 1028 gp->name, error); 1029 g_stripe_destroy(sc, 1); 1030 return (NULL); 1031 } 1032 } 1033 1034 return (gp); 1035 } 1036 1037 static void 1038 g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) 1039 { 1040 u_int attached, no; 1041 struct g_stripe_metadata md; 1042 struct g_provider *pp; 1043 struct g_stripe_softc *sc; 1044 struct g_geom *gp; 1045 struct sbuf *sb; 1046 off_t *stripesize; 1047 const char *name; 1048 char param[16]; 1049 int *nargs; 1050 1051 g_topology_assert(); 1052 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1053 if (nargs == NULL) { 1054 gctl_error(req, "No '%s' argument.", "nargs"); 1055 return; 1056 } 1057 if (*nargs <= 2) { 1058 gctl_error(req, "Too few arguments."); 1059 return; 1060 } 1061 1062 strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); 1063 md.md_version = G_STRIPE_VERSION; 1064 name = gctl_get_asciiparam(req, "arg0"); 1065 if (name == NULL) { 1066 gctl_error(req, "No 'arg%u' argument.", 0); 1067 return; 1068 } 1069 strlcpy(md.md_name, name, sizeof(md.md_name)); 1070 md.md_id = arc4random(); 1071 md.md_no = 0; 1072 md.md_all = *nargs - 1; 1073 stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); 1074 if (stripesize == NULL) { 1075 gctl_error(req, "No '%s' argument.", "stripesize"); 1076 return; 1077 } 1078 md.md_stripesize = (uint32_t)*stripesize; 1079 bzero(md.md_provider, sizeof(md.md_provider)); 1080 /* This field is not important here. */ 1081 md.md_provsize = 0; 1082 1083 /* Check all providers are valid */ 1084 for (no = 1; no < *nargs; no++) { 1085 snprintf(param, sizeof(param), "arg%u", no); 1086 pp = gctl_get_provider(req, param); 1087 if (pp == NULL) 1088 return; 1089 } 1090 1091 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); 1092 if (gp == NULL) { 1093 gctl_error(req, "Can't configure %s.", md.md_name); 1094 return; 1095 } 1096 1097 sc = gp->softc; 1098 sb = sbuf_new_auto(); 1099 sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); 1100 for (attached = 0, no = 1; no < *nargs; no++) { 1101 snprintf(param, sizeof(param), "arg%u", no); 1102 pp = gctl_get_provider(req, param); 1103 if (pp == NULL) { 1104 name = gctl_get_asciiparam(req, param); 1105 MPASS(name != NULL); 1106 sbuf_printf(sb, " %s", name); 1107 continue; 1108 } 1109 if (g_stripe_add_disk(sc, pp, no - 1) != 0) { 1110 G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", 1111 no, pp->name, gp->name); 1112 sbuf_printf(sb, " %s", pp->name); 1113 continue; 1114 } 1115 attached++; 1116 } 1117 sbuf_finish(sb); 1118 if (md.md_all != attached) { 1119 g_stripe_destroy(gp->softc, 1); 1120 gctl_error(req, "%s", sbuf_data(sb)); 1121 } 1122 sbuf_delete(sb); 1123 } 1124 1125 static struct g_stripe_softc * 1126 g_stripe_find_device(struct g_class *mp, const char *name) 1127 { 1128 struct g_stripe_softc *sc; 1129 struct g_geom *gp; 1130 1131 LIST_FOREACH(gp, &mp->geom, geom) { 1132 sc = gp->softc; 1133 if (sc == NULL) 1134 continue; 1135 if (strcmp(sc->sc_name, name) == 0) 1136 return (sc); 1137 } 1138 return (NULL); 1139 } 1140 1141 static void 1142 g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) 1143 { 1144 struct g_stripe_softc *sc; 1145 int *force, *nargs, error; 1146 const char *name; 1147 char param[16]; 1148 u_int i; 1149 1150 g_topology_assert(); 1151 1152 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1153 if (nargs == NULL) { 1154 gctl_error(req, "No '%s' argument.", "nargs"); 1155 return; 1156 } 1157 if (*nargs <= 0) { 1158 gctl_error(req, "Missing device(s)."); 1159 return; 1160 } 1161 force = gctl_get_paraml(req, "force", sizeof(*force)); 1162 if (force == NULL) { 1163 gctl_error(req, "No '%s' argument.", "force"); 1164 return; 1165 } 1166 1167 for (i = 0; i < (u_int)*nargs; i++) { 1168 snprintf(param, sizeof(param), "arg%u", i); 1169 name = gctl_get_asciiparam(req, param); 1170 if (name == NULL) { 1171 gctl_error(req, "No 'arg%u' argument.", i); 1172 return; 1173 } 1174 sc = g_stripe_find_device(mp, name); 1175 if (sc == NULL) { 1176 gctl_error(req, "No such device: %s.", name); 1177 return; 1178 } 1179 error = g_stripe_destroy(sc, *force); 1180 if (error != 0) { 1181 gctl_error(req, "Cannot destroy device %s (error=%d).", 1182 sc->sc_name, error); 1183 return; 1184 } 1185 } 1186 } 1187 1188 static void 1189 g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) 1190 { 1191 uint32_t *version; 1192 1193 g_topology_assert(); 1194 1195 version = gctl_get_paraml(req, "version", sizeof(*version)); 1196 if (version == NULL) { 1197 gctl_error(req, "No '%s' argument.", "version"); 1198 return; 1199 } 1200 if (*version != G_STRIPE_VERSION) { 1201 gctl_error(req, "Userland and kernel parts are out of sync."); 1202 return; 1203 } 1204 1205 if (strcmp(verb, "create") == 0) { 1206 g_stripe_ctl_create(req, mp); 1207 return; 1208 } else if (strcmp(verb, "destroy") == 0 || 1209 strcmp(verb, "stop") == 0) { 1210 g_stripe_ctl_destroy(req, mp); 1211 return; 1212 } 1213 1214 gctl_error(req, "Unknown verb."); 1215 } 1216 1217 static void 1218 g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1219 struct g_consumer *cp, struct g_provider *pp) 1220 { 1221 struct g_stripe_softc *sc; 1222 1223 sc = gp->softc; 1224 if (sc == NULL) 1225 return; 1226 if (pp != NULL) { 1227 /* Nothing here. */ 1228 } else if (cp != NULL) { 1229 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 1230 (u_int)cp->index); 1231 } else { 1232 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 1233 sbuf_printf(sb, "%s<Stripesize>%ju</Stripesize>\n", indent, 1234 (uintmax_t)sc->sc_stripesize); 1235 sbuf_printf(sb, "%s<Type>", indent); 1236 switch (sc->sc_type) { 1237 case G_STRIPE_TYPE_AUTOMATIC: 1238 sbuf_cat(sb, "AUTOMATIC"); 1239 break; 1240 case G_STRIPE_TYPE_MANUAL: 1241 sbuf_cat(sb, "MANUAL"); 1242 break; 1243 default: 1244 sbuf_cat(sb, "UNKNOWN"); 1245 break; 1246 } 1247 sbuf_cat(sb, "</Type>\n"); 1248 sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n", 1249 indent, sc->sc_ndisks, g_stripe_nvalid(sc)); 1250 sbuf_printf(sb, "%s<State>", indent); 1251 if (sc->sc_provider != NULL && sc->sc_provider->error == 0) 1252 sbuf_cat(sb, "UP"); 1253 else 1254 sbuf_cat(sb, "DOWN"); 1255 sbuf_cat(sb, "</State>\n"); 1256 } 1257 } 1258 1259 DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); 1260 MODULE_VERSION(geom_stripe, 0); 1261