1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #include <sys/module.h> 36 #include <sys/lock.h> 37 #include <sys/mutex.h> 38 #include <sys/bio.h> 39 #include <sys/sbuf.h> 40 #include <sys/sysctl.h> 41 #include <sys/malloc.h> 42 #include <vm/uma.h> 43 #include <geom/geom.h> 44 #include <geom/geom_dbg.h> 45 #include <geom/stripe/g_stripe.h> 46 47 FEATURE(geom_stripe, "GEOM striping support"); 48 49 static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); 50 51 static uma_zone_t g_stripe_zone; 52 53 static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); 54 static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, 55 struct g_geom *gp); 56 57 static g_taste_t g_stripe_taste; 58 static g_ctl_req_t g_stripe_config; 59 static g_dumpconf_t g_stripe_dumpconf; 60 static g_init_t g_stripe_init; 61 static g_fini_t g_stripe_fini; 62 63 struct g_class g_stripe_class = { 64 .name = G_STRIPE_CLASS_NAME, 65 .version = G_VERSION, 66 .ctlreq = g_stripe_config, 67 .taste = g_stripe_taste, 68 .destroy_geom = g_stripe_destroy_geom, 69 .init = g_stripe_init, 70 .fini = g_stripe_fini 71 }; 72 73 SYSCTL_DECL(_kern_geom); 74 static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 75 "GEOM_STRIPE stuff"); 76 static u_int g_stripe_debug = 0; 77 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, 78 "Debug level"); 79 static int g_stripe_fast = 0; 80 static int 81 g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS) 82 { 83 int error, fast; 84 85 fast = g_stripe_fast; 86 error = sysctl_handle_int(oidp, &fast, 0, req); 87 if (error == 0 && req->newptr != NULL) 88 g_stripe_fast = fast; 89 return (error); 90 } 91 SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, 92 CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, NULL, 0, 93 g_sysctl_stripe_fast, "I", 94 "Fast, but memory-consuming, mode"); 95 static u_int g_stripe_maxmem = MAXPHYS * 100; 96 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RDTUN, &g_stripe_maxmem, 97 0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); 98 static u_int g_stripe_fast_failed = 0; 99 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, 100 &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); 101 102 /* 103 * Greatest Common Divisor. 104 */ 105 static u_int 106 gcd(u_int a, u_int b) 107 { 108 u_int c; 109 110 while (b != 0) { 111 c = a; 112 a = b; 113 b = (c % b); 114 } 115 return (a); 116 } 117 118 /* 119 * Least Common Multiple. 120 */ 121 static u_int 122 lcm(u_int a, u_int b) 123 { 124 125 return ((a * b) / gcd(a, b)); 126 } 127 128 static void 129 g_stripe_init(struct g_class *mp __unused) 130 { 131 132 g_stripe_zone = uma_zcreate("g_stripe_zone", MAXPHYS, NULL, NULL, 133 NULL, NULL, 0, 0); 134 g_stripe_maxmem -= g_stripe_maxmem % MAXPHYS; 135 uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAXPHYS); 136 } 137 138 static void 139 g_stripe_fini(struct g_class *mp __unused) 140 { 141 142 uma_zdestroy(g_stripe_zone); 143 } 144 145 /* 146 * Return the number of valid disks. 147 */ 148 static u_int 149 g_stripe_nvalid(struct g_stripe_softc *sc) 150 { 151 u_int i, no; 152 153 no = 0; 154 for (i = 0; i < sc->sc_ndisks; i++) { 155 if (sc->sc_disks[i] != NULL) 156 no++; 157 } 158 159 return (no); 160 } 161 162 static void 163 g_stripe_remove_disk(struct g_consumer *cp) 164 { 165 struct g_stripe_softc *sc; 166 167 g_topology_assert(); 168 KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); 169 sc = (struct g_stripe_softc *)cp->geom->softc; 170 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 171 172 if (cp->private == NULL) { 173 G_STRIPE_DEBUG(0, "Disk %s removed from %s.", 174 cp->provider->name, sc->sc_name); 175 cp->private = (void *)(uintptr_t)-1; 176 } 177 178 if (sc->sc_provider != NULL) { 179 G_STRIPE_DEBUG(0, "Device %s deactivated.", 180 sc->sc_provider->name); 181 g_wither_provider(sc->sc_provider, ENXIO); 182 sc->sc_provider = NULL; 183 } 184 185 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 186 return; 187 sc->sc_disks[cp->index] = NULL; 188 cp->index = 0; 189 g_detach(cp); 190 g_destroy_consumer(cp); 191 /* If there are no valid disks anymore, remove device. */ 192 if (LIST_EMPTY(&sc->sc_geom->consumer)) 193 g_stripe_destroy(sc, 1); 194 } 195 196 static void 197 g_stripe_orphan(struct g_consumer *cp) 198 { 199 struct g_stripe_softc *sc; 200 struct g_geom *gp; 201 202 g_topology_assert(); 203 gp = cp->geom; 204 sc = gp->softc; 205 if (sc == NULL) 206 return; 207 208 g_stripe_remove_disk(cp); 209 } 210 211 static int 212 g_stripe_access(struct g_provider *pp, int dr, int dw, int de) 213 { 214 struct g_consumer *cp1, *cp2, *tmp; 215 struct g_stripe_softc *sc; 216 struct g_geom *gp; 217 int error; 218 219 g_topology_assert(); 220 gp = pp->geom; 221 sc = gp->softc; 222 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 223 224 /* On first open, grab an extra "exclusive" bit */ 225 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) 226 de++; 227 /* ... and let go of it on last close */ 228 if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) 229 de--; 230 231 LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { 232 error = g_access(cp1, dr, dw, de); 233 if (error != 0) 234 goto fail; 235 if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && 236 cp1->private != NULL) { 237 g_stripe_remove_disk(cp1); /* May destroy geom. */ 238 } 239 } 240 return (0); 241 242 fail: 243 LIST_FOREACH(cp2, &gp->consumer, consumer) { 244 if (cp1 == cp2) 245 break; 246 g_access(cp2, -dr, -dw, -de); 247 } 248 return (error); 249 } 250 251 static void 252 g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, 253 off_t length, int mode) 254 { 255 off_t stripesize; 256 size_t len; 257 258 stripesize = sc->sc_stripesize; 259 len = (size_t)(stripesize - (offset & (stripesize - 1))); 260 do { 261 bcopy(src, dst, len); 262 if (mode) { 263 dst += len + stripesize * (sc->sc_ndisks - 1); 264 src += len; 265 } else { 266 dst += len; 267 src += len + stripesize * (sc->sc_ndisks - 1); 268 } 269 length -= len; 270 KASSERT(length >= 0, 271 ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).", 272 (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length)); 273 if (length > stripesize) 274 len = stripesize; 275 else 276 len = length; 277 } while (length > 0); 278 } 279 280 static void 281 g_stripe_done(struct bio *bp) 282 { 283 struct g_stripe_softc *sc; 284 struct bio *pbp; 285 286 pbp = bp->bio_parent; 287 sc = pbp->bio_to->geom->softc; 288 if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { 289 g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, 290 bp->bio_length, 1); 291 bp->bio_data = bp->bio_caller1; 292 bp->bio_caller1 = NULL; 293 } 294 mtx_lock(&sc->sc_lock); 295 if (pbp->bio_error == 0) 296 pbp->bio_error = bp->bio_error; 297 pbp->bio_completed += bp->bio_completed; 298 pbp->bio_inbed++; 299 if (pbp->bio_children == pbp->bio_inbed) { 300 mtx_unlock(&sc->sc_lock); 301 if (pbp->bio_driver1 != NULL) 302 uma_zfree(g_stripe_zone, pbp->bio_driver1); 303 if (bp->bio_cmd == BIO_SPEEDUP) 304 pbp->bio_completed = pbp->bio_length; 305 g_io_deliver(pbp, pbp->bio_error); 306 } else 307 mtx_unlock(&sc->sc_lock); 308 g_destroy_bio(bp); 309 } 310 311 static int 312 g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) 313 { 314 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 315 struct g_stripe_softc *sc; 316 char *addr, *data = NULL; 317 struct bio *cbp; 318 off_t stripesize; 319 u_int nparts = 0; 320 int error; 321 322 sc = bp->bio_to->geom->softc; 323 324 addr = bp->bio_data; 325 stripesize = sc->sc_stripesize; 326 327 cbp = g_clone_bio(bp); 328 if (cbp == NULL) { 329 error = ENOMEM; 330 goto failure; 331 } 332 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 333 nparts++; 334 /* 335 * Fill in the component buf structure. 336 */ 337 cbp->bio_done = g_stripe_done; 338 cbp->bio_offset = offset; 339 cbp->bio_data = addr; 340 cbp->bio_caller1 = NULL; 341 cbp->bio_length = length; 342 cbp->bio_caller2 = sc->sc_disks[no]; 343 344 /* offset -= offset % stripesize; */ 345 offset -= offset & (stripesize - 1); 346 addr += length; 347 length = bp->bio_length - length; 348 for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { 349 if (no > sc->sc_ndisks - 1) { 350 no = 0; 351 offset += stripesize; 352 } 353 if (nparts >= sc->sc_ndisks) { 354 cbp = TAILQ_NEXT(cbp, bio_queue); 355 if (cbp == NULL) 356 cbp = TAILQ_FIRST(&queue); 357 nparts++; 358 /* 359 * Update bio structure. 360 */ 361 /* 362 * MIN() is in case when 363 * (bp->bio_length % sc->sc_stripesize) != 0. 364 */ 365 cbp->bio_length += MIN(stripesize, length); 366 if (cbp->bio_caller1 == NULL) { 367 cbp->bio_caller1 = cbp->bio_data; 368 cbp->bio_data = NULL; 369 if (data == NULL) { 370 data = uma_zalloc(g_stripe_zone, 371 M_NOWAIT); 372 if (data == NULL) { 373 error = ENOMEM; 374 goto failure; 375 } 376 } 377 } 378 } else { 379 cbp = g_clone_bio(bp); 380 if (cbp == NULL) { 381 error = ENOMEM; 382 goto failure; 383 } 384 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 385 nparts++; 386 /* 387 * Fill in the component buf structure. 388 */ 389 cbp->bio_done = g_stripe_done; 390 cbp->bio_offset = offset; 391 cbp->bio_data = addr; 392 cbp->bio_caller1 = NULL; 393 /* 394 * MIN() is in case when 395 * (bp->bio_length % sc->sc_stripesize) != 0. 396 */ 397 cbp->bio_length = MIN(stripesize, length); 398 cbp->bio_caller2 = sc->sc_disks[no]; 399 } 400 } 401 if (data != NULL) 402 bp->bio_driver1 = data; 403 /* 404 * Fire off all allocated requests! 405 */ 406 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 407 struct g_consumer *cp; 408 409 TAILQ_REMOVE(&queue, cbp, bio_queue); 410 cp = cbp->bio_caller2; 411 cbp->bio_caller2 = NULL; 412 cbp->bio_to = cp->provider; 413 if (cbp->bio_caller1 != NULL) { 414 cbp->bio_data = data; 415 if (bp->bio_cmd == BIO_WRITE) { 416 g_stripe_copy(sc, cbp->bio_caller1, data, 417 cbp->bio_offset, cbp->bio_length, 0); 418 } 419 data += cbp->bio_length; 420 } 421 G_STRIPE_LOGREQ(cbp, "Sending request."); 422 g_io_request(cbp, cp); 423 } 424 return (0); 425 failure: 426 if (data != NULL) 427 uma_zfree(g_stripe_zone, data); 428 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 429 TAILQ_REMOVE(&queue, cbp, bio_queue); 430 if (cbp->bio_caller1 != NULL) { 431 cbp->bio_data = cbp->bio_caller1; 432 cbp->bio_caller1 = NULL; 433 } 434 bp->bio_children--; 435 g_destroy_bio(cbp); 436 } 437 return (error); 438 } 439 440 static int 441 g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) 442 { 443 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 444 struct g_stripe_softc *sc; 445 off_t stripesize; 446 struct bio *cbp; 447 char *addr; 448 int error; 449 450 sc = bp->bio_to->geom->softc; 451 452 stripesize = sc->sc_stripesize; 453 454 cbp = g_clone_bio(bp); 455 if (cbp == NULL) { 456 error = ENOMEM; 457 goto failure; 458 } 459 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 460 /* 461 * Fill in the component buf structure. 462 */ 463 if (bp->bio_length == length) 464 cbp->bio_done = g_std_done; /* Optimized lockless case. */ 465 else 466 cbp->bio_done = g_stripe_done; 467 cbp->bio_offset = offset; 468 cbp->bio_length = length; 469 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 470 bp->bio_ma_n = round_page(bp->bio_ma_offset + 471 bp->bio_length) / PAGE_SIZE; 472 addr = NULL; 473 } else 474 addr = bp->bio_data; 475 cbp->bio_caller2 = sc->sc_disks[no]; 476 477 /* offset -= offset % stripesize; */ 478 offset -= offset & (stripesize - 1); 479 if (bp->bio_cmd != BIO_DELETE) 480 addr += length; 481 length = bp->bio_length - length; 482 for (no++; length > 0; no++, length -= stripesize) { 483 if (no > sc->sc_ndisks - 1) { 484 no = 0; 485 offset += stripesize; 486 } 487 cbp = g_clone_bio(bp); 488 if (cbp == NULL) { 489 error = ENOMEM; 490 goto failure; 491 } 492 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 493 494 /* 495 * Fill in the component buf structure. 496 */ 497 cbp->bio_done = g_stripe_done; 498 cbp->bio_offset = offset; 499 /* 500 * MIN() is in case when 501 * (bp->bio_length % sc->sc_stripesize) != 0. 502 */ 503 cbp->bio_length = MIN(stripesize, length); 504 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 505 cbp->bio_ma_offset += (uintptr_t)addr; 506 cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; 507 cbp->bio_ma_offset %= PAGE_SIZE; 508 cbp->bio_ma_n = round_page(cbp->bio_ma_offset + 509 cbp->bio_length) / PAGE_SIZE; 510 } else 511 cbp->bio_data = addr; 512 513 cbp->bio_caller2 = sc->sc_disks[no]; 514 515 if (bp->bio_cmd != BIO_DELETE) 516 addr += stripesize; 517 } 518 /* 519 * Fire off all allocated requests! 520 */ 521 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 522 struct g_consumer *cp; 523 524 TAILQ_REMOVE(&queue, cbp, bio_queue); 525 cp = cbp->bio_caller2; 526 cbp->bio_caller2 = NULL; 527 cbp->bio_to = cp->provider; 528 G_STRIPE_LOGREQ(cbp, "Sending request."); 529 g_io_request(cbp, cp); 530 } 531 return (0); 532 failure: 533 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 534 TAILQ_REMOVE(&queue, cbp, bio_queue); 535 bp->bio_children--; 536 g_destroy_bio(cbp); 537 } 538 return (error); 539 } 540 541 static void 542 g_stripe_pushdown(struct g_stripe_softc *sc, struct bio *bp) 543 { 544 struct bio_queue_head queue; 545 struct g_consumer *cp; 546 struct bio *cbp; 547 u_int no; 548 549 bioq_init(&queue); 550 for (no = 0; no < sc->sc_ndisks; no++) { 551 cbp = g_clone_bio(bp); 552 if (cbp == NULL) { 553 for (cbp = bioq_first(&queue); cbp != NULL; 554 cbp = bioq_first(&queue)) { 555 bioq_remove(&queue, cbp); 556 g_destroy_bio(cbp); 557 } 558 if (bp->bio_error == 0) 559 bp->bio_error = ENOMEM; 560 g_io_deliver(bp, bp->bio_error); 561 return; 562 } 563 bioq_insert_tail(&queue, cbp); 564 cbp->bio_done = g_stripe_done; 565 cbp->bio_caller2 = sc->sc_disks[no]; 566 cbp->bio_to = sc->sc_disks[no]->provider; 567 } 568 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 569 bioq_remove(&queue, cbp); 570 G_STRIPE_LOGREQ(cbp, "Sending request."); 571 cp = cbp->bio_caller2; 572 cbp->bio_caller2 = NULL; 573 g_io_request(cbp, cp); 574 } 575 } 576 577 static void 578 g_stripe_start(struct bio *bp) 579 { 580 off_t offset, start, length, nstripe, stripesize; 581 struct g_stripe_softc *sc; 582 u_int no; 583 int error, fast = 0; 584 585 sc = bp->bio_to->geom->softc; 586 /* 587 * If sc == NULL, provider's error should be set and g_stripe_start() 588 * should not be called at all. 589 */ 590 KASSERT(sc != NULL, 591 ("Provider's error should be set (error=%d)(device=%s).", 592 bp->bio_to->error, bp->bio_to->name)); 593 594 G_STRIPE_LOGREQ(bp, "Request received."); 595 596 switch (bp->bio_cmd) { 597 case BIO_READ: 598 case BIO_WRITE: 599 case BIO_DELETE: 600 break; 601 case BIO_SPEEDUP: 602 case BIO_FLUSH: 603 g_stripe_pushdown(sc, bp); 604 return; 605 case BIO_GETATTR: 606 /* To which provider it should be delivered? */ 607 default: 608 g_io_deliver(bp, EOPNOTSUPP); 609 return; 610 } 611 612 stripesize = sc->sc_stripesize; 613 614 /* 615 * Calculations are quite messy, but fast I hope. 616 */ 617 618 /* Stripe number. */ 619 /* nstripe = bp->bio_offset / stripesize; */ 620 nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; 621 /* Disk number. */ 622 no = nstripe % sc->sc_ndisks; 623 /* Start position in stripe. */ 624 /* start = bp->bio_offset % stripesize; */ 625 start = bp->bio_offset & (stripesize - 1); 626 /* Start position in disk. */ 627 /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ 628 offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; 629 /* Length of data to operate. */ 630 length = MIN(bp->bio_length, stripesize - start); 631 632 /* 633 * Do use "fast" mode when: 634 * 1. "Fast" mode is ON. 635 * and 636 * 2. Request size is less than or equal to MAXPHYS, 637 * which should always be true. 638 * and 639 * 3. Request size is bigger than stripesize * ndisks. If it isn't, 640 * there will be no need to send more than one I/O request to 641 * a provider, so there is nothing to optmize. 642 * and 643 * 4. Request is not unmapped. 644 * and 645 * 5. It is not a BIO_DELETE. 646 */ 647 if (g_stripe_fast && bp->bio_length <= MAXPHYS && 648 bp->bio_length >= stripesize * sc->sc_ndisks && 649 (bp->bio_flags & BIO_UNMAPPED) == 0 && 650 bp->bio_cmd != BIO_DELETE) { 651 fast = 1; 652 } 653 error = 0; 654 if (fast) { 655 error = g_stripe_start_fast(bp, no, offset, length); 656 if (error != 0) 657 g_stripe_fast_failed++; 658 } 659 /* 660 * Do use "economic" when: 661 * 1. "Economic" mode is ON. 662 * or 663 * 2. "Fast" mode failed. It can only fail if there is no memory. 664 */ 665 if (!fast || error != 0) 666 error = g_stripe_start_economic(bp, no, offset, length); 667 if (error != 0) { 668 if (bp->bio_error == 0) 669 bp->bio_error = error; 670 g_io_deliver(bp, bp->bio_error); 671 } 672 } 673 674 static void 675 g_stripe_check_and_run(struct g_stripe_softc *sc) 676 { 677 struct g_provider *dp; 678 off_t mediasize, ms; 679 u_int no, sectorsize = 0; 680 681 g_topology_assert(); 682 if (g_stripe_nvalid(sc) != sc->sc_ndisks) 683 return; 684 685 sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", 686 sc->sc_name); 687 sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; 688 if (g_stripe_fast == 0) 689 sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; 690 /* 691 * Find the smallest disk. 692 */ 693 mediasize = sc->sc_disks[0]->provider->mediasize; 694 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 695 mediasize -= sc->sc_disks[0]->provider->sectorsize; 696 mediasize -= mediasize % sc->sc_stripesize; 697 sectorsize = sc->sc_disks[0]->provider->sectorsize; 698 for (no = 1; no < sc->sc_ndisks; no++) { 699 dp = sc->sc_disks[no]->provider; 700 ms = dp->mediasize; 701 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 702 ms -= dp->sectorsize; 703 ms -= ms % sc->sc_stripesize; 704 if (ms < mediasize) 705 mediasize = ms; 706 sectorsize = lcm(sectorsize, dp->sectorsize); 707 708 /* A provider underneath us doesn't support unmapped */ 709 if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { 710 G_STRIPE_DEBUG(1, "Cancelling unmapped " 711 "because of %s.", dp->name); 712 sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; 713 } 714 } 715 sc->sc_provider->sectorsize = sectorsize; 716 sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; 717 sc->sc_provider->stripesize = sc->sc_stripesize; 718 sc->sc_provider->stripeoffset = 0; 719 g_error_provider(sc->sc_provider, 0); 720 721 G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); 722 } 723 724 static int 725 g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) 726 { 727 struct g_provider *pp; 728 u_char *buf; 729 int error; 730 731 g_topology_assert(); 732 733 error = g_access(cp, 1, 0, 0); 734 if (error != 0) 735 return (error); 736 pp = cp->provider; 737 g_topology_unlock(); 738 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 739 &error); 740 g_topology_lock(); 741 g_access(cp, -1, 0, 0); 742 if (buf == NULL) 743 return (error); 744 745 /* Decode metadata. */ 746 stripe_metadata_decode(buf, md); 747 g_free(buf); 748 749 return (0); 750 } 751 752 /* 753 * Add disk to given device. 754 */ 755 static int 756 g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) 757 { 758 struct g_consumer *cp, *fcp; 759 struct g_geom *gp; 760 int error; 761 762 g_topology_assert(); 763 /* Metadata corrupted? */ 764 if (no >= sc->sc_ndisks) 765 return (EINVAL); 766 767 /* Check if disk is not already attached. */ 768 if (sc->sc_disks[no] != NULL) 769 return (EEXIST); 770 771 gp = sc->sc_geom; 772 fcp = LIST_FIRST(&gp->consumer); 773 774 cp = g_new_consumer(gp); 775 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 776 cp->private = NULL; 777 cp->index = no; 778 error = g_attach(cp, pp); 779 if (error != 0) { 780 g_destroy_consumer(cp); 781 return (error); 782 } 783 784 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { 785 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 786 if (error != 0) { 787 g_detach(cp); 788 g_destroy_consumer(cp); 789 return (error); 790 } 791 } 792 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { 793 struct g_stripe_metadata md; 794 795 /* Reread metadata. */ 796 error = g_stripe_read_metadata(cp, &md); 797 if (error != 0) 798 goto fail; 799 800 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || 801 strcmp(md.md_name, sc->sc_name) != 0 || 802 md.md_id != sc->sc_id) { 803 G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); 804 goto fail; 805 } 806 } 807 808 sc->sc_disks[no] = cp; 809 G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); 810 g_stripe_check_and_run(sc); 811 812 return (0); 813 fail: 814 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) 815 g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); 816 g_detach(cp); 817 g_destroy_consumer(cp); 818 return (error); 819 } 820 821 static struct g_geom * 822 g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, 823 u_int type) 824 { 825 struct g_stripe_softc *sc; 826 struct g_geom *gp; 827 u_int no; 828 829 g_topology_assert(); 830 G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, 831 md->md_id); 832 833 /* Two disks is minimum. */ 834 if (md->md_all < 2) { 835 G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); 836 return (NULL); 837 } 838 #if 0 839 /* Stripe size have to be grater than or equal to sector size. */ 840 if (md->md_stripesize < sectorsize) { 841 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 842 return (NULL); 843 } 844 #endif 845 /* Stripe size have to be power of 2. */ 846 if (!powerof2(md->md_stripesize)) { 847 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 848 return (NULL); 849 } 850 851 /* Check for duplicate unit */ 852 LIST_FOREACH(gp, &mp->geom, geom) { 853 sc = gp->softc; 854 if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { 855 G_STRIPE_DEBUG(0, "Device %s already configured.", 856 sc->sc_name); 857 return (NULL); 858 } 859 } 860 gp = g_new_geomf(mp, "%s", md->md_name); 861 sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); 862 gp->start = g_stripe_start; 863 gp->spoiled = g_stripe_orphan; 864 gp->orphan = g_stripe_orphan; 865 gp->access = g_stripe_access; 866 gp->dumpconf = g_stripe_dumpconf; 867 868 sc->sc_id = md->md_id; 869 sc->sc_stripesize = md->md_stripesize; 870 sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); 871 sc->sc_ndisks = md->md_all; 872 sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, 873 M_STRIPE, M_WAITOK | M_ZERO); 874 for (no = 0; no < sc->sc_ndisks; no++) 875 sc->sc_disks[no] = NULL; 876 sc->sc_type = type; 877 mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); 878 879 gp->softc = sc; 880 sc->sc_geom = gp; 881 sc->sc_provider = NULL; 882 883 G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 884 885 return (gp); 886 } 887 888 static int 889 g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) 890 { 891 struct g_provider *pp; 892 struct g_consumer *cp, *cp1; 893 struct g_geom *gp; 894 895 g_topology_assert(); 896 897 if (sc == NULL) 898 return (ENXIO); 899 900 pp = sc->sc_provider; 901 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 902 if (force) { 903 G_STRIPE_DEBUG(0, "Device %s is still open, so it " 904 "can't be definitely removed.", pp->name); 905 } else { 906 G_STRIPE_DEBUG(1, 907 "Device %s is still open (r%dw%de%d).", pp->name, 908 pp->acr, pp->acw, pp->ace); 909 return (EBUSY); 910 } 911 } 912 913 gp = sc->sc_geom; 914 LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { 915 g_stripe_remove_disk(cp); 916 if (cp1 == NULL) 917 return (0); /* Recursion happened. */ 918 } 919 if (!LIST_EMPTY(&gp->consumer)) 920 return (EINPROGRESS); 921 922 gp->softc = NULL; 923 KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", 924 gp->name)); 925 free(sc->sc_disks, M_STRIPE); 926 mtx_destroy(&sc->sc_lock); 927 free(sc, M_STRIPE); 928 G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); 929 g_wither_geom(gp, ENXIO); 930 return (0); 931 } 932 933 static int 934 g_stripe_destroy_geom(struct gctl_req *req __unused, 935 struct g_class *mp __unused, struct g_geom *gp) 936 { 937 struct g_stripe_softc *sc; 938 939 sc = gp->softc; 940 return (g_stripe_destroy(sc, 0)); 941 } 942 943 static struct g_geom * 944 g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 945 { 946 struct g_stripe_metadata md; 947 struct g_stripe_softc *sc; 948 struct g_consumer *cp; 949 struct g_geom *gp; 950 int error; 951 952 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 953 g_topology_assert(); 954 955 /* Skip providers that are already open for writing. */ 956 if (pp->acw > 0) 957 return (NULL); 958 959 G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); 960 961 gp = g_new_geomf(mp, "stripe:taste"); 962 gp->start = g_stripe_start; 963 gp->access = g_stripe_access; 964 gp->orphan = g_stripe_orphan; 965 cp = g_new_consumer(gp); 966 g_attach(cp, pp); 967 error = g_stripe_read_metadata(cp, &md); 968 g_detach(cp); 969 g_destroy_consumer(cp); 970 g_destroy_geom(gp); 971 if (error != 0) 972 return (NULL); 973 gp = NULL; 974 975 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) 976 return (NULL); 977 if (md.md_version > G_STRIPE_VERSION) { 978 printf("geom_stripe.ko module is too old to handle %s.\n", 979 pp->name); 980 return (NULL); 981 } 982 /* 983 * Backward compatibility: 984 */ 985 /* There was no md_provider field in earlier versions of metadata. */ 986 if (md.md_version < 2) 987 bzero(md.md_provider, sizeof(md.md_provider)); 988 /* There was no md_provsize field in earlier versions of metadata. */ 989 if (md.md_version < 3) 990 md.md_provsize = pp->mediasize; 991 992 if (md.md_provider[0] != '\0' && 993 !g_compare_names(md.md_provider, pp->name)) 994 return (NULL); 995 if (md.md_provsize != pp->mediasize) 996 return (NULL); 997 998 /* 999 * Let's check if device already exists. 1000 */ 1001 sc = NULL; 1002 LIST_FOREACH(gp, &mp->geom, geom) { 1003 sc = gp->softc; 1004 if (sc == NULL) 1005 continue; 1006 if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) 1007 continue; 1008 if (strcmp(md.md_name, sc->sc_name) != 0) 1009 continue; 1010 if (md.md_id != sc->sc_id) 1011 continue; 1012 break; 1013 } 1014 if (gp != NULL) { 1015 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1016 error = g_stripe_add_disk(sc, pp, md.md_no); 1017 if (error != 0) { 1018 G_STRIPE_DEBUG(0, 1019 "Cannot add disk %s to %s (error=%d).", pp->name, 1020 gp->name, error); 1021 return (NULL); 1022 } 1023 } else { 1024 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); 1025 if (gp == NULL) { 1026 G_STRIPE_DEBUG(0, "Cannot create device %s.", 1027 md.md_name); 1028 return (NULL); 1029 } 1030 sc = gp->softc; 1031 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1032 error = g_stripe_add_disk(sc, pp, md.md_no); 1033 if (error != 0) { 1034 G_STRIPE_DEBUG(0, 1035 "Cannot add disk %s to %s (error=%d).", pp->name, 1036 gp->name, error); 1037 g_stripe_destroy(sc, 1); 1038 return (NULL); 1039 } 1040 } 1041 1042 return (gp); 1043 } 1044 1045 static void 1046 g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) 1047 { 1048 u_int attached, no; 1049 struct g_stripe_metadata md; 1050 struct g_provider *pp; 1051 struct g_stripe_softc *sc; 1052 struct g_geom *gp; 1053 struct sbuf *sb; 1054 off_t *stripesize; 1055 const char *name; 1056 char param[16]; 1057 int *nargs; 1058 1059 g_topology_assert(); 1060 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1061 if (nargs == NULL) { 1062 gctl_error(req, "No '%s' argument.", "nargs"); 1063 return; 1064 } 1065 if (*nargs <= 2) { 1066 gctl_error(req, "Too few arguments."); 1067 return; 1068 } 1069 1070 strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); 1071 md.md_version = G_STRIPE_VERSION; 1072 name = gctl_get_asciiparam(req, "arg0"); 1073 if (name == NULL) { 1074 gctl_error(req, "No 'arg%u' argument.", 0); 1075 return; 1076 } 1077 strlcpy(md.md_name, name, sizeof(md.md_name)); 1078 md.md_id = arc4random(); 1079 md.md_no = 0; 1080 md.md_all = *nargs - 1; 1081 stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); 1082 if (stripesize == NULL) { 1083 gctl_error(req, "No '%s' argument.", "stripesize"); 1084 return; 1085 } 1086 md.md_stripesize = (uint32_t)*stripesize; 1087 bzero(md.md_provider, sizeof(md.md_provider)); 1088 /* This field is not important here. */ 1089 md.md_provsize = 0; 1090 1091 /* Check all providers are valid */ 1092 for (no = 1; no < *nargs; no++) { 1093 snprintf(param, sizeof(param), "arg%u", no); 1094 name = gctl_get_asciiparam(req, param); 1095 if (name == NULL) { 1096 gctl_error(req, "No 'arg%u' argument.", no); 1097 return; 1098 } 1099 if (strncmp(name, "/dev/", strlen("/dev/")) == 0) 1100 name += strlen("/dev/"); 1101 pp = g_provider_by_name(name); 1102 if (pp == NULL) { 1103 G_STRIPE_DEBUG(1, "Disk %s is invalid.", name); 1104 gctl_error(req, "Disk %s is invalid.", name); 1105 return; 1106 } 1107 } 1108 1109 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); 1110 if (gp == NULL) { 1111 gctl_error(req, "Can't configure %s.", md.md_name); 1112 return; 1113 } 1114 1115 sc = gp->softc; 1116 sb = sbuf_new_auto(); 1117 sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); 1118 for (attached = 0, no = 1; no < *nargs; no++) { 1119 snprintf(param, sizeof(param), "arg%u", no); 1120 name = gctl_get_asciiparam(req, param); 1121 if (name == NULL) { 1122 gctl_error(req, "No 'arg%u' argument.", no); 1123 continue; 1124 } 1125 if (strncmp(name, "/dev/", strlen("/dev/")) == 0) 1126 name += strlen("/dev/"); 1127 pp = g_provider_by_name(name); 1128 KASSERT(pp != NULL, ("Provider %s disappear?!", name)); 1129 if (g_stripe_add_disk(sc, pp, no - 1) != 0) { 1130 G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", 1131 no, pp->name, gp->name); 1132 sbuf_printf(sb, " %s", pp->name); 1133 continue; 1134 } 1135 attached++; 1136 } 1137 sbuf_finish(sb); 1138 if (md.md_all != attached) { 1139 g_stripe_destroy(gp->softc, 1); 1140 gctl_error(req, "%s", sbuf_data(sb)); 1141 } 1142 sbuf_delete(sb); 1143 } 1144 1145 static struct g_stripe_softc * 1146 g_stripe_find_device(struct g_class *mp, const char *name) 1147 { 1148 struct g_stripe_softc *sc; 1149 struct g_geom *gp; 1150 1151 LIST_FOREACH(gp, &mp->geom, geom) { 1152 sc = gp->softc; 1153 if (sc == NULL) 1154 continue; 1155 if (strcmp(sc->sc_name, name) == 0) 1156 return (sc); 1157 } 1158 return (NULL); 1159 } 1160 1161 static void 1162 g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) 1163 { 1164 struct g_stripe_softc *sc; 1165 int *force, *nargs, error; 1166 const char *name; 1167 char param[16]; 1168 u_int i; 1169 1170 g_topology_assert(); 1171 1172 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1173 if (nargs == NULL) { 1174 gctl_error(req, "No '%s' argument.", "nargs"); 1175 return; 1176 } 1177 if (*nargs <= 0) { 1178 gctl_error(req, "Missing device(s)."); 1179 return; 1180 } 1181 force = gctl_get_paraml(req, "force", sizeof(*force)); 1182 if (force == NULL) { 1183 gctl_error(req, "No '%s' argument.", "force"); 1184 return; 1185 } 1186 1187 for (i = 0; i < (u_int)*nargs; i++) { 1188 snprintf(param, sizeof(param), "arg%u", i); 1189 name = gctl_get_asciiparam(req, param); 1190 if (name == NULL) { 1191 gctl_error(req, "No 'arg%u' argument.", i); 1192 return; 1193 } 1194 sc = g_stripe_find_device(mp, name); 1195 if (sc == NULL) { 1196 gctl_error(req, "No such device: %s.", name); 1197 return; 1198 } 1199 error = g_stripe_destroy(sc, *force); 1200 if (error != 0) { 1201 gctl_error(req, "Cannot destroy device %s (error=%d).", 1202 sc->sc_name, error); 1203 return; 1204 } 1205 } 1206 } 1207 1208 static void 1209 g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) 1210 { 1211 uint32_t *version; 1212 1213 g_topology_assert(); 1214 1215 version = gctl_get_paraml(req, "version", sizeof(*version)); 1216 if (version == NULL) { 1217 gctl_error(req, "No '%s' argument.", "version"); 1218 return; 1219 } 1220 if (*version != G_STRIPE_VERSION) { 1221 gctl_error(req, "Userland and kernel parts are out of sync."); 1222 return; 1223 } 1224 1225 if (strcmp(verb, "create") == 0) { 1226 g_stripe_ctl_create(req, mp); 1227 return; 1228 } else if (strcmp(verb, "destroy") == 0 || 1229 strcmp(verb, "stop") == 0) { 1230 g_stripe_ctl_destroy(req, mp); 1231 return; 1232 } 1233 1234 gctl_error(req, "Unknown verb."); 1235 } 1236 1237 static void 1238 g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1239 struct g_consumer *cp, struct g_provider *pp) 1240 { 1241 struct g_stripe_softc *sc; 1242 1243 sc = gp->softc; 1244 if (sc == NULL) 1245 return; 1246 if (pp != NULL) { 1247 /* Nothing here. */ 1248 } else if (cp != NULL) { 1249 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 1250 (u_int)cp->index); 1251 } else { 1252 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 1253 sbuf_printf(sb, "%s<Stripesize>%ju</Stripesize>\n", indent, 1254 (uintmax_t)sc->sc_stripesize); 1255 sbuf_printf(sb, "%s<Type>", indent); 1256 switch (sc->sc_type) { 1257 case G_STRIPE_TYPE_AUTOMATIC: 1258 sbuf_cat(sb, "AUTOMATIC"); 1259 break; 1260 case G_STRIPE_TYPE_MANUAL: 1261 sbuf_cat(sb, "MANUAL"); 1262 break; 1263 default: 1264 sbuf_cat(sb, "UNKNOWN"); 1265 break; 1266 } 1267 sbuf_cat(sb, "</Type>\n"); 1268 sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n", 1269 indent, sc->sc_ndisks, g_stripe_nvalid(sc)); 1270 sbuf_printf(sb, "%s<State>", indent); 1271 if (sc->sc_provider != NULL && sc->sc_provider->error == 0) 1272 sbuf_cat(sb, "UP"); 1273 else 1274 sbuf_cat(sb, "DOWN"); 1275 sbuf_cat(sb, "</State>\n"); 1276 } 1277 } 1278 1279 DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); 1280 MODULE_VERSION(geom_stripe, 0); 1281