1 /*- 2 * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/kernel.h> 33 #include <sys/module.h> 34 #include <sys/lock.h> 35 #include <sys/mutex.h> 36 #include <sys/bio.h> 37 #include <sys/sbuf.h> 38 #include <sys/sysctl.h> 39 #include <sys/malloc.h> 40 #include <vm/uma.h> 41 #include <geom/geom.h> 42 #include <geom/stripe/g_stripe.h> 43 44 FEATURE(geom_stripe, "GEOM striping support"); 45 46 static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); 47 48 static uma_zone_t g_stripe_zone; 49 50 static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); 51 static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, 52 struct g_geom *gp); 53 54 static g_taste_t g_stripe_taste; 55 static g_ctl_req_t g_stripe_config; 56 static g_dumpconf_t g_stripe_dumpconf; 57 static g_init_t g_stripe_init; 58 static g_fini_t g_stripe_fini; 59 60 struct g_class g_stripe_class = { 61 .name = G_STRIPE_CLASS_NAME, 62 .version = G_VERSION, 63 .ctlreq = g_stripe_config, 64 .taste = g_stripe_taste, 65 .destroy_geom = g_stripe_destroy_geom, 66 .init = g_stripe_init, 67 .fini = g_stripe_fini 68 }; 69 70 SYSCTL_DECL(_kern_geom); 71 static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, 72 "GEOM_STRIPE stuff"); 73 static u_int g_stripe_debug = 0; 74 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, 75 "Debug level"); 76 static int g_stripe_fast = 0; 77 static int 78 g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS) 79 { 80 int error, fast; 81 82 fast = g_stripe_fast; 83 error = sysctl_handle_int(oidp, &fast, 0, req); 84 if (error == 0 && req->newptr != NULL) 85 g_stripe_fast = fast; 86 return (error); 87 } 88 SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RWTUN, 89 NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode"); 90 static u_int g_stripe_maxmem = MAXPHYS * 100; 91 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RDTUN, &g_stripe_maxmem, 92 0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); 93 static u_int g_stripe_fast_failed = 0; 94 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, 95 &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); 96 97 /* 98 * Greatest Common Divisor. 99 */ 100 static u_int 101 gcd(u_int a, u_int b) 102 { 103 u_int c; 104 105 while (b != 0) { 106 c = a; 107 a = b; 108 b = (c % b); 109 } 110 return (a); 111 } 112 113 /* 114 * Least Common Multiple. 115 */ 116 static u_int 117 lcm(u_int a, u_int b) 118 { 119 120 return ((a * b) / gcd(a, b)); 121 } 122 123 static void 124 g_stripe_init(struct g_class *mp __unused) 125 { 126 127 g_stripe_zone = uma_zcreate("g_stripe_zone", MAXPHYS, NULL, NULL, 128 NULL, NULL, 0, 0); 129 g_stripe_maxmem -= g_stripe_maxmem % MAXPHYS; 130 uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAXPHYS); 131 } 132 133 static void 134 g_stripe_fini(struct g_class *mp __unused) 135 { 136 137 uma_zdestroy(g_stripe_zone); 138 } 139 140 /* 141 * Return the number of valid disks. 142 */ 143 static u_int 144 g_stripe_nvalid(struct g_stripe_softc *sc) 145 { 146 u_int i, no; 147 148 no = 0; 149 for (i = 0; i < sc->sc_ndisks; i++) { 150 if (sc->sc_disks[i] != NULL) 151 no++; 152 } 153 154 return (no); 155 } 156 157 static void 158 g_stripe_remove_disk(struct g_consumer *cp) 159 { 160 struct g_stripe_softc *sc; 161 162 g_topology_assert(); 163 KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); 164 sc = (struct g_stripe_softc *)cp->geom->softc; 165 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 166 167 if (cp->private == NULL) { 168 G_STRIPE_DEBUG(0, "Disk %s removed from %s.", 169 cp->provider->name, sc->sc_name); 170 cp->private = (void *)(uintptr_t)-1; 171 } 172 173 if (sc->sc_provider != NULL) { 174 G_STRIPE_DEBUG(0, "Device %s deactivated.", 175 sc->sc_provider->name); 176 g_wither_provider(sc->sc_provider, ENXIO); 177 sc->sc_provider = NULL; 178 } 179 180 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 181 return; 182 sc->sc_disks[cp->index] = NULL; 183 cp->index = 0; 184 g_detach(cp); 185 g_destroy_consumer(cp); 186 /* If there are no valid disks anymore, remove device. */ 187 if (LIST_EMPTY(&sc->sc_geom->consumer)) 188 g_stripe_destroy(sc, 1); 189 } 190 191 static void 192 g_stripe_orphan(struct g_consumer *cp) 193 { 194 struct g_stripe_softc *sc; 195 struct g_geom *gp; 196 197 g_topology_assert(); 198 gp = cp->geom; 199 sc = gp->softc; 200 if (sc == NULL) 201 return; 202 203 g_stripe_remove_disk(cp); 204 } 205 206 static int 207 g_stripe_access(struct g_provider *pp, int dr, int dw, int de) 208 { 209 struct g_consumer *cp1, *cp2, *tmp; 210 struct g_stripe_softc *sc; 211 struct g_geom *gp; 212 int error; 213 214 g_topology_assert(); 215 gp = pp->geom; 216 sc = gp->softc; 217 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 218 219 /* On first open, grab an extra "exclusive" bit */ 220 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) 221 de++; 222 /* ... and let go of it on last close */ 223 if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) 224 de--; 225 226 LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { 227 error = g_access(cp1, dr, dw, de); 228 if (error != 0) 229 goto fail; 230 if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && 231 cp1->private != NULL) { 232 g_stripe_remove_disk(cp1); /* May destroy geom. */ 233 } 234 } 235 return (0); 236 237 fail: 238 LIST_FOREACH(cp2, &gp->consumer, consumer) { 239 if (cp1 == cp2) 240 break; 241 g_access(cp2, -dr, -dw, -de); 242 } 243 return (error); 244 } 245 246 static void 247 g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, 248 off_t length, int mode) 249 { 250 u_int stripesize; 251 size_t len; 252 253 stripesize = sc->sc_stripesize; 254 len = (size_t)(stripesize - (offset & (stripesize - 1))); 255 do { 256 bcopy(src, dst, len); 257 if (mode) { 258 dst += len + stripesize * (sc->sc_ndisks - 1); 259 src += len; 260 } else { 261 dst += len; 262 src += len + stripesize * (sc->sc_ndisks - 1); 263 } 264 length -= len; 265 KASSERT(length >= 0, 266 ("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).", 267 (size_t)stripesize, (intmax_t)offset, (intmax_t)length)); 268 if (length > stripesize) 269 len = stripesize; 270 else 271 len = length; 272 } while (length > 0); 273 } 274 275 static void 276 g_stripe_done(struct bio *bp) 277 { 278 struct g_stripe_softc *sc; 279 struct bio *pbp; 280 281 pbp = bp->bio_parent; 282 sc = pbp->bio_to->geom->softc; 283 if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { 284 g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, 285 bp->bio_length, 1); 286 bp->bio_data = bp->bio_caller1; 287 bp->bio_caller1 = NULL; 288 } 289 mtx_lock(&sc->sc_lock); 290 if (pbp->bio_error == 0) 291 pbp->bio_error = bp->bio_error; 292 pbp->bio_completed += bp->bio_completed; 293 pbp->bio_inbed++; 294 if (pbp->bio_children == pbp->bio_inbed) { 295 mtx_unlock(&sc->sc_lock); 296 if (pbp->bio_driver1 != NULL) 297 uma_zfree(g_stripe_zone, pbp->bio_driver1); 298 g_io_deliver(pbp, pbp->bio_error); 299 } else 300 mtx_unlock(&sc->sc_lock); 301 g_destroy_bio(bp); 302 } 303 304 static int 305 g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) 306 { 307 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 308 u_int nparts = 0, stripesize; 309 struct g_stripe_softc *sc; 310 char *addr, *data = NULL; 311 struct bio *cbp; 312 int error; 313 314 sc = bp->bio_to->geom->softc; 315 316 addr = bp->bio_data; 317 stripesize = sc->sc_stripesize; 318 319 cbp = g_clone_bio(bp); 320 if (cbp == NULL) { 321 error = ENOMEM; 322 goto failure; 323 } 324 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 325 nparts++; 326 /* 327 * Fill in the component buf structure. 328 */ 329 cbp->bio_done = g_stripe_done; 330 cbp->bio_offset = offset; 331 cbp->bio_data = addr; 332 cbp->bio_caller1 = NULL; 333 cbp->bio_length = length; 334 cbp->bio_caller2 = sc->sc_disks[no]; 335 336 /* offset -= offset % stripesize; */ 337 offset -= offset & (stripesize - 1); 338 addr += length; 339 length = bp->bio_length - length; 340 for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { 341 if (no > sc->sc_ndisks - 1) { 342 no = 0; 343 offset += stripesize; 344 } 345 if (nparts >= sc->sc_ndisks) { 346 cbp = TAILQ_NEXT(cbp, bio_queue); 347 if (cbp == NULL) 348 cbp = TAILQ_FIRST(&queue); 349 nparts++; 350 /* 351 * Update bio structure. 352 */ 353 /* 354 * MIN() is in case when 355 * (bp->bio_length % sc->sc_stripesize) != 0. 356 */ 357 cbp->bio_length += MIN(stripesize, length); 358 if (cbp->bio_caller1 == NULL) { 359 cbp->bio_caller1 = cbp->bio_data; 360 cbp->bio_data = NULL; 361 if (data == NULL) { 362 data = uma_zalloc(g_stripe_zone, 363 M_NOWAIT); 364 if (data == NULL) { 365 error = ENOMEM; 366 goto failure; 367 } 368 } 369 } 370 } else { 371 cbp = g_clone_bio(bp); 372 if (cbp == NULL) { 373 error = ENOMEM; 374 goto failure; 375 } 376 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 377 nparts++; 378 /* 379 * Fill in the component buf structure. 380 */ 381 cbp->bio_done = g_stripe_done; 382 cbp->bio_offset = offset; 383 cbp->bio_data = addr; 384 cbp->bio_caller1 = NULL; 385 /* 386 * MIN() is in case when 387 * (bp->bio_length % sc->sc_stripesize) != 0. 388 */ 389 cbp->bio_length = MIN(stripesize, length); 390 cbp->bio_caller2 = sc->sc_disks[no]; 391 } 392 } 393 if (data != NULL) 394 bp->bio_driver1 = data; 395 /* 396 * Fire off all allocated requests! 397 */ 398 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 399 struct g_consumer *cp; 400 401 TAILQ_REMOVE(&queue, cbp, bio_queue); 402 cp = cbp->bio_caller2; 403 cbp->bio_caller2 = NULL; 404 cbp->bio_to = cp->provider; 405 if (cbp->bio_caller1 != NULL) { 406 cbp->bio_data = data; 407 if (bp->bio_cmd == BIO_WRITE) { 408 g_stripe_copy(sc, cbp->bio_caller1, data, 409 cbp->bio_offset, cbp->bio_length, 0); 410 } 411 data += cbp->bio_length; 412 } 413 G_STRIPE_LOGREQ(cbp, "Sending request."); 414 g_io_request(cbp, cp); 415 } 416 return (0); 417 failure: 418 if (data != NULL) 419 uma_zfree(g_stripe_zone, data); 420 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 421 TAILQ_REMOVE(&queue, cbp, bio_queue); 422 if (cbp->bio_caller1 != NULL) { 423 cbp->bio_data = cbp->bio_caller1; 424 cbp->bio_caller1 = NULL; 425 } 426 bp->bio_children--; 427 g_destroy_bio(cbp); 428 } 429 return (error); 430 } 431 432 static int 433 g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) 434 { 435 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 436 struct g_stripe_softc *sc; 437 uint32_t stripesize; 438 struct bio *cbp; 439 char *addr; 440 int error; 441 442 sc = bp->bio_to->geom->softc; 443 444 stripesize = sc->sc_stripesize; 445 446 cbp = g_clone_bio(bp); 447 if (cbp == NULL) { 448 error = ENOMEM; 449 goto failure; 450 } 451 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 452 /* 453 * Fill in the component buf structure. 454 */ 455 if (bp->bio_length == length) 456 cbp->bio_done = g_std_done; /* Optimized lockless case. */ 457 else 458 cbp->bio_done = g_stripe_done; 459 cbp->bio_offset = offset; 460 cbp->bio_length = length; 461 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 462 bp->bio_ma_n = round_page(bp->bio_ma_offset + 463 bp->bio_length) / PAGE_SIZE; 464 addr = NULL; 465 } else 466 addr = bp->bio_data; 467 cbp->bio_caller2 = sc->sc_disks[no]; 468 469 /* offset -= offset % stripesize; */ 470 offset -= offset & (stripesize - 1); 471 if (bp->bio_cmd != BIO_DELETE) 472 addr += length; 473 length = bp->bio_length - length; 474 for (no++; length > 0; no++, length -= stripesize) { 475 if (no > sc->sc_ndisks - 1) { 476 no = 0; 477 offset += stripesize; 478 } 479 cbp = g_clone_bio(bp); 480 if (cbp == NULL) { 481 error = ENOMEM; 482 goto failure; 483 } 484 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 485 486 /* 487 * Fill in the component buf structure. 488 */ 489 cbp->bio_done = g_stripe_done; 490 cbp->bio_offset = offset; 491 /* 492 * MIN() is in case when 493 * (bp->bio_length % sc->sc_stripesize) != 0. 494 */ 495 cbp->bio_length = MIN(stripesize, length); 496 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 497 cbp->bio_ma_offset += (uintptr_t)addr; 498 cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; 499 cbp->bio_ma_offset %= PAGE_SIZE; 500 cbp->bio_ma_n = round_page(cbp->bio_ma_offset + 501 cbp->bio_length) / PAGE_SIZE; 502 } else 503 cbp->bio_data = addr; 504 505 cbp->bio_caller2 = sc->sc_disks[no]; 506 507 if (bp->bio_cmd != BIO_DELETE) 508 addr += stripesize; 509 } 510 /* 511 * Fire off all allocated requests! 512 */ 513 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 514 struct g_consumer *cp; 515 516 TAILQ_REMOVE(&queue, cbp, bio_queue); 517 cp = cbp->bio_caller2; 518 cbp->bio_caller2 = NULL; 519 cbp->bio_to = cp->provider; 520 G_STRIPE_LOGREQ(cbp, "Sending request."); 521 g_io_request(cbp, cp); 522 } 523 return (0); 524 failure: 525 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 526 TAILQ_REMOVE(&queue, cbp, bio_queue); 527 bp->bio_children--; 528 g_destroy_bio(cbp); 529 } 530 return (error); 531 } 532 533 static void 534 g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp) 535 { 536 struct bio_queue_head queue; 537 struct g_consumer *cp; 538 struct bio *cbp; 539 u_int no; 540 541 bioq_init(&queue); 542 for (no = 0; no < sc->sc_ndisks; no++) { 543 cbp = g_clone_bio(bp); 544 if (cbp == NULL) { 545 for (cbp = bioq_first(&queue); cbp != NULL; 546 cbp = bioq_first(&queue)) { 547 bioq_remove(&queue, cbp); 548 g_destroy_bio(cbp); 549 } 550 if (bp->bio_error == 0) 551 bp->bio_error = ENOMEM; 552 g_io_deliver(bp, bp->bio_error); 553 return; 554 } 555 bioq_insert_tail(&queue, cbp); 556 cbp->bio_done = g_stripe_done; 557 cbp->bio_caller2 = sc->sc_disks[no]; 558 cbp->bio_to = sc->sc_disks[no]->provider; 559 } 560 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 561 bioq_remove(&queue, cbp); 562 G_STRIPE_LOGREQ(cbp, "Sending request."); 563 cp = cbp->bio_caller2; 564 cbp->bio_caller2 = NULL; 565 g_io_request(cbp, cp); 566 } 567 } 568 569 static void 570 g_stripe_start(struct bio *bp) 571 { 572 off_t offset, start, length, nstripe; 573 struct g_stripe_softc *sc; 574 u_int no, stripesize; 575 int error, fast = 0; 576 577 sc = bp->bio_to->geom->softc; 578 /* 579 * If sc == NULL, provider's error should be set and g_stripe_start() 580 * should not be called at all. 581 */ 582 KASSERT(sc != NULL, 583 ("Provider's error should be set (error=%d)(device=%s).", 584 bp->bio_to->error, bp->bio_to->name)); 585 586 G_STRIPE_LOGREQ(bp, "Request received."); 587 588 switch (bp->bio_cmd) { 589 case BIO_READ: 590 case BIO_WRITE: 591 case BIO_DELETE: 592 break; 593 case BIO_FLUSH: 594 g_stripe_flush(sc, bp); 595 return; 596 case BIO_GETATTR: 597 /* To which provider it should be delivered? */ 598 default: 599 g_io_deliver(bp, EOPNOTSUPP); 600 return; 601 } 602 603 stripesize = sc->sc_stripesize; 604 605 /* 606 * Calculations are quite messy, but fast I hope. 607 */ 608 609 /* Stripe number. */ 610 /* nstripe = bp->bio_offset / stripesize; */ 611 nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; 612 /* Disk number. */ 613 no = nstripe % sc->sc_ndisks; 614 /* Start position in stripe. */ 615 /* start = bp->bio_offset % stripesize; */ 616 start = bp->bio_offset & (stripesize - 1); 617 /* Start position in disk. */ 618 /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ 619 offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; 620 /* Length of data to operate. */ 621 length = MIN(bp->bio_length, stripesize - start); 622 623 /* 624 * Do use "fast" mode when: 625 * 1. "Fast" mode is ON. 626 * and 627 * 2. Request size is less than or equal to MAXPHYS, 628 * which should always be true. 629 * and 630 * 3. Request size is bigger than stripesize * ndisks. If it isn't, 631 * there will be no need to send more than one I/O request to 632 * a provider, so there is nothing to optmize. 633 * and 634 * 4. Request is not unmapped. 635 * and 636 * 5. It is not a BIO_DELETE. 637 */ 638 if (g_stripe_fast && bp->bio_length <= MAXPHYS && 639 bp->bio_length >= stripesize * sc->sc_ndisks && 640 (bp->bio_flags & BIO_UNMAPPED) == 0 && 641 bp->bio_cmd != BIO_DELETE) { 642 fast = 1; 643 } 644 error = 0; 645 if (fast) { 646 error = g_stripe_start_fast(bp, no, offset, length); 647 if (error != 0) 648 g_stripe_fast_failed++; 649 } 650 /* 651 * Do use "economic" when: 652 * 1. "Economic" mode is ON. 653 * or 654 * 2. "Fast" mode failed. It can only fail if there is no memory. 655 */ 656 if (!fast || error != 0) 657 error = g_stripe_start_economic(bp, no, offset, length); 658 if (error != 0) { 659 if (bp->bio_error == 0) 660 bp->bio_error = error; 661 g_io_deliver(bp, bp->bio_error); 662 } 663 } 664 665 static void 666 g_stripe_check_and_run(struct g_stripe_softc *sc) 667 { 668 struct g_provider *dp; 669 off_t mediasize, ms; 670 u_int no, sectorsize = 0; 671 672 g_topology_assert(); 673 if (g_stripe_nvalid(sc) != sc->sc_ndisks) 674 return; 675 676 sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", 677 sc->sc_name); 678 sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; 679 if (g_stripe_fast == 0) 680 sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; 681 /* 682 * Find the smallest disk. 683 */ 684 mediasize = sc->sc_disks[0]->provider->mediasize; 685 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 686 mediasize -= sc->sc_disks[0]->provider->sectorsize; 687 mediasize -= mediasize % sc->sc_stripesize; 688 sectorsize = sc->sc_disks[0]->provider->sectorsize; 689 for (no = 1; no < sc->sc_ndisks; no++) { 690 dp = sc->sc_disks[no]->provider; 691 ms = dp->mediasize; 692 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 693 ms -= dp->sectorsize; 694 ms -= ms % sc->sc_stripesize; 695 if (ms < mediasize) 696 mediasize = ms; 697 sectorsize = lcm(sectorsize, dp->sectorsize); 698 699 /* A provider underneath us doesn't support unmapped */ 700 if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { 701 G_STRIPE_DEBUG(1, "Cancelling unmapped " 702 "because of %s.", dp->name); 703 sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; 704 } 705 } 706 sc->sc_provider->sectorsize = sectorsize; 707 sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; 708 sc->sc_provider->stripesize = sc->sc_stripesize; 709 sc->sc_provider->stripeoffset = 0; 710 g_error_provider(sc->sc_provider, 0); 711 712 G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); 713 } 714 715 static int 716 g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) 717 { 718 struct g_provider *pp; 719 u_char *buf; 720 int error; 721 722 g_topology_assert(); 723 724 error = g_access(cp, 1, 0, 0); 725 if (error != 0) 726 return (error); 727 pp = cp->provider; 728 g_topology_unlock(); 729 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 730 &error); 731 g_topology_lock(); 732 g_access(cp, -1, 0, 0); 733 if (buf == NULL) 734 return (error); 735 736 /* Decode metadata. */ 737 stripe_metadata_decode(buf, md); 738 g_free(buf); 739 740 return (0); 741 } 742 743 /* 744 * Add disk to given device. 745 */ 746 static int 747 g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) 748 { 749 struct g_consumer *cp, *fcp; 750 struct g_geom *gp; 751 int error; 752 753 g_topology_assert(); 754 /* Metadata corrupted? */ 755 if (no >= sc->sc_ndisks) 756 return (EINVAL); 757 758 /* Check if disk is not already attached. */ 759 if (sc->sc_disks[no] != NULL) 760 return (EEXIST); 761 762 gp = sc->sc_geom; 763 fcp = LIST_FIRST(&gp->consumer); 764 765 cp = g_new_consumer(gp); 766 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 767 cp->private = NULL; 768 cp->index = no; 769 error = g_attach(cp, pp); 770 if (error != 0) { 771 g_destroy_consumer(cp); 772 return (error); 773 } 774 775 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { 776 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 777 if (error != 0) { 778 g_detach(cp); 779 g_destroy_consumer(cp); 780 return (error); 781 } 782 } 783 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { 784 struct g_stripe_metadata md; 785 786 /* Reread metadata. */ 787 error = g_stripe_read_metadata(cp, &md); 788 if (error != 0) 789 goto fail; 790 791 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || 792 strcmp(md.md_name, sc->sc_name) != 0 || 793 md.md_id != sc->sc_id) { 794 G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); 795 goto fail; 796 } 797 } 798 799 sc->sc_disks[no] = cp; 800 G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); 801 g_stripe_check_and_run(sc); 802 803 return (0); 804 fail: 805 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) 806 g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); 807 g_detach(cp); 808 g_destroy_consumer(cp); 809 return (error); 810 } 811 812 static struct g_geom * 813 g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, 814 u_int type) 815 { 816 struct g_stripe_softc *sc; 817 struct g_geom *gp; 818 u_int no; 819 820 g_topology_assert(); 821 G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, 822 md->md_id); 823 824 /* Two disks is minimum. */ 825 if (md->md_all < 2) { 826 G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); 827 return (NULL); 828 } 829 #if 0 830 /* Stripe size have to be grater than or equal to sector size. */ 831 if (md->md_stripesize < sectorsize) { 832 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 833 return (NULL); 834 } 835 #endif 836 /* Stripe size have to be power of 2. */ 837 if (!powerof2(md->md_stripesize)) { 838 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 839 return (NULL); 840 } 841 842 /* Check for duplicate unit */ 843 LIST_FOREACH(gp, &mp->geom, geom) { 844 sc = gp->softc; 845 if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { 846 G_STRIPE_DEBUG(0, "Device %s already configured.", 847 sc->sc_name); 848 return (NULL); 849 } 850 } 851 gp = g_new_geomf(mp, "%s", md->md_name); 852 sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); 853 gp->start = g_stripe_start; 854 gp->spoiled = g_stripe_orphan; 855 gp->orphan = g_stripe_orphan; 856 gp->access = g_stripe_access; 857 gp->dumpconf = g_stripe_dumpconf; 858 859 sc->sc_id = md->md_id; 860 sc->sc_stripesize = md->md_stripesize; 861 sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); 862 sc->sc_ndisks = md->md_all; 863 sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, 864 M_STRIPE, M_WAITOK | M_ZERO); 865 for (no = 0; no < sc->sc_ndisks; no++) 866 sc->sc_disks[no] = NULL; 867 sc->sc_type = type; 868 mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); 869 870 gp->softc = sc; 871 sc->sc_geom = gp; 872 sc->sc_provider = NULL; 873 874 G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 875 876 return (gp); 877 } 878 879 static int 880 g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) 881 { 882 struct g_provider *pp; 883 struct g_consumer *cp, *cp1; 884 struct g_geom *gp; 885 886 g_topology_assert(); 887 888 if (sc == NULL) 889 return (ENXIO); 890 891 pp = sc->sc_provider; 892 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 893 if (force) { 894 G_STRIPE_DEBUG(0, "Device %s is still open, so it " 895 "can't be definitely removed.", pp->name); 896 } else { 897 G_STRIPE_DEBUG(1, 898 "Device %s is still open (r%dw%de%d).", pp->name, 899 pp->acr, pp->acw, pp->ace); 900 return (EBUSY); 901 } 902 } 903 904 gp = sc->sc_geom; 905 LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { 906 g_stripe_remove_disk(cp); 907 if (cp1 == NULL) 908 return (0); /* Recursion happened. */ 909 } 910 if (!LIST_EMPTY(&gp->consumer)) 911 return (EINPROGRESS); 912 913 gp->softc = NULL; 914 KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", 915 gp->name)); 916 free(sc->sc_disks, M_STRIPE); 917 mtx_destroy(&sc->sc_lock); 918 free(sc, M_STRIPE); 919 G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); 920 g_wither_geom(gp, ENXIO); 921 return (0); 922 } 923 924 static int 925 g_stripe_destroy_geom(struct gctl_req *req __unused, 926 struct g_class *mp __unused, struct g_geom *gp) 927 { 928 struct g_stripe_softc *sc; 929 930 sc = gp->softc; 931 return (g_stripe_destroy(sc, 0)); 932 } 933 934 static struct g_geom * 935 g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 936 { 937 struct g_stripe_metadata md; 938 struct g_stripe_softc *sc; 939 struct g_consumer *cp; 940 struct g_geom *gp; 941 int error; 942 943 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 944 g_topology_assert(); 945 946 /* Skip providers that are already open for writing. */ 947 if (pp->acw > 0) 948 return (NULL); 949 950 G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); 951 952 gp = g_new_geomf(mp, "stripe:taste"); 953 gp->start = g_stripe_start; 954 gp->access = g_stripe_access; 955 gp->orphan = g_stripe_orphan; 956 cp = g_new_consumer(gp); 957 g_attach(cp, pp); 958 error = g_stripe_read_metadata(cp, &md); 959 g_detach(cp); 960 g_destroy_consumer(cp); 961 g_destroy_geom(gp); 962 if (error != 0) 963 return (NULL); 964 gp = NULL; 965 966 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) 967 return (NULL); 968 if (md.md_version > G_STRIPE_VERSION) { 969 printf("geom_stripe.ko module is too old to handle %s.\n", 970 pp->name); 971 return (NULL); 972 } 973 /* 974 * Backward compatibility: 975 */ 976 /* There was no md_provider field in earlier versions of metadata. */ 977 if (md.md_version < 2) 978 bzero(md.md_provider, sizeof(md.md_provider)); 979 /* There was no md_provsize field in earlier versions of metadata. */ 980 if (md.md_version < 3) 981 md.md_provsize = pp->mediasize; 982 983 if (md.md_provider[0] != '\0' && 984 !g_compare_names(md.md_provider, pp->name)) 985 return (NULL); 986 if (md.md_provsize != pp->mediasize) 987 return (NULL); 988 989 /* 990 * Let's check if device already exists. 991 */ 992 sc = NULL; 993 LIST_FOREACH(gp, &mp->geom, geom) { 994 sc = gp->softc; 995 if (sc == NULL) 996 continue; 997 if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) 998 continue; 999 if (strcmp(md.md_name, sc->sc_name) != 0) 1000 continue; 1001 if (md.md_id != sc->sc_id) 1002 continue; 1003 break; 1004 } 1005 if (gp != NULL) { 1006 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1007 error = g_stripe_add_disk(sc, pp, md.md_no); 1008 if (error != 0) { 1009 G_STRIPE_DEBUG(0, 1010 "Cannot add disk %s to %s (error=%d).", pp->name, 1011 gp->name, error); 1012 return (NULL); 1013 } 1014 } else { 1015 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); 1016 if (gp == NULL) { 1017 G_STRIPE_DEBUG(0, "Cannot create device %s.", 1018 md.md_name); 1019 return (NULL); 1020 } 1021 sc = gp->softc; 1022 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1023 error = g_stripe_add_disk(sc, pp, md.md_no); 1024 if (error != 0) { 1025 G_STRIPE_DEBUG(0, 1026 "Cannot add disk %s to %s (error=%d).", pp->name, 1027 gp->name, error); 1028 g_stripe_destroy(sc, 1); 1029 return (NULL); 1030 } 1031 } 1032 1033 return (gp); 1034 } 1035 1036 static void 1037 g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) 1038 { 1039 u_int attached, no; 1040 struct g_stripe_metadata md; 1041 struct g_provider *pp; 1042 struct g_stripe_softc *sc; 1043 struct g_geom *gp; 1044 struct sbuf *sb; 1045 intmax_t *stripesize; 1046 const char *name; 1047 char param[16]; 1048 int *nargs; 1049 1050 g_topology_assert(); 1051 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1052 if (nargs == NULL) { 1053 gctl_error(req, "No '%s' argument.", "nargs"); 1054 return; 1055 } 1056 if (*nargs <= 2) { 1057 gctl_error(req, "Too few arguments."); 1058 return; 1059 } 1060 1061 strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); 1062 md.md_version = G_STRIPE_VERSION; 1063 name = gctl_get_asciiparam(req, "arg0"); 1064 if (name == NULL) { 1065 gctl_error(req, "No 'arg%u' argument.", 0); 1066 return; 1067 } 1068 strlcpy(md.md_name, name, sizeof(md.md_name)); 1069 md.md_id = arc4random(); 1070 md.md_no = 0; 1071 md.md_all = *nargs - 1; 1072 stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); 1073 if (stripesize == NULL) { 1074 gctl_error(req, "No '%s' argument.", "stripesize"); 1075 return; 1076 } 1077 md.md_stripesize = *stripesize; 1078 bzero(md.md_provider, sizeof(md.md_provider)); 1079 /* This field is not important here. */ 1080 md.md_provsize = 0; 1081 1082 /* Check all providers are valid */ 1083 for (no = 1; no < *nargs; no++) { 1084 snprintf(param, sizeof(param), "arg%u", no); 1085 name = gctl_get_asciiparam(req, param); 1086 if (name == NULL) { 1087 gctl_error(req, "No 'arg%u' argument.", no); 1088 return; 1089 } 1090 if (strncmp(name, "/dev/", strlen("/dev/")) == 0) 1091 name += strlen("/dev/"); 1092 pp = g_provider_by_name(name); 1093 if (pp == NULL) { 1094 G_STRIPE_DEBUG(1, "Disk %s is invalid.", name); 1095 gctl_error(req, "Disk %s is invalid.", name); 1096 return; 1097 } 1098 } 1099 1100 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); 1101 if (gp == NULL) { 1102 gctl_error(req, "Can't configure %s.", md.md_name); 1103 return; 1104 } 1105 1106 sc = gp->softc; 1107 sb = sbuf_new_auto(); 1108 sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); 1109 for (attached = 0, no = 1; no < *nargs; no++) { 1110 snprintf(param, sizeof(param), "arg%u", no); 1111 name = gctl_get_asciiparam(req, param); 1112 if (name == NULL) { 1113 gctl_error(req, "No 'arg%u' argument.", no); 1114 continue; 1115 } 1116 if (strncmp(name, "/dev/", strlen("/dev/")) == 0) 1117 name += strlen("/dev/"); 1118 pp = g_provider_by_name(name); 1119 KASSERT(pp != NULL, ("Provider %s disappear?!", name)); 1120 if (g_stripe_add_disk(sc, pp, no - 1) != 0) { 1121 G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", 1122 no, pp->name, gp->name); 1123 sbuf_printf(sb, " %s", pp->name); 1124 continue; 1125 } 1126 attached++; 1127 } 1128 sbuf_finish(sb); 1129 if (md.md_all != attached) { 1130 g_stripe_destroy(gp->softc, 1); 1131 gctl_error(req, "%s", sbuf_data(sb)); 1132 } 1133 sbuf_delete(sb); 1134 } 1135 1136 static struct g_stripe_softc * 1137 g_stripe_find_device(struct g_class *mp, const char *name) 1138 { 1139 struct g_stripe_softc *sc; 1140 struct g_geom *gp; 1141 1142 LIST_FOREACH(gp, &mp->geom, geom) { 1143 sc = gp->softc; 1144 if (sc == NULL) 1145 continue; 1146 if (strcmp(sc->sc_name, name) == 0) 1147 return (sc); 1148 } 1149 return (NULL); 1150 } 1151 1152 static void 1153 g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) 1154 { 1155 struct g_stripe_softc *sc; 1156 int *force, *nargs, error; 1157 const char *name; 1158 char param[16]; 1159 u_int i; 1160 1161 g_topology_assert(); 1162 1163 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1164 if (nargs == NULL) { 1165 gctl_error(req, "No '%s' argument.", "nargs"); 1166 return; 1167 } 1168 if (*nargs <= 0) { 1169 gctl_error(req, "Missing device(s)."); 1170 return; 1171 } 1172 force = gctl_get_paraml(req, "force", sizeof(*force)); 1173 if (force == NULL) { 1174 gctl_error(req, "No '%s' argument.", "force"); 1175 return; 1176 } 1177 1178 for (i = 0; i < (u_int)*nargs; i++) { 1179 snprintf(param, sizeof(param), "arg%u", i); 1180 name = gctl_get_asciiparam(req, param); 1181 if (name == NULL) { 1182 gctl_error(req, "No 'arg%u' argument.", i); 1183 return; 1184 } 1185 sc = g_stripe_find_device(mp, name); 1186 if (sc == NULL) { 1187 gctl_error(req, "No such device: %s.", name); 1188 return; 1189 } 1190 error = g_stripe_destroy(sc, *force); 1191 if (error != 0) { 1192 gctl_error(req, "Cannot destroy device %s (error=%d).", 1193 sc->sc_name, error); 1194 return; 1195 } 1196 } 1197 } 1198 1199 static void 1200 g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) 1201 { 1202 uint32_t *version; 1203 1204 g_topology_assert(); 1205 1206 version = gctl_get_paraml(req, "version", sizeof(*version)); 1207 if (version == NULL) { 1208 gctl_error(req, "No '%s' argument.", "version"); 1209 return; 1210 } 1211 if (*version != G_STRIPE_VERSION) { 1212 gctl_error(req, "Userland and kernel parts are out of sync."); 1213 return; 1214 } 1215 1216 if (strcmp(verb, "create") == 0) { 1217 g_stripe_ctl_create(req, mp); 1218 return; 1219 } else if (strcmp(verb, "destroy") == 0 || 1220 strcmp(verb, "stop") == 0) { 1221 g_stripe_ctl_destroy(req, mp); 1222 return; 1223 } 1224 1225 gctl_error(req, "Unknown verb."); 1226 } 1227 1228 static void 1229 g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1230 struct g_consumer *cp, struct g_provider *pp) 1231 { 1232 struct g_stripe_softc *sc; 1233 1234 sc = gp->softc; 1235 if (sc == NULL) 1236 return; 1237 if (pp != NULL) { 1238 /* Nothing here. */ 1239 } else if (cp != NULL) { 1240 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 1241 (u_int)cp->index); 1242 } else { 1243 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 1244 sbuf_printf(sb, "%s<Stripesize>%u</Stripesize>\n", indent, 1245 (u_int)sc->sc_stripesize); 1246 sbuf_printf(sb, "%s<Type>", indent); 1247 switch (sc->sc_type) { 1248 case G_STRIPE_TYPE_AUTOMATIC: 1249 sbuf_printf(sb, "AUTOMATIC"); 1250 break; 1251 case G_STRIPE_TYPE_MANUAL: 1252 sbuf_printf(sb, "MANUAL"); 1253 break; 1254 default: 1255 sbuf_printf(sb, "UNKNOWN"); 1256 break; 1257 } 1258 sbuf_printf(sb, "</Type>\n"); 1259 sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n", 1260 indent, sc->sc_ndisks, g_stripe_nvalid(sc)); 1261 sbuf_printf(sb, "%s<State>", indent); 1262 if (sc->sc_provider != NULL && sc->sc_provider->error == 0) 1263 sbuf_printf(sb, "UP"); 1264 else 1265 sbuf_printf(sb, "DOWN"); 1266 sbuf_printf(sb, "</State>\n"); 1267 } 1268 } 1269 1270 DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); 1271