1 /*- 2 * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/kernel.h> 33 #include <sys/module.h> 34 #include <sys/lock.h> 35 #include <sys/mutex.h> 36 #include <sys/bio.h> 37 #include <sys/sbuf.h> 38 #include <sys/sysctl.h> 39 #include <sys/malloc.h> 40 #include <vm/uma.h> 41 #include <geom/geom.h> 42 #include <geom/stripe/g_stripe.h> 43 44 FEATURE(geom_stripe, "GEOM striping support"); 45 46 static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); 47 48 static uma_zone_t g_stripe_zone; 49 50 static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); 51 static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, 52 struct g_geom *gp); 53 54 static g_taste_t g_stripe_taste; 55 static g_ctl_req_t g_stripe_config; 56 static g_dumpconf_t g_stripe_dumpconf; 57 static g_init_t g_stripe_init; 58 static g_fini_t g_stripe_fini; 59 60 struct g_class g_stripe_class = { 61 .name = G_STRIPE_CLASS_NAME, 62 .version = G_VERSION, 63 .ctlreq = g_stripe_config, 64 .taste = g_stripe_taste, 65 .destroy_geom = g_stripe_destroy_geom, 66 .init = g_stripe_init, 67 .fini = g_stripe_fini 68 }; 69 70 SYSCTL_DECL(_kern_geom); 71 static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, 72 "GEOM_STRIPE stuff"); 73 static u_int g_stripe_debug = 0; 74 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, 75 "Debug level"); 76 static int g_stripe_fast = 0; 77 static int 78 g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS) 79 { 80 int error, fast; 81 82 fast = g_stripe_fast; 83 error = sysctl_handle_int(oidp, &fast, 0, req); 84 if (error == 0 && req->newptr != NULL) 85 g_stripe_fast = fast; 86 return (error); 87 } 88 SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RWTUN, 89 NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode"); 90 static u_int g_stripe_maxmem = MAXPHYS * 100; 91 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RDTUN, &g_stripe_maxmem, 92 0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); 93 static u_int g_stripe_fast_failed = 0; 94 SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, 95 &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); 96 97 /* 98 * Greatest Common Divisor. 99 */ 100 static u_int 101 gcd(u_int a, u_int b) 102 { 103 u_int c; 104 105 while (b != 0) { 106 c = a; 107 a = b; 108 b = (c % b); 109 } 110 return (a); 111 } 112 113 /* 114 * Least Common Multiple. 115 */ 116 static u_int 117 lcm(u_int a, u_int b) 118 { 119 120 return ((a * b) / gcd(a, b)); 121 } 122 123 static void 124 g_stripe_init(struct g_class *mp __unused) 125 { 126 127 g_stripe_zone = uma_zcreate("g_stripe_zone", MAXPHYS, NULL, NULL, 128 NULL, NULL, 0, 0); 129 g_stripe_maxmem -= g_stripe_maxmem % MAXPHYS; 130 uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAXPHYS); 131 } 132 133 static void 134 g_stripe_fini(struct g_class *mp __unused) 135 { 136 137 uma_zdestroy(g_stripe_zone); 138 } 139 140 /* 141 * Return the number of valid disks. 142 */ 143 static u_int 144 g_stripe_nvalid(struct g_stripe_softc *sc) 145 { 146 u_int i, no; 147 148 no = 0; 149 for (i = 0; i < sc->sc_ndisks; i++) { 150 if (sc->sc_disks[i] != NULL) 151 no++; 152 } 153 154 return (no); 155 } 156 157 static void 158 g_stripe_remove_disk(struct g_consumer *cp) 159 { 160 struct g_stripe_softc *sc; 161 162 g_topology_assert(); 163 KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); 164 sc = (struct g_stripe_softc *)cp->geom->softc; 165 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 166 167 if (cp->private == NULL) { 168 G_STRIPE_DEBUG(0, "Disk %s removed from %s.", 169 cp->provider->name, sc->sc_name); 170 cp->private = (void *)(uintptr_t)-1; 171 } 172 173 if (sc->sc_provider != NULL) { 174 sc->sc_provider->flags |= G_PF_WITHER; 175 G_STRIPE_DEBUG(0, "Device %s deactivated.", 176 sc->sc_provider->name); 177 g_orphan_provider(sc->sc_provider, ENXIO); 178 sc->sc_provider = NULL; 179 } 180 181 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 182 return; 183 sc->sc_disks[cp->index] = NULL; 184 cp->index = 0; 185 g_detach(cp); 186 g_destroy_consumer(cp); 187 /* If there are no valid disks anymore, remove device. */ 188 if (LIST_EMPTY(&sc->sc_geom->consumer)) 189 g_stripe_destroy(sc, 1); 190 } 191 192 static void 193 g_stripe_orphan(struct g_consumer *cp) 194 { 195 struct g_stripe_softc *sc; 196 struct g_geom *gp; 197 198 g_topology_assert(); 199 gp = cp->geom; 200 sc = gp->softc; 201 if (sc == NULL) 202 return; 203 204 g_stripe_remove_disk(cp); 205 } 206 207 static int 208 g_stripe_access(struct g_provider *pp, int dr, int dw, int de) 209 { 210 struct g_consumer *cp1, *cp2, *tmp; 211 struct g_stripe_softc *sc; 212 struct g_geom *gp; 213 int error; 214 215 g_topology_assert(); 216 gp = pp->geom; 217 sc = gp->softc; 218 KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); 219 220 /* On first open, grab an extra "exclusive" bit */ 221 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) 222 de++; 223 /* ... and let go of it on last close */ 224 if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) 225 de--; 226 227 LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { 228 error = g_access(cp1, dr, dw, de); 229 if (error != 0) 230 goto fail; 231 if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && 232 cp1->private != NULL) { 233 g_stripe_remove_disk(cp1); /* May destroy geom. */ 234 } 235 } 236 return (0); 237 238 fail: 239 LIST_FOREACH(cp2, &gp->consumer, consumer) { 240 if (cp1 == cp2) 241 break; 242 g_access(cp2, -dr, -dw, -de); 243 } 244 return (error); 245 } 246 247 static void 248 g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, 249 off_t length, int mode) 250 { 251 u_int stripesize; 252 size_t len; 253 254 stripesize = sc->sc_stripesize; 255 len = (size_t)(stripesize - (offset & (stripesize - 1))); 256 do { 257 bcopy(src, dst, len); 258 if (mode) { 259 dst += len + stripesize * (sc->sc_ndisks - 1); 260 src += len; 261 } else { 262 dst += len; 263 src += len + stripesize * (sc->sc_ndisks - 1); 264 } 265 length -= len; 266 KASSERT(length >= 0, 267 ("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).", 268 (size_t)stripesize, (intmax_t)offset, (intmax_t)length)); 269 if (length > stripesize) 270 len = stripesize; 271 else 272 len = length; 273 } while (length > 0); 274 } 275 276 static void 277 g_stripe_done(struct bio *bp) 278 { 279 struct g_stripe_softc *sc; 280 struct bio *pbp; 281 282 pbp = bp->bio_parent; 283 sc = pbp->bio_to->geom->softc; 284 if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { 285 g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, 286 bp->bio_length, 1); 287 bp->bio_data = bp->bio_caller1; 288 bp->bio_caller1 = NULL; 289 } 290 mtx_lock(&sc->sc_lock); 291 if (pbp->bio_error == 0) 292 pbp->bio_error = bp->bio_error; 293 pbp->bio_completed += bp->bio_completed; 294 pbp->bio_inbed++; 295 if (pbp->bio_children == pbp->bio_inbed) { 296 mtx_unlock(&sc->sc_lock); 297 if (pbp->bio_driver1 != NULL) 298 uma_zfree(g_stripe_zone, pbp->bio_driver1); 299 g_io_deliver(pbp, pbp->bio_error); 300 } else 301 mtx_unlock(&sc->sc_lock); 302 g_destroy_bio(bp); 303 } 304 305 static int 306 g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) 307 { 308 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 309 u_int nparts = 0, stripesize; 310 struct g_stripe_softc *sc; 311 char *addr, *data = NULL; 312 struct bio *cbp; 313 int error; 314 315 sc = bp->bio_to->geom->softc; 316 317 addr = bp->bio_data; 318 stripesize = sc->sc_stripesize; 319 320 cbp = g_clone_bio(bp); 321 if (cbp == NULL) { 322 error = ENOMEM; 323 goto failure; 324 } 325 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 326 nparts++; 327 /* 328 * Fill in the component buf structure. 329 */ 330 cbp->bio_done = g_stripe_done; 331 cbp->bio_offset = offset; 332 cbp->bio_data = addr; 333 cbp->bio_caller1 = NULL; 334 cbp->bio_length = length; 335 cbp->bio_caller2 = sc->sc_disks[no]; 336 337 /* offset -= offset % stripesize; */ 338 offset -= offset & (stripesize - 1); 339 addr += length; 340 length = bp->bio_length - length; 341 for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { 342 if (no > sc->sc_ndisks - 1) { 343 no = 0; 344 offset += stripesize; 345 } 346 if (nparts >= sc->sc_ndisks) { 347 cbp = TAILQ_NEXT(cbp, bio_queue); 348 if (cbp == NULL) 349 cbp = TAILQ_FIRST(&queue); 350 nparts++; 351 /* 352 * Update bio structure. 353 */ 354 /* 355 * MIN() is in case when 356 * (bp->bio_length % sc->sc_stripesize) != 0. 357 */ 358 cbp->bio_length += MIN(stripesize, length); 359 if (cbp->bio_caller1 == NULL) { 360 cbp->bio_caller1 = cbp->bio_data; 361 cbp->bio_data = NULL; 362 if (data == NULL) { 363 data = uma_zalloc(g_stripe_zone, 364 M_NOWAIT); 365 if (data == NULL) { 366 error = ENOMEM; 367 goto failure; 368 } 369 } 370 } 371 } else { 372 cbp = g_clone_bio(bp); 373 if (cbp == NULL) { 374 error = ENOMEM; 375 goto failure; 376 } 377 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 378 nparts++; 379 /* 380 * Fill in the component buf structure. 381 */ 382 cbp->bio_done = g_stripe_done; 383 cbp->bio_offset = offset; 384 cbp->bio_data = addr; 385 cbp->bio_caller1 = NULL; 386 /* 387 * MIN() is in case when 388 * (bp->bio_length % sc->sc_stripesize) != 0. 389 */ 390 cbp->bio_length = MIN(stripesize, length); 391 cbp->bio_caller2 = sc->sc_disks[no]; 392 } 393 } 394 if (data != NULL) 395 bp->bio_driver1 = data; 396 /* 397 * Fire off all allocated requests! 398 */ 399 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 400 struct g_consumer *cp; 401 402 TAILQ_REMOVE(&queue, cbp, bio_queue); 403 cp = cbp->bio_caller2; 404 cbp->bio_caller2 = NULL; 405 cbp->bio_to = cp->provider; 406 if (cbp->bio_caller1 != NULL) { 407 cbp->bio_data = data; 408 if (bp->bio_cmd == BIO_WRITE) { 409 g_stripe_copy(sc, cbp->bio_caller1, data, 410 cbp->bio_offset, cbp->bio_length, 0); 411 } 412 data += cbp->bio_length; 413 } 414 G_STRIPE_LOGREQ(cbp, "Sending request."); 415 g_io_request(cbp, cp); 416 } 417 return (0); 418 failure: 419 if (data != NULL) 420 uma_zfree(g_stripe_zone, data); 421 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 422 TAILQ_REMOVE(&queue, cbp, bio_queue); 423 if (cbp->bio_caller1 != NULL) { 424 cbp->bio_data = cbp->bio_caller1; 425 cbp->bio_caller1 = NULL; 426 } 427 bp->bio_children--; 428 g_destroy_bio(cbp); 429 } 430 return (error); 431 } 432 433 static int 434 g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) 435 { 436 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 437 struct g_stripe_softc *sc; 438 uint32_t stripesize; 439 struct bio *cbp; 440 char *addr; 441 int error; 442 443 sc = bp->bio_to->geom->softc; 444 445 stripesize = sc->sc_stripesize; 446 447 cbp = g_clone_bio(bp); 448 if (cbp == NULL) { 449 error = ENOMEM; 450 goto failure; 451 } 452 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 453 /* 454 * Fill in the component buf structure. 455 */ 456 if (bp->bio_length == length) 457 cbp->bio_done = g_std_done; /* Optimized lockless case. */ 458 else 459 cbp->bio_done = g_stripe_done; 460 cbp->bio_offset = offset; 461 cbp->bio_length = length; 462 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 463 bp->bio_ma_n = round_page(bp->bio_ma_offset + 464 bp->bio_length) / PAGE_SIZE; 465 addr = NULL; 466 } else 467 addr = bp->bio_data; 468 cbp->bio_caller2 = sc->sc_disks[no]; 469 470 /* offset -= offset % stripesize; */ 471 offset -= offset & (stripesize - 1); 472 if (bp->bio_cmd != BIO_DELETE) 473 addr += length; 474 length = bp->bio_length - length; 475 for (no++; length > 0; no++, length -= stripesize) { 476 if (no > sc->sc_ndisks - 1) { 477 no = 0; 478 offset += stripesize; 479 } 480 cbp = g_clone_bio(bp); 481 if (cbp == NULL) { 482 error = ENOMEM; 483 goto failure; 484 } 485 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 486 487 /* 488 * Fill in the component buf structure. 489 */ 490 cbp->bio_done = g_stripe_done; 491 cbp->bio_offset = offset; 492 /* 493 * MIN() is in case when 494 * (bp->bio_length % sc->sc_stripesize) != 0. 495 */ 496 cbp->bio_length = MIN(stripesize, length); 497 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 498 cbp->bio_ma_offset += (uintptr_t)addr; 499 cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; 500 cbp->bio_ma_offset %= PAGE_SIZE; 501 cbp->bio_ma_n = round_page(cbp->bio_ma_offset + 502 cbp->bio_length) / PAGE_SIZE; 503 } else 504 cbp->bio_data = addr; 505 506 cbp->bio_caller2 = sc->sc_disks[no]; 507 508 if (bp->bio_cmd != BIO_DELETE) 509 addr += stripesize; 510 } 511 /* 512 * Fire off all allocated requests! 513 */ 514 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 515 struct g_consumer *cp; 516 517 TAILQ_REMOVE(&queue, cbp, bio_queue); 518 cp = cbp->bio_caller2; 519 cbp->bio_caller2 = NULL; 520 cbp->bio_to = cp->provider; 521 G_STRIPE_LOGREQ(cbp, "Sending request."); 522 g_io_request(cbp, cp); 523 } 524 return (0); 525 failure: 526 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 527 TAILQ_REMOVE(&queue, cbp, bio_queue); 528 bp->bio_children--; 529 g_destroy_bio(cbp); 530 } 531 return (error); 532 } 533 534 static void 535 g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp) 536 { 537 struct bio_queue_head queue; 538 struct g_consumer *cp; 539 struct bio *cbp; 540 u_int no; 541 542 bioq_init(&queue); 543 for (no = 0; no < sc->sc_ndisks; no++) { 544 cbp = g_clone_bio(bp); 545 if (cbp == NULL) { 546 for (cbp = bioq_first(&queue); cbp != NULL; 547 cbp = bioq_first(&queue)) { 548 bioq_remove(&queue, cbp); 549 g_destroy_bio(cbp); 550 } 551 if (bp->bio_error == 0) 552 bp->bio_error = ENOMEM; 553 g_io_deliver(bp, bp->bio_error); 554 return; 555 } 556 bioq_insert_tail(&queue, cbp); 557 cbp->bio_done = g_stripe_done; 558 cbp->bio_caller2 = sc->sc_disks[no]; 559 cbp->bio_to = sc->sc_disks[no]->provider; 560 } 561 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 562 bioq_remove(&queue, cbp); 563 G_STRIPE_LOGREQ(cbp, "Sending request."); 564 cp = cbp->bio_caller2; 565 cbp->bio_caller2 = NULL; 566 g_io_request(cbp, cp); 567 } 568 } 569 570 static void 571 g_stripe_start(struct bio *bp) 572 { 573 off_t offset, start, length, nstripe; 574 struct g_stripe_softc *sc; 575 u_int no, stripesize; 576 int error, fast = 0; 577 578 sc = bp->bio_to->geom->softc; 579 /* 580 * If sc == NULL, provider's error should be set and g_stripe_start() 581 * should not be called at all. 582 */ 583 KASSERT(sc != NULL, 584 ("Provider's error should be set (error=%d)(device=%s).", 585 bp->bio_to->error, bp->bio_to->name)); 586 587 G_STRIPE_LOGREQ(bp, "Request received."); 588 589 switch (bp->bio_cmd) { 590 case BIO_READ: 591 case BIO_WRITE: 592 case BIO_DELETE: 593 break; 594 case BIO_FLUSH: 595 g_stripe_flush(sc, bp); 596 return; 597 case BIO_GETATTR: 598 /* To which provider it should be delivered? */ 599 default: 600 g_io_deliver(bp, EOPNOTSUPP); 601 return; 602 } 603 604 stripesize = sc->sc_stripesize; 605 606 /* 607 * Calculations are quite messy, but fast I hope. 608 */ 609 610 /* Stripe number. */ 611 /* nstripe = bp->bio_offset / stripesize; */ 612 nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; 613 /* Disk number. */ 614 no = nstripe % sc->sc_ndisks; 615 /* Start position in stripe. */ 616 /* start = bp->bio_offset % stripesize; */ 617 start = bp->bio_offset & (stripesize - 1); 618 /* Start position in disk. */ 619 /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ 620 offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; 621 /* Length of data to operate. */ 622 length = MIN(bp->bio_length, stripesize - start); 623 624 /* 625 * Do use "fast" mode when: 626 * 1. "Fast" mode is ON. 627 * and 628 * 2. Request size is less than or equal to MAXPHYS, 629 * which should always be true. 630 * and 631 * 3. Request size is bigger than stripesize * ndisks. If it isn't, 632 * there will be no need to send more than one I/O request to 633 * a provider, so there is nothing to optmize. 634 * and 635 * 4. Request is not unmapped. 636 * and 637 * 5. It is not a BIO_DELETE. 638 */ 639 if (g_stripe_fast && bp->bio_length <= MAXPHYS && 640 bp->bio_length >= stripesize * sc->sc_ndisks && 641 (bp->bio_flags & BIO_UNMAPPED) == 0 && 642 bp->bio_cmd != BIO_DELETE) { 643 fast = 1; 644 } 645 error = 0; 646 if (fast) { 647 error = g_stripe_start_fast(bp, no, offset, length); 648 if (error != 0) 649 g_stripe_fast_failed++; 650 } 651 /* 652 * Do use "economic" when: 653 * 1. "Economic" mode is ON. 654 * or 655 * 2. "Fast" mode failed. It can only fail if there is no memory. 656 */ 657 if (!fast || error != 0) 658 error = g_stripe_start_economic(bp, no, offset, length); 659 if (error != 0) { 660 if (bp->bio_error == 0) 661 bp->bio_error = error; 662 g_io_deliver(bp, bp->bio_error); 663 } 664 } 665 666 static void 667 g_stripe_check_and_run(struct g_stripe_softc *sc) 668 { 669 struct g_provider *dp; 670 off_t mediasize, ms; 671 u_int no, sectorsize = 0; 672 673 g_topology_assert(); 674 if (g_stripe_nvalid(sc) != sc->sc_ndisks) 675 return; 676 677 sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", 678 sc->sc_name); 679 sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; 680 if (g_stripe_fast == 0) 681 sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; 682 /* 683 * Find the smallest disk. 684 */ 685 mediasize = sc->sc_disks[0]->provider->mediasize; 686 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 687 mediasize -= sc->sc_disks[0]->provider->sectorsize; 688 mediasize -= mediasize % sc->sc_stripesize; 689 sectorsize = sc->sc_disks[0]->provider->sectorsize; 690 for (no = 1; no < sc->sc_ndisks; no++) { 691 dp = sc->sc_disks[no]->provider; 692 ms = dp->mediasize; 693 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) 694 ms -= dp->sectorsize; 695 ms -= ms % sc->sc_stripesize; 696 if (ms < mediasize) 697 mediasize = ms; 698 sectorsize = lcm(sectorsize, dp->sectorsize); 699 700 /* A provider underneath us doesn't support unmapped */ 701 if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { 702 G_STRIPE_DEBUG(1, "Cancelling unmapped " 703 "because of %s.", dp->name); 704 sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; 705 } 706 } 707 sc->sc_provider->sectorsize = sectorsize; 708 sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; 709 sc->sc_provider->stripesize = sc->sc_stripesize; 710 sc->sc_provider->stripeoffset = 0; 711 g_error_provider(sc->sc_provider, 0); 712 713 G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); 714 } 715 716 static int 717 g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) 718 { 719 struct g_provider *pp; 720 u_char *buf; 721 int error; 722 723 g_topology_assert(); 724 725 error = g_access(cp, 1, 0, 0); 726 if (error != 0) 727 return (error); 728 pp = cp->provider; 729 g_topology_unlock(); 730 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 731 &error); 732 g_topology_lock(); 733 g_access(cp, -1, 0, 0); 734 if (buf == NULL) 735 return (error); 736 737 /* Decode metadata. */ 738 stripe_metadata_decode(buf, md); 739 g_free(buf); 740 741 return (0); 742 } 743 744 /* 745 * Add disk to given device. 746 */ 747 static int 748 g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) 749 { 750 struct g_consumer *cp, *fcp; 751 struct g_geom *gp; 752 int error; 753 754 g_topology_assert(); 755 /* Metadata corrupted? */ 756 if (no >= sc->sc_ndisks) 757 return (EINVAL); 758 759 /* Check if disk is not already attached. */ 760 if (sc->sc_disks[no] != NULL) 761 return (EEXIST); 762 763 gp = sc->sc_geom; 764 fcp = LIST_FIRST(&gp->consumer); 765 766 cp = g_new_consumer(gp); 767 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 768 cp->private = NULL; 769 cp->index = no; 770 error = g_attach(cp, pp); 771 if (error != 0) { 772 g_destroy_consumer(cp); 773 return (error); 774 } 775 776 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { 777 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 778 if (error != 0) { 779 g_detach(cp); 780 g_destroy_consumer(cp); 781 return (error); 782 } 783 } 784 if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { 785 struct g_stripe_metadata md; 786 787 /* Reread metadata. */ 788 error = g_stripe_read_metadata(cp, &md); 789 if (error != 0) 790 goto fail; 791 792 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || 793 strcmp(md.md_name, sc->sc_name) != 0 || 794 md.md_id != sc->sc_id) { 795 G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); 796 goto fail; 797 } 798 } 799 800 sc->sc_disks[no] = cp; 801 G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); 802 g_stripe_check_and_run(sc); 803 804 return (0); 805 fail: 806 if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) 807 g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); 808 g_detach(cp); 809 g_destroy_consumer(cp); 810 return (error); 811 } 812 813 static struct g_geom * 814 g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, 815 u_int type) 816 { 817 struct g_stripe_softc *sc; 818 struct g_geom *gp; 819 u_int no; 820 821 g_topology_assert(); 822 G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, 823 md->md_id); 824 825 /* Two disks is minimum. */ 826 if (md->md_all < 2) { 827 G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); 828 return (NULL); 829 } 830 #if 0 831 /* Stripe size have to be grater than or equal to sector size. */ 832 if (md->md_stripesize < sectorsize) { 833 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 834 return (NULL); 835 } 836 #endif 837 /* Stripe size have to be power of 2. */ 838 if (!powerof2(md->md_stripesize)) { 839 G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); 840 return (NULL); 841 } 842 843 /* Check for duplicate unit */ 844 LIST_FOREACH(gp, &mp->geom, geom) { 845 sc = gp->softc; 846 if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { 847 G_STRIPE_DEBUG(0, "Device %s already configured.", 848 sc->sc_name); 849 return (NULL); 850 } 851 } 852 gp = g_new_geomf(mp, "%s", md->md_name); 853 sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); 854 gp->start = g_stripe_start; 855 gp->spoiled = g_stripe_orphan; 856 gp->orphan = g_stripe_orphan; 857 gp->access = g_stripe_access; 858 gp->dumpconf = g_stripe_dumpconf; 859 860 sc->sc_id = md->md_id; 861 sc->sc_stripesize = md->md_stripesize; 862 sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); 863 sc->sc_ndisks = md->md_all; 864 sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, 865 M_STRIPE, M_WAITOK | M_ZERO); 866 for (no = 0; no < sc->sc_ndisks; no++) 867 sc->sc_disks[no] = NULL; 868 sc->sc_type = type; 869 mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); 870 871 gp->softc = sc; 872 sc->sc_geom = gp; 873 sc->sc_provider = NULL; 874 875 G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 876 877 return (gp); 878 } 879 880 static int 881 g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) 882 { 883 struct g_provider *pp; 884 struct g_consumer *cp, *cp1; 885 struct g_geom *gp; 886 887 g_topology_assert(); 888 889 if (sc == NULL) 890 return (ENXIO); 891 892 pp = sc->sc_provider; 893 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 894 if (force) { 895 G_STRIPE_DEBUG(0, "Device %s is still open, so it " 896 "can't be definitely removed.", pp->name); 897 } else { 898 G_STRIPE_DEBUG(1, 899 "Device %s is still open (r%dw%de%d).", pp->name, 900 pp->acr, pp->acw, pp->ace); 901 return (EBUSY); 902 } 903 } 904 905 gp = sc->sc_geom; 906 LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { 907 g_stripe_remove_disk(cp); 908 if (cp1 == NULL) 909 return (0); /* Recursion happened. */ 910 } 911 if (!LIST_EMPTY(&gp->consumer)) 912 return (EINPROGRESS); 913 914 gp->softc = NULL; 915 KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", 916 gp->name)); 917 free(sc->sc_disks, M_STRIPE); 918 mtx_destroy(&sc->sc_lock); 919 free(sc, M_STRIPE); 920 G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); 921 g_wither_geom(gp, ENXIO); 922 return (0); 923 } 924 925 static int 926 g_stripe_destroy_geom(struct gctl_req *req __unused, 927 struct g_class *mp __unused, struct g_geom *gp) 928 { 929 struct g_stripe_softc *sc; 930 931 sc = gp->softc; 932 return (g_stripe_destroy(sc, 0)); 933 } 934 935 static struct g_geom * 936 g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 937 { 938 struct g_stripe_metadata md; 939 struct g_stripe_softc *sc; 940 struct g_consumer *cp; 941 struct g_geom *gp; 942 int error; 943 944 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 945 g_topology_assert(); 946 947 /* Skip providers that are already open for writing. */ 948 if (pp->acw > 0) 949 return (NULL); 950 951 G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); 952 953 gp = g_new_geomf(mp, "stripe:taste"); 954 gp->start = g_stripe_start; 955 gp->access = g_stripe_access; 956 gp->orphan = g_stripe_orphan; 957 cp = g_new_consumer(gp); 958 g_attach(cp, pp); 959 error = g_stripe_read_metadata(cp, &md); 960 g_detach(cp); 961 g_destroy_consumer(cp); 962 g_destroy_geom(gp); 963 if (error != 0) 964 return (NULL); 965 gp = NULL; 966 967 if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) 968 return (NULL); 969 if (md.md_version > G_STRIPE_VERSION) { 970 printf("geom_stripe.ko module is too old to handle %s.\n", 971 pp->name); 972 return (NULL); 973 } 974 /* 975 * Backward compatibility: 976 */ 977 /* There was no md_provider field in earlier versions of metadata. */ 978 if (md.md_version < 2) 979 bzero(md.md_provider, sizeof(md.md_provider)); 980 /* There was no md_provsize field in earlier versions of metadata. */ 981 if (md.md_version < 3) 982 md.md_provsize = pp->mediasize; 983 984 if (md.md_provider[0] != '\0' && 985 !g_compare_names(md.md_provider, pp->name)) 986 return (NULL); 987 if (md.md_provsize != pp->mediasize) 988 return (NULL); 989 990 /* 991 * Let's check if device already exists. 992 */ 993 sc = NULL; 994 LIST_FOREACH(gp, &mp->geom, geom) { 995 sc = gp->softc; 996 if (sc == NULL) 997 continue; 998 if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) 999 continue; 1000 if (strcmp(md.md_name, sc->sc_name) != 0) 1001 continue; 1002 if (md.md_id != sc->sc_id) 1003 continue; 1004 break; 1005 } 1006 if (gp != NULL) { 1007 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1008 error = g_stripe_add_disk(sc, pp, md.md_no); 1009 if (error != 0) { 1010 G_STRIPE_DEBUG(0, 1011 "Cannot add disk %s to %s (error=%d).", pp->name, 1012 gp->name, error); 1013 return (NULL); 1014 } 1015 } else { 1016 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); 1017 if (gp == NULL) { 1018 G_STRIPE_DEBUG(0, "Cannot create device %s.", 1019 md.md_name); 1020 return (NULL); 1021 } 1022 sc = gp->softc; 1023 G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 1024 error = g_stripe_add_disk(sc, pp, md.md_no); 1025 if (error != 0) { 1026 G_STRIPE_DEBUG(0, 1027 "Cannot add disk %s to %s (error=%d).", pp->name, 1028 gp->name, error); 1029 g_stripe_destroy(sc, 1); 1030 return (NULL); 1031 } 1032 } 1033 1034 return (gp); 1035 } 1036 1037 static void 1038 g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) 1039 { 1040 u_int attached, no; 1041 struct g_stripe_metadata md; 1042 struct g_provider *pp; 1043 struct g_stripe_softc *sc; 1044 struct g_geom *gp; 1045 struct sbuf *sb; 1046 intmax_t *stripesize; 1047 const char *name; 1048 char param[16]; 1049 int *nargs; 1050 1051 g_topology_assert(); 1052 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1053 if (nargs == NULL) { 1054 gctl_error(req, "No '%s' argument.", "nargs"); 1055 return; 1056 } 1057 if (*nargs <= 2) { 1058 gctl_error(req, "Too few arguments."); 1059 return; 1060 } 1061 1062 strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); 1063 md.md_version = G_STRIPE_VERSION; 1064 name = gctl_get_asciiparam(req, "arg0"); 1065 if (name == NULL) { 1066 gctl_error(req, "No 'arg%u' argument.", 0); 1067 return; 1068 } 1069 strlcpy(md.md_name, name, sizeof(md.md_name)); 1070 md.md_id = arc4random(); 1071 md.md_no = 0; 1072 md.md_all = *nargs - 1; 1073 stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); 1074 if (stripesize == NULL) { 1075 gctl_error(req, "No '%s' argument.", "stripesize"); 1076 return; 1077 } 1078 md.md_stripesize = *stripesize; 1079 bzero(md.md_provider, sizeof(md.md_provider)); 1080 /* This field is not important here. */ 1081 md.md_provsize = 0; 1082 1083 /* Check all providers are valid */ 1084 for (no = 1; no < *nargs; no++) { 1085 snprintf(param, sizeof(param), "arg%u", no); 1086 name = gctl_get_asciiparam(req, param); 1087 if (name == NULL) { 1088 gctl_error(req, "No 'arg%u' argument.", no); 1089 return; 1090 } 1091 if (strncmp(name, "/dev/", strlen("/dev/")) == 0) 1092 name += strlen("/dev/"); 1093 pp = g_provider_by_name(name); 1094 if (pp == NULL) { 1095 G_STRIPE_DEBUG(1, "Disk %s is invalid.", name); 1096 gctl_error(req, "Disk %s is invalid.", name); 1097 return; 1098 } 1099 } 1100 1101 gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); 1102 if (gp == NULL) { 1103 gctl_error(req, "Can't configure %s.", md.md_name); 1104 return; 1105 } 1106 1107 sc = gp->softc; 1108 sb = sbuf_new_auto(); 1109 sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); 1110 for (attached = 0, no = 1; no < *nargs; no++) { 1111 snprintf(param, sizeof(param), "arg%u", no); 1112 name = gctl_get_asciiparam(req, param); 1113 if (name == NULL) { 1114 gctl_error(req, "No 'arg%u' argument.", no); 1115 continue; 1116 } 1117 if (strncmp(name, "/dev/", strlen("/dev/")) == 0) 1118 name += strlen("/dev/"); 1119 pp = g_provider_by_name(name); 1120 KASSERT(pp != NULL, ("Provider %s disappear?!", name)); 1121 if (g_stripe_add_disk(sc, pp, no - 1) != 0) { 1122 G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", 1123 no, pp->name, gp->name); 1124 sbuf_printf(sb, " %s", pp->name); 1125 continue; 1126 } 1127 attached++; 1128 } 1129 sbuf_finish(sb); 1130 if (md.md_all != attached) { 1131 g_stripe_destroy(gp->softc, 1); 1132 gctl_error(req, "%s", sbuf_data(sb)); 1133 } 1134 sbuf_delete(sb); 1135 } 1136 1137 static struct g_stripe_softc * 1138 g_stripe_find_device(struct g_class *mp, const char *name) 1139 { 1140 struct g_stripe_softc *sc; 1141 struct g_geom *gp; 1142 1143 LIST_FOREACH(gp, &mp->geom, geom) { 1144 sc = gp->softc; 1145 if (sc == NULL) 1146 continue; 1147 if (strcmp(sc->sc_name, name) == 0) 1148 return (sc); 1149 } 1150 return (NULL); 1151 } 1152 1153 static void 1154 g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) 1155 { 1156 struct g_stripe_softc *sc; 1157 int *force, *nargs, error; 1158 const char *name; 1159 char param[16]; 1160 u_int i; 1161 1162 g_topology_assert(); 1163 1164 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1165 if (nargs == NULL) { 1166 gctl_error(req, "No '%s' argument.", "nargs"); 1167 return; 1168 } 1169 if (*nargs <= 0) { 1170 gctl_error(req, "Missing device(s)."); 1171 return; 1172 } 1173 force = gctl_get_paraml(req, "force", sizeof(*force)); 1174 if (force == NULL) { 1175 gctl_error(req, "No '%s' argument.", "force"); 1176 return; 1177 } 1178 1179 for (i = 0; i < (u_int)*nargs; i++) { 1180 snprintf(param, sizeof(param), "arg%u", i); 1181 name = gctl_get_asciiparam(req, param); 1182 if (name == NULL) { 1183 gctl_error(req, "No 'arg%u' argument.", i); 1184 return; 1185 } 1186 sc = g_stripe_find_device(mp, name); 1187 if (sc == NULL) { 1188 gctl_error(req, "No such device: %s.", name); 1189 return; 1190 } 1191 error = g_stripe_destroy(sc, *force); 1192 if (error != 0) { 1193 gctl_error(req, "Cannot destroy device %s (error=%d).", 1194 sc->sc_name, error); 1195 return; 1196 } 1197 } 1198 } 1199 1200 static void 1201 g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) 1202 { 1203 uint32_t *version; 1204 1205 g_topology_assert(); 1206 1207 version = gctl_get_paraml(req, "version", sizeof(*version)); 1208 if (version == NULL) { 1209 gctl_error(req, "No '%s' argument.", "version"); 1210 return; 1211 } 1212 if (*version != G_STRIPE_VERSION) { 1213 gctl_error(req, "Userland and kernel parts are out of sync."); 1214 return; 1215 } 1216 1217 if (strcmp(verb, "create") == 0) { 1218 g_stripe_ctl_create(req, mp); 1219 return; 1220 } else if (strcmp(verb, "destroy") == 0 || 1221 strcmp(verb, "stop") == 0) { 1222 g_stripe_ctl_destroy(req, mp); 1223 return; 1224 } 1225 1226 gctl_error(req, "Unknown verb."); 1227 } 1228 1229 static void 1230 g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1231 struct g_consumer *cp, struct g_provider *pp) 1232 { 1233 struct g_stripe_softc *sc; 1234 1235 sc = gp->softc; 1236 if (sc == NULL) 1237 return; 1238 if (pp != NULL) { 1239 /* Nothing here. */ 1240 } else if (cp != NULL) { 1241 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 1242 (u_int)cp->index); 1243 } else { 1244 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 1245 sbuf_printf(sb, "%s<Stripesize>%u</Stripesize>\n", indent, 1246 (u_int)sc->sc_stripesize); 1247 sbuf_printf(sb, "%s<Type>", indent); 1248 switch (sc->sc_type) { 1249 case G_STRIPE_TYPE_AUTOMATIC: 1250 sbuf_printf(sb, "AUTOMATIC"); 1251 break; 1252 case G_STRIPE_TYPE_MANUAL: 1253 sbuf_printf(sb, "MANUAL"); 1254 break; 1255 default: 1256 sbuf_printf(sb, "UNKNOWN"); 1257 break; 1258 } 1259 sbuf_printf(sb, "</Type>\n"); 1260 sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n", 1261 indent, sc->sc_ndisks, g_stripe_nvalid(sc)); 1262 sbuf_printf(sb, "%s<State>", indent); 1263 if (sc->sc_provider != NULL && sc->sc_provider->error == 0) 1264 sbuf_printf(sb, "UP"); 1265 else 1266 sbuf_printf(sb, "DOWN"); 1267 sbuf_printf(sb, "</State>\n"); 1268 } 1269 } 1270 1271 DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); 1272