/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2004, 2007 Lukas Ertl
 * Copyright (c) 2007, 2009 Ulf Lilleengen
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/geom_dbg.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static int	gv_check_parity(struct gv_plex *, struct bio *,
		    struct gv_raid5_packet *);
static int	gv_normal_parity(struct gv_plex *, struct bio *,
		    struct gv_raid5_packet *);
static void	gv_plex_flush(struct gv_plex *);
static int	gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *,
		    off_t *, int *, int);
static int	gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
		    off_t, caddr_t);
static void	gv_post_bio(struct gv_softc *, struct bio *);
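/*
 * Summary (added for clarity): handle a request to a plex by splitting it
 * along subdisk boundaries (concatenated/striped) or stripe boundaries
 * (RAID5), collecting the resulting sub-requests on p->bqueue, and then
 * issuing them to the drive consumers.  RAID5 sub-requests that would touch
 * a stripe with I/O still in flight are parked on p->wqueue until
 * gv_plex_raid5_done() re-posts them.
 */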
void
gv_plex_start(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp;
	struct gv_sd *s;
	struct gv_raid5_packet *wp;
	caddr_t addr;
	off_t bcount, boff, len;

	bcount = bp->bio_length;
	addr = bp->bio_data;
	boff = bp->bio_offset;

	/* Walk over the whole length of the request; we might split it up. */
	while (bcount > 0) {
		wp = NULL;

		/*
		 * RAID5 plexes need special treatment, as a single request
		 * might involve several read/write sub-requests.
		 */
		if (p->org == GV_PLEX_RAID5) {
			wp = gv_raid5_start(p, bp, addr, boff, bcount);
			if (wp == NULL)
				return;

			len = wp->length;

			if (TAILQ_EMPTY(&wp->bits))
				g_free(wp);
			else if (wp->lockbase != -1)
				TAILQ_INSERT_TAIL(&p->packets, wp, list);

		/*
		 * Requests to concatenated and striped plexes go straight
		 * through.
		 */
		} else {
			len = gv_plex_normal_request(p, bp, boff, bcount, addr);
		}
		if (len < 0)
			return;

		bcount -= len;
		addr += len;
		boff += len;
	}

	/*
	 * Fire off all sub-requests.  We get the correct consumer (== drive)
	 * to send each request to via the subdisk that was stored in
	 * cbp->bio_caller1.
	 */
	cbp = bioq_takefirst(p->bqueue);
	while (cbp != NULL) {
		/*
		 * RAID5 sub-requests need to come in correct order, otherwise
		 * we trip over the parity, as it might be overwritten by
		 * another sub-request.  We abuse cbp->bio_caller2 to mark
		 * potential overlap situations.
		 */
		if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
			/* Park the bio on the waiting queue. */
			cbp->bio_pflags |= GV_BIO_ONHOLD;
			bioq_disksort(p->wqueue, cbp);
		} else {
			s = cbp->bio_caller1;
			g_io_request(cbp, s->drive_sc->consumer);
		}
		cbp = bioq_takefirst(p->bqueue);
	}
}
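/*
 * Translate a plex-relative offset into a subdisk-relative one.  On
 * success, *sdno is the index of the subdisk within the plex, *real_off
 * the offset within that subdisk, and *real_len the number of bytes that
 * fit before the next subdisk/stripe boundary (the caller loops for the
 * remainder).  For example, with a 256 KiB stripe size and three subdisks,
 * plex offset 1 MiB is stripe number 4, which lands on subdisk 1 (4 % 3)
 * at subdisk offset 256 KiB ((4 / 3) * 256 KiB, plus 0 bytes into the
 * stripe).
 */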
static int
gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
    off_t *real_len, int *sdno, int growing)
{
	struct gv_sd *s;
	int i, sdcount;
	off_t len_left, stripeend, stripeno, stripestart;

	switch (p->org) {
	case GV_PLEX_CONCAT:
		/*
		 * Find the subdisk where this request starts.  The subdisks
		 * in this list must be ordered by plex_offset.
		 */
		i = 0;
		LIST_FOREACH(s, &p->subdisks, in_plex) {
			if (s->plex_offset <= boff &&
			    s->plex_offset + s->size > boff) {
				*sdno = i;
				break;
			}
			i++;
		}
		if (s == NULL || s->drive_sc == NULL)
			return (GV_ERR_NOTFOUND);

		/* Calculate corresponding offsets on disk. */
		*real_off = boff - s->plex_offset;
		len_left = s->size - (*real_off);
		KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
		*real_len = (bcount > len_left) ? len_left : bcount;
		break;

	case GV_PLEX_STRIPED:
		/* The number of the stripe where the request starts. */
		stripeno = boff / p->stripesize;
		KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));

		/* Take growing subdisks into account when calculating. */
		sdcount = gv_sdcount(p, (boff >= p->synced));

		if (!(boff + bcount <= p->synced) &&
		    (p->flags & GV_PLEX_GROWING) &&
		    !growing)
			return (GV_ERR_ISBUSY);
		*sdno = stripeno % sdcount;

		KASSERT(*sdno >= 0, ("gv_plex_offset: sdno < 0"));
		stripestart = (stripeno / sdcount) *
		    p->stripesize;
		KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
		stripeend = stripestart + p->stripesize;
		*real_off = boff - (stripeno * p->stripesize) +
		    stripestart;
		len_left = stripeend - *real_off;
		KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));

		*real_len = (bcount <= len_left) ? bcount : len_left;
		break;

	default:
		return (GV_ERR_PLEXORG);
	}
	return (0);
}

/*
 * Prepare a normal plex request.
 */
static int
gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
    off_t bcount, caddr_t addr)
{
	struct gv_sd *s;
	struct bio *cbp;
	off_t real_len, real_off;
	int i, err, sdno;

	s = NULL;
	sdno = -1;
	real_len = real_off = 0;

	err = ENXIO;

	if (p == NULL || LIST_EMPTY(&p->subdisks))
		goto bad;

	err = gv_plex_offset(p, boff, bcount, &real_off,
	    &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW));
	/* If the request was blocked, put it into wait. */
	if (err == GV_ERR_ISBUSY) {
		bioq_disksort(p->rqueue, bp);
		return (-1); /* "Fail", and delay request. */
	}
	if (err) {
		err = ENXIO;
		goto bad;
	}
	err = ENXIO;

	/* Find the right subdisk. */
	i = 0;
	LIST_FOREACH(s, &p->subdisks, in_plex) {
		if (i == sdno)
			break;
		i++;
	}

	/* Subdisk not found. */
	if (s == NULL || s->drive_sc == NULL)
		goto bad;

	/* Now check if we can handle the request on this subdisk. */
	switch (s->state) {
	case GV_SD_UP:
		/* If the subdisk is up, just continue. */
		break;
	case GV_SD_DOWN:
		if (bp->bio_pflags & GV_BIO_INTERNAL)
			G_VINUM_DEBUG(0, "subdisk must be in the stale state in"
			    " order to perform administrative requests");
		goto bad;
	case GV_SD_STALE:
		if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) {
			G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
			    "regular requests");
			goto bad;
		}

		G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
		break;
	case GV_SD_INITIALIZING:
		if (bp->bio_cmd == BIO_READ)
			goto bad;
		break;
	default:
		/* All other subdisk states mean it's not accessible. */
		goto bad;
	}

	/* Clone the bio and adjust the offsets and sizes. */
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		err = ENOMEM;
		goto bad;
	}
	cbp->bio_offset = real_off + s->drive_offset;
	cbp->bio_length = real_len;
	cbp->bio_data = addr;
	cbp->bio_done = gv_done;
	cbp->bio_caller1 = s;
	s->drive_sc->active++;

	/* Store the sub-requests now and let others issue them. */
	bioq_insert_tail(p->bqueue, cbp);
	return (real_len);
bad:
	G_VINUM_LOGREQ(0, bp, "plex request failed.");
	/* Building the sub-request failed.  If internal BIO, do not deliver. */
	if (bp->bio_pflags & GV_BIO_INTERNAL) {
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);
		g_destroy_bio(bp);
		p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
		    GV_PLEX_GROWING);
		return (-1);
	}
	g_io_deliver(bp, err);
	return (-1);
}

/*
 * Handle a completed request to a striped or concatenated plex.
 */
void
gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
{
	struct bio *pbp;

	pbp = bp->bio_parent;
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	g_destroy_bio(bp);
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		/* Just set it to length since multiple plexes will
		 * screw things up. */
		pbp->bio_completed = pbp->bio_length;
		if (pbp->bio_pflags & GV_BIO_SYNCREQ)
			gv_sync_complete(p, pbp);
		else if (pbp->bio_pflags & GV_BIO_GROW)
			gv_grow_complete(p, pbp);
		else
			g_io_deliver(pbp, pbp->bio_error);
	}
}
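/*
 * Degraded-read note (added for clarity): when a RAID5 read packet carries
 * several sub-reads (wp != NULL below), the missing block is reconstructed
 * by XOR-summing every completed sub-read into wp->data; once wp->bits is
 * empty the data is complete and any bios parked on the stripe are
 * re-posted to the worker.
 */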
/*
 * Handle a completed request to a RAID-5 plex.
 */
void
gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp;
	off_t completed;
	int i;

	completed = 0;
	sc = p->vinumconf;
	wp = bp->bio_caller2;

	switch (bp->bio_parent->bio_cmd) {
	case BIO_READ:
		if (wp == NULL) {
			completed = bp->bio_completed;
			break;
		}

		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp != bp)
				continue;
			TAILQ_REMOVE(&wp->bits, bq, queue);
			g_free(bq);
			for (i = 0; i < wp->length; i++)
				wp->data[i] ^= bp->bio_data[i];
			break;
		}
		if (TAILQ_EMPTY(&wp->bits)) {
			completed = wp->length;
			if (wp->lockbase != -1) {
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					gv_post_bio(sc, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
			}
			g_free(wp);
		}

		break;

	case BIO_WRITE:
		/* XXX can this ever happen? */
		if (wp == NULL) {
			completed = bp->bio_completed;
			break;
		}

		/* Check if we need to handle parity data. */
		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp != bp)
				continue;
			TAILQ_REMOVE(&wp->bits, bq, queue);
			g_free(bq);
			cbp = wp->parity;
			if (cbp != NULL) {
				for (i = 0; i < wp->length; i++)
					cbp->bio_data[i] ^= bp->bio_data[i];
			}
			break;
		}

		/* Handle parity data. */
		if (TAILQ_EMPTY(&wp->bits)) {
			if (bp->bio_parent->bio_pflags & GV_BIO_CHECK)
				i = gv_check_parity(p, bp, wp);
			else
				i = gv_normal_parity(p, bp, wp);

			/* All of our sub-requests have finished. */
			if (i) {
				completed = wp->length;
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					gv_post_bio(sc, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
				g_free(wp);
			}
		}

		break;
	}

	pbp = bp->bio_parent;
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += completed;

	/* When the original request is finished, we deliver it. */
	pbp->bio_inbed++;
	if (pbp->bio_inbed == pbp->bio_children) {
		/* Hand it over for checking or delivery. */
		if (pbp->bio_cmd == BIO_WRITE &&
		    (pbp->bio_pflags & GV_BIO_CHECK)) {
			gv_parity_complete(p, pbp);
		} else if (pbp->bio_cmd == BIO_WRITE &&
		    (pbp->bio_pflags & GV_BIO_REBUILD)) {
			gv_rebuild_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_INIT) {
			gv_init_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
			gv_sync_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_GROW) {
			gv_grow_complete(p, pbp);
		} else {
			g_io_deliver(pbp, pbp->bio_error);
		}
	}

	/* Clean up what we allocated. */
	if (bp->bio_cflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);
}
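/*
 * Verify the parity of a finished check/rebuild packet.  By this point the
 * completion path above has XOR-summed all data blocks of the stripe into
 * the wp->parity buffer.  While the deferred parity read (wp->waiting) is
 * still outstanding, issue it first; once it finishes, its data (bp) is
 * compared byte-by-byte with the computed parity (wp->parity).  On a
 * mismatch the parent bio is flagged with EAGAIN and, when GV_BIO_PARITY
 * is set, the computed parity is written back to correct it.  Returns 1
 * when the packet is done, 0 while sub-requests remain in flight.
 */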
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *pbp;
	struct gv_sd *s;
	int err, finished, i;

	err = 0;
	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		s = pbp->bio_caller1;
		g_io_request(pbp, s->drive_sc->consumer);
		finished = 0;

	} else if (wp->parity != NULL) {
		pbp = wp->parity;
		wp->parity = NULL;

		/* Check if the parity is correct. */
		for (i = 0; i < wp->length; i++) {
			if (bp->bio_data[i] != pbp->bio_data[i]) {
				err = 1;
				break;
			}
		}

		/* The parity is not correct... */
		if (err) {
			bp->bio_parent->bio_error = EAGAIN;

			/* ... but we rebuild it. */
			if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
				s = pbp->bio_caller1;
				g_io_request(pbp, s->drive_sc->consumer);
				finished = 0;
			}
		}

		/*
		 * Clean up the BIO we would have used for rebuilding the
		 * parity.
		 */
		if (finished) {
			bp->bio_parent->bio_inbed++;
			g_destroy_bio(pbp);
		}
	}

	return (finished);
}
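/*
 * Finish a regular RAID5 write packet.  Writes are two-staged: while a
 * data write is still pending (wp->waiting), its data is folded into the
 * parity buffer and the write is issued; once all data writes have
 * completed, the updated parity block (wp->parity) is written last.
 * Returns 1 when the packet has no more sub-requests to issue, 0
 * otherwise.
 */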
static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *cbp, *pbp;
	struct gv_sd *s;
	int finished, i;

	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		cbp = wp->parity;
		for (i = 0; i < wp->length; i++)
			cbp->bio_data[i] ^= pbp->bio_data[i];
		s = pbp->bio_caller1;
		g_io_request(pbp, s->drive_sc->consumer);
		finished = 0;

	} else if (wp->parity != NULL) {
		cbp = wp->parity;
		wp->parity = NULL;
		s = cbp->bio_caller1;
		g_io_request(cbp, s->drive_sc->consumer);
		finished = 0;
	}

	return (finished);
}

/* Flush the queue of delayed requests. */
static void
gv_plex_flush(struct gv_plex *p)
{
	struct gv_softc *sc;
	struct bio *bp;

	sc = p->vinumconf;
	bp = bioq_takefirst(p->rqueue);
	while (bp != NULL) {
		gv_plex_start(p, bp);
		bp = bioq_takefirst(p->rqueue);
	}
}
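/*
 * Hand a bio to the vinum worker thread: queue it on the softc's down
 * queue under the queue mutex and wake the worker up.
 */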
static void
gv_post_bio(struct gv_softc *sc, struct bio *bp)
{

	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(bp != NULL, ("NULL bp"));
	mtx_lock(&sc->bqueue_mtx);
	bioq_disksort(sc->bqueue_down, bp);
	wakeup(sc);
	mtx_unlock(&sc->bqueue_mtx);
}
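/*
 * Create and post a bio for the next plex-synchronization chunk.  The sync
 * machinery ping-pongs between gv_sync_request() and gv_sync_complete():
 * a BIO_READ against the source plex ('from') is turned into a BIO_WRITE
 * of the same data against the target plex ('to'), which in turn triggers
 * the read of the following chunk, until all of 'from' has been copied.
 */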
int
gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
    off_t length, int type, caddr_t data)
{
	struct gv_softc *sc;
	struct bio *bp;

	KASSERT(from != NULL, ("NULL from"));
	KASSERT(to != NULL, ("NULL to"));
	sc = from->vinumconf;
	KASSERT(sc != NULL, ("NULL sc"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "sync from '%s' failed at offset %jd; "
		    "out of memory", from->name, (intmax_t)offset);
		return (ENOMEM);
	}
	bp->bio_length = length;
	bp->bio_done = NULL;
	bp->bio_pflags |= GV_BIO_SYNCREQ;
	bp->bio_offset = offset;
	bp->bio_caller1 = from;
	bp->bio_caller2 = to;
	bp->bio_cmd = type;
	if (data == NULL)
		data = g_malloc(length, M_WAITOK);
	bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */
	bp->bio_data = data;

	/* Send down next. */
	gv_post_bio(sc, bp);
	//gv_plex_start(from, bp);
	return (0);
}

/*
 * Handle a finished plex sync bio.
 */
int
gv_sync_complete(struct gv_plex *to, struct bio *bp)
{
	struct gv_plex *from, *p;
	struct gv_sd *s;
	struct gv_volume *v;
	struct gv_softc *sc;
	off_t offset;
	int err;

	g_topology_assert_not();

	err = 0;
	KASSERT(to != NULL, ("NULL to"));
	KASSERT(bp != NULL, ("NULL bp"));
	from = bp->bio_caller2;
	KASSERT(from != NULL, ("NULL from"));
	v = to->vol_sc;
	KASSERT(v != NULL, ("NULL v"));
	sc = v->vinumconf;
	KASSERT(sc != NULL, ("NULL sc"));

	/* If it was a read, write it. */
	if (bp->bio_cmd == BIO_READ) {
		err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
		    BIO_WRITE, bp->bio_data);
	/* If it was a write, read the next one. */
	} else if (bp->bio_cmd == BIO_WRITE) {
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);
		to->synced += bp->bio_length;
		/* If we're finished, clean up. */
		if (bp->bio_offset + bp->bio_length >= from->size) {
			G_VINUM_DEBUG(1, "syncing of %s from %s completed",
			    to->name, from->name);
			/* Update our state. */
			LIST_FOREACH(s, &to->subdisks, in_plex)
				gv_set_sd_state(s, GV_SD_UP, 0);
			gv_update_plex_state(to);
			to->flags &= ~GV_PLEX_SYNCING;
			to->synced = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
		} else {
			offset = bp->bio_offset + bp->bio_length;
			err = gv_sync_request(from, to, offset,
			    MIN(bp->bio_length, from->size - offset),
			    BIO_READ, NULL);
		}
	}
	g_destroy_bio(bp);
	/* Clean up if there was an error. */
	if (err) {
		to->flags &= ~GV_PLEX_SYNCING;
		G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
	}

	/* Check if all plexes are synced, and lower refcounts. */
	g_topology_lock();
	LIST_FOREACH(p, &v->plexes, in_volume) {
		if (p->flags & GV_PLEX_SYNCING) {
			g_topology_unlock();
			return (-1);
		}
	}
	/* If we came here, all plexes are synced, and we're free. */
	gv_access(v->provider, -1, -1, 0);
	g_topology_unlock();
	G_VINUM_DEBUG(1, "plex sync completed");
	gv_volume_flush(v);
	return (0);
}

/*
 * Create a new bio struct for the next grow request.
 */
int
gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
    caddr_t data)
{
	struct gv_softc *sc;
	struct bio *bp;

	KASSERT(p != NULL, ("gv_grow_request: NULL p"));
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
		    "out of memory", p->name);
		return (ENOMEM);
	}

	bp->bio_cmd = type;
	bp->bio_done = NULL;
	bp->bio_error = 0;
	bp->bio_caller1 = p;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_pflags |= GV_BIO_GROW;
	if (data == NULL)
		data = g_malloc(length, M_WAITOK);
	bp->bio_pflags |= GV_BIO_MALLOC;
	bp->bio_data = data;

	gv_post_bio(sc, bp);
	//gv_plex_start(p, bp);
	return (0);
}

/*
 * Finish handling of a bio to a growing plex.
 */
void
gv_grow_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct gv_sd *s;
	struct gv_volume *v;
	off_t origsize, offset;
	int sdcount, err;

	v = p->vol_sc;
	KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
	sc = v->vinumconf;
	KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
	err = 0;

	/* If it was a read, write it. */
	if (bp->bio_cmd == BIO_READ) {
		p->synced += bp->bio_length;
		err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
		    BIO_WRITE, bp->bio_data);
	/* If it was a write, read next. */
	} else if (bp->bio_cmd == BIO_WRITE) {
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);

		/* Find the real size of the plex. */
		sdcount = gv_sdcount(p, 1);
		s = LIST_FIRST(&p->subdisks);
		KASSERT(s != NULL, ("NULL s"));
		origsize = (s->size * (sdcount - 1));
		if (bp->bio_offset + bp->bio_length >= origsize) {
			G_VINUM_DEBUG(1, "growing of %s completed", p->name);
			p->flags &= ~GV_PLEX_GROWING;
			LIST_FOREACH(s, &p->subdisks, in_plex) {
				s->flags &= ~GV_SD_GROW;
				gv_set_sd_state(s, GV_SD_UP, 0);
			}
			p->size = gv_plex_size(p);
			gv_update_vol_size(v, gv_vol_size(v));
			gv_set_plex_state(p, GV_PLEX_UP, 0);
			g_topology_lock();
			gv_access(v->provider, -1, -1, 0);
			g_topology_unlock();
			p->synced = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
			/* Issue delayed requests. */
			gv_plex_flush(p);
		} else {
			offset = bp->bio_offset + bp->bio_length;
			err = gv_grow_request(p, offset,
			    MIN(bp->bio_length, origsize - offset),
			    BIO_READ, NULL);
		}
	}
	g_destroy_bio(bp);

	if (err) {
		p->flags &= ~GV_PLEX_GROWING;
		G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
	}
}

/*
 * Create an initialization BIO and send it off to the consumer.  Assume that
 * we're given initialization data as parameter.
 */
void
gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
{
	struct gv_drive *d;
	struct g_consumer *cp;
	struct bio *bp, *cbp;

	KASSERT(s != NULL, ("gv_init_request: NULL s"));
	d = s->drive_sc;
	KASSERT(d != NULL, ("gv_init_request: NULL d"));
	cp = d->consumer;
	KASSERT(cp != NULL, ("gv_init_request: NULL cp"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
		    " (drive offset %jd); out of memory", s->name,
		    (intmax_t)s->initialized, (intmax_t)start);
		return; /* XXX: Error codes. */
	}
	bp->bio_cmd = BIO_WRITE;
	bp->bio_data = data;
	bp->bio_done = NULL;
	bp->bio_error = 0;
	bp->bio_length = length;
	bp->bio_pflags |= GV_BIO_INIT;
	bp->bio_offset = start;
	bp->bio_caller1 = s;

	/* Then, of course, we have to clone it. */
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
		    " (drive offset %jd); out of memory", s->name,
		    (intmax_t)s->initialized, (intmax_t)start);
		return; /* XXX: Error codes. */
	}
	cbp->bio_done = gv_done;
	cbp->bio_caller1 = s;
	d->active++;
	/* Send it off to the consumer. */
	g_io_request(cbp, cp);
}

/*
 * Handle a finished initialization BIO.
 */
void
gv_init_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct gv_drive *d;
	struct g_consumer *cp;
	struct gv_sd *s;
	off_t start, length;
	caddr_t data;
	int error;

	s = bp->bio_caller1;
	start = bp->bio_offset;
	length = bp->bio_length;
	error = bp->bio_error;
	data = bp->bio_data;

	KASSERT(s != NULL, ("gv_init_complete: NULL s"));
	d = s->drive_sc;
	KASSERT(d != NULL, ("gv_init_complete: NULL d"));
	cp = d->consumer;
	KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));

	g_destroy_bio(bp);

	/*
	 * First we need to find out if it was okay, and abort if it's not.
	 * Then we need to free previous buffers, find out the correct subdisk,
	 * as well as getting the correct starting point and length of the BIO.
	 */
	if (start >= s->drive_offset + s->size) {
		/* Free the data we initialized. */
		if (data != NULL)
			g_free(data);
		g_topology_assert_not();
		g_topology_lock();
		g_access(cp, 0, -1, 0);
		g_topology_unlock();
		if (error) {
			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
			    GV_SETSTATE_CONFIG);
		} else {
			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
			s->initialized = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
			G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
			    "successfully", s->name);
		}
		return;
	}
	s->initialized += length;
	start += length;
	gv_init_request(s, start, data, length);
}

/*
 * Create a new bio struct for the next parity rebuild.  Used both by internal
 * rebuild of degraded plexes as well as user-initiated rebuilds/checks.
 */
void
gv_parity_request(struct gv_plex *p, int flags, off_t offset)
{
	struct gv_softc *sc;
	struct bio *bp;

	KASSERT(p != NULL, ("gv_parity_request: NULL p"));
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
		    "out of memory", p->name);
		return;
	}

	bp->bio_cmd = BIO_WRITE;
	bp->bio_done = NULL;
	bp->bio_error = 0;
	bp->bio_length = p->stripesize;
	bp->bio_caller1 = p;

	/*
	 * Check if it's a rebuild of a degraded plex or a user request of
	 * parity rebuild.
	 */
	if (flags & GV_BIO_REBUILD)
		bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
	else if (flags & GV_BIO_CHECK)
		bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
	else {
		G_VINUM_DEBUG(0, "invalid flags given in rebuild");
		g_destroy_bio(bp); /* Don't leak the bio on invalid flags. */
		return;
	}

	bp->bio_pflags = flags;
	bp->bio_pflags |= GV_BIO_MALLOC;

	/* We still have more parity to build. */
	bp->bio_offset = offset;
	gv_post_bio(sc, bp);
	//gv_plex_start(p, bp); /* Send it down to the plex. */
}

/*
 * Handle a finished parity write.
 */
void
gv_parity_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	int error, flags;

	error = bp->bio_error;
	flags = bp->bio_pflags;
	flags &= ~GV_BIO_MALLOC;

	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));

	/* Clean up what we allocated. */
	if (bp->bio_pflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);

	if (error == EAGAIN) {
		G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
		    (intmax_t)p->synced);
	}

	/* Any error is fatal, except EAGAIN when we're rebuilding. */
	if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
		/* Make sure we don't have the lock. */
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();
		G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
		    "errno %d", p->name, (intmax_t)p->synced, error);
		return;
	} else {
		p->synced += p->stripesize;
	}

	if (p->synced >= p->size) {
		/* Make sure we don't have the lock. */
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();
		/* We're finished. */
		G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
		p->synced = 0;
		gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
		return;
	}

	/* Send down the next request; it decides itself if more work remains. */
	gv_parity_request(p, flags, p->synced);
}
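/*
 * Note: each rebuild packet covers one full stripe of data, so the next
 * offset below advances by stripesize * (number of subdisks - 1); the
 * rebuild is done once that offset passes p->size.
 */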
/*
 * Handle a finished plex rebuild bio.
 */
void
gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct gv_sd *s;
	int error, flags;
	off_t offset;

	error = bp->bio_error;
	flags = bp->bio_pflags;
	offset = bp->bio_offset;
	flags &= ~GV_BIO_MALLOC;
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));

	/* Clean up what we allocated. */
	if (bp->bio_pflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);

	if (error) {
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();

		G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d",
		    p->name, (intmax_t)offset, error);
		p->flags &= ~GV_PLEX_REBUILDING;
		p->synced = 0;
		gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
		return;
	}

	offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
	if (offset >= p->size) {
		/* We're finished. */
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();

		G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
		gv_save_config(p->vinumconf);
		p->flags &= ~GV_PLEX_REBUILDING;
		p->synced = 0;
		/* Try to up all subdisks. */
		LIST_FOREACH(s, &p->subdisks, in_plex)
			gv_update_sd_state(s);
		gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
		gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
		return;
	}

	/* Send down the next request; it decides itself if more work remains. */
	gv_parity_request(p, flags, offset);
}