1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2004, 2007 Lukas Ertl 5 * Copyright (c) 2007, 2009 Ulf Lilleengen 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/param.h> 31 #include <sys/bio.h> 32 #include <sys/lock.h> 33 #include <sys/malloc.h> 34 #include <sys/systm.h> 35 36 #include <geom/geom.h> 37 #include <geom/geom_dbg.h> 38 #include <geom/vinum/geom_vinum_var.h> 39 #include <geom/vinum/geom_vinum_raid5.h> 40 #include <geom/vinum/geom_vinum.h> 41 42 static int gv_check_parity(struct gv_plex *, struct bio *, 43 struct gv_raid5_packet *); 44 static int gv_normal_parity(struct gv_plex *, struct bio *, 45 struct gv_raid5_packet *); 46 static void gv_plex_flush(struct gv_plex *); 47 static int gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *, 48 int *, int); 49 static int gv_plex_normal_request(struct gv_plex *, struct bio *, off_t, 50 off_t, caddr_t); 51 static void gv_post_bio(struct gv_softc *, struct bio *); 52 53 void 54 gv_plex_start(struct gv_plex *p, struct bio *bp) 55 { 56 struct bio *cbp; 57 struct gv_sd *s; 58 struct gv_raid5_packet *wp; 59 caddr_t addr; 60 off_t bcount, boff, len; 61 62 bcount = bp->bio_length; 63 addr = bp->bio_data; 64 boff = bp->bio_offset; 65 66 /* Walk over the whole length of the request, we might split it up. */ 67 while (bcount > 0) { 68 wp = NULL; 69 70 /* 71 * RAID5 plexes need special treatment, as a single request 72 * might involve several read/write sub-requests. 73 */ 74 if (p->org == GV_PLEX_RAID5) { 75 wp = gv_raid5_start(p, bp, addr, boff, bcount); 76 if (wp == NULL) 77 return; 78 79 len = wp->length; 80 81 if (TAILQ_EMPTY(&wp->bits)) 82 g_free(wp); 83 else if (wp->lockbase != -1) 84 TAILQ_INSERT_TAIL(&p->packets, wp, list); 85 86 /* 87 * Requests to concatenated and striped plexes go straight 88 * through. 89 */ 90 } else { 91 len = gv_plex_normal_request(p, bp, boff, bcount, addr); 92 } 93 if (len < 0) 94 return; 95 96 bcount -= len; 97 addr += len; 98 boff += len; 99 } 100 101 /* 102 * Fire off all sub-requests. We get the correct consumer (== drive) 103 * to send each request to via the subdisk that was stored in 104 * cbp->bio_caller1. 105 */ 106 cbp = bioq_takefirst(p->bqueue); 107 while (cbp != NULL) { 108 /* 109 * RAID5 sub-requests need to come in correct order, otherwise 110 * we trip over the parity, as it might be overwritten by 111 * another sub-request. We abuse cbp->bio_caller2 to mark 112 * potential overlap situations. 113 */ 114 if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) { 115 /* Park the bio on the waiting queue. */ 116 cbp->bio_pflags |= GV_BIO_ONHOLD; 117 bioq_disksort(p->wqueue, cbp); 118 } else { 119 s = cbp->bio_caller1; 120 g_io_request(cbp, s->drive_sc->consumer); 121 } 122 cbp = bioq_takefirst(p->bqueue); 123 } 124 } 125 126 static int 127 gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off, 128 off_t *real_len, int *sdno, int growing) 129 { 130 struct gv_sd *s; 131 int i, sdcount; 132 off_t len_left, stripeend, stripeno, stripestart; 133 134 switch (p->org) { 135 case GV_PLEX_CONCAT: 136 /* 137 * Find the subdisk where this request starts. The subdisks in 138 * this list must be ordered by plex_offset. 139 */ 140 i = 0; 141 LIST_FOREACH(s, &p->subdisks, in_plex) { 142 if (s->plex_offset <= boff && 143 s->plex_offset + s->size > boff) { 144 *sdno = i; 145 break; 146 } 147 i++; 148 } 149 if (s == NULL || s->drive_sc == NULL) 150 return (GV_ERR_NOTFOUND); 151 152 /* Calculate corresponding offsets on disk. */ 153 *real_off = boff - s->plex_offset; 154 len_left = s->size - (*real_off); 155 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0")); 156 *real_len = (bcount > len_left) ? len_left : bcount; 157 break; 158 159 case GV_PLEX_STRIPED: 160 /* The number of the stripe where the request starts. */ 161 stripeno = boff / p->stripesize; 162 KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0")); 163 164 /* Take growing subdisks into account when calculating. */ 165 sdcount = gv_sdcount(p, (boff >= p->synced)); 166 167 if (!(boff + bcount <= p->synced) && 168 (p->flags & GV_PLEX_GROWING) && 169 !growing) 170 return (GV_ERR_ISBUSY); 171 *sdno = stripeno % sdcount; 172 173 KASSERT(*sdno >= 0, ("gv_plex_offset: sdno < 0")); 174 stripestart = (stripeno / sdcount) * 175 p->stripesize; 176 KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0")); 177 stripeend = stripestart + p->stripesize; 178 *real_off = boff - (stripeno * p->stripesize) + 179 stripestart; 180 len_left = stripeend - *real_off; 181 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0")); 182 183 *real_len = (bcount <= len_left) ? bcount : len_left; 184 break; 185 186 default: 187 return (GV_ERR_PLEXORG); 188 } 189 return (0); 190 } 191 192 /* 193 * Prepare a normal plex request. 194 */ 195 static int 196 gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff, 197 off_t bcount, caddr_t addr) 198 { 199 struct gv_sd *s; 200 struct bio *cbp; 201 off_t real_len, real_off; 202 int i, err, sdno; 203 204 s = NULL; 205 sdno = -1; 206 real_len = real_off = 0; 207 208 err = ENXIO; 209 210 if (p == NULL || LIST_EMPTY(&p->subdisks)) 211 goto bad; 212 213 err = gv_plex_offset(p, boff, bcount, &real_off, 214 &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW)); 215 /* If the request was blocked, put it into wait. */ 216 if (err == GV_ERR_ISBUSY) { 217 bioq_disksort(p->rqueue, bp); 218 return (-1); /* "Fail", and delay request. */ 219 } 220 if (err) { 221 err = ENXIO; 222 goto bad; 223 } 224 err = ENXIO; 225 226 /* Find the right subdisk. */ 227 i = 0; 228 LIST_FOREACH(s, &p->subdisks, in_plex) { 229 if (i == sdno) 230 break; 231 i++; 232 } 233 234 /* Subdisk not found. */ 235 if (s == NULL || s->drive_sc == NULL) 236 goto bad; 237 238 /* Now check if we can handle the request on this subdisk. */ 239 switch (s->state) { 240 case GV_SD_UP: 241 /* If the subdisk is up, just continue. */ 242 break; 243 case GV_SD_DOWN: 244 if (bp->bio_pflags & GV_BIO_INTERNAL) 245 G_VINUM_DEBUG(0, "subdisk must be in the stale state in" 246 " order to perform administrative requests"); 247 goto bad; 248 case GV_SD_STALE: 249 if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) { 250 G_VINUM_DEBUG(0, "subdisk stale, unable to perform " 251 "regular requests"); 252 goto bad; 253 } 254 255 G_VINUM_DEBUG(1, "sd %s is initializing", s->name); 256 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE); 257 break; 258 case GV_SD_INITIALIZING: 259 if (bp->bio_cmd == BIO_READ) 260 goto bad; 261 break; 262 default: 263 /* All other subdisk states mean it's not accessible. */ 264 goto bad; 265 } 266 267 /* Clone the bio and adjust the offsets and sizes. */ 268 cbp = g_clone_bio(bp); 269 if (cbp == NULL) { 270 err = ENOMEM; 271 goto bad; 272 } 273 cbp->bio_offset = real_off + s->drive_offset; 274 cbp->bio_length = real_len; 275 cbp->bio_data = addr; 276 cbp->bio_done = gv_done; 277 cbp->bio_caller1 = s; 278 s->drive_sc->active++; 279 280 /* Store the sub-requests now and let others issue them. */ 281 bioq_insert_tail(p->bqueue, cbp); 282 return (real_len); 283 bad: 284 G_VINUM_LOGREQ(0, bp, "plex request failed."); 285 /* Building the sub-request failed. If internal BIO, do not deliver. */ 286 if (bp->bio_pflags & GV_BIO_INTERNAL) { 287 if (bp->bio_pflags & GV_BIO_MALLOC) 288 g_free(bp->bio_data); 289 g_destroy_bio(bp); 290 p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING | 291 GV_PLEX_GROWING); 292 return (-1); 293 } 294 g_io_deliver(bp, err); 295 return (-1); 296 } 297 298 /* 299 * Handle a completed request to a striped or concatenated plex. 300 */ 301 void 302 gv_plex_normal_done(struct gv_plex *p, struct bio *bp) 303 { 304 struct bio *pbp; 305 306 pbp = bp->bio_parent; 307 if (pbp->bio_error == 0) 308 pbp->bio_error = bp->bio_error; 309 g_destroy_bio(bp); 310 pbp->bio_inbed++; 311 if (pbp->bio_children == pbp->bio_inbed) { 312 /* Just set it to length since multiple plexes will 313 * screw things up. */ 314 pbp->bio_completed = pbp->bio_length; 315 if (pbp->bio_pflags & GV_BIO_SYNCREQ) 316 gv_sync_complete(p, pbp); 317 else if (pbp->bio_pflags & GV_BIO_GROW) 318 gv_grow_complete(p, pbp); 319 else 320 g_io_deliver(pbp, pbp->bio_error); 321 } 322 } 323 324 /* 325 * Handle a completed request to a RAID-5 plex. 326 */ 327 void 328 gv_plex_raid5_done(struct gv_plex *p, struct bio *bp) 329 { 330 struct gv_softc *sc; 331 struct bio *cbp, *pbp; 332 struct gv_bioq *bq, *bq2; 333 struct gv_raid5_packet *wp; 334 off_t completed; 335 int i; 336 337 completed = 0; 338 sc = p->vinumconf; 339 wp = bp->bio_caller2; 340 341 switch (bp->bio_parent->bio_cmd) { 342 case BIO_READ: 343 if (wp == NULL) { 344 completed = bp->bio_completed; 345 break; 346 } 347 348 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { 349 if (bq->bp != bp) 350 continue; 351 TAILQ_REMOVE(&wp->bits, bq, queue); 352 g_free(bq); 353 for (i = 0; i < wp->length; i++) 354 wp->data[i] ^= bp->bio_data[i]; 355 break; 356 } 357 if (TAILQ_EMPTY(&wp->bits)) { 358 completed = wp->length; 359 if (wp->lockbase != -1) { 360 TAILQ_REMOVE(&p->packets, wp, list); 361 /* Bring the waiting bios back into the game. */ 362 pbp = bioq_takefirst(p->wqueue); 363 while (pbp != NULL) { 364 gv_post_bio(sc, pbp); 365 pbp = bioq_takefirst(p->wqueue); 366 } 367 } 368 g_free(wp); 369 } 370 371 break; 372 373 case BIO_WRITE: 374 /* XXX can this ever happen? */ 375 if (wp == NULL) { 376 completed = bp->bio_completed; 377 break; 378 } 379 380 /* Check if we need to handle parity data. */ 381 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) { 382 if (bq->bp != bp) 383 continue; 384 TAILQ_REMOVE(&wp->bits, bq, queue); 385 g_free(bq); 386 cbp = wp->parity; 387 if (cbp != NULL) { 388 for (i = 0; i < wp->length; i++) 389 cbp->bio_data[i] ^= bp->bio_data[i]; 390 } 391 break; 392 } 393 394 /* Handle parity data. */ 395 if (TAILQ_EMPTY(&wp->bits)) { 396 if (bp->bio_parent->bio_pflags & GV_BIO_CHECK) 397 i = gv_check_parity(p, bp, wp); 398 else 399 i = gv_normal_parity(p, bp, wp); 400 401 /* All of our sub-requests have finished. */ 402 if (i) { 403 completed = wp->length; 404 TAILQ_REMOVE(&p->packets, wp, list); 405 /* Bring the waiting bios back into the game. */ 406 pbp = bioq_takefirst(p->wqueue); 407 while (pbp != NULL) { 408 gv_post_bio(sc, pbp); 409 pbp = bioq_takefirst(p->wqueue); 410 } 411 g_free(wp); 412 } 413 } 414 415 break; 416 } 417 418 pbp = bp->bio_parent; 419 if (pbp->bio_error == 0) 420 pbp->bio_error = bp->bio_error; 421 pbp->bio_completed += completed; 422 423 /* When the original request is finished, we deliver it. */ 424 pbp->bio_inbed++; 425 if (pbp->bio_inbed == pbp->bio_children) { 426 /* Hand it over for checking or delivery. */ 427 if (pbp->bio_cmd == BIO_WRITE && 428 (pbp->bio_pflags & GV_BIO_CHECK)) { 429 gv_parity_complete(p, pbp); 430 } else if (pbp->bio_cmd == BIO_WRITE && 431 (pbp->bio_pflags & GV_BIO_REBUILD)) { 432 gv_rebuild_complete(p, pbp); 433 } else if (pbp->bio_pflags & GV_BIO_INIT) { 434 gv_init_complete(p, pbp); 435 } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) { 436 gv_sync_complete(p, pbp); 437 } else if (pbp->bio_pflags & GV_BIO_GROW) { 438 gv_grow_complete(p, pbp); 439 } else { 440 g_io_deliver(pbp, pbp->bio_error); 441 } 442 } 443 444 /* Clean up what we allocated. */ 445 if (bp->bio_cflags & GV_BIO_MALLOC) 446 g_free(bp->bio_data); 447 g_destroy_bio(bp); 448 } 449 450 static int 451 gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp) 452 { 453 struct bio *pbp; 454 struct gv_sd *s; 455 int err, finished, i; 456 457 err = 0; 458 finished = 1; 459 460 if (wp->waiting != NULL) { 461 pbp = wp->waiting; 462 wp->waiting = NULL; 463 s = pbp->bio_caller1; 464 g_io_request(pbp, s->drive_sc->consumer); 465 finished = 0; 466 467 } else if (wp->parity != NULL) { 468 pbp = wp->parity; 469 wp->parity = NULL; 470 471 /* Check if the parity is correct. */ 472 for (i = 0; i < wp->length; i++) { 473 if (bp->bio_data[i] != pbp->bio_data[i]) { 474 err = 1; 475 break; 476 } 477 } 478 479 /* The parity is not correct... */ 480 if (err) { 481 bp->bio_parent->bio_error = EAGAIN; 482 483 /* ... but we rebuild it. */ 484 if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) { 485 s = pbp->bio_caller1; 486 g_io_request(pbp, s->drive_sc->consumer); 487 finished = 0; 488 } 489 } 490 491 /* 492 * Clean up the BIO we would have used for rebuilding the 493 * parity. 494 */ 495 if (finished) { 496 bp->bio_parent->bio_inbed++; 497 g_destroy_bio(pbp); 498 } 499 } 500 501 return (finished); 502 } 503 504 static int 505 gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp) 506 { 507 struct bio *cbp, *pbp; 508 struct gv_sd *s; 509 int finished, i; 510 511 finished = 1; 512 513 if (wp->waiting != NULL) { 514 pbp = wp->waiting; 515 wp->waiting = NULL; 516 cbp = wp->parity; 517 for (i = 0; i < wp->length; i++) 518 cbp->bio_data[i] ^= pbp->bio_data[i]; 519 s = pbp->bio_caller1; 520 g_io_request(pbp, s->drive_sc->consumer); 521 finished = 0; 522 523 } else if (wp->parity != NULL) { 524 cbp = wp->parity; 525 wp->parity = NULL; 526 s = cbp->bio_caller1; 527 g_io_request(cbp, s->drive_sc->consumer); 528 finished = 0; 529 } 530 531 return (finished); 532 } 533 534 /* Flush the queue with delayed requests. */ 535 static void 536 gv_plex_flush(struct gv_plex *p) 537 { 538 struct bio *bp; 539 540 bp = bioq_takefirst(p->rqueue); 541 while (bp != NULL) { 542 gv_plex_start(p, bp); 543 bp = bioq_takefirst(p->rqueue); 544 } 545 } 546 547 static void 548 gv_post_bio(struct gv_softc *sc, struct bio *bp) 549 { 550 551 KASSERT(sc != NULL, ("NULL sc")); 552 KASSERT(bp != NULL, ("NULL bp")); 553 mtx_lock(&sc->bqueue_mtx); 554 bioq_disksort(sc->bqueue_down, bp); 555 wakeup(sc); 556 mtx_unlock(&sc->bqueue_mtx); 557 } 558 559 int 560 gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset, 561 off_t length, int type, caddr_t data) 562 { 563 struct gv_softc *sc; 564 struct bio *bp; 565 566 KASSERT(from != NULL, ("NULL from")); 567 KASSERT(to != NULL, ("NULL to")); 568 sc = from->vinumconf; 569 KASSERT(sc != NULL, ("NULL sc")); 570 571 bp = g_new_bio(); 572 if (bp == NULL) { 573 G_VINUM_DEBUG(0, "sync from '%s' failed at offset " 574 " %jd; out of memory", from->name, offset); 575 return (ENOMEM); 576 } 577 bp->bio_length = length; 578 bp->bio_done = NULL; 579 bp->bio_pflags |= GV_BIO_SYNCREQ; 580 bp->bio_offset = offset; 581 bp->bio_caller1 = from; 582 bp->bio_caller2 = to; 583 bp->bio_cmd = type; 584 if (data == NULL) 585 data = g_malloc(length, M_WAITOK); 586 bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */ 587 bp->bio_data = data; 588 589 /* Send down next. */ 590 gv_post_bio(sc, bp); 591 //gv_plex_start(from, bp); 592 return (0); 593 } 594 595 /* 596 * Handle a finished plex sync bio. 597 */ 598 int 599 gv_sync_complete(struct gv_plex *to, struct bio *bp) 600 { 601 struct gv_plex *from, *p; 602 struct gv_sd *s; 603 struct gv_volume *v; 604 struct gv_softc *sc; 605 off_t offset; 606 int err; 607 608 g_topology_assert_not(); 609 610 err = 0; 611 KASSERT(to != NULL, ("NULL to")); 612 KASSERT(bp != NULL, ("NULL bp")); 613 from = bp->bio_caller2; 614 KASSERT(from != NULL, ("NULL from")); 615 v = to->vol_sc; 616 KASSERT(v != NULL, ("NULL v")); 617 sc = v->vinumconf; 618 KASSERT(sc != NULL, ("NULL sc")); 619 620 /* If it was a read, write it. */ 621 if (bp->bio_cmd == BIO_READ) { 622 err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length, 623 BIO_WRITE, bp->bio_data); 624 /* If it was a write, read the next one. */ 625 } else if (bp->bio_cmd == BIO_WRITE) { 626 if (bp->bio_pflags & GV_BIO_MALLOC) 627 g_free(bp->bio_data); 628 to->synced += bp->bio_length; 629 /* If we're finished, clean up. */ 630 if (bp->bio_offset + bp->bio_length >= from->size) { 631 G_VINUM_DEBUG(1, "syncing of %s from %s completed", 632 to->name, from->name); 633 /* Update our state. */ 634 LIST_FOREACH(s, &to->subdisks, in_plex) 635 gv_set_sd_state(s, GV_SD_UP, 0); 636 gv_update_plex_state(to); 637 to->flags &= ~GV_PLEX_SYNCING; 638 to->synced = 0; 639 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 640 } else { 641 offset = bp->bio_offset + bp->bio_length; 642 err = gv_sync_request(from, to, offset, 643 MIN(bp->bio_length, from->size - offset), 644 BIO_READ, NULL); 645 } 646 } 647 g_destroy_bio(bp); 648 /* Clean up if there was an error. */ 649 if (err) { 650 to->flags &= ~GV_PLEX_SYNCING; 651 G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err); 652 } 653 654 /* Check if all plexes are synced, and lower refcounts. */ 655 g_topology_lock(); 656 LIST_FOREACH(p, &v->plexes, in_volume) { 657 if (p->flags & GV_PLEX_SYNCING) { 658 g_topology_unlock(); 659 return (-1); 660 } 661 } 662 /* If we came here, all plexes are synced, and we're free. */ 663 gv_access(v->provider, -1, -1, 0); 664 g_topology_unlock(); 665 G_VINUM_DEBUG(1, "plex sync completed"); 666 gv_volume_flush(v); 667 return (0); 668 } 669 670 /* 671 * Create a new bio struct for the next grow request. 672 */ 673 int 674 gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type, 675 caddr_t data) 676 { 677 struct gv_softc *sc; 678 struct bio *bp; 679 680 KASSERT(p != NULL, ("gv_grow_request: NULL p")); 681 sc = p->vinumconf; 682 KASSERT(sc != NULL, ("gv_grow_request: NULL sc")); 683 684 bp = g_new_bio(); 685 if (bp == NULL) { 686 G_VINUM_DEBUG(0, "grow of %s failed creating bio: " 687 "out of memory", p->name); 688 return (ENOMEM); 689 } 690 691 bp->bio_cmd = type; 692 bp->bio_done = NULL; 693 bp->bio_error = 0; 694 bp->bio_caller1 = p; 695 bp->bio_offset = offset; 696 bp->bio_length = length; 697 bp->bio_pflags |= GV_BIO_GROW; 698 if (data == NULL) 699 data = g_malloc(length, M_WAITOK); 700 bp->bio_pflags |= GV_BIO_MALLOC; 701 bp->bio_data = data; 702 703 gv_post_bio(sc, bp); 704 //gv_plex_start(p, bp); 705 return (0); 706 } 707 708 /* 709 * Finish handling of a bio to a growing plex. 710 */ 711 void 712 gv_grow_complete(struct gv_plex *p, struct bio *bp) 713 { 714 struct gv_softc *sc; 715 struct gv_sd *s; 716 struct gv_volume *v; 717 off_t origsize, offset; 718 int sdcount, err; 719 720 v = p->vol_sc; 721 KASSERT(v != NULL, ("gv_grow_complete: NULL v")); 722 sc = v->vinumconf; 723 KASSERT(sc != NULL, ("gv_grow_complete: NULL sc")); 724 err = 0; 725 726 /* If it was a read, write it. */ 727 if (bp->bio_cmd == BIO_READ) { 728 p->synced += bp->bio_length; 729 err = gv_grow_request(p, bp->bio_offset, bp->bio_length, 730 BIO_WRITE, bp->bio_data); 731 /* If it was a write, read next. */ 732 } else if (bp->bio_cmd == BIO_WRITE) { 733 if (bp->bio_pflags & GV_BIO_MALLOC) 734 g_free(bp->bio_data); 735 736 /* Find the real size of the plex. */ 737 sdcount = gv_sdcount(p, 1); 738 s = LIST_FIRST(&p->subdisks); 739 KASSERT(s != NULL, ("NULL s")); 740 origsize = (s->size * (sdcount - 1)); 741 if (bp->bio_offset + bp->bio_length >= origsize) { 742 G_VINUM_DEBUG(1, "growing of %s completed", p->name); 743 p->flags &= ~GV_PLEX_GROWING; 744 LIST_FOREACH(s, &p->subdisks, in_plex) { 745 s->flags &= ~GV_SD_GROW; 746 gv_set_sd_state(s, GV_SD_UP, 0); 747 } 748 p->size = gv_plex_size(p); 749 gv_update_vol_size(v, gv_vol_size(v)); 750 gv_set_plex_state(p, GV_PLEX_UP, 0); 751 g_topology_lock(); 752 gv_access(v->provider, -1, -1, 0); 753 g_topology_unlock(); 754 p->synced = 0; 755 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 756 /* Issue delayed requests. */ 757 gv_plex_flush(p); 758 } else { 759 offset = bp->bio_offset + bp->bio_length; 760 err = gv_grow_request(p, offset, 761 MIN(bp->bio_length, origsize - offset), 762 BIO_READ, NULL); 763 } 764 } 765 g_destroy_bio(bp); 766 767 if (err) { 768 p->flags &= ~GV_PLEX_GROWING; 769 G_VINUM_DEBUG(0, "error growing plex: error code %d", err); 770 } 771 } 772 773 /* 774 * Create an initialization BIO and send it off to the consumer. Assume that 775 * we're given initialization data as parameter. 776 */ 777 void 778 gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length) 779 { 780 struct gv_drive *d; 781 struct g_consumer *cp; 782 struct bio *bp, *cbp; 783 784 KASSERT(s != NULL, ("gv_init_request: NULL s")); 785 d = s->drive_sc; 786 KASSERT(d != NULL, ("gv_init_request: NULL d")); 787 cp = d->consumer; 788 KASSERT(cp != NULL, ("gv_init_request: NULL cp")); 789 790 bp = g_new_bio(); 791 if (bp == NULL) { 792 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd" 793 " (drive offset %jd); out of memory", s->name, 794 (intmax_t)s->initialized, (intmax_t)start); 795 return; /* XXX: Error codes. */ 796 } 797 bp->bio_cmd = BIO_WRITE; 798 bp->bio_data = data; 799 bp->bio_done = NULL; 800 bp->bio_error = 0; 801 bp->bio_length = length; 802 bp->bio_pflags |= GV_BIO_INIT; 803 bp->bio_offset = start; 804 bp->bio_caller1 = s; 805 806 /* Then ofcourse, we have to clone it. */ 807 cbp = g_clone_bio(bp); 808 if (cbp == NULL) { 809 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd" 810 " (drive offset %jd); out of memory", s->name, 811 (intmax_t)s->initialized, (intmax_t)start); 812 return; /* XXX: Error codes. */ 813 } 814 cbp->bio_done = gv_done; 815 cbp->bio_caller1 = s; 816 d->active++; 817 /* Send it off to the consumer. */ 818 g_io_request(cbp, cp); 819 } 820 821 /* 822 * Handle a finished initialization BIO. 823 */ 824 void 825 gv_init_complete(struct gv_plex *p, struct bio *bp) 826 { 827 struct gv_softc *sc; 828 struct gv_drive *d; 829 struct g_consumer *cp; 830 struct gv_sd *s; 831 off_t start, length; 832 caddr_t data; 833 int error; 834 835 s = bp->bio_caller1; 836 start = bp->bio_offset; 837 length = bp->bio_length; 838 error = bp->bio_error; 839 data = bp->bio_data; 840 841 KASSERT(s != NULL, ("gv_init_complete: NULL s")); 842 d = s->drive_sc; 843 KASSERT(d != NULL, ("gv_init_complete: NULL d")); 844 cp = d->consumer; 845 KASSERT(cp != NULL, ("gv_init_complete: NULL cp")); 846 sc = p->vinumconf; 847 KASSERT(sc != NULL, ("gv_init_complete: NULL sc")); 848 849 g_destroy_bio(bp); 850 851 /* 852 * First we need to find out if it was okay, and abort if it's not. 853 * Then we need to free previous buffers, find out the correct subdisk, 854 * as well as getting the correct starting point and length of the BIO. 855 */ 856 if (start >= s->drive_offset + s->size) { 857 /* Free the data we initialized. */ 858 g_free(data); 859 g_topology_assert_not(); 860 g_topology_lock(); 861 g_access(cp, 0, -1, 0); 862 g_topology_unlock(); 863 if (error) { 864 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE | 865 GV_SETSTATE_CONFIG); 866 } else { 867 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG); 868 s->initialized = 0; 869 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 870 G_VINUM_DEBUG(1, "subdisk '%s' init: finished " 871 "successfully", s->name); 872 } 873 return; 874 } 875 s->initialized += length; 876 start += length; 877 gv_init_request(s, start, data, length); 878 } 879 880 /* 881 * Create a new bio struct for the next parity rebuild. Used both by internal 882 * rebuild of degraded plexes as well as user initiated rebuilds/checks. 883 */ 884 void 885 gv_parity_request(struct gv_plex *p, int flags, off_t offset) 886 { 887 struct gv_softc *sc; 888 struct bio *bp; 889 890 KASSERT(p != NULL, ("gv_parity_request: NULL p")); 891 sc = p->vinumconf; 892 KASSERT(sc != NULL, ("gv_parity_request: NULL sc")); 893 894 bp = g_new_bio(); 895 if (bp == NULL) { 896 G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: " 897 "out of memory", p->name); 898 return; 899 } 900 901 bp->bio_cmd = BIO_WRITE; 902 bp->bio_done = NULL; 903 bp->bio_error = 0; 904 bp->bio_length = p->stripesize; 905 bp->bio_caller1 = p; 906 907 /* 908 * Check if it's a rebuild of a degraded plex or a user request of 909 * parity rebuild. 910 */ 911 if (flags & GV_BIO_REBUILD) 912 bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK); 913 else if (flags & GV_BIO_CHECK) 914 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO); 915 else { 916 G_VINUM_DEBUG(0, "invalid flags given in rebuild"); 917 return; 918 } 919 920 bp->bio_pflags = flags; 921 bp->bio_pflags |= GV_BIO_MALLOC; 922 923 /* We still have more parity to build. */ 924 bp->bio_offset = offset; 925 gv_post_bio(sc, bp); 926 //gv_plex_start(p, bp); /* Send it down to the plex. */ 927 } 928 929 /* 930 * Handle a finished parity write. 931 */ 932 void 933 gv_parity_complete(struct gv_plex *p, struct bio *bp) 934 { 935 struct gv_softc *sc; 936 int error, flags; 937 938 error = bp->bio_error; 939 flags = bp->bio_pflags; 940 flags &= ~GV_BIO_MALLOC; 941 942 sc = p->vinumconf; 943 KASSERT(sc != NULL, ("gv_parity_complete: NULL sc")); 944 945 /* Clean up what we allocated. */ 946 if (bp->bio_pflags & GV_BIO_MALLOC) 947 g_free(bp->bio_data); 948 g_destroy_bio(bp); 949 950 if (error == EAGAIN) { 951 G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx", 952 (intmax_t)p->synced); 953 } 954 955 /* Any error is fatal, except EAGAIN when we're rebuilding. */ 956 if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) { 957 /* Make sure we don't have the lock. */ 958 g_topology_assert_not(); 959 g_topology_lock(); 960 gv_access(p->vol_sc->provider, -1, -1, 0); 961 g_topology_unlock(); 962 G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx " 963 "errno %d", p->name, (intmax_t)p->synced, error); 964 return; 965 } else { 966 p->synced += p->stripesize; 967 } 968 969 if (p->synced >= p->size) { 970 /* Make sure we don't have the lock. */ 971 g_topology_assert_not(); 972 g_topology_lock(); 973 gv_access(p->vol_sc->provider, -1, -1, 0); 974 g_topology_unlock(); 975 /* We're finished. */ 976 G_VINUM_DEBUG(1, "parity operation on %s finished", p->name); 977 p->synced = 0; 978 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 979 return; 980 } 981 982 /* Send down next. It will determine if we need to itself. */ 983 gv_parity_request(p, flags, p->synced); 984 } 985 986 /* 987 * Handle a finished plex rebuild bio. 988 */ 989 void 990 gv_rebuild_complete(struct gv_plex *p, struct bio *bp) 991 { 992 struct gv_softc *sc; 993 struct gv_sd *s; 994 int error, flags; 995 off_t offset; 996 997 error = bp->bio_error; 998 flags = bp->bio_pflags; 999 offset = bp->bio_offset; 1000 flags &= ~GV_BIO_MALLOC; 1001 sc = p->vinumconf; 1002 KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc")); 1003 1004 /* Clean up what we allocated. */ 1005 if (bp->bio_pflags & GV_BIO_MALLOC) 1006 g_free(bp->bio_data); 1007 g_destroy_bio(bp); 1008 1009 if (error) { 1010 g_topology_assert_not(); 1011 g_topology_lock(); 1012 gv_access(p->vol_sc->provider, -1, -1, 0); 1013 g_topology_unlock(); 1014 1015 G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d", 1016 p->name, (intmax_t)offset, error); 1017 p->flags &= ~GV_PLEX_REBUILDING; 1018 p->synced = 0; 1019 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */ 1020 return; 1021 } 1022 1023 offset += (p->stripesize * (gv_sdcount(p, 1) - 1)); 1024 if (offset >= p->size) { 1025 /* We're finished. */ 1026 g_topology_assert_not(); 1027 g_topology_lock(); 1028 gv_access(p->vol_sc->provider, -1, -1, 0); 1029 g_topology_unlock(); 1030 1031 G_VINUM_DEBUG(1, "rebuild of %s finished", p->name); 1032 gv_save_config(p->vinumconf); 1033 p->flags &= ~GV_PLEX_REBUILDING; 1034 p->synced = 0; 1035 /* Try to up all subdisks. */ 1036 LIST_FOREACH(s, &p->subdisks, in_plex) 1037 gv_update_sd_state(s); 1038 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0); 1039 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */ 1040 return; 1041 } 1042 1043 /* Send down next. It will determine if we need to itself. */ 1044 gv_parity_request(p, flags, offset); 1045 } 1046