/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2004, 2007 Lukas Ertl
 * Copyright (c) 2007, 2009 Ulf Lilleengen
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/geom_dbg.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static int	gv_check_parity(struct gv_plex *, struct bio *,
		    struct gv_raid5_packet *);
static int	gv_normal_parity(struct gv_plex *, struct bio *,
		    struct gv_raid5_packet *);
static void	gv_plex_flush(struct gv_plex *);
static int	gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
		    int *, int);
static int	gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
		    off_t, caddr_t);
static void	gv_post_bio(struct gv_softc *, struct bio *);

void
gv_plex_start(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp;
	struct gv_sd *s;
	struct gv_raid5_packet *wp;
	caddr_t addr;
	off_t bcount, boff, len;

	bcount = bp->bio_length;
	addr = bp->bio_data;
	boff = bp->bio_offset;

	/* Walk over the whole length of the request; we might split it up. */
	while (bcount > 0) {
		wp = NULL;

		/*
		 * RAID5 plexes need special treatment, as a single request
		 * might involve several read/write sub-requests.
		 */
		if (p->org == GV_PLEX_RAID5) {
			wp = gv_raid5_start(p, bp, addr, boff, bcount);
			if (wp == NULL)
				return;

			len = wp->length;

			if (TAILQ_EMPTY(&wp->bits))
				g_free(wp);
			else if (wp->lockbase != -1)
				TAILQ_INSERT_TAIL(&p->packets, wp, list);

		/*
		 * Requests to concatenated and striped plexes go straight
		 * through.
		 */
		} else {
			len = gv_plex_normal_request(p, bp, boff, bcount, addr);
		}
		if (len < 0)
			return;

		bcount -= len;
		addr += len;
		boff += len;
	}
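
	/*
	 * Worked example (illustrative numbers, not from the sources): on
	 * a striped plex with two subdisks and a 64 KiB stripe size, a
	 * 128 KiB request at byte offset 48 KiB leaves the loop above as
	 * three sub-requests:
	 *
	 *   16 KiB to sd0 at subdisk offset 48 KiB (tail of stripe 0),
	 *   64 KiB to sd1 at subdisk offset  0     (all of stripe 1),
	 *   48 KiB to sd0 at subdisk offset 64 KiB (head of stripe 2).
	 */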

	/*
	 * Fire off all sub-requests. We get the correct consumer (== drive)
	 * to send each request to via the subdisk that was stored in
	 * cbp->bio_caller1.
	 */
	cbp = bioq_takefirst(p->bqueue);
	while (cbp != NULL) {
		/*
		 * RAID5 sub-requests need to come in correct order, otherwise
		 * we trip over the parity, as it might be overwritten by
		 * another sub-request. We abuse cbp->bio_caller2 to mark
		 * potential overlap situations.
		 */
		if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
			/* Park the bio on the waiting queue. */
			cbp->bio_pflags |= GV_BIO_ONHOLD;
			bioq_disksort(p->wqueue, cbp);
		} else {
			s = cbp->bio_caller1;
			g_io_request(cbp, s->drive_sc->consumer);
		}
		cbp = bioq_takefirst(p->bqueue);
	}
}

static int
gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
    off_t *real_len, int *sdno, int growing)
{
	struct gv_sd *s;
	int i, sdcount;
	off_t len_left, stripeend, stripeno, stripestart;

	switch (p->org) {
	case GV_PLEX_CONCAT:
		/*
		 * Find the subdisk where this request starts. The subdisks in
		 * this list must be ordered by plex_offset.
		 */
		i = 0;
		LIST_FOREACH(s, &p->subdisks, in_plex) {
			if (s->plex_offset <= boff &&
			    s->plex_offset + s->size > boff) {
				*sdno = i;
				break;
			}
			i++;
		}
		if (s == NULL || s->drive_sc == NULL)
			return (GV_ERR_NOTFOUND);

		/* Calculate corresponding offsets on disk. */
		*real_off = boff - s->plex_offset;
		len_left = s->size - (*real_off);
		KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
		*real_len = (bcount > len_left) ? len_left : bcount;
		break;

	case GV_PLEX_STRIPED:
		/* The number of the stripe where the request starts. */
		stripeno = boff / p->stripesize;
		KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));

		/* Take growing subdisks into account when calculating. */
		sdcount = gv_sdcount(p, (boff >= p->synced));

		if (!(boff + bcount <= p->synced) &&
		    (p->flags & GV_PLEX_GROWING) &&
		    !growing)
			return (GV_ERR_ISBUSY);
		*sdno = stripeno % sdcount;

		KASSERT(*sdno >= 0, ("gv_plex_offset: sdno < 0"));
		stripestart = (stripeno / sdcount) * p->stripesize;
		KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
		stripeend = stripestart + p->stripesize;
		*real_off = boff - (stripeno * p->stripesize) + stripestart;
		len_left = stripeend - *real_off;
		KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));

		*real_len = (bcount <= len_left) ? bcount : len_left;
		break;

	default:
		return (GV_ERR_PLEXORG);
	}
	return (0);
}
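
/*
 * Sketch of the striped mapping above, rewritten with modular arithmetic
 * (derived from the code; illustrative, not authoritative):
 *
 *   stripeno = boff / stripesize
 *   sdno     = stripeno % sdcount
 *   real_off = (stripeno / sdcount) * stripesize + (boff % stripesize)
 *
 * E.g. boff = 160 KiB, stripesize = 64 KiB, sdcount = 3 gives
 * stripeno = 2, sdno = 2, real_off = 0 + 32 KiB = 32 KiB.
 */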

/*
 * Prepare a normal plex request.
 */
static int
gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
    off_t bcount, caddr_t addr)
{
	struct gv_sd *s;
	struct bio *cbp;
	off_t real_len, real_off;
	int i, err, sdno;

	s = NULL;
	sdno = -1;
	real_len = real_off = 0;

	err = ENXIO;

	if (p == NULL || LIST_EMPTY(&p->subdisks))
		goto bad;

	err = gv_plex_offset(p, boff, bcount, &real_off,
	    &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW));
	/* If the request was blocked, park it on the delayed-request queue. */
	if (err == GV_ERR_ISBUSY) {
		bioq_disksort(p->rqueue, bp);
		return (-1);	/* "Fail", and delay request. */
	}
	if (err) {
		err = ENXIO;
		goto bad;
	}
	err = ENXIO;

	/* Find the right subdisk. */
	i = 0;
	LIST_FOREACH(s, &p->subdisks, in_plex) {
		if (i == sdno)
			break;
		i++;
	}

	/* Subdisk not found. */
	if (s == NULL || s->drive_sc == NULL)
		goto bad;

	/* Now check if we can handle the request on this subdisk. */
	switch (s->state) {
	case GV_SD_UP:
		/* If the subdisk is up, just continue. */
		break;
	case GV_SD_DOWN:
		if (bp->bio_pflags & GV_BIO_INTERNAL)
			G_VINUM_DEBUG(0, "subdisk must be in the stale state in"
			    " order to perform administrative requests");
		goto bad;
	case GV_SD_STALE:
		if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) {
			G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
			    "regular requests");
			goto bad;
		}

		G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
		break;
	case GV_SD_INITIALIZING:
		if (bp->bio_cmd == BIO_READ)
			goto bad;
		break;
	default:
		/* All other subdisk states mean it's not accessible. */
		goto bad;
	}

	/* Clone the bio and adjust the offsets and sizes. */
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		err = ENOMEM;
		goto bad;
	}
	cbp->bio_offset = real_off + s->drive_offset;
	cbp->bio_length = real_len;
	cbp->bio_data = addr;
	cbp->bio_done = gv_done;
	cbp->bio_caller1 = s;
	s->drive_sc->active++;

	/* Store the sub-requests now and let others issue them. */
	bioq_insert_tail(p->bqueue, cbp);
	return (real_len);
bad:
	G_VINUM_LOGREQ(0, bp, "plex request failed.");
	/*
	 * Building the sub-request failed; if it was an internal BIO,
	 * don't deliver it.
	 */
	if (bp->bio_pflags & GV_BIO_INTERNAL) {
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);
		g_destroy_bio(bp);
		p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
		    GV_PLEX_GROWING);
		return (-1);
	}
	g_io_deliver(bp, err);
	return (-1);
}

/*
 * Handle a completed request to a striped or concatenated plex.
 */
void
gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
{
	struct bio *pbp;

	pbp = bp->bio_parent;
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	g_destroy_bio(bp);
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		/*
		 * Just set it to length since multiple plexes will
		 * screw things up.
		 */
		pbp->bio_completed = pbp->bio_length;
		if (pbp->bio_pflags & GV_BIO_SYNCREQ)
			gv_sync_complete(p, pbp);
		else if (pbp->bio_pflags & GV_BIO_GROW)
			gv_grow_complete(p, pbp);
		else
			g_io_deliver(pbp, pbp->bio_error);
	}
}

/*
 * Handle a completed request to a RAID-5 plex.
 */
void
gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp;
	off_t completed;
	int i;

	completed = 0;
	sc = p->vinumconf;
	wp = bp->bio_caller2;

	switch (bp->bio_parent->bio_cmd) {
	case BIO_READ:
		if (wp == NULL) {
			completed = bp->bio_completed;
			break;
		}

		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp != bp)
				continue;
			TAILQ_REMOVE(&wp->bits, bq, queue);
			g_free(bq);
			for (i = 0; i < wp->length; i++)
				wp->data[i] ^= bp->bio_data[i];
			break;
		}
		if (TAILQ_EMPTY(&wp->bits)) {
			completed = wp->length;
			if (wp->lockbase != -1) {
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					gv_post_bio(sc, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
			}
			g_free(wp);
		}
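
		/*
		 * Note (a reading of the code above): for a degraded read,
		 * the XOR loop folds each completed sub-read into wp->data,
		 * so once the last bit is in, wp->data should hold the
		 * reconstructed contents of the missing subdisk -- classic
		 * RAID-5 recovery, where the lost block is the XOR of all
		 * surviving blocks and the parity.
		 */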

		break;

	case BIO_WRITE:
		/* XXX can this ever happen? */
		if (wp == NULL) {
			completed = bp->bio_completed;
			break;
		}

		/* Check if we need to handle parity data. */
		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp != bp)
				continue;
			TAILQ_REMOVE(&wp->bits, bq, queue);
			g_free(bq);
			cbp = wp->parity;
			if (cbp != NULL) {
				for (i = 0; i < wp->length; i++)
					cbp->bio_data[i] ^= bp->bio_data[i];
			}
			break;
		}

		/* Handle parity data. */
		if (TAILQ_EMPTY(&wp->bits)) {
			if (bp->bio_parent->bio_pflags & GV_BIO_CHECK)
				i = gv_check_parity(p, bp, wp);
			else
				i = gv_normal_parity(p, bp, wp);

			/* All of our sub-requests have finished. */
			if (i) {
				completed = wp->length;
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					gv_post_bio(sc, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
				g_free(wp);
			}
		}

		break;
	}

	pbp = bp->bio_parent;
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += completed;

	/* When the original request is finished, we deliver it. */
	pbp->bio_inbed++;
	if (pbp->bio_inbed == pbp->bio_children) {
		/* Hand it over for checking or delivery. */
		if (pbp->bio_cmd == BIO_WRITE &&
		    (pbp->bio_pflags & GV_BIO_CHECK)) {
			gv_parity_complete(p, pbp);
		} else if (pbp->bio_cmd == BIO_WRITE &&
		    (pbp->bio_pflags & GV_BIO_REBUILD)) {
			gv_rebuild_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_INIT) {
			gv_init_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
			gv_sync_complete(p, pbp);
		} else if (pbp->bio_pflags & GV_BIO_GROW) {
			gv_grow_complete(p, pbp);
		} else {
			g_io_deliver(pbp, pbp->bio_error);
		}
	}

	/* Clean up what we allocated. */
	if (bp->bio_cflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);
}

static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *pbp;
	struct gv_sd *s;
	int err, finished, i;

	err = 0;
	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		s = pbp->bio_caller1;
		g_io_request(pbp, s->drive_sc->consumer);
		finished = 0;

	} else if (wp->parity != NULL) {
		pbp = wp->parity;
		wp->parity = NULL;

		/* Check if the parity is correct. */
		for (i = 0; i < wp->length; i++) {
			if (bp->bio_data[i] != pbp->bio_data[i]) {
				err = 1;
				break;
			}
		}

		/* The parity is not correct... */
		if (err) {
			bp->bio_parent->bio_error = EAGAIN;

			/* ... but we rebuild it. */
			if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
				s = pbp->bio_caller1;
				g_io_request(pbp, s->drive_sc->consumer);
				finished = 0;
			}
		}
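
		/*
		 * In other words (a summary of the code above, not
		 * authoritative): a mismatch flags the parent bio with
		 * EAGAIN; if the caller asked for a rewrite (GV_BIO_PARITY),
		 * the parity buffer is reissued to the drive, otherwise the
		 * check merely reports the inconsistency (see
		 * gv_parity_complete()).
		 */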

		/*
		 * Clean up the BIO we would have used for rebuilding the
		 * parity.
		 */
		if (finished) {
			bp->bio_parent->bio_inbed++;
			g_destroy_bio(pbp);
		}
	}

	return (finished);
}

static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *cbp, *pbp;
	struct gv_sd *s;
	int finished, i;

	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		cbp = wp->parity;
		for (i = 0; i < wp->length; i++)
			cbp->bio_data[i] ^= pbp->bio_data[i];
		s = pbp->bio_caller1;
		g_io_request(pbp, s->drive_sc->consumer);
		finished = 0;

	} else if (wp->parity != NULL) {
		cbp = wp->parity;
		wp->parity = NULL;
		s = cbp->bio_caller1;
		g_io_request(cbp, s->drive_sc->consumer);
		finished = 0;
	}

	return (finished);
}

/* Flush the queue with delayed requests. */
static void
gv_plex_flush(struct gv_plex *p)
{
	struct bio *bp;

	bp = bioq_takefirst(p->rqueue);
	while (bp != NULL) {
		gv_plex_start(p, bp);
		bp = bioq_takefirst(p->rqueue);
	}
}

static void
gv_post_bio(struct gv_softc *sc, struct bio *bp)
{

	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(bp != NULL, ("NULL bp"));
	mtx_lock(&sc->bqueue_mtx);
	bioq_disksort(sc->bqueue_down, bp);
	wakeup(sc);
	mtx_unlock(&sc->bqueue_mtx);
}

int
gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
    off_t length, int type, caddr_t data)
{
	struct gv_softc *sc;
	struct bio *bp;

	KASSERT(from != NULL, ("NULL from"));
	KASSERT(to != NULL, ("NULL to"));
	sc = from->vinumconf;
	KASSERT(sc != NULL, ("NULL sc"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "sync from '%s' failed at offset %jd;"
		    " out of memory", from->name, (intmax_t)offset);
		return (ENOMEM);
	}
	bp->bio_length = length;
	bp->bio_done = NULL;
	bp->bio_pflags |= GV_BIO_SYNCREQ;
	bp->bio_offset = offset;
	bp->bio_caller1 = from;
	bp->bio_caller2 = to;
	bp->bio_cmd = type;
	if (data == NULL)
		data = g_malloc(length, M_WAITOK);
	bp->bio_pflags |= GV_BIO_MALLOC;	/* Free on the next run. */
	bp->bio_data = data;

	/* Send down next. */
	gv_post_bio(sc, bp);
	//gv_plex_start(from, bp);
	return (0);
}
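
/*
 * A note on the flow (summarizing the code, not adding to it): syncing is
 * a completion-driven copy loop.  gv_sync_request() queues a BIO_READ
 * against the source plex; gv_sync_complete() below turns the filled
 * buffer around as a BIO_WRITE to the target plex, and every finished
 * write schedules the next read at offset + length until the source has
 * been copied in full.
 */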

/*
 * Handle a finished plex sync bio.
 */
int
gv_sync_complete(struct gv_plex *to, struct bio *bp)
{
	struct gv_plex *from, *p;
	struct gv_sd *s;
	struct gv_volume *v;
	struct gv_softc *sc;
	off_t offset;
	int err;

	g_topology_assert_not();

	err = 0;
	KASSERT(to != NULL, ("NULL to"));
	KASSERT(bp != NULL, ("NULL bp"));
	from = bp->bio_caller2;
	KASSERT(from != NULL, ("NULL from"));
	v = to->vol_sc;
	KASSERT(v != NULL, ("NULL v"));
	sc = v->vinumconf;
	KASSERT(sc != NULL, ("NULL sc"));

	/* If it was a read, write it. */
	if (bp->bio_cmd == BIO_READ) {
		err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
		    BIO_WRITE, bp->bio_data);
	/* If it was a write, read the next one. */
	} else if (bp->bio_cmd == BIO_WRITE) {
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);
		to->synced += bp->bio_length;
		/* If we're finished, clean up. */
		if (bp->bio_offset + bp->bio_length >= from->size) {
			G_VINUM_DEBUG(1, "syncing of %s from %s completed",
			    to->name, from->name);
			/* Update our state. */
			LIST_FOREACH(s, &to->subdisks, in_plex)
				gv_set_sd_state(s, GV_SD_UP, 0);
			gv_update_plex_state(to);
			to->flags &= ~GV_PLEX_SYNCING;
			to->synced = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
		} else {
			offset = bp->bio_offset + bp->bio_length;
			err = gv_sync_request(from, to, offset,
			    MIN(bp->bio_length, from->size - offset),
			    BIO_READ, NULL);
		}
	}
	g_destroy_bio(bp);
	/* Clean up if there was an error. */
	if (err) {
		to->flags &= ~GV_PLEX_SYNCING;
		G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
	}

	/* Check if all plexes are synced, and lower refcounts. */
	g_topology_lock();
	LIST_FOREACH(p, &v->plexes, in_volume) {
		if (p->flags & GV_PLEX_SYNCING) {
			g_topology_unlock();
			return (-1);
		}
	}
	/* If we came here, all plexes are synced, and we're free. */
	gv_access(v->provider, -1, -1, 0);
	g_topology_unlock();
	G_VINUM_DEBUG(1, "plex sync completed");
	gv_volume_flush(v);
	return (0);
}

/*
 * Create a new bio struct for the next grow request.
 */
int
gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
    caddr_t data)
{
	struct gv_softc *sc;
	struct bio *bp;

	KASSERT(p != NULL, ("gv_grow_request: NULL p"));
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
		    "out of memory", p->name);
		return (ENOMEM);
	}

	bp->bio_cmd = type;
	bp->bio_done = NULL;
	bp->bio_error = 0;
	bp->bio_caller1 = p;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_pflags |= GV_BIO_GROW;
	if (data == NULL)
		data = g_malloc(length, M_WAITOK);
	bp->bio_pflags |= GV_BIO_MALLOC;
	bp->bio_data = data;

	gv_post_bio(sc, bp);
	//gv_plex_start(p, bp);
	return (0);
}

/*
 * Finish handling of a bio to a growing plex.
 */
void
gv_grow_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct gv_sd *s;
	struct gv_volume *v;
	off_t origsize, offset;
	int sdcount, err;

	v = p->vol_sc;
	KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
	sc = v->vinumconf;
	KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
	err = 0;

	/* If it was a read, write it. */
	if (bp->bio_cmd == BIO_READ) {
		p->synced += bp->bio_length;
		err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
		    BIO_WRITE, bp->bio_data);
	/* If it was a write, read the next one. */
	} else if (bp->bio_cmd == BIO_WRITE) {
		if (bp->bio_pflags & GV_BIO_MALLOC)
			g_free(bp->bio_data);

		/* Find the real size of the plex. */
		sdcount = gv_sdcount(p, 1);
		s = LIST_FIRST(&p->subdisks);
		KASSERT(s != NULL, ("NULL s"));
		origsize = (s->size * (sdcount - 1));
		if (bp->bio_offset + bp->bio_length >= origsize) {
			G_VINUM_DEBUG(1, "growing of %s completed", p->name);
			p->flags &= ~GV_PLEX_GROWING;
			LIST_FOREACH(s, &p->subdisks, in_plex) {
				s->flags &= ~GV_SD_GROW;
				gv_set_sd_state(s, GV_SD_UP, 0);
			}
			p->size = gv_plex_size(p);
			gv_update_vol_size(v, gv_vol_size(v));
			gv_set_plex_state(p, GV_PLEX_UP, 0);
			g_topology_lock();
			gv_access(v->provider, -1, -1, 0);
			g_topology_unlock();
			p->synced = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
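
			/*
			 * While the plex was growing, requests beyond the
			 * synced watermark made gv_plex_offset() return
			 * GV_ERR_ISBUSY and were parked on p->rqueue by
			 * gv_plex_normal_request(); they can be replayed now.
			 */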
			/* Issue delayed requests. */
			gv_plex_flush(p);
		} else {
			offset = bp->bio_offset + bp->bio_length;
			err = gv_grow_request(p, offset,
			    MIN(bp->bio_length, origsize - offset),
			    BIO_READ, NULL);
		}
	}
	g_destroy_bio(bp);

	if (err) {
		p->flags &= ~GV_PLEX_GROWING;
		G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
	}
}

/*
 * Create an initialization BIO and send it off to the consumer. Assume that
 * we're given initialization data as a parameter.
 */
void
gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
{
	struct gv_drive *d;
	struct g_consumer *cp;
	struct bio *bp, *cbp;

	KASSERT(s != NULL, ("gv_init_request: NULL s"));
	d = s->drive_sc;
	KASSERT(d != NULL, ("gv_init_request: NULL d"));
	cp = d->consumer;
	KASSERT(cp != NULL, ("gv_init_request: NULL cp"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
		    " (drive offset %jd); out of memory", s->name,
		    (intmax_t)s->initialized, (intmax_t)start);
		return; /* XXX: Error codes. */
	}
	bp->bio_cmd = BIO_WRITE;
	bp->bio_data = data;
	bp->bio_done = NULL;
	bp->bio_error = 0;
	bp->bio_length = length;
	bp->bio_pflags |= GV_BIO_INIT;
	bp->bio_offset = start;
	bp->bio_caller1 = s;

	/* Then, of course, we have to clone it. */
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
		    " (drive offset %jd); out of memory", s->name,
		    (intmax_t)s->initialized, (intmax_t)start);
		return; /* XXX: Error codes. */
	}
	cbp->bio_done = gv_done;
	cbp->bio_caller1 = s;
	d->active++;
	/* Send it off to the consumer. */
	g_io_request(cbp, cp);
}

/*
 * Handle a finished initialization BIO.
 */
void
gv_init_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct gv_drive *d;
	struct g_consumer *cp;
	struct gv_sd *s;
	off_t start, length;
	caddr_t data;
	int error;

	s = bp->bio_caller1;
	start = bp->bio_offset;
	length = bp->bio_length;
	error = bp->bio_error;
	data = bp->bio_data;

	KASSERT(s != NULL, ("gv_init_complete: NULL s"));
	d = s->drive_sc;
	KASSERT(d != NULL, ("gv_init_complete: NULL d"));
	cp = d->consumer;
	KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));

	g_destroy_bio(bp);

	/*
	 * First we need to find out if it was okay, and abort if it's not.
	 * Then we need to free previous buffers, find out the correct subdisk,
	 * as well as getting the correct starting point and length of the BIO.
	 */
	if (start >= s->drive_offset + s->size) {
		/* Free the data we initialized. */
		g_free(data);
		g_topology_assert_not();
		g_topology_lock();
		g_access(cp, 0, -1, 0);
		g_topology_unlock();
		if (error) {
			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
			    GV_SETSTATE_CONFIG);
		} else {
			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
			s->initialized = 0;
			gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
			G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
			    "successfully", s->name);
		}
		return;
	}
	s->initialized += length;
	start += length;
	gv_init_request(s, start, data, length);
}
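
/*
 * Initialization, too, is self-perpetuating (a summary of the code above):
 * each completed write advances start and s->initialized by length and
 * re-issues gv_init_request() until start reaches the end of the subdisk,
 * at which point the consumer reference is dropped and the subdisk is
 * marked up (or stale, if a write failed).
 */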

/*
 * Create a new bio struct for the next parity rebuild. Used both by the
 * internal rebuild of degraded plexes and by user-initiated rebuilds/checks.
 */
void
gv_parity_request(struct gv_plex *p, int flags, off_t offset)
{
	struct gv_softc *sc;
	struct bio *bp;

	KASSERT(p != NULL, ("gv_parity_request: NULL p"));
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));

	bp = g_new_bio();
	if (bp == NULL) {
		G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
		    "out of memory", p->name);
		return;
	}

	bp->bio_cmd = BIO_WRITE;
	bp->bio_done = NULL;
	bp->bio_error = 0;
	bp->bio_length = p->stripesize;
	bp->bio_caller1 = p;

	/*
	 * Check if it's a rebuild of a degraded plex or a user request of
	 * parity rebuild.
	 */
	if (flags & GV_BIO_REBUILD)
		bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
	else if (flags & GV_BIO_CHECK)
		bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
	else {
		G_VINUM_DEBUG(0, "invalid flags given in rebuild");
		return;
	}

	bp->bio_pflags = flags;
	bp->bio_pflags |= GV_BIO_MALLOC;

	/* We still have more parity to build. */
	bp->bio_offset = offset;
	gv_post_bio(sc, bp);
	//gv_plex_start(p, bp); /* Send it down to the plex. */
}

/*
 * Handle a finished parity write.
 */
void
gv_parity_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	int error, flags;

	error = bp->bio_error;
	flags = bp->bio_pflags;
	flags &= ~GV_BIO_MALLOC;

	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));

	/* Clean up what we allocated. */
	if (bp->bio_pflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);

	if (error == EAGAIN) {
		G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
		    (intmax_t)p->synced);
	}

	/* Any error is fatal, except EAGAIN when we're rebuilding. */
	if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
		/* Make sure we don't have the lock. */
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();
		G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
		    "errno %d", p->name, (intmax_t)p->synced, error);
		return;
	} else {
		p->synced += p->stripesize;
	}

	if (p->synced >= p->size) {
		/* Make sure we don't have the lock. */
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();
		/* We're finished. */
		G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
		p->synced = 0;
		gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
		return;
	}

	/*
	 * Send down the next request; its completion path decides whether
	 * more are needed.
	 */
	gv_parity_request(p, flags, p->synced);
}

/*
 * Handle a finished plex rebuild bio.
 */
void
gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
{
	struct gv_softc *sc;
	struct gv_sd *s;
	int error, flags;
	off_t offset;

	error = bp->bio_error;
	flags = bp->bio_pflags;
	offset = bp->bio_offset;
	flags &= ~GV_BIO_MALLOC;
	sc = p->vinumconf;
	KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));
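
	/*
	 * Each rebuild request covers one stripe's worth of user data,
	 * i.e. stripesize * (number of subdisks - 1) bytes; the offset
	 * advance below steps through the plex in exactly those units.
	 */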

	/* Clean up what we allocated. */
	if (bp->bio_pflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);

	if (error) {
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();

		G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d",
		    p->name, (intmax_t)offset, error);
		p->flags &= ~GV_PLEX_REBUILDING;
		p->synced = 0;
		gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
		return;
	}

	offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
	if (offset >= p->size) {
		/* We're finished. */
		g_topology_assert_not();
		g_topology_lock();
		gv_access(p->vol_sc->provider, -1, -1, 0);
		g_topology_unlock();

		G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
		gv_save_config(p->vinumconf);
		p->flags &= ~GV_PLEX_REBUILDING;
		p->synced = 0;
		/* Try to up all subdisks. */
		LIST_FOREACH(s, &p->subdisks, in_plex)
			gv_update_sd_state(s);
		gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
		gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
		return;
	}

	/*
	 * Send down the next request; its completion path decides whether
	 * more are needed.
	 */
	gv_parity_request(p, flags, offset);
}