/*-
 * Copyright (c) 2004, 2007 Lukas Ertl
 * Copyright (c) 2007, 2009 Ulf Lilleengen
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static int gv_check_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);
static int gv_normal_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);
static void gv_plex_flush(struct gv_plex *);
static int gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
    int *, int);
static int gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
    off_t, caddr_t);
static void gv_post_bio(struct gv_softc *, struct bio *);

void
gv_plex_start(struct gv_plex *p, struct bio *bp)
{
        struct bio *cbp;
        struct gv_sd *s;
        struct gv_raid5_packet *wp;
        caddr_t addr;
        off_t bcount, boff, len;

        bcount = bp->bio_length;
        addr = bp->bio_data;
        boff = bp->bio_offset;

        /* Walk over the whole length of the request; we might split it up. */
        while (bcount > 0) {
                wp = NULL;

                /*
                 * RAID5 plexes need special treatment, as a single request
                 * might involve several read/write sub-requests.
                 */
                if (p->org == GV_PLEX_RAID5) {
                        wp = gv_raid5_start(p, bp, addr, boff, bcount);
                        if (wp == NULL)
                                return;

                        len = wp->length;

                        if (TAILQ_EMPTY(&wp->bits))
                                g_free(wp);
                        else if (wp->lockbase != -1)
                                TAILQ_INSERT_TAIL(&p->packets, wp, list);

                /*
                 * Requests to concatenated and striped plexes go straight
                 * through.
                 */
                } else {
                        len = gv_plex_normal_request(p, bp, boff, bcount, addr);
                }
                if (len < 0)
                        return;

                bcount -= len;
                addr += len;
                boff += len;
        }

        /*
         * Fire off all sub-requests. We get the correct consumer (== drive)
         * to send each request to via the subdisk that was stored in
         * cbp->bio_caller1.
         */
        cbp = bioq_takefirst(p->bqueue);
        while (cbp != NULL) {
                /*
                 * RAID5 sub-requests need to come in correct order, otherwise
                 * we trip over the parity, as it might be overwritten by
                 * another sub-request. We abuse cbp->bio_caller2 to mark
                 * potential overlap situations.
                 */
                if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
                        /* Park the bio on the waiting queue. */
                        cbp->bio_pflags |= GV_BIO_ONHOLD;
                        bioq_disksort(p->wqueue, cbp);
                } else {
                        s = cbp->bio_caller1;
                        g_io_request(cbp, s->drive_sc->consumer);
                }
                cbp = bioq_takefirst(p->bqueue);
        }
}
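
/*
 * Translate a plex-relative request (boff/bcount) into the index of the
 * subdisk it starts on, plus the matching subdisk-relative offset and
 * length, clipping the length at the subdisk (concatenated) or stripe
 * (striped) boundary. A worked example with illustrative numbers, three
 * subdisks and a 64 kB stripe size: boff = 320 kB lies in stripe
 * 320 / 64 = 5, which maps to subdisk 5 % 3 = 2; it is the second stripe
 * kept there (5 / 3 = 1), so the subdisk-relative offset is 64 kB.
 */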
static int
gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
    off_t *real_len, int *sdno, int growing)
{
        struct gv_sd *s;
        int i, sdcount;
        off_t len_left, stripeend, stripeno, stripestart;

        switch (p->org) {
        case GV_PLEX_CONCAT:
                /*
                 * Find the subdisk where this request starts. The subdisks
                 * in this list must be ordered by plex_offset.
                 */
                i = 0;
                LIST_FOREACH(s, &p->subdisks, in_plex) {
                        if (s->plex_offset <= boff &&
                            s->plex_offset + s->size > boff) {
                                *sdno = i;
                                break;
                        }
                        i++;
                }
                if (s == NULL || s->drive_sc == NULL)
                        return (GV_ERR_NOTFOUND);

                /* Calculate corresponding offsets on disk. */
                *real_off = boff - s->plex_offset;
                len_left = s->size - (*real_off);
                KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
                *real_len = (bcount > len_left) ? len_left : bcount;
                break;

        case GV_PLEX_STRIPED:
                /* The number of the stripe where the request starts. */
                stripeno = boff / p->stripesize;
                KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));

                /* Take growing subdisks into account when calculating. */
                sdcount = gv_sdcount(p, (boff >= p->synced));

                if (!(boff + bcount <= p->synced) &&
                    (p->flags & GV_PLEX_GROWING) &&
                    !growing)
                        return (GV_ERR_ISBUSY);
                *sdno = stripeno % sdcount;

                KASSERT(*sdno >= 0, ("gv_plex_offset: sdno < 0"));
                stripestart = (stripeno / sdcount) * p->stripesize;
                KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
                stripeend = stripestart + p->stripesize;
                *real_off = boff - (stripeno * p->stripesize) + stripestart;
                len_left = stripeend - *real_off;
                KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));

                *real_len = (bcount <= len_left) ? bcount : len_left;
                break;

        default:
                return (GV_ERR_PLEXORG);
        }
        return (0);
}
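
/*
 * Prepare a normal plex request: clone the incoming bio onto the proper
 * subdisk and queue the clone on the plex's bqueue for gv_plex_start() to
 * issue. Returns the number of bytes handled, so the caller can advance
 * through the original request, or -1 if the request failed or was
 * deferred to p->rqueue (gv_plex_flush() restarts deferred requests).
 */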
static int
gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
    off_t bcount, caddr_t addr)
{
        struct gv_sd *s;
        struct bio *cbp;
        off_t real_len, real_off;
        int i, err, sdno;

        s = NULL;
        sdno = -1;
        real_len = real_off = 0;

        err = ENXIO;

        if (p == NULL || LIST_EMPTY(&p->subdisks))
                goto bad;

        err = gv_plex_offset(p, boff, bcount, &real_off,
            &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW));
        /* If the request was blocked, queue it for a later retry. */
        if (err == GV_ERR_ISBUSY) {
                bioq_disksort(p->rqueue, bp);
                return (-1); /* "Fail", and delay the request. */
        }
        if (err) {
                err = ENXIO;
                goto bad;
        }
        err = ENXIO;

        /* Find the right subdisk. */
        i = 0;
        LIST_FOREACH(s, &p->subdisks, in_plex) {
                if (i == sdno)
                        break;
                i++;
        }

        /* Subdisk not found. */
        if (s == NULL || s->drive_sc == NULL)
                goto bad;

        /* Now check if we can handle the request on this subdisk. */
        switch (s->state) {
        case GV_SD_UP:
                /* If the subdisk is up, just continue. */
                break;
        case GV_SD_DOWN:
                if (bp->bio_pflags & GV_BIO_INTERNAL)
                        G_VINUM_DEBUG(0, "subdisk must be in the stale state "
                            "in order to perform administrative requests");
                goto bad;
        case GV_SD_STALE:
                if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) {
                        G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
                            "regular requests");
                        goto bad;
                }

                G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
                gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
                break;
        case GV_SD_INITIALIZING:
                if (bp->bio_cmd == BIO_READ)
                        goto bad;
                break;
        default:
                /* All other subdisk states mean it's not accessible. */
                goto bad;
        }

        /* Clone the bio and adjust the offsets and sizes. */
        cbp = g_clone_bio(bp);
        if (cbp == NULL) {
                err = ENOMEM;
                goto bad;
        }
        cbp->bio_offset = real_off + s->drive_offset;
        cbp->bio_length = real_len;
        cbp->bio_data = addr;
        cbp->bio_done = gv_done;
        cbp->bio_caller1 = s;

        /* Store the sub-requests now and let others issue them. */
        bioq_insert_tail(p->bqueue, cbp);
        return (real_len);
bad:
        G_VINUM_LOGREQ(0, bp, "plex request failed.");
        /* Building the sub-request failed. If internal BIO, do not deliver. */
        if (bp->bio_pflags & GV_BIO_INTERNAL) {
                if (bp->bio_pflags & GV_BIO_MALLOC)
                        g_free(bp->bio_data);
                g_destroy_bio(bp);
                p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
                    GV_PLEX_GROWING);
                return (-1);
        }
        g_io_deliver(bp, err);
        return (-1);
}

/*
 * Handle a completed request to a striped or concatenated plex.
 */
void
gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
{
        struct bio *pbp;

        pbp = bp->bio_parent;
        if (pbp->bio_error == 0)
                pbp->bio_error = bp->bio_error;
        g_destroy_bio(bp);
        pbp->bio_inbed++;
        if (pbp->bio_children == pbp->bio_inbed) {
                /*
                 * Just set it to length since multiple plexes will
                 * screw things up.
                 */
                pbp->bio_completed = pbp->bio_length;
                if (pbp->bio_pflags & GV_BIO_SYNCREQ)
                        gv_sync_complete(p, pbp);
                else if (pbp->bio_pflags & GV_BIO_GROW)
                        gv_grow_complete(p, pbp);
                else
                        g_io_deliver(pbp, pbp->bio_error);
        }
}
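
/*
 * RAID-5 parity is a plain XOR across the data blocks of a stripe, so any
 * single missing block equals the XOR of all the others: with parity
 * P = D0 ^ D1 ^ D2, a lost D1 is recovered as P ^ D0 ^ D2. The read path
 * below relies on this, XORing each finished sub-request into wp->data.
 */
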
/*
 * Handle a completed request to a RAID-5 plex.
 */
void
gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
{
        struct gv_softc *sc;
        struct bio *cbp, *pbp;
        struct gv_bioq *bq, *bq2;
        struct gv_raid5_packet *wp;
        off_t completed;
        int i;

        completed = 0;
        sc = p->vinumconf;
        wp = bp->bio_caller2;

        switch (bp->bio_parent->bio_cmd) {
        case BIO_READ:
                if (wp == NULL) {
                        completed = bp->bio_completed;
                        break;
                }

                TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                        if (bq->bp != bp)
                                continue;
                        TAILQ_REMOVE(&wp->bits, bq, queue);
                        g_free(bq);
                        for (i = 0; i < wp->length; i++)
                                wp->data[i] ^= bp->bio_data[i];
                        break;
                }
                if (TAILQ_EMPTY(&wp->bits)) {
                        completed = wp->length;
                        if (wp->lockbase != -1) {
                                TAILQ_REMOVE(&p->packets, wp, list);
                                /* Bring the waiting bios back into the game. */
                                pbp = bioq_takefirst(p->wqueue);
                                while (pbp != NULL) {
                                        gv_post_bio(sc, pbp);
                                        pbp = bioq_takefirst(p->wqueue);
                                }
                        }
                        g_free(wp);
                }

                break;

        case BIO_WRITE:
                /* XXX can this ever happen? */
                if (wp == NULL) {
                        completed = bp->bio_completed;
                        break;
                }

                /* Check if we need to handle parity data. */
                TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                        if (bq->bp != bp)
                                continue;
                        TAILQ_REMOVE(&wp->bits, bq, queue);
                        g_free(bq);
                        cbp = wp->parity;
                        if (cbp != NULL) {
                                for (i = 0; i < wp->length; i++)
                                        cbp->bio_data[i] ^= bp->bio_data[i];
                        }
                        break;
                }

                /* Handle parity data. */
                if (TAILQ_EMPTY(&wp->bits)) {
                        if (bp->bio_parent->bio_pflags & GV_BIO_CHECK)
                                i = gv_check_parity(p, bp, wp);
                        else
                                i = gv_normal_parity(p, bp, wp);

                        /* All of our sub-requests have finished. */
                        if (i) {
                                completed = wp->length;
                                TAILQ_REMOVE(&p->packets, wp, list);
                                /* Bring the waiting bios back into the game. */
                                pbp = bioq_takefirst(p->wqueue);
                                while (pbp != NULL) {
                                        gv_post_bio(sc, pbp);
                                        pbp = bioq_takefirst(p->wqueue);
                                }
                                g_free(wp);
                        }
                }

                break;
        }

        pbp = bp->bio_parent;
        if (pbp->bio_error == 0)
                pbp->bio_error = bp->bio_error;
        pbp->bio_completed += completed;

        /* When the original request is finished, we deliver it. */
        pbp->bio_inbed++;
        if (pbp->bio_inbed == pbp->bio_children) {
                /* Hand it over for checking or delivery. */
                if (pbp->bio_cmd == BIO_WRITE &&
                    (pbp->bio_pflags & GV_BIO_CHECK)) {
                        gv_parity_complete(p, pbp);
                } else if (pbp->bio_cmd == BIO_WRITE &&
                    (pbp->bio_pflags & GV_BIO_REBUILD)) {
                        gv_rebuild_complete(p, pbp);
                } else if (pbp->bio_pflags & GV_BIO_INIT) {
                        gv_init_complete(p, pbp);
                } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
                        gv_sync_complete(p, pbp);
                } else if (pbp->bio_pflags & GV_BIO_GROW) {
                        gv_grow_complete(p, pbp);
                } else {
                        g_io_deliver(pbp, pbp->bio_error);
                }
        }

        /* Clean up what we allocated. */
        if (bp->bio_cflags & GV_BIO_MALLOC)
                g_free(bp->bio_data);
        g_destroy_bio(bp);
}

static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
        struct bio *pbp;
        struct gv_sd *s;
        int err, finished, i;

        err = 0;
        finished = 1;

        if (wp->waiting != NULL) {
                pbp = wp->waiting;
                wp->waiting = NULL;
                s = pbp->bio_caller1;
                g_io_request(pbp, s->drive_sc->consumer);
                finished = 0;

        } else if (wp->parity != NULL) {
                pbp = wp->parity;
                wp->parity = NULL;

                /* Check if the parity is correct. */
                for (i = 0; i < wp->length; i++) {
                        if (bp->bio_data[i] != pbp->bio_data[i]) {
                                err = 1;
                                break;
                        }
                }

                /* The parity is not correct... */
                if (err) {
                        bp->bio_parent->bio_error = EAGAIN;

                        /* ... but we rebuild it. */
                        if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
                                s = pbp->bio_caller1;
                                g_io_request(pbp, s->drive_sc->consumer);
                                finished = 0;
                        }
                }

                /*
                 * Clean up the BIO we would have used for rebuilding the
                 * parity.
                 */
                if (finished) {
                        bp->bio_parent->bio_inbed++;
                        g_destroy_bio(pbp);
                }

        }

        return (finished);
}
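
/*
 * Finish a normal (non-check) parity update. If a data write is still held
 * back (wp->waiting), XOR it into the parity buffer and issue it; once only
 * the parity buffer (wp->parity) remains, write that out. The packet is
 * finished when neither is left.
 */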
static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
        struct bio *cbp, *pbp;
        struct gv_sd *s;
        int finished, i;

        finished = 1;

        if (wp->waiting != NULL) {
                pbp = wp->waiting;
                wp->waiting = NULL;
                cbp = wp->parity;
                for (i = 0; i < wp->length; i++)
                        cbp->bio_data[i] ^= pbp->bio_data[i];
                s = pbp->bio_caller1;
                g_io_request(pbp, s->drive_sc->consumer);
                finished = 0;

        } else if (wp->parity != NULL) {
                cbp = wp->parity;
                wp->parity = NULL;
                s = cbp->bio_caller1;
                g_io_request(cbp, s->drive_sc->consumer);
                finished = 0;
        }

        return (finished);
}

/* Flush the queue with delayed requests. */
static void
gv_plex_flush(struct gv_plex *p)
{
        struct bio *bp;

        bp = bioq_takefirst(p->rqueue);
        while (bp != NULL) {
                gv_plex_start(p, bp);
                bp = bioq_takefirst(p->rqueue);
        }
}

/* Hand a bio to the worker thread via the down queue. */
static void
gv_post_bio(struct gv_softc *sc, struct bio *bp)
{

        KASSERT(sc != NULL, ("NULL sc"));
        KASSERT(bp != NULL, ("NULL bp"));
        mtx_lock(&sc->bqueue_mtx);
        bioq_disksort(sc->bqueue_down, bp);
        wakeup(sc);
        mtx_unlock(&sc->bqueue_mtx);
}

int
gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
    off_t length, int type, caddr_t data)
{
        struct gv_softc *sc;
        struct bio *bp;

        KASSERT(from != NULL, ("NULL from"));
        KASSERT(to != NULL, ("NULL to"));
        sc = from->vinumconf;
        KASSERT(sc != NULL, ("NULL sc"));

        bp = g_new_bio();
        if (bp == NULL) {
                G_VINUM_DEBUG(0, "sync from '%s' failed at offset %jd; "
                    "out of memory", from->name, (intmax_t)offset);
                return (ENOMEM);
        }
        bp->bio_length = length;
        bp->bio_done = gv_done;
        bp->bio_pflags |= GV_BIO_SYNCREQ;
        bp->bio_offset = offset;
        bp->bio_caller1 = from;
        bp->bio_caller2 = to;
        bp->bio_cmd = type;
        if (data == NULL)
                data = g_malloc(length, M_WAITOK);
        bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */
        bp->bio_data = data;

        /* Send down next. */
        gv_post_bio(sc, bp);
        //gv_plex_start(from, bp);
        return (0);
}

/*
 * Handle a finished plex sync bio.
 */
int
gv_sync_complete(struct gv_plex *to, struct bio *bp)
{
        struct gv_plex *from, *p;
        struct gv_sd *s;
        struct gv_volume *v;
        struct gv_softc *sc;
        off_t offset;
        int err;

        g_topology_assert_not();

        err = 0;
        KASSERT(to != NULL, ("NULL to"));
        KASSERT(bp != NULL, ("NULL bp"));
        from = bp->bio_caller2;
        KASSERT(from != NULL, ("NULL from"));
        v = to->vol_sc;
        KASSERT(v != NULL, ("NULL v"));
        sc = v->vinumconf;
        KASSERT(sc != NULL, ("NULL sc"));

        /* If it was a read, write it. */
        if (bp->bio_cmd == BIO_READ) {
                err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
                    BIO_WRITE, bp->bio_data);
        /* If it was a write, read the next one. */
        } else if (bp->bio_cmd == BIO_WRITE) {
                if (bp->bio_pflags & GV_BIO_MALLOC)
                        g_free(bp->bio_data);
                to->synced += bp->bio_length;
                /* If we're finished, clean up. */
                if (bp->bio_offset + bp->bio_length >= from->size) {
                        G_VINUM_DEBUG(1, "syncing of %s from %s completed",
                            to->name, from->name);
                        /* Update our state. */
                        LIST_FOREACH(s, &to->subdisks, in_plex)
                                gv_set_sd_state(s, GV_SD_UP, 0);
                        gv_update_plex_state(to);
                        to->flags &= ~GV_PLEX_SYNCING;
                        to->synced = 0;
                        gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
                } else {
                        offset = bp->bio_offset + bp->bio_length;
                        err = gv_sync_request(from, to, offset,
                            MIN(bp->bio_length, from->size - offset),
                            BIO_READ, NULL);
                }
        }
        g_destroy_bio(bp);
        /* Clean up if there was an error. */
        if (err) {
                to->flags &= ~GV_PLEX_SYNCING;
                G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
        }

        /* Check if all plexes are synced, and lower refcounts. */
        g_topology_lock();
        LIST_FOREACH(p, &v->plexes, in_volume) {
                if (p->flags & GV_PLEX_SYNCING) {
                        g_topology_unlock();
                        return (-1);
                }
        }
        /* If we came here, all plexes are synced, and we're free. */
        gv_access(v->provider, -1, -1, 0);
        g_topology_unlock();
        G_VINUM_DEBUG(1, "plex sync completed");
        gv_volume_flush(v);
        return (0);
}
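
/*
 * Growing a plex reuses the sync-style copy cycle: gv_grow_request() issues
 * a read, and gv_grow_complete() writes the same data back to the same plex
 * offset. Because the read completion advances p->synced before issuing the
 * write, the read still maps through the old subdisk count while the write
 * already maps through the grown one (see the gv_sdcount() call in
 * gv_plex_offset()).
 */
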
/*
 * Create a new bio struct for the next grow request.
 */
int
gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
    caddr_t data)
{
        struct gv_softc *sc;
        struct bio *bp;

        KASSERT(p != NULL, ("gv_grow_request: NULL p"));
        sc = p->vinumconf;
        KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));

        bp = g_new_bio();
        if (bp == NULL) {
                G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
                    "out of memory", p->name);
                return (ENOMEM);
        }

        bp->bio_cmd = type;
        bp->bio_done = gv_done;
        bp->bio_error = 0;
        bp->bio_caller1 = p;
        bp->bio_offset = offset;
        bp->bio_length = length;
        bp->bio_pflags |= GV_BIO_GROW;
        if (data == NULL)
                data = g_malloc(length, M_WAITOK);
        bp->bio_pflags |= GV_BIO_MALLOC;
        bp->bio_data = data;

        gv_post_bio(sc, bp);
        //gv_plex_start(p, bp);
        return (0);
}

/*
 * Finish handling of a bio to a growing plex.
 */
void
gv_grow_complete(struct gv_plex *p, struct bio *bp)
{
        struct gv_softc *sc;
        struct gv_sd *s;
        struct gv_volume *v;
        off_t origsize, offset;
        int sdcount, err;

        v = p->vol_sc;
        KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
        sc = v->vinumconf;
        KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
        err = 0;

        /* If it was a read, write it. */
        if (bp->bio_cmd == BIO_READ) {
                p->synced += bp->bio_length;
                err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
                    BIO_WRITE, bp->bio_data);
        /* If it was a write, read next. */
        } else if (bp->bio_cmd == BIO_WRITE) {
                if (bp->bio_pflags & GV_BIO_MALLOC)
                        g_free(bp->bio_data);

                /* Find the real size of the plex. */
                sdcount = gv_sdcount(p, 1);
                s = LIST_FIRST(&p->subdisks);
                KASSERT(s != NULL, ("NULL s"));
                origsize = (s->size * (sdcount - 1));
                if (bp->bio_offset + bp->bio_length >= origsize) {
                        G_VINUM_DEBUG(1, "growing of %s completed", p->name);
                        p->flags &= ~GV_PLEX_GROWING;
                        LIST_FOREACH(s, &p->subdisks, in_plex) {
                                s->flags &= ~GV_SD_GROW;
                                gv_set_sd_state(s, GV_SD_UP, 0);
                        }
                        p->size = gv_plex_size(p);
                        gv_update_vol_size(v, gv_vol_size(v));
                        gv_set_plex_state(p, GV_PLEX_UP, 0);
                        g_topology_lock();
                        gv_access(v->provider, -1, -1, 0);
                        g_topology_unlock();
                        p->synced = 0;
                        gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
                        /* Issue delayed requests. */
                        gv_plex_flush(p);
                } else {
                        offset = bp->bio_offset + bp->bio_length;
                        err = gv_grow_request(p, offset,
                            MIN(bp->bio_length, origsize - offset),
                            BIO_READ, NULL);
                }
        }
        g_destroy_bio(bp);

        if (err) {
                p->flags &= ~GV_PLEX_GROWING;
                G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
        }
}
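
/*
 * Subdisk initialization runs as a self-perpetuating chain: every write
 * issued by gv_init_request() is finished in gv_init_complete(), which
 * re-issues the same buffer at the next offset until the write pointer
 * has passed the end of the subdisk.
 */
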
/*
 * Create an initialization BIO and send it off to the consumer. Assume
 * that we're given initialization data as a parameter.
 */
void
gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
{
        struct gv_drive *d;
        struct g_consumer *cp;
        struct bio *bp, *cbp;

        KASSERT(s != NULL, ("gv_init_request: NULL s"));
        d = s->drive_sc;
        KASSERT(d != NULL, ("gv_init_request: NULL d"));
        cp = d->consumer;
        KASSERT(cp != NULL, ("gv_init_request: NULL cp"));

        bp = g_new_bio();
        if (bp == NULL) {
                G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset "
                    "%jd (drive offset %jd); out of memory", s->name,
                    (intmax_t)s->initialized, (intmax_t)start);
                return; /* XXX: Error codes. */
        }
        bp->bio_cmd = BIO_WRITE;
        bp->bio_data = data;
        bp->bio_done = gv_done;
        bp->bio_error = 0;
        bp->bio_length = length;
        bp->bio_pflags |= GV_BIO_INIT;
        bp->bio_offset = start;
        bp->bio_caller1 = s;

        /* Then, of course, we have to clone it. */
        cbp = g_clone_bio(bp);
        if (cbp == NULL) {
                G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset "
                    "%jd (drive offset %jd); out of memory", s->name,
                    (intmax_t)s->initialized, (intmax_t)start);
                return; /* XXX: Error codes. */
        }
        cbp->bio_done = gv_done;
        cbp->bio_caller1 = s;
        /* Send it off to the consumer. */
        g_io_request(cbp, cp);
}

/*
 * Handle a finished initialization BIO.
 */
void
gv_init_complete(struct gv_plex *p, struct bio *bp)
{
        struct gv_softc *sc;
        struct gv_drive *d;
        struct g_consumer *cp;
        struct gv_sd *s;
        off_t start, length;
        caddr_t data;
        int error;

        s = bp->bio_caller1;
        start = bp->bio_offset;
        length = bp->bio_length;
        error = bp->bio_error;
        data = bp->bio_data;

        KASSERT(s != NULL, ("gv_init_complete: NULL s"));
        d = s->drive_sc;
        KASSERT(d != NULL, ("gv_init_complete: NULL d"));
        cp = d->consumer;
        KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
        sc = p->vinumconf;
        KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));

        g_destroy_bio(bp);

        /*
         * First we need to find out if it was okay, and abort if it's not.
         * Then we need to free previous buffers, find out the correct
         * subdisk, as well as getting the correct starting point and
         * length of the BIO.
         */
        if (start >= s->drive_offset + s->size) {
                /* Free the data we initialized. */
                if (data != NULL)
                        g_free(data);
                g_topology_assert_not();
                g_topology_lock();
                g_access(cp, 0, -1, 0);
                g_topology_unlock();
                if (error) {
                        gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
                            GV_SETSTATE_CONFIG);
                } else {
                        gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
                        s->initialized = 0;
                        gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
                        G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
                            "successfully", s->name);
                }
                return;
        }
        s->initialized += length;
        start += length;
        gv_init_request(s, start, data, length);
}
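
/*
 * Parity maintenance walks the plex one stripe at a time: each request is
 * a plex-level BIO_WRITE of p->stripesize bytes, and the completion
 * handlers below (gv_parity_complete(), gv_rebuild_complete()) decide
 * whether to chain another request and at which offset.
 */
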
/*
 * Create a new bio struct for the next parity rebuild. Used both by the
 * internal rebuild of degraded plexes and by user-initiated rebuilds and
 * checks.
 */
void
gv_parity_request(struct gv_plex *p, int flags, off_t offset)
{
        struct gv_softc *sc;
        struct bio *bp;

        KASSERT(p != NULL, ("gv_parity_request: NULL p"));
        sc = p->vinumconf;
        KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));

        bp = g_new_bio();
        if (bp == NULL) {
                G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
                    "out of memory", p->name);
                return;
        }

        bp->bio_cmd = BIO_WRITE;
        bp->bio_done = gv_done;
        bp->bio_error = 0;
        bp->bio_length = p->stripesize;
        bp->bio_caller1 = p;

        /*
         * Check if it's a rebuild of a degraded plex or a user request of
         * parity rebuild.
         */
        if (flags & GV_BIO_REBUILD)
                bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
        else if (flags & GV_BIO_CHECK)
                bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
        else {
                G_VINUM_DEBUG(0, "invalid flags given in rebuild");
                g_destroy_bio(bp); /* Don't leak the bio. */
                return;
        }

        bp->bio_pflags = flags;
        bp->bio_pflags |= GV_BIO_MALLOC;

        /* We still have more parity to build. */
        bp->bio_offset = offset;
        gv_post_bio(sc, bp);
        //gv_plex_start(p, bp); /* Send it down to the plex. */
}

/*
 * Handle a finished parity write.
 */
void
gv_parity_complete(struct gv_plex *p, struct bio *bp)
{
        struct gv_softc *sc;
        int error, flags;

        error = bp->bio_error;
        flags = bp->bio_pflags;
        flags &= ~GV_BIO_MALLOC;

        sc = p->vinumconf;
        KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));

        /* Clean up what we allocated. */
        if (bp->bio_pflags & GV_BIO_MALLOC)
                g_free(bp->bio_data);
        g_destroy_bio(bp);

        if (error == EAGAIN) {
                G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
                    (intmax_t)p->synced);
        }

        /* Any error is fatal, except EAGAIN when we're rebuilding. */
        if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
                /* Make sure we don't have the lock. */
                g_topology_assert_not();
                g_topology_lock();
                gv_access(p->vol_sc->provider, -1, -1, 0);
                g_topology_unlock();
                G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
                    "errno %d", p->name, (intmax_t)p->synced, error);
                return;
        } else {
                p->synced += p->stripesize;
        }

        if (p->synced >= p->size) {
                /* Make sure we don't have the lock. */
                g_topology_assert_not();
                g_topology_lock();
                gv_access(p->vol_sc->provider, -1, -1, 0);
                g_topology_unlock();
                /* We're finished. */
                G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
                p->synced = 0;
                gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
                return;
        }

        /* Send down the next request; it decides itself when to stop. */
        gv_parity_request(p, flags, p->synced);
}
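
/*
 * A rebuild write reconstructs one parity stripe, which covers
 * stripesize * (sdcount - 1) bytes of plex address space, so
 * gv_rebuild_complete() advances the offset by that amount rather than by
 * a single stripe.
 */
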
/*
 * Handle a finished plex rebuild bio.
 */
void
gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
{
        struct gv_softc *sc;
        struct gv_sd *s;
        int error, flags;
        off_t offset;

        error = bp->bio_error;
        flags = bp->bio_pflags;
        offset = bp->bio_offset;
        flags &= ~GV_BIO_MALLOC;
        sc = p->vinumconf;
        KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));

        /* Clean up what we allocated. */
        if (bp->bio_pflags & GV_BIO_MALLOC)
                g_free(bp->bio_data);
        g_destroy_bio(bp);

        if (error) {
                g_topology_assert_not();
                g_topology_lock();
                gv_access(p->vol_sc->provider, -1, -1, 0);
                g_topology_unlock();

                G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d",
                    p->name, (intmax_t)offset, error);
                p->flags &= ~GV_PLEX_REBUILDING;
                p->synced = 0;
                gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
                return;
        }

        offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
        if (offset >= p->size) {
                /* We're finished. */
                g_topology_assert_not();
                g_topology_lock();
                gv_access(p->vol_sc->provider, -1, -1, 0);
                g_topology_unlock();

                G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
                gv_save_config(p->vinumconf);
                p->flags &= ~GV_PLEX_REBUILDING;
                p->synced = 0;
                /* Try to up all subdisks. */
                LIST_FOREACH(s, &p->subdisks, in_plex)
                        gv_update_sd_state(s);
                gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
                gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
                return;
        }

        /* Send down the next request; it decides itself when to stop. */
        gv_parity_request(p, flags, offset);
}