/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2004, 2007 Lukas Ertl
 * Copyright (c) 2007, 2009 Ulf Lilleengen
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static int      gv_check_parity(struct gv_plex *, struct bio *,
                    struct gv_raid5_packet *);
static int      gv_normal_parity(struct gv_plex *, struct bio *,
                    struct gv_raid5_packet *);
static void     gv_plex_flush(struct gv_plex *);
static int      gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *,
                    off_t *, int *, int);
static int      gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
                    off_t, caddr_t);
static void     gv_post_bio(struct gv_softc *, struct bio *);

/*
 * Start a request to a plex.  The request is split up into sub-requests for
 * the plex's subdisks, which are then fired off to the drives.
 */
void
gv_plex_start(struct gv_plex *p, struct bio *bp)
{
        struct bio *cbp;
        struct gv_sd *s;
        struct gv_raid5_packet *wp;
        caddr_t addr;
        off_t bcount, boff, len;

        bcount = bp->bio_length;
        addr = bp->bio_data;
        boff = bp->bio_offset;

        /* Walk over the whole length of the request, we might split it up. */
        while (bcount > 0) {
                wp = NULL;

                /*
                 * RAID5 plexes need special treatment, as a single request
                 * might involve several read/write sub-requests.
                 */
                if (p->org == GV_PLEX_RAID5) {
                        wp = gv_raid5_start(p, bp, addr, boff, bcount);
                        if (wp == NULL)
                                return;

                        len = wp->length;

                        if (TAILQ_EMPTY(&wp->bits))
                                g_free(wp);
                        else if (wp->lockbase != -1)
                                TAILQ_INSERT_TAIL(&p->packets, wp, list);

                /*
                 * Requests to concatenated and striped plexes go straight
                 * through.
                 */
                } else {
                        len = gv_plex_normal_request(p, bp, boff, bcount, addr);
                }
                if (len < 0)
                        return;

                bcount -= len;
                addr += len;
                boff += len;
        }

        /*
         * Fire off all sub-requests.  We get the correct consumer (== drive)
         * to send each request to via the subdisk that was stored in
         * cbp->bio_caller1.
         */
        cbp = bioq_takefirst(p->bqueue);
        while (cbp != NULL) {
                /*
                 * RAID5 sub-requests need to come in correct order, otherwise
                 * we trip over the parity, as it might be overwritten by
                 * another sub-request.  We abuse cbp->bio_caller2 to mark
                 * potential overlap situations.
                 */
                if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
                        /* Park the bio on the waiting queue. */
                        cbp->bio_pflags |= GV_BIO_ONHOLD;
                        bioq_disksort(p->wqueue, cbp);
                } else {
                        s = cbp->bio_caller1;
                        g_io_request(cbp, s->drive_sc->consumer);
                }
                cbp = bioq_takefirst(p->bqueue);
        }
}

/*
 * Calculate, for a request at a given plex offset, the number of the subdisk
 * it maps to and the corresponding offset and length on that subdisk.
 */
static int
gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
    off_t *real_len, int *sdno, int growing)
{
        struct gv_sd *s;
        int i, sdcount;
        off_t len_left, stripeend, stripeno, stripestart;

        switch (p->org) {
        case GV_PLEX_CONCAT:
                /*
                 * Find the subdisk where this request starts.  The subdisks
                 * in this list must be ordered by plex_offset.
                 */
                i = 0;
                LIST_FOREACH(s, &p->subdisks, in_plex) {
                        if (s->plex_offset <= boff &&
                            s->plex_offset + s->size > boff) {
                                *sdno = i;
                                break;
                        }
                        i++;
                }
                if (s == NULL || s->drive_sc == NULL)
                        return (GV_ERR_NOTFOUND);

                /* Calculate corresponding offsets on disk. */
                *real_off = boff - s->plex_offset;
                len_left = s->size - (*real_off);
                KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
                *real_len = (bcount > len_left) ? len_left : bcount;
                break;

        case GV_PLEX_STRIPED:
                /* The number of the stripe where the request starts. */
                stripeno = boff / p->stripesize;
                KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));

                /* Take growing subdisks into account when calculating. */
                sdcount = gv_sdcount(p, (boff >= p->synced));

                if (!(boff + bcount <= p->synced) &&
                    (p->flags & GV_PLEX_GROWING) &&
                    !growing)
                        return (GV_ERR_ISBUSY);
                *sdno = stripeno % sdcount;

                KASSERT(*sdno >= 0, ("gv_plex_offset: sdno < 0"));
                stripestart = (stripeno / sdcount) * p->stripesize;
                KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
                stripeend = stripestart + p->stripesize;
                *real_off = boff - (stripeno * p->stripesize) + stripestart;
                len_left = stripeend - *real_off;
                KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));

                *real_len = (bcount <= len_left) ? bcount : len_left;
                break;

        default:
                return (GV_ERR_PLEXORG);
        }
        return (0);
}

/*
 * Prepare a normal plex request.
 */
static int
gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
    off_t bcount, caddr_t addr)
{
        struct gv_sd *s;
        struct bio *cbp;
        off_t real_len, real_off;
        int i, err, sdno;

        s = NULL;
        sdno = -1;
        real_len = real_off = 0;

        err = ENXIO;

        if (p == NULL || LIST_EMPTY(&p->subdisks))
                goto bad;

        err = gv_plex_offset(p, boff, bcount, &real_off,
            &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW));
        /* If the request was blocked, put it into wait. */
        if (err == GV_ERR_ISBUSY) {
                bioq_disksort(p->rqueue, bp);
                return (-1); /* "Fail", and delay request. */
        }
        if (err) {
                err = ENXIO;
                goto bad;
        }
        err = ENXIO;

        /* Find the right subdisk. */
        i = 0;
        LIST_FOREACH(s, &p->subdisks, in_plex) {
                if (i == sdno)
                        break;
                i++;
        }

        /* Subdisk not found. */
        if (s == NULL || s->drive_sc == NULL)
                goto bad;

        /* Now check if we can handle the request on this subdisk. */
        switch (s->state) {
        case GV_SD_UP:
                /* If the subdisk is up, just continue. */
                break;
        case GV_SD_DOWN:
                if (bp->bio_pflags & GV_BIO_INTERNAL)
                        G_VINUM_DEBUG(0, "subdisk must be in the stale state "
                            "in order to perform administrative requests");
                goto bad;
        case GV_SD_STALE:
                if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) {
                        G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
                            "regular requests");
                        goto bad;
                }

                G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
                gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
                break;
        case GV_SD_INITIALIZING:
                if (bp->bio_cmd == BIO_READ)
                        goto bad;
                break;
        default:
                /* All other subdisk states mean it's not accessible. */
                goto bad;
        }

        /* Clone the bio and adjust the offsets and sizes. */
        cbp = g_clone_bio(bp);
        if (cbp == NULL) {
                err = ENOMEM;
                goto bad;
        }
        cbp->bio_offset = real_off + s->drive_offset;
        cbp->bio_length = real_len;
        cbp->bio_data = addr;
        cbp->bio_done = gv_done;
        cbp->bio_caller1 = s;

        /* Store the sub-requests now and let others issue them. */
        bioq_insert_tail(p->bqueue, cbp);
        return (real_len);

bad:
        G_VINUM_LOGREQ(0, bp, "plex request failed.");
        /* Building the sub-request failed. If internal BIO, do not deliver. */
        if (bp->bio_pflags & GV_BIO_INTERNAL) {
                if (bp->bio_pflags & GV_BIO_MALLOC)
                        g_free(bp->bio_data);
                g_destroy_bio(bp);
                p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
                    GV_PLEX_GROWING);
                return (-1);
        }
        g_io_deliver(bp, err);
        return (-1);
}

/*
 * Handle a completed request to a striped or concatenated plex.
 */
void
gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
{
        struct bio *pbp;

        pbp = bp->bio_parent;
        if (pbp->bio_error == 0)
                pbp->bio_error = bp->bio_error;
        g_destroy_bio(bp);
        pbp->bio_inbed++;
        if (pbp->bio_children == pbp->bio_inbed) {
                /*
                 * Just set it to length since multiple plexes will
                 * screw things up.
                 */
                pbp->bio_completed = pbp->bio_length;
                if (pbp->bio_pflags & GV_BIO_SYNCREQ)
                        gv_sync_complete(p, pbp);
                else if (pbp->bio_pflags & GV_BIO_GROW)
                        gv_grow_complete(p, pbp);
                else
                        g_io_deliver(pbp, pbp->bio_error);
        }
}

/*
 * Handle a completed request to a RAID-5 plex.
 */
void
gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
{
        struct gv_softc *sc;
        struct bio *cbp, *pbp;
        struct gv_bioq *bq, *bq2;
        struct gv_raid5_packet *wp;
        off_t completed;
        int i;

        completed = 0;
        sc = p->vinumconf;
        wp = bp->bio_caller2;

        switch (bp->bio_parent->bio_cmd) {
        case BIO_READ:
                if (wp == NULL) {
                        completed = bp->bio_completed;
                        break;
                }

                TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                        if (bq->bp != bp)
                                continue;
                        TAILQ_REMOVE(&wp->bits, bq, queue);
                        g_free(bq);
                        for (i = 0; i < wp->length; i++)
                                wp->data[i] ^= bp->bio_data[i];
                        break;
                }
                if (TAILQ_EMPTY(&wp->bits)) {
                        completed = wp->length;
                        if (wp->lockbase != -1) {
                                TAILQ_REMOVE(&p->packets, wp, list);
                                /* Bring the waiting bios back into the game. */
                                pbp = bioq_takefirst(p->wqueue);
                                while (pbp != NULL) {
                                        gv_post_bio(sc, pbp);
                                        pbp = bioq_takefirst(p->wqueue);
                                }
                        }
                        g_free(wp);
                }

                break;

        case BIO_WRITE:
                /* XXX can this ever happen? */
                if (wp == NULL) {
                        completed = bp->bio_completed;
                        break;
                }

                /* Check if we need to handle parity data. */
                TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                        if (bq->bp != bp)
                                continue;
                        TAILQ_REMOVE(&wp->bits, bq, queue);
                        g_free(bq);
                        cbp = wp->parity;
                        if (cbp != NULL) {
                                for (i = 0; i < wp->length; i++)
                                        cbp->bio_data[i] ^= bp->bio_data[i];
                        }
                        break;
                }

                /* Handle parity data. */
                if (TAILQ_EMPTY(&wp->bits)) {
                        if (bp->bio_parent->bio_pflags & GV_BIO_CHECK)
                                i = gv_check_parity(p, bp, wp);
                        else
                                i = gv_normal_parity(p, bp, wp);

                        /* All of our sub-requests have finished. */
                        if (i) {
                                completed = wp->length;
                                TAILQ_REMOVE(&p->packets, wp, list);
                                /* Bring the waiting bios back into the game. */
                                pbp = bioq_takefirst(p->wqueue);
                                while (pbp != NULL) {
                                        gv_post_bio(sc, pbp);
                                        pbp = bioq_takefirst(p->wqueue);
                                }
                                g_free(wp);
                        }
                }

                break;
        }

        pbp = bp->bio_parent;
        if (pbp->bio_error == 0)
                pbp->bio_error = bp->bio_error;
        pbp->bio_completed += completed;

        /* When the original request is finished, we deliver it. */
        pbp->bio_inbed++;
        if (pbp->bio_inbed == pbp->bio_children) {
                /* Hand it over for checking or delivery. */
                if (pbp->bio_cmd == BIO_WRITE &&
                    (pbp->bio_pflags & GV_BIO_CHECK)) {
                        gv_parity_complete(p, pbp);
                } else if (pbp->bio_cmd == BIO_WRITE &&
                    (pbp->bio_pflags & GV_BIO_REBUILD)) {
                        gv_rebuild_complete(p, pbp);
                } else if (pbp->bio_pflags & GV_BIO_INIT) {
                        gv_init_complete(p, pbp);
                } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
                        gv_sync_complete(p, pbp);
                } else if (pbp->bio_pflags & GV_BIO_GROW) {
                        gv_grow_complete(p, pbp);
                } else {
                        g_io_deliver(pbp, pbp->bio_error);
                }
        }

        /* Clean up what we allocated. */
        if (bp->bio_cflags & GV_BIO_MALLOC)
                g_free(bp->bio_data);
        g_destroy_bio(bp);
}

/*
 * Verify the parity of a RAID-5 stripe for a parity-check request and, if it
 * is wrong and a rebuild was asked for, write back the corrected parity.
 * Returns 1 when this packet is finished.
 */
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
        struct bio *pbp;
        struct gv_sd *s;
        int err, finished, i;

        err = 0;
        finished = 1;

        if (wp->waiting != NULL) {
                pbp = wp->waiting;
                wp->waiting = NULL;
                s = pbp->bio_caller1;
                g_io_request(pbp, s->drive_sc->consumer);
                finished = 0;

        } else if (wp->parity != NULL) {
                pbp = wp->parity;
                wp->parity = NULL;

                /* Check if the parity is correct. */
                for (i = 0; i < wp->length; i++) {
                        if (bp->bio_data[i] != pbp->bio_data[i]) {
                                err = 1;
                                break;
                        }
                }

                /* The parity is not correct... */
                if (err) {
                        bp->bio_parent->bio_error = EAGAIN;

                        /* ... but we rebuild it. */
                        if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
                                s = pbp->bio_caller1;
                                g_io_request(pbp, s->drive_sc->consumer);
                                finished = 0;
                        }
                }

                /*
                 * Clean up the BIO we would have used for rebuilding the
                 * parity.
                 */
                if (finished) {
                        bp->bio_parent->bio_inbed++;
                        g_destroy_bio(pbp);
                }
        }

        return (finished);
}

/*
 * Handle the parity of a normal RAID-5 write: first issue the delayed data
 * write (XORing it into the parity buffer), then the parity write itself.
 * Returns 1 when no more writes are pending.
 */
static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
        struct bio *cbp, *pbp;
        struct gv_sd *s;
        int finished, i;

        finished = 1;

        if (wp->waiting != NULL) {
                pbp = wp->waiting;
                wp->waiting = NULL;
                cbp = wp->parity;
                for (i = 0; i < wp->length; i++)
                        cbp->bio_data[i] ^= pbp->bio_data[i];
                s = pbp->bio_caller1;
                g_io_request(pbp, s->drive_sc->consumer);
                finished = 0;

        } else if (wp->parity != NULL) {
                cbp = wp->parity;
                wp->parity = NULL;
                s = cbp->bio_caller1;
                g_io_request(cbp, s->drive_sc->consumer);
                finished = 0;
        }

        return (finished);
}

/* Flush the queue with delayed requests. */
static void
gv_plex_flush(struct gv_plex *p)
{
        struct bio *bp;

        bp = bioq_takefirst(p->rqueue);
        while (bp != NULL) {
                gv_plex_start(p, bp);
                bp = bioq_takefirst(p->rqueue);
        }
}

/* Queue a BIO for the vinum worker thread and wake it up. */
static void
gv_post_bio(struct gv_softc *sc, struct bio *bp)
{

        KASSERT(sc != NULL, ("NULL sc"));
        KASSERT(bp != NULL, ("NULL bp"));
        mtx_lock(&sc->bqueue_mtx);
        bioq_disksort(sc->bqueue_down, bp);
        wakeup(sc);
        mtx_unlock(&sc->bqueue_mtx);
}

/*
 * Create a BIO for the next step of a plex synchronization and post it to
 * the worker thread.
 */
int
gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
    off_t length, int type, caddr_t data)
{
        struct gv_softc *sc;
        struct bio *bp;

        KASSERT(from != NULL, ("NULL from"));
        KASSERT(to != NULL, ("NULL to"));
        sc = from->vinumconf;
        KASSERT(sc != NULL, ("NULL sc"));

        bp = g_new_bio();
        if (bp == NULL) {
                G_VINUM_DEBUG(0, "sync from '%s' failed at offset %jd; "
                    "out of memory", from->name, (intmax_t)offset);
                return (ENOMEM);
        }
        bp->bio_length = length;
        bp->bio_done = gv_done;
        bp->bio_pflags |= GV_BIO_SYNCREQ;
        bp->bio_offset = offset;
        bp->bio_caller1 = from;
        bp->bio_caller2 = to;
        bp->bio_cmd = type;
        if (data == NULL)
                data = g_malloc(length, M_WAITOK);
        bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */
        bp->bio_data = data;

        /* Send down next. */
        gv_post_bio(sc, bp);
        return (0);
}

/*
 * Handle a finished plex sync bio.
 */
int
gv_sync_complete(struct gv_plex *to, struct bio *bp)
{
        struct gv_plex *from, *p;
        struct gv_sd *s;
        struct gv_volume *v;
        struct gv_softc *sc;
        off_t offset;
        int err;

        g_topology_assert_not();

        err = 0;
        KASSERT(to != NULL, ("NULL to"));
        KASSERT(bp != NULL, ("NULL bp"));
        from = bp->bio_caller2;
        KASSERT(from != NULL, ("NULL from"));
        v = to->vol_sc;
        KASSERT(v != NULL, ("NULL v"));
        sc = v->vinumconf;
        KASSERT(sc != NULL, ("NULL sc"));

        /* If it was a read, write it. */
        if (bp->bio_cmd == BIO_READ) {
                err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
                    BIO_WRITE, bp->bio_data);
        /* If it was a write, read the next one. */
        } else if (bp->bio_cmd == BIO_WRITE) {
                if (bp->bio_pflags & GV_BIO_MALLOC)
                        g_free(bp->bio_data);
                to->synced += bp->bio_length;
                /* If we're finished, clean up. */
                if (bp->bio_offset + bp->bio_length >= from->size) {
                        G_VINUM_DEBUG(1, "syncing of %s from %s completed",
                            to->name, from->name);
                        /* Update our state. */
                        LIST_FOREACH(s, &to->subdisks, in_plex)
                                gv_set_sd_state(s, GV_SD_UP, 0);
                        gv_update_plex_state(to);
                        to->flags &= ~GV_PLEX_SYNCING;
                        to->synced = 0;
                        gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
                } else {
                        offset = bp->bio_offset + bp->bio_length;
                        err = gv_sync_request(from, to, offset,
                            MIN(bp->bio_length, from->size - offset),
                            BIO_READ, NULL);
                }
        }
        g_destroy_bio(bp);
        /* Clean up if there was an error. */
        if (err) {
                to->flags &= ~GV_PLEX_SYNCING;
                G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
        }

        /* Check if all plexes are synced, and lower refcounts. */
        g_topology_lock();
        LIST_FOREACH(p, &v->plexes, in_volume) {
                if (p->flags & GV_PLEX_SYNCING) {
                        g_topology_unlock();
                        return (-1);
                }
        }
        /* If we came here, all plexes are synced, and we're free. */
        gv_access(v->provider, -1, -1, 0);
        g_topology_unlock();
        G_VINUM_DEBUG(1, "plex sync completed");
        gv_volume_flush(v);
        return (0);
}

/*
 * Create a new bio struct for the next grow request.
 */
int
gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
    caddr_t data)
{
        struct gv_softc *sc;
        struct bio *bp;

        KASSERT(p != NULL, ("gv_grow_request: NULL p"));
        sc = p->vinumconf;
        KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));

        bp = g_new_bio();
        if (bp == NULL) {
                G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
                    "out of memory", p->name);
                return (ENOMEM);
        }

        bp->bio_cmd = type;
        bp->bio_done = gv_done;
        bp->bio_error = 0;
        bp->bio_caller1 = p;
        bp->bio_offset = offset;
        bp->bio_length = length;
        bp->bio_pflags |= GV_BIO_GROW;
        if (data == NULL)
                data = g_malloc(length, M_WAITOK);
        bp->bio_pflags |= GV_BIO_MALLOC;
        bp->bio_data = data;

        gv_post_bio(sc, bp);
        return (0);
}

/*
 * Finish handling of a bio to a growing plex.
 */
void
gv_grow_complete(struct gv_plex *p, struct bio *bp)
{
        struct gv_softc *sc;
        struct gv_sd *s;
        struct gv_volume *v;
        off_t origsize, offset;
        int sdcount, err;

        v = p->vol_sc;
        KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
        sc = v->vinumconf;
        KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
        err = 0;

        /* If it was a read, write it. */
        if (bp->bio_cmd == BIO_READ) {
                p->synced += bp->bio_length;
                err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
                    BIO_WRITE, bp->bio_data);
        /* If it was a write, read the next one. */
        } else if (bp->bio_cmd == BIO_WRITE) {
                if (bp->bio_pflags & GV_BIO_MALLOC)
                        g_free(bp->bio_data);

                /* Find the real size of the plex. */
                sdcount = gv_sdcount(p, 1);
                s = LIST_FIRST(&p->subdisks);
                KASSERT(s != NULL, ("NULL s"));
                origsize = (s->size * (sdcount - 1));
                if (bp->bio_offset + bp->bio_length >= origsize) {
                        G_VINUM_DEBUG(1, "growing of %s completed", p->name);
                        p->flags &= ~GV_PLEX_GROWING;
                        LIST_FOREACH(s, &p->subdisks, in_plex) {
                                s->flags &= ~GV_SD_GROW;
                                gv_set_sd_state(s, GV_SD_UP, 0);
                        }
                        p->size = gv_plex_size(p);
                        gv_update_vol_size(v, gv_vol_size(v));
                        gv_set_plex_state(p, GV_PLEX_UP, 0);
                        g_topology_lock();
                        gv_access(v->provider, -1, -1, 0);
                        g_topology_unlock();
                        p->synced = 0;
                        gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
                        /* Issue delayed requests. */
                        gv_plex_flush(p);
                } else {
                        offset = bp->bio_offset + bp->bio_length;
                        err = gv_grow_request(p, offset,
                            MIN(bp->bio_length, origsize - offset),
                            BIO_READ, NULL);
                }
        }
        g_destroy_bio(bp);

        if (err) {
                p->flags &= ~GV_PLEX_GROWING;
                G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
        }
}

/*
 * Create an initialization BIO and send it off to the consumer.  Assume that
 * we're given initialization data as parameter.
 */
void
gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
{
        struct gv_drive *d;
        struct g_consumer *cp;
        struct bio *bp, *cbp;

        KASSERT(s != NULL, ("gv_init_request: NULL s"));
        d = s->drive_sc;
        KASSERT(d != NULL, ("gv_init_request: NULL d"));
        cp = d->consumer;
        KASSERT(cp != NULL, ("gv_init_request: NULL cp"));

        bp = g_new_bio();
        if (bp == NULL) {
                G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset "
                    "%jd (drive offset %jd); out of memory", s->name,
                    (intmax_t)s->initialized, (intmax_t)start);
                return; /* XXX: Error codes. */
        }
        bp->bio_cmd = BIO_WRITE;
        bp->bio_data = data;
        bp->bio_done = gv_done;
        bp->bio_error = 0;
        bp->bio_length = length;
        bp->bio_pflags |= GV_BIO_INIT;
        bp->bio_offset = start;
        bp->bio_caller1 = s;

        /* Then, of course, we have to clone it. */
        cbp = g_clone_bio(bp);
        if (cbp == NULL) {
                G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset "
                    "%jd (drive offset %jd); out of memory", s->name,
                    (intmax_t)s->initialized, (intmax_t)start);
                return; /* XXX: Error codes. */
        }
        cbp->bio_done = gv_done;
        cbp->bio_caller1 = s;
        /* Send it off to the consumer. */
        g_io_request(cbp, cp);
}

/*
 * Handle a finished initialization BIO.
 */
void
gv_init_complete(struct gv_plex *p, struct bio *bp)
{
        struct gv_softc *sc;
        struct gv_drive *d;
        struct g_consumer *cp;
        struct gv_sd *s;
        off_t start, length;
        caddr_t data;
        int error;

        s = bp->bio_caller1;
        start = bp->bio_offset;
        length = bp->bio_length;
        error = bp->bio_error;
        data = bp->bio_data;

        KASSERT(s != NULL, ("gv_init_complete: NULL s"));
        d = s->drive_sc;
        KASSERT(d != NULL, ("gv_init_complete: NULL d"));
        cp = d->consumer;
        KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
        sc = p->vinumconf;
        KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));

        g_destroy_bio(bp);

        /*
         * First we need to find out if it was okay, and abort if it's not.
         * Then we need to free previous buffers, find out the correct
         * subdisk, as well as getting the correct starting point and length
         * of the BIO.
         */
        if (start >= s->drive_offset + s->size) {
                /* Free the data we initialized. */
                if (data != NULL)
                        g_free(data);
                g_topology_assert_not();
                g_topology_lock();
                g_access(cp, 0, -1, 0);
                g_topology_unlock();
                if (error) {
                        gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
                            GV_SETSTATE_CONFIG);
                } else {
                        gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
                        s->initialized = 0;
                        gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
                        G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
                            "successfully", s->name);
                }
                return;
        }
        s->initialized += length;
        start += length;
        gv_init_request(s, start, data, length);
}

/*
 * Create a new bio struct for the next parity rebuild.
 * Used both by internal rebuild of degraded plexes as well as user-initiated
 * rebuilds/checks.
 */
void
gv_parity_request(struct gv_plex *p, int flags, off_t offset)
{
        struct gv_softc *sc;
        struct bio *bp;

        KASSERT(p != NULL, ("gv_parity_request: NULL p"));
        sc = p->vinumconf;
        KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));

        bp = g_new_bio();
        if (bp == NULL) {
                G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
                    "out of memory", p->name);
                return;
        }

        bp->bio_cmd = BIO_WRITE;
        bp->bio_done = gv_done;
        bp->bio_error = 0;
        bp->bio_length = p->stripesize;
        bp->bio_caller1 = p;

        /*
         * Check if it's a rebuild of a degraded plex or a user request of
         * parity rebuild.
         */
        if (flags & GV_BIO_REBUILD)
                bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
        else if (flags & GV_BIO_CHECK)
                bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
        else {
                G_VINUM_DEBUG(0, "invalid flags given in rebuild");
                g_destroy_bio(bp);
                return;
        }

        bp->bio_pflags = flags;
        bp->bio_pflags |= GV_BIO_MALLOC;

        /* We still have more parity to build. */
        bp->bio_offset = offset;
        gv_post_bio(sc, bp);
}

/*
 * Handle a finished parity write.
 */
void
gv_parity_complete(struct gv_plex *p, struct bio *bp)
{
        struct gv_softc *sc;
        int error, flags;

        error = bp->bio_error;
        flags = bp->bio_pflags;
        flags &= ~GV_BIO_MALLOC;

        sc = p->vinumconf;
        KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));

        /* Clean up what we allocated. */
        if (bp->bio_pflags & GV_BIO_MALLOC)
                g_free(bp->bio_data);
        g_destroy_bio(bp);

        if (error == EAGAIN) {
                G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
                    (intmax_t)p->synced);
        }

        /* Any error is fatal, except EAGAIN when we're rebuilding. */
        if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
                /* Make sure we don't have the lock. */
                g_topology_assert_not();
                g_topology_lock();
                gv_access(p->vol_sc->provider, -1, -1, 0);
                g_topology_unlock();
                G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
                    "errno %d", p->name, (intmax_t)p->synced, error);
                return;
        } else {
                p->synced += p->stripesize;
        }

        if (p->synced >= p->size) {
                /* Make sure we don't have the lock. */
                g_topology_assert_not();
                g_topology_lock();
                gv_access(p->vol_sc->provider, -1, -1, 0);
                g_topology_unlock();
                /* We're finished. */
                G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
                p->synced = 0;
                gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
                return;
        }

        /* Send down the next request; it determines itself whether more
         * work is needed. */
        gv_parity_request(p, flags, p->synced);
}

/*
 * Handle a finished plex rebuild bio.
 */
void
gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
{
        struct gv_softc *sc;
        struct gv_sd *s;
        int error, flags;
        off_t offset;

        error = bp->bio_error;
        flags = bp->bio_pflags;
        offset = bp->bio_offset;
        flags &= ~GV_BIO_MALLOC;
        sc = p->vinumconf;
        KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));

        /* Clean up what we allocated. */
        if (bp->bio_pflags & GV_BIO_MALLOC)
                g_free(bp->bio_data);
        g_destroy_bio(bp);

        if (error) {
                g_topology_assert_not();
                g_topology_lock();
                gv_access(p->vol_sc->provider, -1, -1, 0);
                g_topology_unlock();

                G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd "
                    "errno: %d", p->name, (intmax_t)offset, error);
                p->flags &= ~GV_PLEX_REBUILDING;
                p->synced = 0;
                gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
                return;
        }

        offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
        if (offset >= p->size) {
                /* We're finished. */
                g_topology_assert_not();
                g_topology_lock();
                gv_access(p->vol_sc->provider, -1, -1, 0);
                g_topology_unlock();

                G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
                gv_save_config(p->vinumconf);
                p->flags &= ~GV_PLEX_REBUILDING;
                p->synced = 0;
                /* Try to up all subdisks. */
                LIST_FOREACH(s, &p->subdisks, in_plex)
                        gv_update_sd_state(s);
                gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
                gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
                return;
        }

        /* Send down the next request; it determines itself whether more
         * work is needed. */
        gv_parity_request(p, flags, offset);
}