1 /*- 2 * Copyright (c) 2004 Lukas Ertl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/bio.h> 32 #include <sys/kernel.h> 33 #include <sys/kthread.h> 34 #include <sys/libkern.h> 35 #include <sys/malloc.h> 36 #include <sys/queue.h> 37 38 #include <geom/geom.h> 39 #include <geom/vinum/geom_vinum_var.h> 40 #include <geom/vinum/geom_vinum.h> 41 #include <geom/vinum/geom_vinum_share.h> 42 43 static int gv_init_plex(struct gv_plex *); 44 void gv_init_td(void *); 45 static int gv_rebuild_plex(struct gv_plex *); 46 void gv_rebuild_td(void *); 47 static int gv_start_plex(struct gv_plex *); 48 static int gv_start_vol(struct gv_volume *); 49 static int gv_sync(struct gv_volume *); 50 void gv_sync_td(void *); 51 52 struct gv_sync_args { 53 struct gv_volume *v; 54 struct gv_plex *from; 55 struct gv_plex *to; 56 off_t syncsize; 57 }; 58 59 void 60 gv_parityop(struct g_geom *gp, struct gctl_req *req) 61 { 62 struct gv_softc *sc; 63 struct gv_plex *p; 64 struct bio *bp; 65 struct g_consumer *cp; 66 int error, *flags, type, *rebuild, rv; 67 char *plex; 68 69 rv = -1; 70 71 plex = gctl_get_param(req, "plex", NULL); 72 if (plex == NULL) { 73 gctl_error(req, "no plex given"); 74 goto out; 75 } 76 77 flags = gctl_get_paraml(req, "flags", sizeof(*flags)); 78 if (flags == NULL) { 79 gctl_error(req, "no flags given"); 80 goto out; 81 } 82 83 rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild)); 84 if (rebuild == NULL) { 85 gctl_error(req, "no rebuild op given"); 86 goto out; 87 } 88 89 sc = gp->softc; 90 type = gv_object_type(sc, plex); 91 switch (type) { 92 case GV_TYPE_PLEX: 93 break; 94 case GV_TYPE_VOL: 95 case GV_TYPE_SD: 96 case GV_TYPE_DRIVE: 97 default: 98 gctl_error(req, "'%s' is not a plex", plex); 99 goto out; 100 } 101 102 p = gv_find_plex(sc, plex); 103 if (p->state != GV_PLEX_UP) { 104 gctl_error(req, "plex %s is not completely accessible", 105 p->name); 106 goto out; 107 } 108 if (p->org != GV_PLEX_RAID5) { 109 gctl_error(req, "plex %s is not a RAID5 plex", p->name); 110 goto out; 111 } 112 113 cp = p->consumer; 114 error = g_access(cp, 1, 1, 0); 115 if (error) { 116 gctl_error(req, "cannot access consumer"); 117 goto out; 118 } 119 g_topology_unlock(); 120 121 /* Reset the check pointer when using -f. */ 122 if (*flags & GV_FLAG_F) 123 p->synced = 0; 124 125 bp = g_new_bio(); 126 if (bp == NULL) { 127 gctl_error(req, "cannot create BIO - out of memory"); 128 g_topology_lock(); 129 error = g_access(cp, -1, -1, 0); 130 goto out; 131 } 132 bp->bio_cmd = BIO_WRITE; 133 bp->bio_done = NULL; 134 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO); 135 bp->bio_cflags |= GV_BIO_CHECK; 136 if (*rebuild) 137 bp->bio_cflags |= GV_BIO_PARITY; 138 bp->bio_offset = p->synced; 139 bp->bio_length = p->stripesize; 140 141 /* Schedule it down ... */ 142 g_io_request(bp, cp); 143 144 /* ... and wait for the result. */ 145 error = biowait(bp, "gwrite"); 146 g_free(bp->bio_data); 147 g_destroy_bio(bp); 148 149 if (error) { 150 /* Incorrect parity. */ 151 if (error == EAGAIN) 152 rv = 1; 153 154 /* Some other error happened. */ 155 else 156 gctl_error(req, "Parity check failed at offset 0x%jx, " 157 "errno %d", (intmax_t)p->synced, error); 158 159 /* Correct parity. */ 160 } else 161 rv = 0; 162 163 gctl_set_param(req, "offset", &p->synced, sizeof(p->synced)); 164 165 /* Advance the checkpointer if there was no error. */ 166 if (rv == 0) 167 p->synced += p->stripesize; 168 169 /* End of plex; reset the check pointer and signal it to the caller. */ 170 if (p->synced >= p->size) { 171 p->synced = 0; 172 rv = -2; 173 } 174 175 g_topology_lock(); 176 error = g_access(cp, -1, -1, 0); 177 178 out: 179 gctl_set_param(req, "rv", &rv, sizeof(rv)); 180 } 181 182 void 183 gv_start_obj(struct g_geom *gp, struct gctl_req *req) 184 { 185 struct gv_softc *sc; 186 struct gv_volume *v; 187 struct gv_plex *p; 188 int *argc, *initsize; 189 char *argv, buf[20]; 190 int err, i, type; 191 192 argc = gctl_get_paraml(req, "argc", sizeof(*argc)); 193 initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize)); 194 195 if (argc == NULL || *argc == 0) { 196 gctl_error(req, "no arguments given"); 197 return; 198 } 199 200 sc = gp->softc; 201 202 for (i = 0; i < *argc; i++) { 203 snprintf(buf, sizeof(buf), "argv%d", i); 204 argv = gctl_get_param(req, buf, NULL); 205 if (argv == NULL) 206 continue; 207 type = gv_object_type(sc, argv); 208 switch (type) { 209 case GV_TYPE_VOL: 210 v = gv_find_vol(sc, argv); 211 err = gv_start_vol(v); 212 if (err) { 213 if (err == EINPROGRESS) { 214 gctl_error(req, "cannot start volume " 215 "'%s': already in progress", argv); 216 } else { 217 gctl_error(req, "cannot start volume " 218 "'%s'; errno: %d", argv, err); 219 } 220 return; 221 } 222 break; 223 224 case GV_TYPE_PLEX: 225 p = gv_find_plex(sc, argv); 226 err = gv_start_plex(p); 227 if (err) { 228 if (err == EINPROGRESS) { 229 gctl_error(req, "cannot start plex " 230 "'%s': already in progress", argv); 231 } else { 232 gctl_error(req, "cannot start plex " 233 "'%s'; errno: %d", argv, err); 234 } 235 return; 236 } 237 break; 238 239 case GV_TYPE_SD: 240 case GV_TYPE_DRIVE: 241 /* XXX not yet */ 242 gctl_error(req, "cannot start '%s' - not yet supported", 243 argv); 244 return; 245 default: 246 gctl_error(req, "unknown object '%s'", argv); 247 return; 248 } 249 } 250 } 251 252 static int 253 gv_start_plex(struct gv_plex *p) 254 { 255 struct gv_volume *v; 256 int error; 257 258 KASSERT(p != NULL, ("gv_start_plex: NULL p")); 259 260 if (p->state == GV_PLEX_UP) 261 return (0); 262 263 error = 0; 264 v = p->vol_sc; 265 if ((v != NULL) && (v->plexcount > 1)) 266 error = gv_sync(v); 267 else if (p->org == GV_PLEX_RAID5) { 268 if (p->state == GV_PLEX_DEGRADED) 269 error = gv_rebuild_plex(p); 270 else 271 error = gv_init_plex(p); 272 } 273 274 return (error); 275 } 276 277 static int 278 gv_start_vol(struct gv_volume *v) 279 { 280 struct gv_plex *p; 281 struct gv_sd *s; 282 int error; 283 284 KASSERT(v != NULL, ("gv_start_vol: NULL v")); 285 286 error = 0; 287 288 if (v->plexcount == 0) 289 return (ENXIO); 290 291 else if (v->plexcount == 1) { 292 p = LIST_FIRST(&v->plexes); 293 KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name)); 294 if (p->org == GV_PLEX_RAID5) { 295 switch (p->state) { 296 case GV_PLEX_DOWN: 297 error = gv_init_plex(p); 298 break; 299 case GV_PLEX_DEGRADED: 300 error = gv_rebuild_plex(p); 301 break; 302 default: 303 return (0); 304 } 305 } else { 306 LIST_FOREACH(s, &p->subdisks, in_plex) { 307 gv_set_sd_state(s, GV_SD_UP, 308 GV_SETSTATE_CONFIG); 309 } 310 } 311 } else 312 error = gv_sync(v); 313 314 return (error); 315 } 316 317 static int 318 gv_sync(struct gv_volume *v) 319 { 320 struct gv_softc *sc; 321 struct gv_plex *p, *up; 322 struct gv_sync_args *sync; 323 324 KASSERT(v != NULL, ("gv_sync: NULL v")); 325 sc = v->vinumconf; 326 KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name)); 327 328 /* Find the plex that's up. */ 329 up = NULL; 330 LIST_FOREACH(up, &v->plexes, in_volume) { 331 if (up->state == GV_PLEX_UP) 332 break; 333 } 334 335 /* Didn't find a good plex. */ 336 if (up == NULL) 337 return (ENXIO); 338 339 LIST_FOREACH(p, &v->plexes, in_volume) { 340 if ((p == up) || (p->state == GV_PLEX_UP)) 341 continue; 342 if (p->flags & GV_PLEX_SYNCING) { 343 return (EINPROGRESS); 344 } 345 p->flags |= GV_PLEX_SYNCING; 346 sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO); 347 sync->v = v; 348 sync->from = up; 349 sync->to = p; 350 sync->syncsize = GV_DFLT_SYNCSIZE; 351 kthread_create(gv_sync_td, sync, NULL, 0, 0, "gv_sync '%s'", 352 p->name); 353 } 354 355 return (0); 356 } 357 358 static int 359 gv_rebuild_plex(struct gv_plex *p) 360 { 361 struct gv_sync_args *sync; 362 363 if (gv_is_open(p->geom)) 364 return (EBUSY); 365 366 if (p->flags & GV_PLEX_SYNCING) 367 return (EINPROGRESS); 368 p->flags |= GV_PLEX_SYNCING; 369 370 sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO); 371 sync->to = p; 372 sync->syncsize = GV_DFLT_SYNCSIZE; 373 374 kthread_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s", 375 p->name); 376 377 return (0); 378 } 379 380 static int 381 gv_init_plex(struct gv_plex *p) 382 { 383 struct gv_sd *s; 384 385 KASSERT(p != NULL, ("gv_init_plex: NULL p")); 386 387 LIST_FOREACH(s, &p->subdisks, in_plex) { 388 if (s->state == GV_SD_INITIALIZING) 389 return (EINPROGRESS); 390 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE); 391 s->init_size = GV_DFLT_SYNCSIZE; 392 kthread_create(gv_init_td, s, NULL, 0, 0, "gv_init %s", 393 s->name); 394 } 395 396 return (0); 397 } 398 399 /* This thread is responsible for rebuilding a degraded RAID5 plex. */ 400 void 401 gv_rebuild_td(void *arg) 402 { 403 struct bio *bp; 404 struct gv_plex *p; 405 struct g_consumer *cp; 406 struct gv_sync_args *sync; 407 u_char *buf; 408 off_t i; 409 int error; 410 411 buf = NULL; 412 bp = NULL; 413 414 sync = arg; 415 p = sync->to; 416 p->synced = 0; 417 cp = p->consumer; 418 419 g_topology_lock(); 420 error = g_access(cp, 1, 1, 0); 421 if (error) { 422 g_topology_unlock(); 423 printf("GEOM_VINUM: rebuild of %s failed to access consumer: " 424 "%d\n", p->name, error); 425 kthread_exit(error); 426 } 427 g_topology_unlock(); 428 429 buf = g_malloc(sync->syncsize, M_WAITOK); 430 431 printf("GEOM_VINUM: rebuild of %s started\n", p->name); 432 i = 0; 433 for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) { 434 /* 435 if (i + sync->syncsize > p->size) 436 sync->syncsize = p->size - i; 437 */ 438 bp = g_new_bio(); 439 if (bp == NULL) { 440 printf("GEOM_VINUM: rebuild of %s failed creating bio: " 441 "out of memory\n", p->name); 442 break; 443 } 444 bp->bio_cmd = BIO_WRITE; 445 bp->bio_done = NULL; 446 bp->bio_data = buf; 447 bp->bio_cflags |= GV_BIO_REBUILD; 448 bp->bio_offset = i; 449 bp->bio_length = p->stripesize; 450 451 /* Schedule it down ... */ 452 g_io_request(bp, cp); 453 454 /* ... and wait for the result. */ 455 error = biowait(bp, "gwrite"); 456 if (error) { 457 printf("GEOM_VINUM: rebuild of %s failed at offset %jd " 458 "errno: %d\n", p->name, i, error); 459 break; 460 } 461 g_destroy_bio(bp); 462 bp = NULL; 463 } 464 465 if (bp != NULL) 466 g_destroy_bio(bp); 467 if (buf != NULL) 468 g_free(buf); 469 470 g_topology_lock(); 471 g_access(cp, -1, -1, 0); 472 gv_save_config_all(p->vinumconf); 473 g_topology_unlock(); 474 475 p->flags &= ~GV_PLEX_SYNCING; 476 p->synced = 0; 477 478 /* Successful initialization. */ 479 if (!error) 480 printf("GEOM_VINUM: rebuild of %s finished\n", p->name); 481 482 g_free(sync); 483 kthread_exit(error); 484 } 485 486 void 487 gv_sync_td(void *arg) 488 { 489 struct bio *bp; 490 struct gv_plex *p; 491 struct g_consumer *from, *to; 492 struct gv_sync_args *sync; 493 u_char *buf; 494 off_t i; 495 int error; 496 497 sync = arg; 498 499 from = sync->from->consumer; 500 to = sync->to->consumer; 501 502 p = sync->to; 503 p->synced = 0; 504 505 error = 0; 506 507 g_topology_lock(); 508 error = g_access(from, 1, 0, 0); 509 if (error) { 510 g_topology_unlock(); 511 printf("GEOM_VINUM: sync from '%s' failed to access " 512 "consumer: %d\n", sync->from->name, error); 513 g_free(sync); 514 kthread_exit(error); 515 } 516 error = g_access(to, 0, 1, 0); 517 if (error) { 518 g_access(from, -1, 0, 0); 519 g_topology_unlock(); 520 printf("GEOM_VINUM: sync to '%s' failed to access " 521 "consumer: %d\n", p->name, error); 522 g_free(sync); 523 kthread_exit(error); 524 } 525 g_topology_unlock(); 526 527 printf("GEOM_VINUM: plex sync %s -> %s started\n", sync->from->name, 528 sync->to->name); 529 for (i = 0; i < p->size; i+= sync->syncsize) { 530 /* Read some bits from the good plex. */ 531 buf = g_read_data(from, i, sync->syncsize, &error); 532 if (buf == NULL) { 533 printf("GEOM_VINUM: sync read from '%s' failed at " 534 "offset %jd; errno: %d\n", sync->from->name, i, 535 error); 536 break; 537 } 538 539 /* 540 * Create a bio and schedule it down on the 'bad' plex. We 541 * cannot simply use g_write_data() because we have to let the 542 * lower parts know that we are an initialization process and 543 * not a 'normal' request. 544 */ 545 bp = g_new_bio(); 546 if (bp == NULL) { 547 printf("GEOM_VINUM: sync write to '%s' failed at " 548 "offset %jd; out of memory\n", p->name, i); 549 g_free(buf); 550 break; 551 } 552 bp->bio_cmd = BIO_WRITE; 553 bp->bio_offset = i; 554 bp->bio_length = sync->syncsize; 555 bp->bio_data = buf; 556 bp->bio_done = NULL; 557 558 /* 559 * This hack declare this bio as part of an initialization 560 * process, so that the lower levels allow it to get through. 561 */ 562 bp->bio_cflags |= GV_BIO_SYNCREQ; 563 564 /* Schedule it down ... */ 565 g_io_request(bp, to); 566 567 /* ... and wait for the result. */ 568 error = biowait(bp, "gwrite"); 569 g_destroy_bio(bp); 570 g_free(buf); 571 if (error) { 572 printf("GEOM_VINUM: sync write to '%s' failed at " 573 "offset %jd; errno: %d\n", p->name, i, error); 574 break; 575 } 576 577 /* Note that we have synced a little bit more. */ 578 p->synced += sync->syncsize; 579 } 580 581 g_topology_lock(); 582 g_access(from, -1, 0, 0); 583 g_access(to, 0, -1, 0); 584 gv_save_config_all(p->vinumconf); 585 g_topology_unlock(); 586 587 /* Successful initialization. */ 588 if (!error) 589 printf("GEOM_VINUM: plex sync %s -> %s finished\n", 590 sync->from->name, sync->to->name); 591 592 p->flags &= ~GV_PLEX_SYNCING; 593 p->synced = 0; 594 595 g_free(sync); 596 kthread_exit(error); 597 } 598 599 void 600 gv_init_td(void *arg) 601 { 602 struct gv_sd *s; 603 struct gv_drive *d; 604 struct g_geom *gp; 605 struct g_consumer *cp; 606 int error; 607 off_t i, init_size, start, offset, length; 608 u_char *buf; 609 610 s = arg; 611 KASSERT(s != NULL, ("gv_init_td: NULL s")); 612 d = s->drive_sc; 613 KASSERT(d != NULL, ("gv_init_td: NULL d")); 614 gp = d->geom; 615 KASSERT(gp != NULL, ("gv_init_td: NULL gp")); 616 617 cp = LIST_FIRST(&gp->consumer); 618 KASSERT(cp != NULL, ("gv_init_td: NULL cp")); 619 620 s->init_error = 0; 621 init_size = s->init_size; 622 start = s->drive_offset + s->initialized; 623 offset = s->drive_offset; 624 length = s->size; 625 626 buf = g_malloc(s->init_size, M_WAITOK | M_ZERO); 627 628 g_topology_lock(); 629 error = g_access(cp, 0, 1, 0); 630 if (error) { 631 s->init_error = error; 632 g_topology_unlock(); 633 printf("GEOM_VINUM: subdisk '%s' init: failed to access " 634 "consumer; error: %d\n", s->name, error); 635 kthread_exit(error); 636 } 637 g_topology_unlock(); 638 639 for (i = start; i < offset + length; i += init_size) { 640 error = g_write_data(cp, i, buf, init_size); 641 if (error) { 642 printf("GEOM_VINUM: subdisk '%s' init: write failed" 643 " at offset %jd (drive offset %jd); error %d\n", 644 s->name, (intmax_t)s->initialized, (intmax_t)i, 645 error); 646 break; 647 } 648 s->initialized += init_size; 649 } 650 651 g_free(buf); 652 653 g_topology_lock(); 654 g_access(cp, 0, -1, 0); 655 g_topology_unlock(); 656 if (error) { 657 s->init_error = error; 658 g_topology_lock(); 659 gv_set_sd_state(s, GV_SD_STALE, 660 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 661 g_topology_unlock(); 662 } else { 663 g_topology_lock(); 664 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG); 665 g_topology_unlock(); 666 s->initialized = 0; 667 printf("GEOM_VINUM: subdisk '%s' init: finished successfully\n", 668 s->name); 669 } 670 kthread_exit(error); 671 } 672