1 /*- 2 * Copyright (c) 2004, 2005 Lukas Ertl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/systm.h>
#include <sys/time.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
#include <geom/vinum/geom_vinum_share.h>

static void	gv_drive_dead(void *, int);
static void	gv_drive_worker(void *);

/*
 * Set up the in-memory state of a newly created drive: build its on-disk
 * header, initialize the subdisk and freelist lists (one freelist entry
 * covering all available space), and create the BIO queue, its mutex and
 * the worker thread that will service I/O for this drive.
 */
void
gv_config_new_drive(struct gv_drive *d)
{
	struct gv_hdr *vhdr;
	struct gv_freelist *fl;

	KASSERT(d != NULL, ("config_new_drive: NULL d"));

	/* Build the vinum on-disk header for this drive. */
	vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
	vhdr->magic = GV_MAGIC;
	vhdr->config_length = GV_CFG_LEN;

	/* hostname is shared kernel state; access under its mutex. */
	mtx_lock(&hostname_mtx);
	bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
	mtx_unlock(&hostname_mtx);
	strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
	microtime(&vhdr->label.date_of_birth);

	d->hdr = vhdr;

	LIST_INIT(&d->subdisks);
	LIST_INIT(&d->freelist);

	/* A single freelist entry initially spans the whole data area. */
	fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
	fl->offset = GV_DATA_START;
	fl->size = d->avail;
	LIST_INSERT_HEAD(&d->freelist, fl, freelist);
	d->freelist_entries = 1;

	/* Create the BIO queue and the worker thread that drains it. */
	d->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO);
	bioq_init(d->bqueue);
	mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
	kproc_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name);
	d->flags |= GV_DRIVE_THREAD_ACTIVE;
}

/*
 * Write the vinum configuration out to every known drive that currently
 * has a geom attached.  Must be called with the topology lock held.
 */
void
gv_save_config_all(struct gv_softc *sc)
{
	struct gv_drive *d;

	g_topology_assert();

	LIST_FOREACH(d, &sc->drives, drive) {
		if (d->geom == NULL)
			continue;
		gv_save_config(NULL, d, sc);
	}
}

/* Save the vinum configuration
back to disk. */ 104 void 105 gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc) 106 { 107 struct g_geom *gp; 108 struct g_consumer *cp2; 109 struct gv_hdr *vhdr, *hdr; 110 struct sbuf *sb; 111 int error; 112 113 g_topology_assert(); 114 115 KASSERT(d != NULL, ("gv_save_config: null d")); 116 KASSERT(sc != NULL, ("gv_save_config: null sc")); 117 118 /* 119 * We can't save the config on a drive that isn't up, but drives that 120 * were just created aren't officially up yet, so we check a special 121 * flag. 122 */ 123 if ((d->state != GV_DRIVE_UP) && !(d->flags && GV_DRIVE_NEWBORN)) 124 return; 125 126 if (cp == NULL) { 127 gp = d->geom; 128 KASSERT(gp != NULL, ("gv_save_config: null gp")); 129 cp2 = LIST_FIRST(&gp->consumer); 130 KASSERT(cp2 != NULL, ("gv_save_config: null cp2")); 131 } else 132 cp2 = cp; 133 134 vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); 135 vhdr->magic = GV_MAGIC; 136 vhdr->config_length = GV_CFG_LEN; 137 138 hdr = d->hdr; 139 if (hdr == NULL) { 140 printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name); 141 g_free(vhdr); 142 return; 143 } 144 microtime(&hdr->label.last_update); 145 bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label)); 146 147 sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); 148 gv_format_config(sc, sb, 1, NULL); 149 sbuf_finish(sb); 150 151 error = g_access(cp2, 0, 1, 0); 152 if (error) { 153 printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n", 154 d->name, error); 155 sbuf_delete(sb); 156 g_free(vhdr); 157 return; 158 } 159 g_topology_unlock(); 160 161 do { 162 error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN); 163 if (error) { 164 printf("GEOM_VINUM: writing vhdr failed on drive %s, " 165 "errno %d", d->name, error); 166 break; 167 } 168 169 error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb), 170 GV_CFG_LEN); 171 if (error) { 172 printf("GEOM_VINUM: writing first config copy failed " 173 "on drive %s, errno %d", d->name, error); 174 break; 175 } 176 177 error = 
g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN, 178 sbuf_data(sb), GV_CFG_LEN); 179 if (error) 180 printf("GEOM_VINUM: writing second config copy failed " 181 "on drive %s, errno %d", d->name, error); 182 } while (0); 183 184 g_topology_lock(); 185 g_access(cp2, 0, -1, 0); 186 sbuf_delete(sb); 187 g_free(vhdr); 188 189 if (d->geom != NULL) 190 gv_drive_modify(d); 191 } 192 193 /* This resembles g_slice_access(). */ 194 static int 195 gv_drive_access(struct g_provider *pp, int dr, int dw, int de) 196 { 197 struct g_geom *gp; 198 struct g_consumer *cp; 199 struct g_provider *pp2; 200 struct gv_drive *d; 201 struct gv_sd *s, *s2; 202 int error; 203 204 gp = pp->geom; 205 cp = LIST_FIRST(&gp->consumer); 206 if (cp == NULL) 207 return (0); 208 209 d = gp->softc; 210 if (d == NULL) 211 return (0); 212 213 s = pp->private; 214 KASSERT(s != NULL, ("gv_drive_access: NULL s")); 215 216 LIST_FOREACH(s2, &d->subdisks, from_drive) { 217 if (s == s2) 218 continue; 219 if (s->drive_offset + s->size <= s2->drive_offset) 220 continue; 221 if (s2->drive_offset + s2->size <= s->drive_offset) 222 continue; 223 224 /* Overlap. */ 225 pp2 = s2->provider; 226 KASSERT(s2 != NULL, ("gv_drive_access: NULL s2")); 227 if ((pp->acw + dw) > 0 && pp2->ace > 0) 228 return (EPERM); 229 if ((pp->ace + de) > 0 && pp2->acw > 0) 230 return (EPERM); 231 } 232 233 error = g_access(cp, dr, dw, de); 234 return (error); 235 } 236 237 static void 238 gv_drive_done(struct bio *bp) 239 { 240 struct gv_drive *d; 241 242 /* Put the BIO on the worker queue again. 
 */
	d = bp->bio_from->geom->softc;
	/* Mark it completed so the worker delivers instead of re-issuing. */
	bp->bio_cflags |= GV_BIO_DONE;
	mtx_lock(&d->bqueue_mtx);
	bioq_insert_tail(d->bqueue, bp);
	wakeup(d);
	mtx_unlock(&d->bqueue_mtx);
}


/*
 * GEOM start method for subdisk providers: validate the request and hand
 * it to the drive's worker thread via the BIO queue.
 */
static void
gv_drive_start(struct bio *bp)
{
	struct gv_drive *d;
	struct gv_sd *s;

	/* Only plain read/write/delete requests are supported. */
	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_GETATTR:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}

	/* Refuse I/O to subdisks that aren't usable. */
	s = bp->bio_to->private;
	if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
		g_io_deliver(bp, ENXIO);
		return;
	}

	d = bp->bio_to->geom->softc;

	/*
	 * Put the BIO on the worker queue, where the worker thread will pick
	 * it up.
	 */
	mtx_lock(&d->bqueue_mtx);
	bioq_disksort(d->bqueue, bp);
	wakeup(d);
	mtx_unlock(&d->bqueue_mtx);
}

/*
 * Per-drive worker thread.  Loops taking BIOs off the drive's queue:
 * completed clones are delivered back up (marking the drive down on
 * error), new requests are cloned and sent down to the backing consumer.
 * Exits when GV_DRIVE_THREAD_DIE is set, draining the queue first.
 */
static void
gv_drive_worker(void *arg)
{
	struct bio *bp, *cbp;
	struct g_geom *gp;
	struct g_provider *pp;
	struct gv_drive *d;
	struct gv_sd *s;
	int error;

	d = arg;

	mtx_lock(&d->bqueue_mtx);
	for (;;) {
		/* We were signaled to exit. */
		if (d->flags & GV_DRIVE_THREAD_DIE)
			break;

		/* Take the first BIO from our queue. */
		bp = bioq_takefirst(d->bqueue);
		if (bp == NULL) {
			/* Sleep until new work arrives (or timeout). */
			msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10);
			continue;
		}
		mtx_unlock(&d->bqueue_mtx);

		pp = bp->bio_to;
		gp = pp->geom;

		/* Completed request. */
		if (bp->bio_cflags & GV_BIO_DONE) {
			error = bp->bio_error;

			/* Deliver the original request. */
			g_std_done(bp);

			/* The request had an error, we need to clean up. */
			if (error != 0) {
				g_topology_lock();
				gv_set_drive_state(d, GV_DRIVE_DOWN,
				    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
				g_topology_unlock();
				g_post_event(gv_drive_dead, d, M_WAITOK, d,
				    NULL);
			}

		/* New request, needs to be sent downwards.
 */
		} else {
			s = pp->private;

			/* Refuse I/O to subdisks that aren't usable. */
			if ((s->state == GV_SD_DOWN) ||
			    (s->state == GV_SD_STALE)) {
				g_io_deliver(bp, ENXIO);
				mtx_lock(&d->bqueue_mtx);
				continue;
			}
			if (bp->bio_offset > s->size) {
				g_io_deliver(bp, EINVAL);
				mtx_lock(&d->bqueue_mtx);
				continue;
			}

			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				g_io_deliver(bp, ENOMEM);
				mtx_lock(&d->bqueue_mtx);
				continue;
			}
			/* Clamp the request length to the subdisk's end. */
			if (cbp->bio_offset + cbp->bio_length > s->size)
				cbp->bio_length = s->size -
				    cbp->bio_offset;
			cbp->bio_done = gv_drive_done;
			/* Translate into an absolute offset on the drive. */
			cbp->bio_offset += s->drive_offset;
			g_io_request(cbp, LIST_FIRST(&gp->consumer));
		}

		mtx_lock(&d->bqueue_mtx);
	}

	/* Drain whatever is left on the queue before exiting. */
	while ((bp = bioq_takefirst(d->bqueue)) != NULL) {
		mtx_unlock(&d->bqueue_mtx);
		if (bp->bio_cflags & GV_BIO_DONE)
			g_std_done(bp);
		else
			g_io_deliver(bp, ENXIO);
		mtx_lock(&d->bqueue_mtx);
	}
	mtx_unlock(&d->bqueue_mtx);
	d->flags |= GV_DRIVE_THREAD_DEAD;

	kproc_exit(ENXIO);
}


/*
 * Orphan method: the backing provider went away.  Mark the drive down
 * and schedule the teardown event; if we have no softc, just wither.
 */
static void
gv_drive_orphan(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct gv_drive *d;

	g_topology_assert();
	gp = cp->geom;
	g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name);
	d = gp->softc;
	if (d != NULL) {
		gv_set_drive_state(d, GV_DRIVE_DOWN,
		    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
		g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL);
	} else
		g_wither_geom(gp, ENXIO);
}

/*
 * Taste method: probe the given provider for a valid vinum header and,
 * if found, create (or re-attach) the corresponding drive geom.
 */
static struct g_geom *
gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_geom *gp, *gp2;
	struct g_consumer *cp;
	struct gv_drive *d;
	struct gv_sd *s;
	struct gv_softc *sc;
	struct gv_freelist *fl;
	struct gv_hdr *vhdr;
	int error;
	char *buf, errstr[ERRBUFSIZ];

	vhdr = NULL;
	d = NULL;

	g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
	g_topology_assert();

	/* Find
the VINUM class and its associated geom. */ 420 gp2 = find_vinum_geom(); 421 if (gp2 == NULL) 422 return (NULL); 423 sc = gp2->softc; 424 425 gp = g_new_geomf(mp, "%s.vinumdrive", pp->name); 426 gp->start = gv_drive_start; 427 gp->orphan = gv_drive_orphan; 428 gp->access = gv_drive_access; 429 gp->start = gv_drive_start; 430 431 cp = g_new_consumer(gp); 432 g_attach(cp, pp); 433 error = g_access(cp, 1, 0, 0); 434 if (error) { 435 g_detach(cp); 436 g_destroy_consumer(cp); 437 g_destroy_geom(gp); 438 return (NULL); 439 } 440 441 g_topology_unlock(); 442 443 /* Now check if the provided slice is a valid vinum drive. */ 444 do { 445 vhdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, NULL); 446 if (vhdr == NULL) 447 break; 448 if (vhdr->magic != GV_MAGIC) { 449 g_free(vhdr); 450 break; 451 } 452 453 /* A valid vinum drive, let's parse the on-disk information. */ 454 buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL); 455 if (buf == NULL) { 456 g_free(vhdr); 457 break; 458 } 459 g_topology_lock(); 460 gv_parse_config(sc, buf, 1); 461 g_free(buf); 462 463 /* 464 * Let's see if this drive is already known in the 465 * configuration. 466 */ 467 d = gv_find_drive(sc, vhdr->label.name); 468 469 /* We already know about this drive. */ 470 if (d != NULL) { 471 /* Check if this drive already has a geom. */ 472 if (d->geom != NULL) { 473 g_topology_unlock(); 474 g_free(vhdr); 475 break; 476 } 477 bcopy(vhdr, d->hdr, sizeof(*vhdr)); 478 g_free(vhdr); 479 480 /* This is a new drive. */ 481 } else { 482 d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); 483 484 /* Initialize all needed variables. */ 485 d->size = pp->mediasize - GV_DATA_START; 486 d->avail = d->size; 487 d->hdr = vhdr; 488 strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME); 489 LIST_INIT(&d->subdisks); 490 LIST_INIT(&d->freelist); 491 492 /* We also need a freelist entry. 
 */
			fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
			fl->offset = GV_DATA_START;
			fl->size = d->avail;
			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
			d->freelist_entries = 1;

			/* Save it into the main configuration. */
			LIST_INSERT_HEAD(&sc->drives, d, drive);
		}

		/*
		 * Create bio queue, queue mutex and a worker thread, if
		 * necessary.
		 */
		if (d->bqueue == NULL) {
			d->bqueue = g_malloc(sizeof(struct bio_queue_head),
			    M_WAITOK | M_ZERO);
			bioq_init(d->bqueue);
		}
		if (mtx_initialized(&d->bqueue_mtx) == 0)
			mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);

		if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) {
			kproc_create(gv_drive_worker, d, NULL, 0, 0,
			    "gv_d %s", d->name);
			d->flags |= GV_DRIVE_THREAD_ACTIVE;
		}

		/* Drop the read access we took for probing. */
		g_access(cp, -1, 0, 0);

		/* Crosslink drive and geom. */
		gp->softc = d;
		d->geom = gp;
		d->vinumconf = sc;
		strncpy(d->device, pp->name, GV_MAXDRIVENAME);

		/*
		 * Find out which subdisks belong to this drive and crosslink
		 * them.
		 */
		LIST_FOREACH(s, &sc->subdisks, sd) {
			if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
				/* XXX: errors ignored */
				gv_sd_to_drive(sc, d, s, errstr,
				    sizeof(errstr));
		}

		/* This drive is now up for sure. */
		gv_set_drive_state(d, GV_DRIVE_UP, 0);

		/*
		 * If there are subdisks on this drive, we need to create
		 * providers for them.
		 */
		if (d->sdcount)
			gv_drive_modify(d);

		return (gp);

	} while (0);

	/* Not a vinum drive (or an error occurred): tear down again. */
	g_topology_lock();
	g_access(cp, -1, 0, 0);

	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	return (NULL);
}

/*
 * Modify the providers for the given drive 'd'. It is assumed that the
 * subdisk list of 'd' is already correctly set up.
 */
void
gv_drive_modify(struct gv_drive *d)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	struct g_provider *pp, *pp2;
	struct gv_sd *s;

	KASSERT(d != NULL, ("gv_drive_modify: null d"));
	gp = d->geom;
	KASSERT(gp != NULL, ("gv_drive_modify: null gp"));
	cp = LIST_FIRST(&gp->consumer);
	KASSERT(cp != NULL, ("gv_drive_modify: null cp"));
	pp = cp->provider;
	KASSERT(pp != NULL, ("gv_drive_modify: null pp"));

	g_topology_assert();

	LIST_FOREACH(s, &d->subdisks, from_drive) {
		/* This subdisk already has a provider. */
		if (s->provider != NULL)
			continue;
		/* Create a provider sized and named after the subdisk. */
		pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name);
		pp2->mediasize = s->size;
		pp2->sectorsize = pp->sectorsize;
		g_error_provider(pp2, 0);
		s->provider = pp2;
		pp2->private = s;
	}
}

/*
 * Event handler that finishes tearing down a drive that has gone down:
 * release consumer access, unlink the subdisks' providers, stop the
 * worker thread and wither the geom.
 */
static void
gv_drive_dead(void *arg, int flag)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	struct gv_drive *d;
	struct gv_sd *s;

	g_topology_assert();
	KASSERT(arg != NULL, ("gv_drive_dead: NULL arg"));

	if (flag == EV_CANCEL)
		return;

	d = arg;
	/* Only act on drives that are really down. */
	if (d->state != GV_DRIVE_DOWN)
		return;

	g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name);

	gp = d->geom;
	if (gp == NULL)
		return;

	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (cp->nstart != cp->nend) {
			/* Requests still in flight; retry via a new event. */
			printf("GEOM_VINUM: dead drive '%s' has still "
			    "active requests, can't detach consumer\n",
			    d->name);
			g_post_event(gv_drive_dead, d, M_WAITOK, d,
			    NULL);
			return;
		}
		if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	}

	printf("GEOM_VINUM: lost drive '%s'\n", d->name);
	d->geom = NULL;
	/* Detach all subdisks from their (now gone) providers. */
	LIST_FOREACH(s, &d->subdisks, from_drive) {
		s->provider = NULL;
		s->consumer = NULL;
	}
	gv_kill_drive_thread(d);
	gp->softc = NULL;
	g_wither_geom(gp, ENXIO);
}

static int
gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp, 647 struct g_geom *gp) 648 { 649 struct gv_drive *d; 650 651 g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name); 652 g_topology_assert(); 653 654 d = gp->softc; 655 gv_kill_drive_thread(d); 656 657 g_wither_geom(gp, ENXIO); 658 return (0); 659 } 660 661 #define VINUMDRIVE_CLASS_NAME "VINUMDRIVE" 662 663 static struct g_class g_vinum_drive_class = { 664 .name = VINUMDRIVE_CLASS_NAME, 665 .version = G_VERSION, 666 .taste = gv_drive_taste, 667 .destroy_geom = gv_drive_destroy_geom 668 }; 669 670 DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive); 671