1 /*- 2 * Copyright (c) 2006-2007 Ivan Voras <ivoras@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* Implementation notes: 28 * - "Components" are wrappers around providers that make up the 29 * virtual storage (i.e. a virstor has "physical" components) 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/module.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/sx.h> 42 #include <sys/bio.h> 43 #include <sys/sbuf.h> 44 #include <sys/sysctl.h> 45 #include <sys/malloc.h> 46 #include <sys/time.h> 47 #include <sys/proc.h> 48 #include <sys/kthread.h> 49 #include <sys/mutex.h> 50 #include <vm/uma.h> 51 #include <geom/geom.h> 52 53 #include <geom/virstor/g_virstor.h> 54 #include <geom/virstor/g_virstor_md.h> 55 56 FEATURE(g_virstor, "GEOM virtual storage support"); 57 58 /* Declare malloc(9) label */ 59 static MALLOC_DEFINE(M_GVIRSTOR, "gvirstor", "GEOM_VIRSTOR Data"); 60 61 /* GEOM class methods */ 62 static g_init_t g_virstor_init; 63 static g_fini_t g_virstor_fini; 64 static g_taste_t g_virstor_taste; 65 static g_ctl_req_t g_virstor_config; 66 static g_ctl_destroy_geom_t g_virstor_destroy_geom; 67 68 /* Declare & initialize class structure ("geom class") */ 69 struct g_class g_virstor_class = { 70 .name = G_VIRSTOR_CLASS_NAME, 71 .version = G_VERSION, 72 .init = g_virstor_init, 73 .fini = g_virstor_fini, 74 .taste = g_virstor_taste, 75 .ctlreq = g_virstor_config, 76 .destroy_geom = g_virstor_destroy_geom 77 /* The .dumpconf and the rest are only usable for a geom instance, so 78 * they will be set when such instance is created. */ 79 }; 80 81 /* Declare sysctl's and loader tunables */ 82 SYSCTL_DECL(_kern_geom); 83 static SYSCTL_NODE(_kern_geom, OID_AUTO, virstor, CTLFLAG_RW, 0, 84 "GEOM_GVIRSTOR information"); 85 86 static u_int g_virstor_debug = 2; /* XXX: lower to 2 when released to public */ 87 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, debug, CTLFLAG_RWTUN, &g_virstor_debug, 88 0, "Debug level (2=production, 5=normal, 15=excessive)"); 89 90 static u_int g_virstor_chunk_watermark = 100; 91 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, chunk_watermark, CTLFLAG_RWTUN, 92 &g_virstor_chunk_watermark, 0, 93 "Minimum number of free chunks before issuing administrative warning"); 94 95 static u_int g_virstor_component_watermark = 1; 96 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, component_watermark, CTLFLAG_RWTUN, 97 &g_virstor_component_watermark, 0, 98 "Minimum number of free components before issuing administrative warning"); 99 100 static int read_metadata(struct g_consumer *, struct g_virstor_metadata *); 101 static void write_metadata(struct g_consumer *, struct g_virstor_metadata *); 102 static int clear_metadata(struct g_virstor_component *); 103 static int add_provider_to_geom(struct g_virstor_softc *, struct g_provider *, 104 struct g_virstor_metadata *); 105 static struct g_geom *create_virstor_geom(struct g_class *, 106 struct g_virstor_metadata *); 107 static void virstor_check_and_run(struct g_virstor_softc *); 108 static u_int virstor_valid_components(struct g_virstor_softc *); 109 static int virstor_geom_destroy(struct g_virstor_softc *, boolean_t, 110 boolean_t); 111 static void remove_component(struct g_virstor_softc *, 112 struct g_virstor_component *, boolean_t); 113 static void bioq_dismantle(struct bio_queue_head *); 114 static int allocate_chunk(struct g_virstor_softc *, 115 struct g_virstor_component **, u_int *, u_int *); 116 static void delay_destroy_consumer(void *, int); 117 static void dump_component(struct g_virstor_component *comp); 118 #if 0 119 static void dump_me(struct virstor_map_entry *me, unsigned int nr); 120 #endif 121 122 static void virstor_ctl_stop(struct gctl_req *, struct g_class *); 123 static void virstor_ctl_add(struct gctl_req *, struct g_class *); 124 static void virstor_ctl_remove(struct gctl_req *, struct g_class *); 125 static struct g_virstor_softc * virstor_find_geom(const struct g_class *, 126 const char *); 127 static void update_metadata(struct g_virstor_softc *); 128 static void fill_metadata(struct g_virstor_softc *, struct g_virstor_metadata *, 129 u_int, u_int); 130 131 static void g_virstor_orphan(struct g_consumer *); 132 static int g_virstor_access(struct g_provider *, int, int, int); 133 static void g_virstor_start(struct bio *); 134 static void g_virstor_dumpconf(struct sbuf *, const char *, struct g_geom *, 135 struct g_consumer *, struct g_provider *); 136 static void g_virstor_done(struct bio *); 137 138 static void invalid_call(void); 139 /* 140 * Initialise GEOM class (per-class callback) 141 */ 142 static void 143 g_virstor_init(struct g_class *mp __unused) 144 { 145 146 /* Catch map struct size mismatch at compile time; Map entries must 147 * fit into MAXPHYS exactly, with no wasted space. */ 148 CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS); 149 150 /* Init UMA zones, TAILQ's, other global vars */ 151 } 152 153 /* 154 * Finalise GEOM class (per-class callback) 155 */ 156 static void 157 g_virstor_fini(struct g_class *mp __unused) 158 { 159 160 /* Deinit UMA zones & global vars */ 161 } 162 163 /* 164 * Config (per-class callback) 165 */ 166 static void 167 g_virstor_config(struct gctl_req *req, struct g_class *cp, char const *verb) 168 { 169 uint32_t *version; 170 171 g_topology_assert(); 172 173 version = gctl_get_paraml(req, "version", sizeof(*version)); 174 if (version == NULL) { 175 gctl_error(req, "Failed to get 'version' argument"); 176 return; 177 } 178 if (*version != G_VIRSTOR_VERSION) { 179 gctl_error(req, "Userland and kernel versions out of sync"); 180 return; 181 } 182 183 g_topology_unlock(); 184 if (strcmp(verb, "add") == 0) 185 virstor_ctl_add(req, cp); 186 else if (strcmp(verb, "stop") == 0 || strcmp(verb, "destroy") == 0) 187 virstor_ctl_stop(req, cp); 188 else if (strcmp(verb, "remove") == 0) 189 virstor_ctl_remove(req, cp); 190 else 191 gctl_error(req, "unknown verb: '%s'", verb); 192 g_topology_lock(); 193 } 194 195 /* 196 * "stop" verb from userland 197 */ 198 static void 199 virstor_ctl_stop(struct gctl_req *req, struct g_class *cp) 200 { 201 int *force, *nargs; 202 int i; 203 204 nargs = gctl_get_paraml(req, "nargs", sizeof *nargs); 205 if (nargs == NULL) { 206 gctl_error(req, "Error fetching argument '%s'", "nargs"); 207 return; 208 } 209 if (*nargs < 1) { 210 gctl_error(req, "Invalid number of arguments"); 211 return; 212 } 213 force = gctl_get_paraml(req, "force", sizeof *force); 214 if (force == NULL) { 215 gctl_error(req, "Error fetching argument '%s'", "force"); 216 return; 217 } 218 219 g_topology_lock(); 220 for (i = 0; i < *nargs; i++) { 221 char param[8]; 222 const char *name; 223 struct g_virstor_softc *sc; 224 int error; 225 226 sprintf(param, "arg%d", i); 227 name = gctl_get_asciiparam(req, param); 228 if (name == NULL) { 229 gctl_error(req, "No 'arg%d' argument", i); 230 g_topology_unlock(); 231 return; 232 } 233 sc = virstor_find_geom(cp, name); 234 if (sc == NULL) { 235 gctl_error(req, "Don't know anything about '%s'", name); 236 g_topology_unlock(); 237 return; 238 } 239 240 LOG_MSG(LVL_INFO, "Stopping %s by the userland command", 241 sc->geom->name); 242 update_metadata(sc); 243 if ((error = virstor_geom_destroy(sc, TRUE, TRUE)) != 0) { 244 LOG_MSG(LVL_ERROR, "Cannot destroy %s: %d", 245 sc->geom->name, error); 246 } 247 } 248 g_topology_unlock(); 249 } 250 251 /* 252 * "add" verb from userland - add new component(s) to the structure. 253 * This will be done all at once in here, without going through the 254 * .taste function for new components. 255 */ 256 static void 257 virstor_ctl_add(struct gctl_req *req, struct g_class *cp) 258 { 259 /* Note: while this is going on, I/O is being done on 260 * the g_up and g_down threads. The idea is to make changes 261 * to softc members in a way that can atomically activate 262 * them all at once. */ 263 struct g_virstor_softc *sc; 264 int *hardcode, *nargs; 265 const char *geom_name; /* geom to add a component to */ 266 struct g_consumer *fcp; 267 struct g_virstor_bio_q *bq; 268 u_int added; 269 int error; 270 int i; 271 272 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 273 if (nargs == NULL) { 274 gctl_error(req, "Error fetching argument '%s'", "nargs"); 275 return; 276 } 277 if (*nargs < 2) { 278 gctl_error(req, "Invalid number of arguments"); 279 return; 280 } 281 hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode)); 282 if (hardcode == NULL) { 283 gctl_error(req, "Error fetching argument '%s'", "hardcode"); 284 return; 285 } 286 287 /* Find "our" geom */ 288 geom_name = gctl_get_asciiparam(req, "arg0"); 289 if (geom_name == NULL) { 290 gctl_error(req, "Error fetching argument '%s'", "geom_name (arg0)"); 291 return; 292 } 293 sc = virstor_find_geom(cp, geom_name); 294 if (sc == NULL) { 295 gctl_error(req, "Don't know anything about '%s'", geom_name); 296 return; 297 } 298 299 if (virstor_valid_components(sc) != sc->n_components) { 300 LOG_MSG(LVL_ERROR, "Cannot add components to incomplete " 301 "virstor %s", sc->geom->name); 302 gctl_error(req, "Virstor %s is incomplete", sc->geom->name); 303 return; 304 } 305 306 fcp = sc->components[0].gcons; 307 added = 0; 308 g_topology_lock(); 309 for (i = 1; i < *nargs; i++) { 310 struct g_virstor_metadata md; 311 char aname[8]; 312 const char *prov_name; 313 struct g_provider *pp; 314 struct g_consumer *cp; 315 u_int nc; 316 u_int j; 317 318 snprintf(aname, sizeof aname, "arg%d", i); 319 prov_name = gctl_get_asciiparam(req, aname); 320 if (prov_name == NULL) { 321 gctl_error(req, "Error fetching argument '%s'", aname); 322 g_topology_unlock(); 323 return; 324 } 325 if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) 326 prov_name += sizeof(_PATH_DEV) - 1; 327 328 pp = g_provider_by_name(prov_name); 329 if (pp == NULL) { 330 /* This is the most common error so be verbose about it */ 331 if (added != 0) { 332 gctl_error(req, "Invalid provider: '%s' (added" 333 " %u components)", prov_name, added); 334 update_metadata(sc); 335 } else { 336 gctl_error(req, "Invalid provider: '%s'", 337 prov_name); 338 } 339 g_topology_unlock(); 340 return; 341 } 342 cp = g_new_consumer(sc->geom); 343 if (cp == NULL) { 344 gctl_error(req, "Cannot create consumer"); 345 g_topology_unlock(); 346 return; 347 } 348 error = g_attach(cp, pp); 349 if (error != 0) { 350 gctl_error(req, "Cannot attach a consumer to %s", 351 pp->name); 352 g_destroy_consumer(cp); 353 g_topology_unlock(); 354 return; 355 } 356 if (fcp->acr != 0 || fcp->acw != 0 || fcp->ace != 0) { 357 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 358 if (error != 0) { 359 gctl_error(req, "Access request failed for %s", 360 pp->name); 361 g_destroy_consumer(cp); 362 g_topology_unlock(); 363 return; 364 } 365 } 366 if (fcp->provider->sectorsize != pp->sectorsize) { 367 gctl_error(req, "Sector size doesn't fit for %s", 368 pp->name); 369 g_destroy_consumer(cp); 370 g_topology_unlock(); 371 return; 372 } 373 for (j = 0; j < sc->n_components; j++) { 374 if (strcmp(sc->components[j].gcons->provider->name, 375 pp->name) == 0) { 376 gctl_error(req, "Component %s already in %s", 377 pp->name, sc->geom->name); 378 g_destroy_consumer(cp); 379 g_topology_unlock(); 380 return; 381 } 382 } 383 sc->components = realloc(sc->components, 384 sizeof(*sc->components) * (sc->n_components + 1), 385 M_GVIRSTOR, M_WAITOK); 386 387 nc = sc->n_components; 388 sc->components[nc].gcons = cp; 389 sc->components[nc].sc = sc; 390 sc->components[nc].index = nc; 391 sc->components[nc].chunk_count = cp->provider->mediasize / 392 sc->chunk_size; 393 sc->components[nc].chunk_next = 0; 394 sc->components[nc].chunk_reserved = 0; 395 396 if (sc->components[nc].chunk_count < 4) { 397 gctl_error(req, "Provider too small: %s", 398 cp->provider->name); 399 g_destroy_consumer(cp); 400 g_topology_unlock(); 401 return; 402 } 403 fill_metadata(sc, &md, nc, *hardcode); 404 write_metadata(cp, &md); 405 /* The new component becomes visible when n_components is 406 * incremented */ 407 sc->n_components++; 408 added++; 409 410 } 411 /* This call to update_metadata() is critical. In case there's a 412 * power failure in the middle of it and some components are updated 413 * while others are not, there will be trouble on next .taste() iff 414 * a non-updated component is detected first */ 415 update_metadata(sc); 416 g_topology_unlock(); 417 LOG_MSG(LVL_INFO, "Added %d component(s) to %s", added, 418 sc->geom->name); 419 /* Fire off BIOs previously queued because there wasn't any 420 * physical space left. If the BIOs still can't be satisfied 421 * they will again be added to the end of the queue (during 422 * which the mutex will be recursed) */ 423 bq = malloc(sizeof(*bq), M_GVIRSTOR, M_WAITOK); 424 bq->bio = NULL; 425 mtx_lock(&sc->delayed_bio_q_mtx); 426 /* First, insert a sentinel to the queue end, so we don't 427 * end up in an infinite loop if there's still no free 428 * space available. */ 429 STAILQ_INSERT_TAIL(&sc->delayed_bio_q, bq, linkage); 430 while (!STAILQ_EMPTY(&sc->delayed_bio_q)) { 431 bq = STAILQ_FIRST(&sc->delayed_bio_q); 432 if (bq->bio != NULL) { 433 g_virstor_start(bq->bio); 434 STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); 435 free(bq, M_GVIRSTOR); 436 } else { 437 STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); 438 free(bq, M_GVIRSTOR); 439 break; 440 } 441 } 442 mtx_unlock(&sc->delayed_bio_q_mtx); 443 444 } 445 446 /* 447 * Find a geom handled by the class 448 */ 449 static struct g_virstor_softc * 450 virstor_find_geom(const struct g_class *cp, const char *name) 451 { 452 struct g_geom *gp; 453 454 LIST_FOREACH(gp, &cp->geom, geom) { 455 if (strcmp(name, gp->name) == 0) 456 return (gp->softc); 457 } 458 return (NULL); 459 } 460 461 /* 462 * Update metadata on all components to reflect the current state 463 * of these fields: 464 * - chunk_next 465 * - flags 466 * - md_count 467 * Expects things to be set up so write_metadata() can work, i.e. 468 * the topology lock must be held. 469 */ 470 static void 471 update_metadata(struct g_virstor_softc *sc) 472 { 473 struct g_virstor_metadata md; 474 u_int n; 475 476 if (virstor_valid_components(sc) != sc->n_components) 477 return; /* Incomplete device */ 478 LOG_MSG(LVL_DEBUG, "Updating metadata on components for %s", 479 sc->geom->name); 480 /* Update metadata on components */ 481 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, 482 sc->geom->class->name, sc->geom->name); 483 g_topology_assert(); 484 for (n = 0; n < sc->n_components; n++) { 485 read_metadata(sc->components[n].gcons, &md); 486 md.chunk_next = sc->components[n].chunk_next; 487 md.flags = sc->components[n].flags; 488 md.md_count = sc->n_components; 489 write_metadata(sc->components[n].gcons, &md); 490 } 491 } 492 493 /* 494 * Fills metadata (struct md) from information stored in softc and the nc'th 495 * component of virstor 496 */ 497 static void 498 fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md, 499 u_int nc, u_int hardcode) 500 { 501 struct g_virstor_component *c; 502 503 bzero(md, sizeof *md); 504 c = &sc->components[nc]; 505 506 strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof md->md_magic); 507 md->md_version = G_VIRSTOR_VERSION; 508 strncpy(md->md_name, sc->geom->name, sizeof md->md_name); 509 md->md_id = sc->id; 510 md->md_virsize = sc->virsize; 511 md->md_chunk_size = sc->chunk_size; 512 md->md_count = sc->n_components; 513 514 if (hardcode) { 515 strncpy(md->provider, c->gcons->provider->name, 516 sizeof md->provider); 517 } 518 md->no = nc; 519 md->provsize = c->gcons->provider->mediasize; 520 md->chunk_count = c->chunk_count; 521 md->chunk_next = c->chunk_next; 522 md->chunk_reserved = c->chunk_reserved; 523 md->flags = c->flags; 524 } 525 526 /* 527 * Remove a component from virstor device. 528 * Can only be done if the component is unallocated. 529 */ 530 static void 531 virstor_ctl_remove(struct gctl_req *req, struct g_class *cp) 532 { 533 /* As this is executed in parallel to I/O, operations on virstor 534 * structures must be as atomic as possible. */ 535 struct g_virstor_softc *sc; 536 int *nargs; 537 const char *geom_name; 538 u_int removed; 539 int i; 540 541 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 542 if (nargs == NULL) { 543 gctl_error(req, "Error fetching argument '%s'", "nargs"); 544 return; 545 } 546 if (*nargs < 2) { 547 gctl_error(req, "Invalid number of arguments"); 548 return; 549 } 550 /* Find "our" geom */ 551 geom_name = gctl_get_asciiparam(req, "arg0"); 552 if (geom_name == NULL) { 553 gctl_error(req, "Error fetching argument '%s'", 554 "geom_name (arg0)"); 555 return; 556 } 557 sc = virstor_find_geom(cp, geom_name); 558 if (sc == NULL) { 559 gctl_error(req, "Don't know anything about '%s'", geom_name); 560 return; 561 } 562 563 if (virstor_valid_components(sc) != sc->n_components) { 564 LOG_MSG(LVL_ERROR, "Cannot remove components from incomplete " 565 "virstor %s", sc->geom->name); 566 gctl_error(req, "Virstor %s is incomplete", sc->geom->name); 567 return; 568 } 569 570 removed = 0; 571 for (i = 1; i < *nargs; i++) { 572 char param[8]; 573 const char *prov_name; 574 int j, found; 575 struct g_virstor_component *newcomp, *compbak; 576 577 sprintf(param, "arg%d", i); 578 prov_name = gctl_get_asciiparam(req, param); 579 if (prov_name == NULL) { 580 gctl_error(req, "Error fetching argument '%s'", param); 581 return; 582 } 583 if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) 584 prov_name += sizeof(_PATH_DEV) - 1; 585 586 found = -1; 587 for (j = 0; j < sc->n_components; j++) { 588 if (strcmp(sc->components[j].gcons->provider->name, 589 prov_name) == 0) { 590 found = j; 591 break; 592 } 593 } 594 if (found == -1) { 595 LOG_MSG(LVL_ERROR, "No %s component in %s", 596 prov_name, sc->geom->name); 597 continue; 598 } 599 600 compbak = sc->components; 601 newcomp = malloc(sc->n_components * sizeof(*sc->components), 602 M_GVIRSTOR, M_WAITOK | M_ZERO); 603 bcopy(sc->components, newcomp, found * sizeof(*sc->components)); 604 bcopy(&sc->components[found + 1], newcomp + found, 605 found * sizeof(*sc->components)); 606 if ((sc->components[j].flags & VIRSTOR_PROVIDER_ALLOCATED) != 0) { 607 LOG_MSG(LVL_ERROR, "Allocated provider %s cannot be " 608 "removed from %s", 609 prov_name, sc->geom->name); 610 free(newcomp, M_GVIRSTOR); 611 /* We'll consider this non-fatal error */ 612 continue; 613 } 614 /* Renumerate unallocated components */ 615 for (j = 0; j < sc->n_components-1; j++) { 616 if ((sc->components[j].flags & 617 VIRSTOR_PROVIDER_ALLOCATED) == 0) { 618 sc->components[j].index = j; 619 } 620 } 621 /* This is the critical section. If a component allocation 622 * event happens while both variables are not yet set, 623 * there will be trouble. Something will panic on encountering 624 * NULL sc->components[x].gcomp member. 625 * Luckily, component allocation happens very rarely and 626 * removing components is an abnormal action in any case. */ 627 sc->components = newcomp; 628 sc->n_components--; 629 /* End critical section */ 630 631 g_topology_lock(); 632 if (clear_metadata(&compbak[found]) != 0) { 633 LOG_MSG(LVL_WARNING, "Trouble ahead: cannot clear " 634 "metadata on %s", prov_name); 635 } 636 g_detach(compbak[found].gcons); 637 g_destroy_consumer(compbak[found].gcons); 638 g_topology_unlock(); 639 640 free(compbak, M_GVIRSTOR); 641 642 removed++; 643 } 644 645 /* This call to update_metadata() is critical. In case there's a 646 * power failure in the middle of it and some components are updated 647 * while others are not, there will be trouble on next .taste() iff 648 * a non-updated component is detected first */ 649 g_topology_lock(); 650 update_metadata(sc); 651 g_topology_unlock(); 652 LOG_MSG(LVL_INFO, "Removed %d component(s) from %s", removed, 653 sc->geom->name); 654 } 655 656 /* 657 * Clear metadata sector on component 658 */ 659 static int 660 clear_metadata(struct g_virstor_component *comp) 661 { 662 char *buf; 663 int error; 664 665 LOG_MSG(LVL_INFO, "Clearing metadata on %s", 666 comp->gcons->provider->name); 667 g_topology_assert(); 668 error = g_access(comp->gcons, 0, 1, 0); 669 if (error != 0) 670 return (error); 671 buf = malloc(comp->gcons->provider->sectorsize, M_GVIRSTOR, 672 M_WAITOK | M_ZERO); 673 error = g_write_data(comp->gcons, 674 comp->gcons->provider->mediasize - 675 comp->gcons->provider->sectorsize, 676 buf, 677 comp->gcons->provider->sectorsize); 678 free(buf, M_GVIRSTOR); 679 g_access(comp->gcons, 0, -1, 0); 680 return (error); 681 } 682 683 /* 684 * Destroy geom forcibly. 685 */ 686 static int 687 g_virstor_destroy_geom(struct gctl_req *req __unused, struct g_class *mp, 688 struct g_geom *gp) 689 { 690 struct g_virstor_softc *sc; 691 int exitval; 692 693 sc = gp->softc; 694 KASSERT(sc != NULL, ("%s: NULL sc", __func__)); 695 696 exitval = 0; 697 LOG_MSG(LVL_DEBUG, "%s called for %s, sc=%p", __func__, gp->name, 698 gp->softc); 699 700 if (sc != NULL) { 701 #ifdef INVARIANTS 702 char *buf; 703 int error; 704 off_t off; 705 int isclean, count; 706 int n; 707 708 LOG_MSG(LVL_INFO, "INVARIANTS detected"); 709 LOG_MSG(LVL_INFO, "Verifying allocation " 710 "table for %s", sc->geom->name); 711 count = 0; 712 for (n = 0; n < sc->chunk_count; n++) { 713 if (sc->map[n].flags || VIRSTOR_MAP_ALLOCATED != 0) 714 count++; 715 } 716 LOG_MSG(LVL_INFO, "Device %s has %d allocated chunks", 717 sc->geom->name, count); 718 n = off = count = 0; 719 isclean = 1; 720 if (virstor_valid_components(sc) != sc->n_components) { 721 /* This is a incomplete virstor device (not all 722 * components have been found) */ 723 LOG_MSG(LVL_ERROR, "Device %s is incomplete", 724 sc->geom->name); 725 goto bailout; 726 } 727 error = g_access(sc->components[0].gcons, 1, 0, 0); 728 KASSERT(error == 0, ("%s: g_access failed (%d)", __func__, 729 error)); 730 /* Compare the whole on-disk allocation table with what's 731 * currently in memory */ 732 while (n < sc->chunk_count) { 733 buf = g_read_data(sc->components[0].gcons, off, 734 sc->sectorsize, &error); 735 KASSERT(buf != NULL, ("g_read_data returned NULL (%d) " 736 "for read at %jd", error, off)); 737 if (bcmp(buf, &sc->map[n], sc->sectorsize) != 0) { 738 LOG_MSG(LVL_ERROR, "ERROR in allocation table, " 739 "entry %d, offset %jd", n, off); 740 isclean = 0; 741 count++; 742 } 743 n += sc->me_per_sector; 744 off += sc->sectorsize; 745 g_free(buf); 746 } 747 error = g_access(sc->components[0].gcons, -1, 0, 0); 748 KASSERT(error == 0, ("%s: g_access failed (%d) on exit", 749 __func__, error)); 750 if (isclean != 1) { 751 LOG_MSG(LVL_ERROR, "ALLOCATION TABLE CORRUPTED FOR %s " 752 "(%d sectors don't match, max %zu allocations)", 753 sc->geom->name, count, 754 count * sc->me_per_sector); 755 } else { 756 LOG_MSG(LVL_INFO, "Allocation table ok for %s", 757 sc->geom->name); 758 } 759 bailout: 760 #endif 761 update_metadata(sc); 762 virstor_geom_destroy(sc, FALSE, FALSE); 763 exitval = EAGAIN; 764 } else 765 exitval = 0; 766 return (exitval); 767 } 768 769 /* 770 * Taste event (per-class callback) 771 * Examines a provider and creates geom instances if needed 772 */ 773 static struct g_geom * 774 g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags) 775 { 776 struct g_virstor_metadata md; 777 struct g_geom *gp; 778 struct g_consumer *cp; 779 struct g_virstor_softc *sc; 780 int error; 781 782 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 783 g_topology_assert(); 784 LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name); 785 786 /* We need a dummy geom to attach a consumer to the given provider */ 787 gp = g_new_geomf(mp, "virstor:taste.helper"); 788 gp->start = (void *)invalid_call; /* XXX: hacked up so the */ 789 gp->access = (void *)invalid_call; /* compiler doesn't complain. */ 790 gp->orphan = (void *)invalid_call; /* I really want these to fail. */ 791 792 cp = g_new_consumer(gp); 793 g_attach(cp, pp); 794 error = read_metadata(cp, &md); 795 g_detach(cp); 796 g_destroy_consumer(cp); 797 g_destroy_geom(gp); 798 799 if (error != 0) 800 return (NULL); 801 802 if (strcmp(md.md_magic, G_VIRSTOR_MAGIC) != 0) 803 return (NULL); 804 if (md.md_version != G_VIRSTOR_VERSION) { 805 LOG_MSG(LVL_ERROR, "Kernel module version invalid " 806 "to handle %s (%s) : %d should be %d", 807 md.md_name, pp->name, md.md_version, G_VIRSTOR_VERSION); 808 return (NULL); 809 } 810 if (md.provsize != pp->mediasize) 811 return (NULL); 812 813 /* If the provider name is hardcoded, use the offered provider only 814 * if it's been offered with its proper name (the one used in 815 * the label command). */ 816 if (md.provider[0] != '\0' && 817 !g_compare_names(md.provider, pp->name)) 818 return (NULL); 819 820 /* Iterate all geoms this class already knows about to see if a new 821 * geom instance of this class needs to be created (in case the provider 822 * is first from a (possibly) multi-consumer geom) or it just needs 823 * to be added to an existing instance. */ 824 sc = NULL; 825 gp = NULL; 826 LIST_FOREACH(gp, &mp->geom, geom) { 827 sc = gp->softc; 828 if (sc == NULL) 829 continue; 830 if (strcmp(md.md_name, sc->geom->name) != 0) 831 continue; 832 if (md.md_id != sc->id) 833 continue; 834 break; 835 } 836 if (gp != NULL) { /* We found an existing geom instance; add to it */ 837 LOG_MSG(LVL_INFO, "Adding %s to %s", pp->name, md.md_name); 838 error = add_provider_to_geom(sc, pp, &md); 839 if (error != 0) { 840 LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", 841 pp->name, md.md_name, error); 842 return (NULL); 843 } 844 } else { /* New geom instance needs to be created */ 845 gp = create_virstor_geom(mp, &md); 846 if (gp == NULL) { 847 LOG_MSG(LVL_ERROR, "Error creating new instance of " 848 "class %s: %s", mp->name, md.md_name); 849 LOG_MSG(LVL_DEBUG, "Error creating %s at %s", 850 md.md_name, pp->name); 851 return (NULL); 852 } 853 sc = gp->softc; 854 LOG_MSG(LVL_INFO, "Adding %s to %s (first found)", pp->name, 855 md.md_name); 856 error = add_provider_to_geom(sc, pp, &md); 857 if (error != 0) { 858 LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", 859 pp->name, md.md_name, error); 860 virstor_geom_destroy(sc, TRUE, FALSE); 861 return (NULL); 862 } 863 } 864 865 return (gp); 866 } 867 868 /* 869 * Destroyes consumer passed to it in arguments. Used as a callback 870 * on g_event queue. 871 */ 872 static void 873 delay_destroy_consumer(void *arg, int flags __unused) 874 { 875 struct g_consumer *c = arg; 876 KASSERT(c != NULL, ("%s: invalid consumer", __func__)); 877 LOG_MSG(LVL_DEBUG, "Consumer %s destroyed with delay", 878 c->provider->name); 879 g_detach(c); 880 g_destroy_consumer(c); 881 } 882 883 /* 884 * Remove a component (consumer) from geom instance; If it's the first 885 * component being removed, orphan the provider to announce geom's being 886 * dismantled 887 */ 888 static void 889 remove_component(struct g_virstor_softc *sc, struct g_virstor_component *comp, 890 boolean_t delay) 891 { 892 struct g_consumer *c; 893 894 KASSERT(comp->gcons != NULL, ("Component with no consumer in %s", 895 sc->geom->name)); 896 c = comp->gcons; 897 898 comp->gcons = NULL; 899 KASSERT(c->provider != NULL, ("%s: no provider", __func__)); 900 LOG_MSG(LVL_DEBUG, "Component %s removed from %s", c->provider->name, 901 sc->geom->name); 902 if (sc->provider != NULL) { 903 LOG_MSG(LVL_INFO, "Removing provider %s", sc->provider->name); 904 g_wither_provider(sc->provider, ENXIO); 905 sc->provider = NULL; 906 } 907 908 if (c->acr > 0 || c->acw > 0 || c->ace > 0) 909 g_access(c, -c->acr, -c->acw, -c->ace); 910 if (delay) { 911 /* Destroy consumer after it's tasted */ 912 g_post_event(delay_destroy_consumer, c, M_WAITOK, NULL); 913 } else { 914 g_detach(c); 915 g_destroy_consumer(c); 916 } 917 } 918 919 /* 920 * Destroy geom - called internally 921 * See g_virstor_destroy_geom for the other one 922 */ 923 static int 924 virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force, 925 boolean_t delay) 926 { 927 struct g_provider *pp; 928 struct g_geom *gp; 929 u_int n; 930 931 g_topology_assert(); 932 933 if (sc == NULL) 934 return (ENXIO); 935 936 pp = sc->provider; 937 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 938 LOG_MSG(force ? LVL_WARNING : LVL_ERROR, 939 "Device %s is still open.", pp->name); 940 if (!force) 941 return (EBUSY); 942 } 943 944 for (n = 0; n < sc->n_components; n++) { 945 if (sc->components[n].gcons != NULL) 946 remove_component(sc, &sc->components[n], delay); 947 } 948 949 gp = sc->geom; 950 gp->softc = NULL; 951 952 KASSERT(sc->provider == NULL, ("Provider still exists for %s", 953 gp->name)); 954 955 /* XXX: This might or might not work, since we're called with 956 * the topology lock held. Also, it might panic the kernel if 957 * the error'd BIO is in softupdates code. */ 958 mtx_lock(&sc->delayed_bio_q_mtx); 959 while (!STAILQ_EMPTY(&sc->delayed_bio_q)) { 960 struct g_virstor_bio_q *bq; 961 bq = STAILQ_FIRST(&sc->delayed_bio_q); 962 bq->bio->bio_error = ENOSPC; 963 g_io_deliver(bq->bio, EIO); 964 STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); 965 free(bq, M_GVIRSTOR); 966 } 967 mtx_unlock(&sc->delayed_bio_q_mtx); 968 mtx_destroy(&sc->delayed_bio_q_mtx); 969 970 free(sc->map, M_GVIRSTOR); 971 free(sc->components, M_GVIRSTOR); 972 bzero(sc, sizeof *sc); 973 free(sc, M_GVIRSTOR); 974 975 pp = LIST_FIRST(&gp->provider); /* We only offer one provider */ 976 if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)) 977 LOG_MSG(LVL_DEBUG, "Device %s destroyed", gp->name); 978 979 g_wither_geom(gp, ENXIO); 980 981 return (0); 982 } 983 984 /* 985 * Utility function: read metadata & decode. Wants topology lock to be 986 * held. 987 */ 988 static int 989 read_metadata(struct g_consumer *cp, struct g_virstor_metadata *md) 990 { 991 struct g_provider *pp; 992 char *buf; 993 int error; 994 995 g_topology_assert(); 996 error = g_access(cp, 1, 0, 0); 997 if (error != 0) 998 return (error); 999 pp = cp->provider; 1000 g_topology_unlock(); 1001 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 1002 &error); 1003 g_topology_lock(); 1004 g_access(cp, -1, 0, 0); 1005 if (buf == NULL) 1006 return (error); 1007 1008 virstor_metadata_decode(buf, md); 1009 g_free(buf); 1010 1011 return (0); 1012 } 1013 1014 /** 1015 * Utility function: encode & write metadata. Assumes topology lock is 1016 * held. 1017 * 1018 * There is no useful way of recovering from errors in this function, 1019 * not involving panicking the kernel. If the metadata cannot be written 1020 * the most we can do is notify the operator and hope he spots it and 1021 * replaces the broken drive. 1022 */ 1023 static void 1024 write_metadata(struct g_consumer *cp, struct g_virstor_metadata *md) 1025 { 1026 struct g_provider *pp; 1027 char *buf; 1028 int error; 1029 1030 KASSERT(cp != NULL && md != NULL && cp->provider != NULL, 1031 ("Something's fishy in %s", __func__)); 1032 LOG_MSG(LVL_DEBUG, "Writing metadata on %s", cp->provider->name); 1033 g_topology_assert(); 1034 error = g_access(cp, 0, 1, 0); 1035 if (error != 0) { 1036 LOG_MSG(LVL_ERROR, "g_access(0,1,0) failed for %s: %d", 1037 cp->provider->name, error); 1038 return; 1039 } 1040 pp = cp->provider; 1041 1042 buf = malloc(pp->sectorsize, M_GVIRSTOR, M_WAITOK); 1043 virstor_metadata_encode(md, buf); 1044 g_topology_unlock(); 1045 error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, 1046 pp->sectorsize); 1047 g_topology_lock(); 1048 g_access(cp, 0, -1, 0); 1049 free(buf, M_GVIRSTOR); 1050 1051 if (error != 0) 1052 LOG_MSG(LVL_ERROR, "Error %d writing metadata to %s", 1053 error, cp->provider->name); 1054 } 1055 1056 /* 1057 * Creates a new instance of this GEOM class, initialise softc 1058 */ 1059 static struct g_geom * 1060 create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md) 1061 { 1062 struct g_geom *gp; 1063 struct g_virstor_softc *sc; 1064 1065 LOG_MSG(LVL_DEBUG, "Creating geom instance for %s (id=%u)", 1066 md->md_name, md->md_id); 1067 1068 if (md->md_count < 1 || md->md_chunk_size < 1 || 1069 md->md_virsize < md->md_chunk_size) { 1070 /* This is bogus configuration, and probably means data is 1071 * somehow corrupted. Panic, maybe? */ 1072 LOG_MSG(LVL_ERROR, "Nonsensical metadata information for %s", 1073 md->md_name); 1074 return (NULL); 1075 } 1076 1077 /* Check if it's already created */ 1078 LIST_FOREACH(gp, &mp->geom, geom) { 1079 sc = gp->softc; 1080 if (sc != NULL && strcmp(sc->geom->name, md->md_name) == 0) { 1081 LOG_MSG(LVL_WARNING, "Geom %s already exists", 1082 md->md_name); 1083 if (sc->id != md->md_id) { 1084 LOG_MSG(LVL_ERROR, 1085 "Some stale or invalid components " 1086 "exist for virstor device named %s. " 1087 "You will need to <CLEAR> all stale " 1088 "components and maybe reconfigure " 1089 "the virstor device. Tune " 1090 "kern.geom.virstor.debug sysctl up " 1091 "for more information.", 1092 sc->geom->name); 1093 } 1094 return (NULL); 1095 } 1096 } 1097 gp = g_new_geomf(mp, "%s", md->md_name); 1098 gp->softc = NULL; /* to circumevent races that test softc */ 1099 1100 gp->start = g_virstor_start; 1101 gp->spoiled = g_virstor_orphan; 1102 gp->orphan = g_virstor_orphan; 1103 gp->access = g_virstor_access; 1104 gp->dumpconf = g_virstor_dumpconf; 1105 1106 sc = malloc(sizeof(*sc), M_GVIRSTOR, M_WAITOK | M_ZERO); 1107 sc->id = md->md_id; 1108 sc->n_components = md->md_count; 1109 sc->components = malloc(sizeof(struct g_virstor_component) * md->md_count, 1110 M_GVIRSTOR, M_WAITOK | M_ZERO); 1111 sc->chunk_size = md->md_chunk_size; 1112 sc->virsize = md->md_virsize; 1113 STAILQ_INIT(&sc->delayed_bio_q); 1114 mtx_init(&sc->delayed_bio_q_mtx, "gvirstor_delayed_bio_q_mtx", 1115 "gvirstor", MTX_DEF | MTX_RECURSE); 1116 1117 sc->geom = gp; 1118 sc->provider = NULL; /* virstor_check_and_run will create it */ 1119 gp->softc = sc; 1120 1121 LOG_MSG(LVL_ANNOUNCE, "Device %s created", sc->geom->name); 1122 1123 return (gp); 1124 } 1125 1126 /* 1127 * Add provider to a GEOM class instance 1128 */ 1129 static int 1130 add_provider_to_geom(struct g_virstor_softc *sc, struct g_provider *pp, 1131 struct g_virstor_metadata *md) 1132 { 1133 struct g_virstor_component *component; 1134 struct g_consumer *cp, *fcp; 1135 struct g_geom *gp; 1136 int error; 1137 1138 if (md->no >= sc->n_components) 1139 return (EINVAL); 1140 1141 /* "Current" compontent */ 1142 component = &(sc->components[md->no]); 1143 if (component->gcons != NULL) 1144 return (EEXIST); 1145 1146 gp = sc->geom; 1147 fcp = LIST_FIRST(&gp->consumer); 1148 1149 cp = g_new_consumer(gp); 1150 error = g_attach(cp, pp); 1151 1152 if (error != 0) { 1153 g_destroy_consumer(cp); 1154 return (error); 1155 } 1156 1157 if (fcp != NULL) { 1158 if (fcp->provider->sectorsize != pp->sectorsize) { 1159 /* TODO: this can be made to work */ 1160 LOG_MSG(LVL_ERROR, "Provider %s of %s has invalid " 1161 "sector size (%d)", pp->name, sc->geom->name, 1162 pp->sectorsize); 1163 return (EINVAL); 1164 } 1165 if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) { 1166 /* Replicate access permissions from first "live" consumer 1167 * to the new one */ 1168 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 1169 if (error != 0) { 1170 g_detach(cp); 1171 g_destroy_consumer(cp); 1172 return (error); 1173 } 1174 } 1175 } 1176 1177 /* Bring up a new component */ 1178 cp->private = component; 1179 component->gcons = cp; 1180 component->sc = sc; 1181 component->index = md->no; 1182 component->chunk_count = md->chunk_count; 1183 component->chunk_next = md->chunk_next; 1184 component->chunk_reserved = md->chunk_reserved; 1185 component->flags = md->flags; 1186 1187 LOG_MSG(LVL_DEBUG, "%s attached to %s", pp->name, sc->geom->name); 1188 1189 virstor_check_and_run(sc); 1190 return (0); 1191 } 1192 1193 /* 1194 * Check if everything's ready to create the geom provider & device entry, 1195 * create and start provider. 1196 * Called ultimately by .taste, from g_event thread 1197 */ 1198 static void 1199 virstor_check_and_run(struct g_virstor_softc *sc) 1200 { 1201 off_t off; 1202 size_t n, count; 1203 int index; 1204 int error; 1205 1206 if (virstor_valid_components(sc) != sc->n_components) 1207 return; 1208 1209 if (virstor_valid_components(sc) == 0) { 1210 /* This is actually a candidate for panic() */ 1211 LOG_MSG(LVL_ERROR, "No valid components for %s?", 1212 sc->provider->name); 1213 return; 1214 } 1215 1216 sc->sectorsize = sc->components[0].gcons->provider->sectorsize; 1217 1218 /* Initialise allocation map from the first consumer */ 1219 sc->chunk_count = sc->virsize / sc->chunk_size; 1220 if (sc->chunk_count * (off_t)sc->chunk_size != sc->virsize) { 1221 LOG_MSG(LVL_WARNING, "Device %s truncated to %ju bytes", 1222 sc->provider->name, 1223 sc->chunk_count * (off_t)sc->chunk_size); 1224 } 1225 sc->map_size = sc->chunk_count * sizeof *(sc->map); 1226 /* The following allocation is in order of 4MB - 8MB */ 1227 sc->map = malloc(sc->map_size, M_GVIRSTOR, M_WAITOK); 1228 KASSERT(sc->map != NULL, ("%s: Memory allocation error (%zu bytes) for %s", 1229 __func__, sc->map_size, sc->provider->name)); 1230 sc->map_sectors = sc->map_size / sc->sectorsize; 1231 1232 count = 0; 1233 for (n = 0; n < sc->n_components; n++) 1234 count += sc->components[n].chunk_count; 1235 LOG_MSG(LVL_INFO, "Device %s has %zu physical chunks and %zu virtual " 1236 "(%zu KB chunks)", 1237 sc->geom->name, count, sc->chunk_count, sc->chunk_size / 1024); 1238 1239 error = g_access(sc->components[0].gcons, 1, 0, 0); 1240 if (error != 0) { 1241 LOG_MSG(LVL_ERROR, "Cannot acquire read access for %s to " 1242 "read allocation map for %s", 1243 sc->components[0].gcons->provider->name, 1244 sc->geom->name); 1245 return; 1246 } 1247 /* Read in the allocation map */ 1248 LOG_MSG(LVL_DEBUG, "Reading map for %s from %s", sc->geom->name, 1249 sc->components[0].gcons->provider->name); 1250 off = count = n = 0; 1251 while (count < sc->map_size) { 1252 struct g_virstor_map_entry *mapbuf; 1253 size_t bs; 1254 1255 bs = MIN(MAXPHYS, sc->map_size - count); 1256 if (bs % sc->sectorsize != 0) { 1257 /* Check for alignment errors */ 1258 bs = rounddown(bs, sc->sectorsize); 1259 if (bs == 0) 1260 break; 1261 LOG_MSG(LVL_ERROR, "Trouble: map is not sector-aligned " 1262 "for %s on %s", sc->geom->name, 1263 sc->components[0].gcons->provider->name); 1264 } 1265 mapbuf = g_read_data(sc->components[0].gcons, off, bs, &error); 1266 if (mapbuf == NULL) { 1267 free(sc->map, M_GVIRSTOR); 1268 LOG_MSG(LVL_ERROR, "Error reading allocation map " 1269 "for %s from %s (offset %ju) (error %d)", 1270 sc->geom->name, 1271 sc->components[0].gcons->provider->name, 1272 off, error); 1273 return; 1274 } 1275 1276 bcopy(mapbuf, &sc->map[n], bs); 1277 off += bs; 1278 count += bs; 1279 n += bs / sizeof *(sc->map); 1280 g_free(mapbuf); 1281 } 1282 g_access(sc->components[0].gcons, -1, 0, 0); 1283 LOG_MSG(LVL_DEBUG, "Read map for %s", sc->geom->name); 1284 1285 /* find first component with allocatable chunks */ 1286 index = -1; 1287 for (n = 0; n < sc->n_components; n++) { 1288 if (sc->components[n].chunk_next < 1289 sc->components[n].chunk_count) { 1290 index = n; 1291 break; 1292 } 1293 } 1294 if (index == -1) 1295 /* not found? set it to the last component and handle it 1296 * later */ 1297 index = sc->n_components - 1; 1298 1299 if (index >= sc->n_components - g_virstor_component_watermark - 1) { 1300 LOG_MSG(LVL_WARNING, "Device %s running out of components " 1301 "(%d/%u: %s)", sc->geom->name, 1302 index+1, 1303 sc->n_components, 1304 sc->components[index].gcons->provider->name); 1305 } 1306 sc->curr_component = index; 1307 1308 if (sc->components[index].chunk_next >= 1309 sc->components[index].chunk_count - g_virstor_chunk_watermark) { 1310 LOG_MSG(LVL_WARNING, 1311 "Component %s of %s is running out of free space " 1312 "(%u chunks left)", 1313 sc->components[index].gcons->provider->name, 1314 sc->geom->name, sc->components[index].chunk_count - 1315 sc->components[index].chunk_next); 1316 } 1317 1318 sc->me_per_sector = sc->sectorsize / sizeof *(sc->map); 1319 if (sc->sectorsize % sizeof *(sc->map) != 0) { 1320 LOG_MSG(LVL_ERROR, 1321 "%s: Map entries don't fit exactly in a sector (%s)", 1322 __func__, sc->geom->name); 1323 return; 1324 } 1325 1326 /* Recalculate allocated chunks in components & at the same time 1327 * verify map data is sane. We could trust metadata on this, but 1328 * we want to make sure. */ 1329 for (n = 0; n < sc->n_components; n++) 1330 sc->components[n].chunk_next = sc->components[n].chunk_reserved; 1331 1332 for (n = 0; n < sc->chunk_count; n++) { 1333 if (sc->map[n].provider_no >= sc->n_components || 1334 sc->map[n].provider_chunk >= 1335 sc->components[sc->map[n].provider_no].chunk_count) { 1336 LOG_MSG(LVL_ERROR, "%s: Invalid entry %u in map for %s", 1337 __func__, (u_int)n, sc->geom->name); 1338 LOG_MSG(LVL_ERROR, "%s: provider_no: %u, n_components: %u" 1339 " provider_chunk: %u, chunk_count: %u", __func__, 1340 sc->map[n].provider_no, sc->n_components, 1341 sc->map[n].provider_chunk, 1342 sc->components[sc->map[n].provider_no].chunk_count); 1343 return; 1344 } 1345 if (sc->map[n].flags & VIRSTOR_MAP_ALLOCATED) 1346 sc->components[sc->map[n].provider_no].chunk_next++; 1347 } 1348 1349 sc->provider = g_new_providerf(sc->geom, "virstor/%s", 1350 sc->geom->name); 1351 1352 sc->provider->sectorsize = sc->sectorsize; 1353 sc->provider->mediasize = sc->virsize; 1354 g_error_provider(sc->provider, 0); 1355 1356 LOG_MSG(LVL_INFO, "%s activated", sc->provider->name); 1357 LOG_MSG(LVL_DEBUG, "%s starting with current component %u, starting " 1358 "chunk %u", sc->provider->name, sc->curr_component, 1359 sc->components[sc->curr_component].chunk_next); 1360 } 1361 1362 /* 1363 * Returns count of active providers in this geom instance 1364 */ 1365 static u_int 1366 virstor_valid_components(struct g_virstor_softc *sc) 1367 { 1368 unsigned int nc, i; 1369 1370 nc = 0; 1371 KASSERT(sc != NULL, ("%s: softc is NULL", __func__)); 1372 KASSERT(sc->components != NULL, ("%s: sc->components is NULL", __func__)); 1373 for (i = 0; i < sc->n_components; i++) 1374 if (sc->components[i].gcons != NULL) 1375 nc++; 1376 return (nc); 1377 } 1378 1379 /* 1380 * Called when the consumer gets orphaned (?) 1381 */ 1382 static void 1383 g_virstor_orphan(struct g_consumer *cp) 1384 { 1385 struct g_virstor_softc *sc; 1386 struct g_virstor_component *comp; 1387 struct g_geom *gp; 1388 1389 g_topology_assert(); 1390 gp = cp->geom; 1391 sc = gp->softc; 1392 if (sc == NULL) 1393 return; 1394 1395 comp = cp->private; 1396 KASSERT(comp != NULL, ("%s: No component in private part of consumer", 1397 __func__)); 1398 remove_component(sc, comp, FALSE); 1399 if (virstor_valid_components(sc) == 0) 1400 virstor_geom_destroy(sc, TRUE, FALSE); 1401 } 1402 1403 /* 1404 * Called to notify geom when it's been opened, and for what intent 1405 */ 1406 static int 1407 g_virstor_access(struct g_provider *pp, int dr, int dw, int de) 1408 { 1409 struct g_consumer *c; 1410 struct g_virstor_softc *sc; 1411 struct g_geom *gp; 1412 int error; 1413 1414 KASSERT(pp != NULL, ("%s: NULL provider", __func__)); 1415 gp = pp->geom; 1416 KASSERT(gp != NULL, ("%s: NULL geom", __func__)); 1417 sc = gp->softc; 1418 1419 if (sc == NULL) { 1420 /* It seems that .access can be called with negative dr,dw,dx 1421 * in this case but I want to check for myself */ 1422 LOG_MSG(LVL_WARNING, "access(%d, %d, %d) for %s", 1423 dr, dw, de, pp->name); 1424 /* This should only happen when geom is withered so 1425 * allow only negative requests */ 1426 KASSERT(dr <= 0 && dw <= 0 && de <= 0, 1427 ("%s: Positive access for %s", __func__, pp->name)); 1428 if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) 1429 LOG_MSG(LVL_DEBUG, "Device %s definitely destroyed", 1430 pp->name); 1431 return (0); 1432 } 1433 1434 /* Grab an exclusive bit to propagate on our consumers on first open */ 1435 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) 1436 de++; 1437 /* ... drop it on close */ 1438 if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) { 1439 de--; 1440 update_metadata(sc); /* Writes statistical information */ 1441 } 1442 1443 error = ENXIO; 1444 LIST_FOREACH(c, &gp->consumer, consumer) { 1445 KASSERT(c != NULL, ("%s: consumer is NULL", __func__)); 1446 error = g_access(c, dr, dw, de); 1447 if (error != 0) { 1448 struct g_consumer *c2; 1449 1450 /* Backout earlier changes */ 1451 LIST_FOREACH(c2, &gp->consumer, consumer) { 1452 if (c2 == c) /* all eariler components fixed */ 1453 return (error); 1454 g_access(c2, -dr, -dw, -de); 1455 } 1456 } 1457 } 1458 1459 return (error); 1460 } 1461 1462 /* 1463 * Generate XML dump of current state 1464 */ 1465 static void 1466 g_virstor_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1467 struct g_consumer *cp, struct g_provider *pp) 1468 { 1469 struct g_virstor_softc *sc; 1470 1471 g_topology_assert(); 1472 sc = gp->softc; 1473 1474 if (sc == NULL || pp != NULL) 1475 return; 1476 1477 if (cp != NULL) { 1478 /* For each component */ 1479 struct g_virstor_component *comp; 1480 1481 comp = cp->private; 1482 if (comp == NULL) 1483 return; 1484 sbuf_printf(sb, "%s<ComponentIndex>%u</ComponentIndex>\n", 1485 indent, comp->index); 1486 sbuf_printf(sb, "%s<ChunkCount>%u</ChunkCount>\n", 1487 indent, comp->chunk_count); 1488 sbuf_printf(sb, "%s<ChunksUsed>%u</ChunksUsed>\n", 1489 indent, comp->chunk_next); 1490 sbuf_printf(sb, "%s<ChunksReserved>%u</ChunksReserved>\n", 1491 indent, comp->chunk_reserved); 1492 sbuf_printf(sb, "%s<StorageFree>%u%%</StorageFree>\n", 1493 indent, 1494 comp->chunk_next > 0 ? 100 - 1495 ((comp->chunk_next + comp->chunk_reserved) * 100) / 1496 comp->chunk_count : 100); 1497 } else { 1498 /* For the whole thing */ 1499 u_int count, used, i; 1500 off_t size; 1501 1502 count = used = size = 0; 1503 for (i = 0; i < sc->n_components; i++) { 1504 if (sc->components[i].gcons != NULL) { 1505 count += sc->components[i].chunk_count; 1506 used += sc->components[i].chunk_next + 1507 sc->components[i].chunk_reserved; 1508 size += sc->components[i].gcons-> 1509 provider->mediasize; 1510 } 1511 } 1512 1513 sbuf_printf(sb, "%s<Status>" 1514 "Components=%u, Online=%u</Status>\n", indent, 1515 sc->n_components, virstor_valid_components(sc)); 1516 sbuf_printf(sb, "%s<State>%u%% physical free</State>\n", 1517 indent, 100-(used * 100) / count); 1518 sbuf_printf(sb, "%s<ChunkSize>%zu</ChunkSize>\n", indent, 1519 sc->chunk_size); 1520 sbuf_printf(sb, "%s<PhysicalFree>%u%%</PhysicalFree>\n", 1521 indent, used > 0 ? 100 - (used * 100) / count : 100); 1522 sbuf_printf(sb, "%s<ChunkPhysicalCount>%u</ChunkPhysicalCount>\n", 1523 indent, count); 1524 sbuf_printf(sb, "%s<ChunkVirtualCount>%zu</ChunkVirtualCount>\n", 1525 indent, sc->chunk_count); 1526 sbuf_printf(sb, "%s<PhysicalBacking>%zu%%</PhysicalBacking>\n", 1527 indent, 1528 (count * 100) / sc->chunk_count); 1529 sbuf_printf(sb, "%s<PhysicalBackingSize>%jd</PhysicalBackingSize>\n", 1530 indent, size); 1531 sbuf_printf(sb, "%s<VirtualSize>%jd</VirtualSize>\n", indent, 1532 sc->virsize); 1533 } 1534 } 1535 1536 /* 1537 * GEOM .done handler 1538 * Can't use standard handler because one requested IO may 1539 * fork into additional data IOs 1540 */ 1541 static void 1542 g_virstor_done(struct bio *b) 1543 { 1544 struct g_virstor_softc *sc; 1545 struct bio *parent_b; 1546 1547 parent_b = b->bio_parent; 1548 sc = parent_b->bio_to->geom->softc; 1549 1550 if (b->bio_error != 0) { 1551 LOG_MSG(LVL_ERROR, "Error %d for offset=%ju, length=%ju, %s", 1552 b->bio_error, b->bio_offset, b->bio_length, 1553 b->bio_to->name); 1554 if (parent_b->bio_error == 0) 1555 parent_b->bio_error = b->bio_error; 1556 } 1557 1558 parent_b->bio_inbed++; 1559 parent_b->bio_completed += b->bio_completed; 1560 1561 if (parent_b->bio_children == parent_b->bio_inbed) { 1562 parent_b->bio_completed = parent_b->bio_length; 1563 g_io_deliver(parent_b, parent_b->bio_error); 1564 } 1565 g_destroy_bio(b); 1566 } 1567 1568 /* 1569 * I/O starts here 1570 * Called in g_down thread 1571 */ 1572 static void 1573 g_virstor_start(struct bio *b) 1574 { 1575 struct g_virstor_softc *sc; 1576 struct g_virstor_component *comp; 1577 struct bio *cb; 1578 struct g_provider *pp; 1579 char *addr; 1580 off_t offset, length; 1581 struct bio_queue_head bq; 1582 size_t chunk_size; /* cached for convenience */ 1583 u_int count; 1584 1585 pp = b->bio_to; 1586 sc = pp->geom->softc; 1587 KASSERT(sc != NULL, ("%s: no softc (error=%d, device=%s)", __func__, 1588 b->bio_to->error, b->bio_to->name)); 1589 1590 LOG_REQ(LVL_MOREDEBUG, b, "%s", __func__); 1591 1592 switch (b->bio_cmd) { 1593 case BIO_READ: 1594 case BIO_WRITE: 1595 case BIO_DELETE: 1596 break; 1597 default: 1598 g_io_deliver(b, EOPNOTSUPP); 1599 return; 1600 } 1601 1602 LOG_MSG(LVL_DEBUG2, "BIO arrived, size=%ju", b->bio_length); 1603 bioq_init(&bq); 1604 1605 chunk_size = sc->chunk_size; 1606 addr = b->bio_data; 1607 offset = b->bio_offset; /* virtual offset and length */ 1608 length = b->bio_length; 1609 1610 while (length > 0) { 1611 size_t chunk_index, in_chunk_offset, in_chunk_length; 1612 struct virstor_map_entry *me; 1613 1614 chunk_index = offset / chunk_size; /* round downwards */ 1615 in_chunk_offset = offset % chunk_size; 1616 in_chunk_length = min(length, chunk_size - in_chunk_offset); 1617 LOG_MSG(LVL_DEBUG, "Mapped %s(%ju, %ju) to (%zu,%zu,%zu)", 1618 b->bio_cmd == BIO_READ ? "R" : "W", 1619 offset, length, 1620 chunk_index, in_chunk_offset, in_chunk_length); 1621 me = &sc->map[chunk_index]; 1622 1623 if (b->bio_cmd == BIO_READ || b->bio_cmd == BIO_DELETE) { 1624 if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) { 1625 /* Reads from unallocated chunks return zeroed 1626 * buffers */ 1627 if (b->bio_cmd == BIO_READ) 1628 bzero(addr, in_chunk_length); 1629 } else { 1630 comp = &sc->components[me->provider_no]; 1631 1632 cb = g_clone_bio(b); 1633 if (cb == NULL) { 1634 bioq_dismantle(&bq); 1635 if (b->bio_error == 0) 1636 b->bio_error = ENOMEM; 1637 g_io_deliver(b, b->bio_error); 1638 return; 1639 } 1640 cb->bio_to = comp->gcons->provider; 1641 cb->bio_done = g_virstor_done; 1642 cb->bio_offset = 1643 (off_t)me->provider_chunk * (off_t)chunk_size 1644 + in_chunk_offset; 1645 cb->bio_length = in_chunk_length; 1646 cb->bio_data = addr; 1647 cb->bio_caller1 = comp; 1648 bioq_disksort(&bq, cb); 1649 } 1650 } else { /* handle BIO_WRITE */ 1651 KASSERT(b->bio_cmd == BIO_WRITE, 1652 ("%s: Unknown command %d", __func__, 1653 b->bio_cmd)); 1654 1655 if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) { 1656 /* We have a virtual chunk, represented by 1657 * the "me" entry, but it's not yet allocated 1658 * (tied to) a physical chunk. So do it now. */ 1659 struct virstor_map_entry *data_me; 1660 u_int phys_chunk, comp_no; 1661 off_t s_offset; 1662 int error; 1663 1664 error = allocate_chunk(sc, &comp, &comp_no, 1665 &phys_chunk); 1666 if (error != 0) { 1667 /* We cannot allocate a physical chunk 1668 * to satisfy this request, so we'll 1669 * delay it to when we can... 1670 * XXX: this will prevent the fs from 1671 * being umounted! */ 1672 struct g_virstor_bio_q *biq; 1673 biq = malloc(sizeof *biq, M_GVIRSTOR, 1674 M_NOWAIT); 1675 if (biq == NULL) { 1676 bioq_dismantle(&bq); 1677 if (b->bio_error == 0) 1678 b->bio_error = ENOMEM; 1679 g_io_deliver(b, b->bio_error); 1680 return; 1681 } 1682 biq->bio = b; 1683 mtx_lock(&sc->delayed_bio_q_mtx); 1684 STAILQ_INSERT_TAIL(&sc->delayed_bio_q, 1685 biq, linkage); 1686 mtx_unlock(&sc->delayed_bio_q_mtx); 1687 LOG_MSG(LVL_WARNING, "Delaying BIO " 1688 "(size=%ju) until free physical " 1689 "space can be found on %s", 1690 b->bio_length, 1691 sc->provider->name); 1692 return; 1693 } 1694 LOG_MSG(LVL_DEBUG, "Allocated chunk %u on %s " 1695 "for %s", 1696 phys_chunk, 1697 comp->gcons->provider->name, 1698 sc->provider->name); 1699 1700 me->provider_no = comp_no; 1701 me->provider_chunk = phys_chunk; 1702 me->flags |= VIRSTOR_MAP_ALLOCATED; 1703 1704 cb = g_clone_bio(b); 1705 if (cb == NULL) { 1706 me->flags &= ~VIRSTOR_MAP_ALLOCATED; 1707 me->provider_no = 0; 1708 me->provider_chunk = 0; 1709 bioq_dismantle(&bq); 1710 if (b->bio_error == 0) 1711 b->bio_error = ENOMEM; 1712 g_io_deliver(b, b->bio_error); 1713 return; 1714 } 1715 1716 /* The allocation table is stored continuously 1717 * at the start of the drive. We need to 1718 * calculate the offset of the sector that holds 1719 * this map entry both on the drive and in the 1720 * map array. 1721 * sc_offset will end up pointing to the drive 1722 * sector. */ 1723 s_offset = chunk_index * sizeof *me; 1724 s_offset = rounddown(s_offset, sc->sectorsize); 1725 1726 /* data_me points to map entry sector 1727 * in memory (analogous to offset) */ 1728 data_me = &sc->map[rounddown(chunk_index, 1729 sc->me_per_sector)]; 1730 1731 /* Commit sector with map entry to storage */ 1732 cb->bio_to = sc->components[0].gcons->provider; 1733 cb->bio_done = g_virstor_done; 1734 cb->bio_offset = s_offset; 1735 cb->bio_data = (char *)data_me; 1736 cb->bio_length = sc->sectorsize; 1737 cb->bio_caller1 = &sc->components[0]; 1738 bioq_disksort(&bq, cb); 1739 } 1740 1741 comp = &sc->components[me->provider_no]; 1742 cb = g_clone_bio(b); 1743 if (cb == NULL) { 1744 bioq_dismantle(&bq); 1745 if (b->bio_error == 0) 1746 b->bio_error = ENOMEM; 1747 g_io_deliver(b, b->bio_error); 1748 return; 1749 } 1750 /* Finally, handle the data */ 1751 cb->bio_to = comp->gcons->provider; 1752 cb->bio_done = g_virstor_done; 1753 cb->bio_offset = (off_t)me->provider_chunk*(off_t)chunk_size + 1754 in_chunk_offset; 1755 cb->bio_length = in_chunk_length; 1756 cb->bio_data = addr; 1757 cb->bio_caller1 = comp; 1758 bioq_disksort(&bq, cb); 1759 } 1760 addr += in_chunk_length; 1761 length -= in_chunk_length; 1762 offset += in_chunk_length; 1763 } 1764 1765 /* Fire off bio's here */ 1766 count = 0; 1767 for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) { 1768 bioq_remove(&bq, cb); 1769 LOG_REQ(LVL_MOREDEBUG, cb, "Firing request"); 1770 comp = cb->bio_caller1; 1771 cb->bio_caller1 = NULL; 1772 LOG_MSG(LVL_DEBUG, " firing bio, offset=%ju, length=%ju", 1773 cb->bio_offset, cb->bio_length); 1774 g_io_request(cb, comp->gcons); 1775 count++; 1776 } 1777 if (count == 0) { /* We handled everything locally */ 1778 b->bio_completed = b->bio_length; 1779 g_io_deliver(b, 0); 1780 } 1781 1782 } 1783 1784 /* 1785 * Allocate a chunk from a physical provider. Returns physical component, 1786 * chunk index relative to the component and the component's index. 1787 */ 1788 static int 1789 allocate_chunk(struct g_virstor_softc *sc, struct g_virstor_component **comp, 1790 u_int *comp_no_p, u_int *chunk) 1791 { 1792 u_int comp_no; 1793 1794 KASSERT(sc->curr_component < sc->n_components, 1795 ("%s: Invalid curr_component: %u", __func__, sc->curr_component)); 1796 1797 comp_no = sc->curr_component; 1798 *comp = &sc->components[comp_no]; 1799 dump_component(*comp); 1800 if ((*comp)->chunk_next >= (*comp)->chunk_count) { 1801 /* This component is full. Allocate next component */ 1802 if (comp_no >= sc->n_components-1) { 1803 LOG_MSG(LVL_ERROR, "All physical space allocated for %s", 1804 sc->geom->name); 1805 return (-1); 1806 } 1807 (*comp)->flags &= ~VIRSTOR_PROVIDER_CURRENT; 1808 sc->curr_component = ++comp_no; 1809 1810 *comp = &sc->components[comp_no]; 1811 if (comp_no >= sc->n_components - g_virstor_component_watermark-1) 1812 LOG_MSG(LVL_WARNING, "Device %s running out of components " 1813 "(switching to %u/%u: %s)", sc->geom->name, 1814 comp_no+1, sc->n_components, 1815 (*comp)->gcons->provider->name); 1816 /* Take care not to overwrite reserved chunks */ 1817 if ( (*comp)->chunk_reserved > 0 && 1818 (*comp)->chunk_next < (*comp)->chunk_reserved) 1819 (*comp)->chunk_next = (*comp)->chunk_reserved; 1820 1821 (*comp)->flags |= 1822 VIRSTOR_PROVIDER_ALLOCATED | VIRSTOR_PROVIDER_CURRENT; 1823 dump_component(*comp); 1824 *comp_no_p = comp_no; 1825 *chunk = (*comp)->chunk_next++; 1826 } else { 1827 *comp_no_p = comp_no; 1828 *chunk = (*comp)->chunk_next++; 1829 } 1830 return (0); 1831 } 1832 1833 /* Dump a component */ 1834 static void 1835 dump_component(struct g_virstor_component *comp) 1836 { 1837 1838 if (g_virstor_debug < LVL_DEBUG2) 1839 return; 1840 printf("Component %d: %s\n", comp->index, comp->gcons->provider->name); 1841 printf(" chunk_count: %u\n", comp->chunk_count); 1842 printf(" chunk_next: %u\n", comp->chunk_next); 1843 printf(" flags: %u\n", comp->flags); 1844 } 1845 1846 #if 0 1847 /* Dump a map entry */ 1848 static void 1849 dump_me(struct virstor_map_entry *me, unsigned int nr) 1850 { 1851 if (g_virstor_debug < LVL_DEBUG) 1852 return; 1853 printf("VIRT. CHUNK #%d: ", nr); 1854 if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) 1855 printf("(unallocated)\n"); 1856 else 1857 printf("allocated at provider %u, provider_chunk %u\n", 1858 me->provider_no, me->provider_chunk); 1859 } 1860 #endif 1861 1862 /* 1863 * Dismantle bio_queue and destroy its components 1864 */ 1865 static void 1866 bioq_dismantle(struct bio_queue_head *bq) 1867 { 1868 struct bio *b; 1869 1870 for (b = bioq_first(bq); b != NULL; b = bioq_first(bq)) { 1871 bioq_remove(bq, b); 1872 g_destroy_bio(b); 1873 } 1874 } 1875 1876 /* 1877 * The function that shouldn't be called. 1878 * When this is called, the stack is already garbled because of 1879 * argument mismatch. There's nothing to do now but panic, which is 1880 * accidentally the whole purpose of this function. 1881 * Motivation: to guard from accidentally calling geom methods when 1882 * they shouldn't be called. (see g_..._taste) 1883 */ 1884 static void 1885 invalid_call(void) 1886 { 1887 panic("invalid_call() has just been called. Something's fishy here."); 1888 } 1889 1890 DECLARE_GEOM_CLASS(g_virstor_class, g_virstor); /* Let there be light */ 1891