1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2006-2007 Ivan Voras <ivoras@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* Implementation notes: 30 * - "Components" are wrappers around providers that make up the 31 * virtual storage (i.e. 
a virstor has "physical" components) 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/module.h> 41 #include <sys/lock.h> 42 #include <sys/mutex.h> 43 #include <sys/sx.h> 44 #include <sys/bio.h> 45 #include <sys/sbuf.h> 46 #include <sys/sysctl.h> 47 #include <sys/malloc.h> 48 #include <sys/time.h> 49 #include <sys/proc.h> 50 #include <sys/kthread.h> 51 #include <sys/mutex.h> 52 #include <vm/uma.h> 53 #include <geom/geom.h> 54 #include <geom/geom_dbg.h> 55 56 #include <geom/virstor/g_virstor.h> 57 #include <geom/virstor/g_virstor_md.h> 58 59 FEATURE(g_virstor, "GEOM virtual storage support"); 60 61 /* Declare malloc(9) label */ 62 static MALLOC_DEFINE(M_GVIRSTOR, "gvirstor", "GEOM_VIRSTOR Data"); 63 64 /* GEOM class methods */ 65 static g_init_t g_virstor_init; 66 static g_fini_t g_virstor_fini; 67 static g_taste_t g_virstor_taste; 68 static g_ctl_req_t g_virstor_config; 69 static g_ctl_destroy_geom_t g_virstor_destroy_geom; 70 71 /* Declare & initialize class structure ("geom class") */ 72 struct g_class g_virstor_class = { 73 .name = G_VIRSTOR_CLASS_NAME, 74 .version = G_VERSION, 75 .init = g_virstor_init, 76 .fini = g_virstor_fini, 77 .taste = g_virstor_taste, 78 .ctlreq = g_virstor_config, 79 .destroy_geom = g_virstor_destroy_geom 80 /* The .dumpconf and the rest are only usable for a geom instance, so 81 * they will be set when such instance is created. 
*/ 82 }; 83 84 /* Declare sysctl's and loader tunables */ 85 SYSCTL_DECL(_kern_geom); 86 static SYSCTL_NODE(_kern_geom, OID_AUTO, virstor, CTLFLAG_RW, 0, 87 "GEOM_GVIRSTOR information"); 88 89 static u_int g_virstor_debug = 2; /* XXX: lower to 2 when released to public */ 90 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, debug, CTLFLAG_RWTUN, &g_virstor_debug, 91 0, "Debug level (2=production, 5=normal, 15=excessive)"); 92 93 static u_int g_virstor_chunk_watermark = 100; 94 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, chunk_watermark, CTLFLAG_RWTUN, 95 &g_virstor_chunk_watermark, 0, 96 "Minimum number of free chunks before issuing administrative warning"); 97 98 static u_int g_virstor_component_watermark = 1; 99 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, component_watermark, CTLFLAG_RWTUN, 100 &g_virstor_component_watermark, 0, 101 "Minimum number of free components before issuing administrative warning"); 102 103 static int read_metadata(struct g_consumer *, struct g_virstor_metadata *); 104 static void write_metadata(struct g_consumer *, struct g_virstor_metadata *); 105 static int clear_metadata(struct g_virstor_component *); 106 static int add_provider_to_geom(struct g_virstor_softc *, struct g_provider *, 107 struct g_virstor_metadata *); 108 static struct g_geom *create_virstor_geom(struct g_class *, 109 struct g_virstor_metadata *); 110 static void virstor_check_and_run(struct g_virstor_softc *); 111 static u_int virstor_valid_components(struct g_virstor_softc *); 112 static int virstor_geom_destroy(struct g_virstor_softc *, boolean_t, 113 boolean_t); 114 static void remove_component(struct g_virstor_softc *, 115 struct g_virstor_component *, boolean_t); 116 static void bioq_dismantle(struct bio_queue_head *); 117 static int allocate_chunk(struct g_virstor_softc *, 118 struct g_virstor_component **, u_int *, u_int *); 119 static void delay_destroy_consumer(void *, int); 120 static void dump_component(struct g_virstor_component *comp); 121 #if 0 122 static void 
dump_me(struct virstor_map_entry *me, unsigned int nr); 123 #endif 124 125 static void virstor_ctl_stop(struct gctl_req *, struct g_class *); 126 static void virstor_ctl_add(struct gctl_req *, struct g_class *); 127 static void virstor_ctl_remove(struct gctl_req *, struct g_class *); 128 static struct g_virstor_softc * virstor_find_geom(const struct g_class *, 129 const char *); 130 static void update_metadata(struct g_virstor_softc *); 131 static void fill_metadata(struct g_virstor_softc *, struct g_virstor_metadata *, 132 u_int, u_int); 133 134 static void g_virstor_orphan(struct g_consumer *); 135 static int g_virstor_access(struct g_provider *, int, int, int); 136 static void g_virstor_start(struct bio *); 137 static void g_virstor_dumpconf(struct sbuf *, const char *, struct g_geom *, 138 struct g_consumer *, struct g_provider *); 139 static void g_virstor_done(struct bio *); 140 141 static void invalid_call(void); 142 /* 143 * Initialise GEOM class (per-class callback) 144 */ 145 static void 146 g_virstor_init(struct g_class *mp __unused) 147 { 148 149 /* Catch map struct size mismatch at compile time; Map entries must 150 * fit into MAXPHYS exactly, with no wasted space. 
*/ 151 CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS); 152 153 /* Init UMA zones, TAILQ's, other global vars */ 154 } 155 156 /* 157 * Finalise GEOM class (per-class callback) 158 */ 159 static void 160 g_virstor_fini(struct g_class *mp __unused) 161 { 162 163 /* Deinit UMA zones & global vars */ 164 } 165 166 /* 167 * Config (per-class callback) 168 */ 169 static void 170 g_virstor_config(struct gctl_req *req, struct g_class *cp, char const *verb) 171 { 172 uint32_t *version; 173 174 g_topology_assert(); 175 176 version = gctl_get_paraml(req, "version", sizeof(*version)); 177 if (version == NULL) { 178 gctl_error(req, "Failed to get 'version' argument"); 179 return; 180 } 181 if (*version != G_VIRSTOR_VERSION) { 182 gctl_error(req, "Userland and kernel versions out of sync"); 183 return; 184 } 185 186 g_topology_unlock(); 187 if (strcmp(verb, "add") == 0) 188 virstor_ctl_add(req, cp); 189 else if (strcmp(verb, "stop") == 0 || strcmp(verb, "destroy") == 0) 190 virstor_ctl_stop(req, cp); 191 else if (strcmp(verb, "remove") == 0) 192 virstor_ctl_remove(req, cp); 193 else 194 gctl_error(req, "unknown verb: '%s'", verb); 195 g_topology_lock(); 196 } 197 198 /* 199 * "stop" verb from userland 200 */ 201 static void 202 virstor_ctl_stop(struct gctl_req *req, struct g_class *cp) 203 { 204 int *force, *nargs; 205 int i; 206 207 nargs = gctl_get_paraml(req, "nargs", sizeof *nargs); 208 if (nargs == NULL) { 209 gctl_error(req, "Error fetching argument '%s'", "nargs"); 210 return; 211 } 212 if (*nargs < 1) { 213 gctl_error(req, "Invalid number of arguments"); 214 return; 215 } 216 force = gctl_get_paraml(req, "force", sizeof *force); 217 if (force == NULL) { 218 gctl_error(req, "Error fetching argument '%s'", "force"); 219 return; 220 } 221 222 g_topology_lock(); 223 for (i = 0; i < *nargs; i++) { 224 char param[8]; 225 const char *name; 226 struct g_virstor_softc *sc; 227 int error; 228 229 sprintf(param, "arg%d", i); 230 name = 
gctl_get_asciiparam(req, param); 231 if (name == NULL) { 232 gctl_error(req, "No 'arg%d' argument", i); 233 g_topology_unlock(); 234 return; 235 } 236 sc = virstor_find_geom(cp, name); 237 if (sc == NULL) { 238 gctl_error(req, "Don't know anything about '%s'", name); 239 g_topology_unlock(); 240 return; 241 } 242 243 LOG_MSG(LVL_INFO, "Stopping %s by the userland command", 244 sc->geom->name); 245 update_metadata(sc); 246 if ((error = virstor_geom_destroy(sc, TRUE, TRUE)) != 0) { 247 LOG_MSG(LVL_ERROR, "Cannot destroy %s: %d", 248 sc->geom->name, error); 249 } 250 } 251 g_topology_unlock(); 252 } 253 254 /* 255 * "add" verb from userland - add new component(s) to the structure. 256 * This will be done all at once in here, without going through the 257 * .taste function for new components. 258 */ 259 static void 260 virstor_ctl_add(struct gctl_req *req, struct g_class *cp) 261 { 262 /* Note: while this is going on, I/O is being done on 263 * the g_up and g_down threads. The idea is to make changes 264 * to softc members in a way that can atomically activate 265 * them all at once. 
*/ 266 struct g_virstor_softc *sc; 267 int *hardcode, *nargs; 268 const char *geom_name; /* geom to add a component to */ 269 struct g_consumer *fcp; 270 struct g_virstor_bio_q *bq; 271 u_int added; 272 int error; 273 int i; 274 275 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 276 if (nargs == NULL) { 277 gctl_error(req, "Error fetching argument '%s'", "nargs"); 278 return; 279 } 280 if (*nargs < 2) { 281 gctl_error(req, "Invalid number of arguments"); 282 return; 283 } 284 hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode)); 285 if (hardcode == NULL) { 286 gctl_error(req, "Error fetching argument '%s'", "hardcode"); 287 return; 288 } 289 290 /* Find "our" geom */ 291 geom_name = gctl_get_asciiparam(req, "arg0"); 292 if (geom_name == NULL) { 293 gctl_error(req, "Error fetching argument '%s'", "geom_name (arg0)"); 294 return; 295 } 296 sc = virstor_find_geom(cp, geom_name); 297 if (sc == NULL) { 298 gctl_error(req, "Don't know anything about '%s'", geom_name); 299 return; 300 } 301 302 if (virstor_valid_components(sc) != sc->n_components) { 303 LOG_MSG(LVL_ERROR, "Cannot add components to incomplete " 304 "virstor %s", sc->geom->name); 305 gctl_error(req, "Virstor %s is incomplete", sc->geom->name); 306 return; 307 } 308 309 fcp = sc->components[0].gcons; 310 added = 0; 311 g_topology_lock(); 312 for (i = 1; i < *nargs; i++) { 313 struct g_virstor_metadata md; 314 char aname[8]; 315 const char *prov_name; 316 struct g_provider *pp; 317 struct g_consumer *cp; 318 u_int nc; 319 u_int j; 320 321 snprintf(aname, sizeof aname, "arg%d", i); 322 prov_name = gctl_get_asciiparam(req, aname); 323 if (prov_name == NULL) { 324 gctl_error(req, "Error fetching argument '%s'", aname); 325 g_topology_unlock(); 326 return; 327 } 328 if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) 329 prov_name += sizeof(_PATH_DEV) - 1; 330 331 pp = g_provider_by_name(prov_name); 332 if (pp == NULL) { 333 /* This is the most common error so be verbose about it */ 
334 if (added != 0) { 335 gctl_error(req, "Invalid provider: '%s' (added" 336 " %u components)", prov_name, added); 337 update_metadata(sc); 338 } else { 339 gctl_error(req, "Invalid provider: '%s'", 340 prov_name); 341 } 342 g_topology_unlock(); 343 return; 344 } 345 cp = g_new_consumer(sc->geom); 346 if (cp == NULL) { 347 gctl_error(req, "Cannot create consumer"); 348 g_topology_unlock(); 349 return; 350 } 351 error = g_attach(cp, pp); 352 if (error != 0) { 353 gctl_error(req, "Cannot attach a consumer to %s", 354 pp->name); 355 g_destroy_consumer(cp); 356 g_topology_unlock(); 357 return; 358 } 359 if (fcp->acr != 0 || fcp->acw != 0 || fcp->ace != 0) { 360 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 361 if (error != 0) { 362 gctl_error(req, "Access request failed for %s", 363 pp->name); 364 g_destroy_consumer(cp); 365 g_topology_unlock(); 366 return; 367 } 368 } 369 if (fcp->provider->sectorsize != pp->sectorsize) { 370 gctl_error(req, "Sector size doesn't fit for %s", 371 pp->name); 372 g_destroy_consumer(cp); 373 g_topology_unlock(); 374 return; 375 } 376 for (j = 0; j < sc->n_components; j++) { 377 if (strcmp(sc->components[j].gcons->provider->name, 378 pp->name) == 0) { 379 gctl_error(req, "Component %s already in %s", 380 pp->name, sc->geom->name); 381 g_destroy_consumer(cp); 382 g_topology_unlock(); 383 return; 384 } 385 } 386 sc->components = realloc(sc->components, 387 sizeof(*sc->components) * (sc->n_components + 1), 388 M_GVIRSTOR, M_WAITOK); 389 390 nc = sc->n_components; 391 sc->components[nc].gcons = cp; 392 sc->components[nc].sc = sc; 393 sc->components[nc].index = nc; 394 sc->components[nc].chunk_count = cp->provider->mediasize / 395 sc->chunk_size; 396 sc->components[nc].chunk_next = 0; 397 sc->components[nc].chunk_reserved = 0; 398 399 if (sc->components[nc].chunk_count < 4) { 400 gctl_error(req, "Provider too small: %s", 401 cp->provider->name); 402 g_destroy_consumer(cp); 403 g_topology_unlock(); 404 return; 405 } 406 fill_metadata(sc, 
&md, nc, *hardcode); 407 write_metadata(cp, &md); 408 /* The new component becomes visible when n_components is 409 * incremented */ 410 sc->n_components++; 411 added++; 412 413 } 414 /* This call to update_metadata() is critical. In case there's a 415 * power failure in the middle of it and some components are updated 416 * while others are not, there will be trouble on next .taste() iff 417 * a non-updated component is detected first */ 418 update_metadata(sc); 419 g_topology_unlock(); 420 LOG_MSG(LVL_INFO, "Added %d component(s) to %s", added, 421 sc->geom->name); 422 /* Fire off BIOs previously queued because there wasn't any 423 * physical space left. If the BIOs still can't be satisfied 424 * they will again be added to the end of the queue (during 425 * which the mutex will be recursed) */ 426 bq = malloc(sizeof(*bq), M_GVIRSTOR, M_WAITOK); 427 bq->bio = NULL; 428 mtx_lock(&sc->delayed_bio_q_mtx); 429 /* First, insert a sentinel to the queue end, so we don't 430 * end up in an infinite loop if there's still no free 431 * space available. 
*/ 432 STAILQ_INSERT_TAIL(&sc->delayed_bio_q, bq, linkage); 433 while (!STAILQ_EMPTY(&sc->delayed_bio_q)) { 434 bq = STAILQ_FIRST(&sc->delayed_bio_q); 435 if (bq->bio != NULL) { 436 g_virstor_start(bq->bio); 437 STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); 438 free(bq, M_GVIRSTOR); 439 } else { 440 STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); 441 free(bq, M_GVIRSTOR); 442 break; 443 } 444 } 445 mtx_unlock(&sc->delayed_bio_q_mtx); 446 447 } 448 449 /* 450 * Find a geom handled by the class 451 */ 452 static struct g_virstor_softc * 453 virstor_find_geom(const struct g_class *cp, const char *name) 454 { 455 struct g_geom *gp; 456 457 LIST_FOREACH(gp, &cp->geom, geom) { 458 if (strcmp(name, gp->name) == 0) 459 return (gp->softc); 460 } 461 return (NULL); 462 } 463 464 /* 465 * Update metadata on all components to reflect the current state 466 * of these fields: 467 * - chunk_next 468 * - flags 469 * - md_count 470 * Expects things to be set up so write_metadata() can work, i.e. 471 * the topology lock must be held. 
472 */ 473 static void 474 update_metadata(struct g_virstor_softc *sc) 475 { 476 struct g_virstor_metadata md; 477 u_int n; 478 479 if (virstor_valid_components(sc) != sc->n_components) 480 return; /* Incomplete device */ 481 LOG_MSG(LVL_DEBUG, "Updating metadata on components for %s", 482 sc->geom->name); 483 /* Update metadata on components */ 484 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, 485 sc->geom->class->name, sc->geom->name); 486 g_topology_assert(); 487 for (n = 0; n < sc->n_components; n++) { 488 read_metadata(sc->components[n].gcons, &md); 489 md.chunk_next = sc->components[n].chunk_next; 490 md.flags = sc->components[n].flags; 491 md.md_count = sc->n_components; 492 write_metadata(sc->components[n].gcons, &md); 493 } 494 } 495 496 /* 497 * Fills metadata (struct md) from information stored in softc and the nc'th 498 * component of virstor 499 */ 500 static void 501 fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md, 502 u_int nc, u_int hardcode) 503 { 504 struct g_virstor_component *c; 505 506 bzero(md, sizeof *md); 507 c = &sc->components[nc]; 508 509 strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof md->md_magic); 510 md->md_version = G_VIRSTOR_VERSION; 511 strncpy(md->md_name, sc->geom->name, sizeof md->md_name); 512 md->md_id = sc->id; 513 md->md_virsize = sc->virsize; 514 md->md_chunk_size = sc->chunk_size; 515 md->md_count = sc->n_components; 516 517 if (hardcode) { 518 strncpy(md->provider, c->gcons->provider->name, 519 sizeof md->provider); 520 } 521 md->no = nc; 522 md->provsize = c->gcons->provider->mediasize; 523 md->chunk_count = c->chunk_count; 524 md->chunk_next = c->chunk_next; 525 md->chunk_reserved = c->chunk_reserved; 526 md->flags = c->flags; 527 } 528 529 /* 530 * Remove a component from virstor device. 531 * Can only be done if the component is unallocated. 
532 */ 533 static void 534 virstor_ctl_remove(struct gctl_req *req, struct g_class *cp) 535 { 536 /* As this is executed in parallel to I/O, operations on virstor 537 * structures must be as atomic as possible. */ 538 struct g_virstor_softc *sc; 539 int *nargs; 540 const char *geom_name; 541 u_int removed; 542 int i; 543 544 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 545 if (nargs == NULL) { 546 gctl_error(req, "Error fetching argument '%s'", "nargs"); 547 return; 548 } 549 if (*nargs < 2) { 550 gctl_error(req, "Invalid number of arguments"); 551 return; 552 } 553 /* Find "our" geom */ 554 geom_name = gctl_get_asciiparam(req, "arg0"); 555 if (geom_name == NULL) { 556 gctl_error(req, "Error fetching argument '%s'", 557 "geom_name (arg0)"); 558 return; 559 } 560 sc = virstor_find_geom(cp, geom_name); 561 if (sc == NULL) { 562 gctl_error(req, "Don't know anything about '%s'", geom_name); 563 return; 564 } 565 566 if (virstor_valid_components(sc) != sc->n_components) { 567 LOG_MSG(LVL_ERROR, "Cannot remove components from incomplete " 568 "virstor %s", sc->geom->name); 569 gctl_error(req, "Virstor %s is incomplete", sc->geom->name); 570 return; 571 } 572 573 removed = 0; 574 for (i = 1; i < *nargs; i++) { 575 char param[8]; 576 const char *prov_name; 577 int j, found; 578 struct g_virstor_component *newcomp, *compbak; 579 580 sprintf(param, "arg%d", i); 581 prov_name = gctl_get_asciiparam(req, param); 582 if (prov_name == NULL) { 583 gctl_error(req, "Error fetching argument '%s'", param); 584 return; 585 } 586 if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) 587 prov_name += sizeof(_PATH_DEV) - 1; 588 589 found = -1; 590 for (j = 0; j < sc->n_components; j++) { 591 if (strcmp(sc->components[j].gcons->provider->name, 592 prov_name) == 0) { 593 found = j; 594 break; 595 } 596 } 597 if (found == -1) { 598 LOG_MSG(LVL_ERROR, "No %s component in %s", 599 prov_name, sc->geom->name); 600 continue; 601 } 602 603 compbak = sc->components; 604 newcomp = 
malloc(sc->n_components * sizeof(*sc->components), 605 M_GVIRSTOR, M_WAITOK | M_ZERO); 606 bcopy(sc->components, newcomp, found * sizeof(*sc->components)); 607 bcopy(&sc->components[found + 1], newcomp + found, 608 found * sizeof(*sc->components)); 609 if ((sc->components[j].flags & VIRSTOR_PROVIDER_ALLOCATED) != 0) { 610 LOG_MSG(LVL_ERROR, "Allocated provider %s cannot be " 611 "removed from %s", 612 prov_name, sc->geom->name); 613 free(newcomp, M_GVIRSTOR); 614 /* We'll consider this non-fatal error */ 615 continue; 616 } 617 /* Renumerate unallocated components */ 618 for (j = 0; j < sc->n_components-1; j++) { 619 if ((sc->components[j].flags & 620 VIRSTOR_PROVIDER_ALLOCATED) == 0) { 621 sc->components[j].index = j; 622 } 623 } 624 /* This is the critical section. If a component allocation 625 * event happens while both variables are not yet set, 626 * there will be trouble. Something will panic on encountering 627 * NULL sc->components[x].gcomp member. 628 * Luckily, component allocation happens very rarely and 629 * removing components is an abnormal action in any case. */ 630 sc->components = newcomp; 631 sc->n_components--; 632 /* End critical section */ 633 634 g_topology_lock(); 635 if (clear_metadata(&compbak[found]) != 0) { 636 LOG_MSG(LVL_WARNING, "Trouble ahead: cannot clear " 637 "metadata on %s", prov_name); 638 } 639 g_detach(compbak[found].gcons); 640 g_destroy_consumer(compbak[found].gcons); 641 g_topology_unlock(); 642 643 free(compbak, M_GVIRSTOR); 644 645 removed++; 646 } 647 648 /* This call to update_metadata() is critical. 
In case there's a 649 * power failure in the middle of it and some components are updated 650 * while others are not, there will be trouble on next .taste() iff 651 * a non-updated component is detected first */ 652 g_topology_lock(); 653 update_metadata(sc); 654 g_topology_unlock(); 655 LOG_MSG(LVL_INFO, "Removed %d component(s) from %s", removed, 656 sc->geom->name); 657 } 658 659 /* 660 * Clear metadata sector on component 661 */ 662 static int 663 clear_metadata(struct g_virstor_component *comp) 664 { 665 char *buf; 666 int error; 667 668 LOG_MSG(LVL_INFO, "Clearing metadata on %s", 669 comp->gcons->provider->name); 670 g_topology_assert(); 671 error = g_access(comp->gcons, 0, 1, 0); 672 if (error != 0) 673 return (error); 674 buf = malloc(comp->gcons->provider->sectorsize, M_GVIRSTOR, 675 M_WAITOK | M_ZERO); 676 error = g_write_data(comp->gcons, 677 comp->gcons->provider->mediasize - 678 comp->gcons->provider->sectorsize, 679 buf, 680 comp->gcons->provider->sectorsize); 681 free(buf, M_GVIRSTOR); 682 g_access(comp->gcons, 0, -1, 0); 683 return (error); 684 } 685 686 /* 687 * Destroy geom forcibly. 
688 */ 689 static int 690 g_virstor_destroy_geom(struct gctl_req *req __unused, struct g_class *mp, 691 struct g_geom *gp) 692 { 693 struct g_virstor_softc *sc; 694 int exitval; 695 696 sc = gp->softc; 697 KASSERT(sc != NULL, ("%s: NULL sc", __func__)); 698 699 exitval = 0; 700 LOG_MSG(LVL_DEBUG, "%s called for %s, sc=%p", __func__, gp->name, 701 gp->softc); 702 703 if (sc != NULL) { 704 #ifdef INVARIANTS 705 char *buf; 706 int error; 707 off_t off; 708 int isclean, count; 709 int n; 710 711 LOG_MSG(LVL_INFO, "INVARIANTS detected"); 712 LOG_MSG(LVL_INFO, "Verifying allocation " 713 "table for %s", sc->geom->name); 714 count = 0; 715 for (n = 0; n < sc->chunk_count; n++) { 716 if (sc->map[n].flags || VIRSTOR_MAP_ALLOCATED != 0) 717 count++; 718 } 719 LOG_MSG(LVL_INFO, "Device %s has %d allocated chunks", 720 sc->geom->name, count); 721 n = off = count = 0; 722 isclean = 1; 723 if (virstor_valid_components(sc) != sc->n_components) { 724 /* This is a incomplete virstor device (not all 725 * components have been found) */ 726 LOG_MSG(LVL_ERROR, "Device %s is incomplete", 727 sc->geom->name); 728 goto bailout; 729 } 730 error = g_access(sc->components[0].gcons, 1, 0, 0); 731 KASSERT(error == 0, ("%s: g_access failed (%d)", __func__, 732 error)); 733 /* Compare the whole on-disk allocation table with what's 734 * currently in memory */ 735 while (n < sc->chunk_count) { 736 buf = g_read_data(sc->components[0].gcons, off, 737 sc->sectorsize, &error); 738 KASSERT(buf != NULL, ("g_read_data returned NULL (%d) " 739 "for read at %jd", error, off)); 740 if (bcmp(buf, &sc->map[n], sc->sectorsize) != 0) { 741 LOG_MSG(LVL_ERROR, "ERROR in allocation table, " 742 "entry %d, offset %jd", n, off); 743 isclean = 0; 744 count++; 745 } 746 n += sc->me_per_sector; 747 off += sc->sectorsize; 748 g_free(buf); 749 } 750 error = g_access(sc->components[0].gcons, -1, 0, 0); 751 KASSERT(error == 0, ("%s: g_access failed (%d) on exit", 752 __func__, error)); 753 if (isclean != 1) { 754 
LOG_MSG(LVL_ERROR, "ALLOCATION TABLE CORRUPTED FOR %s " 755 "(%d sectors don't match, max %zu allocations)", 756 sc->geom->name, count, 757 count * sc->me_per_sector); 758 } else { 759 LOG_MSG(LVL_INFO, "Allocation table ok for %s", 760 sc->geom->name); 761 } 762 bailout: 763 #endif 764 update_metadata(sc); 765 virstor_geom_destroy(sc, FALSE, FALSE); 766 exitval = EAGAIN; 767 } else 768 exitval = 0; 769 return (exitval); 770 } 771 772 /* 773 * Taste event (per-class callback) 774 * Examines a provider and creates geom instances if needed 775 */ 776 static struct g_geom * 777 g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags) 778 { 779 struct g_virstor_metadata md; 780 struct g_geom *gp; 781 struct g_consumer *cp; 782 struct g_virstor_softc *sc; 783 int error; 784 785 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 786 g_topology_assert(); 787 LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name); 788 789 /* We need a dummy geom to attach a consumer to the given provider */ 790 gp = g_new_geomf(mp, "virstor:taste.helper"); 791 gp->start = (void *)invalid_call; /* XXX: hacked up so the */ 792 gp->access = (void *)invalid_call; /* compiler doesn't complain. */ 793 gp->orphan = (void *)invalid_call; /* I really want these to fail. 
*/ 794 795 cp = g_new_consumer(gp); 796 g_attach(cp, pp); 797 error = read_metadata(cp, &md); 798 g_detach(cp); 799 g_destroy_consumer(cp); 800 g_destroy_geom(gp); 801 802 if (error != 0) 803 return (NULL); 804 805 if (strcmp(md.md_magic, G_VIRSTOR_MAGIC) != 0) 806 return (NULL); 807 if (md.md_version != G_VIRSTOR_VERSION) { 808 LOG_MSG(LVL_ERROR, "Kernel module version invalid " 809 "to handle %s (%s) : %d should be %d", 810 md.md_name, pp->name, md.md_version, G_VIRSTOR_VERSION); 811 return (NULL); 812 } 813 if (md.provsize != pp->mediasize) 814 return (NULL); 815 816 /* If the provider name is hardcoded, use the offered provider only 817 * if it's been offered with its proper name (the one used in 818 * the label command). */ 819 if (md.provider[0] != '\0' && 820 !g_compare_names(md.provider, pp->name)) 821 return (NULL); 822 823 /* Iterate all geoms this class already knows about to see if a new 824 * geom instance of this class needs to be created (in case the provider 825 * is first from a (possibly) multi-consumer geom) or it just needs 826 * to be added to an existing instance. 
*/ 827 sc = NULL; 828 gp = NULL; 829 LIST_FOREACH(gp, &mp->geom, geom) { 830 sc = gp->softc; 831 if (sc == NULL) 832 continue; 833 if (strcmp(md.md_name, sc->geom->name) != 0) 834 continue; 835 if (md.md_id != sc->id) 836 continue; 837 break; 838 } 839 if (gp != NULL) { /* We found an existing geom instance; add to it */ 840 LOG_MSG(LVL_INFO, "Adding %s to %s", pp->name, md.md_name); 841 error = add_provider_to_geom(sc, pp, &md); 842 if (error != 0) { 843 LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", 844 pp->name, md.md_name, error); 845 return (NULL); 846 } 847 } else { /* New geom instance needs to be created */ 848 gp = create_virstor_geom(mp, &md); 849 if (gp == NULL) { 850 LOG_MSG(LVL_ERROR, "Error creating new instance of " 851 "class %s: %s", mp->name, md.md_name); 852 LOG_MSG(LVL_DEBUG, "Error creating %s at %s", 853 md.md_name, pp->name); 854 return (NULL); 855 } 856 sc = gp->softc; 857 LOG_MSG(LVL_INFO, "Adding %s to %s (first found)", pp->name, 858 md.md_name); 859 error = add_provider_to_geom(sc, pp, &md); 860 if (error != 0) { 861 LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", 862 pp->name, md.md_name, error); 863 virstor_geom_destroy(sc, TRUE, FALSE); 864 return (NULL); 865 } 866 } 867 868 return (gp); 869 } 870 871 /* 872 * Destroyes consumer passed to it in arguments. Used as a callback 873 * on g_event queue. 
874 */ 875 static void 876 delay_destroy_consumer(void *arg, int flags __unused) 877 { 878 struct g_consumer *c = arg; 879 KASSERT(c != NULL, ("%s: invalid consumer", __func__)); 880 LOG_MSG(LVL_DEBUG, "Consumer %s destroyed with delay", 881 c->provider->name); 882 g_detach(c); 883 g_destroy_consumer(c); 884 } 885 886 /* 887 * Remove a component (consumer) from geom instance; If it's the first 888 * component being removed, orphan the provider to announce geom's being 889 * dismantled 890 */ 891 static void 892 remove_component(struct g_virstor_softc *sc, struct g_virstor_component *comp, 893 boolean_t delay) 894 { 895 struct g_consumer *c; 896 897 KASSERT(comp->gcons != NULL, ("Component with no consumer in %s", 898 sc->geom->name)); 899 c = comp->gcons; 900 901 comp->gcons = NULL; 902 KASSERT(c->provider != NULL, ("%s: no provider", __func__)); 903 LOG_MSG(LVL_DEBUG, "Component %s removed from %s", c->provider->name, 904 sc->geom->name); 905 if (sc->provider != NULL) { 906 LOG_MSG(LVL_INFO, "Removing provider %s", sc->provider->name); 907 g_wither_provider(sc->provider, ENXIO); 908 sc->provider = NULL; 909 } 910 911 if (c->acr > 0 || c->acw > 0 || c->ace > 0) 912 return; 913 if (delay) { 914 /* Destroy consumer after it's tasted */ 915 g_post_event(delay_destroy_consumer, c, M_WAITOK, NULL); 916 } else { 917 g_detach(c); 918 g_destroy_consumer(c); 919 } 920 } 921 922 /* 923 * Destroy geom - called internally 924 * See g_virstor_destroy_geom for the other one 925 */ 926 static int 927 virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force, 928 boolean_t delay) 929 { 930 struct g_provider *pp; 931 struct g_geom *gp; 932 u_int n; 933 934 g_topology_assert(); 935 936 if (sc == NULL) 937 return (ENXIO); 938 939 pp = sc->provider; 940 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 941 LOG_MSG(force ? 
/*
 * Destroy geom - called internally
 * See g_virstor_destroy_geom for the other one
 *
 * Removes every component that still has a consumer, completes all
 * delayed BIOs with an error, frees the softc and everything hanging off
 * it, and withers the geom.
 *
 * force: when FALSE, an open device makes the call fail with EBUSY before
 *	  anything is touched; when TRUE the open state is only logged.
 * delay: passed on to remove_component().
 *
 * Returns ENXIO if sc is NULL, EBUSY as described above, 0 otherwise.
 * The topology lock must be held.  sc is invalid after a 0 return.
 */
static int
virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force,
    boolean_t delay)
{
	struct g_provider *pp;
	struct g_geom *gp;
	u_int n;

	g_topology_assert();

	if (sc == NULL)
		return (ENXIO);

	pp = sc->provider;
	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
		LOG_MSG(force ? LVL_WARNING : LVL_ERROR,
		    "Device %s is still open.", pp->name);
		if (!force)
			return (EBUSY);
	}

	/* Slots without a consumer are skipped */
	for (n = 0; n < sc->n_components; n++) {
		if (sc->components[n].gcons != NULL)
			remove_component(sc, &sc->components[n], delay);
	}

	/* Unhook the softc from the geom before it is freed below */
	gp = sc->geom;
	gp->softc = NULL;

	/* remove_component() clears sc->provider when it withers it */
	KASSERT(sc->provider == NULL, ("Provider still exists for %s",
	    gp->name));

	/* XXX: This might or might not work, since we're called with
	 * the topology lock held. Also, it might panic the kernel if
	 * the error'd BIO is in softupdates code. */
	mtx_lock(&sc->delayed_bio_q_mtx);
	while (!STAILQ_EMPTY(&sc->delayed_bio_q)) {
		struct g_virstor_bio_q *bq;
		bq = STAILQ_FIRST(&sc->delayed_bio_q);
		/* NOTE(review): ENOSPC is stored here but EIO is passed to
		 * g_io_deliver(), which presumably replaces it - confirm
		 * which error is meant to reach the caller */
		bq->bio->bio_error = ENOSPC;
		g_io_deliver(bq->bio, EIO);
		STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage);
		free(bq, M_GVIRSTOR);
	}
	mtx_unlock(&sc->delayed_bio_q_mtx);
	mtx_destroy(&sc->delayed_bio_q_mtx);

	free(sc->map, M_GVIRSTOR);
	free(sc->components, M_GVIRSTOR);
	/* Wipe the softc before freeing it */
	bzero(sc, sizeof *sc);
	free(sc, M_GVIRSTOR);

	/* sc is gone; only gp may be used from here on */
	pp = LIST_FIRST(&gp->provider); /* We only offer one provider */
	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
		LOG_MSG(LVL_DEBUG, "Device %s destroyed", gp->name);

	g_wither_geom(gp, ENXIO);

	return (0);
}
990 */ 991 static int 992 read_metadata(struct g_consumer *cp, struct g_virstor_metadata *md) 993 { 994 struct g_provider *pp; 995 char *buf; 996 int error; 997 998 g_topology_assert(); 999 error = g_access(cp, 1, 0, 0); 1000 if (error != 0) 1001 return (error); 1002 pp = cp->provider; 1003 g_topology_unlock(); 1004 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 1005 &error); 1006 g_topology_lock(); 1007 g_access(cp, -1, 0, 0); 1008 if (buf == NULL) 1009 return (error); 1010 1011 virstor_metadata_decode(buf, md); 1012 g_free(buf); 1013 1014 return (0); 1015 } 1016 1017 /** 1018 * Utility function: encode & write metadata. Assumes topology lock is 1019 * held. 1020 * 1021 * There is no useful way of recovering from errors in this function, 1022 * not involving panicking the kernel. If the metadata cannot be written 1023 * the most we can do is notify the operator and hope he spots it and 1024 * replaces the broken drive. 1025 */ 1026 static void 1027 write_metadata(struct g_consumer *cp, struct g_virstor_metadata *md) 1028 { 1029 struct g_provider *pp; 1030 char *buf; 1031 int error; 1032 1033 KASSERT(cp != NULL && md != NULL && cp->provider != NULL, 1034 ("Something's fishy in %s", __func__)); 1035 LOG_MSG(LVL_DEBUG, "Writing metadata on %s", cp->provider->name); 1036 g_topology_assert(); 1037 error = g_access(cp, 0, 1, 0); 1038 if (error != 0) { 1039 LOG_MSG(LVL_ERROR, "g_access(0,1,0) failed for %s: %d", 1040 cp->provider->name, error); 1041 return; 1042 } 1043 pp = cp->provider; 1044 1045 buf = malloc(pp->sectorsize, M_GVIRSTOR, M_WAITOK); 1046 bzero(buf, pp->sectorsize); 1047 virstor_metadata_encode(md, buf); 1048 g_topology_unlock(); 1049 error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, 1050 pp->sectorsize); 1051 g_topology_lock(); 1052 g_access(cp, 0, -1, 0); 1053 free(buf, M_GVIRSTOR); 1054 1055 if (error != 0) 1056 LOG_MSG(LVL_ERROR, "Error %d writing metadata to %s", 1057 error, cp->provider->name); 1058 } 1059 1060 /* 
1061 * Creates a new instance of this GEOM class, initialise softc 1062 */ 1063 static struct g_geom * 1064 create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md) 1065 { 1066 struct g_geom *gp; 1067 struct g_virstor_softc *sc; 1068 1069 LOG_MSG(LVL_DEBUG, "Creating geom instance for %s (id=%u)", 1070 md->md_name, md->md_id); 1071 1072 if (md->md_count < 1 || md->md_chunk_size < 1 || 1073 md->md_virsize < md->md_chunk_size) { 1074 /* This is bogus configuration, and probably means data is 1075 * somehow corrupted. Panic, maybe? */ 1076 LOG_MSG(LVL_ERROR, "Nonsensical metadata information for %s", 1077 md->md_name); 1078 return (NULL); 1079 } 1080 1081 /* Check if it's already created */ 1082 LIST_FOREACH(gp, &mp->geom, geom) { 1083 sc = gp->softc; 1084 if (sc != NULL && strcmp(sc->geom->name, md->md_name) == 0) { 1085 LOG_MSG(LVL_WARNING, "Geom %s already exists", 1086 md->md_name); 1087 if (sc->id != md->md_id) { 1088 LOG_MSG(LVL_ERROR, 1089 "Some stale or invalid components " 1090 "exist for virstor device named %s. " 1091 "You will need to <CLEAR> all stale " 1092 "components and maybe reconfigure " 1093 "the virstor device. 
Tune " 1094 "kern.geom.virstor.debug sysctl up " 1095 "for more information.", 1096 sc->geom->name); 1097 } 1098 return (NULL); 1099 } 1100 } 1101 gp = g_new_geomf(mp, "%s", md->md_name); 1102 gp->softc = NULL; /* to circumevent races that test softc */ 1103 1104 gp->start = g_virstor_start; 1105 gp->spoiled = g_virstor_orphan; 1106 gp->orphan = g_virstor_orphan; 1107 gp->access = g_virstor_access; 1108 gp->dumpconf = g_virstor_dumpconf; 1109 1110 sc = malloc(sizeof(*sc), M_GVIRSTOR, M_WAITOK | M_ZERO); 1111 sc->id = md->md_id; 1112 sc->n_components = md->md_count; 1113 sc->components = malloc(sizeof(struct g_virstor_component) * md->md_count, 1114 M_GVIRSTOR, M_WAITOK | M_ZERO); 1115 sc->chunk_size = md->md_chunk_size; 1116 sc->virsize = md->md_virsize; 1117 STAILQ_INIT(&sc->delayed_bio_q); 1118 mtx_init(&sc->delayed_bio_q_mtx, "gvirstor_delayed_bio_q_mtx", 1119 "gvirstor", MTX_DEF | MTX_RECURSE); 1120 1121 sc->geom = gp; 1122 sc->provider = NULL; /* virstor_check_and_run will create it */ 1123 gp->softc = sc; 1124 1125 LOG_MSG(LVL_ANNOUNCE, "Device %s created", sc->geom->name); 1126 1127 return (gp); 1128 } 1129 1130 /* 1131 * Add provider to a GEOM class instance 1132 */ 1133 static int 1134 add_provider_to_geom(struct g_virstor_softc *sc, struct g_provider *pp, 1135 struct g_virstor_metadata *md) 1136 { 1137 struct g_virstor_component *component; 1138 struct g_consumer *cp, *fcp; 1139 struct g_geom *gp; 1140 int error; 1141 1142 if (md->no >= sc->n_components) 1143 return (EINVAL); 1144 1145 /* "Current" compontent */ 1146 component = &(sc->components[md->no]); 1147 if (component->gcons != NULL) 1148 return (EEXIST); 1149 1150 gp = sc->geom; 1151 fcp = LIST_FIRST(&gp->consumer); 1152 1153 cp = g_new_consumer(gp); 1154 error = g_attach(cp, pp); 1155 1156 if (error != 0) { 1157 g_destroy_consumer(cp); 1158 return (error); 1159 } 1160 1161 if (fcp != NULL) { 1162 if (fcp->provider->sectorsize != pp->sectorsize) { 1163 /* TODO: this can be made to work */ 1164 
LOG_MSG(LVL_ERROR, "Provider %s of %s has invalid " 1165 "sector size (%d)", pp->name, sc->geom->name, 1166 pp->sectorsize); 1167 return (EINVAL); 1168 } 1169 if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) { 1170 /* Replicate access permissions from first "live" consumer 1171 * to the new one */ 1172 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 1173 if (error != 0) { 1174 g_detach(cp); 1175 g_destroy_consumer(cp); 1176 return (error); 1177 } 1178 } 1179 } 1180 1181 /* Bring up a new component */ 1182 cp->private = component; 1183 component->gcons = cp; 1184 component->sc = sc; 1185 component->index = md->no; 1186 component->chunk_count = md->chunk_count; 1187 component->chunk_next = md->chunk_next; 1188 component->chunk_reserved = md->chunk_reserved; 1189 component->flags = md->flags; 1190 1191 LOG_MSG(LVL_DEBUG, "%s attached to %s", pp->name, sc->geom->name); 1192 1193 virstor_check_and_run(sc); 1194 return (0); 1195 } 1196 1197 /* 1198 * Check if everything's ready to create the geom provider & device entry, 1199 * create and start provider. 
1200 * Called ultimately by .taste, from g_event thread 1201 */ 1202 static void 1203 virstor_check_and_run(struct g_virstor_softc *sc) 1204 { 1205 off_t off; 1206 size_t n, count; 1207 int index; 1208 int error; 1209 1210 if (virstor_valid_components(sc) != sc->n_components) 1211 return; 1212 1213 if (virstor_valid_components(sc) == 0) { 1214 /* This is actually a candidate for panic() */ 1215 LOG_MSG(LVL_ERROR, "No valid components for %s?", 1216 sc->provider->name); 1217 return; 1218 } 1219 1220 sc->sectorsize = sc->components[0].gcons->provider->sectorsize; 1221 1222 /* Initialise allocation map from the first consumer */ 1223 sc->chunk_count = sc->virsize / sc->chunk_size; 1224 if (sc->chunk_count * (off_t)sc->chunk_size != sc->virsize) { 1225 LOG_MSG(LVL_WARNING, "Device %s truncated to %ju bytes", 1226 sc->provider->name, 1227 sc->chunk_count * (off_t)sc->chunk_size); 1228 } 1229 sc->map_size = sc->chunk_count * sizeof *(sc->map); 1230 /* The following allocation is in order of 4MB - 8MB */ 1231 sc->map = malloc(sc->map_size, M_GVIRSTOR, M_WAITOK); 1232 KASSERT(sc->map != NULL, ("%s: Memory allocation error (%zu bytes) for %s", 1233 __func__, sc->map_size, sc->provider->name)); 1234 sc->map_sectors = sc->map_size / sc->sectorsize; 1235 1236 count = 0; 1237 for (n = 0; n < sc->n_components; n++) 1238 count += sc->components[n].chunk_count; 1239 LOG_MSG(LVL_INFO, "Device %s has %zu physical chunks and %zu virtual " 1240 "(%zu KB chunks)", 1241 sc->geom->name, count, sc->chunk_count, sc->chunk_size / 1024); 1242 1243 error = g_access(sc->components[0].gcons, 1, 0, 0); 1244 if (error != 0) { 1245 LOG_MSG(LVL_ERROR, "Cannot acquire read access for %s to " 1246 "read allocation map for %s", 1247 sc->components[0].gcons->provider->name, 1248 sc->geom->name); 1249 return; 1250 } 1251 /* Read in the allocation map */ 1252 LOG_MSG(LVL_DEBUG, "Reading map for %s from %s", sc->geom->name, 1253 sc->components[0].gcons->provider->name); 1254 off = count = n = 0; 1255 
while (count < sc->map_size) { 1256 struct g_virstor_map_entry *mapbuf; 1257 size_t bs; 1258 1259 bs = MIN(MAXPHYS, sc->map_size - count); 1260 if (bs % sc->sectorsize != 0) { 1261 /* Check for alignment errors */ 1262 bs = rounddown(bs, sc->sectorsize); 1263 if (bs == 0) 1264 break; 1265 LOG_MSG(LVL_ERROR, "Trouble: map is not sector-aligned " 1266 "for %s on %s", sc->geom->name, 1267 sc->components[0].gcons->provider->name); 1268 } 1269 mapbuf = g_read_data(sc->components[0].gcons, off, bs, &error); 1270 if (mapbuf == NULL) { 1271 free(sc->map, M_GVIRSTOR); 1272 LOG_MSG(LVL_ERROR, "Error reading allocation map " 1273 "for %s from %s (offset %ju) (error %d)", 1274 sc->geom->name, 1275 sc->components[0].gcons->provider->name, 1276 off, error); 1277 return; 1278 } 1279 1280 bcopy(mapbuf, &sc->map[n], bs); 1281 off += bs; 1282 count += bs; 1283 n += bs / sizeof *(sc->map); 1284 g_free(mapbuf); 1285 } 1286 g_access(sc->components[0].gcons, -1, 0, 0); 1287 LOG_MSG(LVL_DEBUG, "Read map for %s", sc->geom->name); 1288 1289 /* find first component with allocatable chunks */ 1290 index = -1; 1291 for (n = 0; n < sc->n_components; n++) { 1292 if (sc->components[n].chunk_next < 1293 sc->components[n].chunk_count) { 1294 index = n; 1295 break; 1296 } 1297 } 1298 if (index == -1) 1299 /* not found? 
set it to the last component and handle it 1300 * later */ 1301 index = sc->n_components - 1; 1302 1303 if (index >= sc->n_components - g_virstor_component_watermark - 1) { 1304 LOG_MSG(LVL_WARNING, "Device %s running out of components " 1305 "(%d/%u: %s)", sc->geom->name, 1306 index+1, 1307 sc->n_components, 1308 sc->components[index].gcons->provider->name); 1309 } 1310 sc->curr_component = index; 1311 1312 if (sc->components[index].chunk_next >= 1313 sc->components[index].chunk_count - g_virstor_chunk_watermark) { 1314 LOG_MSG(LVL_WARNING, 1315 "Component %s of %s is running out of free space " 1316 "(%u chunks left)", 1317 sc->components[index].gcons->provider->name, 1318 sc->geom->name, sc->components[index].chunk_count - 1319 sc->components[index].chunk_next); 1320 } 1321 1322 sc->me_per_sector = sc->sectorsize / sizeof *(sc->map); 1323 if (sc->sectorsize % sizeof *(sc->map) != 0) { 1324 LOG_MSG(LVL_ERROR, 1325 "%s: Map entries don't fit exactly in a sector (%s)", 1326 __func__, sc->geom->name); 1327 return; 1328 } 1329 1330 /* Recalculate allocated chunks in components & at the same time 1331 * verify map data is sane. We could trust metadata on this, but 1332 * we want to make sure. 
*/ 1333 for (n = 0; n < sc->n_components; n++) 1334 sc->components[n].chunk_next = sc->components[n].chunk_reserved; 1335 1336 for (n = 0; n < sc->chunk_count; n++) { 1337 if (sc->map[n].provider_no >= sc->n_components || 1338 sc->map[n].provider_chunk >= 1339 sc->components[sc->map[n].provider_no].chunk_count) { 1340 LOG_MSG(LVL_ERROR, "%s: Invalid entry %u in map for %s", 1341 __func__, (u_int)n, sc->geom->name); 1342 LOG_MSG(LVL_ERROR, "%s: provider_no: %u, n_components: %u" 1343 " provider_chunk: %u, chunk_count: %u", __func__, 1344 sc->map[n].provider_no, sc->n_components, 1345 sc->map[n].provider_chunk, 1346 sc->components[sc->map[n].provider_no].chunk_count); 1347 return; 1348 } 1349 if (sc->map[n].flags & VIRSTOR_MAP_ALLOCATED) 1350 sc->components[sc->map[n].provider_no].chunk_next++; 1351 } 1352 1353 sc->provider = g_new_providerf(sc->geom, "virstor/%s", 1354 sc->geom->name); 1355 1356 sc->provider->sectorsize = sc->sectorsize; 1357 sc->provider->mediasize = sc->virsize; 1358 g_error_provider(sc->provider, 0); 1359 1360 LOG_MSG(LVL_INFO, "%s activated", sc->provider->name); 1361 LOG_MSG(LVL_DEBUG, "%s starting with current component %u, starting " 1362 "chunk %u", sc->provider->name, sc->curr_component, 1363 sc->components[sc->curr_component].chunk_next); 1364 } 1365 1366 /* 1367 * Returns count of active providers in this geom instance 1368 */ 1369 static u_int 1370 virstor_valid_components(struct g_virstor_softc *sc) 1371 { 1372 unsigned int nc, i; 1373 1374 nc = 0; 1375 KASSERT(sc != NULL, ("%s: softc is NULL", __func__)); 1376 KASSERT(sc->components != NULL, ("%s: sc->components is NULL", __func__)); 1377 for (i = 0; i < sc->n_components; i++) 1378 if (sc->components[i].gcons != NULL) 1379 nc++; 1380 return (nc); 1381 } 1382 1383 /* 1384 * Called when the consumer gets orphaned (?) 
1385 */ 1386 static void 1387 g_virstor_orphan(struct g_consumer *cp) 1388 { 1389 struct g_virstor_softc *sc; 1390 struct g_virstor_component *comp; 1391 struct g_geom *gp; 1392 1393 g_topology_assert(); 1394 gp = cp->geom; 1395 sc = gp->softc; 1396 if (sc == NULL) 1397 return; 1398 1399 comp = cp->private; 1400 KASSERT(comp != NULL, ("%s: No component in private part of consumer", 1401 __func__)); 1402 remove_component(sc, comp, FALSE); 1403 if (LIST_EMPTY(&gp->consumer)) 1404 virstor_geom_destroy(sc, TRUE, FALSE); 1405 } 1406 1407 /* 1408 * Called to notify geom when it's been opened, and for what intent 1409 */ 1410 static int 1411 g_virstor_access(struct g_provider *pp, int dr, int dw, int de) 1412 { 1413 struct g_consumer *c, *c2, *tmp; 1414 struct g_virstor_softc *sc; 1415 struct g_geom *gp; 1416 int error; 1417 1418 KASSERT(pp != NULL, ("%s: NULL provider", __func__)); 1419 gp = pp->geom; 1420 KASSERT(gp != NULL, ("%s: NULL geom", __func__)); 1421 sc = gp->softc; 1422 1423 /* Grab an exclusive bit to propagate on our consumers on first open */ 1424 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) 1425 de++; 1426 /* ... 
drop it on close */ 1427 if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) { 1428 de--; 1429 if (sc != NULL) 1430 update_metadata(sc); 1431 } 1432 1433 error = ENXIO; 1434 LIST_FOREACH_SAFE(c, &gp->consumer, consumer, tmp) { 1435 error = g_access(c, dr, dw, de); 1436 if (error != 0) 1437 goto fail; 1438 if (c->acr == 0 && c->acw == 0 && c->ace == 0 && 1439 c->flags & G_CF_ORPHAN) { 1440 g_detach(c); 1441 g_destroy_consumer(c); 1442 } 1443 } 1444 1445 if (sc != NULL && LIST_EMPTY(&gp->consumer)) 1446 virstor_geom_destroy(sc, TRUE, FALSE); 1447 1448 return (error); 1449 1450 fail: 1451 /* Backout earlier changes */ 1452 LIST_FOREACH(c2, &gp->consumer, consumer) { 1453 if (c2 == c) 1454 break; 1455 g_access(c2, -dr, -dw, -de); 1456 } 1457 return (error); 1458 } 1459 1460 /* 1461 * Generate XML dump of current state 1462 */ 1463 static void 1464 g_virstor_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1465 struct g_consumer *cp, struct g_provider *pp) 1466 { 1467 struct g_virstor_softc *sc; 1468 1469 g_topology_assert(); 1470 sc = gp->softc; 1471 1472 if (sc == NULL || pp != NULL) 1473 return; 1474 1475 if (cp != NULL) { 1476 /* For each component */ 1477 struct g_virstor_component *comp; 1478 1479 comp = cp->private; 1480 if (comp == NULL) 1481 return; 1482 sbuf_printf(sb, "%s<ComponentIndex>%u</ComponentIndex>\n", 1483 indent, comp->index); 1484 sbuf_printf(sb, "%s<ChunkCount>%u</ChunkCount>\n", 1485 indent, comp->chunk_count); 1486 sbuf_printf(sb, "%s<ChunksUsed>%u</ChunksUsed>\n", 1487 indent, comp->chunk_next); 1488 sbuf_printf(sb, "%s<ChunksReserved>%u</ChunksReserved>\n", 1489 indent, comp->chunk_reserved); 1490 sbuf_printf(sb, "%s<StorageFree>%u%%</StorageFree>\n", 1491 indent, 1492 comp->chunk_next > 0 ? 
100 - 1493 ((comp->chunk_next + comp->chunk_reserved) * 100) / 1494 comp->chunk_count : 100); 1495 } else { 1496 /* For the whole thing */ 1497 u_int count, used, i; 1498 off_t size; 1499 1500 count = used = size = 0; 1501 for (i = 0; i < sc->n_components; i++) { 1502 if (sc->components[i].gcons != NULL) { 1503 count += sc->components[i].chunk_count; 1504 used += sc->components[i].chunk_next + 1505 sc->components[i].chunk_reserved; 1506 size += sc->components[i].gcons-> 1507 provider->mediasize; 1508 } 1509 } 1510 1511 sbuf_printf(sb, "%s<Status>" 1512 "Components=%u, Online=%u</Status>\n", indent, 1513 sc->n_components, virstor_valid_components(sc)); 1514 sbuf_printf(sb, "%s<State>%u%% physical free</State>\n", 1515 indent, 100-(used * 100) / count); 1516 sbuf_printf(sb, "%s<ChunkSize>%zu</ChunkSize>\n", indent, 1517 sc->chunk_size); 1518 sbuf_printf(sb, "%s<PhysicalFree>%u%%</PhysicalFree>\n", 1519 indent, used > 0 ? 100 - (used * 100) / count : 100); 1520 sbuf_printf(sb, "%s<ChunkPhysicalCount>%u</ChunkPhysicalCount>\n", 1521 indent, count); 1522 sbuf_printf(sb, "%s<ChunkVirtualCount>%zu</ChunkVirtualCount>\n", 1523 indent, sc->chunk_count); 1524 sbuf_printf(sb, "%s<PhysicalBacking>%zu%%</PhysicalBacking>\n", 1525 indent, 1526 (count * 100) / sc->chunk_count); 1527 sbuf_printf(sb, "%s<PhysicalBackingSize>%jd</PhysicalBackingSize>\n", 1528 indent, size); 1529 sbuf_printf(sb, "%s<VirtualSize>%jd</VirtualSize>\n", indent, 1530 sc->virsize); 1531 } 1532 } 1533 1534 /* 1535 * GEOM .done handler 1536 * Can't use standard handler because one requested IO may 1537 * fork into additional data IOs 1538 */ 1539 static void 1540 g_virstor_done(struct bio *b) 1541 { 1542 struct g_virstor_softc *sc; 1543 struct bio *parent_b; 1544 1545 parent_b = b->bio_parent; 1546 sc = parent_b->bio_to->geom->softc; 1547 1548 if (b->bio_error != 0) { 1549 LOG_MSG(LVL_ERROR, "Error %d for offset=%ju, length=%ju, %s", 1550 b->bio_error, b->bio_offset, b->bio_length, 1551 b->bio_to->name); 
1552 if (parent_b->bio_error == 0) 1553 parent_b->bio_error = b->bio_error; 1554 } 1555 1556 parent_b->bio_inbed++; 1557 parent_b->bio_completed += b->bio_completed; 1558 1559 if (parent_b->bio_children == parent_b->bio_inbed) { 1560 parent_b->bio_completed = parent_b->bio_length; 1561 g_io_deliver(parent_b, parent_b->bio_error); 1562 } 1563 g_destroy_bio(b); 1564 } 1565 1566 /* 1567 * I/O starts here 1568 * Called in g_down thread 1569 */ 1570 static void 1571 g_virstor_start(struct bio *b) 1572 { 1573 struct g_virstor_softc *sc; 1574 struct g_virstor_component *comp; 1575 struct bio *cb; 1576 struct g_provider *pp; 1577 char *addr; 1578 off_t offset, length; 1579 struct bio_queue_head bq; 1580 size_t chunk_size; /* cached for convenience */ 1581 u_int count; 1582 1583 pp = b->bio_to; 1584 sc = pp->geom->softc; 1585 KASSERT(sc != NULL, ("%s: no softc (error=%d, device=%s)", __func__, 1586 b->bio_to->error, b->bio_to->name)); 1587 1588 LOG_REQ(LVL_MOREDEBUG, b, "%s", __func__); 1589 1590 switch (b->bio_cmd) { 1591 case BIO_READ: 1592 case BIO_WRITE: 1593 case BIO_DELETE: 1594 break; 1595 default: 1596 g_io_deliver(b, EOPNOTSUPP); 1597 return; 1598 } 1599 1600 LOG_MSG(LVL_DEBUG2, "BIO arrived, size=%ju", b->bio_length); 1601 bioq_init(&bq); 1602 1603 chunk_size = sc->chunk_size; 1604 addr = b->bio_data; 1605 offset = b->bio_offset; /* virtual offset and length */ 1606 length = b->bio_length; 1607 1608 while (length > 0) { 1609 size_t chunk_index, in_chunk_offset, in_chunk_length; 1610 struct virstor_map_entry *me; 1611 1612 chunk_index = offset / chunk_size; /* round downwards */ 1613 in_chunk_offset = offset % chunk_size; 1614 in_chunk_length = min(length, chunk_size - in_chunk_offset); 1615 LOG_MSG(LVL_DEBUG, "Mapped %s(%ju, %ju) to (%zu,%zu,%zu)", 1616 b->bio_cmd == BIO_READ ? 
"R" : "W", 1617 offset, length, 1618 chunk_index, in_chunk_offset, in_chunk_length); 1619 me = &sc->map[chunk_index]; 1620 1621 if (b->bio_cmd == BIO_READ || b->bio_cmd == BIO_DELETE) { 1622 if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) { 1623 /* Reads from unallocated chunks return zeroed 1624 * buffers */ 1625 if (b->bio_cmd == BIO_READ) 1626 bzero(addr, in_chunk_length); 1627 } else { 1628 comp = &sc->components[me->provider_no]; 1629 1630 cb = g_clone_bio(b); 1631 if (cb == NULL) { 1632 bioq_dismantle(&bq); 1633 if (b->bio_error == 0) 1634 b->bio_error = ENOMEM; 1635 g_io_deliver(b, b->bio_error); 1636 return; 1637 } 1638 cb->bio_to = comp->gcons->provider; 1639 cb->bio_done = g_virstor_done; 1640 cb->bio_offset = 1641 (off_t)me->provider_chunk * (off_t)chunk_size 1642 + in_chunk_offset; 1643 cb->bio_length = in_chunk_length; 1644 cb->bio_data = addr; 1645 cb->bio_caller1 = comp; 1646 bioq_disksort(&bq, cb); 1647 } 1648 } else { /* handle BIO_WRITE */ 1649 KASSERT(b->bio_cmd == BIO_WRITE, 1650 ("%s: Unknown command %d", __func__, 1651 b->bio_cmd)); 1652 1653 if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) { 1654 /* We have a virtual chunk, represented by 1655 * the "me" entry, but it's not yet allocated 1656 * (tied to) a physical chunk. So do it now. */ 1657 struct virstor_map_entry *data_me; 1658 u_int phys_chunk, comp_no; 1659 off_t s_offset; 1660 int error; 1661 1662 error = allocate_chunk(sc, &comp, &comp_no, 1663 &phys_chunk); 1664 if (error != 0) { 1665 /* We cannot allocate a physical chunk 1666 * to satisfy this request, so we'll 1667 * delay it to when we can... 1668 * XXX: this will prevent the fs from 1669 * being umounted! 
*/ 1670 struct g_virstor_bio_q *biq; 1671 biq = malloc(sizeof *biq, M_GVIRSTOR, 1672 M_NOWAIT); 1673 if (biq == NULL) { 1674 bioq_dismantle(&bq); 1675 if (b->bio_error == 0) 1676 b->bio_error = ENOMEM; 1677 g_io_deliver(b, b->bio_error); 1678 return; 1679 } 1680 biq->bio = b; 1681 mtx_lock(&sc->delayed_bio_q_mtx); 1682 STAILQ_INSERT_TAIL(&sc->delayed_bio_q, 1683 biq, linkage); 1684 mtx_unlock(&sc->delayed_bio_q_mtx); 1685 LOG_MSG(LVL_WARNING, "Delaying BIO " 1686 "(size=%ju) until free physical " 1687 "space can be found on %s", 1688 b->bio_length, 1689 sc->provider->name); 1690 return; 1691 } 1692 LOG_MSG(LVL_DEBUG, "Allocated chunk %u on %s " 1693 "for %s", 1694 phys_chunk, 1695 comp->gcons->provider->name, 1696 sc->provider->name); 1697 1698 me->provider_no = comp_no; 1699 me->provider_chunk = phys_chunk; 1700 me->flags |= VIRSTOR_MAP_ALLOCATED; 1701 1702 cb = g_clone_bio(b); 1703 if (cb == NULL) { 1704 me->flags &= ~VIRSTOR_MAP_ALLOCATED; 1705 me->provider_no = 0; 1706 me->provider_chunk = 0; 1707 bioq_dismantle(&bq); 1708 if (b->bio_error == 0) 1709 b->bio_error = ENOMEM; 1710 g_io_deliver(b, b->bio_error); 1711 return; 1712 } 1713 1714 /* The allocation table is stored continuously 1715 * at the start of the drive. We need to 1716 * calculate the offset of the sector that holds 1717 * this map entry both on the drive and in the 1718 * map array. 1719 * sc_offset will end up pointing to the drive 1720 * sector. 
*/ 1721 s_offset = chunk_index * sizeof *me; 1722 s_offset = rounddown(s_offset, sc->sectorsize); 1723 1724 /* data_me points to map entry sector 1725 * in memory (analogous to offset) */ 1726 data_me = &sc->map[rounddown(chunk_index, 1727 sc->me_per_sector)]; 1728 1729 /* Commit sector with map entry to storage */ 1730 cb->bio_to = sc->components[0].gcons->provider; 1731 cb->bio_done = g_virstor_done; 1732 cb->bio_offset = s_offset; 1733 cb->bio_data = (char *)data_me; 1734 cb->bio_length = sc->sectorsize; 1735 cb->bio_caller1 = &sc->components[0]; 1736 bioq_disksort(&bq, cb); 1737 } 1738 1739 comp = &sc->components[me->provider_no]; 1740 cb = g_clone_bio(b); 1741 if (cb == NULL) { 1742 bioq_dismantle(&bq); 1743 if (b->bio_error == 0) 1744 b->bio_error = ENOMEM; 1745 g_io_deliver(b, b->bio_error); 1746 return; 1747 } 1748 /* Finally, handle the data */ 1749 cb->bio_to = comp->gcons->provider; 1750 cb->bio_done = g_virstor_done; 1751 cb->bio_offset = (off_t)me->provider_chunk*(off_t)chunk_size + 1752 in_chunk_offset; 1753 cb->bio_length = in_chunk_length; 1754 cb->bio_data = addr; 1755 cb->bio_caller1 = comp; 1756 bioq_disksort(&bq, cb); 1757 } 1758 addr += in_chunk_length; 1759 length -= in_chunk_length; 1760 offset += in_chunk_length; 1761 } 1762 1763 /* Fire off bio's here */ 1764 count = 0; 1765 for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) { 1766 bioq_remove(&bq, cb); 1767 LOG_REQ(LVL_MOREDEBUG, cb, "Firing request"); 1768 comp = cb->bio_caller1; 1769 cb->bio_caller1 = NULL; 1770 LOG_MSG(LVL_DEBUG, " firing bio, offset=%ju, length=%ju", 1771 cb->bio_offset, cb->bio_length); 1772 g_io_request(cb, comp->gcons); 1773 count++; 1774 } 1775 if (count == 0) { /* We handled everything locally */ 1776 b->bio_completed = b->bio_length; 1777 g_io_deliver(b, 0); 1778 } 1779 1780 } 1781 1782 /* 1783 * Allocate a chunk from a physical provider. Returns physical component, 1784 * chunk index relative to the component and the component's index. 
1785 */ 1786 static int 1787 allocate_chunk(struct g_virstor_softc *sc, struct g_virstor_component **comp, 1788 u_int *comp_no_p, u_int *chunk) 1789 { 1790 u_int comp_no; 1791 1792 KASSERT(sc->curr_component < sc->n_components, 1793 ("%s: Invalid curr_component: %u", __func__, sc->curr_component)); 1794 1795 comp_no = sc->curr_component; 1796 *comp = &sc->components[comp_no]; 1797 dump_component(*comp); 1798 if ((*comp)->chunk_next >= (*comp)->chunk_count) { 1799 /* This component is full. Allocate next component */ 1800 if (comp_no >= sc->n_components-1) { 1801 LOG_MSG(LVL_ERROR, "All physical space allocated for %s", 1802 sc->geom->name); 1803 return (-1); 1804 } 1805 (*comp)->flags &= ~VIRSTOR_PROVIDER_CURRENT; 1806 sc->curr_component = ++comp_no; 1807 1808 *comp = &sc->components[comp_no]; 1809 if (comp_no >= sc->n_components - g_virstor_component_watermark-1) 1810 LOG_MSG(LVL_WARNING, "Device %s running out of components " 1811 "(switching to %u/%u: %s)", sc->geom->name, 1812 comp_no+1, sc->n_components, 1813 (*comp)->gcons->provider->name); 1814 /* Take care not to overwrite reserved chunks */ 1815 if ( (*comp)->chunk_reserved > 0 && 1816 (*comp)->chunk_next < (*comp)->chunk_reserved) 1817 (*comp)->chunk_next = (*comp)->chunk_reserved; 1818 1819 (*comp)->flags |= 1820 VIRSTOR_PROVIDER_ALLOCATED | VIRSTOR_PROVIDER_CURRENT; 1821 dump_component(*comp); 1822 *comp_no_p = comp_no; 1823 *chunk = (*comp)->chunk_next++; 1824 } else { 1825 *comp_no_p = comp_no; 1826 *chunk = (*comp)->chunk_next++; 1827 } 1828 return (0); 1829 } 1830 1831 /* Dump a component */ 1832 static void 1833 dump_component(struct g_virstor_component *comp) 1834 { 1835 1836 if (g_virstor_debug < LVL_DEBUG2) 1837 return; 1838 printf("Component %d: %s\n", comp->index, comp->gcons->provider->name); 1839 printf(" chunk_count: %u\n", comp->chunk_count); 1840 printf(" chunk_next: %u\n", comp->chunk_next); 1841 printf(" flags: %u\n", comp->flags); 1842 } 1843 1844 #if 0 1845 /* Dump a map entry */ 1846 
static void 1847 dump_me(struct virstor_map_entry *me, unsigned int nr) 1848 { 1849 if (g_virstor_debug < LVL_DEBUG) 1850 return; 1851 printf("VIRT. CHUNK #%d: ", nr); 1852 if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) 1853 printf("(unallocated)\n"); 1854 else 1855 printf("allocated at provider %u, provider_chunk %u\n", 1856 me->provider_no, me->provider_chunk); 1857 } 1858 #endif 1859 1860 /* 1861 * Dismantle bio_queue and destroy its components 1862 */ 1863 static void 1864 bioq_dismantle(struct bio_queue_head *bq) 1865 { 1866 struct bio *b; 1867 1868 for (b = bioq_first(bq); b != NULL; b = bioq_first(bq)) { 1869 bioq_remove(bq, b); 1870 g_destroy_bio(b); 1871 } 1872 } 1873 1874 /* 1875 * The function that shouldn't be called. 1876 * When this is called, the stack is already garbled because of 1877 * argument mismatch. There's nothing to do now but panic, which is 1878 * accidentally the whole purpose of this function. 1879 * Motivation: to guard from accidentally calling geom methods when 1880 * they shouldn't be called. (see g_..._taste) 1881 */ 1882 static void 1883 invalid_call(void) 1884 { 1885 panic("invalid_call() has just been called. Something's fishy here."); 1886 } 1887 1888 DECLARE_GEOM_CLASS(g_virstor_class, g_virstor); /* Let there be light */ 1889 MODULE_VERSION(geom_virstor, 0); 1890