1 /*- 2 * Copyright (c) 2006-2007 Ivan Voras <ivoras@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* Implementation notes: 28 * - "Components" are wrappers around providers that make up the 29 * virtual storage (i.e. a virstor has "physical" components) 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/module.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/sx.h> 42 #include <sys/bio.h> 43 #include <sys/sbuf.h> 44 #include <sys/sysctl.h> 45 #include <sys/malloc.h> 46 #include <sys/time.h> 47 #include <sys/proc.h> 48 #include <sys/kthread.h> 49 #include <sys/mutex.h> 50 #include <vm/uma.h> 51 #include <geom/geom.h> 52 53 #include <geom/virstor/g_virstor.h> 54 #include <geom/virstor/g_virstor_md.h> 55 56 FEATURE(g_virstor, "GEOM virtual storage support"); 57 58 /* Declare malloc(9) label */ 59 static MALLOC_DEFINE(M_GVIRSTOR, "gvirstor", "GEOM_VIRSTOR Data"); 60 61 /* GEOM class methods */ 62 static g_init_t g_virstor_init; 63 static g_fini_t g_virstor_fini; 64 static g_taste_t g_virstor_taste; 65 static g_ctl_req_t g_virstor_config; 66 static g_ctl_destroy_geom_t g_virstor_destroy_geom; 67 68 /* Declare & initialize class structure ("geom class") */ 69 struct g_class g_virstor_class = { 70 .name = G_VIRSTOR_CLASS_NAME, 71 .version = G_VERSION, 72 .init = g_virstor_init, 73 .fini = g_virstor_fini, 74 .taste = g_virstor_taste, 75 .ctlreq = g_virstor_config, 76 .destroy_geom = g_virstor_destroy_geom 77 /* The .dumpconf and the rest are only usable for a geom instance, so 78 * they will be set when such instance is created. */ 79 }; 80 81 /* Declare sysctl's and loader tunables */ 82 SYSCTL_DECL(_kern_geom); 83 static SYSCTL_NODE(_kern_geom, OID_AUTO, virstor, CTLFLAG_RW, 0, 84 "GEOM_GVIRSTOR information"); 85 86 static u_int g_virstor_debug = 2; /* XXX: lower to 2 when released to public */ 87 TUNABLE_INT("kern.geom.virstor.debug", &g_virstor_debug); 88 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, debug, CTLFLAG_RW, &g_virstor_debug, 89 0, "Debug level (2=production, 5=normal, 15=excessive)"); 90 91 static u_int g_virstor_chunk_watermark = 100; 92 TUNABLE_INT("kern.geom.virstor.chunk_watermark", &g_virstor_chunk_watermark); 93 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, chunk_watermark, CTLFLAG_RW, 94 &g_virstor_chunk_watermark, 0, 95 "Minimum number of free chunks before issuing administrative warning"); 96 97 static u_int g_virstor_component_watermark = 1; 98 TUNABLE_INT("kern.geom.virstor.component_watermark", 99 &g_virstor_component_watermark); 100 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, component_watermark, CTLFLAG_RW, 101 &g_virstor_component_watermark, 0, 102 "Minimum number of free components before issuing administrative warning"); 103 104 static int read_metadata(struct g_consumer *, struct g_virstor_metadata *); 105 static void write_metadata(struct g_consumer *, struct g_virstor_metadata *); 106 static int clear_metadata(struct g_virstor_component *); 107 static int add_provider_to_geom(struct g_virstor_softc *, struct g_provider *, 108 struct g_virstor_metadata *); 109 static struct g_geom *create_virstor_geom(struct g_class *, 110 struct g_virstor_metadata *); 111 static void virstor_check_and_run(struct g_virstor_softc *); 112 static u_int virstor_valid_components(struct g_virstor_softc *); 113 static int virstor_geom_destroy(struct g_virstor_softc *, boolean_t, 114 boolean_t); 115 static void remove_component(struct g_virstor_softc *, 116 struct g_virstor_component *, boolean_t); 117 static void bioq_dismantle(struct bio_queue_head *); 118 static int allocate_chunk(struct g_virstor_softc *, 119 struct g_virstor_component **, u_int *, u_int *); 120 static void delay_destroy_consumer(void *, int); 121 static void dump_component(struct g_virstor_component *comp); 122 #if 0 123 static void dump_me(struct virstor_map_entry *me, unsigned int nr); 124 #endif 125 126 static void virstor_ctl_stop(struct gctl_req *, struct g_class *); 127 static void virstor_ctl_add(struct gctl_req *, struct g_class *); 128 static void virstor_ctl_remove(struct gctl_req *, struct g_class *); 129 static struct g_virstor_softc * virstor_find_geom(const struct g_class *, 130 const char *); 131 static void update_metadata(struct g_virstor_softc *); 132 static void fill_metadata(struct g_virstor_softc *, struct g_virstor_metadata *, 133 u_int, u_int); 134 135 static void g_virstor_orphan(struct g_consumer *); 136 static int g_virstor_access(struct g_provider *, int, int, int); 137 static void g_virstor_start(struct bio *); 138 static void g_virstor_dumpconf(struct sbuf *, const char *, struct g_geom *, 139 struct g_consumer *, struct g_provider *); 140 static void g_virstor_done(struct bio *); 141 142 static void invalid_call(void); 143 /* 144 * Initialise GEOM class (per-class callback) 145 */ 146 static void 147 g_virstor_init(struct g_class *mp __unused) 148 { 149 150 /* Catch map struct size mismatch at compile time; Map entries must 151 * fit into MAXPHYS exactly, with no wasted space. */ 152 CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS); 153 154 /* Init UMA zones, TAILQ's, other global vars */ 155 } 156 157 /* 158 * Finalise GEOM class (per-class callback) 159 */ 160 static void 161 g_virstor_fini(struct g_class *mp __unused) 162 { 163 164 /* Deinit UMA zones & global vars */ 165 } 166 167 /* 168 * Config (per-class callback) 169 */ 170 static void 171 g_virstor_config(struct gctl_req *req, struct g_class *cp, char const *verb) 172 { 173 uint32_t *version; 174 175 g_topology_assert(); 176 177 version = gctl_get_paraml(req, "version", sizeof(*version)); 178 if (version == NULL) { 179 gctl_error(req, "Failed to get 'version' argument"); 180 return; 181 } 182 if (*version != G_VIRSTOR_VERSION) { 183 gctl_error(req, "Userland and kernel versions out of sync"); 184 return; 185 } 186 187 g_topology_unlock(); 188 if (strcmp(verb, "add") == 0) 189 virstor_ctl_add(req, cp); 190 else if (strcmp(verb, "stop") == 0 || strcmp(verb, "destroy") == 0) 191 virstor_ctl_stop(req, cp); 192 else if (strcmp(verb, "remove") == 0) 193 virstor_ctl_remove(req, cp); 194 else 195 gctl_error(req, "unknown verb: '%s'", verb); 196 g_topology_lock(); 197 } 198 199 /* 200 * "stop" verb from userland 201 */ 202 static void 203 virstor_ctl_stop(struct gctl_req *req, struct g_class *cp) 204 { 205 int *force, *nargs; 206 int i; 207 208 nargs = gctl_get_paraml(req, "nargs", sizeof *nargs); 209 if (nargs == NULL) { 210 gctl_error(req, "Error fetching argument '%s'", "nargs"); 211 return; 212 } 213 if (*nargs < 1) { 214 gctl_error(req, "Invalid number of arguments"); 215 return; 216 } 217 force = gctl_get_paraml(req, "force", sizeof *force); 218 if (force == NULL) { 219 gctl_error(req, "Error fetching argument '%s'", "force"); 220 return; 221 } 222 223 g_topology_lock(); 224 for (i = 0; i < *nargs; i++) { 225 char param[8]; 226 const char *name; 227 struct g_virstor_softc *sc; 228 int error; 229 230 sprintf(param, "arg%d", i); 231 name = gctl_get_asciiparam(req, param); 232 if (name == NULL) { 233 gctl_error(req, "No 'arg%d' argument", i); 234 g_topology_unlock(); 235 return; 236 } 237 sc = virstor_find_geom(cp, name); 238 LOG_MSG(LVL_INFO, "Stopping %s by the userland command", 239 sc->geom->name); 240 update_metadata(sc); 241 if ((error = virstor_geom_destroy(sc, TRUE, TRUE)) != 0) { 242 LOG_MSG(LVL_ERROR, "Cannot destroy %s: %d", 243 sc->geom->name, error); 244 } 245 } 246 g_topology_unlock(); 247 } 248 249 /* 250 * "add" verb from userland - add new component(s) to the structure. 251 * This will be done all at once in here, without going through the 252 * .taste function for new components. 253 */ 254 static void 255 virstor_ctl_add(struct gctl_req *req, struct g_class *cp) 256 { 257 /* Note: while this is going on, I/O is being done on 258 * the g_up and g_down threads. The idea is to make changes 259 * to softc members in a way that can atomically activate 260 * them all at once. */ 261 struct g_virstor_softc *sc; 262 int *hardcode, *nargs; 263 const char *geom_name; /* geom to add a component to */ 264 struct g_consumer *fcp; 265 struct g_virstor_bio_q *bq; 266 u_int added; 267 int error; 268 int i; 269 270 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 271 if (nargs == NULL) { 272 gctl_error(req, "Error fetching argument '%s'", "nargs"); 273 return; 274 } 275 if (*nargs < 2) { 276 gctl_error(req, "Invalid number of arguments"); 277 return; 278 } 279 hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode)); 280 if (hardcode == NULL) { 281 gctl_error(req, "Error fetching argument '%s'", "hardcode"); 282 return; 283 } 284 285 /* Find "our" geom */ 286 geom_name = gctl_get_asciiparam(req, "arg0"); 287 if (geom_name == NULL) { 288 gctl_error(req, "Error fetching argument '%s'", "geom_name (arg0)"); 289 return; 290 } 291 sc = virstor_find_geom(cp, geom_name); 292 if (sc == NULL) { 293 gctl_error(req, "Don't know anything about '%s'", geom_name); 294 return; 295 } 296 297 if (virstor_valid_components(sc) != sc->n_components) { 298 LOG_MSG(LVL_ERROR, "Cannot add components to incomplete " 299 "virstor %s", sc->geom->name); 300 gctl_error(req, "Virstor %s is incomplete", sc->geom->name); 301 return; 302 } 303 304 fcp = sc->components[0].gcons; 305 added = 0; 306 g_topology_lock(); 307 for (i = 1; i < *nargs; i++) { 308 struct g_virstor_metadata md; 309 char aname[8]; 310 const char *prov_name; 311 struct g_provider *pp; 312 struct g_consumer *cp; 313 u_int nc; 314 u_int j; 315 316 snprintf(aname, sizeof aname, "arg%d", i); 317 prov_name = gctl_get_asciiparam(req, aname); 318 if (prov_name == NULL) { 319 gctl_error(req, "Error fetching argument '%s'", aname); 320 g_topology_unlock(); 321 return; 322 } 323 if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) 324 prov_name += sizeof(_PATH_DEV) - 1; 325 326 pp = g_provider_by_name(prov_name); 327 if (pp == NULL) { 328 /* This is the most common error so be verbose about it */ 329 if (added != 0) { 330 gctl_error(req, "Invalid provider: '%s' (added" 331 " %u components)", prov_name, added); 332 update_metadata(sc); 333 } else { 334 gctl_error(req, "Invalid provider: '%s'", 335 prov_name); 336 } 337 g_topology_unlock(); 338 return; 339 } 340 cp = g_new_consumer(sc->geom); 341 if (cp == NULL) { 342 gctl_error(req, "Cannot create consumer"); 343 g_topology_unlock(); 344 return; 345 } 346 error = g_attach(cp, pp); 347 if (error != 0) { 348 gctl_error(req, "Cannot attach a consumer to %s", 349 pp->name); 350 g_destroy_consumer(cp); 351 g_topology_unlock(); 352 return; 353 } 354 if (fcp->acr != 0 || fcp->acw != 0 || fcp->ace != 0) { 355 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 356 if (error != 0) { 357 gctl_error(req, "Access request failed for %s", 358 pp->name); 359 g_destroy_consumer(cp); 360 g_topology_unlock(); 361 return; 362 } 363 } 364 if (fcp->provider->sectorsize != pp->sectorsize) { 365 gctl_error(req, "Sector size doesn't fit for %s", 366 pp->name); 367 g_destroy_consumer(cp); 368 g_topology_unlock(); 369 return; 370 } 371 for (j = 0; j < sc->n_components; j++) { 372 if (strcmp(sc->components[j].gcons->provider->name, 373 pp->name) == 0) { 374 gctl_error(req, "Component %s already in %s", 375 pp->name, sc->geom->name); 376 g_destroy_consumer(cp); 377 g_topology_unlock(); 378 return; 379 } 380 } 381 sc->components = realloc(sc->components, 382 sizeof(*sc->components) * (sc->n_components + 1), 383 M_GVIRSTOR, M_WAITOK); 384 385 nc = sc->n_components; 386 sc->components[nc].gcons = cp; 387 sc->components[nc].sc = sc; 388 sc->components[nc].index = nc; 389 sc->components[nc].chunk_count = cp->provider->mediasize / 390 sc->chunk_size; 391 sc->components[nc].chunk_next = 0; 392 sc->components[nc].chunk_reserved = 0; 393 394 if (sc->components[nc].chunk_count < 4) { 395 gctl_error(req, "Provider too small: %s", 396 cp->provider->name); 397 g_destroy_consumer(cp); 398 g_topology_unlock(); 399 return; 400 } 401 fill_metadata(sc, &md, nc, *hardcode); 402 write_metadata(cp, &md); 403 /* The new component becomes visible when n_components is 404 * incremented */ 405 sc->n_components++; 406 added++; 407 408 } 409 /* This call to update_metadata() is critical. In case there's a 410 * power failure in the middle of it and some components are updated 411 * while others are not, there will be trouble on next .taste() iff 412 * a non-updated component is detected first */ 413 update_metadata(sc); 414 g_topology_unlock(); 415 LOG_MSG(LVL_INFO, "Added %d component(s) to %s", added, 416 sc->geom->name); 417 /* Fire off BIOs previously queued because there wasn't any 418 * physical space left. If the BIOs still can't be satisfied 419 * they will again be added to the end of the queue (during 420 * which the mutex will be recursed) */ 421 bq = malloc(sizeof(*bq), M_GVIRSTOR, M_WAITOK); 422 bq->bio = NULL; 423 mtx_lock(&sc->delayed_bio_q_mtx); 424 /* First, insert a sentinel to the queue end, so we don't 425 * end up in an infinite loop if there's still no free 426 * space available. */ 427 STAILQ_INSERT_TAIL(&sc->delayed_bio_q, bq, linkage); 428 while (!STAILQ_EMPTY(&sc->delayed_bio_q)) { 429 bq = STAILQ_FIRST(&sc->delayed_bio_q); 430 if (bq->bio != NULL) { 431 g_virstor_start(bq->bio); 432 STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); 433 free(bq, M_GVIRSTOR); 434 } else { 435 STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); 436 free(bq, M_GVIRSTOR); 437 break; 438 } 439 } 440 mtx_unlock(&sc->delayed_bio_q_mtx); 441 442 } 443 444 /* 445 * Find a geom handled by the class 446 */ 447 static struct g_virstor_softc * 448 virstor_find_geom(const struct g_class *cp, const char *name) 449 { 450 struct g_geom *gp; 451 452 LIST_FOREACH(gp, &cp->geom, geom) { 453 if (strcmp(name, gp->name) == 0) 454 return (gp->softc); 455 } 456 return (NULL); 457 } 458 459 /* 460 * Update metadata on all components to reflect the current state 461 * of these fields: 462 * - chunk_next 463 * - flags 464 * - md_count 465 * Expects things to be set up so write_metadata() can work, i.e. 466 * the topology lock must be held. 467 */ 468 static void 469 update_metadata(struct g_virstor_softc *sc) 470 { 471 struct g_virstor_metadata md; 472 int n; 473 474 if (virstor_valid_components(sc) != sc->n_components) 475 return; /* Incomplete device */ 476 LOG_MSG(LVL_DEBUG, "Updating metadata on components for %s", 477 sc->geom->name); 478 /* Update metadata on components */ 479 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, 480 sc->geom->class->name, sc->geom->name); 481 g_topology_assert(); 482 for (n = 0; n < sc->n_components; n++) { 483 read_metadata(sc->components[n].gcons, &md); 484 md.chunk_next = sc->components[n].chunk_next; 485 md.flags = sc->components[n].flags; 486 md.md_count = sc->n_components; 487 write_metadata(sc->components[n].gcons, &md); 488 } 489 } 490 491 /* 492 * Fills metadata (struct md) from information stored in softc and the nc'th 493 * component of virstor 494 */ 495 static void 496 fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md, 497 u_int nc, u_int hardcode) 498 { 499 struct g_virstor_component *c; 500 501 bzero(md, sizeof *md); 502 c = &sc->components[nc]; 503 504 strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof md->md_magic); 505 md->md_version = G_VIRSTOR_VERSION; 506 strncpy(md->md_name, sc->geom->name, sizeof md->md_name); 507 md->md_id = sc->id; 508 md->md_virsize = sc->virsize; 509 md->md_chunk_size = sc->chunk_size; 510 md->md_count = sc->n_components; 511 512 if (hardcode) { 513 strncpy(md->provider, c->gcons->provider->name, 514 sizeof md->provider); 515 } 516 md->no = nc; 517 md->provsize = c->gcons->provider->mediasize; 518 md->chunk_count = c->chunk_count; 519 md->chunk_next = c->chunk_next; 520 md->chunk_reserved = c->chunk_reserved; 521 md->flags = c->flags; 522 } 523 524 /* 525 * Remove a component from virstor device. 526 * Can only be done if the component is unallocated. 527 */ 528 static void 529 virstor_ctl_remove(struct gctl_req *req, struct g_class *cp) 530 { 531 /* As this is executed in parallel to I/O, operations on virstor 532 * structures must be as atomic as possible. */ 533 struct g_virstor_softc *sc; 534 int *nargs; 535 const char *geom_name; 536 u_int removed; 537 int i; 538 539 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 540 if (nargs == NULL) { 541 gctl_error(req, "Error fetching argument '%s'", "nargs"); 542 return; 543 } 544 if (*nargs < 2) { 545 gctl_error(req, "Invalid number of arguments"); 546 return; 547 } 548 /* Find "our" geom */ 549 geom_name = gctl_get_asciiparam(req, "arg0"); 550 if (geom_name == NULL) { 551 gctl_error(req, "Error fetching argument '%s'", 552 "geom_name (arg0)"); 553 return; 554 } 555 sc = virstor_find_geom(cp, geom_name); 556 if (sc == NULL) { 557 gctl_error(req, "Don't know anything about '%s'", geom_name); 558 return; 559 } 560 561 if (virstor_valid_components(sc) != sc->n_components) { 562 LOG_MSG(LVL_ERROR, "Cannot remove components from incomplete " 563 "virstor %s", sc->geom->name); 564 gctl_error(req, "Virstor %s is incomplete", sc->geom->name); 565 return; 566 } 567 568 removed = 0; 569 for (i = 1; i < *nargs; i++) { 570 char param[8]; 571 const char *prov_name; 572 int j, found; 573 struct g_virstor_component *newcomp, *compbak; 574 575 sprintf(param, "arg%d", i); 576 prov_name = gctl_get_asciiparam(req, param); 577 if (prov_name == NULL) { 578 gctl_error(req, "Error fetching argument '%s'", param); 579 return; 580 } 581 if (strncmp(prov_name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) 582 prov_name += sizeof(_PATH_DEV) - 1; 583 584 found = -1; 585 for (j = 0; j < sc->n_components; j++) { 586 if (strcmp(sc->components[j].gcons->provider->name, 587 prov_name) == 0) { 588 found = j; 589 break; 590 } 591 } 592 if (found == -1) { 593 LOG_MSG(LVL_ERROR, "No %s component in %s", 594 prov_name, sc->geom->name); 595 continue; 596 } 597 598 compbak = sc->components; 599 newcomp = malloc(sc->n_components * sizeof(*sc->components), 600 M_GVIRSTOR, M_WAITOK | M_ZERO); 601 bcopy(sc->components, newcomp, found * sizeof(*sc->components)); 602 bcopy(&sc->components[found + 1], newcomp + found, 603 found * sizeof(*sc->components)); 604 if ((sc->components[j].flags & VIRSTOR_PROVIDER_ALLOCATED) != 0) { 605 LOG_MSG(LVL_ERROR, "Allocated provider %s cannot be " 606 "removed from %s", 607 prov_name, sc->geom->name); 608 free(newcomp, M_GVIRSTOR); 609 /* We'll consider this non-fatal error */ 610 continue; 611 } 612 /* Renumerate unallocated components */ 613 for (j = 0; j < sc->n_components-1; j++) { 614 if ((sc->components[j].flags & 615 VIRSTOR_PROVIDER_ALLOCATED) == 0) { 616 sc->components[j].index = j; 617 } 618 } 619 /* This is the critical section. If a component allocation 620 * event happens while both variables are not yet set, 621 * there will be trouble. Something will panic on encountering 622 * NULL sc->components[x].gcomp member. 623 * Luckily, component allocation happens very rarely and 624 * removing components is an abnormal action in any case. */ 625 sc->components = newcomp; 626 sc->n_components--; 627 /* End critical section */ 628 629 g_topology_lock(); 630 if (clear_metadata(&compbak[found]) != 0) { 631 LOG_MSG(LVL_WARNING, "Trouble ahead: cannot clear " 632 "metadata on %s", prov_name); 633 } 634 g_detach(compbak[found].gcons); 635 g_destroy_consumer(compbak[found].gcons); 636 g_topology_unlock(); 637 638 free(compbak, M_GVIRSTOR); 639 640 removed++; 641 } 642 643 /* This call to update_metadata() is critical. In case there's a 644 * power failure in the middle of it and some components are updated 645 * while others are not, there will be trouble on next .taste() iff 646 * a non-updated component is detected first */ 647 g_topology_lock(); 648 update_metadata(sc); 649 g_topology_unlock(); 650 LOG_MSG(LVL_INFO, "Removed %d component(s) from %s", removed, 651 sc->geom->name); 652 } 653 654 /* 655 * Clear metadata sector on component 656 */ 657 static int 658 clear_metadata(struct g_virstor_component *comp) 659 { 660 char *buf; 661 int error; 662 663 LOG_MSG(LVL_INFO, "Clearing metadata on %s", 664 comp->gcons->provider->name); 665 g_topology_assert(); 666 error = g_access(comp->gcons, 0, 1, 0); 667 if (error != 0) 668 return (error); 669 buf = malloc(comp->gcons->provider->sectorsize, M_GVIRSTOR, 670 M_WAITOK | M_ZERO); 671 error = g_write_data(comp->gcons, 672 comp->gcons->provider->mediasize - 673 comp->gcons->provider->sectorsize, 674 buf, 675 comp->gcons->provider->sectorsize); 676 free(buf, M_GVIRSTOR); 677 g_access(comp->gcons, 0, -1, 0); 678 return (error); 679 } 680 681 /* 682 * Destroy geom forcibly. 683 */ 684 static int 685 g_virstor_destroy_geom(struct gctl_req *req __unused, struct g_class *mp, 686 struct g_geom *gp) 687 { 688 struct g_virstor_softc *sc; 689 int exitval; 690 691 sc = gp->softc; 692 KASSERT(sc != NULL, ("%s: NULL sc", __func__)); 693 694 exitval = 0; 695 LOG_MSG(LVL_DEBUG, "%s called for %s, sc=%p", __func__, gp->name, 696 gp->softc); 697 698 if (sc != NULL) { 699 #ifdef INVARIANTS 700 char *buf; 701 int error; 702 off_t off; 703 int isclean, count; 704 int n; 705 706 LOG_MSG(LVL_INFO, "INVARIANTS detected"); 707 LOG_MSG(LVL_INFO, "Verifying allocation " 708 "table for %s", sc->geom->name); 709 count = 0; 710 for (n = 0; n < sc->chunk_count; n++) { 711 if (sc->map[n].flags || VIRSTOR_MAP_ALLOCATED != 0) 712 count++; 713 } 714 LOG_MSG(LVL_INFO, "Device %s has %d allocated chunks", 715 sc->geom->name, count); 716 n = off = count = 0; 717 isclean = 1; 718 if (virstor_valid_components(sc) != sc->n_components) { 719 /* This is a incomplete virstor device (not all 720 * components have been found) */ 721 LOG_MSG(LVL_ERROR, "Device %s is incomplete", 722 sc->geom->name); 723 goto bailout; 724 } 725 error = g_access(sc->components[0].gcons, 1, 0, 0); 726 KASSERT(error == 0, ("%s: g_access failed (%d)", __func__, 727 error)); 728 /* Compare the whole on-disk allocation table with what's 729 * currently in memory */ 730 while (n < sc->chunk_count) { 731 buf = g_read_data(sc->components[0].gcons, off, 732 sc->sectorsize, &error); 733 KASSERT(buf != NULL, ("g_read_data returned NULL (%d) " 734 "for read at %jd", error, off)); 735 if (bcmp(buf, &sc->map[n], sc->sectorsize) != 0) { 736 LOG_MSG(LVL_ERROR, "ERROR in allocation table, " 737 "entry %d, offset %jd", n, off); 738 isclean = 0; 739 count++; 740 } 741 n += sc->me_per_sector; 742 off += sc->sectorsize; 743 g_free(buf); 744 } 745 error = g_access(sc->components[0].gcons, -1, 0, 0); 746 KASSERT(error == 0, ("%s: g_access failed (%d) on exit", 747 __func__, error)); 748 if (isclean != 1) { 749 LOG_MSG(LVL_ERROR, "ALLOCATION TABLE CORRUPTED FOR %s " 750 "(%d sectors don't match, max %zu allocations)", 751 sc->geom->name, count, 752 count * sc->me_per_sector); 753 } else { 754 LOG_MSG(LVL_INFO, "Allocation table ok for %s", 755 sc->geom->name); 756 } 757 bailout: 758 #endif 759 update_metadata(sc); 760 virstor_geom_destroy(sc, FALSE, FALSE); 761 exitval = EAGAIN; 762 } else 763 exitval = 0; 764 return (exitval); 765 } 766 767 /* 768 * Taste event (per-class callback) 769 * Examines a provider and creates geom instances if needed 770 */ 771 static struct g_geom * 772 g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags) 773 { 774 struct g_virstor_metadata md; 775 struct g_geom *gp; 776 struct g_consumer *cp; 777 struct g_virstor_softc *sc; 778 int error; 779 780 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 781 g_topology_assert(); 782 LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name); 783 784 /* We need a dummy geom to attach a consumer to the given provider */ 785 gp = g_new_geomf(mp, "virstor:taste.helper"); 786 gp->start = (void *)invalid_call; /* XXX: hacked up so the */ 787 gp->access = (void *)invalid_call; /* compiler doesn't complain. */ 788 gp->orphan = (void *)invalid_call; /* I really want these to fail. */ 789 790 cp = g_new_consumer(gp); 791 g_attach(cp, pp); 792 error = read_metadata(cp, &md); 793 g_detach(cp); 794 g_destroy_consumer(cp); 795 g_destroy_geom(gp); 796 797 if (error != 0) 798 return (NULL); 799 800 if (strcmp(md.md_magic, G_VIRSTOR_MAGIC) != 0) 801 return (NULL); 802 if (md.md_version != G_VIRSTOR_VERSION) { 803 LOG_MSG(LVL_ERROR, "Kernel module version invalid " 804 "to handle %s (%s) : %d should be %d", 805 md.md_name, pp->name, md.md_version, G_VIRSTOR_VERSION); 806 return (NULL); 807 } 808 if (md.provsize != pp->mediasize) 809 return (NULL); 810 811 /* If the provider name is hardcoded, use the offered provider only 812 * if it's been offered with its proper name (the one used in 813 * the label command). */ 814 if (md.provider[0] != '\0' && 815 !g_compare_names(md.provider, pp->name)) 816 return (NULL); 817 818 /* Iterate all geoms this class already knows about to see if a new 819 * geom instance of this class needs to be created (in case the provider 820 * is first from a (possibly) multi-consumer geom) or it just needs 821 * to be added to an existing instance. */ 822 sc = NULL; 823 gp = NULL; 824 LIST_FOREACH(gp, &mp->geom, geom) { 825 sc = gp->softc; 826 if (sc == NULL) 827 continue; 828 if (strcmp(md.md_name, sc->geom->name) != 0) 829 continue; 830 if (md.md_id != sc->id) 831 continue; 832 break; 833 } 834 if (gp != NULL) { /* We found an existing geom instance; add to it */ 835 LOG_MSG(LVL_INFO, "Adding %s to %s", pp->name, md.md_name); 836 error = add_provider_to_geom(sc, pp, &md); 837 if (error != 0) { 838 LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", 839 pp->name, md.md_name, error); 840 return (NULL); 841 } 842 } else { /* New geom instance needs to be created */ 843 gp = create_virstor_geom(mp, &md); 844 if (gp == NULL) { 845 LOG_MSG(LVL_ERROR, "Error creating new instance of " 846 "class %s: %s", mp->name, md.md_name); 847 LOG_MSG(LVL_DEBUG, "Error creating %s at %s", 848 md.md_name, pp->name); 849 return (NULL); 850 } 851 sc = gp->softc; 852 LOG_MSG(LVL_INFO, "Adding %s to %s (first found)", pp->name, 853 md.md_name); 854 error = add_provider_to_geom(sc, pp, &md); 855 if (error != 0) { 856 LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", 857 pp->name, md.md_name, error); 858 virstor_geom_destroy(sc, TRUE, FALSE); 859 return (NULL); 860 } 861 } 862 863 return (gp); 864 } 865 866 /* 867 * Destroyes consumer passed to it in arguments. Used as a callback 868 * on g_event queue. 869 */ 870 static void 871 delay_destroy_consumer(void *arg, int flags __unused) 872 { 873 struct g_consumer *c = arg; 874 KASSERT(c != NULL, ("%s: invalid consumer", __func__)); 875 LOG_MSG(LVL_DEBUG, "Consumer %s destroyed with delay", 876 c->provider->name); 877 g_detach(c); 878 g_destroy_consumer(c); 879 } 880 881 /* 882 * Remove a component (consumer) from geom instance; If it's the first 883 * component being removed, orphan the provider to announce geom's being 884 * dismantled 885 */ 886 static void 887 remove_component(struct g_virstor_softc *sc, struct g_virstor_component *comp, 888 boolean_t delay) 889 { 890 struct g_consumer *c; 891 892 KASSERT(comp->gcons != NULL, ("Component with no consumer in %s", 893 sc->geom->name)); 894 c = comp->gcons; 895 896 comp->gcons = NULL; 897 KASSERT(c->provider != NULL, ("%s: no provider", __func__)); 898 LOG_MSG(LVL_DEBUG, "Component %s removed from %s", c->provider->name, 899 sc->geom->name); 900 if (sc->provider != NULL) { 901 /* Whither, GEOM? */ 902 sc->provider->flags |= G_PF_WITHER; 903 g_orphan_provider(sc->provider, ENXIO); 904 sc->provider = NULL; 905 LOG_MSG(LVL_INFO, "Removing provider %s", sc->geom->name); 906 } 907 908 if (c->acr > 0 || c->acw > 0 || c->ace > 0) 909 g_access(c, -c->acr, -c->acw, -c->ace); 910 if (delay) { 911 /* Destroy consumer after it's tasted */ 912 g_post_event(delay_destroy_consumer, c, M_WAITOK, NULL); 913 } else { 914 g_detach(c); 915 g_destroy_consumer(c); 916 } 917 } 918 919 /* 920 * Destroy geom - called internally 921 * See g_virstor_destroy_geom for the other one 922 */ 923 static int 924 virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force, 925 boolean_t delay) 926 { 927 struct g_provider *pp; 928 struct g_geom *gp; 929 int n; 930 931 g_topology_assert(); 932 933 if (sc == NULL) 934 return (ENXIO); 935 936 pp = sc->provider; 937 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 938 LOG_MSG(force ? LVL_WARNING : LVL_ERROR, 939 "Device %s is still open.", pp->name); 940 if (!force) 941 return (EBUSY); 942 } 943 944 for (n = 0; n < sc->n_components; n++) { 945 if (sc->components[n].gcons != NULL) 946 remove_component(sc, &sc->components[n], delay); 947 } 948 949 gp = sc->geom; 950 gp->softc = NULL; 951 952 KASSERT(sc->provider == NULL, ("Provider still exists for %s", 953 gp->name)); 954 955 /* XXX: This might or might not work, since we're called with 956 * the topology lock held. Also, it might panic the kernel if 957 * the error'd BIO is in softupdates code. */ 958 mtx_lock(&sc->delayed_bio_q_mtx); 959 while (!STAILQ_EMPTY(&sc->delayed_bio_q)) { 960 struct g_virstor_bio_q *bq; 961 bq = STAILQ_FIRST(&sc->delayed_bio_q); 962 bq->bio->bio_error = ENOSPC; 963 g_io_deliver(bq->bio, EIO); 964 STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); 965 free(bq, M_GVIRSTOR); 966 } 967 mtx_unlock(&sc->delayed_bio_q_mtx); 968 mtx_destroy(&sc->delayed_bio_q_mtx); 969 970 free(sc->map, M_GVIRSTOR); 971 free(sc->components, M_GVIRSTOR); 972 bzero(sc, sizeof *sc); 973 free(sc, M_GVIRSTOR); 974 975 pp = LIST_FIRST(&gp->provider); /* We only offer one provider */ 976 if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)) 977 LOG_MSG(LVL_DEBUG, "Device %s destroyed", gp->name); 978 979 g_wither_geom(gp, ENXIO); 980 981 return (0); 982 } 983 984 /* 985 * Utility function: read metadata & decode. Wants topology lock to be 986 * held. 987 */ 988 static int 989 read_metadata(struct g_consumer *cp, struct g_virstor_metadata *md) 990 { 991 struct g_provider *pp; 992 char *buf; 993 int error; 994 995 g_topology_assert(); 996 error = g_access(cp, 1, 0, 0); 997 if (error != 0) 998 return (error); 999 pp = cp->provider; 1000 g_topology_unlock(); 1001 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 1002 &error); 1003 g_topology_lock(); 1004 g_access(cp, -1, 0, 0); 1005 if (buf == NULL) 1006 return (error); 1007 1008 virstor_metadata_decode(buf, md); 1009 g_free(buf); 1010 1011 return (0); 1012 } 1013 1014 /** 1015 * Utility function: encode & write metadata. Assumes topology lock is 1016 * held. 1017 * 1018 * There is no useful way of recovering from errors in this function, 1019 * not involving panicking the kernel. If the metadata cannot be written 1020 * the most we can do is notify the operator and hope he spots it and 1021 * replaces the broken drive. 1022 */ 1023 static void 1024 write_metadata(struct g_consumer *cp, struct g_virstor_metadata *md) 1025 { 1026 struct g_provider *pp; 1027 char *buf; 1028 int error; 1029 1030 KASSERT(cp != NULL && md != NULL && cp->provider != NULL, 1031 ("Something's fishy in %s", __func__)); 1032 LOG_MSG(LVL_DEBUG, "Writing metadata on %s", cp->provider->name); 1033 g_topology_assert(); 1034 error = g_access(cp, 0, 1, 0); 1035 if (error != 0) { 1036 LOG_MSG(LVL_ERROR, "g_access(0,1,0) failed for %s: %d", 1037 cp->provider->name, error); 1038 return; 1039 } 1040 pp = cp->provider; 1041 1042 buf = malloc(pp->sectorsize, M_GVIRSTOR, M_WAITOK); 1043 virstor_metadata_encode(md, buf); 1044 g_topology_unlock(); 1045 error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf, 1046 pp->sectorsize); 1047 g_topology_lock(); 1048 g_access(cp, 0, -1, 0); 1049 free(buf, M_GVIRSTOR); 1050 1051 if (error != 0) 1052 LOG_MSG(LVL_ERROR, "Error %d writing metadata to %s", 1053 error, cp->provider->name); 1054 } 1055 1056 /* 1057 * Creates a new instance of this GEOM class, initialise softc 1058 */ 1059 static struct g_geom * 1060 create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md) 1061 { 1062 struct g_geom *gp; 1063 struct g_virstor_softc *sc; 1064 1065 LOG_MSG(LVL_DEBUG, "Creating geom instance for %s (id=%u)", 1066 md->md_name, md->md_id); 1067 1068 if (md->md_count < 1 || md->md_chunk_size < 1 || 1069 md->md_virsize < md->md_chunk_size) { 1070 /* This is bogus configuration, and probably means data is 1071 * somehow corrupted. Panic, maybe? */ 1072 LOG_MSG(LVL_ERROR, "Nonsensical metadata information for %s", 1073 md->md_name); 1074 return (NULL); 1075 } 1076 1077 /* Check if it's already created */ 1078 LIST_FOREACH(gp, &mp->geom, geom) { 1079 sc = gp->softc; 1080 if (sc != NULL && strcmp(sc->geom->name, md->md_name) == 0) { 1081 LOG_MSG(LVL_WARNING, "Geom %s already exists", 1082 md->md_name); 1083 if (sc->id != md->md_id) { 1084 LOG_MSG(LVL_ERROR, 1085 "Some stale or invalid components " 1086 "exist for virstor device named %s. " 1087 "You will need to <CLEAR> all stale " 1088 "components and maybe reconfigure " 1089 "the virstor device. Tune " 1090 "kern.geom.virstor.debug sysctl up " 1091 "for more information.", 1092 sc->geom->name); 1093 } 1094 return (NULL); 1095 } 1096 } 1097 gp = g_new_geomf(mp, "%s", md->md_name); 1098 gp->softc = NULL; /* to circumevent races that test softc */ 1099 1100 gp->start = g_virstor_start; 1101 gp->spoiled = g_virstor_orphan; 1102 gp->orphan = g_virstor_orphan; 1103 gp->access = g_virstor_access; 1104 gp->dumpconf = g_virstor_dumpconf; 1105 1106 sc = malloc(sizeof(*sc), M_GVIRSTOR, M_WAITOK | M_ZERO); 1107 sc->id = md->md_id; 1108 sc->n_components = md->md_count; 1109 sc->components = malloc(sizeof(struct g_virstor_component) * md->md_count, 1110 M_GVIRSTOR, M_WAITOK | M_ZERO); 1111 sc->chunk_size = md->md_chunk_size; 1112 sc->virsize = md->md_virsize; 1113 STAILQ_INIT(&sc->delayed_bio_q); 1114 mtx_init(&sc->delayed_bio_q_mtx, "gvirstor_delayed_bio_q_mtx", 1115 "gvirstor", MTX_DEF | MTX_RECURSE); 1116 1117 sc->geom = gp; 1118 sc->provider = NULL; /* virstor_check_and_run will create it */ 1119 gp->softc = sc; 1120 1121 LOG_MSG(LVL_ANNOUNCE, "Device %s created", sc->geom->name); 1122 1123 return (gp); 1124 } 1125 1126 /* 1127 * Add provider to a GEOM class instance 1128 */ 1129 static int 1130 add_provider_to_geom(struct g_virstor_softc *sc, struct g_provider *pp, 1131 struct g_virstor_metadata *md) 1132 { 1133 struct g_virstor_component *component; 1134 struct g_consumer *cp, *fcp; 1135 struct g_geom *gp; 1136 int error; 1137 1138 if (md->no >= sc->n_components) 1139 return (EINVAL); 1140 1141 /* "Current" compontent */ 1142 component = &(sc->components[md->no]); 1143 if (component->gcons != NULL) 1144 return (EEXIST); 1145 1146 gp = sc->geom; 1147 fcp = LIST_FIRST(&gp->consumer); 1148 1149 cp = g_new_consumer(gp); 1150 error = g_attach(cp, pp); 1151 1152 if (error != 0) { 1153 g_destroy_consumer(cp); 1154 return (error); 1155 } 1156 1157 if (fcp != NULL) { 1158 if (fcp->provider->sectorsize != pp->sectorsize) { 1159 /* TODO: this can be made to work */ 1160 LOG_MSG(LVL_ERROR, "Provider %s of %s has invalid " 1161 "sector size (%d)", pp->name, sc->geom->name, 1162 pp->sectorsize); 1163 return (EINVAL); 1164 } 1165 if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) { 1166 /* Replicate access permissions from first "live" consumer 1167 * to the new one */ 1168 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 1169 if (error != 0) { 1170 g_detach(cp); 1171 g_destroy_consumer(cp); 1172 return (error); 1173 } 1174 } 1175 } 1176 1177 /* Bring up a new component */ 1178 cp->private = component; 1179 component->gcons = cp; 1180 component->sc = sc; 1181 component->index = md->no; 1182 component->chunk_count = md->chunk_count; 1183 component->chunk_next = md->chunk_next; 1184 component->chunk_reserved = md->chunk_reserved; 1185 component->flags = md->flags; 1186 1187 LOG_MSG(LVL_DEBUG, "%s attached to %s", pp->name, sc->geom->name); 1188 1189 virstor_check_and_run(sc); 1190 return (0); 1191 } 1192 1193 /* 1194 * Check if everything's ready to create the geom provider & device entry, 1195 * create and start provider. 1196 * Called ultimately by .taste, from g_event thread 1197 */ 1198 static void 1199 virstor_check_and_run(struct g_virstor_softc *sc) 1200 { 1201 off_t off; 1202 size_t n, count; 1203 int index; 1204 int error; 1205 1206 if (virstor_valid_components(sc) != sc->n_components) 1207 return; 1208 1209 if (virstor_valid_components(sc) == 0) { 1210 /* This is actually a candidate for panic() */ 1211 LOG_MSG(LVL_ERROR, "No valid components for %s?", 1212 sc->provider->name); 1213 return; 1214 } 1215 1216 sc->sectorsize = sc->components[0].gcons->provider->sectorsize; 1217 1218 /* Initialise allocation map from the first consumer */ 1219 sc->chunk_count = sc->virsize / sc->chunk_size; 1220 if (sc->chunk_count * (off_t)sc->chunk_size != sc->virsize) { 1221 LOG_MSG(LVL_WARNING, "Device %s truncated to %ju bytes", 1222 sc->provider->name, 1223 sc->chunk_count * (off_t)sc->chunk_size); 1224 } 1225 sc->map_size = sc->chunk_count * sizeof *(sc->map); 1226 /* The following allocation is in order of 4MB - 8MB */ 1227 sc->map = malloc(sc->map_size, M_GVIRSTOR, M_WAITOK); 1228 KASSERT(sc->map != NULL, ("%s: Memory allocation error (%zu bytes) for %s", 1229 __func__, sc->map_size, sc->provider->name)); 1230 sc->map_sectors = sc->map_size / sc->sectorsize; 1231 1232 count = 0; 1233 for (n = 0; n < sc->n_components; n++) 1234 count += sc->components[n].chunk_count; 1235 LOG_MSG(LVL_INFO, "Device %s has %zu physical chunks and %zu virtual " 1236 "(%zu KB chunks)", 1237 sc->geom->name, count, sc->chunk_count, sc->chunk_size / 1024); 1238 1239 error = g_access(sc->components[0].gcons, 1, 0, 0); 1240 if (error != 0) { 1241 LOG_MSG(LVL_ERROR, "Cannot acquire read access for %s to " 1242 "read allocation map for %s", 1243 sc->components[0].gcons->provider->name, 1244 sc->geom->name); 1245 return; 1246 } 1247 /* Read in the allocation map */ 1248 LOG_MSG(LVL_DEBUG, "Reading map for %s from %s", sc->geom->name, 1249 sc->components[0].gcons->provider->name); 1250 off = count = n = 0; 1251 while (count < sc->map_size) { 1252 struct g_virstor_map_entry *mapbuf; 1253 size_t bs; 1254 1255 bs = MIN(MAXPHYS, sc->map_size - count); 1256 if (bs % sc->sectorsize != 0) { 1257 /* Check for alignment errors */ 1258 bs = (bs / sc->sectorsize) * sc->sectorsize; 1259 if (bs == 0) 1260 break; 1261 LOG_MSG(LVL_ERROR, "Trouble: map is not sector-aligned " 1262 "for %s on %s", sc->geom->name, 1263 sc->components[0].gcons->provider->name); 1264 } 1265 mapbuf = g_read_data(sc->components[0].gcons, off, bs, &error); 1266 if (mapbuf == NULL) { 1267 free(sc->map, M_GVIRSTOR); 1268 LOG_MSG(LVL_ERROR, "Error reading allocation map " 1269 "for %s from %s (offset %ju) (error %d)", 1270 sc->geom->name, 1271 sc->components[0].gcons->provider->name, 1272 off, error); 1273 return; 1274 } 1275 1276 bcopy(mapbuf, &sc->map[n], bs); 1277 off += bs; 1278 count += bs; 1279 n += bs / sizeof *(sc->map); 1280 g_free(mapbuf); 1281 } 1282 g_access(sc->components[0].gcons, -1, 0, 0); 1283 LOG_MSG(LVL_DEBUG, "Read map for %s", sc->geom->name); 1284 1285 /* find first component with allocatable chunks */ 1286 index = -1; 1287 for (n = 0; n < sc->n_components; n++) { 1288 if (sc->components[n].chunk_next < 1289 sc->components[n].chunk_count) { 1290 index = n; 1291 break; 1292 } 1293 } 1294 if (index == -1) 1295 /* not found? set it to the last component and handle it 1296 * later */ 1297 index = sc->n_components - 1; 1298 1299 if (index >= sc->n_components - g_virstor_component_watermark - 1) { 1300 LOG_MSG(LVL_WARNING, "Device %s running out of components " 1301 "(%d/%u: %s)", sc->geom->name, 1302 index+1, 1303 sc->n_components, 1304 sc->components[index].gcons->provider->name); 1305 } 1306 sc->curr_component = index; 1307 1308 if (sc->components[index].chunk_next >= 1309 sc->components[index].chunk_count - g_virstor_chunk_watermark) { 1310 LOG_MSG(LVL_WARNING, 1311 "Component %s of %s is running out of free space " 1312 "(%u chunks left)", 1313 sc->components[index].gcons->provider->name, 1314 sc->geom->name, sc->components[index].chunk_count - 1315 sc->components[index].chunk_next); 1316 } 1317 1318 sc->me_per_sector = sc->sectorsize / sizeof *(sc->map); 1319 if (sc->sectorsize % sizeof *(sc->map) != 0) { 1320 LOG_MSG(LVL_ERROR, 1321 "%s: Map entries don't fit exactly in a sector (%s)", 1322 __func__, sc->geom->name); 1323 return; 1324 } 1325 1326 /* Recalculate allocated chunks in components & at the same time 1327 * verify map data is sane. We could trust metadata on this, but 1328 * we want to make sure. */ 1329 for (n = 0; n < sc->n_components; n++) 1330 sc->components[n].chunk_next = sc->components[n].chunk_reserved; 1331 1332 for (n = 0; n < sc->chunk_count; n++) { 1333 if (sc->map[n].provider_no >= sc->n_components || 1334 sc->map[n].provider_chunk >= 1335 sc->components[sc->map[n].provider_no].chunk_count) { 1336 LOG_MSG(LVL_ERROR, "%s: Invalid entry %u in map for %s", 1337 __func__, (u_int)n, sc->geom->name); 1338 LOG_MSG(LVL_ERROR, "%s: provider_no: %u, n_components: %u" 1339 " provider_chunk: %u, chunk_count: %u", __func__, 1340 sc->map[n].provider_no, sc->n_components, 1341 sc->map[n].provider_chunk, 1342 sc->components[sc->map[n].provider_no].chunk_count); 1343 return; 1344 } 1345 if (sc->map[n].flags & VIRSTOR_MAP_ALLOCATED) 1346 sc->components[sc->map[n].provider_no].chunk_next++; 1347 } 1348 1349 sc->provider = g_new_providerf(sc->geom, "virstor/%s", 1350 sc->geom->name); 1351 1352 sc->provider->sectorsize = sc->sectorsize; 1353 sc->provider->mediasize = sc->virsize; 1354 g_error_provider(sc->provider, 0); 1355 1356 LOG_MSG(LVL_INFO, "%s activated", sc->provider->name); 1357 LOG_MSG(LVL_DEBUG, "%s starting with current component %u, starting " 1358 "chunk %u", sc->provider->name, sc->curr_component, 1359 sc->components[sc->curr_component].chunk_next); 1360 } 1361 1362 /* 1363 * Returns count of active providers in this geom instance 1364 */ 1365 static u_int 1366 virstor_valid_components(struct g_virstor_softc *sc) 1367 { 1368 unsigned int nc, i; 1369 1370 nc = 0; 1371 KASSERT(sc != NULL, ("%s: softc is NULL", __func__)); 1372 KASSERT(sc->components != NULL, ("%s: sc->components is NULL", __func__)); 1373 for (i = 0; i < sc->n_components; i++) 1374 if (sc->components[i].gcons != NULL) 1375 nc++; 1376 return (nc); 1377 } 1378 1379 /* 1380 * Called when the consumer gets orphaned (?) 1381 */ 1382 static void 1383 g_virstor_orphan(struct g_consumer *cp) 1384 { 1385 struct g_virstor_softc *sc; 1386 struct g_virstor_component *comp; 1387 struct g_geom *gp; 1388 1389 g_topology_assert(); 1390 gp = cp->geom; 1391 sc = gp->softc; 1392 if (sc == NULL) 1393 return; 1394 1395 comp = cp->private; 1396 KASSERT(comp != NULL, ("%s: No component in private part of consumer", 1397 __func__)); 1398 remove_component(sc, comp, FALSE); 1399 if (virstor_valid_components(sc) == 0) 1400 virstor_geom_destroy(sc, TRUE, FALSE); 1401 } 1402 1403 /* 1404 * Called to notify geom when it's been opened, and for what intent 1405 */ 1406 static int 1407 g_virstor_access(struct g_provider *pp, int dr, int dw, int de) 1408 { 1409 struct g_consumer *c; 1410 struct g_virstor_softc *sc; 1411 struct g_geom *gp; 1412 int error; 1413 1414 KASSERT(pp != NULL, ("%s: NULL provider", __func__)); 1415 gp = pp->geom; 1416 KASSERT(gp != NULL, ("%s: NULL geom", __func__)); 1417 sc = gp->softc; 1418 1419 if (sc == NULL) { 1420 /* It seems that .access can be called with negative dr,dw,dx 1421 * in this case but I want to check for myself */ 1422 LOG_MSG(LVL_WARNING, "access(%d, %d, %d) for %s", 1423 dr, dw, de, pp->name); 1424 /* This should only happen when geom is withered so 1425 * allow only negative requests */ 1426 KASSERT(dr <= 0 && dw <= 0 && de <= 0, 1427 ("%s: Positive access for %s", __func__, pp->name)); 1428 if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) 1429 LOG_MSG(LVL_DEBUG, "Device %s definitely destroyed", 1430 pp->name); 1431 return (0); 1432 } 1433 1434 /* Grab an exclusive bit to propagate on our consumers on first open */ 1435 if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) 1436 de++; 1437 /* ... drop it on close */ 1438 if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) { 1439 de--; 1440 update_metadata(sc); /* Writes statistical information */ 1441 } 1442 1443 error = ENXIO; 1444 LIST_FOREACH(c, &gp->consumer, consumer) { 1445 KASSERT(c != NULL, ("%s: consumer is NULL", __func__)); 1446 error = g_access(c, dr, dw, de); 1447 if (error != 0) { 1448 struct g_consumer *c2; 1449 1450 /* Backout earlier changes */ 1451 LIST_FOREACH(c2, &gp->consumer, consumer) { 1452 if (c2 == c) /* all eariler components fixed */ 1453 return (error); 1454 g_access(c2, -dr, -dw, -de); 1455 } 1456 } 1457 } 1458 1459 return (error); 1460 } 1461 1462 /* 1463 * Generate XML dump of current state 1464 */ 1465 static void 1466 g_virstor_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 1467 struct g_consumer *cp, struct g_provider *pp) 1468 { 1469 struct g_virstor_softc *sc; 1470 1471 g_topology_assert(); 1472 sc = gp->softc; 1473 1474 if (sc == NULL || pp != NULL) 1475 return; 1476 1477 if (cp != NULL) { 1478 /* For each component */ 1479 struct g_virstor_component *comp; 1480 1481 comp = cp->private; 1482 if (comp == NULL) 1483 return; 1484 sbuf_printf(sb, "%s<ComponentIndex>%u</ComponentIndex>\n", 1485 indent, comp->index); 1486 sbuf_printf(sb, "%s<ChunkCount>%u</ChunkCount>\n", 1487 indent, comp->chunk_count); 1488 sbuf_printf(sb, "%s<ChunksUsed>%u</ChunksUsed>\n", 1489 indent, comp->chunk_next); 1490 sbuf_printf(sb, "%s<ChunksReserved>%u</ChunksReserved>\n", 1491 indent, comp->chunk_reserved); 1492 sbuf_printf(sb, "%s<StorageFree>%u%%</StorageFree>\n", 1493 indent, 1494 comp->chunk_next > 0 ? 100 - 1495 ((comp->chunk_next + comp->chunk_reserved) * 100) / 1496 comp->chunk_count : 100); 1497 } else { 1498 /* For the whole thing */ 1499 u_int count, used, i; 1500 off_t size; 1501 1502 count = used = size = 0; 1503 for (i = 0; i < sc->n_components; i++) { 1504 if (sc->components[i].gcons != NULL) { 1505 count += sc->components[i].chunk_count; 1506 used += sc->components[i].chunk_next + 1507 sc->components[i].chunk_reserved; 1508 size += sc->components[i].gcons-> 1509 provider->mediasize; 1510 } 1511 } 1512 1513 sbuf_printf(sb, "%s<Status>" 1514 "Components=%u, Online=%u</Status>\n", indent, 1515 sc->n_components, virstor_valid_components(sc)); 1516 sbuf_printf(sb, "%s<State>%u%% physical free</State>\n", 1517 indent, 100-(used * 100) / count); 1518 sbuf_printf(sb, "%s<ChunkSize>%zu</ChunkSize>\n", indent, 1519 sc->chunk_size); 1520 sbuf_printf(sb, "%s<PhysicalFree>%u%%</PhysicalFree>\n", 1521 indent, used > 0 ? 100 - (used * 100) / count : 100); 1522 sbuf_printf(sb, "%s<ChunkPhysicalCount>%u</ChunkPhysicalCount>\n", 1523 indent, count); 1524 sbuf_printf(sb, "%s<ChunkVirtualCount>%zu</ChunkVirtualCount>\n", 1525 indent, sc->chunk_count); 1526 sbuf_printf(sb, "%s<PhysicalBacking>%zu%%</PhysicalBacking>\n", 1527 indent, 1528 (count * 100) / sc->chunk_count); 1529 sbuf_printf(sb, "%s<PhysicalBackingSize>%jd</PhysicalBackingSize>\n", 1530 indent, size); 1531 sbuf_printf(sb, "%s<VirtualSize>%jd</VirtualSize>\n", indent, 1532 sc->virsize); 1533 } 1534 } 1535 1536 /* 1537 * GEOM .done handler 1538 * Can't use standard handler because one requested IO may 1539 * fork into additional data IOs 1540 */ 1541 static void 1542 g_virstor_done(struct bio *b) 1543 { 1544 struct g_virstor_softc *sc; 1545 struct bio *parent_b; 1546 1547 parent_b = b->bio_parent; 1548 sc = parent_b->bio_to->geom->softc; 1549 1550 if (b->bio_error != 0) { 1551 LOG_MSG(LVL_ERROR, "Error %d for offset=%ju, length=%ju, %s", 1552 b->bio_error, b->bio_offset, b->bio_length, 1553 b->bio_to->name); 1554 if (parent_b->bio_error == 0) 1555 parent_b->bio_error = b->bio_error; 1556 } 1557 1558 parent_b->bio_inbed++; 1559 parent_b->bio_completed += b->bio_completed; 1560 1561 if (parent_b->bio_children == parent_b->bio_inbed) { 1562 parent_b->bio_completed = parent_b->bio_length; 1563 g_io_deliver(parent_b, parent_b->bio_error); 1564 } 1565 g_destroy_bio(b); 1566 } 1567 1568 /* 1569 * I/O starts here 1570 * Called in g_down thread 1571 */ 1572 static void 1573 g_virstor_start(struct bio *b) 1574 { 1575 struct g_virstor_softc *sc; 1576 struct g_virstor_component *comp; 1577 struct bio *cb; 1578 struct g_provider *pp; 1579 char *addr; 1580 off_t offset, length; 1581 struct bio_queue_head bq; 1582 size_t chunk_size; /* cached for convenience */ 1583 u_int count; 1584 1585 pp = b->bio_to; 1586 sc = pp->geom->softc; 1587 KASSERT(sc != NULL, ("%s: no softc (error=%d, device=%s)", __func__, 1588 b->bio_to->error, b->bio_to->name)); 1589 1590 LOG_REQ(LVL_MOREDEBUG, b, "%s", __func__); 1591 1592 switch (b->bio_cmd) { 1593 case BIO_READ: 1594 case BIO_WRITE: 1595 case BIO_DELETE: 1596 break; 1597 default: 1598 g_io_deliver(b, EOPNOTSUPP); 1599 return; 1600 } 1601 1602 LOG_MSG(LVL_DEBUG2, "BIO arrived, size=%ju", b->bio_length); 1603 bioq_init(&bq); 1604 1605 chunk_size = sc->chunk_size; 1606 addr = b->bio_data; 1607 offset = b->bio_offset; /* virtual offset and length */ 1608 length = b->bio_length; 1609 1610 while (length > 0) { 1611 size_t chunk_index, in_chunk_offset, in_chunk_length; 1612 struct virstor_map_entry *me; 1613 1614 chunk_index = offset / chunk_size; /* round downwards */ 1615 in_chunk_offset = offset % chunk_size; 1616 in_chunk_length = min(length, chunk_size - in_chunk_offset); 1617 LOG_MSG(LVL_DEBUG, "Mapped %s(%ju, %ju) to (%zu,%zu,%zu)", 1618 b->bio_cmd == BIO_READ ? "R" : "W", 1619 offset, length, 1620 chunk_index, in_chunk_offset, in_chunk_length); 1621 me = &sc->map[chunk_index]; 1622 1623 if (b->bio_cmd == BIO_READ || b->bio_cmd == BIO_DELETE) { 1624 if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) { 1625 /* Reads from unallocated chunks return zeroed 1626 * buffers */ 1627 if (b->bio_cmd == BIO_READ) 1628 bzero(addr, in_chunk_length); 1629 } else { 1630 comp = &sc->components[me->provider_no]; 1631 1632 cb = g_clone_bio(b); 1633 if (cb == NULL) { 1634 bioq_dismantle(&bq); 1635 if (b->bio_error == 0) 1636 b->bio_error = ENOMEM; 1637 g_io_deliver(b, b->bio_error); 1638 return; 1639 } 1640 cb->bio_to = comp->gcons->provider; 1641 cb->bio_done = g_virstor_done; 1642 cb->bio_offset = 1643 (off_t)me->provider_chunk * (off_t)chunk_size 1644 + in_chunk_offset; 1645 cb->bio_length = in_chunk_length; 1646 cb->bio_data = addr; 1647 cb->bio_caller1 = comp; 1648 bioq_disksort(&bq, cb); 1649 } 1650 } else { /* handle BIO_WRITE */ 1651 KASSERT(b->bio_cmd == BIO_WRITE, 1652 ("%s: Unknown command %d", __func__, 1653 b->bio_cmd)); 1654 1655 if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) { 1656 /* We have a virtual chunk, represented by 1657 * the "me" entry, but it's not yet allocated 1658 * (tied to) a physical chunk. So do it now. */ 1659 struct virstor_map_entry *data_me; 1660 u_int phys_chunk, comp_no; 1661 off_t s_offset; 1662 int error; 1663 1664 error = allocate_chunk(sc, &comp, &comp_no, 1665 &phys_chunk); 1666 if (error != 0) { 1667 /* We cannot allocate a physical chunk 1668 * to satisfy this request, so we'll 1669 * delay it to when we can... 1670 * XXX: this will prevent the fs from 1671 * being umounted! */ 1672 struct g_virstor_bio_q *biq; 1673 biq = malloc(sizeof *biq, M_GVIRSTOR, 1674 M_NOWAIT); 1675 if (biq == NULL) { 1676 bioq_dismantle(&bq); 1677 if (b->bio_error == 0) 1678 b->bio_error = ENOMEM; 1679 g_io_deliver(b, b->bio_error); 1680 return; 1681 } 1682 biq->bio = b; 1683 mtx_lock(&sc->delayed_bio_q_mtx); 1684 STAILQ_INSERT_TAIL(&sc->delayed_bio_q, 1685 biq, linkage); 1686 mtx_unlock(&sc->delayed_bio_q_mtx); 1687 LOG_MSG(LVL_WARNING, "Delaying BIO " 1688 "(size=%ju) until free physical " 1689 "space can be found on %s", 1690 b->bio_length, 1691 sc->provider->name); 1692 return; 1693 } 1694 LOG_MSG(LVL_DEBUG, "Allocated chunk %u on %s " 1695 "for %s", 1696 phys_chunk, 1697 comp->gcons->provider->name, 1698 sc->provider->name); 1699 1700 me->provider_no = comp_no; 1701 me->provider_chunk = phys_chunk; 1702 me->flags |= VIRSTOR_MAP_ALLOCATED; 1703 1704 cb = g_clone_bio(b); 1705 if (cb == NULL) { 1706 me->flags &= ~VIRSTOR_MAP_ALLOCATED; 1707 me->provider_no = 0; 1708 me->provider_chunk = 0; 1709 bioq_dismantle(&bq); 1710 if (b->bio_error == 0) 1711 b->bio_error = ENOMEM; 1712 g_io_deliver(b, b->bio_error); 1713 return; 1714 } 1715 1716 /* The allocation table is stored continuously 1717 * at the start of the drive. We need to 1718 * calculate the offset of the sector that holds 1719 * this map entry both on the drive and in the 1720 * map array. 1721 * sc_offset will end up pointing to the drive 1722 * sector. */ 1723 s_offset = chunk_index * sizeof *me; 1724 s_offset = (s_offset / sc->sectorsize) * 1725 sc->sectorsize; 1726 1727 /* data_me points to map entry sector 1728 * in memory (analoguos to offset) */ 1729 data_me = &sc->map[(chunk_index / 1730 sc->me_per_sector) * sc->me_per_sector]; 1731 1732 /* Commit sector with map entry to storage */ 1733 cb->bio_to = sc->components[0].gcons->provider; 1734 cb->bio_done = g_virstor_done; 1735 cb->bio_offset = s_offset; 1736 cb->bio_data = (char *)data_me; 1737 cb->bio_length = sc->sectorsize; 1738 cb->bio_caller1 = &sc->components[0]; 1739 bioq_disksort(&bq, cb); 1740 } 1741 1742 comp = &sc->components[me->provider_no]; 1743 cb = g_clone_bio(b); 1744 if (cb == NULL) { 1745 bioq_dismantle(&bq); 1746 if (b->bio_error == 0) 1747 b->bio_error = ENOMEM; 1748 g_io_deliver(b, b->bio_error); 1749 return; 1750 } 1751 /* Finally, handle the data */ 1752 cb->bio_to = comp->gcons->provider; 1753 cb->bio_done = g_virstor_done; 1754 cb->bio_offset = (off_t)me->provider_chunk*(off_t)chunk_size + 1755 in_chunk_offset; 1756 cb->bio_length = in_chunk_length; 1757 cb->bio_data = addr; 1758 cb->bio_caller1 = comp; 1759 bioq_disksort(&bq, cb); 1760 } 1761 addr += in_chunk_length; 1762 length -= in_chunk_length; 1763 offset += in_chunk_length; 1764 } 1765 1766 /* Fire off bio's here */ 1767 count = 0; 1768 for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) { 1769 bioq_remove(&bq, cb); 1770 LOG_REQ(LVL_MOREDEBUG, cb, "Firing request"); 1771 comp = cb->bio_caller1; 1772 cb->bio_caller1 = NULL; 1773 LOG_MSG(LVL_DEBUG, " firing bio, offset=%ju, length=%ju", 1774 cb->bio_offset, cb->bio_length); 1775 g_io_request(cb, comp->gcons); 1776 count++; 1777 } 1778 if (count == 0) { /* We handled everything locally */ 1779 b->bio_completed = b->bio_length; 1780 g_io_deliver(b, 0); 1781 } 1782 1783 } 1784 1785 /* 1786 * Allocate a chunk from a physical provider. Returns physical component, 1787 * chunk index relative to the component and the component's index. 1788 */ 1789 static int 1790 allocate_chunk(struct g_virstor_softc *sc, struct g_virstor_component **comp, 1791 u_int *comp_no_p, u_int *chunk) 1792 { 1793 u_int comp_no; 1794 1795 KASSERT(sc->curr_component < sc->n_components, 1796 ("%s: Invalid curr_component: %u", __func__, sc->curr_component)); 1797 1798 comp_no = sc->curr_component; 1799 *comp = &sc->components[comp_no]; 1800 dump_component(*comp); 1801 if ((*comp)->chunk_next >= (*comp)->chunk_count) { 1802 /* This component is full. Allocate next component */ 1803 if (comp_no >= sc->n_components-1) { 1804 LOG_MSG(LVL_ERROR, "All physical space allocated for %s", 1805 sc->geom->name); 1806 return (-1); 1807 } 1808 (*comp)->flags &= ~VIRSTOR_PROVIDER_CURRENT; 1809 sc->curr_component = ++comp_no; 1810 1811 *comp = &sc->components[comp_no]; 1812 if (comp_no >= sc->n_components - g_virstor_component_watermark-1) 1813 LOG_MSG(LVL_WARNING, "Device %s running out of components " 1814 "(switching to %u/%u: %s)", sc->geom->name, 1815 comp_no+1, sc->n_components, 1816 (*comp)->gcons->provider->name); 1817 /* Take care not to overwrite reserved chunks */ 1818 if ( (*comp)->chunk_reserved > 0 && 1819 (*comp)->chunk_next < (*comp)->chunk_reserved) 1820 (*comp)->chunk_next = (*comp)->chunk_reserved; 1821 1822 (*comp)->flags |= 1823 VIRSTOR_PROVIDER_ALLOCATED | VIRSTOR_PROVIDER_CURRENT; 1824 dump_component(*comp); 1825 *comp_no_p = comp_no; 1826 *chunk = (*comp)->chunk_next++; 1827 } else { 1828 *comp_no_p = comp_no; 1829 *chunk = (*comp)->chunk_next++; 1830 } 1831 return (0); 1832 } 1833 1834 /* Dump a component */ 1835 static void 1836 dump_component(struct g_virstor_component *comp) 1837 { 1838 1839 if (g_virstor_debug < LVL_DEBUG2) 1840 return; 1841 printf("Component %d: %s\n", comp->index, comp->gcons->provider->name); 1842 printf(" chunk_count: %u\n", comp->chunk_count); 1843 printf(" chunk_next: %u\n", comp->chunk_next); 1844 printf(" flags: %u\n", comp->flags); 1845 } 1846 1847 #if 0 1848 /* Dump a map entry */ 1849 static void 1850 dump_me(struct virstor_map_entry *me, unsigned int nr) 1851 { 1852 if (g_virstor_debug < LVL_DEBUG) 1853 return; 1854 printf("VIRT. CHUNK #%d: ", nr); 1855 if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) 1856 printf("(unallocated)\n"); 1857 else 1858 printf("allocated at provider %u, provider_chunk %u\n", 1859 me->provider_no, me->provider_chunk); 1860 } 1861 #endif 1862 1863 /* 1864 * Dismantle bio_queue and destroy its components 1865 */ 1866 static void 1867 bioq_dismantle(struct bio_queue_head *bq) 1868 { 1869 struct bio *b; 1870 1871 for (b = bioq_first(bq); b != NULL; b = bioq_first(bq)) { 1872 bioq_remove(bq, b); 1873 g_destroy_bio(b); 1874 } 1875 } 1876 1877 /* 1878 * The function that shouldn't be called. 1879 * When this is called, the stack is already garbled because of 1880 * argument mismatch. There's nothing to do now but panic, which is 1881 * accidentally the whole purpose of this function. 1882 * Motivation: to guard from accidentally calling geom methods when 1883 * they shouldn't be called. (see g_..._taste) 1884 */ 1885 static void 1886 invalid_call(void) 1887 { 1888 panic("invalid_call() has just been called. Something's fishy here."); 1889 } 1890 1891 DECLARE_GEOM_CLASS(g_virstor_class, g_virstor); /* Let there be light */ 1892