1 /*- 2 * Copyright (c) 2006-2007 Ivan Voras <ivoras@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* Implementation notes: 28 * - "Components" are wrappers around providers that make up the 29 * virtual storage (i.e. 
a virstor has "physical" components) 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/module.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/sx.h> 42 #include <sys/bio.h> 43 #include <sys/sysctl.h> 44 #include <sys/malloc.h> 45 #include <sys/time.h> 46 #include <sys/proc.h> 47 #include <sys/kthread.h> 48 #include <sys/mutex.h> 49 #include <vm/uma.h> 50 #include <geom/geom.h> 51 52 #include <geom/virstor/g_virstor.h> 53 #include <geom/virstor/g_virstor_md.h> 54 55 /* Declare malloc(9) label */ 56 static MALLOC_DEFINE(M_GVIRSTOR, "gvirstor", "GEOM_VIRSTOR Data"); 57 58 /* GEOM class methods */ 59 static g_init_t g_virstor_init; 60 static g_fini_t g_virstor_fini; 61 static g_taste_t g_virstor_taste; 62 static g_ctl_req_t g_virstor_config; 63 static g_ctl_destroy_geom_t g_virstor_destroy_geom; 64 65 /* Declare & initialize class structure ("geom class") */ 66 struct g_class g_virstor_class = { 67 .name = G_VIRSTOR_CLASS_NAME, 68 .version = G_VERSION, 69 .init = g_virstor_init, 70 .fini = g_virstor_fini, 71 .taste = g_virstor_taste, 72 .ctlreq = g_virstor_config, 73 .destroy_geom = g_virstor_destroy_geom 74 /* The .dumpconf and the rest are only usable for a geom instance, so 75 * they will be set when such instance is created. 
*/ 76 }; 77 78 /* Declare sysctl's and loader tunables */ 79 SYSCTL_DECL(_kern_geom); 80 SYSCTL_NODE(_kern_geom, OID_AUTO, virstor, CTLFLAG_RW, 0, "GEOM_GVIRSTOR information"); 81 82 static u_int g_virstor_debug = 2; /* XXX: lower to 2 when released to public */ 83 TUNABLE_INT("kern.geom.virstor.debug", &g_virstor_debug); 84 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, debug, CTLFLAG_RW, &g_virstor_debug, 85 0, "Debug level (2=production, 5=normal, 15=excessive)"); 86 87 static u_int g_virstor_chunk_watermark = 100; 88 TUNABLE_INT("kern.geom.virstor.chunk_watermark", &g_virstor_chunk_watermark); 89 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, chunk_watermark, CTLFLAG_RW, 90 &g_virstor_chunk_watermark, 0, 91 "Minimum number of free chunks before issuing administrative warning"); 92 93 static u_int g_virstor_component_watermark = 1; 94 TUNABLE_INT("kern.geom.virstor.component_watermark", 95 &g_virstor_component_watermark); 96 SYSCTL_UINT(_kern_geom_virstor, OID_AUTO, component_watermark, CTLFLAG_RW, 97 &g_virstor_component_watermark, 0, 98 "Minimum number of free components before issuing administrative warning"); 99 100 static int read_metadata(struct g_consumer *, struct g_virstor_metadata *); 101 static void write_metadata(struct g_consumer *, struct g_virstor_metadata *); 102 static int clear_metadata(struct g_virstor_component *); 103 static int add_provider_to_geom(struct g_virstor_softc *, struct g_provider *, 104 struct g_virstor_metadata *); 105 static struct g_geom *create_virstor_geom(struct g_class *, 106 struct g_virstor_metadata *); 107 static void virstor_check_and_run(struct g_virstor_softc *); 108 static u_int virstor_valid_components(struct g_virstor_softc *); 109 static int virstor_geom_destroy(struct g_virstor_softc *, boolean_t, 110 boolean_t); 111 static void remove_component(struct g_virstor_softc *, 112 struct g_virstor_component *, boolean_t); 113 static void bioq_dismantle(struct bio_queue_head *); 114 static int allocate_chunk(struct 
g_virstor_softc *, 115 struct g_virstor_component **, u_int *, u_int *); 116 static void delay_destroy_consumer(void *, int); 117 static void dump_component(struct g_virstor_component *comp); 118 #if 0 119 static void dump_me(struct virstor_map_entry *me, unsigned int nr); 120 #endif 121 122 static void virstor_ctl_stop(struct gctl_req *, struct g_class *); 123 static void virstor_ctl_add(struct gctl_req *, struct g_class *); 124 static void virstor_ctl_remove(struct gctl_req *, struct g_class *); 125 static struct g_virstor_softc * virstor_find_geom(const struct g_class *, 126 const char *); 127 static void update_metadata(struct g_virstor_softc *); 128 static void fill_metadata(struct g_virstor_softc *, struct g_virstor_metadata *, 129 u_int, u_int); 130 131 static void g_virstor_orphan(struct g_consumer *); 132 static int g_virstor_access(struct g_provider *, int, int, int); 133 static void g_virstor_start(struct bio *); 134 static void g_virstor_dumpconf(struct sbuf *, const char *, struct g_geom *, 135 struct g_consumer *, struct g_provider *); 136 static void g_virstor_done(struct bio *); 137 138 static void invalid_call(void); 139 /* 140 * Initialise GEOM class (per-class callback) 141 */ 142 static void 143 g_virstor_init(struct g_class *mp __unused) 144 { 145 146 /* Catch map struct size mismatch at compile time; Map entries must 147 * fit into MAXPHYS exactly, with no wasted space. 
*/ 148 CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS); 149 150 /* Init UMA zones, TAILQ's, other global vars */ 151 } 152 153 /* 154 * Finalise GEOM class (per-class callback) 155 */ 156 static void 157 g_virstor_fini(struct g_class *mp __unused) 158 { 159 160 /* Deinit UMA zones & global vars */ 161 } 162 163 /* 164 * Config (per-class callback) 165 */ 166 static void 167 g_virstor_config(struct gctl_req *req, struct g_class *cp, char const *verb) 168 { 169 uint32_t *version; 170 171 g_topology_assert(); 172 173 version = gctl_get_paraml(req, "version", sizeof(*version)); 174 if (version == NULL) { 175 gctl_error(req, "Failed to get 'version' argument"); 176 return; 177 } 178 if (*version != G_VIRSTOR_VERSION) { 179 gctl_error(req, "Userland and kernel versions out of sync"); 180 return; 181 } 182 183 g_topology_unlock(); 184 if (strcmp(verb, "add") == 0) 185 virstor_ctl_add(req, cp); 186 else if (strcmp(verb, "stop") == 0 || strcmp(verb, "destroy") == 0) 187 virstor_ctl_stop(req, cp); 188 else if (strcmp(verb, "remove") == 0) 189 virstor_ctl_remove(req, cp); 190 else 191 gctl_error(req, "unknown verb: '%s'", verb); 192 g_topology_lock(); 193 } 194 195 /* 196 * "stop" verb from userland 197 */ 198 static void 199 virstor_ctl_stop(struct gctl_req *req, struct g_class *cp) 200 { 201 int *force, *nargs; 202 int i; 203 204 nargs = gctl_get_paraml(req, "nargs", sizeof *nargs); 205 if (nargs == NULL) { 206 gctl_error(req, "Error fetching argument '%s'", "nargs"); 207 return; 208 } 209 if (*nargs < 1) { 210 gctl_error(req, "Invalid number of arguments"); 211 return; 212 } 213 force = gctl_get_paraml(req, "force", sizeof *force); 214 if (force == NULL) { 215 gctl_error(req, "Error fetching argument '%s'", "force"); 216 return; 217 } 218 219 g_topology_lock(); 220 for (i = 0; i < *nargs; i++) { 221 char param[8]; 222 const char *name; 223 struct g_virstor_softc *sc; 224 int error; 225 226 sprintf(param, "arg%d", i); 227 name = 
gctl_get_asciiparam(req, param); 228 if (name == NULL) { 229 gctl_error(req, "No 'arg%d' argument", i); 230 g_topology_unlock(); 231 return; 232 } 233 sc = virstor_find_geom(cp, name); 234 LOG_MSG(LVL_INFO, "Stopping %s by the userland command", 235 sc->geom->name); 236 update_metadata(sc); 237 if ((error = virstor_geom_destroy(sc, TRUE, TRUE)) != 0) { 238 LOG_MSG(LVL_ERROR, "Cannot destroy %s: %d", 239 sc->geom->name, error); 240 } 241 } 242 g_topology_unlock(); 243 } 244 245 /* 246 * "add" verb from userland - add new component(s) to the structure. 247 * This will be done all at once in here, without going through the 248 * .taste function for new components. 249 */ 250 static void 251 virstor_ctl_add(struct gctl_req *req, struct g_class *cp) 252 { 253 /* Note: while this is going on, I/O is being done on 254 * the g_up and g_down threads. The idea is to make changes 255 * to softc members in a way that can atomically activate 256 * them all at once. */ 257 struct g_virstor_softc *sc; 258 int *hardcode, *nargs; 259 const char *geom_name; /* geom to add a component to */ 260 struct g_consumer *fcp; 261 struct g_virstor_bio_q *bq; 262 u_int added; 263 int error; 264 int i; 265 266 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 267 if (nargs == NULL) { 268 gctl_error(req, "Error fetching argument '%s'", "nargs"); 269 return; 270 } 271 if (*nargs < 2) { 272 gctl_error(req, "Invalid number of arguments"); 273 return; 274 } 275 hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode)); 276 if (hardcode == NULL) { 277 gctl_error(req, "Error fetching argument '%s'", "hardcode"); 278 return; 279 } 280 281 /* Find "our" geom */ 282 geom_name = gctl_get_asciiparam(req, "arg0"); 283 if (geom_name == NULL) { 284 gctl_error(req, "Error fetching argument '%s'", "geom_name (arg0)"); 285 return; 286 } 287 sc = virstor_find_geom(cp, geom_name); 288 if (sc == NULL) { 289 gctl_error(req, "Don't know anything about '%s'", geom_name); 290 return; 291 } 292 293 if 
(virstor_valid_components(sc) != sc->n_components) { 294 LOG_MSG(LVL_ERROR, "Cannot add components to incomplete " 295 "virstor %s", sc->geom->name); 296 gctl_error(req, "Virstor %s is incomplete", sc->geom->name); 297 return; 298 } 299 300 fcp = sc->components[0].gcons; 301 added = 0; 302 g_topology_lock(); 303 for (i = 1; i < *nargs; i++) { 304 struct g_virstor_metadata md; 305 char aname[8]; 306 const char *prov_name; 307 struct g_provider *pp; 308 struct g_consumer *cp; 309 u_int nc; 310 u_int j; 311 312 snprintf(aname, sizeof aname, "arg%d", i); 313 prov_name = gctl_get_asciiparam(req, aname); 314 if (prov_name == NULL) { 315 gctl_error(req, "Error fetching argument '%s'", aname); 316 g_topology_unlock(); 317 return; 318 } 319 if (strncmp(prov_name, _PATH_DEV, strlen(_PATH_DEV)) == 0) 320 prov_name += strlen(_PATH_DEV); 321 322 pp = g_provider_by_name(prov_name); 323 if (pp == NULL) { 324 /* This is the most common error so be verbose about it */ 325 if (added != 0) { 326 gctl_error(req, "Invalid provider: '%s' (added" 327 " %u components)", prov_name, added); 328 update_metadata(sc); 329 } else { 330 gctl_error(req, "Invalid provider: '%s'", 331 prov_name); 332 } 333 g_topology_unlock(); 334 return; 335 } 336 cp = g_new_consumer(sc->geom); 337 if (cp == NULL) { 338 gctl_error(req, "Cannot create consumer"); 339 g_topology_unlock(); 340 return; 341 } 342 error = g_attach(cp, pp); 343 if (error != 0) { 344 gctl_error(req, "Cannot attach a consumer to %s", 345 pp->name); 346 g_destroy_consumer(cp); 347 g_topology_unlock(); 348 return; 349 } 350 if (fcp->acr != 0 || fcp->acw != 0 || fcp->ace != 0) { 351 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 352 if (error != 0) { 353 gctl_error(req, "Access request failed for %s", 354 pp->name); 355 g_destroy_consumer(cp); 356 g_topology_unlock(); 357 return; 358 } 359 } 360 if (fcp->provider->sectorsize != pp->sectorsize) { 361 gctl_error(req, "Sector size doesn't fit for %s", 362 pp->name); 363 
g_destroy_consumer(cp); 364 g_topology_unlock(); 365 return; 366 } 367 for (j = 0; j < sc->n_components; j++) { 368 if (strcmp(sc->components[j].gcons->provider->name, 369 pp->name) == 0) { 370 gctl_error(req, "Component %s already in %s", 371 pp->name, sc->geom->name); 372 g_destroy_consumer(cp); 373 g_topology_unlock(); 374 return; 375 } 376 } 377 sc->components = realloc(sc->components, 378 sizeof(*sc->components) * (sc->n_components + 1), 379 M_GVIRSTOR, M_WAITOK); 380 381 nc = sc->n_components; 382 sc->components[nc].gcons = cp; 383 sc->components[nc].sc = sc; 384 sc->components[nc].index = nc; 385 sc->components[nc].chunk_count = cp->provider->mediasize / 386 sc->chunk_size; 387 sc->components[nc].chunk_next = 0; 388 sc->components[nc].chunk_reserved = 0; 389 390 if (sc->components[nc].chunk_count < 4) { 391 gctl_error(req, "Provider too small: %s", 392 cp->provider->name); 393 g_destroy_consumer(cp); 394 g_topology_unlock(); 395 return; 396 } 397 fill_metadata(sc, &md, nc, *hardcode); 398 write_metadata(cp, &md); 399 /* The new component becomes visible when n_components is 400 * incremented */ 401 sc->n_components++; 402 added++; 403 404 } 405 /* This call to update_metadata() is critical. In case there's a 406 * power failure in the middle of it and some components are updated 407 * while others are not, there will be trouble on next .taste() iff 408 * a non-updated component is detected first */ 409 update_metadata(sc); 410 g_topology_unlock(); 411 LOG_MSG(LVL_INFO, "Added %d component(s) to %s", added, 412 sc->geom->name); 413 /* Fire off BIOs previously queued because there wasn't any 414 * physical space left. 
If the BIOs still can't be satisfied 415 * they will again be added to the end of the queue (during 416 * which the mutex will be recursed) */ 417 bq = malloc(sizeof(*bq), M_GVIRSTOR, M_WAITOK); 418 bq->bio = NULL; 419 mtx_lock(&sc->delayed_bio_q_mtx); 420 /* First, insert a sentinel to the queue end, so we don't 421 * end up in an infinite loop if there's still no free 422 * space available. */ 423 STAILQ_INSERT_TAIL(&sc->delayed_bio_q, bq, linkage); 424 while (!STAILQ_EMPTY(&sc->delayed_bio_q)) { 425 bq = STAILQ_FIRST(&sc->delayed_bio_q); 426 if (bq->bio != NULL) { 427 g_virstor_start(bq->bio); 428 STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); 429 free(bq, M_GVIRSTOR); 430 } else { 431 STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage); 432 free(bq, M_GVIRSTOR); 433 break; 434 } 435 } 436 mtx_unlock(&sc->delayed_bio_q_mtx); 437 438 } 439 440 /* 441 * Find a geom handled by the class 442 */ 443 static struct g_virstor_softc * 444 virstor_find_geom(const struct g_class *cp, const char *name) 445 { 446 struct g_geom *gp; 447 448 LIST_FOREACH(gp, &cp->geom, geom) { 449 if (strcmp(name, gp->name) == 0) 450 return (gp->softc); 451 } 452 return (NULL); 453 } 454 455 /* 456 * Update metadata on all components to reflect the current state 457 * of these fields: 458 * - chunk_next 459 * - flags 460 * - md_count 461 * Expects things to be set up so write_metadata() can work, i.e. 462 * the topology lock must be held. 
463 */ 464 static void 465 update_metadata(struct g_virstor_softc *sc) 466 { 467 struct g_virstor_metadata md; 468 int n; 469 470 if (virstor_valid_components(sc) != sc->n_components) 471 return; /* Incomplete device */ 472 LOG_MSG(LVL_DEBUG, "Updating metadata on components for %s", 473 sc->geom->name); 474 /* Update metadata on components */ 475 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, 476 sc->geom->class->name, sc->geom->name); 477 g_topology_assert(); 478 for (n = 0; n < sc->n_components; n++) { 479 read_metadata(sc->components[n].gcons, &md); 480 md.chunk_next = sc->components[n].chunk_next; 481 md.flags = sc->components[n].flags; 482 md.md_count = sc->n_components; 483 write_metadata(sc->components[n].gcons, &md); 484 } 485 } 486 487 /* 488 * Fills metadata (struct md) from information stored in softc and the nc'th 489 * component of virstor 490 */ 491 static void 492 fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md, 493 u_int nc, u_int hardcode) 494 { 495 struct g_virstor_component *c; 496 497 bzero(md, sizeof *md); 498 c = &sc->components[nc]; 499 500 strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof md->md_magic); 501 md->md_version = G_VIRSTOR_VERSION; 502 strncpy(md->md_name, sc->geom->name, sizeof md->md_name); 503 md->md_id = sc->id; 504 md->md_virsize = sc->virsize; 505 md->md_chunk_size = sc->chunk_size; 506 md->md_count = sc->n_components; 507 508 if (hardcode) { 509 strncpy(md->provider, c->gcons->provider->name, 510 sizeof md->provider); 511 } 512 md->no = nc; 513 md->provsize = c->gcons->provider->mediasize; 514 md->chunk_count = c->chunk_count; 515 md->chunk_next = c->chunk_next; 516 md->chunk_reserved = c->chunk_reserved; 517 md->flags = c->flags; 518 } 519 520 /* 521 * Remove a component from virstor device. 522 * Can only be done if the component is unallocated. 
523 */ 524 static void 525 virstor_ctl_remove(struct gctl_req *req, struct g_class *cp) 526 { 527 /* As this is executed in parallel to I/O, operations on virstor 528 * structures must be as atomic as possible. */ 529 struct g_virstor_softc *sc; 530 int *nargs; 531 const char *geom_name; 532 u_int removed; 533 int i; 534 535 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 536 if (nargs == NULL) { 537 gctl_error(req, "Error fetching argument '%s'", "nargs"); 538 return; 539 } 540 if (*nargs < 2) { 541 gctl_error(req, "Invalid number of arguments"); 542 return; 543 } 544 /* Find "our" geom */ 545 geom_name = gctl_get_asciiparam(req, "arg0"); 546 if (geom_name == NULL) { 547 gctl_error(req, "Error fetching argument '%s'", 548 "geom_name (arg0)"); 549 return; 550 } 551 sc = virstor_find_geom(cp, geom_name); 552 if (sc == NULL) { 553 gctl_error(req, "Don't know anything about '%s'", geom_name); 554 return; 555 } 556 557 if (virstor_valid_components(sc) != sc->n_components) { 558 LOG_MSG(LVL_ERROR, "Cannot remove components from incomplete " 559 "virstor %s", sc->geom->name); 560 gctl_error(req, "Virstor %s is incomplete", sc->geom->name); 561 return; 562 } 563 564 removed = 0; 565 for (i = 1; i < *nargs; i++) { 566 char param[8]; 567 const char *prov_name; 568 int j, found; 569 struct g_virstor_component *newcomp, *compbak; 570 571 sprintf(param, "arg%d", i); 572 prov_name = gctl_get_asciiparam(req, param); 573 if (prov_name == NULL) { 574 gctl_error(req, "Error fetching argument '%s'", param); 575 return; 576 } 577 if (strncmp(prov_name, _PATH_DEV, strlen(_PATH_DEV)) == 0) 578 prov_name += strlen(_PATH_DEV); 579 580 found = -1; 581 for (j = 0; j < sc->n_components; j++) { 582 if (strcmp(sc->components[j].gcons->provider->name, 583 prov_name) == 0) { 584 found = j; 585 break; 586 } 587 } 588 if (found == -1) { 589 LOG_MSG(LVL_ERROR, "No %s component in %s", 590 prov_name, sc->geom->name); 591 continue; 592 } 593 594 compbak = sc->components; 595 newcomp = 
malloc(sc->n_components * sizeof(*sc->components), 596 M_GVIRSTOR, M_WAITOK | M_ZERO); 597 bcopy(sc->components, newcomp, found * sizeof(*sc->components)); 598 bcopy(&sc->components[found + 1], newcomp + found, 599 found * sizeof(*sc->components)); 600 if ((sc->components[j].flags & VIRSTOR_PROVIDER_ALLOCATED) != 0) { 601 LOG_MSG(LVL_ERROR, "Allocated provider %s cannot be " 602 "removed from %s", 603 prov_name, sc->geom->name); 604 free(newcomp, M_GVIRSTOR); 605 /* We'll consider this non-fatal error */ 606 continue; 607 } 608 /* Renumerate unallocated components */ 609 for (j = 0; j < sc->n_components-1; j++) { 610 if ((sc->components[j].flags & 611 VIRSTOR_PROVIDER_ALLOCATED) == 0) { 612 sc->components[j].index = j; 613 } 614 } 615 /* This is the critical section. If a component allocation 616 * event happens while both variables are not yet set, 617 * there will be trouble. Something will panic on encountering 618 * NULL sc->components[x].gcomp member. 619 * Luckily, component allocation happens very rarely and 620 * removing components is an abnormal action in any case. */ 621 sc->components = newcomp; 622 sc->n_components--; 623 /* End critical section */ 624 625 g_topology_lock(); 626 if (clear_metadata(&compbak[found]) != 0) { 627 LOG_MSG(LVL_WARNING, "Trouble ahead: cannot clear " 628 "metadata on %s", prov_name); 629 } 630 g_detach(compbak[found].gcons); 631 g_destroy_consumer(compbak[found].gcons); 632 g_topology_unlock(); 633 634 free(compbak, M_GVIRSTOR); 635 636 removed++; 637 } 638 639 /* This call to update_metadata() is critical. 
In case there's a 640 * power failure in the middle of it and some components are updated 641 * while others are not, there will be trouble on next .taste() iff 642 * a non-updated component is detected first */ 643 g_topology_lock(); 644 update_metadata(sc); 645 g_topology_unlock(); 646 LOG_MSG(LVL_INFO, "Removed %d component(s) from %s", removed, 647 sc->geom->name); 648 } 649 650 /* 651 * Clear metadata sector on component 652 */ 653 static int 654 clear_metadata(struct g_virstor_component *comp) 655 { 656 char *buf; 657 int error; 658 659 LOG_MSG(LVL_INFO, "Clearing metadata on %s", 660 comp->gcons->provider->name); 661 g_topology_assert(); 662 error = g_access(comp->gcons, 0, 1, 0); 663 if (error != 0) 664 return (error); 665 buf = malloc(comp->gcons->provider->sectorsize, M_GVIRSTOR, 666 M_WAITOK | M_ZERO); 667 error = g_write_data(comp->gcons, 668 comp->gcons->provider->mediasize - 669 comp->gcons->provider->sectorsize, 670 buf, 671 comp->gcons->provider->sectorsize); 672 free(buf, M_GVIRSTOR); 673 g_access(comp->gcons, 0, -1, 0); 674 return (error); 675 } 676 677 /* 678 * Destroy geom forcibly. 
679 */ 680 static int 681 g_virstor_destroy_geom(struct gctl_req *req __unused, struct g_class *mp, 682 struct g_geom *gp) 683 { 684 struct g_virstor_softc *sc; 685 int exitval; 686 687 sc = gp->softc; 688 KASSERT(sc != NULL, ("%s: NULL sc", __func__)); 689 690 exitval = 0; 691 LOG_MSG(LVL_DEBUG, "%s called for %s, sc=%p", __func__, gp->name, 692 gp->softc); 693 694 if (sc != NULL) { 695 #ifdef INVARIANTS 696 char *buf; 697 int error; 698 off_t off; 699 int isclean, count; 700 int n; 701 702 LOG_MSG(LVL_INFO, "INVARIANTS detected"); 703 LOG_MSG(LVL_INFO, "Verifying allocation " 704 "table for %s", sc->geom->name); 705 count = 0; 706 for (n = 0; n < sc->chunk_count; n++) { 707 if (sc->map[n].flags || VIRSTOR_MAP_ALLOCATED != 0) 708 count++; 709 } 710 LOG_MSG(LVL_INFO, "Device %s has %d allocated chunks", 711 sc->geom->name, count); 712 n = off = count = 0; 713 isclean = 1; 714 if (virstor_valid_components(sc) != sc->n_components) { 715 /* This is a incomplete virstor device (not all 716 * components have been found) */ 717 LOG_MSG(LVL_ERROR, "Device %s is incomplete", 718 sc->geom->name); 719 goto bailout; 720 } 721 error = g_access(sc->components[0].gcons, 1, 0, 0); 722 KASSERT(error == 0, ("%s: g_access failed (%d)", __func__, 723 error)); 724 /* Compare the whole on-disk allocation table with what's 725 * currently in memory */ 726 while (n < sc->chunk_count) { 727 buf = g_read_data(sc->components[0].gcons, off, 728 sc->sectorsize, &error); 729 KASSERT(buf != NULL, ("g_read_data returned NULL (%d) " 730 "for read at %jd", error, off)); 731 if (bcmp(buf, &sc->map[n], sc->sectorsize) != 0) { 732 LOG_MSG(LVL_ERROR, "ERROR in allocation table, " 733 "entry %d, offset %jd", n, off); 734 isclean = 0; 735 count++; 736 } 737 n += sc->me_per_sector; 738 off += sc->sectorsize; 739 g_free(buf); 740 } 741 error = g_access(sc->components[0].gcons, -1, 0, 0); 742 KASSERT(error == 0, ("%s: g_access failed (%d) on exit", 743 __func__, error)); 744 if (isclean != 1) { 745 
LOG_MSG(LVL_ERROR, "ALLOCATION TABLE CORRUPTED FOR %s " 746 "(%d sectors don't match, max %zu allocations)", 747 sc->geom->name, count, 748 count * sc->me_per_sector); 749 } else { 750 LOG_MSG(LVL_INFO, "Allocation table ok for %s", 751 sc->geom->name); 752 } 753 bailout: 754 #endif 755 update_metadata(sc); 756 virstor_geom_destroy(sc, FALSE, FALSE); 757 exitval = EAGAIN; 758 } else 759 exitval = 0; 760 return (exitval); 761 } 762 763 /* 764 * Taste event (per-class callback) 765 * Examines a provider and creates geom instances if needed 766 */ 767 static struct g_geom * 768 g_virstor_taste(struct g_class *mp, struct g_provider *pp, int flags) 769 { 770 struct g_virstor_metadata md; 771 struct g_geom *gp; 772 struct g_consumer *cp; 773 struct g_virstor_softc *sc; 774 int error; 775 776 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 777 g_topology_assert(); 778 LOG_MSG(LVL_DEBUG, "Tasting %s", pp->name); 779 780 /* We need a dummy geom to attach a consumer to the given provider */ 781 gp = g_new_geomf(mp, "virstor:taste.helper"); 782 gp->start = (void *)invalid_call; /* XXX: hacked up so the */ 783 gp->access = (void *)invalid_call; /* compiler doesn't complain. */ 784 gp->orphan = (void *)invalid_call; /* I really want these to fail. 
*/ 785 786 cp = g_new_consumer(gp); 787 g_attach(cp, pp); 788 error = read_metadata(cp, &md); 789 g_detach(cp); 790 g_destroy_consumer(cp); 791 g_destroy_geom(gp); 792 793 if (error != 0) 794 return (NULL); 795 796 if (strcmp(md.md_magic, G_VIRSTOR_MAGIC) != 0) 797 return (NULL); 798 if (md.md_version != G_VIRSTOR_VERSION) { 799 LOG_MSG(LVL_ERROR, "Kernel module version invalid " 800 "to handle %s (%s) : %d should be %d", 801 md.md_name, pp->name, md.md_version, G_VIRSTOR_VERSION); 802 return (NULL); 803 } 804 if (md.provsize != pp->mediasize) 805 return (NULL); 806 807 /* If the provider name is hardcoded, use the offered provider only 808 * if it's been offered with its proper name (the one used in 809 * the label command). */ 810 if (md.provider[0] != '\0') { 811 if (strcmp(md.provider, pp->name) != 0) 812 return (NULL); 813 } 814 815 /* Iterate all geoms this class already knows about to see if a new 816 * geom instance of this class needs to be created (in case the provider 817 * is first from a (possibly) multi-consumer geom) or it just needs 818 * to be added to an existing instance. 
*/ 819 sc = NULL; 820 gp = NULL; 821 LIST_FOREACH(gp, &mp->geom, geom) { 822 sc = gp->softc; 823 if (sc == NULL) 824 continue; 825 if (strcmp(md.md_name, sc->geom->name) != 0) 826 continue; 827 if (md.md_id != sc->id) 828 continue; 829 break; 830 } 831 if (gp != NULL) { /* We found an existing geom instance; add to it */ 832 LOG_MSG(LVL_INFO, "Adding %s to %s", pp->name, md.md_name); 833 error = add_provider_to_geom(sc, pp, &md); 834 if (error != 0) { 835 LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", 836 pp->name, md.md_name, error); 837 return (NULL); 838 } 839 } else { /* New geom instance needs to be created */ 840 gp = create_virstor_geom(mp, &md); 841 if (gp == NULL) { 842 LOG_MSG(LVL_ERROR, "Error creating new instance of " 843 "class %s: %s", mp->name, md.md_name); 844 LOG_MSG(LVL_DEBUG, "Error creating %s at %s", 845 md.md_name, pp->name); 846 return (NULL); 847 } 848 sc = gp->softc; 849 LOG_MSG(LVL_INFO, "Adding %s to %s (first found)", pp->name, 850 md.md_name); 851 error = add_provider_to_geom(sc, pp, &md); 852 if (error != 0) { 853 LOG_MSG(LVL_ERROR, "Error adding %s to %s (error %d)", 854 pp->name, md.md_name, error); 855 virstor_geom_destroy(sc, TRUE, FALSE); 856 return (NULL); 857 } 858 } 859 860 return (gp); 861 } 862 863 /* 864 * Destroyes consumer passed to it in arguments. Used as a callback 865 * on g_event queue. 
 */
static void
delay_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *c = arg;
	KASSERT(c != NULL, ("%s: invalid consumer", __func__));
	LOG_MSG(LVL_DEBUG, "Consumer %s destroyed with delay",
	    c->provider->name);
	g_detach(c);
	g_destroy_consumer(c);
}

/*
 * Remove a component (consumer) from geom instance; If it's the first
 * component being removed, orphan the provider to announce geom's being
 * dismantled
 */
static void
remove_component(struct g_virstor_softc *sc, struct g_virstor_component *comp,
    boolean_t delay)
{
	struct g_consumer *c;

	KASSERT(comp->gcons != NULL, ("Component with no consumer in %s",
	    sc->geom->name));
	c = comp->gcons;

	/* Clear the softc's pointer first so concurrent observers no longer
	 * see this consumer through the component table. */
	comp->gcons = NULL;
	KASSERT(c->provider != NULL, ("%s: no provider", __func__));
	LOG_MSG(LVL_DEBUG, "Component %s removed from %s", c->provider->name,
	    sc->geom->name);
	if (sc->provider != NULL) {
		/* Whither, GEOM? */
		/* First component removed: mark our provider for withering
		 * and orphan it to announce that the geom is going away. */
		sc->provider->flags |= G_PF_WITHER;
		g_orphan_provider(sc->provider, ENXIO);
		sc->provider = NULL;
		LOG_MSG(LVL_INFO, "Removing provider %s", sc->geom->name);
	}

	/* Release any access counts we still hold on the consumer before
	 * detaching it. */
	if (c->acr > 0 || c->acw > 0 || c->ace > 0)
		g_access(c, -c->acr, -c->acw, -c->ace);
	if (delay) {
		/* Destroy consumer after it's tasted */
		g_post_event(delay_destroy_consumer, c, M_WAITOK, NULL);
	} else {
		g_detach(c);
		g_destroy_consumer(c);
	}
}

/*
 * Destroy geom - called internally
 * See g_virstor_destroy_geom for the other one
 *
 * Tears down all components, errors out any delayed BIOs and frees the
 * softc. Returns 0 on success, EBUSY if the provider is open and force
 * was not given, ENXIO if sc is NULL.
 */
static int
virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force,
    boolean_t delay)
{
	struct g_provider *pp;
	struct g_geom *gp;
	int n;

	g_topology_assert();

	if (sc == NULL)
		return (ENXIO);

	pp = sc->provider;
	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
		/* Somebody still has the device open; refuse unless forced. */
		LOG_MSG(force ? LVL_WARNING : LVL_ERROR,
		    "Device %s is still open.", pp->name);
		if (!force)
			return (EBUSY);
	}

	for (n = 0; n < sc->n_components; n++) {
		if (sc->components[n].gcons != NULL)
			remove_component(sc, &sc->components[n], delay);
	}

	gp = sc->geom;
	gp->softc = NULL;

	KASSERT(sc->provider == NULL, ("Provider still exists for %s",
	    gp->name));

	/* XXX: This might or might not work, since we're called with
	 * the topology lock held. Also, it might panic the kernel if
	 * the error'd BIO is in softupdates code. */
	mtx_lock(&sc->delayed_bio_q_mtx);
	while (!STAILQ_EMPTY(&sc->delayed_bio_q)) {
		struct g_virstor_bio_q *bq;
		bq = STAILQ_FIRST(&sc->delayed_bio_q);
		/* NOTE(review): g_io_deliver() is passed EIO here, which
		 * presumably overrides the ENOSPC stored just above —
		 * confirm which error callers should actually see. */
		bq->bio->bio_error = ENOSPC;
		g_io_deliver(bq->bio, EIO);
		STAILQ_REMOVE_HEAD(&sc->delayed_bio_q, linkage);
		free(bq, M_GVIRSTOR);
	}
	mtx_unlock(&sc->delayed_bio_q_mtx);
	mtx_destroy(&sc->delayed_bio_q_mtx);

	free(sc->map, M_GVIRSTOR);
	free(sc->components, M_GVIRSTOR);
	/* Scrub the softc before freeing to make use-after-free visible. */
	bzero(sc, sizeof *sc);
	free(sc, M_GVIRSTOR);

	pp = LIST_FIRST(&gp->provider); /* We only offer one provider */
	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
		LOG_MSG(LVL_DEBUG, "Device %s destroyed", gp->name);

	g_wither_geom(gp, ENXIO);

	return (0);
}

/*
 * Utility function: read metadata & decode. Wants topology lock to be
 * held.
 */
static int
read_metadata(struct g_consumer *cp, struct g_virstor_metadata *md)
{
	struct g_provider *pp;
	char *buf;
	int error;

	g_topology_assert();
	error = g_access(cp, 1, 0, 0);
	if (error != 0)
		return (error);
	pp = cp->provider;
	/* Drop the topology lock around the actual disk I/O; metadata is
	 * stored in the provider's last sector. */
	g_topology_unlock();
	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
	    &error);
	g_topology_lock();
	g_access(cp, -1, 0, 0);
	if (buf == NULL)
		return (error);

	virstor_metadata_decode(buf, md);
	g_free(buf);

	return (0);
}

/**
 * Utility function: encode & write metadata. Assumes topology lock is
 * held.
 *
 * There is no useful way of recovering from errors in this function,
 * not involving panicking the kernel. If the metadata cannot be written
 * the most we can do is notify the operator and hope he spots it and
 * replaces the broken drive.
 */
static void
write_metadata(struct g_consumer *cp, struct g_virstor_metadata *md)
{
	struct g_provider *pp;
	char *buf;
	int error;

	KASSERT(cp != NULL && md != NULL && cp->provider != NULL,
	    ("Something's fishy in %s", __func__));
	LOG_MSG(LVL_DEBUG, "Writing metadata on %s", cp->provider->name);
	g_topology_assert();
	error = g_access(cp, 0, 1, 0);
	if (error != 0) {
		LOG_MSG(LVL_ERROR, "g_access(0,1,0) failed for %s: %d",
		    cp->provider->name, error);
		return;
	}
	pp = cp->provider;

	buf = malloc(pp->sectorsize, M_GVIRSTOR, M_WAITOK);
	virstor_metadata_encode(md, buf);
	/* Drop the topology lock around the write to the last sector. */
	g_topology_unlock();
	error = g_write_data(cp, pp->mediasize - pp->sectorsize, buf,
	    pp->sectorsize);
	g_topology_lock();
	g_access(cp, 0, -1, 0);
	free(buf, M_GVIRSTOR);

	if (error != 0)
		LOG_MSG(LVL_ERROR, "Error %d writing metadata to %s",
		    error, cp->provider->name);
}

/*
 * Creates a new instance of this GEOM class, initialise softc
 */
static struct g_geom *
create_virstor_geom(struct g_class *mp, struct g_virstor_metadata *md)
{
	struct g_geom *gp;
	struct g_virstor_softc *sc;

	LOG_MSG(LVL_DEBUG, "Creating geom instance for %s (id=%u)",
	    md->md_name, md->md_id);

	if (md->md_count < 1 || md->md_chunk_size < 1 ||
	    md->md_virsize < md->md_chunk_size) {
		/* This is bogus configuration, and probably means data is
		 * somehow corrupted. Panic, maybe? */
		LOG_MSG(LVL_ERROR, "Nonsensical metadata information for %s",
		    md->md_name);
		return (NULL);
	}

	/* Check if it's already created */
	LIST_FOREACH(gp, &mp->geom, geom) {
		sc = gp->softc;
		if (sc != NULL && strcmp(sc->geom->name, md->md_name) == 0) {
			LOG_MSG(LVL_WARNING, "Geom %s already exists",
			    md->md_name);
			if (sc->id != md->md_id) {
				LOG_MSG(LVL_ERROR,
				    "Some stale or invalid components "
				    "exist for virstor device named %s. "
				    "You will need to <CLEAR> all stale "
				    "components and maybe reconfigure "
				    "the virstor device. Tune "
				    "kern.geom.virstor.debug sysctl up "
				    "for more information.",
				    sc->geom->name);
			}
			return (NULL);
		}
	}
	gp = g_new_geomf(mp, "%s", md->md_name);
	/* to circumvent races that test softc */
	gp->softc = NULL;

	gp->start = g_virstor_start;
	gp->spoiled = g_virstor_orphan;
	gp->orphan = g_virstor_orphan;
	gp->access = g_virstor_access;
	gp->dumpconf = g_virstor_dumpconf;

	/* Softc fields are populated from the label metadata; the component
	 * array is sized for md_count entries and filled in later by
	 * add_provider_to_geom() as providers are tasted. */
	sc = malloc(sizeof(*sc), M_GVIRSTOR, M_WAITOK | M_ZERO);
	sc->id = md->md_id;
	sc->n_components = md->md_count;
	sc->components = malloc(sizeof(struct g_virstor_component) * md->md_count,
	    M_GVIRSTOR, M_WAITOK | M_ZERO);
	sc->chunk_size = md->md_chunk_size;
	sc->virsize = md->md_virsize;
	STAILQ_INIT(&sc->delayed_bio_q);
	/* MTX_RECURSE: the delayed-BIO retry path in virstor_ctl_add() may
	 * re-enter g_virstor_start() while holding this mutex. */
	mtx_init(&sc->delayed_bio_q_mtx, "gvirstor_delayed_bio_q_mtx",
	    "gvirstor", MTX_DEF | MTX_RECURSE);

	sc->geom = gp;
	/* virstor_check_and_run will create it */
	sc->provider = NULL;
	gp->softc = sc;

	LOG_MSG(LVL_ANNOUNCE, "Device %s created", sc->geom->name);

	return (gp);
}

/*
 * Add provider to a GEOM class instance
 */
static int
add_provider_to_geom(struct g_virstor_softc *sc, struct g_provider *pp,
    struct g_virstor_metadata *md)
{
	struct g_virstor_component *component;
	struct g_consumer *cp, *fcp;
	struct g_geom *gp;
	int error;

	/* The metadata records which slot this provider occupies. */
	if (md->no >= sc->n_components)
		return (EINVAL);

	/* "Current" component */
	component = &(sc->components[md->no]);
	if (component->gcons != NULL)
		return (EEXIST);

	gp = sc->geom;
	fcp = LIST_FIRST(&gp->consumer);

	cp = g_new_consumer(gp);
	error = g_attach(cp, pp);

	if (error != 0) {
		g_destroy_consumer(cp);
		return (error);
	}

	if (fcp != NULL) {
		if (fcp->provider->sectorsize != pp->sectorsize) {
			/* TODO: this can be made to work */
LOG_MSG(LVL_ERROR, "Provider %s of %s has invalid " 1158 "sector size (%d)", pp->name, sc->geom->name, 1159 pp->sectorsize); 1160 return (EINVAL); 1161 } 1162 if (fcp->acr > 0 || fcp->acw || fcp->ace > 0) { 1163 /* Replicate access permissions from first "live" consumer 1164 * to the new one */ 1165 error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); 1166 if (error != 0) { 1167 g_detach(cp); 1168 g_destroy_consumer(cp); 1169 return (error); 1170 } 1171 } 1172 } 1173 1174 /* Bring up a new component */ 1175 cp->private = component; 1176 component->gcons = cp; 1177 component->sc = sc; 1178 component->index = md->no; 1179 component->chunk_count = md->chunk_count; 1180 component->chunk_next = md->chunk_next; 1181 component->chunk_reserved = md->chunk_reserved; 1182 component->flags = md->flags; 1183 1184 LOG_MSG(LVL_DEBUG, "%s attached to %s", pp->name, sc->geom->name); 1185 1186 virstor_check_and_run(sc); 1187 return (0); 1188 } 1189 1190 /* 1191 * Check if everything's ready to create the geom provider & device entry, 1192 * create and start provider. 
1193 * Called ultimately by .taste, from g_event thread 1194 */ 1195 static void 1196 virstor_check_and_run(struct g_virstor_softc *sc) 1197 { 1198 off_t off; 1199 size_t n, count; 1200 int index; 1201 int error; 1202 1203 if (virstor_valid_components(sc) != sc->n_components) 1204 return; 1205 1206 if (virstor_valid_components(sc) == 0) { 1207 /* This is actually a candidate for panic() */ 1208 LOG_MSG(LVL_ERROR, "No valid components for %s?", 1209 sc->provider->name); 1210 return; 1211 } 1212 1213 sc->sectorsize = sc->components[0].gcons->provider->sectorsize; 1214 1215 /* Initialise allocation map from the first consumer */ 1216 sc->chunk_count = sc->virsize / sc->chunk_size; 1217 if (sc->chunk_count * (off_t)sc->chunk_size != sc->virsize) { 1218 LOG_MSG(LVL_WARNING, "Device %s truncated to %ju bytes", 1219 sc->provider->name, 1220 sc->chunk_count * (off_t)sc->chunk_size); 1221 } 1222 sc->map_size = sc->chunk_count * sizeof *(sc->map); 1223 /* The following allocation is in order of 4MB - 8MB */ 1224 sc->map = malloc(sc->map_size, M_GVIRSTOR, M_WAITOK); 1225 KASSERT(sc->map != NULL, ("%s: Memory allocation error (%zu bytes) for %s", 1226 __func__, sc->map_size, sc->provider->name)); 1227 sc->map_sectors = sc->map_size / sc->sectorsize; 1228 1229 count = 0; 1230 for (n = 0; n < sc->n_components; n++) 1231 count += sc->components[n].chunk_count; 1232 LOG_MSG(LVL_INFO, "Device %s has %zu physical chunks and %zu virtual " 1233 "(%zu KB chunks)", 1234 sc->geom->name, count, sc->chunk_count, sc->chunk_size / 1024); 1235 1236 error = g_access(sc->components[0].gcons, 1, 0, 0); 1237 if (error != 0) { 1238 LOG_MSG(LVL_ERROR, "Cannot acquire read access for %s to " 1239 "read allocation map for %s", 1240 sc->components[0].gcons->provider->name, 1241 sc->geom->name); 1242 return; 1243 } 1244 /* Read in the allocation map */ 1245 LOG_MSG(LVL_DEBUG, "Reading map for %s from %s", sc->geom->name, 1246 sc->components[0].gcons->provider->name); 1247 off = count = n = 0; 1248 
while (count < sc->map_size) { 1249 struct g_virstor_map_entry *mapbuf; 1250 size_t bs; 1251 1252 bs = MIN(MAXPHYS, sc->map_size - count); 1253 if (bs % sc->sectorsize != 0) { 1254 /* Check for alignment errors */ 1255 bs = (bs / sc->sectorsize) * sc->sectorsize; 1256 if (bs == 0) 1257 break; 1258 LOG_MSG(LVL_ERROR, "Trouble: map is not sector-aligned " 1259 "for %s on %s", sc->geom->name, 1260 sc->components[0].gcons->provider->name); 1261 } 1262 mapbuf = g_read_data(sc->components[0].gcons, off, bs, &error); 1263 if (mapbuf == NULL) { 1264 free(sc->map, M_GVIRSTOR); 1265 LOG_MSG(LVL_ERROR, "Error reading allocation map " 1266 "for %s from %s (offset %ju) (error %d)", 1267 sc->geom->name, 1268 sc->components[0].gcons->provider->name, 1269 off, error); 1270 return; 1271 } 1272 1273 bcopy(mapbuf, &sc->map[n], bs); 1274 off += bs; 1275 count += bs; 1276 n += bs / sizeof *(sc->map); 1277 g_free(mapbuf); 1278 } 1279 g_access(sc->components[0].gcons, -1, 0, 0); 1280 LOG_MSG(LVL_DEBUG, "Read map for %s", sc->geom->name); 1281 1282 /* find first component with allocatable chunks */ 1283 index = -1; 1284 for (n = 0; n < sc->n_components; n++) { 1285 if (sc->components[n].chunk_next < 1286 sc->components[n].chunk_count) { 1287 index = n; 1288 break; 1289 } 1290 } 1291 if (index == -1) 1292 /* not found? 
set it to the last component and handle it 1293 * later */ 1294 index = sc->n_components - 1; 1295 1296 if (index >= sc->n_components - g_virstor_component_watermark - 1) { 1297 LOG_MSG(LVL_WARNING, "Device %s running out of components " 1298 "(%d/%u: %s)", sc->geom->name, 1299 index+1, 1300 sc->n_components, 1301 sc->components[index].gcons->provider->name); 1302 } 1303 sc->curr_component = index; 1304 1305 if (sc->components[index].chunk_next >= 1306 sc->components[index].chunk_count - g_virstor_chunk_watermark) { 1307 LOG_MSG(LVL_WARNING, 1308 "Component %s of %s is running out of free space " 1309 "(%u chunks left)", 1310 sc->components[index].gcons->provider->name, 1311 sc->geom->name, sc->components[index].chunk_count - 1312 sc->components[index].chunk_next); 1313 } 1314 1315 sc->me_per_sector = sc->sectorsize / sizeof *(sc->map); 1316 if (sc->sectorsize % sizeof *(sc->map) != 0) { 1317 LOG_MSG(LVL_ERROR, 1318 "%s: Map entries don't fit exactly in a sector (%s)", 1319 __func__, sc->geom->name); 1320 return; 1321 } 1322 1323 /* Recalculate allocated chunks in components & at the same time 1324 * verify map data is sane. We could trust metadata on this, but 1325 * we want to make sure. 
*/ 1326 for (n = 0; n < sc->n_components; n++) 1327 sc->components[n].chunk_next = sc->components[n].chunk_reserved; 1328 1329 for (n = 0; n < sc->chunk_count; n++) { 1330 if (sc->map[n].provider_no >= sc->n_components || 1331 sc->map[n].provider_chunk >= 1332 sc->components[sc->map[n].provider_no].chunk_count) { 1333 LOG_MSG(LVL_ERROR, "%s: Invalid entry %u in map for %s", 1334 __func__, (u_int)n, sc->geom->name); 1335 LOG_MSG(LVL_ERROR, "%s: provider_no: %u, n_components: %u" 1336 " provider_chunk: %u, chunk_count: %u", __func__, 1337 sc->map[n].provider_no, sc->n_components, 1338 sc->map[n].provider_chunk, 1339 sc->components[sc->map[n].provider_no].chunk_count); 1340 return; 1341 } 1342 if (sc->map[n].flags & VIRSTOR_MAP_ALLOCATED) 1343 sc->components[sc->map[n].provider_no].chunk_next++; 1344 } 1345 1346 sc->provider = g_new_providerf(sc->geom, "virstor/%s", 1347 sc->geom->name); 1348 1349 sc->provider->sectorsize = sc->sectorsize; 1350 sc->provider->mediasize = sc->virsize; 1351 g_error_provider(sc->provider, 0); 1352 1353 LOG_MSG(LVL_INFO, "%s activated", sc->provider->name); 1354 LOG_MSG(LVL_DEBUG, "%s starting with current component %u, starting " 1355 "chunk %u", sc->provider->name, sc->curr_component, 1356 sc->components[sc->curr_component].chunk_next); 1357 } 1358 1359 /* 1360 * Returns count of active providers in this geom instance 1361 */ 1362 static u_int 1363 virstor_valid_components(struct g_virstor_softc *sc) 1364 { 1365 unsigned int nc, i; 1366 1367 nc = 0; 1368 KASSERT(sc != NULL, ("%s: softc is NULL", __func__)); 1369 KASSERT(sc->components != NULL, ("%s: sc->components is NULL", __func__)); 1370 for (i = 0; i < sc->n_components; i++) 1371 if (sc->components[i].gcons != NULL) 1372 nc++; 1373 return (nc); 1374 } 1375 1376 /* 1377 * Called when the consumer gets orphaned (?) 
 */
static void
g_virstor_orphan(struct g_consumer *cp)
{
	struct g_virstor_softc *sc;
	struct g_virstor_component *comp;
	struct g_geom *gp;

	g_topology_assert();
	gp = cp->geom;
	sc = gp->softc;
	/* Nothing to do if the geom is already being torn down. */
	if (sc == NULL)
		return;

	comp = cp->private;
	KASSERT(comp != NULL, ("%s: No component in private part of consumer",
	    __func__));
	remove_component(sc, comp, FALSE);
	/* When the last component disappears, destroy the whole geom. */
	if (virstor_valid_components(sc) == 0)
		virstor_geom_destroy(sc, TRUE, FALSE);
}

/*
 * Called to notify geom when it's been opened, and for what intent
 */
static int
g_virstor_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_consumer *c;
	struct g_virstor_softc *sc;
	struct g_geom *gp;
	int error;

	KASSERT(pp != NULL, ("%s: NULL provider", __func__));
	gp = pp->geom;
	KASSERT(gp != NULL, ("%s: NULL geom", __func__));
	sc = gp->softc;

	if (sc == NULL) {
		/* It seems that .access can be called with negative dr,dw,dx
		 * in this case but I want to check for myself */
		LOG_MSG(LVL_WARNING, "access(%d, %d, %d) for %s",
		    dr, dw, de, pp->name);
		/* This should only happen when geom is withered so
		 * allow only negative requests */
		KASSERT(dr <= 0 && dw <= 0 && de <= 0,
		    ("%s: Positive access for %s", __func__, pp->name));
		if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0)
			LOG_MSG(LVL_DEBUG, "Device %s definitely destroyed",
			    pp->name);
		return (0);
	}

	/* Grab an exclusive bit to propagate on our consumers on first open */
	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
		de++;
	/* ... drop it on close */
	if (pp->acr + dr == 0 && pp->acw + dw == 0 && pp->ace + de == 0) {
		de--;
		update_metadata(sc);	/* Writes statistical information */
	}

	/* Propagate the access change to every component consumer; on
	 * failure, undo the changes already applied to earlier consumers. */
	error = ENXIO;
	LIST_FOREACH(c, &gp->consumer, consumer) {
		KASSERT(c != NULL, ("%s: consumer is NULL", __func__));
		error = g_access(c, dr, dw, de);
		if (error != 0) {
			struct g_consumer *c2;

			/* Backout earlier changes */
			LIST_FOREACH(c2, &gp->consumer, consumer) {
				if (c2 == c) /* all earlier components fixed */
					return (error);
				g_access(c2, -dr, -dw, -de);
			}
		}
	}

	return (error);
}

/*
 * Generate XML dump of current state
 */
static void
g_virstor_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct g_virstor_softc *sc;

	g_topology_assert();
	sc = gp->softc;

	/* Nothing to report for a withered geom or for provider queries. */
	if (sc == NULL || pp != NULL)
		return;

	if (cp != NULL) {
		/* For each component */
		struct g_virstor_component *comp;

		comp = cp->private;
		if (comp == NULL)
			return;
		sbuf_printf(sb, "%s<ComponentIndex>%u</ComponentIndex>\n",
		    indent, comp->index);
		sbuf_printf(sb, "%s<ChunkCount>%u</ChunkCount>\n",
		    indent, comp->chunk_count);
		sbuf_printf(sb, "%s<ChunksUsed>%u</ChunksUsed>\n",
		    indent, comp->chunk_next);
		sbuf_printf(sb, "%s<ChunksReserved>%u</ChunksReserved>\n",
		    indent, comp->chunk_reserved);
		sbuf_printf(sb, "%s<StorageFree>%u%%</StorageFree>\n",
		    indent,
		    comp->chunk_next > 0 ? 100 -
		    ((comp->chunk_next + comp->chunk_reserved) * 100) /
		    comp->chunk_count : 100);
	} else {
		/* For the whole thing */
		u_int count, used, i;
		off_t size;

		/* Aggregate chunk/space statistics over live components. */
		count = used = size = 0;
		for (i = 0; i < sc->n_components; i++) {
			if (sc->components[i].gcons != NULL) {
				count += sc->components[i].chunk_count;
				used += sc->components[i].chunk_next +
				    sc->components[i].chunk_reserved;
				size += sc->components[i].gcons->
				    provider->mediasize;
			}
		}

		/* NOTE(review): the <State> and <PhysicalFree> lines divide
		 * by "count" — presumably at least one component is attached
		 * whenever dumpconf runs with sc != NULL; verify no
		 * division-by-zero path exists. */
		sbuf_printf(sb, "%s<Status>"
		    "Components=%u, Online=%u</Status>\n", indent,
		    sc->n_components, virstor_valid_components(sc));
		sbuf_printf(sb, "%s<State>%u%% physical free</State>\n",
		    indent, 100-(used * 100) / count);
		sbuf_printf(sb, "%s<ChunkSize>%zu</ChunkSize>\n", indent,
		    sc->chunk_size);
		sbuf_printf(sb, "%s<PhysicalFree>%u%%</PhysicalFree>\n",
		    indent, used > 0 ? 100 - (used * 100) / count : 100);
		sbuf_printf(sb, "%s<ChunkPhysicalCount>%u</ChunkPhysicalCount>\n",
		    indent, count);
		sbuf_printf(sb, "%s<ChunkVirtualCount>%zu</ChunkVirtualCount>\n",
		    indent, sc->chunk_count);
		sbuf_printf(sb, "%s<PhysicalBacking>%zu%%</PhysicalBacking>\n",
		    indent,
		    (count * 100) / sc->chunk_count);
		sbuf_printf(sb, "%s<PhysicalBackingSize>%jd</PhysicalBackingSize>\n",
		    indent, size);
		sbuf_printf(sb, "%s<VirtualSize>%jd</VirtualSize>\n", indent,
		    sc->virsize);
	}
}

/*
 * GEOM .done handler
 * Can't use standard handler because one requested IO may
 * fork into additional data IOs
 */
static void
g_virstor_done(struct bio *b)
{
	struct g_virstor_softc *sc;
	struct bio *parent_b;

	parent_b = b->bio_parent;
	sc = parent_b->bio_to->geom->softc;

	/* Record the first error among all child BIOs on the parent. */
	if (b->bio_error != 0) {
		LOG_MSG(LVL_ERROR, "Error %d for offset=%ju, length=%ju, %s",
		    b->bio_error, b->bio_offset, b->bio_length,
		    b->bio_to->name);
		if (parent_b->bio_error == 0)
			parent_b->bio_error = b->bio_error;
	}

	parent_b->bio_inbed++;
	parent_b->bio_completed += b->bio_completed;

	/* Deliver the parent only when the last child has come home. */
	if (parent_b->bio_children == parent_b->bio_inbed) {
		parent_b->bio_completed = parent_b->bio_length;
		g_io_deliver(parent_b, parent_b->bio_error);
	}
	g_destroy_bio(b);
}

/*
 * I/O starts here
 * Called in g_down thread
 */
static void
g_virstor_start(struct bio *b)
{
	struct g_virstor_softc *sc;
	struct g_virstor_component *comp;
	struct bio *cb;
	struct g_provider *pp;
	char *addr;
	off_t offset, length;
	struct bio_queue_head bq;
	size_t chunk_size;	/* cached for convenience */
	u_int count;

	pp = b->bio_to;
	sc = pp->geom->softc;
	KASSERT(sc != NULL, ("%s: no softc (error=%d, device=%s)", __func__,
	    b->bio_to->error, b->bio_to->name));

	LOG_REQ(LVL_MOREDEBUG, b, "%s", __func__);

	switch (b->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	default:
		g_io_deliver(b, EOPNOTSUPP);
		return;
	}

	LOG_MSG(LVL_DEBUG2, "BIO arrived, size=%ju", b->bio_length);
	bioq_init(&bq);

	chunk_size = sc->chunk_size;
	addr = b->bio_data;
	offset = b->bio_offset;	/* virtual offset and length */
	length = b->bio_length;

	/* Split the request along virtual chunk boundaries; each piece is
	 * cloned onto the physical chunk it maps to, collected in bq and
	 * fired at the end. */
	while (length > 0) {
		size_t chunk_index, in_chunk_offset, in_chunk_length;
		struct virstor_map_entry *me;

		chunk_index = offset / chunk_size; /* round downwards */
		in_chunk_offset = offset % chunk_size;
		in_chunk_length = min(length, chunk_size - in_chunk_offset);
		LOG_MSG(LVL_DEBUG, "Mapped %s(%ju, %ju) to (%zu,%zu,%zu)",
		    b->bio_cmd == BIO_READ ? "R" : "W",
		    offset, length,
		    chunk_index, in_chunk_offset, in_chunk_length);
		me = &sc->map[chunk_index];

		if (b->bio_cmd == BIO_READ || b->bio_cmd == BIO_DELETE) {
			if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) {
				/* Reads from unallocated chunks return zeroed
				 * buffers */
				if (b->bio_cmd == BIO_READ)
					bzero(addr, in_chunk_length);
			} else {
				comp = &sc->components[me->provider_no];

				cb = g_clone_bio(b);
				if (cb == NULL) {
					/* Out of memory: tear down the clones
					 * queued so far and fail the parent. */
					bioq_dismantle(&bq);
					if (b->bio_error == 0)
						b->bio_error = ENOMEM;
					g_io_deliver(b, b->bio_error);
					return;
				}
				cb->bio_to = comp->gcons->provider;
				cb->bio_done = g_virstor_done;
				cb->bio_offset =
				    (off_t)me->provider_chunk * (off_t)chunk_size
				    + in_chunk_offset;
				cb->bio_length = in_chunk_length;
				cb->bio_data = addr;
				cb->bio_caller1 = comp;
				bioq_disksort(&bq, cb);
			}
		} else { /* handle BIO_WRITE */
			KASSERT(b->bio_cmd == BIO_WRITE,
			    ("%s: Unknown command %d", __func__,
			    b->bio_cmd));

			if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) {
				/* We have a virtual chunk, represented by
				 * the "me" entry, but it's not yet allocated
				 * (tied to) a physical chunk. So do it now. */
				struct virstor_map_entry *data_me;
				u_int phys_chunk, comp_no;
				off_t s_offset;
				int error;

				error = allocate_chunk(sc, &comp, &comp_no,
				    &phys_chunk);
				if (error != 0) {
					/* We cannot allocate a physical chunk
					 * to satisfy this request, so we'll
					 * delay it to when we can...
					 * XXX: this will prevent the fs from
					 * being umounted! */
					struct g_virstor_bio_q *biq;
					biq = malloc(sizeof *biq, M_GVIRSTOR,
					    M_NOWAIT);
					if (biq == NULL) {
						bioq_dismantle(&bq);
						if (b->bio_error == 0)
							b->bio_error = ENOMEM;
						g_io_deliver(b, b->bio_error);
						return;
					}
					biq->bio = b;
					mtx_lock(&sc->delayed_bio_q_mtx);
					STAILQ_INSERT_TAIL(&sc->delayed_bio_q,
					    biq, linkage);
					mtx_unlock(&sc->delayed_bio_q_mtx);
					LOG_MSG(LVL_WARNING, "Delaying BIO "
					    "(size=%ju) until free physical "
					    "space can be found on %s",
					    b->bio_length,
					    sc->provider->name);
					return;
				}
				LOG_MSG(LVL_DEBUG, "Allocated chunk %u on %s "
				    "for %s",
				    phys_chunk,
				    comp->gcons->provider->name,
				    sc->provider->name);

				me->provider_no = comp_no;
				me->provider_chunk = phys_chunk;
				me->flags |= VIRSTOR_MAP_ALLOCATED;

				cb = g_clone_bio(b);
				if (cb == NULL) {
					/* Roll back the in-memory allocation
					 * before failing the parent BIO. */
					me->flags &= ~VIRSTOR_MAP_ALLOCATED;
					me->provider_no = 0;
					me->provider_chunk = 0;
					bioq_dismantle(&bq);
					if (b->bio_error == 0)
						b->bio_error = ENOMEM;
					g_io_deliver(b, b->bio_error);
					return;
				}

				/* The allocation table is stored continuously
				 * at the start of the drive. We need to
				 * calculate the offset of the sector that holds
				 * this map entry both on the drive and in the
				 * map array.
				 * s_offset will end up pointing to the drive
				 * sector. */
				s_offset = chunk_index * sizeof *me;
				s_offset = (s_offset / sc->sectorsize) *
				    sc->sectorsize;

				/* data_me points to map entry sector
				 * in memory (analogous to offset) */
				data_me = &sc->map[(chunk_index /
				    sc->me_per_sector) * sc->me_per_sector];

				/* Commit sector with map entry to storage */
				cb->bio_to = sc->components[0].gcons->provider;
				cb->bio_done = g_virstor_done;
				cb->bio_offset = s_offset;
				cb->bio_data = (char *)data_me;
				cb->bio_length = sc->sectorsize;
				cb->bio_caller1 = &sc->components[0];
				bioq_disksort(&bq, cb);
			}

			comp = &sc->components[me->provider_no];
			cb = g_clone_bio(b);
			if (cb == NULL) {
				bioq_dismantle(&bq);
				if (b->bio_error == 0)
					b->bio_error = ENOMEM;
				g_io_deliver(b, b->bio_error);
				return;
			}
			/* Finally, handle the data */
			cb->bio_to = comp->gcons->provider;
			cb->bio_done = g_virstor_done;
			cb->bio_offset = (off_t)me->provider_chunk*(off_t)chunk_size +
			    in_chunk_offset;
			cb->bio_length = in_chunk_length;
			cb->bio_data = addr;
			cb->bio_caller1 = comp;
			bioq_disksort(&bq, cb);
		}
		addr += in_chunk_length;
		length -= in_chunk_length;
		offset += in_chunk_length;
	}

	/* Fire off bio's here */
	count = 0;
	for (cb = bioq_first(&bq); cb != NULL; cb = bioq_first(&bq)) {
		bioq_remove(&bq, cb);
		LOG_REQ(LVL_MOREDEBUG, cb, "Firing request");
		comp = cb->bio_caller1;
		cb->bio_caller1 = NULL;
		LOG_MSG(LVL_DEBUG, " firing bio, offset=%ju, length=%ju",
		    cb->bio_offset, cb->bio_length);
		g_io_request(cb, comp->gcons);
		count++;
	}
	if (count == 0) { /* We handled everything locally */
		b->bio_completed = b->bio_length;
		g_io_deliver(b, 0);
	}

}

/*
 * Allocate a chunk from a physical provider.
Returns physical component, 1784 * chunk index relative to the component and the component's index. 1785 */ 1786 static int 1787 allocate_chunk(struct g_virstor_softc *sc, struct g_virstor_component **comp, 1788 u_int *comp_no_p, u_int *chunk) 1789 { 1790 u_int comp_no; 1791 1792 KASSERT(sc->curr_component < sc->n_components, 1793 ("%s: Invalid curr_component: %u", __func__, sc->curr_component)); 1794 1795 comp_no = sc->curr_component; 1796 *comp = &sc->components[comp_no]; 1797 dump_component(*comp); 1798 if ((*comp)->chunk_next >= (*comp)->chunk_count) { 1799 /* This component is full. Allocate next component */ 1800 if (comp_no >= sc->n_components-1) { 1801 LOG_MSG(LVL_ERROR, "All physical space allocated for %s", 1802 sc->geom->name); 1803 return (-1); 1804 } 1805 (*comp)->flags &= ~VIRSTOR_PROVIDER_CURRENT; 1806 sc->curr_component = ++comp_no; 1807 1808 *comp = &sc->components[comp_no]; 1809 if (comp_no >= sc->n_components - g_virstor_component_watermark-1) 1810 LOG_MSG(LVL_WARNING, "Device %s running out of components " 1811 "(switching to %u/%u: %s)", sc->geom->name, 1812 comp_no+1, sc->n_components, 1813 (*comp)->gcons->provider->name); 1814 /* Take care not to overwrite reserved chunks */ 1815 if ( (*comp)->chunk_reserved > 0 && 1816 (*comp)->chunk_next < (*comp)->chunk_reserved) 1817 (*comp)->chunk_next = (*comp)->chunk_reserved; 1818 1819 (*comp)->flags |= 1820 VIRSTOR_PROVIDER_ALLOCATED | VIRSTOR_PROVIDER_CURRENT; 1821 dump_component(*comp); 1822 *comp_no_p = comp_no; 1823 *chunk = (*comp)->chunk_next++; 1824 } else { 1825 *comp_no_p = comp_no; 1826 *chunk = (*comp)->chunk_next++; 1827 } 1828 return (0); 1829 } 1830 1831 /* Dump a component */ 1832 static void 1833 dump_component(struct g_virstor_component *comp) 1834 { 1835 1836 if (g_virstor_debug < LVL_DEBUG2) 1837 return; 1838 printf("Component %d: %s\n", comp->index, comp->gcons->provider->name); 1839 printf(" chunk_count: %u\n", comp->chunk_count); 1840 printf(" chunk_next: %u\n", 
comp->chunk_next); 1841 printf(" flags: %u\n", comp->flags); 1842 } 1843 1844 #if 0 1845 /* Dump a map entry */ 1846 static void 1847 dump_me(struct virstor_map_entry *me, unsigned int nr) 1848 { 1849 if (g_virstor_debug < LVL_DEBUG) 1850 return; 1851 printf("VIRT. CHUNK #%d: ", nr); 1852 if ((me->flags & VIRSTOR_MAP_ALLOCATED) == 0) 1853 printf("(unallocated)\n"); 1854 else 1855 printf("allocated at provider %u, provider_chunk %u\n", 1856 me->provider_no, me->provider_chunk); 1857 } 1858 #endif 1859 1860 /* 1861 * Dismantle bio_queue and destroy its components 1862 */ 1863 static void 1864 bioq_dismantle(struct bio_queue_head *bq) 1865 { 1866 struct bio *b; 1867 1868 for (b = bioq_first(bq); b != NULL; b = bioq_first(bq)) { 1869 bioq_remove(bq, b); 1870 g_destroy_bio(b); 1871 } 1872 } 1873 1874 /* 1875 * The function that shouldn't be called. 1876 * When this is called, the stack is already garbled because of 1877 * argument mismatch. There's nothing to do now but panic, which is 1878 * accidentally the whole purpose of this function. 1879 * Motivation: to guard from accidentally calling geom methods when 1880 * they shouldn't be called. (see g_..._taste) 1881 */ 1882 static void 1883 invalid_call(void) 1884 { 1885 panic("invalid_call() has just been called. Something's fishy here."); 1886 } 1887 1888 DECLARE_GEOM_CLASS(g_virstor_class, g_virstor); /* Let there be light */ 1889