1 /*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * $FreeBSD$ 36 */ 37 38 39 #include <sys/param.h> 40 #ifndef _KERNEL 41 #include <stdio.h> 42 #include <unistd.h> 43 #include <stdlib.h> 44 #include <signal.h> 45 #include <string.h> 46 #include <err.h> 47 #else 48 #include <sys/systm.h> 49 #include <sys/kernel.h> 50 #include <sys/malloc.h> 51 #include <sys/bio.h> 52 #include <sys/sysctl.h> 53 #include <sys/proc.h> 54 #include <sys/kthread.h> 55 #include <sys/lock.h> 56 #include <sys/mutex.h> 57 #endif 58 #include <sys/errno.h> 59 #include <sys/sbuf.h> 60 #include <geom/geom.h> 61 #include <machine/stdarg.h> 62 63 struct class_list_head g_classs = LIST_HEAD_INITIALIZER(g_classs); 64 static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms); 65 static int g_nproviders; 66 char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim; 67 68 static int g_ignition; 69 70 void 71 g_add_class(struct g_class *mp) 72 { 73 74 if (!g_ignition) { 75 g_ignition++; 76 g_init(); 77 } 78 g_topology_lock(); 79 g_trace(G_T_TOPOLOGY, "g_add_class(%s)", mp->name); 80 LIST_INIT(&mp->geom); 81 LIST_INSERT_HEAD(&g_classs, mp, class); 82 if (g_nproviders > 0) 83 g_post_event(EV_NEW_CLASS, mp, NULL, NULL, NULL); 84 g_topology_unlock(); 85 } 86 87 struct g_geom * 88 g_new_geomf(struct g_class *mp, char *fmt, ...) 89 { 90 struct g_geom *gp; 91 va_list ap; 92 struct sbuf *sb; 93 94 g_topology_assert(); 95 va_start(ap, fmt); 96 mtx_lock(&Giant); 97 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 98 sbuf_vprintf(sb, fmt, ap); 99 sbuf_finish(sb); 100 mtx_unlock(&Giant); 101 gp = g_malloc(sizeof *gp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); 102 gp->name = (char *)(gp + 1); 103 gp->class = mp; 104 gp->rank = 1; 105 LIST_INIT(&gp->consumer); 106 LIST_INIT(&gp->provider); 107 LIST_INSERT_HEAD(&mp->geom, gp, geom); 108 TAILQ_INSERT_HEAD(&geoms, gp, geoms); 109 strcpy(gp->name, sbuf_data(sb)); 110 sbuf_delete(sb); 111 return (gp); 112 } 113 114 void 115 g_destroy_geom(struct g_geom *gp) 116 { 117 118 g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name); 119 g_topology_assert(); 120 KASSERT(gp->event == NULL, ("g_destroy_geom() with event")); 121 KASSERT(LIST_EMPTY(&gp->consumer), 122 ("g_destroy_geom(%s) with consumer(s) [%p]", 123 gp->name, LIST_FIRST(&gp->consumer))); 124 KASSERT(LIST_EMPTY(&gp->provider), 125 ("g_destroy_geom(%s) with provider(s) [%p]", 126 gp->name, LIST_FIRST(&gp->consumer))); 127 LIST_REMOVE(gp, geom); 128 TAILQ_REMOVE(&geoms, gp, geoms); 129 g_free(gp); 130 } 131 132 struct g_consumer * 133 g_new_consumer(struct g_geom *gp) 134 { 135 struct g_consumer *cp; 136 137 g_topology_assert(); 138 KASSERT(gp->class->orphan != NULL, 139 ("g_new_consumer on class(%s) without orphan", gp->class->name)); 140 141 cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO); 142 cp->geom = gp; 143 LIST_INSERT_HEAD(&gp->consumer, cp, consumer); 144 return(cp); 145 } 146 147 void 148 g_destroy_consumer(struct g_consumer *cp) 149 { 150 151 g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp); 152 g_topology_assert(); 153 KASSERT(cp->event == NULL, ("g_destroy_consumer() with event")); 154 KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached")); 155 KASSERT (cp->acr == 0, ("g_destroy_consumer with acr")); 156 KASSERT (cp->acw == 0, ("g_destroy_consumer with acw")); 157 KASSERT (cp->ace == 0, ("g_destroy_consumer with ace")); 158 LIST_REMOVE(cp, consumer); 159 g_free(cp); 160 } 161 162 struct g_provider * 163 g_new_providerf(struct g_geom *gp, char *fmt, ...) 164 { 165 struct g_provider *pp; 166 struct sbuf *sb; 167 va_list ap; 168 169 g_topology_assert(); 170 va_start(ap, fmt); 171 mtx_lock(&Giant); 172 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 173 sbuf_vprintf(sb, fmt, ap); 174 sbuf_finish(sb); 175 mtx_unlock(&Giant); 176 pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); 177 pp->name = (char *)(pp + 1); 178 strcpy(pp->name, sbuf_data(sb)); 179 sbuf_delete(sb); 180 LIST_INIT(&pp->consumers); 181 pp->error = ENXIO; 182 pp->geom = gp; 183 LIST_INSERT_HEAD(&gp->provider, pp, provider); 184 g_nproviders++; 185 g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL); 186 return (pp); 187 } 188 189 void 190 g_error_provider(struct g_provider *pp, int error) 191 { 192 193 pp->error = error; 194 } 195 196 197 void 198 g_destroy_provider(struct g_provider *pp) 199 { 200 struct g_geom *gp; 201 struct g_consumer *cp; 202 203 g_topology_assert(); 204 KASSERT(pp->event == NULL, ("g_destroy_provider() with event")); 205 KASSERT(LIST_EMPTY(&pp->consumers), 206 ("g_destroy_provider but attached")); 207 KASSERT (pp->acr == 0, ("g_destroy_provider with acr")); 208 KASSERT (pp->acw == 0, ("g_destroy_provider with acw")); 209 KASSERT (pp->acw == 0, ("g_destroy_provider with ace")); 210 g_nproviders--; 211 LIST_REMOVE(pp, provider); 212 gp = pp->geom; 213 g_free(pp); 214 if (!(gp->flags & G_GEOM_WITHER)) 215 return; 216 if (!LIST_EMPTY(&gp->provider)) 217 return; 218 for (;;) { 219 cp = LIST_FIRST(&gp->consumer); 220 if (cp == NULL) 221 break; 222 g_dettach(cp); 223 g_destroy_consumer(cp); 224 } 225 g_destroy_geom(gp); 226 } 227 228 /* 229 * We keep the "geoms" list sorted by topological order (== increasing 230 * numerical rank) at all times. 231 * When an attach is done, the attaching geoms rank is invalidated 232 * and it is moved to the tail of the list. 233 * All geoms later in the sequence has their ranks reevaluated in 234 * sequence. If we cannot assign rank to a geom because it's 235 * prerequisites do not have rank, we move that element to the tail 236 * of the sequence with invalid rank as well. 237 * At some point we encounter our original geom and if we stil fail 238 * to assign it a rank, there must be a loop and we fail back to 239 * g_attach() which dettach again and calls redo_rank again 240 * to fix up the damage. 241 * It would be much simpler code wise to do it recursively, but we 242 * can't risk that on the kernel stack. 243 */ 244 245 static int 246 redo_rank(struct g_geom *gp) 247 { 248 struct g_consumer *cp; 249 struct g_geom *gp1, *gp2; 250 int n, m; 251 252 g_topology_assert(); 253 254 /* Invalidate this geoms rank and move it to the tail */ 255 gp1 = TAILQ_NEXT(gp, geoms); 256 if (gp1 != NULL) { 257 gp->rank = 0; 258 TAILQ_REMOVE(&geoms, gp, geoms); 259 TAILQ_INSERT_TAIL(&geoms, gp, geoms); 260 } else { 261 gp1 = gp; 262 } 263 264 /* re-rank the rest of the sequence */ 265 for (; gp1 != NULL; gp1 = gp2) { 266 gp1->rank = 0; 267 m = 1; 268 LIST_FOREACH(cp, &gp1->consumer, consumer) { 269 if (cp->provider == NULL) 270 continue; 271 n = cp->provider->geom->rank; 272 if (n == 0) { 273 m = 0; 274 break; 275 } else if (n >= m) 276 m = n + 1; 277 } 278 gp1->rank = m; 279 gp2 = TAILQ_NEXT(gp1, geoms); 280 281 /* got a rank, moving on */ 282 if (m != 0) 283 continue; 284 285 /* no rank to original geom means loop */ 286 if (gp == gp1) { 287 return (ELOOP); 288 289 /* no rank, put it at the end move on */ 290 TAILQ_REMOVE(&geoms, gp1, geoms); 291 TAILQ_INSERT_TAIL(&geoms, gp1, geoms); 292 } 293 } 294 return (0); 295 } 296 297 int 298 g_attach(struct g_consumer *cp, struct g_provider *pp) 299 { 300 int error; 301 302 g_topology_assert(); 303 KASSERT(cp->provider == NULL, ("attach but attached")); 304 cp->provider = pp; 305 LIST_INSERT_HEAD(&pp->consumers, cp, consumers); 306 error = redo_rank(cp->geom); 307 if (error) { 308 LIST_REMOVE(cp, consumers); 309 cp->provider = NULL; 310 redo_rank(cp->geom); 311 } 312 return (error); 313 } 314 315 void 316 g_dettach(struct g_consumer *cp) 317 { 318 struct g_provider *pp; 319 320 g_trace(G_T_TOPOLOGY, "g_dettach(%p)", cp); 321 KASSERT(cp != (void*)0xd0d0d0d0, ("ARGH!")); 322 g_topology_assert(); 323 KASSERT(cp->provider != NULL, ("dettach but not attached")); 324 KASSERT(cp->acr == 0, ("dettach but nonzero acr")); 325 KASSERT(cp->acw == 0, ("dettach but nonzero acw")); 326 KASSERT(cp->ace == 0, ("dettach but nonzero ace")); 327 KASSERT(cp->biocount == 0, ("dettach but nonzero biocount")); 328 pp = cp->provider; 329 LIST_REMOVE(cp, consumers); 330 cp->provider = NULL; 331 if (LIST_EMPTY(&pp->consumers)) { 332 if (pp->geom->flags & G_GEOM_WITHER) 333 g_destroy_provider(pp); 334 } 335 redo_rank(cp->geom); 336 } 337 338 339 /* 340 * g_access_abs() 341 * 342 * Access-check with absolute new values: Just fall through 343 * and use the relative version. 344 */ 345 int 346 g_access_abs(struct g_consumer *cp, int acr, int acw, int ace) 347 { 348 349 g_topology_assert(); 350 return(g_access_rel(cp, 351 acr - cp->acr, 352 acw - cp->acw, 353 ace - cp->ace)); 354 } 355 356 /* 357 * g_access_rel() 358 * 359 * Access-check with delta values. The question asked is "can provider 360 * "cp" change the access counters by the relative amounts dc[rwe] ?" 361 */ 362 363 int 364 g_access_rel(struct g_consumer *cp, int dcr, int dcw, int dce) 365 { 366 struct g_provider *pp; 367 int pr,pw,pe; 368 int error; 369 370 pp = cp->provider; 371 372 g_trace(G_T_ACCESS, "g_access_rel(%p(%s), %d, %d, %d)", 373 cp, pp->name, dcr, dcw, dce); 374 375 g_topology_assert(); 376 KASSERT(cp->provider != NULL, ("access but not attached")); 377 KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr")); 378 KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw")); 379 KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace")); 380 KASSERT(pp->geom->class->access != NULL, ("NULL class->access")); 381 382 /* 383 * If our class cares about being spoiled, and we have been, we 384 * are probably just ahead of the event telling us that. Fail 385 * now rather than having to unravel this later. 386 */ 387 if (cp->geom->spoiled != NULL && cp->spoiled) { 388 KASSERT(dcr >= 0, ("spoiled but dcr = %d", dcr)); 389 KASSERT(dcw >= 0, ("spoiled but dce = %d", dcw)); 390 KASSERT(dce >= 0, ("spoiled but dcw = %d", dce)); 391 KASSERT(cp->acr == 0, ("spoiled but cp->acr = %d", cp->acr)); 392 KASSERT(cp->acw == 0, ("spoiled but cp->acw = %d", cp->acw)); 393 KASSERT(cp->ace == 0, ("spoiled but cp->ace = %d", cp->ace)); 394 return(ENXIO); 395 } 396 397 /* 398 * Figure out what counts the provider would have had, if this 399 * consumer had (r0w0e0) at this time. 400 */ 401 pr = pp->acr - cp->acr; 402 pw = pp->acw - cp->acw; 403 pe = pp->ace - cp->ace; 404 405 g_trace(G_T_ACCESS, 406 "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)", 407 dcr, dcw, dce, 408 cp->acr, cp->acw, cp->ace, 409 pp->acr, pp->acw, pp->ace, 410 pp, pp->name); 411 412 /* If we try exclusive but already write: fail */ 413 if (dce > 0 && pw > 0) 414 return (EPERM); 415 /* If we try write but already exclusive: fail */ 416 if (dcw > 0 && pe > 0) 417 return (EPERM); 418 /* If we try to open more but provider is error'ed: fail */ 419 if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0) 420 return (pp->error); 421 422 /* Ok then... */ 423 424 /* 425 * If we open first write, spoil any partner consumers. 426 * If we close last write, trigger re-taste. 427 */ 428 if (pp->acw == 0 && dcw != 0) 429 g_spoil(pp, cp); 430 else if (pp->acw != 0 && pp->acw == -dcw && !(pp->geom->flags & G_GEOM_WITHER)) 431 g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL); 432 433 error = pp->geom->class->access(pp, dcr, dcw, dce); 434 if (!error) { 435 pp->acr += dcr; 436 pp->acw += dcw; 437 pp->ace += dce; 438 cp->acr += dcr; 439 cp->acw += dcw; 440 cp->ace += dce; 441 } 442 return (error); 443 } 444 445 int 446 g_haveattr_int(struct bio *bp, char *attribute, int val) 447 { 448 449 return (g_haveattr(bp, attribute, &val, sizeof val)); 450 } 451 452 int 453 g_haveattr_off_t(struct bio *bp, char *attribute, off_t val) 454 { 455 456 return (g_haveattr(bp, attribute, &val, sizeof val)); 457 } 458 459 460 int 461 g_haveattr(struct bio *bp, char *attribute, void *val, int len) 462 { 463 int error; 464 465 if (strcmp(bp->bio_attribute, attribute)) 466 return (0); 467 if (bp->bio_length != len) { 468 printf("bio_length %lld len %d -> EFAULT\n", bp->bio_length, len); 469 error = EFAULT; 470 } else { 471 error = 0; 472 bcopy(val, bp->bio_data, len); 473 bp->bio_completed = len; 474 } 475 bp->bio_error = error; 476 g_io_deliver(bp); 477 return (1); 478 } 479 480 int 481 g_std_access(struct g_provider *pp __unused, 482 int dr __unused, int dw __unused, int de __unused) 483 { 484 485 return (0); 486 } 487 488 void 489 g_std_done(struct bio *bp) 490 { 491 struct bio *bp2; 492 493 bp2 = bp->bio_linkage; 494 bp2->bio_error = bp->bio_error; 495 bp2->bio_completed = bp->bio_completed; 496 g_destroy_bio(bp); 497 g_io_deliver(bp2); 498 } 499 500 /* XXX: maybe this is only g_slice_spoiled */ 501 502 void 503 g_std_spoiled(struct g_consumer *cp) 504 { 505 struct g_geom *gp; 506 struct g_provider *pp; 507 508 g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp); 509 g_topology_assert(); 510 g_dettach(cp); 511 gp = cp->geom; 512 LIST_FOREACH(pp, &gp->provider, provider) 513 g_orphan_provider(pp, ENXIO); 514 g_destroy_consumer(cp); 515 if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer)) 516 g_destroy_geom(gp); 517 else 518 gp->flags |= G_GEOM_WITHER; 519 } 520 521 /* 522 * Spoiling happens when a provider is opened for writing, but consumers 523 * which are configured by in-band data are attached (slicers for instance). 524 * Since the write might potentially change the in-band data, such consumers 525 * need to re-evaluate their existence after the writing session closes. 526 * We do this by (offering to) tear them down when the open for write happens 527 * in return for a re-taste when it closes again. 528 * Together with the fact that such consumers grab an 'e' bit whenever they 529 * are open, regardless of mode, this ends up DTRT. 530 */ 531 532 void 533 g_spoil(struct g_provider *pp, struct g_consumer *cp) 534 { 535 struct g_consumer *cp2; 536 537 g_topology_assert(); 538 539 LIST_FOREACH(cp2, &pp->consumers, consumers) { 540 if (cp2 == cp) 541 continue; 542 /* 543 KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr)); 544 KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw)); 545 */ 546 KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace)); 547 cp2->spoiled++; 548 } 549 g_post_event(EV_SPOILED, NULL, NULL, pp, cp); 550 } 551 552 static struct g_class * 553 g_class_by_name(char *name) 554 { 555 struct g_class *mp; 556 557 g_trace(G_T_TOPOLOGY, "g_class_by_name(%s)", name); 558 g_topology_assert(); 559 LIST_FOREACH(mp, &g_classs, class) 560 if (!strcmp(mp->name, name)) 561 return (mp); 562 return (NULL); 563 } 564 565 struct g_geom * 566 g_create_geomf(char *class, struct g_provider *pp, char *fmt, ...) 567 { 568 va_list ap; 569 struct sbuf *sb; 570 char *s; 571 struct g_class *mp; 572 struct g_geom *gp; 573 574 g_trace(G_T_TOPOLOGY, "g_create_geom(%s, %p(%s))", class, 575 pp, pp == NULL ? "" : pp->name); 576 g_topology_assert(); 577 gp = NULL; 578 mp = g_class_by_name(class); 579 if (mp == NULL) 580 return (NULL); 581 if (fmt != NULL) { 582 va_start(ap, fmt); 583 mtx_lock(&Giant); 584 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 585 sbuf_vprintf(sb, fmt, ap); 586 sbuf_finish(sb); 587 mtx_unlock(&Giant); 588 s = sbuf_data(sb); 589 } else { 590 s = NULL; 591 } 592 if (pp != NULL) 593 gp = mp->taste(mp, pp, NULL, G_TF_INSIST); 594 if (gp == NULL && mp->create_geom == NULL) 595 return (NULL); 596 if (gp == NULL) 597 gp = mp->create_geom(mp, pp, s); 598 /* XXX: delete sbuf */ 599 return (gp); 600 } 601 602 struct g_geom * 603 g_insert_geom(char *class, struct g_consumer *cp) 604 { 605 struct g_class *mp; 606 struct g_geom *gp; 607 struct g_provider *pp, *pp2; 608 struct g_consumer *cp2; 609 int error; 610 611 g_trace(G_T_TOPOLOGY, "g_insert_geomf(%s, %p)", class, cp); 612 g_topology_assert(); 613 KASSERT(cp->provider != NULL, ("g_insert_geomf but not attached")); 614 /* XXX: check for events ?? */ 615 mp = g_class_by_name(class); 616 if (mp == NULL) 617 return (NULL); 618 if (mp->create_geom == NULL) 619 return (NULL); 620 pp = cp->provider; 621 gp = mp->taste(mp, pp, NULL, G_TF_TRANSPARENT); 622 if (gp == NULL) 623 return (NULL); 624 pp2 = LIST_FIRST(&gp->provider); 625 cp2 = LIST_FIRST(&gp->consumer); 626 cp2->acr += pp->acr; 627 cp2->acw += pp->acw; 628 cp2->ace += pp->ace; 629 pp2->acr += pp->acr; 630 pp2->acw += pp->acw; 631 pp2->ace += pp->ace; 632 LIST_REMOVE(cp, consumers); 633 LIST_INSERT_HEAD(&pp2->consumers, cp, consumers); 634 cp->provider = pp2; 635 error = redo_rank(gp); 636 KASSERT(error == 0, ("redo_rank failed in g_insert_geom")); 637 return (gp); 638 } 639 640