/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/stack.h>
#include <sys/sysctl.h>

#include <sys/errno.h>
#include <geom/geom.h>
#include <geom/geom_int.h>
#include <sys/devicestat.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>

static struct g_bioq g_bio_run_down;
static struct g_bioq g_bio_run_up;
static struct g_bioq g_bio_run_task;

static u_int pace;
static uma_zone_t biozone;
/*
 * The head of the list of classifiers used in g_io_request.
 * Use g_register_classifier() and g_unregister_classifier()
 * to add/remove entries to the list.
 * Classifiers are invoked in registration order.
 */
static TAILQ_HEAD(g_classifier_tailq, g_classifier_hook)
    g_classifier_tailq = TAILQ_HEAD_INITIALIZER(g_classifier_tailq);

#include <machine/atomic.h>

static void
g_bioq_lock(struct g_bioq *bq)
{

	mtx_lock(&bq->bio_queue_lock);
}

static void
g_bioq_unlock(struct g_bioq *bq)
{

	mtx_unlock(&bq->bio_queue_lock);
}

#if 0
static void
g_bioq_destroy(struct g_bioq *bq)
{

	mtx_destroy(&bq->bio_queue_lock);
}
#endif

static void
g_bioq_init(struct g_bioq *bq)
{

	TAILQ_INIT(&bq->bio_queue);
	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
}

static struct bio *
g_bioq_first(struct g_bioq *bq)
{
	struct bio *bp;

	bp = TAILQ_FIRST(&bq->bio_queue);
	if (bp != NULL) {
		KASSERT((bp->bio_flags & BIO_ONQUEUE),
		    ("Bio not on queue bp=%p target %p", bp, bq));
		bp->bio_flags &= ~BIO_ONQUEUE;
		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
		bq->bio_queue_length--;
	}
	return (bp);
}

struct bio *
g_new_bio(void)
{
	struct bio *bp;

	bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR1(KTR_GEOM, "g_new_bio(): %p", bp);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp);
}

struct bio *
g_alloc_bio(void)
{
	struct bio *bp;

	bp = uma_zalloc(biozone, M_WAITOK | M_ZERO);
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR1(KTR_GEOM, "g_alloc_bio(): %p", bp);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp);
}

void
g_destroy_bio(struct bio *bp)
{
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR1(KTR_GEOM, "g_destroy_bio(): %p", bp);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	uma_zfree(biozone, bp);
}
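/*
 * A GEOM class normally forwards a request by cloning the incoming bio in
 * its start method and pushing the clone down to one of its consumers with
 * g_io_request(); g_std_done() then propagates the completion back to the
 * parent.  A minimal, illustrative sketch only (g_example_start and the
 * single-consumer layout are hypothetical, not part of this file):
 *
 *	static void
 *	g_example_start(struct bio *bp)
 *	{
 *		struct bio *cbp;
 *
 *		cbp = g_clone_bio(bp);
 *		if (cbp == NULL) {
 *			g_io_deliver(bp, ENOMEM);
 *			return;
 *		}
 *		cbp->bio_done = g_std_done;
 *		g_io_request(cbp, LIST_FIRST(&bp->bio_to->geom->consumer));
 *	}
 */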
struct bio *
g_clone_bio(struct bio *bp)
{
	struct bio *bp2;

	bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
	if (bp2 != NULL) {
		bp2->bio_parent = bp;
		bp2->bio_cmd = bp->bio_cmd;
		/*
		 * BIO_ORDERED flag may be used by disk drivers to enforce
		 * ordering restrictions, so this flag needs to be cloned.
		 * BIO_UNMAPPED should be inherited, to properly indicate
		 * which way the buffer is passed.
		 * Other bio flags are not suitable for cloning.
		 */
		bp2->bio_flags = bp->bio_flags & (BIO_ORDERED | BIO_UNMAPPED);
		bp2->bio_length = bp->bio_length;
		bp2->bio_offset = bp->bio_offset;
		bp2->bio_data = bp->bio_data;
		bp2->bio_ma = bp->bio_ma;
		bp2->bio_ma_n = bp->bio_ma_n;
		bp2->bio_ma_offset = bp->bio_ma_offset;
		bp2->bio_attribute = bp->bio_attribute;
		/* Inherit classification info from the parent */
		bp2->bio_classifier1 = bp->bio_classifier1;
		bp2->bio_classifier2 = bp->bio_classifier2;
		bp->bio_children++;
	}
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR2(KTR_GEOM, "g_clone_bio(%p): %p", bp, bp2);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp2);
}

struct bio *
g_duplicate_bio(struct bio *bp)
{
	struct bio *bp2;

	bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO);
	bp2->bio_flags = bp->bio_flags & BIO_UNMAPPED;
	bp2->bio_parent = bp;
	bp2->bio_cmd = bp->bio_cmd;
	bp2->bio_length = bp->bio_length;
	bp2->bio_offset = bp->bio_offset;
	bp2->bio_data = bp->bio_data;
	bp2->bio_ma = bp->bio_ma;
	bp2->bio_ma_n = bp->bio_ma_n;
	bp2->bio_ma_offset = bp->bio_ma_offset;
	bp2->bio_attribute = bp->bio_attribute;
	bp->bio_children++;
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR2(KTR_GEOM, "g_duplicate_bio(%p): %p", bp, bp2);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp2);
}

void
g_io_init()
{

	g_bioq_init(&g_bio_run_down);
	g_bioq_init(&g_bio_run_up);
	g_bioq_init(&g_bio_run_task);
	biozone = uma_zcreate("g_bio", sizeof (struct bio),
	    NULL, NULL,
	    NULL, NULL,
	    0, 0);
}

int
g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
	bp = g_alloc_bio();
	bp->bio_cmd = BIO_GETATTR;
	bp->bio_done = NULL;
	bp->bio_attribute = attr;
	bp->bio_length = *len;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "ggetattr");
	*len = bp->bio_completed;
	g_destroy_bio(bp);
	return (error);
}

int
g_io_flush(struct g_consumer *cp)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_flush(%s)", cp->provider->name);
	bp = g_alloc_bio();
	bp->bio_cmd = BIO_FLUSH;
	bp->bio_flags |= BIO_ORDERED;
	bp->bio_done = NULL;
	bp->bio_attribute = NULL;
	bp->bio_offset = cp->provider->mediasize;
	bp->bio_length = 0;
	bp->bio_data = NULL;
	g_io_request(bp, cp);
	error = biowait(bp, "gflush");
	g_destroy_bio(bp);
	return (error);
}
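/*
 * g_io_getattr() and g_io_flush() are synchronous wrappers: they sleep in
 * biowait() until the request completes, so they may only be used from
 * threads that are allowed to sleep (not from the g_up/g_down threads).
 * A hedged usage sketch; the buffer size below is only an example:
 *
 *	char ident[100];
 *	int len, error;
 *
 *	len = sizeof(ident);
 *	error = g_io_getattr("GEOM::ident", cp, &len, ident);
 *	if (error == 0)
 *		printf("provider ident: %s\n", ident);
 */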
static int
g_io_check(struct bio *bp)
{
	struct g_consumer *cp;
	struct g_provider *pp;

	cp = bp->bio_from;
	pp = bp->bio_to;

	/* Fail if access counters don't allow the operation */
	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_GETATTR:
		if (cp->acr == 0)
			return (EPERM);
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_FLUSH:
		if (cp->acw == 0)
			return (EPERM);
		break;
	default:
		return (EPERM);
	}
	/* If provider is marked for error, don't disturb. */
	if (pp->error)
		return (pp->error);
	if (cp->flags & G_CF_ORPHAN)
		return (ENXIO);

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		/* Zero sectorsize or mediasize is probably a lack of media. */
		if (pp->sectorsize == 0 || pp->mediasize == 0)
			return (ENXIO);
		/* Reject I/O not on sector boundary */
		if (bp->bio_offset % pp->sectorsize)
			return (EINVAL);
		/* Reject I/O not integral sector long */
		if (bp->bio_length % pp->sectorsize)
			return (EINVAL);
		/* Reject requests before or past the end of media. */
		if (bp->bio_offset < 0)
			return (EIO);
		if (bp->bio_offset > pp->mediasize)
			return (EIO);
		break;
	default:
		break;
	}
	return (0);
}

/*
 * bio classification support.
 *
 * g_register_classifier() and g_unregister_classifier()
 * are used to add/remove a classifier from the list.
 * The list is protected using the g_bio_run_down lock,
 * because the classifiers are called in this path.
 *
 * g_io_request() passes bios that are not already classified
 * (i.e. those with bio_classifier1 == NULL) to g_run_classifiers().
 * Classifiers can store their result in the two fields
 * bio_classifier1 and bio_classifier2.
 * A classifier that updates one of the fields should
 * return a non-zero value.
 * If no classifier updates the field, g_run_classifiers() sets
 * bio_classifier1 = BIO_NOTCLASSIFIED to avoid further calls.
 */

int
g_register_classifier(struct g_classifier_hook *hook)
{

	g_bioq_lock(&g_bio_run_down);
	TAILQ_INSERT_TAIL(&g_classifier_tailq, hook, link);
	g_bioq_unlock(&g_bio_run_down);

	return (0);
}

void
g_unregister_classifier(struct g_classifier_hook *hook)
{
	struct g_classifier_hook *entry;

	g_bioq_lock(&g_bio_run_down);
	TAILQ_FOREACH(entry, &g_classifier_tailq, link) {
		if (entry == hook) {
			TAILQ_REMOVE(&g_classifier_tailq, hook, link);
			break;
		}
	}
	g_bioq_unlock(&g_bio_run_down);
}

static void
g_run_classifiers(struct bio *bp)
{
	struct g_classifier_hook *hook;
	int classified = 0;

	TAILQ_FOREACH(hook, &g_classifier_tailq, link)
		classified |= hook->func(hook->arg, bp);

	if (!classified)
		bp->bio_classifier1 = BIO_NOTCLASSIFIED;
}
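/*
 * A classifier is registered as a struct g_classifier_hook whose func is
 * called with (arg, bp) for every bio that has not been classified yet.
 * A minimal sketch; the callback and its tagging policy are hypothetical:
 *
 *	static int
 *	example_classify(void *arg, struct bio *bp)
 *	{
 *
 *		if (bp->bio_cmd == BIO_READ) {
 *			bp->bio_classifier1 = arg;
 *			return (1);
 *		}
 *		return (0);
 *	}
 *
 *	static struct g_classifier_hook example_hook = {
 *		.func = example_classify,
 *		.arg = &example_hook,
 *	};
 *
 *	error = g_register_classifier(&example_hook);
 */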
void
g_io_request(struct bio *bp, struct g_consumer *cp)
{
	struct g_provider *pp;
	int first;

	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
	pp = cp->provider;
	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));
#ifdef DIAGNOSTIC
	KASSERT(bp->bio_driver1 == NULL,
	    ("bio_driver1 used by the consumer (geom %s)", cp->geom->name));
	KASSERT(bp->bio_driver2 == NULL,
	    ("bio_driver2 used by the consumer (geom %s)", cp->geom->name));
	KASSERT(bp->bio_pflags == 0,
	    ("bio_pflags used by the consumer (geom %s)", cp->geom->name));
	/*
	 * Remember consumer's private fields, so we can detect if they were
	 * modified by the provider.
	 */
	bp->_bio_caller1 = bp->bio_caller1;
	bp->_bio_caller2 = bp->bio_caller2;
	bp->_bio_cflags = bp->bio_cflags;
#endif

	if (bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_GETATTR)) {
		KASSERT(bp->bio_data != NULL,
		    ("NULL bp->data in g_io_request(cmd=%hhu)", bp->bio_cmd));
	}
	if (bp->bio_cmd & (BIO_DELETE|BIO_FLUSH)) {
		KASSERT(bp->bio_data == NULL,
		    ("non-NULL bp->data in g_io_request(cmd=%hhu)",
		    bp->bio_cmd));
	}
	if (bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) {
		KASSERT(bp->bio_offset % cp->provider->sectorsize == 0,
		    ("wrong offset %jd for sectorsize %u",
		    bp->bio_offset, cp->provider->sectorsize));
		KASSERT(bp->bio_length % cp->provider->sectorsize == 0,
		    ("wrong length %jd for sectorsize %u",
		    bp->bio_length, cp->provider->sectorsize));
	}

	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);

	bp->bio_from = cp;
	bp->bio_to = pp;
	bp->bio_error = 0;
	bp->bio_completed = 0;

	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p", bp));
	bp->bio_flags |= BIO_ONQUEUE;

	if (g_collectstats)
		binuptime(&bp->bio_t0);
	else
		getbinuptime(&bp->bio_t0);

	/*
	 * The statistics collection is lockless, as such, but we
	 * can not update one instance of the statistics from more
	 * than one thread at a time, so grab the lock first.
	 *
	 * We also use the lock to protect the list of classifiers.
	 */
	g_bioq_lock(&g_bio_run_down);

	if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1)
		g_run_classifiers(bp);

	if (g_collectstats & 1)
		devstat_start_transaction(pp->stat, &bp->bio_t0);
	if (g_collectstats & 2)
		devstat_start_transaction(cp->stat, &bp->bio_t0);

	pp->nstart++;
	cp->nstart++;
	first = TAILQ_EMPTY(&g_bio_run_down.bio_queue);
	TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue);
	g_bio_run_down.bio_queue_length++;
	g_bioq_unlock(&g_bio_run_down);

	/* Pass it on down. */
	if (first)
		wakeup(&g_wait_down);
}
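/*
 * g_io_deliver() completes a request on behalf of the provider side: it
 * ends the devstat transaction and queues the bio on the up queue for the
 * g_up thread, except that ENOMEM completions are re-issued on the down
 * path after resetting the bio.
 */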
void
g_io_deliver(struct bio *bp, int error)
{
	struct g_consumer *cp;
	struct g_provider *pp;
	int first;

	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
	pp = bp->bio_to;
	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));
	cp = bp->bio_from;
	if (cp == NULL) {
		bp->bio_error = error;
		bp->bio_done(bp);
		return;
	}
	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
#ifdef DIAGNOSTIC
	/*
	 * Some classes - GJournal in particular - can modify bio's
	 * private fields while the bio is in transit; G_GEOM_VOLATILE_BIO
	 * flag means it's an expected behaviour for that particular geom.
	 */
	if ((cp->geom->flags & G_GEOM_VOLATILE_BIO) == 0) {
		KASSERT(bp->bio_caller1 == bp->_bio_caller1,
		    ("bio_caller1 used by the provider %s", pp->name));
		KASSERT(bp->bio_caller2 == bp->_bio_caller2,
		    ("bio_caller2 used by the provider %s", pp->name));
		KASSERT(bp->bio_cflags == bp->_bio_cflags,
		    ("bio_cflags used by the provider %s", pp->name));
	}
#endif
	KASSERT(bp->bio_completed >= 0, ("bio_completed can't be less than 0"));
	KASSERT(bp->bio_completed <= bp->bio_length,
	    ("bio_completed can't be greater than bio_length"));

	g_trace(G_T_BIO,
	    "g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);

	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p", bp));

	/*
	 * XXX: next two don't belong here
	 */
	bp->bio_bcount = bp->bio_length;
	bp->bio_resid = bp->bio_bcount - bp->bio_completed;

	/*
	 * The statistics collection is lockless, as such, but we
	 * can not update one instance of the statistics from more
	 * than one thread at a time, so grab the lock first.
	 */
	g_bioq_lock(&g_bio_run_up);
	if (g_collectstats & 1)
		devstat_end_transaction_bio(pp->stat, bp);
	if (g_collectstats & 2)
		devstat_end_transaction_bio(cp->stat, bp);

	cp->nend++;
	pp->nend++;
	if (error != ENOMEM) {
		bp->bio_error = error;
		first = TAILQ_EMPTY(&g_bio_run_up.bio_queue);
		TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue);
		bp->bio_flags |= BIO_ONQUEUE;
		g_bio_run_up.bio_queue_length++;
		g_bioq_unlock(&g_bio_run_up);
		if (first)
			wakeup(&g_wait_up);
		return;
	}
	g_bioq_unlock(&g_bio_run_up);

	if (bootverbose)
		printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
	bp->bio_children = 0;
	bp->bio_inbed = 0;
	bp->bio_driver1 = NULL;
	bp->bio_driver2 = NULL;
	bp->bio_pflags = 0;
	g_io_request(bp, cp);
	pace++;
	return;
}

SYSCTL_DECL(_kern_geom);

static long transient_maps;
SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD,
    &transient_maps, 0,
    "Total count of the transient mapping requests");
u_int transient_map_retries = 10;
SYSCTL_UINT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RW,
    &transient_map_retries, 0,
    "Max count of retries used before giving up on creating transient map");
int transient_map_hard_failures;
SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_hard_failures, CTLFLAG_RD,
    &transient_map_hard_failures, 0,
    "Failures to establish the transient mapping due to retry attempts "
    "exhausted");
int transient_map_soft_failures;
SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_soft_failures, CTLFLAG_RD,
    &transient_map_soft_failures, 0,
    "Count of retried failures to establish the transient mapping");
int inflight_transient_maps;
SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD,
    &inflight_transient_maps, 0,
    "Current count of the active transient maps");
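/*
 * Map the pages of an unmapped bio into bio_transient_map so that a
 * provider which does not advertise G_PF_ACCEPT_UNMAPPED still sees a
 * valid bio_data pointer.  Returns 0 on success.  If no KVA can be found
 * after the configured number of retries, the bio is completed with an
 * error and 1 is returned.
 */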
static int
g_io_transient_map_bio(struct bio *bp)
{
	vm_offset_t addr;
	long size;
	u_int retried;
	int rv;

	KASSERT(unmapped_buf_allowed, ("unmapped disabled"));

	size = round_page(bp->bio_ma_offset + bp->bio_length);
	KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp));
	addr = 0;
	retried = 0;
	atomic_add_long(&transient_maps, 1);
retry:
	vm_map_lock(bio_transient_map);
	if (vm_map_findspace(bio_transient_map, vm_map_min(bio_transient_map),
	    size, &addr)) {
		vm_map_unlock(bio_transient_map);
		if (transient_map_retries != 0 &&
		    retried >= transient_map_retries) {
			g_io_deliver(bp, EDEADLK/* XXXKIB */);
			CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s",
			    bp, bp->bio_to->name);
			atomic_add_int(&transient_map_hard_failures, 1);
			return (1);
		} else {
			/*
			 * Naive attempt to quiesce the I/O to get more
			 * in-flight requests completed and defragment
			 * the bio_transient_map.
			 */
			CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d",
			    bp, bp->bio_to->name, retried);
			pause("g_d_tra", hz / 10);
			retried++;
			atomic_add_int(&transient_map_soft_failures, 1);
			goto retry;
		}
	}
	rv = vm_map_insert(bio_transient_map, NULL, 0, addr, addr + size,
	    VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
	KASSERT(rv == KERN_SUCCESS,
	    ("vm_map_insert(bio_transient_map) rv %d %jx %lx",
	    rv, (uintmax_t)addr, size));
	vm_map_unlock(bio_transient_map);
	atomic_add_int(&inflight_transient_maps, 1);
	pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size));
	bp->bio_data = (caddr_t)addr + bp->bio_ma_offset;
	bp->bio_flags |= BIO_TRANSIENT_MAPPING;
	bp->bio_flags &= ~BIO_UNMAPPED;
	return (0);
}
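/*
 * Main loop of the g_down thread: dequeue bios from the down queue, sanity
 * check and possibly truncate them, establish a transient mapping for
 * unmapped bios when the provider needs one, and call the provider's start
 * method with sleeping disallowed.
 */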
void
g_io_schedule_down(struct thread *tp __unused)
{
	struct bio *bp;
	off_t excess;
	int error;

	for (;;) {
		g_bioq_lock(&g_bio_run_down);
		bp = g_bioq_first(&g_bio_run_down);
		if (bp == NULL) {
			CTR0(KTR_GEOM, "g_down going to sleep");
			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
			    PRIBIO | PDROP, "-", 0);
			continue;
		}
		CTR0(KTR_GEOM, "g_down has work to do");
		g_bioq_unlock(&g_bio_run_down);
		if (pace > 0) {
			CTR1(KTR_GEOM, "g_down pacing self (pace %d)", pace);
			pause("g_down", hz/10);
			pace--;
		}
		error = g_io_check(bp);
		if (error) {
			CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider "
			    "%s returned %d", bp, bp->bio_to->name, error);
			g_io_deliver(bp, error);
			continue;
		}
		CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp,
		    bp->bio_to->name);
		switch (bp->bio_cmd) {
		case BIO_READ:
		case BIO_WRITE:
		case BIO_DELETE:
			/* Truncate requests to the end of provider's media. */
			/*
			 * XXX: What if we truncate because of offset being
			 * bad, not length?
			 */
			excess = bp->bio_offset + bp->bio_length;
			if (excess > bp->bio_to->mediasize) {
				KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
				    round_page(bp->bio_ma_offset +
				    bp->bio_length) / PAGE_SIZE == bp->bio_ma_n,
				    ("excess bio %p too short", bp));
				excess -= bp->bio_to->mediasize;
				bp->bio_length -= excess;
				if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
					bp->bio_ma_n = round_page(
					    bp->bio_ma_offset +
					    bp->bio_length) / PAGE_SIZE;
				}
				if (excess > 0)
					CTR3(KTR_GEOM, "g_down truncated bio "
					    "%p provider %s by %d", bp,
					    bp->bio_to->name, excess);
			}
			/* Deliver zero length transfers right here. */
			if (bp->bio_length == 0) {
				g_io_deliver(bp, 0);
				CTR2(KTR_GEOM, "g_down terminated 0-length "
				    "bp %p provider %s", bp, bp->bio_to->name);
				continue;
			}
			break;
		default:
			break;
		}
		if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
		    (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
		    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
			if (g_io_transient_map_bio(bp))
				continue;
		}
		THREAD_NO_SLEEPING();
		CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld "
		    "len %ld", bp, bp->bio_to->name, bp->bio_offset,
		    bp->bio_length);
		bp->bio_to->geom->start(bp);
		THREAD_SLEEPING_OK();
	}
}

void
bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg)
{
	bp->bio_task = func;
	bp->bio_task_arg = arg;
	/*
	 * The taskqueue is actually just a second queue off the "up"
	 * queue, so we use the same lock.
	 */
	g_bioq_lock(&g_bio_run_up);
	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p target taskq", bp));
	bp->bio_flags |= BIO_ONQUEUE;
	TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue);
	g_bio_run_task.bio_queue_length++;
	wakeup(&g_wait_up);
	g_bioq_unlock(&g_bio_run_up);
}

void
g_io_schedule_up(struct thread *tp __unused)
{
	struct bio *bp;

	for (;;) {
		g_bioq_lock(&g_bio_run_up);
		bp = g_bioq_first(&g_bio_run_task);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			THREAD_NO_SLEEPING();
			CTR1(KTR_GEOM, "g_up processing task bp %p", bp);
			bp->bio_task(bp->bio_task_arg);
			THREAD_SLEEPING_OK();
			continue;
		}
		bp = g_bioq_first(&g_bio_run_up);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			THREAD_NO_SLEEPING();
			CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off "
			    "%jd len %ld", bp, bp->bio_to->name,
			    bp->bio_offset, bp->bio_length);
			biodone(bp);
			THREAD_SLEEPING_OK();
			continue;
		}
		CTR0(KTR_GEOM, "g_up going to sleep");
		msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
		    PRIBIO | PDROP, "-", 0);
	}
}
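/*
 * g_read_data(), g_write_data() and g_delete_data() are synchronous
 * helpers intended for small transfers such as metadata sectors; like the
 * other biowait() users they may sleep.  A hedged sketch of reading the
 * last sector of a provider:
 *
 *	u_char *sector;
 *	int error;
 *
 *	sector = g_read_data(cp, cp->provider->mediasize -
 *	    cp->provider->sectorsize, cp->provider->sectorsize, &error);
 *	if (sector != NULL) {
 *		... inspect the metadata ...
 *		g_free(sector);
 *	}
 */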
void *
g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
{
	struct bio *bp;
	void *ptr;
	int errorc;

	KASSERT(length > 0 && length >= cp->provider->sectorsize &&
	    length <= MAXPHYS, ("g_read_data(): invalid length %jd",
	    (intmax_t)length));

	bp = g_alloc_bio();
	bp->bio_cmd = BIO_READ;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	ptr = g_malloc(length, M_WAITOK);
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	errorc = biowait(bp, "gread");
	if (error != NULL)
		*error = errorc;
	g_destroy_bio(bp);
	if (errorc) {
		g_free(ptr);
		ptr = NULL;
	}
	return (ptr);
}

int
g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
{
	struct bio *bp;
	int error;

	KASSERT(length > 0 && length >= cp->provider->sectorsize &&
	    length <= MAXPHYS, ("g_write_data(): invalid length %jd",
	    (intmax_t)length));

	bp = g_alloc_bio();
	bp->bio_cmd = BIO_WRITE;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "gwrite");
	g_destroy_bio(bp);
	return (error);
}

int
g_delete_data(struct g_consumer *cp, off_t offset, off_t length)
{
	struct bio *bp;
	int error;

	KASSERT(length > 0 && length >= cp->provider->sectorsize,
	    ("g_delete_data(): invalid length %jd", (intmax_t)length));

	bp = g_alloc_bio();
	bp->bio_cmd = BIO_DELETE;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_data = NULL;
	g_io_request(bp, cp);
	error = biowait(bp, "gdelete");
	g_destroy_bio(bp);
	return (error);
}

void
g_print_bio(struct bio *bp)
{
	const char *pname, *cmd = NULL;

	if (bp->bio_to != NULL)
		pname = bp->bio_to->name;
	else
		pname = "[unknown]";

	switch (bp->bio_cmd) {
	case BIO_GETATTR:
		cmd = "GETATTR";
		printf("%s[%s(attr=%s)]", pname, cmd, bp->bio_attribute);
		return;
	case BIO_FLUSH:
		cmd = "FLUSH";
		printf("%s[%s]", pname, cmd);
		return;
	case BIO_READ:
		cmd = "READ";
		break;
	case BIO_WRITE:
		cmd = "WRITE";
		break;
	case BIO_DELETE:
		cmd = "DELETE";
		break;
	default:
		cmd = "UNKNOWN";
		printf("%s[%s()]", pname, cmd);
		return;
	}
	printf("%s[%s(offset=%jd, length=%jd)]", pname, cmd,
	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);
}