/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
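/*
 * This file implements the core of the GEOM I/O path: allocation and
 * recycling of struct bio (g_new_bio(), g_destroy_bio(), g_clone_bio()),
 * submission of requests from consumers to providers (g_io_request()),
 * delivery of completions back up the stack (g_io_deliver()), the queue
 * service loops run on behalf of the g_down and g_up threads, and the
 * synchronous convenience wrappers (g_io_getattr(), g_io_setattr(),
 * g_read_data(), g_write_data()).
 */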
#include <sys/param.h>
#include <sys/stdint.h>
#ifndef _KERNEL
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <err.h>
#include <sched.h>
#else
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#endif

#include <sys/errno.h>
#include <geom/geom.h>
#include <geom/geom_int.h>
#include <geom/geom_stats.h>
#include <machine/atomic.h>

static struct g_bioq g_bio_run_down;
static struct g_bioq g_bio_run_up;
static struct g_bioq g_bio_run_task;
static struct g_bioq g_bio_idle;

static u_int pace;

static void
g_bioq_lock(struct g_bioq *bq)
{

	mtx_lock(&bq->bio_queue_lock);
}

static void
g_bioq_unlock(struct g_bioq *bq)
{

	mtx_unlock(&bq->bio_queue_lock);
}

#if 0
static void
g_bioq_destroy(struct g_bioq *bq)
{

	mtx_destroy(&bq->bio_queue_lock);
}
#endif

static void
g_bioq_init(struct g_bioq *bq)
{

	TAILQ_INIT(&bq->bio_queue);
	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
}

/* Dequeue the first bio, if any.  The caller must hold the queue lock. */
static struct bio *
g_bioq_first(struct g_bioq *bq)
{
	struct bio *bp;

	bp = TAILQ_FIRST(&bq->bio_queue);
	if (bp != NULL) {
		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
		bq->bio_queue_length--;
	}
	return (bp);
}

static void
g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq)
{

	g_bioq_lock(rq);
	TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue);
	rq->bio_queue_length++;
	g_bioq_unlock(rq);
}

/*
 * Allocate a bio, preferably by recycling one from the idle queue.
 * May return NULL, since the fallback allocation uses M_NOWAIT.
 */
struct bio *
g_new_bio(void)
{
	struct bio *bp;

	g_bioq_lock(&g_bio_idle);
	bp = g_bioq_first(&g_bio_idle);
	g_bioq_unlock(&g_bio_idle);
	if (bp == NULL)
		bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO);
	/* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */
	return (bp);
}

void
g_destroy_bio(struct bio *bp)
{

	/* g_trace(G_T_BIO, "g_destroy_bio(%p)", bp); */
	bzero(bp, sizeof *bp);
	g_bioq_enqueue_tail(bp, &g_bio_idle);
}

/*
 * Clone a bio for forwarding down the stack.  The clone shares the
 * parent's data buffer; the parent's child count is only bumped on
 * success.
 */
struct bio *
g_clone_bio(struct bio *bp)
{
	struct bio *bp2;

	bp2 = g_new_bio();
	if (bp2 != NULL) {
		bp2->bio_parent = bp;
		bp2->bio_cmd = bp->bio_cmd;
		bp2->bio_length = bp->bio_length;
		bp2->bio_offset = bp->bio_offset;
		bp2->bio_data = bp->bio_data;
		bp2->bio_attribute = bp->bio_attribute;
		bp->bio_children++;
	}
	/* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */
	return (bp2);
}

void
g_io_init(void)
{

	g_bioq_init(&g_bio_run_down);
	g_bioq_init(&g_bio_run_up);
	g_bioq_init(&g_bio_run_task);
	g_bioq_init(&g_bio_idle);
}
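/*
 * Illustrative sketch, not part of this file: a transformation class
 * typically clones incoming bios in its start method and forwards the
 * clone to its own consumer.  "example_start" and the single-consumer
 * assumption are hypothetical; g_std_done is GEOM's stock completion
 * handler, which finishes the parent once the clone completes.
 */
#if 0
static void
example_start(struct bio *bp)
{
	struct g_geom *gp = bp->bio_to->geom;
	struct bio *bp2;

	bp2 = g_clone_bio(bp);
	if (bp2 == NULL) {
		/* No memory for a clone: fail the parent request. */
		g_io_deliver(bp, ENOMEM);
		return;
	}
	bp2->bio_done = g_std_done;
	g_io_request(bp2, LIST_FIRST(&gp->consumer));
}
#endif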
int
g_io_setattr(const char *attr, struct g_consumer *cp, int len, void *ptr)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_setattr(%s)", attr);
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_SETATTR;
	bp->bio_done = NULL;
	bp->bio_attribute = attr;
	bp->bio_length = len;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "gsetattr");
	g_destroy_bio(bp);
	return (error);
}

int
g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_GETATTR;
	bp->bio_done = NULL;
	bp->bio_attribute = attr;
	bp->bio_length = *len;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "ggetattr");
	*len = bp->bio_completed;
	g_destroy_bio(bp);
	return (error);
}

/*
 * Sanity-check a request before it is handed to the provider's geom.
 */
static int
g_io_check(struct bio *bp)
{
	struct g_consumer *cp;
	struct g_provider *pp;

	cp = bp->bio_from;
	pp = bp->bio_to;

	/* Fail if access counters don't allow the operation. */
	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_GETATTR:
		if (cp->acr == 0)
			return (EPERM);
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_SETATTR:
		if (cp->acw == 0)
			return (EPERM);
		break;
	default:
		return (EPERM);
	}
	/* If the provider is marked for error, don't disturb it. */
	if (pp->error)
		return (pp->error);

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		/* Reject I/O not on sector boundary. */
		if (bp->bio_offset % pp->sectorsize)
			return (EINVAL);
		/* Reject I/O not an integral number of sectors long. */
		if (bp->bio_length % pp->sectorsize)
			return (EINVAL);
		/* Reject requests past the end of media. */
		if (bp->bio_offset > pp->mediasize)
			return (EIO);
		break;
	default:
		break;
	}
	return (0);
}

void
g_io_request(struct bio *bp, struct g_consumer *cp)
{
	struct g_provider *pp;
	struct bintime bt;

	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
	KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request"));
	pp = cp->provider;
	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));

	bp->bio_from = cp;
	bp->bio_to = pp;
	bp->bio_error = 0;
	bp->bio_completed = 0;

	if (g_collectstats) {
		binuptime(&bt);
		bp->bio_t0 = bt;
		if (cp->stat->nop == cp->stat->nend)
			cp->stat->wentbusy = bt; /* Consumer is idle */
		if (pp->stat->nop == pp->stat->nend)
			pp->stat->wentbusy = bt; /* Provider is idle */
	}
	cp->stat->nop++;
	pp->stat->nop++;

	/* Pass it on down. */
	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
	g_bioq_enqueue_tail(bp, &g_bio_run_down);
	wakeup(&g_wait_down);
}
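/*
 * Hypothetical sketch of the provider side of the contract: every bio
 * accepted by a geom's start method must eventually be finished with
 * g_io_deliver().  "example_softc" and "example_intr" are invented
 * names; a real driver would have queued the bio to its hardware from
 * start() and completes it here once the transfer finishes.
 */
#if 0
static void
example_intr(void *arg)
{
	struct example_softc *sc = arg;
	struct bio *bp = sc->sc_bp;

	bp->bio_completed = bp->bio_length;	/* everything transferred */
	g_io_deliver(bp, 0);	/* queue the completion for the up path */
}
#endif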
void
g_io_deliver(struct bio *bp, int error)
{
	struct g_consumer *cp;
	struct g_provider *pp;
	struct bintime t1, dt;
	int idx;

	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
	cp = bp->bio_from;
	pp = bp->bio_to;
	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));

	g_trace(G_T_BIO,
	    "g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);

	if (g_collectstats) {
		switch (bp->bio_cmd) {
		case BIO_READ:    idx = G_STAT_IDX_READ;   break;
		case BIO_WRITE:   idx = G_STAT_IDX_WRITE;  break;
		case BIO_DELETE:  idx = G_STAT_IDX_DELETE; break;
		case BIO_GETATTR: idx = -1; break;
		case BIO_SETATTR: idx = -1; break;
		default:
			panic("unknown bio_cmd in g_io_deliver");
			break;
		}
		binuptime(&t1);
		/* Raise the "inconsistent" flag for userland. */
		atomic_add_acq_int(&cp->stat->seq0, 1);
		atomic_add_acq_int(&pp->stat->seq0, 1);
		if (idx >= 0) {
			/* Account the service time */
			dt = t1;
			bintime_sub(&dt, &bp->bio_t0);
			bintime_add(&cp->stat->ops[idx].dt, &dt);
			bintime_add(&pp->stat->ops[idx].dt, &dt);
			/* ... and the metrics */
			pp->stat->ops[idx].nbyte += bp->bio_completed;
			cp->stat->ops[idx].nbyte += bp->bio_completed;
			pp->stat->ops[idx].nop++;
			cp->stat->ops[idx].nop++;
			/* ... and any errors */
			if (error == ENOMEM) {
				cp->stat->ops[idx].nmem++;
				pp->stat->ops[idx].nmem++;
			} else if (error != 0) {
				cp->stat->ops[idx].nerr++;
				pp->stat->ops[idx].nerr++;
			}
		}
		/* Account for busy time on the consumer */
		dt = t1;
		bintime_sub(&dt, &cp->stat->wentbusy);
		bintime_add(&cp->stat->bt, &dt);
		cp->stat->wentbusy = t1;
		/* Account for busy time on the provider */
		dt = t1;
		bintime_sub(&dt, &pp->stat->wentbusy);
		bintime_add(&pp->stat->bt, &dt);
		pp->stat->wentbusy = t1;
		/* Mark the structures as consistent again. */
		atomic_add_acq_int(&cp->stat->seq1, 1);
		atomic_add_acq_int(&pp->stat->seq1, 1);
	}
	cp->stat->nend++;
	pp->stat->nend++;

	if (error == ENOMEM) {
		/*
		 * Out of memory somewhere downstream: retry the request
		 * and ask the down path to slow its pace.
		 */
		printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
		g_io_request(bp, cp);
		pace++;
		return;
	}
	bp->bio_error = error;
	g_bioq_enqueue_tail(bp, &g_bio_run_up);
	wakeup(&g_wait_up);
}
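/*
 * The two scheduler loops below are run on behalf of GEOM's dedicated
 * kernel threads (see geom_kern.c): g_io_schedule_down() services
 * g_bio_run_down and hands each request to the provider geom's start
 * method, while g_io_schedule_up() services tasks and completions for
 * the up path.  When g_io_deliver() sees ENOMEM it bumps "pace", which
 * makes the down loop return to its caller after each request,
 * throttling the stream of new work while memory is tight.
 */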
void
g_io_schedule_down(struct thread *tp __unused)
{
	struct bio *bp;
	off_t excess;
	int error;
	struct mtx mymutex;

	bzero(&mymutex, sizeof mymutex);
	mtx_init(&mymutex, "g_xdown", NULL, MTX_DEF);

	for (;;) {
		g_bioq_lock(&g_bio_run_down);
		bp = g_bioq_first(&g_bio_run_down);
		if (bp == NULL) {
			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
			    PRIBIO | PDROP, "g_down", hz/10);
			continue;
		}
		g_bioq_unlock(&g_bio_run_down);
		error = g_io_check(bp);
		if (error) {
			g_io_deliver(bp, error);
			continue;
		}
		switch (bp->bio_cmd) {
		case BIO_READ:
		case BIO_WRITE:
		case BIO_DELETE:
			/* Truncate requests to the end of the provider's media. */
			excess = bp->bio_offset + bp->bio_length;
			if (excess > bp->bio_to->mediasize) {
				excess -= bp->bio_to->mediasize;
				bp->bio_length -= excess;
			}
			/* Deliver zero length transfers right here. */
			if (bp->bio_length == 0) {
				g_io_deliver(bp, 0);
				continue;
			}
			break;
		default:
			break;
		}
		mtx_lock(&mymutex);
		bp->bio_to->geom->start(bp);
		mtx_unlock(&mymutex);
		if (pace) {
			pace--;
			break;
		}
	}
}

void
bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg)
{
	bp->bio_task = func;
	bp->bio_task_arg = arg;
	/*
	 * The taskqueue is actually just a second queue off the "up"
	 * queue, so we use the same lock.
	 */
	g_bioq_lock(&g_bio_run_up);
	TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue);
	g_bio_run_task.bio_queue_length++;
	wakeup(&g_wait_up);
	g_bioq_unlock(&g_bio_run_up);
}

void
g_io_schedule_up(struct thread *tp __unused)
{
	struct bio *bp;
	struct mtx mymutex;

	bzero(&mymutex, sizeof mymutex);
	mtx_init(&mymutex, "g_xup", NULL, MTX_DEF);
	for (;;) {
		g_bioq_lock(&g_bio_run_up);
		/* Task bios take priority over regular completions. */
		bp = g_bioq_first(&g_bio_run_task);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			mtx_lock(&mymutex);
			bp->bio_task(bp, bp->bio_task_arg);
			mtx_unlock(&mymutex);
			continue;
		}
		bp = g_bioq_first(&g_bio_run_up);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			mtx_lock(&mymutex);
			biodone(bp);
			mtx_unlock(&mymutex);
			continue;
		}
		msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
		    PRIBIO | PDROP, "g_up", hz/10);
	}
}

void *
g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
{
	struct bio *bp;
	void *ptr;
	int errorc;

	bp = g_new_bio();
	if (bp == NULL) {
		if (error != NULL)
			*error = ENOMEM;
		return (NULL);
	}
	bp->bio_cmd = BIO_READ;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	ptr = g_malloc(length, M_WAITOK);
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	errorc = biowait(bp, "gread");
	if (error != NULL)
		*error = errorc;
	g_destroy_bio(bp);
	if (errorc) {
		g_free(ptr);
		ptr = NULL;
	}
	return (ptr);
}

int
g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
{
	struct bio *bp;
	int error;

	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_WRITE;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "gwrite");
	g_destroy_bio(bp);
	return (error);
}
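/*
 * Illustrative sketch, not part of this file: a class's taste method
 * commonly uses g_read_data() to fetch a metadata sector.  The consumer
 * must already be attached and opened for reading (cp->acr > 0, cf.
 * g_io_check()); the names and the metadata location are hypothetical.
 */
#if 0
	u_char *buf;
	int error;

	buf = g_read_data(cp, pp->mediasize - pp->sectorsize,
	    pp->sectorsize, &error);
	if (buf != NULL) {
		/* ... inspect the metadata sector in buf ... */
		g_free(buf);
	}
#endif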