/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#ifndef _KERNEL
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <err.h>
#include <sched.h>
#else
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#endif

#include <sys/errno.h>
#include <machine/atomic.h>
#include <geom/geom.h>
#include <geom/geom_int.h>
#include <sys/devicestat.h>

static struct g_bioq g_bio_run_down;
static struct g_bioq g_bio_run_up;
static struct g_bioq g_bio_run_task;
static struct g_bioq g_bio_idle;

static u_int pace;

static void
g_bioq_lock(struct g_bioq *bq)
{

	mtx_lock(&bq->bio_queue_lock);
}

static void
g_bioq_unlock(struct g_bioq *bq)
{

	mtx_unlock(&bq->bio_queue_lock);
}

#if 0
static void
g_bioq_destroy(struct g_bioq *bq)
{

	mtx_destroy(&bq->bio_queue_lock);
}
#endif

static void
g_bioq_init(struct g_bioq *bq)
{

	TAILQ_INIT(&bq->bio_queue);
	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
}

static struct bio *
g_bioq_first(struct g_bioq *bq)
{
	struct bio *bp;

	bp = TAILQ_FIRST(&bq->bio_queue);
	if (bp != NULL) {
		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
		bq->bio_queue_length--;
	}
	return (bp);
}

static void
g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq)
{

	g_bioq_lock(rq);
	TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue);
	rq->bio_queue_length++;
	g_bioq_unlock(rq);
}

struct bio *
g_new_bio(void)
{
	struct bio *bp;

	/* Recycle a bio from the idle queue before going to malloc. */
	g_bioq_lock(&g_bio_idle);
	bp = g_bioq_first(&g_bio_idle);
	g_bioq_unlock(&g_bio_idle);
	if (bp == NULL)
		bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO);
	/* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */
	return (bp);
}

void
g_destroy_bio(struct bio *bp)
{

	/* g_trace(G_T_BIO, "g_destroy_bio(%p)", bp); */
	bzero(bp, sizeof *bp);
	g_bioq_enqueue_tail(bp, &g_bio_idle);
}

struct bio *
g_clone_bio(struct bio *bp)
{
	struct bio *bp2;

	bp2 = g_new_bio();
	if (bp2 != NULL) {
		bp2->bio_parent = bp;
		bp2->bio_cmd = bp->bio_cmd;
		bp2->bio_length = bp->bio_length;
		bp2->bio_offset = bp->bio_offset;
		bp2->bio_data = bp->bio_data;
		bp2->bio_attribute = bp->bio_attribute;
		bp->bio_children++;
	}
	/* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */
	return (bp2);
}

void
g_io_init(void)
{

	g_bioq_init(&g_bio_run_down);
	g_bioq_init(&g_bio_run_up);
	g_bioq_init(&g_bio_run_task);
	g_bioq_init(&g_bio_idle);
}

int
g_io_setattr(const char *attr, struct g_consumer *cp, int len, void *ptr)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_setattr(%s)", attr);
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);	/* g_new_bio() may fail (M_NOWAIT) */
	bp->bio_cmd = BIO_SETATTR;
	bp->bio_done = NULL;
	bp->bio_attribute = attr;
	bp->bio_length = len;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "gsetattr");
	g_destroy_bio(bp);
	return (error);
}

int
g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);	/* g_new_bio() may fail (M_NOWAIT) */
	bp->bio_cmd = BIO_GETATTR;
	bp->bio_done = NULL;
	bp->bio_attribute = attr;
	bp->bio_length = *len;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "ggetattr");
	*len = bp->bio_completed;
	g_destroy_bio(bp);
	return (error);
}
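
/*
 * Illustrative sketch, not compiled (hence #if 0): how a class method
 * might query an attribute through its consumer with g_io_getattr().
 * The function name g_example_getattr and the attribute string
 * "EXAMPLE::value" are hypothetical; only g_io_getattr() is real.
 */
#if 0
static void
g_example_getattr(struct g_consumer *cp)
{
	int ival;
	int len, error;

	len = sizeof ival;
	error = g_io_getattr("EXAMPLE::value", cp, &len, &ival);
	if (error == 0 && len == sizeof ival)
		printf("attribute value: %d\n", ival);
}
#endif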

static int
g_io_check(struct bio *bp)
{
	struct g_consumer *cp;
	struct g_provider *pp;

	cp = bp->bio_from;
	pp = bp->bio_to;

	/* Fail if access counters don't allow the operation */
	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_GETATTR:
		if (cp->acr == 0)
			return (EPERM);
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_SETATTR:
		if (cp->acw == 0)
			return (EPERM);
		break;
	default:
		return (EPERM);
	}
	/* If provider is marked for error, don't disturb. */
	if (pp->error)
		return (pp->error);

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		/* Reject I/O not on sector boundary */
		if (bp->bio_offset % pp->sectorsize)
			return (EINVAL);
		/* Reject I/O not integral sector long */
		if (bp->bio_length % pp->sectorsize)
			return (EINVAL);
		/* Reject requests past the end of media. */
		if (bp->bio_offset > pp->mediasize)
			return (EIO);
		break;
	default:
		break;
	}
	return (0);
}

void
g_io_request(struct bio *bp, struct g_consumer *cp)
{
	struct g_provider *pp;

	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
	KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request"));
	pp = cp->provider;	/* dereference cp only after the KASSERT */
	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));

	bp->bio_from = cp;
	bp->bio_to = pp;
	bp->bio_error = 0;
	bp->bio_completed = 0;

	if (g_collectstats) {
		devstat_start_transaction_bio(cp->stat, bp);
		devstat_start_transaction_bio(pp->stat, bp);
	}
	cp->nstart++;
	pp->nstart++;

	/* Pass it on down. */
	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
	g_bioq_enqueue_tail(bp, &g_bio_run_down);
	wakeup(&g_wait_down);
}
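
/*
 * Illustrative sketch, not compiled: the usual pattern in a geom's
 * start() method is to clone the incoming bio and hand the clone to the
 * consumer below via g_io_request().  g_example_start and its ENOMEM
 * handling are hypothetical; g_clone_bio(), g_io_deliver() and the
 * g_std_done() completion handler from geom_subr.c are real.
 */
#if 0
static void
g_example_start(struct bio *bp)
{
	struct g_geom *gp;
	struct bio *bp2;

	gp = bp->bio_to->geom;
	bp2 = g_clone_bio(bp);
	if (bp2 == NULL) {
		/* Allocation failed; report the error upwards. */
		g_io_deliver(bp, ENOMEM);
		return;
	}
	bp2->bio_done = g_std_done;	/* completes the parent bio */
	g_io_request(bp2, LIST_FIRST(&gp->consumer));
}
#endif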

void
g_io_deliver(struct bio *bp, int error)
{
	struct g_consumer *cp;
	struct g_provider *pp;

	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
	cp = bp->bio_from;	/* dereference bp only after the KASSERT */
	pp = bp->bio_to;
	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));

	g_trace(G_T_BIO,
	    "g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);

	bp->bio_bcount = bp->bio_length;
	if (g_collectstats) {
		bp->bio_resid = bp->bio_bcount - bp->bio_completed;
		devstat_end_transaction_bio(cp->stat, bp);
		devstat_end_transaction_bio(pp->stat, bp);
	}
	cp->nend++;
	pp->nend++;

	if (error == ENOMEM) {
		/*
		 * Out of memory is considered transient: resubmit the
		 * request and ask the down thread to pace itself.
		 */
		printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
		g_io_request(bp, cp);
		pace++;
		return;
	}
	bp->bio_error = error;
	g_bioq_enqueue_tail(bp, &g_bio_run_up);
	wakeup(&g_wait_up);
}

void
g_io_schedule_down(struct thread *tp __unused)
{
	struct bio *bp;
	off_t excess;
	int error;
	struct mtx mymutex;

	bzero(&mymutex, sizeof mymutex);
	mtx_init(&mymutex, "g_xdown", NULL, MTX_DEF);

	for(;;) {
		g_bioq_lock(&g_bio_run_down);
		bp = g_bioq_first(&g_bio_run_down);
		if (bp == NULL) {
			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
			    PRIBIO | PDROP, "g_down", hz/10);
			continue;
		}
		g_bioq_unlock(&g_bio_run_down);
		error = g_io_check(bp);
		if (error) {
			g_io_deliver(bp, error);
			continue;
		}
		switch (bp->bio_cmd) {
		case BIO_READ:
		case BIO_WRITE:
		case BIO_DELETE:
			/* Truncate requests to the end of the provider's media. */
			excess = bp->bio_offset + bp->bio_length;
			if (excess > bp->bio_to->mediasize) {
				excess -= bp->bio_to->mediasize;
				bp->bio_length -= excess;
			}
			/* Deliver zero length transfers right here. */
			if (bp->bio_length == 0) {
				g_io_deliver(bp, 0);
				continue;
			}
			break;
		default:
			break;
		}
		mtx_lock(&mymutex);
		bp->bio_to->geom->start(bp);
		mtx_unlock(&mymutex);
		if (pace) {
			/*
			 * An ENOMEM was seen upstream; return to our
			 * caller so the system gets a chance to recover.
			 */
			pace--;
			break;
		}
	}
}

void
bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg)
{
	bp->bio_task = func;
	bp->bio_task_arg = arg;
	/*
	 * The taskqueue is actually just a second queue off the "up"
	 * queue, so we use the same lock.
	 */
	g_bioq_lock(&g_bio_run_up);
	TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue);
	g_bio_run_task.bio_queue_length++;
	wakeup(&g_wait_up);
	g_bioq_unlock(&g_bio_run_up);
}
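
/*
 * Illustrative sketch, not compiled: deferring work to the g_up thread
 * with bio_taskqueue().  g_example_task and g_example_defer are
 * hypothetical; the task signature is inferred from the call site in
 * g_io_schedule_up() below, which invokes bio_task(bp, bio_task_arg).
 */
#if 0
static void
g_example_task(struct bio *bp, void *arg)
{
	/* Runs in the g_up thread, outside the caller's context. */
	printf("deferred handling of bio %p (arg %p)\n", bp, arg);
	g_destroy_bio(bp);
}

static void
g_example_defer(struct bio *bp)
{
	bio_taskqueue(bp, g_example_task, NULL);
}
#endif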
393 */ 394 g_bioq_lock(&g_bio_run_up); 395 TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue); 396 g_bio_run_task.bio_queue_length++; 397 wakeup(&g_wait_up); 398 g_bioq_unlock(&g_bio_run_up); 399 } 400 401 402 void 403 g_io_schedule_up(struct thread *tp __unused) 404 { 405 struct bio *bp; 406 struct mtx mymutex; 407 408 bzero(&mymutex, sizeof mymutex); 409 mtx_init(&mymutex, "g_xup", MTX_DEF, 0); 410 for(;;) { 411 g_bioq_lock(&g_bio_run_up); 412 bp = g_bioq_first(&g_bio_run_task); 413 if (bp != NULL) { 414 g_bioq_unlock(&g_bio_run_up); 415 mtx_lock(&mymutex); 416 bp->bio_task(bp, bp->bio_task_arg); 417 mtx_unlock(&mymutex); 418 continue; 419 } 420 bp = g_bioq_first(&g_bio_run_up); 421 if (bp != NULL) { 422 g_bioq_unlock(&g_bio_run_up); 423 mtx_lock(&mymutex); 424 biodone(bp); 425 mtx_unlock(&mymutex); 426 continue; 427 } 428 msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, 429 PRIBIO | PDROP, "g_up", hz/10); 430 } 431 } 432 433 void * 434 g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) 435 { 436 struct bio *bp; 437 void *ptr; 438 int errorc; 439 440 bp = g_new_bio(); 441 bp->bio_cmd = BIO_READ; 442 bp->bio_done = NULL; 443 bp->bio_offset = offset; 444 bp->bio_length = length; 445 ptr = g_malloc(length, M_WAITOK); 446 bp->bio_data = ptr; 447 g_io_request(bp, cp); 448 errorc = biowait(bp, "gread"); 449 if (error != NULL) 450 *error = errorc; 451 g_destroy_bio(bp); 452 if (errorc) { 453 g_free(ptr); 454 ptr = NULL; 455 } 456 return (ptr); 457 } 458 459 int 460 g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) 461 { 462 struct bio *bp; 463 int error; 464 465 bp = g_new_bio(); 466 bp->bio_cmd = BIO_WRITE; 467 bp->bio_done = NULL; 468 bp->bio_offset = offset; 469 bp->bio_length = length; 470 bp->bio_data = ptr; 471 g_io_request(bp, cp); 472 error = biowait(bp, "gwrite"); 473 g_destroy_bio(bp); 474 return (error); 475 } 476