1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 1998,2001 by Sun Microsystems, Inc. 24 * All rights reserved. 25 * 26 */ 27 28 #include <sys/types.h> 29 #include <sys/cmn_err.h> 30 #include <sys/kmem.h> 31 #include <sys/systm.h> 32 #include <sys/debug.h> 33 #include <sys/ddi.h> 34 35 #include <sys/fdbuffer.h> 36 37 #ifdef DEBUG 38 static int fdb_debug; 39 #define FDB_D_CREATE 001 40 #define FDB_D_ALLOC 002 41 #define FDB_D_IO 004 42 #define FDB_D_ASYNC 010 43 #define DEBUGF(lvl, args) { if ((lvl) & fdb_debug) cmn_err args; } 44 #else 45 #define DEBUGF(level, args) 46 #endif 47 static struct kmem_cache *fdb_cache; 48 static void fdb_zero_holes(fdbuffer_t *fdb); 49 50 /* ARGSUSED */ 51 static int 52 fdb_cache_constructor(void *buf, void *cdrarg, int kmflags) 53 { 54 fdbuffer_t *fdb = buf; 55 56 mutex_init(&fdb->fd_mutex, NULL, MUTEX_DEFAULT, NULL); 57 58 return (0); 59 } 60 61 /* ARGSUSED */ 62 static void 63 fdb_cache_destructor(void *buf, void *cdrarg) 64 { 65 fdbuffer_t *fdb = buf; 66 67 mutex_destroy(&fdb->fd_mutex); 68 } 69 70 void 71 fdb_init() 72 { 73 fdb_cache = kmem_cache_create("fdb_cache", sizeof (fdbuffer_t), 74 0, fdb_cache_constructor, fdb_cache_destructor, 75 NULL, NULL, NULL, 0); 76 } 77 78 static void 79 fdb_prepare(fdbuffer_t *fdb) 80 { 81 fdb->fd_holes = NULL; 82 fdb->fd_iofunc = NULL; 83 fdb->fd_iargp = NULL; 84 fdb->fd_parentbp = NULL; 85 fdb->fd_resid = 0; 86 fdb->fd_iocount = 0; 87 fdb->fd_iodispatch = 0; 88 fdb->fd_err = 0; 89 } 90 91 fdbuffer_t * 92 fdb_page_create(page_t *pp, size_t len, int flags) 93 { 94 fdbuffer_t *fdb; 95 96 DEBUGF(FDB_D_CREATE, (CE_NOTE, 97 "?fdb_page_create: pp: %p len: %lux flags: %x", 98 (void *)pp, len, flags)); 99 100 ASSERT(flags & (FDB_READ|FDB_WRITE)); 101 102 fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP); 103 104 fdb_prepare(fdb); 105 106 fdb->fd_type = FDB_PAGEIO; 107 fdb->fd_len = len; 108 fdb->fd_state = flags; 109 fdb->fd_pages = pp; 110 111 return (fdb); 112 } 113 114 fdbuffer_t * 115 fdb_addr_create( 116 caddr_t addr, 117 size_t len, 118 int flags, 119 page_t **pplist, 120 struct proc *procp) 121 { 122 fdbuffer_t *fdb; 123 124 DEBUGF(FDB_D_CREATE, (CE_NOTE, 125 "?fdb_addr_create: addr: %p len: %lux flags: %x", 126 (void *)addr, len, flags)); 127 128 ASSERT(flags & (FDB_READ|FDB_WRITE)); 129 130 fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP); 131 132 fdb_prepare(fdb); 133 134 fdb->fd_type = FDB_VADDR; 135 fdb->fd_len = len; 136 fdb->fd_state = flags; 137 fdb->fd_addr = addr; 138 fdb->fd_shadow = pplist; 139 fdb->fd_procp = procp; 140 141 return (fdb); 142 } 143 144 void 145 fdb_set_iofunc(fdbuffer_t *fdb, fdb_iodone_t iofunc, void *ioargp, int flag) 146 { 147 ASSERT(fdb); 148 ASSERT(iofunc); 149 ASSERT((flag & ~FDB_ICALLBACK) == 0); 150 151 fdb->fd_iofunc = iofunc; 152 fdb->fd_iargp = ioargp; 153 154 mutex_enter(&fdb->fd_mutex); 155 156 if (flag & FDB_ICALLBACK) 157 fdb->fd_state |= FDB_ICALLBACK; 158 159 fdb->fd_state |= FDB_ASYNC; 160 161 mutex_exit(&fdb->fd_mutex); 162 } 163 164 int 165 fdb_get_error(fdbuffer_t *fdb) 166 { 167 return (fdb->fd_err); 168 } 169 170 void 171 fdb_free(fdbuffer_t *fdb) 172 { 173 fdb_holes_t *fdh, *fdhp; 174 175 DEBUGF(FDB_D_CREATE, (CE_NOTE, "?fdb_free: addr: %p flags: %x", 176 (void *)fdb, fdb->fd_state)); 177 178 ASSERT(fdb); 179 ASSERT(fdb->fd_iodispatch == 0); 180 181 if (fdb->fd_state & FDB_ZEROHOLE) { 182 fdb_zero_holes(fdb); 183 } 184 185 for (fdh = fdb->fd_holes; fdh; ) { 186 fdhp = fdh; 187 fdh = fdh->next_hole; 188 kmem_free(fdhp, sizeof (fdb_holes_t)); 189 } 190 191 if (fdb->fd_parentbp != NULL) { 192 switch (fdb->fd_type) { 193 case FDB_PAGEIO: 194 pageio_done(fdb->fd_parentbp); 195 break; 196 case FDB_VADDR: 197 kmem_free(fdb->fd_parentbp, sizeof (struct buf)); 198 break; 199 default: 200 cmn_err(CE_CONT, "?fdb_free: Unknown fdb type."); 201 break; 202 } 203 } 204 205 kmem_cache_free(fdb_cache, fdb); 206 207 } 208 209 /* 210 * The offset should be from the begining of the buffer 211 * it has nothing to do with file offset. This fact should be 212 * reflected in the caller of this routine. 213 */ 214 215 void 216 fdb_add_hole(fdbuffer_t *fdb, u_offset_t off, size_t len) 217 { 218 fdb_holes_t *this_hole; 219 220 ASSERT(fdb); 221 ASSERT(off < fdb->fd_len); 222 223 DEBUGF(FDB_D_IO, (CE_NOTE, "?fdb_add_hole: off %llx len %lx", 224 off, len)); 225 226 this_hole = kmem_alloc(sizeof (fdb_holes_t), KM_SLEEP); 227 this_hole->off = off; 228 this_hole->len = len; 229 230 if (fdb->fd_holes == NULL || off < fdb->fd_holes->off) { 231 this_hole->next_hole = fdb->fd_holes; 232 fdb->fd_holes = this_hole; 233 } else { 234 fdb_holes_t *fdhp = fdb->fd_holes; 235 236 while (fdhp->next_hole && off > fdhp->next_hole->off) 237 fdhp = fdhp->next_hole; 238 239 this_hole->next_hole = fdhp->next_hole; 240 fdhp->next_hole = this_hole; 241 } 242 243 mutex_enter(&fdb->fd_mutex); 244 245 fdb->fd_iocount += len; 246 247 mutex_exit(&fdb->fd_mutex); 248 } 249 250 fdb_holes_t * 251 fdb_get_holes(fdbuffer_t *fdb) 252 { 253 ASSERT(fdb); 254 255 if (fdb->fd_state & FDB_ZEROHOLE) { 256 fdb_zero_holes(fdb); 257 } 258 259 return (fdb->fd_holes); 260 } 261 262 /* 263 * Note that offsets refer to offsets from the begining of the buffer 264 * and as such the memory should be cleared accordingly. 265 */ 266 267 static void 268 fdb_zero_holes(fdbuffer_t *fdb) 269 { 270 fdb_holes_t *fdh = fdb->fd_holes; 271 page_t *pp; 272 273 ASSERT(fdb); 274 275 if (!fdh) 276 return; 277 278 switch (fdb->fd_type) { 279 case FDB_PAGEIO: 280 pp = fdb->fd_pages; 281 while (fdh) { 282 fdb_holes_t *pfdh = fdh; 283 size_t l = fdh->len; 284 u_offset_t o = fdh->off; 285 ASSERT(pp); 286 287 do { 288 int zerolen; 289 ASSERT(o >= pp->p_offset); 290 291 /* 292 * This offset is wrong since 293 * the offset passed from the pages 294 * perspective starts at some virtual 295 * address but the hole is relative 296 * to the beginning of the fdbuffer. 297 */ 298 if (o >= pp->p_offset + PAGESIZE) 299 continue; 300 301 zerolen = min(PAGESIZE, l); 302 303 ASSERT(zerolen > 0); 304 ASSERT(zerolen <= PAGESIZE); 305 306 pagezero(pp, ((uintptr_t)o & PAGEOFFSET), 307 zerolen); 308 309 l -= zerolen; 310 o += zerolen; 311 312 if (l == 0) 313 break; 314 315 } while (pp = page_list_next(pp)); 316 317 if (!pp) 318 break; 319 320 fdh = fdh->next_hole; 321 kmem_free(pfdh, sizeof (fdb_holes_t)); 322 } 323 break; 324 case FDB_VADDR: 325 while (fdh) { 326 fdb_holes_t *pfdh = fdh; 327 328 bzero(fdb->fd_addr + fdh->off, fdh->len); 329 330 fdh = fdh->next_hole; 331 kmem_free(pfdh, sizeof (fdb_holes_t)); 332 } 333 break; 334 default: 335 panic("fdb_zero_holes: Unknown fdb type."); 336 break; 337 } 338 } 339 340 341 buf_t * 342 fdb_iosetup(fdbuffer_t *fdb, u_offset_t off, size_t len, struct vnode *vp, 343 int b_flags) 344 { 345 buf_t *bp; 346 347 DEBUGF(FDB_D_IO, (CE_NOTE, 348 "?fdb_iosetup: off: %llx len: %lux fdb: len: %lux flags: %x", 349 off, len, fdb->fd_len, fdb->fd_state)); 350 351 ASSERT(fdb); 352 353 mutex_enter(&fdb->fd_mutex); 354 355 ASSERT(((b_flags & B_READ) && (fdb->fd_state & FDB_READ)) || 356 ((b_flags & B_WRITE) && (fdb->fd_state & FDB_WRITE))); 357 /* 358 * The fdb can be used either in sync or async mode, if the 359 * buffer has not been used it may be used in either mode, but 360 * once you have started to use the buf in either mode all 361 * subsequent i/o requests must take place the same way. 362 */ 363 364 ASSERT(((b_flags & B_ASYNC) && 365 ((fdb->fd_state & FDB_ASYNC) || !(fdb->fd_state & FDB_SYNC))) || 366 (!(b_flags & B_ASYNC) && 367 ((fdb->fd_state & FDB_SYNC) || !(fdb->fd_state & FDB_ASYNC)))); 368 369 370 fdb->fd_state |= b_flags & B_ASYNC ? FDB_ASYNC : FDB_SYNC; 371 372 fdb->fd_iodispatch++; 373 374 ASSERT((fdb->fd_state & FDB_ASYNC && fdb->fd_iofunc != NULL) || 375 fdb->fd_state & FDB_SYNC); 376 377 mutex_exit(&fdb->fd_mutex); 378 379 ASSERT((len & (DEV_BSIZE - 1)) == 0); 380 ASSERT(off+len <= fdb->fd_len); 381 382 switch (fdb->fd_type) { 383 case FDB_PAGEIO: 384 if (fdb->fd_parentbp == NULL) { 385 bp = pageio_setup(fdb->fd_pages, len, vp, b_flags); 386 fdb->fd_parentbp = bp; 387 } 388 break; 389 case FDB_VADDR: 390 if (fdb->fd_parentbp == NULL) { 391 392 bp = kmem_alloc(sizeof (buf_t), KM_SLEEP); 393 bioinit(bp); 394 bp->b_error = 0; 395 bp->b_proc = fdb->fd_procp; 396 bp->b_flags = b_flags | B_BUSY | B_PHYS; 397 bp->b_bcount = len; 398 bp->b_un.b_addr = fdb->fd_addr; 399 bp->b_shadow = fdb->fd_shadow; 400 if (fdb->fd_shadow != NULL) 401 bp->b_flags |= B_SHADOW; 402 fdb->fd_parentbp = bp; 403 } 404 break; 405 default: 406 panic("fdb_iosetup: Unsupported fdb type."); 407 break; 408 }; 409 410 bp = bioclone(fdb->fd_parentbp, off, len, 0, 0, 411 (b_flags & B_ASYNC) ? (int (*)())fdb_iodone : NULL, 412 NULL, KM_SLEEP); 413 414 bp->b_forw = (struct buf *)fdb; 415 416 if (b_flags & B_ASYNC) 417 bp->b_flags |= B_ASYNC; 418 419 return (bp); 420 } 421 422 size_t 423 fdb_get_iolen(fdbuffer_t *fdb) 424 { 425 ASSERT(fdb); 426 ASSERT(fdb->fd_iodispatch == 0); 427 428 return (fdb->fd_iocount - fdb->fd_resid); 429 } 430 431 void 432 fdb_ioerrdone(fdbuffer_t *fdb, int error) 433 { 434 ASSERT(fdb); 435 ASSERT(fdb->fd_state & FDB_ASYNC); 436 437 DEBUGF(FDB_D_IO, (CE_NOTE, 438 "?fdb_ioerrdone: fdb: len: %lux flags: %x error: %d", 439 fdb->fd_len, fdb->fd_state, error)); 440 441 mutex_enter(&fdb->fd_mutex); 442 443 fdb->fd_err = error; 444 445 if (error) 446 fdb->fd_state |= FDB_ERROR; 447 else 448 fdb->fd_state |= FDB_DONE; 449 450 /* 451 * If there is outstanding i/o return wainting for i/o's to complete. 452 */ 453 if (fdb->fd_iodispatch > 0) { 454 mutex_exit(&fdb->fd_mutex); 455 return; 456 } 457 458 mutex_exit(&fdb->fd_mutex); 459 fdb->fd_iofunc(fdb, fdb->fd_iargp, NULL); 460 } 461 462 void 463 fdb_iodone(buf_t *bp) 464 { 465 fdbuffer_t *fdb = (fdbuffer_t *)bp->b_forw; 466 int error, isasync; 467 int icallback; 468 469 ASSERT(fdb); 470 471 DEBUGF(FDB_D_IO, (CE_NOTE, 472 "?fdb_iodone: fdb: len: %lux flags: %x error: %d", 473 fdb->fd_len, fdb->fd_state, geterror(bp))); 474 475 if (bp->b_flags & B_REMAPPED) 476 bp_mapout(bp); 477 478 mutex_enter(&fdb->fd_mutex); 479 480 icallback = fdb->fd_state & FDB_ICALLBACK; 481 isasync = fdb->fd_state & FDB_ASYNC; 482 483 ASSERT(fdb->fd_iodispatch > 0); 484 fdb->fd_iodispatch--; 485 486 if (error = geterror(bp)) { 487 fdb->fd_err = error; 488 if (bp->b_resid) 489 fdb->fd_resid += bp->b_resid; 490 else 491 fdb->fd_resid += bp->b_bcount; 492 } 493 494 fdb->fd_iocount += bp->b_bcount; 495 496 /* 497 * ioack collects the total amount of i/o accounted for 498 * this includes: 499 * 500 * - i/o completed 501 * - i/o attempted but not completed, 502 * - i/o not done due to holes. 503 * 504 * Once the entire i/o ranges has been accounted for we'll 505 * call the async function associated with the fdb. 506 * 507 */ 508 509 if ((fdb->fd_iodispatch == 0) && 510 (fdb->fd_state & (FDB_ERROR|FDB_DONE))) { 511 512 mutex_exit(&fdb->fd_mutex); 513 514 if (isasync || icallback) { 515 fdb->fd_iofunc(fdb, fdb->fd_iargp, bp); 516 } 517 518 } else { 519 520 mutex_exit(&fdb->fd_mutex); 521 522 if (icallback) { 523 fdb->fd_iofunc(fdb, fdb->fd_iargp, bp); 524 } 525 } 526 527 freerbuf(bp); 528 } 529