1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 1998,2001 by Sun Microsystems, Inc. 24 * All rights reserved. 25 * 26 */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/cmn_err.h> 32 #include <sys/kmem.h> 33 #include <sys/systm.h> 34 #include <sys/debug.h> 35 #include <sys/ddi.h> 36 37 #include <sys/fdbuffer.h> 38 39 #ifdef DEBUG 40 static int fdb_debug; 41 #define FDB_D_CREATE 001 42 #define FDB_D_ALLOC 002 43 #define FDB_D_IO 004 44 #define FDB_D_ASYNC 010 45 #define DEBUGF(lvl, args) { if ((lvl) & fdb_debug) cmn_err args; } 46 #else 47 #define DEBUGF(level, args) 48 #endif 49 static struct kmem_cache *fdb_cache; 50 static void fdb_zero_holes(fdbuffer_t *fdb); 51 52 /* ARGSUSED */ 53 static int 54 fdb_cache_constructor(void *buf, void *cdrarg, int kmflags) 55 { 56 fdbuffer_t *fdb = buf; 57 58 mutex_init(&fdb->fd_mutex, NULL, MUTEX_DEFAULT, NULL); 59 60 return (0); 61 } 62 63 /* ARGSUSED */ 64 static void 65 fdb_cache_destructor(void *buf, void *cdrarg) 66 { 67 fdbuffer_t *fdb = buf; 68 69 mutex_destroy(&fdb->fd_mutex); 70 } 71 72 void 73 fdb_init() 74 { 75 fdb_cache = kmem_cache_create("fdb_cache", sizeof (fdbuffer_t), 76 0, fdb_cache_constructor, fdb_cache_destructor, 77 NULL, NULL, NULL, 0); 78 } 79 80 static void 81 fdb_prepare(fdbuffer_t *fdb) 82 { 83 fdb->fd_holes = NULL; 84 fdb->fd_iofunc = NULL; 85 fdb->fd_iargp = NULL; 86 fdb->fd_parentbp = NULL; 87 fdb->fd_resid = 0; 88 fdb->fd_iocount = 0; 89 fdb->fd_iodispatch = 0; 90 fdb->fd_err = 0; 91 } 92 93 fdbuffer_t * 94 fdb_page_create(page_t *pp, size_t len, int flags) 95 { 96 fdbuffer_t *fdb; 97 98 DEBUGF(FDB_D_CREATE, (CE_NOTE, 99 "?fdb_page_create: pp: %p len: %lux flags: %x", 100 (void *)pp, len, flags)); 101 102 ASSERT(flags & (FDB_READ|FDB_WRITE)); 103 104 fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP); 105 106 fdb_prepare(fdb); 107 108 fdb->fd_type = FDB_PAGEIO; 109 fdb->fd_len = len; 110 fdb->fd_state = flags; 111 fdb->fd_pages = pp; 112 113 return (fdb); 114 } 115 116 fdbuffer_t * 117 fdb_addr_create( 118 caddr_t addr, 119 size_t len, 120 int flags, 121 page_t **pplist, 122 struct proc *procp) 123 { 124 fdbuffer_t *fdb; 125 126 DEBUGF(FDB_D_CREATE, (CE_NOTE, 127 "?fdb_addr_create: addr: %p len: %lux flags: %x", 128 (void *)addr, len, flags)); 129 130 ASSERT(flags & (FDB_READ|FDB_WRITE)); 131 132 fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP); 133 134 fdb_prepare(fdb); 135 136 fdb->fd_type = FDB_VADDR; 137 fdb->fd_len = len; 138 fdb->fd_state = flags; 139 fdb->fd_addr = addr; 140 fdb->fd_shadow = pplist; 141 fdb->fd_procp = procp; 142 143 return (fdb); 144 } 145 146 void 147 fdb_set_iofunc(fdbuffer_t *fdb, fdb_iodone_t iofunc, void *ioargp, int flag) 148 { 149 ASSERT(fdb); 150 ASSERT(iofunc); 151 ASSERT((flag & ~FDB_ICALLBACK) == 0); 152 153 fdb->fd_iofunc = iofunc; 154 fdb->fd_iargp = ioargp; 155 156 mutex_enter(&fdb->fd_mutex); 157 158 if (flag & FDB_ICALLBACK) 159 fdb->fd_state |= FDB_ICALLBACK; 160 161 fdb->fd_state |= FDB_ASYNC; 162 163 mutex_exit(&fdb->fd_mutex); 164 } 165 166 int 167 fdb_get_error(fdbuffer_t *fdb) 168 { 169 return (fdb->fd_err); 170 } 171 172 void 173 fdb_free(fdbuffer_t *fdb) 174 { 175 fdb_holes_t *fdh, *fdhp; 176 177 DEBUGF(FDB_D_CREATE, (CE_NOTE, "?fdb_free: addr: %p flags: %x", 178 (void *)fdb, fdb->fd_state)); 179 180 ASSERT(fdb); 181 ASSERT(fdb->fd_iodispatch == 0); 182 183 if (fdb->fd_state & FDB_ZEROHOLE) { 184 fdb_zero_holes(fdb); 185 } 186 187 for (fdh = fdb->fd_holes; fdh; ) { 188 fdhp = fdh; 189 fdh = fdh->next_hole; 190 kmem_free(fdhp, sizeof (fdb_holes_t)); 191 } 192 193 if (fdb->fd_parentbp != NULL) { 194 switch (fdb->fd_type) { 195 case FDB_PAGEIO: 196 pageio_done(fdb->fd_parentbp); 197 break; 198 case FDB_VADDR: 199 kmem_free(fdb->fd_parentbp, sizeof (struct buf)); 200 break; 201 default: 202 cmn_err(CE_CONT, "?fdb_free: Unknown fdb type."); 203 break; 204 } 205 } 206 207 kmem_cache_free(fdb_cache, fdb); 208 209 } 210 211 /* 212 * The offset should be from the begining of the buffer 213 * it has nothing to do with file offset. This fact should be 214 * reflected in the caller of this routine. 215 */ 216 217 void 218 fdb_add_hole(fdbuffer_t *fdb, u_offset_t off, size_t len) 219 { 220 fdb_holes_t *this_hole; 221 222 ASSERT(fdb); 223 ASSERT(off < fdb->fd_len); 224 225 DEBUGF(FDB_D_IO, (CE_NOTE, "?fdb_add_hole: off %llx len %lx", 226 off, len)); 227 228 this_hole = kmem_alloc(sizeof (fdb_holes_t), KM_SLEEP); 229 this_hole->off = off; 230 this_hole->len = len; 231 232 if (fdb->fd_holes == NULL || off < fdb->fd_holes->off) { 233 this_hole->next_hole = fdb->fd_holes; 234 fdb->fd_holes = this_hole; 235 } else { 236 fdb_holes_t *fdhp = fdb->fd_holes; 237 238 while (fdhp->next_hole && off > fdhp->next_hole->off) 239 fdhp = fdhp->next_hole; 240 241 this_hole->next_hole = fdhp->next_hole; 242 fdhp->next_hole = this_hole; 243 } 244 245 mutex_enter(&fdb->fd_mutex); 246 247 fdb->fd_iocount += len; 248 249 mutex_exit(&fdb->fd_mutex); 250 } 251 252 fdb_holes_t * 253 fdb_get_holes(fdbuffer_t *fdb) 254 { 255 ASSERT(fdb); 256 257 if (fdb->fd_state & FDB_ZEROHOLE) { 258 fdb_zero_holes(fdb); 259 } 260 261 return (fdb->fd_holes); 262 } 263 264 /* 265 * Note that offsets refer to offsets from the begining of the buffer 266 * and as such the memory should be cleared accordingly. 267 */ 268 269 static void 270 fdb_zero_holes(fdbuffer_t *fdb) 271 { 272 fdb_holes_t *fdh = fdb->fd_holes; 273 page_t *pp; 274 275 ASSERT(fdb); 276 277 if (!fdh) 278 return; 279 280 switch (fdb->fd_type) { 281 case FDB_PAGEIO: 282 pp = fdb->fd_pages; 283 while (fdh) { 284 fdb_holes_t *pfdh = fdh; 285 size_t l = fdh->len; 286 u_offset_t o = fdh->off; 287 ASSERT(pp); 288 289 do { 290 int zerolen; 291 ASSERT(o >= pp->p_offset); 292 293 /* 294 * This offset is wrong since 295 * the offset passed from the pages 296 * perspective starts at some virtual 297 * address but the hole is relative 298 * to the beginning of the fdbuffer. 299 */ 300 if (o >= pp->p_offset + PAGESIZE) 301 continue; 302 303 zerolen = min(PAGESIZE, l); 304 305 ASSERT(zerolen > 0); 306 ASSERT(zerolen <= PAGESIZE); 307 308 pagezero(pp, ((uintptr_t)o & PAGEOFFSET), 309 zerolen); 310 311 l -= zerolen; 312 o += zerolen; 313 314 if (l == 0) 315 break; 316 317 } while (pp = page_list_next(pp)); 318 319 if (!pp) 320 break; 321 322 fdh = fdh->next_hole; 323 kmem_free(pfdh, sizeof (fdb_holes_t)); 324 } 325 break; 326 case FDB_VADDR: 327 while (fdh) { 328 fdb_holes_t *pfdh = fdh; 329 330 bzero(fdb->fd_addr + fdh->off, fdh->len); 331 332 fdh = fdh->next_hole; 333 kmem_free(pfdh, sizeof (fdb_holes_t)); 334 } 335 default: 336 panic("fdb_zero_holes: Unknown fdb type."); 337 break; 338 } 339 } 340 341 342 buf_t * 343 fdb_iosetup(fdbuffer_t *fdb, u_offset_t off, size_t len, struct vnode *vp, 344 int b_flags) 345 { 346 buf_t *bp; 347 348 DEBUGF(FDB_D_IO, (CE_NOTE, 349 "?fdb_iosetup: off: %llx len: %lux fdb: len: %lux flags: %x", 350 off, len, fdb->fd_len, fdb->fd_state)); 351 352 ASSERT(fdb); 353 354 mutex_enter(&fdb->fd_mutex); 355 356 ASSERT(((b_flags & B_READ) && (fdb->fd_state & FDB_READ)) || 357 ((b_flags & B_WRITE) && (fdb->fd_state & FDB_WRITE))); 358 /* 359 * The fdb can be used either in sync or async mode, if the 360 * buffer has not been used it may be used in either mode, but 361 * once you have started to use the buf in either mode all 362 * subsequent i/o requests must take place the same way. 363 */ 364 365 ASSERT(((b_flags & B_ASYNC) && 366 ((fdb->fd_state & FDB_ASYNC) || !(fdb->fd_state & FDB_SYNC))) || 367 (!(b_flags & B_ASYNC) && 368 ((fdb->fd_state & FDB_SYNC) || !(fdb->fd_state & FDB_ASYNC)))); 369 370 371 fdb->fd_state |= b_flags & B_ASYNC ? FDB_ASYNC : FDB_SYNC; 372 373 fdb->fd_iodispatch++; 374 375 ASSERT((fdb->fd_state & FDB_ASYNC && fdb->fd_iofunc != NULL) || 376 fdb->fd_state & FDB_SYNC); 377 378 mutex_exit(&fdb->fd_mutex); 379 380 ASSERT((len & (DEV_BSIZE - 1)) == 0); 381 ASSERT(off+len <= fdb->fd_len); 382 383 switch (fdb->fd_type) { 384 case FDB_PAGEIO: 385 if (fdb->fd_parentbp == NULL) { 386 bp = pageio_setup(fdb->fd_pages, len, vp, b_flags); 387 fdb->fd_parentbp = bp; 388 } 389 break; 390 case FDB_VADDR: 391 if (fdb->fd_parentbp == NULL) { 392 393 bp = kmem_alloc(sizeof (buf_t), KM_SLEEP); 394 bioinit(bp); 395 bp->b_error = 0; 396 bp->b_proc = fdb->fd_procp; 397 bp->b_flags = b_flags | B_BUSY | B_PHYS; 398 bp->b_bcount = len; 399 bp->b_un.b_addr = fdb->fd_addr; 400 bp->b_shadow = fdb->fd_shadow; 401 if (fdb->fd_shadow != NULL) 402 bp->b_flags |= B_SHADOW; 403 fdb->fd_parentbp = bp; 404 } 405 break; 406 default: 407 panic("fdb_iosetup: Unsupported fdb type."); 408 break; 409 }; 410 411 bp = bioclone(fdb->fd_parentbp, off, len, 0, 0, 412 (b_flags & B_ASYNC) ? (int (*)())fdb_iodone : NULL, 413 NULL, KM_SLEEP); 414 415 bp->b_forw = (struct buf *)fdb; 416 417 if (b_flags & B_ASYNC) 418 bp->b_flags |= B_ASYNC; 419 420 return (bp); 421 } 422 423 size_t 424 fdb_get_iolen(fdbuffer_t *fdb) 425 { 426 ASSERT(fdb); 427 ASSERT(fdb->fd_iodispatch == 0); 428 429 return (fdb->fd_iocount - fdb->fd_resid); 430 } 431 432 void 433 fdb_ioerrdone(fdbuffer_t *fdb, int error) 434 { 435 ASSERT(fdb); 436 ASSERT(fdb->fd_state & FDB_ASYNC); 437 438 DEBUGF(FDB_D_IO, (CE_NOTE, 439 "?fdb_ioerrdone: fdb: len: %lux flags: %x error: %d", 440 fdb->fd_len, fdb->fd_state, error)); 441 442 mutex_enter(&fdb->fd_mutex); 443 444 fdb->fd_err = error; 445 446 if (error) 447 fdb->fd_state |= FDB_ERROR; 448 else 449 fdb->fd_state |= FDB_DONE; 450 451 /* 452 * If there is outstanding i/o return wainting for i/o's to complete. 453 */ 454 if (fdb->fd_iodispatch > 0) { 455 mutex_exit(&fdb->fd_mutex); 456 return; 457 } 458 459 mutex_exit(&fdb->fd_mutex); 460 fdb->fd_iofunc(fdb, fdb->fd_iargp, NULL); 461 } 462 463 void 464 fdb_iodone(buf_t *bp) 465 { 466 fdbuffer_t *fdb = (fdbuffer_t *)bp->b_forw; 467 int error, isasync; 468 int icallback; 469 470 ASSERT(fdb); 471 472 DEBUGF(FDB_D_IO, (CE_NOTE, 473 "?fdb_iodone: fdb: len: %lux flags: %x error: %d", 474 fdb->fd_len, fdb->fd_state, geterror(bp))); 475 476 if (bp->b_flags & B_REMAPPED) 477 bp_mapout(bp); 478 479 mutex_enter(&fdb->fd_mutex); 480 481 icallback = fdb->fd_state & FDB_ICALLBACK; 482 isasync = fdb->fd_state & FDB_ASYNC; 483 484 ASSERT(fdb->fd_iodispatch > 0); 485 fdb->fd_iodispatch--; 486 487 if (error = geterror(bp)) { 488 fdb->fd_err = error; 489 if (bp->b_resid) 490 fdb->fd_resid += bp->b_resid; 491 else 492 fdb->fd_resid += bp->b_bcount; 493 } 494 495 fdb->fd_iocount += bp->b_bcount; 496 497 /* 498 * ioack collects the total amount of i/o accounted for 499 * this includes: 500 * 501 * - i/o completed 502 * - i/o attempted but not completed, 503 * - i/o not done due to holes. 504 * 505 * Once the entire i/o ranges has been accounted for we'll 506 * call the async function associated with the fdb. 507 * 508 */ 509 510 if ((fdb->fd_iodispatch == 0) && 511 (fdb->fd_state & (FDB_ERROR|FDB_DONE))) { 512 513 mutex_exit(&fdb->fd_mutex); 514 515 if (isasync || icallback) { 516 fdb->fd_iofunc(fdb, fdb->fd_iargp, bp); 517 } 518 519 } else { 520 521 mutex_exit(&fdb->fd_mutex); 522 523 if (icallback) { 524 fdb->fd_iofunc(fdb, fdb->fd_iargp, bp); 525 } 526 } 527 528 freerbuf(bp); 529 } 530