/*
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. This work was done expressly for inclusion into FreeBSD.  Other use
 *    is allowed if this notation is included.
 * 5. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $Id: vfs_bio.c,v 1.29 1995/02/22 09:16:07 davidg Exp $
 */

/*
 * this file contains a new buffer I/O scheme implementing a coherent
 * VM object and buffer cache scheme.  Pains have been taken to make
 * sure that the performance degradation associated with schemes such
 * as this is not realized.
 *
 * Author:  John S. Dyson
 * Significant help during the development and debugging phases
 * had been provided by David Greenman, also of the FreeBSD core team.
 */

#define VMIO
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <vm/vm.h>
#include <vm/vm_pageout.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>

#include <miscfs/specfs/specdev.h>

struct buf *buf;		/* buffer header pool */
int nbuf;			/* number of buffer headers calculated
				 * elsewhere */
struct swqueue bswlist;
int nvmio, nlru;

extern vm_map_t buffer_map, io_map, kernel_map, pager_map;

void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
void vfs_dirty_pages(struct buf * bp);
void vfs_busy_pages(struct buf *, int clear_modify);

int needsbuffer;

/*
 * Internal update daemon, process 3
 *	The variable vfs_update_wakeup allows for internal syncs.
 */
int vfs_update_wakeup;


/*
 * buffers base kva
 */
caddr_t buffers_kva;

/*
 * bogus page -- for I/O to/from partially complete buffers
 * this is a temporary solution to the problem, but it is not
 * really that bad.  it would be better to split the buffer
 * for input in the case of buffers partially already in memory,
 * but the code is intricate enough already.
 */
vm_page_t bogus_page;
vm_offset_t bogus_offset;

int bufspace, maxbufspace;

/*
 * advisory minimum for size of LRU queue or VMIO queue
 */
int minbuf;

/*
 * Initialize buffer headers and related structures.
 */
void
bufinit()
{
	struct buf *bp;
	int i;

	TAILQ_INIT(&bswlist);
	LIST_INIT(&invalhash);

	/* first, make a null hash table */
	for (i = 0; i < BUFHSZ; i++)
		LIST_INIT(&bufhashtbl[i]);

	/* next, make a null set of free lists */
	for (i = 0; i < BUFFER_QUEUES; i++)
		TAILQ_INIT(&bufqueues[i]);

	buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf);
	/* finally, initialize each buffer header and stick on empty q */
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero(bp, sizeof *bp);
		bp->b_flags = B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_vp = NULL;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_qindex = QUEUE_EMPTY;
		bp->b_vnbufs.le_next = NOLIST;
		bp->b_data = buffers_kva + i * MAXBSIZE;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
	}
	/*
	 * this will change later!!!
	 */
	minbuf = nbuf / 3;
	maxbufspace = 2 * (nbuf + 8) * PAGE_SIZE;

	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
	bogus_page = vm_page_alloc(kernel_object,
	    bogus_offset - VM_MIN_KERNEL_ADDRESS, VM_ALLOC_NORMAL);

}

/*
 * remove the buffer from the appropriate free list
 */
void
bremfree(struct buf * bp)
{
	int s = splbio();

	if (bp->b_qindex != QUEUE_NONE) {
		if (bp->b_qindex == QUEUE_LRU)
			--nlru;
		TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
		bp->b_qindex = QUEUE_NONE;
	} else {
		panic("bremfree: removing a buffer when not on a queue");
	}
	splx(s);
}

/*
 * Get a buffer with the specified data.  Look in the cache first.
 */
int
bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
    struct buf ** bpp)
{
	struct buf *bp;

	bp = getblk(vp, blkno, size, 0, 0);
	*bpp = bp;

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
		if (bp->b_rcred == NOCRED) {
			if (cred != NOCRED)
				crhold(cred);
			bp->b_rcred = cred;
		}
		vfs_busy_pages(bp, 0);
		VOP_STRATEGY(bp);
		return (biowait(bp));
	} else if (bp->b_lblkno == bp->b_blkno) {
		VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
		    &bp->b_blkno, (int *) 0);
	}
	return (0);
}
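
/*
 * Illustrative sketch (compiled out, not part of the original file): the
 * typical caller pattern for bread()/brelse() in a filesystem read path.
 * The helper name and its arguments are hypothetical; only the
 * bread()/brelse() usage and error handling reflect the interfaces above.
 */
#ifdef notdef
static int
example_read_block(struct vnode * vp, daddr_t lbn, int bsize, struct ucred * cred)
{
	struct buf *bp;
	int error;

	/* returns the buffer via the last argument; I/O happens only on a cache miss */
	error = bread(vp, lbn, bsize, cred, &bp);
	if (error) {
		brelse(bp);		/* release the buffer even on error */
		return (error);
	}
	/* ... copy the data out of bp->b_data (bsize bytes) ... */
	brelse(bp);			/* release for reuse; contents stay cached */
	return (0);
}
#endif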

/*
 * Operates like bread, but also starts asynchronous I/O on
 * read-ahead blocks.
 */
int
breadn(struct vnode * vp, daddr_t blkno, int size,
    daddr_t * rablkno, int *rabsize,
    int cnt, struct ucred * cred, struct buf ** bpp)
{
	struct buf *bp, *rabp;
	int i;
	int rv = 0, readwait = 0;

	*bpp = bp = getblk(vp, blkno, size, 0, 0);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
		if (bp->b_rcred == NOCRED) {
			if (cred != NOCRED)
				crhold(cred);
			bp->b_rcred = cred;
		}
		vfs_busy_pages(bp, 0);
		VOP_STRATEGY(bp);
		++readwait;
	} else if (bp->b_lblkno == bp->b_blkno) {
		VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
		    &bp->b_blkno, (int *) 0);
	}
	for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
		if (inmem(vp, *rablkno))
			continue;
		rabp = getblk(vp, *rablkno, *rabsize, 0, 0);

		if ((rabp->b_flags & B_CACHE) == 0) {
			if (curproc && curproc->p_stats)
				curproc->p_stats->p_ru.ru_inblock++;
			rabp->b_flags |= B_READ | B_ASYNC;
			rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
			if (rabp->b_rcred == NOCRED) {
				if (cred != NOCRED)
					crhold(cred);
				rabp->b_rcred = cred;
			}
			vfs_busy_pages(rabp, 0);
			VOP_STRATEGY(rabp);
		} else {
			brelse(rabp);
		}
	}

	if (readwait) {
		rv = biowait(bp);
	}
	return (rv);
}

/*
 * Write, release buffer on completion.  (Done by iodone
 * if async.)
 */
int
bwrite(struct buf * bp)
{
	int oldflags = bp->b_flags;

	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	}
	if (!(bp->b_flags & B_BUSY))
		panic("bwrite: buffer is not busy???");

	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	bp->b_flags |= B_WRITEINPROG;

	if (oldflags & B_ASYNC) {
		if (oldflags & B_DELWRI) {
			reassignbuf(bp, bp->b_vp);
		} else if (curproc) {
			++curproc->p_stats->p_ru.ru_oublock;
		}
	}
	bp->b_vp->v_numoutput++;
	vfs_busy_pages(bp, 1);
	VOP_STRATEGY(bp);

	if ((oldflags & B_ASYNC) == 0) {
		int rtval = biowait(bp);

		if (oldflags & B_DELWRI) {
			reassignbuf(bp, bp->b_vp);
		} else if (curproc) {
			++curproc->p_stats->p_ru.ru_oublock;
		}
		brelse(bp);
		return (rtval);
	}
	return (0);
}

int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{
	return (bwrite(ap->a_bp));
}

/*
 * Delayed write.  (Buffer is marked dirty).
 */
void
bdwrite(struct buf * bp)
{

	if ((bp->b_flags & B_BUSY) == 0) {
		panic("bdwrite: buffer is not busy");
	}
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return;
	}
	if (bp->b_flags & B_TAPE) {
		bawrite(bp);
		return;
	}
	bp->b_flags &= ~B_READ;
	vfs_dirty_pages(bp);
	if ((bp->b_flags & B_DELWRI) == 0) {
		if (curproc)
			++curproc->p_stats->p_ru.ru_oublock;
		bp->b_flags |= B_DONE | B_DELWRI;
		reassignbuf(bp, bp->b_vp);
	}
	brelse(bp);
	return;
}

/*
 * Asynchronous write.
 * Start output on a buffer, but do not wait for it to complete.
 * The buffer is released when the output completes.
 */
void
bawrite(struct buf * bp)
{
	struct vnode *vp;
	vp = bp->b_vp;
	bp->b_flags |= B_ASYNC;
	(void) bwrite(bp);
	/*
	 * this code supports limits on the amount of outstanding
	 * writes to a disk file.
	 * this helps keep from overwhelming
	 * the buffer cache with writes, thereby allowing other files
	 * to be operated upon.
	 */
	if (vp->v_numoutput > (nbuf/2)) {
		int s = splbio();

		while (vp->v_numoutput > (nbuf/4)) {
			vp->v_flag |= VBWAIT;
			tsleep((caddr_t) &vp->v_numoutput, PRIBIO, "bawnmo", 0);
		}
		splx(s);
	}
}

/*
 * Release a buffer.
 */
void
brelse(struct buf * bp)
{
	int s;

	if (bp->b_flags & B_CLUSTER) {
		relpbuf(bp);
		return;
	}
	/* anyone need a "free" block? */
	s = splbio();

	if (needsbuffer) {
		needsbuffer = 0;
		wakeup((caddr_t) &needsbuffer);
	}

	/* anyone need this block? */
	if (bp->b_flags & B_WANTED) {
		bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_AGE);
		wakeup((caddr_t) bp);
	} else if (bp->b_flags & B_VMIO) {
		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
		wakeup((caddr_t) bp);
	}
	if (bp->b_flags & B_LOCKED)
		bp->b_flags &= ~B_ERROR;

	if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) ||
	    (bp->b_bufsize <= 0)) {
		bp->b_flags |= B_INVAL;
		bp->b_flags &= ~(B_DELWRI | B_CACHE);
		if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp)
			brelvp(bp);
	}

	/*
	 * VMIO buffer rundown.  It is not very necessary to keep a VMIO buffer
	 * constituted, so the B_INVAL flag is used to *invalidate* the buffer,
	 * but the VM object is kept around.  The B_NOCACHE flag is used to
	 * invalidate the pages in the VM object.
	 */
	if (bp->b_flags & B_VMIO) {
		vm_offset_t foff;
		vm_object_t obj;
		int i, resid;
		vm_page_t m;
		int iototal = bp->b_bufsize;

		foff = 0;
		obj = 0;
		if (bp->b_npages) {
			if (bp->b_vp && bp->b_vp->v_mount) {
				foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
			} else {
				/*
				 * vnode pointer has been ripped away --
				 * probably file gone...
				 */
				foff = bp->b_pages[0]->offset;
			}
		}
		for (i = 0; i < bp->b_npages; i++) {
			m = bp->b_pages[i];
			if (m == bogus_page) {
				panic("brelse: bogus page found");
			}
			resid = (m->offset + PAGE_SIZE) - foff;
			if (resid > iototal)
				resid = iototal;
			if (resid > 0) {
				if (bp->b_flags & (B_ERROR | B_NOCACHE)) {
					vm_page_set_invalid(m, foff, resid);
				} else if ((bp->b_flags & B_DELWRI) == 0) {
					vm_page_set_clean(m, foff, resid);
					vm_page_set_valid(m, foff, resid);
				}
			} else {
				vm_page_test_dirty(m);
			}
			foff += resid;
			iototal -= resid;
		}

		if (bp->b_flags & B_INVAL) {
			for (i = 0; i < bp->b_npages; i++) {
				m = bp->b_pages[i];
				--m->bmapped;
				if (m->bmapped == 0) {
					PAGE_WAKEUP(m);
					if (m->valid == 0) {
						pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
						vm_page_free(m);
					} else if ((m->dirty & m->valid) == 0 &&
					    (m->flags & PG_REFERENCED) == 0 &&
					    !pmap_is_referenced(VM_PAGE_TO_PHYS(m)))
						vm_page_cache(m);
					else if ((m->flags & PG_ACTIVE) == 0)
						vm_page_activate(m);
				}
			}
			bufspace -= bp->b_bufsize;
			pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
			bp->b_npages = 0;
			bp->b_bufsize = 0;
			bp->b_flags &= ~B_VMIO;
			if (bp->b_vp)
				brelvp(bp);
			--nvmio;
		}
	}
	if (bp->b_qindex != QUEUE_NONE)
		panic("brelse: free buffer onto another queue???");

	/* enqueue */
	/* buffers with no memory */
	if (bp->b_bufsize == 0) {
		bp->b_qindex = QUEUE_EMPTY;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
		LIST_REMOVE(bp, b_hash);
		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
		bp->b_dev = NODEV;
		/* buffers with junk contents */
	} else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE)) {
		bp->b_qindex = QUEUE_AGE;
		TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist);
		LIST_REMOVE(bp, b_hash);
		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
		bp->b_dev = NODEV;
		/* buffers that are locked */
	} else if (bp->b_flags & B_LOCKED) {
		bp->b_qindex = QUEUE_LOCKED;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
		/* buffers with stale but valid contents */
	} else if (bp->b_flags & B_AGE) {
		bp->b_qindex = QUEUE_AGE;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist);
		/* buffers with valid and quite potentially reusable contents */
	} else {
		if (bp->b_flags & B_VMIO)
			bp->b_qindex = QUEUE_VMIO;
		else {
			bp->b_qindex = QUEUE_LRU;
			++nlru;
		}
		TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist);
	}

	/* unlock */
	bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE);
	splx(s);
}
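
/*
 * Illustrative sketch (compiled out, not part of the original file): the
 * three ways a filesystem typically disposes of a buffer it has just
 * modified, using the interfaces above.  The helper and its "how" argument
 * are hypothetical.
 */
#ifdef notdef
static int
example_dispose_dirty(struct buf * bp, int how)
{
	if (how == 0) {
		/* synchronous: start the write and wait for it to complete */
		return (bwrite(bp));
	} else if (how == 1) {
		/* delayed: mark dirty and release; written out later (e.g. by vfs_update) */
		bdwrite(bp);
	} else {
		/* asynchronous: start the write now; released when the I/O completes */
		bawrite(bp);
	}
	return (0);
}
#endif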

/*
 * this routine implements clustered async writes for
 * clearing out B_DELWRI buffers...  This is much better
 * than the old way of writing only one buffer at a time.
 */
void
vfs_bio_awrite(struct buf * bp)
{
	int i;
	daddr_t lblkno = bp->b_lblkno;
	struct vnode *vp = bp->b_vp;
	int s;
	int ncl;
	struct buf *bpa;

	s = splbio();
	if (vp->v_mount && (vp->v_flag & VVMIO) &&
	    (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
		int size = vp->v_mount->mnt_stat.f_iosize;

		for (i = 1; i < MAXPHYS / size; i++) {
			if ((bpa = incore(vp, lblkno + i)) &&
			    ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) ==
				(B_DELWRI | B_CLUSTEROK)) &&
			    (bpa->b_bufsize == size)) {
				if ((bpa->b_blkno == bpa->b_lblkno) ||
				    (bpa->b_blkno != bp->b_blkno + (i * size) / DEV_BSIZE))
					break;
			} else {
				break;
			}
		}
		ncl = i;
		/*
		 * this is a possible cluster write
		 */
		if (ncl != 1) {
			cluster_wbuild(vp, NULL, size, lblkno, ncl, -1);
			splx(s);
			return;
		}
	}
	/*
	 * default (old) behavior, writing out only one block
	 */
	bremfree(bp);
	bp->b_flags |= B_BUSY | B_ASYNC;
	bwrite(bp);
	splx(s);
}


/*
 * Find a buffer header which is available for use.
 */
struct buf *
getnewbuf(int slpflag, int slptimeo, int doingvmio)
{
	struct buf *bp;
	int s;
	int firstbp = 1;

	s = splbio();
start:
	if (bufspace >= maxbufspace)
		goto trytofreespace;

	/* can we constitute a new buffer? */
	if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) {
		if (bp->b_qindex != QUEUE_EMPTY)
			panic("getnewbuf: inconsistent EMPTY queue");
		bremfree(bp);
		goto fillbuf;
	}
trytofreespace:
	/*
	 * We keep the file I/O from hogging metadata I/O.
	 * This is desirable because file data is cached in the
	 * VM/Buffer cache even if a buffer is freed.
	 */
	if ((bp = bufqueues[QUEUE_AGE].tqh_first)) {
		if (bp->b_qindex != QUEUE_AGE)
			panic("getnewbuf: inconsistent AGE queue");
	} else if ((nvmio > nbuf - minbuf)
	    && (bp = bufqueues[QUEUE_VMIO].tqh_first)) {
		if (bp->b_qindex != QUEUE_VMIO)
			panic("getnewbuf: inconsistent VMIO queue");
	} else if ((!doingvmio || (nlru > nbuf - minbuf)) &&
	    (bp = bufqueues[QUEUE_LRU].tqh_first)) {
		if (bp->b_qindex != QUEUE_LRU)
			panic("getnewbuf: inconsistent LRU queue");
	}
	if (!bp) {
		if (doingvmio) {
			if ((bp = bufqueues[QUEUE_VMIO].tqh_first)) {
				if (bp->b_qindex != QUEUE_VMIO)
					panic("getnewbuf: inconsistent VMIO queue");
			} else if ((bp = bufqueues[QUEUE_LRU].tqh_first)) {
				if (bp->b_qindex != QUEUE_LRU)
					panic("getnewbuf: inconsistent LRU queue");
			}
		} else {
			if ((bp = bufqueues[QUEUE_LRU].tqh_first)) {
				if (bp->b_qindex != QUEUE_LRU)
					panic("getnewbuf: inconsistent LRU queue");
			} else if ((bp = bufqueues[QUEUE_VMIO].tqh_first)) {
				if (bp->b_qindex != QUEUE_VMIO)
					panic("getnewbuf: inconsistent VMIO queue");
			}
		}
	}
	if (!bp) {
		/* wait for a free buffer of any kind */
		needsbuffer = 1;
		tsleep((caddr_t) &needsbuffer, PRIBIO | slpflag, "newbuf", slptimeo);
		splx(s);
		return (0);
	}
	/* if we are a delayed write, convert to an async write */
	if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) {
		vfs_bio_awrite(bp);
		if (!slpflag && !slptimeo) {
			splx(s);
			return (0);
		}
		goto start;
	}
	bremfree(bp);

	if (bp->b_flags & B_VMIO) {
		bp->b_flags |= B_INVAL | B_BUSY;
		brelse(bp);
		bremfree(bp);
	}
	if (bp->b_vp)
		brelvp(bp);

	/* we are not free, nor do we contain interesting data */
	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
fillbuf:
	bp->b_flags |= B_BUSY;
	LIST_REMOVE(bp, b_hash);
	LIST_INSERT_HEAD(&invalhash, bp, b_hash);
	splx(s);
	if (bp->b_bufsize) {
		allocbuf(bp, 0, 0);
	}
	bp->b_flags = B_BUSY;
	bp->b_dev = NODEV;
	bp->b_vp = NULL;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_bcount = 0;
	bp->b_npages = 0;
	bp->b_wcred = bp->b_rcred = NOCRED;
	bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	if (bufspace >= maxbufspace) {
		s = splbio();
		bp->b_flags |= B_INVAL;
		brelse(bp);
		goto trytofreespace;
	}
	return (bp);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(struct vnode * vp, daddr_t blkno)
{
	struct buf *bp;
	struct bufhashhdr *bh;

	int s = splbio();

	bh = BUFHASH(vp, blkno);
	bp = bh->lh_first;

	/* Search hash chain */
	while (bp) {
		/* hit */
		if (bp->b_lblkno == blkno && bp->b_vp == vp
		    && (bp->b_flags & B_INVAL) == 0) {
			splx(s);
			return (bp);
		}
		bp = bp->b_hash.le_next;
	}
	splx(s);

	return (0);
}
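
/*
 * Illustrative sketch (compiled out, not part of the original file): using
 * inmem() rather than incore() to decide whether a read-ahead block is
 * worth issuing, much as breadn() does above.  The helper is hypothetical.
 */
#ifdef notdef
static int
example_worth_reading_ahead(struct vnode * vp, daddr_t rablkno)
{
	/*
	 * inmem() also finds data that is resident only in the VM object,
	 * so it avoids read-ahead that a bare incore() check would still issue.
	 */
	return (inmem(vp, rablkno) == 0);
}
#endif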

/*
 * Returns true if no I/O is needed to access the
 * associated VM object.  This is like incore except
 * it also hunts around in the VM system for the data.
 */

int
inmem(struct vnode * vp, daddr_t blkno)
{
	vm_object_t obj;
	vm_offset_t off, toff, tinc;
	vm_page_t m;

	if (incore(vp, blkno))
		return 1;
	if (vp->v_mount == 0)
		return 0;
	if ((vp->v_vmdata == 0) || (vp->v_flag & VVMIO) == 0)
		return 0;

	obj = (vm_object_t) vp->v_vmdata;
	tinc = PAGE_SIZE;
	if (tinc > vp->v_mount->mnt_stat.f_iosize)
		tinc = vp->v_mount->mnt_stat.f_iosize;
	off = blkno * vp->v_mount->mnt_stat.f_iosize;

	for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
		int mask;

		m = vm_page_lookup(obj, trunc_page(toff + off));
		if (!m)
			return 0;
		if (vm_page_is_valid(m, toff + off, tinc) == 0)
			return 0;
	}
	return 1;
}

/*
 * Get a block given a specified block and offset into a file/device.
 */
struct buf *
getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
{
	struct buf *bp;
	int s;
	struct bufhashhdr *bh;
	vm_offset_t off;
	int nleft;

	s = splbio();
loop:
	if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_cache_min)
		wakeup((caddr_t) &vm_pages_needed);

	if ((bp = incore(vp, blkno))) {
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			if (curproc == pageproc) {
				bp->b_flags |= B_PDWANTED;
				wakeup((caddr_t) &cnt.v_free_count);
			}
			if (!tsleep((caddr_t) bp, PRIBIO | slpflag, "getblk", slptimeo))
				goto loop;
			splx(s);
			return (struct buf *) NULL;
		}
		bp->b_flags |= B_BUSY | B_CACHE;
		bremfree(bp);
		/*
		 * check for size inconsistencies
		 */
		if (bp->b_bcount != size) {
#if defined(VFS_BIO_DEBUG)
			printf("getblk: invalid buffer size: %ld\n", bp->b_bcount);
#endif
			bp->b_flags |= B_INVAL;
			bwrite(bp);
			goto loop;
		}
		splx(s);
		return (bp);
	} else {
		vm_object_t obj;
		int doingvmio;

		if ((obj = (vm_object_t) vp->v_vmdata) && (vp->v_flag & VVMIO)) {
			doingvmio = 1;
		} else {
			doingvmio = 0;
		}
		if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) {
			if (slpflag || slptimeo)
				return NULL;
			goto loop;
		}
		/*
		 * It is possible that another buffer has been constituted
		 * during the time that getnewbuf is blocked.  This checks
		 * for this possibility, and handles it.
		 */
		if (incore(vp, blkno)) {
			bp->b_flags |= B_INVAL;
			brelse(bp);
			goto loop;
		}
		/*
		 * Insert the buffer into the hash, so that it can
		 * be found by incore.
		 */
		bp->b_blkno = bp->b_lblkno = blkno;
		bgetvp(vp, bp);
		LIST_REMOVE(bp, b_hash);
		bh = BUFHASH(vp, blkno);
		LIST_INSERT_HEAD(bh, bp, b_hash);

		if (doingvmio) {
			bp->b_flags |= (B_VMIO | B_CACHE);
#if defined(VFS_BIO_DEBUG)
			if (vp->v_type != VREG)
				printf("getblk: vmioing file type %d???\n", vp->v_type);
#endif
			++nvmio;
		} else {
			if (bp->b_flags & B_VMIO)
				--nvmio;
			bp->b_flags &= ~B_VMIO;
		}
		splx(s);

		if (!allocbuf(bp, size, 1)) {
			s = splbio();
			goto loop;
		}
		return (bp);
	}
}
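
/*
 * Illustrative sketch (compiled out, not part of the original file): a
 * caller that intends to overwrite an entire block uses getblk() instead
 * of bread(), so no read I/O is issued.  The helper is hypothetical; the
 * B_CACHE test mirrors the one in bread().
 */
#ifdef notdef
static void
example_overwrite_block(struct vnode * vp, daddr_t lbn, int bsize)
{
	struct buf *bp;

	bp = getblk(vp, lbn, bsize, 0, 0);
	if ((bp->b_flags & B_CACHE) == 0)
		bzero(bp->b_data, bsize);	/* not cached: start from zeroes */
	/* ... fill bp->b_data with the new contents ... */
	bdwrite(bp);				/* mark dirty, write out later */
}
#endif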

/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(int size)
{
	struct buf *bp;

	while ((bp = getnewbuf(0, 0, 0)) == 0);
	allocbuf(bp, size, 0);
	bp->b_flags |= B_INVAL;
	return (bp);
}

/*
 * This code constitutes the buffer memory from either anonymous system
 * memory (in the case of non-VMIO operations) or from an associated
 * VM object (in the case of VMIO operations).
 *
 * Note that this code is tricky, and has many complications to resolve
 * deadlock or inconsistent data situations.  Tread lightly!!!
 *
 * Modify the length of a buffer's underlying buffer storage without
 * destroying information (unless, of course the buffer is shrinking).
 */
int
allocbuf(struct buf * bp, int size, int vmio)
{

	int s;
	int newbsize, mbsize;
	int i;

	if ((bp->b_flags & B_VMIO) == 0) {
		/*
		 * Just get anonymous memory from the kernel
		 */
		mbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
		newbsize = round_page(size);

		if (newbsize == bp->b_bufsize) {
			bp->b_bcount = size;
			return 1;
		} else if (newbsize < bp->b_bufsize) {
			vm_hold_free_pages(
			    bp,
			    (vm_offset_t) bp->b_data + newbsize,
			    (vm_offset_t) bp->b_data + bp->b_bufsize);
			bufspace -= (bp->b_bufsize - newbsize);
		} else if (newbsize > bp->b_bufsize) {
			vm_hold_load_pages(
			    bp,
			    (vm_offset_t) bp->b_data + bp->b_bufsize,
			    (vm_offset_t) bp->b_data + newbsize);
			bufspace += (newbsize - bp->b_bufsize);
		}
	} else {
		vm_page_t m;
		int desiredpages;

		newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
		desiredpages = round_page(newbsize) / PAGE_SIZE;

		if (newbsize == bp->b_bufsize) {
			bp->b_bcount = size;
			return 1;
		} else if (newbsize < bp->b_bufsize) {
			if (desiredpages < bp->b_npages) {
				pmap_qremove((vm_offset_t) trunc_page(bp->b_data) +
				    desiredpages * PAGE_SIZE, (bp->b_npages - desiredpages));
				for (i = desiredpages; i < bp->b_npages; i++) {
					m = bp->b_pages[i];
					s = splhigh();
					while ((m->flags & PG_BUSY) || (m->busy != 0)) {
						m->flags |= PG_WANTED;
						tsleep(m, PVM, "biodep", 0);
					}
					splx(s);

					if (m->bmapped == 0) {
						printf("allocbuf: bmapped is zero for page %d\n", i);
						panic("allocbuf: error");
					}
					--m->bmapped;
					if (m->bmapped == 0) {
						PAGE_WAKEUP(m);
						if (m->valid == 0) {
							pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
							vm_page_free(m);
						}
					}
					bp->b_pages[i] = NULL;
				}
				bp->b_npages = desiredpages;
				bufspace -= (bp->b_bufsize - newbsize);
			}
		} else {
			vm_object_t obj;
			vm_offset_t tinc, off, toff, objoff;
			int pageindex, curbpnpages;
			struct vnode *vp;
			int bsize;

			vp = bp->b_vp;
			bsize = vp->v_mount->mnt_stat.f_iosize;

			if (bp->b_npages < desiredpages) {
				obj = (vm_object_t) vp->v_vmdata;
				tinc = PAGE_SIZE;
				if (tinc > bsize)
					tinc = bsize;
				off = bp->b_lblkno * bsize;
				curbpnpages = bp->b_npages;
		doretry:
				for (toff = 0; toff < newbsize; toff += tinc) {
					int mask;
					int bytesinpage;

					pageindex = toff / PAGE_SIZE;
					objoff = trunc_page(toff + off);
					if (pageindex < curbpnpages) {
						int pb;

						m = bp->b_pages[pageindex];
						if (m->offset != objoff)
							panic("allocbuf: page changed offset??!!!?");
						bytesinpage = tinc;
						if (tinc > (newbsize - toff))
							bytesinpage = newbsize - toff;
						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
							bp->b_flags &=
							    ~B_CACHE;
						}
						if ((m->flags & PG_ACTIVE) == 0)
							vm_page_activate(m);
						continue;
					}
					m = vm_page_lookup(obj, objoff);
					if (!m) {
						m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL);
						if (!m) {
							int j;

							for (j = bp->b_npages; j < pageindex; j++) {
								vm_page_t mt = bp->b_pages[j];

								PAGE_WAKEUP(mt);
								if (mt->valid == 0 && mt->bmapped == 0) {
									vm_page_free(mt);
								}
							}
							VM_WAIT;
							if (vmio && (bp->b_flags & B_PDWANTED)) {
								bp->b_flags |= B_INVAL;
								brelse(bp);
								return 0;
							}
							curbpnpages = bp->b_npages;
							goto doretry;
						}
						m->valid = 0;
						vm_page_activate(m);
					} else if ((m->valid == 0) || (m->flags & PG_BUSY)) {
						int j;
						int bufferdestroyed = 0;

						for (j = bp->b_npages; j < pageindex; j++) {
							vm_page_t mt = bp->b_pages[j];

							PAGE_WAKEUP(mt);
							if (mt->valid == 0 && mt->bmapped == 0) {
								vm_page_free(mt);
							}
						}
						if (vmio && (bp->b_flags & B_PDWANTED)) {
							bp->b_flags |= B_INVAL;
							brelse(bp);
							VM_WAIT;
							bufferdestroyed = 1;
						}
						s = splbio();
						if (m->flags & PG_BUSY) {
							m->flags |= PG_WANTED;
							tsleep(m, PRIBIO, "pgtblk", 0);
						} else if (m->valid == 0 && m->bmapped == 0) {
							vm_page_free(m);
						}
						splx(s);
						if (bufferdestroyed)
							return 0;
						curbpnpages = bp->b_npages;
						goto doretry;
					} else {
						int pb;

						if ((m->flags & PG_CACHE) &&
						    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
							int j;

							for (j = bp->b_npages; j < pageindex; j++) {
								vm_page_t mt = bp->b_pages[j];

								PAGE_WAKEUP(mt);
								if (mt->valid == 0 && mt->bmapped == 0) {
									vm_page_free(mt);
								}
							}
							VM_WAIT;
							if (vmio && (bp->b_flags & B_PDWANTED)) {
								bp->b_flags |= B_INVAL;
								brelse(bp);
								return 0;
							}
							curbpnpages = bp->b_npages;
							goto doretry;
						}
						bytesinpage = tinc;
						if (tinc > (newbsize - toff))
							bytesinpage = newbsize - toff;
						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
							bp->b_flags &= ~B_CACHE;
						}
						if ((m->flags & PG_ACTIVE) == 0)
							vm_page_activate(m);
						m->flags |= PG_BUSY;
					}
					bp->b_pages[pageindex] = m;
					curbpnpages = pageindex + 1;
				}
				if (bsize >= PAGE_SIZE) {
					for (i = bp->b_npages; i < curbpnpages; i++) {
						m = bp->b_pages[i];
						if (m->valid == 0) {
							bp->b_flags &= ~B_CACHE;
						}
						m->bmapped++;
						PAGE_WAKEUP(m);
					}
				} else {
					if (!vm_page_is_valid(bp->b_pages[0], off, bsize))
						bp->b_flags &= ~B_CACHE;
					bp->b_pages[0]->bmapped++;
					PAGE_WAKEUP(bp->b_pages[0]);
				}
				bp->b_npages = curbpnpages;
				bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
				pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages);
				bp->b_data += off % PAGE_SIZE;
			}
			bufspace += (newbsize - bp->b_bufsize);
		}
	}
	bp->b_bufsize = newbsize;
	bp->b_bcount = size;
	return 1;
}

/*
 * Wait for buffer I/O completion, returning error status.
 */
int
biowait(register struct buf * bp)
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		tsleep((caddr_t) bp, PRIBIO, "biowait", 0);
	if ((bp->b_flags & B_ERROR) || bp->b_error) {
		if ((bp->b_flags & B_INVAL) == 0) {
			bp->b_flags |= B_INVAL;
			bp->b_dev = NODEV;
			LIST_REMOVE(bp, b_hash);
			LIST_INSERT_HEAD(&invalhash, bp, b_hash);
			wakeup((caddr_t) bp);
		}
		if (!bp->b_error)
			bp->b_error = EIO;
		else
			bp->b_flags |= B_ERROR;
		splx(s);
		return (bp->b_error);
	} else {
		splx(s);
		return (0);
	}
}

/*
 * Finish I/O on a buffer, calling an optional function.
 * This is usually called from interrupt level, so process blocking
 * is not *a good idea*.
 */
void
biodone(register struct buf * bp)
{
	int s;

	s = splbio();
	if (bp->b_flags & B_DONE)
		printf("biodone: buffer already done\n");
	bp->b_flags |= B_DONE;

	if ((bp->b_flags & B_READ) == 0) {
		struct vnode *vp = bp->b_vp;
		vwakeup(bp);
		if (vp && (vp->v_numoutput == (nbuf/4)) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
#ifdef BOUNCE_BUFFERS
	if (bp->b_flags & B_BOUNCE)
		vm_bounce_free(bp);
#endif

	/* call optional completion function if requested */
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone) (bp);
		splx(s);
		return;
	}
	if (bp->b_flags & B_VMIO) {
		int i, resid;
		vm_offset_t foff;
		vm_page_t m;
		vm_object_t obj;
		int iosize;
		struct vnode *vp = bp->b_vp;

		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		obj = (vm_object_t) vp->v_vmdata;
		if (!obj) {
			splx(s);
			return;
		}
#if defined(VFS_BIO_DEBUG)
		if (obj->paging_in_progress < bp->b_npages) {
			printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
			    obj->paging_in_progress, bp->b_npages);
		}
#endif
		iosize = bp->b_bufsize;
		for (i = 0; i < bp->b_npages; i++) {
			m = bp->b_pages[i];
			if (m == bogus_page) {
				m = vm_page_lookup(obj, foff);
				if (!m) {
#if defined(VFS_BIO_DEBUG)
					printf("biodone: page disappeared\n");
#endif
					--obj->paging_in_progress;
					continue;
				}
				bp->b_pages[i] = m;
				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
			}
#if defined(VFS_BIO_DEBUG)
			if (trunc_page(foff) != m->offset) {
				printf("biodone: foff(%d)/m->offset(%d) mismatch\n", foff, m->offset);
			}
#endif
			resid = (m->offset + PAGE_SIZE) - foff;
			if (resid > iosize)
				resid = iosize;
			if (resid > 0) {
				vm_page_set_valid(m, foff, resid);
				vm_page_set_clean(m, foff, resid);
			}

			/*
			 * when debugging new filesystems or buffer I/O methods, this
			 * is the most common error that pops up.  if you see this, you
			 * have not set the page busy flag correctly!!!
			 */
			if (m->busy == 0) {
				printf("biodone: page busy < 0, off: %d, foff: %d, resid: %d, index: %d\n",
				    m->offset, foff, resid, i);
				printf(" iosize: %d, lblkno: %d\n",
				    bp->b_vp->v_mount->mnt_stat.f_iosize, bp->b_lblkno);
				printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n",
				    m->valid, m->dirty, m->bmapped);
				panic("biodone: page busy < 0\n");
			}
			--m->busy;
			PAGE_WAKEUP(m);
			--obj->paging_in_progress;
			foff += resid;
			iosize -= resid;
		}
		if (obj && obj->paging_in_progress == 0 &&
		    (obj->flags & OBJ_PIPWNT)) {
			obj->flags &= ~OBJ_PIPWNT;
			wakeup((caddr_t) obj);
		}
	}
	/*
	 * For asynchronous completions, release the buffer now. The brelse
	 * checks for B_WANTED and will do the wakeup there if necessary - so
	 * no need to do a wakeup here in the async case.
	 */

	if (bp->b_flags & B_ASYNC) {
		brelse(bp);
	} else {
		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
		wakeup((caddr_t) bp);
	}
	splx(s);
}

int
count_lock_queue()
{
	int count;
	struct buf *bp;

	count = 0;
	for (bp = bufqueues[QUEUE_LOCKED].tqh_first;
	    bp != NULL;
	    bp = bp->b_freelist.tqe_next)
		count++;
	return (count);
}

int vfs_update_interval = 30;

void
vfs_update()
{
	(void) spl0();
	while (1) {
		tsleep((caddr_t) &vfs_update_wakeup, PRIBIO, "update",
		    hz * vfs_update_interval);
		vfs_update_wakeup = 0;
		sync(curproc, NULL, NULL);
	}
}

/*
 * This routine is called in lieu of iodone in the case of
 * incomplete I/O.  This keeps the busy status for pages
 * consistent.
 */
void
vfs_unbusy_pages(struct buf * bp)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		struct vnode *vp = bp->b_vp;
		vm_object_t obj = (vm_object_t) vp->v_vmdata;
		vm_offset_t foff;

		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;

		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];

			if (m == bogus_page) {
				m = vm_page_lookup(obj, foff);
				if (!m) {
					panic("vfs_unbusy_pages: page missing\n");
				}
				bp->b_pages[i] = m;
				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
			}
			--obj->paging_in_progress;
			--m->busy;
			PAGE_WAKEUP(m);
		}
		if (obj->paging_in_progress == 0 &&
		    (obj->flags & OBJ_PIPWNT)) {
			obj->flags &= ~OBJ_PIPWNT;
			wakeup((caddr_t) obj);
		}
	}
}

/*
 * This routine is called before a device strategy routine.
 * It is used to tell the VM system that paging I/O is in
 * progress, and treat the pages associated with the buffer
 * almost as being PG_BUSY.  Also the object paging_in_progress
 * flag is handled to make sure that the object doesn't become
 * inconsistent.
 */
void
vfs_busy_pages(struct buf * bp, int clear_modify)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		vm_object_t obj = (vm_object_t) bp->b_vp->v_vmdata;
		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		int iocount = bp->b_bufsize;

		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
			int resid = (m->offset + PAGE_SIZE) - foff;

			if (resid > iocount)
				resid = iocount;
			obj->paging_in_progress++;
			m->busy++;
			if (clear_modify) {
				vm_page_test_dirty(m);
				pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ);
			} else if (bp->b_bcount >= PAGE_SIZE) {
				if (m->valid && (bp->b_flags & B_CACHE) == 0) {
					bp->b_pages[i] = bogus_page;
					pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
				}
			}
			foff += resid;
			iocount -= resid;
		}
	}
}

/*
 * Tell the VM system that the pages associated with this buffer
 * are dirty.  This is in case of the unlikely circumstance that
 * a buffer has to be destroyed before it is flushed.
 */
void
vfs_dirty_pages(struct buf * bp)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		int iocount = bp->b_bufsize;

		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
			int resid = (m->offset + PAGE_SIZE) - foff;

			if (resid > iocount)
				resid = iocount;
			if (resid > 0) {
				vm_page_set_valid(m, foff, resid);
				vm_page_set_dirty(m, foff, resid);
			}
			PAGE_WAKEUP(m);
			foff += resid;
			iocount -= resid;
		}
	}
}

/*
 * vm_hold_load_pages and vm_hold_free_pages get pages into
 * a buffer's address space.  The pages are anonymous and are
 * not associated with a file object.
 */
void
vm_hold_load_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
{
	vm_offset_t pg;
	vm_page_t p;
	vm_offset_t from = round_page(froma);
	vm_offset_t to = round_page(toa);

	for (pg = from; pg < to; pg += PAGE_SIZE) {

tryagain:

		p = vm_page_alloc(kernel_object, pg - VM_MIN_KERNEL_ADDRESS,
		    VM_ALLOC_NORMAL);
		if (!p) {
			VM_WAIT;
			goto tryagain;
		}
		vm_page_wire(p);
		pmap_kenter(pg, VM_PAGE_TO_PHYS(p));
		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = p;
		PAGE_WAKEUP(p);
		bp->b_npages++;
	}
}

void
vm_hold_free_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
{
	vm_offset_t pg;
	vm_page_t p;
	vm_offset_t from = round_page(froma);
	vm_offset_t to = round_page(toa);

	for (pg = from; pg < to; pg += PAGE_SIZE) {
		p = bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE];
		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = 0;
		pmap_kremove(pg);
		vm_page_free(p);
		--bp->b_npages;
	}
}

void
bufstats()
{
}
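
/*
 * Illustrative sketch (compiled out, not part of the original file): how a
 * caller arranges for biodone() to invoke a completion callback on an
 * asynchronous buffer by setting B_CALL and b_iodone before handing the
 * buffer to the strategy routine.  Both helpers are hypothetical.
 */
#ifdef notdef
static void
example_iodone(struct buf * bp)
{
	/* runs from biodone(), possibly at interrupt level: must not sleep */
	if (bp->b_flags & B_ERROR)
		printf("example_iodone: error %d on blkno %ld\n",
		    bp->b_error, (long) bp->b_blkno);
	brelse(bp);	/* the B_CALL path in biodone() skips its own release */
}

static void
example_start_async_read(struct buf * bp)
{
	bp->b_flags |= B_READ | B_ASYNC | B_CALL;
	bp->b_iodone = example_iodone;
	vfs_busy_pages(bp, 0);
	VOP_STRATEGY(bp);
}
#endif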