/*
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. This work was done expressly for inclusion into FreeBSD.  Other use
 *    is allowed if this notation is included.
 * 5. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $Id: vfs_bio.c,v 1.19 1995/01/10 09:20:34 davidg Exp $
 */

/*
 * this file contains a new buffer I/O scheme implementing a coherent
 * VM object and buffer cache scheme.  Pains have been taken to make
 * sure that the performance degradation associated with schemes such
 * as this is not realized.
 *
 * Author: John S. Dyson
 * Significant help during the development and debugging phases
 * has been provided by David Greenman, also of the FreeBSD core team.
 */

#define VMIO
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <vm/vm.h>
#include <vm/vm_pageout.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>

#include <miscfs/specfs/specdev.h>

struct buf *buf;		/* buffer header pool */
int nbuf;			/* number of buffer headers calculated
				 * elsewhere */
struct swqueue bswlist;
int nvmio, nlru;

extern vm_map_t buffer_map, io_map, kernel_map, pager_map;

void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
void vfs_dirty_pages(struct buf * bp);
void vfs_busy_pages(struct buf *, int clear_modify);

int needsbuffer;

/*
 * Internal update daemon, process 3
 *	The variable vfs_update_wakeup allows for internal syncs.
 */
int vfs_update_wakeup;


/*
 * buffers base kva
 */
caddr_t buffers_kva;

/*
 * bogus page -- for I/O to/from partially complete buffers
 */
vm_page_t bogus_page;
vm_offset_t bogus_offset;

/*
 * Initialize buffer headers and related structures.
 */
void
bufinit()
{
	struct buf *bp;
	int i;

	TAILQ_INIT(&bswlist);
	LIST_INIT(&invalhash);

	/* first, make a null hash table */
	for (i = 0; i < BUFHSZ; i++)
		LIST_INIT(&bufhashtbl[i]);

	/* next, make a null set of free lists */
	for (i = 0; i < BUFFER_QUEUES; i++)
		TAILQ_INIT(&bufqueues[i]);

	buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf);
	/* finally, initialize each buffer header and stick on empty q */
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero(bp, sizeof *bp);
		bp->b_flags = B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_vp = NULL;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_qindex = QUEUE_EMPTY;
		bp->b_vnbufs.le_next = NOLIST;
		bp->b_data = buffers_kva + i * MAXBSIZE;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
	}

	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
	bogus_page = vm_page_alloc(kernel_object, bogus_offset - VM_MIN_KERNEL_ADDRESS, 0);

}

/*
 * remove the buffer from the appropriate free list
 */
void
bremfree(struct buf * bp)
{
	int s = splbio();

	if (bp->b_qindex != QUEUE_NONE) {
		if (bp->b_qindex == QUEUE_LRU)
			--nlru;
		TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
		bp->b_qindex = QUEUE_NONE;
	} else {
		panic("bremfree: removing a buffer when not on a queue");
	}
	splx(s);
}

/*
 * Get a buffer with the specified data.  Look in the cache first.
 */
int
bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
    struct buf ** bpp)
{
	struct buf *bp;

	bp = getblk(vp, blkno, size, 0, 0);
	*bpp = bp;

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
		if (bp->b_rcred == NOCRED) {
			if (cred != NOCRED)
				crhold(cred);
			bp->b_rcred = cred;
		}
		vfs_busy_pages(bp, 0);
		VOP_STRATEGY(bp);
		return (biowait(bp));
	} else if (bp->b_lblkno == bp->b_blkno) {
		VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
		    &bp->b_blkno, (int *) 0);
	}
	return (0);
}

/*
 * Operates like bread, but also starts asynchronous I/O on
 * read-ahead blocks.
 */
int
breadn(struct vnode * vp, daddr_t blkno, int size,
    daddr_t * rablkno, int *rabsize,
    int cnt, struct ucred * cred, struct buf ** bpp)
{
	struct buf *bp, *rabp;
	int i;
	int rv = 0, readwait = 0;

	*bpp = bp = getblk(vp, blkno, size, 0, 0);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
		if (bp->b_rcred == NOCRED) {
			if (cred != NOCRED)
				crhold(cred);
			bp->b_rcred = cred;
		}
		vfs_busy_pages(bp, 0);
		VOP_STRATEGY(bp);
		++readwait;
	} else if (bp->b_lblkno == bp->b_blkno) {
		VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
		    &bp->b_blkno, (int *) 0);
	}

	for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
		if (inmem(vp, *rablkno))
			continue;
		rabp = getblk(vp, *rablkno, *rabsize, 0, 0);

		if ((rabp->b_flags & B_CACHE) == 0) {
			if (curproc && curproc->p_stats)
				curproc->p_stats->p_ru.ru_inblock++;
			rabp->b_flags |= B_READ | B_ASYNC;
			rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
			if (rabp->b_rcred == NOCRED) {
				if (cred != NOCRED)
					crhold(cred);
				rabp->b_rcred = cred;
			}
			vfs_busy_pages(rabp, 0);
			VOP_STRATEGY(rabp);
		} else {
			brelse(rabp);
		}
	}

	if (readwait) {
		rv = biowait(bp);
	}
	return (rv);
}

/*
 * this routine is used by filesystems to get at pages in the PG_CACHE
 * queue.  also, it is used to read pages that are currently being
 * written out by the file i/o routines.
 */
int
vfs_read_bypass(struct vnode * vp, struct uio * uio, int maxread, daddr_t lbn)
{
	vm_page_t m;
	vm_offset_t kv;
	int nread;
	int error;
	struct buf *bp, *bpa;
	vm_object_t obj;
	int off;
	int nrest;
	int flags;
	int s;

	return 0;	/* XXX early return: the bypass path below is currently disabled */
	/*
	 * don't use the bypass mechanism for non-vmio vnodes
	 */
	if ((vp->v_flag & VVMIO) == 0)
		return 0;
	/*
	 * get the VM object (it has the pages)
	 */
	obj = (vm_object_t) vp->v_vmdata;
	if (obj == NULL)
		return 0;

	/*
	 * if there is a buffer that is not busy, it is faster to use it.
	 * This way read-ahead, etc., work better.
	 */

	s = splbio();
	if ((bp = incore(vp, lbn)) &&
	    (((bp->b_flags & B_READ) && (bp->b_flags & B_BUSY))
		|| (bp->b_flags & B_BUSY) == 0)) {
		splx(s);
		return 0;
	}
	splx(s);

	/*
	 * get a pbuf --> we just use the kva
	 */
	kv = kmem_alloc_wait(pager_map, PAGE_SIZE);
	nread = 0;
	error = 0;

	while (!error && uio->uio_resid && maxread > 0) {
		int po;
		int count;
		int s;

relookup:
		/*
		 * lookup the page
		 */
		m = vm_page_lookup(obj, trunc_page(uio->uio_offset));
		if (!m)
			break;
		/*
		 * get the offset into the page, and the amount to read in the
		 * page
		 */
		nrest = round_page(uio->uio_offset) - uio->uio_offset;
		if (nrest > uio->uio_resid)
			nrest = uio->uio_resid;

		/*
		 * check the valid bits for the page (DEV_BSIZE chunks)
		 */
		if (!vm_page_is_valid(m, uio->uio_offset, nrest))
			break;

		/*
		 * if the page is busy, wait for it
		 */
		s = splhigh();
		if (!m->valid || (m->flags & PG_BUSY)) {
			m->flags |= PG_WANTED;
			tsleep((caddr_t) m, PVM, "vnibyp", 0);
			splx(s);
			goto relookup;
		}
		/*
		 * if the page is on the cache queue, remove it -- cache queue
		 * pages should be freeable by vm_page_alloc anytime.
		 */
		if (m->flags & PG_CACHE) {
			if (cnt.v_free_count + cnt.v_cache_count < cnt.v_free_reserved) {
				VM_WAIT;
				goto relookup;
			}
			vm_page_unqueue(m);
		}
		/*
		 * add a buffer mapping (essentially wires the page too).
		 */
		m->bmapped++;
		splx(s);

		/*
		 * enter it into the kva
		 */
		pmap_qenter(kv, &m, 1);

		/*
		 * do the copy
		 */
		po = uio->uio_offset & (PAGE_SIZE - 1);
		count = PAGE_SIZE - po;
		if (count > maxread)
			count = maxread;
		if (count > uio->uio_resid)
			count = uio->uio_resid;

		error = uiomove((caddr_t) kv + po, count, uio);
		if (!error) {
			nread += count;
			maxread -= count;
		}
		/*
		 * remove from kva
		 */
		pmap_qremove(kv, 1);
		PAGE_WAKEUP(m);	/* XXX probably unnecessary */
		/*
		 * If the page was on the cache queue, then by definition
		 * bmapped was 0.  Thus the following case will also take care
		 * of the page being removed from the cache queue above.
		 * Also, it is possible that the page was already entered onto
		 * another queue (or was already there), so we don't put it
		 * onto the cache queue...
		 */
		m->bmapped--;
		if (m->bmapped == 0 &&
		    (m->flags & (PG_CACHE | PG_ACTIVE | PG_INACTIVE)) == 0 &&
		    m->wire_count == 0) {
			vm_page_test_dirty(m);

			/*
			 * make sure that the darned page is on a queue
			 * somewhere...
			 */
			if ((m->dirty & m->valid) == 0) {
				vm_page_cache(m);
			} else if (m->hold_count == 0) {
				vm_page_deactivate(m);
			} else {
				vm_page_activate(m);
			}
		}
	}
	/*
	 * release our buffer (kva).
	 */
	kmem_free_wakeup(pager_map, kv, PAGE_SIZE);
	return nread;
}


/*
 * Write, release buffer on completion.  (Done by iodone
 * if async.)
 */
int
bwrite(struct buf * bp)
{
	int oldflags = bp->b_flags;

	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	}
	if (!(bp->b_flags & B_BUSY))
		panic("bwrite: buffer is not busy???");

	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	bp->b_flags |= B_WRITEINPROG;

	if (oldflags & B_ASYNC) {
		if (oldflags & B_DELWRI) {
			reassignbuf(bp, bp->b_vp);
		} else if (curproc) {
			++curproc->p_stats->p_ru.ru_oublock;
		}
	}
	bp->b_vp->v_numoutput++;
	vfs_busy_pages(bp, 1);
	VOP_STRATEGY(bp);

	if ((oldflags & B_ASYNC) == 0) {
		int rtval = biowait(bp);

		if (oldflags & B_DELWRI) {
			reassignbuf(bp, bp->b_vp);
		} else if (curproc) {
			++curproc->p_stats->p_ru.ru_oublock;
		}
		brelse(bp);
		return (rtval);
	}
	return (0);
}

int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{
	return (bwrite(ap->a_bp));
}

/*
 * Delayed write.  (Buffer is marked dirty).
 */
void
bdwrite(struct buf * bp)
{

	if ((bp->b_flags & B_BUSY) == 0) {
		panic("bdwrite: buffer is not busy");
	}
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return;
	}
	if (bp->b_flags & B_TAPE) {
		bawrite(bp);
		return;
	}
	bp->b_flags &= ~B_READ;
	vfs_dirty_pages(bp);
	if ((bp->b_flags & B_DELWRI) == 0) {
		if (curproc)
			++curproc->p_stats->p_ru.ru_oublock;
		bp->b_flags |= B_DONE | B_DELWRI;
		reassignbuf(bp, bp->b_vp);
	}
	brelse(bp);
	return;
}

/*
 * Asynchronous write.
 * Start output on a buffer, but do not wait for it to complete.
 * The buffer is released when the output completes.
 */
void
bawrite(struct buf * bp)
{
	if (((bp->b_flags & B_DELWRI) == 0) && (bp->b_vp->v_numoutput > 24)) {
		int s = splbio();

		while (bp->b_vp->v_numoutput > 16) {
			bp->b_vp->v_flag |= VBWAIT;
			tsleep((caddr_t) &bp->b_vp->v_numoutput, PRIBIO, "bawnmo", 0);
		}
		splx(s);
	}
	bp->b_flags |= B_ASYNC;
	(void) bwrite(bp);
}

/*
 * Release a buffer.
 */
void
brelse(struct buf * bp)
{
	int s;

	if (bp->b_flags & B_CLUSTER) {
		relpbuf(bp);
		return;
	}
	/* anyone need a "free" block? */
	s = splbio();

	if (needsbuffer) {
		needsbuffer = 0;
		wakeup((caddr_t) &needsbuffer);
	}
	/* anyone need this block? */
	if (bp->b_flags & B_WANTED) {
		bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_AGE);
		wakeup((caddr_t) bp);
	} else if (bp->b_flags & B_VMIO) {
		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
		wakeup((caddr_t) bp);
	}
	if (bp->b_flags & B_LOCKED)
		bp->b_flags &= ~B_ERROR;

	if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) ||
	    (bp->b_bufsize <= 0)) {
		bp->b_flags |= B_INVAL;
		bp->b_flags &= ~(B_DELWRI | B_CACHE);
		if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp)
			brelvp(bp);
	}
	if (bp->b_flags & B_VMIO) {
		vm_offset_t foff;
		vm_object_t obj;
		int i, resid;
		vm_page_t m;
		int iototal = bp->b_bufsize;

		foff = 0;
		obj = 0;
		if (bp->b_npages) {
			if (bp->b_vp && bp->b_vp->v_mount) {
				foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
			} else {
				/*
				 * vnode pointer has been ripped away --
				 * probably file gone...
				 */
				foff = bp->b_pages[0]->offset;
			}
		}
		for (i = 0; i < bp->b_npages; i++) {
			m = bp->b_pages[i];
			if (m == bogus_page) {
				panic("brelse: bogus page found");
			}
			resid = (m->offset + PAGE_SIZE) - foff;
			if (resid > iototal)
				resid = iototal;
			if (resid > 0) {
				if (bp->b_flags & (B_ERROR | B_NOCACHE)) {
					vm_page_set_invalid(m, foff, resid);
				} else if ((bp->b_flags & B_DELWRI) == 0) {
					vm_page_set_clean(m, foff, resid);
					vm_page_set_valid(m, foff, resid);
				}
			} else {
				vm_page_test_dirty(m);
			}
			if (bp->b_flags & B_INVAL) {
				if (m->bmapped == 0) {
					panic("brelse: bmapped is zero for page");
				}
				--m->bmapped;
				if (m->bmapped == 0) {
					PAGE_WAKEUP(m);
					if ((m->dirty & m->valid) == 0)
						vm_page_cache(m);
				}
			}
			foff += resid;
			iototal -= resid;
		}

		if (bp->b_flags & B_INVAL) {
			pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
			bp->b_npages = 0;
			bp->b_bufsize = 0;
			bp->b_flags &= ~B_VMIO;
			if (bp->b_vp)
				brelvp(bp);
			--nvmio;
		}
	}
	if (bp->b_qindex != QUEUE_NONE)
		panic("brelse: free buffer onto another queue???");

	/* enqueue */
	/* buffers with no memory */
	if (bp->b_bufsize == 0) {
		bp->b_qindex = QUEUE_EMPTY;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
		LIST_REMOVE(bp, b_hash);
		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
		bp->b_dev = NODEV;
	/* buffers with junk contents */
	} else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE)) {
		bp->b_qindex = QUEUE_AGE;
		TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist);
		LIST_REMOVE(bp, b_hash);
		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
		bp->b_dev = NODEV;
	/* buffers that are locked */
	} else if (bp->b_flags & B_LOCKED) {
		bp->b_qindex = QUEUE_LOCKED;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
	/* buffers with stale but valid contents */
	} else if (bp->b_flags & B_AGE) {
		bp->b_qindex = QUEUE_AGE;
		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist);
	/* buffers with valid and quite potentially reusable contents */
	} else {
		if (bp->b_flags & B_VMIO)
			bp->b_qindex = QUEUE_VMIO;
		else {
			bp->b_qindex = QUEUE_LRU;
			++nlru;
		}
		TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist);
	}

	/* unlock */
	bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE);
	splx(s);
}

/*
 * this routine implements clustered async writes for
 * clearing out B_DELWRI buffers...
 */
void
vfs_bio_awrite(struct buf * bp)
{
	int i;
	daddr_t lblkno = bp->b_lblkno;
	struct vnode *vp = bp->b_vp;
	int s;
	int ncl;
	struct buf *bpa;

	s = splbio();
	if (vp->v_mount && (vp->v_flag & VVMIO) &&
	    (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
		int size = vp->v_mount->mnt_stat.f_iosize;

		for (i = 1; i < MAXPHYS / size; i++) {
			if ((bpa = incore(vp, lblkno + i)) &&
			    ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) ==
				(B_DELWRI | B_CLUSTEROK)) &&
			    (bpa->b_bufsize == size)) {
				if ((bpa->b_blkno == bpa->b_lblkno) ||
				    (bpa->b_blkno != bp->b_blkno + (i * size) / DEV_BSIZE))
					break;
			} else {
				break;
			}
		}
		ncl = i;
		/*
		 * this is a possible cluster write
		 */
		if (ncl != 1) {
			cluster_wbuild(vp, NULL, size, lblkno, ncl, -1);
			splx(s);
			return;
		}
	}
	/*
	 * default (old) behavior, writing out only one block
	 */
	bremfree(bp);
	bp->b_flags |= B_BUSY | B_ASYNC;
	bwrite(bp);
	splx(s);
}

int freebufspace;
int allocbufspace;

/*
 * Find a buffer header which is available for use.
 */
struct buf *
getnewbuf(int slpflag, int slptimeo, int doingvmio)
{
	struct buf *bp;
	int s;
	int firstbp = 1;

	s = splbio();
start:
	/* can we constitute a new buffer? */
	if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) {
		if (bp->b_qindex != QUEUE_EMPTY)
			panic("getnewbuf: inconsistent EMPTY queue");
		bremfree(bp);
		goto fillbuf;
	}
	/*
	 * we keep the file I/O from hogging metadata I/O
	 */
	if ((bp = bufqueues[QUEUE_AGE].tqh_first)) {
		if (bp->b_qindex != QUEUE_AGE)
			panic("getnewbuf: inconsistent AGE queue");
	} else if ((nvmio > (2 * nbuf / 3)) &&
	    (bp = bufqueues[QUEUE_VMIO].tqh_first)) {
		if (bp->b_qindex != QUEUE_VMIO)
			panic("getnewbuf: inconsistent VMIO queue");
	} else if ((!doingvmio || (nlru > (2 * nbuf / 3))) &&
	    (bp = bufqueues[QUEUE_LRU].tqh_first)) {
		if (bp->b_qindex != QUEUE_LRU)
			panic("getnewbuf: inconsistent LRU queue");
	}
	if (!bp) {
		if (doingvmio) {
			if ((bp = bufqueues[QUEUE_VMIO].tqh_first)) {
				if (bp->b_qindex != QUEUE_VMIO)
					panic("getnewbuf: inconsistent VMIO queue");
			} else if ((bp = bufqueues[QUEUE_LRU].tqh_first)) {
				if (bp->b_qindex != QUEUE_LRU)
					panic("getnewbuf: inconsistent LRU queue");
			}
		} else {
			if ((bp = bufqueues[QUEUE_LRU].tqh_first)) {
				if (bp->b_qindex != QUEUE_LRU)
					panic("getnewbuf: inconsistent LRU queue");
			} else if ((bp = bufqueues[QUEUE_VMIO].tqh_first)) {
				if (bp->b_qindex != QUEUE_VMIO)
					panic("getnewbuf: inconsistent VMIO queue");
			}
		}
	}
	if (!bp) {
		/* wait for a free buffer of any kind */
		needsbuffer = 1;
		tsleep((caddr_t) &needsbuffer, PRIBIO | slpflag, "newbuf", slptimeo);
		splx(s);
		return (0);
	}
	/* if we are a delayed write, convert to an async write */
	if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) {
		vfs_bio_awrite(bp);
		if (!slpflag && !slptimeo) {
			splx(s);
			return (0);
		}
		goto start;
	}
	bremfree(bp);

	if (bp->b_flags & B_VMIO) {
		bp->b_flags |= B_INVAL | B_BUSY;
		brelse(bp);
		bremfree(bp);
	}
	if (bp->b_vp)
		brelvp(bp);

	/* we are not free, nor do we contain interesting data */
	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
fillbuf:
	bp->b_flags = B_BUSY;
	LIST_REMOVE(bp, b_hash);
	LIST_INSERT_HEAD(&invalhash, bp, b_hash);
	splx(s);
	if (bp->b_bufsize) {
		allocbuf(bp, 0, 0);
	}
	bp->b_dev = NODEV;
	bp->b_vp = NULL;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_bcount = 0;
	bp->b_npages = 0;
	bp->b_wcred = bp->b_rcred = NOCRED;
	bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	return (bp);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(struct vnode * vp, daddr_t blkno)
{
	struct buf *bp;
	struct bufhashhdr *bh;

	int s = splbio();

	bh = BUFHASH(vp, blkno);
	bp = bh->lh_first;

	/* Search hash chain */
	while (bp) {
		/* hit */
		if (bp->b_lblkno == blkno && bp->b_vp == vp
		    && (bp->b_flags & B_INVAL) == 0) {
			splx(s);
			return (bp);
		}
		bp = bp->b_hash.le_next;
	}
	splx(s);

	return (0);
}

/*
 * returns true if no I/O is needed to access the
 * associated VM object.
 */

int
inmem(struct vnode * vp, daddr_t blkno)
{
	vm_object_t obj;
	vm_offset_t off, toff, tinc;
	vm_page_t m;

	if (incore(vp, blkno))
		return 1;
	if (vp->v_mount == 0)
		return 0;
	if (vp->v_vmdata == 0)
		return 0;

	obj = (vm_object_t) vp->v_vmdata;
	tinc = PAGE_SIZE;
	if (tinc > vp->v_mount->mnt_stat.f_iosize)
		tinc = vp->v_mount->mnt_stat.f_iosize;
	off = blkno * vp->v_mount->mnt_stat.f_iosize;

	for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
		int mask;

		m = vm_page_lookup(obj, trunc_page(toff + off));
		if (!m)
			return 0;
		if (vm_page_is_valid(m, toff + off, tinc) == 0)
			return 0;
	}
	return 1;
}

/*
 * Get a block given a specified block and offset into a file/device.
 */
struct buf *
getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
{
	struct buf *bp;
	int s;
	struct bufhashhdr *bh;
	vm_offset_t off;
	int nleft;

	s = splbio();
loop:
	if ((cnt.v_free_count + cnt.v_cache_count) <
	    cnt.v_free_reserved + MAXBSIZE / PAGE_SIZE)
		wakeup((caddr_t) &vm_pages_needed);
	if ((bp = incore(vp, blkno))) {
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			if (curproc == pageproc) {
				bp->b_flags |= B_PDWANTED;
				wakeup((caddr_t) &cnt.v_free_count);
			}
			if (!tsleep((caddr_t) bp, PRIBIO | slpflag, "getblk", slptimeo))
				goto loop;
			splx(s);
			return (struct buf *) NULL;
		}
		bp->b_flags |= B_BUSY | B_CACHE;
		bremfree(bp);
		/*
		 * check for size inconsistencies
		 */
		if (bp->b_bcount != size) {
#if defined(VFS_BIO_DEBUG)
			printf("getblk: invalid buffer size: %ld\n", bp->b_bcount);
#endif
			bp->b_flags |= B_INVAL;
			bwrite(bp);
			goto loop;
		}
		splx(s);
		return (bp);
	} else {
		vm_object_t obj;
		int doingvmio;

		if ((obj = (vm_object_t) vp->v_vmdata) &&
		    (vp->v_flag & VVMIO) /* && (blkno >= 0) */ ) {
			doingvmio = 1;
		} else {
			doingvmio = 0;
		}
		if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) {
			if (slpflag || slptimeo)
				return NULL;
			goto loop;
		}
		if (incore(vp, blkno)) {
			bp->b_flags |= B_INVAL;
			brelse(bp);
			goto loop;
		}
		bp->b_blkno = bp->b_lblkno = blkno;
		bgetvp(vp, bp);
		LIST_REMOVE(bp, b_hash);
		bh = BUFHASH(vp, blkno);
		LIST_INSERT_HEAD(bh, bp, b_hash);
		if (doingvmio) {
			bp->b_flags |= (B_VMIO | B_CACHE);
#if defined(VFS_BIO_DEBUG)
			if (vp->v_type != VREG)
				printf("getblk: vmioing file type %d???\n", vp->v_type);
#endif
			++nvmio;
		} else {
			if (bp->b_flags & B_VMIO)
				--nvmio;
			bp->b_flags &= ~B_VMIO;
		}
		splx(s);
		if (!allocbuf(bp, size, 1)) {
			s = splbio();
			goto loop;
		}
		return (bp);
	}
}

/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(int size)
{
	struct buf *bp;

	while ((bp = getnewbuf(0, 0, 0)) == 0);
	allocbuf(bp, size, 0);
	bp->b_flags |= B_INVAL;
	return (bp);
}

/*
 * Modify the length of a buffer's underlying buffer storage without
 * destroying information (unless, of course the buffer is shrinking).
 */
int
allocbuf(struct buf * bp, int size, int vmio)
{

	int s;
	int newbsize;
	int i;

	if ((bp->b_flags & B_VMIO) == 0) {
		newbsize = round_page(size);
		if (newbsize == bp->b_bufsize) {
			bp->b_bcount = size;
			return 1;
		} else if (newbsize < bp->b_bufsize) {
			if (bp->b_flags & B_MALLOC) {
				bp->b_bcount = size;
				return 1;
			}
			vm_hold_free_pages(
			    bp,
			    (vm_offset_t) bp->b_data + newbsize,
			    (vm_offset_t) bp->b_data + bp->b_bufsize);
		} else if (newbsize > bp->b_bufsize) {
			if (bp->b_flags & B_MALLOC) {
				vm_offset_t bufaddr;

				bufaddr = (vm_offset_t) bp->b_data;
				bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
				vm_hold_load_pages(
				    bp,
				    (vm_offset_t) bp->b_data,
				    (vm_offset_t) bp->b_data + newbsize);
				bcopy((caddr_t) bufaddr, bp->b_data, bp->b_bcount);
				free((caddr_t) bufaddr, M_TEMP);
			} else if ((newbsize <= PAGE_SIZE / 2) && (bp->b_bufsize == 0)) {
				bp->b_flags |= B_MALLOC;
				bp->b_data = malloc(newbsize, M_TEMP, M_WAITOK);
				bp->b_npages = 0;
			} else {
				vm_hold_load_pages(
				    bp,
				    (vm_offset_t) bp->b_data + bp->b_bufsize,
				    (vm_offset_t) bp->b_data + newbsize);
			}
		}
		/*
		 * adjust buffer cache's idea of memory allocated to buffer
		 * contents
		 */
		freebufspace -= newbsize - bp->b_bufsize;
		allocbufspace += newbsize - bp->b_bufsize;
	} else {
		vm_page_t m;
		int desiredpages;

		newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
		desiredpages = round_page(newbsize) / PAGE_SIZE;

		if (newbsize == bp->b_bufsize) {
			bp->b_bcount = size;
			return 1;
		} else if (newbsize < bp->b_bufsize) {
			if (desiredpages < bp->b_npages) {
				pmap_qremove((vm_offset_t) trunc_page(bp->b_data) +
				    desiredpages * PAGE_SIZE, (bp->b_npages - desiredpages));
				for (i = desiredpages; i < bp->b_npages; i++) {
					m = bp->b_pages[i];
					s = splhigh();
					if ((m->flags & PG_BUSY) || (m->busy != 0)) {
						m->flags |= PG_WANTED;
						tsleep(m, PVM, "biodep", 0);
					}
					splx(s);

					if (m->bmapped == 0) {
						printf("allocbuf: bmapped is zero for page %d\n", i);
						panic("allocbuf: error");
					}
					--m->bmapped;
					if (m->bmapped == 0) {
						PAGE_WAKEUP(m);
						pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
						vm_page_free(m);
					}
					bp->b_pages[i] = NULL;
				}
				bp->b_npages = desiredpages;
			}
		} else {
			vm_object_t obj;
			vm_offset_t tinc, off, toff, objoff;
			int pageindex, curbpnpages;
			struct vnode *vp;
			int bsize;

			vp = bp->b_vp;
			bsize = vp->v_mount->mnt_stat.f_iosize;

			if (bp->b_npages < desiredpages) {
				obj = (vm_object_t) vp->v_vmdata;
				tinc = PAGE_SIZE;
				if (tinc > bsize)
					tinc = bsize;
				off = bp->b_lblkno * bsize;
				curbpnpages = bp->b_npages;
doretry:
				for (toff = 0; toff < newbsize; toff += tinc) {
					int mask;
					int bytesinpage;

					pageindex = toff / PAGE_SIZE;
					objoff = trunc_page(toff + off);
					if (pageindex < curbpnpages) {
						int pb;

						m = bp->b_pages[pageindex];
						if (m->offset != objoff)
							panic("allocbuf: page changed offset??!!!?");
						bytesinpage = tinc;
						if (tinc > (newbsize - toff))
							bytesinpage = newbsize - toff;
						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
							bp->b_flags &= ~B_CACHE;
						}
						if ((m->flags & PG_ACTIVE) == 0)
							vm_page_activate(m);
						continue;
					}
					m = vm_page_lookup(obj, objoff);
					if (!m) {
						m = vm_page_alloc(obj, objoff, 0);
						if (!m) {
							int j;

							for (j = bp->b_npages; j < pageindex; j++) {
								vm_page_t mt = bp->b_pages[j];

								PAGE_WAKEUP(mt);
								if (!mt->valid) {
									vm_page_free(mt);
								}
							}
							VM_WAIT;
							if (vmio && (bp->b_flags & B_PDWANTED)) {
								--nvmio;
								bp->b_flags &= ~B_VMIO;
								bp->b_flags |= B_INVAL;
								brelse(bp);
								return 0;
							}
							curbpnpages = bp->b_npages;
							goto doretry;
						}
						m->valid = 0;
						vm_page_activate(m);
					} else if ((m->valid == 0) || (m->flags & PG_BUSY)) {
						int j;
						int bufferdestroyed = 0;

						for (j = bp->b_npages; j < pageindex; j++) {
							vm_page_t mt = bp->b_pages[j];

							PAGE_WAKEUP(mt);
							if (mt->valid == 0) {
								vm_page_free(mt);
							}
						}
						if (vmio && (bp->b_flags & B_PDWANTED)) {
							--nvmio;
							bp->b_flags &= ~B_VMIO;
							bp->b_flags |= B_INVAL;
							brelse(bp);
							VM_WAIT;
							bufferdestroyed = 1;
						}
						s = splbio();
						if (m) {
							m->flags |= PG_WANTED;
							tsleep(m, PRIBIO, "pgtblk", 0);
						}
						splx(s);
						if (bufferdestroyed)
							return 0;
						curbpnpages = bp->b_npages;
						goto doretry;
					} else {
						int pb;

						if ((m->flags & PG_CACHE) &&
						    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) {
							int j;

							for (j = bp->b_npages; j < pageindex; j++) {
								vm_page_t mt = bp->b_pages[j];

								PAGE_WAKEUP(mt);
								if (mt->valid == 0) {
									vm_page_free(mt);
								}
							}
							VM_WAIT;
							if (vmio && (bp->b_flags & B_PDWANTED)) {
								--nvmio;
								bp->b_flags &= ~B_VMIO;
								bp->b_flags |= B_INVAL;
								brelse(bp);
								return 0;
							}
							curbpnpages = bp->b_npages;
							goto doretry;
						}
						bytesinpage = tinc;
						if (tinc > (newbsize - toff))
							bytesinpage = newbsize - toff;
						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
							bp->b_flags &= ~B_CACHE;
						}
						if ((m->flags & PG_ACTIVE) == 0)
							vm_page_activate(m);
						m->flags |= PG_BUSY;
					}
					bp->b_pages[pageindex] = m;
					curbpnpages = pageindex + 1;
				}
				if (bsize >= PAGE_SIZE) {
					for (i = bp->b_npages; i < curbpnpages; i++) {
						m = bp->b_pages[i];
						if (m->valid == 0) {
							bp->b_flags &= ~B_CACHE;
						}
						m->bmapped++;
						PAGE_WAKEUP(m);
					}
				} else {
					if (!vm_page_is_valid(bp->b_pages[0], off, bsize))
						bp->b_flags &= ~B_CACHE;
					bp->b_pages[0]->bmapped++;
					PAGE_WAKEUP(bp->b_pages[0]);
				}
				bp->b_npages = curbpnpages;
				bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
				pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages);
				bp->b_data += off % PAGE_SIZE;
			}
		}
	}
	bp->b_bufsize = newbsize;
	bp->b_bcount = size;
	return 1;
}

/*
 * Wait for buffer I/O completion, returning error status.
 */
int
biowait(register struct buf * bp)
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		tsleep((caddr_t) bp, PRIBIO, "biowait", 0);
	if ((bp->b_flags & B_ERROR) || bp->b_error) {
		if ((bp->b_flags & B_INVAL) == 0) {
			bp->b_flags |= B_INVAL;
			bp->b_dev = NODEV;
			LIST_REMOVE(bp, b_hash);
			LIST_INSERT_HEAD(&invalhash, bp, b_hash);
			wakeup((caddr_t) bp);
		}
		if (!bp->b_error)
			bp->b_error = EIO;
		else
			bp->b_flags |= B_ERROR;
		splx(s);
		return (bp->b_error);
	} else {
		splx(s);
		return (0);
	}
}

/*
 * Finish I/O on a buffer, calling an optional function.
 * This is usually called from interrupt level, so process blocking
 * is not *a good idea*.
 */
void
biodone(register struct buf * bp)
{
	int s;

	s = splbio();
	if (bp->b_flags & B_DONE)
		printf("biodone: buffer already done\n");
	bp->b_flags |= B_DONE;

	if ((bp->b_flags & B_READ) == 0) {
		vwakeup(bp);
	}
#ifdef BOUNCE_BUFFERS
	if (bp->b_flags & B_BOUNCE)
		vm_bounce_free(bp);
#endif

	/* call optional completion function if requested */
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone) (bp);
		splx(s);
		return;
	}
	if (bp->b_flags & B_VMIO) {
		int i, resid;
		vm_offset_t foff;
		vm_page_t m;
		vm_object_t obj;
		int iosize;
		struct vnode *vp = bp->b_vp;

		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		obj = (vm_object_t) vp->v_vmdata;
		if (!obj) {
			splx(s);
			return;
		}
#if defined(VFS_BIO_DEBUG)
		if (obj->paging_in_progress < bp->b_npages) {
			printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
			    obj->paging_in_progress, bp->b_npages);
		}
#endif
		iosize = bp->b_bufsize;
		for (i = 0; i < bp->b_npages; i++) {
			m = bp->b_pages[i];
			if (m == bogus_page) {
				m = vm_page_lookup(obj, foff);
				if (!m) {
#if defined(VFS_BIO_DEBUG)
					printf("biodone: page disappeared\n");
#endif
					--obj->paging_in_progress;
					continue;
				}
				bp->b_pages[i] = m;
				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
			}
#if defined(VFS_BIO_DEBUG)
			if (trunc_page(foff) != m->offset) {
				printf("biodone: foff(%d)/m->offset(%d) mismatch\n", foff, m->offset);
			}
#endif
			resid = (m->offset + PAGE_SIZE) - foff;
			if (resid > iosize)
				resid = iosize;
			if (resid > 0) {
				vm_page_set_valid(m, foff, resid);
				vm_page_set_clean(m, foff, resid);
			}
			if (m->busy == 0) {
				printf("biodone: page busy < 0, off: %d, foff: %d, resid: %d, index: %d\n",
				    m->offset, foff, resid, i);
				printf(" iosize: %d, lblkno: %d\n",
				    bp->b_vp->v_mount->mnt_stat.f_iosize, bp->b_lblkno);
				printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n",
				    m->valid, m->dirty, m->bmapped);
				panic("biodone: page busy < 0");
			}
			--m->busy;
			PAGE_WAKEUP(m);
			--obj->paging_in_progress;
			foff += resid;
			iosize -= resid;
		}
		if (obj && obj->paging_in_progress == 0)
			wakeup((caddr_t) obj);
	}
	/*
	 * For asynchronous completions, release the buffer now.  The brelse
	 * checks for B_WANTED and will do the wakeup there if necessary - so
	 * no need to do a wakeup here in the async case.
	 */

	if (bp->b_flags & B_ASYNC) {
		brelse(bp);
	} else {
		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
		wakeup((caddr_t) bp);
	}
	splx(s);
}

int
count_lock_queue()
{
	int count;
	struct buf *bp;

	count = 0;
	for (bp = bufqueues[QUEUE_LOCKED].tqh_first;
	    bp != NULL;
	    bp = bp->b_freelist.tqe_next)
		count++;
	return (count);
}

int vfs_update_interval = 30;

/*
 * internal update daemon loop -- sync the filesystems every
 * vfs_update_interval seconds (or sooner, when kicked via
 * vfs_update_wakeup).
 */
void
vfs_update()
{
	(void) spl0();
	while (1) {
		tsleep((caddr_t) &vfs_update_wakeup, PRIBIO, "update",
		    hz * vfs_update_interval);
		vfs_update_wakeup = 0;
		sync(curproc, NULL, NULL);
	}
}

/*
 * undo the effect of vfs_busy_pages on the pages backing a VMIO
 * buffer once the I/O has completed.
 */
void
vfs_unbusy_pages(struct buf * bp)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		struct vnode *vp = bp->b_vp;
		vm_object_t obj = (vm_object_t) vp->v_vmdata;
		vm_offset_t foff;

		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;

		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];

			if (m == bogus_page) {
				m = vm_page_lookup(obj, foff);
				if (!m) {
					panic("vfs_unbusy_pages: page missing");
				}
				bp->b_pages[i] = m;
				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
			}
			--obj->paging_in_progress;
			--m->busy;
			PAGE_WAKEUP(m);
		}
		if (obj->paging_in_progress == 0)
			wakeup((caddr_t) obj);
	}
}

/*
 * mark the pages backing a VMIO buffer busy in preparation for I/O.
 */
void
vfs_busy_pages(struct buf * bp, int clear_modify)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		vm_object_t obj = (vm_object_t) bp->b_vp->v_vmdata;
		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		int iocount = bp->b_bufsize;

		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
			int resid = (m->offset + PAGE_SIZE) - foff;

			if (resid > iocount)
				resid = iocount;
			obj->paging_in_progress++;
			m->busy++;
			if (clear_modify) {
				vm_page_test_dirty(m);
				pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ);
			} else if (bp->b_bcount >= PAGE_SIZE) {
				if (m->valid && (bp->b_flags & B_CACHE) == 0) {
					bp->b_pages[i] = bogus_page;
					pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
				}
			}
			foff += resid;
			iocount -= resid;
		}
	}
}

/*
 * mark the pages backing a delayed-write VMIO buffer valid and dirty.
 */
void
vfs_dirty_pages(struct buf * bp)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		int iocount = bp->b_bufsize;

		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
			int resid = (m->offset + PAGE_SIZE) - foff;

			if (resid > iocount)
				resid = iocount;
			if (resid > 0) {
				vm_page_set_valid(m, foff, resid);
				vm_page_set_dirty(m, foff, resid);
			}
			PAGE_WAKEUP(m);
			foff += resid;
			iocount -= resid;
		}
	}
}

/*
 * these routines are not in the correct place (yet)
 * also they work *ONLY* for kernel_pmap!!!
 */
void
vm_hold_load_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
{
	vm_offset_t pg;
	vm_page_t p;
	vm_offset_t from = round_page(froma);
	vm_offset_t to = round_page(toa);

tryagain0:
	if ((curproc != pageproc) && ((cnt.v_free_count + cnt.v_cache_count) <=
	    cnt.v_free_reserved + (toa - froma) / PAGE_SIZE)) {
		VM_WAIT;
		goto tryagain0;
	}
	for (pg = from; pg < to; pg += PAGE_SIZE) {

tryagain:

		p = vm_page_alloc(kernel_object, pg - VM_MIN_KERNEL_ADDRESS, 0);
		if (!p) {
			VM_WAIT;
			goto tryagain;
		}
		vm_page_wire(p);
		pmap_kenter(pg, VM_PAGE_TO_PHYS(p));
		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = p;
		PAGE_WAKEUP(p);
		bp->b_npages++;
	}
}

void
vm_hold_free_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
{
	vm_offset_t pg;
	vm_page_t p;
	vm_offset_t from = round_page(froma);
	vm_offset_t to = round_page(toa);

	for (pg = from; pg < to; pg += PAGE_SIZE) {
		p = bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE];
		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = 0;
		pmap_kremove(pg);
		vm_page_free(p);
		--bp->b_npages;
	}
}

void
bufstats()
{
}