1 /* 2 * Copyright (c) 1994 John S. Dyson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice immediately at the beginning of the file, without modification, 10 * this list of conditions, and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. Absolutely no warranty of function or purpose is made by the author 15 * John S. Dyson. 16 * 4. This work was done expressly for inclusion into FreeBSD. Other use 17 * is allowed if this notation is included. 18 * 5. Modifications may be freely made to this file if the above conditions 19 * are met. 20 * 21 * $Id: vfs_bio.c,v 1.18 1995/01/10 07:32:35 davidg Exp $ 22 */ 23 24 /* 25 * this file contains a new buffer I/O scheme implementing a coherent 26 * VM object and buffer cache scheme. Pains have been taken to make 27 * sure that the performance degradation associated with schemes such 28 * as this is not realized. 29 * 30 * Author: John S. Dyson 31 * Significant help during the development and debugging phases 32 * had been provided by David Greenman, also of the FreeBSD core team. 33 */ 34 35 #define VMIO 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/kernel.h> 39 #include <sys/proc.h> 40 #include <sys/vnode.h> 41 #include <vm/vm.h> 42 #include <vm/vm_pageout.h> 43 #include <vm/vm_page.h> 44 #include <vm/vm_object.h> 45 #include <sys/buf.h> 46 #include <sys/mount.h> 47 #include <sys/malloc.h> 48 #include <sys/resourcevar.h> 49 #include <sys/proc.h> 50 51 #include <miscfs/specfs/specdev.h> 52 53 struct buf *buf; /* buffer header pool */ 54 int nbuf; /* number of buffer headers calculated 55 * elsewhere */ 56 struct swqueue bswlist; 57 int nvmio, nlru; 58 59 extern vm_map_t buffer_map, io_map, kernel_map, pager_map; 60 61 void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); 62 void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); 63 void vfs_dirty_pages(struct buf * bp); 64 void vfs_busy_pages(struct buf *, int clear_modify); 65 66 int needsbuffer; 67 68 /* 69 * Internal update daemon, process 3 70 * The variable vfs_update_wakeup allows for internal syncs. 71 */ 72 int vfs_update_wakeup; 73 74 75 /* 76 * buffers base kva 77 */ 78 caddr_t buffers_kva; 79 80 /* 81 * bogus page -- for I/O to/from partially complete buffers 82 */ 83 vm_page_t bogus_page; 84 vm_offset_t bogus_offset; 85 86 /* 87 * Initialize buffer headers and related structures. 88 */ 89 void 90 bufinit() 91 { 92 struct buf *bp; 93 int i; 94 95 TAILQ_INIT(&bswlist); 96 LIST_INIT(&invalhash); 97 98 /* first, make a null hash table */ 99 for (i = 0; i < BUFHSZ; i++) 100 LIST_INIT(&bufhashtbl[i]); 101 102 /* next, make a null set of free lists */ 103 for (i = 0; i < BUFFER_QUEUES; i++) 104 TAILQ_INIT(&bufqueues[i]); 105 106 buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf); 107 /* finally, initialize each buffer header and stick on empty q */ 108 for (i = 0; i < nbuf; i++) { 109 bp = &buf[i]; 110 bzero(bp, sizeof *bp); 111 bp->b_flags = B_INVAL; /* we're just an empty header */ 112 bp->b_dev = NODEV; 113 bp->b_vp = NULL; 114 bp->b_rcred = NOCRED; 115 bp->b_wcred = NOCRED; 116 bp->b_qindex = QUEUE_EMPTY; 117 bp->b_vnbufs.le_next = NOLIST; 118 bp->b_data = buffers_kva + i * MAXBSIZE; 119 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); 120 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 121 } 122 123 bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE); 124 bogus_page = vm_page_alloc(kernel_object, bogus_offset - VM_MIN_KERNEL_ADDRESS, 0); 125 126 } 127 128 /* 129 * remove the buffer from the appropriate free list 130 */ 131 void 132 bremfree(struct buf * bp) 133 { 134 int s = splbio(); 135 136 if (bp->b_qindex != QUEUE_NONE) { 137 if (bp->b_qindex == QUEUE_LRU) 138 --nlru; 139 TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); 140 bp->b_qindex = QUEUE_NONE; 141 } else { 142 panic("bremfree: removing a buffer when not on a queue"); 143 } 144 splx(s); 145 } 146 147 /* 148 * Get a buffer with the specified data. Look in the cache first. 149 */ 150 int 151 bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred, 152 struct buf ** bpp) 153 { 154 struct buf *bp; 155 156 bp = getblk(vp, blkno, size, 0, 0); 157 *bpp = bp; 158 159 /* if not found in cache, do some I/O */ 160 if ((bp->b_flags & B_CACHE) == 0) { 161 if (curproc && curproc->p_stats) /* count block I/O */ 162 curproc->p_stats->p_ru.ru_inblock++; 163 bp->b_flags |= B_READ; 164 bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); 165 if (bp->b_rcred == NOCRED) { 166 if (cred != NOCRED) 167 crhold(cred); 168 bp->b_rcred = cred; 169 } 170 vfs_busy_pages(bp, 0); 171 VOP_STRATEGY(bp); 172 return (biowait(bp)); 173 } else if (bp->b_lblkno == bp->b_blkno) { 174 VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0, 175 &bp->b_blkno, (int *) 0); 176 } 177 return (0); 178 } 179 180 /* 181 * Operates like bread, but also starts asynchronous I/O on 182 * read-ahead blocks. 183 */ 184 int 185 breadn(struct vnode * vp, daddr_t blkno, int size, 186 daddr_t * rablkno, int *rabsize, 187 int cnt, struct ucred * cred, struct buf ** bpp) 188 { 189 struct buf *bp, *rabp; 190 int i; 191 int rv = 0, readwait = 0; 192 193 *bpp = bp = getblk(vp, blkno, size, 0, 0); 194 195 /* if not found in cache, do some I/O */ 196 if ((bp->b_flags & B_CACHE) == 0) { 197 if (curproc && curproc->p_stats) /* count block I/O */ 198 curproc->p_stats->p_ru.ru_inblock++; 199 bp->b_flags |= B_READ; 200 bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); 201 if (bp->b_rcred == NOCRED) { 202 if (cred != NOCRED) 203 crhold(cred); 204 bp->b_rcred = cred; 205 } 206 vfs_busy_pages(bp, 0); 207 VOP_STRATEGY(bp); 208 ++readwait; 209 } else if (bp->b_lblkno == bp->b_blkno) { 210 VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0, 211 &bp->b_blkno, (int *) 0); 212 } 213 for (i = 0; i < cnt; i++, rablkno++, rabsize++) { 214 if (inmem(vp, *rablkno)) 215 continue; 216 rabp = getblk(vp, *rablkno, *rabsize, 0, 0); 217 218 if ((rabp->b_flags & B_CACHE) == 0) { 219 if (curproc && curproc->p_stats) 220 curproc->p_stats->p_ru.ru_inblock++; 221 rabp->b_flags |= B_READ | B_ASYNC; 222 rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); 223 if (rabp->b_rcred == NOCRED) { 224 if (cred != NOCRED) 225 crhold(cred); 226 rabp->b_rcred = cred; 227 } 228 vfs_busy_pages(rabp, 0); 229 VOP_STRATEGY(rabp); 230 } else { 231 brelse(rabp); 232 } 233 } 234 235 if (readwait) { 236 rv = biowait(bp); 237 } 238 return (rv); 239 } 240 241 /* 242 * this routine is used by filesystems to get at pages in the PG_CACHE 243 * queue. also, it is used to read pages that are currently being 244 * written out by the file i/o routines. 245 */ 246 int 247 vfs_read_bypass(struct vnode * vp, struct uio * uio, int maxread, daddr_t lbn) 248 { 249 vm_page_t m; 250 vm_offset_t kv; 251 int nread; 252 int error; 253 struct buf *bp, *bpa; 254 vm_object_t obj; 255 int off; 256 int nrest; 257 int flags; 258 int s; 259 260 return 0; 261 /* 262 * don't use the bypass mechanism for non-vmio vnodes 263 */ 264 if ((vp->v_flag & VVMIO) == 0) 265 return 0; 266 /* 267 * get the VM object (it has the pages) 268 */ 269 obj = (vm_object_t) vp->v_vmdata; 270 if (obj == NULL) 271 return 0; 272 273 /* 274 * if there is a buffer that is not busy, it is faster to use it. 275 * This like read-ahead, etc work better 276 */ 277 278 s = splbio(); 279 if ((bp = incore(vp, lbn)) && 280 (((bp->b_flags & B_READ) && (bp->b_flags & B_BUSY)) 281 || (bp->b_flags & B_BUSY) == 0)) { 282 splx(s); 283 return 0; 284 } 285 splx(s); 286 287 /* 288 * get a pbuf --> we just use the kva 289 */ 290 kv = kmem_alloc_wait(pager_map, PAGE_SIZE); 291 nread = 0; 292 error = 0; 293 294 while (!error && uio->uio_resid && maxread > 0) { 295 int po; 296 int count; 297 int s; 298 299 relookup: 300 /* 301 * lookup the page 302 */ 303 m = vm_page_lookup(obj, trunc_page(uio->uio_offset)); 304 if (!m) 305 break; 306 /* 307 * get the offset into the page, and the amount to read in the 308 * page 309 */ 310 nrest = round_page(uio->uio_offset) - uio->uio_offset; 311 if (nrest > uio->uio_resid) 312 nrest = uio->uio_resid; 313 314 /* 315 * check the valid bits for the page (DEV_BSIZE chunks) 316 */ 317 if (!vm_page_is_valid(m, uio->uio_offset, nrest)) 318 break; 319 320 /* 321 * if the page is busy, wait for it 322 */ 323 s = splhigh(); 324 if (!m->valid || (m->flags & PG_BUSY)) { 325 m->flags |= PG_WANTED; 326 tsleep((caddr_t) m, PVM, "vnibyp", 0); 327 splx(s); 328 goto relookup; 329 } 330 /* 331 * if the page is on the cache queue, remove it -- cache queue 332 * pages should be freeable by vm_page_alloc anytime. 333 */ 334 if (m->flags & PG_CACHE) { 335 if (cnt.v_free_count + cnt.v_cache_count < cnt.v_free_reserved) { 336 VM_WAIT; 337 goto relookup; 338 } 339 vm_page_unqueue(m); 340 } 341 /* 342 * add a buffer mapping (essentially wires the page too). 343 */ 344 m->bmapped++; 345 splx(s); 346 347 /* 348 * enter it into the kva 349 */ 350 pmap_qenter(kv, &m, 1); 351 352 /* 353 * do the copy 354 */ 355 po = uio->uio_offset & (PAGE_SIZE - 1); 356 count = PAGE_SIZE - po; 357 if (count > maxread) 358 count = maxread; 359 if (count > uio->uio_resid) 360 count = uio->uio_resid; 361 362 error = uiomove((caddr_t) kv + po, count, uio); 363 if (!error) { 364 nread += count; 365 maxread -= count; 366 } 367 /* 368 * remove from kva 369 */ 370 pmap_qremove(kv, 1); 371 PAGE_WAKEUP(m); /* XXX probably unnecessary */ 372 /* 373 * If the page was on the cache queue, then by definition 374 * bmapped was 0. Thus the following case will also take care 375 * of the page being removed from the cache queue above. 376 * Also, it is possible that the page was already entered onto 377 * another queue (or was already there), so we don't put it 378 * onto the cache queue... 379 */ 380 m->bmapped--; 381 if (m->bmapped == 0 && 382 (m->flags & (PG_CACHE | PG_ACTIVE | PG_INACTIVE)) == 0 && 383 m->wire_count == 0) { 384 vm_page_test_dirty(m); 385 386 /* 387 * make sure that the darned page is on a queue 388 * somewhere... 389 */ 390 if ((m->dirty & m->valid) == 0) { 391 vm_page_cache(m); 392 } else if (m->hold_count == 0) { 393 vm_page_deactivate(m); 394 } else { 395 vm_page_activate(m); 396 } 397 } 398 } 399 /* 400 * release our buffer(kva). 401 */ 402 kmem_free_wakeup(pager_map, kv, PAGE_SIZE); 403 return nread; 404 } 405 406 407 /* 408 * Write, release buffer on completion. (Done by iodone 409 * if async.) 410 */ 411 int 412 bwrite(struct buf * bp) 413 { 414 int oldflags = bp->b_flags; 415 416 if (bp->b_flags & B_INVAL) { 417 brelse(bp); 418 return (0); 419 } 420 if (!(bp->b_flags & B_BUSY)) 421 panic("bwrite: buffer is not busy???"); 422 423 bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); 424 bp->b_flags |= B_WRITEINPROG; 425 426 if (oldflags & B_ASYNC) { 427 if (oldflags & B_DELWRI) { 428 reassignbuf(bp, bp->b_vp); 429 } else if (curproc) { 430 ++curproc->p_stats->p_ru.ru_oublock; 431 } 432 } 433 bp->b_vp->v_numoutput++; 434 vfs_busy_pages(bp, 1); 435 VOP_STRATEGY(bp); 436 437 if ((oldflags & B_ASYNC) == 0) { 438 int rtval = biowait(bp); 439 440 if (oldflags & B_DELWRI) { 441 reassignbuf(bp, bp->b_vp); 442 } else if (curproc) { 443 ++curproc->p_stats->p_ru.ru_oublock; 444 } 445 brelse(bp); 446 return (rtval); 447 } 448 return (0); 449 } 450 451 int 452 vn_bwrite(ap) 453 struct vop_bwrite_args *ap; 454 { 455 return (bwrite(ap->a_bp)); 456 } 457 458 /* 459 * Delayed write. (Buffer is marked dirty). 460 */ 461 void 462 bdwrite(struct buf * bp) 463 { 464 465 if ((bp->b_flags & B_BUSY) == 0) { 466 panic("bdwrite: buffer is not busy"); 467 } 468 if (bp->b_flags & B_INVAL) { 469 brelse(bp); 470 return; 471 } 472 if (bp->b_flags & B_TAPE) { 473 bawrite(bp); 474 return; 475 } 476 bp->b_flags &= ~B_READ; 477 vfs_dirty_pages(bp); 478 if ((bp->b_flags & B_DELWRI) == 0) { 479 if (curproc) 480 ++curproc->p_stats->p_ru.ru_oublock; 481 bp->b_flags |= B_DONE | B_DELWRI; 482 reassignbuf(bp, bp->b_vp); 483 } 484 brelse(bp); 485 return; 486 } 487 488 /* 489 * Asynchronous write. 490 * Start output on a buffer, but do not wait for it to complete. 491 * The buffer is released when the output completes. 492 */ 493 void 494 bawrite(struct buf * bp) 495 { 496 if (((bp->b_flags & B_DELWRI) == 0) && (bp->b_vp->v_numoutput > 24)) { 497 int s = splbio(); 498 499 while (bp->b_vp->v_numoutput > 16) { 500 bp->b_vp->v_flag |= VBWAIT; 501 tsleep((caddr_t) &bp->b_vp->v_numoutput, PRIBIO, "bawnmo", 0); 502 } 503 splx(s); 504 } 505 bp->b_flags |= B_ASYNC; 506 (void) bwrite(bp); 507 } 508 509 /* 510 * Release a buffer. 511 */ 512 void 513 brelse(struct buf * bp) 514 { 515 int s; 516 517 if (bp->b_flags & B_CLUSTER) { 518 relpbuf(bp); 519 return; 520 } 521 /* anyone need a "free" block? */ 522 s = splbio(); 523 524 if (needsbuffer) { 525 needsbuffer = 0; 526 wakeup((caddr_t) &needsbuffer); 527 } 528 /* anyone need this block? */ 529 if (bp->b_flags & B_WANTED) { 530 bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_AGE); 531 wakeup((caddr_t) bp); 532 } else if (bp->b_flags & B_VMIO) { 533 bp->b_flags &= ~(B_WANTED | B_PDWANTED); 534 wakeup((caddr_t) bp); 535 } 536 if (bp->b_flags & B_LOCKED) 537 bp->b_flags &= ~B_ERROR; 538 539 if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) || 540 (bp->b_bufsize <= 0)) { 541 bp->b_flags |= B_INVAL; 542 bp->b_flags &= ~(B_DELWRI | B_CACHE); 543 if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp) 544 brelvp(bp); 545 } 546 if (bp->b_flags & B_VMIO) { 547 vm_offset_t foff; 548 vm_object_t obj; 549 int i, resid; 550 vm_page_t m; 551 int iototal = bp->b_bufsize; 552 553 foff = 0; 554 obj = 0; 555 if (bp->b_npages) { 556 if (bp->b_vp && bp->b_vp->v_mount) { 557 foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 558 } else { 559 /* 560 * vnode pointer has been ripped away -- 561 * probably file gone... 562 */ 563 foff = bp->b_pages[0]->offset; 564 } 565 } 566 for (i = 0; i < bp->b_npages; i++) { 567 m = bp->b_pages[i]; 568 if (m == bogus_page) { 569 panic("brelse: bogus page found"); 570 } 571 resid = (m->offset + PAGE_SIZE) - foff; 572 if (resid > iototal) 573 resid = iototal; 574 if (resid > 0) { 575 if (bp->b_flags & (B_ERROR | B_NOCACHE)) { 576 vm_page_set_invalid(m, foff, resid); 577 } else if ((bp->b_flags & B_DELWRI) == 0) { 578 vm_page_set_clean(m, foff, resid); 579 vm_page_set_valid(m, foff, resid); 580 } 581 } else { 582 vm_page_test_dirty(m); 583 } 584 if (bp->b_flags & B_INVAL) { 585 if (m->bmapped == 0) { 586 panic("brelse: bmapped is zero for page\n"); 587 } 588 --m->bmapped; 589 if (m->bmapped == 0) { 590 PAGE_WAKEUP(m); 591 if ((m->dirty & m->valid) == 0) 592 vm_page_cache(m); 593 } 594 } 595 foff += resid; 596 iototal -= resid; 597 } 598 599 if (bp->b_flags & B_INVAL) { 600 pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); 601 bp->b_npages = 0; 602 bp->b_bufsize = 0; 603 bp->b_flags &= ~B_VMIO; 604 if (bp->b_vp) 605 brelvp(bp); 606 --nvmio; 607 } 608 } 609 if (bp->b_qindex != QUEUE_NONE) 610 panic("brelse: free buffer onto another queue???"); 611 612 /* enqueue */ 613 /* buffers with no memory */ 614 if (bp->b_bufsize == 0) { 615 bp->b_qindex = QUEUE_EMPTY; 616 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); 617 LIST_REMOVE(bp, b_hash); 618 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 619 bp->b_dev = NODEV; 620 /* buffers with junk contents */ 621 } else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE)) { 622 bp->b_qindex = QUEUE_AGE; 623 TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist); 624 LIST_REMOVE(bp, b_hash); 625 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 626 bp->b_dev = NODEV; 627 /* buffers that are locked */ 628 } else if (bp->b_flags & B_LOCKED) { 629 bp->b_qindex = QUEUE_LOCKED; 630 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); 631 /* buffers with stale but valid contents */ 632 } else if (bp->b_flags & B_AGE) { 633 bp->b_qindex = QUEUE_AGE; 634 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist); 635 /* buffers with valid and quite potentially reuseable contents */ 636 } else { 637 if (bp->b_flags & B_VMIO) 638 bp->b_qindex = QUEUE_VMIO; 639 else { 640 bp->b_qindex = QUEUE_LRU; 641 ++nlru; 642 } 643 TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist); 644 } 645 646 /* unlock */ 647 bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE); 648 splx(s); 649 } 650 651 /* 652 * this routine implements clustered async writes for 653 * clearing out B_DELWRI buffers... 654 */ 655 void 656 vfs_bio_awrite(struct buf * bp) 657 { 658 int i; 659 daddr_t lblkno = bp->b_lblkno; 660 struct vnode *vp = bp->b_vp; 661 int size = vp->v_mount->mnt_stat.f_iosize; 662 int s; 663 int ncl; 664 struct buf *bpa; 665 666 s = splbio(); 667 ncl = 1; 668 if (vp->v_flag & VVMIO) { 669 for (i = 1; i < MAXPHYS / size; i++) { 670 if ((bpa = incore(vp, lblkno + i)) && 671 ((bpa->b_flags & (B_BUSY | B_DELWRI | B_BUSY | B_CLUSTEROK | B_INVAL)) == B_DELWRI | B_CLUSTEROK) && 672 (bpa->b_bufsize == size)) { 673 if ((bpa->b_blkno == bpa->b_lblkno) || 674 (bpa->b_blkno != bp->b_blkno + (i * size) / DEV_BSIZE)) 675 break; 676 } else { 677 break; 678 } 679 } 680 ncl = i; 681 } 682 /* 683 * we don't attempt to cluster meta-data or INVALID??? buffers 684 */ 685 if ((ncl != 1) && 686 (bp->b_flags & (B_INVAL | B_CLUSTEROK)) == B_CLUSTEROK) { 687 cluster_wbuild(vp, NULL, size, lblkno, ncl, -1); 688 } else { 689 bremfree(bp); 690 bp->b_flags |= B_BUSY | B_ASYNC; 691 bwrite(bp); 692 } 693 splx(s); 694 } 695 696 int freebufspace; 697 int allocbufspace; 698 699 /* 700 * Find a buffer header which is available for use. 701 */ 702 struct buf * 703 getnewbuf(int slpflag, int slptimeo, int doingvmio) 704 { 705 struct buf *bp; 706 int s; 707 int firstbp = 1; 708 709 s = splbio(); 710 start: 711 /* can we constitute a new buffer? */ 712 if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) { 713 if (bp->b_qindex != QUEUE_EMPTY) 714 panic("getnewbuf: inconsistent EMPTY queue"); 715 bremfree(bp); 716 goto fillbuf; 717 } 718 /* 719 * we keep the file I/O from hogging metadata I/O 720 */ 721 if (bp = bufqueues[QUEUE_AGE].tqh_first) { 722 if (bp->b_qindex != QUEUE_AGE) 723 panic("getnewbuf: inconsistent AGE queue"); 724 } else if ((nvmio > (2 * nbuf / 3)) 725 && (bp = bufqueues[QUEUE_VMIO].tqh_first)) { 726 if (bp->b_qindex != QUEUE_VMIO) 727 panic("getnewbuf: inconsistent VMIO queue"); 728 } else if ((!doingvmio || (nlru > (2 * nbuf / 3))) && 729 (bp = bufqueues[QUEUE_LRU].tqh_first)) { 730 if (bp->b_qindex != QUEUE_LRU) 731 panic("getnewbuf: inconsistent LRU queue"); 732 } 733 if (!bp) { 734 if (doingvmio) { 735 if (bp = bufqueues[QUEUE_VMIO].tqh_first) { 736 if (bp->b_qindex != QUEUE_VMIO) 737 panic("getnewbuf: inconsistent VMIO queue"); 738 } else if (bp = bufqueues[QUEUE_LRU].tqh_first) { 739 if (bp->b_qindex != QUEUE_LRU) 740 panic("getnewbuf: inconsistent LRU queue"); 741 } 742 } else { 743 if (bp = bufqueues[QUEUE_LRU].tqh_first) { 744 if (bp->b_qindex != QUEUE_LRU) 745 panic("getnewbuf: inconsistent LRU queue"); 746 } else if (bp = bufqueues[QUEUE_VMIO].tqh_first) { 747 if (bp->b_qindex != QUEUE_VMIO) 748 panic("getnewbuf: inconsistent VMIO queue"); 749 } 750 } 751 } 752 if (!bp) { 753 /* wait for a free buffer of any kind */ 754 needsbuffer = 1; 755 tsleep((caddr_t) &needsbuffer, PRIBIO | slpflag, "newbuf", slptimeo); 756 splx(s); 757 return (0); 758 } 759 /* if we are a delayed write, convert to an async write */ 760 if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) { 761 vfs_bio_awrite(bp); 762 if (!slpflag && !slptimeo) { 763 splx(s); 764 return (0); 765 } 766 goto start; 767 } 768 bremfree(bp); 769 770 if (bp->b_flags & B_VMIO) { 771 bp->b_flags |= B_INVAL | B_BUSY; 772 brelse(bp); 773 bremfree(bp); 774 } 775 if (bp->b_vp) 776 brelvp(bp); 777 778 /* we are not free, nor do we contain interesting data */ 779 if (bp->b_rcred != NOCRED) 780 crfree(bp->b_rcred); 781 if (bp->b_wcred != NOCRED) 782 crfree(bp->b_wcred); 783 fillbuf: 784 bp->b_flags = B_BUSY; 785 LIST_REMOVE(bp, b_hash); 786 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 787 splx(s); 788 if (bp->b_bufsize) { 789 allocbuf(bp, 0, 0); 790 } 791 bp->b_dev = NODEV; 792 bp->b_vp = NULL; 793 bp->b_blkno = bp->b_lblkno = 0; 794 bp->b_iodone = 0; 795 bp->b_error = 0; 796 bp->b_resid = 0; 797 bp->b_bcount = 0; 798 bp->b_npages = 0; 799 bp->b_wcred = bp->b_rcred = NOCRED; 800 bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; 801 bp->b_dirtyoff = bp->b_dirtyend = 0; 802 bp->b_validoff = bp->b_validend = 0; 803 return (bp); 804 } 805 806 /* 807 * Check to see if a block is currently memory resident. 808 */ 809 struct buf * 810 incore(struct vnode * vp, daddr_t blkno) 811 { 812 struct buf *bp; 813 struct bufhashhdr *bh; 814 815 int s = splbio(); 816 817 bh = BUFHASH(vp, blkno); 818 bp = bh->lh_first; 819 820 /* Search hash chain */ 821 while (bp) { 822 /* hit */ 823 if (bp->b_lblkno == blkno && bp->b_vp == vp 824 && (bp->b_flags & B_INVAL) == 0) { 825 splx(s); 826 return (bp); 827 } 828 bp = bp->b_hash.le_next; 829 } 830 splx(s); 831 832 return (0); 833 } 834 835 /* 836 * returns true if no I/O is needed to access the 837 * associated VM object. 838 */ 839 840 int 841 inmem(struct vnode * vp, daddr_t blkno) 842 { 843 vm_object_t obj; 844 vm_offset_t off, toff, tinc; 845 vm_page_t m; 846 847 if (incore(vp, blkno)) 848 return 1; 849 if (vp->v_mount == 0) 850 return 0; 851 if (vp->v_vmdata == 0) 852 return 0; 853 854 obj = (vm_object_t) vp->v_vmdata; 855 tinc = PAGE_SIZE; 856 if (tinc > vp->v_mount->mnt_stat.f_iosize) 857 tinc = vp->v_mount->mnt_stat.f_iosize; 858 off = blkno * vp->v_mount->mnt_stat.f_iosize; 859 860 for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { 861 int mask; 862 863 m = vm_page_lookup(obj, trunc_page(toff + off)); 864 if (!m) 865 return 0; 866 if (vm_page_is_valid(m, toff + off, tinc) == 0) 867 return 0; 868 } 869 return 1; 870 } 871 872 /* 873 * Get a block given a specified block and offset into a file/device. 874 */ 875 struct buf * 876 getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo) 877 { 878 struct buf *bp; 879 int s; 880 struct bufhashhdr *bh; 881 vm_offset_t off; 882 int bsize; 883 int nleft; 884 885 bsize = DEV_BSIZE; 886 if (vp->v_mount) { 887 bsize = vp->v_mount->mnt_stat.f_iosize; 888 } 889 s = splbio(); 890 loop: 891 if ((cnt.v_free_count + cnt.v_cache_count) < 892 cnt.v_free_reserved + MAXBSIZE / PAGE_SIZE) 893 wakeup((caddr_t) &vm_pages_needed); 894 if (bp = incore(vp, blkno)) { 895 if (bp->b_flags & B_BUSY) { 896 bp->b_flags |= B_WANTED; 897 if (curproc == pageproc) { 898 bp->b_flags |= B_PDWANTED; 899 wakeup((caddr_t) &cnt.v_free_count); 900 } 901 if (!tsleep((caddr_t) bp, PRIBIO | slpflag, "getblk", slptimeo)) 902 goto loop; 903 splx(s); 904 return (struct buf *) NULL; 905 } 906 bp->b_flags |= B_BUSY | B_CACHE; 907 bremfree(bp); 908 /* 909 * check for size inconsistancies 910 */ 911 if (bp->b_bcount != size) { 912 #if defined(VFS_BIO_DEBUG) 913 printf("getblk: invalid buffer size: %ld\n", bp->b_bcount); 914 #endif 915 bp->b_flags |= B_INVAL; 916 bwrite(bp); 917 goto loop; 918 } 919 splx(s); 920 return (bp); 921 } else { 922 vm_object_t obj; 923 int doingvmio; 924 925 if ((obj = (vm_object_t) vp->v_vmdata) && 926 (vp->v_flag & VVMIO) /* && (blkno >= 0) */ ) { 927 doingvmio = 1; 928 } else { 929 doingvmio = 0; 930 } 931 if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) { 932 if (slpflag || slptimeo) 933 return NULL; 934 goto loop; 935 } 936 if (incore(vp, blkno)) { 937 bp->b_flags |= B_INVAL; 938 brelse(bp); 939 goto loop; 940 } 941 bp->b_blkno = bp->b_lblkno = blkno; 942 bgetvp(vp, bp); 943 LIST_REMOVE(bp, b_hash); 944 bh = BUFHASH(vp, blkno); 945 LIST_INSERT_HEAD(bh, bp, b_hash); 946 if (doingvmio) { 947 bp->b_flags |= (B_VMIO | B_CACHE); 948 #if defined(VFS_BIO_DEBUG) 949 if (vp->v_type != VREG) 950 printf("getblk: vmioing file type %d???\n", vp->v_type); 951 #endif 952 ++nvmio; 953 } else { 954 if (bp->b_flags & B_VMIO) 955 --nvmio; 956 bp->b_flags &= ~B_VMIO; 957 } 958 splx(s); 959 if (!allocbuf(bp, size, 1)) { 960 s = splbio(); 961 goto loop; 962 } 963 return (bp); 964 } 965 } 966 967 /* 968 * Get an empty, disassociated buffer of given size. 969 */ 970 struct buf * 971 geteblk(int size) 972 { 973 struct buf *bp; 974 975 while ((bp = getnewbuf(0, 0, 0)) == 0); 976 allocbuf(bp, size, 0); 977 bp->b_flags |= B_INVAL; 978 return (bp); 979 } 980 981 /* 982 * Modify the length of a buffer's underlying buffer storage without 983 * destroying information (unless, of course the buffer is shrinking). 984 */ 985 int 986 allocbuf(struct buf * bp, int size, int vmio) 987 { 988 989 int s; 990 int newbsize; 991 int i; 992 993 if ((bp->b_flags & B_VMIO) == 0) { 994 newbsize = round_page(size); 995 if (newbsize == bp->b_bufsize) { 996 bp->b_bcount = size; 997 return 1; 998 } else if (newbsize < bp->b_bufsize) { 999 if (bp->b_flags & B_MALLOC) { 1000 bp->b_bcount = size; 1001 return 1; 1002 } 1003 vm_hold_free_pages( 1004 bp, 1005 (vm_offset_t) bp->b_data + newbsize, 1006 (vm_offset_t) bp->b_data + bp->b_bufsize); 1007 } else if (newbsize > bp->b_bufsize) { 1008 if (bp->b_flags & B_MALLOC) { 1009 vm_offset_t bufaddr; 1010 1011 bufaddr = (vm_offset_t) bp->b_data; 1012 bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; 1013 vm_hold_load_pages( 1014 bp, 1015 (vm_offset_t) bp->b_data, 1016 (vm_offset_t) bp->b_data + newbsize); 1017 bcopy((caddr_t) bufaddr, bp->b_data, bp->b_bcount); 1018 free((caddr_t) bufaddr, M_TEMP); 1019 } else if ((newbsize <= PAGE_SIZE / 2) && (bp->b_bufsize == 0)) { 1020 bp->b_flags |= B_MALLOC; 1021 bp->b_data = malloc(newbsize, M_TEMP, M_WAITOK); 1022 bp->b_npages = 0; 1023 } else { 1024 vm_hold_load_pages( 1025 bp, 1026 (vm_offset_t) bp->b_data + bp->b_bufsize, 1027 (vm_offset_t) bp->b_data + newbsize); 1028 } 1029 } 1030 /* 1031 * adjust buffer cache's idea of memory allocated to buffer 1032 * contents 1033 */ 1034 freebufspace -= newbsize - bp->b_bufsize; 1035 allocbufspace += newbsize - bp->b_bufsize; 1036 } else { 1037 vm_page_t m; 1038 int desiredpages; 1039 1040 newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE; 1041 desiredpages = round_page(newbsize) / PAGE_SIZE; 1042 1043 if (newbsize == bp->b_bufsize) { 1044 bp->b_bcount = size; 1045 return 1; 1046 } else if (newbsize < bp->b_bufsize) { 1047 if (desiredpages < bp->b_npages) { 1048 pmap_qremove((vm_offset_t) trunc_page(bp->b_data) + 1049 desiredpages * PAGE_SIZE, (bp->b_npages - desiredpages)); 1050 for (i = desiredpages; i < bp->b_npages; i++) { 1051 m = bp->b_pages[i]; 1052 s = splhigh(); 1053 if ((m->flags & PG_BUSY) || (m->busy != 0)) { 1054 m->flags |= PG_WANTED; 1055 tsleep(m, PVM, "biodep", 0); 1056 } 1057 splx(s); 1058 1059 if (m->bmapped == 0) { 1060 printf("allocbuf: bmapped is zero for page %d\n", i); 1061 panic("allocbuf: error"); 1062 } 1063 --m->bmapped; 1064 if (m->bmapped == 0) { 1065 PAGE_WAKEUP(m); 1066 pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); 1067 vm_page_free(m); 1068 } 1069 bp->b_pages[i] = NULL; 1070 } 1071 bp->b_npages = desiredpages; 1072 } 1073 } else { 1074 vm_object_t obj; 1075 vm_offset_t tinc, off, toff, objoff; 1076 int pageindex, curbpnpages; 1077 struct vnode *vp; 1078 int bsize; 1079 1080 vp = bp->b_vp; 1081 bsize = vp->v_mount->mnt_stat.f_iosize; 1082 1083 if (bp->b_npages < desiredpages) { 1084 obj = (vm_object_t) vp->v_vmdata; 1085 tinc = PAGE_SIZE; 1086 if (tinc > bsize) 1087 tinc = bsize; 1088 off = bp->b_lblkno * bsize; 1089 curbpnpages = bp->b_npages; 1090 doretry: 1091 for (toff = 0; toff < newbsize; toff += tinc) { 1092 int mask; 1093 int bytesinpage; 1094 1095 pageindex = toff / PAGE_SIZE; 1096 objoff = trunc_page(toff + off); 1097 if (pageindex < curbpnpages) { 1098 int pb; 1099 1100 m = bp->b_pages[pageindex]; 1101 if (m->offset != objoff) 1102 panic("allocbuf: page changed offset??!!!?"); 1103 bytesinpage = tinc; 1104 if (tinc > (newbsize - toff)) 1105 bytesinpage = newbsize - toff; 1106 if (!vm_page_is_valid(m, toff + off, bytesinpage)) { 1107 bp->b_flags &= ~B_CACHE; 1108 } 1109 if ((m->flags & PG_ACTIVE) == 0) 1110 vm_page_activate(m); 1111 continue; 1112 } 1113 m = vm_page_lookup(obj, objoff); 1114 if (!m) { 1115 m = vm_page_alloc(obj, objoff, 0); 1116 if (!m) { 1117 int j; 1118 1119 for (j = bp->b_npages; j < pageindex; j++) { 1120 vm_page_t mt = bp->b_pages[j]; 1121 1122 PAGE_WAKEUP(mt); 1123 if (!mt->valid) { 1124 vm_page_free(mt); 1125 } 1126 } 1127 VM_WAIT; 1128 if (vmio && (bp->b_flags & B_PDWANTED)) { 1129 --nvmio; 1130 bp->b_flags &= ~B_VMIO; 1131 bp->b_flags |= B_INVAL; 1132 brelse(bp); 1133 return 0; 1134 } 1135 curbpnpages = bp->b_npages; 1136 goto doretry; 1137 } 1138 m->valid = 0; 1139 vm_page_activate(m); 1140 } else if ((m->valid == 0) || (m->flags & PG_BUSY)) { 1141 int j; 1142 int bufferdestroyed = 0; 1143 1144 for (j = bp->b_npages; j < pageindex; j++) { 1145 vm_page_t mt = bp->b_pages[j]; 1146 1147 PAGE_WAKEUP(mt); 1148 if (mt->valid == 0) { 1149 vm_page_free(mt); 1150 } 1151 } 1152 if (vmio && (bp->b_flags & B_PDWANTED)) { 1153 --nvmio; 1154 bp->b_flags &= ~B_VMIO; 1155 bp->b_flags |= B_INVAL; 1156 brelse(bp); 1157 VM_WAIT; 1158 bufferdestroyed = 1; 1159 } 1160 s = splbio(); 1161 if (m) { 1162 m->flags |= PG_WANTED; 1163 tsleep(m, PRIBIO, "pgtblk", 0); 1164 } 1165 splx(s); 1166 if (bufferdestroyed) 1167 return 0; 1168 curbpnpages = bp->b_npages; 1169 goto doretry; 1170 } else { 1171 int pb; 1172 1173 if ((m->flags & PG_CACHE) && 1174 (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) { 1175 int j; 1176 1177 for (j = bp->b_npages; j < pageindex; j++) { 1178 vm_page_t mt = bp->b_pages[j]; 1179 1180 PAGE_WAKEUP(mt); 1181 if (mt->valid == 0) { 1182 vm_page_free(mt); 1183 } 1184 } 1185 VM_WAIT; 1186 if (vmio && (bp->b_flags & B_PDWANTED)) { 1187 --nvmio; 1188 bp->b_flags &= ~B_VMIO; 1189 bp->b_flags |= B_INVAL; 1190 brelse(bp); 1191 return 0; 1192 } 1193 curbpnpages = bp->b_npages; 1194 goto doretry; 1195 } 1196 bytesinpage = tinc; 1197 if (tinc > (newbsize - toff)) 1198 bytesinpage = newbsize - toff; 1199 if (!vm_page_is_valid(m, toff + off, bytesinpage)) { 1200 bp->b_flags &= ~B_CACHE; 1201 } 1202 if ((m->flags & PG_ACTIVE) == 0) 1203 vm_page_activate(m); 1204 m->flags |= PG_BUSY; 1205 } 1206 bp->b_pages[pageindex] = m; 1207 curbpnpages = pageindex + 1; 1208 } 1209 if (bsize >= PAGE_SIZE) { 1210 for (i = bp->b_npages; i < curbpnpages; i++) { 1211 m = bp->b_pages[i]; 1212 if (m->valid == 0) { 1213 bp->b_flags &= ~B_CACHE; 1214 } 1215 m->bmapped++; 1216 PAGE_WAKEUP(m); 1217 } 1218 } else { 1219 if (!vm_page_is_valid(bp->b_pages[0], off, bsize)) 1220 bp->b_flags &= ~B_CACHE; 1221 bp->b_pages[0]->bmapped++; 1222 PAGE_WAKEUP(bp->b_pages[0]); 1223 } 1224 bp->b_npages = curbpnpages; 1225 bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; 1226 pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages); 1227 bp->b_data += off % PAGE_SIZE; 1228 } 1229 } 1230 } 1231 bp->b_bufsize = newbsize; 1232 bp->b_bcount = size; 1233 return 1; 1234 } 1235 1236 /* 1237 * Wait for buffer I/O completion, returning error status. 1238 */ 1239 int 1240 biowait(register struct buf * bp) 1241 { 1242 int s; 1243 1244 s = splbio(); 1245 while ((bp->b_flags & B_DONE) == 0) 1246 tsleep((caddr_t) bp, PRIBIO, "biowait", 0); 1247 if ((bp->b_flags & B_ERROR) || bp->b_error) { 1248 if ((bp->b_flags & B_INVAL) == 0) { 1249 bp->b_flags |= B_INVAL; 1250 bp->b_dev = NODEV; 1251 LIST_REMOVE(bp, b_hash); 1252 LIST_INSERT_HEAD(&invalhash, bp, b_hash); 1253 wakeup((caddr_t) bp); 1254 } 1255 if (!bp->b_error) 1256 bp->b_error = EIO; 1257 else 1258 bp->b_flags |= B_ERROR; 1259 splx(s); 1260 return (bp->b_error); 1261 } else { 1262 splx(s); 1263 return (0); 1264 } 1265 } 1266 1267 /* 1268 * Finish I/O on a buffer, calling an optional function. 1269 * This is usually called from interrupt level, so process blocking 1270 * is not *a good idea*. 1271 */ 1272 void 1273 biodone(register struct buf * bp) 1274 { 1275 int s; 1276 1277 s = splbio(); 1278 if (bp->b_flags & B_DONE) 1279 printf("biodone: buffer already done\n"); 1280 bp->b_flags |= B_DONE; 1281 1282 if ((bp->b_flags & B_READ) == 0) { 1283 vwakeup(bp); 1284 } 1285 #ifdef BOUNCE_BUFFERS 1286 if (bp->b_flags & B_BOUNCE) 1287 vm_bounce_free(bp); 1288 #endif 1289 1290 /* call optional completion function if requested */ 1291 if (bp->b_flags & B_CALL) { 1292 bp->b_flags &= ~B_CALL; 1293 (*bp->b_iodone) (bp); 1294 splx(s); 1295 return; 1296 } 1297 if (bp->b_flags & B_VMIO) { 1298 int i, resid; 1299 vm_offset_t foff; 1300 vm_page_t m; 1301 vm_object_t obj; 1302 int iosize; 1303 struct vnode *vp = bp->b_vp; 1304 1305 foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 1306 obj = (vm_object_t) vp->v_vmdata; 1307 if (!obj) { 1308 return; 1309 } 1310 #if defined(VFS_BIO_DEBUG) 1311 if (obj->paging_in_progress < bp->b_npages) { 1312 printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", 1313 obj->paging_in_progress, bp->b_npages); 1314 } 1315 #endif 1316 iosize = bp->b_bufsize; 1317 for (i = 0; i < bp->b_npages; i++) { 1318 m = bp->b_pages[i]; 1319 if (m == bogus_page) { 1320 m = vm_page_lookup(obj, foff); 1321 if (!m) { 1322 #if defined(VFS_BIO_DEBUG) 1323 printf("biodone: page disappeared\n"); 1324 #endif 1325 --obj->paging_in_progress; 1326 continue; 1327 } 1328 bp->b_pages[i] = m; 1329 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); 1330 } 1331 #if defined(VFS_BIO_DEBUG) 1332 if (trunc_page(foff) != m->offset) { 1333 printf("biodone: foff(%d)/m->offset(%d) mismatch\n", foff, m->offset); 1334 } 1335 #endif 1336 resid = (m->offset + PAGE_SIZE) - foff; 1337 if (resid > iosize) 1338 resid = iosize; 1339 if (resid > 0) { 1340 vm_page_set_valid(m, foff, resid); 1341 vm_page_set_clean(m, foff, resid); 1342 } 1343 if (m->busy == 0) { 1344 printf("biodone: page busy < 0, off: %d, foff: %d, resid: %d, index: %d\n", 1345 m->offset, foff, resid, i); 1346 printf(" iosize: %d, lblkno: %d\n", 1347 bp->b_vp->v_mount->mnt_stat.f_iosize, bp->b_lblkno); 1348 printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n", 1349 m->valid, m->dirty, m->bmapped); 1350 panic("biodone: page busy < 0\n"); 1351 } 1352 --m->busy; 1353 PAGE_WAKEUP(m); 1354 --obj->paging_in_progress; 1355 foff += resid; 1356 iosize -= resid; 1357 } 1358 if (obj && obj->paging_in_progress == 0) 1359 wakeup((caddr_t) obj); 1360 } 1361 /* 1362 * For asynchronous completions, release the buffer now. The brelse 1363 * checks for B_WANTED and will do the wakeup there if necessary - so 1364 * no need to do a wakeup here in the async case. 1365 */ 1366 1367 if (bp->b_flags & B_ASYNC) { 1368 brelse(bp); 1369 } else { 1370 bp->b_flags &= ~(B_WANTED | B_PDWANTED); 1371 wakeup((caddr_t) bp); 1372 } 1373 splx(s); 1374 } 1375 1376 int 1377 count_lock_queue() 1378 { 1379 int count; 1380 struct buf *bp; 1381 1382 count = 0; 1383 for (bp = bufqueues[QUEUE_LOCKED].tqh_first; 1384 bp != NULL; 1385 bp = bp->b_freelist.tqe_next) 1386 count++; 1387 return (count); 1388 } 1389 1390 int vfs_update_interval = 30; 1391 1392 void 1393 vfs_update() 1394 { 1395 (void) spl0(); 1396 while (1) { 1397 tsleep((caddr_t) &vfs_update_wakeup, PRIBIO, "update", 1398 hz * vfs_update_interval); 1399 vfs_update_wakeup = 0; 1400 sync(curproc, NULL, NULL); 1401 } 1402 } 1403 1404 void 1405 vfs_unbusy_pages(struct buf * bp) 1406 { 1407 int i; 1408 1409 if (bp->b_flags & B_VMIO) { 1410 struct vnode *vp = bp->b_vp; 1411 vm_object_t obj = (vm_object_t) vp->v_vmdata; 1412 vm_offset_t foff; 1413 1414 foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 1415 1416 for (i = 0; i < bp->b_npages; i++) { 1417 vm_page_t m = bp->b_pages[i]; 1418 1419 if (m == bogus_page) { 1420 m = vm_page_lookup(obj, foff); 1421 if (!m) { 1422 panic("vfs_unbusy_pages: page missing\n"); 1423 } 1424 bp->b_pages[i] = m; 1425 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); 1426 } 1427 --obj->paging_in_progress; 1428 --m->busy; 1429 PAGE_WAKEUP(m); 1430 } 1431 if (obj->paging_in_progress == 0) 1432 wakeup((caddr_t) obj); 1433 } 1434 } 1435 1436 void 1437 vfs_busy_pages(struct buf * bp, int clear_modify) 1438 { 1439 int i; 1440 1441 if (bp->b_flags & B_VMIO) { 1442 vm_object_t obj = (vm_object_t) bp->b_vp->v_vmdata; 1443 vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 1444 int iocount = bp->b_bufsize; 1445 1446 for (i = 0; i < bp->b_npages; i++) { 1447 vm_page_t m = bp->b_pages[i]; 1448 int resid = (m->offset + PAGE_SIZE) - foff; 1449 1450 if (resid > iocount) 1451 resid = iocount; 1452 obj->paging_in_progress++; 1453 m->busy++; 1454 if (clear_modify) { 1455 vm_page_test_dirty(m); 1456 pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ); 1457 } else if (bp->b_bcount >= PAGE_SIZE) { 1458 if (m->valid && (bp->b_flags & B_CACHE) == 0) { 1459 bp->b_pages[i] = bogus_page; 1460 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); 1461 } 1462 } 1463 foff += resid; 1464 iocount -= resid; 1465 } 1466 } 1467 } 1468 1469 void 1470 vfs_dirty_pages(struct buf * bp) 1471 { 1472 int i; 1473 1474 if (bp->b_flags & B_VMIO) { 1475 vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 1476 int iocount = bp->b_bufsize; 1477 1478 for (i = 0; i < bp->b_npages; i++) { 1479 vm_page_t m = bp->b_pages[i]; 1480 int resid = (m->offset + PAGE_SIZE) - foff; 1481 1482 if (resid > iocount) 1483 resid = iocount; 1484 if (resid > 0) { 1485 vm_page_set_valid(m, foff, resid); 1486 vm_page_set_dirty(m, foff, resid); 1487 } 1488 PAGE_WAKEUP(m); 1489 foff += resid; 1490 iocount -= resid; 1491 } 1492 } 1493 } 1494 /* 1495 * these routines are not in the correct place (yet) 1496 * also they work *ONLY* for kernel_pmap!!! 1497 */ 1498 void 1499 vm_hold_load_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa) 1500 { 1501 vm_offset_t pg; 1502 vm_page_t p; 1503 vm_offset_t from = round_page(froma); 1504 vm_offset_t to = round_page(toa); 1505 1506 tryagain0: 1507 if ((curproc != pageproc) && ((cnt.v_free_count + cnt.v_cache_count) <= 1508 cnt.v_free_reserved + (toa - froma) / PAGE_SIZE)) { 1509 VM_WAIT; 1510 goto tryagain0; 1511 } 1512 for (pg = from; pg < to; pg += PAGE_SIZE) { 1513 1514 tryagain: 1515 1516 p = vm_page_alloc(kernel_object, pg - VM_MIN_KERNEL_ADDRESS, 0); 1517 if (!p) { 1518 VM_WAIT; 1519 goto tryagain; 1520 } 1521 vm_page_wire(p); 1522 pmap_kenter(pg, VM_PAGE_TO_PHYS(p)); 1523 bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = p; 1524 PAGE_WAKEUP(p); 1525 bp->b_npages++; 1526 } 1527 } 1528 1529 void 1530 vm_hold_free_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa) 1531 { 1532 vm_offset_t pg; 1533 vm_page_t p; 1534 vm_offset_t from = round_page(froma); 1535 vm_offset_t to = round_page(toa); 1536 1537 for (pg = from; pg < to; pg += PAGE_SIZE) { 1538 p = bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE]; 1539 bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = 0; 1540 pmap_kremove(pg); 1541 vm_page_free(p); 1542 --bp->b_npages; 1543 } 1544 } 1545 1546 void 1547 bufstats() 1548 { 1549 } 1550