/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993,1994 John S. Dyson
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 * $Id: vnode_pager.c,v 1.17 1994/04/05 03:23:53 davidg Exp $
 */

/*
 * Page to/from files (vnodes).
 *
 * TODO:
 *	pageouts
 *	fix credential use (uses current process credentials now)
 */

/*
 * MODIFICATIONS:
 * John S. Dyson  08 Dec 93
 *
 * This file, together with some vm_fault modifications, eliminates the
 * performance advantage of using the buffer cache and minimizes memory
 * copies.
 *
 * 1) Supports multiple-block reads
 * 2) Bypasses buffer cache for reads
 *
 * TODO:
 *
 * 1) Totally bypass buffer cache for reads
 *    (Currently will still sometimes use buffer cache for reads)
 * 2) Bypass buffer cache for writes
 *    (Code does not support it, but mods are simple)
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/mount.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>

#include <sys/buf.h>
#include <miscfs/specfs/specdev.h>

int vnode_pager_putmulti();

void vnode_pager_init();
vm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t);
void vnode_pager_dealloc();
int vnode_pager_getpage();
int vnode_pager_getmulti();
int vnode_pager_putpage();
boolean_t vnode_pager_haspage();

struct pagerops vnodepagerops = {
	vnode_pager_init,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpage,
	vnode_pager_getmulti,
	vnode_pager_putpage,
	vnode_pager_putmulti,
	vnode_pager_haspage
};

static int vnode_pager_input(vn_pager_t vnp, vm_page_t *m, int count, int reqpage);
static int vnode_pager_output(vn_pager_t vnp, vm_page_t *m, int count, int *rtvals);

struct buf *getpbuf();
void relpbuf(struct buf *bp);

extern vm_map_t pager_map;

struct pagerlst vnode_pager_list;	/* list of managed vnodes */

#define MAXBP (PAGE_SIZE/DEV_BSIZE)

void
vnode_pager_init()
{
	TAILQ_INIT(&vnode_pager_list);
}

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
vm_pager_t
vnode_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register vn_pager_t vnp;
	vm_object_t object;
	struct vattr vattr;
	struct vnode *vp;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	/*
	 * Vnodes keep a pointer to any associated pager so no need to
	 * lookup with vm_pager_lookup.
	 */
	vp = (struct vnode *) handle;
	pager = (vm_pager_t) vp->v_vmdata;
	if (pager == NULL) {
		/*
		 * Allocate pager structures
		 */
		pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
		if (pager == NULL)
			return (NULL);
		vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
		if (vnp == NULL) {
			free((caddr_t) pager, M_VMPAGER);
			return (NULL);
		}
		/*
		 * And an object of the appropriate size
		 */
		if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) {
			object = vm_object_allocate(round_page(vattr.va_size));
			vm_object_enter(object, pager);
			vm_object_setpager(object, pager, 0, TRUE);
		} else {
			free((caddr_t) vnp, M_VMPGDATA);
			free((caddr_t) pager, M_VMPAGER);
			return (NULL);
		}
		/*
		 * Hold a reference to the vnode and initialize pager data.
		 */
		VREF(vp);
		vnp->vnp_flags = 0;
		vnp->vnp_vp = vp;
		vnp->vnp_size = vattr.va_size;

		TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
		pager->pg_handle = handle;
		pager->pg_type = PG_VNODE;
		pager->pg_ops = &vnodepagerops;
		pager->pg_data = (caddr_t) vnp;
		vp->v_vmdata = (caddr_t) pager;
	} else {
		/*
		 * vm_object_lookup() will remove the object from the
		 * cache if found and also gain a reference to the object.
		 */
		object = vm_object_lookup(pager);
	}
	return (pager);
}

void
vnode_pager_dealloc(pager)
	vm_pager_t pager;
{
	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
	register struct vnode *vp;
	struct proc *p = curproc;	/* XXX */

	if (vp = vnp->vnp_vp) {
		vp->v_vmdata = NULL;
		vp->v_flag &= ~VTEXT;
#if 0
		/* can hang if done at reboot on NFS FS */
		(void) VOP_FSYNC(vp, p->p_ucred, p);
#endif
		vrele(vp);
	}

	TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
	free((caddr_t) vnp, M_VMPGDATA);
	free((caddr_t) pager, M_VMPAGER);
}

int
vnode_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage);
}

int
vnode_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int err;
	vm_page_t marray[1];

	if (pager == NULL)
		return FALSE;
	marray[0] = m;

	return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0);
}

boolean_t
vnode_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int err;
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL)
		return FALSE;
	marray[0] = m;
	vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals);
	return rtvals[0];
}

int
vnode_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals);
}

boolean_t
vnode_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
	daddr_t bn;
	int run;
	int err;

	/*
	 * Offset beyond end of file, do not have the page
	 */
	if (offset >= vnp->vnp_size) {
		return (FALSE);
	}

	/*
	 * Read the index to find the disk block to read
	 * from.  If there is no block, report that we don't
	 * have this data.
	 *
	 * Assumes that the vnode has whole page or nothing.
	 */
	err = VOP_BMAP(vnp->vnp_vp,
		offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize,
		(struct vnode **) 0, &bn, 0);
	if (err) {
		return (TRUE);
	}
	return ((long) bn < 0 ? FALSE : TRUE);
}

/*
 * Lets the VM system know about a change in size for a file.
 * If this vnode is mapped into some address space (i.e. we have a pager
 * for it) we adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	register vn_pager_t vnp;
	register vm_object_t object;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	pager = (vm_pager_t) vp->v_vmdata;
	vnp = (vn_pager_t) pager->pg_data;
	if (nsize == vnp->vnp_size)
		return;

	/*
	 * No object.
	 * This can happen during object termination since
	 * vm_object_page_clean is called after the object
	 * has been removed from the hash table, and clean
	 * may cause vnode write operations which can wind
	 * up back here.
	 */
	object = vm_object_lookup(pager);
	if (object == NULL)
		return;

	/*
	 * File has shrunk.
	 * Toss any cached pages beyond the new EOF.
	 */
	if (round_page(nsize) < round_page(vnp->vnp_size)) {
		vm_object_lock(object);
		vm_object_page_remove(object,
		    (vm_offset_t) round_page(nsize), round_page(vnp->vnp_size));
		vm_object_unlock(object);
	}
	vnp->vnp_size = (vm_offset_t) nsize;
	vm_object_deallocate(object);
}

void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	register vm_pager_t pager, npager;
	struct vnode *vp;

	pager = vnode_pager_list.tqh_first;
	while (pager) {
		/*
		 * Save the next pointer now since uncaching may
		 * terminate the object and render the pager invalid
		 */
		vp = ((vn_pager_t) pager->pg_data)->vnp_vp;
		npager = pager->pg_list.tqe_next;
		if (mp == (struct mount *) 0 || vp->v_mount == mp)
			(void) vnode_pager_uncache(vp);
		pager = npager;
	}
}

/*
 * Remove vnode associated object from the object cache.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
boolean_t
vnode_pager_uncache(vp)
	register struct vnode *vp;
{
	register vm_object_t object;
	boolean_t uncached, locked;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	pager = (vm_pager_t) vp->v_vmdata;
	if (pager == NULL)
		return (TRUE);

	/*
	 * Unlock the vnode if it is currently locked.
	 * We do this since uncaching the object may result
	 * in its destruction which may initiate paging
	 * activity which may necessitate locking the vnode.
	 */
	locked = VOP_ISLOCKED(vp);
	if (locked)
		VOP_UNLOCK(vp);

	/*
	 * Must use vm_object_lookup() as it actually removes
	 * the object from the cache list.
	 */
	object = vm_object_lookup(pager);
	if (object) {
		uncached = (object->ref_count <= 1);
		pager_cache(object, FALSE);
	} else
		uncached = TRUE;
	if (locked)
		VOP_LOCK(vp);
	return (uncached);
}

#if 0
/*
 * Remove vnode associated object from the object cache.
 *
 * XXX unlock the vnode if it is currently locked.
 * We must do this since uncaching the object may result in its
 * destruction which may initiate paging activity which may necessitate
 * re-locking the vnode.
 */
boolean_t
vnode_pager_uncache(vp)
	register struct vnode *vp;
{
	register vm_object_t object;
	boolean_t uncached;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	pager = (vm_pager_t) vp->v_vmdata;
	if (pager == NULL)
		return (TRUE);

	/*
	 * Must use vm_object_lookup() as it actually removes
	 * the object from the cache list.
	 */
	object = vm_object_lookup(pager);
	if (object) {
		uncached = (object->ref_count <= 1);
		VOP_UNLOCK(vp);
		pager_cache(object, FALSE);
		VOP_LOCK(vp);
	} else
		uncached = TRUE;
	return (uncached);
}
#endif

void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * calculate the linear (byte) disk address of specified virtual
 * file address
 */
vm_offset_t
vnode_pager_addr(vp, address)
	struct vnode *vp;
	vm_offset_t address;
{
	int rtaddress;
	int bsize;
	vm_offset_t block;
	struct vnode *rtvp;
	int err;
	int vblock, voffset;
	int run;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block, 0);

	if (err)
		rtaddress = -1;
	else
		rtaddress = block * DEV_BSIZE + voffset;

	return rtaddress;
}

/*
 * interrupt routine for I/O completion
 */
void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup((caddr_t) bp);
}

/*
 * small block file system vnode pager input
 */
int
vnode_pager_input_smlfs(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	int i;
	int s;
	vm_offset_t paging_offset;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t mapsize;
	vm_offset_t foff;
	vm_offset_t kva;
	int fileaddr;
	int block;
	vm_offset_t bsize;
	int error = 0;
	int run;

	paging_offset = m->object->paging_offset;
	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;
	foff = m->offset + paging_offset;

	VOP_BMAP(vp, foff, &dp, 0, 0);

	kva = vm_pager_map_page(m);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {
		/*
		 * calculate logical block and offset
		 */
		block = foff / bsize + i;
		s = splbio();
		while (bp = incore(vp, block)) {
			int amount;

			/*
			 * wait until the buffer is avail or gone
			 */
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				tsleep((caddr_t) bp, PVM, "vnwblk", 0);
				continue;
			}

			amount = bsize;
			if ((foff + bsize) > vnp->vnp_size)
				amount = vnp->vnp_size - foff;

			/*
			 * make sure that this page is in the buffer
			 */
			if ((amount > 0) && amount <= bp->b_bcount) {
				bp->b_flags |= B_BUSY;
				splx(s);

				/*
				 * copy the data from the buffer
				 */
				bcopy(bp->b_un.b_addr, (caddr_t) kva + i * bsize, amount);
				if (amount < bsize) {
					bzero((caddr_t) kva + i * bsize + amount, bsize - amount);
				}
				bp->b_flags &= ~B_BUSY;
				wakeup((caddr_t) bp);
				goto nextblock;
			}
			break;
		}
		splx(s);
		fileaddr = vnode_pager_addr(vp, foff + i * bsize);
		if (fileaddr != -1) {
			bp = getpbuf();
			VHOLD(vp);

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr / DEV_BSIZE;
			bgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */
			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep((caddr_t) bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			HOLDRELE(vp);
			if (error)
				break;
		} else {
			bzero((caddr_t) kva + i * bsize, bsize);
		}
nextblock:;
	}
	vm_pager_unmap_page(kva);
	if (error) {
		return VM_PAGER_FAIL;
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->flags |= PG_CLEAN;
	m->flags &= ~PG_LAUNDRY;
	return VM_PAGER_OK;
}

/*
 * old style vnode pager input routine
 */
int
vnode_pager_input_old(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	int i;
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t foff;
	vm_offset_t kva;

	error = 0;
	foff = m->offset + m->object->paging_offset;

	/*
	 * Return failure if beyond current EOF
	 */
	if (foff >= vnp->vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (foff + size > vnp->vnp_size)
			size = vnp->vnp_size - foff;

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);
		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = foff;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->flags |= PG_CLEAN;
	m->flags &= ~PG_LAUNDRY;
	return error ? VM_PAGER_FAIL : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */
int
vnode_pager_input(vnp, m, count, reqpage)
	register vn_pager_t vnp;
	vm_page_t *m;
	int count, reqpage;
{
	int i, j;
	vm_offset_t kva, foff;
	int size;
	struct proc *p = curproc;	/* XXX */
	vm_object_t object;
	vm_offset_t paging_offset;
	struct vnode *dp, *vp;
	vm_offset_t mapsize;
	int bsize;

	int first, last;
	int reqaddr, firstaddr;
	int run;
	int block, offset;

	int nbp;
	struct buf *bp;
	int s;
	int failflag;

	int errtype = 0;	/* 0 is file type otherwise vm type */
	int error = 0;

	object = m[reqpage]->object;	/* all vm_page_t items are in same object */
	paging_offset = object->paging_offset;

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return
	 * value -- assuming an fs always has a bmap entry point
	 * -- that assumption is wrong!!!
	 */
	kva = 0;
	mapsize = 0;
	foff = m[reqpage]->offset + paging_offset;
	if (!VOP_BMAP(vp, foff, &dp, 0, 0)) {
		/*
		 * we do not block for a kva, notice we default to a kva
		 * conservative behavior
		 */
		kva = kmem_alloc_pageable(pager_map, (mapsize = count * PAGE_SIZE));
		if (!kva) {
			for (i = 0; i < count; i++) {
				if (i != reqpage) {
					vnode_pager_freepage(m[i]);
				}
			}
			m[0] = m[reqpage];
			kva = kmem_alloc_wait(pager_map, mapsize = PAGE_SIZE);
			reqpage = 0;
			count = 1;
		}
	}

	/*
	 * if we can't get a kva or we can't bmap, use old VOP code
	 */
	if (!kva) {
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		return vnode_pager_input_old(vnp, m[reqpage]);

	/*
	 * if the blocksize is smaller than a page size, then use
	 * special small filesystem code.  NFS sometimes has a small
	 * blocksize, but it can handle large reads itself.
	 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		kmem_free_wakeup(pager_map, kva, mapsize);

		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		return vnode_pager_input_smlfs(vnp, m[reqpage]);
	}

	/*
	 * here on direct device I/O
	 */

	/*
	 * This pathetic hack gets data from the buffer cache, if it's there.
	 * I believe that this is not really necessary, and the ends can
	 * be gotten by defaulting to the normal vfs read behavior, but this
	 * might be more efficient, because this will NOT invoke read-aheads
	 * and one of the purposes of this code is to bypass the buffer
	 * cache and keep from flushing it by reading in a program.
	 */

	/*
	 * calculate logical block and offset
	 */
	block = foff / bsize;
	offset = foff % bsize;
	s = splbio();

	/*
	 * if we have a buffer in core, then try to use it
	 */
	while (bp = incore(vp, block)) {
		int amount;

		/*
		 * wait until the buffer is avail or gone
		 */
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			tsleep((caddr_t) bp, PVM, "vnwblk", 0);
			continue;
		}

		amount = PAGE_SIZE;
		if ((foff + amount) > vnp->vnp_size)
			amount = vnp->vnp_size - foff;

		/*
		 * make sure that this page is in the buffer
		 */
		if ((amount > 0) && (offset + amount) <= bp->b_bcount) {
			bp->b_flags |= B_BUSY;
			splx(s);

			/*
			 * map the requested page
			 */
			pmap_kenter(kva, VM_PAGE_TO_PHYS(m[reqpage]));
			pmap_update();

			/*
			 * copy the data from the buffer
			 */
			bcopy(bp->b_un.b_addr + offset, (caddr_t) kva, amount);
			if (amount < PAGE_SIZE) {
				bzero((caddr_t) kva + amount, PAGE_SIZE - amount);
			}

			/*
			 * unmap the page and free the kva
			 */
			pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE);
			kmem_free_wakeup(pager_map, kva, mapsize);

			/*
			 * release the buffer back to the block subsystem
			 */
			bp->b_flags &= ~B_BUSY;
			wakeup((caddr_t) bp);

			/*
			 * we did not have to do any work to get the requested
			 * page, so the read behind/ahead does not justify a read
			 */
			for (i = 0; i < count; i++) {
				if (i != reqpage) {
					vnode_pager_freepage(m[i]);
				}
			}
			m[0] = m[reqpage];
			reqpage = 0;
			count = 1;

			/*
			 * sorry for the goto
			 */
			goto finishup;
		}

		/*
		 * buffer is nowhere to be found, read from the disk
		 */
		break;
	}
	splx(s);

	reqaddr = vnode_pager_addr(vp, foff);

	s = splbio();
	/*
	 * Make sure that our I/O request is contiguous.
	 * Scan backward and stop for the first discontiguous
	 * entry or stop for a page being in buffer cache.
	 */
	failflag = 0;
	first = reqpage;
	for (i = reqpage - 1; i >= 0; --i) {
		if (failflag ||
		    incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) ||
		    (vnode_pager_addr(vp, m[i]->offset + paging_offset))
		    != reqaddr + (i - reqpage) * PAGE_SIZE) {
			vnode_pager_freepage(m[i]);
			failflag = 1;
		} else {
			first = i;
		}
	}

	/*
	 * Scan forward and stop for the first non-contiguous
	 * entry or stop for a page being in buffer cache.
	 */
	failflag = 0;
	last = reqpage + 1;
	for (i = reqpage + 1; i < count; i++) {
		if (failflag ||
		    incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) ||
		    (vnode_pager_addr(vp, m[i]->offset + paging_offset))
		    != reqaddr + (i - reqpage) * PAGE_SIZE) {
			vnode_pager_freepage(m[i]);
			failflag = 1;
		} else {
			last = i + 1;
		}
	}
	splx(s);

	/*
	 * the first and last page have been calculated now, move input
	 * pages to be zero based...
	 */
	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
		}
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = m[0]->offset + paging_offset;

	/*
	 * and get the disk physical address (in bytes)
	 */
	firstaddr = vnode_pager_addr(vp, foff);

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > vnp->vnp_size)
		size = vnp->vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	/*
	 * and map the pages to be read into the kva
	 */
	for (i = 0; i < count; i++)
		pmap_kenter(kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
	pmap_update();

	bp = getpbuf();
	VHOLD(vp);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = firstaddr / DEV_BSIZE;
	bgetvp(dp, bp);
	bp->b_bcount = size;
	bp->b_bufsize = size;

	/* do the input */
	VOP_STRATEGY(bp);

	s = splbio();
	/* we definitely need to be at splbio here */

	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "vnread", 0);
	}
	splx(s);
	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	if (!error) {
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}

	pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count);
	kmem_free_wakeup(pager_map, kva, mapsize);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);
	HOLDRELE(vp);
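
	/*
	 * Common exit for both the direct-read path and the buffer-cache
	 * copy path: mark the pages clean and dispose of any read-behind/
	 * read-ahead pages that were brought in along with the requested one.
	 */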
finishup:
	for (i = 0; i < count; i++) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
		m[i]->flags |= PG_CLEAN;
		m[i]->flags &= ~PG_LAUNDRY;
		if (i != reqpage) {
			/*
			 * whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Empirical
			 * results so far indicate that deactivating pages is
			 * best.
			 */

			/*
			 * just in case someone was asking for this
			 * page we now tell them that it is ok to use
			 */
			if (!error) {
				vm_page_deactivate(m[i]);
				PAGE_WAKEUP(m[i]);
				m[i]->flags &= ~PG_FAKE;
				m[i]->act_count = 2;
			} else {
				vnode_pager_freepage(m[i]);
			}
		}
	}
	if (error) {
		printf("vnode pager read error: %d\n", error);
	}
	if (errtype)
		return error;
	return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
}

/*
 * old-style vnode pager output routine
 */
int
vnode_pager_output_old(vnp, m)
	register vn_pager_t vnp;
	vm_page_t m;
{
	vm_offset_t foff;
	vm_offset_t kva;
	vm_offset_t size;
	struct iovec aiov;
	struct uio auio;
	struct vnode *vp;
	int error;

	vp = vnp->vnp_vp;
	foff = m->offset + m->object->paging_offset;

	/*
	 * Return failure if beyond current EOF
	 */
	if (foff >= vnp->vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (foff + size > vnp->vnp_size)
			size = vnp->vnp_size - foff;

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_WRITE routines.
		 */
		kva = vm_pager_map_page(m);
		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = foff;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred);

		if (!error) {
			if ((size - auio.uio_resid) == 0) {
				error = EINVAL;
			}
		}
		vm_pager_unmap_page(kva);
		return error ? VM_PAGER_FAIL : VM_PAGER_OK;
	}
}

/*
 * vnode pager output on a small-block file system
 */
int
vnode_pager_output_smlfs(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	int i;
	int s;
	vm_offset_t paging_offset;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t mapsize;
	vm_offset_t foff;
	vm_offset_t kva;
	int fileaddr;
	int block;
	vm_offset_t bsize;
	int run;
	int error = 0;

	paging_offset = m->object->paging_offset;
	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;
	foff = m->offset + paging_offset;

	VOP_BMAP(vp, foff, &dp, 0, 0);
	kva = vm_pager_map_page(m);

	for (i = 0; !error && i < (PAGE_SIZE / bsize); i++) {
		/*
		 * calculate the disk byte address of this block
		 */
		fileaddr = vnode_pager_addr(vp, foff + i * bsize);
		if (fileaddr != -1) {
			/*
			 * invalidate any incore buffer for this block
			 */
			s = splbio();
			if (bp = incore(vp, (foff / bsize) + i)) {
				bp = getblk(vp, (foff / bsize) + i, bp->b_bufsize, 0, 0);
				bp->b_flags |= B_INVAL;
				brelse(bp);
			}
			splx(s);

			bp = getpbuf();
			VHOLD(vp);

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_CALL | B_WRITE;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr / DEV_BSIZE;
			bgetvp(dp, bp);
			++dp->v_numoutput;

			/* for NFS */
			bp->b_dirtyoff = 0;
			bp->b_dirtyend = bsize;

			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the output */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */
			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep((caddr_t) bp, PVM, "vnswrt", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			HOLDRELE(vp);
		}
	}
	vm_pager_unmap_page(kva);
	if (error)
		return VM_PAGER_FAIL;
	else
		return VM_PAGER_OK;
}

/*
 * generic vnode pager output routine
 */
int
vnode_pager_output(vnp, m, count, rtvals)
	vn_pager_t vnp;
	vm_page_t *m;
	int count;
	int *rtvals;
{
	int i, j;
	vm_offset_t kva, foff;
	int size;
	struct proc *p = curproc;	/* XXX */
	vm_object_t object;
	vm_offset_t paging_offset;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t mapsize;
	vm_offset_t reqaddr;
	int run;
	int bsize;
	int s;

	int error = 0;

retryoutput:
	object = m[0]->object;	/* all vm_page_t items are in same object */
	paging_offset = object->paging_offset;

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	/*
	 * if the filesystem does not have a bmap, then use the
	 * old code
	 */
	if (VOP_BMAP(vp, m[0]->offset + paging_offset, &dp, 0, 0)) {

		rtvals[0] = vnode_pager_output_old(vnp, m[0]);

		pmap_clear_modify(VM_PAGE_TO_PHYS(m[0]));
		m[0]->flags |= PG_CLEAN;
		m[0]->flags &= ~PG_LAUNDRY;
		return rtvals[0];
	}

	/*
	 * if the filesystem has a small blocksize, then use
	 * the small block filesystem output code
	 */
	if ((bsize < PAGE_SIZE) &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]);
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
			}
		}
		return rtvals[0];
	}

	/*
	 * get some kva for the output
	 */
	kva = kmem_alloc_pageable(pager_map, (mapsize = count * PAGE_SIZE));
	if (!kva) {
		kva = kmem_alloc_pageable(pager_map, (mapsize = PAGE_SIZE));
		count = 1;
		if (!kva)
			return rtvals[0];
	}

	for (i = 0; i < count; i++) {
		foff = m[i]->offset + paging_offset;
		if (foff >= vnp->vnp_size) {
			for (j = i; j < count; j++)
				rtvals[j] = VM_PAGER_BAD;
			count = i;
			break;
		}
	}
	if (count == 0) {
		return rtvals[0];
	}

	foff = m[0]->offset + paging_offset;
	reqaddr = vnode_pager_addr(vp, foff);

	/*
	 * Scan forward and stop for the first non-contiguous
	 * entry or stop for a page being in buffer cache.
	 */
	for (i = 1; i < count; i++) {
		if (vnode_pager_addr(vp, m[i]->offset + paging_offset)
		    != reqaddr + i * PAGE_SIZE) {
			count = i;
			break;
		}
	}

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > vnp->vnp_size)
		size = vnp->vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	/*
	 * and map the pages to be written into the kva
	 */
	for (i = 0; i < count; i++)
		pmap_kenter(kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
	pmap_update();
/*
	printf("vnode: writing foff: %d, devoff: %d, size: %d\n",
		foff, reqaddr, size);
*/

	/*
	 * next invalidate the incore vfs_bio data
	 */
	for (i = 0; i < count; i++) {
		int filblock = (foff + i * PAGE_SIZE) / bsize;
		struct buf *fbp;

		s = splbio();
		if (fbp = incore(vp, filblock)) {
			/* printf("invalidating: %d\n", filblock); */
			fbp = getblk(vp, filblock, fbp->b_bufsize, 0, 0);
			fbp->b_flags |= B_INVAL;
			brelse(fbp);
		}
		splx(s);
	}

	bp = getpbuf();
	VHOLD(vp);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_WRITE | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr / DEV_BSIZE;
	bgetvp(dp, bp);
	++dp->v_numoutput;

	/* for NFS */
	bp->b_dirtyoff = 0;
	bp->b_dirtyend = size;

	bp->b_bcount = size;
	bp->b_bufsize = size;

	/* do the output */
	VOP_STRATEGY(bp);

	s = splbio();
	/* we definitely need to be at splbio here */

	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "vnwrite", 0);
	}
	splx(s);

	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count);
	kmem_free_wakeup(pager_map, kva, mapsize);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);
	HOLDRELE(vp);

	if (!error) {
		for (i = 0; i < count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			m[i]->flags |= PG_CLEAN;
			m[i]->flags &= ~PG_LAUNDRY;
			rtvals[i] = VM_PAGER_OK;
		}
	} else if (count != 1) {
		/*
		 * the multi-page write failed -- retry one page at a time
		 */
		error = 0;
		count = 1;
		goto retryoutput;
	}

	if (error) {
		printf("vnode pager write error: %d\n", error);
	}
	return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
}