1 /* 2 * Copyright (c) 1990 University of Utah. 3 * Copyright (c) 1991 The Regents of the University of California. 4 * All rights reserved. 5 * Copyright (c) 1993,1994 John S. Dyson 6 * 7 * This code is derived from software contributed to Berkeley by 8 * the Systems Programming Group of the University of Utah Computer 9 * Science Department. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 40 * $Id: vnode_pager.c,v 1.24 1995/02/22 09:15:35 davidg Exp $ 41 */ 42 43 /* 44 * Page to/from files (vnodes). 45 * 46 * TODO: 47 * pageouts 48 * fix credential use (uses current process credentials now) 49 */ 50 51 /* 52 * MODIFICATIONS: 53 * John S. Dyson 08 Dec 93 54 * 55 * This file in conjunction with some vm_fault mods, eliminate the performance 56 * advantage for using the buffer cache and minimize memory copies. 57 * 58 * 1) Supports multiple - block reads 59 * 2) Bypasses buffer cache for reads 60 * 61 * TODO: 62 * 63 * 1) Totally bypass buffer cache for reads 64 * (Currently will still sometimes use buffer cache for reads) 65 * 2) Bypass buffer cache for writes 66 * (Code does not support it, but mods are simple) 67 */ 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/kernel.h> 72 #include <sys/proc.h> 73 #include <sys/malloc.h> 74 #include <sys/vnode.h> 75 #include <sys/uio.h> 76 #include <sys/mount.h> 77 78 #include <vm/vm.h> 79 #include <vm/vm_page.h> 80 #include <vm/vnode_pager.h> 81 82 #include <sys/buf.h> 83 #include <miscfs/specfs/specdev.h> 84 85 int vnode_pager_putmulti(); 86 87 void vnode_pager_init(); 88 vm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t); 89 void vnode_pager_dealloc(); 90 int vnode_pager_getpage(); 91 int vnode_pager_getmulti(); 92 int vnode_pager_putpage(); 93 boolean_t vnode_pager_haspage(); 94 95 struct pagerops vnodepagerops = { 96 vnode_pager_init, 97 vnode_pager_alloc, 98 vnode_pager_dealloc, 99 vnode_pager_getpage, 100 vnode_pager_getmulti, 101 vnode_pager_putpage, 102 vnode_pager_putmulti, 103 vnode_pager_haspage 104 }; 105 106 107 108 static int vnode_pager_input(vn_pager_t vnp, vm_page_t * m, int count, int reqpage); 109 static int vnode_pager_output(vn_pager_t vnp, vm_page_t * m, int count, int *rtvals); 110 111 extern vm_map_t pager_map; 112 113 struct pagerlst vnode_pager_list; /* list of managed vnodes */ 114 115 #define MAXBP (PAGE_SIZE/DEV_BSIZE); 116 117 void 118 vnode_pager_init() 119 { 120 TAILQ_INIT(&vnode_pager_list); 121 } 122 123 /* 124 * Allocate (or lookup) pager for a vnode. 125 * Handle is a vnode pointer. 126 */ 127 vm_pager_t 128 vnode_pager_alloc(handle, size, prot, offset) 129 caddr_t handle; 130 vm_size_t size; 131 vm_prot_t prot; 132 vm_offset_t offset; 133 { 134 register vm_pager_t pager; 135 register vn_pager_t vnp; 136 vm_object_t object, tobject; 137 struct vattr vattr; 138 struct vnode *vp; 139 struct proc *p = curproc; /* XXX */ 140 int rtval; 141 142 /* 143 * Pageout to vnode, no can do yet. 144 */ 145 if (handle == NULL) 146 return (NULL); 147 148 /* 149 * Vnodes keep a pointer to any associated pager so no need to lookup 150 * with vm_pager_lookup. 151 */ 152 vp = (struct vnode *) handle; 153 while ((object = (vm_object_t) vp->v_vmdata) && (object->flags & OBJ_DEAD)) 154 tsleep((caddr_t) object, PVM, "vadead", 0); 155 156 pager = NULL; 157 if (object != NULL) 158 pager = object->pager; 159 if (pager == NULL) { 160 161 /* 162 * Allocate pager structures 163 */ 164 pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK); 165 if (pager == NULL) 166 return (NULL); 167 vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); 168 if (vnp == NULL) { 169 free((caddr_t) pager, M_VMPAGER); 170 return (NULL); 171 } 172 /* 173 * And an object of the appropriate size 174 */ 175 if ((rtval = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) == 0) { 176 object = vm_object_allocate(round_page(vattr.va_size)); 177 object->flags &= ~OBJ_INTERNAL; 178 object->flags |= OBJ_CANPERSIST; 179 vm_object_enter(object, pager); 180 object->pager = pager; 181 } else { 182 printf("Error in getattr: %d\n", rtval); 183 free((caddr_t) vnp, M_VMPGDATA); 184 free((caddr_t) pager, M_VMPAGER); 185 return (NULL); 186 } 187 188 /* 189 * Hold a reference to the vnode and initialize pager data. 190 */ 191 VREF(vp); 192 vnp->vnp_flags = 0; 193 vnp->vnp_vp = vp; 194 vnp->vnp_size = vattr.va_size; 195 196 TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list); 197 pager->pg_handle = handle; 198 pager->pg_type = PG_VNODE; 199 pager->pg_ops = &vnodepagerops; 200 pager->pg_data = (caddr_t) vnp; 201 vp->v_vmdata = (caddr_t) object; 202 } else { 203 204 /* 205 * vm_object_lookup() will remove the object from the cache if 206 * found and also gain a reference to the object. 207 */ 208 (void) vm_object_lookup(pager); 209 } 210 return (pager); 211 } 212 213 void 214 vnode_pager_dealloc(pager) 215 vm_pager_t pager; 216 { 217 register vn_pager_t vnp = (vn_pager_t) pager->pg_data; 218 register struct vnode *vp; 219 vm_object_t object; 220 221 vp = vnp->vnp_vp; 222 if (vp) { 223 int s = splbio(); 224 225 object = (vm_object_t) vp->v_vmdata; 226 if (object) { 227 while (object->paging_in_progress) { 228 object->flags |= OBJ_PIPWNT; 229 tsleep(object, PVM, "vnpdea", 0); 230 } 231 } 232 splx(s); 233 234 vp->v_vmdata = NULL; 235 vp->v_flag &= ~(VTEXT | VVMIO); 236 vrele(vp); 237 } 238 TAILQ_REMOVE(&vnode_pager_list, pager, pg_list); 239 free((caddr_t) vnp, M_VMPGDATA); 240 free((caddr_t) pager, M_VMPAGER); 241 } 242 243 int 244 vnode_pager_getmulti(pager, m, count, reqpage, sync) 245 vm_pager_t pager; 246 vm_page_t *m; 247 int count; 248 int reqpage; 249 boolean_t sync; 250 { 251 252 return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage); 253 } 254 255 int 256 vnode_pager_getpage(pager, m, sync) 257 vm_pager_t pager; 258 vm_page_t m; 259 boolean_t sync; 260 { 261 262 vm_page_t marray[1]; 263 264 if (pager == NULL) 265 return FALSE; 266 marray[0] = m; 267 268 return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0); 269 } 270 271 boolean_t 272 vnode_pager_putpage(pager, m, sync) 273 vm_pager_t pager; 274 vm_page_t m; 275 boolean_t sync; 276 { 277 vm_page_t marray[1]; 278 int rtvals[1]; 279 280 if (pager == NULL) 281 return FALSE; 282 marray[0] = m; 283 vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals); 284 return rtvals[0]; 285 } 286 287 int 288 vnode_pager_putmulti(pager, m, c, sync, rtvals) 289 vm_pager_t pager; 290 vm_page_t *m; 291 int c; 292 boolean_t sync; 293 int *rtvals; 294 { 295 return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals); 296 } 297 298 299 boolean_t 300 vnode_pager_haspage(pager, offset) 301 vm_pager_t pager; 302 vm_offset_t offset; 303 { 304 register vn_pager_t vnp = (vn_pager_t) pager->pg_data; 305 register struct vnode *vp = vnp->vnp_vp; 306 daddr_t bn; 307 int err; 308 daddr_t block; 309 310 /* 311 * If filesystem no longer mounted or offset beyond end of file we do 312 * not have the page. 313 */ 314 if ((vp->v_mount == NULL) || (offset >= vnp->vnp_size)) 315 return FALSE; 316 317 block = offset / vp->v_mount->mnt_stat.f_iosize; 318 if (incore(vp, block)) 319 return TRUE; 320 /* 321 * Read the index to find the disk block to read from. If there is no 322 * block, report that we don't have this data. 323 * 324 * Assumes that the vnode has whole page or nothing. 325 */ 326 err = VOP_BMAP(vp, block, (struct vnode **) 0, &bn, 0); 327 if (err) 328 return (TRUE); 329 return ((long) bn < 0 ? FALSE : TRUE); 330 } 331 332 /* 333 * Lets the VM system know about a change in size for a file. 334 * If this vnode is mapped into some address space (i.e. we have a pager 335 * for it) we adjust our own internal size and flush any cached pages in 336 * the associated object that are affected by the size change. 337 * 338 * Note: this routine may be invoked as a result of a pager put 339 * operation (possibly at object termination time), so we must be careful. 340 */ 341 void 342 vnode_pager_setsize(vp, nsize) 343 struct vnode *vp; 344 u_long nsize; 345 { 346 register vn_pager_t vnp; 347 register vm_object_t object; 348 vm_pager_t pager; 349 350 /* 351 * Not a mapped vnode 352 */ 353 if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL) 354 return; 355 356 /* 357 * Hasn't changed size 358 */ 359 object = (vm_object_t) vp->v_vmdata; 360 if (object == NULL) 361 return; 362 if ((pager = object->pager) == NULL) 363 return; 364 vnp = (vn_pager_t) pager->pg_data; 365 if (nsize == vnp->vnp_size) 366 return; 367 368 /* 369 * No object. This can happen during object termination since 370 * vm_object_page_clean is called after the object has been removed 371 * from the hash table, and clean may cause vnode write operations 372 * which can wind up back here. 373 */ 374 object = vm_object_lookup(pager); 375 if (object == NULL) 376 return; 377 378 /* 379 * File has shrunk. Toss any cached pages beyond the new EOF. 380 */ 381 if (nsize < vnp->vnp_size) { 382 if (round_page((vm_offset_t) nsize) < vnp->vnp_size) { 383 vm_object_lock(object); 384 vm_object_page_remove(object, 385 round_page((vm_offset_t) nsize), vnp->vnp_size); 386 vm_object_unlock(object); 387 } 388 /* 389 * this gets rid of garbage at the end of a page that is now 390 * only partially backed by the vnode... 391 */ 392 if (nsize & PAGE_MASK) { 393 vm_offset_t kva; 394 vm_page_t m; 395 396 m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize)); 397 if (m) { 398 kva = vm_pager_map_page(m); 399 bzero((caddr_t) kva + (nsize & PAGE_MASK), 400 round_page(nsize) - nsize); 401 vm_pager_unmap_page(kva); 402 } 403 } 404 } 405 vnp->vnp_size = (vm_offset_t) nsize; 406 object->size = round_page(nsize); 407 408 vm_object_deallocate(object); 409 } 410 411 void 412 vnode_pager_umount(mp) 413 register struct mount *mp; 414 { 415 register vm_pager_t pager, npager; 416 struct vnode *vp; 417 418 pager = vnode_pager_list.tqh_first; 419 while (pager) { 420 421 /* 422 * Save the next pointer now since uncaching may terminate the 423 * object and render pager invalid 424 */ 425 vp = ((vn_pager_t) pager->pg_data)->vnp_vp; 426 npager = pager->pg_list.tqe_next; 427 if (mp == (struct mount *) 0 || vp->v_mount == mp) 428 (void) vnode_pager_uncache(vp); 429 pager = npager; 430 } 431 } 432 433 /* 434 * Remove vnode associated object from the object cache. 435 * 436 * Note: this routine may be invoked as a result of a pager put 437 * operation (possibly at object termination time), so we must be careful. 438 */ 439 boolean_t 440 vnode_pager_uncache(vp) 441 register struct vnode *vp; 442 { 443 register vm_object_t object; 444 boolean_t uncached, locked; 445 vm_pager_t pager; 446 447 /* 448 * Not a mapped vnode 449 */ 450 object = (vm_object_t) vp->v_vmdata; 451 if (object == NULL) 452 return (TRUE); 453 454 pager = object->pager; 455 if (pager == NULL) 456 return (TRUE); 457 458 /* 459 * Unlock the vnode if it is currently locked. We do this since 460 * uncaching the object may result in its destruction which may 461 * initiate paging activity which may necessitate locking the vnode. 462 */ 463 locked = VOP_ISLOCKED(vp); 464 if (locked) 465 VOP_UNLOCK(vp); 466 467 /* 468 * Must use vm_object_lookup() as it actually removes the object from 469 * the cache list. 470 */ 471 object = vm_object_lookup(pager); 472 if (object) { 473 uncached = (object->ref_count <= 1); 474 pager_cache(object, FALSE); 475 } else 476 uncached = TRUE; 477 if (locked) 478 VOP_LOCK(vp); 479 return (uncached); 480 } 481 482 483 void 484 vnode_pager_freepage(m) 485 vm_page_t m; 486 { 487 PAGE_WAKEUP(m); 488 vm_page_free(m); 489 } 490 491 /* 492 * calculate the linear (byte) disk address of specified virtual 493 * file address 494 */ 495 vm_offset_t 496 vnode_pager_addr(vp, address, run) 497 struct vnode *vp; 498 vm_offset_t address; 499 int *run; 500 { 501 int rtaddress; 502 int bsize; 503 vm_offset_t block; 504 struct vnode *rtvp; 505 int err; 506 int vblock, voffset; 507 508 if ((int) address < 0) 509 return -1; 510 511 bsize = vp->v_mount->mnt_stat.f_iosize; 512 vblock = address / bsize; 513 voffset = address % bsize; 514 515 err = VOP_BMAP(vp, vblock, &rtvp, &block, run); 516 517 if (err || (block == -1)) 518 rtaddress = -1; 519 else { 520 rtaddress = block + voffset / DEV_BSIZE; 521 if( run) { 522 *run += 1; 523 *run *= bsize/PAGE_SIZE; 524 *run -= voffset/PAGE_SIZE; 525 } 526 } 527 528 return rtaddress; 529 } 530 531 /* 532 * interrupt routine for I/O completion 533 */ 534 void 535 vnode_pager_iodone(bp) 536 struct buf *bp; 537 { 538 bp->b_flags |= B_DONE; 539 wakeup((caddr_t) bp); 540 if (bp->b_flags & B_ASYNC) { 541 vm_offset_t paddr; 542 vm_page_t m; 543 vm_object_t obj = 0; 544 int i; 545 int npages; 546 547 paddr = (vm_offset_t) bp->b_data; 548 if (bp->b_bufsize != bp->b_bcount) 549 bzero(bp->b_data + bp->b_bcount, 550 bp->b_bufsize - bp->b_bcount); 551 552 npages = (bp->b_bufsize + PAGE_SIZE - 1) / PAGE_SIZE; 553 for (i = 0; i < npages; i++) { 554 m = PHYS_TO_VM_PAGE(pmap_kextract(paddr + i * PAGE_SIZE)); 555 obj = m->object; 556 if (m) { 557 m->dirty = 0; 558 m->valid = VM_PAGE_BITS_ALL; 559 if (m->flags & PG_WANTED) 560 m->flags |= PG_REFERENCED; 561 PAGE_WAKEUP(m); 562 } else { 563 panic("vnode_pager_iodone: page is gone!!!"); 564 } 565 } 566 pmap_qremove(paddr, npages); 567 if (obj) { 568 --obj->paging_in_progress; 569 if (obj->paging_in_progress == 0 && 570 (obj->flags & OBJ_PIPWNT)) { 571 obj->flags &= ~OBJ_PIPWNT; 572 wakeup((caddr_t) obj); 573 } 574 } else { 575 panic("vnode_pager_iodone: object is gone???"); 576 } 577 relpbuf(bp); 578 } 579 } 580 581 /* 582 * small block file system vnode pager input 583 */ 584 int 585 vnode_pager_input_smlfs(vnp, m) 586 vn_pager_t vnp; 587 vm_page_t m; 588 { 589 int i; 590 int s; 591 struct vnode *dp, *vp; 592 struct buf *bp; 593 vm_offset_t kva; 594 int fileaddr; 595 int block; 596 vm_offset_t bsize; 597 int error = 0; 598 599 vp = vnp->vnp_vp; 600 bsize = vp->v_mount->mnt_stat.f_iosize; 601 602 VOP_BMAP(vp, 0, &dp, 0, 0); 603 604 kva = vm_pager_map_page(m); 605 606 for (i = 0; i < PAGE_SIZE / bsize; i++) { 607 608 if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid)) 609 continue; 610 611 fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0); 612 if (fileaddr != -1) { 613 bp = getpbuf(); 614 615 /* build a minimal buffer header */ 616 bp->b_flags = B_BUSY | B_READ | B_CALL; 617 bp->b_iodone = vnode_pager_iodone; 618 bp->b_proc = curproc; 619 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 620 if (bp->b_rcred != NOCRED) 621 crhold(bp->b_rcred); 622 if (bp->b_wcred != NOCRED) 623 crhold(bp->b_wcred); 624 bp->b_un.b_addr = (caddr_t) kva + i * bsize; 625 bp->b_blkno = fileaddr; 626 pbgetvp(dp, bp); 627 bp->b_bcount = bsize; 628 bp->b_bufsize = bsize; 629 630 /* do the input */ 631 VOP_STRATEGY(bp); 632 633 /* we definitely need to be at splbio here */ 634 635 s = splbio(); 636 while ((bp->b_flags & B_DONE) == 0) { 637 tsleep((caddr_t) bp, PVM, "vnsrd", 0); 638 } 639 splx(s); 640 if ((bp->b_flags & B_ERROR) != 0) 641 error = EIO; 642 643 /* 644 * free the buffer header back to the swap buffer pool 645 */ 646 relpbuf(bp); 647 HOLDRELE(vp); 648 if (error) 649 break; 650 651 vm_page_set_clean(m, i * bsize, bsize); 652 vm_page_set_valid(m, i * bsize, bsize); 653 } else { 654 vm_page_set_clean(m, i * bsize, bsize); 655 bzero((caddr_t) kva + i * bsize, bsize); 656 } 657 nextblock: 658 } 659 vm_pager_unmap_page(kva); 660 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 661 if (error) { 662 return VM_PAGER_ERROR; 663 } 664 return VM_PAGER_OK; 665 666 } 667 668 669 /* 670 * old style vnode pager output routine 671 */ 672 int 673 vnode_pager_input_old(vnp, m) 674 vn_pager_t vnp; 675 vm_page_t m; 676 { 677 struct uio auio; 678 struct iovec aiov; 679 int error; 680 int size; 681 vm_offset_t kva; 682 683 error = 0; 684 685 /* 686 * Return failure if beyond current EOF 687 */ 688 if (m->offset >= vnp->vnp_size) { 689 return VM_PAGER_BAD; 690 } else { 691 size = PAGE_SIZE; 692 if (m->offset + size > vnp->vnp_size) 693 size = vnp->vnp_size - m->offset; 694 /* 695 * Allocate a kernel virtual address and initialize so that 696 * we can use VOP_READ/WRITE routines. 697 */ 698 kva = vm_pager_map_page(m); 699 aiov.iov_base = (caddr_t) kva; 700 aiov.iov_len = size; 701 auio.uio_iov = &aiov; 702 auio.uio_iovcnt = 1; 703 auio.uio_offset = m->offset; 704 auio.uio_segflg = UIO_SYSSPACE; 705 auio.uio_rw = UIO_READ; 706 auio.uio_resid = size; 707 auio.uio_procp = (struct proc *) 0; 708 709 error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred); 710 if (!error) { 711 register int count = size - auio.uio_resid; 712 713 if (count == 0) 714 error = EINVAL; 715 else if (count != PAGE_SIZE) 716 bzero((caddr_t) kva + count, PAGE_SIZE - count); 717 } 718 vm_pager_unmap_page(kva); 719 } 720 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 721 m->dirty = 0; 722 return error ? VM_PAGER_ERROR : VM_PAGER_OK; 723 } 724 725 /* 726 * generic vnode pager input routine 727 */ 728 int 729 vnode_pager_input(vnp, m, count, reqpage) 730 register vn_pager_t vnp; 731 vm_page_t *m; 732 int count, reqpage; 733 { 734 int i; 735 vm_offset_t kva, foff; 736 int size, sizea; 737 vm_object_t object; 738 struct vnode *dp, *vp; 739 int bsize; 740 741 int first, last; 742 int firstaddr; 743 int block, offset; 744 int runpg; 745 int runend; 746 747 struct buf *bp, *bpa; 748 int counta; 749 int s; 750 int failflag; 751 752 int error = 0; 753 754 object = m[reqpage]->object; /* all vm_page_t items are in same 755 * object */ 756 757 vp = vnp->vnp_vp; 758 bsize = vp->v_mount->mnt_stat.f_iosize; 759 760 /* get the UNDERLYING device for the file with VOP_BMAP() */ 761 762 /* 763 * originally, we did not check for an error return value -- assuming 764 * an fs always has a bmap entry point -- that assumption is wrong!!! 765 */ 766 foff = m[reqpage]->offset; 767 768 /* 769 * if we can't bmap, use old VOP code 770 */ 771 if (VOP_BMAP(vp, 0, &dp, 0, 0)) { 772 for (i = 0; i < count; i++) { 773 if (i != reqpage) { 774 vnode_pager_freepage(m[i]); 775 } 776 } 777 cnt.v_vnodein++; 778 cnt.v_vnodepgsin++; 779 return vnode_pager_input_old(vnp, m[reqpage]); 780 781 /* 782 * if the blocksize is smaller than a page size, then use 783 * special small filesystem code. NFS sometimes has a small 784 * blocksize, but it can handle large reads itself. 785 */ 786 } else if ((PAGE_SIZE / bsize) > 1 && 787 (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { 788 789 for (i = 0; i < count; i++) { 790 if (i != reqpage) { 791 vnode_pager_freepage(m[i]); 792 } 793 } 794 cnt.v_vnodein++; 795 cnt.v_vnodepgsin++; 796 return vnode_pager_input_smlfs(vnp, m[reqpage]); 797 } 798 /* 799 * if ANY DEV_BSIZE blocks are valid on a large filesystem block 800 * then, the entire page is valid -- 801 */ 802 if (m[reqpage]->valid) { 803 m[reqpage]->valid = VM_PAGE_BITS_ALL; 804 for (i = 0; i < count; i++) { 805 if (i != reqpage) 806 vnode_pager_freepage(m[i]); 807 } 808 return VM_PAGER_OK; 809 } 810 /* 811 * here on direct device I/O 812 */ 813 814 815 firstaddr = -1; 816 /* 817 * calculate the run that includes the required page 818 */ 819 for(first = 0, i = 0; i < count; i = runend) { 820 firstaddr = vnode_pager_addr(vp, m[i]->offset, &runpg); 821 if (firstaddr == -1) { 822 if( i == reqpage && foff < vnp->vnp_size) { 823 printf("vnode_pager_input: unexpected missing page: firstaddr: %d, foff: %d, vnp_size: %d\n", 824 firstaddr, foff, vnp->vnp_size); 825 panic("vnode_pager_input:..."); 826 } 827 vnode_pager_freepage(m[i]); 828 runend = i + 1; 829 first = runend; 830 continue; 831 } 832 runend = i + runpg; 833 if( runend <= reqpage) { 834 int j; 835 for(j = i; j < runend; j++) { 836 vnode_pager_freepage(m[j]); 837 } 838 } else { 839 if( runpg < (count - first)) { 840 for(i=first + runpg; i < count; i++) 841 vnode_pager_freepage(m[i]); 842 count = first + runpg; 843 } 844 break; 845 } 846 first = runend; 847 } 848 849 /* 850 * the first and last page have been calculated now, move input pages 851 * to be zero based... 852 */ 853 if (first != 0) { 854 for (i = first; i < count; i++) { 855 m[i - first] = m[i]; 856 } 857 count -= first; 858 reqpage -= first; 859 } 860 861 /* 862 * calculate the file virtual address for the transfer 863 */ 864 foff = m[0]->offset; 865 #if 0 866 printf("foff: 0x%lx, firstaddr: 0x%lx\n", 867 foff, firstaddr); 868 DELAY(6000000); 869 #endif 870 871 /* 872 * calculate the size of the transfer 873 */ 874 size = count * PAGE_SIZE; 875 if ((foff + size) > vnp->vnp_size) 876 size = vnp->vnp_size - foff; 877 878 /* 879 * round up physical size for real devices 880 */ 881 if (dp->v_type == VBLK || dp->v_type == VCHR) 882 size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 883 884 counta = 0; 885 if (count * PAGE_SIZE > bsize) 886 counta = (count - reqpage) - 1; 887 bpa = 0; 888 sizea = 0; 889 bp = getpbuf(); 890 if (counta) { 891 bpa = (struct buf *) trypbuf(); 892 if (bpa) { 893 count -= counta; 894 sizea = size - count * PAGE_SIZE; 895 size = count * PAGE_SIZE; 896 } 897 } 898 kva = (vm_offset_t) bp->b_data; 899 900 /* 901 * and map the pages to be read into the kva 902 */ 903 pmap_qenter(kva, m, count); 904 905 /* build a minimal buffer header */ 906 bp->b_flags = B_BUSY | B_READ | B_CALL; 907 bp->b_iodone = vnode_pager_iodone; 908 /* B_PHYS is not set, but it is nice to fill this in */ 909 bp->b_proc = curproc; 910 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 911 if (bp->b_rcred != NOCRED) 912 crhold(bp->b_rcred); 913 if (bp->b_wcred != NOCRED) 914 crhold(bp->b_wcred); 915 bp->b_blkno = firstaddr; 916 pbgetvp(dp, bp); 917 bp->b_bcount = size; 918 bp->b_bufsize = size; 919 920 cnt.v_vnodein++; 921 cnt.v_vnodepgsin += count; 922 923 /* do the input */ 924 VOP_STRATEGY(bp); 925 926 if (counta) { 927 for (i = 0; i < counta; i++) { 928 vm_page_deactivate(m[count + i]); 929 } 930 pmap_qenter((vm_offset_t) bpa->b_data, &m[count], counta); 931 ++m[count]->object->paging_in_progress; 932 bpa->b_flags = B_BUSY | B_READ | B_CALL | B_ASYNC; 933 bpa->b_iodone = vnode_pager_iodone; 934 /* B_PHYS is not set, but it is nice to fill this in */ 935 bpa->b_proc = curproc; 936 bpa->b_rcred = bpa->b_wcred = bpa->b_proc->p_ucred; 937 if (bpa->b_rcred != NOCRED) 938 crhold(bpa->b_rcred); 939 if (bpa->b_wcred != NOCRED) 940 crhold(bpa->b_wcred); 941 bpa->b_blkno = firstaddr + count * (PAGE_SIZE / DEV_BSIZE); 942 pbgetvp(dp, bpa); 943 bpa->b_bcount = sizea; 944 bpa->b_bufsize = counta * PAGE_SIZE; 945 946 cnt.v_vnodepgsin += counta; 947 VOP_STRATEGY(bpa); 948 } 949 s = splbio(); 950 /* we definitely need to be at splbio here */ 951 952 while ((bp->b_flags & B_DONE) == 0) { 953 tsleep((caddr_t) bp, PVM, "vnread", 0); 954 } 955 splx(s); 956 if ((bp->b_flags & B_ERROR) != 0) 957 error = EIO; 958 959 if (!error) { 960 if (size != count * PAGE_SIZE) 961 bzero((caddr_t) kva + size, PAGE_SIZE * count - size); 962 } 963 pmap_qremove(kva, count); 964 965 /* 966 * free the buffer header back to the swap buffer pool 967 */ 968 relpbuf(bp); 969 HOLDRELE(vp); 970 971 finishup: 972 for (i = 0; i < count; i++) { 973 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 974 m[i]->dirty = 0; 975 m[i]->valid = VM_PAGE_BITS_ALL; 976 if (i != reqpage) { 977 978 /* 979 * whether or not to leave the page activated is up in 980 * the air, but we should put the page on a page queue 981 * somewhere. (it already is in the object). Result: 982 * It appears that emperical results show that 983 * deactivating pages is best. 984 */ 985 986 /* 987 * just in case someone was asking for this page we 988 * now tell them that it is ok to use 989 */ 990 if (!error) { 991 vm_page_deactivate(m[i]); 992 PAGE_WAKEUP(m[i]); 993 } else { 994 vnode_pager_freepage(m[i]); 995 } 996 } 997 } 998 if (error) { 999 printf("vnode_pager_input: I/O read error\n"); 1000 } 1001 return (error ? VM_PAGER_ERROR : VM_PAGER_OK); 1002 } 1003 1004 /* 1005 * old-style vnode pager output routine 1006 */ 1007 int 1008 vnode_pager_output_old(vnp, m) 1009 register vn_pager_t vnp; 1010 vm_page_t m; 1011 { 1012 vm_offset_t kva, kva2; 1013 vm_offset_t size; 1014 struct iovec aiov; 1015 struct uio auio; 1016 struct vnode *vp; 1017 int error; 1018 1019 vp = vnp->vnp_vp; 1020 1021 /* 1022 * Dont return failure if beyond current EOF placate the VM system. 1023 */ 1024 if (m->offset >= vnp->vnp_size) { 1025 return VM_PAGER_OK; 1026 } else { 1027 size = PAGE_SIZE; 1028 if (m->offset + size > vnp->vnp_size) 1029 size = vnp->vnp_size - m->offset; 1030 1031 kva2 = kmem_alloc(pager_map, PAGE_SIZE); 1032 /* 1033 * Allocate a kernel virtual address and initialize so that 1034 * we can use VOP_WRITE routines. 1035 */ 1036 kva = vm_pager_map_page(m); 1037 bcopy((caddr_t) kva, (caddr_t) kva2, size); 1038 vm_pager_unmap_page(kva); 1039 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 1040 PAGE_WAKEUP(m); 1041 1042 aiov.iov_base = (caddr_t) kva2; 1043 aiov.iov_len = size; 1044 auio.uio_iov = &aiov; 1045 auio.uio_iovcnt = 1; 1046 auio.uio_offset = m->offset; 1047 auio.uio_segflg = UIO_SYSSPACE; 1048 auio.uio_rw = UIO_WRITE; 1049 auio.uio_resid = size; 1050 auio.uio_procp = (struct proc *) 0; 1051 1052 error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred); 1053 1054 kmem_free_wakeup(pager_map, kva2, PAGE_SIZE); 1055 if (!error) { 1056 if ((size - auio.uio_resid) == 0) { 1057 error = EINVAL; 1058 } 1059 } 1060 return error ? VM_PAGER_ERROR : VM_PAGER_OK; 1061 } 1062 } 1063 1064 /* 1065 * vnode pager output on a small-block file system 1066 */ 1067 int 1068 vnode_pager_output_smlfs(vnp, m) 1069 vn_pager_t vnp; 1070 vm_page_t m; 1071 { 1072 int i; 1073 int s; 1074 struct vnode *dp, *vp; 1075 struct buf *bp; 1076 vm_offset_t kva; 1077 int fileaddr; 1078 vm_offset_t bsize; 1079 int error = 0; 1080 1081 vp = vnp->vnp_vp; 1082 bsize = vp->v_mount->mnt_stat.f_iosize; 1083 1084 VOP_BMAP(vp, 0, &dp, 0, 0); 1085 kva = vm_pager_map_page(m); 1086 for (i = 0; !error && i < (PAGE_SIZE / bsize); i++) { 1087 1088 if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid & m->dirty) == 0) 1089 continue; 1090 /* 1091 * calculate logical block and offset 1092 */ 1093 fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0); 1094 if (fileaddr != -1) { 1095 1096 bp = getpbuf(); 1097 1098 /* build a minimal buffer header */ 1099 bp->b_flags = B_BUSY | B_CALL | B_WRITE; 1100 bp->b_iodone = vnode_pager_iodone; 1101 bp->b_proc = curproc; 1102 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1103 if (bp->b_rcred != NOCRED) 1104 crhold(bp->b_rcred); 1105 if (bp->b_wcred != NOCRED) 1106 crhold(bp->b_wcred); 1107 bp->b_un.b_addr = (caddr_t) kva + i * bsize; 1108 bp->b_blkno = fileaddr; 1109 pbgetvp(dp, bp); 1110 ++dp->v_numoutput; 1111 /* for NFS */ 1112 bp->b_dirtyoff = 0; 1113 bp->b_dirtyend = bsize; 1114 bp->b_bcount = bsize; 1115 bp->b_bufsize = bsize; 1116 1117 /* do the input */ 1118 VOP_STRATEGY(bp); 1119 1120 /* we definitely need to be at splbio here */ 1121 1122 s = splbio(); 1123 while ((bp->b_flags & B_DONE) == 0) { 1124 tsleep((caddr_t) bp, PVM, "vnswrt", 0); 1125 } 1126 splx(s); 1127 if ((bp->b_flags & B_ERROR) != 0) 1128 error = EIO; 1129 1130 vm_page_set_clean(m, i * bsize, bsize); 1131 /* 1132 * free the buffer header back to the swap buffer pool 1133 */ 1134 relpbuf(bp); 1135 HOLDRELE(vp); 1136 } 1137 } 1138 vm_pager_unmap_page(kva); 1139 if (error) 1140 return VM_PAGER_ERROR; 1141 else 1142 return VM_PAGER_OK; 1143 } 1144 1145 /* 1146 * generic vnode pager output routine 1147 */ 1148 int 1149 vnode_pager_output(vnp, m, count, rtvals) 1150 vn_pager_t vnp; 1151 vm_page_t *m; 1152 int count; 1153 int *rtvals; 1154 { 1155 int i, j; 1156 vm_offset_t kva, foff; 1157 int size; 1158 vm_object_t object; 1159 struct vnode *dp, *vp; 1160 struct buf *bp; 1161 vm_offset_t reqaddr; 1162 int bsize; 1163 int s; 1164 daddr_t block; 1165 struct timeval tv; 1166 int runpg; 1167 1168 int error = 0; 1169 1170 retryoutput: 1171 object = m[0]->object; /* all vm_page_t items are in same object */ 1172 1173 vp = vnp->vnp_vp; 1174 1175 /* 1176 * Make sure underlying filesystem is still mounted. 1177 */ 1178 if (vp->v_mount == NULL) 1179 return VM_PAGER_FAIL; 1180 1181 bsize = vp->v_mount->mnt_stat.f_iosize; 1182 1183 for (i = 0; i < count; i++) 1184 rtvals[i] = VM_PAGER_AGAIN; 1185 1186 if ((int) m[0]->offset < 0) { 1187 printf("vnode_pager_output: attempt to write meta-data!!! -- 0x%x\n", m[0]->offset); 1188 m[0]->dirty = 0; 1189 rtvals[0] = VM_PAGER_OK; 1190 return VM_PAGER_OK; 1191 } 1192 /* 1193 * if the filesystem does not have a bmap, then use the old code 1194 */ 1195 if (VOP_BMAP(vp, (m[0]->offset / bsize), &dp, &block, 0) || 1196 (block == -1)) { 1197 1198 rtvals[0] = vnode_pager_output_old(vnp, m[0]); 1199 1200 m[0]->dirty = 0; 1201 cnt.v_vnodeout++; 1202 cnt.v_vnodepgsout++; 1203 return rtvals[0]; 1204 } 1205 tv = time; 1206 VOP_UPDATE(vp, &tv, &tv, 0); 1207 1208 /* 1209 * if the filesystem has a small blocksize, then use the small block 1210 * filesystem output code 1211 */ 1212 if ((bsize < PAGE_SIZE) && 1213 (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { 1214 1215 for (i = 0; i < count; i++) { 1216 rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]); 1217 if (rtvals[i] == VM_PAGER_OK) { 1218 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1219 } 1220 } 1221 cnt.v_vnodeout++; 1222 cnt.v_vnodepgsout += count; 1223 return rtvals[0]; 1224 } 1225 for (i = 0; i < count; i++) { 1226 foff = m[i]->offset; 1227 if (foff >= vnp->vnp_size) { 1228 for (j = i; j < count; j++) 1229 rtvals[j] = VM_PAGER_BAD; 1230 count = i; 1231 break; 1232 } 1233 } 1234 if (count == 0) { 1235 return rtvals[0]; 1236 } 1237 foff = m[0]->offset; 1238 reqaddr = vnode_pager_addr(vp, foff, &runpg); 1239 if( runpg < count) 1240 count = runpg; 1241 1242 /* 1243 * calculate the size of the transfer 1244 */ 1245 size = count * PAGE_SIZE; 1246 if ((foff + size) > vnp->vnp_size) 1247 size = vnp->vnp_size - foff; 1248 1249 /* 1250 * round up physical size for real devices 1251 */ 1252 if (dp->v_type == VBLK || dp->v_type == VCHR) 1253 size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 1254 1255 bp = getpbuf(); 1256 kva = (vm_offset_t) bp->b_data; 1257 /* 1258 * and map the pages to be read into the kva 1259 */ 1260 pmap_qenter(kva, m, count); 1261 1262 /* build a minimal buffer header */ 1263 bp->b_flags = B_BUSY | B_WRITE | B_CALL; 1264 bp->b_iodone = vnode_pager_iodone; 1265 /* B_PHYS is not set, but it is nice to fill this in */ 1266 bp->b_proc = curproc; 1267 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1268 1269 if (bp->b_rcred != NOCRED) 1270 crhold(bp->b_rcred); 1271 if (bp->b_wcred != NOCRED) 1272 crhold(bp->b_wcred); 1273 bp->b_blkno = reqaddr; 1274 pbgetvp(dp, bp); 1275 ++dp->v_numoutput; 1276 1277 /* for NFS */ 1278 bp->b_dirtyoff = 0; 1279 bp->b_dirtyend = size; 1280 1281 bp->b_bcount = size; 1282 bp->b_bufsize = size; 1283 1284 cnt.v_vnodeout++; 1285 cnt.v_vnodepgsout += count; 1286 1287 /* do the output */ 1288 VOP_STRATEGY(bp); 1289 1290 s = splbio(); 1291 1292 /* we definitely need to be at splbio here */ 1293 1294 while ((bp->b_flags & B_DONE) == 0) { 1295 tsleep((caddr_t) bp, PVM, "vnwrite", 0); 1296 } 1297 splx(s); 1298 1299 if ((bp->b_flags & B_ERROR) != 0) 1300 error = EIO; 1301 1302 pmap_qremove(kva, count); 1303 1304 /* 1305 * free the buffer header back to the swap buffer pool 1306 */ 1307 relpbuf(bp); 1308 HOLDRELE(vp); 1309 1310 if (!error) { 1311 for (i = 0; i < count; i++) { 1312 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1313 m[i]->dirty = 0; 1314 rtvals[i] = VM_PAGER_OK; 1315 } 1316 } else if (count != 1) { 1317 error = 0; 1318 count = 1; 1319 goto retryoutput; 1320 } 1321 if (error) { 1322 printf("vnode_pager_output: I/O write error\n"); 1323 } 1324 return (error ? VM_PAGER_ERROR : VM_PAGER_OK); 1325 } 1326