/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993,1994 John S. Dyson
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.18 1994/11/24 14:43:22 davidg Exp $
 */

/*
 * Page to/from files (vnodes).
 *
 * TODO:
 *	pageouts
 *	fix credential use (uses current process credentials now)
 */

/*
 * MODIFICATIONS:
 * John S. Dyson  08 Dec 93
 *
 * This file, in conjunction with some vm_fault mods, eliminates the
 * performance advantage of using the buffer cache and minimizes memory
 * copies.
 *
 * 1) Supports multiple-block reads
 * 2) Bypasses buffer cache for reads
 *
 * TODO:
 *
 * 1) Totally bypass buffer cache for reads
 *    (Currently will still sometimes use buffer cache for reads)
 * 2) Bypass buffer cache for writes
 *    (Code does not support it, but mods are simple)
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/mount.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>

#include <sys/buf.h>
#include <miscfs/specfs/specdev.h>

int vnode_pager_putmulti();

void vnode_pager_init();
vm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t);
void vnode_pager_dealloc();
int vnode_pager_getpage();
int vnode_pager_getmulti();
int vnode_pager_putpage();
boolean_t vnode_pager_haspage();

struct pagerops vnodepagerops = {
	vnode_pager_init,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpage,
	vnode_pager_getmulti,
	vnode_pager_putpage,
	vnode_pager_putmulti,
	vnode_pager_haspage
};

static int vnode_pager_input(vn_pager_t vnp, vm_page_t *m, int count, int reqpage);
static int vnode_pager_output(vn_pager_t vnp, vm_page_t *m, int count, int *rtvals);

extern vm_map_t pager_map;

struct pagerlst vnode_pager_list;	/* list of managed vnodes */

#define MAXBP (PAGE_SIZE/DEV_BSIZE)

void
vnode_pager_init()
{
	TAILQ_INIT(&vnode_pager_list);
}

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
vm_pager_t
vnode_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register vn_pager_t vnp;
	vm_object_t object, tobject;
	struct vattr vattr;
	struct vnode *vp;
	struct proc *p = curproc;	/* XXX */
	int rtval;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	/*
	 * Vnodes keep a pointer to any associated pager so no need to lookup
	 * with vm_pager_lookup.
	 */
	vp = (struct vnode *) handle;
	while ((object = (vm_object_t) vp->v_vmdata) && (object->flags & OBJ_DEAD))
		tsleep((caddr_t) object, PVM, "vadead", 0);

	pager = NULL;
	if (object != NULL)
		pager = object->pager;
	if (pager == NULL) {

		/*
		 * Allocate pager structures
		 */
		pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
		if (pager == NULL)
			return (NULL);
		vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
		if (vnp == NULL) {
			free((caddr_t) pager, M_VMPAGER);
			return (NULL);
		}
		/*
		 * And an object of the appropriate size
		 */
		if ((rtval = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) == 0) {
			object = vm_object_allocate(round_page(vattr.va_size));
			vm_object_enter(object, pager);
			vm_object_setpager(object, pager, 0, TRUE);
		} else {
			printf("Error in getattr: %d\n", rtval);
			free((caddr_t) vnp, M_VMPGDATA);
			free((caddr_t) pager, M_VMPAGER);
			return (NULL);
		}

		/*
		 * Hold a reference to the vnode and initialize pager data.
		 */
		VREF(vp);
		vnp->vnp_flags = 0;
		vnp->vnp_vp = vp;
		vnp->vnp_size = vattr.va_size;

		TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
		pager->pg_handle = handle;
		pager->pg_type = PG_VNODE;
		pager->pg_ops = &vnodepagerops;
		pager->pg_data = (caddr_t) vnp;
		vp->v_vmdata = (caddr_t) object;
	} else {

		/*
		 * vm_object_lookup() will remove the object from the cache if
		 * found and also gain a reference to the object.
		 */
		(void) vm_object_lookup(pager);
	}
	return (pager);
}
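/*
 * Deallocate a vnode pager: wait (at splbio, since completions happen
 * at interrupt time) for any paging on the associated object to drain,
 * break the vnode's v_vmdata back pointer, and drop the vnode
 * reference taken in vnode_pager_alloc().
 */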
void
vnode_pager_dealloc(pager)
	vm_pager_t pager;
{
	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
	register struct vnode *vp;
	vm_object_t object;

	vp = vnp->vnp_vp;
	if (vp) {
		int s = splbio();

		object = (vm_object_t) vp->v_vmdata;
		if (object) {
			while (object->paging_in_progress) {
				tsleep(object, PVM, "vnpdea", 0);
			}
		}
		splx(s);

		vp->v_vmdata = NULL;
		vp->v_flag &= ~(VTEXT | VVMIO);
		vrele(vp);
	}
	TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
	free((caddr_t) vnp, M_VMPGDATA);
	free((caddr_t) pager, M_VMPAGER);
}
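/*
 * The next four routines are the pager-ops entry points for page I/O.
 * The single-page variants simply wrap their page in a one-element
 * array; all four delegate the real work to vnode_pager_input() and
 * vnode_pager_output() below.
 */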
int
vnode_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{

	return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage);
}

int
vnode_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{

	vm_page_t marray[1];

	if (pager == NULL)
		return FALSE;
	marray[0] = m;

	return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0);
}

boolean_t
vnode_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL)
		return FALSE;
	marray[0] = m;
	vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals);
	return rtvals[0];
}

int
vnode_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals);
}

boolean_t
vnode_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
	register struct vnode *vp = vnp->vnp_vp;
	daddr_t bn;
	int err;
	daddr_t block;

	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
	 */
	if ((vp->v_mount == NULL) || (offset >= vnp->vnp_size))
		return FALSE;

	block = offset / vp->v_mount->mnt_stat.f_iosize;
	if (incore(vp, block))
		return TRUE;
	/*
	 * Read the index to find the disk block to read from.  If there is no
	 * block, report that we don't have this data.
	 *
	 * Assumes that the vnode has whole page or nothing.
	 */
	err = VOP_BMAP(vp, block, (struct vnode **) 0, &bn, 0);
	if (err)
		return (TRUE);
	return ((long) bn < 0 ? FALSE : TRUE);
}

/*
 * Lets the VM system know about a change in size for a file.
 * If this vnode is mapped into some address space (i.e. we have a pager
 * for it) we adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	register vn_pager_t vnp;
	register vm_object_t object;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	object = (vm_object_t) vp->v_vmdata;
	if (object == NULL)
		return;
	if ((pager = object->pager) == NULL)
		return;
	vnp = (vn_pager_t) pager->pg_data;
	if (nsize == vnp->vnp_size)
		return;

	/*
	 * No object.  This can happen during object termination since
	 * vm_object_page_clean is called after the object has been removed
	 * from the hash table, and clean may cause vnode write operations
	 * which can wind up back here.
	 */
	object = vm_object_lookup(pager);
	if (object == NULL)
		return;

	/*
	 * File has shrunk.  Toss any cached pages beyond the new EOF.
	 */
	if (nsize < vnp->vnp_size) {
		if (round_page((vm_offset_t) nsize) < vnp->vnp_size) {
			vm_object_lock(object);
			vm_object_page_remove(object,
			    round_page((vm_offset_t) nsize), vnp->vnp_size);
			vm_object_unlock(object);
		}
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode...
		 */
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize));
			if (m) {
				kva = vm_pager_map_page(m);
				bzero((caddr_t) kva + (nsize & PAGE_MASK),
				    round_page(nsize) - nsize);
				vm_pager_unmap_page(kva);
			}
		}
	}
	vnp->vnp_size = (vm_offset_t) nsize;
	object->size = round_page(nsize);

	vm_object_deallocate(object);
}
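/*
 * Called when a filesystem is unmounted (or, with mp == NULL, for all
 * mounts): walk the list of managed vnode pagers and uncache every
 * object that belongs to the given mount point.
 */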
void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	register vm_pager_t pager, npager;
	struct vnode *vp;

	pager = vnode_pager_list.tqh_first;
	while (pager) {

		/*
		 * Save the next pointer now since uncaching may terminate the
		 * object and render pager invalid
		 */
		vp = ((vn_pager_t) pager->pg_data)->vnp_vp;
		npager = pager->pg_list.tqe_next;
		if (mp == (struct mount *) 0 || vp->v_mount == mp)
			(void) vnode_pager_uncache(vp);
		pager = npager;
	}
}

/*
 * Remove vnode associated object from the object cache.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
boolean_t
vnode_pager_uncache(vp)
	register struct vnode *vp;
{
	register vm_object_t object;
	boolean_t uncached, locked;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	object = (vm_object_t) vp->v_vmdata;
	if (object == NULL)
		return (TRUE);

	pager = object->pager;
	if (pager == NULL)
		return (TRUE);

	/*
	 * Unlock the vnode if it is currently locked.  We do this since
	 * uncaching the object may result in its destruction which may
	 * initiate paging activity which may necessitate locking the vnode.
	 */
	locked = VOP_ISLOCKED(vp);
	if (locked)
		VOP_UNLOCK(vp);

	/*
	 * Must use vm_object_lookup() as it actually removes the object from
	 * the cache list.
	 */
	object = vm_object_lookup(pager);
	if (object) {
		uncached = (object->ref_count <= 1);
		pager_cache(object, FALSE);
	} else
		uncached = TRUE;
	if (locked)
		VOP_LOCK(vp);
	return (uncached);
}
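/*
 * Release a page used during a paging operation: wake up any waiters
 * and hand the page back to the free list.
 */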
void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * calculate the linear (byte) disk address of specified virtual
 * file address
 */
vm_offset_t
vnode_pager_addr(vp, address)
	struct vnode *vp;
	vm_offset_t address;
{
	int rtaddress;
	int bsize;
	vm_offset_t block;
	struct vnode *rtvp;
	int err;
	int vblock, voffset;

	if ((int) address < 0)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block, 0);

	if (err)
		rtaddress = -1;
	else
		rtaddress = block * DEV_BSIZE + voffset;

	return rtaddress;
}

/*
 * interrupt routine for I/O completion
 */
void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup((caddr_t) bp);
	if (bp->b_flags & B_ASYNC) {
		vm_offset_t paddr;
		vm_page_t m;
		vm_object_t obj = 0;
		int i;
		int npages;

		paddr = (vm_offset_t) bp->b_data;
		if (bp->b_bufsize != bp->b_bcount)
			bzero(bp->b_data + bp->b_bcount,
			    bp->b_bufsize - bp->b_bcount);

		npages = (bp->b_bufsize + PAGE_SIZE - 1) / PAGE_SIZE;
		for (i = 0; i < npages; i++) {
			m = PHYS_TO_VM_PAGE(pmap_kextract(paddr + i * PAGE_SIZE));
			if (m) {
				obj = m->object;
				m->dirty = 0;
				m->valid = VM_PAGE_BITS_ALL;
				if (m->flags & PG_WANTED)
					m->flags |= PG_REFERENCED;
				PAGE_WAKEUP(m);
			} else {
				panic("vnode_pager_iodone: page is gone!!!");
			}
		}
		pmap_qremove(paddr, npages);
		if (obj) {
			--obj->paging_in_progress;
			if (obj->paging_in_progress == 0)
				wakeup((caddr_t) obj);
		} else {
			panic("vnode_pager_iodone: object is gone???");
		}
		relpbuf(bp);
	}
}

/*
 * small block file system vnode pager input
 */
int
vnode_pager_input_smlfs(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	int block;
	vm_offset_t bsize;
	int error = 0;

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &dp, 0, 0);

	kva = vm_pager_map_page(m);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {

		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid))
			continue;

		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize);
		if (fileaddr != -1) {
			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr / DEV_BSIZE;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */

			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep((caddr_t) bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			HOLDRELE(vp);
			if (error)
				break;

			vm_page_set_clean(m, i * bsize, bsize);
			vm_page_set_valid(m, i * bsize, bsize);
		} else {
			vm_page_set_clean(m, i * bsize, bsize);
			bzero((caddr_t) kva + i * bsize, bsize);
		}
	}
	vm_pager_unmap_page(kva);
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;
}

/*
 * old style vnode pager input routine
 */
int
vnode_pager_input_old(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;

	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (m->offset >= vnp->vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (m->offset + size > vnp->vnp_size)
			size = vnp->vnp_size - m->offset;
		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);
		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = m->offset;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->dirty = 0;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */
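/*
 * Overview: if the filesystem has no usable VOP_BMAP, fall back to
 * VOP_READ via vnode_pager_input_old(); small-block filesystems (other
 * than NFS) go through vnode_pager_input_smlfs().  Otherwise the
 * request is trimmed to the run of pages that are contiguous on disk
 * and read directly from the underlying device, bypassing the buffer
 * cache, with any tail pages handled by a second, asynchronous
 * transfer.
 */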
int
vnode_pager_input(vnp, m, count, reqpage)
	register vn_pager_t vnp;
	vm_page_t *m;
	int count, reqpage;
{
	int i;
	vm_offset_t kva, foff;
	int size, sizea;
	vm_object_t object;
	struct vnode *dp, *vp;
	int bsize;

	int first, last;
	int reqaddr, firstaddr;
	int block, offset;

	struct buf *bp, *bpa;
	int counta;
	int s;
	int failflag;

	int error = 0;

	object = m[reqpage]->object;	/* all vm_page_t items are in same
					 * object */

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return value -- assuming
	 * an fs always has a bmap entry point -- that assumption is wrong!!!
	 */
	foff = m[reqpage]->offset;

	/*
	 * if we can't bmap, use old VOP code
	 */
	if (VOP_BMAP(vp, 0, &dp, 0, 0)) {
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_old(vnp, m[reqpage]);

		/*
		 * if the blocksize is smaller than a page size, then use
		 * special small filesystem code.  NFS sometimes has a small
		 * blocksize, but it can handle large reads itself.
		 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_smlfs(vnp, m[reqpage]);
	}
	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid --
	 */
	if (m[reqpage]->valid) {
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		for (i = 0; i < count; i++) {
			if (i != reqpage)
				vnode_pager_freepage(m[i]);
		}
		return VM_PAGER_OK;
	}
	/*
	 * here on direct device I/O
	 */

	reqaddr = vnode_pager_addr(vp, foff);
	if (reqaddr == -1 && foff < vnp->vnp_size) {
		printf("reqaddr: %d, foff: %d, vnp_size: %d\n",
		    reqaddr, foff, vnp->vnp_size);
		Debugger("");
	}
	s = splbio();

	/*
	 * Make sure that our I/O request is contiguous.  Scan backward and
	 * stop for the first discontiguous entry or stop for a page being in
	 * buffer cache.
	 */
	failflag = 0;
	first = reqpage;
	for (i = reqpage - 1; i >= 0; --i) {
		if (failflag ||
		    (vnode_pager_addr(vp, m[i]->offset))
		    != reqaddr + (i - reqpage) * PAGE_SIZE) {
			vnode_pager_freepage(m[i]);
			failflag = 1;
		} else {
			first = i;
		}
	}

	/*
	 * Scan forward and stop for the first non-contiguous entry or stop
	 * for a page being in buffer cache.
	 */
	failflag = 0;
	last = reqpage + 1;
	for (i = reqpage + 1; i < count; i++) {
		if (failflag ||
		    (vnode_pager_addr(vp, m[i]->offset))
		    != reqaddr + (i - reqpage) * PAGE_SIZE) {
			vnode_pager_freepage(m[i]);
			failflag = 1;
		} else {
			last = i + 1;
		}
	}
	splx(s);

	/*
	 * the first and last page have been calculated now, move input pages
	 * to be zero based...
	 */
	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
		}
		count -= first;
		reqpage -= first;
	}
	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = m[0]->offset;

	/*
	 * and get the disk physical address (in bytes)
	 */
	firstaddr = vnode_pager_addr(vp, foff);

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > vnp->vnp_size)
		size = vnp->vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	counta = 0;
	if (count * PAGE_SIZE > bsize)
		counta = (count - reqpage) - 1;
	bpa = 0;
	sizea = 0;
	if (counta) {
		bpa = getpbuf();
		count -= counta;
		sizea = size - count * PAGE_SIZE;
		size = count * PAGE_SIZE;
	}
	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = firstaddr / DEV_BSIZE;
	pbgetvp(dp, bp);
	bp->b_bcount = size;
	bp->b_bufsize = size;

	cnt.v_vnodein++;
	cnt.v_vnodepgsin += count;

	/* do the input */
	VOP_STRATEGY(bp);

	if (counta) {
		for (i = 0; i < counta; i++) {
			vm_page_deactivate(m[count + i]);
		}
		pmap_qenter((vm_offset_t) bpa->b_data, &m[count], counta);
		++m[count]->object->paging_in_progress;
		bpa->b_flags = B_BUSY | B_READ | B_CALL | B_ASYNC;
		bpa->b_iodone = vnode_pager_iodone;
		/* B_PHYS is not set, but it is nice to fill this in */
		bpa->b_proc = curproc;
		bpa->b_rcred = bpa->b_wcred = bpa->b_proc->p_ucred;
		if (bpa->b_rcred != NOCRED)
			crhold(bpa->b_rcred);
		if (bpa->b_wcred != NOCRED)
			crhold(bpa->b_wcred);
		bpa->b_blkno = (firstaddr + count * PAGE_SIZE) / DEV_BSIZE;
		pbgetvp(dp, bpa);
		bpa->b_bcount = sizea;
		bpa->b_bufsize = counta * PAGE_SIZE;

		cnt.v_vnodepgsin += counta;
		VOP_STRATEGY(bpa);
	}
	s = splbio();
	/* we definitely need to be at splbio here */

	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "vnread", 0);
	}
	splx(s);
	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	if (!error) {
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);
	HOLDRELE(vp);

finishup:
	for (i = 0; i < count; i++) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
		m[i]->dirty = 0;
		m[i]->valid = VM_PAGE_BITS_ALL;
		if (i != reqpage) {

			/*
			 * whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Empirical
			 * results show that deactivating pages is best.
			 */

			/*
			 * just in case someone was asking for this page we
			 * now tell them that it is ok to use
			 */
			if (!error) {
				if (i != reqpage - 1)
					vm_page_deactivate(m[i]);
				else
					vm_page_activate(m[i]);
				PAGE_WAKEUP(m[i]);
			} else {
				vnode_pager_freepage(m[i]);
			}
		}
	}
	if (error) {
		printf("vnode_pager_input: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}
976 */ 977 978 /* 979 * just in case someone was asking for this page we 980 * now tell them that it is ok to use 981 */ 982 if (!error) { 983 if (i != reqpage - 1) 984 vm_page_deactivate(m[i]); 985 else 986 vm_page_activate(m[i]); 987 PAGE_WAKEUP(m[i]); 988 } else { 989 vnode_pager_freepage(m[i]); 990 } 991 } 992 } 993 if (error) { 994 printf("vnode_pager_input: I/O read error\n"); 995 } 996 return (error ? VM_PAGER_ERROR : VM_PAGER_OK); 997 } 998 999 /* 1000 * old-style vnode pager output routine 1001 */ 1002 int 1003 vnode_pager_output_old(vnp, m) 1004 register vn_pager_t vnp; 1005 vm_page_t m; 1006 { 1007 vm_offset_t kva, kva2; 1008 vm_offset_t size; 1009 struct iovec aiov; 1010 struct uio auio; 1011 struct vnode *vp; 1012 int error; 1013 1014 vp = vnp->vnp_vp; 1015 1016 /* 1017 * Dont return failure if beyond current EOF placate the VM system. 1018 */ 1019 if (m->offset >= vnp->vnp_size) { 1020 return VM_PAGER_OK; 1021 } else { 1022 size = PAGE_SIZE; 1023 if (m->offset + size > vnp->vnp_size) 1024 size = vnp->vnp_size - m->offset; 1025 1026 kva2 = kmem_alloc(pager_map, PAGE_SIZE); 1027 /* 1028 * Allocate a kernel virtual address and initialize so that 1029 * we can use VOP_WRITE routines. 1030 */ 1031 kva = vm_pager_map_page(m); 1032 bcopy((caddr_t) kva, (caddr_t) kva2, size); 1033 vm_pager_unmap_page(kva); 1034 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 1035 PAGE_WAKEUP(m); 1036 1037 aiov.iov_base = (caddr_t) kva2; 1038 aiov.iov_len = size; 1039 auio.uio_iov = &aiov; 1040 auio.uio_iovcnt = 1; 1041 auio.uio_offset = m->offset; 1042 auio.uio_segflg = UIO_SYSSPACE; 1043 auio.uio_rw = UIO_WRITE; 1044 auio.uio_resid = size; 1045 auio.uio_procp = (struct proc *) 0; 1046 1047 error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred); 1048 1049 kmem_free_wakeup(pager_map, kva2, PAGE_SIZE); 1050 if (!error) { 1051 if ((size - auio.uio_resid) == 0) { 1052 error = EINVAL; 1053 } 1054 } 1055 return error ? 
/*
 * vnode pager output on a small-block file system
 */
int
vnode_pager_output_smlfs(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	vm_offset_t bsize;
	int error = 0;

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &dp, 0, 0);
	kva = vm_pager_map_page(m);
	for (i = 0; !error && i < (PAGE_SIZE / bsize); i++) {

		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid & m->dirty) == 0)
			continue;
		/*
		 * calculate logical block and offset
		 */
		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize);
		if (fileaddr != -1) {

			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_CALL | B_WRITE;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr / DEV_BSIZE;
			pbgetvp(dp, bp);
			++dp->v_numoutput;
			/* for NFS */
			bp->b_dirtyoff = 0;
			bp->b_dirtyend = bsize;
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the output */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */

			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep((caddr_t) bp, PVM, "vnswrt", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			vm_page_set_clean(m, i * bsize, bsize);
			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			HOLDRELE(vp);
		}
	}
	vm_pager_unmap_page(kva);
	if (error)
		return VM_PAGER_ERROR;
	else
		return VM_PAGER_OK;
}

/*
 * generic vnode pager output routine
 */
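/*
 * Pages at or beyond EOF are marked VM_PAGER_BAD, the rest are trimmed
 * to a run that is contiguous on disk and written with a single
 * strategy call; on an I/O error the write is retried one page at a
 * time via retryoutput.
 */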
int
vnode_pager_output(vnp, m, count, rtvals)
	vn_pager_t vnp;
	vm_page_t *m;
	int count;
	int *rtvals;
{
	int i, j;
	vm_offset_t kva, foff;
	int size;
	vm_object_t object;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t reqaddr;
	int bsize;
	int s;
	daddr_t block;
	struct timeval tv;

	int error = 0;

retryoutput:
	object = m[0]->object;	/* all vm_page_t items are in same object */

	vp = vnp->vnp_vp;

	/*
	 * Make sure underlying filesystem is still mounted.
	 */
	if (vp->v_mount == NULL)
		return VM_PAGER_FAIL;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	if ((int) m[0]->offset < 0) {
		printf("vnode_pager_output: attempt to write meta-data!!! -- 0x%x\n",
		    m[0]->offset);
		m[0]->dirty = 0;
		rtvals[0] = VM_PAGER_OK;
		return VM_PAGER_OK;
	}
	/*
	 * if the filesystem does not have a bmap, then use the old code
	 */
	if (VOP_BMAP(vp, (m[0]->offset / bsize), &dp, &block, 0) ||
	    (block == -1)) {

		rtvals[0] = vnode_pager_output_old(vnp, m[0]);

		m[0]->dirty = 0;
		cnt.v_vnodeout++;
		cnt.v_vnodepgsout++;
		return rtvals[0];
	}
	tv = time;
	VOP_UPDATE(vp, &tv, &tv, 0);

	/*
	 * if the filesystem has a small blocksize, then use the small block
	 * filesystem output code
	 */
	if ((bsize < PAGE_SIZE) &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]);
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			}
		}
		cnt.v_vnodeout++;
		cnt.v_vnodepgsout += count;
		return rtvals[0];
	}
	for (i = 0; i < count; i++) {
		foff = m[i]->offset;
		if (foff >= vnp->vnp_size) {
			for (j = i; j < count; j++)
				rtvals[j] = VM_PAGER_BAD;
			count = i;
			break;
		}
	}
	if (count == 0) {
		return rtvals[0];
	}
	foff = m[0]->offset;
	reqaddr = vnode_pager_addr(vp, foff);

	/*
	 * Scan forward and stop for the first non-contiguous entry or stop
	 * for a page being in buffer cache.
	 */
	for (i = 1; i < count; i++) {
		if (vnode_pager_addr(vp, m[i]->offset)
		    != reqaddr + i * PAGE_SIZE) {
			count = i;
			break;
		}
	}

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > vnp->vnp_size)
		size = vnp->vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;
	/*
	 * and map the pages to be written into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_WRITE | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;

	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = reqaddr / DEV_BSIZE;
	pbgetvp(dp, bp);
	++dp->v_numoutput;

	/* for NFS */
	bp->b_dirtyoff = 0;
	bp->b_dirtyend = size;

	bp->b_bcount = size;
	bp->b_bufsize = size;

	cnt.v_vnodeout++;
	cnt.v_vnodepgsout += count;

	/* do the output */
	VOP_STRATEGY(bp);

	s = splbio();

	/* we definitely need to be at splbio here */

	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "vnwrite", 0);
	}
	splx(s);

	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);
	HOLDRELE(vp);

	if (!error) {
		for (i = 0; i < count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			m[i]->dirty = 0;
			rtvals[i] = VM_PAGER_OK;
		}
	} else if (count != 1) {
		error = 0;
		count = 1;
		goto retryoutput;
	}
	if (error) {
		printf("vnode_pager_output: I/O write error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}