/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993,1994 John S. Dyson
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.38 1995/05/10 18:56:09 davidg Exp $
 */

/*
 * Page to/from files (vnodes).
 *
 * TODO:
 *	pageouts
 *	fix credential use (uses current process credentials now)
 */

/*
 * 1) Supports multiple-block reads and writes
 * 2) Bypasses the buffer cache for reads
 *
 * TODO:
 *	Implement a getpage/putpage interface for filesystems.  This
 *	should greatly simplify the vnode_pager.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/mount.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>

#include <sys/buf.h>
#include <miscfs/specfs/specdev.h>

int vnode_pager_putmulti();

void vnode_pager_init();
void vnode_pager_dealloc();
int vnode_pager_getpage();
int vnode_pager_getmulti();
int vnode_pager_putpage();
boolean_t vnode_pager_haspage();

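/*
 * Ops vector for vnode-backed pagers.  The generic pager layer stores
 * this table in pg_ops when a pager of type PG_VNODE is created (see
 * vnode_pager_alloc() below) and dispatches all generic pager
 * operations -- init, alloc, dealloc, get/put page(s), haspage --
 * through it.
 */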
struct pagerops vnodepagerops = {
	vnode_pager_init,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpage,
	vnode_pager_getmulti,
	vnode_pager_putpage,
	vnode_pager_putmulti,
	vnode_pager_haspage
};

static int vnode_pager_input(vn_pager_t vnp, vm_page_t * m, int count, int reqpage);
static int vnode_pager_output(vn_pager_t vnp, vm_page_t * m, int count, int *rtvals);

extern vm_map_t pager_map;

struct pagerlst vnode_pager_list;	/* list of managed vnodes */

#define MAXBP (PAGE_SIZE/DEV_BSIZE)

void
vnode_pager_init()
{
	TAILQ_INIT(&vnode_pager_list);
}

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
vm_pager_t
vnode_pager_alloc(handle, size, prot, offset)
	void *handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register vn_pager_t vnp;
	vm_object_t object, tobject;
	struct vattr vattr;
	struct vnode *vp;
	struct proc *p = curproc;	/* XXX */
	int rtval;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	/*
	 * Vnodes keep a pointer to any associated pager, so there is no
	 * need to look it up with vm_pager_lookup().
	 */
	vp = (struct vnode *) handle;
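	/*
	 * If the vnode's old object is still being torn down (OBJ_DEAD),
	 * wait here; the object termination code wakes us up once the
	 * teardown is complete and it is safe to look again.
	 */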
	while ((object = (vm_object_t) vp->v_vmdata) &&
		(object->flags & OBJ_DEAD))
		tsleep((caddr_t) object, PVM, "vadead", 0);

	pager = NULL;
	if (object != NULL)
		pager = object->pager;
	if (pager == NULL) {

		/*
		 * Allocate pager structures
		 */
		pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
		if (pager == NULL)
			return (NULL);
		vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
		if (vnp == NULL) {
			free((caddr_t) pager, M_VMPAGER);
			return (NULL);
		}
		/*
		 * And an object of the appropriate size
		 */
		if ((rtval = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) == 0) {
			object = vm_object_allocate(round_page(vattr.va_size));
			object->flags = OBJ_CANPERSIST;
			vm_object_enter(object, pager);
			object->pager = pager;
		} else {
			free((caddr_t) vnp, M_VMPGDATA);
			free((caddr_t) pager, M_VMPAGER);
			return (NULL);
		}

		/*
		 * Hold a reference to the vnode and initialize pager data.
		 */
		VREF(vp);
		vnp->vnp_flags = 0;
		vnp->vnp_vp = vp;
		vnp->vnp_size = vattr.va_size;

		TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
		pager->pg_handle = handle;
		pager->pg_type = PG_VNODE;
		pager->pg_ops = &vnodepagerops;
		pager->pg_data = (caddr_t) vnp;
		vp->v_vmdata = (caddr_t) object;
	} else {

		/*
		 * vm_object_lookup() will remove the object from the cache if
		 * found and also gain a reference to the object.
		 */
		(void) vm_object_lookup(pager);
	}
	if (vp->v_type == VREG)
		vp->v_flag |= VVMIO;
	return (pager);
}
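
/*
 * Note: callers normally reach vnode_pager_alloc() indirectly, through
 * the generic vm_pager_allocate() interface, when a vnode is mapped
 * (e.g. by mmap or exec) rather than by calling it directly.
 */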

void
vnode_pager_dealloc(pager)
	vm_pager_t pager;
{
	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
	register struct vnode *vp;
	vm_object_t object;

	vp = vnp->vnp_vp;
	if (vp) {
		int s = splbio();

		object = (vm_object_t) vp->v_vmdata;
		if (object) {
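			/*
			 * Drain any paging activity still pending on the
			 * object; OBJ_PIPWNT asks the I/O completion path
			 * to wake us when paging_in_progress reaches zero.
			 */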
			while (object->paging_in_progress) {
				object->flags |= OBJ_PIPWNT;
				tsleep(object, PVM, "vnpdea", 0);
			}
		}
		splx(s);

		vp->v_vmdata = NULL;
		vp->v_flag &= ~(VTEXT | VVMIO);
		vp->v_flag |= VAGE;
		vrele(vp);
	}
	TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
	free((caddr_t) vnp, M_VMPGDATA);
	free((caddr_t) pager, M_VMPAGER);
}

int
vnode_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage);
}

int
vnode_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];

	if (pager == NULL)
		return FALSE;
	marray[0] = m;

	return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0);
}

int
vnode_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL)
		return FALSE;
	marray[0] = m;
	vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals);
	return rtvals[0];
}

int
vnode_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals);
}

boolean_t
vnode_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
	register struct vnode *vp = vnp->vnp_vp;
	daddr_t bn;
	int err;
	daddr_t block;

	/*
	 * If the filesystem is no longer mounted, or the offset is beyond
	 * end of file, we do not have the page.
	 */
	if ((vp->v_mount == NULL) || (offset >= vnp->vnp_size))
		return FALSE;

	block = offset / vp->v_mount->mnt_stat.f_iosize;
	if (incore(vp, block))
		return TRUE;

	/*
	 * Read the index to find the disk block to read from.  If there is
	 * no block, report that we don't have this data.
	 *
	 * Assumes that the vnode has the whole page or nothing.
	 */
	err = VOP_BMAP(vp, block, (struct vnode **) 0, &bn, 0);
	if (err) {
		/*
		 * On a bmap error, be conservative and claim the page
		 * exists; the real error will surface when the page is
		 * actually read.
		 */
		return (TRUE);
	}
	return ((long) bn < 0 ? FALSE : TRUE);
}

/*
 * Lets the VM system know about a change in size for a file.
 * If this vnode is mapped into some address space (i.e. we have a pager
 * for it) we adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	register vn_pager_t vnp;
	register vm_object_t object;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
		return;

	object = (vm_object_t) vp->v_vmdata;
	if (object == NULL)
		return;
	if ((pager = object->pager) == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	vnp = (vn_pager_t) pager->pg_data;
	if (nsize == vnp->vnp_size)
		return;

	/*
	 * File has shrunk.  Toss any cached pages beyond the new EOF.
	 */
	if (nsize < vnp->vnp_size) {
		if (round_page((vm_offset_t) nsize) < vnp->vnp_size) {
			vm_object_lock(object);
			vm_object_page_remove(object,
			    round_page((vm_offset_t) nsize), vnp->vnp_size, FALSE);
			vm_object_unlock(object);
		}
		/*
		 * This gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode: zero the portion of
		 * the final page that lies beyond the new EOF.
		 */
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize));
			if (m) {
				kva = vm_pager_map_page(m);
				bzero((caddr_t) kva + (nsize & PAGE_MASK),
				    round_page(nsize) - nsize);
				vm_pager_unmap_page(kva);
			}
		}
	}
	vnp->vnp_size = (vm_offset_t) nsize;
	object->size = round_page(nsize);
}
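
/*
 * Filesystems are expected to call vnode_pager_setsize() whenever they
 * change a file's length, e.g. from their truncate and write-extend
 * paths, so that the VM object stays consistent with the file size.
 */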

void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	register vm_pager_t pager, npager;
	struct vnode *vp;

	for (pager = vnode_pager_list.tqh_first; pager != NULL; pager = npager) {
		/*
		 * Save the next pointer now since uncaching may terminate the
		 * object and render the pager invalid.
		 */
		npager = pager->pg_list.tqe_next;
		vp = ((vn_pager_t) pager->pg_data)->vnp_vp;
		if (mp == (struct mount *) 0 || vp->v_mount == mp) {
			VOP_LOCK(vp);
			(void) vnode_pager_uncache(vp);
			VOP_UNLOCK(vp);
		}
	}
}

/*
 * Remove the vnode's associated object from the object cache.
 * This routine must be called with the vnode locked.
 *
 * XXX: we temporarily unlock the vnode below.  We must do this since
 * uncaching the object may result in its destruction, which may initiate
 * paging activity that in turn necessitates re-locking the vnode.
 */
boolean_t
vnode_pager_uncache(vp)
	register struct vnode *vp;
{
	register vm_object_t object;
	boolean_t uncached;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	object = (vm_object_t) vp->v_vmdata;
	if (object == NULL)
		return (TRUE);

	pager = object->pager;
	if (pager == NULL)
		return (TRUE);

#ifdef DEBUG
	if (!VOP_ISLOCKED(vp)) {
		extern int (**nfsv2_vnodeop_p)();

		if (vp->v_op != nfsv2_vnodeop_p)
			panic("vnode_pager_uncache: vnode not locked!");
	}
#endif
	/*
	 * Must use vm_object_lookup() as it actually removes the object from
	 * the cache list.
	 */
	object = vm_object_lookup(pager);
	if (object) {
		uncached = (object->ref_count <= 1);
		VOP_UNLOCK(vp);
		pager_cache(object, FALSE);
		VOP_LOCK(vp);
	} else
		uncached = TRUE;
	return (uncached);
}

/*
 * Wake up anyone waiting on the page and free it; used to dispose of
 * pages that an input operation is not going to fill.
 */
void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * Calculate the disk address (in DEV_BSIZE units) corresponding to the
 * specified byte offset within the file, and optionally the number of
 * contiguous pages (*run) that follow it on disk.
 */
vm_offset_t
vnode_pager_addr(vp, address, run)
	struct vnode *vp;
	vm_offset_t address;
	int *run;
{
	int rtaddress;
	int bsize;
	vm_offset_t block;
	struct vnode *rtvp;
	int err;
	int vblock, voffset;

	if ((int) address < 0)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block, run);

	if (err || (block == -1))
		rtaddress = -1;
	else {
		rtaddress = block + voffset / DEV_BSIZE;
		if (run) {
			/*
			 * Convert the VOP_BMAP() run (additional contiguous
			 * filesystem blocks) into a count of contiguous
			 * pages starting at this address.
			 */
			*run += 1;
			*run *= bsize / PAGE_SIZE;
			*run -= voffset / PAGE_SIZE;
		}
	}

	return rtaddress;
}
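
/*
 * Worked example (hypothetical numbers): with bsize = 8192, PAGE_SIZE =
 * 4096 and DEV_BSIZE = 512, a request for file offset 12288 yields
 * vblock = 1 and voffset = 4096.  If VOP_BMAP() maps vblock 1 to device
 * block B with no additional contiguous blocks (*run = 0), the result
 * is B + 8 and *run becomes (0 + 1) * 2 - 1 = 1 contiguous page
 * starting at that address.
 */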

/*
 * interrupt routine for I/O completion
 */
void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup((caddr_t) bp);
}

/*
 * small block file system vnode pager input
 */
int
vnode_pager_input_smlfs(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	int block;
	vm_offset_t bsize;
	int error = 0;

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &dp, 0, 0);	/* get the underlying device vnode */

	kva = vm_pager_map_page(m);

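	/*
	 * Fill the page one filesystem block at a time: sub-ranges that
	 * are already valid are skipped, blocks with backing store are
	 * read synchronously, and holes are zero-filled.
	 */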
	for (i = 0; i < PAGE_SIZE / bsize; i++) {

		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid))
			continue;

		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0);
		if (fileaddr != -1) {
			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */
			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep((caddr_t) bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			if (error)
				break;

			vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE - 1), bsize);
			vm_page_set_valid(m, (i * bsize) & (PAGE_SIZE - 1), bsize);
		} else {
			/* hole: no backing store, so just zero the block */
			vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE - 1), bsize);
			bzero((caddr_t) kva + i * bsize, bsize);
		}
	}
	vm_pager_unmap_page(kva);
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;
}

/*
 * old style vnode pager input routine
 */
int
vnode_pager_input_old(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;

	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (m->offset >= vnp->vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (m->offset + size > vnp->vnp_size)
			size = vnp->vnp_size - m->offset;

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);

		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = m->offset;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->dirty = 0;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */
int
vnode_pager_input(vnp, m, count, reqpage)
	register vn_pager_t vnp;
	vm_page_t *m;
	int count, reqpage;
{
	int i;
	vm_offset_t kva, foff;
	int size;
	vm_object_t object;
	struct vnode *dp, *vp;
	int bsize;

	int first, last;
	int firstaddr;
	int block, offset;
	int runpg;
	int runend;

	struct buf *bp;
	int s;
	int failflag;

	int error = 0;

	object = m[reqpage]->object;	/* all vm_page_t items are in the
					 * same object */

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * Originally we did not check the VOP_BMAP() return value, on the
	 * assumption that a filesystem always has a bmap entry point.  That
	 * assumption is wrong!
	 */
	foff = m[reqpage]->offset;

	/*
	 * if we can't bmap, use the old VOP code
	 */
	if (VOP_BMAP(vp, 0, &dp, 0, 0)) {
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_old(vnp, m[reqpage]);

		/*
		 * if the block size is smaller than a page size, then use
		 * special small filesystem code.  NFS sometimes has a small
		 * block size, but it can handle large reads itself.
		 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_smlfs(vnp, m[reqpage]);
	}
	/*
	 * If ANY DEV_BSIZE blocks are valid on a large-block filesystem,
	 * then the entire page is valid.
	 */
	if (m[reqpage]->valid) {
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		for (i = 0; i < count; i++) {
			if (i != reqpage)
				vnode_pager_freepage(m[i]);
		}
		return VM_PAGER_OK;
	}

	/*
	 * here on direct device I/O
	 */

	firstaddr = -1;
	/*
	 * calculate the run that includes the required page
	 */
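	/*
	 * Pages are grouped into runs of disk-contiguous blocks.  Pages
	 * that fall before the run containing reqpage, or that have no
	 * backing store at all, are freed; if the run ends before the
	 * last page, count is clipped so that the transfer below is a
	 * single contiguous disk read.
	 */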
	for (first = 0, i = 0; i < count; i = runend) {
		firstaddr = vnode_pager_addr(vp, m[i]->offset, &runpg);
		if (firstaddr == -1) {
			if (i == reqpage && foff < vnp->vnp_size) {
				printf("vnode_pager_input: unexpected missing page: firstaddr: %d, foff: %d, vnp_size: %d\n",
				    firstaddr, foff, vnp->vnp_size);
				panic("vnode_pager_input:...");
			}
			vnode_pager_freepage(m[i]);
			runend = i + 1;
			first = runend;
			continue;
		}
		runend = i + runpg;
		if (runend <= reqpage) {
			int j;

			for (j = i; j < runend; j++) {
				vnode_pager_freepage(m[j]);
			}
		} else {
			if (runpg < (count - first)) {
				for (i = first + runpg; i < count; i++)
					vnode_pager_freepage(m[i]);
				count = first + runpg;
			}
			break;
		}
		first = runend;
	}

	/*
	 * the first and last page have been calculated now, move the input
	 * pages to be zero based...
	 */
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
		}
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = m[0]->offset;

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > vnp->vnp_size)
		size = vnp->vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = firstaddr;
	pbgetvp(dp, bp);
	bp->b_bcount = size;
	bp->b_bufsize = size;

	cnt.v_vnodein++;
	cnt.v_vnodepgsin += count;

	/* do the input */
	VOP_STRATEGY(bp);

	/* we definitely need to be at splbio here */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "vnread", 0);
	}
	splx(s);
	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	if (!error) {
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);

	for (i = 0; i < count; i++) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
		m[i]->dirty = 0;
		m[i]->valid = VM_PAGE_BITS_ALL;
		if (i != reqpage) {

			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Empirical
			 * results show that deactivating the pages is best.
			 */

			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				vm_page_deactivate(m[i]);
				PAGE_WAKEUP(m[i]);
			} else {
				vnode_pager_freepage(m[i]);
			}
		}
	}
	if (error) {
		printf("vnode_pager_input: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}

/*
 * generic vnode pager output routine
 */
int
vnode_pager_output(vnp, m, count, rtvals)
	vn_pager_t vnp;
	vm_page_t *m;
	int count;
	int *rtvals;
{
	int i;

	struct vnode *vp;
	int maxsize, ncount;
	struct uio auio;
	struct iovec aiov;
	int error;

	vp = vnp->vnp_vp;
	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	if ((int) m[0]->offset < 0) {
		printf("vnode_pager_output: attempt to write meta-data!!! -- 0x%x(%x)\n", m[0]->offset, m[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return VM_PAGER_BAD;
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	/*
	 * Clip the transfer to the current EOF; pages that lie entirely
	 * beyond it cannot be written back and are marked VM_PAGER_BAD.
	 */
	if (maxsize + m[0]->offset > vnp->vnp_size) {
		if (vnp->vnp_size > m[0]->offset)
			maxsize = vnp->vnp_size - m[0]->offset;
		else
			maxsize = 0;
		ncount = (maxsize + PAGE_SIZE - 1) / PAGE_SIZE;
		if (ncount < count) {
			for (i = ncount; i < count; i++) {
				rtvals[i] = VM_PAGER_BAD;
			}
			if (ncount == 0) {
				printf("vnode_pager_output: write past end of file: %d, %d\n",
				    m[0]->offset, vnp->vnp_size);
				return rtvals[0];
			}
		}
	}

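	/*
	 * Soft-busy the pages for the duration of the write: bump each
	 * page's busy count and drop the exclusive PG_BUSY flag.
	 */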
	for (i = 0; i < count; i++) {
		m[i]->busy++;
		m[i]->flags &= ~PG_BUSY;
	}

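	/*
	 * Set up a uio with no user buffer: UIO_NOCOPY indicates that the
	 * data is already present in the VM pages backing the vnode, and
	 * IO_VMIO marks the VOP_WRITE() below as coming from the VM
	 * system rather than from an ordinary user I/O request.
	 */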
	aiov.iov_base = (caddr_t) 0;
	aiov.iov_len = maxsize;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = m[0]->offset;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_procp = (struct proc *) 0;
	error = VOP_WRITE(vp, &auio, IO_VMIO, curproc->p_ucred);
	cnt.v_vnodeout++;
	cnt.v_vnodepgsout += ncount;

	if (error) {
		printf("vnode_pager_output: I/O error %d\n", error);
	}
	if (auio.uio_resid) {
		printf("vnode_pager_output: residual I/O %d at %d\n", auio.uio_resid, m[0]->offset);
	}
	for (i = 0; i < count; i++) {
		m[i]->busy--;
		if (i < ncount) {
			rtvals[i] = VM_PAGER_OK;
		}
		if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED))
			wakeup((caddr_t) m[i]);
	}
	return rtvals[0];
}

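/*
 * Find and lock the vnode (if any) backing an object, by walking the
 * object's shadow chain down to the vnode-pager-backed object.
 */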
struct vnode *
vnode_pager_lock(vm_object_t object)
{
	for (; object; object = object->shadow) {
		vn_pager_t vnp;

		if (!object->pager || (object->pager->pg_type != PG_VNODE))
			continue;

		vnp = (vn_pager_t) object->pager->pg_data;
		VOP_LOCK(vnp->vnp_vp);
		return vnp->vnp_vp;
	}
	return (struct vnode *) NULL;
}

void
vnode_pager_unlock(struct vnode *vp)
{
	VOP_UNLOCK(vp);
}