/* xref: /freebsd/sys/vm/swap_pager.c (revision 24ea4a96716695ed8828f91e40e0ac920c736fe3) */
/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.17 1994/11/06 09:55:28 davidg Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>

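/*
 * NPENDINGIO sets the number of preallocated pageout "clean" structures
 * (swcleanlist below) and thus bounds the number of concurrently
 * outstanding paging operations that use them.
 */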
#ifndef NPENDINGIO
#define NPENDINGIO	16
#endif

int	swap_pager_input __P((sw_pager_t, vm_page_t *, int, int));
int	swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *));

int nswiodone;
extern int vm_pageout_rate_limit;
static int cleandone;
extern int hz;
int swap_pager_full;
extern vm_map_t pager_map;
extern int vm_swap_size;
int no_swap_space = 1;
struct rlist *swaplist;
int nswaplist;

#define MAX_PAGEOUT_CLUSTER 8

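/*
 * A swpagerclean ("spc") entry tracks one paging transaction.  Each one
 * carries a preallocated buf header and a KVA window big enough for
 * MAX_PAGEOUT_CLUSTER pages.  Entries cycle through three queues:
 * swap_pager_free (available), swap_pager_inuse (async pageout pending),
 * and swap_pager_done (I/O complete, waiting for swap_pager_clean).
 */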
TAILQ_HEAD(swpclean, swpagerclean);

typedef	struct swpagerclean	*swp_clean_t;

struct swpagerclean {
	TAILQ_ENTRY(swpagerclean)	spc_list;
	int				spc_flags;
	struct buf			*spc_bp;
	sw_pager_t			spc_swp;
	vm_offset_t			spc_kva;
	int				spc_count;
	vm_page_t			spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];

extern vm_map_t kernel_map;

/* spc_flags values */
#define SPC_ERROR	0x01

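/* SWB_EMPTY marks a page slot in an sw_blk that has no swap space allocated */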
#define SWB_EMPTY (-1)

struct swpclean swap_pager_done;	/* list of completed page cleans */
struct swpclean swap_pager_inuse;	/* list of pending page cleans */
struct swpclean swap_pager_free;	/* list of free pager clean structs */
struct pagerlst swap_pager_list;	/* list of "named" anon regions */
struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */

#define	SWAP_FREE_NEEDED	0x1	/* a process is waiting for a free spc */
int swap_pager_needflags;
struct rlist *swapfrag;

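/* null-terminated list of the pager queues scanned by swap_pager_reclaim() */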
struct pagerlst *swp_qs[]={
	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
};

int swap_pager_putmulti();

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_getmulti,
	swap_pager_putpage,
	swap_pager_putmulti,
	swap_pager_haspage
};

int npendingio = NPENDINGIO;
int pendingiowait;
int require_swap_init;
void swap_pager_finish();
int dmmin, dmmax;
extern int vm_page_count;

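/*
 * swapsizecheck maintains swap_pager_full with hysteresis: the flag is
 * set once fewer than 128 pages of swap remain and is cleared again
 * only after more than 192 pages are free, so it does not flap right
 * at the boundary.
 */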
static inline void swapsizecheck() {
	if( vm_swap_size < 128*btodb(PAGE_SIZE)) {
		if( swap_pager_full == 0)
			printf("swap_pager: out of space\n");
		swap_pager_full = 1;
	} else if( vm_swap_size > 192*btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

void
swap_pager_init()
{
	dfltpagerops = &swappagerops;

	TAILQ_INIT(&swap_pager_list);
	TAILQ_INIT(&swap_pager_un_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);

	require_swap_init = 1;

	/*
	 * Calculate the swap allocation constants.
	 */

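	/*
	 * dmmin is the smallest swap allocation unit (one page cluster,
	 * in DEV_BSIZE blocks); dmmax bounds a contiguous swap region
	 * and is used by the I/O clustering code below to keep a
	 * transfer within a single region.
	 */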
	dmmin = CLBYTES/DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES*PAGE_SIZE)*2;

}

/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register sw_pager_t swp;
	int waitok;
	int i,j;

	if (require_swap_init) {
		swp_clean_t spc;
		struct buf *bp;
		/*
		 * KVAs are allocated here so that we don't need to keep
		 * doing kmem_alloc_pageable calls at runtime.
		 */
		for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) {
			spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE*MAX_PAGEOUT_CLUSTER);
			if (!spc->spc_kva) {
				break;
			}
			spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT);
			if (!spc->spc_bp) {
				kmem_free_wakeup(pager_map, spc->spc_kva,
					PAGE_SIZE*MAX_PAGEOUT_CLUSTER);
				break;
			}
			spc->spc_flags = 0;
			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		}
		require_swap_init = 0;
		if( size == 0)
			return(NULL);
	}

	/*
	 * If this is a "named" anonymous region, look it up and
	 * return the appropriate pager if it exists.
	 */
	if (handle) {
		pager = vm_pager_lookup(&swap_pager_list, handle);
		if (pager != NULL) {
			/*
			 * Use vm_object_lookup to gain a reference
			 * to the object and also to remove from the
			 * object cache.
			 */
			if (vm_object_lookup(pager) == NULL)
				panic("swap_pager_alloc: bad object");
			return(pager);
		}
	}

	/*
	 * Pager doesn't exist, allocate swap management resources
	 * and initialize.
	 */
	waitok = handle ? M_WAITOK : M_NOWAIT;
	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
	if (pager == NULL)
		return(NULL);
	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
	if (swp == NULL) {
		free((caddr_t)pager, M_VMPAGER);
		return(NULL);
	}
	size = round_page(size);
	swp->sw_osize = size;
	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES*PAGE_SIZE);
	swp->sw_blocks = (sw_blk_t)
		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
		       M_VMPGDATA, waitok);
	if (swp->sw_blocks == NULL) {
		free((caddr_t)swp, M_VMPGDATA);
		free((caddr_t)pager, M_VMPAGER);
		return(NULL);
	}

	for (i = 0; i < swp->sw_nblocks; i++) {
		swp->sw_blocks[i].swb_valid = 0;
		swp->sw_blocks[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
	}

	swp->sw_poip = 0;
	if (handle) {
		vm_object_t object;

		swp->sw_flags = SW_NAMED;
		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
		/*
		 * Consistent with other pagers: return with object
		 * referenced.  Can't do this with handle == NULL
		 * since it might be the pageout daemon calling.
		 */
		object = vm_object_allocate(size);
		vm_object_enter(object, pager);
		vm_object_setpager(object, pager, 0, FALSE);
	} else {
		swp->sw_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
	}
	pager->pg_handle = handle;
	pager->pg_ops = &swappagerops;
	pager->pg_type = PG_SWAP;
	pager->pg_data = (caddr_t)swp;

	return(pager);
}

/*
 * Returns the disk block associated with a pager and offset;
 * additionally, as a side effect, returns a flag indicating
 * whether the block has been written (is valid).
 */
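/*
 * For example (assuming PAGE_SIZE is 4096 and SWB_NPAGES is 8, so that
 * each sw_blk covers 32K): offset 0x9000 gives block index
 * 0x9000 / 0x8000 = 1 and page index (0x9000 % 0x8000) / 0x1000 = 1,
 * so the disk address lives in sw_blocks[1].swb_block[1] and its
 * written bit is swb_valid & (1 << 1).
 */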

static int *
swap_pager_diskaddr(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = offset / (SWB_NPAGES*PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return(FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		*valid = swb->swb_valid & (1<<ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES*PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
		return;

	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
int
swap_pager_getswapspace( unsigned amount, unsigned *rtval) {
	vm_swap_size -= amount;
	if( !rlist_alloc(&swaplist, amount, rtval)) {
		vm_swap_size += amount;
		return 0;
	} else {
		swapsizecheck();
		return 1;
	}
}
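
/*
 * Callers allocate in units of DEV_BSIZE disk blocks; for instance,
 * swap_pager_getswapspace(btodb(PAGE_SIZE), &blk) reserves one page
 * worth of swap (8 blocks, assuming 4K pages and 512-byte disk blocks)
 * and returns its starting block number through blk.
 */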

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
void
swap_pager_freeswapspace( unsigned from, unsigned to) {
	rlist_free(&swaplist, from, to);
	vm_swap_size += (to-from)+1;
	swapsizecheck();
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
_swap_pager_freespace(swp, start, size)
	sw_pager_t swp;
	vm_offset_t start;
	vm_offset_t size;
{
	vm_offset_t i;
	int s;

	s = splbio();
	for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(swp, i, &valid);
		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1);
			if( valid) {
				swap_pager_setvalid(swp, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	splx(s);
}

void
swap_pager_freespace(pager, start, size)
	vm_pager_t pager;
	vm_offset_t start;
	vm_offset_t size;
{
	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
}

/*
 * swap_pager_reclaim frees up over-allocated space from all pagers;
 * this eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to. It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 256
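
/*
 * The reclaim scan runs in two phases: first, at splbio, candidate
 * blocks (allocated but never written) are collected from each pager
 * into the reclaims[] array below; afterwards the collected blocks are
 * returned with swap_pager_freeswapspace.  Swap blocks that are locked
 * (I/O in transit) are skipped.
 */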

void
swap_pager_reclaim()
{
	vm_pager_t p;
	sw_pager_t swp;
	int i, j, k;
	int s;
	int reclaimcount;
	static int reclaims[MAXRECLAIM];
	static int in_reclaim;

/*
 * allow only one process to be in the swap_pager_reclaim subroutine
 */
	s = splbio();
	if (in_reclaim) {
		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		p = swp_qs[k]->tqh_first;
		while (p && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			swp = (sw_pager_t) p->pg_data;
			for (i = 0; i < swp->sw_nblocks; i++) {
				sw_blk_t swb = &swp->sw_blocks[i];
				if( swb->swb_locked)
					continue;
				for (j = 0; j < SWB_NPAGES; j++) {
					if (swb->swb_block[j] != SWB_EMPTY &&
						(swb->swb_valid & (1 << j)) == 0) {
						reclaims[reclaimcount++] = swb->swb_block[j];
						swb->swb_block[j] = SWB_EMPTY;
						if (reclaimcount >= MAXRECLAIM)
							goto rfinished;
					}
				}
			}
			p = p->pg_list.tqe_next;
		}
	}

rfinished:

/*
 * free the blocks that have been added to the reclaim list
 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(PAGE_SIZE) - 1);
		wakeup((caddr_t) &in_reclaim);
	}

	splx(s);
	in_reclaim = 0;
	wakeup((caddr_t) &in_reclaim);
}

/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */

void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
	vm_pager_t srcpager;
	vm_offset_t srcoffset;
	vm_pager_t dstpager;
	vm_offset_t dstoffset;
	vm_offset_t offset;
{
	sw_pager_t srcswp, dstswp;
	vm_offset_t i;
	int s;

	if( vm_swap_size)
		no_swap_space = 0;

	if( no_swap_space)
		return;

	srcswp = (sw_pager_t) srcpager->pg_data;
	dstswp = (sw_pager_t) dstpager->pg_data;

/*
 * remove the source pager from the swap_pager internal queue
 */
	s = splbio();
	if (srcswp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
		srcswp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
	}

	while (srcswp->sw_poip) {
		tsleep((caddr_t)srcswp, PVM, "spgout", 0);
	}
	splx(s);

/*
 * clean all of the pages that are currently active and finished
 */
	(void) swap_pager_clean();

	s = splbio();
/*
 * clear the source blocks before the copied region
 * (release allocated space)
 */
	for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(srcswp, i, &valid);
		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1);
			*addr = SWB_EMPTY;
		}
	}
/*
 * transfer source to destination
 */
	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
		int srcvalid, dstvalid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
			&srcvalid);
		int *dstaddrp;
	/*
	 * see if the source has space allocated
	 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
		/*
		 * if the source is valid and the dest has no space, then
		 * copy the allocation from the source to the dest.
		 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
				/*
				 * if the dest already has a valid block, deallocate the
				 * source block without copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					swap_pager_setvalid(dstswp, i + dstoffset, 1);
				}
			}
		/*
		 * if the source is not empty at this point, then deallocate the space.
		 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}

/*
 * deallocate the rest of the source object
 */
	for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
		int valid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1);
			*srcaddrp = SWB_EMPTY;
		}
	}

	splx(s);

	free((caddr_t)srcswp->sw_blocks, M_VMPGDATA);
	srcswp->sw_blocks = 0;
	free((caddr_t)srcswp, M_VMPGDATA);
	srcpager->pg_data = 0;
	free((caddr_t)srcpager, M_VMPAGER);

	return;
}

void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i,j;
	register sw_blk_t bp;
	register sw_pager_t swp;
	int s;

	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	s = splbio();
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
	}
	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.
	 */

	while (swp->sw_poip) {
		tsleep((caddr_t)swp, PVM, "swpout", 0);
	}
	splx(s);

	(void) swap_pager_clean();

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
		for (j = 0; j < SWB_NPAGES; j++)
			if (bp->swb_block[j] != SWB_EMPTY) {
				swap_pager_freeswapspace((unsigned)bp->swb_block[j],
					(unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
				bp->swb_block[j] = SWB_EMPTY;
			}
	}
	splx(s);

	/*
	 * Free swap management resources
	 */
	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
	swp->sw_blocks = 0;
	free((caddr_t)swp, M_VMPGDATA);
	pager->pg_data = 0;
	free((caddr_t)pager, M_VMPAGER);
}

/*
 * swap_pager_getmulti can get multiple pages.
 */
int
swap_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	if( reqpage >= count)
		panic("swap_pager_getmulti: reqpage >= count");
	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
}

/*
 * swap_pager_getpage gets individual pages
 */
int
swap_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];

	marray[0] = m;
	return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0);
}

int
swap_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	int flags;

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals);
}

/*
 * swap_pager_putpage writes individual pages
 */
int
swap_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int flags;
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	marray[0] = m;
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals);

	return rtvals[0];
}

static inline int
swap_pager_block_index(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return (offset / (SWB_NPAGES*PAGE_SIZE));
}

static inline int
swap_pager_block_offset(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return ((offset % (PAGE_SIZE*SWB_NPAGES)) / PAGE_SIZE);
}

/*
 * _swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
_swap_pager_haspage(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES*PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return(FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
	if (swb->swb_block[ix] != SWB_EMPTY) {
		if (swb->swb_valid & (1 << ix))
			return TRUE;
	}

	return(FALSE);
}

/*
 * swap_pager_haspage is the externally accessible version of
 * _swap_pager_haspage above.  this routine takes a vm_pager_t
 * for an argument instead of sw_pager_t.
 */
boolean_t
swap_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  this is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;
	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

int swapwritecount=0;

/*
 * swap_pager_iodone1 is the completion routine for reads and synchronous
 * writes (async writes use swap_pager_iodone below)
 */
void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup((caddr_t)bp);
/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/
}

int
swap_pager_input(swp, m, count, reqpage)
	register sw_pager_t swp;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = object->paging_offset;
	/*
	 * First determine if the page exists in the pager if this is
	 * a sync read.  This quickly handles cases where we are
	 * following shadow chains looking for the top level object
	 * with the page.
	 */
	if (swp->sw_blocks == NULL) {
		swap_pager_ridpages(m, count, reqpage);
		return(VM_PAGER_FAIL);
	}

	for(i = 0; i < count; i++) {
		vm_offset_t foff = m[i]->offset + paging_offset;
		int ix = swap_pager_block_index(swp, foff);
		if (ix >= swp->sw_nblocks) {
			int j;
			if( i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return(VM_PAGER_FAIL);
			}
			for(j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}

		swb[i] = &swp->sw_blocks[ix];
		off[i] = swap_pager_block_offset(swp, foff);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that our required input request exists */

	if (reqaddr[reqpage] == SWB_EMPTY ||
		(swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return(VM_PAGER_FAIL);
	}

	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
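	/*
	 * A page joins the I/O cluster only if its swap block is allocated
	 * and valid, its disk address is exactly contiguous with the
	 * required page (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE)),
	 * and it falls in the same dmmax disk region; the same test is
	 * applied scanning backwards here and forwards below.
	 */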
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if ( failed || (reqaddr[i] == SWB_EMPTY) ||
			(swb[i]->swb_valid & (1 << off[i])) == 0 ||
			(reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
			((reqaddr[i] / dmmax) != reqdskregion)) {
				failed = 1;
				swap_pager_freepage(m[i]);
				if (first == 0)
					first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if ( failed || (reqaddr[i] == SWB_EMPTY) ||
			(swb[i]->swb_valid & (1 << off[i])) == 0 ||
			(reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
			((reqaddr[i] / dmmax) != reqdskregion)) {
				failed = 1;
				swap_pager_freepage(m[i]);
				if (last == count)
					last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i-first] = m[i];
			reqaddr[i-first] = reqaddr[i];
			off[i-first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}

	++swb[reqpage]->swb_locked;

	/*
	 * at this point:
	 * "m" is a pointer to the array of vm_page_t for paging I/O
	 * "count" is the number of vm_page_t entries represented by "m"
	 * "object" is the vm_object_t for I/O
	 * "reqpage" is the index into "m" for the page actually faulted
	 */

	spc = NULL;	/* we might not use an spc data structure */

	if (count == 1) {
		/*
		 * if a kva has not been allocated, we can only do a one-page
		 * transfer, so we free the other pages that might have been
		 * allocated by vm_fault.
		 */
		swap_pager_ridpages(m, count, reqpage);
		m[0] = m[reqpage];
		reqaddr[0] = reqaddr[reqpage];

		count = 1;
		reqpage = 0;
	/*
	 * get a swap pager clean data structure, block until we get it
	 */
		if (swap_pager_free.tqh_first == NULL) {
			s = splbio();
			if( curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
			while (swap_pager_free.tqh_first == NULL) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep((caddr_t)&swap_pager_free,
					PVM, "swpfre", 0);
				if( curproc == pageproc)
					(void) swap_pager_clean();
				else
					wakeup((caddr_t) &vm_pages_needed);
			}
			splx(s);
		}
		spc = swap_pager_free.tqh_first;
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
	} else {
	/*
	 * Get a swap buffer header to perform the IO
	 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter( kva, m, count);

	s = splbio();
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE*count;
	bp->b_bufsize = PAGE_SIZE*count;

	bgetvp( swapdev_vp, bp);

	swp->sw_piip++;

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t)bp, PVM, "swread", 0);
	}

	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pagein failed; blkno %lu, size %ld, error %d\n",
		    (u_long)bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);

	--swp->sw_piip;
	if (swp->sw_piip == 0)
		wakeup((caddr_t) swp);

	/*
	 * relpbuf does this, but we maintain our own buffer
	 * list also...
	 */
	if (bp->b_vp)
		brelvp(bp);

	splx(s);
	--swb[reqpage]->swb_locked;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove( kva, count);

	if (spc) {
		/*
		 * if we have used an spc, we need to free it.
		 */
		if( bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if( bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t)&swap_pager_free);
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if( rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page activated
					 * is up in the air, but we should put the page
					 * on a page queue somewhere. (it already is in
					 * the object).
					 * After some empirical results, it is best
					 * to deactivate the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for this
					 * page we now tell them that it is ok to use
					 */
					m[i]->flags &= ~PG_FAKE;
					PAGE_WAKEUP(m[i]);
				}
			}
			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever pages are brought in. We must clear
			 * the clean flag so that the page contents will be
			 * preserved.
			 */
			if (swap_pager_full) {
				for (i = 0; i < count; i++) {
					m[i]->flags &= ~PG_CLEAN;
				}
				_swap_pager_freespace( swp, m[0]->offset+paging_offset, count*PAGE_SIZE);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return(rv);
}

int
swap_pager_output(swp, m, count, flags, rtvals)
	register sw_pager_t swp;
	vm_page_t *m;
	int count;
	int flags;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, foff;
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int failed;

/*
	if( count > 1)
		printf("off: 0x%x, count: %d\n", m[0]->offset, count);
*/
	if( vm_swap_size)
		no_swap_space = 0;
	if( no_swap_space) {
		for(i=0;i<count;i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}

	spc = NULL;

	object = m[0]->object;
	paging_offset = object->paging_offset;

	failed = 0;
	for(j=0;j<count;j++) {
		foff = m[j]->offset + paging_offset;
		ix = swap_pager_block_index(swp, foff);
		swb[j] = 0;
		if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &swp->sw_blocks[ix];
		++swb[j]->swb_locked;
		if( failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(swp, foff);
		reqaddr[j] = swb[j]->swb_block[off];
		if( reqaddr[j] == SWB_EMPTY) {
			int blk;
			int tries;
			int ntoget;
			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this block, we
			 * only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works
			 * (the intent of this code is to allocate small chunks
			 *  for small objects)
			 */
			if( (m[j]->offset == 0) && (ntoget*PAGE_SIZE > object->size)) {
				ntoget = (object->size + (PAGE_SIZE-1))/PAGE_SIZE;
			}

retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
				swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
				&swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to reclaim space and
				 * retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1<<off);
			}
			splx(s);
		}
	}

	/*
	 * keep only the pages that are disk-contiguous with the first page,
	 * lie in the same dmmax region, and were assigned swap successfully;
	 * truncate the run at the first page that fails these tests.
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(PAGE_SIZE)) ||
			(reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
			(rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if( rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for(i = 0; i < count; i++) {
		if( rtvals[i] != VM_PAGER_OK) {
			if( swb[i])
				--swb[i]->swb_locked;
		}
	}

	for(i = 0; i < count; i++)
		if( rtvals[i] != VM_PAGER_OK)
			break;

	if( i == 0) {
		return VM_PAGER_AGAIN;
	}

	count = i;
	for(i=0;i<count;i++) {
		if( reqaddr[i] == SWB_EMPTY)
			printf("I/O to empty block????\n");
	}

	/*
	 * For synchronous writes, we clean up
	 * all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		swap_pager_clean();
	}

	kva = 0;

	/*
	 * we allocate a new kva for transfers > 1 page
	 * but for transfers == 1 page, the swap_pager_free list contains
	 * entries that have pre-allocated kva's (for efficiency).
	 * NOTE -- we do not use the physical buffer pool or the
	 * preallocated associated kva's because of the potential for
	 * deadlock.  This is very subtle -- but deadlocks or resource
	 * contention must be avoided on pageouts -- or your system will
	 * sleep (forever) !!!
	 */
/*
	if ( count > 1) {
		kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE);
		if( !kva) {
			for (i = 0; i < count; i++) {
				if( swb[i])
					--swb[i]->swb_locked;
				rtvals[i] = VM_PAGER_AGAIN;
			}
			return VM_PAGER_AGAIN;
		}
	}
*/

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (swap_pager_free.tqh_first == NULL) {
		s = splbio();
		if( curproc == pageproc)
			(void) swap_pager_clean();
		else
			wakeup((caddr_t) &vm_pages_needed);
		while (swap_pager_free.tqh_first == NULL) {
			swap_pager_needflags |= SWAP_FREE_NEEDED;
			tsleep((caddr_t)&swap_pager_free,
				PVM, "swpfre", 0);
			if( curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
		}
		splx(s);
	}

	spc = swap_pager_free.tqh_first;
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * get the base I/O offset into the swap file
	 */
	for(i=0;i<count;i++) {
		foff = m[i]->offset + paging_offset;
		off = swap_pager_block_offset(swp, foff);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		--swb[i]->swb_locked;
	}

	s = splbio();
	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;

	bp->b_flags = B_BUSY;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if( bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if( bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bgetvp( swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE*count;
	bp->b_bufsize = PAGE_SIZE*count;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields
	 * and place a "cleaning" entry on the inuse queue.
	 */
	if ( flags & B_ASYNC ) {
		spc->spc_flags = 0;
		spc->spc_swp = swp;
		for(i=0;i<count;i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		swp->sw_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		swp->sw_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_clean();
		}
		splx(s);
		for(i=0;i<count;i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t)bp, PVM, "swwrt", 0);
	}
	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pageout failed; blkno %lu, size %ld, error %d\n",
		    (u_long)bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);

	--swp->sw_poip;
	if (swp->sw_poip == 0)
		wakeup((caddr_t) swp);

	if (bp->b_vp)
		brelvp(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove( kva, count);

	/*
	 * if we have written the page, then indicate that the page
	 * is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for(i=0;i<count;i++) {
			if( rtvals[i] == VM_PAGER_OK) {
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				/*
				 * optimization: if a page has been read during the
				 * pageout process, we activate it.
				 */
				if ( (m[i]->flags & PG_ACTIVE) == 0 &&
					pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))
					vm_page_activate(m[i]);
			}
		}
	} else {
		for(i=0;i<count;i++) {
			rtvals[i] = rv;
			m[i]->flags |= PG_LAUNDRY;
		}
	}

	if( bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if( bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t)&swap_pager_free);
	}

	return(rv);
}

boolean_t
swap_pager_clean()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (swap_pager_done.tqh_first == NULL)
		return FALSE;
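	/*
	 * Each pass takes the first entry off the done list, unmaps its
	 * KVA and finishes the pages, then jumps to "doclean" below to
	 * recycle the spc onto the free list; the loop exits once the
	 * done list has been drained.
	 */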
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = swap_pager_done.tqh_first) != 0) {
			pmap_qremove( spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done; that's all we can do for now.
		 */

		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t)&swap_pager_free);
		}
		++cleandone;
		splx(s);
	}

	return(tspc ? TRUE : FALSE);
}

void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	if ((object->paging_in_progress -= spc->spc_count) == 0)
		thread_wakeup((int) object);

	/*
	 * If no error, mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after a while)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for(i=0;i<spc->spc_count;i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			       (u_long)VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_LAUNDRY;
		}
	} else {
		for(i=0;i<spc->spc_count;i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_CLEAN;
		}
	}

	for(i=0;i<spc->spc_count;i++) {
		/*
		 * we wakeup any processes that are waiting on
		 * these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone is the completion routine for async writes
 */
void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
		    (u_long)bp->b_blkno, bp->b_bcount, bp->b_error);
	}

/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/

	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC);
	if (bp->b_vp) {
		brelvp(bp);
	}
	if( bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if( bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_swp->sw_poip == 0) {
		wakeup((caddr_t)spc->spc_swp);
	}

	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    swap_pager_inuse.tqh_first == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t)&swap_pager_free);
		wakeup((caddr_t)&vm_pages_needed);
	}

	if (vm_pageout_pages_needed) {
		wakeup((caddr_t)&vm_pageout_pages_needed);
	}

	if ((swap_pager_inuse.tqh_first == NULL) ||
	    (cnt.v_free_count < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count >= cnt.v_free_min) ) {
		wakeup((caddr_t)&vm_pages_needed);
	}
	splx(s);
}

/*
 * return true if any swap control structures can be allocated
 */
int
swap_pager_ready() {
	if( swap_pager_free.tqh_first)
		return 1;
	else
		return 0;
}
1685