/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.14 1994/10/15 13:33:06 davidg Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>

#ifndef NPENDINGIO
#define NPENDINGIO	16
#endif

int	swap_pager_input __P((sw_pager_t, vm_page_t *, int, int));
int	swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *));

int nswiodone;
extern int vm_pageout_rate_limit;
static int cleandone;
extern int hz;
int swap_pager_full;
extern vm_map_t pager_map;
extern int vm_swap_size;
struct rlist *swaplist;
int nswaplist;

#define MAX_PAGEOUT_CLUSTER 8

TAILQ_HEAD(swpclean, swpagerclean);

typedef	struct swpagerclean	*swp_clean_t;

struct swpagerclean {
	TAILQ_ENTRY(swpagerclean)	spc_list;
	int				spc_flags;
	struct buf			*spc_bp;
	sw_pager_t			spc_swp;
	vm_offset_t			spc_kva;
	int				spc_count;
	vm_page_t			spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];

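/*
 * Note on sizing (a sketch, assuming the defaults above and 4K pages):
 * each preallocated clean structure carries room for one
 * MAX_PAGEOUT_CLUSTER-page cluster, so at most NPENDINGIO async pageout
 * clusters can be in flight at once, using 16 * 8 * 4K = 512K of
 * pageable kva reserved out of pager_map.
 */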

extern vm_map_t kernel_map;

/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY (-1)

struct swpclean swap_pager_done;	/* list of completed page cleans */
struct swpclean swap_pager_inuse;	/* list of pending page cleans */
struct swpclean swap_pager_free;	/* list of free pager clean structs */
struct pagerlst swap_pager_list;	/* list of "named" anon regions */
struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */

#define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
int swap_pager_needflags;
struct rlist *swapfrag;

struct pagerlst *swp_qs[] = {
	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
};

int swap_pager_putmulti();

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_getmulti,
	swap_pager_putpage,
	swap_pager_putmulti,
	swap_pager_haspage
};
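
/*
 * The generic pager code dispatches indirectly through pg_ops, so for a
 * PG_SWAP pager a "get" lands in swap_pager_getpage()/swap_pager_getmulti()
 * and a "put" in swap_pager_putpage()/swap_pager_putmulti().
 */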

int npendingio = NPENDINGIO;
int pendingiowait;
int require_swap_init;
void swap_pager_finish();
int dmmin, dmmax;
extern int vm_page_count;

static inline void
swapsizecheck()
{
	if (vm_swap_size == 0)
		return;
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full == 0)
			printf("swap_pager: out of space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}
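
/*
 * Worked example of the hysteresis above (a sketch assuming 4K pages and
 * 512-byte disk blocks, i.e. btodb(PAGE_SIZE) == 8): swap_pager_full is
 * set once fewer than 128 pages (1024 blocks, 512K) of swap remain and is
 * only cleared again once more than 192 pages (1536 blocks, 768K) are
 * free, which keeps the flag from flapping near the threshold.
 */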

void
swap_pager_init()
{
	dfltpagerops = &swappagerops;

	TAILQ_INIT(&swap_pager_list);
	TAILQ_INIT(&swap_pager_un_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);

	require_swap_init = 1;

	/*
	 * Calculate the swap allocation constants.
	 */

	dmmin = CLBYTES / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}
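
/*
 * For example (a sketch assuming 4K pages/clicks and SWB_NPAGES == 8):
 * dmmin = 4096/512 = 8 and dmmax = btodb(8 * 4096) * 2 = 128 disk blocks;
 * the reqaddr/dmmax "disk region" checks in the I/O routines below use
 * dmmax to keep a cluster within a single region.
 */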

/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register sw_pager_t swp;
	int waitok;
	int i, j;

	if (require_swap_init) {
		swp_clean_t spc;
		struct buf *bp;
		/*
		 * KVAs are allocated here so that we don't need to keep
		 * doing kmem_alloc_pageable calls at runtime
		 */
		for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
			spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			if (!spc->spc_kva) {
				break;
			}
			spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_NOWAIT);
			if (!spc->spc_bp) {
				kmem_free_wakeup(pager_map, spc->spc_kva,
					PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
				break;
			}
			spc->spc_flags = 0;
			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		}
		require_swap_init = 0;
		if (size == 0)
			return(NULL);
	}

	/*
	 * If this is a "named" anonymous region, look it up and
	 * return the appropriate pager if it exists.
	 */
	if (handle) {
		pager = vm_pager_lookup(&swap_pager_list, handle);
		if (pager != NULL) {
			/*
			 * Use vm_object_lookup to gain a reference
			 * to the object and also to remove from the
			 * object cache.
			 */
			if (vm_object_lookup(pager) == NULL)
				panic("swap_pager_alloc: bad object");
			return(pager);
		}
	}

/*
	if (swap_pager_full && (vm_swap_size == 0)) {
		return(NULL);
	}
*/

	/*
	 * Pager doesn't exist, allocate swap management resources
	 * and initialize.
	 */
	waitok = handle ? M_WAITOK : M_NOWAIT;
	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
	if (pager == NULL)
		return(NULL);
	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
	if (swp == NULL) {
		free((caddr_t)pager, M_VMPAGER);
		return(NULL);
	}
	size = round_page(size);
	swp->sw_osize = size;
	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES * PAGE_SIZE);
	swp->sw_blocks = (sw_blk_t)
		malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks),
		       M_VMPGDATA, waitok);
	if (swp->sw_blocks == NULL) {
		free((caddr_t)swp, M_VMPGDATA);
		free((caddr_t)pager, M_VMPAGER);
		return(NULL);
	}

	for (i = 0; i < swp->sw_nblocks; i++) {
		swp->sw_blocks[i].swb_valid = 0;
		swp->sw_blocks[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
	}

	swp->sw_poip = 0;
	if (handle) {
		vm_object_t object;

		swp->sw_flags = SW_NAMED;
		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
		/*
		 * Consistent with other pagers: return with object
		 * referenced.  Can't do this with handle == NULL
		 * since it might be the pageout daemon calling.
		 */
		object = vm_object_allocate(size);
		vm_object_enter(object, pager);
		vm_object_setpager(object, pager, 0, FALSE);
	} else {
		swp->sw_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
	}
	pager->pg_handle = handle;
	pager->pg_ops = &swappagerops;
	pager->pg_type = PG_SWAP;
	pager->pg_data = (caddr_t)swp;

	return(pager);
}

/*
 * Returns the disk block associated with a pager and offset;
 * additionally, as a side effect, returns a flag indicating
 * whether the block has been written (is valid).
 */

static int *
swap_pager_diskaddr(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return(NULL);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}
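
/*
 * Typical use (a sketch; the callers below all follow this pattern): the
 * returned pointer addresses the swb_block[] slot itself, so the caller
 * can both examine and update the on-disk address in place:
 *
 *	int valid;
 *	int *addr = swap_pager_diskaddr(swp, offset, &valid);
 *	if (addr && *addr != SWB_EMPTY) {
 *		use *addr as the DEV_BSIZE block number; "valid" is
 *		nonzero iff that block has been written; setting
 *		*addr = SWB_EMPTY releases the slot
 *	}
 */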

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
		return;

	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
int
swap_pager_getswapspace(unsigned amount, unsigned *rtval)
{
#ifdef EXP
	unsigned tmpalloc;
	unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);
	if (amount < nblocksfrag) {
		if (rlist_alloc(&swapfrag, amount, rtval))
			return 1;
		if (!rlist_alloc(&swaplist, nblocksfrag, &tmpalloc))
			return 0;
		rlist_free(&swapfrag, tmpalloc + amount, tmpalloc + nblocksfrag - 1);
		*rtval = tmpalloc;
		return 1;
	}
#endif
	if (!rlist_alloc(&swaplist, amount, rtval))
		return 0;
	else
		return 1;
}

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
void
swap_pager_freeswapspace(unsigned from, unsigned to)
{
#ifdef EXP
	unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);
	unsigned tmpalloc;
	if (((to + 1) - from) >= nblocksfrag) {
#endif
		rlist_free(&swaplist, from, to);
#ifdef EXP
		return;
	}
	rlist_free(&swapfrag, from, to);
	while (rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) {
		rlist_free(&swaplist, tmpalloc, tmpalloc + nblocksfrag - 1);
	}
#endif
}
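
/*
 * Usage sketch for the pair above (not compiled in; "blk" is a
 * hypothetical local): amounts and addresses are in DEV_BSIZE disk
 * blocks, and the free range is inclusive, matching the callers below.
 */
#if 0
	unsigned blk;

	if (swap_pager_getswapspace(btodb(PAGE_SIZE), &blk)) {
		/* write one page at disk block "blk", then release it */
		swap_pager_freeswapspace(blk, blk + btodb(PAGE_SIZE) - 1);
	}
#endif
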
/*
 * this routine frees swap blocks from a specified pager
 */
void
_swap_pager_freespace(swp, start, size)
	sw_pager_t swp;
	vm_offset_t start;
	vm_offset_t size;
{
	vm_offset_t i;
	int s;

	s = splbio();
	for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(swp, i, &valid);
		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				vm_swap_size += btodb(PAGE_SIZE);
				swap_pager_setvalid(swp, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	swapsizecheck();
	splx(s);
}

void
swap_pager_freespace(pager, start, size)
	vm_pager_t pager;
	vm_offset_t start;
	vm_offset_t size;
{
	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
}

/*
 * swap_pager_reclaim frees up over-allocated space from all pagers;
 * this eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to. It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 256

void
swap_pager_reclaim()
{
	vm_pager_t p;
	sw_pager_t swp;
	int i, j, k;
	int s;
	int reclaimcount;
	static int reclaims[MAXRECLAIM];
	static int in_reclaim;

/*
 * allow only one process to be in the swap_pager_reclaim subroutine
 */
	s = splbio();
	if (in_reclaim) {
		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		p = swp_qs[k]->tqh_first;
		while (p && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			swp = (sw_pager_t) p->pg_data;
			for (i = 0; i < swp->sw_nblocks; i++) {
				sw_blk_t swb = &swp->sw_blocks[i];
				if (swb->swb_locked)
					continue;
				for (j = 0; j < SWB_NPAGES; j++) {
					if (swb->swb_block[j] != SWB_EMPTY &&
						(swb->swb_valid & (1 << j)) == 0) {
						reclaims[reclaimcount++] = swb->swb_block[j];
						swb->swb_block[j] = SWB_EMPTY;
						if (reclaimcount >= MAXRECLAIM)
							goto rfinished;
					}
				}
			}
			p = p->pg_list.tqe_next;
		}
	}

rfinished:

/*
 * free the blocks that have been added to the reclaim list
 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i], reclaims[i] + btodb(PAGE_SIZE) - 1);
		swapsizecheck();
		wakeup((caddr_t) &in_reclaim);
	}

	splx(s);
	in_reclaim = 0;
	wakeup((caddr_t) &in_reclaim);
}

/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */

void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
	vm_pager_t srcpager;
	vm_offset_t srcoffset;
	vm_pager_t dstpager;
	vm_offset_t dstoffset;
	vm_offset_t offset;
{
	sw_pager_t srcswp, dstswp;
	vm_offset_t i;
	int s;

	if (vm_swap_size == 0)
		return;

	srcswp = (sw_pager_t) srcpager->pg_data;
	dstswp = (sw_pager_t) dstpager->pg_data;

/*
 * remove the source pager from the swap_pager internal queue
 */
	s = splbio();
	if (srcswp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
		srcswp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
	}

	while (srcswp->sw_poip) {
		tsleep((caddr_t)srcswp, PVM, "spgout", 0);
	}
	splx(s);

/*
 * clean all of the pages that are currently active and finished
 */
	(void) swap_pager_clean();

	s = splbio();
/*
 * clear the source blocks that precede the region transferred
 * to the destination (release allocated space)
 */
	for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(srcswp, i, &valid);
		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid)
				vm_swap_size += btodb(PAGE_SIZE);
			swapsizecheck();
			*addr = SWB_EMPTY;
		}
	}
/*
 * transfer source to destination
 */
	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
		int srcvalid, dstvalid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
			&srcvalid);
		int *dstaddrp;
	/*
	 * see if the source has space allocated
	 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
		/*
		 * if the source is valid and the dest has no space, then
		 * copy the allocation from the source to the dest.
		 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
				/*
				 * if the dest already has a valid block, deallocate the
				 * source block without copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(*dstaddrp, *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					swap_pager_setvalid(dstswp, i + dstoffset, 1);
					vm_swap_size -= btodb(PAGE_SIZE);
				}
			}
		/*
		 * if the source is not empty at this point, then deallocate the space.
		 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
				if (srcvalid)
					vm_swap_size += btodb(PAGE_SIZE);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}

/*
 * deallocate the rest of the source object
 */
	for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
		int valid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
			if (valid)
				vm_swap_size += btodb(PAGE_SIZE);
			*srcaddrp = SWB_EMPTY;
		}
	}

	swapsizecheck();
	splx(s);

	free((caddr_t)srcswp->sw_blocks, M_VMPGDATA);
	srcswp->sw_blocks = 0;
	free((caddr_t)srcswp, M_VMPGDATA);
	srcpager->pg_data = 0;
	free((caddr_t)srcpager, M_VMPAGER);

	return;
}
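
/*
 * (Summary of the above: source blocks below offset + srcoffset are
 * freed, the window overlapping the destination is moved slot by slot,
 * the tail beyond the destination's size is freed, and finally the
 * source pager itself is destroyed.)
 */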

void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i, j;
	register sw_blk_t bp;
	register sw_pager_t swp;
	int s;

	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	s = splbio();
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
	}
	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.
	 */

	while (swp->sw_poip) {
		tsleep((caddr_t)swp, PVM, "swpout", 0);
	}
	splx(s);

	(void) swap_pager_clean();

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
		for (j = 0; j < SWB_NPAGES; j++)
			if (bp->swb_block[j] != SWB_EMPTY) {
				swap_pager_freeswapspace((unsigned)bp->swb_block[j],
					(unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
				if (bp->swb_valid & (1 << j))
					vm_swap_size += btodb(PAGE_SIZE);
				bp->swb_block[j] = SWB_EMPTY;
			}
	}
	splx(s);
	swapsizecheck();

	/*
	 * Free swap management resources
	 */
	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
	swp->sw_blocks = 0;
	free((caddr_t)swp, M_VMPGDATA);
	pager->pg_data = 0;
	free((caddr_t)pager, M_VMPAGER);
}

/*
 * swap_pager_getmulti can get multiple pages.
 */
int
swap_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	if (reqpage >= count)
		panic("swap_pager_getmulti: reqpage >= count");
	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
}

/*
 * swap_pager_getpage gets individual pages
 */
int
swap_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];

	marray[0] = m;
	return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0);
}

int
swap_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	int flags;

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals);
}

/*
 * swap_pager_putpage writes individual pages
 */
int
swap_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int flags;
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	marray[0] = m;
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals);

	return rtvals[0];
}

static inline int
const swap_pager_block_index(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return (offset / (SWB_NPAGES * PAGE_SIZE));
}

static inline int
const swap_pager_block_offset(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE);
}
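
/*
 * Worked example of the index math (a sketch assuming SWB_NPAGES == 8
 * and 4K pages, so each swblock maps 32K): for offset 0x9000,
 * swap_pager_block_index() returns 0x9000 / 0x8000 = 1 (the second
 * swblock) and swap_pager_block_offset() returns (0x9000 % 0x8000) /
 * 0x1000 = 1, i.e. swb_block[1] guarded by valid bit (1 << 1).
 */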

/*
 * _swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
_swap_pager_haspage(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return(FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (swb->swb_block[ix] != SWB_EMPTY) {
		if (swb->swb_valid & (1 << ix))
			return TRUE;
	}

	return(FALSE);
}

/*
 * swap_pager_haspage is the externally accessible version of
 * _swap_pager_haspage above.  this routine takes a vm_pager_t
 * for an argument instead of sw_pager_t.
 */
boolean_t
swap_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  this is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;
	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

int swapwritecount = 0;

/*
 * swap_pager_iodone1 is the completion routine for both reads and async writes
 */
void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup((caddr_t)bp);
/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/
}

int
swap_pager_input(swp, m, count, reqpage)
	register sw_pager_t swp;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = object->paging_offset;
	/*
	 * First determine if the page exists in the pager if this is
	 * a sync read.  This quickly handles cases where we are
	 * following shadow chains looking for the top level object
	 * with the page.
	 */
	if (swp->sw_blocks == NULL) {
		swap_pager_ridpages(m, count, reqpage);
		return(VM_PAGER_FAIL);
	}

	for (i = 0; i < count; i++) {
		vm_offset_t foff = m[i]->offset + paging_offset;
		int ix = swap_pager_block_index(swp, foff);
		if (ix >= swp->sw_nblocks) {
			int j;
			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return(VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}

		swb[i] = &swp->sw_blocks[ix];
		off[i] = swap_pager_block_offset(swp, foff);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that our required input request exists */

	if (reqaddr[reqpage] == SWB_EMPTY ||
		(swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return(VM_PAGER_FAIL);
	}

	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
			(swb[i]->swb_valid & (1 << off[i])) == 0 ||
			(reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
			((reqaddr[i] / dmmax) != reqdskregion)) {
				failed = 1;
				swap_pager_freepage(m[i]);
				if (first == 0)
					first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
			(swb[i]->swb_valid & (1 << off[i])) == 0 ||
			(reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
			((reqaddr[i] / dmmax) != reqdskregion)) {
				failed = 1;
				swap_pager_freepage(m[i]);
				if (last == count)
					last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}

	++swb[reqpage]->swb_locked;

	/*
	 * at this point:
	 * "m" is a pointer to the array of vm_page_t for paging I/O
	 * "count" is the number of vm_page_t entries represented by "m"
	 * "object" is the vm_object_t for I/O
	 * "reqpage" is the index into "m" for the page actually faulted
	 */

	spc = NULL;	/* we might not use an spc data structure */

	if (count == 1) {
		/*
		 * if a kva has not been allocated, we can only do a one page transfer,
		 * so we free the other pages that might have been allocated by
		 * vm_fault.
		 */
		swap_pager_ridpages(m, count, reqpage);
		m[0] = m[reqpage];
		reqaddr[0] = reqaddr[reqpage];

		count = 1;
		reqpage = 0;
		/*
		 * get a swap pager clean data structure, block until we get it
		 */
		if (swap_pager_free.tqh_first == NULL) {
			s = splbio();
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
			while (swap_pager_free.tqh_first == NULL) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep((caddr_t)&swap_pager_free,
					PVM, "swpfre", 0);
				if (curproc == pageproc)
					(void) swap_pager_clean();
				else
					wakeup((caddr_t) &vm_pages_needed);
			}
			splx(s);
		}
		spc = swap_pager_free.tqh_first;
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	s = splbio();
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	bgetvp(swapdev_vp, bp);

	swp->sw_piip++;

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t)bp, PVM, "swread", 0);
	}
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);

	--swp->sw_piip;
	if (swp->sw_piip == 0)
		wakeup((caddr_t) swp);

	/*
	 * relpbuf does this, but we maintain our own buffer
	 * list also...
	 */
	if (bp->b_vp)
		brelvp(bp);

	splx(s);
	--swb[reqpage]->swb_locked;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	if (spc) {
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t)&swap_pager_free);
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page activated
					 * is up in the air, but we should put the page
					 * on a page queue somewhere. (it already is in
					 * the object).
					 * After some empirical results, it is best
					 * to deactivate the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for this
					 * page we now tell them that it is ok to use
					 */
					m[i]->flags &= ~PG_FAKE;
					PAGE_WAKEUP(m[i]);
				}
			}
			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever pages are brought in. We must clear
			 * the clean flag so that the page contents will be
			 * preserved.
			 */
			if (swap_pager_full) {
				for (i = 0; i < count; i++) {
					m[i]->flags &= ~PG_CLEAN;
				}
				_swap_pager_freespace(swp, m[0]->offset + paging_offset, count * PAGE_SIZE);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return(rv);
}

int
swap_pager_output(swp, m, count, flags, rtvals)
	register sw_pager_t swp;
	vm_page_t *m;
	int count;
	int flags;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, foff;
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int failed;

/*
	if (count > 1)
		printf("off: 0x%x, count: %d\n", m[0]->offset, count);
*/
	if (vm_swap_size == 0) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}

	spc = NULL;

	object = m[0]->object;
	paging_offset = object->paging_offset;

	failed = 0;
	for (j = 0; j < count; j++) {
		foff = m[j]->offset + paging_offset;
		ix = swap_pager_block_index(swp, foff);
		swb[j] = 0;
		if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &swp->sw_blocks[ix];
		++swb[j]->swb_locked;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(swp, foff);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			int blk;
			int tries;
			int ntoget;
			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this block, we
			 * only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works
			 * (the intent of this code is to allocate small chunks
			 *  for small objects)
			 */
			if ((m[j]->offset == 0) && (ntoget * PAGE_SIZE > object->size)) {
				ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
			}

retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
				swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
				&swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to reclaim space and
				 * retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
			(reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
			(rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}

	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY)
			printf("I/O to empty block????\n");
	}

	/*
	 * For synchronous writes, we clean up
	 * all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		swap_pager_clean();
	}

	kva = 0;

	/*
	 * we allocate a new kva for transfers > 1 page
	 * but for transfers == 1 page, the swap_pager_free list contains
	 * entries that have pre-allocated kva's (for efficiency).
	 * NOTE -- we do not use the physical buffer pool or the
	 * preallocated associated kva's because of the potential for
	 * deadlock.  This is very subtle -- but deadlocks or resource
	 * contention must be avoided on pageouts -- or your system will
	 * sleep (forever) !!!
	 */
/*
	if (count > 1) {
		kva = kmem_alloc_pageable(pager_map, count * PAGE_SIZE);
		if (!kva) {
			for (i = 0; i < count; i++) {
				if (swb[i])
					--swb[i]->swb_locked;
				rtvals[i] = VM_PAGER_AGAIN;
			}
			return VM_PAGER_AGAIN;
		}
	}
*/

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (swap_pager_free.tqh_first == NULL) {
		s = splbio();
		if (curproc == pageproc)
			(void) swap_pager_clean();
		else
			wakeup((caddr_t) &vm_pages_needed);
		while (swap_pager_free.tqh_first == NULL) {
			swap_pager_needflags |= SWAP_FREE_NEEDED;
			tsleep((caddr_t)&swap_pager_free,
				PVM, "swpfre", 0);
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
		}
		splx(s);
	}

	spc = swap_pager_free.tqh_first;
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * mark each page's swap block valid, adjusting the free-swap
	 * accounting for blocks being written for the first time
	 */
	for (i = 0; i < count; i++) {
		foff = m[i]->offset + paging_offset;
		off = swap_pager_block_offset(swp, foff);
		/*
		 * if we are setting the valid bit anew,
		 * then diminish the swap free space
		 */
		if ((swb[i]->swb_valid & (1 << off)) == 0)
			vm_swap_size -= btodb(PAGE_SIZE);

		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		--swb[i]->swb_locked;
	}

	s = splbio();
	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;

	bp->b_flags = B_BUSY;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields
	 * and place a "cleaning" entry on the inuse queue.
	 */
	if (flags & B_ASYNC) {
		spc->spc_flags = 0;
		spc->spc_swp = swp;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		swp->sw_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		swp->sw_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_clean();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t)bp, PVM, "swwrt", 0);
	}
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);

	--swp->sw_poip;
	if (swp->sw_poip == 0)
		wakeup((caddr_t) swp);

	if (bp->b_vp)
		brelvp(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page
	 * is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				/*
				 * optimization: if a page has been read during the
				 * pageout process, we activate it.
				 */
				if ((m[i]->flags & PG_ACTIVE) == 0 &&
					pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))
					vm_page_activate(m[i]);
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
			m[i]->flags |= PG_LAUNDRY;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t)&swap_pager_free);
	}

	return(rv);
}
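
/*
 * (Note for callers of the above: an async request returns VM_PAGER_PEND
 * and the pages remain busy until swap_pager_iodone/swap_pager_finish
 * run; a sync request returns VM_PAGER_OK or VM_PAGER_FAIL with the
 * clean structure already recycled.)
 */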

boolean_t
swap_pager_clean()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (swap_pager_done.tqh_first == NULL)
		return FALSE;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = swap_pager_done.tqh_first) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */

		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t)&swap_pager_free);
		}
		++cleandone;
		splx(s);
	}

	return(tspc ? TRUE : FALSE);
}

void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	if ((object->paging_in_progress -= spc->spc_count) == 0)
		thread_wakeup((int) object);

	/*
	 * If no error, mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after a while)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: clean of page %lx failed\n",
			       (u_long)VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_LAUNDRY;
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_CLEAN;
		}
	}

	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wakeup any processes that are waiting on
		 * these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone
 */
void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("error %d blkno %lu sz %ld ",
			bp->b_error, (u_long)bp->b_blkno, bp->b_bcount);
	}

/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/

	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC);
	if (bp->b_vp) {
		brelvp(bp);
	}
	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_swp->sw_poip == 0) {
		wakeup((caddr_t)spc->spc_swp);
	}

	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    swap_pager_inuse.tqh_first == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t)&swap_pager_free);
		wakeup((caddr_t)&vm_pages_needed);
	}

	if (vm_pageout_pages_needed) {
		wakeup((caddr_t)&vm_pageout_pages_needed);
	}

	if ((swap_pager_inuse.tqh_first == NULL) ||
	    (cnt.v_free_count < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count >= cnt.v_free_min)) {
		wakeup((caddr_t)&vm_pages_needed);
	}
	splx(s);
}

/*
 * return true if any swap control structures can be allocated
 */
int
swap_pager_ready()
{
	if (swap_pager_free.tqh_first)
		return 1;
	else
		return 0;
}