xref: /freebsd/sys/vm/swap_pager.c (revision fff93ab600c4b237cd3fddfe5e736d24d77530d0)
1 /*
2  * Copyright (c) 1994 John S. Dyson
3  * Copyright (c) 1990 University of Utah.
4  * Copyright (c) 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * the Systems Programming Group of the University of Utah Computer
9  * Science Department.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  *
39  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
40  *
41  *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
42  * $Id: swap_pager.c,v 1.7 1994/08/18 22:36:00 wollman Exp $
43  */
44 
45 /*
46  * Quick hack to page to dedicated partition(s).
47  * TODO:
48  *	Add multiprocessor locks
49  *	Deal with async writes in a better fashion
50  */
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/proc.h>
55 #include <sys/buf.h>
56 #include <sys/vnode.h>
57 #include <sys/malloc.h>
58 
59 #include <miscfs/specfs/specdev.h>
60 #include <sys/rlist.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_pager.h>
64 #include <vm/vm_page.h>
65 #include <vm/vm_pageout.h>
66 #include <vm/swap_pager.h>
67 
68 #ifndef NPENDINGIO
69 #define NPENDINGIO	16
70 #endif
71 
72 int nswiodone;
73 extern int vm_pageout_rate_limit;
74 static int cleandone;
75 extern int hz;
76 int swap_pager_full;
77 extern vm_map_t pager_map;
78 extern int vm_swap_size;
79 
80 #define MAX_PAGEOUT_CLUSTER 8
81 
82 TAILQ_HEAD(swpclean, swpagerclean);
83 
84 typedef	struct swpagerclean	*swp_clean_t;
85 
86 struct swpagerclean {
87 	TAILQ_ENTRY(swpagerclean)	spc_list;
88 	int				spc_flags;
89 	struct buf			*spc_bp;
90 	sw_pager_t			spc_swp;
91 	vm_offset_t			spc_kva;
92 	int				spc_count;
93 	vm_page_t			spc_m[MAX_PAGEOUT_CLUSTER];
94 } swcleanlist[NPENDINGIO];
95 
96 
97 extern vm_map_t kernel_map;
98 
99 /* spc_flags values */
100 #define SPC_ERROR	0x01
101 
102 #define SWB_EMPTY (-1)
103 
104 struct swpclean swap_pager_done;	/* list of completed page cleans */
105 struct swpclean swap_pager_inuse;	/* list of pending page cleans */
106 struct swpclean swap_pager_free;	/* list of free pager clean structs */
107 struct pagerlst swap_pager_list;	/* list of "named" anon regions */
108 struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */
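/*
 * In outline, a clean-list entry (spc) cycles through these queues:
 * it starts on swap_pager_free, moves to swap_pager_inuse when an
 * async pageout is issued by swap_pager_output(), is moved to
 * swap_pager_done by swap_pager_iodone() at interrupt time, and is
 * finally recycled onto swap_pager_free by swap_pager_clean().
 */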
109 
110 #define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
111 int swap_pager_needflags;
112 struct rlist *swapfrag;
113 
114 struct pagerlst *swp_qs[]={
115 	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
116 };
117 
118 int swap_pager_putmulti();
119 
120 struct pagerops swappagerops = {
121 	swap_pager_init,
122 	swap_pager_alloc,
123 	swap_pager_dealloc,
124 	swap_pager_getpage,
125 	swap_pager_getmulti,
126 	swap_pager_putpage,
127 	swap_pager_putmulti,
128 	swap_pager_haspage
129 };
130 
131 int npendingio = NPENDINGIO;
132 int pendingiowait;
133 int require_swap_init;
134 void swap_pager_finish();
135 int dmmin, dmmax;
136 extern int vm_page_count;
137 
138 struct buf *getpbuf();
139 void relpbuf(struct buf *bp);
140 
141 static inline void swapsizecheck() {
142 	if( vm_swap_size < 128*btodb(PAGE_SIZE)) {
143 		if( swap_pager_full)
144 			printf("swap_pager: out of space\n");
145 		swap_pager_full = 1;
146 	} else if( vm_swap_size > 192*btodb(PAGE_SIZE))
147 		swap_pager_full = 0;
148 }
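/*
 * The two thresholds above provide simple hysteresis (assuming, for
 * illustration, a 4K PAGE_SIZE and 512-byte disk blocks, so that
 * btodb(PAGE_SIZE) == 8): swap_pager_full is raised when fewer than
 * 128 pages (1024 disk blocks) of swap remain, and is not cleared
 * again until more than 192 pages (1536 disk blocks) are free.
 */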
149 
150 void
151 swap_pager_init()
152 {
153 	dfltpagerops = &swappagerops;
154 
155 	TAILQ_INIT(&swap_pager_list);
156 	TAILQ_INIT(&swap_pager_un_list);
157 
158 	/*
159 	 * Initialize clean lists
160 	 */
161 	TAILQ_INIT(&swap_pager_inuse);
162 	TAILQ_INIT(&swap_pager_done);
163 	TAILQ_INIT(&swap_pager_free);
164 
165 	require_swap_init = 1;
166 
167 	/*
168 	 * Calculate the swap allocation constants.
169 	 */
170 
171 	dmmin = CLBYTES/DEV_BSIZE;
172 	dmmax = btodb(SWB_NPAGES*PAGE_SIZE)*2;
173 
174 }
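/*
 * For example, with CLBYTES == 4096, DEV_BSIZE == 512, PAGE_SIZE ==
 * 4096 and SWB_NPAGES == 8 (illustrative values only), this yields
 * dmmin == 8 and dmmax == btodb(8 * 4096) * 2 == 128 disk blocks;
 * dmmax thus bounds a contiguous swap region, which is why the
 * clustering code in swap_pager_input/output refuses to build a
 * cluster that crosses a dmmax boundary.
 */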
175 
176 /*
177  * Allocate a pager structure and associated resources.
178  * Note that if we are called from the pageout daemon (handle == NULL)
179  * we must not wait for memory, as that could result in deadlock.
180  */
181 vm_pager_t
182 swap_pager_alloc(handle, size, prot, offset)
183 	caddr_t handle;
184 	register vm_size_t size;
185 	vm_prot_t prot;
186 	vm_offset_t offset;
187 {
188 	register vm_pager_t pager;
189 	register sw_pager_t swp;
190 	int waitok;
191 	int i,j;
192 
193 	if (require_swap_init) {
194 		swp_clean_t spc;
195 		struct buf *bp;
196 		/*
197 		 * kva's are allocated here so that we don't need to keep
198 		 * calling kmem_alloc_pageable at runtime
199 		 */
200 		for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) {
201 			spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE*MAX_PAGEOUT_CLUSTER);
202 			if (!spc->spc_kva) {
203 				break;
204 			}
205 			spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT);
206 			if (!spc->spc_bp) {
207 				kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
208 				break;
209 			}
210 			spc->spc_flags = 0;
211 			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
212 		}
213 		require_swap_init = 0;
214 		if( size == 0)
215 			return(NULL);
216 	}
217 
218 	/*
219 	 * If this is a "named" anonymous region, look it up and
220 	 * return the appropriate pager if it exists.
221 	 */
222 	if (handle) {
223 		pager = vm_pager_lookup(&swap_pager_list, handle);
224 		if (pager != NULL) {
225 			/*
226 			 * Use vm_object_lookup to gain a reference
227 			 * to the object and also to remove from the
228 			 * object cache.
229 			 */
230 			if (vm_object_lookup(pager) == NULL)
231 				panic("swap_pager_alloc: bad object");
232 			return(pager);
233 		}
234 	}
235 
236 	if (swap_pager_full) {
237 		return(NULL);
238 	}
239 
240 	/*
241 	 * Pager doesn't exist, allocate swap management resources
242 	 * and initialize.
243 	 */
244 	waitok = handle ? M_WAITOK : M_NOWAIT;
245 	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
246 	if (pager == NULL)
247 		return(NULL);
248 	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
249 	if (swp == NULL) {
250 		free((caddr_t)pager, M_VMPAGER);
251 		return(NULL);
252 	}
253 	size = round_page(size);
254 	swp->sw_osize = size;
255 	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES*PAGE_SIZE);
256 	swp->sw_blocks = (sw_blk_t)
257 		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
258 		       M_VMPGDATA, waitok);
259 	if (swp->sw_blocks == NULL) {
260 		free((caddr_t)swp, M_VMPGDATA);
261 		free((caddr_t)pager, M_VMPAGER);
262 		return(NULL);
263 	}
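	/*
	 * Sizing sketch: for a 1MB object with a 4K PAGE_SIZE and
	 * SWB_NPAGES == 8 (illustrative values only), btodb(size) == 2048
	 * and each sw_blk covers btodb(8 * 4096) == 64 disk blocks, so
	 * sw_nblocks == 2048 / 64 == 32 structures, each mapping
	 * SWB_NPAGES pages of the object.
	 */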
264 
265 	for (i = 0; i < swp->sw_nblocks; i++) {
266 		swp->sw_blocks[i].swb_valid = 0;
267 		swp->sw_blocks[i].swb_locked = 0;
268 		for (j = 0; j < SWB_NPAGES; j++)
269 			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
270 	}
271 
272 	swp->sw_poip = 0;
273 	if (handle) {
274 		vm_object_t object;
275 
276 		swp->sw_flags = SW_NAMED;
277 		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
278 		/*
279 		 * Consistent with other pagers: return with object
280 		 * referenced.  Can't do this with handle == NULL
281 		 * since it might be the pageout daemon calling.
282 		 */
283 		object = vm_object_allocate(size);
284 		vm_object_enter(object, pager);
285 		vm_object_setpager(object, pager, 0, FALSE);
286 	} else {
287 		swp->sw_flags = 0;
288 		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
289 	}
290 	pager->pg_handle = handle;
291 	pager->pg_ops = &swappagerops;
292 	pager->pg_type = PG_SWAP;
293 	pager->pg_data = (caddr_t)swp;
294 
295 	return(pager);
296 }
297 
298 /*
299  * returns the disk block associated with a pager and offset;
300  * additionally, as a side effect, returns a flag indicating
301  * whether the block has been written
302  */
303 
304 static int *
305 swap_pager_diskaddr(swp, offset, valid)
306 	sw_pager_t swp;
307 	vm_offset_t offset;
308 	int *valid;
309 {
310 	register sw_blk_t swb;
311 	int ix;
312 
313 	if (valid)
314 		*valid = 0;
315 	ix = offset / (SWB_NPAGES*PAGE_SIZE);
316 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
317 		return(FALSE);
318 	}
319 	swb = &swp->sw_blocks[ix];
320 	ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
321 	if (valid)
322 		*valid = swb->swb_valid & (1<<ix);
323 	return &swb->swb_block[ix];
324 }
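/*
 * As a concrete example (assuming PAGE_SIZE == 4096 and SWB_NPAGES ==
 * 8, so each sw_blk spans 0x8000 bytes): offset 0x9000 yields block
 * index 0x9000 / 0x8000 == 1 and page index (0x9000 % 0x8000) /
 * 0x1000 == 1, i.e. the disk address lives in swb_block[1] of
 * sw_blocks[1] and its written flag is bit 1 of that swb_valid.
 */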
325 
326 /*
327  * Utility routine to set the valid (written) bit for
328  * a block associated with a pager and offset
329  */
330 static void
331 swap_pager_setvalid(swp, offset, valid)
332 	sw_pager_t swp;
333 	vm_offset_t offset;
334 	int valid;
335 {
336 	register sw_blk_t swb;
337 	int ix;
338 
339 	ix = offset / (SWB_NPAGES*PAGE_SIZE);
340 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
341 		return;
342 
343 	swb = &swp->sw_blocks[ix];
344 	ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
345 	if (valid)
346 		swb->swb_valid |= (1 << ix);
347 	else
348 		swb->swb_valid &= ~(1 << ix);
349 	return;
350 }
351 
352 /*
353  * this routine allocates swap space with a fragmentation
354  * minimization policy.
355  */
356 int
357 swap_pager_getswapspace( unsigned amount, unsigned *rtval) {
358 	unsigned tmpalloc;
359 	unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE);
360 	if( amount < nblocksfrag) {
361 		if( rlist_alloc(&swapfrag, amount, rtval))
362 			return 1;
363 		if( !rlist_alloc(&swapmap, nblocksfrag, &tmpalloc))
364 			return 0;
365 		rlist_free( &swapfrag, tmpalloc+amount, tmpalloc + nblocksfrag - 1);
366 		*rtval = tmpalloc;
367 		return 1;
368 	}
369 	if( !rlist_alloc(&swapmap, amount, rtval))
370 		return 0;
371 	else
372 		return 1;
373 }
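/*
 * In other words: a small request is satisfied from the swapfrag list
 * when possible; failing that, a full fragment of nblocksfrag disk
 * blocks is carved from the main swapmap, the request is taken from
 * its front, and the unused tail [tmpalloc + amount, tmpalloc +
 * nblocksfrag - 1] is kept on swapfrag for later small allocations.
 * Requests of a full fragment or more go straight to swapmap.
 */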
374 
375 /*
376  * this routine frees swap space with a fragmentation
377  * minimization policy.
378  */
379 void
380 swap_pager_freeswapspace( unsigned from, unsigned to) {
381 	unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE);
382 	unsigned tmpalloc;
383 	if( ((to + 1) - from) >= nblocksfrag) {
384 		while( (from + nblocksfrag) <= to + 1) {
385 			rlist_free(&swapmap, from, from + nblocksfrag - 1);
386 			from += nblocksfrag;
387 		}
388 	}
389 	if( from >= to)
390 		return;
391 	rlist_free(&swapfrag, from, to);
392 	while( rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) {
393 		rlist_free(&swapmap, tmpalloc, tmpalloc + nblocksfrag-1);
394 	}
395 }
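/*
 * The free policy mirrors the allocator: whole fragments of
 * nblocksfrag blocks go straight back to swapmap, any remainder is
 * freed to swapfrag, and complete fragments that reassemble on
 * swapfrag are promoted back to swapmap so they can again serve
 * large requests.
 */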
396 /*
397  * this routine frees swap blocks from a specified pager
398  */
399 void
400 _swap_pager_freespace(swp, start, size)
401 	sw_pager_t swp;
402 	vm_offset_t start;
403 	vm_offset_t size;
404 {
405 	vm_offset_t i;
406 	int s;
407 
408 	s = splbio();
409 	for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) {
410 		int valid;
411 		int *addr = swap_pager_diskaddr(swp, i, &valid);
412 		if (addr && *addr != SWB_EMPTY) {
413 			swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1);
414 			if( valid) {
415 				vm_swap_size += btodb(PAGE_SIZE);
416 				swap_pager_setvalid(swp, i, 0);
417 			}
418 			*addr = SWB_EMPTY;
419 		}
420 	}
421 	swapsizecheck();
422 	splx(s);
423 }
424 
425 void
426 swap_pager_freespace(pager, start, size)
427 	vm_pager_t pager;
428 	vm_offset_t start;
429 	vm_offset_t size;
430 {
431 	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
432 }
433 
434 /*
435  * swap_pager_reclaim frees up over-allocated space from all pagers;
436  * this eliminates internal fragmentation due to allocation of space
437  * for segments that are never swapped to. It has been written so that
438  * it does not block until the rlist_free operations occur; it keeps
439  * the queues consistent.
440  */
441 
442 /*
443  * Maximum number of blocks (pages) to reclaim per pass
444  */
445 #define MAXRECLAIM 256
446 
447 void
448 swap_pager_reclaim()
449 {
450 	vm_pager_t p;
451 	sw_pager_t swp;
452 	int i, j, k;
453 	int s;
454 	int reclaimcount;
455 	static int reclaims[MAXRECLAIM];
456 	static int in_reclaim;
457 
458 /*
459  * allow only one process to be in the swap_pager_reclaim subroutine
460  */
461 	s = splbio();
462 	if (in_reclaim) {
463 		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
464 		splx(s);
465 		return;
466 	}
467 	in_reclaim = 1;
468 	reclaimcount = 0;
469 
470 	/* for each pager queue */
471 	for (k = 0; swp_qs[k]; k++) {
472 
473 		p = swp_qs[k]->tqh_first;
474 		while (p && (reclaimcount < MAXRECLAIM)) {
475 
476 			/*
477 			 * see if any blocks associated with a pager have been
478 			 * allocated but not used (written)
479 			 */
480 			swp = (sw_pager_t) p->pg_data;
481 			for (i = 0; i < swp->sw_nblocks; i++) {
482 				sw_blk_t swb = &swp->sw_blocks[i];
483 				if( swb->swb_locked)
484 					continue;
485 				for (j = 0; j < SWB_NPAGES; j++) {
486 					if (swb->swb_block[j] != SWB_EMPTY &&
487 						(swb->swb_valid & (1 << j)) == 0) {
488 						reclaims[reclaimcount++] = swb->swb_block[j];
489 						swb->swb_block[j] = SWB_EMPTY;
490 						if (reclaimcount >= MAXRECLAIM)
491 							goto rfinished;
492 					}
493 				}
494 			}
495 			p = p->pg_list.tqe_next;
496 		}
497 	}
498 
499 rfinished:
500 
501 /*
502  * free the blocks that have been added to the reclaim list
503  */
504 	for (i = 0; i < reclaimcount; i++) {
505 		swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(PAGE_SIZE) - 1);
506 		swapsizecheck();
507 		wakeup((caddr_t) &in_reclaim);
508 	}
509 
510 	splx(s);
511 	in_reclaim = 0;
512 	wakeup((caddr_t) &in_reclaim);
513 }
514 
515 
516 /*
517  * swap_pager_copy copies blocks from one pager to another and
518  * destroys the source pager
519  */
520 
521 void
522 swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
523 	vm_pager_t srcpager;
524 	vm_offset_t srcoffset;
525 	vm_pager_t dstpager;
526 	vm_offset_t dstoffset;
527 	vm_offset_t offset;
528 {
529 	sw_pager_t srcswp, dstswp;
530 	vm_offset_t i;
531 	int s;
532 
533 	srcswp = (sw_pager_t) srcpager->pg_data;
534 	dstswp = (sw_pager_t) dstpager->pg_data;
535 
536 /*
537  * remove the source pager from the swap_pager internal queue
538  */
539 	s = splbio();
540 	if (srcswp->sw_flags & SW_NAMED) {
541 		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
542 		srcswp->sw_flags &= ~SW_NAMED;
543 	} else {
544 		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
545 	}
546 
547 	while (srcswp->sw_poip) {
548 		tsleep((caddr_t)srcswp, PVM, "spgout", 0);
549 	}
550 	splx(s);
551 
552 /*
553  * clean all of the pages that are currently active and finished
554  */
555 	(void) swap_pager_clean();
556 
557 	s = splbio();
558 /*
559  * clear the source blocks that precede the destination object
560  * (releasing the allocated space)
561  */
562 	for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
563 		int valid;
564 		int *addr = swap_pager_diskaddr(srcswp, i, &valid);
565 		if (addr && *addr != SWB_EMPTY) {
566 			swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1);
567 			if( valid)
568 				vm_swap_size += btodb(PAGE_SIZE);
569 			swapsizecheck();
570 			*addr = SWB_EMPTY;
571 		}
572 	}
573 /*
574  * transfer source to destination
575  */
576 	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
577 		int srcvalid, dstvalid;
578 		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
579 			&srcvalid);
580 		int *dstaddrp;
581 	/*
582 	 * see if the source has space allocated
583 	 */
584 		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
585 		/*
586 		 * if the source is valid and the dest has no space, then
587 		 * copy the allocation from the source to the dest.
588 		 */
589 			if (srcvalid) {
590 				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
591 				/*
592 				 * if the dest has an allocated but invalid block, free it
593 				 * so the source block can be moved into its place.
594 				 */
595 				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
596 					swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(PAGE_SIZE) - 1);
597 					*dstaddrp = SWB_EMPTY;
598 				}
599 				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
600 					*dstaddrp = *srcaddrp;
601 					*srcaddrp = SWB_EMPTY;
602 					swap_pager_setvalid(dstswp, i + dstoffset, 1);
603 					vm_swap_size -= btodb(PAGE_SIZE);
604 				}
605 			}
606 		/*
607 		 * if the source is not empty at this point, then deallocate the space.
608 		 */
609 			if (*srcaddrp != SWB_EMPTY) {
610 				swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1);
611 				if( srcvalid)
612 					vm_swap_size += btodb(PAGE_SIZE);
613 				*srcaddrp = SWB_EMPTY;
614 			}
615 		}
616 	}
617 
618 /*
619  * deallocate the rest of the source object
620  */
621 	for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
622 		int valid;
623 		int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);
624 		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
625 			swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1);
626 			if( valid)
627 				vm_swap_size += btodb(PAGE_SIZE);
628 			*srcaddrp = SWB_EMPTY;
629 		}
630 	}
631 
632 	swapsizecheck();
633 	splx(s);
634 
635 	free((caddr_t)srcswp->sw_blocks, M_VMPGDATA);
636 	srcswp->sw_blocks = 0;
637 	free((caddr_t)srcswp, M_VMPGDATA);
638 	srcpager->pg_data = 0;
639 	free((caddr_t)srcpager, M_VMPAGER);
640 
641 	return;
642 }
643 
644 
645 void
646 swap_pager_dealloc(pager)
647 	vm_pager_t pager;
648 {
649 	register int i,j;
650 	register sw_blk_t bp;
651 	register sw_pager_t swp;
652 	int s;
653 
654 	/*
655 	 * Remove from list right away so lookups will fail if we
656 	 * block for pageout completion.
657 	 */
658 	s = splbio();
659 	swp = (sw_pager_t) pager->pg_data;
660 	if (swp->sw_flags & SW_NAMED) {
661 		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
662 		swp->sw_flags &= ~SW_NAMED;
663 	} else {
664 		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
665 	}
666 	/*
667 	 * Wait for all pageouts to finish and remove
668 	 * all entries from cleaning list.
669 	 */
670 
671 	while (swp->sw_poip) {
672 		tsleep((caddr_t)swp, PVM, "swpout", 0);
673 	}
674 	splx(s);
675 
676 
677 	(void) swap_pager_clean();
678 
679 	/*
680 	 * Free left over swap blocks
681 	 */
682 	s = splbio();
683 	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
684 		for (j = 0; j < SWB_NPAGES; j++)
685 		if (bp->swb_block[j] != SWB_EMPTY) {
686 			swap_pager_freeswapspace((unsigned)bp->swb_block[j],
687 				(unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
688 			if( bp->swb_valid & (1<<j))
689 				vm_swap_size += btodb(PAGE_SIZE);
690 			bp->swb_block[j] = SWB_EMPTY;
691 		}
692 	}
693 	splx(s);
694 	swapsizecheck();
695 
696 	/*
697 	 * Free swap management resources
698 	 */
699 	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
700 	swp->sw_blocks = 0;
701 	free((caddr_t)swp, M_VMPGDATA);
702 	pager->pg_data = 0;
703 	free((caddr_t)pager, M_VMPAGER);
704 }
705 
706 /*
707  * swap_pager_getmulti can get multiple pages.
708  */
709 int
710 swap_pager_getmulti(pager, m, count, reqpage, sync)
711 	vm_pager_t pager;
712 	vm_page_t *m;
713 	int count;
714 	int reqpage;
715 	boolean_t sync;
716 {
717 	if( reqpage >= count)
718 		panic("swap_pager_getmulti: reqpage >= count");
719 	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
720 }
721 
722 /*
723  * swap_pager_getpage gets individual pages
724  */
725 int
726 swap_pager_getpage(pager, m, sync)
727 	vm_pager_t pager;
728 	vm_page_t m;
729 	boolean_t sync;
730 {
731 	vm_page_t marray[1];
732 
733 	marray[0] = m;
734 	return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0);
735 }
736 
737 int
738 swap_pager_putmulti(pager, m, c, sync, rtvals)
739 	vm_pager_t pager;
740 	vm_page_t *m;
741 	int c;
742 	boolean_t sync;
743 	int *rtvals;
744 {
745 	int flags;
746 
747 	if (pager == NULL) {
748 		(void) swap_pager_clean();
749 		return VM_PAGER_OK;
750 	}
751 
752 	flags = B_WRITE;
753 	if (!sync)
754 		flags |= B_ASYNC;
755 
756 	return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals);
757 }
758 
759 /*
760  * swap_pager_putpage writes individual pages
761  */
762 int
763 swap_pager_putpage(pager, m, sync)
764 	vm_pager_t pager;
765 	vm_page_t m;
766 	boolean_t sync;
767 {
768 	int flags;
769 	vm_page_t marray[1];
770 	int rtvals[1];
771 
772 
773 	if (pager == NULL) {
774 		(void) swap_pager_clean();
775 		return VM_PAGER_OK;
776 	}
777 
778 	marray[0] = m;
779 	flags = B_WRITE;
780 	if (!sync)
781 		flags |= B_ASYNC;
782 
783 	swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals);
784 
785 	return rtvals[0];
786 }
787 
788 static inline int
789 const swap_pager_block_index(swp, offset)
790 	sw_pager_t swp;
791 	vm_offset_t offset;
792 {
793 	return (offset / (SWB_NPAGES*PAGE_SIZE));
794 }
795 
796 static inline int
797 const swap_pager_block_offset(swp, offset)
798 	sw_pager_t swp;
799 	vm_offset_t offset;
800 {
801 	return ((offset % (PAGE_SIZE*SWB_NPAGES)) / PAGE_SIZE);
802 }
803 
804 /*
805  * _swap_pager_haspage returns TRUE if the pager has data that has
806  * been written out.
807  */
808 static boolean_t
809 _swap_pager_haspage(swp, offset)
810 	sw_pager_t swp;
811 	vm_offset_t offset;
812 {
813 	register sw_blk_t swb;
814 	int ix;
815 
816 	ix = offset / (SWB_NPAGES*PAGE_SIZE);
817 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
818 		return(FALSE);
819 	}
820 	swb = &swp->sw_blocks[ix];
821 	ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
822 	if (swb->swb_block[ix] != SWB_EMPTY) {
823 		if (swb->swb_valid & (1 << ix))
824 			return TRUE;
825 	}
826 
827 	return(FALSE);
828 }
829 
830 /*
831  * swap_pager_haspage is the externally accessible version of
832  * _swap_pager_haspage above.  this routine takes a vm_pager_t
833  * as an argument instead of a sw_pager_t.
834  */
835 boolean_t
836 swap_pager_haspage(pager, offset)
837 	vm_pager_t pager;
838 	vm_offset_t offset;
839 {
840 	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
841 }
842 
843 /*
844  * swap_pager_freepage is a convenience routine that clears the busy
845  * bit and deallocates a page.
846  */
847 static void
848 swap_pager_freepage(m)
849 	vm_page_t m;
850 {
851 	PAGE_WAKEUP(m);
852 	vm_page_free(m);
853 }
854 
855 /*
856  * swap_pager_ridpages is a convenience routine that deallocates all
857  * but the required page.  this is usually used in error returns that
858  * need to invalidate the "extra" readahead pages.
859  */
860 static void
861 swap_pager_ridpages(m, count, reqpage)
862 	vm_page_t *m;
863 	int count;
864 	int reqpage;
865 {
866 	int i;
867 	for (i = 0; i < count; i++)
868 		if (i != reqpage)
869 			swap_pager_freepage(m[i]);
870 }
871 
872 int swapwritecount=0;
873 
874 /*
875  * swap_pager_iodone1 is the completion routine for both reads and async writes
876  */
877 void
878 swap_pager_iodone1(bp)
879 	struct buf *bp;
880 {
881 	bp->b_flags |= B_DONE;
882 	bp->b_flags &= ~B_ASYNC;
883 	wakeup((caddr_t)bp);
884 /*
885 	if ((bp->b_flags & B_READ) == 0)
886 		vwakeup(bp);
887 */
888 }
889 
890 
891 int
892 swap_pager_input(swp, m, count, reqpage)
893 	register sw_pager_t swp;
894 	vm_page_t *m;
895 	int count, reqpage;
896 {
897 	register struct buf *bp;
898 	sw_blk_t swb[count];
899 	register int s;
900 	int i;
901 	boolean_t rv;
902 	vm_offset_t kva, off[count];
903 	swp_clean_t spc;
904 	vm_offset_t paging_offset;
905 	vm_object_t object;
906 	int reqaddr[count];
907 
908 	int first, last;
909 	int failed;
910 	int reqdskregion;
911 
912 	object = m[reqpage]->object;
913 	paging_offset = object->paging_offset;
914 	/*
915 	 * First determine if the page exists in the pager if this is
916 	 * a sync read.  This quickly handles cases where we are
917 	 * following shadow chains looking for the top level object
918 	 * with the page.
919 	 */
920 	if (swp->sw_blocks == NULL) {
921 		swap_pager_ridpages(m, count, reqpage);
922 		return(VM_PAGER_FAIL);
923 	}
924 
925 	for(i = 0; i < count; i++) {
926 		vm_offset_t foff = m[i]->offset + paging_offset;
927 		int ix = swap_pager_block_index(swp, foff);
928 		if (ix >= swp->sw_nblocks) {
929 			int j;
930 			if( i <= reqpage) {
931 				swap_pager_ridpages(m, count, reqpage);
932 				return(VM_PAGER_FAIL);
933 			}
934 			for(j = i; j < count; j++) {
935 				swap_pager_freepage(m[j]);
936 			}
937 			count = i;
938 			break;
939 		}
940 
941 		swb[i] = &swp->sw_blocks[ix];
942 		off[i] = swap_pager_block_offset(swp, foff);
943 		reqaddr[i] = swb[i]->swb_block[off[i]];
944 	}
945 
946 	/* make sure that the swap block for our required page exists and is valid */
947 
948 	if (reqaddr[reqpage] == SWB_EMPTY ||
949 		(swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
950 		swap_pager_ridpages(m, count, reqpage);
951 		return(VM_PAGER_FAIL);
952 	}
953 
954 
955 	reqdskregion = reqaddr[reqpage] / dmmax;
956 
957 	/*
958 	 * search backwards for the first contiguous page to transfer
959 	 */
960 	failed = 0;
961 	first = 0;
962 	for (i = reqpage - 1; i >= 0; --i) {
963 		if ( failed || (reqaddr[i] == SWB_EMPTY) ||
964 			(swb[i]->swb_valid & (1 << off[i])) == 0 ||
965 			(reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
966 			((reqaddr[i] / dmmax) != reqdskregion)) {
967 				failed = 1;
968 				swap_pager_freepage(m[i]);
969 				if (first == 0)
970 					first = i + 1;
971 		}
972 	}
973 	/*
974 	 * search forwards for the last contiguous page to transfer
975 	 */
976 	failed = 0;
977 	last = count;
978 	for (i = reqpage + 1; i < count; i++) {
979 		if ( failed || (reqaddr[i] == SWB_EMPTY) ||
980 			(swb[i]->swb_valid & (1 << off[i])) == 0 ||
981 			(reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
982 			((reqaddr[i] / dmmax) != reqdskregion)) {
983 				failed = 1;
984 				swap_pager_freepage(m[i]);
985 				if (last == count)
986 					last = i;
987 		}
988 	}
989 
990 	count = last;
991 	if (first != 0) {
992 		for (i = first; i < count; i++) {
993 			m[i-first] = m[i];
994 			reqaddr[i-first] = reqaddr[i];
995 			off[i-first] = off[i];
996 		}
997 		count -= first;
998 		reqpage -= first;
999 	}
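	/*
	 * To illustrate the contiguity tests above (assuming a 4K
	 * PAGE_SIZE, so btodb(PAGE_SIZE) == 8): if the required page is at
	 * disk block 1000, page reqpage-1 survives the backward scan only
	 * if its block is exactly 992, page reqpage+1 survives the forward
	 * scan only if its block is exactly 1008, and both must fall in
	 * the same dmmax region; everything else has been freed and the
	 * arrays shifted so that m[0..count-1] is one contiguous run.
	 */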
1000 
1001 	++swb[reqpage]->swb_locked;
1002 
1003 	/*
1004 	 * at this point:
1005 	 * "m" is a pointer to the array of vm_page_t for paging I/O
1006 	 * "count" is the number of vm_page_t entries represented by "m"
1007 	 * "object" is the vm_object_t for I/O
1008 	 * "reqpage" is the index into "m" for the page actually faulted
1009 	 */
1010 
1011 	spc = NULL;	/* we might not use an spc data structure */
1012 
1013 	if (count == 1) {
1014 		/*
1015 		 * if a kva has not been allocated, we can only do a one-page
1016 		 * transfer, so we free the other pages that might have been
1017 		 * allocated by vm_fault.
1018 		 */
1019 		swap_pager_ridpages(m, count, reqpage);
1020 		m[0] = m[reqpage];
1021 		reqaddr[0] = reqaddr[reqpage];
1022 
1023 		count = 1;
1024 		reqpage = 0;
1025 	/*
1026 	 * get a swap pager clean data structure, block until we get it
1027 	 */
1028 		if (swap_pager_free.tqh_first == NULL) {
1029 			s = splbio();
1030 			if( curproc == pageproc)
1031 				(void) swap_pager_clean();
1032 			else
1033 				wakeup((caddr_t) &vm_pages_needed);
1034 			while (swap_pager_free.tqh_first == NULL) {
1035 				swap_pager_needflags |= SWAP_FREE_NEEDED;
1036 				tsleep((caddr_t)&swap_pager_free,
1037 					PVM, "swpfre", 0);
1038 				if( curproc == pageproc)
1039 					(void) swap_pager_clean();
1040 				else
1041 					wakeup((caddr_t) &vm_pages_needed);
1042 			}
1043 			splx(s);
1044 		}
1045 		spc = swap_pager_free.tqh_first;
1046 		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
1047 		kva = spc->spc_kva;
1048 		bp = spc->spc_bp;
1049 		bzero(bp, sizeof *bp);
1050 		bp->b_spc = spc;
1051 	} else {
1052 	/*
1053 	 * Get a swap buffer header to perform the IO
1054 	 */
1055 		bp = getpbuf();
1056 		kva = (vm_offset_t) bp->b_data;
1057 	}
1058 
1059 	/*
1060 	 * map our page(s) into kva for input
1061 	 */
1062 	pmap_qenter( kva, m, count);
1063 
1064 	s = splbio();
1065 	bp->b_flags = B_BUSY | B_READ | B_CALL;
1066 	bp->b_iodone = swap_pager_iodone1;
1067 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
1068 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1069 	crhold(bp->b_rcred);
1070 	crhold(bp->b_wcred);
1071 	bp->b_un.b_addr = (caddr_t) kva;
1072 	bp->b_blkno = reqaddr[0];
1073 	bp->b_bcount = PAGE_SIZE*count;
1074 	bp->b_bufsize = PAGE_SIZE*count;
1075 
1076 	bgetvp( swapdev_vp, bp);
1077 
1078 	swp->sw_piip++;
1079 
1080 	/*
1081 	 * perform the I/O
1082 	 */
1083 	VOP_STRATEGY(bp);
1084 
1085 	/*
1086 	 * wait for the sync I/O to complete
1087 	 */
1088 	while ((bp->b_flags & B_DONE) == 0) {
1089 		tsleep((caddr_t)bp, PVM, "swread", 0);
1090 	}
1091 	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
1092 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);
1093 
1094 	--swp->sw_piip;
1095 	if (swp->sw_piip == 0)
1096 		wakeup((caddr_t) swp);
1097 
1098 	/*
1099 	 * relpbuf does this, but we maintain our own buffer
1100 	 * list also...
1101 	 */
1102 	if (bp->b_vp)
1103 		brelvp(bp);
1104 
1105 	splx(s);
1106 	--swb[reqpage]->swb_locked;
1107 
1108 	/*
1109 	 * remove the mapping for kernel virtual
1110 	 */
1111 	pmap_qremove( kva, count);
1112 
1113 	if (spc) {
1114 		/*
1115 		 * if we have used an spc, we need to free it.
1116 		 */
1117 		if( bp->b_rcred != NOCRED)
1118 			crfree(bp->b_rcred);
1119 		if( bp->b_wcred != NOCRED)
1120 			crfree(bp->b_wcred);
1121 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1122 		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1123 			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1124 			wakeup((caddr_t)&swap_pager_free);
1125 		}
1126 	} else {
1127 		/*
1128 		 * release the physical I/O buffer
1129 		 */
1130 		relpbuf(bp);
1131 		/*
1132 		 * finish up input if everything is ok
1133 		 */
1134 		if( rv == VM_PAGER_OK) {
1135 			for (i = 0; i < count; i++) {
1136 				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1137 				m[i]->flags |= PG_CLEAN;
1138 				m[i]->flags &= ~PG_LAUNDRY;
1139 				if (i != reqpage) {
1140 					/*
1141 					 * whether or not to leave the page activated
1142 					 * is up in the air, but we should put the page
1143 					 * on a page queue somewhere. (it already is in
1144 					 * the object).
1145 					 * After some empirical results, it is best
1146 					 * to deactivate the readahead pages.
1147 					 */
1148 					vm_page_deactivate(m[i]);
1149 
1150 					/*
1151 					 * just in case someone was asking for this
1152 					 * page we now tell them that it is ok to use
1153 					 */
1154 					m[i]->flags &= ~PG_FAKE;
1155 					PAGE_WAKEUP(m[i]);
1156 				}
1157 			}
1158 			if( swap_pager_full) {
1159 				_swap_pager_freespace( swp, m[0]->offset+paging_offset, count*PAGE_SIZE);
1160 			}
1161 		} else {
1162 			swap_pager_ridpages(m, count, reqpage);
1163 		}
1164 	}
1165 	return(rv);
1166 }
1167 
1168 int
1169 swap_pager_output(swp, m, count, flags, rtvals)
1170 	register sw_pager_t swp;
1171 	vm_page_t *m;
1172 	int count;
1173 	int flags;
1174 	int *rtvals;
1175 {
1176 	register struct buf *bp;
1177 	sw_blk_t swb[count];
1178 	register int s;
1179 	int i, j, ix;
1180 	boolean_t rv;
1181 	vm_offset_t kva, off, foff;
1182 	swp_clean_t spc;
1183 	vm_offset_t paging_offset;
1184 	vm_object_t object;
1185 	int reqaddr[count];
1186 	int failed;
1187 
1188 /*
1189 	if( count > 1)
1190 		printf("off: 0x%x, count: %d\n", m[0]->offset, count);
1191 */
1192 	spc = NULL;
1193 
1194 	object = m[0]->object;
1195 	paging_offset = object->paging_offset;
1196 
1197 	failed = 0;
1198 	for(j=0;j<count;j++) {
1199 		foff = m[j]->offset + paging_offset;
1200 		ix = swap_pager_block_index(swp, foff);
1201 		swb[j] = 0;
1202 		if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
1203 			rtvals[j] = VM_PAGER_FAIL;
1204 			failed = 1;
1205 			continue;
1206 		} else {
1207 			rtvals[j] = VM_PAGER_OK;
1208 		}
1209 		swb[j] = &swp->sw_blocks[ix];
1210 		++swb[j]->swb_locked;
1211 		if( failed) {
1212 			rtvals[j] = VM_PAGER_FAIL;
1213 			continue;
1214 		}
1215 		off = swap_pager_block_offset(swp, foff);
1216 		reqaddr[j] = swb[j]->swb_block[off];
1217 		if( reqaddr[j] == SWB_EMPTY) {
1218 			int blk;
1219 			int tries;
1220 			int ntoget;
1221 			tries = 0;
1222 			s = splbio();
1223 
1224 			/*
1225 			 * if any other pages have been allocated in this block, we
1226 			 * only try to get one page.
1227 			 */
1228 			for (i = 0; i < SWB_NPAGES; i++) {
1229 				if (swb[j]->swb_block[i] != SWB_EMPTY)
1230 					break;
1231 			}
1232 
1233 
1234 			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
1235 			/*
1236 			 * this code is a little conservative, but works
1237 			 * (the intent of this code is to allocate small chunks
1238 			 *  for small objects)
1239 			 */
1240 			if( (m[j]->offset == 0) && (ntoget*PAGE_SIZE > object->size)) {
1241 				ntoget = (object->size + (PAGE_SIZE-1))/PAGE_SIZE;
1242 			}
1243 
1244 retrygetspace:
1245 			if (!swap_pager_full && ntoget > 1 &&
1246 				swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {
1247 
1248 				for (i = 0; i < ntoget; i++) {
1249 					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
1250 					swb[j]->swb_valid = 0;
1251 				}
1252 
1253 				reqaddr[j] = swb[j]->swb_block[off];
1254 			} else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
1255 				&swb[j]->swb_block[off])) {
1256 				/*
1257 				 * if the allocation has failed, we try to reclaim space and
1258 				 * retry.
1259 				 */
1260 				if (++tries == 1) {
1261 					swap_pager_reclaim();
1262 					goto retrygetspace;
1263 				}
1264 				rtvals[j] = VM_PAGER_AGAIN;
1265 				failed = 1;
1266 			} else {
1267 				reqaddr[j] = swb[j]->swb_block[off];
1268 				swb[j]->swb_valid &= ~(1<<off);
1269 			}
1270 			splx(s);
1271 		}
1272 	}
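	/*
	 * A sketch of the preallocation heuristic above: when a swap block
	 * is entirely empty, swap space for up to SWB_NPAGES pages is
	 * reserved at once so that later pageouts within the same block
	 * cluster on disk; but for a small object -- say a 2-page object
	 * being written at offset 0 -- ntoget is trimmed to 2 pages so
	 * that swap space the object can never use is not reserved.
	 */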
1273 
1274 	/*
1275 	 * search forwards for the last contiguous page to transfer
1276 	 */
1277 	failed = 0;
1278 	for (i = 0; i < count; i++) {
1279 		if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(PAGE_SIZE)) ||
1280 			(reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
1281 			(rtvals[i] != VM_PAGER_OK)) {
1282 			failed = 1;
1283 			if( rtvals[i] == VM_PAGER_OK)
1284 				rtvals[i] = VM_PAGER_AGAIN;
1285 		}
1286 	}
1287 
1288 	for(i = 0; i < count; i++) {
1289 		if( rtvals[i] != VM_PAGER_OK) {
1290 			if( swb[i])
1291 				--swb[i]->swb_locked;
1292 		}
1293 	}
1294 
1295 	for(i = 0; i < count; i++)
1296 		if( rtvals[i] != VM_PAGER_OK)
1297 			break;
1298 
1299 	if( i == 0) {
1300 		return VM_PAGER_AGAIN;
1301 	}
1302 
1303 	count = i;
1304 	for(i=0;i<count;i++) {
1305 		if( reqaddr[i] == SWB_EMPTY)
1306 			printf("I/O to empty block????\n");
1307 	}
1308 
1312 	/*
1313 	 * For synchronous writes, we clean up
1314 	 * all completed async pageouts.
1315 	 */
1316 	if ((flags & B_ASYNC) == 0) {
1317 		swap_pager_clean();
1318 	}
1319 
1320 	kva = 0;
1321 
1322 	/*
1323 	 * we allocate a new kva for transfers > 1 page
1324 	 * but for transfers == 1 page, the swap_pager_free list contains
1325 	 * entries that have pre-allocated kva's (for efficiency).
1326 	 * NOTE -- we do not use the physical buffer pool or the
1327 	 * preallocated associated kva's because of the potential for
1328 	 * deadlock.  This is very subtle -- but deadlocks or resource
1329 	 * contention must be avoided on pageouts -- or your system will
1330 	 * sleep (forever) !!!
1331 	 */
1332 /*
1333 	if ( count > 1) {
1334 		kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE);
1335 		if( !kva) {
1336 			for (i = 0; i < count; i++) {
1337 				if( swb[i])
1338 					--swb[i]->swb_locked;
1339 				rtvals[i] = VM_PAGER_AGAIN;
1340 			}
1341 			return VM_PAGER_AGAIN;
1342 		}
1343 	}
1344 */
1345 
1346 	/*
1347 	 * get a swap pager clean data structure, block until we get it
1348 	 */
1349 	if (swap_pager_free.tqh_first == NULL) {
1350 		s = splbio();
1351 		if( curproc == pageproc)
1352 			(void) swap_pager_clean();
1353 		else
1354 			wakeup((caddr_t) &vm_pages_needed);
1355 		while (swap_pager_free.tqh_first == NULL) {
1356 			swap_pager_needflags |= SWAP_FREE_NEEDED;
1357 			tsleep((caddr_t)&swap_pager_free,
1358 				PVM, "swpfre", 0);
1359 			if( curproc == pageproc)
1360 				(void) swap_pager_clean();
1361 			else
1362 				wakeup((caddr_t) &vm_pages_needed);
1363 		}
1364 		splx(s);
1365 	}
1366 
1367 	spc = swap_pager_free.tqh_first;
1368 	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
1369 
1370 	kva = spc->spc_kva;
1371 
1372 	/*
1373 	 * map our page(s) into kva for I/O
1374 	 */
1375 	pmap_qenter(kva, m, count);
1376 
1377 	/*
1378 	 * get the base I/O offset into the swap file
1379 	 */
1380 	for(i=0;i<count;i++) {
1381 		foff = m[i]->offset + paging_offset;
1382 		off = swap_pager_block_offset(swp, foff);
1383 		/*
1384 		 * if we are setting the valid bit anew,
1385 		 * then diminish the swap free space
1386 		 */
1387 		if( (swb[i]->swb_valid & (1 << off)) == 0)
1388 			vm_swap_size -= btodb(PAGE_SIZE);
1389 
1390 		/*
1391 		 * set the valid bit
1392 		 */
1393 		swb[i]->swb_valid |= (1 << off);
1394 		/*
1395 		 * and unlock the data structure
1396 		 */
1397 		--swb[i]->swb_locked;
1398 	}
1399 
1400 	s = splbio();
1401 	/*
1402 	 * Get a swap buffer header and perform the IO
1403 	 */
1404 	bp = spc->spc_bp;
1405 	bzero(bp, sizeof *bp);
1406 	bp->b_spc = spc;
1407 
1408 	bp->b_flags = B_BUSY;
1409 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
1410 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1411 	if( bp->b_rcred != NOCRED)
1412 		crhold(bp->b_rcred);
1413 	if( bp->b_wcred != NOCRED)
1414 		crhold(bp->b_wcred);
1415 	bp->b_data = (caddr_t) kva;
1416 	bp->b_blkno = reqaddr[0];
1417 	bgetvp( swapdev_vp, bp);
1418 
1419 	bp->b_bcount = PAGE_SIZE*count;
1420 	bp->b_bufsize = PAGE_SIZE*count;
1421 	swapdev_vp->v_numoutput++;
1422 
1423 	/*
1424 	 * If this is an async write we set up additional buffer fields
1425 	 * and place a "cleaning" entry on the inuse queue.
1426 	 */
1427 	if ( flags & B_ASYNC ) {
1428 		spc->spc_flags = 0;
1429 		spc->spc_swp = swp;
1430 		for(i=0;i<count;i++)
1431 			spc->spc_m[i] = m[i];
1432 		spc->spc_count = count;
1433 		/*
1434 		 * the completion routine for async writes
1435 		 */
1436 		bp->b_flags |= B_CALL;
1437 		bp->b_iodone = swap_pager_iodone;
1438 		bp->b_dirtyoff = 0;
1439 		bp->b_dirtyend = bp->b_bcount;
1440 		swp->sw_poip++;
1441 		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
1442 	} else {
1443 		swp->sw_poip++;
1444 		bp->b_flags |= B_CALL;
1445 		bp->b_iodone = swap_pager_iodone1;
1446 	}
1447 	/*
1448 	 * perform the I/O
1449 	 */
1450 	VOP_STRATEGY(bp);
1451 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) {
1452 		if ((bp->b_flags & B_DONE) == B_DONE) {
1453 			swap_pager_clean();
1454 		}
1455 		splx(s);
1456 		for(i=0;i<count;i++) {
1457 			rtvals[i] = VM_PAGER_PEND;
1458 		}
1459 		return VM_PAGER_PEND;
1460 	}
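	/*
	 * Note that in the async case the pages are still busy after the
	 * VM_PAGER_PEND return; they are not unbusied until
	 * swap_pager_iodone() has queued the spc and a later call to
	 * swap_pager_clean() runs swap_pager_finish(), which does the
	 * PAGE_WAKEUP on each page.
	 */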
1461 
1462 	/*
1463 	 * wait for the sync I/O to complete
1464 	 */
1465 	while ((bp->b_flags & B_DONE) == 0) {
1466 		tsleep((caddr_t)bp, PVM, "swwrt", 0);
1467 	}
1468 	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
1469 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);
1470 
1471 	--swp->sw_poip;
1472 	if (swp->sw_poip == 0)
1473 		wakeup((caddr_t) swp);
1474 
1475 	if (bp->b_vp)
1476 		brelvp(bp);
1477 
1478 	splx(s);
1479 
1480 	/*
1481 	 * remove the mapping for kernel virtual
1482 	 */
1483 	pmap_qremove( kva, count);
1484 
1485 	/*
1486 	 * if we have written the page, then indicate that the page
1487 	 * is clean.
1488 	 */
1489 	if (rv == VM_PAGER_OK) {
1490 		for(i=0;i<count;i++) {
1491 			if( rtvals[i] == VM_PAGER_OK) {
1492 				m[i]->flags |= PG_CLEAN;
1493 				m[i]->flags &= ~PG_LAUNDRY;
1494 				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1495 				/*
1496 				 * optimization, if a page has been read during the
1497 				 * pageout process, we activate it.
1498 				 */
1499 				if ( (m[i]->flags & PG_ACTIVE) == 0 &&
1500 					pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))
1501 					vm_page_activate(m[i]);
1502 			}
1503 		}
1504 	} else {
1505 		for(i=0;i<count;i++) {
1506 			rtvals[i] = rv;
1507 			m[i]->flags |= PG_LAUNDRY;
1508 		}
1509 	}
1510 
1511 	if( bp->b_rcred != NOCRED)
1512 		crfree(bp->b_rcred);
1513 	if( bp->b_wcred != NOCRED)
1514 		crfree(bp->b_wcred);
1515 	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1516 	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1517 		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1518 		wakeup((caddr_t)&swap_pager_free);
1519 	}
1520 
1521 	return(rv);
1522 }
1523 
1524 boolean_t
1525 swap_pager_clean()
1526 {
1527 	register swp_clean_t spc, tspc;
1528 	register int s;
1529 
1530 	tspc = NULL;
1531 	if (swap_pager_done.tqh_first == NULL)
1532 		return FALSE;
1533 	for (;;) {
1534 		s = splbio();
1535 		/*
1536 		 * Lookup and removal from the done list must be done
1537 		 * at splbio() to avoid conflicts with swap_pager_iodone.
1538 		 */
1539 		while ((spc = swap_pager_done.tqh_first) != NULL) {
1540 			pmap_qremove( spc->spc_kva, spc->spc_count);
1541 			swap_pager_finish(spc);
1542 			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
1543 			goto doclean;
1544 		}
1545 
1546 		/*
1547 		 * No operations done; that's all we can do for now.
1548 		 */
1549 
1550 		splx(s);
1551 		break;
1552 
1553 		/*
1554 		 * The desired page was found to be busy earlier in
1555 		 * the scan but has since completed.
1556 		 */
1557 doclean:
1558 		if (tspc && tspc == spc) {
1559 			tspc = NULL;
1560 		}
1561 		spc->spc_flags = 0;
1562 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1563 		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1564 			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1565 			wakeup((caddr_t)&swap_pager_free);
1566 		}
1567 		++cleandone;
1568 		splx(s);
1569 	}
1570 
1571 	return(tspc ? TRUE : FALSE);
1572 }
1573 
1574 void
1575 swap_pager_finish(spc)
1576 	register swp_clean_t spc;
1577 {
1578 	vm_object_t object = spc->spc_m[0]->object;
1579 	int i;
1580 
1581 	if ((object->paging_in_progress -= spc->spc_count) == 0)
1582 		thread_wakeup((int) object);
1583 
1584 	/*
1585 	 * If no error, mark as clean and inform the pmap system.
1586 	 * If error, mark as dirty so we will try again.
1587 	 * (XXX we could get stuck doing this; we should give up after a while)
1588 	 */
1589 	if (spc->spc_flags & SPC_ERROR) {
1590 		for(i=0;i<spc->spc_count;i++) {
1591 			printf("swap_pager_finish: clean of page %x failed\n",
1592 			       VM_PAGE_TO_PHYS(spc->spc_m[i]));
1593 			spc->spc_m[i]->flags |= PG_LAUNDRY;
1594 		}
1595 	} else {
1596 		for(i=0;i<spc->spc_count;i++) {
1597 			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
1598 			spc->spc_m[i]->flags |= PG_CLEAN;
1599 		}
1600 	}
1601 
1602 
1603 	for(i=0;i<spc->spc_count;i++) {
1604 		/*
1605 		 * we wake up any processes that are waiting on
1606 		 * these pages.
1607 		 */
1608 		PAGE_WAKEUP(spc->spc_m[i]);
1609 	}
1610 	nswiodone -= spc->spc_count;
1611 
1612 	return;
1613 }
1614 
1615 /*
1616  * swap_pager_iodone
1617  */
1618 void
1619 swap_pager_iodone(bp)
1620 	register struct buf *bp;
1621 {
1622 	register swp_clean_t spc;
1623 	int s;
1624 
1625 	s = splbio();
1626 	spc = (swp_clean_t) bp->b_spc;
1627 	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
1628 	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
1629 	if (bp->b_flags & B_ERROR) {
1630 		spc->spc_flags |= SPC_ERROR;
1631 		printf("error %d blkno %d sz %d ",
1632 			bp->b_error, bp->b_blkno, bp->b_bcount);
1633 	}
1634 
1635 /*
1636 	if ((bp->b_flags & B_READ) == 0)
1637 		vwakeup(bp);
1638 */
1639 
1640 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC);
1641 	if (bp->b_vp) {
1642 		brelvp(bp);
1643 	}
1644 	if( bp->b_rcred != NOCRED)
1645 		crfree(bp->b_rcred);
1646 	if( bp->b_wcred != NOCRED)
1647 		crfree(bp->b_wcred);
1648 
1649 	nswiodone += spc->spc_count;
1650 	if (--spc->spc_swp->sw_poip == 0) {
1651 		wakeup((caddr_t)spc->spc_swp);
1652 	}
1653 
1654 	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
1655 	    swap_pager_inuse.tqh_first == 0) {
1656 		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1657 		wakeup((caddr_t)&swap_pager_free);
1658 		wakeup((caddr_t)&vm_pages_needed);
1659 	}
1660 
1661 	if (vm_pageout_pages_needed) {
1662 		wakeup((caddr_t)&vm_pageout_pages_needed);
1663 	}
1664 
1665 	if ((swap_pager_inuse.tqh_first == NULL) ||
1666 	    (cnt.v_free_count < cnt.v_free_min &&
1667 	    nswiodone + cnt.v_free_count >= cnt.v_free_min) ) {
1668 		wakeup((caddr_t)&vm_pages_needed);
1669 	}
1670 	splx(s);
1671 }
1672 
1673 /*
1674  * return true if any swap control structures can be allocated
1675  */
1676 int
1677 swap_pager_ready() {
1678 	if( swap_pager_free.tqh_first)
1679 		return 1;
1680 	else
1681 		return 0;
1682 }
1683