/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.39 1995/05/14 03:00:08 davidg Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>

#ifndef NPENDINGIO
#define NPENDINGIO	10
#endif

int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int));
int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *));

int nswiodone;
int swap_pager_full;
extern int vm_swap_size;
int no_swap_space = 1;
struct rlist *swaplist;
int nswaplist;

#define MAX_PAGEOUT_CLUSTER 8
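
/*
 * MAX_PAGEOUT_CLUSTER bounds the number of pages in a single clustered
 * pageout; each cleaning structure below carries a page array and a kva
 * window of exactly this many pages.
 */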
TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	sw_pager_t spc_swp;
	vm_offset_t spc_kva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];

/* spc_flags values */
#define SPC_ERROR	0x01
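
/*
 * SWB_EMPTY is the sentinel disk address stored in swb_block[] entries
 * that have no swap space allocated.
 */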
#define SWB_EMPTY (-1)
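
/*
 * A cleaning structure (spc) cycles through three queues: it starts on
 * the free list, moves to the inuse list while an async pageout is
 * pending, is moved to the done list by swap_pager_iodone, and is
 * returned to the free list by swap_pager_clean.
 */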
struct swpclean swap_pager_done;	/* list of completed page cleans */
struct swpclean swap_pager_inuse;	/* list of pending page cleans */
struct swpclean swap_pager_free;	/* list of free pager clean structs */
struct pagerlst swap_pager_list;	/* list of "named" anon regions */
struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */

#define	SWAP_FREE_NEEDED	0x1	/* need an entry on the free spc list */
#define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
int swap_pager_needflags;

struct pagerlst *swp_qs[] = {
	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
};

int swap_pager_putmulti();

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_getmulti,
	swap_pager_putpage,
	swap_pager_putmulti,
	swap_pager_haspage
};

int npendingio = NPENDINGIO;
int require_swap_init;
void swap_pager_finish();
int dmmin, dmmax;
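
/*
 * Hysteresis for the "swap full" indication: with 4K pages and 512-byte
 * disk blocks (an illustrative configuration), btodb(PAGE_SIZE) is 8, so
 * swap_pager_full is raised when fewer than 128 pages of swap remain and
 * cleared only once more than 192 pages are free again.
 */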
static inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full == 0)
			printf("swap_pager: out of space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

void
swap_pager_init()
{
	dfltpagerops = &swappagerops;

	TAILQ_INIT(&swap_pager_list);
	TAILQ_INIT(&swap_pager_un_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);

	require_swap_init = 1;

	/*
	 * Calculate the swap allocation constants.
	 */
	dmmin = CLBYTES / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}

/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
	void *handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register sw_pager_t swp;
	int waitok;
	int i, j;

	if (require_swap_init) {
		swp_clean_t spc;
		struct buf *bp;

		/*
		 * kvas are allocated here so that we don't need to do
		 * pageable kmem allocations at runtime
		 */
		for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
			spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			if (!spc->spc_kva) {
				break;
			}
			spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
			if (!spc->spc_bp) {
				kmem_free_wakeup(pager_map, spc->spc_kva,
				    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
				break;
			}
			spc->spc_flags = 0;
			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		}
		require_swap_init = 0;
		if (size == 0)
			return (NULL);
	}
	/*
	 * If this is a "named" anonymous region, look it up and return the
	 * appropriate pager if it exists.
	 */
	if (handle) {
		pager = vm_pager_lookup(&swap_pager_list, handle);
		if (pager != NULL) {
			/*
			 * Use vm_object_lookup to gain a reference to the
			 * object and also to remove from the object cache.
			 */
			if (vm_object_lookup(pager) == NULL)
				panic("swap_pager_alloc: bad object");
			return (pager);
		}
	}
	/*
	 * Pager doesn't exist, allocate swap management resources and
	 * initialize.
	 */
	waitok = handle ? M_WAITOK : M_KERNEL;
	pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, waitok);
	if (pager == NULL)
		return (NULL);
	swp = (sw_pager_t) malloc(sizeof *swp, M_VMPGDATA, waitok);
	if (swp == NULL) {
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}
	size = round_page(size);
	swp->sw_osize = size;
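	/*
	 * One sw_blk_t covers SWB_NPAGES pages, so round the block count
	 * up: nblocks = ceil(btodb(size) / btodb(SWB_NPAGES * PAGE_SIZE)).
	 */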
	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES * PAGE_SIZE);
	swp->sw_blocks = (sw_blk_t)
	    malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks),
	    M_VMPGDATA, waitok);
	if (swp->sw_blocks == NULL) {
		free((caddr_t) swp, M_VMPGDATA);
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}
	for (i = 0; i < swp->sw_nblocks; i++) {
		swp->sw_blocks[i].swb_valid = 0;
		swp->sw_blocks[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
	}

	swp->sw_poip = 0;
	swp->sw_allocsize = 0;
	if (handle) {
		vm_object_t object;

		swp->sw_flags = SW_NAMED;
		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
		/*
		 * Consistent with other pagers: return with object
		 * referenced.  Can't do this with handle == NULL since it
		 * might be the pageout daemon calling.
		 */
		object = vm_object_allocate(offset + size);
		object->flags &= ~OBJ_INTERNAL;
		vm_object_enter(object, pager);
		object->pager = pager;
	} else {
		swp->sw_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
	}
	pager->pg_handle = handle;
	pager->pg_ops = &swappagerops;
	pager->pg_type = PG_SWAP;
	pager->pg_data = (caddr_t) swp;

	return (pager);
}

/*
 * returns the disk block associated with a pager and offset;
 * additionally, as a side effect, returns a flag indicating
 * whether the block has been written (is valid)
 */
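/*
 * For example (assuming SWB_NPAGES == 8 and 4K pages), offset 0x25000
 * maps to sw_blocks[4] and page index 5 within that block.
 */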
inline static int *
swap_pager_diskaddr(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if ((swp->sw_blocks == NULL) || (ix >= swp->sw_nblocks) ||
			(offset >= swp->sw_osize)) {
		return (NULL);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
		return;

	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
int
swap_pager_getswapspace(sw_pager_t swp, unsigned amount, unsigned *rtval)
{
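	/*
	 * vm_swap_size is debited before the rlist allocation is attempted
	 * and credited back on failure, so the counter never transiently
	 * overstates the space available.
	 */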
	vm_swap_size -= amount;
	if (!rlist_alloc(&swaplist, amount, rtval)) {
		vm_swap_size += amount;
		return 0;
	} else {
		swapsizecheck();
		swp->sw_allocsize += amount;
		return 1;
	}
}

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
void
swap_pager_freeswapspace(sw_pager_t swp, unsigned from, unsigned to)
{
	rlist_free(&swaplist, from, to);
	vm_swap_size += (to - from) + 1;
	swp->sw_allocsize -= (to - from) + 1;
	swapsizecheck();
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
_swap_pager_freespace(swp, start, size)
	sw_pager_t swp;
	vm_offset_t start;
	vm_offset_t size;
{
	vm_offset_t i;
	int s;

	s = splbio();
	for (i = start; i < round_page(start + size); i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(swp, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(swp, *addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				swap_pager_setvalid(swp, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	splx(s);
}

void
swap_pager_freespace(pager, start, size)
	vm_pager_t pager;
	vm_offset_t start;
	vm_offset_t size;
{
	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
}

static void
swap_pager_free_swap(swp)
	sw_pager_t swp;
{
	register int i, j;
	register sw_blk_t bp;
	int first_block = 0, block_count = 0;
	int s;

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
		for (j = 0; j < SWB_NPAGES; j++) {
			if (bp->swb_block[j] != SWB_EMPTY) {
				/*
				 * initially the length of the run is zero
				 */
				if (block_count == 0) {
					first_block = bp->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					bp->swb_block[j] = SWB_EMPTY;
				/*
				 * if the new block can be included in the current run
				 */
				} else if (bp->swb_block[j] == first_block + block_count) {
					block_count += btodb(PAGE_SIZE);
					bp->swb_block[j] = SWB_EMPTY;
				/*
				 * terminate the previous run, and start a new one
				 */
				} else {
					swap_pager_freeswapspace(swp, first_block,
					    (unsigned) first_block + block_count - 1);
					first_block = bp->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					bp->swb_block[j] = SWB_EMPTY;
				}
			}
		}
	}

	if (block_count) {
		swap_pager_freeswapspace(swp, first_block,
		    (unsigned) first_block + block_count - 1);
	}
	splx(s);
}


/*
 * swap_pager_reclaim frees up over-allocated space from all pagers;
 * this eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to. It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 128

void
swap_pager_reclaim()
{
	vm_pager_t p;
	sw_pager_t swp;
	int i, j, k;
	int s;
	int reclaimcount;
	static struct {
		int	address;
		sw_pager_t pager;
	} reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		p = swp_qs[k]->tqh_first;
		while (p && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			swp = (sw_pager_t) p->pg_data;
			for (i = 0; i < swp->sw_nblocks; i++) {
				sw_blk_t swb = &swp->sw_blocks[i];

				if (swb->swb_locked)
					continue;
				for (j = 0; j < SWB_NPAGES; j++) {
					if (swb->swb_block[j] != SWB_EMPTY &&
					    (swb->swb_valid & (1 << j)) == 0) {
						reclaims[reclaimcount].address = swb->swb_block[j];
						reclaims[reclaimcount++].pager = swp;
						swb->swb_block[j] = SWB_EMPTY;
						if (reclaimcount >= MAXRECLAIM)
							goto rfinished;
					}
				}
			}
			p = p->pg_list.tqe_next;
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i].pager, reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
	}
	splx(s);
	in_reclaim = 0;
	wakeup((caddr_t) &in_reclaim);
}


/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */

void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
	vm_pager_t srcpager;
	vm_offset_t srcoffset;
	vm_pager_t dstpager;
	vm_offset_t dstoffset;
	vm_offset_t offset;
{
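	/*
	 * For each page offset i within the destination, the matching
	 * source block lives at (i + offset + srcoffset), as used in the
	 * transfer loop below.
	 */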
	sw_pager_t srcswp, dstswp;
	vm_offset_t i;
	int origsize;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	if (no_swap_space)
		return;

	srcswp = (sw_pager_t) srcpager->pg_data;
	origsize = srcswp->sw_allocsize;
	dstswp = (sw_pager_t) dstpager->pg_data;

	/*
	 * remove the source pager from the swap_pager internal queue
	 */
	s = splbio();
	if (srcswp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
		srcswp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
	}

	while (srcswp->sw_poip) {
		tsleep((caddr_t) srcswp, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	(void) swap_pager_clean();

	s = splbio();
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
		int srcvalid, dstvalid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
						    &srcvalid);
		int *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset,
							&dstvalid);
				/*
				 * if the dest already has a valid block,
				 * deallocate the source block without
				 * copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(dstswp, *dstaddrp,
						*dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					dstswp->sw_allocsize += btodb(PAGE_SIZE);
					srcswp->sw_allocsize -= btodb(PAGE_SIZE);
					swap_pager_setvalid(dstswp, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(srcswp, *srcaddrp,
					*srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
	splx(s);

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(srcswp);

	if (srcswp->sw_allocsize)
		printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n", srcswp->sw_allocsize, origsize);
	free((caddr_t) srcswp->sw_blocks, M_VMPGDATA);
	srcswp->sw_blocks = 0;
	free((caddr_t) srcswp, M_VMPGDATA);
	srcpager->pg_data = 0;
	free((caddr_t) srcpager, M_VMPAGER);

	return;
}

void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register sw_pager_t swp;
	int s;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	s = splbio();
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
	}
	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */
	while (swp->sw_poip) {
		tsleep((caddr_t) swp, PVM, "swpout", 0);
	}
	splx(s);

	(void) swap_pager_clean();

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(swp);

	if (swp->sw_allocsize)
		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n", swp->sw_allocsize);
	/*
	 * Free swap management resources
	 */
	free((caddr_t) swp->sw_blocks, M_VMPGDATA);
	swp->sw_blocks = 0;
	free((caddr_t) swp, M_VMPGDATA);
	pager->pg_data = 0;
	free((caddr_t) pager, M_VMPAGER);
}

/*
 * swap_pager_getmulti can get multiple pages.
 */
int
swap_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	if (reqpage >= count)
		panic("swap_pager_getmulti: reqpage >= count");
	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
}

/*
 * swap_pager_getpage gets individual pages
 */
int
swap_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];

	marray[0] = m;
	return swap_pager_input((sw_pager_t) pager->pg_data, marray, 1, 0);
}

int
swap_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	int flags;

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	return swap_pager_output((sw_pager_t) pager->pg_data, m, c, flags, rtvals);
}

/*
 * swap_pager_putpage writes individual pages
 */
int
swap_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int flags;
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}
	marray[0] = m;
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	swap_pager_output((sw_pager_t) pager->pg_data, marray, 1, flags, rtvals);

	return rtvals[0];
}

static inline int
const
swap_pager_block_index(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return (offset / (SWB_NPAGES * PAGE_SIZE));
}

static inline int
const
swap_pager_block_offset(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE);
}

/*
 * _swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
_swap_pager_haspage(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (swb->swb_block[ix] != SWB_EMPTY) {
		if (swb->swb_valid & (1 << ix))
			return TRUE;
	}
	return (FALSE);
}

/*
 * swap_pager_haspage is the externally accessible version of
 * _swap_pager_haspage above.  this routine takes a vm_pager_t
 * for an argument instead of sw_pager_t.
 */
boolean_t
swap_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  this is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

/*
 * swap_pager_iodone1 is the completion routine for both reads and async writes
 */
void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup((caddr_t) bp);
}


int
swap_pager_input(swp, m, count, reqpage)
	register sw_pager_t swp;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int sequential;

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = object->paging_offset;
	sequential = (m[reqpage]->offset == (object->last_read + PAGE_SIZE));
	/*
	 * First determine if the page exists in the pager if this is a sync
	 * read.  This quickly handles cases where we are following shadow
	 * chains looking for the top level object with the page.
	 */
	if (swp->sw_blocks == NULL) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	for (i = 0; i < count; i++) {
		vm_offset_t foff = m[i]->offset + paging_offset;
		int ix = swap_pager_block_index(swp, foff);

		if (ix >= swp->sw_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &swp->sw_blocks[ix];
		off[i] = swap_pager_block_offset(swp, foff);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that the block for the required page exists */

	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	reqdskregion = reqaddr[reqpage] / dmmax;
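	/*
	 * Clustered pages must come from a single dmmax region; interleaved
	 * swap areas are laid out in dmmax-sized stripes, so a transfer
	 * crossing a region boundary might span devices.
	 */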
	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	++swb[reqpage]->swb_locked;

	/*
	 * At this point: "m" is a pointer to the array of vm_page_t's for
	 * paging I/O, "count" is the number of vm_page_t entries represented
	 * by "m", "object" is the vm_object_t for I/O, and "reqpage" is the
	 * index into "m" for the page actually faulted.
	 */

	spc = NULL;	/* we might not use an spc data structure */

	if ((count == 1) && (swap_pager_free.tqh_first != NULL)) {
		/*
		 * if a kva has not been allocated, we can only do a one page
		 * transfer, so we free the other pages that might have been
		 * allocated by vm_fault.
		 */
		swap_pager_ridpages(m, count, reqpage);
		m[0] = m[reqpage];
		reqaddr[0] = reqaddr[reqpage];

		count = 1;
		reqpage = 0;
		/*
		 * get a swap pager clean data structure, block until we get
		 * it
		 */
		if (swap_pager_free.tqh_first == NULL) {
			s = splbio();
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				pagedaemon_wakeup();
			while (swap_pager_free.tqh_first == NULL) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				if (curproc == pageproc)
					swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
				tsleep((caddr_t) &swap_pager_free,
				    PVM, "swpfre", 0);
				if (curproc == pageproc)
					(void) swap_pager_clean();
				else
					pagedaemon_wakeup();
			}
			splx(s);
		}
		spc = swap_pager_free.tqh_first;
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
		bp->b_vnbufs.le_next = NOLIST;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	pbgetvp(swapdev_vp, bp);
	swp->sw_piip++;

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swread", 0);
	}

	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	--swp->sw_piip;
	if (swp->sw_piip == 0)
		wakeup((caddr_t) swp);

	/*
	 * relpbuf does this, but we maintain our own buffer list also...
	 */
	if (bp->b_vp)
		pbrelvp(bp);

	splx(s);
	--swb[reqpage]->swb_locked;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	if (spc) {
		m[reqpage]->object->last_read = m[reqpage]->offset;
		if (bp->b_flags & B_WANTED)
			wakeup((caddr_t) bp);
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup((caddr_t) &swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page
					 * activated is up in the air, but we
					 * should put the page on a page queue
					 * somewhere. (it already is in the
					 * object). After some empirical
					 * results, it is best to deactivate
					 * the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for
					 * this page we now tell them that it
					 * is ok to use
					 */
					m[i]->valid = VM_PAGE_BITS_ALL;
					PAGE_WAKEUP(m[i]);
				}
			}

			m[reqpage]->object->last_read = m[count - 1]->offset;

			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever pages are brought in. We must clear
			 * the clean flag so that the page contents will be
			 * preserved.
			 */
			if (swap_pager_full) {
				for (i = 0; i < count; i++) {
					m[i]->dirty = VM_PAGE_BITS_ALL;
				}
				_swap_pager_freespace(swp, m[0]->offset + paging_offset, count * PAGE_SIZE);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}

int
swap_pager_output(swp, m, count, flags, rtvals)
	register sw_pager_t swp;
	vm_page_t *m;
	int count;
	int flags;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, foff;
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int failed;

	if (vm_swap_size)
		no_swap_space = 0;
	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}
	spc = NULL;

	object = m[0]->object;
	paging_offset = object->paging_offset;

	failed = 0;
	for (j = 0; j < count; j++) {
		foff = m[j]->offset + paging_offset;
		ix = swap_pager_block_index(swp, foff);
		swb[j] = 0;
		if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &swp->sw_blocks[ix];
		++swb[j]->swb_locked;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(swp, foff);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			int blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works (the
			 * intent of this code is to allocate small chunks for
			 * small objects)
			 */
			if ((foff == 0) &&
				((ntoget * PAGE_SIZE) > object->size)) {
				ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
			}
	retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(swp, ntoget * btodb(PAGE_SIZE), &blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(swp, btodb(PAGE_SIZE),
				&swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}
	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY)
			printf("I/O to empty block????\n");
	}

	/*
	 * For synchronous writes, we clean up all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		swap_pager_clean();
	}
	kva = 0;

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
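	/*
	 * Require at least three free cleaning structures before taking
	 * one, presumably to leave headroom for the pageout daemon and
	 * concurrent synchronous writers.
	 */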
	if (swap_pager_free.tqh_first == NULL ||
		swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
		swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
		s = splbio();
		if (curproc == pageproc) {
			(void) swap_pager_clean();
#if 0
			splx(s);
			return VM_PAGER_AGAIN;
#endif
		} else
			pagedaemon_wakeup();
		while (swap_pager_free.tqh_first == NULL ||
			swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
			swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
			if (curproc == pageproc) {
				swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
				if ((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_reserved)
					wakeup((caddr_t) &cnt.v_free_count);
			}

			swap_pager_needflags |= SWAP_FREE_NEEDED;
			tsleep((caddr_t) &swap_pager_free,
			    PVM, "swpfre", 0);
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				pagedaemon_wakeup();
		}
		splx(s);
	}
	spc = swap_pager_free.tqh_first;
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = 0; i < count; i++) {
		foff = m[i]->offset + paging_offset;
		off = swap_pager_block_offset(swp, foff);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		--swb[i]->swb_locked;
	}

	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;
	bp->b_vnbufs.le_next = NOLIST;

	bp->b_flags = B_BUSY | B_PAGING;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
	 */
	s = splbio();
	if (flags & B_ASYNC) {
		spc->spc_flags = 0;
		spc->spc_swp = swp;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		swp->sw_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		swp->sw_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if ((flags & (B_READ | B_ASYNC)) == B_ASYNC) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_clean();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}
	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swwrt", 0);
	}
	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	--swp->sw_poip;
	if (swp->sw_poip == 0)
		wakeup((caddr_t) swp);

	if (bp->b_vp)
		pbrelvp(bp);
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t) bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * optimization: if a page has been read
				 * during the pageout process, we activate it.
				 */
				if ((m[i]->flags & PG_ACTIVE) == 0 &&
				    ((m[i]->flags & (PG_WANTED | PG_REFERENCED)) ||
				    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) {
					vm_page_activate(m[i]);
				}
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		wakeup((caddr_t) &swap_pager_free);
	}
	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
		pagedaemon_wakeup();
	swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
	return (rv);
}

boolean_t
swap_pager_clean()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (swap_pager_done.tqh_first == NULL)
		return FALSE;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done at splbio()
		 * to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = swap_pager_done.tqh_first) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in the scan
		 * but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup((caddr_t) &swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
		splx(s);
	}

	return (tspc ? TRUE : FALSE);
}

void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	object->paging_in_progress -= spc->spc_count;
	if ((object->paging_in_progress == 0) &&
	    (object->flags & OBJ_PIPWNT)) {
		object->flags &= ~OBJ_PIPWNT;
		thread_wakeup((int) object);
	}

	/*
	 * If no error, mark as clean and inform the pmap system. If error,
	 * mark as dirty so we will try again. (XXX could get stuck doing
	 * this, should give up after a while)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->dirty = 0;
			if ((spc->spc_m[i]->flags & PG_ACTIVE) == 0 &&
			    ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
				vm_page_activate(spc->spc_m[i]);
		}
	}

	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wakeup any processes that are waiting on these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone is the completion routine for async pageouts;
 * it moves the cleaning structure to the done list and wakes up
 * anyone waiting for the I/O or for free resources.
 */
void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
	}

	if (bp->b_vp)
		pbrelvp(bp);

	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t) bp);

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_swp->sw_poip == 0) {
		wakeup((caddr_t) spc->spc_swp);
	}
	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    swap_pager_inuse.tqh_first == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t) &swap_pager_free);
	}

	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
		pagedaemon_wakeup();
	}

	if (vm_pageout_pages_needed) {
		wakeup((caddr_t) &vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	if ((swap_pager_inuse.tqh_first == NULL) ||
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
		pagedaemon_wakeup();
	}
	splx(s);
}

/*
 * return true if any swap control structures can be allocated
 */
int
swap_pager_ready()
{
	if (swap_pager_free.tqh_first)
		return 1;
	else
		return 0;
}
1710