xref: /freebsd/sys/vm/swap_pager.c (revision a316b26e50bbed7cf655fbba726ab87d8ab7599d)
1 /*
2  * Copyright (c) 1994 John S. Dyson
3  * Copyright (c) 1990 University of Utah.
4  * Copyright (c) 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * the Systems Programming Group of the University of Utah Computer
9  * Science Department.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  *
39  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
40  *
41  *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
42  * $Id: swap_pager.c,v 1.22 1995/01/09 16:05:33 davidg Exp $
43  */
44 
45 /*
46  * Quick hack to page to dedicated partition(s).
47  * TODO:
48  *	Add multiprocessor locks
49  *	Deal with async writes in a better fashion
50  */
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/proc.h>
55 #include <sys/buf.h>
56 #include <sys/vnode.h>
57 #include <sys/malloc.h>
58 
59 #include <miscfs/specfs/specdev.h>
60 #include <sys/rlist.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_pager.h>
64 #include <vm/vm_page.h>
65 #include <vm/vm_pageout.h>
66 #include <vm/swap_pager.h>
67 
68 #ifndef NPENDINGIO
69 #define NPENDINGIO	10
70 #endif
71 
72 int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int));
73 int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *));
74 
75 int nswiodone;
76 extern int vm_pageout_rate_limit;
77 static int cleandone;
78 extern int hz;
79 int swap_pager_full;
80 extern vm_map_t pager_map;
81 extern int vm_swap_size;
82 int no_swap_space = 1;
83 struct rlist *swaplist;
84 int nswaplist;
85 extern int vm_pio_needed;
86 
87 #define MAX_PAGEOUT_CLUSTER 8
88 
89 TAILQ_HEAD(swpclean, swpagerclean);
90 
91 typedef struct swpagerclean *swp_clean_t;
92 
93 struct swpagerclean {
94 	TAILQ_ENTRY(swpagerclean) spc_list;
95 	int spc_flags;
96 	struct buf *spc_bp;
97 	sw_pager_t spc_swp;
98 	vm_offset_t spc_kva;
99 	int spc_count;
100 	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
101 } swcleanlist[NPENDINGIO];
102 
103 
104 extern vm_map_t kernel_map;
105 
106 /* spc_flags values */
107 #define SPC_ERROR	0x01
108 
109 #define SWB_EMPTY (-1)
110 
111 struct swpclean swap_pager_done;	/* list of completed page cleans */
112 struct swpclean swap_pager_inuse;	/* list of pending page cleans */
113 struct swpclean swap_pager_free;	/* list of free pager clean structs */
114 struct pagerlst swap_pager_list;	/* list of "named" anon regions */
115 struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */
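/*
 * A swpagerclean record cycles through the lists above: it starts out on
 * swap_pager_free, sits on swap_pager_inuse while its async pageout is in
 * flight, is moved to swap_pager_done by swap_pager_iodone, and is finally
 * recycled back onto the free list by swap_pager_clean.
 */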
116 
117 #define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
118 int swap_pager_needflags;
119 struct rlist *swapfrag;
120 
121 struct pagerlst *swp_qs[] = {
122 	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
123 };
124 
125 int swap_pager_putmulti();
126 
127 struct pagerops swappagerops = {
128 	swap_pager_init,
129 	swap_pager_alloc,
130 	swap_pager_dealloc,
131 	swap_pager_getpage,
132 	swap_pager_getmulti,
133 	swap_pager_putpage,
134 	swap_pager_putmulti,
135 	swap_pager_haspage
136 };
137 
138 int npendingio = NPENDINGIO;
139 int pendingiowait;
140 int require_swap_init;
141 void swap_pager_finish();
142 int dmmin, dmmax;
143 extern int vm_page_count;
144 
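/*
 * swap_pager_full is driven with hysteresis: it is set once less than
 * 128 pages worth of swap remain and cleared again only after more than
 * 192 pages worth are free.  With a 4K page size (an illustrative value;
 * vm_swap_size is kept in DEV_BSIZE blocks), that is roughly 512K and
 * 768K of swap respectively.
 */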
145 static inline void
146 swapsizecheck()
147 {
148 	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
149 		if (swap_pager_full)
150 			printf("swap_pager: out of space\n");
151 		swap_pager_full = 1;
152 	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
153 		swap_pager_full = 0;
154 }
155 
156 void
157 swap_pager_init()
158 {
159 	dfltpagerops = &swappagerops;
160 
161 	TAILQ_INIT(&swap_pager_list);
162 	TAILQ_INIT(&swap_pager_un_list);
163 
164 	/*
165 	 * Initialize clean lists
166 	 */
167 	TAILQ_INIT(&swap_pager_inuse);
168 	TAILQ_INIT(&swap_pager_done);
169 	TAILQ_INIT(&swap_pager_free);
170 
171 	require_swap_init = 1;
172 
173 	/*
174 	 * Calculate the swap allocation constants.
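	 * dmmin is the smallest swap allocation unit, in DEV_BSIZE blocks.
	 * dmmax is twice the size of a full SWB_NPAGES cluster in disk
	 * blocks; transfers are never allowed to span a dmmax region
	 * boundary (see the reqaddr/dmmax checks in swap_pager_input and
	 * swap_pager_output).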
175 	 */
176 
177 	dmmin = CLBYTES / DEV_BSIZE;
178 	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
179 
180 }
181 
182 /*
183  * Allocate a pager structure and associated resources.
184  * Note that if we are called from the pageout daemon (handle == NULL)
185  * we should not wait for memory as it could result in deadlock.
186  */
187 vm_pager_t
188 swap_pager_alloc(handle, size, prot, offset)
189 	caddr_t handle;
190 	register vm_size_t size;
191 	vm_prot_t prot;
192 	vm_offset_t offset;
193 {
194 	register vm_pager_t pager;
195 	register sw_pager_t swp;
196 	int waitok;
197 	int i, j;
198 
199 	if (require_swap_init) {
200 		swp_clean_t spc;
201 		struct buf *bp;
202 
203 #if 0
204 		int desiredpendingio;
205 
206 		desiredpendingio = cnt.v_page_count / 200 + 2;
207 		if (desiredpendingio < npendingio)
208 			npendingio = desiredpendingio;
209 #endif
210 
211 		/*
212 		 * KVAs are allocated here so that we don't need to keep doing
213 		 * kmem_alloc_pageable calls at runtime
214 		 */
215 		for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
216 			spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
217 			if (!spc->spc_kva) {
218 				break;
219 			}
220 			spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_NOWAIT);
221 			if (!spc->spc_bp) {
222 				kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
223 				break;
224 			}
225 			spc->spc_flags = 0;
226 			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
227 		}
228 		require_swap_init = 0;
229 		if (size == 0)
230 			return (NULL);
231 	}
232 	/*
233 	 * If this is a "named" anonymous region, look it up and return the
234 	 * appropriate pager if it exists.
235 	 */
236 	if (handle) {
237 		pager = vm_pager_lookup(&swap_pager_list, handle);
238 		if (pager != NULL) {
239 			/*
240 			 * Use vm_object_lookup to gain a reference to the
241 			 * object and also to remove from the object cache.
242 			 */
243 			if (vm_object_lookup(pager) == NULL)
244 				panic("swap_pager_alloc: bad object");
245 			return (pager);
246 		}
247 	}
248 	/*
249 	 * Pager doesn't exist, allocate swap management resources and
250 	 * initialize.
251 	 */
252 	waitok = handle ? M_WAITOK : M_NOWAIT;
253 	pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, waitok);
254 	if (pager == NULL)
255 		return (NULL);
256 	swp = (sw_pager_t) malloc(sizeof *swp, M_VMPGDATA, waitok);
257 	if (swp == NULL) {
258 		free((caddr_t) pager, M_VMPAGER);
259 		return (NULL);
260 	}
261 	size = round_page(size);
262 	swp->sw_osize = size;
263 	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES * PAGE_SIZE);
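	/*
	 * Each sw_blocks entry maps SWB_NPAGES pages, so round up here.
	 * As an illustration (assuming 4K pages and SWB_NPAGES == 8), a
	 * 1MB object gives btodb(size) == 2048 and btodb(SWB_NPAGES *
	 * PAGE_SIZE) == 64, hence sw_nblocks == 32.
	 */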
264 	swp->sw_blocks = (sw_blk_t)
265 	    malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks),
266 	    M_VMPGDATA, waitok);
267 	if (swp->sw_blocks == NULL) {
268 		free((caddr_t) swp, M_VMPGDATA);
269 		free((caddr_t) pager, M_VMPAGER);
270 		return (NULL);
271 	}
272 	for (i = 0; i < swp->sw_nblocks; i++) {
273 		swp->sw_blocks[i].swb_valid = 0;
274 		swp->sw_blocks[i].swb_locked = 0;
275 		for (j = 0; j < SWB_NPAGES; j++)
276 			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
277 	}
278 
279 	swp->sw_poip = 0;
280 	if (handle) {
281 		vm_object_t object;
282 
283 		swp->sw_flags = SW_NAMED;
284 		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
285 		/*
286 		 * Consistent with other pagers: return with object
287 		 * referenced.  Can't do this with handle == NULL since it
288 		 * might be the pageout daemon calling.
289 		 */
290 		object = vm_object_allocate(size);
291 		vm_object_enter(object, pager);
292 		vm_object_setpager(object, pager, 0, FALSE);
293 	} else {
294 		swp->sw_flags = 0;
295 		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
296 	}
297 	pager->pg_handle = handle;
298 	pager->pg_ops = &swappagerops;
299 	pager->pg_type = PG_SWAP;
300 	pager->pg_data = (caddr_t) swp;
301 
302 	return (pager);
303 }
304 
305 /*
306  * returns disk block associated with pager and offset
307  * additionally, as a side effect returns a flag indicating
308  * if the block has been written
309  */
310 
311 static int *
312 swap_pager_diskaddr(swp, offset, valid)
313 	sw_pager_t swp;
314 	vm_offset_t offset;
315 	int *valid;
316 {
317 	register sw_blk_t swb;
318 	int ix;
319 
320 	if (valid)
321 		*valid = 0;
322 	ix = offset / (SWB_NPAGES * PAGE_SIZE);
323 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
324 		return (NULL);
325 	}
326 	swb = &swp->sw_blocks[ix];
327 	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
328 	if (valid)
329 		*valid = swb->swb_valid & (1 << ix);
330 	return &swb->swb_block[ix];
331 }
332 
333 /*
334  * Utility routine to set the valid (written) bit for
335  * a block associated with a pager and offset
336  */
337 static void
338 swap_pager_setvalid(swp, offset, valid)
339 	sw_pager_t swp;
340 	vm_offset_t offset;
341 	int valid;
342 {
343 	register sw_blk_t swb;
344 	int ix;
345 
346 	ix = offset / (SWB_NPAGES * PAGE_SIZE);
347 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
348 		return;
349 
350 	swb = &swp->sw_blocks[ix];
351 	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
352 	if (valid)
353 		swb->swb_valid |= (1 << ix);
354 	else
355 		swb->swb_valid &= ~(1 << ix);
356 	return;
357 }
358 
359 /*
360  * this routine allocates swap space with a fragmentation
361  * minimization policy.
362  */
363 int
364 swap_pager_getswapspace(unsigned amount, unsigned *rtval)
365 {
366 	vm_swap_size -= amount;
367 	if (!rlist_alloc(&swaplist, amount, rtval)) {
368 		vm_swap_size += amount;
369 		return 0;
370 	} else {
371 		swapsizecheck();
372 		return 1;
373 	}
374 }
375 
376 /*
377  * this routine frees swap space with a fragmentation
378  * minimization policy.
379  */
380 void
381 swap_pager_freeswapspace(unsigned from, unsigned to)
382 {
383 	rlist_free(&swaplist, from, to);
384 	vm_swap_size += (to - from) + 1;
385 	swapsizecheck();
386 }
387 /*
388  * this routine frees swap blocks from a specified pager
389  */
390 void
391 _swap_pager_freespace(swp, start, size)
392 	sw_pager_t swp;
393 	vm_offset_t start;
394 	vm_offset_t size;
395 {
396 	vm_offset_t i;
397 	int s;
398 
399 	s = splbio();
400 	for (i = start; i < round_page(start + size); i += PAGE_SIZE) {
401 		int valid;
402 		int *addr = swap_pager_diskaddr(swp, i, &valid);
403 
404 		if (addr && *addr != SWB_EMPTY) {
405 			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
406 			if (valid) {
407 				swap_pager_setvalid(swp, i, 0);
408 			}
409 			*addr = SWB_EMPTY;
410 		}
411 	}
412 	splx(s);
413 }
414 
415 void
416 swap_pager_freespace(pager, start, size)
417 	vm_pager_t pager;
418 	vm_offset_t start;
419 	vm_offset_t size;
420 {
421 	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
422 }
423 
424 /*
425  * swap_pager_reclaim frees up over-allocated space from all pagers
426  * this eliminates internal fragmentation due to allocation of space
427  * for segments that are never swapped to. It has been written so that
428  * it does not block until the rlist_free operation occurs; it keeps
429  * the queues consistent.
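 * Candidate blocks are first gathered into the local reclaims[] array
 * while the pager queues are walked at splbio(); only after the scan
 * completes are they handed to swap_pager_freeswapspace, so rlist_free
 * never runs in the middle of a queue scan.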
430  */
431 
432 /*
433  * Maximum number of blocks (pages) to reclaim per pass
434  */
435 #define MAXRECLAIM 256
436 
437 void
438 swap_pager_reclaim()
439 {
440 	vm_pager_t p;
441 	sw_pager_t swp;
442 	int i, j, k;
443 	int s;
444 	int reclaimcount;
445 	static int reclaims[MAXRECLAIM];
446 	static int in_reclaim;
447 
448 	/*
449 	 * allow only one process to be in the swap_pager_reclaim subroutine
450 	 */
451 	s = splbio();
452 	if (in_reclaim) {
453 		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
454 		splx(s);
455 		return;
456 	}
457 	in_reclaim = 1;
458 	reclaimcount = 0;
459 
460 	/* for each pager queue */
461 	for (k = 0; swp_qs[k]; k++) {
462 
463 		p = swp_qs[k]->tqh_first;
464 		while (p && (reclaimcount < MAXRECLAIM)) {
465 
466 			/*
467 			 * see if any blocks associated with a pager have been
468 			 * allocated but not used (written)
469 			 */
470 			swp = (sw_pager_t) p->pg_data;
471 			for (i = 0; i < swp->sw_nblocks; i++) {
472 				sw_blk_t swb = &swp->sw_blocks[i];
473 
474 				if (swb->swb_locked)
475 					continue;
476 				for (j = 0; j < SWB_NPAGES; j++) {
477 					if (swb->swb_block[j] != SWB_EMPTY &&
478 					    (swb->swb_valid & (1 << j)) == 0) {
479 						reclaims[reclaimcount++] = swb->swb_block[j];
480 						swb->swb_block[j] = SWB_EMPTY;
481 						if (reclaimcount >= MAXRECLAIM)
482 							goto rfinished;
483 					}
484 				}
485 			}
486 			p = p->pg_list.tqe_next;
487 		}
488 	}
489 
490 rfinished:
491 
492 	/*
493 	 * free the blocks that have been added to the reclaim list
494 	 */
495 	for (i = 0; i < reclaimcount; i++) {
496 		swap_pager_freeswapspace(reclaims[i], reclaims[i] + btodb(PAGE_SIZE) - 1);
497 		wakeup((caddr_t) &in_reclaim);
498 	}
499 
500 	splx(s);
501 	in_reclaim = 0;
502 	wakeup((caddr_t) &in_reclaim);
503 }
504 
505 
506 /*
507  * swap_pager_copy copies blocks from one pager to another and
508  * destroys the source pager
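 * The copy proceeds in three passes: source space below the transfer
 * window is released, blocks in the overlap are moved (or freed if the
 * destination already holds a valid copy), and any remaining source
 * blocks past the window are freed.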
509  */
510 
511 void
512 swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
513 	vm_pager_t srcpager;
514 	vm_offset_t srcoffset;
515 	vm_pager_t dstpager;
516 	vm_offset_t dstoffset;
517 	vm_offset_t offset;
518 {
519 	sw_pager_t srcswp, dstswp;
520 	vm_offset_t i;
521 	int s;
522 
523 	if (vm_swap_size)
524 		no_swap_space = 0;
525 
526 	if (no_swap_space)
527 		return;
528 
529 	srcswp = (sw_pager_t) srcpager->pg_data;
530 	dstswp = (sw_pager_t) dstpager->pg_data;
531 
532 	/*
533 	 * remove the source pager from the swap_pager internal queue
534 	 */
535 	s = splbio();
536 	if (srcswp->sw_flags & SW_NAMED) {
537 		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
538 		srcswp->sw_flags &= ~SW_NAMED;
539 	} else {
540 		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
541 	}
542 
543 	while (srcswp->sw_poip) {
544 		tsleep((caddr_t) srcswp, PVM, "spgout", 0);
545 	}
546 	splx(s);
547 
548 	/*
549 	 * clean all of the pages that are currently active and finished
550 	 */
551 	(void) swap_pager_clean();
552 
553 	s = splbio();
554 	/*
555 	 * clear source block before destination object
556 	 * (release allocated space)
557 	 */
558 	for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
559 		int valid;
560 		int *addr = swap_pager_diskaddr(srcswp, i, &valid);
561 
562 		if (addr && *addr != SWB_EMPTY) {
563 			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
564 			*addr = SWB_EMPTY;
565 		}
566 	}
567 	/*
568 	 * transfer source to destination
569 	 */
570 	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
571 		int srcvalid, dstvalid;
572 		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
573 		    &srcvalid);
574 		int *dstaddrp;
575 
576 		/*
577 		 * see if the source has space allocated
578 		 */
579 		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
580 			/*
581 			 * if the source is valid and the dest has no space,
582 			 * then copy the allocation from the source to the
583 			 * dest.
584 			 */
585 			if (srcvalid) {
586 				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
587 				/*
588 				 * if the dest already has a valid block,
589 				 * deallocate the source block without
590 				 * copying.
591 				 */
592 				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
593 					swap_pager_freeswapspace(*dstaddrp, *dstaddrp + btodb(PAGE_SIZE) - 1);
594 					*dstaddrp = SWB_EMPTY;
595 				}
596 				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
597 					*dstaddrp = *srcaddrp;
598 					*srcaddrp = SWB_EMPTY;
599 					swap_pager_setvalid(dstswp, i + dstoffset, 1);
600 				}
601 			}
602 			/*
603 			 * if the source is not empty at this point, then
604 			 * deallocate the space.
605 			 */
606 			if (*srcaddrp != SWB_EMPTY) {
607 				swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
608 				*srcaddrp = SWB_EMPTY;
609 			}
610 		}
611 	}
612 
613 	/*
614 	 * deallocate the rest of the source object
615 	 */
616 	for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
617 		int valid;
618 		int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);
619 
620 		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
621 			swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
622 			*srcaddrp = SWB_EMPTY;
623 		}
624 	}
625 
626 	splx(s);
627 
628 	free((caddr_t) srcswp->sw_blocks, M_VMPGDATA);
629 	srcswp->sw_blocks = 0;
630 	free((caddr_t) srcswp, M_VMPGDATA);
631 	srcpager->pg_data = 0;
632 	free((caddr_t) srcpager, M_VMPAGER);
633 
634 	return;
635 }
636 
637 
638 void
639 swap_pager_dealloc(pager)
640 	vm_pager_t pager;
641 {
642 	register int i, j;
643 	register sw_blk_t bp;
644 	register sw_pager_t swp;
645 	int s;
646 
647 	/*
648 	 * Remove from list right away so lookups will fail if we block for
649 	 * pageout completion.
650 	 */
651 	s = splbio();
652 	swp = (sw_pager_t) pager->pg_data;
653 	if (swp->sw_flags & SW_NAMED) {
654 		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
655 		swp->sw_flags &= ~SW_NAMED;
656 	} else {
657 		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
658 	}
659 	/*
660 	 * Wait for all pageouts to finish and remove all entries from
661 	 * cleaning list.
662 	 */
663 
664 	while (swp->sw_poip) {
665 		tsleep((caddr_t) swp, PVM, "swpout", 0);
666 	}
667 	splx(s);
668 
669 
670 	(void) swap_pager_clean();
671 
672 	/*
673 	 * Free left over swap blocks
674 	 */
675 	s = splbio();
676 	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
677 		for (j = 0; j < SWB_NPAGES; j++)
678 			if (bp->swb_block[j] != SWB_EMPTY) {
679 				swap_pager_freeswapspace((unsigned) bp->swb_block[j],
680 				    (unsigned) bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
681 				bp->swb_block[j] = SWB_EMPTY;
682 			}
683 	}
684 	splx(s);
685 
686 	/*
687 	 * Free swap management resources
688 	 */
689 	free((caddr_t) swp->sw_blocks, M_VMPGDATA);
690 	swp->sw_blocks = 0;
691 	free((caddr_t) swp, M_VMPGDATA);
692 	pager->pg_data = 0;
693 	free((caddr_t) pager, M_VMPAGER);
694 }
695 
696 /*
697  * swap_pager_getmulti can get multiple pages.
698  */
699 int
700 swap_pager_getmulti(pager, m, count, reqpage, sync)
701 	vm_pager_t pager;
702 	vm_page_t *m;
703 	int count;
704 	int reqpage;
705 	boolean_t sync;
706 {
707 	if (reqpage >= count)
708 		panic("swap_pager_getmulti: reqpage >= count\n");
709 	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
710 }
711 
712 /*
713  * swap_pager_getpage gets individual pages
714  */
715 int
716 swap_pager_getpage(pager, m, sync)
717 	vm_pager_t pager;
718 	vm_page_t m;
719 	boolean_t sync;
720 {
721 	vm_page_t marray[1];
722 
723 	marray[0] = m;
724 	return swap_pager_input((sw_pager_t) pager->pg_data, marray, 1, 0);
725 }
726 
727 int
728 swap_pager_putmulti(pager, m, c, sync, rtvals)
729 	vm_pager_t pager;
730 	vm_page_t *m;
731 	int c;
732 	boolean_t sync;
733 	int *rtvals;
734 {
735 	int flags;
736 
737 	if (pager == NULL) {
738 		(void) swap_pager_clean();
739 		return VM_PAGER_OK;
740 	}
741 	flags = B_WRITE;
742 	if (!sync)
743 		flags |= B_ASYNC;
744 
745 	return swap_pager_output((sw_pager_t) pager->pg_data, m, c, flags, rtvals);
746 }
747 
748 /*
749  * swap_pager_putpage writes individual pages
750  */
751 int
752 swap_pager_putpage(pager, m, sync)
753 	vm_pager_t pager;
754 	vm_page_t m;
755 	boolean_t sync;
756 {
757 	int flags;
758 	vm_page_t marray[1];
759 	int rtvals[1];
760 
761 
762 	if (pager == NULL) {
763 		(void) swap_pager_clean();
764 		return VM_PAGER_OK;
765 	}
766 	marray[0] = m;
767 	flags = B_WRITE;
768 	if (!sync)
769 		flags |= B_ASYNC;
770 
771 	swap_pager_output((sw_pager_t) pager->pg_data, marray, 1, flags, rtvals);
772 
773 	return rtvals[0];
774 }
775 
776 static inline int
777 const
778 swap_pager_block_index(swp, offset)
779 	sw_pager_t swp;
780 	vm_offset_t offset;
781 {
782 	return (offset / (SWB_NPAGES * PAGE_SIZE));
783 }
784 
785 static inline int
786 const
787 swap_pager_block_offset(swp, offset)
788 	sw_pager_t swp;
789 	vm_offset_t offset;
790 {
791 	return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE);
792 }
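/*
 * For illustration (assuming 4K pages and SWB_NPAGES == 8, so each
 * sw_blocks entry spans 32K): an offset of 0x9000 yields block index
 * 0x9000 / 0x8000 == 1 and page slot (0x9000 % 0x8000) / 0x1000 == 1.
 */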
793 
794 /*
795  * _swap_pager_haspage returns TRUE if the pager has data that has
796  * been written out.
797  */
798 static boolean_t
799 _swap_pager_haspage(swp, offset)
800 	sw_pager_t swp;
801 	vm_offset_t offset;
802 {
803 	register sw_blk_t swb;
804 	int ix;
805 
806 	ix = offset / (SWB_NPAGES * PAGE_SIZE);
807 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
808 		return (FALSE);
809 	}
810 	swb = &swp->sw_blocks[ix];
811 	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
812 	if (swb->swb_block[ix] != SWB_EMPTY) {
813 		if (swb->swb_valid & (1 << ix))
814 			return TRUE;
815 	}
816 	return (FALSE);
817 }
818 
819 /*
820  * swap_pager_haspage is the externally accessible version of
821  * _swap_pager_haspage above.  this routine takes a vm_pager_t
822  * for an argument instead of sw_pager_t.
823  */
824 boolean_t
825 swap_pager_haspage(pager, offset)
826 	vm_pager_t pager;
827 	vm_offset_t offset;
828 {
829 	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
830 }
831 
832 /*
833  * swap_pager_freepage is a convenience routine that clears the busy
834  * bit and deallocates a page.
835  */
836 static void
837 swap_pager_freepage(m)
838 	vm_page_t m;
839 {
840 	PAGE_WAKEUP(m);
841 	vm_page_free(m);
842 }
843 
844 /*
845  * swap_pager_ridpages is a convenience routine that deallocates all
846  * but the required page.  this is usually used in error returns that
847  * need to invalidate the "extra" readahead pages.
848  */
849 static void
850 swap_pager_ridpages(m, count, reqpage)
851 	vm_page_t *m;
852 	int count;
853 	int reqpage;
854 {
855 	int i;
856 
857 	for (i = 0; i < count; i++)
858 		if (i != reqpage)
859 			swap_pager_freepage(m[i]);
860 }
861 
862 int swapwritecount = 0;
863 
864 /*
865  * swap_pager_iodone1 is the completion routine for both reads and sync writes
866  */
867 void
868 swap_pager_iodone1(bp)
869 	struct buf *bp;
870 {
871 	bp->b_flags |= B_DONE;
872 	bp->b_flags &= ~B_ASYNC;
873 	wakeup((caddr_t) bp);
874 /*
875 	if ((bp->b_flags & B_READ) == 0)
876 		vwakeup(bp);
877 */
878 }
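/*
 * Synchronous callers sleep on the buf itself, so setting B_DONE and
 * waking the buf is all that is required here; bookkeeping for async
 * writes is handled by swap_pager_iodone below.
 */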
879 
880 
881 int
882 swap_pager_input(swp, m, count, reqpage)
883 	register sw_pager_t swp;
884 	vm_page_t *m;
885 	int count, reqpage;
886 {
887 	register struct buf *bp;
888 	sw_blk_t swb[count];
889 	register int s;
890 	int i;
891 	boolean_t rv;
892 	vm_offset_t kva, off[count];
893 	swp_clean_t spc;
894 	vm_offset_t paging_offset;
895 	vm_object_t object;
896 	int reqaddr[count];
897 
898 	int first, last;
899 	int failed;
900 	int reqdskregion;
901 
902 	object = m[reqpage]->object;
903 	paging_offset = object->paging_offset;
904 	/*
905 	 * First determine if the page exists in the pager if this is a sync
906 	 * read.  This quickly handles cases where we are following shadow
907 	 * chains looking for the top level object with the page.
908 	 */
909 	if (swp->sw_blocks == NULL) {
910 		swap_pager_ridpages(m, count, reqpage);
911 		return (VM_PAGER_FAIL);
912 	}
913 	for (i = 0; i < count; i++) {
914 		vm_offset_t foff = m[i]->offset + paging_offset;
915 		int ix = swap_pager_block_index(swp, foff);
916 
917 		if (ix >= swp->sw_nblocks) {
918 			int j;
919 
920 			if (i <= reqpage) {
921 				swap_pager_ridpages(m, count, reqpage);
922 				return (VM_PAGER_FAIL);
923 			}
924 			for (j = i; j < count; j++) {
925 				swap_pager_freepage(m[j]);
926 			}
927 			count = i;
928 			break;
929 		}
930 		swb[i] = &swp->sw_blocks[ix];
931 		off[i] = swap_pager_block_offset(swp, foff);
932 		reqaddr[i] = swb[i]->swb_block[off[i]];
933 	}
934 
935 	/* make sure that our required input request actually exists */
936 
937 	if (reqaddr[reqpage] == SWB_EMPTY ||
938 	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
939 		swap_pager_ridpages(m, count, reqpage);
940 		return (VM_PAGER_FAIL);
941 	}
942 	reqdskregion = reqaddr[reqpage] / dmmax;
943 
944 	/*
945 	 * search backwards for the first contiguous page to transfer
946 	 */
947 	failed = 0;
948 	first = 0;
949 	for (i = reqpage - 1; i >= 0; --i) {
950 		if (failed || (reqaddr[i] == SWB_EMPTY) ||
951 		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
952 		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
953 		    ((reqaddr[i] / dmmax) != reqdskregion)) {
954 			failed = 1;
955 			swap_pager_freepage(m[i]);
956 			if (first == 0)
957 				first = i + 1;
958 		}
959 	}
960 	/*
961 	 * search forwards for the last contiguous page to transfer
962 	 */
963 	failed = 0;
964 	last = count;
965 	for (i = reqpage + 1; i < count; i++) {
966 		if (failed || (reqaddr[i] == SWB_EMPTY) ||
967 		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
968 		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
969 		    ((reqaddr[i] / dmmax) != reqdskregion)) {
970 			failed = 1;
971 			swap_pager_freepage(m[i]);
972 			if (last == count)
973 				last = i;
974 		}
975 	}
976 
977 	count = last;
978 	if (first != 0) {
979 		for (i = first; i < count; i++) {
980 			m[i - first] = m[i];
981 			reqaddr[i - first] = reqaddr[i];
982 			off[i - first] = off[i];
983 		}
984 		count -= first;
985 		reqpage -= first;
986 	}
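	/*
	 * The arrays have now been compacted: m[0..count-1] is a single
	 * disk-contiguous run within one dmmax region, and reqpage again
	 * indexes the originally faulted page within it.
	 */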
987 	++swb[reqpage]->swb_locked;
988 
989 	/*
990 	 * At this point: "m" is a pointer to the array of vm_page_t's for
991 	 * paging I/O, "count" is the number of vm_page_t entries represented
992 	 * by "m", "object" is the vm_object_t for I/O, and "reqpage" is the
993 	 * index into "m" for the page actually faulted.
994 	 */
995 
996 	spc = NULL;	/* we might not use an spc data structure */
997 
998 	if (count == 1) {
999 		/*
1000 		 * if a kva has not been allocated, we can only do a one page
1001 		 * transfer, so we free the other pages that might have been
1002 		 * allocated by vm_fault.
1003 		 */
1004 		swap_pager_ridpages(m, count, reqpage);
1005 		m[0] = m[reqpage];
1006 		reqaddr[0] = reqaddr[reqpage];
1007 
1008 		count = 1;
1009 		reqpage = 0;
1010 		/*
1011 		 * get a swap pager clean data structure, block until we get
1012 		 * it
1013 		 */
1014 		if (swap_pager_free.tqh_first == NULL) {
1015 			s = splbio();
1016 			if (curproc == pageproc)
1017 				(void) swap_pager_clean();
1018 			else
1019 				wakeup((caddr_t) &vm_pages_needed);
1020 			while (swap_pager_free.tqh_first == NULL) {
1021 				swap_pager_needflags |= SWAP_FREE_NEEDED;
1022 				tsleep((caddr_t) &swap_pager_free,
1023 				    PVM, "swpfre", 0);
1024 				if (curproc == pageproc)
1025 					(void) swap_pager_clean();
1026 				else
1027 					wakeup((caddr_t) &vm_pages_needed);
1028 			}
1029 			splx(s);
1030 		}
1031 		spc = swap_pager_free.tqh_first;
1032 		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
1033 		kva = spc->spc_kva;
1034 		bp = spc->spc_bp;
1035 		bzero(bp, sizeof *bp);
1036 		bp->b_spc = spc;
1037 		bp->b_vnbufs.le_next = NOLIST;
1038 	} else {
1039 		/*
1040 		 * Get a swap buffer header to perform the IO
1041 		 */
1042 		bp = getpbuf();
1043 		kva = (vm_offset_t) bp->b_data;
1044 	}
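	/*
	 * Single-page reads use a preallocated spc (buf header plus kva)
	 * while larger reads borrow a physical buffer via getpbuf();
	 * presumably, as with the pageout path below, this keeps the
	 * common pagein case from contending for pbufs.
	 */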
1045 
1046 	/*
1047 	 * map our page(s) into kva for input
1048 	 */
1049 	pmap_qenter(kva, m, count);
1050 
1051 	bp->b_flags = B_BUSY | B_READ | B_CALL;
1052 	bp->b_iodone = swap_pager_iodone1;
1053 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
1054 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1055 	crhold(bp->b_rcred);
1056 	crhold(bp->b_wcred);
1057 	bp->b_un.b_addr = (caddr_t) kva;
1058 	bp->b_blkno = reqaddr[0];
1059 	bp->b_bcount = PAGE_SIZE * count;
1060 	bp->b_bufsize = PAGE_SIZE * count;
1061 
1062 	pbgetvp(swapdev_vp, bp);
1063 	swp->sw_piip++;
1064 
1065 	cnt.v_swapin++;
1066 	cnt.v_swappgsin += count;
1067 	/*
1068 	 * perform the I/O
1069 	 */
1070 	VOP_STRATEGY(bp);
1071 
1072 	/*
1073 	 * wait for the sync I/O to complete
1074 	 */
1075 	s = splbio();
1076 	while ((bp->b_flags & B_DONE) == 0) {
1077 		tsleep((caddr_t) bp, PVM, "swread", 0);
1078 	}
1079 
1080 	if (bp->b_flags & B_ERROR) {
1081 		printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
1082 		    bp->b_blkno, bp->b_bcount, bp->b_error);
1083 		rv = VM_PAGER_ERROR;
1084 	} else {
1085 		rv = VM_PAGER_OK;
1086 	}
1087 
1088 	--swp->sw_piip;
1089 	if (swp->sw_piip == 0)
1090 		wakeup((caddr_t) swp);
1091 
1092 
1093 	/*
1094 	 * relpbuf does this, but we maintain our own buffer list also...
1095 	 */
1096 	if (bp->b_vp)
1097 		pbrelvp(bp);
1098 
1099 	splx(s);
1100 	--swb[reqpage]->swb_locked;
1101 
1102 	/*
1103 	 * remove the mapping for kernel virtual
1104 	 */
1105 	pmap_qremove(kva, count);
1106 
1107 	if (spc) {
1108 		if (bp->b_flags & B_WANTED)
1109 			wakeup((caddr_t) bp);
1110 		/*
1111 		 * if we have used an spc, we need to free it.
1112 		 */
1113 		if (bp->b_rcred != NOCRED)
1114 			crfree(bp->b_rcred);
1115 		if (bp->b_wcred != NOCRED)
1116 			crfree(bp->b_wcred);
1117 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1118 		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1119 			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1120 			wakeup((caddr_t) &swap_pager_free);
1121 		}
1122 	} else {
1123 		/*
1124 		 * release the physical I/O buffer
1125 		 */
1126 		relpbuf(bp);
1127 		/*
1128 		 * finish up input if everything is ok
1129 		 */
1130 		if (rv == VM_PAGER_OK) {
1131 			for (i = 0; i < count; i++) {
1132 				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1133 				m[i]->dirty = 0;
1134 				if (i != reqpage) {
1135 					/*
1136 					 * whether or not to leave the page
1137 					 * activated is up in the air, but we
1138 					 * should put the page on a page queue
1139 					 * somewhere. (it already is in the
1140 					 * object). After some empirical
1141 					 * results, it is best to deactivate
1142 					 * the readahead pages.
1143 					 */
1144 					if ((i == reqpage - 1) || (i == reqpage + 1))
1145 						vm_page_activate(m[i]);
1146 					else
1147 						vm_page_deactivate(m[i]);
1148 
1149 					/*
1150 					 * just in case someone was asking for
1151 					 * this page we now tell them that it
1152 					 * is ok to use
1153 					 */
1154 					m[i]->valid = VM_PAGE_BITS_ALL;
1155 					PAGE_WAKEUP(m[i]);
1156 				}
1157 			}
1158 			/*
1159 			 * If we're out of swap space, then attempt to free
1160 			 * some whenever pages are brought in. We must clear
1161 			 * the clean flag so that the page contents will be
1162 			 * preserved.
1163 			 */
1164 			if (swap_pager_full) {
1165 				for (i = 0; i < count; i++) {
1166 					m[i]->dirty = VM_PAGE_BITS_ALL;
1167 				}
1168 				_swap_pager_freespace(swp, m[0]->offset + paging_offset, count * PAGE_SIZE);
1169 			}
1170 		} else {
1171 			swap_pager_ridpages(m, count, reqpage);
1172 		}
1173 	}
1174 	return (rv);
1175 }
1176 
1177 int
1178 swap_pager_output(swp, m, count, flags, rtvals)
1179 	register sw_pager_t swp;
1180 	vm_page_t *m;
1181 	int count;
1182 	int flags;
1183 	int *rtvals;
1184 {
1185 	register struct buf *bp;
1186 	sw_blk_t swb[count];
1187 	register int s;
1188 	int i, j, ix;
1189 	boolean_t rv;
1190 	vm_offset_t kva, off, foff;
1191 	swp_clean_t spc;
1192 	vm_offset_t paging_offset;
1193 	vm_object_t object;
1194 	int reqaddr[count];
1195 	int failed;
1196 
1197 	if (vm_swap_size)
1198 		no_swap_space = 0;
1199 	if (no_swap_space) {
1200 		for (i = 0; i < count; i++)
1201 			rtvals[i] = VM_PAGER_FAIL;
1202 		return VM_PAGER_FAIL;
1203 	}
1204 	spc = NULL;
1205 
1206 	object = m[0]->object;
1207 	paging_offset = object->paging_offset;
1208 
1209 	failed = 0;
1210 	for (j = 0; j < count; j++) {
1211 		foff = m[j]->offset + paging_offset;
1212 		ix = swap_pager_block_index(swp, foff);
1213 		swb[j] = 0;
1214 		if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
1215 			rtvals[j] = VM_PAGER_FAIL;
1216 			failed = 1;
1217 			continue;
1218 		} else {
1219 			rtvals[j] = VM_PAGER_OK;
1220 		}
1221 		swb[j] = &swp->sw_blocks[ix];
1222 		++swb[j]->swb_locked;
1223 		if (failed) {
1224 			rtvals[j] = VM_PAGER_FAIL;
1225 			continue;
1226 		}
1227 		off = swap_pager_block_offset(swp, foff);
1228 		reqaddr[j] = swb[j]->swb_block[off];
1229 		if (reqaddr[j] == SWB_EMPTY) {
1230 			int blk;
1231 			int tries;
1232 			int ntoget;
1233 
1234 			tries = 0;
1235 			s = splbio();
1236 
1237 			/*
1238 			 * if any other pages have been allocated in this
1239 			 * block, we only try to get one page.
1240 			 */
1241 			for (i = 0; i < SWB_NPAGES; i++) {
1242 				if (swb[j]->swb_block[i] != SWB_EMPTY)
1243 					break;
1244 			}
1245 
1246 
1247 			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
1248 			/*
1249 			 * this code is a little conservative, but works (the
1250 			 * intent of this code is to allocate small chunks for
1251 			 * small objects)
1252 			 */
1253 			if ((m[j]->offset == 0) && (ntoget * PAGE_SIZE > object->size)) {
1254 				ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
1255 			}
1256 	retrygetspace:
1257 			if (!swap_pager_full && ntoget > 1 &&
1258 			    swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {
1259 
1260 				for (i = 0; i < ntoget; i++) {
1261 					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
1262 					swb[j]->swb_valid = 0;
1263 				}
1264 
1265 				reqaddr[j] = swb[j]->swb_block[off];
1266 			} else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
1267 				&swb[j]->swb_block[off])) {
1268 				/*
1269 				 * if the allocation has failed, we try to
1270 				 * reclaim space and retry.
1271 				 */
1272 				if (++tries == 1) {
1273 					swap_pager_reclaim();
1274 					goto retrygetspace;
1275 				}
1276 				rtvals[j] = VM_PAGER_AGAIN;
1277 				failed = 1;
1278 				swap_pager_full = 1;
1279 			} else {
1280 				reqaddr[j] = swb[j]->swb_block[off];
1281 				swb[j]->swb_valid &= ~(1 << off);
1282 			}
1283 			splx(s);
1284 		}
1285 	}
1286 
1287 	/*
1288 	 * search forwards for the last contiguous page to transfer
1289 	 */
1290 	failed = 0;
1291 	for (i = 0; i < count; i++) {
1292 		if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
1293 		    (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
1294 		    (rtvals[i] != VM_PAGER_OK)) {
1295 			failed = 1;
1296 			if (rtvals[i] == VM_PAGER_OK)
1297 				rtvals[i] = VM_PAGER_AGAIN;
1298 		}
1299 	}
1300 
1301 	for (i = 0; i < count; i++) {
1302 		if (rtvals[i] != VM_PAGER_OK) {
1303 			if (swb[i])
1304 				--swb[i]->swb_locked;
1305 		}
1306 	}
1307 
1308 	for (i = 0; i < count; i++)
1309 		if (rtvals[i] != VM_PAGER_OK)
1310 			break;
1311 
1312 	if (i == 0) {
1313 		return VM_PAGER_AGAIN;
1314 	}
1315 	count = i;
1316 	for (i = 0; i < count; i++) {
1317 		if (reqaddr[i] == SWB_EMPTY)
1318 			printf("I/O to empty block????\n");
1319 	}
1320 
1324 	/*
1325 	 * For synchronous writes, we clean up all completed async pageouts.
1326 	 */
1327 	if ((flags & B_ASYNC) == 0) {
1328 		swap_pager_clean();
1329 	}
1330 	kva = 0;
1331 
1332 	/*
1333 	 * we allocate a new kva for transfers > 1 page but for transfers == 1
1334 	 * page, the swap_pager_free list contains entries that have
1335 	 * pre-allocated kva's (for efficiency). NOTE -- we do not use the
1336 	 * physical buffer pool or the preallocated associated kva's because
1337 	 * of the potential for deadlock.  This is very subtle -- but
1338 	 * deadlocks or resource contention must be avoided on pageouts -- or
1339 	 * your system will sleep (forever) !!!
1340 	 */
1341 /*
1342 	if ( count > 1) {
1343 		kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE);
1344 		if( !kva) {
1345 			for (i = 0; i < count; i++) {
1346 				if( swb[i])
1347 					--swb[i]->swb_locked;
1348 				rtvals[i] = VM_PAGER_AGAIN;
1349 			}
1350 			return VM_PAGER_AGAIN;
1351 		}
1352 	}
1353 */
1354 
1355 	/*
1356 	 * get a swap pager clean data structure, block until we get it
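	 * (the test below insists on at least three free entries before
	 * proceeding, apparently to leave slack for other waiters)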
1357 	 */
1358 	if (swap_pager_free.tqh_first == NULL || swap_pager_free.tqh_first->spc_list.tqe_next == NULL || swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
1359 		s = splbio();
1360 		if (curproc == pageproc) {
1361 			(void) swap_pager_clean();
1362 /*
1363 			splx(s);
1364 			return VM_PAGER_AGAIN;
1365 */
1366 		} else
1367 			wakeup((caddr_t) &vm_pages_needed);
1368 		while (swap_pager_free.tqh_first == NULL || swap_pager_free.tqh_first->spc_list.tqe_next == NULL || swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
1369 			if (curproc == pageproc &&
1370 			    (cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)
1371 				wakeup((caddr_t) &cnt.v_free_count);
1372 
1373 			swap_pager_needflags |= SWAP_FREE_NEEDED;
1374 			tsleep((caddr_t) &swap_pager_free,
1375 			    PVM, "swpfre", 0);
1376 			if (curproc == pageproc)
1377 				(void) swap_pager_clean();
1378 			else
1379 				wakeup((caddr_t) &vm_pages_needed);
1380 		}
1381 		splx(s);
1382 	}
1383 	spc = swap_pager_free.tqh_first;
1384 	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
1385 
1386 	kva = spc->spc_kva;
1387 
1388 	/*
1389 	 * map our page(s) into kva for I/O
1390 	 */
1391 	pmap_qenter(kva, m, count);
1392 
1393 	/*
1394 	 * get the base I/O offset into the swap file
1395 	 */
1396 	for (i = 0; i < count; i++) {
1397 		foff = m[i]->offset + paging_offset;
1398 		off = swap_pager_block_offset(swp, foff);
1399 		/*
1400 		 * set the valid bit
1401 		 */
1402 		swb[i]->swb_valid |= (1 << off);
1403 		/*
1404 		 * and unlock the data structure
1405 		 */
1406 		--swb[i]->swb_locked;
1407 	}
1408 
1409 	/*
1410 	 * Get a swap buffer header and perform the IO
1411 	 */
1412 	bp = spc->spc_bp;
1413 	bzero(bp, sizeof *bp);
1414 	bp->b_spc = spc;
1415 	bp->b_vnbufs.le_next = NOLIST;
1416 
1417 	bp->b_flags = B_BUSY;
1418 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
1419 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1420 	if (bp->b_rcred != NOCRED)
1421 		crhold(bp->b_rcred);
1422 	if (bp->b_wcred != NOCRED)
1423 		crhold(bp->b_wcred);
1424 	bp->b_data = (caddr_t) kva;
1425 	bp->b_blkno = reqaddr[0];
1426 	pbgetvp(swapdev_vp, bp);
1427 
1428 	bp->b_bcount = PAGE_SIZE * count;
1429 	bp->b_bufsize = PAGE_SIZE * count;
1430 	swapdev_vp->v_numoutput++;
1431 
1432 	/*
1433 	 * If this is an async write we set up additional buffer fields and
1434 	 * place a "cleaning" entry on the inuse queue.
1435 	 */
1436 	s = splbio();
1437 	if (flags & B_ASYNC) {
1438 		spc->spc_flags = 0;
1439 		spc->spc_swp = swp;
1440 		for (i = 0; i < count; i++)
1441 			spc->spc_m[i] = m[i];
1442 		spc->spc_count = count;
1443 		/*
1444 		 * the completion routine for async writes
1445 		 */
1446 		bp->b_flags |= B_CALL;
1447 		bp->b_iodone = swap_pager_iodone;
1448 		bp->b_dirtyoff = 0;
1449 		bp->b_dirtyend = bp->b_bcount;
1450 		swp->sw_poip++;
1451 		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
1452 	} else {
1453 		swp->sw_poip++;
1454 		bp->b_flags |= B_CALL;
1455 		bp->b_iodone = swap_pager_iodone1;
1456 	}
1457 
1458 	cnt.v_swapout++;
1459 	cnt.v_swappgsout += count;
1460 	/*
1461 	 * perform the I/O
1462 	 */
1463 	VOP_STRATEGY(bp);
1464 	if ((flags & (B_READ | B_ASYNC)) == B_ASYNC) {
1465 		if ((bp->b_flags & B_DONE) == B_DONE) {
1466 			swap_pager_clean();
1467 		}
1468 		splx(s);
1469 		for (i = 0; i < count; i++) {
1470 			rtvals[i] = VM_PAGER_PEND;
1471 		}
1472 		return VM_PAGER_PEND;
1473 	}
1474 	/*
1475 	 * wait for the sync I/O to complete
1476 	 */
1477 	while ((bp->b_flags & B_DONE) == 0) {
1478 		tsleep((caddr_t) bp, PVM, "swwrt", 0);
1479 	}
1480 	if (bp->b_flags & B_ERROR) {
1481 		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
1482 		    bp->b_blkno, bp->b_bcount, bp->b_error);
1483 		rv = VM_PAGER_ERROR;
1484 	} else {
1485 		rv = VM_PAGER_OK;
1486 	}
1487 
1488 	--swp->sw_poip;
1489 	if (swp->sw_poip == 0)
1490 		wakeup((caddr_t) swp);
1491 
1492 	if (bp->b_vp)
1493 		pbrelvp(bp);
1494 	if (bp->b_flags & B_WANTED)
1495 		wakeup((caddr_t) bp);
1496 
1497 	splx(s);
1498 
1499 	/*
1500 	 * remove the mapping for kernel virtual
1501 	 */
1502 	pmap_qremove(kva, count);
1503 
1504 	/*
1505 	 * if we have written the page, then indicate that the page is clean.
1506 	 */
1507 	if (rv == VM_PAGER_OK) {
1508 		for (i = 0; i < count; i++) {
1509 			if (rtvals[i] == VM_PAGER_OK) {
1510 				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1511 				m[i]->dirty = 0;
1512 				/*
1513 				 * optimization, if a page has been read
1514 				 * during the pageout process, we activate it.
1515 				 */
1516 				if ((m[i]->flags & PG_ACTIVE) == 0 &&
1517 				    ((m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(m[i]))))
1518 					vm_page_activate(m[i]);
1519 			}
1520 		}
1521 	} else {
1522 		for (i = 0; i < count; i++) {
1523 			rtvals[i] = rv;
1524 		}
1525 	}
1526 
1527 	if (bp->b_rcred != NOCRED)
1528 		crfree(bp->b_rcred);
1529 	if (bp->b_wcred != NOCRED)
1530 		crfree(bp->b_wcred);
1531 	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1532 	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1533 		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1534 		wakeup((caddr_t) &swap_pager_free);
1535 	}
1536 	return (rv);
1537 }
1538 
1539 boolean_t
1540 swap_pager_clean()
1541 {
1542 	register swp_clean_t spc, tspc;
1543 	register int s;
1544 
1545 	tspc = NULL;
1546 	if (swap_pager_done.tqh_first == NULL)
1547 		return FALSE;
1548 	for (;;) {
1549 		s = splbio();
1550 		/*
1551 		 * Lookup and removal from done list must be done at splbio()
1552 		 * to avoid conflicts with swap_pager_iodone.
1553 		 */
1554 		while ((spc = swap_pager_done.tqh_first) != 0) {
1555 			pmap_qremove(spc->spc_kva, spc->spc_count);
1556 			swap_pager_finish(spc);
1557 			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
1558 			goto doclean;
1559 		}
1560 
1561 		/*
1562 		 * No operations done, that's all we can do for now.
1563 		 */
1564 
1565 		splx(s);
1566 		break;
1567 
1568 		/*
1569 		 * The desired page was found to be busy earlier in the scan
1570 		 * but has since completed.
1571 		 */
1572 doclean:
1573 		if (tspc && tspc == spc) {
1574 			tspc = NULL;
1575 		}
1576 		spc->spc_flags = 0;
1577 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1578 		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1579 			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1580 			wakeup((caddr_t) &swap_pager_free);
1581 		}
1582 		++cleandone;
1583 		splx(s);
1584 	}
1585 
1586 	return (tspc ? TRUE : FALSE);
1587 }
1588 
1589 void
1590 swap_pager_finish(spc)
1591 	register swp_clean_t spc;
1592 {
1593 	vm_object_t object = spc->spc_m[0]->object;
1594 	int i;
1595 
1596 	if ((object->paging_in_progress -= spc->spc_count) == 0)
1597 		thread_wakeup((int) object);
1598 
1599 	/*
1600 	 * If no error mark as clean and inform the pmap system. If error,
1601 	 * mark as dirty so we will try again. (XXX could get stuck doing
1602 	 * this, should give up after awhile)
1603 	 */
1604 	if (spc->spc_flags & SPC_ERROR) {
1605 		for (i = 0; i < spc->spc_count; i++) {
1606 			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
1607 			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
1608 		}
1609 	} else {
1610 		for (i = 0; i < spc->spc_count; i++) {
1611 			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
1612 			spc->spc_m[i]->dirty = 0;
1613 			if ((spc->spc_m[i]->flags & PG_ACTIVE) == 0 &&
1614 			    ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
1615 				vm_page_activate(spc->spc_m[i]);
1616 		}
1617 	}
1618 
1619 
1620 	for (i = 0; i < spc->spc_count; i++) {
1621 		/*
1622 		 * we wakeup any processes that are waiting on these pages.
1623 		 */
1624 		PAGE_WAKEUP(spc->spc_m[i]);
1625 	}
1626 	nswiodone -= spc->spc_count;
1627 
1628 	return;
1629 }
1630 
1631 /*
1632  * swap_pager_iodone
1633  */
1634 void
1635 swap_pager_iodone(bp)
1636 	register struct buf *bp;
1637 {
1638 	register swp_clean_t spc;
1639 	int s;
1640 
1641 	s = splbio();
1642 	spc = (swp_clean_t) bp->b_spc;
1643 	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
1644 	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
1645 	if (bp->b_flags & B_ERROR) {
1646 		spc->spc_flags |= SPC_ERROR;
1647 		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d",
1648 		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
1649 		    bp->b_error, (u_long) bp->b_blkno, bp->b_bcount);
1650 	}
1651 /*
1652 	if ((bp->b_flags & B_READ) == 0)
1653 		vwakeup(bp);
1654 */
1655 
1656 	if (bp->b_vp)
1657 		pbrelvp(bp);
1658 
1659 	if (bp->b_flags & B_WANTED)
1660 		wakeup((caddr_t) bp);
1661 
1662 	if (bp->b_rcred != NOCRED)
1663 		crfree(bp->b_rcred);
1664 	if (bp->b_wcred != NOCRED)
1665 		crfree(bp->b_wcred);
1666 
1667 	nswiodone += spc->spc_count;
1668 	if (--spc->spc_swp->sw_poip == 0) {
1669 		wakeup((caddr_t) spc->spc_swp);
1670 	}
1671 	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
1672 	    swap_pager_inuse.tqh_first == 0) {
1673 		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1674 		wakeup((caddr_t) &swap_pager_free);
1675 		wakeup((caddr_t) &vm_pages_needed);
1676 	}
1677 	if (vm_pageout_pages_needed) {
1678 		wakeup((caddr_t) &vm_pageout_pages_needed);
1679 	}
1680 	if ((swap_pager_inuse.tqh_first == NULL) ||
1681 	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
1682 		nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
1683 		wakeup((caddr_t) &vm_pages_needed);
1684 	}
1685 	splx(s);
1686 }
1687 
1688 /*
1689  * return true if any swap control structures can be allocated
1690  */
1691 int
1692 swap_pager_ready()
1693 {
1694 	if (swap_pager_free.tqh_first)
1695 		return 1;
1696 	else
1697 		return 0;
1698 }
1699