/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.62 1996/03/03 21:11:05 dyson Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/vmmeter.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

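/*
 * NPENDINGIO bounds the number of async pageout clusters that can be in
 * flight at once; swap_pager_swap_init() preallocates one cleaning
 * structure, one KVA window, and one buf header per pending I/O.
 */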
#ifndef NPENDINGIO
#define NPENDINGIO	10
#endif

static int nswiodone;
int swap_pager_full;
extern int vm_swap_size;
static int no_swap_space = 1;
struct rlisthdr swaplist;

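/*
 * Maximum number of pages carried by a single cleaning structure, i.e.
 * by one clustered pageout I/O.
 */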
#define MAX_PAGEOUT_CLUSTER 16

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

static struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	vm_object_t spc_object;
	vm_offset_t spc_kva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];

/* spc_flags values */
#define SPC_ERROR	0x01

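/*
 * SWB_EMPTY marks a swb_block[] slot that has no swap block allocated.
 */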
#define SWB_EMPTY (-1)

/* list of completed page cleans */
static struct swpclean swap_pager_done;

/* list of pending page cleans */
static struct swpclean swap_pager_inuse;

/* list of free pager clean structs */
static struct swpclean swap_pager_free;

/* list of "named" anon region objects */
static struct pagerlst swap_pager_object_list;

/* list of "unnamed" anon region objects */
struct pagerlst swap_pager_un_object_list;

#define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
#define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
static int swap_pager_needflags;

static struct pagerlst *swp_qs[] = {
	&swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
};

/*
 * pagerops for OBJT_SWAP - "swap pager".
 */
static vm_object_t
		swap_pager_alloc __P((void *handle, vm_size_t size,
				      vm_prot_t prot, vm_ooffset_t offset));
static void	swap_pager_dealloc __P((vm_object_t object));
static boolean_t
		swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
					int *before, int *after));
static int	swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static void	swap_pager_init __P((void));
static void	swap_pager_sync __P((void));

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpages,
	swap_pager_putpages,
	swap_pager_haspage,
	swap_pager_sync
};

static int npendingio = NPENDINGIO;
static int dmmin;
int dmmax;

static __pure int
		swap_pager_block_index __P((vm_pindex_t pindex)) __pure2;
static __pure int
		swap_pager_block_offset __P((vm_pindex_t pindex)) __pure2;
static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
					  vm_pindex_t pindex, int *valid));
static void	swap_pager_finish __P((swp_clean_t spc));
static void	swap_pager_freepage __P((vm_page_t m));
static void	swap_pager_free_swap __P((vm_object_t object));
static void	swap_pager_freeswapspace __P((vm_object_t object,
					      unsigned int from,
					      unsigned int to));
static int	swap_pager_getswapspace __P((vm_object_t object,
					     unsigned int amount,
					     daddr_t *rtval));
static void	swap_pager_iodone __P((struct buf *));
static void	swap_pager_iodone1 __P((struct buf *bp));
static void	swap_pager_reclaim __P((void));
static void	swap_pager_ridpages __P((vm_page_t *m, int count,
					 int reqpage));
static void	swap_pager_setvalid __P((vm_object_t object,
					 vm_offset_t offset, int valid));
static void	swapsizecheck __P((void));

#define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))

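/*
 * Hysteresis: declare "out of swap" below 128 pages worth of swap, and do
 * not clear the condition until we climb back above 192 pages, so the
 * warning does not flap around a single threshold.
 */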
static inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full == 0)
			printf("swap_pager: out of swap space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

static void
swap_pager_init()
{
	TAILQ_INIT(&swap_pager_object_list);
	TAILQ_INIT(&swap_pager_un_object_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);

	/*
	 * Calculate the swap allocation constants.
	 */
	dmmin = CLBYTES / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}

void
swap_pager_swap_init()
{
	swp_clean_t spc;
	struct buf *bp;
	int i;

	/*
	 * KVAs are allocated here so that we don't need to keep calling
	 * kmem_alloc_pageable() at runtime.
	 */
	for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
		spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
		if (!spc->spc_kva) {
			break;
		}
		spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
		if (!spc->spc_bp) {
			kmem_free_wakeup(pager_map, spc->spc_kva,
			    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			break;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	}
}

int
swap_pager_swp_alloc(object, wait)
	vm_object_t object;
	int wait;
{
	sw_blk_t swb;
	int nblocks;
	int i, j;

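	/*
	 * Each sw_blk maps SWB_NPAGES pages, so round the object size up to
	 * a whole number of blocks (e.g., if SWB_NPAGES were 8, a 17-page
	 * object would need 3 blocks).
	 */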
	nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
	if (swb == NULL)
		return 1;

	for (i = 0; i < nblocks; i++) {
		swb[i].swb_valid = 0;
		swb[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swb[i].swb_block[j] = SWB_EMPTY;
	}

	object->un_pager.swp.swp_nblocks = nblocks;
	object->un_pager.swp.swp_allocsize = 0;
	object->un_pager.swp.swp_blocks = swb;
	object->un_pager.swp.swp_poip = 0;

	if (object->handle != NULL) {
		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
	} else {
		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
	}

	return 0;
}

/*
 * Allocate an object and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory, as that could result in deadlock.
 */
static vm_object_t
swap_pager_alloc(handle, size, prot, offset)
	void *handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_ooffset_t offset;
{
	vm_object_t object;

	/*
	 * If this is a "named" anonymous region, look it up and use the
	 * object if it exists, otherwise allocate a new one.
	 */
	if (handle) {
		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
		if (object != NULL) {
			vm_object_reference(object);
		} else {
			/*
			 * XXX - there is a race condition here. Two processes
			 * can request the same named object simultaneously,
			 * and if one blocks for memory, the result is a disaster.
			 * Probably quite rare, but it is yet another reason to just
			 * rip support of "named anonymous regions" out altogether.
			 */
			object = vm_object_allocate(OBJT_SWAP,
				OFF_TO_IDX(offset + PAGE_SIZE - 1) + size);
			object->handle = handle;
			(void) swap_pager_swp_alloc(object, M_WAITOK);
		}
	} else {
		object = vm_object_allocate(OBJT_SWAP,
			OFF_TO_IDX(offset + PAGE_SIZE - 1) + size);
		(void) swap_pager_swp_alloc(object, M_WAITOK);
	}

	return (object);
}

/*
 * Returns the disk block associated with a pager and offset; as a side
 * effect, also returns a flag indicating whether the block has been
 * written (i.e. is valid).
 */

inline static daddr_t *
swap_pager_diskaddr(object, pindex, valid)
	vm_object_t object;
	vm_pindex_t pindex;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

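	/*
	 * The swap map is a two-level lookup: pindex / SWB_NPAGES selects
	 * the sw_blk, pindex % SWB_NPAGES the slot within it.  For example,
	 * if SWB_NPAGES were 8, page index 11 would live in block 1, slot 3.
	 */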
	if (valid)
		*valid = 0;
	ix = pindex / SWB_NPAGES;
	if ((ix >= object->un_pager.swp.swp_nblocks) ||
	    (pindex >= object->size)) {
		return (NULL);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset.
 */
static void
swap_pager_setvalid(object, offset, valid)
	vm_object_t object;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks)
		return;

	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = offset % SWB_NPAGES;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * This routine allocates swap space with a fragmentation
 * minimization policy.
 */
static int
swap_pager_getswapspace(object, amount, rtval)
	vm_object_t object;
	unsigned int amount;
	daddr_t *rtval;
{
	unsigned location;
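	/*
	 * Tentatively charge the space against vm_swap_size; back the
	 * charge out if the resource-list allocation fails.
	 */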
	vm_swap_size -= amount;
	if (!rlist_alloc(&swaplist, amount, &location)) {
		vm_swap_size += amount;
		return 0;
	} else {
		swapsizecheck();
		object->un_pager.swp.swp_allocsize += amount;
		*rtval = location;
		return 1;
	}
}

/*
 * This routine frees swap space with a fragmentation
 * minimization policy.
 */
static void
swap_pager_freeswapspace(object, from, to)
	vm_object_t object;
	unsigned int from;
	unsigned int to;
{
	rlist_free(&swaplist, from, to);
	vm_swap_size += (to - from) + 1;
	object->un_pager.swp.swp_allocsize -= (to - from) + 1;
	swapsizecheck();
}

/*
 * This routine frees swap blocks from a specified pager.
 */
void
swap_pager_freespace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splbio();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	splx(s);
}

static void
swap_pager_free_swap(object)
	vm_object_t object;
{
	register int i, j;
	register sw_blk_t swb;
	int first_block = 0, block_count = 0;
	int s;

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, swb = object->un_pager.swp.swp_blocks;
	    i < object->un_pager.swp.swp_nblocks; i++, swb++) {
		for (j = 0; j < SWB_NPAGES; j++) {
			if (swb->swb_block[j] != SWB_EMPTY) {
				/*
				 * initially the length of the run is zero
				 */
				if (block_count == 0) {
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * if the new block can be included in the current run
				 */
				} else if (swb->swb_block[j] == first_block + block_count) {
					block_count += btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * terminate the previous run, and start a new one
				 */
				} else {
					swap_pager_freeswapspace(object, first_block,
					    (unsigned) first_block + block_count - 1);
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				}
			}
		}
	}

	if (block_count) {
		swap_pager_freeswapspace(object, first_block,
		    (unsigned) first_block + block_count - 1);
	}
	splx(s);
}


/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to. It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 128

static void
swap_pager_reclaim()
{
	vm_object_t object;
	int i, j, k;
	int s;
	int reclaimcount;
	static struct {
		int address;
		vm_object_t object;
	} reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep(&in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		object = swp_qs[k]->tqh_first;
		while (object && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			if (object->paging_in_progress == 0) {
				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];

					if (swb->swb_locked)
						continue;
					for (j = 0; j < SWB_NPAGES; j++) {
						if (swb->swb_block[j] != SWB_EMPTY &&
						    (swb->swb_valid & (1 << j)) == 0) {
							reclaims[reclaimcount].address = swb->swb_block[j];
							reclaims[reclaimcount++].object = object;
							swb->swb_block[j] = SWB_EMPTY;
							if (reclaimcount >= MAXRECLAIM)
								goto rfinished;
						}
					}
				}
			}
			object = object->pager_object_list.tqe_next;
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i].object,
		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
	}
	splx(s);
	in_reclaim = 0;
	wakeup(&in_reclaim);
}


/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager.
 */

void
swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset)
	vm_object_t srcobject;
	vm_pindex_t srcoffset;
	vm_object_t dstobject;
	vm_pindex_t dstoffset;
	vm_pindex_t offset;
{
	vm_pindex_t i;
	int origsize;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	origsize = srcobject->un_pager.swp.swp_allocsize;

	/*
	 * remove the source object from the swap_pager internal queue
	 */
	if (srcobject->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
	}

	s = splbio();
	while (srcobject->un_pager.swp.swp_poip) {
		tsleep(srcobject, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	swap_pager_sync();

	s = splbio();
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstobject->size; i += 1) {
		int srcvalid, dstvalid;
		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset,
						    &srcvalid);
		daddr_t *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
							&dstvalid);
				/*
				 * if the dest block is allocated but stale,
				 * free it so the source block can be moved
				 * in.  (if the dest already holds a valid
				 * block, the source block is simply
				 * deallocated below without copying.)
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(dstobject, *dstaddrp,
						*dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
					swap_pager_setvalid(dstobject, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(srcobject, *srcaddrp,
					*srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
	splx(s);

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(srcobject);

	if (srcobject->un_pager.swp.swp_allocsize) {
		printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
		    srcobject->un_pager.swp.swp_allocsize, origsize);
	}

	free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
	srcobject->un_pager.swp.swp_blocks = NULL;

	return;
}

static void
swap_pager_dealloc(object)
	vm_object_t object;
{
	int s;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	if (object->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
	}

	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */
	s = splbio();
	while (object->un_pager.swp.swp_poip) {
		tsleep(object, PVM, "swpout", 0);
	}
	splx(s);

	swap_pager_sync();

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(object);

	if (object->un_pager.swp.swp_allocsize) {
		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
		    object->un_pager.swp.swp_allocsize);
	}
	/*
	 * Free swap management resources
	 */
	free(object->un_pager.swp.swp_blocks, M_VMPGDATA);
	object->un_pager.swp.swp_blocks = NULL;
}

static inline __pure int
swap_pager_block_index(pindex)
	vm_pindex_t pindex;
{
	return (pindex / SWB_NPAGES);
}

static inline __pure int
swap_pager_block_offset(pindex)
	vm_pindex_t pindex;
{
	return (pindex % SWB_NPAGES);
}

/*
 * swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
swap_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	register sw_blk_t swb;
	int ix;

	if (before != NULL)
		*before = 0;
	if (after != NULL)
		*after = 0;
	ix = pindex / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;

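	/*
	 * The page is present only if its slot is allocated and marked
	 * valid.  The before/after counts report how many neighboring slots
	 * in this sw_blk are valid and physically contiguous with the
	 * requested block on the swap device, which callers can use to
	 * cluster their I/O.
	 */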
	if (swb->swb_block[ix] != SWB_EMPTY) {

		if (swb->swb_valid & (1 << ix)) {
			int tix;

			if (before) {
				for (tix = ix - 1; tix >= 0; --tix) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] +
						(ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
						swb->swb_block[ix])
						break;
					(*before)++;
				}
			}

			if (after) {
				for (tix = ix + 1; tix < SWB_NPAGES; tix++) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] -
						(tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
						swb->swb_block[ix])
						break;
					(*after)++;
				}
			}

			return TRUE;
		}
	}
	return (FALSE);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  This is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

/*
 * swap_pager_iodone1 is the completion routine for both reads and
 * sync writes (async writes complete through swap_pager_iodone).
 */
static void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup(bp);
}

static int
swap_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_pindex_t paging_offset;
	int reqaddr[count];
	int sequential;

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = OFF_TO_IDX(object->paging_offset);
	sequential = (m[reqpage]->pindex == (object->last_read + 1));

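	/*
	 * Resolve the swap block for each page in the request.  Pages that
	 * fall beyond the object's swap map are stripped from the tail of
	 * the request; if the required page itself is out of range, the
	 * whole request fails.
	 */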
	for (i = 0; i < count; i++) {
		vm_pindex_t fidx = m[i]->pindex + paging_offset;
		int ix = swap_pager_block_index(fidx);

		if (ix >= object->un_pager.swp.swp_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &object->un_pager.swp.swp_blocks[ix];
		off[i] = swap_pager_block_offset(fidx);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that the block backing our required page actually exists */

	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
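	/*
	 * Remember which dmmax-sized region of the swap area the required
	 * page lives in; the cluster built below is not allowed to cross a
	 * region boundary (presumably so a single I/O never spans a swap
	 * interleave boundary).
	 */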
	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	++swb[reqpage]->swb_locked;

	/*
	 * at this point:
	 *	"m" is a pointer to the array of vm_page_t's for paging I/O
	 *	"count" is the number of vm_page_t entries represented by "m"
	 *	"object" is the vm_object_t for I/O
	 *	"reqpage" is the index into "m" for the page actually faulted
	 */

	spc = NULL;	/* we might not use an spc data structure */

	if ((count == 1) && (swap_pager_free.tqh_first != NULL)) {
		spc = swap_pager_free.tqh_first;
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
		bp->b_vnbufs.le_next = NOLIST;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	pbgetvp(swapdev_vp, bp);

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "swread", 0);
	}

	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	/*
	 * relpbuf does this, but we maintain our own buffer list also...
	 */
	if (bp->b_vp)
		pbrelvp(bp);

	splx(s);
	swb[reqpage]->swb_locked--;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	if (spc) {
		m[reqpage]->object->last_read = m[reqpage]->pindex;
		if (bp->b_flags & B_WANTED)
			wakeup(bp);
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
		if (rv == VM_PAGER_OK) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
			m[reqpage]->valid = VM_PAGE_BITS_ALL;
			m[reqpage]->dirty = 0;
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				m[i]->flags &= ~PG_ZERO;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page
					 * activated is up in the air, but we
					 * should put the page on a page queue
					 * somewhere. (it already is in the
					 * object). After some empirical
					 * results, it is best to deactivate
					 * the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for
					 * this page we now tell them that it
					 * is ok to use
					 */
					m[i]->valid = VM_PAGE_BITS_ALL;
					PAGE_WAKEUP(m[i]);
				}
			}

			m[reqpage]->object->last_read = m[count-1]->pindex;

			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever multiple pages are brought in. We
			 * must set the dirty bits so that the page contents
			 * will be preserved.
			 */
			if (SWAPLOW) {
				for (i = 0; i < count; i++) {
					m[i]->dirty = VM_PAGE_BITS_ALL;
				}
				swap_pager_freespace(object, m[0]->pindex + paging_offset, count);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}

int
swap_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, fidx;
	swp_clean_t spc;
	vm_pindex_t paging_pindex;
	int reqaddr[count];
	int failed;

	if (vm_swap_size)
		no_swap_space = 0;
	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}
	spc = NULL;

	object = m[0]->object;
	paging_pindex = OFF_TO_IDX(object->paging_offset);

	failed = 0;
	for (j = 0; j < count; j++) {
		fidx = m[j]->pindex + paging_pindex;
		ix = swap_pager_block_index(fidx);
		swb[j] = 0;
		if (ix >= object->un_pager.swp.swp_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &object->un_pager.swp.swp_blocks[ix];
		swb[j]->swb_locked++;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(fidx);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			daddr_t blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works (the
			 * intent of this code is to allocate small chunks for
			 * small objects)
			 */
			if ((off == 0) && ((fidx + ntoget) > object->size)) {
				ntoget = object->size - fidx;
			}
	retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
				&blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
				&swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * truncate the transfer at the first page whose swap block is not
	 * contiguous with the first page's, which crosses a dmmax region
	 * boundary, or which has already failed
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed ||
		    (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}
	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY) {
			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
				m[i]->pindex, i);
		}
	}

	/*
	 * For synchronous writes, we clean up all completed async pageouts.
	 */
	if (sync == TRUE) {
		swap_pager_sync();
	}
	kva = 0;

	/*
	 * get a swap pager clean data structure, blocking until we get one;
	 * we insist on at least three free entries before taking one, so a
	 * reserve remains (pageins also use these structures)
	 */
	if (swap_pager_free.tqh_first == NULL ||
		swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
		swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
		s = splbio();
		if (curproc == pageproc) {
retryfree:
			/*
			 * pageout daemon needs a swap control block
			 */
			swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT|SWAP_FREE_NEEDED;
			/*
			 * if it does not get one within a short time, then
			 * there is a potential deadlock, so we go on trying
			 * to free pages.  It is important to block here as opposed
			 * to returning, thereby allowing the pageout daemon to continue.
			 * It is likely that the pageout daemon will start suboptimally
			 * reclaiming vnode backed pages if we don't block.  Since the
			 * I/O subsystem is probably already fully utilized, might as
			 * well wait.
			 */
			if (tsleep(&swap_pager_free, PVM, "swpfre", hz/5)) {
				swap_pager_sync();
				if (swap_pager_free.tqh_first == NULL ||
					swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
					swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
					splx(s);
					return VM_PAGER_AGAIN;
				}
			} else {
				/*
				 * we make sure that pageouts aren't taking up all of
				 * the free swap control blocks.
				 */
				swap_pager_sync();
				if (swap_pager_free.tqh_first == NULL ||
					swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
					swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
					goto retryfree;
				}
			}
		} else {
			pagedaemon_wakeup();
			while (swap_pager_free.tqh_first == NULL ||
				swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
				swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep(&swap_pager_free, PVM, "swpfre", 0);
				pagedaemon_wakeup();
			}
		}
		splx(s);
	}
	spc = swap_pager_free.tqh_first;
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = 0; i < count; i++) {
		fidx = m[i]->pindex + paging_pindex;
		off = swap_pager_block_offset(fidx);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		swb[i]->swb_locked--;
	}

	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;
	bp->b_vnbufs.le_next = NOLIST;

	bp->b_flags = B_BUSY | B_PAGING;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
	 */
	s = splbio();
	if (sync == FALSE) {
		spc->spc_flags = 0;
		spc->spc_object = object;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		object->un_pager.swp.swp_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		object->un_pager.swp.swp_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if (sync == FALSE) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_sync();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}
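	/*
	 * (Pages of an async pageout stay busy until swap_pager_iodone()
	 * moves the cleaning entry onto the done list and a later
	 * swap_pager_sync() finishes the cluster and wakes them up.)
	 */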
	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "swwrt", 0);
	}
	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	object->un_pager.swp.swp_poip--;
	if (object->un_pager.swp.swp_poip == 0)
		wakeup(object);

	if (bp->b_vp)
		pbrelvp(bp);
	if (bp->b_flags & B_WANTED)
		wakeup(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * optimization: if a page has been read
				 * during the pageout process, we activate it.
				 */
				if ((m[i]->queue != PQ_ACTIVE) &&
				    ((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
				    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) {
					vm_page_activate(m[i]);
				}
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		wakeup(&swap_pager_free);
	}
	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
		pagedaemon_wakeup();
	swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
	return (rv);
}

static void
swap_pager_sync()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (swap_pager_done.tqh_first == NULL)
		return;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done at splbio()
		 * to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = swap_pager_done.tqh_first) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done; that's all we can do for now.
		 */
		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in the scan
		 * but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
		splx(s);
	}

	return;
}

static void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	object->paging_in_progress -= spc->spc_count;
	if ((object->paging_in_progress == 0) &&
	    (object->flags & OBJ_PIPWNT)) {
		object->flags &= ~OBJ_PIPWNT;
		wakeup(object);
	}

	/*
	 * If no error, mark as clean and inform the pmap system. If error,
	 * mark as dirty so we will try again. (XXX could get stuck doing
	 * this, should give up after a while)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->dirty = 0;
			if ((spc->spc_m[i]->queue != PQ_ACTIVE) &&
			    ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
				vm_page_activate(spc->spc_m[i]);
		}
	}

	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wake up any processes that are waiting on these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone is the biodone routine for async pageouts: it moves
 * the cleaning entry to the done list and issues the various wakeups.
 */
static void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
	}

	if (bp->b_vp)
		pbrelvp(bp);

	if (bp->b_flags & B_WANTED)
		wakeup(bp);

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
		wakeup(spc->spc_object);
	}
	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    swap_pager_inuse.tqh_first == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup(&swap_pager_free);
	}

	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
		pagedaemon_wakeup();
	}

	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	if ((swap_pager_inuse.tqh_first == NULL) ||
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
		pagedaemon_wakeup();
	}
	splx(s);
}