xref: /freebsd/sys/vm/swap_pager.c (revision 0ea3482342b4d7d6e71f3007ce4dafe445c639fd)
1 /*
2  * Copyright (c) 1994 John S. Dyson
3  * Copyright (c) 1990 University of Utah.
4  * Copyright (c) 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * the Systems Programming Group of the University of Utah Computer
9  * Science Department.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  *
39  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
40  *
41  *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
42  * $Id: swap_pager.c,v 1.47 1995/09/24 04:40:19 davidg Exp $
43  */
44 
45 /*
46  * Quick hack to page to dedicated partition(s).
47  * TODO:
48  *	Add multiprocessor locks
49  *	Deal with async writes in a better fashion
50  */
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/kernel.h>
55 #include <sys/proc.h>
56 #include <sys/buf.h>
57 #include <sys/vnode.h>
58 #include <sys/malloc.h>
59 
60 #include <miscfs/specfs/specdev.h>
61 #include <sys/rlist.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_pager.h>
65 #include <vm/vm_page.h>
66 #include <vm/vm_pageout.h>
67 #include <vm/swap_pager.h>
68 #include <vm/vm_kern.h>
69 
70 #ifndef NPENDINGIO
71 #define NPENDINGIO	10
72 #endif
73 
74 int nswiodone;
75 int swap_pager_full;
76 extern int vm_swap_size;
77 int no_swap_space = 1;
78 struct rlist *swaplist;
79 int nswaplist;
80 
81 #define MAX_PAGEOUT_CLUSTER 8
82 
83 TAILQ_HEAD(swpclean, swpagerclean);
84 
85 typedef struct swpagerclean *swp_clean_t;
86 
87 struct swpagerclean {
88 	TAILQ_ENTRY(swpagerclean) spc_list;
89 	int spc_flags;
90 	struct buf *spc_bp;
91 	vm_object_t spc_object;
92 	vm_offset_t spc_kva;
93 	int spc_count;
94 	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
95 } swcleanlist[NPENDINGIO];
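
/*
 * Each swpagerclean structure lives on exactly one of three queues:
 * swap_pager_free (idle, with kva and buf header ready for use),
 * swap_pager_inuse (an async pageout is in flight), and
 * swap_pager_done (the I/O has completed and the structure is waiting
 * for swap_pager_sync to finish it up and return it to the free list).
 * spc_kva maps up to MAX_PAGEOUT_CLUSTER pages for a transfer and
 * spc_bp is a dedicated buffer header; both are set up in
 * swap_pager_swap_init.
 */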
96 
97 
98 /* spc_flags values */
99 #define SPC_ERROR	0x01
100 
101 #define SWB_EMPTY (-1)
102 
103 struct swpclean swap_pager_done;	/* list of completed page cleans */
104 struct swpclean swap_pager_inuse;	/* list of pending page cleans */
105 struct swpclean swap_pager_free;	/* list of free pager clean structs */
106 struct pagerlst swap_pager_object_list;	/* list of "named" anon region objects */
107 struct pagerlst swap_pager_un_object_list; /* list of "unnamed" anon region objects */
108 
109 #define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
110 #define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
111 int swap_pager_needflags;
112 
113 struct pagerlst *swp_qs[] = {
114 	&swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
115 };
116 
117 /*
118  * pagerops for OBJT_SWAP - "swap pager".
119  */
120 struct pagerops swappagerops = {
121 	swap_pager_init,
122 	swap_pager_alloc,
123 	swap_pager_dealloc,
124 	swap_pager_getpages,
125 	swap_pager_putpages,
126 	swap_pager_haspage,
127 	swap_pager_sync
128 };
129 
130 int npendingio = NPENDINGIO;
131 void swap_pager_finish();
132 int dmmin, dmmax;
133 
134 
135 static inline void
136 swapsizecheck()
137 {
138 	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
139 		if (swap_pager_full == 0)
140 			printf("swap_pager: out of space\n");
141 		swap_pager_full = 1;
142 	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
143 		swap_pager_full = 0;
144 }
145 
146 void
147 swap_pager_init()
148 {
149 	TAILQ_INIT(&swap_pager_object_list);
150 	TAILQ_INIT(&swap_pager_un_object_list);
151 
152 	/*
153 	 * Initialize clean lists
154 	 */
155 	TAILQ_INIT(&swap_pager_inuse);
156 	TAILQ_INIT(&swap_pager_done);
157 	TAILQ_INIT(&swap_pager_free);
158 
159 	/*
160 	 * Calculate the swap allocation constants.
161 	 */
162 	dmmin = CLBYTES / DEV_BSIZE;
163 	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
164 }
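
/*
 * For concreteness: on a machine with 4K pages, 512-byte disk blocks,
 * CLBYTES of 4096 and SWB_NPAGES of 8 (assumptions, not guarantees),
 * the constants above work out to dmmin = 8 (one page worth of disk
 * blocks) and dmmax = btodb(32768) * 2 = 128 disk blocks, i.e. a 64K
 * maximum swap allocation chunk.
 */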
165 
166 void
167 swap_pager_swap_init()
168 {
169 	swp_clean_t spc;
170 	struct buf *bp;
171 	int i;
172 
173 	/*
174 	 * KVAs are allocated here so that we don't need to keep calling
175 	 * kmem_alloc_pageable at runtime.
176 	 */
177 	for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
178 		spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
179 		if (!spc->spc_kva) {
180 			break;
181 		}
182 		spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
183 		if (!spc->spc_bp) {
184 			kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
185 			break;
186 		}
187 		spc->spc_flags = 0;
188 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
189 	}
190 }
191 
192 int
193 swap_pager_swp_alloc(object, wait)
194 	vm_object_t object;
195 	int wait;
196 {
197 	sw_blk_t swb;
198 	int nblocks;
199 	int i, j;
200 
201 	nblocks = (btodb(object->size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) /
202 	    btodb(SWB_NPAGES * PAGE_SIZE);
203 
204 	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
205 	if (swb == NULL)
206 		return 1;
207 
208 	for (i = 0; i < nblocks; i++) {
209 		swb[i].swb_valid = 0;
210 		swb[i].swb_locked = 0;
211 		for (j = 0; j < SWB_NPAGES; j++)
212 			swb[i].swb_block[j] = SWB_EMPTY;
213 	}
214 
215 	object->un_pager.swp.swp_nblocks = nblocks;
216 	object->un_pager.swp.swp_allocsize = 0;
217 	object->un_pager.swp.swp_blocks = swb;
218 	object->un_pager.swp.swp_poip = 0;
219 
220 	if (object->handle != NULL) {
221 		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
222 	} else {
223 		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
224 	}
225 
226 	return 0;
227 }
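
/*
 * A worked example of the sizing arithmetic above, assuming 4K pages,
 * 512-byte disk blocks and SWB_NPAGES of 8: a 1MB object gives
 * btodb(object->size) = 2048, each sw_blk covers btodb(32768) = 64
 * disk blocks, so nblocks = (2048 + 63) / 64 = 32 sw_blk structures,
 * each tracking 8 pages (32K) of the object.
 */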
228 
229 /*
230  * Allocate an object and associated resources.
231  * Note that if we are called from the pageout daemon (handle == NULL)
232  * we should not wait for memory, as that could result in deadlock.
233  */
234 vm_object_t
235 swap_pager_alloc(handle, size, prot, offset)
236 	void *handle;
237 	register vm_size_t size;
238 	vm_prot_t prot;
239 	vm_offset_t offset;
240 {
241 	vm_object_t object;
242 	int i;
243 
244 	/*
245 	 * If this is a "named" anonymous region, look it up and use the
246 	 * object if it exists, otherwise allocate a new one.
247 	 */
248 	if (handle) {
249 		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
250 		if (object != NULL) {
251 			vm_object_reference(object);
252 		} else {
253 			/*
254 			 * XXX - there is a race condition here. Two processes
255 			 * can request the same named object simultaneously,
256 			 * and if one blocks for memory, the result is a disaster.
257 			 * Probably quite rare, but is yet another reason to just
258 			 * rip support of "named anonymous regions" out altogether.
259 			 */
260 			object = vm_object_allocate(OBJT_SWAP, offset + size);
261 			object->handle = handle;
262 			(void) swap_pager_swp_alloc(object, M_WAITOK);
263 		}
264 	} else {
265 		object = vm_object_allocate(OBJT_SWAP, offset + size);
266 		(void) swap_pager_swp_alloc(object, M_WAITOK);
267 	}
268 
269 	return (object);
270 }
271 
272 /*
273  * Returns the disk block associated with the object and offset;
274  * additionally, as a side effect, returns a flag indicating
275  * whether the block has been written (is valid).
276  */
277 
278 inline static int *
279 swap_pager_diskaddr(object, offset, valid)
280 	vm_object_t object;
281 	vm_offset_t offset;
282 	int *valid;
283 {
284 	register sw_blk_t swb;
285 	int ix;
286 
287 	if (valid)
288 		*valid = 0;
289 	ix = offset / (SWB_NPAGES * PAGE_SIZE);
290 	if ((ix >= object->un_pager.swp.swp_nblocks) ||
291 	    (offset >= object->size)) {
292 		return (NULL);
293 	}
294 	swb = &object->un_pager.swp.swp_blocks[ix];
295 	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
296 	if (valid)
297 		*valid = swb->swb_valid & (1 << ix);
298 	return &swb->swb_block[ix];
299 }
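
/*
 * Example of the two-level lookup above, under the same assumptions
 * (4K pages, SWB_NPAGES of 8, so each sw_blk covers 32K):
 *
 *	offset 0x15000 (84K)
 *	block index = 0x15000 / 0x8000 = 2
 *	page slot   = (0x15000 % 0x8000) / 0x1000 = 5
 *
 * so the routine returns &swp_blocks[2].swb_block[5], with *valid set
 * from bit 5 of swp_blocks[2].swb_valid.
 */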
300 
301 /*
302  * Utility routine to set the valid (written) bit for
303  * a block associated with a pager and offset
304  */
305 static void
306 swap_pager_setvalid(object, offset, valid)
307 	vm_object_t object;
308 	vm_offset_t offset;
309 	int valid;
310 {
311 	register sw_blk_t swb;
312 	int ix;
313 
314 	ix = offset / (SWB_NPAGES * PAGE_SIZE);
315 	if (ix >= object->un_pager.swp.swp_nblocks)
316 		return;
317 
318 	swb = &object->un_pager.swp.swp_blocks[ix];
319 	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
320 	if (valid)
321 		swb->swb_valid |= (1 << ix);
322 	else
323 		swb->swb_valid &= ~(1 << ix);
324 	return;
325 }
326 
327 /*
328  * this routine allocates swap space with a fragmentation
329  * minimization policy.
330  */
331 int
332 swap_pager_getswapspace(object, amount, rtval)
333 	vm_object_t object;
334 	unsigned int amount;
335 	unsigned int *rtval;
336 {
337 	vm_swap_size -= amount;
338 	if (!rlist_alloc(&swaplist, amount, rtval)) {
339 		vm_swap_size += amount;
340 		return 0;
341 	} else {
342 		swapsizecheck();
343 		object->un_pager.swp.swp_allocsize += amount;
344 		return 1;
345 	}
346 }
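
/*
 * Note that vm_swap_size is debited before the rlist_alloc attempt and
 * credited back on failure; rlist_alloc, when it succeeds, returns (in
 * *rtval) the starting disk block of a contiguous run of "amount"
 * DEV_BSIZE-sized blocks taken from the swaplist resource list.
 */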
347 
348 /*
349  * this routine frees swap space, allowing the rlist code to
350  * coalesce adjacent runs and minimize fragmentation
351  */
352 void
353 swap_pager_freeswapspace(object, from, to)
354 	vm_object_t object;
355 	unsigned int from;
356 	unsigned int to;
357 {
358 	rlist_free(&swaplist, from, to);
359 	vm_swap_size += (to - from) + 1;
360 	object->un_pager.swp.swp_allocsize -= (to - from) + 1;
361 	swapsizecheck();
362 }
363 /*
364  * this routine frees swap blocks from a specified pager
365  */
366 void
367 swap_pager_freespace(object, start, size)
368 	vm_object_t object;
369 	vm_offset_t start;
370 	vm_offset_t size;
371 {
372 	vm_offset_t i;
373 	int s;
374 
375 	s = splbio();
376 	for (i = start; i < round_page(start + size); i += PAGE_SIZE) {
377 		int valid;
378 		int *addr = swap_pager_diskaddr(object, i, &valid);
379 
380 		if (addr && *addr != SWB_EMPTY) {
381 			swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
382 			if (valid) {
383 				swap_pager_setvalid(object, i, 0);
384 			}
385 			*addr = SWB_EMPTY;
386 		}
387 	}
388 	splx(s);
389 }
390 
391 static void
392 swap_pager_free_swap(object)
393 	vm_object_t object;
394 {
395 	register int i, j;
396 	register sw_blk_t swb;
397 	int first_block=0, block_count=0;
398 	int s;
399 	/*
400 	 * Free left over swap blocks
401 	 */
402 	s = splbio();
403 	for (i = 0, swb = object->un_pager.swp.swp_blocks;
404 	    i < object->un_pager.swp.swp_nblocks; i++, swb++) {
405 		for (j = 0; j < SWB_NPAGES; j++) {
406 			if (swb->swb_block[j] != SWB_EMPTY) {
407 				/*
408 				 * initially the length of the run is zero
409 				 */
410 				if (block_count == 0) {
411 					first_block = swb->swb_block[j];
412 					block_count = btodb(PAGE_SIZE);
413 					swb->swb_block[j] = SWB_EMPTY;
414 				/*
415 				 * if the new block can be included into the current run
416 				 */
417 				} else if (swb->swb_block[j] == first_block + block_count) {
418 					block_count += btodb(PAGE_SIZE);
419 					swb->swb_block[j] = SWB_EMPTY;
420 				/*
421 				 * terminate the previous run, and start a new one
422 				 */
423 				} else {
424 					swap_pager_freeswapspace(object, first_block,
425 				   	 (unsigned) first_block + block_count - 1);
426 					first_block = swb->swb_block[j];
427 					block_count = btodb(PAGE_SIZE);
428 					swb->swb_block[j] = SWB_EMPTY;
429 				}
430 			}
431 		}
432 	}
433 
434 	if (block_count) {
435 		swap_pager_freeswapspace(object, first_block,
436 		   	 (unsigned) first_block + block_count - 1);
437 	}
438 	splx(s);
439 }
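
/*
 * The loop above batches physically contiguous blocks into single
 * rlist_free calls. For example (assuming btodb(PAGE_SIZE) == 8),
 * pages stored at disk blocks 200, 208 and 216 coalesce into the one
 * run [200, 223]; a following page at block 400 terminates that run
 * and starts a new one.
 */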
440 
441 
442 /*
443  * swap_pager_reclaim frees up over-allocated space from all pagers;
444  * this eliminates internal fragmentation due to allocation of space
445  * for segments that are never swapped to. It has been written so that
446  * it does not block until the rlist_free operation occurs; it keeps
447  * the queues consistent.
448  */
449 
450 /*
451  * Maximum number of blocks (pages) to reclaim per pass
452  */
453 #define MAXRECLAIM 128
454 
455 void
456 swap_pager_reclaim()
457 {
458 	vm_object_t object;
459 	int i, j, k;
460 	int s;
461 	int reclaimcount;
462 	static struct {
463 		int address;
464 		vm_object_t object;
465 	} reclaims[MAXRECLAIM];
466 	static int in_reclaim;
467 
468 	/*
469 	 * allow only one process to be in the swap_pager_reclaim subroutine
470 	 */
471 	s = splbio();
472 	if (in_reclaim) {
473 		tsleep(&in_reclaim, PSWP, "swrclm", 0);
474 		splx(s);
475 		return;
476 	}
477 	in_reclaim = 1;
478 	reclaimcount = 0;
479 
480 	/* for each pager queue */
481 	for (k = 0; swp_qs[k]; k++) {
482 
483 		object = swp_qs[k]->tqh_first;
484 		while (object && (reclaimcount < MAXRECLAIM)) {
485 
486 			/*
487 			 * see if any blocks associated with a pager have been
488 			 * allocated but never used (written)
489 			 */
490 			for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
491 				sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];
492 
493 				if (swb->swb_locked)
494 					continue;
495 				for (j = 0; j < SWB_NPAGES; j++) {
496 					if (swb->swb_block[j] != SWB_EMPTY &&
497 					    (swb->swb_valid & (1 << j)) == 0) {
498 						reclaims[reclaimcount].address = swb->swb_block[j];
499 						reclaims[reclaimcount++].object = object;
500 						swb->swb_block[j] = SWB_EMPTY;
501 						if (reclaimcount >= MAXRECLAIM)
502 							goto rfinished;
503 					}
504 				}
505 			}
506 			object = object->pager_object_list.tqe_next;
507 		}
508 	}
509 
510 rfinished:
511 
512 	/*
513 	 * free the blocks that have been added to the reclaim list
514 	 */
515 	for (i = 0; i < reclaimcount; i++) {
516 		swap_pager_freeswapspace(reclaims[i].object,
517 		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
518 	}
519 	splx(s);
520 	in_reclaim = 0;
521 	wakeup(&in_reclaim);
522 }
523 
524 
525 /*
526  * swap_pager_copy copies blocks from one pager to another and
527  * destroys the source pager
528  */
529 
530 void
531 swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset)
532 	vm_object_t srcobject;
533 	vm_offset_t srcoffset;
534 	vm_object_t dstobject;
535 	vm_offset_t dstoffset;
536 	vm_offset_t offset;
537 {
538 	vm_offset_t i;
539 	int origsize;
540 	int s;
541 
542 	if (vm_swap_size)
543 		no_swap_space = 0;
544 
545 	origsize = srcobject->un_pager.swp.swp_allocsize;
546 
547 	/*
548 	 * remove the source object from the swap_pager internal queue
549 	 */
550 	if (srcobject->handle == NULL) {
551 		TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
552 	} else {
553 		TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
554 	}
555 
556 	s = splbio();
557 	while (srcobject->un_pager.swp.swp_poip) {
558 		tsleep(srcobject, PVM, "spgout", 0);
559 	}
560 	splx(s);
561 
562 	/*
563 	 * clean all of the pages that are currently active and finished
564 	 */
565 	swap_pager_sync();
566 
567 	s = splbio();
568 	/*
569 	 * transfer source to destination
570 	 */
571 	for (i = 0; i < dstobject->size; i += PAGE_SIZE) {
572 		int srcvalid, dstvalid;
573 		int *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset,
574 						    &srcvalid);
575 		int *dstaddrp;
576 
577 		/*
578 		 * see if the source has space allocated
579 		 */
580 		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
581 			/*
582 			 * if the source is valid and the dest has no space,
583 			 * then copy the allocation from the source to the
584 			 * dest.
585 			 */
586 			if (srcvalid) {
587 				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
588 							&dstvalid);
589 				/*
590 				 * if the dest has an allocated but invalid
591 				 * block, free it; the source block will
592 				 * take its place below.
593 				 */
594 				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
595 					swap_pager_freeswapspace(dstobject, *dstaddrp,
596 						*dstaddrp + btodb(PAGE_SIZE) - 1);
597 					*dstaddrp = SWB_EMPTY;
598 				}
599 				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
600 					*dstaddrp = *srcaddrp;
601 					*srcaddrp = SWB_EMPTY;
602 					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
603 					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
604 					swap_pager_setvalid(dstobject, i + dstoffset, 1);
605 				}
606 			}
607 			/*
608 			 * if the source is not empty at this point, then
609 			 * deallocate the space.
610 			 */
611 			if (*srcaddrp != SWB_EMPTY) {
612 				swap_pager_freeswapspace(srcobject, *srcaddrp,
613 					*srcaddrp + btodb(PAGE_SIZE) - 1);
614 				*srcaddrp = SWB_EMPTY;
615 			}
616 		}
617 	}
618 	splx(s);
619 
620 	/*
621 	 * Free left over swap blocks
622 	 */
623 	swap_pager_free_swap(srcobject);
624 
625 	if (srcobject->un_pager.swp.swp_allocsize) {
626 		printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
627 		    srcobject->un_pager.swp.swp_allocsize, origsize);
628 	}
629 
630 	free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
631 	srcobject->un_pager.swp.swp_blocks = NULL;
632 
633 	return;
634 }
635 
636 void
637 swap_pager_dealloc(object)
638 	vm_object_t object;
639 {
640 	int s;
641 
642 	/*
643 	 * Remove from list right away so lookups will fail if we block for
644 	 * pageout completion.
645 	 */
646 	if (object->handle == NULL) {
647 		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
648 	} else {
649 		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
650 	}
651 
652 	/*
653 	 * Wait for all pageouts to finish and remove all entries from
654 	 * cleaning list.
655 	 */
656 
657 	s = splbio();
658 	while (object->un_pager.swp.swp_poip) {
659 		tsleep(object, PVM, "swpout", 0);
660 	}
661 	splx(s);
662 
663 
664 	swap_pager_sync();
665 
666 	/*
667 	 * Free left over swap blocks
668 	 */
669 	swap_pager_free_swap(object);
670 
671 	if (object->un_pager.swp.swp_allocsize) {
672 		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
673 		    object->un_pager.swp.swp_allocsize);
674 	}
675 	/*
676 	 * Free swap management resources
677 	 */
678 	free(object->un_pager.swp.swp_blocks, M_VMPGDATA);
679 	object->un_pager.swp.swp_blocks = NULL;
680 }
681 
682 static inline int
683 const
684 swap_pager_block_index(offset)
685 	vm_offset_t offset;
686 {
687 	return (offset / (SWB_NPAGES * PAGE_SIZE));
688 }
689 
690 static inline int
691 const
692 swap_pager_block_offset(offset)
693 	vm_offset_t offset;
694 {
695 	return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE);
696 }
697 
698 /*
699  * swap_pager_haspage returns TRUE if the pager has data that has
700  * been written out.
701  */
702 boolean_t
703 swap_pager_haspage(object, offset, before, after)
704 	vm_object_t object;
705 	vm_offset_t offset;
706 	int *before;
707 	int *after;
708 {
709 	register sw_blk_t swb;
710 	int ix;
711 	int gix;
712 
713 	if (before != NULL)
714 		*before = 0;
715 	if (after != NULL)
716 		*after = 0;
717 	ix = offset / (SWB_NPAGES * PAGE_SIZE);
718 	if (ix >= object->un_pager.swp.swp_nblocks) {
719 		return (FALSE);
720 	}
721 	swb = &object->un_pager.swp.swp_blocks[ix];
722 	gix = offset / PAGE_SIZE;
723 	ix = gix % SWB_NPAGES;
724 
725 	if (swb->swb_block[ix] != SWB_EMPTY) {
726 
727 		if (swb->swb_valid & (1 << ix)) {
728 			int tix;
729 			if (before) {
730 				for(tix = ix - 1; tix >= 0; --tix) {
731 					if ((swb->swb_valid & (1 << tix)) == 0)
732 						break;
733 					if ((swb->swb_block[tix] +
734 						(ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
735 						swb->swb_block[ix])
736 						break;
737 					(*before)++;
738 				}
739 			}
740 
741 			if (after) {
742 				for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
743 					if ((swb->swb_valid & (1 << tix)) == 0)
744 						break;
745 					if ((swb->swb_block[tix] -
746 						(tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
747 						swb->swb_block[ix])
748 						break;
749 					(*after)++;
750 				}
751 			}
752 
753 			return TRUE;
754 		}
755 	}
756 	return (FALSE);
757 }
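
/*
 * Example of the before/after computation, assuming 4K pages and
 * 512-byte disk blocks (PAGE_SIZE/DEV_BSIZE == 8): if a block holds
 * valid pages at disk addresses { 96, 104, 112 } in slots 0..2, a
 * query on slot 1 returns TRUE with *before = 1 and *after = 1, since
 * each neighbor is valid and exactly 8 disk blocks away.  The scan
 * never crosses an sw_blk boundary, so the counts are conservative.
 */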
758 
759 /*
760  * swap_pager_freepage is a convenience routine that clears the busy
761  * bit and deallocates a page.
762  */
763 static void
764 swap_pager_freepage(m)
765 	vm_page_t m;
766 {
767 	PAGE_WAKEUP(m);
768 	vm_page_free(m);
769 }
770 
771 /*
772  * swap_pager_ridpages is a convenience routine that deallocates all
773  * but the required page.  This is usually used in error returns that
774  * need to invalidate the "extra" readahead pages.
775  */
776 static void
777 swap_pager_ridpages(m, count, reqpage)
778 	vm_page_t *m;
779 	int count;
780 	int reqpage;
781 {
782 	int i;
783 
784 	for (i = 0; i < count; i++)
785 		if (i != reqpage)
786 			swap_pager_freepage(m[i]);
787 }
788 
789 /*
790  * swap_pager_iodone1 is the completion routine for both reads and async writes
791  */
792 void
793 swap_pager_iodone1(bp)
794 	struct buf *bp;
795 {
796 	bp->b_flags |= B_DONE;
797 	bp->b_flags &= ~B_ASYNC;
798 	wakeup(bp);
799 }
800 
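/*
 * swap_pager_getpages reads the faulted page m[reqpage] (and as many
 * of the surrounding readahead pages as are contiguous on swap) into
 * the busy pages in m[].  Pages other than m[reqpage] may be freed if
 * they cannot be read in the same I/O.  Returns VM_PAGER_OK on
 * success, VM_PAGER_FAIL if no swap block backs the required page, or
 * VM_PAGER_ERROR on an I/O error.
 */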
801 int
802 swap_pager_getpages(object, m, count, reqpage)
803 	vm_object_t object;
804 	vm_page_t *m;
805 	int count, reqpage;
806 {
807 	register struct buf *bp;
808 	sw_blk_t swb[count];
809 	register int s;
810 	int i;
811 	boolean_t rv;
812 	vm_offset_t kva, off[count];
813 	swp_clean_t spc;
814 	vm_offset_t paging_offset;
815 	int reqaddr[count];
816 	int sequential;
817 
818 	int first, last;
819 	int failed;
820 	int reqdskregion;
821 
822 	object = m[reqpage]->object;
823 	paging_offset = object->paging_offset;
824 	sequential = (m[reqpage]->offset == (object->last_read + PAGE_SIZE));
825 
826 	for (i = 0; i < count; i++) {
827 		vm_offset_t foff = m[i]->offset + paging_offset;
828 		int ix = swap_pager_block_index(foff);
829 
830 		if (ix >= object->un_pager.swp.swp_nblocks) {
831 			int j;
832 
833 			if (i <= reqpage) {
834 				swap_pager_ridpages(m, count, reqpage);
835 				return (VM_PAGER_FAIL);
836 			}
837 			for (j = i; j < count; j++) {
838 				swap_pager_freepage(m[j]);
839 			}
840 			count = i;
841 			break;
842 		}
843 		swb[i] = &object->un_pager.swp.swp_blocks[ix];
844 		off[i] = swap_pager_block_offset(foff);
845 		reqaddr[i] = swb[i]->swb_block[off[i]];
846 	}
847 
848 	/* make sure that the swap block backing our required page exists */
849 
850 	if (reqaddr[reqpage] == SWB_EMPTY ||
851 	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
852 		swap_pager_ridpages(m, count, reqpage);
853 		return (VM_PAGER_FAIL);
854 	}
855 	reqdskregion = reqaddr[reqpage] / dmmax;
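	/*
	 * clustered pages must all fall within the same dmmax-sized
	 * region of the swap area; with interleaved swap devices,
	 * presumably successive dmmax chunks can live on different
	 * devices, so a single strategy call must not span a region
	 * boundary.
	 */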
856 
857 	/*
858 	 * search backwards for the first contiguous page to transfer
859 	 */
860 	failed = 0;
861 	first = 0;
862 	for (i = reqpage - 1; i >= 0; --i) {
863 		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
864 		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
865 		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
866 		    ((reqaddr[i] / dmmax) != reqdskregion)) {
867 			failed = 1;
868 			swap_pager_freepage(m[i]);
869 			if (first == 0)
870 				first = i + 1;
871 		}
872 	}
873 	/*
874 	 * search forwards for the last contiguous page to transfer
875 	 */
876 	failed = 0;
877 	last = count;
878 	for (i = reqpage + 1; i < count; i++) {
879 		if (failed || (reqaddr[i] == SWB_EMPTY) ||
880 		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
881 		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
882 		    ((reqaddr[i] / dmmax) != reqdskregion)) {
883 			failed = 1;
884 			swap_pager_freepage(m[i]);
885 			if (last == count)
886 				last = i;
887 		}
888 	}
889 
890 	count = last;
891 	if (first != 0) {
892 		for (i = first; i < count; i++) {
893 			m[i - first] = m[i];
894 			reqaddr[i - first] = reqaddr[i];
895 			off[i - first] = off[i];
896 		}
897 		count -= first;
898 		reqpage -= first;
899 	}
900 	++swb[reqpage]->swb_locked;
901 
902 	/*
903 	 * at this point: "m" points to the array of vm_page_t for paging
904 	 * I/O, "count" is the number of vm_page_t entries represented by
905 	 * "m", "object" is the vm_object_t for I/O, and "reqpage" is the
906 	 * index into "m" for the page actually faulted.
907 	 */
908 
909 	spc = NULL;	/* we might not use an spc data structure */
910 
911 	if ((count == 1) && (swap_pager_free.tqh_first != NULL)) {
912 		/*
913 		 * this is a single-page transfer using a preallocated spc
914 		 * kva, so free any extra pages that might have been
915 		 * allocated by vm_fault.
916 		 */
917 		swap_pager_ridpages(m, count, reqpage);
918 		m[0] = m[reqpage];
919 		reqaddr[0] = reqaddr[reqpage];
920 
921 		count = 1;
922 		reqpage = 0;
923 		/*
924 		 * get a swap pager clean data structure, block until we get
925 		 * it
926 		 */
927 		if (swap_pager_free.tqh_first == NULL) {
928 			s = splbio();
929 			if (curproc == pageproc)
930 				swap_pager_sync();
931 			else
932 				pagedaemon_wakeup();
933 			while (swap_pager_free.tqh_first == NULL) {
934 				swap_pager_needflags |= SWAP_FREE_NEEDED;
935 				if (curproc == pageproc)
936 					swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
937 				tsleep(&swap_pager_free,
938 				    PVM, "swpfre", 0);
939 				if (curproc == pageproc)
940 					swap_pager_sync();
941 				else
942 					pagedaemon_wakeup();
943 			}
944 			splx(s);
945 		}
946 		spc = swap_pager_free.tqh_first;
947 		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
948 		kva = spc->spc_kva;
949 		bp = spc->spc_bp;
950 		bzero(bp, sizeof *bp);
951 		bp->b_spc = spc;
952 		bp->b_vnbufs.le_next = NOLIST;
953 	} else {
954 		/*
955 		 * Get a swap buffer header to perform the IO
956 		 */
957 		bp = getpbuf();
958 		kva = (vm_offset_t) bp->b_data;
959 	}
960 
961 	/*
962 	 * map our page(s) into kva for input
963 	 */
964 	pmap_qenter(kva, m, count);
965 
966 	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
967 	bp->b_iodone = swap_pager_iodone1;
968 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
969 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
970 	crhold(bp->b_rcred);
971 	crhold(bp->b_wcred);
972 	bp->b_un.b_addr = (caddr_t) kva;
973 	bp->b_blkno = reqaddr[0];
974 	bp->b_bcount = PAGE_SIZE * count;
975 	bp->b_bufsize = PAGE_SIZE * count;
976 
977 	pbgetvp(swapdev_vp, bp);
978 
979 	cnt.v_swapin++;
980 	cnt.v_swappgsin += count;
981 	/*
982 	 * perform the I/O
983 	 */
984 	VOP_STRATEGY(bp);
985 
986 	/*
987 	 * wait for the sync I/O to complete
988 	 */
989 	s = splbio();
990 	while ((bp->b_flags & B_DONE) == 0) {
991 		tsleep(bp, PVM, "swread", 0);
992 	}
993 
994 	if (bp->b_flags & B_ERROR) {
995 		printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
996 		    bp->b_blkno, bp->b_bcount, bp->b_error);
997 		rv = VM_PAGER_ERROR;
998 	} else {
999 		rv = VM_PAGER_OK;
1000 	}
1001 
1002 	/*
1003 	 * relpbuf does this, but we maintain our own buffer list also...
1004 	 */
1005 	if (bp->b_vp)
1006 		pbrelvp(bp);
1007 
1008 	splx(s);
1009 	swb[reqpage]->swb_locked--;
1010 
1011 	/*
1012 	 * remove the mapping for kernel virtual
1013 	 */
1014 	pmap_qremove(kva, count);
1015 
1016 	if (spc) {
1017 		m[reqpage]->object->last_read = m[reqpage]->offset;
1018 		if (bp->b_flags & B_WANTED)
1019 			wakeup(bp);
1020 		/*
1021 		 * if we have used an spc, we need to free it.
1022 		 */
1023 		if (bp->b_rcred != NOCRED)
1024 			crfree(bp->b_rcred);
1025 		if (bp->b_wcred != NOCRED)
1026 			crfree(bp->b_wcred);
1027 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1028 		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1029 			wakeup(&swap_pager_free);
1030 		}
1031 		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1032 			pagedaemon_wakeup();
1033 		swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1034 	} else {
1035 		/*
1036 		 * release the physical I/O buffer
1037 		 */
1038 		relpbuf(bp);
1039 		/*
1040 		 * finish up input if everything is ok
1041 		 */
1042 		if (rv == VM_PAGER_OK) {
1043 			for (i = 0; i < count; i++) {
1044 				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1045 				m[i]->dirty = 0;
1046 				m[i]->flags &= ~PG_ZERO;
1047 				if (i != reqpage) {
1048 					/*
1049 					 * whether or not to leave the page
1050 					 * activated is up in the air, but we
1051 					 * should put the page on a page queue
1052 					 * somewhere. (it already is in the
1053 					 * object). After some empirical
1054 					 * results, it is best to deactivate
1055 					 * the readahead pages.
1056 					 */
1057 					vm_page_deactivate(m[i]);
1058 
1059 					/*
1060 					 * just in case someone was asking for
1061 					 * this page we now tell them that it
1062 					 * is ok to use
1063 					 */
1064 					m[i]->valid = VM_PAGE_BITS_ALL;
1065 					PAGE_WAKEUP(m[i]);
1066 				}
1067 			}
1068 
1069 			m[reqpage]->object->last_read = m[count-1]->offset;
1070 
1071 			/*
1072 			 * If we're out of swap space, then attempt to free
1073 			 * some whenever pages are brought in. We must clear
1074 			 * the clean flag so that the page contents will be
1075 			 * preserved.
1076 			 */
1077 			if (swap_pager_full) {
1078 				for (i = 0; i < count; i++) {
1079 					m[i]->dirty = VM_PAGE_BITS_ALL;
1080 				}
1081 				swap_pager_freespace(object, m[0]->offset + paging_offset, count * PAGE_SIZE);
1082 			}
1083 		} else {
1084 			swap_pager_ridpages(m, count, reqpage);
1085 		}
1086 	}
1087 	if (rv == VM_PAGER_OK) {
1088 		pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
1089 		m[reqpage]->valid = VM_PAGE_BITS_ALL;
1090 		m[reqpage]->dirty = 0;
1091 	}
1092 	return (rv);
1093 }
1094 
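/*
 * swap_pager_putpages writes the pages in m[] to swap, allocating
 * swap blocks as needed.  rtvals[i] is set per page: VM_PAGER_OK for
 * a completed synchronous write, VM_PAGER_PEND when an async write is
 * in flight (completion is handled by swap_pager_iodone), and
 * VM_PAGER_FAIL or VM_PAGER_AGAIN when no swap metadata or swap space
 * is available.
 */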
1095 int
1096 swap_pager_putpages(object, m, count, sync, rtvals)
1097 	vm_object_t object;
1098 	vm_page_t *m;
1099 	int count;
1100 	boolean_t sync;
1101 	int *rtvals;
1102 {
1103 	register struct buf *bp;
1104 	sw_blk_t swb[count];
1105 	register int s;
1106 	int i, j, ix;
1107 	boolean_t rv;
1108 	vm_offset_t kva, off, foff;
1109 	swp_clean_t spc;
1110 	vm_offset_t paging_offset;
1111 	int reqaddr[count];
1112 	int failed;
1113 
1114 	if (vm_swap_size)
1115 		no_swap_space = 0;
1116 	if (no_swap_space) {
1117 		for (i = 0; i < count; i++)
1118 			rtvals[i] = VM_PAGER_FAIL;
1119 		return VM_PAGER_FAIL;
1120 	}
1121 	spc = NULL;
1122 
1123 	object = m[0]->object;
1124 	paging_offset = object->paging_offset;
1125 
1126 	failed = 0;
1127 	for (j = 0; j < count; j++) {
1128 		foff = m[j]->offset + paging_offset;
1129 		ix = swap_pager_block_index(foff);
1130 		swb[j] = 0;
1131 		if (ix >= object->un_pager.swp.swp_nblocks) {
1132 			rtvals[j] = VM_PAGER_FAIL;
1133 			failed = 1;
1134 			continue;
1135 		} else {
1136 			rtvals[j] = VM_PAGER_OK;
1137 		}
1138 		swb[j] = &object->un_pager.swp.swp_blocks[ix];
1139 		swb[j]->swb_locked++;
1140 		if (failed) {
1141 			rtvals[j] = VM_PAGER_FAIL;
1142 			continue;
1143 		}
1144 		off = swap_pager_block_offset(foff);
1145 		reqaddr[j] = swb[j]->swb_block[off];
1146 		if (reqaddr[j] == SWB_EMPTY) {
1147 			int blk;
1148 			int tries;
1149 			int ntoget;
1150 
1151 			tries = 0;
1152 			s = splbio();
1153 
1154 			/*
1155 			 * if any other pages have been allocated in this
1156 			 * block, we only try to get one page.
1157 			 */
1158 			for (i = 0; i < SWB_NPAGES; i++) {
1159 				if (swb[j]->swb_block[i] != SWB_EMPTY)
1160 					break;
1161 			}
1162 
1163 			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
1164 			/*
1165 			 * this code is a little conservative, but works (the
1166 			 * intent of this code is to allocate small chunks for
1167 			 * small objects)
1168 			 */
1169 			if ((foff == 0) &&
1170 				((ntoget * PAGE_SIZE) > object->size)) {
1171 				ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
1172 			}
1173 	retrygetspace:
1174 			if (!swap_pager_full && ntoget > 1 &&
1175 			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE), &blk)) {
1176 
1177 				for (i = 0; i < ntoget; i++) {
1178 					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
1179 					swb[j]->swb_valid = 0;
1180 				}
1181 
1182 				reqaddr[j] = swb[j]->swb_block[off];
1183 			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
1184 				&swb[j]->swb_block[off])) {
1185 				/*
1186 				 * if the allocation has failed, we try to
1187 				 * reclaim space and retry.
1188 				 */
1189 				if (++tries == 1) {
1190 					swap_pager_reclaim();
1191 					goto retrygetspace;
1192 				}
1193 				rtvals[j] = VM_PAGER_AGAIN;
1194 				failed = 1;
1195 				swap_pager_full = 1;
1196 			} else {
1197 				reqaddr[j] = swb[j]->swb_block[off];
1198 				swb[j]->swb_valid &= ~(1 << off);
1199 			}
1200 			splx(s);
1201 		}
1202 	}
1203 
1204 	/*
1205 	 * search forwards for the last contiguous page to transfer
1206 	 */
1207 	failed = 0;
1208 	for (i = 0; i < count; i++) {
1209 		if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
1210 		    (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
1211 		    (rtvals[i] != VM_PAGER_OK)) {
1212 			failed = 1;
1213 			if (rtvals[i] == VM_PAGER_OK)
1214 				rtvals[i] = VM_PAGER_AGAIN;
1215 		}
1216 	}
1217 
1218 	for (i = 0; i < count; i++) {
1219 		if (rtvals[i] != VM_PAGER_OK) {
1220 			if (swb[i])
1221 				--swb[i]->swb_locked;
1222 		}
1223 	}
1224 
1225 	for (i = 0; i < count; i++)
1226 		if (rtvals[i] != VM_PAGER_OK)
1227 			break;
1228 
1229 	if (i == 0) {
1230 		return VM_PAGER_AGAIN;
1231 	}
1232 	count = i;
1233 	for (i = 0; i < count; i++) {
1234 		if (reqaddr[i] == SWB_EMPTY)
1235 			printf("I/O to empty block????\n");
1236 	}
1237 
1238 	/*
1239 	 * For synchronous writes, we clean up all completed async pageouts.
1240 	 */
1241 	if (sync == TRUE) {
1242 		swap_pager_sync();
1243 	}
1244 	kva = 0;
1245 
1246 	/*
1247 	 * get a swap pager clean data structure, block until we get it
1248 	 */
1249 	if (swap_pager_free.tqh_first == NULL ||
1250 		swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
1251 		swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
1252 		s = splbio();
1253 		if (curproc == pageproc) {
1254 			swap_pager_sync();
1255 #if 0
1256 			splx(s);
1257 			return VM_PAGER_AGAIN;
1258 #endif
1259 		} else
1260 			pagedaemon_wakeup();
1261 		while (swap_pager_free.tqh_first == NULL ||
1262 			swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
1263 			swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
1264 			if (curproc == pageproc) {
1265 				swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
1266 				if ((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_reserved)
1267 					wakeup(&cnt.v_free_count);
1268 			}
1269 
1270 			swap_pager_needflags |= SWAP_FREE_NEEDED;
1271 			tsleep(&swap_pager_free, PVM, "swpfre", 0);
1272 			if (curproc == pageproc)
1273 				swap_pager_sync();
1274 			else
1275 				pagedaemon_wakeup();
1276 		}
1277 		splx(s);
1278 	}
1279 	spc = swap_pager_free.tqh_first;
1280 	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
1281 
1282 	kva = spc->spc_kva;
1283 
1284 	/*
1285 	 * map our page(s) into kva for I/O
1286 	 */
1287 	pmap_qenter(kva, m, count);
1288 
1289 	/*
1290 	 * get the base I/O offset into the swap file
1291 	 */
1292 	for (i = 0; i < count; i++) {
1293 		foff = m[i]->offset + paging_offset;
1294 		off = swap_pager_block_offset(foff);
1295 		/*
1296 		 * set the valid bit
1297 		 */
1298 		swb[i]->swb_valid |= (1 << off);
1299 		/*
1300 		 * and unlock the data structure
1301 		 */
1302 		swb[i]->swb_locked--;
1303 	}
1304 
1305 	/*
1306 	 * Get a swap buffer header and perform the IO
1307 	 */
1308 	bp = spc->spc_bp;
1309 	bzero(bp, sizeof *bp);
1310 	bp->b_spc = spc;
1311 	bp->b_vnbufs.le_next = NOLIST;
1312 
1313 	bp->b_flags = B_BUSY | B_PAGING;
1314 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
1315 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1316 	if (bp->b_rcred != NOCRED)
1317 		crhold(bp->b_rcred);
1318 	if (bp->b_wcred != NOCRED)
1319 		crhold(bp->b_wcred);
1320 	bp->b_data = (caddr_t) kva;
1321 	bp->b_blkno = reqaddr[0];
1322 	pbgetvp(swapdev_vp, bp);
1323 
1324 	bp->b_bcount = PAGE_SIZE * count;
1325 	bp->b_bufsize = PAGE_SIZE * count;
1326 	swapdev_vp->v_numoutput++;
1327 
1328 	/*
1329 	 * If this is an async write we set up additional buffer fields and
1330 	 * place a "cleaning" entry on the inuse queue.
1331 	 */
1332 	s = splbio();
1333 	if (sync == FALSE) {
1334 		spc->spc_flags = 0;
1335 		spc->spc_object = object;
1336 		for (i = 0; i < count; i++)
1337 			spc->spc_m[i] = m[i];
1338 		spc->spc_count = count;
1339 		/*
1340 		 * the completion routine for async writes
1341 		 */
1342 		bp->b_flags |= B_CALL;
1343 		bp->b_iodone = swap_pager_iodone;
1344 		bp->b_dirtyoff = 0;
1345 		bp->b_dirtyend = bp->b_bcount;
1346 		object->un_pager.swp.swp_poip++;
1347 		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
1348 	} else {
1349 		object->un_pager.swp.swp_poip++;
1350 		bp->b_flags |= B_CALL;
1351 		bp->b_iodone = swap_pager_iodone1;
1352 	}
1353 
1354 	cnt.v_swapout++;
1355 	cnt.v_swappgsout += count;
1356 	/*
1357 	 * perform the I/O
1358 	 */
1359 	VOP_STRATEGY(bp);
1360 	if (sync == FALSE) {
1361 		if ((bp->b_flags & B_DONE) == B_DONE) {
1362 			swap_pager_sync();
1363 		}
1364 		splx(s);
1365 		for (i = 0; i < count; i++) {
1366 			rtvals[i] = VM_PAGER_PEND;
1367 		}
1368 		return VM_PAGER_PEND;
1369 	}
1370 	/*
1371 	 * wait for the sync I/O to complete
1372 	 */
1373 	while ((bp->b_flags & B_DONE) == 0) {
1374 		tsleep(bp, PVM, "swwrt", 0);
1375 	}
1376 	if (bp->b_flags & B_ERROR) {
1377 		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
1378 		    bp->b_blkno, bp->b_bcount, bp->b_error);
1379 		rv = VM_PAGER_ERROR;
1380 	} else {
1381 		rv = VM_PAGER_OK;
1382 	}
1383 
1384 	object->un_pager.swp.swp_poip--;
1385 	if (object->un_pager.swp.swp_poip == 0)
1386 		wakeup(object);
1387 
1388 	if (bp->b_vp)
1389 		pbrelvp(bp);
1390 	if (bp->b_flags & B_WANTED)
1391 		wakeup(bp);
1392 
1393 	splx(s);
1394 
1395 	/*
1396 	 * remove the mapping for kernel virtual
1397 	 */
1398 	pmap_qremove(kva, count);
1399 
1400 	/*
1401 	 * if we have written the page, then indicate that the page is clean.
1402 	 */
1403 	if (rv == VM_PAGER_OK) {
1404 		for (i = 0; i < count; i++) {
1405 			if (rtvals[i] == VM_PAGER_OK) {
1406 				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1407 				m[i]->dirty = 0;
1408 				/*
1409 				 * optimization, if a page has been read
1410 				 * during the pageout process, we activate it.
1411 				 */
1412 				if ((m[i]->flags & PG_ACTIVE) == 0 &&
1413 				    ((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
1414 				    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) {
1415 					vm_page_activate(m[i]);
1416 				}
1417 			}
1418 		}
1419 	} else {
1420 		for (i = 0; i < count; i++) {
1421 			rtvals[i] = rv;
1422 		}
1423 	}
1424 
1425 	if (bp->b_rcred != NOCRED)
1426 		crfree(bp->b_rcred);
1427 	if (bp->b_wcred != NOCRED)
1428 		crfree(bp->b_wcred);
1429 	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1430 	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1431 		wakeup(&swap_pager_free);
1432 	}
1433 	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1434 		pagedaemon_wakeup();
1435 	swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1436 	return (rv);
1437 }
1438 
1439 void
1440 swap_pager_sync()
1441 {
1442 	register swp_clean_t spc, tspc;
1443 	register int s;
1444 
1445 	tspc = NULL;
1446 	if (swap_pager_done.tqh_first == NULL)
1447 		return;
1448 	for (;;) {
1449 		s = splbio();
1450 		/*
1451 		 * Lookup and removal from the done list must be done at splbio()
1452 		 * to avoid conflicts with swap_pager_iodone.
1453 		 */
1454 		while ((spc = swap_pager_done.tqh_first) != 0) {
1455 			pmap_qremove(spc->spc_kva, spc->spc_count);
1456 			swap_pager_finish(spc);
1457 			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
1458 			goto doclean;
1459 		}
1460 
1461 		/*
1462 		 * No operations done, that's all we can do for now.
1463 		 */
1464 
1465 		splx(s);
1466 		break;
1467 
1468 		/*
1469 		 * An spc was taken off the done list above; finish
1470 		 * cleaning it up and return it to the free list.
1471 		 */
1472 doclean:
1473 		if (tspc && tspc == spc) {
1474 			tspc = NULL;
1475 		}
1476 		spc->spc_flags = 0;
1477 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1478 		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1479 			wakeup(&swap_pager_free);
1480 		}
1481 		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1482 			pagedaemon_wakeup();
1483 		swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1484 		splx(s);
1485 	}
1486 
1487 	return;
1488 }
1489 
1490 void
1491 swap_pager_finish(spc)
1492 	register swp_clean_t spc;
1493 {
1494 	vm_object_t object = spc->spc_m[0]->object;
1495 	int i;
1496 
1497 	object->paging_in_progress -= spc->spc_count;
1498 	if ((object->paging_in_progress == 0) &&
1499 	    (object->flags & OBJ_PIPWNT)) {
1500 		object->flags &= ~OBJ_PIPWNT;
1501 		wakeup(object);
1502 	}
1503 
1504 	/*
1505 	 * If no error, mark as clean and inform the pmap system. If error,
1506 	 * mark as dirty so we will try again. (XXX could get stuck doing
1507 	 * this, should give up after a while)
1508 	 */
1509 	if (spc->spc_flags & SPC_ERROR) {
1510 		for (i = 0; i < spc->spc_count; i++) {
1511 			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
1512 			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
1513 		}
1514 	} else {
1515 		for (i = 0; i < spc->spc_count; i++) {
1516 			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
1517 			spc->spc_m[i]->dirty = 0;
1518 			if ((spc->spc_m[i]->flags & PG_ACTIVE) == 0 &&
1519 			    ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
1520 				vm_page_activate(spc->spc_m[i]);
1521 		}
1522 	}
1523 
1524 
1525 	for (i = 0; i < spc->spc_count; i++) {
1526 		/*
1527 		 * we wakeup any processes that are waiting on these pages.
1528 		 */
1529 		PAGE_WAKEUP(spc->spc_m[i]);
1530 	}
1531 	nswiodone -= spc->spc_count;
1532 
1533 	return;
1534 }
1535 
1536 /*
1537  * swap_pager_iodone
1538  */
1539 void
1540 swap_pager_iodone(bp)
1541 	register struct buf *bp;
1542 {
1543 	register swp_clean_t spc;
1544 	int s;
1545 
1546 	s = splbio();
1547 	spc = (swp_clean_t) bp->b_spc;
1548 	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
1549 	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
1550 	if (bp->b_flags & B_ERROR) {
1551 		spc->spc_flags |= SPC_ERROR;
1552 		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
1553 		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
1554 		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
1555 	}
1556 
1557 	if (bp->b_vp)
1558 		pbrelvp(bp);
1559 
1560 	if (bp->b_flags & B_WANTED)
1561 		wakeup(bp);
1562 
1563 	if (bp->b_rcred != NOCRED)
1564 		crfree(bp->b_rcred);
1565 	if (bp->b_wcred != NOCRED)
1566 		crfree(bp->b_wcred);
1567 
1568 	nswiodone += spc->spc_count;
1569 	if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
1570 		wakeup(spc->spc_object);
1571 	}
1572 	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
1573 	    swap_pager_inuse.tqh_first == 0) {
1574 		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1575 		wakeup(&swap_pager_free);
1576 	}
1577 
1578 	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
1579 		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
1580 		pagedaemon_wakeup();
1581 	}
1582 
1583 	if (vm_pageout_pages_needed) {
1584 		wakeup(&vm_pageout_pages_needed);
1585 		vm_pageout_pages_needed = 0;
1586 	}
1587 	if ((swap_pager_inuse.tqh_first == NULL) ||
1588 	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
1589 	    nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
1590 		pagedaemon_wakeup();
1591 	}
1592 	splx(s);
1593 }
1594 
1595 /*
1596  * return true if any swap control structures are available
1597  */
1598 int
1599 swap_pager_ready()
1600 {
1601 	if (swap_pager_free.tqh_first)
1602 		return 1;
1603 	else
1604 		return 0;
1605 }
1606