xref: /freebsd/sys/vm/swap_pager.c (revision 6cde7a165f154ed47b58fc42d6d2041e18384680)
1df8bae1dSRodney W. Grimes /*
226f9a767SRodney W. Grimes  * Copyright (c) 1994 John S. Dyson
3df8bae1dSRodney W. Grimes  * Copyright (c) 1990 University of Utah.
4df8bae1dSRodney W. Grimes  * Copyright (c) 1991, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  *
7df8bae1dSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
8df8bae1dSRodney W. Grimes  * the Systems Programming Group of the University of Utah Computer
9df8bae1dSRodney W. Grimes  * Science Department.
10df8bae1dSRodney W. Grimes  *
11df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
12df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
13df8bae1dSRodney W. Grimes  * are met:
14df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
15df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
16df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
17df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
18df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
19df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
20df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
21df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
22df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
23df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
24df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
25df8bae1dSRodney W. Grimes  *    without specific prior written permission.
26df8bae1dSRodney W. Grimes  *
27df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
38df8bae1dSRodney W. Grimes  *
39df8bae1dSRodney W. Grimes  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
40df8bae1dSRodney W. Grimes  *
41df8bae1dSRodney W. Grimes  *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
426cde7a16SDavid Greenman  * $Id: swap_pager.c,v 1.101 1998/09/04 08:06:56 dfr Exp $
43df8bae1dSRodney W. Grimes  */
44df8bae1dSRodney W. Grimes 
45df8bae1dSRodney W. Grimes /*
46df8bae1dSRodney W. Grimes  * Quick hack to page to dedicated partition(s).
47df8bae1dSRodney W. Grimes  * TODO:
48df8bae1dSRodney W. Grimes  *	Add multiprocessor locks
49df8bae1dSRodney W. Grimes  *	Deal with async writes in a better fashion
50df8bae1dSRodney W. Grimes  */
51df8bae1dSRodney W. Grimes 
52df8bae1dSRodney W. Grimes #include <sys/param.h>
53df8bae1dSRodney W. Grimes #include <sys/systm.h>
5464abb5a5SDavid Greenman #include <sys/kernel.h>
55df8bae1dSRodney W. Grimes #include <sys/proc.h>
56df8bae1dSRodney W. Grimes #include <sys/buf.h>
57df8bae1dSRodney W. Grimes #include <sys/vnode.h>
58df8bae1dSRodney W. Grimes #include <sys/malloc.h>
59efeaf95aSDavid Greenman #include <sys/vmmeter.h>
6026f9a767SRodney W. Grimes #include <sys/rlist.h>
61df8bae1dSRodney W. Grimes 
62e47ed70bSJohn Dyson #ifndef MAX_PAGEOUT_CLUSTER
63ffc82b0aSJohn Dyson #define MAX_PAGEOUT_CLUSTER 16
64e47ed70bSJohn Dyson #endif
65e47ed70bSJohn Dyson 
66e47ed70bSJohn Dyson #ifndef NPENDINGIO
67e47ed70bSJohn Dyson #define NPENDINGIO	16
68e47ed70bSJohn Dyson #endif
69e47ed70bSJohn Dyson 
70e47ed70bSJohn Dyson #define SWB_NPAGES MAX_PAGEOUT_CLUSTER
71e47ed70bSJohn Dyson 
72df8bae1dSRodney W. Grimes #include <vm/vm.h>
73efeaf95aSDavid Greenman #include <vm/vm_prot.h>
74efeaf95aSDavid Greenman #include <vm/vm_object.h>
75df8bae1dSRodney W. Grimes #include <vm/vm_page.h>
76efeaf95aSDavid Greenman #include <vm/vm_pager.h>
77df8bae1dSRodney W. Grimes #include <vm/vm_pageout.h>
78df8bae1dSRodney W. Grimes #include <vm/swap_pager.h>
79efeaf95aSDavid Greenman #include <vm/vm_extern.h>
80df8bae1dSRodney W. Grimes 
/*
 * File-level swap accounting state.
 *
 *  nswiodone           - not referenced in the portion of the file reviewed
 *                        here; presumably counts completed swap I/O (confirm).
 *  swap_pager_full     - set to 1 / cleared to 0 by swapsizecheck() as
 *                        vm_swap_size crosses its low / high thresholds.
 *  vm_swap_size        - defined elsewhere; decremented when swap is
 *                        allocated and incremented when it is freed.
 *  suggest_more_swap   - latched to 1 after the one-time "suggest more swap
 *                        space" message in swap_pager_getswapspace().
 *  no_swap_space       - initialised to 1; not referenced in the portion of
 *                        the file reviewed here.
 *  max_pageout_cluster - run-time cluster size chosen by swap_pager_init();
 *                        sizes the KVA window of each swpagerclean entry.
 *  swaplist            - resource list handed to rlist_alloc()/rlist_free()
 *                        for swap block allocation.
 */
81f708ef1bSPoul-Henning Kamp static int nswiodone;
8226f9a767SRodney W. Grimes int swap_pager_full;
8326f9a767SRodney W. Grimes extern int vm_swap_size;
84b44e4b7aSJohn Dyson static int suggest_more_swap = 0;
85f5a12711SPoul-Henning Kamp static int no_swap_space = 1;
86e47ed70bSJohn Dyson static int max_pageout_cluster;
87836e5d13SJohn Dyson struct rlisthdr swaplist;
8826f9a767SRodney W. Grimes 
89df8bae1dSRodney W. Grimes TAILQ_HEAD(swpclean, swpagerclean);
90df8bae1dSRodney W. Grimes 
9126f9a767SRodney W. Grimes typedef struct swpagerclean *swp_clean_t;
9226f9a767SRodney W. Grimes 
/*
 * One record per pending swap "clean" operation, drawn from the static
 * pool swcleanlist[NPENDINGIO].
 *
 *  spc_list   - linkage used to place the record on swap_pager_free
 *               (and, presumably, on the inuse/done lists - confirm).
 *  spc_flags  - SPC_* flag bits; zeroed by swap_pager_swap_init().
 *  spc_bp     - buf header allocated once in swap_pager_swap_init().
 *  spc_kva    - pageable KVA window of PAGE_SIZE * max_pageout_cluster
 *               bytes allocated in swap_pager_swap_init().
 *  spc_object, spc_first, spc_count, spc_m
 *             - not used in the portion of the file reviewed here;
 *               presumably describe the object and page run being
 *               written (confirm against the pageout path).
 */
93f708ef1bSPoul-Henning Kamp static struct swpagerclean {
94df8bae1dSRodney W. Grimes 	TAILQ_ENTRY(swpagerclean) spc_list;
95df8bae1dSRodney W. Grimes 	int spc_flags;
96df8bae1dSRodney W. Grimes 	struct buf *spc_bp;
972a4895f4SDavid Greenman 	vm_object_t spc_object;
98df8bae1dSRodney W. Grimes 	vm_offset_t spc_kva;
99e736cd05SJohn Dyson 	int spc_first;
10026f9a767SRodney W. Grimes 	int spc_count;
10126f9a767SRodney W. Grimes 	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
102df8bae1dSRodney W. Grimes } swcleanlist[NPENDINGIO];
10326f9a767SRodney W. Grimes 
10426f9a767SRodney W. Grimes 
105df8bae1dSRodney W. Grimes /* spc_flags values */
10626f9a767SRodney W. Grimes #define SPC_ERROR	0x01
107df8bae1dSRodney W. Grimes 
10826f9a767SRodney W. Grimes #define SWB_EMPTY (-1)
109df8bae1dSRodney W. Grimes 
110f708ef1bSPoul-Henning Kamp /* list of completed page cleans */
111f708ef1bSPoul-Henning Kamp static struct swpclean swap_pager_done;
112f708ef1bSPoul-Henning Kamp 
113f708ef1bSPoul-Henning Kamp /* list of pending page cleans */
114f708ef1bSPoul-Henning Kamp static struct swpclean swap_pager_inuse;
115f708ef1bSPoul-Henning Kamp 
116f708ef1bSPoul-Henning Kamp /* list of free pager clean structs */
117f708ef1bSPoul-Henning Kamp static struct swpclean swap_pager_free;
118303b270bSEivind Eklund static int swap_pager_free_count;
119e47ed70bSJohn Dyson static int swap_pager_free_pending;
120f708ef1bSPoul-Henning Kamp 
121f708ef1bSPoul-Henning Kamp /* list of "named" anon region objects */
122f708ef1bSPoul-Henning Kamp static struct pagerlst swap_pager_object_list;
123f708ef1bSPoul-Henning Kamp 
124f708ef1bSPoul-Henning Kamp /* list of "unnamed" anon region objects */
125f708ef1bSPoul-Henning Kamp struct pagerlst swap_pager_un_object_list;
126df8bae1dSRodney W. Grimes 
12726f9a767SRodney W. Grimes #define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
128a1f6d91cSDavid Greenman #define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
129f708ef1bSPoul-Henning Kamp static int swap_pager_needflags;
13026f9a767SRodney W. Grimes 
/*
 * Null-terminated table of the swap object lists ("named" first, then
 * "unnamed").  swap_pager_reclaim() walks this table to visit every
 * swap-backed object.
 */
131f5a12711SPoul-Henning Kamp static struct pagerlst *swp_qs[] = {
13224a1cce3SDavid Greenman 	&swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
13326f9a767SRodney W. Grimes };
13426f9a767SRodney W. Grimes 
13524a1cce3SDavid Greenman /*
13624a1cce3SDavid Greenman  * pagerops for OBJT_SWAP - "swap pager".
13724a1cce3SDavid Greenman  */
138ff98689dSBruce Evans static vm_object_t
1396cde7a16SDavid Greenman 		swap_pager_alloc __P((void *handle, vm_ooffset_t size,
140a316d390SJohn Dyson 				      vm_prot_t prot, vm_ooffset_t offset));
141ff98689dSBruce Evans static void	swap_pager_dealloc __P((vm_object_t object));
142ff98689dSBruce Evans static boolean_t
143a316d390SJohn Dyson 		swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
144ff98689dSBruce Evans 					int *before, int *after));
145f708ef1bSPoul-Henning Kamp static int	swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
146ff98689dSBruce Evans static void	swap_pager_init __P((void));
147e47ed70bSJohn Dyson static void spc_free __P((swp_clean_t));
148f708ef1bSPoul-Henning Kamp 
/*
 * Pager operations vector for the swap pager.  The initialiser is
 * positional, so the entries must stay in the member order of
 * struct pagerops (declared outside this file - confirm before
 * reordering).  swap_pager_putpages and swap_pager_sync have no
 * prototype in the declarations above; presumably they are declared
 * in <vm/swap_pager.h> (confirm).
 */
149df8bae1dSRodney W. Grimes struct pagerops swappagerops = {
150df8bae1dSRodney W. Grimes 	swap_pager_init,
151df8bae1dSRodney W. Grimes 	swap_pager_alloc,
152df8bae1dSRodney W. Grimes 	swap_pager_dealloc,
15324a1cce3SDavid Greenman 	swap_pager_getpages,
15424a1cce3SDavid Greenman 	swap_pager_putpages,
15524a1cce3SDavid Greenman 	swap_pager_haspage,
15624a1cce3SDavid Greenman 	swap_pager_sync
157df8bae1dSRodney W. Grimes };
158df8bae1dSRodney W. Grimes 
/*
 * Run-time tunables computed by swap_pager_init():
 *  npendingio - number of swcleanlist entries that swap_pager_swap_init()
 *               sets up (starts at NPENDINGIO, may be reduced).
 *  dmmin      - PAGE_SIZE / DEV_BSIZE.
 *  dmmax      - btodb(SWB_NPAGES * PAGE_SIZE) * 2.
 */
159e47ed70bSJohn Dyson static int npendingio;
160f708ef1bSPoul-Henning Kamp static int dmmin;
161f708ef1bSPoul-Henning Kamp int dmmax;
16226f9a767SRodney W. Grimes 
1638ba0c490SBruce Evans static int	swap_pager_block_index __P((vm_pindex_t pindex));
1648ba0c490SBruce Evans static int	swap_pager_block_offset __P((vm_pindex_t pindex));
165a316d390SJohn Dyson static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
166a316d390SJohn Dyson 					  vm_pindex_t pindex, int *valid));
167cac597e4SBruce Evans static void	swap_pager_finish __P((swp_clean_t spc));
168cac597e4SBruce Evans static void	swap_pager_free_swap __P((vm_object_t object));
169cac597e4SBruce Evans static void	swap_pager_freeswapspace __P((vm_object_t object,
170cac597e4SBruce Evans 					      unsigned int from,
171cac597e4SBruce Evans 					      unsigned int to));
172cac597e4SBruce Evans static int	swap_pager_getswapspace __P((vm_object_t object,
173cac597e4SBruce Evans 					     unsigned int amount,
174a316d390SJohn Dyson 					     daddr_t *rtval));
175ff98689dSBruce Evans static void	swap_pager_iodone __P((struct buf *));
176cac597e4SBruce Evans static void	swap_pager_iodone1 __P((struct buf *bp));
177cac597e4SBruce Evans static void	swap_pager_reclaim __P((void));
178cac597e4SBruce Evans static void	swap_pager_ridpages __P((vm_page_t *m, int count,
179cac597e4SBruce Evans 					 int reqpage));
180cac597e4SBruce Evans static void	swap_pager_setvalid __P((vm_object_t object,
181cac597e4SBruce Evans 					 vm_offset_t offset, int valid));
182c1087c13SBruce Evans static __inline void	swapsizecheck __P((void));
18324a1cce3SDavid Greenman 
184de5f6a77SJohn Dyson #define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))
185de5f6a77SJohn Dyson 
186c1087c13SBruce Evans static __inline void
1870d94caffSDavid Greenman swapsizecheck()
1880d94caffSDavid Greenman {
18926f9a767SRodney W. Grimes 	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
190a1f6d91cSDavid Greenman 		if (swap_pager_full == 0)
1911af87c92SDavid Greenman 			printf("swap_pager: out of swap space\n");
19226f9a767SRodney W. Grimes 		swap_pager_full = 1;
19326f9a767SRodney W. Grimes 	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
19426f9a767SRodney W. Grimes 		swap_pager_full = 0;
19526f9a767SRodney W. Grimes }
19626f9a767SRodney W. Grimes 
197f5a12711SPoul-Henning Kamp static void
198df8bae1dSRodney W. Grimes swap_pager_init()
199df8bae1dSRodney W. Grimes {
200e47ed70bSJohn Dyson 	int maxsafepending;
20124a1cce3SDavid Greenman 	TAILQ_INIT(&swap_pager_object_list);
20224a1cce3SDavid Greenman 	TAILQ_INIT(&swap_pager_un_object_list);
203df8bae1dSRodney W. Grimes 
204df8bae1dSRodney W. Grimes 	/*
205df8bae1dSRodney W. Grimes 	 * Initialize clean lists
206df8bae1dSRodney W. Grimes 	 */
207df8bae1dSRodney W. Grimes 	TAILQ_INIT(&swap_pager_inuse);
20826f9a767SRodney W. Grimes 	TAILQ_INIT(&swap_pager_done);
209df8bae1dSRodney W. Grimes 	TAILQ_INIT(&swap_pager_free);
2103091ee09SJohn Dyson 	swap_pager_free_count = 0;
21126f9a767SRodney W. Grimes 
212df8bae1dSRodney W. Grimes 	/*
213df8bae1dSRodney W. Grimes 	 * Calculate the swap allocation constants.
214df8bae1dSRodney W. Grimes 	 */
215e911eafcSPoul-Henning Kamp 	dmmin = PAGE_SIZE / DEV_BSIZE;
21626f9a767SRodney W. Grimes 	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
217e47ed70bSJohn Dyson 
218e47ed70bSJohn Dyson 	maxsafepending = cnt.v_free_min - cnt.v_free_reserved;
219e47ed70bSJohn Dyson 	npendingio = NPENDINGIO;
220e47ed70bSJohn Dyson 	max_pageout_cluster = MAX_PAGEOUT_CLUSTER;
221e47ed70bSJohn Dyson 
222e47ed70bSJohn Dyson 	if ((2 * NPENDINGIO * MAX_PAGEOUT_CLUSTER) > maxsafepending) {
223e47ed70bSJohn Dyson 		max_pageout_cluster = MAX_PAGEOUT_CLUSTER / 2;
224e47ed70bSJohn Dyson 		npendingio = maxsafepending / (2 * max_pageout_cluster);
225e47ed70bSJohn Dyson 		if (npendingio < 2)
226e47ed70bSJohn Dyson 			npendingio = 2;
227e47ed70bSJohn Dyson 	}
228df8bae1dSRodney W. Grimes }
229df8bae1dSRodney W. Grimes 
23024a1cce3SDavid Greenman void
23124a1cce3SDavid Greenman swap_pager_swap_init()
232df8bae1dSRodney W. Grimes {
23326f9a767SRodney W. Grimes 	swp_clean_t spc;
23426f9a767SRodney W. Grimes 	struct buf *bp;
23524a1cce3SDavid Greenman 	int i;
2360d94caffSDavid Greenman 
23726f9a767SRodney W. Grimes 	/*
2380d94caffSDavid Greenman 	 * kva's are allocated here so that we dont need to keep doing
2390d94caffSDavid Greenman 	 * kmem_alloc pageables at runtime
24026f9a767SRodney W. Grimes 	 */
24126f9a767SRodney W. Grimes 	for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
242e47ed70bSJohn Dyson 		spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * max_pageout_cluster);
24326f9a767SRodney W. Grimes 		if (!spc->spc_kva) {
24426f9a767SRodney W. Grimes 			break;
24526f9a767SRodney W. Grimes 		}
246a1f6d91cSDavid Greenman 		spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
24726f9a767SRodney W. Grimes 		if (!spc->spc_bp) {
24826f9a767SRodney W. Grimes 			kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
24926f9a767SRodney W. Grimes 			break;
25026f9a767SRodney W. Grimes 		}
25126f9a767SRodney W. Grimes 		spc->spc_flags = 0;
25226f9a767SRodney W. Grimes 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
2533091ee09SJohn Dyson 		swap_pager_free_count++;
25426f9a767SRodney W. Grimes 	}
25526f9a767SRodney W. Grimes }
25624a1cce3SDavid Greenman 
25724a1cce3SDavid Greenman int
25824a1cce3SDavid Greenman swap_pager_swp_alloc(object, wait)
25924a1cce3SDavid Greenman 	vm_object_t object;
26024a1cce3SDavid Greenman 	int wait;
26124a1cce3SDavid Greenman {
2622a4895f4SDavid Greenman 	sw_blk_t swb;
2632a4895f4SDavid Greenman 	int nblocks;
26424a1cce3SDavid Greenman 	int i, j;
26524a1cce3SDavid Greenman 
266a316d390SJohn Dyson 	nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
2672a4895f4SDavid Greenman 	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
2682a4895f4SDavid Greenman 	if (swb == NULL)
26924a1cce3SDavid Greenman 		return 1;
27024a1cce3SDavid Greenman 
2712a4895f4SDavid Greenman 	for (i = 0; i < nblocks; i++) {
2722a4895f4SDavid Greenman 		swb[i].swb_valid = 0;
2732a4895f4SDavid Greenman 		swb[i].swb_locked = 0;
27426f9a767SRodney W. Grimes 		for (j = 0; j < SWB_NPAGES; j++)
2752a4895f4SDavid Greenman 			swb[i].swb_block[j] = SWB_EMPTY;
27626f9a767SRodney W. Grimes 	}
27726f9a767SRodney W. Grimes 
2782a4895f4SDavid Greenman 	object->un_pager.swp.swp_nblocks = nblocks;
2792a4895f4SDavid Greenman 	object->un_pager.swp.swp_allocsize = 0;
2802a4895f4SDavid Greenman 	object->un_pager.swp.swp_blocks = swb;
2812a4895f4SDavid Greenman 	object->un_pager.swp.swp_poip = 0;
28224a1cce3SDavid Greenman 
28324a1cce3SDavid Greenman 	if (object->handle != NULL) {
28424a1cce3SDavid Greenman 		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
285df8bae1dSRodney W. Grimes 	} else {
28624a1cce3SDavid Greenman 		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
287df8bae1dSRodney W. Grimes 	}
288df8bae1dSRodney W. Grimes 
28924a1cce3SDavid Greenman 	return 0;
29024a1cce3SDavid Greenman }
29124a1cce3SDavid Greenman 
29224a1cce3SDavid Greenman /*
2932a4895f4SDavid Greenman  * Allocate an object and associated resources.
29424a1cce3SDavid Greenman  * Note that if we are called from the pageout daemon (handle == NULL)
29524a1cce3SDavid Greenman  * we should not wait for memory as it could resulting in deadlock.
29624a1cce3SDavid Greenman  */
297f5a12711SPoul-Henning Kamp static vm_object_t
2986cde7a16SDavid Greenman swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
299b9dcd593SBruce Evans 		 vm_ooffset_t offset)
30024a1cce3SDavid Greenman {
30124a1cce3SDavid Greenman 	vm_object_t object;
30224a1cce3SDavid Greenman 
30324a1cce3SDavid Greenman 	/*
30424a1cce3SDavid Greenman 	 * If this is a "named" anonymous region, look it up and use the
30524a1cce3SDavid Greenman 	 * object if it exists, otherwise allocate a new one.
30624a1cce3SDavid Greenman 	 */
30724a1cce3SDavid Greenman 	if (handle) {
30824a1cce3SDavid Greenman 		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
30924a1cce3SDavid Greenman 		if (object != NULL) {
31024a1cce3SDavid Greenman 			vm_object_reference(object);
31124a1cce3SDavid Greenman 		} else {
31224a1cce3SDavid Greenman 			/*
31324a1cce3SDavid Greenman 			 * XXX - there is a race condition here. Two processes
31424a1cce3SDavid Greenman 			 * can request the same named object simultaneuously,
31524a1cce3SDavid Greenman 			 * and if one blocks for memory, the result is a disaster.
31624a1cce3SDavid Greenman 			 * Probably quite rare, but is yet another reason to just
31724a1cce3SDavid Greenman 			 * rip support of "named anonymous regions" out altogether.
31824a1cce3SDavid Greenman 			 */
319a316d390SJohn Dyson 			object = vm_object_allocate(OBJT_SWAP,
3206cde7a16SDavid Greenman 				OFF_TO_IDX(offset + PAGE_MASK + size));
32124a1cce3SDavid Greenman 			object->handle = handle;
32224a1cce3SDavid Greenman 			(void) swap_pager_swp_alloc(object, M_WAITOK);
32324a1cce3SDavid Greenman 		}
32424a1cce3SDavid Greenman 	} else {
325a316d390SJohn Dyson 		object = vm_object_allocate(OBJT_SWAP,
3266cde7a16SDavid Greenman 			OFF_TO_IDX(offset + PAGE_MASK + size));
32724a1cce3SDavid Greenman 		(void) swap_pager_swp_alloc(object, M_WAITOK);
32824a1cce3SDavid Greenman 	}
32924a1cce3SDavid Greenman 
33024a1cce3SDavid Greenman 	return (object);
331df8bae1dSRodney W. Grimes }
332df8bae1dSRodney W. Grimes 
33326f9a767SRodney W. Grimes /*
33426f9a767SRodney W. Grimes  * returns disk block associated with pager and offset
33526f9a767SRodney W. Grimes  * additionally, as a side effect returns a flag indicating
33626f9a767SRodney W. Grimes  * if the block has been written
33726f9a767SRodney W. Grimes  */
33826f9a767SRodney W. Grimes 
339c1087c13SBruce Evans static __inline daddr_t *
340a316d390SJohn Dyson swap_pager_diskaddr(object, pindex, valid)
34124a1cce3SDavid Greenman 	vm_object_t object;
342a316d390SJohn Dyson 	vm_pindex_t pindex;
34326f9a767SRodney W. Grimes 	int *valid;
34426f9a767SRodney W. Grimes {
34526f9a767SRodney W. Grimes 	register sw_blk_t swb;
34626f9a767SRodney W. Grimes 	int ix;
34726f9a767SRodney W. Grimes 
34826f9a767SRodney W. Grimes 	if (valid)
34926f9a767SRodney W. Grimes 		*valid = 0;
350a316d390SJohn Dyson 	ix = pindex / SWB_NPAGES;
3512a4895f4SDavid Greenman 	if ((ix >= object->un_pager.swp.swp_nblocks) ||
352a316d390SJohn Dyson 	    (pindex >= object->size)) {
35326f9a767SRodney W. Grimes 		return (FALSE);
35426f9a767SRodney W. Grimes 	}
3552a4895f4SDavid Greenman 	swb = &object->un_pager.swp.swp_blocks[ix];
356a316d390SJohn Dyson 	ix = pindex % SWB_NPAGES;
35726f9a767SRodney W. Grimes 	if (valid)
35826f9a767SRodney W. Grimes 		*valid = swb->swb_valid & (1 << ix);
35926f9a767SRodney W. Grimes 	return &swb->swb_block[ix];
36026f9a767SRodney W. Grimes }
36126f9a767SRodney W. Grimes 
36226f9a767SRodney W. Grimes /*
36326f9a767SRodney W. Grimes  * Utility routine to set the valid (written) bit for
36426f9a767SRodney W. Grimes  * a block associated with a pager and offset
36526f9a767SRodney W. Grimes  */
366df8bae1dSRodney W. Grimes static void
3672a4895f4SDavid Greenman swap_pager_setvalid(object, offset, valid)
3682a4895f4SDavid Greenman 	vm_object_t object;
36926f9a767SRodney W. Grimes 	vm_offset_t offset;
37026f9a767SRodney W. Grimes 	int valid;
37126f9a767SRodney W. Grimes {
37226f9a767SRodney W. Grimes 	register sw_blk_t swb;
37326f9a767SRodney W. Grimes 	int ix;
37426f9a767SRodney W. Grimes 
375a316d390SJohn Dyson 	ix = offset / SWB_NPAGES;
3762a4895f4SDavid Greenman 	if (ix >= object->un_pager.swp.swp_nblocks)
37726f9a767SRodney W. Grimes 		return;
37826f9a767SRodney W. Grimes 
3792a4895f4SDavid Greenman 	swb = &object->un_pager.swp.swp_blocks[ix];
380a316d390SJohn Dyson 	ix = offset % SWB_NPAGES;
38126f9a767SRodney W. Grimes 	if (valid)
38226f9a767SRodney W. Grimes 		swb->swb_valid |= (1 << ix);
38326f9a767SRodney W. Grimes 	else
38426f9a767SRodney W. Grimes 		swb->swb_valid &= ~(1 << ix);
38526f9a767SRodney W. Grimes 	return;
38626f9a767SRodney W. Grimes }
38726f9a767SRodney W. Grimes 
38826f9a767SRodney W. Grimes /*
38926f9a767SRodney W. Grimes  * this routine allocates swap space with a fragmentation
39026f9a767SRodney W. Grimes  * minimization policy.
39126f9a767SRodney W. Grimes  */
392f5a12711SPoul-Henning Kamp static int
3932a4895f4SDavid Greenman swap_pager_getswapspace(object, amount, rtval)
3942a4895f4SDavid Greenman 	vm_object_t object;
3952a4895f4SDavid Greenman 	unsigned int amount;
396a316d390SJohn Dyson 	daddr_t *rtval;
3970d94caffSDavid Greenman {
398a316d390SJohn Dyson 	unsigned location;
399b44e4b7aSJohn Dyson 
40024ea4a96SDavid Greenman 	vm_swap_size -= amount;
401b44e4b7aSJohn Dyson 	if (!suggest_more_swap && (vm_swap_size < btodb(cnt.v_page_count * PAGE_SIZE))) {
402b44e4b7aSJohn Dyson 		printf("swap_pager: suggest more swap space: %d MB\n",
403b44e4b7aSJohn Dyson 			(2 * cnt.v_page_count * (PAGE_SIZE / 1024)) / 1000);
404b44e4b7aSJohn Dyson 		suggest_more_swap = 1;
405b44e4b7aSJohn Dyson 	}
406b44e4b7aSJohn Dyson 
407a316d390SJohn Dyson 	if (!rlist_alloc(&swaplist, amount, &location)) {
40824ea4a96SDavid Greenman 		vm_swap_size += amount;
40926f9a767SRodney W. Grimes 		return 0;
41024ea4a96SDavid Greenman 	} else {
41124ea4a96SDavid Greenman 		swapsizecheck();
4122a4895f4SDavid Greenman 		object->un_pager.swp.swp_allocsize += amount;
413a316d390SJohn Dyson 		*rtval = location;
41426f9a767SRodney W. Grimes 		return 1;
41526f9a767SRodney W. Grimes 	}
41626f9a767SRodney W. Grimes }
41726f9a767SRodney W. Grimes 
41826f9a767SRodney W. Grimes /*
41926f9a767SRodney W. Grimes  * this routine frees swap space with a fragmentation
42026f9a767SRodney W. Grimes  * minimization policy.
42126f9a767SRodney W. Grimes  */
422f5a12711SPoul-Henning Kamp static void
4232a4895f4SDavid Greenman swap_pager_freeswapspace(object, from, to)
4242a4895f4SDavid Greenman 	vm_object_t object;
4252a4895f4SDavid Greenman 	unsigned int from;
4262a4895f4SDavid Greenman 	unsigned int to;
4270d94caffSDavid Greenman {
42835c10d22SDavid Greenman 	rlist_free(&swaplist, from, to);
42924ea4a96SDavid Greenman 	vm_swap_size += (to - from) + 1;
4302a4895f4SDavid Greenman 	object->un_pager.swp.swp_allocsize -= (to - from) + 1;
43124ea4a96SDavid Greenman 	swapsizecheck();
43226f9a767SRodney W. Grimes }
43326f9a767SRodney W. Grimes /*
43426f9a767SRodney W. Grimes  * this routine frees swap blocks from a specified pager
43526f9a767SRodney W. Grimes  */
43626f9a767SRodney W. Grimes void
43724a1cce3SDavid Greenman swap_pager_freespace(object, start, size)
43824a1cce3SDavid Greenman 	vm_object_t object;
439a316d390SJohn Dyson 	vm_pindex_t start;
440a316d390SJohn Dyson 	vm_size_t size;
44126f9a767SRodney W. Grimes {
442a316d390SJohn Dyson 	vm_pindex_t i;
44326f9a767SRodney W. Grimes 	int s;
44426f9a767SRodney W. Grimes 
445e47ed70bSJohn Dyson 	s = splvm();
446a316d390SJohn Dyson 	for (i = start; i < start + size; i += 1) {
44726f9a767SRodney W. Grimes 		int valid;
448a316d390SJohn Dyson 		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
4490d94caffSDavid Greenman 
45026f9a767SRodney W. Grimes 		if (addr && *addr != SWB_EMPTY) {
4512a4895f4SDavid Greenman 			swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
45226f9a767SRodney W. Grimes 			if (valid) {
4532a4895f4SDavid Greenman 				swap_pager_setvalid(object, i, 0);
45426f9a767SRodney W. Grimes 			}
45526f9a767SRodney W. Grimes 			*addr = SWB_EMPTY;
45626f9a767SRodney W. Grimes 		}
45726f9a767SRodney W. Grimes 	}
45826f9a767SRodney W. Grimes 	splx(s);
45926f9a767SRodney W. Grimes }
46026f9a767SRodney W. Grimes 
4610a47b48bSJohn Dyson /*
4620a47b48bSJohn Dyson  * same as freespace, but don't free, just force a DMZ next time
4630a47b48bSJohn Dyson  */
4640a47b48bSJohn Dyson void
4650a47b48bSJohn Dyson swap_pager_dmzspace(object, start, size)
4660a47b48bSJohn Dyson 	vm_object_t object;
4670a47b48bSJohn Dyson 	vm_pindex_t start;
4680a47b48bSJohn Dyson 	vm_size_t size;
4690a47b48bSJohn Dyson {
4700a47b48bSJohn Dyson 	vm_pindex_t i;
4710a47b48bSJohn Dyson 	int s;
4720a47b48bSJohn Dyson 
473e47ed70bSJohn Dyson 	s = splvm();
4740a47b48bSJohn Dyson 	for (i = start; i < start + size; i += 1) {
4750a47b48bSJohn Dyson 		int valid;
4760a47b48bSJohn Dyson 		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
4770a47b48bSJohn Dyson 
4780a47b48bSJohn Dyson 		if (addr && *addr != SWB_EMPTY) {
4790a47b48bSJohn Dyson 			if (valid) {
4800a47b48bSJohn Dyson 				swap_pager_setvalid(object, i, 0);
4810a47b48bSJohn Dyson 			}
4820a47b48bSJohn Dyson 		}
4830a47b48bSJohn Dyson 	}
4840a47b48bSJohn Dyson 	splx(s);
4850a47b48bSJohn Dyson }
4860a47b48bSJohn Dyson 
487a1f6d91cSDavid Greenman static void
4882a4895f4SDavid Greenman swap_pager_free_swap(object)
4892a4895f4SDavid Greenman 	vm_object_t object;
490a1f6d91cSDavid Greenman {
491a1f6d91cSDavid Greenman 	register int i, j;
4922a4895f4SDavid Greenman 	register sw_blk_t swb;
493a1f6d91cSDavid Greenman 	int first_block=0, block_count=0;
494a1f6d91cSDavid Greenman 	int s;
495a1f6d91cSDavid Greenman 	/*
496a1f6d91cSDavid Greenman 	 * Free left over swap blocks
497a1f6d91cSDavid Greenman 	 */
49847221757SJohn Dyson 	swb = object->un_pager.swp.swp_blocks;
4992d8acc0fSJohn Dyson 	if (swb == NULL) {
50047221757SJohn Dyson 		return;
5012d8acc0fSJohn Dyson 	}
50247221757SJohn Dyson 
5032d8acc0fSJohn Dyson 	s = splvm();
50447221757SJohn Dyson 	for (i = 0; i < object->un_pager.swp.swp_nblocks; i++, swb++) {
505a1f6d91cSDavid Greenman 		for (j = 0; j < SWB_NPAGES; j++) {
5062a4895f4SDavid Greenman 			if (swb->swb_block[j] != SWB_EMPTY) {
507a1f6d91cSDavid Greenman 				/*
508a1f6d91cSDavid Greenman    				 * initially the length of the run is zero
509a1f6d91cSDavid Greenman    				 */
510a1f6d91cSDavid Greenman 				if (block_count == 0) {
5112a4895f4SDavid Greenman 					first_block = swb->swb_block[j];
512a1f6d91cSDavid Greenman 					block_count = btodb(PAGE_SIZE);
5132a4895f4SDavid Greenman 					swb->swb_block[j] = SWB_EMPTY;
514a1f6d91cSDavid Greenman 				/*
515a1f6d91cSDavid Greenman    				 * if the new block can be included into the current run
516a1f6d91cSDavid Greenman    				 */
5172a4895f4SDavid Greenman 				} else if (swb->swb_block[j] == first_block + block_count) {
518a1f6d91cSDavid Greenman 					block_count += btodb(PAGE_SIZE);
5192a4895f4SDavid Greenman 					swb->swb_block[j] = SWB_EMPTY;
520a1f6d91cSDavid Greenman 				/*
521a1f6d91cSDavid Greenman    				 * terminate the previous run, and start a new one
522a1f6d91cSDavid Greenman    				 */
523a1f6d91cSDavid Greenman 				} else {
5242a4895f4SDavid Greenman 					swap_pager_freeswapspace(object, first_block,
525a1f6d91cSDavid Greenman    					(unsigned) first_block + block_count - 1);
5262a4895f4SDavid Greenman 					first_block = swb->swb_block[j];
527a1f6d91cSDavid Greenman 					block_count = btodb(PAGE_SIZE);
5282a4895f4SDavid Greenman 					swb->swb_block[j] = SWB_EMPTY;
529a1f6d91cSDavid Greenman 				}
530a1f6d91cSDavid Greenman 			}
531a1f6d91cSDavid Greenman 		}
532a1f6d91cSDavid Greenman 	}
533a1f6d91cSDavid Greenman 
534a1f6d91cSDavid Greenman 	if (block_count) {
5352a4895f4SDavid Greenman 		swap_pager_freeswapspace(object, first_block,
536a1f6d91cSDavid Greenman 		   	 (unsigned) first_block + block_count - 1);
537a1f6d91cSDavid Greenman 	}
538a1f6d91cSDavid Greenman 	splx(s);
539a1f6d91cSDavid Greenman }
540a1f6d91cSDavid Greenman 
541a1f6d91cSDavid Greenman 
/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to. It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */
54926f9a767SRodney W. Grimes 
55026f9a767SRodney W. Grimes /*
55126f9a767SRodney W. Grimes  * Maximum number of blocks (pages) to reclaim per pass
55226f9a767SRodney W. Grimes  */
553a1f6d91cSDavid Greenman #define MAXRECLAIM 128
55426f9a767SRodney W. Grimes 
555f5a12711SPoul-Henning Kamp static void
55626f9a767SRodney W. Grimes swap_pager_reclaim()
55726f9a767SRodney W. Grimes {
55824a1cce3SDavid Greenman 	vm_object_t object;
55926f9a767SRodney W. Grimes 	int i, j, k;
56026f9a767SRodney W. Grimes 	int s;
56126f9a767SRodney W. Grimes 	int reclaimcount;
562a1f6d91cSDavid Greenman 	static struct {
563a1f6d91cSDavid Greenman 		int address;
5642a4895f4SDavid Greenman 		vm_object_t object;
565a1f6d91cSDavid Greenman 	} reclaims[MAXRECLAIM];
56626f9a767SRodney W. Grimes 	static int in_reclaim;
56726f9a767SRodney W. Grimes 
56826f9a767SRodney W. Grimes 	/*
56926f9a767SRodney W. Grimes 	 * allow only one process to be in the swap_pager_reclaim subroutine
57026f9a767SRodney W. Grimes 	 */
571e47ed70bSJohn Dyson 	s = splvm();
57226f9a767SRodney W. Grimes 	if (in_reclaim) {
57324a1cce3SDavid Greenman 		tsleep(&in_reclaim, PSWP, "swrclm", 0);
57426f9a767SRodney W. Grimes 		splx(s);
57526f9a767SRodney W. Grimes 		return;
57626f9a767SRodney W. Grimes 	}
57726f9a767SRodney W. Grimes 	in_reclaim = 1;
57826f9a767SRodney W. Grimes 	reclaimcount = 0;
57926f9a767SRodney W. Grimes 
58026f9a767SRodney W. Grimes 	/* for each pager queue */
58126f9a767SRodney W. Grimes 	for (k = 0; swp_qs[k]; k++) {
58226f9a767SRodney W. Grimes 
583b18bfc3dSJohn Dyson 		object = TAILQ_FIRST(swp_qs[k]);
58424a1cce3SDavid Greenman 		while (object && (reclaimcount < MAXRECLAIM)) {
58526f9a767SRodney W. Grimes 
58626f9a767SRodney W. Grimes 			/*
58726f9a767SRodney W. Grimes 			 * see if any blocks associated with a pager has been
58826f9a767SRodney W. Grimes 			 * allocated but not used (written)
58926f9a767SRodney W. Grimes 			 */
5905070c7f8SJohn Dyson 			if ((object->flags & OBJ_DEAD) == 0 &&
5915070c7f8SJohn Dyson 				(object->paging_in_progress == 0)) {
5922a4895f4SDavid Greenman 				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
5932a4895f4SDavid Greenman 					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];
5940d94caffSDavid Greenman 
59526f9a767SRodney W. Grimes 					if (swb->swb_locked)
59626f9a767SRodney W. Grimes 						continue;
59726f9a767SRodney W. Grimes 					for (j = 0; j < SWB_NPAGES; j++) {
59826f9a767SRodney W. Grimes 						if (swb->swb_block[j] != SWB_EMPTY &&
59926f9a767SRodney W. Grimes 						    (swb->swb_valid & (1 << j)) == 0) {
600a1f6d91cSDavid Greenman 							reclaims[reclaimcount].address = swb->swb_block[j];
6012a4895f4SDavid Greenman 							reclaims[reclaimcount++].object = object;
60226f9a767SRodney W. Grimes 							swb->swb_block[j] = SWB_EMPTY;
60326f9a767SRodney W. Grimes 							if (reclaimcount >= MAXRECLAIM)
60426f9a767SRodney W. Grimes 								goto rfinished;
60526f9a767SRodney W. Grimes 						}
60626f9a767SRodney W. Grimes 					}
60726f9a767SRodney W. Grimes 				}
608a316d390SJohn Dyson 			}
609b18bfc3dSJohn Dyson 			object = TAILQ_NEXT(object, pager_object_list);
61026f9a767SRodney W. Grimes 		}
61126f9a767SRodney W. Grimes 	}
61226f9a767SRodney W. Grimes 
61326f9a767SRodney W. Grimes rfinished:
61426f9a767SRodney W. Grimes 
61526f9a767SRodney W. Grimes 	/*
61626f9a767SRodney W. Grimes 	 * free the blocks that have been added to the reclaim list
61726f9a767SRodney W. Grimes 	 */
61826f9a767SRodney W. Grimes 	for (i = 0; i < reclaimcount; i++) {
6192a4895f4SDavid Greenman 		swap_pager_freeswapspace(reclaims[i].object,
6202a4895f4SDavid Greenman 		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
62126f9a767SRodney W. Grimes 	}
62226f9a767SRodney W. Grimes 	splx(s);
62326f9a767SRodney W. Grimes 	in_reclaim = 0;
62424a1cce3SDavid Greenman 	wakeup(&in_reclaim);
62526f9a767SRodney W. Grimes }
62626f9a767SRodney W. Grimes 
62726f9a767SRodney W. Grimes 
62826f9a767SRodney W. Grimes /*
62926f9a767SRodney W. Grimes  * swap_pager_copy copies blocks from one pager to another and
63026f9a767SRodney W. Grimes  * destroys the source pager
63126f9a767SRodney W. Grimes  */
63226f9a767SRodney W. Grimes 
63326f9a767SRodney W. Grimes void
634cbd8ec09SJohn Dyson swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset,
635cbd8ec09SJohn Dyson 	offset, destroysource)
63624a1cce3SDavid Greenman 	vm_object_t srcobject;
637a316d390SJohn Dyson 	vm_pindex_t srcoffset;
63824a1cce3SDavid Greenman 	vm_object_t dstobject;
639a316d390SJohn Dyson 	vm_pindex_t dstoffset;
640a316d390SJohn Dyson 	vm_pindex_t offset;
641c0877f10SJohn Dyson 	int destroysource;
64226f9a767SRodney W. Grimes {
643a316d390SJohn Dyson 	vm_pindex_t i;
644a1f6d91cSDavid Greenman 	int origsize;
64526f9a767SRodney W. Grimes 	int s;
64626f9a767SRodney W. Grimes 
64724ea4a96SDavid Greenman 	if (vm_swap_size)
64824ea4a96SDavid Greenman 		no_swap_space = 0;
64924ea4a96SDavid Greenman 
6502a4895f4SDavid Greenman 	origsize = srcobject->un_pager.swp.swp_allocsize;
65126f9a767SRodney W. Grimes 
65226f9a767SRodney W. Grimes 	/*
65324a1cce3SDavid Greenman 	 * remove the source object from the swap_pager internal queue
65426f9a767SRodney W. Grimes 	 */
655cbd8ec09SJohn Dyson 	if (destroysource) {
65624a1cce3SDavid Greenman 		if (srcobject->handle == NULL) {
65724a1cce3SDavid Greenman 			TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
65826f9a767SRodney W. Grimes 		} else {
65924a1cce3SDavid Greenman 			TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
66026f9a767SRodney W. Grimes 		}
661cbd8ec09SJohn Dyson 	}
66226f9a767SRodney W. Grimes 
663e47ed70bSJohn Dyson 	s = splvm();
6642a4895f4SDavid Greenman 	while (srcobject->un_pager.swp.swp_poip) {
6652a4895f4SDavid Greenman 		tsleep(srcobject, PVM, "spgout", 0);
66626f9a767SRodney W. Grimes 	}
66726f9a767SRodney W. Grimes 
66826f9a767SRodney W. Grimes 	/*
66926f9a767SRodney W. Grimes 	 * clean all of the pages that are currently active and finished
67026f9a767SRodney W. Grimes 	 */
671e47ed70bSJohn Dyson 	if (swap_pager_free_pending)
67224a1cce3SDavid Greenman 		swap_pager_sync();
67326f9a767SRodney W. Grimes 
67426f9a767SRodney W. Grimes 	/*
67526f9a767SRodney W. Grimes 	 * transfer source to destination
67626f9a767SRodney W. Grimes 	 */
677a316d390SJohn Dyson 	for (i = 0; i < dstobject->size; i += 1) {
67826f9a767SRodney W. Grimes 		int srcvalid, dstvalid;
679cbd8ec09SJohn Dyson 		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject,
680cbd8ec09SJohn Dyson 				i + offset + srcoffset, &srcvalid);
681a316d390SJohn Dyson 		daddr_t *dstaddrp;
6820d94caffSDavid Greenman 
68326f9a767SRodney W. Grimes 		/*
68426f9a767SRodney W. Grimes 		 * see if the source has space allocated
68526f9a767SRodney W. Grimes 		 */
68626f9a767SRodney W. Grimes 		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
68726f9a767SRodney W. Grimes 			/*
6880d94caffSDavid Greenman 			 * if the source is valid and the dest has no space,
6890d94caffSDavid Greenman 			 * then copy the allocation from the srouce to the
6900d94caffSDavid Greenman 			 * dest.
69126f9a767SRodney W. Grimes 			 */
69226f9a767SRodney W. Grimes 			if (srcvalid) {
69324a1cce3SDavid Greenman 				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
694a1f6d91cSDavid Greenman 							&dstvalid);
69526f9a767SRodney W. Grimes 				/*
6960d94caffSDavid Greenman 				 * if the dest already has a valid block,
6970d94caffSDavid Greenman 				 * deallocate the source block without
6980d94caffSDavid Greenman 				 * copying.
69926f9a767SRodney W. Grimes 				 */
70026f9a767SRodney W. Grimes 				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
7012a4895f4SDavid Greenman 					swap_pager_freeswapspace(dstobject, *dstaddrp,
702a1f6d91cSDavid Greenman 						*dstaddrp + btodb(PAGE_SIZE) - 1);
70326f9a767SRodney W. Grimes 					*dstaddrp = SWB_EMPTY;
70426f9a767SRodney W. Grimes 				}
70526f9a767SRodney W. Grimes 				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
70626f9a767SRodney W. Grimes 					*dstaddrp = *srcaddrp;
70726f9a767SRodney W. Grimes 					*srcaddrp = SWB_EMPTY;
7082a4895f4SDavid Greenman 					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
7092a4895f4SDavid Greenman 					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
7102a4895f4SDavid Greenman 					swap_pager_setvalid(dstobject, i + dstoffset, 1);
71126f9a767SRodney W. Grimes 				}
71226f9a767SRodney W. Grimes 			}
71326f9a767SRodney W. Grimes 			/*
7140d94caffSDavid Greenman 			 * if the source is not empty at this point, then
7150d94caffSDavid Greenman 			 * deallocate the space.
71626f9a767SRodney W. Grimes 			 */
71726f9a767SRodney W. Grimes 			if (*srcaddrp != SWB_EMPTY) {
7182a4895f4SDavid Greenman 				swap_pager_freeswapspace(srcobject, *srcaddrp,
719a1f6d91cSDavid Greenman 					*srcaddrp + btodb(PAGE_SIZE) - 1);
72026f9a767SRodney W. Grimes 				*srcaddrp = SWB_EMPTY;
72126f9a767SRodney W. Grimes 			}
72226f9a767SRodney W. Grimes 		}
72326f9a767SRodney W. Grimes 	}
72426f9a767SRodney W. Grimes 	splx(s);
72526f9a767SRodney W. Grimes 
726a1f6d91cSDavid Greenman 	/*
727a1f6d91cSDavid Greenman 	 * Free left over swap blocks
728a1f6d91cSDavid Greenman 	 */
729c0877f10SJohn Dyson 	if (destroysource) {
7302a4895f4SDavid Greenman 		swap_pager_free_swap(srcobject);
731a1f6d91cSDavid Greenman 
7322a4895f4SDavid Greenman 		if (srcobject->un_pager.swp.swp_allocsize) {
7332a4895f4SDavid Greenman 			printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
7342a4895f4SDavid Greenman 			    srcobject->un_pager.swp.swp_allocsize, origsize);
7352a4895f4SDavid Greenman 		}
7362a4895f4SDavid Greenman 
7372a4895f4SDavid Greenman 		free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
7382a4895f4SDavid Greenman 		srcobject->un_pager.swp.swp_blocks = NULL;
739c0877f10SJohn Dyson 	}
74026f9a767SRodney W. Grimes 	return;
74126f9a767SRodney W. Grimes }
74226f9a767SRodney W. Grimes 
743f5a12711SPoul-Henning Kamp static void
74424a1cce3SDavid Greenman swap_pager_dealloc(object)
74524a1cce3SDavid Greenman 	vm_object_t object;
746df8bae1dSRodney W. Grimes {
747df8bae1dSRodney W. Grimes 	int s;
74847221757SJohn Dyson 	sw_blk_t swb;
749df8bae1dSRodney W. Grimes 
750df8bae1dSRodney W. Grimes 	/*
7510d94caffSDavid Greenman 	 * Remove from list right away so lookups will fail if we block for
7520d94caffSDavid Greenman 	 * pageout completion.
753df8bae1dSRodney W. Grimes 	 */
75424a1cce3SDavid Greenman 	if (object->handle == NULL) {
75524a1cce3SDavid Greenman 		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
75626f9a767SRodney W. Grimes 	} else {
75724a1cce3SDavid Greenman 		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
758df8bae1dSRodney W. Grimes 	}
75924a1cce3SDavid Greenman 
760df8bae1dSRodney W. Grimes 	/*
7610d94caffSDavid Greenman 	 * Wait for all pageouts to finish and remove all entries from
7620d94caffSDavid Greenman 	 * cleaning list.
763df8bae1dSRodney W. Grimes 	 */
76426f9a767SRodney W. Grimes 
765e47ed70bSJohn Dyson 	s = splvm();
7662a4895f4SDavid Greenman 	while (object->un_pager.swp.swp_poip) {
7672a4895f4SDavid Greenman 		tsleep(object, PVM, "swpout", 0);
768df8bae1dSRodney W. Grimes 	}
769df8bae1dSRodney W. Grimes 	splx(s);
77026f9a767SRodney W. Grimes 
771e47ed70bSJohn Dyson 	if (swap_pager_free_pending)
77224a1cce3SDavid Greenman 		swap_pager_sync();
773df8bae1dSRodney W. Grimes 
774df8bae1dSRodney W. Grimes 	/*
775df8bae1dSRodney W. Grimes 	 * Free left over swap blocks
776df8bae1dSRodney W. Grimes 	 */
7772a4895f4SDavid Greenman 	swap_pager_free_swap(object);
77826f9a767SRodney W. Grimes 
7792a4895f4SDavid Greenman 	if (object->un_pager.swp.swp_allocsize) {
7802a4895f4SDavid Greenman 		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
7812a4895f4SDavid Greenman 		    object->un_pager.swp.swp_allocsize);
7822a4895f4SDavid Greenman 	}
78347221757SJohn Dyson 	swb = object->un_pager.swp.swp_blocks;
78447221757SJohn Dyson 	if (swb) {
785df8bae1dSRodney W. Grimes 		/*
786df8bae1dSRodney W. Grimes    		* Free swap management resources
787df8bae1dSRodney W. Grimes    		*/
78847221757SJohn Dyson 		free(swb, M_VMPGDATA);
7892a4895f4SDavid Greenman 		object->un_pager.swp.swp_blocks = NULL;
79026f9a767SRodney W. Grimes 	}
79147221757SJohn Dyson }
79226f9a767SRodney W. Grimes 
793c1087c13SBruce Evans static __inline int
794a316d390SJohn Dyson swap_pager_block_index(pindex)
795a316d390SJohn Dyson 	vm_pindex_t pindex;
79626f9a767SRodney W. Grimes {
797a316d390SJohn Dyson 	return (pindex / SWB_NPAGES);
79826f9a767SRodney W. Grimes }
79926f9a767SRodney W. Grimes 
800c1087c13SBruce Evans static __inline int
801a316d390SJohn Dyson swap_pager_block_offset(pindex)
802a316d390SJohn Dyson 	vm_pindex_t pindex;
80326f9a767SRodney W. Grimes {
804a316d390SJohn Dyson 	return (pindex % SWB_NPAGES);
80526f9a767SRodney W. Grimes }
80626f9a767SRodney W. Grimes 
80726f9a767SRodney W. Grimes /*
80824a1cce3SDavid Greenman  * swap_pager_haspage returns TRUE if the pager has data that has
80926f9a767SRodney W. Grimes  * been written out.
81026f9a767SRodney W. Grimes  */
811f5a12711SPoul-Henning Kamp static boolean_t
812a316d390SJohn Dyson swap_pager_haspage(object, pindex, before, after)
81324a1cce3SDavid Greenman 	vm_object_t object;
814a316d390SJohn Dyson 	vm_pindex_t pindex;
81524a1cce3SDavid Greenman 	int *before;
81624a1cce3SDavid Greenman 	int *after;
81726f9a767SRodney W. Grimes {
81826f9a767SRodney W. Grimes 	register sw_blk_t swb;
81926f9a767SRodney W. Grimes 	int ix;
82026f9a767SRodney W. Grimes 
82124a1cce3SDavid Greenman 	if (before != NULL)
82224a1cce3SDavid Greenman 		*before = 0;
82324a1cce3SDavid Greenman 	if (after != NULL)
82424a1cce3SDavid Greenman 		*after = 0;
825a316d390SJohn Dyson 	ix = pindex / SWB_NPAGES;
8262a4895f4SDavid Greenman 	if (ix >= object->un_pager.swp.swp_nblocks) {
82726f9a767SRodney W. Grimes 		return (FALSE);
82826f9a767SRodney W. Grimes 	}
8292a4895f4SDavid Greenman 	swb = &object->un_pager.swp.swp_blocks[ix];
830a316d390SJohn Dyson 	ix = pindex % SWB_NPAGES;
831170db9c6SJohn Dyson 
83226f9a767SRodney W. Grimes 	if (swb->swb_block[ix] != SWB_EMPTY) {
833170db9c6SJohn Dyson 
834170db9c6SJohn Dyson 		if (swb->swb_valid & (1 << ix)) {
835170db9c6SJohn Dyson 			int tix;
836170db9c6SJohn Dyson 			if (before) {
837170db9c6SJohn Dyson 				for(tix = ix - 1; tix >= 0; --tix) {
8382f82e604SDavid Greenman 					if ((swb->swb_valid & (1 << tix)) == 0)
8392f82e604SDavid Greenman 						break;
840ca56715fSJohn Dyson 					if ((swb->swb_block[tix] +
841170db9c6SJohn Dyson 						(ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
842170db9c6SJohn Dyson 						swb->swb_block[ix])
843170db9c6SJohn Dyson 						break;
844170db9c6SJohn Dyson 					(*before)++;
845170db9c6SJohn Dyson 				}
846170db9c6SJohn Dyson 			}
847170db9c6SJohn Dyson 
848170db9c6SJohn Dyson 			if (after) {
849170db9c6SJohn Dyson 				for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
8502f82e604SDavid Greenman 					if ((swb->swb_valid & (1 << tix)) == 0)
8512f82e604SDavid Greenman 						break;
852ca56715fSJohn Dyson 					if ((swb->swb_block[tix] -
853170db9c6SJohn Dyson 						(tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
854170db9c6SJohn Dyson 						swb->swb_block[ix])
855170db9c6SJohn Dyson 						break;
856170db9c6SJohn Dyson 					(*after)++;
857170db9c6SJohn Dyson 				}
858170db9c6SJohn Dyson 			}
859170db9c6SJohn Dyson 
86026f9a767SRodney W. Grimes 			return TRUE;
86126f9a767SRodney W. Grimes 		}
862170db9c6SJohn Dyson 	}
86326f9a767SRodney W. Grimes 	return (FALSE);
86426f9a767SRodney W. Grimes }
86526f9a767SRodney W. Grimes 
86626f9a767SRodney W. Grimes /*
867e47ed70bSJohn Dyson  * Wakeup based upon spc state
868e47ed70bSJohn Dyson  */
869e47ed70bSJohn Dyson static void
870e47ed70bSJohn Dyson spc_wakeup(void)
871e47ed70bSJohn Dyson {
872e47ed70bSJohn Dyson 	if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
873e47ed70bSJohn Dyson 		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
874e47ed70bSJohn Dyson 		wakeup(&swap_pager_needflags);
875e47ed70bSJohn Dyson 	} else if ((swap_pager_needflags & SWAP_FREE_NEEDED) &&
876e47ed70bSJohn Dyson 		swap_pager_free_count >= ((2 * npendingio) / 3)) {
877e47ed70bSJohn Dyson 		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
878e47ed70bSJohn Dyson 		wakeup(&swap_pager_free);
879e47ed70bSJohn Dyson 	}
880e47ed70bSJohn Dyson }
881e47ed70bSJohn Dyson 
882e47ed70bSJohn Dyson /*
883e47ed70bSJohn Dyson  * Free an spc structure
884e47ed70bSJohn Dyson  */
885e47ed70bSJohn Dyson static void
886e47ed70bSJohn Dyson spc_free(spc)
887e47ed70bSJohn Dyson 	swp_clean_t spc;
888e47ed70bSJohn Dyson {
889e47ed70bSJohn Dyson 	spc->spc_flags = 0;
890e47ed70bSJohn Dyson 	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
891e47ed70bSJohn Dyson 	swap_pager_free_count++;
892e47ed70bSJohn Dyson 	if (swap_pager_needflags) {
893e47ed70bSJohn Dyson 		spc_wakeup();
894e47ed70bSJohn Dyson 	}
895e47ed70bSJohn Dyson }
896e47ed70bSJohn Dyson 
897e47ed70bSJohn Dyson /*
89826f9a767SRodney W. Grimes  * swap_pager_ridpages is a convienience routine that deallocates all
89926f9a767SRodney W. Grimes  * but the required page.  this is usually used in error returns that
90026f9a767SRodney W. Grimes  * need to invalidate the "extra" readahead pages.
90126f9a767SRodney W. Grimes  */
90226f9a767SRodney W. Grimes static void
90326f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage)
90426f9a767SRodney W. Grimes 	vm_page_t *m;
90526f9a767SRodney W. Grimes 	int count;
90626f9a767SRodney W. Grimes 	int reqpage;
90726f9a767SRodney W. Grimes {
90826f9a767SRodney W. Grimes 	int i;
9090d94caffSDavid Greenman 
910ffc82b0aSJohn Dyson 	for (i = 0; i < count; i++) {
911ffc82b0aSJohn Dyson 		if (i != reqpage) {
912ffc82b0aSJohn Dyson 			vm_page_free(m[i]);
913ffc82b0aSJohn Dyson 		}
914ffc82b0aSJohn Dyson 	}
91526f9a767SRodney W. Grimes }
91626f9a767SRodney W. Grimes 
91726f9a767SRodney W. Grimes /*
91826f9a767SRodney W. Grimes  * swap_pager_iodone1 is the completion routine for both reads and async writes
91926f9a767SRodney W. Grimes  */
920f5a12711SPoul-Henning Kamp static void
92126f9a767SRodney W. Grimes swap_pager_iodone1(bp)
92226f9a767SRodney W. Grimes 	struct buf *bp;
92326f9a767SRodney W. Grimes {
92426f9a767SRodney W. Grimes 	bp->b_flags |= B_DONE;
92526f9a767SRodney W. Grimes 	bp->b_flags &= ~B_ASYNC;
92624a1cce3SDavid Greenman 	wakeup(bp);
92726f9a767SRodney W. Grimes }
92826f9a767SRodney W. Grimes 
929f708ef1bSPoul-Henning Kamp static int
93024a1cce3SDavid Greenman swap_pager_getpages(object, m, count, reqpage)
93124a1cce3SDavid Greenman 	vm_object_t object;
93226f9a767SRodney W. Grimes 	vm_page_t *m;
93326f9a767SRodney W. Grimes 	int count, reqpage;
934df8bae1dSRodney W. Grimes {
935df8bae1dSRodney W. Grimes 	register struct buf *bp;
93626f9a767SRodney W. Grimes 	sw_blk_t swb[count];
937df8bae1dSRodney W. Grimes 	register int s;
93826f9a767SRodney W. Grimes 	int i;
939df8bae1dSRodney W. Grimes 	boolean_t rv;
94026f9a767SRodney W. Grimes 	vm_offset_t kva, off[count];
941a316d390SJohn Dyson 	vm_pindex_t paging_offset;
94226f9a767SRodney W. Grimes 	int reqaddr[count];
9436d40c3d3SDavid Greenman 	int sequential;
944df8bae1dSRodney W. Grimes 
94526f9a767SRodney W. Grimes 	int first, last;
94626f9a767SRodney W. Grimes 	int failed;
94726f9a767SRodney W. Grimes 	int reqdskregion;
948df8bae1dSRodney W. Grimes 
94926f9a767SRodney W. Grimes 	object = m[reqpage]->object;
950a316d390SJohn Dyson 	paging_offset = OFF_TO_IDX(object->paging_offset);
951a316d390SJohn Dyson 	sequential = (m[reqpage]->pindex == (object->last_read + 1));
9522a4895f4SDavid Greenman 
95326f9a767SRodney W. Grimes 	for (i = 0; i < count; i++) {
954a316d390SJohn Dyson 		vm_pindex_t fidx = m[i]->pindex + paging_offset;
955a316d390SJohn Dyson 		int ix = swap_pager_block_index(fidx);
9560d94caffSDavid Greenman 
9572a4895f4SDavid Greenman 		if (ix >= object->un_pager.swp.swp_nblocks) {
95826f9a767SRodney W. Grimes 			int j;
9590d94caffSDavid Greenman 
96026f9a767SRodney W. Grimes 			if (i <= reqpage) {
96126f9a767SRodney W. Grimes 				swap_pager_ridpages(m, count, reqpage);
962df8bae1dSRodney W. Grimes 				return (VM_PAGER_FAIL);
96326f9a767SRodney W. Grimes 			}
96426f9a767SRodney W. Grimes 			for (j = i; j < count; j++) {
965ffc82b0aSJohn Dyson 				vm_page_free(m[j]);
96626f9a767SRodney W. Grimes 			}
96726f9a767SRodney W. Grimes 			count = i;
96826f9a767SRodney W. Grimes 			break;
96926f9a767SRodney W. Grimes 		}
9702a4895f4SDavid Greenman 		swb[i] = &object->un_pager.swp.swp_blocks[ix];
971a316d390SJohn Dyson 		off[i] = swap_pager_block_offset(fidx);
97226f9a767SRodney W. Grimes 		reqaddr[i] = swb[i]->swb_block[off[i]];
97326f9a767SRodney W. Grimes 	}
97426f9a767SRodney W. Grimes 
97526f9a767SRodney W. Grimes 	/* make sure that our required input request is existant */
97626f9a767SRodney W. Grimes 
97726f9a767SRodney W. Grimes 	if (reqaddr[reqpage] == SWB_EMPTY ||
97826f9a767SRodney W. Grimes 	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
97926f9a767SRodney W. Grimes 		swap_pager_ridpages(m, count, reqpage);
98026f9a767SRodney W. Grimes 		return (VM_PAGER_FAIL);
98126f9a767SRodney W. Grimes 	}
98226f9a767SRodney W. Grimes 	reqdskregion = reqaddr[reqpage] / dmmax;
983df8bae1dSRodney W. Grimes 
984df8bae1dSRodney W. Grimes 	/*
98526f9a767SRodney W. Grimes 	 * search backwards for the first contiguous page to transfer
986df8bae1dSRodney W. Grimes 	 */
98726f9a767SRodney W. Grimes 	failed = 0;
98826f9a767SRodney W. Grimes 	first = 0;
98926f9a767SRodney W. Grimes 	for (i = reqpage - 1; i >= 0; --i) {
9906d40c3d3SDavid Greenman 		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
99126f9a767SRodney W. Grimes 		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
99226f9a767SRodney W. Grimes 		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
99326f9a767SRodney W. Grimes 		    ((reqaddr[i] / dmmax) != reqdskregion)) {
99426f9a767SRodney W. Grimes 			failed = 1;
995ffc82b0aSJohn Dyson 			vm_page_free(m[i]);
99626f9a767SRodney W. Grimes 			if (first == 0)
99726f9a767SRodney W. Grimes 				first = i + 1;
99826f9a767SRodney W. Grimes 		}
999df8bae1dSRodney W. Grimes 	}
1000df8bae1dSRodney W. Grimes 	/*
100126f9a767SRodney W. Grimes 	 * search forwards for the last contiguous page to transfer
1002df8bae1dSRodney W. Grimes 	 */
100326f9a767SRodney W. Grimes 	failed = 0;
100426f9a767SRodney W. Grimes 	last = count;
100526f9a767SRodney W. Grimes 	for (i = reqpage + 1; i < count; i++) {
100626f9a767SRodney W. Grimes 		if (failed || (reqaddr[i] == SWB_EMPTY) ||
100726f9a767SRodney W. Grimes 		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
100826f9a767SRodney W. Grimes 		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
100926f9a767SRodney W. Grimes 		    ((reqaddr[i] / dmmax) != reqdskregion)) {
101026f9a767SRodney W. Grimes 			failed = 1;
1011ffc82b0aSJohn Dyson 			vm_page_free(m[i]);
101226f9a767SRodney W. Grimes 			if (last == count)
101326f9a767SRodney W. Grimes 				last = i;
101426f9a767SRodney W. Grimes 		}
101526f9a767SRodney W. Grimes 	}
101626f9a767SRodney W. Grimes 
101726f9a767SRodney W. Grimes 	count = last;
101826f9a767SRodney W. Grimes 	if (first != 0) {
101926f9a767SRodney W. Grimes 		for (i = first; i < count; i++) {
102026f9a767SRodney W. Grimes 			m[i - first] = m[i];
102126f9a767SRodney W. Grimes 			reqaddr[i - first] = reqaddr[i];
102226f9a767SRodney W. Grimes 			off[i - first] = off[i];
102326f9a767SRodney W. Grimes 		}
102426f9a767SRodney W. Grimes 		count -= first;
102526f9a767SRodney W. Grimes 		reqpage -= first;
102626f9a767SRodney W. Grimes 	}
102726f9a767SRodney W. Grimes 	++swb[reqpage]->swb_locked;
102826f9a767SRodney W. Grimes 
102926f9a767SRodney W. Grimes 	/*
10300d94caffSDavid Greenman 	 * at this point: "m" is a pointer to the array of vm_page_t for
10310d94caffSDavid Greenman 	 * paging I/O "count" is the number of vm_page_t entries represented
10320d94caffSDavid Greenman 	 * by "m" "object" is the vm_object_t for I/O "reqpage" is the index
10330d94caffSDavid Greenman 	 * into "m" for the page actually faulted
103426f9a767SRodney W. Grimes 	 */
103526f9a767SRodney W. Grimes 
103616f62314SDavid Greenman 	/*
103716f62314SDavid Greenman 	 * Get a swap buffer header to perform the IO
103816f62314SDavid Greenman 	 */
103926f9a767SRodney W. Grimes 	bp = getpbuf();
104016f62314SDavid Greenman 	kva = (vm_offset_t) bp->b_data;
104126f9a767SRodney W. Grimes 
104216f62314SDavid Greenman 	/*
104316f62314SDavid Greenman 	 * map our page(s) into kva for input
104416f62314SDavid Greenman 	 */
104516f62314SDavid Greenman 	pmap_qenter(kva, m, count);
104616f62314SDavid Greenman 
1047aba8f38eSDavid Greenman 	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
104826f9a767SRodney W. Grimes 	bp->b_iodone = swap_pager_iodone1;
1049df8bae1dSRodney W. Grimes 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
105026f9a767SRodney W. Grimes 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
105126f9a767SRodney W. Grimes 	crhold(bp->b_rcred);
105226f9a767SRodney W. Grimes 	crhold(bp->b_wcred);
1053ab3f7469SPoul-Henning Kamp 	bp->b_data = (caddr_t) kva;
105426f9a767SRodney W. Grimes 	bp->b_blkno = reqaddr[0];
105526f9a767SRodney W. Grimes 	bp->b_bcount = PAGE_SIZE * count;
105626f9a767SRodney W. Grimes 	bp->b_bufsize = PAGE_SIZE * count;
105726f9a767SRodney W. Grimes 
10580d94caffSDavid Greenman 	pbgetvp(swapdev_vp, bp);
1059df8bae1dSRodney W. Grimes 
1060976e77fcSDavid Greenman 	cnt.v_swapin++;
1061976e77fcSDavid Greenman 	cnt.v_swappgsin += count;
1062df8bae1dSRodney W. Grimes 	/*
106326f9a767SRodney W. Grimes 	 * perform the I/O
1064df8bae1dSRodney W. Grimes 	 */
1065fd5d1124SJulian Elischer 	VOP_STRATEGY(bp->b_vp, bp);
106626f9a767SRodney W. Grimes 
106726f9a767SRodney W. Grimes 	/*
106826f9a767SRodney W. Grimes 	 * wait for the sync I/O to complete
106926f9a767SRodney W. Grimes 	 */
1070e47ed70bSJohn Dyson 	s = splvm();
107126f9a767SRodney W. Grimes 	while ((bp->b_flags & B_DONE) == 0) {
10723091ee09SJohn Dyson 		if (tsleep(bp, PVM, "swread", hz*20)) {
1073ac1e407bSBruce Evans 			printf(
1074ac1e407bSBruce Evans "swap_pager: indefinite wait buffer: device: %#lx, blkno: %ld, size: %ld\n",
1075ac1e407bSBruce Evans 			    (u_long)bp->b_dev, (long)bp->b_blkno,
1076ac1e407bSBruce Evans 			    (long)bp->b_bcount);
10773091ee09SJohn Dyson 		}
1078df8bae1dSRodney W. Grimes 	}
10791b119d9dSDavid Greenman 
10801b119d9dSDavid Greenman 	if (bp->b_flags & B_ERROR) {
1081ac1e407bSBruce Evans 		printf(
1082ac1e407bSBruce Evans "swap_pager: I/O error - pagein failed; blkno %ld, size %ld, error %d\n",
1083ac1e407bSBruce Evans 		    (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
1084a83c285cSDavid Greenman 		rv = VM_PAGER_ERROR;
10851b119d9dSDavid Greenman 	} else {
10861b119d9dSDavid Greenman 		rv = VM_PAGER_OK;
10871b119d9dSDavid Greenman 	}
108826f9a767SRodney W. Grimes 
1089df8bae1dSRodney W. Grimes 	splx(s);
10902a4895f4SDavid Greenman 	swb[reqpage]->swb_locked--;
109126f9a767SRodney W. Grimes 
109226f9a767SRodney W. Grimes 	/*
109326f9a767SRodney W. Grimes 	 * remove the mapping for kernel virtual
109426f9a767SRodney W. Grimes 	 */
109516f62314SDavid Greenman 	pmap_qremove(kva, count);
109626f9a767SRodney W. Grimes 
109726f9a767SRodney W. Grimes 	/*
109826f9a767SRodney W. Grimes 	 * release the physical I/O buffer
109926f9a767SRodney W. Grimes 	 */
110026f9a767SRodney W. Grimes 	relpbuf(bp);
110126f9a767SRodney W. Grimes 	/*
110226f9a767SRodney W. Grimes 	 * finish up input if everything is ok
110326f9a767SRodney W. Grimes 	 */
110426f9a767SRodney W. Grimes 	if (rv == VM_PAGER_OK) {
110526f9a767SRodney W. Grimes 		for (i = 0; i < count; i++) {
11060d94caffSDavid Greenman 			m[i]->dirty = 0;
1107e69763a3SDoug Rabson 			vm_page_flag_clear(m[i], PG_ZERO);
110826f9a767SRodney W. Grimes 			if (i != reqpage) {
110926f9a767SRodney W. Grimes 				/*
11100d94caffSDavid Greenman 				 * whether or not to leave the page
11110d94caffSDavid Greenman 				 * activated is up in the air, but we
11120d94caffSDavid Greenman 				 * should put the page on a page queue
11130d94caffSDavid Greenman 				 * somewhere. (it already is in the
11140d94caffSDavid Greenman 				 * object). After some emperical
11150d94caffSDavid Greenman 				 * results, it is best to deactivate
11160d94caffSDavid Greenman 				 * the readahead pages.
111726f9a767SRodney W. Grimes 				 */
111826f9a767SRodney W. Grimes 				vm_page_deactivate(m[i]);
111926f9a767SRodney W. Grimes 
112026f9a767SRodney W. Grimes 				/*
11210d94caffSDavid Greenman 				 * just in case someone was asking for
11220d94caffSDavid Greenman 				 * this page we now tell them that it
11230d94caffSDavid Greenman 				 * is ok to use
112426f9a767SRodney W. Grimes 				 */
11250d94caffSDavid Greenman 				m[i]->valid = VM_PAGE_BITS_ALL;
1126e69763a3SDoug Rabson 				vm_page_wakeup(m[i]);
112726f9a767SRodney W. Grimes 			}
112826f9a767SRodney W. Grimes 		}
11296d40c3d3SDavid Greenman 
1130a316d390SJohn Dyson 		m[reqpage]->object->last_read = m[count-1]->pindex;
11316d40c3d3SDavid Greenman 
11322e1e24ddSDavid Greenman 		/*
11332e1e24ddSDavid Greenman 		 * If we're out of swap space, then attempt to free
113445952afcSJohn Dyson 		 * some whenever multiple pages are brought in. We
113545952afcSJohn Dyson 		 * must set the dirty bits so that the page contents
113645952afcSJohn Dyson 		 * will be preserved.
11372e1e24ddSDavid Greenman 		 */
1138b44e4b7aSJohn Dyson 		if (SWAPLOW ||
1139b44e4b7aSJohn Dyson 			(vm_swap_size < btodb((cnt.v_page_count - cnt.v_wire_count)) * PAGE_SIZE)) {
11402e1e24ddSDavid Greenman 			for (i = 0; i < count; i++) {
11410d94caffSDavid Greenman 				m[i]->dirty = VM_PAGE_BITS_ALL;
11422e1e24ddSDavid Greenman 			}
1143b44e4b7aSJohn Dyson 			swap_pager_freespace(object,
1144b44e4b7aSJohn Dyson 				m[0]->pindex + paging_offset, count);
114526f9a767SRodney W. Grimes 		}
1146e47ed70bSJohn Dyson 
114726f9a767SRodney W. Grimes 	} else {
114826f9a767SRodney W. Grimes 		swap_pager_ridpages(m, count, reqpage);
114926f9a767SRodney W. Grimes 	}
1150df8bae1dSRodney W. Grimes 	return (rv);
1151df8bae1dSRodney W. Grimes }
1152df8bae1dSRodney W. Grimes 
115326f9a767SRodney W. Grimes int
115424a1cce3SDavid Greenman swap_pager_putpages(object, m, count, sync, rtvals)
115524a1cce3SDavid Greenman 	vm_object_t object;
115626f9a767SRodney W. Grimes 	vm_page_t *m;
115726f9a767SRodney W. Grimes 	int count;
115824a1cce3SDavid Greenman 	boolean_t sync;
115926f9a767SRodney W. Grimes 	int *rtvals;
1160df8bae1dSRodney W. Grimes {
116126f9a767SRodney W. Grimes 	register struct buf *bp;
116226f9a767SRodney W. Grimes 	sw_blk_t swb[count];
116326f9a767SRodney W. Grimes 	register int s;
1164e736cd05SJohn Dyson 	int i, j, ix, firstidx, lastidx;
116526f9a767SRodney W. Grimes 	boolean_t rv;
1166a316d390SJohn Dyson 	vm_offset_t kva, off, fidx;
116726f9a767SRodney W. Grimes 	swp_clean_t spc;
1168a316d390SJohn Dyson 	vm_pindex_t paging_pindex;
116926f9a767SRodney W. Grimes 	int reqaddr[count];
117026f9a767SRodney W. Grimes 	int failed;
1171df8bae1dSRodney W. Grimes 
117224ea4a96SDavid Greenman 	if (vm_swap_size)
117324ea4a96SDavid Greenman 		no_swap_space = 0;
1174e736cd05SJohn Dyson 
117524ea4a96SDavid Greenman 	if (no_swap_space) {
11765663e6deSDavid Greenman 		for (i = 0; i < count; i++)
11775663e6deSDavid Greenman 			rtvals[i] = VM_PAGER_FAIL;
11785663e6deSDavid Greenman 		return VM_PAGER_FAIL;
11795663e6deSDavid Greenman 	}
1180e47ed70bSJohn Dyson 
1181e47ed70bSJohn Dyson 	if (curproc != pageproc)
1182e47ed70bSJohn Dyson 		sync = TRUE;
118326f9a767SRodney W. Grimes 
118426f9a767SRodney W. Grimes 	object = m[0]->object;
1185a316d390SJohn Dyson 	paging_pindex = OFF_TO_IDX(object->paging_offset);
118626f9a767SRodney W. Grimes 
118726f9a767SRodney W. Grimes 	failed = 0;
118826f9a767SRodney W. Grimes 	for (j = 0; j < count; j++) {
1189a316d390SJohn Dyson 		fidx = m[j]->pindex + paging_pindex;
1190a316d390SJohn Dyson 		ix = swap_pager_block_index(fidx);
119126f9a767SRodney W. Grimes 		swb[j] = 0;
11922a4895f4SDavid Greenman 		if (ix >= object->un_pager.swp.swp_nblocks) {
119326f9a767SRodney W. Grimes 			rtvals[j] = VM_PAGER_FAIL;
119426f9a767SRodney W. Grimes 			failed = 1;
119526f9a767SRodney W. Grimes 			continue;
119626f9a767SRodney W. Grimes 		} else {
119726f9a767SRodney W. Grimes 			rtvals[j] = VM_PAGER_OK;
119826f9a767SRodney W. Grimes 		}
11992a4895f4SDavid Greenman 		swb[j] = &object->un_pager.swp.swp_blocks[ix];
12002a4895f4SDavid Greenman 		swb[j]->swb_locked++;
120126f9a767SRodney W. Grimes 		if (failed) {
120226f9a767SRodney W. Grimes 			rtvals[j] = VM_PAGER_FAIL;
120326f9a767SRodney W. Grimes 			continue;
120426f9a767SRodney W. Grimes 		}
1205a316d390SJohn Dyson 		off = swap_pager_block_offset(fidx);
120626f9a767SRodney W. Grimes 		reqaddr[j] = swb[j]->swb_block[off];
120726f9a767SRodney W. Grimes 		if (reqaddr[j] == SWB_EMPTY) {
1208a316d390SJohn Dyson 			daddr_t blk;
120926f9a767SRodney W. Grimes 			int tries;
121026f9a767SRodney W. Grimes 			int ntoget;
12110d94caffSDavid Greenman 
121226f9a767SRodney W. Grimes 			tries = 0;
1213e47ed70bSJohn Dyson 			s = splvm();
121426f9a767SRodney W. Grimes 
1215df8bae1dSRodney W. Grimes 			/*
12160d94caffSDavid Greenman 			 * if any other pages have been allocated in this
12170d94caffSDavid Greenman 			 * block, we only try to get one page.
1218df8bae1dSRodney W. Grimes 			 */
121926f9a767SRodney W. Grimes 			for (i = 0; i < SWB_NPAGES; i++) {
122026f9a767SRodney W. Grimes 				if (swb[j]->swb_block[i] != SWB_EMPTY)
1221df8bae1dSRodney W. Grimes 					break;
1222df8bae1dSRodney W. Grimes 			}
122326f9a767SRodney W. Grimes 
122426f9a767SRodney W. Grimes 			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
122526f9a767SRodney W. Grimes 			/*
12260d94caffSDavid Greenman 			 * this code is alittle conservative, but works (the
12270d94caffSDavid Greenman 			 * intent of this code is to allocate small chunks for
12280d94caffSDavid Greenman 			 * small objects)
122926f9a767SRodney W. Grimes 			 */
1230a316d390SJohn Dyson 			if ((off == 0) && ((fidx + ntoget) > object->size)) {
1231a316d390SJohn Dyson 				ntoget = object->size - fidx;
123226f9a767SRodney W. Grimes 			}
123326f9a767SRodney W. Grimes 	retrygetspace:
123426f9a767SRodney W. Grimes 			if (!swap_pager_full && ntoget > 1 &&
1235a316d390SJohn Dyson 			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
1236a316d390SJohn Dyson 				&blk)) {
123726f9a767SRodney W. Grimes 
123826f9a767SRodney W. Grimes 				for (i = 0; i < ntoget; i++) {
123926f9a767SRodney W. Grimes 					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
124026f9a767SRodney W. Grimes 					swb[j]->swb_valid = 0;
124126f9a767SRodney W. Grimes 				}
124226f9a767SRodney W. Grimes 
124326f9a767SRodney W. Grimes 				reqaddr[j] = swb[j]->swb_block[off];
12442a4895f4SDavid Greenman 			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
124526f9a767SRodney W. Grimes 				&swb[j]->swb_block[off])) {
124626f9a767SRodney W. Grimes 				/*
12470d94caffSDavid Greenman 				 * if the allocation has failed, we try to
12480d94caffSDavid Greenman 				 * reclaim space and retry.
124926f9a767SRodney W. Grimes 				 */
125026f9a767SRodney W. Grimes 				if (++tries == 1) {
125126f9a767SRodney W. Grimes 					swap_pager_reclaim();
125226f9a767SRodney W. Grimes 					goto retrygetspace;
125326f9a767SRodney W. Grimes 				}
125426f9a767SRodney W. Grimes 				rtvals[j] = VM_PAGER_AGAIN;
125526f9a767SRodney W. Grimes 				failed = 1;
125624ea4a96SDavid Greenman 				swap_pager_full = 1;
125726f9a767SRodney W. Grimes 			} else {
125826f9a767SRodney W. Grimes 				reqaddr[j] = swb[j]->swb_block[off];
125926f9a767SRodney W. Grimes 				swb[j]->swb_valid &= ~(1 << off);
1260df8bae1dSRodney W. Grimes 			}
1261df8bae1dSRodney W. Grimes 			splx(s);
126226f9a767SRodney W. Grimes 		}
126326f9a767SRodney W. Grimes 	}
126426f9a767SRodney W. Grimes 
126526f9a767SRodney W. Grimes 	/*
126626f9a767SRodney W. Grimes 	 * search forwards for the last contiguous page to transfer
126726f9a767SRodney W. Grimes 	 */
126826f9a767SRodney W. Grimes 	failed = 0;
126926f9a767SRodney W. Grimes 	for (i = 0; i < count; i++) {
1270a316d390SJohn Dyson 		if (failed ||
1271a316d390SJohn Dyson 			(reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
1272a316d390SJohn Dyson 		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
127326f9a767SRodney W. Grimes 		    (rtvals[i] != VM_PAGER_OK)) {
127426f9a767SRodney W. Grimes 			failed = 1;
127526f9a767SRodney W. Grimes 			if (rtvals[i] == VM_PAGER_OK)
127626f9a767SRodney W. Grimes 				rtvals[i] = VM_PAGER_AGAIN;
127726f9a767SRodney W. Grimes 		}
127826f9a767SRodney W. Grimes 	}
127926f9a767SRodney W. Grimes 
1280e736cd05SJohn Dyson 	ix = 0;
1281e736cd05SJohn Dyson 	firstidx = -1;
128226f9a767SRodney W. Grimes 	for (i = 0; i < count; i++) {
1283e736cd05SJohn Dyson 		if (rtvals[i] == VM_PAGER_OK) {
1284e736cd05SJohn Dyson 			ix++;
1285e736cd05SJohn Dyson 			if (firstidx == -1) {
1286e736cd05SJohn Dyson 				firstidx = i;
128726f9a767SRodney W. Grimes 			}
1288e736cd05SJohn Dyson 		} else if (firstidx >= 0) {
128926f9a767SRodney W. Grimes 			break;
1290e736cd05SJohn Dyson 		}
1291e736cd05SJohn Dyson 	}
129226f9a767SRodney W. Grimes 
1293e736cd05SJohn Dyson 	if (firstidx == -1) {
1294e47ed70bSJohn Dyson 		for (i = 0; i < count; i++) {
1295e47ed70bSJohn Dyson 			if (rtvals[i] == VM_PAGER_OK)
1296e47ed70bSJohn Dyson 				rtvals[i] = VM_PAGER_AGAIN;
1297e736cd05SJohn Dyson 		}
129826f9a767SRodney W. Grimes 		return VM_PAGER_AGAIN;
129926f9a767SRodney W. Grimes 	}
1300e736cd05SJohn Dyson 
1301e736cd05SJohn Dyson 	lastidx = firstidx + ix;
1302e736cd05SJohn Dyson 
1303e47ed70bSJohn Dyson 	if (ix > max_pageout_cluster) {
1304e47ed70bSJohn Dyson 		for (i = firstidx + max_pageout_cluster; i < lastidx; i++) {
1305e47ed70bSJohn Dyson 			if (rtvals[i] == VM_PAGER_OK)
1306e47ed70bSJohn Dyson 				rtvals[i] = VM_PAGER_AGAIN;
1307e47ed70bSJohn Dyson 		}
1308e47ed70bSJohn Dyson 		ix = max_pageout_cluster;
1309e47ed70bSJohn Dyson 		lastidx = firstidx + ix;
1310e47ed70bSJohn Dyson 	}
1311e47ed70bSJohn Dyson 
1312e736cd05SJohn Dyson 	for (i = 0; i < firstidx; i++) {
1313e736cd05SJohn Dyson 		if (swb[i])
1314e736cd05SJohn Dyson 			swb[i]->swb_locked--;
1315e736cd05SJohn Dyson 	}
1316e736cd05SJohn Dyson 
1317e736cd05SJohn Dyson 	for (i = lastidx; i < count; i++) {
1318e736cd05SJohn Dyson 		if (swb[i])
1319e736cd05SJohn Dyson 			swb[i]->swb_locked--;
1320e736cd05SJohn Dyson 	}
1321e736cd05SJohn Dyson 
1322e47ed70bSJohn Dyson #if defined(DIAGNOSTIC)
1323e736cd05SJohn Dyson 	for (i = firstidx; i < lastidx; i++) {
1324a316d390SJohn Dyson 		if (reqaddr[i] == SWB_EMPTY) {
1325a316d390SJohn Dyson 			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
1326a316d390SJohn Dyson 				m[i]->pindex, i);
1327a316d390SJohn Dyson 		}
132826f9a767SRodney W. Grimes 	}
1329e47ed70bSJohn Dyson #endif
133026f9a767SRodney W. Grimes 
133126f9a767SRodney W. Grimes 	/*
1332e47ed70bSJohn Dyson 	 * Clean up all completed async pageouts.
133326f9a767SRodney W. Grimes 	 */
1334e47ed70bSJohn Dyson 	if (swap_pager_free_pending)
133524a1cce3SDavid Greenman 		swap_pager_sync();
133626f9a767SRodney W. Grimes 
133726f9a767SRodney W. Grimes 	/*
133826f9a767SRodney W. Grimes 	 * get a swap pager clean data structure, block until we get it
133926f9a767SRodney W. Grimes 	 */
13400d94caffSDavid Greenman 	if (curproc == pageproc) {
1341e47ed70bSJohn Dyson 		if (swap_pager_free_count == 0) {
1342e47ed70bSJohn Dyson 			s = splvm();
1343e47ed70bSJohn Dyson 			while (swap_pager_free_count == 0) {
1344e47ed70bSJohn Dyson 				swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
1345cb6962cdSJohn Dyson 			/*
1346cb6962cdSJohn Dyson 			 * if it does not get one within a short time, then
1347cb6962cdSJohn Dyson 			 * there is a potential deadlock, so we go-on trying
1348bd7e5f99SJohn Dyson 			 * to free pages.  It is important to block here as opposed
1349bd7e5f99SJohn Dyson 			 * to returning, thereby allowing the pageout daemon to continue.
1350bd7e5f99SJohn Dyson 			 * It is likely that pageout daemon will start suboptimally
1351bd7e5f99SJohn Dyson 			 * reclaiming vnode backed pages if we don't block.  Since the
1352bd7e5f99SJohn Dyson 			 * I/O subsystem is probably already fully utilized, might as
1353bd7e5f99SJohn Dyson 			 * well wait.
1354cb6962cdSJohn Dyson 			 */
1355e47ed70bSJohn Dyson 				if (tsleep(&swap_pager_needflags, PVM-1, "swpfre", hz/2)) {
1356e47ed70bSJohn Dyson 					if (swap_pager_free_pending)
135724a1cce3SDavid Greenman 						swap_pager_sync();
1358e47ed70bSJohn Dyson 					if (swap_pager_free_count == 0) {
1359e736cd05SJohn Dyson 						for (i = firstidx; i < lastidx; i++) {
1360e736cd05SJohn Dyson 							rtvals[i] = VM_PAGER_AGAIN;
1361e736cd05SJohn Dyson 						}
13620d94caffSDavid Greenman 						splx(s);
13630d94caffSDavid Greenman 						return VM_PAGER_AGAIN;
1364cb6962cdSJohn Dyson 					}
1365bd7e5f99SJohn Dyson 				} else {
1366bd7e5f99SJohn Dyson 					swap_pager_sync();
136726f9a767SRodney W. Grimes 				}
1368bd7e5f99SJohn Dyson 			}
136926f9a767SRodney W. Grimes 			splx(s);
137026f9a767SRodney W. Grimes 		}
1371e47ed70bSJohn Dyson 
1372b18bfc3dSJohn Dyson 		spc = TAILQ_FIRST(&swap_pager_free);
1373e47ed70bSJohn Dyson #if defined(DIAGNOSTIC)
13743091ee09SJohn Dyson 		if (spc == NULL)
1375e736cd05SJohn Dyson 			panic("swap_pager_putpages: free queue is empty, %d expected\n",
1376e736cd05SJohn Dyson 				swap_pager_free_count);
1377e47ed70bSJohn Dyson #endif
137826f9a767SRodney W. Grimes 		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
13793091ee09SJohn Dyson 		swap_pager_free_count--;
1380fff93ab6SDavid Greenman 
138126f9a767SRodney W. Grimes 		kva = spc->spc_kva;
1382e47ed70bSJohn Dyson 		bp = spc->spc_bp;
1383e47ed70bSJohn Dyson 		bzero(bp, sizeof *bp);
1384e47ed70bSJohn Dyson 		bp->b_spc = spc;
1385e47ed70bSJohn Dyson 		bp->b_vnbufs.le_next = NOLIST;
1386e47ed70bSJohn Dyson 		bp->b_data = (caddr_t) kva;
1387e47ed70bSJohn Dyson 	} else {
1388e47ed70bSJohn Dyson 		spc = NULL;
1389e47ed70bSJohn Dyson 		bp = getpbuf();
1390e47ed70bSJohn Dyson 		kva = (vm_offset_t) bp->b_data;
1391e47ed70bSJohn Dyson 		bp->b_spc = NULL;
1392e47ed70bSJohn Dyson 	}
139326f9a767SRodney W. Grimes 
139426f9a767SRodney W. Grimes 	/*
139526f9a767SRodney W. Grimes 	 * map our page(s) into kva for I/O
139626f9a767SRodney W. Grimes 	 */
1397e736cd05SJohn Dyson 	pmap_qenter(kva, &m[firstidx], ix);
139826f9a767SRodney W. Grimes 
139926f9a767SRodney W. Grimes 	/*
140026f9a767SRodney W. Grimes 	 * get the base I/O offset into the swap file
140126f9a767SRodney W. Grimes 	 */
1402e736cd05SJohn Dyson 	for (i = firstidx; i < lastidx ; i++) {
1403a316d390SJohn Dyson 		fidx = m[i]->pindex + paging_pindex;
1404a316d390SJohn Dyson 		off = swap_pager_block_offset(fidx);
140526f9a767SRodney W. Grimes 		/*
140626f9a767SRodney W. Grimes 		 * set the valid bit
140726f9a767SRodney W. Grimes 		 */
140826f9a767SRodney W. Grimes 		swb[i]->swb_valid |= (1 << off);
140926f9a767SRodney W. Grimes 		/*
141026f9a767SRodney W. Grimes 		 * and unlock the data structure
141126f9a767SRodney W. Grimes 		 */
14122a4895f4SDavid Greenman 		swb[i]->swb_locked--;
141326f9a767SRodney W. Grimes 	}
141426f9a767SRodney W. Grimes 
1415aba8f38eSDavid Greenman 	bp->b_flags = B_BUSY | B_PAGING;
141626f9a767SRodney W. Grimes 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
141726f9a767SRodney W. Grimes 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1418a481f200SDavid Greenman 	if (bp->b_rcred != NOCRED)
141926f9a767SRodney W. Grimes 		crhold(bp->b_rcred);
1420a481f200SDavid Greenman 	if (bp->b_wcred != NOCRED)
142126f9a767SRodney W. Grimes 		crhold(bp->b_wcred);
1422e736cd05SJohn Dyson 	bp->b_blkno = reqaddr[firstidx];
14230d94caffSDavid Greenman 	pbgetvp(swapdev_vp, bp);
142416f62314SDavid Greenman 
1425e736cd05SJohn Dyson 	bp->b_bcount = PAGE_SIZE * ix;
1426e736cd05SJohn Dyson 	bp->b_bufsize = PAGE_SIZE * ix;
1427e47ed70bSJohn Dyson 
1428e47ed70bSJohn Dyson 	s = splvm();
142926f9a767SRodney W. Grimes 	swapdev_vp->v_numoutput++;
143026f9a767SRodney W. Grimes 
143126f9a767SRodney W. Grimes 	/*
14320d94caffSDavid Greenman 	 * If this is an async write we set up additional buffer fields and
14330d94caffSDavid Greenman   	 * place a "cleaning" entry on the inuse queue.
143426f9a767SRodney W. Grimes   	 */
1435e47ed70bSJohn Dyson  	object->un_pager.swp.swp_poip++;
1436e47ed70bSJohn Dyson 
1437e47ed70bSJohn Dyson  	if (spc) {
143826f9a767SRodney W. Grimes   		spc->spc_flags = 0;
14392a4895f4SDavid Greenman   		spc->spc_object = object;
1440e47ed70bSJohn Dyson  		bp->b_npages = ix;
1441e47ed70bSJohn Dyson  		for (i = firstidx; i < lastidx; i++) {
144226f9a767SRodney W. Grimes   			spc->spc_m[i] = m[i];
1443e47ed70bSJohn Dyson  			bp->b_pages[i - firstidx] = m[i];
1444e47ed70bSJohn Dyson  			vm_page_protect(m[i], VM_PROT_READ);
1445e47ed70bSJohn Dyson  			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1446e47ed70bSJohn Dyson  			m[i]->dirty = 0;
1447e47ed70bSJohn Dyson  		}
1448e736cd05SJohn Dyson   		spc->spc_first = firstidx;
1449e736cd05SJohn Dyson   		spc->spc_count = ix;
145026f9a767SRodney W. Grimes 		/*
145126f9a767SRodney W. Grimes 		 * the completion routine for async writes
145226f9a767SRodney W. Grimes 		 */
145326f9a767SRodney W. Grimes 		bp->b_flags |= B_CALL;
145426f9a767SRodney W. Grimes 		bp->b_iodone = swap_pager_iodone;
145526f9a767SRodney W. Grimes 		bp->b_dirtyoff = 0;
145626f9a767SRodney W. Grimes 		bp->b_dirtyend = bp->b_bcount;
145726f9a767SRodney W. Grimes 		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
145826f9a767SRodney W. Grimes 	} else {
145926f9a767SRodney W. Grimes 		bp->b_flags |= B_CALL;
146026f9a767SRodney W. Grimes 		bp->b_iodone = swap_pager_iodone1;
1461e47ed70bSJohn Dyson 		bp->b_npages = ix;
1462e47ed70bSJohn Dyson 		for (i = firstidx; i < lastidx; i++)
1463e47ed70bSJohn Dyson 			bp->b_pages[i - firstidx] = m[i];
146426f9a767SRodney W. Grimes 	}
1465976e77fcSDavid Greenman 
1466976e77fcSDavid Greenman 	cnt.v_swapout++;
1467e736cd05SJohn Dyson 	cnt.v_swappgsout += ix;
1468e47ed70bSJohn Dyson 
146926f9a767SRodney W. Grimes 	/*
147026f9a767SRodney W. Grimes 	 * perform the I/O
147126f9a767SRodney W. Grimes 	 */
1472fd5d1124SJulian Elischer 	VOP_STRATEGY(bp->b_vp, bp);
147324a1cce3SDavid Greenman 	if (sync == FALSE) {
1474e47ed70bSJohn Dyson 		if (swap_pager_free_pending) {
147524a1cce3SDavid Greenman 			swap_pager_sync();
147626f9a767SRodney W. Grimes 		}
1477e736cd05SJohn Dyson 		for (i = firstidx; i < lastidx; i++) {
147826f9a767SRodney W. Grimes 			rtvals[i] = VM_PAGER_PEND;
147926f9a767SRodney W. Grimes 		}
1480ccbbd927SBruce Evans 		splx(s);
148126f9a767SRodney W. Grimes 		return VM_PAGER_PEND;
148226f9a767SRodney W. Grimes 	}
1483e47ed70bSJohn Dyson 
148426f9a767SRodney W. Grimes 	/*
148526f9a767SRodney W. Grimes 	 * wait for the sync I/O to complete
148626f9a767SRodney W. Grimes 	 */
148726f9a767SRodney W. Grimes 	while ((bp->b_flags & B_DONE) == 0) {
148824a1cce3SDavid Greenman 		tsleep(bp, PVM, "swwrt", 0);
148926f9a767SRodney W. Grimes 	}
1490e47ed70bSJohn Dyson 
14911b119d9dSDavid Greenman 	if (bp->b_flags & B_ERROR) {
1492ac1e407bSBruce Evans 		printf(
1493ac1e407bSBruce Evans "swap_pager: I/O error - pageout failed; blkno %ld, size %ld, error %d\n",
1494ac1e407bSBruce Evans 		    (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
1495a83c285cSDavid Greenman 		rv = VM_PAGER_ERROR;
14961b119d9dSDavid Greenman 	} else {
14971b119d9dSDavid Greenman 		rv = VM_PAGER_OK;
14981b119d9dSDavid Greenman 	}
149926f9a767SRodney W. Grimes 
15002a4895f4SDavid Greenman 	object->un_pager.swp.swp_poip--;
15012a4895f4SDavid Greenman 	if (object->un_pager.swp.swp_poip == 0)
15022a4895f4SDavid Greenman 		wakeup(object);
150326f9a767SRodney W. Grimes 
150426f9a767SRodney W. Grimes 	if (bp->b_vp)
15050d94caffSDavid Greenman 		pbrelvp(bp);
150626f9a767SRodney W. Grimes 
150726f9a767SRodney W. Grimes 	splx(s);
150826f9a767SRodney W. Grimes 
150926f9a767SRodney W. Grimes 	/*
151026f9a767SRodney W. Grimes 	 * remove the mapping for kernel virtual
151126f9a767SRodney W. Grimes 	 */
1512e736cd05SJohn Dyson 	pmap_qremove(kva, ix);
151326f9a767SRodney W. Grimes 
151426f9a767SRodney W. Grimes 	/*
15150d94caffSDavid Greenman 	 * if we have written the page, then indicate that the page is clean.
151626f9a767SRodney W. Grimes 	 */
151726f9a767SRodney W. Grimes 	if (rv == VM_PAGER_OK) {
1518e736cd05SJohn Dyson 		for (i = firstidx; i < lastidx; i++) {
151926f9a767SRodney W. Grimes 			if (rtvals[i] == VM_PAGER_OK) {
152067bf6868SJohn Dyson 				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
15210d94caffSDavid Greenman 				m[i]->dirty = 0;
152226f9a767SRodney W. Grimes 				/*
15230d94caffSDavid Greenman 				 * optimization, if a page has been read
15240d94caffSDavid Greenman 				 * during the pageout process, we activate it.
152526f9a767SRodney W. Grimes 				 */
1526eaf13dd7SJohn Dyson 				if (((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
15279b5a5d81SJohn Dyson 				    pmap_ts_referenced(VM_PAGE_TO_PHYS(m[i])))) {
152826f9a767SRodney W. Grimes 					vm_page_activate(m[i]);
152926f9a767SRodney W. Grimes 				}
153026f9a767SRodney W. Grimes 			}
15317fb0c17eSDavid Greenman 		}
153226f9a767SRodney W. Grimes 	} else {
1533e736cd05SJohn Dyson 		for (i = firstidx; i < lastidx; i++) {
153426f9a767SRodney W. Grimes 			rtvals[i] = rv;
153526f9a767SRodney W. Grimes 		}
153626f9a767SRodney W. Grimes 	}
153726f9a767SRodney W. Grimes 
153826f9a767SRodney W. Grimes 	if (bp->b_rcred != NOCRED)
153926f9a767SRodney W. Grimes 		crfree(bp->b_rcred);
154026f9a767SRodney W. Grimes 	if (bp->b_wcred != NOCRED)
154126f9a767SRodney W. Grimes 		crfree(bp->b_wcred);
1542e47ed70bSJohn Dyson 
1543e47ed70bSJohn Dyson 	spc_free(spc);
1544e47ed70bSJohn Dyson 	if (swap_pager_free_pending)
1545e47ed70bSJohn Dyson 		swap_pager_sync();
1546e47ed70bSJohn Dyson 
154726f9a767SRodney W. Grimes 	return (rv);
154826f9a767SRodney W. Grimes }
154926f9a767SRodney W. Grimes 
155066095752SJohn Dyson void
155124a1cce3SDavid Greenman swap_pager_sync()
155226f9a767SRodney W. Grimes {
1553e47ed70bSJohn Dyson 	swp_clean_t spc;
155426f9a767SRodney W. Grimes 
1555e47ed70bSJohn Dyson 	while (spc = TAILQ_FIRST(&swap_pager_done)) {
155626f9a767SRodney W. Grimes 		swap_pager_finish(spc);
155726f9a767SRodney W. Grimes 	}
155824a1cce3SDavid Greenman 	return;
155926f9a767SRodney W. Grimes }
156026f9a767SRodney W. Grimes 
1561c1087c13SBruce Evans static void
156226f9a767SRodney W. Grimes swap_pager_finish(spc)
156326f9a767SRodney W. Grimes 	register swp_clean_t spc;
156426f9a767SRodney W. Grimes {
1565e47ed70bSJohn Dyson 	int i, s, lastidx;
1566e47ed70bSJohn Dyson 	vm_object_t object;
1567e47ed70bSJohn Dyson 	vm_page_t *ma;
1568e47ed70bSJohn Dyson 
1569e47ed70bSJohn Dyson 	ma = spc->spc_m;
1570ffc82b0aSJohn Dyson 	object = spc->spc_object;
1571e47ed70bSJohn Dyson 	lastidx = spc->spc_first + spc->spc_count;
1572e47ed70bSJohn Dyson 
1573e47ed70bSJohn Dyson 	s = splvm();
1574e47ed70bSJohn Dyson 	TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
1575e47ed70bSJohn Dyson 	splx(s);
1576e47ed70bSJohn Dyson 
1577e47ed70bSJohn Dyson 	pmap_qremove(spc->spc_kva, spc->spc_count);
1578e47ed70bSJohn Dyson 
1579e47ed70bSJohn Dyson 	/*
1580e47ed70bSJohn Dyson 	 * If no error, mark as clean and inform the pmap system. If error,
1581e47ed70bSJohn Dyson 	 * mark as dirty so we will try again. (XXX could get stuck doing
1582e47ed70bSJohn Dyson 	 * this, should give up after awhile)
1583e47ed70bSJohn Dyson 	 */
1584e47ed70bSJohn Dyson 	if (spc->spc_flags & SPC_ERROR) {
1585e47ed70bSJohn Dyson 
1586e47ed70bSJohn Dyson 		for (i = spc->spc_first; i < lastidx; i++) {
1587e47ed70bSJohn Dyson 			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
1588e47ed70bSJohn Dyson 			    (u_long) VM_PAGE_TO_PHYS(ma[i]));
1589e47ed70bSJohn Dyson 			ma[i]->dirty = VM_PAGE_BITS_ALL;
1590e69763a3SDoug Rabson 			vm_page_io_finish(ma[i]);
1591e47ed70bSJohn Dyson 		}
159226f9a767SRodney W. Grimes 
1593069e9bc1SDoug Rabson 		vm_object_pip_subtract(object, spc->spc_count);
1594c0503609SDavid Greenman 		if ((object->paging_in_progress == 0) &&
1595c0503609SDavid Greenman 			(object->flags & OBJ_PIPWNT)) {
1596069e9bc1SDoug Rabson 			vm_object_clear_flag(object, OBJ_PIPWNT);
159724a1cce3SDavid Greenman 			wakeup(object);
1598c0503609SDavid Greenman 		}
1599df8bae1dSRodney W. Grimes 
1600df8bae1dSRodney W. Grimes 	} else {
1601e736cd05SJohn Dyson 		for (i = spc->spc_first; i < lastidx; i++) {
1602e736cd05SJohn Dyson 			if ((ma[i]->queue != PQ_ACTIVE) &&
1603e736cd05SJohn Dyson 			   ((ma[i]->flags & PG_WANTED) ||
1604ffc82b0aSJohn Dyson 				 pmap_ts_referenced(VM_PAGE_TO_PHYS(ma[i])))) {
1605e736cd05SJohn Dyson 				vm_page_activate(ma[i]);
1606df8bae1dSRodney W. Grimes 			}
1607df8bae1dSRodney W. Grimes 		}
1608ffc82b0aSJohn Dyson 	}
1609df8bae1dSRodney W. Grimes 
161026f9a767SRodney W. Grimes 	nswiodone -= spc->spc_count;
1611e47ed70bSJohn Dyson 	swap_pager_free_pending--;
1612e47ed70bSJohn Dyson 	spc_free(spc);
1613df8bae1dSRodney W. Grimes 
1614df8bae1dSRodney W. Grimes 	return;
161526f9a767SRodney W. Grimes }
1616df8bae1dSRodney W. Grimes 
161726f9a767SRodney W. Grimes /*
161826f9a767SRodney W. Grimes  * swap_pager_iodone
161926f9a767SRodney W. Grimes  */
1620f5a12711SPoul-Henning Kamp static void
1621df8bae1dSRodney W. Grimes swap_pager_iodone(bp)
1622df8bae1dSRodney W. Grimes 	register struct buf *bp;
1623df8bae1dSRodney W. Grimes {
1624ffc82b0aSJohn Dyson 	int i, s, lastidx;
1625df8bae1dSRodney W. Grimes 	register swp_clean_t spc;
1626e47ed70bSJohn Dyson 	vm_object_t object;
1627ffc82b0aSJohn Dyson 	vm_page_t *ma;
1628ffc82b0aSJohn Dyson 
1629df8bae1dSRodney W. Grimes 
1630e47ed70bSJohn Dyson 	s = splvm();
163126f9a767SRodney W. Grimes 	spc = (swp_clean_t) bp->b_spc;
163226f9a767SRodney W. Grimes 	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
163326f9a767SRodney W. Grimes 	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
1634e47ed70bSJohn Dyson 
1635ffc82b0aSJohn Dyson 	object = spc->spc_object;
1636e47ed70bSJohn Dyson 
1637e47ed70bSJohn Dyson #if defined(DIAGNOSTIC)
1638e47ed70bSJohn Dyson 	if (object->paging_in_progress < spc->spc_count)
1639e47ed70bSJohn Dyson 		printf("swap_pager_iodone: paging_in_progress(%d) < spc_count(%d)\n",
1640e47ed70bSJohn Dyson 			object->paging_in_progress, spc->spc_count);
1641e47ed70bSJohn Dyson #endif
1642e47ed70bSJohn Dyson 
164326f9a767SRodney W. Grimes 	if (bp->b_flags & B_ERROR) {
1644df8bae1dSRodney W. Grimes 		spc->spc_flags |= SPC_ERROR;
1645c3a1e425SDavid Greenman 		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
16461b119d9dSDavid Greenman 		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
1647c3a1e425SDavid Greenman 		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
1648e47ed70bSJohn Dyson 	} else {
1649069e9bc1SDoug Rabson 		vm_object_pip_subtract(object, spc->spc_count);
1650e47ed70bSJohn Dyson 		if ((object->paging_in_progress == 0) &&
1651e47ed70bSJohn Dyson 			(object->flags & OBJ_PIPWNT)) {
1652069e9bc1SDoug Rabson 			vm_object_clear_flag(object, OBJ_PIPWNT);
1653e47ed70bSJohn Dyson 			wakeup(object);
1654e47ed70bSJohn Dyson 		}
1655ffc82b0aSJohn Dyson 		ma = spc->spc_m;
1656ffc82b0aSJohn Dyson 		lastidx = spc->spc_first + spc->spc_count;
1657ffc82b0aSJohn Dyson 		for (i = spc->spc_first; i < lastidx; i++) {
1658ffc82b0aSJohn Dyson 			/*
1659ffc82b0aSJohn Dyson 			 * we wakeup any processes that are waiting on these pages.
1660ffc82b0aSJohn Dyson 			 */
1661e69763a3SDoug Rabson 			vm_page_io_finish(ma[i]);
1662ffc82b0aSJohn Dyson 		}
1663df8bae1dSRodney W. Grimes 	}
166426f9a767SRodney W. Grimes 
16650d94caffSDavid Greenman 	if (bp->b_vp)
16660d94caffSDavid Greenman 		pbrelvp(bp);
16670d94caffSDavid Greenman 
166826f9a767SRodney W. Grimes 	if (bp->b_rcred != NOCRED)
166926f9a767SRodney W. Grimes 		crfree(bp->b_rcred);
167026f9a767SRodney W. Grimes 	if (bp->b_wcred != NOCRED)
167126f9a767SRodney W. Grimes 		crfree(bp->b_wcred);
167226f9a767SRodney W. Grimes 
167326f9a767SRodney W. Grimes 	nswiodone += spc->spc_count;
1674e47ed70bSJohn Dyson 	swap_pager_free_pending++;
16752a4895f4SDavid Greenman 	if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
16762a4895f4SDavid Greenman 		wakeup(spc->spc_object);
167726f9a767SRodney W. Grimes 	}
1678e47ed70bSJohn Dyson 
1679e47ed70bSJohn Dyson 	if (swap_pager_needflags &&
1680e47ed70bSJohn Dyson 	  ((swap_pager_free_count + swap_pager_free_pending) > (npendingio / 2))) {
1681e47ed70bSJohn Dyson 		spc_wakeup();
1682a1f6d91cSDavid Greenman 	}
1683a1f6d91cSDavid Greenman 
1684e47ed70bSJohn Dyson 	if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) &&
1685e47ed70bSJohn Dyson 		vm_pageout_pages_needed) {
168624a1cce3SDavid Greenman 		wakeup(&vm_pageout_pages_needed);
1687a1f6d91cSDavid Greenman 		vm_pageout_pages_needed = 0;
168826f9a767SRodney W. Grimes 	}
1689e47ed70bSJohn Dyson 
169026f9a767SRodney W. Grimes 	splx(s);
169126f9a767SRodney W. Grimes }
1692