xref: /freebsd/sys/vm/swap_pager.c (revision e47ed70b0f4aa94a0f041b10a4a3292c50014e23)
1df8bae1dSRodney W. Grimes /*
226f9a767SRodney W. Grimes  * Copyright (c) 1994 John S. Dyson
3df8bae1dSRodney W. Grimes  * Copyright (c) 1990 University of Utah.
4df8bae1dSRodney W. Grimes  * Copyright (c) 1991, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  *
7df8bae1dSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
8df8bae1dSRodney W. Grimes  * the Systems Programming Group of the University of Utah Computer
9df8bae1dSRodney W. Grimes  * Science Department.
10df8bae1dSRodney W. Grimes  *
11df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
12df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
13df8bae1dSRodney W. Grimes  * are met:
14df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
15df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
16df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
17df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
18df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
19df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
20df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
21df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
22df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
23df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
24df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
25df8bae1dSRodney W. Grimes  *    without specific prior written permission.
26df8bae1dSRodney W. Grimes  *
27df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
38df8bae1dSRodney W. Grimes  *
39df8bae1dSRodney W. Grimes  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
40df8bae1dSRodney W. Grimes  *
41df8bae1dSRodney W. Grimes  *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
42e47ed70bSJohn Dyson  * $Id: swap_pager.c,v 1.88 1998/02/09 06:11:20 eivind Exp $
43df8bae1dSRodney W. Grimes  */
44df8bae1dSRodney W. Grimes 
45df8bae1dSRodney W. Grimes /*
46df8bae1dSRodney W. Grimes  * Quick hack to page to dedicated partition(s).
47df8bae1dSRodney W. Grimes  * TODO:
48df8bae1dSRodney W. Grimes  *	Add multiprocessor locks
49df8bae1dSRodney W. Grimes  *	Deal with async writes in a better fashion
50df8bae1dSRodney W. Grimes  */
51df8bae1dSRodney W. Grimes 
52df8bae1dSRodney W. Grimes #include <sys/param.h>
53df8bae1dSRodney W. Grimes #include <sys/systm.h>
5464abb5a5SDavid Greenman #include <sys/kernel.h>
55df8bae1dSRodney W. Grimes #include <sys/proc.h>
56df8bae1dSRodney W. Grimes #include <sys/buf.h>
57df8bae1dSRodney W. Grimes #include <sys/vnode.h>
58df8bae1dSRodney W. Grimes #include <sys/malloc.h>
59efeaf95aSDavid Greenman #include <sys/vmmeter.h>
6026f9a767SRodney W. Grimes #include <sys/rlist.h>
61df8bae1dSRodney W. Grimes 
/*
 * Compile-time tunables.  Each is only defined here when the build has
 * not already supplied a value.
 */
#ifndef MAX_PAGEOUT_CLUSTER
#define MAX_PAGEOUT_CLUSTER 8	/* dimension of swpagerclean.spc_m[] */
#endif

#ifndef NPENDINGIO
#define NPENDINGIO	16	/* dimension of swcleanlist[] */
#endif

/* Number of swb_block[] slots walked per swap block in this file. */
#define SWB_NPAGES MAX_PAGEOUT_CLUSTER
71e47ed70bSJohn Dyson 
72df8bae1dSRodney W. Grimes #include <vm/vm.h>
73efeaf95aSDavid Greenman #include <vm/vm_prot.h>
74efeaf95aSDavid Greenman #include <vm/vm_object.h>
75df8bae1dSRodney W. Grimes #include <vm/vm_page.h>
76efeaf95aSDavid Greenman #include <vm/vm_pager.h>
77df8bae1dSRodney W. Grimes #include <vm/vm_pageout.h>
78df8bae1dSRodney W. Grimes #include <vm/swap_pager.h>
79efeaf95aSDavid Greenman #include <vm/vm_extern.h>
80df8bae1dSRodney W. Grimes 
81f708ef1bSPoul-Henning Kamp static int nswiodone;
8226f9a767SRodney W. Grimes int swap_pager_full;
8326f9a767SRodney W. Grimes extern int vm_swap_size;
84b44e4b7aSJohn Dyson static int suggest_more_swap = 0;
85f5a12711SPoul-Henning Kamp static int no_swap_space = 1;
86e47ed70bSJohn Dyson static int max_pageout_cluster;
87836e5d13SJohn Dyson struct rlisthdr swaplist;
8826f9a767SRodney W. Grimes 
89df8bae1dSRodney W. Grimes TAILQ_HEAD(swpclean, swpagerclean);
90df8bae1dSRodney W. Grimes 
9126f9a767SRodney W. Grimes typedef struct swpagerclean *swp_clean_t;
9226f9a767SRodney W. Grimes 
93f708ef1bSPoul-Henning Kamp static struct swpagerclean {
94df8bae1dSRodney W. Grimes 	TAILQ_ENTRY(swpagerclean) spc_list;
95df8bae1dSRodney W. Grimes 	int spc_flags;
96df8bae1dSRodney W. Grimes 	struct buf *spc_bp;
972a4895f4SDavid Greenman 	vm_object_t spc_object;
98df8bae1dSRodney W. Grimes 	vm_offset_t spc_kva;
99e736cd05SJohn Dyson 	int spc_first;
10026f9a767SRodney W. Grimes 	int spc_count;
10126f9a767SRodney W. Grimes 	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
102df8bae1dSRodney W. Grimes } swcleanlist[NPENDINGIO];
10326f9a767SRodney W. Grimes 
10426f9a767SRodney W. Grimes 
105df8bae1dSRodney W. Grimes /* spc_flags values */
10626f9a767SRodney W. Grimes #define SPC_ERROR	0x01
107df8bae1dSRodney W. Grimes 
10826f9a767SRodney W. Grimes #define SWB_EMPTY (-1)
109df8bae1dSRodney W. Grimes 
110f708ef1bSPoul-Henning Kamp /* list of completed page cleans */
111f708ef1bSPoul-Henning Kamp static struct swpclean swap_pager_done;
112f708ef1bSPoul-Henning Kamp 
113f708ef1bSPoul-Henning Kamp /* list of pending page cleans */
114f708ef1bSPoul-Henning Kamp static struct swpclean swap_pager_inuse;
115f708ef1bSPoul-Henning Kamp 
116f708ef1bSPoul-Henning Kamp /* list of free pager clean structs */
117f708ef1bSPoul-Henning Kamp static struct swpclean swap_pager_free;
118303b270bSEivind Eklund static int swap_pager_free_count;
119e47ed70bSJohn Dyson static int swap_pager_free_pending;
120f708ef1bSPoul-Henning Kamp 
121f708ef1bSPoul-Henning Kamp /* list of "named" anon region objects */
122f708ef1bSPoul-Henning Kamp static struct pagerlst swap_pager_object_list;
123f708ef1bSPoul-Henning Kamp 
124f708ef1bSPoul-Henning Kamp /* list of "unnamed" anon region objects */
125f708ef1bSPoul-Henning Kamp struct pagerlst swap_pager_un_object_list;
126df8bae1dSRodney W. Grimes 
12726f9a767SRodney W. Grimes #define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
128a1f6d91cSDavid Greenman #define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
129f708ef1bSPoul-Henning Kamp static int swap_pager_needflags;
13026f9a767SRodney W. Grimes 
131f5a12711SPoul-Henning Kamp static struct pagerlst *swp_qs[] = {
13224a1cce3SDavid Greenman 	&swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
13326f9a767SRodney W. Grimes };
13426f9a767SRodney W. Grimes 
13524a1cce3SDavid Greenman /*
13624a1cce3SDavid Greenman  * pagerops for OBJT_SWAP - "swap pager".
13724a1cce3SDavid Greenman  */
138ff98689dSBruce Evans static vm_object_t
139ff98689dSBruce Evans 		swap_pager_alloc __P((void *handle, vm_size_t size,
140a316d390SJohn Dyson 				      vm_prot_t prot, vm_ooffset_t offset));
141ff98689dSBruce Evans static void	swap_pager_dealloc __P((vm_object_t object));
142ff98689dSBruce Evans static boolean_t
143a316d390SJohn Dyson 		swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
144ff98689dSBruce Evans 					int *before, int *after));
145f708ef1bSPoul-Henning Kamp static int	swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
146ff98689dSBruce Evans static void	swap_pager_init __P((void));
147f708ef1bSPoul-Henning Kamp static void	swap_pager_sync __P((void));
148e47ed70bSJohn Dyson static void spc_free __P((swp_clean_t));
149f708ef1bSPoul-Henning Kamp 
150df8bae1dSRodney W. Grimes struct pagerops swappagerops = {
151df8bae1dSRodney W. Grimes 	swap_pager_init,
152df8bae1dSRodney W. Grimes 	swap_pager_alloc,
153df8bae1dSRodney W. Grimes 	swap_pager_dealloc,
15424a1cce3SDavid Greenman 	swap_pager_getpages,
15524a1cce3SDavid Greenman 	swap_pager_putpages,
15624a1cce3SDavid Greenman 	swap_pager_haspage,
15724a1cce3SDavid Greenman 	swap_pager_sync
158df8bae1dSRodney W. Grimes };
159df8bae1dSRodney W. Grimes 
160e47ed70bSJohn Dyson static int npendingio;
161f708ef1bSPoul-Henning Kamp static int dmmin;
162f708ef1bSPoul-Henning Kamp int dmmax;
16326f9a767SRodney W. Grimes 
1648ba0c490SBruce Evans static int	swap_pager_block_index __P((vm_pindex_t pindex));
1658ba0c490SBruce Evans static int	swap_pager_block_offset __P((vm_pindex_t pindex));
166a316d390SJohn Dyson static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
167a316d390SJohn Dyson 					  vm_pindex_t pindex, int *valid));
168cac597e4SBruce Evans static void	swap_pager_finish __P((swp_clean_t spc));
169cac597e4SBruce Evans static void	swap_pager_freepage __P((vm_page_t m));
170cac597e4SBruce Evans static void	swap_pager_free_swap __P((vm_object_t object));
171cac597e4SBruce Evans static void	swap_pager_freeswapspace __P((vm_object_t object,
172cac597e4SBruce Evans 					      unsigned int from,
173cac597e4SBruce Evans 					      unsigned int to));
174cac597e4SBruce Evans static int	swap_pager_getswapspace __P((vm_object_t object,
175cac597e4SBruce Evans 					     unsigned int amount,
176a316d390SJohn Dyson 					     daddr_t *rtval));
177ff98689dSBruce Evans static void	swap_pager_iodone __P((struct buf *));
178cac597e4SBruce Evans static void	swap_pager_iodone1 __P((struct buf *bp));
179cac597e4SBruce Evans static void	swap_pager_reclaim __P((void));
180cac597e4SBruce Evans static void	swap_pager_ridpages __P((vm_page_t *m, int count,
181cac597e4SBruce Evans 					 int reqpage));
182cac597e4SBruce Evans static void	swap_pager_setvalid __P((vm_object_t object,
183cac597e4SBruce Evans 					 vm_offset_t offset, int valid));
184cac597e4SBruce Evans static void	swapsizecheck __P((void));
18524a1cce3SDavid Greenman 
186de5f6a77SJohn Dyson #define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))
187de5f6a77SJohn Dyson 
1880d94caffSDavid Greenman static inline void
1890d94caffSDavid Greenman swapsizecheck()
1900d94caffSDavid Greenman {
19126f9a767SRodney W. Grimes 	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
192a1f6d91cSDavid Greenman 		if (swap_pager_full == 0)
1931af87c92SDavid Greenman 			printf("swap_pager: out of swap space\n");
19426f9a767SRodney W. Grimes 		swap_pager_full = 1;
19526f9a767SRodney W. Grimes 	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
19626f9a767SRodney W. Grimes 		swap_pager_full = 0;
19726f9a767SRodney W. Grimes }
19826f9a767SRodney W. Grimes 
199f5a12711SPoul-Henning Kamp static void
200df8bae1dSRodney W. Grimes swap_pager_init()
201df8bae1dSRodney W. Grimes {
202e47ed70bSJohn Dyson 	int maxsafepending;
20324a1cce3SDavid Greenman 	TAILQ_INIT(&swap_pager_object_list);
20424a1cce3SDavid Greenman 	TAILQ_INIT(&swap_pager_un_object_list);
205df8bae1dSRodney W. Grimes 
206df8bae1dSRodney W. Grimes 	/*
207df8bae1dSRodney W. Grimes 	 * Initialize clean lists
208df8bae1dSRodney W. Grimes 	 */
209df8bae1dSRodney W. Grimes 	TAILQ_INIT(&swap_pager_inuse);
21026f9a767SRodney W. Grimes 	TAILQ_INIT(&swap_pager_done);
211df8bae1dSRodney W. Grimes 	TAILQ_INIT(&swap_pager_free);
2123091ee09SJohn Dyson 	swap_pager_free_count = 0;
21326f9a767SRodney W. Grimes 
214df8bae1dSRodney W. Grimes 	/*
215df8bae1dSRodney W. Grimes 	 * Calculate the swap allocation constants.
216df8bae1dSRodney W. Grimes 	 */
217e911eafcSPoul-Henning Kamp 	dmmin = PAGE_SIZE / DEV_BSIZE;
21826f9a767SRodney W. Grimes 	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
219e47ed70bSJohn Dyson 
220e47ed70bSJohn Dyson 	maxsafepending = cnt.v_free_min - cnt.v_free_reserved;
221e47ed70bSJohn Dyson 	npendingio = NPENDINGIO;
222e47ed70bSJohn Dyson 	max_pageout_cluster = MAX_PAGEOUT_CLUSTER;
223e47ed70bSJohn Dyson 
224e47ed70bSJohn Dyson 	if ((2 * NPENDINGIO * MAX_PAGEOUT_CLUSTER) > maxsafepending) {
225e47ed70bSJohn Dyson 		max_pageout_cluster = MAX_PAGEOUT_CLUSTER / 2;
226e47ed70bSJohn Dyson 		npendingio = maxsafepending / (2 * max_pageout_cluster);
227e47ed70bSJohn Dyson 		if (npendingio < 2)
228e47ed70bSJohn Dyson 			npendingio = 2;
229e47ed70bSJohn Dyson 	}
230df8bae1dSRodney W. Grimes }
231df8bae1dSRodney W. Grimes 
23224a1cce3SDavid Greenman void
23324a1cce3SDavid Greenman swap_pager_swap_init()
234df8bae1dSRodney W. Grimes {
23526f9a767SRodney W. Grimes 	swp_clean_t spc;
23626f9a767SRodney W. Grimes 	struct buf *bp;
23724a1cce3SDavid Greenman 	int i;
2380d94caffSDavid Greenman 
23926f9a767SRodney W. Grimes 	/*
2400d94caffSDavid Greenman 	 * kva's are allocated here so that we dont need to keep doing
2410d94caffSDavid Greenman 	 * kmem_alloc pageables at runtime
24226f9a767SRodney W. Grimes 	 */
24326f9a767SRodney W. Grimes 	for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
244e47ed70bSJohn Dyson 		spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * max_pageout_cluster);
24526f9a767SRodney W. Grimes 		if (!spc->spc_kva) {
24626f9a767SRodney W. Grimes 			break;
24726f9a767SRodney W. Grimes 		}
248a1f6d91cSDavid Greenman 		spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
24926f9a767SRodney W. Grimes 		if (!spc->spc_bp) {
25026f9a767SRodney W. Grimes 			kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
25126f9a767SRodney W. Grimes 			break;
25226f9a767SRodney W. Grimes 		}
25326f9a767SRodney W. Grimes 		spc->spc_flags = 0;
25426f9a767SRodney W. Grimes 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
2553091ee09SJohn Dyson 		swap_pager_free_count++;
25626f9a767SRodney W. Grimes 	}
25726f9a767SRodney W. Grimes }
25824a1cce3SDavid Greenman 
25924a1cce3SDavid Greenman int
26024a1cce3SDavid Greenman swap_pager_swp_alloc(object, wait)
26124a1cce3SDavid Greenman 	vm_object_t object;
26224a1cce3SDavid Greenman 	int wait;
26324a1cce3SDavid Greenman {
2642a4895f4SDavid Greenman 	sw_blk_t swb;
2652a4895f4SDavid Greenman 	int nblocks;
26624a1cce3SDavid Greenman 	int i, j;
26724a1cce3SDavid Greenman 
268a316d390SJohn Dyson 	nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
2692a4895f4SDavid Greenman 	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
2702a4895f4SDavid Greenman 	if (swb == NULL)
27124a1cce3SDavid Greenman 		return 1;
27224a1cce3SDavid Greenman 
2732a4895f4SDavid Greenman 	for (i = 0; i < nblocks; i++) {
2742a4895f4SDavid Greenman 		swb[i].swb_valid = 0;
2752a4895f4SDavid Greenman 		swb[i].swb_locked = 0;
27626f9a767SRodney W. Grimes 		for (j = 0; j < SWB_NPAGES; j++)
2772a4895f4SDavid Greenman 			swb[i].swb_block[j] = SWB_EMPTY;
27826f9a767SRodney W. Grimes 	}
27926f9a767SRodney W. Grimes 
2802a4895f4SDavid Greenman 	object->un_pager.swp.swp_nblocks = nblocks;
2812a4895f4SDavid Greenman 	object->un_pager.swp.swp_allocsize = 0;
2822a4895f4SDavid Greenman 	object->un_pager.swp.swp_blocks = swb;
2832a4895f4SDavid Greenman 	object->un_pager.swp.swp_poip = 0;
28424a1cce3SDavid Greenman 
28524a1cce3SDavid Greenman 	if (object->handle != NULL) {
28624a1cce3SDavid Greenman 		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
287df8bae1dSRodney W. Grimes 	} else {
28824a1cce3SDavid Greenman 		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
289df8bae1dSRodney W. Grimes 	}
290df8bae1dSRodney W. Grimes 
29124a1cce3SDavid Greenman 	return 0;
29224a1cce3SDavid Greenman }
29324a1cce3SDavid Greenman 
29424a1cce3SDavid Greenman /*
2952a4895f4SDavid Greenman  * Allocate an object and associated resources.
29624a1cce3SDavid Greenman  * Note that if we are called from the pageout daemon (handle == NULL)
29724a1cce3SDavid Greenman  * we should not wait for memory as it could resulting in deadlock.
29824a1cce3SDavid Greenman  */
299f5a12711SPoul-Henning Kamp static vm_object_t
300b9dcd593SBruce Evans swap_pager_alloc(void *handle, vm_size_t size, vm_prot_t prot,
301b9dcd593SBruce Evans 		 vm_ooffset_t offset)
30224a1cce3SDavid Greenman {
30324a1cce3SDavid Greenman 	vm_object_t object;
30424a1cce3SDavid Greenman 
30524a1cce3SDavid Greenman 	/*
30624a1cce3SDavid Greenman 	 * If this is a "named" anonymous region, look it up and use the
30724a1cce3SDavid Greenman 	 * object if it exists, otherwise allocate a new one.
30824a1cce3SDavid Greenman 	 */
30924a1cce3SDavid Greenman 	if (handle) {
31024a1cce3SDavid Greenman 		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
31124a1cce3SDavid Greenman 		if (object != NULL) {
31224a1cce3SDavid Greenman 			vm_object_reference(object);
31324a1cce3SDavid Greenman 		} else {
31424a1cce3SDavid Greenman 			/*
31524a1cce3SDavid Greenman 			 * XXX - there is a race condition here. Two processes
31624a1cce3SDavid Greenman 			 * can request the same named object simultaneuously,
31724a1cce3SDavid Greenman 			 * and if one blocks for memory, the result is a disaster.
31824a1cce3SDavid Greenman 			 * Probably quite rare, but is yet another reason to just
31924a1cce3SDavid Greenman 			 * rip support of "named anonymous regions" out altogether.
32024a1cce3SDavid Greenman 			 */
321a316d390SJohn Dyson 			object = vm_object_allocate(OBJT_SWAP,
322aa8de40aSPoul-Henning Kamp 				OFF_TO_IDX(offset + PAGE_MASK) + size);
32324a1cce3SDavid Greenman 			object->handle = handle;
32424a1cce3SDavid Greenman 			(void) swap_pager_swp_alloc(object, M_WAITOK);
32524a1cce3SDavid Greenman 		}
32624a1cce3SDavid Greenman 	} else {
327a316d390SJohn Dyson 		object = vm_object_allocate(OBJT_SWAP,
328aa8de40aSPoul-Henning Kamp 			OFF_TO_IDX(offset + PAGE_MASK) + size);
32924a1cce3SDavid Greenman 		(void) swap_pager_swp_alloc(object, M_WAITOK);
33024a1cce3SDavid Greenman 	}
33124a1cce3SDavid Greenman 
33224a1cce3SDavid Greenman 	return (object);
333df8bae1dSRodney W. Grimes }
334df8bae1dSRodney W. Grimes 
33526f9a767SRodney W. Grimes /*
33626f9a767SRodney W. Grimes  * returns disk block associated with pager and offset
33726f9a767SRodney W. Grimes  * additionally, as a side effect returns a flag indicating
33826f9a767SRodney W. Grimes  * if the block has been written
33926f9a767SRodney W. Grimes  */
34026f9a767SRodney W. Grimes 
341a316d390SJohn Dyson inline static daddr_t *
342a316d390SJohn Dyson swap_pager_diskaddr(object, pindex, valid)
34324a1cce3SDavid Greenman 	vm_object_t object;
344a316d390SJohn Dyson 	vm_pindex_t pindex;
34526f9a767SRodney W. Grimes 	int *valid;
34626f9a767SRodney W. Grimes {
34726f9a767SRodney W. Grimes 	register sw_blk_t swb;
34826f9a767SRodney W. Grimes 	int ix;
34926f9a767SRodney W. Grimes 
35026f9a767SRodney W. Grimes 	if (valid)
35126f9a767SRodney W. Grimes 		*valid = 0;
352a316d390SJohn Dyson 	ix = pindex / SWB_NPAGES;
3532a4895f4SDavid Greenman 	if ((ix >= object->un_pager.swp.swp_nblocks) ||
354a316d390SJohn Dyson 	    (pindex >= object->size)) {
35526f9a767SRodney W. Grimes 		return (FALSE);
35626f9a767SRodney W. Grimes 	}
3572a4895f4SDavid Greenman 	swb = &object->un_pager.swp.swp_blocks[ix];
358a316d390SJohn Dyson 	ix = pindex % SWB_NPAGES;
35926f9a767SRodney W. Grimes 	if (valid)
36026f9a767SRodney W. Grimes 		*valid = swb->swb_valid & (1 << ix);
36126f9a767SRodney W. Grimes 	return &swb->swb_block[ix];
36226f9a767SRodney W. Grimes }
36326f9a767SRodney W. Grimes 
36426f9a767SRodney W. Grimes /*
36526f9a767SRodney W. Grimes  * Utility routine to set the valid (written) bit for
36626f9a767SRodney W. Grimes  * a block associated with a pager and offset
36726f9a767SRodney W. Grimes  */
368df8bae1dSRodney W. Grimes static void
3692a4895f4SDavid Greenman swap_pager_setvalid(object, offset, valid)
3702a4895f4SDavid Greenman 	vm_object_t object;
37126f9a767SRodney W. Grimes 	vm_offset_t offset;
37226f9a767SRodney W. Grimes 	int valid;
37326f9a767SRodney W. Grimes {
37426f9a767SRodney W. Grimes 	register sw_blk_t swb;
37526f9a767SRodney W. Grimes 	int ix;
37626f9a767SRodney W. Grimes 
377a316d390SJohn Dyson 	ix = offset / SWB_NPAGES;
3782a4895f4SDavid Greenman 	if (ix >= object->un_pager.swp.swp_nblocks)
37926f9a767SRodney W. Grimes 		return;
38026f9a767SRodney W. Grimes 
3812a4895f4SDavid Greenman 	swb = &object->un_pager.swp.swp_blocks[ix];
382a316d390SJohn Dyson 	ix = offset % SWB_NPAGES;
38326f9a767SRodney W. Grimes 	if (valid)
38426f9a767SRodney W. Grimes 		swb->swb_valid |= (1 << ix);
38526f9a767SRodney W. Grimes 	else
38626f9a767SRodney W. Grimes 		swb->swb_valid &= ~(1 << ix);
38726f9a767SRodney W. Grimes 	return;
38826f9a767SRodney W. Grimes }
38926f9a767SRodney W. Grimes 
39026f9a767SRodney W. Grimes /*
39126f9a767SRodney W. Grimes  * this routine allocates swap space with a fragmentation
39226f9a767SRodney W. Grimes  * minimization policy.
39326f9a767SRodney W. Grimes  */
394f5a12711SPoul-Henning Kamp static int
3952a4895f4SDavid Greenman swap_pager_getswapspace(object, amount, rtval)
3962a4895f4SDavid Greenman 	vm_object_t object;
3972a4895f4SDavid Greenman 	unsigned int amount;
398a316d390SJohn Dyson 	daddr_t *rtval;
3990d94caffSDavid Greenman {
400a316d390SJohn Dyson 	unsigned location;
401b44e4b7aSJohn Dyson 
40224ea4a96SDavid Greenman 	vm_swap_size -= amount;
403b44e4b7aSJohn Dyson 	if (!suggest_more_swap && (vm_swap_size < btodb(cnt.v_page_count * PAGE_SIZE))) {
404b44e4b7aSJohn Dyson 		printf("swap_pager: suggest more swap space: %d MB\n",
405b44e4b7aSJohn Dyson 			(2 * cnt.v_page_count * (PAGE_SIZE / 1024)) / 1000);
406b44e4b7aSJohn Dyson 		suggest_more_swap = 1;
407b44e4b7aSJohn Dyson 	}
408b44e4b7aSJohn Dyson 
409a316d390SJohn Dyson 	if (!rlist_alloc(&swaplist, amount, &location)) {
41024ea4a96SDavid Greenman 		vm_swap_size += amount;
41126f9a767SRodney W. Grimes 		return 0;
41224ea4a96SDavid Greenman 	} else {
41324ea4a96SDavid Greenman 		swapsizecheck();
4142a4895f4SDavid Greenman 		object->un_pager.swp.swp_allocsize += amount;
415a316d390SJohn Dyson 		*rtval = location;
41626f9a767SRodney W. Grimes 		return 1;
41726f9a767SRodney W. Grimes 	}
41826f9a767SRodney W. Grimes }
41926f9a767SRodney W. Grimes 
42026f9a767SRodney W. Grimes /*
42126f9a767SRodney W. Grimes  * this routine frees swap space with a fragmentation
42226f9a767SRodney W. Grimes  * minimization policy.
42326f9a767SRodney W. Grimes  */
424f5a12711SPoul-Henning Kamp static void
4252a4895f4SDavid Greenman swap_pager_freeswapspace(object, from, to)
4262a4895f4SDavid Greenman 	vm_object_t object;
4272a4895f4SDavid Greenman 	unsigned int from;
4282a4895f4SDavid Greenman 	unsigned int to;
4290d94caffSDavid Greenman {
43035c10d22SDavid Greenman 	rlist_free(&swaplist, from, to);
43124ea4a96SDavid Greenman 	vm_swap_size += (to - from) + 1;
4322a4895f4SDavid Greenman 	object->un_pager.swp.swp_allocsize -= (to - from) + 1;
43324ea4a96SDavid Greenman 	swapsizecheck();
43426f9a767SRodney W. Grimes }
43526f9a767SRodney W. Grimes /*
43626f9a767SRodney W. Grimes  * this routine frees swap blocks from a specified pager
43726f9a767SRodney W. Grimes  */
43826f9a767SRodney W. Grimes void
43924a1cce3SDavid Greenman swap_pager_freespace(object, start, size)
44024a1cce3SDavid Greenman 	vm_object_t object;
441a316d390SJohn Dyson 	vm_pindex_t start;
442a316d390SJohn Dyson 	vm_size_t size;
44326f9a767SRodney W. Grimes {
444a316d390SJohn Dyson 	vm_pindex_t i;
44526f9a767SRodney W. Grimes 	int s;
44626f9a767SRodney W. Grimes 
447e47ed70bSJohn Dyson 	s = splvm();
448a316d390SJohn Dyson 	for (i = start; i < start + size; i += 1) {
44926f9a767SRodney W. Grimes 		int valid;
450a316d390SJohn Dyson 		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
4510d94caffSDavid Greenman 
45226f9a767SRodney W. Grimes 		if (addr && *addr != SWB_EMPTY) {
4532a4895f4SDavid Greenman 			swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
45426f9a767SRodney W. Grimes 			if (valid) {
4552a4895f4SDavid Greenman 				swap_pager_setvalid(object, i, 0);
45626f9a767SRodney W. Grimes 			}
45726f9a767SRodney W. Grimes 			*addr = SWB_EMPTY;
45826f9a767SRodney W. Grimes 		}
45926f9a767SRodney W. Grimes 	}
46026f9a767SRodney W. Grimes 	splx(s);
46126f9a767SRodney W. Grimes }
46226f9a767SRodney W. Grimes 
4630a47b48bSJohn Dyson /*
4640a47b48bSJohn Dyson  * same as freespace, but don't free, just force a DMZ next time
4650a47b48bSJohn Dyson  */
4660a47b48bSJohn Dyson void
4670a47b48bSJohn Dyson swap_pager_dmzspace(object, start, size)
4680a47b48bSJohn Dyson 	vm_object_t object;
4690a47b48bSJohn Dyson 	vm_pindex_t start;
4700a47b48bSJohn Dyson 	vm_size_t size;
4710a47b48bSJohn Dyson {
4720a47b48bSJohn Dyson 	vm_pindex_t i;
4730a47b48bSJohn Dyson 	int s;
4740a47b48bSJohn Dyson 
475e47ed70bSJohn Dyson 	s = splvm();
4760a47b48bSJohn Dyson 	for (i = start; i < start + size; i += 1) {
4770a47b48bSJohn Dyson 		int valid;
4780a47b48bSJohn Dyson 		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
4790a47b48bSJohn Dyson 
4800a47b48bSJohn Dyson 		if (addr && *addr != SWB_EMPTY) {
4810a47b48bSJohn Dyson 			if (valid) {
4820a47b48bSJohn Dyson 				swap_pager_setvalid(object, i, 0);
4830a47b48bSJohn Dyson 			}
4840a47b48bSJohn Dyson 		}
4850a47b48bSJohn Dyson 	}
4860a47b48bSJohn Dyson 	splx(s);
4870a47b48bSJohn Dyson }
4880a47b48bSJohn Dyson 
489a1f6d91cSDavid Greenman static void
4902a4895f4SDavid Greenman swap_pager_free_swap(object)
4912a4895f4SDavid Greenman 	vm_object_t object;
492a1f6d91cSDavid Greenman {
493a1f6d91cSDavid Greenman 	register int i, j;
4942a4895f4SDavid Greenman 	register sw_blk_t swb;
495a1f6d91cSDavid Greenman 	int first_block=0, block_count=0;
496a1f6d91cSDavid Greenman 	int s;
497a1f6d91cSDavid Greenman 	/*
498a1f6d91cSDavid Greenman 	 * Free left over swap blocks
499a1f6d91cSDavid Greenman 	 */
50047221757SJohn Dyson 	swb = object->un_pager.swp.swp_blocks;
5012d8acc0fSJohn Dyson 	if (swb == NULL) {
50247221757SJohn Dyson 		return;
5032d8acc0fSJohn Dyson 	}
50447221757SJohn Dyson 
5052d8acc0fSJohn Dyson 	s = splvm();
50647221757SJohn Dyson 	for (i = 0; i < object->un_pager.swp.swp_nblocks; i++, swb++) {
507a1f6d91cSDavid Greenman 		for (j = 0; j < SWB_NPAGES; j++) {
5082a4895f4SDavid Greenman 			if (swb->swb_block[j] != SWB_EMPTY) {
509a1f6d91cSDavid Greenman 				/*
510a1f6d91cSDavid Greenman    				 * initially the length of the run is zero
511a1f6d91cSDavid Greenman    				 */
512a1f6d91cSDavid Greenman 				if (block_count == 0) {
5132a4895f4SDavid Greenman 					first_block = swb->swb_block[j];
514a1f6d91cSDavid Greenman 					block_count = btodb(PAGE_SIZE);
5152a4895f4SDavid Greenman 					swb->swb_block[j] = SWB_EMPTY;
516a1f6d91cSDavid Greenman 				/*
517a1f6d91cSDavid Greenman    				 * if the new block can be included into the current run
518a1f6d91cSDavid Greenman    				 */
5192a4895f4SDavid Greenman 				} else if (swb->swb_block[j] == first_block + block_count) {
520a1f6d91cSDavid Greenman 					block_count += btodb(PAGE_SIZE);
5212a4895f4SDavid Greenman 					swb->swb_block[j] = SWB_EMPTY;
522a1f6d91cSDavid Greenman 				/*
523a1f6d91cSDavid Greenman    				 * terminate the previous run, and start a new one
524a1f6d91cSDavid Greenman    				 */
525a1f6d91cSDavid Greenman 				} else {
5262a4895f4SDavid Greenman 					swap_pager_freeswapspace(object, first_block,
527a1f6d91cSDavid Greenman    					(unsigned) first_block + block_count - 1);
5282a4895f4SDavid Greenman 					first_block = swb->swb_block[j];
529a1f6d91cSDavid Greenman 					block_count = btodb(PAGE_SIZE);
5302a4895f4SDavid Greenman 					swb->swb_block[j] = SWB_EMPTY;
531a1f6d91cSDavid Greenman 				}
532a1f6d91cSDavid Greenman 			}
533a1f6d91cSDavid Greenman 		}
534a1f6d91cSDavid Greenman 	}
535a1f6d91cSDavid Greenman 
536a1f6d91cSDavid Greenman 	if (block_count) {
5372a4895f4SDavid Greenman 		swap_pager_freeswapspace(object, first_block,
538a1f6d91cSDavid Greenman 		   	 (unsigned) first_block + block_count - 1);
539a1f6d91cSDavid Greenman 	}
540a1f6d91cSDavid Greenman 	splx(s);
541a1f6d91cSDavid Greenman }
542a1f6d91cSDavid Greenman 
543a1f6d91cSDavid Greenman 
54426f9a767SRodney W. Grimes /*
54526f9a767SRodney W. Grimes  * swap_pager_reclaim frees up over-allocated space from all pagers
54626f9a767SRodney W. Grimes  * this eliminates internal fragmentation due to allocation of space
54726f9a767SRodney W. Grimes  * for segments that are never swapped to. It has been written so that
54826f9a767SRodney W. Grimes  * it does not block until the rlist_free operation occurs; it keeps
54926f9a767SRodney W. Grimes  * the queues consistant.
55026f9a767SRodney W. Grimes  */
55126f9a767SRodney W. Grimes 
55226f9a767SRodney W. Grimes /*
55326f9a767SRodney W. Grimes  * Maximum number of blocks (pages) to reclaim per pass
55426f9a767SRodney W. Grimes  */
555a1f6d91cSDavid Greenman #define MAXRECLAIM 128
55626f9a767SRodney W. Grimes 
557f5a12711SPoul-Henning Kamp static void
55826f9a767SRodney W. Grimes swap_pager_reclaim()
55926f9a767SRodney W. Grimes {
56024a1cce3SDavid Greenman 	vm_object_t object;
56126f9a767SRodney W. Grimes 	int i, j, k;
56226f9a767SRodney W. Grimes 	int s;
56326f9a767SRodney W. Grimes 	int reclaimcount;
564a1f6d91cSDavid Greenman 	static struct {
565a1f6d91cSDavid Greenman 		int address;
5662a4895f4SDavid Greenman 		vm_object_t object;
567a1f6d91cSDavid Greenman 	} reclaims[MAXRECLAIM];
56826f9a767SRodney W. Grimes 	static int in_reclaim;
56926f9a767SRodney W. Grimes 
57026f9a767SRodney W. Grimes 	/*
57126f9a767SRodney W. Grimes 	 * allow only one process to be in the swap_pager_reclaim subroutine
57226f9a767SRodney W. Grimes 	 */
573e47ed70bSJohn Dyson 	s = splvm();
57426f9a767SRodney W. Grimes 	if (in_reclaim) {
57524a1cce3SDavid Greenman 		tsleep(&in_reclaim, PSWP, "swrclm", 0);
57626f9a767SRodney W. Grimes 		splx(s);
57726f9a767SRodney W. Grimes 		return;
57826f9a767SRodney W. Grimes 	}
57926f9a767SRodney W. Grimes 	in_reclaim = 1;
58026f9a767SRodney W. Grimes 	reclaimcount = 0;
58126f9a767SRodney W. Grimes 
58226f9a767SRodney W. Grimes 	/* for each pager queue */
58326f9a767SRodney W. Grimes 	for (k = 0; swp_qs[k]; k++) {
58426f9a767SRodney W. Grimes 
585b18bfc3dSJohn Dyson 		object = TAILQ_FIRST(swp_qs[k]);
58624a1cce3SDavid Greenman 		while (object && (reclaimcount < MAXRECLAIM)) {
58726f9a767SRodney W. Grimes 
58826f9a767SRodney W. Grimes 			/*
58926f9a767SRodney W. Grimes 			 * see if any blocks associated with a pager has been
59026f9a767SRodney W. Grimes 			 * allocated but not used (written)
59126f9a767SRodney W. Grimes 			 */
5925070c7f8SJohn Dyson 			if ((object->flags & OBJ_DEAD) == 0 &&
5935070c7f8SJohn Dyson 				(object->paging_in_progress == 0)) {
5942a4895f4SDavid Greenman 				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
5952a4895f4SDavid Greenman 					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];
5960d94caffSDavid Greenman 
59726f9a767SRodney W. Grimes 					if (swb->swb_locked)
59826f9a767SRodney W. Grimes 						continue;
59926f9a767SRodney W. Grimes 					for (j = 0; j < SWB_NPAGES; j++) {
60026f9a767SRodney W. Grimes 						if (swb->swb_block[j] != SWB_EMPTY &&
60126f9a767SRodney W. Grimes 						    (swb->swb_valid & (1 << j)) == 0) {
602a1f6d91cSDavid Greenman 							reclaims[reclaimcount].address = swb->swb_block[j];
6032a4895f4SDavid Greenman 							reclaims[reclaimcount++].object = object;
60426f9a767SRodney W. Grimes 							swb->swb_block[j] = SWB_EMPTY;
60526f9a767SRodney W. Grimes 							if (reclaimcount >= MAXRECLAIM)
60626f9a767SRodney W. Grimes 								goto rfinished;
60726f9a767SRodney W. Grimes 						}
60826f9a767SRodney W. Grimes 					}
60926f9a767SRodney W. Grimes 				}
610a316d390SJohn Dyson 			}
611b18bfc3dSJohn Dyson 			object = TAILQ_NEXT(object, pager_object_list);
61226f9a767SRodney W. Grimes 		}
61326f9a767SRodney W. Grimes 	}
61426f9a767SRodney W. Grimes 
61526f9a767SRodney W. Grimes rfinished:
61626f9a767SRodney W. Grimes 
61726f9a767SRodney W. Grimes 	/*
61826f9a767SRodney W. Grimes 	 * free the blocks that have been added to the reclaim list
61926f9a767SRodney W. Grimes 	 */
62026f9a767SRodney W. Grimes 	for (i = 0; i < reclaimcount; i++) {
6212a4895f4SDavid Greenman 		swap_pager_freeswapspace(reclaims[i].object,
6222a4895f4SDavid Greenman 		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
62326f9a767SRodney W. Grimes 	}
62426f9a767SRodney W. Grimes 	splx(s);
62526f9a767SRodney W. Grimes 	in_reclaim = 0;
62624a1cce3SDavid Greenman 	wakeup(&in_reclaim);
62726f9a767SRodney W. Grimes }
62826f9a767SRodney W. Grimes 
62926f9a767SRodney W. Grimes 
63026f9a767SRodney W. Grimes /*
63126f9a767SRodney W. Grimes  * swap_pager_copy copies blocks from one pager to another and
63226f9a767SRodney W. Grimes  * destroys the source pager
 *
 *	srcobject	object whose swap blocks are given up; on return its
 *			swp_blocks array has been freed and set to NULL
 *	srcoffset,
 *	offset		page indices; destination page i is looked up in the
 *			source at index (i + offset + srcoffset)
 *	dstobject	object receiving the blocks; dstobject->size bounds
 *			the range of pages that is walked
 *	dstoffset	page index added to i for lookups in the destination
 *
 * The source is first taken off its pager queue, and the routine sleeps
 * until srcobject has no pageouts in progress (swp_poip == 0) before any
 * block is moved.
63326f9a767SRodney W. Grimes  */
63426f9a767SRodney W. Grimes 
63526f9a767SRodney W. Grimes void
63624a1cce3SDavid Greenman swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset)
63724a1cce3SDavid Greenman 	vm_object_t srcobject;
638a316d390SJohn Dyson 	vm_pindex_t srcoffset;
63924a1cce3SDavid Greenman 	vm_object_t dstobject;
640a316d390SJohn Dyson 	vm_pindex_t dstoffset;
641a316d390SJohn Dyson 	vm_pindex_t offset;
64226f9a767SRodney W. Grimes {
643a316d390SJohn Dyson 	vm_pindex_t i;
644a1f6d91cSDavid Greenman 	int origsize;
64526f9a767SRodney W. Grimes 	int s;
64626f9a767SRodney W. Grimes 
	/* swap is available again, so drop the "no swap space" indication */
64724ea4a96SDavid Greenman 	if (vm_swap_size)
64824ea4a96SDavid Greenman 		no_swap_space = 0;
64924ea4a96SDavid Greenman 
	/* remembered only for the diagnostic printed at the end */
6502a4895f4SDavid Greenman 	origsize = srcobject->un_pager.swp.swp_allocsize;
65126f9a767SRodney W. Grimes 
65226f9a767SRodney W. Grimes 	/*
65324a1cce3SDavid Greenman 	 * remove the source object from the swap_pager internal queue
	 * (objects without a handle live on the "un" list)
65426f9a767SRodney W. Grimes 	 */
65524a1cce3SDavid Greenman 	if (srcobject->handle == NULL) {
65624a1cce3SDavid Greenman 		TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
65726f9a767SRodney W. Grimes 	} else {
65824a1cce3SDavid Greenman 		TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
65926f9a767SRodney W. Grimes 	}
66026f9a767SRodney W. Grimes 
	/*
	 * wait, at splvm, until every pageout in progress on the source
	 * has completed; the raised level is held until the transfer loop
	 * below is done
	 */
661e47ed70bSJohn Dyson 	s = splvm();
6622a4895f4SDavid Greenman 	while (srcobject->un_pager.swp.swp_poip) {
6632a4895f4SDavid Greenman 		tsleep(srcobject, PVM, "spgout", 0);
66426f9a767SRodney W. Grimes 	}
66526f9a767SRodney W. Grimes 
66626f9a767SRodney W. Grimes 	/*
66726f9a767SRodney W. Grimes 	 * clean all of the pages that are currently active and finished
66826f9a767SRodney W. Grimes 	 */
669e47ed70bSJohn Dyson 	if (swap_pager_free_pending)
67024a1cce3SDavid Greenman 		swap_pager_sync();
67126f9a767SRodney W. Grimes 
67226f9a767SRodney W. Grimes 	/*
67326f9a767SRodney W. Grimes 	 * transfer source to destination
67426f9a767SRodney W. Grimes 	 */
675a316d390SJohn Dyson 	for (i = 0; i < dstobject->size; i += 1) {
67626f9a767SRodney W. Grimes 		int srcvalid, dstvalid;
677a316d390SJohn Dyson 		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset,
67826f9a767SRodney W. Grimes 						    &srcvalid);
679a316d390SJohn Dyson 		daddr_t *dstaddrp;
6800d94caffSDavid Greenman 
68126f9a767SRodney W. Grimes 		/*
68226f9a767SRodney W. Grimes 		 * see if the source has space allocated
68326f9a767SRodney W. Grimes 		 */
68426f9a767SRodney W. Grimes 		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
68526f9a767SRodney W. Grimes 			/*
6860d94caffSDavid Greenman 			 * if the source is valid and the dest has no space,
6870d94caffSDavid Greenman 			 * then copy the allocation from the source to the
6880d94caffSDavid Greenman 			 * dest.
68926f9a767SRodney W. Grimes 			 */
69026f9a767SRodney W. Grimes 			if (srcvalid) {
69124a1cce3SDavid Greenman 				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
692a1f6d91cSDavid Greenman 							&dstvalid);
69326f9a767SRodney W. Grimes 				/*
6940d94caffSDavid Greenman 				 * if the dest has a block allocated that is
6950d94caffSDavid Greenman 				 * not valid, release it so that the source
6960d94caffSDavid Greenman 				 * block can take its place.  a dest block
				 * that is valid is left alone, and the source
				 * block is then deallocated further below
				 * without being copied.
69726f9a767SRodney W. Grimes 				 */
69826f9a767SRodney W. Grimes 				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
6992a4895f4SDavid Greenman 					swap_pager_freeswapspace(dstobject, *dstaddrp,
700a1f6d91cSDavid Greenman 						*dstaddrp + btodb(PAGE_SIZE) - 1);
70126f9a767SRodney W. Grimes 					*dstaddrp = SWB_EMPTY;
70226f9a767SRodney W. Grimes 				}
				/*
				 * hand the block over: the dest slot takes the
				 * disk address, the source slot is emptied, the
				 * per-object allocation counts follow the block
				 * and the dest page is marked valid.
				 */
70326f9a767SRodney W. Grimes 				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
70426f9a767SRodney W. Grimes 					*dstaddrp = *srcaddrp;
70526f9a767SRodney W. Grimes 					*srcaddrp = SWB_EMPTY;
7062a4895f4SDavid Greenman 					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
7072a4895f4SDavid Greenman 					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
7082a4895f4SDavid Greenman 					swap_pager_setvalid(dstobject, i + dstoffset, 1);
70926f9a767SRodney W. Grimes 				}
71026f9a767SRodney W. Grimes 			}
71126f9a767SRodney W. Grimes 			/*
7120d94caffSDavid Greenman 			 * if the source is not empty at this point, then
7130d94caffSDavid Greenman 			 * deallocate the space.
71426f9a767SRodney W. Grimes 			 */
71526f9a767SRodney W. Grimes 			if (*srcaddrp != SWB_EMPTY) {
7162a4895f4SDavid Greenman 				swap_pager_freeswapspace(srcobject, *srcaddrp,
717a1f6d91cSDavid Greenman 					*srcaddrp + btodb(PAGE_SIZE) - 1);
71826f9a767SRodney W. Grimes 				*srcaddrp = SWB_EMPTY;
71926f9a767SRodney W. Grimes 			}
72026f9a767SRodney W. Grimes 		}
72126f9a767SRodney W. Grimes 	}
72226f9a767SRodney W. Grimes 	splx(s);
72326f9a767SRodney W. Grimes 
724a1f6d91cSDavid Greenman 	/*
725a1f6d91cSDavid Greenman 	 * Free left over swap blocks
726a1f6d91cSDavid Greenman 	 */
7272a4895f4SDavid Greenman 	swap_pager_free_swap(srcobject);
728a1f6d91cSDavid Greenman 
	/* anything still accounted to the source here is reported, not fixed */
7292a4895f4SDavid Greenman 	if (srcobject->un_pager.swp.swp_allocsize) {
7302a4895f4SDavid Greenman 		printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
7312a4895f4SDavid Greenman 		    srcobject->un_pager.swp.swp_allocsize, origsize);
7322a4895f4SDavid Greenman 	}
7332a4895f4SDavid Greenman 
	/* release the source's block array; this is the "destroy" step */
7342a4895f4SDavid Greenman 	free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
7352a4895f4SDavid Greenman 	srcobject->un_pager.swp.swp_blocks = NULL;
73626f9a767SRodney W. Grimes 
73726f9a767SRodney W. Grimes 	return;
73826f9a767SRodney W. Grimes }
73926f9a767SRodney W. Grimes 
740f5a12711SPoul-Henning Kamp static void
74124a1cce3SDavid Greenman swap_pager_dealloc(object)
74224a1cce3SDavid Greenman 	vm_object_t object;
743df8bae1dSRodney W. Grimes {
744df8bae1dSRodney W. Grimes 	int s;
74547221757SJohn Dyson 	sw_blk_t swb;
746df8bae1dSRodney W. Grimes 
747df8bae1dSRodney W. Grimes 	/*
7480d94caffSDavid Greenman 	 * Remove from list right away so lookups will fail if we block for
7490d94caffSDavid Greenman 	 * pageout completion.
750df8bae1dSRodney W. Grimes 	 */
75124a1cce3SDavid Greenman 	if (object->handle == NULL) {
75224a1cce3SDavid Greenman 		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
75326f9a767SRodney W. Grimes 	} else {
75424a1cce3SDavid Greenman 		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
755df8bae1dSRodney W. Grimes 	}
75624a1cce3SDavid Greenman 
757df8bae1dSRodney W. Grimes 	/*
7580d94caffSDavid Greenman 	 * Wait for all pageouts to finish and remove all entries from
7590d94caffSDavid Greenman 	 * cleaning list.
760df8bae1dSRodney W. Grimes 	 */
76126f9a767SRodney W. Grimes 
762e47ed70bSJohn Dyson 	s = splvm();
7632a4895f4SDavid Greenman 	while (object->un_pager.swp.swp_poip) {
7642a4895f4SDavid Greenman 		tsleep(object, PVM, "swpout", 0);
765df8bae1dSRodney W. Grimes 	}
766df8bae1dSRodney W. Grimes 	splx(s);
76726f9a767SRodney W. Grimes 
768e47ed70bSJohn Dyson 	if (swap_pager_free_pending)
76924a1cce3SDavid Greenman 		swap_pager_sync();
770df8bae1dSRodney W. Grimes 
771df8bae1dSRodney W. Grimes 	/*
772df8bae1dSRodney W. Grimes 	 * Free left over swap blocks
773df8bae1dSRodney W. Grimes 	 */
7742a4895f4SDavid Greenman 	swap_pager_free_swap(object);
77526f9a767SRodney W. Grimes 
7762a4895f4SDavid Greenman 	if (object->un_pager.swp.swp_allocsize) {
7772a4895f4SDavid Greenman 		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
7782a4895f4SDavid Greenman 		    object->un_pager.swp.swp_allocsize);
7792a4895f4SDavid Greenman 	}
78047221757SJohn Dyson 	swb = object->un_pager.swp.swp_blocks;
78147221757SJohn Dyson 	if (swb) {
782df8bae1dSRodney W. Grimes 		/*
783df8bae1dSRodney W. Grimes    		* Free swap management resources
784df8bae1dSRodney W. Grimes    		*/
78547221757SJohn Dyson 		free(swb, M_VMPGDATA);
7862a4895f4SDavid Greenman 		object->un_pager.swp.swp_blocks = NULL;
78726f9a767SRodney W. Grimes 	}
78847221757SJohn Dyson }
78926f9a767SRodney W. Grimes 
7908ba0c490SBruce Evans static inline int
791a316d390SJohn Dyson swap_pager_block_index(pindex)
792a316d390SJohn Dyson 	vm_pindex_t pindex;
79326f9a767SRodney W. Grimes {
794a316d390SJohn Dyson 	return (pindex / SWB_NPAGES);
79526f9a767SRodney W. Grimes }
79626f9a767SRodney W. Grimes 
7978ba0c490SBruce Evans static inline int
798a316d390SJohn Dyson swap_pager_block_offset(pindex)
799a316d390SJohn Dyson 	vm_pindex_t pindex;
80026f9a767SRodney W. Grimes {
801a316d390SJohn Dyson 	return (pindex % SWB_NPAGES);
80226f9a767SRodney W. Grimes }
80326f9a767SRodney W. Grimes 
80426f9a767SRodney W. Grimes /*
80524a1cce3SDavid Greenman  * swap_pager_haspage returns TRUE if the pager has data that has
80626f9a767SRodney W. Grimes  * been written out.
80726f9a767SRodney W. Grimes  */
808f5a12711SPoul-Henning Kamp static boolean_t
809a316d390SJohn Dyson swap_pager_haspage(object, pindex, before, after)
81024a1cce3SDavid Greenman 	vm_object_t object;
811a316d390SJohn Dyson 	vm_pindex_t pindex;
81224a1cce3SDavid Greenman 	int *before;
81324a1cce3SDavid Greenman 	int *after;
81426f9a767SRodney W. Grimes {
81526f9a767SRodney W. Grimes 	register sw_blk_t swb;
81626f9a767SRodney W. Grimes 	int ix;
81726f9a767SRodney W. Grimes 
81824a1cce3SDavid Greenman 	if (before != NULL)
81924a1cce3SDavid Greenman 		*before = 0;
82024a1cce3SDavid Greenman 	if (after != NULL)
82124a1cce3SDavid Greenman 		*after = 0;
822a316d390SJohn Dyson 	ix = pindex / SWB_NPAGES;
8232a4895f4SDavid Greenman 	if (ix >= object->un_pager.swp.swp_nblocks) {
82426f9a767SRodney W. Grimes 		return (FALSE);
82526f9a767SRodney W. Grimes 	}
8262a4895f4SDavid Greenman 	swb = &object->un_pager.swp.swp_blocks[ix];
827a316d390SJohn Dyson 	ix = pindex % SWB_NPAGES;
828170db9c6SJohn Dyson 
82926f9a767SRodney W. Grimes 	if (swb->swb_block[ix] != SWB_EMPTY) {
830170db9c6SJohn Dyson 
831170db9c6SJohn Dyson 		if (swb->swb_valid & (1 << ix)) {
832170db9c6SJohn Dyson 			int tix;
833170db9c6SJohn Dyson 			if (before) {
834170db9c6SJohn Dyson 				for(tix = ix - 1; tix >= 0; --tix) {
8352f82e604SDavid Greenman 					if ((swb->swb_valid & (1 << tix)) == 0)
8362f82e604SDavid Greenman 						break;
837ca56715fSJohn Dyson 					if ((swb->swb_block[tix] +
838170db9c6SJohn Dyson 						(ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
839170db9c6SJohn Dyson 						swb->swb_block[ix])
840170db9c6SJohn Dyson 						break;
841170db9c6SJohn Dyson 					(*before)++;
842170db9c6SJohn Dyson 				}
843170db9c6SJohn Dyson 			}
844170db9c6SJohn Dyson 
845170db9c6SJohn Dyson 			if (after) {
846170db9c6SJohn Dyson 				for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
8472f82e604SDavid Greenman 					if ((swb->swb_valid & (1 << tix)) == 0)
8482f82e604SDavid Greenman 						break;
849ca56715fSJohn Dyson 					if ((swb->swb_block[tix] -
850170db9c6SJohn Dyson 						(tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
851170db9c6SJohn Dyson 						swb->swb_block[ix])
852170db9c6SJohn Dyson 						break;
853170db9c6SJohn Dyson 					(*after)++;
854170db9c6SJohn Dyson 				}
855170db9c6SJohn Dyson 			}
856170db9c6SJohn Dyson 
85726f9a767SRodney W. Grimes 			return TRUE;
85826f9a767SRodney W. Grimes 		}
859170db9c6SJohn Dyson 	}
86026f9a767SRodney W. Grimes 	return (FALSE);
86126f9a767SRodney W. Grimes }
86226f9a767SRodney W. Grimes 
86326f9a767SRodney W. Grimes /*
86426f9a767SRodney W. Grimes  * swap_pager_freepage is a convenience routine that deallocates a
86526f9a767SRodney W. Grimes  * page by handing it to vm_page_free().
 *
 * NOTE(review): this comment used to say the routine also clears the
 * busy bit; nothing here does that explicitly, so any busy-state
 * handling is whatever vm_page_free() itself performs -- confirm there.
866df8bae1dSRodney W. Grimes  */
86726f9a767SRodney W. Grimes static void
86826f9a767SRodney W. Grimes swap_pager_freepage(m)
86926f9a767SRodney W. Grimes 	vm_page_t m;
87026f9a767SRodney W. Grimes {
87126f9a767SRodney W. Grimes 	vm_page_free(m);
87226f9a767SRodney W. Grimes }
87326f9a767SRodney W. Grimes 
87426f9a767SRodney W. Grimes /*
875e47ed70bSJohn Dyson  * Wakeup based upon spc state
876e47ed70bSJohn Dyson  */
877e47ed70bSJohn Dyson static void
878e47ed70bSJohn Dyson spc_wakeup(void)
879e47ed70bSJohn Dyson {
880e47ed70bSJohn Dyson 	if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
881e47ed70bSJohn Dyson 		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
882e47ed70bSJohn Dyson 		wakeup(&swap_pager_needflags);
883e47ed70bSJohn Dyson 	} else if ((swap_pager_needflags & SWAP_FREE_NEEDED) &&
884e47ed70bSJohn Dyson 		swap_pager_free_count >= ((2 * npendingio) / 3)) {
885e47ed70bSJohn Dyson 		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
886e47ed70bSJohn Dyson 		wakeup(&swap_pager_free);
887e47ed70bSJohn Dyson 	}
888e47ed70bSJohn Dyson }
889e47ed70bSJohn Dyson 
890e47ed70bSJohn Dyson /*
891e47ed70bSJohn Dyson  * Free an spc structure
892e47ed70bSJohn Dyson  */
893e47ed70bSJohn Dyson static void
894e47ed70bSJohn Dyson spc_free(spc)
895e47ed70bSJohn Dyson 	swp_clean_t spc;
896e47ed70bSJohn Dyson {
897e47ed70bSJohn Dyson 	spc->spc_flags = 0;
898e47ed70bSJohn Dyson 	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
899e47ed70bSJohn Dyson 	swap_pager_free_count++;
900e47ed70bSJohn Dyson 	if (swap_pager_needflags) {
901e47ed70bSJohn Dyson 		spc_wakeup();
902e47ed70bSJohn Dyson 	}
903e47ed70bSJohn Dyson }
904e47ed70bSJohn Dyson 
905e47ed70bSJohn Dyson /*
90626f9a767SRodney W. Grimes  * swap_pager_ridpages is a convienience routine that deallocates all
90726f9a767SRodney W. Grimes  * but the required page.  this is usually used in error returns that
90826f9a767SRodney W. Grimes  * need to invalidate the "extra" readahead pages.
90926f9a767SRodney W. Grimes  */
91026f9a767SRodney W. Grimes static void
91126f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage)
91226f9a767SRodney W. Grimes 	vm_page_t *m;
91326f9a767SRodney W. Grimes 	int count;
91426f9a767SRodney W. Grimes 	int reqpage;
91526f9a767SRodney W. Grimes {
91626f9a767SRodney W. Grimes 	int i;
9170d94caffSDavid Greenman 
91826f9a767SRodney W. Grimes 	for (i = 0; i < count; i++)
91926f9a767SRodney W. Grimes 		if (i != reqpage)
92026f9a767SRodney W. Grimes 			swap_pager_freepage(m[i]);
92126f9a767SRodney W. Grimes }
92226f9a767SRodney W. Grimes 
92326f9a767SRodney W. Grimes /*
92426f9a767SRodney W. Grimes  * swap_pager_iodone1 is the completion routine for both reads and async writes
92526f9a767SRodney W. Grimes  */
926f5a12711SPoul-Henning Kamp static void
92726f9a767SRodney W. Grimes swap_pager_iodone1(bp)
92826f9a767SRodney W. Grimes 	struct buf *bp;
92926f9a767SRodney W. Grimes {
93026f9a767SRodney W. Grimes 	bp->b_flags |= B_DONE;
93126f9a767SRodney W. Grimes 	bp->b_flags &= ~B_ASYNC;
93224a1cce3SDavid Greenman 	wakeup(bp);
93326f9a767SRodney W. Grimes }
93426f9a767SRodney W. Grimes 
935f708ef1bSPoul-Henning Kamp static int
93624a1cce3SDavid Greenman swap_pager_getpages(object, m, count, reqpage)
93724a1cce3SDavid Greenman 	vm_object_t object;
93826f9a767SRodney W. Grimes 	vm_page_t *m;
93926f9a767SRodney W. Grimes 	int count, reqpage;
940df8bae1dSRodney W. Grimes {
941df8bae1dSRodney W. Grimes 	register struct buf *bp;
94226f9a767SRodney W. Grimes 	sw_blk_t swb[count];
943df8bae1dSRodney W. Grimes 	register int s;
94426f9a767SRodney W. Grimes 	int i;
945df8bae1dSRodney W. Grimes 	boolean_t rv;
94626f9a767SRodney W. Grimes 	vm_offset_t kva, off[count];
947a316d390SJohn Dyson 	vm_pindex_t paging_offset;
94826f9a767SRodney W. Grimes 	int reqaddr[count];
9496d40c3d3SDavid Greenman 	int sequential;
950df8bae1dSRodney W. Grimes 
95126f9a767SRodney W. Grimes 	int first, last;
95226f9a767SRodney W. Grimes 	int failed;
95326f9a767SRodney W. Grimes 	int reqdskregion;
954df8bae1dSRodney W. Grimes 
95526f9a767SRodney W. Grimes 	object = m[reqpage]->object;
956a316d390SJohn Dyson 	paging_offset = OFF_TO_IDX(object->paging_offset);
957a316d390SJohn Dyson 	sequential = (m[reqpage]->pindex == (object->last_read + 1));
9582a4895f4SDavid Greenman 
95926f9a767SRodney W. Grimes 	for (i = 0; i < count; i++) {
960a316d390SJohn Dyson 		vm_pindex_t fidx = m[i]->pindex + paging_offset;
961a316d390SJohn Dyson 		int ix = swap_pager_block_index(fidx);
9620d94caffSDavid Greenman 
9632a4895f4SDavid Greenman 		if (ix >= object->un_pager.swp.swp_nblocks) {
96426f9a767SRodney W. Grimes 			int j;
9650d94caffSDavid Greenman 
96626f9a767SRodney W. Grimes 			if (i <= reqpage) {
96726f9a767SRodney W. Grimes 				swap_pager_ridpages(m, count, reqpage);
968df8bae1dSRodney W. Grimes 				return (VM_PAGER_FAIL);
96926f9a767SRodney W. Grimes 			}
97026f9a767SRodney W. Grimes 			for (j = i; j < count; j++) {
97126f9a767SRodney W. Grimes 				swap_pager_freepage(m[j]);
97226f9a767SRodney W. Grimes 			}
97326f9a767SRodney W. Grimes 			count = i;
97426f9a767SRodney W. Grimes 			break;
97526f9a767SRodney W. Grimes 		}
9762a4895f4SDavid Greenman 		swb[i] = &object->un_pager.swp.swp_blocks[ix];
977a316d390SJohn Dyson 		off[i] = swap_pager_block_offset(fidx);
97826f9a767SRodney W. Grimes 		reqaddr[i] = swb[i]->swb_block[off[i]];
97926f9a767SRodney W. Grimes 	}
98026f9a767SRodney W. Grimes 
98126f9a767SRodney W. Grimes 	/* make sure that our required input request is existant */
98226f9a767SRodney W. Grimes 
98326f9a767SRodney W. Grimes 	if (reqaddr[reqpage] == SWB_EMPTY ||
98426f9a767SRodney W. Grimes 	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
98526f9a767SRodney W. Grimes 		swap_pager_ridpages(m, count, reqpage);
98626f9a767SRodney W. Grimes 		return (VM_PAGER_FAIL);
98726f9a767SRodney W. Grimes 	}
98826f9a767SRodney W. Grimes 	reqdskregion = reqaddr[reqpage] / dmmax;
989df8bae1dSRodney W. Grimes 
990df8bae1dSRodney W. Grimes 	/*
99126f9a767SRodney W. Grimes 	 * search backwards for the first contiguous page to transfer
992df8bae1dSRodney W. Grimes 	 */
99326f9a767SRodney W. Grimes 	failed = 0;
99426f9a767SRodney W. Grimes 	first = 0;
99526f9a767SRodney W. Grimes 	for (i = reqpage - 1; i >= 0; --i) {
9966d40c3d3SDavid Greenman 		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
99726f9a767SRodney W. Grimes 		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
99826f9a767SRodney W. Grimes 		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
99926f9a767SRodney W. Grimes 		    ((reqaddr[i] / dmmax) != reqdskregion)) {
100026f9a767SRodney W. Grimes 			failed = 1;
100126f9a767SRodney W. Grimes 			swap_pager_freepage(m[i]);
100226f9a767SRodney W. Grimes 			if (first == 0)
100326f9a767SRodney W. Grimes 				first = i + 1;
100426f9a767SRodney W. Grimes 		}
1005df8bae1dSRodney W. Grimes 	}
1006df8bae1dSRodney W. Grimes 	/*
100726f9a767SRodney W. Grimes 	 * search forwards for the last contiguous page to transfer
1008df8bae1dSRodney W. Grimes 	 */
100926f9a767SRodney W. Grimes 	failed = 0;
101026f9a767SRodney W. Grimes 	last = count;
101126f9a767SRodney W. Grimes 	for (i = reqpage + 1; i < count; i++) {
101226f9a767SRodney W. Grimes 		if (failed || (reqaddr[i] == SWB_EMPTY) ||
101326f9a767SRodney W. Grimes 		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
101426f9a767SRodney W. Grimes 		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
101526f9a767SRodney W. Grimes 		    ((reqaddr[i] / dmmax) != reqdskregion)) {
101626f9a767SRodney W. Grimes 			failed = 1;
101726f9a767SRodney W. Grimes 			swap_pager_freepage(m[i]);
101826f9a767SRodney W. Grimes 			if (last == count)
101926f9a767SRodney W. Grimes 				last = i;
102026f9a767SRodney W. Grimes 		}
102126f9a767SRodney W. Grimes 	}
102226f9a767SRodney W. Grimes 
102326f9a767SRodney W. Grimes 	count = last;
102426f9a767SRodney W. Grimes 	if (first != 0) {
102526f9a767SRodney W. Grimes 		for (i = first; i < count; i++) {
102626f9a767SRodney W. Grimes 			m[i - first] = m[i];
102726f9a767SRodney W. Grimes 			reqaddr[i - first] = reqaddr[i];
102826f9a767SRodney W. Grimes 			off[i - first] = off[i];
102926f9a767SRodney W. Grimes 		}
103026f9a767SRodney W. Grimes 		count -= first;
103126f9a767SRodney W. Grimes 		reqpage -= first;
103226f9a767SRodney W. Grimes 	}
103326f9a767SRodney W. Grimes 	++swb[reqpage]->swb_locked;
103426f9a767SRodney W. Grimes 
103526f9a767SRodney W. Grimes 	/*
10360d94caffSDavid Greenman 	 * at this point: "m" is a pointer to the array of vm_page_t for
10370d94caffSDavid Greenman 	 * paging I/O "count" is the number of vm_page_t entries represented
10380d94caffSDavid Greenman 	 * by "m" "object" is the vm_object_t for I/O "reqpage" is the index
10390d94caffSDavid Greenman 	 * into "m" for the page actually faulted
104026f9a767SRodney W. Grimes 	 */
104126f9a767SRodney W. Grimes 
104216f62314SDavid Greenman 	/*
104316f62314SDavid Greenman 	 * Get a swap buffer header to perform the IO
104416f62314SDavid Greenman 	 */
104526f9a767SRodney W. Grimes 	bp = getpbuf();
104616f62314SDavid Greenman 	kva = (vm_offset_t) bp->b_data;
104726f9a767SRodney W. Grimes 
104816f62314SDavid Greenman 	/*
104916f62314SDavid Greenman 	 * map our page(s) into kva for input
105016f62314SDavid Greenman 	 */
105116f62314SDavid Greenman 	pmap_qenter(kva, m, count);
105216f62314SDavid Greenman 
1053aba8f38eSDavid Greenman 	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
105426f9a767SRodney W. Grimes 	bp->b_iodone = swap_pager_iodone1;
1055df8bae1dSRodney W. Grimes 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
105626f9a767SRodney W. Grimes 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
105726f9a767SRodney W. Grimes 	crhold(bp->b_rcred);
105826f9a767SRodney W. Grimes 	crhold(bp->b_wcred);
1059ab3f7469SPoul-Henning Kamp 	bp->b_data = (caddr_t) kva;
106026f9a767SRodney W. Grimes 	bp->b_blkno = reqaddr[0];
106126f9a767SRodney W. Grimes 	bp->b_bcount = PAGE_SIZE * count;
106226f9a767SRodney W. Grimes 	bp->b_bufsize = PAGE_SIZE * count;
106326f9a767SRodney W. Grimes 
10640d94caffSDavid Greenman 	pbgetvp(swapdev_vp, bp);
1065df8bae1dSRodney W. Grimes 
1066976e77fcSDavid Greenman 	cnt.v_swapin++;
1067976e77fcSDavid Greenman 	cnt.v_swappgsin += count;
1068df8bae1dSRodney W. Grimes 	/*
106926f9a767SRodney W. Grimes 	 * perform the I/O
1070df8bae1dSRodney W. Grimes 	 */
1071df8bae1dSRodney W. Grimes 	VOP_STRATEGY(bp);
107226f9a767SRodney W. Grimes 
107326f9a767SRodney W. Grimes 	/*
107426f9a767SRodney W. Grimes 	 * wait for the sync I/O to complete
107526f9a767SRodney W. Grimes 	 */
1076e47ed70bSJohn Dyson 	s = splvm();
107726f9a767SRodney W. Grimes 	while ((bp->b_flags & B_DONE) == 0) {
10783091ee09SJohn Dyson 		if (tsleep(bp, PVM, "swread", hz*20)) {
1079dfeca1b8SBruce Evans 			printf("swap_pager: indefinite wait buffer: device: %#x, blkno: %d, size: %d\n",
10803091ee09SJohn Dyson 				bp->b_dev, bp->b_blkno, bp->b_bcount);
10813091ee09SJohn Dyson 		}
1082df8bae1dSRodney W. Grimes 	}
10831b119d9dSDavid Greenman 
10841b119d9dSDavid Greenman 	if (bp->b_flags & B_ERROR) {
10851b119d9dSDavid Greenman 		printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
10861b119d9dSDavid Greenman 		    bp->b_blkno, bp->b_bcount, bp->b_error);
1087a83c285cSDavid Greenman 		rv = VM_PAGER_ERROR;
10881b119d9dSDavid Greenman 	} else {
10891b119d9dSDavid Greenman 		rv = VM_PAGER_OK;
10901b119d9dSDavid Greenman 	}
109126f9a767SRodney W. Grimes 
1092df8bae1dSRodney W. Grimes 	splx(s);
10932a4895f4SDavid Greenman 	swb[reqpage]->swb_locked--;
109426f9a767SRodney W. Grimes 
109526f9a767SRodney W. Grimes 	/*
109626f9a767SRodney W. Grimes 	 * remove the mapping for kernel virtual
109726f9a767SRodney W. Grimes 	 */
109816f62314SDavid Greenman 	pmap_qremove(kva, count);
109926f9a767SRodney W. Grimes 
110026f9a767SRodney W. Grimes 	/*
110126f9a767SRodney W. Grimes 	 * release the physical I/O buffer
110226f9a767SRodney W. Grimes 	 */
110326f9a767SRodney W. Grimes 	relpbuf(bp);
110426f9a767SRodney W. Grimes 	/*
110526f9a767SRodney W. Grimes 	 * finish up input if everything is ok
110626f9a767SRodney W. Grimes 	 */
110726f9a767SRodney W. Grimes 	if (rv == VM_PAGER_OK) {
110826f9a767SRodney W. Grimes 		for (i = 0; i < count; i++) {
11090d94caffSDavid Greenman 			m[i]->dirty = 0;
1110894048d7SJohn Dyson 			m[i]->flags &= ~PG_ZERO;
111126f9a767SRodney W. Grimes 			if (i != reqpage) {
111226f9a767SRodney W. Grimes 				/*
11130d94caffSDavid Greenman 				 * whether or not to leave the page
11140d94caffSDavid Greenman 				 * activated is up in the air, but we
11150d94caffSDavid Greenman 				 * should put the page on a page queue
11160d94caffSDavid Greenman 				 * somewhere. (it already is in the
11170d94caffSDavid Greenman 				 * object). After some emperical
11180d94caffSDavid Greenman 				 * results, it is best to deactivate
11190d94caffSDavid Greenman 				 * the readahead pages.
112026f9a767SRodney W. Grimes 				 */
112126f9a767SRodney W. Grimes 				vm_page_deactivate(m[i]);
112226f9a767SRodney W. Grimes 
112326f9a767SRodney W. Grimes 				/*
11240d94caffSDavid Greenman 				 * just in case someone was asking for
11250d94caffSDavid Greenman 				 * this page we now tell them that it
11260d94caffSDavid Greenman 				 * is ok to use
112726f9a767SRodney W. Grimes 				 */
11280d94caffSDavid Greenman 				m[i]->valid = VM_PAGE_BITS_ALL;
112926f9a767SRodney W. Grimes 				PAGE_WAKEUP(m[i]);
113026f9a767SRodney W. Grimes 			}
113126f9a767SRodney W. Grimes 		}
11326d40c3d3SDavid Greenman 
1133a316d390SJohn Dyson 		m[reqpage]->object->last_read = m[count-1]->pindex;
11346d40c3d3SDavid Greenman 
11352e1e24ddSDavid Greenman 		/*
11362e1e24ddSDavid Greenman 		 * If we're out of swap space, then attempt to free
113745952afcSJohn Dyson 		 * some whenever multiple pages are brought in. We
113845952afcSJohn Dyson 		 * must set the dirty bits so that the page contents
113945952afcSJohn Dyson 		 * will be preserved.
11402e1e24ddSDavid Greenman 		 */
1141b44e4b7aSJohn Dyson 		if (SWAPLOW ||
1142b44e4b7aSJohn Dyson 			(vm_swap_size < btodb((cnt.v_page_count - cnt.v_wire_count)) * PAGE_SIZE)) {
11432e1e24ddSDavid Greenman 			for (i = 0; i < count; i++) {
11440d94caffSDavid Greenman 				m[i]->dirty = VM_PAGE_BITS_ALL;
11452e1e24ddSDavid Greenman 			}
1146b44e4b7aSJohn Dyson 			swap_pager_freespace(object,
1147b44e4b7aSJohn Dyson 				m[0]->pindex + paging_offset, count);
114826f9a767SRodney W. Grimes 		}
1149e47ed70bSJohn Dyson 
115026f9a767SRodney W. Grimes 	} else {
115126f9a767SRodney W. Grimes 		swap_pager_ridpages(m, count, reqpage);
115226f9a767SRodney W. Grimes 	}
1153df8bae1dSRodney W. Grimes 	return (rv);
1154df8bae1dSRodney W. Grimes }
1155df8bae1dSRodney W. Grimes 
115626f9a767SRodney W. Grimes int
115724a1cce3SDavid Greenman swap_pager_putpages(object, m, count, sync, rtvals)
115824a1cce3SDavid Greenman 	vm_object_t object;
115926f9a767SRodney W. Grimes 	vm_page_t *m;
116026f9a767SRodney W. Grimes 	int count;
116124a1cce3SDavid Greenman 	boolean_t sync;
116226f9a767SRodney W. Grimes 	int *rtvals;
1163df8bae1dSRodney W. Grimes {
116426f9a767SRodney W. Grimes 	register struct buf *bp;
116526f9a767SRodney W. Grimes 	sw_blk_t swb[count];
116626f9a767SRodney W. Grimes 	register int s;
1167e736cd05SJohn Dyson 	int i, j, ix, firstidx, lastidx;
116826f9a767SRodney W. Grimes 	boolean_t rv;
1169a316d390SJohn Dyson 	vm_offset_t kva, off, fidx;
117026f9a767SRodney W. Grimes 	swp_clean_t spc;
1171a316d390SJohn Dyson 	vm_pindex_t paging_pindex;
117226f9a767SRodney W. Grimes 	int reqaddr[count];
117326f9a767SRodney W. Grimes 	int failed;
1174df8bae1dSRodney W. Grimes 
117524ea4a96SDavid Greenman 	if (vm_swap_size)
117624ea4a96SDavid Greenman 		no_swap_space = 0;
1177e736cd05SJohn Dyson 
117824ea4a96SDavid Greenman 	if (no_swap_space) {
11795663e6deSDavid Greenman 		for (i = 0; i < count; i++)
11805663e6deSDavid Greenman 			rtvals[i] = VM_PAGER_FAIL;
11815663e6deSDavid Greenman 		return VM_PAGER_FAIL;
11825663e6deSDavid Greenman 	}
1183e47ed70bSJohn Dyson 
1184e47ed70bSJohn Dyson 	if (curproc != pageproc)
1185e47ed70bSJohn Dyson 		sync = TRUE;
118626f9a767SRodney W. Grimes 
118726f9a767SRodney W. Grimes 	object = m[0]->object;
1188a316d390SJohn Dyson 	paging_pindex = OFF_TO_IDX(object->paging_offset);
118926f9a767SRodney W. Grimes 
119026f9a767SRodney W. Grimes 	failed = 0;
119126f9a767SRodney W. Grimes 	for (j = 0; j < count; j++) {
1192a316d390SJohn Dyson 		fidx = m[j]->pindex + paging_pindex;
1193a316d390SJohn Dyson 		ix = swap_pager_block_index(fidx);
119426f9a767SRodney W. Grimes 		swb[j] = 0;
11952a4895f4SDavid Greenman 		if (ix >= object->un_pager.swp.swp_nblocks) {
119626f9a767SRodney W. Grimes 			rtvals[j] = VM_PAGER_FAIL;
119726f9a767SRodney W. Grimes 			failed = 1;
119826f9a767SRodney W. Grimes 			continue;
119926f9a767SRodney W. Grimes 		} else {
120026f9a767SRodney W. Grimes 			rtvals[j] = VM_PAGER_OK;
120126f9a767SRodney W. Grimes 		}
12022a4895f4SDavid Greenman 		swb[j] = &object->un_pager.swp.swp_blocks[ix];
12032a4895f4SDavid Greenman 		swb[j]->swb_locked++;
120426f9a767SRodney W. Grimes 		if (failed) {
120526f9a767SRodney W. Grimes 			rtvals[j] = VM_PAGER_FAIL;
120626f9a767SRodney W. Grimes 			continue;
120726f9a767SRodney W. Grimes 		}
1208a316d390SJohn Dyson 		off = swap_pager_block_offset(fidx);
120926f9a767SRodney W. Grimes 		reqaddr[j] = swb[j]->swb_block[off];
121026f9a767SRodney W. Grimes 		if (reqaddr[j] == SWB_EMPTY) {
1211a316d390SJohn Dyson 			daddr_t blk;
121226f9a767SRodney W. Grimes 			int tries;
121326f9a767SRodney W. Grimes 			int ntoget;
12140d94caffSDavid Greenman 
121526f9a767SRodney W. Grimes 			tries = 0;
1216e47ed70bSJohn Dyson 			s = splvm();
121726f9a767SRodney W. Grimes 
1218df8bae1dSRodney W. Grimes 			/*
12190d94caffSDavid Greenman 			 * if any other pages have been allocated in this
12200d94caffSDavid Greenman 			 * block, we only try to get one page.
1221df8bae1dSRodney W. Grimes 			 */
122226f9a767SRodney W. Grimes 			for (i = 0; i < SWB_NPAGES; i++) {
122326f9a767SRodney W. Grimes 				if (swb[j]->swb_block[i] != SWB_EMPTY)
1224df8bae1dSRodney W. Grimes 					break;
1225df8bae1dSRodney W. Grimes 			}
122626f9a767SRodney W. Grimes 
122726f9a767SRodney W. Grimes 			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
122826f9a767SRodney W. Grimes 			/*
12290d94caffSDavid Greenman 			 * this code is alittle conservative, but works (the
12300d94caffSDavid Greenman 			 * intent of this code is to allocate small chunks for
12310d94caffSDavid Greenman 			 * small objects)
123226f9a767SRodney W. Grimes 			 */
1233a316d390SJohn Dyson 			if ((off == 0) && ((fidx + ntoget) > object->size)) {
1234a316d390SJohn Dyson 				ntoget = object->size - fidx;
123526f9a767SRodney W. Grimes 			}
123626f9a767SRodney W. Grimes 	retrygetspace:
123726f9a767SRodney W. Grimes 			if (!swap_pager_full && ntoget > 1 &&
1238a316d390SJohn Dyson 			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
1239a316d390SJohn Dyson 				&blk)) {
124026f9a767SRodney W. Grimes 
124126f9a767SRodney W. Grimes 				for (i = 0; i < ntoget; i++) {
124226f9a767SRodney W. Grimes 					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
124326f9a767SRodney W. Grimes 					swb[j]->swb_valid = 0;
124426f9a767SRodney W. Grimes 				}
124526f9a767SRodney W. Grimes 
124626f9a767SRodney W. Grimes 				reqaddr[j] = swb[j]->swb_block[off];
12472a4895f4SDavid Greenman 			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
124826f9a767SRodney W. Grimes 				&swb[j]->swb_block[off])) {
124926f9a767SRodney W. Grimes 				/*
12500d94caffSDavid Greenman 				 * if the allocation has failed, we try to
12510d94caffSDavid Greenman 				 * reclaim space and retry.
125226f9a767SRodney W. Grimes 				 */
125326f9a767SRodney W. Grimes 				if (++tries == 1) {
125426f9a767SRodney W. Grimes 					swap_pager_reclaim();
125526f9a767SRodney W. Grimes 					goto retrygetspace;
125626f9a767SRodney W. Grimes 				}
125726f9a767SRodney W. Grimes 				rtvals[j] = VM_PAGER_AGAIN;
125826f9a767SRodney W. Grimes 				failed = 1;
125924ea4a96SDavid Greenman 				swap_pager_full = 1;
126026f9a767SRodney W. Grimes 			} else {
126126f9a767SRodney W. Grimes 				reqaddr[j] = swb[j]->swb_block[off];
126226f9a767SRodney W. Grimes 				swb[j]->swb_valid &= ~(1 << off);
1263df8bae1dSRodney W. Grimes 			}
1264df8bae1dSRodney W. Grimes 			splx(s);
126526f9a767SRodney W. Grimes 		}
126626f9a767SRodney W. Grimes 	}
126726f9a767SRodney W. Grimes 
126826f9a767SRodney W. Grimes 	/*
126926f9a767SRodney W. Grimes 	 * search forwards for the last contiguous page to transfer
127026f9a767SRodney W. Grimes 	 */
127126f9a767SRodney W. Grimes 	failed = 0;
127226f9a767SRodney W. Grimes 	for (i = 0; i < count; i++) {
1273a316d390SJohn Dyson 		if (failed ||
1274a316d390SJohn Dyson 			(reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
1275a316d390SJohn Dyson 		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
127626f9a767SRodney W. Grimes 		    (rtvals[i] != VM_PAGER_OK)) {
127726f9a767SRodney W. Grimes 			failed = 1;
127826f9a767SRodney W. Grimes 			if (rtvals[i] == VM_PAGER_OK)
127926f9a767SRodney W. Grimes 				rtvals[i] = VM_PAGER_AGAIN;
128026f9a767SRodney W. Grimes 		}
128126f9a767SRodney W. Grimes 	}
128226f9a767SRodney W. Grimes 
1283e736cd05SJohn Dyson 	ix = 0;
1284e736cd05SJohn Dyson 	firstidx = -1;
128526f9a767SRodney W. Grimes 	for (i = 0; i < count; i++) {
1286e736cd05SJohn Dyson 		if (rtvals[i] == VM_PAGER_OK) {
1287e736cd05SJohn Dyson 			ix++;
1288e736cd05SJohn Dyson 			if (firstidx == -1) {
1289e736cd05SJohn Dyson 				firstidx = i;
129026f9a767SRodney W. Grimes 			}
1291e736cd05SJohn Dyson 		} else if (firstidx >= 0) {
129226f9a767SRodney W. Grimes 			break;
1293e736cd05SJohn Dyson 		}
1294e736cd05SJohn Dyson 	}
129526f9a767SRodney W. Grimes 
1296e736cd05SJohn Dyson 	if (firstidx == -1) {
1297e47ed70bSJohn Dyson 		for (i = 0; i < count; i++) {
1298e47ed70bSJohn Dyson 			if (rtvals[i] == VM_PAGER_OK)
1299e47ed70bSJohn Dyson 				rtvals[i] = VM_PAGER_AGAIN;
1300e736cd05SJohn Dyson 		}
130126f9a767SRodney W. Grimes 		return VM_PAGER_AGAIN;
130226f9a767SRodney W. Grimes 	}
1303e736cd05SJohn Dyson 
1304e736cd05SJohn Dyson 	lastidx = firstidx + ix;
1305e736cd05SJohn Dyson 
1306e47ed70bSJohn Dyson 	if (ix > max_pageout_cluster) {
1307e47ed70bSJohn Dyson 		for (i = firstidx + max_pageout_cluster; i < lastidx; i++) {
1308e47ed70bSJohn Dyson 			if (rtvals[i] == VM_PAGER_OK)
1309e47ed70bSJohn Dyson 				rtvals[i] = VM_PAGER_AGAIN;
1310e47ed70bSJohn Dyson 		}
1311e47ed70bSJohn Dyson 		ix = max_pageout_cluster;
1312e47ed70bSJohn Dyson 		lastidx = firstidx + ix;
1313e47ed70bSJohn Dyson 	}
1314e47ed70bSJohn Dyson 
1315e736cd05SJohn Dyson 	for (i = 0; i < firstidx; i++) {
1316e736cd05SJohn Dyson 		if (swb[i])
1317e736cd05SJohn Dyson 			swb[i]->swb_locked--;
1318e736cd05SJohn Dyson 	}
1319e736cd05SJohn Dyson 
1320e736cd05SJohn Dyson 	for (i = lastidx; i < count; i++) {
1321e736cd05SJohn Dyson 		if (swb[i])
1322e736cd05SJohn Dyson 			swb[i]->swb_locked--;
1323e736cd05SJohn Dyson 	}
1324e736cd05SJohn Dyson 
1325e47ed70bSJohn Dyson #if defined(DIAGNOSTIC)
1326e736cd05SJohn Dyson 	for (i = firstidx; i < lastidx; i++) {
1327a316d390SJohn Dyson 		if (reqaddr[i] == SWB_EMPTY) {
1328a316d390SJohn Dyson 			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
1329a316d390SJohn Dyson 				m[i]->pindex, i);
1330a316d390SJohn Dyson 		}
133126f9a767SRodney W. Grimes 	}
1332e47ed70bSJohn Dyson #endif
133326f9a767SRodney W. Grimes 
133426f9a767SRodney W. Grimes 	/*
1335e47ed70bSJohn Dyson 	 * Clean up all completed async pageouts.
133626f9a767SRodney W. Grimes 	 */
1337e47ed70bSJohn Dyson 	if (swap_pager_free_pending)
133824a1cce3SDavid Greenman 		swap_pager_sync();
133926f9a767SRodney W. Grimes 
134026f9a767SRodney W. Grimes 	/*
134126f9a767SRodney W. Grimes 	 * get a swap pager clean data structure, block until we get it
134226f9a767SRodney W. Grimes 	 */
13430d94caffSDavid Greenman 	if (curproc == pageproc) {
1344e47ed70bSJohn Dyson 		if (swap_pager_free_count == 0) {
1345e47ed70bSJohn Dyson 			s = splvm();
1346e47ed70bSJohn Dyson 			while (swap_pager_free_count == 0) {
1347e47ed70bSJohn Dyson 				swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
1348cb6962cdSJohn Dyson 			/*
1349cb6962cdSJohn Dyson 			 * if it does not get one within a short time, then
1350cb6962cdSJohn Dyson 			 * there is a potential deadlock, so we go-on trying
1351bd7e5f99SJohn Dyson 			 * to free pages.  It is important to block here as opposed
1352bd7e5f99SJohn Dyson 			 * to returning, thereby allowing the pageout daemon to continue.
1353bd7e5f99SJohn Dyson 			 * It is likely that pageout daemon will start suboptimally
1354bd7e5f99SJohn Dyson 			 * reclaiming vnode backed pages if we don't block.  Since the
1355bd7e5f99SJohn Dyson 			 * I/O subsystem is probably already fully utilized, might as
1356bd7e5f99SJohn Dyson 			 * well wait.
1357cb6962cdSJohn Dyson 			 */
1358e47ed70bSJohn Dyson 				if (tsleep(&swap_pager_needflags, PVM-1, "swpfre", hz/2)) {
1359e47ed70bSJohn Dyson 					if (swap_pager_free_pending)
136024a1cce3SDavid Greenman 						swap_pager_sync();
1361e47ed70bSJohn Dyson 					if (swap_pager_free_count == 0) {
1362e736cd05SJohn Dyson 						for (i = firstidx; i < lastidx; i++) {
1363e736cd05SJohn Dyson 							rtvals[i] = VM_PAGER_AGAIN;
1364e736cd05SJohn Dyson 						}
13650d94caffSDavid Greenman 						splx(s);
13660d94caffSDavid Greenman 						return VM_PAGER_AGAIN;
1367cb6962cdSJohn Dyson 					}
1368bd7e5f99SJohn Dyson 				} else {
1369bd7e5f99SJohn Dyson 					swap_pager_sync();
137026f9a767SRodney W. Grimes 				}
1371bd7e5f99SJohn Dyson 			}
137226f9a767SRodney W. Grimes 			splx(s);
137326f9a767SRodney W. Grimes 		}
1374e47ed70bSJohn Dyson 
1375b18bfc3dSJohn Dyson 		spc = TAILQ_FIRST(&swap_pager_free);
1376e47ed70bSJohn Dyson #if defined(DIAGNOSTIC)
13773091ee09SJohn Dyson 		if (spc == NULL)
1378e736cd05SJohn Dyson 			panic("swap_pager_putpages: free queue is empty, %d expected\n",
1379e736cd05SJohn Dyson 				swap_pager_free_count);
1380e47ed70bSJohn Dyson #endif
138126f9a767SRodney W. Grimes 		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
13823091ee09SJohn Dyson 		swap_pager_free_count--;
1383fff93ab6SDavid Greenman 
138426f9a767SRodney W. Grimes 		kva = spc->spc_kva;
1385e47ed70bSJohn Dyson 		bp = spc->spc_bp;
1386e47ed70bSJohn Dyson 		bzero(bp, sizeof *bp);
1387e47ed70bSJohn Dyson 		bp->b_spc = spc;
1388e47ed70bSJohn Dyson 		bp->b_vnbufs.le_next = NOLIST;
1389e47ed70bSJohn Dyson 		bp->b_data = (caddr_t) kva;
1390e47ed70bSJohn Dyson 	} else {
1391e47ed70bSJohn Dyson 		spc = NULL;
1392e47ed70bSJohn Dyson 		bp = getpbuf();
1393e47ed70bSJohn Dyson 		kva = (vm_offset_t) bp->b_data;
1394e47ed70bSJohn Dyson 		bp->b_spc = NULL;
1395e47ed70bSJohn Dyson 	}
139626f9a767SRodney W. Grimes 
139726f9a767SRodney W. Grimes 	/*
139826f9a767SRodney W. Grimes 	 * map our page(s) into kva for I/O
139926f9a767SRodney W. Grimes 	 */
1400e736cd05SJohn Dyson 	pmap_qenter(kva, &m[firstidx], ix);
140126f9a767SRodney W. Grimes 
140226f9a767SRodney W. Grimes 	/*
140326f9a767SRodney W. Grimes 	 * get the base I/O offset into the swap file
140426f9a767SRodney W. Grimes 	 */
1405e736cd05SJohn Dyson 	for (i = firstidx; i < lastidx ; i++) {
1406a316d390SJohn Dyson 		fidx = m[i]->pindex + paging_pindex;
1407a316d390SJohn Dyson 		off = swap_pager_block_offset(fidx);
140826f9a767SRodney W. Grimes 		/*
140926f9a767SRodney W. Grimes 		 * set the valid bit
141026f9a767SRodney W. Grimes 		 */
141126f9a767SRodney W. Grimes 		swb[i]->swb_valid |= (1 << off);
141226f9a767SRodney W. Grimes 		/*
141326f9a767SRodney W. Grimes 		 * and unlock the data structure
141426f9a767SRodney W. Grimes 		 */
14152a4895f4SDavid Greenman 		swb[i]->swb_locked--;
141626f9a767SRodney W. Grimes 	}
141726f9a767SRodney W. Grimes 
1418aba8f38eSDavid Greenman 	bp->b_flags = B_BUSY | B_PAGING;
141926f9a767SRodney W. Grimes 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
142026f9a767SRodney W. Grimes 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1421a481f200SDavid Greenman 	if (bp->b_rcred != NOCRED)
142226f9a767SRodney W. Grimes 		crhold(bp->b_rcred);
1423a481f200SDavid Greenman 	if (bp->b_wcred != NOCRED)
142426f9a767SRodney W. Grimes 		crhold(bp->b_wcred);
1425e736cd05SJohn Dyson 	bp->b_blkno = reqaddr[firstidx];
14260d94caffSDavid Greenman 	pbgetvp(swapdev_vp, bp);
142716f62314SDavid Greenman 
1428e736cd05SJohn Dyson 	bp->b_bcount = PAGE_SIZE * ix;
1429e736cd05SJohn Dyson 	bp->b_bufsize = PAGE_SIZE * ix;
1430e47ed70bSJohn Dyson 
1431e47ed70bSJohn Dyson 
1432e47ed70bSJohn Dyson 	s = splvm();
143326f9a767SRodney W. Grimes 	swapdev_vp->v_numoutput++;
143426f9a767SRodney W. Grimes 
143526f9a767SRodney W. Grimes 	/*
14360d94caffSDavid Greenman 	 * If this is an async write we set up additional buffer fields and
14370d94caffSDavid Greenman   	 * place a "cleaning" entry on the inuse queue.
143826f9a767SRodney W. Grimes   	 */
1439e47ed70bSJohn Dyson  	object->un_pager.swp.swp_poip++;
1440e47ed70bSJohn Dyson 
1441e47ed70bSJohn Dyson  	if (spc) {
144226f9a767SRodney W. Grimes   		spc->spc_flags = 0;
14432a4895f4SDavid Greenman   		spc->spc_object = object;
1444e47ed70bSJohn Dyson  		bp->b_npages = ix;
1445e47ed70bSJohn Dyson  		for (i = firstidx; i < lastidx; i++) {
144626f9a767SRodney W. Grimes   			spc->spc_m[i] = m[i];
1447e47ed70bSJohn Dyson  			bp->b_pages[i - firstidx] = m[i];
1448e47ed70bSJohn Dyson  			vm_page_protect(m[i], VM_PROT_READ);
1449e47ed70bSJohn Dyson  			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1450e47ed70bSJohn Dyson  			m[i]->dirty = 0;
1451e47ed70bSJohn Dyson  		}
1452e736cd05SJohn Dyson   		spc->spc_first = firstidx;
1453e736cd05SJohn Dyson   		spc->spc_count = ix;
145426f9a767SRodney W. Grimes 		/*
145526f9a767SRodney W. Grimes 		 * the completion routine for async writes
145626f9a767SRodney W. Grimes 		 */
145726f9a767SRodney W. Grimes 		bp->b_flags |= B_CALL;
145826f9a767SRodney W. Grimes 		bp->b_iodone = swap_pager_iodone;
145926f9a767SRodney W. Grimes 		bp->b_dirtyoff = 0;
146026f9a767SRodney W. Grimes 		bp->b_dirtyend = bp->b_bcount;
146126f9a767SRodney W. Grimes 		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
146226f9a767SRodney W. Grimes 	} else {
146326f9a767SRodney W. Grimes 		bp->b_flags |= B_CALL;
146426f9a767SRodney W. Grimes 		bp->b_iodone = swap_pager_iodone1;
1465e47ed70bSJohn Dyson 		bp->b_npages = ix;
1466e47ed70bSJohn Dyson 		for (i = firstidx; i < lastidx; i++)
1467e47ed70bSJohn Dyson 			bp->b_pages[i - firstidx] = m[i];
146826f9a767SRodney W. Grimes 	}
1469976e77fcSDavid Greenman 
1470976e77fcSDavid Greenman 	cnt.v_swapout++;
1471e736cd05SJohn Dyson 	cnt.v_swappgsout += ix;
1472e47ed70bSJohn Dyson 
147326f9a767SRodney W. Grimes 	/*
147426f9a767SRodney W. Grimes 	 * perform the I/O
147526f9a767SRodney W. Grimes 	 */
147626f9a767SRodney W. Grimes 	VOP_STRATEGY(bp);
147724a1cce3SDavid Greenman 	if (sync == FALSE) {
1478e47ed70bSJohn Dyson 		if (swap_pager_free_pending) {
147924a1cce3SDavid Greenman 			swap_pager_sync();
148026f9a767SRodney W. Grimes 		}
1481e736cd05SJohn Dyson 		for (i = firstidx; i < lastidx; i++) {
148226f9a767SRodney W. Grimes 			rtvals[i] = VM_PAGER_PEND;
148326f9a767SRodney W. Grimes 		}
148426f9a767SRodney W. Grimes 		return VM_PAGER_PEND;
148526f9a767SRodney W. Grimes 	}
1486e47ed70bSJohn Dyson 
1487e47ed70bSJohn Dyson 	s = splvm();
148826f9a767SRodney W. Grimes 	/*
148926f9a767SRodney W. Grimes 	 * wait for the sync I/O to complete
149026f9a767SRodney W. Grimes 	 */
149126f9a767SRodney W. Grimes 	while ((bp->b_flags & B_DONE) == 0) {
149224a1cce3SDavid Greenman 		tsleep(bp, PVM, "swwrt", 0);
149326f9a767SRodney W. Grimes 	}
1494e47ed70bSJohn Dyson 
14951b119d9dSDavid Greenman 	if (bp->b_flags & B_ERROR) {
14961b119d9dSDavid Greenman 		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
14971b119d9dSDavid Greenman 		    bp->b_blkno, bp->b_bcount, bp->b_error);
1498a83c285cSDavid Greenman 		rv = VM_PAGER_ERROR;
14991b119d9dSDavid Greenman 	} else {
15001b119d9dSDavid Greenman 		rv = VM_PAGER_OK;
15011b119d9dSDavid Greenman 	}
150226f9a767SRodney W. Grimes 
15032a4895f4SDavid Greenman 	object->un_pager.swp.swp_poip--;
15042a4895f4SDavid Greenman 	if (object->un_pager.swp.swp_poip == 0)
15052a4895f4SDavid Greenman 		wakeup(object);
150626f9a767SRodney W. Grimes 
150726f9a767SRodney W. Grimes 	if (bp->b_vp)
15080d94caffSDavid Greenman 		pbrelvp(bp);
150926f9a767SRodney W. Grimes 
151026f9a767SRodney W. Grimes 	splx(s);
151126f9a767SRodney W. Grimes 
151226f9a767SRodney W. Grimes 	/*
151326f9a767SRodney W. Grimes 	 * remove the mapping for kernel virtual
151426f9a767SRodney W. Grimes 	 */
1515e736cd05SJohn Dyson 	pmap_qremove(kva, ix);
151626f9a767SRodney W. Grimes 
151726f9a767SRodney W. Grimes 	/*
15180d94caffSDavid Greenman 	 * if we have written the page, then indicate that the page is clean.
151926f9a767SRodney W. Grimes 	 */
152026f9a767SRodney W. Grimes 	if (rv == VM_PAGER_OK) {
1521e736cd05SJohn Dyson 		for (i = firstidx; i < lastidx; i++) {
152226f9a767SRodney W. Grimes 			if (rtvals[i] == VM_PAGER_OK) {
152367bf6868SJohn Dyson 				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
15240d94caffSDavid Greenman 				m[i]->dirty = 0;
152526f9a767SRodney W. Grimes 				/*
15260d94caffSDavid Greenman 				 * optimization, if a page has been read
15270d94caffSDavid Greenman 				 * during the pageout process, we activate it.
152826f9a767SRodney W. Grimes 				 */
1529eaf13dd7SJohn Dyson 				if (((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
15309b5a5d81SJohn Dyson 				    pmap_ts_referenced(VM_PAGE_TO_PHYS(m[i])))) {
153126f9a767SRodney W. Grimes 					vm_page_activate(m[i]);
153226f9a767SRodney W. Grimes 				}
153326f9a767SRodney W. Grimes 			}
15347fb0c17eSDavid Greenman 		}
153526f9a767SRodney W. Grimes 	} else {
1536e736cd05SJohn Dyson 		for (i = firstidx; i < lastidx; i++) {
153726f9a767SRodney W. Grimes 			rtvals[i] = rv;
153826f9a767SRodney W. Grimes 		}
153926f9a767SRodney W. Grimes 	}
154026f9a767SRodney W. Grimes 
154126f9a767SRodney W. Grimes 	if (bp->b_rcred != NOCRED)
154226f9a767SRodney W. Grimes 		crfree(bp->b_rcred);
154326f9a767SRodney W. Grimes 	if (bp->b_wcred != NOCRED)
154426f9a767SRodney W. Grimes 		crfree(bp->b_wcred);
1545e47ed70bSJohn Dyson 
1546e47ed70bSJohn Dyson 	spc_free(spc);
1547e47ed70bSJohn Dyson 	if (swap_pager_free_pending)
1548e47ed70bSJohn Dyson 		swap_pager_sync();
1549e47ed70bSJohn Dyson 
155026f9a767SRodney W. Grimes 	return (rv);
155126f9a767SRodney W. Grimes }
155226f9a767SRodney W. Grimes 
1553f708ef1bSPoul-Henning Kamp static void
155424a1cce3SDavid Greenman swap_pager_sync()
155526f9a767SRodney W. Grimes {
1556e47ed70bSJohn Dyson 	swp_clean_t spc;
155726f9a767SRodney W. Grimes 
1558e47ed70bSJohn Dyson 	while (spc = TAILQ_FIRST(&swap_pager_done)) {
155926f9a767SRodney W. Grimes 		swap_pager_finish(spc);
156026f9a767SRodney W. Grimes 	}
156124a1cce3SDavid Greenman 	return;
156226f9a767SRodney W. Grimes }
156326f9a767SRodney W. Grimes 
1564e47ed70bSJohn Dyson static void
156526f9a767SRodney W. Grimes swap_pager_finish(spc)
156626f9a767SRodney W. Grimes 	register swp_clean_t spc;
156726f9a767SRodney W. Grimes {
1568e47ed70bSJohn Dyson 	int i, s, lastidx;
1569e47ed70bSJohn Dyson 	vm_object_t object;
1570e47ed70bSJohn Dyson 	vm_page_t *ma;
1571e47ed70bSJohn Dyson 
1572e47ed70bSJohn Dyson 	ma = spc->spc_m;
1573e47ed70bSJohn Dyson 	object = ma[spc->spc_first]->object;
1574e47ed70bSJohn Dyson 	lastidx = spc->spc_first + spc->spc_count;
1575e47ed70bSJohn Dyson 
1576e47ed70bSJohn Dyson 	s = splvm();
1577e47ed70bSJohn Dyson 	TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
1578e47ed70bSJohn Dyson 	splx(s);
1579e47ed70bSJohn Dyson 
1580e47ed70bSJohn Dyson 	pmap_qremove(spc->spc_kva, spc->spc_count);
1581e47ed70bSJohn Dyson 
1582e47ed70bSJohn Dyson 	/*
1583e47ed70bSJohn Dyson 	 * If no error, mark as clean and inform the pmap system. If error,
1584e47ed70bSJohn Dyson 	 * mark as dirty so we will try again. (XXX could get stuck doing
1585e47ed70bSJohn Dyson 	 * this, should give up after awhile)
1586e47ed70bSJohn Dyson 	 */
1587e47ed70bSJohn Dyson 	if (spc->spc_flags & SPC_ERROR) {
1588e47ed70bSJohn Dyson 
1589e47ed70bSJohn Dyson 		for (i = spc->spc_first; i < lastidx; i++) {
1590e47ed70bSJohn Dyson 			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
1591e47ed70bSJohn Dyson 			    (u_long) VM_PAGE_TO_PHYS(ma[i]));
1592e47ed70bSJohn Dyson 			ma[i]->dirty = VM_PAGE_BITS_ALL;
1593e47ed70bSJohn Dyson 			PAGE_WAKEUP(ma[i]);
1594e47ed70bSJohn Dyson 		}
159526f9a767SRodney W. Grimes 
1596c0503609SDavid Greenman 		object->paging_in_progress -= spc->spc_count;
1597c0503609SDavid Greenman 		if ((object->paging_in_progress == 0) &&
1598c0503609SDavid Greenman 			(object->flags & OBJ_PIPWNT)) {
1599c0503609SDavid Greenman 			object->flags &= ~OBJ_PIPWNT;
160024a1cce3SDavid Greenman 			wakeup(object);
1601c0503609SDavid Greenman 		}
1602df8bae1dSRodney W. Grimes 
1603df8bae1dSRodney W. Grimes 	} else {
1604e736cd05SJohn Dyson 		for (i = spc->spc_first; i < lastidx; i++) {
1605e736cd05SJohn Dyson 			if ((ma[i]->queue != PQ_ACTIVE) &&
1606e736cd05SJohn Dyson 			   ((ma[i]->flags & PG_WANTED) ||
1607e736cd05SJohn Dyson 				 pmap_ts_referenced(VM_PAGE_TO_PHYS(ma[i]))))
1608e736cd05SJohn Dyson 				vm_page_activate(ma[i]);
1609df8bae1dSRodney W. Grimes 		}
1610df8bae1dSRodney W. Grimes 	}
1611df8bae1dSRodney W. Grimes 
161226f9a767SRodney W. Grimes 	nswiodone -= spc->spc_count;
1613e47ed70bSJohn Dyson 	swap_pager_free_pending--;
1614e47ed70bSJohn Dyson 	spc_free(spc);
1615df8bae1dSRodney W. Grimes 
1616df8bae1dSRodney W. Grimes 	return;
161726f9a767SRodney W. Grimes }
1618df8bae1dSRodney W. Grimes 
161926f9a767SRodney W. Grimes /*
162026f9a767SRodney W. Grimes  * swap_pager_iodone
162126f9a767SRodney W. Grimes  */
1622f5a12711SPoul-Henning Kamp static void
1623df8bae1dSRodney W. Grimes swap_pager_iodone(bp)
1624df8bae1dSRodney W. Grimes 	register struct buf *bp;
1625df8bae1dSRodney W. Grimes {
1626e47ed70bSJohn Dyson 	int i, s;
1627df8bae1dSRodney W. Grimes 	register swp_clean_t spc;
1628e47ed70bSJohn Dyson 	vm_object_t object;
1629df8bae1dSRodney W. Grimes 
1630e47ed70bSJohn Dyson 	s = splvm();
163126f9a767SRodney W. Grimes 	spc = (swp_clean_t) bp->b_spc;
163226f9a767SRodney W. Grimes 	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
163326f9a767SRodney W. Grimes 	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
1634e47ed70bSJohn Dyson 
1635e47ed70bSJohn Dyson 	object = bp->b_pages[0]->object;
1636e47ed70bSJohn Dyson 
1637e47ed70bSJohn Dyson #if defined(DIAGNOSTIC)
1638e47ed70bSJohn Dyson 	if (object->paging_in_progress < spc->spc_count)
1639e47ed70bSJohn Dyson 		printf("swap_pager_iodone: paging_in_progress(%d) < spc_count(%d)\n",
1640e47ed70bSJohn Dyson 			object->paging_in_progress, spc->spc_count);
1641e47ed70bSJohn Dyson #endif
1642e47ed70bSJohn Dyson 
164326f9a767SRodney W. Grimes 	if (bp->b_flags & B_ERROR) {
1644df8bae1dSRodney W. Grimes 		spc->spc_flags |= SPC_ERROR;
1645c3a1e425SDavid Greenman 		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
16461b119d9dSDavid Greenman 		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
1647c3a1e425SDavid Greenman 		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
1648e47ed70bSJohn Dyson 	} else {
1649e47ed70bSJohn Dyson 		for (i = 0; i < bp->b_npages; i++) {
1650e47ed70bSJohn Dyson 			/*
1651e47ed70bSJohn Dyson 			 * we wakeup any processes that are waiting on these pages.
1652e47ed70bSJohn Dyson 			 */
1653e47ed70bSJohn Dyson 			PAGE_WAKEUP(bp->b_pages[i]);
1654e47ed70bSJohn Dyson 		}
1655e47ed70bSJohn Dyson 
1656e47ed70bSJohn Dyson 		object->paging_in_progress -= spc->spc_count;
1657e47ed70bSJohn Dyson 		if ((object->paging_in_progress == 0) &&
1658e47ed70bSJohn Dyson 			(object->flags & OBJ_PIPWNT)) {
1659e47ed70bSJohn Dyson 			object->flags &= ~OBJ_PIPWNT;
1660e47ed70bSJohn Dyson 			wakeup(object);
1661e47ed70bSJohn Dyson 		}
1662df8bae1dSRodney W. Grimes 	}
166326f9a767SRodney W. Grimes 
16640d94caffSDavid Greenman 	if (bp->b_vp)
16650d94caffSDavid Greenman 		pbrelvp(bp);
16660d94caffSDavid Greenman 
166726f9a767SRodney W. Grimes 	if (bp->b_rcred != NOCRED)
166826f9a767SRodney W. Grimes 		crfree(bp->b_rcred);
166926f9a767SRodney W. Grimes 	if (bp->b_wcred != NOCRED)
167026f9a767SRodney W. Grimes 		crfree(bp->b_wcred);
167126f9a767SRodney W. Grimes 
167226f9a767SRodney W. Grimes 	nswiodone += spc->spc_count;
1673e47ed70bSJohn Dyson 	swap_pager_free_pending++;
16742a4895f4SDavid Greenman 	if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
16752a4895f4SDavid Greenman 		wakeup(spc->spc_object);
167626f9a767SRodney W. Grimes 	}
1677e47ed70bSJohn Dyson 
1678e47ed70bSJohn Dyson 	if (swap_pager_needflags &&
1679e47ed70bSJohn Dyson 	  ((swap_pager_free_count + swap_pager_free_pending) > (npendingio / 2))) {
1680e47ed70bSJohn Dyson 		spc_wakeup();
1681a1f6d91cSDavid Greenman 	}
1682a1f6d91cSDavid Greenman 
1683e47ed70bSJohn Dyson 	if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) &&
1684e47ed70bSJohn Dyson 		vm_pageout_pages_needed) {
168524a1cce3SDavid Greenman 		wakeup(&vm_pageout_pages_needed);
1686a1f6d91cSDavid Greenman 		vm_pageout_pages_needed = 0;
168726f9a767SRodney W. Grimes 	}
1688e47ed70bSJohn Dyson 
168926f9a767SRodney W. Grimes 	splx(s);
169026f9a767SRodney W. Grimes }
1691