xref: /freebsd/sys/vm/swap_pager.c (revision 0d94caffcad13f94e28da90c9e945749e0659463)
1df8bae1dSRodney W. Grimes /*
226f9a767SRodney W. Grimes  * Copyright (c) 1994 John S. Dyson
3df8bae1dSRodney W. Grimes  * Copyright (c) 1990 University of Utah.
4df8bae1dSRodney W. Grimes  * Copyright (c) 1991, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  *
7df8bae1dSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
8df8bae1dSRodney W. Grimes  * the Systems Programming Group of the University of Utah Computer
9df8bae1dSRodney W. Grimes  * Science Department.
10df8bae1dSRodney W. Grimes  *
11df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
12df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
13df8bae1dSRodney W. Grimes  * are met:
14df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
15df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
16df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
17df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
18df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
19df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
20df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
21df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
22df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
23df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
24df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
25df8bae1dSRodney W. Grimes  *    without specific prior written permission.
26df8bae1dSRodney W. Grimes  *
27df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
38df8bae1dSRodney W. Grimes  *
39df8bae1dSRodney W. Grimes  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
40df8bae1dSRodney W. Grimes  *
41df8bae1dSRodney W. Grimes  *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
420d94caffSDavid Greenman  * $Id: swap_pager.c,v 1.21 1994/12/23 04:56:50 davidg Exp $
43df8bae1dSRodney W. Grimes  */
44df8bae1dSRodney W. Grimes 
45df8bae1dSRodney W. Grimes /*
46df8bae1dSRodney W. Grimes  * Quick hack to page to dedicated partition(s).
47df8bae1dSRodney W. Grimes  * TODO:
48df8bae1dSRodney W. Grimes  *	Add multiprocessor locks
49df8bae1dSRodney W. Grimes  *	Deal with async writes in a better fashion
50df8bae1dSRodney W. Grimes  */
51df8bae1dSRodney W. Grimes 
52df8bae1dSRodney W. Grimes #include <sys/param.h>
53df8bae1dSRodney W. Grimes #include <sys/systm.h>
54df8bae1dSRodney W. Grimes #include <sys/proc.h>
55df8bae1dSRodney W. Grimes #include <sys/buf.h>
56df8bae1dSRodney W. Grimes #include <sys/vnode.h>
57df8bae1dSRodney W. Grimes #include <sys/malloc.h>
58df8bae1dSRodney W. Grimes 
59df8bae1dSRodney W. Grimes #include <miscfs/specfs/specdev.h>
6026f9a767SRodney W. Grimes #include <sys/rlist.h>
61df8bae1dSRodney W. Grimes 
62df8bae1dSRodney W. Grimes #include <vm/vm.h>
6326f9a767SRodney W. Grimes #include <vm/vm_pager.h>
64df8bae1dSRodney W. Grimes #include <vm/vm_page.h>
65df8bae1dSRodney W. Grimes #include <vm/vm_pageout.h>
66df8bae1dSRodney W. Grimes #include <vm/swap_pager.h>
67df8bae1dSRodney W. Grimes 
/*
 * Number of page clean structures: dimensions swcleanlist[] and is the
 * initial value of npendingio.  Can be overridden at build time.
 */
68df8bae1dSRodney W. Grimes #ifndef NPENDINGIO
690d94caffSDavid Greenman #define NPENDINGIO	10
70df8bae1dSRodney W. Grimes #endif
71df8bae1dSRodney W. Grimes 
/* forward declarations of the paging I/O routines */
7205f0fdd2SPoul-Henning Kamp int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int));
7305f0fdd2SPoul-Henning Kamp int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *));
7405f0fdd2SPoul-Henning Kamp 
7526f9a767SRodney W. Grimes int nswiodone;
7626f9a767SRodney W. Grimes extern int vm_pageout_rate_limit;
7726f9a767SRodney W. Grimes static int cleandone;
7826f9a767SRodney W. Grimes extern int hz;
/* set/cleared by swapsizecheck() as vm_swap_size crosses its thresholds */
7926f9a767SRodney W. Grimes int swap_pager_full;
/* map from which the per-clean-structure kva is allocated */
8026f9a767SRodney W. Grimes extern vm_map_t pager_map;
/*
 * free swap remaining: decremented by swap_pager_getswapspace() and
 * incremented by swap_pager_freeswapspace()
 */
8126f9a767SRodney W. Grimes extern int vm_swap_size;
/*
 * starts out set; swap_pager_copy() clears it once vm_swap_size is
 * non-zero and returns immediately while it is still set
 */
8224ea4a96SDavid Greenman int no_swap_space = 1;
/* resource list handed to rlist_alloc()/rlist_free() for swap blocks */
8335c10d22SDavid Greenman struct rlist *swaplist;
8435c10d22SDavid Greenman int nswaplist;
850d94caffSDavid Greenman extern int vm_pio_needed;
8626f9a767SRodney W. Grimes 
/*
 * Pages per clean structure: dimensions spc_m[] and the kva reserved
 * for each swcleanlist[] entry (PAGE_SIZE * MAX_PAGEOUT_CLUSTER bytes).
 */
8726f9a767SRodney W. Grimes #define MAX_PAGEOUT_CLUSTER 8
88df8bae1dSRodney W. Grimes 
/* queue head type for lists of page clean structures */
89df8bae1dSRodney W. Grimes TAILQ_HEAD(swpclean, swpagerclean);
90df8bae1dSRodney W. Grimes 
9126f9a767SRodney W. Grimes typedef struct swpagerclean *swp_clean_t;
9226f9a767SRodney W. Grimes 
/*
 * Page clean structure.  NPENDINGIO of them live in swcleanlist[]; the
 * first swap_pager_alloc() call gives each one its kva and buf header
 * and places it on swap_pager_free.
 */
93df8bae1dSRodney W. Grimes struct swpagerclean {
94df8bae1dSRodney W. Grimes 	TAILQ_ENTRY(swpagerclean) spc_list;	/* linkage on a swpclean queue */
95df8bae1dSRodney W. Grimes 	int spc_flags;				/* SPC_* flags, initialised to 0 */
96df8bae1dSRodney W. Grimes 	struct buf *spc_bp;			/* buf header malloc'ed at setup */
97df8bae1dSRodney W. Grimes 	sw_pager_t spc_swp;			/* presumably the pager being cleaned -- confirm */
98df8bae1dSRodney W. Grimes 	vm_offset_t spc_kva;			/* pageable kva from pager_map */
9926f9a767SRodney W. Grimes 	int spc_count;				/* presumably number of entries used in spc_m -- confirm */
10026f9a767SRodney W. Grimes 	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];	/* pages of the cluster */
101df8bae1dSRodney W. Grimes } swcleanlist[NPENDINGIO];
10226f9a767SRodney W. Grimes 
10326f9a767SRodney W. Grimes 
10426f9a767SRodney W. Grimes extern vm_map_t kernel_map;
105df8bae1dSRodney W. Grimes 
106df8bae1dSRodney W. Grimes /* spc_flags values */
10726f9a767SRodney W. Grimes #define SPC_ERROR	0x01
108df8bae1dSRodney W. Grimes 
/* value of a swb_block[] slot that has no swap space assigned */
10926f9a767SRodney W. Grimes #define SWB_EMPTY (-1)
110df8bae1dSRodney W. Grimes 
11126f9a767SRodney W. Grimes struct swpclean swap_pager_done;	/* list of completed page cleans */
112df8bae1dSRodney W. Grimes struct swpclean swap_pager_inuse;	/* list of pending page cleans */
113df8bae1dSRodney W. Grimes struct swpclean swap_pager_free;	/* list of free pager clean structs */
114df8bae1dSRodney W. Grimes struct pagerlst swap_pager_list;	/* list of "named" anon regions */
11526f9a767SRodney W. Grimes struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */
116df8bae1dSRodney W. Grimes 
11726f9a767SRodney W. Grimes #define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
11826f9a767SRodney W. Grimes int swap_pager_needflags;
11926f9a767SRodney W. Grimes struct rlist *swapfrag;
12026f9a767SRodney W. Grimes 
/*
 * Null-terminated table of the pager queues, so that code needing to
 * visit every swap pager (swap_pager_reclaim) can walk both lists.
 */
12126f9a767SRodney W. Grimes struct pagerlst *swp_qs[] = {
12226f9a767SRodney W. Grimes 	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
12326f9a767SRodney W. Grimes };
12426f9a767SRodney W. Grimes 
12526f9a767SRodney W. Grimes int swap_pager_putmulti();
126df8bae1dSRodney W. Grimes 
/*
 * Swap pager operations vector.  swap_pager_init() installs it as
 * dfltpagerops and swap_pager_alloc() stores it in pg_ops of every
 * pager it creates.  The initialiser is positional, so the entries
 * must stay in the member order of struct pagerops.
 */
127df8bae1dSRodney W. Grimes struct pagerops swappagerops = {
128df8bae1dSRodney W. Grimes 	swap_pager_init,
129df8bae1dSRodney W. Grimes 	swap_pager_alloc,
130df8bae1dSRodney W. Grimes 	swap_pager_dealloc,
131df8bae1dSRodney W. Grimes 	swap_pager_getpage,
13226f9a767SRodney W. Grimes 	swap_pager_getmulti,
133df8bae1dSRodney W. Grimes 	swap_pager_putpage,
13426f9a767SRodney W. Grimes 	swap_pager_putmulti,
13526f9a767SRodney W. Grimes 	swap_pager_haspage
136df8bae1dSRodney W. Grimes };
137df8bae1dSRodney W. Grimes 
/* number of swcleanlist[] entries that swap_pager_alloc() tries to set up */
13826f9a767SRodney W. Grimes int npendingio = NPENDINGIO;
13926f9a767SRodney W. Grimes int pendingiowait;
/*
 * set by swap_pager_init(); cleared by the first swap_pager_alloc()
 * call once the clean structures have been given their resources
 */
14026f9a767SRodney W. Grimes int require_swap_init;
14126f9a767SRodney W. Grimes void swap_pager_finish();
/* swap allocation constants, computed in swap_pager_init() */
14226f9a767SRodney W. Grimes int dmmin, dmmax;
14326f9a767SRodney W. Grimes extern int vm_page_count;
14426f9a767SRodney W. Grimes 
1450d94caffSDavid Greenman static inline void
1460d94caffSDavid Greenman swapsizecheck()
1470d94caffSDavid Greenman {
14826f9a767SRodney W. Grimes 	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
14926f9a767SRodney W. Grimes 		if (swap_pager_full)
15026f9a767SRodney W. Grimes 			printf("swap_pager: out of space\n");
15126f9a767SRodney W. Grimes 		swap_pager_full = 1;
15226f9a767SRodney W. Grimes 	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
15326f9a767SRodney W. Grimes 		swap_pager_full = 0;
15426f9a767SRodney W. Grimes }
15526f9a767SRodney W. Grimes 
15626f9a767SRodney W. Grimes void
157df8bae1dSRodney W. Grimes swap_pager_init()
158df8bae1dSRodney W. Grimes {
159df8bae1dSRodney W. Grimes 	dfltpagerops = &swappagerops;
160df8bae1dSRodney W. Grimes 
16126f9a767SRodney W. Grimes 	TAILQ_INIT(&swap_pager_list);
16226f9a767SRodney W. Grimes 	TAILQ_INIT(&swap_pager_un_list);
163df8bae1dSRodney W. Grimes 
164df8bae1dSRodney W. Grimes 	/*
165df8bae1dSRodney W. Grimes 	 * Initialize clean lists
166df8bae1dSRodney W. Grimes 	 */
167df8bae1dSRodney W. Grimes 	TAILQ_INIT(&swap_pager_inuse);
16826f9a767SRodney W. Grimes 	TAILQ_INIT(&swap_pager_done);
169df8bae1dSRodney W. Grimes 	TAILQ_INIT(&swap_pager_free);
17026f9a767SRodney W. Grimes 
17126f9a767SRodney W. Grimes 	require_swap_init = 1;
172df8bae1dSRodney W. Grimes 
173df8bae1dSRodney W. Grimes 	/*
174df8bae1dSRodney W. Grimes 	 * Calculate the swap allocation constants.
175df8bae1dSRodney W. Grimes 	 */
176df8bae1dSRodney W. Grimes 
17726f9a767SRodney W. Grimes 	dmmin = CLBYTES / DEV_BSIZE;
17826f9a767SRodney W. Grimes 	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
17926f9a767SRodney W. Grimes 
180df8bae1dSRodney W. Grimes }
181df8bae1dSRodney W. Grimes 
182df8bae1dSRodney W. Grimes /*
183df8bae1dSRodney W. Grimes  * Allocate a pager structure and associated resources.
184df8bae1dSRodney W. Grimes  * Note that if we are called from the pageout daemon (handle == NULL)
185df8bae1dSRodney W. Grimes  * we should not wait for memory as it could resulting in deadlock.
186df8bae1dSRodney W. Grimes  */
18726f9a767SRodney W. Grimes vm_pager_t
18826f9a767SRodney W. Grimes swap_pager_alloc(handle, size, prot, offset)
189df8bae1dSRodney W. Grimes 	caddr_t handle;
190df8bae1dSRodney W. Grimes 	register vm_size_t size;
191df8bae1dSRodney W. Grimes 	vm_prot_t prot;
19226f9a767SRodney W. Grimes 	vm_offset_t offset;
193df8bae1dSRodney W. Grimes {
194df8bae1dSRodney W. Grimes 	register vm_pager_t pager;
195df8bae1dSRodney W. Grimes 	register sw_pager_t swp;
196df8bae1dSRodney W. Grimes 	int waitok;
19726f9a767SRodney W. Grimes 	int i, j;
198df8bae1dSRodney W. Grimes 
19926f9a767SRodney W. Grimes 	if (require_swap_init) {
20026f9a767SRodney W. Grimes 		swp_clean_t spc;
20126f9a767SRodney W. Grimes 		struct buf *bp;
2020d94caffSDavid Greenman 
2030d94caffSDavid Greenman #if 0
2040d94caffSDavid Greenman 		int desiredpendingio;
2050d94caffSDavid Greenman 
2060d94caffSDavid Greenman 		desiredpendingio = cnt.v_page_count / 200 + 2;
2070d94caffSDavid Greenman 		if (desiredpendingio < npendingio)
2080d94caffSDavid Greenman 			npendingio = desiredpendingio;
2090d94caffSDavid Greenman #endif
2100d94caffSDavid Greenman 
21126f9a767SRodney W. Grimes 		/*
2120d94caffSDavid Greenman 		 * kva's are allocated here so that we dont need to keep doing
2130d94caffSDavid Greenman 		 * kmem_alloc pageables at runtime
21426f9a767SRodney W. Grimes 		 */
21526f9a767SRodney W. Grimes 		for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
216fff93ab6SDavid Greenman 			spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
21726f9a767SRodney W. Grimes 			if (!spc->spc_kva) {
21826f9a767SRodney W. Grimes 				break;
21926f9a767SRodney W. Grimes 			}
22026f9a767SRodney W. Grimes 			spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_NOWAIT);
22126f9a767SRodney W. Grimes 			if (!spc->spc_bp) {
22226f9a767SRodney W. Grimes 				kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
22326f9a767SRodney W. Grimes 				break;
22426f9a767SRodney W. Grimes 			}
22526f9a767SRodney W. Grimes 			spc->spc_flags = 0;
22626f9a767SRodney W. Grimes 			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
22726f9a767SRodney W. Grimes 		}
22826f9a767SRodney W. Grimes 		require_swap_init = 0;
22926f9a767SRodney W. Grimes 		if (size == 0)
23026f9a767SRodney W. Grimes 			return (NULL);
23126f9a767SRodney W. Grimes 	}
232df8bae1dSRodney W. Grimes 	/*
2330d94caffSDavid Greenman 	 * If this is a "named" anonymous region, look it up and return the
2340d94caffSDavid Greenman 	 * appropriate pager if it exists.
235df8bae1dSRodney W. Grimes 	 */
236df8bae1dSRodney W. Grimes 	if (handle) {
237df8bae1dSRodney W. Grimes 		pager = vm_pager_lookup(&swap_pager_list, handle);
238df8bae1dSRodney W. Grimes 		if (pager != NULL) {
239df8bae1dSRodney W. Grimes 			/*
2400d94caffSDavid Greenman 			 * Use vm_object_lookup to gain a reference to the
2410d94caffSDavid Greenman 			 * object and also to remove from the object cache.
242df8bae1dSRodney W. Grimes 			 */
243df8bae1dSRodney W. Grimes 			if (vm_object_lookup(pager) == NULL)
244df8bae1dSRodney W. Grimes 				panic("swap_pager_alloc: bad object");
245df8bae1dSRodney W. Grimes 			return (pager);
246df8bae1dSRodney W. Grimes 		}
247df8bae1dSRodney W. Grimes 	}
2485663e6deSDavid Greenman 	/*
2490d94caffSDavid Greenman 	 * Pager doesn't exist, allocate swap management resources and
2500d94caffSDavid Greenman 	 * initialize.
251df8bae1dSRodney W. Grimes 	 */
252df8bae1dSRodney W. Grimes 	waitok = handle ? M_WAITOK : M_NOWAIT;
253df8bae1dSRodney W. Grimes 	pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, waitok);
254df8bae1dSRodney W. Grimes 	if (pager == NULL)
255df8bae1dSRodney W. Grimes 		return (NULL);
256df8bae1dSRodney W. Grimes 	swp = (sw_pager_t) malloc(sizeof *swp, M_VMPGDATA, waitok);
257df8bae1dSRodney W. Grimes 	if (swp == NULL) {
258df8bae1dSRodney W. Grimes 		free((caddr_t) pager, M_VMPAGER);
259df8bae1dSRodney W. Grimes 		return (NULL);
260df8bae1dSRodney W. Grimes 	}
261df8bae1dSRodney W. Grimes 	size = round_page(size);
262df8bae1dSRodney W. Grimes 	swp->sw_osize = size;
26326f9a767SRodney W. Grimes 	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES * PAGE_SIZE);
264df8bae1dSRodney W. Grimes 	swp->sw_blocks = (sw_blk_t)
265df8bae1dSRodney W. Grimes 	    malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks),
26626f9a767SRodney W. Grimes 	    M_VMPGDATA, waitok);
267df8bae1dSRodney W. Grimes 	if (swp->sw_blocks == NULL) {
268df8bae1dSRodney W. Grimes 		free((caddr_t) swp, M_VMPGDATA);
269df8bae1dSRodney W. Grimes 		free((caddr_t) pager, M_VMPAGER);
27026f9a767SRodney W. Grimes 		return (NULL);
271df8bae1dSRodney W. Grimes 	}
27226f9a767SRodney W. Grimes 	for (i = 0; i < swp->sw_nblocks; i++) {
27326f9a767SRodney W. Grimes 		swp->sw_blocks[i].swb_valid = 0;
27426f9a767SRodney W. Grimes 		swp->sw_blocks[i].swb_locked = 0;
27526f9a767SRodney W. Grimes 		for (j = 0; j < SWB_NPAGES; j++)
27626f9a767SRodney W. Grimes 			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
27726f9a767SRodney W. Grimes 	}
27826f9a767SRodney W. Grimes 
279df8bae1dSRodney W. Grimes 	swp->sw_poip = 0;
280df8bae1dSRodney W. Grimes 	if (handle) {
281df8bae1dSRodney W. Grimes 		vm_object_t object;
282df8bae1dSRodney W. Grimes 
283df8bae1dSRodney W. Grimes 		swp->sw_flags = SW_NAMED;
284df8bae1dSRodney W. Grimes 		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
285df8bae1dSRodney W. Grimes 		/*
286df8bae1dSRodney W. Grimes 		 * Consistant with other pagers: return with object
2870d94caffSDavid Greenman 		 * referenced.  Can't do this with handle == NULL since it
2880d94caffSDavid Greenman 		 * might be the pageout daemon calling.
289df8bae1dSRodney W. Grimes 		 */
290df8bae1dSRodney W. Grimes 		object = vm_object_allocate(size);
291df8bae1dSRodney W. Grimes 		vm_object_enter(object, pager);
292df8bae1dSRodney W. Grimes 		vm_object_setpager(object, pager, 0, FALSE);
293df8bae1dSRodney W. Grimes 	} else {
294df8bae1dSRodney W. Grimes 		swp->sw_flags = 0;
29526f9a767SRodney W. Grimes 		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
296df8bae1dSRodney W. Grimes 	}
297df8bae1dSRodney W. Grimes 	pager->pg_handle = handle;
298df8bae1dSRodney W. Grimes 	pager->pg_ops = &swappagerops;
299df8bae1dSRodney W. Grimes 	pager->pg_type = PG_SWAP;
30026f9a767SRodney W. Grimes 	pager->pg_data = (caddr_t) swp;
301df8bae1dSRodney W. Grimes 
302df8bae1dSRodney W. Grimes 	return (pager);
303df8bae1dSRodney W. Grimes }
304df8bae1dSRodney W. Grimes 
30526f9a767SRodney W. Grimes /*
30626f9a767SRodney W. Grimes  * returns disk block associated with pager and offset
30726f9a767SRodney W. Grimes  * additionally, as a side effect returns a flag indicating
30826f9a767SRodney W. Grimes  * if the block has been written
30926f9a767SRodney W. Grimes  */
31026f9a767SRodney W. Grimes 
31126f9a767SRodney W. Grimes static int *
31226f9a767SRodney W. Grimes swap_pager_diskaddr(swp, offset, valid)
31326f9a767SRodney W. Grimes 	sw_pager_t swp;
31426f9a767SRodney W. Grimes 	vm_offset_t offset;
31526f9a767SRodney W. Grimes 	int *valid;
31626f9a767SRodney W. Grimes {
31726f9a767SRodney W. Grimes 	register sw_blk_t swb;
31826f9a767SRodney W. Grimes 	int ix;
31926f9a767SRodney W. Grimes 
32026f9a767SRodney W. Grimes 	if (valid)
32126f9a767SRodney W. Grimes 		*valid = 0;
32226f9a767SRodney W. Grimes 	ix = offset / (SWB_NPAGES * PAGE_SIZE);
32326f9a767SRodney W. Grimes 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
32426f9a767SRodney W. Grimes 		return (FALSE);
32526f9a767SRodney W. Grimes 	}
32626f9a767SRodney W. Grimes 	swb = &swp->sw_blocks[ix];
32726f9a767SRodney W. Grimes 	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
32826f9a767SRodney W. Grimes 	if (valid)
32926f9a767SRodney W. Grimes 		*valid = swb->swb_valid & (1 << ix);
33026f9a767SRodney W. Grimes 	return &swb->swb_block[ix];
33126f9a767SRodney W. Grimes }
33226f9a767SRodney W. Grimes 
33326f9a767SRodney W. Grimes /*
33426f9a767SRodney W. Grimes  * Utility routine to set the valid (written) bit for
33526f9a767SRodney W. Grimes  * a block associated with a pager and offset
33626f9a767SRodney W. Grimes  */
337df8bae1dSRodney W. Grimes static void
33826f9a767SRodney W. Grimes swap_pager_setvalid(swp, offset, valid)
33926f9a767SRodney W. Grimes 	sw_pager_t swp;
34026f9a767SRodney W. Grimes 	vm_offset_t offset;
34126f9a767SRodney W. Grimes 	int valid;
34226f9a767SRodney W. Grimes {
34326f9a767SRodney W. Grimes 	register sw_blk_t swb;
34426f9a767SRodney W. Grimes 	int ix;
34526f9a767SRodney W. Grimes 
34626f9a767SRodney W. Grimes 	ix = offset / (SWB_NPAGES * PAGE_SIZE);
34726f9a767SRodney W. Grimes 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
34826f9a767SRodney W. Grimes 		return;
34926f9a767SRodney W. Grimes 
35026f9a767SRodney W. Grimes 	swb = &swp->sw_blocks[ix];
35126f9a767SRodney W. Grimes 	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
35226f9a767SRodney W. Grimes 	if (valid)
35326f9a767SRodney W. Grimes 		swb->swb_valid |= (1 << ix);
35426f9a767SRodney W. Grimes 	else
35526f9a767SRodney W. Grimes 		swb->swb_valid &= ~(1 << ix);
35626f9a767SRodney W. Grimes 	return;
35726f9a767SRodney W. Grimes }
35826f9a767SRodney W. Grimes 
35926f9a767SRodney W. Grimes /*
36026f9a767SRodney W. Grimes  * this routine allocates swap space with a fragmentation
36126f9a767SRodney W. Grimes  * minimization policy.
36226f9a767SRodney W. Grimes  */
36326f9a767SRodney W. Grimes int
3640d94caffSDavid Greenman swap_pager_getswapspace(unsigned amount, unsigned *rtval)
3650d94caffSDavid Greenman {
36624ea4a96SDavid Greenman 	vm_swap_size -= amount;
36724ea4a96SDavid Greenman 	if (!rlist_alloc(&swaplist, amount, rtval)) {
36824ea4a96SDavid Greenman 		vm_swap_size += amount;
36926f9a767SRodney W. Grimes 		return 0;
37024ea4a96SDavid Greenman 	} else {
37124ea4a96SDavid Greenman 		swapsizecheck();
37226f9a767SRodney W. Grimes 		return 1;
37326f9a767SRodney W. Grimes 	}
37426f9a767SRodney W. Grimes }
37526f9a767SRodney W. Grimes 
37626f9a767SRodney W. Grimes /*
37726f9a767SRodney W. Grimes  * this routine frees swap space with a fragmentation
37826f9a767SRodney W. Grimes  * minimization policy.
37926f9a767SRodney W. Grimes  */
38026f9a767SRodney W. Grimes void
3810d94caffSDavid Greenman swap_pager_freeswapspace(unsigned from, unsigned to)
3820d94caffSDavid Greenman {
38335c10d22SDavid Greenman 	rlist_free(&swaplist, from, to);
38424ea4a96SDavid Greenman 	vm_swap_size += (to - from) + 1;
38524ea4a96SDavid Greenman 	swapsizecheck();
38626f9a767SRodney W. Grimes }
38726f9a767SRodney W. Grimes /*
38826f9a767SRodney W. Grimes  * this routine frees swap blocks from a specified pager
38926f9a767SRodney W. Grimes  */
39026f9a767SRodney W. Grimes void
39126f9a767SRodney W. Grimes _swap_pager_freespace(swp, start, size)
39226f9a767SRodney W. Grimes 	sw_pager_t swp;
39326f9a767SRodney W. Grimes 	vm_offset_t start;
39426f9a767SRodney W. Grimes 	vm_offset_t size;
39526f9a767SRodney W. Grimes {
39626f9a767SRodney W. Grimes 	vm_offset_t i;
39726f9a767SRodney W. Grimes 	int s;
39826f9a767SRodney W. Grimes 
39926f9a767SRodney W. Grimes 	s = splbio();
4006f7bc393SDavid Greenman 	for (i = start; i < round_page(start + size); i += PAGE_SIZE) {
40126f9a767SRodney W. Grimes 		int valid;
40226f9a767SRodney W. Grimes 		int *addr = swap_pager_diskaddr(swp, i, &valid);
4030d94caffSDavid Greenman 
40426f9a767SRodney W. Grimes 		if (addr && *addr != SWB_EMPTY) {
40526f9a767SRodney W. Grimes 			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
40626f9a767SRodney W. Grimes 			if (valid) {
40726f9a767SRodney W. Grimes 				swap_pager_setvalid(swp, i, 0);
40826f9a767SRodney W. Grimes 			}
40926f9a767SRodney W. Grimes 			*addr = SWB_EMPTY;
41026f9a767SRodney W. Grimes 		}
41126f9a767SRodney W. Grimes 	}
41226f9a767SRodney W. Grimes 	splx(s);
41326f9a767SRodney W. Grimes }
41426f9a767SRodney W. Grimes 
41526f9a767SRodney W. Grimes void
41626f9a767SRodney W. Grimes swap_pager_freespace(pager, start, size)
41726f9a767SRodney W. Grimes 	vm_pager_t pager;
41826f9a767SRodney W. Grimes 	vm_offset_t start;
41926f9a767SRodney W. Grimes 	vm_offset_t size;
42026f9a767SRodney W. Grimes {
42126f9a767SRodney W. Grimes 	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
42226f9a767SRodney W. Grimes }
42326f9a767SRodney W. Grimes 
42426f9a767SRodney W. Grimes /*
42526f9a767SRodney W. Grimes  * swap_pager_reclaim frees up over-allocated space from all pagers
42626f9a767SRodney W. Grimes  * this eliminates internal fragmentation due to allocation of space
42726f9a767SRodney W. Grimes  * for segments that are never swapped to. It has been written so that
42826f9a767SRodney W. Grimes  * it does not block until the rlist_free operation occurs; it keeps
42926f9a767SRodney W. Grimes  * the queues consistent.
 *
 * The work is done in two phases, both at splbio:
 *   1. walk every pager on the queues in swp_qs[] and, for each page
 *      slot in an unlocked block that has swap assigned but whose valid
 *      bit is clear, remember the swap block in reclaims[] and mark the
 *      slot SWB_EMPTY (at most MAXRECLAIM slots per call);
 *   2. hand the remembered blocks back with swap_pager_freeswapspace().
 *
 * Only one reclaim runs at a time.  A caller that finds one already in
 * progress sleeps on &in_reclaim and then returns without reclaiming
 * anything itself.
43026f9a767SRodney W. Grimes  */
43126f9a767SRodney W. Grimes 
43226f9a767SRodney W. Grimes /*
43326f9a767SRodney W. Grimes  * Maximum number of blocks (pages) to reclaim per pass
43426f9a767SRodney W. Grimes  */
43526f9a767SRodney W. Grimes #define MAXRECLAIM 256
43626f9a767SRodney W. Grimes 
43726f9a767SRodney W. Grimes void
43826f9a767SRodney W. Grimes swap_pager_reclaim()
43926f9a767SRodney W. Grimes {
44026f9a767SRodney W. Grimes 	vm_pager_t p;
44126f9a767SRodney W. Grimes 	sw_pager_t swp;
44226f9a767SRodney W. Grimes 	int i, j, k;
44326f9a767SRodney W. Grimes 	int s;
44426f9a767SRodney W. Grimes 	int reclaimcount;
	/* static: shared scratch list, protected by the in_reclaim flag */
44526f9a767SRodney W. Grimes 	static int reclaims[MAXRECLAIM];
44626f9a767SRodney W. Grimes 	static int in_reclaim;
44726f9a767SRodney W. Grimes 
44826f9a767SRodney W. Grimes 	/*
44926f9a767SRodney W. Grimes 	 * allow only one process to be in the swap_pager_reclaim subroutine
45026f9a767SRodney W. Grimes 	 */
45126f9a767SRodney W. Grimes 	s = splbio();
45226f9a767SRodney W. Grimes 	if (in_reclaim) {
45326f9a767SRodney W. Grimes 		tsleep((caddr_t) & in_reclaim, PSWP, "swrclm", 0);
45426f9a767SRodney W. Grimes 		splx(s);
45526f9a767SRodney W. Grimes 		return;
45626f9a767SRodney W. Grimes 	}
45726f9a767SRodney W. Grimes 	in_reclaim = 1;
45826f9a767SRodney W. Grimes 	reclaimcount = 0;
45926f9a767SRodney W. Grimes 
46026f9a767SRodney W. Grimes 	/* for each pager queue */
46126f9a767SRodney W. Grimes 	for (k = 0; swp_qs[k]; k++) {
46226f9a767SRodney W. Grimes 
46326f9a767SRodney W. Grimes 		p = swp_qs[k]->tqh_first;
46426f9a767SRodney W. Grimes 		while (p && (reclaimcount < MAXRECLAIM)) {
46526f9a767SRodney W. Grimes 
46626f9a767SRodney W. Grimes 			/*
46726f9a767SRodney W. Grimes 			 * see if any blocks associated with a pager has been
46826f9a767SRodney W. Grimes 			 * allocated but not used (written)
46926f9a767SRodney W. Grimes 			 */
47026f9a767SRodney W. Grimes 			swp = (sw_pager_t) p->pg_data;
47126f9a767SRodney W. Grimes 			for (i = 0; i < swp->sw_nblocks; i++) {
47226f9a767SRodney W. Grimes 				sw_blk_t swb = &swp->sw_blocks[i];
4730d94caffSDavid Greenman 
				/* locked blocks are left alone */
47426f9a767SRodney W. Grimes 				if (swb->swb_locked)
47526f9a767SRodney W. Grimes 					continue;
47626f9a767SRodney W. Grimes 				for (j = 0; j < SWB_NPAGES; j++) {
47726f9a767SRodney W. Grimes 					if (swb->swb_block[j] != SWB_EMPTY &&
47826f9a767SRodney W. Grimes 					    (swb->swb_valid & (1 << j)) == 0) {
47926f9a767SRodney W. Grimes 						reclaims[reclaimcount++] = swb->swb_block[j];
48026f9a767SRodney W. Grimes 						swb->swb_block[j] = SWB_EMPTY;
						/* stop before overrunning reclaims[] */
48126f9a767SRodney W. Grimes 						if (reclaimcount >= MAXRECLAIM)
48226f9a767SRodney W. Grimes 							goto rfinished;
48326f9a767SRodney W. Grimes 					}
48426f9a767SRodney W. Grimes 				}
48526f9a767SRodney W. Grimes 			}
48626f9a767SRodney W. Grimes 			p = p->pg_list.tqe_next;
48726f9a767SRodney W. Grimes 		}
48826f9a767SRodney W. Grimes 	}
48926f9a767SRodney W. Grimes 
49026f9a767SRodney W. Grimes rfinished:
49126f9a767SRodney W. Grimes 
49226f9a767SRodney W. Grimes 	/*
49326f9a767SRodney W. Grimes 	 * free the blocks that have been added to the reclaim list
49426f9a767SRodney W. Grimes 	 */
49526f9a767SRodney W. Grimes 	for (i = 0; i < reclaimcount; i++) {
49626f9a767SRodney W. Grimes 		swap_pager_freeswapspace(reclaims[i], reclaims[i] + btodb(PAGE_SIZE) - 1);
		/*
		 * NOTE(review): waiters are woken after every freed block,
		 * while in_reclaim is still set; since waiters return
		 * unconditionally once woken this looks redundant with the
		 * final wakeup below -- confirm before changing.
		 */
49726f9a767SRodney W. Grimes 		wakeup((caddr_t) & in_reclaim);
49826f9a767SRodney W. Grimes 	}
49926f9a767SRodney W. Grimes 
	/*
	 * NOTE(review): in_reclaim is cleared after the spl has been
	 * restored, unlike where it is set -- confirm this ordering is
	 * intended.
	 */
50026f9a767SRodney W. Grimes 	splx(s);
50126f9a767SRodney W. Grimes 	in_reclaim = 0;
50226f9a767SRodney W. Grimes 	wakeup((caddr_t) & in_reclaim);
50326f9a767SRodney W. Grimes }
50426f9a767SRodney W. Grimes 
50526f9a767SRodney W. Grimes 
50626f9a767SRodney W. Grimes /*
50726f9a767SRodney W. Grimes  * swap_pager_copy copies blocks from one pager to another and
50826f9a767SRodney W. Grimes  * destroys the source pager
 *
 *	srcpager	pager whose swap blocks are given away; freed on return
 *	srcoffset,
 *	offset		added together to form the source offset that
 *			corresponds to the start of the destination range
 *	dstpager	pager receiving the blocks
 *	dstoffset	offset within dstpager where the range starts
 *
 * For every page i in [0, dstswp->sw_osize) the source slot at
 * (i + offset + srcoffset) is moved to the destination slot at
 * (i + dstoffset) when the source page has been written and the
 * destination holds no written copy of its own.  Every other source
 * block -- before the range, beyond it, or not moved -- is released.
 *
 * NOTE(review): while no_swap_space is set the routine returns at once
 * and the source pager is neither dequeued nor freed -- confirm callers
 * expect that.
50926f9a767SRodney W. Grimes  */
51026f9a767SRodney W. Grimes 
51126f9a767SRodney W. Grimes void
51226f9a767SRodney W. Grimes swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
51326f9a767SRodney W. Grimes 	vm_pager_t srcpager;
51426f9a767SRodney W. Grimes 	vm_offset_t srcoffset;
51526f9a767SRodney W. Grimes 	vm_pager_t dstpager;
51626f9a767SRodney W. Grimes 	vm_offset_t dstoffset;
51726f9a767SRodney W. Grimes 	vm_offset_t offset;
51826f9a767SRodney W. Grimes {
51926f9a767SRodney W. Grimes 	sw_pager_t srcswp, dstswp;
52026f9a767SRodney W. Grimes 	vm_offset_t i;
52126f9a767SRodney W. Grimes 	int s;
52226f9a767SRodney W. Grimes 
	/* latch: once any swap has been seen, no_swap_space stays clear */
52324ea4a96SDavid Greenman 	if (vm_swap_size)
52424ea4a96SDavid Greenman 		no_swap_space = 0;
52524ea4a96SDavid Greenman 
52624ea4a96SDavid Greenman 	if (no_swap_space)
5275663e6deSDavid Greenman 		return;
5285663e6deSDavid Greenman 
52926f9a767SRodney W. Grimes 	srcswp = (sw_pager_t) srcpager->pg_data;
53026f9a767SRodney W. Grimes 	dstswp = (sw_pager_t) dstpager->pg_data;
53126f9a767SRodney W. Grimes 
53226f9a767SRodney W. Grimes 	/*
53326f9a767SRodney W. Grimes 	 * remove the source pager from the swap_pager internal queue
53426f9a767SRodney W. Grimes 	 */
53526f9a767SRodney W. Grimes 	s = splbio();
53626f9a767SRodney W. Grimes 	if (srcswp->sw_flags & SW_NAMED) {
53726f9a767SRodney W. Grimes 		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
53826f9a767SRodney W. Grimes 		srcswp->sw_flags &= ~SW_NAMED;
53926f9a767SRodney W. Grimes 	} else {
54026f9a767SRodney W. Grimes 		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
54126f9a767SRodney W. Grimes 	}
54226f9a767SRodney W. Grimes 
	/* wait until the source pager's sw_poip count has dropped to zero */
54326f9a767SRodney W. Grimes 	while (srcswp->sw_poip) {
54426f9a767SRodney W. Grimes 		tsleep((caddr_t) srcswp, PVM, "spgout", 0);
54526f9a767SRodney W. Grimes 	}
54626f9a767SRodney W. Grimes 	splx(s);
54726f9a767SRodney W. Grimes 
54826f9a767SRodney W. Grimes 	/*
54926f9a767SRodney W. Grimes 	 * clean all of the pages that are currently active and finished
55026f9a767SRodney W. Grimes 	 */
55126f9a767SRodney W. Grimes 	(void) swap_pager_clean();
55226f9a767SRodney W. Grimes 
55326f9a767SRodney W. Grimes 	s = splbio();
55426f9a767SRodney W. Grimes 	/*
55526f9a767SRodney W. Grimes 	 * clear source block before destination object
55626f9a767SRodney W. Grimes 	 * (release allocated space)
55726f9a767SRodney W. Grimes 	 */
55826f9a767SRodney W. Grimes 	for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
55926f9a767SRodney W. Grimes 		int valid;
56026f9a767SRodney W. Grimes 		int *addr = swap_pager_diskaddr(srcswp, i, &valid);
5610d94caffSDavid Greenman 
56226f9a767SRodney W. Grimes 		if (addr && *addr != SWB_EMPTY) {
56326f9a767SRodney W. Grimes 			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
56426f9a767SRodney W. Grimes 			*addr = SWB_EMPTY;
56526f9a767SRodney W. Grimes 		}
56626f9a767SRodney W. Grimes 	}
56726f9a767SRodney W. Grimes 	/*
56826f9a767SRodney W. Grimes 	 * transfer source to destination
56926f9a767SRodney W. Grimes 	 */
57026f9a767SRodney W. Grimes 	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
57126f9a767SRodney W. Grimes 		int srcvalid, dstvalid;
57226f9a767SRodney W. Grimes 		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
57326f9a767SRodney W. Grimes 		    &srcvalid);
57426f9a767SRodney W. Grimes 		int *dstaddrp;
5750d94caffSDavid Greenman 
57626f9a767SRodney W. Grimes 		/*
57726f9a767SRodney W. Grimes 		 * see if the source has space allocated
57826f9a767SRodney W. Grimes 		 */
57926f9a767SRodney W. Grimes 		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
58026f9a767SRodney W. Grimes 			/*
5810d94caffSDavid Greenman 			 * if the source is valid and the dest has no space,
5820d94caffSDavid Greenman 			 * then copy the allocation from the source to the
5830d94caffSDavid Greenman 			 * dest.
58426f9a767SRodney W. Grimes 			 */
58526f9a767SRodney W. Grimes 			if (srcvalid) {
58626f9a767SRodney W. Grimes 				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
58726f9a767SRodney W. Grimes 				/*
5880d94caffSDavid Greenman 				 * if the dest already has a valid block,
5890d94caffSDavid Greenman 				 * deallocate the source block without
5900d94caffSDavid Greenman 				 * copying.
				 *
				 * A dest slot that has swap assigned but was
				 * never written is released first, so that
				 * the source block can take its place below.
59126f9a767SRodney W. Grimes 				 */
59226f9a767SRodney W. Grimes 				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
59326f9a767SRodney W. Grimes 					swap_pager_freeswapspace(*dstaddrp, *dstaddrp + btodb(PAGE_SIZE) - 1);
59426f9a767SRodney W. Grimes 					*dstaddrp = SWB_EMPTY;
59526f9a767SRodney W. Grimes 				}
59626f9a767SRodney W. Grimes 				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
59726f9a767SRodney W. Grimes 					*dstaddrp = *srcaddrp;
59826f9a767SRodney W. Grimes 					*srcaddrp = SWB_EMPTY;
59926f9a767SRodney W. Grimes 					swap_pager_setvalid(dstswp, i + dstoffset, 1);
60026f9a767SRodney W. Grimes 				}
60126f9a767SRodney W. Grimes 			}
60226f9a767SRodney W. Grimes 			/*
6030d94caffSDavid Greenman 			 * if the source is not empty at this point, then
6040d94caffSDavid Greenman 			 * deallocate the space.
60526f9a767SRodney W. Grimes 			 */
60626f9a767SRodney W. Grimes 			if (*srcaddrp != SWB_EMPTY) {
60726f9a767SRodney W. Grimes 				swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
60826f9a767SRodney W. Grimes 				*srcaddrp = SWB_EMPTY;
60926f9a767SRodney W. Grimes 			}
61026f9a767SRodney W. Grimes 		}
61126f9a767SRodney W. Grimes 	}
61226f9a767SRodney W. Grimes 
61326f9a767SRodney W. Grimes 	/*
61426f9a767SRodney W. Grimes 	 * deallocate the rest of the source object
61526f9a767SRodney W. Grimes 	 */
61626f9a767SRodney W. Grimes 	for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
61726f9a767SRodney W. Grimes 		int valid;
61826f9a767SRodney W. Grimes 		int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);
6190d94caffSDavid Greenman 
62026f9a767SRodney W. Grimes 		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
62126f9a767SRodney W. Grimes 			swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
62226f9a767SRodney W. Grimes 			*srcaddrp = SWB_EMPTY;
62326f9a767SRodney W. Grimes 		}
62426f9a767SRodney W. Grimes 	}
62526f9a767SRodney W. Grimes 
62626f9a767SRodney W. Grimes 	splx(s);
62726f9a767SRodney W. Grimes 
	/* the source pager owns no swap any more; tear it down */
62826f9a767SRodney W. Grimes 	free((caddr_t) srcswp->sw_blocks, M_VMPGDATA);
62926f9a767SRodney W. Grimes 	srcswp->sw_blocks = 0;
63026f9a767SRodney W. Grimes 	free((caddr_t) srcswp, M_VMPGDATA);
63126f9a767SRodney W. Grimes 	srcpager->pg_data = 0;
63226f9a767SRodney W. Grimes 	free((caddr_t) srcpager, M_VMPAGER);
63326f9a767SRodney W. Grimes 
63426f9a767SRodney W. Grimes 	return;
63526f9a767SRodney W. Grimes }
63626f9a767SRodney W. Grimes 
63726f9a767SRodney W. Grimes 
63826f9a767SRodney W. Grimes void
639df8bae1dSRodney W. Grimes swap_pager_dealloc(pager)
640df8bae1dSRodney W. Grimes 	vm_pager_t pager;
641df8bae1dSRodney W. Grimes {
64226f9a767SRodney W. Grimes 	register int i, j;
643df8bae1dSRodney W. Grimes 	register sw_blk_t bp;
644df8bae1dSRodney W. Grimes 	register sw_pager_t swp;
645df8bae1dSRodney W. Grimes 	int s;
646df8bae1dSRodney W. Grimes 
647df8bae1dSRodney W. Grimes 	/*
6480d94caffSDavid Greenman 	 * Remove from list right away so lookups will fail if we block for
6490d94caffSDavid Greenman 	 * pageout completion.
650df8bae1dSRodney W. Grimes 	 */
65126f9a767SRodney W. Grimes 	s = splbio();
652df8bae1dSRodney W. Grimes 	swp = (sw_pager_t) pager->pg_data;
653df8bae1dSRodney W. Grimes 	if (swp->sw_flags & SW_NAMED) {
654df8bae1dSRodney W. Grimes 		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
655df8bae1dSRodney W. Grimes 		swp->sw_flags &= ~SW_NAMED;
65626f9a767SRodney W. Grimes 	} else {
65726f9a767SRodney W. Grimes 		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
658df8bae1dSRodney W. Grimes 	}
659df8bae1dSRodney W. Grimes 	/*
6600d94caffSDavid Greenman 	 * Wait for all pageouts to finish and remove all entries from
6610d94caffSDavid Greenman 	 * cleaning list.
662df8bae1dSRodney W. Grimes 	 */
66326f9a767SRodney W. Grimes 
664df8bae1dSRodney W. Grimes 	while (swp->sw_poip) {
66526f9a767SRodney W. Grimes 		tsleep((caddr_t) swp, PVM, "swpout", 0);
666df8bae1dSRodney W. Grimes 	}
667df8bae1dSRodney W. Grimes 	splx(s);
66826f9a767SRodney W. Grimes 
66926f9a767SRodney W. Grimes 
67026f9a767SRodney W. Grimes 	(void) swap_pager_clean();
671df8bae1dSRodney W. Grimes 
672df8bae1dSRodney W. Grimes 	/*
673df8bae1dSRodney W. Grimes 	 * Free left over swap blocks
674df8bae1dSRodney W. Grimes 	 */
67526f9a767SRodney W. Grimes 	s = splbio();
67626f9a767SRodney W. Grimes 	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
67726f9a767SRodney W. Grimes 		for (j = 0; j < SWB_NPAGES; j++)
67826f9a767SRodney W. Grimes 			if (bp->swb_block[j] != SWB_EMPTY) {
67926f9a767SRodney W. Grimes 				swap_pager_freeswapspace((unsigned) bp->swb_block[j],
68026f9a767SRodney W. Grimes 				    (unsigned) bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
68126f9a767SRodney W. Grimes 				bp->swb_block[j] = SWB_EMPTY;
682df8bae1dSRodney W. Grimes 			}
68326f9a767SRodney W. Grimes 	}
68426f9a767SRodney W. Grimes 	splx(s);
68526f9a767SRodney W. Grimes 
686df8bae1dSRodney W. Grimes 	/*
687df8bae1dSRodney W. Grimes 	 * Free swap management resources
688df8bae1dSRodney W. Grimes 	 */
689df8bae1dSRodney W. Grimes 	free((caddr_t) swp->sw_blocks, M_VMPGDATA);
69026f9a767SRodney W. Grimes 	swp->sw_blocks = 0;
691df8bae1dSRodney W. Grimes 	free((caddr_t) swp, M_VMPGDATA);
69226f9a767SRodney W. Grimes 	pager->pg_data = 0;
693df8bae1dSRodney W. Grimes 	free((caddr_t) pager, M_VMPAGER);
694df8bae1dSRodney W. Grimes }
695df8bae1dSRodney W. Grimes 
69626f9a767SRodney W. Grimes /*
69726f9a767SRodney W. Grimes  * swap_pager_getmulti can get multiple pages.
69826f9a767SRodney W. Grimes  */
69926f9a767SRodney W. Grimes int
70026f9a767SRodney W. Grimes swap_pager_getmulti(pager, m, count, reqpage, sync)
701df8bae1dSRodney W. Grimes 	vm_pager_t pager;
70226f9a767SRodney W. Grimes 	vm_page_t *m;
70326f9a767SRodney W. Grimes 	int count;
70426f9a767SRodney W. Grimes 	int reqpage;
705df8bae1dSRodney W. Grimes 	boolean_t sync;
706df8bae1dSRodney W. Grimes {
70726f9a767SRodney W. Grimes 	if (reqpage >= count)
70826f9a767SRodney W. Grimes 		panic("swap_pager_getmulti: reqpage >= count\n");
70926f9a767SRodney W. Grimes 	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
710df8bae1dSRodney W. Grimes }
711df8bae1dSRodney W. Grimes 
71226f9a767SRodney W. Grimes /*
71326f9a767SRodney W. Grimes  * swap_pager_getpage gets individual pages
71426f9a767SRodney W. Grimes  */
71526f9a767SRodney W. Grimes int
71626f9a767SRodney W. Grimes swap_pager_getpage(pager, m, sync)
717df8bae1dSRodney W. Grimes 	vm_pager_t pager;
71826f9a767SRodney W. Grimes 	vm_page_t m;
719df8bae1dSRodney W. Grimes 	boolean_t sync;
720df8bae1dSRodney W. Grimes {
72126f9a767SRodney W. Grimes 	vm_page_t marray[1];
72226f9a767SRodney W. Grimes 
72326f9a767SRodney W. Grimes 	marray[0] = m;
72426f9a767SRodney W. Grimes 	return swap_pager_input((sw_pager_t) pager->pg_data, marray, 1, 0);
72526f9a767SRodney W. Grimes }
72626f9a767SRodney W. Grimes 
72726f9a767SRodney W. Grimes int
72826f9a767SRodney W. Grimes swap_pager_putmulti(pager, m, c, sync, rtvals)
72926f9a767SRodney W. Grimes 	vm_pager_t pager;
73026f9a767SRodney W. Grimes 	vm_page_t *m;
73126f9a767SRodney W. Grimes 	int c;
73226f9a767SRodney W. Grimes 	boolean_t sync;
73326f9a767SRodney W. Grimes 	int *rtvals;
73426f9a767SRodney W. Grimes {
735df8bae1dSRodney W. Grimes 	int flags;
736df8bae1dSRodney W. Grimes 
737df8bae1dSRodney W. Grimes 	if (pager == NULL) {
73826f9a767SRodney W. Grimes 		(void) swap_pager_clean();
73926f9a767SRodney W. Grimes 		return VM_PAGER_OK;
740df8bae1dSRodney W. Grimes 	}
741df8bae1dSRodney W. Grimes 	flags = B_WRITE;
742df8bae1dSRodney W. Grimes 	if (!sync)
743df8bae1dSRodney W. Grimes 		flags |= B_ASYNC;
74426f9a767SRodney W. Grimes 
74526f9a767SRodney W. Grimes 	return swap_pager_output((sw_pager_t) pager->pg_data, m, c, flags, rtvals);
746df8bae1dSRodney W. Grimes }
747df8bae1dSRodney W. Grimes 
74826f9a767SRodney W. Grimes /*
74926f9a767SRodney W. Grimes  * swap_pager_putpage writes individual pages
75026f9a767SRodney W. Grimes  */
75126f9a767SRodney W. Grimes int
75226f9a767SRodney W. Grimes swap_pager_putpage(pager, m, sync)
75326f9a767SRodney W. Grimes 	vm_pager_t pager;
75426f9a767SRodney W. Grimes 	vm_page_t m;
75526f9a767SRodney W. Grimes 	boolean_t sync;
75626f9a767SRodney W. Grimes {
75726f9a767SRodney W. Grimes 	int flags;
75826f9a767SRodney W. Grimes 	vm_page_t marray[1];
75926f9a767SRodney W. Grimes 	int rtvals[1];
76026f9a767SRodney W. Grimes 
76126f9a767SRodney W. Grimes 
76226f9a767SRodney W. Grimes 	if (pager == NULL) {
76326f9a767SRodney W. Grimes 		(void) swap_pager_clean();
76426f9a767SRodney W. Grimes 		return VM_PAGER_OK;
76526f9a767SRodney W. Grimes 	}
76626f9a767SRodney W. Grimes 	marray[0] = m;
76726f9a767SRodney W. Grimes 	flags = B_WRITE;
76826f9a767SRodney W. Grimes 	if (!sync)
76926f9a767SRodney W. Grimes 		flags |= B_ASYNC;
77026f9a767SRodney W. Grimes 
77126f9a767SRodney W. Grimes 	swap_pager_output((sw_pager_t) pager->pg_data, marray, 1, flags, rtvals);
77226f9a767SRodney W. Grimes 
77326f9a767SRodney W. Grimes 	return rtvals[0];
77426f9a767SRodney W. Grimes }
77526f9a767SRodney W. Grimes 
77626f9a767SRodney W. Grimes static inline int
7770d94caffSDavid Greenman const
7780d94caffSDavid Greenman swap_pager_block_index(swp, offset)
77926f9a767SRodney W. Grimes 	sw_pager_t swp;
78026f9a767SRodney W. Grimes 	vm_offset_t offset;
78126f9a767SRodney W. Grimes {
78226f9a767SRodney W. Grimes 	return (offset / (SWB_NPAGES * PAGE_SIZE));
78326f9a767SRodney W. Grimes }
78426f9a767SRodney W. Grimes 
78526f9a767SRodney W. Grimes static inline int
7860d94caffSDavid Greenman const
7870d94caffSDavid Greenman swap_pager_block_offset(swp, offset)
78826f9a767SRodney W. Grimes 	sw_pager_t swp;
78926f9a767SRodney W. Grimes 	vm_offset_t offset;
79026f9a767SRodney W. Grimes {
79126f9a767SRodney W. Grimes 	return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE);
79226f9a767SRodney W. Grimes }
79326f9a767SRodney W. Grimes 
79426f9a767SRodney W. Grimes /*
79526f9a767SRodney W. Grimes  * _swap_pager_haspage returns TRUE if the pager has data that has
79626f9a767SRodney W. Grimes  * been written out.
79726f9a767SRodney W. Grimes  */
798df8bae1dSRodney W. Grimes static boolean_t
79926f9a767SRodney W. Grimes _swap_pager_haspage(swp, offset)
80026f9a767SRodney W. Grimes 	sw_pager_t swp;
80126f9a767SRodney W. Grimes 	vm_offset_t offset;
80226f9a767SRodney W. Grimes {
80326f9a767SRodney W. Grimes 	register sw_blk_t swb;
80426f9a767SRodney W. Grimes 	int ix;
80526f9a767SRodney W. Grimes 
80626f9a767SRodney W. Grimes 	ix = offset / (SWB_NPAGES * PAGE_SIZE);
80726f9a767SRodney W. Grimes 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
80826f9a767SRodney W. Grimes 		return (FALSE);
80926f9a767SRodney W. Grimes 	}
81026f9a767SRodney W. Grimes 	swb = &swp->sw_blocks[ix];
81126f9a767SRodney W. Grimes 	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
81226f9a767SRodney W. Grimes 	if (swb->swb_block[ix] != SWB_EMPTY) {
81326f9a767SRodney W. Grimes 		if (swb->swb_valid & (1 << ix))
81426f9a767SRodney W. Grimes 			return TRUE;
81526f9a767SRodney W. Grimes 	}
81626f9a767SRodney W. Grimes 	return (FALSE);
81726f9a767SRodney W. Grimes }
81826f9a767SRodney W. Grimes 
81926f9a767SRodney W. Grimes /*
82026f9a767SRodney W. Grimes  * swap_pager_haspage is the externally accessible version of
82126f9a767SRodney W. Grimes  * _swap_pager_haspage above.  this routine takes a vm_pager_t
82226f9a767SRodney W. Grimes  * for an argument instead of sw_pager_t.
82326f9a767SRodney W. Grimes  */
82426f9a767SRodney W. Grimes boolean_t
825df8bae1dSRodney W. Grimes swap_pager_haspage(pager, offset)
826df8bae1dSRodney W. Grimes 	vm_pager_t pager;
827df8bae1dSRodney W. Grimes 	vm_offset_t offset;
828df8bae1dSRodney W. Grimes {
82926f9a767SRodney W. Grimes 	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
830df8bae1dSRodney W. Grimes }
831df8bae1dSRodney W. Grimes 
832df8bae1dSRodney W. Grimes /*
83326f9a767SRodney W. Grimes  * swap_pager_freepage is a convienience routine that clears the busy
83426f9a767SRodney W. Grimes  * bit and deallocates a page.
835df8bae1dSRodney W. Grimes  */
83626f9a767SRodney W. Grimes static void
83726f9a767SRodney W. Grimes swap_pager_freepage(m)
83826f9a767SRodney W. Grimes 	vm_page_t m;
83926f9a767SRodney W. Grimes {
84026f9a767SRodney W. Grimes 	PAGE_WAKEUP(m);
84126f9a767SRodney W. Grimes 	vm_page_free(m);
84226f9a767SRodney W. Grimes }
84326f9a767SRodney W. Grimes 
84426f9a767SRodney W. Grimes /*
84526f9a767SRodney W. Grimes  * swap_pager_ridpages is a convienience routine that deallocates all
84626f9a767SRodney W. Grimes  * but the required page.  this is usually used in error returns that
84726f9a767SRodney W. Grimes  * need to invalidate the "extra" readahead pages.
84826f9a767SRodney W. Grimes  */
84926f9a767SRodney W. Grimes static void
85026f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage)
85126f9a767SRodney W. Grimes 	vm_page_t *m;
85226f9a767SRodney W. Grimes 	int count;
85326f9a767SRodney W. Grimes 	int reqpage;
85426f9a767SRodney W. Grimes {
85526f9a767SRodney W. Grimes 	int i;
8560d94caffSDavid Greenman 
85726f9a767SRodney W. Grimes 	for (i = 0; i < count; i++)
85826f9a767SRodney W. Grimes 		if (i != reqpage)
85926f9a767SRodney W. Grimes 			swap_pager_freepage(m[i]);
86026f9a767SRodney W. Grimes }
86126f9a767SRodney W. Grimes 
86226f9a767SRodney W. Grimes int swapwritecount = 0;
86326f9a767SRodney W. Grimes 
86426f9a767SRodney W. Grimes /*
86526f9a767SRodney W. Grimes  * swap_pager_iodone1 is the completion routine for both reads and async writes
86626f9a767SRodney W. Grimes  */
86726f9a767SRodney W. Grimes void
86826f9a767SRodney W. Grimes swap_pager_iodone1(bp)
86926f9a767SRodney W. Grimes 	struct buf *bp;
87026f9a767SRodney W. Grimes {
87126f9a767SRodney W. Grimes 	bp->b_flags |= B_DONE;
87226f9a767SRodney W. Grimes 	bp->b_flags &= ~B_ASYNC;
87326f9a767SRodney W. Grimes 	wakeup((caddr_t) bp);
87426f9a767SRodney W. Grimes /*
87526f9a767SRodney W. Grimes 	if ((bp->b_flags & B_READ) == 0)
87626f9a767SRodney W. Grimes 		vwakeup(bp);
87726f9a767SRodney W. Grimes */
87826f9a767SRodney W. Grimes }
87926f9a767SRodney W. Grimes 
88026f9a767SRodney W. Grimes 
88126f9a767SRodney W. Grimes int
88226f9a767SRodney W. Grimes swap_pager_input(swp, m, count, reqpage)
883df8bae1dSRodney W. Grimes 	register sw_pager_t swp;
88426f9a767SRodney W. Grimes 	vm_page_t *m;
88526f9a767SRodney W. Grimes 	int count, reqpage;
886df8bae1dSRodney W. Grimes {
887df8bae1dSRodney W. Grimes 	register struct buf *bp;
88826f9a767SRodney W. Grimes 	sw_blk_t swb[count];
889df8bae1dSRodney W. Grimes 	register int s;
89026f9a767SRodney W. Grimes 	int i;
891df8bae1dSRodney W. Grimes 	boolean_t rv;
89226f9a767SRodney W. Grimes 	vm_offset_t kva, off[count];
893df8bae1dSRodney W. Grimes 	swp_clean_t spc;
89426f9a767SRodney W. Grimes 	vm_offset_t paging_offset;
89526f9a767SRodney W. Grimes 	vm_object_t object;
89626f9a767SRodney W. Grimes 	int reqaddr[count];
897df8bae1dSRodney W. Grimes 
89826f9a767SRodney W. Grimes 	int first, last;
89926f9a767SRodney W. Grimes 	int failed;
90026f9a767SRodney W. Grimes 	int reqdskregion;
901df8bae1dSRodney W. Grimes 
90226f9a767SRodney W. Grimes 	object = m[reqpage]->object;
90326f9a767SRodney W. Grimes 	paging_offset = object->paging_offset;
904df8bae1dSRodney W. Grimes 	/*
9050d94caffSDavid Greenman 	 * First determine if the page exists in the pager if this is a sync
9060d94caffSDavid Greenman 	 * read.  This quickly handles cases where we are following shadow
9070d94caffSDavid Greenman 	 * chains looking for the top level object with the page.
908df8bae1dSRodney W. Grimes 	 */
90926f9a767SRodney W. Grimes 	if (swp->sw_blocks == NULL) {
91026f9a767SRodney W. Grimes 		swap_pager_ridpages(m, count, reqpage);
911df8bae1dSRodney W. Grimes 		return (VM_PAGER_FAIL);
912df8bae1dSRodney W. Grimes 	}
91326f9a767SRodney W. Grimes 	for (i = 0; i < count; i++) {
91426f9a767SRodney W. Grimes 		vm_offset_t foff = m[i]->offset + paging_offset;
91526f9a767SRodney W. Grimes 		int ix = swap_pager_block_index(swp, foff);
9160d94caffSDavid Greenman 
91726f9a767SRodney W. Grimes 		if (ix >= swp->sw_nblocks) {
91826f9a767SRodney W. Grimes 			int j;
9190d94caffSDavid Greenman 
92026f9a767SRodney W. Grimes 			if (i <= reqpage) {
92126f9a767SRodney W. Grimes 				swap_pager_ridpages(m, count, reqpage);
922df8bae1dSRodney W. Grimes 				return (VM_PAGER_FAIL);
92326f9a767SRodney W. Grimes 			}
92426f9a767SRodney W. Grimes 			for (j = i; j < count; j++) {
92526f9a767SRodney W. Grimes 				swap_pager_freepage(m[j]);
92626f9a767SRodney W. Grimes 			}
92726f9a767SRodney W. Grimes 			count = i;
92826f9a767SRodney W. Grimes 			break;
92926f9a767SRodney W. Grimes 		}
93026f9a767SRodney W. Grimes 		swb[i] = &swp->sw_blocks[ix];
93126f9a767SRodney W. Grimes 		off[i] = swap_pager_block_offset(swp, foff);
93226f9a767SRodney W. Grimes 		reqaddr[i] = swb[i]->swb_block[off[i]];
93326f9a767SRodney W. Grimes 	}
93426f9a767SRodney W. Grimes 
93526f9a767SRodney W. Grimes 	/* make sure that our required input request is existant */
93626f9a767SRodney W. Grimes 
93726f9a767SRodney W. Grimes 	if (reqaddr[reqpage] == SWB_EMPTY ||
93826f9a767SRodney W. Grimes 	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
93926f9a767SRodney W. Grimes 		swap_pager_ridpages(m, count, reqpage);
94026f9a767SRodney W. Grimes 		return (VM_PAGER_FAIL);
94126f9a767SRodney W. Grimes 	}
94226f9a767SRodney W. Grimes 	reqdskregion = reqaddr[reqpage] / dmmax;
943df8bae1dSRodney W. Grimes 
944df8bae1dSRodney W. Grimes 	/*
94526f9a767SRodney W. Grimes 	 * search backwards for the first contiguous page to transfer
946df8bae1dSRodney W. Grimes 	 */
94726f9a767SRodney W. Grimes 	failed = 0;
94826f9a767SRodney W. Grimes 	first = 0;
94926f9a767SRodney W. Grimes 	for (i = reqpage - 1; i >= 0; --i) {
95026f9a767SRodney W. Grimes 		if (failed || (reqaddr[i] == SWB_EMPTY) ||
95126f9a767SRodney W. Grimes 		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
95226f9a767SRodney W. Grimes 		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
95326f9a767SRodney W. Grimes 		    ((reqaddr[i] / dmmax) != reqdskregion)) {
95426f9a767SRodney W. Grimes 			failed = 1;
95526f9a767SRodney W. Grimes 			swap_pager_freepage(m[i]);
95626f9a767SRodney W. Grimes 			if (first == 0)
95726f9a767SRodney W. Grimes 				first = i + 1;
95826f9a767SRodney W. Grimes 		}
959df8bae1dSRodney W. Grimes 	}
960df8bae1dSRodney W. Grimes 	/*
96126f9a767SRodney W. Grimes 	 * search forwards for the last contiguous page to transfer
962df8bae1dSRodney W. Grimes 	 */
96326f9a767SRodney W. Grimes 	failed = 0;
96426f9a767SRodney W. Grimes 	last = count;
96526f9a767SRodney W. Grimes 	for (i = reqpage + 1; i < count; i++) {
96626f9a767SRodney W. Grimes 		if (failed || (reqaddr[i] == SWB_EMPTY) ||
96726f9a767SRodney W. Grimes 		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
96826f9a767SRodney W. Grimes 		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
96926f9a767SRodney W. Grimes 		    ((reqaddr[i] / dmmax) != reqdskregion)) {
97026f9a767SRodney W. Grimes 			failed = 1;
97126f9a767SRodney W. Grimes 			swap_pager_freepage(m[i]);
97226f9a767SRodney W. Grimes 			if (last == count)
97326f9a767SRodney W. Grimes 				last = i;
97426f9a767SRodney W. Grimes 		}
97526f9a767SRodney W. Grimes 	}
97626f9a767SRodney W. Grimes 
97726f9a767SRodney W. Grimes 	count = last;
97826f9a767SRodney W. Grimes 	if (first != 0) {
97926f9a767SRodney W. Grimes 		for (i = first; i < count; i++) {
98026f9a767SRodney W. Grimes 			m[i - first] = m[i];
98126f9a767SRodney W. Grimes 			reqaddr[i - first] = reqaddr[i];
98226f9a767SRodney W. Grimes 			off[i - first] = off[i];
98326f9a767SRodney W. Grimes 		}
98426f9a767SRodney W. Grimes 		count -= first;
98526f9a767SRodney W. Grimes 		reqpage -= first;
98626f9a767SRodney W. Grimes 	}
98726f9a767SRodney W. Grimes 	++swb[reqpage]->swb_locked;
98826f9a767SRodney W. Grimes 
98926f9a767SRodney W. Grimes 	/*
9900d94caffSDavid Greenman 	 * at this point: "m" is a pointer to the array of vm_page_t for
9910d94caffSDavid Greenman 	 * paging I/O "count" is the number of vm_page_t entries represented
9920d94caffSDavid Greenman 	 * by "m" "object" is the vm_object_t for I/O "reqpage" is the index
9930d94caffSDavid Greenman 	 * into "m" for the page actually faulted
99426f9a767SRodney W. Grimes 	 */
99526f9a767SRodney W. Grimes 
99626f9a767SRodney W. Grimes 	spc = NULL;	/* we might not use an spc data structure */
99726f9a767SRodney W. Grimes 
99816f62314SDavid Greenman 	if (count == 1) {
99926f9a767SRodney W. Grimes 		/*
10000d94caffSDavid Greenman 		 * if a kva has not been allocated, we can only do a one page
10010d94caffSDavid Greenman 		 * transfer, so we free the other pages that might have been
10020d94caffSDavid Greenman 		 * allocated by vm_fault.
100326f9a767SRodney W. Grimes 		 */
100426f9a767SRodney W. Grimes 		swap_pager_ridpages(m, count, reqpage);
100526f9a767SRodney W. Grimes 		m[0] = m[reqpage];
100626f9a767SRodney W. Grimes 		reqaddr[0] = reqaddr[reqpage];
100726f9a767SRodney W. Grimes 
100826f9a767SRodney W. Grimes 		count = 1;
100926f9a767SRodney W. Grimes 		reqpage = 0;
101026f9a767SRodney W. Grimes 		/*
10110d94caffSDavid Greenman 		 * get a swap pager clean data structure, block until we get
10120d94caffSDavid Greenman 		 * it
101326f9a767SRodney W. Grimes 		 */
1014df8bae1dSRodney W. Grimes 		if (swap_pager_free.tqh_first == NULL) {
1015df8bae1dSRodney W. Grimes 			s = splbio();
101626f9a767SRodney W. Grimes 			if (curproc == pageproc)
101726f9a767SRodney W. Grimes 				(void) swap_pager_clean();
101826f9a767SRodney W. Grimes 			else
101926f9a767SRodney W. Grimes 				wakeup((caddr_t) & vm_pages_needed);
102026f9a767SRodney W. Grimes 			while (swap_pager_free.tqh_first == NULL) {
102126f9a767SRodney W. Grimes 				swap_pager_needflags |= SWAP_FREE_NEEDED;
102226f9a767SRodney W. Grimes 				tsleep((caddr_t) & swap_pager_free,
102326f9a767SRodney W. Grimes 				    PVM, "swpfre", 0);
102426f9a767SRodney W. Grimes 				if (curproc == pageproc)
102526f9a767SRodney W. Grimes 					(void) swap_pager_clean();
102626f9a767SRodney W. Grimes 				else
102726f9a767SRodney W. Grimes 					wakeup((caddr_t) & vm_pages_needed);
1028df8bae1dSRodney W. Grimes 			}
1029df8bae1dSRodney W. Grimes 			splx(s);
103026f9a767SRodney W. Grimes 		}
103126f9a767SRodney W. Grimes 		spc = swap_pager_free.tqh_first;
103226f9a767SRodney W. Grimes 		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
103326f9a767SRodney W. Grimes 		kva = spc->spc_kva;
103426f9a767SRodney W. Grimes 		bp = spc->spc_bp;
103526f9a767SRodney W. Grimes 		bzero(bp, sizeof *bp);
103626f9a767SRodney W. Grimes 		bp->b_spc = spc;
10377609ab12SDavid Greenman 		bp->b_vnbufs.le_next = NOLIST;
103826f9a767SRodney W. Grimes 	} else {
103916f62314SDavid Greenman 		/*
104016f62314SDavid Greenman 		 * Get a swap buffer header to perform the IO
104116f62314SDavid Greenman 		 */
104226f9a767SRodney W. Grimes 		bp = getpbuf();
104316f62314SDavid Greenman 		kva = (vm_offset_t) bp->b_data;
104426f9a767SRodney W. Grimes 	}
104526f9a767SRodney W. Grimes 
104616f62314SDavid Greenman 	/*
104716f62314SDavid Greenman 	 * map our page(s) into kva for input
104816f62314SDavid Greenman 	 */
104916f62314SDavid Greenman 	pmap_qenter(kva, m, count);
105016f62314SDavid Greenman 
105126f9a767SRodney W. Grimes 	bp->b_flags = B_BUSY | B_READ | B_CALL;
105226f9a767SRodney W. Grimes 	bp->b_iodone = swap_pager_iodone1;
1053df8bae1dSRodney W. Grimes 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
105426f9a767SRodney W. Grimes 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
105526f9a767SRodney W. Grimes 	crhold(bp->b_rcred);
105626f9a767SRodney W. Grimes 	crhold(bp->b_wcred);
105726f9a767SRodney W. Grimes 	bp->b_un.b_addr = (caddr_t) kva;
105826f9a767SRodney W. Grimes 	bp->b_blkno = reqaddr[0];
105926f9a767SRodney W. Grimes 	bp->b_bcount = PAGE_SIZE * count;
106026f9a767SRodney W. Grimes 	bp->b_bufsize = PAGE_SIZE * count;
106126f9a767SRodney W. Grimes 
10620d94caffSDavid Greenman 	pbgetvp(swapdev_vp, bp);
106326f9a767SRodney W. Grimes 	swp->sw_piip++;
1064df8bae1dSRodney W. Grimes 
1065976e77fcSDavid Greenman 	cnt.v_swapin++;
1066976e77fcSDavid Greenman 	cnt.v_swappgsin += count;
1067df8bae1dSRodney W. Grimes 	/*
106826f9a767SRodney W. Grimes 	 * perform the I/O
1069df8bae1dSRodney W. Grimes 	 */
1070df8bae1dSRodney W. Grimes 	VOP_STRATEGY(bp);
107126f9a767SRodney W. Grimes 
107226f9a767SRodney W. Grimes 	/*
107326f9a767SRodney W. Grimes 	 * wait for the sync I/O to complete
107426f9a767SRodney W. Grimes 	 */
10757609ab12SDavid Greenman 	s = splbio();
107626f9a767SRodney W. Grimes 	while ((bp->b_flags & B_DONE) == 0) {
107726f9a767SRodney W. Grimes 		tsleep((caddr_t) bp, PVM, "swread", 0);
1078df8bae1dSRodney W. Grimes 	}
10791b119d9dSDavid Greenman 
10801b119d9dSDavid Greenman 	if (bp->b_flags & B_ERROR) {
10811b119d9dSDavid Greenman 		printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
10821b119d9dSDavid Greenman 		    bp->b_blkno, bp->b_bcount, bp->b_error);
1083a83c285cSDavid Greenman 		rv = VM_PAGER_ERROR;
10841b119d9dSDavid Greenman 	} else {
10851b119d9dSDavid Greenman 		rv = VM_PAGER_OK;
10861b119d9dSDavid Greenman 	}
108726f9a767SRodney W. Grimes 
108826f9a767SRodney W. Grimes 	--swp->sw_piip;
108926f9a767SRodney W. Grimes 	if (swp->sw_piip == 0)
109026f9a767SRodney W. Grimes 		wakeup((caddr_t) swp);
109126f9a767SRodney W. Grimes 
10920d94caffSDavid Greenman 
109326f9a767SRodney W. Grimes 	/*
10940d94caffSDavid Greenman 	 * relpbuf does this, but we maintain our own buffer list also...
109526f9a767SRodney W. Grimes 	 */
1096df8bae1dSRodney W. Grimes 	if (bp->b_vp)
10970d94caffSDavid Greenman 		pbrelvp(bp);
109826f9a767SRodney W. Grimes 
1099df8bae1dSRodney W. Grimes 	splx(s);
110026f9a767SRodney W. Grimes 	--swb[reqpage]->swb_locked;
110126f9a767SRodney W. Grimes 
110226f9a767SRodney W. Grimes 	/*
110326f9a767SRodney W. Grimes 	 * remove the mapping for kernel virtual
110426f9a767SRodney W. Grimes 	 */
110516f62314SDavid Greenman 	pmap_qremove(kva, count);
110626f9a767SRodney W. Grimes 
110726f9a767SRodney W. Grimes 	if (spc) {
11080d94caffSDavid Greenman 		if (bp->b_flags & B_WANTED)
11090d94caffSDavid Greenman 			wakeup((caddr_t) bp);
111026f9a767SRodney W. Grimes 		/*
111126f9a767SRodney W. Grimes 		 * if we have used an spc, we need to free it.
111226f9a767SRodney W. Grimes 		 */
111326f9a767SRodney W. Grimes 		if (bp->b_rcred != NOCRED)
111426f9a767SRodney W. Grimes 			crfree(bp->b_rcred);
111526f9a767SRodney W. Grimes 		if (bp->b_wcred != NOCRED)
111626f9a767SRodney W. Grimes 			crfree(bp->b_wcred);
111726f9a767SRodney W. Grimes 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
111826f9a767SRodney W. Grimes 		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
111926f9a767SRodney W. Grimes 			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
112026f9a767SRodney W. Grimes 			wakeup((caddr_t) & swap_pager_free);
112126f9a767SRodney W. Grimes 		}
112226f9a767SRodney W. Grimes 	} else {
112326f9a767SRodney W. Grimes 		/*
112426f9a767SRodney W. Grimes 		 * release the physical I/O buffer
112526f9a767SRodney W. Grimes 		 */
112626f9a767SRodney W. Grimes 		relpbuf(bp);
112726f9a767SRodney W. Grimes 		/*
112826f9a767SRodney W. Grimes 		 * finish up input if everything is ok
112926f9a767SRodney W. Grimes 		 */
113026f9a767SRodney W. Grimes 		if (rv == VM_PAGER_OK) {
113126f9a767SRodney W. Grimes 			for (i = 0; i < count; i++) {
113226f9a767SRodney W. Grimes 				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
11330d94caffSDavid Greenman 				m[i]->dirty = 0;
113426f9a767SRodney W. Grimes 				if (i != reqpage) {
113526f9a767SRodney W. Grimes 					/*
11360d94caffSDavid Greenman 					 * whether or not to leave the page
11370d94caffSDavid Greenman 					 * activated is up in the air, but we
11380d94caffSDavid Greenman 					 * should put the page on a page queue
11390d94caffSDavid Greenman 					 * somewhere. (it already is in the
11400d94caffSDavid Greenman 					 * object). After some emperical
11410d94caffSDavid Greenman 					 * results, it is best to deactivate
11420d94caffSDavid Greenman 					 * the readahead pages.
114326f9a767SRodney W. Grimes 					 */
11440d94caffSDavid Greenman 					if ((i == reqpage - 1) || (i == reqpage + 1))
11450d94caffSDavid Greenman 						vm_page_activate(m[i]);
11460d94caffSDavid Greenman 					else
114726f9a767SRodney W. Grimes 						vm_page_deactivate(m[i]);
114826f9a767SRodney W. Grimes 
114926f9a767SRodney W. Grimes 					/*
11500d94caffSDavid Greenman 					 * just in case someone was asking for
11510d94caffSDavid Greenman 					 * this page we now tell them that it
11520d94caffSDavid Greenman 					 * is ok to use
115326f9a767SRodney W. Grimes 					 */
11540d94caffSDavid Greenman 					m[i]->valid = VM_PAGE_BITS_ALL;
115526f9a767SRodney W. Grimes 					PAGE_WAKEUP(m[i]);
115626f9a767SRodney W. Grimes 				}
115726f9a767SRodney W. Grimes 			}
11582e1e24ddSDavid Greenman 			/*
11592e1e24ddSDavid Greenman 			 * If we're out of swap space, then attempt to free
11602e1e24ddSDavid Greenman 			 * some whenever pages are brought in. We must clear
11612e1e24ddSDavid Greenman 			 * the clean flag so that the page contents will be
11622e1e24ddSDavid Greenman 			 * preserved.
11632e1e24ddSDavid Greenman 			 */
116426f9a767SRodney W. Grimes 			if (swap_pager_full) {
11652e1e24ddSDavid Greenman 				for (i = 0; i < count; i++) {
11660d94caffSDavid Greenman 					m[i]->dirty = VM_PAGE_BITS_ALL;
11672e1e24ddSDavid Greenman 				}
116826f9a767SRodney W. Grimes 				_swap_pager_freespace(swp, m[0]->offset + paging_offset, count * PAGE_SIZE);
116926f9a767SRodney W. Grimes 			}
117026f9a767SRodney W. Grimes 		} else {
117126f9a767SRodney W. Grimes 			swap_pager_ridpages(m, count, reqpage);
117226f9a767SRodney W. Grimes 		}
117326f9a767SRodney W. Grimes 	}
1174df8bae1dSRodney W. Grimes 	return (rv);
1175df8bae1dSRodney W. Grimes }
1176df8bae1dSRodney W. Grimes 
117726f9a767SRodney W. Grimes int
117826f9a767SRodney W. Grimes swap_pager_output(swp, m, count, flags, rtvals)
117926f9a767SRodney W. Grimes 	register sw_pager_t swp;
118026f9a767SRodney W. Grimes 	vm_page_t *m;
118126f9a767SRodney W. Grimes 	int count;
118226f9a767SRodney W. Grimes 	int flags;
118326f9a767SRodney W. Grimes 	int *rtvals;
1184df8bae1dSRodney W. Grimes {
118526f9a767SRodney W. Grimes 	register struct buf *bp;
118626f9a767SRodney W. Grimes 	sw_blk_t swb[count];
118726f9a767SRodney W. Grimes 	register int s;
118826f9a767SRodney W. Grimes 	int i, j, ix;
118926f9a767SRodney W. Grimes 	boolean_t rv;
119026f9a767SRodney W. Grimes 	vm_offset_t kva, off, foff;
119126f9a767SRodney W. Grimes 	swp_clean_t spc;
119226f9a767SRodney W. Grimes 	vm_offset_t paging_offset;
1193df8bae1dSRodney W. Grimes 	vm_object_t object;
119426f9a767SRodney W. Grimes 	int reqaddr[count];
119526f9a767SRodney W. Grimes 	int failed;
1196df8bae1dSRodney W. Grimes 
119724ea4a96SDavid Greenman 	if (vm_swap_size)
119824ea4a96SDavid Greenman 		no_swap_space = 0;
119924ea4a96SDavid Greenman 	if (no_swap_space) {
12005663e6deSDavid Greenman 		for (i = 0; i < count; i++)
12015663e6deSDavid Greenman 			rtvals[i] = VM_PAGER_FAIL;
12025663e6deSDavid Greenman 		return VM_PAGER_FAIL;
12035663e6deSDavid Greenman 	}
120426f9a767SRodney W. Grimes 	spc = NULL;
120526f9a767SRodney W. Grimes 
120626f9a767SRodney W. Grimes 	object = m[0]->object;
120726f9a767SRodney W. Grimes 	paging_offset = object->paging_offset;
120826f9a767SRodney W. Grimes 
120926f9a767SRodney W. Grimes 	failed = 0;
121026f9a767SRodney W. Grimes 	for (j = 0; j < count; j++) {
121126f9a767SRodney W. Grimes 		foff = m[j]->offset + paging_offset;
121226f9a767SRodney W. Grimes 		ix = swap_pager_block_index(swp, foff);
121326f9a767SRodney W. Grimes 		swb[j] = 0;
121426f9a767SRodney W. Grimes 		if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
121526f9a767SRodney W. Grimes 			rtvals[j] = VM_PAGER_FAIL;
121626f9a767SRodney W. Grimes 			failed = 1;
121726f9a767SRodney W. Grimes 			continue;
121826f9a767SRodney W. Grimes 		} else {
121926f9a767SRodney W. Grimes 			rtvals[j] = VM_PAGER_OK;
122026f9a767SRodney W. Grimes 		}
122126f9a767SRodney W. Grimes 		swb[j] = &swp->sw_blocks[ix];
122226f9a767SRodney W. Grimes 		++swb[j]->swb_locked;
122326f9a767SRodney W. Grimes 		if (failed) {
122426f9a767SRodney W. Grimes 			rtvals[j] = VM_PAGER_FAIL;
122526f9a767SRodney W. Grimes 			continue;
122626f9a767SRodney W. Grimes 		}
122726f9a767SRodney W. Grimes 		off = swap_pager_block_offset(swp, foff);
122826f9a767SRodney W. Grimes 		reqaddr[j] = swb[j]->swb_block[off];
122926f9a767SRodney W. Grimes 		if (reqaddr[j] == SWB_EMPTY) {
123026f9a767SRodney W. Grimes 			int blk;
123126f9a767SRodney W. Grimes 			int tries;
123226f9a767SRodney W. Grimes 			int ntoget;
12330d94caffSDavid Greenman 
123426f9a767SRodney W. Grimes 			tries = 0;
1235df8bae1dSRodney W. Grimes 			s = splbio();
123626f9a767SRodney W. Grimes 
1237df8bae1dSRodney W. Grimes 			/*
12380d94caffSDavid Greenman 			 * if any other pages have been allocated in this
12390d94caffSDavid Greenman 			 * block, we only try to get one page.
1240df8bae1dSRodney W. Grimes 			 */
124126f9a767SRodney W. Grimes 			for (i = 0; i < SWB_NPAGES; i++) {
124226f9a767SRodney W. Grimes 				if (swb[j]->swb_block[i] != SWB_EMPTY)
1243df8bae1dSRodney W. Grimes 					break;
1244df8bae1dSRodney W. Grimes 			}
124526f9a767SRodney W. Grimes 
124626f9a767SRodney W. Grimes 
124726f9a767SRodney W. Grimes 			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
124826f9a767SRodney W. Grimes 			/*
12490d94caffSDavid Greenman 			 * this code is a little conservative, but works (the
12500d94caffSDavid Greenman 			 * intent of this code is to allocate small chunks for
12510d94caffSDavid Greenman 			 * small objects)
125226f9a767SRodney W. Grimes 			 */
125326f9a767SRodney W. Grimes 			if ((m[j]->offset == 0) && (ntoget * PAGE_SIZE > object->size)) {
125426f9a767SRodney W. Grimes 				ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
125526f9a767SRodney W. Grimes 			}
125626f9a767SRodney W. Grimes 	retrygetspace:
125726f9a767SRodney W. Grimes 			if (!swap_pager_full && ntoget > 1 &&
125826f9a767SRodney W. Grimes 			    swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {
125926f9a767SRodney W. Grimes 
126026f9a767SRodney W. Grimes 				for (i = 0; i < ntoget; i++) {
126126f9a767SRodney W. Grimes 					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
126226f9a767SRodney W. Grimes 					swb[j]->swb_valid = 0;
126326f9a767SRodney W. Grimes 				}
126426f9a767SRodney W. Grimes 
126526f9a767SRodney W. Grimes 				reqaddr[j] = swb[j]->swb_block[off];
126626f9a767SRodney W. Grimes 			} else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
126726f9a767SRodney W. Grimes 				&swb[j]->swb_block[off])) {
126826f9a767SRodney W. Grimes 				/*
12690d94caffSDavid Greenman 				 * if the allocation has failed, we try to
12700d94caffSDavid Greenman 				 * reclaim space and retry.
127126f9a767SRodney W. Grimes 				 */
127226f9a767SRodney W. Grimes 				if (++tries == 1) {
127326f9a767SRodney W. Grimes 					swap_pager_reclaim();
127426f9a767SRodney W. Grimes 					goto retrygetspace;
127526f9a767SRodney W. Grimes 				}
127626f9a767SRodney W. Grimes 				rtvals[j] = VM_PAGER_AGAIN;
127726f9a767SRodney W. Grimes 				failed = 1;
127824ea4a96SDavid Greenman 				swap_pager_full = 1;
127926f9a767SRodney W. Grimes 			} else {
128026f9a767SRodney W. Grimes 				reqaddr[j] = swb[j]->swb_block[off];
128126f9a767SRodney W. Grimes 				swb[j]->swb_valid &= ~(1 << off);
1282df8bae1dSRodney W. Grimes 			}
1283df8bae1dSRodney W. Grimes 			splx(s);
128426f9a767SRodney W. Grimes 		}
128526f9a767SRodney W. Grimes 	}
128626f9a767SRodney W. Grimes 
128726f9a767SRodney W. Grimes 	/*
128826f9a767SRodney W. Grimes 	 * search forwards for the last contiguous page to transfer
128926f9a767SRodney W. Grimes 	 */
129026f9a767SRodney W. Grimes 	failed = 0;
129126f9a767SRodney W. Grimes 	for (i = 0; i < count; i++) {
129226f9a767SRodney W. Grimes 		if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
129326f9a767SRodney W. Grimes 		    (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
129426f9a767SRodney W. Grimes 		    (rtvals[i] != VM_PAGER_OK)) {
129526f9a767SRodney W. Grimes 			failed = 1;
129626f9a767SRodney W. Grimes 			if (rtvals[i] == VM_PAGER_OK)
129726f9a767SRodney W. Grimes 				rtvals[i] = VM_PAGER_AGAIN;
129826f9a767SRodney W. Grimes 		}
129926f9a767SRodney W. Grimes 	}
130026f9a767SRodney W. Grimes 
130126f9a767SRodney W. Grimes 	for (i = 0; i < count; i++) {
130226f9a767SRodney W. Grimes 		if (rtvals[i] != VM_PAGER_OK) {
130326f9a767SRodney W. Grimes 			if (swb[i])
130426f9a767SRodney W. Grimes 				--swb[i]->swb_locked;
130526f9a767SRodney W. Grimes 		}
130626f9a767SRodney W. Grimes 	}
130726f9a767SRodney W. Grimes 
130826f9a767SRodney W. Grimes 	for (i = 0; i < count; i++)
130926f9a767SRodney W. Grimes 		if (rtvals[i] != VM_PAGER_OK)
131026f9a767SRodney W. Grimes 			break;
131126f9a767SRodney W. Grimes 
131226f9a767SRodney W. Grimes 	if (i == 0) {
131326f9a767SRodney W. Grimes 		return VM_PAGER_AGAIN;
131426f9a767SRodney W. Grimes 	}
131526f9a767SRodney W. Grimes 	count = i;
131626f9a767SRodney W. Grimes 	for (i = 0; i < count; i++) {
131726f9a767SRodney W. Grimes 		if (reqaddr[i] == SWB_EMPTY)
131826f9a767SRodney W. Grimes 			printf("I/O to empty block????\n");
131926f9a767SRodney W. Grimes 	}
132026f9a767SRodney W. Grimes 
132126f9a767SRodney W. Grimes 	/*
13220d94caffSDavid Greenman 	 * */
132326f9a767SRodney W. Grimes 
132426f9a767SRodney W. Grimes 	/*
13250d94caffSDavid Greenman 	 * For synchronous writes, we clean up all completed async pageouts.
132626f9a767SRodney W. Grimes 	 */
132726f9a767SRodney W. Grimes 	if ((flags & B_ASYNC) == 0) {
132826f9a767SRodney W. Grimes 		swap_pager_clean();
132926f9a767SRodney W. Grimes 	}
133026f9a767SRodney W. Grimes 	kva = 0;
133126f9a767SRodney W. Grimes 
133226f9a767SRodney W. Grimes 	/*
13330d94caffSDavid Greenman 	 * we allocate a new kva for transfers > 1 page but for transfers == 1
13340d94caffSDavid Greenman 	 * page, the swap_pager_free list contains entries that have
13350d94caffSDavid Greenman 	 * pre-allocated kva's (for efficiency). NOTE -- we do not use the
13360d94caffSDavid Greenman 	 * physical buffer pool or the preallocated associated kva's because
13370d94caffSDavid Greenman 	 * of the potential for deadlock.  This is very subtle -- but
13380d94caffSDavid Greenman 	 * deadlocks or resource contention must be avoided on pageouts -- or
13390d94caffSDavid Greenman 	 * your system will sleep (forever) !!!
134026f9a767SRodney W. Grimes 	 */
1341fff93ab6SDavid Greenman /*
134226f9a767SRodney W. Grimes 	if ( count > 1) {
134326f9a767SRodney W. Grimes 		kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE);
134426f9a767SRodney W. Grimes 		if( !kva) {
134526f9a767SRodney W. Grimes 			for (i = 0; i < count; i++) {
134626f9a767SRodney W. Grimes 				if( swb[i])
134726f9a767SRodney W. Grimes 					--swb[i]->swb_locked;
134826f9a767SRodney W. Grimes 				rtvals[i] = VM_PAGER_AGAIN;
134926f9a767SRodney W. Grimes 			}
135026f9a767SRodney W. Grimes 			return VM_PAGER_AGAIN;
135126f9a767SRodney W. Grimes 		}
135226f9a767SRodney W. Grimes 	}
1353fff93ab6SDavid Greenman */
135426f9a767SRodney W. Grimes 
135526f9a767SRodney W. Grimes 	/*
135626f9a767SRodney W. Grimes 	 * get a swap pager clean data structure, block until we get it
135726f9a767SRodney W. Grimes 	 */
13580d94caffSDavid Greenman 	if (swap_pager_free.tqh_first == NULL || swap_pager_free.tqh_first->spc_list.tqe_next == NULL || swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
135926f9a767SRodney W. Grimes 		s = splbio();
13600d94caffSDavid Greenman 		if (curproc == pageproc) {
136126f9a767SRodney W. Grimes 			(void) swap_pager_clean();
13620d94caffSDavid Greenman /*
13630d94caffSDavid Greenman 			splx(s);
13640d94caffSDavid Greenman 			return VM_PAGER_AGAIN;
13650d94caffSDavid Greenman */
13660d94caffSDavid Greenman 		} else
136726f9a767SRodney W. Grimes 			wakeup((caddr_t) & vm_pages_needed);
13680d94caffSDavid Greenman 		while (swap_pager_free.tqh_first == NULL || swap_pager_free.tqh_first->spc_list.tqe_next == NULL || swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
13690d94caffSDavid Greenman 			if (curproc == pageproc &&
13700d94caffSDavid Greenman 			    (cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)
13710d94caffSDavid Greenman 				wakeup((caddr_t) & cnt.v_free_count);
13720d94caffSDavid Greenman 
137326f9a767SRodney W. Grimes 			swap_pager_needflags |= SWAP_FREE_NEEDED;
137426f9a767SRodney W. Grimes 			tsleep((caddr_t) & swap_pager_free,
137526f9a767SRodney W. Grimes 			    PVM, "swpfre", 0);
137626f9a767SRodney W. Grimes 			if (curproc == pageproc)
137726f9a767SRodney W. Grimes 				(void) swap_pager_clean();
137826f9a767SRodney W. Grimes 			else
137926f9a767SRodney W. Grimes 				wakeup((caddr_t) & vm_pages_needed);
138026f9a767SRodney W. Grimes 		}
138126f9a767SRodney W. Grimes 		splx(s);
138226f9a767SRodney W. Grimes 	}
138326f9a767SRodney W. Grimes 	spc = swap_pager_free.tqh_first;
138426f9a767SRodney W. Grimes 	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
1385fff93ab6SDavid Greenman 
138626f9a767SRodney W. Grimes 	kva = spc->spc_kva;
138726f9a767SRodney W. Grimes 
138826f9a767SRodney W. Grimes 	/*
138926f9a767SRodney W. Grimes 	 * map our page(s) into kva for I/O
139026f9a767SRodney W. Grimes 	 */
139116f62314SDavid Greenman 	pmap_qenter(kva, m, count);
139226f9a767SRodney W. Grimes 
139326f9a767SRodney W. Grimes 	/*
139426f9a767SRodney W. Grimes 	 * get the base I/O offset into the swap file
139526f9a767SRodney W. Grimes 	 */
139626f9a767SRodney W. Grimes 	for (i = 0; i < count; i++) {
139726f9a767SRodney W. Grimes 		foff = m[i]->offset + paging_offset;
139826f9a767SRodney W. Grimes 		off = swap_pager_block_offset(swp, foff);
139926f9a767SRodney W. Grimes 		/*
140026f9a767SRodney W. Grimes 		 * set the valid bit
140126f9a767SRodney W. Grimes 		 */
140226f9a767SRodney W. Grimes 		swb[i]->swb_valid |= (1 << off);
140326f9a767SRodney W. Grimes 		/*
140426f9a767SRodney W. Grimes 		 * and unlock the data structure
140526f9a767SRodney W. Grimes 		 */
140626f9a767SRodney W. Grimes 		--swb[i]->swb_locked;
140726f9a767SRodney W. Grimes 	}
140826f9a767SRodney W. Grimes 
140926f9a767SRodney W. Grimes 	/*
141026f9a767SRodney W. Grimes 	 * Get a swap buffer header and perform the IO
141126f9a767SRodney W. Grimes 	 */
141226f9a767SRodney W. Grimes 	bp = spc->spc_bp;
141326f9a767SRodney W. Grimes 	bzero(bp, sizeof *bp);
141426f9a767SRodney W. Grimes 	bp->b_spc = spc;
14157609ab12SDavid Greenman 	bp->b_vnbufs.le_next = NOLIST;
141626f9a767SRodney W. Grimes 
141726f9a767SRodney W. Grimes 	bp->b_flags = B_BUSY;
141826f9a767SRodney W. Grimes 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
141926f9a767SRodney W. Grimes 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1420a481f200SDavid Greenman 	if (bp->b_rcred != NOCRED)
142126f9a767SRodney W. Grimes 		crhold(bp->b_rcred);
1422a481f200SDavid Greenman 	if (bp->b_wcred != NOCRED)
142326f9a767SRodney W. Grimes 		crhold(bp->b_wcred);
1424a481f200SDavid Greenman 	bp->b_data = (caddr_t) kva;
142526f9a767SRodney W. Grimes 	bp->b_blkno = reqaddr[0];
14260d94caffSDavid Greenman 	pbgetvp(swapdev_vp, bp);
142716f62314SDavid Greenman 
142826f9a767SRodney W. Grimes 	bp->b_bcount = PAGE_SIZE * count;
142926f9a767SRodney W. Grimes 	bp->b_bufsize = PAGE_SIZE * count;
143026f9a767SRodney W. Grimes 	swapdev_vp->v_numoutput++;
143126f9a767SRodney W. Grimes 
143226f9a767SRodney W. Grimes 	/*
14330d94caffSDavid Greenman 	 * If this is an async write we set up additional buffer fields and
14340d94caffSDavid Greenman 	 * place a "cleaning" entry on the inuse queue.
143526f9a767SRodney W. Grimes 	 */
14367609ab12SDavid Greenman 	s = splbio();
143726f9a767SRodney W. Grimes 	if (flags & B_ASYNC) {
143826f9a767SRodney W. Grimes 		spc->spc_flags = 0;
143926f9a767SRodney W. Grimes 		spc->spc_swp = swp;
144026f9a767SRodney W. Grimes 		for (i = 0; i < count; i++)
144126f9a767SRodney W. Grimes 			spc->spc_m[i] = m[i];
144226f9a767SRodney W. Grimes 		spc->spc_count = count;
144326f9a767SRodney W. Grimes 		/*
144426f9a767SRodney W. Grimes 		 * the completion routine for async writes
144526f9a767SRodney W. Grimes 		 */
144626f9a767SRodney W. Grimes 		bp->b_flags |= B_CALL;
144726f9a767SRodney W. Grimes 		bp->b_iodone = swap_pager_iodone;
144826f9a767SRodney W. Grimes 		bp->b_dirtyoff = 0;
144926f9a767SRodney W. Grimes 		bp->b_dirtyend = bp->b_bcount;
145026f9a767SRodney W. Grimes 		swp->sw_poip++;
145126f9a767SRodney W. Grimes 		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
145226f9a767SRodney W. Grimes 	} else {
145326f9a767SRodney W. Grimes 		swp->sw_poip++;
145426f9a767SRodney W. Grimes 		bp->b_flags |= B_CALL;
145526f9a767SRodney W. Grimes 		bp->b_iodone = swap_pager_iodone1;
145626f9a767SRodney W. Grimes 	}
1457976e77fcSDavid Greenman 
1458976e77fcSDavid Greenman 	cnt.v_swapout++;
1459976e77fcSDavid Greenman 	cnt.v_swappgsout += count;
146026f9a767SRodney W. Grimes 	/*
146126f9a767SRodney W. Grimes 	 * perform the I/O
146226f9a767SRodney W. Grimes 	 */
146326f9a767SRodney W. Grimes 	VOP_STRATEGY(bp);
146426f9a767SRodney W. Grimes 	if ((flags & (B_READ | B_ASYNC)) == B_ASYNC) {
146526f9a767SRodney W. Grimes 		if ((bp->b_flags & B_DONE) == B_DONE) {
146626f9a767SRodney W. Grimes 			swap_pager_clean();
146726f9a767SRodney W. Grimes 		}
146826f9a767SRodney W. Grimes 		splx(s);
146926f9a767SRodney W. Grimes 		for (i = 0; i < count; i++) {
147026f9a767SRodney W. Grimes 			rtvals[i] = VM_PAGER_PEND;
147126f9a767SRodney W. Grimes 		}
147226f9a767SRodney W. Grimes 		return VM_PAGER_PEND;
147326f9a767SRodney W. Grimes 	}
147426f9a767SRodney W. Grimes 	/*
147526f9a767SRodney W. Grimes 	 * wait for the sync I/O to complete
147626f9a767SRodney W. Grimes 	 */
147726f9a767SRodney W. Grimes 	while ((bp->b_flags & B_DONE) == 0) {
147826f9a767SRodney W. Grimes 		tsleep((caddr_t) bp, PVM, "swwrt", 0);
147926f9a767SRodney W. Grimes 	}
14801b119d9dSDavid Greenman 	if (bp->b_flags & B_ERROR) {
14811b119d9dSDavid Greenman 		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
14821b119d9dSDavid Greenman 		    bp->b_blkno, bp->b_bcount, bp->b_error);
1483a83c285cSDavid Greenman 		rv = VM_PAGER_ERROR;
14841b119d9dSDavid Greenman 	} else {
14851b119d9dSDavid Greenman 		rv = VM_PAGER_OK;
14861b119d9dSDavid Greenman 	}
148726f9a767SRodney W. Grimes 
148826f9a767SRodney W. Grimes 	--swp->sw_poip;
148926f9a767SRodney W. Grimes 	if (swp->sw_poip == 0)
149026f9a767SRodney W. Grimes 		wakeup((caddr_t) swp);
149126f9a767SRodney W. Grimes 
149226f9a767SRodney W. Grimes 	if (bp->b_vp)
14930d94caffSDavid Greenman 		pbrelvp(bp);
14940d94caffSDavid Greenman 	if (bp->b_flags & B_WANTED)
14950d94caffSDavid Greenman 		wakeup((caddr_t) bp);
149626f9a767SRodney W. Grimes 
149726f9a767SRodney W. Grimes 	splx(s);
149826f9a767SRodney W. Grimes 
149926f9a767SRodney W. Grimes 	/*
150026f9a767SRodney W. Grimes 	 * remove the mapping for kernel virtual
150126f9a767SRodney W. Grimes 	 */
150216f62314SDavid Greenman 	pmap_qremove(kva, count);
150326f9a767SRodney W. Grimes 
150426f9a767SRodney W. Grimes 	/*
15050d94caffSDavid Greenman 	 * if we have written the page, then indicate that the page is clean.
150626f9a767SRodney W. Grimes 	 */
150726f9a767SRodney W. Grimes 	if (rv == VM_PAGER_OK) {
150826f9a767SRodney W. Grimes 		for (i = 0; i < count; i++) {
150926f9a767SRodney W. Grimes 			if (rtvals[i] == VM_PAGER_OK) {
151026f9a767SRodney W. Grimes 				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
15110d94caffSDavid Greenman 				m[i]->dirty = 0;
151226f9a767SRodney W. Grimes 				/*
15130d94caffSDavid Greenman 				 * optimization, if a page has been read
15140d94caffSDavid Greenman 				 * during the pageout process, we activate it.
151526f9a767SRodney W. Grimes 				 */
151626f9a767SRodney W. Grimes 				if ((m[i]->flags & PG_ACTIVE) == 0 &&
15170d94caffSDavid Greenman 				    ((m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(m[i]))))
151826f9a767SRodney W. Grimes 					vm_page_activate(m[i]);
151926f9a767SRodney W. Grimes 			}
152026f9a767SRodney W. Grimes 		}
152126f9a767SRodney W. Grimes 	} else {
152226f9a767SRodney W. Grimes 		for (i = 0; i < count; i++) {
152326f9a767SRodney W. Grimes 			rtvals[i] = rv;
152426f9a767SRodney W. Grimes 		}
152526f9a767SRodney W. Grimes 	}
152626f9a767SRodney W. Grimes 
152726f9a767SRodney W. Grimes 	if (bp->b_rcred != NOCRED)
152826f9a767SRodney W. Grimes 		crfree(bp->b_rcred);
152926f9a767SRodney W. Grimes 	if (bp->b_wcred != NOCRED)
153026f9a767SRodney W. Grimes 		crfree(bp->b_wcred);
153126f9a767SRodney W. Grimes 	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
153226f9a767SRodney W. Grimes 	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
153326f9a767SRodney W. Grimes 		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
153426f9a767SRodney W. Grimes 		wakeup((caddr_t) & swap_pager_free);
153526f9a767SRodney W. Grimes 	}
153626f9a767SRodney W. Grimes 	return (rv);
153726f9a767SRodney W. Grimes }
153826f9a767SRodney W. Grimes 
153926f9a767SRodney W. Grimes boolean_t
154026f9a767SRodney W. Grimes swap_pager_clean()
154126f9a767SRodney W. Grimes {
154226f9a767SRodney W. Grimes 	register swp_clean_t spc, tspc;
154326f9a767SRodney W. Grimes 	register int s;
154426f9a767SRodney W. Grimes 
154526f9a767SRodney W. Grimes 	tspc = NULL;
154626f9a767SRodney W. Grimes 	if (swap_pager_done.tqh_first == NULL)
154726f9a767SRodney W. Grimes 		return FALSE;
154826f9a767SRodney W. Grimes 	for (;;) {
154926f9a767SRodney W. Grimes 		s = splbio();
155026f9a767SRodney W. Grimes 		/*
15510d94caffSDavid Greenman 		 * Look up and removal from done list must be done at splbio()
15520d94caffSDavid Greenman 		 * to avoid conflicts with swap_pager_iodone.
155326f9a767SRodney W. Grimes 		 */
155405f0fdd2SPoul-Henning Kamp 		while ((spc = swap_pager_done.tqh_first) != 0) {
1555fff93ab6SDavid Greenman 			pmap_qremove(spc->spc_kva, spc->spc_count);
155626f9a767SRodney W. Grimes 			swap_pager_finish(spc);
155726f9a767SRodney W. Grimes 			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
155826f9a767SRodney W. Grimes 			goto doclean;
155926f9a767SRodney W. Grimes 		}
1560df8bae1dSRodney W. Grimes 
1561df8bae1dSRodney W. Grimes 		/*
1562df8bae1dSRodney W. Grimes 		 * No operations done, thats all we can do for now.
1563df8bae1dSRodney W. Grimes 		 */
156426f9a767SRodney W. Grimes 
156526f9a767SRodney W. Grimes 		splx(s);
1566df8bae1dSRodney W. Grimes 		break;
1567df8bae1dSRodney W. Grimes 
1568df8bae1dSRodney W. Grimes 		/*
15690d94caffSDavid Greenman 		 * The desired page was found to be busy earlier in the scan
15700d94caffSDavid Greenman 		 * but has since completed.
1571df8bae1dSRodney W. Grimes 		 */
157226f9a767SRodney W. Grimes doclean:
157326f9a767SRodney W. Grimes 		if (tspc && tspc == spc) {
157426f9a767SRodney W. Grimes 			tspc = NULL;
157526f9a767SRodney W. Grimes 		}
157626f9a767SRodney W. Grimes 		spc->spc_flags = 0;
157726f9a767SRodney W. Grimes 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
157826f9a767SRodney W. Grimes 		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
157926f9a767SRodney W. Grimes 			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
158026f9a767SRodney W. Grimes 			wakeup((caddr_t) & swap_pager_free);
158126f9a767SRodney W. Grimes 		}
158226f9a767SRodney W. Grimes 		++cleandone;
158326f9a767SRodney W. Grimes 		splx(s);
158426f9a767SRodney W. Grimes 	}
158526f9a767SRodney W. Grimes 
158626f9a767SRodney W. Grimes 	return (tspc ? TRUE : FALSE);
158726f9a767SRodney W. Grimes }
158826f9a767SRodney W. Grimes 
158926f9a767SRodney W. Grimes void
159026f9a767SRodney W. Grimes swap_pager_finish(spc)
159126f9a767SRodney W. Grimes 	register swp_clean_t spc;
159226f9a767SRodney W. Grimes {
159326f9a767SRodney W. Grimes 	vm_object_t object = spc->spc_m[0]->object;
159426f9a767SRodney W. Grimes 	int i;
159526f9a767SRodney W. Grimes 
159626f9a767SRodney W. Grimes 	if ((object->paging_in_progress -= spc->spc_count) == 0)
159726f9a767SRodney W. Grimes 		thread_wakeup((int) object);
1598df8bae1dSRodney W. Grimes 
1599df8bae1dSRodney W. Grimes 	/*
16000d94caffSDavid Greenman 	 * If no error mark as clean and inform the pmap system. If error,
16010d94caffSDavid Greenman 	 * mark as dirty so we will try again. (XXX could get stuck doing
16020d94caffSDavid Greenman 	 * this, should give up after awhile)
1603df8bae1dSRodney W. Grimes 	 */
1604df8bae1dSRodney W. Grimes 	if (spc->spc_flags & SPC_ERROR) {
160526f9a767SRodney W. Grimes 		for (i = 0; i < spc->spc_count; i++) {
1606a83c285cSDavid Greenman 			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
160705f0fdd2SPoul-Henning Kamp 			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
160826f9a767SRodney W. Grimes 		}
1609df8bae1dSRodney W. Grimes 	} else {
161026f9a767SRodney W. Grimes 		for (i = 0; i < spc->spc_count; i++) {
161126f9a767SRodney W. Grimes 			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
16120d94caffSDavid Greenman 			spc->spc_m[i]->dirty = 0;
16130d94caffSDavid Greenman 			if ((spc->spc_m[i]->flags & PG_ACTIVE) == 0 &&
16140d94caffSDavid Greenman 			    ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
16150d94caffSDavid Greenman 				vm_page_activate(spc->spc_m[i]);
1616df8bae1dSRodney W. Grimes 		}
1617df8bae1dSRodney W. Grimes 	}
1618df8bae1dSRodney W. Grimes 
161926f9a767SRodney W. Grimes 
162026f9a767SRodney W. Grimes 	for (i = 0; i < spc->spc_count; i++) {
1621df8bae1dSRodney W. Grimes 		/*
16220d94caffSDavid Greenman 		 * we wakeup any processes that are waiting on these pages.
1623df8bae1dSRodney W. Grimes 		 */
162426f9a767SRodney W. Grimes 		PAGE_WAKEUP(spc->spc_m[i]);
1625df8bae1dSRodney W. Grimes 	}
162626f9a767SRodney W. Grimes 	nswiodone -= spc->spc_count;
1627df8bae1dSRodney W. Grimes 
1628df8bae1dSRodney W. Grimes 	return;
162926f9a767SRodney W. Grimes }
1630df8bae1dSRodney W. Grimes 
163126f9a767SRodney W. Grimes /*
163226f9a767SRodney W. Grimes  * swap_pager_iodone
163326f9a767SRodney W. Grimes  */
163426f9a767SRodney W. Grimes void
1635df8bae1dSRodney W. Grimes swap_pager_iodone(bp)
1636df8bae1dSRodney W. Grimes 	register struct buf *bp;
1637df8bae1dSRodney W. Grimes {
1638df8bae1dSRodney W. Grimes 	register swp_clean_t spc;
1639df8bae1dSRodney W. Grimes 	int s;
1640df8bae1dSRodney W. Grimes 
1641df8bae1dSRodney W. Grimes 	s = splbio();
164226f9a767SRodney W. Grimes 	spc = (swp_clean_t) bp->b_spc;
164326f9a767SRodney W. Grimes 	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
164426f9a767SRodney W. Grimes 	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
164526f9a767SRodney W. Grimes 	if (bp->b_flags & B_ERROR) {
1646df8bae1dSRodney W. Grimes 		spc->spc_flags |= SPC_ERROR;
16471b119d9dSDavid Greenman 		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d",
16481b119d9dSDavid Greenman 		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
164905f0fdd2SPoul-Henning Kamp 		    bp->b_error, (u_long) bp->b_blkno, bp->b_bcount);
1650df8bae1dSRodney W. Grimes 	}
165126f9a767SRodney W. Grimes /*
165226f9a767SRodney W. Grimes 	if ((bp->b_flags & B_READ) == 0)
165326f9a767SRodney W. Grimes 		vwakeup(bp);
165426f9a767SRodney W. Grimes */
165526f9a767SRodney W. Grimes 
16560d94caffSDavid Greenman 	if (bp->b_vp)
16570d94caffSDavid Greenman 		pbrelvp(bp);
16580d94caffSDavid Greenman 
16590d94caffSDavid Greenman 	if (bp->b_flags & B_WANTED)
16600d94caffSDavid Greenman 		wakeup((caddr_t) bp);
16610d94caffSDavid Greenman 
166226f9a767SRodney W. Grimes 	if (bp->b_rcred != NOCRED)
166326f9a767SRodney W. Grimes 		crfree(bp->b_rcred);
166426f9a767SRodney W. Grimes 	if (bp->b_wcred != NOCRED)
166526f9a767SRodney W. Grimes 		crfree(bp->b_wcred);
166626f9a767SRodney W. Grimes 
166726f9a767SRodney W. Grimes 	nswiodone += spc->spc_count;
166826f9a767SRodney W. Grimes 	if (--spc->spc_swp->sw_poip == 0) {
166926f9a767SRodney W. Grimes 		wakeup((caddr_t) spc->spc_swp);
167026f9a767SRodney W. Grimes 	}
167126f9a767SRodney W. Grimes 	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
167226f9a767SRodney W. Grimes 	    swap_pager_inuse.tqh_first == 0) {
167326f9a767SRodney W. Grimes 		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
167426f9a767SRodney W. Grimes 		wakeup((caddr_t) & swap_pager_free);
167526f9a767SRodney W. Grimes 		wakeup((caddr_t) & vm_pages_needed);
167626f9a767SRodney W. Grimes 	}
167726f9a767SRodney W. Grimes 	if (vm_pageout_pages_needed) {
167826f9a767SRodney W. Grimes 		wakeup((caddr_t) & vm_pageout_pages_needed);
167926f9a767SRodney W. Grimes 	}
168026f9a767SRodney W. Grimes 	if ((swap_pager_inuse.tqh_first == NULL) ||
16810d94caffSDavid Greenman 	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
16820d94caffSDavid Greenman 		nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
168326f9a767SRodney W. Grimes 		wakeup((caddr_t) & vm_pages_needed);
168426f9a767SRodney W. Grimes 	}
168526f9a767SRodney W. Grimes 	splx(s);
168626f9a767SRodney W. Grimes }
168726f9a767SRodney W. Grimes 
168826f9a767SRodney W. Grimes /*
168926f9a767SRodney W. Grimes  * return true if any swap control structures can be allocated
169026f9a767SRodney W. Grimes  */
169126f9a767SRodney W. Grimes int
16920d94caffSDavid Greenman swap_pager_ready()
16930d94caffSDavid Greenman {
169426f9a767SRodney W. Grimes 	if (swap_pager_free.tqh_first)
169526f9a767SRodney W. Grimes 		return 1;
169626f9a767SRodney W. Grimes 	else
169726f9a767SRodney W. Grimes 		return 0;
169826f9a767SRodney W. Grimes }
1699