xref: /freebsd/sys/vm/swap_pager.c (revision 20d3034f39604f2986ad9468ed686b0b73702a02)
1df8bae1dSRodney W. Grimes /*
21c7c3c6aSMatthew Dillon  * Copyright (c) 1998 Matthew Dillon,
326f9a767SRodney W. Grimes  * Copyright (c) 1994 John S. Dyson
4df8bae1dSRodney W. Grimes  * Copyright (c) 1990 University of Utah.
5df8bae1dSRodney W. Grimes  * Copyright (c) 1991, 1993
6df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
7df8bae1dSRodney W. Grimes  *
8df8bae1dSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
9df8bae1dSRodney W. Grimes  * the Systems Programming Group of the University of Utah Computer
10df8bae1dSRodney W. Grimes  * Science Department.
11df8bae1dSRodney W. Grimes  *
12df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
13df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
14df8bae1dSRodney W. Grimes  * are met:
15df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
16df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
17df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
18df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
19df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
20df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
21df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
22df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
23df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
24df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
25df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
26df8bae1dSRodney W. Grimes  *    without specific prior written permission.
27df8bae1dSRodney W. Grimes  *
28df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
39df8bae1dSRodney W. Grimes  *
401c7c3c6aSMatthew Dillon  *				New Swap System
411c7c3c6aSMatthew Dillon  *				Matthew Dillon
421c7c3c6aSMatthew Dillon  *
431c7c3c6aSMatthew Dillon  * Radix Bitmap 'blists'.
441c7c3c6aSMatthew Dillon  *
451c7c3c6aSMatthew Dillon  *	- The new swapper uses the new radix bitmap code.  This should scale
461c7c3c6aSMatthew Dillon  *	  to arbitrarily small or arbitrarily large swap spaces and an almost
471c7c3c6aSMatthew Dillon  *	  arbitrary degree of fragmentation.
481c7c3c6aSMatthew Dillon  *
491c7c3c6aSMatthew Dillon  * Features:
501c7c3c6aSMatthew Dillon  *
511c7c3c6aSMatthew Dillon  *	- on the fly reallocation of swap during putpages.  The new system
521c7c3c6aSMatthew Dillon  *	  does not try to keep previously allocated swap blocks for dirty
531c7c3c6aSMatthew Dillon  *	  pages.
541c7c3c6aSMatthew Dillon  *
551c7c3c6aSMatthew Dillon  *	- on the fly deallocation of swap
561c7c3c6aSMatthew Dillon  *
571c7c3c6aSMatthew Dillon  *	- No more garbage collection required.  Unnecessarily allocated swap
581c7c3c6aSMatthew Dillon  *	  blocks only exist for dirty vm_page_t's now and these are already
591c7c3c6aSMatthew Dillon  *	  cycled (in a high-load system) by the pager.  We also do on-the-fly
601c7c3c6aSMatthew Dillon  *	  removal of invalidated swap blocks when a page is destroyed
611c7c3c6aSMatthew Dillon  *	  or renamed.
621c7c3c6aSMatthew Dillon  *
63df8bae1dSRodney W. Grimes  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
64df8bae1dSRodney W. Grimes  *
65df8bae1dSRodney W. Grimes  *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
661c7c3c6aSMatthew Dillon  *
6720d3034fSMatthew Dillon  * $Id: swap_pager.c,v 1.114 1999/02/18 19:57:33 dillon Exp $
68df8bae1dSRodney W. Grimes  */
69df8bae1dSRodney W. Grimes 
70df8bae1dSRodney W. Grimes #include <sys/param.h>
71df8bae1dSRodney W. Grimes #include <sys/systm.h>
7264abb5a5SDavid Greenman #include <sys/kernel.h>
73df8bae1dSRodney W. Grimes #include <sys/proc.h>
74df8bae1dSRodney W. Grimes #include <sys/buf.h>
75df8bae1dSRodney W. Grimes #include <sys/vnode.h>
76df8bae1dSRodney W. Grimes #include <sys/malloc.h>
77efeaf95aSDavid Greenman #include <sys/vmmeter.h>
78327f4e83SMatthew Dillon #include <sys/sysctl.h>
791c7c3c6aSMatthew Dillon #include <sys/blist.h>
801c7c3c6aSMatthew Dillon #include <sys/lock.h>
81df8bae1dSRodney W. Grimes 
82e47ed70bSJohn Dyson #ifndef MAX_PAGEOUT_CLUSTER
83ffc82b0aSJohn Dyson #define MAX_PAGEOUT_CLUSTER 16
84e47ed70bSJohn Dyson #endif
85e47ed70bSJohn Dyson 
86e47ed70bSJohn Dyson #define SWB_NPAGES	MAX_PAGEOUT_CLUSTER
87e47ed70bSJohn Dyson 
881c7c3c6aSMatthew Dillon #include "opt_swap.h"
89df8bae1dSRodney W. Grimes #include <vm/vm.h>
90efeaf95aSDavid Greenman #include <vm/vm_prot.h>
91efeaf95aSDavid Greenman #include <vm/vm_object.h>
92df8bae1dSRodney W. Grimes #include <vm/vm_page.h>
93efeaf95aSDavid Greenman #include <vm/vm_pager.h>
94df8bae1dSRodney W. Grimes #include <vm/vm_pageout.h>
95df8bae1dSRodney W. Grimes #include <vm/swap_pager.h>
96efeaf95aSDavid Greenman #include <vm/vm_extern.h>
971c7c3c6aSMatthew Dillon #include <vm/vm_zone.h>
98df8bae1dSRodney W. Grimes 
/*
 * Operation flags.  They are not referenced in this part of the file;
 * presumably they are the mode argument of swp_pager_meta_ctl() -- confirm
 * against its definition.
 */
#define SWM_FREE	0x02	/* free, period			*/
#define SWM_POP		0x04	/* pop out			*/

/*
 * Swap space accounting.
 *
 * vm_swap_size is in page-sized chunks now.  It was DEV_BSIZE'd chunks
 * in the old system.
 */
extern int vm_swap_size;	/* number of free swap blocks, in pages */

int swap_pager_full;		/* swap space exhaustion (task killing) */
static int swap_pager_almost_full; /* swap space exhaustion (w/ hysteresis)*/

/*
 * Swap I/O limits, set up by swap_pager_swap_init().
 */
static int nsw_rcount;		/* free read buffers			*/
static int nsw_wcount_sync;	/* limit write buffers / synchronous	*/
static int nsw_wcount_async;	/* limit write buffers / asynchronous	*/
static int nsw_wcount_async_max;/* assigned maximum			*/
static int nsw_cluster_max;	/* maximum VOP I/O allowed		*/

static int sw_alloc_interlock;	/* swap pager allocation interlock	*/

/*
 * Swap block bitmap and the swap metadata hash table.
 */
struct blist *swapblist;
static struct swblock **swhash;
static int swhash_mask;

/*
 * Tunables exported through sysctl (vm.swap_async_max, vm.swap_cluster_max).
 */
static int swap_async_max = 4;	/* maximum in-progress async I/O's	*/
static int swap_cluster_max;	/* maximum VOP I/O allowed		*/
1231c7c3c6aSMatthew Dillon 
124327f4e83SMatthew Dillon #ifndef DISALLOW_SWAP_TUNE
125327f4e83SMatthew Dillon 
126327f4e83SMatthew Dillon SYSCTL_INT(_vm, OID_AUTO, swap_async_max,
127327f4e83SMatthew Dillon         CTLFLAG_RW, &swap_async_max, 0, "Maximum running async swap ops");
128327f4e83SMatthew Dillon SYSCTL_INT(_vm, OID_AUTO, swap_cluster_max,
129327f4e83SMatthew Dillon         CTLFLAG_RW, &swap_cluster_max, 0, "Maximum swap I/O cluster (pages)");
130327f4e83SMatthew Dillon 
131327f4e83SMatthew Dillon #else
132327f4e83SMatthew Dillon 
133327f4e83SMatthew Dillon SYSCTL_INT(_vm, OID_AUTO, swap_async_max,
134327f4e83SMatthew Dillon         CTLFLAG_RD, &swap_async_max, 0, "");
135327f4e83SMatthew Dillon SYSCTL_INT(_vm, OID_AUTO, swap_cluster_max,
136327f4e83SMatthew Dillon         CTLFLAG_RD, &swap_cluster_max, 0, "");
137327f4e83SMatthew Dillon 
138327f4e83SMatthew Dillon #endif
1391c7c3c6aSMatthew Dillon 
1401c7c3c6aSMatthew Dillon /*
1411c7c3c6aSMatthew Dillon  * "named" and "unnamed" anon region objects.  Try to reduce the overhead
1421c7c3c6aSMatthew Dillon  * of searching a named list by hashing it just a little.
1431c7c3c6aSMatthew Dillon  */
1441c7c3c6aSMatthew Dillon 
1451c7c3c6aSMatthew Dillon #define NOBJLISTS		8
1461c7c3c6aSMatthew Dillon 
1471c7c3c6aSMatthew Dillon #define NOBJLIST(handle)	\
1481c7c3c6aSMatthew Dillon 	(&swap_pager_object_list[((int)(long)handle >> 4) & (NOBJLISTS-1)])
1491c7c3c6aSMatthew Dillon 
1501c7c3c6aSMatthew Dillon static struct pagerlst	swap_pager_object_list[NOBJLISTS];
1511c7c3c6aSMatthew Dillon struct pagerlst		swap_pager_un_object_list;
1521c7c3c6aSMatthew Dillon vm_zone_t		swap_zone;
1531c7c3c6aSMatthew Dillon 
1541c7c3c6aSMatthew Dillon /*
1551c7c3c6aSMatthew Dillon  * pagerops for OBJT_SWAP - "swap pager".  Some ops are also global procedure
1561c7c3c6aSMatthew Dillon  * calls hooked from other parts of the VM system and do not appear here.
1571c7c3c6aSMatthew Dillon  * (see vm/swap_pager.h).
1581c7c3c6aSMatthew Dillon  */
1591c7c3c6aSMatthew Dillon 
160ff98689dSBruce Evans static vm_object_t
1616cde7a16SDavid Greenman 		swap_pager_alloc __P((void *handle, vm_ooffset_t size,
162a316d390SJohn Dyson 				      vm_prot_t prot, vm_ooffset_t offset));
163ff98689dSBruce Evans static void	swap_pager_dealloc __P((vm_object_t object));
164f708ef1bSPoul-Henning Kamp static int	swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
165ff98689dSBruce Evans static void	swap_pager_init __P((void));
1661c7c3c6aSMatthew Dillon static void	swap_pager_unswapped __P((vm_page_t));
167f708ef1bSPoul-Henning Kamp 
168df8bae1dSRodney W. Grimes struct pagerops swappagerops = {
1691c7c3c6aSMatthew Dillon 	swap_pager_init,	/* early system initialization of pager	*/
1701c7c3c6aSMatthew Dillon 	swap_pager_alloc,	/* allocate an OBJT_SWAP object		*/
1711c7c3c6aSMatthew Dillon 	swap_pager_dealloc,	/* deallocate an OBJT_SWAP object	*/
1721c7c3c6aSMatthew Dillon 	swap_pager_getpages,	/* pagein				*/
1731c7c3c6aSMatthew Dillon 	swap_pager_putpages,	/* pageout				*/
1741c7c3c6aSMatthew Dillon 	swap_pager_haspage,	/* get backing store status for page	*/
1751c7c3c6aSMatthew Dillon 	swap_pager_unswapped	/* remove swap related to page		*/
176df8bae1dSRodney W. Grimes };
177df8bae1dSRodney W. Grimes 
1781c7c3c6aSMatthew Dillon /*
1791c7c3c6aSMatthew Dillon  * dmmax is in page-sized chunks with the new swap system.  It was
1801c7c3c6aSMatthew Dillon  * dev-bsized chunks in the old.
1811c7c3c6aSMatthew Dillon  *
1821c7c3c6aSMatthew Dillon  * swap_*() routines are externally accessible.  swp_*() routines are
1831c7c3c6aSMatthew Dillon  * internal.
1841c7c3c6aSMatthew Dillon  */
1851c7c3c6aSMatthew Dillon 
186f708ef1bSPoul-Henning Kamp int dmmax;
1871c7c3c6aSMatthew Dillon static int dmmax_mask;
18820d3034fSMatthew Dillon int nswap_lowat = 128;		/* in pages, swap_pager_almost_full warn */
18920d3034fSMatthew Dillon int nswap_hiwat = 512;		/* in pages, swap_pager_almost_full warn */
19026f9a767SRodney W. Grimes 
1911c7c3c6aSMatthew Dillon static __inline void	swp_sizecheck __P((void));
1921c7c3c6aSMatthew Dillon static void	swp_pager_sync_iodone __P((struct buf *bp));
1931c7c3c6aSMatthew Dillon static void	swp_pager_async_iodone __P((struct buf *bp));
19424a1cce3SDavid Greenman 
1951c7c3c6aSMatthew Dillon /*
1961c7c3c6aSMatthew Dillon  * Swap bitmap functions
1971c7c3c6aSMatthew Dillon  */
1981c7c3c6aSMatthew Dillon 
1991c7c3c6aSMatthew Dillon static __inline void	swp_pager_freeswapspace __P((daddr_t blk, int npages));
2001c7c3c6aSMatthew Dillon static __inline daddr_t	swp_pager_getswapspace __P((int npages));
2011c7c3c6aSMatthew Dillon 
2021c7c3c6aSMatthew Dillon /*
2031c7c3c6aSMatthew Dillon  * Metadata functions
2041c7c3c6aSMatthew Dillon  */
2051c7c3c6aSMatthew Dillon 
2061c7c3c6aSMatthew Dillon static void swp_pager_meta_build __P((vm_object_t, daddr_t, daddr_t, int));
2071c7c3c6aSMatthew Dillon static void swp_pager_meta_free __P((vm_object_t, daddr_t, daddr_t));
2081c7c3c6aSMatthew Dillon static void swp_pager_meta_free_all __P((vm_object_t));
2091c7c3c6aSMatthew Dillon static daddr_t swp_pager_meta_ctl __P((vm_object_t, vm_pindex_t, int));
2101c7c3c6aSMatthew Dillon 
2111c7c3c6aSMatthew Dillon /*
2121c7c3c6aSMatthew Dillon  * SWP_SIZECHECK() -	update swap_pager_full indication
2131c7c3c6aSMatthew Dillon  *
21420d3034fSMatthew Dillon  *	update the swap_pager_almost_full indication and warn when we are
21520d3034fSMatthew Dillon  *	about to run out of swap space, using lowat/hiwat hysteresis.
21620d3034fSMatthew Dillon  *
21720d3034fSMatthew Dillon  *	Clear swap_pager_full ( task killing ) indication when lowat is met.
2181c7c3c6aSMatthew Dillon  *
2191c7c3c6aSMatthew Dillon  *	No restrictions on call
2201c7c3c6aSMatthew Dillon  *	This routine may not block.
2211c7c3c6aSMatthew Dillon  *	This routine must be called at splvm()
2221c7c3c6aSMatthew Dillon  */
223de5f6a77SJohn Dyson 
224c1087c13SBruce Evans static __inline void
2251c7c3c6aSMatthew Dillon swp_sizecheck()
2260d94caffSDavid Greenman {
2271c7c3c6aSMatthew Dillon 	if (vm_swap_size < nswap_lowat) {
22820d3034fSMatthew Dillon 		if (swap_pager_almost_full == 0) {
2291af87c92SDavid Greenman 			printf("swap_pager: out of swap space\n");
23020d3034fSMatthew Dillon 			swap_pager_almost_full = 1;
2312b0d37a4SMatthew Dillon 		}
23220d3034fSMatthew Dillon 	} else {
23326f9a767SRodney W. Grimes 		swap_pager_full = 0;
23420d3034fSMatthew Dillon 		if (vm_swap_size > nswap_hiwat)
23520d3034fSMatthew Dillon 			swap_pager_almost_full = 0;
23626f9a767SRodney W. Grimes 	}
2371c7c3c6aSMatthew Dillon }
2381c7c3c6aSMatthew Dillon 
2391c7c3c6aSMatthew Dillon /*
2401c7c3c6aSMatthew Dillon  * SWAP_PAGER_INIT() -	initialize the swap pager!
2411c7c3c6aSMatthew Dillon  *
2421c7c3c6aSMatthew Dillon  *	Expected to be started from system init.  NOTE:  This code is run
2431c7c3c6aSMatthew Dillon  *	before much else so be careful what you depend on.  Most of the VM
2441c7c3c6aSMatthew Dillon  *	system has yet to be initialized at this point.
2451c7c3c6aSMatthew Dillon  */
24626f9a767SRodney W. Grimes 
247f5a12711SPoul-Henning Kamp static void
248df8bae1dSRodney W. Grimes swap_pager_init()
249df8bae1dSRodney W. Grimes {
2501c7c3c6aSMatthew Dillon 	/*
2511c7c3c6aSMatthew Dillon 	 * Initialize object lists
2521c7c3c6aSMatthew Dillon 	 */
2531c7c3c6aSMatthew Dillon 	int i;
2541c7c3c6aSMatthew Dillon 
2551c7c3c6aSMatthew Dillon 	for (i = 0; i < NOBJLISTS; ++i)
2561c7c3c6aSMatthew Dillon 		TAILQ_INIT(&swap_pager_object_list[i]);
25724a1cce3SDavid Greenman 	TAILQ_INIT(&swap_pager_un_object_list);
258df8bae1dSRodney W. Grimes 
259df8bae1dSRodney W. Grimes 	/*
2601c7c3c6aSMatthew Dillon 	 * Device Stripe, in PAGE_SIZE'd blocks
261df8bae1dSRodney W. Grimes 	 */
2621c7c3c6aSMatthew Dillon 
2631c7c3c6aSMatthew Dillon 	dmmax = SWB_NPAGES * 2;
2641c7c3c6aSMatthew Dillon 	dmmax_mask = ~(dmmax - 1);
2651c7c3c6aSMatthew Dillon }
26626f9a767SRodney W. Grimes 
267df8bae1dSRodney W. Grimes /*
2681c7c3c6aSMatthew Dillon  * SWAP_PAGER_SWAP_INIT() - swap pager initialization from pageout process
2691c7c3c6aSMatthew Dillon  *
2701c7c3c6aSMatthew Dillon  *	Expected to be started from pageout process once, prior to entering
2711c7c3c6aSMatthew Dillon  *	its main loop.
272df8bae1dSRodney W. Grimes  */
273df8bae1dSRodney W. Grimes 
27424a1cce3SDavid Greenman void
27524a1cce3SDavid Greenman swap_pager_swap_init()
276df8bae1dSRodney W. Grimes {
2771c7c3c6aSMatthew Dillon 	int n;
2780d94caffSDavid Greenman 
27926f9a767SRodney W. Grimes 	/*
2801c7c3c6aSMatthew Dillon 	 * Number of in-transit swap bp operations.  Don't
2811c7c3c6aSMatthew Dillon 	 * exhaust the pbufs completely.  Make sure we
2821c7c3c6aSMatthew Dillon 	 * initialize workable values (0 will work for hysteresis
2831c7c3c6aSMatthew Dillon 	 * but it isn't very efficient).
2841c7c3c6aSMatthew Dillon 	 *
285327f4e83SMatthew Dillon 	 * The nsw_cluster_max is constrained by the bp->b_pages[]
2861c7c3c6aSMatthew Dillon 	 * array (MAXPHYS/PAGE_SIZE) and our locally defined
2871c7c3c6aSMatthew Dillon 	 * MAX_PAGEOUT_CLUSTER.   Also be aware that swap ops are
2881c7c3c6aSMatthew Dillon 	 * constrained by the swap device interleave stripe size.
289327f4e83SMatthew Dillon 	 *
290327f4e83SMatthew Dillon 	 * Currently we hardwire nsw_wcount_async to 4.  This limit is
291327f4e83SMatthew Dillon 	 * designed to prevent other I/O from having high latencies due to
292327f4e83SMatthew Dillon 	 * our pageout I/O.  The value 4 works well for one or two active swap
293327f4e83SMatthew Dillon 	 * devices but is probably a little low if you have more.  Even so,
294327f4e83SMatthew Dillon 	 * a higher value would probably generate only a limited improvement
295327f4e83SMatthew Dillon 	 * with three or four active swap devices since the system does not
296327f4e83SMatthew Dillon 	 * typically have to pageout at extreme bandwidths.   We will want
297327f4e83SMatthew Dillon 	 * at least 2 per swap devices, and 4 is a pretty good value if you
298327f4e83SMatthew Dillon 	 * have one NFS swap device due to the command/ack latency over NFS.
299327f4e83SMatthew Dillon 	 * So it all works out pretty well.
30026f9a767SRodney W. Grimes 	 */
30124a1cce3SDavid Greenman 
302327f4e83SMatthew Dillon 	swap_cluster_max = min((MAXPHYS/PAGE_SIZE), MAX_PAGEOUT_CLUSTER);
303327f4e83SMatthew Dillon 
3041c7c3c6aSMatthew Dillon 	nsw_rcount = (nswbuf + 1) / 2;
305327f4e83SMatthew Dillon 	nsw_wcount_sync = (nswbuf + 3) / 4;
306327f4e83SMatthew Dillon 	nsw_wcount_async = 4;
307327f4e83SMatthew Dillon 	nsw_wcount_async_max = nsw_wcount_async;
308327f4e83SMatthew Dillon 	nsw_cluster_max = swap_cluster_max;
30924a1cce3SDavid Greenman 
3101c7c3c6aSMatthew Dillon 	/*
3111c7c3c6aSMatthew Dillon 	 * Initialize our zone.  Right now I'm just guessing on the number
3121c7c3c6aSMatthew Dillon 	 * we need based on the number of pages in the system.  Each swblock
3131c7c3c6aSMatthew Dillon 	 * can hold 16 pages, so this is probably overkill.
3141c7c3c6aSMatthew Dillon 	 */
31524a1cce3SDavid Greenman 
3161c7c3c6aSMatthew Dillon 	n = cnt.v_page_count * 2;
31726f9a767SRodney W. Grimes 
3181c7c3c6aSMatthew Dillon 	swap_zone = zinit(
3191c7c3c6aSMatthew Dillon 	    "SWAPMETA",
3201c7c3c6aSMatthew Dillon 	    sizeof(struct swblock),
3211c7c3c6aSMatthew Dillon 	    n,
3221c7c3c6aSMatthew Dillon 	    ZONE_INTERRUPT,
3231c7c3c6aSMatthew Dillon 	    1
3241c7c3c6aSMatthew Dillon 	);
32524a1cce3SDavid Greenman 
3261c7c3c6aSMatthew Dillon 	/*
3271c7c3c6aSMatthew Dillon 	 * Initialize our meta-data hash table.  The swapper does not need to
3281c7c3c6aSMatthew Dillon 	 * be quite as efficient as the VM system, so we do not use an
3291c7c3c6aSMatthew Dillon 	 * oversized hash table.
3301c7c3c6aSMatthew Dillon 	 *
3311c7c3c6aSMatthew Dillon 	 * 	n: 		size of hash table, must be power of 2
3321c7c3c6aSMatthew Dillon 	 *	swhash_mask:	hash table index mask
3331c7c3c6aSMatthew Dillon 	 */
334df8bae1dSRodney W. Grimes 
3351c7c3c6aSMatthew Dillon 	for (n = 1; n < cnt.v_page_count / 4; n <<= 1)
3361c7c3c6aSMatthew Dillon 		;
3371c7c3c6aSMatthew Dillon 
3381c7c3c6aSMatthew Dillon 	swhash = malloc(sizeof(struct swblock *) * n, M_VMPGDATA, M_WAITOK);
3391c7c3c6aSMatthew Dillon 	bzero(swhash, sizeof(struct swblock *) * n);
3401c7c3c6aSMatthew Dillon 
3411c7c3c6aSMatthew Dillon 	swhash_mask = n - 1;
34224a1cce3SDavid Greenman }
34324a1cce3SDavid Greenman 
34424a1cce3SDavid Greenman /*
3451c7c3c6aSMatthew Dillon  * SWAP_PAGER_ALLOC() -	allocate a new OBJT_SWAP VM object and instantiate
3461c7c3c6aSMatthew Dillon  *			its metadata structures.
3471c7c3c6aSMatthew Dillon  *
3481c7c3c6aSMatthew Dillon  *	This routine is called from the mmap and fork code to create a new
3491c7c3c6aSMatthew Dillon  *	OBJT_SWAP object.  We do this by creating an OBJT_DEFAULT object
3501c7c3c6aSMatthew Dillon  *	and then converting it with swp_pager_meta_build().
3511c7c3c6aSMatthew Dillon  *
3521c7c3c6aSMatthew Dillon  *	This routine may block in vm_object_allocate() and create a named
3531c7c3c6aSMatthew Dillon  *	object lookup race, so we must interlock.   We must also run at
3541c7c3c6aSMatthew Dillon  *	splvm() for the object lookup to handle races with interrupts, but
3551c7c3c6aSMatthew Dillon  *	we do not have to maintain splvm() in between the lookup and the
3561c7c3c6aSMatthew Dillon  *	add because (I believe) it is not possible to attempt to create
3571c7c3c6aSMatthew Dillon  *	a new swap object w/handle when a default object with that handle
3581c7c3c6aSMatthew Dillon  *	already exists.
35924a1cce3SDavid Greenman  */
3601c7c3c6aSMatthew Dillon 
361f5a12711SPoul-Henning Kamp static vm_object_t
3626cde7a16SDavid Greenman swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
363b9dcd593SBruce Evans 		 vm_ooffset_t offset)
36424a1cce3SDavid Greenman {
36524a1cce3SDavid Greenman 	vm_object_t object;
36624a1cce3SDavid Greenman 
36724a1cce3SDavid Greenman 	if (handle) {
3681c7c3c6aSMatthew Dillon 		/*
3691c7c3c6aSMatthew Dillon 		 * Reference existing named region or allocate new one.  There
3701c7c3c6aSMatthew Dillon 		 * should not be a race here against swp_pager_meta_build()
3711c7c3c6aSMatthew Dillon 		 * as called from vm_page_remove() in regards to the lookup
3721c7c3c6aSMatthew Dillon 		 * of the handle.
3731c7c3c6aSMatthew Dillon 		 */
3741c7c3c6aSMatthew Dillon 
3751c7c3c6aSMatthew Dillon 		while (sw_alloc_interlock) {
3761c7c3c6aSMatthew Dillon 			sw_alloc_interlock = -1;
3771c7c3c6aSMatthew Dillon 			tsleep(&sw_alloc_interlock, PVM, "swpalc", 0);
3781c7c3c6aSMatthew Dillon 		}
3791c7c3c6aSMatthew Dillon 		sw_alloc_interlock = 1;
3801c7c3c6aSMatthew Dillon 
3811c7c3c6aSMatthew Dillon 		object = vm_pager_object_lookup(NOBJLIST(handle), handle);
3821c7c3c6aSMatthew Dillon 
38324a1cce3SDavid Greenman 		if (object != NULL) {
38424a1cce3SDavid Greenman 			vm_object_reference(object);
38524a1cce3SDavid Greenman 		} else {
3861c7c3c6aSMatthew Dillon 			object = vm_object_allocate(OBJT_DEFAULT,
3876cde7a16SDavid Greenman 				OFF_TO_IDX(offset + PAGE_MASK + size));
38824a1cce3SDavid Greenman 			object->handle = handle;
3891c7c3c6aSMatthew Dillon 
3901c7c3c6aSMatthew Dillon 			swp_pager_meta_build(
3911c7c3c6aSMatthew Dillon 			    object,
3921c7c3c6aSMatthew Dillon 			    0,
3931c7c3c6aSMatthew Dillon 			    SWAPBLK_NONE,
3941c7c3c6aSMatthew Dillon 			    0
3951c7c3c6aSMatthew Dillon 			);
39624a1cce3SDavid Greenman 		}
3971c7c3c6aSMatthew Dillon 
3981c7c3c6aSMatthew Dillon 		if (sw_alloc_interlock < 0)
3991c7c3c6aSMatthew Dillon 			wakeup(&sw_alloc_interlock);
4001c7c3c6aSMatthew Dillon 
4011c7c3c6aSMatthew Dillon 		sw_alloc_interlock = 0;
40224a1cce3SDavid Greenman 	} else {
4031c7c3c6aSMatthew Dillon 		object = vm_object_allocate(OBJT_DEFAULT,
4046cde7a16SDavid Greenman 			OFF_TO_IDX(offset + PAGE_MASK + size));
4051c7c3c6aSMatthew Dillon 
4061c7c3c6aSMatthew Dillon 		swp_pager_meta_build(
4071c7c3c6aSMatthew Dillon 		    object,
4081c7c3c6aSMatthew Dillon 		    0,
4091c7c3c6aSMatthew Dillon 		    SWAPBLK_NONE,
4101c7c3c6aSMatthew Dillon 		    0
4111c7c3c6aSMatthew Dillon 		);
41224a1cce3SDavid Greenman 	}
41324a1cce3SDavid Greenman 
41424a1cce3SDavid Greenman 	return (object);
415df8bae1dSRodney W. Grimes }
416df8bae1dSRodney W. Grimes 
41726f9a767SRodney W. Grimes /*
4181c7c3c6aSMatthew Dillon  * SWAP_PAGER_DEALLOC() -	remove swap metadata from object
4191c7c3c6aSMatthew Dillon  *
4201c7c3c6aSMatthew Dillon  *	The swap backing for the object is destroyed.  The code is
4211c7c3c6aSMatthew Dillon  *	designed such that we can reinstantiate it later, but this
4221c7c3c6aSMatthew Dillon  *	routine is typically called only when the entire object is
4231c7c3c6aSMatthew Dillon  *	about to be destroyed.
4241c7c3c6aSMatthew Dillon  *
4251c7c3c6aSMatthew Dillon  *	This routine may block, but no longer does.
4261c7c3c6aSMatthew Dillon  *
4271c7c3c6aSMatthew Dillon  *	The object must be locked or unreferenceable.
42826f9a767SRodney W. Grimes  */
42926f9a767SRodney W. Grimes 
430df8bae1dSRodney W. Grimes static void
4311c7c3c6aSMatthew Dillon swap_pager_dealloc(object)
4322a4895f4SDavid Greenman 	vm_object_t object;
43326f9a767SRodney W. Grimes {
43426f9a767SRodney W. Grimes 	/*
4351c7c3c6aSMatthew Dillon 	 * Remove from list right away so lookups will fail if we block for
4361c7c3c6aSMatthew Dillon 	 * pageout completion.
43726f9a767SRodney W. Grimes 	 */
438b44e4b7aSJohn Dyson 
4391c7c3c6aSMatthew Dillon 	if (object->handle == NULL) {
4401c7c3c6aSMatthew Dillon 		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
44124ea4a96SDavid Greenman 	} else {
4421c7c3c6aSMatthew Dillon 		TAILQ_REMOVE(NOBJLIST(object->handle), object, pager_object_list);
44326f9a767SRodney W. Grimes 	}
4441c7c3c6aSMatthew Dillon 
4451c7c3c6aSMatthew Dillon 	vm_object_pip_wait(object, "swpdea");
4461c7c3c6aSMatthew Dillon 
4471c7c3c6aSMatthew Dillon 	/*
4481c7c3c6aSMatthew Dillon 	 * Free all remaining metadata.  We only bother to free it from
4491c7c3c6aSMatthew Dillon 	 * the swap meta data.  We do not attempt to free swapblk's still
4501c7c3c6aSMatthew Dillon 	 * associated with vm_page_t's for this object.  We do not care
4511c7c3c6aSMatthew Dillon 	 * if paging is still in progress on some objects.
4521c7c3c6aSMatthew Dillon 	 */
4531c7c3c6aSMatthew Dillon 
4541c7c3c6aSMatthew Dillon 	swp_pager_meta_free_all(object);
4551c7c3c6aSMatthew Dillon }
4561c7c3c6aSMatthew Dillon 
4571c7c3c6aSMatthew Dillon /************************************************************************
4581c7c3c6aSMatthew Dillon  *			SWAP PAGER BITMAP ROUTINES			*
4591c7c3c6aSMatthew Dillon  ************************************************************************/
4601c7c3c6aSMatthew Dillon 
4611c7c3c6aSMatthew Dillon /*
4621c7c3c6aSMatthew Dillon  * SWP_PAGER_GETSWAPSPACE() -	allocate raw swap space
4631c7c3c6aSMatthew Dillon  *
4641c7c3c6aSMatthew Dillon  *	Allocate swap for the requested number of pages.  The starting
4651c7c3c6aSMatthew Dillon  *	swap block number (a page index) is returned or SWAPBLK_NONE
4661c7c3c6aSMatthew Dillon  *	if the allocation failed.
4671c7c3c6aSMatthew Dillon  *
4681c7c3c6aSMatthew Dillon  *	Also has the side effect of advising that somebody made a mistake
4691c7c3c6aSMatthew Dillon  *	when they configured swap and didn't configure enough.
4701c7c3c6aSMatthew Dillon  *
4711c7c3c6aSMatthew Dillon  *	Must be called at splvm() to avoid races with bitmap frees from
4721c7c3c6aSMatthew Dillon  *	vm_page_remove() aka swap_pager_page_removed().
4731c7c3c6aSMatthew Dillon  *
4741c7c3c6aSMatthew Dillon  *	This routine may not block
4751c7c3c6aSMatthew Dillon  *	This routine must be called at splvm().
4761c7c3c6aSMatthew Dillon  */
4771c7c3c6aSMatthew Dillon 
4781c7c3c6aSMatthew Dillon static __inline daddr_t
4791c7c3c6aSMatthew Dillon swp_pager_getswapspace(npages)
4801c7c3c6aSMatthew Dillon 	int npages;
4811c7c3c6aSMatthew Dillon {
4821c7c3c6aSMatthew Dillon 	daddr_t blk;
4831c7c3c6aSMatthew Dillon 
4841c7c3c6aSMatthew Dillon 	if ((blk = blist_alloc(swapblist, npages)) == SWAPBLK_NONE) {
4852b0d37a4SMatthew Dillon 		if (swap_pager_full != 2) {
4861c7c3c6aSMatthew Dillon 			printf("swap_pager_getswapspace: failed\n");
4872b0d37a4SMatthew Dillon 			swap_pager_full = 2;
48820d3034fSMatthew Dillon 			swap_pager_almost_full = 1;
4892b0d37a4SMatthew Dillon 		}
4901c7c3c6aSMatthew Dillon 	} else {
4911c7c3c6aSMatthew Dillon 		vm_swap_size -= npages;
4921c7c3c6aSMatthew Dillon 		swp_sizecheck();
4931c7c3c6aSMatthew Dillon 	}
4941c7c3c6aSMatthew Dillon 	return(blk);
49526f9a767SRodney W. Grimes }
49626f9a767SRodney W. Grimes 
49726f9a767SRodney W. Grimes /*
4981c7c3c6aSMatthew Dillon  * SWP_PAGER_FREESWAPSPACE() -	free raw swap space
4991c7c3c6aSMatthew Dillon  *
5001c7c3c6aSMatthew Dillon  *	This routine returns the specified swap blocks back to the bitmap.
5011c7c3c6aSMatthew Dillon  *
5021c7c3c6aSMatthew Dillon  *	Note:  This routine may not block (it could in the old swap code),
5031c7c3c6aSMatthew Dillon  *	and through the use of the new blist routines it does not block.
5041c7c3c6aSMatthew Dillon  *
5051c7c3c6aSMatthew Dillon  *	We must be called at splvm() to avoid races with bitmap frees from
5061c7c3c6aSMatthew Dillon  *	vm_page_remove() aka swap_pager_page_removed().
5071c7c3c6aSMatthew Dillon  *
5081c7c3c6aSMatthew Dillon  *	This routine may not block
5091c7c3c6aSMatthew Dillon  *	This routine must be called at splvm().
51026f9a767SRodney W. Grimes  */
5111c7c3c6aSMatthew Dillon 
5121c7c3c6aSMatthew Dillon static __inline void
5131c7c3c6aSMatthew Dillon swp_pager_freeswapspace(blk, npages)
5141c7c3c6aSMatthew Dillon 	daddr_t blk;
5151c7c3c6aSMatthew Dillon 	int npages;
5160d94caffSDavid Greenman {
5171c7c3c6aSMatthew Dillon 	blist_free(swapblist, blk, npages);
5181c7c3c6aSMatthew Dillon 	vm_swap_size += npages;
5191c7c3c6aSMatthew Dillon 	swp_sizecheck();
52026f9a767SRodney W. Grimes }
5211c7c3c6aSMatthew Dillon 
52226f9a767SRodney W. Grimes /*
5231c7c3c6aSMatthew Dillon  * SWAP_PAGER_FREESPACE() -	frees swap blocks associated with a page
5241c7c3c6aSMatthew Dillon  *				range within an object.
5251c7c3c6aSMatthew Dillon  *
5261c7c3c6aSMatthew Dillon  *	This is a globally accessible routine.
5271c7c3c6aSMatthew Dillon  *
5281c7c3c6aSMatthew Dillon  *	This routine removes swapblk assignments from swap metadata.
5291c7c3c6aSMatthew Dillon  *
5301c7c3c6aSMatthew Dillon  *	The external callers of this routine typically have already destroyed
5311c7c3c6aSMatthew Dillon  *	or renamed vm_page_t's associated with this range in the object so
5321c7c3c6aSMatthew Dillon  *	we should be ok.
53326f9a767SRodney W. Grimes  */
5341c7c3c6aSMatthew Dillon 
53526f9a767SRodney W. Grimes void
53624a1cce3SDavid Greenman swap_pager_freespace(object, start, size)
53724a1cce3SDavid Greenman 	vm_object_t object;
538a316d390SJohn Dyson 	vm_pindex_t start;
539a316d390SJohn Dyson 	vm_size_t size;
54026f9a767SRodney W. Grimes {
5411c7c3c6aSMatthew Dillon 	swp_pager_meta_free(object, start, size);
54226f9a767SRodney W. Grimes }
54326f9a767SRodney W. Grimes 
5440a47b48bSJohn Dyson /*
5451c7c3c6aSMatthew Dillon  * SWAP_PAGER_COPY() -  copy blocks from source pager to destination pager
5461c7c3c6aSMatthew Dillon  *			and destroy the source.
5471c7c3c6aSMatthew Dillon  *
5481c7c3c6aSMatthew Dillon  *	Copy any valid swapblks from the source to the destination.  In
5491c7c3c6aSMatthew Dillon  *	cases where both the source and destination have a valid swapblk,
5501c7c3c6aSMatthew Dillon  *	we keep the destination's.
5511c7c3c6aSMatthew Dillon  *
5521c7c3c6aSMatthew Dillon  *	This routine is allowed to block.  It may block allocating metadata
5531c7c3c6aSMatthew Dillon  *	indirectly through swp_pager_meta_build() or if paging is still in
5541c7c3c6aSMatthew Dillon  *	progress on the source.
5551c7c3c6aSMatthew Dillon  *
5561c7c3c6aSMatthew Dillon  *	XXX vm_page_collapse() kinda expects us not to block because we
5571c7c3c6aSMatthew Dillon  *	supposedly do not need to allocate memory, but for the moment we
5581c7c3c6aSMatthew Dillon  *	*may* have to get a little memory from the zone allocator, but
5591c7c3c6aSMatthew Dillon  *	it is taken from the interrupt memory.  We should be ok.
5601c7c3c6aSMatthew Dillon  *
5611c7c3c6aSMatthew Dillon  *	The source object contains no vm_page_t's (which is just as well)
5621c7c3c6aSMatthew Dillon  *
5631c7c3c6aSMatthew Dillon  *	The source object is of type OBJT_SWAP.
5641c7c3c6aSMatthew Dillon  *
5651c7c3c6aSMatthew Dillon  *	The source and destination objects must be
5665e24f1a2SMatthew Dillon  *	locked or inaccessible (XXX are they ?)
56726f9a767SRodney W. Grimes  */
56826f9a767SRodney W. Grimes 
56926f9a767SRodney W. Grimes void
5701c7c3c6aSMatthew Dillon swap_pager_copy(srcobject, dstobject, offset, destroysource)
57124a1cce3SDavid Greenman 	vm_object_t srcobject;
57224a1cce3SDavid Greenman 	vm_object_t dstobject;
573a316d390SJohn Dyson 	vm_pindex_t offset;
574c0877f10SJohn Dyson 	int destroysource;
57526f9a767SRodney W. Grimes {
576a316d390SJohn Dyson 	vm_pindex_t i;
57726f9a767SRodney W. Grimes 
57826f9a767SRodney W. Grimes 	/*
5791c7c3c6aSMatthew Dillon 	 * If destroysource is set, we remove the source object from the
5801c7c3c6aSMatthew Dillon 	 * swap_pager internal queue now.
58126f9a767SRodney W. Grimes 	 */
5821c7c3c6aSMatthew Dillon 
583cbd8ec09SJohn Dyson 	if (destroysource) {
58424a1cce3SDavid Greenman 		if (srcobject->handle == NULL) {
5851c7c3c6aSMatthew Dillon 			TAILQ_REMOVE(
5861c7c3c6aSMatthew Dillon 			    &swap_pager_un_object_list,
5871c7c3c6aSMatthew Dillon 			    srcobject,
5881c7c3c6aSMatthew Dillon 			    pager_object_list
5891c7c3c6aSMatthew Dillon 			);
59026f9a767SRodney W. Grimes 		} else {
5911c7c3c6aSMatthew Dillon 			TAILQ_REMOVE(
5921c7c3c6aSMatthew Dillon 			    NOBJLIST(srcobject->handle),
5931c7c3c6aSMatthew Dillon 			    srcobject,
5941c7c3c6aSMatthew Dillon 			    pager_object_list
5951c7c3c6aSMatthew Dillon 			);
59626f9a767SRodney W. Grimes 		}
597cbd8ec09SJohn Dyson 	}
59826f9a767SRodney W. Grimes 
5991c7c3c6aSMatthew Dillon 	/*
6001c7c3c6aSMatthew Dillon 	 * transfer source to destination.
6011c7c3c6aSMatthew Dillon 	 */
6021c7c3c6aSMatthew Dillon 
6031c7c3c6aSMatthew Dillon 	for (i = 0; i < dstobject->size; ++i) {
6041c7c3c6aSMatthew Dillon 		daddr_t dstaddr;
6051c7c3c6aSMatthew Dillon 
6061c7c3c6aSMatthew Dillon 		/*
6071c7c3c6aSMatthew Dillon 		 * Locate (without changing) the swapblk on the destination,
6081c7c3c6aSMatthew Dillon 		 * unless it is invalid in which case free it silently, or
6091c7c3c6aSMatthew Dillon 		 * if the destination is a resident page, in which case the
6101c7c3c6aSMatthew Dillon 		 * source is thrown away.
6111c7c3c6aSMatthew Dillon 		 */
6121c7c3c6aSMatthew Dillon 
6131c7c3c6aSMatthew Dillon 		dstaddr = swp_pager_meta_ctl(dstobject, i, 0);
6141c7c3c6aSMatthew Dillon 
6151c7c3c6aSMatthew Dillon 		if (dstaddr == SWAPBLK_NONE) {
6161c7c3c6aSMatthew Dillon 			/*
6171c7c3c6aSMatthew Dillon 			 * Destination has no swapblk and is not resident,
6181c7c3c6aSMatthew Dillon 			 * copy source.
6191c7c3c6aSMatthew Dillon 			 */
6201c7c3c6aSMatthew Dillon 			daddr_t srcaddr;
6211c7c3c6aSMatthew Dillon 
6221c7c3c6aSMatthew Dillon 			srcaddr = swp_pager_meta_ctl(
6231c7c3c6aSMatthew Dillon 			    srcobject,
6241c7c3c6aSMatthew Dillon 			    i + offset,
6251c7c3c6aSMatthew Dillon 			    SWM_POP
6261c7c3c6aSMatthew Dillon 			);
6271c7c3c6aSMatthew Dillon 
6281c7c3c6aSMatthew Dillon 			if (srcaddr != SWAPBLK_NONE)
6291c7c3c6aSMatthew Dillon 				swp_pager_meta_build(dstobject, i, srcaddr, 1);
6301c7c3c6aSMatthew Dillon 		} else {
6311c7c3c6aSMatthew Dillon 			/*
6321c7c3c6aSMatthew Dillon 			 * Destination has valid swapblk or it is represented
6331c7c3c6aSMatthew Dillon 			 * by a resident page.  We destroy the sourceblock.
6341c7c3c6aSMatthew Dillon 			 */
6351c7c3c6aSMatthew Dillon 
6361c7c3c6aSMatthew Dillon 			swp_pager_meta_ctl(srcobject, i + offset, SWM_FREE);
6371c7c3c6aSMatthew Dillon 		}
63826f9a767SRodney W. Grimes 	}
63926f9a767SRodney W. Grimes 
64026f9a767SRodney W. Grimes 	/*
6411c7c3c6aSMatthew Dillon 	 * Free left over swap blocks in source.
6421c7c3c6aSMatthew Dillon 	 *
6431c7c3c6aSMatthew Dillon 	 * We have to revert the type to OBJT_DEFAULT so we do not accidently
6441c7c3c6aSMatthew Dillon 	 * double-remove the object from the swap queues.
64526f9a767SRodney W. Grimes 	 */
64626f9a767SRodney W. Grimes 
647c0877f10SJohn Dyson 	if (destroysource) {
6481c7c3c6aSMatthew Dillon 		swp_pager_meta_free_all(srcobject);
6491c7c3c6aSMatthew Dillon 		/*
6501c7c3c6aSMatthew Dillon 		 * Reverting the type is not necessary, the caller is going
6511c7c3c6aSMatthew Dillon 		 * to destroy srcobject directly, but I'm doing it here
6521c7c3c6aSMatthew Dillon 		 * for consistancy since we've removed the object from its
6531c7c3c6aSMatthew Dillon 		 * queues.
6541c7c3c6aSMatthew Dillon 		 */
6551c7c3c6aSMatthew Dillon 		srcobject->type = OBJT_DEFAULT;
656c0877f10SJohn Dyson 	}
65726f9a767SRodney W. Grimes 	return;
65826f9a767SRodney W. Grimes }
65926f9a767SRodney W. Grimes 
660df8bae1dSRodney W. Grimes /*
6611c7c3c6aSMatthew Dillon  * SWAP_PAGER_HASPAGE() -	determine if we have good backing store for
6621c7c3c6aSMatthew Dillon  *				the requested page.
6631c7c3c6aSMatthew Dillon  *
6641c7c3c6aSMatthew Dillon  *	We determine whether good backing store exists for the requested
6651c7c3c6aSMatthew Dillon  *	page and return TRUE if it does, FALSE if it doesn't.
6661c7c3c6aSMatthew Dillon  *
6671c7c3c6aSMatthew Dillon  *	If TRUE, we also try to determine how much valid, contiguous backing
6681c7c3c6aSMatthew Dillon  *	store exists before and after the requested page within a reasonable
6691c7c3c6aSMatthew Dillon  *	distance.  We do not try to restrict it to the swap device stripe
6701c7c3c6aSMatthew Dillon  *	(that is handled in getpages/putpages).  It probably isn't worth
6711c7c3c6aSMatthew Dillon  *	doing here.
672df8bae1dSRodney W. Grimes  */
67326f9a767SRodney W. Grimes 
6741c7c3c6aSMatthew Dillon boolean_t
675a316d390SJohn Dyson swap_pager_haspage(object, pindex, before, after)
67624a1cce3SDavid Greenman 	vm_object_t object;
677a316d390SJohn Dyson 	vm_pindex_t pindex;
67824a1cce3SDavid Greenman 	int *before;
67924a1cce3SDavid Greenman 	int *after;
68026f9a767SRodney W. Grimes {
6811c7c3c6aSMatthew Dillon 	daddr_t blk0;
68226f9a767SRodney W. Grimes 
6831c7c3c6aSMatthew Dillon 	/*
6841c7c3c6aSMatthew Dillon 	 * do we have good backing store at the requested index ?
6851c7c3c6aSMatthew Dillon 	 */
6861c7c3c6aSMatthew Dillon 
6871c7c3c6aSMatthew Dillon 	blk0 = swp_pager_meta_ctl(object, pindex, 0);
6881c7c3c6aSMatthew Dillon 
6891c7c3c6aSMatthew Dillon 	if (blk0 & SWAPBLK_NONE) {
6901c7c3c6aSMatthew Dillon 		if (before)
69124a1cce3SDavid Greenman 			*before = 0;
6921c7c3c6aSMatthew Dillon 		if (after)
69324a1cce3SDavid Greenman 			*after = 0;
69426f9a767SRodney W. Grimes 		return (FALSE);
69526f9a767SRodney W. Grimes 	}
69626f9a767SRodney W. Grimes 
69726f9a767SRodney W. Grimes 	/*
6981c7c3c6aSMatthew Dillon 	 * find backwards-looking contiguous good backing store
699e47ed70bSJohn Dyson 	 */
700e47ed70bSJohn Dyson 
7011c7c3c6aSMatthew Dillon 	if (before != NULL) {
70226f9a767SRodney W. Grimes 		int i;
7030d94caffSDavid Greenman 
7041c7c3c6aSMatthew Dillon 		for (i = 1; i < (SWB_NPAGES/2); ++i) {
7051c7c3c6aSMatthew Dillon 			daddr_t blk;
7061c7c3c6aSMatthew Dillon 
7071c7c3c6aSMatthew Dillon 			if (i > pindex)
7081c7c3c6aSMatthew Dillon 				break;
7091c7c3c6aSMatthew Dillon 			blk = swp_pager_meta_ctl(object, pindex - i, 0);
7101c7c3c6aSMatthew Dillon 			if (blk & SWAPBLK_NONE)
7111c7c3c6aSMatthew Dillon 				break;
7121c7c3c6aSMatthew Dillon 			if (blk != blk0 - i)
7131c7c3c6aSMatthew Dillon 				break;
714ffc82b0aSJohn Dyson 		}
7151c7c3c6aSMatthew Dillon 		*before = (i - 1);
71626f9a767SRodney W. Grimes 	}
71726f9a767SRodney W. Grimes 
71826f9a767SRodney W. Grimes 	/*
7191c7c3c6aSMatthew Dillon 	 * find forward-looking contiguous good backing store
72026f9a767SRodney W. Grimes 	 */
7211c7c3c6aSMatthew Dillon 
7221c7c3c6aSMatthew Dillon 	if (after != NULL) {
7231c7c3c6aSMatthew Dillon 		int i;
7241c7c3c6aSMatthew Dillon 
7251c7c3c6aSMatthew Dillon 		for (i = 1; i < (SWB_NPAGES/2); ++i) {
7261c7c3c6aSMatthew Dillon 			daddr_t blk;
7271c7c3c6aSMatthew Dillon 
7281c7c3c6aSMatthew Dillon 			blk = swp_pager_meta_ctl(object, pindex + i, 0);
7291c7c3c6aSMatthew Dillon 			if (blk & SWAPBLK_NONE)
7301c7c3c6aSMatthew Dillon 				break;
7311c7c3c6aSMatthew Dillon 			if (blk != blk0 + i)
7321c7c3c6aSMatthew Dillon 				break;
73326f9a767SRodney W. Grimes 		}
7341c7c3c6aSMatthew Dillon 		*after = (i - 1);
7351c7c3c6aSMatthew Dillon 	}
7361c7c3c6aSMatthew Dillon 
7371c7c3c6aSMatthew Dillon 	return (TRUE);
7381c7c3c6aSMatthew Dillon }
7391c7c3c6aSMatthew Dillon 
7401c7c3c6aSMatthew Dillon /*
7411c7c3c6aSMatthew Dillon  * SWAP_PAGER_PAGE_UNSWAPPED() - remove swap backing store related to page
7421c7c3c6aSMatthew Dillon  *
7431c7c3c6aSMatthew Dillon  *	This removes any associated swap backing store, whether valid or
7441c7c3c6aSMatthew Dillon  *	not, from the page.
7451c7c3c6aSMatthew Dillon  *
7461c7c3c6aSMatthew Dillon  *	This routine is typically called when a page is made dirty, at
7471c7c3c6aSMatthew Dillon  *	which point any associated swap can be freed.  MADV_FREE also
7481c7c3c6aSMatthew Dillon  *	calls us in a special-case situation
7491c7c3c6aSMatthew Dillon  *
7501c7c3c6aSMatthew Dillon  *	NOTE!!!  If the page is clean and the swap was valid, the caller
7511c7c3c6aSMatthew Dillon  *	should make the page dirty before calling this routine.  This routine
7521c7c3c6aSMatthew Dillon  *	does NOT change the m->dirty status of the page.  Also: MADV_FREE
7531c7c3c6aSMatthew Dillon  *	depends on it.
7541c7c3c6aSMatthew Dillon  *
7551c7c3c6aSMatthew Dillon  *	This routine may not block
7561c7c3c6aSMatthew Dillon  */
7571c7c3c6aSMatthew Dillon 
7581c7c3c6aSMatthew Dillon static void
7591c7c3c6aSMatthew Dillon swap_pager_unswapped(m)
7601c7c3c6aSMatthew Dillon 	vm_page_t m;
7611c7c3c6aSMatthew Dillon {
7621c7c3c6aSMatthew Dillon 	swp_pager_meta_ctl(m->object, m->pindex, SWM_FREE);
7631c7c3c6aSMatthew Dillon }
7641c7c3c6aSMatthew Dillon 
7651c7c3c6aSMatthew Dillon /*
7661c7c3c6aSMatthew Dillon  * SWAP_PAGER_GETPAGES() - bring pages in from swap
7671c7c3c6aSMatthew Dillon  *
7681c7c3c6aSMatthew Dillon  *	Attempt to retrieve (m, count) pages from backing store, but make
7691c7c3c6aSMatthew Dillon  *	sure we retrieve at least m[reqpage].  We try to load in as large
7701c7c3c6aSMatthew Dillon  *	a chunk surrounding m[reqpage] as is contiguous in swap and which
7711c7c3c6aSMatthew Dillon  *	belongs to the same object.
7721c7c3c6aSMatthew Dillon  *
7731c7c3c6aSMatthew Dillon  *	The code is designed for asynchronous operation and
7741c7c3c6aSMatthew Dillon  *	immediate-notification of 'reqpage' but tends not to be
7751c7c3c6aSMatthew Dillon  *	used that way.  Please do not optimize-out this algorithmic
7761c7c3c6aSMatthew Dillon  *	feature, I intend to improve on it in the future.
7771c7c3c6aSMatthew Dillon  *
7781c7c3c6aSMatthew Dillon  *	The parent has a single vm_object_pip_add() reference prior to
7791c7c3c6aSMatthew Dillon  *	calling us and we should return with the same.
7801c7c3c6aSMatthew Dillon  *
7811c7c3c6aSMatthew Dillon  *	The parent has BUSY'd the pages.  We should return with 'm'
7821c7c3c6aSMatthew Dillon  *	left busy, but the others adjusted.
7831c7c3c6aSMatthew Dillon  */
78426f9a767SRodney W. Grimes 
785f708ef1bSPoul-Henning Kamp static int
78624a1cce3SDavid Greenman swap_pager_getpages(object, m, count, reqpage)
78724a1cce3SDavid Greenman 	vm_object_t object;
78826f9a767SRodney W. Grimes 	vm_page_t *m;
78926f9a767SRodney W. Grimes 	int count, reqpage;
790df8bae1dSRodney W. Grimes {
7911c7c3c6aSMatthew Dillon 	struct buf *bp;
7921c7c3c6aSMatthew Dillon 	vm_page_t mreq;
7931c7c3c6aSMatthew Dillon 	int s;
79426f9a767SRodney W. Grimes 	int i;
79526f9a767SRodney W. Grimes 	int j;
7961c7c3c6aSMatthew Dillon 	daddr_t blk;
7971c7c3c6aSMatthew Dillon 	vm_offset_t kva;
7981c7c3c6aSMatthew Dillon 	vm_pindex_t lastpindex;
7990d94caffSDavid Greenman 
8001c7c3c6aSMatthew Dillon 	mreq = m[reqpage];
8011c7c3c6aSMatthew Dillon 
8021c7c3c6aSMatthew Dillon #if !defined(MAX_PERF)
8031c7c3c6aSMatthew Dillon 	if (mreq->object != object) {
8041c7c3c6aSMatthew Dillon 		panic("swap_pager_getpages: object mismatch %p/%p",
8051c7c3c6aSMatthew Dillon 		    object,
8061c7c3c6aSMatthew Dillon 		    mreq->object
8071c7c3c6aSMatthew Dillon 		);
80826f9a767SRodney W. Grimes 	}
8091c7c3c6aSMatthew Dillon #endif
8101c7c3c6aSMatthew Dillon 	/*
8111c7c3c6aSMatthew Dillon 	 * Calculate range to retrieve.  The pages have already been assigned
8121c7c3c6aSMatthew Dillon 	 * their swapblks.  We require a *contiguous* range that falls entirely
8131c7c3c6aSMatthew Dillon 	 * within a single device stripe.   If we do not supply it, bad things
8141c7c3c6aSMatthew Dillon 	 * happen.
8151c7c3c6aSMatthew Dillon 	 */
8161c7c3c6aSMatthew Dillon 
8171c7c3c6aSMatthew Dillon 
8181c7c3c6aSMatthew Dillon 	blk = swp_pager_meta_ctl(mreq->object, mreq->pindex, 0);
8191c7c3c6aSMatthew Dillon 
8201c7c3c6aSMatthew Dillon 	for (i = reqpage - 1; i >= 0; --i) {
8211c7c3c6aSMatthew Dillon 		daddr_t iblk;
8221c7c3c6aSMatthew Dillon 
8231c7c3c6aSMatthew Dillon 		iblk = swp_pager_meta_ctl(m[i]->object, m[i]->pindex, 0);
8241c7c3c6aSMatthew Dillon 		if (iblk & SWAPBLK_NONE)
8251c7c3c6aSMatthew Dillon 			break;
8261c7c3c6aSMatthew Dillon 
8271c7c3c6aSMatthew Dillon 		if ((blk ^ iblk) & dmmax_mask)
8281c7c3c6aSMatthew Dillon 			break;
8291c7c3c6aSMatthew Dillon 
8301c7c3c6aSMatthew Dillon 		if (blk != iblk + (reqpage - i))
83126f9a767SRodney W. Grimes 			break;
83226f9a767SRodney W. Grimes 	}
8331c7c3c6aSMatthew Dillon 	++i;
8341c7c3c6aSMatthew Dillon 
8351c7c3c6aSMatthew Dillon 	for (j = reqpage + 1; j < count; ++j) {
8361c7c3c6aSMatthew Dillon 		daddr_t jblk;
8371c7c3c6aSMatthew Dillon 
8381c7c3c6aSMatthew Dillon 		jblk = swp_pager_meta_ctl(m[j]->object, m[j]->pindex, 0);
8391c7c3c6aSMatthew Dillon 		if (jblk & SWAPBLK_NONE)
8401c7c3c6aSMatthew Dillon 			break;
8411c7c3c6aSMatthew Dillon 
8421c7c3c6aSMatthew Dillon 		if ((blk ^ jblk) & dmmax_mask)
8431c7c3c6aSMatthew Dillon 			break;
8441c7c3c6aSMatthew Dillon 
8451c7c3c6aSMatthew Dillon 		if (blk != jblk - (j - reqpage))
8461c7c3c6aSMatthew Dillon 			break;
84726f9a767SRodney W. Grimes 	}
84826f9a767SRodney W. Grimes 
8491c7c3c6aSMatthew Dillon 	/*
8501c7c3c6aSMatthew Dillon 	 * If blk itself is bad, well, we can't do any I/O.  This should
8511c7c3c6aSMatthew Dillon 	 * already be covered as a side effect, but I'm making sure.
8521c7c3c6aSMatthew Dillon 	 */
85326f9a767SRodney W. Grimes 
8541c7c3c6aSMatthew Dillon 	if (blk & SWAPBLK_NONE) {
8551c7c3c6aSMatthew Dillon 		i = reqpage;
8561c7c3c6aSMatthew Dillon 		j = reqpage + 1;
8571c7c3c6aSMatthew Dillon 	}
8581c7c3c6aSMatthew Dillon 
8591c7c3c6aSMatthew Dillon 	/*
8601c7c3c6aSMatthew Dillon 	 * free pages outside our collection range.   Note: we never free
8611c7c3c6aSMatthew Dillon 	 * mreq, it must remain busy throughout.
8621c7c3c6aSMatthew Dillon 	 */
8631c7c3c6aSMatthew Dillon 
8641c7c3c6aSMatthew Dillon 	{
8651c7c3c6aSMatthew Dillon 		int k;
8661c7c3c6aSMatthew Dillon 
8671c7c3c6aSMatthew Dillon 		for (k = 0; k < i; ++k) {
8681c7c3c6aSMatthew Dillon 			vm_page_free(m[k]);
8691c7c3c6aSMatthew Dillon 		}
8701c7c3c6aSMatthew Dillon 		for (k = j; k < count; ++k) {
8711c7c3c6aSMatthew Dillon 			vm_page_free(m[k]);
8721c7c3c6aSMatthew Dillon 		}
8731c7c3c6aSMatthew Dillon 	}
8741c7c3c6aSMatthew Dillon 
8751c7c3c6aSMatthew Dillon 	/*
8761c7c3c6aSMatthew Dillon 	 * Return VM_PAGER_FAIL if we have nothing
8771c7c3c6aSMatthew Dillon 	 * to do.  Return mreq still busy, but the
8781c7c3c6aSMatthew Dillon 	 * others unbusied.
8791c7c3c6aSMatthew Dillon 	 */
8801c7c3c6aSMatthew Dillon 
8811c7c3c6aSMatthew Dillon 	if (blk & SWAPBLK_NONE)
88226f9a767SRodney W. Grimes 		return(VM_PAGER_FAIL);
883df8bae1dSRodney W. Grimes 
88426f9a767SRodney W. Grimes 
88516f62314SDavid Greenman 	/*
88616f62314SDavid Greenman 	 * Get a swap buffer header to perform the IO
88716f62314SDavid Greenman 	 */
8881c7c3c6aSMatthew Dillon 
8891c7c3c6aSMatthew Dillon 	bp = getpbuf(&nsw_rcount);
89016f62314SDavid Greenman 	kva = (vm_offset_t) bp->b_data;
89126f9a767SRodney W. Grimes 
89216f62314SDavid Greenman 	/*
89316f62314SDavid Greenman 	 * map our page(s) into kva for input
8941c7c3c6aSMatthew Dillon 	 *
8951c7c3c6aSMatthew Dillon 	 * NOTE: B_PAGING is set by pbgetvp()
89616f62314SDavid Greenman 	 */
89716f62314SDavid Greenman 
8981c7c3c6aSMatthew Dillon 	pmap_qenter(kva, m + i, j - i);
8991c7c3c6aSMatthew Dillon 
9001c7c3c6aSMatthew Dillon 	bp->b_flags = B_BUSY | B_READ | B_CALL;
9011c7c3c6aSMatthew Dillon 	bp->b_iodone = swp_pager_async_iodone;
902df8bae1dSRodney W. Grimes 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
90326f9a767SRodney W. Grimes 	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
90426f9a767SRodney W. Grimes 	crhold(bp->b_rcred);
90526f9a767SRodney W. Grimes 	crhold(bp->b_wcred);
906ab3f7469SPoul-Henning Kamp 	bp->b_data = (caddr_t) kva;
9071c7c3c6aSMatthew Dillon 	/*
9081c7c3c6aSMatthew Dillon 	 * b_blkno is in page-sized chunks.  swapblk is valid, too, so
9091c7c3c6aSMatthew Dillon 	 * we don't have to mask it against SWAPBLK_MASK.
9101c7c3c6aSMatthew Dillon 	 */
9111c7c3c6aSMatthew Dillon 	bp->b_blkno = blk - (reqpage - i);
9121c7c3c6aSMatthew Dillon 	bp->b_bcount = PAGE_SIZE * (j - i);
9131c7c3c6aSMatthew Dillon 	bp->b_bufsize = PAGE_SIZE * (j - i);
9141c7c3c6aSMatthew Dillon 	bp->b_pager.pg_reqpage = reqpage - i;
9151c7c3c6aSMatthew Dillon 
9161c7c3c6aSMatthew Dillon 	{
9171c7c3c6aSMatthew Dillon 		int k;
9181c7c3c6aSMatthew Dillon 
9191c7c3c6aSMatthew Dillon 		for (k = i; k < j; ++k) {
9201c7c3c6aSMatthew Dillon 			bp->b_pages[k - i] = m[k];
9211c7c3c6aSMatthew Dillon 			vm_page_flag_set(m[k], PG_SWAPINPROG);
9221c7c3c6aSMatthew Dillon 		}
9231c7c3c6aSMatthew Dillon 	}
9241c7c3c6aSMatthew Dillon 	bp->b_npages = j - i;
92526f9a767SRodney W. Grimes 
9260d94caffSDavid Greenman 	pbgetvp(swapdev_vp, bp);
927df8bae1dSRodney W. Grimes 
928976e77fcSDavid Greenman 	cnt.v_swapin++;
9291c7c3c6aSMatthew Dillon 	cnt.v_swappgsin += bp->b_npages;
9301c7c3c6aSMatthew Dillon 
931df8bae1dSRodney W. Grimes 	/*
9321c7c3c6aSMatthew Dillon 	 * We still hold the lock on mreq, and our automatic completion routine
9331c7c3c6aSMatthew Dillon 	 * does not remove it.
934df8bae1dSRodney W. Grimes 	 */
9351c7c3c6aSMatthew Dillon 
9361c7c3c6aSMatthew Dillon 	vm_object_pip_add(mreq->object, bp->b_npages);
9371c7c3c6aSMatthew Dillon 	lastpindex = m[j-1]->pindex;
9381c7c3c6aSMatthew Dillon 
9391c7c3c6aSMatthew Dillon 	/*
9401c7c3c6aSMatthew Dillon 	 * perform the I/O.  NOTE!!!  bp cannot be considered valid after
9411c7c3c6aSMatthew Dillon 	 * this point because we automatically release it on completion.
9421c7c3c6aSMatthew Dillon 	 * Instead, we look at the one page we are interested in which we
9431c7c3c6aSMatthew Dillon 	 * still hold a lock on even through the I/O completion.
9441c7c3c6aSMatthew Dillon 	 *
9451c7c3c6aSMatthew Dillon 	 * The other pages in our m[] array are also released on completion,
9461c7c3c6aSMatthew Dillon 	 * so we cannot assume they are valid anymore either.
9471c7c3c6aSMatthew Dillon 	 *
9481c7c3c6aSMatthew Dillon 	 * NOTE: b_blkno is destroyed by the call to VOP_STRATEGY
9491c7c3c6aSMatthew Dillon 	 */
9501c7c3c6aSMatthew Dillon 
951fd5d1124SJulian Elischer 	VOP_STRATEGY(bp->b_vp, bp);
95226f9a767SRodney W. Grimes 
95326f9a767SRodney W. Grimes 	/*
9541c7c3c6aSMatthew Dillon 	 * wait for the page we want to complete.  PG_SWAPINPROG is always
9551c7c3c6aSMatthew Dillon 	 * cleared on completion.  If an I/O error occurs, SWAPBLK_NONE
9561c7c3c6aSMatthew Dillon 	 * is set in the meta-data.
95726f9a767SRodney W. Grimes 	 */
9581b119d9dSDavid Greenman 
9591c7c3c6aSMatthew Dillon 	s = splvm();
9601c7c3c6aSMatthew Dillon 
9611c7c3c6aSMatthew Dillon 	while ((mreq->flags & PG_SWAPINPROG) != 0) {
9621c7c3c6aSMatthew Dillon 		vm_page_flag_set(mreq, PG_WANTED | PG_REFERENCED);
9631c7c3c6aSMatthew Dillon 		cnt.v_intrans++;
9641c7c3c6aSMatthew Dillon 		if (tsleep(mreq, PSWP, "swread", hz*20)) {
965ac1e407bSBruce Evans 			printf(
9661c7c3c6aSMatthew Dillon 			    "swap_pager: indefinite wait buffer: device:"
9671c7c3c6aSMatthew Dillon 				" %#lx, blkno: %ld, size: %ld\n",
9681c7c3c6aSMatthew Dillon 			    (u_long)bp->b_dev, (long)bp->b_blkno,
9691c7c3c6aSMatthew Dillon 			    (long)bp->b_bcount
9701c7c3c6aSMatthew Dillon 			);
9711c7c3c6aSMatthew Dillon 		}
9721b119d9dSDavid Greenman 	}
97326f9a767SRodney W. Grimes 
974df8bae1dSRodney W. Grimes 	splx(s);
97526f9a767SRodney W. Grimes 
97626f9a767SRodney W. Grimes 	/*
9771c7c3c6aSMatthew Dillon 	 * mreq is left bussied after completion, but all the other pages
9781c7c3c6aSMatthew Dillon 	 * are freed.  If we had an unrecoverable read error the page will
9791c7c3c6aSMatthew Dillon 	 * not be valid.
98026f9a767SRodney W. Grimes 	 */
98126f9a767SRodney W. Grimes 
9821c7c3c6aSMatthew Dillon 	if (mreq->valid != VM_PAGE_BITS_ALL) {
9831c7c3c6aSMatthew Dillon 		return(VM_PAGER_ERROR);
98426f9a767SRodney W. Grimes 	} else {
9851c7c3c6aSMatthew Dillon 		mreq->object->last_read = lastpindex;
9861c7c3c6aSMatthew Dillon 		return(VM_PAGER_OK);
98726f9a767SRodney W. Grimes 	}
9881c7c3c6aSMatthew Dillon 
9891c7c3c6aSMatthew Dillon 	/*
9901c7c3c6aSMatthew Dillon 	 * A final note: in a low swap situation, we cannot deallocate swap
9911c7c3c6aSMatthew Dillon 	 * and mark a page dirty here because the caller is likely to mark
9921c7c3c6aSMatthew Dillon 	 * the page clean when we return, causing the page to possibly revert
9931c7c3c6aSMatthew Dillon 	 * to all-zero's later.
9941c7c3c6aSMatthew Dillon 	 */
995df8bae1dSRodney W. Grimes }
996df8bae1dSRodney W. Grimes 
9971c7c3c6aSMatthew Dillon /*
9981c7c3c6aSMatthew Dillon  *	swap_pager_putpages:
9991c7c3c6aSMatthew Dillon  *
10001c7c3c6aSMatthew Dillon  *	Assign swap (if necessary) and initiate I/O on the specified pages.
10011c7c3c6aSMatthew Dillon  *
10021c7c3c6aSMatthew Dillon  *	We support both OBJT_DEFAULT and OBJT_SWAP objects.  DEFAULT objects
10031c7c3c6aSMatthew Dillon  *	are automatically converted to SWAP objects.
10041c7c3c6aSMatthew Dillon  *
10051c7c3c6aSMatthew Dillon  *	In a low memory situation we may block in VOP_STRATEGY(), but the new
10061c7c3c6aSMatthew Dillon  *	vm_page reservation system coupled with properly written VFS devices
10071c7c3c6aSMatthew Dillon  *	should ensure that no low-memory deadlock occurs.  This is an area
10081c7c3c6aSMatthew Dillon  *	which needs work.
10091c7c3c6aSMatthew Dillon  *
10101c7c3c6aSMatthew Dillon  *	The parent has N vm_object_pip_add() references prior to
10111c7c3c6aSMatthew Dillon  *	calling us and will remove references for rtvals[] that are
10121c7c3c6aSMatthew Dillon  *	not set to VM_PAGER_PEND.  We need to remove the rest on I/O
10131c7c3c6aSMatthew Dillon  *	completion.
10141c7c3c6aSMatthew Dillon  *
10151c7c3c6aSMatthew Dillon  *	The parent has soft-busy'd the pages it passes us and will unbusy
10161c7c3c6aSMatthew Dillon  *	those whose rtvals[] entry is not set to VM_PAGER_PEND on return.
10171c7c3c6aSMatthew Dillon  *	We need to unbusy the rest on I/O completion.
10181c7c3c6aSMatthew Dillon  */
10191c7c3c6aSMatthew Dillon 
1020e4542174SMatthew Dillon void
102124a1cce3SDavid Greenman swap_pager_putpages(object, m, count, sync, rtvals)
102224a1cce3SDavid Greenman 	vm_object_t object;
102326f9a767SRodney W. Grimes 	vm_page_t *m;
102426f9a767SRodney W. Grimes 	int count;
102524a1cce3SDavid Greenman 	boolean_t sync;
102626f9a767SRodney W. Grimes 	int *rtvals;
1027df8bae1dSRodney W. Grimes {
10281c7c3c6aSMatthew Dillon 	int i;
10291c7c3c6aSMatthew Dillon 	int n = 0;
1030df8bae1dSRodney W. Grimes 
10311c7c3c6aSMatthew Dillon #if !defined(MAX_PERF)
10321c7c3c6aSMatthew Dillon 	if (count && m[0]->object != object) {
10331c7c3c6aSMatthew Dillon 		panic("swap_pager_getpages: object mismatch %p/%p",
10341c7c3c6aSMatthew Dillon 		    object,
10351c7c3c6aSMatthew Dillon 		    m[0]->object
10361c7c3c6aSMatthew Dillon 		);
10371c7c3c6aSMatthew Dillon 	}
10381c7c3c6aSMatthew Dillon #endif
10391c7c3c6aSMatthew Dillon 	/*
10401c7c3c6aSMatthew Dillon 	 * Step 1
10411c7c3c6aSMatthew Dillon 	 *
10421c7c3c6aSMatthew Dillon 	 * Turn object into OBJT_SWAP
10431c7c3c6aSMatthew Dillon 	 * check for bogus sysops
10441c7c3c6aSMatthew Dillon 	 * force sync if not pageout process
10451c7c3c6aSMatthew Dillon 	 */
1046e736cd05SJohn Dyson 
10471c7c3c6aSMatthew Dillon 	if (object->type != OBJT_SWAP) {
10481c7c3c6aSMatthew Dillon 		swp_pager_meta_build(object, 0, SWAPBLK_NONE, 0);
10495663e6deSDavid Greenman 	}
1050e47ed70bSJohn Dyson 
1051e47ed70bSJohn Dyson 	if (curproc != pageproc)
1052e47ed70bSJohn Dyson 		sync = TRUE;
105326f9a767SRodney W. Grimes 
10541c7c3c6aSMatthew Dillon 	/*
10551c7c3c6aSMatthew Dillon 	 * Step 2
10561c7c3c6aSMatthew Dillon 	 *
1057327f4e83SMatthew Dillon 	 * Update nsw parameters from swap_async_max and swap_cluster_max
1058327f4e83SMatthew Dillon 	 * sysctl values.  Do not let the sysop crash the machine with bogus
1059327f4e83SMatthew Dillon 	 * numbers.
1060327f4e83SMatthew Dillon 	 */
1061327f4e83SMatthew Dillon 
1062327f4e83SMatthew Dillon #ifndef DISALLOW_SWAP_TUNE
1063327f4e83SMatthew Dillon 
1064327f4e83SMatthew Dillon 	if (swap_async_max != nsw_wcount_async_max) {
1065327f4e83SMatthew Dillon 		int n;
1066327f4e83SMatthew Dillon 		int s;
1067327f4e83SMatthew Dillon 
1068327f4e83SMatthew Dillon 		/*
1069327f4e83SMatthew Dillon 		 * limit range
1070327f4e83SMatthew Dillon 		 */
1071327f4e83SMatthew Dillon 		if ((n = swap_async_max) > nswbuf / 2)
1072327f4e83SMatthew Dillon 			n = nswbuf / 2;
1073327f4e83SMatthew Dillon 		if (n < 1)
1074327f4e83SMatthew Dillon 			n = 1;
1075327f4e83SMatthew Dillon 		swap_async_max = n;
1076327f4e83SMatthew Dillon 
1077327f4e83SMatthew Dillon 		/*
1078327f4e83SMatthew Dillon 		 * Adjust difference ( if possible ).  If the current async
1079327f4e83SMatthew Dillon 		 * count is too low, we may not be able to make the adjustment
1080327f4e83SMatthew Dillon 		 * at this time.
1081327f4e83SMatthew Dillon 		 */
1082327f4e83SMatthew Dillon 		s = splvm();
1083327f4e83SMatthew Dillon 		n -= nsw_wcount_async_max;
1084327f4e83SMatthew Dillon 		if (nsw_wcount_async + n >= 0) {
1085327f4e83SMatthew Dillon 			nsw_wcount_async += n;
1086327f4e83SMatthew Dillon 			nsw_wcount_async_max += n;
1087327f4e83SMatthew Dillon 			wakeup(&nsw_wcount_async);
1088327f4e83SMatthew Dillon 		}
1089327f4e83SMatthew Dillon 		splx(s);
1090327f4e83SMatthew Dillon 	}
1091327f4e83SMatthew Dillon 
1092327f4e83SMatthew Dillon 	if (swap_cluster_max != nsw_cluster_max) {
1093327f4e83SMatthew Dillon 		int n;
1094327f4e83SMatthew Dillon 
1095327f4e83SMatthew Dillon 		if ((n = swap_cluster_max) < 1)
1096327f4e83SMatthew Dillon 			n = 1;
1097327f4e83SMatthew Dillon 		if (n > min((MAXPHYS/PAGE_SIZE), MAX_PAGEOUT_CLUSTER))
1098327f4e83SMatthew Dillon 			n = min((MAXPHYS/PAGE_SIZE), MAX_PAGEOUT_CLUSTER);
1099327f4e83SMatthew Dillon 		swap_cluster_max = n;
1100327f4e83SMatthew Dillon 		nsw_cluster_max = n;
1101327f4e83SMatthew Dillon 	}
1102327f4e83SMatthew Dillon 
1103327f4e83SMatthew Dillon #endif
1104327f4e83SMatthew Dillon 
1105327f4e83SMatthew Dillon 	/*
1106327f4e83SMatthew Dillon 	 * Step 3
1107327f4e83SMatthew Dillon 	 *
11081c7c3c6aSMatthew Dillon 	 * Assign swap blocks and issue I/O.  We reallocate swap on the fly.
11091c7c3c6aSMatthew Dillon 	 * The page is left dirty until the pageout operation completes
11101c7c3c6aSMatthew Dillon 	 * successfully.
11111c7c3c6aSMatthew Dillon 	 */
111226f9a767SRodney W. Grimes 
11131c7c3c6aSMatthew Dillon 	for (i = 0; i < count; i += n) {
11141c7c3c6aSMatthew Dillon 		int s;
11151c7c3c6aSMatthew Dillon 		int j;
11161c7c3c6aSMatthew Dillon 		struct buf *bp;
1117a316d390SJohn Dyson 		daddr_t blk;
111826f9a767SRodney W. Grimes 
1119df8bae1dSRodney W. Grimes 		/*
11201c7c3c6aSMatthew Dillon 		 * Maximum I/O size is limited by a number of factors.
1121df8bae1dSRodney W. Grimes 		 */
112226f9a767SRodney W. Grimes 
11231c7c3c6aSMatthew Dillon 		n = min(BLIST_MAX_ALLOC, count - i);
1124327f4e83SMatthew Dillon 		n = min(n, nsw_cluster_max);
11251c7c3c6aSMatthew Dillon 
112626f9a767SRodney W. Grimes 		/*
11271c7c3c6aSMatthew Dillon 		 * Get biggest block of swap we can.  If we fail, fall
11281c7c3c6aSMatthew Dillon 		 * back and try to allocate a smaller block.  Don't go
11291c7c3c6aSMatthew Dillon 		 * overboard trying to allocate space if it would overly
11301c7c3c6aSMatthew Dillon 		 * fragment swap.
113126f9a767SRodney W. Grimes 		 */
11321c7c3c6aSMatthew Dillon 		while (
11331c7c3c6aSMatthew Dillon 		    (blk = swp_pager_getswapspace(n)) == SWAPBLK_NONE &&
11341c7c3c6aSMatthew Dillon 		    n > 4
11351c7c3c6aSMatthew Dillon 		) {
11361c7c3c6aSMatthew Dillon 			n >>= 1;
113726f9a767SRodney W. Grimes 		}
11381c7c3c6aSMatthew Dillon 		if (blk == SWAPBLK_NONE) {
11391c7c3c6aSMatthew Dillon 			for (j = 0; j < n; ++j) {
11401c7c3c6aSMatthew Dillon 				rtvals[i+j] = VM_PAGER_FAIL;
114126f9a767SRodney W. Grimes 			}
11421c7c3c6aSMatthew Dillon 			continue;
114326f9a767SRodney W. Grimes 		}
114426f9a767SRodney W. Grimes 
114526f9a767SRodney W. Grimes 		/*
11461c7c3c6aSMatthew Dillon 		 * Oops, too big if it crosses a stripe
11471c7c3c6aSMatthew Dillon 		 *
11481c7c3c6aSMatthew Dillon 		 * 1111000000
11491c7c3c6aSMatthew Dillon 		 *     111111
11501c7c3c6aSMatthew Dillon 		 *    1000001
115126f9a767SRodney W. Grimes 		 */
11521c7c3c6aSMatthew Dillon 		if ((blk ^ (blk + n)) & dmmax_mask) {
11531c7c3c6aSMatthew Dillon 			j = ((blk + dmmax) & dmmax_mask) - blk;
11541c7c3c6aSMatthew Dillon 			swp_pager_freeswapspace(blk + j, n - j);
11551c7c3c6aSMatthew Dillon 			n = j;
1156e47ed70bSJohn Dyson 		}
115726f9a767SRodney W. Grimes 
115826f9a767SRodney W. Grimes 		/*
11591c7c3c6aSMatthew Dillon 		 * All I/O parameters have been satisfied, build the I/O
11601c7c3c6aSMatthew Dillon 		 * request and assign the swap space.
11611c7c3c6aSMatthew Dillon 		 *
11621c7c3c6aSMatthew Dillon 		 * NOTE: B_PAGING is set by pbgetvp()
116326f9a767SRodney W. Grimes 		 */
116426f9a767SRodney W. Grimes 
1165327f4e83SMatthew Dillon 		if (sync == TRUE) {
1166327f4e83SMatthew Dillon 			bp = getpbuf(&nsw_wcount_sync);
1167327f4e83SMatthew Dillon 			bp->b_flags = B_BUSY;
1168327f4e83SMatthew Dillon 		} else {
1169327f4e83SMatthew Dillon 			bp = getpbuf(&nsw_wcount_async);
1170327f4e83SMatthew Dillon 			bp->b_flags = B_BUSY | B_ASYNC;
1171327f4e83SMatthew Dillon 		}
11721c7c3c6aSMatthew Dillon 		bp->b_spc = NULL;	/* not used, but NULL-out anyway */
117326f9a767SRodney W. Grimes 
11741c7c3c6aSMatthew Dillon 		pmap_qenter((vm_offset_t)bp->b_data, &m[i], n);
11751c7c3c6aSMatthew Dillon 
11761c7c3c6aSMatthew Dillon 		bp->b_proc = &proc0; /* XXX (but without B_PHYS this is ok) */
117726f9a767SRodney W. Grimes 		bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
11781c7c3c6aSMatthew Dillon 
1179a481f200SDavid Greenman 		if (bp->b_rcred != NOCRED)
118026f9a767SRodney W. Grimes 			crhold(bp->b_rcred);
1181a481f200SDavid Greenman 		if (bp->b_wcred != NOCRED)
118226f9a767SRodney W. Grimes 			crhold(bp->b_wcred);
11830d94caffSDavid Greenman 		pbgetvp(swapdev_vp, bp);
118416f62314SDavid Greenman 
11851c7c3c6aSMatthew Dillon 		bp->b_bcount = PAGE_SIZE * n;
11861c7c3c6aSMatthew Dillon 		bp->b_bufsize = PAGE_SIZE * n;
11871c7c3c6aSMatthew Dillon 		bp->b_blkno = blk;
1188e47ed70bSJohn Dyson 
1189e47ed70bSJohn Dyson 		s = splvm();
11901c7c3c6aSMatthew Dillon 
11911c7c3c6aSMatthew Dillon 		for (j = 0; j < n; ++j) {
11921c7c3c6aSMatthew Dillon 			vm_page_t mreq = m[i+j];
11931c7c3c6aSMatthew Dillon 
11941c7c3c6aSMatthew Dillon 			swp_pager_meta_build(
11951c7c3c6aSMatthew Dillon 			    mreq->object,
11961c7c3c6aSMatthew Dillon 			    mreq->pindex,
11971c7c3c6aSMatthew Dillon 			    blk + j,
11981c7c3c6aSMatthew Dillon 			    0
11991c7c3c6aSMatthew Dillon 			);
12007dbf82dcSMatthew Dillon 			vm_page_dirty(mreq);
12011c7c3c6aSMatthew Dillon 			rtvals[i+j] = VM_PAGER_OK;
12021c7c3c6aSMatthew Dillon 
12031c7c3c6aSMatthew Dillon 			vm_page_flag_set(mreq, PG_SWAPINPROG);
12041c7c3c6aSMatthew Dillon 			bp->b_pages[j] = mreq;
12051c7c3c6aSMatthew Dillon 		}
12061c7c3c6aSMatthew Dillon 		bp->b_flags |= B_CALL;
12071c7c3c6aSMatthew Dillon 		bp->b_npages = n;
12081c7c3c6aSMatthew Dillon 
12091c7c3c6aSMatthew Dillon 		cnt.v_swapout++;
12101c7c3c6aSMatthew Dillon 		cnt.v_swappgsout += bp->b_npages;
121126f9a767SRodney W. Grimes 		swapdev_vp->v_numoutput++;
121226f9a767SRodney W. Grimes 
121326f9a767SRodney W. Grimes 		/*
12141c7c3c6aSMatthew Dillon 		 * asynchronous
12151c7c3c6aSMatthew Dillon 		 *
12161c7c3c6aSMatthew Dillon 		 * NOTE: b_blkno is destroyed by the call to VOP_STRATEGY
121726f9a767SRodney W. Grimes 		 */
1218e47ed70bSJohn Dyson 
12191c7c3c6aSMatthew Dillon 		if (sync == FALSE) {
12201c7c3c6aSMatthew Dillon 			bp->b_iodone = swp_pager_async_iodone;
122126f9a767SRodney W. Grimes 			bp->b_dirtyoff = 0;
122226f9a767SRodney W. Grimes 			bp->b_dirtyend = bp->b_bcount;
1223fd5d1124SJulian Elischer 			VOP_STRATEGY(bp->b_vp, bp);
12241c7c3c6aSMatthew Dillon 
12251c7c3c6aSMatthew Dillon 			for (j = 0; j < n; ++j)
12261c7c3c6aSMatthew Dillon 				rtvals[i+j] = VM_PAGER_PEND;
12271c7c3c6aSMatthew Dillon 
1228ccbbd927SBruce Evans 			splx(s);
12291c7c3c6aSMatthew Dillon 			continue;
123026f9a767SRodney W. Grimes 		}
1231e47ed70bSJohn Dyson 
123226f9a767SRodney W. Grimes 		/*
12331c7c3c6aSMatthew Dillon 		 * synchronous
12341c7c3c6aSMatthew Dillon 		 *
12351c7c3c6aSMatthew Dillon 		 * NOTE: b_blkno is destroyed by the call to VOP_STRATEGY
12361c7c3c6aSMatthew Dillon 		 */
12371c7c3c6aSMatthew Dillon 
12381c7c3c6aSMatthew Dillon 		bp->b_iodone = swp_pager_sync_iodone;
12391c7c3c6aSMatthew Dillon 		VOP_STRATEGY(bp->b_vp, bp);
12401c7c3c6aSMatthew Dillon 
12411c7c3c6aSMatthew Dillon 		/*
12421c7c3c6aSMatthew Dillon 		 * Wait for the sync I/O to complete, then update rtvals.
12431c7c3c6aSMatthew Dillon 		 * We just set the rtvals[] to VM_PAGER_PEND so we can call
12441c7c3c6aSMatthew Dillon 		 * our async completion routine at the end, thus avoiding a
12451c7c3c6aSMatthew Dillon 		 * double-free.
124626f9a767SRodney W. Grimes 		 */
124726f9a767SRodney W. Grimes 		while ((bp->b_flags & B_DONE) == 0) {
124824a1cce3SDavid Greenman 			tsleep(bp, PVM, "swwrt", 0);
124926f9a767SRodney W. Grimes 		}
1250e47ed70bSJohn Dyson 
1251e4542174SMatthew Dillon #if 0
12521b119d9dSDavid Greenman 		if (bp->b_flags & B_ERROR) {
12531c7c3c6aSMatthew Dillon 			grv = VM_PAGER_ERROR;
12541b119d9dSDavid Greenman 		}
1255e4542174SMatthew Dillon #endif
125626f9a767SRodney W. Grimes 
12571c7c3c6aSMatthew Dillon 		for (j = 0; j < n; ++j)
12581c7c3c6aSMatthew Dillon 			rtvals[i+j] = VM_PAGER_PEND;
125926f9a767SRodney W. Grimes 
1260e4542174SMatthew Dillon #if 0
12611c7c3c6aSMatthew Dillon 		if (bp->b_flags & B_ERROR) {
12621c7c3c6aSMatthew Dillon 			grv = VM_PAGER_ERROR;
12631c7c3c6aSMatthew Dillon 		}
1264e4542174SMatthew Dillon #endif
12651c7c3c6aSMatthew Dillon 
12661c7c3c6aSMatthew Dillon 		/*
12671c7c3c6aSMatthew Dillon 		 * Now that we are through with the bp, we can call the
12681c7c3c6aSMatthew Dillon 		 * normal async completion, which frees everything up.
12691c7c3c6aSMatthew Dillon 		 */
12701c7c3c6aSMatthew Dillon 
12711c7c3c6aSMatthew Dillon 		swp_pager_async_iodone(bp);
127226f9a767SRodney W. Grimes 
127326f9a767SRodney W. Grimes 		splx(s);
12741c7c3c6aSMatthew Dillon 	}
12751c7c3c6aSMatthew Dillon }
12761c7c3c6aSMatthew Dillon 
12771c7c3c6aSMatthew Dillon /*
12781c7c3c6aSMatthew Dillon  *	swap_pager_sync_iodone:
12791c7c3c6aSMatthew Dillon  *
12801c7c3c6aSMatthew Dillon  *	Completion routine for synchronous reads and writes from/to swap.
12811c7c3c6aSMatthew Dillon  *	We just mark the bp is complete and wake up anyone waiting on it.
12821c7c3c6aSMatthew Dillon  *
12831c7c3c6aSMatthew Dillon  *	This routine may not block.
12841c7c3c6aSMatthew Dillon  */
12851c7c3c6aSMatthew Dillon 
12861c7c3c6aSMatthew Dillon static void
12871c7c3c6aSMatthew Dillon swp_pager_sync_iodone(bp)
12881c7c3c6aSMatthew Dillon 	struct buf *bp;
12891c7c3c6aSMatthew Dillon {
12901c7c3c6aSMatthew Dillon 	bp->b_flags |= B_DONE;
12911c7c3c6aSMatthew Dillon 	bp->b_flags &= ~B_ASYNC;
12921c7c3c6aSMatthew Dillon 	wakeup(bp);
12931c7c3c6aSMatthew Dillon }
12941c7c3c6aSMatthew Dillon 
12951c7c3c6aSMatthew Dillon /*
12961c7c3c6aSMatthew Dillon  *	swp_pager_async_iodone:
12971c7c3c6aSMatthew Dillon  *
12981c7c3c6aSMatthew Dillon  *	Completion routine for asynchronous reads and writes from/to swap.
12991c7c3c6aSMatthew Dillon  *	Also called manually by synchronous code to finish up a bp.
13001c7c3c6aSMatthew Dillon  *
13011c7c3c6aSMatthew Dillon  *	WARNING!  This routine may be called from an interrupt.  We cannot
13021c7c3c6aSMatthew Dillon  *	mess with swap metadata unless we want to run all our other routines
13031c7c3c6aSMatthew Dillon  *	at splbio() too, which I'd rather not do.  We up ourselves
13041c7c3c6aSMatthew Dillon  * 	to splvm() because we may call vm_page_free(), which can unlink a
13051c7c3c6aSMatthew Dillon  *	page from an object.
13061c7c3c6aSMatthew Dillon  *
13071c7c3c6aSMatthew Dillon  *	XXX currently I do not believe any object routines protect
13081c7c3c6aSMatthew Dillon  *	object->memq at splvm().  The code must be gone over to determine
13091c7c3c6aSMatthew Dillon  *	the actual state of the problem.
13101c7c3c6aSMatthew Dillon  *
13111c7c3c6aSMatthew Dillon  *	For READ operations, the pages are PG_BUSY'd.  For WRITE operations,
13121c7c3c6aSMatthew Dillon  *	the pages are vm_page_t->busy'd.  For READ operations, we PG_BUSY
13131c7c3c6aSMatthew Dillon  *	unbusy all pages except the 'main' request page.  For WRITE
13141c7c3c6aSMatthew Dillon  *	operations, we vm_page_t->busy'd unbusy all pages ( we can do this
13151c7c3c6aSMatthew Dillon  *	because we marked them all VM_PAGER_PEND on return from putpages ).
13161c7c3c6aSMatthew Dillon  *
13171c7c3c6aSMatthew Dillon  *	This routine may not block.
13181c7c3c6aSMatthew Dillon  *	This routine is called at splbio()
13191c7c3c6aSMatthew Dillon  */
13201c7c3c6aSMatthew Dillon 
13211c7c3c6aSMatthew Dillon static void
13221c7c3c6aSMatthew Dillon swp_pager_async_iodone(bp)
13231c7c3c6aSMatthew Dillon 	register struct buf *bp;
13241c7c3c6aSMatthew Dillon {
13251c7c3c6aSMatthew Dillon 	int s;
13261c7c3c6aSMatthew Dillon 	int i;
13271c7c3c6aSMatthew Dillon 	vm_object_t object = NULL;
13281c7c3c6aSMatthew Dillon 
13291c7c3c6aSMatthew Dillon 	s = splvm();
13301c7c3c6aSMatthew Dillon 
13311c7c3c6aSMatthew Dillon 	bp->b_flags |= B_DONE;
13321c7c3c6aSMatthew Dillon 
13331c7c3c6aSMatthew Dillon 	/*
13341c7c3c6aSMatthew Dillon 	 * report error
13351c7c3c6aSMatthew Dillon 	 */
13361c7c3c6aSMatthew Dillon 
13371c7c3c6aSMatthew Dillon 	if (bp->b_flags & B_ERROR) {
13381c7c3c6aSMatthew Dillon 		printf(
13391c7c3c6aSMatthew Dillon 		    "swap_pager: I/O error - %s failed; blkno %ld,"
13401c7c3c6aSMatthew Dillon 			"size %ld, error %d\n",
13411c7c3c6aSMatthew Dillon 		    ((bp->b_flags & B_READ) ? "pagein" : "pageout"),
13421c7c3c6aSMatthew Dillon 		    (long)bp->b_blkno,
13431c7c3c6aSMatthew Dillon 		    (long)bp->b_bcount,
13441c7c3c6aSMatthew Dillon 		    bp->b_error
13451c7c3c6aSMatthew Dillon 		);
13461c7c3c6aSMatthew Dillon 	}
13471c7c3c6aSMatthew Dillon 
13481c7c3c6aSMatthew Dillon 	/*
13491c7c3c6aSMatthew Dillon 	 * set object.
13501c7c3c6aSMatthew Dillon 	 */
13511c7c3c6aSMatthew Dillon 
13521c7c3c6aSMatthew Dillon 	if (bp->b_npages)
13531c7c3c6aSMatthew Dillon 		object = bp->b_pages[0]->object;
135426f9a767SRodney W. Grimes 
135526f9a767SRodney W. Grimes 	/*
135626f9a767SRodney W. Grimes 	 * remove the mapping for kernel virtual
135726f9a767SRodney W. Grimes 	 */
13581c7c3c6aSMatthew Dillon 
13591c7c3c6aSMatthew Dillon 	pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages);
136026f9a767SRodney W. Grimes 
136126f9a767SRodney W. Grimes 	/*
13621c7c3c6aSMatthew Dillon 	 * cleanup pages.  If an error occurs writing to swap, we are in
13631c7c3c6aSMatthew Dillon 	 * very serious trouble.  If it happens to be a disk error, though,
13641c7c3c6aSMatthew Dillon 	 * we may be able to recover by reassigning the swap later on.  So
13651c7c3c6aSMatthew Dillon 	 * in this case we remove the m->swapblk assignment for the page
13661c7c3c6aSMatthew Dillon 	 * but do not free it in the rlist.  The errornous block(s) are thus
13671c7c3c6aSMatthew Dillon 	 * never reallocated as swap.  Redirty the page and continue.
136826f9a767SRodney W. Grimes 	 */
136926f9a767SRodney W. Grimes 
13701c7c3c6aSMatthew Dillon 	for (i = 0; i < bp->b_npages; ++i) {
13711c7c3c6aSMatthew Dillon 		vm_page_t m = bp->b_pages[i];
1372e47ed70bSJohn Dyson 
13731c7c3c6aSMatthew Dillon 		vm_page_flag_clear(m, PG_SWAPINPROG);
1374e47ed70bSJohn Dyson 
137526f9a767SRodney W. Grimes 		if (bp->b_flags & B_ERROR) {
1376ffc82b0aSJohn Dyson 			/*
13771c7c3c6aSMatthew Dillon 			 * If an error occurs I'd love to throw the swapblk
13781c7c3c6aSMatthew Dillon 			 * away without freeing it back to swapspace, so it
13791c7c3c6aSMatthew Dillon 			 * can never be used again.  But I can't from an
13801c7c3c6aSMatthew Dillon 			 * interrupt.
1381ffc82b0aSJohn Dyson 			 */
13821c7c3c6aSMatthew Dillon 
13831c7c3c6aSMatthew Dillon 			if (bp->b_flags & B_READ) {
13841c7c3c6aSMatthew Dillon 				/*
13851c7c3c6aSMatthew Dillon 				 * When reading, reqpage needs to stay
13861c7c3c6aSMatthew Dillon 				 * locked for the parent, but all other
13871c7c3c6aSMatthew Dillon 				 * pages can be freed.  We still want to
13881c7c3c6aSMatthew Dillon 				 * wakeup the parent waiting on the page,
13891c7c3c6aSMatthew Dillon 				 * though.  ( also: pg_reqpage can be -1 and
13901c7c3c6aSMatthew Dillon 				 * not match anything ).
13911c7c3c6aSMatthew Dillon 				 *
13921c7c3c6aSMatthew Dillon 				 * We have to wake specifically requested pages
13931c7c3c6aSMatthew Dillon 				 * up too because we cleared PG_SWAPINPROG and
13941c7c3c6aSMatthew Dillon 				 * someone may be waiting for that.
13951c7c3c6aSMatthew Dillon 				 *
13961c7c3c6aSMatthew Dillon 				 * NOTE: for reads, m->dirty will probably
13971c7c3c6aSMatthew Dillon 				 * be overriden by the original caller of
13981c7c3c6aSMatthew Dillon 				 * getpages so don't play cute tricks here.
13991c7c3c6aSMatthew Dillon 				 *
14001c7c3c6aSMatthew Dillon 				 * XXX it may not be legal to free the page
14011c7c3c6aSMatthew Dillon 				 * here as this messes with the object->memq's.
14021c7c3c6aSMatthew Dillon 				 */
14031c7c3c6aSMatthew Dillon 
14041c7c3c6aSMatthew Dillon 				m->valid = 0;
14051c7c3c6aSMatthew Dillon 				vm_page_flag_clear(m, PG_ZERO);
14061c7c3c6aSMatthew Dillon 
14071c7c3c6aSMatthew Dillon 				if (i != bp->b_pager.pg_reqpage)
14081c7c3c6aSMatthew Dillon 					vm_page_free(m);
14091c7c3c6aSMatthew Dillon 				else
14101c7c3c6aSMatthew Dillon 					vm_page_flash(m);
14111c7c3c6aSMatthew Dillon 				/*
14121c7c3c6aSMatthew Dillon 				 * If i == bp->b_pager.pg_reqpage, do not wake
14131c7c3c6aSMatthew Dillon 				 * the page up.  The caller needs to.
14141c7c3c6aSMatthew Dillon 				 */
14151c7c3c6aSMatthew Dillon 			} else {
14161c7c3c6aSMatthew Dillon 				/*
14171c7c3c6aSMatthew Dillon 				 * If a write error occurs, reactivate page
14181c7c3c6aSMatthew Dillon 				 * so it doesn't clog the inactive list,
14191c7c3c6aSMatthew Dillon 				 * then finish the I/O.
14201c7c3c6aSMatthew Dillon 				 */
14217dbf82dcSMatthew Dillon 				vm_page_dirty(m);
14221c7c3c6aSMatthew Dillon 				vm_page_activate(m);
14231c7c3c6aSMatthew Dillon 				vm_page_io_finish(m);
14241c7c3c6aSMatthew Dillon 			}
14251c7c3c6aSMatthew Dillon 		} else if (bp->b_flags & B_READ) {
14261c7c3c6aSMatthew Dillon 			/*
14271c7c3c6aSMatthew Dillon 			 * For read success, clear dirty bits.  Nobody should
14281c7c3c6aSMatthew Dillon 			 * have this page mapped but don't take any chances,
14291c7c3c6aSMatthew Dillon 			 * make sure the pmap modify bits are also cleared.
14301c7c3c6aSMatthew Dillon 			 *
14311c7c3c6aSMatthew Dillon 			 * NOTE: for reads, m->dirty will probably be
14321c7c3c6aSMatthew Dillon 			 * overriden by the original caller of getpages so
14331c7c3c6aSMatthew Dillon 			 * we cannot set them in order to free the underlying
14341c7c3c6aSMatthew Dillon 			 * swap in a low-swap situation.  I don't think we'd
14351c7c3c6aSMatthew Dillon 			 * want to do that anyway, but it was an optimization
14361c7c3c6aSMatthew Dillon 			 * that existed in the old swapper for a time before
14371c7c3c6aSMatthew Dillon 			 * it got ripped out due to precisely this problem.
14381c7c3c6aSMatthew Dillon 			 *
14391c7c3c6aSMatthew Dillon 			 * clear PG_ZERO in page.
14401c7c3c6aSMatthew Dillon 			 *
14411c7c3c6aSMatthew Dillon 			 * If not the requested page then deactivate it.
14421c7c3c6aSMatthew Dillon 			 *
14431c7c3c6aSMatthew Dillon 			 * Note that the requested page, reqpage, is left
14441c7c3c6aSMatthew Dillon 			 * busied, but we still have to wake it up.  The
14451c7c3c6aSMatthew Dillon 			 * other pages are released (unbusied) by
14461c7c3c6aSMatthew Dillon 			 * vm_page_wakeup().  We do not set reqpage's
14471c7c3c6aSMatthew Dillon 			 * valid bits here, it is up to the caller.
14481c7c3c6aSMatthew Dillon 			 */
14491c7c3c6aSMatthew Dillon 
14501c7c3c6aSMatthew Dillon 			pmap_clear_modify(VM_PAGE_TO_PHYS(m));
14511c7c3c6aSMatthew Dillon 			m->valid = VM_PAGE_BITS_ALL;
14521c7c3c6aSMatthew Dillon 			m->dirty = 0;
14531c7c3c6aSMatthew Dillon 			vm_page_flag_clear(m, PG_ZERO);
14541c7c3c6aSMatthew Dillon 
14551c7c3c6aSMatthew Dillon 			/*
14561c7c3c6aSMatthew Dillon 			 * We have to wake specifically requested pages
14571c7c3c6aSMatthew Dillon 			 * up too because we cleared PG_SWAPINPROG and
14581c7c3c6aSMatthew Dillon 			 * could be waiting for it in getpages.  However,
14591c7c3c6aSMatthew Dillon 			 * be sure to not unbusy getpages specifically
14601c7c3c6aSMatthew Dillon 			 * requested page - getpages expects it to be
14611c7c3c6aSMatthew Dillon 			 * left busy.
14621c7c3c6aSMatthew Dillon 			 */
14631c7c3c6aSMatthew Dillon 			if (i != bp->b_pager.pg_reqpage) {
14641c7c3c6aSMatthew Dillon 				vm_page_deactivate(m);
14651c7c3c6aSMatthew Dillon 				vm_page_wakeup(m);
14661c7c3c6aSMatthew Dillon 			} else {
14671c7c3c6aSMatthew Dillon 				vm_page_flash(m);
14681c7c3c6aSMatthew Dillon 			}
14691c7c3c6aSMatthew Dillon 		} else {
14701c7c3c6aSMatthew Dillon 			/*
14711c7c3c6aSMatthew Dillon 			 * For write success, clear the modify and dirty
14721c7c3c6aSMatthew Dillon 			 * status, then finish the I/O ( which decrements the
14731c7c3c6aSMatthew Dillon 			 * busy count and possibly wakes waiter's up ).
14741c7c3c6aSMatthew Dillon 			 */
14751c7c3c6aSMatthew Dillon 			vm_page_protect(m, VM_PROT_READ);
14761c7c3c6aSMatthew Dillon 			pmap_clear_modify(VM_PAGE_TO_PHYS(m));
14771c7c3c6aSMatthew Dillon 			m->dirty = 0;
14781c7c3c6aSMatthew Dillon 			vm_page_io_finish(m);
1479ffc82b0aSJohn Dyson 		}
1480df8bae1dSRodney W. Grimes 	}
148126f9a767SRodney W. Grimes 
14821c7c3c6aSMatthew Dillon 	/*
14831c7c3c6aSMatthew Dillon 	 * adjust pip.  NOTE: the original parent may still have its own
14841c7c3c6aSMatthew Dillon 	 * pip refs on the object.
14851c7c3c6aSMatthew Dillon 	 */
14860d94caffSDavid Greenman 
14871c7c3c6aSMatthew Dillon 	if (object)
14881c7c3c6aSMatthew Dillon 		vm_object_pip_wakeupn(object, bp->b_npages);
148926f9a767SRodney W. Grimes 
14901c7c3c6aSMatthew Dillon 	/*
14911c7c3c6aSMatthew Dillon 	 * release the physical I/O buffer
14921c7c3c6aSMatthew Dillon 	 */
1493e47ed70bSJohn Dyson 
1494327f4e83SMatthew Dillon 	relpbuf(
1495327f4e83SMatthew Dillon 	    bp,
1496327f4e83SMatthew Dillon 	    ((bp->b_flags & B_READ) ? &nsw_rcount :
1497327f4e83SMatthew Dillon 		((bp->b_flags & B_ASYNC) ?
1498327f4e83SMatthew Dillon 		    &nsw_wcount_async :
1499327f4e83SMatthew Dillon 		    &nsw_wcount_sync
1500327f4e83SMatthew Dillon 		)
1501327f4e83SMatthew Dillon 	    )
1502327f4e83SMatthew Dillon 	);
150326f9a767SRodney W. Grimes 	splx(s);
150426f9a767SRodney W. Grimes }
15051c7c3c6aSMatthew Dillon 
15061c7c3c6aSMatthew Dillon /************************************************************************
15071c7c3c6aSMatthew Dillon  *				SWAP META DATA 				*
15081c7c3c6aSMatthew Dillon  ************************************************************************
15091c7c3c6aSMatthew Dillon  *
15101c7c3c6aSMatthew Dillon  *	These routines manipulate the swap metadata stored in the
15111c7c3c6aSMatthew Dillon  *	OBJT_SWAP object.
15121c7c3c6aSMatthew Dillon  *
15131c7c3c6aSMatthew Dillon  *	In fact, we just have a few counters in the vm_object_t.  The
15141c7c3c6aSMatthew Dillon  *	metadata is actually stored in a hash table.
15151c7c3c6aSMatthew Dillon  */
15161c7c3c6aSMatthew Dillon 
15171c7c3c6aSMatthew Dillon /*
15181c7c3c6aSMatthew Dillon  * SWP_PAGER_HASH() -	hash swap meta data
15191c7c3c6aSMatthew Dillon  *
15201c7c3c6aSMatthew Dillon  *	This is an inline helper function which hash the swapblk given
15211c7c3c6aSMatthew Dillon  *	the object and page index.  It returns a pointer to a pointer
15221c7c3c6aSMatthew Dillon  *	to the object, or a pointer to a NULL pointer if it could not
15231c7c3c6aSMatthew Dillon  *	find a swapblk.
15241c7c3c6aSMatthew Dillon  */
15251c7c3c6aSMatthew Dillon 
15261c7c3c6aSMatthew Dillon static __inline struct swblock **
15271c7c3c6aSMatthew Dillon swp_pager_hash(vm_object_t object, daddr_t index)
15281c7c3c6aSMatthew Dillon {
15291c7c3c6aSMatthew Dillon 	struct swblock **pswap;
15301c7c3c6aSMatthew Dillon 	struct swblock *swap;
15311c7c3c6aSMatthew Dillon 
15321c7c3c6aSMatthew Dillon 	index &= ~SWAP_META_MASK;
15331c7c3c6aSMatthew Dillon 	pswap = &swhash[(index ^ (int)(long)object) & swhash_mask];
15341c7c3c6aSMatthew Dillon 
15351c7c3c6aSMatthew Dillon 	while ((swap = *pswap) != NULL) {
15361c7c3c6aSMatthew Dillon 		if (swap->swb_object == object &&
15371c7c3c6aSMatthew Dillon 		    swap->swb_index == index
15381c7c3c6aSMatthew Dillon 		) {
15391c7c3c6aSMatthew Dillon 			break;
15401c7c3c6aSMatthew Dillon 		}
15411c7c3c6aSMatthew Dillon 		pswap = &swap->swb_hnext;
15421c7c3c6aSMatthew Dillon 	}
15431c7c3c6aSMatthew Dillon 	return(pswap);
15441c7c3c6aSMatthew Dillon }
15451c7c3c6aSMatthew Dillon 
15461c7c3c6aSMatthew Dillon /*
15471c7c3c6aSMatthew Dillon  * SWP_PAGER_META_BUILD() -	add swap block to swap meta data for object
15481c7c3c6aSMatthew Dillon  *
15491c7c3c6aSMatthew Dillon  *	We first convert the object to a swap object if it is a default
15501c7c3c6aSMatthew Dillon  *	object.
15511c7c3c6aSMatthew Dillon  *
15521c7c3c6aSMatthew Dillon  *	The specified swapblk is added to the object's swap metadata.  If
15531c7c3c6aSMatthew Dillon  *	the swapblk is not valid, it is freed instead.  Any previously
15541c7c3c6aSMatthew Dillon  *	assigned swapblk is freed.
15551c7c3c6aSMatthew Dillon  */
15561c7c3c6aSMatthew Dillon 
15571c7c3c6aSMatthew Dillon static void
15581c7c3c6aSMatthew Dillon swp_pager_meta_build(
15591c7c3c6aSMatthew Dillon 	vm_object_t object,
15601c7c3c6aSMatthew Dillon 	daddr_t index,
15611c7c3c6aSMatthew Dillon 	daddr_t swapblk,
15621c7c3c6aSMatthew Dillon 	int waitok
15631c7c3c6aSMatthew Dillon ) {
15641c7c3c6aSMatthew Dillon 	struct swblock *swap;
15651c7c3c6aSMatthew Dillon 	struct swblock **pswap;
15661c7c3c6aSMatthew Dillon 
15671c7c3c6aSMatthew Dillon 	/*
15681c7c3c6aSMatthew Dillon 	 * Convert default object to swap object if necessary
15691c7c3c6aSMatthew Dillon 	 */
15701c7c3c6aSMatthew Dillon 
15711c7c3c6aSMatthew Dillon 	if (object->type != OBJT_SWAP) {
15721c7c3c6aSMatthew Dillon 		object->type = OBJT_SWAP;
15731c7c3c6aSMatthew Dillon 		object->un_pager.swp.swp_bcount = 0;
15741c7c3c6aSMatthew Dillon 
15751c7c3c6aSMatthew Dillon 		if (object->handle != NULL) {
15761c7c3c6aSMatthew Dillon 			TAILQ_INSERT_TAIL(
15771c7c3c6aSMatthew Dillon 			    NOBJLIST(object->handle),
15781c7c3c6aSMatthew Dillon 			    object,
15791c7c3c6aSMatthew Dillon 			    pager_object_list
15801c7c3c6aSMatthew Dillon 			);
15811c7c3c6aSMatthew Dillon 		} else {
15821c7c3c6aSMatthew Dillon 			TAILQ_INSERT_TAIL(
15831c7c3c6aSMatthew Dillon 			    &swap_pager_un_object_list,
15841c7c3c6aSMatthew Dillon 			    object,
15851c7c3c6aSMatthew Dillon 			    pager_object_list
15861c7c3c6aSMatthew Dillon 			);
15871c7c3c6aSMatthew Dillon 		}
15881c7c3c6aSMatthew Dillon 	}
15891c7c3c6aSMatthew Dillon 
15901c7c3c6aSMatthew Dillon 	/*
15911c7c3c6aSMatthew Dillon 	 * Wait for free memory when waitok is TRUE prior to calling the
15921c7c3c6aSMatthew Dillon 	 * zone allocator.
15931c7c3c6aSMatthew Dillon 	 */
15941c7c3c6aSMatthew Dillon 
15951c7c3c6aSMatthew Dillon 	while (waitok && cnt.v_free_count == 0) {
15961c7c3c6aSMatthew Dillon 		VM_WAIT;
15971c7c3c6aSMatthew Dillon 	}
15981c7c3c6aSMatthew Dillon 
15991c7c3c6aSMatthew Dillon 	/*
16001c7c3c6aSMatthew Dillon 	 * If swapblk being added is invalid, just free it.
16011c7c3c6aSMatthew Dillon 	 */
16021c7c3c6aSMatthew Dillon 
16031c7c3c6aSMatthew Dillon 	if (swapblk & SWAPBLK_NONE) {
16041c7c3c6aSMatthew Dillon 		if (swapblk != SWAPBLK_NONE) {
16051c7c3c6aSMatthew Dillon 			swp_pager_freeswapspace(
16061c7c3c6aSMatthew Dillon 			    index,
16071c7c3c6aSMatthew Dillon 			    1
16081c7c3c6aSMatthew Dillon 			);
16091c7c3c6aSMatthew Dillon 			swapblk = SWAPBLK_NONE;
16101c7c3c6aSMatthew Dillon 		}
16111c7c3c6aSMatthew Dillon 	}
16121c7c3c6aSMatthew Dillon 
16131c7c3c6aSMatthew Dillon 	/*
16141c7c3c6aSMatthew Dillon 	 * Locate hash entry.  If not found create, but if we aren't adding
16151c7c3c6aSMatthew Dillon 	 * anything just return.
16161c7c3c6aSMatthew Dillon 	 */
16171c7c3c6aSMatthew Dillon 
16181c7c3c6aSMatthew Dillon 	pswap = swp_pager_hash(object, index);
16191c7c3c6aSMatthew Dillon 
16201c7c3c6aSMatthew Dillon 	if ((swap = *pswap) == NULL) {
16211c7c3c6aSMatthew Dillon 		int i;
16221c7c3c6aSMatthew Dillon 
16231c7c3c6aSMatthew Dillon 		if (swapblk == SWAPBLK_NONE)
16241c7c3c6aSMatthew Dillon 			return;
16251c7c3c6aSMatthew Dillon 
16261c7c3c6aSMatthew Dillon 		swap = *pswap = zalloc(swap_zone);
16271c7c3c6aSMatthew Dillon 
16281c7c3c6aSMatthew Dillon 		swap->swb_hnext = NULL;
16291c7c3c6aSMatthew Dillon 		swap->swb_object = object;
16301c7c3c6aSMatthew Dillon 		swap->swb_index = index & ~SWAP_META_MASK;
16311c7c3c6aSMatthew Dillon 		swap->swb_count = 0;
16321c7c3c6aSMatthew Dillon 
16331c7c3c6aSMatthew Dillon 		++object->un_pager.swp.swp_bcount;
16341c7c3c6aSMatthew Dillon 
16351c7c3c6aSMatthew Dillon 		for (i = 0; i < SWAP_META_PAGES; ++i)
16361c7c3c6aSMatthew Dillon 			swap->swb_pages[i] = SWAPBLK_NONE;
16371c7c3c6aSMatthew Dillon 	}
16381c7c3c6aSMatthew Dillon 
16391c7c3c6aSMatthew Dillon 	/*
16401c7c3c6aSMatthew Dillon 	 * Delete prior contents of metadata
16411c7c3c6aSMatthew Dillon 	 */
16421c7c3c6aSMatthew Dillon 
16431c7c3c6aSMatthew Dillon 	index &= SWAP_META_MASK;
16441c7c3c6aSMatthew Dillon 
16451c7c3c6aSMatthew Dillon 	if (swap->swb_pages[index] != SWAPBLK_NONE) {
16461c7c3c6aSMatthew Dillon 		swp_pager_freeswapspace(
16471c7c3c6aSMatthew Dillon 		    swap->swb_pages[index] & SWAPBLK_MASK,
16481c7c3c6aSMatthew Dillon 		    1
16491c7c3c6aSMatthew Dillon 		);
16501c7c3c6aSMatthew Dillon 		--swap->swb_count;
16511c7c3c6aSMatthew Dillon 	}
16521c7c3c6aSMatthew Dillon 
16531c7c3c6aSMatthew Dillon 	/*
16541c7c3c6aSMatthew Dillon 	 * Enter block into metadata
16551c7c3c6aSMatthew Dillon 	 */
16561c7c3c6aSMatthew Dillon 
16571c7c3c6aSMatthew Dillon 	swap->swb_pages[index] = swapblk;
16581c7c3c6aSMatthew Dillon 	++swap->swb_count;
16591c7c3c6aSMatthew Dillon }
16601c7c3c6aSMatthew Dillon 
16611c7c3c6aSMatthew Dillon /*
16621c7c3c6aSMatthew Dillon  * SWP_PAGER_META_FREE() - free a range of blocks in the object's swap metadata
16631c7c3c6aSMatthew Dillon  *
16641c7c3c6aSMatthew Dillon  *	The requested range of blocks is freed, with any associated swap
16651c7c3c6aSMatthew Dillon  *	returned to the swap bitmap.
16661c7c3c6aSMatthew Dillon  *
16671c7c3c6aSMatthew Dillon  *	This routine will free swap metadata structures as they are cleaned
16681c7c3c6aSMatthew Dillon  *	out.  This routine does *NOT* operate on swap metadata associated
16691c7c3c6aSMatthew Dillon  *	with resident pages.
16701c7c3c6aSMatthew Dillon  *
16711c7c3c6aSMatthew Dillon  *	This routine must be called at splvm()
16721c7c3c6aSMatthew Dillon  */
16731c7c3c6aSMatthew Dillon 
16741c7c3c6aSMatthew Dillon static void
16751c7c3c6aSMatthew Dillon swp_pager_meta_free(vm_object_t object, daddr_t index, daddr_t count)
16761c7c3c6aSMatthew Dillon {
16771c7c3c6aSMatthew Dillon 	if (object->type != OBJT_SWAP)
16781c7c3c6aSMatthew Dillon 		return;
16791c7c3c6aSMatthew Dillon 
16801c7c3c6aSMatthew Dillon 	while (count > 0) {
16811c7c3c6aSMatthew Dillon 		struct swblock **pswap;
16821c7c3c6aSMatthew Dillon 		struct swblock *swap;
16831c7c3c6aSMatthew Dillon 
16841c7c3c6aSMatthew Dillon 		pswap = swp_pager_hash(object, index);
16851c7c3c6aSMatthew Dillon 
16861c7c3c6aSMatthew Dillon 		if ((swap = *pswap) != NULL) {
16871c7c3c6aSMatthew Dillon 			daddr_t v = swap->swb_pages[index & SWAP_META_MASK];
16881c7c3c6aSMatthew Dillon 
16891c7c3c6aSMatthew Dillon 			if (v != SWAPBLK_NONE) {
16901c7c3c6aSMatthew Dillon 				swp_pager_freeswapspace(v, 1);
16911c7c3c6aSMatthew Dillon 				swap->swb_pages[index & SWAP_META_MASK] =
16921c7c3c6aSMatthew Dillon 					SWAPBLK_NONE;
16931c7c3c6aSMatthew Dillon 				if (--swap->swb_count == 0) {
16941c7c3c6aSMatthew Dillon 					*pswap = swap->swb_hnext;
16951c7c3c6aSMatthew Dillon 					zfree(swap_zone, swap);
16961c7c3c6aSMatthew Dillon 					--object->un_pager.swp.swp_bcount;
16971c7c3c6aSMatthew Dillon 				}
16981c7c3c6aSMatthew Dillon 			}
16991c7c3c6aSMatthew Dillon 			--count;
17001c7c3c6aSMatthew Dillon 			++index;
17011c7c3c6aSMatthew Dillon 		} else {
17021c7c3c6aSMatthew Dillon 			daddr_t n = SWAP_META_PAGES - (index & SWAP_META_MASK);
17031c7c3c6aSMatthew Dillon 			count -= n;
17041c7c3c6aSMatthew Dillon 			index += n;
17051c7c3c6aSMatthew Dillon 		}
17061c7c3c6aSMatthew Dillon 	}
17071c7c3c6aSMatthew Dillon }
17081c7c3c6aSMatthew Dillon 
17091c7c3c6aSMatthew Dillon /*
17101c7c3c6aSMatthew Dillon  * SWP_PAGER_META_FREE_ALL() - destroy all swap metadata associated with object
17111c7c3c6aSMatthew Dillon  *
17121c7c3c6aSMatthew Dillon  *	This routine locates and destroys all swap metadata associated with
17131c7c3c6aSMatthew Dillon  *	an object.
17141c7c3c6aSMatthew Dillon  */
17151c7c3c6aSMatthew Dillon 
17161c7c3c6aSMatthew Dillon static void
17171c7c3c6aSMatthew Dillon swp_pager_meta_free_all(vm_object_t object)
17181c7c3c6aSMatthew Dillon {
17191c7c3c6aSMatthew Dillon 	daddr_t index = 0;
17201c7c3c6aSMatthew Dillon 
17211c7c3c6aSMatthew Dillon 	if (object->type != OBJT_SWAP)
17221c7c3c6aSMatthew Dillon 		return;
17231c7c3c6aSMatthew Dillon 
17241c7c3c6aSMatthew Dillon 	while (object->un_pager.swp.swp_bcount) {
17251c7c3c6aSMatthew Dillon 		struct swblock **pswap;
17261c7c3c6aSMatthew Dillon 		struct swblock *swap;
17271c7c3c6aSMatthew Dillon 
17281c7c3c6aSMatthew Dillon 		pswap = swp_pager_hash(object, index);
17291c7c3c6aSMatthew Dillon 		if ((swap = *pswap) != NULL) {
17301c7c3c6aSMatthew Dillon 			int i;
17311c7c3c6aSMatthew Dillon 
17321c7c3c6aSMatthew Dillon 			for (i = 0; i < SWAP_META_PAGES; ++i) {
17331c7c3c6aSMatthew Dillon 				daddr_t v = swap->swb_pages[i];
17341c7c3c6aSMatthew Dillon 				if (v != SWAPBLK_NONE) {
17351c7c3c6aSMatthew Dillon #if !defined(MAX_PERF)
17361c7c3c6aSMatthew Dillon 					--swap->swb_count;
17371c7c3c6aSMatthew Dillon #endif
17381c7c3c6aSMatthew Dillon 					swp_pager_freeswapspace(
17391c7c3c6aSMatthew Dillon 					    v,
17401c7c3c6aSMatthew Dillon 					    1
17411c7c3c6aSMatthew Dillon 					);
17421c7c3c6aSMatthew Dillon 				}
17431c7c3c6aSMatthew Dillon 			}
17441c7c3c6aSMatthew Dillon #if !defined(MAX_PERF)
17451c7c3c6aSMatthew Dillon 			if (swap->swb_count != 0)
17461c7c3c6aSMatthew Dillon 				panic("swap_pager_meta_free_all: swb_count != 0");
17471c7c3c6aSMatthew Dillon #endif
17481c7c3c6aSMatthew Dillon 			*pswap = swap->swb_hnext;
17491c7c3c6aSMatthew Dillon 			zfree(swap_zone, swap);
17501c7c3c6aSMatthew Dillon 			--object->un_pager.swp.swp_bcount;
17511c7c3c6aSMatthew Dillon 		}
17521c7c3c6aSMatthew Dillon 		index += SWAP_META_PAGES;
17531c7c3c6aSMatthew Dillon #if !defined(MAX_PERF)
17541c7c3c6aSMatthew Dillon 		if (index > 0x20000000)
17551c7c3c6aSMatthew Dillon 			panic("swp_pager_meta_free_all: failed to locate all swap meta blocks");
17561c7c3c6aSMatthew Dillon #endif
17571c7c3c6aSMatthew Dillon 	}
17581c7c3c6aSMatthew Dillon }
17591c7c3c6aSMatthew Dillon 
17601c7c3c6aSMatthew Dillon /*
17611c7c3c6aSMatthew Dillon  * SWP_PAGER_METACTL() -  misc control of swap and vm_page_t meta data.
17621c7c3c6aSMatthew Dillon  *
17631c7c3c6aSMatthew Dillon  *	This routine is capable of looking up, popping, or freeing
17641c7c3c6aSMatthew Dillon  *	swapblk assignments in the swap meta data or in the vm_page_t.
17651c7c3c6aSMatthew Dillon  *	The routine typically returns the swapblk being looked-up, or popped,
17661c7c3c6aSMatthew Dillon  *	or SWAPBLK_NONE if the block was freed, or SWAPBLK_NONE if the block
17671c7c3c6aSMatthew Dillon  *	was invalid.  This routine will automatically free any invalid
17681c7c3c6aSMatthew Dillon  *	meta-data swapblks.
17691c7c3c6aSMatthew Dillon  *
17701c7c3c6aSMatthew Dillon  *	It is not possible to store invalid swapblks in the swap meta data
17711c7c3c6aSMatthew Dillon  *	(other then a literal 'SWAPBLK_NONE'), so we don't bother checking.
17721c7c3c6aSMatthew Dillon  *
17731c7c3c6aSMatthew Dillon  *	When acting on a busy resident page and paging is in progress, we
17741c7c3c6aSMatthew Dillon  *	have to wait until paging is complete but otherwise can act on the
17751c7c3c6aSMatthew Dillon  *	busy page.
17761c7c3c6aSMatthew Dillon  *
17771c7c3c6aSMatthew Dillon  *	SWM_FREE	remove and free swap block from metadata
17781c7c3c6aSMatthew Dillon  *
17791c7c3c6aSMatthew Dillon  *	SWM_POP		remove from meta data but do not free.. pop it out
17801c7c3c6aSMatthew Dillon  */
17811c7c3c6aSMatthew Dillon 
17821c7c3c6aSMatthew Dillon static daddr_t
17831c7c3c6aSMatthew Dillon swp_pager_meta_ctl(
17841c7c3c6aSMatthew Dillon 	vm_object_t object,
17851c7c3c6aSMatthew Dillon 	vm_pindex_t index,
17861c7c3c6aSMatthew Dillon 	int flags
17871c7c3c6aSMatthew Dillon ) {
17881c7c3c6aSMatthew Dillon 	/*
17891c7c3c6aSMatthew Dillon 	 * The meta data only exists of the object is OBJT_SWAP
17901c7c3c6aSMatthew Dillon 	 * and even then might not be allocated yet.
17911c7c3c6aSMatthew Dillon 	 */
17921c7c3c6aSMatthew Dillon 
17931c7c3c6aSMatthew Dillon 	if (
17941c7c3c6aSMatthew Dillon 	    object->type != OBJT_SWAP ||
17951c7c3c6aSMatthew Dillon 	    object->un_pager.swp.swp_bcount == 0
17961c7c3c6aSMatthew Dillon 	) {
17971c7c3c6aSMatthew Dillon 		return(SWAPBLK_NONE);
17981c7c3c6aSMatthew Dillon 	}
17991c7c3c6aSMatthew Dillon 
18001c7c3c6aSMatthew Dillon 	{
18011c7c3c6aSMatthew Dillon 		struct swblock **pswap;
18021c7c3c6aSMatthew Dillon 		struct swblock *swap;
18031c7c3c6aSMatthew Dillon 		daddr_t r1 = SWAPBLK_NONE;
18041c7c3c6aSMatthew Dillon 
18051c7c3c6aSMatthew Dillon 		pswap = swp_pager_hash(object, index);
18061c7c3c6aSMatthew Dillon 
18071c7c3c6aSMatthew Dillon 		index &= SWAP_META_MASK;
18081c7c3c6aSMatthew Dillon 
18091c7c3c6aSMatthew Dillon 		if ((swap = *pswap) != NULL) {
18101c7c3c6aSMatthew Dillon 			r1 = swap->swb_pages[index];
18111c7c3c6aSMatthew Dillon 
18121c7c3c6aSMatthew Dillon 			if (r1 != SWAPBLK_NONE) {
18131c7c3c6aSMatthew Dillon 				if (flags & SWM_FREE) {
18141c7c3c6aSMatthew Dillon 					swp_pager_freeswapspace(
18151c7c3c6aSMatthew Dillon 					    r1,
18161c7c3c6aSMatthew Dillon 					    1
18171c7c3c6aSMatthew Dillon 					);
18181c7c3c6aSMatthew Dillon 					r1 = SWAPBLK_NONE;
18191c7c3c6aSMatthew Dillon 				}
18201c7c3c6aSMatthew Dillon 				if (flags & (SWM_FREE|SWM_POP)) {
18211c7c3c6aSMatthew Dillon 					swap->swb_pages[index] = SWAPBLK_NONE;
18221c7c3c6aSMatthew Dillon 					if (--swap->swb_count == 0) {
18231c7c3c6aSMatthew Dillon 						*pswap = swap->swb_hnext;
18241c7c3c6aSMatthew Dillon 						zfree(swap_zone, swap);
18251c7c3c6aSMatthew Dillon 						--object->un_pager.swp.swp_bcount;
18261c7c3c6aSMatthew Dillon 					}
18271c7c3c6aSMatthew Dillon 				}
18281c7c3c6aSMatthew Dillon 	 		}
18291c7c3c6aSMatthew Dillon 		}
18301c7c3c6aSMatthew Dillon 
18311c7c3c6aSMatthew Dillon 		return(r1);
18321c7c3c6aSMatthew Dillon 	}
18331c7c3c6aSMatthew Dillon 	/* not reached */
18341c7c3c6aSMatthew Dillon }
18351c7c3c6aSMatthew Dillon 
1836