1df8bae1dSRodney W. Grimes /* 226f9a767SRodney W. Grimes * Copyright (c) 1994 John S. Dyson 3df8bae1dSRodney W. Grimes * Copyright (c) 1990 University of Utah. 4df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 8df8bae1dSRodney W. Grimes * the Systems Programming Group of the University of Utah Computer 9df8bae1dSRodney W. Grimes * Science Department. 10df8bae1dSRodney W. Grimes * 11df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 12df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 13df8bae1dSRodney W. Grimes * are met: 14df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 15df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 16df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 17df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 18df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 19df8bae1dSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 20df8bae1dSRodney W. Grimes * must display the following acknowledgement: 21df8bae1dSRodney W. Grimes * This product includes software developed by the University of 22df8bae1dSRodney W. Grimes * California, Berkeley and its contributors. 23df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 24df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 25df8bae1dSRodney W. Grimes * without specific prior written permission. 26df8bae1dSRodney W. Grimes * 27df8bae1dSRodney W. 
Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37df8bae1dSRodney W. Grimes * SUCH DAMAGE. 38df8bae1dSRodney W. Grimes * 39df8bae1dSRodney W. Grimes * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ 40df8bae1dSRodney W. Grimes * 41df8bae1dSRodney W. Grimes * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 421b119d9dSDavid Greenman * $Id: swap_pager.c,v 1.15 1994/10/22 02:17:59 davidg Exp $ 43df8bae1dSRodney W. Grimes */ 44df8bae1dSRodney W. Grimes 45df8bae1dSRodney W. Grimes /* 46df8bae1dSRodney W. Grimes * Quick hack to page to dedicated partition(s). 47df8bae1dSRodney W. Grimes * TODO: 48df8bae1dSRodney W. Grimes * Add multiprocessor locks 49df8bae1dSRodney W. Grimes * Deal with async writes in a better fashion 50df8bae1dSRodney W. Grimes */ 51df8bae1dSRodney W. Grimes 52df8bae1dSRodney W. Grimes #include <sys/param.h> 53df8bae1dSRodney W. Grimes #include <sys/systm.h> 54df8bae1dSRodney W. Grimes #include <sys/proc.h> 55df8bae1dSRodney W. Grimes #include <sys/buf.h> 56df8bae1dSRodney W. Grimes #include <sys/vnode.h> 57df8bae1dSRodney W. 
Grimes #include <sys/malloc.h> 58df8bae1dSRodney W. Grimes 59df8bae1dSRodney W. Grimes #include <miscfs/specfs/specdev.h> 6026f9a767SRodney W. Grimes #include <sys/rlist.h> 61df8bae1dSRodney W. Grimes 62df8bae1dSRodney W. Grimes #include <vm/vm.h> 6326f9a767SRodney W. Grimes #include <vm/vm_pager.h> 64df8bae1dSRodney W. Grimes #include <vm/vm_page.h> 65df8bae1dSRodney W. Grimes #include <vm/vm_pageout.h> 66df8bae1dSRodney W. Grimes #include <vm/swap_pager.h> 67df8bae1dSRodney W. Grimes 68df8bae1dSRodney W. Grimes #ifndef NPENDINGIO 6926f9a767SRodney W. Grimes #define NPENDINGIO 16 70df8bae1dSRodney W. Grimes #endif 71df8bae1dSRodney W. Grimes 7205f0fdd2SPoul-Henning Kamp int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int)); 7305f0fdd2SPoul-Henning Kamp int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *)); 7405f0fdd2SPoul-Henning Kamp 7526f9a767SRodney W. Grimes int nswiodone; 7626f9a767SRodney W. Grimes extern int vm_pageout_rate_limit; 7726f9a767SRodney W. Grimes static int cleandone; 7826f9a767SRodney W. Grimes extern int hz; 7926f9a767SRodney W. Grimes int swap_pager_full; 8026f9a767SRodney W. Grimes extern vm_map_t pager_map; 8126f9a767SRodney W. Grimes extern int vm_swap_size; 8235c10d22SDavid Greenman struct rlist *swaplist; 8335c10d22SDavid Greenman int nswaplist; 8426f9a767SRodney W. Grimes 8526f9a767SRodney W. Grimes #define MAX_PAGEOUT_CLUSTER 8 86df8bae1dSRodney W. Grimes 87df8bae1dSRodney W. Grimes TAILQ_HEAD(swpclean, swpagerclean); 88df8bae1dSRodney W. Grimes 8926f9a767SRodney W. Grimes typedef struct swpagerclean *swp_clean_t; 9026f9a767SRodney W. Grimes 91df8bae1dSRodney W. Grimes struct swpagerclean { 92df8bae1dSRodney W. Grimes TAILQ_ENTRY(swpagerclean) spc_list; 93df8bae1dSRodney W. Grimes int spc_flags; 94df8bae1dSRodney W. Grimes struct buf *spc_bp; 95df8bae1dSRodney W. Grimes sw_pager_t spc_swp; 96df8bae1dSRodney W. Grimes vm_offset_t spc_kva; 9726f9a767SRodney W. Grimes int spc_count; 9826f9a767SRodney W. 
Grimes vm_page_t spc_m[MAX_PAGEOUT_CLUSTER]; 99df8bae1dSRodney W. Grimes } swcleanlist [NPENDINGIO] ; 10026f9a767SRodney W. Grimes 10126f9a767SRodney W. Grimes 10226f9a767SRodney W. Grimes extern vm_map_t kernel_map; 103df8bae1dSRodney W. Grimes 104df8bae1dSRodney W. Grimes /* spc_flags values */ 10526f9a767SRodney W. Grimes #define SPC_ERROR 0x01 106df8bae1dSRodney W. Grimes 10726f9a767SRodney W. Grimes #define SWB_EMPTY (-1) 108df8bae1dSRodney W. Grimes 10926f9a767SRodney W. Grimes struct swpclean swap_pager_done; /* list of compileted page cleans */ 110df8bae1dSRodney W. Grimes struct swpclean swap_pager_inuse; /* list of pending page cleans */ 111df8bae1dSRodney W. Grimes struct swpclean swap_pager_free; /* list of free pager clean structs */ 112df8bae1dSRodney W. Grimes struct pagerlst swap_pager_list; /* list of "named" anon regions */ 11326f9a767SRodney W. Grimes struct pagerlst swap_pager_un_list; /* list of "unnamed" anon pagers */ 114df8bae1dSRodney W. Grimes 11526f9a767SRodney W. Grimes #define SWAP_FREE_NEEDED 0x1 /* need a swap block */ 11626f9a767SRodney W. Grimes int swap_pager_needflags; 11726f9a767SRodney W. Grimes struct rlist *swapfrag; 11826f9a767SRodney W. Grimes 11926f9a767SRodney W. Grimes struct pagerlst *swp_qs[]={ 12026f9a767SRodney W. Grimes &swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0 12126f9a767SRodney W. Grimes }; 12226f9a767SRodney W. Grimes 12326f9a767SRodney W. Grimes int swap_pager_putmulti(); 124df8bae1dSRodney W. Grimes 125df8bae1dSRodney W. Grimes struct pagerops swappagerops = { 126df8bae1dSRodney W. Grimes swap_pager_init, 127df8bae1dSRodney W. Grimes swap_pager_alloc, 128df8bae1dSRodney W. Grimes swap_pager_dealloc, 129df8bae1dSRodney W. Grimes swap_pager_getpage, 13026f9a767SRodney W. Grimes swap_pager_getmulti, 131df8bae1dSRodney W. Grimes swap_pager_putpage, 13226f9a767SRodney W. Grimes swap_pager_putmulti, 13326f9a767SRodney W. Grimes swap_pager_haspage 134df8bae1dSRodney W. Grimes }; 135df8bae1dSRodney W. 
Grimes 13626f9a767SRodney W. Grimes int npendingio = NPENDINGIO; 13726f9a767SRodney W. Grimes int pendingiowait; 13826f9a767SRodney W. Grimes int require_swap_init; 13926f9a767SRodney W. Grimes void swap_pager_finish(); 14026f9a767SRodney W. Grimes int dmmin, dmmax; 14126f9a767SRodney W. Grimes extern int vm_page_count; 14226f9a767SRodney W. Grimes 14326f9a767SRodney W. Grimes static inline void swapsizecheck() { 1445663e6deSDavid Greenman if( vm_swap_size == 0) 1455663e6deSDavid Greenman return; 14626f9a767SRodney W. Grimes if( vm_swap_size < 128*btodb(PAGE_SIZE)) { 14726f9a767SRodney W. Grimes if( swap_pager_full) 14826f9a767SRodney W. Grimes printf("swap_pager: out of space\n"); 14926f9a767SRodney W. Grimes swap_pager_full = 1; 15026f9a767SRodney W. Grimes } else if( vm_swap_size > 192*btodb(PAGE_SIZE)) 15126f9a767SRodney W. Grimes swap_pager_full = 0; 15226f9a767SRodney W. Grimes } 15326f9a767SRodney W. Grimes 15426f9a767SRodney W. Grimes void 155df8bae1dSRodney W. Grimes swap_pager_init() 156df8bae1dSRodney W. Grimes { 157df8bae1dSRodney W. Grimes dfltpagerops = &swappagerops; 158df8bae1dSRodney W. Grimes 15926f9a767SRodney W. Grimes TAILQ_INIT(&swap_pager_list); 16026f9a767SRodney W. Grimes TAILQ_INIT(&swap_pager_un_list); 161df8bae1dSRodney W. Grimes 162df8bae1dSRodney W. Grimes /* 163df8bae1dSRodney W. Grimes * Initialize clean lists 164df8bae1dSRodney W. Grimes */ 165df8bae1dSRodney W. Grimes TAILQ_INIT(&swap_pager_inuse); 16626f9a767SRodney W. Grimes TAILQ_INIT(&swap_pager_done); 167df8bae1dSRodney W. Grimes TAILQ_INIT(&swap_pager_free); 16826f9a767SRodney W. Grimes 16926f9a767SRodney W. Grimes require_swap_init = 1; 170df8bae1dSRodney W. Grimes 171df8bae1dSRodney W. Grimes /* 172df8bae1dSRodney W. Grimes * Calculate the swap allocation constants. 173df8bae1dSRodney W. Grimes */ 174df8bae1dSRodney W. Grimes 17526f9a767SRodney W. Grimes dmmin = CLBYTES/DEV_BSIZE; 17626f9a767SRodney W. Grimes dmmax = btodb(SWB_NPAGES*PAGE_SIZE)*2; 17726f9a767SRodney W. 
Grimes 178df8bae1dSRodney W. Grimes } 179df8bae1dSRodney W. Grimes 180df8bae1dSRodney W. Grimes /* 181df8bae1dSRodney W. Grimes * Allocate a pager structure and associated resources. 182df8bae1dSRodney W. Grimes * Note that if we are called from the pageout daemon (handle == NULL) 183df8bae1dSRodney W. Grimes * we should not wait for memory as it could resulting in deadlock. 184df8bae1dSRodney W. Grimes */ 18526f9a767SRodney W. Grimes vm_pager_t 18626f9a767SRodney W. Grimes swap_pager_alloc(handle, size, prot, offset) 187df8bae1dSRodney W. Grimes caddr_t handle; 188df8bae1dSRodney W. Grimes register vm_size_t size; 189df8bae1dSRodney W. Grimes vm_prot_t prot; 19026f9a767SRodney W. Grimes vm_offset_t offset; 191df8bae1dSRodney W. Grimes { 192df8bae1dSRodney W. Grimes register vm_pager_t pager; 193df8bae1dSRodney W. Grimes register sw_pager_t swp; 194df8bae1dSRodney W. Grimes int waitok; 19526f9a767SRodney W. Grimes int i,j; 196df8bae1dSRodney W. Grimes 19726f9a767SRodney W. Grimes if (require_swap_init) { 19826f9a767SRodney W. Grimes swp_clean_t spc; 19926f9a767SRodney W. Grimes struct buf *bp; 20026f9a767SRodney W. Grimes /* 20126f9a767SRodney W. Grimes * kva's are allocated here so that we dont need to keep 20226f9a767SRodney W. Grimes * doing kmem_alloc pageables at runtime 20326f9a767SRodney W. Grimes */ 20426f9a767SRodney W. Grimes for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) { 205fff93ab6SDavid Greenman spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE*MAX_PAGEOUT_CLUSTER); 20626f9a767SRodney W. Grimes if (!spc->spc_kva) { 20726f9a767SRodney W. Grimes break; 20826f9a767SRodney W. Grimes } 20926f9a767SRodney W. Grimes spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT); 21026f9a767SRodney W. Grimes if (!spc->spc_bp) { 21126f9a767SRodney W. Grimes kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE); 21226f9a767SRodney W. Grimes break; 21326f9a767SRodney W. Grimes } 21426f9a767SRodney W. Grimes spc->spc_flags = 0; 21526f9a767SRodney W. 
Grimes TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 21626f9a767SRodney W. Grimes } 21726f9a767SRodney W. Grimes require_swap_init = 0; 21826f9a767SRodney W. Grimes if( size == 0) 21926f9a767SRodney W. Grimes return(NULL); 22026f9a767SRodney W. Grimes } 22126f9a767SRodney W. Grimes 222df8bae1dSRodney W. Grimes /* 223df8bae1dSRodney W. Grimes * If this is a "named" anonymous region, look it up and 224df8bae1dSRodney W. Grimes * return the appropriate pager if it exists. 225df8bae1dSRodney W. Grimes */ 226df8bae1dSRodney W. Grimes if (handle) { 227df8bae1dSRodney W. Grimes pager = vm_pager_lookup(&swap_pager_list, handle); 228df8bae1dSRodney W. Grimes if (pager != NULL) { 229df8bae1dSRodney W. Grimes /* 230df8bae1dSRodney W. Grimes * Use vm_object_lookup to gain a reference 231df8bae1dSRodney W. Grimes * to the object and also to remove from the 232df8bae1dSRodney W. Grimes * object cache. 233df8bae1dSRodney W. Grimes */ 234df8bae1dSRodney W. Grimes if (vm_object_lookup(pager) == NULL) 235df8bae1dSRodney W. Grimes panic("swap_pager_alloc: bad object"); 236df8bae1dSRodney W. Grimes return(pager); 237df8bae1dSRodney W. Grimes } 238df8bae1dSRodney W. Grimes } 23926f9a767SRodney W. Grimes 2405663e6deSDavid Greenman /* 2415663e6deSDavid Greenman if (swap_pager_full && (vm_swap_size == 0)) { 24226f9a767SRodney W. Grimes return(NULL); 24326f9a767SRodney W. Grimes } 2445663e6deSDavid Greenman */ 24526f9a767SRodney W. Grimes 246df8bae1dSRodney W. Grimes /* 247df8bae1dSRodney W. Grimes * Pager doesn't exist, allocate swap management resources 248df8bae1dSRodney W. Grimes * and initialize. 249df8bae1dSRodney W. Grimes */ 250df8bae1dSRodney W. Grimes waitok = handle ? M_WAITOK : M_NOWAIT; 251df8bae1dSRodney W. Grimes pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); 252df8bae1dSRodney W. Grimes if (pager == NULL) 253df8bae1dSRodney W. Grimes return(NULL); 254df8bae1dSRodney W. 
Grimes swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); 255df8bae1dSRodney W. Grimes if (swp == NULL) { 256df8bae1dSRodney W. Grimes free((caddr_t)pager, M_VMPAGER); 257df8bae1dSRodney W. Grimes return(NULL); 258df8bae1dSRodney W. Grimes } 259df8bae1dSRodney W. Grimes size = round_page(size); 260df8bae1dSRodney W. Grimes swp->sw_osize = size; 26126f9a767SRodney W. Grimes swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES*PAGE_SIZE); 262df8bae1dSRodney W. Grimes swp->sw_blocks = (sw_blk_t) 263df8bae1dSRodney W. Grimes malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), 26426f9a767SRodney W. Grimes M_VMPGDATA, waitok); 265df8bae1dSRodney W. Grimes if (swp->sw_blocks == NULL) { 266df8bae1dSRodney W. Grimes free((caddr_t)swp, M_VMPGDATA); 267df8bae1dSRodney W. Grimes free((caddr_t)pager, M_VMPAGER); 26826f9a767SRodney W. Grimes return(NULL); 269df8bae1dSRodney W. Grimes } 27026f9a767SRodney W. Grimes 27126f9a767SRodney W. Grimes for (i = 0; i < swp->sw_nblocks; i++) { 27226f9a767SRodney W. Grimes swp->sw_blocks[i].swb_valid = 0; 27326f9a767SRodney W. Grimes swp->sw_blocks[i].swb_locked = 0; 27426f9a767SRodney W. Grimes for (j = 0; j < SWB_NPAGES; j++) 27526f9a767SRodney W. Grimes swp->sw_blocks[i].swb_block[j] = SWB_EMPTY; 27626f9a767SRodney W. Grimes } 27726f9a767SRodney W. Grimes 278df8bae1dSRodney W. Grimes swp->sw_poip = 0; 279df8bae1dSRodney W. Grimes if (handle) { 280df8bae1dSRodney W. Grimes vm_object_t object; 281df8bae1dSRodney W. Grimes 282df8bae1dSRodney W. Grimes swp->sw_flags = SW_NAMED; 283df8bae1dSRodney W. Grimes TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list); 284df8bae1dSRodney W. Grimes /* 285df8bae1dSRodney W. Grimes * Consistant with other pagers: return with object 286df8bae1dSRodney W. Grimes * referenced. Can't do this with handle == NULL 287df8bae1dSRodney W. Grimes * since it might be the pageout daemon calling. 288df8bae1dSRodney W. Grimes */ 289df8bae1dSRodney W. 
Grimes object = vm_object_allocate(size); 290df8bae1dSRodney W. Grimes vm_object_enter(object, pager); 291df8bae1dSRodney W. Grimes vm_object_setpager(object, pager, 0, FALSE); 292df8bae1dSRodney W. Grimes } else { 293df8bae1dSRodney W. Grimes swp->sw_flags = 0; 29426f9a767SRodney W. Grimes TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list); 295df8bae1dSRodney W. Grimes } 296df8bae1dSRodney W. Grimes pager->pg_handle = handle; 297df8bae1dSRodney W. Grimes pager->pg_ops = &swappagerops; 298df8bae1dSRodney W. Grimes pager->pg_type = PG_SWAP; 29926f9a767SRodney W. Grimes pager->pg_data = (caddr_t)swp; 300df8bae1dSRodney W. Grimes 301df8bae1dSRodney W. Grimes return(pager); 302df8bae1dSRodney W. Grimes } 303df8bae1dSRodney W. Grimes 30426f9a767SRodney W. Grimes /* 30526f9a767SRodney W. Grimes * returns disk block associated with pager and offset 30626f9a767SRodney W. Grimes * additionally, as a side effect returns a flag indicating 30726f9a767SRodney W. Grimes * if the block has been written 30826f9a767SRodney W. Grimes */ 30926f9a767SRodney W. Grimes 31026f9a767SRodney W. Grimes static int * 31126f9a767SRodney W. Grimes swap_pager_diskaddr(swp, offset, valid) 31226f9a767SRodney W. Grimes sw_pager_t swp; 31326f9a767SRodney W. Grimes vm_offset_t offset; 31426f9a767SRodney W. Grimes int *valid; 31526f9a767SRodney W. Grimes { 31626f9a767SRodney W. Grimes register sw_blk_t swb; 31726f9a767SRodney W. Grimes int ix; 31826f9a767SRodney W. Grimes 31926f9a767SRodney W. Grimes if (valid) 32026f9a767SRodney W. Grimes *valid = 0; 32126f9a767SRodney W. Grimes ix = offset / (SWB_NPAGES*PAGE_SIZE); 32226f9a767SRodney W. Grimes if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 32326f9a767SRodney W. Grimes return(FALSE); 32426f9a767SRodney W. Grimes } 32526f9a767SRodney W. Grimes swb = &swp->sw_blocks[ix]; 32626f9a767SRodney W. Grimes ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 32726f9a767SRodney W. Grimes if (valid) 32826f9a767SRodney W. 
Grimes *valid = swb->swb_valid & (1<<ix); 32926f9a767SRodney W. Grimes return &swb->swb_block[ix]; 33026f9a767SRodney W. Grimes } 33126f9a767SRodney W. Grimes 33226f9a767SRodney W. Grimes /* 33326f9a767SRodney W. Grimes * Utility routine to set the valid (written) bit for 33426f9a767SRodney W. Grimes * a block associated with a pager and offset 33526f9a767SRodney W. Grimes */ 336df8bae1dSRodney W. Grimes static void 33726f9a767SRodney W. Grimes swap_pager_setvalid(swp, offset, valid) 33826f9a767SRodney W. Grimes sw_pager_t swp; 33926f9a767SRodney W. Grimes vm_offset_t offset; 34026f9a767SRodney W. Grimes int valid; 34126f9a767SRodney W. Grimes { 34226f9a767SRodney W. Grimes register sw_blk_t swb; 34326f9a767SRodney W. Grimes int ix; 34426f9a767SRodney W. Grimes 34526f9a767SRodney W. Grimes ix = offset / (SWB_NPAGES*PAGE_SIZE); 34626f9a767SRodney W. Grimes if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) 34726f9a767SRodney W. Grimes return; 34826f9a767SRodney W. Grimes 34926f9a767SRodney W. Grimes swb = &swp->sw_blocks[ix]; 35026f9a767SRodney W. Grimes ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 35126f9a767SRodney W. Grimes if (valid) 35226f9a767SRodney W. Grimes swb->swb_valid |= (1 << ix); 35326f9a767SRodney W. Grimes else 35426f9a767SRodney W. Grimes swb->swb_valid &= ~(1 << ix); 35526f9a767SRodney W. Grimes return; 35626f9a767SRodney W. Grimes } 35726f9a767SRodney W. Grimes 35826f9a767SRodney W. Grimes /* 35926f9a767SRodney W. Grimes * this routine allocates swap space with a fragmentation 36026f9a767SRodney W. Grimes * minimization policy. 36126f9a767SRodney W. Grimes */ 36226f9a767SRodney W. Grimes int 36326f9a767SRodney W. Grimes swap_pager_getswapspace( unsigned amount, unsigned *rtval) { 364426de760SDavid Greenman #ifdef EXP 36526f9a767SRodney W. Grimes unsigned tmpalloc; 36626f9a767SRodney W. Grimes unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE); 36726f9a767SRodney W. Grimes if( amount < nblocksfrag) { 36826f9a767SRodney W. 
Grimes if( rlist_alloc(&swapfrag, amount, rtval)) 36926f9a767SRodney W. Grimes return 1; 37035c10d22SDavid Greenman if( !rlist_alloc(&swaplist, nblocksfrag, &tmpalloc)) 37126f9a767SRodney W. Grimes return 0; 37226f9a767SRodney W. Grimes rlist_free( &swapfrag, tmpalloc+amount, tmpalloc + nblocksfrag - 1); 37326f9a767SRodney W. Grimes *rtval = tmpalloc; 37426f9a767SRodney W. Grimes return 1; 37526f9a767SRodney W. Grimes } 376426de760SDavid Greenman #endif 37735c10d22SDavid Greenman if( !rlist_alloc(&swaplist, amount, rtval)) 37826f9a767SRodney W. Grimes return 0; 37926f9a767SRodney W. Grimes else 38026f9a767SRodney W. Grimes return 1; 38126f9a767SRodney W. Grimes } 38226f9a767SRodney W. Grimes 38326f9a767SRodney W. Grimes /* 38426f9a767SRodney W. Grimes * this routine frees swap space with a fragmentation 38526f9a767SRodney W. Grimes * minimization policy. 38626f9a767SRodney W. Grimes */ 38726f9a767SRodney W. Grimes void 38826f9a767SRodney W. Grimes swap_pager_freeswapspace( unsigned from, unsigned to) { 38905f0fdd2SPoul-Henning Kamp #ifdef EXP 39026f9a767SRodney W. Grimes unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE); 39126f9a767SRodney W. Grimes unsigned tmpalloc; 39226f9a767SRodney W. Grimes if( ((to + 1) - from) >= nblocksfrag) { 393426de760SDavid Greenman #endif 39435c10d22SDavid Greenman rlist_free(&swaplist, from, to); 395426de760SDavid Greenman #ifdef EXP 39626f9a767SRodney W. Grimes return; 397426de760SDavid Greenman } 39826f9a767SRodney W. Grimes rlist_free(&swapfrag, from, to); 39926f9a767SRodney W. Grimes while( rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) { 40035c10d22SDavid Greenman rlist_free(&swaplist, tmpalloc, tmpalloc + nblocksfrag-1); 40126f9a767SRodney W. Grimes } 402426de760SDavid Greenman #endif 40326f9a767SRodney W. Grimes } 40426f9a767SRodney W. Grimes /* 40526f9a767SRodney W. Grimes * this routine frees swap blocks from a specified pager 40626f9a767SRodney W. Grimes */ 40726f9a767SRodney W. Grimes void 40826f9a767SRodney W. 
Grimes _swap_pager_freespace(swp, start, size) 40926f9a767SRodney W. Grimes sw_pager_t swp; 41026f9a767SRodney W. Grimes vm_offset_t start; 41126f9a767SRodney W. Grimes vm_offset_t size; 41226f9a767SRodney W. Grimes { 41326f9a767SRodney W. Grimes vm_offset_t i; 41426f9a767SRodney W. Grimes int s; 41526f9a767SRodney W. Grimes 41626f9a767SRodney W. Grimes s = splbio(); 41726f9a767SRodney W. Grimes for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) { 41826f9a767SRodney W. Grimes int valid; 41926f9a767SRodney W. Grimes int *addr = swap_pager_diskaddr(swp, i, &valid); 42026f9a767SRodney W. Grimes if (addr && *addr != SWB_EMPTY) { 42126f9a767SRodney W. Grimes swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); 42226f9a767SRodney W. Grimes if( valid) { 42326f9a767SRodney W. Grimes vm_swap_size += btodb(PAGE_SIZE); 42426f9a767SRodney W. Grimes swap_pager_setvalid(swp, i, 0); 42526f9a767SRodney W. Grimes } 42626f9a767SRodney W. Grimes *addr = SWB_EMPTY; 42726f9a767SRodney W. Grimes } 42826f9a767SRodney W. Grimes } 42926f9a767SRodney W. Grimes swapsizecheck(); 43026f9a767SRodney W. Grimes splx(s); 43126f9a767SRodney W. Grimes } 43226f9a767SRodney W. Grimes 43326f9a767SRodney W. Grimes void 43426f9a767SRodney W. Grimes swap_pager_freespace(pager, start, size) 43526f9a767SRodney W. Grimes vm_pager_t pager; 43626f9a767SRodney W. Grimes vm_offset_t start; 43726f9a767SRodney W. Grimes vm_offset_t size; 43826f9a767SRodney W. Grimes { 43926f9a767SRodney W. Grimes _swap_pager_freespace((sw_pager_t) pager->pg_data, start, size); 44026f9a767SRodney W. Grimes } 44126f9a767SRodney W. Grimes 44226f9a767SRodney W. Grimes /* 44326f9a767SRodney W. Grimes * swap_pager_reclaim frees up over-allocated space from all pagers 44426f9a767SRodney W. Grimes * this eliminates internal fragmentation due to allocation of space 44526f9a767SRodney W. Grimes * for segments that are never swapped to. It has been written so that 44626f9a767SRodney W. 
Grimes * it does not block until the rlist_free operation occurs; it keeps 44726f9a767SRodney W. Grimes * the queues consistant. 44826f9a767SRodney W. Grimes */ 44926f9a767SRodney W. Grimes 45026f9a767SRodney W. Grimes /* 45126f9a767SRodney W. Grimes * Maximum number of blocks (pages) to reclaim per pass 45226f9a767SRodney W. Grimes */ 45326f9a767SRodney W. Grimes #define MAXRECLAIM 256 45426f9a767SRodney W. Grimes 45526f9a767SRodney W. Grimes void 45626f9a767SRodney W. Grimes swap_pager_reclaim() 45726f9a767SRodney W. Grimes { 45826f9a767SRodney W. Grimes vm_pager_t p; 45926f9a767SRodney W. Grimes sw_pager_t swp; 46026f9a767SRodney W. Grimes int i, j, k; 46126f9a767SRodney W. Grimes int s; 46226f9a767SRodney W. Grimes int reclaimcount; 46326f9a767SRodney W. Grimes static int reclaims[MAXRECLAIM]; 46426f9a767SRodney W. Grimes static int in_reclaim; 46526f9a767SRodney W. Grimes 46626f9a767SRodney W. Grimes /* 46726f9a767SRodney W. Grimes * allow only one process to be in the swap_pager_reclaim subroutine 46826f9a767SRodney W. Grimes */ 46926f9a767SRodney W. Grimes s = splbio(); 47026f9a767SRodney W. Grimes if (in_reclaim) { 47126f9a767SRodney W. Grimes tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0); 47226f9a767SRodney W. Grimes splx(s); 47326f9a767SRodney W. Grimes return; 47426f9a767SRodney W. Grimes } 47526f9a767SRodney W. Grimes in_reclaim = 1; 47626f9a767SRodney W. Grimes reclaimcount = 0; 47726f9a767SRodney W. Grimes 47826f9a767SRodney W. Grimes /* for each pager queue */ 47926f9a767SRodney W. Grimes for (k = 0; swp_qs[k]; k++) { 48026f9a767SRodney W. Grimes 48126f9a767SRodney W. Grimes p = swp_qs[k]->tqh_first; 48226f9a767SRodney W. Grimes while (p && (reclaimcount < MAXRECLAIM)) { 48326f9a767SRodney W. Grimes 48426f9a767SRodney W. Grimes /* 48526f9a767SRodney W. Grimes * see if any blocks associated with a pager has been 48626f9a767SRodney W. Grimes * allocated but not used (written) 48726f9a767SRodney W. Grimes */ 48826f9a767SRodney W. 
Grimes swp = (sw_pager_t) p->pg_data; 48926f9a767SRodney W. Grimes for (i = 0; i < swp->sw_nblocks; i++) { 49026f9a767SRodney W. Grimes sw_blk_t swb = &swp->sw_blocks[i]; 49126f9a767SRodney W. Grimes if( swb->swb_locked) 49226f9a767SRodney W. Grimes continue; 49326f9a767SRodney W. Grimes for (j = 0; j < SWB_NPAGES; j++) { 49426f9a767SRodney W. Grimes if (swb->swb_block[j] != SWB_EMPTY && 49526f9a767SRodney W. Grimes (swb->swb_valid & (1 << j)) == 0) { 49626f9a767SRodney W. Grimes reclaims[reclaimcount++] = swb->swb_block[j]; 49726f9a767SRodney W. Grimes swb->swb_block[j] = SWB_EMPTY; 49826f9a767SRodney W. Grimes if (reclaimcount >= MAXRECLAIM) 49926f9a767SRodney W. Grimes goto rfinished; 50026f9a767SRodney W. Grimes } 50126f9a767SRodney W. Grimes } 50226f9a767SRodney W. Grimes } 50326f9a767SRodney W. Grimes p = p->pg_list.tqe_next; 50426f9a767SRodney W. Grimes } 50526f9a767SRodney W. Grimes } 50626f9a767SRodney W. Grimes 50726f9a767SRodney W. Grimes rfinished: 50826f9a767SRodney W. Grimes 50926f9a767SRodney W. Grimes /* 51026f9a767SRodney W. Grimes * free the blocks that have been added to the reclaim list 51126f9a767SRodney W. Grimes */ 51226f9a767SRodney W. Grimes for (i = 0; i < reclaimcount; i++) { 51326f9a767SRodney W. Grimes swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(PAGE_SIZE) - 1); 51426f9a767SRodney W. Grimes swapsizecheck(); 51526f9a767SRodney W. Grimes wakeup((caddr_t) &in_reclaim); 51626f9a767SRodney W. Grimes } 51726f9a767SRodney W. Grimes 51826f9a767SRodney W. Grimes splx(s); 51926f9a767SRodney W. Grimes in_reclaim = 0; 52026f9a767SRodney W. Grimes wakeup((caddr_t) &in_reclaim); 52126f9a767SRodney W. Grimes } 52226f9a767SRodney W. Grimes 52326f9a767SRodney W. Grimes 52426f9a767SRodney W. Grimes /* 52526f9a767SRodney W. Grimes * swap_pager_copy copies blocks from one pager to another and 52626f9a767SRodney W. Grimes * destroys the source pager 52726f9a767SRodney W. Grimes */ 52826f9a767SRodney W. Grimes 52926f9a767SRodney W. 
Grimes void 53026f9a767SRodney W. Grimes swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) 53126f9a767SRodney W. Grimes vm_pager_t srcpager; 53226f9a767SRodney W. Grimes vm_offset_t srcoffset; 53326f9a767SRodney W. Grimes vm_pager_t dstpager; 53426f9a767SRodney W. Grimes vm_offset_t dstoffset; 53526f9a767SRodney W. Grimes vm_offset_t offset; 53626f9a767SRodney W. Grimes { 53726f9a767SRodney W. Grimes sw_pager_t srcswp, dstswp; 53826f9a767SRodney W. Grimes vm_offset_t i; 53926f9a767SRodney W. Grimes int s; 54026f9a767SRodney W. Grimes 5415663e6deSDavid Greenman if( vm_swap_size == 0) 5425663e6deSDavid Greenman return; 5435663e6deSDavid Greenman 54426f9a767SRodney W. Grimes srcswp = (sw_pager_t) srcpager->pg_data; 54526f9a767SRodney W. Grimes dstswp = (sw_pager_t) dstpager->pg_data; 54626f9a767SRodney W. Grimes 54726f9a767SRodney W. Grimes /* 54826f9a767SRodney W. Grimes * remove the source pager from the swap_pager internal queue 54926f9a767SRodney W. Grimes */ 55026f9a767SRodney W. Grimes s = splbio(); 55126f9a767SRodney W. Grimes if (srcswp->sw_flags & SW_NAMED) { 55226f9a767SRodney W. Grimes TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list); 55326f9a767SRodney W. Grimes srcswp->sw_flags &= ~SW_NAMED; 55426f9a767SRodney W. Grimes } else { 55526f9a767SRodney W. Grimes TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list); 55626f9a767SRodney W. Grimes } 55726f9a767SRodney W. Grimes 55826f9a767SRodney W. Grimes while (srcswp->sw_poip) { 55926f9a767SRodney W. Grimes tsleep((caddr_t)srcswp, PVM, "spgout", 0); 56026f9a767SRodney W. Grimes } 56126f9a767SRodney W. Grimes splx(s); 56226f9a767SRodney W. Grimes 56326f9a767SRodney W. Grimes /* 56426f9a767SRodney W. Grimes * clean all of the pages that are currently active and finished 56526f9a767SRodney W. Grimes */ 56626f9a767SRodney W. Grimes (void) swap_pager_clean(); 56726f9a767SRodney W. Grimes 56826f9a767SRodney W. Grimes s = splbio(); 56926f9a767SRodney W. Grimes /* 57026f9a767SRodney W. 
Grimes * clear source block before destination object 57126f9a767SRodney W. Grimes * (release allocated space) 57226f9a767SRodney W. Grimes */ 57326f9a767SRodney W. Grimes for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) { 57426f9a767SRodney W. Grimes int valid; 57526f9a767SRodney W. Grimes int *addr = swap_pager_diskaddr(srcswp, i, &valid); 57626f9a767SRodney W. Grimes if (addr && *addr != SWB_EMPTY) { 57726f9a767SRodney W. Grimes swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); 57826f9a767SRodney W. Grimes if( valid) 57926f9a767SRodney W. Grimes vm_swap_size += btodb(PAGE_SIZE); 58026f9a767SRodney W. Grimes swapsizecheck(); 58126f9a767SRodney W. Grimes *addr = SWB_EMPTY; 58226f9a767SRodney W. Grimes } 58326f9a767SRodney W. Grimes } 58426f9a767SRodney W. Grimes /* 58526f9a767SRodney W. Grimes * transfer source to destination 58626f9a767SRodney W. Grimes */ 58726f9a767SRodney W. Grimes for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) { 58826f9a767SRodney W. Grimes int srcvalid, dstvalid; 58926f9a767SRodney W. Grimes int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset, 59026f9a767SRodney W. Grimes &srcvalid); 59126f9a767SRodney W. Grimes int *dstaddrp; 59226f9a767SRodney W. Grimes /* 59326f9a767SRodney W. Grimes * see if the source has space allocated 59426f9a767SRodney W. Grimes */ 59526f9a767SRodney W. Grimes if (srcaddrp && *srcaddrp != SWB_EMPTY) { 59626f9a767SRodney W. Grimes /* 59726f9a767SRodney W. Grimes * if the source is valid and the dest has no space, then 59826f9a767SRodney W. Grimes * copy the allocation from the srouce to the dest. 59926f9a767SRodney W. Grimes */ 60026f9a767SRodney W. Grimes if (srcvalid) { 60126f9a767SRodney W. Grimes dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid); 60226f9a767SRodney W. Grimes /* 60326f9a767SRodney W. Grimes * if the dest already has a valid block, deallocate the 60426f9a767SRodney W. Grimes * source block without copying. 60526f9a767SRodney W. 
Grimes */ 60626f9a767SRodney W. Grimes if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) { 60726f9a767SRodney W. Grimes swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(PAGE_SIZE) - 1); 60826f9a767SRodney W. Grimes *dstaddrp = SWB_EMPTY; 60926f9a767SRodney W. Grimes } 61026f9a767SRodney W. Grimes if (dstaddrp && *dstaddrp == SWB_EMPTY) { 61126f9a767SRodney W. Grimes *dstaddrp = *srcaddrp; 61226f9a767SRodney W. Grimes *srcaddrp = SWB_EMPTY; 61326f9a767SRodney W. Grimes swap_pager_setvalid(dstswp, i + dstoffset, 1); 61426f9a767SRodney W. Grimes vm_swap_size -= btodb(PAGE_SIZE); 61526f9a767SRodney W. Grimes } 61626f9a767SRodney W. Grimes } 61726f9a767SRodney W. Grimes /* 61826f9a767SRodney W. Grimes * if the source is not empty at this point, then deallocate the space. 61926f9a767SRodney W. Grimes */ 62026f9a767SRodney W. Grimes if (*srcaddrp != SWB_EMPTY) { 62126f9a767SRodney W. Grimes swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); 62226f9a767SRodney W. Grimes if( srcvalid) 62326f9a767SRodney W. Grimes vm_swap_size += btodb(PAGE_SIZE); 62426f9a767SRodney W. Grimes *srcaddrp = SWB_EMPTY; 62526f9a767SRodney W. Grimes } 62626f9a767SRodney W. Grimes } 62726f9a767SRodney W. Grimes } 62826f9a767SRodney W. Grimes 62926f9a767SRodney W. Grimes /* 63026f9a767SRodney W. Grimes * deallocate the rest of the source object 63126f9a767SRodney W. Grimes */ 63226f9a767SRodney W. Grimes for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) { 63326f9a767SRodney W. Grimes int valid; 63426f9a767SRodney W. Grimes int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid); 63526f9a767SRodney W. Grimes if (srcaddrp && *srcaddrp != SWB_EMPTY) { 63626f9a767SRodney W. Grimes swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); 63726f9a767SRodney W. Grimes if( valid) 63826f9a767SRodney W. Grimes vm_swap_size += btodb(PAGE_SIZE); 63926f9a767SRodney W. Grimes *srcaddrp = SWB_EMPTY; 64026f9a767SRodney W. 
Grimes } 64126f9a767SRodney W. Grimes } 64226f9a767SRodney W. Grimes 64326f9a767SRodney W. Grimes swapsizecheck(); 64426f9a767SRodney W. Grimes splx(s); 64526f9a767SRodney W. Grimes 64626f9a767SRodney W. Grimes free((caddr_t)srcswp->sw_blocks, M_VMPGDATA); 64726f9a767SRodney W. Grimes srcswp->sw_blocks = 0; 64826f9a767SRodney W. Grimes free((caddr_t)srcswp, M_VMPGDATA); 64926f9a767SRodney W. Grimes srcpager->pg_data = 0; 65026f9a767SRodney W. Grimes free((caddr_t)srcpager, M_VMPAGER); 65126f9a767SRodney W. Grimes 65226f9a767SRodney W. Grimes return; 65326f9a767SRodney W. Grimes } 65426f9a767SRodney W. Grimes 65526f9a767SRodney W. Grimes 65626f9a767SRodney W. Grimes /* swap_pager_dealloc takes the pager off its pager list, waits for pageouts in progress to finish, frees every swap block it still holds and finally releases the pager structures themselves. */ void 657df8bae1dSRodney W. Grimes swap_pager_dealloc(pager) 658df8bae1dSRodney W. Grimes vm_pager_t pager; 659df8bae1dSRodney W. Grimes { 66026f9a767SRodney W. Grimes register int i,j; 661df8bae1dSRodney W. Grimes register sw_blk_t bp; 662df8bae1dSRodney W. Grimes register sw_pager_t swp; 663df8bae1dSRodney W. Grimes int s; 664df8bae1dSRodney W. Grimes 665df8bae1dSRodney W. Grimes /* 666df8bae1dSRodney W. Grimes * Remove from list right away so lookups will fail if we 667df8bae1dSRodney W. Grimes * block for pageout completion. 668df8bae1dSRodney W. Grimes */ 66926f9a767SRodney W. Grimes s = splbio(); 670df8bae1dSRodney W. Grimes swp = (sw_pager_t) pager->pg_data; 671df8bae1dSRodney W. Grimes if (swp->sw_flags & SW_NAMED) { 672df8bae1dSRodney W. Grimes TAILQ_REMOVE(&swap_pager_list, pager, pg_list); 673df8bae1dSRodney W. Grimes swp->sw_flags &= ~SW_NAMED; 67426f9a767SRodney W. Grimes } else { 67526f9a767SRodney W. Grimes TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list); 676df8bae1dSRodney W. Grimes } 677df8bae1dSRodney W. Grimes /* 678df8bae1dSRodney W. Grimes * Wait for all pageouts to finish and remove 679df8bae1dSRodney W. Grimes * all entries from cleaning list. 680df8bae1dSRodney W. Grimes */ 68126f9a767SRodney W. Grimes 682df8bae1dSRodney W. Grimes while (swp->sw_poip) { 68326f9a767SRodney W.
Grimes tsleep((caddr_t)swp, PVM, "swpout", 0); 684df8bae1dSRodney W. Grimes } 685df8bae1dSRodney W. Grimes splx(s); 68626f9a767SRodney W. Grimes 68726f9a767SRodney W. Grimes 68826f9a767SRodney W. Grimes (void) swap_pager_clean(); 689df8bae1dSRodney W. Grimes 690df8bae1dSRodney W. Grimes /* 691df8bae1dSRodney W. Grimes * Free left over swap blocks; vm_swap_size is only credited for entries whose valid bit is set 692df8bae1dSRodney W. Grimes */ 69326f9a767SRodney W. Grimes s = splbio(); 69426f9a767SRodney W. Grimes for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) { 69526f9a767SRodney W. Grimes for (j = 0; j < SWB_NPAGES; j++) 69626f9a767SRodney W. Grimes if (bp->swb_block[j] != SWB_EMPTY) { 69726f9a767SRodney W. Grimes swap_pager_freeswapspace((unsigned)bp->swb_block[j], 69826f9a767SRodney W. Grimes (unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1); 69926f9a767SRodney W. Grimes if( bp->swb_valid & (1<<j)) 70026f9a767SRodney W. Grimes vm_swap_size += btodb(PAGE_SIZE); 70126f9a767SRodney W. Grimes bp->swb_block[j] = SWB_EMPTY; 702df8bae1dSRodney W. Grimes } 70326f9a767SRodney W. Grimes } 70426f9a767SRodney W. Grimes splx(s); 70526f9a767SRodney W. Grimes swapsizecheck(); 70626f9a767SRodney W. Grimes 707df8bae1dSRodney W. Grimes /* 708df8bae1dSRodney W. Grimes * Free swap management resources 709df8bae1dSRodney W. Grimes */ 710df8bae1dSRodney W. Grimes free((caddr_t)swp->sw_blocks, M_VMPGDATA); 71126f9a767SRodney W. Grimes swp->sw_blocks = 0; 712df8bae1dSRodney W. Grimes free((caddr_t)swp, M_VMPGDATA); 71326f9a767SRodney W. Grimes pager->pg_data = 0; 714df8bae1dSRodney W. Grimes free((caddr_t)pager, M_VMPAGER); 715df8bae1dSRodney W. Grimes } 716df8bae1dSRodney W. Grimes 71726f9a767SRodney W. Grimes /* 71826f9a767SRodney W. Grimes * swap_pager_getmulti can get multiple pages. It panics when reqpage >= count and otherwise passes the request to swap_pager_input; the sync argument is not used. 71926f9a767SRodney W. Grimes */ 72026f9a767SRodney W. Grimes int 72126f9a767SRodney W. Grimes swap_pager_getmulti(pager, m, count, reqpage, sync) 722df8bae1dSRodney W. Grimes vm_pager_t pager; 72326f9a767SRodney W.
Grimes vm_page_t *m; 72426f9a767SRodney W. Grimes int count; 72526f9a767SRodney W. Grimes int reqpage; 726df8bae1dSRodney W. Grimes boolean_t sync; 727df8bae1dSRodney W. Grimes { 72826f9a767SRodney W. Grimes if( reqpage >= count) 72926f9a767SRodney W. Grimes panic("swap_pager_getmulti: reqpage >= count\n"); 73026f9a767SRodney W. Grimes return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage); 731df8bae1dSRodney W. Grimes } 732df8bae1dSRodney W. Grimes 73326f9a767SRodney W. Grimes /* 73426f9a767SRodney W. Grimes * swap_pager_getpage gets individual pages by passing a one-element page array to swap_pager_input; the sync argument is not used. 73526f9a767SRodney W. Grimes */ 73626f9a767SRodney W. Grimes int 73726f9a767SRodney W. Grimes swap_pager_getpage(pager, m, sync) 738df8bae1dSRodney W. Grimes vm_pager_t pager; 73926f9a767SRodney W. Grimes vm_page_t m; 740df8bae1dSRodney W. Grimes boolean_t sync; 741df8bae1dSRodney W. Grimes { 74226f9a767SRodney W. Grimes vm_page_t marray[1]; 74326f9a767SRodney W. Grimes 74426f9a767SRodney W. Grimes marray[0] = m; 74526f9a767SRodney W. Grimes return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0); 74626f9a767SRodney W. Grimes } 74726f9a767SRodney W. Grimes 74826f9a767SRodney W. Grimes /* swap_pager_putmulti writes multiple pages through swap_pager_output. A NULL pager only runs swap_pager_clean() and returns VM_PAGER_OK; the write is asynchronous (B_ASYNC) unless sync is set. */ int 74926f9a767SRodney W. Grimes swap_pager_putmulti(pager, m, c, sync, rtvals) 75026f9a767SRodney W. Grimes vm_pager_t pager; 75126f9a767SRodney W. Grimes vm_page_t *m; 75226f9a767SRodney W. Grimes int c; 75326f9a767SRodney W. Grimes boolean_t sync; 75426f9a767SRodney W. Grimes int *rtvals; 75526f9a767SRodney W. Grimes { 756df8bae1dSRodney W. Grimes int flags; 757df8bae1dSRodney W. Grimes 758df8bae1dSRodney W. Grimes if (pager == NULL) { 75926f9a767SRodney W. Grimes (void) swap_pager_clean(); 76026f9a767SRodney W. Grimes return VM_PAGER_OK; 761df8bae1dSRodney W. Grimes } 76226f9a767SRodney W. Grimes 763df8bae1dSRodney W. Grimes flags = B_WRITE; 764df8bae1dSRodney W. Grimes if (!sync) 765df8bae1dSRodney W. Grimes flags |= B_ASYNC; 76626f9a767SRodney W. Grimes 76726f9a767SRodney W.
Grimes return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals); 768df8bae1dSRodney W. Grimes } 769df8bae1dSRodney W. Grimes 77026f9a767SRodney W. Grimes /* 77126f9a767SRodney W. Grimes * swap_pager_putpage writes individual pages and returns the status that swap_pager_output stored in rtvals[0]. A NULL pager only runs swap_pager_clean() and returns VM_PAGER_OK. 77226f9a767SRodney W. Grimes */ 77326f9a767SRodney W. Grimes int 77426f9a767SRodney W. Grimes swap_pager_putpage(pager, m, sync) 77526f9a767SRodney W. Grimes vm_pager_t pager; 77626f9a767SRodney W. Grimes vm_page_t m; 77726f9a767SRodney W. Grimes boolean_t sync; 77826f9a767SRodney W. Grimes { 77926f9a767SRodney W. Grimes int flags; 78026f9a767SRodney W. Grimes vm_page_t marray[1]; 78126f9a767SRodney W. Grimes int rtvals[1]; 78226f9a767SRodney W. Grimes 78326f9a767SRodney W. Grimes 78426f9a767SRodney W. Grimes if (pager == NULL) { 78526f9a767SRodney W. Grimes (void) swap_pager_clean(); 78626f9a767SRodney W. Grimes return VM_PAGER_OK; 78726f9a767SRodney W. Grimes } 78826f9a767SRodney W. Grimes 78926f9a767SRodney W. Grimes marray[0] = m; 79026f9a767SRodney W. Grimes flags = B_WRITE; 79126f9a767SRodney W. Grimes if (!sync) 79226f9a767SRodney W. Grimes flags |= B_ASYNC; 79326f9a767SRodney W. Grimes 79426f9a767SRodney W. Grimes swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals); 79526f9a767SRodney W. Grimes 79626f9a767SRodney W. Grimes return rtvals[0]; 79726f9a767SRodney W. Grimes } 79826f9a767SRodney W. Grimes 79926f9a767SRodney W. Grimes /* swap_pager_block_index returns the index into sw_blocks of the block that covers offset; swp is not used. */ static inline int 80026f9a767SRodney W. Grimes const swap_pager_block_index(swp, offset) 80126f9a767SRodney W. Grimes sw_pager_t swp; 80226f9a767SRodney W. Grimes vm_offset_t offset; 80326f9a767SRodney W. Grimes { 80426f9a767SRodney W. Grimes return (offset / (SWB_NPAGES*PAGE_SIZE)); 80526f9a767SRodney W. Grimes } 80626f9a767SRodney W. Grimes 80726f9a767SRodney W. Grimes /* swap_pager_block_offset returns the page slot (0 to SWB_NPAGES - 1) that offset occupies inside its block; swp is not used. */ static inline int 80826f9a767SRodney W. Grimes const swap_pager_block_offset(swp, offset) 80926f9a767SRodney W. Grimes sw_pager_t swp; 81026f9a767SRodney W.
Grimes vm_offset_t offset; 81126f9a767SRodney W. Grimes { 81226f9a767SRodney W. Grimes return ((offset % (PAGE_SIZE*SWB_NPAGES)) / PAGE_SIZE); 81326f9a767SRodney W. Grimes } 81426f9a767SRodney W. Grimes 81526f9a767SRodney W. Grimes /* 81626f9a767SRodney W. Grimes * _swap_pager_haspage returns TRUE if the pager has data that has 81726f9a767SRodney W. Grimes * been written out, i.e. the page has a swap block assigned and its valid bit is set. FALSE is also returned when there is no block array or offset lies beyond sw_nblocks. 81826f9a767SRodney W. Grimes */ 819df8bae1dSRodney W. Grimes static boolean_t 82026f9a767SRodney W. Grimes _swap_pager_haspage(swp, offset) 82126f9a767SRodney W. Grimes sw_pager_t swp; 82226f9a767SRodney W. Grimes vm_offset_t offset; 82326f9a767SRodney W. Grimes { 82426f9a767SRodney W. Grimes register sw_blk_t swb; 82526f9a767SRodney W. Grimes int ix; 82626f9a767SRodney W. Grimes 82726f9a767SRodney W. Grimes ix = offset / (SWB_NPAGES*PAGE_SIZE); 82826f9a767SRodney W. Grimes if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 82926f9a767SRodney W. Grimes return(FALSE); 83026f9a767SRodney W. Grimes } 83126f9a767SRodney W. Grimes swb = &swp->sw_blocks[ix]; 83226f9a767SRodney W. Grimes ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 83326f9a767SRodney W. Grimes if (swb->swb_block[ix] != SWB_EMPTY) { 83426f9a767SRodney W. Grimes if (swb->swb_valid & (1 << ix)) 83526f9a767SRodney W. Grimes return TRUE; 83626f9a767SRodney W. Grimes } 83726f9a767SRodney W. Grimes 83826f9a767SRodney W. Grimes return(FALSE); 83926f9a767SRodney W. Grimes } 84026f9a767SRodney W. Grimes 84126f9a767SRodney W. Grimes /* 84226f9a767SRodney W. Grimes * swap_pager_haspage is the externally accessible version of 84326f9a767SRodney W. Grimes * _swap_pager_haspage above. This routine takes a vm_pager_t 84426f9a767SRodney W. Grimes * for an argument instead of sw_pager_t. 84526f9a767SRodney W. Grimes */ 84626f9a767SRodney W. Grimes boolean_t 847df8bae1dSRodney W. Grimes swap_pager_haspage(pager, offset) 848df8bae1dSRodney W. Grimes vm_pager_t pager; 849df8bae1dSRodney W. Grimes vm_offset_t offset; 850df8bae1dSRodney W.
Grimes { 85126f9a767SRodney W. Grimes return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset); 852df8bae1dSRodney W. Grimes } 853df8bae1dSRodney W. Grimes 854df8bae1dSRodney W. Grimes /* 85526f9a767SRodney W. Grimes * swap_pager_freepage is a convenience routine that clears the busy 85626f9a767SRodney W. Grimes * bit and deallocates a page. 857df8bae1dSRodney W. Grimes */ 85826f9a767SRodney W. Grimes static void 85926f9a767SRodney W. Grimes swap_pager_freepage(m) 86026f9a767SRodney W. Grimes vm_page_t m; 86126f9a767SRodney W. Grimes { 86226f9a767SRodney W. Grimes PAGE_WAKEUP(m); 86326f9a767SRodney W. Grimes vm_page_free(m); 86426f9a767SRodney W. Grimes } 86526f9a767SRodney W. Grimes 86626f9a767SRodney W. Grimes /* 86726f9a767SRodney W. Grimes * swap_pager_ridpages is a convenience routine that deallocates all 86826f9a767SRodney W. Grimes * but the required page. This is usually used in error returns that 86926f9a767SRodney W. Grimes * need to invalidate the "extra" readahead pages. 87026f9a767SRodney W. Grimes */ 87126f9a767SRodney W. Grimes static void 87226f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage) 87326f9a767SRodney W. Grimes vm_page_t *m; 87426f9a767SRodney W. Grimes int count; 87526f9a767SRodney W. Grimes int reqpage; 87626f9a767SRodney W. Grimes { 87726f9a767SRodney W. Grimes int i; 87826f9a767SRodney W. Grimes for (i = 0; i < count; i++) 87926f9a767SRodney W. Grimes if (i != reqpage) 88026f9a767SRodney W. Grimes swap_pager_freepage(m[i]); 88126f9a767SRodney W. Grimes } 88226f9a767SRodney W. Grimes 88326f9a767SRodney W. Grimes int swapwritecount=0; 88426f9a767SRodney W. Grimes 88526f9a767SRodney W. Grimes /* 88626f9a767SRodney W. Grimes * swap_pager_iodone1 is the completion routine for both reads and async writes; it marks the buffer B_DONE, clears B_ASYNC and wakes up whoever sleeps on bp 88726f9a767SRodney W. Grimes */ 88826f9a767SRodney W. Grimes void 88926f9a767SRodney W. Grimes swap_pager_iodone1(bp) 89026f9a767SRodney W. Grimes struct buf *bp; 89126f9a767SRodney W. Grimes { 89226f9a767SRodney W.
Grimes bp->b_flags |= B_DONE; 89326f9a767SRodney W. Grimes bp->b_flags &= ~B_ASYNC; 89426f9a767SRodney W. Grimes wakeup((caddr_t)bp); 89526f9a767SRodney W. Grimes /* 89626f9a767SRodney W. Grimes if ((bp->b_flags & B_READ) == 0) 89726f9a767SRodney W. Grimes vwakeup(bp); 89826f9a767SRodney W. Grimes */ 89926f9a767SRodney W. Grimes } 90026f9a767SRodney W. Grimes 90126f9a767SRodney W. Grimes 90226f9a767SRodney W. Grimes /* swap_pager_input reads m[reqpage] from swap, along with those neighbouring pages of m[] whose swap blocks are valid, contiguous on disk with it and in the same dmmax region; pages that do not qualify are freed. The routine waits for the read to complete and returns VM_PAGER_OK or VM_PAGER_FAIL. */ int 90326f9a767SRodney W. Grimes swap_pager_input(swp, m, count, reqpage) 904df8bae1dSRodney W. Grimes register sw_pager_t swp; 90526f9a767SRodney W. Grimes vm_page_t *m; 90626f9a767SRodney W. Grimes int count, reqpage; 907df8bae1dSRodney W. Grimes { 908df8bae1dSRodney W. Grimes register struct buf *bp; 90926f9a767SRodney W. Grimes sw_blk_t swb[count]; 910df8bae1dSRodney W. Grimes register int s; 91126f9a767SRodney W. Grimes int i; 912df8bae1dSRodney W. Grimes boolean_t rv; 91326f9a767SRodney W. Grimes vm_offset_t kva, off[count]; 914df8bae1dSRodney W. Grimes swp_clean_t spc; 91526f9a767SRodney W. Grimes vm_offset_t paging_offset; 91626f9a767SRodney W. Grimes vm_object_t object; 91726f9a767SRodney W. Grimes int reqaddr[count]; 918df8bae1dSRodney W. Grimes 91926f9a767SRodney W. Grimes int first, last; 92026f9a767SRodney W. Grimes int failed; 92126f9a767SRodney W. Grimes int reqdskregion; 922df8bae1dSRodney W. Grimes 92326f9a767SRodney W. Grimes object = m[reqpage]->object; 92426f9a767SRodney W. Grimes paging_offset = object->paging_offset; 925df8bae1dSRodney W. Grimes /* 926df8bae1dSRodney W. Grimes * First determine if the page exists in the pager if this is 927df8bae1dSRodney W. Grimes * a sync read. This quickly handles cases where we are 928df8bae1dSRodney W. Grimes * following shadow chains looking for the top level object 929df8bae1dSRodney W. Grimes * with the page. 930df8bae1dSRodney W. Grimes */ 93126f9a767SRodney W. Grimes if (swp->sw_blocks == NULL) { 93226f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage); 933df8bae1dSRodney W.
Grimes return(VM_PAGER_FAIL); 934df8bae1dSRodney W. Grimes } 93526f9a767SRodney W. Grimes 93626f9a767SRodney W. Grimes for(i = 0; i < count; i++) { 93726f9a767SRodney W. Grimes vm_offset_t foff = m[i]->offset + paging_offset; 93826f9a767SRodney W. Grimes int ix = swap_pager_block_index(swp, foff); 93926f9a767SRodney W. Grimes if (ix >= swp->sw_nblocks) { 94026f9a767SRodney W. Grimes int j; 94126f9a767SRodney W. Grimes if( i <= reqpage) { 94226f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage); 943df8bae1dSRodney W. Grimes return(VM_PAGER_FAIL); 94426f9a767SRodney W. Grimes } 94526f9a767SRodney W. Grimes for(j = i; j < count; j++) { 94626f9a767SRodney W. Grimes swap_pager_freepage(m[j]); 94726f9a767SRodney W. Grimes } 94826f9a767SRodney W. Grimes count = i; 94926f9a767SRodney W. Grimes break; 95026f9a767SRodney W. Grimes } 95126f9a767SRodney W. Grimes 95226f9a767SRodney W. Grimes swb[i] = &swp->sw_blocks[ix]; 95326f9a767SRodney W. Grimes off[i] = swap_pager_block_offset(swp, foff); 95426f9a767SRodney W. Grimes reqaddr[i] = swb[i]->swb_block[off[i]]; 95526f9a767SRodney W. Grimes } 95626f9a767SRodney W. Grimes 95726f9a767SRodney W. Grimes /* make sure that our required input request is existent (allocated and valid) */ 95826f9a767SRodney W. Grimes 95926f9a767SRodney W. Grimes if (reqaddr[reqpage] == SWB_EMPTY || 96026f9a767SRodney W. Grimes (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) { 96126f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage); 96226f9a767SRodney W. Grimes return(VM_PAGER_FAIL); 96326f9a767SRodney W. Grimes } 96426f9a767SRodney W. Grimes 96526f9a767SRodney W. Grimes 96626f9a767SRodney W. Grimes reqdskregion = reqaddr[reqpage] / dmmax; 967df8bae1dSRodney W. Grimes 968df8bae1dSRodney W. Grimes /* 96926f9a767SRodney W. Grimes * search backwards for the first contiguous page to transfer 970df8bae1dSRodney W. Grimes */ 97126f9a767SRodney W. Grimes failed = 0; 97226f9a767SRodney W. Grimes first = 0; 97326f9a767SRodney W.
Grimes for (i = reqpage - 1; i >= 0; --i) { 97426f9a767SRodney W. Grimes if ( failed || (reqaddr[i] == SWB_EMPTY) || 97526f9a767SRodney W. Grimes (swb[i]->swb_valid & (1 << off[i])) == 0 || 97626f9a767SRodney W. Grimes (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 97726f9a767SRodney W. Grimes ((reqaddr[i] / dmmax) != reqdskregion)) { 97826f9a767SRodney W. Grimes failed = 1; 97926f9a767SRodney W. Grimes swap_pager_freepage(m[i]); 98026f9a767SRodney W. Grimes if (first == 0) 98126f9a767SRodney W. Grimes first = i + 1; 98226f9a767SRodney W. Grimes } 983df8bae1dSRodney W. Grimes } 984df8bae1dSRodney W. Grimes /* 98526f9a767SRodney W. Grimes * search forwards for the last contiguous page to transfer 986df8bae1dSRodney W. Grimes */ 98726f9a767SRodney W. Grimes failed = 0; 98826f9a767SRodney W. Grimes last = count; 98926f9a767SRodney W. Grimes for (i = reqpage + 1; i < count; i++) { 99026f9a767SRodney W. Grimes if ( failed || (reqaddr[i] == SWB_EMPTY) || 99126f9a767SRodney W. Grimes (swb[i]->swb_valid & (1 << off[i])) == 0 || 99226f9a767SRodney W. Grimes (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 99326f9a767SRodney W. Grimes ((reqaddr[i] / dmmax) != reqdskregion)) { 99426f9a767SRodney W. Grimes failed = 1; 99526f9a767SRodney W. Grimes swap_pager_freepage(m[i]); 99626f9a767SRodney W. Grimes if (last == count) 99726f9a767SRodney W. Grimes last = i; 99826f9a767SRodney W. Grimes } 99926f9a767SRodney W. Grimes } 100026f9a767SRodney W. Grimes 100126f9a767SRodney W. Grimes count = last; 100226f9a767SRodney W. Grimes if (first != 0) { 100326f9a767SRodney W. Grimes for (i = first; i < count; i++) { 100426f9a767SRodney W. Grimes m[i-first] = m[i]; 100526f9a767SRodney W. Grimes reqaddr[i-first] = reqaddr[i]; 100626f9a767SRodney W. Grimes off[i-first] = off[i]; 100726f9a767SRodney W. Grimes } 100826f9a767SRodney W. Grimes count -= first; 100926f9a767SRodney W. Grimes reqpage -= first; 101026f9a767SRodney W.
Grimes } 101126f9a767SRodney W. Grimes 101226f9a767SRodney W. Grimes ++swb[reqpage]->swb_locked; 101326f9a767SRodney W. Grimes 101426f9a767SRodney W. Grimes /* 101526f9a767SRodney W. Grimes * at this point: 101626f9a767SRodney W. Grimes * "m" is a pointer to the array of vm_page_t for paging I/O 101726f9a767SRodney W. Grimes * "count" is the number of vm_page_t entries represented by "m" 101826f9a767SRodney W. Grimes * "object" is the vm_object_t for I/O 101926f9a767SRodney W. Grimes * "reqpage" is the index into "m" for the page actually faulted * NOTE(review): m[], reqaddr[] and off[] were shifted down by first above but swb[] was not, yet swb[reqpage] is indexed with the adjusted reqpage -- confirm which block gets locked 102026f9a767SRodney W. Grimes */ 102126f9a767SRodney W. Grimes 102226f9a767SRodney W. Grimes spc = NULL; /* we might not use an spc data structure */ 102326f9a767SRodney W. Grimes 102416f62314SDavid Greenman if (count == 1) { 102526f9a767SRodney W. Grimes /* 102626f9a767SRodney W. Grimes * if a kva has not been allocated, we can only do a one page transfer, 102726f9a767SRodney W. Grimes * so we free the other pages that might have been allocated by 102826f9a767SRodney W. Grimes * vm_fault. NOTE(review): count is already 1 on this path, so the swap_pager_ridpages call below appears to have no page other than the required one left to free -- confirm 102926f9a767SRodney W. Grimes */ 103026f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage); 103126f9a767SRodney W. Grimes m[0] = m[reqpage]; 103226f9a767SRodney W. Grimes reqaddr[0] = reqaddr[reqpage]; 103326f9a767SRodney W. Grimes 103426f9a767SRodney W. Grimes count = 1; 103526f9a767SRodney W. Grimes reqpage = 0; 103626f9a767SRodney W. Grimes /* 103726f9a767SRodney W. Grimes * get a swap pager clean data structure, block until we get it 103826f9a767SRodney W. Grimes */ 1039df8bae1dSRodney W. Grimes if (swap_pager_free.tqh_first == NULL) { 1040df8bae1dSRodney W. Grimes s = splbio(); 104126f9a767SRodney W. Grimes if( curproc == pageproc) 104226f9a767SRodney W. Grimes (void) swap_pager_clean(); 104326f9a767SRodney W. Grimes else 104426f9a767SRodney W. Grimes wakeup((caddr_t) &vm_pages_needed); 104526f9a767SRodney W. Grimes while (swap_pager_free.tqh_first == NULL) { 104626f9a767SRodney W.
Grimes swap_pager_needflags |= SWAP_FREE_NEEDED; 104726f9a767SRodney W. Grimes tsleep((caddr_t)&swap_pager_free, 104826f9a767SRodney W. Grimes PVM, "swpfre", 0); 104926f9a767SRodney W. Grimes if( curproc == pageproc) 105026f9a767SRodney W. Grimes (void) swap_pager_clean(); 105126f9a767SRodney W. Grimes else 105226f9a767SRodney W. Grimes wakeup((caddr_t) &vm_pages_needed); 1053df8bae1dSRodney W. Grimes } 1054df8bae1dSRodney W. Grimes splx(s); 105526f9a767SRodney W. Grimes } 105626f9a767SRodney W. Grimes spc = swap_pager_free.tqh_first; 105726f9a767SRodney W. Grimes TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 105826f9a767SRodney W. Grimes kva = spc->spc_kva; 105926f9a767SRodney W. Grimes bp = spc->spc_bp; 106026f9a767SRodney W. Grimes bzero(bp, sizeof *bp); 106126f9a767SRodney W. Grimes bp->b_spc = spc; 106226f9a767SRodney W. Grimes } else { 106316f62314SDavid Greenman /* 106416f62314SDavid Greenman * Get a swap buffer header to perform the IO 106516f62314SDavid Greenman */ 106626f9a767SRodney W. Grimes bp = getpbuf(); 106716f62314SDavid Greenman kva = (vm_offset_t) bp->b_data; 106826f9a767SRodney W. Grimes } 106926f9a767SRodney W. Grimes 107016f62314SDavid Greenman /* 107116f62314SDavid Greenman * map our page(s) into kva for input 107216f62314SDavid Greenman */ 107316f62314SDavid Greenman pmap_qenter( kva, m, count); 107416f62314SDavid Greenman 107526f9a767SRodney W. Grimes s = splbio(); 107626f9a767SRodney W. Grimes bp->b_flags = B_BUSY | B_READ | B_CALL; 107726f9a767SRodney W. Grimes bp->b_iodone = swap_pager_iodone1; 1078df8bae1dSRodney W. Grimes bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 107926f9a767SRodney W. Grimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 108026f9a767SRodney W. Grimes crhold(bp->b_rcred); 108126f9a767SRodney W. Grimes crhold(bp->b_wcred); 108226f9a767SRodney W. Grimes bp->b_un.b_addr = (caddr_t) kva; 108326f9a767SRodney W. Grimes bp->b_blkno = reqaddr[0]; 108426f9a767SRodney W.
Grimes bp->b_bcount = PAGE_SIZE*count; 108526f9a767SRodney W. Grimes bp->b_bufsize = PAGE_SIZE*count; 108626f9a767SRodney W. Grimes 108726f9a767SRodney W. Grimes bgetvp( swapdev_vp, bp); 108826f9a767SRodney W. Grimes 108926f9a767SRodney W. Grimes swp->sw_piip++; 1090df8bae1dSRodney W. Grimes 1091976e77fcSDavid Greenman cnt.v_swapin++; 1092976e77fcSDavid Greenman cnt.v_swappgsin += count; 1093df8bae1dSRodney W. Grimes /* 109426f9a767SRodney W. Grimes * perform the I/O 1095df8bae1dSRodney W. Grimes */ 1096df8bae1dSRodney W. Grimes VOP_STRATEGY(bp); 109726f9a767SRodney W. Grimes 109826f9a767SRodney W. Grimes /* 109926f9a767SRodney W. Grimes * wait for the sync I/O to complete 110026f9a767SRodney W. Grimes */ 110126f9a767SRodney W. Grimes while ((bp->b_flags & B_DONE) == 0) { 110226f9a767SRodney W. Grimes tsleep((caddr_t)bp, PVM, "swread", 0); 1103df8bae1dSRodney W. Grimes } 11041b119d9dSDavid Greenman 11051b119d9dSDavid Greenman if (bp->b_flags & B_ERROR) { 11061b119d9dSDavid Greenman printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n", 11071b119d9dSDavid Greenman bp->b_blkno, bp->b_bcount, bp->b_error); 11081b119d9dSDavid Greenman rv = VM_PAGER_FAIL; 11091b119d9dSDavid Greenman } else { 11101b119d9dSDavid Greenman rv = VM_PAGER_OK; 11111b119d9dSDavid Greenman } 111226f9a767SRodney W. Grimes bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); 111326f9a767SRodney W. Grimes 111426f9a767SRodney W. Grimes --swp->sw_piip; 111526f9a767SRodney W. Grimes if (swp->sw_piip == 0) 111626f9a767SRodney W. Grimes wakeup((caddr_t) swp); 111726f9a767SRodney W. Grimes 111826f9a767SRodney W. Grimes /* 111926f9a767SRodney W. Grimes * relpbuf does this, but we maintain our own buffer 112026f9a767SRodney W. Grimes * list also... 112126f9a767SRodney W. Grimes */ 1122df8bae1dSRodney W. Grimes if (bp->b_vp) 1123df8bae1dSRodney W. Grimes brelvp(bp); 112426f9a767SRodney W. Grimes 1125df8bae1dSRodney W. Grimes splx(s); 112626f9a767SRodney W.
Grimes --swb[reqpage]->swb_locked; 112726f9a767SRodney W. Grimes 112826f9a767SRodney W. Grimes /* 112926f9a767SRodney W. Grimes * remove the mapping for kernel virtual 113026f9a767SRodney W. Grimes */ 113116f62314SDavid Greenman pmap_qremove( kva, count); 113226f9a767SRodney W. Grimes 113326f9a767SRodney W. Grimes if (spc) { 113426f9a767SRodney W. Grimes /* 113526f9a767SRodney W. Grimes * if we have used an spc, we need to free it. 113626f9a767SRodney W. Grimes */ 113726f9a767SRodney W. Grimes if( bp->b_rcred != NOCRED) 113826f9a767SRodney W. Grimes crfree(bp->b_rcred); 113926f9a767SRodney W. Grimes if( bp->b_wcred != NOCRED) 114026f9a767SRodney W. Grimes crfree(bp->b_wcred); 114126f9a767SRodney W. Grimes TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 114226f9a767SRodney W. Grimes if (swap_pager_needflags & SWAP_FREE_NEEDED) { 114326f9a767SRodney W. Grimes swap_pager_needflags &= ~SWAP_FREE_NEEDED; 114426f9a767SRodney W. Grimes wakeup((caddr_t)&swap_pager_free); 114526f9a767SRodney W. Grimes } 114626f9a767SRodney W. Grimes } else { 114726f9a767SRodney W. Grimes /* 114826f9a767SRodney W. Grimes * release the physical I/O buffer 114926f9a767SRodney W. Grimes */ 115026f9a767SRodney W. Grimes relpbuf(bp); 115126f9a767SRodney W. Grimes /* 115226f9a767SRodney W. Grimes * finish up input if everything is ok 115326f9a767SRodney W. Grimes */ 115426f9a767SRodney W. Grimes if( rv == VM_PAGER_OK) { 115526f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 115626f9a767SRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 115726f9a767SRodney W. Grimes m[i]->flags |= PG_CLEAN; 115826f9a767SRodney W. Grimes m[i]->flags &= ~PG_LAUNDRY; 115926f9a767SRodney W. Grimes if (i != reqpage) { 116026f9a767SRodney W. Grimes /* 116126f9a767SRodney W. Grimes * whether or not to leave the page activated 116226f9a767SRodney W. Grimes * is up in the air, but we should put the page 116326f9a767SRodney W. Grimes * on a page queue somewhere. (it already is in 116426f9a767SRodney W.
Grimes * the object). 116526f9a767SRodney W. Grimes * After some empirical results, it is best 116626f9a767SRodney W. Grimes * to deactivate the readahead pages. 116726f9a767SRodney W. Grimes */ 116826f9a767SRodney W. Grimes vm_page_deactivate(m[i]); 116926f9a767SRodney W. Grimes 117026f9a767SRodney W. Grimes /* 117126f9a767SRodney W. Grimes * just in case someone was asking for this 117226f9a767SRodney W. Grimes * page we now tell them that it is ok to use 117326f9a767SRodney W. Grimes */ 117426f9a767SRodney W. Grimes m[i]->flags &= ~PG_FAKE; 117526f9a767SRodney W. Grimes PAGE_WAKEUP(m[i]); 117626f9a767SRodney W. Grimes } 117726f9a767SRodney W. Grimes } 11782e1e24ddSDavid Greenman /* 11792e1e24ddSDavid Greenman * If we're out of swap space, then attempt to free 11802e1e24ddSDavid Greenman * some whenever pages are brought in. We must clear 11812e1e24ddSDavid Greenman * the clean flag so that the page contents will be 11822e1e24ddSDavid Greenman * preserved. 11832e1e24ddSDavid Greenman */ 118426f9a767SRodney W. Grimes if (swap_pager_full) { 11852e1e24ddSDavid Greenman for (i = 0; i < count; i++) { 11862e1e24ddSDavid Greenman m[i]->flags &= ~PG_CLEAN; 11872e1e24ddSDavid Greenman } 118826f9a767SRodney W. Grimes _swap_pager_freespace( swp, m[0]->offset+paging_offset, count*PAGE_SIZE); 118926f9a767SRodney W. Grimes } 119026f9a767SRodney W. Grimes } else { 119126f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage); 119226f9a767SRodney W. Grimes } 119326f9a767SRodney W. Grimes } 1194df8bae1dSRodney W. Grimes return(rv); 1195df8bae1dSRodney W. Grimes } 1196df8bae1dSRodney W. Grimes 119726f9a767SRodney W. Grimes int 119826f9a767SRodney W. Grimes swap_pager_output(swp, m, count, flags, rtvals) 119926f9a767SRodney W. Grimes register sw_pager_t swp; 120026f9a767SRodney W. Grimes vm_page_t *m; 120126f9a767SRodney W. Grimes int count; 120226f9a767SRodney W. Grimes int flags; 120326f9a767SRodney W. Grimes int *rtvals; 1204df8bae1dSRodney W.
Grimes { 120526f9a767SRodney W. Grimes register struct buf *bp; 120626f9a767SRodney W. Grimes sw_blk_t swb[count]; 120726f9a767SRodney W. Grimes register int s; 120826f9a767SRodney W. Grimes int i, j, ix; 120926f9a767SRodney W. Grimes boolean_t rv; 121026f9a767SRodney W. Grimes vm_offset_t kva, off, foff; 121126f9a767SRodney W. Grimes swp_clean_t spc; 121226f9a767SRodney W. Grimes vm_offset_t paging_offset; 1213df8bae1dSRodney W. Grimes vm_object_t object; 121426f9a767SRodney W. Grimes int reqaddr[count]; 121526f9a767SRodney W. Grimes int failed; 1216df8bae1dSRodney W. Grimes 1217df8bae1dSRodney W. Grimes /* 121826f9a767SRodney W. Grimes if( count > 1) 121926f9a767SRodney W. Grimes printf("off: 0x%x, count: %d\n", m[0]->offset, count); 1220df8bae1dSRodney W. Grimes */ 12215663e6deSDavid Greenman if( vm_swap_size == 0) { 12225663e6deSDavid Greenman for(i=0;i<count;i++) 12235663e6deSDavid Greenman rtvals[i] = VM_PAGER_FAIL; 12245663e6deSDavid Greenman return VM_PAGER_FAIL; 12255663e6deSDavid Greenman } 12265663e6deSDavid Greenman 122726f9a767SRodney W. Grimes spc = NULL; 122826f9a767SRodney W. Grimes 122926f9a767SRodney W. Grimes object = m[0]->object; 123026f9a767SRodney W. Grimes paging_offset = object->paging_offset; 123126f9a767SRodney W. Grimes 123226f9a767SRodney W. Grimes failed = 0; 123326f9a767SRodney W. Grimes for(j=0;j<count;j++) { 123426f9a767SRodney W. Grimes foff = m[j]->offset + paging_offset; 123526f9a767SRodney W. Grimes ix = swap_pager_block_index(swp, foff); 123626f9a767SRodney W. Grimes swb[j] = 0; 123726f9a767SRodney W. Grimes if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 123826f9a767SRodney W. Grimes rtvals[j] = VM_PAGER_FAIL; 123926f9a767SRodney W. Grimes failed = 1; 124026f9a767SRodney W. Grimes continue; 124126f9a767SRodney W. Grimes } else { 124226f9a767SRodney W. Grimes rtvals[j] = VM_PAGER_OK; 124326f9a767SRodney W. Grimes } 124426f9a767SRodney W. Grimes swb[j] = &swp->sw_blocks[ix]; 124526f9a767SRodney W. 
Grimes ++swb[j]->swb_locked; 124626f9a767SRodney W. Grimes if( failed) { 124726f9a767SRodney W. Grimes rtvals[j] = VM_PAGER_FAIL; 124826f9a767SRodney W. Grimes continue; 124926f9a767SRodney W. Grimes } 125026f9a767SRodney W. Grimes off = swap_pager_block_offset(swp, foff); 125126f9a767SRodney W. Grimes reqaddr[j] = swb[j]->swb_block[off]; 125226f9a767SRodney W. Grimes if( reqaddr[j] == SWB_EMPTY) { 125326f9a767SRodney W. Grimes int blk; 125426f9a767SRodney W. Grimes int tries; 125526f9a767SRodney W. Grimes int ntoget; 125626f9a767SRodney W. Grimes tries = 0; 1257df8bae1dSRodney W. Grimes s = splbio(); 125826f9a767SRodney W. Grimes 1259df8bae1dSRodney W. Grimes /* 126026f9a767SRodney W. Grimes * if any other pages have been allocated in this block, we 126126f9a767SRodney W. Grimes * only try to get one page. 1262df8bae1dSRodney W. Grimes */ 126326f9a767SRodney W. Grimes for (i = 0; i < SWB_NPAGES; i++) { 126426f9a767SRodney W. Grimes if (swb[j]->swb_block[i] != SWB_EMPTY) 1265df8bae1dSRodney W. Grimes break; 1266df8bae1dSRodney W. Grimes } 126726f9a767SRodney W. Grimes 126826f9a767SRodney W. Grimes 126926f9a767SRodney W. Grimes ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1; 127026f9a767SRodney W. Grimes /* 127126f9a767SRodney W. Grimes * this code is alittle conservative, but works 127226f9a767SRodney W. Grimes * (the intent of this code is to allocate small chunks 127326f9a767SRodney W. Grimes * for small objects) 127426f9a767SRodney W. Grimes */ 127526f9a767SRodney W. Grimes if( (m[j]->offset == 0) && (ntoget*PAGE_SIZE > object->size)) { 127626f9a767SRodney W. Grimes ntoget = (object->size + (PAGE_SIZE-1))/PAGE_SIZE; 127726f9a767SRodney W. Grimes } 127826f9a767SRodney W. Grimes 127926f9a767SRodney W. Grimes retrygetspace: 128026f9a767SRodney W. Grimes if (!swap_pager_full && ntoget > 1 && 128126f9a767SRodney W. Grimes swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) { 128226f9a767SRodney W. Grimes 128326f9a767SRodney W. 
Grimes for (i = 0; i < ntoget; i++) { 128426f9a767SRodney W. Grimes swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i; 128526f9a767SRodney W. Grimes swb[j]->swb_valid = 0; 128626f9a767SRodney W. Grimes } 128726f9a767SRodney W. Grimes 128826f9a767SRodney W. Grimes reqaddr[j] = swb[j]->swb_block[off]; 128926f9a767SRodney W. Grimes } else if (!swap_pager_getswapspace(btodb(PAGE_SIZE), 129026f9a767SRodney W. Grimes &swb[j]->swb_block[off])) { 129126f9a767SRodney W. Grimes /* 129226f9a767SRodney W. Grimes * if the allocation has failed, we try to reclaim space and 129326f9a767SRodney W. Grimes * retry. 129426f9a767SRodney W. Grimes */ 129526f9a767SRodney W. Grimes if (++tries == 1) { 129626f9a767SRodney W. Grimes swap_pager_reclaim(); 129726f9a767SRodney W. Grimes goto retrygetspace; 129826f9a767SRodney W. Grimes } 129926f9a767SRodney W. Grimes rtvals[j] = VM_PAGER_AGAIN; 130026f9a767SRodney W. Grimes failed = 1; 130126f9a767SRodney W. Grimes } else { 130226f9a767SRodney W. Grimes reqaddr[j] = swb[j]->swb_block[off]; 130326f9a767SRodney W. Grimes swb[j]->swb_valid &= ~(1<<off); 1304df8bae1dSRodney W. Grimes } 1305df8bae1dSRodney W. Grimes splx(s); 130626f9a767SRodney W. Grimes } 130726f9a767SRodney W. Grimes } 130826f9a767SRodney W. Grimes 130926f9a767SRodney W. Grimes /* 131026f9a767SRodney W. Grimes * search forwards for the last contiguous page to transfer 131126f9a767SRodney W. Grimes */ 131226f9a767SRodney W. Grimes failed = 0; 131326f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 131426f9a767SRodney W. Grimes if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(PAGE_SIZE)) || 131526f9a767SRodney W. Grimes (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) || 131626f9a767SRodney W. Grimes (rtvals[i] != VM_PAGER_OK)) { 131726f9a767SRodney W. Grimes failed = 1; 131826f9a767SRodney W. Grimes if( rtvals[i] == VM_PAGER_OK) 131926f9a767SRodney W. Grimes rtvals[i] = VM_PAGER_AGAIN; 132026f9a767SRodney W. Grimes } 132126f9a767SRodney W. Grimes } 132226f9a767SRodney W. 
Grimes 132326f9a767SRodney W. Grimes for(i = 0; i < count; i++) { 132426f9a767SRodney W. Grimes if( rtvals[i] != VM_PAGER_OK) { 132526f9a767SRodney W. Grimes if( swb[i]) 132626f9a767SRodney W. Grimes --swb[i]->swb_locked; 132726f9a767SRodney W. Grimes } 132826f9a767SRodney W. Grimes } 132926f9a767SRodney W. Grimes 133026f9a767SRodney W. Grimes for(i = 0; i < count; i++) 133126f9a767SRodney W. Grimes if( rtvals[i] != VM_PAGER_OK) 133226f9a767SRodney W. Grimes break; 133326f9a767SRodney W. Grimes 133426f9a767SRodney W. Grimes if( i == 0) { 133526f9a767SRodney W. Grimes return VM_PAGER_AGAIN; 133626f9a767SRodney W. Grimes } 133726f9a767SRodney W. Grimes 133826f9a767SRodney W. Grimes count = i; 133926f9a767SRodney W. Grimes for(i=0;i<count;i++) { 134026f9a767SRodney W. Grimes if( reqaddr[i] == SWB_EMPTY) 134126f9a767SRodney W. Grimes printf("I/O to empty block????\n"); 134226f9a767SRodney W. Grimes } 134326f9a767SRodney W. Grimes 134426f9a767SRodney W. Grimes /* 134526f9a767SRodney W. Grimes */ 134626f9a767SRodney W. Grimes 134726f9a767SRodney W. Grimes /* 134826f9a767SRodney W. Grimes * For synchronous writes, we clean up 134926f9a767SRodney W. Grimes * all completed async pageouts. 135026f9a767SRodney W. Grimes */ 135126f9a767SRodney W. Grimes if ((flags & B_ASYNC) == 0) { 135226f9a767SRodney W. Grimes swap_pager_clean(); 135326f9a767SRodney W. Grimes } 135426f9a767SRodney W. Grimes 135526f9a767SRodney W. Grimes kva = 0; 135626f9a767SRodney W. Grimes 135726f9a767SRodney W. Grimes /* 135826f9a767SRodney W. Grimes * we allocate a new kva for transfers > 1 page 135926f9a767SRodney W. Grimes * but for transfers == 1 page, the swap_pager_free list contains 136026f9a767SRodney W. Grimes * entries that have pre-allocated kva's (for efficiency). 136116f62314SDavid Greenman * NOTE -- we do not use the physical buffer pool or the 136216f62314SDavid Greenman * preallocated associated kva's because of the potential for 136316f62314SDavid Greenman * deadlock. 
This is very subtile -- but deadlocks or resource 136416f62314SDavid Greenman * contention must be avoided on pageouts -- or your system will 136516f62314SDavid Greenman * sleep (forever) !!! 136626f9a767SRodney W. Grimes */ 1367fff93ab6SDavid Greenman /* 136826f9a767SRodney W. Grimes if ( count > 1) { 136926f9a767SRodney W. Grimes kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE); 137026f9a767SRodney W. Grimes if( !kva) { 137126f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 137226f9a767SRodney W. Grimes if( swb[i]) 137326f9a767SRodney W. Grimes --swb[i]->swb_locked; 137426f9a767SRodney W. Grimes rtvals[i] = VM_PAGER_AGAIN; 137526f9a767SRodney W. Grimes } 137626f9a767SRodney W. Grimes return VM_PAGER_AGAIN; 137726f9a767SRodney W. Grimes } 137826f9a767SRodney W. Grimes } 1379fff93ab6SDavid Greenman */ 138026f9a767SRodney W. Grimes 138126f9a767SRodney W. Grimes /* 138226f9a767SRodney W. Grimes * get a swap pager clean data structure, block until we get it 138326f9a767SRodney W. Grimes */ 138426f9a767SRodney W. Grimes if (swap_pager_free.tqh_first == NULL) { 138526f9a767SRodney W. Grimes s = splbio(); 138626f9a767SRodney W. Grimes if( curproc == pageproc) 138726f9a767SRodney W. Grimes (void) swap_pager_clean(); 138826f9a767SRodney W. Grimes else 138926f9a767SRodney W. Grimes wakeup((caddr_t) &vm_pages_needed); 139026f9a767SRodney W. Grimes while (swap_pager_free.tqh_first == NULL) { 139126f9a767SRodney W. Grimes swap_pager_needflags |= SWAP_FREE_NEEDED; 139226f9a767SRodney W. Grimes tsleep((caddr_t)&swap_pager_free, 139326f9a767SRodney W. Grimes PVM, "swpfre", 0); 139426f9a767SRodney W. Grimes if( curproc == pageproc) 139526f9a767SRodney W. Grimes (void) swap_pager_clean(); 139626f9a767SRodney W. Grimes else 139726f9a767SRodney W. Grimes wakeup((caddr_t) &vm_pages_needed); 139826f9a767SRodney W. Grimes } 139926f9a767SRodney W. Grimes splx(s); 140026f9a767SRodney W. Grimes } 140126f9a767SRodney W. Grimes 140226f9a767SRodney W. 
Grimes spc = swap_pager_free.tqh_first; 140326f9a767SRodney W. Grimes TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 1404fff93ab6SDavid Greenman 140526f9a767SRodney W. Grimes kva = spc->spc_kva; 140626f9a767SRodney W. Grimes 140726f9a767SRodney W. Grimes /* 140826f9a767SRodney W. Grimes * map our page(s) into kva for I/O 140926f9a767SRodney W. Grimes */ 141016f62314SDavid Greenman pmap_qenter(kva, m, count); 141126f9a767SRodney W. Grimes 141226f9a767SRodney W. Grimes /* 141326f9a767SRodney W. Grimes * get the base I/O offset into the swap file 141426f9a767SRodney W. Grimes */ 141526f9a767SRodney W. Grimes for(i=0;i<count;i++) { 141626f9a767SRodney W. Grimes foff = m[i]->offset + paging_offset; 141726f9a767SRodney W. Grimes off = swap_pager_block_offset(swp, foff); 141826f9a767SRodney W. Grimes /* 141926f9a767SRodney W. Grimes * if we are setting the valid bit anew, 142026f9a767SRodney W. Grimes * then diminish the swap free space 142126f9a767SRodney W. Grimes */ 142226f9a767SRodney W. Grimes if( (swb[i]->swb_valid & (1 << off)) == 0) 142326f9a767SRodney W. Grimes vm_swap_size -= btodb(PAGE_SIZE); 142426f9a767SRodney W. Grimes 142526f9a767SRodney W. Grimes /* 142626f9a767SRodney W. Grimes * set the valid bit 142726f9a767SRodney W. Grimes */ 142826f9a767SRodney W. Grimes swb[i]->swb_valid |= (1 << off); 142926f9a767SRodney W. Grimes /* 143026f9a767SRodney W. Grimes * and unlock the data structure 143126f9a767SRodney W. Grimes */ 143226f9a767SRodney W. Grimes --swb[i]->swb_locked; 143326f9a767SRodney W. Grimes } 143426f9a767SRodney W. Grimes 143526f9a767SRodney W. Grimes s = splbio(); 143626f9a767SRodney W. Grimes /* 143726f9a767SRodney W. Grimes * Get a swap buffer header and perform the IO 143826f9a767SRodney W. Grimes */ 143926f9a767SRodney W. Grimes bp = spc->spc_bp; 144026f9a767SRodney W. Grimes bzero(bp, sizeof *bp); 144126f9a767SRodney W. Grimes bp->b_spc = spc; 144226f9a767SRodney W. Grimes 144326f9a767SRodney W. 
Grimes bp->b_flags = B_BUSY; 144426f9a767SRodney W. Grimes bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 144526f9a767SRodney W. Grimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1446a481f200SDavid Greenman if( bp->b_rcred != NOCRED) 144726f9a767SRodney W. Grimes crhold(bp->b_rcred); 1448a481f200SDavid Greenman if( bp->b_wcred != NOCRED) 144926f9a767SRodney W. Grimes crhold(bp->b_wcred); 1450a481f200SDavid Greenman bp->b_data = (caddr_t) kva; 145126f9a767SRodney W. Grimes bp->b_blkno = reqaddr[0]; 145226f9a767SRodney W. Grimes bgetvp( swapdev_vp, bp); 145316f62314SDavid Greenman 145426f9a767SRodney W. Grimes bp->b_bcount = PAGE_SIZE*count; 145526f9a767SRodney W. Grimes bp->b_bufsize = PAGE_SIZE*count; 145626f9a767SRodney W. Grimes swapdev_vp->v_numoutput++; 145726f9a767SRodney W. Grimes 145826f9a767SRodney W. Grimes /* 145926f9a767SRodney W. Grimes * If this is an async write we set up additional buffer fields 146026f9a767SRodney W. Grimes * and place a "cleaning" entry on the inuse queue. 146126f9a767SRodney W. Grimes */ 146226f9a767SRodney W. Grimes if ( flags & B_ASYNC ) { 146326f9a767SRodney W. Grimes spc->spc_flags = 0; 146426f9a767SRodney W. Grimes spc->spc_swp = swp; 146526f9a767SRodney W. Grimes for(i=0;i<count;i++) 146626f9a767SRodney W. Grimes spc->spc_m[i] = m[i]; 146726f9a767SRodney W. Grimes spc->spc_count = count; 146826f9a767SRodney W. Grimes /* 146926f9a767SRodney W. Grimes * the completion routine for async writes 147026f9a767SRodney W. Grimes */ 147126f9a767SRodney W. Grimes bp->b_flags |= B_CALL; 147226f9a767SRodney W. Grimes bp->b_iodone = swap_pager_iodone; 147326f9a767SRodney W. Grimes bp->b_dirtyoff = 0; 147426f9a767SRodney W. Grimes bp->b_dirtyend = bp->b_bcount; 147526f9a767SRodney W. Grimes swp->sw_poip++; 147626f9a767SRodney W. Grimes TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list); 147726f9a767SRodney W. Grimes } else { 147826f9a767SRodney W. Grimes swp->sw_poip++; 147926f9a767SRodney W. 
Grimes bp->b_flags |= B_CALL; 148026f9a767SRodney W. Grimes bp->b_iodone = swap_pager_iodone1; 148126f9a767SRodney W. Grimes } 1482976e77fcSDavid Greenman 1483976e77fcSDavid Greenman cnt.v_swapout++; 1484976e77fcSDavid Greenman cnt.v_swappgsout += count; 148526f9a767SRodney W. Grimes /* 148626f9a767SRodney W. Grimes * perform the I/O 148726f9a767SRodney W. Grimes */ 148826f9a767SRodney W. Grimes VOP_STRATEGY(bp); 148926f9a767SRodney W. Grimes if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) { 149026f9a767SRodney W. Grimes if ((bp->b_flags & B_DONE) == B_DONE) { 149126f9a767SRodney W. Grimes swap_pager_clean(); 149226f9a767SRodney W. Grimes } 149326f9a767SRodney W. Grimes splx(s); 149426f9a767SRodney W. Grimes for(i=0;i<count;i++) { 149526f9a767SRodney W. Grimes rtvals[i] = VM_PAGER_PEND; 149626f9a767SRodney W. Grimes } 149726f9a767SRodney W. Grimes return VM_PAGER_PEND; 149826f9a767SRodney W. Grimes } 149926f9a767SRodney W. Grimes 150026f9a767SRodney W. Grimes /* 150126f9a767SRodney W. Grimes * wait for the sync I/O to complete 150226f9a767SRodney W. Grimes */ 150326f9a767SRodney W. Grimes while ((bp->b_flags & B_DONE) == 0) { 150426f9a767SRodney W. Grimes tsleep((caddr_t)bp, PVM, "swwrt", 0); 150526f9a767SRodney W. Grimes } 15061b119d9dSDavid Greenman if (bp->b_flags & B_ERROR) { 15071b119d9dSDavid Greenman printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n", 15081b119d9dSDavid Greenman bp->b_blkno, bp->b_bcount, bp->b_error); 15091b119d9dSDavid Greenman rv = VM_PAGER_FAIL; 15101b119d9dSDavid Greenman } else { 15111b119d9dSDavid Greenman rv = VM_PAGER_OK; 15121b119d9dSDavid Greenman } 151326f9a767SRodney W. Grimes bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); 151426f9a767SRodney W. Grimes 151526f9a767SRodney W. Grimes --swp->sw_poip; 151626f9a767SRodney W. Grimes if (swp->sw_poip == 0) 151726f9a767SRodney W. Grimes wakeup((caddr_t) swp); 151826f9a767SRodney W. Grimes 151926f9a767SRodney W. 
Grimes if (bp->b_vp) 152026f9a767SRodney W. Grimes brelvp(bp); 152126f9a767SRodney W. Grimes 152226f9a767SRodney W. Grimes splx(s); 152326f9a767SRodney W. Grimes 152426f9a767SRodney W. Grimes /* 152526f9a767SRodney W. Grimes * remove the mapping for kernel virtual 152626f9a767SRodney W. Grimes */ 152716f62314SDavid Greenman pmap_qremove( kva, count); 152826f9a767SRodney W. Grimes 152926f9a767SRodney W. Grimes /* 153026f9a767SRodney W. Grimes * if we have written the page, then indicate that the page 153126f9a767SRodney W. Grimes * is clean. 153226f9a767SRodney W. Grimes */ 153326f9a767SRodney W. Grimes if (rv == VM_PAGER_OK) { 153426f9a767SRodney W. Grimes for(i=0;i<count;i++) { 153526f9a767SRodney W. Grimes if( rtvals[i] == VM_PAGER_OK) { 153626f9a767SRodney W. Grimes m[i]->flags |= PG_CLEAN; 153726f9a767SRodney W. Grimes m[i]->flags &= ~PG_LAUNDRY; 153826f9a767SRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 153926f9a767SRodney W. Grimes /* 154026f9a767SRodney W. Grimes * optimization, if a page has been read during the 154126f9a767SRodney W. Grimes * pageout process, we activate it. 154226f9a767SRodney W. Grimes */ 154326f9a767SRodney W. Grimes if ( (m[i]->flags & PG_ACTIVE) == 0 && 154426f9a767SRodney W. Grimes pmap_is_referenced(VM_PAGE_TO_PHYS(m[i]))) 154526f9a767SRodney W. Grimes vm_page_activate(m[i]); 154626f9a767SRodney W. Grimes } 154726f9a767SRodney W. Grimes } 154826f9a767SRodney W. Grimes } else { 154926f9a767SRodney W. Grimes for(i=0;i<count;i++) { 155026f9a767SRodney W. Grimes rtvals[i] = rv; 155126f9a767SRodney W. Grimes m[i]->flags |= PG_LAUNDRY; 155226f9a767SRodney W. Grimes } 155326f9a767SRodney W. Grimes } 155426f9a767SRodney W. Grimes 155526f9a767SRodney W. Grimes if( bp->b_rcred != NOCRED) 155626f9a767SRodney W. Grimes crfree(bp->b_rcred); 155726f9a767SRodney W. Grimes if( bp->b_wcred != NOCRED) 155826f9a767SRodney W. Grimes crfree(bp->b_wcred); 155926f9a767SRodney W. 
Grimes TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 156026f9a767SRodney W. Grimes if (swap_pager_needflags & SWAP_FREE_NEEDED) { 156126f9a767SRodney W. Grimes swap_pager_needflags &= ~SWAP_FREE_NEEDED; 156226f9a767SRodney W. Grimes wakeup((caddr_t)&swap_pager_free); 156326f9a767SRodney W. Grimes } 156426f9a767SRodney W. Grimes 156526f9a767SRodney W. Grimes return(rv); 156626f9a767SRodney W. Grimes } 156726f9a767SRodney W. Grimes 156826f9a767SRodney W. Grimes boolean_t 156926f9a767SRodney W. Grimes swap_pager_clean() 157026f9a767SRodney W. Grimes { 157126f9a767SRodney W. Grimes register swp_clean_t spc, tspc; 157226f9a767SRodney W. Grimes register int s; 157326f9a767SRodney W. Grimes 157426f9a767SRodney W. Grimes tspc = NULL; 157526f9a767SRodney W. Grimes if (swap_pager_done.tqh_first == NULL) 157626f9a767SRodney W. Grimes return FALSE; 157726f9a767SRodney W. Grimes for (;;) { 157826f9a767SRodney W. Grimes s = splbio(); 157926f9a767SRodney W. Grimes /* 158026f9a767SRodney W. Grimes * Look up and removal from done list must be done 158126f9a767SRodney W. Grimes * at splbio() to avoid conflicts with swap_pager_iodone. 158226f9a767SRodney W. Grimes */ 158305f0fdd2SPoul-Henning Kamp while ((spc = swap_pager_done.tqh_first) != 0) { 1584fff93ab6SDavid Greenman pmap_qremove( spc->spc_kva, spc->spc_count); 158526f9a767SRodney W. Grimes swap_pager_finish(spc); 158626f9a767SRodney W. Grimes TAILQ_REMOVE(&swap_pager_done, spc, spc_list); 158726f9a767SRodney W. Grimes goto doclean; 158826f9a767SRodney W. Grimes } 1589df8bae1dSRodney W. Grimes 1590df8bae1dSRodney W. Grimes /* 1591df8bae1dSRodney W. Grimes * No operations done, thats all we can do for now. 1592df8bae1dSRodney W. Grimes */ 159326f9a767SRodney W. Grimes 159426f9a767SRodney W. Grimes splx(s); 1595df8bae1dSRodney W. Grimes break; 1596df8bae1dSRodney W. Grimes 1597df8bae1dSRodney W. Grimes /* 159826f9a767SRodney W. Grimes * The desired page was found to be busy earlier in 159926f9a767SRodney W. 
Grimes * the scan but has since completed. 1600df8bae1dSRodney W. Grimes */ 160126f9a767SRodney W. Grimes doclean: 160226f9a767SRodney W. Grimes if (tspc && tspc == spc) { 160326f9a767SRodney W. Grimes tspc = NULL; 160426f9a767SRodney W. Grimes } 160526f9a767SRodney W. Grimes spc->spc_flags = 0; 160626f9a767SRodney W. Grimes TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 160726f9a767SRodney W. Grimes if (swap_pager_needflags & SWAP_FREE_NEEDED) { 160826f9a767SRodney W. Grimes swap_pager_needflags &= ~SWAP_FREE_NEEDED; 160926f9a767SRodney W. Grimes wakeup((caddr_t)&swap_pager_free); 161026f9a767SRodney W. Grimes } 161126f9a767SRodney W. Grimes ++cleandone; 161226f9a767SRodney W. Grimes splx(s); 161326f9a767SRodney W. Grimes } 161426f9a767SRodney W. Grimes 161526f9a767SRodney W. Grimes return(tspc ? TRUE : FALSE); 161626f9a767SRodney W. Grimes } 161726f9a767SRodney W. Grimes 161826f9a767SRodney W. Grimes void 161926f9a767SRodney W. Grimes swap_pager_finish(spc) 162026f9a767SRodney W. Grimes register swp_clean_t spc; 162126f9a767SRodney W. Grimes { 162226f9a767SRodney W. Grimes vm_object_t object = spc->spc_m[0]->object; 162326f9a767SRodney W. Grimes int i; 162426f9a767SRodney W. Grimes 162526f9a767SRodney W. Grimes if ((object->paging_in_progress -= spc->spc_count) == 0) 162626f9a767SRodney W. Grimes thread_wakeup((int) object); 1627df8bae1dSRodney W. Grimes 1628df8bae1dSRodney W. Grimes /* 162926f9a767SRodney W. Grimes * If no error mark as clean and inform the pmap system. 163026f9a767SRodney W. Grimes * If error, mark as dirty so we will try again. 163126f9a767SRodney W. Grimes * (XXX could get stuck doing this, should give up after awhile) 1632df8bae1dSRodney W. Grimes */ 1633df8bae1dSRodney W. Grimes if (spc->spc_flags & SPC_ERROR) { 163426f9a767SRodney W. 
Grimes for(i=0;i<spc->spc_count;i++) { 163505f0fdd2SPoul-Henning Kamp printf("swap_pager_finish: clean of page %lx failed\n", 163605f0fdd2SPoul-Henning Kamp (u_long)VM_PAGE_TO_PHYS(spc->spc_m[i])); 163726f9a767SRodney W. Grimes spc->spc_m[i]->flags |= PG_LAUNDRY; 163826f9a767SRodney W. Grimes } 1639df8bae1dSRodney W. Grimes } else { 164026f9a767SRodney W. Grimes for(i=0;i<spc->spc_count;i++) { 164126f9a767SRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i])); 164226f9a767SRodney W. Grimes spc->spc_m[i]->flags |= PG_CLEAN; 1643df8bae1dSRodney W. Grimes } 1644df8bae1dSRodney W. Grimes } 1645df8bae1dSRodney W. Grimes 164626f9a767SRodney W. Grimes 164726f9a767SRodney W. Grimes for(i=0;i<spc->spc_count;i++) { 1648df8bae1dSRodney W. Grimes /* 164926f9a767SRodney W. Grimes * we wakeup any processes that are waiting on 165026f9a767SRodney W. Grimes * these pages. 1651df8bae1dSRodney W. Grimes */ 165226f9a767SRodney W. Grimes PAGE_WAKEUP(spc->spc_m[i]); 1653df8bae1dSRodney W. Grimes } 165426f9a767SRodney W. Grimes nswiodone -= spc->spc_count; 1655df8bae1dSRodney W. Grimes 1656df8bae1dSRodney W. Grimes return; 165726f9a767SRodney W. Grimes } 1658df8bae1dSRodney W. Grimes 165926f9a767SRodney W. Grimes /* 166026f9a767SRodney W. Grimes * swap_pager_iodone 166126f9a767SRodney W. Grimes */ 166226f9a767SRodney W. Grimes void 1663df8bae1dSRodney W. Grimes swap_pager_iodone(bp) 1664df8bae1dSRodney W. Grimes register struct buf *bp; 1665df8bae1dSRodney W. Grimes { 1666df8bae1dSRodney W. Grimes register swp_clean_t spc; 1667df8bae1dSRodney W. Grimes int s; 1668df8bae1dSRodney W. Grimes 1669df8bae1dSRodney W. Grimes s = splbio(); 167026f9a767SRodney W. Grimes spc = (swp_clean_t) bp->b_spc; 167126f9a767SRodney W. Grimes TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list); 167226f9a767SRodney W. Grimes TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list); 167326f9a767SRodney W. Grimes if (bp->b_flags & B_ERROR) { 1674df8bae1dSRodney W. 
Grimes spc->spc_flags |= SPC_ERROR; 16751b119d9dSDavid Greenman printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d", 16761b119d9dSDavid Greenman (bp->b_flags & B_READ) ? "pagein" : "pageout", 167705f0fdd2SPoul-Henning Kamp bp->b_error, (u_long)bp->b_blkno, bp->b_bcount); 1678df8bae1dSRodney W. Grimes } 1679df8bae1dSRodney W. Grimes 168026f9a767SRodney W. Grimes /* 168126f9a767SRodney W. Grimes if ((bp->b_flags & B_READ) == 0) 168226f9a767SRodney W. Grimes vwakeup(bp); 168326f9a767SRodney W. Grimes */ 168426f9a767SRodney W. Grimes 168526f9a767SRodney W. Grimes bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC); 168626f9a767SRodney W. Grimes if (bp->b_vp) { 168726f9a767SRodney W. Grimes brelvp(bp); 168826f9a767SRodney W. Grimes } 168926f9a767SRodney W. Grimes if( bp->b_rcred != NOCRED) 169026f9a767SRodney W. Grimes crfree(bp->b_rcred); 169126f9a767SRodney W. Grimes if( bp->b_wcred != NOCRED) 169226f9a767SRodney W. Grimes crfree(bp->b_wcred); 169326f9a767SRodney W. Grimes 169426f9a767SRodney W. Grimes nswiodone += spc->spc_count; 169526f9a767SRodney W. Grimes if (--spc->spc_swp->sw_poip == 0) { 169626f9a767SRodney W. Grimes wakeup((caddr_t)spc->spc_swp); 169726f9a767SRodney W. Grimes } 169826f9a767SRodney W. Grimes 169926f9a767SRodney W. Grimes if ((swap_pager_needflags & SWAP_FREE_NEEDED) || 170026f9a767SRodney W. Grimes swap_pager_inuse.tqh_first == 0) { 170126f9a767SRodney W. Grimes swap_pager_needflags &= ~SWAP_FREE_NEEDED; 170226f9a767SRodney W. Grimes wakeup((caddr_t)&swap_pager_free); 170326f9a767SRodney W. Grimes wakeup((caddr_t)&vm_pages_needed); 170426f9a767SRodney W. Grimes } 170526f9a767SRodney W. Grimes 170626f9a767SRodney W. Grimes if (vm_pageout_pages_needed) { 170726f9a767SRodney W. Grimes wakeup((caddr_t)&vm_pageout_pages_needed); 170826f9a767SRodney W. Grimes } 170926f9a767SRodney W. Grimes 171026f9a767SRodney W. Grimes if ((swap_pager_inuse.tqh_first == NULL) || 171126f9a767SRodney W. 
Grimes (cnt.v_free_count < cnt.v_free_min && 171226f9a767SRodney W. Grimes nswiodone + cnt.v_free_count >= cnt.v_free_min) ) { 171326f9a767SRodney W. Grimes wakeup((caddr_t)&vm_pages_needed); 171426f9a767SRodney W. Grimes } 171526f9a767SRodney W. Grimes splx(s); 171626f9a767SRodney W. Grimes } 171726f9a767SRodney W. Grimes 171826f9a767SRodney W. Grimes /* 171926f9a767SRodney W. Grimes * return true if any swap control structures can be allocated 172026f9a767SRodney W. Grimes */ 172126f9a767SRodney W. Grimes int 172226f9a767SRodney W. Grimes swap_pager_ready() { 172326f9a767SRodney W. Grimes if( swap_pager_free.tqh_first) 172426f9a767SRodney W. Grimes return 1; 172526f9a767SRodney W. Grimes else 172626f9a767SRodney W. Grimes return 0; 172726f9a767SRodney W. Grimes } 1728