1df8bae1dSRodney W. Grimes /* 226f9a767SRodney W. Grimes * Copyright (c) 1994 John S. Dyson 3df8bae1dSRodney W. Grimes * Copyright (c) 1990 University of Utah. 4df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 8df8bae1dSRodney W. Grimes * the Systems Programming Group of the University of Utah Computer 9df8bae1dSRodney W. Grimes * Science Department. 10df8bae1dSRodney W. Grimes * 11df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 12df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 13df8bae1dSRodney W. Grimes * are met: 14df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 15df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 16df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 17df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 18df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 19df8bae1dSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 20df8bae1dSRodney W. Grimes * must display the following acknowledgement: 21df8bae1dSRodney W. Grimes * This product includes software developed by the University of 22df8bae1dSRodney W. Grimes * California, Berkeley and its contributors. 23df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 24df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 25df8bae1dSRodney W. Grimes * without specific prior written permission. 26df8bae1dSRodney W. Grimes * 27df8bae1dSRodney W. 
Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37df8bae1dSRodney W. Grimes * SUCH DAMAGE. 38df8bae1dSRodney W. Grimes * 39df8bae1dSRodney W. Grimes * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ 40df8bae1dSRodney W. Grimes * 41df8bae1dSRodney W. Grimes * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 42e47ed70bSJohn Dyson * $Id: swap_pager.c,v 1.88 1998/02/09 06:11:20 eivind Exp $ 43df8bae1dSRodney W. Grimes */ 44df8bae1dSRodney W. Grimes 45df8bae1dSRodney W. Grimes /* 46df8bae1dSRodney W. Grimes * Quick hack to page to dedicated partition(s). 47df8bae1dSRodney W. Grimes * TODO: 48df8bae1dSRodney W. Grimes * Add multiprocessor locks 49df8bae1dSRodney W. Grimes * Deal with async writes in a better fashion 50df8bae1dSRodney W. Grimes */ 51df8bae1dSRodney W. Grimes 52df8bae1dSRodney W. Grimes #include <sys/param.h> 53df8bae1dSRodney W. Grimes #include <sys/systm.h> 5464abb5a5SDavid Greenman #include <sys/kernel.h> 55df8bae1dSRodney W. Grimes #include <sys/proc.h> 56df8bae1dSRodney W. Grimes #include <sys/buf.h> 57df8bae1dSRodney W. 
Grimes #include <sys/vnode.h> 58df8bae1dSRodney W. Grimes #include <sys/malloc.h> 59efeaf95aSDavid Greenman #include <sys/vmmeter.h> 6026f9a767SRodney W. Grimes #include <sys/rlist.h> 61df8bae1dSRodney W. Grimes 62e47ed70bSJohn Dyson #ifndef MAX_PAGEOUT_CLUSTER 63e47ed70bSJohn Dyson #define MAX_PAGEOUT_CLUSTER 8 64e47ed70bSJohn Dyson #endif 65e47ed70bSJohn Dyson 66e47ed70bSJohn Dyson #ifndef NPENDINGIO 67e47ed70bSJohn Dyson #define NPENDINGIO 16 68e47ed70bSJohn Dyson #endif 69e47ed70bSJohn Dyson 70e47ed70bSJohn Dyson #define SWB_NPAGES MAX_PAGEOUT_CLUSTER 71e47ed70bSJohn Dyson 72df8bae1dSRodney W. Grimes #include <vm/vm.h> 73efeaf95aSDavid Greenman #include <vm/vm_prot.h> 74efeaf95aSDavid Greenman #include <vm/vm_object.h> 75df8bae1dSRodney W. Grimes #include <vm/vm_page.h> 76efeaf95aSDavid Greenman #include <vm/vm_pager.h> 77df8bae1dSRodney W. Grimes #include <vm/vm_pageout.h> 78df8bae1dSRodney W. Grimes #include <vm/swap_pager.h> 79efeaf95aSDavid Greenman #include <vm/vm_extern.h> 80df8bae1dSRodney W. Grimes 81f708ef1bSPoul-Henning Kamp static int nswiodone; 8226f9a767SRodney W. Grimes int swap_pager_full; 8326f9a767SRodney W. Grimes extern int vm_swap_size; 84b44e4b7aSJohn Dyson static int suggest_more_swap = 0; 85f5a12711SPoul-Henning Kamp static int no_swap_space = 1; 86e47ed70bSJohn Dyson static int max_pageout_cluster; 87836e5d13SJohn Dyson struct rlisthdr swaplist; 8826f9a767SRodney W. Grimes 89df8bae1dSRodney W. Grimes TAILQ_HEAD(swpclean, swpagerclean); 90df8bae1dSRodney W. Grimes 9126f9a767SRodney W. Grimes typedef struct swpagerclean *swp_clean_t; 9226f9a767SRodney W. Grimes 93f708ef1bSPoul-Henning Kamp static struct swpagerclean { 94df8bae1dSRodney W. Grimes TAILQ_ENTRY(swpagerclean) spc_list; 95df8bae1dSRodney W. Grimes int spc_flags; 96df8bae1dSRodney W. Grimes struct buf *spc_bp; 972a4895f4SDavid Greenman vm_object_t spc_object; 98df8bae1dSRodney W. Grimes vm_offset_t spc_kva; 99e736cd05SJohn Dyson int spc_first; 10026f9a767SRodney W. 
Grimes int spc_count; 10126f9a767SRodney W. Grimes vm_page_t spc_m[MAX_PAGEOUT_CLUSTER]; 102df8bae1dSRodney W. Grimes } swcleanlist[NPENDINGIO]; 10326f9a767SRodney W. Grimes 10426f9a767SRodney W. Grimes 105df8bae1dSRodney W. Grimes /* spc_flags values */ 10626f9a767SRodney W. Grimes #define SPC_ERROR 0x01 107df8bae1dSRodney W. Grimes 10826f9a767SRodney W. Grimes #define SWB_EMPTY (-1) 109df8bae1dSRodney W. Grimes 110f708ef1bSPoul-Henning Kamp /* list of completed page cleans */ 111f708ef1bSPoul-Henning Kamp static struct swpclean swap_pager_done; 112f708ef1bSPoul-Henning Kamp 113f708ef1bSPoul-Henning Kamp /* list of pending page cleans */ 114f708ef1bSPoul-Henning Kamp static struct swpclean swap_pager_inuse; 115f708ef1bSPoul-Henning Kamp 116f708ef1bSPoul-Henning Kamp /* list of free pager clean structs */ 117f708ef1bSPoul-Henning Kamp static struct swpclean swap_pager_free; 118303b270bSEivind Eklund static int swap_pager_free_count; 119e47ed70bSJohn Dyson static int swap_pager_free_pending; 120f708ef1bSPoul-Henning Kamp 121f708ef1bSPoul-Henning Kamp /* list of "named" anon region objects */ 122f708ef1bSPoul-Henning Kamp static struct pagerlst swap_pager_object_list; 123f708ef1bSPoul-Henning Kamp 124f708ef1bSPoul-Henning Kamp /* list of "unnamed" anon region objects */ 125f708ef1bSPoul-Henning Kamp struct pagerlst swap_pager_un_object_list; 126df8bae1dSRodney W. Grimes 12726f9a767SRodney W. Grimes #define SWAP_FREE_NEEDED 0x1 /* need a swap block */ 128a1f6d91cSDavid Greenman #define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2 129f708ef1bSPoul-Henning Kamp static int swap_pager_needflags; 13026f9a767SRodney W. Grimes 131f5a12711SPoul-Henning Kamp static struct pagerlst *swp_qs[] = { 13224a1cce3SDavid Greenman &swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0 13326f9a767SRodney W. Grimes }; 13426f9a767SRodney W. Grimes 13524a1cce3SDavid Greenman /* 13624a1cce3SDavid Greenman * pagerops for OBJT_SWAP - "swap pager". 
13724a1cce3SDavid Greenman */ 138ff98689dSBruce Evans static vm_object_t 139ff98689dSBruce Evans swap_pager_alloc __P((void *handle, vm_size_t size, 140a316d390SJohn Dyson vm_prot_t prot, vm_ooffset_t offset)); 141ff98689dSBruce Evans static void swap_pager_dealloc __P((vm_object_t object)); 142ff98689dSBruce Evans static boolean_t 143a316d390SJohn Dyson swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex, 144ff98689dSBruce Evans int *before, int *after)); 145f708ef1bSPoul-Henning Kamp static int swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int)); 146ff98689dSBruce Evans static void swap_pager_init __P((void)); 147f708ef1bSPoul-Henning Kamp static void swap_pager_sync __P((void)); 148e47ed70bSJohn Dyson static void spc_free __P((swp_clean_t)); 149f708ef1bSPoul-Henning Kamp 150df8bae1dSRodney W. Grimes struct pagerops swappagerops = { 151df8bae1dSRodney W. Grimes swap_pager_init, 152df8bae1dSRodney W. Grimes swap_pager_alloc, 153df8bae1dSRodney W. Grimes swap_pager_dealloc, 15424a1cce3SDavid Greenman swap_pager_getpages, 15524a1cce3SDavid Greenman swap_pager_putpages, 15624a1cce3SDavid Greenman swap_pager_haspage, 15724a1cce3SDavid Greenman swap_pager_sync 158df8bae1dSRodney W. Grimes }; 159df8bae1dSRodney W. Grimes 160e47ed70bSJohn Dyson static int npendingio; 161f708ef1bSPoul-Henning Kamp static int dmmin; 162f708ef1bSPoul-Henning Kamp int dmmax; 16326f9a767SRodney W. 
Grimes 1648ba0c490SBruce Evans static int swap_pager_block_index __P((vm_pindex_t pindex)); 1658ba0c490SBruce Evans static int swap_pager_block_offset __P((vm_pindex_t pindex)); 166a316d390SJohn Dyson static daddr_t *swap_pager_diskaddr __P((vm_object_t object, 167a316d390SJohn Dyson vm_pindex_t pindex, int *valid)); 168cac597e4SBruce Evans static void swap_pager_finish __P((swp_clean_t spc)); 169cac597e4SBruce Evans static void swap_pager_freepage __P((vm_page_t m)); 170cac597e4SBruce Evans static void swap_pager_free_swap __P((vm_object_t object)); 171cac597e4SBruce Evans static void swap_pager_freeswapspace __P((vm_object_t object, 172cac597e4SBruce Evans unsigned int from, 173cac597e4SBruce Evans unsigned int to)); 174cac597e4SBruce Evans static int swap_pager_getswapspace __P((vm_object_t object, 175cac597e4SBruce Evans unsigned int amount, 176a316d390SJohn Dyson daddr_t *rtval)); 177ff98689dSBruce Evans static void swap_pager_iodone __P((struct buf *)); 178cac597e4SBruce Evans static void swap_pager_iodone1 __P((struct buf *bp)); 179cac597e4SBruce Evans static void swap_pager_reclaim __P((void)); 180cac597e4SBruce Evans static void swap_pager_ridpages __P((vm_page_t *m, int count, 181cac597e4SBruce Evans int reqpage)); 182cac597e4SBruce Evans static void swap_pager_setvalid __P((vm_object_t object, 183cac597e4SBruce Evans vm_offset_t offset, int valid)); 184cac597e4SBruce Evans static void swapsizecheck __P((void)); 18524a1cce3SDavid Greenman 186de5f6a77SJohn Dyson #define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE))) 187de5f6a77SJohn Dyson 1880d94caffSDavid Greenman static inline void 1890d94caffSDavid Greenman swapsizecheck() 1900d94caffSDavid Greenman { 19126f9a767SRodney W. Grimes if (vm_swap_size < 128 * btodb(PAGE_SIZE)) { 192a1f6d91cSDavid Greenman if (swap_pager_full == 0) 1931af87c92SDavid Greenman printf("swap_pager: out of swap space\n"); 19426f9a767SRodney W. Grimes swap_pager_full = 1; 19526f9a767SRodney W. 
Grimes } else if (vm_swap_size > 192 * btodb(PAGE_SIZE)) 19626f9a767SRodney W. Grimes swap_pager_full = 0; 19726f9a767SRodney W. Grimes } 19826f9a767SRodney W. Grimes 199f5a12711SPoul-Henning Kamp static void 200df8bae1dSRodney W. Grimes swap_pager_init() 201df8bae1dSRodney W. Grimes { 202e47ed70bSJohn Dyson int maxsafepending; 20324a1cce3SDavid Greenman TAILQ_INIT(&swap_pager_object_list); 20424a1cce3SDavid Greenman TAILQ_INIT(&swap_pager_un_object_list); 205df8bae1dSRodney W. Grimes 206df8bae1dSRodney W. Grimes /* 207df8bae1dSRodney W. Grimes * Initialize clean lists 208df8bae1dSRodney W. Grimes */ 209df8bae1dSRodney W. Grimes TAILQ_INIT(&swap_pager_inuse); 21026f9a767SRodney W. Grimes TAILQ_INIT(&swap_pager_done); 211df8bae1dSRodney W. Grimes TAILQ_INIT(&swap_pager_free); 2123091ee09SJohn Dyson swap_pager_free_count = 0; 21326f9a767SRodney W. Grimes 214df8bae1dSRodney W. Grimes /* 215df8bae1dSRodney W. Grimes * Calculate the swap allocation constants. 216df8bae1dSRodney W. Grimes */ 217e911eafcSPoul-Henning Kamp dmmin = PAGE_SIZE / DEV_BSIZE; 21826f9a767SRodney W. Grimes dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2; 219e47ed70bSJohn Dyson 220e47ed70bSJohn Dyson maxsafepending = cnt.v_free_min - cnt.v_free_reserved; 221e47ed70bSJohn Dyson npendingio = NPENDINGIO; 222e47ed70bSJohn Dyson max_pageout_cluster = MAX_PAGEOUT_CLUSTER; 223e47ed70bSJohn Dyson 224e47ed70bSJohn Dyson if ((2 * NPENDINGIO * MAX_PAGEOUT_CLUSTER) > maxsafepending) { 225e47ed70bSJohn Dyson max_pageout_cluster = MAX_PAGEOUT_CLUSTER / 2; 226e47ed70bSJohn Dyson npendingio = maxsafepending / (2 * max_pageout_cluster); 227e47ed70bSJohn Dyson if (npendingio < 2) 228e47ed70bSJohn Dyson npendingio = 2; 229e47ed70bSJohn Dyson } 230df8bae1dSRodney W. Grimes } 231df8bae1dSRodney W. Grimes 23224a1cce3SDavid Greenman void 23324a1cce3SDavid Greenman swap_pager_swap_init() 234df8bae1dSRodney W. Grimes { 23526f9a767SRodney W. Grimes swp_clean_t spc; 23626f9a767SRodney W. 
Grimes struct buf *bp; 23724a1cce3SDavid Greenman int i; 2380d94caffSDavid Greenman 23926f9a767SRodney W. Grimes /* 2400d94caffSDavid Greenman * kva's are allocated here so that we dont need to keep doing 2410d94caffSDavid Greenman * kmem_alloc pageables at runtime 24226f9a767SRodney W. Grimes */ 24326f9a767SRodney W. Grimes for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) { 244e47ed70bSJohn Dyson spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * max_pageout_cluster); 24526f9a767SRodney W. Grimes if (!spc->spc_kva) { 24626f9a767SRodney W. Grimes break; 24726f9a767SRodney W. Grimes } 248a1f6d91cSDavid Greenman spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL); 24926f9a767SRodney W. Grimes if (!spc->spc_bp) { 25026f9a767SRodney W. Grimes kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE); 25126f9a767SRodney W. Grimes break; 25226f9a767SRodney W. Grimes } 25326f9a767SRodney W. Grimes spc->spc_flags = 0; 25426f9a767SRodney W. Grimes TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 2553091ee09SJohn Dyson swap_pager_free_count++; 25626f9a767SRodney W. Grimes } 25726f9a767SRodney W. Grimes } 25824a1cce3SDavid Greenman 25924a1cce3SDavid Greenman int 26024a1cce3SDavid Greenman swap_pager_swp_alloc(object, wait) 26124a1cce3SDavid Greenman vm_object_t object; 26224a1cce3SDavid Greenman int wait; 26324a1cce3SDavid Greenman { 2642a4895f4SDavid Greenman sw_blk_t swb; 2652a4895f4SDavid Greenman int nblocks; 26624a1cce3SDavid Greenman int i, j; 26724a1cce3SDavid Greenman 268a316d390SJohn Dyson nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES; 2692a4895f4SDavid Greenman swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait); 2702a4895f4SDavid Greenman if (swb == NULL) 27124a1cce3SDavid Greenman return 1; 27224a1cce3SDavid Greenman 2732a4895f4SDavid Greenman for (i = 0; i < nblocks; i++) { 2742a4895f4SDavid Greenman swb[i].swb_valid = 0; 2752a4895f4SDavid Greenman swb[i].swb_locked = 0; 27626f9a767SRodney W. 
Grimes for (j = 0; j < SWB_NPAGES; j++) 2772a4895f4SDavid Greenman swb[i].swb_block[j] = SWB_EMPTY; 27826f9a767SRodney W. Grimes } 27926f9a767SRodney W. Grimes 2802a4895f4SDavid Greenman object->un_pager.swp.swp_nblocks = nblocks; 2812a4895f4SDavid Greenman object->un_pager.swp.swp_allocsize = 0; 2822a4895f4SDavid Greenman object->un_pager.swp.swp_blocks = swb; 2832a4895f4SDavid Greenman object->un_pager.swp.swp_poip = 0; 28424a1cce3SDavid Greenman 28524a1cce3SDavid Greenman if (object->handle != NULL) { 28624a1cce3SDavid Greenman TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list); 287df8bae1dSRodney W. Grimes } else { 28824a1cce3SDavid Greenman TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list); 289df8bae1dSRodney W. Grimes } 290df8bae1dSRodney W. Grimes 29124a1cce3SDavid Greenman return 0; 29224a1cce3SDavid Greenman } 29324a1cce3SDavid Greenman 29424a1cce3SDavid Greenman /* 2952a4895f4SDavid Greenman * Allocate an object and associated resources. 29624a1cce3SDavid Greenman * Note that if we are called from the pageout daemon (handle == NULL) 29724a1cce3SDavid Greenman * we should not wait for memory as it could resulting in deadlock. 29824a1cce3SDavid Greenman */ 299f5a12711SPoul-Henning Kamp static vm_object_t 300b9dcd593SBruce Evans swap_pager_alloc(void *handle, vm_size_t size, vm_prot_t prot, 301b9dcd593SBruce Evans vm_ooffset_t offset) 30224a1cce3SDavid Greenman { 30324a1cce3SDavid Greenman vm_object_t object; 30424a1cce3SDavid Greenman 30524a1cce3SDavid Greenman /* 30624a1cce3SDavid Greenman * If this is a "named" anonymous region, look it up and use the 30724a1cce3SDavid Greenman * object if it exists, otherwise allocate a new one. 
30824a1cce3SDavid Greenman */ 30924a1cce3SDavid Greenman if (handle) { 31024a1cce3SDavid Greenman object = vm_pager_object_lookup(&swap_pager_object_list, handle); 31124a1cce3SDavid Greenman if (object != NULL) { 31224a1cce3SDavid Greenman vm_object_reference(object); 31324a1cce3SDavid Greenman } else { 31424a1cce3SDavid Greenman /* 31524a1cce3SDavid Greenman * XXX - there is a race condition here. Two processes 31624a1cce3SDavid Greenman * can request the same named object simultaneuously, 31724a1cce3SDavid Greenman * and if one blocks for memory, the result is a disaster. 31824a1cce3SDavid Greenman * Probably quite rare, but is yet another reason to just 31924a1cce3SDavid Greenman * rip support of "named anonymous regions" out altogether. 32024a1cce3SDavid Greenman */ 321a316d390SJohn Dyson object = vm_object_allocate(OBJT_SWAP, 322aa8de40aSPoul-Henning Kamp OFF_TO_IDX(offset + PAGE_MASK) + size); 32324a1cce3SDavid Greenman object->handle = handle; 32424a1cce3SDavid Greenman (void) swap_pager_swp_alloc(object, M_WAITOK); 32524a1cce3SDavid Greenman } 32624a1cce3SDavid Greenman } else { 327a316d390SJohn Dyson object = vm_object_allocate(OBJT_SWAP, 328aa8de40aSPoul-Henning Kamp OFF_TO_IDX(offset + PAGE_MASK) + size); 32924a1cce3SDavid Greenman (void) swap_pager_swp_alloc(object, M_WAITOK); 33024a1cce3SDavid Greenman } 33124a1cce3SDavid Greenman 33224a1cce3SDavid Greenman return (object); 333df8bae1dSRodney W. Grimes } 334df8bae1dSRodney W. Grimes 33526f9a767SRodney W. Grimes /* 33626f9a767SRodney W. Grimes * returns disk block associated with pager and offset 33726f9a767SRodney W. Grimes * additionally, as a side effect returns a flag indicating 33826f9a767SRodney W. Grimes * if the block has been written 33926f9a767SRodney W. Grimes */ 34026f9a767SRodney W. 
Grimes 341a316d390SJohn Dyson inline static daddr_t * 342a316d390SJohn Dyson swap_pager_diskaddr(object, pindex, valid) 34324a1cce3SDavid Greenman vm_object_t object; 344a316d390SJohn Dyson vm_pindex_t pindex; 34526f9a767SRodney W. Grimes int *valid; 34626f9a767SRodney W. Grimes { 34726f9a767SRodney W. Grimes register sw_blk_t swb; 34826f9a767SRodney W. Grimes int ix; 34926f9a767SRodney W. Grimes 35026f9a767SRodney W. Grimes if (valid) 35126f9a767SRodney W. Grimes *valid = 0; 352a316d390SJohn Dyson ix = pindex / SWB_NPAGES; 3532a4895f4SDavid Greenman if ((ix >= object->un_pager.swp.swp_nblocks) || 354a316d390SJohn Dyson (pindex >= object->size)) { 35526f9a767SRodney W. Grimes return (FALSE); 35626f9a767SRodney W. Grimes } 3572a4895f4SDavid Greenman swb = &object->un_pager.swp.swp_blocks[ix]; 358a316d390SJohn Dyson ix = pindex % SWB_NPAGES; 35926f9a767SRodney W. Grimes if (valid) 36026f9a767SRodney W. Grimes *valid = swb->swb_valid & (1 << ix); 36126f9a767SRodney W. Grimes return &swb->swb_block[ix]; 36226f9a767SRodney W. Grimes } 36326f9a767SRodney W. Grimes 36426f9a767SRodney W. Grimes /* 36526f9a767SRodney W. Grimes * Utility routine to set the valid (written) bit for 36626f9a767SRodney W. Grimes * a block associated with a pager and offset 36726f9a767SRodney W. Grimes */ 368df8bae1dSRodney W. Grimes static void 3692a4895f4SDavid Greenman swap_pager_setvalid(object, offset, valid) 3702a4895f4SDavid Greenman vm_object_t object; 37126f9a767SRodney W. Grimes vm_offset_t offset; 37226f9a767SRodney W. Grimes int valid; 37326f9a767SRodney W. Grimes { 37426f9a767SRodney W. Grimes register sw_blk_t swb; 37526f9a767SRodney W. Grimes int ix; 37626f9a767SRodney W. Grimes 377a316d390SJohn Dyson ix = offset / SWB_NPAGES; 3782a4895f4SDavid Greenman if (ix >= object->un_pager.swp.swp_nblocks) 37926f9a767SRodney W. Grimes return; 38026f9a767SRodney W. 
Grimes 3812a4895f4SDavid Greenman swb = &object->un_pager.swp.swp_blocks[ix]; 382a316d390SJohn Dyson ix = offset % SWB_NPAGES; 38326f9a767SRodney W. Grimes if (valid) 38426f9a767SRodney W. Grimes swb->swb_valid |= (1 << ix); 38526f9a767SRodney W. Grimes else 38626f9a767SRodney W. Grimes swb->swb_valid &= ~(1 << ix); 38726f9a767SRodney W. Grimes return; 38826f9a767SRodney W. Grimes } 38926f9a767SRodney W. Grimes 39026f9a767SRodney W. Grimes /* 39126f9a767SRodney W. Grimes * this routine allocates swap space with a fragmentation 39226f9a767SRodney W. Grimes * minimization policy. 39326f9a767SRodney W. Grimes */ 394f5a12711SPoul-Henning Kamp static int 3952a4895f4SDavid Greenman swap_pager_getswapspace(object, amount, rtval) 3962a4895f4SDavid Greenman vm_object_t object; 3972a4895f4SDavid Greenman unsigned int amount; 398a316d390SJohn Dyson daddr_t *rtval; 3990d94caffSDavid Greenman { 400a316d390SJohn Dyson unsigned location; 401b44e4b7aSJohn Dyson 40224ea4a96SDavid Greenman vm_swap_size -= amount; 403b44e4b7aSJohn Dyson if (!suggest_more_swap && (vm_swap_size < btodb(cnt.v_page_count * PAGE_SIZE))) { 404b44e4b7aSJohn Dyson printf("swap_pager: suggest more swap space: %d MB\n", 405b44e4b7aSJohn Dyson (2 * cnt.v_page_count * (PAGE_SIZE / 1024)) / 1000); 406b44e4b7aSJohn Dyson suggest_more_swap = 1; 407b44e4b7aSJohn Dyson } 408b44e4b7aSJohn Dyson 409a316d390SJohn Dyson if (!rlist_alloc(&swaplist, amount, &location)) { 41024ea4a96SDavid Greenman vm_swap_size += amount; 41126f9a767SRodney W. Grimes return 0; 41224ea4a96SDavid Greenman } else { 41324ea4a96SDavid Greenman swapsizecheck(); 4142a4895f4SDavid Greenman object->un_pager.swp.swp_allocsize += amount; 415a316d390SJohn Dyson *rtval = location; 41626f9a767SRodney W. Grimes return 1; 41726f9a767SRodney W. Grimes } 41826f9a767SRodney W. Grimes } 41926f9a767SRodney W. Grimes 42026f9a767SRodney W. Grimes /* 42126f9a767SRodney W. Grimes * this routine frees swap space with a fragmentation 42226f9a767SRodney W. 
Grimes * minimization policy. 42326f9a767SRodney W. Grimes */ 424f5a12711SPoul-Henning Kamp static void 4252a4895f4SDavid Greenman swap_pager_freeswapspace(object, from, to) 4262a4895f4SDavid Greenman vm_object_t object; 4272a4895f4SDavid Greenman unsigned int from; 4282a4895f4SDavid Greenman unsigned int to; 4290d94caffSDavid Greenman { 43035c10d22SDavid Greenman rlist_free(&swaplist, from, to); 43124ea4a96SDavid Greenman vm_swap_size += (to - from) + 1; 4322a4895f4SDavid Greenman object->un_pager.swp.swp_allocsize -= (to - from) + 1; 43324ea4a96SDavid Greenman swapsizecheck(); 43426f9a767SRodney W. Grimes } 43526f9a767SRodney W. Grimes /* 43626f9a767SRodney W. Grimes * this routine frees swap blocks from a specified pager 43726f9a767SRodney W. Grimes */ 43826f9a767SRodney W. Grimes void 43924a1cce3SDavid Greenman swap_pager_freespace(object, start, size) 44024a1cce3SDavid Greenman vm_object_t object; 441a316d390SJohn Dyson vm_pindex_t start; 442a316d390SJohn Dyson vm_size_t size; 44326f9a767SRodney W. Grimes { 444a316d390SJohn Dyson vm_pindex_t i; 44526f9a767SRodney W. Grimes int s; 44626f9a767SRodney W. Grimes 447e47ed70bSJohn Dyson s = splvm(); 448a316d390SJohn Dyson for (i = start; i < start + size; i += 1) { 44926f9a767SRodney W. Grimes int valid; 450a316d390SJohn Dyson daddr_t *addr = swap_pager_diskaddr(object, i, &valid); 4510d94caffSDavid Greenman 45226f9a767SRodney W. Grimes if (addr && *addr != SWB_EMPTY) { 4532a4895f4SDavid Greenman swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1); 45426f9a767SRodney W. Grimes if (valid) { 4552a4895f4SDavid Greenman swap_pager_setvalid(object, i, 0); 45626f9a767SRodney W. Grimes } 45726f9a767SRodney W. Grimes *addr = SWB_EMPTY; 45826f9a767SRodney W. Grimes } 45926f9a767SRodney W. Grimes } 46026f9a767SRodney W. Grimes splx(s); 46126f9a767SRodney W. Grimes } 46226f9a767SRodney W. 
Grimes 4630a47b48bSJohn Dyson /* 4640a47b48bSJohn Dyson * same as freespace, but don't free, just force a DMZ next time 4650a47b48bSJohn Dyson */ 4660a47b48bSJohn Dyson void 4670a47b48bSJohn Dyson swap_pager_dmzspace(object, start, size) 4680a47b48bSJohn Dyson vm_object_t object; 4690a47b48bSJohn Dyson vm_pindex_t start; 4700a47b48bSJohn Dyson vm_size_t size; 4710a47b48bSJohn Dyson { 4720a47b48bSJohn Dyson vm_pindex_t i; 4730a47b48bSJohn Dyson int s; 4740a47b48bSJohn Dyson 475e47ed70bSJohn Dyson s = splvm(); 4760a47b48bSJohn Dyson for (i = start; i < start + size; i += 1) { 4770a47b48bSJohn Dyson int valid; 4780a47b48bSJohn Dyson daddr_t *addr = swap_pager_diskaddr(object, i, &valid); 4790a47b48bSJohn Dyson 4800a47b48bSJohn Dyson if (addr && *addr != SWB_EMPTY) { 4810a47b48bSJohn Dyson if (valid) { 4820a47b48bSJohn Dyson swap_pager_setvalid(object, i, 0); 4830a47b48bSJohn Dyson } 4840a47b48bSJohn Dyson } 4850a47b48bSJohn Dyson } 4860a47b48bSJohn Dyson splx(s); 4870a47b48bSJohn Dyson } 4880a47b48bSJohn Dyson 489a1f6d91cSDavid Greenman static void 4902a4895f4SDavid Greenman swap_pager_free_swap(object) 4912a4895f4SDavid Greenman vm_object_t object; 492a1f6d91cSDavid Greenman { 493a1f6d91cSDavid Greenman register int i, j; 4942a4895f4SDavid Greenman register sw_blk_t swb; 495a1f6d91cSDavid Greenman int first_block=0, block_count=0; 496a1f6d91cSDavid Greenman int s; 497a1f6d91cSDavid Greenman /* 498a1f6d91cSDavid Greenman * Free left over swap blocks 499a1f6d91cSDavid Greenman */ 50047221757SJohn Dyson swb = object->un_pager.swp.swp_blocks; 5012d8acc0fSJohn Dyson if (swb == NULL) { 50247221757SJohn Dyson return; 5032d8acc0fSJohn Dyson } 50447221757SJohn Dyson 5052d8acc0fSJohn Dyson s = splvm(); 50647221757SJohn Dyson for (i = 0; i < object->un_pager.swp.swp_nblocks; i++, swb++) { 507a1f6d91cSDavid Greenman for (j = 0; j < SWB_NPAGES; j++) { 5082a4895f4SDavid Greenman if (swb->swb_block[j] != SWB_EMPTY) { 509a1f6d91cSDavid Greenman /* 510a1f6d91cSDavid Greenman * 
initially the length of the run is zero 511a1f6d91cSDavid Greenman */ 512a1f6d91cSDavid Greenman if (block_count == 0) { 5132a4895f4SDavid Greenman first_block = swb->swb_block[j]; 514a1f6d91cSDavid Greenman block_count = btodb(PAGE_SIZE); 5152a4895f4SDavid Greenman swb->swb_block[j] = SWB_EMPTY; 516a1f6d91cSDavid Greenman /* 517a1f6d91cSDavid Greenman * if the new block can be included into the current run 518a1f6d91cSDavid Greenman */ 5192a4895f4SDavid Greenman } else if (swb->swb_block[j] == first_block + block_count) { 520a1f6d91cSDavid Greenman block_count += btodb(PAGE_SIZE); 5212a4895f4SDavid Greenman swb->swb_block[j] = SWB_EMPTY; 522a1f6d91cSDavid Greenman /* 523a1f6d91cSDavid Greenman * terminate the previous run, and start a new one 524a1f6d91cSDavid Greenman */ 525a1f6d91cSDavid Greenman } else { 5262a4895f4SDavid Greenman swap_pager_freeswapspace(object, first_block, 527a1f6d91cSDavid Greenman (unsigned) first_block + block_count - 1); 5282a4895f4SDavid Greenman first_block = swb->swb_block[j]; 529a1f6d91cSDavid Greenman block_count = btodb(PAGE_SIZE); 5302a4895f4SDavid Greenman swb->swb_block[j] = SWB_EMPTY; 531a1f6d91cSDavid Greenman } 532a1f6d91cSDavid Greenman } 533a1f6d91cSDavid Greenman } 534a1f6d91cSDavid Greenman } 535a1f6d91cSDavid Greenman 536a1f6d91cSDavid Greenman if (block_count) { 5372a4895f4SDavid Greenman swap_pager_freeswapspace(object, first_block, 538a1f6d91cSDavid Greenman (unsigned) first_block + block_count - 1); 539a1f6d91cSDavid Greenman } 540a1f6d91cSDavid Greenman splx(s); 541a1f6d91cSDavid Greenman } 542a1f6d91cSDavid Greenman 543a1f6d91cSDavid Greenman 54426f9a767SRodney W. Grimes /* 54526f9a767SRodney W. Grimes * swap_pager_reclaim frees up over-allocated space from all pagers 54626f9a767SRodney W. Grimes * this eliminates internal fragmentation due to allocation of space 54726f9a767SRodney W. Grimes * for segments that are never swapped to. It has been written so that 54826f9a767SRodney W. 
Grimes * it does not block until the rlist_free operation occurs; it keeps 54926f9a767SRodney W. Grimes * the queues consistant. 55026f9a767SRodney W. Grimes */ 55126f9a767SRodney W. Grimes 55226f9a767SRodney W. Grimes /* 55326f9a767SRodney W. Grimes * Maximum number of blocks (pages) to reclaim per pass 55426f9a767SRodney W. Grimes */ 555a1f6d91cSDavid Greenman #define MAXRECLAIM 128 55626f9a767SRodney W. Grimes 557f5a12711SPoul-Henning Kamp static void 55826f9a767SRodney W. Grimes swap_pager_reclaim() 55926f9a767SRodney W. Grimes { 56024a1cce3SDavid Greenman vm_object_t object; 56126f9a767SRodney W. Grimes int i, j, k; 56226f9a767SRodney W. Grimes int s; 56326f9a767SRodney W. Grimes int reclaimcount; 564a1f6d91cSDavid Greenman static struct { 565a1f6d91cSDavid Greenman int address; 5662a4895f4SDavid Greenman vm_object_t object; 567a1f6d91cSDavid Greenman } reclaims[MAXRECLAIM]; 56826f9a767SRodney W. Grimes static int in_reclaim; 56926f9a767SRodney W. Grimes 57026f9a767SRodney W. Grimes /* 57126f9a767SRodney W. Grimes * allow only one process to be in the swap_pager_reclaim subroutine 57226f9a767SRodney W. Grimes */ 573e47ed70bSJohn Dyson s = splvm(); 57426f9a767SRodney W. Grimes if (in_reclaim) { 57524a1cce3SDavid Greenman tsleep(&in_reclaim, PSWP, "swrclm", 0); 57626f9a767SRodney W. Grimes splx(s); 57726f9a767SRodney W. Grimes return; 57826f9a767SRodney W. Grimes } 57926f9a767SRodney W. Grimes in_reclaim = 1; 58026f9a767SRodney W. Grimes reclaimcount = 0; 58126f9a767SRodney W. Grimes 58226f9a767SRodney W. Grimes /* for each pager queue */ 58326f9a767SRodney W. Grimes for (k = 0; swp_qs[k]; k++) { 58426f9a767SRodney W. Grimes 585b18bfc3dSJohn Dyson object = TAILQ_FIRST(swp_qs[k]); 58624a1cce3SDavid Greenman while (object && (reclaimcount < MAXRECLAIM)) { 58726f9a767SRodney W. Grimes 58826f9a767SRodney W. Grimes /* 58926f9a767SRodney W. Grimes * see if any blocks associated with a pager has been 59026f9a767SRodney W. 
Grimes * allocated but not used (written) 59126f9a767SRodney W. Grimes */ 5925070c7f8SJohn Dyson if ((object->flags & OBJ_DEAD) == 0 && 5935070c7f8SJohn Dyson (object->paging_in_progress == 0)) { 5942a4895f4SDavid Greenman for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) { 5952a4895f4SDavid Greenman sw_blk_t swb = &object->un_pager.swp.swp_blocks[i]; 5960d94caffSDavid Greenman 59726f9a767SRodney W. Grimes if (swb->swb_locked) 59826f9a767SRodney W. Grimes continue; 59926f9a767SRodney W. Grimes for (j = 0; j < SWB_NPAGES; j++) { 60026f9a767SRodney W. Grimes if (swb->swb_block[j] != SWB_EMPTY && 60126f9a767SRodney W. Grimes (swb->swb_valid & (1 << j)) == 0) { 602a1f6d91cSDavid Greenman reclaims[reclaimcount].address = swb->swb_block[j]; 6032a4895f4SDavid Greenman reclaims[reclaimcount++].object = object; 60426f9a767SRodney W. Grimes swb->swb_block[j] = SWB_EMPTY; 60526f9a767SRodney W. Grimes if (reclaimcount >= MAXRECLAIM) 60626f9a767SRodney W. Grimes goto rfinished; 60726f9a767SRodney W. Grimes } 60826f9a767SRodney W. Grimes } 60926f9a767SRodney W. Grimes } 610a316d390SJohn Dyson } 611b18bfc3dSJohn Dyson object = TAILQ_NEXT(object, pager_object_list); 61226f9a767SRodney W. Grimes } 61326f9a767SRodney W. Grimes } 61426f9a767SRodney W. Grimes 61526f9a767SRodney W. Grimes rfinished: 61626f9a767SRodney W. Grimes 61726f9a767SRodney W. Grimes /* 61826f9a767SRodney W. Grimes * free the blocks that have been added to the reclaim list 61926f9a767SRodney W. Grimes */ 62026f9a767SRodney W. Grimes for (i = 0; i < reclaimcount; i++) { 6212a4895f4SDavid Greenman swap_pager_freeswapspace(reclaims[i].object, 6222a4895f4SDavid Greenman reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1); 62326f9a767SRodney W. Grimes } 62426f9a767SRodney W. Grimes splx(s); 62526f9a767SRodney W. Grimes in_reclaim = 0; 62624a1cce3SDavid Greenman wakeup(&in_reclaim); 62726f9a767SRodney W. Grimes } 62826f9a767SRodney W. Grimes 62926f9a767SRodney W. Grimes 63026f9a767SRodney W. 
Grimes /* 63126f9a767SRodney W. Grimes * swap_pager_copy copies blocks from one pager to another and 63226f9a767SRodney W. Grimes * destroys the source pager 63326f9a767SRodney W. Grimes */ 63426f9a767SRodney W. Grimes 63526f9a767SRodney W. Grimes void 63624a1cce3SDavid Greenman swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset) 63724a1cce3SDavid Greenman vm_object_t srcobject; 638a316d390SJohn Dyson vm_pindex_t srcoffset; 63924a1cce3SDavid Greenman vm_object_t dstobject; 640a316d390SJohn Dyson vm_pindex_t dstoffset; 641a316d390SJohn Dyson vm_pindex_t offset; 64226f9a767SRodney W. Grimes { 643a316d390SJohn Dyson vm_pindex_t i; 644a1f6d91cSDavid Greenman int origsize; 64526f9a767SRodney W. Grimes int s; 64626f9a767SRodney W. Grimes 64724ea4a96SDavid Greenman if (vm_swap_size) 64824ea4a96SDavid Greenman no_swap_space = 0; 64924ea4a96SDavid Greenman 6502a4895f4SDavid Greenman origsize = srcobject->un_pager.swp.swp_allocsize; 65126f9a767SRodney W. Grimes 65226f9a767SRodney W. Grimes /* 65324a1cce3SDavid Greenman * remove the source object from the swap_pager internal queue 65426f9a767SRodney W. Grimes */ 65524a1cce3SDavid Greenman if (srcobject->handle == NULL) { 65624a1cce3SDavid Greenman TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list); 65726f9a767SRodney W. Grimes } else { 65824a1cce3SDavid Greenman TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list); 65926f9a767SRodney W. Grimes } 66026f9a767SRodney W. Grimes 661e47ed70bSJohn Dyson s = splvm(); 6622a4895f4SDavid Greenman while (srcobject->un_pager.swp.swp_poip) { 6632a4895f4SDavid Greenman tsleep(srcobject, PVM, "spgout", 0); 66426f9a767SRodney W. Grimes } 66526f9a767SRodney W. Grimes 66626f9a767SRodney W. Grimes /* 66726f9a767SRodney W. Grimes * clean all of the pages that are currently active and finished 66826f9a767SRodney W. Grimes */ 669e47ed70bSJohn Dyson if (swap_pager_free_pending) 67024a1cce3SDavid Greenman swap_pager_sync(); 67126f9a767SRodney W. 
Grimes 67226f9a767SRodney W. Grimes /* 67326f9a767SRodney W. Grimes * transfer source to destination 67426f9a767SRodney W. Grimes */ 675a316d390SJohn Dyson for (i = 0; i < dstobject->size; i += 1) { 67626f9a767SRodney W. Grimes int srcvalid, dstvalid; 677a316d390SJohn Dyson daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset, 67826f9a767SRodney W. Grimes &srcvalid); 679a316d390SJohn Dyson daddr_t *dstaddrp; 6800d94caffSDavid Greenman 68126f9a767SRodney W. Grimes /* 68226f9a767SRodney W. Grimes * see if the source has space allocated 68326f9a767SRodney W. Grimes */ 68426f9a767SRodney W. Grimes if (srcaddrp && *srcaddrp != SWB_EMPTY) { 68526f9a767SRodney W. Grimes /* 6860d94caffSDavid Greenman * if the source is valid and the dest has no space, 6870d94caffSDavid Greenman * then copy the allocation from the srouce to the 6880d94caffSDavid Greenman * dest. 68926f9a767SRodney W. Grimes */ 69026f9a767SRodney W. Grimes if (srcvalid) { 69124a1cce3SDavid Greenman dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset, 692a1f6d91cSDavid Greenman &dstvalid); 69326f9a767SRodney W. Grimes /* 6940d94caffSDavid Greenman * if the dest already has a valid block, 6950d94caffSDavid Greenman * deallocate the source block without 6960d94caffSDavid Greenman * copying. 69726f9a767SRodney W. Grimes */ 69826f9a767SRodney W. Grimes if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) { 6992a4895f4SDavid Greenman swap_pager_freeswapspace(dstobject, *dstaddrp, 700a1f6d91cSDavid Greenman *dstaddrp + btodb(PAGE_SIZE) - 1); 70126f9a767SRodney W. Grimes *dstaddrp = SWB_EMPTY; 70226f9a767SRodney W. Grimes } 70326f9a767SRodney W. Grimes if (dstaddrp && *dstaddrp == SWB_EMPTY) { 70426f9a767SRodney W. Grimes *dstaddrp = *srcaddrp; 70526f9a767SRodney W. 
Grimes *srcaddrp = SWB_EMPTY; 7062a4895f4SDavid Greenman dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE); 7072a4895f4SDavid Greenman srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE); 7082a4895f4SDavid Greenman swap_pager_setvalid(dstobject, i + dstoffset, 1); 70926f9a767SRodney W. Grimes } 71026f9a767SRodney W. Grimes } 71126f9a767SRodney W. Grimes /* 7120d94caffSDavid Greenman * if the source is not empty at this point, then 7130d94caffSDavid Greenman * deallocate the space. 71426f9a767SRodney W. Grimes */ 71526f9a767SRodney W. Grimes if (*srcaddrp != SWB_EMPTY) { 7162a4895f4SDavid Greenman swap_pager_freeswapspace(srcobject, *srcaddrp, 717a1f6d91cSDavid Greenman *srcaddrp + btodb(PAGE_SIZE) - 1); 71826f9a767SRodney W. Grimes *srcaddrp = SWB_EMPTY; 71926f9a767SRodney W. Grimes } 72026f9a767SRodney W. Grimes } 72126f9a767SRodney W. Grimes } 72226f9a767SRodney W. Grimes splx(s); 72326f9a767SRodney W. Grimes 724a1f6d91cSDavid Greenman /* 725a1f6d91cSDavid Greenman * Free left over swap blocks 726a1f6d91cSDavid Greenman */ 7272a4895f4SDavid Greenman swap_pager_free_swap(srcobject); 728a1f6d91cSDavid Greenman 7292a4895f4SDavid Greenman if (srcobject->un_pager.swp.swp_allocsize) { 7302a4895f4SDavid Greenman printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n", 7312a4895f4SDavid Greenman srcobject->un_pager.swp.swp_allocsize, origsize); 7322a4895f4SDavid Greenman } 7332a4895f4SDavid Greenman 7342a4895f4SDavid Greenman free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA); 7352a4895f4SDavid Greenman srcobject->un_pager.swp.swp_blocks = NULL; 73626f9a767SRodney W. Grimes 73726f9a767SRodney W. Grimes return; 73826f9a767SRodney W. Grimes } 73926f9a767SRodney W. Grimes 740f5a12711SPoul-Henning Kamp static void 74124a1cce3SDavid Greenman swap_pager_dealloc(object) 74224a1cce3SDavid Greenman vm_object_t object; 743df8bae1dSRodney W. Grimes { 744df8bae1dSRodney W. Grimes int s; 74547221757SJohn Dyson sw_blk_t swb; 746df8bae1dSRodney W. 
Grimes 747df8bae1dSRodney W. Grimes /* 7480d94caffSDavid Greenman * Remove from list right away so lookups will fail if we block for 7490d94caffSDavid Greenman * pageout completion. 750df8bae1dSRodney W. Grimes */ 75124a1cce3SDavid Greenman if (object->handle == NULL) { 75224a1cce3SDavid Greenman TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list); 75326f9a767SRodney W. Grimes } else { 75424a1cce3SDavid Greenman TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list); 755df8bae1dSRodney W. Grimes } 75624a1cce3SDavid Greenman 757df8bae1dSRodney W. Grimes /* 7580d94caffSDavid Greenman * Wait for all pageouts to finish and remove all entries from 7590d94caffSDavid Greenman * cleaning list. 760df8bae1dSRodney W. Grimes */ 76126f9a767SRodney W. Grimes 762e47ed70bSJohn Dyson s = splvm(); 7632a4895f4SDavid Greenman while (object->un_pager.swp.swp_poip) { 7642a4895f4SDavid Greenman tsleep(object, PVM, "swpout", 0); 765df8bae1dSRodney W. Grimes } 766df8bae1dSRodney W. Grimes splx(s); 76726f9a767SRodney W. Grimes 768e47ed70bSJohn Dyson if (swap_pager_free_pending) 76924a1cce3SDavid Greenman swap_pager_sync(); 770df8bae1dSRodney W. Grimes 771df8bae1dSRodney W. Grimes /* 772df8bae1dSRodney W. Grimes * Free left over swap blocks 773df8bae1dSRodney W. Grimes */ 7742a4895f4SDavid Greenman swap_pager_free_swap(object); 77526f9a767SRodney W. Grimes 7762a4895f4SDavid Greenman if (object->un_pager.swp.swp_allocsize) { 7772a4895f4SDavid Greenman printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n", 7782a4895f4SDavid Greenman object->un_pager.swp.swp_allocsize); 7792a4895f4SDavid Greenman } 78047221757SJohn Dyson swb = object->un_pager.swp.swp_blocks; 78147221757SJohn Dyson if (swb) { 782df8bae1dSRodney W. Grimes /* 783df8bae1dSRodney W. Grimes * Free swap management resources 784df8bae1dSRodney W. Grimes */ 78547221757SJohn Dyson free(swb, M_VMPGDATA); 7862a4895f4SDavid Greenman object->un_pager.swp.swp_blocks = NULL; 78726f9a767SRodney W. 
Grimes } 78847221757SJohn Dyson } 78926f9a767SRodney W. Grimes 7908ba0c490SBruce Evans static inline int 791a316d390SJohn Dyson swap_pager_block_index(pindex) 792a316d390SJohn Dyson vm_pindex_t pindex; 79326f9a767SRodney W. Grimes { 794a316d390SJohn Dyson return (pindex / SWB_NPAGES); 79526f9a767SRodney W. Grimes } 79626f9a767SRodney W. Grimes 7978ba0c490SBruce Evans static inline int 798a316d390SJohn Dyson swap_pager_block_offset(pindex) 799a316d390SJohn Dyson vm_pindex_t pindex; 80026f9a767SRodney W. Grimes { 801a316d390SJohn Dyson return (pindex % SWB_NPAGES); 80226f9a767SRodney W. Grimes } 80326f9a767SRodney W. Grimes 80426f9a767SRodney W. Grimes /* 80524a1cce3SDavid Greenman * swap_pager_haspage returns TRUE if the pager has data that has 80626f9a767SRodney W. Grimes * been written out. 80726f9a767SRodney W. Grimes */ 808f5a12711SPoul-Henning Kamp static boolean_t 809a316d390SJohn Dyson swap_pager_haspage(object, pindex, before, after) 81024a1cce3SDavid Greenman vm_object_t object; 811a316d390SJohn Dyson vm_pindex_t pindex; 81224a1cce3SDavid Greenman int *before; 81324a1cce3SDavid Greenman int *after; 81426f9a767SRodney W. Grimes { 81526f9a767SRodney W. Grimes register sw_blk_t swb; 81626f9a767SRodney W. Grimes int ix; 81726f9a767SRodney W. Grimes 81824a1cce3SDavid Greenman if (before != NULL) 81924a1cce3SDavid Greenman *before = 0; 82024a1cce3SDavid Greenman if (after != NULL) 82124a1cce3SDavid Greenman *after = 0; 822a316d390SJohn Dyson ix = pindex / SWB_NPAGES; 8232a4895f4SDavid Greenman if (ix >= object->un_pager.swp.swp_nblocks) { 82426f9a767SRodney W. Grimes return (FALSE); 82526f9a767SRodney W. Grimes } 8262a4895f4SDavid Greenman swb = &object->un_pager.swp.swp_blocks[ix]; 827a316d390SJohn Dyson ix = pindex % SWB_NPAGES; 828170db9c6SJohn Dyson 82926f9a767SRodney W. 
Grimes if (swb->swb_block[ix] != SWB_EMPTY) { 830170db9c6SJohn Dyson 831170db9c6SJohn Dyson if (swb->swb_valid & (1 << ix)) { 832170db9c6SJohn Dyson int tix; 833170db9c6SJohn Dyson if (before) { 834170db9c6SJohn Dyson for(tix = ix - 1; tix >= 0; --tix) { 8352f82e604SDavid Greenman if ((swb->swb_valid & (1 << tix)) == 0) 8362f82e604SDavid Greenman break; 837ca56715fSJohn Dyson if ((swb->swb_block[tix] + 838170db9c6SJohn Dyson (ix - tix) * (PAGE_SIZE/DEV_BSIZE)) != 839170db9c6SJohn Dyson swb->swb_block[ix]) 840170db9c6SJohn Dyson break; 841170db9c6SJohn Dyson (*before)++; 842170db9c6SJohn Dyson } 843170db9c6SJohn Dyson } 844170db9c6SJohn Dyson 845170db9c6SJohn Dyson if (after) { 846170db9c6SJohn Dyson for(tix = ix + 1; tix < SWB_NPAGES; tix++) { 8472f82e604SDavid Greenman if ((swb->swb_valid & (1 << tix)) == 0) 8482f82e604SDavid Greenman break; 849ca56715fSJohn Dyson if ((swb->swb_block[tix] - 850170db9c6SJohn Dyson (tix - ix) * (PAGE_SIZE/DEV_BSIZE)) != 851170db9c6SJohn Dyson swb->swb_block[ix]) 852170db9c6SJohn Dyson break; 853170db9c6SJohn Dyson (*after)++; 854170db9c6SJohn Dyson } 855170db9c6SJohn Dyson } 856170db9c6SJohn Dyson 85726f9a767SRodney W. Grimes return TRUE; 85826f9a767SRodney W. Grimes } 859170db9c6SJohn Dyson } 86026f9a767SRodney W. Grimes return (FALSE); 86126f9a767SRodney W. Grimes } 86226f9a767SRodney W. Grimes 86326f9a767SRodney W. Grimes /* 86426f9a767SRodney W. Grimes * swap_pager_freepage is a convienience routine that clears the busy 86526f9a767SRodney W. Grimes * bit and deallocates a page. 866df8bae1dSRodney W. Grimes */ 86726f9a767SRodney W. Grimes static void 86826f9a767SRodney W. Grimes swap_pager_freepage(m) 86926f9a767SRodney W. Grimes vm_page_t m; 87026f9a767SRodney W. Grimes { 87126f9a767SRodney W. Grimes vm_page_free(m); 87226f9a767SRodney W. Grimes } 87326f9a767SRodney W. Grimes 87426f9a767SRodney W. 
Grimes /* 875e47ed70bSJohn Dyson * Wakeup based upon spc state 876e47ed70bSJohn Dyson */ 877e47ed70bSJohn Dyson static void 878e47ed70bSJohn Dyson spc_wakeup(void) 879e47ed70bSJohn Dyson { 880e47ed70bSJohn Dyson if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) { 881e47ed70bSJohn Dyson swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT; 882e47ed70bSJohn Dyson wakeup(&swap_pager_needflags); 883e47ed70bSJohn Dyson } else if ((swap_pager_needflags & SWAP_FREE_NEEDED) && 884e47ed70bSJohn Dyson swap_pager_free_count >= ((2 * npendingio) / 3)) { 885e47ed70bSJohn Dyson swap_pager_needflags &= ~SWAP_FREE_NEEDED; 886e47ed70bSJohn Dyson wakeup(&swap_pager_free); 887e47ed70bSJohn Dyson } 888e47ed70bSJohn Dyson } 889e47ed70bSJohn Dyson 890e47ed70bSJohn Dyson /* 891e47ed70bSJohn Dyson * Free an spc structure 892e47ed70bSJohn Dyson */ 893e47ed70bSJohn Dyson static void 894e47ed70bSJohn Dyson spc_free(spc) 895e47ed70bSJohn Dyson swp_clean_t spc; 896e47ed70bSJohn Dyson { 897e47ed70bSJohn Dyson spc->spc_flags = 0; 898e47ed70bSJohn Dyson TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 899e47ed70bSJohn Dyson swap_pager_free_count++; 900e47ed70bSJohn Dyson if (swap_pager_needflags) { 901e47ed70bSJohn Dyson spc_wakeup(); 902e47ed70bSJohn Dyson } 903e47ed70bSJohn Dyson } 904e47ed70bSJohn Dyson 905e47ed70bSJohn Dyson /* 90626f9a767SRodney W. Grimes * swap_pager_ridpages is a convienience routine that deallocates all 90726f9a767SRodney W. Grimes * but the required page. this is usually used in error returns that 90826f9a767SRodney W. Grimes * need to invalidate the "extra" readahead pages. 90926f9a767SRodney W. Grimes */ 91026f9a767SRodney W. Grimes static void 91126f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage) 91226f9a767SRodney W. Grimes vm_page_t *m; 91326f9a767SRodney W. Grimes int count; 91426f9a767SRodney W. Grimes int reqpage; 91526f9a767SRodney W. Grimes { 91626f9a767SRodney W. Grimes int i; 9170d94caffSDavid Greenman 91826f9a767SRodney W. 
Grimes for (i = 0; i < count; i++) 91926f9a767SRodney W. Grimes if (i != reqpage) 92026f9a767SRodney W. Grimes swap_pager_freepage(m[i]); 92126f9a767SRodney W. Grimes } 92226f9a767SRodney W. Grimes 92326f9a767SRodney W. Grimes /* 92426f9a767SRodney W. Grimes * swap_pager_iodone1 is the completion routine for both reads and async writes 92526f9a767SRodney W. Grimes */ 926f5a12711SPoul-Henning Kamp static void 92726f9a767SRodney W. Grimes swap_pager_iodone1(bp) 92826f9a767SRodney W. Grimes struct buf *bp; 92926f9a767SRodney W. Grimes { 93026f9a767SRodney W. Grimes bp->b_flags |= B_DONE; 93126f9a767SRodney W. Grimes bp->b_flags &= ~B_ASYNC; 93224a1cce3SDavid Greenman wakeup(bp); 93326f9a767SRodney W. Grimes } 93426f9a767SRodney W. Grimes 935f708ef1bSPoul-Henning Kamp static int 93624a1cce3SDavid Greenman swap_pager_getpages(object, m, count, reqpage) 93724a1cce3SDavid Greenman vm_object_t object; 93826f9a767SRodney W. Grimes vm_page_t *m; 93926f9a767SRodney W. Grimes int count, reqpage; 940df8bae1dSRodney W. Grimes { 941df8bae1dSRodney W. Grimes register struct buf *bp; 94226f9a767SRodney W. Grimes sw_blk_t swb[count]; 943df8bae1dSRodney W. Grimes register int s; 94426f9a767SRodney W. Grimes int i; 945df8bae1dSRodney W. Grimes boolean_t rv; 94626f9a767SRodney W. Grimes vm_offset_t kva, off[count]; 947a316d390SJohn Dyson vm_pindex_t paging_offset; 94826f9a767SRodney W. Grimes int reqaddr[count]; 9496d40c3d3SDavid Greenman int sequential; 950df8bae1dSRodney W. Grimes 95126f9a767SRodney W. Grimes int first, last; 95226f9a767SRodney W. Grimes int failed; 95326f9a767SRodney W. Grimes int reqdskregion; 954df8bae1dSRodney W. Grimes 95526f9a767SRodney W. Grimes object = m[reqpage]->object; 956a316d390SJohn Dyson paging_offset = OFF_TO_IDX(object->paging_offset); 957a316d390SJohn Dyson sequential = (m[reqpage]->pindex == (object->last_read + 1)); 9582a4895f4SDavid Greenman 95926f9a767SRodney W. 
Grimes for (i = 0; i < count; i++) { 960a316d390SJohn Dyson vm_pindex_t fidx = m[i]->pindex + paging_offset; 961a316d390SJohn Dyson int ix = swap_pager_block_index(fidx); 9620d94caffSDavid Greenman 9632a4895f4SDavid Greenman if (ix >= object->un_pager.swp.swp_nblocks) { 96426f9a767SRodney W. Grimes int j; 9650d94caffSDavid Greenman 96626f9a767SRodney W. Grimes if (i <= reqpage) { 96726f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage); 968df8bae1dSRodney W. Grimes return (VM_PAGER_FAIL); 96926f9a767SRodney W. Grimes } 97026f9a767SRodney W. Grimes for (j = i; j < count; j++) { 97126f9a767SRodney W. Grimes swap_pager_freepage(m[j]); 97226f9a767SRodney W. Grimes } 97326f9a767SRodney W. Grimes count = i; 97426f9a767SRodney W. Grimes break; 97526f9a767SRodney W. Grimes } 9762a4895f4SDavid Greenman swb[i] = &object->un_pager.swp.swp_blocks[ix]; 977a316d390SJohn Dyson off[i] = swap_pager_block_offset(fidx); 97826f9a767SRodney W. Grimes reqaddr[i] = swb[i]->swb_block[off[i]]; 97926f9a767SRodney W. Grimes } 98026f9a767SRodney W. Grimes 98126f9a767SRodney W. Grimes /* make sure that our required input request is existant */ 98226f9a767SRodney W. Grimes 98326f9a767SRodney W. Grimes if (reqaddr[reqpage] == SWB_EMPTY || 98426f9a767SRodney W. Grimes (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) { 98526f9a767SRodney W. Grimes swap_pager_ridpages(m, count, reqpage); 98626f9a767SRodney W. Grimes return (VM_PAGER_FAIL); 98726f9a767SRodney W. Grimes } 98826f9a767SRodney W. Grimes reqdskregion = reqaddr[reqpage] / dmmax; 989df8bae1dSRodney W. Grimes 990df8bae1dSRodney W. Grimes /* 99126f9a767SRodney W. Grimes * search backwards for the first contiguous page to transfer 992df8bae1dSRodney W. Grimes */ 99326f9a767SRodney W. Grimes failed = 0; 99426f9a767SRodney W. Grimes first = 0; 99526f9a767SRodney W. Grimes for (i = reqpage - 1; i >= 0; --i) { 9966d40c3d3SDavid Greenman if (sequential || failed || (reqaddr[i] == SWB_EMPTY) || 99726f9a767SRodney W. 
Grimes (swb[i]->swb_valid & (1 << off[i])) == 0 || 99826f9a767SRodney W. Grimes (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 99926f9a767SRodney W. Grimes ((reqaddr[i] / dmmax) != reqdskregion)) { 100026f9a767SRodney W. Grimes failed = 1; 100126f9a767SRodney W. Grimes swap_pager_freepage(m[i]); 100226f9a767SRodney W. Grimes if (first == 0) 100326f9a767SRodney W. Grimes first = i + 1; 100426f9a767SRodney W. Grimes } 1005df8bae1dSRodney W. Grimes } 1006df8bae1dSRodney W. Grimes /* 100726f9a767SRodney W. Grimes * search forwards for the last contiguous page to transfer 1008df8bae1dSRodney W. Grimes */ 100926f9a767SRodney W. Grimes failed = 0; 101026f9a767SRodney W. Grimes last = count; 101126f9a767SRodney W. Grimes for (i = reqpage + 1; i < count; i++) { 101226f9a767SRodney W. Grimes if (failed || (reqaddr[i] == SWB_EMPTY) || 101326f9a767SRodney W. Grimes (swb[i]->swb_valid & (1 << off[i])) == 0 || 101426f9a767SRodney W. Grimes (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 101526f9a767SRodney W. Grimes ((reqaddr[i] / dmmax) != reqdskregion)) { 101626f9a767SRodney W. Grimes failed = 1; 101726f9a767SRodney W. Grimes swap_pager_freepage(m[i]); 101826f9a767SRodney W. Grimes if (last == count) 101926f9a767SRodney W. Grimes last = i; 102026f9a767SRodney W. Grimes } 102126f9a767SRodney W. Grimes } 102226f9a767SRodney W. Grimes 102326f9a767SRodney W. Grimes count = last; 102426f9a767SRodney W. Grimes if (first != 0) { 102526f9a767SRodney W. Grimes for (i = first; i < count; i++) { 102626f9a767SRodney W. Grimes m[i - first] = m[i]; 102726f9a767SRodney W. Grimes reqaddr[i - first] = reqaddr[i]; 102826f9a767SRodney W. Grimes off[i - first] = off[i]; 102926f9a767SRodney W. Grimes } 103026f9a767SRodney W. Grimes count -= first; 103126f9a767SRodney W. Grimes reqpage -= first; 103226f9a767SRodney W. Grimes } 103326f9a767SRodney W. Grimes ++swb[reqpage]->swb_locked; 103426f9a767SRodney W. Grimes 103526f9a767SRodney W. 
Grimes /* 10360d94caffSDavid Greenman * at this point: "m" is a pointer to the array of vm_page_t for 10370d94caffSDavid Greenman * paging I/O "count" is the number of vm_page_t entries represented 10380d94caffSDavid Greenman * by "m" "object" is the vm_object_t for I/O "reqpage" is the index 10390d94caffSDavid Greenman * into "m" for the page actually faulted 104026f9a767SRodney W. Grimes */ 104126f9a767SRodney W. Grimes 104216f62314SDavid Greenman /* 104316f62314SDavid Greenman * Get a swap buffer header to perform the IO 104416f62314SDavid Greenman */ 104526f9a767SRodney W. Grimes bp = getpbuf(); 104616f62314SDavid Greenman kva = (vm_offset_t) bp->b_data; 104726f9a767SRodney W. Grimes 104816f62314SDavid Greenman /* 104916f62314SDavid Greenman * map our page(s) into kva for input 105016f62314SDavid Greenman */ 105116f62314SDavid Greenman pmap_qenter(kva, m, count); 105216f62314SDavid Greenman 1053aba8f38eSDavid Greenman bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING; 105426f9a767SRodney W. Grimes bp->b_iodone = swap_pager_iodone1; 1055df8bae1dSRodney W. Grimes bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 105626f9a767SRodney W. Grimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 105726f9a767SRodney W. Grimes crhold(bp->b_rcred); 105826f9a767SRodney W. Grimes crhold(bp->b_wcred); 1059ab3f7469SPoul-Henning Kamp bp->b_data = (caddr_t) kva; 106026f9a767SRodney W. Grimes bp->b_blkno = reqaddr[0]; 106126f9a767SRodney W. Grimes bp->b_bcount = PAGE_SIZE * count; 106226f9a767SRodney W. Grimes bp->b_bufsize = PAGE_SIZE * count; 106326f9a767SRodney W. Grimes 10640d94caffSDavid Greenman pbgetvp(swapdev_vp, bp); 1065df8bae1dSRodney W. Grimes 1066976e77fcSDavid Greenman cnt.v_swapin++; 1067976e77fcSDavid Greenman cnt.v_swappgsin += count; 1068df8bae1dSRodney W. Grimes /* 106926f9a767SRodney W. Grimes * perform the I/O 1070df8bae1dSRodney W. Grimes */ 1071df8bae1dSRodney W. Grimes VOP_STRATEGY(bp); 107226f9a767SRodney W. Grimes 107326f9a767SRodney W. 
Grimes /* 107426f9a767SRodney W. Grimes * wait for the sync I/O to complete 107526f9a767SRodney W. Grimes */ 1076e47ed70bSJohn Dyson s = splvm(); 107726f9a767SRodney W. Grimes while ((bp->b_flags & B_DONE) == 0) { 10783091ee09SJohn Dyson if (tsleep(bp, PVM, "swread", hz*20)) { 1079dfeca1b8SBruce Evans printf("swap_pager: indefinite wait buffer: device: %#x, blkno: %d, size: %d\n", 10803091ee09SJohn Dyson bp->b_dev, bp->b_blkno, bp->b_bcount); 10813091ee09SJohn Dyson } 1082df8bae1dSRodney W. Grimes } 10831b119d9dSDavid Greenman 10841b119d9dSDavid Greenman if (bp->b_flags & B_ERROR) { 10851b119d9dSDavid Greenman printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n", 10861b119d9dSDavid Greenman bp->b_blkno, bp->b_bcount, bp->b_error); 1087a83c285cSDavid Greenman rv = VM_PAGER_ERROR; 10881b119d9dSDavid Greenman } else { 10891b119d9dSDavid Greenman rv = VM_PAGER_OK; 10901b119d9dSDavid Greenman } 109126f9a767SRodney W. Grimes 1092df8bae1dSRodney W. Grimes splx(s); 10932a4895f4SDavid Greenman swb[reqpage]->swb_locked--; 109426f9a767SRodney W. Grimes 109526f9a767SRodney W. Grimes /* 109626f9a767SRodney W. Grimes * remove the mapping for kernel virtual 109726f9a767SRodney W. Grimes */ 109816f62314SDavid Greenman pmap_qremove(kva, count); 109926f9a767SRodney W. Grimes 110026f9a767SRodney W. Grimes /* 110126f9a767SRodney W. Grimes * release the physical I/O buffer 110226f9a767SRodney W. Grimes */ 110326f9a767SRodney W. Grimes relpbuf(bp); 110426f9a767SRodney W. Grimes /* 110526f9a767SRodney W. Grimes * finish up input if everything is ok 110626f9a767SRodney W. Grimes */ 110726f9a767SRodney W. Grimes if (rv == VM_PAGER_OK) { 110826f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 11090d94caffSDavid Greenman m[i]->dirty = 0; 1110894048d7SJohn Dyson m[i]->flags &= ~PG_ZERO; 111126f9a767SRodney W. Grimes if (i != reqpage) { 111226f9a767SRodney W. 
Grimes /* 11130d94caffSDavid Greenman * whether or not to leave the page 11140d94caffSDavid Greenman * activated is up in the air, but we 11150d94caffSDavid Greenman * should put the page on a page queue 11160d94caffSDavid Greenman * somewhere. (it already is in the 11170d94caffSDavid Greenman * object). After some emperical 11180d94caffSDavid Greenman * results, it is best to deactivate 11190d94caffSDavid Greenman * the readahead pages. 112026f9a767SRodney W. Grimes */ 112126f9a767SRodney W. Grimes vm_page_deactivate(m[i]); 112226f9a767SRodney W. Grimes 112326f9a767SRodney W. Grimes /* 11240d94caffSDavid Greenman * just in case someone was asking for 11250d94caffSDavid Greenman * this page we now tell them that it 11260d94caffSDavid Greenman * is ok to use 112726f9a767SRodney W. Grimes */ 11280d94caffSDavid Greenman m[i]->valid = VM_PAGE_BITS_ALL; 112926f9a767SRodney W. Grimes PAGE_WAKEUP(m[i]); 113026f9a767SRodney W. Grimes } 113126f9a767SRodney W. Grimes } 11326d40c3d3SDavid Greenman 1133a316d390SJohn Dyson m[reqpage]->object->last_read = m[count-1]->pindex; 11346d40c3d3SDavid Greenman 11352e1e24ddSDavid Greenman /* 11362e1e24ddSDavid Greenman * If we're out of swap space, then attempt to free 113745952afcSJohn Dyson * some whenever multiple pages are brought in. We 113845952afcSJohn Dyson * must set the dirty bits so that the page contents 113945952afcSJohn Dyson * will be preserved. 11402e1e24ddSDavid Greenman */ 1141b44e4b7aSJohn Dyson if (SWAPLOW || 1142b44e4b7aSJohn Dyson (vm_swap_size < btodb((cnt.v_page_count - cnt.v_wire_count)) * PAGE_SIZE)) { 11432e1e24ddSDavid Greenman for (i = 0; i < count; i++) { 11440d94caffSDavid Greenman m[i]->dirty = VM_PAGE_BITS_ALL; 11452e1e24ddSDavid Greenman } 1146b44e4b7aSJohn Dyson swap_pager_freespace(object, 1147b44e4b7aSJohn Dyson m[0]->pindex + paging_offset, count); 114826f9a767SRodney W. Grimes } 1149e47ed70bSJohn Dyson 115026f9a767SRodney W. Grimes } else { 115126f9a767SRodney W. 
Grimes swap_pager_ridpages(m, count, reqpage); 115226f9a767SRodney W. Grimes } 1153df8bae1dSRodney W. Grimes return (rv); 1154df8bae1dSRodney W. Grimes } 1155df8bae1dSRodney W. Grimes 115626f9a767SRodney W. Grimes int 115724a1cce3SDavid Greenman swap_pager_putpages(object, m, count, sync, rtvals) 115824a1cce3SDavid Greenman vm_object_t object; 115926f9a767SRodney W. Grimes vm_page_t *m; 116026f9a767SRodney W. Grimes int count; 116124a1cce3SDavid Greenman boolean_t sync; 116226f9a767SRodney W. Grimes int *rtvals; 1163df8bae1dSRodney W. Grimes { 116426f9a767SRodney W. Grimes register struct buf *bp; 116526f9a767SRodney W. Grimes sw_blk_t swb[count]; 116626f9a767SRodney W. Grimes register int s; 1167e736cd05SJohn Dyson int i, j, ix, firstidx, lastidx; 116826f9a767SRodney W. Grimes boolean_t rv; 1169a316d390SJohn Dyson vm_offset_t kva, off, fidx; 117026f9a767SRodney W. Grimes swp_clean_t spc; 1171a316d390SJohn Dyson vm_pindex_t paging_pindex; 117226f9a767SRodney W. Grimes int reqaddr[count]; 117326f9a767SRodney W. Grimes int failed; 1174df8bae1dSRodney W. Grimes 117524ea4a96SDavid Greenman if (vm_swap_size) 117624ea4a96SDavid Greenman no_swap_space = 0; 1177e736cd05SJohn Dyson 117824ea4a96SDavid Greenman if (no_swap_space) { 11795663e6deSDavid Greenman for (i = 0; i < count; i++) 11805663e6deSDavid Greenman rtvals[i] = VM_PAGER_FAIL; 11815663e6deSDavid Greenman return VM_PAGER_FAIL; 11825663e6deSDavid Greenman } 1183e47ed70bSJohn Dyson 1184e47ed70bSJohn Dyson if (curproc != pageproc) 1185e47ed70bSJohn Dyson sync = TRUE; 118626f9a767SRodney W. Grimes 118726f9a767SRodney W. Grimes object = m[0]->object; 1188a316d390SJohn Dyson paging_pindex = OFF_TO_IDX(object->paging_offset); 118926f9a767SRodney W. Grimes 119026f9a767SRodney W. Grimes failed = 0; 119126f9a767SRodney W. Grimes for (j = 0; j < count; j++) { 1192a316d390SJohn Dyson fidx = m[j]->pindex + paging_pindex; 1193a316d390SJohn Dyson ix = swap_pager_block_index(fidx); 119426f9a767SRodney W. 
Grimes swb[j] = 0; 11952a4895f4SDavid Greenman if (ix >= object->un_pager.swp.swp_nblocks) { 119626f9a767SRodney W. Grimes rtvals[j] = VM_PAGER_FAIL; 119726f9a767SRodney W. Grimes failed = 1; 119826f9a767SRodney W. Grimes continue; 119926f9a767SRodney W. Grimes } else { 120026f9a767SRodney W. Grimes rtvals[j] = VM_PAGER_OK; 120126f9a767SRodney W. Grimes } 12022a4895f4SDavid Greenman swb[j] = &object->un_pager.swp.swp_blocks[ix]; 12032a4895f4SDavid Greenman swb[j]->swb_locked++; 120426f9a767SRodney W. Grimes if (failed) { 120526f9a767SRodney W. Grimes rtvals[j] = VM_PAGER_FAIL; 120626f9a767SRodney W. Grimes continue; 120726f9a767SRodney W. Grimes } 1208a316d390SJohn Dyson off = swap_pager_block_offset(fidx); 120926f9a767SRodney W. Grimes reqaddr[j] = swb[j]->swb_block[off]; 121026f9a767SRodney W. Grimes if (reqaddr[j] == SWB_EMPTY) { 1211a316d390SJohn Dyson daddr_t blk; 121226f9a767SRodney W. Grimes int tries; 121326f9a767SRodney W. Grimes int ntoget; 12140d94caffSDavid Greenman 121526f9a767SRodney W. Grimes tries = 0; 1216e47ed70bSJohn Dyson s = splvm(); 121726f9a767SRodney W. Grimes 1218df8bae1dSRodney W. Grimes /* 12190d94caffSDavid Greenman * if any other pages have been allocated in this 12200d94caffSDavid Greenman * block, we only try to get one page. 1221df8bae1dSRodney W. Grimes */ 122226f9a767SRodney W. Grimes for (i = 0; i < SWB_NPAGES; i++) { 122326f9a767SRodney W. Grimes if (swb[j]->swb_block[i] != SWB_EMPTY) 1224df8bae1dSRodney W. Grimes break; 1225df8bae1dSRodney W. Grimes } 122626f9a767SRodney W. Grimes 122726f9a767SRodney W. Grimes ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1; 122826f9a767SRodney W. Grimes /* 12290d94caffSDavid Greenman * this code is alittle conservative, but works (the 12300d94caffSDavid Greenman * intent of this code is to allocate small chunks for 12310d94caffSDavid Greenman * small objects) 123226f9a767SRodney W. 
Grimes */ 1233a316d390SJohn Dyson if ((off == 0) && ((fidx + ntoget) > object->size)) { 1234a316d390SJohn Dyson ntoget = object->size - fidx; 123526f9a767SRodney W. Grimes } 123626f9a767SRodney W. Grimes retrygetspace: 123726f9a767SRodney W. Grimes if (!swap_pager_full && ntoget > 1 && 1238a316d390SJohn Dyson swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE), 1239a316d390SJohn Dyson &blk)) { 124026f9a767SRodney W. Grimes 124126f9a767SRodney W. Grimes for (i = 0; i < ntoget; i++) { 124226f9a767SRodney W. Grimes swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i; 124326f9a767SRodney W. Grimes swb[j]->swb_valid = 0; 124426f9a767SRodney W. Grimes } 124526f9a767SRodney W. Grimes 124626f9a767SRodney W. Grimes reqaddr[j] = swb[j]->swb_block[off]; 12472a4895f4SDavid Greenman } else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE), 124826f9a767SRodney W. Grimes &swb[j]->swb_block[off])) { 124926f9a767SRodney W. Grimes /* 12500d94caffSDavid Greenman * if the allocation has failed, we try to 12510d94caffSDavid Greenman * reclaim space and retry. 125226f9a767SRodney W. Grimes */ 125326f9a767SRodney W. Grimes if (++tries == 1) { 125426f9a767SRodney W. Grimes swap_pager_reclaim(); 125526f9a767SRodney W. Grimes goto retrygetspace; 125626f9a767SRodney W. Grimes } 125726f9a767SRodney W. Grimes rtvals[j] = VM_PAGER_AGAIN; 125826f9a767SRodney W. Grimes failed = 1; 125924ea4a96SDavid Greenman swap_pager_full = 1; 126026f9a767SRodney W. Grimes } else { 126126f9a767SRodney W. Grimes reqaddr[j] = swb[j]->swb_block[off]; 126226f9a767SRodney W. Grimes swb[j]->swb_valid &= ~(1 << off); 1263df8bae1dSRodney W. Grimes } 1264df8bae1dSRodney W. Grimes splx(s); 126526f9a767SRodney W. Grimes } 126626f9a767SRodney W. Grimes } 126726f9a767SRodney W. Grimes 126826f9a767SRodney W. Grimes /* 126926f9a767SRodney W. Grimes * search forwards for the last contiguous page to transfer 127026f9a767SRodney W. Grimes */ 127126f9a767SRodney W. Grimes failed = 0; 127226f9a767SRodney W. 
Grimes for (i = 0; i < count; i++) { 1273a316d390SJohn Dyson if (failed || 1274a316d390SJohn Dyson (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) || 1275a316d390SJohn Dyson ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) || 127626f9a767SRodney W. Grimes (rtvals[i] != VM_PAGER_OK)) { 127726f9a767SRodney W. Grimes failed = 1; 127826f9a767SRodney W. Grimes if (rtvals[i] == VM_PAGER_OK) 127926f9a767SRodney W. Grimes rtvals[i] = VM_PAGER_AGAIN; 128026f9a767SRodney W. Grimes } 128126f9a767SRodney W. Grimes } 128226f9a767SRodney W. Grimes 1283e736cd05SJohn Dyson ix = 0; 1284e736cd05SJohn Dyson firstidx = -1; 128526f9a767SRodney W. Grimes for (i = 0; i < count; i++) { 1286e736cd05SJohn Dyson if (rtvals[i] == VM_PAGER_OK) { 1287e736cd05SJohn Dyson ix++; 1288e736cd05SJohn Dyson if (firstidx == -1) { 1289e736cd05SJohn Dyson firstidx = i; 129026f9a767SRodney W. Grimes } 1291e736cd05SJohn Dyson } else if (firstidx >= 0) { 129226f9a767SRodney W. Grimes break; 1293e736cd05SJohn Dyson } 1294e736cd05SJohn Dyson } 129526f9a767SRodney W. Grimes 1296e736cd05SJohn Dyson if (firstidx == -1) { 1297e47ed70bSJohn Dyson for (i = 0; i < count; i++) { 1298e47ed70bSJohn Dyson if (rtvals[i] == VM_PAGER_OK) 1299e47ed70bSJohn Dyson rtvals[i] = VM_PAGER_AGAIN; 1300e736cd05SJohn Dyson } 130126f9a767SRodney W. Grimes return VM_PAGER_AGAIN; 130226f9a767SRodney W. 
Grimes } 1303e736cd05SJohn Dyson 1304e736cd05SJohn Dyson lastidx = firstidx + ix; 1305e736cd05SJohn Dyson 1306e47ed70bSJohn Dyson if (ix > max_pageout_cluster) { 1307e47ed70bSJohn Dyson for (i = firstidx + max_pageout_cluster; i < lastidx; i++) { 1308e47ed70bSJohn Dyson if (rtvals[i] == VM_PAGER_OK) 1309e47ed70bSJohn Dyson rtvals[i] = VM_PAGER_AGAIN; 1310e47ed70bSJohn Dyson } 1311e47ed70bSJohn Dyson ix = max_pageout_cluster; 1312e47ed70bSJohn Dyson lastidx = firstidx + ix; 1313e47ed70bSJohn Dyson } 1314e47ed70bSJohn Dyson 1315e736cd05SJohn Dyson for (i = 0; i < firstidx; i++) { 1316e736cd05SJohn Dyson if (swb[i]) 1317e736cd05SJohn Dyson swb[i]->swb_locked--; 1318e736cd05SJohn Dyson } 1319e736cd05SJohn Dyson 1320e736cd05SJohn Dyson for (i = lastidx; i < count; i++) { 1321e736cd05SJohn Dyson if (swb[i]) 1322e736cd05SJohn Dyson swb[i]->swb_locked--; 1323e736cd05SJohn Dyson } 1324e736cd05SJohn Dyson 1325e47ed70bSJohn Dyson #if defined(DIAGNOSTIC) 1326e736cd05SJohn Dyson for (i = firstidx; i < lastidx; i++) { 1327a316d390SJohn Dyson if (reqaddr[i] == SWB_EMPTY) { 1328a316d390SJohn Dyson printf("I/O to empty block???? -- pindex: %d, i: %d\n", 1329a316d390SJohn Dyson m[i]->pindex, i); 1330a316d390SJohn Dyson } 133126f9a767SRodney W. Grimes } 1332e47ed70bSJohn Dyson #endif 133326f9a767SRodney W. Grimes 133426f9a767SRodney W. Grimes /* 1335e47ed70bSJohn Dyson * Clean up all completed async pageouts. 133626f9a767SRodney W. Grimes */ 1337e47ed70bSJohn Dyson if (swap_pager_free_pending) 133824a1cce3SDavid Greenman swap_pager_sync(); 133926f9a767SRodney W. Grimes 134026f9a767SRodney W. Grimes /* 134126f9a767SRodney W. Grimes * get a swap pager clean data structure, block until we get it 134226f9a767SRodney W. 
Grimes */ 13430d94caffSDavid Greenman if (curproc == pageproc) { 1344e47ed70bSJohn Dyson if (swap_pager_free_count == 0) { 1345e47ed70bSJohn Dyson s = splvm(); 1346e47ed70bSJohn Dyson while (swap_pager_free_count == 0) { 1347e47ed70bSJohn Dyson swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT; 1348cb6962cdSJohn Dyson /* 1349cb6962cdSJohn Dyson * if it does not get one within a short time, then 1350cb6962cdSJohn Dyson * there is a potential deadlock, so we go-on trying 1351bd7e5f99SJohn Dyson * to free pages. It is important to block here as opposed 1352bd7e5f99SJohn Dyson * to returning, thereby allowing the pageout daemon to continue. 1353bd7e5f99SJohn Dyson * It is likely that pageout daemon will start suboptimally 1354bd7e5f99SJohn Dyson * reclaiming vnode backed pages if we don't block. Since the 1355bd7e5f99SJohn Dyson * I/O subsystem is probably already fully utilized, might as 1356bd7e5f99SJohn Dyson * well wait. 1357cb6962cdSJohn Dyson */ 1358e47ed70bSJohn Dyson if (tsleep(&swap_pager_needflags, PVM-1, "swpfre", hz/2)) { 1359e47ed70bSJohn Dyson if (swap_pager_free_pending) 136024a1cce3SDavid Greenman swap_pager_sync(); 1361e47ed70bSJohn Dyson if (swap_pager_free_count == 0) { 1362e736cd05SJohn Dyson for (i = firstidx; i < lastidx; i++) { 1363e736cd05SJohn Dyson rtvals[i] = VM_PAGER_AGAIN; 1364e736cd05SJohn Dyson } 13650d94caffSDavid Greenman splx(s); 13660d94caffSDavid Greenman return VM_PAGER_AGAIN; 1367cb6962cdSJohn Dyson } 1368bd7e5f99SJohn Dyson } else { 1369bd7e5f99SJohn Dyson swap_pager_sync(); 137026f9a767SRodney W. Grimes } 1371bd7e5f99SJohn Dyson } 137226f9a767SRodney W. Grimes splx(s); 137326f9a767SRodney W. 
Grimes } 1374e47ed70bSJohn Dyson 1375b18bfc3dSJohn Dyson spc = TAILQ_FIRST(&swap_pager_free); 1376e47ed70bSJohn Dyson #if defined(DIAGNOSTIC) 13773091ee09SJohn Dyson if (spc == NULL) 1378e736cd05SJohn Dyson panic("swap_pager_putpages: free queue is empty, %d expected\n", 1379e736cd05SJohn Dyson swap_pager_free_count); 1380e47ed70bSJohn Dyson #endif 138126f9a767SRodney W. Grimes TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 13823091ee09SJohn Dyson swap_pager_free_count--; 1383fff93ab6SDavid Greenman 138426f9a767SRodney W. Grimes kva = spc->spc_kva; 1385e47ed70bSJohn Dyson bp = spc->spc_bp; 1386e47ed70bSJohn Dyson bzero(bp, sizeof *bp); 1387e47ed70bSJohn Dyson bp->b_spc = spc; 1388e47ed70bSJohn Dyson bp->b_vnbufs.le_next = NOLIST; 1389e47ed70bSJohn Dyson bp->b_data = (caddr_t) kva; 1390e47ed70bSJohn Dyson } else { 1391e47ed70bSJohn Dyson spc = NULL; 1392e47ed70bSJohn Dyson bp = getpbuf(); 1393e47ed70bSJohn Dyson kva = (vm_offset_t) bp->b_data; 1394e47ed70bSJohn Dyson bp->b_spc = NULL; 1395e47ed70bSJohn Dyson } 139626f9a767SRodney W. Grimes 139726f9a767SRodney W. Grimes /* 139826f9a767SRodney W. Grimes * map our page(s) into kva for I/O 139926f9a767SRodney W. Grimes */ 1400e736cd05SJohn Dyson pmap_qenter(kva, &m[firstidx], ix); 140126f9a767SRodney W. Grimes 140226f9a767SRodney W. Grimes /* 140326f9a767SRodney W. Grimes * get the base I/O offset into the swap file 140426f9a767SRodney W. Grimes */ 1405e736cd05SJohn Dyson for (i = firstidx; i < lastidx ; i++) { 1406a316d390SJohn Dyson fidx = m[i]->pindex + paging_pindex; 1407a316d390SJohn Dyson off = swap_pager_block_offset(fidx); 140826f9a767SRodney W. Grimes /* 140926f9a767SRodney W. Grimes * set the valid bit 141026f9a767SRodney W. Grimes */ 141126f9a767SRodney W. Grimes swb[i]->swb_valid |= (1 << off); 141226f9a767SRodney W. Grimes /* 141326f9a767SRodney W. Grimes * and unlock the data structure 141426f9a767SRodney W. Grimes */ 14152a4895f4SDavid Greenman swb[i]->swb_locked--; 141626f9a767SRodney W. 
Grimes } 141726f9a767SRodney W. Grimes 1418aba8f38eSDavid Greenman bp->b_flags = B_BUSY | B_PAGING; 141926f9a767SRodney W. Grimes bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 142026f9a767SRodney W. Grimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1421a481f200SDavid Greenman if (bp->b_rcred != NOCRED) 142226f9a767SRodney W. Grimes crhold(bp->b_rcred); 1423a481f200SDavid Greenman if (bp->b_wcred != NOCRED) 142426f9a767SRodney W. Grimes crhold(bp->b_wcred); 1425e736cd05SJohn Dyson bp->b_blkno = reqaddr[firstidx]; 14260d94caffSDavid Greenman pbgetvp(swapdev_vp, bp); 142716f62314SDavid Greenman 1428e736cd05SJohn Dyson bp->b_bcount = PAGE_SIZE * ix; 1429e736cd05SJohn Dyson bp->b_bufsize = PAGE_SIZE * ix; 1430e47ed70bSJohn Dyson 1431e47ed70bSJohn Dyson 1432e47ed70bSJohn Dyson s = splvm(); 143326f9a767SRodney W. Grimes swapdev_vp->v_numoutput++; 143426f9a767SRodney W. Grimes 143526f9a767SRodney W. Grimes /* 14360d94caffSDavid Greenman * If this is an async write we set up additional buffer fields and 14370d94caffSDavid Greenman * place a "cleaning" entry on the inuse queue. 143826f9a767SRodney W. Grimes */ 1439e47ed70bSJohn Dyson object->un_pager.swp.swp_poip++; 1440e47ed70bSJohn Dyson 1441e47ed70bSJohn Dyson if (spc) { 144226f9a767SRodney W. Grimes spc->spc_flags = 0; 14432a4895f4SDavid Greenman spc->spc_object = object; 1444e47ed70bSJohn Dyson bp->b_npages = ix; 1445e47ed70bSJohn Dyson for (i = firstidx; i < lastidx; i++) { 144626f9a767SRodney W. Grimes spc->spc_m[i] = m[i]; 1447e47ed70bSJohn Dyson bp->b_pages[i - firstidx] = m[i]; 1448e47ed70bSJohn Dyson vm_page_protect(m[i], VM_PROT_READ); 1449e47ed70bSJohn Dyson pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1450e47ed70bSJohn Dyson m[i]->dirty = 0; 1451e47ed70bSJohn Dyson } 1452e736cd05SJohn Dyson spc->spc_first = firstidx; 1453e736cd05SJohn Dyson spc->spc_count = ix; 145426f9a767SRodney W. Grimes /* 145526f9a767SRodney W. Grimes * the completion routine for async writes 145626f9a767SRodney W. 
Grimes */ 145726f9a767SRodney W. Grimes bp->b_flags |= B_CALL; 145826f9a767SRodney W. Grimes bp->b_iodone = swap_pager_iodone; 145926f9a767SRodney W. Grimes bp->b_dirtyoff = 0; 146026f9a767SRodney W. Grimes bp->b_dirtyend = bp->b_bcount; 146126f9a767SRodney W. Grimes TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list); 146226f9a767SRodney W. Grimes } else { 146326f9a767SRodney W. Grimes bp->b_flags |= B_CALL; 146426f9a767SRodney W. Grimes bp->b_iodone = swap_pager_iodone1; 1465e47ed70bSJohn Dyson bp->b_npages = ix; 1466e47ed70bSJohn Dyson for (i = firstidx; i < lastidx; i++) 1467e47ed70bSJohn Dyson bp->b_pages[i - firstidx] = m[i]; 146826f9a767SRodney W. Grimes } 1469976e77fcSDavid Greenman 1470976e77fcSDavid Greenman cnt.v_swapout++; 1471e736cd05SJohn Dyson cnt.v_swappgsout += ix; 1472e47ed70bSJohn Dyson 147326f9a767SRodney W. Grimes /* 147426f9a767SRodney W. Grimes * perform the I/O 147526f9a767SRodney W. Grimes */ 147626f9a767SRodney W. Grimes VOP_STRATEGY(bp); 147724a1cce3SDavid Greenman if (sync == FALSE) { 1478e47ed70bSJohn Dyson if (swap_pager_free_pending) { 147924a1cce3SDavid Greenman swap_pager_sync(); 148026f9a767SRodney W. Grimes } 1481e736cd05SJohn Dyson for (i = firstidx; i < lastidx; i++) { 148226f9a767SRodney W. Grimes rtvals[i] = VM_PAGER_PEND; 148326f9a767SRodney W. Grimes } 148426f9a767SRodney W. Grimes return VM_PAGER_PEND; 148526f9a767SRodney W. Grimes } 1486e47ed70bSJohn Dyson 1487e47ed70bSJohn Dyson s = splvm(); 148826f9a767SRodney W. Grimes /* 148926f9a767SRodney W. Grimes * wait for the sync I/O to complete 149026f9a767SRodney W. Grimes */ 149126f9a767SRodney W. Grimes while ((bp->b_flags & B_DONE) == 0) { 149224a1cce3SDavid Greenman tsleep(bp, PVM, "swwrt", 0); 149326f9a767SRodney W. 
Grimes } 1494e47ed70bSJohn Dyson 14951b119d9dSDavid Greenman if (bp->b_flags & B_ERROR) { 14961b119d9dSDavid Greenman printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n", 14971b119d9dSDavid Greenman bp->b_blkno, bp->b_bcount, bp->b_error); 1498a83c285cSDavid Greenman rv = VM_PAGER_ERROR; 14991b119d9dSDavid Greenman } else { 15001b119d9dSDavid Greenman rv = VM_PAGER_OK; 15011b119d9dSDavid Greenman } 150226f9a767SRodney W. Grimes 15032a4895f4SDavid Greenman object->un_pager.swp.swp_poip--; 15042a4895f4SDavid Greenman if (object->un_pager.swp.swp_poip == 0) 15052a4895f4SDavid Greenman wakeup(object); 150626f9a767SRodney W. Grimes 150726f9a767SRodney W. Grimes if (bp->b_vp) 15080d94caffSDavid Greenman pbrelvp(bp); 150926f9a767SRodney W. Grimes 151026f9a767SRodney W. Grimes splx(s); 151126f9a767SRodney W. Grimes 151226f9a767SRodney W. Grimes /* 151326f9a767SRodney W. Grimes * remove the mapping for kernel virtual 151426f9a767SRodney W. Grimes */ 1515e736cd05SJohn Dyson pmap_qremove(kva, ix); 151626f9a767SRodney W. Grimes 151726f9a767SRodney W. Grimes /* 15180d94caffSDavid Greenman * if we have written the page, then indicate that the page is clean. 151926f9a767SRodney W. Grimes */ 152026f9a767SRodney W. Grimes if (rv == VM_PAGER_OK) { 1521e736cd05SJohn Dyson for (i = firstidx; i < lastidx; i++) { 152226f9a767SRodney W. Grimes if (rtvals[i] == VM_PAGER_OK) { 152367bf6868SJohn Dyson pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 15240d94caffSDavid Greenman m[i]->dirty = 0; 152526f9a767SRodney W. Grimes /* 15260d94caffSDavid Greenman * optimization, if a page has been read 15270d94caffSDavid Greenman * during the pageout process, we activate it. 152826f9a767SRodney W. Grimes */ 1529eaf13dd7SJohn Dyson if (((m[i]->flags & (PG_WANTED|PG_REFERENCED)) || 15309b5a5d81SJohn Dyson pmap_ts_referenced(VM_PAGE_TO_PHYS(m[i])))) { 153126f9a767SRodney W. Grimes vm_page_activate(m[i]); 153226f9a767SRodney W. Grimes } 153326f9a767SRodney W. 
Grimes } 15347fb0c17eSDavid Greenman } 153526f9a767SRodney W. Grimes } else { 1536e736cd05SJohn Dyson for (i = firstidx; i < lastidx; i++) { 153726f9a767SRodney W. Grimes rtvals[i] = rv; 153826f9a767SRodney W. Grimes } 153926f9a767SRodney W. Grimes } 154026f9a767SRodney W. Grimes 154126f9a767SRodney W. Grimes if (bp->b_rcred != NOCRED) 154226f9a767SRodney W. Grimes crfree(bp->b_rcred); 154326f9a767SRodney W. Grimes if (bp->b_wcred != NOCRED) 154426f9a767SRodney W. Grimes crfree(bp->b_wcred); 1545e47ed70bSJohn Dyson 1546e47ed70bSJohn Dyson spc_free(spc); 1547e47ed70bSJohn Dyson if (swap_pager_free_pending) 1548e47ed70bSJohn Dyson swap_pager_sync(); 1549e47ed70bSJohn Dyson 155026f9a767SRodney W. Grimes return (rv); 155126f9a767SRodney W. Grimes } 155226f9a767SRodney W. Grimes /* swap_pager_sync: drain the swap_pager_done queue. Each descriptor found at the head of the queue is handed to swap_pager_finish(), one at a time, until the queue is empty. Takes no arguments and returns nothing. */ 1553f708ef1bSPoul-Henning Kamp static void 155424a1cce3SDavid Greenman swap_pager_sync() 155526f9a767SRodney W. Grimes { 1556e47ed70bSJohn Dyson swp_clean_t spc; 155726f9a767SRodney W. Grimes /* The assignment inside the loop condition is intentional: the loop stops when TAILQ_FIRST yields NULL for an empty queue. Forward progress depends on swap_pager_finish() unlinking spc from swap_pager_done. */ 1558e47ed70bSJohn Dyson while (spc = TAILQ_FIRST(&swap_pager_done)) { 155926f9a767SRodney W. Grimes swap_pager_finish(spc); 156026f9a767SRodney W. Grimes } 156124a1cce3SDavid Greenman return; 156226f9a767SRodney W. Grimes } 156326f9a767SRodney W. Grimes /* swap_pager_finish: retire one completed pageout descriptor. spc is an entry currently linked on swap_pager_done; the function unlinks it, unmaps its kva window, fixes up page state according to SPC_ERROR, adjusts the nswiodone and swap_pager_free_pending counters and passes the descriptor to spc_free(). Declared in old-style (K and R) form. */ 1564e47ed70bSJohn Dyson static void 156526f9a767SRodney W. Grimes swap_pager_finish(spc) 156626f9a767SRodney W. Grimes register swp_clean_t spc; 156726f9a767SRodney W. 
Grimes { /* Body of swap_pager_finish(spc): clean up after one pageout whose descriptor sits on swap_pager_done. */ 1568e47ed70bSJohn Dyson int i, s, lastidx; 1569e47ed70bSJohn Dyson vm_object_t object; 1570e47ed70bSJohn Dyson vm_page_t *ma; 1571e47ed70bSJohn Dyson /* spc_m is indexed with the same indices as the page array given to swap_pager_putpages (it stores spc_m[i] = m[i] for i from firstidx up to lastidx), so the valid slots are spc_first .. spc_first + spc_count - 1. The object is taken from the first of those pages. */ 1572e47ed70bSJohn Dyson ma = spc->spc_m; 1573e47ed70bSJohn Dyson object = ma[spc->spc_first]->object; 1574e47ed70bSJohn Dyson lastidx = spc->spc_first + spc->spc_count; 1575e47ed70bSJohn Dyson /* Unlink from the done queue with splvm raised; swap_pager_iodone manipulates the same queue under splvm. */ 1576e47ed70bSJohn Dyson s = splvm(); 1577e47ed70bSJohn Dyson TAILQ_REMOVE(&swap_pager_done, spc, spc_list); 1578e47ed70bSJohn Dyson splx(s); 1579e47ed70bSJohn Dyson /* Tear down the kva mapping that swap_pager_putpages set up with pmap_qenter for this descriptor. */ 1580e47ed70bSJohn Dyson pmap_qremove(spc->spc_kva, spc->spc_count); 1581e47ed70bSJohn Dyson 1582e47ed70bSJohn Dyson /* 1583e47ed70bSJohn Dyson * If no error, mark as clean and inform the pmap system. If error, 1584e47ed70bSJohn Dyson * mark as dirty so we will try again. (XXX could get stuck doing 1585e47ed70bSJohn Dyson * this, should give up after awhile) 1586e47ed70bSJohn Dyson */ /* NOTE(review): the success branch below does not itself clear any dirty state; swap_pager_putpages already set m[i]->dirty = 0 and called pmap_clear_modify before starting the write. Only the error branch changes dirty bits here. */ 1587e47ed70bSJohn Dyson if (spc->spc_flags & SPC_ERROR) { 1588e47ed70bSJohn Dyson /* Error: log each page, mark it fully dirty again and wake anyone waiting on it. */ 1589e47ed70bSJohn Dyson for (i = spc->spc_first; i < lastidx; i++) { 1590e47ed70bSJohn Dyson printf("swap_pager_finish: I/O error, clean of page %lx failed\n", 1591e47ed70bSJohn Dyson (u_long) VM_PAGE_TO_PHYS(ma[i])); 1592e47ed70bSJohn Dyson ma[i]->dirty = VM_PAGE_BITS_ALL; 1593e47ed70bSJohn Dyson PAGE_WAKEUP(ma[i]); 1594e47ed70bSJohn Dyson } 159526f9a767SRodney W. Grimes /* swap_pager_iodone skips the paging_in_progress decrement (and the page wakeups) when B_ERROR is set, so both are done here for the error case. */ 1596c0503609SDavid Greenman object->paging_in_progress -= spc->spc_count; 1597c0503609SDavid Greenman if ((object->paging_in_progress == 0) && 1598c0503609SDavid Greenman (object->flags & OBJ_PIPWNT)) { 1599c0503609SDavid Greenman object->flags &= ~OBJ_PIPWNT; 160024a1cce3SDavid Greenman wakeup(object); 1601c0503609SDavid Greenman } 1602df8bae1dSRodney W. Grimes 1603df8bae1dSRodney W. 
Grimes } else { /* Success: activate any page that is not already on the active queue and is either wanted (PG_WANTED) or was referenced according to pmap_ts_referenced. */ 1604e736cd05SJohn Dyson for (i = spc->spc_first; i < lastidx; i++) { 1605e736cd05SJohn Dyson if ((ma[i]->queue != PQ_ACTIVE) && 1606e736cd05SJohn Dyson ((ma[i]->flags & PG_WANTED) || 1607e736cd05SJohn Dyson pmap_ts_referenced(VM_PAGE_TO_PHYS(ma[i])))) 1608e736cd05SJohn Dyson vm_page_activate(ma[i]); 1609df8bae1dSRodney W. Grimes } 1610df8bae1dSRodney W. Grimes } 1611df8bae1dSRodney W. Grimes /* Reverse the accounting done in swap_pager_iodone (nswiodone += spc_count, swap_pager_free_pending++) and hand the descriptor to spc_free(). */ 161226f9a767SRodney W. Grimes nswiodone -= spc->spc_count; 1613e47ed70bSJohn Dyson swap_pager_free_pending--; 1614e47ed70bSJohn Dyson spc_free(spc); 1615df8bae1dSRodney W. Grimes 1616df8bae1dSRodney W. Grimes return; 161726f9a767SRodney W. Grimes } 1618df8bae1dSRodney W. Grimes 161926f9a767SRodney W. Grimes /* 162026f9a767SRodney W. Grimes * swap_pager_iodone 162126f9a767SRodney W. Grimes */ /* Completion routine installed as bp->b_iodone by swap_pager_putpages for writes that use a swp_clean descriptor. bp is the finished buffer; bp->b_spc points back at its descriptor. Moves the descriptor from swap_pager_inuse to swap_pager_done, records any error, releases the buffer vnode and credential references, updates the completion counters and issues wakeups. The whole body runs between splvm() and splx(). */ 1622f5a12711SPoul-Henning Kamp static void 1623df8bae1dSRodney W. Grimes swap_pager_iodone(bp) 1624df8bae1dSRodney W. Grimes register struct buf *bp; 1625df8bae1dSRodney W. Grimes { 1626e47ed70bSJohn Dyson int i, s; 1627df8bae1dSRodney W. Grimes register swp_clean_t spc; 1628e47ed70bSJohn Dyson vm_object_t object; 1629df8bae1dSRodney W. Grimes /* Queue the descriptor for later cleanup by swap_pager_sync / swap_pager_finish. All pages of the buffer are taken to belong to the object of b_pages[0]. */ 1630e47ed70bSJohn Dyson s = splvm(); 163126f9a767SRodney W. Grimes spc = (swp_clean_t) bp->b_spc; 163226f9a767SRodney W. Grimes TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list); 163326f9a767SRodney W. Grimes TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list); 1634e47ed70bSJohn Dyson 1635e47ed70bSJohn Dyson object = bp->b_pages[0]->object; 1636e47ed70bSJohn Dyson 1637e47ed70bSJohn Dyson #if defined(DIAGNOSTIC) 1638e47ed70bSJohn Dyson if (object->paging_in_progress < spc->spc_count) 1639e47ed70bSJohn Dyson printf("swap_pager_iodone: paging_in_progress(%d) < spc_count(%d)\n", 1640e47ed70bSJohn Dyson object->paging_in_progress, spc->spc_count); 1641e47ed70bSJohn Dyson #endif 1642e47ed70bSJohn Dyson 164326f9a767SRodney W. Grimes if (bp->b_flags & B_ERROR) { 1644df8bae1dSRodney W. 
Grimes /* B_ERROR branch of swap_pager_iodone: only flag the descriptor and log here. swap_pager_finish later sees SPC_ERROR, marks the pages dirty again, wakes them and drops paging_in_progress. */ spc->spc_flags |= SPC_ERROR; 1645c3a1e425SDavid Greenman printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n", 16461b119d9dSDavid Greenman (bp->b_flags & B_READ) ? "pagein" : "pageout", 1647c3a1e425SDavid Greenman (u_long) bp->b_blkno, bp->b_bcount, bp->b_error); 1648e47ed70bSJohn Dyson } else { /* Success branch: wake page waiters and drop paging_in_progress right away; the success path of swap_pager_finish does neither. */ 1649e47ed70bSJohn Dyson for (i = 0; i < bp->b_npages; i++) { 1650e47ed70bSJohn Dyson /* 1651e47ed70bSJohn Dyson * we wakeup any processes that are waiting on these pages. 1652e47ed70bSJohn Dyson */ 1653e47ed70bSJohn Dyson PAGE_WAKEUP(bp->b_pages[i]); 1654e47ed70bSJohn Dyson } 1655e47ed70bSJohn Dyson 1656e47ed70bSJohn Dyson object->paging_in_progress -= spc->spc_count; 1657e47ed70bSJohn Dyson if ((object->paging_in_progress == 0) && 1658e47ed70bSJohn Dyson (object->flags & OBJ_PIPWNT)) { 1659e47ed70bSJohn Dyson object->flags &= ~OBJ_PIPWNT; 1660e47ed70bSJohn Dyson wakeup(object); 1661e47ed70bSJohn Dyson } 1662df8bae1dSRodney W. Grimes } 166326f9a767SRodney W. Grimes /* Undo the pbgetvp() and crhold() calls made when swap_pager_putpages set the buffer up. */ 16640d94caffSDavid Greenman if (bp->b_vp) 16650d94caffSDavid Greenman pbrelvp(bp); 16660d94caffSDavid Greenman 166726f9a767SRodney W. Grimes if (bp->b_rcred != NOCRED) 166826f9a767SRodney W. Grimes crfree(bp->b_rcred); 166926f9a767SRodney W. Grimes if (bp->b_wcred != NOCRED) 167026f9a767SRodney W. Grimes crfree(bp->b_wcred); 167126f9a767SRodney W. Grimes /* Account for the finished descriptor; swap_pager_finish reverses both counters when it retires spc. */ 167226f9a767SRodney W. Grimes nswiodone += spc->spc_count; 1673e47ed70bSJohn Dyson swap_pager_free_pending++; /* Drop the per-object count that swap_pager_putpages raised (swp_poip++) and wake sleepers on the object once it reaches zero. */ 16742a4895f4SDavid Greenman if (--spc->spc_object->un_pager.swp.swp_poip == 0) { 16752a4895f4SDavid Greenman wakeup(spc->spc_object); 167626f9a767SRodney W. 
Grimes } 1677e47ed70bSJohn Dyson /* If any need flag is set and the free plus pending-cleanup descriptors exceed half of npendingio, call spc_wakeup(). */ 1678e47ed70bSJohn Dyson if (swap_pager_needflags && 1679e47ed70bSJohn Dyson ((swap_pager_free_count + swap_pager_free_pending) > (npendingio / 2))) { 1680e47ed70bSJohn Dyson spc_wakeup(); 1681a1f6d91cSDavid Greenman } 1682a1f6d91cSDavid Greenman /* Once nothing is left on swap_pager_inuse, wake sleepers on vm_pageout_pages_needed and clear that flag. */ 1683e47ed70bSJohn Dyson if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) && 1684e47ed70bSJohn Dyson vm_pageout_pages_needed) { 168524a1cce3SDavid Greenman wakeup(&vm_pageout_pages_needed); 1686a1f6d91cSDavid Greenman vm_pageout_pages_needed = 0; 168726f9a767SRodney W. Grimes } 1688e47ed70bSJohn Dyson /* Restore the priority level saved by the splvm() call at function entry. */ 168926f9a767SRodney W. Grimes splx(s); 169026f9a767SRodney W. Grimes } 1691