1df8bae1dSRodney W. Grimes /* 2df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 426f9a767SRodney W. Grimes * Copyright (c) 1994 John S. Dyson 526f9a767SRodney W. Grimes * All rights reserved. 626f9a767SRodney W. Grimes * Copyright (c) 1994 David Greenman 726f9a767SRodney W. Grimes * All rights reserved. 826f9a767SRodney W. Grimes * 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 11df8bae1dSRodney W. Grimes * The Mach Operating System project at Carnegie-Mellon University. 12df8bae1dSRodney W. Grimes * 13df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 14df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 15df8bae1dSRodney W. Grimes * are met: 16df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 17df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 18df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 19df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 20df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 21df8bae1dSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 22df8bae1dSRodney W. Grimes * must display the following acknowledgement: 23df8bae1dSRodney W. Grimes * This product includes software developed by the University of 24df8bae1dSRodney W. Grimes * California, Berkeley and its contributors. 25df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 26df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 27df8bae1dSRodney W. 
Grimes * without specific prior written permission. 28df8bae1dSRodney W. Grimes * 29df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39df8bae1dSRodney W. Grimes * SUCH DAMAGE. 40df8bae1dSRodney W. Grimes * 413c4dd356SDavid Greenman * from: @(#)vm_fault.c 8.4 (Berkeley) 1/12/94 42df8bae1dSRodney W. Grimes * 43df8bae1dSRodney W. Grimes * 44df8bae1dSRodney W. Grimes * Copyright (c) 1987, 1990 Carnegie-Mellon University. 45df8bae1dSRodney W. Grimes * All rights reserved. 46df8bae1dSRodney W. Grimes * 47df8bae1dSRodney W. Grimes * Authors: Avadis Tevanian, Jr., Michael Wayne Young 48df8bae1dSRodney W. Grimes * 49df8bae1dSRodney W. Grimes * Permission to use, copy, modify and distribute this software and 50df8bae1dSRodney W. Grimes * its documentation is hereby granted, provided that both the copyright 51df8bae1dSRodney W. Grimes * notice and this permission notice appear in all copies of the 52df8bae1dSRodney W. Grimes * software, derivative works or modified versions, and any portions 53df8bae1dSRodney W. 
Grimes * thereof, and that both notices appear in supporting documentation. 54df8bae1dSRodney W. Grimes * 55df8bae1dSRodney W. Grimes * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 56df8bae1dSRodney W. Grimes * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 57df8bae1dSRodney W. Grimes * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 58df8bae1dSRodney W. Grimes * 59df8bae1dSRodney W. Grimes * Carnegie Mellon requests users of this software to return to 60df8bae1dSRodney W. Grimes * 61df8bae1dSRodney W. Grimes * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 62df8bae1dSRodney W. Grimes * School of Computer Science 63df8bae1dSRodney W. Grimes * Carnegie Mellon University 64df8bae1dSRodney W. Grimes * Pittsburgh PA 15213-3890 65df8bae1dSRodney W. Grimes * 66df8bae1dSRodney W. Grimes * any improvements or extensions that they make and grant Carnegie the 67df8bae1dSRodney W. Grimes * rights to redistribute these changes. 683c4dd356SDavid Greenman * 696d40c3d3SDavid Greenman * $Id: vm_fault.c,v 1.15 1995/01/10 07:32:45 davidg Exp $ 70df8bae1dSRodney W. Grimes */ 71df8bae1dSRodney W. Grimes 72df8bae1dSRodney W. Grimes /* 73df8bae1dSRodney W. Grimes * Page fault handling module. 74df8bae1dSRodney W. Grimes */ 75df8bae1dSRodney W. Grimes 76df8bae1dSRodney W. Grimes #include <sys/param.h> 77df8bae1dSRodney W. Grimes #include <sys/systm.h> 7826f9a767SRodney W. Grimes #include <sys/proc.h> 7905f0fdd2SPoul-Henning Kamp #include <sys/resource.h> 8005f0fdd2SPoul-Henning Kamp #include <sys/signalvar.h> 8126f9a767SRodney W. Grimes #include <sys/resourcevar.h> 82df8bae1dSRodney W. Grimes 83df8bae1dSRodney W. Grimes #include <vm/vm.h> 84df8bae1dSRodney W. Grimes #include <vm/vm_page.h> 85df8bae1dSRodney W. Grimes #include <vm/vm_pageout.h> 86a83c285cSDavid Greenman #include <vm/vm_kern.h> 87df8bae1dSRodney W. 
Grimes 8805f0fdd2SPoul-Henning Kamp int vm_fault_additional_pages __P((vm_object_t, vm_offset_t, vm_page_t, int, int, vm_page_t *, int *)); 8926f9a767SRodney W. Grimes 9026f9a767SRodney W. Grimes #define VM_FAULT_READ_AHEAD 4 9126f9a767SRodney W. Grimes #define VM_FAULT_READ_AHEAD_MIN 1 9226f9a767SRodney W. Grimes #define VM_FAULT_READ_BEHIND 3 9326f9a767SRodney W. Grimes #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) 9426f9a767SRodney W. Grimes extern int swap_pager_full; 9526f9a767SRodney W. Grimes extern int vm_pageout_proc_limit; 9626f9a767SRodney W. Grimes 97df8bae1dSRodney W. Grimes /* 98df8bae1dSRodney W. Grimes * vm_fault: 99df8bae1dSRodney W. Grimes * 100df8bae1dSRodney W. Grimes * Handle a page fault occurring at the given address, 101df8bae1dSRodney W. Grimes * requiring the given permissions, in the map specified. 102df8bae1dSRodney W. Grimes * If successful, the page is inserted into the 103df8bae1dSRodney W. Grimes * associated physical map. 104df8bae1dSRodney W. Grimes * 105df8bae1dSRodney W. Grimes * NOTE: the given address should be truncated to the 106df8bae1dSRodney W. Grimes * proper page address. 107df8bae1dSRodney W. Grimes * 108df8bae1dSRodney W. Grimes * KERN_SUCCESS is returned if the page fault is handled; otherwise, 109df8bae1dSRodney W. Grimes * a standard error specifying why the fault is fatal is returned. 110df8bae1dSRodney W. Grimes * 111df8bae1dSRodney W. Grimes * 112df8bae1dSRodney W. Grimes * The map in question must be referenced, and remains so. 113df8bae1dSRodney W. Grimes * Caller may hold no locks. 114df8bae1dSRodney W. Grimes */ 115df8bae1dSRodney W. Grimes int 116df8bae1dSRodney W. Grimes vm_fault(map, vaddr, fault_type, change_wiring) 117df8bae1dSRodney W. Grimes vm_map_t map; 118df8bae1dSRodney W. Grimes vm_offset_t vaddr; 119df8bae1dSRodney W. Grimes vm_prot_t fault_type; 120df8bae1dSRodney W. Grimes boolean_t change_wiring; 121df8bae1dSRodney W. Grimes { 122df8bae1dSRodney W. 
Grimes vm_object_t first_object; 123df8bae1dSRodney W. Grimes vm_offset_t first_offset; 124df8bae1dSRodney W. Grimes vm_map_entry_t entry; 125df8bae1dSRodney W. Grimes register vm_object_t object; 126df8bae1dSRodney W. Grimes register vm_offset_t offset; 12726f9a767SRodney W. Grimes vm_page_t m; 128df8bae1dSRodney W. Grimes vm_page_t first_m; 129df8bae1dSRodney W. Grimes vm_prot_t prot; 130df8bae1dSRodney W. Grimes int result; 131df8bae1dSRodney W. Grimes boolean_t wired; 132df8bae1dSRodney W. Grimes boolean_t su; 133df8bae1dSRodney W. Grimes boolean_t lookup_still_valid; 134df8bae1dSRodney W. Grimes boolean_t page_exists; 135df8bae1dSRodney W. Grimes vm_page_t old_m; 136df8bae1dSRodney W. Grimes vm_object_t next_object; 13726f9a767SRodney W. Grimes vm_page_t marray[VM_FAULT_READ]; 13826f9a767SRodney W. Grimes int spl; 13926f9a767SRodney W. Grimes int hardfault = 0; 140df8bae1dSRodney W. Grimes 141b8d95f16SDavid Greenman cnt.v_vm_faults++; /* needs lock XXX */ 142df8bae1dSRodney W. Grimes /* 143df8bae1dSRodney W. Grimes * Recovery actions 144df8bae1dSRodney W. Grimes */ 145df8bae1dSRodney W. Grimes #define FREE_PAGE(m) { \ 146df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); \ 147df8bae1dSRodney W. Grimes vm_page_lock_queues(); \ 148df8bae1dSRodney W. Grimes vm_page_free(m); \ 149df8bae1dSRodney W. Grimes vm_page_unlock_queues(); \ 150df8bae1dSRodney W. Grimes } 151df8bae1dSRodney W. Grimes 152df8bae1dSRodney W. Grimes #define RELEASE_PAGE(m) { \ 153df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); \ 154df8bae1dSRodney W. Grimes vm_page_lock_queues(); \ 155df8bae1dSRodney W. Grimes vm_page_activate(m); \ 156df8bae1dSRodney W. Grimes vm_page_unlock_queues(); \ 157df8bae1dSRodney W. Grimes } 158df8bae1dSRodney W. Grimes 159df8bae1dSRodney W. Grimes #define UNLOCK_MAP { \ 160df8bae1dSRodney W. Grimes if (lookup_still_valid) { \ 161df8bae1dSRodney W. Grimes vm_map_lookup_done(map, entry); \ 162df8bae1dSRodney W. Grimes lookup_still_valid = FALSE; \ 163df8bae1dSRodney W. 
Grimes } \ 164df8bae1dSRodney W. Grimes } 165df8bae1dSRodney W. Grimes 166df8bae1dSRodney W. Grimes #define UNLOCK_THINGS { \ 167df8bae1dSRodney W. Grimes object->paging_in_progress--; \ 16826f9a767SRodney W. Grimes if (object->paging_in_progress == 0) \ 16926f9a767SRodney W. Grimes wakeup((caddr_t)object); \ 170df8bae1dSRodney W. Grimes vm_object_unlock(object); \ 171df8bae1dSRodney W. Grimes if (object != first_object) { \ 172df8bae1dSRodney W. Grimes vm_object_lock(first_object); \ 173df8bae1dSRodney W. Grimes FREE_PAGE(first_m); \ 174df8bae1dSRodney W. Grimes first_object->paging_in_progress--; \ 17526f9a767SRodney W. Grimes if (first_object->paging_in_progress == 0) \ 17626f9a767SRodney W. Grimes wakeup((caddr_t)first_object); \ 177df8bae1dSRodney W. Grimes vm_object_unlock(first_object); \ 178df8bae1dSRodney W. Grimes } \ 179df8bae1dSRodney W. Grimes UNLOCK_MAP; \ 180df8bae1dSRodney W. Grimes } 181df8bae1dSRodney W. Grimes 182df8bae1dSRodney W. Grimes #define UNLOCK_AND_DEALLOCATE { \ 183df8bae1dSRodney W. Grimes UNLOCK_THINGS; \ 184df8bae1dSRodney W. Grimes vm_object_deallocate(first_object); \ 185df8bae1dSRodney W. Grimes } 186df8bae1dSRodney W. Grimes 18726f9a767SRodney W. Grimes 188df8bae1dSRodney W. Grimes RetryFault:; 189df8bae1dSRodney W. Grimes 190df8bae1dSRodney W. Grimes /* 1910d94caffSDavid Greenman * Find the backing store object and offset into it to begin the 1920d94caffSDavid Greenman * search. 193df8bae1dSRodney W. Grimes */ 194df8bae1dSRodney W. Grimes 1950d94caffSDavid Greenman if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry, &first_object, 1960d94caffSDavid Greenman &first_offset, &prot, &wired, &su)) != KERN_SUCCESS) { 197df8bae1dSRodney W. Grimes return (result); 198df8bae1dSRodney W. Grimes } 199df8bae1dSRodney W. Grimes lookup_still_valid = TRUE; 200df8bae1dSRodney W. Grimes 201df8bae1dSRodney W. Grimes if (wired) 202df8bae1dSRodney W. Grimes fault_type = prot; 203df8bae1dSRodney W. Grimes 204df8bae1dSRodney W. 
Grimes first_m = NULL; 205df8bae1dSRodney W. Grimes 206df8bae1dSRodney W. Grimes /* 2070d94caffSDavid Greenman * Make a reference to this object to prevent its disposal while we 2080d94caffSDavid Greenman * are messing with it. Once we have the reference, the map is free 2090d94caffSDavid Greenman * to be diddled. Since objects reference their shadows (and copies), 2100d94caffSDavid Greenman * they will stay around as well. 211df8bae1dSRodney W. Grimes */ 212df8bae1dSRodney W. Grimes 213df8bae1dSRodney W. Grimes vm_object_lock(first_object); 214df8bae1dSRodney W. Grimes 215df8bae1dSRodney W. Grimes first_object->ref_count++; 216df8bae1dSRodney W. Grimes first_object->paging_in_progress++; 217df8bae1dSRodney W. Grimes 218df8bae1dSRodney W. Grimes /* 219df8bae1dSRodney W. Grimes * INVARIANTS (through entire routine): 220df8bae1dSRodney W. Grimes * 2210d94caffSDavid Greenman * 1) At all times, we must either have the object lock or a busy 2220d94caffSDavid Greenman * page in some object to prevent some other thread from trying to 2230d94caffSDavid Greenman * bring in the same page. 224df8bae1dSRodney W. Grimes * 2250d94caffSDavid Greenman * Note that we cannot hold any locks during the pager access or when 2260d94caffSDavid Greenman * waiting for memory, so we use a busy page then. 227df8bae1dSRodney W. Grimes * 2280d94caffSDavid Greenman * Note also that we aren't as concerned about more than one thread 2290d94caffSDavid Greenman * attempting to pager_data_unlock the same page at once, so we don't 2300d94caffSDavid Greenman * hold the page as busy then, but do record the highest unlock value 2310d94caffSDavid Greenman * so far. [Unlock requests may also be delivered out of order.] 232df8bae1dSRodney W. Grimes * 2330d94caffSDavid Greenman * 2) Once we have a busy page, we must remove it from the pageout 2340d94caffSDavid Greenman * queues, so that the pageout daemon will not grab it away. 235df8bae1dSRodney W. 
Grimes * 2360d94caffSDavid Greenman * 3) To prevent another thread from racing us down the shadow chain 2370d94caffSDavid Greenman * and entering a new page in the top object before we do, we must 2380d94caffSDavid Greenman * keep a busy page in the top object while following the shadow 2390d94caffSDavid Greenman * chain. 240df8bae1dSRodney W. Grimes * 2410d94caffSDavid Greenman * 4) We must increment paging_in_progress on any object for which 2420d94caffSDavid Greenman * we have a busy page, to prevent vm_object_collapse from removing 2430d94caffSDavid Greenman * the busy page without our noticing. 244df8bae1dSRodney W. Grimes */ 245df8bae1dSRodney W. Grimes 246df8bae1dSRodney W. Grimes /* 247df8bae1dSRodney W. Grimes * Search for the page at object/offset. 248df8bae1dSRodney W. Grimes */ 249df8bae1dSRodney W. Grimes 250df8bae1dSRodney W. Grimes object = first_object; 251df8bae1dSRodney W. Grimes offset = first_offset; 252df8bae1dSRodney W. Grimes 253df8bae1dSRodney W. Grimes /* 254df8bae1dSRodney W. Grimes * See whether this page is resident 255df8bae1dSRodney W. Grimes */ 256df8bae1dSRodney W. Grimes 257df8bae1dSRodney W. Grimes while (TRUE) { 258df8bae1dSRodney W. Grimes m = vm_page_lookup(object, offset); 259df8bae1dSRodney W. Grimes if (m != NULL) { 260df8bae1dSRodney W. Grimes /* 2610d94caffSDavid Greenman * If the page is being brought in, wait for it and 2620d94caffSDavid Greenman * then retry. 263df8bae1dSRodney W. Grimes */ 2640d94caffSDavid Greenman if ((m->flags & PG_BUSY) || m->busy) { 26516f62314SDavid Greenman int s; 2660d94caffSDavid Greenman 267df8bae1dSRodney W. Grimes UNLOCK_THINGS; 26816f62314SDavid Greenman s = splhigh(); 2690d94caffSDavid Greenman if ((m->flags & PG_BUSY) || m->busy) { 2700d94caffSDavid Greenman m->flags |= PG_WANTED | PG_REFERENCED; 271976e77fcSDavid Greenman cnt.v_intrans++; 27226f9a767SRodney W. Grimes tsleep((caddr_t) m, PSWP, "vmpfw", 0); 27326f9a767SRodney W. 
Grimes } 27416f62314SDavid Greenman splx(s); 275df8bae1dSRodney W. Grimes vm_object_deallocate(first_object); 276df8bae1dSRodney W. Grimes goto RetryFault; 277df8bae1dSRodney W. Grimes } 2780d94caffSDavid Greenman if ((m->flags & PG_CACHE) && 2790d94caffSDavid Greenman (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) { 2800d94caffSDavid Greenman UNLOCK_AND_DEALLOCATE; 2810d94caffSDavid Greenman VM_WAIT; 2820d94caffSDavid Greenman goto RetryFault; 2830d94caffSDavid Greenman } 284df8bae1dSRodney W. Grimes /* 2850d94caffSDavid Greenman * Remove the page from the pageout daemon's reach 2860d94caffSDavid Greenman * while we play with it. 287df8bae1dSRodney W. Grimes */ 288df8bae1dSRodney W. Grimes 289df8bae1dSRodney W. Grimes vm_page_lock_queues(); 2900d94caffSDavid Greenman vm_page_unqueue(m); 291df8bae1dSRodney W. Grimes vm_page_unlock_queues(); 292df8bae1dSRodney W. Grimes 293df8bae1dSRodney W. Grimes /* 294df8bae1dSRodney W. Grimes * Mark page busy for other threads. 295df8bae1dSRodney W. Grimes */ 296df8bae1dSRodney W. Grimes m->flags |= PG_BUSY; 2970d94caffSDavid Greenman if (m->object != kernel_object && m->object != kmem_object && 2980d94caffSDavid Greenman m->valid && 2990d94caffSDavid Greenman ((m->valid & vm_page_bits(0, PAGE_SIZE)) 3000d94caffSDavid Greenman != vm_page_bits(0, PAGE_SIZE))) { 3010d94caffSDavid Greenman goto readrest; 3020d94caffSDavid Greenman } 303df8bae1dSRodney W. Grimes break; 304df8bae1dSRodney W. Grimes } 3050d94caffSDavid Greenman if (((object->pager != NULL) && (!change_wiring || wired)) 306df8bae1dSRodney W. Grimes || (object == first_object)) { 307df8bae1dSRodney W. Grimes 30847c9acfdSDavid Greenman if (swap_pager_full && !object->shadow && (!object->pager || 30926f9a767SRodney W. Grimes (object->pager && object->pager->pg_type == PG_SWAP && 31026f9a767SRodney W. 
Grimes !vm_pager_has_page(object->pager, offset + object->paging_offset)))) { 3110d94caffSDavid Greenman if (vaddr < VM_MAXUSER_ADDRESS && curproc && curproc->p_pid >= 48) { /* XXX */ 31205f0fdd2SPoul-Henning Kamp printf("Process %lu killed by vm_fault -- out of swap\n", (u_long) curproc->p_pid); 31326f9a767SRodney W. Grimes psignal(curproc, SIGKILL); 31426f9a767SRodney W. Grimes curproc->p_estcpu = 0; 31526f9a767SRodney W. Grimes curproc->p_nice = PRIO_MIN; 316da8b3304SDavid Greenman resetpriority(curproc); 31726f9a767SRodney W. Grimes } 31826f9a767SRodney W. Grimes } 319df8bae1dSRodney W. Grimes /* 3200d94caffSDavid Greenman * Allocate a new page for this object/offset pair. 321df8bae1dSRodney W. Grimes */ 322df8bae1dSRodney W. Grimes 3236d40c3d3SDavid Greenman m = vm_page_alloc(object, offset, VM_ALLOC_NORMAL); 324df8bae1dSRodney W. Grimes 325df8bae1dSRodney W. Grimes if (m == NULL) { 326df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 327df8bae1dSRodney W. Grimes VM_WAIT; 328df8bae1dSRodney W. Grimes goto RetryFault; 329df8bae1dSRodney W. Grimes } 330df8bae1dSRodney W. Grimes } 3310d94caffSDavid Greenman readrest: 332df8bae1dSRodney W. Grimes if (object->pager != NULL && (!change_wiring || wired)) { 333df8bae1dSRodney W. Grimes int rv; 33426f9a767SRodney W. Grimes int faultcount; 33526f9a767SRodney W. Grimes int reqpage; 336df8bae1dSRodney W. Grimes 337df8bae1dSRodney W. Grimes /* 3380d94caffSDavid Greenman * Now that we have a busy page, we can release the 3390d94caffSDavid Greenman * object lock. 340df8bae1dSRodney W. Grimes */ 341df8bae1dSRodney W. Grimes vm_object_unlock(object); 34226f9a767SRodney W. Grimes /* 3430d94caffSDavid Greenman * now we find out if any other pages should be paged 3440d94caffSDavid Greenman * in at this time this routine checks to see if the 3450d94caffSDavid Greenman * pages surrounding this fault reside in the same 3460d94caffSDavid Greenman * object as the page for this fault. 
If they do, 3470d94caffSDavid Greenman * then they are faulted in also into the object. The 3480d94caffSDavid Greenman * array "marray" returned contains an array of 3490d94caffSDavid Greenman * vm_page_t structs where one of them is the 3500d94caffSDavid Greenman * vm_page_t passed to the routine. The reqpage 3510d94caffSDavid Greenman * return value is the index into the marray for the 3520d94caffSDavid Greenman * vm_page_t passed to the routine. 35326f9a767SRodney W. Grimes */ 35405f0fdd2SPoul-Henning Kamp faultcount = vm_fault_additional_pages( 35505f0fdd2SPoul-Henning Kamp first_object, first_offset, 35605f0fdd2SPoul-Henning Kamp m, VM_FAULT_READ_BEHIND, VM_FAULT_READ_AHEAD, 35705f0fdd2SPoul-Henning Kamp marray, &reqpage); 358df8bae1dSRodney W. Grimes 359df8bae1dSRodney W. Grimes /* 3600d94caffSDavid Greenman * Call the pager to retrieve the data, if any, after 3610d94caffSDavid Greenman * releasing the lock on the map. 362df8bae1dSRodney W. Grimes */ 363df8bae1dSRodney W. Grimes UNLOCK_MAP; 364df8bae1dSRodney W. Grimes 36526f9a767SRodney W. Grimes rv = faultcount ? 36626f9a767SRodney W. Grimes vm_pager_get_pages(object->pager, 36726f9a767SRodney W. Grimes marray, faultcount, reqpage, TRUE) : VM_PAGER_FAIL; 36826f9a767SRodney W. Grimes if (rv == VM_PAGER_OK) { 369df8bae1dSRodney W. Grimes /* 3700d94caffSDavid Greenman * Found the page. Leave it busy while we play 3710d94caffSDavid Greenman * with it. 372df8bae1dSRodney W. Grimes */ 37326f9a767SRodney W. Grimes vm_object_lock(object); 37426f9a767SRodney W. Grimes 375df8bae1dSRodney W. Grimes /* 3760d94caffSDavid Greenman * Relookup in case pager changed page. Pager 3770d94caffSDavid Greenman * is responsible for disposition of old page 3780d94caffSDavid Greenman * if moved. 379df8bae1dSRodney W. Grimes */ 380df8bae1dSRodney W. 
Grimes m = vm_page_lookup(object, offset); 3810d94caffSDavid Greenman if (!m) { 3820d94caffSDavid Greenman printf("vm_fault: error fetching offset: %lx (fc: %d, rq: %d)\n", 3830d94caffSDavid Greenman offset, faultcount, reqpage); 3840d94caffSDavid Greenman } 3850d94caffSDavid Greenman m->valid = VM_PAGE_BITS_ALL; 386df8bae1dSRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 38726f9a767SRodney W. Grimes hardfault++; 388df8bae1dSRodney W. Grimes break; 389df8bae1dSRodney W. Grimes } 390df8bae1dSRodney W. Grimes /* 3910d94caffSDavid Greenman * Remove the bogus page (which does not exist at this 3920d94caffSDavid Greenman * object/offset); before doing so, we must get back 3930d94caffSDavid Greenman * our object lock to preserve our invariant. 394df8bae1dSRodney W. Grimes * 3950d94caffSDavid Greenman * Also wake up any other thread that may want to bring 3960d94caffSDavid Greenman * in this page. 397df8bae1dSRodney W. Grimes * 3980d94caffSDavid Greenman * If this is the top-level object, we must leave the 3990d94caffSDavid Greenman * busy page to prevent another thread from rushing 4000d94caffSDavid Greenman * past us, and inserting the page in that object at 4010d94caffSDavid Greenman * the same time that we are. 402df8bae1dSRodney W. Grimes */ 40326f9a767SRodney W. Grimes 404a83c285cSDavid Greenman if (rv == VM_PAGER_ERROR) 405a83c285cSDavid Greenman printf("vm_fault: pager input (probably hardware) error, PID %d failure\n", 406a83c285cSDavid Greenman curproc->p_pid); 40726f9a767SRodney W. Grimes vm_object_lock(object); 40826f9a767SRodney W. Grimes /* 409a83c285cSDavid Greenman * Data outside the range of the pager or an I/O error 41026f9a767SRodney W. Grimes */ 411a83c285cSDavid Greenman /* 4120d94caffSDavid Greenman * XXX - the check for kernel_map is a kludge to work 4130d94caffSDavid Greenman * around having the machine panic on a kernel space 4140d94caffSDavid Greenman * fault w/ I/O error. 
415a83c285cSDavid Greenman */ 416a83c285cSDavid Greenman if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { 41726f9a767SRodney W. Grimes FREE_PAGE(m); 41826f9a767SRodney W. Grimes UNLOCK_AND_DEALLOCATE; 419a83c285cSDavid Greenman return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); 42026f9a767SRodney W. Grimes } 421df8bae1dSRodney W. Grimes if (object != first_object) { 422df8bae1dSRodney W. Grimes FREE_PAGE(m); 42326f9a767SRodney W. Grimes /* 42426f9a767SRodney W. Grimes * XXX - we cannot just fall out at this 42526f9a767SRodney W. Grimes * point, m has been freed and is invalid! 42626f9a767SRodney W. Grimes */ 427df8bae1dSRodney W. Grimes } 428df8bae1dSRodney W. Grimes } 429df8bae1dSRodney W. Grimes /* 4300d94caffSDavid Greenman * We get here if the object has no pager (or unwiring) or the 4310d94caffSDavid Greenman * pager doesn't have the page. 432df8bae1dSRodney W. Grimes */ 433df8bae1dSRodney W. Grimes if (object == first_object) 434df8bae1dSRodney W. Grimes first_m = m; 435df8bae1dSRodney W. Grimes 436df8bae1dSRodney W. Grimes /* 4370d94caffSDavid Greenman * Move on to the next object. Lock the next object before 4380d94caffSDavid Greenman * unlocking the current one. 439df8bae1dSRodney W. Grimes */ 440df8bae1dSRodney W. Grimes 441df8bae1dSRodney W. Grimes offset += object->shadow_offset; 442df8bae1dSRodney W. Grimes next_object = object->shadow; 443df8bae1dSRodney W. Grimes if (next_object == NULL) { 444df8bae1dSRodney W. Grimes /* 4450d94caffSDavid Greenman * If there's no object left, fill the page in the top 4460d94caffSDavid Greenman * object with zeros. 447df8bae1dSRodney W. Grimes */ 448df8bae1dSRodney W. Grimes if (object != first_object) { 449df8bae1dSRodney W. Grimes object->paging_in_progress--; 45026f9a767SRodney W. Grimes if (object->paging_in_progress == 0) 45126f9a767SRodney W. Grimes wakeup((caddr_t) object); 452df8bae1dSRodney W. Grimes vm_object_unlock(object); 453df8bae1dSRodney W. 
Grimes 454df8bae1dSRodney W. Grimes object = first_object; 455df8bae1dSRodney W. Grimes offset = first_offset; 456df8bae1dSRodney W. Grimes m = first_m; 457df8bae1dSRodney W. Grimes vm_object_lock(object); 458df8bae1dSRodney W. Grimes } 459df8bae1dSRodney W. Grimes first_m = NULL; 460df8bae1dSRodney W. Grimes 461df8bae1dSRodney W. Grimes vm_page_zero_fill(m); 4620d94caffSDavid Greenman m->valid = VM_PAGE_BITS_ALL; 463df8bae1dSRodney W. Grimes cnt.v_zfod++; 464df8bae1dSRodney W. Grimes break; 4650d94caffSDavid Greenman } else { 466df8bae1dSRodney W. Grimes vm_object_lock(next_object); 46726f9a767SRodney W. Grimes if (object != first_object) { 468df8bae1dSRodney W. Grimes object->paging_in_progress--; 46926f9a767SRodney W. Grimes if (object->paging_in_progress == 0) 47026f9a767SRodney W. Grimes wakeup((caddr_t) object); 47126f9a767SRodney W. Grimes } 472df8bae1dSRodney W. Grimes vm_object_unlock(object); 473df8bae1dSRodney W. Grimes object = next_object; 474df8bae1dSRodney W. Grimes object->paging_in_progress++; 475df8bae1dSRodney W. Grimes } 476df8bae1dSRodney W. Grimes } 477df8bae1dSRodney W. Grimes 4780d94caffSDavid Greenman if ((m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE) != 0) || 47926f9a767SRodney W. Grimes (m->flags & PG_BUSY) == 0) 48026f9a767SRodney W. Grimes panic("vm_fault: absent or active or inactive or not busy after main loop"); 481df8bae1dSRodney W. Grimes 482df8bae1dSRodney W. Grimes /* 4830d94caffSDavid Greenman * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock 484df8bae1dSRodney W. Grimes * is held.] 485df8bae1dSRodney W. Grimes */ 486df8bae1dSRodney W. Grimes 487df8bae1dSRodney W. Grimes old_m = m; /* save page that would be copied */ 488df8bae1dSRodney W. Grimes 489df8bae1dSRodney W. Grimes /* 4900d94caffSDavid Greenman * If the page is being written, but isn't already owned by the 4910d94caffSDavid Greenman * top-level object, we have to copy it into a new page owned by the 4920d94caffSDavid Greenman * top-level object. 
493df8bae1dSRodney W. Grimes */ 494df8bae1dSRodney W. Grimes 495df8bae1dSRodney W. Grimes if (object != first_object) { 496df8bae1dSRodney W. Grimes /* 4970d94caffSDavid Greenman * We only really need to copy if we want to write it. 498df8bae1dSRodney W. Grimes */ 499df8bae1dSRodney W. Grimes 500df8bae1dSRodney W. Grimes if (fault_type & VM_PROT_WRITE) { 501df8bae1dSRodney W. Grimes 502df8bae1dSRodney W. Grimes /* 5030d94caffSDavid Greenman * If we try to collapse first_object at this point, 5040d94caffSDavid Greenman * we may deadlock when we try to get the lock on an 5050d94caffSDavid Greenman * intermediate object (since we have the bottom 5060d94caffSDavid Greenman * object locked). We can't unlock the bottom object, 5070d94caffSDavid Greenman * because the page we found may move (by collapse) if 5080d94caffSDavid Greenman * we do. 509df8bae1dSRodney W. Grimes * 5100d94caffSDavid Greenman * Instead, we first copy the page. Then, when we have 5110d94caffSDavid Greenman * no more use for the bottom object, we unlock it and 5120d94caffSDavid Greenman * try to collapse. 513df8bae1dSRodney W. Grimes * 5140d94caffSDavid Greenman * Note that we copy the page even if we didn't need 5150d94caffSDavid Greenman * to... that's the breaks. 516df8bae1dSRodney W. Grimes */ 517df8bae1dSRodney W. Grimes 518df8bae1dSRodney W. Grimes /* 5190d94caffSDavid Greenman * We already have an empty page in first_object - use 5200d94caffSDavid Greenman * it. 521df8bae1dSRodney W. Grimes */ 522df8bae1dSRodney W. Grimes 523df8bae1dSRodney W. Grimes vm_page_copy(m, first_m); 5240d94caffSDavid Greenman first_m->valid = VM_PAGE_BITS_ALL; 525df8bae1dSRodney W. Grimes 526df8bae1dSRodney W. 
Grimes /* 5270d94caffSDavid Greenman * If another map is truly sharing this page with us, 5280d94caffSDavid Greenman * we have to flush all uses of the original page, 5290d94caffSDavid Greenman * since we can't distinguish those which want the 5300d94caffSDavid Greenman * original from those which need the new copy. 531df8bae1dSRodney W. Grimes * 5320d94caffSDavid Greenman * XXX If we know that only one map has access to this 5330d94caffSDavid Greenman * page, then we could avoid the pmap_page_protect() 5340d94caffSDavid Greenman * call. 535df8bae1dSRodney W. Grimes */ 536df8bae1dSRodney W. Grimes 537df8bae1dSRodney W. Grimes vm_page_lock_queues(); 53826f9a767SRodney W. Grimes 539df8bae1dSRodney W. Grimes vm_page_activate(m); 540df8bae1dSRodney W. Grimes pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); 541df8bae1dSRodney W. Grimes vm_page_unlock_queues(); 542df8bae1dSRodney W. Grimes 543df8bae1dSRodney W. Grimes /* 544df8bae1dSRodney W. Grimes * We no longer need the old page or object. 545df8bae1dSRodney W. Grimes */ 546df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); 547df8bae1dSRodney W. Grimes object->paging_in_progress--; 54826f9a767SRodney W. Grimes if (object->paging_in_progress == 0) 54926f9a767SRodney W. Grimes wakeup((caddr_t) object); 550df8bae1dSRodney W. Grimes vm_object_unlock(object); 551df8bae1dSRodney W. Grimes 552df8bae1dSRodney W. Grimes /* 553df8bae1dSRodney W. Grimes * Only use the new page below... 554df8bae1dSRodney W. Grimes */ 555df8bae1dSRodney W. Grimes 556df8bae1dSRodney W. Grimes cnt.v_cow_faults++; 557df8bae1dSRodney W. Grimes m = first_m; 558df8bae1dSRodney W. Grimes object = first_object; 559df8bae1dSRodney W. Grimes offset = first_offset; 560df8bae1dSRodney W. Grimes 561df8bae1dSRodney W. Grimes /* 5620d94caffSDavid Greenman * Now that we've gotten the copy out of the way, 5630d94caffSDavid Greenman * let's try to collapse the top object. 564df8bae1dSRodney W. Grimes */ 565df8bae1dSRodney W. 
Grimes vm_object_lock(object); 566df8bae1dSRodney W. Grimes /* 567df8bae1dSRodney W. Grimes * But we have to play ugly games with 568df8bae1dSRodney W. Grimes * paging_in_progress to do that... 569df8bae1dSRodney W. Grimes */ 570df8bae1dSRodney W. Grimes object->paging_in_progress--; 57126f9a767SRodney W. Grimes if (object->paging_in_progress == 0) 57226f9a767SRodney W. Grimes wakeup((caddr_t) object); 573df8bae1dSRodney W. Grimes vm_object_collapse(object); 574df8bae1dSRodney W. Grimes object->paging_in_progress++; 5750d94caffSDavid Greenman } else { 576df8bae1dSRodney W. Grimes prot &= ~VM_PROT_WRITE; 577df8bae1dSRodney W. Grimes m->flags |= PG_COPYONWRITE; 578df8bae1dSRodney W. Grimes } 579df8bae1dSRodney W. Grimes } 5800d94caffSDavid Greenman if (m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE)) 581df8bae1dSRodney W. Grimes panic("vm_fault: active or inactive before copy object handling"); 582df8bae1dSRodney W. Grimes 583df8bae1dSRodney W. Grimes /* 5840d94caffSDavid Greenman * If the page is being written, but hasn't been copied to the 5850d94caffSDavid Greenman * copy-object, we have to copy it there. 586df8bae1dSRodney W. Grimes */ 587df8bae1dSRodney W. Grimes RetryCopy: 588df8bae1dSRodney W. Grimes if (first_object->copy != NULL) { 589df8bae1dSRodney W. Grimes vm_object_t copy_object = first_object->copy; 590df8bae1dSRodney W. Grimes vm_offset_t copy_offset; 591df8bae1dSRodney W. Grimes vm_page_t copy_m; 592df8bae1dSRodney W. Grimes 593df8bae1dSRodney W. Grimes /* 594df8bae1dSRodney W. Grimes * We only need to copy if we want to write it. 595df8bae1dSRodney W. Grimes */ 596df8bae1dSRodney W. Grimes if ((fault_type & VM_PROT_WRITE) == 0) { 597df8bae1dSRodney W. Grimes prot &= ~VM_PROT_WRITE; 598df8bae1dSRodney W. Grimes m->flags |= PG_COPYONWRITE; 5990d94caffSDavid Greenman } else { 600df8bae1dSRodney W. Grimes /* 601df8bae1dSRodney W. Grimes * Try to get the lock on the copy_object. 602df8bae1dSRodney W. Grimes */ 603df8bae1dSRodney W. 
Grimes if (!vm_object_lock_try(copy_object)) { 604df8bae1dSRodney W. Grimes vm_object_unlock(object); 605df8bae1dSRodney W. Grimes /* should spin a bit here... */ 606df8bae1dSRodney W. Grimes vm_object_lock(object); 607df8bae1dSRodney W. Grimes goto RetryCopy; 608df8bae1dSRodney W. Grimes } 609df8bae1dSRodney W. Grimes /* 6100d94caffSDavid Greenman * Make another reference to the copy-object, to keep 6110d94caffSDavid Greenman * it from disappearing during the copy. 612df8bae1dSRodney W. Grimes */ 613df8bae1dSRodney W. Grimes copy_object->ref_count++; 614df8bae1dSRodney W. Grimes 615df8bae1dSRodney W. Grimes /* 616df8bae1dSRodney W. Grimes * Does the page exist in the copy? 617df8bae1dSRodney W. Grimes */ 618df8bae1dSRodney W. Grimes copy_offset = first_offset 619df8bae1dSRodney W. Grimes - copy_object->shadow_offset; 620df8bae1dSRodney W. Grimes copy_m = vm_page_lookup(copy_object, copy_offset); 62105f0fdd2SPoul-Henning Kamp page_exists = (copy_m != NULL); 62205f0fdd2SPoul-Henning Kamp if (page_exists) { 6230d94caffSDavid Greenman if ((copy_m->flags & PG_BUSY) || copy_m->busy) { 624df8bae1dSRodney W. Grimes /* 6250d94caffSDavid Greenman * If the page is being brought in, 6260d94caffSDavid Greenman * wait for it and then retry. 627df8bae1dSRodney W. Grimes */ 628df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 629df8bae1dSRodney W. Grimes copy_object->ref_count--; 630df8bae1dSRodney W. Grimes vm_object_unlock(copy_object); 631df8bae1dSRodney W. Grimes UNLOCK_THINGS; 63247c9acfdSDavid Greenman spl = splhigh(); 6330d94caffSDavid Greenman if ((copy_m->flags & PG_BUSY) || copy_m->busy) { 6340d94caffSDavid Greenman copy_m->flags |= PG_WANTED | PG_REFERENCED; 63547c9acfdSDavid Greenman tsleep((caddr_t) copy_m, PSWP, "vmpfwc", 0); 63647c9acfdSDavid Greenman } 63747c9acfdSDavid Greenman splx(spl); 638df8bae1dSRodney W. Grimes vm_object_deallocate(first_object); 639df8bae1dSRodney W. Grimes goto RetryFault; 640df8bae1dSRodney W. Grimes } 641df8bae1dSRodney W. 
Grimes } 642df8bae1dSRodney W. Grimes /* 6430d94caffSDavid Greenman * If the page is not in memory (in the object) and 6440d94caffSDavid Greenman * the object has a pager, we have to check if the 6450d94caffSDavid Greenman * pager has the data in secondary storage. 646df8bae1dSRodney W. Grimes */ 647df8bae1dSRodney W. Grimes if (!page_exists) { 648df8bae1dSRodney W. Grimes 649df8bae1dSRodney W. Grimes /* 6500d94caffSDavid Greenman * If we don't allocate a (blank) page here... 6510d94caffSDavid Greenman * another thread could try to page it in, 6520d94caffSDavid Greenman * allocate a page, and then block on the busy 6530d94caffSDavid Greenman * page in its shadow (first_object). Then 6540d94caffSDavid Greenman * we'd trip over the busy page after we found 6550d94caffSDavid Greenman * that the copy_object's pager doesn't have 6560d94caffSDavid Greenman * the page... 657df8bae1dSRodney W. Grimes */ 6586d40c3d3SDavid Greenman copy_m = vm_page_alloc(copy_object, copy_offset, VM_ALLOC_NORMAL); 659df8bae1dSRodney W. Grimes if (copy_m == NULL) { 660df8bae1dSRodney W. Grimes /* 661df8bae1dSRodney W. Grimes * Wait for a page, then retry. 662df8bae1dSRodney W. Grimes */ 663df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 664df8bae1dSRodney W. Grimes copy_object->ref_count--; 665df8bae1dSRodney W. Grimes vm_object_unlock(copy_object); 666df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 667df8bae1dSRodney W. Grimes VM_WAIT; 668df8bae1dSRodney W. Grimes goto RetryFault; 669df8bae1dSRodney W. Grimes } 670df8bae1dSRodney W. Grimes if (copy_object->pager != NULL) { 671df8bae1dSRodney W. Grimes vm_object_unlock(object); 672df8bae1dSRodney W. Grimes vm_object_unlock(copy_object); 673df8bae1dSRodney W. Grimes UNLOCK_MAP; 674df8bae1dSRodney W. Grimes 675df8bae1dSRodney W. Grimes page_exists = vm_pager_has_page( 676df8bae1dSRodney W. Grimes copy_object->pager, 677df8bae1dSRodney W. Grimes (copy_offset + copy_object->paging_offset)); 678df8bae1dSRodney W. Grimes 679df8bae1dSRodney W. 
Grimes vm_object_lock(copy_object); 680df8bae1dSRodney W. Grimes 681df8bae1dSRodney W. Grimes /* 682df8bae1dSRodney W. Grimes * Since the map is unlocked, someone 683df8bae1dSRodney W. Grimes * else could have copied this object 684df8bae1dSRodney W. Grimes * and put a different copy_object 685df8bae1dSRodney W. Grimes * between the two. Or, the last 686df8bae1dSRodney W. Grimes * reference to the copy-object (other 687df8bae1dSRodney W. Grimes * than the one we have) may have 688df8bae1dSRodney W. Grimes * disappeared - if that has happened, 689df8bae1dSRodney W. Grimes * we don't need to make the copy. 690df8bae1dSRodney W. Grimes */ 691df8bae1dSRodney W. Grimes if (copy_object->shadow != object || 692df8bae1dSRodney W. Grimes copy_object->ref_count == 1) { 693df8bae1dSRodney W. Grimes /* 694df8bae1dSRodney W. Grimes * Gaah... start over! 695df8bae1dSRodney W. Grimes */ 696df8bae1dSRodney W. Grimes FREE_PAGE(copy_m); 697df8bae1dSRodney W. Grimes vm_object_unlock(copy_object); 698df8bae1dSRodney W. Grimes vm_object_deallocate(copy_object); 699df8bae1dSRodney W. Grimes /* may block */ 700df8bae1dSRodney W. Grimes vm_object_lock(object); 701df8bae1dSRodney W. Grimes goto RetryCopy; 702df8bae1dSRodney W. Grimes } 703df8bae1dSRodney W. Grimes vm_object_lock(object); 704df8bae1dSRodney W. Grimes 705df8bae1dSRodney W. Grimes if (page_exists) { 706df8bae1dSRodney W. Grimes /* 707df8bae1dSRodney W. Grimes * We didn't need the page 708df8bae1dSRodney W. Grimes */ 709df8bae1dSRodney W. Grimes FREE_PAGE(copy_m); 710df8bae1dSRodney W. Grimes } 711df8bae1dSRodney W. Grimes } 712df8bae1dSRodney W. Grimes } 713df8bae1dSRodney W. Grimes if (!page_exists) { 714df8bae1dSRodney W. Grimes /* 715df8bae1dSRodney W. Grimes * Must copy page into copy-object. 716df8bae1dSRodney W. Grimes */ 717df8bae1dSRodney W. Grimes vm_page_copy(m, copy_m); 7180d94caffSDavid Greenman copy_m->valid = VM_PAGE_BITS_ALL; 719df8bae1dSRodney W. Grimes 720df8bae1dSRodney W. 
Grimes /* 7210d94caffSDavid Greenman * Things to remember: 1. The copied page must 7220d94caffSDavid Greenman * be marked 'dirty' so it will be paged out 7230d94caffSDavid Greenman * to the copy object. 2. If the old page was 7240d94caffSDavid Greenman * in use by any users of the copy-object, it 7250d94caffSDavid Greenman * must be removed from all pmaps. (We can't 7260d94caffSDavid Greenman * know which pmaps use it.) 727df8bae1dSRodney W. Grimes */ 728df8bae1dSRodney W. Grimes vm_page_lock_queues(); 72926f9a767SRodney W. Grimes 73026f9a767SRodney W. Grimes vm_page_activate(old_m); 73126f9a767SRodney W. Grimes 732df8bae1dSRodney W. Grimes pmap_page_protect(VM_PAGE_TO_PHYS(old_m), 733df8bae1dSRodney W. Grimes VM_PROT_NONE); 7340d94caffSDavid Greenman copy_m->dirty = VM_PAGE_BITS_ALL; 73526f9a767SRodney W. Grimes vm_page_activate(copy_m); 736df8bae1dSRodney W. Grimes vm_page_unlock_queues(); 737df8bae1dSRodney W. Grimes 738df8bae1dSRodney W. Grimes PAGE_WAKEUP(copy_m); 739df8bae1dSRodney W. Grimes } 740df8bae1dSRodney W. Grimes /* 7410d94caffSDavid Greenman * The reference count on copy_object must be at least 7420d94caffSDavid Greenman * 2: one for our extra reference, and at least one 7430d94caffSDavid Greenman * from the outside world (we checked that when we 7440d94caffSDavid Greenman * last locked copy_object). 745df8bae1dSRodney W. Grimes */ 746df8bae1dSRodney W. Grimes copy_object->ref_count--; 747df8bae1dSRodney W. Grimes vm_object_unlock(copy_object); 748df8bae1dSRodney W. Grimes m->flags &= ~PG_COPYONWRITE; 749df8bae1dSRodney W. Grimes } 750df8bae1dSRodney W. Grimes } 7510d94caffSDavid Greenman if (m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE)) 752df8bae1dSRodney W. Grimes panic("vm_fault: active or inactive before retrying lookup"); 753df8bae1dSRodney W. Grimes 754df8bae1dSRodney W. Grimes /* 7550d94caffSDavid Greenman * We must verify that the maps have not changed since our last 7560d94caffSDavid Greenman * lookup. 757df8bae1dSRodney W. 
Grimes */ 758df8bae1dSRodney W. Grimes 759df8bae1dSRodney W. Grimes if (!lookup_still_valid) { 760df8bae1dSRodney W. Grimes vm_object_t retry_object; 761df8bae1dSRodney W. Grimes vm_offset_t retry_offset; 762df8bae1dSRodney W. Grimes vm_prot_t retry_prot; 763df8bae1dSRodney W. Grimes 764df8bae1dSRodney W. Grimes /* 7650d94caffSDavid Greenman * Since map entries may be pageable, make sure we can take a 7660d94caffSDavid Greenman * page fault on them. 767df8bae1dSRodney W. Grimes */ 768df8bae1dSRodney W. Grimes vm_object_unlock(object); 769df8bae1dSRodney W. Grimes 770df8bae1dSRodney W. Grimes /* 7710d94caffSDavid Greenman * To avoid trying to write_lock the map while another thread 7720d94caffSDavid Greenman * has it read_locked (in vm_map_pageable), we do not try for 7730d94caffSDavid Greenman * write permission. If the page is still writable, we will 7740d94caffSDavid Greenman * get write permission. If it is not, or has been marked 7750d94caffSDavid Greenman * needs_copy, we enter the mapping without write permission, 7760d94caffSDavid Greenman * and will merely take another fault. 777df8bae1dSRodney W. Grimes */ 7780d94caffSDavid Greenman result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE, 7790d94caffSDavid Greenman &entry, &retry_object, &retry_offset, &retry_prot, &wired, &su); 780df8bae1dSRodney W. Grimes 781df8bae1dSRodney W. Grimes vm_object_lock(object); 782df8bae1dSRodney W. Grimes 783df8bae1dSRodney W. Grimes /* 7840d94caffSDavid Greenman * If we don't need the page any longer, put it on the active 7850d94caffSDavid Greenman * list (the easiest thing to do here). If no one needs it, 7860d94caffSDavid Greenman * pageout will grab it eventually. 787df8bae1dSRodney W. Grimes */ 788df8bae1dSRodney W. Grimes 789df8bae1dSRodney W. Grimes if (result != KERN_SUCCESS) { 790df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 791df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 792df8bae1dSRodney W. Grimes return (result); 793df8bae1dSRodney W. 
Grimes } 794df8bae1dSRodney W. Grimes lookup_still_valid = TRUE; 795df8bae1dSRodney W. Grimes 796df8bae1dSRodney W. Grimes if ((retry_object != first_object) || 797df8bae1dSRodney W. Grimes (retry_offset != first_offset)) { 798df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 799df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 800df8bae1dSRodney W. Grimes goto RetryFault; 801df8bae1dSRodney W. Grimes } 802df8bae1dSRodney W. Grimes /* 8030d94caffSDavid Greenman * Check whether the protection has changed or the object has 8040d94caffSDavid Greenman * been copied while we left the map unlocked. Changing from 8050d94caffSDavid Greenman * read to write permission is OK - we leave the page 8060d94caffSDavid Greenman * write-protected, and catch the write fault. Changing from 8070d94caffSDavid Greenman * write to read permission means that we can't mark the page 8080d94caffSDavid Greenman * write-enabled after all. 809df8bae1dSRodney W. Grimes */ 810df8bae1dSRodney W. Grimes prot &= retry_prot; 811df8bae1dSRodney W. Grimes if (m->flags & PG_COPYONWRITE) 812df8bae1dSRodney W. Grimes prot &= ~VM_PROT_WRITE; 813df8bae1dSRodney W. Grimes } 814df8bae1dSRodney W. Grimes /* 8150d94caffSDavid Greenman * (the various bits we're fiddling with here are locked by the 8160d94caffSDavid Greenman * object's lock) 817df8bae1dSRodney W. Grimes */ 818df8bae1dSRodney W. Grimes 819df8bae1dSRodney W. Grimes /* XXX This distorts the meaning of the copy_on_write bit */ 820df8bae1dSRodney W. Grimes 821df8bae1dSRodney W. Grimes if (prot & VM_PROT_WRITE) 822df8bae1dSRodney W. Grimes m->flags &= ~PG_COPYONWRITE; 823df8bae1dSRodney W. Grimes 824df8bae1dSRodney W. Grimes /* 8250d94caffSDavid Greenman * It's critically important that a wired-down page be faulted only 8260d94caffSDavid Greenman * once in each map for which it is wired. 827df8bae1dSRodney W. Grimes */ 828df8bae1dSRodney W. Grimes 8290d94caffSDavid Greenman if (m->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE)) 830df8bae1dSRodney W. 
Grimes panic("vm_fault: active or inactive before pmap_enter"); 831df8bae1dSRodney W. Grimes 832df8bae1dSRodney W. Grimes vm_object_unlock(object); 833df8bae1dSRodney W. Grimes 834df8bae1dSRodney W. Grimes /* 8350d94caffSDavid Greenman * Put this page into the physical map. We had to do the unlock above 8360d94caffSDavid Greenman * because pmap_enter may cause other faults. We don't put the page 8370d94caffSDavid Greenman * back on the active queue until later so that the page-out daemon 8380d94caffSDavid Greenman * won't find us (yet). 839df8bae1dSRodney W. Grimes */ 840df8bae1dSRodney W. Grimes 841df8bae1dSRodney W. Grimes pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); 8426d40c3d3SDavid Greenman if( ((prot & VM_PROT_WRITE) == 0) && change_wiring == 0 && wired == 0) 8436d40c3d3SDavid Greenman pmap_prefault(map->pmap, vaddr, entry, first_object); 844df8bae1dSRodney W. Grimes 845df8bae1dSRodney W. Grimes /* 8460d94caffSDavid Greenman * If the page is not wired down, then put it where the pageout daemon 8470d94caffSDavid Greenman * can find it. 848df8bae1dSRodney W. Grimes */ 849df8bae1dSRodney W. Grimes vm_object_lock(object); 850df8bae1dSRodney W. Grimes vm_page_lock_queues(); 851df8bae1dSRodney W. Grimes if (change_wiring) { 852df8bae1dSRodney W. Grimes if (wired) 853df8bae1dSRodney W. Grimes vm_page_wire(m); 854df8bae1dSRodney W. Grimes else 855df8bae1dSRodney W. Grimes vm_page_unwire(m); 8560d94caffSDavid Greenman } else { 857df8bae1dSRodney W. Grimes vm_page_activate(m); 85826f9a767SRodney W. Grimes } 85926f9a767SRodney W. Grimes 86026f9a767SRodney W. Grimes if (curproc && curproc->p_stats) { 86126f9a767SRodney W. Grimes if (hardfault) { 86226f9a767SRodney W. Grimes curproc->p_stats->p_ru.ru_majflt++; 86326f9a767SRodney W. Grimes } else { 86426f9a767SRodney W. Grimes curproc->p_stats->p_ru.ru_minflt++; 86526f9a767SRodney W. Grimes } 86626f9a767SRodney W. Grimes } 867df8bae1dSRodney W. Grimes vm_page_unlock_queues(); 868df8bae1dSRodney W. 
Grimes 869df8bae1dSRodney W. Grimes /* 870df8bae1dSRodney W. Grimes * Unlock everything, and return 871df8bae1dSRodney W. Grimes */ 872df8bae1dSRodney W. Grimes 873df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); 874df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 875df8bae1dSRodney W. Grimes 876df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 877df8bae1dSRodney W. Grimes 878df8bae1dSRodney W. Grimes } 879df8bae1dSRodney W. Grimes 880df8bae1dSRodney W. Grimes /* 881df8bae1dSRodney W. Grimes * vm_fault_wire: 882df8bae1dSRodney W. Grimes * 883df8bae1dSRodney W. Grimes * Wire down a range of virtual addresses in a map. 884df8bae1dSRodney W. Grimes */ 885df8bae1dSRodney W. Grimes int 886df8bae1dSRodney W. Grimes vm_fault_wire(map, start, end) 887df8bae1dSRodney W. Grimes vm_map_t map; 888df8bae1dSRodney W. Grimes vm_offset_t start, end; 889df8bae1dSRodney W. Grimes { 89026f9a767SRodney W. Grimes 891df8bae1dSRodney W. Grimes register vm_offset_t va; 892df8bae1dSRodney W. Grimes register pmap_t pmap; 893df8bae1dSRodney W. Grimes int rv; 894df8bae1dSRodney W. Grimes 895df8bae1dSRodney W. Grimes pmap = vm_map_pmap(map); 896df8bae1dSRodney W. Grimes 897df8bae1dSRodney W. Grimes /* 8980d94caffSDavid Greenman * Inform the physical mapping system that the range of addresses may 8990d94caffSDavid Greenman * not fault, so that page tables and such can be locked down as well. 900df8bae1dSRodney W. Grimes */ 901df8bae1dSRodney W. Grimes 902df8bae1dSRodney W. Grimes pmap_pageable(pmap, start, end, FALSE); 903df8bae1dSRodney W. Grimes 904df8bae1dSRodney W. Grimes /* 9050d94caffSDavid Greenman * We simulate a fault to get the page and enter it in the physical 9060d94caffSDavid Greenman * map. 907df8bae1dSRodney W. Grimes */ 908df8bae1dSRodney W. Grimes 909df8bae1dSRodney W. 
Grimes for (va = start; va < end; va += PAGE_SIZE) { 9106d40c3d3SDavid Greenman 9116d40c3d3SDavid Greenman if( curproc != pageproc && 9126d40c3d3SDavid Greenman (cnt.v_free_count <= cnt.v_pageout_free_min)) 9136d40c3d3SDavid Greenman VM_WAIT; 9146d40c3d3SDavid Greenman 915df8bae1dSRodney W. Grimes rv = vm_fault(map, va, VM_PROT_NONE, TRUE); 916df8bae1dSRodney W. Grimes if (rv) { 917df8bae1dSRodney W. Grimes if (va != start) 918df8bae1dSRodney W. Grimes vm_fault_unwire(map, start, va); 919df8bae1dSRodney W. Grimes return (rv); 920df8bae1dSRodney W. Grimes } 921df8bae1dSRodney W. Grimes } 922df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 923df8bae1dSRodney W. Grimes } 924df8bae1dSRodney W. Grimes 925df8bae1dSRodney W. Grimes 926df8bae1dSRodney W. Grimes /* 927df8bae1dSRodney W. Grimes * vm_fault_unwire: 928df8bae1dSRodney W. Grimes * 929df8bae1dSRodney W. Grimes * Unwire a range of virtual addresses in a map. 930df8bae1dSRodney W. Grimes */ 93126f9a767SRodney W. Grimes void 93226f9a767SRodney W. Grimes vm_fault_unwire(map, start, end) 933df8bae1dSRodney W. Grimes vm_map_t map; 934df8bae1dSRodney W. Grimes vm_offset_t start, end; 935df8bae1dSRodney W. Grimes { 936df8bae1dSRodney W. Grimes 937df8bae1dSRodney W. Grimes register vm_offset_t va, pa; 938df8bae1dSRodney W. Grimes register pmap_t pmap; 939df8bae1dSRodney W. Grimes 940df8bae1dSRodney W. Grimes pmap = vm_map_pmap(map); 941df8bae1dSRodney W. Grimes 942df8bae1dSRodney W. Grimes /* 9430d94caffSDavid Greenman * Since the pages are wired down, we must be able to get their 9440d94caffSDavid Greenman * mappings from the physical map system. 945df8bae1dSRodney W. Grimes */ 946df8bae1dSRodney W. Grimes 947df8bae1dSRodney W. Grimes vm_page_lock_queues(); 948df8bae1dSRodney W. Grimes 949df8bae1dSRodney W. Grimes for (va = start; va < end; va += PAGE_SIZE) { 950df8bae1dSRodney W. Grimes pa = pmap_extract(pmap, va); 951df8bae1dSRodney W. Grimes if (pa == (vm_offset_t) 0) { 952df8bae1dSRodney W. 
Grimes panic("unwire: page not in pmap"); 953df8bae1dSRodney W. Grimes } 954df8bae1dSRodney W. Grimes pmap_change_wiring(pmap, va, FALSE); 955df8bae1dSRodney W. Grimes vm_page_unwire(PHYS_TO_VM_PAGE(pa)); 956df8bae1dSRodney W. Grimes } 957df8bae1dSRodney W. Grimes vm_page_unlock_queues(); 958df8bae1dSRodney W. Grimes 959df8bae1dSRodney W. Grimes /* 9600d94caffSDavid Greenman * Inform the physical mapping system that the range of addresses may 9610d94caffSDavid Greenman * fault, so that page tables and such may be unwired themselves. 962df8bae1dSRodney W. Grimes */ 963df8bae1dSRodney W. Grimes 964df8bae1dSRodney W. Grimes pmap_pageable(pmap, start, end, TRUE); 965df8bae1dSRodney W. Grimes 966df8bae1dSRodney W. Grimes } 967df8bae1dSRodney W. Grimes 968df8bae1dSRodney W. Grimes /* 969df8bae1dSRodney W. Grimes * Routine: 970df8bae1dSRodney W. Grimes * vm_fault_copy_entry 971df8bae1dSRodney W. Grimes * Function: 972df8bae1dSRodney W. Grimes * Copy all of the pages from a wired-down map entry to another. 973df8bae1dSRodney W. Grimes * 974df8bae1dSRodney W. Grimes * In/out conditions: 975df8bae1dSRodney W. Grimes * The source and destination maps must be locked for write. 976df8bae1dSRodney W. Grimes * The source map entry must be wired down (or be a sharing map 977df8bae1dSRodney W. Grimes * entry corresponding to a main map entry that is wired down). 978df8bae1dSRodney W. Grimes */ 979df8bae1dSRodney W. Grimes 98026f9a767SRodney W. Grimes void 98126f9a767SRodney W. Grimes vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) 982df8bae1dSRodney W. Grimes vm_map_t dst_map; 983df8bae1dSRodney W. Grimes vm_map_t src_map; 984df8bae1dSRodney W. Grimes vm_map_entry_t dst_entry; 985df8bae1dSRodney W. Grimes vm_map_entry_t src_entry; 986df8bae1dSRodney W. Grimes { 987df8bae1dSRodney W. Grimes vm_object_t dst_object; 988df8bae1dSRodney W. Grimes vm_object_t src_object; 989df8bae1dSRodney W. Grimes vm_offset_t dst_offset; 990df8bae1dSRodney W. 
Grimes vm_offset_t src_offset; 991df8bae1dSRodney W. Grimes vm_prot_t prot; 992df8bae1dSRodney W. Grimes vm_offset_t vaddr; 993df8bae1dSRodney W. Grimes vm_page_t dst_m; 994df8bae1dSRodney W. Grimes vm_page_t src_m; 995df8bae1dSRodney W. Grimes 996df8bae1dSRodney W. Grimes #ifdef lint 997df8bae1dSRodney W. Grimes src_map++; 9980d94caffSDavid Greenman #endif /* lint */ 999df8bae1dSRodney W. Grimes 1000df8bae1dSRodney W. Grimes src_object = src_entry->object.vm_object; 1001df8bae1dSRodney W. Grimes src_offset = src_entry->offset; 1002df8bae1dSRodney W. Grimes 1003df8bae1dSRodney W. Grimes /* 10040d94caffSDavid Greenman * Create the top-level object for the destination entry. (Doesn't 10050d94caffSDavid Greenman * actually shadow anything - we copy the pages directly.) 1006df8bae1dSRodney W. Grimes */ 1007df8bae1dSRodney W. Grimes dst_object = vm_object_allocate( 1008df8bae1dSRodney W. Grimes (vm_size_t) (dst_entry->end - dst_entry->start)); 1009df8bae1dSRodney W. Grimes 1010df8bae1dSRodney W. Grimes dst_entry->object.vm_object = dst_object; 1011df8bae1dSRodney W. Grimes dst_entry->offset = 0; 1012df8bae1dSRodney W. Grimes 1013df8bae1dSRodney W. Grimes prot = dst_entry->max_protection; 1014df8bae1dSRodney W. Grimes 1015df8bae1dSRodney W. Grimes /* 10160d94caffSDavid Greenman * Loop through all of the pages in the entry's range, copying each 10170d94caffSDavid Greenman * one from the source object (it should be there) to the destination 10180d94caffSDavid Greenman * object. 1019df8bae1dSRodney W. Grimes */ 1020df8bae1dSRodney W. Grimes for (vaddr = dst_entry->start, dst_offset = 0; 1021df8bae1dSRodney W. Grimes vaddr < dst_entry->end; 1022df8bae1dSRodney W. Grimes vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { 1023df8bae1dSRodney W. Grimes 1024df8bae1dSRodney W. Grimes /* 1025df8bae1dSRodney W. Grimes * Allocate a page in the destination object 1026df8bae1dSRodney W. Grimes */ 1027df8bae1dSRodney W. Grimes vm_object_lock(dst_object); 1028df8bae1dSRodney W. 
Grimes do { 10296d40c3d3SDavid Greenman dst_m = vm_page_alloc(dst_object, dst_offset, VM_ALLOC_NORMAL); 1030df8bae1dSRodney W. Grimes if (dst_m == NULL) { 1031df8bae1dSRodney W. Grimes vm_object_unlock(dst_object); 1032df8bae1dSRodney W. Grimes VM_WAIT; 1033df8bae1dSRodney W. Grimes vm_object_lock(dst_object); 1034df8bae1dSRodney W. Grimes } 1035df8bae1dSRodney W. Grimes } while (dst_m == NULL); 1036df8bae1dSRodney W. Grimes 1037df8bae1dSRodney W. Grimes /* 1038df8bae1dSRodney W. Grimes * Find the page in the source object, and copy it in. 10390d94caffSDavid Greenman * (Because the source is wired down, the page will be in 10400d94caffSDavid Greenman * memory.) 1041df8bae1dSRodney W. Grimes */ 1042df8bae1dSRodney W. Grimes vm_object_lock(src_object); 1043df8bae1dSRodney W. Grimes src_m = vm_page_lookup(src_object, dst_offset + src_offset); 1044df8bae1dSRodney W. Grimes if (src_m == NULL) 1045df8bae1dSRodney W. Grimes panic("vm_fault_copy_wired: page missing"); 1046df8bae1dSRodney W. Grimes 1047df8bae1dSRodney W. Grimes vm_page_copy(src_m, dst_m); 1048df8bae1dSRodney W. Grimes 1049df8bae1dSRodney W. Grimes /* 1050df8bae1dSRodney W. Grimes * Enter it in the pmap... 1051df8bae1dSRodney W. Grimes */ 1052df8bae1dSRodney W. Grimes vm_object_unlock(src_object); 1053df8bae1dSRodney W. Grimes vm_object_unlock(dst_object); 1054df8bae1dSRodney W. Grimes 1055df8bae1dSRodney W. Grimes pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), 1056df8bae1dSRodney W. Grimes prot, FALSE); 1057df8bae1dSRodney W. Grimes 1058df8bae1dSRodney W. Grimes /* 1059df8bae1dSRodney W. Grimes * Mark it no longer busy, and put it on the active list. 1060df8bae1dSRodney W. Grimes */ 1061df8bae1dSRodney W. Grimes vm_object_lock(dst_object); 1062df8bae1dSRodney W. Grimes vm_page_lock_queues(); 1063df8bae1dSRodney W. Grimes vm_page_activate(dst_m); 1064df8bae1dSRodney W. Grimes vm_page_unlock_queues(); 1065df8bae1dSRodney W. Grimes PAGE_WAKEUP(dst_m); 1066df8bae1dSRodney W. 
Grimes vm_object_unlock(dst_object); 1067df8bae1dSRodney W. Grimes } 1068df8bae1dSRodney W. Grimes } 106926f9a767SRodney W. Grimes 107026f9a767SRodney W. Grimes 107126f9a767SRodney W. Grimes /* 107226f9a767SRodney W. Grimes * looks page up in shadow chain 107326f9a767SRodney W. Grimes */ 107426f9a767SRodney W. Grimes 107526f9a767SRodney W. Grimes int 107626f9a767SRodney W. Grimes vm_fault_page_lookup(object, offset, rtobject, rtoffset, rtm) 107726f9a767SRodney W. Grimes vm_object_t object; 107826f9a767SRodney W. Grimes vm_offset_t offset; 107926f9a767SRodney W. Grimes vm_object_t *rtobject; 108026f9a767SRodney W. Grimes vm_offset_t *rtoffset; 108126f9a767SRodney W. Grimes vm_page_t *rtm; 108226f9a767SRodney W. Grimes { 108326f9a767SRodney W. Grimes vm_page_t m; 108426f9a767SRodney W. Grimes 108526f9a767SRodney W. Grimes *rtm = 0; 108626f9a767SRodney W. Grimes *rtobject = 0; 108726f9a767SRodney W. Grimes *rtoffset = 0; 108826f9a767SRodney W. Grimes 108926f9a767SRodney W. Grimes while (!(m = vm_page_lookup(object, offset))) { 109026f9a767SRodney W. Grimes if (object->pager) { 109126f9a767SRodney W. Grimes if (vm_pager_has_page(object->pager, object->paging_offset + offset)) { 109226f9a767SRodney W. Grimes *rtobject = object; 109326f9a767SRodney W. Grimes *rtoffset = offset; 109426f9a767SRodney W. Grimes return 1; 109526f9a767SRodney W. Grimes } 109626f9a767SRodney W. Grimes } 109726f9a767SRodney W. Grimes if (!object->shadow) 109826f9a767SRodney W. Grimes return 0; 109926f9a767SRodney W. Grimes else { 110026f9a767SRodney W. Grimes offset += object->shadow_offset; 110126f9a767SRodney W. Grimes object = object->shadow; 110226f9a767SRodney W. Grimes } 110326f9a767SRodney W. Grimes } 110426f9a767SRodney W. Grimes *rtobject = object; 110526f9a767SRodney W. Grimes *rtoffset = offset; 110626f9a767SRodney W. Grimes *rtm = m; 110726f9a767SRodney W. Grimes return 1; 110826f9a767SRodney W. Grimes } 110926f9a767SRodney W. Grimes 111026f9a767SRodney W. 
Grimes /* 111126f9a767SRodney W. Grimes * This routine checks around the requested page for other pages that 111226f9a767SRodney W. Grimes * might be able to be faulted in. 111326f9a767SRodney W. Grimes * 111426f9a767SRodney W. Grimes * Inputs: 111526f9a767SRodney W. Grimes * first_object, first_offset, m, rbehind, rahead 111626f9a767SRodney W. Grimes * 111726f9a767SRodney W. Grimes * Outputs: 111826f9a767SRodney W. Grimes * marray (array of vm_page_t), reqpage (index of requested page) 111926f9a767SRodney W. Grimes * 112026f9a767SRodney W. Grimes * Return value: 112126f9a767SRodney W. Grimes * number of pages in marray 112226f9a767SRodney W. Grimes */ 112326f9a767SRodney W. Grimes int 112426f9a767SRodney W. Grimes vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marray, reqpage) 112526f9a767SRodney W. Grimes vm_object_t first_object; 112626f9a767SRodney W. Grimes vm_offset_t first_offset; 112726f9a767SRodney W. Grimes vm_page_t m; 112826f9a767SRodney W. Grimes int rbehind; 112926f9a767SRodney W. Grimes int raheada; 113026f9a767SRodney W. Grimes vm_page_t *marray; 113126f9a767SRodney W. Grimes int *reqpage; 113226f9a767SRodney W. Grimes { 113326f9a767SRodney W. Grimes int i; 113426f9a767SRodney W. Grimes vm_object_t object; 113526f9a767SRodney W. Grimes vm_offset_t offset, startoffset, endoffset, toffset, size; 113626f9a767SRodney W. Grimes vm_object_t rtobject; 113726f9a767SRodney W. Grimes vm_page_t rtm; 113826f9a767SRodney W. Grimes vm_offset_t rtoffset; 113926f9a767SRodney W. Grimes vm_offset_t offsetdiff; 114026f9a767SRodney W. Grimes int rahead; 114126f9a767SRodney W. Grimes int treqpage; 114226f9a767SRodney W. Grimes 114326f9a767SRodney W. Grimes object = m->object; 114426f9a767SRodney W. Grimes offset = m->offset; 114526f9a767SRodney W. Grimes 114626f9a767SRodney W. Grimes offsetdiff = offset - first_offset; 114726f9a767SRodney W. Grimes 114826f9a767SRodney W. Grimes /* 114926f9a767SRodney W. 
Grimes * if the requested page is not available, then give up now 115026f9a767SRodney W. Grimes */ 115126f9a767SRodney W. Grimes 115226f9a767SRodney W. Grimes if (!vm_pager_has_page(object->pager, object->paging_offset + offset)) 115326f9a767SRodney W. Grimes return 0; 115426f9a767SRodney W. Grimes 115526f9a767SRodney W. Grimes /* 115626f9a767SRodney W. Grimes * try to do any readahead that we might have free pages for. 115726f9a767SRodney W. Grimes */ 115826f9a767SRodney W. Grimes rahead = raheada; 11590d94caffSDavid Greenman if ((rahead + rbehind) > ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved)) { 11600d94caffSDavid Greenman rahead = ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved) / 2; 11610d94caffSDavid Greenman rbehind = rahead; 11620d94caffSDavid Greenman if (!rahead) 11630d94caffSDavid Greenman wakeup((caddr_t) &vm_pages_needed); 116426f9a767SRodney W. Grimes } 116526f9a767SRodney W. Grimes /* 116626f9a767SRodney W. Grimes * if we don't have any free pages, then just read one page. 116726f9a767SRodney W. Grimes */ 116826f9a767SRodney W. Grimes if (rahead <= 0) { 116926f9a767SRodney W. Grimes *reqpage = 0; 117026f9a767SRodney W. Grimes marray[0] = m; 117126f9a767SRodney W. Grimes return 1; 117226f9a767SRodney W. Grimes } 117326f9a767SRodney W. Grimes /* 11740d94caffSDavid Greenman * scan backward for the read behind pages -- in memory or on disk not 11750d94caffSDavid Greenman * in same object 117626f9a767SRodney W. Grimes */ 117726f9a767SRodney W. Grimes toffset = offset - NBPG; 1178317205caSDavid Greenman if (toffset < offset) { 117926f9a767SRodney W. Grimes if (rbehind * NBPG > offset) 118026f9a767SRodney W. Grimes rbehind = offset / NBPG; 118126f9a767SRodney W. Grimes startoffset = offset - rbehind * NBPG; 118226f9a767SRodney W. Grimes while (toffset >= startoffset) { 118326f9a767SRodney W. Grimes if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) || 118426f9a767SRodney W. 
Grimes rtm != 0 || rtobject != object) { 118526f9a767SRodney W. Grimes startoffset = toffset + NBPG; 118626f9a767SRodney W. Grimes break; 118726f9a767SRodney W. Grimes } 118826f9a767SRodney W. Grimes if (toffset == 0) 118926f9a767SRodney W. Grimes break; 119026f9a767SRodney W. Grimes toffset -= NBPG; 119126f9a767SRodney W. Grimes } 1192317205caSDavid Greenman } else { 1193317205caSDavid Greenman startoffset = offset; 1194317205caSDavid Greenman } 119526f9a767SRodney W. Grimes 119626f9a767SRodney W. Grimes /* 11970d94caffSDavid Greenman * scan forward for the read ahead pages -- in memory or on disk not 11980d94caffSDavid Greenman * in same object 119926f9a767SRodney W. Grimes */ 120026f9a767SRodney W. Grimes toffset = offset + NBPG; 120126f9a767SRodney W. Grimes endoffset = offset + (rahead + 1) * NBPG; 120226f9a767SRodney W. Grimes while (toffset < object->size && toffset < endoffset) { 120326f9a767SRodney W. Grimes if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) || 120426f9a767SRodney W. Grimes rtm != 0 || rtobject != object) { 120526f9a767SRodney W. Grimes break; 120626f9a767SRodney W. Grimes } 120726f9a767SRodney W. Grimes toffset += NBPG; 120826f9a767SRodney W. Grimes } 120926f9a767SRodney W. Grimes endoffset = toffset; 121026f9a767SRodney W. Grimes 121126f9a767SRodney W. Grimes /* calculate number of bytes of pages */ 121226f9a767SRodney W. Grimes size = (endoffset - startoffset) / NBPG; 121326f9a767SRodney W. Grimes 121426f9a767SRodney W. Grimes /* calculate the page offset of the required page */ 121526f9a767SRodney W. Grimes treqpage = (offset - startoffset) / NBPG; 121626f9a767SRodney W. Grimes 121726f9a767SRodney W. Grimes /* see if we have space (again) */ 12180d94caffSDavid Greenman if ((cnt.v_free_count + cnt.v_cache_count) > (cnt.v_free_reserved + size)) { 121926f9a767SRodney W. Grimes bzero(marray, (rahead + rbehind + 1) * sizeof(vm_page_t)); 122026f9a767SRodney W. Grimes /* 122126f9a767SRodney W. 
Grimes * get our pages and don't block for them 122226f9a767SRodney W. Grimes */ 122326f9a767SRodney W. Grimes for (i = 0; i < size; i++) { 122426f9a767SRodney W. Grimes if (i != treqpage) 12256d40c3d3SDavid Greenman rtm = vm_page_alloc(object, startoffset + i * NBPG, VM_ALLOC_NORMAL); 122626f9a767SRodney W. Grimes else 122726f9a767SRodney W. Grimes rtm = m; 122826f9a767SRodney W. Grimes marray[i] = rtm; 122926f9a767SRodney W. Grimes } 123026f9a767SRodney W. Grimes 123126f9a767SRodney W. Grimes for (i = 0; i < size; i++) { 123226f9a767SRodney W. Grimes if (marray[i] == 0) 123326f9a767SRodney W. Grimes break; 123426f9a767SRodney W. Grimes } 123526f9a767SRodney W. Grimes 123626f9a767SRodney W. Grimes /* 12370d94caffSDavid Greenman * if we could not get our block of pages, then free the 12380d94caffSDavid Greenman * readahead/readbehind pages. 123926f9a767SRodney W. Grimes */ 124026f9a767SRodney W. Grimes if (i < size) { 124126f9a767SRodney W. Grimes for (i = 0; i < size; i++) { 124226f9a767SRodney W. Grimes if (i != treqpage && marray[i]) 124326f9a767SRodney W. Grimes FREE_PAGE(marray[i]); 124426f9a767SRodney W. Grimes } 124526f9a767SRodney W. Grimes *reqpage = 0; 124626f9a767SRodney W. Grimes marray[0] = m; 124726f9a767SRodney W. Grimes return 1; 124826f9a767SRodney W. Grimes } 124926f9a767SRodney W. Grimes *reqpage = treqpage; 125026f9a767SRodney W. Grimes return size; 125126f9a767SRodney W. Grimes } 125226f9a767SRodney W. Grimes *reqpage = 0; 125326f9a767SRodney W. Grimes marray[0] = m; 125426f9a767SRodney W. Grimes return 1; 125526f9a767SRodney W. Grimes } 1256