/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California. All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_fault.c 8.4 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_fault.c,v 1.30 1995/09/11 00:45:15 dyson Exp $
 */

/*
 *	Page fault handling module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/resource.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
Grimes #include <vm/vm_pageout.h> 87a83c285cSDavid Greenman #include <vm/vm_kern.h> 8824a1cce3SDavid Greenman #include <vm/vm_pager.h> 8924a1cce3SDavid Greenman #include <vm/vnode_pager.h> 90df8bae1dSRodney W. Grimes 9105f0fdd2SPoul-Henning Kamp int vm_fault_additional_pages __P((vm_object_t, vm_offset_t, vm_page_t, int, int, vm_page_t *, int *)); 9226f9a767SRodney W. Grimes 9326f9a767SRodney W. Grimes #define VM_FAULT_READ_AHEAD 4 9426f9a767SRodney W. Grimes #define VM_FAULT_READ_BEHIND 3 9526f9a767SRodney W. Grimes #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) 9626f9a767SRodney W. Grimes extern int swap_pager_full; 9726f9a767SRodney W. Grimes 98df8bae1dSRodney W. Grimes /* 99df8bae1dSRodney W. Grimes * vm_fault: 100df8bae1dSRodney W. Grimes * 101df8bae1dSRodney W. Grimes * Handle a page fault occuring at the given address, 102df8bae1dSRodney W. Grimes * requiring the given permissions, in the map specified. 103df8bae1dSRodney W. Grimes * If successful, the page is inserted into the 104df8bae1dSRodney W. Grimes * associated physical map. 105df8bae1dSRodney W. Grimes * 106df8bae1dSRodney W. Grimes * NOTE: the given address should be truncated to the 107df8bae1dSRodney W. Grimes * proper page address. 108df8bae1dSRodney W. Grimes * 109df8bae1dSRodney W. Grimes * KERN_SUCCESS is returned if the page fault is handled; otherwise, 110df8bae1dSRodney W. Grimes * a standard error specifying why the fault is fatal is returned. 111df8bae1dSRodney W. Grimes * 112df8bae1dSRodney W. Grimes * 113df8bae1dSRodney W. Grimes * The map in question must be referenced, and remains so. 114df8bae1dSRodney W. Grimes * Caller may hold no locks. 115df8bae1dSRodney W. Grimes */ 116df8bae1dSRodney W. Grimes int 117df8bae1dSRodney W. Grimes vm_fault(map, vaddr, fault_type, change_wiring) 118df8bae1dSRodney W. Grimes vm_map_t map; 119df8bae1dSRodney W. Grimes vm_offset_t vaddr; 120df8bae1dSRodney W. Grimes vm_prot_t fault_type; 121df8bae1dSRodney W. 
Grimes boolean_t change_wiring; 122df8bae1dSRodney W. Grimes { 123df8bae1dSRodney W. Grimes vm_object_t first_object; 124df8bae1dSRodney W. Grimes vm_offset_t first_offset; 125df8bae1dSRodney W. Grimes vm_map_entry_t entry; 126df8bae1dSRodney W. Grimes register vm_object_t object; 127df8bae1dSRodney W. Grimes register vm_offset_t offset; 12826f9a767SRodney W. Grimes vm_page_t m; 129df8bae1dSRodney W. Grimes vm_page_t first_m; 130df8bae1dSRodney W. Grimes vm_prot_t prot; 131df8bae1dSRodney W. Grimes int result; 132df8bae1dSRodney W. Grimes boolean_t wired; 133df8bae1dSRodney W. Grimes boolean_t su; 134df8bae1dSRodney W. Grimes boolean_t lookup_still_valid; 135df8bae1dSRodney W. Grimes boolean_t page_exists; 136df8bae1dSRodney W. Grimes vm_page_t old_m; 137df8bae1dSRodney W. Grimes vm_object_t next_object; 13826f9a767SRodney W. Grimes vm_page_t marray[VM_FAULT_READ]; 13926f9a767SRodney W. Grimes int spl; 14026f9a767SRodney W. Grimes int hardfault = 0; 141f6b04d2bSDavid Greenman struct vnode *vp = NULL; 142df8bae1dSRodney W. Grimes 143b8d95f16SDavid Greenman cnt.v_vm_faults++; /* needs lock XXX */ 144df8bae1dSRodney W. Grimes /* 145df8bae1dSRodney W. Grimes * Recovery actions 146df8bae1dSRodney W. Grimes */ 147df8bae1dSRodney W. Grimes #define FREE_PAGE(m) { \ 148df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); \ 149df8bae1dSRodney W. Grimes vm_page_free(m); \ 150df8bae1dSRodney W. Grimes } 151df8bae1dSRodney W. Grimes 152df8bae1dSRodney W. Grimes #define RELEASE_PAGE(m) { \ 153df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); \ 154f919ebdeSDavid Greenman if ((m->flags & PG_ACTIVE) == 0) vm_page_activate(m); \ 155df8bae1dSRodney W. Grimes } 156df8bae1dSRodney W. Grimes 157df8bae1dSRodney W. Grimes #define UNLOCK_MAP { \ 158df8bae1dSRodney W. Grimes if (lookup_still_valid) { \ 159df8bae1dSRodney W. Grimes vm_map_lookup_done(map, entry); \ 160df8bae1dSRodney W. Grimes lookup_still_valid = FALSE; \ 161df8bae1dSRodney W. Grimes } \ 162df8bae1dSRodney W. Grimes } 163df8bae1dSRodney W. 
Grimes 164df8bae1dSRodney W. Grimes #define UNLOCK_THINGS { \ 165f919ebdeSDavid Greenman vm_object_pip_wakeup(object); \ 166df8bae1dSRodney W. Grimes if (object != first_object) { \ 167df8bae1dSRodney W. Grimes FREE_PAGE(first_m); \ 168f919ebdeSDavid Greenman vm_object_pip_wakeup(first_object); \ 169df8bae1dSRodney W. Grimes } \ 170df8bae1dSRodney W. Grimes UNLOCK_MAP; \ 17124a1cce3SDavid Greenman if (vp != NULL) VOP_UNLOCK(vp); \ 172df8bae1dSRodney W. Grimes } 173df8bae1dSRodney W. Grimes 174df8bae1dSRodney W. Grimes #define UNLOCK_AND_DEALLOCATE { \ 175df8bae1dSRodney W. Grimes UNLOCK_THINGS; \ 176df8bae1dSRodney W. Grimes vm_object_deallocate(first_object); \ 177df8bae1dSRodney W. Grimes } 178df8bae1dSRodney W. Grimes 17926f9a767SRodney W. Grimes 180df8bae1dSRodney W. Grimes RetryFault:; 181df8bae1dSRodney W. Grimes 182df8bae1dSRodney W. Grimes /* 1830d94caffSDavid Greenman * Find the backing store object and offset into it to begin the 1840d94caffSDavid Greenman * search. 185df8bae1dSRodney W. Grimes */ 186df8bae1dSRodney W. Grimes 1870d94caffSDavid Greenman if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry, &first_object, 1880d94caffSDavid Greenman &first_offset, &prot, &wired, &su)) != KERN_SUCCESS) { 189df8bae1dSRodney W. Grimes return (result); 190df8bae1dSRodney W. Grimes } 191f6b04d2bSDavid Greenman 19224a1cce3SDavid Greenman vp = vnode_pager_lock(first_object); 193f6b04d2bSDavid Greenman 194df8bae1dSRodney W. Grimes lookup_still_valid = TRUE; 195df8bae1dSRodney W. Grimes 196df8bae1dSRodney W. Grimes if (wired) 197df8bae1dSRodney W. Grimes fault_type = prot; 198df8bae1dSRodney W. Grimes 199df8bae1dSRodney W. Grimes first_m = NULL; 200df8bae1dSRodney W. Grimes 201df8bae1dSRodney W. Grimes /* 2020d94caffSDavid Greenman * Make a reference to this object to prevent its disposal while we 2030d94caffSDavid Greenman * are messing with it. Once we have the reference, the map is free 2040d94caffSDavid Greenman * to be diddled. 
Since objects reference their shadows (and copies), 2050d94caffSDavid Greenman * they will stay around as well. 206df8bae1dSRodney W. Grimes */ 207df8bae1dSRodney W. Grimes 208df8bae1dSRodney W. Grimes first_object->ref_count++; 209df8bae1dSRodney W. Grimes first_object->paging_in_progress++; 210df8bae1dSRodney W. Grimes 211df8bae1dSRodney W. Grimes /* 212df8bae1dSRodney W. Grimes * INVARIANTS (through entire routine): 213df8bae1dSRodney W. Grimes * 2140d94caffSDavid Greenman * 1) At all times, we must either have the object lock or a busy 21524a1cce3SDavid Greenman * page in some object to prevent some other process from trying to 2160d94caffSDavid Greenman * bring in the same page. 217df8bae1dSRodney W. Grimes * 2180d94caffSDavid Greenman * Note that we cannot hold any locks during the pager access or when 2190d94caffSDavid Greenman * waiting for memory, so we use a busy page then. 220df8bae1dSRodney W. Grimes * 2210d94caffSDavid Greenman * Note also that we aren't as concerned about more than one thead 2220d94caffSDavid Greenman * attempting to pager_data_unlock the same page at once, so we don't 2230d94caffSDavid Greenman * hold the page as busy then, but do record the highest unlock value 2240d94caffSDavid Greenman * so far. [Unlock requests may also be delivered out of order.] 225df8bae1dSRodney W. Grimes * 2260d94caffSDavid Greenman * 2) Once we have a busy page, we must remove it from the pageout 2270d94caffSDavid Greenman * queues, so that the pageout daemon will not grab it away. 228df8bae1dSRodney W. Grimes * 22924a1cce3SDavid Greenman * 3) To prevent another process from racing us down the shadow chain 2300d94caffSDavid Greenman * and entering a new page in the top object before we do, we must 2310d94caffSDavid Greenman * keep a busy page in the top object while following the shadow 2320d94caffSDavid Greenman * chain. 233df8bae1dSRodney W. 
Grimes * 2340d94caffSDavid Greenman * 4) We must increment paging_in_progress on any object for which 2350d94caffSDavid Greenman * we have a busy page, to prevent vm_object_collapse from removing 2360d94caffSDavid Greenman * the busy page without our noticing. 237df8bae1dSRodney W. Grimes */ 238df8bae1dSRodney W. Grimes 239df8bae1dSRodney W. Grimes /* 240df8bae1dSRodney W. Grimes * Search for the page at object/offset. 241df8bae1dSRodney W. Grimes */ 242df8bae1dSRodney W. Grimes 243df8bae1dSRodney W. Grimes object = first_object; 244df8bae1dSRodney W. Grimes offset = first_offset; 245df8bae1dSRodney W. Grimes 246df8bae1dSRodney W. Grimes /* 247df8bae1dSRodney W. Grimes * See whether this page is resident 248df8bae1dSRodney W. Grimes */ 249df8bae1dSRodney W. Grimes 250df8bae1dSRodney W. Grimes while (TRUE) { 251df8bae1dSRodney W. Grimes m = vm_page_lookup(object, offset); 252df8bae1dSRodney W. Grimes if (m != NULL) { 253df8bae1dSRodney W. Grimes /* 2540d94caffSDavid Greenman * If the page is being brought in, wait for it and 2550d94caffSDavid Greenman * then retry. 256df8bae1dSRodney W. Grimes */ 2570d94caffSDavid Greenman if ((m->flags & PG_BUSY) || m->busy) { 25816f62314SDavid Greenman int s; 2590d94caffSDavid Greenman 260df8bae1dSRodney W. Grimes UNLOCK_THINGS; 26116f62314SDavid Greenman s = splhigh(); 2620d94caffSDavid Greenman if ((m->flags & PG_BUSY) || m->busy) { 2630d94caffSDavid Greenman m->flags |= PG_WANTED | PG_REFERENCED; 264976e77fcSDavid Greenman cnt.v_intrans++; 26524a1cce3SDavid Greenman tsleep(m, PSWP, "vmpfw", 0); 26626f9a767SRodney W. Grimes } 26716f62314SDavid Greenman splx(s); 268df8bae1dSRodney W. Grimes vm_object_deallocate(first_object); 269df8bae1dSRodney W. Grimes goto RetryFault; 270df8bae1dSRodney W. 
Grimes } 271f6b04d2bSDavid Greenman 2720d94caffSDavid Greenman if ((m->flags & PG_CACHE) && 2730d94caffSDavid Greenman (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) { 2740d94caffSDavid Greenman UNLOCK_AND_DEALLOCATE; 2750d94caffSDavid Greenman VM_WAIT; 2760d94caffSDavid Greenman goto RetryFault; 2770d94caffSDavid Greenman } 278df8bae1dSRodney W. Grimes 279df8bae1dSRodney W. Grimes /* 28024a1cce3SDavid Greenman * Mark page busy for other processes, and the pagedaemon. 281df8bae1dSRodney W. Grimes */ 282df8bae1dSRodney W. Grimes m->flags |= PG_BUSY; 283f919ebdeSDavid Greenman if (m->valid && ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) && 284f919ebdeSDavid Greenman m->object != kernel_object && m->object != kmem_object) { 2850d94caffSDavid Greenman goto readrest; 2860d94caffSDavid Greenman } 287df8bae1dSRodney W. Grimes break; 288df8bae1dSRodney W. Grimes } 28924a1cce3SDavid Greenman if (((object->type != OBJT_DEFAULT) && (!change_wiring || wired)) 290df8bae1dSRodney W. Grimes || (object == first_object)) { 291df8bae1dSRodney W. Grimes 2925f55e841SDavid Greenman if (offset >= object->size) { 2935f55e841SDavid Greenman UNLOCK_AND_DEALLOCATE; 2945f55e841SDavid Greenman return (KERN_PROTECTION_FAILURE); 2955f55e841SDavid Greenman } 29624a1cce3SDavid Greenman #if 0 /* XXX is this really necessary? */ 29724a1cce3SDavid Greenman if (swap_pager_full && !object->backing_object && 29824a1cce3SDavid Greenman (object->type == OBJT_DEFAULT || 29924a1cce3SDavid Greenman (object->type == OBJT_SWAP && 30024a1cce3SDavid Greenman !vm_pager_has_page(object, offset + object->paging_offset, NULL, NULL)))) { 3010d94caffSDavid Greenman if (vaddr < VM_MAXUSER_ADDRESS && curproc && curproc->p_pid >= 48) { /* XXX */ 30205f0fdd2SPoul-Henning Kamp printf("Process %lu killed by vm_fault -- out of swap\n", (u_long) curproc->p_pid); 30326f9a767SRodney W. Grimes psignal(curproc, SIGKILL); 30426f9a767SRodney W. Grimes curproc->p_estcpu = 0; 30526f9a767SRodney W. 
Grimes curproc->p_nice = PRIO_MIN; 306da8b3304SDavid Greenman resetpriority(curproc); 30726f9a767SRodney W. Grimes } 30826f9a767SRodney W. Grimes } 30924a1cce3SDavid Greenman #endif 310df8bae1dSRodney W. Grimes /* 3110d94caffSDavid Greenman * Allocate a new page for this object/offset pair. 312df8bae1dSRodney W. Grimes */ 313f70f05f2SJohn Dyson m = vm_page_alloc(object, offset, 314f70f05f2SJohn Dyson vp?VM_ALLOC_NORMAL:(VM_ALLOC_NORMAL|VM_ALLOC_ZERO)); 315df8bae1dSRodney W. Grimes 316df8bae1dSRodney W. Grimes if (m == NULL) { 317df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 318df8bae1dSRodney W. Grimes VM_WAIT; 319df8bae1dSRodney W. Grimes goto RetryFault; 320df8bae1dSRodney W. Grimes } 321df8bae1dSRodney W. Grimes } 3220d94caffSDavid Greenman readrest: 32324a1cce3SDavid Greenman if (object->type != OBJT_DEFAULT && (!change_wiring || wired)) { 324df8bae1dSRodney W. Grimes int rv; 32526f9a767SRodney W. Grimes int faultcount; 32626f9a767SRodney W. Grimes int reqpage; 327df8bae1dSRodney W. Grimes 328df8bae1dSRodney W. Grimes /* 3290d94caffSDavid Greenman * now we find out if any other pages should be paged 3300d94caffSDavid Greenman * in at this time this routine checks to see if the 3310d94caffSDavid Greenman * pages surrounding this fault reside in the same 3320d94caffSDavid Greenman * object as the page for this fault. If they do, 3330d94caffSDavid Greenman * then they are faulted in also into the object. The 3340d94caffSDavid Greenman * array "marray" returned contains an array of 3350d94caffSDavid Greenman * vm_page_t structs where one of them is the 3360d94caffSDavid Greenman * vm_page_t passed to the routine. The reqpage 3370d94caffSDavid Greenman * return value is the index into the marray for the 3380d94caffSDavid Greenman * vm_page_t passed to the routine. 33926f9a767SRodney W. 
Grimes */ 34005f0fdd2SPoul-Henning Kamp faultcount = vm_fault_additional_pages( 34105f0fdd2SPoul-Henning Kamp first_object, first_offset, 34205f0fdd2SPoul-Henning Kamp m, VM_FAULT_READ_BEHIND, VM_FAULT_READ_AHEAD, 34305f0fdd2SPoul-Henning Kamp marray, &reqpage); 344df8bae1dSRodney W. Grimes 345df8bae1dSRodney W. Grimes /* 3460d94caffSDavid Greenman * Call the pager to retrieve the data, if any, after 3470d94caffSDavid Greenman * releasing the lock on the map. 348df8bae1dSRodney W. Grimes */ 349df8bae1dSRodney W. Grimes UNLOCK_MAP; 350df8bae1dSRodney W. Grimes 35126f9a767SRodney W. Grimes rv = faultcount ? 35224a1cce3SDavid Greenman vm_pager_get_pages(object, marray, faultcount, 35324a1cce3SDavid Greenman reqpage) : VM_PAGER_FAIL; 35426f9a767SRodney W. Grimes if (rv == VM_PAGER_OK) { 355df8bae1dSRodney W. Grimes /* 3560d94caffSDavid Greenman * Found the page. Leave it busy while we play 3570d94caffSDavid Greenman * with it. 358df8bae1dSRodney W. Grimes */ 35926f9a767SRodney W. Grimes 360df8bae1dSRodney W. Grimes /* 3610d94caffSDavid Greenman * Relookup in case pager changed page. Pager 3620d94caffSDavid Greenman * is responsible for disposition of old page 3630d94caffSDavid Greenman * if moved. 364df8bae1dSRodney W. Grimes */ 365df8bae1dSRodney W. Grimes m = vm_page_lookup(object, offset); 366f6b04d2bSDavid Greenman if( !m) { 367f6b04d2bSDavid Greenman UNLOCK_AND_DEALLOCATE; 368f6b04d2bSDavid Greenman goto RetryFault; 369f6b04d2bSDavid Greenman } 370f6b04d2bSDavid Greenman 371df8bae1dSRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 372f6b04d2bSDavid Greenman m->valid = VM_PAGE_BITS_ALL; 37326f9a767SRodney W. Grimes hardfault++; 374df8bae1dSRodney W. Grimes break; 375df8bae1dSRodney W. Grimes } 376df8bae1dSRodney W. Grimes /* 3770d94caffSDavid Greenman * Remove the bogus page (which does not exist at this 3780d94caffSDavid Greenman * object/offset); before doing so, we must get back 3790d94caffSDavid Greenman * our object lock to preserve our invariant. 
380df8bae1dSRodney W. Grimes * 38124a1cce3SDavid Greenman * Also wake up any other process that may want to bring 3820d94caffSDavid Greenman * in this page. 383df8bae1dSRodney W. Grimes * 3840d94caffSDavid Greenman * If this is the top-level object, we must leave the 38524a1cce3SDavid Greenman * busy page to prevent another process from rushing 3860d94caffSDavid Greenman * past us, and inserting the page in that object at 3870d94caffSDavid Greenman * the same time that we are. 388df8bae1dSRodney W. Grimes */ 38926f9a767SRodney W. Grimes 390a83c285cSDavid Greenman if (rv == VM_PAGER_ERROR) 391a83c285cSDavid Greenman printf("vm_fault: pager input (probably hardware) error, PID %d failure\n", 392a83c285cSDavid Greenman curproc->p_pid); 39326f9a767SRodney W. Grimes /* 394a83c285cSDavid Greenman * Data outside the range of the pager or an I/O error 39526f9a767SRodney W. Grimes */ 396a83c285cSDavid Greenman /* 3970d94caffSDavid Greenman * XXX - the check for kernel_map is a kludge to work 3980d94caffSDavid Greenman * around having the machine panic on a kernel space 3990d94caffSDavid Greenman * fault w/ I/O error. 400a83c285cSDavid Greenman */ 401a83c285cSDavid Greenman if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { 40226f9a767SRodney W. Grimes FREE_PAGE(m); 40326f9a767SRodney W. Grimes UNLOCK_AND_DEALLOCATE; 404a83c285cSDavid Greenman return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); 40526f9a767SRodney W. Grimes } 406df8bae1dSRodney W. Grimes if (object != first_object) { 407df8bae1dSRodney W. Grimes FREE_PAGE(m); 40826f9a767SRodney W. Grimes /* 40926f9a767SRodney W. Grimes * XXX - we cannot just fall out at this 41026f9a767SRodney W. Grimes * point, m has been freed and is invalid! 41126f9a767SRodney W. Grimes */ 412df8bae1dSRodney W. Grimes } 413df8bae1dSRodney W. Grimes } 414df8bae1dSRodney W. 
Grimes /* 41524a1cce3SDavid Greenman * We get here if the object has default pager (or unwiring) or the 4160d94caffSDavid Greenman * pager doesn't have the page. 417df8bae1dSRodney W. Grimes */ 418df8bae1dSRodney W. Grimes if (object == first_object) 419df8bae1dSRodney W. Grimes first_m = m; 420df8bae1dSRodney W. Grimes 421df8bae1dSRodney W. Grimes /* 4220d94caffSDavid Greenman * Move on to the next object. Lock the next object before 4230d94caffSDavid Greenman * unlocking the current one. 424df8bae1dSRodney W. Grimes */ 425df8bae1dSRodney W. Grimes 42624a1cce3SDavid Greenman offset += object->backing_object_offset; 42724a1cce3SDavid Greenman next_object = object->backing_object; 428df8bae1dSRodney W. Grimes if (next_object == NULL) { 429df8bae1dSRodney W. Grimes /* 4300d94caffSDavid Greenman * If there's no object left, fill the page in the top 4310d94caffSDavid Greenman * object with zeros. 432df8bae1dSRodney W. Grimes */ 433df8bae1dSRodney W. Grimes if (object != first_object) { 434f919ebdeSDavid Greenman vm_object_pip_wakeup(object); 435df8bae1dSRodney W. Grimes 436df8bae1dSRodney W. Grimes object = first_object; 437df8bae1dSRodney W. Grimes offset = first_offset; 438df8bae1dSRodney W. Grimes m = first_m; 439df8bae1dSRodney W. Grimes } 440df8bae1dSRodney W. Grimes first_m = NULL; 441df8bae1dSRodney W. Grimes 442f70f05f2SJohn Dyson if ((m->flags & PG_ZERO) == 0) 443df8bae1dSRodney W. Grimes vm_page_zero_fill(m); 4440d94caffSDavid Greenman m->valid = VM_PAGE_BITS_ALL; 445df8bae1dSRodney W. Grimes cnt.v_zfod++; 446df8bae1dSRodney W. Grimes break; 4470d94caffSDavid Greenman } else { 44826f9a767SRodney W. Grimes if (object != first_object) { 449f919ebdeSDavid Greenman vm_object_pip_wakeup(object); 450c0503609SDavid Greenman } 451df8bae1dSRodney W. Grimes object = next_object; 452df8bae1dSRodney W. Grimes object->paging_in_progress++; 453df8bae1dSRodney W. Grimes } 454df8bae1dSRodney W. Grimes } 455df8bae1dSRodney W. 
Grimes 456f919ebdeSDavid Greenman if ((m->flags & PG_BUSY) == 0) 457f919ebdeSDavid Greenman panic("vm_fault: not busy after main loop"); 458df8bae1dSRodney W. Grimes 459df8bae1dSRodney W. Grimes /* 4600d94caffSDavid Greenman * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock 461df8bae1dSRodney W. Grimes * is held.] 462df8bae1dSRodney W. Grimes */ 463df8bae1dSRodney W. Grimes 464df8bae1dSRodney W. Grimes old_m = m; /* save page that would be copied */ 465df8bae1dSRodney W. Grimes 466df8bae1dSRodney W. Grimes /* 4670d94caffSDavid Greenman * If the page is being written, but isn't already owned by the 4680d94caffSDavid Greenman * top-level object, we have to copy it into a new page owned by the 4690d94caffSDavid Greenman * top-level object. 470df8bae1dSRodney W. Grimes */ 471df8bae1dSRodney W. Grimes 472df8bae1dSRodney W. Grimes if (object != first_object) { 473df8bae1dSRodney W. Grimes /* 4740d94caffSDavid Greenman * We only really need to copy if we want to write it. 475df8bae1dSRodney W. Grimes */ 476df8bae1dSRodney W. Grimes 477df8bae1dSRodney W. Grimes if (fault_type & VM_PROT_WRITE) { 478df8bae1dSRodney W. Grimes 479df8bae1dSRodney W. Grimes /* 4800d94caffSDavid Greenman * If we try to collapse first_object at this point, 4810d94caffSDavid Greenman * we may deadlock when we try to get the lock on an 4820d94caffSDavid Greenman * intermediate object (since we have the bottom 4830d94caffSDavid Greenman * object locked). We can't unlock the bottom object, 4840d94caffSDavid Greenman * because the page we found may move (by collapse) if 4850d94caffSDavid Greenman * we do. 486df8bae1dSRodney W. Grimes * 4870d94caffSDavid Greenman * Instead, we first copy the page. Then, when we have 4880d94caffSDavid Greenman * no more use for the bottom object, we unlock it and 4890d94caffSDavid Greenman * try to collapse. 490df8bae1dSRodney W. Grimes * 4910d94caffSDavid Greenman * Note that we copy the page even if we didn't need 4920d94caffSDavid Greenman * to... 
that's the breaks. 493df8bae1dSRodney W. Grimes */ 494df8bae1dSRodney W. Grimes 495df8bae1dSRodney W. Grimes /* 4960d94caffSDavid Greenman * We already have an empty page in first_object - use 4970d94caffSDavid Greenman * it. 498df8bae1dSRodney W. Grimes */ 499df8bae1dSRodney W. Grimes 500df8bae1dSRodney W. Grimes vm_page_copy(m, first_m); 5010d94caffSDavid Greenman first_m->valid = VM_PAGE_BITS_ALL; 502df8bae1dSRodney W. Grimes 503df8bae1dSRodney W. Grimes /* 5040d94caffSDavid Greenman * If another map is truly sharing this page with us, 5050d94caffSDavid Greenman * we have to flush all uses of the original page, 5060d94caffSDavid Greenman * since we can't distinguish those which want the 5070d94caffSDavid Greenman * original from those which need the new copy. 508df8bae1dSRodney W. Grimes * 5090d94caffSDavid Greenman * XXX If we know that only one map has access to this 5100d94caffSDavid Greenman * page, then we could avoid the pmap_page_protect() 5110d94caffSDavid Greenman * call. 512df8bae1dSRodney W. Grimes */ 513df8bae1dSRodney W. Grimes 514f919ebdeSDavid Greenman if ((m->flags & PG_ACTIVE) == 0) 515df8bae1dSRodney W. Grimes vm_page_activate(m); 516f919ebdeSDavid Greenman vm_page_protect(m, VM_PROT_NONE); 517df8bae1dSRodney W. Grimes 518df8bae1dSRodney W. Grimes /* 519df8bae1dSRodney W. Grimes * We no longer need the old page or object. 520df8bae1dSRodney W. Grimes */ 521df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); 522f919ebdeSDavid Greenman vm_object_pip_wakeup(object); 523df8bae1dSRodney W. Grimes 524df8bae1dSRodney W. Grimes /* 525df8bae1dSRodney W. Grimes * Only use the new page below... 526df8bae1dSRodney W. Grimes */ 527df8bae1dSRodney W. Grimes 528df8bae1dSRodney W. Grimes cnt.v_cow_faults++; 529df8bae1dSRodney W. Grimes m = first_m; 530df8bae1dSRodney W. Grimes object = first_object; 531df8bae1dSRodney W. Grimes offset = first_offset; 532df8bae1dSRodney W. Grimes 533df8bae1dSRodney W. 
Grimes /* 5340d94caffSDavid Greenman * Now that we've gotten the copy out of the way, 5350d94caffSDavid Greenman * let's try to collapse the top object. 53624a1cce3SDavid Greenman * 537df8bae1dSRodney W. Grimes * But we have to play ugly games with 538df8bae1dSRodney W. Grimes * paging_in_progress to do that... 539df8bae1dSRodney W. Grimes */ 540f919ebdeSDavid Greenman vm_object_pip_wakeup(object); 541df8bae1dSRodney W. Grimes vm_object_collapse(object); 542df8bae1dSRodney W. Grimes object->paging_in_progress++; 5430d94caffSDavid Greenman } else { 544df8bae1dSRodney W. Grimes prot &= ~VM_PROT_WRITE; 545df8bae1dSRodney W. Grimes m->flags |= PG_COPYONWRITE; 546df8bae1dSRodney W. Grimes } 547df8bae1dSRodney W. Grimes } 548df8bae1dSRodney W. Grimes 549df8bae1dSRodney W. Grimes /* 5500d94caffSDavid Greenman * We must verify that the maps have not changed since our last 5510d94caffSDavid Greenman * lookup. 552df8bae1dSRodney W. Grimes */ 553df8bae1dSRodney W. Grimes 554df8bae1dSRodney W. Grimes if (!lookup_still_valid) { 555df8bae1dSRodney W. Grimes vm_object_t retry_object; 556df8bae1dSRodney W. Grimes vm_offset_t retry_offset; 557df8bae1dSRodney W. Grimes vm_prot_t retry_prot; 558df8bae1dSRodney W. Grimes 559df8bae1dSRodney W. Grimes /* 5600d94caffSDavid Greenman * Since map entries may be pageable, make sure we can take a 5610d94caffSDavid Greenman * page fault on them. 562df8bae1dSRodney W. Grimes */ 563df8bae1dSRodney W. Grimes 564df8bae1dSRodney W. Grimes /* 56524a1cce3SDavid Greenman * To avoid trying to write_lock the map while another process 5660d94caffSDavid Greenman * has it read_locked (in vm_map_pageable), we do not try for 5670d94caffSDavid Greenman * write permission. If the page is still writable, we will 5680d94caffSDavid Greenman * get write permission. If it is not, or has been marked 5690d94caffSDavid Greenman * needs_copy, we enter the mapping without write permission, 5700d94caffSDavid Greenman * and will merely take another fault. 
571df8bae1dSRodney W. Grimes */ 5720d94caffSDavid Greenman result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE, 5730d94caffSDavid Greenman &entry, &retry_object, &retry_offset, &retry_prot, &wired, &su); 574df8bae1dSRodney W. Grimes 575df8bae1dSRodney W. Grimes /* 5760d94caffSDavid Greenman * If we don't need the page any longer, put it on the active 5770d94caffSDavid Greenman * list (the easiest thing to do here). If no one needs it, 5780d94caffSDavid Greenman * pageout will grab it eventually. 579df8bae1dSRodney W. Grimes */ 580df8bae1dSRodney W. Grimes 581df8bae1dSRodney W. Grimes if (result != KERN_SUCCESS) { 582df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 583df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 584df8bae1dSRodney W. Grimes return (result); 585df8bae1dSRodney W. Grimes } 586df8bae1dSRodney W. Grimes lookup_still_valid = TRUE; 587df8bae1dSRodney W. Grimes 588df8bae1dSRodney W. Grimes if ((retry_object != first_object) || 589df8bae1dSRodney W. Grimes (retry_offset != first_offset)) { 590df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 591df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 592df8bae1dSRodney W. Grimes goto RetryFault; 593df8bae1dSRodney W. Grimes } 594df8bae1dSRodney W. Grimes /* 5950d94caffSDavid Greenman * Check whether the protection has changed or the object has 5960d94caffSDavid Greenman * been copied while we left the map unlocked. Changing from 5970d94caffSDavid Greenman * read to write permission is OK - we leave the page 5980d94caffSDavid Greenman * write-protected, and catch the write fault. Changing from 5990d94caffSDavid Greenman * write to read permission means that we can't mark the page 6000d94caffSDavid Greenman * write-enabled after all. 601df8bae1dSRodney W. Grimes */ 602df8bae1dSRodney W. Grimes prot &= retry_prot; 603df8bae1dSRodney W. Grimes if (m->flags & PG_COPYONWRITE) 604df8bae1dSRodney W. Grimes prot &= ~VM_PROT_WRITE; 605df8bae1dSRodney W. Grimes } 606df8bae1dSRodney W. 
Grimes /* 6070d94caffSDavid Greenman * (the various bits we're fiddling with here are locked by the 6080d94caffSDavid Greenman * object's lock) 609df8bae1dSRodney W. Grimes */ 610df8bae1dSRodney W. Grimes 611df8bae1dSRodney W. Grimes /* XXX This distorts the meaning of the copy_on_write bit */ 612df8bae1dSRodney W. Grimes 613df8bae1dSRodney W. Grimes if (prot & VM_PROT_WRITE) 614df8bae1dSRodney W. Grimes m->flags &= ~PG_COPYONWRITE; 615df8bae1dSRodney W. Grimes 616df8bae1dSRodney W. Grimes /* 6170d94caffSDavid Greenman * It's critically important that a wired-down page be faulted only 6180d94caffSDavid Greenman * once in each map for which it is wired. 619df8bae1dSRodney W. Grimes */ 620df8bae1dSRodney W. Grimes 621df8bae1dSRodney W. Grimes /* 6220d94caffSDavid Greenman * Put this page into the physical map. We had to do the unlock above 6230d94caffSDavid Greenman * because pmap_enter may cause other faults. We don't put the page 6240d94caffSDavid Greenman * back on the active queue until later so that the page-out daemon 6250d94caffSDavid Greenman * won't find us (yet). 626df8bae1dSRodney W. Grimes */ 627df8bae1dSRodney W. Grimes 6282ddba215SDavid Greenman if (prot & VM_PROT_WRITE) { 629f919ebdeSDavid Greenman m->flags |= PG_WRITEABLE; 630f6b04d2bSDavid Greenman m->object->flags |= OBJ_WRITEABLE; 6312ddba215SDavid Greenman /* 6322ddba215SDavid Greenman * If the fault is a write, we know that this page is being 6332ddba215SDavid Greenman * written NOW. This will save on the pmap_is_modified() calls 6342ddba215SDavid Greenman * later. 6352ddba215SDavid Greenman */ 6362ddba215SDavid Greenman if (fault_type & VM_PROT_WRITE) { 6372ddba215SDavid Greenman m->dirty = VM_PAGE_BITS_ALL; 6382ddba215SDavid Greenman } 6392ddba215SDavid Greenman } 640f6b04d2bSDavid Greenman 641f70f05f2SJohn Dyson m->flags |= PG_MAPPED|PG_REFERENCED; 642ced399eeSJohn Dyson m->flags &= ~PG_ZERO; 643f919ebdeSDavid Greenman 644df8bae1dSRodney W. 
Grimes pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); 6451e9122e5SDavid Greenman #if 0 646f6b04d2bSDavid Greenman if (change_wiring == 0 && wired == 0) 6476d40c3d3SDavid Greenman pmap_prefault(map->pmap, vaddr, entry, first_object); 6481e9122e5SDavid Greenman #endif 649df8bae1dSRodney W. Grimes 650df8bae1dSRodney W. Grimes /* 6510d94caffSDavid Greenman * If the page is not wired down, then put it where the pageout daemon 6520d94caffSDavid Greenman * can find it. 653df8bae1dSRodney W. Grimes */ 654df8bae1dSRodney W. Grimes if (change_wiring) { 655df8bae1dSRodney W. Grimes if (wired) 656df8bae1dSRodney W. Grimes vm_page_wire(m); 657df8bae1dSRodney W. Grimes else 658df8bae1dSRodney W. Grimes vm_page_unwire(m); 6590d94caffSDavid Greenman } else { 660f919ebdeSDavid Greenman if ((m->flags & PG_ACTIVE) == 0) 661df8bae1dSRodney W. Grimes vm_page_activate(m); 66226f9a767SRodney W. Grimes } 66326f9a767SRodney W. Grimes 664a1f6d91cSDavid Greenman if (curproc && (curproc->p_flag & P_INMEM) && curproc->p_stats) { 66526f9a767SRodney W. Grimes if (hardfault) { 66626f9a767SRodney W. Grimes curproc->p_stats->p_ru.ru_majflt++; 66726f9a767SRodney W. Grimes } else { 66826f9a767SRodney W. Grimes curproc->p_stats->p_ru.ru_minflt++; 66926f9a767SRodney W. Grimes } 67026f9a767SRodney W. Grimes } 671df8bae1dSRodney W. Grimes 672df8bae1dSRodney W. Grimes /* 673df8bae1dSRodney W. Grimes * Unlock everything, and return 674df8bae1dSRodney W. Grimes */ 675df8bae1dSRodney W. Grimes 676df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); 677df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 678df8bae1dSRodney W. Grimes 679df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 680df8bae1dSRodney W. Grimes 681df8bae1dSRodney W. Grimes } 682df8bae1dSRodney W. Grimes 683df8bae1dSRodney W. Grimes /* 684df8bae1dSRodney W. Grimes * vm_fault_wire: 685df8bae1dSRodney W. Grimes * 686df8bae1dSRodney W. Grimes * Wire down a range of virtual addresses in a map. 687df8bae1dSRodney W. 
Grimes */ 688df8bae1dSRodney W. Grimes int 689df8bae1dSRodney W. Grimes vm_fault_wire(map, start, end) 690df8bae1dSRodney W. Grimes vm_map_t map; 691df8bae1dSRodney W. Grimes vm_offset_t start, end; 692df8bae1dSRodney W. Grimes { 69326f9a767SRodney W. Grimes 694df8bae1dSRodney W. Grimes register vm_offset_t va; 695df8bae1dSRodney W. Grimes register pmap_t pmap; 696df8bae1dSRodney W. Grimes int rv; 697df8bae1dSRodney W. Grimes 698df8bae1dSRodney W. Grimes pmap = vm_map_pmap(map); 699df8bae1dSRodney W. Grimes 700df8bae1dSRodney W. Grimes /* 7010d94caffSDavid Greenman * Inform the physical mapping system that the range of addresses may 7020d94caffSDavid Greenman * not fault, so that page tables and such can be locked down as well. 703df8bae1dSRodney W. Grimes */ 704df8bae1dSRodney W. Grimes 705df8bae1dSRodney W. Grimes pmap_pageable(pmap, start, end, FALSE); 706df8bae1dSRodney W. Grimes 707df8bae1dSRodney W. Grimes /* 7080d94caffSDavid Greenman * We simulate a fault to get the page and enter it in the physical 7090d94caffSDavid Greenman * map. 710df8bae1dSRodney W. Grimes */ 711df8bae1dSRodney W. Grimes 712df8bae1dSRodney W. Grimes for (va = start; va < end; va += PAGE_SIZE) { 7136d40c3d3SDavid Greenman 714a1f6d91cSDavid Greenman while( curproc != pageproc && 7156d40c3d3SDavid Greenman (cnt.v_free_count <= cnt.v_pageout_free_min)) 7166d40c3d3SDavid Greenman VM_WAIT; 7176d40c3d3SDavid Greenman 718a1f6d91cSDavid Greenman rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE, TRUE); 719df8bae1dSRodney W. Grimes if (rv) { 720df8bae1dSRodney W. Grimes if (va != start) 721df8bae1dSRodney W. Grimes vm_fault_unwire(map, start, va); 722df8bae1dSRodney W. Grimes return (rv); 723df8bae1dSRodney W. Grimes } 724df8bae1dSRodney W. Grimes } 725df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 726df8bae1dSRodney W. Grimes } 727df8bae1dSRodney W. Grimes 728df8bae1dSRodney W. Grimes 729df8bae1dSRodney W. Grimes /* 730df8bae1dSRodney W. Grimes * vm_fault_unwire: 731df8bae1dSRodney W. 
Grimes * 732df8bae1dSRodney W. Grimes * Unwire a range of virtual addresses in a map. 733df8bae1dSRodney W. Grimes */ 73426f9a767SRodney W. Grimes void 73526f9a767SRodney W. Grimes vm_fault_unwire(map, start, end) 736df8bae1dSRodney W. Grimes vm_map_t map; 737df8bae1dSRodney W. Grimes vm_offset_t start, end; 738df8bae1dSRodney W. Grimes { 739df8bae1dSRodney W. Grimes 740df8bae1dSRodney W. Grimes register vm_offset_t va, pa; 741df8bae1dSRodney W. Grimes register pmap_t pmap; 742df8bae1dSRodney W. Grimes 743df8bae1dSRodney W. Grimes pmap = vm_map_pmap(map); 744df8bae1dSRodney W. Grimes 745df8bae1dSRodney W. Grimes /* 7460d94caffSDavid Greenman * Since the pages are wired down, we must be able to get their 7470d94caffSDavid Greenman * mappings from the physical map system. 748df8bae1dSRodney W. Grimes */ 749df8bae1dSRodney W. Grimes 750df8bae1dSRodney W. Grimes for (va = start; va < end; va += PAGE_SIZE) { 751df8bae1dSRodney W. Grimes pa = pmap_extract(pmap, va); 752df8bae1dSRodney W. Grimes if (pa == (vm_offset_t) 0) { 753df8bae1dSRodney W. Grimes panic("unwire: page not in pmap"); 754df8bae1dSRodney W. Grimes } 755df8bae1dSRodney W. Grimes pmap_change_wiring(pmap, va, FALSE); 756df8bae1dSRodney W. Grimes vm_page_unwire(PHYS_TO_VM_PAGE(pa)); 757df8bae1dSRodney W. Grimes } 758df8bae1dSRodney W. Grimes 759df8bae1dSRodney W. Grimes /* 7600d94caffSDavid Greenman * Inform the physical mapping system that the range of addresses may 7610d94caffSDavid Greenman * fault, so that page tables and such may be unwired themselves. 762df8bae1dSRodney W. Grimes */ 763df8bae1dSRodney W. Grimes 764df8bae1dSRodney W. Grimes pmap_pageable(pmap, start, end, TRUE); 765df8bae1dSRodney W. Grimes 766df8bae1dSRodney W. Grimes } 767df8bae1dSRodney W. Grimes 768df8bae1dSRodney W. Grimes /* 769df8bae1dSRodney W. Grimes * Routine: 770df8bae1dSRodney W. Grimes * vm_fault_copy_entry 771df8bae1dSRodney W. Grimes * Function: 772df8bae1dSRodney W. 
Grimes * Copy all of the pages from a wired-down map entry to another. 773df8bae1dSRodney W. Grimes * 774df8bae1dSRodney W. Grimes * In/out conditions: 775df8bae1dSRodney W. Grimes * The source and destination maps must be locked for write. 776df8bae1dSRodney W. Grimes * The source map entry must be wired down (or be a sharing map 777df8bae1dSRodney W. Grimes * entry corresponding to a main map entry that is wired down). 778df8bae1dSRodney W. Grimes */ 779df8bae1dSRodney W. Grimes 78026f9a767SRodney W. Grimes void 78126f9a767SRodney W. Grimes vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) 782df8bae1dSRodney W. Grimes vm_map_t dst_map; 783df8bae1dSRodney W. Grimes vm_map_t src_map; 784df8bae1dSRodney W. Grimes vm_map_entry_t dst_entry; 785df8bae1dSRodney W. Grimes vm_map_entry_t src_entry; 786df8bae1dSRodney W. Grimes { 787df8bae1dSRodney W. Grimes vm_object_t dst_object; 788df8bae1dSRodney W. Grimes vm_object_t src_object; 789df8bae1dSRodney W. Grimes vm_offset_t dst_offset; 790df8bae1dSRodney W. Grimes vm_offset_t src_offset; 791df8bae1dSRodney W. Grimes vm_prot_t prot; 792df8bae1dSRodney W. Grimes vm_offset_t vaddr; 793df8bae1dSRodney W. Grimes vm_page_t dst_m; 794df8bae1dSRodney W. Grimes vm_page_t src_m; 795df8bae1dSRodney W. Grimes 796df8bae1dSRodney W. Grimes #ifdef lint 797df8bae1dSRodney W. Grimes src_map++; 7980d94caffSDavid Greenman #endif /* lint */ 799df8bae1dSRodney W. Grimes 800df8bae1dSRodney W. Grimes src_object = src_entry->object.vm_object; 801df8bae1dSRodney W. Grimes src_offset = src_entry->offset; 802df8bae1dSRodney W. Grimes 803df8bae1dSRodney W. Grimes /* 8040d94caffSDavid Greenman * Create the top-level object for the destination entry. (Doesn't 8050d94caffSDavid Greenman * actually shadow anything - we copy the pages directly.) 806df8bae1dSRodney W. Grimes */ 80724a1cce3SDavid Greenman dst_object = vm_object_allocate(OBJT_DEFAULT, 808df8bae1dSRodney W. 
Grimes (vm_size_t) (dst_entry->end - dst_entry->start)); 809df8bae1dSRodney W. Grimes 810df8bae1dSRodney W. Grimes dst_entry->object.vm_object = dst_object; 811df8bae1dSRodney W. Grimes dst_entry->offset = 0; 812df8bae1dSRodney W. Grimes 813df8bae1dSRodney W. Grimes prot = dst_entry->max_protection; 814df8bae1dSRodney W. Grimes 815df8bae1dSRodney W. Grimes /* 8160d94caffSDavid Greenman * Loop through all of the pages in the entry's range, copying each 8170d94caffSDavid Greenman * one from the source object (it should be there) to the destination 8180d94caffSDavid Greenman * object. 819df8bae1dSRodney W. Grimes */ 820df8bae1dSRodney W. Grimes for (vaddr = dst_entry->start, dst_offset = 0; 821df8bae1dSRodney W. Grimes vaddr < dst_entry->end; 822df8bae1dSRodney W. Grimes vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { 823df8bae1dSRodney W. Grimes 824df8bae1dSRodney W. Grimes /* 825df8bae1dSRodney W. Grimes * Allocate a page in the destination object 826df8bae1dSRodney W. Grimes */ 827df8bae1dSRodney W. Grimes do { 8286d40c3d3SDavid Greenman dst_m = vm_page_alloc(dst_object, dst_offset, VM_ALLOC_NORMAL); 829df8bae1dSRodney W. Grimes if (dst_m == NULL) { 830df8bae1dSRodney W. Grimes VM_WAIT; 831df8bae1dSRodney W. Grimes } 832df8bae1dSRodney W. Grimes } while (dst_m == NULL); 833df8bae1dSRodney W. Grimes 834df8bae1dSRodney W. Grimes /* 835df8bae1dSRodney W. Grimes * Find the page in the source object, and copy it in. 8360d94caffSDavid Greenman * (Because the source is wired down, the page will be in 8370d94caffSDavid Greenman * memory.) 838df8bae1dSRodney W. Grimes */ 839df8bae1dSRodney W. Grimes src_m = vm_page_lookup(src_object, dst_offset + src_offset); 840df8bae1dSRodney W. Grimes if (src_m == NULL) 841df8bae1dSRodney W. Grimes panic("vm_fault_copy_wired: page missing"); 842df8bae1dSRodney W. Grimes 843df8bae1dSRodney W. Grimes vm_page_copy(src_m, dst_m); 844df8bae1dSRodney W. Grimes 845df8bae1dSRodney W. Grimes /* 846df8bae1dSRodney W. 
Grimes * Enter it in the pmap... 847df8bae1dSRodney W. Grimes */ 848df8bae1dSRodney W. Grimes 849ced399eeSJohn Dyson dst_m->flags |= PG_WRITEABLE|PG_MAPPED; 850ccbb2f72SJohn Dyson dst_m->flags &= ~PG_ZERO; 851df8bae1dSRodney W. Grimes pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), 852df8bae1dSRodney W. Grimes prot, FALSE); 853df8bae1dSRodney W. Grimes 854df8bae1dSRodney W. Grimes /* 855df8bae1dSRodney W. Grimes * Mark it no longer busy, and put it on the active list. 856df8bae1dSRodney W. Grimes */ 857df8bae1dSRodney W. Grimes vm_page_activate(dst_m); 858df8bae1dSRodney W. Grimes PAGE_WAKEUP(dst_m); 859df8bae1dSRodney W. Grimes } 860df8bae1dSRodney W. Grimes } 86126f9a767SRodney W. Grimes 86226f9a767SRodney W. Grimes 86326f9a767SRodney W. Grimes /* 86426f9a767SRodney W. Grimes * looks page up in shadow chain 86526f9a767SRodney W. Grimes */ 86626f9a767SRodney W. Grimes 86726f9a767SRodney W. Grimes int 86826f9a767SRodney W. Grimes vm_fault_page_lookup(object, offset, rtobject, rtoffset, rtm) 86926f9a767SRodney W. Grimes vm_object_t object; 87026f9a767SRodney W. Grimes vm_offset_t offset; 87126f9a767SRodney W. Grimes vm_object_t *rtobject; 87226f9a767SRodney W. Grimes vm_offset_t *rtoffset; 87326f9a767SRodney W. Grimes vm_page_t *rtm; 87426f9a767SRodney W. Grimes { 87526f9a767SRodney W. Grimes vm_page_t m; 87626f9a767SRodney W. Grimes 87726f9a767SRodney W. Grimes *rtm = 0; 87826f9a767SRodney W. Grimes *rtoffset = 0; 87926f9a767SRodney W. Grimes 88026f9a767SRodney W. Grimes while (!(m = vm_page_lookup(object, offset))) { 881ced399eeSJohn Dyson if (vm_pager_has_page(object, 882ced399eeSJohn Dyson object->paging_offset + offset, NULL, NULL)) { 88326f9a767SRodney W. Grimes *rtobject = object; 88426f9a767SRodney W. Grimes *rtoffset = offset; 88526f9a767SRodney W. Grimes return 1; 88626f9a767SRodney W. Grimes } 887170db9c6SJohn Dyson if (!object->backing_object || (object == *rtobject)) 88826f9a767SRodney W. Grimes return 0; 88926f9a767SRodney W. 
Grimes else { 89024a1cce3SDavid Greenman offset += object->backing_object_offset; 89124a1cce3SDavid Greenman object = object->backing_object; 89226f9a767SRodney W. Grimes } 89326f9a767SRodney W. Grimes } 89426f9a767SRodney W. Grimes *rtobject = object; 89526f9a767SRodney W. Grimes *rtoffset = offset; 89626f9a767SRodney W. Grimes *rtm = m; 89726f9a767SRodney W. Grimes return 1; 89826f9a767SRodney W. Grimes } 89926f9a767SRodney W. Grimes 90026f9a767SRodney W. Grimes /* 90126f9a767SRodney W. Grimes * This routine checks around the requested page for other pages that 90226f9a767SRodney W. Grimes * might be able to be faulted in. 90326f9a767SRodney W. Grimes * 90426f9a767SRodney W. Grimes * Inputs: 90526f9a767SRodney W. Grimes * first_object, first_offset, m, rbehind, rahead 90626f9a767SRodney W. Grimes * 90726f9a767SRodney W. Grimes * Outputs: 90826f9a767SRodney W. Grimes * marray (array of vm_page_t), reqpage (index of requested page) 90926f9a767SRodney W. Grimes * 91026f9a767SRodney W. Grimes * Return value: 91126f9a767SRodney W. Grimes * number of pages in marray 91226f9a767SRodney W. Grimes */ 91326f9a767SRodney W. Grimes int 91426f9a767SRodney W. Grimes vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marray, reqpage) 91526f9a767SRodney W. Grimes vm_object_t first_object; 91626f9a767SRodney W. Grimes vm_offset_t first_offset; 91726f9a767SRodney W. Grimes vm_page_t m; 91826f9a767SRodney W. Grimes int rbehind; 91926f9a767SRodney W. Grimes int raheada; 92026f9a767SRodney W. Grimes vm_page_t *marray; 92126f9a767SRodney W. Grimes int *reqpage; 92226f9a767SRodney W. Grimes { 92326f9a767SRodney W. Grimes int i; 92426f9a767SRodney W. Grimes vm_object_t object; 92526f9a767SRodney W. Grimes vm_offset_t offset, startoffset, endoffset, toffset, size; 92626f9a767SRodney W. Grimes vm_object_t rtobject; 92726f9a767SRodney W. Grimes vm_page_t rtm; 92826f9a767SRodney W. Grimes vm_offset_t rtoffset; 92926f9a767SRodney W. 
Grimes vm_offset_t offsetdiff; 93026f9a767SRodney W. Grimes int rahead; 93126f9a767SRodney W. Grimes int treqpage; 932170db9c6SJohn Dyson int cbehind, cahead; 93326f9a767SRodney W. Grimes 93426f9a767SRodney W. Grimes object = m->object; 93526f9a767SRodney W. Grimes offset = m->offset; 93626f9a767SRodney W. Grimes 93726f9a767SRodney W. Grimes offsetdiff = offset - first_offset; 93826f9a767SRodney W. Grimes 93926f9a767SRodney W. Grimes /* 94026f9a767SRodney W. Grimes * if the requested page is not available, then give up now 94126f9a767SRodney W. Grimes */ 94226f9a767SRodney W. Grimes 943170db9c6SJohn Dyson if (!vm_pager_has_page(object, 944170db9c6SJohn Dyson object->paging_offset + offset, &cbehind, &cahead)) 94526f9a767SRodney W. Grimes return 0; 94626f9a767SRodney W. Grimes 947170db9c6SJohn Dyson if (object->backing_object == NULL) { 948170db9c6SJohn Dyson if (raheada > cahead) { 949170db9c6SJohn Dyson raheada = cahead; 950170db9c6SJohn Dyson } 951170db9c6SJohn Dyson if (rbehind > cbehind) { 952170db9c6SJohn Dyson rbehind = cbehind; 953170db9c6SJohn Dyson } 954170db9c6SJohn Dyson } 955170db9c6SJohn Dyson 95626f9a767SRodney W. Grimes /* 95726f9a767SRodney W. Grimes * try to do any readahead that we might have free pages for. 95826f9a767SRodney W. Grimes */ 95926f9a767SRodney W. Grimes rahead = raheada; 960ccbb2f72SJohn Dyson if ((rahead + rbehind) > 961ccbb2f72SJohn Dyson ((cnt.v_free_count + cnt.v_cache_count) - 2*cnt.v_free_reserved)) { 962ccbb2f72SJohn Dyson rahead = ((cnt.v_free_count + cnt.v_cache_count) - 963ccbb2f72SJohn Dyson 2*cnt.v_free_reserved) / 2; 9640d94caffSDavid Greenman rbehind = rahead; 9650d94caffSDavid Greenman if (!rahead) 966f919ebdeSDavid Greenman pagedaemon_wakeup(); 96726f9a767SRodney W. Grimes } 96826f9a767SRodney W. Grimes /* 96926f9a767SRodney W. Grimes * if we don't have any free pages, then just read one page. 97026f9a767SRodney W. Grimes */ 97126f9a767SRodney W. Grimes if (rahead <= 0) { 97226f9a767SRodney W. 
Grimes *reqpage = 0; 97326f9a767SRodney W. Grimes marray[0] = m; 97426f9a767SRodney W. Grimes return 1; 97526f9a767SRodney W. Grimes } 97626f9a767SRodney W. Grimes /* 9770d94caffSDavid Greenman * scan backward for the read behind pages -- in memory or on disk not 9780d94caffSDavid Greenman * in same object 97926f9a767SRodney W. Grimes */ 98026f9a767SRodney W. Grimes toffset = offset - NBPG; 981317205caSDavid Greenman if (toffset < offset) { 98226f9a767SRodney W. Grimes if (rbehind * NBPG > offset) 98326f9a767SRodney W. Grimes rbehind = offset / NBPG; 98426f9a767SRodney W. Grimes startoffset = offset - rbehind * NBPG; 98526f9a767SRodney W. Grimes while (toffset >= startoffset) { 986170db9c6SJohn Dyson rtobject = object; 987ccbb2f72SJohn Dyson if (!vm_fault_page_lookup(first_object, 988ccbb2f72SJohn Dyson toffset - offsetdiff, &rtobject, &rtoffset, &rtm) || 98926f9a767SRodney W. Grimes rtm != 0 || rtobject != object) { 99026f9a767SRodney W. Grimes startoffset = toffset + NBPG; 99126f9a767SRodney W. Grimes break; 99226f9a767SRodney W. Grimes } 99326f9a767SRodney W. Grimes if (toffset == 0) 99426f9a767SRodney W. Grimes break; 99526f9a767SRodney W. Grimes toffset -= NBPG; 99626f9a767SRodney W. Grimes } 997317205caSDavid Greenman } else { 998317205caSDavid Greenman startoffset = offset; 999317205caSDavid Greenman } 100026f9a767SRodney W. Grimes 100126f9a767SRodney W. Grimes /* 10020d94caffSDavid Greenman * scan forward for the read ahead pages -- in memory or on disk not 10030d94caffSDavid Greenman * in same object 100426f9a767SRodney W. Grimes */ 100526f9a767SRodney W. Grimes toffset = offset + NBPG; 100626f9a767SRodney W. Grimes endoffset = offset + (rahead + 1) * NBPG; 100726f9a767SRodney W. Grimes while (toffset < object->size && toffset < endoffset) { 1008170db9c6SJohn Dyson rtobject = object; 1009ccbb2f72SJohn Dyson if (!vm_fault_page_lookup(first_object, 1010ccbb2f72SJohn Dyson toffset - offsetdiff, &rtobject, &rtoffset, &rtm) || 101126f9a767SRodney W. 
Grimes rtm != 0 || rtobject != object) { 101226f9a767SRodney W. Grimes break; 101326f9a767SRodney W. Grimes } 101426f9a767SRodney W. Grimes toffset += NBPG; 101526f9a767SRodney W. Grimes } 101626f9a767SRodney W. Grimes endoffset = toffset; 101726f9a767SRodney W. Grimes 101826f9a767SRodney W. Grimes /* calculate number of bytes of pages */ 101926f9a767SRodney W. Grimes size = (endoffset - startoffset) / NBPG; 102026f9a767SRodney W. Grimes 102126f9a767SRodney W. Grimes /* calculate the page offset of the required page */ 102226f9a767SRodney W. Grimes treqpage = (offset - startoffset) / NBPG; 102326f9a767SRodney W. Grimes 102426f9a767SRodney W. Grimes /* see if we have space (again) */ 10250d94caffSDavid Greenman if ((cnt.v_free_count + cnt.v_cache_count) > (cnt.v_free_reserved + size)) { 102626f9a767SRodney W. Grimes /* 102726f9a767SRodney W. Grimes * get our pages and don't block for them 102826f9a767SRodney W. Grimes */ 102926f9a767SRodney W. Grimes for (i = 0; i < size; i++) { 1030170db9c6SJohn Dyson if (i != treqpage) { 1031ccbb2f72SJohn Dyson rtm = vm_page_alloc(object, 1032ccbb2f72SJohn Dyson startoffset + i * NBPG, VM_ALLOC_NORMAL); 1033ccbb2f72SJohn Dyson if (rtm == NULL) { 1034170db9c6SJohn Dyson if (i < treqpage) { 1035ccbb2f72SJohn Dyson int j; 1036ccbb2f72SJohn Dyson for (j = 0; j < i; j++) { 103721bf3904SJohn Dyson FREE_PAGE(marray[j]); 103826f9a767SRodney W. Grimes } 103926f9a767SRodney W. Grimes *reqpage = 0; 104026f9a767SRodney W. Grimes marray[0] = m; 104126f9a767SRodney W. Grimes return 1; 1042ccbb2f72SJohn Dyson } else { 1043ccbb2f72SJohn Dyson size = i; 1044ccbb2f72SJohn Dyson *reqpage = treqpage; 1045ccbb2f72SJohn Dyson return size; 1046ccbb2f72SJohn Dyson } 1047ccbb2f72SJohn Dyson } 1048ccbb2f72SJohn Dyson marray[i] = rtm; 1049ccbb2f72SJohn Dyson } else { 1050ccbb2f72SJohn Dyson marray[i] = m; 1051ccbb2f72SJohn Dyson } 105226f9a767SRodney W. Grimes } 1053170db9c6SJohn Dyson 105426f9a767SRodney W. 
Grimes *reqpage = treqpage; 105526f9a767SRodney W. Grimes return size; 105626f9a767SRodney W. Grimes } 105726f9a767SRodney W. Grimes *reqpage = 0; 105826f9a767SRodney W. Grimes marray[0] = m; 105926f9a767SRodney W. Grimes return 1; 106026f9a767SRodney W. Grimes } 1061