1df8bae1dSRodney W. Grimes /* 2df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 426f9a767SRodney W. Grimes * Copyright (c) 1994 John S. Dyson 526f9a767SRodney W. Grimes * All rights reserved. 626f9a767SRodney W. Grimes * Copyright (c) 1994 David Greenman 726f9a767SRodney W. Grimes * All rights reserved. 826f9a767SRodney W. Grimes * 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 11df8bae1dSRodney W. Grimes * The Mach Operating System project at Carnegie-Mellon University. 12df8bae1dSRodney W. Grimes * 13df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 14df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 15df8bae1dSRodney W. Grimes * are met: 16df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 17df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 18df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 19df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 20df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 21df8bae1dSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 22df8bae1dSRodney W. Grimes * must display the following acknowledgement: 23df8bae1dSRodney W. Grimes * This product includes software developed by the University of 24df8bae1dSRodney W. Grimes * California, Berkeley and its contributors. 25df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 26df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 27df8bae1dSRodney W. 
Grimes * without specific prior written permission. 28df8bae1dSRodney W. Grimes * 29df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39df8bae1dSRodney W. Grimes * SUCH DAMAGE. 40df8bae1dSRodney W. Grimes * 413c4dd356SDavid Greenman * from: @(#)vm_fault.c 8.4 (Berkeley) 1/12/94 42df8bae1dSRodney W. Grimes * 43df8bae1dSRodney W. Grimes * 44df8bae1dSRodney W. Grimes * Copyright (c) 1987, 1990 Carnegie-Mellon University. 45df8bae1dSRodney W. Grimes * All rights reserved. 46df8bae1dSRodney W. Grimes * 47df8bae1dSRodney W. Grimes * Authors: Avadis Tevanian, Jr., Michael Wayne Young 48df8bae1dSRodney W. Grimes * 49df8bae1dSRodney W. Grimes * Permission to use, copy, modify and distribute this software and 50df8bae1dSRodney W. Grimes * its documentation is hereby granted, provided that both the copyright 51df8bae1dSRodney W. Grimes * notice and this permission notice appear in all copies of the 52df8bae1dSRodney W. Grimes * software, derivative works or modified versions, and any portions 53df8bae1dSRodney W. 
Grimes * thereof, and that both notices appear in supporting documentation. 54df8bae1dSRodney W. Grimes * 55df8bae1dSRodney W. Grimes * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 56df8bae1dSRodney W. Grimes * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 57df8bae1dSRodney W. Grimes * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 58df8bae1dSRodney W. Grimes * 59df8bae1dSRodney W. Grimes * Carnegie Mellon requests users of this software to return to 60df8bae1dSRodney W. Grimes * 61df8bae1dSRodney W. Grimes * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 62df8bae1dSRodney W. Grimes * School of Computer Science 63df8bae1dSRodney W. Grimes * Carnegie Mellon University 64df8bae1dSRodney W. Grimes * Pittsburgh PA 15213-3890 65df8bae1dSRodney W. Grimes * 66df8bae1dSRodney W. Grimes * any improvements or extensions that they make and grant Carnegie the 67df8bae1dSRodney W. Grimes * rights to redistribute these changes. 683c4dd356SDavid Greenman * 698e58bf68SDavid Greenman * $Id: vm_fault.c,v 1.4 1994/08/06 09:15:37 davidg Exp $ 70df8bae1dSRodney W. Grimes */ 71df8bae1dSRodney W. Grimes 72df8bae1dSRodney W. Grimes /* 73df8bae1dSRodney W. Grimes * Page fault handling module. 74df8bae1dSRodney W. Grimes */ 75df8bae1dSRodney W. Grimes 76df8bae1dSRodney W. Grimes #include <sys/param.h> 77df8bae1dSRodney W. Grimes #include <sys/systm.h> 7826f9a767SRodney W. Grimes #include <sys/proc.h> 7926f9a767SRodney W. Grimes #include <sys/resourcevar.h> 80df8bae1dSRodney W. Grimes 81df8bae1dSRodney W. Grimes #include <vm/vm.h> 82df8bae1dSRodney W. Grimes #include <vm/vm_page.h> 83df8bae1dSRodney W. Grimes #include <vm/vm_pageout.h> 84df8bae1dSRodney W. Grimes 8526f9a767SRodney W. Grimes 8626f9a767SRodney W. Grimes #define VM_FAULT_READ_AHEAD 4 8726f9a767SRodney W. Grimes #define VM_FAULT_READ_AHEAD_MIN 1 8826f9a767SRodney W. Grimes #define VM_FAULT_READ_BEHIND 3 8926f9a767SRodney W. 
Grimes #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) 9026f9a767SRodney W. Grimes extern int swap_pager_full; 9126f9a767SRodney W. Grimes extern int vm_pageout_proc_limit; 9226f9a767SRodney W. Grimes 93df8bae1dSRodney W. Grimes /* 94df8bae1dSRodney W. Grimes * vm_fault: 95df8bae1dSRodney W. Grimes * 96df8bae1dSRodney W. Grimes * Handle a page fault occurring at the given address, 97df8bae1dSRodney W. Grimes * requiring the given permissions, in the map specified. 98df8bae1dSRodney W. Grimes * If successful, the page is inserted into the 99df8bae1dSRodney W. Grimes * associated physical map. 100df8bae1dSRodney W. Grimes * 101df8bae1dSRodney W. Grimes * NOTE: the given address should be truncated to the 102df8bae1dSRodney W. Grimes * proper page address. 103df8bae1dSRodney W. Grimes * 104df8bae1dSRodney W. Grimes * KERN_SUCCESS is returned if the page fault is handled; otherwise, 105df8bae1dSRodney W. Grimes * a standard error specifying why the fault is fatal is returned. 106df8bae1dSRodney W. Grimes * 107df8bae1dSRodney W. Grimes * 108df8bae1dSRodney W. Grimes * The map in question must be referenced, and remains so. 109df8bae1dSRodney W. Grimes * Caller may hold no locks. 110df8bae1dSRodney W. Grimes */ 111df8bae1dSRodney W. Grimes int 112df8bae1dSRodney W. Grimes vm_fault(map, vaddr, fault_type, change_wiring) 113df8bae1dSRodney W. Grimes vm_map_t map; 114df8bae1dSRodney W. Grimes vm_offset_t vaddr; 115df8bae1dSRodney W. Grimes vm_prot_t fault_type; 116df8bae1dSRodney W. Grimes boolean_t change_wiring; 117df8bae1dSRodney W. Grimes { 118df8bae1dSRodney W. Grimes vm_object_t first_object; 119df8bae1dSRodney W. Grimes vm_offset_t first_offset; 120df8bae1dSRodney W. Grimes vm_map_entry_t entry; 121df8bae1dSRodney W. Grimes register vm_object_t object; 122df8bae1dSRodney W. Grimes register vm_offset_t offset; 12326f9a767SRodney W. Grimes vm_page_t m; 124df8bae1dSRodney W. Grimes vm_page_t first_m; 125df8bae1dSRodney W. 
Grimes vm_prot_t prot; 126df8bae1dSRodney W. Grimes int result; 127df8bae1dSRodney W. Grimes boolean_t wired; 128df8bae1dSRodney W. Grimes boolean_t su; 129df8bae1dSRodney W. Grimes boolean_t lookup_still_valid; 130df8bae1dSRodney W. Grimes boolean_t page_exists; 131df8bae1dSRodney W. Grimes vm_page_t old_m; 132df8bae1dSRodney W. Grimes vm_object_t next_object; 13326f9a767SRodney W. Grimes vm_page_t marray[VM_FAULT_READ]; 13426f9a767SRodney W. Grimes int reqpage; 13526f9a767SRodney W. Grimes int spl; 13626f9a767SRodney W. Grimes int hardfault=0; 137df8bae1dSRodney W. Grimes 138df8bae1dSRodney W. Grimes cnt.v_faults++; /* needs lock XXX */ 139df8bae1dSRodney W. Grimes /* 140df8bae1dSRodney W. Grimes * Recovery actions 141df8bae1dSRodney W. Grimes */ 142df8bae1dSRodney W. Grimes #define FREE_PAGE(m) { \ 143df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); \ 144df8bae1dSRodney W. Grimes vm_page_lock_queues(); \ 145df8bae1dSRodney W. Grimes vm_page_free(m); \ 146df8bae1dSRodney W. Grimes vm_page_unlock_queues(); \ 147df8bae1dSRodney W. Grimes } 148df8bae1dSRodney W. Grimes 149df8bae1dSRodney W. Grimes #define RELEASE_PAGE(m) { \ 150df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); \ 151df8bae1dSRodney W. Grimes vm_page_lock_queues(); \ 152df8bae1dSRodney W. Grimes vm_page_activate(m); \ 153df8bae1dSRodney W. Grimes vm_page_unlock_queues(); \ 154df8bae1dSRodney W. Grimes } 155df8bae1dSRodney W. Grimes 156df8bae1dSRodney W. Grimes #define UNLOCK_MAP { \ 157df8bae1dSRodney W. Grimes if (lookup_still_valid) { \ 158df8bae1dSRodney W. Grimes vm_map_lookup_done(map, entry); \ 159df8bae1dSRodney W. Grimes lookup_still_valid = FALSE; \ 160df8bae1dSRodney W. Grimes } \ 161df8bae1dSRodney W. Grimes } 162df8bae1dSRodney W. Grimes 163df8bae1dSRodney W. Grimes #define UNLOCK_THINGS { \ 164df8bae1dSRodney W. Grimes object->paging_in_progress--; \ 16526f9a767SRodney W. Grimes if (object->paging_in_progress == 0) \ 16626f9a767SRodney W. Grimes wakeup((caddr_t)object); \ 167df8bae1dSRodney W. 
Grimes vm_object_unlock(object); \ 168df8bae1dSRodney W. Grimes if (object != first_object) { \ 169df8bae1dSRodney W. Grimes vm_object_lock(first_object); \ 170df8bae1dSRodney W. Grimes FREE_PAGE(first_m); \ 171df8bae1dSRodney W. Grimes first_object->paging_in_progress--; \ 17226f9a767SRodney W. Grimes if (first_object->paging_in_progress == 0) \ 17326f9a767SRodney W. Grimes wakeup((caddr_t)first_object); \ 174df8bae1dSRodney W. Grimes vm_object_unlock(first_object); \ 175df8bae1dSRodney W. Grimes } \ 176df8bae1dSRodney W. Grimes UNLOCK_MAP; \ 177df8bae1dSRodney W. Grimes } 178df8bae1dSRodney W. Grimes 179df8bae1dSRodney W. Grimes #define UNLOCK_AND_DEALLOCATE { \ 180df8bae1dSRodney W. Grimes UNLOCK_THINGS; \ 181df8bae1dSRodney W. Grimes vm_object_deallocate(first_object); \ 182df8bae1dSRodney W. Grimes } 183df8bae1dSRodney W. Grimes 18426f9a767SRodney W. Grimes 185df8bae1dSRodney W. Grimes RetryFault: ; 186df8bae1dSRodney W. Grimes 187df8bae1dSRodney W. Grimes /* 188df8bae1dSRodney W. Grimes * Find the backing store object and offset into 189df8bae1dSRodney W. Grimes * it to begin the search. 190df8bae1dSRodney W. Grimes */ 191df8bae1dSRodney W. Grimes 192df8bae1dSRodney W. Grimes if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry, 193df8bae1dSRodney W. Grimes &first_object, &first_offset, 194df8bae1dSRodney W. Grimes &prot, &wired, &su)) != KERN_SUCCESS) { 195df8bae1dSRodney W. Grimes return(result); 196df8bae1dSRodney W. Grimes } 197df8bae1dSRodney W. Grimes lookup_still_valid = TRUE; 198df8bae1dSRodney W. Grimes 199df8bae1dSRodney W. Grimes if (wired) 200df8bae1dSRodney W. Grimes fault_type = prot; 201df8bae1dSRodney W. Grimes 202df8bae1dSRodney W. Grimes first_m = NULL; 203df8bae1dSRodney W. Grimes 204df8bae1dSRodney W. Grimes /* 205df8bae1dSRodney W. Grimes * Make a reference to this object to 206df8bae1dSRodney W. Grimes * prevent its disposal while we are messing with 207df8bae1dSRodney W. Grimes * it. 
Once we have the reference, the map is free 208df8bae1dSRodney W. Grimes * to be diddled. Since objects reference their 209df8bae1dSRodney W. Grimes * shadows (and copies), they will stay around as well. 210df8bae1dSRodney W. Grimes */ 211df8bae1dSRodney W. Grimes 212df8bae1dSRodney W. Grimes vm_object_lock(first_object); 213df8bae1dSRodney W. Grimes 214df8bae1dSRodney W. Grimes first_object->ref_count++; 215df8bae1dSRodney W. Grimes first_object->paging_in_progress++; 216df8bae1dSRodney W. Grimes 217df8bae1dSRodney W. Grimes /* 218df8bae1dSRodney W. Grimes * INVARIANTS (through entire routine): 219df8bae1dSRodney W. Grimes * 220df8bae1dSRodney W. Grimes * 1) At all times, we must either have the object 221df8bae1dSRodney W. Grimes * lock or a busy page in some object to prevent 222df8bae1dSRodney W. Grimes * some other thread from trying to bring in 223df8bae1dSRodney W. Grimes * the same page. 224df8bae1dSRodney W. Grimes * 225df8bae1dSRodney W. Grimes * Note that we cannot hold any locks during the 226df8bae1dSRodney W. Grimes * pager access or when waiting for memory, so 227df8bae1dSRodney W. Grimes * we use a busy page then. 228df8bae1dSRodney W. Grimes * 229df8bae1dSRodney W. Grimes * Note also that we aren't as concerned about 230df8bae1dSRodney W. Grimes * more than one thread attempting to pager_data_unlock 231df8bae1dSRodney W. Grimes * the same page at once, so we don't hold the page 232df8bae1dSRodney W. Grimes * as busy then, but do record the highest unlock 233df8bae1dSRodney W. Grimes * value so far. [Unlock requests may also be delivered 234df8bae1dSRodney W. Grimes * out of order.] 235df8bae1dSRodney W. Grimes * 236df8bae1dSRodney W. Grimes * 2) Once we have a busy page, we must remove it from 237df8bae1dSRodney W. Grimes * the pageout queues, so that the pageout daemon 238df8bae1dSRodney W. Grimes * will not grab it away. 239df8bae1dSRodney W. Grimes * 240df8bae1dSRodney W. 
Grimes * 3) To prevent another thread from racing us down the 241df8bae1dSRodney W. Grimes * shadow chain and entering a new page in the top 242df8bae1dSRodney W. Grimes * object before we do, we must keep a busy page in 243df8bae1dSRodney W. Grimes * the top object while following the shadow chain. 244df8bae1dSRodney W. Grimes * 245df8bae1dSRodney W. Grimes * 4) We must increment paging_in_progress on any object 246df8bae1dSRodney W. Grimes * for which we have a busy page, to prevent 247df8bae1dSRodney W. Grimes * vm_object_collapse from removing the busy page 248df8bae1dSRodney W. Grimes * without our noticing. 249df8bae1dSRodney W. Grimes */ 250df8bae1dSRodney W. Grimes 251df8bae1dSRodney W. Grimes /* 252df8bae1dSRodney W. Grimes * Search for the page at object/offset. 253df8bae1dSRodney W. Grimes */ 254df8bae1dSRodney W. Grimes 255df8bae1dSRodney W. Grimes object = first_object; 256df8bae1dSRodney W. Grimes offset = first_offset; 257df8bae1dSRodney W. Grimes 258df8bae1dSRodney W. Grimes /* 259df8bae1dSRodney W. Grimes * See whether this page is resident 260df8bae1dSRodney W. Grimes */ 261df8bae1dSRodney W. Grimes 262df8bae1dSRodney W. Grimes while (TRUE) { 263df8bae1dSRodney W. Grimes m = vm_page_lookup(object, offset); 264df8bae1dSRodney W. Grimes if (m != NULL) { 265df8bae1dSRodney W. Grimes /* 266df8bae1dSRodney W. Grimes * If the page is being brought in, 267df8bae1dSRodney W. Grimes * wait for it and then retry. 268df8bae1dSRodney W. Grimes */ 2698e58bf68SDavid Greenman if (m->flags & (PG_BUSY|PG_VMIO)) { 27016f62314SDavid Greenman int s; 271df8bae1dSRodney W. Grimes UNLOCK_THINGS; 27216f62314SDavid Greenman s = splhigh(); 2738e58bf68SDavid Greenman if (m->flags & (PG_BUSY|PG_VMIO)) { 27426f9a767SRodney W. Grimes m->flags |= PG_WANTED; 27526f9a767SRodney W. Grimes tsleep((caddr_t)m,PSWP,"vmpfw",0); 27626f9a767SRodney W. Grimes } 27716f62314SDavid Greenman splx(s); 278df8bae1dSRodney W. Grimes vm_object_deallocate(first_object); 279df8bae1dSRodney W. 
Grimes goto RetryFault; 280df8bae1dSRodney W. Grimes } 281df8bae1dSRodney W. Grimes 282df8bae1dSRodney W. Grimes /* 283df8bae1dSRodney W. Grimes * Remove the page from the pageout daemon's 284df8bae1dSRodney W. Grimes * reach while we play with it. 285df8bae1dSRodney W. Grimes */ 286df8bae1dSRodney W. Grimes 287df8bae1dSRodney W. Grimes vm_page_lock_queues(); 28816f62314SDavid Greenman spl = splhigh(); 289df8bae1dSRodney W. Grimes if (m->flags & PG_INACTIVE) { 290df8bae1dSRodney W. Grimes TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); 291df8bae1dSRodney W. Grimes m->flags &= ~PG_INACTIVE; 292df8bae1dSRodney W. Grimes cnt.v_inactive_count--; 293df8bae1dSRodney W. Grimes cnt.v_reactivated++; 294df8bae1dSRodney W. Grimes } 295df8bae1dSRodney W. Grimes 296df8bae1dSRodney W. Grimes if (m->flags & PG_ACTIVE) { 297df8bae1dSRodney W. Grimes TAILQ_REMOVE(&vm_page_queue_active, m, pageq); 298df8bae1dSRodney W. Grimes m->flags &= ~PG_ACTIVE; 299df8bae1dSRodney W. Grimes cnt.v_active_count--; 300df8bae1dSRodney W. Grimes } 30126f9a767SRodney W. Grimes splx(spl); 302df8bae1dSRodney W. Grimes vm_page_unlock_queues(); 303df8bae1dSRodney W. Grimes 304df8bae1dSRodney W. Grimes /* 305df8bae1dSRodney W. Grimes * Mark page busy for other threads. 306df8bae1dSRodney W. Grimes */ 307df8bae1dSRodney W. Grimes m->flags |= PG_BUSY; 308df8bae1dSRodney W. Grimes break; 309df8bae1dSRodney W. Grimes } 310df8bae1dSRodney W. Grimes 311df8bae1dSRodney W. Grimes if (((object->pager != NULL) && 312df8bae1dSRodney W. Grimes (!change_wiring || wired)) 313df8bae1dSRodney W. Grimes || (object == first_object)) { 314df8bae1dSRodney W. Grimes 31526f9a767SRodney W. Grimes #if 0 31626f9a767SRodney W. Grimes if (curproc && (vaddr < VM_MAXUSER_ADDRESS) && 31726f9a767SRodney W. Grimes (curproc->p_rlimit[RLIMIT_RSS].rlim_max < 31826f9a767SRodney W. Grimes curproc->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG)) { 31926f9a767SRodney W. Grimes UNLOCK_AND_DEALLOCATE; 32026f9a767SRodney W. 
Grimes vm_fault_free_pages(curproc); 32126f9a767SRodney W. Grimes goto RetryFault; 32226f9a767SRodney W. Grimes } 32326f9a767SRodney W. Grimes #endif 32426f9a767SRodney W. Grimes 32526f9a767SRodney W. Grimes if (swap_pager_full && !object->shadow && (!object->pager || 32626f9a767SRodney W. Grimes (object->pager && object->pager->pg_type == PG_SWAP && 32726f9a767SRodney W. Grimes !vm_pager_has_page(object->pager, offset+object->paging_offset)))) { 32826f9a767SRodney W. Grimes if (vaddr < VM_MAXUSER_ADDRESS && curproc && curproc->p_pid >= 48) /* XXX */ { 32926f9a767SRodney W. Grimes printf("Process %d killed by vm_fault -- out of swap\n", curproc->p_pid); 33026f9a767SRodney W. Grimes psignal(curproc, SIGKILL); 33126f9a767SRodney W. Grimes curproc->p_estcpu = 0; 33226f9a767SRodney W. Grimes curproc->p_nice = PRIO_MIN; 33326f9a767SRodney W. Grimes setpriority(curproc); 33426f9a767SRodney W. Grimes } 33526f9a767SRodney W. Grimes } 33626f9a767SRodney W. Grimes 337df8bae1dSRodney W. Grimes /* 338df8bae1dSRodney W. Grimes * Allocate a new page for this object/offset 339df8bae1dSRodney W. Grimes * pair. 340df8bae1dSRodney W. Grimes */ 341df8bae1dSRodney W. Grimes 342df8bae1dSRodney W. Grimes m = vm_page_alloc(object, offset); 343df8bae1dSRodney W. Grimes 344df8bae1dSRodney W. Grimes if (m == NULL) { 345df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 346df8bae1dSRodney W. Grimes VM_WAIT; 347df8bae1dSRodney W. Grimes goto RetryFault; 348df8bae1dSRodney W. Grimes } 349df8bae1dSRodney W. Grimes } 350df8bae1dSRodney W. Grimes 351df8bae1dSRodney W. Grimes if (object->pager != NULL && (!change_wiring || wired)) { 352df8bae1dSRodney W. Grimes int rv; 35326f9a767SRodney W. Grimes int faultcount; 35426f9a767SRodney W. Grimes int reqpage; 355df8bae1dSRodney W. Grimes 356df8bae1dSRodney W. Grimes /* 357df8bae1dSRodney W. Grimes * Now that we have a busy page, we can 358df8bae1dSRodney W. Grimes * release the object lock. 359df8bae1dSRodney W. Grimes */ 360df8bae1dSRodney W. 
Grimes vm_object_unlock(object); 36126f9a767SRodney W. Grimes /* 36226f9a767SRodney W. Grimes * now we find out if any other pages should 36326f9a767SRodney W. Grimes * be paged in at this time 36426f9a767SRodney W. Grimes * this routine checks to see if the pages surrounding this fault 36526f9a767SRodney W. Grimes * reside in the same object as the page for this fault. If 36626f9a767SRodney W. Grimes * they do, then they are faulted in also into the 36726f9a767SRodney W. Grimes * object. The array "marray" returned contains an array of 36826f9a767SRodney W. Grimes * vm_page_t structs where one of them is the vm_page_t passed to 36926f9a767SRodney W. Grimes * the routine. The reqpage return value is the index into the 37026f9a767SRodney W. Grimes * marray for the vm_page_t passed to the routine. 37126f9a767SRodney W. Grimes */ 37226f9a767SRodney W. Grimes cnt.v_pageins++; 37326f9a767SRodney W. Grimes faultcount = vm_fault_additional_pages(first_object, first_offset, 37426f9a767SRodney W. Grimes m, VM_FAULT_READ_BEHIND, VM_FAULT_READ_AHEAD, marray, &reqpage); 375df8bae1dSRodney W. Grimes 376df8bae1dSRodney W. Grimes /* 377df8bae1dSRodney W. Grimes * Call the pager to retrieve the data, if any, 378df8bae1dSRodney W. Grimes * after releasing the lock on the map. 379df8bae1dSRodney W. Grimes */ 380df8bae1dSRodney W. Grimes UNLOCK_MAP; 381df8bae1dSRodney W. Grimes 38226f9a767SRodney W. Grimes rv = faultcount ? 38326f9a767SRodney W. Grimes vm_pager_get_pages(object->pager, 38426f9a767SRodney W. Grimes marray, faultcount, reqpage, TRUE): VM_PAGER_FAIL; 38526f9a767SRodney W. Grimes if (rv == VM_PAGER_OK) { 386df8bae1dSRodney W. Grimes /* 387df8bae1dSRodney W. Grimes * Found the page. 388df8bae1dSRodney W. Grimes * Leave it busy while we play with it. 389df8bae1dSRodney W. Grimes */ 39026f9a767SRodney W. Grimes vm_object_lock(object); 39126f9a767SRodney W. Grimes 392df8bae1dSRodney W. Grimes /* 393df8bae1dSRodney W. Grimes * Relookup in case pager changed page. 
394df8bae1dSRodney W. Grimes * Pager is responsible for disposition 395df8bae1dSRodney W. Grimes * of old page if moved. 396df8bae1dSRodney W. Grimes */ 397df8bae1dSRodney W. Grimes m = vm_page_lookup(object, offset); 398df8bae1dSRodney W. Grimes 399df8bae1dSRodney W. Grimes cnt.v_pgpgin++; 400df8bae1dSRodney W. Grimes m->flags &= ~PG_FAKE; 401df8bae1dSRodney W. Grimes pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 40226f9a767SRodney W. Grimes hardfault++; 403df8bae1dSRodney W. Grimes break; 404df8bae1dSRodney W. Grimes } 405df8bae1dSRodney W. Grimes 406df8bae1dSRodney W. Grimes /* 40726f9a767SRodney W. Grimes * Remove the bogus page (which does not 40826f9a767SRodney W. Grimes * exist at this object/offset); before 40926f9a767SRodney W. Grimes * doing so, we must get back our object 41026f9a767SRodney W. Grimes * lock to preserve our invariant. 411df8bae1dSRodney W. Grimes * 41226f9a767SRodney W. Grimes * Also wake up any other thread that may want 41326f9a767SRodney W. Grimes * to bring in this page. 414df8bae1dSRodney W. Grimes * 415df8bae1dSRodney W. Grimes * If this is the top-level object, we must 416df8bae1dSRodney W. Grimes * leave the busy page to prevent another 417df8bae1dSRodney W. Grimes * thread from rushing past us, and inserting 418df8bae1dSRodney W. Grimes * the page in that object at the same time 419df8bae1dSRodney W. Grimes * that we are. 420df8bae1dSRodney W. Grimes */ 42126f9a767SRodney W. Grimes 42226f9a767SRodney W. Grimes vm_object_lock(object); 42326f9a767SRodney W. Grimes /* 42426f9a767SRodney W. Grimes * Data outside the range of the pager; an error 42526f9a767SRodney W. Grimes */ 42626f9a767SRodney W. Grimes if ((rv == VM_PAGER_ERROR) || (rv == VM_PAGER_BAD)) { 42726f9a767SRodney W. Grimes FREE_PAGE(m); 42826f9a767SRodney W. Grimes UNLOCK_AND_DEALLOCATE; 42926f9a767SRodney W. Grimes return(KERN_PROTECTION_FAILURE); /* XXX */ 43026f9a767SRodney W. Grimes } 431df8bae1dSRodney W. Grimes if (object != first_object) { 432df8bae1dSRodney W. 
Grimes FREE_PAGE(m); 43326f9a767SRodney W. Grimes /* 43426f9a767SRodney W. Grimes * XXX - we cannot just fall out at this 43526f9a767SRodney W. Grimes * point, m has been freed and is invalid! 43626f9a767SRodney W. Grimes */ 437df8bae1dSRodney W. Grimes } 438df8bae1dSRodney W. Grimes } 439df8bae1dSRodney W. Grimes 440df8bae1dSRodney W. Grimes /* 441df8bae1dSRodney W. Grimes * We get here if the object has no pager (or unwiring) 442df8bae1dSRodney W. Grimes * or the pager doesn't have the page. 443df8bae1dSRodney W. Grimes */ 444df8bae1dSRodney W. Grimes if (object == first_object) 445df8bae1dSRodney W. Grimes first_m = m; 446df8bae1dSRodney W. Grimes 447df8bae1dSRodney W. Grimes /* 448df8bae1dSRodney W. Grimes * Move on to the next object. Lock the next 449df8bae1dSRodney W. Grimes * object before unlocking the current one. 450df8bae1dSRodney W. Grimes */ 451df8bae1dSRodney W. Grimes 452df8bae1dSRodney W. Grimes offset += object->shadow_offset; 453df8bae1dSRodney W. Grimes next_object = object->shadow; 454df8bae1dSRodney W. Grimes if (next_object == NULL) { 455df8bae1dSRodney W. Grimes /* 456df8bae1dSRodney W. Grimes * If there's no object left, fill the page 457df8bae1dSRodney W. Grimes * in the top object with zeros. 458df8bae1dSRodney W. Grimes */ 459df8bae1dSRodney W. Grimes if (object != first_object) { 460df8bae1dSRodney W. Grimes object->paging_in_progress--; 46126f9a767SRodney W. Grimes if (object->paging_in_progress == 0) 46226f9a767SRodney W. Grimes wakeup((caddr_t) object); 463df8bae1dSRodney W. Grimes vm_object_unlock(object); 464df8bae1dSRodney W. Grimes 465df8bae1dSRodney W. Grimes object = first_object; 466df8bae1dSRodney W. Grimes offset = first_offset; 467df8bae1dSRodney W. Grimes m = first_m; 468df8bae1dSRodney W. Grimes vm_object_lock(object); 469df8bae1dSRodney W. Grimes } 470df8bae1dSRodney W. Grimes first_m = NULL; 471df8bae1dSRodney W. Grimes 472df8bae1dSRodney W. Grimes vm_page_zero_fill(m); 473df8bae1dSRodney W. 
Grimes cnt.v_zfod++; 474df8bae1dSRodney W. Grimes m->flags &= ~PG_FAKE; 475df8bae1dSRodney W. Grimes break; 476df8bae1dSRodney W. Grimes } 477df8bae1dSRodney W. Grimes else { 478df8bae1dSRodney W. Grimes vm_object_lock(next_object); 47926f9a767SRodney W. Grimes if (object != first_object) { 480df8bae1dSRodney W. Grimes object->paging_in_progress--; 48126f9a767SRodney W. Grimes if (object->paging_in_progress == 0) 48226f9a767SRodney W. Grimes wakeup((caddr_t) object); 48326f9a767SRodney W. Grimes } 484df8bae1dSRodney W. Grimes vm_object_unlock(object); 485df8bae1dSRodney W. Grimes object = next_object; 486df8bae1dSRodney W. Grimes object->paging_in_progress++; 487df8bae1dSRodney W. Grimes } 488df8bae1dSRodney W. Grimes } 489df8bae1dSRodney W. Grimes 49026f9a767SRodney W. Grimes if ((m->flags & (PG_ACTIVE|PG_INACTIVE) != 0) || 49126f9a767SRodney W. Grimes (m->flags & PG_BUSY) == 0) 49226f9a767SRodney W. Grimes panic("vm_fault: absent or active or inactive or not busy after main loop"); 493df8bae1dSRodney W. Grimes 494df8bae1dSRodney W. Grimes /* 495df8bae1dSRodney W. Grimes * PAGE HAS BEEN FOUND. 496df8bae1dSRodney W. Grimes * [Loop invariant still holds -- the object lock 497df8bae1dSRodney W. Grimes * is held.] 498df8bae1dSRodney W. Grimes */ 499df8bae1dSRodney W. Grimes 500df8bae1dSRodney W. Grimes old_m = m; /* save page that would be copied */ 501df8bae1dSRodney W. Grimes 502df8bae1dSRodney W. Grimes /* 503df8bae1dSRodney W. Grimes * If the page is being written, but isn't 504df8bae1dSRodney W. Grimes * already owned by the top-level object, 505df8bae1dSRodney W. Grimes * we have to copy it into a new page owned 506df8bae1dSRodney W. Grimes * by the top-level object. 507df8bae1dSRodney W. Grimes */ 508df8bae1dSRodney W. Grimes 509df8bae1dSRodney W. Grimes if (object != first_object) { 510df8bae1dSRodney W. Grimes /* 511df8bae1dSRodney W. Grimes * We only really need to copy if we 512df8bae1dSRodney W. Grimes * want to write it. 513df8bae1dSRodney W. 
Grimes */ 514df8bae1dSRodney W. Grimes 515df8bae1dSRodney W. Grimes if (fault_type & VM_PROT_WRITE) { 516df8bae1dSRodney W. Grimes 517df8bae1dSRodney W. Grimes /* 518df8bae1dSRodney W. Grimes * If we try to collapse first_object at this 519df8bae1dSRodney W. Grimes * point, we may deadlock when we try to get 520df8bae1dSRodney W. Grimes * the lock on an intermediate object (since we 521df8bae1dSRodney W. Grimes * have the bottom object locked). We can't 522df8bae1dSRodney W. Grimes * unlock the bottom object, because the page 523df8bae1dSRodney W. Grimes * we found may move (by collapse) if we do. 524df8bae1dSRodney W. Grimes * 525df8bae1dSRodney W. Grimes * Instead, we first copy the page. Then, when 526df8bae1dSRodney W. Grimes * we have no more use for the bottom object, 527df8bae1dSRodney W. Grimes * we unlock it and try to collapse. 528df8bae1dSRodney W. Grimes * 529df8bae1dSRodney W. Grimes * Note that we copy the page even if we didn't 530df8bae1dSRodney W. Grimes * need to... that's the breaks. 531df8bae1dSRodney W. Grimes */ 532df8bae1dSRodney W. Grimes 533df8bae1dSRodney W. Grimes /* 534df8bae1dSRodney W. Grimes * We already have an empty page in 535df8bae1dSRodney W. Grimes * first_object - use it. 536df8bae1dSRodney W. Grimes */ 537df8bae1dSRodney W. Grimes 538df8bae1dSRodney W. Grimes vm_page_copy(m, first_m); 539df8bae1dSRodney W. Grimes first_m->flags &= ~PG_FAKE; 540df8bae1dSRodney W. Grimes 541df8bae1dSRodney W. Grimes /* 542df8bae1dSRodney W. Grimes * If another map is truly sharing this 543df8bae1dSRodney W. Grimes * page with us, we have to flush all 544df8bae1dSRodney W. Grimes * uses of the original page, since we 545df8bae1dSRodney W. Grimes * can't distinguish those which want the 546df8bae1dSRodney W. Grimes * original from those which need the 547df8bae1dSRodney W. Grimes * new copy. 548df8bae1dSRodney W. Grimes * 549df8bae1dSRodney W. Grimes * XXX If we know that only one map has 550df8bae1dSRodney W. 
Grimes * access to this page, then we could 551df8bae1dSRodney W. Grimes * avoid the pmap_page_protect() call. 552df8bae1dSRodney W. Grimes */ 553df8bae1dSRodney W. Grimes 554df8bae1dSRodney W. Grimes vm_page_lock_queues(); 55526f9a767SRodney W. Grimes 556df8bae1dSRodney W. Grimes vm_page_activate(m); 557df8bae1dSRodney W. Grimes pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); 55826f9a767SRodney W. Grimes if ((m->flags & PG_CLEAN) == 0) 55926f9a767SRodney W. Grimes m->flags |= PG_LAUNDRY; 560df8bae1dSRodney W. Grimes vm_page_unlock_queues(); 561df8bae1dSRodney W. Grimes 562df8bae1dSRodney W. Grimes /* 563df8bae1dSRodney W. Grimes * We no longer need the old page or object. 564df8bae1dSRodney W. Grimes */ 565df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); 566df8bae1dSRodney W. Grimes object->paging_in_progress--; 56726f9a767SRodney W. Grimes if (object->paging_in_progress == 0) 56826f9a767SRodney W. Grimes wakeup((caddr_t) object); 569df8bae1dSRodney W. Grimes vm_object_unlock(object); 570df8bae1dSRodney W. Grimes 571df8bae1dSRodney W. Grimes /* 572df8bae1dSRodney W. Grimes * Only use the new page below... 573df8bae1dSRodney W. Grimes */ 574df8bae1dSRodney W. Grimes 575df8bae1dSRodney W. Grimes cnt.v_cow_faults++; 576df8bae1dSRodney W. Grimes m = first_m; 577df8bae1dSRodney W. Grimes object = first_object; 578df8bae1dSRodney W. Grimes offset = first_offset; 579df8bae1dSRodney W. Grimes 580df8bae1dSRodney W. Grimes /* 581df8bae1dSRodney W. Grimes * Now that we've gotten the copy out of the 582df8bae1dSRodney W. Grimes * way, let's try to collapse the top object. 583df8bae1dSRodney W. Grimes */ 584df8bae1dSRodney W. Grimes vm_object_lock(object); 585df8bae1dSRodney W. Grimes /* 586df8bae1dSRodney W. Grimes * But we have to play ugly games with 587df8bae1dSRodney W. Grimes * paging_in_progress to do that... 588df8bae1dSRodney W. Grimes */ 589df8bae1dSRodney W. Grimes object->paging_in_progress--; 59026f9a767SRodney W. 
Grimes if (object->paging_in_progress == 0) 59126f9a767SRodney W. Grimes wakeup((caddr_t) object); 592df8bae1dSRodney W. Grimes vm_object_collapse(object); 593df8bae1dSRodney W. Grimes object->paging_in_progress++; 594df8bae1dSRodney W. Grimes } 595df8bae1dSRodney W. Grimes else { 596df8bae1dSRodney W. Grimes prot &= ~VM_PROT_WRITE; 597df8bae1dSRodney W. Grimes m->flags |= PG_COPYONWRITE; 598df8bae1dSRodney W. Grimes } 599df8bae1dSRodney W. Grimes } 600df8bae1dSRodney W. Grimes 601df8bae1dSRodney W. Grimes if (m->flags & (PG_ACTIVE|PG_INACTIVE)) 602df8bae1dSRodney W. Grimes panic("vm_fault: active or inactive before copy object handling"); 603df8bae1dSRodney W. Grimes 604df8bae1dSRodney W. Grimes /* 605df8bae1dSRodney W. Grimes * If the page is being written, but hasn't been 606df8bae1dSRodney W. Grimes * copied to the copy-object, we have to copy it there. 607df8bae1dSRodney W. Grimes */ 608df8bae1dSRodney W. Grimes RetryCopy: 609df8bae1dSRodney W. Grimes if (first_object->copy != NULL) { 610df8bae1dSRodney W. Grimes vm_object_t copy_object = first_object->copy; 611df8bae1dSRodney W. Grimes vm_offset_t copy_offset; 612df8bae1dSRodney W. Grimes vm_page_t copy_m; 613df8bae1dSRodney W. Grimes 614df8bae1dSRodney W. Grimes /* 615df8bae1dSRodney W. Grimes * We only need to copy if we want to write it. 616df8bae1dSRodney W. Grimes */ 617df8bae1dSRodney W. Grimes if ((fault_type & VM_PROT_WRITE) == 0) { 618df8bae1dSRodney W. Grimes prot &= ~VM_PROT_WRITE; 619df8bae1dSRodney W. Grimes m->flags |= PG_COPYONWRITE; 620df8bae1dSRodney W. Grimes } 621df8bae1dSRodney W. Grimes else { 622df8bae1dSRodney W. Grimes /* 623df8bae1dSRodney W. Grimes * Try to get the lock on the copy_object. 624df8bae1dSRodney W. Grimes */ 625df8bae1dSRodney W. Grimes if (!vm_object_lock_try(copy_object)) { 626df8bae1dSRodney W. Grimes vm_object_unlock(object); 627df8bae1dSRodney W. Grimes /* should spin a bit here... */ 628df8bae1dSRodney W. Grimes vm_object_lock(object); 629df8bae1dSRodney W. 
Grimes goto RetryCopy; 630df8bae1dSRodney W. Grimes } 631df8bae1dSRodney W. Grimes 632df8bae1dSRodney W. Grimes /* 633df8bae1dSRodney W. Grimes * Make another reference to the copy-object, 634df8bae1dSRodney W. Grimes * to keep it from disappearing during the 635df8bae1dSRodney W. Grimes * copy. 636df8bae1dSRodney W. Grimes */ 637df8bae1dSRodney W. Grimes copy_object->ref_count++; 638df8bae1dSRodney W. Grimes 639df8bae1dSRodney W. Grimes /* 640df8bae1dSRodney W. Grimes * Does the page exist in the copy? 641df8bae1dSRodney W. Grimes */ 642df8bae1dSRodney W. Grimes copy_offset = first_offset 643df8bae1dSRodney W. Grimes - copy_object->shadow_offset; 644df8bae1dSRodney W. Grimes copy_m = vm_page_lookup(copy_object, copy_offset); 645df8bae1dSRodney W. Grimes if (page_exists = (copy_m != NULL)) { 6468e58bf68SDavid Greenman if (copy_m->flags & (PG_BUSY|PG_VMIO)) { 647df8bae1dSRodney W. Grimes /* 648df8bae1dSRodney W. Grimes * If the page is being brought 649df8bae1dSRodney W. Grimes * in, wait for it and then retry. 650df8bae1dSRodney W. Grimes */ 651df8bae1dSRodney W. Grimes PAGE_ASSERT_WAIT(copy_m, !change_wiring); 652df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 653df8bae1dSRodney W. Grimes copy_object->ref_count--; 654df8bae1dSRodney W. Grimes vm_object_unlock(copy_object); 655df8bae1dSRodney W. Grimes UNLOCK_THINGS; 65626f9a767SRodney W. Grimes thread_block("fltcpy"); 657df8bae1dSRodney W. Grimes vm_object_deallocate(first_object); 658df8bae1dSRodney W. Grimes goto RetryFault; 659df8bae1dSRodney W. Grimes } 660df8bae1dSRodney W. Grimes } 661df8bae1dSRodney W. Grimes 662df8bae1dSRodney W. Grimes /* 663df8bae1dSRodney W. Grimes * If the page is not in memory (in the object) 664df8bae1dSRodney W. Grimes * and the object has a pager, we have to check 665df8bae1dSRodney W. Grimes * if the pager has the data in secondary 666df8bae1dSRodney W. Grimes * storage. 667df8bae1dSRodney W. Grimes */ 668df8bae1dSRodney W. Grimes if (!page_exists) { 669df8bae1dSRodney W. 
Grimes 670df8bae1dSRodney W. Grimes /* 671df8bae1dSRodney W. Grimes * If we don't allocate a (blank) page 672df8bae1dSRodney W. Grimes * here... another thread could try 673df8bae1dSRodney W. Grimes * to page it in, allocate a page, and 674df8bae1dSRodney W. Grimes * then block on the busy page in its 675df8bae1dSRodney W. Grimes * shadow (first_object). Then we'd 676df8bae1dSRodney W. Grimes * trip over the busy page after we 677df8bae1dSRodney W. Grimes * found that the copy_object's pager 678df8bae1dSRodney W. Grimes * doesn't have the page... 679df8bae1dSRodney W. Grimes */ 68026f9a767SRodney W. Grimes copy_m = vm_page_alloc(copy_object, copy_offset); 681df8bae1dSRodney W. Grimes if (copy_m == NULL) { 682df8bae1dSRodney W. Grimes /* 683df8bae1dSRodney W. Grimes * Wait for a page, then retry. 684df8bae1dSRodney W. Grimes */ 685df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 686df8bae1dSRodney W. Grimes copy_object->ref_count--; 687df8bae1dSRodney W. Grimes vm_object_unlock(copy_object); 688df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 689df8bae1dSRodney W. Grimes VM_WAIT; 690df8bae1dSRodney W. Grimes goto RetryFault; 691df8bae1dSRodney W. Grimes } 692df8bae1dSRodney W. Grimes 693df8bae1dSRodney W. Grimes if (copy_object->pager != NULL) { 694df8bae1dSRodney W. Grimes vm_object_unlock(object); 695df8bae1dSRodney W. Grimes vm_object_unlock(copy_object); 696df8bae1dSRodney W. Grimes UNLOCK_MAP; 697df8bae1dSRodney W. Grimes 698df8bae1dSRodney W. Grimes page_exists = vm_pager_has_page( 699df8bae1dSRodney W. Grimes copy_object->pager, 700df8bae1dSRodney W. Grimes (copy_offset + copy_object->paging_offset)); 701df8bae1dSRodney W. Grimes 702df8bae1dSRodney W. Grimes vm_object_lock(copy_object); 703df8bae1dSRodney W. Grimes 704df8bae1dSRodney W. Grimes /* 705df8bae1dSRodney W. Grimes * Since the map is unlocked, someone 706df8bae1dSRodney W. Grimes * else could have copied this object 707df8bae1dSRodney W. Grimes * and put a different copy_object 708df8bae1dSRodney W. 
Grimes * between the two. Or, the last 709df8bae1dSRodney W. Grimes * reference to the copy-object (other 710df8bae1dSRodney W. Grimes * than the one we have) may have 711df8bae1dSRodney W. Grimes * disappeared - if that has happened, 712df8bae1dSRodney W. Grimes * we don't need to make the copy. 713df8bae1dSRodney W. Grimes */ 714df8bae1dSRodney W. Grimes if (copy_object->shadow != object || 715df8bae1dSRodney W. Grimes copy_object->ref_count == 1) { 716df8bae1dSRodney W. Grimes /* 717df8bae1dSRodney W. Grimes * Gaah... start over! 718df8bae1dSRodney W. Grimes */ 719df8bae1dSRodney W. Grimes FREE_PAGE(copy_m); 720df8bae1dSRodney W. Grimes vm_object_unlock(copy_object); 721df8bae1dSRodney W. Grimes vm_object_deallocate(copy_object); 722df8bae1dSRodney W. Grimes /* may block */ 723df8bae1dSRodney W. Grimes vm_object_lock(object); 724df8bae1dSRodney W. Grimes goto RetryCopy; 725df8bae1dSRodney W. Grimes } 726df8bae1dSRodney W. Grimes vm_object_lock(object); 727df8bae1dSRodney W. Grimes 728df8bae1dSRodney W. Grimes if (page_exists) { 729df8bae1dSRodney W. Grimes /* 730df8bae1dSRodney W. Grimes * We didn't need the page 731df8bae1dSRodney W. Grimes */ 732df8bae1dSRodney W. Grimes FREE_PAGE(copy_m); 733df8bae1dSRodney W. Grimes } 734df8bae1dSRodney W. Grimes } 735df8bae1dSRodney W. Grimes } 736df8bae1dSRodney W. Grimes if (!page_exists) { 737df8bae1dSRodney W. Grimes /* 738df8bae1dSRodney W. Grimes * Must copy page into copy-object. 739df8bae1dSRodney W. Grimes */ 740df8bae1dSRodney W. Grimes vm_page_copy(m, copy_m); 741df8bae1dSRodney W. Grimes copy_m->flags &= ~PG_FAKE; 742df8bae1dSRodney W. Grimes 743df8bae1dSRodney W. Grimes /* 744df8bae1dSRodney W. Grimes * Things to remember: 745df8bae1dSRodney W. Grimes * 1. The copied page must be marked 'dirty' 746df8bae1dSRodney W. Grimes * so it will be paged out to the copy 747df8bae1dSRodney W. Grimes * object. 748df8bae1dSRodney W. Grimes * 2. If the old page was in use by any users 749df8bae1dSRodney W. 
Grimes * of the copy-object, it must be removed 750df8bae1dSRodney W. Grimes * from all pmaps. (We can't know which 751df8bae1dSRodney W. Grimes * pmaps use it.) 752df8bae1dSRodney W. Grimes */ 753df8bae1dSRodney W. Grimes vm_page_lock_queues(); 75426f9a767SRodney W. Grimes 75526f9a767SRodney W. Grimes vm_page_activate(old_m); 75626f9a767SRodney W. Grimes 75726f9a767SRodney W. Grimes 758df8bae1dSRodney W. Grimes pmap_page_protect(VM_PAGE_TO_PHYS(old_m), 759df8bae1dSRodney W. Grimes VM_PROT_NONE); 76026f9a767SRodney W. Grimes if ((old_m->flags & PG_CLEAN) == 0) 76126f9a767SRodney W. Grimes old_m->flags |= PG_LAUNDRY; 762df8bae1dSRodney W. Grimes copy_m->flags &= ~PG_CLEAN; 76326f9a767SRodney W. Grimes vm_page_activate(copy_m); 764df8bae1dSRodney W. Grimes vm_page_unlock_queues(); 765df8bae1dSRodney W. Grimes 766df8bae1dSRodney W. Grimes PAGE_WAKEUP(copy_m); 767df8bae1dSRodney W. Grimes } 768df8bae1dSRodney W. Grimes /* 769df8bae1dSRodney W. Grimes * The reference count on copy_object must be 770df8bae1dSRodney W. Grimes * at least 2: one for our extra reference, 771df8bae1dSRodney W. Grimes * and at least one from the outside world 772df8bae1dSRodney W. Grimes * (we checked that when we last locked 773df8bae1dSRodney W. Grimes * copy_object). 774df8bae1dSRodney W. Grimes */ 775df8bae1dSRodney W. Grimes copy_object->ref_count--; 776df8bae1dSRodney W. Grimes vm_object_unlock(copy_object); 777df8bae1dSRodney W. Grimes m->flags &= ~PG_COPYONWRITE; 778df8bae1dSRodney W. Grimes } 779df8bae1dSRodney W. Grimes } 780df8bae1dSRodney W. Grimes 781df8bae1dSRodney W. Grimes if (m->flags & (PG_ACTIVE | PG_INACTIVE)) 782df8bae1dSRodney W. Grimes panic("vm_fault: active or inactive before retrying lookup"); 783df8bae1dSRodney W. Grimes 784df8bae1dSRodney W. Grimes /* 785df8bae1dSRodney W. Grimes * We must verify that the maps have not changed 786df8bae1dSRodney W. Grimes * since our last lookup. 787df8bae1dSRodney W. Grimes */ 788df8bae1dSRodney W. Grimes 789df8bae1dSRodney W. 
Grimes if (!lookup_still_valid) { 790df8bae1dSRodney W. Grimes vm_object_t retry_object; 791df8bae1dSRodney W. Grimes vm_offset_t retry_offset; 792df8bae1dSRodney W. Grimes vm_prot_t retry_prot; 793df8bae1dSRodney W. Grimes 794df8bae1dSRodney W. Grimes /* 795df8bae1dSRodney W. Grimes * Since map entries may be pageable, make sure we can 796df8bae1dSRodney W. Grimes * take a page fault on them. 797df8bae1dSRodney W. Grimes */ 798df8bae1dSRodney W. Grimes vm_object_unlock(object); 799df8bae1dSRodney W. Grimes 800df8bae1dSRodney W. Grimes /* 801df8bae1dSRodney W. Grimes * To avoid trying to write_lock the map while another 802df8bae1dSRodney W. Grimes * thread has it read_locked (in vm_map_pageable), we 803df8bae1dSRodney W. Grimes * do not try for write permission. If the page is 804df8bae1dSRodney W. Grimes * still writable, we will get write permission. If it 805df8bae1dSRodney W. Grimes * is not, or has been marked needs_copy, we enter the 806df8bae1dSRodney W. Grimes * mapping without write permission, and will merely 807df8bae1dSRodney W. Grimes * take another fault. 808df8bae1dSRodney W. Grimes */ 809df8bae1dSRodney W. Grimes result = vm_map_lookup(&map, vaddr, 810df8bae1dSRodney W. Grimes fault_type & ~VM_PROT_WRITE, &entry, 811df8bae1dSRodney W. Grimes &retry_object, &retry_offset, &retry_prot, 812df8bae1dSRodney W. Grimes &wired, &su); 813df8bae1dSRodney W. Grimes 814df8bae1dSRodney W. Grimes vm_object_lock(object); 815df8bae1dSRodney W. Grimes 816df8bae1dSRodney W. Grimes /* 817df8bae1dSRodney W. Grimes * If we don't need the page any longer, put it on the 818df8bae1dSRodney W. Grimes * active list (the easiest thing to do here). If no 819df8bae1dSRodney W. Grimes * one needs it, pageout will grab it eventually. 820df8bae1dSRodney W. Grimes */ 821df8bae1dSRodney W. Grimes 822df8bae1dSRodney W. Grimes if (result != KERN_SUCCESS) { 823df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 824df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 825df8bae1dSRodney W. 
Grimes return(result); 826df8bae1dSRodney W. Grimes } 827df8bae1dSRodney W. Grimes 828df8bae1dSRodney W. Grimes lookup_still_valid = TRUE; 829df8bae1dSRodney W. Grimes 830df8bae1dSRodney W. Grimes if ((retry_object != first_object) || 831df8bae1dSRodney W. Grimes (retry_offset != first_offset)) { 832df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 833df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 834df8bae1dSRodney W. Grimes goto RetryFault; 835df8bae1dSRodney W. Grimes } 836df8bae1dSRodney W. Grimes 837df8bae1dSRodney W. Grimes /* 838df8bae1dSRodney W. Grimes * Check whether the protection has changed or the object 839df8bae1dSRodney W. Grimes * has been copied while we left the map unlocked. 840df8bae1dSRodney W. Grimes * Changing from read to write permission is OK - we leave 841df8bae1dSRodney W. Grimes * the page write-protected, and catch the write fault. 842df8bae1dSRodney W. Grimes * Changing from write to read permission means that we 843df8bae1dSRodney W. Grimes * can't mark the page write-enabled after all. 844df8bae1dSRodney W. Grimes */ 845df8bae1dSRodney W. Grimes prot &= retry_prot; 846df8bae1dSRodney W. Grimes if (m->flags & PG_COPYONWRITE) 847df8bae1dSRodney W. Grimes prot &= ~VM_PROT_WRITE; 848df8bae1dSRodney W. Grimes } 849df8bae1dSRodney W. Grimes 850df8bae1dSRodney W. Grimes /* 851df8bae1dSRodney W. Grimes * (the various bits we're fiddling with here are locked by 852df8bae1dSRodney W. Grimes * the object's lock) 853df8bae1dSRodney W. Grimes */ 854df8bae1dSRodney W. Grimes 855df8bae1dSRodney W. Grimes /* XXX This distorts the meaning of the copy_on_write bit */ 856df8bae1dSRodney W. Grimes 857df8bae1dSRodney W. Grimes if (prot & VM_PROT_WRITE) 858df8bae1dSRodney W. Grimes m->flags &= ~PG_COPYONWRITE; 859df8bae1dSRodney W. Grimes 860df8bae1dSRodney W. Grimes /* 861df8bae1dSRodney W. Grimes * It's critically important that a wired-down page be faulted 862df8bae1dSRodney W. Grimes * only once in each map for which it is wired. 863df8bae1dSRodney W. 
Grimes */ 864df8bae1dSRodney W. Grimes 865df8bae1dSRodney W. Grimes if (m->flags & (PG_ACTIVE | PG_INACTIVE)) 866df8bae1dSRodney W. Grimes panic("vm_fault: active or inactive before pmap_enter"); 867df8bae1dSRodney W. Grimes 868df8bae1dSRodney W. Grimes vm_object_unlock(object); 869df8bae1dSRodney W. Grimes 870df8bae1dSRodney W. Grimes /* 871df8bae1dSRodney W. Grimes * Put this page into the physical map. 872df8bae1dSRodney W. Grimes * We had to do the unlock above because pmap_enter 873df8bae1dSRodney W. Grimes * may cause other faults. We don't put the 874df8bae1dSRodney W. Grimes * page back on the active queue until later so 875df8bae1dSRodney W. Grimes * that the page-out daemon won't find us (yet). 876df8bae1dSRodney W. Grimes */ 877df8bae1dSRodney W. Grimes 878df8bae1dSRodney W. Grimes pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); 879df8bae1dSRodney W. Grimes 880df8bae1dSRodney W. Grimes /* 881df8bae1dSRodney W. Grimes * If the page is not wired down, then put it where the 882df8bae1dSRodney W. Grimes * pageout daemon can find it. 883df8bae1dSRodney W. Grimes */ 884df8bae1dSRodney W. Grimes vm_object_lock(object); 885df8bae1dSRodney W. Grimes vm_page_lock_queues(); 886df8bae1dSRodney W. Grimes if (change_wiring) { 887df8bae1dSRodney W. Grimes if (wired) 888df8bae1dSRodney W. Grimes vm_page_wire(m); 889df8bae1dSRodney W. Grimes else 890df8bae1dSRodney W. Grimes vm_page_unwire(m); 891df8bae1dSRodney W. Grimes } 89226f9a767SRodney W. Grimes else { 893df8bae1dSRodney W. Grimes vm_page_activate(m); 89426f9a767SRodney W. Grimes } 89526f9a767SRodney W. Grimes 89626f9a767SRodney W. Grimes if( curproc && curproc->p_stats) { 89726f9a767SRodney W. Grimes if (hardfault) { 89826f9a767SRodney W. Grimes curproc->p_stats->p_ru.ru_majflt++; 89926f9a767SRodney W. Grimes } else { 90026f9a767SRodney W. Grimes curproc->p_stats->p_ru.ru_minflt++; 90126f9a767SRodney W. Grimes } 90226f9a767SRodney W. Grimes } 90326f9a767SRodney W. Grimes 904df8bae1dSRodney W. 
Grimes vm_page_unlock_queues(); 905df8bae1dSRodney W. Grimes 906df8bae1dSRodney W. Grimes /* 907df8bae1dSRodney W. Grimes * Unlock everything, and return 908df8bae1dSRodney W. Grimes */ 909df8bae1dSRodney W. Grimes 910df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); 911df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 912df8bae1dSRodney W. Grimes 913df8bae1dSRodney W. Grimes return(KERN_SUCCESS); 914df8bae1dSRodney W. Grimes 915df8bae1dSRodney W. Grimes } 916df8bae1dSRodney W. Grimes 917df8bae1dSRodney W. Grimes /* 918df8bae1dSRodney W. Grimes * vm_fault_wire: 919df8bae1dSRodney W. Grimes * 920df8bae1dSRodney W. Grimes * Wire down a range of virtual addresses in a map. 921df8bae1dSRodney W. Grimes */ 922df8bae1dSRodney W. Grimes int 923df8bae1dSRodney W. Grimes vm_fault_wire(map, start, end) 924df8bae1dSRodney W. Grimes vm_map_t map; 925df8bae1dSRodney W. Grimes vm_offset_t start, end; 926df8bae1dSRodney W. Grimes { 92726f9a767SRodney W. Grimes 928df8bae1dSRodney W. Grimes register vm_offset_t va; 929df8bae1dSRodney W. Grimes register pmap_t pmap; 930df8bae1dSRodney W. Grimes int rv; 931df8bae1dSRodney W. Grimes 932df8bae1dSRodney W. Grimes pmap = vm_map_pmap(map); 933df8bae1dSRodney W. Grimes 934df8bae1dSRodney W. Grimes /* 935df8bae1dSRodney W. Grimes * Inform the physical mapping system that the 936df8bae1dSRodney W. Grimes * range of addresses may not fault, so that 937df8bae1dSRodney W. Grimes * page tables and such can be locked down as well. 938df8bae1dSRodney W. Grimes */ 939df8bae1dSRodney W. Grimes 940df8bae1dSRodney W. Grimes pmap_pageable(pmap, start, end, FALSE); 941df8bae1dSRodney W. Grimes 942df8bae1dSRodney W. Grimes /* 943df8bae1dSRodney W. Grimes * We simulate a fault to get the page and enter it 944df8bae1dSRodney W. Grimes * in the physical map. 945df8bae1dSRodney W. Grimes */ 946df8bae1dSRodney W. Grimes 947df8bae1dSRodney W. Grimes for (va = start; va < end; va += PAGE_SIZE) { 948df8bae1dSRodney W. 
Grimes rv = vm_fault(map, va, VM_PROT_NONE, TRUE); 949df8bae1dSRodney W. Grimes if (rv) { 950df8bae1dSRodney W. Grimes if (va != start) 951df8bae1dSRodney W. Grimes vm_fault_unwire(map, start, va); 952df8bae1dSRodney W. Grimes return(rv); 953df8bae1dSRodney W. Grimes } 954df8bae1dSRodney W. Grimes } 955df8bae1dSRodney W. Grimes return(KERN_SUCCESS); 956df8bae1dSRodney W. Grimes } 957df8bae1dSRodney W. Grimes 958df8bae1dSRodney W. Grimes 959df8bae1dSRodney W. Grimes /* 960df8bae1dSRodney W. Grimes * vm_fault_unwire: 961df8bae1dSRodney W. Grimes * 962df8bae1dSRodney W. Grimes * Unwire a range of virtual addresses in a map. 963df8bae1dSRodney W. Grimes */ 96426f9a767SRodney W. Grimes void 96526f9a767SRodney W. Grimes vm_fault_unwire(map, start, end) 966df8bae1dSRodney W. Grimes vm_map_t map; 967df8bae1dSRodney W. Grimes vm_offset_t start, end; 968df8bae1dSRodney W. Grimes { 969df8bae1dSRodney W. Grimes 970df8bae1dSRodney W. Grimes register vm_offset_t va, pa; 971df8bae1dSRodney W. Grimes register pmap_t pmap; 972df8bae1dSRodney W. Grimes 973df8bae1dSRodney W. Grimes pmap = vm_map_pmap(map); 974df8bae1dSRodney W. Grimes 975df8bae1dSRodney W. Grimes /* 976df8bae1dSRodney W. Grimes * Since the pages are wired down, we must be able to 977df8bae1dSRodney W. Grimes * get their mappings from the physical map system. 978df8bae1dSRodney W. Grimes */ 979df8bae1dSRodney W. Grimes 980df8bae1dSRodney W. Grimes vm_page_lock_queues(); 981df8bae1dSRodney W. Grimes 982df8bae1dSRodney W. Grimes for (va = start; va < end; va += PAGE_SIZE) { 983df8bae1dSRodney W. Grimes pa = pmap_extract(pmap, va); 984df8bae1dSRodney W. Grimes if (pa == (vm_offset_t) 0) { 985df8bae1dSRodney W. Grimes panic("unwire: page not in pmap"); 986df8bae1dSRodney W. Grimes } 987df8bae1dSRodney W. Grimes pmap_change_wiring(pmap, va, FALSE); 988df8bae1dSRodney W. Grimes vm_page_unwire(PHYS_TO_VM_PAGE(pa)); 989df8bae1dSRodney W. Grimes } 990df8bae1dSRodney W. Grimes vm_page_unlock_queues(); 991df8bae1dSRodney W. 
Grimes 992df8bae1dSRodney W. Grimes /* 993df8bae1dSRodney W. Grimes * Inform the physical mapping system that the range 994df8bae1dSRodney W. Grimes * of addresses may fault, so that page tables and 995df8bae1dSRodney W. Grimes * such may be unwired themselves. 996df8bae1dSRodney W. Grimes */ 997df8bae1dSRodney W. Grimes 998df8bae1dSRodney W. Grimes pmap_pageable(pmap, start, end, TRUE); 999df8bae1dSRodney W. Grimes 1000df8bae1dSRodney W. Grimes } 1001df8bae1dSRodney W. Grimes 1002df8bae1dSRodney W. Grimes /* 1003df8bae1dSRodney W. Grimes * Routine: 1004df8bae1dSRodney W. Grimes * vm_fault_copy_entry 1005df8bae1dSRodney W. Grimes * Function: 1006df8bae1dSRodney W. Grimes * Copy all of the pages from a wired-down map entry to another. 1007df8bae1dSRodney W. Grimes * 1008df8bae1dSRodney W. Grimes * In/out conditions: 1009df8bae1dSRodney W. Grimes * The source and destination maps must be locked for write. 1010df8bae1dSRodney W. Grimes * The source map entry must be wired down (or be a sharing map 1011df8bae1dSRodney W. Grimes * entry corresponding to a main map entry that is wired down). 1012df8bae1dSRodney W. Grimes */ 1013df8bae1dSRodney W. Grimes 101426f9a767SRodney W. Grimes void 101526f9a767SRodney W. Grimes vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) 1016df8bae1dSRodney W. Grimes vm_map_t dst_map; 1017df8bae1dSRodney W. Grimes vm_map_t src_map; 1018df8bae1dSRodney W. Grimes vm_map_entry_t dst_entry; 1019df8bae1dSRodney W. Grimes vm_map_entry_t src_entry; 1020df8bae1dSRodney W. Grimes { 1021df8bae1dSRodney W. Grimes vm_object_t dst_object; 1022df8bae1dSRodney W. Grimes vm_object_t src_object; 1023df8bae1dSRodney W. Grimes vm_offset_t dst_offset; 1024df8bae1dSRodney W. Grimes vm_offset_t src_offset; 1025df8bae1dSRodney W. Grimes vm_prot_t prot; 1026df8bae1dSRodney W. Grimes vm_offset_t vaddr; 1027df8bae1dSRodney W. Grimes vm_page_t dst_m; 1028df8bae1dSRodney W. Grimes vm_page_t src_m; 1029df8bae1dSRodney W. Grimes 1030df8bae1dSRodney W. 
Grimes #ifdef lint 1031df8bae1dSRodney W. Grimes src_map++; 103226f9a767SRodney W. Grimes #endif lint 1033df8bae1dSRodney W. Grimes 1034df8bae1dSRodney W. Grimes src_object = src_entry->object.vm_object; 1035df8bae1dSRodney W. Grimes src_offset = src_entry->offset; 1036df8bae1dSRodney W. Grimes 1037df8bae1dSRodney W. Grimes /* 1038df8bae1dSRodney W. Grimes * Create the top-level object for the destination entry. 1039df8bae1dSRodney W. Grimes * (Doesn't actually shadow anything - we copy the pages 1040df8bae1dSRodney W. Grimes * directly.) 1041df8bae1dSRodney W. Grimes */ 1042df8bae1dSRodney W. Grimes dst_object = vm_object_allocate( 1043df8bae1dSRodney W. Grimes (vm_size_t) (dst_entry->end - dst_entry->start)); 1044df8bae1dSRodney W. Grimes 1045df8bae1dSRodney W. Grimes dst_entry->object.vm_object = dst_object; 1046df8bae1dSRodney W. Grimes dst_entry->offset = 0; 1047df8bae1dSRodney W. Grimes 1048df8bae1dSRodney W. Grimes prot = dst_entry->max_protection; 1049df8bae1dSRodney W. Grimes 1050df8bae1dSRodney W. Grimes /* 1051df8bae1dSRodney W. Grimes * Loop through all of the pages in the entry's range, copying 1052df8bae1dSRodney W. Grimes * each one from the source object (it should be there) to the 1053df8bae1dSRodney W. Grimes * destination object. 1054df8bae1dSRodney W. Grimes */ 1055df8bae1dSRodney W. Grimes for (vaddr = dst_entry->start, dst_offset = 0; 1056df8bae1dSRodney W. Grimes vaddr < dst_entry->end; 1057df8bae1dSRodney W. Grimes vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { 1058df8bae1dSRodney W. Grimes 1059df8bae1dSRodney W. Grimes /* 1060df8bae1dSRodney W. Grimes * Allocate a page in the destination object 1061df8bae1dSRodney W. Grimes */ 1062df8bae1dSRodney W. Grimes vm_object_lock(dst_object); 1063df8bae1dSRodney W. Grimes do { 1064df8bae1dSRodney W. Grimes dst_m = vm_page_alloc(dst_object, dst_offset); 1065df8bae1dSRodney W. Grimes if (dst_m == NULL) { 1066df8bae1dSRodney W. Grimes vm_object_unlock(dst_object); 1067df8bae1dSRodney W. 
Grimes VM_WAIT; 1068df8bae1dSRodney W. Grimes vm_object_lock(dst_object); 1069df8bae1dSRodney W. Grimes } 1070df8bae1dSRodney W. Grimes } while (dst_m == NULL); 1071df8bae1dSRodney W. Grimes 1072df8bae1dSRodney W. Grimes /* 1073df8bae1dSRodney W. Grimes * Find the page in the source object, and copy it in. 1074df8bae1dSRodney W. Grimes * (Because the source is wired down, the page will be 1075df8bae1dSRodney W. Grimes * in memory.) 1076df8bae1dSRodney W. Grimes */ 1077df8bae1dSRodney W. Grimes vm_object_lock(src_object); 1078df8bae1dSRodney W. Grimes src_m = vm_page_lookup(src_object, dst_offset + src_offset); 1079df8bae1dSRodney W. Grimes if (src_m == NULL) 1080df8bae1dSRodney W. Grimes panic("vm_fault_copy_wired: page missing"); 1081df8bae1dSRodney W. Grimes 1082df8bae1dSRodney W. Grimes vm_page_copy(src_m, dst_m); 1083df8bae1dSRodney W. Grimes 1084df8bae1dSRodney W. Grimes /* 1085df8bae1dSRodney W. Grimes * Enter it in the pmap... 1086df8bae1dSRodney W. Grimes */ 1087df8bae1dSRodney W. Grimes vm_object_unlock(src_object); 1088df8bae1dSRodney W. Grimes vm_object_unlock(dst_object); 1089df8bae1dSRodney W. Grimes 1090df8bae1dSRodney W. Grimes pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), 1091df8bae1dSRodney W. Grimes prot, FALSE); 1092df8bae1dSRodney W. Grimes 1093df8bae1dSRodney W. Grimes /* 1094df8bae1dSRodney W. Grimes * Mark it no longer busy, and put it on the active list. 1095df8bae1dSRodney W. Grimes */ 1096df8bae1dSRodney W. Grimes vm_object_lock(dst_object); 1097df8bae1dSRodney W. Grimes vm_page_lock_queues(); 1098df8bae1dSRodney W. Grimes vm_page_activate(dst_m); 1099df8bae1dSRodney W. Grimes vm_page_unlock_queues(); 1100df8bae1dSRodney W. Grimes PAGE_WAKEUP(dst_m); 1101df8bae1dSRodney W. Grimes vm_object_unlock(dst_object); 1102df8bae1dSRodney W. Grimes } 1103df8bae1dSRodney W. Grimes } 110426f9a767SRodney W. Grimes 110526f9a767SRodney W. Grimes 110626f9a767SRodney W. Grimes /* 110726f9a767SRodney W. 
Grimes * looks page up in shadow chain 110826f9a767SRodney W. Grimes */ 110926f9a767SRodney W. Grimes 111026f9a767SRodney W. Grimes int 111126f9a767SRodney W. Grimes vm_fault_page_lookup(object, offset, rtobject, rtoffset, rtm) 111226f9a767SRodney W. Grimes vm_object_t object; 111326f9a767SRodney W. Grimes vm_offset_t offset; 111426f9a767SRodney W. Grimes vm_object_t *rtobject; 111526f9a767SRodney W. Grimes vm_offset_t *rtoffset; 111626f9a767SRodney W. Grimes vm_page_t *rtm; 111726f9a767SRodney W. Grimes { 111826f9a767SRodney W. Grimes vm_page_t m; 111926f9a767SRodney W. Grimes vm_object_t first_object = object; 112026f9a767SRodney W. Grimes 112126f9a767SRodney W. Grimes *rtm = 0; 112226f9a767SRodney W. Grimes *rtobject = 0; 112326f9a767SRodney W. Grimes *rtoffset = 0; 112426f9a767SRodney W. Grimes 112526f9a767SRodney W. Grimes 112626f9a767SRodney W. Grimes while (!(m=vm_page_lookup(object, offset))) { 112726f9a767SRodney W. Grimes if (object->pager) { 112826f9a767SRodney W. Grimes if (vm_pager_has_page(object->pager, object->paging_offset+offset)) { 112926f9a767SRodney W. Grimes *rtobject = object; 113026f9a767SRodney W. Grimes *rtoffset = offset; 113126f9a767SRodney W. Grimes return 1; 113226f9a767SRodney W. Grimes } 113326f9a767SRodney W. Grimes } 113426f9a767SRodney W. Grimes 113526f9a767SRodney W. Grimes if (!object->shadow) 113626f9a767SRodney W. Grimes return 0; 113726f9a767SRodney W. Grimes else { 113826f9a767SRodney W. Grimes offset += object->shadow_offset; 113926f9a767SRodney W. Grimes object = object->shadow; 114026f9a767SRodney W. Grimes } 114126f9a767SRodney W. Grimes } 114226f9a767SRodney W. Grimes *rtobject = object; 114326f9a767SRodney W. Grimes *rtoffset = offset; 114426f9a767SRodney W. Grimes *rtm = m; 114526f9a767SRodney W. Grimes return 1; 114626f9a767SRodney W. Grimes } 114726f9a767SRodney W. Grimes 114826f9a767SRodney W. Grimes /* 114926f9a767SRodney W. 
Grimes * This routine checks around the requested page for other pages that 115026f9a767SRodney W. Grimes * might be able to be faulted in. 115126f9a767SRodney W. Grimes * 115226f9a767SRodney W. Grimes * Inputs: 115326f9a767SRodney W. Grimes * first_object, first_offset, m, rbehind, rahead 115426f9a767SRodney W. Grimes * 115526f9a767SRodney W. Grimes * Outputs: 115626f9a767SRodney W. Grimes * marray (array of vm_page_t), reqpage (index of requested page) 115726f9a767SRodney W. Grimes * 115826f9a767SRodney W. Grimes * Return value: 115926f9a767SRodney W. Grimes * number of pages in marray 116026f9a767SRodney W. Grimes */ 116126f9a767SRodney W. Grimes int 116226f9a767SRodney W. Grimes vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marray, reqpage) 116326f9a767SRodney W. Grimes vm_object_t first_object; 116426f9a767SRodney W. Grimes vm_offset_t first_offset; 116526f9a767SRodney W. Grimes vm_page_t m; 116626f9a767SRodney W. Grimes int rbehind; 116726f9a767SRodney W. Grimes int raheada; 116826f9a767SRodney W. Grimes vm_page_t *marray; 116926f9a767SRodney W. Grimes int *reqpage; 117026f9a767SRodney W. Grimes { 117126f9a767SRodney W. Grimes int i; 117226f9a767SRodney W. Grimes vm_page_t tmpm; 117326f9a767SRodney W. Grimes vm_object_t object; 117426f9a767SRodney W. Grimes vm_offset_t offset, startoffset, endoffset, toffset, size; 117526f9a767SRodney W. Grimes vm_object_t rtobject; 117626f9a767SRodney W. Grimes vm_page_t rtm; 117726f9a767SRodney W. Grimes vm_offset_t rtoffset; 117826f9a767SRodney W. Grimes vm_offset_t offsetdiff; 117926f9a767SRodney W. Grimes int rahead; 118026f9a767SRodney W. Grimes int treqpage; 118126f9a767SRodney W. Grimes 118226f9a767SRodney W. Grimes object = m->object; 118326f9a767SRodney W. Grimes offset = m->offset; 118426f9a767SRodney W. Grimes 118526f9a767SRodney W. Grimes offsetdiff = offset - first_offset; 118626f9a767SRodney W. Grimes 118726f9a767SRodney W. Grimes /* 118826f9a767SRodney W. 
Grimes * if the requested page is not available, then give up now 118926f9a767SRodney W. Grimes */ 119026f9a767SRodney W. Grimes 119126f9a767SRodney W. Grimes if (!vm_pager_has_page(object->pager, object->paging_offset+offset)) 119226f9a767SRodney W. Grimes return 0; 119326f9a767SRodney W. Grimes 119426f9a767SRodney W. Grimes /* 119526f9a767SRodney W. Grimes * if there is no getmulti routine for this pager, then just allow 119626f9a767SRodney W. Grimes * one page to be read. 119726f9a767SRodney W. Grimes */ 119826f9a767SRodney W. Grimes /* 119926f9a767SRodney W. Grimes if (!object->pager->pg_ops->pgo_getpages) { 120026f9a767SRodney W. Grimes *reqpage = 0; 120126f9a767SRodney W. Grimes marray[0] = m; 120226f9a767SRodney W. Grimes return 1; 120326f9a767SRodney W. Grimes } 120426f9a767SRodney W. Grimes */ 120526f9a767SRodney W. Grimes 120626f9a767SRodney W. Grimes /* 120726f9a767SRodney W. Grimes * try to do any readahead that we might have free pages for. 120826f9a767SRodney W. Grimes */ 120926f9a767SRodney W. Grimes rahead = raheada; 121026f9a767SRodney W. Grimes if (rahead > (cnt.v_free_count - cnt.v_free_reserved)) { 121126f9a767SRodney W. Grimes rahead = cnt.v_free_count - cnt.v_free_reserved; 121226f9a767SRodney W. Grimes rbehind = 0; 121326f9a767SRodney W. Grimes } 121426f9a767SRodney W. Grimes 121526f9a767SRodney W. Grimes if (cnt.v_free_count < cnt.v_free_min) { 121626f9a767SRodney W. Grimes if (rahead > VM_FAULT_READ_AHEAD_MIN) 121726f9a767SRodney W. Grimes rahead = VM_FAULT_READ_AHEAD_MIN; 121826f9a767SRodney W. Grimes rbehind = 0; 121926f9a767SRodney W. Grimes } 122026f9a767SRodney W. Grimes 122126f9a767SRodney W. Grimes /* 122226f9a767SRodney W. Grimes * if we don't have any free pages, then just read one page. 122326f9a767SRodney W. Grimes */ 122426f9a767SRodney W. Grimes if (rahead <= 0) { 122526f9a767SRodney W. Grimes *reqpage = 0; 122626f9a767SRodney W. Grimes marray[0] = m; 122726f9a767SRodney W. Grimes return 1; 122826f9a767SRodney W. 
Grimes } 122926f9a767SRodney W. Grimes 123026f9a767SRodney W. Grimes /* 123126f9a767SRodney W. Grimes * scan backward for the read behind pages -- 123226f9a767SRodney W. Grimes * in memory or on disk not in same object 123326f9a767SRodney W. Grimes */ 123426f9a767SRodney W. Grimes toffset = offset - NBPG; 123526f9a767SRodney W. Grimes if( rbehind*NBPG > offset) 123626f9a767SRodney W. Grimes rbehind = offset / NBPG; 123726f9a767SRodney W. Grimes startoffset = offset - rbehind*NBPG; 123826f9a767SRodney W. Grimes while (toffset >= startoffset) { 123926f9a767SRodney W. Grimes if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) || 124026f9a767SRodney W. Grimes rtm != 0 || rtobject != object) { 124126f9a767SRodney W. Grimes startoffset = toffset + NBPG; 124226f9a767SRodney W. Grimes break; 124326f9a767SRodney W. Grimes } 124426f9a767SRodney W. Grimes if( toffset == 0) 124526f9a767SRodney W. Grimes break; 124626f9a767SRodney W. Grimes toffset -= NBPG; 124726f9a767SRodney W. Grimes } 124826f9a767SRodney W. Grimes 124926f9a767SRodney W. Grimes /* 125026f9a767SRodney W. Grimes * scan forward for the read ahead pages -- 125126f9a767SRodney W. Grimes * in memory or on disk not in same object 125226f9a767SRodney W. Grimes */ 125326f9a767SRodney W. Grimes toffset = offset + NBPG; 125426f9a767SRodney W. Grimes endoffset = offset + (rahead+1)*NBPG; 125526f9a767SRodney W. Grimes while (toffset < object->size && toffset < endoffset) { 125626f9a767SRodney W. Grimes if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) || 125726f9a767SRodney W. Grimes rtm != 0 || rtobject != object) { 125826f9a767SRodney W. Grimes break; 125926f9a767SRodney W. Grimes } 126026f9a767SRodney W. Grimes toffset += NBPG; 126126f9a767SRodney W. Grimes } 126226f9a767SRodney W. Grimes endoffset = toffset; 126326f9a767SRodney W. Grimes 126426f9a767SRodney W. Grimes /* calculate number of bytes of pages */ 126526f9a767SRodney W. 
Grimes size = (endoffset - startoffset) / NBPG; 126626f9a767SRodney W. Grimes 126726f9a767SRodney W. Grimes /* calculate the page offset of the required page */ 126826f9a767SRodney W. Grimes treqpage = (offset - startoffset) / NBPG; 126926f9a767SRodney W. Grimes 127026f9a767SRodney W. Grimes /* see if we have space (again) */ 127126f9a767SRodney W. Grimes if (cnt.v_free_count >= cnt.v_free_reserved + size) { 127226f9a767SRodney W. Grimes bzero(marray, (rahead + rbehind + 1) * sizeof(vm_page_t)); 127326f9a767SRodney W. Grimes /* 127426f9a767SRodney W. Grimes * get our pages and don't block for them 127526f9a767SRodney W. Grimes */ 127626f9a767SRodney W. Grimes for (i = 0; i < size; i++) { 127726f9a767SRodney W. Grimes if (i != treqpage) 127826f9a767SRodney W. Grimes rtm = vm_page_alloc(object, startoffset + i * NBPG); 127926f9a767SRodney W. Grimes else 128026f9a767SRodney W. Grimes rtm = m; 128126f9a767SRodney W. Grimes marray[i] = rtm; 128226f9a767SRodney W. Grimes } 128326f9a767SRodney W. Grimes 128426f9a767SRodney W. Grimes for (i = 0; i < size; i++) { 128526f9a767SRodney W. Grimes if (marray[i] == 0) 128626f9a767SRodney W. Grimes break; 128726f9a767SRodney W. Grimes } 128826f9a767SRodney W. Grimes 128926f9a767SRodney W. Grimes /* 129026f9a767SRodney W. Grimes * if we could not get our block of pages, then 129126f9a767SRodney W. Grimes * free the readahead/readbehind pages. 129226f9a767SRodney W. Grimes */ 129326f9a767SRodney W. Grimes if (i < size) { 129426f9a767SRodney W. Grimes for (i = 0; i < size; i++) { 129526f9a767SRodney W. Grimes if (i != treqpage && marray[i]) 129626f9a767SRodney W. Grimes FREE_PAGE(marray[i]); 129726f9a767SRodney W. Grimes } 129826f9a767SRodney W. Grimes *reqpage = 0; 129926f9a767SRodney W. Grimes marray[0] = m; 130026f9a767SRodney W. Grimes return 1; 130126f9a767SRodney W. Grimes } 130226f9a767SRodney W. Grimes 130326f9a767SRodney W. Grimes *reqpage = treqpage; 130426f9a767SRodney W. Grimes return size; 130526f9a767SRodney W. 
Grimes } 130626f9a767SRodney W. Grimes *reqpage = 0; 130726f9a767SRodney W. Grimes marray[0] = m; 130826f9a767SRodney W. Grimes return 1; 130926f9a767SRodney W. Grimes } 131026f9a767SRodney W. Grimes 1311