1df8bae1dSRodney W. Grimes /* 2df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 426f9a767SRodney W. Grimes * Copyright (c) 1994 John S. Dyson 526f9a767SRodney W. Grimes * All rights reserved. 626f9a767SRodney W. Grimes * Copyright (c) 1994 David Greenman 726f9a767SRodney W. Grimes * All rights reserved. 826f9a767SRodney W. Grimes * 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 11df8bae1dSRodney W. Grimes * The Mach Operating System project at Carnegie-Mellon University. 12df8bae1dSRodney W. Grimes * 13df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 14df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 15df8bae1dSRodney W. Grimes * are met: 16df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 17df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 18df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 19df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 20df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 21df8bae1dSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 22df8bae1dSRodney W. Grimes * must display the following acknowledgement: 23df8bae1dSRodney W. Grimes * This product includes software developed by the University of 24df8bae1dSRodney W. Grimes * California, Berkeley and its contributors. 25df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 26df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 27df8bae1dSRodney W. 
Grimes * without specific prior written permission. 28df8bae1dSRodney W. Grimes * 29df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39df8bae1dSRodney W. Grimes * SUCH DAMAGE. 40df8bae1dSRodney W. Grimes * 413c4dd356SDavid Greenman * from: @(#)vm_fault.c 8.4 (Berkeley) 1/12/94 42df8bae1dSRodney W. Grimes * 43df8bae1dSRodney W. Grimes * 44df8bae1dSRodney W. Grimes * Copyright (c) 1987, 1990 Carnegie-Mellon University. 45df8bae1dSRodney W. Grimes * All rights reserved. 46df8bae1dSRodney W. Grimes * 47df8bae1dSRodney W. Grimes * Authors: Avadis Tevanian, Jr., Michael Wayne Young 48df8bae1dSRodney W. Grimes * 49df8bae1dSRodney W. Grimes * Permission to use, copy, modify and distribute this software and 50df8bae1dSRodney W. Grimes * its documentation is hereby granted, provided that both the copyright 51df8bae1dSRodney W. Grimes * notice and this permission notice appear in all copies of the 52df8bae1dSRodney W. Grimes * software, derivative works or modified versions, and any portions 53df8bae1dSRodney W. 
Grimes * thereof, and that both notices appear in supporting documentation. 54df8bae1dSRodney W. Grimes * 55df8bae1dSRodney W. Grimes * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 56df8bae1dSRodney W. Grimes * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 57df8bae1dSRodney W. Grimes * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 58df8bae1dSRodney W. Grimes * 59df8bae1dSRodney W. Grimes * Carnegie Mellon requests users of this software to return to 60df8bae1dSRodney W. Grimes * 61df8bae1dSRodney W. Grimes * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 62df8bae1dSRodney W. Grimes * School of Computer Science 63df8bae1dSRodney W. Grimes * Carnegie Mellon University 64df8bae1dSRodney W. Grimes * Pittsburgh PA 15213-3890 65df8bae1dSRodney W. Grimes * 66df8bae1dSRodney W. Grimes * any improvements or extensions that they make and grant Carnegie the 67df8bae1dSRodney W. Grimes * rights to redistribute these changes. 683c4dd356SDavid Greenman * 690ed43762SJohn Dyson * $Id: vm_fault.c,v 1.45 1996/05/19 07:36:45 dyson Exp $ 70df8bae1dSRodney W. Grimes */ 71df8bae1dSRodney W. Grimes 72df8bae1dSRodney W. Grimes /* 73df8bae1dSRodney W. Grimes * Page fault handling module. 74df8bae1dSRodney W. Grimes */ 75df8bae1dSRodney W. Grimes 76df8bae1dSRodney W. Grimes #include <sys/param.h> 77df8bae1dSRodney W. Grimes #include <sys/systm.h> 7826f9a767SRodney W. Grimes #include <sys/proc.h> 7924a1cce3SDavid Greenman #include <sys/vnode.h> 8005f0fdd2SPoul-Henning Kamp #include <sys/resource.h> 8105f0fdd2SPoul-Henning Kamp #include <sys/signalvar.h> 8226f9a767SRodney W. Grimes #include <sys/resourcevar.h> 83efeaf95aSDavid Greenman #include <sys/vmmeter.h> 84df8bae1dSRodney W. Grimes 85df8bae1dSRodney W. 
Grimes #include <vm/vm.h> 86efeaf95aSDavid Greenman #include <vm/vm_param.h> 87efeaf95aSDavid Greenman #include <vm/vm_prot.h> 88efeaf95aSDavid Greenman #include <vm/lock.h> 89efeaf95aSDavid Greenman #include <vm/pmap.h> 90efeaf95aSDavid Greenman #include <vm/vm_map.h> 91efeaf95aSDavid Greenman #include <vm/vm_object.h> 92df8bae1dSRodney W. Grimes #include <vm/vm_page.h> 93df8bae1dSRodney W. Grimes #include <vm/vm_pageout.h> 94a83c285cSDavid Greenman #include <vm/vm_kern.h> 9524a1cce3SDavid Greenman #include <vm/vm_pager.h> 9624a1cce3SDavid Greenman #include <vm/vnode_pager.h> 97cd41fc12SDavid Greenman #include <vm/swap_pager.h> 98efeaf95aSDavid Greenman #include <vm/vm_extern.h> 99df8bae1dSRodney W. Grimes 10022ba64e8SJohn Dyson /* Forward declaration. vm_fault() calls this as (m, behind, ahead, marray, &reqpage) and passes the returned count, together with reqpage, on to vm_pager_get_pages(); a return of 0 is treated as VM_PAGER_FAIL. */ int vm_fault_additional_pages __P((vm_page_t, int, int, vm_page_t *, int *)); 10126f9a767SRodney W. Grimes 10226f9a767SRodney W. Grimes /* Default read-ahead and read-behind page counts that vm_fault() hands to vm_fault_additional_pages(). VM_FAULT_READ (ahead + behind + 1) is the size of the marray[] page array in vm_fault(); the extra 1 is presumably the faulting page itself -- TODO confirm. */ #define VM_FAULT_READ_AHEAD 4 10326f9a767SRodney W. Grimes #define VM_FAULT_READ_BEHIND 3 10426f9a767SRodney W. Grimes #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) 10526f9a767SRodney W. Grimes 106de5f6a77SJohn Dyson /* Event counters incremented in vm_fault()'s copy-on-write path: vm_fault_free_1 when the backing page is freed because the other shadow object already holds a fully valid, non-busy page at that index; vm_fault_copy_save_1 when the backing page is renamed into first_object instead of being copied; vm_fault_copy_save_2 when the backing page is renamed into the other shadow object. Nothing visible here reads them. */ int vm_fault_free_1; 107de5f6a77SJohn Dyson int vm_fault_copy_save_1; 108de5f6a77SJohn Dyson int vm_fault_copy_save_2; 109de5f6a77SJohn Dyson 110df8bae1dSRodney W. Grimes /* 111df8bae1dSRodney W. Grimes * vm_fault: 112df8bae1dSRodney W. Grimes * 113df8bae1dSRodney W. Grimes * Handle a page fault occurring at the given address, 114df8bae1dSRodney W. Grimes * requiring the given permissions, in the map specified. 115df8bae1dSRodney W. Grimes * If successful, the page is inserted into the 116df8bae1dSRodney W. Grimes * associated physical map. 117df8bae1dSRodney W. Grimes * 118df8bae1dSRodney W. Grimes * NOTE: the given address should be truncated to the 119df8bae1dSRodney W. Grimes * proper page address. 120df8bae1dSRodney W. Grimes * 121df8bae1dSRodney W. Grimes * KERN_SUCCESS is returned if the page fault is handled; otherwise, 122df8bae1dSRodney W. 
Grimes * a standard error specifying why the fault is fatal is returned. 123df8bae1dSRodney W. Grimes * 124df8bae1dSRodney W. Grimes * 125df8bae1dSRodney W. Grimes * The map in question must be referenced, and remains so. 126df8bae1dSRodney W. Grimes * Caller may hold no locks. 127df8bae1dSRodney W. Grimes */ 128df8bae1dSRodney W. Grimes int 129df8bae1dSRodney W. Grimes vm_fault(map, vaddr, fault_type, change_wiring) 130df8bae1dSRodney W. Grimes vm_map_t map; 131df8bae1dSRodney W. Grimes vm_offset_t vaddr; 132df8bae1dSRodney W. Grimes vm_prot_t fault_type; 133df8bae1dSRodney W. Grimes boolean_t change_wiring; 134df8bae1dSRodney W. Grimes { 135df8bae1dSRodney W. Grimes vm_object_t first_object; 136a316d390SJohn Dyson vm_pindex_t first_pindex; 137df8bae1dSRodney W. Grimes vm_map_entry_t entry; 138df8bae1dSRodney W. Grimes register vm_object_t object; 139a316d390SJohn Dyson register vm_pindex_t pindex; 14026f9a767SRodney W. Grimes vm_page_t m; 141df8bae1dSRodney W. Grimes vm_page_t first_m; 142df8bae1dSRodney W. Grimes vm_prot_t prot; 143df8bae1dSRodney W. Grimes int result; 144df8bae1dSRodney W. Grimes boolean_t wired; 145df8bae1dSRodney W. Grimes boolean_t su; 146df8bae1dSRodney W. Grimes boolean_t lookup_still_valid; 147df8bae1dSRodney W. Grimes vm_page_t old_m; 148df8bae1dSRodney W. Grimes vm_object_t next_object; 14926f9a767SRodney W. Grimes vm_page_t marray[VM_FAULT_READ]; 15026f9a767SRodney W. Grimes int hardfault = 0; 151f6b04d2bSDavid Greenman struct vnode *vp = NULL; 152df8bae1dSRodney W. Grimes 153b8d95f16SDavid Greenman cnt.v_vm_faults++; /* needs lock XXX */ 154df8bae1dSRodney W. Grimes /* 155df8bae1dSRodney W. Grimes * Recovery actions 156df8bae1dSRodney W. Grimes */ 157df8bae1dSRodney W. Grimes #define FREE_PAGE(m) { \ 158df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); \ 159df8bae1dSRodney W. Grimes vm_page_free(m); \ 160df8bae1dSRodney W. Grimes } 161df8bae1dSRodney W. Grimes 162df8bae1dSRodney W. Grimes #define RELEASE_PAGE(m) { \ 163df8bae1dSRodney W. 
Grimes PAGE_WAKEUP(m); \ 164bd7e5f99SJohn Dyson if (m->queue != PQ_ACTIVE) vm_page_activate(m); \ 165df8bae1dSRodney W. Grimes } 166df8bae1dSRodney W. Grimes 167df8bae1dSRodney W. Grimes #define UNLOCK_MAP { \ 168df8bae1dSRodney W. Grimes if (lookup_still_valid) { \ 169df8bae1dSRodney W. Grimes vm_map_lookup_done(map, entry); \ 170df8bae1dSRodney W. Grimes lookup_still_valid = FALSE; \ 171df8bae1dSRodney W. Grimes } \ 172df8bae1dSRodney W. Grimes } 173df8bae1dSRodney W. Grimes 174df8bae1dSRodney W. Grimes #define UNLOCK_THINGS { \ 175f919ebdeSDavid Greenman vm_object_pip_wakeup(object); \ 176df8bae1dSRodney W. Grimes if (object != first_object) { \ 177df8bae1dSRodney W. Grimes FREE_PAGE(first_m); \ 178f919ebdeSDavid Greenman vm_object_pip_wakeup(first_object); \ 179df8bae1dSRodney W. Grimes } \ 180df8bae1dSRodney W. Grimes UNLOCK_MAP; \ 18124a1cce3SDavid Greenman if (vp != NULL) VOP_UNLOCK(vp); \ 182df8bae1dSRodney W. Grimes } 183df8bae1dSRodney W. Grimes 184df8bae1dSRodney W. Grimes #define UNLOCK_AND_DEALLOCATE { \ 185df8bae1dSRodney W. Grimes UNLOCK_THINGS; \ 186df8bae1dSRodney W. Grimes vm_object_deallocate(first_object); \ 187df8bae1dSRodney W. Grimes } 188df8bae1dSRodney W. Grimes 18926f9a767SRodney W. Grimes 190df8bae1dSRodney W. Grimes RetryFault:; 191df8bae1dSRodney W. Grimes 192df8bae1dSRodney W. Grimes /* 1930d94caffSDavid Greenman * Find the backing store object and offset into it to begin the 1940d94caffSDavid Greenman * search. 195df8bae1dSRodney W. Grimes */ 196df8bae1dSRodney W. Grimes 19722ba64e8SJohn Dyson if ((result = vm_map_lookup(&map, vaddr, 19822ba64e8SJohn Dyson fault_type, &entry, &first_object, 199a316d390SJohn Dyson &first_pindex, &prot, &wired, &su)) != KERN_SUCCESS) { 200df8bae1dSRodney W. Grimes return (result); 201df8bae1dSRodney W. Grimes } 202f6b04d2bSDavid Greenman 20324a1cce3SDavid Greenman vp = vnode_pager_lock(first_object); 204f6b04d2bSDavid Greenman 205df8bae1dSRodney W. Grimes lookup_still_valid = TRUE; 206df8bae1dSRodney W. 
Grimes 207df8bae1dSRodney W. Grimes if (wired) 208df8bae1dSRodney W. Grimes fault_type = prot; 209df8bae1dSRodney W. Grimes 210df8bae1dSRodney W. Grimes first_m = NULL; 211df8bae1dSRodney W. Grimes 212df8bae1dSRodney W. Grimes /* 2130d94caffSDavid Greenman * Make a reference to this object to prevent its disposal while we 2140d94caffSDavid Greenman * are messing with it. Once we have the reference, the map is free 2150d94caffSDavid Greenman * to be diddled. Since objects reference their shadows (and copies), 2160d94caffSDavid Greenman * they will stay around as well. 217df8bae1dSRodney W. Grimes */ 218df8bae1dSRodney W. Grimes 219df8bae1dSRodney W. Grimes first_object->ref_count++; 220df8bae1dSRodney W. Grimes first_object->paging_in_progress++; 221df8bae1dSRodney W. Grimes 222df8bae1dSRodney W. Grimes /* 223df8bae1dSRodney W. Grimes * INVARIANTS (through entire routine): 224df8bae1dSRodney W. Grimes * 2250d94caffSDavid Greenman * 1) At all times, we must either have the object lock or a busy 22624a1cce3SDavid Greenman * page in some object to prevent some other process from trying to 2270d94caffSDavid Greenman * bring in the same page. 228df8bae1dSRodney W. Grimes * 2290d94caffSDavid Greenman * Note that we cannot hold any locks during the pager access or when 2300d94caffSDavid Greenman * waiting for memory, so we use a busy page then. 231df8bae1dSRodney W. Grimes * 2320d94caffSDavid Greenman * Note also that we aren't as concerned about more than one thead 2330d94caffSDavid Greenman * attempting to pager_data_unlock the same page at once, so we don't 2340d94caffSDavid Greenman * hold the page as busy then, but do record the highest unlock value 2350d94caffSDavid Greenman * so far. [Unlock requests may also be delivered out of order.] 236df8bae1dSRodney W. Grimes * 2370d94caffSDavid Greenman * 2) Once we have a busy page, we must remove it from the pageout 2380d94caffSDavid Greenman * queues, so that the pageout daemon will not grab it away. 239df8bae1dSRodney W. 
Grimes * 24024a1cce3SDavid Greenman * 3) To prevent another process from racing us down the shadow chain 2410d94caffSDavid Greenman * and entering a new page in the top object before we do, we must 2420d94caffSDavid Greenman * keep a busy page in the top object while following the shadow 2430d94caffSDavid Greenman * chain. 244df8bae1dSRodney W. Grimes * 2450d94caffSDavid Greenman * 4) We must increment paging_in_progress on any object for which 2460d94caffSDavid Greenman * we have a busy page, to prevent vm_object_collapse from removing 2470d94caffSDavid Greenman * the busy page without our noticing. 248df8bae1dSRodney W. Grimes */ 249df8bae1dSRodney W. Grimes 250df8bae1dSRodney W. Grimes /* 251df8bae1dSRodney W. Grimes * Search for the page at object/offset. 252df8bae1dSRodney W. Grimes */ 253df8bae1dSRodney W. Grimes 254df8bae1dSRodney W. Grimes object = first_object; 255a316d390SJohn Dyson pindex = first_pindex; 256df8bae1dSRodney W. Grimes 257df8bae1dSRodney W. Grimes /* 258df8bae1dSRodney W. Grimes * See whether this page is resident 259df8bae1dSRodney W. Grimes */ 260df8bae1dSRodney W. Grimes 261df8bae1dSRodney W. Grimes while (TRUE) { 262a316d390SJohn Dyson m = vm_page_lookup(object, pindex); 263df8bae1dSRodney W. Grimes if (m != NULL) { 264df8bae1dSRodney W. Grimes /* 2650d94caffSDavid Greenman * If the page is being brought in, wait for it and 2660d94caffSDavid Greenman * then retry. 267df8bae1dSRodney W. Grimes */ 2680d94caffSDavid Greenman if ((m->flags & PG_BUSY) || m->busy) { 26916f62314SDavid Greenman int s; 2700d94caffSDavid Greenman 271df8bae1dSRodney W. Grimes UNLOCK_THINGS; 272b18bfc3dSJohn Dyson s = splvm(); 273b18bfc3dSJohn Dyson if (((m->flags & PG_BUSY) || m->busy)) { 2740d94caffSDavid Greenman m->flags |= PG_WANTED | PG_REFERENCED; 275976e77fcSDavid Greenman cnt.v_intrans++; 27624a1cce3SDavid Greenman tsleep(m, PSWP, "vmpfw", 0); 27726f9a767SRodney W. Grimes } 27816f62314SDavid Greenman splx(s); 279df8bae1dSRodney W. 
Grimes vm_object_deallocate(first_object); 280df8bae1dSRodney W. Grimes goto RetryFault; 281df8bae1dSRodney W. Grimes } 282f6b04d2bSDavid Greenman 283df8bae1dSRodney W. Grimes /* 28424a1cce3SDavid Greenman * Mark page busy for other processes, and the pagedaemon. 285df8bae1dSRodney W. Grimes */ 286df8bae1dSRodney W. Grimes m->flags |= PG_BUSY; 287bd7e5f99SJohn Dyson if ((m->queue == PQ_CACHE) && 28822ba64e8SJohn Dyson (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) { 28922ba64e8SJohn Dyson UNLOCK_AND_DEALLOCATE; 29022ba64e8SJohn Dyson VM_WAIT; 29122ba64e8SJohn Dyson PAGE_WAKEUP(m); 29222ba64e8SJohn Dyson goto RetryFault; 29322ba64e8SJohn Dyson } 29422ba64e8SJohn Dyson 2950ed43762SJohn Dyson vm_page_unqueue(m); 2960ed43762SJohn Dyson 297bd7e5f99SJohn Dyson if (m->valid && 298bd7e5f99SJohn Dyson ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) && 299f919ebdeSDavid Greenman m->object != kernel_object && m->object != kmem_object) { 3000d94caffSDavid Greenman goto readrest; 3010d94caffSDavid Greenman } 302df8bae1dSRodney W. Grimes break; 303df8bae1dSRodney W. Grimes } 30424a1cce3SDavid Greenman if (((object->type != OBJT_DEFAULT) && (!change_wiring || wired)) 305df8bae1dSRodney W. Grimes || (object == first_object)) { 306df8bae1dSRodney W. Grimes 307a316d390SJohn Dyson if (pindex >= object->size) { 3085f55e841SDavid Greenman UNLOCK_AND_DEALLOCATE; 3095f55e841SDavid Greenman return (KERN_PROTECTION_FAILURE); 3105f55e841SDavid Greenman } 31122ba64e8SJohn Dyson 312df8bae1dSRodney W. Grimes /* 3130d94caffSDavid Greenman * Allocate a new page for this object/offset pair. 314df8bae1dSRodney W. Grimes */ 315a316d390SJohn Dyson m = vm_page_alloc(object, pindex, 316b18bfc3dSJohn Dyson (vp || object->backing_object)?VM_ALLOC_NORMAL:VM_ALLOC_ZERO); 317df8bae1dSRodney W. Grimes 318df8bae1dSRodney W. Grimes if (m == NULL) { 319df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 320df8bae1dSRodney W. Grimes VM_WAIT; 321df8bae1dSRodney W. 
Grimes goto RetryFault; 322df8bae1dSRodney W. Grimes } 323df8bae1dSRodney W. Grimes } 3240d94caffSDavid Greenman readrest: 32524a1cce3SDavid Greenman if (object->type != OBJT_DEFAULT && (!change_wiring || wired)) { 326df8bae1dSRodney W. Grimes int rv; 32726f9a767SRodney W. Grimes int faultcount; 32826f9a767SRodney W. Grimes int reqpage; 329867a482dSJohn Dyson int ahead, behind; 330867a482dSJohn Dyson 331867a482dSJohn Dyson ahead = VM_FAULT_READ_AHEAD; 332867a482dSJohn Dyson behind = VM_FAULT_READ_BEHIND; 333867a482dSJohn Dyson if (first_object->behavior == OBJ_RANDOM) { 334867a482dSJohn Dyson ahead = 0; 335867a482dSJohn Dyson behind = 0; 336867a482dSJohn Dyson } 337867a482dSJohn Dyson 338867a482dSJohn Dyson if (first_object->behavior == OBJ_SEQUENTIAL) { 339867a482dSJohn Dyson vm_pindex_t firstpindex, tmppindex; 340867a482dSJohn Dyson if (first_pindex < 341867a482dSJohn Dyson 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1)) 342867a482dSJohn Dyson firstpindex = 0; 343867a482dSJohn Dyson else 344867a482dSJohn Dyson firstpindex = first_pindex - 345867a482dSJohn Dyson 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1); 346867a482dSJohn Dyson 347867a482dSJohn Dyson for(tmppindex = first_pindex - 1; 348867a482dSJohn Dyson tmppindex >= first_pindex; 349867a482dSJohn Dyson --tmppindex) { 350867a482dSJohn Dyson vm_page_t mt; 351867a482dSJohn Dyson mt = vm_page_lookup( first_object, tmppindex); 352867a482dSJohn Dyson if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL)) 353867a482dSJohn Dyson break; 354867a482dSJohn Dyson if (mt->busy || (mt->flags & PG_BUSY) || mt->hold_count || 355867a482dSJohn Dyson mt->wire_count) 356867a482dSJohn Dyson continue; 357867a482dSJohn Dyson if (mt->dirty == 0) 358867a482dSJohn Dyson vm_page_test_dirty(mt); 359867a482dSJohn Dyson if (mt->dirty) { 360867a482dSJohn Dyson vm_page_protect(mt, VM_PROT_NONE); 361867a482dSJohn Dyson vm_page_deactivate(mt); 362867a482dSJohn Dyson } else { 363867a482dSJohn Dyson vm_page_cache(mt); 364867a482dSJohn 
Dyson } 365867a482dSJohn Dyson } 366867a482dSJohn Dyson 367867a482dSJohn Dyson ahead += behind; 368867a482dSJohn Dyson behind = 0; 369867a482dSJohn Dyson } 370df8bae1dSRodney W. Grimes 371df8bae1dSRodney W. Grimes /* 3720d94caffSDavid Greenman * now we find out if any other pages should be paged 3730d94caffSDavid Greenman * in at this time this routine checks to see if the 3740d94caffSDavid Greenman * pages surrounding this fault reside in the same 3750d94caffSDavid Greenman * object as the page for this fault. If they do, 3760d94caffSDavid Greenman * then they are faulted in also into the object. The 3770d94caffSDavid Greenman * array "marray" returned contains an array of 3780d94caffSDavid Greenman * vm_page_t structs where one of them is the 3790d94caffSDavid Greenman * vm_page_t passed to the routine. The reqpage 3800d94caffSDavid Greenman * return value is the index into the marray for the 3810d94caffSDavid Greenman * vm_page_t passed to the routine. 38226f9a767SRodney W. Grimes */ 38305f0fdd2SPoul-Henning Kamp faultcount = vm_fault_additional_pages( 384867a482dSJohn Dyson m, behind, ahead, marray, &reqpage); 385df8bae1dSRodney W. Grimes 386df8bae1dSRodney W. Grimes /* 3870d94caffSDavid Greenman * Call the pager to retrieve the data, if any, after 3880d94caffSDavid Greenman * releasing the lock on the map. 389df8bae1dSRodney W. Grimes */ 390df8bae1dSRodney W. Grimes UNLOCK_MAP; 391df8bae1dSRodney W. Grimes 39226f9a767SRodney W. Grimes rv = faultcount ? 39324a1cce3SDavid Greenman vm_pager_get_pages(object, marray, faultcount, 39424a1cce3SDavid Greenman reqpage) : VM_PAGER_FAIL; 39522ba64e8SJohn Dyson 39626f9a767SRodney W. Grimes if (rv == VM_PAGER_OK) { 397df8bae1dSRodney W. Grimes /* 3980d94caffSDavid Greenman * Found the page. Leave it busy while we play 3990d94caffSDavid Greenman * with it. 400df8bae1dSRodney W. Grimes */ 40126f9a767SRodney W. Grimes 402df8bae1dSRodney W. Grimes /* 4030d94caffSDavid Greenman * Relookup in case pager changed page. 
Pager 4040d94caffSDavid Greenman * is responsible for disposition of old page 4050d94caffSDavid Greenman * if moved. 406df8bae1dSRodney W. Grimes */ 407a316d390SJohn Dyson m = vm_page_lookup(object, pindex); 408f6b04d2bSDavid Greenman if( !m) { 409f6b04d2bSDavid Greenman UNLOCK_AND_DEALLOCATE; 410f6b04d2bSDavid Greenman goto RetryFault; 411f6b04d2bSDavid Greenman } 412f6b04d2bSDavid Greenman 41326f9a767SRodney W. Grimes hardfault++; 414df8bae1dSRodney W. Grimes break; 415df8bae1dSRodney W. Grimes } 416df8bae1dSRodney W. Grimes /* 4170d94caffSDavid Greenman * Remove the bogus page (which does not exist at this 4180d94caffSDavid Greenman * object/offset); before doing so, we must get back 4190d94caffSDavid Greenman * our object lock to preserve our invariant. 420df8bae1dSRodney W. Grimes * 42124a1cce3SDavid Greenman * Also wake up any other process that may want to bring 4220d94caffSDavid Greenman * in this page. 423df8bae1dSRodney W. Grimes * 4240d94caffSDavid Greenman * If this is the top-level object, we must leave the 42524a1cce3SDavid Greenman * busy page to prevent another process from rushing 4260d94caffSDavid Greenman * past us, and inserting the page in that object at 4270d94caffSDavid Greenman * the same time that we are. 428df8bae1dSRodney W. Grimes */ 42926f9a767SRodney W. Grimes 430a83c285cSDavid Greenman if (rv == VM_PAGER_ERROR) 431a83c285cSDavid Greenman printf("vm_fault: pager input (probably hardware) error, PID %d failure\n", 432a83c285cSDavid Greenman curproc->p_pid); 43326f9a767SRodney W. Grimes /* 434a83c285cSDavid Greenman * Data outside the range of the pager or an I/O error 43526f9a767SRodney W. Grimes */ 436a83c285cSDavid Greenman /* 4370d94caffSDavid Greenman * XXX - the check for kernel_map is a kludge to work 4380d94caffSDavid Greenman * around having the machine panic on a kernel space 4390d94caffSDavid Greenman * fault w/ I/O error. 
440a83c285cSDavid Greenman */ 441a83c285cSDavid Greenman if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { 44226f9a767SRodney W. Grimes FREE_PAGE(m); 44326f9a767SRodney W. Grimes UNLOCK_AND_DEALLOCATE; 444a83c285cSDavid Greenman return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); 44526f9a767SRodney W. Grimes } 446df8bae1dSRodney W. Grimes if (object != first_object) { 447df8bae1dSRodney W. Grimes FREE_PAGE(m); 44826f9a767SRodney W. Grimes /* 44926f9a767SRodney W. Grimes * XXX - we cannot just fall out at this 45026f9a767SRodney W. Grimes * point, m has been freed and is invalid! 45126f9a767SRodney W. Grimes */ 452df8bae1dSRodney W. Grimes } 453df8bae1dSRodney W. Grimes } 454df8bae1dSRodney W. Grimes /* 45524a1cce3SDavid Greenman * We get here if the object has default pager (or unwiring) or the 4560d94caffSDavid Greenman * pager doesn't have the page. 457df8bae1dSRodney W. Grimes */ 458df8bae1dSRodney W. Grimes if (object == first_object) 459df8bae1dSRodney W. Grimes first_m = m; 460df8bae1dSRodney W. Grimes 461df8bae1dSRodney W. Grimes /* 4620d94caffSDavid Greenman * Move on to the next object. Lock the next object before 4630d94caffSDavid Greenman * unlocking the current one. 464df8bae1dSRodney W. Grimes */ 465df8bae1dSRodney W. Grimes 466a316d390SJohn Dyson pindex += OFF_TO_IDX(object->backing_object_offset); 46724a1cce3SDavid Greenman next_object = object->backing_object; 468df8bae1dSRodney W. Grimes if (next_object == NULL) { 469df8bae1dSRodney W. Grimes /* 4700d94caffSDavid Greenman * If there's no object left, fill the page in the top 4710d94caffSDavid Greenman * object with zeros. 472df8bae1dSRodney W. Grimes */ 473df8bae1dSRodney W. Grimes if (object != first_object) { 474f919ebdeSDavid Greenman vm_object_pip_wakeup(object); 475df8bae1dSRodney W. Grimes 476df8bae1dSRodney W. Grimes object = first_object; 477a316d390SJohn Dyson pindex = first_pindex; 478df8bae1dSRodney W. 
Grimes m = first_m; 479df8bae1dSRodney W. Grimes } 480df8bae1dSRodney W. Grimes first_m = NULL; 481df8bae1dSRodney W. Grimes 482f70f05f2SJohn Dyson if ((m->flags & PG_ZERO) == 0) 483df8bae1dSRodney W. Grimes vm_page_zero_fill(m); 484df8bae1dSRodney W. Grimes cnt.v_zfod++; 485df8bae1dSRodney W. Grimes break; 4860d94caffSDavid Greenman } else { 48726f9a767SRodney W. Grimes if (object != first_object) { 488f919ebdeSDavid Greenman vm_object_pip_wakeup(object); 489c0503609SDavid Greenman } 490df8bae1dSRodney W. Grimes object = next_object; 491df8bae1dSRodney W. Grimes object->paging_in_progress++; 492df8bae1dSRodney W. Grimes } 493df8bae1dSRodney W. Grimes } 494df8bae1dSRodney W. Grimes 495f919ebdeSDavid Greenman if ((m->flags & PG_BUSY) == 0) 496f919ebdeSDavid Greenman panic("vm_fault: not busy after main loop"); 497df8bae1dSRodney W. Grimes 498df8bae1dSRodney W. Grimes /* 4990d94caffSDavid Greenman * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock 500df8bae1dSRodney W. Grimes * is held.] 501df8bae1dSRodney W. Grimes */ 502df8bae1dSRodney W. Grimes 503df8bae1dSRodney W. Grimes old_m = m; /* save page that would be copied */ 504df8bae1dSRodney W. Grimes 505df8bae1dSRodney W. Grimes /* 5060d94caffSDavid Greenman * If the page is being written, but isn't already owned by the 5070d94caffSDavid Greenman * top-level object, we have to copy it into a new page owned by the 5080d94caffSDavid Greenman * top-level object. 509df8bae1dSRodney W. Grimes */ 510df8bae1dSRodney W. Grimes 511df8bae1dSRodney W. Grimes if (object != first_object) { 512df8bae1dSRodney W. Grimes /* 5130d94caffSDavid Greenman * We only really need to copy if we want to write it. 514df8bae1dSRodney W. Grimes */ 515df8bae1dSRodney W. Grimes 516df8bae1dSRodney W. Grimes if (fault_type & VM_PROT_WRITE) { 517df8bae1dSRodney W. Grimes 518df8bae1dSRodney W. Grimes /* 5190d94caffSDavid Greenman * We already have an empty page in first_object - use 5200d94caffSDavid Greenman * it. 
521df8bae1dSRodney W. Grimes */ 522df8bae1dSRodney W. Grimes 523de5f6a77SJohn Dyson if (lookup_still_valid && 524de5f6a77SJohn Dyson /* 525de5f6a77SJohn Dyson * Only one shadow object 526de5f6a77SJohn Dyson */ 527de5f6a77SJohn Dyson (object->shadow_count == 1) && 528de5f6a77SJohn Dyson /* 529de5f6a77SJohn Dyson * No COW refs, except us 530de5f6a77SJohn Dyson */ 531de5f6a77SJohn Dyson (object->ref_count == 1) && 532de5f6a77SJohn Dyson /* 533de5f6a77SJohn Dyson * Noone else can look this object up 534de5f6a77SJohn Dyson */ 535de5f6a77SJohn Dyson (object->handle == NULL) && 536de5f6a77SJohn Dyson /* 537de5f6a77SJohn Dyson * No other ways to look the object up 538de5f6a77SJohn Dyson */ 539de5f6a77SJohn Dyson ((object->type == OBJT_DEFAULT) || 540de5f6a77SJohn Dyson (object->type == OBJT_SWAP)) && 541de5f6a77SJohn Dyson /* 542de5f6a77SJohn Dyson * We don't chase down the shadow chain 543de5f6a77SJohn Dyson */ 544de5f6a77SJohn Dyson (object == first_object->backing_object)) { 545df8bae1dSRodney W. Grimes 546df8bae1dSRodney W. Grimes /* 547de5f6a77SJohn Dyson * get rid of the unnecessary page 548df8bae1dSRodney W. Grimes */ 549de5f6a77SJohn Dyson vm_page_protect(first_m, VM_PROT_NONE); 550de5f6a77SJohn Dyson PAGE_WAKEUP(first_m); 551de5f6a77SJohn Dyson vm_page_free(first_m); 552de5f6a77SJohn Dyson /* 553de5f6a77SJohn Dyson * grab the page and put it into the process'es object 554de5f6a77SJohn Dyson */ 555de5f6a77SJohn Dyson vm_page_rename(m, first_object, first_pindex); 556de5f6a77SJohn Dyson first_m = m; 557de5f6a77SJohn Dyson m->dirty = VM_PAGE_BITS_ALL; 558de5f6a77SJohn Dyson m = NULL; 559de5f6a77SJohn Dyson ++vm_fault_copy_save_1; 560de5f6a77SJohn Dyson } else { 561de5f6a77SJohn Dyson /* 562de5f6a77SJohn Dyson * Oh, well, lets copy it. 563de5f6a77SJohn Dyson */ 564de5f6a77SJohn Dyson vm_page_copy(m, first_m); 565de5f6a77SJohn Dyson } 566df8bae1dSRodney W. 
Grimes 567de5f6a77SJohn Dyson if (lookup_still_valid && 568de5f6a77SJohn Dyson /* 569de5f6a77SJohn Dyson * make sure that we have two shadow objs 570de5f6a77SJohn Dyson */ 571de5f6a77SJohn Dyson (object->shadow_count == 2) && 572de5f6a77SJohn Dyson /* 573de5f6a77SJohn Dyson * And no COW refs -- note that there are sometimes 574de5f6a77SJohn Dyson * temp refs to objs, but ignore that case -- we just 575de5f6a77SJohn Dyson * punt. 576de5f6a77SJohn Dyson */ 577de5f6a77SJohn Dyson (object->ref_count == 2) && 578de5f6a77SJohn Dyson /* 579de5f6a77SJohn Dyson * Noone else can look us up 580de5f6a77SJohn Dyson */ 581de5f6a77SJohn Dyson (object->handle == NULL) && 582de5f6a77SJohn Dyson /* 583de5f6a77SJohn Dyson * Not something that can be referenced elsewhere 584de5f6a77SJohn Dyson */ 585de5f6a77SJohn Dyson ((object->type == OBJT_DEFAULT) || 586de5f6a77SJohn Dyson (object->type == OBJT_SWAP)) && 587de5f6a77SJohn Dyson /* 588de5f6a77SJohn Dyson * We don't bother chasing down object chain 589de5f6a77SJohn Dyson */ 590de5f6a77SJohn Dyson (object == first_object->backing_object)) { 591de5f6a77SJohn Dyson 592de5f6a77SJohn Dyson vm_object_t other_object; 593de5f6a77SJohn Dyson vm_pindex_t other_pindex, other_pindex_offset; 594de5f6a77SJohn Dyson vm_page_t tm; 595de5f6a77SJohn Dyson 596b18bfc3dSJohn Dyson other_object = TAILQ_FIRST(&object->shadow_head); 597de5f6a77SJohn Dyson if (other_object == first_object) 598b18bfc3dSJohn Dyson other_object = TAILQ_NEXT(other_object, shadow_list); 599de5f6a77SJohn Dyson if (!other_object) 600de5f6a77SJohn Dyson panic("vm_fault: other object missing"); 601de5f6a77SJohn Dyson if (other_object && 602de5f6a77SJohn Dyson (other_object->type == OBJT_DEFAULT) && 603de5f6a77SJohn Dyson (other_object->paging_in_progress == 0)) { 604de5f6a77SJohn Dyson other_pindex_offset = 605de5f6a77SJohn Dyson OFF_TO_IDX(other_object->backing_object_offset); 606de5f6a77SJohn Dyson if (pindex >= other_pindex_offset) { 607de5f6a77SJohn Dyson other_pindex = pindex - 
other_pindex_offset; 608de5f6a77SJohn Dyson /* 609de5f6a77SJohn Dyson * If the other object has the page, just free it. 610de5f6a77SJohn Dyson */ 611de5f6a77SJohn Dyson if ((tm = vm_page_lookup(other_object, other_pindex))) { 612de5f6a77SJohn Dyson if ((tm->flags & PG_BUSY) == 0 && 613de5f6a77SJohn Dyson tm->busy == 0 && 614de5f6a77SJohn Dyson tm->valid == VM_PAGE_BITS_ALL) { 615de5f6a77SJohn Dyson /* 616de5f6a77SJohn Dyson * get rid of the unnecessary page 617de5f6a77SJohn Dyson */ 618de5f6a77SJohn Dyson vm_page_protect(m, VM_PROT_NONE); 619de5f6a77SJohn Dyson PAGE_WAKEUP(m); 620de5f6a77SJohn Dyson vm_page_free(m); 621de5f6a77SJohn Dyson m = NULL; 622de5f6a77SJohn Dyson ++vm_fault_free_1; 623de5f6a77SJohn Dyson tm->dirty = VM_PAGE_BITS_ALL; 624de5f6a77SJohn Dyson first_m->dirty = VM_PAGE_BITS_ALL; 625de5f6a77SJohn Dyson } 626de5f6a77SJohn Dyson } else { 627de5f6a77SJohn Dyson /* 628de5f6a77SJohn Dyson * If the other object doesn't have the page, 629de5f6a77SJohn Dyson * then we move it there. 630de5f6a77SJohn Dyson */ 631de5f6a77SJohn Dyson vm_page_rename(m, other_object, other_pindex); 632de5f6a77SJohn Dyson m->dirty = VM_PAGE_BITS_ALL; 63365bc79b8SJohn Dyson m->valid = VM_PAGE_BITS_ALL; 634de5f6a77SJohn Dyson ++vm_fault_copy_save_2; 635de5f6a77SJohn Dyson } 636de5f6a77SJohn Dyson } 637de5f6a77SJohn Dyson } 638de5f6a77SJohn Dyson } 639de5f6a77SJohn Dyson 640de5f6a77SJohn Dyson if (m) { 641bd7e5f99SJohn Dyson if (m->queue != PQ_ACTIVE) 642df8bae1dSRodney W. Grimes vm_page_activate(m); 643df8bae1dSRodney W. Grimes /* 644df8bae1dSRodney W. Grimes * We no longer need the old page or object. 645df8bae1dSRodney W. Grimes */ 646df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); 647de5f6a77SJohn Dyson } 648df8bae1dSRodney W. Grimes 649de5f6a77SJohn Dyson vm_object_pip_wakeup(object); 650df8bae1dSRodney W. Grimes /* 651df8bae1dSRodney W. Grimes * Only use the new page below... 652df8bae1dSRodney W. Grimes */ 653df8bae1dSRodney W. Grimes 654df8bae1dSRodney W. 
Grimes cnt.v_cow_faults++; 655df8bae1dSRodney W. Grimes m = first_m; 656df8bae1dSRodney W. Grimes object = first_object; 657a316d390SJohn Dyson pindex = first_pindex; 658df8bae1dSRodney W. Grimes 659df8bae1dSRodney W. Grimes /* 6600d94caffSDavid Greenman * Now that we've gotten the copy out of the way, 6610d94caffSDavid Greenman * let's try to collapse the top object. 66224a1cce3SDavid Greenman * 663df8bae1dSRodney W. Grimes * But we have to play ugly games with 664df8bae1dSRodney W. Grimes * paging_in_progress to do that... 665df8bae1dSRodney W. Grimes */ 666f919ebdeSDavid Greenman vm_object_pip_wakeup(object); 667df8bae1dSRodney W. Grimes vm_object_collapse(object); 668df8bae1dSRodney W. Grimes object->paging_in_progress++; 6690d94caffSDavid Greenman } else { 670df8bae1dSRodney W. Grimes prot &= ~VM_PROT_WRITE; 671df8bae1dSRodney W. Grimes } 672df8bae1dSRodney W. Grimes } 673df8bae1dSRodney W. Grimes 674df8bae1dSRodney W. Grimes /* 6750d94caffSDavid Greenman * We must verify that the maps have not changed since our last 6760d94caffSDavid Greenman * lookup. 677df8bae1dSRodney W. Grimes */ 678df8bae1dSRodney W. Grimes 679df8bae1dSRodney W. Grimes if (!lookup_still_valid) { 680df8bae1dSRodney W. Grimes vm_object_t retry_object; 681a316d390SJohn Dyson vm_pindex_t retry_pindex; 682df8bae1dSRodney W. Grimes vm_prot_t retry_prot; 683df8bae1dSRodney W. Grimes 684df8bae1dSRodney W. Grimes /* 6850d94caffSDavid Greenman * Since map entries may be pageable, make sure we can take a 6860d94caffSDavid Greenman * page fault on them. 687df8bae1dSRodney W. Grimes */ 688df8bae1dSRodney W. Grimes 689df8bae1dSRodney W. Grimes /* 69024a1cce3SDavid Greenman * To avoid trying to write_lock the map while another process 6910d94caffSDavid Greenman * has it read_locked (in vm_map_pageable), we do not try for 6920d94caffSDavid Greenman * write permission. If the page is still writable, we will 6930d94caffSDavid Greenman * get write permission. 
If it is not, or has been marked 6940d94caffSDavid Greenman * needs_copy, we enter the mapping without write permission, 6950d94caffSDavid Greenman * and will merely take another fault. 696df8bae1dSRodney W. Grimes */ 6970d94caffSDavid Greenman result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE, 698a316d390SJohn Dyson &entry, &retry_object, &retry_pindex, &retry_prot, &wired, &su); 699df8bae1dSRodney W. Grimes 700df8bae1dSRodney W. Grimes /* 7010d94caffSDavid Greenman * If we don't need the page any longer, put it on the active 7020d94caffSDavid Greenman * list (the easiest thing to do here). If no one needs it, 7030d94caffSDavid Greenman * pageout will grab it eventually. 704df8bae1dSRodney W. Grimes */ 705df8bae1dSRodney W. Grimes 706df8bae1dSRodney W. Grimes if (result != KERN_SUCCESS) { 707df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 708df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 709df8bae1dSRodney W. Grimes return (result); 710df8bae1dSRodney W. Grimes } 711df8bae1dSRodney W. Grimes lookup_still_valid = TRUE; 712df8bae1dSRodney W. Grimes 713df8bae1dSRodney W. Grimes if ((retry_object != first_object) || 714a316d390SJohn Dyson (retry_pindex != first_pindex)) { 715df8bae1dSRodney W. Grimes RELEASE_PAGE(m); 716df8bae1dSRodney W. Grimes UNLOCK_AND_DEALLOCATE; 717df8bae1dSRodney W. Grimes goto RetryFault; 718df8bae1dSRodney W. Grimes } 719df8bae1dSRodney W. Grimes /* 7200d94caffSDavid Greenman * Check whether the protection has changed or the object has 7210d94caffSDavid Greenman * been copied while we left the map unlocked. Changing from 7220d94caffSDavid Greenman * read to write permission is OK - we leave the page 7230d94caffSDavid Greenman * write-protected, and catch the write fault. Changing from 7240d94caffSDavid Greenman * write to read permission means that we can't mark the page 7250d94caffSDavid Greenman * write-enabled after all. 726df8bae1dSRodney W. Grimes */ 727df8bae1dSRodney W. Grimes prot &= retry_prot; 728df8bae1dSRodney W. 
Grimes } 729df8bae1dSRodney W. Grimes 730df8bae1dSRodney W. Grimes /* 7310d94caffSDavid Greenman * Put this page into the physical map. We had to do the unlock above 7320d94caffSDavid Greenman * because pmap_enter may cause other faults. We don't put the page 7330d94caffSDavid Greenman * back on the active queue until later so that the page-out daemon 7340d94caffSDavid Greenman * won't find us (yet). 735df8bae1dSRodney W. Grimes */ 736df8bae1dSRodney W. Grimes 7372ddba215SDavid Greenman if (prot & VM_PROT_WRITE) { 738f919ebdeSDavid Greenman m->flags |= PG_WRITEABLE; 739aef922f5SJohn Dyson m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY; 7402ddba215SDavid Greenman /* 7412ddba215SDavid Greenman * If the fault is a write, we know that this page is being 7422ddba215SDavid Greenman * written NOW. This will save on the pmap_is_modified() calls 7432ddba215SDavid Greenman * later. 7442ddba215SDavid Greenman */ 7452ddba215SDavid Greenman if (fault_type & VM_PROT_WRITE) { 7462ddba215SDavid Greenman m->dirty = VM_PAGE_BITS_ALL; 7472ddba215SDavid Greenman } 7482ddba215SDavid Greenman } 749f6b04d2bSDavid Greenman 75030dcfc09SJohn Dyson UNLOCK_THINGS; 75130dcfc09SJohn Dyson 752f70f05f2SJohn Dyson m->flags |= PG_MAPPED|PG_REFERENCED; 753ced399eeSJohn Dyson m->flags &= ~PG_ZERO; 75465bc79b8SJohn Dyson m->valid = VM_PAGE_BITS_ALL; 755f919ebdeSDavid Greenman 756df8bae1dSRodney W. Grimes pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); 757b18bfc3dSJohn Dyson if ((change_wiring == 0) && (wired == 0)) 7586d40c3d3SDavid Greenman pmap_prefault(map->pmap, vaddr, entry, first_object); 759df8bae1dSRodney W. Grimes 760df8bae1dSRodney W. Grimes /* 7610d94caffSDavid Greenman * If the page is not wired down, then put it where the pageout daemon 7620d94caffSDavid Greenman * can find it. 763df8bae1dSRodney W. Grimes */ 764df8bae1dSRodney W. Grimes if (change_wiring) { 765df8bae1dSRodney W. Grimes if (wired) 766df8bae1dSRodney W. Grimes vm_page_wire(m); 767df8bae1dSRodney W. 
Grimes else 768df8bae1dSRodney W. Grimes vm_page_unwire(m); 7690d94caffSDavid Greenman } else { 770bd7e5f99SJohn Dyson if (m->queue != PQ_ACTIVE) 771df8bae1dSRodney W. Grimes vm_page_activate(m); 77226f9a767SRodney W. Grimes } 77326f9a767SRodney W. Grimes 774a1f6d91cSDavid Greenman if (curproc && (curproc->p_flag & P_INMEM) && curproc->p_stats) { 77526f9a767SRodney W. Grimes if (hardfault) { 77626f9a767SRodney W. Grimes curproc->p_stats->p_ru.ru_majflt++; 77726f9a767SRodney W. Grimes } else { 77826f9a767SRodney W. Grimes curproc->p_stats->p_ru.ru_minflt++; 77926f9a767SRodney W. Grimes } 78026f9a767SRodney W. Grimes } 781df8bae1dSRodney W. Grimes 782df8bae1dSRodney W. Grimes /* 783df8bae1dSRodney W. Grimes * Unlock everything, and return 784df8bae1dSRodney W. Grimes */ 785df8bae1dSRodney W. Grimes 786df8bae1dSRodney W. Grimes PAGE_WAKEUP(m); 78730dcfc09SJohn Dyson vm_object_deallocate(first_object); 788df8bae1dSRodney W. Grimes 789df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 790df8bae1dSRodney W. Grimes 791df8bae1dSRodney W. Grimes } 792df8bae1dSRodney W. Grimes 793df8bae1dSRodney W. Grimes /* 794df8bae1dSRodney W. Grimes * vm_fault_wire: 795df8bae1dSRodney W. Grimes * 796df8bae1dSRodney W. Grimes * Wire down a range of virtual addresses in a map. 797df8bae1dSRodney W. Grimes */ 798df8bae1dSRodney W. Grimes int 799df8bae1dSRodney W. Grimes vm_fault_wire(map, start, end) 800df8bae1dSRodney W. Grimes vm_map_t map; 801df8bae1dSRodney W. Grimes vm_offset_t start, end; 802df8bae1dSRodney W. Grimes { 80326f9a767SRodney W. Grimes 804df8bae1dSRodney W. Grimes register vm_offset_t va; 805df8bae1dSRodney W. Grimes register pmap_t pmap; 806df8bae1dSRodney W. Grimes int rv; 807df8bae1dSRodney W. Grimes 808df8bae1dSRodney W. Grimes pmap = vm_map_pmap(map); 809df8bae1dSRodney W. Grimes 810df8bae1dSRodney W. 
Grimes /* 8110d94caffSDavid Greenman * Inform the physical mapping system that the range of addresses may 8120d94caffSDavid Greenman * not fault, so that page tables and such can be locked down as well. 813df8bae1dSRodney W. Grimes */ 814df8bae1dSRodney W. Grimes 815df8bae1dSRodney W. Grimes pmap_pageable(pmap, start, end, FALSE); 816df8bae1dSRodney W. Grimes 817df8bae1dSRodney W. Grimes /* 8180d94caffSDavid Greenman * We simulate a fault to get the page and enter it in the physical 8190d94caffSDavid Greenman * map. 820df8bae1dSRodney W. Grimes */ 821df8bae1dSRodney W. Grimes 822df8bae1dSRodney W. Grimes for (va = start; va < end; va += PAGE_SIZE) { 8236d40c3d3SDavid Greenman 824a1f6d91cSDavid Greenman while( curproc != pageproc && 825b18bfc3dSJohn Dyson (cnt.v_free_count <= cnt.v_pageout_free_min)) { 8266d40c3d3SDavid Greenman VM_WAIT; 827b18bfc3dSJohn Dyson } 8286d40c3d3SDavid Greenman 829a1f6d91cSDavid Greenman rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE, TRUE); 830df8bae1dSRodney W. Grimes if (rv) { 831df8bae1dSRodney W. Grimes if (va != start) 832df8bae1dSRodney W. Grimes vm_fault_unwire(map, start, va); 833df8bae1dSRodney W. Grimes return (rv); 834df8bae1dSRodney W. Grimes } 835df8bae1dSRodney W. Grimes } 836df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 837df8bae1dSRodney W. Grimes } 838df8bae1dSRodney W. Grimes 839df8bae1dSRodney W. Grimes 840df8bae1dSRodney W. Grimes /* 841df8bae1dSRodney W. Grimes * vm_fault_unwire: 842df8bae1dSRodney W. Grimes * 843df8bae1dSRodney W. Grimes * Unwire a range of virtual addresses in a map. 844df8bae1dSRodney W. Grimes */ 84526f9a767SRodney W. Grimes void 84626f9a767SRodney W. Grimes vm_fault_unwire(map, start, end) 847df8bae1dSRodney W. Grimes vm_map_t map; 848df8bae1dSRodney W. Grimes vm_offset_t start, end; 849df8bae1dSRodney W. Grimes { 850df8bae1dSRodney W. Grimes 851df8bae1dSRodney W. Grimes register vm_offset_t va, pa; 852df8bae1dSRodney W. Grimes register pmap_t pmap; 853df8bae1dSRodney W. 
Grimes 854df8bae1dSRodney W. Grimes pmap = vm_map_pmap(map); 855df8bae1dSRodney W. Grimes 856df8bae1dSRodney W. Grimes /* 8570d94caffSDavid Greenman * Since the pages are wired down, we must be able to get their 8580d94caffSDavid Greenman * mappings from the physical map system. 859df8bae1dSRodney W. Grimes */ 860df8bae1dSRodney W. Grimes 861df8bae1dSRodney W. Grimes for (va = start; va < end; va += PAGE_SIZE) { 862df8bae1dSRodney W. Grimes pa = pmap_extract(pmap, va); 863b18bfc3dSJohn Dyson if (pa != (vm_offset_t) 0) { 864df8bae1dSRodney W. Grimes pmap_change_wiring(pmap, va, FALSE); 865df8bae1dSRodney W. Grimes vm_page_unwire(PHYS_TO_VM_PAGE(pa)); 866df8bae1dSRodney W. Grimes } 867b18bfc3dSJohn Dyson } 868df8bae1dSRodney W. Grimes 869df8bae1dSRodney W. Grimes /* 8700d94caffSDavid Greenman * Inform the physical mapping system that the range of addresses may 8710d94caffSDavid Greenman * fault, so that page tables and such may be unwired themselves. 872df8bae1dSRodney W. Grimes */ 873df8bae1dSRodney W. Grimes 874df8bae1dSRodney W. Grimes pmap_pageable(pmap, start, end, TRUE); 875df8bae1dSRodney W. Grimes 876df8bae1dSRodney W. Grimes } 877df8bae1dSRodney W. Grimes 878df8bae1dSRodney W. Grimes /* 879df8bae1dSRodney W. Grimes * Routine: 880df8bae1dSRodney W. Grimes * vm_fault_copy_entry 881df8bae1dSRodney W. Grimes * Function: 882df8bae1dSRodney W. Grimes * Copy all of the pages from a wired-down map entry to another. 883df8bae1dSRodney W. Grimes * 884df8bae1dSRodney W. Grimes * In/out conditions: 885df8bae1dSRodney W. Grimes * The source and destination maps must be locked for write. 886df8bae1dSRodney W. Grimes * The source map entry must be wired down (or be a sharing map 887df8bae1dSRodney W. Grimes * entry corresponding to a main map entry that is wired down). 888df8bae1dSRodney W. Grimes */ 889df8bae1dSRodney W. Grimes 89026f9a767SRodney W. Grimes void 89126f9a767SRodney W. Grimes vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) 892df8bae1dSRodney W. 
Grimes vm_map_t dst_map; 893df8bae1dSRodney W. Grimes vm_map_t src_map; 894df8bae1dSRodney W. Grimes vm_map_entry_t dst_entry; 895df8bae1dSRodney W. Grimes vm_map_entry_t src_entry; 896df8bae1dSRodney W. Grimes { 897df8bae1dSRodney W. Grimes vm_object_t dst_object; 898df8bae1dSRodney W. Grimes vm_object_t src_object; 899a316d390SJohn Dyson vm_ooffset_t dst_offset; 900a316d390SJohn Dyson vm_ooffset_t src_offset; 901df8bae1dSRodney W. Grimes vm_prot_t prot; 902df8bae1dSRodney W. Grimes vm_offset_t vaddr; 903df8bae1dSRodney W. Grimes vm_page_t dst_m; 904df8bae1dSRodney W. Grimes vm_page_t src_m; 905df8bae1dSRodney W. Grimes 906df8bae1dSRodney W. Grimes #ifdef lint 907df8bae1dSRodney W. Grimes src_map++; 9080d94caffSDavid Greenman #endif /* lint */ 909df8bae1dSRodney W. Grimes 910df8bae1dSRodney W. Grimes src_object = src_entry->object.vm_object; 911df8bae1dSRodney W. Grimes src_offset = src_entry->offset; 912df8bae1dSRodney W. Grimes 913df8bae1dSRodney W. Grimes /* 9140d94caffSDavid Greenman * Create the top-level object for the destination entry. (Doesn't 9150d94caffSDavid Greenman * actually shadow anything - we copy the pages directly.) 916df8bae1dSRodney W. Grimes */ 91724a1cce3SDavid Greenman dst_object = vm_object_allocate(OBJT_DEFAULT, 918a316d390SJohn Dyson (vm_size_t) OFF_TO_IDX(dst_entry->end - dst_entry->start)); 919df8bae1dSRodney W. Grimes 920df8bae1dSRodney W. Grimes dst_entry->object.vm_object = dst_object; 921df8bae1dSRodney W. Grimes dst_entry->offset = 0; 922df8bae1dSRodney W. Grimes 923df8bae1dSRodney W. Grimes prot = dst_entry->max_protection; 924df8bae1dSRodney W. Grimes 925df8bae1dSRodney W. Grimes /* 9260d94caffSDavid Greenman * Loop through all of the pages in the entry's range, copying each 9270d94caffSDavid Greenman * one from the source object (it should be there) to the destination 9280d94caffSDavid Greenman * object. 929df8bae1dSRodney W. Grimes */ 930df8bae1dSRodney W. 
Grimes for (vaddr = dst_entry->start, dst_offset = 0; 931df8bae1dSRodney W. Grimes vaddr < dst_entry->end; 932df8bae1dSRodney W. Grimes vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { 933df8bae1dSRodney W. Grimes 934df8bae1dSRodney W. Grimes /* 935df8bae1dSRodney W. Grimes * Allocate a page in the destination object 936df8bae1dSRodney W. Grimes */ 937df8bae1dSRodney W. Grimes do { 938a316d390SJohn Dyson dst_m = vm_page_alloc(dst_object, 939a316d390SJohn Dyson OFF_TO_IDX(dst_offset), VM_ALLOC_NORMAL); 940df8bae1dSRodney W. Grimes if (dst_m == NULL) { 941df8bae1dSRodney W. Grimes VM_WAIT; 942df8bae1dSRodney W. Grimes } 943df8bae1dSRodney W. Grimes } while (dst_m == NULL); 944df8bae1dSRodney W. Grimes 945df8bae1dSRodney W. Grimes /* 946df8bae1dSRodney W. Grimes * Find the page in the source object, and copy it in. 9470d94caffSDavid Greenman * (Because the source is wired down, the page will be in 9480d94caffSDavid Greenman * memory.) 949df8bae1dSRodney W. Grimes */ 950a316d390SJohn Dyson src_m = vm_page_lookup(src_object, 951a316d390SJohn Dyson OFF_TO_IDX(dst_offset + src_offset)); 952df8bae1dSRodney W. Grimes if (src_m == NULL) 953df8bae1dSRodney W. Grimes panic("vm_fault_copy_wired: page missing"); 954df8bae1dSRodney W. Grimes 955df8bae1dSRodney W. Grimes vm_page_copy(src_m, dst_m); 956df8bae1dSRodney W. Grimes 957df8bae1dSRodney W. Grimes /* 958df8bae1dSRodney W. Grimes * Enter it in the pmap... 959df8bae1dSRodney W. Grimes */ 960df8bae1dSRodney W. Grimes 961ced399eeSJohn Dyson dst_m->flags |= PG_WRITEABLE|PG_MAPPED; 962ccbb2f72SJohn Dyson dst_m->flags &= ~PG_ZERO; 963df8bae1dSRodney W. Grimes pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), 964df8bae1dSRodney W. Grimes prot, FALSE); 965df8bae1dSRodney W. Grimes 966df8bae1dSRodney W. Grimes /* 967df8bae1dSRodney W. Grimes * Mark it no longer busy, and put it on the active list. 968df8bae1dSRodney W. Grimes */ 969df8bae1dSRodney W. Grimes vm_page_activate(dst_m); 970df8bae1dSRodney W. 
Grimes PAGE_WAKEUP(dst_m); 971df8bae1dSRodney W. Grimes } 972df8bae1dSRodney W. Grimes } 97326f9a767SRodney W. Grimes 97426f9a767SRodney W. Grimes 97526f9a767SRodney W. Grimes /* 97626f9a767SRodney W. Grimes * This routine checks around the requested page for other pages that 97722ba64e8SJohn Dyson * might be able to be faulted in. This routine brackets the viable 97822ba64e8SJohn Dyson * pages for the pages to be paged in. 97926f9a767SRodney W. Grimes * 98026f9a767SRodney W. Grimes * Inputs: 98122ba64e8SJohn Dyson * m, rbehind, rahead 98226f9a767SRodney W. Grimes * 98326f9a767SRodney W. Grimes * Outputs: 98426f9a767SRodney W. Grimes * marray (array of vm_page_t), reqpage (index of requested page) 98526f9a767SRodney W. Grimes * 98626f9a767SRodney W. Grimes * Return value: 98726f9a767SRodney W. Grimes * number of pages in marray 98826f9a767SRodney W. Grimes */ 98926f9a767SRodney W. Grimes int 99022ba64e8SJohn Dyson vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage) 99126f9a767SRodney W. Grimes vm_page_t m; 99226f9a767SRodney W. Grimes int rbehind; 99322ba64e8SJohn Dyson int rahead; 99426f9a767SRodney W. Grimes vm_page_t *marray; 99526f9a767SRodney W. Grimes int *reqpage; 99626f9a767SRodney W. Grimes { 99726f9a767SRodney W. Grimes int i; 99826f9a767SRodney W. Grimes vm_object_t object; 999a316d390SJohn Dyson vm_pindex_t pindex, startpindex, endpindex, tpindex; 1000a316d390SJohn Dyson vm_offset_t size; 100126f9a767SRodney W. Grimes vm_page_t rtm; 100226f9a767SRodney W. Grimes int treqpage; 1003170db9c6SJohn Dyson int cbehind, cahead; 100426f9a767SRodney W. Grimes 100526f9a767SRodney W. Grimes object = m->object; 1006a316d390SJohn Dyson pindex = m->pindex; 100726f9a767SRodney W. Grimes 100826f9a767SRodney W. Grimes /* 100926f9a767SRodney W. Grimes * if the requested page is not available, then give up now 101026f9a767SRodney W. Grimes */ 101126f9a767SRodney W. 
Grimes 1012170db9c6SJohn Dyson if (!vm_pager_has_page(object, 1013a316d390SJohn Dyson OFF_TO_IDX(object->paging_offset) + pindex, &cbehind, &cahead)) 101426f9a767SRodney W. Grimes return 0; 101526f9a767SRodney W. Grimes 101622ba64e8SJohn Dyson if ((cbehind == 0) && (cahead == 0)) { 101722ba64e8SJohn Dyson *reqpage = 0; 101822ba64e8SJohn Dyson marray[0] = m; 101922ba64e8SJohn Dyson return 1; 1020170db9c6SJohn Dyson } 102122ba64e8SJohn Dyson 102222ba64e8SJohn Dyson if (rahead > cahead) { 102322ba64e8SJohn Dyson rahead = cahead; 102422ba64e8SJohn Dyson } 102522ba64e8SJohn Dyson 1026170db9c6SJohn Dyson if (rbehind > cbehind) { 1027170db9c6SJohn Dyson rbehind = cbehind; 1028170db9c6SJohn Dyson } 1029170db9c6SJohn Dyson 103026f9a767SRodney W. Grimes /* 103126f9a767SRodney W. Grimes * try to do any readahead that we might have free pages for. 103226f9a767SRodney W. Grimes */ 1033ccbb2f72SJohn Dyson if ((rahead + rbehind) > 103422ba64e8SJohn Dyson ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved)) { 1035f919ebdeSDavid Greenman pagedaemon_wakeup(); 103626f9a767SRodney W. Grimes *reqpage = 0; 103726f9a767SRodney W. Grimes marray[0] = m; 103826f9a767SRodney W. Grimes return 1; 103926f9a767SRodney W. Grimes } 104022ba64e8SJohn Dyson 104126f9a767SRodney W. Grimes /* 10420d94caffSDavid Greenman * scan backward for the read behind pages -- in memory or on disk not 10430d94caffSDavid Greenman * in same object 104426f9a767SRodney W. Grimes */ 1045a316d390SJohn Dyson tpindex = pindex - 1; 1046a316d390SJohn Dyson if (tpindex < pindex) { 1047a316d390SJohn Dyson if (rbehind > pindex) 1048a316d390SJohn Dyson rbehind = pindex; 1049a316d390SJohn Dyson startpindex = pindex - rbehind; 1050a316d390SJohn Dyson while (tpindex >= startpindex) { 1051a316d390SJohn Dyson if (vm_page_lookup( object, tpindex)) { 1052a316d390SJohn Dyson startpindex = tpindex + 1; 105326f9a767SRodney W. Grimes break; 105426f9a767SRodney W. 
Grimes } 1055a316d390SJohn Dyson if (tpindex == 0) 105626f9a767SRodney W. Grimes break; 1057a316d390SJohn Dyson tpindex -= 1; 105826f9a767SRodney W. Grimes } 1059317205caSDavid Greenman } else { 1060a316d390SJohn Dyson startpindex = pindex; 1061317205caSDavid Greenman } 106226f9a767SRodney W. Grimes 106326f9a767SRodney W. Grimes /* 10640d94caffSDavid Greenman * scan forward for the read ahead pages -- in memory or on disk not 10650d94caffSDavid Greenman * in same object 106626f9a767SRodney W. Grimes */ 1067a316d390SJohn Dyson tpindex = pindex + 1; 1068a316d390SJohn Dyson endpindex = pindex + (rahead + 1); 1069a316d390SJohn Dyson if (endpindex > object->size) 1070a316d390SJohn Dyson endpindex = object->size; 1071a316d390SJohn Dyson while (tpindex < endpindex) { 1072a316d390SJohn Dyson if ( vm_page_lookup(object, tpindex)) { 107326f9a767SRodney W. Grimes break; 107426f9a767SRodney W. Grimes } 1075a316d390SJohn Dyson tpindex += 1; 107626f9a767SRodney W. Grimes } 1077a316d390SJohn Dyson endpindex = tpindex; 107826f9a767SRodney W. Grimes 107926f9a767SRodney W. Grimes /* calculate number of bytes of pages */ 1080a316d390SJohn Dyson size = endpindex - startpindex; 108126f9a767SRodney W. Grimes 108226f9a767SRodney W. Grimes /* calculate the page offset of the required page */ 1083a316d390SJohn Dyson treqpage = pindex - startpindex; 108426f9a767SRodney W. Grimes 108526f9a767SRodney W. Grimes /* see if we have space (again) */ 108622ba64e8SJohn Dyson if ((cnt.v_free_count + cnt.v_cache_count) > 108722ba64e8SJohn Dyson (cnt.v_free_reserved + size)) { 108826f9a767SRodney W. Grimes /* 108926f9a767SRodney W. Grimes * get our pages and don't block for them 109026f9a767SRodney W. Grimes */ 109126f9a767SRodney W. 
Grimes for (i = 0; i < size; i++) { 1092170db9c6SJohn Dyson if (i != treqpage) { 1093ccbb2f72SJohn Dyson rtm = vm_page_alloc(object, 1094a316d390SJohn Dyson startpindex + i, 109522ba64e8SJohn Dyson VM_ALLOC_NORMAL); 1096ccbb2f72SJohn Dyson if (rtm == NULL) { 1097170db9c6SJohn Dyson if (i < treqpage) { 1098ccbb2f72SJohn Dyson int j; 1099ccbb2f72SJohn Dyson for (j = 0; j < i; j++) { 110021bf3904SJohn Dyson FREE_PAGE(marray[j]); 110126f9a767SRodney W. Grimes } 110226f9a767SRodney W. Grimes *reqpage = 0; 110326f9a767SRodney W. Grimes marray[0] = m; 110426f9a767SRodney W. Grimes return 1; 1105ccbb2f72SJohn Dyson } else { 1106ccbb2f72SJohn Dyson size = i; 1107ccbb2f72SJohn Dyson *reqpage = treqpage; 1108ccbb2f72SJohn Dyson return size; 1109ccbb2f72SJohn Dyson } 1110ccbb2f72SJohn Dyson } 1111ccbb2f72SJohn Dyson marray[i] = rtm; 1112ccbb2f72SJohn Dyson } else { 1113ccbb2f72SJohn Dyson marray[i] = m; 1114ccbb2f72SJohn Dyson } 111526f9a767SRodney W. Grimes } 1116170db9c6SJohn Dyson 111726f9a767SRodney W. Grimes *reqpage = treqpage; 111826f9a767SRodney W. Grimes return size; 111926f9a767SRodney W. Grimes } 112026f9a767SRodney W. Grimes *reqpage = 0; 112126f9a767SRodney W. Grimes marray[0] = m; 112226f9a767SRodney W. Grimes return 1; 112326f9a767SRodney W. Grimes } 1124