/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Virtual memory mapping module.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/vnode.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/file.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/shm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>
#include <vm/swap_pager.h>
#include <vm/uma.h>

/*
 *	Virtual memory maps provide for the mapping, protection,
 *	and sharing of virtual memory objects.  In addition,
 *	this module provides for an efficient virtual copy of
 *	memory from one map to another.
 *
 *	Synchronization is required prior to most operations.
 *
 *	Maps consist of an ordered doubly-linked list of simple
 *	entries; a self-adjusting binary search tree of these
 *	entries is used to speed up lookups.
 *
 *	Since portions of maps are specified by start/end addresses,
 *	which may not align with existing map entries, all
 *	routines merely "clip" entries to these start/end values.
 *	[That is, an entry is split into two, bordering at a
 *	start or end value.]  Note that these clippings may not
 *	always be necessary (as the two resulting entries are then
 *	not changed); however, the clipping is done for convenience.
 *
 *	As mentioned above, virtual copy operations are performed
 *	by copying VM object references from one map to
 *	another, and then marking both regions as copy-on-write.
 */

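/*
 * Illustrative sketch (added commentary, not from the original file):
 * clipping in action.  Changing the protection of [0x3000, 0x5000)
 * inside a single entry that spans [0x2000, 0x8000) first splits that
 * entry into three:
 *
 *	[0x2000, 0x3000)  [0x3000, 0x5000)  [0x5000, 0x8000)
 *
 * and then applies the change only to the middle entry; the flanking
 * entries are otherwise untouched.
 */
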
static struct mtx map_sleep_mtx;
static uma_zone_t mapentzone;
static uma_zone_t kmapentzone;
static uma_zone_t mapzone;
static uma_zone_t vmspace_zone;
static int vmspace_zinit(void *mem, int size, int flags);
static int vm_map_zinit(void *mem, int size, int flags);
static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min,
    vm_offset_t max);
static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
#ifdef INVARIANTS
static void vm_map_zdtor(void *mem, int size, void *arg);
static void vmspace_zdtor(void *mem, int size, void *arg);
#endif

#define	ENTRY_CHARGED(e) ((e)->cred != NULL || \
    ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \
    !((e)->eflags & MAP_ENTRY_NEEDS_COPY)))

/*
 * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type
 * stable.
 */
#define	PROC_VMSPACE_LOCK(p) do { } while (0)
#define	PROC_VMSPACE_UNLOCK(p) do { } while (0)

/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Clamps the starting and ending region addresses so that
 *	they fall within the valid range of the map.
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)		\
	{						\
	if (start < vm_map_min(map))			\
		start = vm_map_min(map);		\
	if (end > vm_map_max(map))			\
		end = vm_map_max(map);			\
	if (start > end)				\
		start = end;				\
	}

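/*
 * Example (illustrative, added commentary): with a map covering
 * [0x1000, 0x10000), the range check clamps start = 0x500 up to 0x1000
 * and end = 0x20000 down to 0x10000; a request entirely outside the map
 * degenerates to an empty range with start == end.
 */
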
/*
 *	vm_map_startup:
 *
 *	Initialize the vm_map module.  Must be called before
 *	any other vm_map routines.
 *
 *	Map and entry structures are allocated from the general
 *	purpose memory pool with some exceptions:
 *
 *	- The kernel map and kmem submap are allocated statically.
 *	- Kernel map entries are allocated out of a static pool.
 *
 *	These restrictions are necessary since malloc() uses the
 *	maps and requires map entries.
 */

void
vm_map_startup(void)
{
	mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
	mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
#ifdef INVARIANTS
	    vm_map_zdtor,
#else
	    NULL,
#endif
	    vm_map_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	uma_prealloc(mapzone, MAX_KMAP);
	kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
	    UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
	mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
#ifdef INVARIANTS
	    vmspace_zdtor,
#else
	    NULL,
#endif
	    vmspace_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
}

static int
vmspace_zinit(void *mem, int size, int flags)
{
	struct vmspace *vm;

	vm = (struct vmspace *)mem;

	vm->vm_map.pmap = NULL;
	(void)vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map), flags);
	PMAP_LOCK_INIT(vmspace_pmap(vm));
	return (0);
}

static int
vm_map_zinit(void *mem, int size, int flags)
{
	vm_map_t map;

	map = (vm_map_t)mem;
	memset(map, 0, sizeof(*map));
	mtx_init(&map->system_mtx, "vm map (system)", NULL, MTX_DEF | MTX_DUPOK);
	sx_init(&map->lock, "vm map (user)");
	return (0);
}

#ifdef INVARIANTS
static void
vmspace_zdtor(void *mem, int size, void *arg)
{
	struct vmspace *vm;

	vm = (struct vmspace *)mem;

	vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
}
static void
vm_map_zdtor(void *mem, int size, void *arg)
{
	vm_map_t map;

	map = (vm_map_t)mem;
	KASSERT(map->nentries == 0,
	    ("map %p nentries == %d on free.",
	    map, map->nentries));
	KASSERT(map->size == 0,
	    ("map %p size == %lu on free.",
	    map, (unsigned long)map->size));
}
#endif	/* INVARIANTS */

/*
 * Allocate a vmspace structure, including a vm_map and pmap,
 * and initialize those structures.  The refcnt is set to 1.
 *
 * If 'pinit' is NULL then the embedded pmap is initialized via pmap_pinit().
 */
struct vmspace *
vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit)
{
	struct vmspace *vm;

	vm = uma_zalloc(vmspace_zone, M_WAITOK);

	KASSERT(vm->vm_map.pmap == NULL, ("vm_map.pmap must be NULL"));

	if (pinit == NULL)
		pinit = &pmap_pinit;

	if (!pinit(vmspace_pmap(vm))) {
		uma_zfree(vmspace_zone, vm);
		return (NULL);
	}
	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
	_vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max);
	vm->vm_refcnt = 1;
	vm->vm_shm = NULL;
	vm->vm_swrss = 0;
	vm->vm_tsize = 0;
	vm->vm_dsize = 0;
	vm->vm_ssize = 0;
	vm->vm_taddr = 0;
	vm->vm_daddr = 0;
	vm->vm_maxsaddr = 0;
	return (vm);
}

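/*
 * Usage sketch (illustrative, added commentary): a typical caller
 * creates a vmspace with the default pmap initializer by passing a
 * NULL 'pinit', and must handle failure, since pmap initialization
 * can fail:
 *
 *	struct vmspace *vm;
 *
 *	vm = vmspace_alloc(sva, eva, NULL);	(NULL selects pmap_pinit)
 *	if (vm == NULL)
 *		return (ENOMEM);
 *	...
 *	vmspace_free(vm);	(drops the initial reference)
 */
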
static void
vmspace_container_reset(struct proc *p)
{

#ifdef RACCT
	PROC_LOCK(p);
	racct_set(p, RACCT_DATA, 0);
	racct_set(p, RACCT_STACK, 0);
	racct_set(p, RACCT_RSS, 0);
	racct_set(p, RACCT_MEMLOCK, 0);
	racct_set(p, RACCT_VMEM, 0);
	PROC_UNLOCK(p);
#endif
}

static inline void
vmspace_dofree(struct vmspace *vm)
{

	CTR1(KTR_VM, "vmspace_free: %p", vm);

	/*
	 * Make sure any SysV shm is freed, it might not have been in
	 * exit1().
	 */
	shmexit(vm);

	/*
	 * Lock the map, to wait out all other references to it.
	 * Delete all of the mappings and pages they hold, then call
	 * the pmap module to reclaim anything left.
	 */
	(void)vm_map_remove(&vm->vm_map, vm->vm_map.min_offset,
	    vm->vm_map.max_offset);

	pmap_release(vmspace_pmap(vm));
	vm->vm_map.pmap = NULL;
	uma_zfree(vmspace_zone, vm);
}

void
vmspace_free(struct vmspace *vm)
{

	if (vm->vm_refcnt == 0)
		panic("vmspace_free: attempt to free already freed vmspace");

	if (atomic_fetchadd_int(&vm->vm_refcnt, -1) == 1)
		vmspace_dofree(vm);
}

void
vmspace_exitfree(struct proc *p)
{
	struct vmspace *vm;

	PROC_VMSPACE_LOCK(p);
	vm = p->p_vmspace;
	p->p_vmspace = NULL;
	PROC_VMSPACE_UNLOCK(p);
	KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace"));
	vmspace_free(vm);
}

void
vmspace_exit(struct thread *td)
{
	int refcnt;
	struct vmspace *vm;
	struct proc *p;

	/*
	 * Release the user portion of the address space.  This releases
	 * references to vnodes, which could cause I/O if the file has
	 * been unlinked, so it must be done early enough that we can
	 * still sleep.
	 *
	 * The last exiting process to reach this point releases as
	 * much of the environment as it can.  vmspace_dofree() is the
	 * slower fallback in case another process had a temporary
	 * reference to the vmspace.
	 */

	p = td->td_proc;
	vm = p->p_vmspace;
	atomic_add_int(&vmspace0.vm_refcnt, 1);
	do {
		refcnt = vm->vm_refcnt;
		if (refcnt > 1 && p->p_vmspace != &vmspace0) {
			/* Switch now since other proc might free vmspace */
			PROC_VMSPACE_LOCK(p);
			p->p_vmspace = &vmspace0;
			PROC_VMSPACE_UNLOCK(p);
			pmap_activate(td);
		}
	} while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1));
	if (refcnt == 1) {
		if (p->p_vmspace != vm) {
			/* vmspace not yet freed, switch back */
			PROC_VMSPACE_LOCK(p);
			p->p_vmspace = vm;
			PROC_VMSPACE_UNLOCK(p);
			pmap_activate(td);
		}
		pmap_remove_pages(vmspace_pmap(vm));
		/* Switch now since this proc will free vmspace */
		PROC_VMSPACE_LOCK(p);
		p->p_vmspace = &vmspace0;
		PROC_VMSPACE_UNLOCK(p);
		pmap_activate(td);
		vmspace_dofree(vm);
	}
	vmspace_container_reset(p);
}

/* Acquire reference to vmspace owned by another process. */

struct vmspace *
vmspace_acquire_ref(struct proc *p)
{
	struct vmspace *vm;
	int refcnt;

	PROC_VMSPACE_LOCK(p);
	vm = p->p_vmspace;
	if (vm == NULL) {
		PROC_VMSPACE_UNLOCK(p);
		return (NULL);
	}
	do {
		refcnt = vm->vm_refcnt;
		if (refcnt <= 0) {	/* Avoid 0->1 transition */
			PROC_VMSPACE_UNLOCK(p);
			return (NULL);
		}
	} while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt + 1));
	if (vm != p->p_vmspace) {
		PROC_VMSPACE_UNLOCK(p);
		vmspace_free(vm);
		return (NULL);
	}
	PROC_VMSPACE_UNLOCK(p);
	return (vm);
}

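/*
 * Usage sketch (illustrative, added commentary): code that inspects
 * another process's address space takes a transient reference so the
 * vmspace cannot be torn down underneath it:
 *
 *	vm = vmspace_acquire_ref(p);
 *	if (vm == NULL)
 *		return (ESRCH);		(no address space to examine)
 *	map = &vm->vm_map;
 *	... operate on map ...
 *	vmspace_free(vm);
 */
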
void
_vm_map_lock(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_lock_flags_(&map->system_mtx, 0, file, line);
	else
		sx_xlock_(&map->lock, file, line);
	map->timestamp++;
}

static void
vm_map_process_deferred(void)
{
	struct thread *td;
	vm_map_entry_t entry, next;
	vm_object_t object;

	td = curthread;
	entry = td->td_map_def_user;
	td->td_map_def_user = NULL;
	while (entry != NULL) {
		next = entry->next;
		if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) {
			/*
			 * Decrement the object's writemappings and
			 * possibly the vnode's v_writecount.
			 */
			KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
			    ("Submap with writecount"));
			object = entry->object.vm_object;
			KASSERT(object != NULL, ("No object for writecount"));
			vnode_pager_release_writecount(object, entry->start,
			    entry->end);
		}
		vm_map_entry_deallocate(entry, FALSE);
		entry = next;
	}
}

void
_vm_map_unlock(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
	else {
		sx_xunlock_(&map->lock, file, line);
		vm_map_process_deferred();
	}
}

void
_vm_map_lock_read(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_lock_flags_(&map->system_mtx, 0, file, line);
	else
		sx_slock_(&map->lock, file, line);
}

void
_vm_map_unlock_read(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
	else {
		sx_sunlock_(&map->lock, file, line);
		vm_map_process_deferred();
	}
}

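/*
 * Added commentary: callers normally use the vm_map_lock(),
 * vm_map_unlock(), vm_map_lock_read() and vm_map_unlock_read() macros
 * from <vm/vm_map.h>, which supply the file/line arguments to the
 * functions above.  Unlocking a user map is also the point where
 * entries queued for deferred deallocation are finally freed by
 * vm_map_process_deferred(); system maps skip that step, since the
 * work may sleep.
 */
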
int
_vm_map_trylock(vm_map_t map, const char *file, int line)
{
	int error;

	error = map->system_map ?
	    !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
	    !sx_try_xlock_(&map->lock, file, line);
	if (error == 0)
		map->timestamp++;
	return (error == 0);
}

int
_vm_map_trylock_read(vm_map_t map, const char *file, int line)
{
	int error;

	error = map->system_map ?
	    !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
	    !sx_try_slock_(&map->lock, file, line);
	return (error == 0);
}

/*
 *	_vm_map_lock_upgrade:	[ internal use only ]
 *
 *	Tries to upgrade a read (shared) lock on the specified map to a write
 *	(exclusive) lock.  Returns the value "0" if the upgrade succeeds and a
 *	non-zero value if the upgrade fails.  If the upgrade fails, the map is
 *	returned without a read or write lock held.
 *
 *	Requires that the map be read locked.
 */
int
_vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
{
	unsigned int last_timestamp;

	if (map->system_map) {
		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
	} else {
		if (!sx_try_upgrade_(&map->lock, file, line)) {
			last_timestamp = map->timestamp;
			sx_sunlock_(&map->lock, file, line);
			vm_map_process_deferred();
			/*
			 * If the map's timestamp does not change while the
			 * map is unlocked, then the upgrade succeeds.
			 */
			sx_xlock_(&map->lock, file, line);
			if (last_timestamp != map->timestamp) {
				sx_xunlock_(&map->lock, file, line);
				return (1);
			}
		}
	}
	map->timestamp++;
	return (0);
}

void
_vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
{

	if (map->system_map) {
		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
	} else
		sx_downgrade_(&map->lock, file, line);
}

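/*
 * Usage sketch (illustrative, added commentary): because a failed
 * upgrade leaves the map with no lock held at all, callers must be
 * prepared to reacquire and revalidate:
 *
 *	vm_map_lock_read(map);
 *	...
 *	if (vm_map_lock_upgrade(map) != 0) {
 *		vm_map_lock(map);
 *		(anything observed under the read lock may now be
 *		 stale; look the entry up again before proceeding)
 *	}
 */
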
/*
 *	vm_map_locked:
 *
 *	Returns a non-zero value if the caller holds a write (exclusive) lock
 *	on the specified map and the value "0" otherwise.
 */
int
vm_map_locked(vm_map_t map)
{

	if (map->system_map)
		return (mtx_owned(&map->system_mtx));
	else
		return (sx_xlocked(&map->lock));
}

#ifdef INVARIANTS
static void
_vm_map_assert_locked(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
	else
		sx_assert_(&map->lock, SA_XLOCKED, file, line);
}

#define	VM_MAP_ASSERT_LOCKED(map) \
    _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE)
#else
#define	VM_MAP_ASSERT_LOCKED(map)
#endif

/*
 *	_vm_map_unlock_and_wait:
 *
 *	Atomically releases the lock on the specified map and puts the calling
 *	thread to sleep.  The calling thread will remain asleep until either
 *	vm_map_wakeup() is performed on the map or the specified timeout is
 *	exceeded.
 *
 *	WARNING!  This function does not perform deferred deallocations of
 *	objects and map entries.  Therefore, the calling thread is expected to
 *	reacquire the map lock after reawakening and later perform an ordinary
 *	unlock operation, such as vm_map_unlock(), before completing its
 *	operation on the map.
 */
int
_vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line)
{

	mtx_lock(&map_sleep_mtx);
	if (map->system_map)
		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
	else
		sx_xunlock_(&map->lock, file, line);
	return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps",
	    timo));
}

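/*
 * Usage sketch (illustrative, added commentary): the canonical pattern
 * around vm_map_unlock_and_wait() re-locks and revalidates, since the
 * map may have changed while the thread slept:
 *
 *	while (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
 *		entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 *		(void) vm_map_unlock_and_wait(map, 0);
 *		vm_map_lock(map);
 *		(re-lookup the entry here before continuing)
 *	}
 */
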
/*
 *	vm_map_wakeup:
 *
 *	Awaken any threads that have slept on the map using
 *	vm_map_unlock_and_wait().
 */
void
vm_map_wakeup(vm_map_t map)
{

	/*
	 * Acquire and release map_sleep_mtx to prevent a wakeup()
	 * from being performed (and lost) between the map unlock
	 * and the msleep() in _vm_map_unlock_and_wait().
	 */
	mtx_lock(&map_sleep_mtx);
	mtx_unlock(&map_sleep_mtx);
	wakeup(&map->root);
}

void
vm_map_busy(vm_map_t map)
{

	VM_MAP_ASSERT_LOCKED(map);
	map->busy++;
}

void
vm_map_unbusy(vm_map_t map)
{

	VM_MAP_ASSERT_LOCKED(map);
	KASSERT(map->busy, ("vm_map_unbusy: not busy"));
	if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) {
		vm_map_modflags(map, 0, MAP_BUSY_WAKEUP);
		wakeup(&map->busy);
	}
}

void
vm_map_wait_busy(vm_map_t map)
{

	VM_MAP_ASSERT_LOCKED(map);
	while (map->busy) {
		vm_map_modflags(map, MAP_BUSY_WAKEUP, 0);
		if (map->system_map)
			msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0);
		else
			sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0);
	}
	map->timestamp++;
}

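/*
 * Added commentary (an interpretation, not from the original file):
 * the busy count lets a thread mark the map as "in use" across an
 * unlock/relock window.  A writer that needs a quiescent map waits for
 * the count to drain after taking the write lock:
 *
 *	vm_map_lock(map);
 *	vm_map_wait_busy(map);	(sleeps until map->busy reaches zero)
 *	... modify the map ...
 *	vm_map_unlock(map);
 */
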
long
vmspace_resident_count(struct vmspace *vmspace)
{
	return pmap_resident_count(vmspace_pmap(vmspace));
}

/*
 *	vm_map_create:
 *
 *	Creates and returns a new empty VM map with
 *	the given physical map structure, and having
 *	the given lower and upper address bounds.
 */
vm_map_t
vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
{
	vm_map_t result;

	result = uma_zalloc(mapzone, M_WAITOK);
	CTR1(KTR_VM, "vm_map_create: %p", result);
	_vm_map_init(result, pmap, min, max);
	return (result);
}

/*
 * Initialize an existing vm_map structure
 * such as that in the vmspace structure.
 */
static void
_vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
{

	map->header.next = map->header.prev = &map->header;
	map->needs_wakeup = FALSE;
	map->system_map = 0;
	map->pmap = pmap;
	map->min_offset = min;
	map->max_offset = max;
	map->flags = 0;
	map->root = NULL;
	map->timestamp = 0;
	map->busy = 0;
}

void
vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
{

	_vm_map_init(map, pmap, min, max);
	mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
	sx_init(&map->lock, "user map");
}

/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 */
static void
vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
{
	uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
}

/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion.
 *	No entry fields are filled in.
 */
static vm_map_entry_t
vm_map_entry_create(vm_map_t map)
{
	vm_map_entry_t new_entry;

	if (map->system_map)
		new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
	else
		new_entry = uma_zalloc(mapentzone, M_WAITOK);
	if (new_entry == NULL)
		panic("vm_map_entry_create: kernel resources exhausted");
	return (new_entry);
}

/*
 *	vm_map_entry_set_behavior:
 *
 *	Set the expected access behavior, either normal, random, or
 *	sequential.
 */
static inline void
vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
{
	entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
	    (behavior & MAP_ENTRY_BEHAV_MASK);
}

/*
 *	vm_map_entry_set_max_free:
 *
 *	Set the max_free field in a vm_map_entry.
 */
static inline void
vm_map_entry_set_max_free(vm_map_entry_t entry)
{

	entry->max_free = entry->adj_free;
	if (entry->left != NULL && entry->left->max_free > entry->max_free)
		entry->max_free = entry->left->max_free;
	if (entry->right != NULL && entry->right->max_free > entry->max_free)
		entry->max_free = entry->right->max_free;
}

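/*
 * Worked example (illustrative, added commentary): adj_free is the gap
 * between an entry and its successor in address order, and max_free is
 * the largest adj_free anywhere in the entry's subtree.  For a node
 * with adj_free = 4 pages, a left child whose max_free is 16 pages and
 * a right child whose max_free is 2 pages, vm_map_entry_set_max_free()
 * stores 16.  Free-space searches use this to skip whole subtrees that
 * cannot satisfy an allocation.
 */
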
/*
 *	vm_map_entry_splay:
 *
 *	The Sleator and Tarjan top-down splay algorithm with the
 *	following variation.  Max_free must be computed bottom-up, so
 *	on the downward pass, maintain the left and right spines in
 *	reverse order.  Then, make a second pass up each side to fix
 *	the pointers and compute max_free.  The time bound is O(log n)
 *	amortized.
 *
 *	The new root is the vm_map_entry containing "addr", or else an
 *	adjacent entry (lower or higher) if addr is not in the tree.
 *
 *	The map must be locked, and leaves it so.
 *
 *	Returns: the new root.
 */
static vm_map_entry_t
vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root)
{
	vm_map_entry_t llist, rlist;
	vm_map_entry_t ltree, rtree;
	vm_map_entry_t y;

	/* Special case of empty tree. */
	if (root == NULL)
		return (root);

	/*
	 * Pass One: Splay down the tree until we find addr or a NULL
	 * pointer where addr would go.  llist and rlist are the two
	 * sides in reverse order (bottom-up), with llist linked by
	 * the right pointer and rlist linked by the left pointer in
	 * the vm_map_entry.  Wait until Pass Two to set max_free on
	 * the two spines.
	 */
	llist = NULL;
	rlist = NULL;
	for (;;) {
		/* root is never NULL in here. */
		if (addr < root->start) {
			y = root->left;
			if (y == NULL)
				break;
			if (addr < y->start && y->left != NULL) {
				/* Rotate right and put y on rlist. */
				root->left = y->right;
				y->right = root;
				vm_map_entry_set_max_free(root);
				root = y->left;
				y->left = rlist;
				rlist = y;
			} else {
				/* Put root on rlist. */
				root->left = rlist;
				rlist = root;
				root = y;
			}
		} else if (addr >= root->end) {
			y = root->right;
			if (y == NULL)
				break;
			if (addr >= y->end && y->right != NULL) {
				/* Rotate left and put y on llist. */
				root->right = y->left;
				y->left = root;
				vm_map_entry_set_max_free(root);
				root = y->right;
				y->right = llist;
				llist = y;
			} else {
				/* Put root on llist. */
				root->right = llist;
				llist = root;
				root = y;
			}
		} else
			break;
	}

	/*
	 * Pass Two: Walk back up the two spines, flip the pointers
	 * and set max_free.  The subtrees of the root go at the
	 * bottom of llist and rlist.
	 */
	ltree = root->left;
	while (llist != NULL) {
		y = llist->right;
		llist->right = ltree;
		vm_map_entry_set_max_free(llist);
		ltree = llist;
		llist = y;
	}
	rtree = root->right;
	while (rlist != NULL) {
		y = rlist->left;
		rlist->left = rtree;
		vm_map_entry_set_max_free(rlist);
		rtree = rlist;
		rlist = y;
	}

	/*
	 * Final assembly: add ltree and rtree as subtrees of root.
	 */
	root->left = ltree;
	root->right = rtree;
	vm_map_entry_set_max_free(root);

	return (root);
}

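/*
 * Example (illustrative, added commentary): splaying to address A in a
 * tree holding entries [2K,4K), [6K,8K) and [10K,12K) returns [6K,8K)
 * as the new root for any A in [6K,8K); for A = 5K, which lies in no
 * entry, it returns one of the neighboring entries, [2K,4K) or
 * [6K,8K), depending on the search path.  Callers such as
 * vm_map_lookup_entry() then decide whether the root actually
 * contains A.
 */
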
/*
 *	vm_map_entry_{un,}link:
 *
 *	Insert/remove entries from maps.
 */
static void
vm_map_entry_link(vm_map_t map,
		  vm_map_entry_t after_where,
		  vm_map_entry_t entry)
{

	CTR4(KTR_VM,
	    "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
	    map->nentries, entry, after_where);
	VM_MAP_ASSERT_LOCKED(map);
	map->nentries++;
	entry->prev = after_where;
	entry->next = after_where->next;
	entry->next->prev = entry;
	after_where->next = entry;

	if (after_where != &map->header) {
		if (after_where != map->root)
			vm_map_entry_splay(after_where->start, map->root);
		entry->right = after_where->right;
		entry->left = after_where;
		after_where->right = NULL;
		after_where->adj_free = entry->start - after_where->end;
		vm_map_entry_set_max_free(after_where);
	} else {
		entry->right = map->root;
		entry->left = NULL;
	}
	entry->adj_free = (entry->next == &map->header ? map->max_offset :
	    entry->next->start) - entry->end;
	vm_map_entry_set_max_free(entry);
	map->root = entry;
}

static void
vm_map_entry_unlink(vm_map_t map,
		    vm_map_entry_t entry)
{
	vm_map_entry_t next, prev, root;

	VM_MAP_ASSERT_LOCKED(map);
	if (entry != map->root)
		vm_map_entry_splay(entry->start, map->root);
	if (entry->left == NULL)
		root = entry->right;
	else {
		root = vm_map_entry_splay(entry->start, entry->left);
		root->right = entry->right;
		root->adj_free = (entry->next == &map->header ? map->max_offset :
		    entry->next->start) - root->end;
		vm_map_entry_set_max_free(root);
	}
	map->root = root;

	prev = entry->prev;
	next = entry->next;
	next->prev = prev;
	prev->next = next;
	map->nentries--;
	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
	    map->nentries, entry);
}

/*
 *	vm_map_entry_resize_free:
 *
 *	Recompute the amount of free space following a vm_map_entry
 *	and propagate that value up the tree.  Call this function after
 *	resizing a map entry in-place, that is, without a call to
 *	vm_map_entry_link() or _unlink().
 *
 *	The map must be locked, and leaves it so.
 */
static void
vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry)
{

	/*
	 * Using splay trees without parent pointers, propagating
	 * max_free up the tree is done by moving the entry to the
	 * root and making the change there.
	 */
	if (entry != map->root)
		map->root = vm_map_entry_splay(entry->start, map->root);

	entry->adj_free = (entry->next == &map->header ? map->max_offset :
	    entry->next->start) - entry->end;
	vm_map_entry_set_max_free(entry);
}

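/*
 * Usage sketch (illustrative, added commentary): growing an entry in
 * place, as vm_map_insert() does when it extends prev_entry, must fix
 * up the cached free-space values afterwards:
 *
 *	map->size += (end - prev_entry->end);
 *	prev_entry->end = end;
 *	vm_map_entry_resize_free(map, prev_entry);
 */
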
/*
 *	vm_map_lookup_entry:	[ internal use only ]
 *
 *	Finds the map entry containing (or
 *	immediately preceding) the specified address
 *	in the given map; the entry is returned
 *	in the "entry" parameter.  The boolean
 *	result indicates whether the address is
 *	actually contained in the map.
 */
boolean_t
vm_map_lookup_entry(
	vm_map_t map,
	vm_offset_t address,
	vm_map_entry_t *entry)	/* OUT */
{
	vm_map_entry_t cur;
	boolean_t locked;

	/*
	 * If the map is empty, then the map entry immediately preceding
	 * "address" is the map's header.
	 */
	cur = map->root;
	if (cur == NULL)
		*entry = &map->header;
	else if (address >= cur->start && cur->end > address) {
		*entry = cur;
		return (TRUE);
	} else if ((locked = vm_map_locked(map)) ||
	    sx_try_upgrade(&map->lock)) {
		/*
		 * Splay requires a write lock on the map.  However, it only
		 * restructures the binary search tree; it does not otherwise
		 * change the map.  Thus, the map's timestamp need not change
		 * on a temporary upgrade.
		 */
		map->root = cur = vm_map_entry_splay(address, cur);
		if (!locked)
			sx_downgrade(&map->lock);

		/*
		 * If "address" is contained within a map entry, the new root
		 * is that map entry.  Otherwise, the new root is a map entry
		 * immediately before or after "address".
		 */
		if (address >= cur->start) {
			*entry = cur;
			if (cur->end > address)
				return (TRUE);
		} else
			*entry = cur->prev;
	} else
		/*
		 * Since the map is only locked for read access, perform a
		 * standard binary search tree lookup for "address".
		 */
		for (;;) {
			if (address < cur->start) {
				if (cur->left == NULL) {
					*entry = cur->prev;
					break;
				}
				cur = cur->left;
			} else if (cur->end > address) {
				*entry = cur;
				return (TRUE);
			} else {
				if (cur->right == NULL) {
					*entry = cur;
					break;
				}
				cur = cur->right;
			}
		}
	return (FALSE);
}

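/*
 * Usage sketch (illustrative, added commentary): the boolean result
 * distinguishes a hit from a predecessor:
 *
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		(addr lies inside *entry)
 *	} else {
 *		(*entry is the entry, or the header, immediately
 *		 preceding addr; entry->next is the first entry
 *		 above addr)
 *	}
 */
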
110930dcfc09SJohn Dyson */ 111030dcfc09SJohn Dyson int 1111b9dcd593SBruce Evans vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1112b9dcd593SBruce Evans vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, 1113b9dcd593SBruce Evans int cow) 111430dcfc09SJohn Dyson { 1115c0877f10SJohn Dyson vm_map_entry_t new_entry; 1116c0877f10SJohn Dyson vm_map_entry_t prev_entry; 111730dcfc09SJohn Dyson vm_map_entry_t temp_entry; 11189730a5daSPaul Saab vm_eflags_t protoeflags; 1119ef694c1aSEdward Tomasz Napierala struct ucred *cred; 11208211bd45SKonstantin Belousov vm_inherit_t inheritance; 11213364c323SKonstantin Belousov boolean_t charge_prev_obj; 112230dcfc09SJohn Dyson 11233a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 11243a0916b8SKonstantin Belousov 112530dcfc09SJohn Dyson /* 112630dcfc09SJohn Dyson * Check that the start and end points are not bogus. 112730dcfc09SJohn Dyson */ 112830dcfc09SJohn Dyson if ((start < map->min_offset) || (end > map->max_offset) || 112930dcfc09SJohn Dyson (start >= end)) 113030dcfc09SJohn Dyson return (KERN_INVALID_ADDRESS); 113130dcfc09SJohn Dyson 113230dcfc09SJohn Dyson /* 113330dcfc09SJohn Dyson * Find the entry prior to the proposed starting address; if it's part 113430dcfc09SJohn Dyson * of an existing entry, this range is bogus. 113530dcfc09SJohn Dyson */ 113630dcfc09SJohn Dyson if (vm_map_lookup_entry(map, start, &temp_entry)) 113730dcfc09SJohn Dyson return (KERN_NO_SPACE); 113830dcfc09SJohn Dyson 113930dcfc09SJohn Dyson prev_entry = temp_entry; 114030dcfc09SJohn Dyson 114130dcfc09SJohn Dyson /* 114230dcfc09SJohn Dyson * Assert that the next entry doesn't overlap the end point. 114330dcfc09SJohn Dyson */ 114430dcfc09SJohn Dyson if ((prev_entry->next != &map->header) && 114530dcfc09SJohn Dyson (prev_entry->next->start < end)) 114630dcfc09SJohn Dyson return (KERN_NO_SPACE); 114730dcfc09SJohn Dyson 1148afa07f7eSJohn Dyson protoeflags = 0; 11493364c323SKonstantin Belousov charge_prev_obj = FALSE; 1150afa07f7eSJohn Dyson 1151afa07f7eSJohn Dyson if (cow & MAP_COPY_ON_WRITE) 1152e5f13bddSAlan Cox protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY; 1153afa07f7eSJohn Dyson 11544e045f93SAlan Cox if (cow & MAP_NOFAULT) { 1155afa07f7eSJohn Dyson protoeflags |= MAP_ENTRY_NOFAULT; 1156afa07f7eSJohn Dyson 11574e045f93SAlan Cox KASSERT(object == NULL, 11584e045f93SAlan Cox ("vm_map_insert: paradoxical MAP_NOFAULT request")); 11594e045f93SAlan Cox } 11604f79d873SMatthew Dillon if (cow & MAP_DISABLE_SYNCER) 11614f79d873SMatthew Dillon protoeflags |= MAP_ENTRY_NOSYNC; 11629730a5daSPaul Saab if (cow & MAP_DISABLE_COREDUMP) 11639730a5daSPaul Saab protoeflags |= MAP_ENTRY_NOCOREDUMP; 116484110e7eSKonstantin Belousov if (cow & MAP_VN_WRITECOUNT) 116584110e7eSKonstantin Belousov protoeflags |= MAP_ENTRY_VN_WRITECNT; 11668211bd45SKonstantin Belousov if (cow & MAP_INHERIT_SHARE) 11678211bd45SKonstantin Belousov inheritance = VM_INHERIT_SHARE; 11688211bd45SKonstantin Belousov else 11698211bd45SKonstantin Belousov inheritance = VM_INHERIT_DEFAULT; 11704f79d873SMatthew Dillon 1171ef694c1aSEdward Tomasz Napierala cred = NULL; 11723364c323SKonstantin Belousov KASSERT((object != kmem_object && object != kernel_object) || 11733364c323SKonstantin Belousov ((object == kmem_object || object == kernel_object) && 11743364c323SKonstantin Belousov !(protoeflags & MAP_ENTRY_NEEDS_COPY)), 11753364c323SKonstantin Belousov ("kmem or kernel object and cow")); 11763364c323SKonstantin Belousov if (cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT)) 11773364c323SKonstantin 
Belousov goto charged; 11783364c323SKonstantin Belousov if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) && 11793364c323SKonstantin Belousov ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) { 11803364c323SKonstantin Belousov if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start)) 11813364c323SKonstantin Belousov return (KERN_RESOURCE_SHORTAGE); 118241c22744SKonstantin Belousov KASSERT(object == NULL || (protoeflags & MAP_ENTRY_NEEDS_COPY) || 1183ef694c1aSEdward Tomasz Napierala object->cred == NULL, 11843364c323SKonstantin Belousov ("OVERCOMMIT: vm_map_insert o %p", object)); 1185ef694c1aSEdward Tomasz Napierala cred = curthread->td_ucred; 1186ef694c1aSEdward Tomasz Napierala crhold(cred); 11873364c323SKonstantin Belousov if (object == NULL && !(protoeflags & MAP_ENTRY_NEEDS_COPY)) 11883364c323SKonstantin Belousov charge_prev_obj = TRUE; 11893364c323SKonstantin Belousov } 11903364c323SKonstantin Belousov 11913364c323SKonstantin Belousov charged: 1192f8616ebfSAlan Cox /* Expand the kernel pmap, if necessary. */ 1193f8616ebfSAlan Cox if (map == kernel_map && end > kernel_vm_end) 1194f8616ebfSAlan Cox pmap_growkernel(end); 11951d284e00SAlan Cox if (object != NULL) { 119630dcfc09SJohn Dyson /* 11971d284e00SAlan Cox * OBJ_ONEMAPPING must be cleared unless this mapping 11981d284e00SAlan Cox * is trivially proven to be the only mapping for any 11991d284e00SAlan Cox * of the object's pages. (Object granularity 12001d284e00SAlan Cox * reference counting is insufficient to recognize 12011d284e00SAlan Cox * aliases with precision.) 120230dcfc09SJohn Dyson */ 120389f6b863SAttilio Rao VM_OBJECT_WLOCK(object); 12041d284e00SAlan Cox if (object->ref_count > 1 || object->shadow_count != 0) 12052aaeadf8SMatthew Dillon vm_object_clear_flag(object, OBJ_ONEMAPPING); 120689f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object); 12074e045f93SAlan Cox } 12084e045f93SAlan Cox else if ((prev_entry != &map->header) && 12094e045f93SAlan Cox (prev_entry->eflags == protoeflags) && 12108cc7e047SJohn Dyson (prev_entry->end == start) && 12114e045f93SAlan Cox (prev_entry->wired_count == 0) && 1212ef694c1aSEdward Tomasz Napierala (prev_entry->cred == cred || 12133364c323SKonstantin Belousov (prev_entry->object.vm_object != NULL && 1214ef694c1aSEdward Tomasz Napierala (prev_entry->object.vm_object->cred == cred))) && 12158cc7e047SJohn Dyson vm_object_coalesce(prev_entry->object.vm_object, 121657a21abaSAlan Cox prev_entry->offset, 12178cc7e047SJohn Dyson (vm_size_t)(prev_entry->end - prev_entry->start), 12183364c323SKonstantin Belousov (vm_size_t)(end - prev_entry->end), charge_prev_obj)) { 121930dcfc09SJohn Dyson /* 12202aaeadf8SMatthew Dillon * We were able to extend the object. Determine if we 12212aaeadf8SMatthew Dillon * can extend the previous map entry to include the 12222aaeadf8SMatthew Dillon * new range as well. 
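 *
 * For instance (illustrative addresses only): if the previous entry
 * maps [A, B), the request is for [B, C), and the protection,
 * inheritance, and charging checks below all pass, the existing
 * entry is simply grown to cover [A, C); no new entry is allocated.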
122330dcfc09SJohn Dyson */ 12248211bd45SKonstantin Belousov if ((prev_entry->inheritance == inheritance) && 12258cc7e047SJohn Dyson (prev_entry->protection == prot) && 12268cc7e047SJohn Dyson (prev_entry->max_protection == max)) { 122730dcfc09SJohn Dyson map->size += (end - prev_entry->end); 122830dcfc09SJohn Dyson prev_entry->end = end; 12290164e057SAlan Cox vm_map_entry_resize_free(map, prev_entry); 12304e71e795SMatthew Dillon vm_map_simplify_entry(map, prev_entry); 1231ef694c1aSEdward Tomasz Napierala if (cred != NULL) 1232ef694c1aSEdward Tomasz Napierala crfree(cred); 123330dcfc09SJohn Dyson return (KERN_SUCCESS); 123430dcfc09SJohn Dyson } 12358cc7e047SJohn Dyson 12362aaeadf8SMatthew Dillon /* 12372aaeadf8SMatthew Dillon * If we can extend the object but cannot extend the 12382aaeadf8SMatthew Dillon * map entry, we have to create a new map entry. We 12392aaeadf8SMatthew Dillon * must bump the ref count on the extended object to 12404e71e795SMatthew Dillon * account for it. object may be NULL. 12412aaeadf8SMatthew Dillon */ 12422aaeadf8SMatthew Dillon object = prev_entry->object.vm_object; 12432aaeadf8SMatthew Dillon offset = prev_entry->offset + 12442aaeadf8SMatthew Dillon (prev_entry->end - prev_entry->start); 12458cc7e047SJohn Dyson vm_object_reference(object); 1246ef694c1aSEdward Tomasz Napierala if (cred != NULL && object != NULL && object->cred != NULL && 12473364c323SKonstantin Belousov !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { 12483364c323SKonstantin Belousov /* Object already accounts for this uid. */ 1249ef694c1aSEdward Tomasz Napierala crfree(cred); 1250ef694c1aSEdward Tomasz Napierala cred = NULL; 12513364c323SKonstantin Belousov } 1252b18bfc3dSJohn Dyson } 12532aaeadf8SMatthew Dillon 12542aaeadf8SMatthew Dillon /* 12552aaeadf8SMatthew Dillon * NOTE: if conditionals fail, object can be NULL here. This occurs 12562aaeadf8SMatthew Dillon * in things like the buffer map where we manage kva but do not manage 12572aaeadf8SMatthew Dillon * backing objects. 
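 *
 * In that case new_entry->object.vm_object is simply left NULL
 * below; a backing object is allocated lazily later, e.g. by
 * _vm_map_clip_start() when a non-system map entry is split.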
12582aaeadf8SMatthew Dillon */ 12598cc7e047SJohn Dyson 126030dcfc09SJohn Dyson /* 126130dcfc09SJohn Dyson * Create a new entry 126230dcfc09SJohn Dyson */ 126330dcfc09SJohn Dyson new_entry = vm_map_entry_create(map); 126430dcfc09SJohn Dyson new_entry->start = start; 126530dcfc09SJohn Dyson new_entry->end = end; 1266ef694c1aSEdward Tomasz Napierala new_entry->cred = NULL; 126730dcfc09SJohn Dyson 1268afa07f7eSJohn Dyson new_entry->eflags = protoeflags; 126930dcfc09SJohn Dyson new_entry->object.vm_object = object; 127030dcfc09SJohn Dyson new_entry->offset = offset; 12712267af78SJulian Elischer new_entry->avail_ssize = 0; 12722267af78SJulian Elischer 12738211bd45SKonstantin Belousov new_entry->inheritance = inheritance; 127430dcfc09SJohn Dyson new_entry->protection = prot; 127530dcfc09SJohn Dyson new_entry->max_protection = max; 127630dcfc09SJohn Dyson new_entry->wired_count = 0; 127713458803SAlan Cox new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT; 127813458803SAlan Cox new_entry->next_read = OFF_TO_IDX(offset); 1279e5f251d2SAlan Cox 1280ef694c1aSEdward Tomasz Napierala KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry), 12813364c323SKonstantin Belousov ("OVERCOMMIT: vm_map_insert leaks vm_map %p", new_entry)); 1282ef694c1aSEdward Tomasz Napierala new_entry->cred = cred; 12833364c323SKonstantin Belousov 128430dcfc09SJohn Dyson /* 128530dcfc09SJohn Dyson * Insert the new entry into the list 128630dcfc09SJohn Dyson */ 128730dcfc09SJohn Dyson vm_map_entry_link(map, prev_entry, new_entry); 128830dcfc09SJohn Dyson map->size += new_entry->end - new_entry->start; 128930dcfc09SJohn Dyson 12901a484d28SMatthew Dillon /* 1291d2a444c0SAlan Cox * It may be possible to merge the new entry with the next and/or 1292d2a444c0SAlan Cox * previous entries. However, due to MAP_STACK_* being a hack, a 1293d2a444c0SAlan Cox * panic can result from merging such entries. 12941a484d28SMatthew Dillon */ 1295d2a444c0SAlan Cox if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0) 12964e71e795SMatthew Dillon vm_map_simplify_entry(map, new_entry); 12974e71e795SMatthew Dillon 12984f79d873SMatthew Dillon if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) { 12994da4d293SAlan Cox vm_map_pmap_enter(map, start, prot, 1300e972780aSAlan Cox object, OFF_TO_IDX(offset), end - start, 1301e972780aSAlan Cox cow & MAP_PREFAULT_PARTIAL); 13024f79d873SMatthew Dillon } 1303e972780aSAlan Cox 130430dcfc09SJohn Dyson return (KERN_SUCCESS); 130530dcfc09SJohn Dyson } 130630dcfc09SJohn Dyson 130730dcfc09SJohn Dyson /* 13080164e057SAlan Cox * vm_map_findspace: 13090164e057SAlan Cox * 13100164e057SAlan Cox * Find the first fit (lowest VM address) for "length" free bytes 13110164e057SAlan Cox * beginning at address >= start in the given map. 13120164e057SAlan Cox * 13130164e057SAlan Cox * In a vm_map_entry, "adj_free" is the amount of free space 13140164e057SAlan Cox * adjacent (higher address) to this entry, and "max_free" is the 13150164e057SAlan Cox * maximum amount of contiguous free space in its subtree. This 13160164e057SAlan Cox * allows finding a free region in one path down the tree, so 13170164e057SAlan Cox * O(log n) amortized with splay trees. 13180164e057SAlan Cox * 13190164e057SAlan Cox * Requires that the map be locked, and leaves it so. 13200164e057SAlan Cox * 13210164e057SAlan Cox * Returns: 0 on success, and starting address in *addr, 13220164e057SAlan Cox * 1 if insufficient space. 1323df8bae1dSRodney W. Grimes */ 1324df8bae1dSRodney W.
Grimes int 13250164e057SAlan Cox vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length, 13260164e057SAlan Cox vm_offset_t *addr) /* OUT */ 1327df8bae1dSRodney W. Grimes { 13280164e057SAlan Cox vm_map_entry_t entry; 1329f8616ebfSAlan Cox vm_offset_t st; 1330df8bae1dSRodney W. Grimes 1331986b43f8SAlan Cox /* 1332986b43f8SAlan Cox * Request must fit within min/max VM address and must avoid 1333986b43f8SAlan Cox * address wrap. 1334986b43f8SAlan Cox */ 1335df8bae1dSRodney W. Grimes if (start < map->min_offset) 1336df8bae1dSRodney W. Grimes start = map->min_offset; 1337986b43f8SAlan Cox if (start + length > map->max_offset || start + length < start) 1338df8bae1dSRodney W. Grimes return (1); 1339df8bae1dSRodney W. Grimes 13400164e057SAlan Cox /* Empty tree means wide open address space. */ 13410164e057SAlan Cox if (map->root == NULL) { 1342df8bae1dSRodney W. Grimes *addr = start; 1343f8616ebfSAlan Cox return (0); 134499448ed1SJohn Dyson } 13450164e057SAlan Cox 13460164e057SAlan Cox /* 13470164e057SAlan Cox * After splay, if start comes before root node, then there 13480164e057SAlan Cox * must be a gap from start to the root. 13490164e057SAlan Cox */ 13500164e057SAlan Cox map->root = vm_map_entry_splay(start, map->root); 13510164e057SAlan Cox if (start + length <= map->root->start) { 13520164e057SAlan Cox *addr = start; 1353f8616ebfSAlan Cox return (0); 13540164e057SAlan Cox } 13550164e057SAlan Cox 13560164e057SAlan Cox /* 13570164e057SAlan Cox * Root is the last node that might begin its gap before 1358986b43f8SAlan Cox * start, and this is the last comparison where address 1359986b43f8SAlan Cox * wrap might be a problem. 13600164e057SAlan Cox */ 13610164e057SAlan Cox st = (start > map->root->end) ? start : map->root->end; 1362986b43f8SAlan Cox if (length <= map->root->end + map->root->adj_free - st) { 13630164e057SAlan Cox *addr = st; 1364f8616ebfSAlan Cox return (0); 13650164e057SAlan Cox } 13660164e057SAlan Cox 13670164e057SAlan Cox /* With max_free, can immediately tell if no solution. */ 13680164e057SAlan Cox entry = map->root->right; 13690164e057SAlan Cox if (entry == NULL || length > entry->max_free) 13700164e057SAlan Cox return (1); 13710164e057SAlan Cox 13720164e057SAlan Cox /* 13730164e057SAlan Cox * Search the right subtree in the order: left subtree, root, 13740164e057SAlan Cox * right subtree (first fit). The previous splay implies that 13750164e057SAlan Cox * all regions in the right subtree have addresses > start. 13760164e057SAlan Cox */ 13770164e057SAlan Cox while (entry != NULL) { 13780164e057SAlan Cox if (entry->left != NULL && entry->left->max_free >= length) 13790164e057SAlan Cox entry = entry->left; 13800164e057SAlan Cox else if (entry->adj_free >= length) { 13810164e057SAlan Cox *addr = entry->end; 1382f8616ebfSAlan Cox return (0); 13830164e057SAlan Cox } else 13840164e057SAlan Cox entry = entry->right; 13850164e057SAlan Cox } 13860164e057SAlan Cox 13870164e057SAlan Cox /* Can't get here, so panic if we do. */ 13880164e057SAlan Cox panic("vm_map_findspace: max_free corrupt"); 1389df8bae1dSRodney W. Grimes } 1390df8bae1dSRodney W. 
Grimes 1391d239bd3cSKonstantin Belousov int 1392d239bd3cSKonstantin Belousov vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1393b8ca4ef2SAlan Cox vm_offset_t start, vm_size_t length, vm_prot_t prot, 1394d239bd3cSKonstantin Belousov vm_prot_t max, int cow) 1395d239bd3cSKonstantin Belousov { 1396b8ca4ef2SAlan Cox vm_offset_t end; 1397d239bd3cSKonstantin Belousov int result; 1398d239bd3cSKonstantin Belousov 1399d239bd3cSKonstantin Belousov end = start + length; 1400897d81a0SKonstantin Belousov vm_map_lock(map); 1401d239bd3cSKonstantin Belousov VM_MAP_RANGE_CHECK(map, start, end); 1402655c3490SKonstantin Belousov (void) vm_map_delete(map, start, end); 1403d239bd3cSKonstantin Belousov result = vm_map_insert(map, object, offset, start, end, prot, 1404d239bd3cSKonstantin Belousov max, cow); 1405d239bd3cSKonstantin Belousov vm_map_unlock(map); 1406d239bd3cSKonstantin Belousov return (result); 1407d239bd3cSKonstantin Belousov } 1408d239bd3cSKonstantin Belousov 1409df8bae1dSRodney W. Grimes /* 1410df8bae1dSRodney W. Grimes * vm_map_find finds an unallocated region in the target address 1411df8bae1dSRodney W. Grimes * map with the given length. The search is defined to be 1412df8bae1dSRodney W. Grimes * first-fit from the specified address; the region found is 1413df8bae1dSRodney W. Grimes * returned in the same parameter. 1414df8bae1dSRodney W. Grimes * 14152aaeadf8SMatthew Dillon * If object is non-NULL, ref count must be bumped by caller 14162aaeadf8SMatthew Dillon * prior to making call to account for the new entry. 1417df8bae1dSRodney W. Grimes */ 1418df8bae1dSRodney W. Grimes int 1419b9dcd593SBruce Evans vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1420b9dcd593SBruce Evans vm_offset_t *addr, /* IN/OUT */ 1421edb572a3SJohn Baldwin vm_size_t length, vm_offset_t max_addr, int find_space, 1422edb572a3SJohn Baldwin vm_prot_t prot, vm_prot_t max, int cow) 1423df8bae1dSRodney W. Grimes { 14245aa60b6fSJohn Baldwin vm_offset_t alignment, initial_addr, start; 14256eaee3feSAlan Cox int result; 1426df8bae1dSRodney W. Grimes 1427ff74a3faSJohn Baldwin if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL || 1428ff74a3faSJohn Baldwin (object->flags & OBJ_COLORED) == 0)) 1429ff74a3faSJohn Baldwin find_space = VMFS_ANY_SPACE; 14305aa60b6fSJohn Baldwin if (find_space >> 8 != 0) { 14315aa60b6fSJohn Baldwin KASSERT((find_space & 0xff) == 0, ("bad VMFS flags")); 14325aa60b6fSJohn Baldwin alignment = (vm_offset_t)1 << (find_space >> 8); 14335aa60b6fSJohn Baldwin } else 14345aa60b6fSJohn Baldwin alignment = 0; 1435ff74a3faSJohn Baldwin initial_addr = *addr; 1436ff74a3faSJohn Baldwin again: 1437ff74a3faSJohn Baldwin start = initial_addr; 1438bea41bcfSDavid Greenman vm_map_lock(map); 143926c538ffSAlan Cox do { 144026c538ffSAlan Cox if (find_space != VMFS_NO_SPACE) { 1441edb572a3SJohn Baldwin if (vm_map_findspace(map, start, length, addr) || 1442edb572a3SJohn Baldwin (max_addr != 0 && *addr + length > max_addr)) { 1443df8bae1dSRodney W. Grimes vm_map_unlock(map); 1444ff74a3faSJohn Baldwin if (find_space == VMFS_OPTIMAL_SPACE) { 1445ff74a3faSJohn Baldwin find_space = VMFS_ANY_SPACE; 1446ff74a3faSJohn Baldwin goto again; 1447ff74a3faSJohn Baldwin } 1448df8bae1dSRodney W. Grimes return (KERN_NO_SPACE); 1449df8bae1dSRodney W. 
Grimes } 1450ca596a25SJuli Mallett switch (find_space) { 14515aa60b6fSJohn Baldwin case VMFS_SUPER_SPACE: 1452ff74a3faSJohn Baldwin case VMFS_OPTIMAL_SPACE: 145326c538ffSAlan Cox pmap_align_superpage(object, offset, addr, 145426c538ffSAlan Cox length); 1455ca596a25SJuli Mallett break; 14565aa60b6fSJohn Baldwin case VMFS_ANY_SPACE: 14575aa60b6fSJohn Baldwin break; 1458ca596a25SJuli Mallett default: 14595aa60b6fSJohn Baldwin if ((*addr & (alignment - 1)) != 0) { 14605aa60b6fSJohn Baldwin *addr &= ~(alignment - 1); 14615aa60b6fSJohn Baldwin *addr += alignment; 14625aa60b6fSJohn Baldwin } 1463ca596a25SJuli Mallett break; 1464ca596a25SJuli Mallett } 1465ca596a25SJuli Mallett 1466df8bae1dSRodney W. Grimes start = *addr; 1467df8bae1dSRodney W. Grimes } 146826c538ffSAlan Cox result = vm_map_insert(map, object, offset, start, start + 146926c538ffSAlan Cox length, prot, max, cow); 14705aa60b6fSJohn Baldwin } while (result == KERN_NO_SPACE && find_space != VMFS_NO_SPACE && 14715aa60b6fSJohn Baldwin find_space != VMFS_ANY_SPACE); 1472df8bae1dSRodney W. Grimes vm_map_unlock(map); 1473df8bae1dSRodney W. Grimes return (result); 1474df8bae1dSRodney W. Grimes } 1475df8bae1dSRodney W. Grimes 1476df8bae1dSRodney W. Grimes /* 1477b7b2aac2SJohn Dyson * vm_map_simplify_entry: 147867bf6868SJohn Dyson * 14794e71e795SMatthew Dillon * Simplify the given map entry by merging with either neighbor. This 14804e71e795SMatthew Dillon * routine also has the ability to merge with both neighbors. 14814e71e795SMatthew Dillon * 14824e71e795SMatthew Dillon * The map must be locked. 14834e71e795SMatthew Dillon * 14844e71e795SMatthew Dillon * This routine guarantees that the passed entry remains valid (though 14854e71e795SMatthew Dillon * possibly extended). When merging, this routine may delete one or 14864e71e795SMatthew Dillon * both neighbors. 1487df8bae1dSRodney W. Grimes */ 1488b7b2aac2SJohn Dyson void 14891b40f8c0SMatthew Dillon vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) 1490df8bae1dSRodney W. Grimes { 1491308c24baSJohn Dyson vm_map_entry_t next, prev; 1492b7b2aac2SJohn Dyson vm_size_t prevsize, esize; 1493df8bae1dSRodney W. Grimes 1494acd9a301SAlan Cox if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) 1495df8bae1dSRodney W.
Grimes return; 1496308c24baSJohn Dyson 1497308c24baSJohn Dyson prev = entry->prev; 1498308c24baSJohn Dyson if (prev != &map->header) { 149967bf6868SJohn Dyson prevsize = prev->end - prev->start; 150067bf6868SJohn Dyson if ( (prev->end == entry->start) && 150167bf6868SJohn Dyson (prev->object.vm_object == entry->object.vm_object) && 150295e5e988SJohn Dyson (!prev->object.vm_object || 150367bf6868SJohn Dyson (prev->offset + prevsize == entry->offset)) && 1504afa07f7eSJohn Dyson (prev->eflags == entry->eflags) && 150567bf6868SJohn Dyson (prev->protection == entry->protection) && 150667bf6868SJohn Dyson (prev->max_protection == entry->max_protection) && 150767bf6868SJohn Dyson (prev->inheritance == entry->inheritance) && 15083364c323SKonstantin Belousov (prev->wired_count == entry->wired_count) && 1509ef694c1aSEdward Tomasz Napierala (prev->cred == entry->cred)) { 1510308c24baSJohn Dyson vm_map_entry_unlink(map, prev); 1511308c24baSJohn Dyson entry->start = prev->start; 1512308c24baSJohn Dyson entry->offset = prev->offset; 15130164e057SAlan Cox if (entry->prev != &map->header) 15140164e057SAlan Cox vm_map_entry_resize_free(map, entry->prev); 15157fd10fb3SKonstantin Belousov 15167fd10fb3SKonstantin Belousov /* 1517b0994946SKonstantin Belousov * If the backing object is a vnode object, 1518b0994946SKonstantin Belousov * vm_object_deallocate() calls vrele(). 1519b0994946SKonstantin Belousov * However, vrele() does not lock the vnode 1520b0994946SKonstantin Belousov * because the vnode has additional 1521b0994946SKonstantin Belousov * references. Thus, the map lock can be kept 1522b0994946SKonstantin Belousov * without causing a lock-order reversal with 1523b0994946SKonstantin Belousov * the vnode lock. 152484110e7eSKonstantin Belousov * 152584110e7eSKonstantin Belousov * Since we count the number of virtual page 152684110e7eSKonstantin Belousov * mappings in object->un_pager.vnp.writemappings, 152784110e7eSKonstantin Belousov * the writemappings value should not be adjusted 152884110e7eSKonstantin Belousov * when the entry is disposed of. 
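 *
 * Merge example (editor's gloss): entries [A, B) and [B, C) backed
 * by the same object may merge only when the second entry's offset
 * equals the first entry's offset plus (B - A), i.e. the two ranges
 * are contiguous within the backing object as well as in the map.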
15297fd10fb3SKonstantin Belousov */ 1530b18bfc3dSJohn Dyson if (prev->object.vm_object) 1531308c24baSJohn Dyson vm_object_deallocate(prev->object.vm_object); 1532ef694c1aSEdward Tomasz Napierala if (prev->cred != NULL) 1533ef694c1aSEdward Tomasz Napierala crfree(prev->cred); 1534308c24baSJohn Dyson vm_map_entry_dispose(map, prev); 1535308c24baSJohn Dyson } 1536308c24baSJohn Dyson } 1537de5f6a77SJohn Dyson 1538de5f6a77SJohn Dyson next = entry->next; 1539308c24baSJohn Dyson if (next != &map->header) { 154067bf6868SJohn Dyson esize = entry->end - entry->start; 154167bf6868SJohn Dyson if ((entry->end == next->start) && 154267bf6868SJohn Dyson (next->object.vm_object == entry->object.vm_object) && 154367bf6868SJohn Dyson (!entry->object.vm_object || 154467bf6868SJohn Dyson (entry->offset + esize == next->offset)) && 1545afa07f7eSJohn Dyson (next->eflags == entry->eflags) && 154667bf6868SJohn Dyson (next->protection == entry->protection) && 154767bf6868SJohn Dyson (next->max_protection == entry->max_protection) && 154867bf6868SJohn Dyson (next->inheritance == entry->inheritance) && 15493364c323SKonstantin Belousov (next->wired_count == entry->wired_count) && 1550ef694c1aSEdward Tomasz Napierala (next->cred == entry->cred)) { 1551de5f6a77SJohn Dyson vm_map_entry_unlink(map, next); 1552de5f6a77SJohn Dyson entry->end = next->end; 15530164e057SAlan Cox vm_map_entry_resize_free(map, entry); 15547fd10fb3SKonstantin Belousov 15557fd10fb3SKonstantin Belousov /* 15567fd10fb3SKonstantin Belousov * See comment above. 15577fd10fb3SKonstantin Belousov */ 1558b18bfc3dSJohn Dyson if (next->object.vm_object) 1559de5f6a77SJohn Dyson vm_object_deallocate(next->object.vm_object); 1560ef694c1aSEdward Tomasz Napierala if (next->cred != NULL) 1561ef694c1aSEdward Tomasz Napierala crfree(next->cred); 1562de5f6a77SJohn Dyson vm_map_entry_dispose(map, next); 1563df8bae1dSRodney W. Grimes } 1564df8bae1dSRodney W. Grimes } 1565de5f6a77SJohn Dyson } 1566df8bae1dSRodney W. Grimes /* 1567df8bae1dSRodney W. Grimes * vm_map_clip_start: [ internal use only ] 1568df8bae1dSRodney W. Grimes * 1569df8bae1dSRodney W. Grimes * Asserts that the given entry begins at or after 1570df8bae1dSRodney W. Grimes * the specified address; if necessary, 1571df8bae1dSRodney W. Grimes * it splits the entry into two. 1572df8bae1dSRodney W. Grimes */ 1573df8bae1dSRodney W. Grimes #define vm_map_clip_start(map, entry, startaddr) \ 1574df8bae1dSRodney W. Grimes { \ 1575df8bae1dSRodney W. Grimes if (startaddr > entry->start) \ 1576df8bae1dSRodney W. Grimes _vm_map_clip_start(map, entry, startaddr); \ 1577df8bae1dSRodney W. Grimes } 1578df8bae1dSRodney W. Grimes 1579df8bae1dSRodney W. Grimes /* 1580df8bae1dSRodney W. Grimes * This routine is called only when it is known that 1581df8bae1dSRodney W. Grimes * the entry must be split. 1582df8bae1dSRodney W. Grimes */ 15830d94caffSDavid Greenman static void 15841b40f8c0SMatthew Dillon _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) 1585df8bae1dSRodney W. Grimes { 1586c0877f10SJohn Dyson vm_map_entry_t new_entry; 1587df8bae1dSRodney W. Grimes 15883a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 15893a0916b8SKonstantin Belousov 1590df8bae1dSRodney W. Grimes /* 15910d94caffSDavid Greenman * Split off the front portion -- note that we must insert the new 15920d94caffSDavid Greenman * entry BEFORE this one, so that this entry has the specified 15930d94caffSDavid Greenman * starting address. 1594df8bae1dSRodney W. 
Grimes */ 1595f32dbbeeSJohn Dyson vm_map_simplify_entry(map, entry); 1596f32dbbeeSJohn Dyson 159711cccda1SJohn Dyson /* 159811cccda1SJohn Dyson * If there is no object backing this entry, we might as well create 159911cccda1SJohn Dyson * one now. If we defer it, an object can get created after the map 160011cccda1SJohn Dyson * is clipped, and individual objects will be created for the split-up 160111cccda1SJohn Dyson * map. This is a bit of a hack, but is also about the best place to 160211cccda1SJohn Dyson * put this improvement. 160311cccda1SJohn Dyson */ 16044e71e795SMatthew Dillon if (entry->object.vm_object == NULL && !map->system_map) { 160511cccda1SJohn Dyson vm_object_t object; 160611cccda1SJohn Dyson object = vm_object_allocate(OBJT_DEFAULT, 1607c2e11a03SJohn Dyson atop(entry->end - entry->start)); 160811cccda1SJohn Dyson entry->object.vm_object = object; 160911cccda1SJohn Dyson entry->offset = 0; 1610ef694c1aSEdward Tomasz Napierala if (entry->cred != NULL) { 1611ef694c1aSEdward Tomasz Napierala object->cred = entry->cred; 16123364c323SKonstantin Belousov object->charge = entry->end - entry->start; 1613ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 16143364c323SKonstantin Belousov } 16153364c323SKonstantin Belousov } else if (entry->object.vm_object != NULL && 16163364c323SKonstantin Belousov ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && 1617ef694c1aSEdward Tomasz Napierala entry->cred != NULL) { 161889f6b863SAttilio Rao VM_OBJECT_WLOCK(entry->object.vm_object); 1619ef694c1aSEdward Tomasz Napierala KASSERT(entry->object.vm_object->cred == NULL, 1620ef694c1aSEdward Tomasz Napierala ("OVERCOMMIT: vm_entry_clip_start: both cred e %p", entry)); 1621ef694c1aSEdward Tomasz Napierala entry->object.vm_object->cred = entry->cred; 16223364c323SKonstantin Belousov entry->object.vm_object->charge = entry->end - entry->start; 162389f6b863SAttilio Rao VM_OBJECT_WUNLOCK(entry->object.vm_object); 1624ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 162511cccda1SJohn Dyson } 162611cccda1SJohn Dyson 1627df8bae1dSRodney W. Grimes new_entry = vm_map_entry_create(map); 1628df8bae1dSRodney W. Grimes *new_entry = *entry; 1629df8bae1dSRodney W. Grimes 1630df8bae1dSRodney W. Grimes new_entry->end = start; 1631df8bae1dSRodney W. Grimes entry->offset += (start - entry->start); 1632df8bae1dSRodney W. Grimes entry->start = start; 1633ef694c1aSEdward Tomasz Napierala if (new_entry->cred != NULL) 1634ef694c1aSEdward Tomasz Napierala crhold(entry->cred); 1635df8bae1dSRodney W. Grimes 1636df8bae1dSRodney W. Grimes vm_map_entry_link(map, entry->prev, new_entry); 1637df8bae1dSRodney W. Grimes 16389fdfe602SMatthew Dillon if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 1639df8bae1dSRodney W. Grimes vm_object_reference(new_entry->object.vm_object); 164084110e7eSKonstantin Belousov /* 164184110e7eSKonstantin Belousov * The object->un_pager.vnp.writemappings for the 164284110e7eSKonstantin Belousov * object of MAP_ENTRY_VN_WRITECNT type entry shall be 164384110e7eSKonstantin Belousov * kept as is here. The virtual pages are 164484110e7eSKonstantin Belousov * re-distributed among the clipped entries, so the sum is 164584110e7eSKonstantin Belousov * left the same. 164684110e7eSKonstantin Belousov */ 1647df8bae1dSRodney W. Grimes } 1648c0877f10SJohn Dyson } 1649df8bae1dSRodney W. Grimes 1650df8bae1dSRodney W. Grimes /* 1651df8bae1dSRodney W. Grimes * vm_map_clip_end: [ internal use only ] 1652df8bae1dSRodney W. Grimes * 1653df8bae1dSRodney W. 
Grimes * Asserts that the given entry ends at or before 1654df8bae1dSRodney W. Grimes * the specified address; if necessary, 1655df8bae1dSRodney W. Grimes * it splits the entry into two. 1656df8bae1dSRodney W. Grimes */ 1657df8bae1dSRodney W. Grimes #define vm_map_clip_end(map, entry, endaddr) \ 1658df8bae1dSRodney W. Grimes { \ 1659af045176SPoul-Henning Kamp if ((endaddr) < (entry->end)) \ 1660af045176SPoul-Henning Kamp _vm_map_clip_end((map), (entry), (endaddr)); \ 1661df8bae1dSRodney W. Grimes } 1662df8bae1dSRodney W. Grimes 1663df8bae1dSRodney W. Grimes /* 1664df8bae1dSRodney W. Grimes * This routine is called only when it is known that 1665df8bae1dSRodney W. Grimes * the entry must be split. 1666df8bae1dSRodney W. Grimes */ 16670d94caffSDavid Greenman static void 16681b40f8c0SMatthew Dillon _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) 1669df8bae1dSRodney W. Grimes { 1670c0877f10SJohn Dyson vm_map_entry_t new_entry; 1671df8bae1dSRodney W. Grimes 16723a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 16733a0916b8SKonstantin Belousov 1674df8bae1dSRodney W. Grimes /* 167511cccda1SJohn Dyson * If there is no object backing this entry, we might as well create 167611cccda1SJohn Dyson * one now. If we defer it, an object can get created after the map 167711cccda1SJohn Dyson * is clipped, and individual objects will be created for the split-up 167811cccda1SJohn Dyson * map. This is a bit of a hack, but is also about the best place to 167911cccda1SJohn Dyson * put this improvement. 168011cccda1SJohn Dyson */ 16814e71e795SMatthew Dillon if (entry->object.vm_object == NULL && !map->system_map) { 168211cccda1SJohn Dyson vm_object_t object; 168311cccda1SJohn Dyson object = vm_object_allocate(OBJT_DEFAULT, 1684c2e11a03SJohn Dyson atop(entry->end - entry->start)); 168511cccda1SJohn Dyson entry->object.vm_object = object; 168611cccda1SJohn Dyson entry->offset = 0; 1687ef694c1aSEdward Tomasz Napierala if (entry->cred != NULL) { 1688ef694c1aSEdward Tomasz Napierala object->cred = entry->cred; 16893364c323SKonstantin Belousov object->charge = entry->end - entry->start; 1690ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 16913364c323SKonstantin Belousov } 16923364c323SKonstantin Belousov } else if (entry->object.vm_object != NULL && 16933364c323SKonstantin Belousov ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && 1694ef694c1aSEdward Tomasz Napierala entry->cred != NULL) { 169589f6b863SAttilio Rao VM_OBJECT_WLOCK(entry->object.vm_object); 1696ef694c1aSEdward Tomasz Napierala KASSERT(entry->object.vm_object->cred == NULL, 1697ef694c1aSEdward Tomasz Napierala ("OVERCOMMIT: vm_entry_clip_end: both cred e %p", entry)); 1698ef694c1aSEdward Tomasz Napierala entry->object.vm_object->cred = entry->cred; 16993364c323SKonstantin Belousov entry->object.vm_object->charge = entry->end - entry->start; 170089f6b863SAttilio Rao VM_OBJECT_WUNLOCK(entry->object.vm_object); 1701ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 170211cccda1SJohn Dyson } 170311cccda1SJohn Dyson 170411cccda1SJohn Dyson /* 17050d94caffSDavid Greenman * Create a new entry and insert it AFTER the specified entry 1706df8bae1dSRodney W. Grimes */ 1707df8bae1dSRodney W. Grimes new_entry = vm_map_entry_create(map); 1708df8bae1dSRodney W. Grimes *new_entry = *entry; 1709df8bae1dSRodney W. Grimes 1710df8bae1dSRodney W. Grimes new_entry->start = entry->end = end; 1711df8bae1dSRodney W. 
Grimes new_entry->offset += (end - entry->start); 1712ef694c1aSEdward Tomasz Napierala if (new_entry->cred != NULL) 1713ef694c1aSEdward Tomasz Napierala crhold(entry->cred); 1714df8bae1dSRodney W. Grimes 1715df8bae1dSRodney W. Grimes vm_map_entry_link(map, entry, new_entry); 1716df8bae1dSRodney W. Grimes 17179fdfe602SMatthew Dillon if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 1718df8bae1dSRodney W. Grimes vm_object_reference(new_entry->object.vm_object); 1719df8bae1dSRodney W. Grimes } 1720c0877f10SJohn Dyson } 1721df8bae1dSRodney W. Grimes 1722df8bae1dSRodney W. Grimes /* 1723df8bae1dSRodney W. Grimes * vm_map_submap: [ kernel use only ] 1724df8bae1dSRodney W. Grimes * 1725df8bae1dSRodney W. Grimes * Mark the given range as handled by a subordinate map. 1726df8bae1dSRodney W. Grimes * 1727df8bae1dSRodney W. Grimes * This range must have been created with vm_map_find, 1728df8bae1dSRodney W. Grimes * and no other operations may have been performed on this 1729df8bae1dSRodney W. Grimes * range prior to calling vm_map_submap. 1730df8bae1dSRodney W. Grimes * 1731df8bae1dSRodney W. Grimes * Only a limited number of operations can be performed 1732df8bae1dSRodney W. Grimes * within this range after calling vm_map_submap: 1733df8bae1dSRodney W. Grimes * vm_fault 1734df8bae1dSRodney W. Grimes * [Don't try vm_map_copy!] 1735df8bae1dSRodney W. Grimes * 1736df8bae1dSRodney W. Grimes * To remove a submapping, one must first remove the 1737df8bae1dSRodney W. Grimes * range from the superior map, and then destroy the 1738df8bae1dSRodney W. Grimes * submap (if desired). [Better yet, don't try it.] 1739df8bae1dSRodney W. Grimes */ 1740df8bae1dSRodney W. Grimes int 17411b40f8c0SMatthew Dillon vm_map_submap( 17421b40f8c0SMatthew Dillon vm_map_t map, 17431b40f8c0SMatthew Dillon vm_offset_t start, 17441b40f8c0SMatthew Dillon vm_offset_t end, 17451b40f8c0SMatthew Dillon vm_map_t submap) 1746df8bae1dSRodney W. Grimes { 1747df8bae1dSRodney W. Grimes vm_map_entry_t entry; 1748c0877f10SJohn Dyson int result = KERN_INVALID_ARGUMENT; 1749df8bae1dSRodney W. Grimes 1750df8bae1dSRodney W. Grimes vm_map_lock(map); 1751df8bae1dSRodney W. Grimes 1752df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 1753df8bae1dSRodney W. Grimes 1754df8bae1dSRodney W. Grimes if (vm_map_lookup_entry(map, start, &entry)) { 1755df8bae1dSRodney W. Grimes vm_map_clip_start(map, entry, start); 17560d94caffSDavid Greenman } else 1757df8bae1dSRodney W. Grimes entry = entry->next; 1758df8bae1dSRodney W. Grimes 1759df8bae1dSRodney W. Grimes vm_map_clip_end(map, entry, end); 1760df8bae1dSRodney W. Grimes 1761df8bae1dSRodney W. Grimes if ((entry->start == start) && (entry->end == end) && 17629fdfe602SMatthew Dillon ((entry->eflags & MAP_ENTRY_COW) == 0) && 1763afa07f7eSJohn Dyson (entry->object.vm_object == NULL)) { 17642d8acc0fSJohn Dyson entry->object.sub_map = submap; 1765afa07f7eSJohn Dyson entry->eflags |= MAP_ENTRY_IS_SUB_MAP; 1766df8bae1dSRodney W. Grimes result = KERN_SUCCESS; 1767df8bae1dSRodney W. Grimes } 1768df8bae1dSRodney W. Grimes vm_map_unlock(map); 1769df8bae1dSRodney W. Grimes 1770df8bae1dSRodney W. Grimes return (result); 1771df8bae1dSRodney W. Grimes } 1772df8bae1dSRodney W. Grimes 1773df8bae1dSRodney W.
Grimes /* 17741f78f902SAlan Cox * The maximum number of pages to map 17751f78f902SAlan Cox */ 17761f78f902SAlan Cox #define MAX_INIT_PT 96 17771f78f902SAlan Cox 17781f78f902SAlan Cox /* 17790551c08dSAlan Cox * vm_map_pmap_enter: 17800551c08dSAlan Cox * 1781a922d312SAlan Cox * Preload read-only mappings for the specified object's resident pages 1782a922d312SAlan Cox * into the target map. If "flags" is MAP_PREFAULT_PARTIAL, then only 1783a922d312SAlan Cox * the resident pages within the address range [addr, addr + ulmin(size, 1784a922d312SAlan Cox * ptoa(MAX_INIT_PT))) are mapped. Otherwise, all resident pages within 1785a922d312SAlan Cox * the specified address range are mapped. This eliminates many soft 1786a922d312SAlan Cox * faults on process startup and immediately after an mmap(2). Because 1787a922d312SAlan Cox * these are speculative mappings, cached pages are not reactivated and 1788a922d312SAlan Cox * mapped. 17890551c08dSAlan Cox */ 17900551c08dSAlan Cox void 17914da4d293SAlan Cox vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, 17920551c08dSAlan Cox vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags) 17930551c08dSAlan Cox { 17948fece8c3SAlan Cox vm_offset_t start; 1795ce142d9eSAlan Cox vm_page_t p, p_start; 17968fece8c3SAlan Cox vm_pindex_t psize, tmpidx; 17970551c08dSAlan Cox 1798ba8bca61SAlan Cox if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL) 17991f78f902SAlan Cox return; 18009af6d512SAttilio Rao VM_OBJECT_RLOCK(object); 18019af6d512SAttilio Rao if (object->type == OBJT_DEVICE || object->type == OBJT_SG) { 18029af6d512SAttilio Rao VM_OBJECT_RUNLOCK(object); 180389f6b863SAttilio Rao VM_OBJECT_WLOCK(object); 180401381811SJohn Baldwin if (object->type == OBJT_DEVICE || object->type == OBJT_SG) { 18059af6d512SAttilio Rao pmap_object_init_pt(map->pmap, addr, object, pindex, 18069af6d512SAttilio Rao size); 18079af6d512SAttilio Rao VM_OBJECT_WUNLOCK(object); 18089af6d512SAttilio Rao return; 18099af6d512SAttilio Rao } 18109af6d512SAttilio Rao VM_OBJECT_LOCK_DOWNGRADE(object); 18111f78f902SAlan Cox } 18121f78f902SAlan Cox 18131f78f902SAlan Cox psize = atop(size); 1814a922d312SAlan Cox if (psize > MAX_INIT_PT && (flags & MAP_PREFAULT_PARTIAL) != 0) 1815a922d312SAlan Cox psize = MAX_INIT_PT; 18161f78f902SAlan Cox if (psize + pindex > object->size) { 18179af6d512SAttilio Rao if (object->size < pindex) { 18189af6d512SAttilio Rao VM_OBJECT_RUNLOCK(object); 18199af6d512SAttilio Rao return; 18209af6d512SAttilio Rao } 18211f78f902SAlan Cox psize = object->size - pindex; 18221f78f902SAlan Cox } 18231f78f902SAlan Cox 1824ce142d9eSAlan Cox start = 0; 1825ce142d9eSAlan Cox p_start = NULL; 18261f78f902SAlan Cox 1827b382c10aSKonstantin Belousov p = vm_page_find_least(object, pindex); 18281f78f902SAlan Cox /* 18291f78f902SAlan Cox * Assert: the variable p is either (1) the page with the 18301f78f902SAlan Cox * least pindex greater than or equal to the parameter pindex 18311f78f902SAlan Cox * or (2) NULL. 18321f78f902SAlan Cox */ 18331f78f902SAlan Cox for (; 18341f78f902SAlan Cox p != NULL && (tmpidx = p->pindex - pindex) < psize; 18351f78f902SAlan Cox p = TAILQ_NEXT(p, listq)) { 18361f78f902SAlan Cox /* 18371f78f902SAlan Cox * don't allow an madvise to blow away our really 18381f78f902SAlan Cox * free pages allocating pv entries. 
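 *
 * (Editor's gloss: for a MADV_WILLNEED-driven call, stop
 * prefaulting once the free page count falls below the reserve
 * rather than consume still more memory on pv entries.)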
18391f78f902SAlan Cox */ 18401f78f902SAlan Cox if ((flags & MAP_PREFAULT_MADVISE) && 18412feb50bfSAttilio Rao cnt.v_free_count < cnt.v_free_reserved) { 1842379fb642SAlan Cox psize = tmpidx; 18431f78f902SAlan Cox break; 18441f78f902SAlan Cox } 18450a2e596aSAlan Cox if (p->valid == VM_PAGE_BITS_ALL) { 1846ce142d9eSAlan Cox if (p_start == NULL) { 1847ce142d9eSAlan Cox start = addr + ptoa(tmpidx); 1848ce142d9eSAlan Cox p_start = p; 1849ce142d9eSAlan Cox } 18507bfda801SAlan Cox } else if (p_start != NULL) { 1851cf4682aeSAlan Cox pmap_enter_object(map->pmap, start, addr + 1852cf4682aeSAlan Cox ptoa(tmpidx), p_start, prot); 1853cf4682aeSAlan Cox p_start = NULL; 1854cf4682aeSAlan Cox } 1855cf4682aeSAlan Cox } 1856c46b90e9SAlan Cox if (p_start != NULL) 1857379fb642SAlan Cox pmap_enter_object(map->pmap, start, addr + ptoa(psize), 1858379fb642SAlan Cox p_start, prot); 18599af6d512SAttilio Rao VM_OBJECT_RUNLOCK(object); 18600551c08dSAlan Cox } 18610551c08dSAlan Cox 18620551c08dSAlan Cox /* 1863df8bae1dSRodney W. Grimes * vm_map_protect: 1864df8bae1dSRodney W. Grimes * 1865df8bae1dSRodney W. Grimes * Sets the protection of the specified address 1866df8bae1dSRodney W. Grimes * region in the target map. If "set_max" is 1867df8bae1dSRodney W. Grimes * specified, the maximum protection is to be set; 1868df8bae1dSRodney W. Grimes * otherwise, only the current protection is affected. 1869df8bae1dSRodney W. Grimes */ 1870df8bae1dSRodney W. Grimes int 1871b9dcd593SBruce Evans vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, 1872b9dcd593SBruce Evans vm_prot_t new_prot, boolean_t set_max) 1873df8bae1dSRodney W. Grimes { 1874210a6886SKonstantin Belousov vm_map_entry_t current, entry; 18753364c323SKonstantin Belousov vm_object_t obj; 1876ef694c1aSEdward Tomasz Napierala struct ucred *cred; 1877210a6886SKonstantin Belousov vm_prot_t old_prot; 1878df8bae1dSRodney W. Grimes 1879df8bae1dSRodney W. Grimes vm_map_lock(map); 1880df8bae1dSRodney W. Grimes 1881df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 1882df8bae1dSRodney W. Grimes 1883df8bae1dSRodney W. Grimes if (vm_map_lookup_entry(map, start, &entry)) { 1884df8bae1dSRodney W. Grimes vm_map_clip_start(map, entry, start); 1885b7b2aac2SJohn Dyson } else { 1886df8bae1dSRodney W. Grimes entry = entry->next; 1887b7b2aac2SJohn Dyson } 1888df8bae1dSRodney W. Grimes 1889df8bae1dSRodney W. Grimes /* 18900d94caffSDavid Greenman * Make a first pass to check for protection violations. 1891df8bae1dSRodney W. Grimes */ 1892df8bae1dSRodney W. Grimes current = entry; 1893df8bae1dSRodney W. Grimes while ((current != &map->header) && (current->start < end)) { 1894afa07f7eSJohn Dyson if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 1895a1f6d91cSDavid Greenman vm_map_unlock(map); 1896df8bae1dSRodney W. Grimes return (KERN_INVALID_ARGUMENT); 1897a1f6d91cSDavid Greenman } 1898df8bae1dSRodney W. Grimes if ((new_prot & current->max_protection) != new_prot) { 1899df8bae1dSRodney W. Grimes vm_map_unlock(map); 1900df8bae1dSRodney W. Grimes return (KERN_PROTECTION_FAILURE); 1901df8bae1dSRodney W. Grimes } 1902df8bae1dSRodney W. Grimes current = current->next; 1903df8bae1dSRodney W. Grimes } 1904df8bae1dSRodney W. Grimes 19053364c323SKonstantin Belousov 19063364c323SKonstantin Belousov /* 19073364c323SKonstantin Belousov * Do an accounting pass for private read-only mappings that 19083364c323SKonstantin Belousov * now will do cow due to allowed write (e.g. 
debugger sets 19093364c323SKonstantin Belousov * breakpoint on text segment) 19103364c323SKonstantin Belousov */ 19113364c323SKonstantin Belousov for (current = entry; (current != &map->header) && 19123364c323SKonstantin Belousov (current->start < end); current = current->next) { 19133364c323SKonstantin Belousov 19143364c323SKonstantin Belousov vm_map_clip_end(map, current, end); 19153364c323SKonstantin Belousov 19163364c323SKonstantin Belousov if (set_max || 19173364c323SKonstantin Belousov ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 || 19183364c323SKonstantin Belousov ENTRY_CHARGED(current)) { 19193364c323SKonstantin Belousov continue; 19203364c323SKonstantin Belousov } 19213364c323SKonstantin Belousov 1922ef694c1aSEdward Tomasz Napierala cred = curthread->td_ucred; 19233364c323SKonstantin Belousov obj = current->object.vm_object; 19243364c323SKonstantin Belousov 19253364c323SKonstantin Belousov if (obj == NULL || (current->eflags & MAP_ENTRY_NEEDS_COPY)) { 19263364c323SKonstantin Belousov if (!swap_reserve(current->end - current->start)) { 19273364c323SKonstantin Belousov vm_map_unlock(map); 19283364c323SKonstantin Belousov return (KERN_RESOURCE_SHORTAGE); 19293364c323SKonstantin Belousov } 1930ef694c1aSEdward Tomasz Napierala crhold(cred); 1931ef694c1aSEdward Tomasz Napierala current->cred = cred; 19323364c323SKonstantin Belousov continue; 19333364c323SKonstantin Belousov } 19343364c323SKonstantin Belousov 193589f6b863SAttilio Rao VM_OBJECT_WLOCK(obj); 19363364c323SKonstantin Belousov if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP) { 193789f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 19383364c323SKonstantin Belousov continue; 19393364c323SKonstantin Belousov } 19403364c323SKonstantin Belousov 19413364c323SKonstantin Belousov /* 19423364c323SKonstantin Belousov * Charge for the whole object allocation now, since 19433364c323SKonstantin Belousov * we cannot distinguish between non-charged and 19443364c323SKonstantin Belousov * charged clipped mapping of the same object later. 19453364c323SKonstantin Belousov */ 19463364c323SKonstantin Belousov KASSERT(obj->charge == 0, 19473364c323SKonstantin Belousov ("vm_map_protect: object %p overcharged\n", obj)); 19483364c323SKonstantin Belousov if (!swap_reserve(ptoa(obj->size))) { 194989f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 19503364c323SKonstantin Belousov vm_map_unlock(map); 19513364c323SKonstantin Belousov return (KERN_RESOURCE_SHORTAGE); 19523364c323SKonstantin Belousov } 19533364c323SKonstantin Belousov 1954ef694c1aSEdward Tomasz Napierala crhold(cred); 1955ef694c1aSEdward Tomasz Napierala obj->cred = cred; 19563364c323SKonstantin Belousov obj->charge = ptoa(obj->size); 195789f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 19583364c323SKonstantin Belousov } 19593364c323SKonstantin Belousov 1960df8bae1dSRodney W. Grimes /* 19610d94caffSDavid Greenman * Go back and fix up protections. [Note that clipping is not 19620d94caffSDavid Greenman * necessary the second time.] 1963df8bae1dSRodney W. Grimes */ 1964df8bae1dSRodney W. Grimes current = entry; 1965df8bae1dSRodney W. Grimes while ((current != &map->header) && (current->start < end)) { 1966df8bae1dSRodney W. Grimes old_prot = current->protection; 1967210a6886SKonstantin Belousov 1968df8bae1dSRodney W. Grimes if (set_max) 1969df8bae1dSRodney W. Grimes current->protection = 1970df8bae1dSRodney W. Grimes (current->max_protection = new_prot) & 1971df8bae1dSRodney W. Grimes old_prot; 1972df8bae1dSRodney W. Grimes else 1973df8bae1dSRodney W. 
Grimes current->protection = new_prot; 1974df8bae1dSRodney W. Grimes 1975210a6886SKonstantin Belousov if ((current->eflags & (MAP_ENTRY_COW | MAP_ENTRY_USER_WIRED)) 1976210a6886SKonstantin Belousov == (MAP_ENTRY_COW | MAP_ENTRY_USER_WIRED) && 1977210a6886SKonstantin Belousov (current->protection & VM_PROT_WRITE) != 0 && 1978210a6886SKonstantin Belousov (old_prot & VM_PROT_WRITE) == 0) { 1979210a6886SKonstantin Belousov vm_fault_copy_entry(map, map, current, current, NULL); 1980210a6886SKonstantin Belousov } 1981210a6886SKonstantin Belousov 1982df8bae1dSRodney W. Grimes /* 19832fafce9eSAlan Cox * When restricting access, update the physical map. Worry 19842fafce9eSAlan Cox * about copy-on-write here. 1985df8bae1dSRodney W. Grimes */ 19862fafce9eSAlan Cox if ((old_prot & ~current->protection) != 0) { 1987afa07f7eSJohn Dyson #define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \ 1988df8bae1dSRodney W. Grimes VM_PROT_ALL) 1989df8bae1dSRodney W. Grimes pmap_protect(map->pmap, current->start, 1990df8bae1dSRodney W. Grimes current->end, 19911c85e3dfSAlan Cox current->protection & MASK(current)); 1992df8bae1dSRodney W. Grimes #undef MASK 1993df8bae1dSRodney W. Grimes } 19947d78abc9SJohn Dyson vm_map_simplify_entry(map, current); 1995df8bae1dSRodney W. Grimes current = current->next; 1996df8bae1dSRodney W. Grimes } 1997df8bae1dSRodney W. Grimes vm_map_unlock(map); 1998df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 1999df8bae1dSRodney W. Grimes } 2000df8bae1dSRodney W. Grimes 2001df8bae1dSRodney W. Grimes /* 2002867a482dSJohn Dyson * vm_map_madvise: 2003867a482dSJohn Dyson * 2004867a482dSJohn Dyson * This routine traverses a process's map handling the madvise 2005f7fc307aSAlan Cox * system call. Advisories are classified as either those affecting 2006f7fc307aSAlan Cox * the vm_map_entry structure, or those affecting the underlying 2007f7fc307aSAlan Cox * objects. 2008867a482dSJohn Dyson */ 2009b4309055SMatthew Dillon int 20101b40f8c0SMatthew Dillon vm_map_madvise( 20111b40f8c0SMatthew Dillon vm_map_t map, 20121b40f8c0SMatthew Dillon vm_offset_t start, 20131b40f8c0SMatthew Dillon vm_offset_t end, 20141b40f8c0SMatthew Dillon int behav) 2015867a482dSJohn Dyson { 2016f7fc307aSAlan Cox vm_map_entry_t current, entry; 2017b4309055SMatthew Dillon int modify_map = 0; 2018867a482dSJohn Dyson 2019b4309055SMatthew Dillon /* 2020b4309055SMatthew Dillon * Some madvise calls directly modify the vm_map_entry, in which case 2021b4309055SMatthew Dillon * we need to use an exclusive lock on the map and we need to perform 2022b4309055SMatthew Dillon * various clipping operations. Otherwise we only need a read-lock 2023b4309055SMatthew Dillon * on the map.
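 *
 * For example (editor's note): MADV_NOSYNC toggles a flag in the
 * map entries themselves and so needs the exclusive lock, while
 * MADV_WILLNEED acts only on the backing objects and is served
 * under the read lock:
 *
 *	(void) vm_map_madvise(map, start, end, MADV_WILLNEED);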
2024b4309055SMatthew Dillon */ 2025b4309055SMatthew Dillon switch(behav) { 2026b4309055SMatthew Dillon case MADV_NORMAL: 2027b4309055SMatthew Dillon case MADV_SEQUENTIAL: 2028b4309055SMatthew Dillon case MADV_RANDOM: 20294f79d873SMatthew Dillon case MADV_NOSYNC: 20304f79d873SMatthew Dillon case MADV_AUTOSYNC: 20319730a5daSPaul Saab case MADV_NOCORE: 20329730a5daSPaul Saab case MADV_CORE: 2033b4309055SMatthew Dillon modify_map = 1; 2034867a482dSJohn Dyson vm_map_lock(map); 2035b4309055SMatthew Dillon break; 2036b4309055SMatthew Dillon case MADV_WILLNEED: 2037b4309055SMatthew Dillon case MADV_DONTNEED: 2038b4309055SMatthew Dillon case MADV_FREE: 2039f7fc307aSAlan Cox vm_map_lock_read(map); 2040b4309055SMatthew Dillon break; 2041b4309055SMatthew Dillon default: 2042b4309055SMatthew Dillon return (KERN_INVALID_ARGUMENT); 2043b4309055SMatthew Dillon } 2044b4309055SMatthew Dillon 2045b4309055SMatthew Dillon /* 2046b4309055SMatthew Dillon * Locate starting entry and clip if necessary. 2047b4309055SMatthew Dillon */ 2048867a482dSJohn Dyson VM_MAP_RANGE_CHECK(map, start, end); 2049867a482dSJohn Dyson 2050867a482dSJohn Dyson if (vm_map_lookup_entry(map, start, &entry)) { 2051f7fc307aSAlan Cox if (modify_map) 2052867a482dSJohn Dyson vm_map_clip_start(map, entry, start); 2053b4309055SMatthew Dillon } else { 2054867a482dSJohn Dyson entry = entry->next; 2055b4309055SMatthew Dillon } 2056867a482dSJohn Dyson 2057f7fc307aSAlan Cox if (modify_map) { 2058f7fc307aSAlan Cox /* 2059f7fc307aSAlan Cox * madvise behaviors that are implemented in the vm_map_entry. 2060f7fc307aSAlan Cox * 2061f7fc307aSAlan Cox * We clip the vm_map_entry so that behavioral changes are 2062f7fc307aSAlan Cox * limited to the specified address range. 2063f7fc307aSAlan Cox */ 2064867a482dSJohn Dyson for (current = entry; 2065867a482dSJohn Dyson (current != &map->header) && (current->start < end); 2066b4309055SMatthew Dillon current = current->next 2067b4309055SMatthew Dillon ) { 2068f7fc307aSAlan Cox if (current->eflags & MAP_ENTRY_IS_SUB_MAP) 2069867a482dSJohn Dyson continue; 2070fed9a903SJohn Dyson 207147221757SJohn Dyson vm_map_clip_end(map, current, end); 2072fed9a903SJohn Dyson 2073f7fc307aSAlan Cox switch (behav) { 2074867a482dSJohn Dyson case MADV_NORMAL: 20757f866e4bSAlan Cox vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL); 2076867a482dSJohn Dyson break; 2077867a482dSJohn Dyson case MADV_SEQUENTIAL: 20787f866e4bSAlan Cox vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL); 2079867a482dSJohn Dyson break; 2080867a482dSJohn Dyson case MADV_RANDOM: 20817f866e4bSAlan Cox vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM); 2082867a482dSJohn Dyson break; 20834f79d873SMatthew Dillon case MADV_NOSYNC: 20844f79d873SMatthew Dillon current->eflags |= MAP_ENTRY_NOSYNC; 20854f79d873SMatthew Dillon break; 20864f79d873SMatthew Dillon case MADV_AUTOSYNC: 20874f79d873SMatthew Dillon current->eflags &= ~MAP_ENTRY_NOSYNC; 20884f79d873SMatthew Dillon break; 20899730a5daSPaul Saab case MADV_NOCORE: 20909730a5daSPaul Saab current->eflags |= MAP_ENTRY_NOCOREDUMP; 20919730a5daSPaul Saab break; 20929730a5daSPaul Saab case MADV_CORE: 20939730a5daSPaul Saab current->eflags &= ~MAP_ENTRY_NOCOREDUMP; 20949730a5daSPaul Saab break; 2095867a482dSJohn Dyson default: 2096867a482dSJohn Dyson break; 2097867a482dSJohn Dyson } 2098f7fc307aSAlan Cox vm_map_simplify_entry(map, current); 2099867a482dSJohn Dyson } 2100867a482dSJohn Dyson vm_map_unlock(map); 2101b4309055SMatthew Dillon } else { 210292a59946SJohn Baldwin vm_pindex_t pstart, 
pend; 2103f7fc307aSAlan Cox 2104f7fc307aSAlan Cox /* 2105f7fc307aSAlan Cox * madvise behaviors that are implemented in the underlying 2106f7fc307aSAlan Cox * vm_object. 2107f7fc307aSAlan Cox * 2108f7fc307aSAlan Cox * Since we don't clip the vm_map_entry, we have to clip 2109f7fc307aSAlan Cox * the vm_object pindex and count. 2110f7fc307aSAlan Cox */ 2111f7fc307aSAlan Cox for (current = entry; 2112f7fc307aSAlan Cox (current != &map->header) && (current->start < end); 2113b4309055SMatthew Dillon current = current->next 2114b4309055SMatthew Dillon ) { 211551321f7cSAlan Cox vm_offset_t useEnd, useStart; 21165f99b57cSMatthew Dillon 2117f7fc307aSAlan Cox if (current->eflags & MAP_ENTRY_IS_SUB_MAP) 2118f7fc307aSAlan Cox continue; 2119f7fc307aSAlan Cox 212092a59946SJohn Baldwin pstart = OFF_TO_IDX(current->offset); 212192a59946SJohn Baldwin pend = pstart + atop(current->end - current->start); 21225f99b57cSMatthew Dillon useStart = current->start; 212351321f7cSAlan Cox useEnd = current->end; 2124f7fc307aSAlan Cox 2125f7fc307aSAlan Cox if (current->start < start) { 212692a59946SJohn Baldwin pstart += atop(start - current->start); 21275f99b57cSMatthew Dillon useStart = start; 2128f7fc307aSAlan Cox } 212951321f7cSAlan Cox if (current->end > end) { 213092a59946SJohn Baldwin pend -= atop(current->end - end); 213151321f7cSAlan Cox useEnd = end; 213251321f7cSAlan Cox } 2133f7fc307aSAlan Cox 213492a59946SJohn Baldwin if (pstart >= pend) 2135f7fc307aSAlan Cox continue; 2136f7fc307aSAlan Cox 213751321f7cSAlan Cox /* 213851321f7cSAlan Cox * Perform the pmap_advise() before clearing 213951321f7cSAlan Cox * PGA_REFERENCED in vm_page_advise(). Otherwise, a 214051321f7cSAlan Cox * concurrent pmap operation, such as pmap_remove(), 214151321f7cSAlan Cox * could clear a reference in the pmap and set 214251321f7cSAlan Cox * PGA_REFERENCED on the page before the pmap_advise() 214351321f7cSAlan Cox * had completed. Consequently, the page would appear 214451321f7cSAlan Cox * referenced based upon an old reference that 214551321f7cSAlan Cox * occurred before this pmap_advise() ran. 214651321f7cSAlan Cox */ 214751321f7cSAlan Cox if (behav == MADV_DONTNEED || behav == MADV_FREE) 214851321f7cSAlan Cox pmap_advise(map->pmap, useStart, useEnd, 214951321f7cSAlan Cox behav); 215051321f7cSAlan Cox 215192a59946SJohn Baldwin vm_object_madvise(current->object.vm_object, pstart, 215292a59946SJohn Baldwin pend, behav); 2153b4309055SMatthew Dillon if (behav == MADV_WILLNEED) { 21540551c08dSAlan Cox vm_map_pmap_enter(map, 21555f99b57cSMatthew Dillon useStart, 21564da4d293SAlan Cox current->protection, 2157f7fc307aSAlan Cox current->object.vm_object, 215892a59946SJohn Baldwin pstart, 215992a59946SJohn Baldwin ptoa(pend - pstart), 2160e3026983SMatthew Dillon MAP_PREFAULT_MADVISE 2161b4309055SMatthew Dillon ); 2162f7fc307aSAlan Cox } 2163f7fc307aSAlan Cox } 2164f7fc307aSAlan Cox vm_map_unlock_read(map); 2165f7fc307aSAlan Cox } 2166b4309055SMatthew Dillon return (0); 2167867a482dSJohn Dyson } 2168867a482dSJohn Dyson 2169867a482dSJohn Dyson 2170867a482dSJohn Dyson /* 2171df8bae1dSRodney W. Grimes * vm_map_inherit: 2172df8bae1dSRodney W. Grimes * 2173df8bae1dSRodney W. Grimes * Sets the inheritance of the specified address 2174df8bae1dSRodney W. Grimes * range in the target map. Inheritance 2175df8bae1dSRodney W. Grimes * affects how the map will be shared with 2176e2abaaaaSAlan Cox * child maps at the time of vmspace_fork. 2177df8bae1dSRodney W. Grimes */ 2178df8bae1dSRodney W. 
Grimes int 2179b9dcd593SBruce Evans vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, 2180b9dcd593SBruce Evans vm_inherit_t new_inheritance) 2181df8bae1dSRodney W. Grimes { 2182c0877f10SJohn Dyson vm_map_entry_t entry; 2183df8bae1dSRodney W. Grimes vm_map_entry_t temp_entry; 2184df8bae1dSRodney W. Grimes 2185df8bae1dSRodney W. Grimes switch (new_inheritance) { 2186df8bae1dSRodney W. Grimes case VM_INHERIT_NONE: 2187df8bae1dSRodney W. Grimes case VM_INHERIT_COPY: 2188df8bae1dSRodney W. Grimes case VM_INHERIT_SHARE: 2189df8bae1dSRodney W. Grimes break; 2190df8bae1dSRodney W. Grimes default: 2191df8bae1dSRodney W. Grimes return (KERN_INVALID_ARGUMENT); 2192df8bae1dSRodney W. Grimes } 2193df8bae1dSRodney W. Grimes vm_map_lock(map); 2194df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 2195df8bae1dSRodney W. Grimes if (vm_map_lookup_entry(map, start, &temp_entry)) { 2196df8bae1dSRodney W. Grimes entry = temp_entry; 2197df8bae1dSRodney W. Grimes vm_map_clip_start(map, entry, start); 21980d94caffSDavid Greenman } else 2199df8bae1dSRodney W. Grimes entry = temp_entry->next; 2200df8bae1dSRodney W. Grimes while ((entry != &map->header) && (entry->start < end)) { 2201df8bae1dSRodney W. Grimes vm_map_clip_end(map, entry, end); 2202df8bae1dSRodney W. Grimes entry->inheritance = new_inheritance; 220344428f62SAlan Cox vm_map_simplify_entry(map, entry); 2204df8bae1dSRodney W. Grimes entry = entry->next; 2205df8bae1dSRodney W. Grimes } 2206df8bae1dSRodney W. Grimes vm_map_unlock(map); 2207df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 2208df8bae1dSRodney W. Grimes } 2209df8bae1dSRodney W. Grimes 2210df8bae1dSRodney W. Grimes /* 2211acd9a301SAlan Cox * vm_map_unwire: 2212acd9a301SAlan Cox * 2213e27e17b7SAlan Cox * Implements both kernel and user unwiring. 2214acd9a301SAlan Cox */ 2215acd9a301SAlan Cox int 2216acd9a301SAlan Cox vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, 2217abd498aaSBruce M Simpson int flags) 2218acd9a301SAlan Cox { 2219acd9a301SAlan Cox vm_map_entry_t entry, first_entry, tmp_entry; 2220acd9a301SAlan Cox vm_offset_t saved_start; 2221acd9a301SAlan Cox unsigned int last_timestamp; 2222acd9a301SAlan Cox int rv; 2223abd498aaSBruce M Simpson boolean_t need_wakeup, result, user_unwire; 2224acd9a301SAlan Cox 2225abd498aaSBruce M Simpson user_unwire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE; 2226acd9a301SAlan Cox vm_map_lock(map); 2227acd9a301SAlan Cox VM_MAP_RANGE_CHECK(map, start, end); 2228acd9a301SAlan Cox if (!vm_map_lookup_entry(map, start, &first_entry)) { 2229abd498aaSBruce M Simpson if (flags & VM_MAP_WIRE_HOLESOK) 2230cbef13d8SAlan Cox first_entry = first_entry->next; 2231abd498aaSBruce M Simpson else { 2232acd9a301SAlan Cox vm_map_unlock(map); 2233acd9a301SAlan Cox return (KERN_INVALID_ADDRESS); 2234acd9a301SAlan Cox } 2235abd498aaSBruce M Simpson } 2236acd9a301SAlan Cox last_timestamp = map->timestamp; 2237acd9a301SAlan Cox entry = first_entry; 2238acd9a301SAlan Cox while (entry != &map->header && entry->start < end) { 2239acd9a301SAlan Cox if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { 2240acd9a301SAlan Cox /* 2241acd9a301SAlan Cox * We have not yet clipped the entry. 2242acd9a301SAlan Cox */ 2243acd9a301SAlan Cox saved_start = (start >= entry->start) ? start : 2244acd9a301SAlan Cox entry->start; 2245acd9a301SAlan Cox entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 22468ce2d00aSPawel Jakub Dawidek if (vm_map_unlock_and_wait(map, 0)) { 2247acd9a301SAlan Cox /* 2248acd9a301SAlan Cox * Allow interruption of user unwiring? 
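 *
 * (Editor's gloss: a nonzero return from vm_map_unlock_and_wait()
 * would indicate an interrupted sleep; the value is deliberately
 * ignored here, and the code below revalidates and retries.)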
2249acd9a301SAlan Cox */ 2250acd9a301SAlan Cox } 2251acd9a301SAlan Cox vm_map_lock(map); 2252acd9a301SAlan Cox if (last_timestamp+1 != map->timestamp) { 2253acd9a301SAlan Cox /* 2254acd9a301SAlan Cox * Look again for the entry because the map was 2255acd9a301SAlan Cox * modified while it was unlocked. 2256acd9a301SAlan Cox * Specifically, the entry may have been 2257acd9a301SAlan Cox * clipped, merged, or deleted. 2258acd9a301SAlan Cox */ 2259acd9a301SAlan Cox if (!vm_map_lookup_entry(map, saved_start, 2260acd9a301SAlan Cox &tmp_entry)) { 2261cbef13d8SAlan Cox if (flags & VM_MAP_WIRE_HOLESOK) 2262cbef13d8SAlan Cox tmp_entry = tmp_entry->next; 2263cbef13d8SAlan Cox else { 2264acd9a301SAlan Cox if (saved_start == start) { 2265acd9a301SAlan Cox /* 2266acd9a301SAlan Cox * First_entry has been deleted. 2267acd9a301SAlan Cox */ 2268acd9a301SAlan Cox vm_map_unlock(map); 2269acd9a301SAlan Cox return (KERN_INVALID_ADDRESS); 2270acd9a301SAlan Cox } 2271acd9a301SAlan Cox end = saved_start; 2272acd9a301SAlan Cox rv = KERN_INVALID_ADDRESS; 2273acd9a301SAlan Cox goto done; 2274acd9a301SAlan Cox } 2275cbef13d8SAlan Cox } 2276acd9a301SAlan Cox if (entry == first_entry) 2277acd9a301SAlan Cox first_entry = tmp_entry; 2278acd9a301SAlan Cox else 2279acd9a301SAlan Cox first_entry = NULL; 2280acd9a301SAlan Cox entry = tmp_entry; 2281acd9a301SAlan Cox } 2282acd9a301SAlan Cox last_timestamp = map->timestamp; 2283acd9a301SAlan Cox continue; 2284acd9a301SAlan Cox } 2285acd9a301SAlan Cox vm_map_clip_start(map, entry, start); 2286acd9a301SAlan Cox vm_map_clip_end(map, entry, end); 2287acd9a301SAlan Cox /* 2288acd9a301SAlan Cox * Mark the entry in case the map lock is released. (See 2289acd9a301SAlan Cox * above.) 2290acd9a301SAlan Cox */ 2291*ff3ae454SKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 && 2292*ff3ae454SKonstantin Belousov entry->wiring_thread == NULL, 2293*ff3ae454SKonstantin Belousov ("owned map entry %p", entry)); 2294acd9a301SAlan Cox entry->eflags |= MAP_ENTRY_IN_TRANSITION; 22950acea7dfSKonstantin Belousov entry->wiring_thread = curthread; 2296acd9a301SAlan Cox /* 2297acd9a301SAlan Cox * Check the map for holes in the specified region. 2298abd498aaSBruce M Simpson * If VM_MAP_WIRE_HOLESOK was specified, skip this check. 2299acd9a301SAlan Cox */ 2300abd498aaSBruce M Simpson if (((flags & VM_MAP_WIRE_HOLESOK) == 0) && 2301abd498aaSBruce M Simpson (entry->end < end && (entry->next == &map->header || 2302abd498aaSBruce M Simpson entry->next->start > entry->end))) { 2303acd9a301SAlan Cox end = entry->end; 2304acd9a301SAlan Cox rv = KERN_INVALID_ADDRESS; 2305acd9a301SAlan Cox goto done; 2306acd9a301SAlan Cox } 2307acd9a301SAlan Cox /* 23083ffbc0cdSAlan Cox * If system unwiring, require that the entry is system wired. 
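 *
 * (Editor's gloss: a user wiring from mlock(2) contributes one
 * wired_count reference, tagged MAP_ENTRY_USER_WIRED;
 * vm_map_entry_system_wired_count() reports only the references
 * beyond that one.)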
2309acd9a301SAlan Cox */ 23100ada205eSBrian Feldman if (!user_unwire && 23110ada205eSBrian Feldman vm_map_entry_system_wired_count(entry) == 0) { 2312acd9a301SAlan Cox end = entry->end; 2313acd9a301SAlan Cox rv = KERN_INVALID_ARGUMENT; 2314acd9a301SAlan Cox goto done; 2315acd9a301SAlan Cox } 2316acd9a301SAlan Cox entry = entry->next; 2317acd9a301SAlan Cox } 2318acd9a301SAlan Cox rv = KERN_SUCCESS; 2319acd9a301SAlan Cox done: 2320e27e17b7SAlan Cox need_wakeup = FALSE; 2321acd9a301SAlan Cox if (first_entry == NULL) { 2322acd9a301SAlan Cox result = vm_map_lookup_entry(map, start, &first_entry); 2323cbef13d8SAlan Cox if (!result && (flags & VM_MAP_WIRE_HOLESOK)) 2324cbef13d8SAlan Cox first_entry = first_entry->next; 2325cbef13d8SAlan Cox else 2326acd9a301SAlan Cox KASSERT(result, ("vm_map_unwire: lookup failed")); 2327acd9a301SAlan Cox } 23280acea7dfSKonstantin Belousov for (entry = first_entry; entry != &map->header && entry->start < end; 23290acea7dfSKonstantin Belousov entry = entry->next) { 23300acea7dfSKonstantin Belousov /* 23310acea7dfSKonstantin Belousov * If VM_MAP_WIRE_HOLESOK was specified, an empty 23320acea7dfSKonstantin Belousov * space in the unwired region could have been mapped 23330acea7dfSKonstantin Belousov * while the map lock was dropped for draining 23340acea7dfSKonstantin Belousov * MAP_ENTRY_IN_TRANSITION. Moreover, another thread 23350acea7dfSKonstantin Belousov * could be simultaneously wiring this new mapping 23360acea7dfSKonstantin Belousov * entry. Detect these cases and skip any entries 23370acea7dfSKonstantin Belousov * marked as in transition by us. 23380acea7dfSKonstantin Belousov */ 23390acea7dfSKonstantin Belousov if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || 23400acea7dfSKonstantin Belousov entry->wiring_thread != curthread) { 23410acea7dfSKonstantin Belousov KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0, 23420acea7dfSKonstantin Belousov ("vm_map_unwire: !HOLESOK and new/changed entry")); 23430acea7dfSKonstantin Belousov continue; 23440acea7dfSKonstantin Belousov } 23450acea7dfSKonstantin Belousov 23463ffbc0cdSAlan Cox if (rv == KERN_SUCCESS && (!user_unwire || 23473ffbc0cdSAlan Cox (entry->eflags & MAP_ENTRY_USER_WIRED))) { 2348b2f3846aSAlan Cox if (user_unwire) 2349b2f3846aSAlan Cox entry->eflags &= ~MAP_ENTRY_USER_WIRED; 2350b2f3846aSAlan Cox entry->wired_count--; 23510ada205eSBrian Feldman if (entry->wired_count == 0) { 2352b2f3846aSAlan Cox /* 2353b2f3846aSAlan Cox * Retain the map lock. 
2354b2f3846aSAlan Cox */ 23554be14af9SAlan Cox vm_fault_unwire(map, entry->start, entry->end, 23564be14af9SAlan Cox entry->object.vm_object != NULL && 235728634820SAlan Cox (entry->object.vm_object->flags & 235828634820SAlan Cox OBJ_FICTITIOUS) != 0); 2359b2f3846aSAlan Cox } 2360b2f3846aSAlan Cox } 23610acea7dfSKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, 2362*ff3ae454SKonstantin Belousov ("vm_map_unwire: in-transition flag missing %p", entry)); 2363*ff3ae454SKonstantin Belousov KASSERT(entry->wiring_thread == curthread, 2364*ff3ae454SKonstantin Belousov ("vm_map_unwire: alien wire %p", entry)); 2365acd9a301SAlan Cox entry->eflags &= ~MAP_ENTRY_IN_TRANSITION; 23660acea7dfSKonstantin Belousov entry->wiring_thread = NULL; 2367acd9a301SAlan Cox if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { 2368acd9a301SAlan Cox entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; 2369acd9a301SAlan Cox need_wakeup = TRUE; 2370acd9a301SAlan Cox } 2371acd9a301SAlan Cox vm_map_simplify_entry(map, entry); 2372acd9a301SAlan Cox } 2373acd9a301SAlan Cox vm_map_unlock(map); 2374acd9a301SAlan Cox if (need_wakeup) 2375acd9a301SAlan Cox vm_map_wakeup(map); 2376acd9a301SAlan Cox return (rv); 2377acd9a301SAlan Cox } 2378acd9a301SAlan Cox 2379acd9a301SAlan Cox /* 2380e27e17b7SAlan Cox * vm_map_wire: 2381e27e17b7SAlan Cox * 2382e27e17b7SAlan Cox * Implements both kernel and user wiring. 2383e27e17b7SAlan Cox */ 2384e27e17b7SAlan Cox int 2385e27e17b7SAlan Cox vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, 2386abd498aaSBruce M Simpson int flags) 2387e27e17b7SAlan Cox { 238812d7cc84SAlan Cox vm_map_entry_t entry, first_entry, tmp_entry; 238912d7cc84SAlan Cox vm_offset_t saved_end, saved_start; 239012d7cc84SAlan Cox unsigned int last_timestamp; 239112d7cc84SAlan Cox int rv; 23924be14af9SAlan Cox boolean_t fictitious, need_wakeup, result, user_wire; 2393e4cd31ddSJeff Roberson vm_prot_t prot; 2394e27e17b7SAlan Cox 2395e4cd31ddSJeff Roberson prot = 0; 2396e4cd31ddSJeff Roberson if (flags & VM_MAP_WIRE_WRITE) 2397e4cd31ddSJeff Roberson prot |= VM_PROT_WRITE; 2398abd498aaSBruce M Simpson user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE; 239912d7cc84SAlan Cox vm_map_lock(map); 240012d7cc84SAlan Cox VM_MAP_RANGE_CHECK(map, start, end); 240112d7cc84SAlan Cox if (!vm_map_lookup_entry(map, start, &first_entry)) { 2402abd498aaSBruce M Simpson if (flags & VM_MAP_WIRE_HOLESOK) 2403cbef13d8SAlan Cox first_entry = first_entry->next; 2404abd498aaSBruce M Simpson else { 240512d7cc84SAlan Cox vm_map_unlock(map); 240612d7cc84SAlan Cox return (KERN_INVALID_ADDRESS); 240712d7cc84SAlan Cox } 2408abd498aaSBruce M Simpson } 240912d7cc84SAlan Cox last_timestamp = map->timestamp; 241012d7cc84SAlan Cox entry = first_entry; 241112d7cc84SAlan Cox while (entry != &map->header && entry->start < end) { 241212d7cc84SAlan Cox if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { 241312d7cc84SAlan Cox /* 241412d7cc84SAlan Cox * We have not yet clipped the entry. 241512d7cc84SAlan Cox */ 241612d7cc84SAlan Cox saved_start = (start >= entry->start) ? start : 241712d7cc84SAlan Cox entry->start; 241812d7cc84SAlan Cox entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 24198ce2d00aSPawel Jakub Dawidek if (vm_map_unlock_and_wait(map, 0)) { 242012d7cc84SAlan Cox /* 242112d7cc84SAlan Cox * Allow interruption of user wiring? 
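/*
 * Illustrative sketch (not part of the original source): the munlock(2)
 * path unwires a user range and requires the whole range to be mapped,
 * roughly:
 *
 *	rv = vm_map_unwire(map, trunc_page(addr), round_page(addr + len),
 *	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 *
 * A kernel caller releasing its own wiring would pass
 * VM_MAP_WIRE_SYSTEM instead of VM_MAP_WIRE_USER.
 */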
/*
 * vm_map_wire:
 *
 *	Implements both kernel and user wiring.
 */
int
vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
    int flags)
{
	vm_map_entry_t entry, first_entry, tmp_entry;
	vm_offset_t saved_end, saved_start;
	unsigned int last_timestamp;
	int rv;
	boolean_t fictitious, need_wakeup, result, user_wire;
	vm_prot_t prot;

	prot = 0;
	if (flags & VM_MAP_WIRE_WRITE)
		prot |= VM_PROT_WRITE;
	user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE;
	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	if (!vm_map_lookup_entry(map, start, &first_entry)) {
		if (flags & VM_MAP_WIRE_HOLESOK)
			first_entry = first_entry->next;
		else {
			vm_map_unlock(map);
			return (KERN_INVALID_ADDRESS);
		}
	}
	last_timestamp = map->timestamp;
	entry = first_entry;
	while (entry != &map->header && entry->start < end) {
		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
			/*
			 * We have not yet clipped the entry.
			 */
			saved_start = (start >= entry->start) ? start :
			    entry->start;
			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
			if (vm_map_unlock_and_wait(map, 0)) {
				/*
				 * Allow interruption of user wiring?
				 */
			}
			vm_map_lock(map);
			if (last_timestamp + 1 != map->timestamp) {
				/*
				 * Look again for the entry because the map was
				 * modified while it was unlocked.
				 * Specifically, the entry may have been
				 * clipped, merged, or deleted.
				 */
				if (!vm_map_lookup_entry(map, saved_start,
				    &tmp_entry)) {
					if (flags & VM_MAP_WIRE_HOLESOK)
						tmp_entry = tmp_entry->next;
					else {
						if (saved_start == start) {
							/*
							 * first_entry has been deleted.
							 */
							vm_map_unlock(map);
							return (KERN_INVALID_ADDRESS);
						}
						end = saved_start;
						rv = KERN_INVALID_ADDRESS;
						goto done;
					}
				}
				if (entry == first_entry)
					first_entry = tmp_entry;
				else
					first_entry = NULL;
				entry = tmp_entry;
			}
			last_timestamp = map->timestamp;
			continue;
		}
		vm_map_clip_start(map, entry, start);
		vm_map_clip_end(map, entry, end);
		/*
		 * Mark the entry in case the map lock is released.  (See
		 * above.)
		 */
		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 &&
		    entry->wiring_thread == NULL,
		    ("owned map entry %p", entry));
		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
		entry->wiring_thread = curthread;
		if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0
		    || (entry->protection & prot) != prot) {
			entry->eflags |= MAP_ENTRY_WIRE_SKIPPED;
			if ((flags & VM_MAP_WIRE_HOLESOK) == 0) {
				end = entry->end;
				rv = KERN_INVALID_ADDRESS;
				goto done;
			}
			goto next_entry;
		}
		if (entry->wired_count == 0) {
			entry->wired_count++;
			saved_start = entry->start;
			saved_end = entry->end;
			fictitious = entry->object.vm_object != NULL &&
			    (entry->object.vm_object->flags &
			    OBJ_FICTITIOUS) != 0;
			/*
			 * Release the map lock, relying on the in-transition
			 * mark.  Mark the map busy for fork.
			 */
			vm_map_busy(map);
			vm_map_unlock(map);
			rv = vm_fault_wire(map, saved_start, saved_end,
			    fictitious);
			vm_map_lock(map);
			vm_map_unbusy(map);
			if (last_timestamp + 1 != map->timestamp) {
				/*
				 * Look again for the entry because the map was
				 * modified while it was unlocked.  The entry
				 * may have been clipped, but NOT merged or
				 * deleted.
				 */
				result = vm_map_lookup_entry(map, saved_start,
				    &tmp_entry);
				KASSERT(result, ("vm_map_wire: lookup failed"));
				if (entry == first_entry)
					first_entry = tmp_entry;
				else
					first_entry = NULL;
				entry = tmp_entry;
				while (entry->end < saved_end) {
					if (rv != KERN_SUCCESS) {
						KASSERT(entry->wired_count == 1,
						    ("vm_map_wire: bad count"));
						entry->wired_count = -1;
					}
					entry = entry->next;
				}
			}
			last_timestamp = map->timestamp;
			if (rv != KERN_SUCCESS) {
				KASSERT(entry->wired_count == 1,
				    ("vm_map_wire: bad count"));
				/*
				 * Assign an out-of-range value to represent
				 * the failure to wire this entry.
				 */
				entry->wired_count = -1;
				end = entry->end;
				goto done;
			}
		} else if (!user_wire ||
		    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
			entry->wired_count++;
		}
		/*
		 * Check the map for holes in the specified region.
		 * If VM_MAP_WIRE_HOLESOK was specified, skip this check.
		 */
	next_entry:
		if (((flags & VM_MAP_WIRE_HOLESOK) == 0) &&
		    (entry->end < end && (entry->next == &map->header ||
		    entry->next->start > entry->end))) {
			end = entry->end;
			rv = KERN_INVALID_ADDRESS;
			goto done;
		}
		entry = entry->next;
	}
	rv = KERN_SUCCESS;
done:
	need_wakeup = FALSE;
	if (first_entry == NULL) {
		result = vm_map_lookup_entry(map, start, &first_entry);
		if (!result && (flags & VM_MAP_WIRE_HOLESOK))
			first_entry = first_entry->next;
		else
			KASSERT(result, ("vm_map_wire: lookup failed"));
	}
	for (entry = first_entry; entry != &map->header && entry->start < end;
	    entry = entry->next) {
		if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0)
			goto next_entry_done;

		/*
		 * If VM_MAP_WIRE_HOLESOK was specified, an empty
		 * space in the unwired region could have been mapped
		 * while the map lock was dropped for faulting in the
		 * pages or draining MAP_ENTRY_IN_TRANSITION.
		 * Moreover, another thread could be simultaneously
		 * wiring this new mapping entry.  Detect these cases
		 * and skip any entries marked as in transition by us.
		 */
		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
		    entry->wiring_thread != curthread) {
			KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0,
			    ("vm_map_wire: !HOLESOK and new/changed entry"));
			continue;
		}

		if (rv == KERN_SUCCESS) {
			if (user_wire)
				entry->eflags |= MAP_ENTRY_USER_WIRED;
		} else if (entry->wired_count == -1) {
			/*
			 * Wiring failed on this entry.  Thus, unwiring is
			 * unnecessary.
			 */
			entry->wired_count = 0;
		} else {
			if (!user_wire ||
			    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
				entry->wired_count--;
			if (entry->wired_count == 0) {
				/*
				 * Retain the map lock.
				 */
				vm_fault_unwire(map, entry->start, entry->end,
				    entry->object.vm_object != NULL &&
				    (entry->object.vm_object->flags &
				    OBJ_FICTITIOUS) != 0);
			}
		}
	next_entry_done:
		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
		    ("vm_map_wire: in-transition flag missing %p", entry));
		KASSERT(entry->wiring_thread == curthread,
		    ("vm_map_wire: alien wire %p", entry));
		entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION |
		    MAP_ENTRY_WIRE_SKIPPED);
		entry->wiring_thread = NULL;
		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
			need_wakeup = TRUE;
		}
		vm_map_simplify_entry(map, entry);
	}
	vm_map_unlock(map);
	if (need_wakeup)
		vm_map_wakeup(map);
	return (rv);
}
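/*
 * Illustrative sketch (not part of the original source): mlock(2) style
 * wiring of a user range, refusing holes, looks roughly like:
 *
 *	rv = vm_map_wire(map, trunc_page(addr), round_page(addr + len),
 *	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 *
 * whereas an mlockall(2) style caller wiring everything currently
 * mapped would tolerate gaps with VM_MAP_WIRE_USER |
 * VM_MAP_WIRE_HOLESOK.
 */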
/*
 * vm_map_sync
 *
 * Push any dirty cached pages in the address range to their pager.
 * If syncio is TRUE, dirty pages are written synchronously.
 * If invalidate is TRUE, any cached pages are freed as well.
 *
 * If the size of the region from start to end is zero, we are
 * supposed to flush all modified pages within the region containing
 * start.  Unfortunately, a region can be split or coalesced with
 * neighboring regions, making it difficult to determine what the
 * original region was.  Therefore, we approximate this requirement by
 * flushing the current region containing start.
 *
 * Returns an error if any part of the specified range is not mapped.
 */
int
vm_map_sync(
	vm_map_t map,
	vm_offset_t start,
	vm_offset_t end,
	boolean_t syncio,
	boolean_t invalidate)
{
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_size_t size;
	vm_object_t object;
	vm_ooffset_t offset;
	unsigned int last_timestamp;
	boolean_t failed;

	vm_map_lock_read(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	if (!vm_map_lookup_entry(map, start, &entry)) {
		vm_map_unlock_read(map);
		return (KERN_INVALID_ADDRESS);
	} else if (start == end) {
		start = entry->start;
		end = entry->end;
	}
	/*
	 * Make a first pass to check for user-wired memory and holes.
	 */
	for (current = entry; current != &map->header && current->start < end;
	    current = current->next) {
		if (invalidate && (current->eflags & MAP_ENTRY_USER_WIRED)) {
			vm_map_unlock_read(map);
			return (KERN_INVALID_ARGUMENT);
		}
		if (end > current->end &&
		    (current->next == &map->header ||
		    current->end != current->next->start)) {
			vm_map_unlock_read(map);
			return (KERN_INVALID_ADDRESS);
		}
	}

	if (invalidate)
		pmap_remove(map->pmap, start, end);
	failed = FALSE;

	/*
	 * Make a second pass, cleaning/uncaching pages from the indicated
	 * objects as we go.
	 */
	for (current = entry; current != &map->header && current->start < end;) {
		offset = current->offset + (start - current->start);
		size = (end <= current->end ? end : current->end) - start;
		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
			vm_map_t smap;
			vm_map_entry_t tentry;
			vm_size_t tsize;

			smap = current->object.sub_map;
			vm_map_lock_read(smap);
			(void) vm_map_lookup_entry(smap, offset, &tentry);
			tsize = tentry->end - offset;
			if (tsize < size)
				size = tsize;
			object = tentry->object.vm_object;
			offset = tentry->offset + (offset - tentry->start);
			vm_map_unlock_read(smap);
		} else {
			object = current->object.vm_object;
		}
		vm_object_reference(object);
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);
		if (!vm_object_sync(object, offset, size, syncio, invalidate))
			failed = TRUE;
		start += size;
		vm_object_deallocate(object);
		vm_map_lock_read(map);
		if (last_timestamp == map->timestamp ||
		    !vm_map_lookup_entry(map, start, &current))
			current = current->next;
	}

	vm_map_unlock_read(map);
	return (failed ? KERN_FAILURE : KERN_SUCCESS);
}
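/*
 * Illustrative sketch (not part of the original source): an msync(2)
 * style caller flushing a range synchronously and invalidating cached
 * pages might do, roughly (the errno mapping is a hypothetical
 * simplification):
 *
 *	rv = vm_map_sync(map, trunc_page(addr), round_page(addr + len),
 *	    TRUE, TRUE);
 *	if (rv == KERN_INVALID_ADDRESS)
 *		return (ENOMEM);
 */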
/*
 * vm_map_entry_unwire:	[ internal use only ]
 *
 * Make the region specified by this entry pageable.
 *
 * The map in question should be locked.
 * [This is the reason for this routine's existence.]
 */
static void
vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
{
	vm_fault_unwire(map, entry->start, entry->end,
	    entry->object.vm_object != NULL &&
	    (entry->object.vm_object->flags & OBJ_FICTITIOUS) != 0);
	entry->wired_count = 0;
}

static void
vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map)
{

	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0)
		vm_object_deallocate(entry->object.vm_object);
	uma_zfree(system_map ? kmapentzone : mapentzone, entry);
}

/*
 * vm_map_entry_delete:	[ internal use only ]
 *
 * Deallocate the given entry from the target map.
 */
static void
vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
{
	vm_object_t object;
	vm_pindex_t offidxstart, offidxend, count, size1;
	vm_ooffset_t size;

	vm_map_entry_unlink(map, entry);
	object = entry->object.vm_object;
	size = entry->end - entry->start;
	map->size -= size;

	if (entry->cred != NULL) {
		swap_release_by_cred(size, entry->cred);
		crfree(entry->cred);
	}

	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
	    (object != NULL)) {
		KASSERT(entry->cred == NULL || object->cred == NULL ||
		    (entry->eflags & MAP_ENTRY_NEEDS_COPY),
		    ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry));
		count = OFF_TO_IDX(size);
		offidxstart = OFF_TO_IDX(entry->offset);
		offidxend = offidxstart + count;
		VM_OBJECT_WLOCK(object);
		if (object->ref_count != 1 &&
		    ((object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING ||
		    object == kernel_object || object == kmem_object)) {
			vm_object_collapse(object);

			/*
			 * The option OBJPR_NOTMAPPED can be passed here
			 * because vm_map_delete() already performed
			 * pmap_remove() on the only mapping to this range
			 * of pages.
			 */
			vm_object_page_remove(object, offidxstart, offidxend,
			    OBJPR_NOTMAPPED);
			if (object->type == OBJT_SWAP)
				swap_pager_freespace(object, offidxstart, count);
			if (offidxend >= object->size &&
			    offidxstart < object->size) {
				size1 = object->size;
				object->size = offidxstart;
				if (object->cred != NULL) {
					size1 -= object->size;
					KASSERT(object->charge >= ptoa(size1),
					    ("vm_map_entry_delete: object->charge < 0"));
					swap_release_by_cred(ptoa(size1), object->cred);
					object->charge -= ptoa(size1);
				}
			}
		}
		VM_OBJECT_WUNLOCK(object);
	} else
		entry->object.vm_object = NULL;
	if (map->system_map)
		vm_map_entry_deallocate(entry, TRUE);
	else {
		entry->next = curthread->td_map_def_user;
		curthread->td_map_def_user = entry;
	}
}

/*
 * vm_map_delete:	[ internal use only ]
 *
 * Deallocates the given address range from the target
 * map.
 */
int
vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	vm_map_entry_t entry;
	vm_map_entry_t first_entry;

	VM_MAP_ASSERT_LOCKED(map);

	/*
	 * Find the start of the region, and clip it
	 */
	if (!vm_map_lookup_entry(map, start, &first_entry))
		entry = first_entry->next;
	else {
		entry = first_entry;
		vm_map_clip_start(map, entry, start);
	}

	/*
	 * Step through all entries in this region
	 */
	while ((entry != &map->header) && (entry->start < end)) {
		vm_map_entry_t next;

		/*
		 * Wait for wiring or unwiring of an entry to complete.
		 * Also wait for any system wirings to disappear on
		 * user maps.
		 */
		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 ||
		    (vm_map_pmap(map) != kernel_pmap &&
		    vm_map_entry_system_wired_count(entry) != 0)) {
			unsigned int last_timestamp;
			vm_offset_t saved_start;
			vm_map_entry_t tmp_entry;

			saved_start = entry->start;
			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
			last_timestamp = map->timestamp;
			(void) vm_map_unlock_and_wait(map, 0);
			vm_map_lock(map);
			if (last_timestamp + 1 != map->timestamp) {
				/*
				 * Look again for the entry because the map was
				 * modified while it was unlocked.
				 * Specifically, the entry may have been
				 * clipped, merged, or deleted.
				 */
				if (!vm_map_lookup_entry(map, saved_start,
				    &tmp_entry))
					entry = tmp_entry->next;
				else {
					entry = tmp_entry;
					vm_map_clip_start(map, entry,
					    saved_start);
				}
			}
			continue;
		}
		vm_map_clip_end(map, entry, end);

		next = entry->next;

		/*
		 * Unwire before removing addresses from the pmap; otherwise,
		 * unwiring will put the entries back in the pmap.
		 */
		if (entry->wired_count != 0) {
			vm_map_entry_unwire(map, entry);
		}

		pmap_remove(map->pmap, entry->start, entry->end);

		/*
		 * Delete the entry only after removing all pmap
		 * entries pointing to its pages.  (Otherwise, its
		 * page frames may be reallocated, and any modify bits
		 * will be set in the wrong object!)
		 */
		vm_map_entry_delete(map, entry);
		entry = next;
	}
	return (KERN_SUCCESS);
}

/*
 * vm_map_remove:
 *
 * Remove the given address range from the target map.
 * This is the exported form of vm_map_delete.
 */
int
vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	int result;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	result = vm_map_delete(map, start, end);
	vm_map_unlock(map);
	return (result);
}
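/*
 * Illustrative sketch (not part of the original source): munmap(2)
 * style removal of a user mapping reduces to a single call, e.g.:
 *
 *	(void) vm_map_remove(&p->p_vmspace->vm_map, trunc_page(addr),
 *	    round_page(addr + len));
 */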
/*
 * vm_map_check_protection:
 *
 * Assert that the target map allows the specified privilege on the
 * entire address region given.  The entire region must be allocated.
 *
 * WARNING!  This code does not and should not check whether the
 * contents of the region are accessible.  For example, a smaller file
 * might be mapped into a larger address space.
 *
 * NOTE!  This code is also called by munmap().
 *
 * The map must be locked.  A read lock is sufficient.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
			vm_prot_t protection)
{
	vm_map_entry_t entry;
	vm_map_entry_t tmp_entry;

	if (!vm_map_lookup_entry(map, start, &tmp_entry))
		return (FALSE);
	entry = tmp_entry;

	while (start < end) {
		if (entry == &map->header)
			return (FALSE);
		/*
		 * No holes allowed!
		 */
		if (start < entry->start)
			return (FALSE);
		/*
		 * Check protection associated with entry.
		 */
		if ((entry->protection & protection) != protection)
			return (FALSE);
		/* go to next entry */
		start = entry->end;
		entry = entry->next;
	}
	return (TRUE);
}
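/*
 * Illustrative sketch (not part of the original source): a caller that
 * must verify a user range is writable before operating on it could,
 * with the map read-locked, do the following (the EFAULT mapping is a
 * hypothetical choice):
 *
 *	vm_map_lock_read(map);
 *	ok = vm_map_check_protection(map, trunc_page(addr),
 *	    round_page(addr + len), VM_PROT_WRITE);
 *	vm_map_unlock_read(map);
 *	if (!ok)
 *		return (EFAULT);
 */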
/*
 * vm_map_copy_entry:
 *
 * Copies the contents of the source entry to the destination
 * entry.  The entries *must* be aligned properly.
 */
static void
vm_map_copy_entry(
	vm_map_t src_map,
	vm_map_t dst_map,
	vm_map_entry_t src_entry,
	vm_map_entry_t dst_entry,
	vm_ooffset_t *fork_charge)
{
	vm_object_t src_object;
	vm_map_entry_t fake_entry;
	vm_offset_t size;
	struct ucred *cred;
	int charged;

	VM_MAP_ASSERT_LOCKED(dst_map);

	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
		return;

	if (src_entry->wired_count == 0) {

		/*
		 * If the source entry is marked needs_copy, it is already
		 * write-protected.
		 */
		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
			pmap_protect(src_map->pmap,
			    src_entry->start,
			    src_entry->end,
			    src_entry->protection & ~VM_PROT_WRITE);
		}

		/*
		 * Make a copy of the object.
		 */
		size = src_entry->end - src_entry->start;
		if ((src_object = src_entry->object.vm_object) != NULL) {
			VM_OBJECT_WLOCK(src_object);
			charged = ENTRY_CHARGED(src_entry);
			if ((src_object->handle == NULL) &&
			    (src_object->type == OBJT_DEFAULT ||
			    src_object->type == OBJT_SWAP)) {
				vm_object_collapse(src_object);
				if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
					vm_object_split(src_entry);
					src_object = src_entry->object.vm_object;
				}
			}
			vm_object_reference_locked(src_object);
			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
			if (src_entry->cred != NULL &&
			    !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
				KASSERT(src_object->cred == NULL,
				    ("OVERCOMMIT: vm_map_copy_entry: cred %p",
				    src_object));
				src_object->cred = src_entry->cred;
				src_object->charge = size;
			}
			VM_OBJECT_WUNLOCK(src_object);
			dst_entry->object.vm_object = src_object;
			if (charged) {
				cred = curthread->td_ucred;
				crhold(cred);
				dst_entry->cred = cred;
				*fork_charge += size;
				if (!(src_entry->eflags &
				    MAP_ENTRY_NEEDS_COPY)) {
					crhold(cred);
					src_entry->cred = cred;
					*fork_charge += size;
				}
			}
			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
			dst_entry->offset = src_entry->offset;
			if (src_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
				/*
				 * MAP_ENTRY_VN_WRITECNT cannot
				 * indicate write reference from
				 * src_entry, since the entry is
				 * marked as needs copy.  Allocate a
				 * fake entry that is used to
				 * decrement object->un_pager.vnp.writecount
				 * at the appropriate time.  Attach
				 * fake_entry to the deferred list.
				 */
				fake_entry = vm_map_entry_create(dst_map);
				fake_entry->eflags = MAP_ENTRY_VN_WRITECNT;
				src_entry->eflags &= ~MAP_ENTRY_VN_WRITECNT;
				vm_object_reference(src_object);
				fake_entry->object.vm_object = src_object;
				fake_entry->start = src_entry->start;
				fake_entry->end = src_entry->end;
				fake_entry->next = curthread->td_map_def_user;
				curthread->td_map_def_user = fake_entry;
			}
		} else {
			dst_entry->object.vm_object = NULL;
			dst_entry->offset = 0;
			if (src_entry->cred != NULL) {
				dst_entry->cred = curthread->td_ucred;
				crhold(dst_entry->cred);
				*fork_charge += size;
			}
		}

		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
		    dst_entry->end - dst_entry->start, src_entry->start);
	} else {
		/*
		 * Of course, wired down pages can't be set copy-on-write.
		 * Cause wired pages to be copied into the new map by
		 * simulating faults (the new pages are pageable)
		 */
		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry,
		    fork_charge);
	}
}

/*
 * vmspace_map_entry_forked:
 * Update the newly-forked vmspace each time a map entry is inherited
 * or copied.  The values for vm_dsize and vm_tsize are approximate
 * (and mostly-obsolete ideas in the face of mmap(2) et al.)
 */
static void
vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
    vm_map_entry_t entry)
{
	vm_size_t entrysize;
	vm_offset_t newend;

	entrysize = entry->end - entry->start;
	vm2->vm_map.size += entrysize;
	if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
		vm2->vm_ssize += btoc(entrysize);
	} else if (entry->start >= (vm_offset_t)vm1->vm_daddr &&
	    entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) {
		newend = MIN(entry->end,
		    (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize));
		vm2->vm_dsize += btoc(newend - entry->start);
	} else if (entry->start >= (vm_offset_t)vm1->vm_taddr &&
	    entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) {
		newend = MIN(entry->end,
		    (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize));
		vm2->vm_tsize += btoc(newend - entry->start);
	}
}

/*
 * vmspace_fork:
 * Create a new process vmspace structure and vm_map
 * based on those of an existing process.  The new map
 * is based on the old map, according to the inheritance
 * values on the regions in that map.
 *
 * XXX It might be worth coalescing the entries added to the new vmspace.
 *
 * The source map must not be locked.
 */
struct vmspace *
vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
{
	struct vmspace *vm2;
	vm_map_t new_map, old_map;
	vm_map_entry_t new_entry, old_entry;
	vm_object_t object;
	int locked;

	old_map = &vm1->vm_map;
	/* Copy immutable fields of vm1 to vm2. */
	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset, NULL);
	if (vm2 == NULL)
		return (NULL);
	vm2->vm_taddr = vm1->vm_taddr;
	vm2->vm_daddr = vm1->vm_daddr;
	vm2->vm_maxsaddr = vm1->vm_maxsaddr;
	vm_map_lock(old_map);
	if (old_map->busy)
		vm_map_wait_busy(old_map);
	new_map = &vm2->vm_map;
	locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
	KASSERT(locked, ("vmspace_fork: lock failed"));

	old_entry = old_map->header.next;

	while (old_entry != &old_map->header) {
		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			panic("vm_map_fork: encountered a submap");

		switch (old_entry->inheritance) {
		case VM_INHERIT_NONE:
			break;

		case VM_INHERIT_SHARE:
			/*
			 * Clone the entry, creating the shared object if necessary.
			 */
			object = old_entry->object.vm_object;
			if (object == NULL) {
				object = vm_object_allocate(OBJT_DEFAULT,
				    atop(old_entry->end - old_entry->start));
				old_entry->object.vm_object = object;
				old_entry->offset = 0;
				if (old_entry->cred != NULL) {
					object->cred = old_entry->cred;
					object->charge = old_entry->end -
					    old_entry->start;
					old_entry->cred = NULL;
				}
			}

			/*
			 * Add the reference before calling vm_object_shadow
			 * to ensure that a shadow object is created.
			 */
			vm_object_reference(object);
			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
				vm_object_shadow(&old_entry->object.vm_object,
				    &old_entry->offset,
				    old_entry->end - old_entry->start);
				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
				/* Transfer the second reference too. */
				vm_object_reference(
				    old_entry->object.vm_object);

				/*
				 * As in vm_map_simplify_entry(), the
				 * vnode lock will not be acquired in
				 * this call to vm_object_deallocate().
				 */
				vm_object_deallocate(object);
				object = old_entry->object.vm_object;
			}
			VM_OBJECT_WLOCK(object);
			vm_object_clear_flag(object, OBJ_ONEMAPPING);
			if (old_entry->cred != NULL) {
				KASSERT(object->cred == NULL, ("vmspace_fork both cred"));
				object->cred = old_entry->cred;
				object->charge = old_entry->end - old_entry->start;
				old_entry->cred = NULL;
			}

			/*
			 * Assert the correct state of the vnode
			 * v_writecount while the object is locked, to
			 * not relock it later for the assertion
			 * correctness.
			 */
			if (old_entry->eflags & MAP_ENTRY_VN_WRITECNT &&
			    object->type == OBJT_VNODE) {
				KASSERT(((struct vnode *)object->handle)->
				    v_writecount > 0,
				    ("vmspace_fork: v_writecount %p", object));
				KASSERT(object->un_pager.vnp.writemappings > 0,
				    ("vmspace_fork: vnp.writecount %p",
				    object));
			}
			VM_OBJECT_WUNLOCK(object);

			/*
			 * Clone the entry, referencing the shared object.
			 */
			new_entry = vm_map_entry_create(new_map);
			*new_entry = *old_entry;
			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
			    MAP_ENTRY_IN_TRANSITION);
			new_entry->wiring_thread = NULL;
			new_entry->wired_count = 0;
			if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
				vnode_pager_update_writecount(object,
				    new_entry->start, new_entry->end);
			}

			/*
			 * Insert the entry into the new map -- we know we're
			 * inserting at the end of the new map.
			 */
			vm_map_entry_link(new_map, new_map->header.prev,
			    new_entry);
			vmspace_map_entry_forked(vm1, vm2, new_entry);

			/*
			 * Update the physical map
			 */
			pmap_copy(new_map->pmap, old_map->pmap,
			    new_entry->start,
			    (old_entry->end - old_entry->start),
			    old_entry->start);
			break;

		case VM_INHERIT_COPY:
			/*
			 * Clone the entry and link into the map.
			 */
			new_entry = vm_map_entry_create(new_map);
			*new_entry = *old_entry;
			/*
			 * Copied entry is COW over the old object.
			 */
			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
			    MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT);
			new_entry->wiring_thread = NULL;
			new_entry->wired_count = 0;
			new_entry->object.vm_object = NULL;
			new_entry->cred = NULL;
			vm_map_entry_link(new_map, new_map->header.prev,
			    new_entry);
			vmspace_map_entry_forked(vm1, vm2, new_entry);
			vm_map_copy_entry(old_map, new_map, old_entry,
			    new_entry, fork_charge);
			break;
		}
		old_entry = old_entry->next;
	}
	/*
	 * Use inlined vm_map_unlock() to postpone handling the deferred
	 * map entries, which cannot be done until both old_map and
	 * new_map locks are released.
	 */
	sx_xunlock(&old_map->lock);
	sx_xunlock(&new_map->lock);
	vm_map_process_deferred();

	return (vm2);
}
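/*
 * Illustrative sketch (not part of the original source): the fork(2)
 * path drives vmspace_fork() roughly as follows; the accumulated
 * fork_charge is then reserved against the child's credentials (the
 * error handling shown is a hypothetical simplification):
 *
 *	vm_ooffset_t fork_charge = 0;
 *	struct vmspace *vm2 = vmspace_fork(p1->p_vmspace, &fork_charge);
 *	if (vm2 == NULL)
 *		return (ENOMEM);
 */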
Grimes * Clone the entry and link into the map. 3277df8bae1dSRodney W. Grimes */ 3278df8bae1dSRodney W. Grimes new_entry = vm_map_entry_create(new_map); 3279df8bae1dSRodney W. Grimes *new_entry = *old_entry; 328084110e7eSKonstantin Belousov /* 328184110e7eSKonstantin Belousov * Copied entry is COW over the old object. 328284110e7eSKonstantin Belousov */ 32839f6acfd1SKonstantin Belousov new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED | 328484110e7eSKonstantin Belousov MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT); 32850acea7dfSKonstantin Belousov new_entry->wiring_thread = NULL; 3286df8bae1dSRodney W. Grimes new_entry->wired_count = 0; 3287df8bae1dSRodney W. Grimes new_entry->object.vm_object = NULL; 3288ef694c1aSEdward Tomasz Napierala new_entry->cred = NULL; 3289df8bae1dSRodney W. Grimes vm_map_entry_link(new_map, new_map->header.prev, 3290df8bae1dSRodney W. Grimes new_entry); 32912a7be1b6SBrian Feldman vmspace_map_entry_forked(vm1, vm2, new_entry); 3292bd7e5f99SJohn Dyson vm_map_copy_entry(old_map, new_map, old_entry, 32933364c323SKonstantin Belousov new_entry, fork_charge); 3294df8bae1dSRodney W. Grimes break; 3295df8bae1dSRodney W. Grimes } 3296df8bae1dSRodney W. Grimes old_entry = old_entry->next; 3297df8bae1dSRodney W. Grimes } 329884110e7eSKonstantin Belousov /* 329984110e7eSKonstantin Belousov * Use inlined vm_map_unlock() to postpone handling the deferred 330084110e7eSKonstantin Belousov * map entries, which cannot be done until both old_map and 330184110e7eSKonstantin Belousov * new_map locks are released. 330284110e7eSKonstantin Belousov */ 330384110e7eSKonstantin Belousov sx_xunlock(&old_map->lock); 330484110e7eSKonstantin Belousov sx_xunlock(&new_map->lock); 330584110e7eSKonstantin Belousov vm_map_process_deferred(); 3306df8bae1dSRodney W. Grimes 3307df8bae1dSRodney W. Grimes return (vm2); 3308df8bae1dSRodney W. Grimes } 3309df8bae1dSRodney W. Grimes 331094f7e29aSAlan Cox int 331194f7e29aSAlan Cox vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, 331294f7e29aSAlan Cox vm_prot_t prot, vm_prot_t max, int cow) 331394f7e29aSAlan Cox { 3314fd75d710SMarcel Moolenaar vm_map_entry_t new_entry, prev_entry; 3315fd75d710SMarcel Moolenaar vm_offset_t bot, top; 3316cfe52ecfSAndrey Zonov vm_size_t growsize, init_ssize; 3317fd75d710SMarcel Moolenaar int orient, rv; 33187e19eda4SAndrey Zonov rlim_t lmemlim, vmemlim; 331994f7e29aSAlan Cox 3320fd75d710SMarcel Moolenaar /* 3321fd75d710SMarcel Moolenaar * The stack orientation is piggybacked with the cow argument. 3322fd75d710SMarcel Moolenaar * Extract it into orient and mask the cow argument so that we 3323fd75d710SMarcel Moolenaar * don't pass it around further. 3324fd75d710SMarcel Moolenaar * NOTE: We explicitly allow bi-directional stacks. 3325fd75d710SMarcel Moolenaar */ 3326fd75d710SMarcel Moolenaar orient = cow & (MAP_STACK_GROWS_DOWN|MAP_STACK_GROWS_UP); 3327fd75d710SMarcel Moolenaar cow &= ~orient; 3328fd75d710SMarcel Moolenaar KASSERT(orient != 0, ("No stack grow direction")); 3329fd75d710SMarcel Moolenaar 333077bc7900SKonstantin Belousov if (addrbos < vm_map_min(map) || 333177bc7900SKonstantin Belousov addrbos > vm_map_max(map) || 333277bc7900SKonstantin Belousov addrbos + max_ssize < addrbos) 333394f7e29aSAlan Cox return (KERN_NO_SPACE); 3334fd75d710SMarcel Moolenaar 3335cfe52ecfSAndrey Zonov growsize = sgrowsiz; 3336cfe52ecfSAndrey Zonov init_ssize = (max_ssize < growsize) ? 
max_ssize : growsize; 333794f7e29aSAlan Cox 33387e19eda4SAndrey Zonov PROC_LOCK(curproc); 33397e19eda4SAndrey Zonov lmemlim = lim_cur(curproc, RLIMIT_MEMLOCK); 33407e19eda4SAndrey Zonov vmemlim = lim_cur(curproc, RLIMIT_VMEM); 33417e19eda4SAndrey Zonov PROC_UNLOCK(curproc); 334291d5354aSJohn Baldwin 334394f7e29aSAlan Cox vm_map_lock(map); 334494f7e29aSAlan Cox 334594f7e29aSAlan Cox /* If addr is already mapped, no go */ 334694f7e29aSAlan Cox if (vm_map_lookup_entry(map, addrbos, &prev_entry)) { 334794f7e29aSAlan Cox vm_map_unlock(map); 334894f7e29aSAlan Cox return (KERN_NO_SPACE); 334994f7e29aSAlan Cox } 335094f7e29aSAlan Cox 33517e19eda4SAndrey Zonov if (!old_mlock && map->flags & MAP_WIREFUTURE) { 33523ac7d297SAndrey Zonov if (ptoa(pmap_wired_count(map->pmap)) + init_ssize > lmemlim) { 33537e19eda4SAndrey Zonov vm_map_unlock(map); 33547e19eda4SAndrey Zonov return (KERN_NO_SPACE); 33557e19eda4SAndrey Zonov } 33567e19eda4SAndrey Zonov } 33577e19eda4SAndrey Zonov 3358a69ac174SMatthew Dillon /* If we would blow our VMEM resource limit, no go */ 335991d5354aSJohn Baldwin if (map->size + init_ssize > vmemlim) { 3360a69ac174SMatthew Dillon vm_map_unlock(map); 3361a69ac174SMatthew Dillon return (KERN_NO_SPACE); 3362a69ac174SMatthew Dillon } 3363a69ac174SMatthew Dillon 3364fd75d710SMarcel Moolenaar /* 3365fd75d710SMarcel Moolenaar * If we can't accomodate max_ssize in the current mapping, no go. 3366fd75d710SMarcel Moolenaar * However, we need to be aware that subsequent user mappings might 3367fd75d710SMarcel Moolenaar * map into the space we have reserved for stack, and currently this 3368fd75d710SMarcel Moolenaar * space is not protected. 336994f7e29aSAlan Cox * 3370fd75d710SMarcel Moolenaar * Hopefully we will at least detect this condition when we try to 3371fd75d710SMarcel Moolenaar * grow the stack. 337294f7e29aSAlan Cox */ 337394f7e29aSAlan Cox if ((prev_entry->next != &map->header) && 337494f7e29aSAlan Cox (prev_entry->next->start < addrbos + max_ssize)) { 337594f7e29aSAlan Cox vm_map_unlock(map); 337694f7e29aSAlan Cox return (KERN_NO_SPACE); 337794f7e29aSAlan Cox } 337894f7e29aSAlan Cox 3379fd75d710SMarcel Moolenaar /* 3380fd75d710SMarcel Moolenaar * We initially map a stack of only init_ssize. We will grow as 3381fd75d710SMarcel Moolenaar * needed later. Depending on the orientation of the stack (i.e. 3382fd75d710SMarcel Moolenaar * the grow direction) we either map at the top of the range, the 3383fd75d710SMarcel Moolenaar * bottom of the range or in the middle. 338494f7e29aSAlan Cox * 3385fd75d710SMarcel Moolenaar * Note: we would normally expect prot and max to be VM_PROT_ALL, 3386fd75d710SMarcel Moolenaar * and cow to be 0. Possibly we should eliminate these as input 3387fd75d710SMarcel Moolenaar * parameters, and just pass these values here in the insert call. 338894f7e29aSAlan Cox */ 3389fd75d710SMarcel Moolenaar if (orient == MAP_STACK_GROWS_DOWN) 3390fd75d710SMarcel Moolenaar bot = addrbos + max_ssize - init_ssize; 3391fd75d710SMarcel Moolenaar else if (orient == MAP_STACK_GROWS_UP) 3392fd75d710SMarcel Moolenaar bot = addrbos; 3393fd75d710SMarcel Moolenaar else 3394fd75d710SMarcel Moolenaar bot = round_page(addrbos + max_ssize/2 - init_ssize/2); 3395fd75d710SMarcel Moolenaar top = bot + init_ssize; 3396fd75d710SMarcel Moolenaar rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow); 339794f7e29aSAlan Cox 3398fd75d710SMarcel Moolenaar /* Now set the avail_ssize amount. 
*/ 339994f7e29aSAlan Cox if (rv == KERN_SUCCESS) { 340029b45e9eSAlan Cox if (prev_entry != &map->header) 3401fd75d710SMarcel Moolenaar vm_map_clip_end(map, prev_entry, bot); 3402fd75d710SMarcel Moolenaar new_entry = prev_entry->next; 3403fd75d710SMarcel Moolenaar if (new_entry->end != top || new_entry->start != bot) 340494f7e29aSAlan Cox panic("Bad entry start/end for new stack entry"); 3405b21a0008SMarcel Moolenaar 3406fd75d710SMarcel Moolenaar new_entry->avail_ssize = max_ssize - init_ssize; 3407fd75d710SMarcel Moolenaar if (orient & MAP_STACK_GROWS_DOWN) 3408fd75d710SMarcel Moolenaar new_entry->eflags |= MAP_ENTRY_GROWS_DOWN; 3409fd75d710SMarcel Moolenaar if (orient & MAP_STACK_GROWS_UP) 3410fd75d710SMarcel Moolenaar new_entry->eflags |= MAP_ENTRY_GROWS_UP; 341194f7e29aSAlan Cox } 341294f7e29aSAlan Cox 341394f7e29aSAlan Cox vm_map_unlock(map); 341494f7e29aSAlan Cox return (rv); 341594f7e29aSAlan Cox } 341694f7e29aSAlan Cox 34179a6d144fSKonstantin Belousov static int stack_guard_page = 0; 34189a6d144fSKonstantin Belousov TUNABLE_INT("security.bsd.stack_guard_page", &stack_guard_page); 34199a6d144fSKonstantin Belousov SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RW, 34209a6d144fSKonstantin Belousov &stack_guard_page, 0, 34219a6d144fSKonstantin Belousov "Insert stack guard page ahead of the growable segments."); 34229a6d144fSKonstantin Belousov 342394f7e29aSAlan Cox /* Attempts to grow a vm stack entry. Returns KERN_SUCCESS if the 342494f7e29aSAlan Cox * desired address is already mapped, or if we successfully grow 342594f7e29aSAlan Cox * the stack. Also returns KERN_SUCCESS if addr is outside the 342694f7e29aSAlan Cox * stack range (this is strange, but preserves compatibility with 342794f7e29aSAlan Cox * the grow function in vm_machdep.c). 
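 *
 * (Illustrative sketch, not part of the original: the typical caller is
 * the page-fault path, which on a fault just beyond a stack entry would
 * do roughly
 *
 *	if (vm_map_growstack(curproc, fault_addr) != KERN_SUCCESS)
 *		return (KERN_FAILURE);
 *
 * before retrying the map lookup; the exact caller-side logic lives in
 * the fault and trap code and is only approximated here.)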
342894f7e29aSAlan Cox */ 342994f7e29aSAlan Cox int 343094f7e29aSAlan Cox vm_map_growstack(struct proc *p, vm_offset_t addr) 343194f7e29aSAlan Cox { 3432b21a0008SMarcel Moolenaar vm_map_entry_t next_entry, prev_entry; 3433b21a0008SMarcel Moolenaar vm_map_entry_t new_entry, stack_entry; 343494f7e29aSAlan Cox struct vmspace *vm = p->p_vmspace; 343594f7e29aSAlan Cox vm_map_t map = &vm->vm_map; 343694f7e29aSAlan Cox vm_offset_t end; 3437cfe52ecfSAndrey Zonov vm_size_t growsize; 3438b21a0008SMarcel Moolenaar size_t grow_amount, max_grow; 34397e19eda4SAndrey Zonov rlim_t lmemlim, stacklim, vmemlim; 3440b21a0008SMarcel Moolenaar int is_procstack, rv; 3441ef694c1aSEdward Tomasz Napierala struct ucred *cred; 34421ba5ad42SEdward Tomasz Napierala #ifdef notyet 34431ba5ad42SEdward Tomasz Napierala uint64_t limit; 34441ba5ad42SEdward Tomasz Napierala #endif 3445afcc55f3SEdward Tomasz Napierala #ifdef RACCT 34461ba5ad42SEdward Tomasz Napierala int error; 3447afcc55f3SEdward Tomasz Napierala #endif 344823955314SAlfred Perlstein 344994f7e29aSAlan Cox Retry: 345091d5354aSJohn Baldwin PROC_LOCK(p); 34517e19eda4SAndrey Zonov lmemlim = lim_cur(p, RLIMIT_MEMLOCK); 345291d5354aSJohn Baldwin stacklim = lim_cur(p, RLIMIT_STACK); 3453bfee999dSAlan Cox vmemlim = lim_cur(p, RLIMIT_VMEM); 345491d5354aSJohn Baldwin PROC_UNLOCK(p); 345591d5354aSJohn Baldwin 345694f7e29aSAlan Cox vm_map_lock_read(map); 345794f7e29aSAlan Cox 345894f7e29aSAlan Cox /* If addr is already in the entry range, no need to grow. */ 345994f7e29aSAlan Cox if (vm_map_lookup_entry(map, addr, &prev_entry)) { 346094f7e29aSAlan Cox vm_map_unlock_read(map); 34610cddd8f0SMatthew Dillon return (KERN_SUCCESS); 346294f7e29aSAlan Cox } 346394f7e29aSAlan Cox 3464b21a0008SMarcel Moolenaar next_entry = prev_entry->next; 3465b21a0008SMarcel Moolenaar if (!(prev_entry->eflags & MAP_ENTRY_GROWS_UP)) { 3466b21a0008SMarcel Moolenaar /* 3467b21a0008SMarcel Moolenaar * This entry does not grow upwards. Since the address lies 3468b21a0008SMarcel Moolenaar * beyond this entry, the next entry (if one exists) has to 3469b21a0008SMarcel Moolenaar * be a downward growable entry. The entry list header is 3470b21a0008SMarcel Moolenaar * never a growable entry, so it suffices to check the flags. 347194f7e29aSAlan Cox */ 3472b21a0008SMarcel Moolenaar if (!(next_entry->eflags & MAP_ENTRY_GROWS_DOWN)) { 347394f7e29aSAlan Cox vm_map_unlock_read(map); 34740cddd8f0SMatthew Dillon return (KERN_SUCCESS); 347594f7e29aSAlan Cox } 3476b21a0008SMarcel Moolenaar stack_entry = next_entry; 3477b21a0008SMarcel Moolenaar } else { 3478b21a0008SMarcel Moolenaar /* 3479b21a0008SMarcel Moolenaar * This entry grows upward. If the next entry does not at 3480b21a0008SMarcel Moolenaar * least grow downwards, this is the entry we need to grow. 3481b21a0008SMarcel Moolenaar * Otherwise we have two possible choices and we have to 3482b21a0008SMarcel Moolenaar * select one. 3483b21a0008SMarcel Moolenaar */ 3484b21a0008SMarcel Moolenaar if (next_entry->eflags & MAP_ENTRY_GROWS_DOWN) { 3485b21a0008SMarcel Moolenaar /* 3486b21a0008SMarcel Moolenaar * We have two choices: grow the entry closest to 3487b21a0008SMarcel Moolenaar * the address, to minimize the amount of growth.
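 *
 * (Worked example, an addition to the original: if prev_entry ends
 * at 0x1000, next_entry starts at 0x5000, and addr is 0x1800, then
 * addr - prev_entry->end == 0x800 while next_entry->start - addr ==
 * 0x3800, so the upward-growing prev_entry is picked by the test
 * below.)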
3488b21a0008SMarcel Moolenaar */ 3489b21a0008SMarcel Moolenaar if (addr - prev_entry->end <= next_entry->start - addr) 3490b21a0008SMarcel Moolenaar stack_entry = prev_entry; 3491b21a0008SMarcel Moolenaar else 3492b21a0008SMarcel Moolenaar stack_entry = next_entry; 3493b21a0008SMarcel Moolenaar } else 3494b21a0008SMarcel Moolenaar stack_entry = prev_entry; 3495b21a0008SMarcel Moolenaar } 349694f7e29aSAlan Cox 3497b21a0008SMarcel Moolenaar if (stack_entry == next_entry) { 3498b21a0008SMarcel Moolenaar KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_DOWN, ("foo")); 3499b21a0008SMarcel Moolenaar KASSERT(addr < stack_entry->start, ("foo")); 3500b21a0008SMarcel Moolenaar end = (prev_entry != &map->header) ? prev_entry->end : 3501b21a0008SMarcel Moolenaar stack_entry->start - stack_entry->avail_ssize; 350294f7e29aSAlan Cox grow_amount = roundup(stack_entry->start - addr, PAGE_SIZE); 3503b21a0008SMarcel Moolenaar max_grow = stack_entry->start - end; 3504b21a0008SMarcel Moolenaar } else { 3505b21a0008SMarcel Moolenaar KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_UP, ("foo")); 350608667f6dSMarcel Moolenaar KASSERT(addr >= stack_entry->end, ("foo")); 3507b21a0008SMarcel Moolenaar end = (next_entry != &map->header) ? next_entry->start : 3508b21a0008SMarcel Moolenaar stack_entry->end + stack_entry->avail_ssize; 3509fd75d710SMarcel Moolenaar grow_amount = roundup(addr + 1 - stack_entry->end, PAGE_SIZE); 3510b21a0008SMarcel Moolenaar max_grow = end - stack_entry->end; 3511b21a0008SMarcel Moolenaar } 3512b21a0008SMarcel Moolenaar 351394f7e29aSAlan Cox if (grow_amount > stack_entry->avail_ssize) { 351494f7e29aSAlan Cox vm_map_unlock_read(map); 35150cddd8f0SMatthew Dillon return (KERN_NO_SPACE); 351694f7e29aSAlan Cox } 351794f7e29aSAlan Cox 3518b21a0008SMarcel Moolenaar /* 3519b21a0008SMarcel Moolenaar * If there is no longer enough space between the entries, fail and 3520b21a0008SMarcel Moolenaar * adjust the available space. Note: this should only happen if the 3521b21a0008SMarcel Moolenaar * user has mapped into the stack area after the stack was created, 3522b21a0008SMarcel Moolenaar * and is probably an error. 352394f7e29aSAlan Cox * 3524b21a0008SMarcel Moolenaar * This also effectively destroys any guard page the user might have 3525b21a0008SMarcel Moolenaar * intended by limiting the stack size. 352694f7e29aSAlan Cox */ 35279a6d144fSKonstantin Belousov if (grow_amount + (stack_guard_page ? PAGE_SIZE : 0) > max_grow) { 352825adb370SBrian Feldman if (vm_map_lock_upgrade(map)) 352994f7e29aSAlan Cox goto Retry; 353094f7e29aSAlan Cox 3531b21a0008SMarcel Moolenaar stack_entry->avail_ssize = max_grow; 353294f7e29aSAlan Cox 353394f7e29aSAlan Cox vm_map_unlock(map); 35340cddd8f0SMatthew Dillon return (KERN_NO_SPACE); 353594f7e29aSAlan Cox } 353694f7e29aSAlan Cox 3537b21a0008SMarcel Moolenaar is_procstack = (addr >= (vm_offset_t)vm->vm_maxsaddr) ? 1 : 0; 353894f7e29aSAlan Cox 3539b21a0008SMarcel Moolenaar /* 3540b21a0008SMarcel Moolenaar * If this is the main process stack, see if we're over the stack 3541b21a0008SMarcel Moolenaar * limit.
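 *
 * (Illustrative worked example, an addition to the original: stacklim
 * was read above via lim_cur(p, RLIMIT_STACK), so with a 512 KB soft
 * stack limit and ctob(vm->vm_ssize) already at 508 KB, a grow_amount
 * of 8 KB makes
 *
 *	ctob(vm->vm_ssize) + grow_amount > stacklim
 *
 * true and the check below fails the fault with KERN_NO_SPACE.)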
354294f7e29aSAlan Cox */ 354391d5354aSJohn Baldwin if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) { 354494f7e29aSAlan Cox vm_map_unlock_read(map); 35450cddd8f0SMatthew Dillon return (KERN_NO_SPACE); 354694f7e29aSAlan Cox } 3547afcc55f3SEdward Tomasz Napierala #ifdef RACCT 35481ba5ad42SEdward Tomasz Napierala PROC_LOCK(p); 35491ba5ad42SEdward Tomasz Napierala if (is_procstack && 35501ba5ad42SEdward Tomasz Napierala racct_set(p, RACCT_STACK, ctob(vm->vm_ssize) + grow_amount)) { 35511ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 35521ba5ad42SEdward Tomasz Napierala vm_map_unlock_read(map); 35531ba5ad42SEdward Tomasz Napierala return (KERN_NO_SPACE); 35541ba5ad42SEdward Tomasz Napierala } 35551ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 3556afcc55f3SEdward Tomasz Napierala #endif 355794f7e29aSAlan Cox 3558cfe52ecfSAndrey Zonov /* Round up the grow amount to a multiple of sgrowsiz */ 3559cfe52ecfSAndrey Zonov growsize = sgrowsiz; 3560cfe52ecfSAndrey Zonov grow_amount = roundup(grow_amount, growsize); 3561b21a0008SMarcel Moolenaar if (grow_amount > stack_entry->avail_ssize) 356294f7e29aSAlan Cox grow_amount = stack_entry->avail_ssize; 356391d5354aSJohn Baldwin if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) { 3564e4826248SAlan Cox grow_amount = trunc_page((vm_size_t)stacklim) - 3565e4826248SAlan Cox ctob(vm->vm_ssize); 356694f7e29aSAlan Cox } 35671ba5ad42SEdward Tomasz Napierala #ifdef notyet 35681ba5ad42SEdward Tomasz Napierala PROC_LOCK(p); 35691ba5ad42SEdward Tomasz Napierala limit = racct_get_available(p, RACCT_STACK); 35701ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 35711ba5ad42SEdward Tomasz Napierala if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit)) 35721ba5ad42SEdward Tomasz Napierala grow_amount = limit - ctob(vm->vm_ssize); 35731ba5ad42SEdward Tomasz Napierala #endif 35747e19eda4SAndrey Zonov if (!old_mlock && map->flags & MAP_WIREFUTURE) { 35753ac7d297SAndrey Zonov if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) { 35767e19eda4SAndrey Zonov vm_map_unlock_read(map); 35777e19eda4SAndrey Zonov rv = KERN_NO_SPACE; 35787e19eda4SAndrey Zonov goto out; 35797e19eda4SAndrey Zonov } 35807e19eda4SAndrey Zonov #ifdef RACCT 35817e19eda4SAndrey Zonov PROC_LOCK(p); 35827e19eda4SAndrey Zonov if (racct_set(p, RACCT_MEMLOCK, 35833ac7d297SAndrey Zonov ptoa(pmap_wired_count(map->pmap)) + grow_amount)) { 35847e19eda4SAndrey Zonov PROC_UNLOCK(p); 35857e19eda4SAndrey Zonov vm_map_unlock_read(map); 35867e19eda4SAndrey Zonov rv = KERN_NO_SPACE; 35877e19eda4SAndrey Zonov goto out; 35887e19eda4SAndrey Zonov } 35897e19eda4SAndrey Zonov PROC_UNLOCK(p); 35907e19eda4SAndrey Zonov #endif 35917e19eda4SAndrey Zonov } 3592a69ac174SMatthew Dillon /* If we would blow our VMEM resource limit, no go */ 359391d5354aSJohn Baldwin if (map->size + grow_amount > vmemlim) { 3594a69ac174SMatthew Dillon vm_map_unlock_read(map); 35951ba5ad42SEdward Tomasz Napierala rv = KERN_NO_SPACE; 35961ba5ad42SEdward Tomasz Napierala goto out; 3597a69ac174SMatthew Dillon } 3598afcc55f3SEdward Tomasz Napierala #ifdef RACCT 35991ba5ad42SEdward Tomasz Napierala PROC_LOCK(p); 36001ba5ad42SEdward Tomasz Napierala if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) { 36011ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 36021ba5ad42SEdward Tomasz Napierala vm_map_unlock_read(map); 36031ba5ad42SEdward Tomasz Napierala rv = KERN_NO_SPACE; 36041ba5ad42SEdward Tomasz Napierala goto out; 36051ba5ad42SEdward Tomasz Napierala } 36061ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p);
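	/*
	 * (Illustrative worked example, an addition to the original: with
	 * the sgrowsiz tunable at 128 KB, a 4 KB fault-driven request was
	 * rounded up above as
	 *
	 *	grow_amount = roundup(4 * 1024, 128 * 1024);
	 *
	 * which yields 128 KB; it was then clipped to avail_ssize and to
	 * the RLIMIT_STACK budget before the RLIMIT_MEMLOCK and
	 * RLIMIT_VMEM checks just above.)
	 */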
3607afcc55f3SEdward Tomasz Napierala #endif 3608a69ac174SMatthew Dillon 360925adb370SBrian Feldman if (vm_map_lock_upgrade(map)) 361094f7e29aSAlan Cox goto Retry; 361194f7e29aSAlan Cox 3612b21a0008SMarcel Moolenaar if (stack_entry == next_entry) { 3613b21a0008SMarcel Moolenaar /* 3614b21a0008SMarcel Moolenaar * Growing downward. 3615b21a0008SMarcel Moolenaar */ 361694f7e29aSAlan Cox /* Get the preliminary new entry start value */ 361794f7e29aSAlan Cox addr = stack_entry->start - grow_amount; 361894f7e29aSAlan Cox 3619b21a0008SMarcel Moolenaar /* 3620b21a0008SMarcel Moolenaar * If this puts us into the previous entry, cut back our 3621b21a0008SMarcel Moolenaar * growth to the available space. Also, see the note above. 362294f7e29aSAlan Cox */ 362394f7e29aSAlan Cox if (addr < end) { 3624b21a0008SMarcel Moolenaar stack_entry->avail_ssize = max_grow; 362594f7e29aSAlan Cox addr = end; 36269a6d144fSKonstantin Belousov if (stack_guard_page) 36279a6d144fSKonstantin Belousov addr += PAGE_SIZE; 362894f7e29aSAlan Cox } 362994f7e29aSAlan Cox 363094f7e29aSAlan Cox rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start, 363183ce0853SKonstantin Belousov next_entry->protection, next_entry->max_protection, 0); 363294f7e29aSAlan Cox 363394f7e29aSAlan Cox /* Adjust the available stack space by the amount we grew. */ 363494f7e29aSAlan Cox if (rv == KERN_SUCCESS) { 363529b45e9eSAlan Cox if (prev_entry != &map->header) 363629b45e9eSAlan Cox vm_map_clip_end(map, prev_entry, addr); 3637b21a0008SMarcel Moolenaar new_entry = prev_entry->next; 3638b21a0008SMarcel Moolenaar KASSERT(new_entry == stack_entry->prev, ("foo")); 3639b21a0008SMarcel Moolenaar KASSERT(new_entry->end == stack_entry->start, ("foo")); 3640b21a0008SMarcel Moolenaar KASSERT(new_entry->start == addr, ("foo")); 3641b21a0008SMarcel Moolenaar grow_amount = new_entry->end - new_entry->start; 3642b21a0008SMarcel Moolenaar new_entry->avail_ssize = stack_entry->avail_ssize - 3643b21a0008SMarcel Moolenaar grow_amount; 3644b21a0008SMarcel Moolenaar stack_entry->eflags &= ~MAP_ENTRY_GROWS_DOWN; 3645b21a0008SMarcel Moolenaar new_entry->eflags |= MAP_ENTRY_GROWS_DOWN; 364694f7e29aSAlan Cox } 3647b21a0008SMarcel Moolenaar } else { 3648b21a0008SMarcel Moolenaar /* 3649b21a0008SMarcel Moolenaar * Growing upward. 3650b21a0008SMarcel Moolenaar */ 3651b21a0008SMarcel Moolenaar addr = stack_entry->end + grow_amount; 3652b21a0008SMarcel Moolenaar 3653b21a0008SMarcel Moolenaar /* 3654b21a0008SMarcel Moolenaar * If this puts us into the next entry, cut back our growth 3655b21a0008SMarcel Moolenaar * to the available space. Also, see the note above. 3656b21a0008SMarcel Moolenaar */ 3657b21a0008SMarcel Moolenaar if (addr > end) { 3658b21a0008SMarcel Moolenaar stack_entry->avail_ssize = end - stack_entry->end; 3659b21a0008SMarcel Moolenaar addr = end; 36609a6d144fSKonstantin Belousov if (stack_guard_page) 36619a6d144fSKonstantin Belousov addr -= PAGE_SIZE; 366294f7e29aSAlan Cox } 366394f7e29aSAlan Cox 3664b21a0008SMarcel Moolenaar grow_amount = addr - stack_entry->end; 3665ef694c1aSEdward Tomasz Napierala cred = stack_entry->cred; 3666ef694c1aSEdward Tomasz Napierala if (cred == NULL && stack_entry->object.vm_object != NULL) 3667ef694c1aSEdward Tomasz Napierala cred = stack_entry->object.vm_object->cred; 3668ef694c1aSEdward Tomasz Napierala if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred)) 36693364c323SKonstantin Belousov rv = KERN_NO_SPACE; 3670b21a0008SMarcel Moolenaar /* Grow the underlying object if applicable. 
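 *
 * (Illustrative note, an addition to the original: for the upward
 * grow, vm_object_coalesce() is asked to extend the entry's existing
 * backing object in place; the call below has the shape
 *
 *	vm_object_coalesce(obj, offset, old_entry_size, grow_amount,
 *	    charged);
 *
 * where obj, offset, old_entry_size and charged are placeholder names
 * for the actual arguments; when it fails, the code reports
 * KERN_FAILURE instead of allocating a fresh object.)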
*/ 36713364c323SKonstantin Belousov else if (stack_entry->object.vm_object == NULL || 3672b21a0008SMarcel Moolenaar vm_object_coalesce(stack_entry->object.vm_object, 367357a21abaSAlan Cox stack_entry->offset, 3674b21a0008SMarcel Moolenaar (vm_size_t)(stack_entry->end - stack_entry->start), 3675ef694c1aSEdward Tomasz Napierala (vm_size_t)grow_amount, cred != NULL)) { 367608667f6dSMarcel Moolenaar map->size += (addr - stack_entry->end); 3677b21a0008SMarcel Moolenaar /* Update the current entry. */ 3678b21a0008SMarcel Moolenaar stack_entry->end = addr; 3679199c91abSMarcel Moolenaar stack_entry->avail_ssize -= grow_amount; 36800164e057SAlan Cox vm_map_entry_resize_free(map, stack_entry); 3681b21a0008SMarcel Moolenaar rv = KERN_SUCCESS; 3682b21a0008SMarcel Moolenaar 3683b21a0008SMarcel Moolenaar if (next_entry != &map->header) 3684b21a0008SMarcel Moolenaar vm_map_clip_start(map, next_entry, addr); 3685b21a0008SMarcel Moolenaar } else 3686b21a0008SMarcel Moolenaar rv = KERN_FAILURE; 3687b21a0008SMarcel Moolenaar } 3688b21a0008SMarcel Moolenaar 3689b21a0008SMarcel Moolenaar if (rv == KERN_SUCCESS && is_procstack) 3690b21a0008SMarcel Moolenaar vm->vm_ssize += btoc(grow_amount); 3691b21a0008SMarcel Moolenaar 369294f7e29aSAlan Cox vm_map_unlock(map); 3693b21a0008SMarcel Moolenaar 3694abd498aaSBruce M Simpson /* 3695abd498aaSBruce M Simpson * Heed the MAP_WIREFUTURE flag if it was set for this process. 3696abd498aaSBruce M Simpson */ 3697b21a0008SMarcel Moolenaar if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE)) { 3698b21a0008SMarcel Moolenaar vm_map_wire(map, 3699b21a0008SMarcel Moolenaar (stack_entry == next_entry) ? addr : addr - grow_amount, 3700b21a0008SMarcel Moolenaar (stack_entry == next_entry) ? stack_entry->start : addr, 3701b21a0008SMarcel Moolenaar (p->p_flag & P_SYSTEM) 3702b21a0008SMarcel Moolenaar ? VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES 3703b21a0008SMarcel Moolenaar : VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES); 3704b21a0008SMarcel Moolenaar } 3705abd498aaSBruce M Simpson 37061ba5ad42SEdward Tomasz Napierala out: 3707afcc55f3SEdward Tomasz Napierala #ifdef RACCT 37081ba5ad42SEdward Tomasz Napierala if (rv != KERN_SUCCESS) { 37091ba5ad42SEdward Tomasz Napierala PROC_LOCK(p); 37101ba5ad42SEdward Tomasz Napierala error = racct_set(p, RACCT_VMEM, map->size); 37111ba5ad42SEdward Tomasz Napierala KASSERT(error == 0, ("decreasing RACCT_VMEM failed")); 37127e19eda4SAndrey Zonov if (!old_mlock) { 37137e19eda4SAndrey Zonov error = racct_set(p, RACCT_MEMLOCK, 37143ac7d297SAndrey Zonov ptoa(pmap_wired_count(map->pmap))); 37157e19eda4SAndrey Zonov KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed")); 37167e19eda4SAndrey Zonov } 37171ba5ad42SEdward Tomasz Napierala error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize)); 37181ba5ad42SEdward Tomasz Napierala KASSERT(error == 0, ("decreasing RACCT_STACK failed")); 37191ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 37201ba5ad42SEdward Tomasz Napierala } 3721afcc55f3SEdward Tomasz Napierala #endif 37221ba5ad42SEdward Tomasz Napierala 37230cddd8f0SMatthew Dillon return (rv); 372494f7e29aSAlan Cox } 372594f7e29aSAlan Cox 3726df8bae1dSRodney W. Grimes /* 37275856e12eSJohn Dyson * Unshare the specified VM space for exec. A new vmspace is always 37285856e12eSJohn Dyson * created for the process; it starts out empty, with no mappings.
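 *
 * (Illustrative sketch, not part of the original: an exec path needing
 * a fresh address space would do roughly
 *
 *	error = vmspace_exec(p, sv->sv_minuser, sv->sv_maxuser);
 *	if (error)
 *		return (error);
 *
 * where sv is the image's struct sysentvec; the real call site is in
 * the exec code and is only approximated here.)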
37295856e12eSJohn Dyson */ 373089b57fcfSKonstantin Belousov int 37313ebc1248SPeter Wemm vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser) 37321b40f8c0SMatthew Dillon { 37335856e12eSJohn Dyson struct vmspace *oldvmspace = p->p_vmspace; 37345856e12eSJohn Dyson struct vmspace *newvmspace; 37355856e12eSJohn Dyson 373674d1d2b7SNeel Natu newvmspace = vmspace_alloc(minuser, maxuser, NULL); 373789b57fcfSKonstantin Belousov if (newvmspace == NULL) 373889b57fcfSKonstantin Belousov return (ENOMEM); 373951ab6c28SAlan Cox newvmspace->vm_swrss = oldvmspace->vm_swrss; 37405856e12eSJohn Dyson /* 37415856e12eSJohn Dyson * This code is written like this for prototype purposes. The 37425856e12eSJohn Dyson * goal is to avoid running down the vmspace here, but to let the 37435856e12eSJohn Dyson * other processes that are still using the vmspace finally 37445856e12eSJohn Dyson * run it down. Even though there is little or no chance of blocking 37455856e12eSJohn Dyson * here, it is a good idea to keep this form for future mods. 37465856e12eSJohn Dyson */ 374757051fdcSTor Egge PROC_VMSPACE_LOCK(p); 37485856e12eSJohn Dyson p->p_vmspace = newvmspace; 374957051fdcSTor Egge PROC_VMSPACE_UNLOCK(p); 37506617724cSJeff Roberson if (p == curthread->td_proc) 3751b40ce416SJulian Elischer pmap_activate(curthread); 3752b56ef1c1SJohn Baldwin vmspace_free(oldvmspace); 375389b57fcfSKonstantin Belousov return (0); 37545856e12eSJohn Dyson } 37555856e12eSJohn Dyson 37565856e12eSJohn Dyson /* 37575856e12eSJohn Dyson * Unshare the specified VM space for forcing COW. This 37585856e12eSJohn Dyson * is called by rfork, for the (RFMEM|RFPROC) == 0 case. 37595856e12eSJohn Dyson */ 376089b57fcfSKonstantin Belousov int 37611b40f8c0SMatthew Dillon vmspace_unshare(struct proc *p) 37621b40f8c0SMatthew Dillon { 37635856e12eSJohn Dyson struct vmspace *oldvmspace = p->p_vmspace; 37645856e12eSJohn Dyson struct vmspace *newvmspace; 37653364c323SKonstantin Belousov vm_ooffset_t fork_charge; 37665856e12eSJohn Dyson 37675856e12eSJohn Dyson if (oldvmspace->vm_refcnt == 1) 376889b57fcfSKonstantin Belousov return (0); 37693364c323SKonstantin Belousov fork_charge = 0; 37703364c323SKonstantin Belousov newvmspace = vmspace_fork(oldvmspace, &fork_charge); 377189b57fcfSKonstantin Belousov if (newvmspace == NULL) 377289b57fcfSKonstantin Belousov return (ENOMEM); 3773ef694c1aSEdward Tomasz Napierala if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) { 37743364c323SKonstantin Belousov vmspace_free(newvmspace); 37753364c323SKonstantin Belousov return (ENOMEM); 37763364c323SKonstantin Belousov } 377757051fdcSTor Egge PROC_VMSPACE_LOCK(p); 37785856e12eSJohn Dyson p->p_vmspace = newvmspace; 377957051fdcSTor Egge PROC_VMSPACE_UNLOCK(p); 37806617724cSJeff Roberson if (p == curthread->td_proc) 3781b40ce416SJulian Elischer pmap_activate(curthread); 3782b56ef1c1SJohn Baldwin vmspace_free(oldvmspace); 378389b57fcfSKonstantin Belousov return (0); 37845856e12eSJohn Dyson } 37855856e12eSJohn Dyson 37865856e12eSJohn Dyson /* 3787df8bae1dSRodney W. Grimes * vm_map_lookup: 3788df8bae1dSRodney W. Grimes * 3789df8bae1dSRodney W. Grimes * Finds the VM object, offset, and 3790df8bae1dSRodney W. Grimes * protection for a given virtual address in the 3791df8bae1dSRodney W. Grimes * specified map, assuming a page fault of the 3792df8bae1dSRodney W. Grimes * type specified. 3793df8bae1dSRodney W. Grimes * 3794df8bae1dSRodney W. Grimes * Leaves the map in question locked for read; return 3795df8bae1dSRodney W.
Grimes * values are guaranteed until a vm_map_lookup_done 3796df8bae1dSRodney W. Grimes * call is performed. Note that the map argument 3797df8bae1dSRodney W. Grimes * is in/out; the returned map must be used in 3798df8bae1dSRodney W. Grimes * the call to vm_map_lookup_done. 3799df8bae1dSRodney W. Grimes * 3800df8bae1dSRodney W. Grimes * A handle (out_entry) is returned for use in 3801df8bae1dSRodney W. Grimes * vm_map_lookup_done, to make that fast. 3802df8bae1dSRodney W. Grimes * 3803df8bae1dSRodney W. Grimes * If a lookup is requested with "write protection" 3804df8bae1dSRodney W. Grimes * specified, the map may be changed to perform virtual 3805df8bae1dSRodney W. Grimes * copying operations, although the data referenced will 3806df8bae1dSRodney W. Grimes * remain the same. 3807df8bae1dSRodney W. Grimes */ 3808df8bae1dSRodney W. Grimes int 3809b9dcd593SBruce Evans vm_map_lookup(vm_map_t *var_map, /* IN/OUT */ 3810b9dcd593SBruce Evans vm_offset_t vaddr, 381147221757SJohn Dyson vm_prot_t fault_typea, 3812b9dcd593SBruce Evans vm_map_entry_t *out_entry, /* OUT */ 3813b9dcd593SBruce Evans vm_object_t *object, /* OUT */ 3814b9dcd593SBruce Evans vm_pindex_t *pindex, /* OUT */ 3815b9dcd593SBruce Evans vm_prot_t *out_prot, /* OUT */ 38162d8acc0fSJohn Dyson boolean_t *wired) /* OUT */ 3817df8bae1dSRodney W. Grimes { 3818c0877f10SJohn Dyson vm_map_entry_t entry; 3819c0877f10SJohn Dyson vm_map_t map = *var_map; 3820c0877f10SJohn Dyson vm_prot_t prot; 382147221757SJohn Dyson vm_prot_t fault_type = fault_typea; 38223364c323SKonstantin Belousov vm_object_t eobject; 38230cc74f14SAlan Cox vm_size_t size; 3824ef694c1aSEdward Tomasz Napierala struct ucred *cred; 3825df8bae1dSRodney W. Grimes 3826df8bae1dSRodney W. Grimes RetryLookup:; 3827df8bae1dSRodney W. Grimes 3828df8bae1dSRodney W. Grimes vm_map_lock_read(map); 3829df8bae1dSRodney W. Grimes 3830df8bae1dSRodney W. Grimes /* 38314c3ef59eSAlan Cox * Lookup the faulting address. 3832df8bae1dSRodney W. Grimes */ 3833095104acSAlan Cox if (!vm_map_lookup_entry(map, vaddr, out_entry)) { 3834095104acSAlan Cox vm_map_unlock_read(map); 3835095104acSAlan Cox return (KERN_INVALID_ADDRESS); 3836095104acSAlan Cox } 3837df8bae1dSRodney W. Grimes 38384e94f402SAlan Cox entry = *out_entry; 3839b7b2aac2SJohn Dyson 3840df8bae1dSRodney W. Grimes /* 3841df8bae1dSRodney W. Grimes * Handle submaps. 3842df8bae1dSRodney W. Grimes */ 3843afa07f7eSJohn Dyson if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 3844df8bae1dSRodney W. Grimes vm_map_t old_map = map; 3845df8bae1dSRodney W. Grimes 3846df8bae1dSRodney W. Grimes *var_map = map = entry->object.sub_map; 3847df8bae1dSRodney W. Grimes vm_map_unlock_read(old_map); 3848df8bae1dSRodney W. Grimes goto RetryLookup; 3849df8bae1dSRodney W. Grimes } 3850a04c970aSJohn Dyson 3851df8bae1dSRodney W. Grimes /* 38520d94caffSDavid Greenman * Check whether this task is allowed to have this page. 3853df8bae1dSRodney W. Grimes */ 3854df8bae1dSRodney W. 
Grimes prot = entry->protection; 385547221757SJohn Dyson fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE); 38562db65ab4SAlan Cox if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) { 3857095104acSAlan Cox vm_map_unlock_read(map); 3858095104acSAlan Cox return (KERN_PROTECTION_FAILURE); 385947221757SJohn Dyson } 38602ed14a92SAlan Cox if ((entry->eflags & MAP_ENTRY_USER_WIRED) && 386147221757SJohn Dyson (entry->eflags & MAP_ENTRY_COW) && 3862a6d42a0dSAlan Cox (fault_type & VM_PROT_WRITE)) { 3863095104acSAlan Cox vm_map_unlock_read(map); 3864095104acSAlan Cox return (KERN_PROTECTION_FAILURE); 3865a04c970aSJohn Dyson } 38665b3e0257SDag-Erling Smørgrav if ((fault_typea & VM_PROT_COPY) != 0 && 38675b3e0257SDag-Erling Smørgrav (entry->max_protection & VM_PROT_WRITE) == 0 && 38685b3e0257SDag-Erling Smørgrav (entry->eflags & MAP_ENTRY_COW) == 0) { 38695b3e0257SDag-Erling Smørgrav vm_map_unlock_read(map); 38705b3e0257SDag-Erling Smørgrav return (KERN_PROTECTION_FAILURE); 38715b3e0257SDag-Erling Smørgrav } 3872df8bae1dSRodney W. Grimes 3873df8bae1dSRodney W. Grimes /* 38740d94caffSDavid Greenman * If this page is not pageable, we have to get it for all possible 38750d94caffSDavid Greenman * accesses. 3876df8bae1dSRodney W. Grimes */ 387705f0fdd2SPoul-Henning Kamp *wired = (entry->wired_count != 0); 387805f0fdd2SPoul-Henning Kamp if (*wired) 3879a6d42a0dSAlan Cox fault_type = entry->protection; 38803364c323SKonstantin Belousov size = entry->end - entry->start; 3881df8bae1dSRodney W. Grimes /* 3882df8bae1dSRodney W. Grimes * If the entry was copy-on-write, we either ... 3883df8bae1dSRodney W. Grimes */ 3884afa07f7eSJohn Dyson if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { 3885df8bae1dSRodney W. Grimes /* 38860d94caffSDavid Greenman * If we want to write the page, we may as well handle that 3887ad5fca3bSAlan Cox * now since we've got the map locked. 3888df8bae1dSRodney W. Grimes * 38890d94caffSDavid Greenman * If we don't need to write the page, we just demote the 38900d94caffSDavid Greenman * permissions allowed. 3891df8bae1dSRodney W. Grimes */ 3892a6d42a0dSAlan Cox if ((fault_type & VM_PROT_WRITE) != 0 || 3893a6d42a0dSAlan Cox (fault_typea & VM_PROT_COPY) != 0) { 3894df8bae1dSRodney W. Grimes /* 38950d94caffSDavid Greenman * Make a new object, and place it in the object 38960d94caffSDavid Greenman * chain. Note that no new references have appeared 3897ad5fca3bSAlan Cox * -- one just moved from the map to the new 38980d94caffSDavid Greenman * object. 3899df8bae1dSRodney W. Grimes */ 390025adb370SBrian Feldman if (vm_map_lock_upgrade(map)) 3901df8bae1dSRodney W. Grimes goto RetryLookup; 39029917e010SAlan Cox 3903ef694c1aSEdward Tomasz Napierala if (entry->cred == NULL) { 39043364c323SKonstantin Belousov /* 39053364c323SKonstantin Belousov * The debugger owner is charged for 39063364c323SKonstantin Belousov * the memory. 
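 *
 * (Illustrative note, an addition: once the charge is in
 * place, the shadow step below replaces the entry's
 * object, e.g.
 *
 *	vm_object_shadow(&entry->object.vm_object,
 *	    &entry->offset, size);
 *
 * after which writes land in the new top-level object
 * while reads of untouched pages still fall through to
 * the old one.)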
39073364c323SKonstantin Belousov */ 3908ef694c1aSEdward Tomasz Napierala cred = curthread->td_ucred; 3909ef694c1aSEdward Tomasz Napierala crhold(cred); 3910ef694c1aSEdward Tomasz Napierala if (!swap_reserve_by_cred(size, cred)) { 3911ef694c1aSEdward Tomasz Napierala crfree(cred); 39123364c323SKonstantin Belousov vm_map_unlock(map); 39133364c323SKonstantin Belousov return (KERN_RESOURCE_SHORTAGE); 39143364c323SKonstantin Belousov } 3915ef694c1aSEdward Tomasz Napierala entry->cred = cred; 39163364c323SKonstantin Belousov } 39170cc74f14SAlan Cox vm_object_shadow(&entry->object.vm_object, 39180cc74f14SAlan Cox &entry->offset, size); 3919afa07f7eSJohn Dyson entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 39203364c323SKonstantin Belousov eobject = entry->object.vm_object; 3921ef694c1aSEdward Tomasz Napierala if (eobject->cred != NULL) { 39223364c323SKonstantin Belousov /* 39233364c323SKonstantin Belousov * The object was not shadowed. 39243364c323SKonstantin Belousov */ 3925ef694c1aSEdward Tomasz Napierala swap_release_by_cred(size, entry->cred); 3926ef694c1aSEdward Tomasz Napierala crfree(entry->cred); 3927ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 3928ef694c1aSEdward Tomasz Napierala } else if (entry->cred != NULL) { 392989f6b863SAttilio Rao VM_OBJECT_WLOCK(eobject); 3930ef694c1aSEdward Tomasz Napierala eobject->cred = entry->cred; 39313364c323SKonstantin Belousov eobject->charge = size; 393289f6b863SAttilio Rao VM_OBJECT_WUNLOCK(eobject); 3933ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 39343364c323SKonstantin Belousov } 39359917e010SAlan Cox 39369b09b6c7SMatthew Dillon vm_map_lock_downgrade(map); 39370d94caffSDavid Greenman } else { 3938df8bae1dSRodney W. Grimes /* 39390d94caffSDavid Greenman * We're attempting to read a copy-on-write page -- 39400d94caffSDavid Greenman * don't allow writes. 3941df8bae1dSRodney W. Grimes */ 39422d8acc0fSJohn Dyson prot &= ~VM_PROT_WRITE; 3943df8bae1dSRodney W. Grimes } 3944df8bae1dSRodney W. Grimes } 39452d8acc0fSJohn Dyson 3946df8bae1dSRodney W. Grimes /* 3947df8bae1dSRodney W. Grimes * Create an object if necessary. 3948df8bae1dSRodney W. Grimes */ 39494e71e795SMatthew Dillon if (entry->object.vm_object == NULL && 39504e71e795SMatthew Dillon !map->system_map) { 395125adb370SBrian Feldman if (vm_map_lock_upgrade(map)) 3952df8bae1dSRodney W. Grimes goto RetryLookup; 395324a1cce3SDavid Greenman entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, 39543364c323SKonstantin Belousov atop(size)); 3955df8bae1dSRodney W. Grimes entry->offset = 0; 3956ef694c1aSEdward Tomasz Napierala if (entry->cred != NULL) { 395789f6b863SAttilio Rao VM_OBJECT_WLOCK(entry->object.vm_object); 3958ef694c1aSEdward Tomasz Napierala entry->object.vm_object->cred = entry->cred; 39593364c323SKonstantin Belousov entry->object.vm_object->charge = size; 396089f6b863SAttilio Rao VM_OBJECT_WUNLOCK(entry->object.vm_object); 3961ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 39623364c323SKonstantin Belousov } 39639b09b6c7SMatthew Dillon vm_map_lock_downgrade(map); 3964df8bae1dSRodney W. Grimes } 3965b5b40fa6SJohn Dyson 3966df8bae1dSRodney W. Grimes /* 39670d94caffSDavid Greenman * Return the object/offset from this entry. If the entry was 39680d94caffSDavid Greenman * copy-on-write or empty, it has been fixed up. 3969df8bae1dSRodney W. Grimes */ 39709b09b6c7SMatthew Dillon *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); 3971df8bae1dSRodney W. Grimes *object = entry->object.vm_object; 3972df8bae1dSRodney W. Grimes 3973df8bae1dSRodney W. 
Grimes *out_prot = prot; 3974df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 3975df8bae1dSRodney W. Grimes } 3976df8bae1dSRodney W. Grimes 3977df8bae1dSRodney W. Grimes /* 397819dc5607STor Egge * vm_map_lookup_locked: 397919dc5607STor Egge * 398019dc5607STor Egge * Lookup the faulting address. A version of vm_map_lookup that returns 398119dc5607STor Egge * KERN_FAILURE instead of blocking on map lock or memory allocation. 398219dc5607STor Egge */ 398319dc5607STor Egge int 398419dc5607STor Egge vm_map_lookup_locked(vm_map_t *var_map, /* IN/OUT */ 398519dc5607STor Egge vm_offset_t vaddr, 398619dc5607STor Egge vm_prot_t fault_typea, 398719dc5607STor Egge vm_map_entry_t *out_entry, /* OUT */ 398819dc5607STor Egge vm_object_t *object, /* OUT */ 398919dc5607STor Egge vm_pindex_t *pindex, /* OUT */ 399019dc5607STor Egge vm_prot_t *out_prot, /* OUT */ 399119dc5607STor Egge boolean_t *wired) /* OUT */ 399219dc5607STor Egge { 399319dc5607STor Egge vm_map_entry_t entry; 399419dc5607STor Egge vm_map_t map = *var_map; 399519dc5607STor Egge vm_prot_t prot; 399619dc5607STor Egge vm_prot_t fault_type = fault_typea; 399719dc5607STor Egge 399819dc5607STor Egge /* 39994c3ef59eSAlan Cox * Lookup the faulting address. 400019dc5607STor Egge */ 400119dc5607STor Egge if (!vm_map_lookup_entry(map, vaddr, out_entry)) 400219dc5607STor Egge return (KERN_INVALID_ADDRESS); 400319dc5607STor Egge 400419dc5607STor Egge entry = *out_entry; 400519dc5607STor Egge 400619dc5607STor Egge /* 400719dc5607STor Egge * Fail if the entry refers to a submap. 400819dc5607STor Egge */ 400919dc5607STor Egge if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) 401019dc5607STor Egge return (KERN_FAILURE); 401119dc5607STor Egge 401219dc5607STor Egge /* 401319dc5607STor Egge * Check whether this task is allowed to have this page. 401419dc5607STor Egge */ 401519dc5607STor Egge prot = entry->protection; 401619dc5607STor Egge fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; 401719dc5607STor Egge if ((fault_type & prot) != fault_type) 401819dc5607STor Egge return (KERN_PROTECTION_FAILURE); 401919dc5607STor Egge if ((entry->eflags & MAP_ENTRY_USER_WIRED) && 402019dc5607STor Egge (entry->eflags & MAP_ENTRY_COW) && 4021a6d42a0dSAlan Cox (fault_type & VM_PROT_WRITE)) 402219dc5607STor Egge return (KERN_PROTECTION_FAILURE); 402319dc5607STor Egge 402419dc5607STor Egge /* 402519dc5607STor Egge * If this page is not pageable, we have to get it for all possible 402619dc5607STor Egge * accesses. 402719dc5607STor Egge */ 402819dc5607STor Egge *wired = (entry->wired_count != 0); 402919dc5607STor Egge if (*wired) 4030a6d42a0dSAlan Cox fault_type = entry->protection; 403119dc5607STor Egge 403219dc5607STor Egge if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { 403319dc5607STor Egge /* 403419dc5607STor Egge * Fail if the entry was copy-on-write for a write fault. 403519dc5607STor Egge */ 403619dc5607STor Egge if (fault_type & VM_PROT_WRITE) 403719dc5607STor Egge return (KERN_FAILURE); 403819dc5607STor Egge /* 403919dc5607STor Egge * We're attempting to read a copy-on-write page -- 404019dc5607STor Egge * don't allow writes. 404119dc5607STor Egge */ 404219dc5607STor Egge prot &= ~VM_PROT_WRITE; 404319dc5607STor Egge } 404419dc5607STor Egge 404519dc5607STor Egge /* 404619dc5607STor Egge * Fail if an object should be created. 
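 *
 * (Illustrative sketch, an addition to the original: both lookup
 * variants are used in a bracketed pattern, roughly
 *
 *	rv = vm_map_lookup(&map, vaddr, VM_PROT_READ, &entry,
 *	    &object, &pindex, &prot, &wired);
 *	if (rv == KERN_SUCCESS) {
 *		(fault in pages from object at pindex)
 *		vm_map_lookup_done(map, entry);
 *	}
 *
 * this locked variant instead expects the caller to already hold the
 * map lock, and returns KERN_FAILURE wherever vm_map_lookup() would
 * have to sleep or allocate.)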
404719dc5607STor Egge */ 404819dc5607STor Egge if (entry->object.vm_object == NULL && !map->system_map) 404919dc5607STor Egge return (KERN_FAILURE); 405019dc5607STor Egge 405119dc5607STor Egge /* 405219dc5607STor Egge * Return the object/offset from this entry. If the entry was 405319dc5607STor Egge * copy-on-write or empty, it has been fixed up. 405419dc5607STor Egge */ 405519dc5607STor Egge *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); 405619dc5607STor Egge *object = entry->object.vm_object; 405719dc5607STor Egge 405819dc5607STor Egge *out_prot = prot; 405919dc5607STor Egge return (KERN_SUCCESS); 406019dc5607STor Egge } 406119dc5607STor Egge 406219dc5607STor Egge /* 4063df8bae1dSRodney W. Grimes * vm_map_lookup_done: 4064df8bae1dSRodney W. Grimes * 4065df8bae1dSRodney W. Grimes * Releases locks acquired by a vm_map_lookup 4066df8bae1dSRodney W. Grimes * (according to the handle returned by that lookup). 4067df8bae1dSRodney W. Grimes */ 40680d94caffSDavid Greenman void 40691b40f8c0SMatthew Dillon vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry) 4070df8bae1dSRodney W. Grimes { 4071df8bae1dSRodney W. Grimes /* 4072df8bae1dSRodney W. Grimes * Unlock the main-level map 4073df8bae1dSRodney W. Grimes */ 4074df8bae1dSRodney W. Grimes vm_map_unlock_read(map); 4075df8bae1dSRodney W. Grimes } 4076df8bae1dSRodney W. Grimes 4077c7c34a24SBruce Evans #include "opt_ddb.h" 4078c3cb3e12SDavid Greenman #ifdef DDB 4079c7c34a24SBruce Evans #include <sys/kernel.h> 4080c7c34a24SBruce Evans 4081c7c34a24SBruce Evans #include <ddb/ddb.h> 4082c7c34a24SBruce Evans 40832ebcd458SAttilio Rao static void 40842ebcd458SAttilio Rao vm_map_print(vm_map_t map) 4085df8bae1dSRodney W. Grimes { 4086c0877f10SJohn Dyson vm_map_entry_t entry; 4087c7c34a24SBruce Evans 4088e5f251d2SAlan Cox db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n", 4089e5f251d2SAlan Cox (void *)map, 4090101eeb7fSBruce Evans (void *)map->pmap, map->nentries, map->timestamp); 4091df8bae1dSRodney W. Grimes 4092c7c34a24SBruce Evans db_indent += 2; 4093df8bae1dSRodney W. Grimes for (entry = map->header.next; entry != &map->header; 4094df8bae1dSRodney W. Grimes entry = entry->next) { 4095fc62ef1fSBruce Evans db_iprintf("map entry %p: start=%p, end=%p\n", 4096fc62ef1fSBruce Evans (void *)entry, (void *)entry->start, (void *)entry->end); 4097e5f251d2SAlan Cox { 4098df8bae1dSRodney W. Grimes static char *inheritance_name[4] = 4099df8bae1dSRodney W. Grimes {"share", "copy", "none", "donate_copy"}; 41000d94caffSDavid Greenman 410195e5e988SJohn Dyson db_iprintf(" prot=%x/%x/%s", 4102df8bae1dSRodney W. Grimes entry->protection, 4103df8bae1dSRodney W. Grimes entry->max_protection, 41048aef1712SMatthew Dillon inheritance_name[(int)(unsigned char)entry->inheritance]); 4105df8bae1dSRodney W. Grimes if (entry->wired_count != 0) 410695e5e988SJohn Dyson db_printf(", wired"); 4107df8bae1dSRodney W. Grimes } 41089fdfe602SMatthew Dillon if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 4109cd034a5bSMaxime Henrion db_printf(", share=%p, offset=0x%jx\n", 41109fdfe602SMatthew Dillon (void *)entry->object.sub_map, 4111cd034a5bSMaxime Henrion (uintmax_t)entry->offset); 4112df8bae1dSRodney W. Grimes if ((entry->prev == &map->header) || 41139fdfe602SMatthew Dillon (entry->prev->object.sub_map != 41149fdfe602SMatthew Dillon entry->object.sub_map)) { 4115c7c34a24SBruce Evans db_indent += 2; 41162ebcd458SAttilio Rao vm_map_print((vm_map_t)entry->object.sub_map); 4117c7c34a24SBruce Evans db_indent -= 2; 4118df8bae1dSRodney W. 
Grimes } 41190d94caffSDavid Greenman } else { 4120ef694c1aSEdward Tomasz Napierala if (entry->cred != NULL) 4121ef694c1aSEdward Tomasz Napierala db_printf(", ruid %d", entry->cred->cr_ruid); 4122cd034a5bSMaxime Henrion db_printf(", object=%p, offset=0x%jx", 4123101eeb7fSBruce Evans (void *)entry->object.vm_object, 4124cd034a5bSMaxime Henrion (uintmax_t)entry->offset); 4125ef694c1aSEdward Tomasz Napierala if (entry->object.vm_object && entry->object.vm_object->cred) 4126ef694c1aSEdward Tomasz Napierala db_printf(", obj ruid %d charge %jx", 4127ef694c1aSEdward Tomasz Napierala entry->object.vm_object->cred->cr_ruid, 41283364c323SKonstantin Belousov (uintmax_t)entry->object.vm_object->charge); 4129afa07f7eSJohn Dyson if (entry->eflags & MAP_ENTRY_COW) 4130c7c34a24SBruce Evans db_printf(", copy (%s)", 4131afa07f7eSJohn Dyson (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done"); 4132c7c34a24SBruce Evans db_printf("\n"); 4133df8bae1dSRodney W. Grimes 4134df8bae1dSRodney W. Grimes if ((entry->prev == &map->header) || 4135df8bae1dSRodney W. Grimes (entry->prev->object.vm_object != 4136df8bae1dSRodney W. Grimes entry->object.vm_object)) { 4137c7c34a24SBruce Evans db_indent += 2; 4138101eeb7fSBruce Evans vm_object_print((db_expr_t)(intptr_t) 4139101eeb7fSBruce Evans entry->object.vm_object, 41402ebcd458SAttilio Rao 1, 0, (char *)0); 4141c7c34a24SBruce Evans db_indent -= 2; 4142df8bae1dSRodney W. Grimes } 4143df8bae1dSRodney W. Grimes } 4144df8bae1dSRodney W. Grimes } 4145c7c34a24SBruce Evans db_indent -= 2; 4146df8bae1dSRodney W. Grimes } 414795e5e988SJohn Dyson 41482ebcd458SAttilio Rao DB_SHOW_COMMAND(map, map) 41492ebcd458SAttilio Rao { 41502ebcd458SAttilio Rao 41512ebcd458SAttilio Rao if (!have_addr) { 41522ebcd458SAttilio Rao db_printf("usage: show map <addr>\n"); 41532ebcd458SAttilio Rao return; 41542ebcd458SAttilio Rao } 41552ebcd458SAttilio Rao vm_map_print((vm_map_t)addr); 41562ebcd458SAttilio Rao } 415795e5e988SJohn Dyson 415895e5e988SJohn Dyson DB_SHOW_COMMAND(procvm, procvm) 415995e5e988SJohn Dyson { 416095e5e988SJohn Dyson struct proc *p; 416195e5e988SJohn Dyson 416295e5e988SJohn Dyson if (have_addr) { 416395e5e988SJohn Dyson p = (struct proc *) addr; 416495e5e988SJohn Dyson } else { 416595e5e988SJohn Dyson p = curproc; 416695e5e988SJohn Dyson } 416795e5e988SJohn Dyson 4168ac1e407bSBruce Evans db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n", 4169ac1e407bSBruce Evans (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map, 4170b1028ad1SLuoqi Chen (void *)vmspace_pmap(p->p_vmspace)); 417195e5e988SJohn Dyson 41722ebcd458SAttilio Rao vm_map_print((vm_map_t)&p->p_vmspace->vm_map); 417395e5e988SJohn Dyson } 417495e5e988SJohn Dyson 4175c7c34a24SBruce Evans #endif /* DDB */ 4176
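/*
 * (Illustrative usage, an addition to the original: from the in-kernel
 * debugger the commands defined above are invoked as, e.g.,
 *
 *	db> show map 0xfffff80002dd9000
 *	db> show procvm
 *
 * the map address is hypothetical; "show procvm" without an address
 * dumps curproc's vmspace, while "show map" requires an explicit map
 * address.)
 */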