/*-
 * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
 *
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Virtual memory mapping module.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/vnode.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/file.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/shm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>
#include <vm/swap_pager.h>
#include <vm/uma.h>

/*
 *	Virtual memory maps provide for the mapping, protection,
 *	and sharing of virtual memory objects.  In addition,
 *	this module provides for an efficient virtual copy of
 *	memory from one map to another.
 *
 *	Synchronization is required prior to most operations.
 *
 *	Maps consist of an ordered doubly-linked list of simple
 *	entries; a self-adjusting binary search tree of these
 *	entries is used to speed up lookups.
 *
 *	Since portions of maps are specified by start/end addresses,
 *	which may not align with existing map entries, all
 *	routines merely "clip" entries to these start/end values.
 *	[That is, an entry is split into two, bordering at a
 *	start or end value.]  Note that these clippings may not
 *	always be necessary (as the two resulting entries are then
 *	not changed); however, the clipping is done for convenience.
 *
 *	As mentioned above, virtual copy operations are performed
 *	by copying VM object references from one map to
 *	another, and then marking both regions as copy-on-write.
 */

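/*
 * Editor's note: an added worked example of the clipping rule above; it is
 * not part of the original file.  Clipping the entry [0x1000, 0x4000) at
 * address 0x2000 splits it into two adjacent entries that reference the
 * same backing object:
 *
 *	[0x1000, 0x2000) offset 0x0    [0x2000, 0x4000) offset 0x1000
 *
 * so that a later operation bounded at 0x2000 can act on whole entries.
 */
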
static struct mtx map_sleep_mtx;
static uma_zone_t mapentzone;
static uma_zone_t kmapentzone;
static uma_zone_t mapzone;
static uma_zone_t vmspace_zone;
static int vmspace_zinit(void *mem, int size, int flags);
static int vm_map_zinit(void *mem, int size, int flags);
static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min,
    vm_offset_t max);
static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
static int vm_map_growstack(vm_map_t map, vm_offset_t addr,
    vm_map_entry_t gap_entry);
static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
    vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags);
#ifdef INVARIANTS
static void vm_map_zdtor(void *mem, int size, void *arg);
static void vmspace_zdtor(void *mem, int size, void *arg);
#endif
static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
    vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max,
    int cow);
static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
    vm_offset_t failed_addr);

#define	ENTRY_CHARGED(e) ((e)->cred != NULL || \
    ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \
    !((e)->eflags & MAP_ENTRY_NEEDS_COPY)))

/*
 * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type
 * stable.
 */
#define	PROC_VMSPACE_LOCK(p) do { } while (0)
#define	PROC_VMSPACE_UNLOCK(p) do { } while (0)

/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Asserts that the starting and ending region
 *	addresses fall within the valid range of the map.
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)		\
	{						\
		if (start < vm_map_min(map))		\
			start = vm_map_min(map);	\
		if (end > vm_map_max(map))		\
			end = vm_map_max(map);		\
		if (start > end)			\
			start = end;			\
	}

/*
 *	vm_map_startup:
 *
 *	Initialize the vm_map module.  Must be called before
 *	any other vm_map routines.
 *
 *	Map and entry structures are allocated from the general
 *	purpose memory pool with some exceptions:
 *
 *	- The kernel map and kmem submap are allocated statically.
 *	- Kernel map entries are allocated out of a static pool.
 *
 *	These restrictions are necessary since malloc() uses the
 *	maps and requires map entries.
 */

void
vm_map_startup(void)
{
	mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
	mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
#ifdef INVARIANTS
	    vm_map_zdtor,
#else
	    NULL,
#endif
	    vm_map_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	uma_prealloc(mapzone, MAX_KMAP);
	kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
	    UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
	mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
#ifdef INVARIANTS
	    vmspace_zdtor,
#else
	    NULL,
#endif
	    vmspace_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
}

static int
vmspace_zinit(void *mem, int size, int flags)
{
	struct vmspace *vm;

	vm = (struct vmspace *)mem;

	vm->vm_map.pmap = NULL;
	(void)vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map), flags);
	PMAP_LOCK_INIT(vmspace_pmap(vm));
	return (0);
}

static int
vm_map_zinit(void *mem, int size, int flags)
{
	vm_map_t map;

	map = (vm_map_t)mem;
	memset(map, 0, sizeof(*map));
	mtx_init(&map->system_mtx, "vm map (system)", NULL, MTX_DEF | MTX_DUPOK);
	sx_init(&map->lock, "vm map (user)");
	return (0);
}

#ifdef INVARIANTS
static void
vmspace_zdtor(void *mem, int size, void *arg)
{
	struct vmspace *vm;

	vm = (struct vmspace *)mem;

	vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
}
static void
vm_map_zdtor(void *mem, int size, void *arg)
{
	vm_map_t map;

	map = (vm_map_t)mem;
	KASSERT(map->nentries == 0,
	    ("map %p nentries == %d on free.",
	    map, map->nentries));
	KASSERT(map->size == 0,
	    ("map %p size == %lu on free.",
	    map, (unsigned long)map->size));
}
#endif	/* INVARIANTS */

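/*
 * Editor's note: a minimal, hypothetical sketch (not part of this file,
 * and disabled with #if 0) of the UMA init/dtor pattern used by
 * vm_map_startup() above.  The zinit hook runs once when a slab item is
 * first allocated, and may preinitialize locks because UMA_ZONE_NOFREE
 * keeps items type-stable; the INVARIANTS-only zdtor validates state on
 * every free.  "foo" and its members are invented for illustration.
 */
#if 0
struct foo {
	struct mtx f_lock;	/* survives across alloc/free cycles */
	int f_nusers;		/* must be 0 whenever the item is freed */
};

static uma_zone_t foozone;

static int
foo_zinit(void *mem, int size, int flags)
{
	struct foo *f = mem;

	mtx_init(&f->f_lock, "foo", NULL, MTX_DEF);
	return (0);
}

#ifdef INVARIANTS
static void
foo_zdtor(void *mem, int size, void *arg)
{
	struct foo *f = mem;

	KASSERT(f->f_nusers == 0, ("foo %p freed with users", f));
}
#endif

static void
foo_startup(void)
{
	foozone = uma_zcreate("foo", sizeof(struct foo), NULL,
#ifdef INVARIANTS
	    foo_zdtor,
#else
	    NULL,
#endif
	    foo_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
}
#endif
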
/*
 * Allocate a vmspace structure, including a vm_map and pmap,
 * and initialize those structures.  The refcnt is set to 1.
 *
 * If 'pinit' is NULL then the embedded pmap is initialized via pmap_pinit().
 */
struct vmspace *
vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit)
{
	struct vmspace *vm;

	vm = uma_zalloc(vmspace_zone, M_WAITOK);
	KASSERT(vm->vm_map.pmap == NULL, ("vm_map.pmap must be NULL"));
	if (!pinit(vmspace_pmap(vm))) {
		uma_zfree(vmspace_zone, vm);
		return (NULL);
	}
	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
	_vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max);
	vm->vm_refcnt = 1;
	vm->vm_shm = NULL;
	vm->vm_swrss = 0;
	vm->vm_tsize = 0;
	vm->vm_dsize = 0;
	vm->vm_ssize = 0;
	vm->vm_taddr = 0;
	vm->vm_daddr = 0;
	vm->vm_maxsaddr = 0;
	return (vm);
}

#ifdef RACCT
static void
vmspace_container_reset(struct proc *p)
{

	PROC_LOCK(p);
	racct_set(p, RACCT_DATA, 0);
	racct_set(p, RACCT_STACK, 0);
	racct_set(p, RACCT_RSS, 0);
	racct_set(p, RACCT_MEMLOCK, 0);
	racct_set(p, RACCT_VMEM, 0);
	PROC_UNLOCK(p);
}
#endif

static inline void
vmspace_dofree(struct vmspace *vm)
{

	CTR1(KTR_VM, "vmspace_free: %p", vm);

	/*
	 * Make sure any SysV shm is freed, it might not have been in
	 * exit1().
	 */
	shmexit(vm);

	/*
	 * Lock the map, to wait out all other references to it.
	 * Delete all of the mappings and pages they hold, then call
	 * the pmap module to reclaim anything left.
	 */
	(void)vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map),
	    vm_map_max(&vm->vm_map));

	pmap_release(vmspace_pmap(vm));
	vm->vm_map.pmap = NULL;
	uma_zfree(vmspace_zone, vm);
}

void
vmspace_free(struct vmspace *vm)
{

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "vmspace_free() called");

	if (vm->vm_refcnt == 0)
		panic("vmspace_free: attempt to free already freed vmspace");

	if (atomic_fetchadd_int(&vm->vm_refcnt, -1) == 1)
		vmspace_dofree(vm);
}

void
vmspace_exitfree(struct proc *p)
{
	struct vmspace *vm;

	PROC_VMSPACE_LOCK(p);
	vm = p->p_vmspace;
	p->p_vmspace = NULL;
	PROC_VMSPACE_UNLOCK(p);
	KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace"));
	vmspace_free(vm);
}

void
vmspace_exit(struct thread *td)
{
	int refcnt;
	struct vmspace *vm;
	struct proc *p;

	/*
	 * Release user portion of address space.
	 * This releases references to vnodes,
	 * which could cause I/O if the file has been unlinked.
	 * Need to do this early enough that we can still sleep.
	 *
	 * The last exiting process to reach this point releases as
	 * much of the environment as it can.  vmspace_dofree() is the
	 * slower fallback in case another process had a temporary
	 * reference to the vmspace.
	 */

	p = td->td_proc;
	vm = p->p_vmspace;
	atomic_add_int(&vmspace0.vm_refcnt, 1);
	refcnt = vm->vm_refcnt;
	do {
		if (refcnt > 1 && p->p_vmspace != &vmspace0) {
			/* Switch now since other proc might free vmspace */
			PROC_VMSPACE_LOCK(p);
			p->p_vmspace = &vmspace0;
			PROC_VMSPACE_UNLOCK(p);
			pmap_activate(td);
		}
	} while (!atomic_fcmpset_int(&vm->vm_refcnt, &refcnt, refcnt - 1));
	if (refcnt == 1) {
		if (p->p_vmspace != vm) {
			/* vmspace not yet freed, switch back */
			PROC_VMSPACE_LOCK(p);
			p->p_vmspace = vm;
			PROC_VMSPACE_UNLOCK(p);
			pmap_activate(td);
		}
		pmap_remove_pages(vmspace_pmap(vm));
		/* Switch now since this proc will free vmspace */
		PROC_VMSPACE_LOCK(p);
		p->p_vmspace = &vmspace0;
		PROC_VMSPACE_UNLOCK(p);
		pmap_activate(td);
		vmspace_dofree(vm);
	}
#ifdef RACCT
	if (racct_enable)
		vmspace_container_reset(p);
#endif
}

/* Acquire reference to vmspace owned by another process. */

struct vmspace *
vmspace_acquire_ref(struct proc *p)
{
	struct vmspace *vm;
	int refcnt;

	PROC_VMSPACE_LOCK(p);
	vm = p->p_vmspace;
	if (vm == NULL) {
		PROC_VMSPACE_UNLOCK(p);
		return (NULL);
	}
	refcnt = vm->vm_refcnt;
	do {
		if (refcnt <= 0) {	/* Avoid 0->1 transition */
			PROC_VMSPACE_UNLOCK(p);
			return (NULL);
		}
	} while (!atomic_fcmpset_int(&vm->vm_refcnt, &refcnt, refcnt + 1));
	if (vm != p->p_vmspace) {
		PROC_VMSPACE_UNLOCK(p);
		vmspace_free(vm);
		return (NULL);
	}
	PROC_VMSPACE_UNLOCK(p);
	return (vm);
}

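/*
 * Editor's note: a hypothetical sketch (not part of this file, and
 * disabled with #if 0) of the intended calling pattern for
 * vmspace_acquire_ref(): the caller gets a stable reference or NULL,
 * and must pair a successful acquire with vmspace_free().
 * "sample_proc_resident_count" is invented for illustration.
 */
#if 0
static int
sample_proc_resident_count(struct proc *p, long *residentp)
{
	struct vmspace *vm;

	vm = vmspace_acquire_ref(p);
	if (vm == NULL)
		return (ESRCH);	/* vmspace is gone or being torn down */
	*residentp = vmspace_resident_count(vm);
	vmspace_free(vm);
	return (0);
}
#endif
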
/*
 * Switch between vmspaces in an AIO kernel process.
 *
 * The AIO kernel processes switch to and from a user process's
 * vmspace while performing an I/O operation on behalf of a user
 * process.  The new vmspace is either the vmspace of a user process
 * obtained from an active AIO request or the initial vmspace of the
 * AIO kernel process (when it is idling).  Because user processes
 * will block to drain any active AIO requests before proceeding in
 * exit() or execve(), the vmspace reference count for these vmspaces
 * can never be 0.  This allows for a much simpler implementation than
 * the loop in vmspace_acquire_ref() above.  Similarly, AIO kernel
 * processes hold an extra reference on their initial vmspace for the
 * life of the process so that this guarantee is true for any vmspace
 * passed as 'newvm'.
 */
void
vmspace_switch_aio(struct vmspace *newvm)
{
	struct vmspace *oldvm;

	/* XXX: Need some way to assert that this is an aio daemon. */

	KASSERT(newvm->vm_refcnt > 0,
	    ("vmspace_switch_aio: newvm unreferenced"));

	oldvm = curproc->p_vmspace;
	if (oldvm == newvm)
		return;

	/*
	 * Point to the new address space and refer to it.
	 */
	curproc->p_vmspace = newvm;
	atomic_add_int(&newvm->vm_refcnt, 1);

	/* Activate the new mapping. */
	pmap_activate(curthread);

	/* Remove the daemon's reference to the old address space. */
	KASSERT(oldvm->vm_refcnt > 1,
	    ("vmspace_switch_aio: oldvm dropping last reference"));
	vmspace_free(oldvm);
}

void
_vm_map_lock(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_lock_flags_(&map->system_mtx, 0, file, line);
	else
		sx_xlock_(&map->lock, file, line);
	map->timestamp++;
}

static void
vm_map_process_deferred(void)
{
	struct thread *td;
	vm_map_entry_t entry, next;
	vm_object_t object;

	td = curthread;
	entry = td->td_map_def_user;
	td->td_map_def_user = NULL;
	while (entry != NULL) {
		next = entry->next;
		if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) {
			/*
			 * Decrement the object's writemappings and
			 * possibly the vnode's v_writecount.
			 */
			KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
			    ("Submap with writecount"));
			object = entry->object.vm_object;
			KASSERT(object != NULL, ("No object for writecount"));
			vnode_pager_release_writecount(object, entry->start,
			    entry->end);
		}
		vm_map_entry_deallocate(entry, FALSE);
		entry = next;
	}
}

void
_vm_map_unlock(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
	else {
		sx_xunlock_(&map->lock, file, line);
		vm_map_process_deferred();
	}
}

void
_vm_map_lock_read(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_lock_flags_(&map->system_mtx, 0, file, line);
	else
		sx_slock_(&map->lock, file, line);
}

void
_vm_map_unlock_read(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
	else {
		sx_sunlock_(&map->lock, file, line);
		vm_map_process_deferred();
	}
}

int
_vm_map_trylock(vm_map_t map, const char *file, int line)
{
	int error;

	error = map->system_map ?
	    !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
	    !sx_try_xlock_(&map->lock, file, line);
	if (error == 0)
		map->timestamp++;
	return (error == 0);
}

int
_vm_map_trylock_read(vm_map_t map, const char *file, int line)
{
	int error;

	error = map->system_map ?
	    !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
	    !sx_try_slock_(&map->lock, file, line);
	return (error == 0);
}

/*
 *	_vm_map_lock_upgrade:	[ internal use only ]
 *
 *	Tries to upgrade a read (shared) lock on the specified map to a write
 *	(exclusive) lock.  Returns the value "0" if the upgrade succeeds and a
 *	non-zero value if the upgrade fails.  If the upgrade fails, the map is
 *	returned without a read or write lock held.
 *
 *	Requires that the map be read locked.
 */
int
_vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
{
	unsigned int last_timestamp;

	if (map->system_map) {
		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
	} else {
		if (!sx_try_upgrade_(&map->lock, file, line)) {
			last_timestamp = map->timestamp;
			sx_sunlock_(&map->lock, file, line);
			vm_map_process_deferred();
			/*
			 * If the map's timestamp does not change while the
			 * map is unlocked, then the upgrade succeeds.
			 */
			sx_xlock_(&map->lock, file, line);
			if (last_timestamp != map->timestamp) {
				sx_xunlock_(&map->lock, file, line);
				return (1);
			}
		}
	}
	map->timestamp++;
	return (0);
}

void
_vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
{

	if (map->system_map) {
		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
	} else
		sx_downgrade_(&map->lock, file, line);
}

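/*
 * Editor's note: a hypothetical sketch (not part of this file, and
 * disabled with #if 0) of the read-to-write upgrade protocol documented
 * above.  On failure the map comes back unlocked, so any cached lookup
 * result is stale and the caller must reacquire the lock and restart.
 * "sample_modify_entry" is invented for illustration.
 */
#if 0
static void
sample_modify_entry(vm_map_t map, vm_offset_t addr)
{
	vm_map_entry_t entry;

	vm_map_lock_read(map);
restart:
	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return;
	}
	if (vm_map_lock_upgrade(map) != 0) {
		/* No lock is held here; the entry may be gone. */
		vm_map_lock_read(map);
		goto restart;
	}
	/* ... modify the entry under the exclusive lock ... */
	vm_map_unlock(map);
}
#endif
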
/*
 *	vm_map_locked:
 *
 *	Returns a non-zero value if the caller holds a write (exclusive) lock
 *	on the specified map and the value "0" otherwise.
 */
int
vm_map_locked(vm_map_t map)
{

	if (map->system_map)
		return (mtx_owned(&map->system_mtx));
	else
		return (sx_xlocked(&map->lock));
}

#ifdef INVARIANTS
static void
_vm_map_assert_locked(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
	else
		sx_assert_(&map->lock, SA_XLOCKED, file, line);
}

#define	VM_MAP_ASSERT_LOCKED(map) \
    _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE)

static void
_vm_map_assert_consistent(vm_map_t map)
{
	vm_map_entry_t entry;
	vm_map_entry_t child;
	vm_size_t max_left, max_right;

	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		KASSERT(entry->prev->end <= entry->start,
		    ("map %p prev->end = %jx, start = %jx", map,
		    (uintmax_t)entry->prev->end, (uintmax_t)entry->start));
		KASSERT(entry->start < entry->end,
		    ("map %p start = %jx, end = %jx", map,
		    (uintmax_t)entry->start, (uintmax_t)entry->end));
		KASSERT(entry->end <= entry->next->start,
		    ("map %p end = %jx, next->start = %jx", map,
		    (uintmax_t)entry->end, (uintmax_t)entry->next->start));
		KASSERT(entry->left == NULL ||
		    entry->left->start < entry->start,
		    ("map %p left->start = %jx, start = %jx", map,
		    (uintmax_t)entry->left->start, (uintmax_t)entry->start));
		KASSERT(entry->right == NULL ||
		    entry->start < entry->right->start,
		    ("map %p start = %jx, right->start = %jx", map,
		    (uintmax_t)entry->start, (uintmax_t)entry->right->start));
		child = entry->left;
		max_left = (child != NULL) ? child->max_free :
		    entry->start - entry->prev->end;
		child = entry->right;
		max_right = (child != NULL) ? child->max_free :
		    entry->next->start - entry->end;
		KASSERT(entry->max_free == MAX(max_left, max_right),
		    ("map %p max = %jx, max_left = %jx, max_right = %jx", map,
		    (uintmax_t)entry->max_free,
		    (uintmax_t)max_left, (uintmax_t)max_right));
	}
}

#define VM_MAP_ASSERT_CONSISTENT(map) \
    _vm_map_assert_consistent(map)
#else
#define	VM_MAP_ASSERT_LOCKED(map)
#define VM_MAP_ASSERT_CONSISTENT(map)
#endif

/*
 *	_vm_map_unlock_and_wait:
 *
 *	Atomically releases the lock on the specified map and puts the calling
 *	thread to sleep.  The calling thread will remain asleep until either
 *	vm_map_wakeup() is performed on the map or the specified timeout is
 *	exceeded.
 *
 *	WARNING!  This function does not perform deferred deallocations of
 *	objects and map entries.  Therefore, the calling thread is expected to
 *	reacquire the map lock after reawakening and later perform an ordinary
 *	unlock operation, such as vm_map_unlock(), before completing its
 *	operation on the map.
 */
int
_vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line)
{

	mtx_lock(&map_sleep_mtx);
	if (map->system_map)
		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
	else
		sx_xunlock_(&map->lock, file, line);
	return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps",
	    timo));
}

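/*
 * Editor's note: a hypothetical sketch (not part of this file, and
 * disabled with #if 0) of the sleep/retry protocol described above.
 * "some_condition" stands in for whatever state the caller is waiting
 * on; the final vm_map_unlock() runs the deferred deallocations that
 * the wait itself skipped.
 */
#if 0
static void
sample_wait_on_map(vm_map_t map)
{

	vm_map_lock(map);
	while (some_condition(map)) {
		map->needs_wakeup = TRUE;
		/* Drops the lock, sleeps until vm_map_wakeup(). */
		(void)vm_map_unlock_and_wait(map, 0);
		vm_map_lock(map);
	}
	vm_map_unlock(map);
}
#endif
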
/*
 *	vm_map_wakeup:
 *
 *	Awaken any threads that have slept on the map using
 *	vm_map_unlock_and_wait().
 */
void
vm_map_wakeup(vm_map_t map)
{

	/*
	 * Acquire and release map_sleep_mtx to prevent a wakeup()
	 * from being performed (and lost) between the map unlock
	 * and the msleep() in _vm_map_unlock_and_wait().
	 */
	mtx_lock(&map_sleep_mtx);
	mtx_unlock(&map_sleep_mtx);
	wakeup(&map->root);
}

void
vm_map_busy(vm_map_t map)
{

	VM_MAP_ASSERT_LOCKED(map);
	map->busy++;
}

void
vm_map_unbusy(vm_map_t map)
{

	VM_MAP_ASSERT_LOCKED(map);
	KASSERT(map->busy, ("vm_map_unbusy: not busy"));
	if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) {
		vm_map_modflags(map, 0, MAP_BUSY_WAKEUP);
		wakeup(&map->busy);
	}
}

void
vm_map_wait_busy(vm_map_t map)
{

	VM_MAP_ASSERT_LOCKED(map);
	while (map->busy) {
		vm_map_modflags(map, MAP_BUSY_WAKEUP, 0);
		if (map->system_map)
			msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0);
		else
			sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0);
	}
	map->timestamp++;
}

long
vmspace_resident_count(struct vmspace *vmspace)
{
	return pmap_resident_count(vmspace_pmap(vmspace));
}

/*
 *	vm_map_create:
 *
 *	Creates and returns a new empty VM map with
 *	the given physical map structure, and having
 *	the given lower and upper address bounds.
 */
vm_map_t
vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
{
	vm_map_t result;

	result = uma_zalloc(mapzone, M_WAITOK);
	CTR1(KTR_VM, "vm_map_create: %p", result);
	_vm_map_init(result, pmap, min, max);
	return (result);
}

/*
 * Initialize an existing vm_map structure
 * such as that in the vmspace structure.
 */
static void
_vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
{

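	/*
	 * Editor's note (added comment): the header entry is a sentinel.
	 * Its "end" is deliberately set to the map's minimum address and
	 * its "start" to the maximum, so that the free-space computations
	 * "entry->start - entry->prev->end" and "entry->next->start -
	 * entry->end" need no special cases at either end of the range.
	 */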
	map->header.next = map->header.prev = &map->header;
	map->header.eflags = MAP_ENTRY_HEADER;
	map->needs_wakeup = FALSE;
	map->system_map = 0;
	map->pmap = pmap;
	map->header.end = min;
	map->header.start = max;
	map->flags = 0;
	map->root = NULL;
	map->timestamp = 0;
	map->busy = 0;
	map->anon_loc = 0;
}

void
vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
{

	_vm_map_init(map, pmap, min, max);
	mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
	sx_init(&map->lock, "user map");
}

/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 */
static void
vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
{
	uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
}

/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion.
 *	No entry fields are filled in.
 */
static vm_map_entry_t
vm_map_entry_create(vm_map_t map)
{
	vm_map_entry_t new_entry;

	if (map->system_map)
		new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
	else
		new_entry = uma_zalloc(mapentzone, M_WAITOK);
	if (new_entry == NULL)
		panic("vm_map_entry_create: kernel resources exhausted");
	return (new_entry);
}

/*
 *	vm_map_entry_set_behavior:
 *
 *	Set the expected access behavior, either normal, random, or
 *	sequential.
 */
static inline void
vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
{
	entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
	    (behavior & MAP_ENTRY_BEHAV_MASK);
}

/*
 *	vm_map_entry_set_max_free:
 *
 *	Set the max_free field in a vm_map_entry.
 */
static inline void
vm_map_entry_set_max_free(vm_map_entry_t entry)
{
	vm_map_entry_t child;
	vm_size_t max_left, max_right;

	child = entry->left;
	max_left = (child != NULL) ? child->max_free :
	    entry->start - entry->prev->end;
	child = entry->right;
	max_right = (child != NULL) ? child->max_free :
	    entry->next->start - entry->end;
	entry->max_free = MAX(max_left, max_right);
}

#define SPLAY_LEFT_STEP(root, y, rlist, test) do {	\
	y = root->left;					\
	if (y != NULL && (test)) {			\
		/* Rotate right and make y root. */	\
		root->left = y->right;			\
		y->right = root;			\
		vm_map_entry_set_max_free(root);	\
		root = y;				\
		y = root->left;				\
	}						\
	/* Put root on rlist. */			\
	root->left = rlist;				\
	rlist = root;					\
	root = y;					\
} while (0)

#define SPLAY_RIGHT_STEP(root, y, llist, test) do {	\
	y = root->right;				\
	if (y != NULL && (test)) {			\
		/* Rotate left and make y root. */	\
		root->right = y->left;			\
		y->left = root;				\
		vm_map_entry_set_max_free(root);	\
		root = y;				\
		y = root->right;			\
	}						\
	/* Put root on llist. */			\
	root->right = llist;				\
	llist = root;					\
	root = y;					\
} while (0)

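/*
 * Editor's note: an added illustration (not in the original) of one
 * SPLAY_LEFT_STEP in which the test succeeds.  First a right rotation
 * promotes y; then the rotated node is pushed onto rlist, chained
 * through its left pointer, and the descent continues from A:
 *
 *	      root                y
 *	      /  \               / \
 *	     y    C    ==>      A   root   ==>   rlist = y, root = A
 *	    / \                     /  \         (y->right keeps the old
 *	   A   B                   B    C         root; y->left chains the
 *	                                          previous rlist)
 *
 * SPLAY_RIGHT_STEP is the mirror image, chaining llist through the
 * right pointer.
 */
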
9594e94f402SAlan Cox */ 9604e94f402SAlan Cox static vm_map_entry_t 961*9f701172SKonstantin Belousov vm_map_splay_split(vm_offset_t addr, vm_size_t length, 962*9f701172SKonstantin Belousov vm_map_entry_t root, vm_map_entry_t *out_llist, vm_map_entry_t *out_rlist) 9634e94f402SAlan Cox { 9640164e057SAlan Cox vm_map_entry_t llist, rlist; 9650164e057SAlan Cox vm_map_entry_t y; 9664e94f402SAlan Cox 9670164e057SAlan Cox llist = NULL; 9680164e057SAlan Cox rlist = NULL; 969*9f701172SKonstantin Belousov while (root != NULL && root->max_free >= length) { 9700164e057SAlan Cox if (addr < root->start) { 971*9f701172SKonstantin Belousov SPLAY_LEFT_STEP(root, y, rlist, 972*9f701172SKonstantin Belousov y->max_free >= length && addr < y->start); 9737438d60bSAlan Cox } else if (addr >= root->end) { 974*9f701172SKonstantin Belousov SPLAY_RIGHT_STEP(root, y, llist, 975*9f701172SKonstantin Belousov y->max_free >= length && addr >= y->end); 9767438d60bSAlan Cox } else 9777438d60bSAlan Cox break; 9780164e057SAlan Cox } 979*9f701172SKonstantin Belousov *out_llist = llist; 980*9f701172SKonstantin Belousov *out_rlist = rlist; 981*9f701172SKonstantin Belousov return (root); 982*9f701172SKonstantin Belousov } 983*9f701172SKonstantin Belousov 984*9f701172SKonstantin Belousov static void 985*9f701172SKonstantin Belousov vm_map_splay_findnext(vm_map_entry_t root, vm_map_entry_t *iolist) 986*9f701172SKonstantin Belousov { 987*9f701172SKonstantin Belousov vm_map_entry_t rlist, y; 988*9f701172SKonstantin Belousov 989*9f701172SKonstantin Belousov root = root->right; 990*9f701172SKonstantin Belousov rlist = *iolist; 991*9f701172SKonstantin Belousov while (root != NULL) 992*9f701172SKonstantin Belousov SPLAY_LEFT_STEP(root, y, rlist, true); 993*9f701172SKonstantin Belousov *iolist = rlist; 994*9f701172SKonstantin Belousov } 995*9f701172SKonstantin Belousov 996*9f701172SKonstantin Belousov static void 997*9f701172SKonstantin Belousov vm_map_splay_findprev(vm_map_entry_t root, vm_map_entry_t *iolist) 998*9f701172SKonstantin Belousov { 999*9f701172SKonstantin Belousov vm_map_entry_t llist, y; 1000*9f701172SKonstantin Belousov 1001*9f701172SKonstantin Belousov root = root->left; 1002*9f701172SKonstantin Belousov llist = *iolist; 1003*9f701172SKonstantin Belousov while (root != NULL) 1004*9f701172SKonstantin Belousov SPLAY_RIGHT_STEP(root, y, llist, true); 1005*9f701172SKonstantin Belousov *iolist = llist; 1006*9f701172SKonstantin Belousov } 10070164e057SAlan Cox 10080164e057SAlan Cox /* 1009*9f701172SKonstantin Belousov * Walk back up the two spines, flip the pointers and set max_free. The 1010*9f701172SKonstantin Belousov * subtrees of the root go at the bottom of llist and rlist. 
10110164e057SAlan Cox */ 1012*9f701172SKonstantin Belousov static vm_map_entry_t 1013*9f701172SKonstantin Belousov vm_map_splay_merge(vm_map_entry_t root, 1014*9f701172SKonstantin Belousov vm_map_entry_t llist, vm_map_entry_t rlist, 1015*9f701172SKonstantin Belousov vm_map_entry_t ltree, vm_map_entry_t rtree) 1016*9f701172SKonstantin Belousov { 1017*9f701172SKonstantin Belousov vm_map_entry_t y; 1018*9f701172SKonstantin Belousov 10190164e057SAlan Cox while (llist != NULL) { 10200164e057SAlan Cox y = llist->right; 10210164e057SAlan Cox llist->right = ltree; 10220164e057SAlan Cox vm_map_entry_set_max_free(llist); 10230164e057SAlan Cox ltree = llist; 10240164e057SAlan Cox llist = y; 10250164e057SAlan Cox } 10260164e057SAlan Cox while (rlist != NULL) { 10270164e057SAlan Cox y = rlist->left; 10280164e057SAlan Cox rlist->left = rtree; 10290164e057SAlan Cox vm_map_entry_set_max_free(rlist); 10300164e057SAlan Cox rtree = rlist; 10310164e057SAlan Cox rlist = y; 10320164e057SAlan Cox } 10330164e057SAlan Cox 10340164e057SAlan Cox /* 10350164e057SAlan Cox * Final assembly: add ltree and rtree as subtrees of root. 10360164e057SAlan Cox */ 10370164e057SAlan Cox root->left = ltree; 10380164e057SAlan Cox root->right = rtree; 10390164e057SAlan Cox vm_map_entry_set_max_free(root); 10400164e057SAlan Cox 10414e94f402SAlan Cox return (root); 10424e94f402SAlan Cox } 10434e94f402SAlan Cox 10444e94f402SAlan Cox /* 1045*9f701172SKonstantin Belousov * vm_map_entry_splay: 1046*9f701172SKonstantin Belousov * 1047*9f701172SKonstantin Belousov * The Sleator and Tarjan top-down splay algorithm with the 1048*9f701172SKonstantin Belousov * following variation. Max_free must be computed bottom-up, so 1049*9f701172SKonstantin Belousov * on the downward pass, maintain the left and right spines in 1050*9f701172SKonstantin Belousov * reverse order. Then, make a second pass up each side to fix 1051*9f701172SKonstantin Belousov * the pointers and compute max_free. The time bound is O(log n) 1052*9f701172SKonstantin Belousov * amortized. 1053*9f701172SKonstantin Belousov * 1054*9f701172SKonstantin Belousov * The new root is the vm_map_entry containing "addr", or else an 1055*9f701172SKonstantin Belousov * adjacent entry (lower if possible) if addr is not in the tree. 1056*9f701172SKonstantin Belousov * 1057*9f701172SKonstantin Belousov * The map must be locked, and leaves it so. 1058*9f701172SKonstantin Belousov * 1059*9f701172SKonstantin Belousov * Returns: the new root. 1060*9f701172SKonstantin Belousov */ 1061*9f701172SKonstantin Belousov static vm_map_entry_t 1062*9f701172SKonstantin Belousov vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root) 1063*9f701172SKonstantin Belousov { 1064*9f701172SKonstantin Belousov vm_map_entry_t llist, rlist; 1065*9f701172SKonstantin Belousov 1066*9f701172SKonstantin Belousov root = vm_map_splay_split(addr, 0, root, &llist, &rlist); 1067*9f701172SKonstantin Belousov if (root != NULL) { 1068*9f701172SKonstantin Belousov /* do nothing */ 1069*9f701172SKonstantin Belousov } else if (llist != NULL) { 1070*9f701172SKonstantin Belousov /* 1071*9f701172SKonstantin Belousov * Recover the greatest node in the left 1072*9f701172SKonstantin Belousov * subtree and make it the root. 
1073*9f701172SKonstantin Belousov */ 1074*9f701172SKonstantin Belousov root = llist; 1075*9f701172SKonstantin Belousov llist = root->right; 1076*9f701172SKonstantin Belousov root->right = NULL; 1077*9f701172SKonstantin Belousov } else if (rlist != NULL) { 1078*9f701172SKonstantin Belousov /* 1079*9f701172SKonstantin Belousov * Recover the least node in the right 1080*9f701172SKonstantin Belousov * subtree and make it the root. 1081*9f701172SKonstantin Belousov */ 1082*9f701172SKonstantin Belousov root = rlist; 1083*9f701172SKonstantin Belousov rlist = root->left; 1084*9f701172SKonstantin Belousov root->left = NULL; 1085*9f701172SKonstantin Belousov } else { 1086*9f701172SKonstantin Belousov /* There is no root. */ 1087*9f701172SKonstantin Belousov return (NULL); 1088*9f701172SKonstantin Belousov } 1089*9f701172SKonstantin Belousov return (vm_map_splay_merge(root, llist, rlist, 1090*9f701172SKonstantin Belousov root->left, root->right)); 1091*9f701172SKonstantin Belousov } 1092*9f701172SKonstantin Belousov 1093*9f701172SKonstantin Belousov /* 1094df8bae1dSRodney W. Grimes * vm_map_entry_{un,}link: 1095df8bae1dSRodney W. Grimes * 1096df8bae1dSRodney W. Grimes * Insert/remove entries from maps. 1097df8bae1dSRodney W. Grimes */ 10984e94f402SAlan Cox static void 109999c81ca9SAlan Cox vm_map_entry_link(vm_map_t map, 110099c81ca9SAlan Cox vm_map_entry_t entry) 110199c81ca9SAlan Cox { 1102*9f701172SKonstantin Belousov vm_map_entry_t llist, rlist, root; 110321c641b2SJohn Baldwin 1104*9f701172SKonstantin Belousov CTR3(KTR_VM, 1105*9f701172SKonstantin Belousov "vm_map_entry_link: map %p, nentries %d, entry %p", map, 1106*9f701172SKonstantin Belousov map->nentries, entry); 11073a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 110899c81ca9SAlan Cox map->nentries++; 1109*9f701172SKonstantin Belousov root = map->root; 1110*9f701172SKonstantin Belousov root = vm_map_splay_split(entry->start, 0, root, &llist, &rlist); 1111*9f701172SKonstantin Belousov KASSERT(root == NULL, 1112*9f701172SKonstantin Belousov ("vm_map_entry_link: link object already mapped")); 1113*9f701172SKonstantin Belousov entry->prev = (llist == NULL) ? &map->header : llist; 1114*9f701172SKonstantin Belousov entry->next = (rlist == NULL) ? &map->header : rlist; 1115*9f701172SKonstantin Belousov entry->prev->next = entry->next->prev = entry; 1116*9f701172SKonstantin Belousov root = vm_map_splay_merge(entry, llist, rlist, NULL, NULL); 11174e94f402SAlan Cox map->root = entry; 1118*9f701172SKonstantin Belousov VM_MAP_ASSERT_CONSISTENT(map); 1119df8bae1dSRodney W. 
Grimes } 112099c81ca9SAlan Cox 1121*9f701172SKonstantin Belousov enum unlink_merge_type { 1122*9f701172SKonstantin Belousov UNLINK_MERGE_PREV, 1123*9f701172SKonstantin Belousov UNLINK_MERGE_NONE, 1124*9f701172SKonstantin Belousov UNLINK_MERGE_NEXT 1125*9f701172SKonstantin Belousov }; 1126*9f701172SKonstantin Belousov 11274e94f402SAlan Cox static void 112899c81ca9SAlan Cox vm_map_entry_unlink(vm_map_t map, 1129*9f701172SKonstantin Belousov vm_map_entry_t entry, 1130*9f701172SKonstantin Belousov enum unlink_merge_type op) 113199c81ca9SAlan Cox { 1132*9f701172SKonstantin Belousov vm_map_entry_t llist, rlist, root, y; 113399c81ca9SAlan Cox 11343a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 1135*9f701172SKonstantin Belousov llist = entry->prev; 1136*9f701172SKonstantin Belousov rlist = entry->next; 1137*9f701172SKonstantin Belousov llist->next = rlist; 1138*9f701172SKonstantin Belousov rlist->prev = llist; 1139*9f701172SKonstantin Belousov root = map->root; 1140*9f701172SKonstantin Belousov root = vm_map_splay_split(entry->start, 0, root, &llist, &rlist); 1141*9f701172SKonstantin Belousov KASSERT(root != NULL, 1142*9f701172SKonstantin Belousov ("vm_map_entry_unlink: unlink object not mapped")); 11434e94f402SAlan Cox 1144*9f701172SKonstantin Belousov switch (op) { 1145*9f701172SKonstantin Belousov case UNLINK_MERGE_PREV: 1146*9f701172SKonstantin Belousov vm_map_splay_findprev(root, &llist); 1147*9f701172SKonstantin Belousov llist->end = root->end; 1148*9f701172SKonstantin Belousov y = root->right; 1149*9f701172SKonstantin Belousov root = llist; 1150*9f701172SKonstantin Belousov llist = root->right; 1151*9f701172SKonstantin Belousov root->right = y; 1152*9f701172SKonstantin Belousov break; 1153*9f701172SKonstantin Belousov case UNLINK_MERGE_NEXT: 1154*9f701172SKonstantin Belousov vm_map_splay_findnext(root, &rlist); 1155*9f701172SKonstantin Belousov rlist->start = root->start; 1156*9f701172SKonstantin Belousov rlist->offset = root->offset; 1157*9f701172SKonstantin Belousov y = root->left; 1158*9f701172SKonstantin Belousov root = rlist; 1159*9f701172SKonstantin Belousov rlist = root->left; 1160*9f701172SKonstantin Belousov root->left = y; 1161*9f701172SKonstantin Belousov break; 1162*9f701172SKonstantin Belousov case UNLINK_MERGE_NONE: 1163*9f701172SKonstantin Belousov vm_map_splay_findprev(root, &llist); 1164*9f701172SKonstantin Belousov vm_map_splay_findnext(root, &rlist); 1165*9f701172SKonstantin Belousov if (llist != NULL) { 1166*9f701172SKonstantin Belousov root = llist; 1167*9f701172SKonstantin Belousov llist = root->right; 1168*9f701172SKonstantin Belousov root->right = NULL; 1169*9f701172SKonstantin Belousov } else if (rlist != NULL) { 1170*9f701172SKonstantin Belousov root = rlist; 1171*9f701172SKonstantin Belousov rlist = root->left; 1172*9f701172SKonstantin Belousov root->left = NULL; 1173*9f701172SKonstantin Belousov } else 1174*9f701172SKonstantin Belousov root = NULL; 1175*9f701172SKonstantin Belousov break; 1176*9f701172SKonstantin Belousov } 1177*9f701172SKonstantin Belousov if (root != NULL) 1178*9f701172SKonstantin Belousov root = vm_map_splay_merge(root, llist, rlist, 1179*9f701172SKonstantin Belousov root->left, root->right); 1180*9f701172SKonstantin Belousov map->root = root; 1181*9f701172SKonstantin Belousov VM_MAP_ASSERT_CONSISTENT(map); 118299c81ca9SAlan Cox map->nentries--; 118321c641b2SJohn Baldwin CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map, 118421c641b2SJohn Baldwin map->nentries, entry); 1185df8bae1dSRodney W. 
Grimes } 1186df8bae1dSRodney W. Grimes 1187df8bae1dSRodney W. Grimes /* 11880164e057SAlan Cox * vm_map_entry_resize_free: 11890164e057SAlan Cox * 1190*9f701172SKonstantin Belousov * Recompute the amount of free space following a modified vm_map_entry 1191*9f701172SKonstantin Belousov * and propagate those values up the tree. Call this function after 1192*9f701172SKonstantin Belousov * resizing a map entry in-place by changing the end value, without a 1193*9f701172SKonstantin Belousov * call to vm_map_entry_link() or _unlink(). 11940164e057SAlan Cox * 11950164e057SAlan Cox * The map must be locked, and leaves it so. 11960164e057SAlan Cox */ 11970164e057SAlan Cox static void 11980164e057SAlan Cox vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry) 11990164e057SAlan Cox { 1200*9f701172SKonstantin Belousov vm_map_entry_t llist, rlist, root; 12010164e057SAlan Cox 1202*9f701172SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 1203*9f701172SKonstantin Belousov root = map->root; 1204*9f701172SKonstantin Belousov root = vm_map_splay_split(entry->start, 0, root, &llist, &rlist); 1205*9f701172SKonstantin Belousov KASSERT(root != NULL, 1206*9f701172SKonstantin Belousov ("vm_map_entry_resize_free: resize_free object not mapped")); 1207*9f701172SKonstantin Belousov vm_map_splay_findnext(root, &rlist); 1208*9f701172SKonstantin Belousov root->right = NULL; 1209*9f701172SKonstantin Belousov map->root = vm_map_splay_merge(root, llist, rlist, 1210*9f701172SKonstantin Belousov root->left, root->right); 1211*9f701172SKonstantin Belousov VM_MAP_ASSERT_CONSISTENT(map); 1212*9f701172SKonstantin Belousov CTR3(KTR_VM, "vm_map_entry_resize_free: map %p, nentries %d, entry %p", map, 1213*9f701172SKonstantin Belousov map->nentries, entry); 12140164e057SAlan Cox } 12150164e057SAlan Cox 12160164e057SAlan Cox /* 1217df8bae1dSRodney W. Grimes * vm_map_lookup_entry: [ internal use only ] 1218df8bae1dSRodney W. Grimes * 1219df8bae1dSRodney W. Grimes * Finds the map entry containing (or 1220df8bae1dSRodney W. Grimes * immediately preceding) the specified address 1221df8bae1dSRodney W. Grimes * in the given map; the entry is returned 1222df8bae1dSRodney W. Grimes * in the "entry" parameter. The boolean 1223df8bae1dSRodney W. Grimes * result indicates whether the address is 1224df8bae1dSRodney W. Grimes * actually contained in the map. 1225df8bae1dSRodney W. Grimes */ 12260d94caffSDavid Greenman boolean_t 12271b40f8c0SMatthew Dillon vm_map_lookup_entry( 12281b40f8c0SMatthew Dillon vm_map_t map, 12291b40f8c0SMatthew Dillon vm_offset_t address, 12301b40f8c0SMatthew Dillon vm_map_entry_t *entry) /* OUT */ 1231df8bae1dSRodney W. Grimes { 1232*9f701172SKonstantin Belousov vm_map_entry_t cur, lbound; 123305a8c414SAlan Cox boolean_t locked; 1234df8bae1dSRodney W. Grimes 12354c3ef59eSAlan Cox /* 12364c3ef59eSAlan Cox * If the map is empty, then the map entry immediately preceding 12374c3ef59eSAlan Cox * "address" is the map's header. 
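	 * More generally, when no entry contains "address", FALSE is
	 * returned and *entry refers to the nearest preceding entry
	 * (or the header when there is none).  For example, a lookup
	 * of 0x3000 in a map with entries [0x1000, 0x2000) and
	 * [0x4000, 0x5000) returns FALSE with *entry pointing at the
	 * [0x1000, 0x2000) entry.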
12384c3ef59eSAlan Cox */ 12394c3ef59eSAlan Cox cur = map->root; 1240*9f701172SKonstantin Belousov if (cur == NULL) { 12414e94f402SAlan Cox *entry = &map->header; 1242*9f701172SKonstantin Belousov return (FALSE); 1243*9f701172SKonstantin Belousov } 1244*9f701172SKonstantin Belousov if (address >= cur->start && cur->end > address) { 12454c3ef59eSAlan Cox *entry = cur; 12464c3ef59eSAlan Cox return (TRUE); 1247*9f701172SKonstantin Belousov } 1248*9f701172SKonstantin Belousov if ((locked = vm_map_locked(map)) || 124905a8c414SAlan Cox sx_try_upgrade(&map->lock)) { 125005a8c414SAlan Cox /* 125105a8c414SAlan Cox * Splay requires a write lock on the map. However, it only 125205a8c414SAlan Cox * restructures the binary search tree; it does not otherwise 125305a8c414SAlan Cox * change the map. Thus, the map's timestamp need not change 125405a8c414SAlan Cox * on a temporary upgrade. 125505a8c414SAlan Cox */ 12564c3ef59eSAlan Cox map->root = cur = vm_map_entry_splay(address, cur); 1257*9f701172SKonstantin Belousov VM_MAP_ASSERT_CONSISTENT(map); 125805a8c414SAlan Cox if (!locked) 125905a8c414SAlan Cox sx_downgrade(&map->lock); 1260df8bae1dSRodney W. Grimes 12614c3ef59eSAlan Cox /* 12624c3ef59eSAlan Cox * If "address" is contained within a map entry, the new root 12634c3ef59eSAlan Cox * is that map entry. Otherwise, the new root is a map entry 12644c3ef59eSAlan Cox * immediately before or after "address". 12654c3ef59eSAlan Cox */ 1266*9f701172SKonstantin Belousov if (address < cur->start) { 1267*9f701172SKonstantin Belousov *entry = &map->header; 1268*9f701172SKonstantin Belousov return (FALSE); 1269*9f701172SKonstantin Belousov } 1270df8bae1dSRodney W. Grimes *entry = cur; 1271*9f701172SKonstantin Belousov return (address < cur->end); 1272*9f701172SKonstantin Belousov } 127305a8c414SAlan Cox /* 127405a8c414SAlan Cox * Since the map is only locked for read access, perform a 127505a8c414SAlan Cox * standard binary search tree lookup for "address". 127605a8c414SAlan Cox */ 1277*9f701172SKonstantin Belousov lbound = &map->header; 1278*9f701172SKonstantin Belousov do { 127905a8c414SAlan Cox if (address < cur->start) { 128005a8c414SAlan Cox cur = cur->left; 1281*9f701172SKonstantin Belousov } else if (cur->end <= address) { 1282*9f701172SKonstantin Belousov lbound = cur; 1283*9f701172SKonstantin Belousov cur = cur->right; 1284*9f701172SKonstantin Belousov } else { 128505a8c414SAlan Cox *entry = cur; 128605a8c414SAlan Cox return (TRUE); 128705a8c414SAlan Cox } 1288*9f701172SKonstantin Belousov } while (cur != NULL); 1289*9f701172SKonstantin Belousov *entry = lbound; 1290df8bae1dSRodney W. Grimes return (FALSE); 1291df8bae1dSRodney W. Grimes } 1292df8bae1dSRodney W. Grimes 1293df8bae1dSRodney W. Grimes /* 129430dcfc09SJohn Dyson * vm_map_insert: 129530dcfc09SJohn Dyson * 129630dcfc09SJohn Dyson * Inserts the given whole VM object into the target 129730dcfc09SJohn Dyson * map at the specified address range. The object's 129830dcfc09SJohn Dyson * size should match that of the address range. 129930dcfc09SJohn Dyson * 130030dcfc09SJohn Dyson * Requires that the map be locked, and leaves it so. 13012aaeadf8SMatthew Dillon * 13022aaeadf8SMatthew Dillon * If object is non-NULL, ref count must be bumped by caller 13032aaeadf8SMatthew Dillon * prior to making call to account for the new entry. 
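 *
 * An illustrative sketch (not taken verbatim from any caller) of a
 * locked insertion of an anonymous range:
 *
 *	vm_map_lock(map);
 *	rv = vm_map_insert(map, NULL, 0, start, end,
 *	    VM_PROT_RW, VM_PROT_ALL, 0);
 *	vm_map_unlock(map);
 *
 * vm_map_fixed() below follows this pattern.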
130430dcfc09SJohn Dyson */ 130530dcfc09SJohn Dyson int 1306b9dcd593SBruce Evans vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 130733314db0SAlan Cox vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow) 130830dcfc09SJohn Dyson { 130933314db0SAlan Cox vm_map_entry_t new_entry, prev_entry, temp_entry; 1310ef694c1aSEdward Tomasz Napierala struct ucred *cred; 13111569205fSKonstantin Belousov vm_eflags_t protoeflags; 13128211bd45SKonstantin Belousov vm_inherit_t inheritance; 131330dcfc09SJohn Dyson 13143a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 13152e47807cSJeff Roberson KASSERT(object != kernel_object || 131633314db0SAlan Cox (cow & MAP_COPY_ON_WRITE) == 0, 13172e47807cSJeff Roberson ("vm_map_insert: kernel object and COW")); 131833314db0SAlan Cox KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0, 131933314db0SAlan Cox ("vm_map_insert: paradoxical MAP_NOFAULT request")); 132000de6773SKonstantin Belousov KASSERT((prot & ~max) == 0, 132100de6773SKonstantin Belousov ("prot %#x is not subset of max_prot %#x", prot, max)); 13223a0916b8SKonstantin Belousov 132330dcfc09SJohn Dyson /* 132430dcfc09SJohn Dyson * Check that the start and end points are not bogus. 132530dcfc09SJohn Dyson */ 1326f0165b1cSKonstantin Belousov if (start < vm_map_min(map) || end > vm_map_max(map) || 1327f0165b1cSKonstantin Belousov start >= end) 132830dcfc09SJohn Dyson return (KERN_INVALID_ADDRESS); 132930dcfc09SJohn Dyson 133030dcfc09SJohn Dyson /* 133130dcfc09SJohn Dyson * Find the entry prior to the proposed starting address; if it's part 133230dcfc09SJohn Dyson * of an existing entry, this range is bogus. 133330dcfc09SJohn Dyson */ 133430dcfc09SJohn Dyson if (vm_map_lookup_entry(map, start, &temp_entry)) 133530dcfc09SJohn Dyson return (KERN_NO_SPACE); 133630dcfc09SJohn Dyson 133730dcfc09SJohn Dyson prev_entry = temp_entry; 133830dcfc09SJohn Dyson 133930dcfc09SJohn Dyson /* 134030dcfc09SJohn Dyson * Assert that the next entry doesn't overlap the end point. 
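	 * That is, the half-open range [start, end) must lie entirely
	 * within the gap following prev_entry.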
134130dcfc09SJohn Dyson */ 13421c5196c3SKonstantin Belousov if (prev_entry->next->start < end) 134330dcfc09SJohn Dyson return (KERN_NO_SPACE); 134430dcfc09SJohn Dyson 134519bd0d9cSKonstantin Belousov if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL || 134619bd0d9cSKonstantin Belousov max != VM_PROT_NONE)) 134719bd0d9cSKonstantin Belousov return (KERN_INVALID_ARGUMENT); 134819bd0d9cSKonstantin Belousov 1349afa07f7eSJohn Dyson protoeflags = 0; 1350afa07f7eSJohn Dyson if (cow & MAP_COPY_ON_WRITE) 1351e5f13bddSAlan Cox protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY; 135233314db0SAlan Cox if (cow & MAP_NOFAULT) 1353afa07f7eSJohn Dyson protoeflags |= MAP_ENTRY_NOFAULT; 13544f79d873SMatthew Dillon if (cow & MAP_DISABLE_SYNCER) 13554f79d873SMatthew Dillon protoeflags |= MAP_ENTRY_NOSYNC; 13569730a5daSPaul Saab if (cow & MAP_DISABLE_COREDUMP) 13579730a5daSPaul Saab protoeflags |= MAP_ENTRY_NOCOREDUMP; 1358712efe66SAlan Cox if (cow & MAP_STACK_GROWS_DOWN) 1359712efe66SAlan Cox protoeflags |= MAP_ENTRY_GROWS_DOWN; 1360712efe66SAlan Cox if (cow & MAP_STACK_GROWS_UP) 1361712efe66SAlan Cox protoeflags |= MAP_ENTRY_GROWS_UP; 136284110e7eSKonstantin Belousov if (cow & MAP_VN_WRITECOUNT) 136384110e7eSKonstantin Belousov protoeflags |= MAP_ENTRY_VN_WRITECNT; 136419bd0d9cSKonstantin Belousov if ((cow & MAP_CREATE_GUARD) != 0) 136519bd0d9cSKonstantin Belousov protoeflags |= MAP_ENTRY_GUARD; 136619bd0d9cSKonstantin Belousov if ((cow & MAP_CREATE_STACK_GAP_DN) != 0) 136719bd0d9cSKonstantin Belousov protoeflags |= MAP_ENTRY_STACK_GAP_DN; 136819bd0d9cSKonstantin Belousov if ((cow & MAP_CREATE_STACK_GAP_UP) != 0) 136919bd0d9cSKonstantin Belousov protoeflags |= MAP_ENTRY_STACK_GAP_UP; 13708211bd45SKonstantin Belousov if (cow & MAP_INHERIT_SHARE) 13718211bd45SKonstantin Belousov inheritance = VM_INHERIT_SHARE; 13728211bd45SKonstantin Belousov else 13738211bd45SKonstantin Belousov inheritance = VM_INHERIT_DEFAULT; 13744f79d873SMatthew Dillon 1375ef694c1aSEdward Tomasz Napierala cred = NULL; 137619bd0d9cSKonstantin Belousov if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0) 13773364c323SKonstantin Belousov goto charged; 13783364c323SKonstantin Belousov if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) && 13793364c323SKonstantin Belousov ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) { 13803364c323SKonstantin Belousov if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start)) 13813364c323SKonstantin Belousov return (KERN_RESOURCE_SHORTAGE); 13821569205fSKonstantin Belousov KASSERT(object == NULL || 13831569205fSKonstantin Belousov (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 || 1384ef694c1aSEdward Tomasz Napierala object->cred == NULL, 13851569205fSKonstantin Belousov ("overcommit: vm_map_insert o %p", object)); 1386ef694c1aSEdward Tomasz Napierala cred = curthread->td_ucred; 13873364c323SKonstantin Belousov } 13883364c323SKonstantin Belousov 13893364c323SKonstantin Belousov charged: 1390f8616ebfSAlan Cox /* Expand the kernel pmap, if necessary. */ 1391f8616ebfSAlan Cox if (map == kernel_map && end > kernel_vm_end) 1392f8616ebfSAlan Cox pmap_growkernel(end); 13931d284e00SAlan Cox if (object != NULL) { 139430dcfc09SJohn Dyson /* 13951d284e00SAlan Cox * OBJ_ONEMAPPING must be cleared unless this mapping 13961d284e00SAlan Cox * is trivially proven to be the only mapping for any 13971d284e00SAlan Cox * of the object's pages. (Object granularity 13981d284e00SAlan Cox * reference counting is insufficient to recognize 13991d284e00SAlan Cox * aliases with precision.) 
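		 * Here the flag is cleared whenever the object has
		 * other references or is shadowed, the cases in which
		 * no such proof is available.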
140030dcfc09SJohn Dyson */ 140189f6b863SAttilio Rao VM_OBJECT_WLOCK(object); 14021d284e00SAlan Cox if (object->ref_count > 1 || object->shadow_count != 0) 14032aaeadf8SMatthew Dillon vm_object_clear_flag(object, OBJ_ONEMAPPING); 140489f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object); 14052203c46dSMark Johnston } else if ((prev_entry->eflags & ~MAP_ENTRY_USER_WIRED) == 14062203c46dSMark Johnston protoeflags && 1407b5f8c226SKonstantin Belousov (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 && 1408737e25f7SAlan Cox prev_entry->end == start && (prev_entry->cred == cred || 14093364c323SKonstantin Belousov (prev_entry->object.vm_object != NULL && 14101569205fSKonstantin Belousov prev_entry->object.vm_object->cred == cred)) && 14118cc7e047SJohn Dyson vm_object_coalesce(prev_entry->object.vm_object, 141257a21abaSAlan Cox prev_entry->offset, 14138cc7e047SJohn Dyson (vm_size_t)(prev_entry->end - prev_entry->start), 141460169c88SAlan Cox (vm_size_t)(end - prev_entry->end), cred != NULL && 141560169c88SAlan Cox (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) { 141630dcfc09SJohn Dyson /* 14172aaeadf8SMatthew Dillon * We were able to extend the object. Determine if we 14182aaeadf8SMatthew Dillon * can extend the previous map entry to include the 14192aaeadf8SMatthew Dillon * new range as well. 142030dcfc09SJohn Dyson */ 14211569205fSKonstantin Belousov if (prev_entry->inheritance == inheritance && 14221569205fSKonstantin Belousov prev_entry->protection == prot && 1423737e25f7SAlan Cox prev_entry->max_protection == max && 1424737e25f7SAlan Cox prev_entry->wired_count == 0) { 1425737e25f7SAlan Cox KASSERT((prev_entry->eflags & MAP_ENTRY_USER_WIRED) == 1426737e25f7SAlan Cox 0, ("prev_entry %p has incoherent wiring", 1427737e25f7SAlan Cox prev_entry)); 142819bd0d9cSKonstantin Belousov if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0) 14291569205fSKonstantin Belousov map->size += end - prev_entry->end; 143030dcfc09SJohn Dyson prev_entry->end = end; 14310164e057SAlan Cox vm_map_entry_resize_free(map, prev_entry); 14324e71e795SMatthew Dillon vm_map_simplify_entry(map, prev_entry); 143330dcfc09SJohn Dyson return (KERN_SUCCESS); 143430dcfc09SJohn Dyson } 14358cc7e047SJohn Dyson 14362aaeadf8SMatthew Dillon /* 14372aaeadf8SMatthew Dillon * If we can extend the object but cannot extend the 14382aaeadf8SMatthew Dillon * map entry, we have to create a new map entry. We 14392aaeadf8SMatthew Dillon * must bump the ref count on the extended object to 14404e71e795SMatthew Dillon * account for it. object may be NULL. 14412aaeadf8SMatthew Dillon */ 14422aaeadf8SMatthew Dillon object = prev_entry->object.vm_object; 14432aaeadf8SMatthew Dillon offset = prev_entry->offset + 14442aaeadf8SMatthew Dillon (prev_entry->end - prev_entry->start); 14458cc7e047SJohn Dyson vm_object_reference(object); 1446ef694c1aSEdward Tomasz Napierala if (cred != NULL && object != NULL && object->cred != NULL && 14473364c323SKonstantin Belousov !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { 14483364c323SKonstantin Belousov /* Object already accounts for this uid. 
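			 * No separate charge is needed for the new
			 * entry, so the local cred is cleared to avoid
			 * charging the same pages twice.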
*/ 1449ef694c1aSEdward Tomasz Napierala cred = NULL; 14503364c323SKonstantin Belousov } 1451b18bfc3dSJohn Dyson } 145260169c88SAlan Cox if (cred != NULL) 145360169c88SAlan Cox crhold(cred); 14542aaeadf8SMatthew Dillon 14552aaeadf8SMatthew Dillon /* 145630dcfc09SJohn Dyson * Create a new entry 145730dcfc09SJohn Dyson */ 145830dcfc09SJohn Dyson new_entry = vm_map_entry_create(map); 145930dcfc09SJohn Dyson new_entry->start = start; 146030dcfc09SJohn Dyson new_entry->end = end; 1461ef694c1aSEdward Tomasz Napierala new_entry->cred = NULL; 146230dcfc09SJohn Dyson 1463afa07f7eSJohn Dyson new_entry->eflags = protoeflags; 146430dcfc09SJohn Dyson new_entry->object.vm_object = object; 146530dcfc09SJohn Dyson new_entry->offset = offset; 14662267af78SJulian Elischer 14678211bd45SKonstantin Belousov new_entry->inheritance = inheritance; 146830dcfc09SJohn Dyson new_entry->protection = prot; 146930dcfc09SJohn Dyson new_entry->max_protection = max; 147030dcfc09SJohn Dyson new_entry->wired_count = 0; 1471997ac690SKonstantin Belousov new_entry->wiring_thread = NULL; 147213458803SAlan Cox new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT; 1473381b7242SAlan Cox new_entry->next_read = start; 1474e5f251d2SAlan Cox 1475ef694c1aSEdward Tomasz Napierala KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry), 14761569205fSKonstantin Belousov ("overcommit: vm_map_insert leaks vm_map %p", new_entry)); 1477ef694c1aSEdward Tomasz Napierala new_entry->cred = cred; 14783364c323SKonstantin Belousov 147930dcfc09SJohn Dyson /* 148030dcfc09SJohn Dyson * Insert the new entry into the list 148130dcfc09SJohn Dyson */ 1482*9f701172SKonstantin Belousov vm_map_entry_link(map, new_entry); 148319bd0d9cSKonstantin Belousov if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0) 148430dcfc09SJohn Dyson map->size += new_entry->end - new_entry->start; 148530dcfc09SJohn Dyson 14861a484d28SMatthew Dillon /* 1487eaaf9f7fSAlan Cox * Try to coalesce the new entry with both the previous and next 1488eaaf9f7fSAlan Cox * entries in the list. Previously, we only attempted to coalesce 1489eaaf9f7fSAlan Cox * with the previous entry when object is NULL. Here, we handle the 1490eaaf9f7fSAlan Cox * other cases, which are less common. 14911a484d28SMatthew Dillon */ 14924e71e795SMatthew Dillon vm_map_simplify_entry(map, new_entry); 14934e71e795SMatthew Dillon 14941569205fSKonstantin Belousov if ((cow & (MAP_PREFAULT | MAP_PREFAULT_PARTIAL)) != 0) { 14951569205fSKonstantin Belousov vm_map_pmap_enter(map, start, prot, object, OFF_TO_IDX(offset), 14961569205fSKonstantin Belousov end - start, cow & MAP_PREFAULT_PARTIAL); 14974f79d873SMatthew Dillon } 1498e972780aSAlan Cox 149930dcfc09SJohn Dyson return (KERN_SUCCESS); 150030dcfc09SJohn Dyson } 150130dcfc09SJohn Dyson 150230dcfc09SJohn Dyson /* 15030164e057SAlan Cox * vm_map_findspace: 15040164e057SAlan Cox * 15050164e057SAlan Cox * Find the first fit (lowest VM address) for "length" free bytes 15060164e057SAlan Cox * beginning at address >= start in the given map. 15070164e057SAlan Cox * 1508*9f701172SKonstantin Belousov * In a vm_map_entry, "max_free" is the maximum amount of 1509*9f701172SKonstantin Belousov * contiguous free space between an entry in its subtree and a 1510*9f701172SKonstantin Belousov * neighbor of that entry. This allows finding a free region in 1511*9f701172SKonstantin Belousov * one path down the tree, so O(log n) amortized with splay 1512*9f701172SKonstantin Belousov * trees. 15130164e057SAlan Cox * 15140164e057SAlan Cox * The map must be locked, and leaves it so. 
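 *
 * As an illustration: if an entry ends at 0x2000 and its successor
 * begins at 0x6000, the gap between them is 0x4000 bytes.  Each
 * node's max_free records the largest such gap in its subtree, so a
 * request for 0x3000 bytes descends only into subtrees whose
 * max_free is at least 0x3000.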
15150164e057SAlan Cox * 1516*9f701172SKonstantin Belousov * Returns: starting address if sufficient space, 1517*9f701172SKonstantin Belousov * vm_map_max(map)-length+1 if insufficient space. 1518df8bae1dSRodney W. Grimes */ 1519*9f701172SKonstantin Belousov vm_offset_t 1520*9f701172SKonstantin Belousov vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length) 1521df8bae1dSRodney W. Grimes { 1522*9f701172SKonstantin Belousov vm_map_entry_t llist, rlist, root, y; 1523*9f701172SKonstantin Belousov vm_size_t left_length; 1524df8bae1dSRodney W. Grimes 1525986b43f8SAlan Cox /* 1526986b43f8SAlan Cox * Request must fit within min/max VM address and must avoid 1527986b43f8SAlan Cox * address wrap. 1528986b43f8SAlan Cox */ 1529f0165b1cSKonstantin Belousov start = MAX(start, vm_map_min(map)); 1530f0165b1cSKonstantin Belousov if (start + length > vm_map_max(map) || start + length < start) 1531*9f701172SKonstantin Belousov return (vm_map_max(map) - length + 1); 1532df8bae1dSRodney W. Grimes 15330164e057SAlan Cox /* Empty tree means wide open address space. */ 1534*9f701172SKonstantin Belousov if (map->root == NULL) 1535*9f701172SKonstantin Belousov return (start); 15360164e057SAlan Cox 15370164e057SAlan Cox /* 15380164e057SAlan Cox * After splay, if start comes before root node, then there 15390164e057SAlan Cox * must be a gap from start to the root. 15400164e057SAlan Cox */ 1541*9f701172SKonstantin Belousov root = vm_map_splay_split(start, length, map->root, 1542*9f701172SKonstantin Belousov &llist, &rlist); 1543*9f701172SKonstantin Belousov if (root != NULL) 1544*9f701172SKonstantin Belousov start = root->end; 1545*9f701172SKonstantin Belousov else if (rlist != NULL) { 1546*9f701172SKonstantin Belousov root = rlist; 1547*9f701172SKonstantin Belousov rlist = root->left; 1548*9f701172SKonstantin Belousov root->left = NULL; 1549*9f701172SKonstantin Belousov } else { 1550*9f701172SKonstantin Belousov root = llist; 1551*9f701172SKonstantin Belousov llist = root->right; 1552*9f701172SKonstantin Belousov root->right = NULL; 15530164e057SAlan Cox } 1554*9f701172SKonstantin Belousov map->root = vm_map_splay_merge(root, llist, rlist, 1555*9f701172SKonstantin Belousov root->left, root->right); 1556*9f701172SKonstantin Belousov VM_MAP_ASSERT_CONSISTENT(map); 1557*9f701172SKonstantin Belousov if (start + length <= root->start) 1558*9f701172SKonstantin Belousov return (start); 15590164e057SAlan Cox 15600164e057SAlan Cox /* 15610164e057SAlan Cox * Root is the last node that might begin its gap before 1562986b43f8SAlan Cox * start, and this is the last comparison where address 1563986b43f8SAlan Cox * wrap might be a problem. 15640164e057SAlan Cox */ 1565*9f701172SKonstantin Belousov if (root->right == NULL && 1566*9f701172SKonstantin Belousov start + length <= vm_map_max(map)) 1567*9f701172SKonstantin Belousov return (start); 15680164e057SAlan Cox 15690164e057SAlan Cox /* With max_free, can immediately tell if no solution. */ 1570*9f701172SKonstantin Belousov if (root->right == NULL || length > root->right->max_free) 1571*9f701172SKonstantin Belousov return (vm_map_max(map) - length + 1); 15720164e057SAlan Cox 15730164e057SAlan Cox /* 1574*9f701172SKonstantin Belousov * Splay for the least large-enough gap in the right subtree. 15750164e057SAlan Cox */ 1576*9f701172SKonstantin Belousov llist = NULL; 1577*9f701172SKonstantin Belousov rlist = NULL; 1578*9f701172SKonstantin Belousov for (left_length = 0; ; 1579*9f701172SKonstantin Belousov left_length = root->left != NULL ? 
1580*9f701172SKonstantin Belousov root->left->max_free : root->start - llist->end) { 1581*9f701172SKonstantin Belousov if (length <= left_length) 1582*9f701172SKonstantin Belousov SPLAY_LEFT_STEP(root, y, rlist, 1583*9f701172SKonstantin Belousov length <= (y->left != NULL ? 1584*9f701172SKonstantin Belousov y->left->max_free : y->start - llist->end)); 1585*9f701172SKonstantin Belousov else 1586*9f701172SKonstantin Belousov SPLAY_RIGHT_STEP(root, y, llist, 1587*9f701172SKonstantin Belousov length > (y->left != NULL ? 1588*9f701172SKonstantin Belousov y->left->max_free : y->start - root->end)); 1589*9f701172SKonstantin Belousov if (root == NULL) 1590*9f701172SKonstantin Belousov break; 15910164e057SAlan Cox } 1592*9f701172SKonstantin Belousov root = llist; 1593*9f701172SKonstantin Belousov llist = root->right; 1594*9f701172SKonstantin Belousov if ((y = rlist) == NULL) 1595*9f701172SKonstantin Belousov root->right = NULL; 1596*9f701172SKonstantin Belousov else { 1597*9f701172SKonstantin Belousov rlist = y->left; 1598*9f701172SKonstantin Belousov y->left = NULL; 1599*9f701172SKonstantin Belousov root->right = y->right; 1600*9f701172SKonstantin Belousov } 1601*9f701172SKonstantin Belousov root = vm_map_splay_merge(root, llist, rlist, 1602*9f701172SKonstantin Belousov root->left, root->right); 1603*9f701172SKonstantin Belousov if (y != NULL) { 1604*9f701172SKonstantin Belousov y->right = root->right; 1605*9f701172SKonstantin Belousov vm_map_entry_set_max_free(y); 1606*9f701172SKonstantin Belousov root->right = y; 1607*9f701172SKonstantin Belousov vm_map_entry_set_max_free(root); 1608*9f701172SKonstantin Belousov } 1609*9f701172SKonstantin Belousov map->root = root; 1610*9f701172SKonstantin Belousov VM_MAP_ASSERT_CONSISTENT(map); 1611*9f701172SKonstantin Belousov return (root->end); 1612df8bae1dSRodney W. Grimes } 1613df8bae1dSRodney W. 
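/*
 * Illustrative sketch, not part of the original file: a minimal
 * first-fit allocation built from vm_map_findspace() and
 * vm_map_insert() above.  The function name "example_alloc" and its
 * error handling are invented for the example; the failure test
 * follows the vm_map_max(map) - length + 1 convention documented
 * above.  Excluded from compilation.
 */
#if 0
static int
example_alloc(vm_map_t map, vm_size_t length, vm_offset_t *addrp)
{
	vm_offset_t addr;
	int rv;

	vm_map_lock(map);
	addr = vm_map_findspace(map, vm_map_min(map), length);
	if (addr + length > vm_map_max(map)) {
		/* No gap of "length" bytes exists in the map. */
		vm_map_unlock(map);
		return (KERN_NO_SPACE);
	}
	rv = vm_map_insert(map, NULL, 0, addr, addr + length,
	    VM_PROT_RW, VM_PROT_ALL, 0);
	vm_map_unlock(map);
	if (rv == KERN_SUCCESS)
		*addrp = addr;
	return (rv);
}
#endif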
Grimes 1614d239bd3cSKonstantin Belousov int 1615d239bd3cSKonstantin Belousov vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1616b8ca4ef2SAlan Cox vm_offset_t start, vm_size_t length, vm_prot_t prot, 1617d239bd3cSKonstantin Belousov vm_prot_t max, int cow) 1618d239bd3cSKonstantin Belousov { 1619b8ca4ef2SAlan Cox vm_offset_t end; 1620d239bd3cSKonstantin Belousov int result; 1621d239bd3cSKonstantin Belousov 1622d239bd3cSKonstantin Belousov end = start + length; 16234648ba0aSKonstantin Belousov KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || 16244648ba0aSKonstantin Belousov object == NULL, 16254648ba0aSKonstantin Belousov ("vm_map_fixed: non-NULL backing object for stack")); 1626897d81a0SKonstantin Belousov vm_map_lock(map); 1627d239bd3cSKonstantin Belousov VM_MAP_RANGE_CHECK(map, start, end); 162811c42bccSKonstantin Belousov if ((cow & MAP_CHECK_EXCL) == 0) 162911c42bccSKonstantin Belousov vm_map_delete(map, start, end); 16304648ba0aSKonstantin Belousov if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { 16314648ba0aSKonstantin Belousov result = vm_map_stack_locked(map, start, length, sgrowsiz, 16324648ba0aSKonstantin Belousov prot, max, cow); 16334648ba0aSKonstantin Belousov } else { 16344648ba0aSKonstantin Belousov result = vm_map_insert(map, object, offset, start, end, 16354648ba0aSKonstantin Belousov prot, max, cow); 16364648ba0aSKonstantin Belousov } 1637d239bd3cSKonstantin Belousov vm_map_unlock(map); 1638d239bd3cSKonstantin Belousov return (result); 1639d239bd3cSKonstantin Belousov } 1640d239bd3cSKonstantin Belousov 1641fa50a355SKonstantin Belousov static const int aslr_pages_rnd_64[2] = {0x1000, 0x10}; 1642fa50a355SKonstantin Belousov static const int aslr_pages_rnd_32[2] = {0x100, 0x4}; 1643fa50a355SKonstantin Belousov 1644fa50a355SKonstantin Belousov static int cluster_anon = 1; 1645fa50a355SKonstantin Belousov SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW, 1646fa50a355SKonstantin Belousov &cluster_anon, 0, 1647484e9d03SKonstantin Belousov "Cluster anonymous mappings: 0 = no, 1 = yes if no hint, 2 = always"); 1648484e9d03SKonstantin Belousov 1649484e9d03SKonstantin Belousov static bool 1650484e9d03SKonstantin Belousov clustering_anon_allowed(vm_offset_t addr) 1651484e9d03SKonstantin Belousov { 1652484e9d03SKonstantin Belousov 1653484e9d03SKonstantin Belousov switch (cluster_anon) { 1654484e9d03SKonstantin Belousov case 0: 1655484e9d03SKonstantin Belousov return (false); 1656484e9d03SKonstantin Belousov case 1: 1657484e9d03SKonstantin Belousov return (addr == 0); 1658484e9d03SKonstantin Belousov case 2: 1659484e9d03SKonstantin Belousov default: 1660484e9d03SKonstantin Belousov return (true); 1661484e9d03SKonstantin Belousov } 1662484e9d03SKonstantin Belousov } 1663fa50a355SKonstantin Belousov 1664fa50a355SKonstantin Belousov static long aslr_restarts; 1665fa50a355SKonstantin Belousov SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD, 1666fa50a355SKonstantin Belousov &aslr_restarts, 0, 1667fa50a355SKonstantin Belousov "Number of aslr failures"); 1668fa50a355SKonstantin Belousov 1669fa50a355SKonstantin Belousov #define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) 1670fa50a355SKonstantin Belousov 1671df8bae1dSRodney W. Grimes /* 1672fec29688SAlan Cox * Searches for the specified amount of free space in the given map with the 1673fec29688SAlan Cox * specified alignment. Performs an address-ordered, first-fit search from 1674fec29688SAlan Cox * the given address "*addr", with an optional upper bound "max_addr". 
If the 1675fec29688SAlan Cox * parameter "alignment" is zero, then the alignment is computed from the 1676fec29688SAlan Cox * given (object, offset) pair so as to enable the greatest possible use of 1677fec29688SAlan Cox * superpage mappings. Returns KERN_SUCCESS and the address of the free space 1678fec29688SAlan Cox * in "*addr" if successful. Otherwise, returns KERN_NO_SPACE. 1679fec29688SAlan Cox * 1680fec29688SAlan Cox * The map must be locked. Initially, there must be at least "length" bytes 1681fec29688SAlan Cox * of free space at the given address. 1682fec29688SAlan Cox */ 1683fec29688SAlan Cox static int 1684fec29688SAlan Cox vm_map_alignspace(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1685fec29688SAlan Cox vm_offset_t *addr, vm_size_t length, vm_offset_t max_addr, 1686fec29688SAlan Cox vm_offset_t alignment) 1687fec29688SAlan Cox { 1688fec29688SAlan Cox vm_offset_t aligned_addr, free_addr; 1689fec29688SAlan Cox 1690fec29688SAlan Cox VM_MAP_ASSERT_LOCKED(map); 1691fec29688SAlan Cox free_addr = *addr; 1692*9f701172SKonstantin Belousov KASSERT(free_addr == vm_map_findspace(map, free_addr, length), 1693*9f701172SKonstantin Belousov ("caller failed to provide space %d at address %p", 1694*9f701172SKonstantin Belousov (int)length, (void*)free_addr)); 1695fec29688SAlan Cox for (;;) { 1696fec29688SAlan Cox /* 1697fec29688SAlan Cox * At the start of every iteration, the free space at address 1698fec29688SAlan Cox * "*addr" is at least "length" bytes. 1699fec29688SAlan Cox */ 1700fec29688SAlan Cox if (alignment == 0) 1701fec29688SAlan Cox pmap_align_superpage(object, offset, addr, length); 1702fec29688SAlan Cox else if ((*addr & (alignment - 1)) != 0) { 1703fec29688SAlan Cox *addr &= ~(alignment - 1); 1704fec29688SAlan Cox *addr += alignment; 1705fec29688SAlan Cox } 1706fec29688SAlan Cox aligned_addr = *addr; 1707fec29688SAlan Cox if (aligned_addr == free_addr) { 1708fec29688SAlan Cox /* 1709fec29688SAlan Cox * Alignment did not change "*addr", so "*addr" must 1710fec29688SAlan Cox * still provide sufficient free space. 1711fec29688SAlan Cox */ 1712fec29688SAlan Cox return (KERN_SUCCESS); 1713fec29688SAlan Cox } 1714fec29688SAlan Cox 1715fec29688SAlan Cox /* 1716fec29688SAlan Cox * Test for address wrap on "*addr". A wrapped "*addr" could 1717fec29688SAlan Cox * be a valid address, in which case vm_map_findspace() cannot 1718fec29688SAlan Cox * be relied upon to fail. 1719fec29688SAlan Cox */ 1720*9f701172SKonstantin Belousov if (aligned_addr < free_addr) 1721*9f701172SKonstantin Belousov return (KERN_NO_SPACE); 1722*9f701172SKonstantin Belousov *addr = vm_map_findspace(map, aligned_addr, length); 1723*9f701172SKonstantin Belousov if (*addr + length > vm_map_max(map) || 1724fec29688SAlan Cox (max_addr != 0 && *addr + length > max_addr)) 1725fec29688SAlan Cox return (KERN_NO_SPACE); 1726fec29688SAlan Cox free_addr = *addr; 1727fec29688SAlan Cox if (free_addr == aligned_addr) { 1728fec29688SAlan Cox /* 1729fec29688SAlan Cox * If a successful call to vm_map_findspace() did not 1730fec29688SAlan Cox * change "*addr", then "*addr" must still be aligned 1731fec29688SAlan Cox * and provide sufficient free space. 1732fec29688SAlan Cox */ 1733fec29688SAlan Cox return (KERN_SUCCESS); 1734fec29688SAlan Cox } 1735fec29688SAlan Cox } 1736fec29688SAlan Cox } 1737fec29688SAlan Cox 1738fec29688SAlan Cox /* 1739df8bae1dSRodney W. Grimes * vm_map_find finds an unallocated region in the target address 1740df8bae1dSRodney W. Grimes * map with the given length. 
The search is defined to be 1741df8bae1dSRodney W. Grimes * first-fit from the specified address; the region found is 1742df8bae1dSRodney W. Grimes * returned in the same parameter. 1743df8bae1dSRodney W. Grimes * 17442aaeadf8SMatthew Dillon * If object is non-NULL, ref count must be bumped by caller 17452aaeadf8SMatthew Dillon * prior to making call to account for the new entry. 1746df8bae1dSRodney W. Grimes */ 1747df8bae1dSRodney W. Grimes int 1748b9dcd593SBruce Evans vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1749b9dcd593SBruce Evans vm_offset_t *addr, /* IN/OUT */ 1750edb572a3SJohn Baldwin vm_size_t length, vm_offset_t max_addr, int find_space, 1751edb572a3SJohn Baldwin vm_prot_t prot, vm_prot_t max, int cow) 1752df8bae1dSRodney W. Grimes { 1753fa50a355SKonstantin Belousov vm_offset_t alignment, curr_min_addr, min_addr; 1754fa50a355SKonstantin Belousov int gap, pidx, rv, try; 1755fa50a355SKonstantin Belousov bool cluster, en_aslr, update_anon; 1756df8bae1dSRodney W. Grimes 17574648ba0aSKonstantin Belousov KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || 17584648ba0aSKonstantin Belousov object == NULL, 17594648ba0aSKonstantin Belousov ("vm_map_find: non-NULL backing object for stack")); 1760ea7e7006SKonstantin Belousov MPASS((cow & MAP_REMAP) == 0 || (find_space == VMFS_NO_SPACE && 1761ea7e7006SKonstantin Belousov (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0)); 1762ff74a3faSJohn Baldwin if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL || 1763ff74a3faSJohn Baldwin (object->flags & OBJ_COLORED) == 0)) 1764ff74a3faSJohn Baldwin find_space = VMFS_ANY_SPACE; 17655aa60b6fSJohn Baldwin if (find_space >> 8 != 0) { 17665aa60b6fSJohn Baldwin KASSERT((find_space & 0xff) == 0, ("bad VMFS flags")); 17675aa60b6fSJohn Baldwin alignment = (vm_offset_t)1 << (find_space >> 8); 17685aa60b6fSJohn Baldwin } else 17695aa60b6fSJohn Baldwin alignment = 0; 1770fa50a355SKonstantin Belousov en_aslr = (map->flags & MAP_ASLR) != 0; 1771484e9d03SKonstantin Belousov update_anon = cluster = clustering_anon_allowed(*addr) && 1772fa50a355SKonstantin Belousov (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 && 1773fa50a355SKonstantin Belousov find_space != VMFS_NO_SPACE && object == NULL && 1774fa50a355SKonstantin Belousov (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP | 1775fa50a355SKonstantin Belousov MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE; 1776fa50a355SKonstantin Belousov curr_min_addr = min_addr = *addr; 1777fa50a355SKonstantin Belousov if (en_aslr && min_addr == 0 && !cluster && 1778fa50a355SKonstantin Belousov find_space != VMFS_NO_SPACE && 1779fa50a355SKonstantin Belousov (map->flags & MAP_ASLR_IGNSTART) != 0) 1780fa50a355SKonstantin Belousov curr_min_addr = min_addr = vm_map_min(map); 1781fa50a355SKonstantin Belousov try = 0; 17824d572bb3SAlan Cox vm_map_lock(map); 1783fa50a355SKonstantin Belousov if (cluster) { 1784fa50a355SKonstantin Belousov curr_min_addr = map->anon_loc; 1785fa50a355SKonstantin Belousov if (curr_min_addr == 0) 1786fa50a355SKonstantin Belousov cluster = false; 1787fa50a355SKonstantin Belousov } 178826c538ffSAlan Cox if (find_space != VMFS_NO_SPACE) { 1789fec29688SAlan Cox KASSERT(find_space == VMFS_ANY_SPACE || 1790fec29688SAlan Cox find_space == VMFS_OPTIMAL_SPACE || 1791fec29688SAlan Cox find_space == VMFS_SUPER_SPACE || 1792fec29688SAlan Cox alignment != 0, ("unexpected VMFS flag")); 1793fec29688SAlan Cox again: 1794fa50a355SKonstantin Belousov /* 1795fa50a355SKonstantin Belousov * When creating an anonymous mapping, 
try clustering 1796fa50a355SKonstantin Belousov * with an existing anonymous mapping first. 1797fa50a355SKonstantin Belousov * 1798fa50a355SKonstantin Belousov * We make up to two attempts to find address space 1799fa50a355SKonstantin Belousov * for a given find_space value. The first attempt may 1800fa50a355SKonstantin Belousov * apply randomization or may cluster with an existing 1801fa50a355SKonstantin Belousov * anonymous mapping. If this first attempt fails, 1802fa50a355SKonstantin Belousov * perform a first-fit search of the available address 1803fa50a355SKonstantin Belousov * space. 1804fa50a355SKonstantin Belousov * 1805fa50a355SKonstantin Belousov * If all tries failed, and find_space is 1806fa50a355SKonstantin Belousov * VMFS_OPTIMAL_SPACE, fall back to VMFS_ANY_SPACE. 1807fa50a355SKonstantin Belousov * Again enable clustering and randomization. 1808fa50a355SKonstantin Belousov */ 1809fa50a355SKonstantin Belousov try++; 1810fa50a355SKonstantin Belousov MPASS(try <= 2); 1811fa50a355SKonstantin Belousov 1812fa50a355SKonstantin Belousov if (try == 2) { 1813fa50a355SKonstantin Belousov /* 1814fa50a355SKonstantin Belousov * Second try: we failed either to find a 1815fa50a355SKonstantin Belousov * suitable region for randomizing the 1816fa50a355SKonstantin Belousov * allocation, or to cluster with an existing 1817fa50a355SKonstantin Belousov * mapping. Retry with free run. 1818fa50a355SKonstantin Belousov */ 1819fa50a355SKonstantin Belousov curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ? 1820fa50a355SKonstantin Belousov vm_map_min(map) : min_addr; 1821fa50a355SKonstantin Belousov atomic_add_long(&aslr_restarts, 1); 1822fa50a355SKonstantin Belousov } 1823fa50a355SKonstantin Belousov 1824fa50a355SKonstantin Belousov if (try == 1 && en_aslr && !cluster) { 1825fa50a355SKonstantin Belousov /* 1826fa50a355SKonstantin Belousov * Find space for allocation, including 1827fa50a355SKonstantin Belousov * gap needed for later randomization. 1828fa50a355SKonstantin Belousov */ 1829fa50a355SKonstantin Belousov pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 && 1830fa50a355SKonstantin Belousov (find_space == VMFS_SUPER_SPACE || find_space == 1831fa50a355SKonstantin Belousov VMFS_OPTIMAL_SPACE) ? 1 : 0; 1832fa50a355SKonstantin Belousov gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR && 1833fa50a355SKonstantin Belousov (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ? 1834fa50a355SKonstantin Belousov aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx]; 1835*9f701172SKonstantin Belousov *addr = vm_map_findspace(map, curr_min_addr, 1836*9f701172SKonstantin Belousov length + gap * pagesizes[pidx]); 1837*9f701172SKonstantin Belousov if (*addr + length + gap * pagesizes[pidx] > 1838*9f701172SKonstantin Belousov vm_map_max(map)) 1839fa50a355SKonstantin Belousov goto again; 1840fa50a355SKonstantin Belousov /* And randomize the start address.
*/ 1841fa50a355SKonstantin Belousov *addr += (arc4random() % gap) * pagesizes[pidx]; 18425019dac9SKonstantin Belousov if (max_addr != 0 && *addr + length > max_addr) 18435019dac9SKonstantin Belousov goto again; 1844*9f701172SKonstantin Belousov } else { 1845*9f701172SKonstantin Belousov *addr = vm_map_findspace(map, curr_min_addr, length); 1846*9f701172SKonstantin Belousov if (*addr + length > vm_map_max(map) || 1847edb572a3SJohn Baldwin (max_addr != 0 && *addr + length > max_addr)) { 1848fa50a355SKonstantin Belousov if (cluster) { 1849fa50a355SKonstantin Belousov cluster = false; 1850fa50a355SKonstantin Belousov MPASS(try == 1); 1851fa50a355SKonstantin Belousov goto again; 1852fa50a355SKonstantin Belousov } 1853fec29688SAlan Cox rv = KERN_NO_SPACE; 1854fec29688SAlan Cox goto done; 1855fec29688SAlan Cox } 1856*9f701172SKonstantin Belousov } 1857fa50a355SKonstantin Belousov 1858fec29688SAlan Cox if (find_space != VMFS_ANY_SPACE && 1859fec29688SAlan Cox (rv = vm_map_alignspace(map, object, offset, addr, length, 1860fec29688SAlan Cox max_addr, alignment)) != KERN_SUCCESS) { 1861ff74a3faSJohn Baldwin if (find_space == VMFS_OPTIMAL_SPACE) { 1862ff74a3faSJohn Baldwin find_space = VMFS_ANY_SPACE; 1863fa50a355SKonstantin Belousov curr_min_addr = min_addr; 1864fa50a355SKonstantin Belousov cluster = update_anon; 1865fa50a355SKonstantin Belousov try = 0; 1866ff74a3faSJohn Baldwin goto again; 1867ff74a3faSJohn Baldwin } 1868fec29688SAlan Cox goto done; 1869df8bae1dSRodney W. Grimes } 1870ea7e7006SKonstantin Belousov } else if ((cow & MAP_REMAP) != 0) { 1871ea7e7006SKonstantin Belousov if (*addr < vm_map_min(map) || 1872ea7e7006SKonstantin Belousov *addr + length > vm_map_max(map) || 1873ea7e7006SKonstantin Belousov *addr + length <= length) { 1874ea7e7006SKonstantin Belousov rv = KERN_INVALID_ADDRESS; 1875ea7e7006SKonstantin Belousov goto done; 1876ea7e7006SKonstantin Belousov } 1877ea7e7006SKonstantin Belousov vm_map_delete(map, *addr, *addr + length); 1878df8bae1dSRodney W. Grimes } 18794648ba0aSKonstantin Belousov if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { 1880fec29688SAlan Cox rv = vm_map_stack_locked(map, *addr, length, sgrowsiz, prot, 1881fec29688SAlan Cox max, cow); 18824648ba0aSKonstantin Belousov } else { 1883fec29688SAlan Cox rv = vm_map_insert(map, object, offset, *addr, *addr + length, 1884fec29688SAlan Cox prot, max, cow); 18854648ba0aSKonstantin Belousov } 1886fa50a355SKonstantin Belousov if (rv == KERN_SUCCESS && update_anon) 1887fa50a355SKonstantin Belousov map->anon_loc = *addr + length; 1888fec29688SAlan Cox done: 1889df8bae1dSRodney W. Grimes vm_map_unlock(map); 1890fec29688SAlan Cox return (rv); 1891df8bae1dSRodney W. Grimes } 1892df8bae1dSRodney W. Grimes 1893e8502826SKonstantin Belousov /* 1894e8502826SKonstantin Belousov * vm_map_find_min() is a variant of vm_map_find() that takes an 1895e8502826SKonstantin Belousov * additional parameter (min_addr) and treats the given address 1896e8502826SKonstantin Belousov * (*addr) differently. Specifically, it treats *addr as a hint 1897e8502826SKonstantin Belousov * and not as the minimum address where the mapping is created. 1898e8502826SKonstantin Belousov * 1899e8502826SKonstantin Belousov * This function works in two phases. First, it tries to 1900e8502826SKonstantin Belousov * allocate above the hint. 
If that fails and the hint is 1901e8502826SKonstantin Belousov * greater than min_addr, it performs a second pass, replacing 1902e8502826SKonstantin Belousov * the hint with min_addr as the minimum address for the 1903e8502826SKonstantin Belousov * allocation. 1904e8502826SKonstantin Belousov */ 19056a97a3f7SKonstantin Belousov int 19066a97a3f7SKonstantin Belousov vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 19076a97a3f7SKonstantin Belousov vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr, 19086a97a3f7SKonstantin Belousov vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, 19096a97a3f7SKonstantin Belousov int cow) 19106a97a3f7SKonstantin Belousov { 19116a97a3f7SKonstantin Belousov vm_offset_t hint; 19126a97a3f7SKonstantin Belousov int rv; 19136a97a3f7SKonstantin Belousov 19146a97a3f7SKonstantin Belousov hint = *addr; 19156a97a3f7SKonstantin Belousov for (;;) { 19166a97a3f7SKonstantin Belousov rv = vm_map_find(map, object, offset, addr, length, max_addr, 19176a97a3f7SKonstantin Belousov find_space, prot, max, cow); 19186a97a3f7SKonstantin Belousov if (rv == KERN_SUCCESS || min_addr >= hint) 19196a97a3f7SKonstantin Belousov return (rv); 19207683ad70SKonstantin Belousov *addr = hint = min_addr; 19216a97a3f7SKonstantin Belousov } 19226a97a3f7SKonstantin Belousov } 19236a97a3f7SKonstantin Belousov 192492e78c10SAlan Cox /* 192592e78c10SAlan Cox * A map entry with any of the following flags set must not be merged with 192692e78c10SAlan Cox * another entry. 192792e78c10SAlan Cox */ 192892e78c10SAlan Cox #define MAP_ENTRY_NOMERGE_MASK (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP | \ 192992e78c10SAlan Cox MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP) 193092e78c10SAlan Cox 193107424462SKonstantin Belousov static bool 193207424462SKonstantin Belousov vm_map_mergeable_neighbors(vm_map_entry_t prev, vm_map_entry_t entry) 193307424462SKonstantin Belousov { 193407424462SKonstantin Belousov 193592e78c10SAlan Cox KASSERT((prev->eflags & MAP_ENTRY_NOMERGE_MASK) == 0 || 193692e78c10SAlan Cox (entry->eflags & MAP_ENTRY_NOMERGE_MASK) == 0, 193792e78c10SAlan Cox ("vm_map_mergeable_neighbors: neither %p nor %p are mergeable", 193892e78c10SAlan Cox prev, entry)); 193907424462SKonstantin Belousov return (prev->end == entry->start && 194007424462SKonstantin Belousov prev->object.vm_object == entry->object.vm_object && 194107424462SKonstantin Belousov (prev->object.vm_object == NULL || 194292e78c10SAlan Cox prev->offset + (prev->end - prev->start) == entry->offset) && 194307424462SKonstantin Belousov prev->eflags == entry->eflags && 194407424462SKonstantin Belousov prev->protection == entry->protection && 194507424462SKonstantin Belousov prev->max_protection == entry->max_protection && 194607424462SKonstantin Belousov prev->inheritance == entry->inheritance && 194707424462SKonstantin Belousov prev->wired_count == entry->wired_count && 194807424462SKonstantin Belousov prev->cred == entry->cred); 194907424462SKonstantin Belousov } 195007424462SKonstantin Belousov 195107424462SKonstantin Belousov static void 195207424462SKonstantin Belousov vm_map_merged_neighbor_dispose(vm_map_t map, vm_map_entry_t entry) 195307424462SKonstantin Belousov { 195407424462SKonstantin Belousov 195507424462SKonstantin Belousov /* 195692e78c10SAlan Cox * If the backing object is a vnode object, vm_object_deallocate() 195792e78c10SAlan Cox * calls vrele(). However, vrele() does not lock the vnode because 195892e78c10SAlan Cox * the vnode has additional references. 
Thus, the map lock can be 195992e78c10SAlan Cox * kept without causing a lock-order reversal with the vnode lock. 196007424462SKonstantin Belousov * 196192e78c10SAlan Cox * Since we count the number of virtual page mappings in 196292e78c10SAlan Cox * object->un_pager.vnp.writemappings, the writemappings value 196392e78c10SAlan Cox * should not be adjusted when the entry is disposed of. 196407424462SKonstantin Belousov */ 196507424462SKonstantin Belousov if (entry->object.vm_object != NULL) 196607424462SKonstantin Belousov vm_object_deallocate(entry->object.vm_object); 196707424462SKonstantin Belousov if (entry->cred != NULL) 196807424462SKonstantin Belousov crfree(entry->cred); 196907424462SKonstantin Belousov vm_map_entry_dispose(map, entry); 197007424462SKonstantin Belousov } 197107424462SKonstantin Belousov 1972df8bae1dSRodney W. Grimes /* 1973b7b2aac2SJohn Dyson * vm_map_simplify_entry: 197467bf6868SJohn Dyson * 19754e71e795SMatthew Dillon * Simplify the given map entry by merging with either neighbor. This 19764e71e795SMatthew Dillon * routine also has the ability to merge with both neighbors. 19774e71e795SMatthew Dillon * 19784e71e795SMatthew Dillon * The map must be locked. 19794e71e795SMatthew Dillon * 1980ba7c64d1SKonstantin Belousov * This routine guarantees that the passed entry remains valid (though 19814e71e795SMatthew Dillon * possibly extended). When merging, this routine may delete one or 19824e71e795SMatthew Dillon * both neighbors. 1983df8bae1dSRodney W. Grimes */ 19840afcd3afSAlan Cox void 19851b40f8c0SMatthew Dillon vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) 1986df8bae1dSRodney W. Grimes { 1987308c24baSJohn Dyson vm_map_entry_t next, prev; 1988df8bae1dSRodney W. Grimes 198992e78c10SAlan Cox if ((entry->eflags & MAP_ENTRY_NOMERGE_MASK) != 0) 1990df8bae1dSRodney W. Grimes return; 1991308c24baSJohn Dyson prev = entry->prev; 19922203c46dSMark Johnston if (vm_map_mergeable_neighbors(prev, entry)) { 1993*9f701172SKonstantin Belousov vm_map_entry_unlink(map, prev, UNLINK_MERGE_NEXT); 199407424462SKonstantin Belousov vm_map_merged_neighbor_dispose(map, prev); 1995308c24baSJohn Dyson } 1996de5f6a77SJohn Dyson next = entry->next; 19972203c46dSMark Johnston if (vm_map_mergeable_neighbors(entry, next)) { 1998*9f701172SKonstantin Belousov vm_map_entry_unlink(map, next, UNLINK_MERGE_PREV); 199907424462SKonstantin Belousov vm_map_merged_neighbor_dispose(map, next); 2000df8bae1dSRodney W. Grimes } 2001df8bae1dSRodney W. Grimes } 200292e78c10SAlan Cox 2003df8bae1dSRodney W. Grimes /* 2004df8bae1dSRodney W. Grimes * vm_map_clip_start: [ internal use only ] 2005df8bae1dSRodney W. Grimes * 2006df8bae1dSRodney W. Grimes * Asserts that the given entry begins at or after 2007df8bae1dSRodney W. Grimes * the specified address; if necessary, 2008df8bae1dSRodney W. Grimes * it splits the entry into two. 2009df8bae1dSRodney W. Grimes */ 2010df8bae1dSRodney W. Grimes #define vm_map_clip_start(map, entry, startaddr) \ 2011df8bae1dSRodney W. Grimes { \ 2012df8bae1dSRodney W. Grimes if (startaddr > entry->start) \ 2013df8bae1dSRodney W. Grimes _vm_map_clip_start(map, entry, startaddr); \ 2014df8bae1dSRodney W. Grimes } 2015df8bae1dSRodney W. Grimes 2016df8bae1dSRodney W. Grimes /* 2017df8bae1dSRodney W. Grimes * This routine is called only when it is known that 2018df8bae1dSRodney W. Grimes * the entry must be split. 2019df8bae1dSRodney W. 
Grimes */ 20200d94caffSDavid Greenman static void 20211b40f8c0SMatthew Dillon _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) 2022df8bae1dSRodney W. Grimes { 2023c0877f10SJohn Dyson vm_map_entry_t new_entry; 2024df8bae1dSRodney W. Grimes 20253a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 2026ed11e4d7SMark Johnston KASSERT(entry->end > start && entry->start < start, 2027ed11e4d7SMark Johnston ("_vm_map_clip_start: invalid clip of entry %p", entry)); 20283a0916b8SKonstantin Belousov 2029df8bae1dSRodney W. Grimes /* 20300d94caffSDavid Greenman * Split off the front portion -- note that we must insert the new 20310d94caffSDavid Greenman * entry BEFORE this one, so that this entry has the specified 20320d94caffSDavid Greenman * starting address. 2033df8bae1dSRodney W. Grimes */ 2034f32dbbeeSJohn Dyson vm_map_simplify_entry(map, entry); 2035f32dbbeeSJohn Dyson 203611cccda1SJohn Dyson /* 203711cccda1SJohn Dyson * If there is no object backing this entry, we might as well create 203811cccda1SJohn Dyson * one now. If we defer it, an object can get created after the map 203911cccda1SJohn Dyson * is clipped, and individual objects will be created for the split-up 204011cccda1SJohn Dyson * map. This is a bit of a hack, but is also about the best place to 204111cccda1SJohn Dyson * put this improvement. 204211cccda1SJohn Dyson */ 204319bd0d9cSKonstantin Belousov if (entry->object.vm_object == NULL && !map->system_map && 204419bd0d9cSKonstantin Belousov (entry->eflags & MAP_ENTRY_GUARD) == 0) { 204511cccda1SJohn Dyson vm_object_t object; 204611cccda1SJohn Dyson object = vm_object_allocate(OBJT_DEFAULT, 2047c2e11a03SJohn Dyson atop(entry->end - entry->start)); 204811cccda1SJohn Dyson entry->object.vm_object = object; 204911cccda1SJohn Dyson entry->offset = 0; 2050ef694c1aSEdward Tomasz Napierala if (entry->cred != NULL) { 2051ef694c1aSEdward Tomasz Napierala object->cred = entry->cred; 20523364c323SKonstantin Belousov object->charge = entry->end - entry->start; 2053ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 20543364c323SKonstantin Belousov } 20553364c323SKonstantin Belousov } else if (entry->object.vm_object != NULL && 20563364c323SKonstantin Belousov ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && 2057ef694c1aSEdward Tomasz Napierala entry->cred != NULL) { 205889f6b863SAttilio Rao VM_OBJECT_WLOCK(entry->object.vm_object); 2059ef694c1aSEdward Tomasz Napierala KASSERT(entry->object.vm_object->cred == NULL, 2060ef694c1aSEdward Tomasz Napierala ("OVERCOMMIT: vm_entry_clip_start: both cred e %p", entry)); 2061ef694c1aSEdward Tomasz Napierala entry->object.vm_object->cred = entry->cred; 20623364c323SKonstantin Belousov entry->object.vm_object->charge = entry->end - entry->start; 206389f6b863SAttilio Rao VM_OBJECT_WUNLOCK(entry->object.vm_object); 2064ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 206511cccda1SJohn Dyson } 206611cccda1SJohn Dyson 2067df8bae1dSRodney W. Grimes new_entry = vm_map_entry_create(map); 2068df8bae1dSRodney W. Grimes *new_entry = *entry; 2069df8bae1dSRodney W. Grimes 2070df8bae1dSRodney W. Grimes new_entry->end = start; 2071df8bae1dSRodney W. Grimes entry->offset += (start - entry->start); 2072df8bae1dSRodney W. Grimes entry->start = start; 2073ef694c1aSEdward Tomasz Napierala if (new_entry->cred != NULL) 2074ef694c1aSEdward Tomasz Napierala crhold(entry->cred); 2075df8bae1dSRodney W. Grimes 2076*9f701172SKonstantin Belousov vm_map_entry_link(map, new_entry); 2077df8bae1dSRodney W. 
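	/*
	 * Both entries now reference the same backing object; take a
	 * reference on behalf of the new front entry, unless it maps
	 * a submap rather than an object.
	 */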
Grimes 20789fdfe602SMatthew Dillon if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 2079df8bae1dSRodney W. Grimes vm_object_reference(new_entry->object.vm_object); 208084110e7eSKonstantin Belousov /* 208184110e7eSKonstantin Belousov * The object->un_pager.vnp.writemappings for the 208284110e7eSKonstantin Belousov * object of MAP_ENTRY_VN_WRITECNT type entry shall be 208384110e7eSKonstantin Belousov * kept as is here. The virtual pages are 208484110e7eSKonstantin Belousov * re-distributed among the clipped entries, so the sum is 208584110e7eSKonstantin Belousov * left the same. 208684110e7eSKonstantin Belousov */ 2087df8bae1dSRodney W. Grimes } 2088c0877f10SJohn Dyson } 2089df8bae1dSRodney W. Grimes 2090df8bae1dSRodney W. Grimes /* 2091df8bae1dSRodney W. Grimes * vm_map_clip_end: [ internal use only ] 2092df8bae1dSRodney W. Grimes * 2093df8bae1dSRodney W. Grimes * Asserts that the given entry ends at or before 2094df8bae1dSRodney W. Grimes * the specified address; if necessary, 2095df8bae1dSRodney W. Grimes * it splits the entry into two. 2096df8bae1dSRodney W. Grimes */ 2097df8bae1dSRodney W. Grimes #define vm_map_clip_end(map, entry, endaddr) \ 2098df8bae1dSRodney W. Grimes { \ 2099af045176SPoul-Henning Kamp if ((endaddr) < (entry->end)) \ 2100af045176SPoul-Henning Kamp _vm_map_clip_end((map), (entry), (endaddr)); \ 2101df8bae1dSRodney W. Grimes } 2102df8bae1dSRodney W. Grimes 2103df8bae1dSRodney W. Grimes /* 2104df8bae1dSRodney W. Grimes * This routine is called only when it is known that 2105df8bae1dSRodney W. Grimes * the entry must be split. 2106df8bae1dSRodney W. Grimes */ 21070d94caffSDavid Greenman static void 21081b40f8c0SMatthew Dillon _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) 2109df8bae1dSRodney W. Grimes { 2110c0877f10SJohn Dyson vm_map_entry_t new_entry; 2111df8bae1dSRodney W. Grimes 21123a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 2113ed11e4d7SMark Johnston KASSERT(entry->start < end && entry->end > end, 2114ed11e4d7SMark Johnston ("_vm_map_clip_end: invalid clip of entry %p", entry)); 21153a0916b8SKonstantin Belousov 2116df8bae1dSRodney W. Grimes /* 211711cccda1SJohn Dyson * If there is no object backing this entry, we might as well create 211811cccda1SJohn Dyson * one now. If we defer it, an object can get created after the map 211911cccda1SJohn Dyson * is clipped, and individual objects will be created for the split-up 212011cccda1SJohn Dyson * map. This is a bit of a hack, but is also about the best place to 212111cccda1SJohn Dyson * put this improvement. 
212211cccda1SJohn Dyson */ 212319bd0d9cSKonstantin Belousov if (entry->object.vm_object == NULL && !map->system_map && 212419bd0d9cSKonstantin Belousov (entry->eflags & MAP_ENTRY_GUARD) == 0) { 212511cccda1SJohn Dyson vm_object_t object; 212611cccda1SJohn Dyson object = vm_object_allocate(OBJT_DEFAULT, 2127c2e11a03SJohn Dyson atop(entry->end - entry->start)); 212811cccda1SJohn Dyson entry->object.vm_object = object; 212911cccda1SJohn Dyson entry->offset = 0; 2130ef694c1aSEdward Tomasz Napierala if (entry->cred != NULL) { 2131ef694c1aSEdward Tomasz Napierala object->cred = entry->cred; 21323364c323SKonstantin Belousov object->charge = entry->end - entry->start; 2133ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 21343364c323SKonstantin Belousov } 21353364c323SKonstantin Belousov } else if (entry->object.vm_object != NULL && 21363364c323SKonstantin Belousov ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && 2137ef694c1aSEdward Tomasz Napierala entry->cred != NULL) { 213889f6b863SAttilio Rao VM_OBJECT_WLOCK(entry->object.vm_object); 2139ef694c1aSEdward Tomasz Napierala KASSERT(entry->object.vm_object->cred == NULL, 2140ef694c1aSEdward Tomasz Napierala ("OVERCOMMIT: vm_entry_clip_end: both cred e %p", entry)); 2141ef694c1aSEdward Tomasz Napierala entry->object.vm_object->cred = entry->cred; 21423364c323SKonstantin Belousov entry->object.vm_object->charge = entry->end - entry->start; 214389f6b863SAttilio Rao VM_OBJECT_WUNLOCK(entry->object.vm_object); 2144ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 214511cccda1SJohn Dyson } 214611cccda1SJohn Dyson 214711cccda1SJohn Dyson /* 21480d94caffSDavid Greenman * Create a new entry and insert it AFTER the specified entry 2149df8bae1dSRodney W. Grimes */ 2150df8bae1dSRodney W. Grimes new_entry = vm_map_entry_create(map); 2151df8bae1dSRodney W. Grimes *new_entry = *entry; 2152df8bae1dSRodney W. Grimes 2153df8bae1dSRodney W. Grimes new_entry->start = entry->end = end; 2154df8bae1dSRodney W. Grimes new_entry->offset += (end - entry->start); 2155ef694c1aSEdward Tomasz Napierala if (new_entry->cred != NULL) 2156ef694c1aSEdward Tomasz Napierala crhold(entry->cred); 2157df8bae1dSRodney W. Grimes 2158*9f701172SKonstantin Belousov vm_map_entry_link(map, new_entry); 2159df8bae1dSRodney W. Grimes 21609fdfe602SMatthew Dillon if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 2161df8bae1dSRodney W. Grimes vm_object_reference(new_entry->object.vm_object); 2162df8bae1dSRodney W. Grimes } 2163c0877f10SJohn Dyson } 2164df8bae1dSRodney W. Grimes 2165df8bae1dSRodney W. Grimes /* 2166df8bae1dSRodney W. Grimes * vm_map_submap: [ kernel use only ] 2167df8bae1dSRodney W. Grimes * 2168df8bae1dSRodney W. Grimes * Mark the given range as handled by a subordinate map. 2169df8bae1dSRodney W. Grimes * 2170df8bae1dSRodney W. Grimes * This range must have been created with vm_map_find, 2171df8bae1dSRodney W. Grimes * and no other operations may have been performed on this 2172df8bae1dSRodney W. Grimes * range prior to calling vm_map_submap. 2173df8bae1dSRodney W. Grimes * 2174df8bae1dSRodney W. Grimes * Only a limited number of operations can be performed 2175df8bae1dSRodney W. Grimes * within this range after calling vm_map_submap: 2176df8bae1dSRodney W. Grimes * vm_fault 2177df8bae1dSRodney W. Grimes * [Don't try vm_map_copy!] 2178df8bae1dSRodney W. Grimes * 2179df8bae1dSRodney W. Grimes * To remove a submapping, one must first remove the 2180df8bae1dSRodney W. Grimes * range from the superior map, and then destroy the 2181df8bae1dSRodney W. Grimes * submap (if desired).
[Better yet, don't try it.] 2182df8bae1dSRodney W. Grimes */ 2183df8bae1dSRodney W. Grimes int 21841b40f8c0SMatthew Dillon vm_map_submap( 21851b40f8c0SMatthew Dillon vm_map_t map, 21861b40f8c0SMatthew Dillon vm_offset_t start, 21871b40f8c0SMatthew Dillon vm_offset_t end, 21881b40f8c0SMatthew Dillon vm_map_t submap) 2189df8bae1dSRodney W. Grimes { 2190df8bae1dSRodney W. Grimes vm_map_entry_t entry; 2191fa50a355SKonstantin Belousov int result; 2192fa50a355SKonstantin Belousov 2193fa50a355SKonstantin Belousov result = KERN_INVALID_ARGUMENT; 2194fa50a355SKonstantin Belousov 2195fa50a355SKonstantin Belousov vm_map_lock(submap); 2196fa50a355SKonstantin Belousov submap->flags |= MAP_IS_SUB_MAP; 2197fa50a355SKonstantin Belousov vm_map_unlock(submap); 2198df8bae1dSRodney W. Grimes 2199df8bae1dSRodney W. Grimes vm_map_lock(map); 2200df8bae1dSRodney W. Grimes 2201df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 2202df8bae1dSRodney W. Grimes 2203df8bae1dSRodney W. Grimes if (vm_map_lookup_entry(map, start, &entry)) { 2204df8bae1dSRodney W. Grimes vm_map_clip_start(map, entry, start); 22050d94caffSDavid Greenman } else 2206df8bae1dSRodney W. Grimes entry = entry->next; 2207df8bae1dSRodney W. Grimes 2208df8bae1dSRodney W. Grimes vm_map_clip_end(map, entry, end); 2209df8bae1dSRodney W. Grimes 2210df8bae1dSRodney W. Grimes if ((entry->start == start) && (entry->end == end) && 22119fdfe602SMatthew Dillon ((entry->eflags & MAP_ENTRY_COW) == 0) && 2212afa07f7eSJohn Dyson (entry->object.vm_object == NULL)) { 22132d8acc0fSJohn Dyson entry->object.sub_map = submap; 2214afa07f7eSJohn Dyson entry->eflags |= MAP_ENTRY_IS_SUB_MAP; 2215df8bae1dSRodney W. Grimes result = KERN_SUCCESS; 2216df8bae1dSRodney W. Grimes } 2217df8bae1dSRodney W. Grimes vm_map_unlock(map); 2218df8bae1dSRodney W. Grimes 2219fa50a355SKonstantin Belousov if (result != KERN_SUCCESS) { 2220fa50a355SKonstantin Belousov vm_map_lock(submap); 2221fa50a355SKonstantin Belousov submap->flags &= ~MAP_IS_SUB_MAP; 2222fa50a355SKonstantin Belousov vm_map_unlock(submap); 2223fa50a355SKonstantin Belousov } 2224df8bae1dSRodney W. Grimes return (result); 2225df8bae1dSRodney W. Grimes } 2226df8bae1dSRodney W. Grimes 2227df8bae1dSRodney W. Grimes /* 2228dd05fa19SAlan Cox * The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified 22291f78f902SAlan Cox */ 22301f78f902SAlan Cox #define MAX_INIT_PT 96 22311f78f902SAlan Cox 22321f78f902SAlan Cox /* 22330551c08dSAlan Cox * vm_map_pmap_enter: 22340551c08dSAlan Cox * 2235dd05fa19SAlan Cox * Preload the specified map's pmap with mappings to the specified 2236dd05fa19SAlan Cox * object's memory-resident pages. No further physical pages are 2237dd05fa19SAlan Cox * allocated, and no further virtual pages are retrieved from secondary 2238dd05fa19SAlan Cox * storage. If the specified flags include MAP_PREFAULT_PARTIAL, then a 2239dd05fa19SAlan Cox * limited number of page mappings are created at the low-end of the 2240dd05fa19SAlan Cox * specified address range. (For this purpose, a superpage mapping 2241dd05fa19SAlan Cox * counts as one page mapping.) Otherwise, all resident pages within 22423453bca8SAlan Cox * the specified address range are mapped. 
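 *
 * (Illustrative note, not from the original source: assuming 4 KB
 * base pages, a 1 MB range spans atop(1 MB) = 256 pages; with
 * MAP_PREFAULT_PARTIAL, only the first MAX_INIT_PT (96) of them
 * would be mapped, unless superpages skipped over along the way
 * raise the threshold.)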
22430551c08dSAlan Cox */ 2244077ec27cSAlan Cox static void 22454da4d293SAlan Cox vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, 22460551c08dSAlan Cox vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags) 22470551c08dSAlan Cox { 22488fece8c3SAlan Cox vm_offset_t start; 2249ce142d9eSAlan Cox vm_page_t p, p_start; 2250dd05fa19SAlan Cox vm_pindex_t mask, psize, threshold, tmpidx; 22510551c08dSAlan Cox 2252ba8bca61SAlan Cox if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL) 22531f78f902SAlan Cox return; 22549af6d512SAttilio Rao VM_OBJECT_RLOCK(object); 22559af6d512SAttilio Rao if (object->type == OBJT_DEVICE || object->type == OBJT_SG) { 22569af6d512SAttilio Rao VM_OBJECT_RUNLOCK(object); 225789f6b863SAttilio Rao VM_OBJECT_WLOCK(object); 225801381811SJohn Baldwin if (object->type == OBJT_DEVICE || object->type == OBJT_SG) { 22599af6d512SAttilio Rao pmap_object_init_pt(map->pmap, addr, object, pindex, 22609af6d512SAttilio Rao size); 22619af6d512SAttilio Rao VM_OBJECT_WUNLOCK(object); 22629af6d512SAttilio Rao return; 22639af6d512SAttilio Rao } 22649af6d512SAttilio Rao VM_OBJECT_LOCK_DOWNGRADE(object); 22651f78f902SAlan Cox } 22661f78f902SAlan Cox 22671f78f902SAlan Cox psize = atop(size); 22681f78f902SAlan Cox if (psize + pindex > object->size) { 22699af6d512SAttilio Rao if (object->size < pindex) { 22709af6d512SAttilio Rao VM_OBJECT_RUNLOCK(object); 22719af6d512SAttilio Rao return; 22729af6d512SAttilio Rao } 22731f78f902SAlan Cox psize = object->size - pindex; 22741f78f902SAlan Cox } 22751f78f902SAlan Cox 2276ce142d9eSAlan Cox start = 0; 2277ce142d9eSAlan Cox p_start = NULL; 2278dd05fa19SAlan Cox threshold = MAX_INIT_PT; 22791f78f902SAlan Cox 2280b382c10aSKonstantin Belousov p = vm_page_find_least(object, pindex); 22811f78f902SAlan Cox /* 22821f78f902SAlan Cox * Assert: the variable p is either (1) the page with the 22831f78f902SAlan Cox * least pindex greater than or equal to the parameter pindex 22841f78f902SAlan Cox * or (2) NULL. 22851f78f902SAlan Cox */ 22861f78f902SAlan Cox for (; 22871f78f902SAlan Cox p != NULL && (tmpidx = p->pindex - pindex) < psize; 22881f78f902SAlan Cox p = TAILQ_NEXT(p, listq)) { 22891f78f902SAlan Cox /* 22901f78f902SAlan Cox * don't allow madvise to blow away our really 22911f78f902SAlan Cox * free pages by allocating pv entries. 22921f78f902SAlan Cox */ 2293dd05fa19SAlan Cox if (((flags & MAP_PREFAULT_MADVISE) != 0 && 2294e2068d0bSJeff Roberson vm_page_count_severe()) || 2295dd05fa19SAlan Cox ((flags & MAP_PREFAULT_PARTIAL) != 0 && 2296dd05fa19SAlan Cox tmpidx >= threshold)) { 2297379fb642SAlan Cox psize = tmpidx; 22981f78f902SAlan Cox break; 22991f78f902SAlan Cox } 23000a2e596aSAlan Cox if (p->valid == VM_PAGE_BITS_ALL) { 2301ce142d9eSAlan Cox if (p_start == NULL) { 2302ce142d9eSAlan Cox start = addr + ptoa(tmpidx); 2303ce142d9eSAlan Cox p_start = p; 2304ce142d9eSAlan Cox } 2305dd05fa19SAlan Cox /* Jump ahead if a superpage mapping is possible.
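 * (Illustrative arithmetic, not from the original source: for a
 * 2 MB superpage composed of 4 KB base pages, pagesizes[p->psind] - 1
 * masks the low 21 bits, so the alignment test passes only at 2 MB
 * boundaries; mask is then 511, and the loop steps past the other
 * 511 constituent pages at once.)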
*/ 2306dd05fa19SAlan Cox if (p->psind > 0 && ((addr + ptoa(tmpidx)) & 2307dd05fa19SAlan Cox (pagesizes[p->psind] - 1)) == 0) { 2308dd05fa19SAlan Cox mask = atop(pagesizes[p->psind]) - 1; 2309dd05fa19SAlan Cox if (tmpidx + mask < psize && 231088302601SAlan Cox vm_page_ps_test(p, PS_ALL_VALID, NULL)) { 2311dd05fa19SAlan Cox p += mask; 2312dd05fa19SAlan Cox threshold += mask; 2313dd05fa19SAlan Cox } 2314dd05fa19SAlan Cox } 23157bfda801SAlan Cox } else if (p_start != NULL) { 2316cf4682aeSAlan Cox pmap_enter_object(map->pmap, start, addr + 2317cf4682aeSAlan Cox ptoa(tmpidx), p_start, prot); 2318cf4682aeSAlan Cox p_start = NULL; 2319cf4682aeSAlan Cox } 2320cf4682aeSAlan Cox } 2321c46b90e9SAlan Cox if (p_start != NULL) 2322379fb642SAlan Cox pmap_enter_object(map->pmap, start, addr + ptoa(psize), 2323379fb642SAlan Cox p_start, prot); 23249af6d512SAttilio Rao VM_OBJECT_RUNLOCK(object); 23250551c08dSAlan Cox } 23260551c08dSAlan Cox 23270551c08dSAlan Cox /* 2328df8bae1dSRodney W. Grimes * vm_map_protect: 2329df8bae1dSRodney W. Grimes * 2330df8bae1dSRodney W. Grimes * Sets the protection of the specified address 2331df8bae1dSRodney W. Grimes * region in the target map. If "set_max" is 2332df8bae1dSRodney W. Grimes * specified, the maximum protection is to be set; 2333df8bae1dSRodney W. Grimes * otherwise, only the current protection is affected. 2334df8bae1dSRodney W. Grimes */ 2335df8bae1dSRodney W. Grimes int 2336b9dcd593SBruce Evans vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, 2337b9dcd593SBruce Evans vm_prot_t new_prot, boolean_t set_max) 2338df8bae1dSRodney W. Grimes { 2339210a6886SKonstantin Belousov vm_map_entry_t current, entry; 23403364c323SKonstantin Belousov vm_object_t obj; 2341ef694c1aSEdward Tomasz Napierala struct ucred *cred; 2342210a6886SKonstantin Belousov vm_prot_t old_prot; 2343df8bae1dSRodney W. Grimes 234479e9451fSKonstantin Belousov if (start == end) 234579e9451fSKonstantin Belousov return (KERN_SUCCESS); 234679e9451fSKonstantin Belousov 2347df8bae1dSRodney W. Grimes vm_map_lock(map); 2348df8bae1dSRodney W. Grimes 2349e1cb9d37SMark Johnston /* 2350e1cb9d37SMark Johnston * Ensure that we are not concurrently wiring pages. vm_map_wire() may 2351e1cb9d37SMark Johnston * need to fault pages into the map and will drop the map lock while 2352e1cb9d37SMark Johnston * doing so, and the VM object may end up in an inconsistent state if we 2353e1cb9d37SMark Johnston * update the protection on the map entry in between faults. 2354e1cb9d37SMark Johnston */ 2355e1cb9d37SMark Johnston vm_map_wait_busy(map); 2356e1cb9d37SMark Johnston 2357df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 2358df8bae1dSRodney W. Grimes 2359df8bae1dSRodney W. Grimes if (vm_map_lookup_entry(map, start, &entry)) { 2360df8bae1dSRodney W. Grimes vm_map_clip_start(map, entry, start); 2361b7b2aac2SJohn Dyson } else { 2362df8bae1dSRodney W. Grimes entry = entry->next; 2363b7b2aac2SJohn Dyson } 2364df8bae1dSRodney W. Grimes 2365df8bae1dSRodney W. Grimes /* 23660d94caffSDavid Greenman * Make a first pass to check for protection violations. 2367df8bae1dSRodney W. Grimes */ 23681c5196c3SKonstantin Belousov for (current = entry; current->start < end; current = current->next) { 23698a89ca94SKonstantin Belousov if ((current->eflags & MAP_ENTRY_GUARD) != 0) 23708a89ca94SKonstantin Belousov continue; 2371afa07f7eSJohn Dyson if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 2372a1f6d91cSDavid Greenman vm_map_unlock(map); 2373df8bae1dSRodney W. 
Grimes return (KERN_INVALID_ARGUMENT); 2374a1f6d91cSDavid Greenman } 2375df8bae1dSRodney W. Grimes if ((new_prot & current->max_protection) != new_prot) { 2376df8bae1dSRodney W. Grimes vm_map_unlock(map); 2377df8bae1dSRodney W. Grimes return (KERN_PROTECTION_FAILURE); 2378df8bae1dSRodney W. Grimes } 2379df8bae1dSRodney W. Grimes } 2380df8bae1dSRodney W. Grimes 23813364c323SKonstantin Belousov /* 23823364c323SKonstantin Belousov * Do an accounting pass for private read-only mappings that 23833364c323SKonstantin Belousov * now will do cow due to allowed write (e.g. debugger sets 23843364c323SKonstantin Belousov * breakpoint on text segment) 23853364c323SKonstantin Belousov */ 23861c5196c3SKonstantin Belousov for (current = entry; current->start < end; current = current->next) { 23873364c323SKonstantin Belousov 23883364c323SKonstantin Belousov vm_map_clip_end(map, current, end); 23893364c323SKonstantin Belousov 23903364c323SKonstantin Belousov if (set_max || 23913364c323SKonstantin Belousov ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 || 239219bd0d9cSKonstantin Belousov ENTRY_CHARGED(current) || 239319bd0d9cSKonstantin Belousov (current->eflags & MAP_ENTRY_GUARD) != 0) { 23943364c323SKonstantin Belousov continue; 23953364c323SKonstantin Belousov } 23963364c323SKonstantin Belousov 2397ef694c1aSEdward Tomasz Napierala cred = curthread->td_ucred; 23983364c323SKonstantin Belousov obj = current->object.vm_object; 23993364c323SKonstantin Belousov 24003364c323SKonstantin Belousov if (obj == NULL || (current->eflags & MAP_ENTRY_NEEDS_COPY)) { 24013364c323SKonstantin Belousov if (!swap_reserve(current->end - current->start)) { 24023364c323SKonstantin Belousov vm_map_unlock(map); 24033364c323SKonstantin Belousov return (KERN_RESOURCE_SHORTAGE); 24043364c323SKonstantin Belousov } 2405ef694c1aSEdward Tomasz Napierala crhold(cred); 2406ef694c1aSEdward Tomasz Napierala current->cred = cred; 24073364c323SKonstantin Belousov continue; 24083364c323SKonstantin Belousov } 24093364c323SKonstantin Belousov 241089f6b863SAttilio Rao VM_OBJECT_WLOCK(obj); 24113364c323SKonstantin Belousov if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP) { 241289f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 24133364c323SKonstantin Belousov continue; 24143364c323SKonstantin Belousov } 24153364c323SKonstantin Belousov 24163364c323SKonstantin Belousov /* 24173364c323SKonstantin Belousov * Charge for the whole object allocation now, since 24183364c323SKonstantin Belousov * we cannot distinguish between non-charged and 24193364c323SKonstantin Belousov * charged clipped mapping of the same object later. 24203364c323SKonstantin Belousov */ 24213364c323SKonstantin Belousov KASSERT(obj->charge == 0, 24223d95614fSKonstantin Belousov ("vm_map_protect: object %p overcharged (entry %p)", 24233d95614fSKonstantin Belousov obj, current)); 24243364c323SKonstantin Belousov if (!swap_reserve(ptoa(obj->size))) { 242589f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 24263364c323SKonstantin Belousov vm_map_unlock(map); 24273364c323SKonstantin Belousov return (KERN_RESOURCE_SHORTAGE); 24283364c323SKonstantin Belousov } 24293364c323SKonstantin Belousov 2430ef694c1aSEdward Tomasz Napierala crhold(cred); 2431ef694c1aSEdward Tomasz Napierala obj->cred = cred; 24323364c323SKonstantin Belousov obj->charge = ptoa(obj->size); 243389f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 24343364c323SKonstantin Belousov } 24353364c323SKonstantin Belousov 2436df8bae1dSRodney W. Grimes /* 24370d94caffSDavid Greenman * Go back and fix up protections. 
[Note that clipping is not 24380d94caffSDavid Greenman * necessary the second time.] 2439df8bae1dSRodney W. Grimes */ 24401c5196c3SKonstantin Belousov for (current = entry; current->start < end; current = current->next) { 244119bd0d9cSKonstantin Belousov if ((current->eflags & MAP_ENTRY_GUARD) != 0) 244219bd0d9cSKonstantin Belousov continue; 244319bd0d9cSKonstantin Belousov 2444df8bae1dSRodney W. Grimes old_prot = current->protection; 2445210a6886SKonstantin Belousov 2446df8bae1dSRodney W. Grimes if (set_max) 2447df8bae1dSRodney W. Grimes current->protection = 2448df8bae1dSRodney W. Grimes (current->max_protection = new_prot) & 2449df8bae1dSRodney W. Grimes old_prot; 2450df8bae1dSRodney W. Grimes else 2451df8bae1dSRodney W. Grimes current->protection = new_prot; 2452df8bae1dSRodney W. Grimes 2453dd006a1bSAlan Cox /* 2454dd006a1bSAlan Cox * For user wired map entries, the normal lazy evaluation of 2455dd006a1bSAlan Cox * write access upgrades through soft page faults is 2456dd006a1bSAlan Cox * undesirable. Instead, immediately copy any pages that are 2457dd006a1bSAlan Cox * copy-on-write and enable write access in the physical map. 2458dd006a1bSAlan Cox */ 2459dd006a1bSAlan Cox if ((current->eflags & MAP_ENTRY_USER_WIRED) != 0 && 2460210a6886SKonstantin Belousov (current->protection & VM_PROT_WRITE) != 0 && 24615930251aSKonstantin Belousov (old_prot & VM_PROT_WRITE) == 0) 2462210a6886SKonstantin Belousov vm_fault_copy_entry(map, map, current, current, NULL); 2463210a6886SKonstantin Belousov 2464df8bae1dSRodney W. Grimes /* 24652fafce9eSAlan Cox * When restricting access, update the physical map. Worry 24662fafce9eSAlan Cox * about copy-on-write here. 2467df8bae1dSRodney W. Grimes */ 24682fafce9eSAlan Cox if ((old_prot & ~current->protection) != 0) { 2469afa07f7eSJohn Dyson #define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \ 2470df8bae1dSRodney W. Grimes VM_PROT_ALL) 2471df8bae1dSRodney W. Grimes pmap_protect(map->pmap, current->start, 2472df8bae1dSRodney W. Grimes current->end, 24731c85e3dfSAlan Cox current->protection & MASK(current)); 2474df8bae1dSRodney W. Grimes #undef MASK 2475df8bae1dSRodney W. Grimes } 24767d78abc9SJohn Dyson vm_map_simplify_entry(map, current); 2477df8bae1dSRodney W. Grimes } 2478df8bae1dSRodney W. Grimes vm_map_unlock(map); 2479df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 2480df8bae1dSRodney W. Grimes } 2481df8bae1dSRodney W. Grimes 2482df8bae1dSRodney W. Grimes /* 2483867a482dSJohn Dyson * vm_map_madvise: 2484867a482dSJohn Dyson * 2485867a482dSJohn Dyson * This routine traverses a process's map handling the madvise 2486f7fc307aSAlan Cox * system call. Advisories are classified as either those affecting 2487f7fc307aSAlan Cox * the vm_map_entry structure, or those affecting the underlying 2488f7fc307aSAlan Cox * objects. 2489867a482dSJohn Dyson */ 2490b4309055SMatthew Dillon int 24911b40f8c0SMatthew Dillon vm_map_madvise( 24921b40f8c0SMatthew Dillon vm_map_t map, 24931b40f8c0SMatthew Dillon vm_offset_t start, 24941b40f8c0SMatthew Dillon vm_offset_t end, 24951b40f8c0SMatthew Dillon int behav) 2496867a482dSJohn Dyson { 2497f7fc307aSAlan Cox vm_map_entry_t current, entry; 24983e7cb27cSAlan Cox bool modify_map; 2499867a482dSJohn Dyson 2500b4309055SMatthew Dillon /* 2501b4309055SMatthew Dillon * Some madvise calls directly modify the vm_map_entry, in which case 2502b4309055SMatthew Dillon * we need to use an exclusive lock on the map and we need to perform 2503b4309055SMatthew Dillon * various clipping operations.
Otherwise we only need a read-lock 2504b4309055SMatthew Dillon * on the map. 2505b4309055SMatthew Dillon */ 2506b4309055SMatthew Dillon switch(behav) { 2507b4309055SMatthew Dillon case MADV_NORMAL: 2508b4309055SMatthew Dillon case MADV_SEQUENTIAL: 2509b4309055SMatthew Dillon case MADV_RANDOM: 25104f79d873SMatthew Dillon case MADV_NOSYNC: 25114f79d873SMatthew Dillon case MADV_AUTOSYNC: 25129730a5daSPaul Saab case MADV_NOCORE: 25139730a5daSPaul Saab case MADV_CORE: 251479e9451fSKonstantin Belousov if (start == end) 25153e7cb27cSAlan Cox return (0); 25163e7cb27cSAlan Cox modify_map = true; 2517867a482dSJohn Dyson vm_map_lock(map); 2518b4309055SMatthew Dillon break; 2519b4309055SMatthew Dillon case MADV_WILLNEED: 2520b4309055SMatthew Dillon case MADV_DONTNEED: 2521b4309055SMatthew Dillon case MADV_FREE: 252279e9451fSKonstantin Belousov if (start == end) 25233e7cb27cSAlan Cox return (0); 25243e7cb27cSAlan Cox modify_map = false; 2525f7fc307aSAlan Cox vm_map_lock_read(map); 2526b4309055SMatthew Dillon break; 2527b4309055SMatthew Dillon default: 25283e7cb27cSAlan Cox return (EINVAL); 2529b4309055SMatthew Dillon } 2530b4309055SMatthew Dillon 2531b4309055SMatthew Dillon /* 2532b4309055SMatthew Dillon * Locate starting entry and clip if necessary. 2533b4309055SMatthew Dillon */ 2534867a482dSJohn Dyson VM_MAP_RANGE_CHECK(map, start, end); 2535867a482dSJohn Dyson 2536867a482dSJohn Dyson if (vm_map_lookup_entry(map, start, &entry)) { 2537f7fc307aSAlan Cox if (modify_map) 2538867a482dSJohn Dyson vm_map_clip_start(map, entry, start); 2539b4309055SMatthew Dillon } else { 2540867a482dSJohn Dyson entry = entry->next; 2541b4309055SMatthew Dillon } 2542867a482dSJohn Dyson 2543f7fc307aSAlan Cox if (modify_map) { 2544f7fc307aSAlan Cox /* 2545f7fc307aSAlan Cox * madvise behaviors that are implemented in the vm_map_entry. 2546f7fc307aSAlan Cox * 2547f7fc307aSAlan Cox * We clip the vm_map_entry so that behavioral changes are 2548f7fc307aSAlan Cox * limited to the specified address range. 
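 *
 * (Illustrative example, not from the original source: a
 * MADV_NOSYNC request covering only the middle of an entry first
 * clips off the head and tail, so MAP_ENTRY_NOSYNC is set on just
 * the middle piece; vm_map_simplify_entry() may later re-merge
 * neighbors whose flags become identical again.)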
2549f7fc307aSAlan Cox */ 25501c5196c3SKonstantin Belousov for (current = entry; current->start < end; 25511c5196c3SKonstantin Belousov current = current->next) { 2552f7fc307aSAlan Cox if (current->eflags & MAP_ENTRY_IS_SUB_MAP) 2553867a482dSJohn Dyson continue; 2554fed9a903SJohn Dyson 255547221757SJohn Dyson vm_map_clip_end(map, current, end); 2556fed9a903SJohn Dyson 2557f7fc307aSAlan Cox switch (behav) { 2558867a482dSJohn Dyson case MADV_NORMAL: 25597f866e4bSAlan Cox vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL); 2560867a482dSJohn Dyson break; 2561867a482dSJohn Dyson case MADV_SEQUENTIAL: 25627f866e4bSAlan Cox vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL); 2563867a482dSJohn Dyson break; 2564867a482dSJohn Dyson case MADV_RANDOM: 25657f866e4bSAlan Cox vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM); 2566867a482dSJohn Dyson break; 25674f79d873SMatthew Dillon case MADV_NOSYNC: 25684f79d873SMatthew Dillon current->eflags |= MAP_ENTRY_NOSYNC; 25694f79d873SMatthew Dillon break; 25704f79d873SMatthew Dillon case MADV_AUTOSYNC: 25714f79d873SMatthew Dillon current->eflags &= ~MAP_ENTRY_NOSYNC; 25724f79d873SMatthew Dillon break; 25739730a5daSPaul Saab case MADV_NOCORE: 25749730a5daSPaul Saab current->eflags |= MAP_ENTRY_NOCOREDUMP; 25759730a5daSPaul Saab break; 25769730a5daSPaul Saab case MADV_CORE: 25779730a5daSPaul Saab current->eflags &= ~MAP_ENTRY_NOCOREDUMP; 25789730a5daSPaul Saab break; 2579867a482dSJohn Dyson default: 2580867a482dSJohn Dyson break; 2581867a482dSJohn Dyson } 2582f7fc307aSAlan Cox vm_map_simplify_entry(map, current); 2583867a482dSJohn Dyson } 2584867a482dSJohn Dyson vm_map_unlock(map); 2585b4309055SMatthew Dillon } else { 258692a59946SJohn Baldwin vm_pindex_t pstart, pend; 2587f7fc307aSAlan Cox 2588f7fc307aSAlan Cox /* 2589f7fc307aSAlan Cox * madvise behaviors that are implemented in the underlying 2590f7fc307aSAlan Cox * vm_object. 2591f7fc307aSAlan Cox * 2592f7fc307aSAlan Cox * Since we don't clip the vm_map_entry, we have to clip 2593f7fc307aSAlan Cox * the vm_object pindex and count. 2594f7fc307aSAlan Cox */ 25951c5196c3SKonstantin Belousov for (current = entry; current->start < end; 25961c5196c3SKonstantin Belousov current = current->next) { 259751321f7cSAlan Cox vm_offset_t useEnd, useStart; 25985f99b57cSMatthew Dillon 2599f7fc307aSAlan Cox if (current->eflags & MAP_ENTRY_IS_SUB_MAP) 2600f7fc307aSAlan Cox continue; 2601f7fc307aSAlan Cox 260292a59946SJohn Baldwin pstart = OFF_TO_IDX(current->offset); 260392a59946SJohn Baldwin pend = pstart + atop(current->end - current->start); 26045f99b57cSMatthew Dillon useStart = current->start; 260551321f7cSAlan Cox useEnd = current->end; 2606f7fc307aSAlan Cox 2607f7fc307aSAlan Cox if (current->start < start) { 260892a59946SJohn Baldwin pstart += atop(start - current->start); 26095f99b57cSMatthew Dillon useStart = start; 2610f7fc307aSAlan Cox } 261151321f7cSAlan Cox if (current->end > end) { 261292a59946SJohn Baldwin pend -= atop(current->end - end); 261351321f7cSAlan Cox useEnd = end; 261451321f7cSAlan Cox } 2615f7fc307aSAlan Cox 261692a59946SJohn Baldwin if (pstart >= pend) 2617f7fc307aSAlan Cox continue; 2618f7fc307aSAlan Cox 261951321f7cSAlan Cox /* 262051321f7cSAlan Cox * Perform the pmap_advise() before clearing 262151321f7cSAlan Cox * PGA_REFERENCED in vm_page_advise(). 
Otherwise, a 262251321f7cSAlan Cox * concurrent pmap operation, such as pmap_remove(), 262351321f7cSAlan Cox * could clear a reference in the pmap and set 262451321f7cSAlan Cox * PGA_REFERENCED on the page before the pmap_advise() 262551321f7cSAlan Cox * had completed. Consequently, the page would appear 262651321f7cSAlan Cox * referenced based upon an old reference that 262751321f7cSAlan Cox * occurred before this pmap_advise() ran. 262851321f7cSAlan Cox */ 262951321f7cSAlan Cox if (behav == MADV_DONTNEED || behav == MADV_FREE) 263051321f7cSAlan Cox pmap_advise(map->pmap, useStart, useEnd, 263151321f7cSAlan Cox behav); 263251321f7cSAlan Cox 263392a59946SJohn Baldwin vm_object_madvise(current->object.vm_object, pstart, 263492a59946SJohn Baldwin pend, behav); 263554432196SKonstantin Belousov 263654432196SKonstantin Belousov /* 263754432196SKonstantin Belousov * Pre-populate paging structures in the 263854432196SKonstantin Belousov * WILLNEED case. For wired entries, the 263954432196SKonstantin Belousov * paging structures are already populated. 264054432196SKonstantin Belousov */ 264154432196SKonstantin Belousov if (behav == MADV_WILLNEED && 264254432196SKonstantin Belousov current->wired_count == 0) { 26430551c08dSAlan Cox vm_map_pmap_enter(map, 26445f99b57cSMatthew Dillon useStart, 26454da4d293SAlan Cox current->protection, 2646f7fc307aSAlan Cox current->object.vm_object, 264792a59946SJohn Baldwin pstart, 264892a59946SJohn Baldwin ptoa(pend - pstart), 2649e3026983SMatthew Dillon MAP_PREFAULT_MADVISE 2650b4309055SMatthew Dillon ); 2651f7fc307aSAlan Cox } 2652f7fc307aSAlan Cox } 2653f7fc307aSAlan Cox vm_map_unlock_read(map); 2654f7fc307aSAlan Cox } 2655b4309055SMatthew Dillon return (0); 2656867a482dSJohn Dyson } 2657867a482dSJohn Dyson 2658867a482dSJohn Dyson 2659867a482dSJohn Dyson /* 2660df8bae1dSRodney W. Grimes * vm_map_inherit: 2661df8bae1dSRodney W. Grimes * 2662df8bae1dSRodney W. Grimes * Sets the inheritance of the specified address 2663df8bae1dSRodney W. Grimes * range in the target map. Inheritance 2664df8bae1dSRodney W. Grimes * affects how the map will be shared with 2665e2abaaaaSAlan Cox * child maps at the time of vmspace_fork. 2666df8bae1dSRodney W. Grimes */ 2667df8bae1dSRodney W. Grimes int 2668b9dcd593SBruce Evans vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, 2669b9dcd593SBruce Evans vm_inherit_t new_inheritance) 2670df8bae1dSRodney W. Grimes { 2671c0877f10SJohn Dyson vm_map_entry_t entry; 2672df8bae1dSRodney W. Grimes vm_map_entry_t temp_entry; 2673df8bae1dSRodney W. Grimes 2674df8bae1dSRodney W. Grimes switch (new_inheritance) { 2675df8bae1dSRodney W. Grimes case VM_INHERIT_NONE: 2676df8bae1dSRodney W. Grimes case VM_INHERIT_COPY: 2677df8bae1dSRodney W. Grimes case VM_INHERIT_SHARE: 267878d7964bSXin LI case VM_INHERIT_ZERO: 2679df8bae1dSRodney W. Grimes break; 2680df8bae1dSRodney W. Grimes default: 2681df8bae1dSRodney W. Grimes return (KERN_INVALID_ARGUMENT); 2682df8bae1dSRodney W. Grimes } 268379e9451fSKonstantin Belousov if (start == end) 268479e9451fSKonstantin Belousov return (KERN_SUCCESS); 2685df8bae1dSRodney W. Grimes vm_map_lock(map); 2686df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 2687df8bae1dSRodney W. Grimes if (vm_map_lookup_entry(map, start, &temp_entry)) { 2688df8bae1dSRodney W. Grimes entry = temp_entry; 2689df8bae1dSRodney W. Grimes vm_map_clip_start(map, entry, start); 26900d94caffSDavid Greenman } else 2691df8bae1dSRodney W. 
Grimes entry = temp_entry->next; 26921c5196c3SKonstantin Belousov while (entry->start < end) { 2693df8bae1dSRodney W. Grimes vm_map_clip_end(map, entry, end); 269419bd0d9cSKonstantin Belousov if ((entry->eflags & MAP_ENTRY_GUARD) == 0 || 269519bd0d9cSKonstantin Belousov new_inheritance != VM_INHERIT_ZERO) 2696df8bae1dSRodney W. Grimes entry->inheritance = new_inheritance; 269744428f62SAlan Cox vm_map_simplify_entry(map, entry); 2698df8bae1dSRodney W. Grimes entry = entry->next; 2699df8bae1dSRodney W. Grimes } 2700df8bae1dSRodney W. Grimes vm_map_unlock(map); 2701df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 2702df8bae1dSRodney W. Grimes } 2703df8bae1dSRodney W. Grimes 2704df8bae1dSRodney W. Grimes /* 2705acd9a301SAlan Cox * vm_map_unwire: 2706acd9a301SAlan Cox * 2707e27e17b7SAlan Cox * Implements both kernel and user unwiring. 2708acd9a301SAlan Cox */ 2709acd9a301SAlan Cox int 2710acd9a301SAlan Cox vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, 2711abd498aaSBruce M Simpson int flags) 2712acd9a301SAlan Cox { 2713acd9a301SAlan Cox vm_map_entry_t entry, first_entry, tmp_entry; 2714acd9a301SAlan Cox vm_offset_t saved_start; 2715acd9a301SAlan Cox unsigned int last_timestamp; 2716acd9a301SAlan Cox int rv; 2717abd498aaSBruce M Simpson boolean_t need_wakeup, result, user_unwire; 2718acd9a301SAlan Cox 271979e9451fSKonstantin Belousov if (start == end) 272079e9451fSKonstantin Belousov return (KERN_SUCCESS); 2721abd498aaSBruce M Simpson user_unwire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE; 2722acd9a301SAlan Cox vm_map_lock(map); 2723acd9a301SAlan Cox VM_MAP_RANGE_CHECK(map, start, end); 2724acd9a301SAlan Cox if (!vm_map_lookup_entry(map, start, &first_entry)) { 2725abd498aaSBruce M Simpson if (flags & VM_MAP_WIRE_HOLESOK) 2726cbef13d8SAlan Cox first_entry = first_entry->next; 2727abd498aaSBruce M Simpson else { 2728acd9a301SAlan Cox vm_map_unlock(map); 2729acd9a301SAlan Cox return (KERN_INVALID_ADDRESS); 2730acd9a301SAlan Cox } 2731abd498aaSBruce M Simpson } 2732acd9a301SAlan Cox last_timestamp = map->timestamp; 2733acd9a301SAlan Cox entry = first_entry; 27341c5196c3SKonstantin Belousov while (entry->start < end) { 2735acd9a301SAlan Cox if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { 2736acd9a301SAlan Cox /* 2737acd9a301SAlan Cox * We have not yet clipped the entry. 2738acd9a301SAlan Cox */ 2739acd9a301SAlan Cox saved_start = (start >= entry->start) ? start : 2740acd9a301SAlan Cox entry->start; 2741acd9a301SAlan Cox entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 27428ce2d00aSPawel Jakub Dawidek if (vm_map_unlock_and_wait(map, 0)) { 2743acd9a301SAlan Cox /* 2744acd9a301SAlan Cox * Allow interruption of user unwiring? 2745acd9a301SAlan Cox */ 2746acd9a301SAlan Cox } 2747acd9a301SAlan Cox vm_map_lock(map); 2748acd9a301SAlan Cox if (last_timestamp+1 != map->timestamp) { 2749acd9a301SAlan Cox /* 2750acd9a301SAlan Cox * Look again for the entry because the map was 2751acd9a301SAlan Cox * modified while it was unlocked. 2752acd9a301SAlan Cox * Specifically, the entry may have been 2753acd9a301SAlan Cox * clipped, merged, or deleted. 2754acd9a301SAlan Cox */ 2755acd9a301SAlan Cox if (!vm_map_lookup_entry(map, saved_start, 2756acd9a301SAlan Cox &tmp_entry)) { 2757cbef13d8SAlan Cox if (flags & VM_MAP_WIRE_HOLESOK) 2758cbef13d8SAlan Cox tmp_entry = tmp_entry->next; 2759cbef13d8SAlan Cox else { 2760acd9a301SAlan Cox if (saved_start == start) { 2761acd9a301SAlan Cox /* 2762acd9a301SAlan Cox * First_entry has been deleted. 
2763acd9a301SAlan Cox */ 2764acd9a301SAlan Cox vm_map_unlock(map); 2765acd9a301SAlan Cox return (KERN_INVALID_ADDRESS); 2766acd9a301SAlan Cox } 2767acd9a301SAlan Cox end = saved_start; 2768acd9a301SAlan Cox rv = KERN_INVALID_ADDRESS; 2769acd9a301SAlan Cox goto done; 2770acd9a301SAlan Cox } 2771cbef13d8SAlan Cox } 2772acd9a301SAlan Cox if (entry == first_entry) 2773acd9a301SAlan Cox first_entry = tmp_entry; 2774acd9a301SAlan Cox else 2775acd9a301SAlan Cox first_entry = NULL; 2776acd9a301SAlan Cox entry = tmp_entry; 2777acd9a301SAlan Cox } 2778acd9a301SAlan Cox last_timestamp = map->timestamp; 2779acd9a301SAlan Cox continue; 2780acd9a301SAlan Cox } 2781acd9a301SAlan Cox vm_map_clip_start(map, entry, start); 2782acd9a301SAlan Cox vm_map_clip_end(map, entry, end); 2783acd9a301SAlan Cox /* 2784acd9a301SAlan Cox * Mark the entry in case the map lock is released. (See 2785acd9a301SAlan Cox * above.) 2786acd9a301SAlan Cox */ 2787ff3ae454SKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 && 2788ff3ae454SKonstantin Belousov entry->wiring_thread == NULL, 2789ff3ae454SKonstantin Belousov ("owned map entry %p", entry)); 2790acd9a301SAlan Cox entry->eflags |= MAP_ENTRY_IN_TRANSITION; 27910acea7dfSKonstantin Belousov entry->wiring_thread = curthread; 2792acd9a301SAlan Cox /* 2793acd9a301SAlan Cox * Check the map for holes in the specified region. 2794abd498aaSBruce M Simpson * If VM_MAP_WIRE_HOLESOK was specified, skip this check. 2795acd9a301SAlan Cox */ 2796abd498aaSBruce M Simpson if (((flags & VM_MAP_WIRE_HOLESOK) == 0) && 27971c5196c3SKonstantin Belousov (entry->end < end && entry->next->start > entry->end)) { 2798acd9a301SAlan Cox end = entry->end; 2799acd9a301SAlan Cox rv = KERN_INVALID_ADDRESS; 2800acd9a301SAlan Cox goto done; 2801acd9a301SAlan Cox } 2802acd9a301SAlan Cox /* 28033ffbc0cdSAlan Cox * If system unwiring, require that the entry is system wired. 2804acd9a301SAlan Cox */ 28050ada205eSBrian Feldman if (!user_unwire && 28060ada205eSBrian Feldman vm_map_entry_system_wired_count(entry) == 0) { 2807acd9a301SAlan Cox end = entry->end; 2808acd9a301SAlan Cox rv = KERN_INVALID_ARGUMENT; 2809acd9a301SAlan Cox goto done; 2810acd9a301SAlan Cox } 2811acd9a301SAlan Cox entry = entry->next; 2812acd9a301SAlan Cox } 2813acd9a301SAlan Cox rv = KERN_SUCCESS; 2814acd9a301SAlan Cox done: 2815e27e17b7SAlan Cox need_wakeup = FALSE; 2816acd9a301SAlan Cox if (first_entry == NULL) { 2817acd9a301SAlan Cox result = vm_map_lookup_entry(map, start, &first_entry); 2818cbef13d8SAlan Cox if (!result && (flags & VM_MAP_WIRE_HOLESOK)) 2819cbef13d8SAlan Cox first_entry = first_entry->next; 2820cbef13d8SAlan Cox else 2821acd9a301SAlan Cox KASSERT(result, ("vm_map_unwire: lookup failed")); 2822acd9a301SAlan Cox } 28231c5196c3SKonstantin Belousov for (entry = first_entry; entry->start < end; entry = entry->next) { 28240acea7dfSKonstantin Belousov /* 28250acea7dfSKonstantin Belousov * If VM_MAP_WIRE_HOLESOK was specified, an empty 28260acea7dfSKonstantin Belousov * space in the unwired region could have been mapped 28270acea7dfSKonstantin Belousov * while the map lock was dropped for draining 28280acea7dfSKonstantin Belousov * MAP_ENTRY_IN_TRANSITION. Moreover, another thread 28290acea7dfSKonstantin Belousov * could be simultaneously wiring this new mapping 28300acea7dfSKonstantin Belousov * entry. Detect these cases and skip any entries 28310acea7dfSKonstantin Belousov * marked as in transition by us. 
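 *
 * (Illustrative scenario, not from the original source: thread A
 * unwires [start, end) with VM_MAP_WIRE_HOLESOK and drops the map
 * lock to drain a transition; thread B then maps a hole in that
 * range and starts wiring it, marking its new entry in-transition
 * with wiring_thread == B. Thread A must skip B's entry here
 * rather than clear B's flags.)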
28320acea7dfSKonstantin Belousov */ 28330acea7dfSKonstantin Belousov if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || 28340acea7dfSKonstantin Belousov entry->wiring_thread != curthread) { 28350acea7dfSKonstantin Belousov KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0, 28360acea7dfSKonstantin Belousov ("vm_map_unwire: !HOLESOK and new/changed entry")); 28370acea7dfSKonstantin Belousov continue; 28380acea7dfSKonstantin Belousov } 28390acea7dfSKonstantin Belousov 28403ffbc0cdSAlan Cox if (rv == KERN_SUCCESS && (!user_unwire || 28413ffbc0cdSAlan Cox (entry->eflags & MAP_ENTRY_USER_WIRED))) { 2842b2f3846aSAlan Cox if (user_unwire) 2843b2f3846aSAlan Cox entry->eflags &= ~MAP_ENTRY_USER_WIRED; 284403462509SAlan Cox if (entry->wired_count == 1) 284503462509SAlan Cox vm_map_entry_unwire(map, entry); 284603462509SAlan Cox else 2847b2f3846aSAlan Cox entry->wired_count--; 2848b2f3846aSAlan Cox } 28490acea7dfSKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, 2850ff3ae454SKonstantin Belousov ("vm_map_unwire: in-transition flag missing %p", entry)); 2851ff3ae454SKonstantin Belousov KASSERT(entry->wiring_thread == curthread, 2852ff3ae454SKonstantin Belousov ("vm_map_unwire: alien wire %p", entry)); 2853acd9a301SAlan Cox entry->eflags &= ~MAP_ENTRY_IN_TRANSITION; 28540acea7dfSKonstantin Belousov entry->wiring_thread = NULL; 2855acd9a301SAlan Cox if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { 2856acd9a301SAlan Cox entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; 2857acd9a301SAlan Cox need_wakeup = TRUE; 2858acd9a301SAlan Cox } 2859acd9a301SAlan Cox vm_map_simplify_entry(map, entry); 2860acd9a301SAlan Cox } 2861acd9a301SAlan Cox vm_map_unlock(map); 2862acd9a301SAlan Cox if (need_wakeup) 2863acd9a301SAlan Cox vm_map_wakeup(map); 2864acd9a301SAlan Cox return (rv); 2865acd9a301SAlan Cox } 2866acd9a301SAlan Cox 2867acd9a301SAlan Cox /* 286866cd575bSAlan Cox * vm_map_wire_entry_failure: 286966cd575bSAlan Cox * 287066cd575bSAlan Cox * Handle a wiring failure on the given entry. 287166cd575bSAlan Cox * 287266cd575bSAlan Cox * The map should be locked. 287366cd575bSAlan Cox */ 287466cd575bSAlan Cox static void 287566cd575bSAlan Cox vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, 287666cd575bSAlan Cox vm_offset_t failed_addr) 287766cd575bSAlan Cox { 287866cd575bSAlan Cox 287966cd575bSAlan Cox VM_MAP_ASSERT_LOCKED(map); 288066cd575bSAlan Cox KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 && 288166cd575bSAlan Cox entry->wired_count == 1, 288266cd575bSAlan Cox ("vm_map_wire_entry_failure: entry %p isn't being wired", entry)); 288366cd575bSAlan Cox KASSERT(failed_addr < entry->end, 288466cd575bSAlan Cox ("vm_map_wire_entry_failure: entry %p was fully wired", entry)); 288566cd575bSAlan Cox 288666cd575bSAlan Cox /* 288766cd575bSAlan Cox * If any pages at the start of this entry were successfully wired, 288866cd575bSAlan Cox * then unwire them. 288966cd575bSAlan Cox */ 289066cd575bSAlan Cox if (failed_addr > entry->start) { 289166cd575bSAlan Cox pmap_unwire(map->pmap, entry->start, failed_addr); 289266cd575bSAlan Cox vm_object_unwire(entry->object.vm_object, entry->offset, 289366cd575bSAlan Cox failed_addr - entry->start, PQ_ACTIVE); 289466cd575bSAlan Cox } 289566cd575bSAlan Cox 289666cd575bSAlan Cox /* 289766cd575bSAlan Cox * Assign an out-of-range value to represent the failure to wire this 289866cd575bSAlan Cox * entry. 
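 *
 * (Clarifying note, not from the original source: the cleanup loop
 * in vm_map_wire() recognizes wired_count == -1 and simply resets
 * it to 0, since no pages of this entry remain wired.)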
289966cd575bSAlan Cox */ 290066cd575bSAlan Cox entry->wired_count = -1; 290166cd575bSAlan Cox } 290266cd575bSAlan Cox 290366cd575bSAlan Cox /* 2904e27e17b7SAlan Cox * vm_map_wire: 2905e27e17b7SAlan Cox * 2906e27e17b7SAlan Cox * Implements both kernel and user wiring. 2907e27e17b7SAlan Cox */ 2908e27e17b7SAlan Cox int 2909e27e17b7SAlan Cox vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, 2910abd498aaSBruce M Simpson int flags) 2911e27e17b7SAlan Cox { 291212d7cc84SAlan Cox vm_map_entry_t entry, first_entry, tmp_entry; 291366cd575bSAlan Cox vm_offset_t faddr, saved_end, saved_start; 291412d7cc84SAlan Cox unsigned int last_timestamp; 291512d7cc84SAlan Cox int rv; 291666cd575bSAlan Cox boolean_t need_wakeup, result, user_wire; 2917e4cd31ddSJeff Roberson vm_prot_t prot; 2918e27e17b7SAlan Cox 291979e9451fSKonstantin Belousov if (start == end) 292079e9451fSKonstantin Belousov return (KERN_SUCCESS); 2921e4cd31ddSJeff Roberson prot = 0; 2922e4cd31ddSJeff Roberson if (flags & VM_MAP_WIRE_WRITE) 2923e4cd31ddSJeff Roberson prot |= VM_PROT_WRITE; 2924abd498aaSBruce M Simpson user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE; 292512d7cc84SAlan Cox vm_map_lock(map); 292612d7cc84SAlan Cox VM_MAP_RANGE_CHECK(map, start, end); 292712d7cc84SAlan Cox if (!vm_map_lookup_entry(map, start, &first_entry)) { 2928abd498aaSBruce M Simpson if (flags & VM_MAP_WIRE_HOLESOK) 2929cbef13d8SAlan Cox first_entry = first_entry->next; 2930abd498aaSBruce M Simpson else { 293112d7cc84SAlan Cox vm_map_unlock(map); 293212d7cc84SAlan Cox return (KERN_INVALID_ADDRESS); 293312d7cc84SAlan Cox } 2934abd498aaSBruce M Simpson } 293512d7cc84SAlan Cox last_timestamp = map->timestamp; 293612d7cc84SAlan Cox entry = first_entry; 29371c5196c3SKonstantin Belousov while (entry->start < end) { 293812d7cc84SAlan Cox if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { 293912d7cc84SAlan Cox /* 294012d7cc84SAlan Cox * We have not yet clipped the entry. 294112d7cc84SAlan Cox */ 294212d7cc84SAlan Cox saved_start = (start >= entry->start) ? start : 294312d7cc84SAlan Cox entry->start; 294412d7cc84SAlan Cox entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 29458ce2d00aSPawel Jakub Dawidek if (vm_map_unlock_and_wait(map, 0)) { 294612d7cc84SAlan Cox /* 294712d7cc84SAlan Cox * Allow interruption of user wiring? 294812d7cc84SAlan Cox */ 294912d7cc84SAlan Cox } 295012d7cc84SAlan Cox vm_map_lock(map); 295112d7cc84SAlan Cox if (last_timestamp + 1 != map->timestamp) { 295212d7cc84SAlan Cox /* 295312d7cc84SAlan Cox * Look again for the entry because the map was 295412d7cc84SAlan Cox * modified while it was unlocked. 295512d7cc84SAlan Cox * Specifically, the entry may have been 295612d7cc84SAlan Cox * clipped, merged, or deleted. 295712d7cc84SAlan Cox */ 295812d7cc84SAlan Cox if (!vm_map_lookup_entry(map, saved_start, 295912d7cc84SAlan Cox &tmp_entry)) { 2960cbef13d8SAlan Cox if (flags & VM_MAP_WIRE_HOLESOK) 2961cbef13d8SAlan Cox tmp_entry = tmp_entry->next; 2962cbef13d8SAlan Cox else { 296312d7cc84SAlan Cox if (saved_start == start) { 296412d7cc84SAlan Cox /* 296512d7cc84SAlan Cox * first_entry has been deleted. 
296612d7cc84SAlan Cox */ 296712d7cc84SAlan Cox vm_map_unlock(map); 296812d7cc84SAlan Cox return (KERN_INVALID_ADDRESS); 296912d7cc84SAlan Cox } 297012d7cc84SAlan Cox end = saved_start; 297112d7cc84SAlan Cox rv = KERN_INVALID_ADDRESS; 297212d7cc84SAlan Cox goto done; 297312d7cc84SAlan Cox } 2974cbef13d8SAlan Cox } 297512d7cc84SAlan Cox if (entry == first_entry) 297612d7cc84SAlan Cox first_entry = tmp_entry; 297712d7cc84SAlan Cox else 297812d7cc84SAlan Cox first_entry = NULL; 297912d7cc84SAlan Cox entry = tmp_entry; 298012d7cc84SAlan Cox } 298112d7cc84SAlan Cox last_timestamp = map->timestamp; 298212d7cc84SAlan Cox continue; 298312d7cc84SAlan Cox } 298412d7cc84SAlan Cox vm_map_clip_start(map, entry, start); 298512d7cc84SAlan Cox vm_map_clip_end(map, entry, end); 298612d7cc84SAlan Cox /* 298712d7cc84SAlan Cox * Mark the entry in case the map lock is released. (See 298812d7cc84SAlan Cox * above.) 298912d7cc84SAlan Cox */ 2990ff3ae454SKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 && 2991ff3ae454SKonstantin Belousov entry->wiring_thread == NULL, 2992ff3ae454SKonstantin Belousov ("owned map entry %p", entry)); 299312d7cc84SAlan Cox entry->eflags |= MAP_ENTRY_IN_TRANSITION; 29940acea7dfSKonstantin Belousov entry->wiring_thread = curthread; 2995e4cd31ddSJeff Roberson if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 2996e4cd31ddSJeff Roberson || (entry->protection & prot) != prot) { 2997529ab57bSKonstantin Belousov entry->eflags |= MAP_ENTRY_WIRE_SKIPPED; 29986d7e8091SKonstantin Belousov if ((flags & VM_MAP_WIRE_HOLESOK) == 0) { 29996d7e8091SKonstantin Belousov end = entry->end; 30006d7e8091SKonstantin Belousov rv = KERN_INVALID_ADDRESS; 30016d7e8091SKonstantin Belousov goto done; 30026d7e8091SKonstantin Belousov } 30036d7e8091SKonstantin Belousov goto next_entry; 30046d7e8091SKonstantin Belousov } 3005e4cd31ddSJeff Roberson if (entry->wired_count == 0) { 30060ada205eSBrian Feldman entry->wired_count++; 300712d7cc84SAlan Cox saved_start = entry->start; 300812d7cc84SAlan Cox saved_end = entry->end; 300966cd575bSAlan Cox 301012d7cc84SAlan Cox /* 301112d7cc84SAlan Cox * Release the map lock, relying on the in-transition 3012a5db445dSMax Laier * mark. Mark the map busy for fork. 301312d7cc84SAlan Cox */ 3014a5db445dSMax Laier vm_map_busy(map); 301512d7cc84SAlan Cox vm_map_unlock(map); 301666cd575bSAlan Cox 30170b695684SAlan Cox faddr = saved_start; 30180b695684SAlan Cox do { 301966cd575bSAlan Cox /* 302066cd575bSAlan Cox * Simulate a fault to get the page and enter 302166cd575bSAlan Cox * it into the physical map. 302266cd575bSAlan Cox */ 302366cd575bSAlan Cox if ((rv = vm_fault(map, faddr, VM_PROT_NONE, 30246a875bf9SKonstantin Belousov VM_FAULT_WIRE)) != KERN_SUCCESS) 302566cd575bSAlan Cox break; 30260b695684SAlan Cox } while ((faddr += PAGE_SIZE) < saved_end); 302712d7cc84SAlan Cox vm_map_lock(map); 3028a5db445dSMax Laier vm_map_unbusy(map); 302912d7cc84SAlan Cox if (last_timestamp + 1 != map->timestamp) { 303012d7cc84SAlan Cox /* 303112d7cc84SAlan Cox * Look again for the entry because the map was 303212d7cc84SAlan Cox * modified while it was unlocked. The entry 303312d7cc84SAlan Cox * may have been clipped, but NOT merged or 303412d7cc84SAlan Cox * deleted. 
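 *
 * (Clarifying note, not from the original source: the
 * MAP_ENTRY_IN_TRANSITION mark set above is what keeps the entry
 * from being merged or deleted while the map lock was dropped;
 * vm_map_delete(), for example, waits for in-transition entries
 * to drain before removing them.)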
303512d7cc84SAlan Cox */ 303612d7cc84SAlan Cox result = vm_map_lookup_entry(map, saved_start, 303712d7cc84SAlan Cox &tmp_entry); 303812d7cc84SAlan Cox KASSERT(result, ("vm_map_wire: lookup failed")); 303912d7cc84SAlan Cox if (entry == first_entry) 304012d7cc84SAlan Cox first_entry = tmp_entry; 304112d7cc84SAlan Cox else 304212d7cc84SAlan Cox first_entry = NULL; 304312d7cc84SAlan Cox entry = tmp_entry; 304428c58286SAlan Cox while (entry->end < saved_end) { 304566cd575bSAlan Cox /* 304666cd575bSAlan Cox * In case of failure, handle entries 304766cd575bSAlan Cox * that were not fully wired here; 304866cd575bSAlan Cox * fully wired entries are handled 304966cd575bSAlan Cox * later. 305066cd575bSAlan Cox */ 305166cd575bSAlan Cox if (rv != KERN_SUCCESS && 305266cd575bSAlan Cox faddr < entry->end) 305366cd575bSAlan Cox vm_map_wire_entry_failure(map, 305466cd575bSAlan Cox entry, faddr); 305512d7cc84SAlan Cox entry = entry->next; 305612d7cc84SAlan Cox } 305728c58286SAlan Cox } 305812d7cc84SAlan Cox last_timestamp = map->timestamp; 305912d7cc84SAlan Cox if (rv != KERN_SUCCESS) { 306066cd575bSAlan Cox vm_map_wire_entry_failure(map, entry, faddr); 306112d7cc84SAlan Cox end = entry->end; 306212d7cc84SAlan Cox goto done; 306312d7cc84SAlan Cox } 30640ada205eSBrian Feldman } else if (!user_wire || 30650ada205eSBrian Feldman (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { 30660ada205eSBrian Feldman entry->wired_count++; 306712d7cc84SAlan Cox } 306812d7cc84SAlan Cox /* 306912d7cc84SAlan Cox * Check the map for holes in the specified region. 3070abd498aaSBruce M Simpson * If VM_MAP_WIRE_HOLESOK was specified, skip this check. 307112d7cc84SAlan Cox */ 30726d7e8091SKonstantin Belousov next_entry: 3073f141ed73SKonstantin Belousov if ((flags & VM_MAP_WIRE_HOLESOK) == 0 && 30741c5196c3SKonstantin Belousov entry->end < end && entry->next->start > entry->end) { 307512d7cc84SAlan Cox end = entry->end; 307612d7cc84SAlan Cox rv = KERN_INVALID_ADDRESS; 307712d7cc84SAlan Cox goto done; 307812d7cc84SAlan Cox } 307912d7cc84SAlan Cox entry = entry->next; 308012d7cc84SAlan Cox } 308112d7cc84SAlan Cox rv = KERN_SUCCESS; 308212d7cc84SAlan Cox done: 308312d7cc84SAlan Cox need_wakeup = FALSE; 308412d7cc84SAlan Cox if (first_entry == NULL) { 308512d7cc84SAlan Cox result = vm_map_lookup_entry(map, start, &first_entry); 3086cbef13d8SAlan Cox if (!result && (flags & VM_MAP_WIRE_HOLESOK)) 3087cbef13d8SAlan Cox first_entry = first_entry->next; 3088cbef13d8SAlan Cox else 308912d7cc84SAlan Cox KASSERT(result, ("vm_map_wire: lookup failed")); 309012d7cc84SAlan Cox } 30911c5196c3SKonstantin Belousov for (entry = first_entry; entry->start < end; entry = entry->next) { 30920acea7dfSKonstantin Belousov /* 30930acea7dfSKonstantin Belousov * If VM_MAP_WIRE_HOLESOK was specified, an empty 30940acea7dfSKonstantin Belousov * space in the unwired region could have been mapped 30950acea7dfSKonstantin Belousov * while the map lock was dropped for faulting in the 30960acea7dfSKonstantin Belousov * pages or draining MAP_ENTRY_IN_TRANSITION. 30970acea7dfSKonstantin Belousov * Moreover, another thread could be simultaneously 30980acea7dfSKonstantin Belousov * wiring this new mapping entry. Detect these cases 3099546bb2d7SKonstantin Belousov * and skip any entries marked as in transition not by us. 
31000acea7dfSKonstantin Belousov */ 31010acea7dfSKonstantin Belousov if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || 31020acea7dfSKonstantin Belousov entry->wiring_thread != curthread) { 31030acea7dfSKonstantin Belousov KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0, 31040acea7dfSKonstantin Belousov ("vm_map_wire: !HOLESOK and new/changed entry")); 31050acea7dfSKonstantin Belousov continue; 31060acea7dfSKonstantin Belousov } 31070acea7dfSKonstantin Belousov 3108546bb2d7SKonstantin Belousov if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0) 3109546bb2d7SKonstantin Belousov goto next_entry_done; 3110546bb2d7SKonstantin Belousov 311112d7cc84SAlan Cox if (rv == KERN_SUCCESS) { 311212d7cc84SAlan Cox if (user_wire) 311312d7cc84SAlan Cox entry->eflags |= MAP_ENTRY_USER_WIRED; 311428c58286SAlan Cox } else if (entry->wired_count == -1) { 311528c58286SAlan Cox /* 311628c58286SAlan Cox * Wiring failed on this entry. Thus, unwiring is 311728c58286SAlan Cox * unnecessary. 311828c58286SAlan Cox */ 311928c58286SAlan Cox entry->wired_count = 0; 312003462509SAlan Cox } else if (!user_wire || 312103462509SAlan Cox (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { 312266cd575bSAlan Cox /* 312366cd575bSAlan Cox * Undo the wiring. Wiring succeeded on this entry 312466cd575bSAlan Cox * but failed on a later entry. 312566cd575bSAlan Cox */ 312603462509SAlan Cox if (entry->wired_count == 1) 312703462509SAlan Cox vm_map_entry_unwire(map, entry); 312803462509SAlan Cox else 312912d7cc84SAlan Cox entry->wired_count--; 313012d7cc84SAlan Cox } 31316d7e8091SKonstantin Belousov next_entry_done: 31320acea7dfSKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, 31330acea7dfSKonstantin Belousov ("vm_map_wire: in-transition flag missing %p", entry)); 31340acea7dfSKonstantin Belousov KASSERT(entry->wiring_thread == curthread, 31350acea7dfSKonstantin Belousov ("vm_map_wire: alien wire %p", entry)); 31360acea7dfSKonstantin Belousov entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION | 31370acea7dfSKonstantin Belousov MAP_ENTRY_WIRE_SKIPPED); 31380acea7dfSKonstantin Belousov entry->wiring_thread = NULL; 313912d7cc84SAlan Cox if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { 314012d7cc84SAlan Cox entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; 314112d7cc84SAlan Cox need_wakeup = TRUE; 314212d7cc84SAlan Cox } 314312d7cc84SAlan Cox vm_map_simplify_entry(map, entry); 314412d7cc84SAlan Cox } 314512d7cc84SAlan Cox vm_map_unlock(map); 314612d7cc84SAlan Cox if (need_wakeup) 314712d7cc84SAlan Cox vm_map_wakeup(map); 314812d7cc84SAlan Cox return (rv); 3149e27e17b7SAlan Cox } 3150e27e17b7SAlan Cox 3151e27e17b7SAlan Cox /* 3152950f8459SAlan Cox * vm_map_sync 3153df8bae1dSRodney W. Grimes * 3154df8bae1dSRodney W. Grimes * Push any dirty cached pages in the address range to their pager. 3155df8bae1dSRodney W. Grimes * If syncio is TRUE, dirty pages are written synchronously. 3156df8bae1dSRodney W. Grimes * If invalidate is TRUE, any cached pages are freed as well. 3157df8bae1dSRodney W. Grimes * 3158637315edSAlan Cox * If the size of the region from start to end is zero, we are 3159637315edSAlan Cox * supposed to flush all modified pages within the region containing 3160637315edSAlan Cox * start. Unfortunately, a region can be split or coalesced with 3161637315edSAlan Cox * neighboring regions, making it difficult to determine what the 3162637315edSAlan Cox * original region was. Therefore, we approximate this requirement by 3163637315edSAlan Cox * flushing the current region containing start. 3164637315edSAlan Cox * 3165df8bae1dSRodney W. 
Grimes * Returns an error if any part of the specified range is not mapped. 3166df8bae1dSRodney W. Grimes */ 3167df8bae1dSRodney W. Grimes int 3168950f8459SAlan Cox vm_map_sync( 31691b40f8c0SMatthew Dillon vm_map_t map, 31701b40f8c0SMatthew Dillon vm_offset_t start, 31711b40f8c0SMatthew Dillon vm_offset_t end, 31721b40f8c0SMatthew Dillon boolean_t syncio, 31731b40f8c0SMatthew Dillon boolean_t invalidate) 3174df8bae1dSRodney W. Grimes { 3175c0877f10SJohn Dyson vm_map_entry_t current; 3176df8bae1dSRodney W. Grimes vm_map_entry_t entry; 3177df8bae1dSRodney W. Grimes vm_size_t size; 3178df8bae1dSRodney W. Grimes vm_object_t object; 3179a316d390SJohn Dyson vm_ooffset_t offset; 3180e53fa61bSKonstantin Belousov unsigned int last_timestamp; 3181126d6082SKonstantin Belousov boolean_t failed; 3182df8bae1dSRodney W. Grimes 3183df8bae1dSRodney W. Grimes vm_map_lock_read(map); 3184df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 3185df8bae1dSRodney W. Grimes if (!vm_map_lookup_entry(map, start, &entry)) { 3186df8bae1dSRodney W. Grimes vm_map_unlock_read(map); 3187df8bae1dSRodney W. Grimes return (KERN_INVALID_ADDRESS); 3188637315edSAlan Cox } else if (start == end) { 3189637315edSAlan Cox start = entry->start; 3190637315edSAlan Cox end = entry->end; 3191df8bae1dSRodney W. Grimes } 3192df8bae1dSRodney W. Grimes /* 3193b7b7cd44SAlan Cox * Make a first pass to check for user-wired memory and holes. 3194df8bae1dSRodney W. Grimes */ 31951c5196c3SKonstantin Belousov for (current = entry; current->start < end; current = current->next) { 3196b7b7cd44SAlan Cox if (invalidate && (current->eflags & MAP_ENTRY_USER_WIRED)) { 3197df8bae1dSRodney W. Grimes vm_map_unlock_read(map); 3198df8bae1dSRodney W. Grimes return (KERN_INVALID_ARGUMENT); 3199df8bae1dSRodney W. Grimes } 3200df8bae1dSRodney W. Grimes if (end > current->end && 32011c5196c3SKonstantin Belousov current->end != current->next->start) { 3202df8bae1dSRodney W. Grimes vm_map_unlock_read(map); 3203df8bae1dSRodney W. Grimes return (KERN_INVALID_ADDRESS); 3204df8bae1dSRodney W. Grimes } 3205df8bae1dSRodney W. Grimes } 3206df8bae1dSRodney W. Grimes 32072cf13952SAlan Cox if (invalidate) 3208bc105a67SAlan Cox pmap_remove(map->pmap, start, end); 3209126d6082SKonstantin Belousov failed = FALSE; 32102cf13952SAlan Cox 3211df8bae1dSRodney W. Grimes /* 3212df8bae1dSRodney W. Grimes * Make a second pass, cleaning/uncaching pages from the indicated 3213df8bae1dSRodney W. Grimes * objects as we go. 3214df8bae1dSRodney W. Grimes */ 32151c5196c3SKonstantin Belousov for (current = entry; current->start < end;) { 3216df8bae1dSRodney W. Grimes offset = current->offset + (start - current->start); 3217df8bae1dSRodney W. Grimes size = (end <= current->end ? end : current->end) - start; 32189fdfe602SMatthew Dillon if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 3219c0877f10SJohn Dyson vm_map_t smap; 3220df8bae1dSRodney W. Grimes vm_map_entry_t tentry; 3221df8bae1dSRodney W. Grimes vm_size_t tsize; 3222df8bae1dSRodney W. Grimes 32239fdfe602SMatthew Dillon smap = current->object.sub_map; 3224df8bae1dSRodney W. Grimes vm_map_lock_read(smap); 3225df8bae1dSRodney W. Grimes (void) vm_map_lookup_entry(smap, offset, &tentry); 3226df8bae1dSRodney W. Grimes tsize = tentry->end - offset; 3227df8bae1dSRodney W. Grimes if (tsize < size) 3228df8bae1dSRodney W. Grimes size = tsize; 3229df8bae1dSRodney W. Grimes object = tentry->object.vm_object; 3230df8bae1dSRodney W. Grimes offset = tentry->offset + (offset - tentry->start); 3231df8bae1dSRodney W. 
Grimes vm_map_unlock_read(smap); 3232df8bae1dSRodney W. Grimes } else { 3233df8bae1dSRodney W. Grimes object = current->object.vm_object; 3234df8bae1dSRodney W. Grimes } 3235e53fa61bSKonstantin Belousov vm_object_reference(object); 3236e53fa61bSKonstantin Belousov last_timestamp = map->timestamp; 3237e53fa61bSKonstantin Belousov vm_map_unlock_read(map); 3238126d6082SKonstantin Belousov if (!vm_object_sync(object, offset, size, syncio, invalidate)) 3239126d6082SKonstantin Belousov failed = TRUE; 3240df8bae1dSRodney W. Grimes start += size; 3241e53fa61bSKonstantin Belousov vm_object_deallocate(object); 3242e53fa61bSKonstantin Belousov vm_map_lock_read(map); 3243e53fa61bSKonstantin Belousov if (last_timestamp == map->timestamp || 3244e53fa61bSKonstantin Belousov !vm_map_lookup_entry(map, start, &current)) 3245e53fa61bSKonstantin Belousov current = current->next; 3246df8bae1dSRodney W. Grimes } 3247df8bae1dSRodney W. Grimes 3248df8bae1dSRodney W. Grimes vm_map_unlock_read(map); 3249126d6082SKonstantin Belousov return (failed ? KERN_FAILURE : KERN_SUCCESS); 3250df8bae1dSRodney W. Grimes } 3251df8bae1dSRodney W. Grimes 3252df8bae1dSRodney W. Grimes /* 3253df8bae1dSRodney W. Grimes * vm_map_entry_unwire: [ internal use only ] 3254df8bae1dSRodney W. Grimes * 3255df8bae1dSRodney W. Grimes * Make the region specified by this entry pageable. 3256df8bae1dSRodney W. Grimes * 3257df8bae1dSRodney W. Grimes * The map in question should be locked. 3258df8bae1dSRodney W. Grimes * [This is the reason for this routine's existence.] 3259df8bae1dSRodney W. Grimes */ 32600362d7d7SJohn Dyson static void 32611b40f8c0SMatthew Dillon vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry) 3262df8bae1dSRodney W. Grimes { 326303462509SAlan Cox 326303462509SAlan Cox VM_MAP_ASSERT_LOCKED(map); 326403462509SAlan Cox KASSERT(entry->wired_count > 0, 326503462509SAlan Cox ("vm_map_entry_unwire: entry %p isn't wired", entry)); 326703462509SAlan Cox pmap_unwire(map->pmap, entry->start, entry->end); 326803462509SAlan Cox vm_object_unwire(entry->object.vm_object, entry->offset, entry->end - 326903462509SAlan Cox entry->start, PQ_ACTIVE); 3270df8bae1dSRodney W. Grimes entry->wired_count = 0; 3271df8bae1dSRodney W. Grimes } 3272df8bae1dSRodney W. Grimes 32730b367bd8SKonstantin Belousov static void 32740b367bd8SKonstantin Belousov vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map) 32750b367bd8SKonstantin Belousov { 32760b367bd8SKonstantin Belousov 32770b367bd8SKonstantin Belousov if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) 32780b367bd8SKonstantin Belousov vm_object_deallocate(entry->object.vm_object); 32790b367bd8SKonstantin Belousov uma_zfree(system_map ? kmapentzone : mapentzone, entry); 32800b367bd8SKonstantin Belousov } 32810b367bd8SKonstantin Belousov 3282df8bae1dSRodney W. Grimes /* 3283df8bae1dSRodney W. Grimes * vm_map_entry_delete: [ internal use only ] 3284df8bae1dSRodney W. Grimes * 3285df8bae1dSRodney W. Grimes * Deallocate the given entry from the target map. 3286df8bae1dSRodney W. Grimes */ 32870362d7d7SJohn Dyson static void 32881b40f8c0SMatthew Dillon vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry) 3289df8bae1dSRodney W.

/*
 * vm_map_entry_delete:	[ internal use only ]
 *
 *	Deallocate the given entry from the target map.
 */
static void
vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
{
	vm_object_t object;
	vm_pindex_t offidxstart, offidxend, count, size1;
	vm_size_t size;

	vm_map_entry_unlink(map, entry, UNLINK_MERGE_NONE);
	object = entry->object.vm_object;

	if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
		MPASS(entry->cred == NULL);
		MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
		MPASS(object == NULL);
		vm_map_entry_deallocate(entry, map->system_map);
		return;
	}

	size = entry->end - entry->start;
	map->size -= size;

	if (entry->cred != NULL) {
		swap_release_by_cred(size, entry->cred);
		crfree(entry->cred);
	}

	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
	    (object != NULL)) {
		KASSERT(entry->cred == NULL || object->cred == NULL ||
		    (entry->eflags & MAP_ENTRY_NEEDS_COPY),
		    ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry));
		count = atop(size);
		offidxstart = OFF_TO_IDX(entry->offset);
		offidxend = offidxstart + count;
		VM_OBJECT_WLOCK(object);
		if (object->ref_count != 1 && ((object->flags & (OBJ_NOSPLIT |
		    OBJ_ONEMAPPING)) == OBJ_ONEMAPPING ||
		    object == kernel_object)) {
			vm_object_collapse(object);

			/*
			 * The option OBJPR_NOTMAPPED can be passed here
			 * because vm_map_delete() already performed
			 * pmap_remove() on the only mapping to this range
			 * of pages.
			 */
			vm_object_page_remove(object, offidxstart, offidxend,
			    OBJPR_NOTMAPPED);
			if (object->type == OBJT_SWAP)
				swap_pager_freespace(object, offidxstart,
				    count);
			if (offidxend >= object->size &&
			    offidxstart < object->size) {
				size1 = object->size;
				object->size = offidxstart;
				if (object->cred != NULL) {
					size1 -= object->size;
					KASSERT(object->charge >= ptoa(size1),
					    ("object %p charge < 0", object));
					swap_release_by_cred(ptoa(size1),
					    object->cred);
					object->charge -= ptoa(size1);
				}
			}
		}
		VM_OBJECT_WUNLOCK(object);
	} else
		entry->object.vm_object = NULL;
	if (map->system_map)
		vm_map_entry_deallocate(entry, TRUE);
	else {
		entry->next = curthread->td_map_def_user;
		curthread->td_map_def_user = entry;
	}
}
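
/*
 * Editor's sketch of the deferral pattern used just above: entries removed
 * from a user map are chained on curthread->td_map_def_user and freed only
 * after the map lock is dropped, since vm_map_entry_deallocate() may touch
 * vnode locks.  A simplified drain loop is shown; the real
 * vm_map_process_deferred() in this file also handles MAP_ENTRY_VN_WRITECNT
 * bookkeeping.
 */
#if 0
static void
example_process_deferred(void)
{
	vm_map_entry_t entry, next;

	entry = curthread->td_map_def_user;
	curthread->td_map_def_user = NULL;
	while (entry != NULL) {
		next = entry->next;
		vm_map_entry_deallocate(entry, FALSE);	/* user map entry */
		entry = next;
	}
}
#endif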

/*
 * vm_map_delete:	[ internal use only ]
 *
 *	Deallocates the given address range from the target
 *	map.
 */
int
vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	vm_map_entry_t entry;
	vm_map_entry_t first_entry;

	VM_MAP_ASSERT_LOCKED(map);
	if (start == end)
		return (KERN_SUCCESS);

	/*
	 * Find the start of the region, and clip it
	 */
	if (!vm_map_lookup_entry(map, start, &first_entry))
		entry = first_entry->next;
	else {
		entry = first_entry;
		vm_map_clip_start(map, entry, start);
	}

	/*
	 * Step through all entries in this region
	 */
	while (entry->start < end) {
		vm_map_entry_t next;

		/*
		 * Wait for wiring or unwiring of an entry to complete.
		 * Also wait for any system wirings to disappear on
		 * user maps.
		 */
		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 ||
		    (vm_map_pmap(map) != kernel_pmap &&
		    vm_map_entry_system_wired_count(entry) != 0)) {
			unsigned int last_timestamp;
			vm_offset_t saved_start;
			vm_map_entry_t tmp_entry;

			saved_start = entry->start;
			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
			last_timestamp = map->timestamp;
			(void) vm_map_unlock_and_wait(map, 0);
			vm_map_lock(map);
			if (last_timestamp + 1 != map->timestamp) {
				/*
				 * Look again for the entry because the map was
				 * modified while it was unlocked.
				 * Specifically, the entry may have been
				 * clipped, merged, or deleted.
				 */
				if (!vm_map_lookup_entry(map, saved_start,
				    &tmp_entry))
					entry = tmp_entry->next;
				else {
					entry = tmp_entry;
					vm_map_clip_start(map, entry,
					    saved_start);
				}
			}
			continue;
		}
		vm_map_clip_end(map, entry, end);

		next = entry->next;

		/*
		 * Unwire before removing addresses from the pmap; otherwise,
		 * unwiring will put the entries back in the pmap.
		 */
		if (entry->wired_count != 0)
			vm_map_entry_unwire(map, entry);

		/*
		 * Remove mappings for the pages, but only if the
		 * mappings could exist.  For instance, it does not
		 * make sense to call pmap_remove() for guard entries.
		 */
		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 ||
		    entry->object.vm_object != NULL)
			pmap_remove(map->pmap, entry->start, entry->end);

		if (entry->end == map->anon_loc)
			map->anon_loc = entry->start;

		/*
		 * Delete the entry only after removing all pmap
		 * entries pointing to its pages.  (Otherwise, its
		 * page frames may be reallocated, and any modify bits
		 * will be set in the wrong object!)
		 */
		vm_map_entry_delete(map, entry);
		entry = next;
	}
	return (KERN_SUCCESS);
}
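
/*
 * Editor's sketch (hypothetical helper, not upstream code): because
 * vm_map_delete() requires the caller to hold the map lock, a caller can
 * batch a delete and a subsequent insert atomically under one lock
 * acquisition, which is roughly what a MAP_FIXED-style mapping replacement
 * needs.
 */
#if 0
static int
example_replace_range(vm_map_t map, vm_offset_t start, vm_offset_t end,
    vm_object_t newobj, vm_ooffset_t off, vm_prot_t prot)
{
	int rv;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	rv = vm_map_delete(map, start, end);
	if (rv == KERN_SUCCESS)
		rv = vm_map_insert(map, newobj, off, start, end, prot,
		    prot, 0);
	vm_map_unlock(map);
	return (rv);
}
#endif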

/*
 * vm_map_remove:
 *
 *	Remove the given address range from the target map.
 *	This is the exported form of vm_map_delete.
 */
int
vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	int result;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	result = vm_map_delete(map, start, end);
	vm_map_unlock(map);
	return (result);
}
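
/*
 * Editor's sketch: vm_map_remove() is the self-locking form, so a caller
 * holding no map locks can tear down a page-aligned range in one call.
 * The helper name and the submap argument are hypothetical.
 */
#if 0
static void
example_free_range(vm_map_t submap, vm_offset_t addr, vm_size_t size)
{

	(void)vm_map_remove(submap, trunc_page(addr),
	    round_page(addr + size));
}
#endif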

/*
 * vm_map_check_protection:
 *
 *	Assert that the target map allows the specified privilege on the
 *	entire address region given.  The entire region must be allocated.
 *
 *	WARNING!  This code does not and should not check whether the
 *	contents of the region are accessible.  For example a smaller file
 *	might be mapped into a larger address space.
 *
 *	NOTE!  This code is also called by munmap().
 *
 *	The map must be locked.  A read lock is sufficient.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
    vm_prot_t protection)
{
	vm_map_entry_t entry;
	vm_map_entry_t tmp_entry;

	if (!vm_map_lookup_entry(map, start, &tmp_entry))
		return (FALSE);
	entry = tmp_entry;

	while (start < end) {
		/*
		 * No holes allowed!
		 */
		if (start < entry->start)
			return (FALSE);
		/*
		 * Check protection associated with entry.
		 */
		if ((entry->protection & protection) != protection)
			return (FALSE);
		/* go to next entry */
		start = entry->end;
		entry = entry->next;
	}
	return (TRUE);
}
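
/*
 * Editor's sketch of the intended calling pattern (hypothetical helper):
 * take at least a read lock, check the privilege over the whole range, and
 * keep the lock across any operation that depends on the answer.
 */
#if 0
static boolean_t
example_range_is_writable(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	boolean_t ok;

	vm_map_lock_read(map);
	ok = vm_map_check_protection(map, start, end, VM_PROT_WRITE);
	vm_map_unlock_read(map);
	return (ok);
}
#endif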

/*
 * vm_map_copy_entry:
 *
 *	Copies the contents of the source entry to the destination
 *	entry.  The entries *must* be aligned properly.
 */
static void
vm_map_copy_entry(
	vm_map_t src_map,
	vm_map_t dst_map,
	vm_map_entry_t src_entry,
	vm_map_entry_t dst_entry,
	vm_ooffset_t *fork_charge)
{
	vm_object_t src_object;
	vm_map_entry_t fake_entry;
	vm_offset_t size;
	struct ucred *cred;
	int charged;

	VM_MAP_ASSERT_LOCKED(dst_map);

	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
		return;

	if (src_entry->wired_count == 0 ||
	    (src_entry->protection & VM_PROT_WRITE) == 0) {
		/*
		 * If the source entry is marked needs_copy, it is already
		 * write-protected.
		 */
		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0 &&
		    (src_entry->protection & VM_PROT_WRITE) != 0) {
			pmap_protect(src_map->pmap,
			    src_entry->start,
			    src_entry->end,
			    src_entry->protection & ~VM_PROT_WRITE);
		}

		/*
		 * Make a copy of the object.
		 */
		size = src_entry->end - src_entry->start;
		if ((src_object = src_entry->object.vm_object) != NULL) {
			VM_OBJECT_WLOCK(src_object);
			charged = ENTRY_CHARGED(src_entry);
			if (src_object->handle == NULL &&
			    (src_object->type == OBJT_DEFAULT ||
			    src_object->type == OBJT_SWAP)) {
				vm_object_collapse(src_object);
				if ((src_object->flags & (OBJ_NOSPLIT |
				    OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
					vm_object_split(src_entry);
					src_object =
					    src_entry->object.vm_object;
				}
			}
			vm_object_reference_locked(src_object);
			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
			if (src_entry->cred != NULL &&
			    !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
				KASSERT(src_object->cred == NULL,
				    ("OVERCOMMIT: vm_map_copy_entry: cred %p",
				    src_object));
				src_object->cred = src_entry->cred;
				src_object->charge = size;
			}
			VM_OBJECT_WUNLOCK(src_object);
			dst_entry->object.vm_object = src_object;
			if (charged) {
				cred = curthread->td_ucred;
				crhold(cred);
				dst_entry->cred = cred;
				*fork_charge += size;
				if (!(src_entry->eflags &
				    MAP_ENTRY_NEEDS_COPY)) {
					crhold(cred);
					src_entry->cred = cred;
					*fork_charge += size;
				}
			}
			src_entry->eflags |= MAP_ENTRY_COW |
			    MAP_ENTRY_NEEDS_COPY;
			dst_entry->eflags |= MAP_ENTRY_COW |
			    MAP_ENTRY_NEEDS_COPY;
			dst_entry->offset = src_entry->offset;
			if (src_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
				/*
				 * MAP_ENTRY_VN_WRITECNT cannot
				 * indicate write reference from
				 * src_entry, since the entry is
				 * marked as needs copy.  Allocate a
				 * fake entry that is used to
				 * decrement object->un_pager.vnp.writecount
				 * at the appropriate time.  Attach
				 * fake_entry to the deferred list.
				 */
				fake_entry = vm_map_entry_create(dst_map);
				fake_entry->eflags = MAP_ENTRY_VN_WRITECNT;
				src_entry->eflags &= ~MAP_ENTRY_VN_WRITECNT;
				vm_object_reference(src_object);
				fake_entry->object.vm_object = src_object;
				fake_entry->start = src_entry->start;
				fake_entry->end = src_entry->end;
				fake_entry->next = curthread->td_map_def_user;
				curthread->td_map_def_user = fake_entry;
			}

			pmap_copy(dst_map->pmap, src_map->pmap,
			    dst_entry->start, dst_entry->end -
			    dst_entry->start, src_entry->start);
		} else {
			dst_entry->object.vm_object = NULL;
			dst_entry->offset = 0;
			if (src_entry->cred != NULL) {
				dst_entry->cred = curthread->td_ucred;
				crhold(dst_entry->cred);
				*fork_charge += size;
			}
		}
	} else {
		/*
		 * We don't want to make writeable wired pages copy-on-write.
		 * Immediately copy these pages into the new map by simulating
		 * page faults.  The new pages are pageable.
		 */
		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry,
		    fork_charge);
	}
}
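
/*
 * Editor's sketch of the post-condition established above for the common
 * (not wired-writable) case: both entries share one object read-only and
 * the first write fault on either side is diverted into a private shadow
 * object.  The assertions below are illustrative, not upstream code.
 */
#if 0
static void
example_after_copy(vm_map_entry_t src, vm_map_entry_t dst)
{

	KASSERT(src->object.vm_object == dst->object.vm_object,
	    ("copied entries must share the backing object"));
	KASSERT((src->eflags & MAP_ENTRY_NEEDS_COPY) != 0 &&
	    (dst->eflags & MAP_ENTRY_NEEDS_COPY) != 0,
	    ("both sides must be marked copy-on-write"));
}
#endif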

/*
 * vmspace_map_entry_forked:
 *	Update the newly-forked vmspace each time a map entry is inherited
 *	or copied.  The values for vm_dsize and vm_tsize are approximate
 *	(and mostly-obsolete ideas in the face of mmap(2) et al.)
 */
static void
vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
    vm_map_entry_t entry)
{
	vm_size_t entrysize;
	vm_offset_t newend;

	if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
		return;
	entrysize = entry->end - entry->start;
	vm2->vm_map.size += entrysize;
	if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
		vm2->vm_ssize += btoc(entrysize);
	} else if (entry->start >= (vm_offset_t)vm1->vm_daddr &&
	    entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) {
		newend = MIN(entry->end,
		    (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize));
		vm2->vm_dsize += btoc(newend - entry->start);
	} else if (entry->start >= (vm_offset_t)vm1->vm_taddr &&
	    entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) {
		newend = MIN(entry->end,
		    (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize));
		vm2->vm_tsize += btoc(newend - entry->start);
	}
}
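
/*
 * Editor's worked example for the accounting above, with hypothetical
 * numbers: if vm1->vm_daddr is 0x10000000 and ctob(vm1->vm_dsize) covers
 * up to 0x10400000, an inherited entry [0x103fe000, 0x10500000) credits
 * vm2->vm_dsize with only btoc(0x10400000 - 0x103fe000) = 2 pages; the
 * tail beyond the data segment is counted in vm_map.size but not vm_dsize.
 */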

/*
 * vmspace_fork:
 *	Create a new process vmspace structure and vm_map
 *	based on those of an existing process.  The new map
 *	is based on the old map, according to the inheritance
 *	values on the regions in that map.
 *
 *	XXX It might be worth coalescing the entries added to the new vmspace.
 *
 *	The source map must not be locked.
 */
struct vmspace *
vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
{
	struct vmspace *vm2;
	vm_map_t new_map, old_map;
	vm_map_entry_t new_entry, old_entry;
	vm_object_t object;
	int error, locked;
	vm_inherit_t inh;

	old_map = &vm1->vm_map;
	/* Copy immutable fields of vm1 to vm2. */
	vm2 = vmspace_alloc(vm_map_min(old_map), vm_map_max(old_map),
	    pmap_pinit);
	if (vm2 == NULL)
		return (NULL);

	vm2->vm_taddr = vm1->vm_taddr;
	vm2->vm_daddr = vm1->vm_daddr;
	vm2->vm_maxsaddr = vm1->vm_maxsaddr;
	vm_map_lock(old_map);
	if (old_map->busy)
		vm_map_wait_busy(old_map);
	new_map = &vm2->vm_map;
	locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
	KASSERT(locked, ("vmspace_fork: lock failed"));

	error = pmap_vmspace_copy(new_map->pmap, old_map->pmap);
	if (error != 0) {
		sx_xunlock(&old_map->lock);
		sx_xunlock(&new_map->lock);
		vm_map_process_deferred();
		vmspace_free(vm2);
		return (NULL);
	}

	new_map->anon_loc = old_map->anon_loc;

	old_entry = old_map->header.next;

	while (old_entry != &old_map->header) {
		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			panic("vm_map_fork: encountered a submap");

		inh = old_entry->inheritance;
		if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 &&
		    inh != VM_INHERIT_NONE)
			inh = VM_INHERIT_COPY;

		switch (inh) {
		case VM_INHERIT_NONE:
			break;

		case VM_INHERIT_SHARE:
			/*
			 * Clone the entry, creating the shared object if
			 * necessary.
			 */
			object = old_entry->object.vm_object;
			if (object == NULL) {
				object = vm_object_allocate(OBJT_DEFAULT,
				    atop(old_entry->end - old_entry->start));
				old_entry->object.vm_object = object;
				old_entry->offset = 0;
				if (old_entry->cred != NULL) {
					object->cred = old_entry->cred;
					object->charge = old_entry->end -
					    old_entry->start;
					old_entry->cred = NULL;
				}
			}

			/*
			 * Add the reference before calling vm_object_shadow
			 * to ensure that a shadow object is created.
			 */
			vm_object_reference(object);
			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
				vm_object_shadow(&old_entry->object.vm_object,
				    &old_entry->offset,
				    old_entry->end - old_entry->start);
				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
				/* Transfer the second reference too. */
				vm_object_reference(
				    old_entry->object.vm_object);

				/*
				 * As in vm_map_simplify_entry(), the
				 * vnode lock will not be acquired in
				 * this call to vm_object_deallocate().
				 */
				vm_object_deallocate(object);
				object = old_entry->object.vm_object;
			}
			VM_OBJECT_WLOCK(object);
			vm_object_clear_flag(object, OBJ_ONEMAPPING);
			if (old_entry->cred != NULL) {
				KASSERT(object->cred == NULL,
				    ("vmspace_fork both cred"));
				object->cred = old_entry->cred;
				object->charge = old_entry->end -
				    old_entry->start;
				old_entry->cred = NULL;
			}

			/*
			 * Assert the correct state of the vnode
			 * v_writecount while the object is locked, to
			 * not relock it later for the assertion
			 * correctness.
			 */
			if (old_entry->eflags & MAP_ENTRY_VN_WRITECNT &&
			    object->type == OBJT_VNODE) {
				KASSERT(((struct vnode *)object->handle)->
				    v_writecount > 0,
				    ("vmspace_fork: v_writecount %p", object));
				KASSERT(object->un_pager.vnp.writemappings > 0,
				    ("vmspace_fork: vnp.writecount %p",
				    object));
			}
			VM_OBJECT_WUNLOCK(object);

			/*
			 * Clone the entry, referencing the shared object.
			 */
			new_entry = vm_map_entry_create(new_map);
			*new_entry = *old_entry;
			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
			    MAP_ENTRY_IN_TRANSITION);
			new_entry->wiring_thread = NULL;
			new_entry->wired_count = 0;
			if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
				vnode_pager_update_writecount(object,
				    new_entry->start, new_entry->end);
			}

			/*
			 * Insert the entry into the new map -- we know we're
			 * inserting at the end of the new map.
			 */
			vm_map_entry_link(new_map, new_entry);
			vmspace_map_entry_forked(vm1, vm2, new_entry);

			/*
			 * Update the physical map
			 */
			pmap_copy(new_map->pmap, old_map->pmap,
			    new_entry->start,
			    (old_entry->end - old_entry->start),
			    old_entry->start);
			break;

		case VM_INHERIT_COPY:
			/*
			 * Clone the entry and link into the map.
			 */
			new_entry = vm_map_entry_create(new_map);
			*new_entry = *old_entry;
			/*
			 * Copied entry is COW over the old object.
			 */
			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
			    MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT);
			new_entry->wiring_thread = NULL;
			new_entry->wired_count = 0;
			new_entry->object.vm_object = NULL;
			new_entry->cred = NULL;
			vm_map_entry_link(new_map, new_entry);
			vmspace_map_entry_forked(vm1, vm2, new_entry);
			vm_map_copy_entry(old_map, new_map, old_entry,
			    new_entry, fork_charge);
			break;

		case VM_INHERIT_ZERO:
			/*
			 * Create a new anonymous mapping entry modelled from
			 * the old one.
			 */
			new_entry = vm_map_entry_create(new_map);
			memset(new_entry, 0, sizeof(*new_entry));

			new_entry->start = old_entry->start;
			new_entry->end = old_entry->end;
			new_entry->eflags = old_entry->eflags &
			    ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
			    MAP_ENTRY_VN_WRITECNT);
			new_entry->protection = old_entry->protection;
			new_entry->max_protection = old_entry->max_protection;
			new_entry->inheritance = VM_INHERIT_ZERO;

			vm_map_entry_link(new_map, new_entry);
			vmspace_map_entry_forked(vm1, vm2, new_entry);

			new_entry->cred = curthread->td_ucred;
			crhold(new_entry->cred);
			*fork_charge += (new_entry->end - new_entry->start);

			break;
		}
		old_entry = old_entry->next;
	}
	/*
	 * Use inlined vm_map_unlock() to postpone handling the deferred
	 * map entries, which cannot be done until both old_map and
	 * new_map locks are released.
	 */
	sx_xunlock(&old_map->lock);
	sx_xunlock(&new_map->lock);
	vm_map_process_deferred();

	return (vm2);
}
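
/*
 * Editor's userland sketch tying the switch above to its user-visible
 * control: minherit(2) sets the per-entry inheritance consulted by
 * vmspace_fork().  A parent that wants a scratch buffer handed to children
 * as fresh zeroes rather than a COW copy might do the following (sketch,
 * assumes INHERIT_ZERO as provided by FreeBSD 11+):
 */
#if 0
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (buf == MAP_FAILED || minherit(buf, len, INHERIT_ZERO) == -1)
		err(1, "mmap/minherit");
	/* After fork(), the child sees a zero-filled region at buf. */
#endif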

/*
 * Create a process's stack for exec_new_vmspace().  This function is never
 * asked to wire the newly created stack.
 */
int
vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
    vm_prot_t prot, vm_prot_t max, int cow)
{
	vm_size_t growsize, init_ssize;
	rlim_t vmemlim;
	int rv;

	MPASS((map->flags & MAP_WIREFUTURE) == 0);
	growsize = sgrowsiz;
	init_ssize = (max_ssize < growsize) ? max_ssize : growsize;
	vm_map_lock(map);
	vmemlim = lim_cur(curthread, RLIMIT_VMEM);
	/* If we would blow our VMEM resource limit, no go */
	if (map->size + init_ssize > vmemlim) {
		rv = KERN_NO_SPACE;
		goto out;
	}
	rv = vm_map_stack_locked(map, addrbos, max_ssize, growsize, prot,
	    max, cow);
out:
	vm_map_unlock(map);
	return (rv);
}

static int stack_guard_page = 1;
SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
    &stack_guard_page, 0,
    "Specifies the number of guard pages for a stack that grows");
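
/*
 * Editor's sketch of the expected caller: exec_new_vmspace() reserves the
 * full rlimit-sized stack in one call, with the growth direction encoded
 * in the cow flags.  The variable names and values below are illustrative,
 * not copied from kern_exec.c.
 */
#if 0
	rv = vm_map_stack(map, sv->sv_usrstack - ssiz, ssiz,
	    VM_PROT_ALL, VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
	if (rv != KERN_SUCCESS)
		return (vm_mmap_to_errno(rv));
#endif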

static int
vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
    vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow)
{
	vm_map_entry_t new_entry, prev_entry;
	vm_offset_t bot, gap_bot, gap_top, top;
	vm_size_t init_ssize, sgp;
	int orient, rv;

	/*
	 * The stack orientation is piggybacked with the cow argument.
	 * Extract it into orient and mask the cow argument so that we
	 * don't pass it around further.
	 */
	orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP);
	KASSERT(orient != 0, ("No stack grow direction"));
	KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP),
	    ("bi-dir stack"));

	if (addrbos < vm_map_min(map) ||
	    addrbos + max_ssize > vm_map_max(map) ||
	    addrbos + max_ssize <= addrbos)
		return (KERN_INVALID_ADDRESS);
	sgp = (vm_size_t)stack_guard_page * PAGE_SIZE;
	if (sgp >= max_ssize)
		return (KERN_INVALID_ARGUMENT);

	init_ssize = growsize;
	if (max_ssize < init_ssize + sgp)
		init_ssize = max_ssize - sgp;

	/* If addr is already mapped, no go */
	if (vm_map_lookup_entry(map, addrbos, &prev_entry))
		return (KERN_NO_SPACE);

	/*
	 * If we can't accommodate max_ssize in the current mapping, no go.
	 */
	if (prev_entry->next->start < addrbos + max_ssize)
		return (KERN_NO_SPACE);

	/*
	 * We initially map a stack of only init_ssize.  We will grow as
	 * needed later.  Depending on the orientation of the stack (i.e.
	 * the grow direction) we either map at the top of the range, the
	 * bottom of the range or in the middle.
	 *
	 * Note: we would normally expect prot and max to be VM_PROT_ALL,
	 * and cow to be 0.  Possibly we should eliminate these as input
	 * parameters, and just pass these values here in the insert call.
	 */
	if (orient == MAP_STACK_GROWS_DOWN) {
		bot = addrbos + max_ssize - init_ssize;
		top = bot + init_ssize;
		gap_bot = addrbos;
		gap_top = bot;
	} else /* if (orient == MAP_STACK_GROWS_UP) */ {
		bot = addrbos;
		top = bot + init_ssize;
		gap_bot = top;
		gap_top = addrbos + max_ssize;
	}
	rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
	if (rv != KERN_SUCCESS)
		return (rv);
	new_entry = prev_entry->next;
	KASSERT(new_entry->end == top || new_entry->start == bot,
	    ("Bad entry start/end for new stack entry"));
	KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
	    (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
	    ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
	KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
	    (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
	    ("new entry lacks MAP_ENTRY_GROWS_UP"));
	rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE,
	    VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ?
	    MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP));
	if (rv != KERN_SUCCESS)
		(void)vm_map_delete(map, bot, top);
	return (rv);
}
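
/*
 * Editor's worked example of the grows-down layout computed above, with
 * hypothetical numbers: addrbos = 0x7fff0000, max_ssize = 0x10000,
 * growsize = 0x8000, stack_guard_page = 1 and PAGE_SIZE = 4096, so
 * sgp = 0x1000:
 *
 *	init_ssize = 0x8000
 *	bot = addrbos + max_ssize - init_ssize = 0x7fff8000
 *	top = bot + init_ssize                 = 0x80000000
 *	gap = [addrbos, bot) = [0x7fff0000, 0x7fff8000), a guard entry
 *
 * Later faults below bot land in the gap entry and are turned into growth
 * by vm_map_growstack(), which never grows closer than sgp to the gap end.
 */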

/*
 * Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if we
 * successfully grow the stack.
 */
static int
vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry)
{
	vm_map_entry_t stack_entry;
	struct proc *p;
	struct vmspace *vm;
	struct ucred *cred;
	vm_offset_t gap_end, gap_start, grow_start;
	size_t grow_amount, guard, max_grow;
	rlim_t lmemlim, stacklim, vmemlim;
	int rv, rv1;
	bool gap_deleted, grow_down, is_procstack;
#ifdef notyet
	uint64_t limit;
#endif
#ifdef RACCT
	int error;
#endif

	p = curproc;
	vm = p->p_vmspace;

	/*
	 * Disallow stack growth when the access is performed by a
	 * debugger or AIO daemon.  The reason is that the wrong
	 * resource limits are applied.
	 */
	if (map != &p->p_vmspace->vm_map || p->p_textvp == NULL)
		return (KERN_FAILURE);

	MPASS(!map->system_map);

	guard = stack_guard_page * PAGE_SIZE;
	lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK);
	stacklim = lim_cur(curthread, RLIMIT_STACK);
	vmemlim = lim_cur(curthread, RLIMIT_VMEM);
retry:
	/* If addr is not in a hole for a stack grow area, no need to grow. */
	if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry))
		return (KERN_FAILURE);
	if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0)
		return (KERN_SUCCESS);
	if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) {
		stack_entry = gap_entry->next;
		if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 ||
		    stack_entry->start != gap_entry->end)
			return (KERN_FAILURE);
		grow_amount = round_page(stack_entry->start - addr);
		grow_down = true;
	} else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) {
		stack_entry = gap_entry->prev;
		if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 ||
		    stack_entry->end != gap_entry->start)
			return (KERN_FAILURE);
		grow_amount = round_page(addr + 1 - stack_entry->end);
		grow_down = false;
	} else {
		return (KERN_FAILURE);
	}
	max_grow = gap_entry->end - gap_entry->start;
	if (guard > max_grow)
		return (KERN_NO_SPACE);
	max_grow -= guard;
	if (grow_amount > max_grow)
		return (KERN_NO_SPACE);

	/*
	 * If this is the main process stack, see if we're over the stack
	 * limit.
	 */
	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr &&
	    addr < (vm_offset_t)p->p_sysent->sv_usrstack;
	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim))
		return (KERN_NO_SPACE);

#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(p);
		if (is_procstack && racct_set(p, RACCT_STACK,
		    ctob(vm->vm_ssize) + grow_amount)) {
			PROC_UNLOCK(p);
			return (KERN_NO_SPACE);
		}
		PROC_UNLOCK(p);
	}
#endif

	grow_amount = roundup(grow_amount, sgrowsiz);
	if (grow_amount > max_grow)
		grow_amount = max_grow;
	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
		grow_amount = trunc_page((vm_size_t)stacklim) -
		    ctob(vm->vm_ssize);
	}

#ifdef notyet
	PROC_LOCK(p);
	limit = racct_get_available(p, RACCT_STACK);
	PROC_UNLOCK(p);
	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
		grow_amount = limit - ctob(vm->vm_ssize);
#endif

	if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
		if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
			rv = KERN_NO_SPACE;
			goto out;
		}
#ifdef RACCT
		if (racct_enable) {
			PROC_LOCK(p);
			if (racct_set(p, RACCT_MEMLOCK,
			    ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
				PROC_UNLOCK(p);
				rv = KERN_NO_SPACE;
				goto out;
			}
			PROC_UNLOCK(p);
		}
#endif
	}

	/* If we would blow our VMEM resource limit, no go */
	if (map->size + grow_amount > vmemlim) {
		rv = KERN_NO_SPACE;
		goto out;
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(p);
		if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
			PROC_UNLOCK(p);
			rv = KERN_NO_SPACE;
			goto out;
		}
		PROC_UNLOCK(p);
	}
#endif

	if (vm_map_lock_upgrade(map)) {
		gap_entry = NULL;
		vm_map_lock_read(map);
		goto retry;
	}

	if (grow_down) {
		grow_start = gap_entry->end - grow_amount;
		if (gap_entry->start + grow_amount == gap_entry->end) {
			gap_start = gap_entry->start;
			gap_end = gap_entry->end;
			vm_map_entry_delete(map, gap_entry);
			gap_deleted = true;
		} else {
			MPASS(gap_entry->start < gap_entry->end - grow_amount);
			gap_entry->end -= grow_amount;
			vm_map_entry_resize_free(map, gap_entry);
			gap_deleted = false;
		}
		rv = vm_map_insert(map, NULL, 0, grow_start,
		    grow_start + grow_amount,
		    stack_entry->protection, stack_entry->max_protection,
		    MAP_STACK_GROWS_DOWN);
		if (rv != KERN_SUCCESS) {
			if (gap_deleted) {
				rv1 = vm_map_insert(map, NULL, 0, gap_start,
				    gap_end, VM_PROT_NONE, VM_PROT_NONE,
				    MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN);
				MPASS(rv1 == KERN_SUCCESS);
			} else {
				gap_entry->end += grow_amount;
				vm_map_entry_resize_free(map, gap_entry);
			}
		}
	} else {
		grow_start = stack_entry->end;
		cred = stack_entry->cred;
		if (cred == NULL && stack_entry->object.vm_object != NULL)
			cred = stack_entry->object.vm_object->cred;
		if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred))
			rv = KERN_NO_SPACE;
		/* Grow the underlying object if applicable. */
		else if (stack_entry->object.vm_object == NULL ||
		    vm_object_coalesce(stack_entry->object.vm_object,
		    stack_entry->offset,
		    (vm_size_t)(stack_entry->end - stack_entry->start),
		    (vm_size_t)grow_amount, cred != NULL)) {
			if (gap_entry->start + grow_amount == gap_entry->end)
				vm_map_entry_delete(map, gap_entry);
			else
				gap_entry->start += grow_amount;
			stack_entry->end += grow_amount;
			map->size += grow_amount;
			vm_map_entry_resize_free(map, stack_entry);
			rv = KERN_SUCCESS;
		} else
			rv = KERN_FAILURE;
	}
	if (rv == KERN_SUCCESS && is_procstack)
		vm->vm_ssize += btoc(grow_amount);

	/*
	 * Heed the MAP_WIREFUTURE flag if it was set for this process.
	 */
	if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
		vm_map_unlock(map);
		vm_map_wire(map, grow_start, grow_start + grow_amount,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		vm_map_lock_read(map);
	} else
		vm_map_lock_downgrade(map);

out:
#ifdef RACCT
	if (racct_enable && rv != KERN_SUCCESS) {
		PROC_LOCK(p);
		error = racct_set(p, RACCT_VMEM, map->size);
		KASSERT(error == 0, ("decreasing RACCT_VMEM failed"));
		if (!old_mlock) {
			error = racct_set(p, RACCT_MEMLOCK,
			    ptoa(pmap_wired_count(map->pmap)));
			KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed"));
		}
		error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize));
		KASSERT(error == 0, ("decreasing RACCT_STACK failed"));
		PROC_UNLOCK(p);
	}
#endif

	return (rv);
}
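
/*
 * Editor's worked example for the grows-down arithmetic above, with
 * hypothetical numbers and sgrowsiz = 32KB (0x8000): a fault at
 * addr = stack_entry->start - 0x2100 gives grow_amount =
 * round_page(0x2100) = 0x3000, then roundup(0x3000, 0x8000) = 0x8000,
 * clamped to max_grow (the gap size minus the guard).  The gap entry
 * shrinks by the same 0x8000 the new stack entry occupies, so the
 * reservation made by vm_map_stack_locked() is never exceeded.
 */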

/*
 * Unshare the specified VM space for exec.  If other processes are
 * mapped to it, then create a new one.  The new vmspace is null.
 */
int
vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
{
	struct vmspace *oldvmspace = p->p_vmspace;
	struct vmspace *newvmspace;

	KASSERT((curthread->td_pflags & TDP_EXECVMSPC) == 0,
	    ("vmspace_exec recursed"));
	newvmspace = vmspace_alloc(minuser, maxuser, pmap_pinit);
	if (newvmspace == NULL)
		return (ENOMEM);
	newvmspace->vm_swrss = oldvmspace->vm_swrss;
	/*
	 * This code is written like this for prototype purposes.  The
	 * goal is to avoid running down the vmspace here, but let the
	 * other processes that are still using the vmspace finally
	 * run it down.  Even though there is little or no chance of blocking
	 * here, it is a good idea to keep this form for future mods.
	 */
	PROC_VMSPACE_LOCK(p);
	p->p_vmspace = newvmspace;
	PROC_VMSPACE_UNLOCK(p);
	if (p == curthread->td_proc)
		pmap_activate(curthread);
	curthread->td_pflags |= TDP_EXECVMSPC;
	return (0);
}

/*
 * Unshare the specified VM space for forcing COW.  This
 * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
 */
int
vmspace_unshare(struct proc *p)
{
	struct vmspace *oldvmspace = p->p_vmspace;
	struct vmspace *newvmspace;
	vm_ooffset_t fork_charge;

	if (oldvmspace->vm_refcnt == 1)
		return (0);
	fork_charge = 0;
	newvmspace = vmspace_fork(oldvmspace, &fork_charge);
	if (newvmspace == NULL)
		return (ENOMEM);
	if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) {
		vmspace_free(newvmspace);
		return (ENOMEM);
	}
	PROC_VMSPACE_LOCK(p);
	p->p_vmspace = newvmspace;
	PROC_VMSPACE_UNLOCK(p);
	if (p == curthread->td_proc)
		pmap_activate(curthread);
	vmspace_free(oldvmspace);
	return (0);
}
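
/*
 * Editor's userland sketch of the case vmspace_unshare() serves: rfork(2)
 * called without RFPROC and without RFMEM divorces the calling process
 * from a previously shared address space in place (assuming this flag
 * combination is accepted, as it is on FreeBSD via fork_norfproc()).
 */
#if 0
	if (rfork(RFFDG) == -1)		/* no RFPROC, no RFMEM */
		err(1, "rfork");
#endif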
43235856e12eSJohn Dyson /*
4324df8bae1dSRodney W. Grimes  * vm_map_lookup:
4325df8bae1dSRodney W. Grimes  *
4326df8bae1dSRodney W. Grimes  *	Finds the VM object, offset, and
4327df8bae1dSRodney W. Grimes  *	protection for a given virtual address in the
4328df8bae1dSRodney W. Grimes  *	specified map, assuming a page fault of the
4329df8bae1dSRodney W. Grimes  *	type specified.
4330df8bae1dSRodney W. Grimes  *
4331df8bae1dSRodney W. Grimes  *	Leaves the map in question locked for read; return
4332df8bae1dSRodney W. Grimes  *	values are guaranteed until a vm_map_lookup_done
4333df8bae1dSRodney W. Grimes  *	call is performed.  Note that the map argument
4334df8bae1dSRodney W. Grimes  *	is in/out; the returned map must be used in
4335df8bae1dSRodney W. Grimes  *	the call to vm_map_lookup_done.
4336df8bae1dSRodney W. Grimes  *
4337df8bae1dSRodney W. Grimes  *	A handle (out_entry) is returned for use in
4338df8bae1dSRodney W. Grimes  *	vm_map_lookup_done, to make that fast.
4339df8bae1dSRodney W. Grimes  *
4340df8bae1dSRodney W. Grimes  *	If a lookup is requested with "write protection"
4341df8bae1dSRodney W. Grimes  *	specified, the map may be changed to perform virtual
4342df8bae1dSRodney W. Grimes  *	copying operations, although the data referenced will
4343df8bae1dSRodney W. Grimes  *	remain the same.
4344df8bae1dSRodney W. Grimes  */
4345df8bae1dSRodney W. Grimes int
4346b9dcd593SBruce Evans vm_map_lookup(vm_map_t *var_map,	/* IN/OUT */
4347b9dcd593SBruce Evans 	vm_offset_t vaddr,
434847221757SJohn Dyson 	vm_prot_t fault_typea,
4349b9dcd593SBruce Evans 	vm_map_entry_t *out_entry,	/* OUT */
4350b9dcd593SBruce Evans 	vm_object_t *object,		/* OUT */
4351b9dcd593SBruce Evans 	vm_pindex_t *pindex,		/* OUT */
4352b9dcd593SBruce Evans 	vm_prot_t *out_prot,		/* OUT */
43532d8acc0fSJohn Dyson 	boolean_t *wired)		/* OUT */
4354df8bae1dSRodney W. Grimes {
4355c0877f10SJohn Dyson 	vm_map_entry_t entry;
4356c0877f10SJohn Dyson 	vm_map_t map = *var_map;
4357c0877f10SJohn Dyson 	vm_prot_t prot;
435847221757SJohn Dyson 	vm_prot_t fault_type = fault_typea;
43593364c323SKonstantin Belousov 	vm_object_t eobject;
43600cc74f14SAlan Cox 	vm_size_t size;
4361ef694c1aSEdward Tomasz Napierala 	struct ucred *cred;
4362df8bae1dSRodney W. Grimes 
436319bd0d9cSKonstantin Belousov RetryLookup:
4364df8bae1dSRodney W. Grimes 
4365df8bae1dSRodney W. Grimes 	vm_map_lock_read(map);
4366df8bae1dSRodney W. Grimes 
436719bd0d9cSKonstantin Belousov RetryLookupLocked:
4368df8bae1dSRodney W. Grimes 	/*
43694c3ef59eSAlan Cox 	 * Lookup the faulting address.
4370df8bae1dSRodney W. Grimes 	 */
4371095104acSAlan Cox 	if (!vm_map_lookup_entry(map, vaddr, out_entry)) {
4372095104acSAlan Cox 		vm_map_unlock_read(map);
4373095104acSAlan Cox 		return (KERN_INVALID_ADDRESS);
4374095104acSAlan Cox 	}
4375df8bae1dSRodney W. Grimes 
43764e94f402SAlan Cox 	entry = *out_entry;
4377b7b2aac2SJohn Dyson 
4378df8bae1dSRodney W. Grimes 	/*
4379df8bae1dSRodney W. Grimes 	 * Handle submaps.
4380df8bae1dSRodney W. Grimes 	 */
4381afa07f7eSJohn Dyson 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
4382df8bae1dSRodney W. Grimes 		vm_map_t old_map = map;
4383df8bae1dSRodney W. Grimes 
4384df8bae1dSRodney W. Grimes 		*var_map = map = entry->object.sub_map;
4385df8bae1dSRodney W. Grimes 		vm_map_unlock_read(old_map);
4386df8bae1dSRodney W. Grimes 		goto RetryLookup;
4387df8bae1dSRodney W. Grimes 	}
4388a04c970aSJohn Dyson 
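	/*
	 * [Editor's note, added commentary.]  On a submap hit the parent
	 * map's read lock is dropped before the lookup restarts against
	 * the submap, so this function never holds two map locks at once;
	 * RetryLookup then takes the (new) map's read lock from scratch.
	 */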
4389df8bae1dSRodney W. Grimes 	/*
43900d94caffSDavid Greenman 	 * Check whether this task is allowed to have this page.
4391df8bae1dSRodney W. Grimes 	 */
4392df8bae1dSRodney W. Grimes 	prot = entry->protection;
439319bd0d9cSKonstantin Belousov 	if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) {
439419bd0d9cSKonstantin Belousov 		fault_typea &= ~VM_PROT_FAULT_LOOKUP;
439519bd0d9cSKonstantin Belousov 		if (prot == VM_PROT_NONE && map != kernel_map &&
439619bd0d9cSKonstantin Belousov 		    (entry->eflags & MAP_ENTRY_GUARD) != 0 &&
439719bd0d9cSKonstantin Belousov 		    (entry->eflags & (MAP_ENTRY_STACK_GAP_DN |
439819bd0d9cSKonstantin Belousov 		    MAP_ENTRY_STACK_GAP_UP)) != 0 &&
439919bd0d9cSKonstantin Belousov 		    vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS)
440019bd0d9cSKonstantin Belousov 			goto RetryLookupLocked;
440119bd0d9cSKonstantin Belousov 	}
440219bd0d9cSKonstantin Belousov 	fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
44032db65ab4SAlan Cox 	if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
4404095104acSAlan Cox 		vm_map_unlock_read(map);
4405095104acSAlan Cox 		return (KERN_PROTECTION_FAILURE);
440647221757SJohn Dyson 	}
4407b8db9776SKonstantin Belousov 	KASSERT((prot & VM_PROT_WRITE) == 0 || (entry->eflags &
4408b8db9776SKonstantin Belousov 	    (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY)) !=
4409b8db9776SKonstantin Belousov 	    (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY),
4410b8db9776SKonstantin Belousov 	    ("entry %p flags %x", entry, entry->eflags));
44115b3e0257SDag-Erling Smørgrav 	if ((fault_typea & VM_PROT_COPY) != 0 &&
44125b3e0257SDag-Erling Smørgrav 	    (entry->max_protection & VM_PROT_WRITE) == 0 &&
44135b3e0257SDag-Erling Smørgrav 	    (entry->eflags & MAP_ENTRY_COW) == 0) {
44145b3e0257SDag-Erling Smørgrav 		vm_map_unlock_read(map);
44155b3e0257SDag-Erling Smørgrav 		return (KERN_PROTECTION_FAILURE);
44165b3e0257SDag-Erling Smørgrav 	}
4417df8bae1dSRodney W. Grimes 
4418df8bae1dSRodney W. Grimes 	/*
44190d94caffSDavid Greenman 	 * If this page is not pageable, we have to get it for all possible
44200d94caffSDavid Greenman 	 * accesses.
4421df8bae1dSRodney W. Grimes 	 */
442205f0fdd2SPoul-Henning Kamp 	*wired = (entry->wired_count != 0);
442305f0fdd2SPoul-Henning Kamp 	if (*wired)
4424a6d42a0dSAlan Cox 		fault_type = entry->protection;
44253364c323SKonstantin Belousov 	size = entry->end - entry->start;
4426df8bae1dSRodney W. Grimes 	/*
4427df8bae1dSRodney W. Grimes 	 * If the entry was copy-on-write, we either ...
4428df8bae1dSRodney W. Grimes 	 */
4429afa07f7eSJohn Dyson 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
4430df8bae1dSRodney W. Grimes 		/*
44310d94caffSDavid Greenman 		 * If we want to write the page, we may as well handle that
4432ad5fca3bSAlan Cox 		 * now since we've got the map locked.
4433df8bae1dSRodney W. Grimes 		 *
44340d94caffSDavid Greenman 		 * If we don't need to write the page, we just demote the
44350d94caffSDavid Greenman 		 * permissions allowed.
4436df8bae1dSRodney W. Grimes 		 */
4437a6d42a0dSAlan Cox 		if ((fault_type & VM_PROT_WRITE) != 0 ||
4438a6d42a0dSAlan Cox 		    (fault_typea & VM_PROT_COPY) != 0) {
4439df8bae1dSRodney W. Grimes 			/*
44400d94caffSDavid Greenman 			 * Make a new object, and place it in the object
44410d94caffSDavid Greenman 			 * chain.  Note that no new references have appeared
4442ad5fca3bSAlan Cox 			 * -- one just moved from the map to the new
44430d94caffSDavid Greenman 			 * object.
4444df8bae1dSRodney W. Grimes 			 */
444525adb370SBrian Feldman 			if (vm_map_lock_upgrade(map))
4446df8bae1dSRodney W. Grimes 				goto RetryLookup;
44479917e010SAlan Cox 
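			/*
			 * [Editor's note, added commentary.]
			 * vm_map_lock_upgrade() returns nonzero when the
			 * read-to-write upgrade cannot be made atomically;
			 * in that case the lock was lost and the entry may
			 * have changed underneath us, so the lookup must
			 * restart from the top rather than proceed.
			 */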
4448ef694c1aSEdward Tomasz Napierala 			if (entry->cred == NULL) {
44493364c323SKonstantin Belousov 				/*
44503364c323SKonstantin Belousov 				 * The debugger owner is charged for
44513364c323SKonstantin Belousov 				 * the memory.
44523364c323SKonstantin Belousov 				 */
4453ef694c1aSEdward Tomasz Napierala 				cred = curthread->td_ucred;
4454ef694c1aSEdward Tomasz Napierala 				crhold(cred);
4455ef694c1aSEdward Tomasz Napierala 				if (!swap_reserve_by_cred(size, cred)) {
4456ef694c1aSEdward Tomasz Napierala 					crfree(cred);
44573364c323SKonstantin Belousov 					vm_map_unlock(map);
44583364c323SKonstantin Belousov 					return (KERN_RESOURCE_SHORTAGE);
44593364c323SKonstantin Belousov 				}
4460ef694c1aSEdward Tomasz Napierala 				entry->cred = cred;
44613364c323SKonstantin Belousov 			}
44620cc74f14SAlan Cox 			vm_object_shadow(&entry->object.vm_object,
44630cc74f14SAlan Cox 			    &entry->offset, size);
4464afa07f7eSJohn Dyson 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
44653364c323SKonstantin Belousov 			eobject = entry->object.vm_object;
4466ef694c1aSEdward Tomasz Napierala 			if (eobject->cred != NULL) {
44673364c323SKonstantin Belousov 				/*
44683364c323SKonstantin Belousov 				 * The object was not shadowed.
44693364c323SKonstantin Belousov 				 */
4470ef694c1aSEdward Tomasz Napierala 				swap_release_by_cred(size, entry->cred);
4471ef694c1aSEdward Tomasz Napierala 				crfree(entry->cred);
4472ef694c1aSEdward Tomasz Napierala 				entry->cred = NULL;
4473ef694c1aSEdward Tomasz Napierala 			} else if (entry->cred != NULL) {
447489f6b863SAttilio Rao 				VM_OBJECT_WLOCK(eobject);
4475ef694c1aSEdward Tomasz Napierala 				eobject->cred = entry->cred;
44763364c323SKonstantin Belousov 				eobject->charge = size;
447789f6b863SAttilio Rao 				VM_OBJECT_WUNLOCK(eobject);
4478ef694c1aSEdward Tomasz Napierala 				entry->cred = NULL;
44793364c323SKonstantin Belousov 			}
44809917e010SAlan Cox 
44819b09b6c7SMatthew Dillon 			vm_map_lock_downgrade(map);
44820d94caffSDavid Greenman 		} else {
4483df8bae1dSRodney W. Grimes 			/*
44840d94caffSDavid Greenman 			 * We're attempting to read a copy-on-write page --
44850d94caffSDavid Greenman 			 * don't allow writes.
4486df8bae1dSRodney W. Grimes 			 */
44872d8acc0fSJohn Dyson 			prot &= ~VM_PROT_WRITE;
4488df8bae1dSRodney W. Grimes 		}
4489df8bae1dSRodney W. Grimes 	}
44902d8acc0fSJohn Dyson 
4491df8bae1dSRodney W. Grimes 	/*
4492df8bae1dSRodney W. Grimes 	 * Create an object if necessary.
4493df8bae1dSRodney W. Grimes 	 */
44944e71e795SMatthew Dillon 	if (entry->object.vm_object == NULL &&
44954e71e795SMatthew Dillon 	    !map->system_map) {
449625adb370SBrian Feldman 		if (vm_map_lock_upgrade(map))
4497df8bae1dSRodney W. Grimes 			goto RetryLookup;
449824a1cce3SDavid Greenman 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
44993364c323SKonstantin Belousov 		    atop(size));
4500df8bae1dSRodney W. Grimes 		entry->offset = 0;
4501ef694c1aSEdward Tomasz Napierala 		if (entry->cred != NULL) {
450289f6b863SAttilio Rao 			VM_OBJECT_WLOCK(entry->object.vm_object);
4503ef694c1aSEdward Tomasz Napierala 			entry->object.vm_object->cred = entry->cred;
45043364c323SKonstantin Belousov 			entry->object.vm_object->charge = size;
450589f6b863SAttilio Rao 			VM_OBJECT_WUNLOCK(entry->object.vm_object);
4506ef694c1aSEdward Tomasz Napierala 			entry->cred = NULL;
45073364c323SKonstantin Belousov 		}
45089b09b6c7SMatthew Dillon 		vm_map_lock_downgrade(map);
4509df8bae1dSRodney W. Grimes 	}
4510b5b40fa6SJohn Dyson 
4511df8bae1dSRodney W. Grimes 	/*
45120d94caffSDavid Greenman 	 * Return the object/offset from this entry.  If the entry was
45130d94caffSDavid Greenman 	 * copy-on-write or empty, it has been fixed up.
4514df8bae1dSRodney W. Grimes 	 */
451510d9120cSKonstantin Belousov 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
4516df8bae1dSRodney W. Grimes 	*object = entry->object.vm_object;
4517df8bae1dSRodney W. Grimes 
4518df8bae1dSRodney W. Grimes 	*out_prot = prot;
4519df8bae1dSRodney W. Grimes 	return (KERN_SUCCESS);
4520df8bae1dSRodney W. Grimes }
4521df8bae1dSRodney W. Grimes 
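/*
 * [Editor's addition.]  Sketch of the calling convention, loosely
 * modeled on a page-fault handler; the names below are illustrative,
 * not copied from vm_fault.  The map written back through the IN/OUT
 * argument (possibly a submap) is the one that must be handed to
 * vm_map_lookup_done().
 */
#if 0
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	int rv;

	rv = vm_map_lookup(&map, vaddr, VM_PROT_READ, &entry, &object,
	    &pindex, &prot, &wired);
	if (rv != KERN_SUCCESS)
		return (rv);
	/* ... resolve the page at (object, pindex) under prot ... */
	vm_map_lookup_done(map, entry);		/* drops the read lock */
#endif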
4522df8bae1dSRodney W. Grimes /*
452319dc5607STor Egge  * vm_map_lookup_locked:
452419dc5607STor Egge  *
452519dc5607STor Egge  *	Lookup the faulting address.  A version of vm_map_lookup that returns
452619dc5607STor Egge  *      KERN_FAILURE instead of blocking on map lock or memory allocation.
452719dc5607STor Egge  */
452819dc5607STor Egge int
452919dc5607STor Egge vm_map_lookup_locked(vm_map_t *var_map,	/* IN/OUT */
453019dc5607STor Egge 	vm_offset_t vaddr,
453119dc5607STor Egge 	vm_prot_t fault_typea,
453219dc5607STor Egge 	vm_map_entry_t *out_entry,	/* OUT */
453319dc5607STor Egge 	vm_object_t *object,		/* OUT */
453419dc5607STor Egge 	vm_pindex_t *pindex,		/* OUT */
453519dc5607STor Egge 	vm_prot_t *out_prot,		/* OUT */
453619dc5607STor Egge 	boolean_t *wired)		/* OUT */
453719dc5607STor Egge {
453819dc5607STor Egge 	vm_map_entry_t entry;
453919dc5607STor Egge 	vm_map_t map = *var_map;
454019dc5607STor Egge 	vm_prot_t prot;
454119dc5607STor Egge 	vm_prot_t fault_type = fault_typea;
454219dc5607STor Egge 
454319dc5607STor Egge 	/*
45444c3ef59eSAlan Cox 	 * Lookup the faulting address.
454519dc5607STor Egge 	 */
454619dc5607STor Egge 	if (!vm_map_lookup_entry(map, vaddr, out_entry))
454719dc5607STor Egge 		return (KERN_INVALID_ADDRESS);
454819dc5607STor Egge 
454919dc5607STor Egge 	entry = *out_entry;
455019dc5607STor Egge 
455119dc5607STor Egge 	/*
455219dc5607STor Egge 	 * Fail if the entry refers to a submap.
455319dc5607STor Egge 	 */
455419dc5607STor Egge 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
455519dc5607STor Egge 		return (KERN_FAILURE);
455619dc5607STor Egge 
455719dc5607STor Egge 	/*
455819dc5607STor Egge 	 * Check whether this task is allowed to have this page.
455919dc5607STor Egge 	 */
456019dc5607STor Egge 	prot = entry->protection;
456119dc5607STor Egge 	fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
456219dc5607STor Egge 	if ((fault_type & prot) != fault_type)
456319dc5607STor Egge 		return (KERN_PROTECTION_FAILURE);
456419dc5607STor Egge 
456519dc5607STor Egge 	/*
456619dc5607STor Egge 	 * If this page is not pageable, we have to get it for all possible
456719dc5607STor Egge 	 * accesses.
456819dc5607STor Egge 	 */
456919dc5607STor Egge 	*wired = (entry->wired_count != 0);
457019dc5607STor Egge 	if (*wired)
4571a6d42a0dSAlan Cox 		fault_type = entry->protection;
457219dc5607STor Egge 
457319dc5607STor Egge 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
457419dc5607STor Egge 		/*
457519dc5607STor Egge 		 * Fail if the entry was copy-on-write for a write fault.
457619dc5607STor Egge 		 */
457719dc5607STor Egge 		if (fault_type & VM_PROT_WRITE)
457819dc5607STor Egge 			return (KERN_FAILURE);
457919dc5607STor Egge 		/*
458019dc5607STor Egge 		 * We're attempting to read a copy-on-write page --
458119dc5607STor Egge 		 * don't allow writes.
458219dc5607STor Egge 		 */
458319dc5607STor Egge 		prot &= ~VM_PROT_WRITE;
458419dc5607STor Egge 	}
458519dc5607STor Egge 
458619dc5607STor Egge 	/*
458719dc5607STor Egge 	 * Fail if an object should be created.
458819dc5607STor Egge 	 */
458919dc5607STor Egge 	if (entry->object.vm_object == NULL && !map->system_map)
459019dc5607STor Egge 		return (KERN_FAILURE);
459119dc5607STor Egge 
459219dc5607STor Egge 	/*
459319dc5607STor Egge 	 * Return the object/offset from this entry.  If the entry was
459419dc5607STor Egge 	 * copy-on-write or empty, it has been fixed up.
459519dc5607STor Egge 	 */
459610d9120cSKonstantin Belousov 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
459719dc5607STor Egge 	*object = entry->object.vm_object;
459819dc5607STor Egge 
459919dc5607STor Egge 	*out_prot = prot;
460019dc5607STor Egge 	return (KERN_SUCCESS);
460119dc5607STor Egge }
460219dc5607STor Egge 
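/*
 * [Editor's addition.]  Contrast with vm_map_lookup(): this variant
 * assumes the caller already holds the map lock, never sleeps, and
 * never modifies the map -- any case the blocking version would fix up
 * (submap, COW write fault, missing object) is reported as KERN_FAILURE.
 * Illustrative use, with hypothetical surrounding declarations:
 */
#if 0
	rv = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ, &entry,
	    &object, &pindex, &prot, &wired);
	if (rv == KERN_SUCCESS) {
		/*
		 * Use (object, pindex).  No vm_map_lookup_done() call is
		 * needed; the caller keeps the lock it acquired itself.
		 */
	}
#endif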
460319dc5607STor Egge /*
4604df8bae1dSRodney W. Grimes  * vm_map_lookup_done:
4605df8bae1dSRodney W. Grimes  *
4606df8bae1dSRodney W. Grimes  *	Releases locks acquired by a vm_map_lookup
4607df8bae1dSRodney W. Grimes  *	(according to the handle returned by that lookup).
4608df8bae1dSRodney W. Grimes  */
46090d94caffSDavid Greenman void
46101b40f8c0SMatthew Dillon vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
4611df8bae1dSRodney W. Grimes {
4612df8bae1dSRodney W. Grimes 	/*
4613df8bae1dSRodney W. Grimes 	 * Unlock the main-level map
4614df8bae1dSRodney W. Grimes 	 */
4615df8bae1dSRodney W. Grimes 	vm_map_unlock_read(map);
4616df8bae1dSRodney W. Grimes }
4617df8bae1dSRodney W. Grimes 
461819ea042eSKonstantin Belousov vm_offset_t
461919ea042eSKonstantin Belousov vm_map_max_KBI(const struct vm_map *map)
462019ea042eSKonstantin Belousov {
462119ea042eSKonstantin Belousov 
4622f0165b1cSKonstantin Belousov 	return (vm_map_max(map));
462319ea042eSKonstantin Belousov }
462419ea042eSKonstantin Belousov 
462519ea042eSKonstantin Belousov vm_offset_t
462619ea042eSKonstantin Belousov vm_map_min_KBI(const struct vm_map *map)
462719ea042eSKonstantin Belousov {
462819ea042eSKonstantin Belousov 
4629f0165b1cSKonstantin Belousov 	return (vm_map_min(map));
463019ea042eSKonstantin Belousov }
463119ea042eSKonstantin Belousov 
463219ea042eSKonstantin Belousov pmap_t
463319ea042eSKonstantin Belousov vm_map_pmap_KBI(vm_map_t map)
463419ea042eSKonstantin Belousov {
463519ea042eSKonstantin Belousov 
463619ea042eSKonstantin Belousov 	return (map->pmap);
463719ea042eSKonstantin Belousov }
463819ea042eSKonstantin Belousov 
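/*
 * [Editor's note -- an inference from the naming, not stated in this
 * file.]  The _KBI wrappers above give kernel modules a stable
 * function-call interface: vm_map_max()/vm_map_min() reach into struct
 * vm_map directly, so inlining them into a module would bake in a
 * particular struct layout across kernel revisions.
 */
#if 0
	/* Module-safe accessors (editor's illustration): */
	vm_offset_t lo = vm_map_min_KBI(map);
	vm_offset_t hi = vm_map_max_KBI(map);
	pmap_t pm = vm_map_pmap_KBI(map);
#endif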
4639c7c34a24SBruce Evans #include "opt_ddb.h"
4640c3cb3e12SDavid Greenman #ifdef DDB
4641c7c34a24SBruce Evans #include <sys/kernel.h>
4642c7c34a24SBruce Evans 
4643c7c34a24SBruce Evans #include <ddb/ddb.h>
4644c7c34a24SBruce Evans 
46452ebcd458SAttilio Rao static void
46462ebcd458SAttilio Rao vm_map_print(vm_map_t map)
4647df8bae1dSRodney W. Grimes {
4648c0877f10SJohn Dyson 	vm_map_entry_t entry;
4649c7c34a24SBruce Evans 
4650e5f251d2SAlan Cox 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
4651e5f251d2SAlan Cox 	    (void *)map,
4652101eeb7fSBruce Evans 	    (void *)map->pmap, map->nentries, map->timestamp);
4653df8bae1dSRodney W. Grimes 
4654c7c34a24SBruce Evans 	db_indent += 2;
4655df8bae1dSRodney W. Grimes 	for (entry = map->header.next; entry != &map->header;
4656df8bae1dSRodney W. Grimes 	    entry = entry->next) {
465719bd0d9cSKonstantin Belousov 		db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n",
465819bd0d9cSKonstantin Belousov 		    (void *)entry, (void *)entry->start, (void *)entry->end,
465919bd0d9cSKonstantin Belousov 		    entry->eflags);
4660e5f251d2SAlan Cox 		{
4661df8bae1dSRodney W. Grimes 			static char *inheritance_name[4] =
4662df8bae1dSRodney W. Grimes 			    {"share", "copy", "none", "donate_copy"};
46630d94caffSDavid Greenman 
466495e5e988SJohn Dyson 			db_iprintf(" prot=%x/%x/%s",
4665df8bae1dSRodney W. Grimes 			    entry->protection,
4666df8bae1dSRodney W. Grimes 			    entry->max_protection,
46678aef1712SMatthew Dillon 			    inheritance_name[(int)(unsigned char)entry->inheritance]);
4668df8bae1dSRodney W. Grimes 			if (entry->wired_count != 0)
466995e5e988SJohn Dyson 				db_printf(", wired");
4670df8bae1dSRodney W. Grimes 		}
46719fdfe602SMatthew Dillon 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
4672cd034a5bSMaxime Henrion 			db_printf(", share=%p, offset=0x%jx\n",
46739fdfe602SMatthew Dillon 			    (void *)entry->object.sub_map,
4674cd034a5bSMaxime Henrion 			    (uintmax_t)entry->offset);
4675df8bae1dSRodney W. Grimes 			if ((entry->prev == &map->header) ||
46769fdfe602SMatthew Dillon 			    (entry->prev->object.sub_map !=
46779fdfe602SMatthew Dillon 			    entry->object.sub_map)) {
4678c7c34a24SBruce Evans 				db_indent += 2;
46792ebcd458SAttilio Rao 				vm_map_print((vm_map_t)entry->object.sub_map);
4680c7c34a24SBruce Evans 				db_indent -= 2;
4681df8bae1dSRodney W. Grimes 			}
46820d94caffSDavid Greenman 		} else {
4683ef694c1aSEdward Tomasz Napierala 			if (entry->cred != NULL)
4684ef694c1aSEdward Tomasz Napierala 				db_printf(", ruid %d", entry->cred->cr_ruid);
4685cd034a5bSMaxime Henrion 			db_printf(", object=%p, offset=0x%jx",
4686101eeb7fSBruce Evans 			    (void *)entry->object.vm_object,
4687cd034a5bSMaxime Henrion 			    (uintmax_t)entry->offset);
4688ef694c1aSEdward Tomasz Napierala 			if (entry->object.vm_object && entry->object.vm_object->cred)
4689ef694c1aSEdward Tomasz Napierala 				db_printf(", obj ruid %d charge %jx",
4690ef694c1aSEdward Tomasz Napierala 				    entry->object.vm_object->cred->cr_ruid,
46913364c323SKonstantin Belousov 				    (uintmax_t)entry->object.vm_object->charge);
4692afa07f7eSJohn Dyson 			if (entry->eflags & MAP_ENTRY_COW)
4693c7c34a24SBruce Evans 				db_printf(", copy (%s)",
4694afa07f7eSJohn Dyson 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
4695c7c34a24SBruce Evans 			db_printf("\n");
4696df8bae1dSRodney W. Grimes 
4697df8bae1dSRodney W. Grimes 			if ((entry->prev == &map->header) ||
4698df8bae1dSRodney W. Grimes 			    (entry->prev->object.vm_object !=
4699df8bae1dSRodney W. Grimes 			    entry->object.vm_object)) {
4700c7c34a24SBruce Evans 				db_indent += 2;
4701101eeb7fSBruce Evans 				vm_object_print((db_expr_t)(intptr_t)
4702101eeb7fSBruce Evans 				    entry->object.vm_object,
470344bbc3b7SKonstantin Belousov 				    0, 0, (char *)0);
4704c7c34a24SBruce Evans 				db_indent -= 2;
4705df8bae1dSRodney W. Grimes 			}
4706df8bae1dSRodney W. Grimes 		}
4707df8bae1dSRodney W. Grimes 	}
4708c7c34a24SBruce Evans 	db_indent -= 2;
4709df8bae1dSRodney W. Grimes }
471095e5e988SJohn Dyson 
47112ebcd458SAttilio Rao DB_SHOW_COMMAND(map, map)
47122ebcd458SAttilio Rao {
47132ebcd458SAttilio Rao 
47142ebcd458SAttilio Rao 	if (!have_addr) {
47152ebcd458SAttilio Rao 		db_printf("usage: show map <addr>\n");
47162ebcd458SAttilio Rao 		return;
47172ebcd458SAttilio Rao 	}
47182ebcd458SAttilio Rao 	vm_map_print((vm_map_t)addr);
47192ebcd458SAttilio Rao }
472095e5e988SJohn Dyson 
472195e5e988SJohn Dyson DB_SHOW_COMMAND(procvm, procvm)
472295e5e988SJohn Dyson {
472395e5e988SJohn Dyson 	struct proc *p;
472495e5e988SJohn Dyson 
472595e5e988SJohn Dyson 	if (have_addr) {
4726a9546a6bSJohn Baldwin 		p = db_lookup_proc(addr);
472795e5e988SJohn Dyson 	} else {
472895e5e988SJohn Dyson 		p = curproc;
472995e5e988SJohn Dyson 	}
473095e5e988SJohn Dyson 
4731ac1e407bSBruce Evans 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
4732ac1e407bSBruce Evans 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
4733b1028ad1SLuoqi Chen 	    (void *)vmspace_pmap(p->p_vmspace));
473495e5e988SJohn Dyson 
47352ebcd458SAttilio Rao 	vm_map_print((vm_map_t)&p->p_vmspace->vm_map);
473695e5e988SJohn Dyson }
473795e5e988SJohn Dyson 
4738c7c34a24SBruce Evans #endif /* DDB */
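/*
 * [Editor's addition.]  Example ddb(4) session for the commands defined
 * above; the address is made up for illustration:
 *
 *	db> show map 0xfffff800035e9000
 *	db> show procvm
 *
 * As the code shows, "show procvm" without an address defaults to
 * curproc; with an address it resolves the argument via db_lookup_proc().
 */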