160727d8bSWarner Losh /*- 2796df753SPedro F. Giffuni * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1991, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 8df8bae1dSRodney W. Grimes * The Mach Operating System project at Carnegie-Mellon University. 9df8bae1dSRodney W. Grimes * 10df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 11df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 12df8bae1dSRodney W. Grimes * are met: 13df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 15df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 16df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 17df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 18fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 19df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 20df8bae1dSRodney W. Grimes * without specific prior written permission. 21df8bae1dSRodney W. Grimes * 22df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32df8bae1dSRodney W. Grimes * SUCH DAMAGE. 33df8bae1dSRodney W. Grimes * 343c4dd356SDavid Greenman * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94 35df8bae1dSRodney W. Grimes * 36df8bae1dSRodney W. Grimes * 37df8bae1dSRodney W. Grimes * Copyright (c) 1987, 1990 Carnegie-Mellon University. 38df8bae1dSRodney W. Grimes * All rights reserved. 39df8bae1dSRodney W. Grimes * 40df8bae1dSRodney W. Grimes * Authors: Avadis Tevanian, Jr., Michael Wayne Young 41df8bae1dSRodney W. Grimes * 42df8bae1dSRodney W. Grimes * Permission to use, copy, modify and distribute this software and 43df8bae1dSRodney W. Grimes * its documentation is hereby granted, provided that both the copyright 44df8bae1dSRodney W. Grimes * notice and this permission notice appear in all copies of the 45df8bae1dSRodney W. Grimes * software, derivative works or modified versions, and any portions 46df8bae1dSRodney W. Grimes * thereof, and that both notices appear in supporting documentation. 47df8bae1dSRodney W. Grimes * 48df8bae1dSRodney W. Grimes * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 49df8bae1dSRodney W. Grimes * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 50df8bae1dSRodney W. Grimes * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 51df8bae1dSRodney W. Grimes * 52df8bae1dSRodney W. Grimes * Carnegie Mellon requests users of this software to return to 53df8bae1dSRodney W. Grimes * 54df8bae1dSRodney W. Grimes * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 55df8bae1dSRodney W. Grimes * School of Computer Science 56df8bae1dSRodney W. Grimes * Carnegie Mellon University 57df8bae1dSRodney W. Grimes * Pittsburgh PA 15213-3890 58df8bae1dSRodney W. Grimes * 59df8bae1dSRodney W. Grimes * any improvements or extensions that they make and grant Carnegie the 60df8bae1dSRodney W. Grimes * rights to redistribute these changes. 61df8bae1dSRodney W. Grimes */ 62df8bae1dSRodney W. Grimes 63df8bae1dSRodney W. Grimes /* 64df8bae1dSRodney W. Grimes * Virtual memory mapping module. 65df8bae1dSRodney W. Grimes */ 66df8bae1dSRodney W. Grimes 67874651b1SDavid E. O'Brien #include <sys/cdefs.h> 68874651b1SDavid E. O'Brien __FBSDID("$FreeBSD$"); 69874651b1SDavid E. O'Brien 70df8bae1dSRodney W. Grimes #include <sys/param.h> 71df8bae1dSRodney W. Grimes #include <sys/systm.h> 72156e8654SKonstantin Belousov #include <sys/elf.h> 739a6d144fSKonstantin Belousov #include <sys/kernel.h> 7461d80e90SJohn Baldwin #include <sys/ktr.h> 75fb919e4dSMark Murray #include <sys/lock.h> 76fb919e4dSMark Murray #include <sys/mutex.h> 77b5e8ce9fSBruce Evans #include <sys/proc.h> 78efeaf95aSDavid Greenman #include <sys/vmmeter.h> 79867a482dSJohn Dyson #include <sys/mman.h> 801efb74fbSJohn Dyson #include <sys/vnode.h> 811ba5ad42SEdward Tomasz Napierala #include <sys/racct.h> 822267af78SJulian Elischer #include <sys/resourcevar.h> 8389f6b863SAttilio Rao #include <sys/rwlock.h> 843fde38dfSMike Silbersack #include <sys/file.h> 859a6d144fSKonstantin Belousov #include <sys/sysctl.h> 8605ba50f5SJake Burkholder #include <sys/sysent.h> 873db161e0SMatthew Dillon #include <sys/shm.h> 88df8bae1dSRodney W. Grimes 89df8bae1dSRodney W. Grimes #include <vm/vm.h> 90efeaf95aSDavid Greenman #include <vm/vm_param.h> 91efeaf95aSDavid Greenman #include <vm/pmap.h> 92efeaf95aSDavid Greenman #include <vm/vm_map.h> 93df8bae1dSRodney W. Grimes #include <vm/vm_page.h> 9454a3a114SMark Johnston #include <vm/vm_pageout.h> 95df8bae1dSRodney W. Grimes #include <vm/vm_object.h> 9647221757SJohn Dyson #include <vm/vm_pager.h> 9726f9a767SRodney W. Grimes #include <vm/vm_kern.h> 98efeaf95aSDavid Greenman #include <vm/vm_extern.h> 9984110e7eSKonstantin Belousov #include <vm/vnode_pager.h> 10021cd6e62SSeigo Tanimura #include <vm/swap_pager.h> 101670d17b5SJeff Roberson #include <vm/uma.h> 102df8bae1dSRodney W. Grimes 103df8bae1dSRodney W. Grimes /* 104df8bae1dSRodney W. Grimes * Virtual memory maps provide for the mapping, protection, 105df8bae1dSRodney W. Grimes * and sharing of virtual memory objects. In addition, 106df8bae1dSRodney W. Grimes * this module provides for an efficient virtual copy of 107df8bae1dSRodney W. Grimes * memory from one map to another. 108df8bae1dSRodney W. Grimes * 109df8bae1dSRodney W. Grimes * Synchronization is required prior to most operations. 110df8bae1dSRodney W. Grimes * 111df8bae1dSRodney W. Grimes * Maps consist of an ordered doubly-linked list of simple 112e2abaaaaSAlan Cox * entries; a self-adjusting binary search tree of these 113e2abaaaaSAlan Cox * entries is used to speed up lookups. 114df8bae1dSRodney W. Grimes * 115956f3135SPhilippe Charnier * Since portions of maps are specified by start/end addresses, 116df8bae1dSRodney W. Grimes * which may not align with existing map entries, all 117df8bae1dSRodney W. Grimes * routines merely "clip" entries to these start/end values. 118df8bae1dSRodney W. Grimes * [That is, an entry is split into two, bordering at a 119df8bae1dSRodney W. Grimes * start or end value.] Note that these clippings may not 120df8bae1dSRodney W. Grimes * always be necessary (as the two resulting entries are then 121df8bae1dSRodney W. Grimes * not changed); however, the clipping is done for convenience. 122df8bae1dSRodney W. Grimes * 123df8bae1dSRodney W. Grimes * As mentioned above, virtual copy operations are performed 124ad5fca3bSAlan Cox * by copying VM object references from one map to 125df8bae1dSRodney W. Grimes * another, and then marking both regions as copy-on-write. 126df8bae1dSRodney W. Grimes */ 127df8bae1dSRodney W. Grimes 1283a92e5d5SAlan Cox static struct mtx map_sleep_mtx; 1298355f576SJeff Roberson static uma_zone_t mapentzone; 1308355f576SJeff Roberson static uma_zone_t kmapentzone; 1318355f576SJeff Roberson static uma_zone_t vmspace_zone; 132b23f72e9SBrian Feldman static int vmspace_zinit(void *mem, int size, int flags); 13392351f16SAlan Cox static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, 13492351f16SAlan Cox vm_offset_t max); 1350b367bd8SKonstantin Belousov static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map); 136655c3490SKonstantin Belousov static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry); 13703462509SAlan Cox static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry); 13819bd0d9cSKonstantin Belousov static int vm_map_growstack(vm_map_t map, vm_offset_t addr, 13919bd0d9cSKonstantin Belousov vm_map_entry_t gap_entry); 140077ec27cSAlan Cox static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, 141077ec27cSAlan Cox vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags); 1428355f576SJeff Roberson #ifdef INVARIANTS 1438355f576SJeff Roberson static void vmspace_zdtor(void *mem, int size, void *arg); 1448355f576SJeff Roberson #endif 1454648ba0aSKonstantin Belousov static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, 1464648ba0aSKonstantin Belousov vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max, 1474648ba0aSKonstantin Belousov int cow); 14866cd575bSAlan Cox static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, 14966cd575bSAlan Cox vm_offset_t failed_addr); 150b18bfc3dSJohn Dyson 1510fb6aae7SKonstantin Belousov #define CONTAINS_BITS(set, bits) ((~(set) & (bits)) == 0) 1520fb6aae7SKonstantin Belousov 153ef694c1aSEdward Tomasz Napierala #define ENTRY_CHARGED(e) ((e)->cred != NULL || \ 154ef694c1aSEdward Tomasz Napierala ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \ 1553364c323SKonstantin Belousov !((e)->eflags & MAP_ENTRY_NEEDS_COPY))) 1563364c323SKonstantin Belousov 15757051fdcSTor Egge /* 15857051fdcSTor Egge * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type 15957051fdcSTor Egge * stable. 16057051fdcSTor Egge */ 16157051fdcSTor Egge #define PROC_VMSPACE_LOCK(p) do { } while (0) 16257051fdcSTor Egge #define PROC_VMSPACE_UNLOCK(p) do { } while (0) 16357051fdcSTor Egge 164d239bd3cSKonstantin Belousov /* 165d239bd3cSKonstantin Belousov * VM_MAP_RANGE_CHECK: [ internal use only ] 166d239bd3cSKonstantin Belousov * 167d239bd3cSKonstantin Belousov * Asserts that the starting and ending region 168d239bd3cSKonstantin Belousov * addresses fall within the valid range of the map. 169d239bd3cSKonstantin Belousov */ 170d239bd3cSKonstantin Belousov #define VM_MAP_RANGE_CHECK(map, start, end) \ 171d239bd3cSKonstantin Belousov { \ 172d239bd3cSKonstantin Belousov if (start < vm_map_min(map)) \ 173d239bd3cSKonstantin Belousov start = vm_map_min(map); \ 174d239bd3cSKonstantin Belousov if (end > vm_map_max(map)) \ 175d239bd3cSKonstantin Belousov end = vm_map_max(map); \ 176d239bd3cSKonstantin Belousov if (start > end) \ 177d239bd3cSKonstantin Belousov start = end; \ 178d239bd3cSKonstantin Belousov } 179d239bd3cSKonstantin Belousov 18020f02659SMark Johnston #ifndef UMA_MD_SMALL_ALLOC 18120f02659SMark Johnston 18220f02659SMark Johnston /* 18320f02659SMark Johnston * Allocate a new slab for kernel map entries. The kernel map may be locked or 18420f02659SMark Johnston * unlocked, depending on whether the request is coming from the kernel map or a 18520f02659SMark Johnston * submap. This function allocates a virtual address range directly from the 18620f02659SMark Johnston * kernel map instead of the kmem_* layer to avoid recursion on the kernel map 18720f02659SMark Johnston * lock and also to avoid triggering allocator recursion in the vmem boundary 18820f02659SMark Johnston * tag allocator. 18920f02659SMark Johnston */ 19020f02659SMark Johnston static void * 19120f02659SMark Johnston kmapent_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, 19220f02659SMark Johnston int wait) 19320f02659SMark Johnston { 19420f02659SMark Johnston vm_offset_t addr; 19520f02659SMark Johnston int error, locked; 19620f02659SMark Johnston 19720f02659SMark Johnston *pflag = UMA_SLAB_PRIV; 19820f02659SMark Johnston 19920f02659SMark Johnston if (!(locked = vm_map_locked(kernel_map))) 20020f02659SMark Johnston vm_map_lock(kernel_map); 20120f02659SMark Johnston addr = vm_map_findspace(kernel_map, vm_map_min(kernel_map), bytes); 20220f02659SMark Johnston if (addr + bytes < addr || addr + bytes > vm_map_max(kernel_map)) 20320f02659SMark Johnston panic("%s: kernel map is exhausted", __func__); 20420f02659SMark Johnston error = vm_map_insert(kernel_map, NULL, 0, addr, addr + bytes, 20520f02659SMark Johnston VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT); 20620f02659SMark Johnston if (error != KERN_SUCCESS) 20720f02659SMark Johnston panic("%s: vm_map_insert() failed: %d", __func__, error); 20820f02659SMark Johnston if (!locked) 20920f02659SMark Johnston vm_map_unlock(kernel_map); 21020f02659SMark Johnston error = kmem_back_domain(domain, kernel_object, addr, bytes, M_NOWAIT | 21120f02659SMark Johnston M_USE_RESERVE | (wait & M_ZERO)); 21220f02659SMark Johnston if (error == KERN_SUCCESS) { 21320f02659SMark Johnston return ((void *)addr); 21420f02659SMark Johnston } else { 21520f02659SMark Johnston if (!locked) 21620f02659SMark Johnston vm_map_lock(kernel_map); 21720f02659SMark Johnston vm_map_delete(kernel_map, addr, bytes); 21820f02659SMark Johnston if (!locked) 21920f02659SMark Johnston vm_map_unlock(kernel_map); 22020f02659SMark Johnston return (NULL); 22120f02659SMark Johnston } 22220f02659SMark Johnston } 22320f02659SMark Johnston 22420f02659SMark Johnston static void 22520f02659SMark Johnston kmapent_free(void *item, vm_size_t size, uint8_t pflag) 22620f02659SMark Johnston { 22720f02659SMark Johnston vm_offset_t addr; 228b8ebd99aSJohn Baldwin int error __diagused; 22920f02659SMark Johnston 23020f02659SMark Johnston if ((pflag & UMA_SLAB_PRIV) == 0) 23120f02659SMark Johnston /* XXX leaked */ 23220f02659SMark Johnston return; 23320f02659SMark Johnston 23420f02659SMark Johnston addr = (vm_offset_t)item; 23520f02659SMark Johnston kmem_unback(kernel_object, addr, size); 23620f02659SMark Johnston error = vm_map_remove(kernel_map, addr, addr + size); 23720f02659SMark Johnston KASSERT(error == KERN_SUCCESS, 23820f02659SMark Johnston ("%s: vm_map_remove failed: %d", __func__, error)); 23920f02659SMark Johnston } 24020f02659SMark Johnston 24120f02659SMark Johnston /* 24220f02659SMark Johnston * The worst-case upper bound on the number of kernel map entries that may be 24320f02659SMark Johnston * created before the zone must be replenished in _vm_map_unlock(). 24420f02659SMark Johnston */ 24520f02659SMark Johnston #define KMAPENT_RESERVE 1 24620f02659SMark Johnston 24720f02659SMark Johnston #endif /* !UMD_MD_SMALL_ALLOC */ 24820f02659SMark Johnston 2496fecb26bSKonstantin Belousov /* 2506fecb26bSKonstantin Belousov * vm_map_startup: 2516fecb26bSKonstantin Belousov * 25220f02659SMark Johnston * Initialize the vm_map module. Must be called before any other vm_map 25320f02659SMark Johnston * routines. 2546fecb26bSKonstantin Belousov * 25520f02659SMark Johnston * User map and entry structures are allocated from the general purpose 25620f02659SMark Johnston * memory pool. Kernel maps are statically defined. Kernel map entries 25720f02659SMark Johnston * require special handling to avoid recursion; see the comments above 25820f02659SMark Johnston * kmapent_alloc() and in vm_map_entry_create(). 2596fecb26bSKonstantin Belousov */ 2600d94caffSDavid Greenman void 2611b40f8c0SMatthew Dillon vm_map_startup(void) 262df8bae1dSRodney W. Grimes { 2633a92e5d5SAlan Cox mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF); 26420f02659SMark Johnston 26520f02659SMark Johnston /* 26620f02659SMark Johnston * Disable the use of per-CPU buckets: map entry allocation is 26720f02659SMark Johnston * serialized by the kernel map lock. 26820f02659SMark Johnston */ 269670d17b5SJeff Roberson kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry), 27018aa2de5SJeff Roberson NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 27120f02659SMark Johnston UMA_ZONE_VM | UMA_ZONE_NOBUCKET); 27220f02659SMark Johnston #ifndef UMA_MD_SMALL_ALLOC 27320f02659SMark Johnston /* Reserve an extra map entry for use when replenishing the reserve. */ 27420f02659SMark Johnston uma_zone_reserve(kmapentzone, KMAPENT_RESERVE + 1); 27520f02659SMark Johnston uma_prealloc(kmapentzone, KMAPENT_RESERVE + 1); 27620f02659SMark Johnston uma_zone_set_allocf(kmapentzone, kmapent_alloc); 27720f02659SMark Johnston uma_zone_set_freef(kmapentzone, kmapent_free); 27820f02659SMark Johnston #endif 27920f02659SMark Johnston 280670d17b5SJeff Roberson mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry), 281670d17b5SJeff Roberson NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2825df87b21SJeff Roberson vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL, 2835df87b21SJeff Roberson #ifdef INVARIANTS 2845df87b21SJeff Roberson vmspace_zdtor, 2855df87b21SJeff Roberson #else 2865df87b21SJeff Roberson NULL, 2875df87b21SJeff Roberson #endif 288f872f6eaSAlan Cox vmspace_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 2898355f576SJeff Roberson } 2908355f576SJeff Roberson 291b23f72e9SBrian Feldman static int 292b23f72e9SBrian Feldman vmspace_zinit(void *mem, int size, int flags) 2938355f576SJeff Roberson { 2948355f576SJeff Roberson struct vmspace *vm; 2958355f576SJeff Roberson vm_map_t map; 2968355f576SJeff Roberson 2977dd979dfSMark Johnston vm = (struct vmspace *)mem; 2987dd979dfSMark Johnston map = &vm->vm_map; 2997dd979dfSMark Johnston 300763d9566STim Kientzle memset(map, 0, sizeof(*map)); 3017dd979dfSMark Johnston mtx_init(&map->system_mtx, "vm map (system)", NULL, 3027dd979dfSMark Johnston MTX_DEF | MTX_DUPOK); 303e30df26eSAlan Cox sx_init(&map->lock, "vm map (user)"); 3047dd979dfSMark Johnston PMAP_LOCK_INIT(vmspace_pmap(vm)); 305b23f72e9SBrian Feldman return (0); 3068355f576SJeff Roberson } 3078355f576SJeff Roberson 3088355f576SJeff Roberson #ifdef INVARIANTS 3098355f576SJeff Roberson static void 3108355f576SJeff Roberson vmspace_zdtor(void *mem, int size, void *arg) 3118355f576SJeff Roberson { 3128355f576SJeff Roberson struct vmspace *vm; 3138355f576SJeff Roberson 3148355f576SJeff Roberson vm = (struct vmspace *)mem; 3157dd979dfSMark Johnston KASSERT(vm->vm_map.nentries == 0, 3167dd979dfSMark Johnston ("vmspace %p nentries == %d on free", vm, vm->vm_map.nentries)); 3177dd979dfSMark Johnston KASSERT(vm->vm_map.size == 0, 3187dd979dfSMark Johnston ("vmspace %p size == %ju on free", vm, (uintmax_t)vm->vm_map.size)); 3198355f576SJeff Roberson } 3208355f576SJeff Roberson #endif /* INVARIANTS */ 3218355f576SJeff Roberson 322df8bae1dSRodney W. Grimes /* 323df8bae1dSRodney W. Grimes * Allocate a vmspace structure, including a vm_map and pmap, 324df8bae1dSRodney W. Grimes * and initialize those structures. The refcnt is set to 1. 325df8bae1dSRodney W. Grimes */ 326df8bae1dSRodney W. Grimes struct vmspace * 32774d1d2b7SNeel Natu vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit) 328df8bae1dSRodney W. Grimes { 329c0877f10SJohn Dyson struct vmspace *vm; 3300d94caffSDavid Greenman 331a163d034SWarner Losh vm = uma_zalloc(vmspace_zone, M_WAITOK); 33274d1d2b7SNeel Natu KASSERT(vm->vm_map.pmap == NULL, ("vm_map.pmap must be NULL")); 33374d1d2b7SNeel Natu if (!pinit(vmspace_pmap(vm))) { 33489b57fcfSKonstantin Belousov uma_zfree(vmspace_zone, vm); 33589b57fcfSKonstantin Belousov return (NULL); 33689b57fcfSKonstantin Belousov } 33721c641b2SJohn Baldwin CTR1(KTR_VM, "vmspace_alloc: %p", vm); 33892351f16SAlan Cox _vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max); 339f7db0c95SMark Johnston refcount_init(&vm->vm_refcnt, 1); 3402d8acc0fSJohn Dyson vm->vm_shm = NULL; 34151ab6c28SAlan Cox vm->vm_swrss = 0; 34251ab6c28SAlan Cox vm->vm_tsize = 0; 34351ab6c28SAlan Cox vm->vm_dsize = 0; 34451ab6c28SAlan Cox vm->vm_ssize = 0; 34551ab6c28SAlan Cox vm->vm_taddr = 0; 34651ab6c28SAlan Cox vm->vm_daddr = 0; 34751ab6c28SAlan Cox vm->vm_maxsaddr = 0; 348df8bae1dSRodney W. Grimes return (vm); 349df8bae1dSRodney W. Grimes } 350df8bae1dSRodney W. Grimes 3514b5c9cf6SEdward Tomasz Napierala #ifdef RACCT 3521ba5ad42SEdward Tomasz Napierala static void 3531ba5ad42SEdward Tomasz Napierala vmspace_container_reset(struct proc *p) 3541ba5ad42SEdward Tomasz Napierala { 3551ba5ad42SEdward Tomasz Napierala 3561ba5ad42SEdward Tomasz Napierala PROC_LOCK(p); 3571ba5ad42SEdward Tomasz Napierala racct_set(p, RACCT_DATA, 0); 3581ba5ad42SEdward Tomasz Napierala racct_set(p, RACCT_STACK, 0); 3591ba5ad42SEdward Tomasz Napierala racct_set(p, RACCT_RSS, 0); 3601ba5ad42SEdward Tomasz Napierala racct_set(p, RACCT_MEMLOCK, 0); 3611ba5ad42SEdward Tomasz Napierala racct_set(p, RACCT_VMEM, 0); 3621ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 3631ba5ad42SEdward Tomasz Napierala } 3644b5c9cf6SEdward Tomasz Napierala #endif 3651ba5ad42SEdward Tomasz Napierala 36662a59e8fSWarner Losh static inline void 367582ec34cSAlfred Perlstein vmspace_dofree(struct vmspace *vm) 368df8bae1dSRodney W. Grimes { 3690ef12795SAlan Cox 37021c641b2SJohn Baldwin CTR1(KTR_VM, "vmspace_free: %p", vm); 3713db161e0SMatthew Dillon 3723db161e0SMatthew Dillon /* 3733db161e0SMatthew Dillon * Make sure any SysV shm is freed, it might not have been in 3743db161e0SMatthew Dillon * exit1(). 3753db161e0SMatthew Dillon */ 3763db161e0SMatthew Dillon shmexit(vm); 3773db161e0SMatthew Dillon 37830dcfc09SJohn Dyson /* 379df8bae1dSRodney W. Grimes * Lock the map, to wait out all other references to it. 3800d94caffSDavid Greenman * Delete all of the mappings and pages they hold, then call 3810d94caffSDavid Greenman * the pmap module to reclaim anything left. 382df8bae1dSRodney W. Grimes */ 383f0165b1cSKonstantin Belousov (void)vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map), 384f0165b1cSKonstantin Belousov vm_map_max(&vm->vm_map)); 3858355f576SJeff Roberson 3860ef12795SAlan Cox pmap_release(vmspace_pmap(vm)); 3870ef12795SAlan Cox vm->vm_map.pmap = NULL; 3888355f576SJeff Roberson uma_zfree(vmspace_zone, vm); 389df8bae1dSRodney W. Grimes } 390582ec34cSAlfred Perlstein 391582ec34cSAlfred Perlstein void 392582ec34cSAlfred Perlstein vmspace_free(struct vmspace *vm) 393582ec34cSAlfred Perlstein { 394582ec34cSAlfred Perlstein 395423521aaSRyan Stone WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 396164a37a5SJohn Baldwin "vmspace_free() called"); 397423521aaSRyan Stone 398f7db0c95SMark Johnston if (refcount_release(&vm->vm_refcnt)) 399582ec34cSAlfred Perlstein vmspace_dofree(vm); 400582ec34cSAlfred Perlstein } 401582ec34cSAlfred Perlstein 402582ec34cSAlfred Perlstein void 403582ec34cSAlfred Perlstein vmspace_exitfree(struct proc *p) 404582ec34cSAlfred Perlstein { 405334f7061SPeter Wemm struct vmspace *vm; 406582ec34cSAlfred Perlstein 40757051fdcSTor Egge PROC_VMSPACE_LOCK(p); 408334f7061SPeter Wemm vm = p->p_vmspace; 409334f7061SPeter Wemm p->p_vmspace = NULL; 41057051fdcSTor Egge PROC_VMSPACE_UNLOCK(p); 41157051fdcSTor Egge KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace")); 41257051fdcSTor Egge vmspace_free(vm); 41357051fdcSTor Egge } 41457051fdcSTor Egge 41557051fdcSTor Egge void 41657051fdcSTor Egge vmspace_exit(struct thread *td) 41757051fdcSTor Egge { 41857051fdcSTor Egge struct vmspace *vm; 41957051fdcSTor Egge struct proc *p; 420f7db0c95SMark Johnston bool released; 42157051fdcSTor Egge 42257051fdcSTor Egge p = td->td_proc; 42357051fdcSTor Egge vm = p->p_vmspace; 424f7db0c95SMark Johnston 425f7db0c95SMark Johnston /* 426f7db0c95SMark Johnston * Prepare to release the vmspace reference. The thread that releases 427f7db0c95SMark Johnston * the last reference is responsible for tearing down the vmspace. 428f7db0c95SMark Johnston * However, threads not releasing the final reference must switch to the 429f7db0c95SMark Johnston * kernel's vmspace0 before the decrement so that the subsequent pmap 430f7db0c95SMark Johnston * deactivation does not modify a freed vmspace. 431f7db0c95SMark Johnston */ 432f7db0c95SMark Johnston refcount_acquire(&vmspace0.vm_refcnt); 433f7db0c95SMark Johnston if (!(released = refcount_release_if_last(&vm->vm_refcnt))) { 434f7db0c95SMark Johnston if (p->p_vmspace != &vmspace0) { 43557051fdcSTor Egge PROC_VMSPACE_LOCK(p); 43657051fdcSTor Egge p->p_vmspace = &vmspace0; 43757051fdcSTor Egge PROC_VMSPACE_UNLOCK(p); 43857051fdcSTor Egge pmap_activate(td); 43957051fdcSTor Egge } 440f7db0c95SMark Johnston released = refcount_release(&vm->vm_refcnt); 441f7db0c95SMark Johnston } 442f7db0c95SMark Johnston if (released) { 443f7db0c95SMark Johnston /* 444f7db0c95SMark Johnston * pmap_remove_pages() expects the pmap to be active, so switch 445f7db0c95SMark Johnston * back first if necessary. 446f7db0c95SMark Johnston */ 44757051fdcSTor Egge if (p->p_vmspace != vm) { 44857051fdcSTor Egge PROC_VMSPACE_LOCK(p); 44957051fdcSTor Egge p->p_vmspace = vm; 45057051fdcSTor Egge PROC_VMSPACE_UNLOCK(p); 45157051fdcSTor Egge pmap_activate(td); 45257051fdcSTor Egge } 45357051fdcSTor Egge pmap_remove_pages(vmspace_pmap(vm)); 45457051fdcSTor Egge PROC_VMSPACE_LOCK(p); 45557051fdcSTor Egge p->p_vmspace = &vmspace0; 45657051fdcSTor Egge PROC_VMSPACE_UNLOCK(p); 45757051fdcSTor Egge pmap_activate(td); 458334f7061SPeter Wemm vmspace_dofree(vm); 459334f7061SPeter Wemm } 4604b5c9cf6SEdward Tomasz Napierala #ifdef RACCT 4614b5c9cf6SEdward Tomasz Napierala if (racct_enable) 4621ba5ad42SEdward Tomasz Napierala vmspace_container_reset(p); 4634b5c9cf6SEdward Tomasz Napierala #endif 46457051fdcSTor Egge } 46557051fdcSTor Egge 46657051fdcSTor Egge /* Acquire reference to vmspace owned by another process. */ 46757051fdcSTor Egge 46857051fdcSTor Egge struct vmspace * 46957051fdcSTor Egge vmspace_acquire_ref(struct proc *p) 47057051fdcSTor Egge { 47157051fdcSTor Egge struct vmspace *vm; 47257051fdcSTor Egge 47357051fdcSTor Egge PROC_VMSPACE_LOCK(p); 47457051fdcSTor Egge vm = p->p_vmspace; 475f7db0c95SMark Johnston if (vm == NULL || !refcount_acquire_if_not_zero(&vm->vm_refcnt)) { 47657051fdcSTor Egge PROC_VMSPACE_UNLOCK(p); 47757051fdcSTor Egge return (NULL); 47857051fdcSTor Egge } 47957051fdcSTor Egge if (vm != p->p_vmspace) { 48057051fdcSTor Egge PROC_VMSPACE_UNLOCK(p); 48157051fdcSTor Egge vmspace_free(vm); 48257051fdcSTor Egge return (NULL); 48357051fdcSTor Egge } 48457051fdcSTor Egge PROC_VMSPACE_UNLOCK(p); 48557051fdcSTor Egge return (vm); 48657051fdcSTor Egge } 487df8bae1dSRodney W. Grimes 4888a4dc40fSJohn Baldwin /* 4898a4dc40fSJohn Baldwin * Switch between vmspaces in an AIO kernel process. 4908a4dc40fSJohn Baldwin * 4910b96ca33SJohn Baldwin * The new vmspace is either the vmspace of a user process obtained 4920b96ca33SJohn Baldwin * from an active AIO request or the initial vmspace of the AIO kernel 4930b96ca33SJohn Baldwin * process (when it is idling). Because user processes will block to 4940b96ca33SJohn Baldwin * drain any active AIO requests before proceeding in exit() or 4950b96ca33SJohn Baldwin * execve(), the reference count for vmspaces from AIO requests can 4960b96ca33SJohn Baldwin * never be 0. Similarly, AIO kernel processes hold an extra 4970b96ca33SJohn Baldwin * reference on their initial vmspace for the life of the process. As 4980b96ca33SJohn Baldwin * a result, the 'newvm' vmspace always has a non-zero reference 4990b96ca33SJohn Baldwin * count. This permits an additional reference on 'newvm' to be 5000b96ca33SJohn Baldwin * acquired via a simple atomic increment rather than the loop in 5010b96ca33SJohn Baldwin * vmspace_acquire_ref() above. 5028a4dc40fSJohn Baldwin */ 5038a4dc40fSJohn Baldwin void 5048a4dc40fSJohn Baldwin vmspace_switch_aio(struct vmspace *newvm) 5058a4dc40fSJohn Baldwin { 5068a4dc40fSJohn Baldwin struct vmspace *oldvm; 5078a4dc40fSJohn Baldwin 5088a4dc40fSJohn Baldwin /* XXX: Need some way to assert that this is an aio daemon. */ 5098a4dc40fSJohn Baldwin 510f7db0c95SMark Johnston KASSERT(refcount_load(&newvm->vm_refcnt) > 0, 5118a4dc40fSJohn Baldwin ("vmspace_switch_aio: newvm unreferenced")); 5128a4dc40fSJohn Baldwin 5138a4dc40fSJohn Baldwin oldvm = curproc->p_vmspace; 5148a4dc40fSJohn Baldwin if (oldvm == newvm) 5158a4dc40fSJohn Baldwin return; 5168a4dc40fSJohn Baldwin 5178a4dc40fSJohn Baldwin /* 5188a4dc40fSJohn Baldwin * Point to the new address space and refer to it. 5198a4dc40fSJohn Baldwin */ 5208a4dc40fSJohn Baldwin curproc->p_vmspace = newvm; 521f7db0c95SMark Johnston refcount_acquire(&newvm->vm_refcnt); 5228a4dc40fSJohn Baldwin 5238a4dc40fSJohn Baldwin /* Activate the new mapping. */ 5248a4dc40fSJohn Baldwin pmap_activate(curthread); 5258a4dc40fSJohn Baldwin 5268a4dc40fSJohn Baldwin vmspace_free(oldvm); 5278a4dc40fSJohn Baldwin } 5288a4dc40fSJohn Baldwin 5291b40f8c0SMatthew Dillon void 530780b1c09SAlan Cox _vm_map_lock(vm_map_t map, const char *file, int line) 5311b40f8c0SMatthew Dillon { 532bc91c510SAlan Cox 53393bc4879SAlan Cox if (map->system_map) 534ccdf2333SAttilio Rao mtx_lock_flags_(&map->system_mtx, 0, file, line); 53512c64974SMaxime Henrion else 5369fde98bbSAttilio Rao sx_xlock_(&map->lock, file, line); 5371b40f8c0SMatthew Dillon map->timestamp++; 5381b40f8c0SMatthew Dillon } 5391b40f8c0SMatthew Dillon 54078022527SKonstantin Belousov void 54178022527SKonstantin Belousov vm_map_entry_set_vnode_text(vm_map_entry_t entry, bool add) 54278022527SKonstantin Belousov { 54367388836SKonstantin Belousov vm_object_t object; 54478022527SKonstantin Belousov struct vnode *vp; 54567388836SKonstantin Belousov bool vp_held; 54678022527SKonstantin Belousov 54778022527SKonstantin Belousov if ((entry->eflags & MAP_ENTRY_VN_EXEC) == 0) 54878022527SKonstantin Belousov return; 54978022527SKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0, 55078022527SKonstantin Belousov ("Submap with execs")); 55178022527SKonstantin Belousov object = entry->object.vm_object; 55278022527SKonstantin Belousov KASSERT(object != NULL, ("No object for text, entry %p", entry)); 55367388836SKonstantin Belousov if ((object->flags & OBJ_ANON) != 0) 55467388836SKonstantin Belousov object = object->handle; 55567388836SKonstantin Belousov else 55667388836SKonstantin Belousov KASSERT(object->backing_object == NULL, 55767388836SKonstantin Belousov ("non-anon object %p shadows", object)); 55867388836SKonstantin Belousov KASSERT(object != NULL, ("No content object for text, entry %p obj %p", 55967388836SKonstantin Belousov entry, entry->object.vm_object)); 56078022527SKonstantin Belousov 56167388836SKonstantin Belousov /* 56267388836SKonstantin Belousov * Mostly, we do not lock the backing object. It is 56367388836SKonstantin Belousov * referenced by the entry we are processing, so it cannot go 56467388836SKonstantin Belousov * away. 56567388836SKonstantin Belousov */ 566192112b7SKonstantin Belousov vm_pager_getvp(object, &vp, &vp_held); 56732d2014dSKonstantin Belousov if (vp != NULL) { 568bb9e2184SKonstantin Belousov if (add) { 56978022527SKonstantin Belousov VOP_SET_TEXT_CHECKED(vp); 570bb9e2184SKonstantin Belousov } else { 571bb9e2184SKonstantin Belousov vn_lock(vp, LK_SHARED | LK_RETRY); 572bb9e2184SKonstantin Belousov VOP_UNSET_TEXT_CHECKED(vp); 573b249ce48SMateusz Guzik VOP_UNLOCK(vp); 574bb9e2184SKonstantin Belousov } 57567388836SKonstantin Belousov if (vp_held) 57667388836SKonstantin Belousov vdrop(vp); 577bb9e2184SKonstantin Belousov } 57878022527SKonstantin Belousov } 57978022527SKonstantin Belousov 5807cdcf863SDoug Moore /* 5817cdcf863SDoug Moore * Use a different name for this vm_map_entry field when it's use 5827cdcf863SDoug Moore * is not consistent with its use as part of an ordered search tree. 5837cdcf863SDoug Moore */ 5847cdcf863SDoug Moore #define defer_next right 5857cdcf863SDoug Moore 5860b367bd8SKonstantin Belousov static void 5870b367bd8SKonstantin Belousov vm_map_process_deferred(void) 5880e0af8ecSBrian Feldman { 5890b367bd8SKonstantin Belousov struct thread *td; 5906fbe60faSJohn Baldwin vm_map_entry_t entry, next; 59184110e7eSKonstantin Belousov vm_object_t object; 592655c3490SKonstantin Belousov 5930b367bd8SKonstantin Belousov td = curthread; 5946fbe60faSJohn Baldwin entry = td->td_map_def_user; 5956fbe60faSJohn Baldwin td->td_map_def_user = NULL; 5966fbe60faSJohn Baldwin while (entry != NULL) { 5977cdcf863SDoug Moore next = entry->defer_next; 598fe7bcbafSKyle Evans MPASS((entry->eflags & (MAP_ENTRY_WRITECNT | 599fe7bcbafSKyle Evans MAP_ENTRY_VN_EXEC)) != (MAP_ENTRY_WRITECNT | 60078022527SKonstantin Belousov MAP_ENTRY_VN_EXEC)); 601fe7bcbafSKyle Evans if ((entry->eflags & MAP_ENTRY_WRITECNT) != 0) { 60284110e7eSKonstantin Belousov /* 60384110e7eSKonstantin Belousov * Decrement the object's writemappings and 60484110e7eSKonstantin Belousov * possibly the vnode's v_writecount. 60584110e7eSKonstantin Belousov */ 60684110e7eSKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0, 60784110e7eSKonstantin Belousov ("Submap with writecount")); 60884110e7eSKonstantin Belousov object = entry->object.vm_object; 60984110e7eSKonstantin Belousov KASSERT(object != NULL, ("No object for writecount")); 610fe7bcbafSKyle Evans vm_pager_release_writecount(object, entry->start, 61184110e7eSKonstantin Belousov entry->end); 61284110e7eSKonstantin Belousov } 61378022527SKonstantin Belousov vm_map_entry_set_vnode_text(entry, false); 6140b367bd8SKonstantin Belousov vm_map_entry_deallocate(entry, FALSE); 6156fbe60faSJohn Baldwin entry = next; 6160b367bd8SKonstantin Belousov } 6170b367bd8SKonstantin Belousov } 6180b367bd8SKonstantin Belousov 619461587dcSDoug Moore #ifdef INVARIANTS 620461587dcSDoug Moore static void 621461587dcSDoug Moore _vm_map_assert_locked(vm_map_t map, const char *file, int line) 622461587dcSDoug Moore { 623461587dcSDoug Moore 624461587dcSDoug Moore if (map->system_map) 625461587dcSDoug Moore mtx_assert_(&map->system_mtx, MA_OWNED, file, line); 626461587dcSDoug Moore else 627461587dcSDoug Moore sx_assert_(&map->lock, SA_XLOCKED, file, line); 628461587dcSDoug Moore } 629461587dcSDoug Moore 630461587dcSDoug Moore #define VM_MAP_ASSERT_LOCKED(map) \ 631461587dcSDoug Moore _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE) 632461587dcSDoug Moore 633461587dcSDoug Moore enum { VMMAP_CHECK_NONE, VMMAP_CHECK_UNLOCK, VMMAP_CHECK_ALL }; 634461587dcSDoug Moore #ifdef DIAGNOSTIC 635461587dcSDoug Moore static int enable_vmmap_check = VMMAP_CHECK_UNLOCK; 636461587dcSDoug Moore #else 637461587dcSDoug Moore static int enable_vmmap_check = VMMAP_CHECK_NONE; 638461587dcSDoug Moore #endif 639461587dcSDoug Moore SYSCTL_INT(_debug, OID_AUTO, vmmap_check, CTLFLAG_RWTUN, 640461587dcSDoug Moore &enable_vmmap_check, 0, "Enable vm map consistency checking"); 641461587dcSDoug Moore 642461587dcSDoug Moore static void _vm_map_assert_consistent(vm_map_t map, int check); 643461587dcSDoug Moore 644461587dcSDoug Moore #define VM_MAP_ASSERT_CONSISTENT(map) \ 645461587dcSDoug Moore _vm_map_assert_consistent(map, VMMAP_CHECK_ALL) 646461587dcSDoug Moore #ifdef DIAGNOSTIC 647461587dcSDoug Moore #define VM_MAP_UNLOCK_CONSISTENT(map) do { \ 648461587dcSDoug Moore if (map->nupdates > map->nentries) { \ 649461587dcSDoug Moore _vm_map_assert_consistent(map, VMMAP_CHECK_UNLOCK); \ 650461587dcSDoug Moore map->nupdates = 0; \ 651461587dcSDoug Moore } \ 652461587dcSDoug Moore } while (0) 653461587dcSDoug Moore #else 654461587dcSDoug Moore #define VM_MAP_UNLOCK_CONSISTENT(map) 655461587dcSDoug Moore #endif 656461587dcSDoug Moore #else 657461587dcSDoug Moore #define VM_MAP_ASSERT_LOCKED(map) 658461587dcSDoug Moore #define VM_MAP_ASSERT_CONSISTENT(map) 659461587dcSDoug Moore #define VM_MAP_UNLOCK_CONSISTENT(map) 660461587dcSDoug Moore #endif /* INVARIANTS */ 661461587dcSDoug Moore 6620b367bd8SKonstantin Belousov void 6630b367bd8SKonstantin Belousov _vm_map_unlock(vm_map_t map, const char *file, int line) 6640b367bd8SKonstantin Belousov { 6650b367bd8SKonstantin Belousov 666461587dcSDoug Moore VM_MAP_UNLOCK_CONSISTENT(map); 66720f02659SMark Johnston if (map->system_map) { 66820f02659SMark Johnston #ifndef UMA_MD_SMALL_ALLOC 66920f02659SMark Johnston if (map == kernel_map && (map->flags & MAP_REPLENISH) != 0) { 67020f02659SMark Johnston uma_prealloc(kmapentzone, 1); 67120f02659SMark Johnston map->flags &= ~MAP_REPLENISH; 67220f02659SMark Johnston } 67320f02659SMark Johnston #endif 674ccdf2333SAttilio Rao mtx_unlock_flags_(&map->system_mtx, 0, file, line); 67520f02659SMark Johnston } else { 6769fde98bbSAttilio Rao sx_xunlock_(&map->lock, file, line); 6770b367bd8SKonstantin Belousov vm_map_process_deferred(); 678655c3490SKonstantin Belousov } 6790e0af8ecSBrian Feldman } 6800e0af8ecSBrian Feldman 6810e0af8ecSBrian Feldman void 682780b1c09SAlan Cox _vm_map_lock_read(vm_map_t map, const char *file, int line) 6830e0af8ecSBrian Feldman { 684bc91c510SAlan Cox 68593bc4879SAlan Cox if (map->system_map) 686ccdf2333SAttilio Rao mtx_lock_flags_(&map->system_mtx, 0, file, line); 68712c64974SMaxime Henrion else 6889fde98bbSAttilio Rao sx_slock_(&map->lock, file, line); 68936daaecdSAlan Cox } 6900e0af8ecSBrian Feldman 6910e0af8ecSBrian Feldman void 692780b1c09SAlan Cox _vm_map_unlock_read(vm_map_t map, const char *file, int line) 6930e0af8ecSBrian Feldman { 694bc91c510SAlan Cox 69520f02659SMark Johnston if (map->system_map) { 69620f02659SMark Johnston KASSERT((map->flags & MAP_REPLENISH) == 0, 69720f02659SMark Johnston ("%s: MAP_REPLENISH leaked", __func__)); 698ccdf2333SAttilio Rao mtx_unlock_flags_(&map->system_mtx, 0, file, line); 69920f02659SMark Johnston } else { 7009fde98bbSAttilio Rao sx_sunlock_(&map->lock, file, line); 7010b367bd8SKonstantin Belousov vm_map_process_deferred(); 7020b367bd8SKonstantin Belousov } 70325adb370SBrian Feldman } 70425adb370SBrian Feldman 705d974f03cSAlan Cox int 706780b1c09SAlan Cox _vm_map_trylock(vm_map_t map, const char *file, int line) 707d974f03cSAlan Cox { 70825adb370SBrian Feldman int error; 70925adb370SBrian Feldman 71036daaecdSAlan Cox error = map->system_map ? 711ccdf2333SAttilio Rao !mtx_trylock_flags_(&map->system_mtx, 0, file, line) : 7129fde98bbSAttilio Rao !sx_try_xlock_(&map->lock, file, line); 7133a92e5d5SAlan Cox if (error == 0) 7143a92e5d5SAlan Cox map->timestamp++; 715bc91c510SAlan Cox return (error == 0); 7160e0af8ecSBrian Feldman } 7170e0af8ecSBrian Feldman 7180e0af8ecSBrian Feldman int 71972d97679SDavid Schultz _vm_map_trylock_read(vm_map_t map, const char *file, int line) 72072d97679SDavid Schultz { 72172d97679SDavid Schultz int error; 72272d97679SDavid Schultz 72372d97679SDavid Schultz error = map->system_map ? 724ccdf2333SAttilio Rao !mtx_trylock_flags_(&map->system_mtx, 0, file, line) : 7259fde98bbSAttilio Rao !sx_try_slock_(&map->lock, file, line); 72672d97679SDavid Schultz return (error == 0); 72772d97679SDavid Schultz } 72872d97679SDavid Schultz 72905a8c414SAlan Cox /* 73005a8c414SAlan Cox * _vm_map_lock_upgrade: [ internal use only ] 73105a8c414SAlan Cox * 73205a8c414SAlan Cox * Tries to upgrade a read (shared) lock on the specified map to a write 73305a8c414SAlan Cox * (exclusive) lock. Returns the value "0" if the upgrade succeeds and a 73405a8c414SAlan Cox * non-zero value if the upgrade fails. If the upgrade fails, the map is 73505a8c414SAlan Cox * returned without a read or write lock held. 73605a8c414SAlan Cox * 73705a8c414SAlan Cox * Requires that the map be read locked. 73805a8c414SAlan Cox */ 73972d97679SDavid Schultz int 740780b1c09SAlan Cox _vm_map_lock_upgrade(vm_map_t map, const char *file, int line) 7410e0af8ecSBrian Feldman { 74205a8c414SAlan Cox unsigned int last_timestamp; 743bc91c510SAlan Cox 74412c64974SMaxime Henrion if (map->system_map) { 745ccdf2333SAttilio Rao mtx_assert_(&map->system_mtx, MA_OWNED, file, line); 74605a8c414SAlan Cox } else { 7479fde98bbSAttilio Rao if (!sx_try_upgrade_(&map->lock, file, line)) { 74805a8c414SAlan Cox last_timestamp = map->timestamp; 7499fde98bbSAttilio Rao sx_sunlock_(&map->lock, file, line); 7500b367bd8SKonstantin Belousov vm_map_process_deferred(); 75105a8c414SAlan Cox /* 75205a8c414SAlan Cox * If the map's timestamp does not change while the 75305a8c414SAlan Cox * map is unlocked, then the upgrade succeeds. 75405a8c414SAlan Cox */ 7559fde98bbSAttilio Rao sx_xlock_(&map->lock, file, line); 75605a8c414SAlan Cox if (last_timestamp != map->timestamp) { 7579fde98bbSAttilio Rao sx_xunlock_(&map->lock, file, line); 75805a8c414SAlan Cox return (1); 75905a8c414SAlan Cox } 76005a8c414SAlan Cox } 76105a8c414SAlan Cox } 762bc91c510SAlan Cox map->timestamp++; 763bc91c510SAlan Cox return (0); 7640e0af8ecSBrian Feldman } 7650e0af8ecSBrian Feldman 7660e0af8ecSBrian Feldman void 767780b1c09SAlan Cox _vm_map_lock_downgrade(vm_map_t map, const char *file, int line) 7681b40f8c0SMatthew Dillon { 769bc91c510SAlan Cox 77012c64974SMaxime Henrion if (map->system_map) { 77120f02659SMark Johnston KASSERT((map->flags & MAP_REPLENISH) == 0, 77220f02659SMark Johnston ("%s: MAP_REPLENISH leaked", __func__)); 773ccdf2333SAttilio Rao mtx_assert_(&map->system_mtx, MA_OWNED, file, line); 774461587dcSDoug Moore } else { 775461587dcSDoug Moore VM_MAP_UNLOCK_CONSISTENT(map); 7769fde98bbSAttilio Rao sx_downgrade_(&map->lock, file, line); 77705a8c414SAlan Cox } 778461587dcSDoug Moore } 77905a8c414SAlan Cox 78005a8c414SAlan Cox /* 78105a8c414SAlan Cox * vm_map_locked: 78205a8c414SAlan Cox * 78305a8c414SAlan Cox * Returns a non-zero value if the caller holds a write (exclusive) lock 78405a8c414SAlan Cox * on the specified map and the value "0" otherwise. 78505a8c414SAlan Cox */ 78605a8c414SAlan Cox int 78705a8c414SAlan Cox vm_map_locked(vm_map_t map) 78805a8c414SAlan Cox { 78905a8c414SAlan Cox 79005a8c414SAlan Cox if (map->system_map) 79105a8c414SAlan Cox return (mtx_owned(&map->system_mtx)); 79205a8c414SAlan Cox else 79305a8c414SAlan Cox return (sx_xlocked(&map->lock)); 79425adb370SBrian Feldman } 79525adb370SBrian Feldman 796acd9a301SAlan Cox /* 7978304adaaSAlan Cox * _vm_map_unlock_and_wait: 7988304adaaSAlan Cox * 7998304adaaSAlan Cox * Atomically releases the lock on the specified map and puts the calling 8008304adaaSAlan Cox * thread to sleep. The calling thread will remain asleep until either 8018304adaaSAlan Cox * vm_map_wakeup() is performed on the map or the specified timeout is 8028304adaaSAlan Cox * exceeded. 8038304adaaSAlan Cox * 8048304adaaSAlan Cox * WARNING! This function does not perform deferred deallocations of 8058304adaaSAlan Cox * objects and map entries. Therefore, the calling thread is expected to 8068304adaaSAlan Cox * reacquire the map lock after reawakening and later perform an ordinary 8078304adaaSAlan Cox * unlock operation, such as vm_map_unlock(), before completing its 8088304adaaSAlan Cox * operation on the map. 809acd9a301SAlan Cox */ 8109688f931SAlan Cox int 8118304adaaSAlan Cox _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line) 812acd9a301SAlan Cox { 813acd9a301SAlan Cox 814461587dcSDoug Moore VM_MAP_UNLOCK_CONSISTENT(map); 8153a92e5d5SAlan Cox mtx_lock(&map_sleep_mtx); 81620f02659SMark Johnston if (map->system_map) { 81720f02659SMark Johnston KASSERT((map->flags & MAP_REPLENISH) == 0, 81820f02659SMark Johnston ("%s: MAP_REPLENISH leaked", __func__)); 819ccdf2333SAttilio Rao mtx_unlock_flags_(&map->system_mtx, 0, file, line); 82020f02659SMark Johnston } else { 8219fde98bbSAttilio Rao sx_xunlock_(&map->lock, file, line); 82220f02659SMark Johnston } 8238304adaaSAlan Cox return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps", 8248304adaaSAlan Cox timo)); 825acd9a301SAlan Cox } 826acd9a301SAlan Cox 827acd9a301SAlan Cox /* 828acd9a301SAlan Cox * vm_map_wakeup: 8298304adaaSAlan Cox * 8308304adaaSAlan Cox * Awaken any threads that have slept on the map using 8318304adaaSAlan Cox * vm_map_unlock_and_wait(). 832acd9a301SAlan Cox */ 8339688f931SAlan Cox void 834acd9a301SAlan Cox vm_map_wakeup(vm_map_t map) 835acd9a301SAlan Cox { 836acd9a301SAlan Cox 837b49ecb86SAlan Cox /* 8383a92e5d5SAlan Cox * Acquire and release map_sleep_mtx to prevent a wakeup() 8398304adaaSAlan Cox * from being performed (and lost) between the map unlock 8408304adaaSAlan Cox * and the msleep() in _vm_map_unlock_and_wait(). 841b49ecb86SAlan Cox */ 8423a92e5d5SAlan Cox mtx_lock(&map_sleep_mtx); 8433a92e5d5SAlan Cox mtx_unlock(&map_sleep_mtx); 844acd9a301SAlan Cox wakeup(&map->root); 845acd9a301SAlan Cox } 846acd9a301SAlan Cox 847a5db445dSMax Laier void 848a5db445dSMax Laier vm_map_busy(vm_map_t map) 849a5db445dSMax Laier { 850a5db445dSMax Laier 851a5db445dSMax Laier VM_MAP_ASSERT_LOCKED(map); 852a5db445dSMax Laier map->busy++; 853a5db445dSMax Laier } 854a5db445dSMax Laier 855a5db445dSMax Laier void 856a5db445dSMax Laier vm_map_unbusy(vm_map_t map) 857a5db445dSMax Laier { 858a5db445dSMax Laier 859a5db445dSMax Laier VM_MAP_ASSERT_LOCKED(map); 860a5db445dSMax Laier KASSERT(map->busy, ("vm_map_unbusy: not busy")); 861a5db445dSMax Laier if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) { 862a5db445dSMax Laier vm_map_modflags(map, 0, MAP_BUSY_WAKEUP); 863a5db445dSMax Laier wakeup(&map->busy); 864a5db445dSMax Laier } 865a5db445dSMax Laier } 866a5db445dSMax Laier 867a5db445dSMax Laier void 868a5db445dSMax Laier vm_map_wait_busy(vm_map_t map) 869a5db445dSMax Laier { 870a5db445dSMax Laier 871a5db445dSMax Laier VM_MAP_ASSERT_LOCKED(map); 872a5db445dSMax Laier while (map->busy) { 873a5db445dSMax Laier vm_map_modflags(map, MAP_BUSY_WAKEUP, 0); 874a5db445dSMax Laier if (map->system_map) 875a5db445dSMax Laier msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0); 876a5db445dSMax Laier else 877a5db445dSMax Laier sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0); 878a5db445dSMax Laier } 879a5db445dSMax Laier map->timestamp++; 880a5db445dSMax Laier } 881a5db445dSMax Laier 8821b40f8c0SMatthew Dillon long 8831b40f8c0SMatthew Dillon vmspace_resident_count(struct vmspace *vmspace) 8841b40f8c0SMatthew Dillon { 8851b40f8c0SMatthew Dillon return pmap_resident_count(vmspace_pmap(vmspace)); 8861b40f8c0SMatthew Dillon } 8871b40f8c0SMatthew Dillon 888ff2b5645SMatthew Dillon /* 889df8bae1dSRodney W. Grimes * Initialize an existing vm_map structure 890df8bae1dSRodney W. Grimes * such as that in the vmspace structure. 891df8bae1dSRodney W. Grimes */ 8928355f576SJeff Roberson static void 89392351f16SAlan Cox _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) 894df8bae1dSRodney W. Grimes { 89521c641b2SJohn Baldwin 8962203c46dSMark Johnston map->header.eflags = MAP_ENTRY_HEADER; 8979688f931SAlan Cox map->needs_wakeup = FALSE; 8983075778bSJohn Dyson map->system_map = 0; 89992351f16SAlan Cox map->pmap = pmap; 900f0165b1cSKonstantin Belousov map->header.end = min; 901f0165b1cSKonstantin Belousov map->header.start = max; 902af7cd0c5SBrian Feldman map->flags = 0; 903c1ad5342SDoug Moore map->header.left = map->header.right = &map->header; 9044e94f402SAlan Cox map->root = NULL; 905df8bae1dSRodney W. Grimes map->timestamp = 0; 906a5db445dSMax Laier map->busy = 0; 907fa50a355SKonstantin Belousov map->anon_loc = 0; 908461587dcSDoug Moore #ifdef DIAGNOSTIC 909461587dcSDoug Moore map->nupdates = 0; 910461587dcSDoug Moore #endif 911df8bae1dSRodney W. Grimes } 912df8bae1dSRodney W. Grimes 913a18b1f1dSJason Evans void 91492351f16SAlan Cox vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) 915a18b1f1dSJason Evans { 91692351f16SAlan Cox 91792351f16SAlan Cox _vm_map_init(map, pmap, min, max); 9187dd979dfSMark Johnston mtx_init(&map->system_mtx, "vm map (system)", NULL, 9197dd979dfSMark Johnston MTX_DEF | MTX_DUPOK); 9207dd979dfSMark Johnston sx_init(&map->lock, "vm map (user)"); 921a18b1f1dSJason Evans } 922a18b1f1dSJason Evans 923df8bae1dSRodney W. Grimes /* 924b18bfc3dSJohn Dyson * vm_map_entry_dispose: [ internal use only ] 925b18bfc3dSJohn Dyson * 926b18bfc3dSJohn Dyson * Inverse of vm_map_entry_create. 927b18bfc3dSJohn Dyson */ 92862487bb4SJohn Dyson static void 9291b40f8c0SMatthew Dillon vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry) 930b18bfc3dSJohn Dyson { 9312b4a2c27SAlan Cox uma_zfree(map->system_map ? kmapentzone : mapentzone, entry); 932b18bfc3dSJohn Dyson } 933b18bfc3dSJohn Dyson 934b18bfc3dSJohn Dyson /* 935df8bae1dSRodney W. Grimes * vm_map_entry_create: [ internal use only ] 936df8bae1dSRodney W. Grimes * 937df8bae1dSRodney W. Grimes * Allocates a VM map entry for insertion. 938b28cb1caSAlfred Perlstein * No entry fields are filled in. 939df8bae1dSRodney W. Grimes */ 940f708ef1bSPoul-Henning Kamp static vm_map_entry_t 9411b40f8c0SMatthew Dillon vm_map_entry_create(vm_map_t map) 942df8bae1dSRodney W. Grimes { 9431f6889a1SMatthew Dillon vm_map_entry_t new_entry; 9441f6889a1SMatthew Dillon 94520f02659SMark Johnston #ifndef UMA_MD_SMALL_ALLOC 94620f02659SMark Johnston if (map == kernel_map) { 94720f02659SMark Johnston VM_MAP_ASSERT_LOCKED(map); 94820f02659SMark Johnston 94920f02659SMark Johnston /* 95020f02659SMark Johnston * A new slab of kernel map entries cannot be allocated at this 95120f02659SMark Johnston * point because the kernel map has not yet been updated to 95220f02659SMark Johnston * reflect the caller's request. Therefore, we allocate a new 95320f02659SMark Johnston * map entry, dipping into the reserve if necessary, and set a 95420f02659SMark Johnston * flag indicating that the reserve must be replenished before 95520f02659SMark Johnston * the map is unlocked. 95620f02659SMark Johnston */ 95720f02659SMark Johnston new_entry = uma_zalloc(kmapentzone, M_NOWAIT | M_NOVM); 95820f02659SMark Johnston if (new_entry == NULL) { 95920f02659SMark Johnston new_entry = uma_zalloc(kmapentzone, 96020f02659SMark Johnston M_NOWAIT | M_NOVM | M_USE_RESERVE); 96120f02659SMark Johnston kernel_map->flags |= MAP_REPLENISH; 96220f02659SMark Johnston } 96320f02659SMark Johnston } else 96420f02659SMark Johnston #endif 96520f02659SMark Johnston if (map->system_map) { 9662b4a2c27SAlan Cox new_entry = uma_zalloc(kmapentzone, M_NOWAIT); 96720f02659SMark Johnston } else { 968a163d034SWarner Losh new_entry = uma_zalloc(mapentzone, M_WAITOK); 96920f02659SMark Johnston } 97020f02659SMark Johnston KASSERT(new_entry != NULL, 97120f02659SMark Johnston ("vm_map_entry_create: kernel resources exhausted")); 9721f6889a1SMatthew Dillon return (new_entry); 973df8bae1dSRodney W. Grimes } 974df8bae1dSRodney W. Grimes 975df8bae1dSRodney W. Grimes /* 976794316a8SAlan Cox * vm_map_entry_set_behavior: 977794316a8SAlan Cox * 978794316a8SAlan Cox * Set the expected access behavior, either normal, random, or 979794316a8SAlan Cox * sequential. 980794316a8SAlan Cox */ 98162a59e8fSWarner Losh static inline void 982794316a8SAlan Cox vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior) 983794316a8SAlan Cox { 984794316a8SAlan Cox entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) | 985794316a8SAlan Cox (behavior & MAP_ENTRY_BEHAV_MASK); 986794316a8SAlan Cox } 987794316a8SAlan Cox 988794316a8SAlan Cox /* 9895a0879daSDoug Moore * vm_map_entry_max_free_{left,right}: 9900164e057SAlan Cox * 9915a0879daSDoug Moore * Compute the size of the largest free gap between two entries, 9925a0879daSDoug Moore * one the root of a tree and the other the ancestor of that root 9935a0879daSDoug Moore * that is the least or greatest ancestor found on the search path. 9940164e057SAlan Cox */ 9955a0879daSDoug Moore static inline vm_size_t 9965a0879daSDoug Moore vm_map_entry_max_free_left(vm_map_entry_t root, vm_map_entry_t left_ancestor) 9970164e057SAlan Cox { 9980164e057SAlan Cox 999c1ad5342SDoug Moore return (root->left != left_ancestor ? 10005a0879daSDoug Moore root->left->max_free : root->start - left_ancestor->end); 10015a0879daSDoug Moore } 10025a0879daSDoug Moore 10035a0879daSDoug Moore static inline vm_size_t 10045a0879daSDoug Moore vm_map_entry_max_free_right(vm_map_entry_t root, vm_map_entry_t right_ancestor) 10055a0879daSDoug Moore { 10065a0879daSDoug Moore 1007c1ad5342SDoug Moore return (root->right != right_ancestor ? 10085a0879daSDoug Moore root->right->max_free : right_ancestor->start - root->end); 10090164e057SAlan Cox } 10100164e057SAlan Cox 101183704cc2SDoug Moore /* 101283704cc2SDoug Moore * vm_map_entry_{pred,succ}: 101383704cc2SDoug Moore * 101483704cc2SDoug Moore * Find the {predecessor, successor} of the entry by taking one step 101583704cc2SDoug Moore * in the appropriate direction and backtracking as much as necessary. 1016c1ad5342SDoug Moore * vm_map_entry_succ is defined in vm_map.h. 101783704cc2SDoug Moore */ 101883704cc2SDoug Moore static inline vm_map_entry_t 101983704cc2SDoug Moore vm_map_entry_pred(vm_map_entry_t entry) 102083704cc2SDoug Moore { 1021c1ad5342SDoug Moore vm_map_entry_t prior; 102283704cc2SDoug Moore 1023c1ad5342SDoug Moore prior = entry->left; 1024c1ad5342SDoug Moore if (prior->right->start < entry->start) { 1025c1ad5342SDoug Moore do 1026c1ad5342SDoug Moore prior = prior->right; 1027c1ad5342SDoug Moore while (prior->right != entry); 102883704cc2SDoug Moore } 1029c1ad5342SDoug Moore return (prior); 1030c1ad5342SDoug Moore } 103183704cc2SDoug Moore 103285b7bedbSDoug Moore static inline vm_size_t 103385b7bedbSDoug Moore vm_size_max(vm_size_t a, vm_size_t b) 103485b7bedbSDoug Moore { 103585b7bedbSDoug Moore 103685b7bedbSDoug Moore return (a > b ? a : b); 103785b7bedbSDoug Moore } 103885b7bedbSDoug Moore 1039c1ad5342SDoug Moore #define SPLAY_LEFT_STEP(root, y, llist, rlist, test) do { \ 1040c1ad5342SDoug Moore vm_map_entry_t z; \ 10415a0879daSDoug Moore vm_size_t max_free; \ 10425a0879daSDoug Moore \ 10435a0879daSDoug Moore /* \ 10445a0879daSDoug Moore * Infer root->right->max_free == root->max_free when \ 10455a0879daSDoug Moore * y->max_free < root->max_free || root->max_free == 0. \ 10465a0879daSDoug Moore * Otherwise, look right to find it. \ 10475a0879daSDoug Moore */ \ 10489f701172SKonstantin Belousov y = root->left; \ 10495a0879daSDoug Moore max_free = root->max_free; \ 1050668a8aa8SDoug Moore KASSERT(max_free == vm_size_max( \ 1051668a8aa8SDoug Moore vm_map_entry_max_free_left(root, llist), \ 1052668a8aa8SDoug Moore vm_map_entry_max_free_right(root, rlist)), \ 10535a0879daSDoug Moore ("%s: max_free invariant fails", __func__)); \ 1054668a8aa8SDoug Moore if (max_free - 1 < vm_map_entry_max_free_left(root, llist)) \ 10555a0879daSDoug Moore max_free = vm_map_entry_max_free_right(root, rlist); \ 1056c1ad5342SDoug Moore if (y != llist && (test)) { \ 10579f701172SKonstantin Belousov /* Rotate right and make y root. */ \ 1058c1ad5342SDoug Moore z = y->right; \ 1059c1ad5342SDoug Moore if (z != root) { \ 1060c1ad5342SDoug Moore root->left = z; \ 10619f701172SKonstantin Belousov y->right = root; \ 10625a0879daSDoug Moore if (max_free < y->max_free) \ 106385b7bedbSDoug Moore root->max_free = max_free = \ 1064c1ad5342SDoug Moore vm_size_max(max_free, z->max_free); \ 1065c1ad5342SDoug Moore } else if (max_free < y->max_free) \ 1066c1ad5342SDoug Moore root->max_free = max_free = \ 1067c1ad5342SDoug Moore vm_size_max(max_free, root->start - y->end);\ 10689f701172SKonstantin Belousov root = y; \ 10699f701172SKonstantin Belousov y = root->left; \ 10709f701172SKonstantin Belousov } \ 10715a0879daSDoug Moore /* Copy right->max_free. Put root on rlist. */ \ 10725a0879daSDoug Moore root->max_free = max_free; \ 10735a0879daSDoug Moore KASSERT(max_free == vm_map_entry_max_free_right(root, rlist), \ 10745a0879daSDoug Moore ("%s: max_free not copied from right", __func__)); \ 10759f701172SKonstantin Belousov root->left = rlist; \ 10769f701172SKonstantin Belousov rlist = root; \ 1077c1ad5342SDoug Moore root = y != llist ? y : NULL; \ 10789f701172SKonstantin Belousov } while (0) 10799f701172SKonstantin Belousov 1080c1ad5342SDoug Moore #define SPLAY_RIGHT_STEP(root, y, llist, rlist, test) do { \ 1081c1ad5342SDoug Moore vm_map_entry_t z; \ 10825a0879daSDoug Moore vm_size_t max_free; \ 10835a0879daSDoug Moore \ 10845a0879daSDoug Moore /* \ 10855a0879daSDoug Moore * Infer root->left->max_free == root->max_free when \ 10865a0879daSDoug Moore * y->max_free < root->max_free || root->max_free == 0. \ 10875a0879daSDoug Moore * Otherwise, look left to find it. \ 10885a0879daSDoug Moore */ \ 10899f701172SKonstantin Belousov y = root->right; \ 10905a0879daSDoug Moore max_free = root->max_free; \ 1091668a8aa8SDoug Moore KASSERT(max_free == vm_size_max( \ 1092668a8aa8SDoug Moore vm_map_entry_max_free_left(root, llist), \ 1093668a8aa8SDoug Moore vm_map_entry_max_free_right(root, rlist)), \ 10945a0879daSDoug Moore ("%s: max_free invariant fails", __func__)); \ 1095668a8aa8SDoug Moore if (max_free - 1 < vm_map_entry_max_free_right(root, rlist)) \ 10965a0879daSDoug Moore max_free = vm_map_entry_max_free_left(root, llist); \ 1097c1ad5342SDoug Moore if (y != rlist && (test)) { \ 10989f701172SKonstantin Belousov /* Rotate left and make y root. */ \ 1099c1ad5342SDoug Moore z = y->left; \ 1100c1ad5342SDoug Moore if (z != root) { \ 1101c1ad5342SDoug Moore root->right = z; \ 11029f701172SKonstantin Belousov y->left = root; \ 11035a0879daSDoug Moore if (max_free < y->max_free) \ 110485b7bedbSDoug Moore root->max_free = max_free = \ 1105c1ad5342SDoug Moore vm_size_max(max_free, z->max_free); \ 1106c1ad5342SDoug Moore } else if (max_free < y->max_free) \ 1107c1ad5342SDoug Moore root->max_free = max_free = \ 1108c1ad5342SDoug Moore vm_size_max(max_free, y->start - root->end);\ 11099f701172SKonstantin Belousov root = y; \ 11109f701172SKonstantin Belousov y = root->right; \ 11119f701172SKonstantin Belousov } \ 11125a0879daSDoug Moore /* Copy left->max_free. Put root on llist. */ \ 11135a0879daSDoug Moore root->max_free = max_free; \ 11145a0879daSDoug Moore KASSERT(max_free == vm_map_entry_max_free_left(root, llist), \ 11155a0879daSDoug Moore ("%s: max_free not copied from left", __func__)); \ 11169f701172SKonstantin Belousov root->right = llist; \ 11179f701172SKonstantin Belousov llist = root; \ 1118c1ad5342SDoug Moore root = y != rlist ? y : NULL; \ 11199f701172SKonstantin Belousov } while (0) 11209f701172SKonstantin Belousov 11210164e057SAlan Cox /* 1122c1ad5342SDoug Moore * Walk down the tree until we find addr or a gap where addr would go, breaking 1123c1ad5342SDoug Moore * off left and right subtrees of nodes less than, or greater than addr. Treat 1124c1ad5342SDoug Moore * subtrees with root->max_free < length as empty trees. llist and rlist are 1125c1ad5342SDoug Moore * the two sides in reverse order (bottom-up), with llist linked by the right 1126c1ad5342SDoug Moore * pointer and rlist linked by the left pointer in the vm_map_entry, and both 1127c1ad5342SDoug Moore * lists terminated by &map->header. This function, and the subsequent call to 1128c1ad5342SDoug Moore * vm_map_splay_merge_{left,right,pred,succ}, rely on the start and end address 11295a0879daSDoug Moore * values in &map->header. 11304e94f402SAlan Cox */ 11311867d2f2SDoug Moore static __always_inline vm_map_entry_t 11325a0879daSDoug Moore vm_map_splay_split(vm_map_t map, vm_offset_t addr, vm_size_t length, 11331867d2f2SDoug Moore vm_map_entry_t *llist, vm_map_entry_t *rlist) 11344e94f402SAlan Cox { 1135c1ad5342SDoug Moore vm_map_entry_t left, right, root, y; 11364e94f402SAlan Cox 1137c1ad5342SDoug Moore left = right = &map->header; 11385a0879daSDoug Moore root = map->root; 11399f701172SKonstantin Belousov while (root != NULL && root->max_free >= length) { 1140c1ad5342SDoug Moore KASSERT(left->end <= root->start && 1141c1ad5342SDoug Moore root->end <= right->start, 11425a0879daSDoug Moore ("%s: root not within tree bounds", __func__)); 11430164e057SAlan Cox if (addr < root->start) { 1144c1ad5342SDoug Moore SPLAY_LEFT_STEP(root, y, left, right, 11459f701172SKonstantin Belousov y->max_free >= length && addr < y->start); 11467438d60bSAlan Cox } else if (addr >= root->end) { 1147c1ad5342SDoug Moore SPLAY_RIGHT_STEP(root, y, left, right, 11489f701172SKonstantin Belousov y->max_free >= length && addr >= y->end); 11497438d60bSAlan Cox } else 11507438d60bSAlan Cox break; 11510164e057SAlan Cox } 1152c1ad5342SDoug Moore *llist = left; 1153c1ad5342SDoug Moore *rlist = right; 11549f701172SKonstantin Belousov return (root); 11559f701172SKonstantin Belousov } 11569f701172SKonstantin Belousov 11571867d2f2SDoug Moore static __always_inline void 11581867d2f2SDoug Moore vm_map_splay_findnext(vm_map_entry_t root, vm_map_entry_t *rlist) 11599f701172SKonstantin Belousov { 1160c1ad5342SDoug Moore vm_map_entry_t hi, right, y; 11619f701172SKonstantin Belousov 1162c1ad5342SDoug Moore right = *rlist; 1163c1ad5342SDoug Moore hi = root->right == right ? NULL : root->right; 1164c1ad5342SDoug Moore if (hi == NULL) 1165c1ad5342SDoug Moore return; 1166c1ad5342SDoug Moore do 1167c1ad5342SDoug Moore SPLAY_LEFT_STEP(hi, y, root, right, true); 1168c1ad5342SDoug Moore while (hi != NULL); 1169c1ad5342SDoug Moore *rlist = right; 11709f701172SKonstantin Belousov } 11719f701172SKonstantin Belousov 11721867d2f2SDoug Moore static __always_inline void 11731867d2f2SDoug Moore vm_map_splay_findprev(vm_map_entry_t root, vm_map_entry_t *llist) 11749f701172SKonstantin Belousov { 1175c1ad5342SDoug Moore vm_map_entry_t left, lo, y; 11769f701172SKonstantin Belousov 1177c1ad5342SDoug Moore left = *llist; 1178c1ad5342SDoug Moore lo = root->left == left ? NULL : root->left; 1179c1ad5342SDoug Moore if (lo == NULL) 1180c1ad5342SDoug Moore return; 1181c1ad5342SDoug Moore do 1182c1ad5342SDoug Moore SPLAY_RIGHT_STEP(lo, y, left, root, true); 1183c1ad5342SDoug Moore while (lo != NULL); 1184c1ad5342SDoug Moore *llist = left; 11859f701172SKonstantin Belousov } 11860164e057SAlan Cox 11875a0879daSDoug Moore static inline void 11885a0879daSDoug Moore vm_map_entry_swap(vm_map_entry_t *a, vm_map_entry_t *b) 11895a0879daSDoug Moore { 11905a0879daSDoug Moore vm_map_entry_t tmp; 11915a0879daSDoug Moore 11925a0879daSDoug Moore tmp = *b; 11935a0879daSDoug Moore *b = *a; 11945a0879daSDoug Moore *a = tmp; 11955a0879daSDoug Moore } 11965a0879daSDoug Moore 11970164e057SAlan Cox /* 11989f701172SKonstantin Belousov * Walk back up the two spines, flip the pointers and set max_free. The 11999f701172SKonstantin Belousov * subtrees of the root go at the bottom of llist and rlist. 12000164e057SAlan Cox */ 120185b7bedbSDoug Moore static vm_size_t 120285b7bedbSDoug Moore vm_map_splay_merge_left_walk(vm_map_entry_t header, vm_map_entry_t root, 120385b7bedbSDoug Moore vm_map_entry_t tail, vm_size_t max_free, vm_map_entry_t llist) 12049f701172SKonstantin Belousov { 12055a0879daSDoug Moore do { 12060164e057SAlan Cox /* 12075a0879daSDoug Moore * The max_free values of the children of llist are in 120885b7bedbSDoug Moore * llist->max_free and max_free. Update with the 12095a0879daSDoug Moore * max value. 12100164e057SAlan Cox */ 121185b7bedbSDoug Moore llist->max_free = max_free = 121285b7bedbSDoug Moore vm_size_max(llist->max_free, max_free); 121385b7bedbSDoug Moore vm_map_entry_swap(&llist->right, &tail); 121485b7bedbSDoug Moore vm_map_entry_swap(&tail, &llist); 121585b7bedbSDoug Moore } while (llist != header); 121685b7bedbSDoug Moore root->left = tail; 121785b7bedbSDoug Moore return (max_free); 12185a0879daSDoug Moore } 121985b7bedbSDoug Moore 122085b7bedbSDoug Moore /* 122185b7bedbSDoug Moore * When llist is known to be the predecessor of root. 122285b7bedbSDoug Moore */ 122385b7bedbSDoug Moore static inline vm_size_t 122485b7bedbSDoug Moore vm_map_splay_merge_pred(vm_map_entry_t header, vm_map_entry_t root, 122585b7bedbSDoug Moore vm_map_entry_t llist) 122685b7bedbSDoug Moore { 122785b7bedbSDoug Moore vm_size_t max_free; 122885b7bedbSDoug Moore 122985b7bedbSDoug Moore max_free = root->start - llist->end; 123085b7bedbSDoug Moore if (llist != header) { 123185b7bedbSDoug Moore max_free = vm_map_splay_merge_left_walk(header, root, 1232c1ad5342SDoug Moore root, max_free, llist); 123385b7bedbSDoug Moore } else { 1234c1ad5342SDoug Moore root->left = header; 1235c1ad5342SDoug Moore header->right = root; 123685b7bedbSDoug Moore } 123785b7bedbSDoug Moore return (max_free); 123885b7bedbSDoug Moore } 123985b7bedbSDoug Moore 124085b7bedbSDoug Moore /* 124185b7bedbSDoug Moore * When llist may or may not be the predecessor of root. 124285b7bedbSDoug Moore */ 124385b7bedbSDoug Moore static inline vm_size_t 124485b7bedbSDoug Moore vm_map_splay_merge_left(vm_map_entry_t header, vm_map_entry_t root, 124585b7bedbSDoug Moore vm_map_entry_t llist) 124685b7bedbSDoug Moore { 124785b7bedbSDoug Moore vm_size_t max_free; 124885b7bedbSDoug Moore 124985b7bedbSDoug Moore max_free = vm_map_entry_max_free_left(root, llist); 125085b7bedbSDoug Moore if (llist != header) { 125185b7bedbSDoug Moore max_free = vm_map_splay_merge_left_walk(header, root, 1252c1ad5342SDoug Moore root->left == llist ? root : root->left, 1253c1ad5342SDoug Moore max_free, llist); 125485b7bedbSDoug Moore } 125585b7bedbSDoug Moore return (max_free); 125685b7bedbSDoug Moore } 125785b7bedbSDoug Moore 125885b7bedbSDoug Moore static vm_size_t 125985b7bedbSDoug Moore vm_map_splay_merge_right_walk(vm_map_entry_t header, vm_map_entry_t root, 126085b7bedbSDoug Moore vm_map_entry_t tail, vm_size_t max_free, vm_map_entry_t rlist) 126185b7bedbSDoug Moore { 12625a0879daSDoug Moore do { 12635a0879daSDoug Moore /* 12645a0879daSDoug Moore * The max_free values of the children of rlist are in 126585b7bedbSDoug Moore * rlist->max_free and max_free. Update with the 12665a0879daSDoug Moore * max value. 12675a0879daSDoug Moore */ 126885b7bedbSDoug Moore rlist->max_free = max_free = 126985b7bedbSDoug Moore vm_size_max(rlist->max_free, max_free); 127085b7bedbSDoug Moore vm_map_entry_swap(&rlist->left, &tail); 127185b7bedbSDoug Moore vm_map_entry_swap(&tail, &rlist); 127285b7bedbSDoug Moore } while (rlist != header); 127385b7bedbSDoug Moore root->right = tail; 127485b7bedbSDoug Moore return (max_free); 12755a0879daSDoug Moore } 127685b7bedbSDoug Moore 127785b7bedbSDoug Moore /* 127885b7bedbSDoug Moore * When rlist is known to be the succecessor of root. 127985b7bedbSDoug Moore */ 128085b7bedbSDoug Moore static inline vm_size_t 128185b7bedbSDoug Moore vm_map_splay_merge_succ(vm_map_entry_t header, vm_map_entry_t root, 128285b7bedbSDoug Moore vm_map_entry_t rlist) 128385b7bedbSDoug Moore { 128485b7bedbSDoug Moore vm_size_t max_free; 128585b7bedbSDoug Moore 128685b7bedbSDoug Moore max_free = rlist->start - root->end; 128785b7bedbSDoug Moore if (rlist != header) { 128885b7bedbSDoug Moore max_free = vm_map_splay_merge_right_walk(header, root, 1289c1ad5342SDoug Moore root, max_free, rlist); 129085b7bedbSDoug Moore } else { 1291c1ad5342SDoug Moore root->right = header; 1292c1ad5342SDoug Moore header->left = root; 129385b7bedbSDoug Moore } 129485b7bedbSDoug Moore return (max_free); 129585b7bedbSDoug Moore } 129685b7bedbSDoug Moore 129785b7bedbSDoug Moore /* 129885b7bedbSDoug Moore * When rlist may or may not be the succecessor of root. 129985b7bedbSDoug Moore */ 130085b7bedbSDoug Moore static inline vm_size_t 130185b7bedbSDoug Moore vm_map_splay_merge_right(vm_map_entry_t header, vm_map_entry_t root, 130285b7bedbSDoug Moore vm_map_entry_t rlist) 130385b7bedbSDoug Moore { 130485b7bedbSDoug Moore vm_size_t max_free; 130585b7bedbSDoug Moore 130685b7bedbSDoug Moore max_free = vm_map_entry_max_free_right(root, rlist); 130785b7bedbSDoug Moore if (rlist != header) { 130885b7bedbSDoug Moore max_free = vm_map_splay_merge_right_walk(header, root, 1309c1ad5342SDoug Moore root->right == rlist ? root : root->right, 1310c1ad5342SDoug Moore max_free, rlist); 131185b7bedbSDoug Moore } 131285b7bedbSDoug Moore return (max_free); 13134e94f402SAlan Cox } 13144e94f402SAlan Cox 13154e94f402SAlan Cox /* 1316d1d3f7e1SDoug Moore * vm_map_splay: 1317d1d3f7e1SDoug Moore * 1318d1d3f7e1SDoug Moore * The Sleator and Tarjan top-down splay algorithm with the 1319d1d3f7e1SDoug Moore * following variation. Max_free must be computed bottom-up, so 1320d1d3f7e1SDoug Moore * on the downward pass, maintain the left and right spines in 1321d1d3f7e1SDoug Moore * reverse order. Then, make a second pass up each side to fix 1322d1d3f7e1SDoug Moore * the pointers and compute max_free. The time bound is O(log n) 1323d1d3f7e1SDoug Moore * amortized. 1324d1d3f7e1SDoug Moore * 1325c1ad5342SDoug Moore * The tree is threaded, which means that there are no null pointers. 1326c1ad5342SDoug Moore * When a node has no left child, its left pointer points to its 1327c1ad5342SDoug Moore * predecessor, which the last ancestor on the search path from the root 1328c1ad5342SDoug Moore * where the search branched right. Likewise, when a node has no right 1329c1ad5342SDoug Moore * child, its right pointer points to its successor. The map header node 1330c1ad5342SDoug Moore * is the predecessor of the first map entry, and the successor of the 1331c1ad5342SDoug Moore * last. 1332c1ad5342SDoug Moore * 1333d1d3f7e1SDoug Moore * The new root is the vm_map_entry containing "addr", or else an 1334d1d3f7e1SDoug Moore * adjacent entry (lower if possible) if addr is not in the tree. 1335d1d3f7e1SDoug Moore * 1336d1d3f7e1SDoug Moore * The map must be locked, and leaves it so. 1337d1d3f7e1SDoug Moore * 1338d1d3f7e1SDoug Moore * Returns: the new root. 1339d1d3f7e1SDoug Moore */ 1340d1d3f7e1SDoug Moore static vm_map_entry_t 1341d1d3f7e1SDoug Moore vm_map_splay(vm_map_t map, vm_offset_t addr) 1342d1d3f7e1SDoug Moore { 134385b7bedbSDoug Moore vm_map_entry_t header, llist, rlist, root; 134485b7bedbSDoug Moore vm_size_t max_free_left, max_free_right; 1345d1d3f7e1SDoug Moore 134685b7bedbSDoug Moore header = &map->header; 1347d1d3f7e1SDoug Moore root = vm_map_splay_split(map, addr, 0, &llist, &rlist); 1348d1d3f7e1SDoug Moore if (root != NULL) { 134985b7bedbSDoug Moore max_free_left = vm_map_splay_merge_left(header, root, llist); 135085b7bedbSDoug Moore max_free_right = vm_map_splay_merge_right(header, root, rlist); 135185b7bedbSDoug Moore } else if (llist != header) { 1352d1d3f7e1SDoug Moore /* 1353d1d3f7e1SDoug Moore * Recover the greatest node in the left 1354d1d3f7e1SDoug Moore * subtree and make it the root. 1355d1d3f7e1SDoug Moore */ 1356d1d3f7e1SDoug Moore root = llist; 1357d1d3f7e1SDoug Moore llist = root->right; 135885b7bedbSDoug Moore max_free_left = vm_map_splay_merge_left(header, root, llist); 135985b7bedbSDoug Moore max_free_right = vm_map_splay_merge_succ(header, root, rlist); 136085b7bedbSDoug Moore } else if (rlist != header) { 1361d1d3f7e1SDoug Moore /* 1362d1d3f7e1SDoug Moore * Recover the least node in the right 1363d1d3f7e1SDoug Moore * subtree and make it the root. 1364d1d3f7e1SDoug Moore */ 1365d1d3f7e1SDoug Moore root = rlist; 1366d1d3f7e1SDoug Moore rlist = root->left; 136785b7bedbSDoug Moore max_free_left = vm_map_splay_merge_pred(header, root, llist); 136885b7bedbSDoug Moore max_free_right = vm_map_splay_merge_right(header, root, rlist); 1369d1d3f7e1SDoug Moore } else { 1370d1d3f7e1SDoug Moore /* There is no root. */ 1371d1d3f7e1SDoug Moore return (NULL); 1372d1d3f7e1SDoug Moore } 137385b7bedbSDoug Moore root->max_free = vm_size_max(max_free_left, max_free_right); 137485b7bedbSDoug Moore map->root = root; 1375d1d3f7e1SDoug Moore VM_MAP_ASSERT_CONSISTENT(map); 1376d1d3f7e1SDoug Moore return (root); 1377d1d3f7e1SDoug Moore } 1378d1d3f7e1SDoug Moore 1379d1d3f7e1SDoug Moore /* 1380df8bae1dSRodney W. Grimes * vm_map_entry_{un,}link: 1381df8bae1dSRodney W. Grimes * 1382668a8aa8SDoug Moore * Insert/remove entries from maps. On linking, if new entry clips 1383668a8aa8SDoug Moore * existing entry, trim existing entry to avoid overlap, and manage 1384668a8aa8SDoug Moore * offsets. On unlinking, merge disappearing entry with neighbor, if 1385668a8aa8SDoug Moore * called for, and manage offsets. Callers should not modify fields in 1386668a8aa8SDoug Moore * entries already mapped. 1387df8bae1dSRodney W. Grimes */ 13884e94f402SAlan Cox static void 13895a0879daSDoug Moore vm_map_entry_link(vm_map_t map, vm_map_entry_t entry) 139099c81ca9SAlan Cox { 139185b7bedbSDoug Moore vm_map_entry_t header, llist, rlist, root; 1392668a8aa8SDoug Moore vm_size_t max_free_left, max_free_right; 139321c641b2SJohn Baldwin 13949f701172SKonstantin Belousov CTR3(KTR_VM, 13959f701172SKonstantin Belousov "vm_map_entry_link: map %p, nentries %d, entry %p", map, 13969f701172SKonstantin Belousov map->nentries, entry); 13973a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 139899c81ca9SAlan Cox map->nentries++; 139985b7bedbSDoug Moore header = &map->header; 14005a0879daSDoug Moore root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist); 1401668a8aa8SDoug Moore if (root == NULL) { 1402668a8aa8SDoug Moore /* 1403668a8aa8SDoug Moore * The new entry does not overlap any existing entry in the 1404668a8aa8SDoug Moore * map, so it becomes the new root of the map tree. 1405668a8aa8SDoug Moore */ 1406668a8aa8SDoug Moore max_free_left = vm_map_splay_merge_pred(header, entry, llist); 1407668a8aa8SDoug Moore max_free_right = vm_map_splay_merge_succ(header, entry, rlist); 1408668a8aa8SDoug Moore } else if (entry->start == root->start) { 1409668a8aa8SDoug Moore /* 1410668a8aa8SDoug Moore * The new entry is a clone of root, with only the end field 1411668a8aa8SDoug Moore * changed. The root entry will be shrunk to abut the new 1412668a8aa8SDoug Moore * entry, and will be the right child of the new root entry in 1413668a8aa8SDoug Moore * the modified map. 1414668a8aa8SDoug Moore */ 1415668a8aa8SDoug Moore KASSERT(entry->end < root->end, 1416668a8aa8SDoug Moore ("%s: clip_start not within entry", __func__)); 1417668a8aa8SDoug Moore vm_map_splay_findprev(root, &llist); 1418668a8aa8SDoug Moore root->offset += entry->end - root->start; 1419668a8aa8SDoug Moore root->start = entry->end; 1420668a8aa8SDoug Moore max_free_left = vm_map_splay_merge_pred(header, entry, llist); 1421668a8aa8SDoug Moore max_free_right = root->max_free = vm_size_max( 1422668a8aa8SDoug Moore vm_map_splay_merge_pred(entry, root, entry), 1423668a8aa8SDoug Moore vm_map_splay_merge_right(header, root, rlist)); 1424668a8aa8SDoug Moore } else { 1425668a8aa8SDoug Moore /* 1426668a8aa8SDoug Moore * The new entry is a clone of root, with only the start field 1427668a8aa8SDoug Moore * changed. The root entry will be shrunk to abut the new 1428668a8aa8SDoug Moore * entry, and will be the left child of the new root entry in 1429668a8aa8SDoug Moore * the modified map. 1430668a8aa8SDoug Moore */ 1431668a8aa8SDoug Moore KASSERT(entry->end == root->end, 1432668a8aa8SDoug Moore ("%s: clip_start not within entry", __func__)); 1433668a8aa8SDoug Moore vm_map_splay_findnext(root, &rlist); 1434668a8aa8SDoug Moore entry->offset += entry->start - root->start; 1435668a8aa8SDoug Moore root->end = entry->start; 1436668a8aa8SDoug Moore max_free_left = root->max_free = vm_size_max( 1437668a8aa8SDoug Moore vm_map_splay_merge_left(header, root, llist), 1438668a8aa8SDoug Moore vm_map_splay_merge_succ(entry, root, entry)); 1439668a8aa8SDoug Moore max_free_right = vm_map_splay_merge_succ(header, entry, rlist); 1440668a8aa8SDoug Moore } 1441668a8aa8SDoug Moore entry->max_free = vm_size_max(max_free_left, max_free_right); 1442668a8aa8SDoug Moore map->root = entry; 14439f701172SKonstantin Belousov VM_MAP_ASSERT_CONSISTENT(map); 1444df8bae1dSRodney W. Grimes } 144599c81ca9SAlan Cox 14469f701172SKonstantin Belousov enum unlink_merge_type { 14479f701172SKonstantin Belousov UNLINK_MERGE_NONE, 14489f701172SKonstantin Belousov UNLINK_MERGE_NEXT 14499f701172SKonstantin Belousov }; 14509f701172SKonstantin Belousov 14514e94f402SAlan Cox static void 14525a0879daSDoug Moore vm_map_entry_unlink(vm_map_t map, vm_map_entry_t entry, 14539f701172SKonstantin Belousov enum unlink_merge_type op) 145499c81ca9SAlan Cox { 1455c1ad5342SDoug Moore vm_map_entry_t header, llist, rlist, root; 145685b7bedbSDoug Moore vm_size_t max_free_left, max_free_right; 145799c81ca9SAlan Cox 14583a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 145985b7bedbSDoug Moore header = &map->header; 14605a0879daSDoug Moore root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist); 14619f701172SKonstantin Belousov KASSERT(root != NULL, 14629f701172SKonstantin Belousov ("vm_map_entry_unlink: unlink object not mapped")); 14634e94f402SAlan Cox 14641867d2f2SDoug Moore vm_map_splay_findprev(root, &llist); 14659f701172SKonstantin Belousov vm_map_splay_findnext(root, &rlist); 14661867d2f2SDoug Moore if (op == UNLINK_MERGE_NEXT) { 14679f701172SKonstantin Belousov rlist->start = root->start; 14689f701172SKonstantin Belousov rlist->offset = root->offset; 14691867d2f2SDoug Moore } 147085b7bedbSDoug Moore if (llist != header) { 14719f701172SKonstantin Belousov root = llist; 14729f701172SKonstantin Belousov llist = root->right; 147385b7bedbSDoug Moore max_free_left = vm_map_splay_merge_left(header, root, llist); 147485b7bedbSDoug Moore max_free_right = vm_map_splay_merge_succ(header, root, rlist); 147585b7bedbSDoug Moore } else if (rlist != header) { 14769f701172SKonstantin Belousov root = rlist; 14779f701172SKonstantin Belousov rlist = root->left; 147885b7bedbSDoug Moore max_free_left = vm_map_splay_merge_pred(header, root, llist); 147985b7bedbSDoug Moore max_free_right = vm_map_splay_merge_right(header, root, rlist); 1480c1ad5342SDoug Moore } else { 1481c1ad5342SDoug Moore header->left = header->right = header; 14829f701172SKonstantin Belousov root = NULL; 1483c1ad5342SDoug Moore } 14849f701172SKonstantin Belousov if (root != NULL) 148585b7bedbSDoug Moore root->max_free = vm_size_max(max_free_left, max_free_right); 148685b7bedbSDoug Moore map->root = root; 14879f701172SKonstantin Belousov VM_MAP_ASSERT_CONSISTENT(map); 148899c81ca9SAlan Cox map->nentries--; 148921c641b2SJohn Baldwin CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map, 149021c641b2SJohn Baldwin map->nentries, entry); 1491df8bae1dSRodney W. Grimes } 1492df8bae1dSRodney W. Grimes 1493df8bae1dSRodney W. Grimes /* 1494fa581662SDoug Moore * vm_map_entry_resize: 14950164e057SAlan Cox * 1496fa581662SDoug Moore * Resize a vm_map_entry, recompute the amount of free space that 1497fa581662SDoug Moore * follows it and propagate that value up the tree. 14980164e057SAlan Cox * 14990164e057SAlan Cox * The map must be locked, and leaves it so. 15000164e057SAlan Cox */ 15010164e057SAlan Cox static void 1502fa581662SDoug Moore vm_map_entry_resize(vm_map_t map, vm_map_entry_t entry, vm_size_t grow_amount) 15030164e057SAlan Cox { 150485b7bedbSDoug Moore vm_map_entry_t header, llist, rlist, root; 15050164e057SAlan Cox 15069f701172SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 150785b7bedbSDoug Moore header = &map->header; 15085a0879daSDoug Moore root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist); 15091867d2f2SDoug Moore KASSERT(root != NULL, ("%s: resize object not mapped", __func__)); 15109f701172SKonstantin Belousov vm_map_splay_findnext(root, &rlist); 15111895f520SDoug Moore entry->end += grow_amount; 151285b7bedbSDoug Moore root->max_free = vm_size_max( 151385b7bedbSDoug Moore vm_map_splay_merge_left(header, root, llist), 151485b7bedbSDoug Moore vm_map_splay_merge_succ(header, root, rlist)); 151585b7bedbSDoug Moore map->root = root; 15169f701172SKonstantin Belousov VM_MAP_ASSERT_CONSISTENT(map); 1517fa581662SDoug Moore CTR4(KTR_VM, "%s: map %p, nentries %d, entry %p", 151873f11451SDoug Moore __func__, map, map->nentries, entry); 15190164e057SAlan Cox } 15200164e057SAlan Cox 15210164e057SAlan Cox /* 1522d1d3f7e1SDoug Moore * vm_map_lookup_entry: [ internal use only ] 1523df8bae1dSRodney W. Grimes * 1524d1d3f7e1SDoug Moore * Finds the map entry containing (or 1525d1d3f7e1SDoug Moore * immediately preceding) the specified address 1526d1d3f7e1SDoug Moore * in the given map; the entry is returned 1527d1d3f7e1SDoug Moore * in the "entry" parameter. The boolean 1528d1d3f7e1SDoug Moore * result indicates whether the address is 1529d1d3f7e1SDoug Moore * actually contained in the map. 1530df8bae1dSRodney W. Grimes */ 1531d1d3f7e1SDoug Moore boolean_t 1532d1d3f7e1SDoug Moore vm_map_lookup_entry( 1533d1d3f7e1SDoug Moore vm_map_t map, 1534d1d3f7e1SDoug Moore vm_offset_t address, 1535d1d3f7e1SDoug Moore vm_map_entry_t *entry) /* OUT */ 1536df8bae1dSRodney W. Grimes { 1537c1ad5342SDoug Moore vm_map_entry_t cur, header, lbound, ubound; 1538d1d3f7e1SDoug Moore boolean_t locked; 1539df8bae1dSRodney W. Grimes 15404c3ef59eSAlan Cox /* 15414c3ef59eSAlan Cox * If the map is empty, then the map entry immediately preceding 1542d1d3f7e1SDoug Moore * "address" is the map's header. 15434c3ef59eSAlan Cox */ 154485b7bedbSDoug Moore header = &map->header; 1545d1d3f7e1SDoug Moore cur = map->root; 1546d1d3f7e1SDoug Moore if (cur == NULL) { 154785b7bedbSDoug Moore *entry = header; 1548d1d3f7e1SDoug Moore return (FALSE); 1549d1d3f7e1SDoug Moore } 1550d1d3f7e1SDoug Moore if (address >= cur->start && cur->end > address) { 1551d1d3f7e1SDoug Moore *entry = cur; 1552d1d3f7e1SDoug Moore return (TRUE); 15539f701172SKonstantin Belousov } 15549f701172SKonstantin Belousov if ((locked = vm_map_locked(map)) || 155505a8c414SAlan Cox sx_try_upgrade(&map->lock)) { 155605a8c414SAlan Cox /* 155705a8c414SAlan Cox * Splay requires a write lock on the map. However, it only 155805a8c414SAlan Cox * restructures the binary search tree; it does not otherwise 155905a8c414SAlan Cox * change the map. Thus, the map's timestamp need not change 156005a8c414SAlan Cox * on a temporary upgrade. 156105a8c414SAlan Cox */ 1562d1d3f7e1SDoug Moore cur = vm_map_splay(map, address); 1563461587dcSDoug Moore if (!locked) { 1564461587dcSDoug Moore VM_MAP_UNLOCK_CONSISTENT(map); 156505a8c414SAlan Cox sx_downgrade(&map->lock); 1566461587dcSDoug Moore } 1567d1d3f7e1SDoug Moore 1568d1d3f7e1SDoug Moore /* 1569d1d3f7e1SDoug Moore * If "address" is contained within a map entry, the new root 1570d1d3f7e1SDoug Moore * is that map entry. Otherwise, the new root is a map entry 1571d1d3f7e1SDoug Moore * immediately before or after "address". 1572d1d3f7e1SDoug Moore */ 1573d1d3f7e1SDoug Moore if (address < cur->start) { 157485b7bedbSDoug Moore *entry = header; 1575d1d3f7e1SDoug Moore return (FALSE); 1576d1d3f7e1SDoug Moore } 1577d1d3f7e1SDoug Moore *entry = cur; 1578d1d3f7e1SDoug Moore return (address < cur->end); 15799f701172SKonstantin Belousov } 158005a8c414SAlan Cox /* 158105a8c414SAlan Cox * Since the map is only locked for read access, perform a 1582d1d3f7e1SDoug Moore * standard binary search tree lookup for "address". 158305a8c414SAlan Cox */ 1584c1ad5342SDoug Moore lbound = ubound = header; 1585c1ad5342SDoug Moore for (;;) { 1586d1d3f7e1SDoug Moore if (address < cur->start) { 1587c1ad5342SDoug Moore ubound = cur; 1588d1d3f7e1SDoug Moore cur = cur->left; 1589c1ad5342SDoug Moore if (cur == lbound) 1590c1ad5342SDoug Moore break; 1591d1d3f7e1SDoug Moore } else if (cur->end <= address) { 1592d1d3f7e1SDoug Moore lbound = cur; 1593d1d3f7e1SDoug Moore cur = cur->right; 1594c1ad5342SDoug Moore if (cur == ubound) 1595c1ad5342SDoug Moore break; 15969f701172SKonstantin Belousov } else { 1597d1d3f7e1SDoug Moore *entry = cur; 1598d1d3f7e1SDoug Moore return (TRUE); 159905a8c414SAlan Cox } 1600c1ad5342SDoug Moore } 1601d1d3f7e1SDoug Moore *entry = lbound; 1602d1d3f7e1SDoug Moore return (FALSE); 1603df8bae1dSRodney W. Grimes } 1604df8bae1dSRodney W. Grimes 1605df8bae1dSRodney W. Grimes /* 1606ba41b0deSKonstantin Belousov * vm_map_insert1() is identical to vm_map_insert() except that it 1607ba41b0deSKonstantin Belousov * returns the newly inserted map entry in '*res'. In case the new 1608ba41b0deSKonstantin Belousov * entry is coalesced with a neighbor or an existing entry was 1609ba41b0deSKonstantin Belousov * resized, that entry is returned. In any case, the returned entry 1610ba41b0deSKonstantin Belousov * covers the specified address range. 161130dcfc09SJohn Dyson */ 1612ba41b0deSKonstantin Belousov static int 1613ba41b0deSKonstantin Belousov vm_map_insert1(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1614ba41b0deSKonstantin Belousov vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow, 1615ba41b0deSKonstantin Belousov vm_map_entry_t *res) 161630dcfc09SJohn Dyson { 161783704cc2SDoug Moore vm_map_entry_t new_entry, next_entry, prev_entry; 1618ef694c1aSEdward Tomasz Napierala struct ucred *cred; 16191569205fSKonstantin Belousov vm_eflags_t protoeflags; 16208211bd45SKonstantin Belousov vm_inherit_t inheritance; 1621e2e80fb3SKonstantin Belousov u_long bdry; 1622e2e80fb3SKonstantin Belousov u_int bidx; 162330dcfc09SJohn Dyson 16243a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 16252e47807cSJeff Roberson KASSERT(object != kernel_object || 162633314db0SAlan Cox (cow & MAP_COPY_ON_WRITE) == 0, 16272e47807cSJeff Roberson ("vm_map_insert: kernel object and COW")); 1628e2e80fb3SKonstantin Belousov KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0 || 1629e2e80fb3SKonstantin Belousov (cow & MAP_SPLIT_BOUNDARY_MASK) != 0, 1630e2e80fb3SKonstantin Belousov ("vm_map_insert: paradoxical MAP_NOFAULT request, obj %p cow %#x", 1631e2e80fb3SKonstantin Belousov object, cow)); 163200de6773SKonstantin Belousov KASSERT((prot & ~max) == 0, 163300de6773SKonstantin Belousov ("prot %#x is not subset of max_prot %#x", prot, max)); 16343a0916b8SKonstantin Belousov 163530dcfc09SJohn Dyson /* 163630dcfc09SJohn Dyson * Check that the start and end points are not bogus. 163730dcfc09SJohn Dyson */ 1638f0340740SMark Johnston if (start == end || !vm_map_range_valid(map, start, end)) 163930dcfc09SJohn Dyson return (KERN_INVALID_ADDRESS); 164030dcfc09SJohn Dyson 16412e1c94aaSKonstantin Belousov if ((map->flags & MAP_WXORX) != 0 && (prot & (VM_PROT_WRITE | 16422e1c94aaSKonstantin Belousov VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE)) 16432e1c94aaSKonstantin Belousov return (KERN_PROTECTION_FAILURE); 16442e1c94aaSKonstantin Belousov 164530dcfc09SJohn Dyson /* 164630dcfc09SJohn Dyson * Find the entry prior to the proposed starting address; if it's part 164730dcfc09SJohn Dyson * of an existing entry, this range is bogus. 164830dcfc09SJohn Dyson */ 1649723413beSDoug Moore if (vm_map_lookup_entry(map, start, &prev_entry)) 165030dcfc09SJohn Dyson return (KERN_NO_SPACE); 165130dcfc09SJohn Dyson 165230dcfc09SJohn Dyson /* 165330dcfc09SJohn Dyson * Assert that the next entry doesn't overlap the end point. 165430dcfc09SJohn Dyson */ 165583704cc2SDoug Moore next_entry = vm_map_entry_succ(prev_entry); 165683704cc2SDoug Moore if (next_entry->start < end) 165730dcfc09SJohn Dyson return (KERN_NO_SPACE); 165830dcfc09SJohn Dyson 165919bd0d9cSKonstantin Belousov if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL || 166019bd0d9cSKonstantin Belousov max != VM_PROT_NONE)) 166119bd0d9cSKonstantin Belousov return (KERN_INVALID_ARGUMENT); 166219bd0d9cSKonstantin Belousov 1663afa07f7eSJohn Dyson protoeflags = 0; 1664afa07f7eSJohn Dyson if (cow & MAP_COPY_ON_WRITE) 1665e5f13bddSAlan Cox protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY; 166633314db0SAlan Cox if (cow & MAP_NOFAULT) 1667afa07f7eSJohn Dyson protoeflags |= MAP_ENTRY_NOFAULT; 16684f79d873SMatthew Dillon if (cow & MAP_DISABLE_SYNCER) 16694f79d873SMatthew Dillon protoeflags |= MAP_ENTRY_NOSYNC; 16709730a5daSPaul Saab if (cow & MAP_DISABLE_COREDUMP) 16719730a5daSPaul Saab protoeflags |= MAP_ENTRY_NOCOREDUMP; 1672712efe66SAlan Cox if (cow & MAP_STACK_GROWS_DOWN) 1673712efe66SAlan Cox protoeflags |= MAP_ENTRY_GROWS_DOWN; 1674712efe66SAlan Cox if (cow & MAP_STACK_GROWS_UP) 1675712efe66SAlan Cox protoeflags |= MAP_ENTRY_GROWS_UP; 1676fe7bcbafSKyle Evans if (cow & MAP_WRITECOUNT) 1677fe7bcbafSKyle Evans protoeflags |= MAP_ENTRY_WRITECNT; 167878022527SKonstantin Belousov if (cow & MAP_VN_EXEC) 167978022527SKonstantin Belousov protoeflags |= MAP_ENTRY_VN_EXEC; 168019bd0d9cSKonstantin Belousov if ((cow & MAP_CREATE_GUARD) != 0) 168119bd0d9cSKonstantin Belousov protoeflags |= MAP_ENTRY_GUARD; 168219bd0d9cSKonstantin Belousov if ((cow & MAP_CREATE_STACK_GAP_DN) != 0) 168319bd0d9cSKonstantin Belousov protoeflags |= MAP_ENTRY_STACK_GAP_DN; 168419bd0d9cSKonstantin Belousov if ((cow & MAP_CREATE_STACK_GAP_UP) != 0) 168519bd0d9cSKonstantin Belousov protoeflags |= MAP_ENTRY_STACK_GAP_UP; 16868211bd45SKonstantin Belousov if (cow & MAP_INHERIT_SHARE) 16878211bd45SKonstantin Belousov inheritance = VM_INHERIT_SHARE; 16888211bd45SKonstantin Belousov else 16898211bd45SKonstantin Belousov inheritance = VM_INHERIT_DEFAULT; 1690e2e80fb3SKonstantin Belousov if ((cow & MAP_SPLIT_BOUNDARY_MASK) != 0) { 1691e2e80fb3SKonstantin Belousov /* This magically ignores index 0, for usual page size. */ 1692e2e80fb3SKonstantin Belousov bidx = (cow & MAP_SPLIT_BOUNDARY_MASK) >> 1693e2e80fb3SKonstantin Belousov MAP_SPLIT_BOUNDARY_SHIFT; 1694e2e80fb3SKonstantin Belousov if (bidx >= MAXPAGESIZES) 1695e2e80fb3SKonstantin Belousov return (KERN_INVALID_ARGUMENT); 1696e2e80fb3SKonstantin Belousov bdry = pagesizes[bidx] - 1; 1697e2e80fb3SKonstantin Belousov if ((start & bdry) != 0 || (end & bdry) != 0) 1698e2e80fb3SKonstantin Belousov return (KERN_INVALID_ARGUMENT); 1699e2e80fb3SKonstantin Belousov protoeflags |= bidx << MAP_ENTRY_SPLIT_BOUNDARY_SHIFT; 1700e2e80fb3SKonstantin Belousov } 17014f79d873SMatthew Dillon 1702ef694c1aSEdward Tomasz Napierala cred = NULL; 170319bd0d9cSKonstantin Belousov if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0) 17043364c323SKonstantin Belousov goto charged; 17053364c323SKonstantin Belousov if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) && 17063364c323SKonstantin Belousov ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) { 17073364c323SKonstantin Belousov if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start)) 17083364c323SKonstantin Belousov return (KERN_RESOURCE_SHORTAGE); 17091569205fSKonstantin Belousov KASSERT(object == NULL || 17101569205fSKonstantin Belousov (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 || 1711ef694c1aSEdward Tomasz Napierala object->cred == NULL, 17121569205fSKonstantin Belousov ("overcommit: vm_map_insert o %p", object)); 1713ef694c1aSEdward Tomasz Napierala cred = curthread->td_ucred; 17143364c323SKonstantin Belousov } 17153364c323SKonstantin Belousov 17163364c323SKonstantin Belousov charged: 1717f8616ebfSAlan Cox /* Expand the kernel pmap, if necessary. */ 1718f8616ebfSAlan Cox if (map == kernel_map && end > kernel_vm_end) 1719f8616ebfSAlan Cox pmap_growkernel(end); 17201d284e00SAlan Cox if (object != NULL) { 172130dcfc09SJohn Dyson /* 17221d284e00SAlan Cox * OBJ_ONEMAPPING must be cleared unless this mapping 17231d284e00SAlan Cox * is trivially proven to be the only mapping for any 17241d284e00SAlan Cox * of the object's pages. (Object granularity 17251d284e00SAlan Cox * reference counting is insufficient to recognize 17261d284e00SAlan Cox * aliases with precision.) 172730dcfc09SJohn Dyson */ 172863967687SJeff Roberson if ((object->flags & OBJ_ANON) != 0) { 172989f6b863SAttilio Rao VM_OBJECT_WLOCK(object); 17301d284e00SAlan Cox if (object->ref_count > 1 || object->shadow_count != 0) 17312aaeadf8SMatthew Dillon vm_object_clear_flag(object, OBJ_ONEMAPPING); 173289f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object); 173363967687SJeff Roberson } 17342203c46dSMark Johnston } else if ((prev_entry->eflags & ~MAP_ENTRY_USER_WIRED) == 17352203c46dSMark Johnston protoeflags && 173678022527SKonstantin Belousov (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP | 173778022527SKonstantin Belousov MAP_VN_EXEC)) == 0 && 1738737e25f7SAlan Cox prev_entry->end == start && (prev_entry->cred == cred || 17393364c323SKonstantin Belousov (prev_entry->object.vm_object != NULL && 17401569205fSKonstantin Belousov prev_entry->object.vm_object->cred == cred)) && 17418cc7e047SJohn Dyson vm_object_coalesce(prev_entry->object.vm_object, 174257a21abaSAlan Cox prev_entry->offset, 17438cc7e047SJohn Dyson (vm_size_t)(prev_entry->end - prev_entry->start), 174460169c88SAlan Cox (vm_size_t)(end - prev_entry->end), cred != NULL && 174560169c88SAlan Cox (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) { 174630dcfc09SJohn Dyson /* 17472aaeadf8SMatthew Dillon * We were able to extend the object. Determine if we 17482aaeadf8SMatthew Dillon * can extend the previous map entry to include the 17492aaeadf8SMatthew Dillon * new range as well. 175030dcfc09SJohn Dyson */ 17511569205fSKonstantin Belousov if (prev_entry->inheritance == inheritance && 17521569205fSKonstantin Belousov prev_entry->protection == prot && 1753737e25f7SAlan Cox prev_entry->max_protection == max && 1754737e25f7SAlan Cox prev_entry->wired_count == 0) { 1755737e25f7SAlan Cox KASSERT((prev_entry->eflags & MAP_ENTRY_USER_WIRED) == 1756737e25f7SAlan Cox 0, ("prev_entry %p has incoherent wiring", 1757737e25f7SAlan Cox prev_entry)); 175819bd0d9cSKonstantin Belousov if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0) 17591569205fSKonstantin Belousov map->size += end - prev_entry->end; 1760fa581662SDoug Moore vm_map_entry_resize(map, prev_entry, 17611895f520SDoug Moore end - prev_entry->end); 1762ba41b0deSKonstantin Belousov *res = vm_map_try_merge_entries(map, prev_entry, 1763ba41b0deSKonstantin Belousov next_entry); 176430dcfc09SJohn Dyson return (KERN_SUCCESS); 176530dcfc09SJohn Dyson } 17668cc7e047SJohn Dyson 17672aaeadf8SMatthew Dillon /* 17682aaeadf8SMatthew Dillon * If we can extend the object but cannot extend the 17692aaeadf8SMatthew Dillon * map entry, we have to create a new map entry. We 17702aaeadf8SMatthew Dillon * must bump the ref count on the extended object to 17714e71e795SMatthew Dillon * account for it. object may be NULL. 17722aaeadf8SMatthew Dillon */ 17732aaeadf8SMatthew Dillon object = prev_entry->object.vm_object; 17742aaeadf8SMatthew Dillon offset = prev_entry->offset + 17752aaeadf8SMatthew Dillon (prev_entry->end - prev_entry->start); 17768cc7e047SJohn Dyson vm_object_reference(object); 1777ef694c1aSEdward Tomasz Napierala if (cred != NULL && object != NULL && object->cred != NULL && 17783364c323SKonstantin Belousov !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { 17793364c323SKonstantin Belousov /* Object already accounts for this uid. */ 1780ef694c1aSEdward Tomasz Napierala cred = NULL; 17813364c323SKonstantin Belousov } 1782b18bfc3dSJohn Dyson } 178360169c88SAlan Cox if (cred != NULL) 178460169c88SAlan Cox crhold(cred); 17852aaeadf8SMatthew Dillon 17862aaeadf8SMatthew Dillon /* 178730dcfc09SJohn Dyson * Create a new entry 178830dcfc09SJohn Dyson */ 178930dcfc09SJohn Dyson new_entry = vm_map_entry_create(map); 179030dcfc09SJohn Dyson new_entry->start = start; 179130dcfc09SJohn Dyson new_entry->end = end; 1792ef694c1aSEdward Tomasz Napierala new_entry->cred = NULL; 179330dcfc09SJohn Dyson 1794afa07f7eSJohn Dyson new_entry->eflags = protoeflags; 179530dcfc09SJohn Dyson new_entry->object.vm_object = object; 179630dcfc09SJohn Dyson new_entry->offset = offset; 17972267af78SJulian Elischer 17988211bd45SKonstantin Belousov new_entry->inheritance = inheritance; 179930dcfc09SJohn Dyson new_entry->protection = prot; 180030dcfc09SJohn Dyson new_entry->max_protection = max; 180130dcfc09SJohn Dyson new_entry->wired_count = 0; 1802997ac690SKonstantin Belousov new_entry->wiring_thread = NULL; 180313458803SAlan Cox new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT; 1804381b7242SAlan Cox new_entry->next_read = start; 1805e5f251d2SAlan Cox 1806ef694c1aSEdward Tomasz Napierala KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry), 18071569205fSKonstantin Belousov ("overcommit: vm_map_insert leaks vm_map %p", new_entry)); 1808ef694c1aSEdward Tomasz Napierala new_entry->cred = cred; 18093364c323SKonstantin Belousov 181030dcfc09SJohn Dyson /* 181130dcfc09SJohn Dyson * Insert the new entry into the list 181230dcfc09SJohn Dyson */ 18139f701172SKonstantin Belousov vm_map_entry_link(map, new_entry); 181419bd0d9cSKonstantin Belousov if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0) 181530dcfc09SJohn Dyson map->size += new_entry->end - new_entry->start; 181630dcfc09SJohn Dyson 18171a484d28SMatthew Dillon /* 1818eaaf9f7fSAlan Cox * Try to coalesce the new entry with both the previous and next 1819eaaf9f7fSAlan Cox * entries in the list. Previously, we only attempted to coalesce 1820eaaf9f7fSAlan Cox * with the previous entry when object is NULL. Here, we handle the 1821eaaf9f7fSAlan Cox * other cases, which are less common. 18221a484d28SMatthew Dillon */ 182383ea714fSDoug Moore vm_map_try_merge_entries(map, prev_entry, new_entry); 1824ba41b0deSKonstantin Belousov *res = vm_map_try_merge_entries(map, new_entry, next_entry); 18254e71e795SMatthew Dillon 18261569205fSKonstantin Belousov if ((cow & (MAP_PREFAULT | MAP_PREFAULT_PARTIAL)) != 0) { 18271569205fSKonstantin Belousov vm_map_pmap_enter(map, start, prot, object, OFF_TO_IDX(offset), 18281569205fSKonstantin Belousov end - start, cow & MAP_PREFAULT_PARTIAL); 18294f79d873SMatthew Dillon } 1830e972780aSAlan Cox 183130dcfc09SJohn Dyson return (KERN_SUCCESS); 183230dcfc09SJohn Dyson } 183330dcfc09SJohn Dyson 183430dcfc09SJohn Dyson /* 1835ba41b0deSKonstantin Belousov * vm_map_insert: 1836ba41b0deSKonstantin Belousov * 1837ba41b0deSKonstantin Belousov * Inserts the given VM object into the target map at the 1838ba41b0deSKonstantin Belousov * specified address range. 1839ba41b0deSKonstantin Belousov * 1840ba41b0deSKonstantin Belousov * Requires that the map be locked, and leaves it so. 1841ba41b0deSKonstantin Belousov * 1842ba41b0deSKonstantin Belousov * If object is non-NULL, ref count must be bumped by caller 1843ba41b0deSKonstantin Belousov * prior to making call to account for the new entry. 1844ba41b0deSKonstantin Belousov */ 1845ba41b0deSKonstantin Belousov int 1846ba41b0deSKonstantin Belousov vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1847ba41b0deSKonstantin Belousov vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow) 1848ba41b0deSKonstantin Belousov { 1849ba41b0deSKonstantin Belousov vm_map_entry_t res; 1850ba41b0deSKonstantin Belousov 1851ba41b0deSKonstantin Belousov return (vm_map_insert1(map, object, offset, start, end, prot, max, 1852ba41b0deSKonstantin Belousov cow, &res)); 1853ba41b0deSKonstantin Belousov } 1854ba41b0deSKonstantin Belousov 1855ba41b0deSKonstantin Belousov /* 18560164e057SAlan Cox * vm_map_findspace: 18570164e057SAlan Cox * 18580164e057SAlan Cox * Find the first fit (lowest VM address) for "length" free bytes 18590164e057SAlan Cox * beginning at address >= start in the given map. 18600164e057SAlan Cox * 18619f701172SKonstantin Belousov * In a vm_map_entry, "max_free" is the maximum amount of 18629f701172SKonstantin Belousov * contiguous free space between an entry in its subtree and a 18639f701172SKonstantin Belousov * neighbor of that entry. This allows finding a free region in 18649f701172SKonstantin Belousov * one path down the tree, so O(log n) amortized with splay 18659f701172SKonstantin Belousov * trees. 18660164e057SAlan Cox * 18670164e057SAlan Cox * The map must be locked, and leaves it so. 18680164e057SAlan Cox * 18699f701172SKonstantin Belousov * Returns: starting address if sufficient space, 18709f701172SKonstantin Belousov * vm_map_max(map)-length+1 if insufficient space. 1871df8bae1dSRodney W. Grimes */ 18729f701172SKonstantin Belousov vm_offset_t 18739f701172SKonstantin Belousov vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length) 1874df8bae1dSRodney W. Grimes { 187585b7bedbSDoug Moore vm_map_entry_t header, llist, rlist, root, y; 187685b7bedbSDoug Moore vm_size_t left_length, max_free_left, max_free_right; 1877e65d58a0SDoug Moore vm_offset_t gap_end; 1878df8bae1dSRodney W. Grimes 187920f02659SMark Johnston VM_MAP_ASSERT_LOCKED(map); 188020f02659SMark Johnston 1881986b43f8SAlan Cox /* 1882986b43f8SAlan Cox * Request must fit within min/max VM address and must avoid 1883986b43f8SAlan Cox * address wrap. 1884986b43f8SAlan Cox */ 1885f0165b1cSKonstantin Belousov start = MAX(start, vm_map_min(map)); 1886e65d58a0SDoug Moore if (start >= vm_map_max(map) || length > vm_map_max(map) - start) 18879f701172SKonstantin Belousov return (vm_map_max(map) - length + 1); 1888df8bae1dSRodney W. Grimes 18890164e057SAlan Cox /* Empty tree means wide open address space. */ 18909f701172SKonstantin Belousov if (map->root == NULL) 18919f701172SKonstantin Belousov return (start); 18920164e057SAlan Cox 18930164e057SAlan Cox /* 1894e65d58a0SDoug Moore * After splay_split, if start is within an entry, push it to the start 1895e65d58a0SDoug Moore * of the following gap. If rlist is at the end of the gap containing 1896e65d58a0SDoug Moore * start, save the end of that gap in gap_end to see if the gap is big 1897e65d58a0SDoug Moore * enough; otherwise set gap_end to start skip gap-checking and move 1898e65d58a0SDoug Moore * directly to a search of the right subtree. 18990164e057SAlan Cox */ 190085b7bedbSDoug Moore header = &map->header; 19015a0879daSDoug Moore root = vm_map_splay_split(map, start, length, &llist, &rlist); 1902e65d58a0SDoug Moore gap_end = rlist->start; 1903e65d58a0SDoug Moore if (root != NULL) { 19049f701172SKonstantin Belousov start = root->end; 1905c1ad5342SDoug Moore if (root->right != rlist) 1906e65d58a0SDoug Moore gap_end = start; 190785b7bedbSDoug Moore max_free_left = vm_map_splay_merge_left(header, root, llist); 190885b7bedbSDoug Moore max_free_right = vm_map_splay_merge_right(header, root, rlist); 190985b7bedbSDoug Moore } else if (rlist != header) { 19109f701172SKonstantin Belousov root = rlist; 19119f701172SKonstantin Belousov rlist = root->left; 191285b7bedbSDoug Moore max_free_left = vm_map_splay_merge_pred(header, root, llist); 191385b7bedbSDoug Moore max_free_right = vm_map_splay_merge_right(header, root, rlist); 19149f701172SKonstantin Belousov } else { 19159f701172SKonstantin Belousov root = llist; 19169f701172SKonstantin Belousov llist = root->right; 191785b7bedbSDoug Moore max_free_left = vm_map_splay_merge_left(header, root, llist); 191885b7bedbSDoug Moore max_free_right = vm_map_splay_merge_succ(header, root, rlist); 19190164e057SAlan Cox } 192085b7bedbSDoug Moore root->max_free = vm_size_max(max_free_left, max_free_right); 192185b7bedbSDoug Moore map->root = root; 19229f701172SKonstantin Belousov VM_MAP_ASSERT_CONSISTENT(map); 1923e65d58a0SDoug Moore if (length <= gap_end - start) 19249f701172SKonstantin Belousov return (start); 19250164e057SAlan Cox 19260164e057SAlan Cox /* With max_free, can immediately tell if no solution. */ 1927c1ad5342SDoug Moore if (root->right == header || length > root->right->max_free) 19289f701172SKonstantin Belousov return (vm_map_max(map) - length + 1); 19290164e057SAlan Cox 19300164e057SAlan Cox /* 19319f701172SKonstantin Belousov * Splay for the least large-enough gap in the right subtree. 19320164e057SAlan Cox */ 193385b7bedbSDoug Moore llist = rlist = header; 19349f701172SKonstantin Belousov for (left_length = 0;; 19355a0879daSDoug Moore left_length = vm_map_entry_max_free_left(root, llist)) { 19369f701172SKonstantin Belousov if (length <= left_length) 1937c1ad5342SDoug Moore SPLAY_LEFT_STEP(root, y, llist, rlist, 19385a0879daSDoug Moore length <= vm_map_entry_max_free_left(y, llist)); 19399f701172SKonstantin Belousov else 1940c1ad5342SDoug Moore SPLAY_RIGHT_STEP(root, y, llist, rlist, 19415a0879daSDoug Moore length > vm_map_entry_max_free_left(y, root)); 19429f701172SKonstantin Belousov if (root == NULL) 19439f701172SKonstantin Belousov break; 19440164e057SAlan Cox } 19459f701172SKonstantin Belousov root = llist; 19469f701172SKonstantin Belousov llist = root->right; 194785b7bedbSDoug Moore max_free_left = vm_map_splay_merge_left(header, root, llist); 194885b7bedbSDoug Moore if (rlist == header) { 194985b7bedbSDoug Moore root->max_free = vm_size_max(max_free_left, 195085b7bedbSDoug Moore vm_map_splay_merge_succ(header, root, rlist)); 195185b7bedbSDoug Moore } else { 19525a0879daSDoug Moore y = rlist; 19539f701172SKonstantin Belousov rlist = y->left; 195485b7bedbSDoug Moore y->max_free = vm_size_max( 195585b7bedbSDoug Moore vm_map_splay_merge_pred(root, y, root), 195685b7bedbSDoug Moore vm_map_splay_merge_right(header, y, rlist)); 195785b7bedbSDoug Moore root->max_free = vm_size_max(max_free_left, y->max_free); 19589f701172SKonstantin Belousov } 195985b7bedbSDoug Moore map->root = root; 19609f701172SKonstantin Belousov VM_MAP_ASSERT_CONSISTENT(map); 19619f701172SKonstantin Belousov return (root->end); 1962df8bae1dSRodney W. Grimes } 1963df8bae1dSRodney W. Grimes 1964d239bd3cSKonstantin Belousov int 1965d239bd3cSKonstantin Belousov vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1966b8ca4ef2SAlan Cox vm_offset_t start, vm_size_t length, vm_prot_t prot, 1967d239bd3cSKonstantin Belousov vm_prot_t max, int cow) 1968d239bd3cSKonstantin Belousov { 1969b8ca4ef2SAlan Cox vm_offset_t end; 1970d239bd3cSKonstantin Belousov int result; 1971d239bd3cSKonstantin Belousov 1972d239bd3cSKonstantin Belousov end = start + length; 19734648ba0aSKonstantin Belousov KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || 19744648ba0aSKonstantin Belousov object == NULL, 19754648ba0aSKonstantin Belousov ("vm_map_fixed: non-NULL backing object for stack")); 1976897d81a0SKonstantin Belousov vm_map_lock(map); 1977d239bd3cSKonstantin Belousov VM_MAP_RANGE_CHECK(map, start, end); 1978e8f77c20SKonstantin Belousov if ((cow & MAP_CHECK_EXCL) == 0) { 1979e8f77c20SKonstantin Belousov result = vm_map_delete(map, start, end); 1980e8f77c20SKonstantin Belousov if (result != KERN_SUCCESS) 1981e8f77c20SKonstantin Belousov goto out; 1982e8f77c20SKonstantin Belousov } 19834648ba0aSKonstantin Belousov if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { 19844648ba0aSKonstantin Belousov result = vm_map_stack_locked(map, start, length, sgrowsiz, 19854648ba0aSKonstantin Belousov prot, max, cow); 19864648ba0aSKonstantin Belousov } else { 19874648ba0aSKonstantin Belousov result = vm_map_insert(map, object, offset, start, end, 19884648ba0aSKonstantin Belousov prot, max, cow); 19894648ba0aSKonstantin Belousov } 1990e8f77c20SKonstantin Belousov out: 1991d239bd3cSKonstantin Belousov vm_map_unlock(map); 1992d239bd3cSKonstantin Belousov return (result); 1993d239bd3cSKonstantin Belousov } 1994d239bd3cSKonstantin Belousov 1995fa50a355SKonstantin Belousov static const int aslr_pages_rnd_64[2] = {0x1000, 0x10}; 1996fa50a355SKonstantin Belousov static const int aslr_pages_rnd_32[2] = {0x100, 0x4}; 1997fa50a355SKonstantin Belousov 1998fa50a355SKonstantin Belousov static int cluster_anon = 1; 1999fa50a355SKonstantin Belousov SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW, 2000fa50a355SKonstantin Belousov &cluster_anon, 0, 2001484e9d03SKonstantin Belousov "Cluster anonymous mappings: 0 = no, 1 = yes if no hint, 2 = always"); 2002484e9d03SKonstantin Belousov 2003484e9d03SKonstantin Belousov static bool 2004d8e6f494SAlan Cox clustering_anon_allowed(vm_offset_t addr, int cow) 2005484e9d03SKonstantin Belousov { 2006484e9d03SKonstantin Belousov 2007484e9d03SKonstantin Belousov switch (cluster_anon) { 2008484e9d03SKonstantin Belousov case 0: 2009484e9d03SKonstantin Belousov return (false); 2010484e9d03SKonstantin Belousov case 1: 2011d8e6f494SAlan Cox return (addr == 0 || (cow & MAP_NO_HINT) != 0); 2012484e9d03SKonstantin Belousov case 2: 2013484e9d03SKonstantin Belousov default: 2014484e9d03SKonstantin Belousov return (true); 2015484e9d03SKonstantin Belousov } 2016484e9d03SKonstantin Belousov } 2017fa50a355SKonstantin Belousov 2018fa50a355SKonstantin Belousov static long aslr_restarts; 2019fa50a355SKonstantin Belousov SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD, 2020fa50a355SKonstantin Belousov &aslr_restarts, 0, 2021fa50a355SKonstantin Belousov "Number of aslr failures"); 2022fa50a355SKonstantin Belousov 2023df8bae1dSRodney W. Grimes /* 2024fec29688SAlan Cox * Searches for the specified amount of free space in the given map with the 2025fec29688SAlan Cox * specified alignment. Performs an address-ordered, first-fit search from 2026fec29688SAlan Cox * the given address "*addr", with an optional upper bound "max_addr". If the 2027fec29688SAlan Cox * parameter "alignment" is zero, then the alignment is computed from the 2028fec29688SAlan Cox * given (object, offset) pair so as to enable the greatest possible use of 2029fec29688SAlan Cox * superpage mappings. Returns KERN_SUCCESS and the address of the free space 2030fec29688SAlan Cox * in "*addr" if successful. Otherwise, returns KERN_NO_SPACE. 2031fec29688SAlan Cox * 2032fec29688SAlan Cox * The map must be locked. Initially, there must be at least "length" bytes 2033fec29688SAlan Cox * of free space at the given address. 2034fec29688SAlan Cox */ 2035fec29688SAlan Cox static int 2036fec29688SAlan Cox vm_map_alignspace(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 2037fec29688SAlan Cox vm_offset_t *addr, vm_size_t length, vm_offset_t max_addr, 2038fec29688SAlan Cox vm_offset_t alignment) 2039fec29688SAlan Cox { 2040fec29688SAlan Cox vm_offset_t aligned_addr, free_addr; 2041fec29688SAlan Cox 2042fec29688SAlan Cox VM_MAP_ASSERT_LOCKED(map); 2043fec29688SAlan Cox free_addr = *addr; 20449f701172SKonstantin Belousov KASSERT(free_addr == vm_map_findspace(map, free_addr, length), 2045e65d58a0SDoug Moore ("caller failed to provide space %#jx at address %p", 2046e65d58a0SDoug Moore (uintmax_t)length, (void *)free_addr)); 2047fec29688SAlan Cox for (;;) { 2048fec29688SAlan Cox /* 2049fec29688SAlan Cox * At the start of every iteration, the free space at address 2050fec29688SAlan Cox * "*addr" is at least "length" bytes. 2051fec29688SAlan Cox */ 2052fec29688SAlan Cox if (alignment == 0) 2053fec29688SAlan Cox pmap_align_superpage(object, offset, addr, length); 2054c606ab59SDoug Moore else 2055c606ab59SDoug Moore *addr = roundup2(*addr, alignment); 2056fec29688SAlan Cox aligned_addr = *addr; 2057fec29688SAlan Cox if (aligned_addr == free_addr) { 2058fec29688SAlan Cox /* 2059fec29688SAlan Cox * Alignment did not change "*addr", so "*addr" must 2060fec29688SAlan Cox * still provide sufficient free space. 2061fec29688SAlan Cox */ 2062fec29688SAlan Cox return (KERN_SUCCESS); 2063fec29688SAlan Cox } 2064fec29688SAlan Cox 2065fec29688SAlan Cox /* 2066fec29688SAlan Cox * Test for address wrap on "*addr". A wrapped "*addr" could 2067fec29688SAlan Cox * be a valid address, in which case vm_map_findspace() cannot 2068fec29688SAlan Cox * be relied upon to fail. 2069fec29688SAlan Cox */ 20709f701172SKonstantin Belousov if (aligned_addr < free_addr) 20719f701172SKonstantin Belousov return (KERN_NO_SPACE); 20729f701172SKonstantin Belousov *addr = vm_map_findspace(map, aligned_addr, length); 20739f701172SKonstantin Belousov if (*addr + length > vm_map_max(map) || 2074fec29688SAlan Cox (max_addr != 0 && *addr + length > max_addr)) 2075fec29688SAlan Cox return (KERN_NO_SPACE); 2076fec29688SAlan Cox free_addr = *addr; 2077fec29688SAlan Cox if (free_addr == aligned_addr) { 2078fec29688SAlan Cox /* 2079fec29688SAlan Cox * If a successful call to vm_map_findspace() did not 2080fec29688SAlan Cox * change "*addr", then "*addr" must still be aligned 2081fec29688SAlan Cox * and provide sufficient free space. 2082fec29688SAlan Cox */ 2083fec29688SAlan Cox return (KERN_SUCCESS); 2084fec29688SAlan Cox } 2085fec29688SAlan Cox } 2086fec29688SAlan Cox } 2087fec29688SAlan Cox 20887a9f2da3SKonstantin Belousov int 20897a9f2da3SKonstantin Belousov vm_map_find_aligned(vm_map_t map, vm_offset_t *addr, vm_size_t length, 20907a9f2da3SKonstantin Belousov vm_offset_t max_addr, vm_offset_t alignment) 20917a9f2da3SKonstantin Belousov { 20927a9f2da3SKonstantin Belousov /* XXXKIB ASLR eh ? */ 20937a9f2da3SKonstantin Belousov *addr = vm_map_findspace(map, *addr, length); 20947a9f2da3SKonstantin Belousov if (*addr + length > vm_map_max(map) || 20957a9f2da3SKonstantin Belousov (max_addr != 0 && *addr + length > max_addr)) 20967a9f2da3SKonstantin Belousov return (KERN_NO_SPACE); 20977a9f2da3SKonstantin Belousov return (vm_map_alignspace(map, NULL, 0, addr, length, max_addr, 20987a9f2da3SKonstantin Belousov alignment)); 20997a9f2da3SKonstantin Belousov } 21007a9f2da3SKonstantin Belousov 2101fec29688SAlan Cox /* 2102df8bae1dSRodney W. Grimes * vm_map_find finds an unallocated region in the target address 2103df8bae1dSRodney W. Grimes * map with the given length. The search is defined to be 2104df8bae1dSRodney W. Grimes * first-fit from the specified address; the region found is 2105df8bae1dSRodney W. Grimes * returned in the same parameter. 2106df8bae1dSRodney W. Grimes * 21072aaeadf8SMatthew Dillon * If object is non-NULL, ref count must be bumped by caller 21082aaeadf8SMatthew Dillon * prior to making call to account for the new entry. 2109df8bae1dSRodney W. Grimes */ 2110df8bae1dSRodney W. Grimes int 2111b9dcd593SBruce Evans vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 2112b9dcd593SBruce Evans vm_offset_t *addr, /* IN/OUT */ 2113edb572a3SJohn Baldwin vm_size_t length, vm_offset_t max_addr, int find_space, 2114edb572a3SJohn Baldwin vm_prot_t prot, vm_prot_t max, int cow) 2115df8bae1dSRodney W. Grimes { 2116fa50a355SKonstantin Belousov vm_offset_t alignment, curr_min_addr, min_addr; 2117fa50a355SKonstantin Belousov int gap, pidx, rv, try; 2118fa50a355SKonstantin Belousov bool cluster, en_aslr, update_anon; 2119df8bae1dSRodney W. Grimes 21204648ba0aSKonstantin Belousov KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || 21214648ba0aSKonstantin Belousov object == NULL, 21224648ba0aSKonstantin Belousov ("vm_map_find: non-NULL backing object for stack")); 2123ea7e7006SKonstantin Belousov MPASS((cow & MAP_REMAP) == 0 || (find_space == VMFS_NO_SPACE && 2124ea7e7006SKonstantin Belousov (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0)); 2125ff74a3faSJohn Baldwin if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL || 2126ff74a3faSJohn Baldwin (object->flags & OBJ_COLORED) == 0)) 2127ff74a3faSJohn Baldwin find_space = VMFS_ANY_SPACE; 21285aa60b6fSJohn Baldwin if (find_space >> 8 != 0) { 21295aa60b6fSJohn Baldwin KASSERT((find_space & 0xff) == 0, ("bad VMFS flags")); 21305aa60b6fSJohn Baldwin alignment = (vm_offset_t)1 << (find_space >> 8); 21315aa60b6fSJohn Baldwin } else 21325aa60b6fSJohn Baldwin alignment = 0; 2133fa50a355SKonstantin Belousov en_aslr = (map->flags & MAP_ASLR) != 0; 2134d8e6f494SAlan Cox update_anon = cluster = clustering_anon_allowed(*addr, cow) && 2135fa50a355SKonstantin Belousov (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 && 2136fa50a355SKonstantin Belousov find_space != VMFS_NO_SPACE && object == NULL && 2137fa50a355SKonstantin Belousov (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP | 2138fa50a355SKonstantin Belousov MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE; 2139fa50a355SKonstantin Belousov curr_min_addr = min_addr = *addr; 2140fa50a355SKonstantin Belousov if (en_aslr && min_addr == 0 && !cluster && 2141fa50a355SKonstantin Belousov find_space != VMFS_NO_SPACE && 2142fa50a355SKonstantin Belousov (map->flags & MAP_ASLR_IGNSTART) != 0) 2143fa50a355SKonstantin Belousov curr_min_addr = min_addr = vm_map_min(map); 2144fa50a355SKonstantin Belousov try = 0; 21454d572bb3SAlan Cox vm_map_lock(map); 2146fa50a355SKonstantin Belousov if (cluster) { 2147fa50a355SKonstantin Belousov curr_min_addr = map->anon_loc; 2148fa50a355SKonstantin Belousov if (curr_min_addr == 0) 2149fa50a355SKonstantin Belousov cluster = false; 2150fa50a355SKonstantin Belousov } 215126c538ffSAlan Cox if (find_space != VMFS_NO_SPACE) { 2152fec29688SAlan Cox KASSERT(find_space == VMFS_ANY_SPACE || 2153fec29688SAlan Cox find_space == VMFS_OPTIMAL_SPACE || 2154fec29688SAlan Cox find_space == VMFS_SUPER_SPACE || 2155fec29688SAlan Cox alignment != 0, ("unexpected VMFS flag")); 2156fec29688SAlan Cox again: 2157fa50a355SKonstantin Belousov /* 2158fa50a355SKonstantin Belousov * When creating an anonymous mapping, try clustering 2159fa50a355SKonstantin Belousov * with an existing anonymous mapping first. 2160fa50a355SKonstantin Belousov * 2161fa50a355SKonstantin Belousov * We make up to two attempts to find address space 2162fa50a355SKonstantin Belousov * for a given find_space value. The first attempt may 2163fa50a355SKonstantin Belousov * apply randomization or may cluster with an existing 2164fa50a355SKonstantin Belousov * anonymous mapping. If this first attempt fails, 2165fa50a355SKonstantin Belousov * perform a first-fit search of the available address 2166fa50a355SKonstantin Belousov * space. 2167fa50a355SKonstantin Belousov * 2168fa50a355SKonstantin Belousov * If all tries failed, and find_space is 2169fa50a355SKonstantin Belousov * VMFS_OPTIMAL_SPACE, fallback to VMFS_ANY_SPACE. 2170fa50a355SKonstantin Belousov * Again enable clustering and randomization. 2171fa50a355SKonstantin Belousov */ 2172fa50a355SKonstantin Belousov try++; 2173fa50a355SKonstantin Belousov MPASS(try <= 2); 2174fa50a355SKonstantin Belousov 2175fa50a355SKonstantin Belousov if (try == 2) { 2176fa50a355SKonstantin Belousov /* 2177fa50a355SKonstantin Belousov * Second try: we failed either to find a 2178fa50a355SKonstantin Belousov * suitable region for randomizing the 2179fa50a355SKonstantin Belousov * allocation, or to cluster with an existing 2180fa50a355SKonstantin Belousov * mapping. Retry with free run. 2181fa50a355SKonstantin Belousov */ 2182fa50a355SKonstantin Belousov curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ? 2183fa50a355SKonstantin Belousov vm_map_min(map) : min_addr; 2184fa50a355SKonstantin Belousov atomic_add_long(&aslr_restarts, 1); 2185fa50a355SKonstantin Belousov } 2186fa50a355SKonstantin Belousov 2187fa50a355SKonstantin Belousov if (try == 1 && en_aslr && !cluster) { 2188fa50a355SKonstantin Belousov /* 2189fa50a355SKonstantin Belousov * Find space for allocation, including 2190fa50a355SKonstantin Belousov * gap needed for later randomization. 2191fa50a355SKonstantin Belousov */ 2192fa50a355SKonstantin Belousov pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 && 2193fa50a355SKonstantin Belousov (find_space == VMFS_SUPER_SPACE || find_space == 2194fa50a355SKonstantin Belousov VMFS_OPTIMAL_SPACE) ? 1 : 0; 2195fa50a355SKonstantin Belousov gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR && 2196fa50a355SKonstantin Belousov (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ? 2197fa50a355SKonstantin Belousov aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx]; 21989f701172SKonstantin Belousov *addr = vm_map_findspace(map, curr_min_addr, 21999f701172SKonstantin Belousov length + gap * pagesizes[pidx]); 22009f701172SKonstantin Belousov if (*addr + length + gap * pagesizes[pidx] > 2201a5a02ef4SKonstantin Belousov vm_map_max(map)) 2202fa50a355SKonstantin Belousov goto again; 2203fa50a355SKonstantin Belousov /* And randomize the start address. */ 2204fa50a355SKonstantin Belousov *addr += (arc4random() % gap) * pagesizes[pidx]; 22055019dac9SKonstantin Belousov if (max_addr != 0 && *addr + length > max_addr) 22065019dac9SKonstantin Belousov goto again; 22079f701172SKonstantin Belousov } else { 22089f701172SKonstantin Belousov *addr = vm_map_findspace(map, curr_min_addr, length); 22099f701172SKonstantin Belousov if (*addr + length > vm_map_max(map) || 2210edb572a3SJohn Baldwin (max_addr != 0 && *addr + length > max_addr)) { 2211fa50a355SKonstantin Belousov if (cluster) { 2212fa50a355SKonstantin Belousov cluster = false; 2213fa50a355SKonstantin Belousov MPASS(try == 1); 2214fa50a355SKonstantin Belousov goto again; 2215fa50a355SKonstantin Belousov } 2216fec29688SAlan Cox rv = KERN_NO_SPACE; 2217fec29688SAlan Cox goto done; 2218fec29688SAlan Cox } 22199f701172SKonstantin Belousov } 2220fa50a355SKonstantin Belousov 2221fec29688SAlan Cox if (find_space != VMFS_ANY_SPACE && 2222fec29688SAlan Cox (rv = vm_map_alignspace(map, object, offset, addr, length, 2223fec29688SAlan Cox max_addr, alignment)) != KERN_SUCCESS) { 2224ff74a3faSJohn Baldwin if (find_space == VMFS_OPTIMAL_SPACE) { 2225ff74a3faSJohn Baldwin find_space = VMFS_ANY_SPACE; 2226fa50a355SKonstantin Belousov curr_min_addr = min_addr; 2227fa50a355SKonstantin Belousov cluster = update_anon; 2228fa50a355SKonstantin Belousov try = 0; 2229ff74a3faSJohn Baldwin goto again; 2230ff74a3faSJohn Baldwin } 2231fec29688SAlan Cox goto done; 2232df8bae1dSRodney W. Grimes } 2233ea7e7006SKonstantin Belousov } else if ((cow & MAP_REMAP) != 0) { 22340f1e6ec5SMark Johnston if (!vm_map_range_valid(map, *addr, *addr + length)) { 2235ea7e7006SKonstantin Belousov rv = KERN_INVALID_ADDRESS; 2236ea7e7006SKonstantin Belousov goto done; 2237ea7e7006SKonstantin Belousov } 2238e8f77c20SKonstantin Belousov rv = vm_map_delete(map, *addr, *addr + length); 2239e8f77c20SKonstantin Belousov if (rv != KERN_SUCCESS) 2240e8f77c20SKonstantin Belousov goto done; 2241df8bae1dSRodney W. Grimes } 22424648ba0aSKonstantin Belousov if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { 2243fec29688SAlan Cox rv = vm_map_stack_locked(map, *addr, length, sgrowsiz, prot, 2244fec29688SAlan Cox max, cow); 22454648ba0aSKonstantin Belousov } else { 2246fec29688SAlan Cox rv = vm_map_insert(map, object, offset, *addr, *addr + length, 2247fec29688SAlan Cox prot, max, cow); 22484648ba0aSKonstantin Belousov } 2249fa50a355SKonstantin Belousov if (rv == KERN_SUCCESS && update_anon) 2250fa50a355SKonstantin Belousov map->anon_loc = *addr + length; 2251fec29688SAlan Cox done: 2252df8bae1dSRodney W. Grimes vm_map_unlock(map); 2253fec29688SAlan Cox return (rv); 2254df8bae1dSRodney W. Grimes } 2255df8bae1dSRodney W. Grimes 2256e8502826SKonstantin Belousov /* 2257e8502826SKonstantin Belousov * vm_map_find_min() is a variant of vm_map_find() that takes an 2258e8502826SKonstantin Belousov * additional parameter (min_addr) and treats the given address 2259e8502826SKonstantin Belousov * (*addr) differently. Specifically, it treats *addr as a hint 2260e8502826SKonstantin Belousov * and not as the minimum address where the mapping is created. 2261e8502826SKonstantin Belousov * 2262e8502826SKonstantin Belousov * This function works in two phases. First, it tries to 2263e8502826SKonstantin Belousov * allocate above the hint. If that fails and the hint is 2264e8502826SKonstantin Belousov * greater than min_addr, it performs a second pass, replacing 2265e8502826SKonstantin Belousov * the hint with min_addr as the minimum address for the 2266e8502826SKonstantin Belousov * allocation. 2267e8502826SKonstantin Belousov */ 22686a97a3f7SKonstantin Belousov int 22696a97a3f7SKonstantin Belousov vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 22706a97a3f7SKonstantin Belousov vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr, 22716a97a3f7SKonstantin Belousov vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, 22726a97a3f7SKonstantin Belousov int cow) 22736a97a3f7SKonstantin Belousov { 22746a97a3f7SKonstantin Belousov vm_offset_t hint; 22756a97a3f7SKonstantin Belousov int rv; 22766a97a3f7SKonstantin Belousov 22776a97a3f7SKonstantin Belousov hint = *addr; 227850d663b1SAlan Cox if (hint == 0) { 2279d8e6f494SAlan Cox cow |= MAP_NO_HINT; 2280d8e6f494SAlan Cox *addr = hint = min_addr; 228150d663b1SAlan Cox } 22826a97a3f7SKonstantin Belousov for (;;) { 22836a97a3f7SKonstantin Belousov rv = vm_map_find(map, object, offset, addr, length, max_addr, 22846a97a3f7SKonstantin Belousov find_space, prot, max, cow); 22856a97a3f7SKonstantin Belousov if (rv == KERN_SUCCESS || min_addr >= hint) 22866a97a3f7SKonstantin Belousov return (rv); 22877683ad70SKonstantin Belousov *addr = hint = min_addr; 22886a97a3f7SKonstantin Belousov } 22896a97a3f7SKonstantin Belousov } 22906a97a3f7SKonstantin Belousov 229192e78c10SAlan Cox /* 229292e78c10SAlan Cox * A map entry with any of the following flags set must not be merged with 229392e78c10SAlan Cox * another entry. 229492e78c10SAlan Cox */ 229592e78c10SAlan Cox #define MAP_ENTRY_NOMERGE_MASK (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP | \ 22969d7ea6cfSKonstantin Belousov MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP | MAP_ENTRY_VN_EXEC | \ 22979d7ea6cfSKonstantin Belousov MAP_ENTRY_STACK_GAP_UP | MAP_ENTRY_STACK_GAP_DN) 229892e78c10SAlan Cox 229907424462SKonstantin Belousov static bool 230007424462SKonstantin Belousov vm_map_mergeable_neighbors(vm_map_entry_t prev, vm_map_entry_t entry) 230107424462SKonstantin Belousov { 230207424462SKonstantin Belousov 230392e78c10SAlan Cox KASSERT((prev->eflags & MAP_ENTRY_NOMERGE_MASK) == 0 || 230492e78c10SAlan Cox (entry->eflags & MAP_ENTRY_NOMERGE_MASK) == 0, 230592e78c10SAlan Cox ("vm_map_mergeable_neighbors: neither %p nor %p are mergeable", 230692e78c10SAlan Cox prev, entry)); 230707424462SKonstantin Belousov return (prev->end == entry->start && 230807424462SKonstantin Belousov prev->object.vm_object == entry->object.vm_object && 230907424462SKonstantin Belousov (prev->object.vm_object == NULL || 231092e78c10SAlan Cox prev->offset + (prev->end - prev->start) == entry->offset) && 231107424462SKonstantin Belousov prev->eflags == entry->eflags && 231207424462SKonstantin Belousov prev->protection == entry->protection && 231307424462SKonstantin Belousov prev->max_protection == entry->max_protection && 231407424462SKonstantin Belousov prev->inheritance == entry->inheritance && 231507424462SKonstantin Belousov prev->wired_count == entry->wired_count && 231607424462SKonstantin Belousov prev->cred == entry->cred); 231707424462SKonstantin Belousov } 231807424462SKonstantin Belousov 231907424462SKonstantin Belousov static void 232007424462SKonstantin Belousov vm_map_merged_neighbor_dispose(vm_map_t map, vm_map_entry_t entry) 232107424462SKonstantin Belousov { 232207424462SKonstantin Belousov 232307424462SKonstantin Belousov /* 232492e78c10SAlan Cox * If the backing object is a vnode object, vm_object_deallocate() 232592e78c10SAlan Cox * calls vrele(). However, vrele() does not lock the vnode because 232692e78c10SAlan Cox * the vnode has additional references. Thus, the map lock can be 232792e78c10SAlan Cox * kept without causing a lock-order reversal with the vnode lock. 232807424462SKonstantin Belousov * 232992e78c10SAlan Cox * Since we count the number of virtual page mappings in 233092e78c10SAlan Cox * object->un_pager.vnp.writemappings, the writemappings value 233192e78c10SAlan Cox * should not be adjusted when the entry is disposed of. 233207424462SKonstantin Belousov */ 233307424462SKonstantin Belousov if (entry->object.vm_object != NULL) 233407424462SKonstantin Belousov vm_object_deallocate(entry->object.vm_object); 233507424462SKonstantin Belousov if (entry->cred != NULL) 233607424462SKonstantin Belousov crfree(entry->cred); 233707424462SKonstantin Belousov vm_map_entry_dispose(map, entry); 233807424462SKonstantin Belousov } 233907424462SKonstantin Belousov 2340df8bae1dSRodney W. Grimes /* 234183ea714fSDoug Moore * vm_map_try_merge_entries: 234267bf6868SJohn Dyson * 2343ba41b0deSKonstantin Belousov * Compare two map entries that represent consecutive ranges. If 2344ba41b0deSKonstantin Belousov * the entries can be merged, expand the range of the second to 2345ba41b0deSKonstantin Belousov * cover the range of the first and delete the first. Then return 2346ba41b0deSKonstantin Belousov * the map entry that includes the first range. 23474e71e795SMatthew Dillon * 23484e71e795SMatthew Dillon * The map must be locked. 2349df8bae1dSRodney W. Grimes */ 2350ba41b0deSKonstantin Belousov vm_map_entry_t 23512767c9f3SDoug Moore vm_map_try_merge_entries(vm_map_t map, vm_map_entry_t prev_entry, 23522767c9f3SDoug Moore vm_map_entry_t entry) 2353df8bae1dSRodney W. Grimes { 2354df8bae1dSRodney W. Grimes 235583ea714fSDoug Moore VM_MAP_ASSERT_LOCKED(map); 235683ea714fSDoug Moore if ((entry->eflags & MAP_ENTRY_NOMERGE_MASK) == 0 && 23572767c9f3SDoug Moore vm_map_mergeable_neighbors(prev_entry, entry)) { 23582767c9f3SDoug Moore vm_map_entry_unlink(map, prev_entry, UNLINK_MERGE_NEXT); 23592767c9f3SDoug Moore vm_map_merged_neighbor_dispose(map, prev_entry); 2360ba41b0deSKonstantin Belousov return (entry); 2361308c24baSJohn Dyson } 2362ba41b0deSKonstantin Belousov return (prev_entry); 2363df8bae1dSRodney W. Grimes } 236492e78c10SAlan Cox 2365df8bae1dSRodney W. Grimes /* 2366af1d6d6aSDoug Moore * vm_map_entry_back: 2367af1d6d6aSDoug Moore * 2368af1d6d6aSDoug Moore * Allocate an object to back a map entry. 2369af1d6d6aSDoug Moore */ 2370af1d6d6aSDoug Moore static inline void 2371af1d6d6aSDoug Moore vm_map_entry_back(vm_map_entry_t entry) 2372af1d6d6aSDoug Moore { 2373af1d6d6aSDoug Moore vm_object_t object; 2374af1d6d6aSDoug Moore 2375af1d6d6aSDoug Moore KASSERT(entry->object.vm_object == NULL, 2376af1d6d6aSDoug Moore ("map entry %p has backing object", entry)); 2377af1d6d6aSDoug Moore KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0, 2378af1d6d6aSDoug Moore ("map entry %p is a submap", entry)); 237967388836SKonstantin Belousov object = vm_object_allocate_anon(atop(entry->end - entry->start), NULL, 238067388836SKonstantin Belousov entry->cred, entry->end - entry->start); 2381af1d6d6aSDoug Moore entry->object.vm_object = object; 2382af1d6d6aSDoug Moore entry->offset = 0; 2383af1d6d6aSDoug Moore entry->cred = NULL; 2384af1d6d6aSDoug Moore } 2385af1d6d6aSDoug Moore 2386af1d6d6aSDoug Moore /* 2387af1d6d6aSDoug Moore * vm_map_entry_charge_object 2388af1d6d6aSDoug Moore * 2389af1d6d6aSDoug Moore * If there is no object backing this entry, create one. Otherwise, if 2390af1d6d6aSDoug Moore * the entry has cred, give it to the backing object. 2391af1d6d6aSDoug Moore */ 2392af1d6d6aSDoug Moore static inline void 2393af1d6d6aSDoug Moore vm_map_entry_charge_object(vm_map_t map, vm_map_entry_t entry) 2394af1d6d6aSDoug Moore { 2395af1d6d6aSDoug Moore 2396af1d6d6aSDoug Moore VM_MAP_ASSERT_LOCKED(map); 2397af1d6d6aSDoug Moore KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0, 2398af1d6d6aSDoug Moore ("map entry %p is a submap", entry)); 2399af1d6d6aSDoug Moore if (entry->object.vm_object == NULL && !map->system_map && 2400af1d6d6aSDoug Moore (entry->eflags & MAP_ENTRY_GUARD) == 0) 2401af1d6d6aSDoug Moore vm_map_entry_back(entry); 2402af1d6d6aSDoug Moore else if (entry->object.vm_object != NULL && 2403af1d6d6aSDoug Moore ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && 2404af1d6d6aSDoug Moore entry->cred != NULL) { 2405af1d6d6aSDoug Moore VM_OBJECT_WLOCK(entry->object.vm_object); 2406af1d6d6aSDoug Moore KASSERT(entry->object.vm_object->cred == NULL, 2407af1d6d6aSDoug Moore ("OVERCOMMIT: %s: both cred e %p", __func__, entry)); 2408af1d6d6aSDoug Moore entry->object.vm_object->cred = entry->cred; 2409af1d6d6aSDoug Moore entry->object.vm_object->charge = entry->end - entry->start; 2410af1d6d6aSDoug Moore VM_OBJECT_WUNLOCK(entry->object.vm_object); 2411af1d6d6aSDoug Moore entry->cred = NULL; 2412af1d6d6aSDoug Moore } 2413af1d6d6aSDoug Moore } 2414af1d6d6aSDoug Moore 2415af1d6d6aSDoug Moore /* 2416037c0994SDoug Moore * vm_map_entry_clone 2417037c0994SDoug Moore * 2418037c0994SDoug Moore * Create a duplicate map entry for clipping. 2419037c0994SDoug Moore */ 2420037c0994SDoug Moore static vm_map_entry_t 2421037c0994SDoug Moore vm_map_entry_clone(vm_map_t map, vm_map_entry_t entry) 2422037c0994SDoug Moore { 2423037c0994SDoug Moore vm_map_entry_t new_entry; 2424037c0994SDoug Moore 2425037c0994SDoug Moore VM_MAP_ASSERT_LOCKED(map); 2426037c0994SDoug Moore 2427037c0994SDoug Moore /* 2428037c0994SDoug Moore * Create a backing object now, if none exists, so that more individual 2429037c0994SDoug Moore * objects won't be created after the map entry is split. 2430037c0994SDoug Moore */ 2431037c0994SDoug Moore vm_map_entry_charge_object(map, entry); 2432037c0994SDoug Moore 2433037c0994SDoug Moore /* Clone the entry. */ 2434037c0994SDoug Moore new_entry = vm_map_entry_create(map); 2435037c0994SDoug Moore *new_entry = *entry; 2436037c0994SDoug Moore if (new_entry->cred != NULL) 2437037c0994SDoug Moore crhold(entry->cred); 2438037c0994SDoug Moore if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 2439037c0994SDoug Moore vm_object_reference(new_entry->object.vm_object); 2440037c0994SDoug Moore vm_map_entry_set_vnode_text(new_entry, true); 2441037c0994SDoug Moore /* 2442037c0994SDoug Moore * The object->un_pager.vnp.writemappings for the object of 2443037c0994SDoug Moore * MAP_ENTRY_WRITECNT type entry shall be kept as is here. The 2444037c0994SDoug Moore * virtual pages are re-distributed among the clipped entries, 2445037c0994SDoug Moore * so the sum is left the same. 2446037c0994SDoug Moore */ 2447037c0994SDoug Moore } 2448037c0994SDoug Moore return (new_entry); 2449037c0994SDoug Moore } 2450037c0994SDoug Moore 2451037c0994SDoug Moore /* 2452df8bae1dSRodney W. Grimes * vm_map_clip_start: [ internal use only ] 2453df8bae1dSRodney W. Grimes * 2454df8bae1dSRodney W. Grimes * Asserts that the given entry begins at or after 2455df8bae1dSRodney W. Grimes * the specified address; if necessary, 2456df8bae1dSRodney W. Grimes * it splits the entry into two. 2457df8bae1dSRodney W. Grimes */ 2458e2e80fb3SKonstantin Belousov static int 2459e2e80fb3SKonstantin Belousov vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t startaddr) 2460df8bae1dSRodney W. Grimes { 2461c0877f10SJohn Dyson vm_map_entry_t new_entry; 2462e2e80fb3SKonstantin Belousov int bdry_idx; 2463df8bae1dSRodney W. Grimes 24648a64110eSConrad Meyer if (!map->system_map) 24658a64110eSConrad Meyer WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 24668a64110eSConrad Meyer "%s: map %p entry %p start 0x%jx", __func__, map, entry, 2467e2e80fb3SKonstantin Belousov (uintmax_t)startaddr); 24688a64110eSConrad Meyer 2469e2e80fb3SKonstantin Belousov if (startaddr <= entry->start) 2470e2e80fb3SKonstantin Belousov return (KERN_SUCCESS); 2471a116b5d3SConrad Meyer 24723a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 2473e2e80fb3SKonstantin Belousov KASSERT(entry->end > startaddr && entry->start < startaddr, 2474a116b5d3SConrad Meyer ("%s: invalid clip of entry %p", __func__, entry)); 24753a0916b8SKonstantin Belousov 2476d0e4e53eSMark Johnston bdry_idx = MAP_ENTRY_SPLIT_BOUNDARY_INDEX(entry); 2477e2e80fb3SKonstantin Belousov if (bdry_idx != 0) { 2478e2e80fb3SKonstantin Belousov if ((startaddr & (pagesizes[bdry_idx] - 1)) != 0) 2479e2e80fb3SKonstantin Belousov return (KERN_INVALID_ARGUMENT); 2480e2e80fb3SKonstantin Belousov } 2481e2e80fb3SKonstantin Belousov 2482037c0994SDoug Moore new_entry = vm_map_entry_clone(map, entry); 2483df8bae1dSRodney W. Grimes 24844766eba1SDoug Moore /* 24854766eba1SDoug Moore * Split off the front portion. Insert the new entry BEFORE this one, 24864766eba1SDoug Moore * so that this entry has the specified starting address. 24874766eba1SDoug Moore */ 2488e2e80fb3SKonstantin Belousov new_entry->end = startaddr; 24899f701172SKonstantin Belousov vm_map_entry_link(map, new_entry); 2490e2e80fb3SKonstantin Belousov return (KERN_SUCCESS); 2491c0877f10SJohn Dyson } 2492df8bae1dSRodney W. Grimes 2493df8bae1dSRodney W. Grimes /* 2494c7b23459SDoug Moore * vm_map_lookup_clip_start: 2495c7b23459SDoug Moore * 2496c7b23459SDoug Moore * Find the entry at or just after 'start', and clip it if 'start' is in 2497c7b23459SDoug Moore * the interior of the entry. Return entry after 'start', and in 2498c7b23459SDoug Moore * prev_entry set the entry before 'start'. 2499c7b23459SDoug Moore */ 2500e2e80fb3SKonstantin Belousov static int 2501c7b23459SDoug Moore vm_map_lookup_clip_start(vm_map_t map, vm_offset_t start, 2502e2e80fb3SKonstantin Belousov vm_map_entry_t *res_entry, vm_map_entry_t *prev_entry) 2503c7b23459SDoug Moore { 2504c7b23459SDoug Moore vm_map_entry_t entry; 2505e2e80fb3SKonstantin Belousov int rv; 2506c7b23459SDoug Moore 25078a64110eSConrad Meyer if (!map->system_map) 25088a64110eSConrad Meyer WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 25098a64110eSConrad Meyer "%s: map %p start 0x%jx prev %p", __func__, map, 25108a64110eSConrad Meyer (uintmax_t)start, prev_entry); 25118a64110eSConrad Meyer 2512c7b23459SDoug Moore if (vm_map_lookup_entry(map, start, prev_entry)) { 2513c7b23459SDoug Moore entry = *prev_entry; 2514e2e80fb3SKonstantin Belousov rv = vm_map_clip_start(map, entry, start); 2515e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) 2516e2e80fb3SKonstantin Belousov return (rv); 2517c7b23459SDoug Moore *prev_entry = vm_map_entry_pred(entry); 2518c7b23459SDoug Moore } else 2519c7b23459SDoug Moore entry = vm_map_entry_succ(*prev_entry); 2520e2e80fb3SKonstantin Belousov *res_entry = entry; 2521e2e80fb3SKonstantin Belousov return (KERN_SUCCESS); 2522c7b23459SDoug Moore } 2523c7b23459SDoug Moore 2524c7b23459SDoug Moore /* 2525df8bae1dSRodney W. Grimes * vm_map_clip_end: [ internal use only ] 2526df8bae1dSRodney W. Grimes * 2527df8bae1dSRodney W. Grimes * Asserts that the given entry ends at or before 2528df8bae1dSRodney W. Grimes * the specified address; if necessary, 2529df8bae1dSRodney W. Grimes * it splits the entry into two. 2530df8bae1dSRodney W. Grimes */ 2531e2e80fb3SKonstantin Belousov static int 2532e2e80fb3SKonstantin Belousov vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t endaddr) 2533df8bae1dSRodney W. Grimes { 2534c0877f10SJohn Dyson vm_map_entry_t new_entry; 2535e2e80fb3SKonstantin Belousov int bdry_idx; 2536df8bae1dSRodney W. Grimes 25378a64110eSConrad Meyer if (!map->system_map) 25388a64110eSConrad Meyer WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 25398a64110eSConrad Meyer "%s: map %p entry %p end 0x%jx", __func__, map, entry, 2540e2e80fb3SKonstantin Belousov (uintmax_t)endaddr); 25418a64110eSConrad Meyer 2542e2e80fb3SKonstantin Belousov if (endaddr >= entry->end) 2543e2e80fb3SKonstantin Belousov return (KERN_SUCCESS); 2544a116b5d3SConrad Meyer 25453a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 2546e2e80fb3SKonstantin Belousov KASSERT(entry->start < endaddr && entry->end > endaddr, 2547a116b5d3SConrad Meyer ("%s: invalid clip of entry %p", __func__, entry)); 25483a0916b8SKonstantin Belousov 2549d0e4e53eSMark Johnston bdry_idx = MAP_ENTRY_SPLIT_BOUNDARY_INDEX(entry); 2550e2e80fb3SKonstantin Belousov if (bdry_idx != 0) { 2551e2e80fb3SKonstantin Belousov if ((endaddr & (pagesizes[bdry_idx] - 1)) != 0) 2552e2e80fb3SKonstantin Belousov return (KERN_INVALID_ARGUMENT); 2553e2e80fb3SKonstantin Belousov } 2554e2e80fb3SKonstantin Belousov 2555037c0994SDoug Moore new_entry = vm_map_entry_clone(map, entry); 2556df8bae1dSRodney W. Grimes 25574766eba1SDoug Moore /* 25584766eba1SDoug Moore * Split off the back portion. Insert the new entry AFTER this one, 25594766eba1SDoug Moore * so that this entry has the specified ending address. 25604766eba1SDoug Moore */ 2561e2e80fb3SKonstantin Belousov new_entry->start = endaddr; 25629f701172SKonstantin Belousov vm_map_entry_link(map, new_entry); 2563e2e80fb3SKonstantin Belousov 2564e2e80fb3SKonstantin Belousov return (KERN_SUCCESS); 2565c0877f10SJohn Dyson } 2566df8bae1dSRodney W. Grimes 2567df8bae1dSRodney W. Grimes /* 2568df8bae1dSRodney W. Grimes * vm_map_submap: [ kernel use only ] 2569df8bae1dSRodney W. Grimes * 2570df8bae1dSRodney W. Grimes * Mark the given range as handled by a subordinate map. 2571df8bae1dSRodney W. Grimes * 2572df8bae1dSRodney W. Grimes * This range must have been created with vm_map_find, 2573df8bae1dSRodney W. Grimes * and no other operations may have been performed on this 2574df8bae1dSRodney W. Grimes * range prior to calling vm_map_submap. 2575df8bae1dSRodney W. Grimes * 2576df8bae1dSRodney W. Grimes * Only a limited number of operations can be performed 2577df8bae1dSRodney W. Grimes * within this rage after calling vm_map_submap: 2578df8bae1dSRodney W. Grimes * vm_fault 2579df8bae1dSRodney W. Grimes * [Don't try vm_map_copy!] 2580df8bae1dSRodney W. Grimes * 2581df8bae1dSRodney W. Grimes * To remove a submapping, one must first remove the 2582df8bae1dSRodney W. Grimes * range from the superior map, and then destroy the 2583df8bae1dSRodney W. Grimes * submap (if desired). [Better yet, don't try it.] 2584df8bae1dSRodney W. Grimes */ 2585df8bae1dSRodney W. Grimes int 25861b40f8c0SMatthew Dillon vm_map_submap( 25871b40f8c0SMatthew Dillon vm_map_t map, 25881b40f8c0SMatthew Dillon vm_offset_t start, 25891b40f8c0SMatthew Dillon vm_offset_t end, 25901b40f8c0SMatthew Dillon vm_map_t submap) 2591df8bae1dSRodney W. Grimes { 2592df8bae1dSRodney W. Grimes vm_map_entry_t entry; 2593fa50a355SKonstantin Belousov int result; 2594fa50a355SKonstantin Belousov 2595fa50a355SKonstantin Belousov result = KERN_INVALID_ARGUMENT; 2596fa50a355SKonstantin Belousov 2597fa50a355SKonstantin Belousov vm_map_lock(submap); 2598fa50a355SKonstantin Belousov submap->flags |= MAP_IS_SUB_MAP; 2599fa50a355SKonstantin Belousov vm_map_unlock(submap); 2600df8bae1dSRodney W. Grimes 2601df8bae1dSRodney W. Grimes vm_map_lock(map); 2602df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 2603e6bd3a81SMark Johnston if (vm_map_lookup_entry(map, start, &entry) && entry->end >= end && 2604e6bd3a81SMark Johnston (entry->eflags & MAP_ENTRY_COW) == 0 && 2605e6bd3a81SMark Johnston entry->object.vm_object == NULL) { 2606e2e80fb3SKonstantin Belousov result = vm_map_clip_start(map, entry, start); 2607e2e80fb3SKonstantin Belousov if (result != KERN_SUCCESS) 2608e2e80fb3SKonstantin Belousov goto unlock; 2609e2e80fb3SKonstantin Belousov result = vm_map_clip_end(map, entry, end); 2610e2e80fb3SKonstantin Belousov if (result != KERN_SUCCESS) 2611e2e80fb3SKonstantin Belousov goto unlock; 26122d8acc0fSJohn Dyson entry->object.sub_map = submap; 2613afa07f7eSJohn Dyson entry->eflags |= MAP_ENTRY_IS_SUB_MAP; 2614df8bae1dSRodney W. Grimes result = KERN_SUCCESS; 2615df8bae1dSRodney W. Grimes } 2616e2e80fb3SKonstantin Belousov unlock: 2617df8bae1dSRodney W. Grimes vm_map_unlock(map); 2618df8bae1dSRodney W. Grimes 2619fa50a355SKonstantin Belousov if (result != KERN_SUCCESS) { 2620fa50a355SKonstantin Belousov vm_map_lock(submap); 2621fa50a355SKonstantin Belousov submap->flags &= ~MAP_IS_SUB_MAP; 2622fa50a355SKonstantin Belousov vm_map_unlock(submap); 2623fa50a355SKonstantin Belousov } 2624df8bae1dSRodney W. Grimes return (result); 2625df8bae1dSRodney W. Grimes } 2626df8bae1dSRodney W. Grimes 2627df8bae1dSRodney W. Grimes /* 2628dd05fa19SAlan Cox * The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified 26291f78f902SAlan Cox */ 26301f78f902SAlan Cox #define MAX_INIT_PT 96 26311f78f902SAlan Cox 26321f78f902SAlan Cox /* 26330551c08dSAlan Cox * vm_map_pmap_enter: 26340551c08dSAlan Cox * 2635dd05fa19SAlan Cox * Preload the specified map's pmap with mappings to the specified 2636dd05fa19SAlan Cox * object's memory-resident pages. No further physical pages are 2637dd05fa19SAlan Cox * allocated, and no further virtual pages are retrieved from secondary 2638dd05fa19SAlan Cox * storage. If the specified flags include MAP_PREFAULT_PARTIAL, then a 2639dd05fa19SAlan Cox * limited number of page mappings are created at the low-end of the 2640dd05fa19SAlan Cox * specified address range. (For this purpose, a superpage mapping 2641dd05fa19SAlan Cox * counts as one page mapping.) Otherwise, all resident pages within 26423453bca8SAlan Cox * the specified address range are mapped. 26430551c08dSAlan Cox */ 2644077ec27cSAlan Cox static void 26454da4d293SAlan Cox vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, 26460551c08dSAlan Cox vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags) 26470551c08dSAlan Cox { 26488fece8c3SAlan Cox vm_offset_t start; 2649ce142d9eSAlan Cox vm_page_t p, p_start; 2650dd05fa19SAlan Cox vm_pindex_t mask, psize, threshold, tmpidx; 26510551c08dSAlan Cox 2652ba8bca61SAlan Cox if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL) 26531f78f902SAlan Cox return; 26549af6d512SAttilio Rao if (object->type == OBJT_DEVICE || object->type == OBJT_SG) { 265589f6b863SAttilio Rao VM_OBJECT_WLOCK(object); 265601381811SJohn Baldwin if (object->type == OBJT_DEVICE || object->type == OBJT_SG) { 26579af6d512SAttilio Rao pmap_object_init_pt(map->pmap, addr, object, pindex, 26589af6d512SAttilio Rao size); 26599af6d512SAttilio Rao VM_OBJECT_WUNLOCK(object); 26609af6d512SAttilio Rao return; 26619af6d512SAttilio Rao } 26629af6d512SAttilio Rao VM_OBJECT_LOCK_DOWNGRADE(object); 2663886b9021SJeff Roberson } else 2664886b9021SJeff Roberson VM_OBJECT_RLOCK(object); 26651f78f902SAlan Cox 26661f78f902SAlan Cox psize = atop(size); 26671f78f902SAlan Cox if (psize + pindex > object->size) { 2668ed2f945aSMark Johnston if (pindex >= object->size) { 26699af6d512SAttilio Rao VM_OBJECT_RUNLOCK(object); 26709af6d512SAttilio Rao return; 26719af6d512SAttilio Rao } 26721f78f902SAlan Cox psize = object->size - pindex; 26731f78f902SAlan Cox } 26741f78f902SAlan Cox 2675ce142d9eSAlan Cox start = 0; 2676ce142d9eSAlan Cox p_start = NULL; 2677dd05fa19SAlan Cox threshold = MAX_INIT_PT; 26781f78f902SAlan Cox 2679b382c10aSKonstantin Belousov p = vm_page_find_least(object, pindex); 26801f78f902SAlan Cox /* 26811f78f902SAlan Cox * Assert: the variable p is either (1) the page with the 26821f78f902SAlan Cox * least pindex greater than or equal to the parameter pindex 26831f78f902SAlan Cox * or (2) NULL. 26841f78f902SAlan Cox */ 26851f78f902SAlan Cox for (; 26861f78f902SAlan Cox p != NULL && (tmpidx = p->pindex - pindex) < psize; 26871f78f902SAlan Cox p = TAILQ_NEXT(p, listq)) { 26881f78f902SAlan Cox /* 26891f78f902SAlan Cox * don't allow an madvise to blow away our really 26901f78f902SAlan Cox * free pages allocating pv entries. 26911f78f902SAlan Cox */ 2692dd05fa19SAlan Cox if (((flags & MAP_PREFAULT_MADVISE) != 0 && 2693e2068d0bSJeff Roberson vm_page_count_severe()) || 2694dd05fa19SAlan Cox ((flags & MAP_PREFAULT_PARTIAL) != 0 && 2695dd05fa19SAlan Cox tmpidx >= threshold)) { 2696379fb642SAlan Cox psize = tmpidx; 26971f78f902SAlan Cox break; 26981f78f902SAlan Cox } 26990012f373SJeff Roberson if (vm_page_all_valid(p)) { 2700ce142d9eSAlan Cox if (p_start == NULL) { 2701ce142d9eSAlan Cox start = addr + ptoa(tmpidx); 2702ce142d9eSAlan Cox p_start = p; 2703ce142d9eSAlan Cox } 2704dd05fa19SAlan Cox /* Jump ahead if a superpage mapping is possible. */ 2705dd05fa19SAlan Cox if (p->psind > 0 && ((addr + ptoa(tmpidx)) & 2706dd05fa19SAlan Cox (pagesizes[p->psind] - 1)) == 0) { 2707dd05fa19SAlan Cox mask = atop(pagesizes[p->psind]) - 1; 2708dd05fa19SAlan Cox if (tmpidx + mask < psize && 270988302601SAlan Cox vm_page_ps_test(p, PS_ALL_VALID, NULL)) { 2710dd05fa19SAlan Cox p += mask; 2711dd05fa19SAlan Cox threshold += mask; 2712dd05fa19SAlan Cox } 2713dd05fa19SAlan Cox } 27147bfda801SAlan Cox } else if (p_start != NULL) { 2715cf4682aeSAlan Cox pmap_enter_object(map->pmap, start, addr + 2716cf4682aeSAlan Cox ptoa(tmpidx), p_start, prot); 2717cf4682aeSAlan Cox p_start = NULL; 2718cf4682aeSAlan Cox } 2719cf4682aeSAlan Cox } 2720c46b90e9SAlan Cox if (p_start != NULL) 2721379fb642SAlan Cox pmap_enter_object(map->pmap, start, addr + ptoa(psize), 2722379fb642SAlan Cox p_start, prot); 27239af6d512SAttilio Rao VM_OBJECT_RUNLOCK(object); 27240551c08dSAlan Cox } 27250551c08dSAlan Cox 272679169929SKonstantin Belousov static void 272779169929SKonstantin Belousov vm_map_protect_guard(vm_map_entry_t entry, vm_prot_t new_prot, 272879169929SKonstantin Belousov vm_prot_t new_maxprot, int flags) 272979169929SKonstantin Belousov { 273055be6be1SKonstantin Belousov vm_prot_t old_prot; 273155be6be1SKonstantin Belousov 273279169929SKonstantin Belousov MPASS((entry->eflags & MAP_ENTRY_GUARD) != 0); 273355be6be1SKonstantin Belousov if ((entry->eflags & (MAP_ENTRY_STACK_GAP_UP | 273455be6be1SKonstantin Belousov MAP_ENTRY_STACK_GAP_DN)) == 0) 273555be6be1SKonstantin Belousov return; 273655be6be1SKonstantin Belousov 273755be6be1SKonstantin Belousov old_prot = PROT_EXTRACT(entry->offset); 273855be6be1SKonstantin Belousov if ((flags & VM_MAP_PROTECT_SET_MAXPROT) != 0) { 273955be6be1SKonstantin Belousov entry->offset = PROT_MAX(new_maxprot) | 274055be6be1SKonstantin Belousov (new_maxprot & old_prot); 274155be6be1SKonstantin Belousov } 274255be6be1SKonstantin Belousov if ((flags & VM_MAP_PROTECT_SET_PROT) != 0) { 274355be6be1SKonstantin Belousov entry->offset = new_prot | PROT_MAX( 274455be6be1SKonstantin Belousov PROT_MAX_EXTRACT(entry->offset)); 274555be6be1SKonstantin Belousov } 274679169929SKonstantin Belousov } 274779169929SKonstantin Belousov 27480551c08dSAlan Cox /* 2749df8bae1dSRodney W. Grimes * vm_map_protect: 2750df8bae1dSRodney W. Grimes * 27510659df6fSKonstantin Belousov * Sets the protection and/or the maximum protection of the 27520659df6fSKonstantin Belousov * specified address region in the target map. 2753df8bae1dSRodney W. Grimes */ 2754df8bae1dSRodney W. Grimes int 2755b9dcd593SBruce Evans vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, 27560659df6fSKonstantin Belousov vm_prot_t new_prot, vm_prot_t new_maxprot, int flags) 2757df8bae1dSRodney W. Grimes { 27582767c9f3SDoug Moore vm_map_entry_t entry, first_entry, in_tran, prev_entry; 27593364c323SKonstantin Belousov vm_object_t obj; 2760ef694c1aSEdward Tomasz Napierala struct ucred *cred; 2761*90049eabSKonstantin Belousov vm_offset_t orig_start; 276255be6be1SKonstantin Belousov vm_prot_t check_prot, max_prot, old_prot; 2763a72dce34SDoug Moore int rv; 2764df8bae1dSRodney W. Grimes 276579e9451fSKonstantin Belousov if (start == end) 276679e9451fSKonstantin Belousov return (KERN_SUCCESS); 276779e9451fSKonstantin Belousov 27680fb6aae7SKonstantin Belousov if (CONTAINS_BITS(flags, VM_MAP_PROTECT_SET_PROT | 27690fb6aae7SKonstantin Belousov VM_MAP_PROTECT_SET_MAXPROT) && 27700fb6aae7SKonstantin Belousov !CONTAINS_BITS(new_maxprot, new_prot)) 27710659df6fSKonstantin Belousov return (KERN_OUT_OF_BOUNDS); 27720659df6fSKonstantin Belousov 2773*90049eabSKonstantin Belousov orig_start = start; 277419f5d9f2SKonstantin Belousov again: 277519f5d9f2SKonstantin Belousov in_tran = NULL; 2776*90049eabSKonstantin Belousov start = orig_start; 2777df8bae1dSRodney W. Grimes vm_map_lock(map); 2778df8bae1dSRodney W. Grimes 27790659df6fSKonstantin Belousov if ((map->flags & MAP_WXORX) != 0 && 27800659df6fSKonstantin Belousov (flags & VM_MAP_PROTECT_SET_PROT) != 0 && 27810fb6aae7SKonstantin Belousov CONTAINS_BITS(new_prot, VM_PROT_WRITE | VM_PROT_EXECUTE)) { 27822e1c94aaSKonstantin Belousov vm_map_unlock(map); 27832e1c94aaSKonstantin Belousov return (KERN_PROTECTION_FAILURE); 27842e1c94aaSKonstantin Belousov } 27852e1c94aaSKonstantin Belousov 2786e1cb9d37SMark Johnston /* 2787e1cb9d37SMark Johnston * Ensure that we are not concurrently wiring pages. vm_map_wire() may 2788e1cb9d37SMark Johnston * need to fault pages into the map and will drop the map lock while 2789e1cb9d37SMark Johnston * doing so, and the VM object may end up in an inconsistent state if we 2790e1cb9d37SMark Johnston * update the protection on the map entry in between faults. 2791e1cb9d37SMark Johnston */ 2792e1cb9d37SMark Johnston vm_map_wait_busy(map); 2793e1cb9d37SMark Johnston 2794df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 2795df8bae1dSRodney W. Grimes 27962767c9f3SDoug Moore if (!vm_map_lookup_entry(map, start, &first_entry)) 27972767c9f3SDoug Moore first_entry = vm_map_entry_succ(first_entry); 2798df8bae1dSRodney W. Grimes 2799*90049eabSKonstantin Belousov if ((flags & VM_MAP_PROTECT_GROWSDOWN) != 0 && 2800*90049eabSKonstantin Belousov (first_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0) { 2801*90049eabSKonstantin Belousov /* 2802*90049eabSKonstantin Belousov * Handle Linux's PROT_GROWSDOWN flag. 2803*90049eabSKonstantin Belousov * It means that protection is applied down to the 2804*90049eabSKonstantin Belousov * whole stack, including the specified range of the 2805*90049eabSKonstantin Belousov * mapped region, and the grow down region (AKA 2806*90049eabSKonstantin Belousov * guard). 2807*90049eabSKonstantin Belousov */ 2808*90049eabSKonstantin Belousov while (!CONTAINS_BITS(first_entry->eflags, 2809*90049eabSKonstantin Belousov MAP_ENTRY_GUARD | MAP_ENTRY_STACK_GAP_DN) && 2810*90049eabSKonstantin Belousov first_entry != vm_map_entry_first(map)) 2811*90049eabSKonstantin Belousov first_entry = vm_map_entry_pred(first_entry); 2812*90049eabSKonstantin Belousov start = first_entry->start; 2813*90049eabSKonstantin Belousov } 2814*90049eabSKonstantin Belousov 2815df8bae1dSRodney W. Grimes /* 28160d94caffSDavid Greenman * Make a first pass to check for protection violations. 2817df8bae1dSRodney W. Grimes */ 28180fb6aae7SKonstantin Belousov check_prot = 0; 28190fb6aae7SKonstantin Belousov if ((flags & VM_MAP_PROTECT_SET_PROT) != 0) 28200fb6aae7SKonstantin Belousov check_prot |= new_prot; 28210fb6aae7SKonstantin Belousov if ((flags & VM_MAP_PROTECT_SET_MAXPROT) != 0) 28220fb6aae7SKonstantin Belousov check_prot |= new_maxprot; 28232767c9f3SDoug Moore for (entry = first_entry; entry->start < end; 28242767c9f3SDoug Moore entry = vm_map_entry_succ(entry)) { 28252767c9f3SDoug Moore if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) { 2826a1f6d91cSDavid Greenman vm_map_unlock(map); 2827df8bae1dSRodney W. Grimes return (KERN_INVALID_ARGUMENT); 2828a1f6d91cSDavid Greenman } 282955be6be1SKonstantin Belousov if ((entry->eflags & (MAP_ENTRY_GUARD | 283055be6be1SKonstantin Belousov MAP_ENTRY_STACK_GAP_DN | MAP_ENTRY_STACK_GAP_UP)) == 283155be6be1SKonstantin Belousov MAP_ENTRY_GUARD) 283279169929SKonstantin Belousov continue; 283355be6be1SKonstantin Belousov max_prot = (entry->eflags & (MAP_ENTRY_STACK_GAP_DN | 283455be6be1SKonstantin Belousov MAP_ENTRY_STACK_GAP_UP)) != 0 ? 283555be6be1SKonstantin Belousov PROT_MAX_EXTRACT(entry->offset) : entry->max_protection; 283655be6be1SKonstantin Belousov if (!CONTAINS_BITS(max_prot, check_prot)) { 2837df8bae1dSRodney W. Grimes vm_map_unlock(map); 2838df8bae1dSRodney W. Grimes return (KERN_PROTECTION_FAILURE); 2839df8bae1dSRodney W. Grimes } 28402767c9f3SDoug Moore if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0) 28412767c9f3SDoug Moore in_tran = entry; 284219f5d9f2SKonstantin Belousov } 284319f5d9f2SKonstantin Belousov 284419f5d9f2SKonstantin Belousov /* 2845bdb90e76SDoug Moore * Postpone the operation until all in-transition map entries have 2846bdb90e76SDoug Moore * stabilized. An in-transition entry might already have its pages 2847bdb90e76SDoug Moore * wired and wired_count incremented, but not yet have its 2848bdb90e76SDoug Moore * MAP_ENTRY_USER_WIRED flag set. In which case, we would fail to call 2849bdb90e76SDoug Moore * vm_fault_copy_entry() in the final loop below. 285019f5d9f2SKonstantin Belousov */ 285119f5d9f2SKonstantin Belousov if (in_tran != NULL) { 285219f5d9f2SKonstantin Belousov in_tran->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 285319f5d9f2SKonstantin Belousov vm_map_unlock_and_wait(map, 0); 285419f5d9f2SKonstantin Belousov goto again; 2855df8bae1dSRodney W. Grimes } 2856df8bae1dSRodney W. Grimes 28573364c323SKonstantin Belousov /* 2858a72dce34SDoug Moore * Before changing the protections, try to reserve swap space for any 2859a72dce34SDoug Moore * private (i.e., copy-on-write) mappings that are transitioning from 2860a72dce34SDoug Moore * read-only to read/write access. If a reservation fails, break out 2861a72dce34SDoug Moore * of this loop early and let the next loop simplify the entries, since 2862a72dce34SDoug Moore * some may now be mergeable. 28633364c323SKonstantin Belousov */ 2864e2e80fb3SKonstantin Belousov rv = vm_map_clip_start(map, first_entry, start); 2865e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) { 2866e2e80fb3SKonstantin Belousov vm_map_unlock(map); 2867e2e80fb3SKonstantin Belousov return (rv); 2868e2e80fb3SKonstantin Belousov } 28692767c9f3SDoug Moore for (entry = first_entry; entry->start < end; 28702767c9f3SDoug Moore entry = vm_map_entry_succ(entry)) { 2871e2e80fb3SKonstantin Belousov rv = vm_map_clip_end(map, entry, end); 2872e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) { 2873e2e80fb3SKonstantin Belousov vm_map_unlock(map); 2874e2e80fb3SKonstantin Belousov return (rv); 2875e2e80fb3SKonstantin Belousov } 28763364c323SKonstantin Belousov 28770659df6fSKonstantin Belousov if ((flags & VM_MAP_PROTECT_SET_PROT) == 0 || 28782767c9f3SDoug Moore ((new_prot & ~entry->protection) & VM_PROT_WRITE) == 0 || 28792767c9f3SDoug Moore ENTRY_CHARGED(entry) || 28800659df6fSKonstantin Belousov (entry->eflags & MAP_ENTRY_GUARD) != 0) 28813364c323SKonstantin Belousov continue; 28823364c323SKonstantin Belousov 2883ef694c1aSEdward Tomasz Napierala cred = curthread->td_ucred; 28842767c9f3SDoug Moore obj = entry->object.vm_object; 28853364c323SKonstantin Belousov 28862767c9f3SDoug Moore if (obj == NULL || 28872767c9f3SDoug Moore (entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0) { 28882767c9f3SDoug Moore if (!swap_reserve(entry->end - entry->start)) { 2889a72dce34SDoug Moore rv = KERN_RESOURCE_SHORTAGE; 28902767c9f3SDoug Moore end = entry->end; 2891a72dce34SDoug Moore break; 28923364c323SKonstantin Belousov } 2893ef694c1aSEdward Tomasz Napierala crhold(cred); 28942767c9f3SDoug Moore entry->cred = cred; 28953364c323SKonstantin Belousov continue; 28963364c323SKonstantin Belousov } 28973364c323SKonstantin Belousov 289889f6b863SAttilio Rao VM_OBJECT_WLOCK(obj); 28990cb2610eSMark Johnston if ((obj->flags & OBJ_SWAP) == 0) { 290089f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 29013364c323SKonstantin Belousov continue; 29023364c323SKonstantin Belousov } 29033364c323SKonstantin Belousov 29043364c323SKonstantin Belousov /* 29053364c323SKonstantin Belousov * Charge for the whole object allocation now, since 29063364c323SKonstantin Belousov * we cannot distinguish between non-charged and 29073364c323SKonstantin Belousov * charged clipped mapping of the same object later. 29083364c323SKonstantin Belousov */ 29093364c323SKonstantin Belousov KASSERT(obj->charge == 0, 29103d95614fSKonstantin Belousov ("vm_map_protect: object %p overcharged (entry %p)", 29112767c9f3SDoug Moore obj, entry)); 29123364c323SKonstantin Belousov if (!swap_reserve(ptoa(obj->size))) { 291389f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 2914a72dce34SDoug Moore rv = KERN_RESOURCE_SHORTAGE; 29152767c9f3SDoug Moore end = entry->end; 2916a72dce34SDoug Moore break; 29173364c323SKonstantin Belousov } 29183364c323SKonstantin Belousov 2919ef694c1aSEdward Tomasz Napierala crhold(cred); 2920ef694c1aSEdward Tomasz Napierala obj->cred = cred; 29213364c323SKonstantin Belousov obj->charge = ptoa(obj->size); 292289f6b863SAttilio Rao VM_OBJECT_WUNLOCK(obj); 29233364c323SKonstantin Belousov } 29243364c323SKonstantin Belousov 2925df8bae1dSRodney W. Grimes /* 2926a72dce34SDoug Moore * If enough swap space was available, go back and fix up protections. 2927a72dce34SDoug Moore * Otherwise, just simplify entries, since some may have been modified. 2928a72dce34SDoug Moore * [Note that clipping is not necessary the second time.] 2929df8bae1dSRodney W. Grimes */ 29302767c9f3SDoug Moore for (prev_entry = vm_map_entry_pred(first_entry), entry = first_entry; 29312767c9f3SDoug Moore entry->start < end; 29322767c9f3SDoug Moore vm_map_try_merge_entries(map, prev_entry, entry), 29332767c9f3SDoug Moore prev_entry = entry, entry = vm_map_entry_succ(entry)) { 293479169929SKonstantin Belousov if (rv != KERN_SUCCESS) 293519bd0d9cSKonstantin Belousov continue; 293619bd0d9cSKonstantin Belousov 293779169929SKonstantin Belousov if ((entry->eflags & MAP_ENTRY_GUARD) != 0) { 293879169929SKonstantin Belousov vm_map_protect_guard(entry, new_prot, new_maxprot, 293979169929SKonstantin Belousov flags); 294079169929SKonstantin Belousov continue; 294179169929SKonstantin Belousov } 294279169929SKonstantin Belousov 29432767c9f3SDoug Moore old_prot = entry->protection; 2944210a6886SKonstantin Belousov 29450659df6fSKonstantin Belousov if ((flags & VM_MAP_PROTECT_SET_MAXPROT) != 0) { 29460659df6fSKonstantin Belousov entry->max_protection = new_maxprot; 29470659df6fSKonstantin Belousov entry->protection = new_maxprot & old_prot; 29480659df6fSKonstantin Belousov } 29490659df6fSKonstantin Belousov if ((flags & VM_MAP_PROTECT_SET_PROT) != 0) 29502767c9f3SDoug Moore entry->protection = new_prot; 2951df8bae1dSRodney W. Grimes 2952dd006a1bSAlan Cox /* 2953dd006a1bSAlan Cox * For user wired map entries, the normal lazy evaluation of 2954dd006a1bSAlan Cox * write access upgrades through soft page faults is 2955dd006a1bSAlan Cox * undesirable. Instead, immediately copy any pages that are 2956dd006a1bSAlan Cox * copy-on-write and enable write access in the physical map. 2957dd006a1bSAlan Cox */ 29582767c9f3SDoug Moore if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0 && 29592767c9f3SDoug Moore (entry->protection & VM_PROT_WRITE) != 0 && 29605930251aSKonstantin Belousov (old_prot & VM_PROT_WRITE) == 0) 29612767c9f3SDoug Moore vm_fault_copy_entry(map, map, entry, entry, NULL); 2962210a6886SKonstantin Belousov 2963df8bae1dSRodney W. Grimes /* 29642fafce9eSAlan Cox * When restricting access, update the physical map. Worry 29652fafce9eSAlan Cox * about copy-on-write here. 2966df8bae1dSRodney W. Grimes */ 29672767c9f3SDoug Moore if ((old_prot & ~entry->protection) != 0) { 2968afa07f7eSJohn Dyson #define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \ 2969df8bae1dSRodney W. Grimes VM_PROT_ALL) 29702767c9f3SDoug Moore pmap_protect(map->pmap, entry->start, 29712767c9f3SDoug Moore entry->end, 29722767c9f3SDoug Moore entry->protection & MASK(entry)); 2973df8bae1dSRodney W. Grimes #undef MASK 2974df8bae1dSRodney W. Grimes } 2975df8bae1dSRodney W. Grimes } 29762767c9f3SDoug Moore vm_map_try_merge_entries(map, prev_entry, entry); 2977df8bae1dSRodney W. Grimes vm_map_unlock(map); 2978a72dce34SDoug Moore return (rv); 2979df8bae1dSRodney W. Grimes } 2980df8bae1dSRodney W. Grimes 2981df8bae1dSRodney W. Grimes /* 2982867a482dSJohn Dyson * vm_map_madvise: 2983867a482dSJohn Dyson * 2984867a482dSJohn Dyson * This routine traverses a processes map handling the madvise 2985f7fc307aSAlan Cox * system call. Advisories are classified as either those effecting 2986f7fc307aSAlan Cox * the vm_map_entry structure, or those effecting the underlying 2987f7fc307aSAlan Cox * objects. 2988867a482dSJohn Dyson */ 2989b4309055SMatthew Dillon int 29901b40f8c0SMatthew Dillon vm_map_madvise( 29911b40f8c0SMatthew Dillon vm_map_t map, 29921b40f8c0SMatthew Dillon vm_offset_t start, 29931b40f8c0SMatthew Dillon vm_offset_t end, 29941b40f8c0SMatthew Dillon int behav) 2995867a482dSJohn Dyson { 29962767c9f3SDoug Moore vm_map_entry_t entry, prev_entry; 2997e2e80fb3SKonstantin Belousov int rv; 29983e7cb27cSAlan Cox bool modify_map; 2999867a482dSJohn Dyson 3000b4309055SMatthew Dillon /* 3001b4309055SMatthew Dillon * Some madvise calls directly modify the vm_map_entry, in which case 3002b4309055SMatthew Dillon * we need to use an exclusive lock on the map and we need to perform 3003b4309055SMatthew Dillon * various clipping operations. Otherwise we only need a read-lock 3004b4309055SMatthew Dillon * on the map. 3005b4309055SMatthew Dillon */ 3006b4309055SMatthew Dillon switch(behav) { 3007b4309055SMatthew Dillon case MADV_NORMAL: 3008b4309055SMatthew Dillon case MADV_SEQUENTIAL: 3009b4309055SMatthew Dillon case MADV_RANDOM: 30104f79d873SMatthew Dillon case MADV_NOSYNC: 30114f79d873SMatthew Dillon case MADV_AUTOSYNC: 30129730a5daSPaul Saab case MADV_NOCORE: 30139730a5daSPaul Saab case MADV_CORE: 301479e9451fSKonstantin Belousov if (start == end) 30153e7cb27cSAlan Cox return (0); 30163e7cb27cSAlan Cox modify_map = true; 3017867a482dSJohn Dyson vm_map_lock(map); 3018b4309055SMatthew Dillon break; 3019b4309055SMatthew Dillon case MADV_WILLNEED: 3020b4309055SMatthew Dillon case MADV_DONTNEED: 3021b4309055SMatthew Dillon case MADV_FREE: 302279e9451fSKonstantin Belousov if (start == end) 30233e7cb27cSAlan Cox return (0); 30243e7cb27cSAlan Cox modify_map = false; 3025f7fc307aSAlan Cox vm_map_lock_read(map); 3026b4309055SMatthew Dillon break; 3027b4309055SMatthew Dillon default: 30283e7cb27cSAlan Cox return (EINVAL); 3029b4309055SMatthew Dillon } 3030b4309055SMatthew Dillon 3031b4309055SMatthew Dillon /* 3032b4309055SMatthew Dillon * Locate starting entry and clip if necessary. 3033b4309055SMatthew Dillon */ 3034867a482dSJohn Dyson VM_MAP_RANGE_CHECK(map, start, end); 3035867a482dSJohn Dyson 3036f7fc307aSAlan Cox if (modify_map) { 3037f7fc307aSAlan Cox /* 3038f7fc307aSAlan Cox * madvise behaviors that are implemented in the vm_map_entry. 3039f7fc307aSAlan Cox * 3040f7fc307aSAlan Cox * We clip the vm_map_entry so that behavioral changes are 3041f7fc307aSAlan Cox * limited to the specified address range. 3042f7fc307aSAlan Cox */ 3043e2e80fb3SKonstantin Belousov rv = vm_map_lookup_clip_start(map, start, &entry, &prev_entry); 3044e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) { 3045e2e80fb3SKonstantin Belousov vm_map_unlock(map); 3046e2e80fb3SKonstantin Belousov return (vm_mmap_to_errno(rv)); 3047e2e80fb3SKonstantin Belousov } 3048e2e80fb3SKonstantin Belousov 3049e2e80fb3SKonstantin Belousov for (; entry->start < end; prev_entry = entry, 3050e2e80fb3SKonstantin Belousov entry = vm_map_entry_succ(entry)) { 30512767c9f3SDoug Moore if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) 3052867a482dSJohn Dyson continue; 3053fed9a903SJohn Dyson 3054e2e80fb3SKonstantin Belousov rv = vm_map_clip_end(map, entry, end); 3055e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) { 3056e2e80fb3SKonstantin Belousov vm_map_unlock(map); 3057e2e80fb3SKonstantin Belousov return (vm_mmap_to_errno(rv)); 3058e2e80fb3SKonstantin Belousov } 3059fed9a903SJohn Dyson 3060f7fc307aSAlan Cox switch (behav) { 3061867a482dSJohn Dyson case MADV_NORMAL: 30622767c9f3SDoug Moore vm_map_entry_set_behavior(entry, 30632767c9f3SDoug Moore MAP_ENTRY_BEHAV_NORMAL); 3064867a482dSJohn Dyson break; 3065867a482dSJohn Dyson case MADV_SEQUENTIAL: 30662767c9f3SDoug Moore vm_map_entry_set_behavior(entry, 30672767c9f3SDoug Moore MAP_ENTRY_BEHAV_SEQUENTIAL); 3068867a482dSJohn Dyson break; 3069867a482dSJohn Dyson case MADV_RANDOM: 30702767c9f3SDoug Moore vm_map_entry_set_behavior(entry, 30712767c9f3SDoug Moore MAP_ENTRY_BEHAV_RANDOM); 3072867a482dSJohn Dyson break; 30734f79d873SMatthew Dillon case MADV_NOSYNC: 30742767c9f3SDoug Moore entry->eflags |= MAP_ENTRY_NOSYNC; 30754f79d873SMatthew Dillon break; 30764f79d873SMatthew Dillon case MADV_AUTOSYNC: 30772767c9f3SDoug Moore entry->eflags &= ~MAP_ENTRY_NOSYNC; 30784f79d873SMatthew Dillon break; 30799730a5daSPaul Saab case MADV_NOCORE: 30802767c9f3SDoug Moore entry->eflags |= MAP_ENTRY_NOCOREDUMP; 30819730a5daSPaul Saab break; 30829730a5daSPaul Saab case MADV_CORE: 30832767c9f3SDoug Moore entry->eflags &= ~MAP_ENTRY_NOCOREDUMP; 30849730a5daSPaul Saab break; 3085867a482dSJohn Dyson default: 3086867a482dSJohn Dyson break; 3087867a482dSJohn Dyson } 30882767c9f3SDoug Moore vm_map_try_merge_entries(map, prev_entry, entry); 3089867a482dSJohn Dyson } 30902767c9f3SDoug Moore vm_map_try_merge_entries(map, prev_entry, entry); 3091867a482dSJohn Dyson vm_map_unlock(map); 3092b4309055SMatthew Dillon } else { 309392a59946SJohn Baldwin vm_pindex_t pstart, pend; 3094f7fc307aSAlan Cox 3095f7fc307aSAlan Cox /* 3096f7fc307aSAlan Cox * madvise behaviors that are implemented in the underlying 3097f7fc307aSAlan Cox * vm_object. 3098f7fc307aSAlan Cox * 3099f7fc307aSAlan Cox * Since we don't clip the vm_map_entry, we have to clip 3100f7fc307aSAlan Cox * the vm_object pindex and count. 3101f7fc307aSAlan Cox */ 3102c7b23459SDoug Moore if (!vm_map_lookup_entry(map, start, &entry)) 3103c7b23459SDoug Moore entry = vm_map_entry_succ(entry); 31042767c9f3SDoug Moore for (; entry->start < end; 31052767c9f3SDoug Moore entry = vm_map_entry_succ(entry)) { 310651321f7cSAlan Cox vm_offset_t useEnd, useStart; 31075f99b57cSMatthew Dillon 31082767c9f3SDoug Moore if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) 3109f7fc307aSAlan Cox continue; 3110f7fc307aSAlan Cox 3111bf5661f4SKonstantin Belousov /* 3112bf5661f4SKonstantin Belousov * MADV_FREE would otherwise rewind time to 3113bf5661f4SKonstantin Belousov * the creation of the shadow object. Because 3114bf5661f4SKonstantin Belousov * we hold the VM map read-locked, neither the 3115bf5661f4SKonstantin Belousov * entry's object nor the presence of a 3116bf5661f4SKonstantin Belousov * backing object can change. 3117bf5661f4SKonstantin Belousov */ 3118bf5661f4SKonstantin Belousov if (behav == MADV_FREE && 31192767c9f3SDoug Moore entry->object.vm_object != NULL && 31202767c9f3SDoug Moore entry->object.vm_object->backing_object != NULL) 3121bf5661f4SKonstantin Belousov continue; 3122bf5661f4SKonstantin Belousov 31232767c9f3SDoug Moore pstart = OFF_TO_IDX(entry->offset); 31242767c9f3SDoug Moore pend = pstart + atop(entry->end - entry->start); 31252767c9f3SDoug Moore useStart = entry->start; 31262767c9f3SDoug Moore useEnd = entry->end; 3127f7fc307aSAlan Cox 31282767c9f3SDoug Moore if (entry->start < start) { 31292767c9f3SDoug Moore pstart += atop(start - entry->start); 31305f99b57cSMatthew Dillon useStart = start; 3131f7fc307aSAlan Cox } 31322767c9f3SDoug Moore if (entry->end > end) { 31332767c9f3SDoug Moore pend -= atop(entry->end - end); 313451321f7cSAlan Cox useEnd = end; 313551321f7cSAlan Cox } 3136f7fc307aSAlan Cox 313792a59946SJohn Baldwin if (pstart >= pend) 3138f7fc307aSAlan Cox continue; 3139f7fc307aSAlan Cox 314051321f7cSAlan Cox /* 314151321f7cSAlan Cox * Perform the pmap_advise() before clearing 314251321f7cSAlan Cox * PGA_REFERENCED in vm_page_advise(). Otherwise, a 314351321f7cSAlan Cox * concurrent pmap operation, such as pmap_remove(), 314451321f7cSAlan Cox * could clear a reference in the pmap and set 314551321f7cSAlan Cox * PGA_REFERENCED on the page before the pmap_advise() 314651321f7cSAlan Cox * had completed. Consequently, the page would appear 314751321f7cSAlan Cox * referenced based upon an old reference that 314851321f7cSAlan Cox * occurred before this pmap_advise() ran. 314951321f7cSAlan Cox */ 315051321f7cSAlan Cox if (behav == MADV_DONTNEED || behav == MADV_FREE) 315151321f7cSAlan Cox pmap_advise(map->pmap, useStart, useEnd, 315251321f7cSAlan Cox behav); 315351321f7cSAlan Cox 31542767c9f3SDoug Moore vm_object_madvise(entry->object.vm_object, pstart, 315592a59946SJohn Baldwin pend, behav); 315654432196SKonstantin Belousov 315754432196SKonstantin Belousov /* 315854432196SKonstantin Belousov * Pre-populate paging structures in the 315954432196SKonstantin Belousov * WILLNEED case. For wired entries, the 316054432196SKonstantin Belousov * paging structures are already populated. 316154432196SKonstantin Belousov */ 316254432196SKonstantin Belousov if (behav == MADV_WILLNEED && 31632767c9f3SDoug Moore entry->wired_count == 0) { 31640551c08dSAlan Cox vm_map_pmap_enter(map, 31655f99b57cSMatthew Dillon useStart, 31662767c9f3SDoug Moore entry->protection, 31672767c9f3SDoug Moore entry->object.vm_object, 316892a59946SJohn Baldwin pstart, 316992a59946SJohn Baldwin ptoa(pend - pstart), 3170e3026983SMatthew Dillon MAP_PREFAULT_MADVISE 3171b4309055SMatthew Dillon ); 3172f7fc307aSAlan Cox } 3173f7fc307aSAlan Cox } 3174f7fc307aSAlan Cox vm_map_unlock_read(map); 3175f7fc307aSAlan Cox } 3176b4309055SMatthew Dillon return (0); 3177867a482dSJohn Dyson } 3178867a482dSJohn Dyson 3179867a482dSJohn Dyson /* 3180df8bae1dSRodney W. Grimes * vm_map_inherit: 3181df8bae1dSRodney W. Grimes * 3182df8bae1dSRodney W. Grimes * Sets the inheritance of the specified address 3183df8bae1dSRodney W. Grimes * range in the target map. Inheritance 3184df8bae1dSRodney W. Grimes * affects how the map will be shared with 3185e2abaaaaSAlan Cox * child maps at the time of vmspace_fork. 3186df8bae1dSRodney W. Grimes */ 3187df8bae1dSRodney W. Grimes int 3188b9dcd593SBruce Evans vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, 3189b9dcd593SBruce Evans vm_inherit_t new_inheritance) 3190df8bae1dSRodney W. Grimes { 3191e2e80fb3SKonstantin Belousov vm_map_entry_t entry, lentry, prev_entry, start_entry; 3192e2e80fb3SKonstantin Belousov int rv; 3193df8bae1dSRodney W. Grimes 3194df8bae1dSRodney W. Grimes switch (new_inheritance) { 3195df8bae1dSRodney W. Grimes case VM_INHERIT_NONE: 3196df8bae1dSRodney W. Grimes case VM_INHERIT_COPY: 3197df8bae1dSRodney W. Grimes case VM_INHERIT_SHARE: 319878d7964bSXin LI case VM_INHERIT_ZERO: 3199df8bae1dSRodney W. Grimes break; 3200df8bae1dSRodney W. Grimes default: 3201df8bae1dSRodney W. Grimes return (KERN_INVALID_ARGUMENT); 3202df8bae1dSRodney W. Grimes } 320379e9451fSKonstantin Belousov if (start == end) 320479e9451fSKonstantin Belousov return (KERN_SUCCESS); 3205df8bae1dSRodney W. Grimes vm_map_lock(map); 3206df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 3207e2e80fb3SKonstantin Belousov rv = vm_map_lookup_clip_start(map, start, &start_entry, &prev_entry); 3208e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) 3209e2e80fb3SKonstantin Belousov goto unlock; 3210e2e80fb3SKonstantin Belousov if (vm_map_lookup_entry(map, end - 1, &lentry)) { 3211e2e80fb3SKonstantin Belousov rv = vm_map_clip_end(map, lentry, end); 3212e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) 3213e2e80fb3SKonstantin Belousov goto unlock; 3214e2e80fb3SKonstantin Belousov } 3215e2e80fb3SKonstantin Belousov if (new_inheritance == VM_INHERIT_COPY) { 3216e2e80fb3SKonstantin Belousov for (entry = start_entry; entry->start < end; 321783704cc2SDoug Moore prev_entry = entry, entry = vm_map_entry_succ(entry)) { 3218e2e80fb3SKonstantin Belousov if ((entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) 3219e2e80fb3SKonstantin Belousov != 0) { 3220e2e80fb3SKonstantin Belousov rv = KERN_INVALID_ARGUMENT; 3221e2e80fb3SKonstantin Belousov goto unlock; 3222e2e80fb3SKonstantin Belousov } 3223e2e80fb3SKonstantin Belousov } 3224e2e80fb3SKonstantin Belousov } 3225e2e80fb3SKonstantin Belousov for (entry = start_entry; entry->start < end; prev_entry = entry, 3226e2e80fb3SKonstantin Belousov entry = vm_map_entry_succ(entry)) { 3227e2e80fb3SKonstantin Belousov KASSERT(entry->end <= end, ("non-clipped entry %p end %jx %jx", 3228e2e80fb3SKonstantin Belousov entry, (uintmax_t)entry->end, (uintmax_t)end)); 322919bd0d9cSKonstantin Belousov if ((entry->eflags & MAP_ENTRY_GUARD) == 0 || 323019bd0d9cSKonstantin Belousov new_inheritance != VM_INHERIT_ZERO) 3231df8bae1dSRodney W. Grimes entry->inheritance = new_inheritance; 323283704cc2SDoug Moore vm_map_try_merge_entries(map, prev_entry, entry); 3233df8bae1dSRodney W. Grimes } 323483704cc2SDoug Moore vm_map_try_merge_entries(map, prev_entry, entry); 3235e2e80fb3SKonstantin Belousov unlock: 3236df8bae1dSRodney W. Grimes vm_map_unlock(map); 3237e2e80fb3SKonstantin Belousov return (rv); 3238df8bae1dSRodney W. Grimes } 3239df8bae1dSRodney W. Grimes 3240df8bae1dSRodney W. Grimes /* 3241312df2c1SDoug Moore * vm_map_entry_in_transition: 3242312df2c1SDoug Moore * 3243312df2c1SDoug Moore * Release the map lock, and sleep until the entry is no longer in 3244312df2c1SDoug Moore * transition. Awake and acquire the map lock. If the map changed while 3245312df2c1SDoug Moore * another held the lock, lookup a possibly-changed entry at or after the 3246312df2c1SDoug Moore * 'start' position of the old entry. 3247312df2c1SDoug Moore */ 3248312df2c1SDoug Moore static vm_map_entry_t 3249312df2c1SDoug Moore vm_map_entry_in_transition(vm_map_t map, vm_offset_t in_start, 3250312df2c1SDoug Moore vm_offset_t *io_end, bool holes_ok, vm_map_entry_t in_entry) 3251312df2c1SDoug Moore { 3252312df2c1SDoug Moore vm_map_entry_t entry; 3253312df2c1SDoug Moore vm_offset_t start; 3254312df2c1SDoug Moore u_int last_timestamp; 3255312df2c1SDoug Moore 3256312df2c1SDoug Moore VM_MAP_ASSERT_LOCKED(map); 3257312df2c1SDoug Moore KASSERT((in_entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, 3258312df2c1SDoug Moore ("not in-tranition map entry %p", in_entry)); 3259312df2c1SDoug Moore /* 3260312df2c1SDoug Moore * We have not yet clipped the entry. 3261312df2c1SDoug Moore */ 3262312df2c1SDoug Moore start = MAX(in_start, in_entry->start); 3263312df2c1SDoug Moore in_entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 3264312df2c1SDoug Moore last_timestamp = map->timestamp; 3265312df2c1SDoug Moore if (vm_map_unlock_and_wait(map, 0)) { 3266312df2c1SDoug Moore /* 3267312df2c1SDoug Moore * Allow interruption of user wiring/unwiring? 3268312df2c1SDoug Moore */ 3269312df2c1SDoug Moore } 3270312df2c1SDoug Moore vm_map_lock(map); 3271312df2c1SDoug Moore if (last_timestamp + 1 == map->timestamp) 3272312df2c1SDoug Moore return (in_entry); 3273312df2c1SDoug Moore 3274312df2c1SDoug Moore /* 3275312df2c1SDoug Moore * Look again for the entry because the map was modified while it was 3276312df2c1SDoug Moore * unlocked. Specifically, the entry may have been clipped, merged, or 3277312df2c1SDoug Moore * deleted. 3278312df2c1SDoug Moore */ 3279312df2c1SDoug Moore if (!vm_map_lookup_entry(map, start, &entry)) { 3280312df2c1SDoug Moore if (!holes_ok) { 3281312df2c1SDoug Moore *io_end = start; 3282312df2c1SDoug Moore return (NULL); 3283312df2c1SDoug Moore } 32847cdcf863SDoug Moore entry = vm_map_entry_succ(entry); 3285312df2c1SDoug Moore } 3286312df2c1SDoug Moore return (entry); 3287312df2c1SDoug Moore } 3288312df2c1SDoug Moore 3289312df2c1SDoug Moore /* 3290acd9a301SAlan Cox * vm_map_unwire: 3291acd9a301SAlan Cox * 3292e27e17b7SAlan Cox * Implements both kernel and user unwiring. 3293acd9a301SAlan Cox */ 3294acd9a301SAlan Cox int 3295acd9a301SAlan Cox vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, 3296abd498aaSBruce M Simpson int flags) 3297acd9a301SAlan Cox { 329883704cc2SDoug Moore vm_map_entry_t entry, first_entry, next_entry, prev_entry; 3299acd9a301SAlan Cox int rv; 330083704cc2SDoug Moore bool holes_ok, need_wakeup, user_unwire; 3301acd9a301SAlan Cox 330279e9451fSKonstantin Belousov if (start == end) 330379e9451fSKonstantin Belousov return (KERN_SUCCESS); 33049a0cdf94SDoug Moore holes_ok = (flags & VM_MAP_WIRE_HOLESOK) != 0; 33059a0cdf94SDoug Moore user_unwire = (flags & VM_MAP_WIRE_USER) != 0; 3306acd9a301SAlan Cox vm_map_lock(map); 3307acd9a301SAlan Cox VM_MAP_RANGE_CHECK(map, start, end); 3308d1d3f7e1SDoug Moore if (!vm_map_lookup_entry(map, start, &first_entry)) { 33099a0cdf94SDoug Moore if (holes_ok) 33107cdcf863SDoug Moore first_entry = vm_map_entry_succ(first_entry); 3311d1d3f7e1SDoug Moore else { 3312acd9a301SAlan Cox vm_map_unlock(map); 3313acd9a301SAlan Cox return (KERN_INVALID_ADDRESS); 3314acd9a301SAlan Cox } 3315abd498aaSBruce M Simpson } 3316d2860f22SDoug Moore rv = KERN_SUCCESS; 331783704cc2SDoug Moore for (entry = first_entry; entry->start < end; entry = next_entry) { 3318acd9a301SAlan Cox if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { 3319acd9a301SAlan Cox /* 3320acd9a301SAlan Cox * We have not yet clipped the entry. 3321acd9a301SAlan Cox */ 332283704cc2SDoug Moore next_entry = vm_map_entry_in_transition(map, start, 332383704cc2SDoug Moore &end, holes_ok, entry); 332483704cc2SDoug Moore if (next_entry == NULL) { 332583704cc2SDoug Moore if (entry == first_entry) { 3326acd9a301SAlan Cox vm_map_unlock(map); 3327acd9a301SAlan Cox return (KERN_INVALID_ADDRESS); 3328acd9a301SAlan Cox } 3329acd9a301SAlan Cox rv = KERN_INVALID_ADDRESS; 3330d2860f22SDoug Moore break; 3331acd9a301SAlan Cox } 333283704cc2SDoug Moore first_entry = (entry == first_entry) ? 333383704cc2SDoug Moore next_entry : NULL; 3334acd9a301SAlan Cox continue; 3335acd9a301SAlan Cox } 3336e2e80fb3SKonstantin Belousov rv = vm_map_clip_start(map, entry, start); 3337e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) 3338e2e80fb3SKonstantin Belousov break; 3339e2e80fb3SKonstantin Belousov rv = vm_map_clip_end(map, entry, end); 3340e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) 3341e2e80fb3SKonstantin Belousov break; 3342e2e80fb3SKonstantin Belousov 3343acd9a301SAlan Cox /* 3344acd9a301SAlan Cox * Mark the entry in case the map lock is released. (See 3345acd9a301SAlan Cox * above.) 3346acd9a301SAlan Cox */ 3347ff3ae454SKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 && 3348ff3ae454SKonstantin Belousov entry->wiring_thread == NULL, 3349ff3ae454SKonstantin Belousov ("owned map entry %p", entry)); 3350acd9a301SAlan Cox entry->eflags |= MAP_ENTRY_IN_TRANSITION; 33510acea7dfSKonstantin Belousov entry->wiring_thread = curthread; 335283704cc2SDoug Moore next_entry = vm_map_entry_succ(entry); 3353acd9a301SAlan Cox /* 3354acd9a301SAlan Cox * Check the map for holes in the specified region. 33559a0cdf94SDoug Moore * If holes_ok, skip this check. 3356acd9a301SAlan Cox */ 33579a0cdf94SDoug Moore if (!holes_ok && 335883704cc2SDoug Moore entry->end < end && next_entry->start > entry->end) { 3359acd9a301SAlan Cox end = entry->end; 3360acd9a301SAlan Cox rv = KERN_INVALID_ADDRESS; 3361d2860f22SDoug Moore break; 3362acd9a301SAlan Cox } 3363acd9a301SAlan Cox /* 33643ffbc0cdSAlan Cox * If system unwiring, require that the entry is system wired. 3365acd9a301SAlan Cox */ 33660ada205eSBrian Feldman if (!user_unwire && 33670ada205eSBrian Feldman vm_map_entry_system_wired_count(entry) == 0) { 3368acd9a301SAlan Cox end = entry->end; 3369acd9a301SAlan Cox rv = KERN_INVALID_ARGUMENT; 3370d2860f22SDoug Moore break; 3371acd9a301SAlan Cox } 3372acd9a301SAlan Cox } 33739a0cdf94SDoug Moore need_wakeup = false; 33749a0cdf94SDoug Moore if (first_entry == NULL && 33759a0cdf94SDoug Moore !vm_map_lookup_entry(map, start, &first_entry)) { 33769a0cdf94SDoug Moore KASSERT(holes_ok, ("vm_map_unwire: lookup failed")); 337783704cc2SDoug Moore prev_entry = first_entry; 337883704cc2SDoug Moore entry = vm_map_entry_succ(first_entry); 337983704cc2SDoug Moore } else { 338083704cc2SDoug Moore prev_entry = vm_map_entry_pred(first_entry); 338183704cc2SDoug Moore entry = first_entry; 3382acd9a301SAlan Cox } 338383704cc2SDoug Moore for (; entry->start < end; 338483704cc2SDoug Moore prev_entry = entry, entry = vm_map_entry_succ(entry)) { 33850acea7dfSKonstantin Belousov /* 33869a0cdf94SDoug Moore * If holes_ok was specified, an empty 33870acea7dfSKonstantin Belousov * space in the unwired region could have been mapped 33880acea7dfSKonstantin Belousov * while the map lock was dropped for draining 33890acea7dfSKonstantin Belousov * MAP_ENTRY_IN_TRANSITION. Moreover, another thread 33900acea7dfSKonstantin Belousov * could be simultaneously wiring this new mapping 33910acea7dfSKonstantin Belousov * entry. Detect these cases and skip any entries 33920acea7dfSKonstantin Belousov * marked as in transition by us. 33930acea7dfSKonstantin Belousov */ 33940acea7dfSKonstantin Belousov if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || 33950acea7dfSKonstantin Belousov entry->wiring_thread != curthread) { 33969a0cdf94SDoug Moore KASSERT(holes_ok, 33970acea7dfSKonstantin Belousov ("vm_map_unwire: !HOLESOK and new/changed entry")); 33980acea7dfSKonstantin Belousov continue; 33990acea7dfSKonstantin Belousov } 34000acea7dfSKonstantin Belousov 34013ffbc0cdSAlan Cox if (rv == KERN_SUCCESS && (!user_unwire || 34023ffbc0cdSAlan Cox (entry->eflags & MAP_ENTRY_USER_WIRED))) { 340303462509SAlan Cox if (entry->wired_count == 1) 340403462509SAlan Cox vm_map_entry_unwire(map, entry); 340503462509SAlan Cox else 3406b2f3846aSAlan Cox entry->wired_count--; 340754a3a114SMark Johnston if (user_unwire) 340854a3a114SMark Johnston entry->eflags &= ~MAP_ENTRY_USER_WIRED; 3409b2f3846aSAlan Cox } 34100acea7dfSKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, 3411ff3ae454SKonstantin Belousov ("vm_map_unwire: in-transition flag missing %p", entry)); 3412ff3ae454SKonstantin Belousov KASSERT(entry->wiring_thread == curthread, 3413ff3ae454SKonstantin Belousov ("vm_map_unwire: alien wire %p", entry)); 3414acd9a301SAlan Cox entry->eflags &= ~MAP_ENTRY_IN_TRANSITION; 34150acea7dfSKonstantin Belousov entry->wiring_thread = NULL; 3416acd9a301SAlan Cox if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { 3417acd9a301SAlan Cox entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; 34189a0cdf94SDoug Moore need_wakeup = true; 3419acd9a301SAlan Cox } 342083704cc2SDoug Moore vm_map_try_merge_entries(map, prev_entry, entry); 3421acd9a301SAlan Cox } 342283704cc2SDoug Moore vm_map_try_merge_entries(map, prev_entry, entry); 3423acd9a301SAlan Cox vm_map_unlock(map); 3424acd9a301SAlan Cox if (need_wakeup) 3425acd9a301SAlan Cox vm_map_wakeup(map); 3426acd9a301SAlan Cox return (rv); 3427acd9a301SAlan Cox } 3428acd9a301SAlan Cox 342954a3a114SMark Johnston static void 343054a3a114SMark Johnston vm_map_wire_user_count_sub(u_long npages) 343154a3a114SMark Johnston { 343254a3a114SMark Johnston 343354a3a114SMark Johnston atomic_subtract_long(&vm_user_wire_count, npages); 343454a3a114SMark Johnston } 343554a3a114SMark Johnston 343654a3a114SMark Johnston static bool 343754a3a114SMark Johnston vm_map_wire_user_count_add(u_long npages) 343854a3a114SMark Johnston { 343954a3a114SMark Johnston u_long wired; 344054a3a114SMark Johnston 344154a3a114SMark Johnston wired = vm_user_wire_count; 344254a3a114SMark Johnston do { 344354a3a114SMark Johnston if (npages + wired > vm_page_max_user_wired) 344454a3a114SMark Johnston return (false); 344554a3a114SMark Johnston } while (!atomic_fcmpset_long(&vm_user_wire_count, &wired, 344654a3a114SMark Johnston npages + wired)); 344754a3a114SMark Johnston 344854a3a114SMark Johnston return (true); 344954a3a114SMark Johnston } 345054a3a114SMark Johnston 3451acd9a301SAlan Cox /* 345266cd575bSAlan Cox * vm_map_wire_entry_failure: 345366cd575bSAlan Cox * 345466cd575bSAlan Cox * Handle a wiring failure on the given entry. 345566cd575bSAlan Cox * 345666cd575bSAlan Cox * The map should be locked. 345766cd575bSAlan Cox */ 345866cd575bSAlan Cox static void 345966cd575bSAlan Cox vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, 346066cd575bSAlan Cox vm_offset_t failed_addr) 346166cd575bSAlan Cox { 346266cd575bSAlan Cox 346366cd575bSAlan Cox VM_MAP_ASSERT_LOCKED(map); 346466cd575bSAlan Cox KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 && 346566cd575bSAlan Cox entry->wired_count == 1, 346666cd575bSAlan Cox ("vm_map_wire_entry_failure: entry %p isn't being wired", entry)); 346766cd575bSAlan Cox KASSERT(failed_addr < entry->end, 346866cd575bSAlan Cox ("vm_map_wire_entry_failure: entry %p was fully wired", entry)); 346966cd575bSAlan Cox 347066cd575bSAlan Cox /* 347166cd575bSAlan Cox * If any pages at the start of this entry were successfully wired, 347266cd575bSAlan Cox * then unwire them. 347366cd575bSAlan Cox */ 347466cd575bSAlan Cox if (failed_addr > entry->start) { 347566cd575bSAlan Cox pmap_unwire(map->pmap, entry->start, failed_addr); 347666cd575bSAlan Cox vm_object_unwire(entry->object.vm_object, entry->offset, 347766cd575bSAlan Cox failed_addr - entry->start, PQ_ACTIVE); 347866cd575bSAlan Cox } 347966cd575bSAlan Cox 348066cd575bSAlan Cox /* 348166cd575bSAlan Cox * Assign an out-of-range value to represent the failure to wire this 348266cd575bSAlan Cox * entry. 348366cd575bSAlan Cox */ 348466cd575bSAlan Cox entry->wired_count = -1; 348566cd575bSAlan Cox } 348666cd575bSAlan Cox 348754a3a114SMark Johnston int 348854a3a114SMark Johnston vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) 348954a3a114SMark Johnston { 349054a3a114SMark Johnston int rv; 349154a3a114SMark Johnston 349254a3a114SMark Johnston vm_map_lock(map); 349354a3a114SMark Johnston rv = vm_map_wire_locked(map, start, end, flags); 349454a3a114SMark Johnston vm_map_unlock(map); 349554a3a114SMark Johnston return (rv); 349654a3a114SMark Johnston } 349754a3a114SMark Johnston 349866cd575bSAlan Cox /* 349954a3a114SMark Johnston * vm_map_wire_locked: 3500e27e17b7SAlan Cox * 350154a3a114SMark Johnston * Implements both kernel and user wiring. Returns with the map locked, 350254a3a114SMark Johnston * the map lock may be dropped. 3503e27e17b7SAlan Cox */ 3504e27e17b7SAlan Cox int 350554a3a114SMark Johnston vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) 3506e27e17b7SAlan Cox { 350783704cc2SDoug Moore vm_map_entry_t entry, first_entry, next_entry, prev_entry; 350866cd575bSAlan Cox vm_offset_t faddr, saved_end, saved_start; 3509e2e80fb3SKonstantin Belousov u_long incr, npages; 3510e2e80fb3SKonstantin Belousov u_int bidx, last_timestamp; 351112d7cc84SAlan Cox int rv; 351283704cc2SDoug Moore bool holes_ok, need_wakeup, user_wire; 3513e4cd31ddSJeff Roberson vm_prot_t prot; 3514e27e17b7SAlan Cox 351554a3a114SMark Johnston VM_MAP_ASSERT_LOCKED(map); 351654a3a114SMark Johnston 351779e9451fSKonstantin Belousov if (start == end) 351879e9451fSKonstantin Belousov return (KERN_SUCCESS); 3519e4cd31ddSJeff Roberson prot = 0; 3520e4cd31ddSJeff Roberson if (flags & VM_MAP_WIRE_WRITE) 3521e4cd31ddSJeff Roberson prot |= VM_PROT_WRITE; 35229a0cdf94SDoug Moore holes_ok = (flags & VM_MAP_WIRE_HOLESOK) != 0; 35239a0cdf94SDoug Moore user_wire = (flags & VM_MAP_WIRE_USER) != 0; 352412d7cc84SAlan Cox VM_MAP_RANGE_CHECK(map, start, end); 3525d1d3f7e1SDoug Moore if (!vm_map_lookup_entry(map, start, &first_entry)) { 35269a0cdf94SDoug Moore if (holes_ok) 35277cdcf863SDoug Moore first_entry = vm_map_entry_succ(first_entry); 3528d1d3f7e1SDoug Moore else 352912d7cc84SAlan Cox return (KERN_INVALID_ADDRESS); 353012d7cc84SAlan Cox } 353183704cc2SDoug Moore for (entry = first_entry; entry->start < end; entry = next_entry) { 353212d7cc84SAlan Cox if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { 353312d7cc84SAlan Cox /* 353412d7cc84SAlan Cox * We have not yet clipped the entry. 353512d7cc84SAlan Cox */ 353683704cc2SDoug Moore next_entry = vm_map_entry_in_transition(map, start, 353783704cc2SDoug Moore &end, holes_ok, entry); 353883704cc2SDoug Moore if (next_entry == NULL) { 353983704cc2SDoug Moore if (entry == first_entry) 354012d7cc84SAlan Cox return (KERN_INVALID_ADDRESS); 354112d7cc84SAlan Cox rv = KERN_INVALID_ADDRESS; 354212d7cc84SAlan Cox goto done; 354312d7cc84SAlan Cox } 354483704cc2SDoug Moore first_entry = (entry == first_entry) ? 354583704cc2SDoug Moore next_entry : NULL; 354612d7cc84SAlan Cox continue; 354712d7cc84SAlan Cox } 3548e2e80fb3SKonstantin Belousov rv = vm_map_clip_start(map, entry, start); 3549e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) 3550e2e80fb3SKonstantin Belousov goto done; 3551e2e80fb3SKonstantin Belousov rv = vm_map_clip_end(map, entry, end); 3552e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) 3553e2e80fb3SKonstantin Belousov goto done; 3554e2e80fb3SKonstantin Belousov 355512d7cc84SAlan Cox /* 355612d7cc84SAlan Cox * Mark the entry in case the map lock is released. (See 355712d7cc84SAlan Cox * above.) 355812d7cc84SAlan Cox */ 3559ff3ae454SKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 && 3560ff3ae454SKonstantin Belousov entry->wiring_thread == NULL, 3561ff3ae454SKonstantin Belousov ("owned map entry %p", entry)); 356212d7cc84SAlan Cox entry->eflags |= MAP_ENTRY_IN_TRANSITION; 35630acea7dfSKonstantin Belousov entry->wiring_thread = curthread; 3564e4cd31ddSJeff Roberson if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 3565e4cd31ddSJeff Roberson || (entry->protection & prot) != prot) { 3566529ab57bSKonstantin Belousov entry->eflags |= MAP_ENTRY_WIRE_SKIPPED; 35679a0cdf94SDoug Moore if (!holes_ok) { 35686d7e8091SKonstantin Belousov end = entry->end; 35696d7e8091SKonstantin Belousov rv = KERN_INVALID_ADDRESS; 35706d7e8091SKonstantin Belousov goto done; 35716d7e8091SKonstantin Belousov } 357238e220e8SDoug Moore } else if (entry->wired_count == 0) { 35730ada205eSBrian Feldman entry->wired_count++; 357454a3a114SMark Johnston 357554a3a114SMark Johnston npages = atop(entry->end - entry->start); 357654a3a114SMark Johnston if (user_wire && !vm_map_wire_user_count_add(npages)) { 357754a3a114SMark Johnston vm_map_wire_entry_failure(map, entry, 357854a3a114SMark Johnston entry->start); 357954a3a114SMark Johnston end = entry->end; 358054a3a114SMark Johnston rv = KERN_RESOURCE_SHORTAGE; 358154a3a114SMark Johnston goto done; 358254a3a114SMark Johnston } 358366cd575bSAlan Cox 358412d7cc84SAlan Cox /* 358512d7cc84SAlan Cox * Release the map lock, relying on the in-transition 3586a5db445dSMax Laier * mark. Mark the map busy for fork. 358712d7cc84SAlan Cox */ 358854a3a114SMark Johnston saved_start = entry->start; 358954a3a114SMark Johnston saved_end = entry->end; 3590312df2c1SDoug Moore last_timestamp = map->timestamp; 3591d0e4e53eSMark Johnston bidx = MAP_ENTRY_SPLIT_BOUNDARY_INDEX(entry); 3592e2e80fb3SKonstantin Belousov incr = pagesizes[bidx]; 3593a5db445dSMax Laier vm_map_busy(map); 359412d7cc84SAlan Cox vm_map_unlock(map); 359566cd575bSAlan Cox 3596e2e80fb3SKonstantin Belousov for (faddr = saved_start; faddr < saved_end; 3597e2e80fb3SKonstantin Belousov faddr += incr) { 359866cd575bSAlan Cox /* 359966cd575bSAlan Cox * Simulate a fault to get the page and enter 360066cd575bSAlan Cox * it into the physical map. 360166cd575bSAlan Cox */ 3602e2e80fb3SKonstantin Belousov rv = vm_fault(map, faddr, VM_PROT_NONE, 3603e2e80fb3SKonstantin Belousov VM_FAULT_WIRE, NULL); 3604e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) 360566cd575bSAlan Cox break; 3606e2e80fb3SKonstantin Belousov } 360712d7cc84SAlan Cox vm_map_lock(map); 3608a5db445dSMax Laier vm_map_unbusy(map); 360912d7cc84SAlan Cox if (last_timestamp + 1 != map->timestamp) { 361012d7cc84SAlan Cox /* 361112d7cc84SAlan Cox * Look again for the entry because the map was 361212d7cc84SAlan Cox * modified while it was unlocked. The entry 361312d7cc84SAlan Cox * may have been clipped, but NOT merged or 361412d7cc84SAlan Cox * deleted. 361512d7cc84SAlan Cox */ 36169a0cdf94SDoug Moore if (!vm_map_lookup_entry(map, saved_start, 361783704cc2SDoug Moore &next_entry)) 36189a0cdf94SDoug Moore KASSERT(false, 36199a0cdf94SDoug Moore ("vm_map_wire: lookup failed")); 362083704cc2SDoug Moore first_entry = (entry == first_entry) ? 362183704cc2SDoug Moore next_entry : NULL; 362283704cc2SDoug Moore for (entry = next_entry; entry->end < saved_end; 362383704cc2SDoug Moore entry = vm_map_entry_succ(entry)) { 362466cd575bSAlan Cox /* 362566cd575bSAlan Cox * In case of failure, handle entries 362666cd575bSAlan Cox * that were not fully wired here; 362766cd575bSAlan Cox * fully wired entries are handled 362866cd575bSAlan Cox * later. 362966cd575bSAlan Cox */ 363066cd575bSAlan Cox if (rv != KERN_SUCCESS && 363166cd575bSAlan Cox faddr < entry->end) 363266cd575bSAlan Cox vm_map_wire_entry_failure(map, 363366cd575bSAlan Cox entry, faddr); 363412d7cc84SAlan Cox } 363528c58286SAlan Cox } 363612d7cc84SAlan Cox if (rv != KERN_SUCCESS) { 363766cd575bSAlan Cox vm_map_wire_entry_failure(map, entry, faddr); 363854a3a114SMark Johnston if (user_wire) 363954a3a114SMark Johnston vm_map_wire_user_count_sub(npages); 364012d7cc84SAlan Cox end = entry->end; 364112d7cc84SAlan Cox goto done; 364212d7cc84SAlan Cox } 36430ada205eSBrian Feldman } else if (!user_wire || 36440ada205eSBrian Feldman (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { 36450ada205eSBrian Feldman entry->wired_count++; 364612d7cc84SAlan Cox } 364712d7cc84SAlan Cox /* 364812d7cc84SAlan Cox * Check the map for holes in the specified region. 36499a0cdf94SDoug Moore * If holes_ok was specified, skip this check. 365012d7cc84SAlan Cox */ 365183704cc2SDoug Moore next_entry = vm_map_entry_succ(entry); 36529a0cdf94SDoug Moore if (!holes_ok && 365383704cc2SDoug Moore entry->end < end && next_entry->start > entry->end) { 365412d7cc84SAlan Cox end = entry->end; 365512d7cc84SAlan Cox rv = KERN_INVALID_ADDRESS; 365612d7cc84SAlan Cox goto done; 365712d7cc84SAlan Cox } 365812d7cc84SAlan Cox } 365912d7cc84SAlan Cox rv = KERN_SUCCESS; 366012d7cc84SAlan Cox done: 36619a0cdf94SDoug Moore need_wakeup = false; 36629a0cdf94SDoug Moore if (first_entry == NULL && 36639a0cdf94SDoug Moore !vm_map_lookup_entry(map, start, &first_entry)) { 36649a0cdf94SDoug Moore KASSERT(holes_ok, ("vm_map_wire: lookup failed")); 366583704cc2SDoug Moore prev_entry = first_entry; 366683704cc2SDoug Moore entry = vm_map_entry_succ(first_entry); 366783704cc2SDoug Moore } else { 366883704cc2SDoug Moore prev_entry = vm_map_entry_pred(first_entry); 366983704cc2SDoug Moore entry = first_entry; 367012d7cc84SAlan Cox } 367183704cc2SDoug Moore for (; entry->start < end; 367283704cc2SDoug Moore prev_entry = entry, entry = vm_map_entry_succ(entry)) { 36730acea7dfSKonstantin Belousov /* 36749a0cdf94SDoug Moore * If holes_ok was specified, an empty 36750acea7dfSKonstantin Belousov * space in the unwired region could have been mapped 36760acea7dfSKonstantin Belousov * while the map lock was dropped for faulting in the 36770acea7dfSKonstantin Belousov * pages or draining MAP_ENTRY_IN_TRANSITION. 36780acea7dfSKonstantin Belousov * Moreover, another thread could be simultaneously 36790acea7dfSKonstantin Belousov * wiring this new mapping entry. Detect these cases 3680546bb2d7SKonstantin Belousov * and skip any entries marked as in transition not by us. 3681e2e80fb3SKonstantin Belousov * 3682e2e80fb3SKonstantin Belousov * Another way to get an entry not marked with 3683e2e80fb3SKonstantin Belousov * MAP_ENTRY_IN_TRANSITION is after failed clipping, 3684e2e80fb3SKonstantin Belousov * which set rv to KERN_INVALID_ARGUMENT. 36850acea7dfSKonstantin Belousov */ 36860acea7dfSKonstantin Belousov if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || 36870acea7dfSKonstantin Belousov entry->wiring_thread != curthread) { 3688e2e80fb3SKonstantin Belousov KASSERT(holes_ok || rv == KERN_INVALID_ARGUMENT, 36890acea7dfSKonstantin Belousov ("vm_map_wire: !HOLESOK and new/changed entry")); 36900acea7dfSKonstantin Belousov continue; 36910acea7dfSKonstantin Belousov } 36920acea7dfSKonstantin Belousov 3693b71f9b0dSDoug Moore if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0) { 3694b71f9b0dSDoug Moore /* do nothing */ 3695b71f9b0dSDoug Moore } else if (rv == KERN_SUCCESS) { 369612d7cc84SAlan Cox if (user_wire) 369712d7cc84SAlan Cox entry->eflags |= MAP_ENTRY_USER_WIRED; 369828c58286SAlan Cox } else if (entry->wired_count == -1) { 369928c58286SAlan Cox /* 370028c58286SAlan Cox * Wiring failed on this entry. Thus, unwiring is 370128c58286SAlan Cox * unnecessary. 370228c58286SAlan Cox */ 370328c58286SAlan Cox entry->wired_count = 0; 370403462509SAlan Cox } else if (!user_wire || 370503462509SAlan Cox (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { 370666cd575bSAlan Cox /* 370766cd575bSAlan Cox * Undo the wiring. Wiring succeeded on this entry 370866cd575bSAlan Cox * but failed on a later entry. 370966cd575bSAlan Cox */ 371054a3a114SMark Johnston if (entry->wired_count == 1) { 371103462509SAlan Cox vm_map_entry_unwire(map, entry); 371254a3a114SMark Johnston if (user_wire) 371354a3a114SMark Johnston vm_map_wire_user_count_sub( 371454a3a114SMark Johnston atop(entry->end - entry->start)); 371554a3a114SMark Johnston } else 371612d7cc84SAlan Cox entry->wired_count--; 371712d7cc84SAlan Cox } 37180acea7dfSKonstantin Belousov KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, 37190acea7dfSKonstantin Belousov ("vm_map_wire: in-transition flag missing %p", entry)); 37200acea7dfSKonstantin Belousov KASSERT(entry->wiring_thread == curthread, 37210acea7dfSKonstantin Belousov ("vm_map_wire: alien wire %p", entry)); 37220acea7dfSKonstantin Belousov entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION | 37230acea7dfSKonstantin Belousov MAP_ENTRY_WIRE_SKIPPED); 37240acea7dfSKonstantin Belousov entry->wiring_thread = NULL; 372512d7cc84SAlan Cox if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { 372612d7cc84SAlan Cox entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; 37279a0cdf94SDoug Moore need_wakeup = true; 372812d7cc84SAlan Cox } 372983704cc2SDoug Moore vm_map_try_merge_entries(map, prev_entry, entry); 373012d7cc84SAlan Cox } 373183704cc2SDoug Moore vm_map_try_merge_entries(map, prev_entry, entry); 373212d7cc84SAlan Cox if (need_wakeup) 373312d7cc84SAlan Cox vm_map_wakeup(map); 373412d7cc84SAlan Cox return (rv); 3735e27e17b7SAlan Cox } 3736e27e17b7SAlan Cox 3737e27e17b7SAlan Cox /* 3738950f8459SAlan Cox * vm_map_sync 3739df8bae1dSRodney W. Grimes * 3740df8bae1dSRodney W. Grimes * Push any dirty cached pages in the address range to their pager. 3741df8bae1dSRodney W. Grimes * If syncio is TRUE, dirty pages are written synchronously. 3742df8bae1dSRodney W. Grimes * If invalidate is TRUE, any cached pages are freed as well. 3743df8bae1dSRodney W. Grimes * 3744637315edSAlan Cox * If the size of the region from start to end is zero, we are 3745637315edSAlan Cox * supposed to flush all modified pages within the region containing 3746637315edSAlan Cox * start. Unfortunately, a region can be split or coalesced with 3747637315edSAlan Cox * neighboring regions, making it difficult to determine what the 3748637315edSAlan Cox * original region was. Therefore, we approximate this requirement by 3749637315edSAlan Cox * flushing the current region containing start. 3750637315edSAlan Cox * 3751df8bae1dSRodney W. Grimes * Returns an error if any part of the specified range is not mapped. 3752df8bae1dSRodney W. Grimes */ 3753df8bae1dSRodney W. Grimes int 3754950f8459SAlan Cox vm_map_sync( 37551b40f8c0SMatthew Dillon vm_map_t map, 37561b40f8c0SMatthew Dillon vm_offset_t start, 37571b40f8c0SMatthew Dillon vm_offset_t end, 37581b40f8c0SMatthew Dillon boolean_t syncio, 37591b40f8c0SMatthew Dillon boolean_t invalidate) 3760df8bae1dSRodney W. Grimes { 37612767c9f3SDoug Moore vm_map_entry_t entry, first_entry, next_entry; 3762df8bae1dSRodney W. Grimes vm_size_t size; 3763df8bae1dSRodney W. Grimes vm_object_t object; 3764a316d390SJohn Dyson vm_ooffset_t offset; 3765e53fa61bSKonstantin Belousov unsigned int last_timestamp; 3766e2e80fb3SKonstantin Belousov int bdry_idx; 3767126d6082SKonstantin Belousov boolean_t failed; 3768df8bae1dSRodney W. Grimes 3769df8bae1dSRodney W. Grimes vm_map_lock_read(map); 3770df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 37712767c9f3SDoug Moore if (!vm_map_lookup_entry(map, start, &first_entry)) { 3772df8bae1dSRodney W. Grimes vm_map_unlock_read(map); 3773df8bae1dSRodney W. Grimes return (KERN_INVALID_ADDRESS); 3774d1d3f7e1SDoug Moore } else if (start == end) { 37752767c9f3SDoug Moore start = first_entry->start; 37762767c9f3SDoug Moore end = first_entry->end; 3777df8bae1dSRodney W. Grimes } 3778e2e80fb3SKonstantin Belousov 3779df8bae1dSRodney W. Grimes /* 3780e2e80fb3SKonstantin Belousov * Make a first pass to check for user-wired memory, holes, 3781e2e80fb3SKonstantin Belousov * and partial invalidation of largepage mappings. 3782df8bae1dSRodney W. Grimes */ 37832767c9f3SDoug Moore for (entry = first_entry; entry->start < end; entry = next_entry) { 3784e2e80fb3SKonstantin Belousov if (invalidate) { 3785e2e80fb3SKonstantin Belousov if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0) { 3786df8bae1dSRodney W. Grimes vm_map_unlock_read(map); 3787df8bae1dSRodney W. Grimes return (KERN_INVALID_ARGUMENT); 3788df8bae1dSRodney W. Grimes } 3789d0e4e53eSMark Johnston bdry_idx = MAP_ENTRY_SPLIT_BOUNDARY_INDEX(entry); 3790e2e80fb3SKonstantin Belousov if (bdry_idx != 0 && 3791e2e80fb3SKonstantin Belousov ((start & (pagesizes[bdry_idx] - 1)) != 0 || 3792e2e80fb3SKonstantin Belousov (end & (pagesizes[bdry_idx] - 1)) != 0)) { 3793e2e80fb3SKonstantin Belousov vm_map_unlock_read(map); 3794e2e80fb3SKonstantin Belousov return (KERN_INVALID_ARGUMENT); 3795e2e80fb3SKonstantin Belousov } 3796e2e80fb3SKonstantin Belousov } 37972767c9f3SDoug Moore next_entry = vm_map_entry_succ(entry); 37982767c9f3SDoug Moore if (end > entry->end && 37992767c9f3SDoug Moore entry->end != next_entry->start) { 3800df8bae1dSRodney W. Grimes vm_map_unlock_read(map); 3801df8bae1dSRodney W. Grimes return (KERN_INVALID_ADDRESS); 3802df8bae1dSRodney W. Grimes } 3803df8bae1dSRodney W. Grimes } 3804df8bae1dSRodney W. Grimes 38052cf13952SAlan Cox if (invalidate) 3806bc105a67SAlan Cox pmap_remove(map->pmap, start, end); 3807126d6082SKonstantin Belousov failed = FALSE; 38082cf13952SAlan Cox 3809df8bae1dSRodney W. Grimes /* 3810df8bae1dSRodney W. Grimes * Make a second pass, cleaning/uncaching pages from the indicated 3811df8bae1dSRodney W. Grimes * objects as we go. 3812df8bae1dSRodney W. Grimes */ 38132767c9f3SDoug Moore for (entry = first_entry; entry->start < end;) { 38142767c9f3SDoug Moore offset = entry->offset + (start - entry->start); 38152767c9f3SDoug Moore size = (end <= entry->end ? end : entry->end) - start; 38162767c9f3SDoug Moore if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) { 3817c0877f10SJohn Dyson vm_map_t smap; 3818df8bae1dSRodney W. Grimes vm_map_entry_t tentry; 3819df8bae1dSRodney W. Grimes vm_size_t tsize; 3820df8bae1dSRodney W. Grimes 38212767c9f3SDoug Moore smap = entry->object.sub_map; 3822df8bae1dSRodney W. Grimes vm_map_lock_read(smap); 3823df8bae1dSRodney W. Grimes (void) vm_map_lookup_entry(smap, offset, &tentry); 3824df8bae1dSRodney W. Grimes tsize = tentry->end - offset; 3825df8bae1dSRodney W. Grimes if (tsize < size) 3826df8bae1dSRodney W. Grimes size = tsize; 3827df8bae1dSRodney W. Grimes object = tentry->object.vm_object; 3828df8bae1dSRodney W. Grimes offset = tentry->offset + (offset - tentry->start); 3829df8bae1dSRodney W. Grimes vm_map_unlock_read(smap); 3830df8bae1dSRodney W. Grimes } else { 38312767c9f3SDoug Moore object = entry->object.vm_object; 3832df8bae1dSRodney W. Grimes } 3833e53fa61bSKonstantin Belousov vm_object_reference(object); 3834e53fa61bSKonstantin Belousov last_timestamp = map->timestamp; 3835e53fa61bSKonstantin Belousov vm_map_unlock_read(map); 3836126d6082SKonstantin Belousov if (!vm_object_sync(object, offset, size, syncio, invalidate)) 3837126d6082SKonstantin Belousov failed = TRUE; 3838df8bae1dSRodney W. Grimes start += size; 3839e53fa61bSKonstantin Belousov vm_object_deallocate(object); 3840e53fa61bSKonstantin Belousov vm_map_lock_read(map); 3841d1d3f7e1SDoug Moore if (last_timestamp == map->timestamp || 38422767c9f3SDoug Moore !vm_map_lookup_entry(map, start, &entry)) 38432767c9f3SDoug Moore entry = vm_map_entry_succ(entry); 3844df8bae1dSRodney W. Grimes } 3845df8bae1dSRodney W. Grimes 3846df8bae1dSRodney W. Grimes vm_map_unlock_read(map); 3847126d6082SKonstantin Belousov return (failed ? KERN_FAILURE : KERN_SUCCESS); 3848df8bae1dSRodney W. Grimes } 3849df8bae1dSRodney W. Grimes 3850df8bae1dSRodney W. Grimes /* 3851df8bae1dSRodney W. Grimes * vm_map_entry_unwire: [ internal use only ] 3852df8bae1dSRodney W. Grimes * 3853df8bae1dSRodney W. Grimes * Make the region specified by this entry pageable. 3854df8bae1dSRodney W. Grimes * 3855df8bae1dSRodney W. Grimes * The map in question should be locked. 3856df8bae1dSRodney W. Grimes * [This is the reason for this routine's existence.] 3857df8bae1dSRodney W. Grimes */ 38580362d7d7SJohn Dyson static void 38591b40f8c0SMatthew Dillon vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry) 3860df8bae1dSRodney W. Grimes { 386154a3a114SMark Johnston vm_size_t size; 386203462509SAlan Cox 386303462509SAlan Cox VM_MAP_ASSERT_LOCKED(map); 386403462509SAlan Cox KASSERT(entry->wired_count > 0, 386503462509SAlan Cox ("vm_map_entry_unwire: entry %p isn't wired", entry)); 386654a3a114SMark Johnston 386754a3a114SMark Johnston size = entry->end - entry->start; 386854a3a114SMark Johnston if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0) 386954a3a114SMark Johnston vm_map_wire_user_count_sub(atop(size)); 387003462509SAlan Cox pmap_unwire(map->pmap, entry->start, entry->end); 387154a3a114SMark Johnston vm_object_unwire(entry->object.vm_object, entry->offset, size, 387254a3a114SMark Johnston PQ_ACTIVE); 3873df8bae1dSRodney W. Grimes entry->wired_count = 0; 3874df8bae1dSRodney W. Grimes } 3875df8bae1dSRodney W. Grimes 38760b367bd8SKonstantin Belousov static void 38770b367bd8SKonstantin Belousov vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map) 38780b367bd8SKonstantin Belousov { 38790b367bd8SKonstantin Belousov 38800b367bd8SKonstantin Belousov if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) 38810b367bd8SKonstantin Belousov vm_object_deallocate(entry->object.vm_object); 38820b367bd8SKonstantin Belousov uma_zfree(system_map ? kmapentzone : mapentzone, entry); 38830b367bd8SKonstantin Belousov } 38840b367bd8SKonstantin Belousov 3885df8bae1dSRodney W. Grimes /* 3886df8bae1dSRodney W. Grimes * vm_map_entry_delete: [ internal use only ] 3887df8bae1dSRodney W. Grimes * 3888df8bae1dSRodney W. Grimes * Deallocate the given entry from the target map. 3889df8bae1dSRodney W. Grimes */ 38900362d7d7SJohn Dyson static void 38911b40f8c0SMatthew Dillon vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry) 3892df8bae1dSRodney W. Grimes { 389332a89c32SAlan Cox vm_object_t object; 389484242cf6SMark Johnston vm_pindex_t offidxstart, offidxend, size1; 3895d1780e8dSKonstantin Belousov vm_size_t size; 389632a89c32SAlan Cox 38979f701172SKonstantin Belousov vm_map_entry_unlink(map, entry, UNLINK_MERGE_NONE); 38983364c323SKonstantin Belousov object = entry->object.vm_object; 389919bd0d9cSKonstantin Belousov 390019bd0d9cSKonstantin Belousov if ((entry->eflags & MAP_ENTRY_GUARD) != 0) { 390119bd0d9cSKonstantin Belousov MPASS(entry->cred == NULL); 390219bd0d9cSKonstantin Belousov MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0); 390319bd0d9cSKonstantin Belousov MPASS(object == NULL); 390419bd0d9cSKonstantin Belousov vm_map_entry_deallocate(entry, map->system_map); 390519bd0d9cSKonstantin Belousov return; 390619bd0d9cSKonstantin Belousov } 390719bd0d9cSKonstantin Belousov 39083364c323SKonstantin Belousov size = entry->end - entry->start; 39093364c323SKonstantin Belousov map->size -= size; 39103364c323SKonstantin Belousov 3911ef694c1aSEdward Tomasz Napierala if (entry->cred != NULL) { 3912ef694c1aSEdward Tomasz Napierala swap_release_by_cred(size, entry->cred); 3913ef694c1aSEdward Tomasz Napierala crfree(entry->cred); 39143364c323SKonstantin Belousov } 3915df8bae1dSRodney W. Grimes 391663967687SJeff Roberson if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 || object == NULL) { 391763967687SJeff Roberson entry->object.vm_object = NULL; 391863967687SJeff Roberson } else if ((object->flags & OBJ_ANON) != 0 || 391963967687SJeff Roberson object == kernel_object) { 3920ef694c1aSEdward Tomasz Napierala KASSERT(entry->cred == NULL || object->cred == NULL || 39213364c323SKonstantin Belousov (entry->eflags & MAP_ENTRY_NEEDS_COPY), 3922ef694c1aSEdward Tomasz Napierala ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry)); 392332a89c32SAlan Cox offidxstart = OFF_TO_IDX(entry->offset); 392484242cf6SMark Johnston offidxend = offidxstart + atop(size); 392589f6b863SAttilio Rao VM_OBJECT_WLOCK(object); 392663967687SJeff Roberson if (object->ref_count != 1 && 392763967687SJeff Roberson ((object->flags & OBJ_ONEMAPPING) != 0 || 39282e47807cSJeff Roberson object == kernel_object)) { 392932a89c32SAlan Cox vm_object_collapse(object); 39306bbee8e2SAlan Cox 39316bbee8e2SAlan Cox /* 39326bbee8e2SAlan Cox * The option OBJPR_NOTMAPPED can be passed here 39336bbee8e2SAlan Cox * because vm_map_delete() already performed 39346bbee8e2SAlan Cox * pmap_remove() on the only mapping to this range 39356bbee8e2SAlan Cox * of pages. 39366bbee8e2SAlan Cox */ 39376bbee8e2SAlan Cox vm_object_page_remove(object, offidxstart, offidxend, 39386bbee8e2SAlan Cox OBJPR_NOTMAPPED); 393932a89c32SAlan Cox if (offidxend >= object->size && 39403364c323SKonstantin Belousov offidxstart < object->size) { 39413364c323SKonstantin Belousov size1 = object->size; 394232a89c32SAlan Cox object->size = offidxstart; 3943ef694c1aSEdward Tomasz Napierala if (object->cred != NULL) { 39443364c323SKonstantin Belousov size1 -= object->size; 39453364c323SKonstantin Belousov KASSERT(object->charge >= ptoa(size1), 39469a4ee196SKonstantin Belousov ("object %p charge < 0", object)); 39479a4ee196SKonstantin Belousov swap_release_by_cred(ptoa(size1), 39489a4ee196SKonstantin Belousov object->cred); 39493364c323SKonstantin Belousov object->charge -= ptoa(size1); 39503364c323SKonstantin Belousov } 39513364c323SKonstantin Belousov } 395232a89c32SAlan Cox } 395389f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object); 395463967687SJeff Roberson } 39550b367bd8SKonstantin Belousov if (map->system_map) 39560b367bd8SKonstantin Belousov vm_map_entry_deallocate(entry, TRUE); 39570b367bd8SKonstantin Belousov else { 39587cdcf863SDoug Moore entry->defer_next = curthread->td_map_def_user; 39590b367bd8SKonstantin Belousov curthread->td_map_def_user = entry; 39600b367bd8SKonstantin Belousov } 3961df8bae1dSRodney W. Grimes } 3962df8bae1dSRodney W. Grimes 3963df8bae1dSRodney W. Grimes /* 3964df8bae1dSRodney W. Grimes * vm_map_delete: [ internal use only ] 3965df8bae1dSRodney W. Grimes * 3966df8bae1dSRodney W. Grimes * Deallocates the given address range from the target 3967df8bae1dSRodney W. Grimes * map. 3968df8bae1dSRodney W. Grimes */ 3969df8bae1dSRodney W. Grimes int 3970655c3490SKonstantin Belousov vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) 3971df8bae1dSRodney W. Grimes { 3972e2e80fb3SKonstantin Belousov vm_map_entry_t entry, next_entry, scratch_entry; 3973e2e80fb3SKonstantin Belousov int rv; 3974df8bae1dSRodney W. Grimes 39753a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(map); 39768a64110eSConrad Meyer 397779e9451fSKonstantin Belousov if (start == end) 397879e9451fSKonstantin Belousov return (KERN_SUCCESS); 39793a0916b8SKonstantin Belousov 3980df8bae1dSRodney W. Grimes /* 3981c7b23459SDoug Moore * Find the start of the region, and clip it. 3982c7b23459SDoug Moore * Step through all entries in this region. 3983df8bae1dSRodney W. Grimes */ 3984e2e80fb3SKonstantin Belousov rv = vm_map_lookup_clip_start(map, start, &entry, &scratch_entry); 3985e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) 3986e2e80fb3SKonstantin Belousov return (rv); 3987e2e80fb3SKonstantin Belousov for (; entry->start < end; entry = next_entry) { 398873b2baceSAlan Cox /* 398973b2baceSAlan Cox * Wait for wiring or unwiring of an entry to complete. 39907c938963SBrian Feldman * Also wait for any system wirings to disappear on 39917c938963SBrian Feldman * user maps. 399273b2baceSAlan Cox */ 39937c938963SBrian Feldman if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 || 39947c938963SBrian Feldman (vm_map_pmap(map) != kernel_pmap && 39957c938963SBrian Feldman vm_map_entry_system_wired_count(entry) != 0)) { 399673b2baceSAlan Cox unsigned int last_timestamp; 399773b2baceSAlan Cox vm_offset_t saved_start; 399873b2baceSAlan Cox 399973b2baceSAlan Cox saved_start = entry->start; 400073b2baceSAlan Cox entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 400173b2baceSAlan Cox last_timestamp = map->timestamp; 40028ce2d00aSPawel Jakub Dawidek (void) vm_map_unlock_and_wait(map, 0); 400373b2baceSAlan Cox vm_map_lock(map); 4004d1d3f7e1SDoug Moore if (last_timestamp + 1 != map->timestamp) { 400573b2baceSAlan Cox /* 400673b2baceSAlan Cox * Look again for the entry because the map was 4007d1d3f7e1SDoug Moore * modified while it was unlocked. 4008d1d3f7e1SDoug Moore * Specifically, the entry may have been 4009d1d3f7e1SDoug Moore * clipped, merged, or deleted. 401073b2baceSAlan Cox */ 4011e2e80fb3SKonstantin Belousov rv = vm_map_lookup_clip_start(map, saved_start, 4012e2e80fb3SKonstantin Belousov &next_entry, &scratch_entry); 4013e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) 4014e2e80fb3SKonstantin Belousov break; 4015c7b23459SDoug Moore } else 4016c7b23459SDoug Moore next_entry = entry; 401773b2baceSAlan Cox continue; 401873b2baceSAlan Cox } 4019e2e80fb3SKonstantin Belousov 4020e2e80fb3SKonstantin Belousov /* XXXKIB or delete to the upper superpage boundary ? */ 4021e2e80fb3SKonstantin Belousov rv = vm_map_clip_end(map, entry, end); 4022e2e80fb3SKonstantin Belousov if (rv != KERN_SUCCESS) 4023e2e80fb3SKonstantin Belousov break; 4024c7b23459SDoug Moore next_entry = vm_map_entry_succ(entry); 4025df8bae1dSRodney W. Grimes 4026df8bae1dSRodney W. Grimes /* 40270d94caffSDavid Greenman * Unwire before removing addresses from the pmap; otherwise, 40280d94caffSDavid Greenman * unwiring will put the entries back in the pmap. 4029df8bae1dSRodney W. Grimes */ 4030be7be412SKonstantin Belousov if (entry->wired_count != 0) 4031df8bae1dSRodney W. Grimes vm_map_entry_unwire(map, entry); 4032df8bae1dSRodney W. Grimes 403332f0fefcSKonstantin Belousov /* 403432f0fefcSKonstantin Belousov * Remove mappings for the pages, but only if the 403532f0fefcSKonstantin Belousov * mappings could exist. For instance, it does not 403632f0fefcSKonstantin Belousov * make sense to call pmap_remove() for guard entries. 403732f0fefcSKonstantin Belousov */ 403832f0fefcSKonstantin Belousov if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 || 403932f0fefcSKonstantin Belousov entry->object.vm_object != NULL) 40401e0e335bSKonstantin Belousov pmap_map_delete(map->pmap, entry->start, entry->end); 4041df8bae1dSRodney W. Grimes 4042fa50a355SKonstantin Belousov if (entry->end == map->anon_loc) 4043fa50a355SKonstantin Belousov map->anon_loc = entry->start; 4044fa50a355SKonstantin Belousov 4045df8bae1dSRodney W. Grimes /* 4046e608cc3cSKonstantin Belousov * Delete the entry only after removing all pmap 4047e608cc3cSKonstantin Belousov * entries pointing to its pages. (Otherwise, its 4048e608cc3cSKonstantin Belousov * page frames may be reallocated, and any modify bits 4049e608cc3cSKonstantin Belousov * will be set in the wrong object!) 4050df8bae1dSRodney W. Grimes */ 4051df8bae1dSRodney W. Grimes vm_map_entry_delete(map, entry); 4052df8bae1dSRodney W. Grimes } 4053e2e80fb3SKonstantin Belousov return (rv); 4054df8bae1dSRodney W. Grimes } 4055df8bae1dSRodney W. Grimes 4056df8bae1dSRodney W. Grimes /* 4057df8bae1dSRodney W. Grimes * vm_map_remove: 4058df8bae1dSRodney W. Grimes * 4059df8bae1dSRodney W. Grimes * Remove the given address range from the target map. 4060df8bae1dSRodney W. Grimes * This is the exported form of vm_map_delete. 4061df8bae1dSRodney W. Grimes */ 4062df8bae1dSRodney W. Grimes int 40631b40f8c0SMatthew Dillon vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end) 4064df8bae1dSRodney W. Grimes { 40656eaee3feSAlan Cox int result; 4066df8bae1dSRodney W. Grimes 4067df8bae1dSRodney W. Grimes vm_map_lock(map); 4068df8bae1dSRodney W. Grimes VM_MAP_RANGE_CHECK(map, start, end); 4069655c3490SKonstantin Belousov result = vm_map_delete(map, start, end); 4070df8bae1dSRodney W. Grimes vm_map_unlock(map); 4071df8bae1dSRodney W. Grimes return (result); 4072df8bae1dSRodney W. Grimes } 4073df8bae1dSRodney W. Grimes 4074df8bae1dSRodney W. Grimes /* 4075df8bae1dSRodney W. Grimes * vm_map_check_protection: 4076df8bae1dSRodney W. Grimes * 40772d5c7e45SMatthew Dillon * Assert that the target map allows the specified privilege on the 40782d5c7e45SMatthew Dillon * entire address region given. The entire region must be allocated. 40792d5c7e45SMatthew Dillon * 40802d5c7e45SMatthew Dillon * WARNING! This code does not and should not check whether the 40812d5c7e45SMatthew Dillon * contents of the region is accessible. For example a smaller file 40822d5c7e45SMatthew Dillon * might be mapped into a larger address space. 40832d5c7e45SMatthew Dillon * 40842d5c7e45SMatthew Dillon * NOTE! This code is also called by munmap(). 4085d8834602SAlan Cox * 4086d8834602SAlan Cox * The map must be locked. A read lock is sufficient. 4087df8bae1dSRodney W. Grimes */ 40880d94caffSDavid Greenman boolean_t 4089b9dcd593SBruce Evans vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end, 4090b9dcd593SBruce Evans vm_prot_t protection) 4091df8bae1dSRodney W. Grimes { 4092c0877f10SJohn Dyson vm_map_entry_t entry; 4093d1d3f7e1SDoug Moore vm_map_entry_t tmp_entry; 4094df8bae1dSRodney W. Grimes 4095d1d3f7e1SDoug Moore if (!vm_map_lookup_entry(map, start, &tmp_entry)) 4096df8bae1dSRodney W. Grimes return (FALSE); 4097d1d3f7e1SDoug Moore entry = tmp_entry; 4098df8bae1dSRodney W. Grimes 4099df8bae1dSRodney W. Grimes while (start < end) { 4100df8bae1dSRodney W. Grimes /* 4101df8bae1dSRodney W. Grimes * No holes allowed! 4102df8bae1dSRodney W. Grimes */ 4103d8834602SAlan Cox if (start < entry->start) 4104df8bae1dSRodney W. Grimes return (FALSE); 4105df8bae1dSRodney W. Grimes /* 4106df8bae1dSRodney W. Grimes * Check protection associated with entry. 4107df8bae1dSRodney W. Grimes */ 4108d8834602SAlan Cox if ((entry->protection & protection) != protection) 4109df8bae1dSRodney W. Grimes return (FALSE); 4110df8bae1dSRodney W. Grimes /* go to next entry */ 4111df8bae1dSRodney W. Grimes start = entry->end; 41127cdcf863SDoug Moore entry = vm_map_entry_succ(entry); 4113df8bae1dSRodney W. Grimes } 4114df8bae1dSRodney W. Grimes return (TRUE); 4115df8bae1dSRodney W. Grimes } 4116df8bae1dSRodney W. Grimes 41174d987866SJeff Roberson /* 41184d987866SJeff Roberson * 4119886b9021SJeff Roberson * vm_map_copy_swap_object: 41204d987866SJeff Roberson * 4121886b9021SJeff Roberson * Copies a swap-backed object from an existing map entry to a 41224d987866SJeff Roberson * new one. Carries forward the swap charge. May change the 41234d987866SJeff Roberson * src object on return. 41244d987866SJeff Roberson */ 41254d987866SJeff Roberson static void 4126886b9021SJeff Roberson vm_map_copy_swap_object(vm_map_entry_t src_entry, vm_map_entry_t dst_entry, 41274d987866SJeff Roberson vm_offset_t size, vm_ooffset_t *fork_charge) 41284d987866SJeff Roberson { 41294d987866SJeff Roberson vm_object_t src_object; 41304d987866SJeff Roberson struct ucred *cred; 41314d987866SJeff Roberson int charged; 41324d987866SJeff Roberson 41334d987866SJeff Roberson src_object = src_entry->object.vm_object; 41344d987866SJeff Roberson charged = ENTRY_CHARGED(src_entry); 4135d966c761SJeff Roberson if ((src_object->flags & OBJ_ANON) != 0) { 4136d966c761SJeff Roberson VM_OBJECT_WLOCK(src_object); 41374d987866SJeff Roberson vm_object_collapse(src_object); 41384d987866SJeff Roberson if ((src_object->flags & OBJ_ONEMAPPING) != 0) { 41394d987866SJeff Roberson vm_object_split(src_entry); 41404d987866SJeff Roberson src_object = src_entry->object.vm_object; 41414d987866SJeff Roberson } 41424d987866SJeff Roberson vm_object_reference_locked(src_object); 41434d987866SJeff Roberson vm_object_clear_flag(src_object, OBJ_ONEMAPPING); 4144d966c761SJeff Roberson VM_OBJECT_WUNLOCK(src_object); 4145d966c761SJeff Roberson } else 4146d966c761SJeff Roberson vm_object_reference(src_object); 41474d987866SJeff Roberson if (src_entry->cred != NULL && 41484d987866SJeff Roberson !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { 41494d987866SJeff Roberson KASSERT(src_object->cred == NULL, 41504d987866SJeff Roberson ("OVERCOMMIT: vm_map_copy_anon_entry: cred %p", 41514d987866SJeff Roberson src_object)); 41524d987866SJeff Roberson src_object->cred = src_entry->cred; 41534d987866SJeff Roberson src_object->charge = size; 41544d987866SJeff Roberson } 41554d987866SJeff Roberson dst_entry->object.vm_object = src_object; 41564d987866SJeff Roberson if (charged) { 41574d987866SJeff Roberson cred = curthread->td_ucred; 41584d987866SJeff Roberson crhold(cred); 41594d987866SJeff Roberson dst_entry->cred = cred; 41604d987866SJeff Roberson *fork_charge += size; 41614d987866SJeff Roberson if (!(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { 41624d987866SJeff Roberson crhold(cred); 41634d987866SJeff Roberson src_entry->cred = cred; 41644d987866SJeff Roberson *fork_charge += size; 41654d987866SJeff Roberson } 41664d987866SJeff Roberson } 41674d987866SJeff Roberson } 41684d987866SJeff Roberson 416986524867SJohn Dyson /* 4170df8bae1dSRodney W. Grimes * vm_map_copy_entry: 4171df8bae1dSRodney W. Grimes * 4172df8bae1dSRodney W. Grimes * Copies the contents of the source entry to the destination 4173df8bae1dSRodney W. Grimes * entry. The entries *must* be aligned properly. 4174df8bae1dSRodney W. Grimes */ 4175f708ef1bSPoul-Henning Kamp static void 41761b40f8c0SMatthew Dillon vm_map_copy_entry( 41771b40f8c0SMatthew Dillon vm_map_t src_map, 41781b40f8c0SMatthew Dillon vm_map_t dst_map, 41791b40f8c0SMatthew Dillon vm_map_entry_t src_entry, 41803364c323SKonstantin Belousov vm_map_entry_t dst_entry, 41813364c323SKonstantin Belousov vm_ooffset_t *fork_charge) 4182df8bae1dSRodney W. Grimes { 4183c0877f10SJohn Dyson vm_object_t src_object; 418484110e7eSKonstantin Belousov vm_map_entry_t fake_entry; 41853364c323SKonstantin Belousov vm_offset_t size; 4186c0877f10SJohn Dyson 41873a0916b8SKonstantin Belousov VM_MAP_ASSERT_LOCKED(dst_map); 41883a0916b8SKonstantin Belousov 41899fdfe602SMatthew Dillon if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP) 4190df8bae1dSRodney W. Grimes return; 4191df8bae1dSRodney W. Grimes 4192afaa41f6SAlan Cox if (src_entry->wired_count == 0 || 4193afaa41f6SAlan Cox (src_entry->protection & VM_PROT_WRITE) == 0) { 4194df8bae1dSRodney W. Grimes /* 41950d94caffSDavid Greenman * If the source entry is marked needs_copy, it is already 41960d94caffSDavid Greenman * write-protected. 4197df8bae1dSRodney W. Grimes */ 4198d9a9209aSAlan Cox if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0 && 4199d9a9209aSAlan Cox (src_entry->protection & VM_PROT_WRITE) != 0) { 4200df8bae1dSRodney W. Grimes pmap_protect(src_map->pmap, 4201df8bae1dSRodney W. Grimes src_entry->start, 4202df8bae1dSRodney W. Grimes src_entry->end, 4203df8bae1dSRodney W. Grimes src_entry->protection & ~VM_PROT_WRITE); 4204df8bae1dSRodney W. Grimes } 4205b18bfc3dSJohn Dyson 4206df8bae1dSRodney W. Grimes /* 4207df8bae1dSRodney W. Grimes * Make a copy of the object. 4208df8bae1dSRodney W. Grimes */ 42093364c323SKonstantin Belousov size = src_entry->end - src_entry->start; 42108aef1712SMatthew Dillon if ((src_object = src_entry->object.vm_object) != NULL) { 42110cb2610eSMark Johnston if ((src_object->flags & OBJ_SWAP) != 0) { 4212886b9021SJeff Roberson vm_map_copy_swap_object(src_entry, dst_entry, 42134d987866SJeff Roberson size, fork_charge); 42144d987866SJeff Roberson /* May have split/collapsed, reload obj. */ 42154d987866SJeff Roberson src_object = src_entry->object.vm_object; 42164d987866SJeff Roberson } else { 42174d987866SJeff Roberson vm_object_reference(src_object); 4218c0877f10SJohn Dyson dst_entry->object.vm_object = src_object; 42193364c323SKonstantin Belousov } 42209a4ee196SKonstantin Belousov src_entry->eflags |= MAP_ENTRY_COW | 42219a4ee196SKonstantin Belousov MAP_ENTRY_NEEDS_COPY; 42229a4ee196SKonstantin Belousov dst_entry->eflags |= MAP_ENTRY_COW | 42239a4ee196SKonstantin Belousov MAP_ENTRY_NEEDS_COPY; 4224b18bfc3dSJohn Dyson dst_entry->offset = src_entry->offset; 4225fe7bcbafSKyle Evans if (src_entry->eflags & MAP_ENTRY_WRITECNT) { 422684110e7eSKonstantin Belousov /* 4227fe7bcbafSKyle Evans * MAP_ENTRY_WRITECNT cannot 422884110e7eSKonstantin Belousov * indicate write reference from 422984110e7eSKonstantin Belousov * src_entry, since the entry is 423084110e7eSKonstantin Belousov * marked as needs copy. Allocate a 423184110e7eSKonstantin Belousov * fake entry that is used to 4232fe7bcbafSKyle Evans * decrement object->un_pager writecount 423384110e7eSKonstantin Belousov * at the appropriate time. Attach 423484110e7eSKonstantin Belousov * fake_entry to the deferred list. 423584110e7eSKonstantin Belousov */ 423684110e7eSKonstantin Belousov fake_entry = vm_map_entry_create(dst_map); 4237fe7bcbafSKyle Evans fake_entry->eflags = MAP_ENTRY_WRITECNT; 4238fe7bcbafSKyle Evans src_entry->eflags &= ~MAP_ENTRY_WRITECNT; 423984110e7eSKonstantin Belousov vm_object_reference(src_object); 424084110e7eSKonstantin Belousov fake_entry->object.vm_object = src_object; 424184110e7eSKonstantin Belousov fake_entry->start = src_entry->start; 424284110e7eSKonstantin Belousov fake_entry->end = src_entry->end; 42437cdcf863SDoug Moore fake_entry->defer_next = 42447cdcf863SDoug Moore curthread->td_map_def_user; 424584110e7eSKonstantin Belousov curthread->td_map_def_user = fake_entry; 424684110e7eSKonstantin Belousov } 42470ec97ffcSKonstantin Belousov 42480ec97ffcSKonstantin Belousov pmap_copy(dst_map->pmap, src_map->pmap, 42490ec97ffcSKonstantin Belousov dst_entry->start, dst_entry->end - dst_entry->start, 42500ec97ffcSKonstantin Belousov src_entry->start); 4251b18bfc3dSJohn Dyson } else { 4252b18bfc3dSJohn Dyson dst_entry->object.vm_object = NULL; 4253db6c7c7fSKonstantin Belousov if ((dst_entry->eflags & MAP_ENTRY_GUARD) == 0) 4254b18bfc3dSJohn Dyson dst_entry->offset = 0; 4255ef694c1aSEdward Tomasz Napierala if (src_entry->cred != NULL) { 4256ef694c1aSEdward Tomasz Napierala dst_entry->cred = curthread->td_ucred; 4257ef694c1aSEdward Tomasz Napierala crhold(dst_entry->cred); 42583364c323SKonstantin Belousov *fork_charge += size; 42593364c323SKonstantin Belousov } 4260b18bfc3dSJohn Dyson } 42610d94caffSDavid Greenman } else { 4262df8bae1dSRodney W. Grimes /* 4263afaa41f6SAlan Cox * We don't want to make writeable wired pages copy-on-write. 4264afaa41f6SAlan Cox * Immediately copy these pages into the new map by simulating 4265afaa41f6SAlan Cox * page faults. The new pages are pageable. 4266df8bae1dSRodney W. Grimes */ 4267121fd461SKonstantin Belousov vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry, 4268121fd461SKonstantin Belousov fork_charge); 4269df8bae1dSRodney W. Grimes } 4270df8bae1dSRodney W. Grimes } 4271df8bae1dSRodney W. Grimes 4272df8bae1dSRodney W. Grimes /* 42732a7be1b6SBrian Feldman * vmspace_map_entry_forked: 42742a7be1b6SBrian Feldman * Update the newly-forked vmspace each time a map entry is inherited 42752a7be1b6SBrian Feldman * or copied. The values for vm_dsize and vm_tsize are approximate 42762a7be1b6SBrian Feldman * (and mostly-obsolete ideas in the face of mmap(2) et al.) 42772a7be1b6SBrian Feldman */ 42782a7be1b6SBrian Feldman static void 42792a7be1b6SBrian Feldman vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2, 42802a7be1b6SBrian Feldman vm_map_entry_t entry) 42812a7be1b6SBrian Feldman { 42822a7be1b6SBrian Feldman vm_size_t entrysize; 42832a7be1b6SBrian Feldman vm_offset_t newend; 42842a7be1b6SBrian Feldman 428519bd0d9cSKonstantin Belousov if ((entry->eflags & MAP_ENTRY_GUARD) != 0) 428619bd0d9cSKonstantin Belousov return; 42872a7be1b6SBrian Feldman entrysize = entry->end - entry->start; 42882a7be1b6SBrian Feldman vm2->vm_map.size += entrysize; 42892a7be1b6SBrian Feldman if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) { 42902a7be1b6SBrian Feldman vm2->vm_ssize += btoc(entrysize); 42912a7be1b6SBrian Feldman } else if (entry->start >= (vm_offset_t)vm1->vm_daddr && 42922a7be1b6SBrian Feldman entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) { 4293b351299cSAndrew Gallatin newend = MIN(entry->end, 42942a7be1b6SBrian Feldman (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)); 42952a7be1b6SBrian Feldman vm2->vm_dsize += btoc(newend - entry->start); 42962a7be1b6SBrian Feldman } else if (entry->start >= (vm_offset_t)vm1->vm_taddr && 42972a7be1b6SBrian Feldman entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) { 4298b351299cSAndrew Gallatin newend = MIN(entry->end, 42992a7be1b6SBrian Feldman (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)); 43002a7be1b6SBrian Feldman vm2->vm_tsize += btoc(newend - entry->start); 43012a7be1b6SBrian Feldman } 43022a7be1b6SBrian Feldman } 43032a7be1b6SBrian Feldman 43042a7be1b6SBrian Feldman /* 4305df8bae1dSRodney W. Grimes * vmspace_fork: 4306df8bae1dSRodney W. Grimes * Create a new process vmspace structure and vm_map 4307df8bae1dSRodney W. Grimes * based on those of an existing process. The new map 4308df8bae1dSRodney W. Grimes * is based on the old map, according to the inheritance 4309df8bae1dSRodney W. Grimes * values on the regions in that map. 4310df8bae1dSRodney W. Grimes * 43112a7be1b6SBrian Feldman * XXX It might be worth coalescing the entries added to the new vmspace. 43122a7be1b6SBrian Feldman * 4313df8bae1dSRodney W. Grimes * The source map must not be locked. 4314df8bae1dSRodney W. Grimes */ 4315df8bae1dSRodney W. Grimes struct vmspace * 43163364c323SKonstantin Belousov vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) 4317df8bae1dSRodney W. Grimes { 4318c0877f10SJohn Dyson struct vmspace *vm2; 431979e53838SAlan Cox vm_map_t new_map, old_map; 432079e53838SAlan Cox vm_map_entry_t new_entry, old_entry; 4321de5f6a77SJohn Dyson vm_object_t object; 4322b8ebd99aSJohn Baldwin int error, locked __diagused; 432319bd0d9cSKonstantin Belousov vm_inherit_t inh; 4324df8bae1dSRodney W. Grimes 432579e53838SAlan Cox old_map = &vm1->vm_map; 432679e53838SAlan Cox /* Copy immutable fields of vm1 to vm2. */ 43276e00f3a3SKonstantin Belousov vm2 = vmspace_alloc(vm_map_min(old_map), vm_map_max(old_map), 43286e00f3a3SKonstantin Belousov pmap_pinit); 432989b57fcfSKonstantin Belousov if (vm2 == NULL) 433079e53838SAlan Cox return (NULL); 4331e7a9df16SKonstantin Belousov 43322a7be1b6SBrian Feldman vm2->vm_taddr = vm1->vm_taddr; 43332a7be1b6SBrian Feldman vm2->vm_daddr = vm1->vm_daddr; 43342a7be1b6SBrian Feldman vm2->vm_maxsaddr = vm1->vm_maxsaddr; 433546d35d41SMark Johnston vm2->vm_stacktop = vm1->vm_stacktop; 4336361971fbSKornel Dulęba vm2->vm_shp_base = vm1->vm_shp_base; 433779e53838SAlan Cox vm_map_lock(old_map); 433879e53838SAlan Cox if (old_map->busy) 433979e53838SAlan Cox vm_map_wait_busy(old_map); 434079e53838SAlan Cox new_map = &vm2->vm_map; 43411fac7d7fSKonstantin Belousov locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */ 43421fac7d7fSKonstantin Belousov KASSERT(locked, ("vmspace_fork: lock failed")); 4343df8bae1dSRodney W. Grimes 4344e7a9df16SKonstantin Belousov error = pmap_vmspace_copy(new_map->pmap, old_map->pmap); 4345e7a9df16SKonstantin Belousov if (error != 0) { 4346e7a9df16SKonstantin Belousov sx_xunlock(&old_map->lock); 4347e7a9df16SKonstantin Belousov sx_xunlock(&new_map->lock); 4348e7a9df16SKonstantin Belousov vm_map_process_deferred(); 4349e7a9df16SKonstantin Belousov vmspace_free(vm2); 4350e7a9df16SKonstantin Belousov return (NULL); 4351e7a9df16SKonstantin Belousov } 4352e7a9df16SKonstantin Belousov 4353fa50a355SKonstantin Belousov new_map->anon_loc = old_map->anon_loc; 43549402bb44SKonstantin Belousov new_map->flags |= old_map->flags & (MAP_ASLR | MAP_ASLR_IGNSTART | 43551811c1e9SMark Johnston MAP_ASLR_STACK | MAP_WXORX); 4356e7a9df16SKonstantin Belousov 43572767c9f3SDoug Moore VM_MAP_ENTRY_FOREACH(old_entry, old_map) { 43582767c9f3SDoug Moore if ((old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) 4359df8bae1dSRodney W. Grimes panic("vm_map_fork: encountered a submap"); 4360df8bae1dSRodney W. Grimes 436119bd0d9cSKonstantin Belousov inh = old_entry->inheritance; 436219bd0d9cSKonstantin Belousov if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 && 436319bd0d9cSKonstantin Belousov inh != VM_INHERIT_NONE) 436419bd0d9cSKonstantin Belousov inh = VM_INHERIT_COPY; 436519bd0d9cSKonstantin Belousov 436619bd0d9cSKonstantin Belousov switch (inh) { 4367df8bae1dSRodney W. Grimes case VM_INHERIT_NONE: 4368df8bae1dSRodney W. Grimes break; 4369df8bae1dSRodney W. Grimes 4370df8bae1dSRodney W. Grimes case VM_INHERIT_SHARE: 4371df8bae1dSRodney W. Grimes /* 43722767c9f3SDoug Moore * Clone the entry, creating the shared object if 43732767c9f3SDoug Moore * necessary. 4374fed9a903SJohn Dyson */ 4375fed9a903SJohn Dyson object = old_entry->object.vm_object; 4376fed9a903SJohn Dyson if (object == NULL) { 4377af1d6d6aSDoug Moore vm_map_entry_back(old_entry); 4378af1d6d6aSDoug Moore object = old_entry->object.vm_object; 43799a2f6362SAlan Cox } 43809a2f6362SAlan Cox 43819a2f6362SAlan Cox /* 43829a2f6362SAlan Cox * Add the reference before calling vm_object_shadow 43839a2f6362SAlan Cox * to insure that a shadow object is created. 43849a2f6362SAlan Cox */ 43859a2f6362SAlan Cox vm_object_reference(object); 43869a2f6362SAlan Cox if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) { 43875069bf57SJohn Dyson vm_object_shadow(&old_entry->object.vm_object, 43885069bf57SJohn Dyson &old_entry->offset, 438967388836SKonstantin Belousov old_entry->end - old_entry->start, 439067388836SKonstantin Belousov old_entry->cred, 4391d30344bdSIan Dowse /* Transfer the second reference too. */ 439267388836SKonstantin Belousov true); 439367388836SKonstantin Belousov old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 439467388836SKonstantin Belousov old_entry->cred = NULL; 43957fd10fb3SKonstantin Belousov 43967fd10fb3SKonstantin Belousov /* 439783ea714fSDoug Moore * As in vm_map_merged_neighbor_dispose(), 439883ea714fSDoug Moore * the vnode lock will not be acquired in 43997fd10fb3SKonstantin Belousov * this call to vm_object_deallocate(). 44007fd10fb3SKonstantin Belousov */ 4401d30344bdSIan Dowse vm_object_deallocate(object); 44025069bf57SJohn Dyson object = old_entry->object.vm_object; 440367388836SKonstantin Belousov } else { 440489f6b863SAttilio Rao VM_OBJECT_WLOCK(object); 4405069e9bc1SDoug Rabson vm_object_clear_flag(object, OBJ_ONEMAPPING); 4406ef694c1aSEdward Tomasz Napierala if (old_entry->cred != NULL) { 440767388836SKonstantin Belousov KASSERT(object->cred == NULL, 440867388836SKonstantin Belousov ("vmspace_fork both cred")); 4409ef694c1aSEdward Tomasz Napierala object->cred = old_entry->cred; 441067388836SKonstantin Belousov object->charge = old_entry->end - 441167388836SKonstantin Belousov old_entry->start; 4412ef694c1aSEdward Tomasz Napierala old_entry->cred = NULL; 44133364c323SKonstantin Belousov } 4414b9781cf6SKonstantin Belousov 4415b9781cf6SKonstantin Belousov /* 4416b9781cf6SKonstantin Belousov * Assert the correct state of the vnode 4417b9781cf6SKonstantin Belousov * v_writecount while the object is locked, to 4418b9781cf6SKonstantin Belousov * not relock it later for the assertion 4419b9781cf6SKonstantin Belousov * correctness. 4420b9781cf6SKonstantin Belousov */ 4421fe7bcbafSKyle Evans if (old_entry->eflags & MAP_ENTRY_WRITECNT && 4422b9781cf6SKonstantin Belousov object->type == OBJT_VNODE) { 442367388836SKonstantin Belousov KASSERT(((struct vnode *)object-> 442467388836SKonstantin Belousov handle)->v_writecount > 0, 442567388836SKonstantin Belousov ("vmspace_fork: v_writecount %p", 442667388836SKonstantin Belousov object)); 442767388836SKonstantin Belousov KASSERT(object->un_pager.vnp. 442867388836SKonstantin Belousov writemappings > 0, 4429b9781cf6SKonstantin Belousov ("vmspace_fork: vnp.writecount %p", 4430b9781cf6SKonstantin Belousov object)); 4431b9781cf6SKonstantin Belousov } 443289f6b863SAttilio Rao VM_OBJECT_WUNLOCK(object); 443367388836SKonstantin Belousov } 4434fed9a903SJohn Dyson 4435fed9a903SJohn Dyson /* 4436ad5fca3bSAlan Cox * Clone the entry, referencing the shared object. 4437df8bae1dSRodney W. Grimes */ 4438df8bae1dSRodney W. Grimes new_entry = vm_map_entry_create(new_map); 4439df8bae1dSRodney W. Grimes *new_entry = *old_entry; 44409f6acfd1SKonstantin Belousov new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED | 44419f6acfd1SKonstantin Belousov MAP_ENTRY_IN_TRANSITION); 44420acea7dfSKonstantin Belousov new_entry->wiring_thread = NULL; 4443df8bae1dSRodney W. Grimes new_entry->wired_count = 0; 4444fe7bcbafSKyle Evans if (new_entry->eflags & MAP_ENTRY_WRITECNT) { 4445fe7bcbafSKyle Evans vm_pager_update_writecount(object, 444684110e7eSKonstantin Belousov new_entry->start, new_entry->end); 444784110e7eSKonstantin Belousov } 444878022527SKonstantin Belousov vm_map_entry_set_vnode_text(new_entry, true); 4449df8bae1dSRodney W. Grimes 4450df8bae1dSRodney W. Grimes /* 44510d94caffSDavid Greenman * Insert the entry into the new map -- we know we're 44520d94caffSDavid Greenman * inserting at the end of the new map. 4453df8bae1dSRodney W. Grimes */ 44549f701172SKonstantin Belousov vm_map_entry_link(new_map, new_entry); 44552a7be1b6SBrian Feldman vmspace_map_entry_forked(vm1, vm2, new_entry); 4456df8bae1dSRodney W. Grimes 4457df8bae1dSRodney W. Grimes /* 4458df8bae1dSRodney W. Grimes * Update the physical map 4459df8bae1dSRodney W. Grimes */ 4460df8bae1dSRodney W. Grimes pmap_copy(new_map->pmap, old_map->pmap, 4461df8bae1dSRodney W. Grimes new_entry->start, 4462df8bae1dSRodney W. Grimes (old_entry->end - old_entry->start), 4463df8bae1dSRodney W. Grimes old_entry->start); 4464df8bae1dSRodney W. Grimes break; 4465df8bae1dSRodney W. Grimes 4466df8bae1dSRodney W. Grimes case VM_INHERIT_COPY: 4467df8bae1dSRodney W. Grimes /* 4468df8bae1dSRodney W. Grimes * Clone the entry and link into the map. 4469df8bae1dSRodney W. Grimes */ 4470df8bae1dSRodney W. Grimes new_entry = vm_map_entry_create(new_map); 4471df8bae1dSRodney W. Grimes *new_entry = *old_entry; 447284110e7eSKonstantin Belousov /* 447384110e7eSKonstantin Belousov * Copied entry is COW over the old object. 447484110e7eSKonstantin Belousov */ 44759f6acfd1SKonstantin Belousov new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED | 4476fe7bcbafSKyle Evans MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_WRITECNT); 44770acea7dfSKonstantin Belousov new_entry->wiring_thread = NULL; 4478df8bae1dSRodney W. Grimes new_entry->wired_count = 0; 4479df8bae1dSRodney W. Grimes new_entry->object.vm_object = NULL; 4480ef694c1aSEdward Tomasz Napierala new_entry->cred = NULL; 44819f701172SKonstantin Belousov vm_map_entry_link(new_map, new_entry); 44822a7be1b6SBrian Feldman vmspace_map_entry_forked(vm1, vm2, new_entry); 4483bd7e5f99SJohn Dyson vm_map_copy_entry(old_map, new_map, old_entry, 44843364c323SKonstantin Belousov new_entry, fork_charge); 448578022527SKonstantin Belousov vm_map_entry_set_vnode_text(new_entry, true); 4486df8bae1dSRodney W. Grimes break; 448778d7964bSXin LI 448878d7964bSXin LI case VM_INHERIT_ZERO: 448978d7964bSXin LI /* 449078d7964bSXin LI * Create a new anonymous mapping entry modelled from 449178d7964bSXin LI * the old one. 449278d7964bSXin LI */ 449378d7964bSXin LI new_entry = vm_map_entry_create(new_map); 449478d7964bSXin LI memset(new_entry, 0, sizeof(*new_entry)); 449578d7964bSXin LI 449678d7964bSXin LI new_entry->start = old_entry->start; 449778d7964bSXin LI new_entry->end = old_entry->end; 449878d7964bSXin LI new_entry->eflags = old_entry->eflags & 449978d7964bSXin LI ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION | 4500e2e80fb3SKonstantin Belousov MAP_ENTRY_WRITECNT | MAP_ENTRY_VN_EXEC | 4501e2e80fb3SKonstantin Belousov MAP_ENTRY_SPLIT_BOUNDARY_MASK); 450278d7964bSXin LI new_entry->protection = old_entry->protection; 450378d7964bSXin LI new_entry->max_protection = old_entry->max_protection; 450478d7964bSXin LI new_entry->inheritance = VM_INHERIT_ZERO; 450578d7964bSXin LI 45069f701172SKonstantin Belousov vm_map_entry_link(new_map, new_entry); 450778d7964bSXin LI vmspace_map_entry_forked(vm1, vm2, new_entry); 450878d7964bSXin LI 450978d7964bSXin LI new_entry->cred = curthread->td_ucred; 451078d7964bSXin LI crhold(new_entry->cred); 451178d7964bSXin LI *fork_charge += (new_entry->end - new_entry->start); 451278d7964bSXin LI 451378d7964bSXin LI break; 4514df8bae1dSRodney W. Grimes } 4515df8bae1dSRodney W. Grimes } 451684110e7eSKonstantin Belousov /* 451784110e7eSKonstantin Belousov * Use inlined vm_map_unlock() to postpone handling the deferred 451884110e7eSKonstantin Belousov * map entries, which cannot be done until both old_map and 451984110e7eSKonstantin Belousov * new_map locks are released. 452084110e7eSKonstantin Belousov */ 452184110e7eSKonstantin Belousov sx_xunlock(&old_map->lock); 452284110e7eSKonstantin Belousov sx_xunlock(&new_map->lock); 452384110e7eSKonstantin Belousov vm_map_process_deferred(); 4524df8bae1dSRodney W. Grimes 4525df8bae1dSRodney W. Grimes return (vm2); 4526df8bae1dSRodney W. Grimes } 4527df8bae1dSRodney W. Grimes 45288056df6eSAlan Cox /* 45298056df6eSAlan Cox * Create a process's stack for exec_new_vmspace(). This function is never 45308056df6eSAlan Cox * asked to wire the newly created stack. 45318056df6eSAlan Cox */ 453294f7e29aSAlan Cox int 453394f7e29aSAlan Cox vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, 453494f7e29aSAlan Cox vm_prot_t prot, vm_prot_t max, int cow) 453594f7e29aSAlan Cox { 45364648ba0aSKonstantin Belousov vm_size_t growsize, init_ssize; 45378056df6eSAlan Cox rlim_t vmemlim; 45384648ba0aSKonstantin Belousov int rv; 45394648ba0aSKonstantin Belousov 45408056df6eSAlan Cox MPASS((map->flags & MAP_WIREFUTURE) == 0); 45414648ba0aSKonstantin Belousov growsize = sgrowsiz; 45424648ba0aSKonstantin Belousov init_ssize = (max_ssize < growsize) ? max_ssize : growsize; 45434648ba0aSKonstantin Belousov vm_map_lock(map); 4544f6f6d240SMateusz Guzik vmemlim = lim_cur(curthread, RLIMIT_VMEM); 45454648ba0aSKonstantin Belousov /* If we would blow our VMEM resource limit, no go */ 45464648ba0aSKonstantin Belousov if (map->size + init_ssize > vmemlim) { 45474648ba0aSKonstantin Belousov rv = KERN_NO_SPACE; 45484648ba0aSKonstantin Belousov goto out; 45494648ba0aSKonstantin Belousov } 4550e1f92cccSAlan Cox rv = vm_map_stack_locked(map, addrbos, max_ssize, growsize, prot, 45514648ba0aSKonstantin Belousov max, cow); 45524648ba0aSKonstantin Belousov out: 45534648ba0aSKonstantin Belousov vm_map_unlock(map); 45544648ba0aSKonstantin Belousov return (rv); 45554648ba0aSKonstantin Belousov } 45564648ba0aSKonstantin Belousov 455719f49ad3SKonstantin Belousov static int stack_guard_page = 1; 455819f49ad3SKonstantin Belousov SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN, 455919f49ad3SKonstantin Belousov &stack_guard_page, 0, 456019f49ad3SKonstantin Belousov "Specifies the number of guard pages for a stack that grows"); 456119f49ad3SKonstantin Belousov 45624648ba0aSKonstantin Belousov static int 45634648ba0aSKonstantin Belousov vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, 45644648ba0aSKonstantin Belousov vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow) 45654648ba0aSKonstantin Belousov { 456621e45c30SKonstantin Belousov vm_map_entry_t gap_entry, new_entry, prev_entry; 456719bd0d9cSKonstantin Belousov vm_offset_t bot, gap_bot, gap_top, top; 456819f49ad3SKonstantin Belousov vm_size_t init_ssize, sgp; 4569fd75d710SMarcel Moolenaar int orient, rv; 457094f7e29aSAlan Cox 4571fd75d710SMarcel Moolenaar /* 4572fd75d710SMarcel Moolenaar * The stack orientation is piggybacked with the cow argument. 4573fd75d710SMarcel Moolenaar * Extract it into orient and mask the cow argument so that we 4574fd75d710SMarcel Moolenaar * don't pass it around further. 4575fd75d710SMarcel Moolenaar */ 4576fd75d710SMarcel Moolenaar orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP); 4577fd75d710SMarcel Moolenaar KASSERT(orient != 0, ("No stack grow direction")); 457819bd0d9cSKonstantin Belousov KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP), 457919bd0d9cSKonstantin Belousov ("bi-dir stack")); 4580fd75d710SMarcel Moolenaar 45810f1e6ec5SMark Johnston if (max_ssize == 0 || 45820f1e6ec5SMark Johnston !vm_map_range_valid(map, addrbos, addrbos + max_ssize)) 45839410cd7dSKonstantin Belousov return (KERN_INVALID_ADDRESS); 4584156e8654SKonstantin Belousov sgp = ((curproc->p_flag2 & P2_STKGAP_DISABLE) != 0 || 4585156e8654SKonstantin Belousov (curproc->p_fctl0 & NT_FREEBSD_FCTL_STKGAP_DISABLE) != 0) ? 0 : 4586fe69291fSKonstantin Belousov (vm_size_t)stack_guard_page * PAGE_SIZE; 45879410cd7dSKonstantin Belousov if (sgp >= max_ssize) 45889410cd7dSKonstantin Belousov return (KERN_INVALID_ARGUMENT); 4589fd75d710SMarcel Moolenaar 459019f49ad3SKonstantin Belousov init_ssize = growsize; 459119f49ad3SKonstantin Belousov if (max_ssize < init_ssize + sgp) 459219f49ad3SKonstantin Belousov init_ssize = max_ssize - sgp; 459394f7e29aSAlan Cox 459494f7e29aSAlan Cox /* If addr is already mapped, no go */ 4595d1d3f7e1SDoug Moore if (vm_map_lookup_entry(map, addrbos, &prev_entry)) 459694f7e29aSAlan Cox return (KERN_NO_SPACE); 4597a69ac174SMatthew Dillon 4598fd75d710SMarcel Moolenaar /* 4599763df3ecSPedro F. Giffuni * If we can't accommodate max_ssize in the current mapping, no go. 460094f7e29aSAlan Cox */ 46017cdcf863SDoug Moore if (vm_map_entry_succ(prev_entry)->start < addrbos + max_ssize) 460294f7e29aSAlan Cox return (KERN_NO_SPACE); 460394f7e29aSAlan Cox 4604fd75d710SMarcel Moolenaar /* 4605fd75d710SMarcel Moolenaar * We initially map a stack of only init_ssize. We will grow as 4606fd75d710SMarcel Moolenaar * needed later. Depending on the orientation of the stack (i.e. 4607fd75d710SMarcel Moolenaar * the grow direction) we either map at the top of the range, the 4608fd75d710SMarcel Moolenaar * bottom of the range or in the middle. 460994f7e29aSAlan Cox * 4610fd75d710SMarcel Moolenaar * Note: we would normally expect prot and max to be VM_PROT_ALL, 4611fd75d710SMarcel Moolenaar * and cow to be 0. Possibly we should eliminate these as input 4612fd75d710SMarcel Moolenaar * parameters, and just pass these values here in the insert call. 461394f7e29aSAlan Cox */ 461419bd0d9cSKonstantin Belousov if (orient == MAP_STACK_GROWS_DOWN) { 4615fd75d710SMarcel Moolenaar bot = addrbos + max_ssize - init_ssize; 4616fd75d710SMarcel Moolenaar top = bot + init_ssize; 461719bd0d9cSKonstantin Belousov gap_bot = addrbos; 461819bd0d9cSKonstantin Belousov gap_top = bot; 461919bd0d9cSKonstantin Belousov } else /* if (orient == MAP_STACK_GROWS_UP) */ { 462019bd0d9cSKonstantin Belousov bot = addrbos; 462119bd0d9cSKonstantin Belousov top = bot + init_ssize; 462219bd0d9cSKonstantin Belousov gap_bot = top; 462319bd0d9cSKonstantin Belousov gap_top = addrbos + max_ssize; 462419bd0d9cSKonstantin Belousov } 4625ba41b0deSKonstantin Belousov rv = vm_map_insert1(map, NULL, 0, bot, top, prot, max, cow, 4626ba41b0deSKonstantin Belousov &new_entry); 462719bd0d9cSKonstantin Belousov if (rv != KERN_SUCCESS) 462819bd0d9cSKonstantin Belousov return (rv); 462919bd0d9cSKonstantin Belousov KASSERT(new_entry->end == top || new_entry->start == bot, 463019bd0d9cSKonstantin Belousov ("Bad entry start/end for new stack entry")); 4631712efe66SAlan Cox KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 || 4632712efe66SAlan Cox (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0, 4633712efe66SAlan Cox ("new entry lacks MAP_ENTRY_GROWS_DOWN")); 4634712efe66SAlan Cox KASSERT((orient & MAP_STACK_GROWS_UP) == 0 || 4635712efe66SAlan Cox (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0, 4636712efe66SAlan Cox ("new entry lacks MAP_ENTRY_GROWS_UP")); 4637fe69291fSKonstantin Belousov if (gap_bot == gap_top) 4638fe69291fSKonstantin Belousov return (KERN_SUCCESS); 4639ba41b0deSKonstantin Belousov rv = vm_map_insert1(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE, 464019bd0d9cSKonstantin Belousov VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ? 4641ba41b0deSKonstantin Belousov MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP), &gap_entry); 4642a7751d32SKonstantin Belousov if (rv == KERN_SUCCESS) { 4643ba41b0deSKonstantin Belousov KASSERT((gap_entry->eflags & MAP_ENTRY_GUARD) != 0, 4644ba41b0deSKonstantin Belousov ("entry %p not gap %#x", gap_entry, gap_entry->eflags)); 4645ba41b0deSKonstantin Belousov KASSERT((gap_entry->eflags & (MAP_ENTRY_STACK_GAP_DN | 4646ba41b0deSKonstantin Belousov MAP_ENTRY_STACK_GAP_UP)) != 0, 4647ba41b0deSKonstantin Belousov ("entry %p not stack gap %#x", gap_entry, 4648ba41b0deSKonstantin Belousov gap_entry->eflags)); 4649ba41b0deSKonstantin Belousov 4650a7751d32SKonstantin Belousov /* 4651a7751d32SKonstantin Belousov * Gap can never successfully handle a fault, so 4652a7751d32SKonstantin Belousov * read-ahead logic is never used for it. Re-use 4653a7751d32SKonstantin Belousov * next_read of the gap entry to store 4654a7751d32SKonstantin Belousov * stack_guard_page for vm_map_growstack(). 465521e45c30SKonstantin Belousov * Similarly, since a gap cannot have a backing object, 465621e45c30SKonstantin Belousov * store the original stack protections in the 465721e45c30SKonstantin Belousov * object offset. 4658a7751d32SKonstantin Belousov */ 465921e45c30SKonstantin Belousov gap_entry->next_read = sgp; 4660aa928a52SKonstantin Belousov gap_entry->offset = prot | PROT_MAX(max); 4661a7751d32SKonstantin Belousov } else { 466219bd0d9cSKonstantin Belousov (void)vm_map_delete(map, bot, top); 4663a7751d32SKonstantin Belousov } 466494f7e29aSAlan Cox return (rv); 466594f7e29aSAlan Cox } 466694f7e29aSAlan Cox 466719bd0d9cSKonstantin Belousov /* 466819bd0d9cSKonstantin Belousov * Attempts to grow a vm stack entry. Returns KERN_SUCCESS if we 466919bd0d9cSKonstantin Belousov * successfully grow the stack. 467094f7e29aSAlan Cox */ 467119bd0d9cSKonstantin Belousov static int 467219bd0d9cSKonstantin Belousov vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry) 467394f7e29aSAlan Cox { 467419bd0d9cSKonstantin Belousov vm_map_entry_t stack_entry; 467519bd0d9cSKonstantin Belousov struct proc *p; 467619bd0d9cSKonstantin Belousov struct vmspace *vm; 467719bd0d9cSKonstantin Belousov struct ucred *cred; 467819bd0d9cSKonstantin Belousov vm_offset_t gap_end, gap_start, grow_start; 4679b6037edbSKonstantin Belousov vm_size_t grow_amount, guard, max_grow, sgp; 4680aa928a52SKonstantin Belousov vm_prot_t prot, max; 46817e19eda4SAndrey Zonov rlim_t lmemlim, stacklim, vmemlim; 4682b8ebd99aSJohn Baldwin int rv, rv1 __diagused; 468319bd0d9cSKonstantin Belousov bool gap_deleted, grow_down, is_procstack; 46841ba5ad42SEdward Tomasz Napierala #ifdef notyet 46851ba5ad42SEdward Tomasz Napierala uint64_t limit; 46861ba5ad42SEdward Tomasz Napierala #endif 4687afcc55f3SEdward Tomasz Napierala #ifdef RACCT 4688b8ebd99aSJohn Baldwin int error __diagused; 4689afcc55f3SEdward Tomasz Napierala #endif 469023955314SAlfred Perlstein 469119bd0d9cSKonstantin Belousov p = curproc; 469219bd0d9cSKonstantin Belousov vm = p->p_vmspace; 4693eb5ea878SKonstantin Belousov 4694eb5ea878SKonstantin Belousov /* 4695eb5ea878SKonstantin Belousov * Disallow stack growth when the access is performed by a 4696eb5ea878SKonstantin Belousov * debugger or AIO daemon. The reason is that the wrong 4697eb5ea878SKonstantin Belousov * resource limits are applied. 4698eb5ea878SKonstantin Belousov */ 469910ae16c7SKonstantin Belousov if (p != initproc && (map != &p->p_vmspace->vm_map || 470010ae16c7SKonstantin Belousov p->p_textvp == NULL)) 4701f758aaddSKonstantin Belousov return (KERN_FAILURE); 4702eb5ea878SKonstantin Belousov 470319bd0d9cSKonstantin Belousov MPASS(!map->system_map); 470419bd0d9cSKonstantin Belousov 4705f6f6d240SMateusz Guzik lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK); 4706f6f6d240SMateusz Guzik stacklim = lim_cur(curthread, RLIMIT_STACK); 4707f6f6d240SMateusz Guzik vmemlim = lim_cur(curthread, RLIMIT_VMEM); 470819bd0d9cSKonstantin Belousov retry: 470919bd0d9cSKonstantin Belousov /* If addr is not in a hole for a stack grow area, no need to grow. */ 4710d1d3f7e1SDoug Moore if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry)) 471119bd0d9cSKonstantin Belousov return (KERN_FAILURE); 471219bd0d9cSKonstantin Belousov if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0) 47130cddd8f0SMatthew Dillon return (KERN_SUCCESS); 471419bd0d9cSKonstantin Belousov if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) { 47157cdcf863SDoug Moore stack_entry = vm_map_entry_succ(gap_entry); 471619bd0d9cSKonstantin Belousov if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 || 471719bd0d9cSKonstantin Belousov stack_entry->start != gap_entry->end) 471819bd0d9cSKonstantin Belousov return (KERN_FAILURE); 471919bd0d9cSKonstantin Belousov grow_amount = round_page(stack_entry->start - addr); 472019bd0d9cSKonstantin Belousov grow_down = true; 472119bd0d9cSKonstantin Belousov } else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) { 47227cdcf863SDoug Moore stack_entry = vm_map_entry_pred(gap_entry); 472319bd0d9cSKonstantin Belousov if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 || 472419bd0d9cSKonstantin Belousov stack_entry->end != gap_entry->start) 472519bd0d9cSKonstantin Belousov return (KERN_FAILURE); 472619bd0d9cSKonstantin Belousov grow_amount = round_page(addr + 1 - stack_entry->end); 472719bd0d9cSKonstantin Belousov grow_down = false; 4728b21a0008SMarcel Moolenaar } else { 472919bd0d9cSKonstantin Belousov return (KERN_FAILURE); 4730b21a0008SMarcel Moolenaar } 4731156e8654SKonstantin Belousov guard = ((curproc->p_flag2 & P2_STKGAP_DISABLE) != 0 || 4732156e8654SKonstantin Belousov (curproc->p_fctl0 & NT_FREEBSD_FCTL_STKGAP_DISABLE) != 0) ? 0 : 4733fe69291fSKonstantin Belousov gap_entry->next_read; 4734201f03b8SAlan Cox max_grow = gap_entry->end - gap_entry->start; 4735201f03b8SAlan Cox if (guard > max_grow) 4736201f03b8SAlan Cox return (KERN_NO_SPACE); 4737201f03b8SAlan Cox max_grow -= guard; 473819bd0d9cSKonstantin Belousov if (grow_amount > max_grow) 47390cddd8f0SMatthew Dillon return (KERN_NO_SPACE); 474094f7e29aSAlan Cox 4741b21a0008SMarcel Moolenaar /* 4742b21a0008SMarcel Moolenaar * If this is the main process stack, see if we're over the stack 4743b21a0008SMarcel Moolenaar * limit. 474494f7e29aSAlan Cox */ 474519bd0d9cSKonstantin Belousov is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr && 4746becaf643SJohn Baldwin addr < (vm_offset_t)vm->vm_stacktop; 474719bd0d9cSKonstantin Belousov if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) 47480cddd8f0SMatthew Dillon return (KERN_NO_SPACE); 474919bd0d9cSKonstantin Belousov 4750afcc55f3SEdward Tomasz Napierala #ifdef RACCT 47514b5c9cf6SEdward Tomasz Napierala if (racct_enable) { 47521ba5ad42SEdward Tomasz Napierala PROC_LOCK(p); 47534b5c9cf6SEdward Tomasz Napierala if (is_procstack && racct_set(p, RACCT_STACK, 47544b5c9cf6SEdward Tomasz Napierala ctob(vm->vm_ssize) + grow_amount)) { 47551ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 47561ba5ad42SEdward Tomasz Napierala return (KERN_NO_SPACE); 47571ba5ad42SEdward Tomasz Napierala } 47581ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 47594b5c9cf6SEdward Tomasz Napierala } 4760afcc55f3SEdward Tomasz Napierala #endif 476194f7e29aSAlan Cox 476219bd0d9cSKonstantin Belousov grow_amount = roundup(grow_amount, sgrowsiz); 476319bd0d9cSKonstantin Belousov if (grow_amount > max_grow) 476419bd0d9cSKonstantin Belousov grow_amount = max_grow; 476591d5354aSJohn Baldwin if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) { 4766e4826248SAlan Cox grow_amount = trunc_page((vm_size_t)stacklim) - 4767e4826248SAlan Cox ctob(vm->vm_ssize); 476894f7e29aSAlan Cox } 476919bd0d9cSKonstantin Belousov 47701ba5ad42SEdward Tomasz Napierala #ifdef notyet 47711ba5ad42SEdward Tomasz Napierala PROC_LOCK(p); 47721ba5ad42SEdward Tomasz Napierala limit = racct_get_available(p, RACCT_STACK); 47731ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 47741ba5ad42SEdward Tomasz Napierala if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit)) 47751ba5ad42SEdward Tomasz Napierala grow_amount = limit - ctob(vm->vm_ssize); 47761ba5ad42SEdward Tomasz Napierala #endif 477719bd0d9cSKonstantin Belousov 477819bd0d9cSKonstantin Belousov if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) { 47793ac7d297SAndrey Zonov if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) { 47807e19eda4SAndrey Zonov rv = KERN_NO_SPACE; 47817e19eda4SAndrey Zonov goto out; 47827e19eda4SAndrey Zonov } 47837e19eda4SAndrey Zonov #ifdef RACCT 47844b5c9cf6SEdward Tomasz Napierala if (racct_enable) { 47857e19eda4SAndrey Zonov PROC_LOCK(p); 47867e19eda4SAndrey Zonov if (racct_set(p, RACCT_MEMLOCK, 47873ac7d297SAndrey Zonov ptoa(pmap_wired_count(map->pmap)) + grow_amount)) { 47887e19eda4SAndrey Zonov PROC_UNLOCK(p); 47897e19eda4SAndrey Zonov rv = KERN_NO_SPACE; 47907e19eda4SAndrey Zonov goto out; 47917e19eda4SAndrey Zonov } 47927e19eda4SAndrey Zonov PROC_UNLOCK(p); 47934b5c9cf6SEdward Tomasz Napierala } 47947e19eda4SAndrey Zonov #endif 47957e19eda4SAndrey Zonov } 479619bd0d9cSKonstantin Belousov 4797a69ac174SMatthew Dillon /* If we would blow our VMEM resource limit, no go */ 479891d5354aSJohn Baldwin if (map->size + grow_amount > vmemlim) { 47991ba5ad42SEdward Tomasz Napierala rv = KERN_NO_SPACE; 48001ba5ad42SEdward Tomasz Napierala goto out; 4801a69ac174SMatthew Dillon } 4802afcc55f3SEdward Tomasz Napierala #ifdef RACCT 48034b5c9cf6SEdward Tomasz Napierala if (racct_enable) { 48041ba5ad42SEdward Tomasz Napierala PROC_LOCK(p); 48051ba5ad42SEdward Tomasz Napierala if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) { 48061ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 48071ba5ad42SEdward Tomasz Napierala rv = KERN_NO_SPACE; 48081ba5ad42SEdward Tomasz Napierala goto out; 48091ba5ad42SEdward Tomasz Napierala } 48101ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 48114b5c9cf6SEdward Tomasz Napierala } 4812afcc55f3SEdward Tomasz Napierala #endif 4813a69ac174SMatthew Dillon 481419bd0d9cSKonstantin Belousov if (vm_map_lock_upgrade(map)) { 481519bd0d9cSKonstantin Belousov gap_entry = NULL; 481619bd0d9cSKonstantin Belousov vm_map_lock_read(map); 481719bd0d9cSKonstantin Belousov goto retry; 481894f7e29aSAlan Cox } 481994f7e29aSAlan Cox 482019bd0d9cSKonstantin Belousov if (grow_down) { 482121e45c30SKonstantin Belousov /* 482221e45c30SKonstantin Belousov * The gap_entry "offset" field is overloaded. See 482321e45c30SKonstantin Belousov * vm_map_stack_locked(). 482421e45c30SKonstantin Belousov */ 4825aa928a52SKonstantin Belousov prot = PROT_EXTRACT(gap_entry->offset); 4826aa928a52SKonstantin Belousov max = PROT_MAX_EXTRACT(gap_entry->offset); 4827b6037edbSKonstantin Belousov sgp = gap_entry->next_read; 482821e45c30SKonstantin Belousov 482919bd0d9cSKonstantin Belousov grow_start = gap_entry->end - grow_amount; 483019bd0d9cSKonstantin Belousov if (gap_entry->start + grow_amount == gap_entry->end) { 483119bd0d9cSKonstantin Belousov gap_start = gap_entry->start; 483219bd0d9cSKonstantin Belousov gap_end = gap_entry->end; 483319bd0d9cSKonstantin Belousov vm_map_entry_delete(map, gap_entry); 483419bd0d9cSKonstantin Belousov gap_deleted = true; 483519bd0d9cSKonstantin Belousov } else { 483619bd0d9cSKonstantin Belousov MPASS(gap_entry->start < gap_entry->end - grow_amount); 4837fa581662SDoug Moore vm_map_entry_resize(map, gap_entry, -grow_amount); 483819bd0d9cSKonstantin Belousov gap_deleted = false; 483919bd0d9cSKonstantin Belousov } 484019bd0d9cSKonstantin Belousov rv = vm_map_insert(map, NULL, 0, grow_start, 4841aa928a52SKonstantin Belousov grow_start + grow_amount, prot, max, MAP_STACK_GROWS_DOWN); 484219bd0d9cSKonstantin Belousov if (rv != KERN_SUCCESS) { 484319bd0d9cSKonstantin Belousov if (gap_deleted) { 4844b6037edbSKonstantin Belousov rv1 = vm_map_insert1(map, NULL, 0, gap_start, 484519bd0d9cSKonstantin Belousov gap_end, VM_PROT_NONE, VM_PROT_NONE, 4846b6037edbSKonstantin Belousov MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN, 4847b6037edbSKonstantin Belousov &gap_entry); 484819bd0d9cSKonstantin Belousov MPASS(rv1 == KERN_SUCCESS); 4849b6037edbSKonstantin Belousov gap_entry->next_read = sgp; 4850b6037edbSKonstantin Belousov gap_entry->offset = prot | PROT_MAX(max); 48511895f520SDoug Moore } else 4852fa581662SDoug Moore vm_map_entry_resize(map, gap_entry, 48531895f520SDoug Moore grow_amount); 485494f7e29aSAlan Cox } 4855b21a0008SMarcel Moolenaar } else { 485619bd0d9cSKonstantin Belousov grow_start = stack_entry->end; 4857ef694c1aSEdward Tomasz Napierala cred = stack_entry->cred; 4858ef694c1aSEdward Tomasz Napierala if (cred == NULL && stack_entry->object.vm_object != NULL) 4859ef694c1aSEdward Tomasz Napierala cred = stack_entry->object.vm_object->cred; 4860ef694c1aSEdward Tomasz Napierala if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred)) 48613364c323SKonstantin Belousov rv = KERN_NO_SPACE; 4862b21a0008SMarcel Moolenaar /* Grow the underlying object if applicable. */ 48633364c323SKonstantin Belousov else if (stack_entry->object.vm_object == NULL || 4864b21a0008SMarcel Moolenaar vm_object_coalesce(stack_entry->object.vm_object, 486557a21abaSAlan Cox stack_entry->offset, 4866b21a0008SMarcel Moolenaar (vm_size_t)(stack_entry->end - stack_entry->start), 4867fa581662SDoug Moore grow_amount, cred != NULL)) { 4868fa581662SDoug Moore if (gap_entry->start + grow_amount == gap_entry->end) { 486919bd0d9cSKonstantin Belousov vm_map_entry_delete(map, gap_entry); 4870fa581662SDoug Moore vm_map_entry_resize(map, stack_entry, 4871fa581662SDoug Moore grow_amount); 4872fa581662SDoug Moore } else { 487319bd0d9cSKonstantin Belousov gap_entry->start += grow_amount; 4874fa581662SDoug Moore stack_entry->end += grow_amount; 4875fa581662SDoug Moore } 487619bd0d9cSKonstantin Belousov map->size += grow_amount; 4877b21a0008SMarcel Moolenaar rv = KERN_SUCCESS; 4878b21a0008SMarcel Moolenaar } else 4879b21a0008SMarcel Moolenaar rv = KERN_FAILURE; 4880b21a0008SMarcel Moolenaar } 4881b21a0008SMarcel Moolenaar if (rv == KERN_SUCCESS && is_procstack) 4882b21a0008SMarcel Moolenaar vm->vm_ssize += btoc(grow_amount); 4883b21a0008SMarcel Moolenaar 4884abd498aaSBruce M Simpson /* 4885abd498aaSBruce M Simpson * Heed the MAP_WIREFUTURE flag if it was set for this process. 4886abd498aaSBruce M Simpson */ 488719bd0d9cSKonstantin Belousov if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) { 488854a3a114SMark Johnston rv = vm_map_wire_locked(map, grow_start, 488954a3a114SMark Johnston grow_start + grow_amount, 4890212e02c8SKonstantin Belousov VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 489154a3a114SMark Johnston } 489219bd0d9cSKonstantin Belousov vm_map_lock_downgrade(map); 4893abd498aaSBruce M Simpson 48941ba5ad42SEdward Tomasz Napierala out: 4895afcc55f3SEdward Tomasz Napierala #ifdef RACCT 48964b5c9cf6SEdward Tomasz Napierala if (racct_enable && rv != KERN_SUCCESS) { 48971ba5ad42SEdward Tomasz Napierala PROC_LOCK(p); 48981ba5ad42SEdward Tomasz Napierala error = racct_set(p, RACCT_VMEM, map->size); 48991ba5ad42SEdward Tomasz Napierala KASSERT(error == 0, ("decreasing RACCT_VMEM failed")); 49007e19eda4SAndrey Zonov if (!old_mlock) { 49017e19eda4SAndrey Zonov error = racct_set(p, RACCT_MEMLOCK, 49023ac7d297SAndrey Zonov ptoa(pmap_wired_count(map->pmap))); 49037e19eda4SAndrey Zonov KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed")); 49047e19eda4SAndrey Zonov } 49051ba5ad42SEdward Tomasz Napierala error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize)); 49061ba5ad42SEdward Tomasz Napierala KASSERT(error == 0, ("decreasing RACCT_STACK failed")); 49071ba5ad42SEdward Tomasz Napierala PROC_UNLOCK(p); 49081ba5ad42SEdward Tomasz Napierala } 4909afcc55f3SEdward Tomasz Napierala #endif 49101ba5ad42SEdward Tomasz Napierala 49110cddd8f0SMatthew Dillon return (rv); 491294f7e29aSAlan Cox } 491394f7e29aSAlan Cox 4914df8bae1dSRodney W. Grimes /* 49155856e12eSJohn Dyson * Unshare the specified VM space for exec. If other processes are 49165856e12eSJohn Dyson * mapped to it, then create a new one. The new vmspace is null. 49175856e12eSJohn Dyson */ 491889b57fcfSKonstantin Belousov int 49193ebc1248SPeter Wemm vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser) 49201b40f8c0SMatthew Dillon { 49215856e12eSJohn Dyson struct vmspace *oldvmspace = p->p_vmspace; 49225856e12eSJohn Dyson struct vmspace *newvmspace; 49235856e12eSJohn Dyson 49247032434eSKonstantin Belousov KASSERT((curthread->td_pflags & TDP_EXECVMSPC) == 0, 49257032434eSKonstantin Belousov ("vmspace_exec recursed")); 49266e00f3a3SKonstantin Belousov newvmspace = vmspace_alloc(minuser, maxuser, pmap_pinit); 492789b57fcfSKonstantin Belousov if (newvmspace == NULL) 492889b57fcfSKonstantin Belousov return (ENOMEM); 492951ab6c28SAlan Cox newvmspace->vm_swrss = oldvmspace->vm_swrss; 49305856e12eSJohn Dyson /* 49315856e12eSJohn Dyson * This code is written like this for prototype purposes. The 49325856e12eSJohn Dyson * goal is to avoid running down the vmspace here, but let the 49335856e12eSJohn Dyson * other process's that are still using the vmspace to finally 49345856e12eSJohn Dyson * run it down. Even though there is little or no chance of blocking 49355856e12eSJohn Dyson * here, it is a good idea to keep this form for future mods. 49365856e12eSJohn Dyson */ 493757051fdcSTor Egge PROC_VMSPACE_LOCK(p); 49385856e12eSJohn Dyson p->p_vmspace = newvmspace; 493957051fdcSTor Egge PROC_VMSPACE_UNLOCK(p); 49406617724cSJeff Roberson if (p == curthread->td_proc) 4941b40ce416SJulian Elischer pmap_activate(curthread); 49427032434eSKonstantin Belousov curthread->td_pflags |= TDP_EXECVMSPC; 494389b57fcfSKonstantin Belousov return (0); 49445856e12eSJohn Dyson } 49455856e12eSJohn Dyson 49465856e12eSJohn Dyson /* 49475856e12eSJohn Dyson * Unshare the specified VM space for forcing COW. This 49485856e12eSJohn Dyson * is called by rfork, for the (RFMEM|RFPROC) == 0 case. 49495856e12eSJohn Dyson */ 495089b57fcfSKonstantin Belousov int 49511b40f8c0SMatthew Dillon vmspace_unshare(struct proc *p) 49521b40f8c0SMatthew Dillon { 49535856e12eSJohn Dyson struct vmspace *oldvmspace = p->p_vmspace; 49545856e12eSJohn Dyson struct vmspace *newvmspace; 49553364c323SKonstantin Belousov vm_ooffset_t fork_charge; 49565856e12eSJohn Dyson 49579246b309SMark Johnston /* 49589246b309SMark Johnston * The caller is responsible for ensuring that the reference count 49599246b309SMark Johnston * cannot concurrently transition 1 -> 2. 49609246b309SMark Johnston */ 4961f7db0c95SMark Johnston if (refcount_load(&oldvmspace->vm_refcnt) == 1) 496289b57fcfSKonstantin Belousov return (0); 49633364c323SKonstantin Belousov fork_charge = 0; 49643364c323SKonstantin Belousov newvmspace = vmspace_fork(oldvmspace, &fork_charge); 496589b57fcfSKonstantin Belousov if (newvmspace == NULL) 496689b57fcfSKonstantin Belousov return (ENOMEM); 4967ef694c1aSEdward Tomasz Napierala if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) { 49683364c323SKonstantin Belousov vmspace_free(newvmspace); 49693364c323SKonstantin Belousov return (ENOMEM); 49703364c323SKonstantin Belousov } 497157051fdcSTor Egge PROC_VMSPACE_LOCK(p); 49725856e12eSJohn Dyson p->p_vmspace = newvmspace; 497357051fdcSTor Egge PROC_VMSPACE_UNLOCK(p); 49746617724cSJeff Roberson if (p == curthread->td_proc) 4975b40ce416SJulian Elischer pmap_activate(curthread); 4976b56ef1c1SJohn Baldwin vmspace_free(oldvmspace); 497789b57fcfSKonstantin Belousov return (0); 49785856e12eSJohn Dyson } 49795856e12eSJohn Dyson 49805856e12eSJohn Dyson /* 4981df8bae1dSRodney W. Grimes * vm_map_lookup: 4982df8bae1dSRodney W. Grimes * 4983df8bae1dSRodney W. Grimes * Finds the VM object, offset, and 4984df8bae1dSRodney W. Grimes * protection for a given virtual address in the 4985df8bae1dSRodney W. Grimes * specified map, assuming a page fault of the 4986df8bae1dSRodney W. Grimes * type specified. 4987df8bae1dSRodney W. Grimes * 4988df8bae1dSRodney W. Grimes * Leaves the map in question locked for read; return 4989df8bae1dSRodney W. Grimes * values are guaranteed until a vm_map_lookup_done 4990df8bae1dSRodney W. Grimes * call is performed. Note that the map argument 4991df8bae1dSRodney W. Grimes * is in/out; the returned map must be used in 4992df8bae1dSRodney W. Grimes * the call to vm_map_lookup_done. 4993df8bae1dSRodney W. Grimes * 4994df8bae1dSRodney W. Grimes * A handle (out_entry) is returned for use in 4995df8bae1dSRodney W. Grimes * vm_map_lookup_done, to make that fast. 4996df8bae1dSRodney W. Grimes * 4997df8bae1dSRodney W. Grimes * If a lookup is requested with "write protection" 4998df8bae1dSRodney W. Grimes * specified, the map may be changed to perform virtual 4999df8bae1dSRodney W. Grimes * copying operations, although the data referenced will 5000df8bae1dSRodney W. Grimes * remain the same. 5001df8bae1dSRodney W. Grimes */ 5002df8bae1dSRodney W. Grimes int 5003b9dcd593SBruce Evans vm_map_lookup(vm_map_t *var_map, /* IN/OUT */ 5004b9dcd593SBruce Evans vm_offset_t vaddr, 500547221757SJohn Dyson vm_prot_t fault_typea, 5006b9dcd593SBruce Evans vm_map_entry_t *out_entry, /* OUT */ 5007b9dcd593SBruce Evans vm_object_t *object, /* OUT */ 5008b9dcd593SBruce Evans vm_pindex_t *pindex, /* OUT */ 5009b9dcd593SBruce Evans vm_prot_t *out_prot, /* OUT */ 50102d8acc0fSJohn Dyson boolean_t *wired) /* OUT */ 5011df8bae1dSRodney W. Grimes { 5012c0877f10SJohn Dyson vm_map_entry_t entry; 5013c0877f10SJohn Dyson vm_map_t map = *var_map; 5014c0877f10SJohn Dyson vm_prot_t prot; 5015a6f21d15SMark Johnston vm_prot_t fault_type; 50163364c323SKonstantin Belousov vm_object_t eobject; 50170cc74f14SAlan Cox vm_size_t size; 5018ef694c1aSEdward Tomasz Napierala struct ucred *cred; 5019df8bae1dSRodney W. Grimes 502019bd0d9cSKonstantin Belousov RetryLookup: 5021df8bae1dSRodney W. Grimes 5022df8bae1dSRodney W. Grimes vm_map_lock_read(map); 5023df8bae1dSRodney W. Grimes 502419bd0d9cSKonstantin Belousov RetryLookupLocked: 5025df8bae1dSRodney W. Grimes /* 50264c3ef59eSAlan Cox * Lookup the faulting address. 5027df8bae1dSRodney W. Grimes */ 5028095104acSAlan Cox if (!vm_map_lookup_entry(map, vaddr, out_entry)) { 5029095104acSAlan Cox vm_map_unlock_read(map); 5030095104acSAlan Cox return (KERN_INVALID_ADDRESS); 5031095104acSAlan Cox } 5032df8bae1dSRodney W. Grimes 50334e94f402SAlan Cox entry = *out_entry; 5034b7b2aac2SJohn Dyson 5035df8bae1dSRodney W. Grimes /* 5036df8bae1dSRodney W. Grimes * Handle submaps. 5037df8bae1dSRodney W. Grimes */ 5038afa07f7eSJohn Dyson if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 5039df8bae1dSRodney W. Grimes vm_map_t old_map = map; 5040df8bae1dSRodney W. Grimes 5041df8bae1dSRodney W. Grimes *var_map = map = entry->object.sub_map; 5042df8bae1dSRodney W. Grimes vm_map_unlock_read(old_map); 5043df8bae1dSRodney W. Grimes goto RetryLookup; 5044df8bae1dSRodney W. Grimes } 5045a04c970aSJohn Dyson 5046df8bae1dSRodney W. Grimes /* 50470d94caffSDavid Greenman * Check whether this task is allowed to have this page. 5048df8bae1dSRodney W. Grimes */ 5049df8bae1dSRodney W. Grimes prot = entry->protection; 505019bd0d9cSKonstantin Belousov if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) { 505119bd0d9cSKonstantin Belousov fault_typea &= ~VM_PROT_FAULT_LOOKUP; 505219bd0d9cSKonstantin Belousov if (prot == VM_PROT_NONE && map != kernel_map && 505319bd0d9cSKonstantin Belousov (entry->eflags & MAP_ENTRY_GUARD) != 0 && 505419bd0d9cSKonstantin Belousov (entry->eflags & (MAP_ENTRY_STACK_GAP_DN | 505519bd0d9cSKonstantin Belousov MAP_ENTRY_STACK_GAP_UP)) != 0 && 505619bd0d9cSKonstantin Belousov vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS) 505719bd0d9cSKonstantin Belousov goto RetryLookupLocked; 505819bd0d9cSKonstantin Belousov } 5059a6f21d15SMark Johnston fault_type = fault_typea & VM_PROT_ALL; 50602db65ab4SAlan Cox if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) { 5061095104acSAlan Cox vm_map_unlock_read(map); 5062095104acSAlan Cox return (KERN_PROTECTION_FAILURE); 506347221757SJohn Dyson } 5064b8db9776SKonstantin Belousov KASSERT((prot & VM_PROT_WRITE) == 0 || (entry->eflags & 5065b8db9776SKonstantin Belousov (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY)) != 5066b8db9776SKonstantin Belousov (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY), 5067b8db9776SKonstantin Belousov ("entry %p flags %x", entry, entry->eflags)); 50685b3e0257SDag-Erling Smørgrav if ((fault_typea & VM_PROT_COPY) != 0 && 50695b3e0257SDag-Erling Smørgrav (entry->max_protection & VM_PROT_WRITE) == 0 && 50705b3e0257SDag-Erling Smørgrav (entry->eflags & MAP_ENTRY_COW) == 0) { 50715b3e0257SDag-Erling Smørgrav vm_map_unlock_read(map); 50725b3e0257SDag-Erling Smørgrav return (KERN_PROTECTION_FAILURE); 50735b3e0257SDag-Erling Smørgrav } 5074df8bae1dSRodney W. Grimes 5075df8bae1dSRodney W. Grimes /* 50760d94caffSDavid Greenman * If this page is not pageable, we have to get it for all possible 50770d94caffSDavid Greenman * accesses. 5078df8bae1dSRodney W. Grimes */ 507905f0fdd2SPoul-Henning Kamp *wired = (entry->wired_count != 0); 508005f0fdd2SPoul-Henning Kamp if (*wired) 5081a6d42a0dSAlan Cox fault_type = entry->protection; 50823364c323SKonstantin Belousov size = entry->end - entry->start; 508367388836SKonstantin Belousov 5084df8bae1dSRodney W. Grimes /* 5085df8bae1dSRodney W. Grimes * If the entry was copy-on-write, we either ... 5086df8bae1dSRodney W. Grimes */ 5087afa07f7eSJohn Dyson if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { 5088df8bae1dSRodney W. Grimes /* 50890d94caffSDavid Greenman * If we want to write the page, we may as well handle that 5090ad5fca3bSAlan Cox * now since we've got the map locked. 5091df8bae1dSRodney W. Grimes * 50920d94caffSDavid Greenman * If we don't need to write the page, we just demote the 50930d94caffSDavid Greenman * permissions allowed. 5094df8bae1dSRodney W. Grimes */ 5095a6d42a0dSAlan Cox if ((fault_type & VM_PROT_WRITE) != 0 || 5096a6d42a0dSAlan Cox (fault_typea & VM_PROT_COPY) != 0) { 5097df8bae1dSRodney W. Grimes /* 50980d94caffSDavid Greenman * Make a new object, and place it in the object 50990d94caffSDavid Greenman * chain. Note that no new references have appeared 5100ad5fca3bSAlan Cox * -- one just moved from the map to the new 51010d94caffSDavid Greenman * object. 5102df8bae1dSRodney W. Grimes */ 510325adb370SBrian Feldman if (vm_map_lock_upgrade(map)) 5104df8bae1dSRodney W. Grimes goto RetryLookup; 51059917e010SAlan Cox 5106ef694c1aSEdward Tomasz Napierala if (entry->cred == NULL) { 51073364c323SKonstantin Belousov /* 51083364c323SKonstantin Belousov * The debugger owner is charged for 51093364c323SKonstantin Belousov * the memory. 51103364c323SKonstantin Belousov */ 5111ef694c1aSEdward Tomasz Napierala cred = curthread->td_ucred; 5112ef694c1aSEdward Tomasz Napierala crhold(cred); 5113ef694c1aSEdward Tomasz Napierala if (!swap_reserve_by_cred(size, cred)) { 5114ef694c1aSEdward Tomasz Napierala crfree(cred); 51153364c323SKonstantin Belousov vm_map_unlock(map); 51163364c323SKonstantin Belousov return (KERN_RESOURCE_SHORTAGE); 51173364c323SKonstantin Belousov } 5118ef694c1aSEdward Tomasz Napierala entry->cred = cred; 51193364c323SKonstantin Belousov } 51203364c323SKonstantin Belousov eobject = entry->object.vm_object; 512167388836SKonstantin Belousov vm_object_shadow(&entry->object.vm_object, 512267388836SKonstantin Belousov &entry->offset, size, entry->cred, false); 512367388836SKonstantin Belousov if (eobject == entry->object.vm_object) { 51243364c323SKonstantin Belousov /* 51253364c323SKonstantin Belousov * The object was not shadowed. 51263364c323SKonstantin Belousov */ 5127ef694c1aSEdward Tomasz Napierala swap_release_by_cred(size, entry->cred); 5128ef694c1aSEdward Tomasz Napierala crfree(entry->cred); 51293364c323SKonstantin Belousov } 513067388836SKonstantin Belousov entry->cred = NULL; 513167388836SKonstantin Belousov entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 51329917e010SAlan Cox 51339b09b6c7SMatthew Dillon vm_map_lock_downgrade(map); 51340d94caffSDavid Greenman } else { 5135df8bae1dSRodney W. Grimes /* 51360d94caffSDavid Greenman * We're attempting to read a copy-on-write page -- 51370d94caffSDavid Greenman * don't allow writes. 5138df8bae1dSRodney W. Grimes */ 51392d8acc0fSJohn Dyson prot &= ~VM_PROT_WRITE; 5140df8bae1dSRodney W. Grimes } 5141df8bae1dSRodney W. Grimes } 51422d8acc0fSJohn Dyson 5143df8bae1dSRodney W. Grimes /* 5144df8bae1dSRodney W. Grimes * Create an object if necessary. 5145df8bae1dSRodney W. Grimes */ 514667388836SKonstantin Belousov if (entry->object.vm_object == NULL && !map->system_map) { 514725adb370SBrian Feldman if (vm_map_lock_upgrade(map)) 5148df8bae1dSRodney W. Grimes goto RetryLookup; 514967388836SKonstantin Belousov entry->object.vm_object = vm_object_allocate_anon(atop(size), 515070b29961SMark Johnston NULL, entry->cred, size); 5151df8bae1dSRodney W. Grimes entry->offset = 0; 5152ef694c1aSEdward Tomasz Napierala entry->cred = NULL; 51539b09b6c7SMatthew Dillon vm_map_lock_downgrade(map); 5154df8bae1dSRodney W. Grimes } 5155b5b40fa6SJohn Dyson 5156df8bae1dSRodney W. Grimes /* 51570d94caffSDavid Greenman * Return the object/offset from this entry. If the entry was 51580d94caffSDavid Greenman * copy-on-write or empty, it has been fixed up. 5159df8bae1dSRodney W. Grimes */ 516010d9120cSKonstantin Belousov *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); 5161df8bae1dSRodney W. Grimes *object = entry->object.vm_object; 5162df8bae1dSRodney W. Grimes 5163df8bae1dSRodney W. Grimes *out_prot = prot; 5164df8bae1dSRodney W. Grimes return (KERN_SUCCESS); 5165df8bae1dSRodney W. Grimes } 5166df8bae1dSRodney W. Grimes 5167df8bae1dSRodney W. Grimes /* 516819dc5607STor Egge * vm_map_lookup_locked: 516919dc5607STor Egge * 517019dc5607STor Egge * Lookup the faulting address. A version of vm_map_lookup that returns 517119dc5607STor Egge * KERN_FAILURE instead of blocking on map lock or memory allocation. 517219dc5607STor Egge */ 517319dc5607STor Egge int 517419dc5607STor Egge vm_map_lookup_locked(vm_map_t *var_map, /* IN/OUT */ 517519dc5607STor Egge vm_offset_t vaddr, 517619dc5607STor Egge vm_prot_t fault_typea, 517719dc5607STor Egge vm_map_entry_t *out_entry, /* OUT */ 517819dc5607STor Egge vm_object_t *object, /* OUT */ 517919dc5607STor Egge vm_pindex_t *pindex, /* OUT */ 518019dc5607STor Egge vm_prot_t *out_prot, /* OUT */ 518119dc5607STor Egge boolean_t *wired) /* OUT */ 518219dc5607STor Egge { 518319dc5607STor Egge vm_map_entry_t entry; 518419dc5607STor Egge vm_map_t map = *var_map; 518519dc5607STor Egge vm_prot_t prot; 518619dc5607STor Egge vm_prot_t fault_type = fault_typea; 518719dc5607STor Egge 518819dc5607STor Egge /* 51894c3ef59eSAlan Cox * Lookup the faulting address. 519019dc5607STor Egge */ 519119dc5607STor Egge if (!vm_map_lookup_entry(map, vaddr, out_entry)) 519219dc5607STor Egge return (KERN_INVALID_ADDRESS); 519319dc5607STor Egge 519419dc5607STor Egge entry = *out_entry; 519519dc5607STor Egge 519619dc5607STor Egge /* 519719dc5607STor Egge * Fail if the entry refers to a submap. 519819dc5607STor Egge */ 519919dc5607STor Egge if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) 520019dc5607STor Egge return (KERN_FAILURE); 520119dc5607STor Egge 520219dc5607STor Egge /* 520319dc5607STor Egge * Check whether this task is allowed to have this page. 520419dc5607STor Egge */ 520519dc5607STor Egge prot = entry->protection; 520619dc5607STor Egge fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; 520719dc5607STor Egge if ((fault_type & prot) != fault_type) 520819dc5607STor Egge return (KERN_PROTECTION_FAILURE); 520919dc5607STor Egge 521019dc5607STor Egge /* 521119dc5607STor Egge * If this page is not pageable, we have to get it for all possible 521219dc5607STor Egge * accesses. 521319dc5607STor Egge */ 521419dc5607STor Egge *wired = (entry->wired_count != 0); 521519dc5607STor Egge if (*wired) 5216a6d42a0dSAlan Cox fault_type = entry->protection; 521719dc5607STor Egge 521819dc5607STor Egge if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { 521919dc5607STor Egge /* 522019dc5607STor Egge * Fail if the entry was copy-on-write for a write fault. 522119dc5607STor Egge */ 522219dc5607STor Egge if (fault_type & VM_PROT_WRITE) 522319dc5607STor Egge return (KERN_FAILURE); 522419dc5607STor Egge /* 522519dc5607STor Egge * We're attempting to read a copy-on-write page -- 522619dc5607STor Egge * don't allow writes. 522719dc5607STor Egge */ 522819dc5607STor Egge prot &= ~VM_PROT_WRITE; 522919dc5607STor Egge } 523019dc5607STor Egge 523119dc5607STor Egge /* 523219dc5607STor Egge * Fail if an object should be created. 523319dc5607STor Egge */ 523419dc5607STor Egge if (entry->object.vm_object == NULL && !map->system_map) 523519dc5607STor Egge return (KERN_FAILURE); 523619dc5607STor Egge 523719dc5607STor Egge /* 523819dc5607STor Egge * Return the object/offset from this entry. If the entry was 523919dc5607STor Egge * copy-on-write or empty, it has been fixed up. 524019dc5607STor Egge */ 524110d9120cSKonstantin Belousov *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); 524219dc5607STor Egge *object = entry->object.vm_object; 524319dc5607STor Egge 524419dc5607STor Egge *out_prot = prot; 524519dc5607STor Egge return (KERN_SUCCESS); 524619dc5607STor Egge } 524719dc5607STor Egge 524819dc5607STor Egge /* 5249df8bae1dSRodney W. Grimes * vm_map_lookup_done: 5250df8bae1dSRodney W. Grimes * 5251df8bae1dSRodney W. Grimes * Releases locks acquired by a vm_map_lookup 5252df8bae1dSRodney W. Grimes * (according to the handle returned by that lookup). 5253df8bae1dSRodney W. Grimes */ 52540d94caffSDavid Greenman void 52551b40f8c0SMatthew Dillon vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry) 5256df8bae1dSRodney W. Grimes { 5257df8bae1dSRodney W. Grimes /* 5258df8bae1dSRodney W. Grimes * Unlock the main-level map 5259df8bae1dSRodney W. Grimes */ 5260df8bae1dSRodney W. Grimes vm_map_unlock_read(map); 5261df8bae1dSRodney W. Grimes } 5262df8bae1dSRodney W. Grimes 526319ea042eSKonstantin Belousov vm_offset_t 526419ea042eSKonstantin Belousov vm_map_max_KBI(const struct vm_map *map) 526519ea042eSKonstantin Belousov { 526619ea042eSKonstantin Belousov 5267f0165b1cSKonstantin Belousov return (vm_map_max(map)); 526819ea042eSKonstantin Belousov } 526919ea042eSKonstantin Belousov 527019ea042eSKonstantin Belousov vm_offset_t 527119ea042eSKonstantin Belousov vm_map_min_KBI(const struct vm_map *map) 527219ea042eSKonstantin Belousov { 527319ea042eSKonstantin Belousov 5274f0165b1cSKonstantin Belousov return (vm_map_min(map)); 527519ea042eSKonstantin Belousov } 527619ea042eSKonstantin Belousov 527719ea042eSKonstantin Belousov pmap_t 527819ea042eSKonstantin Belousov vm_map_pmap_KBI(vm_map_t map) 527919ea042eSKonstantin Belousov { 528019ea042eSKonstantin Belousov 528119ea042eSKonstantin Belousov return (map->pmap); 528219ea042eSKonstantin Belousov } 528319ea042eSKonstantin Belousov 5284a7752896SMark Johnston bool 5285a7752896SMark Johnston vm_map_range_valid_KBI(vm_map_t map, vm_offset_t start, vm_offset_t end) 5286a7752896SMark Johnston { 5287a7752896SMark Johnston 5288a7752896SMark Johnston return (vm_map_range_valid(map, start, end)); 5289a7752896SMark Johnston } 5290a7752896SMark Johnston 5291721899b1SDoug Moore #ifdef INVARIANTS 5292721899b1SDoug Moore static void 5293461587dcSDoug Moore _vm_map_assert_consistent(vm_map_t map, int check) 5294721899b1SDoug Moore { 5295721899b1SDoug Moore vm_map_entry_t entry, prev; 5296c1ad5342SDoug Moore vm_map_entry_t cur, header, lbound, ubound; 5297721899b1SDoug Moore vm_size_t max_left, max_right; 5298721899b1SDoug Moore 529985b7bedbSDoug Moore #ifdef DIAGNOSTIC 530085b7bedbSDoug Moore ++map->nupdates; 530185b7bedbSDoug Moore #endif 5302461587dcSDoug Moore if (enable_vmmap_check != check) 5303721899b1SDoug Moore return; 5304721899b1SDoug Moore 5305c1ad5342SDoug Moore header = prev = &map->header; 5306721899b1SDoug Moore VM_MAP_ENTRY_FOREACH(entry, map) { 5307721899b1SDoug Moore KASSERT(prev->end <= entry->start, 5308721899b1SDoug Moore ("map %p prev->end = %jx, start = %jx", map, 5309721899b1SDoug Moore (uintmax_t)prev->end, (uintmax_t)entry->start)); 5310721899b1SDoug Moore KASSERT(entry->start < entry->end, 5311721899b1SDoug Moore ("map %p start = %jx, end = %jx", map, 5312721899b1SDoug Moore (uintmax_t)entry->start, (uintmax_t)entry->end)); 5313c1ad5342SDoug Moore KASSERT(entry->left == header || 5314721899b1SDoug Moore entry->left->start < entry->start, 5315721899b1SDoug Moore ("map %p left->start = %jx, start = %jx", map, 5316721899b1SDoug Moore (uintmax_t)entry->left->start, (uintmax_t)entry->start)); 5317c1ad5342SDoug Moore KASSERT(entry->right == header || 5318721899b1SDoug Moore entry->start < entry->right->start, 5319721899b1SDoug Moore ("map %p start = %jx, right->start = %jx", map, 5320721899b1SDoug Moore (uintmax_t)entry->start, (uintmax_t)entry->right->start)); 5321c1ad5342SDoug Moore cur = map->root; 5322c1ad5342SDoug Moore lbound = ubound = header; 5323c1ad5342SDoug Moore for (;;) { 5324c1ad5342SDoug Moore if (entry->start < cur->start) { 5325c1ad5342SDoug Moore ubound = cur; 5326c1ad5342SDoug Moore cur = cur->left; 5327c1ad5342SDoug Moore KASSERT(cur != lbound, 5328c1ad5342SDoug Moore ("map %p cannot find %jx", 5329c0829bb1SMark Johnston map, (uintmax_t)entry->start)); 5330c1ad5342SDoug Moore } else if (cur->end <= entry->start) { 5331c1ad5342SDoug Moore lbound = cur; 5332c1ad5342SDoug Moore cur = cur->right; 5333c1ad5342SDoug Moore KASSERT(cur != ubound, 5334c1ad5342SDoug Moore ("map %p cannot find %jx", 5335c0829bb1SMark Johnston map, (uintmax_t)entry->start)); 5336c1ad5342SDoug Moore } else { 5337c1ad5342SDoug Moore KASSERT(cur == entry, 5338c1ad5342SDoug Moore ("map %p cannot find %jx", 5339c0829bb1SMark Johnston map, (uintmax_t)entry->start)); 5340c1ad5342SDoug Moore break; 5341c1ad5342SDoug Moore } 5342c1ad5342SDoug Moore } 5343c1ad5342SDoug Moore max_left = vm_map_entry_max_free_left(entry, lbound); 5344c1ad5342SDoug Moore max_right = vm_map_entry_max_free_right(entry, ubound); 5345c1ad5342SDoug Moore KASSERT(entry->max_free == vm_size_max(max_left, max_right), 5346721899b1SDoug Moore ("map %p max = %jx, max_left = %jx, max_right = %jx", map, 5347721899b1SDoug Moore (uintmax_t)entry->max_free, 5348721899b1SDoug Moore (uintmax_t)max_left, (uintmax_t)max_right)); 5349721899b1SDoug Moore prev = entry; 5350721899b1SDoug Moore } 5351721899b1SDoug Moore KASSERT(prev->end <= entry->start, 5352721899b1SDoug Moore ("map %p prev->end = %jx, start = %jx", map, 5353721899b1SDoug Moore (uintmax_t)prev->end, (uintmax_t)entry->start)); 5354721899b1SDoug Moore } 5355721899b1SDoug Moore #endif 5356721899b1SDoug Moore 5357c7c34a24SBruce Evans #include "opt_ddb.h" 5358c3cb3e12SDavid Greenman #ifdef DDB 5359c7c34a24SBruce Evans #include <sys/kernel.h> 5360c7c34a24SBruce Evans 5361c7c34a24SBruce Evans #include <ddb/ddb.h> 5362c7c34a24SBruce Evans 53632ebcd458SAttilio Rao static void 53642ebcd458SAttilio Rao vm_map_print(vm_map_t map) 5365df8bae1dSRodney W. Grimes { 536677131528SDoug Moore vm_map_entry_t entry, prev; 5367c7c34a24SBruce Evans 5368e5f251d2SAlan Cox db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n", 5369e5f251d2SAlan Cox (void *)map, 5370101eeb7fSBruce Evans (void *)map->pmap, map->nentries, map->timestamp); 5371df8bae1dSRodney W. Grimes 5372c7c34a24SBruce Evans db_indent += 2; 5373721899b1SDoug Moore prev = &map->header; 5374721899b1SDoug Moore VM_MAP_ENTRY_FOREACH(entry, map) { 537519bd0d9cSKonstantin Belousov db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n", 537619bd0d9cSKonstantin Belousov (void *)entry, (void *)entry->start, (void *)entry->end, 537719bd0d9cSKonstantin Belousov entry->eflags); 5378e5f251d2SAlan Cox { 5379eaa17d42SRyan Libby static const char * const inheritance_name[4] = 5380df8bae1dSRodney W. Grimes {"share", "copy", "none", "donate_copy"}; 53810d94caffSDavid Greenman 538295e5e988SJohn Dyson db_iprintf(" prot=%x/%x/%s", 5383df8bae1dSRodney W. Grimes entry->protection, 5384df8bae1dSRodney W. Grimes entry->max_protection, 538577131528SDoug Moore inheritance_name[(int)(unsigned char) 538677131528SDoug Moore entry->inheritance]); 5387df8bae1dSRodney W. Grimes if (entry->wired_count != 0) 538895e5e988SJohn Dyson db_printf(", wired"); 5389df8bae1dSRodney W. Grimes } 53909fdfe602SMatthew Dillon if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 5391cd034a5bSMaxime Henrion db_printf(", share=%p, offset=0x%jx\n", 53929fdfe602SMatthew Dillon (void *)entry->object.sub_map, 5393cd034a5bSMaxime Henrion (uintmax_t)entry->offset); 539477131528SDoug Moore if (prev == &map->header || 539577131528SDoug Moore prev->object.sub_map != 539677131528SDoug Moore entry->object.sub_map) { 5397c7c34a24SBruce Evans db_indent += 2; 53982ebcd458SAttilio Rao vm_map_print((vm_map_t)entry->object.sub_map); 5399c7c34a24SBruce Evans db_indent -= 2; 5400df8bae1dSRodney W. Grimes } 54010d94caffSDavid Greenman } else { 5402ef694c1aSEdward Tomasz Napierala if (entry->cred != NULL) 5403ef694c1aSEdward Tomasz Napierala db_printf(", ruid %d", entry->cred->cr_ruid); 5404cd034a5bSMaxime Henrion db_printf(", object=%p, offset=0x%jx", 5405101eeb7fSBruce Evans (void *)entry->object.vm_object, 5406cd034a5bSMaxime Henrion (uintmax_t)entry->offset); 5407ef694c1aSEdward Tomasz Napierala if (entry->object.vm_object && entry->object.vm_object->cred) 5408ef694c1aSEdward Tomasz Napierala db_printf(", obj ruid %d charge %jx", 5409ef694c1aSEdward Tomasz Napierala entry->object.vm_object->cred->cr_ruid, 54103364c323SKonstantin Belousov (uintmax_t)entry->object.vm_object->charge); 5411afa07f7eSJohn Dyson if (entry->eflags & MAP_ENTRY_COW) 5412c7c34a24SBruce Evans db_printf(", copy (%s)", 5413afa07f7eSJohn Dyson (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done"); 5414c7c34a24SBruce Evans db_printf("\n"); 5415df8bae1dSRodney W. Grimes 541677131528SDoug Moore if (prev == &map->header || 541777131528SDoug Moore prev->object.vm_object != 541877131528SDoug Moore entry->object.vm_object) { 5419c7c34a24SBruce Evans db_indent += 2; 5420101eeb7fSBruce Evans vm_object_print((db_expr_t)(intptr_t) 5421101eeb7fSBruce Evans entry->object.vm_object, 542244bbc3b7SKonstantin Belousov 0, 0, (char *)0); 5423c7c34a24SBruce Evans db_indent -= 2; 5424df8bae1dSRodney W. Grimes } 5425df8bae1dSRodney W. Grimes } 5426721899b1SDoug Moore prev = entry; 5427df8bae1dSRodney W. Grimes } 5428c7c34a24SBruce Evans db_indent -= 2; 5429df8bae1dSRodney W. Grimes } 543095e5e988SJohn Dyson 54312ebcd458SAttilio Rao DB_SHOW_COMMAND(map, map) 54322ebcd458SAttilio Rao { 54332ebcd458SAttilio Rao 54342ebcd458SAttilio Rao if (!have_addr) { 54352ebcd458SAttilio Rao db_printf("usage: show map <addr>\n"); 54362ebcd458SAttilio Rao return; 54372ebcd458SAttilio Rao } 54382ebcd458SAttilio Rao vm_map_print((vm_map_t)addr); 54392ebcd458SAttilio Rao } 544095e5e988SJohn Dyson 544195e5e988SJohn Dyson DB_SHOW_COMMAND(procvm, procvm) 544295e5e988SJohn Dyson { 544395e5e988SJohn Dyson struct proc *p; 544495e5e988SJohn Dyson 544595e5e988SJohn Dyson if (have_addr) { 5446a9546a6bSJohn Baldwin p = db_lookup_proc(addr); 544795e5e988SJohn Dyson } else { 544895e5e988SJohn Dyson p = curproc; 544995e5e988SJohn Dyson } 545095e5e988SJohn Dyson 5451ac1e407bSBruce Evans db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n", 5452ac1e407bSBruce Evans (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map, 5453b1028ad1SLuoqi Chen (void *)vmspace_pmap(p->p_vmspace)); 545495e5e988SJohn Dyson 54552ebcd458SAttilio Rao vm_map_print((vm_map_t)&p->p_vmspace->vm_map); 545695e5e988SJohn Dyson } 545795e5e988SJohn Dyson 5458c7c34a24SBruce Evans #endif /* DDB */ 5459