/* xref: /freebsd/sys/vm/vm_map.c (revision 2e1c94aa1fd582fb8ae0522f0827be719ff5fb67) */
/*-
 * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
 *
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Virtual memory mapping module.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/elf.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/vnode.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/file.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/shm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>
#include <vm/swap_pager.h>
#include <vm/uma.h>

/*
 *	Virtual memory maps provide for the mapping, protection,
 *	and sharing of virtual memory objects.  In addition,
 *	this module provides for an efficient virtual copy of
 *	memory from one map to another.
 *
 *	Synchronization is required prior to most operations.
 *
 *	Maps consist of an ordered doubly-linked list of simple
 *	entries; a self-adjusting binary search tree of these
 *	entries is used to speed up lookups.
 *
 *	Since portions of maps are specified by start/end addresses,
 *	which may not align with existing map entries, all
 *	routines merely "clip" entries to these start/end values.
 *	[That is, an entry is split into two, bordering at a
 *	start or end value.]  Note that these clippings may not
 *	always be necessary (as the two resulting entries are then
 *	not changed); however, the clipping is done for convenience.
 *
 *	As mentioned above, virtual copy operations are performed
 *	by copying VM object references from one map to
 *	another, and then marking both regions as copy-on-write.
 */
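/*
 * Illustrative example (added for clarity, not from the original
 * sources): clipping an entry that spans [0x1000, 0x8000) for an
 * operation on [0x3000, 0x5000) splits it at 0x3000 and again at
 * 0x5000, producing entries for [0x1000, 0x3000), [0x3000, 0x5000),
 * and [0x5000, 0x8000); only the middle entry is then modified.
 */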

static struct mtx map_sleep_mtx;
static uma_zone_t mapentzone;
static uma_zone_t kmapentzone;
static uma_zone_t vmspace_zone;
static int vmspace_zinit(void *mem, int size, int flags);
static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min,
    vm_offset_t max);
static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
static int vm_map_growstack(vm_map_t map, vm_offset_t addr,
    vm_map_entry_t gap_entry);
static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
    vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags);
#ifdef INVARIANTS
static void vmspace_zdtor(void *mem, int size, void *arg);
#endif
static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
    vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max,
    int cow);
static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
    vm_offset_t failed_addr);

#define	ENTRY_CHARGED(e) ((e)->cred != NULL || \
    ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \
     !((e)->eflags & MAP_ENTRY_NEEDS_COPY)))

/*
 * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type
 * stable.
 */
#define PROC_VMSPACE_LOCK(p) do { } while (0)
#define PROC_VMSPACE_UNLOCK(p) do { } while (0)

/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Asserts that the starting and ending region
 *	addresses fall within the valid range of the map.
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)		\
		{					\
		if (start < vm_map_min(map))		\
			start = vm_map_min(map);	\
		if (end > vm_map_max(map))		\
			end = vm_map_max(map);		\
		if (start > end)			\
			start = end;			\
		}
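/*
 * Example with illustrative values: for a map spanning
 * [0x10000, 0x80000), VM_MAP_RANGE_CHECK(map, start, end) clamps a
 * request of start = 0x8000, end = 0x90000 to start = 0x10000 and
 * end = 0x80000 before the range is used.
 */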

#ifndef UMA_MD_SMALL_ALLOC

/*
 * Allocate a new slab for kernel map entries.  The kernel map may be locked or
 * unlocked, depending on whether the request is coming from the kernel map or a
 * submap.  This function allocates a virtual address range directly from the
 * kernel map instead of the kmem_* layer to avoid recursion on the kernel map
 * lock and also to avoid triggering allocator recursion in the vmem boundary
 * tag allocator.
 */
static void *
kmapent_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
    int wait)
{
	vm_offset_t addr;
	int error, locked;

	*pflag = UMA_SLAB_PRIV;

	if (!(locked = vm_map_locked(kernel_map)))
		vm_map_lock(kernel_map);
	addr = vm_map_findspace(kernel_map, vm_map_min(kernel_map), bytes);
	if (addr + bytes < addr || addr + bytes > vm_map_max(kernel_map))
		panic("%s: kernel map is exhausted", __func__);
	error = vm_map_insert(kernel_map, NULL, 0, addr, addr + bytes,
	    VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
	if (error != KERN_SUCCESS)
		panic("%s: vm_map_insert() failed: %d", __func__, error);
	if (!locked)
		vm_map_unlock(kernel_map);
	error = kmem_back_domain(domain, kernel_object, addr, bytes, M_NOWAIT |
	    M_USE_RESERVE | (wait & M_ZERO));
	if (error == KERN_SUCCESS) {
		return ((void *)addr);
	} else {
		if (!locked)
			vm_map_lock(kernel_map);
		vm_map_delete(kernel_map, addr, bytes);
		if (!locked)
			vm_map_unlock(kernel_map);
		return (NULL);
	}
}

static void
kmapent_free(void *item, vm_size_t size, uint8_t pflag)
{
	vm_offset_t addr;
	int error;

	if ((pflag & UMA_SLAB_PRIV) == 0)
		/* XXX leaked */
		return;

	addr = (vm_offset_t)item;
	kmem_unback(kernel_object, addr, size);
	error = vm_map_remove(kernel_map, addr, addr + size);
	KASSERT(error == KERN_SUCCESS,
	    ("%s: vm_map_remove failed: %d", __func__, error));
}

/*
 * The worst-case upper bound on the number of kernel map entries that may be
 * created before the zone must be replenished in _vm_map_unlock().
 */
#define	KMAPENT_RESERVE		1

#endif /* !UMA_MD_SMALL_ALLOC */
24620f02659SMark Johnston 
2476fecb26bSKonstantin Belousov /*
2486fecb26bSKonstantin Belousov  *	vm_map_startup:
2496fecb26bSKonstantin Belousov  *
25020f02659SMark Johnston  *	Initialize the vm_map module.  Must be called before any other vm_map
25120f02659SMark Johnston  *	routines.
2526fecb26bSKonstantin Belousov  *
25320f02659SMark Johnston  *	User map and entry structures are allocated from the general purpose
25420f02659SMark Johnston  *	memory pool.  Kernel maps are statically defined.  Kernel map entries
25520f02659SMark Johnston  *	require special handling to avoid recursion; see the comments above
25620f02659SMark Johnston  *	kmapent_alloc() and in vm_map_entry_create().
2576fecb26bSKonstantin Belousov  */
2580d94caffSDavid Greenman void
2591b40f8c0SMatthew Dillon vm_map_startup(void)
260df8bae1dSRodney W. Grimes {
2613a92e5d5SAlan Cox 	mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
26220f02659SMark Johnston 
26320f02659SMark Johnston 	/*
26420f02659SMark Johnston 	 * Disable the use of per-CPU buckets: map entry allocation is
26520f02659SMark Johnston 	 * serialized by the kernel map lock.
26620f02659SMark Johnston 	 */
267670d17b5SJeff Roberson 	kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
26818aa2de5SJeff Roberson 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
26920f02659SMark Johnston 	    UMA_ZONE_VM | UMA_ZONE_NOBUCKET);
27020f02659SMark Johnston #ifndef UMA_MD_SMALL_ALLOC
27120f02659SMark Johnston 	/* Reserve an extra map entry for use when replenishing the reserve. */
27220f02659SMark Johnston 	uma_zone_reserve(kmapentzone, KMAPENT_RESERVE + 1);
27320f02659SMark Johnston 	uma_prealloc(kmapentzone, KMAPENT_RESERVE + 1);
27420f02659SMark Johnston 	uma_zone_set_allocf(kmapentzone, kmapent_alloc);
27520f02659SMark Johnston 	uma_zone_set_freef(kmapentzone, kmapent_free);
27620f02659SMark Johnston #endif
27720f02659SMark Johnston 
278670d17b5SJeff Roberson 	mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
279670d17b5SJeff Roberson 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2805df87b21SJeff Roberson 	vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
2815df87b21SJeff Roberson #ifdef INVARIANTS
2825df87b21SJeff Roberson 	    vmspace_zdtor,
2835df87b21SJeff Roberson #else
2845df87b21SJeff Roberson 	    NULL,
2855df87b21SJeff Roberson #endif
286f872f6eaSAlan Cox 	    vmspace_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
2878355f576SJeff Roberson }
2888355f576SJeff Roberson 
289b23f72e9SBrian Feldman static int
290b23f72e9SBrian Feldman vmspace_zinit(void *mem, int size, int flags)
2918355f576SJeff Roberson {
2928355f576SJeff Roberson 	struct vmspace *vm;
2938355f576SJeff Roberson 	vm_map_t map;
2948355f576SJeff Roberson 
2957dd979dfSMark Johnston 	vm = (struct vmspace *)mem;
2967dd979dfSMark Johnston 	map = &vm->vm_map;
2977dd979dfSMark Johnston 
298763d9566STim Kientzle 	memset(map, 0, sizeof(*map));
2997dd979dfSMark Johnston 	mtx_init(&map->system_mtx, "vm map (system)", NULL,
3007dd979dfSMark Johnston 	    MTX_DEF | MTX_DUPOK);
301e30df26eSAlan Cox 	sx_init(&map->lock, "vm map (user)");
3027dd979dfSMark Johnston 	PMAP_LOCK_INIT(vmspace_pmap(vm));
303b23f72e9SBrian Feldman 	return (0);
3048355f576SJeff Roberson }
3058355f576SJeff Roberson 
3068355f576SJeff Roberson #ifdef INVARIANTS
3078355f576SJeff Roberson static void
3088355f576SJeff Roberson vmspace_zdtor(void *mem, int size, void *arg)
3098355f576SJeff Roberson {
3108355f576SJeff Roberson 	struct vmspace *vm;
3118355f576SJeff Roberson 
3128355f576SJeff Roberson 	vm = (struct vmspace *)mem;
3137dd979dfSMark Johnston 	KASSERT(vm->vm_map.nentries == 0,
3147dd979dfSMark Johnston 	    ("vmspace %p nentries == %d on free", vm, vm->vm_map.nentries));
3157dd979dfSMark Johnston 	KASSERT(vm->vm_map.size == 0,
3167dd979dfSMark Johnston 	    ("vmspace %p size == %ju on free", vm, (uintmax_t)vm->vm_map.size));
3178355f576SJeff Roberson }
3188355f576SJeff Roberson #endif	/* INVARIANTS */
3198355f576SJeff Roberson 
320df8bae1dSRodney W. Grimes /*
321df8bae1dSRodney W. Grimes  * Allocate a vmspace structure, including a vm_map and pmap,
322df8bae1dSRodney W. Grimes  * and initialize those structures.  The refcnt is set to 1.
323df8bae1dSRodney W. Grimes  */
324df8bae1dSRodney W. Grimes struct vmspace *
32574d1d2b7SNeel Natu vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit)
326df8bae1dSRodney W. Grimes {
327c0877f10SJohn Dyson 	struct vmspace *vm;
3280d94caffSDavid Greenman 
329a163d034SWarner Losh 	vm = uma_zalloc(vmspace_zone, M_WAITOK);
33074d1d2b7SNeel Natu 	KASSERT(vm->vm_map.pmap == NULL, ("vm_map.pmap must be NULL"));
33174d1d2b7SNeel Natu 	if (!pinit(vmspace_pmap(vm))) {
33289b57fcfSKonstantin Belousov 		uma_zfree(vmspace_zone, vm);
33389b57fcfSKonstantin Belousov 		return (NULL);
33489b57fcfSKonstantin Belousov 	}
33521c641b2SJohn Baldwin 	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
33692351f16SAlan Cox 	_vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max);
337f7db0c95SMark Johnston 	refcount_init(&vm->vm_refcnt, 1);
3382d8acc0fSJohn Dyson 	vm->vm_shm = NULL;
33951ab6c28SAlan Cox 	vm->vm_swrss = 0;
34051ab6c28SAlan Cox 	vm->vm_tsize = 0;
34151ab6c28SAlan Cox 	vm->vm_dsize = 0;
34251ab6c28SAlan Cox 	vm->vm_ssize = 0;
34351ab6c28SAlan Cox 	vm->vm_taddr = 0;
34451ab6c28SAlan Cox 	vm->vm_daddr = 0;
34551ab6c28SAlan Cox 	vm->vm_maxsaddr = 0;
346df8bae1dSRodney W. Grimes 	return (vm);
347df8bae1dSRodney W. Grimes }
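/*
 * Example use (a sketch; the sysentvec fields shown are illustrative):
 * create a user address space with the machine-dependent pmap
 * initializer, checking for pmap initialization failure:
 *
 *	vm = vmspace_alloc(sv->sv_minuser, sv->sv_maxuser, pmap_pinit);
 *	if (vm == NULL)
 *		return (ENOMEM);
 */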

#ifdef RACCT
static void
vmspace_container_reset(struct proc *p)
{

	PROC_LOCK(p);
	racct_set(p, RACCT_DATA, 0);
	racct_set(p, RACCT_STACK, 0);
	racct_set(p, RACCT_RSS, 0);
	racct_set(p, RACCT_MEMLOCK, 0);
	racct_set(p, RACCT_VMEM, 0);
	PROC_UNLOCK(p);
}
#endif

static inline void
vmspace_dofree(struct vmspace *vm)
{

	CTR1(KTR_VM, "vmspace_free: %p", vm);

	/*
	 * Make sure any SysV shm is freed, it might not have been in
	 * exit1().
	 */
	shmexit(vm);

	/*
	 * Lock the map, to wait out all other references to it.
	 * Delete all of the mappings and pages they hold, then call
	 * the pmap module to reclaim anything left.
	 */
	(void)vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map),
	    vm_map_max(&vm->vm_map));

	pmap_release(vmspace_pmap(vm));
	vm->vm_map.pmap = NULL;
	uma_zfree(vmspace_zone, vm);
}

void
vmspace_free(struct vmspace *vm)
{

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "vmspace_free() called");

	if (refcount_release(&vm->vm_refcnt))
		vmspace_dofree(vm);
}

void
vmspace_exitfree(struct proc *p)
{
	struct vmspace *vm;

	PROC_VMSPACE_LOCK(p);
	vm = p->p_vmspace;
	p->p_vmspace = NULL;
	PROC_VMSPACE_UNLOCK(p);
	KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace"));
	vmspace_free(vm);
}

void
vmspace_exit(struct thread *td)
{
	struct vmspace *vm;
	struct proc *p;
	bool released;

	p = td->td_proc;
	vm = p->p_vmspace;

	/*
	 * Prepare to release the vmspace reference.  The thread that releases
	 * the last reference is responsible for tearing down the vmspace.
	 * However, threads not releasing the final reference must switch to the
	 * kernel's vmspace0 before the decrement so that the subsequent pmap
	 * deactivation does not modify a freed vmspace.
	 */
	refcount_acquire(&vmspace0.vm_refcnt);
	if (!(released = refcount_release_if_last(&vm->vm_refcnt))) {
		if (p->p_vmspace != &vmspace0) {
			PROC_VMSPACE_LOCK(p);
			p->p_vmspace = &vmspace0;
			PROC_VMSPACE_UNLOCK(p);
			pmap_activate(td);
		}
		released = refcount_release(&vm->vm_refcnt);
	}
	if (released) {
		/*
		 * pmap_remove_pages() expects the pmap to be active, so switch
		 * back first if necessary.
		 */
		if (p->p_vmspace != vm) {
			PROC_VMSPACE_LOCK(p);
			p->p_vmspace = vm;
			PROC_VMSPACE_UNLOCK(p);
			pmap_activate(td);
		}
		pmap_remove_pages(vmspace_pmap(vm));
		PROC_VMSPACE_LOCK(p);
		p->p_vmspace = &vmspace0;
		PROC_VMSPACE_UNLOCK(p);
		pmap_activate(td);
		vmspace_dofree(vm);
	}
#ifdef RACCT
	if (racct_enable)
		vmspace_container_reset(p);
#endif
}

/* Acquire reference to vmspace owned by another process. */

struct vmspace *
vmspace_acquire_ref(struct proc *p)
{
	struct vmspace *vm;

	PROC_VMSPACE_LOCK(p);
	vm = p->p_vmspace;
	if (vm == NULL || !refcount_acquire_if_not_zero(&vm->vm_refcnt)) {
		PROC_VMSPACE_UNLOCK(p);
		return (NULL);
	}
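	/*
	 * The process may have switched to a different vmspace while
	 * the reference was being acquired; if so, drop the reference
	 * just taken and fail.
	 */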
	if (vm != p->p_vmspace) {
		PROC_VMSPACE_UNLOCK(p);
		vmspace_free(vm);
		return (NULL);
	}
	PROC_VMSPACE_UNLOCK(p);
	return (vm);
}

/*
 * Switch between vmspaces in an AIO kernel process.
 *
 * The new vmspace is either the vmspace of a user process obtained
 * from an active AIO request or the initial vmspace of the AIO kernel
 * process (when it is idling).  Because user processes will block to
 * drain any active AIO requests before proceeding in exit() or
 * execve(), the reference count for vmspaces from AIO requests can
 * never be 0.  Similarly, AIO kernel processes hold an extra
 * reference on their initial vmspace for the life of the process.  As
 * a result, the 'newvm' vmspace always has a non-zero reference
 * count.  This permits an additional reference on 'newvm' to be
 * acquired via a simple atomic increment rather than the loop in
 * vmspace_acquire_ref() above.
 */
void
vmspace_switch_aio(struct vmspace *newvm)
{
	struct vmspace *oldvm;

	/* XXX: Need some way to assert that this is an aio daemon. */

	KASSERT(refcount_load(&newvm->vm_refcnt) > 0,
	    ("vmspace_switch_aio: newvm unreferenced"));

	oldvm = curproc->p_vmspace;
	if (oldvm == newvm)
		return;

	/*
	 * Point to the new address space and refer to it.
	 */
	curproc->p_vmspace = newvm;
	refcount_acquire(&newvm->vm_refcnt);

	/* Activate the new mapping. */
	pmap_activate(curthread);

	vmspace_free(oldvm);
}

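/*
 *	_vm_map_lock:
 *
 *	Acquires the map lock exclusively: the mutex for system maps,
 *	the sx lock for user maps.  The timestamp increment lets
 *	relocking paths (see _vm_map_lock_upgrade()) detect that the
 *	map may have changed while unlocked.
 */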
void
_vm_map_lock(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_lock_flags_(&map->system_mtx, 0, file, line);
	else
		sx_xlock_(&map->lock, file, line);
	map->timestamp++;
}

void
vm_map_entry_set_vnode_text(vm_map_entry_t entry, bool add)
{
	vm_object_t object;
	struct vnode *vp;
	bool vp_held;

	if ((entry->eflags & MAP_ENTRY_VN_EXEC) == 0)
		return;
	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
	    ("Submap with execs"));
	object = entry->object.vm_object;
	KASSERT(object != NULL, ("No object for text, entry %p", entry));
	if ((object->flags & OBJ_ANON) != 0)
		object = object->handle;
	else
		KASSERT(object->backing_object == NULL,
		    ("non-anon object %p shadows", object));
	KASSERT(object != NULL, ("No content object for text, entry %p obj %p",
	    entry, entry->object.vm_object));

	/*
	 * Mostly, we do not lock the backing object.  It is
	 * referenced by the entry we are processing, so it cannot go
	 * away.
	 */
	vp = NULL;
	vp_held = false;
	if (object->type == OBJT_DEAD) {
		/*
		 * For OBJT_DEAD objects, v_writecount was handled in
		 * vnode_pager_dealloc().
		 */
	} else if (object->type == OBJT_VNODE) {
		vp = object->handle;
	} else if (object->type == OBJT_SWAP) {
		KASSERT((object->flags & OBJ_TMPFS_NODE) != 0,
		    ("vm_map_entry_set_vnode_text: swap and !TMPFS "
		    "entry %p, object %p, add %d", entry, object, add));
		/*
		 * Tmpfs VREG node, which was reclaimed, has
		 * OBJ_TMPFS_NODE flag set, but not OBJ_TMPFS.  In
		 * this case there is no v_writecount to adjust.
		 */
		VM_OBJECT_RLOCK(object);
		if ((object->flags & OBJ_TMPFS) != 0) {
			vp = object->un_pager.swp.swp_tmpfs;
			if (vp != NULL) {
				vhold(vp);
				vp_held = true;
			}
		}
		VM_OBJECT_RUNLOCK(object);
	} else {
		KASSERT(0,
		    ("vm_map_entry_set_vnode_text: wrong object type, "
		    "entry %p, object %p, add %d", entry, object, add));
	}
	if (vp != NULL) {
		if (add) {
			VOP_SET_TEXT_CHECKED(vp);
		} else {
			vn_lock(vp, LK_SHARED | LK_RETRY);
			VOP_UNSET_TEXT_CHECKED(vp);
			VOP_UNLOCK(vp);
		}
		if (vp_held)
			vdrop(vp);
	}
}

/*
 * Use a different name for this vm_map_entry field when its use
 * is not consistent with its use as part of an ordered search tree.
 */
#define defer_next right

static void
vm_map_process_deferred(void)
{
	struct thread *td;
	vm_map_entry_t entry, next;
	vm_object_t object;

	td = curthread;
	entry = td->td_map_def_user;
	td->td_map_def_user = NULL;
	while (entry != NULL) {
		next = entry->defer_next;
		MPASS((entry->eflags & (MAP_ENTRY_WRITECNT |
		    MAP_ENTRY_VN_EXEC)) != (MAP_ENTRY_WRITECNT |
		    MAP_ENTRY_VN_EXEC));
		if ((entry->eflags & MAP_ENTRY_WRITECNT) != 0) {
			/*
			 * Decrement the object's writemappings and
			 * possibly the vnode's v_writecount.
			 */
			KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
			    ("Submap with writecount"));
			object = entry->object.vm_object;
			KASSERT(object != NULL, ("No object for writecount"));
			vm_pager_release_writecount(object, entry->start,
			    entry->end);
		}
		vm_map_entry_set_vnode_text(entry, false);
		vm_map_entry_deallocate(entry, FALSE);
		entry = next;
	}
}

#ifdef INVARIANTS
static void
_vm_map_assert_locked(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
	else
		sx_assert_(&map->lock, SA_XLOCKED, file, line);
}

#define	VM_MAP_ASSERT_LOCKED(map) \
    _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE)

enum { VMMAP_CHECK_NONE, VMMAP_CHECK_UNLOCK, VMMAP_CHECK_ALL };
#ifdef DIAGNOSTIC
static int enable_vmmap_check = VMMAP_CHECK_UNLOCK;
#else
static int enable_vmmap_check = VMMAP_CHECK_NONE;
#endif
SYSCTL_INT(_debug, OID_AUTO, vmmap_check, CTLFLAG_RWTUN,
    &enable_vmmap_check, 0, "Enable vm map consistency checking");

static void _vm_map_assert_consistent(vm_map_t map, int check);

#define VM_MAP_ASSERT_CONSISTENT(map) \
    _vm_map_assert_consistent(map, VMMAP_CHECK_ALL)
#ifdef DIAGNOSTIC
#define VM_MAP_UNLOCK_CONSISTENT(map) do {				\
	if (map->nupdates > map->nentries) {				\
		_vm_map_assert_consistent(map, VMMAP_CHECK_UNLOCK);	\
		map->nupdates = 0;					\
	}								\
} while (0)
#else
#define VM_MAP_UNLOCK_CONSISTENT(map)
#endif
#else
#define	VM_MAP_ASSERT_LOCKED(map)
#define VM_MAP_ASSERT_CONSISTENT(map)
#define VM_MAP_UNLOCK_CONSISTENT(map)
#endif /* INVARIANTS */

void
_vm_map_unlock(vm_map_t map, const char *file, int line)
{

	VM_MAP_UNLOCK_CONSISTENT(map);
	if (map->system_map) {
#ifndef UMA_MD_SMALL_ALLOC
		if (map == kernel_map && (map->flags & MAP_REPLENISH) != 0) {
			uma_prealloc(kmapentzone, 1);
			map->flags &= ~MAP_REPLENISH;
		}
#endif
		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
	} else {
		sx_xunlock_(&map->lock, file, line);
		vm_map_process_deferred();
	}
}

void
_vm_map_lock_read(vm_map_t map, const char *file, int line)
{

	if (map->system_map)
		mtx_lock_flags_(&map->system_mtx, 0, file, line);
	else
		sx_slock_(&map->lock, file, line);
}

void
_vm_map_unlock_read(vm_map_t map, const char *file, int line)
{

	if (map->system_map) {
		KASSERT((map->flags & MAP_REPLENISH) == 0,
		    ("%s: MAP_REPLENISH leaked", __func__));
		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
	} else {
		sx_sunlock_(&map->lock, file, line);
		vm_map_process_deferred();
	}
}

int
_vm_map_trylock(vm_map_t map, const char *file, int line)
{
	int error;

	error = map->system_map ?
	    !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
	    !sx_try_xlock_(&map->lock, file, line);
	if (error == 0)
		map->timestamp++;
	return (error == 0);
}

int
_vm_map_trylock_read(vm_map_t map, const char *file, int line)
{
	int error;

	error = map->system_map ?
	    !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
	    !sx_try_slock_(&map->lock, file, line);
	return (error == 0);
}

/*
 *	_vm_map_lock_upgrade:	[ internal use only ]
 *
 *	Tries to upgrade a read (shared) lock on the specified map to a write
 *	(exclusive) lock.  Returns the value "0" if the upgrade succeeds and a
 *	non-zero value if the upgrade fails.  If the upgrade fails, the map is
 *	returned without a read or write lock held.
 *
 *	Requires that the map be read locked.
 */
int
_vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
{
	unsigned int last_timestamp;

	if (map->system_map) {
		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
	} else {
		if (!sx_try_upgrade_(&map->lock, file, line)) {
			last_timestamp = map->timestamp;
			sx_sunlock_(&map->lock, file, line);
			vm_map_process_deferred();
			/*
			 * If the map's timestamp does not change while the
			 * map is unlocked, then the upgrade succeeds.
			 */
			sx_xlock_(&map->lock, file, line);
			if (last_timestamp != map->timestamp) {
				sx_xunlock_(&map->lock, file, line);
				return (1);
			}
		}
	}
	map->timestamp++;
	return (0);
}
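/*
 * Illustrative caller pattern (a sketch, not code from this file): on
 * upgrade failure the caller must relock and redo its lookup, since
 * the map may have changed while it was unlocked:
 *
 *	if (vm_map_lock_upgrade(map)) {
 *		vm_map_lock(map);
 *		goto RetryLookup;
 *	}
 */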

void
_vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
{

	if (map->system_map) {
		KASSERT((map->flags & MAP_REPLENISH) == 0,
		    ("%s: MAP_REPLENISH leaked", __func__));
		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
	} else {
		VM_MAP_UNLOCK_CONSISTENT(map);
		sx_downgrade_(&map->lock, file, line);
	}
}

/*
 *	vm_map_locked:
 *
 *	Returns a non-zero value if the caller holds a write (exclusive) lock
 *	on the specified map and the value "0" otherwise.
 */
int
vm_map_locked(vm_map_t map)
{

	if (map->system_map)
		return (mtx_owned(&map->system_mtx));
	else
		return (sx_xlocked(&map->lock));
}

/*
 *	_vm_map_unlock_and_wait:
 *
 *	Atomically releases the lock on the specified map and puts the calling
 *	thread to sleep.  The calling thread will remain asleep until either
 *	vm_map_wakeup() is performed on the map or the specified timeout is
 *	exceeded.
 *
 *	WARNING!  This function does not perform deferred deallocations of
 *	objects and map entries.  Therefore, the calling thread is expected to
 *	reacquire the map lock after reawakening and later perform an ordinary
 *	unlock operation, such as vm_map_unlock(), before completing its
 *	operation on the map.
 */
int
_vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line)
{

	VM_MAP_UNLOCK_CONSISTENT(map);
	mtx_lock(&map_sleep_mtx);
	if (map->system_map) {
		KASSERT((map->flags & MAP_REPLENISH) == 0,
		    ("%s: MAP_REPLENISH leaked", __func__));
		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
	} else {
		sx_xunlock_(&map->lock, file, line);
	}
	return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps",
	    timo));
}
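/*
 * Typical use (an illustrative sketch): sleep until another thread
 * changes the map, then reacquire the lock and re-evaluate:
 *
 *	map->needs_wakeup = TRUE;
 *	(void)vm_map_unlock_and_wait(map, 0);
 *	vm_map_lock(map);
 */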

/*
 *	vm_map_wakeup:
 *
 *	Awaken any threads that have slept on the map using
 *	vm_map_unlock_and_wait().
 */
void
vm_map_wakeup(vm_map_t map)
{

	/*
	 * Acquire and release map_sleep_mtx to prevent a wakeup()
	 * from being performed (and lost) between the map unlock
	 * and the msleep() in _vm_map_unlock_and_wait().
	 */
	mtx_lock(&map_sleep_mtx);
	mtx_unlock(&map_sleep_mtx);
	wakeup(&map->root);
}

void
vm_map_busy(vm_map_t map)
{

	VM_MAP_ASSERT_LOCKED(map);
	map->busy++;
}

void
vm_map_unbusy(vm_map_t map)
{

	VM_MAP_ASSERT_LOCKED(map);
	KASSERT(map->busy, ("vm_map_unbusy: not busy"));
	if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) {
		vm_map_modflags(map, 0, MAP_BUSY_WAKEUP);
		wakeup(&map->busy);
	}
}

void
vm_map_wait_busy(vm_map_t map)
{

	VM_MAP_ASSERT_LOCKED(map);
	while (map->busy) {
		vm_map_modflags(map, MAP_BUSY_WAKEUP, 0);
		if (map->system_map)
			msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0);
		else
			sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0);
	}
	map->timestamp++;
}

long
vmspace_resident_count(struct vmspace *vmspace)
{
	return pmap_resident_count(vmspace_pmap(vmspace));
}

/*
 * Initialize an existing vm_map structure
 * such as that in the vmspace structure.
 */
static void
_vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
{

	map->header.eflags = MAP_ENTRY_HEADER;
	map->needs_wakeup = FALSE;
	map->system_map = 0;
	map->pmap = pmap;
	map->header.end = min;
	map->header.start = max;
	map->flags = 0;
	map->header.left = map->header.right = &map->header;
	map->root = NULL;
	map->timestamp = 0;
	map->busy = 0;
	map->anon_loc = 0;
#ifdef DIAGNOSTIC
	map->nupdates = 0;
#endif
}

void
vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
{

	_vm_map_init(map, pmap, min, max);
	mtx_init(&map->system_mtx, "vm map (system)", NULL,
	    MTX_DEF | MTX_DUPOK);
	sx_init(&map->lock, "vm map (user)");
}

/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 */
static void
vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
{
	uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
}

/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion.
 *	No entry fields are filled in.
 */
static vm_map_entry_t
vm_map_entry_create(vm_map_t map)
{
	vm_map_entry_t new_entry;

#ifndef UMA_MD_SMALL_ALLOC
	if (map == kernel_map) {
		VM_MAP_ASSERT_LOCKED(map);

		/*
		 * A new slab of kernel map entries cannot be allocated at this
		 * point because the kernel map has not yet been updated to
		 * reflect the caller's request.  Therefore, we allocate a new
		 * map entry, dipping into the reserve if necessary, and set a
		 * flag indicating that the reserve must be replenished before
		 * the map is unlocked.
		 */
		new_entry = uma_zalloc(kmapentzone, M_NOWAIT | M_NOVM);
		if (new_entry == NULL) {
			new_entry = uma_zalloc(kmapentzone,
			    M_NOWAIT | M_NOVM | M_USE_RESERVE);
			kernel_map->flags |= MAP_REPLENISH;
		}
	} else
#endif
	if (map->system_map) {
		new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
	} else {
		new_entry = uma_zalloc(mapentzone, M_WAITOK);
	}
	KASSERT(new_entry != NULL,
	    ("vm_map_entry_create: kernel resources exhausted"));
	return (new_entry);
}

/*
 *	vm_map_entry_set_behavior:
 *
 *	Set the expected access behavior, either normal, random, or
 *	sequential.
 */
static inline void
vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
{
	entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
	    (behavior & MAP_ENTRY_BEHAV_MASK);
}

/*
 *	vm_map_entry_max_free_{left,right}:
 *
 *	Compute the size of the largest free gap between two entries,
 *	one the root of a tree and the other the ancestor of that root
 *	that is the least or greatest ancestor found on the search path.
 */
static inline vm_size_t
vm_map_entry_max_free_left(vm_map_entry_t root, vm_map_entry_t left_ancestor)
{

	return (root->left != left_ancestor ?
	    root->left->max_free : root->start - left_ancestor->end);
}

static inline vm_size_t
vm_map_entry_max_free_right(vm_map_entry_t root, vm_map_entry_t right_ancestor)
{

	return (root->right != right_ancestor ?
	    root->right->max_free : right_ancestor->start - root->end);
}
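/*
 * Worked example (illustrative): if root->end == 0x4000, root has no
 * right subtree, and root's right pointer leads to an ancestor whose
 * start is 0x9000, then vm_map_entry_max_free_right() reports a free
 * gap of 0x5000 bytes.
 */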

/*
 *	vm_map_entry_{pred,succ}:
 *
 *	Find the {predecessor, successor} of the entry by taking one step
 *	in the appropriate direction and backtracking as much as necessary.
 *	vm_map_entry_succ is defined in vm_map.h.
 */
static inline vm_map_entry_t
vm_map_entry_pred(vm_map_entry_t entry)
{
	vm_map_entry_t prior;

	prior = entry->left;
	if (prior->right->start < entry->start) {
		do
			prior = prior->right;
		while (prior->right != entry);
	}
	return (prior);
}

static inline vm_size_t
vm_size_max(vm_size_t a, vm_size_t b)
{

	return (a > b ? a : b);
}

#define SPLAY_LEFT_STEP(root, y, llist, rlist, test) do {		\
	vm_map_entry_t z;						\
	vm_size_t max_free;						\
									\
	/*								\
	 * Infer root->right->max_free == root->max_free when		\
	 * y->max_free < root->max_free || root->max_free == 0.	\
	 * Otherwise, look right to find it.				\
	 */								\
	y = root->left;							\
	max_free = root->max_free;					\
	KASSERT(max_free == vm_size_max(				\
	    vm_map_entry_max_free_left(root, llist),			\
	    vm_map_entry_max_free_right(root, rlist)),			\
	    ("%s: max_free invariant fails", __func__));		\
	if (max_free - 1 < vm_map_entry_max_free_left(root, llist))	\
		max_free = vm_map_entry_max_free_right(root, rlist);	\
	if (y != llist && (test)) {					\
		/* Rotate right and make y root. */			\
		z = y->right;						\
		if (z != root) {					\
			root->left = z;					\
			y->right = root;				\
			if (max_free < y->max_free)			\
			    root->max_free = max_free =			\
			    vm_size_max(max_free, z->max_free);		\
		} else if (max_free < y->max_free)			\
			root->max_free = max_free =			\
			    vm_size_max(max_free, root->start - y->end);\
		root = y;						\
		y = root->left;						\
	}								\
	/* Copy right->max_free.  Put root on rlist. */			\
	root->max_free = max_free;					\
	KASSERT(max_free == vm_map_entry_max_free_right(root, rlist),	\
	    ("%s: max_free not copied from right", __func__));		\
	root->left = rlist;						\
	rlist = root;							\
	root = y != llist ? y : NULL;					\
} while (0)

#define SPLAY_RIGHT_STEP(root, y, llist, rlist, test) do {		\
	vm_map_entry_t z;						\
	vm_size_t max_free;						\
									\
	/*								\
	 * Infer root->left->max_free == root->max_free when		\
	 * y->max_free < root->max_free || root->max_free == 0.	\
	 * Otherwise, look left to find it.				\
	 */								\
	y = root->right;						\
	max_free = root->max_free;					\
	KASSERT(max_free == vm_size_max(				\
	    vm_map_entry_max_free_left(root, llist),			\
	    vm_map_entry_max_free_right(root, rlist)),			\
	    ("%s: max_free invariant fails", __func__));		\
	if (max_free - 1 < vm_map_entry_max_free_right(root, rlist))	\
		max_free = vm_map_entry_max_free_left(root, llist);	\
	if (y != rlist && (test)) {					\
		/* Rotate left and make y root. */			\
		z = y->left;						\
		if (z != root) {					\
			root->right = z;				\
			y->left = root;					\
11325a0879daSDoug Moore 			if (max_free < y->max_free)			\
113385b7bedbSDoug Moore 			    root->max_free = max_free =			\
1134c1ad5342SDoug Moore 			    vm_size_max(max_free, z->max_free);		\
1135c1ad5342SDoug Moore 		} else if (max_free < y->max_free)			\
1136c1ad5342SDoug Moore 			root->max_free = max_free =			\
1137c1ad5342SDoug Moore 			    vm_size_max(max_free, y->start - root->end);\
11389f701172SKonstantin Belousov 		root = y;						\
11399f701172SKonstantin Belousov 		y = root->right;					\
11409f701172SKonstantin Belousov 	}								\
11415a0879daSDoug Moore 	/* Copy left->max_free.  Put root on llist. */			\
11425a0879daSDoug Moore 	root->max_free = max_free;					\
11435a0879daSDoug Moore 	KASSERT(max_free == vm_map_entry_max_free_left(root, llist),	\
11445a0879daSDoug Moore 	    ("%s: max_free not copied from left", __func__));		\
11459f701172SKonstantin Belousov 	root->right = llist;						\
11469f701172SKonstantin Belousov 	llist = root;							\
1147c1ad5342SDoug Moore 	root = y != rlist ? y : NULL;					\
11489f701172SKonstantin Belousov } while (0)
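/*
 * Note on the "max_free - 1 < ..." tests above (explanatory comment
 * only): with unsigned arithmetic, "max_free - 1 < gap" is equivalent
 * to "gap >= max_free" and additionally evaluates false when max_free
 * is 0, because the subtraction wraps.  Since the cached max_free is
 * the maximum of the left and right contributions, the test reads: if
 * the near-side gap alone accounts for the cached maximum, the far
 * side's contribution is unknown and must be recomputed; otherwise the
 * far side must already equal max_free.
 */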
11499f701172SKonstantin Belousov 
11500164e057SAlan Cox /*
1151c1ad5342SDoug Moore  * Walk down the tree until we find addr or a gap where addr would go, breaking
1152c1ad5342SDoug Moore  * off left and right subtrees of nodes less than, or greater than, addr.  Treat
1153c1ad5342SDoug Moore  * subtrees with root->max_free < length as empty trees.  llist and rlist are
1154c1ad5342SDoug Moore  * the two sides in reverse order (bottom-up), with llist linked by the right
1155c1ad5342SDoug Moore  * pointer and rlist linked by the left pointer in the vm_map_entry, and both
1156c1ad5342SDoug Moore  * lists terminated by &map->header.  This function, and the subsequent call to
1157c1ad5342SDoug Moore  * vm_map_splay_merge_{left,right,pred,succ}, rely on the start and end address
11585a0879daSDoug Moore  * values in &map->header.
11594e94f402SAlan Cox  */
11601867d2f2SDoug Moore static __always_inline vm_map_entry_t
11615a0879daSDoug Moore vm_map_splay_split(vm_map_t map, vm_offset_t addr, vm_size_t length,
11621867d2f2SDoug Moore     vm_map_entry_t *llist, vm_map_entry_t *rlist)
11634e94f402SAlan Cox {
1164c1ad5342SDoug Moore 	vm_map_entry_t left, right, root, y;
11654e94f402SAlan Cox 
1166c1ad5342SDoug Moore 	left = right = &map->header;
11675a0879daSDoug Moore 	root = map->root;
11689f701172SKonstantin Belousov 	while (root != NULL && root->max_free >= length) {
1169c1ad5342SDoug Moore 		KASSERT(left->end <= root->start &&
1170c1ad5342SDoug Moore 		    root->end <= right->start,
11715a0879daSDoug Moore 		    ("%s: root not within tree bounds", __func__));
11720164e057SAlan Cox 		if (addr < root->start) {
1173c1ad5342SDoug Moore 			SPLAY_LEFT_STEP(root, y, left, right,
11749f701172SKonstantin Belousov 			    y->max_free >= length && addr < y->start);
11757438d60bSAlan Cox 		} else if (addr >= root->end) {
1176c1ad5342SDoug Moore 			SPLAY_RIGHT_STEP(root, y, left, right,
11779f701172SKonstantin Belousov 			    y->max_free >= length && addr >= y->end);
11787438d60bSAlan Cox 		} else
11797438d60bSAlan Cox 			break;
11800164e057SAlan Cox 	}
1181c1ad5342SDoug Moore 	*llist = left;
1182c1ad5342SDoug Moore 	*rlist = right;
11839f701172SKonstantin Belousov 	return (root);
11849f701172SKonstantin Belousov }
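/*
 * Worked example (illustrative comment only): splitting at addr =
 * 0x7000, length = 0, in the three-entry tree
 *
 *               [0x4000,0x5000)
 *              /               \
 *     [0x1000,0x2000)    [0x8000,0x9000)
 *
 * first pushes [0x4000,0x5000) onto llist (addr >= its end), then
 * pushes [0x8000,0x9000) onto rlist (addr < its start) and stops.  The
 * function returns NULL with *llist topped by the entry at 0x4000 and
 * *rlist topped by the entry at 0x8000, bracketing the gap that
 * contains 0x7000.
 */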
11859f701172SKonstantin Belousov 
11861867d2f2SDoug Moore static __always_inline void
11871867d2f2SDoug Moore vm_map_splay_findnext(vm_map_entry_t root, vm_map_entry_t *rlist)
11889f701172SKonstantin Belousov {
1189c1ad5342SDoug Moore 	vm_map_entry_t hi, right, y;
11909f701172SKonstantin Belousov 
1191c1ad5342SDoug Moore 	right = *rlist;
1192c1ad5342SDoug Moore 	hi = root->right == right ? NULL : root->right;
1193c1ad5342SDoug Moore 	if (hi == NULL)
1194c1ad5342SDoug Moore 		return;
1195c1ad5342SDoug Moore 	do
1196c1ad5342SDoug Moore 		SPLAY_LEFT_STEP(hi, y, root, right, true);
1197c1ad5342SDoug Moore 	while (hi != NULL);
1198c1ad5342SDoug Moore 	*rlist = right;
11999f701172SKonstantin Belousov }
12009f701172SKonstantin Belousov 
12011867d2f2SDoug Moore static __always_inline void
12021867d2f2SDoug Moore vm_map_splay_findprev(vm_map_entry_t root, vm_map_entry_t *llist)
12039f701172SKonstantin Belousov {
1204c1ad5342SDoug Moore 	vm_map_entry_t left, lo, y;
12059f701172SKonstantin Belousov 
1206c1ad5342SDoug Moore 	left = *llist;
1207c1ad5342SDoug Moore 	lo = root->left == left ? NULL : root->left;
1208c1ad5342SDoug Moore 	if (lo == NULL)
1209c1ad5342SDoug Moore 		return;
1210c1ad5342SDoug Moore 	do
1211c1ad5342SDoug Moore 		SPLAY_RIGHT_STEP(lo, y, left, root, true);
1212c1ad5342SDoug Moore 	while (lo != NULL);
1213c1ad5342SDoug Moore 	*llist = left;
12149f701172SKonstantin Belousov }
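/*
 * Usage sketch (illustrative comment only): a caller that intends to
 * remove or replace the entry returned by vm_map_splay_split()
 * typically flattens both of its subtrees onto the spines first:
 *
 *	root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist);
 *	vm_map_splay_findprev(root, &llist);	(left subtree -> llist)
 *	vm_map_splay_findnext(root, &rlist);	(right subtree -> rlist)
 *
 * leaving llist topped by root's predecessor and rlist topped by its
 * successor, as vm_map_entry_unlink() below does.
 */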
12150164e057SAlan Cox 
12165a0879daSDoug Moore static inline void
12175a0879daSDoug Moore vm_map_entry_swap(vm_map_entry_t *a, vm_map_entry_t *b)
12185a0879daSDoug Moore {
12195a0879daSDoug Moore 	vm_map_entry_t tmp;
12205a0879daSDoug Moore 
12215a0879daSDoug Moore 	tmp = *b;
12225a0879daSDoug Moore 	*b = *a;
12235a0879daSDoug Moore 	*a = tmp;
12245a0879daSDoug Moore }
12255a0879daSDoug Moore 
12260164e057SAlan Cox /*
12279f701172SKonstantin Belousov  * Walk back up the two spines, flip the pointers and set max_free.  The
12289f701172SKonstantin Belousov  * subtrees of the root go at the bottom of llist and rlist.
12290164e057SAlan Cox  */
123085b7bedbSDoug Moore static vm_size_t
123185b7bedbSDoug Moore vm_map_splay_merge_left_walk(vm_map_entry_t header, vm_map_entry_t root,
123285b7bedbSDoug Moore     vm_map_entry_t tail, vm_size_t max_free, vm_map_entry_t llist)
12339f701172SKonstantin Belousov {
12345a0879daSDoug Moore 	do {
12350164e057SAlan Cox 		/*
12365a0879daSDoug Moore 		 * The max_free values of the children of llist are in
123785b7bedbSDoug Moore 		 * llist->max_free and max_free.  Update with the
12385a0879daSDoug Moore 		 * max value.
12390164e057SAlan Cox 		 */
124085b7bedbSDoug Moore 		llist->max_free = max_free =
124185b7bedbSDoug Moore 		    vm_size_max(llist->max_free, max_free);
124285b7bedbSDoug Moore 		vm_map_entry_swap(&llist->right, &tail);
124385b7bedbSDoug Moore 		vm_map_entry_swap(&tail, &llist);
124485b7bedbSDoug Moore 	} while (llist != header);
124585b7bedbSDoug Moore 	root->left = tail;
124685b7bedbSDoug Moore 	return (max_free);
12475a0879daSDoug Moore }
124885b7bedbSDoug Moore 
124985b7bedbSDoug Moore /*
125085b7bedbSDoug Moore  * When llist is known to be the predecessor of root.
125185b7bedbSDoug Moore  */
125285b7bedbSDoug Moore static inline vm_size_t
125385b7bedbSDoug Moore vm_map_splay_merge_pred(vm_map_entry_t header, vm_map_entry_t root,
125485b7bedbSDoug Moore     vm_map_entry_t llist)
125585b7bedbSDoug Moore {
125685b7bedbSDoug Moore 	vm_size_t max_free;
125785b7bedbSDoug Moore 
125885b7bedbSDoug Moore 	max_free = root->start - llist->end;
125985b7bedbSDoug Moore 	if (llist != header) {
126085b7bedbSDoug Moore 		max_free = vm_map_splay_merge_left_walk(header, root,
1261c1ad5342SDoug Moore 		    root, max_free, llist);
126285b7bedbSDoug Moore 	} else {
1263c1ad5342SDoug Moore 		root->left = header;
1264c1ad5342SDoug Moore 		header->right = root;
126585b7bedbSDoug Moore 	}
126685b7bedbSDoug Moore 	return (max_free);
126785b7bedbSDoug Moore }
126885b7bedbSDoug Moore 
126985b7bedbSDoug Moore /*
127085b7bedbSDoug Moore  * When llist may or may not be the predecessor of root.
127185b7bedbSDoug Moore  */
127285b7bedbSDoug Moore static inline vm_size_t
127385b7bedbSDoug Moore vm_map_splay_merge_left(vm_map_entry_t header, vm_map_entry_t root,
127485b7bedbSDoug Moore     vm_map_entry_t llist)
127585b7bedbSDoug Moore {
127685b7bedbSDoug Moore 	vm_size_t max_free;
127785b7bedbSDoug Moore 
127885b7bedbSDoug Moore 	max_free = vm_map_entry_max_free_left(root, llist);
127985b7bedbSDoug Moore 	if (llist != header) {
128085b7bedbSDoug Moore 		max_free = vm_map_splay_merge_left_walk(header, root,
1281c1ad5342SDoug Moore 		    root->left == llist ? root : root->left,
1282c1ad5342SDoug Moore 		    max_free, llist);
128385b7bedbSDoug Moore 	}
128485b7bedbSDoug Moore 	return (max_free);
128585b7bedbSDoug Moore }
128685b7bedbSDoug Moore 
128785b7bedbSDoug Moore static vm_size_t
128885b7bedbSDoug Moore vm_map_splay_merge_right_walk(vm_map_entry_t header, vm_map_entry_t root,
128985b7bedbSDoug Moore     vm_map_entry_t tail, vm_size_t max_free, vm_map_entry_t rlist)
129085b7bedbSDoug Moore {
12915a0879daSDoug Moore 	do {
12925a0879daSDoug Moore 		/*
12935a0879daSDoug Moore 		 * The max_free values of the children of rlist are in
129485b7bedbSDoug Moore 		 * rlist->max_free and max_free.  Update with the
12955a0879daSDoug Moore 		 * max value.
12965a0879daSDoug Moore 		 */
129785b7bedbSDoug Moore 		rlist->max_free = max_free =
129885b7bedbSDoug Moore 		    vm_size_max(rlist->max_free, max_free);
129985b7bedbSDoug Moore 		vm_map_entry_swap(&rlist->left, &tail);
130085b7bedbSDoug Moore 		vm_map_entry_swap(&tail, &rlist);
130185b7bedbSDoug Moore 	} while (rlist != header);
130285b7bedbSDoug Moore 	root->right = tail;
130385b7bedbSDoug Moore 	return (max_free);
13045a0879daSDoug Moore }
130585b7bedbSDoug Moore 
130685b7bedbSDoug Moore /*
130785b7bedbSDoug Moore  * When rlist is known to be the successor of root.
130885b7bedbSDoug Moore  */
130985b7bedbSDoug Moore static inline vm_size_t
131085b7bedbSDoug Moore vm_map_splay_merge_succ(vm_map_entry_t header, vm_map_entry_t root,
131185b7bedbSDoug Moore     vm_map_entry_t rlist)
131285b7bedbSDoug Moore {
131385b7bedbSDoug Moore 	vm_size_t max_free;
131485b7bedbSDoug Moore 
131585b7bedbSDoug Moore 	max_free = rlist->start - root->end;
131685b7bedbSDoug Moore 	if (rlist != header) {
131785b7bedbSDoug Moore 		max_free = vm_map_splay_merge_right_walk(header, root,
1318c1ad5342SDoug Moore 		    root, max_free, rlist);
131985b7bedbSDoug Moore 	} else {
1320c1ad5342SDoug Moore 		root->right = header;
1321c1ad5342SDoug Moore 		header->left = root;
132285b7bedbSDoug Moore 	}
132385b7bedbSDoug Moore 	return (max_free);
132485b7bedbSDoug Moore }
132585b7bedbSDoug Moore 
132685b7bedbSDoug Moore /*
132785b7bedbSDoug Moore  * When rlist may or may not be the successor of root.
132885b7bedbSDoug Moore  */
132985b7bedbSDoug Moore static inline vm_size_t
133085b7bedbSDoug Moore vm_map_splay_merge_right(vm_map_entry_t header, vm_map_entry_t root,
133185b7bedbSDoug Moore     vm_map_entry_t rlist)
133285b7bedbSDoug Moore {
133385b7bedbSDoug Moore 	vm_size_t max_free;
133485b7bedbSDoug Moore 
133585b7bedbSDoug Moore 	max_free = vm_map_entry_max_free_right(root, rlist);
133685b7bedbSDoug Moore 	if (rlist != header) {
133785b7bedbSDoug Moore 		max_free = vm_map_splay_merge_right_walk(header, root,
1338c1ad5342SDoug Moore 		    root->right == rlist ? root : root->right,
1339c1ad5342SDoug Moore 		    max_free, rlist);
134085b7bedbSDoug Moore 	}
134185b7bedbSDoug Moore 	return (max_free);
13424e94f402SAlan Cox }
13434e94f402SAlan Cox 
13444e94f402SAlan Cox /*
1345d1d3f7e1SDoug Moore  *	vm_map_splay:
1346d1d3f7e1SDoug Moore  *
1347d1d3f7e1SDoug Moore  *	The Sleator and Tarjan top-down splay algorithm with the
1348d1d3f7e1SDoug Moore  *	following variation.  Max_free must be computed bottom-up, so
1349d1d3f7e1SDoug Moore  *	on the downward pass, maintain the left and right spines in
1350d1d3f7e1SDoug Moore  *	reverse order.  Then, make a second pass up each side to fix
1351d1d3f7e1SDoug Moore  *	the pointers and compute max_free.  The time bound is O(log n)
1352d1d3f7e1SDoug Moore  *	amortized.
1353d1d3f7e1SDoug Moore  *
1354c1ad5342SDoug Moore  *	The tree is threaded, which means that there are no null pointers.
1355c1ad5342SDoug Moore  *	When a node has no left child, its left pointer points to its
1356c1ad5342SDoug Moore  *	predecessor, which is the last ancestor on the search path from the root
1357c1ad5342SDoug Moore  *	where the search branched right.  Likewise, when a node has no right
1358c1ad5342SDoug Moore  *	child, its right pointer points to its successor.  The map header node
1359c1ad5342SDoug Moore  *	is the predecessor of the first map entry, and the successor of the
1360c1ad5342SDoug Moore  *	last.
1361c1ad5342SDoug Moore  *
1362d1d3f7e1SDoug Moore  *	The new root is the vm_map_entry containing "addr", or else an
1363d1d3f7e1SDoug Moore  *	adjacent entry (lower if possible) if addr is not in the tree.
1364d1d3f7e1SDoug Moore  *
1365d1d3f7e1SDoug Moore  *	The map must be locked on entry, and is left locked on return.
1366d1d3f7e1SDoug Moore  *
1367d1d3f7e1SDoug Moore  *	Returns: the new root.
1368d1d3f7e1SDoug Moore  */
1369d1d3f7e1SDoug Moore static vm_map_entry_t
1370d1d3f7e1SDoug Moore vm_map_splay(vm_map_t map, vm_offset_t addr)
1371d1d3f7e1SDoug Moore {
137285b7bedbSDoug Moore 	vm_map_entry_t header, llist, rlist, root;
137385b7bedbSDoug Moore 	vm_size_t max_free_left, max_free_right;
1374d1d3f7e1SDoug Moore 
137585b7bedbSDoug Moore 	header = &map->header;
1376d1d3f7e1SDoug Moore 	root = vm_map_splay_split(map, addr, 0, &llist, &rlist);
1377d1d3f7e1SDoug Moore 	if (root != NULL) {
137885b7bedbSDoug Moore 		max_free_left = vm_map_splay_merge_left(header, root, llist);
137985b7bedbSDoug Moore 		max_free_right = vm_map_splay_merge_right(header, root, rlist);
138085b7bedbSDoug Moore 	} else if (llist != header) {
1381d1d3f7e1SDoug Moore 		/*
1382d1d3f7e1SDoug Moore 		 * Recover the greatest node in the left
1383d1d3f7e1SDoug Moore 		 * subtree and make it the root.
1384d1d3f7e1SDoug Moore 		 */
1385d1d3f7e1SDoug Moore 		root = llist;
1386d1d3f7e1SDoug Moore 		llist = root->right;
138785b7bedbSDoug Moore 		max_free_left = vm_map_splay_merge_left(header, root, llist);
138885b7bedbSDoug Moore 		max_free_right = vm_map_splay_merge_succ(header, root, rlist);
138985b7bedbSDoug Moore 	} else if (rlist != header) {
1390d1d3f7e1SDoug Moore 		/*
1391d1d3f7e1SDoug Moore 		 * Recover the least node in the right
1392d1d3f7e1SDoug Moore 		 * subtree and make it the root.
1393d1d3f7e1SDoug Moore 		 */
1394d1d3f7e1SDoug Moore 		root = rlist;
1395d1d3f7e1SDoug Moore 		rlist = root->left;
139685b7bedbSDoug Moore 		max_free_left = vm_map_splay_merge_pred(header, root, llist);
139785b7bedbSDoug Moore 		max_free_right = vm_map_splay_merge_right(header, root, rlist);
1398d1d3f7e1SDoug Moore 	} else {
1399d1d3f7e1SDoug Moore 		/* There is no root. */
1400d1d3f7e1SDoug Moore 		return (NULL);
1401d1d3f7e1SDoug Moore 	}
140285b7bedbSDoug Moore 	root->max_free = vm_size_max(max_free_left, max_free_right);
140385b7bedbSDoug Moore 	map->root = root;
1404d1d3f7e1SDoug Moore 	VM_MAP_ASSERT_CONSISTENT(map);
1405d1d3f7e1SDoug Moore 	return (root);
1406d1d3f7e1SDoug Moore }
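/*
 * Threading example (illustrative comment only): in a map with entries
 * A < B < C, where B is the root with children A and C, the leaf
 * pointers are threads rather than NULL:
 *
 *	A->left  == &map->header	(A has no predecessor entry)
 *	A->right == B			(A's successor)
 *	C->left  == B			(C's predecessor)
 *	C->right == &map->header	(C has no successor entry)
 *
 * Distinguishing a child from a thread takes context; for example,
 * vm_map_entry_pred() above tests prior->right->start < entry->start
 * to decide whether prior has a real right subtree.
 */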
1407d1d3f7e1SDoug Moore 
1408d1d3f7e1SDoug Moore /*
1409df8bae1dSRodney W. Grimes  *	vm_map_entry_{un,}link:
1410df8bae1dSRodney W. Grimes  *
1411668a8aa8SDoug Moore  *	Insert/remove entries from maps.  On linking, if new entry clips
1412668a8aa8SDoug Moore  *	existing entry, trim existing entry to avoid overlap, and manage
1413668a8aa8SDoug Moore  *	offsets.  On unlinking, merge disappearing entry with neighbor, if
1414668a8aa8SDoug Moore  *	called for, and manage offsets.  Callers should not modify fields in
1415668a8aa8SDoug Moore  *	entries already mapped.
1416df8bae1dSRodney W. Grimes  */
14174e94f402SAlan Cox static void
14185a0879daSDoug Moore vm_map_entry_link(vm_map_t map, vm_map_entry_t entry)
141999c81ca9SAlan Cox {
142085b7bedbSDoug Moore 	vm_map_entry_t header, llist, rlist, root;
1421668a8aa8SDoug Moore 	vm_size_t max_free_left, max_free_right;
142221c641b2SJohn Baldwin 
14239f701172SKonstantin Belousov 	CTR3(KTR_VM,
14249f701172SKonstantin Belousov 	    "vm_map_entry_link: map %p, nentries %d, entry %p", map,
14259f701172SKonstantin Belousov 	    map->nentries, entry);
14263a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
142799c81ca9SAlan Cox 	map->nentries++;
142885b7bedbSDoug Moore 	header = &map->header;
14295a0879daSDoug Moore 	root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist);
1430668a8aa8SDoug Moore 	if (root == NULL) {
1431668a8aa8SDoug Moore 		/*
1432668a8aa8SDoug Moore 		 * The new entry does not overlap any existing entry in the
1433668a8aa8SDoug Moore 		 * map, so it becomes the new root of the map tree.
1434668a8aa8SDoug Moore 		 */
1435668a8aa8SDoug Moore 		max_free_left = vm_map_splay_merge_pred(header, entry, llist);
1436668a8aa8SDoug Moore 		max_free_right = vm_map_splay_merge_succ(header, entry, rlist);
1437668a8aa8SDoug Moore 	} else if (entry->start == root->start) {
1438668a8aa8SDoug Moore 		/*
1439668a8aa8SDoug Moore 		 * The new entry is a clone of root, with only the end field
1440668a8aa8SDoug Moore 		 * changed.  The root entry will be shrunk to abut the new
1441668a8aa8SDoug Moore 		 * entry, and will be the right child of the new root entry in
1442668a8aa8SDoug Moore 		 * the modified map.
1443668a8aa8SDoug Moore 		 */
1444668a8aa8SDoug Moore 		KASSERT(entry->end < root->end,
1445668a8aa8SDoug Moore 		    ("%s: clip_start not within entry", __func__));
1446668a8aa8SDoug Moore 		vm_map_splay_findprev(root, &llist);
1447668a8aa8SDoug Moore 		root->offset += entry->end - root->start;
1448668a8aa8SDoug Moore 		root->start = entry->end;
1449668a8aa8SDoug Moore 		max_free_left = vm_map_splay_merge_pred(header, entry, llist);
1450668a8aa8SDoug Moore 		max_free_right = root->max_free = vm_size_max(
1451668a8aa8SDoug Moore 		    vm_map_splay_merge_pred(entry, root, entry),
1452668a8aa8SDoug Moore 		    vm_map_splay_merge_right(header, root, rlist));
1453668a8aa8SDoug Moore 	} else {
1454668a8aa8SDoug Moore 		/*
1455668a8aa8SDoug Moore 		 * The new entry is a clone of root, with only the start field
1456668a8aa8SDoug Moore 		 * changed.  The root entry will be shrunk to abut the new
1457668a8aa8SDoug Moore 		 * entry, and will be the left child of the new root entry in
1458668a8aa8SDoug Moore 		 * the modified map.
1459668a8aa8SDoug Moore 		 */
1460668a8aa8SDoug Moore 		KASSERT(entry->end == root->end,
1461668a8aa8SDoug Moore 		    ("%s: clip_end not within entry", __func__));
1462668a8aa8SDoug Moore 		vm_map_splay_findnext(root, &rlist);
1463668a8aa8SDoug Moore 		entry->offset += entry->start - root->start;
1464668a8aa8SDoug Moore 		root->end = entry->start;
1465668a8aa8SDoug Moore 		max_free_left = root->max_free = vm_size_max(
1466668a8aa8SDoug Moore 		    vm_map_splay_merge_left(header, root, llist),
1467668a8aa8SDoug Moore 		    vm_map_splay_merge_succ(entry, root, entry));
1468668a8aa8SDoug Moore 		max_free_right = vm_map_splay_merge_succ(header, entry, rlist);
1469668a8aa8SDoug Moore 	}
1470668a8aa8SDoug Moore 	entry->max_free = vm_size_max(max_free_left, max_free_right);
1471668a8aa8SDoug Moore 	map->root = entry;
14729f701172SKonstantin Belousov 	VM_MAP_ASSERT_CONSISTENT(map);
1473df8bae1dSRodney W. Grimes }
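/*
 * Worked example (illustrative comment only): suppose root spans
 * [0x2000, 0x6000) with offset 0 and a new entry covering
 * [0x2000, 0x3000) is linked.  Since entry->start == root->start, the
 * existing entry is trimmed to [0x3000, 0x6000) and its offset becomes
 * 0x1000, so it still maps the same object pages; the new entry takes
 * the trimmed entry's place in the tree, with the trimmed entry as its
 * right child.
 */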
147499c81ca9SAlan Cox 
14759f701172SKonstantin Belousov enum unlink_merge_type {
14769f701172SKonstantin Belousov 	UNLINK_MERGE_NONE,
14779f701172SKonstantin Belousov 	UNLINK_MERGE_NEXT
14789f701172SKonstantin Belousov };
14799f701172SKonstantin Belousov 
14804e94f402SAlan Cox static void
14815a0879daSDoug Moore vm_map_entry_unlink(vm_map_t map, vm_map_entry_t entry,
14829f701172SKonstantin Belousov     enum unlink_merge_type op)
148399c81ca9SAlan Cox {
1484c1ad5342SDoug Moore 	vm_map_entry_t header, llist, rlist, root;
148585b7bedbSDoug Moore 	vm_size_t max_free_left, max_free_right;
148699c81ca9SAlan Cox 
14873a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
148885b7bedbSDoug Moore 	header = &map->header;
14895a0879daSDoug Moore 	root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist);
14909f701172SKonstantin Belousov 	KASSERT(root != NULL,
14919f701172SKonstantin Belousov 	    ("vm_map_entry_unlink: unlink object not mapped"));
14924e94f402SAlan Cox 
14931867d2f2SDoug Moore 	vm_map_splay_findprev(root, &llist);
14949f701172SKonstantin Belousov 	vm_map_splay_findnext(root, &rlist);
14951867d2f2SDoug Moore 	if (op == UNLINK_MERGE_NEXT) {
14969f701172SKonstantin Belousov 		rlist->start = root->start;
14979f701172SKonstantin Belousov 		rlist->offset = root->offset;
14981867d2f2SDoug Moore 	}
149985b7bedbSDoug Moore 	if (llist != header) {
15009f701172SKonstantin Belousov 		root = llist;
15019f701172SKonstantin Belousov 		llist = root->right;
150285b7bedbSDoug Moore 		max_free_left = vm_map_splay_merge_left(header, root, llist);
150385b7bedbSDoug Moore 		max_free_right = vm_map_splay_merge_succ(header, root, rlist);
150485b7bedbSDoug Moore 	} else if (rlist != header) {
15059f701172SKonstantin Belousov 		root = rlist;
15069f701172SKonstantin Belousov 		rlist = root->left;
150785b7bedbSDoug Moore 		max_free_left = vm_map_splay_merge_pred(header, root, llist);
150885b7bedbSDoug Moore 		max_free_right = vm_map_splay_merge_right(header, root, rlist);
1509c1ad5342SDoug Moore 	} else {
1510c1ad5342SDoug Moore 		header->left = header->right = header;
15119f701172SKonstantin Belousov 		root = NULL;
1512c1ad5342SDoug Moore 	}
15139f701172SKonstantin Belousov 	if (root != NULL)
151485b7bedbSDoug Moore 		root->max_free = vm_size_max(max_free_left, max_free_right);
151585b7bedbSDoug Moore 	map->root = root;
15169f701172SKonstantin Belousov 	VM_MAP_ASSERT_CONSISTENT(map);
151799c81ca9SAlan Cox 	map->nentries--;
151821c641b2SJohn Baldwin 	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
151921c641b2SJohn Baldwin 	    map->nentries, entry);
1520df8bae1dSRodney W. Grimes }
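/*
 * Worked example (illustrative comment only): unlinking an entry
 * spanning [0x3000, 0x4000) with UNLINK_MERGE_NEXT, where its
 * successor spans [0x4000, 0x7000), extends the successor backward to
 * [0x3000, 0x7000) and gives it the departing entry's offset, so the
 * successor takes over the removed mapping.  With UNLINK_MERGE_NONE
 * the range simply becomes a gap, widening the free space recorded in
 * the surrounding entries' max_free fields.
 */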
1521df8bae1dSRodney W. Grimes 
1522df8bae1dSRodney W. Grimes /*
1523fa581662SDoug Moore  *	vm_map_entry_resize:
15240164e057SAlan Cox  *
1525fa581662SDoug Moore  *	Resize a vm_map_entry, recompute the amount of free space that
1526fa581662SDoug Moore  *	follows it and propagate that value up the tree.
15270164e057SAlan Cox  *
15280164e057SAlan Cox  *	The map must be locked on entry, and is left locked on return.
15290164e057SAlan Cox  */
15300164e057SAlan Cox static void
1531fa581662SDoug Moore vm_map_entry_resize(vm_map_t map, vm_map_entry_t entry, vm_size_t grow_amount)
15320164e057SAlan Cox {
153385b7bedbSDoug Moore 	vm_map_entry_t header, llist, rlist, root;
15340164e057SAlan Cox 
15359f701172SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
153685b7bedbSDoug Moore 	header = &map->header;
15375a0879daSDoug Moore 	root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist);
15381867d2f2SDoug Moore 	KASSERT(root != NULL, ("%s: resize object not mapped", __func__));
15399f701172SKonstantin Belousov 	vm_map_splay_findnext(root, &rlist);
15401895f520SDoug Moore 	entry->end += grow_amount;
154185b7bedbSDoug Moore 	root->max_free = vm_size_max(
154285b7bedbSDoug Moore 	    vm_map_splay_merge_left(header, root, llist),
154385b7bedbSDoug Moore 	    vm_map_splay_merge_succ(header, root, rlist));
154485b7bedbSDoug Moore 	map->root = root;
15459f701172SKonstantin Belousov 	VM_MAP_ASSERT_CONSISTENT(map);
1546fa581662SDoug Moore 	CTR4(KTR_VM, "%s: map %p, nentries %d, entry %p",
154773f11451SDoug Moore 	    __func__, map, map->nentries, entry);
15480164e057SAlan Cox }
15490164e057SAlan Cox 
15500164e057SAlan Cox /*
1551d1d3f7e1SDoug Moore  *	vm_map_lookup_entry:	[ internal use only ]
1552df8bae1dSRodney W. Grimes  *
1553d1d3f7e1SDoug Moore  *	Finds the map entry containing (or
1554d1d3f7e1SDoug Moore  *	immediately preceding) the specified address
1555d1d3f7e1SDoug Moore  *	in the given map; the entry is returned
1556d1d3f7e1SDoug Moore  *	in the "entry" parameter.  The boolean
1557d1d3f7e1SDoug Moore  *	result indicates whether the address is
1558d1d3f7e1SDoug Moore  *	actually contained in the map.
1559df8bae1dSRodney W. Grimes  */
1560d1d3f7e1SDoug Moore boolean_t
1561d1d3f7e1SDoug Moore vm_map_lookup_entry(
1562d1d3f7e1SDoug Moore 	vm_map_t map,
1563d1d3f7e1SDoug Moore 	vm_offset_t address,
1564d1d3f7e1SDoug Moore 	vm_map_entry_t *entry)	/* OUT */
1565df8bae1dSRodney W. Grimes {
1566c1ad5342SDoug Moore 	vm_map_entry_t cur, header, lbound, ubound;
1567d1d3f7e1SDoug Moore 	boolean_t locked;
1568df8bae1dSRodney W. Grimes 
15694c3ef59eSAlan Cox 	/*
15704c3ef59eSAlan Cox 	 * If the map is empty, then the map entry immediately preceding
1571d1d3f7e1SDoug Moore 	 * "address" is the map's header.
15724c3ef59eSAlan Cox 	 */
157385b7bedbSDoug Moore 	header = &map->header;
1574d1d3f7e1SDoug Moore 	cur = map->root;
1575d1d3f7e1SDoug Moore 	if (cur == NULL) {
157685b7bedbSDoug Moore 		*entry = header;
1577d1d3f7e1SDoug Moore 		return (FALSE);
1578d1d3f7e1SDoug Moore 	}
1579d1d3f7e1SDoug Moore 	if (address >= cur->start && cur->end > address) {
1580d1d3f7e1SDoug Moore 		*entry = cur;
1581d1d3f7e1SDoug Moore 		return (TRUE);
15829f701172SKonstantin Belousov 	}
15839f701172SKonstantin Belousov 	if ((locked = vm_map_locked(map)) ||
158405a8c414SAlan Cox 	    sx_try_upgrade(&map->lock)) {
158505a8c414SAlan Cox 		/*
158605a8c414SAlan Cox 		 * Splay requires a write lock on the map.  However, it only
158705a8c414SAlan Cox 		 * restructures the binary search tree; it does not otherwise
158805a8c414SAlan Cox 		 * change the map.  Thus, the map's timestamp need not change
158905a8c414SAlan Cox 		 * on a temporary upgrade.
159005a8c414SAlan Cox 		 */
1591d1d3f7e1SDoug Moore 		cur = vm_map_splay(map, address);
1592461587dcSDoug Moore 		if (!locked) {
1593461587dcSDoug Moore 			VM_MAP_UNLOCK_CONSISTENT(map);
159405a8c414SAlan Cox 			sx_downgrade(&map->lock);
1595461587dcSDoug Moore 		}
1596d1d3f7e1SDoug Moore 
1597d1d3f7e1SDoug Moore 		/*
1598d1d3f7e1SDoug Moore 		 * If "address" is contained within a map entry, the new root
1599d1d3f7e1SDoug Moore 		 * is that map entry.  Otherwise, the new root is a map entry
1600d1d3f7e1SDoug Moore 		 * immediately before or after "address".
1601d1d3f7e1SDoug Moore 		 */
1602d1d3f7e1SDoug Moore 		if (address < cur->start) {
160385b7bedbSDoug Moore 			*entry = header;
1604d1d3f7e1SDoug Moore 			return (FALSE);
1605d1d3f7e1SDoug Moore 		}
1606d1d3f7e1SDoug Moore 		*entry = cur;
1607d1d3f7e1SDoug Moore 		return (address < cur->end);
16089f701172SKonstantin Belousov 	}
160905a8c414SAlan Cox 	/*
161005a8c414SAlan Cox 	 * Since the map is only locked for read access, perform a
1611d1d3f7e1SDoug Moore 	 * standard binary search tree lookup for "address".
161205a8c414SAlan Cox 	 */
1613c1ad5342SDoug Moore 	lbound = ubound = header;
1614c1ad5342SDoug Moore 	for (;;) {
1615d1d3f7e1SDoug Moore 		if (address < cur->start) {
1616c1ad5342SDoug Moore 			ubound = cur;
1617d1d3f7e1SDoug Moore 			cur = cur->left;
1618c1ad5342SDoug Moore 			if (cur == lbound)
1619c1ad5342SDoug Moore 				break;
1620d1d3f7e1SDoug Moore 		} else if (cur->end <= address) {
1621d1d3f7e1SDoug Moore 			lbound = cur;
1622d1d3f7e1SDoug Moore 			cur = cur->right;
1623c1ad5342SDoug Moore 			if (cur == ubound)
1624c1ad5342SDoug Moore 				break;
16259f701172SKonstantin Belousov 		} else {
1626d1d3f7e1SDoug Moore 			*entry = cur;
1627d1d3f7e1SDoug Moore 			return (TRUE);
162805a8c414SAlan Cox 		}
1629c1ad5342SDoug Moore 	}
1630d1d3f7e1SDoug Moore 	*entry = lbound;
1631d1d3f7e1SDoug Moore 	return (FALSE);
1632df8bae1dSRodney W. Grimes }
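/*
 * Usage sketch (illustrative comment only): the common caller pattern
 * is
 *
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		(addr lies within *entry)
 *	} else {
 *		(addr is in a gap; entry is the preceding entry, or
 *		 &map->header if addr precedes every entry)
 *	}
 *
 * Callers holding the write lock, or that can upgrade to it, re-balance
 * the tree as a side effect; pure readers fall back to the ordinary
 * binary search above.
 */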
1633df8bae1dSRodney W. Grimes 
1634df8bae1dSRodney W. Grimes /*
163530dcfc09SJohn Dyson  *	vm_map_insert:
163630dcfc09SJohn Dyson  *
163730dcfc09SJohn Dyson  *	Inserts the given whole VM object into the target
163830dcfc09SJohn Dyson  *	map at the specified address range.  The object's
163930dcfc09SJohn Dyson  *	size should match that of the address range.
164030dcfc09SJohn Dyson  *
164130dcfc09SJohn Dyson  *	Requires that the map be locked, and leaves it so.
16422aaeadf8SMatthew Dillon  *
16432aaeadf8SMatthew Dillon  *	If object is non-NULL, ref count must be bumped by caller
16442aaeadf8SMatthew Dillon  *	prior to making call to account for the new entry.
164530dcfc09SJohn Dyson  */
164630dcfc09SJohn Dyson int
1647b9dcd593SBruce Evans vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
164833314db0SAlan Cox     vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow)
164930dcfc09SJohn Dyson {
165083704cc2SDoug Moore 	vm_map_entry_t new_entry, next_entry, prev_entry;
1651ef694c1aSEdward Tomasz Napierala 	struct ucred *cred;
16521569205fSKonstantin Belousov 	vm_eflags_t protoeflags;
16538211bd45SKonstantin Belousov 	vm_inherit_t inheritance;
1654e2e80fb3SKonstantin Belousov 	u_long bdry;
1655e2e80fb3SKonstantin Belousov 	u_int bidx;
165630dcfc09SJohn Dyson 
16573a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
16582e47807cSJeff Roberson 	KASSERT(object != kernel_object ||
165933314db0SAlan Cox 	    (cow & MAP_COPY_ON_WRITE) == 0,
16602e47807cSJeff Roberson 	    ("vm_map_insert: kernel object and COW"));
1661e2e80fb3SKonstantin Belousov 	KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0 ||
1662e2e80fb3SKonstantin Belousov 	    (cow & MAP_SPLIT_BOUNDARY_MASK) != 0,
1663e2e80fb3SKonstantin Belousov 	    ("vm_map_insert: paradoxical MAP_NOFAULT request, obj %p cow %#x",
1664e2e80fb3SKonstantin Belousov 	    object, cow));
166500de6773SKonstantin Belousov 	KASSERT((prot & ~max) == 0,
166600de6773SKonstantin Belousov 	    ("prot %#x is not subset of max_prot %#x", prot, max));
16673a0916b8SKonstantin Belousov 
166830dcfc09SJohn Dyson 	/*
166930dcfc09SJohn Dyson 	 * Check that the start and end points are not bogus.
167030dcfc09SJohn Dyson 	 */
1671f0340740SMark Johnston 	if (start == end || !vm_map_range_valid(map, start, end))
167230dcfc09SJohn Dyson 		return (KERN_INVALID_ADDRESS);
167330dcfc09SJohn Dyson 
1674*2e1c94aaSKonstantin Belousov 	if ((map->flags & MAP_WXORX) != 0 && (prot & (VM_PROT_WRITE |
1675*2e1c94aaSKonstantin Belousov 	    VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE))
1676*2e1c94aaSKonstantin Belousov 		return (KERN_PROTECTION_FAILURE);
1677*2e1c94aaSKonstantin Belousov 
167830dcfc09SJohn Dyson 	/*
167930dcfc09SJohn Dyson 	 * Find the entry prior to the proposed starting address; if it's part
168030dcfc09SJohn Dyson 	 * of an existing entry, this range is bogus.
168130dcfc09SJohn Dyson 	 */
1682723413beSDoug Moore 	if (vm_map_lookup_entry(map, start, &prev_entry))
168330dcfc09SJohn Dyson 		return (KERN_NO_SPACE);
168430dcfc09SJohn Dyson 
168530dcfc09SJohn Dyson 	/*
168630dcfc09SJohn Dyson 	 * Assert that the next entry doesn't overlap the end point.
168730dcfc09SJohn Dyson 	 */
168883704cc2SDoug Moore 	next_entry = vm_map_entry_succ(prev_entry);
168983704cc2SDoug Moore 	if (next_entry->start < end)
169030dcfc09SJohn Dyson 		return (KERN_NO_SPACE);
169130dcfc09SJohn Dyson 
169219bd0d9cSKonstantin Belousov 	if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL ||
169319bd0d9cSKonstantin Belousov 	    max != VM_PROT_NONE))
169419bd0d9cSKonstantin Belousov 		return (KERN_INVALID_ARGUMENT);
169519bd0d9cSKonstantin Belousov 
1696afa07f7eSJohn Dyson 	protoeflags = 0;
1697afa07f7eSJohn Dyson 	if (cow & MAP_COPY_ON_WRITE)
1698e5f13bddSAlan Cox 		protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY;
169933314db0SAlan Cox 	if (cow & MAP_NOFAULT)
1700afa07f7eSJohn Dyson 		protoeflags |= MAP_ENTRY_NOFAULT;
17014f79d873SMatthew Dillon 	if (cow & MAP_DISABLE_SYNCER)
17024f79d873SMatthew Dillon 		protoeflags |= MAP_ENTRY_NOSYNC;
17039730a5daSPaul Saab 	if (cow & MAP_DISABLE_COREDUMP)
17049730a5daSPaul Saab 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
1705712efe66SAlan Cox 	if (cow & MAP_STACK_GROWS_DOWN)
1706712efe66SAlan Cox 		protoeflags |= MAP_ENTRY_GROWS_DOWN;
1707712efe66SAlan Cox 	if (cow & MAP_STACK_GROWS_UP)
1708712efe66SAlan Cox 		protoeflags |= MAP_ENTRY_GROWS_UP;
1709fe7bcbafSKyle Evans 	if (cow & MAP_WRITECOUNT)
1710fe7bcbafSKyle Evans 		protoeflags |= MAP_ENTRY_WRITECNT;
171178022527SKonstantin Belousov 	if (cow & MAP_VN_EXEC)
171278022527SKonstantin Belousov 		protoeflags |= MAP_ENTRY_VN_EXEC;
171319bd0d9cSKonstantin Belousov 	if ((cow & MAP_CREATE_GUARD) != 0)
171419bd0d9cSKonstantin Belousov 		protoeflags |= MAP_ENTRY_GUARD;
171519bd0d9cSKonstantin Belousov 	if ((cow & MAP_CREATE_STACK_GAP_DN) != 0)
171619bd0d9cSKonstantin Belousov 		protoeflags |= MAP_ENTRY_STACK_GAP_DN;
171719bd0d9cSKonstantin Belousov 	if ((cow & MAP_CREATE_STACK_GAP_UP) != 0)
171819bd0d9cSKonstantin Belousov 		protoeflags |= MAP_ENTRY_STACK_GAP_UP;
17198211bd45SKonstantin Belousov 	if (cow & MAP_INHERIT_SHARE)
17208211bd45SKonstantin Belousov 		inheritance = VM_INHERIT_SHARE;
17218211bd45SKonstantin Belousov 	else
17228211bd45SKonstantin Belousov 		inheritance = VM_INHERIT_DEFAULT;
1723e2e80fb3SKonstantin Belousov 	if ((cow & MAP_SPLIT_BOUNDARY_MASK) != 0) {
1724e2e80fb3SKonstantin Belousov 		/* This magically ignores index 0, for usual page size. */
1725e2e80fb3SKonstantin Belousov 		bidx = (cow & MAP_SPLIT_BOUNDARY_MASK) >>
1726e2e80fb3SKonstantin Belousov 		    MAP_SPLIT_BOUNDARY_SHIFT;
1727e2e80fb3SKonstantin Belousov 		if (bidx >= MAXPAGESIZES)
1728e2e80fb3SKonstantin Belousov 			return (KERN_INVALID_ARGUMENT);
1729e2e80fb3SKonstantin Belousov 		bdry = pagesizes[bidx] - 1;
1730e2e80fb3SKonstantin Belousov 		if ((start & bdry) != 0 || (end & bdry) != 0)
1731e2e80fb3SKonstantin Belousov 			return (KERN_INVALID_ARGUMENT);
1732e2e80fb3SKonstantin Belousov 		protoeflags |= bidx << MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
1733e2e80fb3SKonstantin Belousov 	}
17344f79d873SMatthew Dillon 
1735ef694c1aSEdward Tomasz Napierala 	cred = NULL;
173619bd0d9cSKonstantin Belousov 	if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
17373364c323SKonstantin Belousov 		goto charged;
17383364c323SKonstantin Belousov 	if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
17393364c323SKonstantin Belousov 	    ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
17403364c323SKonstantin Belousov 		if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start))
17413364c323SKonstantin Belousov 			return (KERN_RESOURCE_SHORTAGE);
17421569205fSKonstantin Belousov 		KASSERT(object == NULL ||
17431569205fSKonstantin Belousov 		    (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 ||
1744ef694c1aSEdward Tomasz Napierala 		    object->cred == NULL,
17451569205fSKonstantin Belousov 		    ("overcommit: vm_map_insert o %p", object));
1746ef694c1aSEdward Tomasz Napierala 		cred = curthread->td_ucred;
17473364c323SKonstantin Belousov 	}
17483364c323SKonstantin Belousov 
17493364c323SKonstantin Belousov charged:
1750f8616ebfSAlan Cox 	/* Expand the kernel pmap, if necessary. */
1751f8616ebfSAlan Cox 	if (map == kernel_map && end > kernel_vm_end)
1752f8616ebfSAlan Cox 		pmap_growkernel(end);
17531d284e00SAlan Cox 	if (object != NULL) {
175430dcfc09SJohn Dyson 		/*
17551d284e00SAlan Cox 		 * OBJ_ONEMAPPING must be cleared unless this mapping
17561d284e00SAlan Cox 		 * is trivially proven to be the only mapping for any
17571d284e00SAlan Cox 		 * of the object's pages.  (Object granularity
17581d284e00SAlan Cox 		 * reference counting is insufficient to recognize
17591d284e00SAlan Cox 		 * aliases with precision.)
176030dcfc09SJohn Dyson 		 */
176163967687SJeff Roberson 		if ((object->flags & OBJ_ANON) != 0) {
176289f6b863SAttilio Rao 			VM_OBJECT_WLOCK(object);
17631d284e00SAlan Cox 			if (object->ref_count > 1 || object->shadow_count != 0)
17642aaeadf8SMatthew Dillon 				vm_object_clear_flag(object, OBJ_ONEMAPPING);
176589f6b863SAttilio Rao 			VM_OBJECT_WUNLOCK(object);
176663967687SJeff Roberson 		}
17672203c46dSMark Johnston 	} else if ((prev_entry->eflags & ~MAP_ENTRY_USER_WIRED) ==
17682203c46dSMark Johnston 	    protoeflags &&
176978022527SKonstantin Belousov 	    (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP |
177078022527SKonstantin Belousov 	    MAP_VN_EXEC)) == 0 &&
1771737e25f7SAlan Cox 	    prev_entry->end == start && (prev_entry->cred == cred ||
17723364c323SKonstantin Belousov 	    (prev_entry->object.vm_object != NULL &&
17731569205fSKonstantin Belousov 	    prev_entry->object.vm_object->cred == cred)) &&
17748cc7e047SJohn Dyson 	    vm_object_coalesce(prev_entry->object.vm_object,
177557a21abaSAlan Cox 	    prev_entry->offset,
17768cc7e047SJohn Dyson 	    (vm_size_t)(prev_entry->end - prev_entry->start),
177760169c88SAlan Cox 	    (vm_size_t)(end - prev_entry->end), cred != NULL &&
177860169c88SAlan Cox 	    (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) {
177930dcfc09SJohn Dyson 		/*
17802aaeadf8SMatthew Dillon 		 * We were able to extend the object.  Determine if we
17812aaeadf8SMatthew Dillon 		 * can extend the previous map entry to include the
17822aaeadf8SMatthew Dillon 		 * new range as well.
178330dcfc09SJohn Dyson 		 */
17841569205fSKonstantin Belousov 		if (prev_entry->inheritance == inheritance &&
17851569205fSKonstantin Belousov 		    prev_entry->protection == prot &&
1786737e25f7SAlan Cox 		    prev_entry->max_protection == max &&
1787737e25f7SAlan Cox 		    prev_entry->wired_count == 0) {
1788737e25f7SAlan Cox 			KASSERT((prev_entry->eflags & MAP_ENTRY_USER_WIRED) ==
1789737e25f7SAlan Cox 			    0, ("prev_entry %p has incoherent wiring",
1790737e25f7SAlan Cox 			    prev_entry));
179119bd0d9cSKonstantin Belousov 			if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0)
17921569205fSKonstantin Belousov 				map->size += end - prev_entry->end;
1793fa581662SDoug Moore 			vm_map_entry_resize(map, prev_entry,
17941895f520SDoug Moore 			    end - prev_entry->end);
179583704cc2SDoug Moore 			vm_map_try_merge_entries(map, prev_entry, next_entry);
179630dcfc09SJohn Dyson 			return (KERN_SUCCESS);
179730dcfc09SJohn Dyson 		}
17988cc7e047SJohn Dyson 
17992aaeadf8SMatthew Dillon 		/*
18002aaeadf8SMatthew Dillon 		 * If we can extend the object but cannot extend the
18012aaeadf8SMatthew Dillon 		 * map entry, we have to create a new map entry.  We
18022aaeadf8SMatthew Dillon 		 * must bump the ref count on the extended object to
18034e71e795SMatthew Dillon 		 * account for it.  object may be NULL.
18042aaeadf8SMatthew Dillon 		 */
18052aaeadf8SMatthew Dillon 		object = prev_entry->object.vm_object;
18062aaeadf8SMatthew Dillon 		offset = prev_entry->offset +
18072aaeadf8SMatthew Dillon 		    (prev_entry->end - prev_entry->start);
18088cc7e047SJohn Dyson 		vm_object_reference(object);
1809ef694c1aSEdward Tomasz Napierala 		if (cred != NULL && object != NULL && object->cred != NULL &&
18103364c323SKonstantin Belousov 		    !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
18113364c323SKonstantin Belousov 			/* Object already accounts for this uid. */
1812ef694c1aSEdward Tomasz Napierala 			cred = NULL;
18133364c323SKonstantin Belousov 		}
1814b18bfc3dSJohn Dyson 	}
181560169c88SAlan Cox 	if (cred != NULL)
181660169c88SAlan Cox 		crhold(cred);
18172aaeadf8SMatthew Dillon 
18182aaeadf8SMatthew Dillon 	/*
181930dcfc09SJohn Dyson 	 * Create a new entry
182030dcfc09SJohn Dyson 	 */
182130dcfc09SJohn Dyson 	new_entry = vm_map_entry_create(map);
182230dcfc09SJohn Dyson 	new_entry->start = start;
182330dcfc09SJohn Dyson 	new_entry->end = end;
1824ef694c1aSEdward Tomasz Napierala 	new_entry->cred = NULL;
182530dcfc09SJohn Dyson 
1826afa07f7eSJohn Dyson 	new_entry->eflags = protoeflags;
182730dcfc09SJohn Dyson 	new_entry->object.vm_object = object;
182830dcfc09SJohn Dyson 	new_entry->offset = offset;
18292267af78SJulian Elischer 
18308211bd45SKonstantin Belousov 	new_entry->inheritance = inheritance;
183130dcfc09SJohn Dyson 	new_entry->protection = prot;
183230dcfc09SJohn Dyson 	new_entry->max_protection = max;
183330dcfc09SJohn Dyson 	new_entry->wired_count = 0;
1834997ac690SKonstantin Belousov 	new_entry->wiring_thread = NULL;
183513458803SAlan Cox 	new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT;
1836381b7242SAlan Cox 	new_entry->next_read = start;
1837e5f251d2SAlan Cox 
1838ef694c1aSEdward Tomasz Napierala 	KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry),
18391569205fSKonstantin Belousov 	    ("overcommit: vm_map_insert leaks vm_map %p", new_entry));
1840ef694c1aSEdward Tomasz Napierala 	new_entry->cred = cred;
18413364c323SKonstantin Belousov 
184230dcfc09SJohn Dyson 	/*
184330dcfc09SJohn Dyson 	 * Insert the new entry into the list
184430dcfc09SJohn Dyson 	 */
18459f701172SKonstantin Belousov 	vm_map_entry_link(map, new_entry);
184619bd0d9cSKonstantin Belousov 	if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0)
184730dcfc09SJohn Dyson 		map->size += new_entry->end - new_entry->start;
184830dcfc09SJohn Dyson 
18491a484d28SMatthew Dillon 	/*
1850eaaf9f7fSAlan Cox 	 * Try to coalesce the new entry with both the previous and next
1851eaaf9f7fSAlan Cox 	 * entries in the list.  Previously, we only attempted to coalesce
1852eaaf9f7fSAlan Cox 	 * with the previous entry when object is NULL.  Here, we handle the
1853eaaf9f7fSAlan Cox 	 * other cases, which are less common.
18541a484d28SMatthew Dillon 	 */
185583ea714fSDoug Moore 	vm_map_try_merge_entries(map, prev_entry, new_entry);
185683704cc2SDoug Moore 	vm_map_try_merge_entries(map, new_entry, next_entry);
18574e71e795SMatthew Dillon 
18581569205fSKonstantin Belousov 	if ((cow & (MAP_PREFAULT | MAP_PREFAULT_PARTIAL)) != 0) {
18591569205fSKonstantin Belousov 		vm_map_pmap_enter(map, start, prot, object, OFF_TO_IDX(offset),
18601569205fSKonstantin Belousov 		    end - start, cow & MAP_PREFAULT_PARTIAL);
18614f79d873SMatthew Dillon 	}
1862e972780aSAlan Cox 
186330dcfc09SJohn Dyson 	return (KERN_SUCCESS);
186430dcfc09SJohn Dyson }
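/*
 * Usage sketch (illustrative only, with placeholder names): a minimal
 * caller that maps one object at a fixed range, observing the locking
 * and reference-count requirements stated above.  "object", "start",
 * and "size" are assumed to be supplied by the caller.
 */
#if 0	/* example only */
	int rv;

	vm_object_reference(object);		/* ref for the new entry */
	vm_map_lock(map);
	rv = vm_map_insert(map, object, 0, start, start + size,
	    VM_PROT_RW, VM_PROT_RW, 0);
	vm_map_unlock(map);
	if (rv != KERN_SUCCESS)
		vm_object_deallocate(object);	/* insertion failed */
#endif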
186530dcfc09SJohn Dyson 
186630dcfc09SJohn Dyson /*
18670164e057SAlan Cox  *	vm_map_findspace:
18680164e057SAlan Cox  *
18690164e057SAlan Cox  *	Find the first fit (lowest VM address) for "length" free bytes
18700164e057SAlan Cox  *	beginning at address >= start in the given map.
18710164e057SAlan Cox  *
18729f701172SKonstantin Belousov  *	In a vm_map_entry, "max_free" is the maximum amount of
18739f701172SKonstantin Belousov  *	contiguous free space between an entry in its subtree and a
18749f701172SKonstantin Belousov  *	neighbor of that entry.  This allows finding a free region in
18759f701172SKonstantin Belousov  *	one path down the tree, so O(log n) amortized with splay
18769f701172SKonstantin Belousov  *	trees.
18770164e057SAlan Cox  *
18780164e057SAlan Cox  *	The map must be locked, and leaves it so.
18790164e057SAlan Cox  *	The map must be locked on entry, and is left locked on return.
18809f701172SKonstantin Belousov  *	Returns: starting address if sufficient space,
18819f701172SKonstantin Belousov  *		 vm_map_max(map)-length+1 if insufficient space.
1882df8bae1dSRodney W. Grimes  */
18839f701172SKonstantin Belousov vm_offset_t
18849f701172SKonstantin Belousov vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length)
1885df8bae1dSRodney W. Grimes {
188685b7bedbSDoug Moore 	vm_map_entry_t header, llist, rlist, root, y;
188785b7bedbSDoug Moore 	vm_size_t left_length, max_free_left, max_free_right;
1888e65d58a0SDoug Moore 	vm_offset_t gap_end;
1889df8bae1dSRodney W. Grimes 
189020f02659SMark Johnston 	VM_MAP_ASSERT_LOCKED(map);
189120f02659SMark Johnston 
1892986b43f8SAlan Cox 	/*
1893986b43f8SAlan Cox 	 * Request must fit within min/max VM address and must avoid
1894986b43f8SAlan Cox 	 * address wrap.
1895986b43f8SAlan Cox 	 */
1896f0165b1cSKonstantin Belousov 	start = MAX(start, vm_map_min(map));
1897e65d58a0SDoug Moore 	if (start >= vm_map_max(map) || length > vm_map_max(map) - start)
18989f701172SKonstantin Belousov 		return (vm_map_max(map) - length + 1);
1899df8bae1dSRodney W. Grimes 
19000164e057SAlan Cox 	/* Empty tree means wide open address space. */
19019f701172SKonstantin Belousov 	if (map->root == NULL)
19029f701172SKonstantin Belousov 		return (start);
19030164e057SAlan Cox 
19040164e057SAlan Cox 	/*
1905e65d58a0SDoug Moore 	 * After splay_split, if start is within an entry, push it to the start
1906e65d58a0SDoug Moore 	 * of the following gap.  If rlist is at the end of the gap containing
1907e65d58a0SDoug Moore 	 * start, save the end of that gap in gap_end to see if the gap is big
1908e65d58a0SDoug Moore 	 * enough; otherwise set gap_end to start to skip gap-checking and move
1909e65d58a0SDoug Moore 	 * directly to a search of the right subtree.
19100164e057SAlan Cox 	 */
191185b7bedbSDoug Moore 	header = &map->header;
19125a0879daSDoug Moore 	root = vm_map_splay_split(map, start, length, &llist, &rlist);
1913e65d58a0SDoug Moore 	gap_end = rlist->start;
1914e65d58a0SDoug Moore 	if (root != NULL) {
19159f701172SKonstantin Belousov 		start = root->end;
1916c1ad5342SDoug Moore 		if (root->right != rlist)
1917e65d58a0SDoug Moore 			gap_end = start;
191885b7bedbSDoug Moore 		max_free_left = vm_map_splay_merge_left(header, root, llist);
191985b7bedbSDoug Moore 		max_free_right = vm_map_splay_merge_right(header, root, rlist);
192085b7bedbSDoug Moore 	} else if (rlist != header) {
19219f701172SKonstantin Belousov 		root = rlist;
19229f701172SKonstantin Belousov 		rlist = root->left;
192385b7bedbSDoug Moore 		max_free_left = vm_map_splay_merge_pred(header, root, llist);
192485b7bedbSDoug Moore 		max_free_right = vm_map_splay_merge_right(header, root, rlist);
19259f701172SKonstantin Belousov 	} else {
19269f701172SKonstantin Belousov 		root = llist;
19279f701172SKonstantin Belousov 		llist = root->right;
192885b7bedbSDoug Moore 		max_free_left = vm_map_splay_merge_left(header, root, llist);
192985b7bedbSDoug Moore 		max_free_right = vm_map_splay_merge_succ(header, root, rlist);
19300164e057SAlan Cox 	}
193185b7bedbSDoug Moore 	root->max_free = vm_size_max(max_free_left, max_free_right);
193285b7bedbSDoug Moore 	map->root = root;
19339f701172SKonstantin Belousov 	VM_MAP_ASSERT_CONSISTENT(map);
1934e65d58a0SDoug Moore 	if (length <= gap_end - start)
19359f701172SKonstantin Belousov 		return (start);
19360164e057SAlan Cox 
19370164e057SAlan Cox 	/* With max_free, can immediately tell if no solution. */
1938c1ad5342SDoug Moore 	if (root->right == header || length > root->right->max_free)
19399f701172SKonstantin Belousov 		return (vm_map_max(map) - length + 1);
19400164e057SAlan Cox 
19410164e057SAlan Cox 	/*
19429f701172SKonstantin Belousov 	 * Splay for the least large-enough gap in the right subtree.
19430164e057SAlan Cox 	 */
194485b7bedbSDoug Moore 	llist = rlist = header;
19459f701172SKonstantin Belousov 	for (left_length = 0;;
19465a0879daSDoug Moore 	    left_length = vm_map_entry_max_free_left(root, llist)) {
19479f701172SKonstantin Belousov 		if (length <= left_length)
1948c1ad5342SDoug Moore 			SPLAY_LEFT_STEP(root, y, llist, rlist,
19495a0879daSDoug Moore 			    length <= vm_map_entry_max_free_left(y, llist));
19509f701172SKonstantin Belousov 		else
1951c1ad5342SDoug Moore 			SPLAY_RIGHT_STEP(root, y, llist, rlist,
19525a0879daSDoug Moore 			    length > vm_map_entry_max_free_left(y, root));
19539f701172SKonstantin Belousov 		if (root == NULL)
19549f701172SKonstantin Belousov 			break;
19550164e057SAlan Cox 	}
19569f701172SKonstantin Belousov 	root = llist;
19579f701172SKonstantin Belousov 	llist = root->right;
195885b7bedbSDoug Moore 	max_free_left = vm_map_splay_merge_left(header, root, llist);
195985b7bedbSDoug Moore 	if (rlist == header) {
196085b7bedbSDoug Moore 		root->max_free = vm_size_max(max_free_left,
196185b7bedbSDoug Moore 		    vm_map_splay_merge_succ(header, root, rlist));
196285b7bedbSDoug Moore 	} else {
19635a0879daSDoug Moore 		y = rlist;
19649f701172SKonstantin Belousov 		rlist = y->left;
196585b7bedbSDoug Moore 		y->max_free = vm_size_max(
196685b7bedbSDoug Moore 		    vm_map_splay_merge_pred(root, y, root),
196785b7bedbSDoug Moore 		    vm_map_splay_merge_right(header, y, rlist));
196885b7bedbSDoug Moore 		root->max_free = vm_size_max(max_free_left, y->max_free);
19699f701172SKonstantin Belousov 	}
197085b7bedbSDoug Moore 	map->root = root;
19719f701172SKonstantin Belousov 	VM_MAP_ASSERT_CONSISTENT(map);
19729f701172SKonstantin Belousov 	return (root->end);
1973df8bae1dSRodney W. Grimes }
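/*
 * Worked example (illustrative comment only): with entries at
 * [0x1000, 0x2000) and [0x5000, 0x9000), vm_map_findspace(map, 0x1800,
 * 0x2000) pushes the hint to the end of the containing entry, 0x2000,
 * and the 0x3000-byte gap that follows suffices, so 0x2000 is
 * returned.  A request for 0x4000 bytes does not fit that gap; guided
 * by max_free, the search descends the right subtree and returns
 * 0x9000, the start of the gap after the last entry (assuming the map
 * extends that far).
 */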
1974df8bae1dSRodney W. Grimes 
1975d239bd3cSKonstantin Belousov int
1976d239bd3cSKonstantin Belousov vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1977b8ca4ef2SAlan Cox     vm_offset_t start, vm_size_t length, vm_prot_t prot,
1978d239bd3cSKonstantin Belousov     vm_prot_t max, int cow)
1979d239bd3cSKonstantin Belousov {
1980b8ca4ef2SAlan Cox 	vm_offset_t end;
1981d239bd3cSKonstantin Belousov 	int result;
1982d239bd3cSKonstantin Belousov 
1983d239bd3cSKonstantin Belousov 	end = start + length;
19844648ba0aSKonstantin Belousov 	KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 ||
19854648ba0aSKonstantin Belousov 	    object == NULL,
19864648ba0aSKonstantin Belousov 	    ("vm_map_fixed: non-NULL backing object for stack"));
1987897d81a0SKonstantin Belousov 	vm_map_lock(map);
1988d239bd3cSKonstantin Belousov 	VM_MAP_RANGE_CHECK(map, start, end);
1989e8f77c20SKonstantin Belousov 	if ((cow & MAP_CHECK_EXCL) == 0) {
1990e8f77c20SKonstantin Belousov 		result = vm_map_delete(map, start, end);
1991e8f77c20SKonstantin Belousov 		if (result != KERN_SUCCESS)
1992e8f77c20SKonstantin Belousov 			goto out;
1993e8f77c20SKonstantin Belousov 	}
19944648ba0aSKonstantin Belousov 	if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) {
19954648ba0aSKonstantin Belousov 		result = vm_map_stack_locked(map, start, length, sgrowsiz,
19964648ba0aSKonstantin Belousov 		    prot, max, cow);
19974648ba0aSKonstantin Belousov 	} else {
19984648ba0aSKonstantin Belousov 		result = vm_map_insert(map, object, offset, start, end,
19994648ba0aSKonstantin Belousov 		    prot, max, cow);
20004648ba0aSKonstantin Belousov 	}
2001e8f77c20SKonstantin Belousov out:
2002d239bd3cSKonstantin Belousov 	vm_map_unlock(map);
2003d239bd3cSKonstantin Belousov 	return (result);
2004d239bd3cSKonstantin Belousov }
2005d239bd3cSKonstantin Belousov 
2006fa50a355SKonstantin Belousov static const int aslr_pages_rnd_64[2] = {0x1000, 0x10};
2007fa50a355SKonstantin Belousov static const int aslr_pages_rnd_32[2] = {0x100, 0x4};
2008fa50a355SKonstantin Belousov 
2009fa50a355SKonstantin Belousov static int cluster_anon = 1;
2010fa50a355SKonstantin Belousov SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW,
2011fa50a355SKonstantin Belousov     &cluster_anon, 0,
2012484e9d03SKonstantin Belousov     "Cluster anonymous mappings: 0 = no, 1 = yes if no hint, 2 = always");
2013484e9d03SKonstantin Belousov 
2014484e9d03SKonstantin Belousov static bool
2015484e9d03SKonstantin Belousov clustering_anon_allowed(vm_offset_t addr)
2016484e9d03SKonstantin Belousov {
2017484e9d03SKonstantin Belousov 
2018484e9d03SKonstantin Belousov 	switch (cluster_anon) {
2019484e9d03SKonstantin Belousov 	case 0:
2020484e9d03SKonstantin Belousov 		return (false);
2021484e9d03SKonstantin Belousov 	case 1:
2022484e9d03SKonstantin Belousov 		return (addr == 0);
2023484e9d03SKonstantin Belousov 	case 2:
2024484e9d03SKonstantin Belousov 	default:
2025484e9d03SKonstantin Belousov 		return (true);
2026484e9d03SKonstantin Belousov 	}
2027484e9d03SKonstantin Belousov }
2028fa50a355SKonstantin Belousov 
2029fa50a355SKonstantin Belousov static long aslr_restarts;
2030fa50a355SKonstantin Belousov SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD,
2031fa50a355SKonstantin Belousov     &aslr_restarts, 0,
2032fa50a355SKonstantin Belousov     "Number of aslr failures");
2033fa50a355SKonstantin Belousov 
2034df8bae1dSRodney W. Grimes /*
2035fec29688SAlan Cox  * Searches for the specified amount of free space in the given map with the
2036fec29688SAlan Cox  * specified alignment.  Performs an address-ordered, first-fit search from
2037fec29688SAlan Cox  * the given address "*addr", with an optional upper bound "max_addr".  If the
2038fec29688SAlan Cox  * parameter "alignment" is zero, then the alignment is computed from the
2039fec29688SAlan Cox  * given (object, offset) pair so as to enable the greatest possible use of
2040fec29688SAlan Cox  * superpage mappings.  Returns KERN_SUCCESS and the address of the free space
2041fec29688SAlan Cox  * in "*addr" if successful.  Otherwise, returns KERN_NO_SPACE.
2042fec29688SAlan Cox  *
2043fec29688SAlan Cox  * The map must be locked.  Initially, there must be at least "length" bytes
2044fec29688SAlan Cox  * of free space at the given address.
2045fec29688SAlan Cox  */
2046fec29688SAlan Cox static int
2047fec29688SAlan Cox vm_map_alignspace(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
2048fec29688SAlan Cox     vm_offset_t *addr, vm_size_t length, vm_offset_t max_addr,
2049fec29688SAlan Cox     vm_offset_t alignment)
2050fec29688SAlan Cox {
2051fec29688SAlan Cox 	vm_offset_t aligned_addr, free_addr;
2052fec29688SAlan Cox 
2053fec29688SAlan Cox 	VM_MAP_ASSERT_LOCKED(map);
2054fec29688SAlan Cox 	free_addr = *addr;
20559f701172SKonstantin Belousov 	KASSERT(free_addr == vm_map_findspace(map, free_addr, length),
2056e65d58a0SDoug Moore 	    ("caller failed to provide space %#jx at address %p",
2057e65d58a0SDoug Moore 	     (uintmax_t)length, (void *)free_addr));
2058fec29688SAlan Cox 	for (;;) {
2059fec29688SAlan Cox 		/*
2060fec29688SAlan Cox 		 * At the start of every iteration, the free space at address
2061fec29688SAlan Cox 		 * "*addr" is at least "length" bytes.
2062fec29688SAlan Cox 		 */
2063fec29688SAlan Cox 		if (alignment == 0)
2064fec29688SAlan Cox 			pmap_align_superpage(object, offset, addr, length);
2065fec29688SAlan Cox 		else if ((*addr & (alignment - 1)) != 0) {
2066fec29688SAlan Cox 			*addr &= ~(alignment - 1);
2067fec29688SAlan Cox 			*addr += alignment;
2068fec29688SAlan Cox 		}
2069fec29688SAlan Cox 		aligned_addr = *addr;
2070fec29688SAlan Cox 		if (aligned_addr == free_addr) {
2071fec29688SAlan Cox 			/*
2072fec29688SAlan Cox 			 * Alignment did not change "*addr", so "*addr" must
2073fec29688SAlan Cox 			 * still provide sufficient free space.
2074fec29688SAlan Cox 			 */
2075fec29688SAlan Cox 			return (KERN_SUCCESS);
2076fec29688SAlan Cox 		}
2077fec29688SAlan Cox 
2078fec29688SAlan Cox 		/*
2079fec29688SAlan Cox 		 * Test for address wrap on "*addr".  A wrapped "*addr" could
2080fec29688SAlan Cox 		 * be a valid address, in which case vm_map_findspace() cannot
2081fec29688SAlan Cox 		 * be relied upon to fail.
2082fec29688SAlan Cox 		 */
20839f701172SKonstantin Belousov 		if (aligned_addr < free_addr)
20849f701172SKonstantin Belousov 			return (KERN_NO_SPACE);
20859f701172SKonstantin Belousov 		*addr = vm_map_findspace(map, aligned_addr, length);
20869f701172SKonstantin Belousov 		if (*addr + length > vm_map_max(map) ||
2087fec29688SAlan Cox 		    (max_addr != 0 && *addr + length > max_addr))
2088fec29688SAlan Cox 			return (KERN_NO_SPACE);
2089fec29688SAlan Cox 		free_addr = *addr;
2090fec29688SAlan Cox 		if (free_addr == aligned_addr) {
2091fec29688SAlan Cox 			/*
2092fec29688SAlan Cox 			 * If a successful call to vm_map_findspace() did not
2093fec29688SAlan Cox 			 * change "*addr", then "*addr" must still be aligned
2094fec29688SAlan Cox 			 * and provide sufficient free space.
2095fec29688SAlan Cox 			 */
2096fec29688SAlan Cox 			return (KERN_SUCCESS);
2097fec29688SAlan Cox 		}
2098fec29688SAlan Cox 	}
2099fec29688SAlan Cox }
2100fec29688SAlan Cox 
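/*
 * A worked example of the round-up step above, with made-up numbers:
 * for alignment = 0x10000 and *addr = 0x12345, masking off the low
 * bits yields 0x10000 and adding the alignment gives 0x20000, the
 * lowest 0x10000-aligned address at or above the original "*addr".
 */
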
21017a9f2da3SKonstantin Belousov int
21027a9f2da3SKonstantin Belousov vm_map_find_aligned(vm_map_t map, vm_offset_t *addr, vm_size_t length,
21037a9f2da3SKonstantin Belousov     vm_offset_t max_addr, vm_offset_t alignment)
21047a9f2da3SKonstantin Belousov {
21057a9f2da3SKonstantin Belousov 	/* XXXKIB ASLR eh ? */
21067a9f2da3SKonstantin Belousov 	*addr = vm_map_findspace(map, *addr, length);
21077a9f2da3SKonstantin Belousov 	if (*addr + length > vm_map_max(map) ||
21087a9f2da3SKonstantin Belousov 	    (max_addr != 0 && *addr + length > max_addr))
21097a9f2da3SKonstantin Belousov 		return (KERN_NO_SPACE);
21107a9f2da3SKonstantin Belousov 	return (vm_map_alignspace(map, NULL, 0, addr, length, max_addr,
21117a9f2da3SKonstantin Belousov 	    alignment));
21127a9f2da3SKonstantin Belousov }
21137a9f2da3SKonstantin Belousov 
2114fec29688SAlan Cox /*
2115df8bae1dSRodney W. Grimes  *	vm_map_find finds an unallocated region in the target address
2116df8bae1dSRodney W. Grimes  *	map with the given length.  The search is defined to be
2117df8bae1dSRodney W. Grimes  *	first-fit from the specified address; the region found is
2118df8bae1dSRodney W. Grimes  *	returned in the same parameter.
2119df8bae1dSRodney W. Grimes  *
21202aaeadf8SMatthew Dillon  *	If object is non-NULL, the caller must bump its reference
21212aaeadf8SMatthew Dillon  *	count prior to making the call, to account for the new entry.
2122df8bae1dSRodney W. Grimes  */
2123df8bae1dSRodney W. Grimes int
2124b9dcd593SBruce Evans vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
2125b9dcd593SBruce Evans 	    vm_offset_t *addr,	/* IN/OUT */
2126edb572a3SJohn Baldwin 	    vm_size_t length, vm_offset_t max_addr, int find_space,
2127edb572a3SJohn Baldwin 	    vm_prot_t prot, vm_prot_t max, int cow)
2128df8bae1dSRodney W. Grimes {
2129fa50a355SKonstantin Belousov 	vm_offset_t alignment, curr_min_addr, min_addr;
2130fa50a355SKonstantin Belousov 	int gap, pidx, rv, try;
2131fa50a355SKonstantin Belousov 	bool cluster, en_aslr, update_anon;
2132df8bae1dSRodney W. Grimes 
21334648ba0aSKonstantin Belousov 	KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 ||
21344648ba0aSKonstantin Belousov 	    object == NULL,
21354648ba0aSKonstantin Belousov 	    ("vm_map_find: non-NULL backing object for stack"));
2136ea7e7006SKonstantin Belousov 	MPASS((cow & MAP_REMAP) == 0 || (find_space == VMFS_NO_SPACE &&
2137ea7e7006SKonstantin Belousov 	    (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0));
2138ff74a3faSJohn Baldwin 	if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL ||
2139ff74a3faSJohn Baldwin 	    (object->flags & OBJ_COLORED) == 0))
2140ff74a3faSJohn Baldwin 		find_space = VMFS_ANY_SPACE;
21415aa60b6fSJohn Baldwin 	if (find_space >> 8 != 0) {
21425aa60b6fSJohn Baldwin 		KASSERT((find_space & 0xff) == 0, ("bad VMFS flags"));
21435aa60b6fSJohn Baldwin 		alignment = (vm_offset_t)1 << (find_space >> 8);
21445aa60b6fSJohn Baldwin 	} else
21455aa60b6fSJohn Baldwin 		alignment = 0;
2146fa50a355SKonstantin Belousov 	en_aslr = (map->flags & MAP_ASLR) != 0;
2147484e9d03SKonstantin Belousov 	update_anon = cluster = clustering_anon_allowed(*addr) &&
2148fa50a355SKonstantin Belousov 	    (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 &&
2149fa50a355SKonstantin Belousov 	    find_space != VMFS_NO_SPACE && object == NULL &&
2150fa50a355SKonstantin Belousov 	    (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP |
2151fa50a355SKonstantin Belousov 	    MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE;
2152fa50a355SKonstantin Belousov 	curr_min_addr = min_addr = *addr;
2153fa50a355SKonstantin Belousov 	if (en_aslr && min_addr == 0 && !cluster &&
2154fa50a355SKonstantin Belousov 	    find_space != VMFS_NO_SPACE &&
2155fa50a355SKonstantin Belousov 	    (map->flags & MAP_ASLR_IGNSTART) != 0)
2156fa50a355SKonstantin Belousov 		curr_min_addr = min_addr = vm_map_min(map);
2157fa50a355SKonstantin Belousov 	try = 0;
21584d572bb3SAlan Cox 	vm_map_lock(map);
2159fa50a355SKonstantin Belousov 	if (cluster) {
2160fa50a355SKonstantin Belousov 		curr_min_addr = map->anon_loc;
2161fa50a355SKonstantin Belousov 		if (curr_min_addr == 0)
2162fa50a355SKonstantin Belousov 			cluster = false;
2163fa50a355SKonstantin Belousov 	}
216426c538ffSAlan Cox 	if (find_space != VMFS_NO_SPACE) {
2165fec29688SAlan Cox 		KASSERT(find_space == VMFS_ANY_SPACE ||
2166fec29688SAlan Cox 		    find_space == VMFS_OPTIMAL_SPACE ||
2167fec29688SAlan Cox 		    find_space == VMFS_SUPER_SPACE ||
2168fec29688SAlan Cox 		    alignment != 0, ("unexpected VMFS flag"));
2169fec29688SAlan Cox again:
2170fa50a355SKonstantin Belousov 		/*
2171fa50a355SKonstantin Belousov 		 * When creating an anonymous mapping, try clustering
2172fa50a355SKonstantin Belousov 		 * with an existing anonymous mapping first.
2173fa50a355SKonstantin Belousov 		 *
2174fa50a355SKonstantin Belousov 		 * We make up to two attempts to find address space
2175fa50a355SKonstantin Belousov 		 * for a given find_space value. The first attempt may
2176fa50a355SKonstantin Belousov 		 * apply randomization or may cluster with an existing
2177fa50a355SKonstantin Belousov 		 * anonymous mapping. If this first attempt fails,
2178fa50a355SKonstantin Belousov 		 * perform a first-fit search of the available address
2179fa50a355SKonstantin Belousov 		 * space.
2180fa50a355SKonstantin Belousov 		 *
2181fa50a355SKonstantin Belousov 		 * If all tries failed, and find_space is
2182fa50a355SKonstantin Belousov 		 * VMFS_OPTIMAL_SPACE, fall back to VMFS_ANY_SPACE.
2183fa50a355SKonstantin Belousov 		 * Again enable clustering and randomization.
2184fa50a355SKonstantin Belousov 		 */
2185fa50a355SKonstantin Belousov 		try++;
2186fa50a355SKonstantin Belousov 		MPASS(try <= 2);
2187fa50a355SKonstantin Belousov 
2188fa50a355SKonstantin Belousov 		if (try == 2) {
2189fa50a355SKonstantin Belousov 			/*
2190fa50a355SKonstantin Belousov 			 * Second try: we failed either to find a
2191fa50a355SKonstantin Belousov 			 * suitable region for randomizing the
2192fa50a355SKonstantin Belousov 			 * allocation, or to cluster with an existing
2193fa50a355SKonstantin Belousov 			 * mapping.  Retry with free run.
2194fa50a355SKonstantin Belousov 			 */
2195fa50a355SKonstantin Belousov 			curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ?
2196fa50a355SKonstantin Belousov 			    vm_map_min(map) : min_addr;
2197fa50a355SKonstantin Belousov 			atomic_add_long(&aslr_restarts, 1);
2198fa50a355SKonstantin Belousov 		}
2199fa50a355SKonstantin Belousov 
2200fa50a355SKonstantin Belousov 		if (try == 1 && en_aslr && !cluster) {
2201fa50a355SKonstantin Belousov 			/*
2202fa50a355SKonstantin Belousov 			 * Find space for allocation, including
2203fa50a355SKonstantin Belousov 			 * the gap needed for later randomization.
2204fa50a355SKonstantin Belousov 			 */
2205fa50a355SKonstantin Belousov 			pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 &&
2206fa50a355SKonstantin Belousov 			    (find_space == VMFS_SUPER_SPACE || find_space ==
2207fa50a355SKonstantin Belousov 			    VMFS_OPTIMAL_SPACE) ? 1 : 0;
2208fa50a355SKonstantin Belousov 			gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR &&
2209fa50a355SKonstantin Belousov 			    (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ?
2210fa50a355SKonstantin Belousov 			    aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx];
22119f701172SKonstantin Belousov 			*addr = vm_map_findspace(map, curr_min_addr,
22129f701172SKonstantin Belousov 			    length + gap * pagesizes[pidx]);
22139f701172SKonstantin Belousov 			if (*addr + length + gap * pagesizes[pidx] >
2214a5a02ef4SKonstantin Belousov 			    vm_map_max(map))
2215fa50a355SKonstantin Belousov 				goto again;
2216fa50a355SKonstantin Belousov 			/* And randomize the start address. */
2217fa50a355SKonstantin Belousov 			*addr += (arc4random() % gap) * pagesizes[pidx];
22185019dac9SKonstantin Belousov 			if (max_addr != 0 && *addr + length > max_addr)
22195019dac9SKonstantin Belousov 				goto again;
22209f701172SKonstantin Belousov 		} else {
22219f701172SKonstantin Belousov 			*addr = vm_map_findspace(map, curr_min_addr, length);
22229f701172SKonstantin Belousov 			if (*addr + length > vm_map_max(map) ||
2223edb572a3SJohn Baldwin 			    (max_addr != 0 && *addr + length > max_addr)) {
2224fa50a355SKonstantin Belousov 				if (cluster) {
2225fa50a355SKonstantin Belousov 					cluster = false;
2226fa50a355SKonstantin Belousov 					MPASS(try == 1);
2227fa50a355SKonstantin Belousov 					goto again;
2228fa50a355SKonstantin Belousov 				}
2229fec29688SAlan Cox 				rv = KERN_NO_SPACE;
2230fec29688SAlan Cox 				goto done;
2231fec29688SAlan Cox 			}
22329f701172SKonstantin Belousov 		}
2233fa50a355SKonstantin Belousov 
2234fec29688SAlan Cox 		if (find_space != VMFS_ANY_SPACE &&
2235fec29688SAlan Cox 		    (rv = vm_map_alignspace(map, object, offset, addr, length,
2236fec29688SAlan Cox 		    max_addr, alignment)) != KERN_SUCCESS) {
2237ff74a3faSJohn Baldwin 			if (find_space == VMFS_OPTIMAL_SPACE) {
2238ff74a3faSJohn Baldwin 				find_space = VMFS_ANY_SPACE;
2239fa50a355SKonstantin Belousov 				curr_min_addr = min_addr;
2240fa50a355SKonstantin Belousov 				cluster = update_anon;
2241fa50a355SKonstantin Belousov 				try = 0;
2242ff74a3faSJohn Baldwin 				goto again;
2243ff74a3faSJohn Baldwin 			}
2244fec29688SAlan Cox 			goto done;
2245df8bae1dSRodney W. Grimes 		}
2246ea7e7006SKonstantin Belousov 	} else if ((cow & MAP_REMAP) != 0) {
22470f1e6ec5SMark Johnston 		if (!vm_map_range_valid(map, *addr, *addr + length)) {
2248ea7e7006SKonstantin Belousov 			rv = KERN_INVALID_ADDRESS;
2249ea7e7006SKonstantin Belousov 			goto done;
2250ea7e7006SKonstantin Belousov 		}
2251e8f77c20SKonstantin Belousov 		rv = vm_map_delete(map, *addr, *addr + length);
2252e8f77c20SKonstantin Belousov 		if (rv != KERN_SUCCESS)
2253e8f77c20SKonstantin Belousov 			goto done;
2254df8bae1dSRodney W. Grimes 	}
22554648ba0aSKonstantin Belousov 	if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) {
2256fec29688SAlan Cox 		rv = vm_map_stack_locked(map, *addr, length, sgrowsiz, prot,
2257fec29688SAlan Cox 		    max, cow);
22584648ba0aSKonstantin Belousov 	} else {
2259fec29688SAlan Cox 		rv = vm_map_insert(map, object, offset, *addr, *addr + length,
2260fec29688SAlan Cox 		    prot, max, cow);
22614648ba0aSKonstantin Belousov 	}
2262fa50a355SKonstantin Belousov 	if (rv == KERN_SUCCESS && update_anon)
2263fa50a355SKonstantin Belousov 		map->anon_loc = *addr + length;
2264fec29688SAlan Cox done:
2265df8bae1dSRodney W. Grimes 	vm_map_unlock(map);
2266fec29688SAlan Cox 	return (rv);
2267df8bae1dSRodney W. Grimes }
2268df8bae1dSRodney W. Grimes 
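/*
 * Illustrative caller sketch (hypothetical, not from this file):
 * reserving "size" bytes anywhere in "map" with no backing object
 * might look like
 *
 *	addr = vm_map_min(map);
 *	rv = vm_map_find(map, NULL, 0, &addr, size, 0, VMFS_ANY_SPACE,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0);
 *
 * On KERN_SUCCESS, "addr" holds the start of the new mapping.
 */
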
2269e8502826SKonstantin Belousov /*
2270e8502826SKonstantin Belousov  *	vm_map_find_min() is a variant of vm_map_find() that takes an
2271e8502826SKonstantin Belousov  *	additional parameter (min_addr) and treats the given address
2272e8502826SKonstantin Belousov  *	(*addr) differently.  Specifically, it treats *addr as a hint
2273e8502826SKonstantin Belousov  *	and not as the minimum address where the mapping is created.
2274e8502826SKonstantin Belousov  *
2275e8502826SKonstantin Belousov  *	This function works in two phases.  First, it tries to
2276e8502826SKonstantin Belousov  *	allocate above the hint.  If that fails and the hint is
2277e8502826SKonstantin Belousov  *	greater than min_addr, it performs a second pass, replacing
2278e8502826SKonstantin Belousov  *	the hint with min_addr as the minimum address for the
2279e8502826SKonstantin Belousov  *	allocation.
2280e8502826SKonstantin Belousov  */
22816a97a3f7SKonstantin Belousov int
22826a97a3f7SKonstantin Belousov vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
22836a97a3f7SKonstantin Belousov     vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr,
22846a97a3f7SKonstantin Belousov     vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max,
22856a97a3f7SKonstantin Belousov     int cow)
22866a97a3f7SKonstantin Belousov {
22876a97a3f7SKonstantin Belousov 	vm_offset_t hint;
22886a97a3f7SKonstantin Belousov 	int rv;
22896a97a3f7SKonstantin Belousov 
22906a97a3f7SKonstantin Belousov 	hint = *addr;
22916a97a3f7SKonstantin Belousov 	for (;;) {
22926a97a3f7SKonstantin Belousov 		rv = vm_map_find(map, object, offset, addr, length, max_addr,
22936a97a3f7SKonstantin Belousov 		    find_space, prot, max, cow);
22946a97a3f7SKonstantin Belousov 		if (rv == KERN_SUCCESS || min_addr >= hint)
22956a97a3f7SKonstantin Belousov 			return (rv);
22967683ad70SKonstantin Belousov 		*addr = hint = min_addr;
22976a97a3f7SKonstantin Belousov 	}
22986a97a3f7SKonstantin Belousov }
22996a97a3f7SKonstantin Belousov 
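/*
 * A worked example with made-up addresses: given *addr = hint =
 * 0x30000000 and min_addr = 0x10000000, the first vm_map_find() call
 * searches upward from 0x30000000; if it returns KERN_NO_SPACE, the
 * hint is replaced with 0x10000000 and the search repeats from there,
 * after which any failure is final because min_addr >= hint.
 */
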
230092e78c10SAlan Cox /*
230192e78c10SAlan Cox  * A map entry with any of the following flags set must not be merged with
230292e78c10SAlan Cox  * another entry.
230392e78c10SAlan Cox  */
230492e78c10SAlan Cox #define	MAP_ENTRY_NOMERGE_MASK	(MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP | \
230578022527SKonstantin Belousov 	    MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP | MAP_ENTRY_VN_EXEC)
230692e78c10SAlan Cox 
230707424462SKonstantin Belousov static bool
230807424462SKonstantin Belousov vm_map_mergeable_neighbors(vm_map_entry_t prev, vm_map_entry_t entry)
230907424462SKonstantin Belousov {
231007424462SKonstantin Belousov 
231192e78c10SAlan Cox 	KASSERT((prev->eflags & MAP_ENTRY_NOMERGE_MASK) == 0 ||
231292e78c10SAlan Cox 	    (entry->eflags & MAP_ENTRY_NOMERGE_MASK) == 0,
231392e78c10SAlan Cox 	    ("vm_map_mergeable_neighbors: neither %p nor %p are mergeable",
231492e78c10SAlan Cox 	    prev, entry));
231507424462SKonstantin Belousov 	return (prev->end == entry->start &&
231607424462SKonstantin Belousov 	    prev->object.vm_object == entry->object.vm_object &&
231707424462SKonstantin Belousov 	    (prev->object.vm_object == NULL ||
231892e78c10SAlan Cox 	    prev->offset + (prev->end - prev->start) == entry->offset) &&
231907424462SKonstantin Belousov 	    prev->eflags == entry->eflags &&
232007424462SKonstantin Belousov 	    prev->protection == entry->protection &&
232107424462SKonstantin Belousov 	    prev->max_protection == entry->max_protection &&
232207424462SKonstantin Belousov 	    prev->inheritance == entry->inheritance &&
232307424462SKonstantin Belousov 	    prev->wired_count == entry->wired_count &&
232407424462SKonstantin Belousov 	    prev->cred == entry->cred);
232507424462SKonstantin Belousov }
232607424462SKonstantin Belousov 
232707424462SKonstantin Belousov static void
232807424462SKonstantin Belousov vm_map_merged_neighbor_dispose(vm_map_t map, vm_map_entry_t entry)
232907424462SKonstantin Belousov {
233007424462SKonstantin Belousov 
233107424462SKonstantin Belousov 	/*
233292e78c10SAlan Cox 	 * If the backing object is a vnode object, vm_object_deallocate()
233392e78c10SAlan Cox 	 * calls vrele().  However, vrele() does not lock the vnode because
233492e78c10SAlan Cox 	 * the vnode has additional references.  Thus, the map lock can be
233592e78c10SAlan Cox 	 * kept without causing a lock-order reversal with the vnode lock.
233607424462SKonstantin Belousov 	 *
233792e78c10SAlan Cox 	 * Since we count the number of virtual page mappings in
233892e78c10SAlan Cox 	 * object->un_pager.vnp.writemappings, the writemappings value
233992e78c10SAlan Cox 	 * should not be adjusted when the entry is disposed of.
234007424462SKonstantin Belousov 	 */
234107424462SKonstantin Belousov 	if (entry->object.vm_object != NULL)
234207424462SKonstantin Belousov 		vm_object_deallocate(entry->object.vm_object);
234307424462SKonstantin Belousov 	if (entry->cred != NULL)
234407424462SKonstantin Belousov 		crfree(entry->cred);
234507424462SKonstantin Belousov 	vm_map_entry_dispose(map, entry);
234607424462SKonstantin Belousov }
234707424462SKonstantin Belousov 
2348df8bae1dSRodney W. Grimes /*
234983ea714fSDoug Moore  *	vm_map_try_merge_entries:
235067bf6868SJohn Dyson  *
235183ea714fSDoug Moore  *	Compare the given map entry to its predecessor, and merge its predecessor
235283ea714fSDoug Moore  *	into it if possible.  The entry remains valid, and may be extended.
235383ea714fSDoug Moore  *	The predecessor may be deleted.
23544e71e795SMatthew Dillon  *
23554e71e795SMatthew Dillon  *	The map must be locked.
2356df8bae1dSRodney W. Grimes  */
23570afcd3afSAlan Cox void
23582767c9f3SDoug Moore vm_map_try_merge_entries(vm_map_t map, vm_map_entry_t prev_entry,
23592767c9f3SDoug Moore     vm_map_entry_t entry)
2360df8bae1dSRodney W. Grimes {
2361df8bae1dSRodney W. Grimes 
236283ea714fSDoug Moore 	VM_MAP_ASSERT_LOCKED(map);
236383ea714fSDoug Moore 	if ((entry->eflags & MAP_ENTRY_NOMERGE_MASK) == 0 &&
23642767c9f3SDoug Moore 	    vm_map_mergeable_neighbors(prev_entry, entry)) {
23652767c9f3SDoug Moore 		vm_map_entry_unlink(map, prev_entry, UNLINK_MERGE_NEXT);
23662767c9f3SDoug Moore 		vm_map_merged_neighbor_dispose(map, prev_entry);
2367308c24baSJohn Dyson 	}
2368df8bae1dSRodney W. Grimes }
236992e78c10SAlan Cox 
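/*
 * Illustrative caller idiom (a variant appears in vm_map_protect()
 * below): merge as the traversal advances, e.g.
 *
 *	for (prev = vm_map_entry_pred(first), entry = first;
 *	    entry->start < end;
 *	    vm_map_try_merge_entries(map, prev, entry),
 *	    prev = entry, entry = vm_map_entry_succ(entry))
 *		...modify entry...
 */
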
2370df8bae1dSRodney W. Grimes /*
2371af1d6d6aSDoug Moore  *	vm_map_entry_back:
2372af1d6d6aSDoug Moore  *
2373af1d6d6aSDoug Moore  *	Allocate an object to back a map entry.
2374af1d6d6aSDoug Moore  */
2375af1d6d6aSDoug Moore static inline void
2376af1d6d6aSDoug Moore vm_map_entry_back(vm_map_entry_t entry)
2377af1d6d6aSDoug Moore {
2378af1d6d6aSDoug Moore 	vm_object_t object;
2379af1d6d6aSDoug Moore 
2380af1d6d6aSDoug Moore 	KASSERT(entry->object.vm_object == NULL,
2381af1d6d6aSDoug Moore 	    ("map entry %p has backing object", entry));
2382af1d6d6aSDoug Moore 	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
2383af1d6d6aSDoug Moore 	    ("map entry %p is a submap", entry));
238467388836SKonstantin Belousov 	object = vm_object_allocate_anon(atop(entry->end - entry->start), NULL,
238567388836SKonstantin Belousov 	    entry->cred, entry->end - entry->start);
2386af1d6d6aSDoug Moore 	entry->object.vm_object = object;
2387af1d6d6aSDoug Moore 	entry->offset = 0;
2388af1d6d6aSDoug Moore 	entry->cred = NULL;
2389af1d6d6aSDoug Moore }
2390af1d6d6aSDoug Moore 
2391af1d6d6aSDoug Moore /*
2392af1d6d6aSDoug Moore  *	vm_map_entry_charge_object
2393af1d6d6aSDoug Moore  *
2394af1d6d6aSDoug Moore  *	If there is no object backing this entry, create one.  Otherwise, if
2395af1d6d6aSDoug Moore  *	the entry has cred, give it to the backing object.
2396af1d6d6aSDoug Moore  */
2397af1d6d6aSDoug Moore static inline void
2398af1d6d6aSDoug Moore vm_map_entry_charge_object(vm_map_t map, vm_map_entry_t entry)
2399af1d6d6aSDoug Moore {
2400af1d6d6aSDoug Moore 
2401af1d6d6aSDoug Moore 	VM_MAP_ASSERT_LOCKED(map);
2402af1d6d6aSDoug Moore 	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
2403af1d6d6aSDoug Moore 	    ("map entry %p is a submap", entry));
2404af1d6d6aSDoug Moore 	if (entry->object.vm_object == NULL && !map->system_map &&
2405af1d6d6aSDoug Moore 	    (entry->eflags & MAP_ENTRY_GUARD) == 0)
2406af1d6d6aSDoug Moore 		vm_map_entry_back(entry);
2407af1d6d6aSDoug Moore 	else if (entry->object.vm_object != NULL &&
2408af1d6d6aSDoug Moore 	    ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
2409af1d6d6aSDoug Moore 	    entry->cred != NULL) {
2410af1d6d6aSDoug Moore 		VM_OBJECT_WLOCK(entry->object.vm_object);
2411af1d6d6aSDoug Moore 		KASSERT(entry->object.vm_object->cred == NULL,
2412af1d6d6aSDoug Moore 		    ("OVERCOMMIT: %s: both cred e %p", __func__, entry));
2413af1d6d6aSDoug Moore 		entry->object.vm_object->cred = entry->cred;
2414af1d6d6aSDoug Moore 		entry->object.vm_object->charge = entry->end - entry->start;
2415af1d6d6aSDoug Moore 		VM_OBJECT_WUNLOCK(entry->object.vm_object);
2416af1d6d6aSDoug Moore 		entry->cred = NULL;
2417af1d6d6aSDoug Moore 	}
2418af1d6d6aSDoug Moore }
2419af1d6d6aSDoug Moore 
2420af1d6d6aSDoug Moore /*
2421037c0994SDoug Moore  *	vm_map_entry_clone
2422037c0994SDoug Moore  *
2423037c0994SDoug Moore  *	Create a duplicate map entry for clipping.
2424037c0994SDoug Moore  */
2425037c0994SDoug Moore static vm_map_entry_t
2426037c0994SDoug Moore vm_map_entry_clone(vm_map_t map, vm_map_entry_t entry)
2427037c0994SDoug Moore {
2428037c0994SDoug Moore 	vm_map_entry_t new_entry;
2429037c0994SDoug Moore 
2430037c0994SDoug Moore 	VM_MAP_ASSERT_LOCKED(map);
2431037c0994SDoug Moore 
2432037c0994SDoug Moore 	/*
2433037c0994SDoug Moore 	 * Create a backing object now, if none exists, so that more individual
2434037c0994SDoug Moore 	 * objects won't be created after the map entry is split.
2435037c0994SDoug Moore 	 */
2436037c0994SDoug Moore 	vm_map_entry_charge_object(map, entry);
2437037c0994SDoug Moore 
2438037c0994SDoug Moore 	/* Clone the entry. */
2439037c0994SDoug Moore 	new_entry = vm_map_entry_create(map);
2440037c0994SDoug Moore 	*new_entry = *entry;
2441037c0994SDoug Moore 	if (new_entry->cred != NULL)
2442037c0994SDoug Moore 		crhold(entry->cred);
2443037c0994SDoug Moore 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
2444037c0994SDoug Moore 		vm_object_reference(new_entry->object.vm_object);
2445037c0994SDoug Moore 		vm_map_entry_set_vnode_text(new_entry, true);
2446037c0994SDoug Moore 		/*
2447037c0994SDoug Moore 		 * The object->un_pager.vnp.writemappings for the object of
2448037c0994SDoug Moore 		 * The object->un_pager.vnp.writemappings for the object of
2449037c0994SDoug Moore 		 * a MAP_ENTRY_WRITECNT entry shall be kept as-is here.  The
2450037c0994SDoug Moore 		 * so the sum is left the same.
2451037c0994SDoug Moore 		 */
2452037c0994SDoug Moore 	}
2453037c0994SDoug Moore 	return (new_entry);
2454037c0994SDoug Moore }
2455037c0994SDoug Moore 
2456037c0994SDoug Moore /*
2457df8bae1dSRodney W. Grimes  *	vm_map_clip_start:	[ internal use only ]
2458df8bae1dSRodney W. Grimes  *
2459df8bae1dSRodney W. Grimes  *	Asserts that the given entry begins at or after the specified
2460df8bae1dSRodney W. Grimes  *	address, splitting the entry in two if necessary.  Fails with
2461df8bae1dSRodney W. Grimes  *	KERN_INVALID_ARGUMENT if the split violates the split boundary.
2462df8bae1dSRodney W. Grimes  */
2463e2e80fb3SKonstantin Belousov static int
2464e2e80fb3SKonstantin Belousov vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t startaddr)
2465df8bae1dSRodney W. Grimes {
2466c0877f10SJohn Dyson 	vm_map_entry_t new_entry;
2467e2e80fb3SKonstantin Belousov 	int bdry_idx;
2468df8bae1dSRodney W. Grimes 
24698a64110eSConrad Meyer 	if (!map->system_map)
24708a64110eSConrad Meyer 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
24718a64110eSConrad Meyer 		    "%s: map %p entry %p start 0x%jx", __func__, map, entry,
2472e2e80fb3SKonstantin Belousov 		    (uintmax_t)startaddr);
24738a64110eSConrad Meyer 
2474e2e80fb3SKonstantin Belousov 	if (startaddr <= entry->start)
2475e2e80fb3SKonstantin Belousov 		return (KERN_SUCCESS);
2476a116b5d3SConrad Meyer 
24773a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
2478e2e80fb3SKonstantin Belousov 	KASSERT(entry->end > startaddr && entry->start < startaddr,
2479a116b5d3SConrad Meyer 	    ("%s: invalid clip of entry %p", __func__, entry));
24803a0916b8SKonstantin Belousov 
2481e2e80fb3SKonstantin Belousov 	bdry_idx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
2482e2e80fb3SKonstantin Belousov 	    MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
2483e2e80fb3SKonstantin Belousov 	if (bdry_idx != 0) {
2484e2e80fb3SKonstantin Belousov 		if ((startaddr & (pagesizes[bdry_idx] - 1)) != 0)
2485e2e80fb3SKonstantin Belousov 			return (KERN_INVALID_ARGUMENT);
2486e2e80fb3SKonstantin Belousov 	}
2487e2e80fb3SKonstantin Belousov 
2488037c0994SDoug Moore 	new_entry = vm_map_entry_clone(map, entry);
2489df8bae1dSRodney W. Grimes 
24904766eba1SDoug Moore 	/*
24914766eba1SDoug Moore 	 * Split off the front portion.  Insert the new entry BEFORE this one,
24924766eba1SDoug Moore 	 * so that this entry has the specified starting address.
24934766eba1SDoug Moore 	 */
2494e2e80fb3SKonstantin Belousov 	new_entry->end = startaddr;
24959f701172SKonstantin Belousov 	vm_map_entry_link(map, new_entry);
2496e2e80fb3SKonstantin Belousov 	return (KERN_SUCCESS);
2497c0877f10SJohn Dyson }
2498df8bae1dSRodney W. Grimes 
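/*
 * A worked example with made-up addresses: clipping an entry spanning
 * [0x1000, 0x5000) at startaddr 0x3000 links a new entry covering
 * [0x1000, 0x3000) before it, after which the original entry covers
 * [0x3000, 0x5000) and thus begins at the requested address.
 */
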
2499df8bae1dSRodney W. Grimes /*
2500c7b23459SDoug Moore  *	vm_map_lookup_clip_start:
2501c7b23459SDoug Moore  *
2502c7b23459SDoug Moore  *	Find the entry at or just after 'start', and clip it if 'start' is in
2503c7b23459SDoug Moore  *	the interior of the entry.  On success, store the entry after 'start'
2504c7b23459SDoug Moore  *	in *res_entry, and the entry before 'start' in *prev_entry.
2505c7b23459SDoug Moore  */
2506e2e80fb3SKonstantin Belousov static int
2507c7b23459SDoug Moore vm_map_lookup_clip_start(vm_map_t map, vm_offset_t start,
2508e2e80fb3SKonstantin Belousov     vm_map_entry_t *res_entry, vm_map_entry_t *prev_entry)
2509c7b23459SDoug Moore {
2510c7b23459SDoug Moore 	vm_map_entry_t entry;
2511e2e80fb3SKonstantin Belousov 	int rv;
2512c7b23459SDoug Moore 
25138a64110eSConrad Meyer 	if (!map->system_map)
25148a64110eSConrad Meyer 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
25158a64110eSConrad Meyer 		    "%s: map %p start 0x%jx prev %p", __func__, map,
25168a64110eSConrad Meyer 		    (uintmax_t)start, prev_entry);
25178a64110eSConrad Meyer 
2518c7b23459SDoug Moore 	if (vm_map_lookup_entry(map, start, prev_entry)) {
2519c7b23459SDoug Moore 		entry = *prev_entry;
2520e2e80fb3SKonstantin Belousov 		rv = vm_map_clip_start(map, entry, start);
2521e2e80fb3SKonstantin Belousov 		if (rv != KERN_SUCCESS)
2522e2e80fb3SKonstantin Belousov 			return (rv);
2523c7b23459SDoug Moore 		*prev_entry = vm_map_entry_pred(entry);
2524c7b23459SDoug Moore 	} else
2525c7b23459SDoug Moore 		entry = vm_map_entry_succ(*prev_entry);
2526e2e80fb3SKonstantin Belousov 	*res_entry = entry;
2527e2e80fb3SKonstantin Belousov 	return (KERN_SUCCESS);
2528c7b23459SDoug Moore }
2529c7b23459SDoug Moore 
2530c7b23459SDoug Moore /*
2531df8bae1dSRodney W. Grimes  *	vm_map_clip_end:	[ internal use only ]
2532df8bae1dSRodney W. Grimes  *
2533df8bae1dSRodney W. Grimes  *	Asserts that the given entry ends at or before the specified
2534df8bae1dSRodney W. Grimes  *	address, splitting the entry in two if necessary.  Fails with
2535df8bae1dSRodney W. Grimes  *	KERN_INVALID_ARGUMENT if the split violates the split boundary.
2536df8bae1dSRodney W. Grimes  */
2537e2e80fb3SKonstantin Belousov static int
2538e2e80fb3SKonstantin Belousov vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t endaddr)
2539df8bae1dSRodney W. Grimes {
2540c0877f10SJohn Dyson 	vm_map_entry_t new_entry;
2541e2e80fb3SKonstantin Belousov 	int bdry_idx;
2542df8bae1dSRodney W. Grimes 
25438a64110eSConrad Meyer 	if (!map->system_map)
25448a64110eSConrad Meyer 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
25458a64110eSConrad Meyer 		    "%s: map %p entry %p end 0x%jx", __func__, map, entry,
2546e2e80fb3SKonstantin Belousov 		    (uintmax_t)endaddr);
25478a64110eSConrad Meyer 
2548e2e80fb3SKonstantin Belousov 	if (endaddr >= entry->end)
2549e2e80fb3SKonstantin Belousov 		return (KERN_SUCCESS);
2550a116b5d3SConrad Meyer 
25513a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
2552e2e80fb3SKonstantin Belousov 	KASSERT(entry->start < endaddr && entry->end > endaddr,
2553a116b5d3SConrad Meyer 	    ("%s: invalid clip of entry %p", __func__, entry));
25543a0916b8SKonstantin Belousov 
2555e2e80fb3SKonstantin Belousov 	bdry_idx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
2556e2e80fb3SKonstantin Belousov 	    MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
2557e2e80fb3SKonstantin Belousov 	if (bdry_idx != 0) {
2558e2e80fb3SKonstantin Belousov 		if ((endaddr & (pagesizes[bdry_idx] - 1)) != 0)
2559e2e80fb3SKonstantin Belousov 			return (KERN_INVALID_ARGUMENT);
2560e2e80fb3SKonstantin Belousov 	}
2561e2e80fb3SKonstantin Belousov 
2562037c0994SDoug Moore 	new_entry = vm_map_entry_clone(map, entry);
2563df8bae1dSRodney W. Grimes 
25644766eba1SDoug Moore 	/*
25654766eba1SDoug Moore 	 * Split off the back portion.  Insert the new entry AFTER this one,
25664766eba1SDoug Moore 	 * so that this entry has the specified ending address.
25674766eba1SDoug Moore 	 */
2568e2e80fb3SKonstantin Belousov 	new_entry->start = endaddr;
25699f701172SKonstantin Belousov 	vm_map_entry_link(map, new_entry);
2570e2e80fb3SKonstantin Belousov 
2571e2e80fb3SKonstantin Belousov 	return (KERN_SUCCESS);
2572c0877f10SJohn Dyson }
2573df8bae1dSRodney W. Grimes 
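/*
 * A worked example with made-up addresses: clipping an entry spanning
 * [0x1000, 0x5000) at endaddr 0x3000 links a new entry covering
 * [0x3000, 0x5000) after it, leaving the original entry covering
 * [0x1000, 0x3000) so that it ends at the requested address.
 */
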
2574df8bae1dSRodney W. Grimes /*
2575df8bae1dSRodney W. Grimes  *	vm_map_submap:		[ kernel use only ]
2576df8bae1dSRodney W. Grimes  *
2577df8bae1dSRodney W. Grimes  *	Mark the given range as handled by a subordinate map.
2578df8bae1dSRodney W. Grimes  *
2579df8bae1dSRodney W. Grimes  *	This range must have been created with vm_map_find,
2580df8bae1dSRodney W. Grimes  *	and no other operations may have been performed on this
2581df8bae1dSRodney W. Grimes  *	range prior to calling vm_map_submap.
2582df8bae1dSRodney W. Grimes  *
2583df8bae1dSRodney W. Grimes  *	Only a limited number of operations can be performed
2584df8bae1dSRodney W. Grimes  *	within this range after calling vm_map_submap:
2585df8bae1dSRodney W. Grimes  *		vm_fault
2586df8bae1dSRodney W. Grimes  *	[Don't try vm_map_copy!]
2587df8bae1dSRodney W. Grimes  *
2588df8bae1dSRodney W. Grimes  *	To remove a submapping, one must first remove the
2589df8bae1dSRodney W. Grimes  *	range from the superior map, and then destroy the
2590df8bae1dSRodney W. Grimes  *	submap (if desired).  [Better yet, don't try it.]
2591df8bae1dSRodney W. Grimes  */
2592df8bae1dSRodney W. Grimes int
25931b40f8c0SMatthew Dillon vm_map_submap(
25941b40f8c0SMatthew Dillon 	vm_map_t map,
25951b40f8c0SMatthew Dillon 	vm_offset_t start,
25961b40f8c0SMatthew Dillon 	vm_offset_t end,
25971b40f8c0SMatthew Dillon 	vm_map_t submap)
2598df8bae1dSRodney W. Grimes {
2599df8bae1dSRodney W. Grimes 	vm_map_entry_t entry;
2600fa50a355SKonstantin Belousov 	int result;
2601fa50a355SKonstantin Belousov 
2602fa50a355SKonstantin Belousov 	result = KERN_INVALID_ARGUMENT;
2603fa50a355SKonstantin Belousov 
2604fa50a355SKonstantin Belousov 	vm_map_lock(submap);
2605fa50a355SKonstantin Belousov 	submap->flags |= MAP_IS_SUB_MAP;
2606fa50a355SKonstantin Belousov 	vm_map_unlock(submap);
2607df8bae1dSRodney W. Grimes 
2608df8bae1dSRodney W. Grimes 	vm_map_lock(map);
2609df8bae1dSRodney W. Grimes 	VM_MAP_RANGE_CHECK(map, start, end);
2610e6bd3a81SMark Johnston 	if (vm_map_lookup_entry(map, start, &entry) && entry->end >= end &&
2611e6bd3a81SMark Johnston 	    (entry->eflags & MAP_ENTRY_COW) == 0 &&
2612e6bd3a81SMark Johnston 	    entry->object.vm_object == NULL) {
2613e2e80fb3SKonstantin Belousov 		result = vm_map_clip_start(map, entry, start);
2614e2e80fb3SKonstantin Belousov 		if (result != KERN_SUCCESS)
2615e2e80fb3SKonstantin Belousov 			goto unlock;
2616e2e80fb3SKonstantin Belousov 		result = vm_map_clip_end(map, entry, end);
2617e2e80fb3SKonstantin Belousov 		if (result != KERN_SUCCESS)
2618e2e80fb3SKonstantin Belousov 			goto unlock;
26192d8acc0fSJohn Dyson 		entry->object.sub_map = submap;
2620afa07f7eSJohn Dyson 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
2621df8bae1dSRodney W. Grimes 		result = KERN_SUCCESS;
2622df8bae1dSRodney W. Grimes 	}
2623e2e80fb3SKonstantin Belousov unlock:
2624df8bae1dSRodney W. Grimes 	vm_map_unlock(map);
2625df8bae1dSRodney W. Grimes 
2626fa50a355SKonstantin Belousov 	if (result != KERN_SUCCESS) {
2627fa50a355SKonstantin Belousov 		vm_map_lock(submap);
2628fa50a355SKonstantin Belousov 		submap->flags &= ~MAP_IS_SUB_MAP;
2629fa50a355SKonstantin Belousov 		vm_map_unlock(submap);
2630fa50a355SKonstantin Belousov 	}
2631df8bae1dSRodney W. Grimes 	return (result);
2632df8bae1dSRodney W. Grimes }
2633df8bae1dSRodney W. Grimes 
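/*
 * Illustrative caller sketch (hypothetical): the range must first be
 * reserved, object-less, before it can be marked as a submap:
 *
 *	rv = vm_map_find(map, NULL, 0, &addr, size, 0, VMFS_ANY_SPACE,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0);
 *	if (rv == KERN_SUCCESS)
 *		rv = vm_map_submap(map, addr, addr + size, submap);
 */
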
2634df8bae1dSRodney W. Grimes /*
2635dd05fa19SAlan Cox  * The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified
26361f78f902SAlan Cox  */
26371f78f902SAlan Cox #define	MAX_INIT_PT	96
26381f78f902SAlan Cox 
26391f78f902SAlan Cox /*
26400551c08dSAlan Cox  *	vm_map_pmap_enter:
26410551c08dSAlan Cox  *
2642dd05fa19SAlan Cox  *	Preload the specified map's pmap with mappings to the specified
2643dd05fa19SAlan Cox  *	object's memory-resident pages.  No further physical pages are
2644dd05fa19SAlan Cox  *	allocated, and no further virtual pages are retrieved from secondary
2645dd05fa19SAlan Cox  *	storage.  If the specified flags include MAP_PREFAULT_PARTIAL, then a
2646dd05fa19SAlan Cox  *	limited number of page mappings are created at the low-end of the
2647dd05fa19SAlan Cox  *	specified address range.  (For this purpose, a superpage mapping
2648dd05fa19SAlan Cox  *	counts as one page mapping.)  Otherwise, all resident pages within
26493453bca8SAlan Cox  *	the specified address range are mapped.
26500551c08dSAlan Cox  */
2651077ec27cSAlan Cox static void
26524da4d293SAlan Cox vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
26530551c08dSAlan Cox     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags)
26540551c08dSAlan Cox {
26558fece8c3SAlan Cox 	vm_offset_t start;
2656ce142d9eSAlan Cox 	vm_page_t p, p_start;
2657dd05fa19SAlan Cox 	vm_pindex_t mask, psize, threshold, tmpidx;
26580551c08dSAlan Cox 
2659ba8bca61SAlan Cox 	if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL)
26601f78f902SAlan Cox 		return;
26619af6d512SAttilio Rao 	if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
266289f6b863SAttilio Rao 		VM_OBJECT_WLOCK(object);
266301381811SJohn Baldwin 		if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
26649af6d512SAttilio Rao 			pmap_object_init_pt(map->pmap, addr, object, pindex,
26659af6d512SAttilio Rao 			    size);
26669af6d512SAttilio Rao 			VM_OBJECT_WUNLOCK(object);
26679af6d512SAttilio Rao 			return;
26689af6d512SAttilio Rao 		}
26699af6d512SAttilio Rao 		VM_OBJECT_LOCK_DOWNGRADE(object);
2670886b9021SJeff Roberson 	} else
2671886b9021SJeff Roberson 		VM_OBJECT_RLOCK(object);
26721f78f902SAlan Cox 
26731f78f902SAlan Cox 	psize = atop(size);
26741f78f902SAlan Cox 	if (psize + pindex > object->size) {
2675ed2f945aSMark Johnston 		if (pindex >= object->size) {
26769af6d512SAttilio Rao 			VM_OBJECT_RUNLOCK(object);
26779af6d512SAttilio Rao 			return;
26789af6d512SAttilio Rao 		}
26791f78f902SAlan Cox 		psize = object->size - pindex;
26801f78f902SAlan Cox 	}
26811f78f902SAlan Cox 
2682ce142d9eSAlan Cox 	start = 0;
2683ce142d9eSAlan Cox 	p_start = NULL;
2684dd05fa19SAlan Cox 	threshold = MAX_INIT_PT;
26851f78f902SAlan Cox 
2686b382c10aSKonstantin Belousov 	p = vm_page_find_least(object, pindex);
26871f78f902SAlan Cox 	/*
26881f78f902SAlan Cox 	 * Assert: the variable p is either (1) the page with the
26891f78f902SAlan Cox 	 * least pindex greater than or equal to the parameter pindex
26901f78f902SAlan Cox 	 * or (2) NULL.
26911f78f902SAlan Cox 	 */
26921f78f902SAlan Cox 	for (;
26931f78f902SAlan Cox 	     p != NULL && (tmpidx = p->pindex - pindex) < psize;
26941f78f902SAlan Cox 	     p = TAILQ_NEXT(p, listq)) {
26951f78f902SAlan Cox 		/*
26961f78f902SAlan Cox 		 * don't allow an madvise to blow away our really
26971f78f902SAlan Cox 		 * don't allow a madvise to blow away our really
26981f78f902SAlan Cox 		 * free pages by allocating pv entries.
2699dd05fa19SAlan Cox 		if (((flags & MAP_PREFAULT_MADVISE) != 0 &&
2700e2068d0bSJeff Roberson 		    vm_page_count_severe()) ||
2701dd05fa19SAlan Cox 		    ((flags & MAP_PREFAULT_PARTIAL) != 0 &&
2702dd05fa19SAlan Cox 		    tmpidx >= threshold)) {
2703379fb642SAlan Cox 			psize = tmpidx;
27041f78f902SAlan Cox 			break;
27051f78f902SAlan Cox 		}
27060012f373SJeff Roberson 		if (vm_page_all_valid(p)) {
2707ce142d9eSAlan Cox 			if (p_start == NULL) {
2708ce142d9eSAlan Cox 				start = addr + ptoa(tmpidx);
2709ce142d9eSAlan Cox 				p_start = p;
2710ce142d9eSAlan Cox 			}
2711dd05fa19SAlan Cox 			/* Jump ahead if a superpage mapping is possible. */
2712dd05fa19SAlan Cox 			if (p->psind > 0 && ((addr + ptoa(tmpidx)) &
2713dd05fa19SAlan Cox 			    (pagesizes[p->psind] - 1)) == 0) {
2714dd05fa19SAlan Cox 				mask = atop(pagesizes[p->psind]) - 1;
2715dd05fa19SAlan Cox 				if (tmpidx + mask < psize &&
271688302601SAlan Cox 				    vm_page_ps_test(p, PS_ALL_VALID, NULL)) {
2717dd05fa19SAlan Cox 					p += mask;
2718dd05fa19SAlan Cox 					threshold += mask;
2719dd05fa19SAlan Cox 				}
2720dd05fa19SAlan Cox 			}
27217bfda801SAlan Cox 		} else if (p_start != NULL) {
2722cf4682aeSAlan Cox 			pmap_enter_object(map->pmap, start, addr +
2723cf4682aeSAlan Cox 			    ptoa(tmpidx), p_start, prot);
2724cf4682aeSAlan Cox 			p_start = NULL;
2725cf4682aeSAlan Cox 		}
2726cf4682aeSAlan Cox 	}
2727c46b90e9SAlan Cox 	if (p_start != NULL)
2728379fb642SAlan Cox 		pmap_enter_object(map->pmap, start, addr + ptoa(psize),
2729379fb642SAlan Cox 		    p_start, prot);
27309af6d512SAttilio Rao 	VM_OBJECT_RUNLOCK(object);
27310551c08dSAlan Cox }
27320551c08dSAlan Cox 
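/*
 * A worked example of the superpage jump above, using typical amd64
 * sizes: with a 4KB base page and pagesizes[p->psind] = 2MB, mask =
 * atop(2MB) - 1 = 511, so a fully valid, suitably aligned superpage
 * advances the scan by 511 base pages and raises the
 * MAP_PREFAULT_PARTIAL threshold by the same amount.
 */
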
27330551c08dSAlan Cox /*
2734df8bae1dSRodney W. Grimes  *	vm_map_protect:
2735df8bae1dSRodney W. Grimes  *
2736df8bae1dSRodney W. Grimes  *	Sets the protection of the specified address
2737df8bae1dSRodney W. Grimes  *	region in the target map.  If "set_max" is
2738df8bae1dSRodney W. Grimes  *	specified, the maximum protection is to be set;
2739df8bae1dSRodney W. Grimes  *	otherwise, only the current protection is affected.  On maps
2739df8bae1dSRodney W. Grimes  *	with MAP_WXORX set, requests making a range simultaneously
2739df8bae1dSRodney W. Grimes  *	writable and executable fail with KERN_PROTECTION_FAILURE.
2740df8bae1dSRodney W. Grimes  */
2741df8bae1dSRodney W. Grimes int
2742b9dcd593SBruce Evans vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
2743b9dcd593SBruce Evans 	       vm_prot_t new_prot, boolean_t set_max)
2744df8bae1dSRodney W. Grimes {
27452767c9f3SDoug Moore 	vm_map_entry_t entry, first_entry, in_tran, prev_entry;
27463364c323SKonstantin Belousov 	vm_object_t obj;
2747ef694c1aSEdward Tomasz Napierala 	struct ucred *cred;
2748210a6886SKonstantin Belousov 	vm_prot_t old_prot;
2749a72dce34SDoug Moore 	int rv;
2750df8bae1dSRodney W. Grimes 
275179e9451fSKonstantin Belousov 	if (start == end)
275279e9451fSKonstantin Belousov 		return (KERN_SUCCESS);
275379e9451fSKonstantin Belousov 
275419f5d9f2SKonstantin Belousov again:
275519f5d9f2SKonstantin Belousov 	in_tran = NULL;
2756df8bae1dSRodney W. Grimes 	vm_map_lock(map);
2757df8bae1dSRodney W. Grimes 
2758*2e1c94aaSKonstantin Belousov 	if ((map->flags & MAP_WXORX) != 0 && (new_prot &
2759*2e1c94aaSKonstantin Belousov 	    (VM_PROT_WRITE | VM_PROT_EXECUTE)) == (VM_PROT_WRITE |
2760*2e1c94aaSKonstantin Belousov 	    VM_PROT_EXECUTE)) {
2761*2e1c94aaSKonstantin Belousov 		vm_map_unlock(map);
2762*2e1c94aaSKonstantin Belousov 		return (KERN_PROTECTION_FAILURE);
2763*2e1c94aaSKonstantin Belousov 	}
2764*2e1c94aaSKonstantin Belousov 
2765e1cb9d37SMark Johnston 	/*
2766e1cb9d37SMark Johnston 	 * Ensure that we are not concurrently wiring pages.  vm_map_wire() may
2767e1cb9d37SMark Johnston 	 * need to fault pages into the map and will drop the map lock while
2768e1cb9d37SMark Johnston 	 * doing so, and the VM object may end up in an inconsistent state if we
2769e1cb9d37SMark Johnston 	 * update the protection on the map entry in between faults.
2770e1cb9d37SMark Johnston 	 */
2771e1cb9d37SMark Johnston 	vm_map_wait_busy(map);
2772e1cb9d37SMark Johnston 
2773df8bae1dSRodney W. Grimes 	VM_MAP_RANGE_CHECK(map, start, end);
2774df8bae1dSRodney W. Grimes 
27752767c9f3SDoug Moore 	if (!vm_map_lookup_entry(map, start, &first_entry))
27762767c9f3SDoug Moore 		first_entry = vm_map_entry_succ(first_entry);
2777df8bae1dSRodney W. Grimes 
2778df8bae1dSRodney W. Grimes 	/*
27790d94caffSDavid Greenman 	 * Make a first pass to check for protection violations.
2780df8bae1dSRodney W. Grimes 	 */
27812767c9f3SDoug Moore 	for (entry = first_entry; entry->start < end;
27822767c9f3SDoug Moore 	    entry = vm_map_entry_succ(entry)) {
27832767c9f3SDoug Moore 		if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
27848a89ca94SKonstantin Belousov 			continue;
27852767c9f3SDoug Moore 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) {
2786a1f6d91cSDavid Greenman 			vm_map_unlock(map);
2787df8bae1dSRodney W. Grimes 			return (KERN_INVALID_ARGUMENT);
2788a1f6d91cSDavid Greenman 		}
27892767c9f3SDoug Moore 		if ((new_prot & entry->max_protection) != new_prot) {
2790df8bae1dSRodney W. Grimes 			vm_map_unlock(map);
2791df8bae1dSRodney W. Grimes 			return (KERN_PROTECTION_FAILURE);
2792df8bae1dSRodney W. Grimes 		}
27932767c9f3SDoug Moore 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0)
27942767c9f3SDoug Moore 			in_tran = entry;
279519f5d9f2SKonstantin Belousov 	}
279619f5d9f2SKonstantin Belousov 
279719f5d9f2SKonstantin Belousov 	/*
2798bdb90e76SDoug Moore 	 * Postpone the operation until all in-transition map entries have
2799bdb90e76SDoug Moore 	 * stabilized.  An in-transition entry might already have its pages
2800bdb90e76SDoug Moore 	 * wired and wired_count incremented, but not yet have its
2801bdb90e76SDoug Moore 	 * MAP_ENTRY_USER_WIRED flag set.  In that case, we would fail to call
2802bdb90e76SDoug Moore 	 * vm_fault_copy_entry() in the final loop below.
280319f5d9f2SKonstantin Belousov 	 */
280419f5d9f2SKonstantin Belousov 	if (in_tran != NULL) {
280519f5d9f2SKonstantin Belousov 		in_tran->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
280619f5d9f2SKonstantin Belousov 		vm_map_unlock_and_wait(map, 0);
280719f5d9f2SKonstantin Belousov 		goto again;
2808df8bae1dSRodney W. Grimes 	}
2809df8bae1dSRodney W. Grimes 
28103364c323SKonstantin Belousov 	/*
2811a72dce34SDoug Moore 	 * Before changing the protections, try to reserve swap space for any
2812a72dce34SDoug Moore 	 * private (i.e., copy-on-write) mappings that are transitioning from
2813a72dce34SDoug Moore 	 * read-only to read/write access.  If a reservation fails, break out
2814a72dce34SDoug Moore 	 * of this loop early and let the next loop simplify the entries, since
2815a72dce34SDoug Moore 	 * some may now be mergeable.
28163364c323SKonstantin Belousov 	 */
2817e2e80fb3SKonstantin Belousov 	rv = vm_map_clip_start(map, first_entry, start);
2818e2e80fb3SKonstantin Belousov 	if (rv != KERN_SUCCESS) {
2819e2e80fb3SKonstantin Belousov 		vm_map_unlock(map);
2820e2e80fb3SKonstantin Belousov 		return (rv);
2821e2e80fb3SKonstantin Belousov 	}
28222767c9f3SDoug Moore 	for (entry = first_entry; entry->start < end;
28232767c9f3SDoug Moore 	    entry = vm_map_entry_succ(entry)) {
2824e2e80fb3SKonstantin Belousov 		rv = vm_map_clip_end(map, entry, end);
2825e2e80fb3SKonstantin Belousov 		if (rv != KERN_SUCCESS) {
2826e2e80fb3SKonstantin Belousov 			vm_map_unlock(map);
2827e2e80fb3SKonstantin Belousov 			return (rv);
2828e2e80fb3SKonstantin Belousov 		}
28293364c323SKonstantin Belousov 
28303364c323SKonstantin Belousov 		if (set_max ||
28312767c9f3SDoug Moore 		    ((new_prot & ~entry->protection) & VM_PROT_WRITE) == 0 ||
28322767c9f3SDoug Moore 		    ENTRY_CHARGED(entry) ||
28332767c9f3SDoug Moore 		    (entry->eflags & MAP_ENTRY_GUARD) != 0) {
28343364c323SKonstantin Belousov 			continue;
28353364c323SKonstantin Belousov 		}
28363364c323SKonstantin Belousov 
2837ef694c1aSEdward Tomasz Napierala 		cred = curthread->td_ucred;
28382767c9f3SDoug Moore 		obj = entry->object.vm_object;
28393364c323SKonstantin Belousov 
28402767c9f3SDoug Moore 		if (obj == NULL ||
28412767c9f3SDoug Moore 		    (entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0) {
28422767c9f3SDoug Moore 			if (!swap_reserve(entry->end - entry->start)) {
2843a72dce34SDoug Moore 				rv = KERN_RESOURCE_SHORTAGE;
28442767c9f3SDoug Moore 				end = entry->end;
2845a72dce34SDoug Moore 				break;
28463364c323SKonstantin Belousov 			}
2847ef694c1aSEdward Tomasz Napierala 			crhold(cred);
28482767c9f3SDoug Moore 			entry->cred = cred;
28493364c323SKonstantin Belousov 			continue;
28503364c323SKonstantin Belousov 		}
28513364c323SKonstantin Belousov 
2852886b9021SJeff Roberson 		if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP)
2853886b9021SJeff Roberson 			continue;
285489f6b863SAttilio Rao 		VM_OBJECT_WLOCK(obj);
28553364c323SKonstantin Belousov 		if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP) {
285689f6b863SAttilio Rao 			VM_OBJECT_WUNLOCK(obj);
28573364c323SKonstantin Belousov 			continue;
28583364c323SKonstantin Belousov 		}
28593364c323SKonstantin Belousov 
28603364c323SKonstantin Belousov 		/*
28613364c323SKonstantin Belousov 		 * Charge for the whole object allocation now, since
28623364c323SKonstantin Belousov 		 * we cannot distinguish between non-charged and
28633364c323SKonstantin Belousov 		 * charged clipped mapping of the same object later.
28643364c323SKonstantin Belousov 		 */
28653364c323SKonstantin Belousov 		KASSERT(obj->charge == 0,
28663d95614fSKonstantin Belousov 		    ("vm_map_protect: object %p overcharged (entry %p)",
28672767c9f3SDoug Moore 		    obj, entry));
28683364c323SKonstantin Belousov 		if (!swap_reserve(ptoa(obj->size))) {
286989f6b863SAttilio Rao 			VM_OBJECT_WUNLOCK(obj);
2870a72dce34SDoug Moore 			rv = KERN_RESOURCE_SHORTAGE;
28712767c9f3SDoug Moore 			end = entry->end;
2872a72dce34SDoug Moore 			break;
28733364c323SKonstantin Belousov 		}
28743364c323SKonstantin Belousov 
2875ef694c1aSEdward Tomasz Napierala 		crhold(cred);
2876ef694c1aSEdward Tomasz Napierala 		obj->cred = cred;
28773364c323SKonstantin Belousov 		obj->charge = ptoa(obj->size);
287889f6b863SAttilio Rao 		VM_OBJECT_WUNLOCK(obj);
28793364c323SKonstantin Belousov 	}
28803364c323SKonstantin Belousov 
2881df8bae1dSRodney W. Grimes 	/*
2882a72dce34SDoug Moore 	 * If enough swap space was available, go back and fix up protections.
2883a72dce34SDoug Moore 	 * Otherwise, just simplify entries, since some may have been modified.
2884a72dce34SDoug Moore 	 * [Note that clipping is not necessary the second time.]
2885df8bae1dSRodney W. Grimes 	 */
28862767c9f3SDoug Moore 	for (prev_entry = vm_map_entry_pred(first_entry), entry = first_entry;
28872767c9f3SDoug Moore 	    entry->start < end;
28882767c9f3SDoug Moore 	    vm_map_try_merge_entries(map, prev_entry, entry),
28892767c9f3SDoug Moore 	    prev_entry = entry, entry = vm_map_entry_succ(entry)) {
2890a72dce34SDoug Moore 		if (rv != KERN_SUCCESS ||
28912767c9f3SDoug Moore 		    (entry->eflags & MAP_ENTRY_GUARD) != 0)
289219bd0d9cSKonstantin Belousov 			continue;
289319bd0d9cSKonstantin Belousov 
28942767c9f3SDoug Moore 		old_prot = entry->protection;
2895210a6886SKonstantin Belousov 
2896df8bae1dSRodney W. Grimes 		if (set_max)
28972767c9f3SDoug Moore 			entry->protection =
28982767c9f3SDoug Moore 			    (entry->max_protection = new_prot) &
2899df8bae1dSRodney W. Grimes 			    old_prot;
2900df8bae1dSRodney W. Grimes 		else
29012767c9f3SDoug Moore 			entry->protection = new_prot;
2902df8bae1dSRodney W. Grimes 
2903dd006a1bSAlan Cox 		/*
2904dd006a1bSAlan Cox 		 * For user wired map entries, the normal lazy evaluation of
2905dd006a1bSAlan Cox 		 * write access upgrades through soft page faults is
2906dd006a1bSAlan Cox 		 * undesirable.  Instead, immediately copy any pages that are
2907dd006a1bSAlan Cox 		 * copy-on-write and enable write access in the physical map.
2908dd006a1bSAlan Cox 		 */
29092767c9f3SDoug Moore 		if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0 &&
29102767c9f3SDoug Moore 		    (entry->protection & VM_PROT_WRITE) != 0 &&
29115930251aSKonstantin Belousov 		    (old_prot & VM_PROT_WRITE) == 0)
29122767c9f3SDoug Moore 			vm_fault_copy_entry(map, map, entry, entry, NULL);
2913210a6886SKonstantin Belousov 
2914df8bae1dSRodney W. Grimes 		/*
29152fafce9eSAlan Cox 		 * When restricting access, update the physical map.  Worry
29162fafce9eSAlan Cox 		 * about copy-on-write here.
2917df8bae1dSRodney W. Grimes 		 */
29182767c9f3SDoug Moore 		if ((old_prot & ~entry->protection) != 0) {
2919afa07f7eSJohn Dyson #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
2920df8bae1dSRodney W. Grimes 							VM_PROT_ALL)
29212767c9f3SDoug Moore 			pmap_protect(map->pmap, entry->start,
29222767c9f3SDoug Moore 			    entry->end,
29232767c9f3SDoug Moore 			    entry->protection & MASK(entry));
2924df8bae1dSRodney W. Grimes #undef	MASK
2925df8bae1dSRodney W. Grimes 		}
2926df8bae1dSRodney W. Grimes 	}
29272767c9f3SDoug Moore 	vm_map_try_merge_entries(map, prev_entry, entry);
2928df8bae1dSRodney W. Grimes 	vm_map_unlock(map);
2929a72dce34SDoug Moore 	return (rv);
2930df8bae1dSRodney W. Grimes }
2931df8bae1dSRodney W. Grimes 
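/*
 * Illustrative caller sketch (hypothetical): revoking write access
 * while leaving the maximum protection intact might look like
 *
 *	rv = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 * Passing TRUE for set_max would instead lower the maximum protection
 * and intersect the current protection with it.
 */
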
2932df8bae1dSRodney W. Grimes /*
2933867a482dSJohn Dyson  *	vm_map_madvise:
2934867a482dSJohn Dyson  *
2935867a482dSJohn Dyson  *	This routine traverses a process's map, handling the madvise
2936f7fc307aSAlan Cox  *	system call.  Advisories are classified as either those affecting
2937f7fc307aSAlan Cox  *	the vm_map_entry structure, or those affecting the underlying
2938f7fc307aSAlan Cox  *	objects.
2939867a482dSJohn Dyson  */
2940b4309055SMatthew Dillon int
29411b40f8c0SMatthew Dillon vm_map_madvise(
29421b40f8c0SMatthew Dillon 	vm_map_t map,
29431b40f8c0SMatthew Dillon 	vm_offset_t start,
29441b40f8c0SMatthew Dillon 	vm_offset_t end,
29451b40f8c0SMatthew Dillon 	int behav)
2946867a482dSJohn Dyson {
29472767c9f3SDoug Moore 	vm_map_entry_t entry, prev_entry;
2948e2e80fb3SKonstantin Belousov 	int rv;
29493e7cb27cSAlan Cox 	bool modify_map;
2950867a482dSJohn Dyson 
2951b4309055SMatthew Dillon 	/*
2952b4309055SMatthew Dillon 	 * Some madvise calls directly modify the vm_map_entry, in which case
2953b4309055SMatthew Dillon 	 * we need to use an exclusive lock on the map and we need to perform
2954b4309055SMatthew Dillon 	 * various clipping operations.  Otherwise we only need a read-lock
2955b4309055SMatthew Dillon 	 * on the map.
2956b4309055SMatthew Dillon 	 */
2957b4309055SMatthew Dillon 	switch (behav) {
2958b4309055SMatthew Dillon 	case MADV_NORMAL:
2959b4309055SMatthew Dillon 	case MADV_SEQUENTIAL:
2960b4309055SMatthew Dillon 	case MADV_RANDOM:
29614f79d873SMatthew Dillon 	case MADV_NOSYNC:
29624f79d873SMatthew Dillon 	case MADV_AUTOSYNC:
29639730a5daSPaul Saab 	case MADV_NOCORE:
29649730a5daSPaul Saab 	case MADV_CORE:
296579e9451fSKonstantin Belousov 		if (start == end)
29663e7cb27cSAlan Cox 			return (0);
29673e7cb27cSAlan Cox 		modify_map = true;
2968867a482dSJohn Dyson 		vm_map_lock(map);
2969b4309055SMatthew Dillon 		break;
2970b4309055SMatthew Dillon 	case MADV_WILLNEED:
2971b4309055SMatthew Dillon 	case MADV_DONTNEED:
2972b4309055SMatthew Dillon 	case MADV_FREE:
297379e9451fSKonstantin Belousov 		if (start == end)
29743e7cb27cSAlan Cox 			return (0);
29753e7cb27cSAlan Cox 		modify_map = false;
2976f7fc307aSAlan Cox 		vm_map_lock_read(map);
2977b4309055SMatthew Dillon 		break;
2978b4309055SMatthew Dillon 	default:
29793e7cb27cSAlan Cox 		return (EINVAL);
2980b4309055SMatthew Dillon 	}
2981b4309055SMatthew Dillon 
2982b4309055SMatthew Dillon 	/*
2983b4309055SMatthew Dillon 	 * Locate starting entry and clip if necessary.
2984b4309055SMatthew Dillon 	 */
2985867a482dSJohn Dyson 	VM_MAP_RANGE_CHECK(map, start, end);
2986867a482dSJohn Dyson 
2987f7fc307aSAlan Cox 	if (modify_map) {
2988f7fc307aSAlan Cox 		/*
2989f7fc307aSAlan Cox 		 * madvise behaviors that are implemented in the vm_map_entry.
2990f7fc307aSAlan Cox 		 *
2991f7fc307aSAlan Cox 		 * We clip the vm_map_entry so that behavioral changes are
2992f7fc307aSAlan Cox 		 * limited to the specified address range.
2993f7fc307aSAlan Cox 		 */
2994e2e80fb3SKonstantin Belousov 		rv = vm_map_lookup_clip_start(map, start, &entry, &prev_entry);
2995e2e80fb3SKonstantin Belousov 		if (rv != KERN_SUCCESS) {
2996e2e80fb3SKonstantin Belousov 			vm_map_unlock(map);
2997e2e80fb3SKonstantin Belousov 			return (vm_mmap_to_errno(rv));
2998e2e80fb3SKonstantin Belousov 		}
2999e2e80fb3SKonstantin Belousov 
3000e2e80fb3SKonstantin Belousov 		for (; entry->start < end; prev_entry = entry,
3001e2e80fb3SKonstantin Belousov 		    entry = vm_map_entry_succ(entry)) {
30022767c9f3SDoug Moore 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
3003867a482dSJohn Dyson 				continue;
3004fed9a903SJohn Dyson 
3005e2e80fb3SKonstantin Belousov 			rv = vm_map_clip_end(map, entry, end);
3006e2e80fb3SKonstantin Belousov 			if (rv != KERN_SUCCESS) {
3007e2e80fb3SKonstantin Belousov 				vm_map_unlock(map);
3008e2e80fb3SKonstantin Belousov 				return (vm_mmap_to_errno(rv));
3009e2e80fb3SKonstantin Belousov 			}
3010fed9a903SJohn Dyson 
3011f7fc307aSAlan Cox 			switch (behav) {
3012867a482dSJohn Dyson 			case MADV_NORMAL:
30132767c9f3SDoug Moore 				vm_map_entry_set_behavior(entry,
30142767c9f3SDoug Moore 				    MAP_ENTRY_BEHAV_NORMAL);
3015867a482dSJohn Dyson 				break;
3016867a482dSJohn Dyson 			case MADV_SEQUENTIAL:
30172767c9f3SDoug Moore 				vm_map_entry_set_behavior(entry,
30182767c9f3SDoug Moore 				    MAP_ENTRY_BEHAV_SEQUENTIAL);
3019867a482dSJohn Dyson 				break;
3020867a482dSJohn Dyson 			case MADV_RANDOM:
30212767c9f3SDoug Moore 				vm_map_entry_set_behavior(entry,
30222767c9f3SDoug Moore 				    MAP_ENTRY_BEHAV_RANDOM);
3023867a482dSJohn Dyson 				break;
30244f79d873SMatthew Dillon 			case MADV_NOSYNC:
30252767c9f3SDoug Moore 				entry->eflags |= MAP_ENTRY_NOSYNC;
30264f79d873SMatthew Dillon 				break;
30274f79d873SMatthew Dillon 			case MADV_AUTOSYNC:
30282767c9f3SDoug Moore 				entry->eflags &= ~MAP_ENTRY_NOSYNC;
30294f79d873SMatthew Dillon 				break;
30309730a5daSPaul Saab 			case MADV_NOCORE:
30312767c9f3SDoug Moore 				entry->eflags |= MAP_ENTRY_NOCOREDUMP;
30329730a5daSPaul Saab 				break;
30339730a5daSPaul Saab 			case MADV_CORE:
30342767c9f3SDoug Moore 				entry->eflags &= ~MAP_ENTRY_NOCOREDUMP;
30359730a5daSPaul Saab 				break;
3036867a482dSJohn Dyson 			default:
3037867a482dSJohn Dyson 				break;
3038867a482dSJohn Dyson 			}
30392767c9f3SDoug Moore 			vm_map_try_merge_entries(map, prev_entry, entry);
3040867a482dSJohn Dyson 		}
30412767c9f3SDoug Moore 		vm_map_try_merge_entries(map, prev_entry, entry);
3042867a482dSJohn Dyson 		vm_map_unlock(map);
3043b4309055SMatthew Dillon 	} else {
304492a59946SJohn Baldwin 		vm_pindex_t pstart, pend;
3045f7fc307aSAlan Cox 
3046f7fc307aSAlan Cox 		/*
3047f7fc307aSAlan Cox 		 * madvise behaviors that are implemented in the underlying
3048f7fc307aSAlan Cox 		 * vm_object.
3049f7fc307aSAlan Cox 		 *
3050f7fc307aSAlan Cox 		 * Since we don't clip the vm_map_entry, we have to clip
3051f7fc307aSAlan Cox 		 * the vm_object pindex and count.
3052f7fc307aSAlan Cox 		 */
3053c7b23459SDoug Moore 		if (!vm_map_lookup_entry(map, start, &entry))
3054c7b23459SDoug Moore 			entry = vm_map_entry_succ(entry);
30552767c9f3SDoug Moore 		for (; entry->start < end;
30562767c9f3SDoug Moore 		    entry = vm_map_entry_succ(entry)) {
305751321f7cSAlan Cox 			vm_offset_t useEnd, useStart;
30585f99b57cSMatthew Dillon 
30592767c9f3SDoug Moore 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
3060f7fc307aSAlan Cox 				continue;
3061f7fc307aSAlan Cox 
3062bf5661f4SKonstantin Belousov 			/*
3063bf5661f4SKonstantin Belousov 			 * MADV_FREE would otherwise rewind time to
3064bf5661f4SKonstantin Belousov 			 * the creation of the shadow object.  Because
3065bf5661f4SKonstantin Belousov 			 * we hold the VM map read-locked, neither the
3066bf5661f4SKonstantin Belousov 			 * entry's object nor the presence of a
3067bf5661f4SKonstantin Belousov 			 * backing object can change.
3068bf5661f4SKonstantin Belousov 			 */
3069bf5661f4SKonstantin Belousov 			if (behav == MADV_FREE &&
30702767c9f3SDoug Moore 			    entry->object.vm_object != NULL &&
30712767c9f3SDoug Moore 			    entry->object.vm_object->backing_object != NULL)
3072bf5661f4SKonstantin Belousov 				continue;
3073bf5661f4SKonstantin Belousov 
30742767c9f3SDoug Moore 			pstart = OFF_TO_IDX(entry->offset);
30752767c9f3SDoug Moore 			pend = pstart + atop(entry->end - entry->start);
30762767c9f3SDoug Moore 			useStart = entry->start;
30772767c9f3SDoug Moore 			useEnd = entry->end;
3078f7fc307aSAlan Cox 
30792767c9f3SDoug Moore 			if (entry->start < start) {
30802767c9f3SDoug Moore 				pstart += atop(start - entry->start);
30815f99b57cSMatthew Dillon 				useStart = start;
3082f7fc307aSAlan Cox 			}
30832767c9f3SDoug Moore 			if (entry->end > end) {
30842767c9f3SDoug Moore 				pend -= atop(entry->end - end);
308551321f7cSAlan Cox 				useEnd = end;
308651321f7cSAlan Cox 			}
3087f7fc307aSAlan Cox 
308892a59946SJohn Baldwin 			if (pstart >= pend)
3089f7fc307aSAlan Cox 				continue;
3090f7fc307aSAlan Cox 
309151321f7cSAlan Cox 			/*
309251321f7cSAlan Cox 			 * Perform the pmap_advise() before clearing
309351321f7cSAlan Cox 			 * PGA_REFERENCED in vm_page_advise().  Otherwise, a
309451321f7cSAlan Cox 			 * concurrent pmap operation, such as pmap_remove(),
309551321f7cSAlan Cox 			 * could clear a reference in the pmap and set
309651321f7cSAlan Cox 			 * PGA_REFERENCED on the page before the pmap_advise()
309751321f7cSAlan Cox 			 * had completed.  Consequently, the page would appear
309851321f7cSAlan Cox 			 * referenced based upon an old reference that
309951321f7cSAlan Cox 			 * occurred before this pmap_advise() ran.
310051321f7cSAlan Cox 			 */
310151321f7cSAlan Cox 			if (behav == MADV_DONTNEED || behav == MADV_FREE)
310251321f7cSAlan Cox 				pmap_advise(map->pmap, useStart, useEnd,
310351321f7cSAlan Cox 				    behav);
310451321f7cSAlan Cox 
31052767c9f3SDoug Moore 			vm_object_madvise(entry->object.vm_object, pstart,
310692a59946SJohn Baldwin 			    pend, behav);
310754432196SKonstantin Belousov 
310854432196SKonstantin Belousov 			/*
310954432196SKonstantin Belousov 			 * Pre-populate paging structures in the
311054432196SKonstantin Belousov 			 * WILLNEED case.  For wired entries, the
311154432196SKonstantin Belousov 			 * paging structures are already populated.
311254432196SKonstantin Belousov 			 */
311354432196SKonstantin Belousov 			if (behav == MADV_WILLNEED &&
31142767c9f3SDoug Moore 			    entry->wired_count == 0) {
31150551c08dSAlan Cox 				vm_map_pmap_enter(map,
31165f99b57cSMatthew Dillon 				    useStart,
31172767c9f3SDoug Moore 				    entry->protection,
31182767c9f3SDoug Moore 				    entry->object.vm_object,
311992a59946SJohn Baldwin 				    pstart,
312092a59946SJohn Baldwin 				    ptoa(pend - pstart),
3121e3026983SMatthew Dillon 				    MAP_PREFAULT_MADVISE
3122b4309055SMatthew Dillon 				);
3123f7fc307aSAlan Cox 			}
3124f7fc307aSAlan Cox 		}
3125f7fc307aSAlan Cox 		vm_map_unlock_read(map);
3126f7fc307aSAlan Cox 	}
3127b4309055SMatthew Dillon 	return (0);
3128867a482dSJohn Dyson }
3129867a482dSJohn Dyson 
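/*
 * Editor's sketch (hypothetical helper, not part of the original file):
 * the page-index arithmetic used by the object pass above, isolated.
 * Given an unclipped entry and the advisory range [start, end), it
 * computes the vm_object pindex range that vm_object_madvise() is
 * applied to.
 */
static inline void
vm_madvise_range_example(vm_map_entry_t entry, vm_offset_t start,
    vm_offset_t end, vm_pindex_t *pstart, vm_pindex_t *pend)
{

	/* Page indices covering the entire entry. */
	*pstart = OFF_TO_IDX(entry->offset);
	*pend = *pstart + atop(entry->end - entry->start);

	/* Trim to the portion of the entry that overlaps [start, end). */
	if (entry->start < start)
		*pstart += atop(start - entry->start);
	if (entry->end > end)
		*pend -= atop(entry->end - end);
}
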
3130867a482dSJohn Dyson /*
3131df8bae1dSRodney W. Grimes  *	vm_map_inherit:
3132df8bae1dSRodney W. Grimes  *
3133df8bae1dSRodney W. Grimes  *	Sets the inheritance of the specified address
3134df8bae1dSRodney W. Grimes  *	range in the target map.  Inheritance
3135df8bae1dSRodney W. Grimes  *	affects how the map will be shared with
3136e2abaaaaSAlan Cox  *	child maps at the time of vmspace_fork.
3137df8bae1dSRodney W. Grimes  */
3138df8bae1dSRodney W. Grimes int
3139b9dcd593SBruce Evans vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
3140b9dcd593SBruce Evans 	       vm_inherit_t new_inheritance)
3141df8bae1dSRodney W. Grimes {
3142e2e80fb3SKonstantin Belousov 	vm_map_entry_t entry, lentry, prev_entry, start_entry;
3143e2e80fb3SKonstantin Belousov 	int rv;
3144df8bae1dSRodney W. Grimes 
3145df8bae1dSRodney W. Grimes 	switch (new_inheritance) {
3146df8bae1dSRodney W. Grimes 	case VM_INHERIT_NONE:
3147df8bae1dSRodney W. Grimes 	case VM_INHERIT_COPY:
3148df8bae1dSRodney W. Grimes 	case VM_INHERIT_SHARE:
314978d7964bSXin LI 	case VM_INHERIT_ZERO:
3150df8bae1dSRodney W. Grimes 		break;
3151df8bae1dSRodney W. Grimes 	default:
3152df8bae1dSRodney W. Grimes 		return (KERN_INVALID_ARGUMENT);
3153df8bae1dSRodney W. Grimes 	}
315479e9451fSKonstantin Belousov 	if (start == end)
315579e9451fSKonstantin Belousov 		return (KERN_SUCCESS);
3156df8bae1dSRodney W. Grimes 	vm_map_lock(map);
3157df8bae1dSRodney W. Grimes 	VM_MAP_RANGE_CHECK(map, start, end);
3158e2e80fb3SKonstantin Belousov 	rv = vm_map_lookup_clip_start(map, start, &start_entry, &prev_entry);
3159e2e80fb3SKonstantin Belousov 	if (rv != KERN_SUCCESS)
3160e2e80fb3SKonstantin Belousov 		goto unlock;
3161e2e80fb3SKonstantin Belousov 	if (vm_map_lookup_entry(map, end - 1, &lentry)) {
3162e2e80fb3SKonstantin Belousov 		rv = vm_map_clip_end(map, lentry, end);
3163e2e80fb3SKonstantin Belousov 		if (rv != KERN_SUCCESS)
3164e2e80fb3SKonstantin Belousov 			goto unlock;
3165e2e80fb3SKonstantin Belousov 	}
3166e2e80fb3SKonstantin Belousov 	if (new_inheritance == VM_INHERIT_COPY) {
3167e2e80fb3SKonstantin Belousov 		for (entry = start_entry; entry->start < end;
316883704cc2SDoug Moore 		    prev_entry = entry, entry = vm_map_entry_succ(entry)) {
3169e2e80fb3SKonstantin Belousov 			if ((entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK)
3170e2e80fb3SKonstantin Belousov 			    != 0) {
3171e2e80fb3SKonstantin Belousov 				rv = KERN_INVALID_ARGUMENT;
3172e2e80fb3SKonstantin Belousov 				goto unlock;
3173e2e80fb3SKonstantin Belousov 			}
3174e2e80fb3SKonstantin Belousov 		}
3175e2e80fb3SKonstantin Belousov 	}
3176e2e80fb3SKonstantin Belousov 	for (entry = start_entry; entry->start < end; prev_entry = entry,
3177e2e80fb3SKonstantin Belousov 	    entry = vm_map_entry_succ(entry)) {
3178e2e80fb3SKonstantin Belousov 		KASSERT(entry->end <= end, ("non-clipped entry %p end %jx %jx",
3179e2e80fb3SKonstantin Belousov 		    entry, (uintmax_t)entry->end, (uintmax_t)end));
318019bd0d9cSKonstantin Belousov 		if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
318119bd0d9cSKonstantin Belousov 		    new_inheritance != VM_INHERIT_ZERO)
3182df8bae1dSRodney W. Grimes 			entry->inheritance = new_inheritance;
318383704cc2SDoug Moore 		vm_map_try_merge_entries(map, prev_entry, entry);
3184df8bae1dSRodney W. Grimes 	}
318583704cc2SDoug Moore 	vm_map_try_merge_entries(map, prev_entry, entry);
3186e2e80fb3SKonstantin Belousov unlock:
3187df8bae1dSRodney W. Grimes 	vm_map_unlock(map);
3188e2e80fb3SKonstantin Belousov 	return (rv);
3189df8bae1dSRodney W. Grimes }
3190df8bae1dSRodney W. Grimes 
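/*
 * Editor's sketch (userland, not part of this file): the inheritance
 * values above are what a process selects with minherit(2).  For
 * example, a parent can arrange for a buffer to read as zeroes in its
 * children:
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	if (minherit(buf, len, INHERIT_ZERO) == -1)
 *		err(1, "minherit");
 *
 * After fork(), the child's copy of the range is zero-filled; this is
 * the VM_INHERIT_ZERO case handled by vmspace_fork().
 */
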
3191df8bae1dSRodney W. Grimes /*
3192312df2c1SDoug Moore  *	vm_map_entry_in_transition:
3193312df2c1SDoug Moore  *
3194312df2c1SDoug Moore  *	Release the map lock and sleep until the entry is no longer in
3195312df2c1SDoug Moore  *	transition.  Awaken and reacquire the map lock.  If the map changed
3196312df2c1SDoug Moore  *	while another thread held the lock, look up a possibly-changed entry
3197312df2c1SDoug Moore  *	at or after the 'start' position of the old entry.
3198312df2c1SDoug Moore  */
3199312df2c1SDoug Moore static vm_map_entry_t
3200312df2c1SDoug Moore vm_map_entry_in_transition(vm_map_t map, vm_offset_t in_start,
3201312df2c1SDoug Moore     vm_offset_t *io_end, bool holes_ok, vm_map_entry_t in_entry)
3202312df2c1SDoug Moore {
3203312df2c1SDoug Moore 	vm_map_entry_t entry;
3204312df2c1SDoug Moore 	vm_offset_t start;
3205312df2c1SDoug Moore 	u_int last_timestamp;
3206312df2c1SDoug Moore 
3207312df2c1SDoug Moore 	VM_MAP_ASSERT_LOCKED(map);
3208312df2c1SDoug Moore 	KASSERT((in_entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
3209312df2c1SDoug Moore 	    ("not in-transition map entry %p", in_entry));
3210312df2c1SDoug Moore 	/*
3211312df2c1SDoug Moore 	 * We have not yet clipped the entry.
3212312df2c1SDoug Moore 	 */
3213312df2c1SDoug Moore 	start = MAX(in_start, in_entry->start);
3214312df2c1SDoug Moore 	in_entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
3215312df2c1SDoug Moore 	last_timestamp = map->timestamp;
3216312df2c1SDoug Moore 	if (vm_map_unlock_and_wait(map, 0)) {
3217312df2c1SDoug Moore 		/*
3218312df2c1SDoug Moore 		 * Allow interruption of user wiring/unwiring?
3219312df2c1SDoug Moore 		 */
3220312df2c1SDoug Moore 	}
3221312df2c1SDoug Moore 	vm_map_lock(map);
3222312df2c1SDoug Moore 	if (last_timestamp + 1 == map->timestamp)
3223312df2c1SDoug Moore 		return (in_entry);
3224312df2c1SDoug Moore 
3225312df2c1SDoug Moore 	/*
3226312df2c1SDoug Moore 	 * Look again for the entry because the map was modified while it was
3227312df2c1SDoug Moore 	 * unlocked.  Specifically, the entry may have been clipped, merged, or
3228312df2c1SDoug Moore 	 * deleted.
3229312df2c1SDoug Moore 	 */
3230312df2c1SDoug Moore 	if (!vm_map_lookup_entry(map, start, &entry)) {
3231312df2c1SDoug Moore 		if (!holes_ok) {
3232312df2c1SDoug Moore 			*io_end = start;
3233312df2c1SDoug Moore 			return (NULL);
3234312df2c1SDoug Moore 		}
32357cdcf863SDoug Moore 		entry = vm_map_entry_succ(entry);
3236312df2c1SDoug Moore 	}
3237312df2c1SDoug Moore 	return (entry);
3238312df2c1SDoug Moore }
3239312df2c1SDoug Moore 
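/*
 * Editor's note: the timestamp check above is the general idiom for
 * sleeping on a map.  vm_map_lock() bumps map->timestamp, so after a
 * sleep-and-relock cycle "last_timestamp + 1 == map->timestamp" means
 * that no one else locked the map in between and cached entry pointers
 * are still valid; anything else forces a fresh lookup.  Distilled:
 *
 *	last_timestamp = map->timestamp;
 *	vm_map_unlock_and_wait(map, 0);
 *	vm_map_lock(map);
 *	if (last_timestamp + 1 != map->timestamp)
 *		(void)vm_map_lookup_entry(map, start, &entry);
 */
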
3240312df2c1SDoug Moore /*
3241acd9a301SAlan Cox  *	vm_map_unwire:
3242acd9a301SAlan Cox  *
3243e27e17b7SAlan Cox  *	Implements both kernel and user unwiring.
3244acd9a301SAlan Cox  */
3245acd9a301SAlan Cox int
3246acd9a301SAlan Cox vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
3247abd498aaSBruce M Simpson     int flags)
3248acd9a301SAlan Cox {
324983704cc2SDoug Moore 	vm_map_entry_t entry, first_entry, next_entry, prev_entry;
3250acd9a301SAlan Cox 	int rv;
325183704cc2SDoug Moore 	bool holes_ok, need_wakeup, user_unwire;
3252acd9a301SAlan Cox 
325379e9451fSKonstantin Belousov 	if (start == end)
325479e9451fSKonstantin Belousov 		return (KERN_SUCCESS);
32559a0cdf94SDoug Moore 	holes_ok = (flags & VM_MAP_WIRE_HOLESOK) != 0;
32569a0cdf94SDoug Moore 	user_unwire = (flags & VM_MAP_WIRE_USER) != 0;
3257acd9a301SAlan Cox 	vm_map_lock(map);
3258acd9a301SAlan Cox 	VM_MAP_RANGE_CHECK(map, start, end);
3259d1d3f7e1SDoug Moore 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
32609a0cdf94SDoug Moore 		if (holes_ok)
32617cdcf863SDoug Moore 			first_entry = vm_map_entry_succ(first_entry);
3262d1d3f7e1SDoug Moore 		else {
3263acd9a301SAlan Cox 			vm_map_unlock(map);
3264acd9a301SAlan Cox 			return (KERN_INVALID_ADDRESS);
3265acd9a301SAlan Cox 		}
3266abd498aaSBruce M Simpson 	}
3267d2860f22SDoug Moore 	rv = KERN_SUCCESS;
326883704cc2SDoug Moore 	for (entry = first_entry; entry->start < end; entry = next_entry) {
3269acd9a301SAlan Cox 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
3270acd9a301SAlan Cox 			/*
3271acd9a301SAlan Cox 			 * We have not yet clipped the entry.
3272acd9a301SAlan Cox 			 */
327383704cc2SDoug Moore 			next_entry = vm_map_entry_in_transition(map, start,
327483704cc2SDoug Moore 			    &end, holes_ok, entry);
327583704cc2SDoug Moore 			if (next_entry == NULL) {
327683704cc2SDoug Moore 				if (entry == first_entry) {
3277acd9a301SAlan Cox 					vm_map_unlock(map);
3278acd9a301SAlan Cox 					return (KERN_INVALID_ADDRESS);
3279acd9a301SAlan Cox 				}
3280acd9a301SAlan Cox 				rv = KERN_INVALID_ADDRESS;
3281d2860f22SDoug Moore 				break;
3282acd9a301SAlan Cox 			}
328383704cc2SDoug Moore 			first_entry = (entry == first_entry) ?
328483704cc2SDoug Moore 			    next_entry : NULL;
3285acd9a301SAlan Cox 			continue;
3286acd9a301SAlan Cox 		}
3287e2e80fb3SKonstantin Belousov 		rv = vm_map_clip_start(map, entry, start);
3288e2e80fb3SKonstantin Belousov 		if (rv != KERN_SUCCESS)
3289e2e80fb3SKonstantin Belousov 			break;
3290e2e80fb3SKonstantin Belousov 		rv = vm_map_clip_end(map, entry, end);
3291e2e80fb3SKonstantin Belousov 		if (rv != KERN_SUCCESS)
3292e2e80fb3SKonstantin Belousov 			break;
3293e2e80fb3SKonstantin Belousov 
3294acd9a301SAlan Cox 		/*
3295acd9a301SAlan Cox 		 * Mark the entry in case the map lock is released.  (See
3296acd9a301SAlan Cox 		 * above.)
3297acd9a301SAlan Cox 		 */
3298ff3ae454SKonstantin Belousov 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 &&
3299ff3ae454SKonstantin Belousov 		    entry->wiring_thread == NULL,
3300ff3ae454SKonstantin Belousov 		    ("owned map entry %p", entry));
3301acd9a301SAlan Cox 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
33020acea7dfSKonstantin Belousov 		entry->wiring_thread = curthread;
330383704cc2SDoug Moore 		next_entry = vm_map_entry_succ(entry);
3304acd9a301SAlan Cox 		/*
3305acd9a301SAlan Cox 		 * Check the map for holes in the specified region.
33069a0cdf94SDoug Moore 		 * If holes_ok, skip this check.
3307acd9a301SAlan Cox 		 */
33089a0cdf94SDoug Moore 		if (!holes_ok &&
330983704cc2SDoug Moore 		    entry->end < end && next_entry->start > entry->end) {
3310acd9a301SAlan Cox 			end = entry->end;
3311acd9a301SAlan Cox 			rv = KERN_INVALID_ADDRESS;
3312d2860f22SDoug Moore 			break;
3313acd9a301SAlan Cox 		}
3314acd9a301SAlan Cox 		/*
33153ffbc0cdSAlan Cox 		 * If system unwiring, require that the entry is system wired.
3316acd9a301SAlan Cox 		 */
33170ada205eSBrian Feldman 		if (!user_unwire &&
33180ada205eSBrian Feldman 		    vm_map_entry_system_wired_count(entry) == 0) {
3319acd9a301SAlan Cox 			end = entry->end;
3320acd9a301SAlan Cox 			rv = KERN_INVALID_ARGUMENT;
3321d2860f22SDoug Moore 			break;
3322acd9a301SAlan Cox 		}
3323acd9a301SAlan Cox 	}
33249a0cdf94SDoug Moore 	need_wakeup = false;
33259a0cdf94SDoug Moore 	if (first_entry == NULL &&
33269a0cdf94SDoug Moore 	    !vm_map_lookup_entry(map, start, &first_entry)) {
33279a0cdf94SDoug Moore 		KASSERT(holes_ok, ("vm_map_unwire: lookup failed"));
332883704cc2SDoug Moore 		prev_entry = first_entry;
332983704cc2SDoug Moore 		entry = vm_map_entry_succ(first_entry);
333083704cc2SDoug Moore 	} else {
333183704cc2SDoug Moore 		prev_entry = vm_map_entry_pred(first_entry);
333283704cc2SDoug Moore 		entry = first_entry;
3333acd9a301SAlan Cox 	}
333483704cc2SDoug Moore 	for (; entry->start < end;
333583704cc2SDoug Moore 	    prev_entry = entry, entry = vm_map_entry_succ(entry)) {
33360acea7dfSKonstantin Belousov 		/*
33379a0cdf94SDoug Moore 		 * If holes_ok was specified, an empty
33380acea7dfSKonstantin Belousov 		 * space in the unwired region could have been mapped
33390acea7dfSKonstantin Belousov 		 * while the map lock was dropped for draining
33400acea7dfSKonstantin Belousov 		 * MAP_ENTRY_IN_TRANSITION.  Moreover, another thread
33410acea7dfSKonstantin Belousov 		 * could be simultaneously wiring this new mapping
33420acea7dfSKonstantin Belousov 		 * entry.  Detect these cases and skip any entries
33430acea7dfSKonstantin Belousov 		 * marked as in transition by us.
33440acea7dfSKonstantin Belousov 		 * not marked as in transition by us.
33450acea7dfSKonstantin Belousov 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
33460acea7dfSKonstantin Belousov 		    entry->wiring_thread != curthread) {
33479a0cdf94SDoug Moore 			KASSERT(holes_ok,
33480acea7dfSKonstantin Belousov 			    ("vm_map_unwire: !HOLESOK and new/changed entry"));
33490acea7dfSKonstantin Belousov 			continue;
33500acea7dfSKonstantin Belousov 		}
33510acea7dfSKonstantin Belousov 
33523ffbc0cdSAlan Cox 		if (rv == KERN_SUCCESS && (!user_unwire ||
33533ffbc0cdSAlan Cox 		    (entry->eflags & MAP_ENTRY_USER_WIRED))) {
335403462509SAlan Cox 			if (entry->wired_count == 1)
335503462509SAlan Cox 				vm_map_entry_unwire(map, entry);
335603462509SAlan Cox 			else
3357b2f3846aSAlan Cox 				entry->wired_count--;
335854a3a114SMark Johnston 			if (user_unwire)
335954a3a114SMark Johnston 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
3360b2f3846aSAlan Cox 		}
33610acea7dfSKonstantin Belousov 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
3362ff3ae454SKonstantin Belousov 		    ("vm_map_unwire: in-transition flag missing %p", entry));
3363ff3ae454SKonstantin Belousov 		KASSERT(entry->wiring_thread == curthread,
3364ff3ae454SKonstantin Belousov 		    ("vm_map_unwire: alien wire %p", entry));
3365acd9a301SAlan Cox 		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
33660acea7dfSKonstantin Belousov 		entry->wiring_thread = NULL;
3367acd9a301SAlan Cox 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
3368acd9a301SAlan Cox 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
33699a0cdf94SDoug Moore 			need_wakeup = true;
3370acd9a301SAlan Cox 		}
337183704cc2SDoug Moore 		vm_map_try_merge_entries(map, prev_entry, entry);
3372acd9a301SAlan Cox 	}
337383704cc2SDoug Moore 	vm_map_try_merge_entries(map, prev_entry, entry);
3374acd9a301SAlan Cox 	vm_map_unlock(map);
3375acd9a301SAlan Cox 	if (need_wakeup)
3376acd9a301SAlan Cox 		vm_map_wakeup(map);
3377acd9a301SAlan Cox 	return (rv);
3378acd9a301SAlan Cox }
3379acd9a301SAlan Cox 
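/*
 * Editor's sketch (hedged): a typical user-unwire caller, in the shape
 * of munlock(2)'s backend, asks for user semantics and no holes:
 *
 *	rv = vm_map_unwire(map, trunc_page(addr), round_page(addr + len),
 *	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 *
 * A system unwire (e.g. undoing a kernel wiring) omits
 * VM_MAP_WIRE_USER, and then each entry must carry a system wiring
 * count, per the KERN_INVALID_ARGUMENT check above.
 */
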
338054a3a114SMark Johnston static void
338154a3a114SMark Johnston vm_map_wire_user_count_sub(u_long npages)
338254a3a114SMark Johnston {
338354a3a114SMark Johnston 
338454a3a114SMark Johnston 	atomic_subtract_long(&vm_user_wire_count, npages);
338554a3a114SMark Johnston }
338654a3a114SMark Johnston 
338754a3a114SMark Johnston static bool
338854a3a114SMark Johnston vm_map_wire_user_count_add(u_long npages)
338954a3a114SMark Johnston {
339054a3a114SMark Johnston 	u_long wired;
339154a3a114SMark Johnston 
339254a3a114SMark Johnston 	wired = vm_user_wire_count;
339354a3a114SMark Johnston 	do {
339454a3a114SMark Johnston 		if (npages + wired > vm_page_max_user_wired)
339554a3a114SMark Johnston 			return (false);
339654a3a114SMark Johnston 	} while (!atomic_fcmpset_long(&vm_user_wire_count, &wired,
339754a3a114SMark Johnston 	    npages + wired));
339854a3a114SMark Johnston 
339954a3a114SMark Johnston 	return (true);
340054a3a114SMark Johnston }
340154a3a114SMark Johnston 
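/*
 * Editor's sketch (hypothetical names): the fcmpset loop above is a
 * lock-free "add unless the limit would be exceeded" counter.  The
 * same pattern in general form; atomic_fcmpset_long() reloads 'old'
 * with the current value whenever the update loses a race:
 */
static inline bool
bounded_add_example(volatile u_long *counter, u_long n, u_long limit)
{
	u_long old;

	old = *counter;		/* unlocked snapshot to seed the loop */
	do {
		if (old + n > limit)
			return (false);	/* refuse: would exceed limit */
	} while (!atomic_fcmpset_long(counter, &old, old + n));
	return (true);
}
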
3402acd9a301SAlan Cox /*
340366cd575bSAlan Cox  *	vm_map_wire_entry_failure:
340466cd575bSAlan Cox  *
340566cd575bSAlan Cox  *	Handle a wiring failure on the given entry.
340666cd575bSAlan Cox  *
340766cd575bSAlan Cox  *	The map should be locked.
340866cd575bSAlan Cox  */
340966cd575bSAlan Cox static void
341066cd575bSAlan Cox vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
341166cd575bSAlan Cox     vm_offset_t failed_addr)
341266cd575bSAlan Cox {
341366cd575bSAlan Cox 
341466cd575bSAlan Cox 	VM_MAP_ASSERT_LOCKED(map);
341566cd575bSAlan Cox 	KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 &&
341666cd575bSAlan Cox 	    entry->wired_count == 1,
341766cd575bSAlan Cox 	    ("vm_map_wire_entry_failure: entry %p isn't being wired", entry));
341866cd575bSAlan Cox 	KASSERT(failed_addr < entry->end,
341966cd575bSAlan Cox 	    ("vm_map_wire_entry_failure: entry %p was fully wired", entry));
342066cd575bSAlan Cox 
342166cd575bSAlan Cox 	/*
342266cd575bSAlan Cox 	 * If any pages at the start of this entry were successfully wired,
342366cd575bSAlan Cox 	 * then unwire them.
342466cd575bSAlan Cox 	 */
342566cd575bSAlan Cox 	if (failed_addr > entry->start) {
342666cd575bSAlan Cox 		pmap_unwire(map->pmap, entry->start, failed_addr);
342766cd575bSAlan Cox 		vm_object_unwire(entry->object.vm_object, entry->offset,
342866cd575bSAlan Cox 		    failed_addr - entry->start, PQ_ACTIVE);
342966cd575bSAlan Cox 	}
343066cd575bSAlan Cox 
343166cd575bSAlan Cox 	/*
343266cd575bSAlan Cox 	 * Assign an out-of-range value to represent the failure to wire this
343366cd575bSAlan Cox 	 * entry.
343466cd575bSAlan Cox 	 */
343566cd575bSAlan Cox 	entry->wired_count = -1;
343666cd575bSAlan Cox }
343766cd575bSAlan Cox 
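/*
 * Editor's worked example: suppose an entry spans [0x10000, 0x18000)
 * and the wiring fault fails at failed_addr = 0x14000.  The routine
 * above unwires the already-wired prefix [0x10000, 0x14000) in both
 * the pmap and the object, then sets wired_count = -1 so the cleanup
 * pass can distinguish "failed here" from "wired" (> 0) and "never
 * wired" (0).
 */
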
343854a3a114SMark Johnston int
343954a3a114SMark Johnston vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags)
344054a3a114SMark Johnston {
344154a3a114SMark Johnston 	int rv;
344254a3a114SMark Johnston 
344354a3a114SMark Johnston 	vm_map_lock(map);
344454a3a114SMark Johnston 	rv = vm_map_wire_locked(map, start, end, flags);
344554a3a114SMark Johnston 	vm_map_unlock(map);
344654a3a114SMark Johnston 	return (rv);
344754a3a114SMark Johnston }
344854a3a114SMark Johnston 
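/*
 * Editor's sketch (hedged): mlock(2)-style wiring goes through this
 * unlocked wrapper with user semantics:
 *
 *	rv = vm_map_wire(map, start, end,
 *	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 *
 * Callers that already hold the map lock use vm_map_wire_locked()
 * below instead.
 */
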
344966cd575bSAlan Cox /*
345054a3a114SMark Johnston  *	vm_map_wire_locked:
3451e27e17b7SAlan Cox  *
345254a3a114SMark Johnston  *	Implements both kernel and user wiring.  Returns with the map locked;
345354a3a114SMark Johnston  *	the map lock may be transiently dropped while pages are faulted in.
3454e27e17b7SAlan Cox  */
3455e27e17b7SAlan Cox int
345654a3a114SMark Johnston vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags)
3457e27e17b7SAlan Cox {
345883704cc2SDoug Moore 	vm_map_entry_t entry, first_entry, next_entry, prev_entry;
345966cd575bSAlan Cox 	vm_offset_t faddr, saved_end, saved_start;
3460e2e80fb3SKonstantin Belousov 	u_long incr, npages;
3461e2e80fb3SKonstantin Belousov 	u_int bidx, last_timestamp;
346212d7cc84SAlan Cox 	int rv;
346383704cc2SDoug Moore 	bool holes_ok, need_wakeup, user_wire;
3464e4cd31ddSJeff Roberson 	vm_prot_t prot;
3465e27e17b7SAlan Cox 
346654a3a114SMark Johnston 	VM_MAP_ASSERT_LOCKED(map);
346754a3a114SMark Johnston 
346879e9451fSKonstantin Belousov 	if (start == end)
346979e9451fSKonstantin Belousov 		return (KERN_SUCCESS);
3470e4cd31ddSJeff Roberson 	prot = 0;
3471e4cd31ddSJeff Roberson 	if (flags & VM_MAP_WIRE_WRITE)
3472e4cd31ddSJeff Roberson 		prot |= VM_PROT_WRITE;
34739a0cdf94SDoug Moore 	holes_ok = (flags & VM_MAP_WIRE_HOLESOK) != 0;
34749a0cdf94SDoug Moore 	user_wire = (flags & VM_MAP_WIRE_USER) != 0;
347512d7cc84SAlan Cox 	VM_MAP_RANGE_CHECK(map, start, end);
3476d1d3f7e1SDoug Moore 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
34779a0cdf94SDoug Moore 		if (holes_ok)
34787cdcf863SDoug Moore 			first_entry = vm_map_entry_succ(first_entry);
3479d1d3f7e1SDoug Moore 		else
348012d7cc84SAlan Cox 			return (KERN_INVALID_ADDRESS);
348112d7cc84SAlan Cox 	}
348283704cc2SDoug Moore 	for (entry = first_entry; entry->start < end; entry = next_entry) {
348312d7cc84SAlan Cox 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
348412d7cc84SAlan Cox 			/*
348512d7cc84SAlan Cox 			 * We have not yet clipped the entry.
348612d7cc84SAlan Cox 			 */
348783704cc2SDoug Moore 			next_entry = vm_map_entry_in_transition(map, start,
348883704cc2SDoug Moore 			    &end, holes_ok, entry);
348983704cc2SDoug Moore 			if (next_entry == NULL) {
349083704cc2SDoug Moore 				if (entry == first_entry)
349112d7cc84SAlan Cox 					return (KERN_INVALID_ADDRESS);
349212d7cc84SAlan Cox 				rv = KERN_INVALID_ADDRESS;
349312d7cc84SAlan Cox 				goto done;
349412d7cc84SAlan Cox 			}
349583704cc2SDoug Moore 			first_entry = (entry == first_entry) ?
349683704cc2SDoug Moore 			    next_entry : NULL;
349712d7cc84SAlan Cox 			continue;
349812d7cc84SAlan Cox 		}
3499e2e80fb3SKonstantin Belousov 		rv = vm_map_clip_start(map, entry, start);
3500e2e80fb3SKonstantin Belousov 		if (rv != KERN_SUCCESS)
3501e2e80fb3SKonstantin Belousov 			goto done;
3502e2e80fb3SKonstantin Belousov 		rv = vm_map_clip_end(map, entry, end);
3503e2e80fb3SKonstantin Belousov 		if (rv != KERN_SUCCESS)
3504e2e80fb3SKonstantin Belousov 			goto done;
3505e2e80fb3SKonstantin Belousov 
350612d7cc84SAlan Cox 		/*
350712d7cc84SAlan Cox 		 * Mark the entry in case the map lock is released.  (See
350812d7cc84SAlan Cox 		 * above.)
350912d7cc84SAlan Cox 		 */
3510ff3ae454SKonstantin Belousov 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 &&
3511ff3ae454SKonstantin Belousov 		    entry->wiring_thread == NULL,
3512ff3ae454SKonstantin Belousov 		    ("owned map entry %p", entry));
351312d7cc84SAlan Cox 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
35140acea7dfSKonstantin Belousov 		entry->wiring_thread = curthread;
3515e4cd31ddSJeff Roberson 		if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0
3516e4cd31ddSJeff Roberson 		    || (entry->protection & prot) != prot) {
3517529ab57bSKonstantin Belousov 			entry->eflags |= MAP_ENTRY_WIRE_SKIPPED;
35189a0cdf94SDoug Moore 			if (!holes_ok) {
35196d7e8091SKonstantin Belousov 				end = entry->end;
35206d7e8091SKonstantin Belousov 				rv = KERN_INVALID_ADDRESS;
35216d7e8091SKonstantin Belousov 				goto done;
35226d7e8091SKonstantin Belousov 			}
352338e220e8SDoug Moore 		} else if (entry->wired_count == 0) {
35240ada205eSBrian Feldman 			entry->wired_count++;
352554a3a114SMark Johnston 
352654a3a114SMark Johnston 			npages = atop(entry->end - entry->start);
352754a3a114SMark Johnston 			if (user_wire && !vm_map_wire_user_count_add(npages)) {
352854a3a114SMark Johnston 				vm_map_wire_entry_failure(map, entry,
352954a3a114SMark Johnston 				    entry->start);
353054a3a114SMark Johnston 				end = entry->end;
353154a3a114SMark Johnston 				rv = KERN_RESOURCE_SHORTAGE;
353254a3a114SMark Johnston 				goto done;
353354a3a114SMark Johnston 			}
353466cd575bSAlan Cox 
353512d7cc84SAlan Cox 			/*
353612d7cc84SAlan Cox 			 * Release the map lock, relying on the in-transition
3537a5db445dSMax Laier 			 * mark.  Mark the map busy for fork.
353812d7cc84SAlan Cox 			 */
353954a3a114SMark Johnston 			saved_start = entry->start;
354054a3a114SMark Johnston 			saved_end = entry->end;
3541312df2c1SDoug Moore 			last_timestamp = map->timestamp;
3542e2e80fb3SKonstantin Belousov 			bidx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK)
3543e2e80fb3SKonstantin Belousov 			    >> MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
3544e2e80fb3SKonstantin Belousov 			incr = pagesizes[bidx];
3545a5db445dSMax Laier 			vm_map_busy(map);
354612d7cc84SAlan Cox 			vm_map_unlock(map);
354766cd575bSAlan Cox 
3548e2e80fb3SKonstantin Belousov 			for (faddr = saved_start; faddr < saved_end;
3549e2e80fb3SKonstantin Belousov 			    faddr += incr) {
355066cd575bSAlan Cox 				/*
355166cd575bSAlan Cox 				 * Simulate a fault to get the page and enter
355266cd575bSAlan Cox 				 * it into the physical map.
355366cd575bSAlan Cox 				 */
3554e2e80fb3SKonstantin Belousov 				rv = vm_fault(map, faddr, VM_PROT_NONE,
3555e2e80fb3SKonstantin Belousov 				    VM_FAULT_WIRE, NULL);
3556e2e80fb3SKonstantin Belousov 				if (rv != KERN_SUCCESS)
355766cd575bSAlan Cox 					break;
3558e2e80fb3SKonstantin Belousov 			}
355912d7cc84SAlan Cox 			vm_map_lock(map);
3560a5db445dSMax Laier 			vm_map_unbusy(map);
356112d7cc84SAlan Cox 			if (last_timestamp + 1 != map->timestamp) {
356212d7cc84SAlan Cox 				/*
356312d7cc84SAlan Cox 				 * Look again for the entry because the map was
356412d7cc84SAlan Cox 				 * modified while it was unlocked.  The entry
356512d7cc84SAlan Cox 				 * may have been clipped, but NOT merged or
356612d7cc84SAlan Cox 				 * deleted.
356712d7cc84SAlan Cox 				 */
35689a0cdf94SDoug Moore 				if (!vm_map_lookup_entry(map, saved_start,
356983704cc2SDoug Moore 				    &next_entry))
35709a0cdf94SDoug Moore 					KASSERT(false,
35719a0cdf94SDoug Moore 					    ("vm_map_wire: lookup failed"));
357283704cc2SDoug Moore 				first_entry = (entry == first_entry) ?
357383704cc2SDoug Moore 				    next_entry : NULL;
357483704cc2SDoug Moore 				for (entry = next_entry; entry->end < saved_end;
357583704cc2SDoug Moore 				    entry = vm_map_entry_succ(entry)) {
357666cd575bSAlan Cox 					/*
357766cd575bSAlan Cox 					 * In case of failure, handle entries
357866cd575bSAlan Cox 					 * that were not fully wired here;
357966cd575bSAlan Cox 					 * fully wired entries are handled
358066cd575bSAlan Cox 					 * later.
358166cd575bSAlan Cox 					 */
358266cd575bSAlan Cox 					if (rv != KERN_SUCCESS &&
358366cd575bSAlan Cox 					    faddr < entry->end)
358466cd575bSAlan Cox 						vm_map_wire_entry_failure(map,
358566cd575bSAlan Cox 						    entry, faddr);
358612d7cc84SAlan Cox 				}
358728c58286SAlan Cox 			}
358812d7cc84SAlan Cox 			if (rv != KERN_SUCCESS) {
358966cd575bSAlan Cox 				vm_map_wire_entry_failure(map, entry, faddr);
359054a3a114SMark Johnston 				if (user_wire)
359154a3a114SMark Johnston 					vm_map_wire_user_count_sub(npages);
359212d7cc84SAlan Cox 				end = entry->end;
359312d7cc84SAlan Cox 				goto done;
359412d7cc84SAlan Cox 			}
35950ada205eSBrian Feldman 		} else if (!user_wire ||
35960ada205eSBrian Feldman 			   (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
35970ada205eSBrian Feldman 			entry->wired_count++;
359812d7cc84SAlan Cox 		}
359912d7cc84SAlan Cox 		/*
360012d7cc84SAlan Cox 		 * Check the map for holes in the specified region.
36019a0cdf94SDoug Moore 		 * If holes_ok was specified, skip this check.
360212d7cc84SAlan Cox 		 */
360383704cc2SDoug Moore 		next_entry = vm_map_entry_succ(entry);
36049a0cdf94SDoug Moore 		if (!holes_ok &&
360583704cc2SDoug Moore 		    entry->end < end && next_entry->start > entry->end) {
360612d7cc84SAlan Cox 			end = entry->end;
360712d7cc84SAlan Cox 			rv = KERN_INVALID_ADDRESS;
360812d7cc84SAlan Cox 			goto done;
360912d7cc84SAlan Cox 		}
361012d7cc84SAlan Cox 	}
361112d7cc84SAlan Cox 	rv = KERN_SUCCESS;
361212d7cc84SAlan Cox done:
36139a0cdf94SDoug Moore 	need_wakeup = false;
36149a0cdf94SDoug Moore 	if (first_entry == NULL &&
36159a0cdf94SDoug Moore 	    !vm_map_lookup_entry(map, start, &first_entry)) {
36169a0cdf94SDoug Moore 		KASSERT(holes_ok, ("vm_map_wire: lookup failed"));
361783704cc2SDoug Moore 		prev_entry = first_entry;
361883704cc2SDoug Moore 		entry = vm_map_entry_succ(first_entry);
361983704cc2SDoug Moore 	} else {
362083704cc2SDoug Moore 		prev_entry = vm_map_entry_pred(first_entry);
362183704cc2SDoug Moore 		entry = first_entry;
362212d7cc84SAlan Cox 	}
362383704cc2SDoug Moore 	for (; entry->start < end;
362483704cc2SDoug Moore 	    prev_entry = entry, entry = vm_map_entry_succ(entry)) {
36250acea7dfSKonstantin Belousov 		/*
36269a0cdf94SDoug Moore 		 * If holes_ok was specified, an empty
36270acea7dfSKonstantin Belousov 		 * space in the unwired region could have been mapped
36280acea7dfSKonstantin Belousov 		 * while the map lock was dropped for faulting in the
36290acea7dfSKonstantin Belousov 		 * pages or draining MAP_ENTRY_IN_TRANSITION.
36300acea7dfSKonstantin Belousov 		 * Moreover, another thread could be simultaneously
36310acea7dfSKonstantin Belousov 		 * wiring this new mapping entry.  Detect these cases
3632546bb2d7SKonstantin Belousov 		 * and skip any entries not marked as in transition by us.
3633e2e80fb3SKonstantin Belousov 		 *
3634e2e80fb3SKonstantin Belousov 		 * Another way to get an entry not marked with
3635e2e80fb3SKonstantin Belousov 		 * MAP_ENTRY_IN_TRANSITION is after failed clipping,
3636e2e80fb3SKonstantin Belousov 		 * which sets rv to KERN_INVALID_ARGUMENT.
36370acea7dfSKonstantin Belousov 		 */
36380acea7dfSKonstantin Belousov 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
36390acea7dfSKonstantin Belousov 		    entry->wiring_thread != curthread) {
3640e2e80fb3SKonstantin Belousov 			KASSERT(holes_ok || rv == KERN_INVALID_ARGUMENT,
36410acea7dfSKonstantin Belousov 			    ("vm_map_wire: !HOLESOK and new/changed entry"));
36420acea7dfSKonstantin Belousov 			continue;
36430acea7dfSKonstantin Belousov 		}
36440acea7dfSKonstantin Belousov 
3645b71f9b0dSDoug Moore 		if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0) {
3646b71f9b0dSDoug Moore 			/* do nothing */
3647b71f9b0dSDoug Moore 		} else if (rv == KERN_SUCCESS) {
364812d7cc84SAlan Cox 			if (user_wire)
364912d7cc84SAlan Cox 				entry->eflags |= MAP_ENTRY_USER_WIRED;
365028c58286SAlan Cox 		} else if (entry->wired_count == -1) {
365128c58286SAlan Cox 			/*
365228c58286SAlan Cox 			 * Wiring failed on this entry.  Thus, unwiring is
365328c58286SAlan Cox 			 * unnecessary.
365428c58286SAlan Cox 			 */
365528c58286SAlan Cox 			entry->wired_count = 0;
365603462509SAlan Cox 		} else if (!user_wire ||
365703462509SAlan Cox 		    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
365866cd575bSAlan Cox 			/*
365966cd575bSAlan Cox 			 * Undo the wiring.  Wiring succeeded on this entry
366066cd575bSAlan Cox 			 * but failed on a later entry.
366166cd575bSAlan Cox 			 */
366254a3a114SMark Johnston 			if (entry->wired_count == 1) {
366303462509SAlan Cox 				vm_map_entry_unwire(map, entry);
366454a3a114SMark Johnston 				if (user_wire)
366554a3a114SMark Johnston 					vm_map_wire_user_count_sub(
366654a3a114SMark Johnston 					    atop(entry->end - entry->start));
366754a3a114SMark Johnston 			} else
366812d7cc84SAlan Cox 				entry->wired_count--;
366912d7cc84SAlan Cox 		}
36700acea7dfSKonstantin Belousov 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
36710acea7dfSKonstantin Belousov 		    ("vm_map_wire: in-transition flag missing %p", entry));
36720acea7dfSKonstantin Belousov 		KASSERT(entry->wiring_thread == curthread,
36730acea7dfSKonstantin Belousov 		    ("vm_map_wire: alien wire %p", entry));
36740acea7dfSKonstantin Belousov 		entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION |
36750acea7dfSKonstantin Belousov 		    MAP_ENTRY_WIRE_SKIPPED);
36760acea7dfSKonstantin Belousov 		entry->wiring_thread = NULL;
367712d7cc84SAlan Cox 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
367812d7cc84SAlan Cox 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
36799a0cdf94SDoug Moore 			need_wakeup = true;
368012d7cc84SAlan Cox 		}
368183704cc2SDoug Moore 		vm_map_try_merge_entries(map, prev_entry, entry);
368212d7cc84SAlan Cox 	}
368383704cc2SDoug Moore 	vm_map_try_merge_entries(map, prev_entry, entry);
368412d7cc84SAlan Cox 	if (need_wakeup)
368512d7cc84SAlan Cox 		vm_map_wakeup(map);
368612d7cc84SAlan Cox 	return (rv);
3687e27e17b7SAlan Cox }
3688e27e17b7SAlan Cox 
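/*
 * Editor's note on the fault loop above: the increment is the page
 * size selected by the entry's split boundary index, so a largepage
 * mapping is wired one superpage per vm_fault() call rather than one
 * base page at a time.  Passing VM_PROT_NONE together with
 * VM_FAULT_WIRE asks vm_fault() to wire the page in, in effect using
 * the entry's own protection for the access check.
 */
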
3689e27e17b7SAlan Cox /*
3690950f8459SAlan Cox  * vm_map_sync
3691df8bae1dSRodney W. Grimes  *
3692df8bae1dSRodney W. Grimes  * Push any dirty cached pages in the address range to their pager.
3693df8bae1dSRodney W. Grimes  * If syncio is TRUE, dirty pages are written synchronously.
3694df8bae1dSRodney W. Grimes  * If invalidate is TRUE, any cached pages are freed as well.
3695df8bae1dSRodney W. Grimes  *
3696637315edSAlan Cox  * If the size of the region from start to end is zero, we are
3697637315edSAlan Cox  * supposed to flush all modified pages within the region containing
3698637315edSAlan Cox  * start.  Unfortunately, a region can be split or coalesced with
3699637315edSAlan Cox  * neighboring regions, making it difficult to determine what the
3700637315edSAlan Cox  * original region was.  Therefore, we approximate this requirement by
3701637315edSAlan Cox  * flushing the current region containing start.
3702637315edSAlan Cox  *
3703df8bae1dSRodney W. Grimes  * Returns an error if any part of the specified range is not mapped.
3704df8bae1dSRodney W. Grimes  */
3705df8bae1dSRodney W. Grimes int
3706950f8459SAlan Cox vm_map_sync(
37071b40f8c0SMatthew Dillon 	vm_map_t map,
37081b40f8c0SMatthew Dillon 	vm_offset_t start,
37091b40f8c0SMatthew Dillon 	vm_offset_t end,
37101b40f8c0SMatthew Dillon 	boolean_t syncio,
37111b40f8c0SMatthew Dillon 	boolean_t invalidate)
3712df8bae1dSRodney W. Grimes {
37132767c9f3SDoug Moore 	vm_map_entry_t entry, first_entry, next_entry;
3714df8bae1dSRodney W. Grimes 	vm_size_t size;
3715df8bae1dSRodney W. Grimes 	vm_object_t object;
3716a316d390SJohn Dyson 	vm_ooffset_t offset;
3717e53fa61bSKonstantin Belousov 	unsigned int last_timestamp;
3718e2e80fb3SKonstantin Belousov 	int bdry_idx;
3719126d6082SKonstantin Belousov 	boolean_t failed;
3720df8bae1dSRodney W. Grimes 
3721df8bae1dSRodney W. Grimes 	vm_map_lock_read(map);
3722df8bae1dSRodney W. Grimes 	VM_MAP_RANGE_CHECK(map, start, end);
37232767c9f3SDoug Moore 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
3724df8bae1dSRodney W. Grimes 		vm_map_unlock_read(map);
3725df8bae1dSRodney W. Grimes 		return (KERN_INVALID_ADDRESS);
3726d1d3f7e1SDoug Moore 	} else if (start == end) {
37272767c9f3SDoug Moore 		start = first_entry->start;
37282767c9f3SDoug Moore 		end = first_entry->end;
3729df8bae1dSRodney W. Grimes 	}
3730e2e80fb3SKonstantin Belousov 
3731df8bae1dSRodney W. Grimes 	/*
3732e2e80fb3SKonstantin Belousov 	 * Make a first pass to check for user-wired memory, holes,
3733e2e80fb3SKonstantin Belousov 	 * and partial invalidation of largepage mappings.
3734df8bae1dSRodney W. Grimes 	 */
37352767c9f3SDoug Moore 	for (entry = first_entry; entry->start < end; entry = next_entry) {
3736e2e80fb3SKonstantin Belousov 		if (invalidate) {
3737e2e80fb3SKonstantin Belousov 			if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0) {
3738df8bae1dSRodney W. Grimes 				vm_map_unlock_read(map);
3739df8bae1dSRodney W. Grimes 				return (KERN_INVALID_ARGUMENT);
3740df8bae1dSRodney W. Grimes 			}
3741e2e80fb3SKonstantin Belousov 			bdry_idx = (entry->eflags &
3742e2e80fb3SKonstantin Belousov 			    MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
3743e2e80fb3SKonstantin Belousov 			    MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
3744e2e80fb3SKonstantin Belousov 			if (bdry_idx != 0 &&
3745e2e80fb3SKonstantin Belousov 			    ((start & (pagesizes[bdry_idx] - 1)) != 0 ||
3746e2e80fb3SKonstantin Belousov 			    (end & (pagesizes[bdry_idx] - 1)) != 0)) {
3747e2e80fb3SKonstantin Belousov 				vm_map_unlock_read(map);
3748e2e80fb3SKonstantin Belousov 				return (KERN_INVALID_ARGUMENT);
3749e2e80fb3SKonstantin Belousov 			}
3750e2e80fb3SKonstantin Belousov 		}
37512767c9f3SDoug Moore 		next_entry = vm_map_entry_succ(entry);
37522767c9f3SDoug Moore 		if (end > entry->end &&
37532767c9f3SDoug Moore 		    entry->end != next_entry->start) {
3754df8bae1dSRodney W. Grimes 			vm_map_unlock_read(map);
3755df8bae1dSRodney W. Grimes 			return (KERN_INVALID_ADDRESS);
3756df8bae1dSRodney W. Grimes 		}
3757df8bae1dSRodney W. Grimes 	}
3758df8bae1dSRodney W. Grimes 
37592cf13952SAlan Cox 	if (invalidate)
3760bc105a67SAlan Cox 		pmap_remove(map->pmap, start, end);
3761126d6082SKonstantin Belousov 	failed = FALSE;
37622cf13952SAlan Cox 
3763df8bae1dSRodney W. Grimes 	/*
3764df8bae1dSRodney W. Grimes 	 * Make a second pass, cleaning/uncaching pages from the indicated
3765df8bae1dSRodney W. Grimes 	 * objects as we go.
3766df8bae1dSRodney W. Grimes 	 */
37672767c9f3SDoug Moore 	for (entry = first_entry; entry->start < end;) {
37682767c9f3SDoug Moore 		offset = entry->offset + (start - entry->start);
37692767c9f3SDoug Moore 		size = (end <= entry->end ? end : entry->end) - start;
37702767c9f3SDoug Moore 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) {
3771c0877f10SJohn Dyson 			vm_map_t smap;
3772df8bae1dSRodney W. Grimes 			vm_map_entry_t tentry;
3773df8bae1dSRodney W. Grimes 			vm_size_t tsize;
3774df8bae1dSRodney W. Grimes 
37752767c9f3SDoug Moore 			smap = entry->object.sub_map;
3776df8bae1dSRodney W. Grimes 			vm_map_lock_read(smap);
3777df8bae1dSRodney W. Grimes 			(void) vm_map_lookup_entry(smap, offset, &tentry);
3778df8bae1dSRodney W. Grimes 			tsize = tentry->end - offset;
3779df8bae1dSRodney W. Grimes 			if (tsize < size)
3780df8bae1dSRodney W. Grimes 				size = tsize;
3781df8bae1dSRodney W. Grimes 			object = tentry->object.vm_object;
3782df8bae1dSRodney W. Grimes 			offset = tentry->offset + (offset - tentry->start);
3783df8bae1dSRodney W. Grimes 			vm_map_unlock_read(smap);
3784df8bae1dSRodney W. Grimes 		} else {
37852767c9f3SDoug Moore 			object = entry->object.vm_object;
3786df8bae1dSRodney W. Grimes 		}
3787e53fa61bSKonstantin Belousov 		vm_object_reference(object);
3788e53fa61bSKonstantin Belousov 		last_timestamp = map->timestamp;
3789e53fa61bSKonstantin Belousov 		vm_map_unlock_read(map);
3790126d6082SKonstantin Belousov 		if (!vm_object_sync(object, offset, size, syncio, invalidate))
3791126d6082SKonstantin Belousov 			failed = TRUE;
3792df8bae1dSRodney W. Grimes 		start += size;
3793e53fa61bSKonstantin Belousov 		vm_object_deallocate(object);
3794e53fa61bSKonstantin Belousov 		vm_map_lock_read(map);
3795d1d3f7e1SDoug Moore 		if (last_timestamp == map->timestamp ||
37962767c9f3SDoug Moore 		    !vm_map_lookup_entry(map, start, &entry))
37972767c9f3SDoug Moore 			entry = vm_map_entry_succ(entry);
3798df8bae1dSRodney W. Grimes 	}
3799df8bae1dSRodney W. Grimes 
3800df8bae1dSRodney W. Grimes 	vm_map_unlock_read(map);
3801126d6082SKonstantin Belousov 	return (failed ? KERN_FAILURE : KERN_SUCCESS);
3802df8bae1dSRodney W. Grimes }
3803df8bae1dSRodney W. Grimes 
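/*
 * Editor's sketch (userland): vm_map_sync() is the backend of
 * msync(2).  Flushing a mapped file region synchronously and dropping
 * the cached pages looks like:
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	if (msync(addr, len, MS_SYNC | MS_INVALIDATE) == -1)
 *		err(1, "msync");
 *
 * MS_INVALIDATE corresponds to 'invalidate' above; it is refused for
 * user-wired ranges, matching the KERN_INVALID_ARGUMENT return in the
 * first pass.
 */
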
3804df8bae1dSRodney W. Grimes /*
3805df8bae1dSRodney W. Grimes  *	vm_map_entry_unwire:	[ internal use only ]
3806df8bae1dSRodney W. Grimes  *
3807df8bae1dSRodney W. Grimes  *	Make the region specified by this entry pageable.
3808df8bae1dSRodney W. Grimes  *
3809df8bae1dSRodney W. Grimes  *	The map in question should be locked.
3810df8bae1dSRodney W. Grimes  *	[This is the reason for this routine's existence.]
3811df8bae1dSRodney W. Grimes  */
38120362d7d7SJohn Dyson static void
38131b40f8c0SMatthew Dillon vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
3814df8bae1dSRodney W. Grimes {
381554a3a114SMark Johnston 	vm_size_t size;
381603462509SAlan Cox 
381703462509SAlan Cox 	VM_MAP_ASSERT_LOCKED(map);
381803462509SAlan Cox 	KASSERT(entry->wired_count > 0,
381903462509SAlan Cox 	    ("vm_map_entry_unwire: entry %p isn't wired", entry));
382054a3a114SMark Johnston 
382154a3a114SMark Johnston 	size = entry->end - entry->start;
382254a3a114SMark Johnston 	if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0)
382354a3a114SMark Johnston 		vm_map_wire_user_count_sub(atop(size));
382403462509SAlan Cox 	pmap_unwire(map->pmap, entry->start, entry->end);
382554a3a114SMark Johnston 	vm_object_unwire(entry->object.vm_object, entry->offset, size,
382654a3a114SMark Johnston 	    PQ_ACTIVE);
3827df8bae1dSRodney W. Grimes 	entry->wired_count = 0;
3828df8bae1dSRodney W. Grimes }
3829df8bae1dSRodney W. Grimes 
38300b367bd8SKonstantin Belousov static void
38310b367bd8SKonstantin Belousov vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map)
38320b367bd8SKonstantin Belousov {
38330b367bd8SKonstantin Belousov 
38340b367bd8SKonstantin Belousov 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0)
38350b367bd8SKonstantin Belousov 		vm_object_deallocate(entry->object.vm_object);
38360b367bd8SKonstantin Belousov 	uma_zfree(system_map ? kmapentzone : mapentzone, entry);
38370b367bd8SKonstantin Belousov }
38380b367bd8SKonstantin Belousov 
3839df8bae1dSRodney W. Grimes /*
3840df8bae1dSRodney W. Grimes  *	vm_map_entry_delete:	[ internal use only ]
3841df8bae1dSRodney W. Grimes  *
3842df8bae1dSRodney W. Grimes  *	Deallocate the given entry from the target map.
3843df8bae1dSRodney W. Grimes  */
38440362d7d7SJohn Dyson static void
38451b40f8c0SMatthew Dillon vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
3846df8bae1dSRodney W. Grimes {
384732a89c32SAlan Cox 	vm_object_t object;
384884242cf6SMark Johnston 	vm_pindex_t offidxstart, offidxend, size1;
3849d1780e8dSKonstantin Belousov 	vm_size_t size;
385032a89c32SAlan Cox 
38519f701172SKonstantin Belousov 	vm_map_entry_unlink(map, entry, UNLINK_MERGE_NONE);
38523364c323SKonstantin Belousov 	object = entry->object.vm_object;
385319bd0d9cSKonstantin Belousov 
385419bd0d9cSKonstantin Belousov 	if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
385519bd0d9cSKonstantin Belousov 		MPASS(entry->cred == NULL);
385619bd0d9cSKonstantin Belousov 		MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
385719bd0d9cSKonstantin Belousov 		MPASS(object == NULL);
385819bd0d9cSKonstantin Belousov 		vm_map_entry_deallocate(entry, map->system_map);
385919bd0d9cSKonstantin Belousov 		return;
386019bd0d9cSKonstantin Belousov 	}
386119bd0d9cSKonstantin Belousov 
38623364c323SKonstantin Belousov 	size = entry->end - entry->start;
38633364c323SKonstantin Belousov 	map->size -= size;
38643364c323SKonstantin Belousov 
3865ef694c1aSEdward Tomasz Napierala 	if (entry->cred != NULL) {
3866ef694c1aSEdward Tomasz Napierala 		swap_release_by_cred(size, entry->cred);
3867ef694c1aSEdward Tomasz Napierala 		crfree(entry->cred);
38683364c323SKonstantin Belousov 	}
3869df8bae1dSRodney W. Grimes 
387063967687SJeff Roberson 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 || object == NULL) {
387163967687SJeff Roberson 		entry->object.vm_object = NULL;
387263967687SJeff Roberson 	} else if ((object->flags & OBJ_ANON) != 0 ||
387363967687SJeff Roberson 	    object == kernel_object) {
3874ef694c1aSEdward Tomasz Napierala 		KASSERT(entry->cred == NULL || object->cred == NULL ||
38753364c323SKonstantin Belousov 		    (entry->eflags & MAP_ENTRY_NEEDS_COPY),
3876ef694c1aSEdward Tomasz Napierala 		    ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry));
387732a89c32SAlan Cox 		offidxstart = OFF_TO_IDX(entry->offset);
387884242cf6SMark Johnston 		offidxend = offidxstart + atop(size);
387989f6b863SAttilio Rao 		VM_OBJECT_WLOCK(object);
388063967687SJeff Roberson 		if (object->ref_count != 1 &&
388163967687SJeff Roberson 		    ((object->flags & OBJ_ONEMAPPING) != 0 ||
38822e47807cSJeff Roberson 		    object == kernel_object)) {
388332a89c32SAlan Cox 			vm_object_collapse(object);
38846bbee8e2SAlan Cox 
38856bbee8e2SAlan Cox 			/*
38866bbee8e2SAlan Cox 			 * The option OBJPR_NOTMAPPED can be passed here
38876bbee8e2SAlan Cox 			 * because vm_map_delete() already performed
38886bbee8e2SAlan Cox 			 * pmap_remove() on the only mapping to this range
38896bbee8e2SAlan Cox 			 * of pages.
38906bbee8e2SAlan Cox 			 */
38916bbee8e2SAlan Cox 			vm_object_page_remove(object, offidxstart, offidxend,
38926bbee8e2SAlan Cox 			    OBJPR_NOTMAPPED);
389332a89c32SAlan Cox 			if (offidxend >= object->size &&
38943364c323SKonstantin Belousov 			    offidxstart < object->size) {
38953364c323SKonstantin Belousov 				size1 = object->size;
389632a89c32SAlan Cox 				object->size = offidxstart;
3897ef694c1aSEdward Tomasz Napierala 				if (object->cred != NULL) {
38983364c323SKonstantin Belousov 					size1 -= object->size;
38993364c323SKonstantin Belousov 					KASSERT(object->charge >= ptoa(size1),
39009a4ee196SKonstantin Belousov 					    ("object %p charge < 0", object));
39019a4ee196SKonstantin Belousov 					swap_release_by_cred(ptoa(size1),
39029a4ee196SKonstantin Belousov 					    object->cred);
39033364c323SKonstantin Belousov 					object->charge -= ptoa(size1);
39043364c323SKonstantin Belousov 				}
39053364c323SKonstantin Belousov 			}
390632a89c32SAlan Cox 		}
390789f6b863SAttilio Rao 		VM_OBJECT_WUNLOCK(object);
390863967687SJeff Roberson 	}
39090b367bd8SKonstantin Belousov 	if (map->system_map)
39100b367bd8SKonstantin Belousov 		vm_map_entry_deallocate(entry, TRUE);
39110b367bd8SKonstantin Belousov 	else {
39127cdcf863SDoug Moore 		entry->defer_next = curthread->td_map_def_user;
39130b367bd8SKonstantin Belousov 		curthread->td_map_def_user = entry;
39140b367bd8SKonstantin Belousov 	}
3915df8bae1dSRodney W. Grimes }
3916df8bae1dSRodney W. Grimes 
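/*
 * Editor's note (sketch): entries queued on td_map_def_user above are
 * not freed immediately because the map lock is still held.  They are
 * drained after the lock is dropped by a walk of roughly this shape
 * (cf. vm_map_process_deferred() earlier in this file):
 *
 *	entry = td->td_map_def_user;
 *	td->td_map_def_user = NULL;
 *	while (entry != NULL) {
 *		next = entry->defer_next;
 *		... release any writecount/object references ...
 *		vm_map_entry_deallocate(entry, FALSE);
 *		entry = next;
 *	}
 */
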
3917df8bae1dSRodney W. Grimes /*
3918df8bae1dSRodney W. Grimes  *	vm_map_delete:	[ internal use only ]
3919df8bae1dSRodney W. Grimes  *
3920df8bae1dSRodney W. Grimes  *	Deallocates the given address range from the target
3921df8bae1dSRodney W. Grimes  *	map.
3922df8bae1dSRodney W. Grimes  */
3923df8bae1dSRodney W. Grimes int
3924655c3490SKonstantin Belousov vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
3925df8bae1dSRodney W. Grimes {
3926e2e80fb3SKonstantin Belousov 	vm_map_entry_t entry, next_entry, scratch_entry;
3927e2e80fb3SKonstantin Belousov 	int rv;
3928df8bae1dSRodney W. Grimes 
39293a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
39308a64110eSConrad Meyer 
393179e9451fSKonstantin Belousov 	if (start == end)
393279e9451fSKonstantin Belousov 		return (KERN_SUCCESS);
39333a0916b8SKonstantin Belousov 
3934df8bae1dSRodney W. Grimes 	/*
3935c7b23459SDoug Moore 	 * Find the start of the region, and clip it.
3936c7b23459SDoug Moore 	 * Step through all entries in this region.
3937df8bae1dSRodney W. Grimes 	 */
3938e2e80fb3SKonstantin Belousov 	rv = vm_map_lookup_clip_start(map, start, &entry, &scratch_entry);
3939e2e80fb3SKonstantin Belousov 	if (rv != KERN_SUCCESS)
3940e2e80fb3SKonstantin Belousov 		return (rv);
3941e2e80fb3SKonstantin Belousov 	for (; entry->start < end; entry = next_entry) {
394273b2baceSAlan Cox 		/*
394373b2baceSAlan Cox 		 * Wait for wiring or unwiring of an entry to complete.
39447c938963SBrian Feldman 		 * Also wait for any system wirings to disappear on
39457c938963SBrian Feldman 		 * user maps.
394673b2baceSAlan Cox 		 */
39477c938963SBrian Feldman 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 ||
39487c938963SBrian Feldman 		    (vm_map_pmap(map) != kernel_pmap &&
39497c938963SBrian Feldman 		    vm_map_entry_system_wired_count(entry) != 0)) {
395073b2baceSAlan Cox 			unsigned int last_timestamp;
395173b2baceSAlan Cox 			vm_offset_t saved_start;
395273b2baceSAlan Cox 
395373b2baceSAlan Cox 			saved_start = entry->start;
395473b2baceSAlan Cox 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
395573b2baceSAlan Cox 			last_timestamp = map->timestamp;
39568ce2d00aSPawel Jakub Dawidek 			(void) vm_map_unlock_and_wait(map, 0);
395773b2baceSAlan Cox 			vm_map_lock(map);
3958d1d3f7e1SDoug Moore 			if (last_timestamp + 1 != map->timestamp) {
395973b2baceSAlan Cox 				/*
396073b2baceSAlan Cox 				 * Look again for the entry because the map was
3961d1d3f7e1SDoug Moore 				 * modified while it was unlocked.
3962d1d3f7e1SDoug Moore 				 * Specifically, the entry may have been
3963d1d3f7e1SDoug Moore 				 * clipped, merged, or deleted.
396473b2baceSAlan Cox 				 */
3965e2e80fb3SKonstantin Belousov 				rv = vm_map_lookup_clip_start(map, saved_start,
3966e2e80fb3SKonstantin Belousov 				    &next_entry, &scratch_entry);
3967e2e80fb3SKonstantin Belousov 				if (rv != KERN_SUCCESS)
3968e2e80fb3SKonstantin Belousov 					break;
3969c7b23459SDoug Moore 			} else
3970c7b23459SDoug Moore 				next_entry = entry;
397173b2baceSAlan Cox 			continue;
397273b2baceSAlan Cox 		}
3973e2e80fb3SKonstantin Belousov 
3974e2e80fb3SKonstantin Belousov 		/* XXXKIB or delete to the upper superpage boundary? */
3975e2e80fb3SKonstantin Belousov 		rv = vm_map_clip_end(map, entry, end);
3976e2e80fb3SKonstantin Belousov 		if (rv != KERN_SUCCESS)
3977e2e80fb3SKonstantin Belousov 			break;
3978c7b23459SDoug Moore 		next_entry = vm_map_entry_succ(entry);
3979df8bae1dSRodney W. Grimes 
3980df8bae1dSRodney W. Grimes 		/*
39810d94caffSDavid Greenman 		 * Unwire before removing addresses from the pmap; otherwise,
39820d94caffSDavid Greenman 		 * unwiring will put the entries back in the pmap.
3983df8bae1dSRodney W. Grimes 		 */
3984be7be412SKonstantin Belousov 		if (entry->wired_count != 0)
3985df8bae1dSRodney W. Grimes 			vm_map_entry_unwire(map, entry);
3986df8bae1dSRodney W. Grimes 
398732f0fefcSKonstantin Belousov 		/*
398832f0fefcSKonstantin Belousov 		 * Remove mappings for the pages, but only if the
398932f0fefcSKonstantin Belousov 		 * mappings could exist.  For instance, it does not
399032f0fefcSKonstantin Belousov 		 * make sense to call pmap_remove() for guard entries.
399132f0fefcSKonstantin Belousov 		 */
399232f0fefcSKonstantin Belousov 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 ||
399332f0fefcSKonstantin Belousov 		    entry->object.vm_object != NULL)
399432a89c32SAlan Cox 			pmap_remove(map->pmap, entry->start, entry->end);
3995df8bae1dSRodney W. Grimes 
3996fa50a355SKonstantin Belousov 		if (entry->end == map->anon_loc)
3997fa50a355SKonstantin Belousov 			map->anon_loc = entry->start;
3998fa50a355SKonstantin Belousov 
3999df8bae1dSRodney W. Grimes 		/*
4000e608cc3cSKonstantin Belousov 		 * Delete the entry only after removing all pmap
4001e608cc3cSKonstantin Belousov 		 * entries pointing to its pages.  (Otherwise, its
4002e608cc3cSKonstantin Belousov 		 * page frames may be reallocated, and any modify bits
4003e608cc3cSKonstantin Belousov 		 * will be set in the wrong object!)
4004df8bae1dSRodney W. Grimes 		 */
4005df8bae1dSRodney W. Grimes 		vm_map_entry_delete(map, entry);
4006df8bae1dSRodney W. Grimes 	}
4007e2e80fb3SKonstantin Belousov 	return (rv);
4008df8bae1dSRodney W. Grimes }
4009df8bae1dSRodney W. Grimes 
4010df8bae1dSRodney W. Grimes /*
4011df8bae1dSRodney W. Grimes  *	vm_map_remove:
4012df8bae1dSRodney W. Grimes  *
4013df8bae1dSRodney W. Grimes  *	Remove the given address range from the target map.
4014df8bae1dSRodney W. Grimes  *	This is the exported form of vm_map_delete.
4015df8bae1dSRodney W. Grimes  */
4016df8bae1dSRodney W. Grimes int
40171b40f8c0SMatthew Dillon vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
4018df8bae1dSRodney W. Grimes {
40196eaee3feSAlan Cox 	int result;
4020df8bae1dSRodney W. Grimes 
4021df8bae1dSRodney W. Grimes 	vm_map_lock(map);
4022df8bae1dSRodney W. Grimes 	VM_MAP_RANGE_CHECK(map, start, end);
4023655c3490SKonstantin Belousov 	result = vm_map_delete(map, start, end);
4024df8bae1dSRodney W. Grimes 	vm_map_unlock(map);
4025df8bae1dSRodney W. Grimes 	return (result);
4026df8bae1dSRodney W. Grimes }
4027df8bae1dSRodney W. Grimes 
4028df8bae1dSRodney W. Grimes /*
4029df8bae1dSRodney W. Grimes  *	vm_map_check_protection:
4030df8bae1dSRodney W. Grimes  *
40312d5c7e45SMatthew Dillon  *	Assert that the target map allows the specified privilege on the
40322d5c7e45SMatthew Dillon  *	entire address region given.  The entire region must be allocated.
40332d5c7e45SMatthew Dillon  *
40342d5c7e45SMatthew Dillon  *	WARNING!  This code does not and should not check whether the
40352d5c7e45SMatthew Dillon  *	contents of the region are accessible.  For example, a smaller file
40362d5c7e45SMatthew Dillon  *	might be mapped into a larger address space.
40372d5c7e45SMatthew Dillon  *
40382d5c7e45SMatthew Dillon  *	NOTE!  This code is also called by munmap().
4039d8834602SAlan Cox  *
4040d8834602SAlan Cox  *	The map must be locked.  A read lock is sufficient.
4041df8bae1dSRodney W. Grimes  */
40420d94caffSDavid Greenman boolean_t
4043b9dcd593SBruce Evans vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
4044b9dcd593SBruce Evans 			vm_prot_t protection)
4045df8bae1dSRodney W. Grimes {
4046c0877f10SJohn Dyson 	vm_map_entry_t entry;
4047d1d3f7e1SDoug Moore 	vm_map_entry_t tmp_entry;
4048df8bae1dSRodney W. Grimes 
4049d1d3f7e1SDoug Moore 	if (!vm_map_lookup_entry(map, start, &tmp_entry))
4050df8bae1dSRodney W. Grimes 		return (FALSE);
4051d1d3f7e1SDoug Moore 	entry = tmp_entry;
4052df8bae1dSRodney W. Grimes 
4053df8bae1dSRodney W. Grimes 	while (start < end) {
4054df8bae1dSRodney W. Grimes 		/*
4055df8bae1dSRodney W. Grimes 		 * No holes allowed!
4056df8bae1dSRodney W. Grimes 		 */
4057d8834602SAlan Cox 		if (start < entry->start)
4058df8bae1dSRodney W. Grimes 			return (FALSE);
4059df8bae1dSRodney W. Grimes 		/*
4060df8bae1dSRodney W. Grimes 		 * Check protection associated with entry.
4061df8bae1dSRodney W. Grimes 		 */
4062d8834602SAlan Cox 		if ((entry->protection & protection) != protection)
4063df8bae1dSRodney W. Grimes 			return (FALSE);
4064df8bae1dSRodney W. Grimes 		/* go to next entry */
4065df8bae1dSRodney W. Grimes 		start = entry->end;
40667cdcf863SDoug Moore 		entry = vm_map_entry_succ(entry);
4067df8bae1dSRodney W. Grimes 	}
4068df8bae1dSRodney W. Grimes 	return (TRUE);
4069df8bae1dSRodney W. Grimes }
4070df8bae1dSRodney W. Grimes 
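/*
 * Editor's sketch: a typical caller verifies access over an entire
 * range before operating on it, holding at least a read lock:
 *
 *	vm_map_lock_read(map);
 *	ok = vm_map_check_protection(map, start, end,
 *	    VM_PROT_READ | VM_PROT_WRITE);
 *	vm_map_unlock_read(map);
 *	if (!ok)
 *		return (KERN_PROTECTION_FAILURE);
 */
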
40714d987866SJeff Roberson /*
4073886b9021SJeff Roberson  *	vm_map_copy_swap_object:
40744d987866SJeff Roberson  *
4075886b9021SJeff Roberson  *	Copies a swap-backed object from an existing map entry to a
40764d987866SJeff Roberson  *	new one.  Carries forward the swap charge.  May change the
40774d987866SJeff Roberson  *	src object on return.
40784d987866SJeff Roberson  */
40794d987866SJeff Roberson static void
4080886b9021SJeff Roberson vm_map_copy_swap_object(vm_map_entry_t src_entry, vm_map_entry_t dst_entry,
40814d987866SJeff Roberson     vm_offset_t size, vm_ooffset_t *fork_charge)
40824d987866SJeff Roberson {
40834d987866SJeff Roberson 	vm_object_t src_object;
40844d987866SJeff Roberson 	struct ucred *cred;
40854d987866SJeff Roberson 	int charged;
40864d987866SJeff Roberson 
40874d987866SJeff Roberson 	src_object = src_entry->object.vm_object;
40884d987866SJeff Roberson 	charged = ENTRY_CHARGED(src_entry);
4089d966c761SJeff Roberson 	if ((src_object->flags & OBJ_ANON) != 0) {
4090d966c761SJeff Roberson 		VM_OBJECT_WLOCK(src_object);
40914d987866SJeff Roberson 		vm_object_collapse(src_object);
40924d987866SJeff Roberson 		if ((src_object->flags & OBJ_ONEMAPPING) != 0) {
40934d987866SJeff Roberson 			vm_object_split(src_entry);
40944d987866SJeff Roberson 			src_object = src_entry->object.vm_object;
40954d987866SJeff Roberson 		}
40964d987866SJeff Roberson 		vm_object_reference_locked(src_object);
40974d987866SJeff Roberson 		vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
4098d966c761SJeff Roberson 		VM_OBJECT_WUNLOCK(src_object);
4099d966c761SJeff Roberson 	} else
4100d966c761SJeff Roberson 		vm_object_reference(src_object);
41014d987866SJeff Roberson 	if (src_entry->cred != NULL &&
41024d987866SJeff Roberson 	    !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
41034d987866SJeff Roberson 		KASSERT(src_object->cred == NULL,
41044d987866SJeff Roberson 		    ("OVERCOMMIT: vm_map_copy_swap_object: cred %p",
41054d987866SJeff Roberson 		     src_object));
41064d987866SJeff Roberson 		src_object->cred = src_entry->cred;
41074d987866SJeff Roberson 		src_object->charge = size;
41084d987866SJeff Roberson 	}
41094d987866SJeff Roberson 	dst_entry->object.vm_object = src_object;
41104d987866SJeff Roberson 	if (charged) {
41114d987866SJeff Roberson 		cred = curthread->td_ucred;
41124d987866SJeff Roberson 		crhold(cred);
41134d987866SJeff Roberson 		dst_entry->cred = cred;
41144d987866SJeff Roberson 		*fork_charge += size;
41154d987866SJeff Roberson 		if (!(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
41164d987866SJeff Roberson 			crhold(cred);
41174d987866SJeff Roberson 			src_entry->cred = cred;
41184d987866SJeff Roberson 			*fork_charge += size;
41194d987866SJeff Roberson 		}
41204d987866SJeff Roberson 	}
41214d987866SJeff Roberson }
41224d987866SJeff Roberson 
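/*
 * Editor's note: for a charged entry, the block above takes a ucred
 * reference for dst_entry and adds 'size' to *fork_charge; if
 * src_entry was not yet marked MAP_ENTRY_NEEDS_COPY, it is charged
 * the same way.  A single copied entry can thus add up to 2 * size to
 * the fork charge that vmspace_fork()'s caller must reserve.
 */
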
412386524867SJohn Dyson /*
4124df8bae1dSRodney W. Grimes  *	vm_map_copy_entry:
4125df8bae1dSRodney W. Grimes  *
4126df8bae1dSRodney W. Grimes  *	Copies the contents of the source entry to the destination
4127df8bae1dSRodney W. Grimes  *	entry.  The entries *must* be aligned properly.
4128df8bae1dSRodney W. Grimes  */
4129f708ef1bSPoul-Henning Kamp static void
41301b40f8c0SMatthew Dillon vm_map_copy_entry(
41311b40f8c0SMatthew Dillon 	vm_map_t src_map,
41321b40f8c0SMatthew Dillon 	vm_map_t dst_map,
41331b40f8c0SMatthew Dillon 	vm_map_entry_t src_entry,
41343364c323SKonstantin Belousov 	vm_map_entry_t dst_entry,
41353364c323SKonstantin Belousov 	vm_ooffset_t *fork_charge)
4136df8bae1dSRodney W. Grimes {
4137c0877f10SJohn Dyson 	vm_object_t src_object;
413884110e7eSKonstantin Belousov 	vm_map_entry_t fake_entry;
41393364c323SKonstantin Belousov 	vm_offset_t size;
4140c0877f10SJohn Dyson 
41413a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(dst_map);
41423a0916b8SKonstantin Belousov 
41439fdfe602SMatthew Dillon 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
4144df8bae1dSRodney W. Grimes 		return;
4145df8bae1dSRodney W. Grimes 
4146afaa41f6SAlan Cox 	if (src_entry->wired_count == 0 ||
4147afaa41f6SAlan Cox 	    (src_entry->protection & VM_PROT_WRITE) == 0) {
4148df8bae1dSRodney W. Grimes 		/*
41490d94caffSDavid Greenman 		 * If the source entry is marked needs_copy, it is already
41500d94caffSDavid Greenman 		 * write-protected.
4151df8bae1dSRodney W. Grimes 		 */
4152d9a9209aSAlan Cox 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0 &&
4153d9a9209aSAlan Cox 		    (src_entry->protection & VM_PROT_WRITE) != 0) {
4154df8bae1dSRodney W. Grimes 			pmap_protect(src_map->pmap,
4155df8bae1dSRodney W. Grimes 			    src_entry->start,
4156df8bae1dSRodney W. Grimes 			    src_entry->end,
4157df8bae1dSRodney W. Grimes 			    src_entry->protection & ~VM_PROT_WRITE);
4158df8bae1dSRodney W. Grimes 		}
4159b18bfc3dSJohn Dyson 
4160df8bae1dSRodney W. Grimes 		/*
4161df8bae1dSRodney W. Grimes 		 * Make a copy of the object.
4162df8bae1dSRodney W. Grimes 		 */
41633364c323SKonstantin Belousov 		size = src_entry->end - src_entry->start;
41648aef1712SMatthew Dillon 		if ((src_object = src_entry->object.vm_object) != NULL) {
4165886b9021SJeff Roberson 			if (src_object->type == OBJT_DEFAULT ||
4166886b9021SJeff Roberson 			    src_object->type == OBJT_SWAP) {
4167886b9021SJeff Roberson 				vm_map_copy_swap_object(src_entry, dst_entry,
41684d987866SJeff Roberson 				    size, fork_charge);
41694d987866SJeff Roberson 				/* May have split/collapsed, reload obj. */
41704d987866SJeff Roberson 				src_object = src_entry->object.vm_object;
41714d987866SJeff Roberson 			} else {
41724d987866SJeff Roberson 				vm_object_reference(src_object);
4173c0877f10SJohn Dyson 				dst_entry->object.vm_object = src_object;
41743364c323SKonstantin Belousov 			}
41759a4ee196SKonstantin Belousov 			src_entry->eflags |= MAP_ENTRY_COW |
41769a4ee196SKonstantin Belousov 			    MAP_ENTRY_NEEDS_COPY;
41779a4ee196SKonstantin Belousov 			dst_entry->eflags |= MAP_ENTRY_COW |
41789a4ee196SKonstantin Belousov 			    MAP_ENTRY_NEEDS_COPY;
4179b18bfc3dSJohn Dyson 			dst_entry->offset = src_entry->offset;
4180fe7bcbafSKyle Evans 			if (src_entry->eflags & MAP_ENTRY_WRITECNT) {
418184110e7eSKonstantin Belousov 				/*
4182fe7bcbafSKyle Evans 				 * MAP_ENTRY_WRITECNT cannot
418384110e7eSKonstantin Belousov 				 * indicate a write reference from
418484110e7eSKonstantin Belousov 				 * src_entry, since the entry is
418584110e7eSKonstantin Belousov 				 * marked as needs-copy.  Allocate a
418684110e7eSKonstantin Belousov 				 * fake entry that is used to
4187fe7bcbafSKyle Evans 				 * decrement the object's un_pager
418884110e7eSKonstantin Belousov 				 * writecount at the appropriate time.
418984110e7eSKonstantin Belousov 				 * Attach fake_entry to the deferred list.
419084110e7eSKonstantin Belousov 				 */
419184110e7eSKonstantin Belousov 				fake_entry = vm_map_entry_create(dst_map);
4192fe7bcbafSKyle Evans 				fake_entry->eflags = MAP_ENTRY_WRITECNT;
4193fe7bcbafSKyle Evans 				src_entry->eflags &= ~MAP_ENTRY_WRITECNT;
419484110e7eSKonstantin Belousov 				vm_object_reference(src_object);
419584110e7eSKonstantin Belousov 				fake_entry->object.vm_object = src_object;
419684110e7eSKonstantin Belousov 				fake_entry->start = src_entry->start;
419784110e7eSKonstantin Belousov 				fake_entry->end = src_entry->end;
41987cdcf863SDoug Moore 				fake_entry->defer_next =
41997cdcf863SDoug Moore 				    curthread->td_map_def_user;
420084110e7eSKonstantin Belousov 				curthread->td_map_def_user = fake_entry;
420184110e7eSKonstantin Belousov 			}
42020ec97ffcSKonstantin Belousov 
42030ec97ffcSKonstantin Belousov 			pmap_copy(dst_map->pmap, src_map->pmap,
42040ec97ffcSKonstantin Belousov 			    dst_entry->start, dst_entry->end - dst_entry->start,
42050ec97ffcSKonstantin Belousov 			    src_entry->start);
4206b18bfc3dSJohn Dyson 		} else {
4207b18bfc3dSJohn Dyson 			dst_entry->object.vm_object = NULL;
4208b18bfc3dSJohn Dyson 			dst_entry->offset = 0;
4209ef694c1aSEdward Tomasz Napierala 			if (src_entry->cred != NULL) {
4210ef694c1aSEdward Tomasz Napierala 				dst_entry->cred = curthread->td_ucred;
4211ef694c1aSEdward Tomasz Napierala 				crhold(dst_entry->cred);
42123364c323SKonstantin Belousov 				*fork_charge += size;
42133364c323SKonstantin Belousov 			}
4214b18bfc3dSJohn Dyson 		}
42150d94caffSDavid Greenman 	} else {
4216df8bae1dSRodney W. Grimes 		/*
4217afaa41f6SAlan Cox 		 * We don't want to make writeable wired pages copy-on-write.
4218afaa41f6SAlan Cox 		 * Immediately copy these pages into the new map by simulating
4219afaa41f6SAlan Cox 		 * page faults.  The new pages are pageable.
4220df8bae1dSRodney W. Grimes 		 */
4221121fd461SKonstantin Belousov 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry,
4222121fd461SKonstantin Belousov 		    fork_charge);
4223df8bae1dSRodney W. Grimes 	}
4224df8bae1dSRodney W. Grimes }
4225df8bae1dSRodney W. Grimes 
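/*
 * Summary of the three cases handled above:
 *
 *	unwired (or read-only) entry with a backing object:
 *		the object is shared copy-on-write; both entries get
 *		MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY and pmap_copy()
 *		propagates the now read-only mappings to the child.
 *	unwired entry with no backing object:
 *		the child entry stays unbacked; only the swap charge
 *		is duplicated.
 *	wired, writable entry:
 *		pages are copied eagerly via vm_fault_copy_entry();
 *		the copies are pageable in the child.
 */
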
4226df8bae1dSRodney W. Grimes /*
42272a7be1b6SBrian Feldman  * vmspace_map_entry_forked:
42282a7be1b6SBrian Feldman  * Update the newly-forked vmspace each time a map entry is inherited
42292a7be1b6SBrian Feldman  * or copied.  The values for vm_dsize and vm_tsize are approximate
42302a7be1b6SBrian Feldman  * (and mostly obsolete in the face of mmap(2) et al.).
42312a7be1b6SBrian Feldman  */
42322a7be1b6SBrian Feldman static void
42332a7be1b6SBrian Feldman vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
42342a7be1b6SBrian Feldman     vm_map_entry_t entry)
42352a7be1b6SBrian Feldman {
42362a7be1b6SBrian Feldman 	vm_size_t entrysize;
42372a7be1b6SBrian Feldman 	vm_offset_t newend;
42382a7be1b6SBrian Feldman 
423919bd0d9cSKonstantin Belousov 	if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
424019bd0d9cSKonstantin Belousov 		return;
42412a7be1b6SBrian Feldman 	entrysize = entry->end - entry->start;
42422a7be1b6SBrian Feldman 	vm2->vm_map.size += entrysize;
42432a7be1b6SBrian Feldman 	if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
42442a7be1b6SBrian Feldman 		vm2->vm_ssize += btoc(entrysize);
42452a7be1b6SBrian Feldman 	} else if (entry->start >= (vm_offset_t)vm1->vm_daddr &&
42462a7be1b6SBrian Feldman 	    entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) {
4247b351299cSAndrew Gallatin 		newend = MIN(entry->end,
42482a7be1b6SBrian Feldman 		    (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize));
42492a7be1b6SBrian Feldman 		vm2->vm_dsize += btoc(newend - entry->start);
42502a7be1b6SBrian Feldman 	} else if (entry->start >= (vm_offset_t)vm1->vm_taddr &&
42512a7be1b6SBrian Feldman 	    entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) {
4252b351299cSAndrew Gallatin 		newend = MIN(entry->end,
42532a7be1b6SBrian Feldman 		    (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize));
42542a7be1b6SBrian Feldman 		vm2->vm_tsize += btoc(newend - entry->start);
42552a7be1b6SBrian Feldman 	}
42562a7be1b6SBrian Feldman }
42572a7be1b6SBrian Feldman 
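/*
 * Worked example (hypothetical layout): with vm_daddr = 0x1000000 and
 * vm_dsize = 16 pages, an inherited entry spanning
 * [vm_daddr + 8 pages, vm_daddr + 24 pages) credits only 8 pages to
 * vm2->vm_dsize, because newend is clamped by MIN() to
 * vm_daddr + ctob(vm_dsize) before the btoc() conversion.
 */
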
42582a7be1b6SBrian Feldman /*
4259df8bae1dSRodney W. Grimes  * vmspace_fork:
4260df8bae1dSRodney W. Grimes  * Create a new process vmspace structure and vm_map
4261df8bae1dSRodney W. Grimes  * based on those of an existing process.  The new map
4262df8bae1dSRodney W. Grimes  * is based on the old map, according to the inheritance
4263df8bae1dSRodney W. Grimes  * values on the regions in that map.
4264df8bae1dSRodney W. Grimes  *
42652a7be1b6SBrian Feldman  * XXX It might be worth coalescing the entries added to the new vmspace.
42662a7be1b6SBrian Feldman  *
4267df8bae1dSRodney W. Grimes  * The source map must not be locked.
4268df8bae1dSRodney W. Grimes  */
4269df8bae1dSRodney W. Grimes struct vmspace *
42703364c323SKonstantin Belousov vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
4271df8bae1dSRodney W. Grimes {
4272c0877f10SJohn Dyson 	struct vmspace *vm2;
427379e53838SAlan Cox 	vm_map_t new_map, old_map;
427479e53838SAlan Cox 	vm_map_entry_t new_entry, old_entry;
4275de5f6a77SJohn Dyson 	vm_object_t object;
4276e7a9df16SKonstantin Belousov 	int error, locked;
427719bd0d9cSKonstantin Belousov 	vm_inherit_t inh;
4278df8bae1dSRodney W. Grimes 
427979e53838SAlan Cox 	old_map = &vm1->vm_map;
428079e53838SAlan Cox 	/* Copy immutable fields of vm1 to vm2. */
42816e00f3a3SKonstantin Belousov 	vm2 = vmspace_alloc(vm_map_min(old_map), vm_map_max(old_map),
42826e00f3a3SKonstantin Belousov 	    pmap_pinit);
428389b57fcfSKonstantin Belousov 	if (vm2 == NULL)
428479e53838SAlan Cox 		return (NULL);
4285e7a9df16SKonstantin Belousov 
42862a7be1b6SBrian Feldman 	vm2->vm_taddr = vm1->vm_taddr;
42872a7be1b6SBrian Feldman 	vm2->vm_daddr = vm1->vm_daddr;
42882a7be1b6SBrian Feldman 	vm2->vm_maxsaddr = vm1->vm_maxsaddr;
428979e53838SAlan Cox 	vm_map_lock(old_map);
429079e53838SAlan Cox 	if (old_map->busy)
429179e53838SAlan Cox 		vm_map_wait_busy(old_map);
429279e53838SAlan Cox 	new_map = &vm2->vm_map;
42931fac7d7fSKonstantin Belousov 	locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
42941fac7d7fSKonstantin Belousov 	KASSERT(locked, ("vmspace_fork: lock failed"));
4295df8bae1dSRodney W. Grimes 
4296e7a9df16SKonstantin Belousov 	error = pmap_vmspace_copy(new_map->pmap, old_map->pmap);
4297e7a9df16SKonstantin Belousov 	if (error != 0) {
4298e7a9df16SKonstantin Belousov 		sx_xunlock(&old_map->lock);
4299e7a9df16SKonstantin Belousov 		sx_xunlock(&new_map->lock);
4300e7a9df16SKonstantin Belousov 		vm_map_process_deferred();
4301e7a9df16SKonstantin Belousov 		vmspace_free(vm2);
4302e7a9df16SKonstantin Belousov 		return (NULL);
4303e7a9df16SKonstantin Belousov 	}
4304e7a9df16SKonstantin Belousov 
4305fa50a355SKonstantin Belousov 	new_map->anon_loc = old_map->anon_loc;
43069f9cc3f9SBrooks Davis 	new_map->flags |= old_map->flags & (MAP_ASLR | MAP_ASLR_IGNSTART);
4307e7a9df16SKonstantin Belousov 
43082767c9f3SDoug Moore 	VM_MAP_ENTRY_FOREACH(old_entry, old_map) {
43092767c9f3SDoug Moore 		if ((old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
4310df8bae1dSRodney W. Grimes 			panic("vm_map_fork: encountered a submap");
4311df8bae1dSRodney W. Grimes 
431219bd0d9cSKonstantin Belousov 		inh = old_entry->inheritance;
431319bd0d9cSKonstantin Belousov 		if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 &&
431419bd0d9cSKonstantin Belousov 		    inh != VM_INHERIT_NONE)
431519bd0d9cSKonstantin Belousov 			inh = VM_INHERIT_COPY;
431619bd0d9cSKonstantin Belousov 
431719bd0d9cSKonstantin Belousov 		switch (inh) {
4318df8bae1dSRodney W. Grimes 		case VM_INHERIT_NONE:
4319df8bae1dSRodney W. Grimes 			break;
4320df8bae1dSRodney W. Grimes 
4321df8bae1dSRodney W. Grimes 		case VM_INHERIT_SHARE:
4322df8bae1dSRodney W. Grimes 			/*
43232767c9f3SDoug Moore 			 * Clone the entry, creating the shared object if
43242767c9f3SDoug Moore 			 * necessary.
4325fed9a903SJohn Dyson 			 */
4326fed9a903SJohn Dyson 			object = old_entry->object.vm_object;
4327fed9a903SJohn Dyson 			if (object == NULL) {
4328af1d6d6aSDoug Moore 				vm_map_entry_back(old_entry);
4329af1d6d6aSDoug Moore 				object = old_entry->object.vm_object;
43309a2f6362SAlan Cox 			}
43319a2f6362SAlan Cox 
43329a2f6362SAlan Cox 			/*
43339a2f6362SAlan Cox 			 * Add the reference before calling vm_object_shadow
43349a2f6362SAlan Cox 			 * to insure that a shadow object is created.
43359a2f6362SAlan Cox 			 * to ensure that a shadow object is created.
43369a2f6362SAlan Cox 			vm_object_reference(object);
43379a2f6362SAlan Cox 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
43385069bf57SJohn Dyson 				vm_object_shadow(&old_entry->object.vm_object,
43395069bf57SJohn Dyson 				    &old_entry->offset,
434067388836SKonstantin Belousov 				    old_entry->end - old_entry->start,
434167388836SKonstantin Belousov 				    old_entry->cred,
4342d30344bdSIan Dowse 				    /* Transfer the second reference too. */
434367388836SKonstantin Belousov 				    true);
434467388836SKonstantin Belousov 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
434567388836SKonstantin Belousov 				old_entry->cred = NULL;
43467fd10fb3SKonstantin Belousov 
43477fd10fb3SKonstantin Belousov 				/*
434883ea714fSDoug Moore 				 * As in vm_map_merged_neighbor_dispose(),
434983ea714fSDoug Moore 				 * the vnode lock will not be acquired in
43507fd10fb3SKonstantin Belousov 				 * this call to vm_object_deallocate().
43517fd10fb3SKonstantin Belousov 				 */
4352d30344bdSIan Dowse 				vm_object_deallocate(object);
43535069bf57SJohn Dyson 				object = old_entry->object.vm_object;
435467388836SKonstantin Belousov 			} else {
435589f6b863SAttilio Rao 				VM_OBJECT_WLOCK(object);
4356069e9bc1SDoug Rabson 				vm_object_clear_flag(object, OBJ_ONEMAPPING);
4357ef694c1aSEdward Tomasz Napierala 				if (old_entry->cred != NULL) {
435867388836SKonstantin Belousov 					KASSERT(object->cred == NULL,
435967388836SKonstantin Belousov 					    ("vmspace_fork both cred"));
4360ef694c1aSEdward Tomasz Napierala 					object->cred = old_entry->cred;
436167388836SKonstantin Belousov 					object->charge = old_entry->end -
436267388836SKonstantin Belousov 					    old_entry->start;
4363ef694c1aSEdward Tomasz Napierala 					old_entry->cred = NULL;
43643364c323SKonstantin Belousov 				}
4365b9781cf6SKonstantin Belousov 
4366b9781cf6SKonstantin Belousov 				/*
4367b9781cf6SKonstantin Belousov 				 * Assert the correct state of the vnode
4368b9781cf6SKonstantin Belousov 				 * v_writecount while the object is locked, to
4369b9781cf6SKonstantin Belousov 				 * not relock it later for the assertion
4370b9781cf6SKonstantin Belousov 				 * correctness.
4371b9781cf6SKonstantin Belousov 				 */
4372fe7bcbafSKyle Evans 				if (old_entry->eflags & MAP_ENTRY_WRITECNT &&
4373b9781cf6SKonstantin Belousov 				    object->type == OBJT_VNODE) {
437467388836SKonstantin Belousov 					KASSERT(((struct vnode *)object->
437567388836SKonstantin Belousov 					    handle)->v_writecount > 0,
437667388836SKonstantin Belousov 					    ("vmspace_fork: v_writecount %p",
437767388836SKonstantin Belousov 					    object));
437867388836SKonstantin Belousov 					KASSERT(object->un_pager.vnp.
437967388836SKonstantin Belousov 					    writemappings > 0,
4380b9781cf6SKonstantin Belousov 					    ("vmspace_fork: vnp.writecount %p",
4381b9781cf6SKonstantin Belousov 					    object));
4382b9781cf6SKonstantin Belousov 				}
438389f6b863SAttilio Rao 				VM_OBJECT_WUNLOCK(object);
438467388836SKonstantin Belousov 			}
4385fed9a903SJohn Dyson 
4386fed9a903SJohn Dyson 			/*
4387ad5fca3bSAlan Cox 			 * Clone the entry, referencing the shared object.
4388df8bae1dSRodney W. Grimes 			 */
4389df8bae1dSRodney W. Grimes 			new_entry = vm_map_entry_create(new_map);
4390df8bae1dSRodney W. Grimes 			*new_entry = *old_entry;
43919f6acfd1SKonstantin Belousov 			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
43929f6acfd1SKonstantin Belousov 			    MAP_ENTRY_IN_TRANSITION);
43930acea7dfSKonstantin Belousov 			new_entry->wiring_thread = NULL;
4394df8bae1dSRodney W. Grimes 			new_entry->wired_count = 0;
4395fe7bcbafSKyle Evans 			if (new_entry->eflags & MAP_ENTRY_WRITECNT) {
4396fe7bcbafSKyle Evans 				vm_pager_update_writecount(object,
439784110e7eSKonstantin Belousov 				    new_entry->start, new_entry->end);
439884110e7eSKonstantin Belousov 			}
439978022527SKonstantin Belousov 			vm_map_entry_set_vnode_text(new_entry, true);
4400df8bae1dSRodney W. Grimes 
4401df8bae1dSRodney W. Grimes 			/*
44020d94caffSDavid Greenman 			 * Insert the entry into the new map -- we know we're
44030d94caffSDavid Greenman 			 * inserting at the end of the new map.
4404df8bae1dSRodney W. Grimes 			 */
44059f701172SKonstantin Belousov 			vm_map_entry_link(new_map, new_entry);
44062a7be1b6SBrian Feldman 			vmspace_map_entry_forked(vm1, vm2, new_entry);
4407df8bae1dSRodney W. Grimes 
4408df8bae1dSRodney W. Grimes 			/*
4409df8bae1dSRodney W. Grimes 			 * Update the physical map
4410df8bae1dSRodney W. Grimes 			 */
4411df8bae1dSRodney W. Grimes 			pmap_copy(new_map->pmap, old_map->pmap,
4412df8bae1dSRodney W. Grimes 			    new_entry->start,
4413df8bae1dSRodney W. Grimes 			    (old_entry->end - old_entry->start),
4414df8bae1dSRodney W. Grimes 			    old_entry->start);
4415df8bae1dSRodney W. Grimes 			break;
4416df8bae1dSRodney W. Grimes 
4417df8bae1dSRodney W. Grimes 		case VM_INHERIT_COPY:
4418df8bae1dSRodney W. Grimes 			/*
4419df8bae1dSRodney W. Grimes 			 * Clone the entry and link into the map.
4420df8bae1dSRodney W. Grimes 			 */
4421df8bae1dSRodney W. Grimes 			new_entry = vm_map_entry_create(new_map);
4422df8bae1dSRodney W. Grimes 			*new_entry = *old_entry;
442384110e7eSKonstantin Belousov 			/*
442484110e7eSKonstantin Belousov 			 * Copied entry is COW over the old object.
442584110e7eSKonstantin Belousov 			 */
44269f6acfd1SKonstantin Belousov 			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
4427fe7bcbafSKyle Evans 			    MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_WRITECNT);
44280acea7dfSKonstantin Belousov 			new_entry->wiring_thread = NULL;
4429df8bae1dSRodney W. Grimes 			new_entry->wired_count = 0;
4430df8bae1dSRodney W. Grimes 			new_entry->object.vm_object = NULL;
4431ef694c1aSEdward Tomasz Napierala 			new_entry->cred = NULL;
44329f701172SKonstantin Belousov 			vm_map_entry_link(new_map, new_entry);
44332a7be1b6SBrian Feldman 			vmspace_map_entry_forked(vm1, vm2, new_entry);
4434bd7e5f99SJohn Dyson 			vm_map_copy_entry(old_map, new_map, old_entry,
44353364c323SKonstantin Belousov 			    new_entry, fork_charge);
443678022527SKonstantin Belousov 			vm_map_entry_set_vnode_text(new_entry, true);
4437df8bae1dSRodney W. Grimes 			break;
443878d7964bSXin LI 
443978d7964bSXin LI 		case VM_INHERIT_ZERO:
444078d7964bSXin LI 			/*
444178d7964bSXin LI 			 * Create a new anonymous mapping entry modelled from
444278d7964bSXin LI 			 * the old one.
444378d7964bSXin LI 			 */
444478d7964bSXin LI 			new_entry = vm_map_entry_create(new_map);
444578d7964bSXin LI 			memset(new_entry, 0, sizeof(*new_entry));
444678d7964bSXin LI 
444778d7964bSXin LI 			new_entry->start = old_entry->start;
444878d7964bSXin LI 			new_entry->end = old_entry->end;
444978d7964bSXin LI 			new_entry->eflags = old_entry->eflags &
445078d7964bSXin LI 			    ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
4451e2e80fb3SKonstantin Belousov 			    MAP_ENTRY_WRITECNT | MAP_ENTRY_VN_EXEC |
4452e2e80fb3SKonstantin Belousov 			    MAP_ENTRY_SPLIT_BOUNDARY_MASK);
445378d7964bSXin LI 			new_entry->protection = old_entry->protection;
445478d7964bSXin LI 			new_entry->max_protection = old_entry->max_protection;
445578d7964bSXin LI 			new_entry->inheritance = VM_INHERIT_ZERO;
445678d7964bSXin LI 
44579f701172SKonstantin Belousov 			vm_map_entry_link(new_map, new_entry);
445878d7964bSXin LI 			vmspace_map_entry_forked(vm1, vm2, new_entry);
445978d7964bSXin LI 
446078d7964bSXin LI 			new_entry->cred = curthread->td_ucred;
446178d7964bSXin LI 			crhold(new_entry->cred);
446278d7964bSXin LI 			*fork_charge += (new_entry->end - new_entry->start);
446378d7964bSXin LI 
446478d7964bSXin LI 			break;
4465df8bae1dSRodney W. Grimes 		}
4466df8bae1dSRodney W. Grimes 	}
446784110e7eSKonstantin Belousov 	/*
446884110e7eSKonstantin Belousov 	 * Use inlined vm_map_unlock() to postpone handling the deferred
446984110e7eSKonstantin Belousov 	 * map entries, which cannot be done until both old_map and
447084110e7eSKonstantin Belousov 	 * new_map locks are released.
447184110e7eSKonstantin Belousov 	 */
447284110e7eSKonstantin Belousov 	sx_xunlock(&old_map->lock);
447384110e7eSKonstantin Belousov 	sx_xunlock(&new_map->lock);
447484110e7eSKonstantin Belousov 	vm_map_process_deferred();
4475df8bae1dSRodney W. Grimes 
4476df8bae1dSRodney W. Grimes 	return (vm2);
4477df8bae1dSRodney W. Grimes }
4478df8bae1dSRodney W. Grimes 
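/*
 * Summary of how each inheritance value is handled by the loop above:
 *
 *	VM_INHERIT_NONE		entry is skipped; the child has a hole.
 *	VM_INHERIT_SHARE	parent and child share the object; a
 *				shadow object is interposed first if the
 *				entry was marked needs-copy.
 *	VM_INHERIT_COPY		the child receives a copy-on-write copy
 *				via vm_map_copy_entry().
 *	VM_INHERIT_ZERO		the child receives a fresh anonymous
 *				entry of the same range and protection.
 *
 * Guard entries are forced to VM_INHERIT_COPY unless they are marked
 * VM_INHERIT_NONE.
 */
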
44798056df6eSAlan Cox /*
44808056df6eSAlan Cox  * Create a process's stack for exec_new_vmspace().  This function is never
44818056df6eSAlan Cox  * asked to wire the newly created stack.
44828056df6eSAlan Cox  */
448394f7e29aSAlan Cox int
448494f7e29aSAlan Cox vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
448594f7e29aSAlan Cox     vm_prot_t prot, vm_prot_t max, int cow)
448694f7e29aSAlan Cox {
44874648ba0aSKonstantin Belousov 	vm_size_t growsize, init_ssize;
44888056df6eSAlan Cox 	rlim_t vmemlim;
44894648ba0aSKonstantin Belousov 	int rv;
44904648ba0aSKonstantin Belousov 
44918056df6eSAlan Cox 	MPASS((map->flags & MAP_WIREFUTURE) == 0);
44924648ba0aSKonstantin Belousov 	growsize = sgrowsiz;
44934648ba0aSKonstantin Belousov 	init_ssize = (max_ssize < growsize) ? max_ssize : growsize;
44944648ba0aSKonstantin Belousov 	vm_map_lock(map);
4495f6f6d240SMateusz Guzik 	vmemlim = lim_cur(curthread, RLIMIT_VMEM);
44964648ba0aSKonstantin Belousov 	/* If we would blow our VMEM resource limit, no go */
44974648ba0aSKonstantin Belousov 	if (map->size + init_ssize > vmemlim) {
44984648ba0aSKonstantin Belousov 		rv = KERN_NO_SPACE;
44994648ba0aSKonstantin Belousov 		goto out;
45004648ba0aSKonstantin Belousov 	}
4501e1f92cccSAlan Cox 	rv = vm_map_stack_locked(map, addrbos, max_ssize, growsize, prot,
45024648ba0aSKonstantin Belousov 	    max, cow);
45034648ba0aSKonstantin Belousov out:
45044648ba0aSKonstantin Belousov 	vm_map_unlock(map);
45054648ba0aSKonstantin Belousov 	return (rv);
45064648ba0aSKonstantin Belousov }
45074648ba0aSKonstantin Belousov 
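/*
 * Usage sketch: creating a grow-down stack for a new image, loosely
 * modelled on exec_new_vmspace().  "stack_top" and the use of the
 * maxssiz global here are illustrative assumptions.
 */
#if 0
	rv = vm_map_stack(map, stack_top - maxssiz, maxssiz,
	    VM_PROT_ALL, VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
	if (rv != KERN_SUCCESS)
		return (vm_mmap_to_errno(rv));
#endif
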
450819f49ad3SKonstantin Belousov static int stack_guard_page = 1;
450919f49ad3SKonstantin Belousov SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
451019f49ad3SKonstantin Belousov     &stack_guard_page, 0,
451119f49ad3SKonstantin Belousov     "Specifies the number of guard pages for a growable stack");
451219f49ad3SKonstantin Belousov 
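/*
 * Being declared CTLFLAG_RWTUN, the guard size is adjustable both as a
 * boot-time tunable and at runtime, e.g.:
 *
 *	sysctl security.bsd.stack_guard_page=2
 */
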
45134648ba0aSKonstantin Belousov static int
45144648ba0aSKonstantin Belousov vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
45154648ba0aSKonstantin Belousov     vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow)
45164648ba0aSKonstantin Belousov {
4517d1d3f7e1SDoug Moore 	vm_map_entry_t new_entry, prev_entry;
451819bd0d9cSKonstantin Belousov 	vm_offset_t bot, gap_bot, gap_top, top;
451919f49ad3SKonstantin Belousov 	vm_size_t init_ssize, sgp;
4520fd75d710SMarcel Moolenaar 	int orient, rv;
452194f7e29aSAlan Cox 
4522fd75d710SMarcel Moolenaar 	/*
4523fd75d710SMarcel Moolenaar 	 * The stack orientation is piggybacked with the cow argument.
4524fd75d710SMarcel Moolenaar 	 * Extract it into orient and mask the cow argument so that we
4525fd75d710SMarcel Moolenaar 	 * don't pass it around further.
4526fd75d710SMarcel Moolenaar 	 */
4527fd75d710SMarcel Moolenaar 	orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP);
4528fd75d710SMarcel Moolenaar 	KASSERT(orient != 0, ("No stack grow direction"));
452919bd0d9cSKonstantin Belousov 	KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP),
453019bd0d9cSKonstantin Belousov 	    ("bi-dir stack"));
4531fd75d710SMarcel Moolenaar 
45320f1e6ec5SMark Johnston 	if (max_ssize == 0 ||
45330f1e6ec5SMark Johnston 	    !vm_map_range_valid(map, addrbos, addrbos + max_ssize))
45349410cd7dSKonstantin Belousov 		return (KERN_INVALID_ADDRESS);
4535156e8654SKonstantin Belousov 	sgp = ((curproc->p_flag2 & P2_STKGAP_DISABLE) != 0 ||
4536156e8654SKonstantin Belousov 	    (curproc->p_fctl0 & NT_FREEBSD_FCTL_STKGAP_DISABLE) != 0) ? 0 :
4537fe69291fSKonstantin Belousov 	    (vm_size_t)stack_guard_page * PAGE_SIZE;
45389410cd7dSKonstantin Belousov 	if (sgp >= max_ssize)
45399410cd7dSKonstantin Belousov 		return (KERN_INVALID_ARGUMENT);
4540fd75d710SMarcel Moolenaar 
454119f49ad3SKonstantin Belousov 	init_ssize = growsize;
454219f49ad3SKonstantin Belousov 	if (max_ssize < init_ssize + sgp)
454319f49ad3SKonstantin Belousov 		init_ssize = max_ssize - sgp;
454494f7e29aSAlan Cox 
454594f7e29aSAlan Cox 	/* If addr is already mapped, no go */
4546d1d3f7e1SDoug Moore 	if (vm_map_lookup_entry(map, addrbos, &prev_entry))
454794f7e29aSAlan Cox 		return (KERN_NO_SPACE);
4548a69ac174SMatthew Dillon 
4549fd75d710SMarcel Moolenaar 	/*
4550763df3ecSPedro F. Giffuni 	 * If we can't accommodate max_ssize in the current mapping, no go.
455194f7e29aSAlan Cox 	 */
45527cdcf863SDoug Moore 	if (vm_map_entry_succ(prev_entry)->start < addrbos + max_ssize)
455394f7e29aSAlan Cox 		return (KERN_NO_SPACE);
455494f7e29aSAlan Cox 
4555fd75d710SMarcel Moolenaar 	/*
4556fd75d710SMarcel Moolenaar 	 * We initially map a stack of only init_ssize.  We will grow as
4557fd75d710SMarcel Moolenaar 	 * needed later.  Depending on the orientation of the stack (i.e.,
4558fd75d710SMarcel Moolenaar 	 * the grow direction), we map at the top of the range, at the
4559fd75d710SMarcel Moolenaar 	 * bottom of the range, or in the middle.
456094f7e29aSAlan Cox 	 *
4561fd75d710SMarcel Moolenaar 	 * Note: we would normally expect prot and max to be VM_PROT_ALL,
4562fd75d710SMarcel Moolenaar 	 * and cow to be 0.  Possibly we should eliminate these as input
4563fd75d710SMarcel Moolenaar 	 * parameters, and just pass these values here in the insert call.
456494f7e29aSAlan Cox 	 */
456519bd0d9cSKonstantin Belousov 	if (orient == MAP_STACK_GROWS_DOWN) {
4566fd75d710SMarcel Moolenaar 		bot = addrbos + max_ssize - init_ssize;
4567fd75d710SMarcel Moolenaar 		top = bot + init_ssize;
456819bd0d9cSKonstantin Belousov 		gap_bot = addrbos;
456919bd0d9cSKonstantin Belousov 		gap_top = bot;
457019bd0d9cSKonstantin Belousov 	} else /* if (orient == MAP_STACK_GROWS_UP) */ {
457119bd0d9cSKonstantin Belousov 		bot = addrbos;
457219bd0d9cSKonstantin Belousov 		top = bot + init_ssize;
457319bd0d9cSKonstantin Belousov 		gap_bot = top;
457419bd0d9cSKonstantin Belousov 		gap_top = addrbos + max_ssize;
457519bd0d9cSKonstantin Belousov 	}
4576fd75d710SMarcel Moolenaar 	rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
457719bd0d9cSKonstantin Belousov 	if (rv != KERN_SUCCESS)
457819bd0d9cSKonstantin Belousov 		return (rv);
45797cdcf863SDoug Moore 	new_entry = vm_map_entry_succ(prev_entry);
458019bd0d9cSKonstantin Belousov 	KASSERT(new_entry->end == top || new_entry->start == bot,
458119bd0d9cSKonstantin Belousov 	    ("Bad entry start/end for new stack entry"));
4582712efe66SAlan Cox 	KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
4583712efe66SAlan Cox 	    (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
4584712efe66SAlan Cox 	    ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
4585712efe66SAlan Cox 	KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
4586712efe66SAlan Cox 	    (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
4587712efe66SAlan Cox 	    ("new entry lacks MAP_ENTRY_GROWS_UP"));
4588fe69291fSKonstantin Belousov 	if (gap_bot == gap_top)
4589fe69291fSKonstantin Belousov 		return (KERN_SUCCESS);
459019bd0d9cSKonstantin Belousov 	rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE,
459119bd0d9cSKonstantin Belousov 	    VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ?
459219bd0d9cSKonstantin Belousov 	    MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP));
4593a7751d32SKonstantin Belousov 	if (rv == KERN_SUCCESS) {
4594a7751d32SKonstantin Belousov 		/*
4595a7751d32SKonstantin Belousov 		 * Gap can never successfully handle a fault, so
4596a7751d32SKonstantin Belousov 		 * read-ahead logic is never used for it.  Re-use
4597a7751d32SKonstantin Belousov 		 * next_read of the gap entry to store
4598a7751d32SKonstantin Belousov 		 * stack_guard_page for vm_map_growstack().
4599a7751d32SKonstantin Belousov 		 */
4600a7751d32SKonstantin Belousov 		if (orient == MAP_STACK_GROWS_DOWN)
46017cdcf863SDoug Moore 			vm_map_entry_pred(new_entry)->next_read = sgp;
4602a7751d32SKonstantin Belousov 		else
46037cdcf863SDoug Moore 			vm_map_entry_succ(new_entry)->next_read = sgp;
4604a7751d32SKonstantin Belousov 	} else {
460519bd0d9cSKonstantin Belousov 		(void)vm_map_delete(map, bot, top);
4606a7751d32SKonstantin Belousov 	}
460794f7e29aSAlan Cox 	return (rv);
460894f7e29aSAlan Cox }
460994f7e29aSAlan Cox 
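/*
 * Resulting layout for MAP_STACK_GROWS_DOWN, using hypothetical values
 * max_ssize = 8 pages, growsize = 2 pages, sgp = 1 page:
 *
 *	addrbos (gap_bot)         bot (gap_top)    addrbos + max_ssize (top)
 *	|<------ gap entry: 6 pages ------>|<---- stack: 2 pages ---->|
 *
 * Only init_ssize is mapped up front; the gap entry's next_read field
 * stores sgp so that vm_map_growstack() always keeps that much of the
 * gap in reserve as a guard.
 */
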
461019bd0d9cSKonstantin Belousov /*
461119bd0d9cSKonstantin Belousov  * Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if we
461219bd0d9cSKonstantin Belousov  * successfully grow the stack.
461394f7e29aSAlan Cox  */
461419bd0d9cSKonstantin Belousov static int
461519bd0d9cSKonstantin Belousov vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry)
461694f7e29aSAlan Cox {
461719bd0d9cSKonstantin Belousov 	vm_map_entry_t stack_entry;
461819bd0d9cSKonstantin Belousov 	struct proc *p;
461919bd0d9cSKonstantin Belousov 	struct vmspace *vm;
462019bd0d9cSKonstantin Belousov 	struct ucred *cred;
462119bd0d9cSKonstantin Belousov 	vm_offset_t gap_end, gap_start, grow_start;
4622fa581662SDoug Moore 	vm_size_t grow_amount, guard, max_grow;
46237e19eda4SAndrey Zonov 	rlim_t lmemlim, stacklim, vmemlim;
462419bd0d9cSKonstantin Belousov 	int rv, rv1;
462519bd0d9cSKonstantin Belousov 	bool gap_deleted, grow_down, is_procstack;
46261ba5ad42SEdward Tomasz Napierala #ifdef notyet
46271ba5ad42SEdward Tomasz Napierala 	uint64_t limit;
46281ba5ad42SEdward Tomasz Napierala #endif
4629afcc55f3SEdward Tomasz Napierala #ifdef RACCT
46301ba5ad42SEdward Tomasz Napierala 	int error;
4631afcc55f3SEdward Tomasz Napierala #endif
463223955314SAlfred Perlstein 
463319bd0d9cSKonstantin Belousov 	p = curproc;
463419bd0d9cSKonstantin Belousov 	vm = p->p_vmspace;
4635eb5ea878SKonstantin Belousov 
4636eb5ea878SKonstantin Belousov 	/*
4637eb5ea878SKonstantin Belousov 	 * Disallow stack growth when the access is performed by a
4638eb5ea878SKonstantin Belousov 	 * debugger or AIO daemon.  The reason is that the wrong
4639eb5ea878SKonstantin Belousov 	 * resource limits are applied.
4640eb5ea878SKonstantin Belousov 	 */
464110ae16c7SKonstantin Belousov 	if (p != initproc && (map != &p->p_vmspace->vm_map ||
464210ae16c7SKonstantin Belousov 	    p->p_textvp == NULL))
4643f758aaddSKonstantin Belousov 		return (KERN_FAILURE);
4644eb5ea878SKonstantin Belousov 
464519bd0d9cSKonstantin Belousov 	MPASS(!map->system_map);
464619bd0d9cSKonstantin Belousov 
4647f6f6d240SMateusz Guzik 	lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK);
4648f6f6d240SMateusz Guzik 	stacklim = lim_cur(curthread, RLIMIT_STACK);
4649f6f6d240SMateusz Guzik 	vmemlim = lim_cur(curthread, RLIMIT_VMEM);
465019bd0d9cSKonstantin Belousov retry:
465119bd0d9cSKonstantin Belousov 	/* If addr is not in a hole for a stack grow area, no need to grow. */
4652d1d3f7e1SDoug Moore 	if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry))
465319bd0d9cSKonstantin Belousov 		return (KERN_FAILURE);
465419bd0d9cSKonstantin Belousov 	if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0)
46550cddd8f0SMatthew Dillon 		return (KERN_SUCCESS);
465619bd0d9cSKonstantin Belousov 	if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) {
46577cdcf863SDoug Moore 		stack_entry = vm_map_entry_succ(gap_entry);
465819bd0d9cSKonstantin Belousov 		if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 ||
465919bd0d9cSKonstantin Belousov 		    stack_entry->start != gap_entry->end)
466019bd0d9cSKonstantin Belousov 			return (KERN_FAILURE);
466119bd0d9cSKonstantin Belousov 		grow_amount = round_page(stack_entry->start - addr);
466219bd0d9cSKonstantin Belousov 		grow_down = true;
466319bd0d9cSKonstantin Belousov 	} else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) {
46647cdcf863SDoug Moore 		stack_entry = vm_map_entry_pred(gap_entry);
466519bd0d9cSKonstantin Belousov 		if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 ||
466619bd0d9cSKonstantin Belousov 		    stack_entry->end != gap_entry->start)
466719bd0d9cSKonstantin Belousov 			return (KERN_FAILURE);
466819bd0d9cSKonstantin Belousov 		grow_amount = round_page(addr + 1 - stack_entry->end);
466919bd0d9cSKonstantin Belousov 		grow_down = false;
4670b21a0008SMarcel Moolenaar 	} else {
467119bd0d9cSKonstantin Belousov 		return (KERN_FAILURE);
4672b21a0008SMarcel Moolenaar 	}
4673156e8654SKonstantin Belousov 	guard = ((curproc->p_flag2 & P2_STKGAP_DISABLE) != 0 ||
4674156e8654SKonstantin Belousov 	    (curproc->p_fctl0 & NT_FREEBSD_FCTL_STKGAP_DISABLE) != 0) ? 0 :
4675fe69291fSKonstantin Belousov 	    gap_entry->next_read;
4676201f03b8SAlan Cox 	max_grow = gap_entry->end - gap_entry->start;
4677201f03b8SAlan Cox 	if (guard > max_grow)
4678201f03b8SAlan Cox 		return (KERN_NO_SPACE);
4679201f03b8SAlan Cox 	max_grow -= guard;
468019bd0d9cSKonstantin Belousov 	if (grow_amount > max_grow)
46810cddd8f0SMatthew Dillon 		return (KERN_NO_SPACE);
468294f7e29aSAlan Cox 
4683b21a0008SMarcel Moolenaar 	/*
4684b21a0008SMarcel Moolenaar 	 * If this is the main process stack, see if we're over the stack
4685b21a0008SMarcel Moolenaar 	 * limit.
468694f7e29aSAlan Cox 	 */
468719bd0d9cSKonstantin Belousov 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr &&
468819bd0d9cSKonstantin Belousov 	    addr < (vm_offset_t)p->p_sysent->sv_usrstack;
468919bd0d9cSKonstantin Belousov 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim))
46900cddd8f0SMatthew Dillon 		return (KERN_NO_SPACE);
469119bd0d9cSKonstantin Belousov 
4692afcc55f3SEdward Tomasz Napierala #ifdef RACCT
46934b5c9cf6SEdward Tomasz Napierala 	if (racct_enable) {
46941ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(p);
46954b5c9cf6SEdward Tomasz Napierala 		if (is_procstack && racct_set(p, RACCT_STACK,
46964b5c9cf6SEdward Tomasz Napierala 		    ctob(vm->vm_ssize) + grow_amount)) {
46971ba5ad42SEdward Tomasz Napierala 			PROC_UNLOCK(p);
46981ba5ad42SEdward Tomasz Napierala 			return (KERN_NO_SPACE);
46991ba5ad42SEdward Tomasz Napierala 		}
47001ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(p);
47014b5c9cf6SEdward Tomasz Napierala 	}
4702afcc55f3SEdward Tomasz Napierala #endif
470394f7e29aSAlan Cox 
470419bd0d9cSKonstantin Belousov 	grow_amount = roundup(grow_amount, sgrowsiz);
470519bd0d9cSKonstantin Belousov 	if (grow_amount > max_grow)
470619bd0d9cSKonstantin Belousov 		grow_amount = max_grow;
470791d5354aSJohn Baldwin 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
4708e4826248SAlan Cox 		grow_amount = trunc_page((vm_size_t)stacklim) -
4709e4826248SAlan Cox 		    ctob(vm->vm_ssize);
471094f7e29aSAlan Cox 	}
471119bd0d9cSKonstantin Belousov 
47121ba5ad42SEdward Tomasz Napierala #ifdef notyet
47131ba5ad42SEdward Tomasz Napierala 	PROC_LOCK(p);
47141ba5ad42SEdward Tomasz Napierala 	limit = racct_get_available(p, RACCT_STACK);
47151ba5ad42SEdward Tomasz Napierala 	PROC_UNLOCK(p);
47161ba5ad42SEdward Tomasz Napierala 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
47171ba5ad42SEdward Tomasz Napierala 		grow_amount = limit - ctob(vm->vm_ssize);
47181ba5ad42SEdward Tomasz Napierala #endif
471919bd0d9cSKonstantin Belousov 
472019bd0d9cSKonstantin Belousov 	if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
47213ac7d297SAndrey Zonov 		if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
47227e19eda4SAndrey Zonov 			rv = KERN_NO_SPACE;
47237e19eda4SAndrey Zonov 			goto out;
47247e19eda4SAndrey Zonov 		}
47257e19eda4SAndrey Zonov #ifdef RACCT
47264b5c9cf6SEdward Tomasz Napierala 		if (racct_enable) {
47277e19eda4SAndrey Zonov 			PROC_LOCK(p);
47287e19eda4SAndrey Zonov 			if (racct_set(p, RACCT_MEMLOCK,
47293ac7d297SAndrey Zonov 			    ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
47307e19eda4SAndrey Zonov 				PROC_UNLOCK(p);
47317e19eda4SAndrey Zonov 				rv = KERN_NO_SPACE;
47327e19eda4SAndrey Zonov 				goto out;
47337e19eda4SAndrey Zonov 			}
47347e19eda4SAndrey Zonov 			PROC_UNLOCK(p);
47354b5c9cf6SEdward Tomasz Napierala 		}
47367e19eda4SAndrey Zonov #endif
47377e19eda4SAndrey Zonov 	}
473819bd0d9cSKonstantin Belousov 
4739a69ac174SMatthew Dillon 	/* If we would blow our VMEM resource limit, no go */
474091d5354aSJohn Baldwin 	if (map->size + grow_amount > vmemlim) {
47411ba5ad42SEdward Tomasz Napierala 		rv = KERN_NO_SPACE;
47421ba5ad42SEdward Tomasz Napierala 		goto out;
4743a69ac174SMatthew Dillon 	}
4744afcc55f3SEdward Tomasz Napierala #ifdef RACCT
47454b5c9cf6SEdward Tomasz Napierala 	if (racct_enable) {
47461ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(p);
47471ba5ad42SEdward Tomasz Napierala 		if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
47481ba5ad42SEdward Tomasz Napierala 			PROC_UNLOCK(p);
47491ba5ad42SEdward Tomasz Napierala 			rv = KERN_NO_SPACE;
47501ba5ad42SEdward Tomasz Napierala 			goto out;
47511ba5ad42SEdward Tomasz Napierala 		}
47521ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(p);
47534b5c9cf6SEdward Tomasz Napierala 	}
4754afcc55f3SEdward Tomasz Napierala #endif
4755a69ac174SMatthew Dillon 
475619bd0d9cSKonstantin Belousov 	if (vm_map_lock_upgrade(map)) {
475719bd0d9cSKonstantin Belousov 		gap_entry = NULL;
475819bd0d9cSKonstantin Belousov 		vm_map_lock_read(map);
475919bd0d9cSKonstantin Belousov 		goto retry;
476094f7e29aSAlan Cox 	}
476194f7e29aSAlan Cox 
476219bd0d9cSKonstantin Belousov 	if (grow_down) {
476319bd0d9cSKonstantin Belousov 		grow_start = gap_entry->end - grow_amount;
476419bd0d9cSKonstantin Belousov 		if (gap_entry->start + grow_amount == gap_entry->end) {
476519bd0d9cSKonstantin Belousov 			gap_start = gap_entry->start;
476619bd0d9cSKonstantin Belousov 			gap_end = gap_entry->end;
476719bd0d9cSKonstantin Belousov 			vm_map_entry_delete(map, gap_entry);
476819bd0d9cSKonstantin Belousov 			gap_deleted = true;
476919bd0d9cSKonstantin Belousov 		} else {
477019bd0d9cSKonstantin Belousov 			MPASS(gap_entry->start < gap_entry->end - grow_amount);
4771fa581662SDoug Moore 			vm_map_entry_resize(map, gap_entry, -grow_amount);
477219bd0d9cSKonstantin Belousov 			gap_deleted = false;
477319bd0d9cSKonstantin Belousov 		}
477419bd0d9cSKonstantin Belousov 		rv = vm_map_insert(map, NULL, 0, grow_start,
477519bd0d9cSKonstantin Belousov 		    grow_start + grow_amount,
477619bd0d9cSKonstantin Belousov 		    stack_entry->protection, stack_entry->max_protection,
4777712efe66SAlan Cox 		    MAP_STACK_GROWS_DOWN);
477819bd0d9cSKonstantin Belousov 		if (rv != KERN_SUCCESS) {
477919bd0d9cSKonstantin Belousov 			if (gap_deleted) {
478019bd0d9cSKonstantin Belousov 				rv1 = vm_map_insert(map, NULL, 0, gap_start,
478119bd0d9cSKonstantin Belousov 				    gap_end, VM_PROT_NONE, VM_PROT_NONE,
478219bd0d9cSKonstantin Belousov 				    MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN);
478319bd0d9cSKonstantin Belousov 				MPASS(rv1 == KERN_SUCCESS);
47841895f520SDoug Moore 			} else
4785fa581662SDoug Moore 				vm_map_entry_resize(map, gap_entry,
47861895f520SDoug Moore 				    grow_amount);
478794f7e29aSAlan Cox 		}
4788b21a0008SMarcel Moolenaar 	} else {
478919bd0d9cSKonstantin Belousov 		grow_start = stack_entry->end;
4790ef694c1aSEdward Tomasz Napierala 		cred = stack_entry->cred;
4791ef694c1aSEdward Tomasz Napierala 		if (cred == NULL && stack_entry->object.vm_object != NULL)
4792ef694c1aSEdward Tomasz Napierala 			cred = stack_entry->object.vm_object->cred;
4793ef694c1aSEdward Tomasz Napierala 		if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred))
47943364c323SKonstantin Belousov 			rv = KERN_NO_SPACE;
4795b21a0008SMarcel Moolenaar 		/* Grow the underlying object if applicable. */
47963364c323SKonstantin Belousov 		else if (stack_entry->object.vm_object == NULL ||
4797b21a0008SMarcel Moolenaar 		    vm_object_coalesce(stack_entry->object.vm_object,
479857a21abaSAlan Cox 		    stack_entry->offset,
4799b21a0008SMarcel Moolenaar 		    (vm_size_t)(stack_entry->end - stack_entry->start),
4800fa581662SDoug Moore 		    grow_amount, cred != NULL)) {
4801fa581662SDoug Moore 			if (gap_entry->start + grow_amount == gap_entry->end) {
480219bd0d9cSKonstantin Belousov 				vm_map_entry_delete(map, gap_entry);
4803fa581662SDoug Moore 				vm_map_entry_resize(map, stack_entry,
4804fa581662SDoug Moore 				    grow_amount);
4805fa581662SDoug Moore 			} else {
480619bd0d9cSKonstantin Belousov 				gap_entry->start += grow_amount;
4807fa581662SDoug Moore 				stack_entry->end += grow_amount;
4808fa581662SDoug Moore 			}
480919bd0d9cSKonstantin Belousov 			map->size += grow_amount;
4810b21a0008SMarcel Moolenaar 			rv = KERN_SUCCESS;
4811b21a0008SMarcel Moolenaar 		} else
4812b21a0008SMarcel Moolenaar 			rv = KERN_FAILURE;
4813b21a0008SMarcel Moolenaar 	}
4814b21a0008SMarcel Moolenaar 	if (rv == KERN_SUCCESS && is_procstack)
4815b21a0008SMarcel Moolenaar 		vm->vm_ssize += btoc(grow_amount);
4816b21a0008SMarcel Moolenaar 
4817abd498aaSBruce M Simpson 	/*
4818abd498aaSBruce M Simpson 	 * Heed the MAP_WIREFUTURE flag if it was set for this process.
4819abd498aaSBruce M Simpson 	 */
482019bd0d9cSKonstantin Belousov 	if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
482154a3a114SMark Johnston 		rv = vm_map_wire_locked(map, grow_start,
482254a3a114SMark Johnston 		    grow_start + grow_amount,
4823212e02c8SKonstantin Belousov 		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
482454a3a114SMark Johnston 	}
482519bd0d9cSKonstantin Belousov 	vm_map_lock_downgrade(map);
4826abd498aaSBruce M Simpson 
48271ba5ad42SEdward Tomasz Napierala out:
4828afcc55f3SEdward Tomasz Napierala #ifdef RACCT
48294b5c9cf6SEdward Tomasz Napierala 	if (racct_enable && rv != KERN_SUCCESS) {
48301ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(p);
48311ba5ad42SEdward Tomasz Napierala 		error = racct_set(p, RACCT_VMEM, map->size);
48321ba5ad42SEdward Tomasz Napierala 		KASSERT(error == 0, ("decreasing RACCT_VMEM failed"));
48337e19eda4SAndrey Zonov 		if (!old_mlock) {
48347e19eda4SAndrey Zonov 			error = racct_set(p, RACCT_MEMLOCK,
48353ac7d297SAndrey Zonov 			    ptoa(pmap_wired_count(map->pmap)));
48367e19eda4SAndrey Zonov 			KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed"));
48377e19eda4SAndrey Zonov 		}
48381ba5ad42SEdward Tomasz Napierala 		error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize));
48391ba5ad42SEdward Tomasz Napierala 		KASSERT(error == 0, ("decreasing RACCT_STACK failed"));
48401ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(p);
48411ba5ad42SEdward Tomasz Napierala 	}
4842afcc55f3SEdward Tomasz Napierala #endif
48431ba5ad42SEdward Tomasz Napierala 
48440cddd8f0SMatthew Dillon 	return (rv);
484594f7e29aSAlan Cox }
484694f7e29aSAlan Cox 
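/*
 * Worked example (hypothetical): a fault one page below a grow-down
 * stack yields grow_amount = round_page(stack_entry->start - addr) =
 * 1 page.  That is rounded up to sgrowsiz (128 kB by default) and
 * clamped to the gap size minus the guard; the stack entry is then
 * extended by carving the amount off the end of the gap entry that
 * adjoins the stack, deleting the gap outright once it is consumed
 * entirely.
 */
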
4847df8bae1dSRodney W. Grimes /*
48485856e12eSJohn Dyson  * Unshare the specified VM space for exec.  If other processes
48495856e12eSJohn Dyson  * share it, then create a new one.  The new vmspace starts empty.
48505856e12eSJohn Dyson  */
485189b57fcfSKonstantin Belousov int
48523ebc1248SPeter Wemm vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
48531b40f8c0SMatthew Dillon {
48545856e12eSJohn Dyson 	struct vmspace *oldvmspace = p->p_vmspace;
48555856e12eSJohn Dyson 	struct vmspace *newvmspace;
48565856e12eSJohn Dyson 
48577032434eSKonstantin Belousov 	KASSERT((curthread->td_pflags & TDP_EXECVMSPC) == 0,
48587032434eSKonstantin Belousov 	    ("vmspace_exec recursed"));
48596e00f3a3SKonstantin Belousov 	newvmspace = vmspace_alloc(minuser, maxuser, pmap_pinit);
486089b57fcfSKonstantin Belousov 	if (newvmspace == NULL)
486189b57fcfSKonstantin Belousov 		return (ENOMEM);
486251ab6c28SAlan Cox 	newvmspace->vm_swrss = oldvmspace->vm_swrss;
48635856e12eSJohn Dyson 	/*
48645856e12eSJohn Dyson 	 * This code is written like this for prototype purposes.  The
48655856e12eSJohn Dyson 	 * goal is to avoid running down the vmspace here, but to let the
48665856e12eSJohn Dyson 	 * other processes that are still using the vmspace finally run
48675856e12eSJohn Dyson 	 * it down.  Even though there is little or no chance of blocking
48685856e12eSJohn Dyson 	 * here, it is a good idea to keep this form for future mods.
48695856e12eSJohn Dyson 	 */
487057051fdcSTor Egge 	PROC_VMSPACE_LOCK(p);
48715856e12eSJohn Dyson 	p->p_vmspace = newvmspace;
487257051fdcSTor Egge 	PROC_VMSPACE_UNLOCK(p);
48736617724cSJeff Roberson 	if (p == curthread->td_proc)
4874b40ce416SJulian Elischer 		pmap_activate(curthread);
48757032434eSKonstantin Belousov 	curthread->td_pflags |= TDP_EXECVMSPC;
487689b57fcfSKonstantin Belousov 	return (0);
48775856e12eSJohn Dyson }
48785856e12eSJohn Dyson 
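/*
 * Note that vmspace_exec() deliberately does not free the old vmspace.
 * The TDP_EXECVMSPC flag set above tells the exec path that the switch
 * happened, and the caller drops the old reference with vmspace_free()
 * once the new image is committed.
 */
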
48795856e12eSJohn Dyson /*
48805856e12eSJohn Dyson  * Unshare the specified VM space for forcing COW.  This
48815856e12eSJohn Dyson  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
48825856e12eSJohn Dyson  */
488389b57fcfSKonstantin Belousov int
48841b40f8c0SMatthew Dillon vmspace_unshare(struct proc *p)
48851b40f8c0SMatthew Dillon {
48865856e12eSJohn Dyson 	struct vmspace *oldvmspace = p->p_vmspace;
48875856e12eSJohn Dyson 	struct vmspace *newvmspace;
48883364c323SKonstantin Belousov 	vm_ooffset_t fork_charge;
48895856e12eSJohn Dyson 
4890f7db0c95SMark Johnston 	if (refcount_load(&oldvmspace->vm_refcnt) == 1)
489189b57fcfSKonstantin Belousov 		return (0);
48923364c323SKonstantin Belousov 	fork_charge = 0;
48933364c323SKonstantin Belousov 	newvmspace = vmspace_fork(oldvmspace, &fork_charge);
489489b57fcfSKonstantin Belousov 	if (newvmspace == NULL)
489589b57fcfSKonstantin Belousov 		return (ENOMEM);
4896ef694c1aSEdward Tomasz Napierala 	if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) {
48973364c323SKonstantin Belousov 		vmspace_free(newvmspace);
48983364c323SKonstantin Belousov 		return (ENOMEM);
48993364c323SKonstantin Belousov 	}
490057051fdcSTor Egge 	PROC_VMSPACE_LOCK(p);
49015856e12eSJohn Dyson 	p->p_vmspace = newvmspace;
490257051fdcSTor Egge 	PROC_VMSPACE_UNLOCK(p);
49036617724cSJeff Roberson 	if (p == curthread->td_proc)
4904b40ce416SJulian Elischer 		pmap_activate(curthread);
4905b56ef1c1SJohn Baldwin 	vmspace_free(oldvmspace);
490689b57fcfSKonstantin Belousov 	return (0);
49075856e12eSJohn Dyson }
49085856e12eSJohn Dyson 
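/*
 * Caller sketch (simplified): the rfork(2) path reaches this function
 * when memory is not to be shared with the child:
 */
#if 0
	if ((flags & RFMEM) == 0) {
		error = vmspace_unshare(p1);
		if (error)
			return (error);
	}
#endif
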
49095856e12eSJohn Dyson /*
4910df8bae1dSRodney W. Grimes  *	vm_map_lookup:
4911df8bae1dSRodney W. Grimes  *
4912df8bae1dSRodney W. Grimes  *	Finds the VM object, offset, and
4913df8bae1dSRodney W. Grimes  *	protection for a given virtual address in the
4914df8bae1dSRodney W. Grimes  *	specified map, assuming a page fault of the
4915df8bae1dSRodney W. Grimes  *	type specified.
4916df8bae1dSRodney W. Grimes  *
4917df8bae1dSRodney W. Grimes  *	Leaves the map in question locked for read; return
4918df8bae1dSRodney W. Grimes  *	values are guaranteed until a vm_map_lookup_done
4919df8bae1dSRodney W. Grimes  *	call is performed.  Note that the map argument
4920df8bae1dSRodney W. Grimes  *	is in/out; the returned map must be used in
4921df8bae1dSRodney W. Grimes  *	the call to vm_map_lookup_done.
4922df8bae1dSRodney W. Grimes  *
4923df8bae1dSRodney W. Grimes  *	A handle (out_entry) is returned for use in
4924df8bae1dSRodney W. Grimes  *	vm_map_lookup_done, to make that fast.
4925df8bae1dSRodney W. Grimes  *
4926df8bae1dSRodney W. Grimes  *	If a lookup is requested with "write protection"
4927df8bae1dSRodney W. Grimes  *	specified, the map may be changed to perform virtual
4928df8bae1dSRodney W. Grimes  *	copying operations, although the data referenced will
4929df8bae1dSRodney W. Grimes  *	remain the same.
4930df8bae1dSRodney W. Grimes  */
4931df8bae1dSRodney W. Grimes int
4932b9dcd593SBruce Evans vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
4933b9dcd593SBruce Evans 	      vm_offset_t vaddr,
493447221757SJohn Dyson 	      vm_prot_t fault_typea,
4935b9dcd593SBruce Evans 	      vm_map_entry_t *out_entry,	/* OUT */
4936b9dcd593SBruce Evans 	      vm_object_t *object,		/* OUT */
4937b9dcd593SBruce Evans 	      vm_pindex_t *pindex,		/* OUT */
4938b9dcd593SBruce Evans 	      vm_prot_t *out_prot,		/* OUT */
49392d8acc0fSJohn Dyson 	      boolean_t *wired)			/* OUT */
4940df8bae1dSRodney W. Grimes {
4941c0877f10SJohn Dyson 	vm_map_entry_t entry;
4942c0877f10SJohn Dyson 	vm_map_t map = *var_map;
4943c0877f10SJohn Dyson 	vm_prot_t prot;
4944a6f21d15SMark Johnston 	vm_prot_t fault_type;
49453364c323SKonstantin Belousov 	vm_object_t eobject;
49460cc74f14SAlan Cox 	vm_size_t size;
4947ef694c1aSEdward Tomasz Napierala 	struct ucred *cred;
4948df8bae1dSRodney W. Grimes 
494919bd0d9cSKonstantin Belousov RetryLookup:
4950df8bae1dSRodney W. Grimes 
4951df8bae1dSRodney W. Grimes 	vm_map_lock_read(map);
4952df8bae1dSRodney W. Grimes 
495319bd0d9cSKonstantin Belousov RetryLookupLocked:
4954df8bae1dSRodney W. Grimes 	/*
49554c3ef59eSAlan Cox 	 * Lookup the faulting address.
4956df8bae1dSRodney W. Grimes 	 */
4957095104acSAlan Cox 	if (!vm_map_lookup_entry(map, vaddr, out_entry)) {
4958095104acSAlan Cox 		vm_map_unlock_read(map);
4959095104acSAlan Cox 		return (KERN_INVALID_ADDRESS);
4960095104acSAlan Cox 	}
4961df8bae1dSRodney W. Grimes 
49624e94f402SAlan Cox 	entry = *out_entry;
4963b7b2aac2SJohn Dyson 
4964df8bae1dSRodney W. Grimes 	/*
4965df8bae1dSRodney W. Grimes 	 * Handle submaps.
4966df8bae1dSRodney W. Grimes 	 */
4967afa07f7eSJohn Dyson 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
4968df8bae1dSRodney W. Grimes 		vm_map_t old_map = map;
4969df8bae1dSRodney W. Grimes 
4970df8bae1dSRodney W. Grimes 		*var_map = map = entry->object.sub_map;
4971df8bae1dSRodney W. Grimes 		vm_map_unlock_read(old_map);
4972df8bae1dSRodney W. Grimes 		goto RetryLookup;
4973df8bae1dSRodney W. Grimes 	}
4974a04c970aSJohn Dyson 
4975df8bae1dSRodney W. Grimes 	/*
49760d94caffSDavid Greenman 	 * Check whether this task is allowed to have this page.
4977df8bae1dSRodney W. Grimes 	 */
4978df8bae1dSRodney W. Grimes 	prot = entry->protection;
497919bd0d9cSKonstantin Belousov 	if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) {
498019bd0d9cSKonstantin Belousov 		fault_typea &= ~VM_PROT_FAULT_LOOKUP;
498119bd0d9cSKonstantin Belousov 		if (prot == VM_PROT_NONE && map != kernel_map &&
498219bd0d9cSKonstantin Belousov 		    (entry->eflags & MAP_ENTRY_GUARD) != 0 &&
498319bd0d9cSKonstantin Belousov 		    (entry->eflags & (MAP_ENTRY_STACK_GAP_DN |
498419bd0d9cSKonstantin Belousov 		    MAP_ENTRY_STACK_GAP_UP)) != 0 &&
498519bd0d9cSKonstantin Belousov 		    vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS)
498619bd0d9cSKonstantin Belousov 			goto RetryLookupLocked;
498719bd0d9cSKonstantin Belousov 	}
4988a6f21d15SMark Johnston 	fault_type = fault_typea & VM_PROT_ALL;
49892db65ab4SAlan Cox 	if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
4990095104acSAlan Cox 		vm_map_unlock_read(map);
4991095104acSAlan Cox 		return (KERN_PROTECTION_FAILURE);
499247221757SJohn Dyson 	}
4993b8db9776SKonstantin Belousov 	KASSERT((prot & VM_PROT_WRITE) == 0 || (entry->eflags &
4994b8db9776SKonstantin Belousov 	    (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY)) !=
4995b8db9776SKonstantin Belousov 	    (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY),
4996b8db9776SKonstantin Belousov 	    ("entry %p flags %x", entry, entry->eflags));
49975b3e0257SDag-Erling Smørgrav 	if ((fault_typea & VM_PROT_COPY) != 0 &&
49985b3e0257SDag-Erling Smørgrav 	    (entry->max_protection & VM_PROT_WRITE) == 0 &&
49995b3e0257SDag-Erling Smørgrav 	    (entry->eflags & MAP_ENTRY_COW) == 0) {
50005b3e0257SDag-Erling Smørgrav 		vm_map_unlock_read(map);
50015b3e0257SDag-Erling Smørgrav 		return (KERN_PROTECTION_FAILURE);
50025b3e0257SDag-Erling Smørgrav 	}
5003df8bae1dSRodney W. Grimes 
5004df8bae1dSRodney W. Grimes 	/*
50050d94caffSDavid Greenman 	 * If this page is not pageable, we have to get it for all possible
50060d94caffSDavid Greenman 	 * accesses.
5007df8bae1dSRodney W. Grimes 	 */
500805f0fdd2SPoul-Henning Kamp 	*wired = (entry->wired_count != 0);
500905f0fdd2SPoul-Henning Kamp 	if (*wired)
5010a6d42a0dSAlan Cox 		fault_type = entry->protection;
50113364c323SKonstantin Belousov 	size = entry->end - entry->start;
501267388836SKonstantin Belousov 
5013df8bae1dSRodney W. Grimes 	/*
5014df8bae1dSRodney W. Grimes 	 * If the entry was copy-on-write, we either ...
5015df8bae1dSRodney W. Grimes 	 */
5016afa07f7eSJohn Dyson 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
5017df8bae1dSRodney W. Grimes 		/*
50180d94caffSDavid Greenman 		 * If we want to write the page, we may as well handle that
5019ad5fca3bSAlan Cox 		 * now since we've got the map locked.
5020df8bae1dSRodney W. Grimes 		 *
50210d94caffSDavid Greenman 		 * If we don't need to write the page, we just demote the
50220d94caffSDavid Greenman 		 * permissions allowed.
5023df8bae1dSRodney W. Grimes 		 */
5024a6d42a0dSAlan Cox 		if ((fault_type & VM_PROT_WRITE) != 0 ||
5025a6d42a0dSAlan Cox 		    (fault_typea & VM_PROT_COPY) != 0) {
5026df8bae1dSRodney W. Grimes 			/*
50270d94caffSDavid Greenman 			 * Make a new object, and place it in the object
50280d94caffSDavid Greenman 			 * chain.  Note that no new references have appeared
5029ad5fca3bSAlan Cox 			 * -- one just moved from the map to the new
50300d94caffSDavid Greenman 			 * object.
5031df8bae1dSRodney W. Grimes 			 */
503225adb370SBrian Feldman 			if (vm_map_lock_upgrade(map))
5033df8bae1dSRodney W. Grimes 				goto RetryLookup;
50349917e010SAlan Cox 
5035ef694c1aSEdward Tomasz Napierala 			if (entry->cred == NULL) {
50363364c323SKonstantin Belousov 				/*
50373364c323SKonstantin Belousov 				 * The debugger owner is charged for
50383364c323SKonstantin Belousov 				 * the memory.
50393364c323SKonstantin Belousov 				 */
5040ef694c1aSEdward Tomasz Napierala 				cred = curthread->td_ucred;
5041ef694c1aSEdward Tomasz Napierala 				crhold(cred);
5042ef694c1aSEdward Tomasz Napierala 				if (!swap_reserve_by_cred(size, cred)) {
5043ef694c1aSEdward Tomasz Napierala 					crfree(cred);
50443364c323SKonstantin Belousov 					vm_map_unlock(map);
50453364c323SKonstantin Belousov 					return (KERN_RESOURCE_SHORTAGE);
50463364c323SKonstantin Belousov 				}
5047ef694c1aSEdward Tomasz Napierala 				entry->cred = cred;
50483364c323SKonstantin Belousov 			}
50493364c323SKonstantin Belousov 			eobject = entry->object.vm_object;
505067388836SKonstantin Belousov 			vm_object_shadow(&entry->object.vm_object,
505167388836SKonstantin Belousov 			    &entry->offset, size, entry->cred, false);
505267388836SKonstantin Belousov 			if (eobject == entry->object.vm_object) {
50533364c323SKonstantin Belousov 				/*
50543364c323SKonstantin Belousov 				 * The object was not shadowed.
50553364c323SKonstantin Belousov 				 */
5056ef694c1aSEdward Tomasz Napierala 				swap_release_by_cred(size, entry->cred);
5057ef694c1aSEdward Tomasz Napierala 				crfree(entry->cred);
50583364c323SKonstantin Belousov 			}
505967388836SKonstantin Belousov 			entry->cred = NULL;
506067388836SKonstantin Belousov 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
50619917e010SAlan Cox 
50629b09b6c7SMatthew Dillon 			vm_map_lock_downgrade(map);
50630d94caffSDavid Greenman 		} else {
5064df8bae1dSRodney W. Grimes 			/*
50650d94caffSDavid Greenman 			 * We're attempting to read a copy-on-write page --
50660d94caffSDavid Greenman 			 * don't allow writes.
5067df8bae1dSRodney W. Grimes 			 */
50682d8acc0fSJohn Dyson 			prot &= ~VM_PROT_WRITE;
5069df8bae1dSRodney W. Grimes 		}
5070df8bae1dSRodney W. Grimes 	}
50712d8acc0fSJohn Dyson 
5072df8bae1dSRodney W. Grimes 	/*
5073df8bae1dSRodney W. Grimes 	 * Create an object if necessary.
5074df8bae1dSRodney W. Grimes 	 */
507567388836SKonstantin Belousov 	if (entry->object.vm_object == NULL && !map->system_map) {
507625adb370SBrian Feldman 		if (vm_map_lock_upgrade(map))
5077df8bae1dSRodney W. Grimes 			goto RetryLookup;
507867388836SKonstantin Belousov 		entry->object.vm_object = vm_object_allocate_anon(atop(size),
507967388836SKonstantin Belousov 		    NULL, entry->cred, entry->cred != NULL ? size : 0);
5080df8bae1dSRodney W. Grimes 		entry->offset = 0;
5081ef694c1aSEdward Tomasz Napierala 		entry->cred = NULL;
50829b09b6c7SMatthew Dillon 		vm_map_lock_downgrade(map);
5083df8bae1dSRodney W. Grimes 	}
5084b5b40fa6SJohn Dyson 
5085df8bae1dSRodney W. Grimes 	/*
50860d94caffSDavid Greenman 	 * Return the object/offset from this entry.  If the entry was
50870d94caffSDavid Greenman 	 * copy-on-write or empty, it has been fixed up.
5088df8bae1dSRodney W. Grimes 	 */
508910d9120cSKonstantin Belousov 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
5090df8bae1dSRodney W. Grimes 	*object = entry->object.vm_object;
5091df8bae1dSRodney W. Grimes 
5092df8bae1dSRodney W. Grimes 	*out_prot = prot;
5093df8bae1dSRodney W. Grimes 	return (KERN_SUCCESS);
5094df8bae1dSRodney W. Grimes }
5095df8bae1dSRodney W. Grimes 
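/*
 * Usage sketch (simplified from the fault handler): the entry handle
 * returned in out_entry pairs the lookup with vm_map_lookup_done().
 */
#if 0
	result = vm_map_lookup(&map, vaddr,
	    fault_type | VM_PROT_FAULT_LOOKUP,
	    &entry, &object, &pindex, &prot, &wired);
	if (result != KERN_SUCCESS)
		return (result);
	/* ... access object/pindex with the map read-locked ... */
	vm_map_lookup_done(map, entry);
#endif
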
5096df8bae1dSRodney W. Grimes /*
509719dc5607STor Egge  *	vm_map_lookup_locked:
509819dc5607STor Egge  *
509919dc5607STor Egge  *	Lookup the faulting address.  A version of vm_map_lookup that returns
510019dc5607STor Egge  *      KERN_FAILURE instead of blocking on map lock or memory allocation.
510119dc5607STor Egge  */
510219dc5607STor Egge int
510319dc5607STor Egge vm_map_lookup_locked(vm_map_t *var_map,		/* IN/OUT */
510419dc5607STor Egge 		     vm_offset_t vaddr,
510519dc5607STor Egge 		     vm_prot_t fault_typea,
510619dc5607STor Egge 		     vm_map_entry_t *out_entry,	/* OUT */
510719dc5607STor Egge 		     vm_object_t *object,	/* OUT */
510819dc5607STor Egge 		     vm_pindex_t *pindex,	/* OUT */
510919dc5607STor Egge 		     vm_prot_t *out_prot,	/* OUT */
511019dc5607STor Egge 		     boolean_t *wired)		/* OUT */
511119dc5607STor Egge {
511219dc5607STor Egge 	vm_map_entry_t entry;
511319dc5607STor Egge 	vm_map_t map = *var_map;
511419dc5607STor Egge 	vm_prot_t prot;
511519dc5607STor Egge 	vm_prot_t fault_type = fault_typea;
511619dc5607STor Egge 
511719dc5607STor Egge 	/*
51184c3ef59eSAlan Cox 	 * Look up the faulting address.
511919dc5607STor Egge 	 */
512019dc5607STor Egge 	if (!vm_map_lookup_entry(map, vaddr, out_entry))
512119dc5607STor Egge 		return (KERN_INVALID_ADDRESS);
512219dc5607STor Egge 
512319dc5607STor Egge 	entry = *out_entry;
512419dc5607STor Egge 
512519dc5607STor Egge 	/*
512619dc5607STor Egge 	 * Fail if the entry refers to a submap.
512719dc5607STor Egge 	 */
512819dc5607STor Egge 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
512919dc5607STor Egge 		return (KERN_FAILURE);
513019dc5607STor Egge 
513119dc5607STor Egge 	/*
513219dc5607STor Egge 	 * Check whether this task is allowed to have this page.
513319dc5607STor Egge 	 */
513419dc5607STor Egge 	prot = entry->protection;
513519dc5607STor Egge 	fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
513619dc5607STor Egge 	if ((fault_type & prot) != fault_type)
513719dc5607STor Egge 		return (KERN_PROTECTION_FAILURE);
513819dc5607STor Egge 
513919dc5607STor Egge 	/*
514019dc5607STor Egge 	 * If this page is not pageable, we have to get it for all possible
514119dc5607STor Egge 	 * accesses.
514219dc5607STor Egge 	 */
514319dc5607STor Egge 	*wired = (entry->wired_count != 0);
514419dc5607STor Egge 	if (*wired)
5145a6d42a0dSAlan Cox 		fault_type = entry->protection;
514619dc5607STor Egge 
514719dc5607STor Egge 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
514819dc5607STor Egge 		/*
514919dc5607STor Egge 		 * Fail if the entry was copy-on-write for a write fault.
515019dc5607STor Egge 		 */
515119dc5607STor Egge 		if (fault_type & VM_PROT_WRITE)
515219dc5607STor Egge 			return (KERN_FAILURE);
515319dc5607STor Egge 		/*
515419dc5607STor Egge 		 * We're attempting to read a copy-on-write page --
515519dc5607STor Egge 		 * don't allow writes.
515619dc5607STor Egge 		 */
515719dc5607STor Egge 		prot &= ~VM_PROT_WRITE;
515819dc5607STor Egge 	}
515919dc5607STor Egge 
516019dc5607STor Egge 	/*
516119dc5607STor Egge 	 * Fail if an object would have to be created.
516219dc5607STor Egge 	 */
516319dc5607STor Egge 	if (entry->object.vm_object == NULL && !map->system_map)
516419dc5607STor Egge 		return (KERN_FAILURE);
516519dc5607STor Egge 
516619dc5607STor Egge 	/*
516719dc5607STor Egge 	 * Return the object/offset from this entry.  If the entry was
516819dc5607STor Egge 	 * copy-on-write or empty, it has been fixed up.
516919dc5607STor Egge 	 */
517010d9120cSKonstantin Belousov 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
517119dc5607STor Egge 	*object = entry->object.vm_object;
517219dc5607STor Egge 
517319dc5607STor Egge 	*out_prot = prot;
517419dc5607STor Egge 	return (KERN_SUCCESS);
517519dc5607STor Egge }
517619dc5607STor Egge 
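/*
 * Illustrative sketch, not part of the source: the _locked variant
 * requires the map lock to be held on entry and fails rather than
 * sleeping, so the caller continues to manage its own lock and no
 * vm_map_lookup_done() call is needed.  The helper example_translate()
 * is hypothetical, invented for this sketch.
 */
#if 0	/* example only, not compiled */
static int
example_translate(vm_map_t map, vm_offset_t vaddr, vm_object_t *objp,
    vm_pindex_t *pidxp)
{
	vm_map_entry_t entry;
	vm_prot_t prot;
	boolean_t wired;
	int rv;

	vm_map_lock_read(map);
	rv = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ, &entry,
	    objp, pidxp, &prot, &wired);
	vm_map_unlock_read(map);
	return (rv);
}
#endif
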
517719dc5607STor Egge /*
5178df8bae1dSRodney W. Grimes  *	vm_map_lookup_done:
5179df8bae1dSRodney W. Grimes  *
5180df8bae1dSRodney W. Grimes  *	Releases locks acquired by a vm_map_lookup
5181df8bae1dSRodney W. Grimes  *	(according to the handle returned by that lookup).
5182df8bae1dSRodney W. Grimes  */
51830d94caffSDavid Greenman void
51841b40f8c0SMatthew Dillon vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
5185df8bae1dSRodney W. Grimes {
5186df8bae1dSRodney W. Grimes 	/*
5187df8bae1dSRodney W. Grimes 	 * Unlock the main-level map
5188df8bae1dSRodney W. Grimes 	 */
5189df8bae1dSRodney W. Grimes 	vm_map_unlock_read(map);
5190df8bae1dSRodney W. Grimes }
5191df8bae1dSRodney W. Grimes 
519219ea042eSKonstantin Belousov vm_offset_t
519319ea042eSKonstantin Belousov vm_map_max_KBI(const struct vm_map *map)
519419ea042eSKonstantin Belousov {
519519ea042eSKonstantin Belousov 
5196f0165b1cSKonstantin Belousov 	return (vm_map_max(map));
519719ea042eSKonstantin Belousov }
519819ea042eSKonstantin Belousov 
519919ea042eSKonstantin Belousov vm_offset_t
520019ea042eSKonstantin Belousov vm_map_min_KBI(const struct vm_map *map)
520119ea042eSKonstantin Belousov {
520219ea042eSKonstantin Belousov 
5203f0165b1cSKonstantin Belousov 	return (vm_map_min(map));
520419ea042eSKonstantin Belousov }
520519ea042eSKonstantin Belousov 
520619ea042eSKonstantin Belousov pmap_t
520719ea042eSKonstantin Belousov vm_map_pmap_KBI(vm_map_t map)
520819ea042eSKonstantin Belousov {
520919ea042eSKonstantin Belousov 
521019ea042eSKonstantin Belousov 	return (map->pmap);
521119ea042eSKonstantin Belousov }
521219ea042eSKonstantin Belousov 
5213a7752896SMark Johnston bool
5214a7752896SMark Johnston vm_map_range_valid_KBI(vm_map_t map, vm_offset_t start, vm_offset_t end)
5215a7752896SMark Johnston {
5216a7752896SMark Johnston 
5217a7752896SMark Johnston 	return (vm_map_range_valid(map, start, end));
5218a7752896SMark Johnston }
5219a7752896SMark Johnston 
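/*
 * Illustrative note, not part of the source: vm_map_max(), vm_map_min(),
 * vm_map_pmap() and vm_map_range_valid() are inline accessors whose
 * expansions depend on struct vm_map's layout.  The _KBI wrappers above
 * give kernel modules a stable function-call interface instead, keeping
 * the structure layout out of the module binary interface; vm_map.h
 * redirects the inline names to these wrappers when building a module.
 * A hypothetical module can therefore just write:
 */
#if 0	/* example only, not compiled */
static bool
example_va_in_map(vm_map_t map, vm_offset_t va)
{

	return (va >= vm_map_min(map) && va < vm_map_max(map));
}
#endif
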
5220721899b1SDoug Moore #ifdef INVARIANTS
5221721899b1SDoug Moore static void
5222461587dcSDoug Moore _vm_map_assert_consistent(vm_map_t map, int check)
5223721899b1SDoug Moore {
5224721899b1SDoug Moore 	vm_map_entry_t entry, prev;
5225c1ad5342SDoug Moore 	vm_map_entry_t cur, header, lbound, ubound;
5226721899b1SDoug Moore 	vm_size_t max_left, max_right;
5227721899b1SDoug Moore 
522885b7bedbSDoug Moore #ifdef DIAGNOSTIC
522985b7bedbSDoug Moore 	++map->nupdates;
523085b7bedbSDoug Moore #endif
5231461587dcSDoug Moore 	if (enable_vmmap_check != check)
5232721899b1SDoug Moore 		return;
5233721899b1SDoug Moore 
5234c1ad5342SDoug Moore 	header = prev = &map->header;
5235721899b1SDoug Moore 	VM_MAP_ENTRY_FOREACH(entry, map) {
5236721899b1SDoug Moore 		KASSERT(prev->end <= entry->start,
5237721899b1SDoug Moore 		    ("map %p prev->end = %jx, start = %jx", map,
5238721899b1SDoug Moore 		    (uintmax_t)prev->end, (uintmax_t)entry->start));
5239721899b1SDoug Moore 		KASSERT(entry->start < entry->end,
5240721899b1SDoug Moore 		    ("map %p start = %jx, end = %jx", map,
5241721899b1SDoug Moore 		    (uintmax_t)entry->start, (uintmax_t)entry->end));
5242c1ad5342SDoug Moore 		KASSERT(entry->left == header ||
5243721899b1SDoug Moore 		    entry->left->start < entry->start,
5244721899b1SDoug Moore 		    ("map %p left->start = %jx, start = %jx", map,
5245721899b1SDoug Moore 		    (uintmax_t)entry->left->start, (uintmax_t)entry->start));
5246c1ad5342SDoug Moore 		KASSERT(entry->right == header ||
5247721899b1SDoug Moore 		    entry->start < entry->right->start,
5248721899b1SDoug Moore 		    ("map %p start = %jx, right->start = %jx", map,
5249721899b1SDoug Moore 		    (uintmax_t)entry->start, (uintmax_t)entry->right->start));
5250c1ad5342SDoug Moore 		cur = map->root;
5251c1ad5342SDoug Moore 		lbound = ubound = header;
5252c1ad5342SDoug Moore 		for (;;) {
5253c1ad5342SDoug Moore 			if (entry->start < cur->start) {
5254c1ad5342SDoug Moore 				ubound = cur;
5255c1ad5342SDoug Moore 				cur = cur->left;
5256c1ad5342SDoug Moore 				KASSERT(cur != lbound,
5257c1ad5342SDoug Moore 				    ("map %p cannot find %jx",
5258c0829bb1SMark Johnston 				    map, (uintmax_t)entry->start));
5259c1ad5342SDoug Moore 			} else if (cur->end <= entry->start) {
5260c1ad5342SDoug Moore 				lbound = cur;
5261c1ad5342SDoug Moore 				cur = cur->right;
5262c1ad5342SDoug Moore 				KASSERT(cur != ubound,
5263c1ad5342SDoug Moore 				    ("map %p cannot find %jx",
5264c0829bb1SMark Johnston 				    map, (uintmax_t)entry->start));
5265c1ad5342SDoug Moore 			} else {
5266c1ad5342SDoug Moore 				KASSERT(cur == entry,
5267c1ad5342SDoug Moore 				    ("map %p cannot find %jx",
5268c0829bb1SMark Johnston 				    map, (uintmax_t)entry->start));
5269c1ad5342SDoug Moore 				break;
5270c1ad5342SDoug Moore 			}
5271c1ad5342SDoug Moore 		}
5272c1ad5342SDoug Moore 		max_left = vm_map_entry_max_free_left(entry, lbound);
5273c1ad5342SDoug Moore 		max_right = vm_map_entry_max_free_right(entry, ubound);
5274c1ad5342SDoug Moore 		KASSERT(entry->max_free == vm_size_max(max_left, max_right),
5275721899b1SDoug Moore 		    ("map %p max = %jx, max_left = %jx, max_right = %jx", map,
5276721899b1SDoug Moore 		    (uintmax_t)entry->max_free,
5277721899b1SDoug Moore 		    (uintmax_t)max_left, (uintmax_t)max_right));
5278721899b1SDoug Moore 		prev = entry;
5279721899b1SDoug Moore 	}
5280721899b1SDoug Moore 	KASSERT(prev->end <= entry->start,
5281721899b1SDoug Moore 	    ("map %p prev->end = %jx, start = %jx", map,
5282721899b1SDoug Moore 	    (uintmax_t)prev->end, (uintmax_t)entry->start));
5283721899b1SDoug Moore }
5284721899b1SDoug Moore #endif
5285721899b1SDoug Moore 
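/*
 * Illustrative note, not part of the source: the checker above verifies,
 * for each entry, that (1) entries are sorted and disjoint
 * (prev->end <= entry->start < entry->end), (2) the entry is reachable
 * from map->root by the ordinary binary-search descent, and (3) the
 * max_free augmentation is exact:
 *
 *	entry->max_free ==
 *	    max(vm_map_entry_max_free_left(entry, lbound),
 *		vm_map_entry_max_free_right(entry, ubound))
 *
 * with lbound/ubound the in-order neighbors met during the descent.
 * An exact max_free is what lets free-space searches prune whole
 * subtrees that cannot contain a gap of the requested size.
 */
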
5286c7c34a24SBruce Evans #include "opt_ddb.h"
5287c3cb3e12SDavid Greenman #ifdef DDB
5288c7c34a24SBruce Evans #include <sys/kernel.h>
5289c7c34a24SBruce Evans 
5290c7c34a24SBruce Evans #include <ddb/ddb.h>
5291c7c34a24SBruce Evans 
52922ebcd458SAttilio Rao static void
52932ebcd458SAttilio Rao vm_map_print(vm_map_t map)
5294df8bae1dSRodney W. Grimes {
529577131528SDoug Moore 	vm_map_entry_t entry, prev;
5296c7c34a24SBruce Evans 
5297e5f251d2SAlan Cox 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
5298e5f251d2SAlan Cox 	    (void *)map,
5299101eeb7fSBruce Evans 	    (void *)map->pmap, map->nentries, map->timestamp);
5300df8bae1dSRodney W. Grimes 
5301c7c34a24SBruce Evans 	db_indent += 2;
5302721899b1SDoug Moore 	prev = &map->header;
5303721899b1SDoug Moore 	VM_MAP_ENTRY_FOREACH(entry, map) {
530419bd0d9cSKonstantin Belousov 		db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x,\n",
530519bd0d9cSKonstantin Belousov 		    (void *)entry, (void *)entry->start, (void *)entry->end,
530619bd0d9cSKonstantin Belousov 		    entry->eflags);
5307e5f251d2SAlan Cox 		{
5308eaa17d42SRyan Libby 			static const char * const inheritance_name[4] =
5309df8bae1dSRodney W. Grimes 			{"share", "copy", "none", "donate_copy"};
53100d94caffSDavid Greenman 
531195e5e988SJohn Dyson 			db_iprintf(" prot=%x/%x/%s",
5312df8bae1dSRodney W. Grimes 			    entry->protection,
5313df8bae1dSRodney W. Grimes 			    entry->max_protection,
531477131528SDoug Moore 			    inheritance_name[(int)(unsigned char)
531577131528SDoug Moore 			    entry->inheritance]);
5316df8bae1dSRodney W. Grimes 			if (entry->wired_count != 0)
531795e5e988SJohn Dyson 				db_printf(", wired");
5318df8bae1dSRodney W. Grimes 		}
53199fdfe602SMatthew Dillon 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
5320cd034a5bSMaxime Henrion 			db_printf(", share=%p, offset=0x%jx\n",
53219fdfe602SMatthew Dillon 			    (void *)entry->object.sub_map,
5322cd034a5bSMaxime Henrion 			    (uintmax_t)entry->offset);
532377131528SDoug Moore 			if (prev == &map->header ||
532477131528SDoug Moore 			    prev->object.sub_map !=
532577131528SDoug Moore 				entry->object.sub_map) {
5326c7c34a24SBruce Evans 				db_indent += 2;
53272ebcd458SAttilio Rao 				vm_map_print((vm_map_t)entry->object.sub_map);
5328c7c34a24SBruce Evans 				db_indent -= 2;
5329df8bae1dSRodney W. Grimes 			}
53300d94caffSDavid Greenman 		} else {
5331ef694c1aSEdward Tomasz Napierala 			if (entry->cred != NULL)
5332ef694c1aSEdward Tomasz Napierala 				db_printf(", ruid %d", entry->cred->cr_ruid);
5333cd034a5bSMaxime Henrion 			db_printf(", object=%p, offset=0x%jx",
5334101eeb7fSBruce Evans 			    (void *)entry->object.vm_object,
5335cd034a5bSMaxime Henrion 			    (uintmax_t)entry->offset);
5336ef694c1aSEdward Tomasz Napierala 			if (entry->object.vm_object != NULL &&
5336ef694c1aSEdward Tomasz Napierala 			    entry->object.vm_object->cred != NULL)
5337ef694c1aSEdward Tomasz Napierala 				db_printf(", obj ruid %d charge %jx",
5338ef694c1aSEdward Tomasz Napierala 				    entry->object.vm_object->cred->cr_ruid,
53393364c323SKonstantin Belousov 				    (uintmax_t)entry->object.vm_object->charge);
5340afa07f7eSJohn Dyson 			if (entry->eflags & MAP_ENTRY_COW)
5341c7c34a24SBruce Evans 				db_printf(", copy (%s)",
5342afa07f7eSJohn Dyson 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
5343c7c34a24SBruce Evans 			db_printf("\n");
5344df8bae1dSRodney W. Grimes 
534577131528SDoug Moore 			if (prev == &map->header ||
534677131528SDoug Moore 			    prev->object.vm_object !=
534777131528SDoug Moore 				entry->object.vm_object) {
5348c7c34a24SBruce Evans 				db_indent += 2;
5349101eeb7fSBruce Evans 				vm_object_print((db_expr_t)(intptr_t)
5350101eeb7fSBruce Evans 						entry->object.vm_object,
535144bbc3b7SKonstantin Belousov 						0, 0, (char *)0);
5352c7c34a24SBruce Evans 				db_indent -= 2;
5353df8bae1dSRodney W. Grimes 			}
5354df8bae1dSRodney W. Grimes 		}
5355721899b1SDoug Moore 		prev = entry;
5356df8bae1dSRodney W. Grimes 	}
5357c7c34a24SBruce Evans 	db_indent -= 2;
5358df8bae1dSRodney W. Grimes }
535995e5e988SJohn Dyson 
53602ebcd458SAttilio Rao DB_SHOW_COMMAND(map, map)
53612ebcd458SAttilio Rao {
53622ebcd458SAttilio Rao 
53632ebcd458SAttilio Rao 	if (!have_addr) {
53642ebcd458SAttilio Rao 		db_printf("usage: show map <addr>\n");
53652ebcd458SAttilio Rao 		return;
53662ebcd458SAttilio Rao 	}
53672ebcd458SAttilio Rao 	vm_map_print((vm_map_t)addr);
53682ebcd458SAttilio Rao }
536995e5e988SJohn Dyson 
537095e5e988SJohn Dyson DB_SHOW_COMMAND(procvm, procvm)
537195e5e988SJohn Dyson {
537295e5e988SJohn Dyson 	struct proc *p;
537395e5e988SJohn Dyson 
537495e5e988SJohn Dyson 	if (have_addr) {
5375a9546a6bSJohn Baldwin 		p = db_lookup_proc(addr);
537695e5e988SJohn Dyson 	} else {
537795e5e988SJohn Dyson 		p = curproc;
537895e5e988SJohn Dyson 	}
537995e5e988SJohn Dyson 
5380ac1e407bSBruce Evans 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
5381ac1e407bSBruce Evans 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
5382b1028ad1SLuoqi Chen 	    (void *)vmspace_pmap(p->p_vmspace));
538395e5e988SJohn Dyson 
53842ebcd458SAttilio Rao 	vm_map_print((vm_map_t)&p->p_vmspace->vm_map);
538595e5e988SJohn Dyson }
538695e5e988SJohn Dyson 
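/*
 * Illustrative ddb session, not part of the source (the addresses are
 * made up):
 *
 *	db> show map 0xfffff80003a7e000
 *	db> show procvm 0xfffff80003566000
 *
 * "show procvm" without an address dumps the vmspace map of curproc;
 * the argument, when given, is resolved by db_lookup_proc().
 */
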
5387c7c34a24SBruce Evans #endif /* DDB */
5388