xref: /freebsd/sys/vm/vm_map.c (revision bb9e2184f09f42be6af82f28a50fe1914c670c32)
160727d8bSWarner Losh /*-
2796df753SPedro F. Giffuni  * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
351369649SPedro F. Giffuni  *
4df8bae1dSRodney W. Grimes  * Copyright (c) 1991, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  *
7df8bae1dSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
8df8bae1dSRodney W. Grimes  * The Mach Operating System project at Carnegie-Mellon University.
9df8bae1dSRodney W. Grimes  *
10df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
11df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
12df8bae1dSRodney W. Grimes  * are met:
13df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
14df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
15df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
16df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
17df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
18fbbd9655SWarner Losh  * 3. Neither the name of the University nor the names of its contributors
19df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
20df8bae1dSRodney W. Grimes  *    without specific prior written permission.
21df8bae1dSRodney W. Grimes  *
22df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
33df8bae1dSRodney W. Grimes  *
343c4dd356SDavid Greenman  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
35df8bae1dSRodney W. Grimes  *
36df8bae1dSRodney W. Grimes  *
37df8bae1dSRodney W. Grimes  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
38df8bae1dSRodney W. Grimes  * All rights reserved.
39df8bae1dSRodney W. Grimes  *
40df8bae1dSRodney W. Grimes  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
41df8bae1dSRodney W. Grimes  *
42df8bae1dSRodney W. Grimes  * Permission to use, copy, modify and distribute this software and
43df8bae1dSRodney W. Grimes  * its documentation is hereby granted, provided that both the copyright
44df8bae1dSRodney W. Grimes  * notice and this permission notice appear in all copies of the
45df8bae1dSRodney W. Grimes  * software, derivative works or modified versions, and any portions
46df8bae1dSRodney W. Grimes  * thereof, and that both notices appear in supporting documentation.
47df8bae1dSRodney W. Grimes  *
48df8bae1dSRodney W. Grimes  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
49df8bae1dSRodney W. Grimes  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
50df8bae1dSRodney W. Grimes  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
51df8bae1dSRodney W. Grimes  *
52df8bae1dSRodney W. Grimes  * Carnegie Mellon requests users of this software to return to
53df8bae1dSRodney W. Grimes  *
54df8bae1dSRodney W. Grimes  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
55df8bae1dSRodney W. Grimes  *  School of Computer Science
56df8bae1dSRodney W. Grimes  *  Carnegie Mellon University
57df8bae1dSRodney W. Grimes  *  Pittsburgh PA 15213-3890
58df8bae1dSRodney W. Grimes  *
59df8bae1dSRodney W. Grimes  * any improvements or extensions that they make and grant Carnegie the
60df8bae1dSRodney W. Grimes  * rights to redistribute these changes.
61df8bae1dSRodney W. Grimes  */
62df8bae1dSRodney W. Grimes 
63df8bae1dSRodney W. Grimes /*
64df8bae1dSRodney W. Grimes  *	Virtual memory mapping module.
65df8bae1dSRodney W. Grimes  */
66df8bae1dSRodney W. Grimes 
67874651b1SDavid E. O'Brien #include <sys/cdefs.h>
68874651b1SDavid E. O'Brien __FBSDID("$FreeBSD$");
69874651b1SDavid E. O'Brien 
70df8bae1dSRodney W. Grimes #include <sys/param.h>
71df8bae1dSRodney W. Grimes #include <sys/systm.h>
729a6d144fSKonstantin Belousov #include <sys/kernel.h>
7361d80e90SJohn Baldwin #include <sys/ktr.h>
74fb919e4dSMark Murray #include <sys/lock.h>
75fb919e4dSMark Murray #include <sys/mutex.h>
76b5e8ce9fSBruce Evans #include <sys/proc.h>
77efeaf95aSDavid Greenman #include <sys/vmmeter.h>
78867a482dSJohn Dyson #include <sys/mman.h>
791efb74fbSJohn Dyson #include <sys/vnode.h>
801ba5ad42SEdward Tomasz Napierala #include <sys/racct.h>
812267af78SJulian Elischer #include <sys/resourcevar.h>
8289f6b863SAttilio Rao #include <sys/rwlock.h>
833fde38dfSMike Silbersack #include <sys/file.h>
849a6d144fSKonstantin Belousov #include <sys/sysctl.h>
8505ba50f5SJake Burkholder #include <sys/sysent.h>
863db161e0SMatthew Dillon #include <sys/shm.h>
87df8bae1dSRodney W. Grimes 
88df8bae1dSRodney W. Grimes #include <vm/vm.h>
89efeaf95aSDavid Greenman #include <vm/vm_param.h>
90efeaf95aSDavid Greenman #include <vm/pmap.h>
91efeaf95aSDavid Greenman #include <vm/vm_map.h>
92df8bae1dSRodney W. Grimes #include <vm/vm_page.h>
9354a3a114SMark Johnston #include <vm/vm_pageout.h>
94df8bae1dSRodney W. Grimes #include <vm/vm_object.h>
9547221757SJohn Dyson #include <vm/vm_pager.h>
9626f9a767SRodney W. Grimes #include <vm/vm_kern.h>
97efeaf95aSDavid Greenman #include <vm/vm_extern.h>
9884110e7eSKonstantin Belousov #include <vm/vnode_pager.h>
9921cd6e62SSeigo Tanimura #include <vm/swap_pager.h>
100670d17b5SJeff Roberson #include <vm/uma.h>
101df8bae1dSRodney W. Grimes 
102df8bae1dSRodney W. Grimes /*
103df8bae1dSRodney W. Grimes  *	Virtual memory maps provide for the mapping, protection,
104df8bae1dSRodney W. Grimes  *	and sharing of virtual memory objects.  In addition,
105df8bae1dSRodney W. Grimes  *	this module provides for an efficient virtual copy of
106df8bae1dSRodney W. Grimes  *	memory from one map to another.
107df8bae1dSRodney W. Grimes  *
108df8bae1dSRodney W. Grimes  *	Synchronization is required prior to most operations.
109df8bae1dSRodney W. Grimes  *
110df8bae1dSRodney W. Grimes  *	Maps consist of an ordered doubly-linked list of simple
111e2abaaaaSAlan Cox  *	entries; a self-adjusting binary search tree of these
112e2abaaaaSAlan Cox  *	entries is used to speed up lookups.
113df8bae1dSRodney W. Grimes  *
114956f3135SPhilippe Charnier  *	Since portions of maps are specified by start/end addresses,
115df8bae1dSRodney W. Grimes  *	which may not align with existing map entries, all
116df8bae1dSRodney W. Grimes  *	routines merely "clip" entries to these start/end values.
117df8bae1dSRodney W. Grimes  *	[That is, an entry is split into two, bordering at a
118df8bae1dSRodney W. Grimes  *	start or end value.]  Note that these clippings may not
119df8bae1dSRodney W. Grimes  *	always be necessary (as the two resulting entries are then
120df8bae1dSRodney W. Grimes  *	not changed); however, the clipping is done for convenience.
121df8bae1dSRodney W. Grimes  *
122df8bae1dSRodney W. Grimes  *	As mentioned above, virtual copy operations are performed
123ad5fca3bSAlan Cox  *	by copying VM object references from one map to
124df8bae1dSRodney W. Grimes  *	another, and then marking both regions as copy-on-write.
125df8bae1dSRodney W. Grimes  */
126df8bae1dSRodney W. Grimes 
1273a92e5d5SAlan Cox static struct mtx map_sleep_mtx;
1288355f576SJeff Roberson static uma_zone_t mapentzone;
1298355f576SJeff Roberson static uma_zone_t kmapentzone;
1308355f576SJeff Roberson static uma_zone_t mapzone;
1318355f576SJeff Roberson static uma_zone_t vmspace_zone;
132b23f72e9SBrian Feldman static int vmspace_zinit(void *mem, int size, int flags);
133b23f72e9SBrian Feldman static int vm_map_zinit(void *mem, int ize, int flags);
13492351f16SAlan Cox static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min,
13592351f16SAlan Cox     vm_offset_t max);
1360b367bd8SKonstantin Belousov static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
137655c3490SKonstantin Belousov static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
13803462509SAlan Cox static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
13919bd0d9cSKonstantin Belousov static int vm_map_growstack(vm_map_t map, vm_offset_t addr,
14019bd0d9cSKonstantin Belousov     vm_map_entry_t gap_entry);
141077ec27cSAlan Cox static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
142077ec27cSAlan Cox     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags);
1438355f576SJeff Roberson #ifdef INVARIANTS
1448355f576SJeff Roberson static void vm_map_zdtor(void *mem, int size, void *arg);
1458355f576SJeff Roberson static void vmspace_zdtor(void *mem, int size, void *arg);
1468355f576SJeff Roberson #endif
1474648ba0aSKonstantin Belousov static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
1484648ba0aSKonstantin Belousov     vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max,
1494648ba0aSKonstantin Belousov     int cow);
15066cd575bSAlan Cox static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
15166cd575bSAlan Cox     vm_offset_t failed_addr);
152b18bfc3dSJohn Dyson 
153ef694c1aSEdward Tomasz Napierala #define	ENTRY_CHARGED(e) ((e)->cred != NULL || \
154ef694c1aSEdward Tomasz Napierala     ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \
1553364c323SKonstantin Belousov      !((e)->eflags & MAP_ENTRY_NEEDS_COPY)))
1563364c323SKonstantin Belousov 
15757051fdcSTor Egge /*
15857051fdcSTor Egge  * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type
15957051fdcSTor Egge  * stable.
16057051fdcSTor Egge  */
16157051fdcSTor Egge #define PROC_VMSPACE_LOCK(p) do { } while (0)
16257051fdcSTor Egge #define PROC_VMSPACE_UNLOCK(p) do { } while (0)
16357051fdcSTor Egge 
164d239bd3cSKonstantin Belousov /*
165d239bd3cSKonstantin Belousov  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
166d239bd3cSKonstantin Belousov  *
167d239bd3cSKonstantin Belousov  *	Asserts that the starting and ending region
168d239bd3cSKonstantin Belousov  *	addresses fall within the valid range of the map.
169d239bd3cSKonstantin Belousov  */
170d239bd3cSKonstantin Belousov #define	VM_MAP_RANGE_CHECK(map, start, end)		\
171d239bd3cSKonstantin Belousov 		{					\
172d239bd3cSKonstantin Belousov 		if (start < vm_map_min(map))		\
173d239bd3cSKonstantin Belousov 			start = vm_map_min(map);	\
174d239bd3cSKonstantin Belousov 		if (end > vm_map_max(map))		\
175d239bd3cSKonstantin Belousov 			end = vm_map_max(map);		\
176d239bd3cSKonstantin Belousov 		if (start > end)			\
177d239bd3cSKonstantin Belousov 			start = end;			\
178d239bd3cSKonstantin Belousov 		}
179d239bd3cSKonstantin Belousov 
1806fecb26bSKonstantin Belousov /*
1816fecb26bSKonstantin Belousov  *	vm_map_startup:
1826fecb26bSKonstantin Belousov  *
1836fecb26bSKonstantin Belousov  *	Initialize the vm_map module.  Must be called before
1846fecb26bSKonstantin Belousov  *	any other vm_map routines.
1856fecb26bSKonstantin Belousov  *
1866fecb26bSKonstantin Belousov  *	Map and entry structures are allocated from the general
1876fecb26bSKonstantin Belousov  *	purpose memory pool with some exceptions:
1886fecb26bSKonstantin Belousov  *
1896fecb26bSKonstantin Belousov  *	- The kernel map and kmem submap are allocated statically.
1906fecb26bSKonstantin Belousov  *	- Kernel map entries are allocated out of a static pool.
1916fecb26bSKonstantin Belousov  *
1926fecb26bSKonstantin Belousov  *	These restrictions are necessary since malloc() uses the
1936fecb26bSKonstantin Belousov  *	maps and requires map entries.
1946fecb26bSKonstantin Belousov  */
1956fecb26bSKonstantin Belousov 
1960d94caffSDavid Greenman void
1971b40f8c0SMatthew Dillon vm_map_startup(void)
198df8bae1dSRodney W. Grimes {
1993a92e5d5SAlan Cox 	mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
2008355f576SJeff Roberson 	mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
2018355f576SJeff Roberson #ifdef INVARIANTS
2028355f576SJeff Roberson 	    vm_map_zdtor,
2038355f576SJeff Roberson #else
2048355f576SJeff Roberson 	    NULL,
2058355f576SJeff Roberson #endif
206f872f6eaSAlan Cox 	    vm_map_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
2078355f576SJeff Roberson 	uma_prealloc(mapzone, MAX_KMAP);
208670d17b5SJeff Roberson 	kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
20918aa2de5SJeff Roberson 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
21018aa2de5SJeff Roberson 	    UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
211670d17b5SJeff Roberson 	mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
212670d17b5SJeff Roberson 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2135df87b21SJeff Roberson 	vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
2145df87b21SJeff Roberson #ifdef INVARIANTS
2155df87b21SJeff Roberson 	    vmspace_zdtor,
2165df87b21SJeff Roberson #else
2175df87b21SJeff Roberson 	    NULL,
2185df87b21SJeff Roberson #endif
219f872f6eaSAlan Cox 	    vmspace_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
2208355f576SJeff Roberson }
2218355f576SJeff Roberson 
222b23f72e9SBrian Feldman static int
223b23f72e9SBrian Feldman vmspace_zinit(void *mem, int size, int flags)
2248355f576SJeff Roberson {
2258355f576SJeff Roberson 	struct vmspace *vm;
2268355f576SJeff Roberson 
2278355f576SJeff Roberson 	vm = (struct vmspace *)mem;
2288355f576SJeff Roberson 
22989b57fcfSKonstantin Belousov 	vm->vm_map.pmap = NULL;
230b23f72e9SBrian Feldman 	(void)vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map), flags);
231e68c64f0SKonstantin Belousov 	PMAP_LOCK_INIT(vmspace_pmap(vm));
232b23f72e9SBrian Feldman 	return (0);
2338355f576SJeff Roberson }
2348355f576SJeff Roberson 
235b23f72e9SBrian Feldman static int
236b23f72e9SBrian Feldman vm_map_zinit(void *mem, int size, int flags)
2378355f576SJeff Roberson {
2388355f576SJeff Roberson 	vm_map_t map;
2398355f576SJeff Roberson 
2408355f576SJeff Roberson 	map = (vm_map_t)mem;
241763d9566STim Kientzle 	memset(map, 0, sizeof(*map));
242e30df26eSAlan Cox 	mtx_init(&map->system_mtx, "vm map (system)", NULL, MTX_DEF | MTX_DUPOK);
243e30df26eSAlan Cox 	sx_init(&map->lock, "vm map (user)");
244b23f72e9SBrian Feldman 	return (0);
2458355f576SJeff Roberson }
2468355f576SJeff Roberson 
2478355f576SJeff Roberson #ifdef INVARIANTS
2488355f576SJeff Roberson static void
2498355f576SJeff Roberson vmspace_zdtor(void *mem, int size, void *arg)
2508355f576SJeff Roberson {
2518355f576SJeff Roberson 	struct vmspace *vm;
2528355f576SJeff Roberson 
2538355f576SJeff Roberson 	vm = (struct vmspace *)mem;
2548355f576SJeff Roberson 
2558355f576SJeff Roberson 	vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
2568355f576SJeff Roberson }
2578355f576SJeff Roberson static void
2588355f576SJeff Roberson vm_map_zdtor(void *mem, int size, void *arg)
2598355f576SJeff Roberson {
2608355f576SJeff Roberson 	vm_map_t map;
2618355f576SJeff Roberson 
2628355f576SJeff Roberson 	map = (vm_map_t)mem;
2638355f576SJeff Roberson 	KASSERT(map->nentries == 0,
2648355f576SJeff Roberson 	    ("map %p nentries == %d on free.",
2658355f576SJeff Roberson 	    map, map->nentries));
2668355f576SJeff Roberson 	KASSERT(map->size == 0,
2678355f576SJeff Roberson 	    ("map %p size == %lu on free.",
2689eb6e519SJeff Roberson 	    map, (unsigned long)map->size));
2698355f576SJeff Roberson }
2708355f576SJeff Roberson #endif	/* INVARIANTS */
2718355f576SJeff Roberson 
272df8bae1dSRodney W. Grimes /*
273df8bae1dSRodney W. Grimes  * Allocate a vmspace structure, including a vm_map and pmap,
274df8bae1dSRodney W. Grimes  * and initialize those structures.  The refcnt is set to 1.
27574d1d2b7SNeel Natu  *
27674d1d2b7SNeel Natu  * If 'pinit' is NULL then the embedded pmap is initialized via pmap_pinit().
277df8bae1dSRodney W. Grimes  */
278df8bae1dSRodney W. Grimes struct vmspace *
27974d1d2b7SNeel Natu vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit)
280df8bae1dSRodney W. Grimes {
281c0877f10SJohn Dyson 	struct vmspace *vm;
2820d94caffSDavid Greenman 
283a163d034SWarner Losh 	vm = uma_zalloc(vmspace_zone, M_WAITOK);
28474d1d2b7SNeel Natu 	KASSERT(vm->vm_map.pmap == NULL, ("vm_map.pmap must be NULL"));
28574d1d2b7SNeel Natu 	if (!pinit(vmspace_pmap(vm))) {
28689b57fcfSKonstantin Belousov 		uma_zfree(vmspace_zone, vm);
28789b57fcfSKonstantin Belousov 		return (NULL);
28889b57fcfSKonstantin Belousov 	}
28921c641b2SJohn Baldwin 	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
29092351f16SAlan Cox 	_vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max);
291df8bae1dSRodney W. Grimes 	vm->vm_refcnt = 1;
2922d8acc0fSJohn Dyson 	vm->vm_shm = NULL;
29351ab6c28SAlan Cox 	vm->vm_swrss = 0;
29451ab6c28SAlan Cox 	vm->vm_tsize = 0;
29551ab6c28SAlan Cox 	vm->vm_dsize = 0;
29651ab6c28SAlan Cox 	vm->vm_ssize = 0;
29751ab6c28SAlan Cox 	vm->vm_taddr = 0;
29851ab6c28SAlan Cox 	vm->vm_daddr = 0;
29951ab6c28SAlan Cox 	vm->vm_maxsaddr = 0;
300df8bae1dSRodney W. Grimes 	return (vm);
301df8bae1dSRodney W. Grimes }
302df8bae1dSRodney W. Grimes 
3034b5c9cf6SEdward Tomasz Napierala #ifdef RACCT
3041ba5ad42SEdward Tomasz Napierala static void
3051ba5ad42SEdward Tomasz Napierala vmspace_container_reset(struct proc *p)
3061ba5ad42SEdward Tomasz Napierala {
3071ba5ad42SEdward Tomasz Napierala 
3081ba5ad42SEdward Tomasz Napierala 	PROC_LOCK(p);
3091ba5ad42SEdward Tomasz Napierala 	racct_set(p, RACCT_DATA, 0);
3101ba5ad42SEdward Tomasz Napierala 	racct_set(p, RACCT_STACK, 0);
3111ba5ad42SEdward Tomasz Napierala 	racct_set(p, RACCT_RSS, 0);
3121ba5ad42SEdward Tomasz Napierala 	racct_set(p, RACCT_MEMLOCK, 0);
3131ba5ad42SEdward Tomasz Napierala 	racct_set(p, RACCT_VMEM, 0);
3141ba5ad42SEdward Tomasz Napierala 	PROC_UNLOCK(p);
3151ba5ad42SEdward Tomasz Napierala }
3164b5c9cf6SEdward Tomasz Napierala #endif
3171ba5ad42SEdward Tomasz Napierala 
31862a59e8fSWarner Losh static inline void
319582ec34cSAlfred Perlstein vmspace_dofree(struct vmspace *vm)
320df8bae1dSRodney W. Grimes {
3210ef12795SAlan Cox 
32221c641b2SJohn Baldwin 	CTR1(KTR_VM, "vmspace_free: %p", vm);
3233db161e0SMatthew Dillon 
3243db161e0SMatthew Dillon 	/*
3253db161e0SMatthew Dillon 	 * Make sure any SysV shm is freed, it might not have been in
3263db161e0SMatthew Dillon 	 * exit1().
3273db161e0SMatthew Dillon 	 */
3283db161e0SMatthew Dillon 	shmexit(vm);
3293db161e0SMatthew Dillon 
33030dcfc09SJohn Dyson 	/*
331df8bae1dSRodney W. Grimes 	 * Lock the map, to wait out all other references to it.
3320d94caffSDavid Greenman 	 * Delete all of the mappings and pages they hold, then call
3330d94caffSDavid Greenman 	 * the pmap module to reclaim anything left.
334df8bae1dSRodney W. Grimes 	 */
335f0165b1cSKonstantin Belousov 	(void)vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map),
336f0165b1cSKonstantin Belousov 	    vm_map_max(&vm->vm_map));
3378355f576SJeff Roberson 
3380ef12795SAlan Cox 	pmap_release(vmspace_pmap(vm));
3390ef12795SAlan Cox 	vm->vm_map.pmap = NULL;
3408355f576SJeff Roberson 	uma_zfree(vmspace_zone, vm);
341df8bae1dSRodney W. Grimes }
342582ec34cSAlfred Perlstein 
343582ec34cSAlfred Perlstein void
344582ec34cSAlfred Perlstein vmspace_free(struct vmspace *vm)
345582ec34cSAlfred Perlstein {
346582ec34cSAlfred Perlstein 
347423521aaSRyan Stone 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
348164a37a5SJohn Baldwin 	    "vmspace_free() called");
349423521aaSRyan Stone 
350582ec34cSAlfred Perlstein 	if (vm->vm_refcnt == 0)
351582ec34cSAlfred Perlstein 		panic("vmspace_free: attempt to free already freed vmspace");
352582ec34cSAlfred Perlstein 
3531a587ef2SJohn Baldwin 	if (atomic_fetchadd_int(&vm->vm_refcnt, -1) == 1)
354582ec34cSAlfred Perlstein 		vmspace_dofree(vm);
355582ec34cSAlfred Perlstein }
356582ec34cSAlfred Perlstein 
357582ec34cSAlfred Perlstein void
358582ec34cSAlfred Perlstein vmspace_exitfree(struct proc *p)
359582ec34cSAlfred Perlstein {
360334f7061SPeter Wemm 	struct vmspace *vm;
361582ec34cSAlfred Perlstein 
36257051fdcSTor Egge 	PROC_VMSPACE_LOCK(p);
363334f7061SPeter Wemm 	vm = p->p_vmspace;
364334f7061SPeter Wemm 	p->p_vmspace = NULL;
36557051fdcSTor Egge 	PROC_VMSPACE_UNLOCK(p);
36657051fdcSTor Egge 	KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace"));
36757051fdcSTor Egge 	vmspace_free(vm);
36857051fdcSTor Egge }
36957051fdcSTor Egge 
37057051fdcSTor Egge void
37157051fdcSTor Egge vmspace_exit(struct thread *td)
37257051fdcSTor Egge {
37357051fdcSTor Egge 	int refcnt;
37457051fdcSTor Egge 	struct vmspace *vm;
37557051fdcSTor Egge 	struct proc *p;
376389d2b6eSMatthew Dillon 
377389d2b6eSMatthew Dillon 	/*
37857051fdcSTor Egge 	 * Release user portion of address space.
37957051fdcSTor Egge 	 * This releases references to vnodes,
38057051fdcSTor Egge 	 * which could cause I/O if the file has been unlinked.
38157051fdcSTor Egge 	 * Need to do this early enough that we can still sleep.
382389d2b6eSMatthew Dillon 	 *
38357051fdcSTor Egge 	 * The last exiting process to reach this point releases as
38457051fdcSTor Egge 	 * much of the environment as it can. vmspace_dofree() is the
38557051fdcSTor Egge 	 * slower fallback in case another process had a temporary
38657051fdcSTor Egge 	 * reference to the vmspace.
387389d2b6eSMatthew Dillon 	 */
38857051fdcSTor Egge 
38957051fdcSTor Egge 	p = td->td_proc;
39057051fdcSTor Egge 	vm = p->p_vmspace;
39157051fdcSTor Egge 	atomic_add_int(&vmspace0.vm_refcnt, 1);
39257051fdcSTor Egge 	refcnt = vm->vm_refcnt;
39383764b44SMateusz Guzik 	do {
39457051fdcSTor Egge 		if (refcnt > 1 && p->p_vmspace != &vmspace0) {
39557051fdcSTor Egge 			/* Switch now since other proc might free vmspace */
39657051fdcSTor Egge 			PROC_VMSPACE_LOCK(p);
39757051fdcSTor Egge 			p->p_vmspace = &vmspace0;
39857051fdcSTor Egge 			PROC_VMSPACE_UNLOCK(p);
39957051fdcSTor Egge 			pmap_activate(td);
40057051fdcSTor Egge 		}
40183764b44SMateusz Guzik 	} while (!atomic_fcmpset_int(&vm->vm_refcnt, &refcnt, refcnt - 1));
40257051fdcSTor Egge 	if (refcnt == 1) {
40357051fdcSTor Egge 		if (p->p_vmspace != vm) {
40457051fdcSTor Egge 			/* vmspace not yet freed, switch back */
40557051fdcSTor Egge 			PROC_VMSPACE_LOCK(p);
40657051fdcSTor Egge 			p->p_vmspace = vm;
40757051fdcSTor Egge 			PROC_VMSPACE_UNLOCK(p);
40857051fdcSTor Egge 			pmap_activate(td);
40957051fdcSTor Egge 		}
41057051fdcSTor Egge 		pmap_remove_pages(vmspace_pmap(vm));
41157051fdcSTor Egge 		/* Switch now since this proc will free vmspace */
41257051fdcSTor Egge 		PROC_VMSPACE_LOCK(p);
41357051fdcSTor Egge 		p->p_vmspace = &vmspace0;
41457051fdcSTor Egge 		PROC_VMSPACE_UNLOCK(p);
41557051fdcSTor Egge 		pmap_activate(td);
416334f7061SPeter Wemm 		vmspace_dofree(vm);
417334f7061SPeter Wemm 	}
4184b5c9cf6SEdward Tomasz Napierala #ifdef RACCT
4194b5c9cf6SEdward Tomasz Napierala 	if (racct_enable)
4201ba5ad42SEdward Tomasz Napierala 		vmspace_container_reset(p);
4214b5c9cf6SEdward Tomasz Napierala #endif
42257051fdcSTor Egge }
42357051fdcSTor Egge 
42457051fdcSTor Egge /* Acquire reference to vmspace owned by another process. */
42557051fdcSTor Egge 
42657051fdcSTor Egge struct vmspace *
42757051fdcSTor Egge vmspace_acquire_ref(struct proc *p)
42857051fdcSTor Egge {
42957051fdcSTor Egge 	struct vmspace *vm;
43057051fdcSTor Egge 	int refcnt;
43157051fdcSTor Egge 
43257051fdcSTor Egge 	PROC_VMSPACE_LOCK(p);
43357051fdcSTor Egge 	vm = p->p_vmspace;
43457051fdcSTor Egge 	if (vm == NULL) {
43557051fdcSTor Egge 		PROC_VMSPACE_UNLOCK(p);
43657051fdcSTor Egge 		return (NULL);
43757051fdcSTor Egge 	}
43857051fdcSTor Egge 	refcnt = vm->vm_refcnt;
43983764b44SMateusz Guzik 	do {
44057051fdcSTor Egge 		if (refcnt <= 0) { 	/* Avoid 0->1 transition */
44157051fdcSTor Egge 			PROC_VMSPACE_UNLOCK(p);
44257051fdcSTor Egge 			return (NULL);
44357051fdcSTor Egge 		}
44483764b44SMateusz Guzik 	} while (!atomic_fcmpset_int(&vm->vm_refcnt, &refcnt, refcnt + 1));
44557051fdcSTor Egge 	if (vm != p->p_vmspace) {
44657051fdcSTor Egge 		PROC_VMSPACE_UNLOCK(p);
44757051fdcSTor Egge 		vmspace_free(vm);
44857051fdcSTor Egge 		return (NULL);
44957051fdcSTor Egge 	}
45057051fdcSTor Egge 	PROC_VMSPACE_UNLOCK(p);
45157051fdcSTor Egge 	return (vm);
45257051fdcSTor Egge }
453df8bae1dSRodney W. Grimes 
4548a4dc40fSJohn Baldwin /*
4558a4dc40fSJohn Baldwin  * Switch between vmspaces in an AIO kernel process.
4568a4dc40fSJohn Baldwin  *
4570b96ca33SJohn Baldwin  * The new vmspace is either the vmspace of a user process obtained
4580b96ca33SJohn Baldwin  * from an active AIO request or the initial vmspace of the AIO kernel
4590b96ca33SJohn Baldwin  * process (when it is idling).  Because user processes will block to
4600b96ca33SJohn Baldwin  * drain any active AIO requests before proceeding in exit() or
4610b96ca33SJohn Baldwin  * execve(), the reference count for vmspaces from AIO requests can
4620b96ca33SJohn Baldwin  * never be 0.  Similarly, AIO kernel processes hold an extra
4630b96ca33SJohn Baldwin  * reference on their initial vmspace for the life of the process.  As
4640b96ca33SJohn Baldwin  * a result, the 'newvm' vmspace always has a non-zero reference
4650b96ca33SJohn Baldwin  * count.  This permits an additional reference on 'newvm' to be
4660b96ca33SJohn Baldwin  * acquired via a simple atomic increment rather than the loop in
4670b96ca33SJohn Baldwin  * vmspace_acquire_ref() above.
4688a4dc40fSJohn Baldwin  */
4698a4dc40fSJohn Baldwin void
4708a4dc40fSJohn Baldwin vmspace_switch_aio(struct vmspace *newvm)
4718a4dc40fSJohn Baldwin {
4728a4dc40fSJohn Baldwin 	struct vmspace *oldvm;
4738a4dc40fSJohn Baldwin 
4748a4dc40fSJohn Baldwin 	/* XXX: Need some way to assert that this is an aio daemon. */
4758a4dc40fSJohn Baldwin 
4768a4dc40fSJohn Baldwin 	KASSERT(newvm->vm_refcnt > 0,
4778a4dc40fSJohn Baldwin 	    ("vmspace_switch_aio: newvm unreferenced"));
4788a4dc40fSJohn Baldwin 
4798a4dc40fSJohn Baldwin 	oldvm = curproc->p_vmspace;
4808a4dc40fSJohn Baldwin 	if (oldvm == newvm)
4818a4dc40fSJohn Baldwin 		return;
4828a4dc40fSJohn Baldwin 
4838a4dc40fSJohn Baldwin 	/*
4848a4dc40fSJohn Baldwin 	 * Point to the new address space and refer to it.
4858a4dc40fSJohn Baldwin 	 */
4868a4dc40fSJohn Baldwin 	curproc->p_vmspace = newvm;
4878a4dc40fSJohn Baldwin 	atomic_add_int(&newvm->vm_refcnt, 1);
4888a4dc40fSJohn Baldwin 
4898a4dc40fSJohn Baldwin 	/* Activate the new mapping. */
4908a4dc40fSJohn Baldwin 	pmap_activate(curthread);
4918a4dc40fSJohn Baldwin 
4928a4dc40fSJohn Baldwin 	vmspace_free(oldvm);
4938a4dc40fSJohn Baldwin }
4948a4dc40fSJohn Baldwin 
4951b40f8c0SMatthew Dillon void
496780b1c09SAlan Cox _vm_map_lock(vm_map_t map, const char *file, int line)
4971b40f8c0SMatthew Dillon {
498bc91c510SAlan Cox 
49993bc4879SAlan Cox 	if (map->system_map)
500ccdf2333SAttilio Rao 		mtx_lock_flags_(&map->system_mtx, 0, file, line);
50112c64974SMaxime Henrion 	else
5029fde98bbSAttilio Rao 		sx_xlock_(&map->lock, file, line);
5031b40f8c0SMatthew Dillon 	map->timestamp++;
5041b40f8c0SMatthew Dillon }
5051b40f8c0SMatthew Dillon 
50678022527SKonstantin Belousov void
50778022527SKonstantin Belousov vm_map_entry_set_vnode_text(vm_map_entry_t entry, bool add)
50878022527SKonstantin Belousov {
50978022527SKonstantin Belousov 	vm_object_t object, object1;
51078022527SKonstantin Belousov 	struct vnode *vp;
51178022527SKonstantin Belousov 
51278022527SKonstantin Belousov 	if ((entry->eflags & MAP_ENTRY_VN_EXEC) == 0)
51378022527SKonstantin Belousov 		return;
51478022527SKonstantin Belousov 	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
51578022527SKonstantin Belousov 	    ("Submap with execs"));
51678022527SKonstantin Belousov 	object = entry->object.vm_object;
51778022527SKonstantin Belousov 	KASSERT(object != NULL, ("No object for text, entry %p", entry));
51878022527SKonstantin Belousov 	VM_OBJECT_RLOCK(object);
51978022527SKonstantin Belousov 	while ((object1 = object->backing_object) != NULL) {
52078022527SKonstantin Belousov 		VM_OBJECT_RLOCK(object1);
52178022527SKonstantin Belousov 		VM_OBJECT_RUNLOCK(object);
52278022527SKonstantin Belousov 		object = object1;
52378022527SKonstantin Belousov 	}
52478022527SKonstantin Belousov 
52532d2014dSKonstantin Belousov 	vp = NULL;
52632d2014dSKonstantin Belousov 	if (object->type == OBJT_DEAD) {
52778022527SKonstantin Belousov 		/*
52878022527SKonstantin Belousov 		 * For OBJT_DEAD objects, v_writecount was handled in
52978022527SKonstantin Belousov 		 * vnode_pager_dealloc().
53078022527SKonstantin Belousov 		 */
53132d2014dSKonstantin Belousov 	} else if (object->type == OBJT_VNODE) {
53232d2014dSKonstantin Belousov 		vp = object->handle;
53332d2014dSKonstantin Belousov 	} else if (object->type == OBJT_SWAP) {
53432d2014dSKonstantin Belousov 		KASSERT((object->flags & OBJ_TMPFS_NODE) != 0,
53532d2014dSKonstantin Belousov 		    ("vm_map_entry_set_vnode_text: swap and !TMPFS "
53632d2014dSKonstantin Belousov 		    "entry %p, object %p, add %d", entry, object, add));
53732d2014dSKonstantin Belousov 		/*
53832d2014dSKonstantin Belousov 		 * Tmpfs VREG node, which was reclaimed, has
53932d2014dSKonstantin Belousov 		 * OBJ_TMPFS_NODE flag set, but not OBJ_TMPFS.  In
54032d2014dSKonstantin Belousov 		 * this case there is no v_writecount to adjust.
54132d2014dSKonstantin Belousov 		 */
54232d2014dSKonstantin Belousov 		if ((object->flags & OBJ_TMPFS) != 0)
54332d2014dSKonstantin Belousov 			vp = object->un_pager.swp.swp_tmpfs;
54432d2014dSKonstantin Belousov 	} else {
54532d2014dSKonstantin Belousov 		KASSERT(0,
54678022527SKonstantin Belousov 		    ("vm_map_entry_set_vnode_text: wrong object type, "
54778022527SKonstantin Belousov 		    "entry %p, object %p, add %d", entry, object, add));
54832d2014dSKonstantin Belousov 	}
54932d2014dSKonstantin Belousov 	if (vp != NULL) {
550*bb9e2184SKonstantin Belousov 		if (add) {
55178022527SKonstantin Belousov 			VOP_SET_TEXT_CHECKED(vp);
55278022527SKonstantin Belousov 			VM_OBJECT_RUNLOCK(object);
553*bb9e2184SKonstantin Belousov 		} else {
554*bb9e2184SKonstantin Belousov 			vhold(vp);
555*bb9e2184SKonstantin Belousov 			VM_OBJECT_RUNLOCK(object);
556*bb9e2184SKonstantin Belousov 			vn_lock(vp, LK_SHARED | LK_RETRY);
557*bb9e2184SKonstantin Belousov 			VOP_UNSET_TEXT_CHECKED(vp);
558*bb9e2184SKonstantin Belousov 			VOP_UNLOCK(vp, 0);
559*bb9e2184SKonstantin Belousov 			vdrop(vp);
560*bb9e2184SKonstantin Belousov 		}
561*bb9e2184SKonstantin Belousov 	} else {
562*bb9e2184SKonstantin Belousov 		VM_OBJECT_RUNLOCK(object);
563*bb9e2184SKonstantin Belousov 	}
56478022527SKonstantin Belousov }
56578022527SKonstantin Belousov 
5660b367bd8SKonstantin Belousov static void
5670b367bd8SKonstantin Belousov vm_map_process_deferred(void)
5680e0af8ecSBrian Feldman {
5690b367bd8SKonstantin Belousov 	struct thread *td;
5706fbe60faSJohn Baldwin 	vm_map_entry_t entry, next;
57184110e7eSKonstantin Belousov 	vm_object_t object;
572655c3490SKonstantin Belousov 
5730b367bd8SKonstantin Belousov 	td = curthread;
5746fbe60faSJohn Baldwin 	entry = td->td_map_def_user;
5756fbe60faSJohn Baldwin 	td->td_map_def_user = NULL;
5766fbe60faSJohn Baldwin 	while (entry != NULL) {
5776fbe60faSJohn Baldwin 		next = entry->next;
57878022527SKonstantin Belousov 		MPASS((entry->eflags & (MAP_ENTRY_VN_WRITECNT |
57978022527SKonstantin Belousov 		    MAP_ENTRY_VN_EXEC)) != (MAP_ENTRY_VN_WRITECNT |
58078022527SKonstantin Belousov 		    MAP_ENTRY_VN_EXEC));
58184110e7eSKonstantin Belousov 		if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) {
58284110e7eSKonstantin Belousov 			/*
58384110e7eSKonstantin Belousov 			 * Decrement the object's writemappings and
58484110e7eSKonstantin Belousov 			 * possibly the vnode's v_writecount.
58584110e7eSKonstantin Belousov 			 */
58684110e7eSKonstantin Belousov 			KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
58784110e7eSKonstantin Belousov 			    ("Submap with writecount"));
58884110e7eSKonstantin Belousov 			object = entry->object.vm_object;
58984110e7eSKonstantin Belousov 			KASSERT(object != NULL, ("No object for writecount"));
59084110e7eSKonstantin Belousov 			vnode_pager_release_writecount(object, entry->start,
59184110e7eSKonstantin Belousov 			    entry->end);
59284110e7eSKonstantin Belousov 		}
59378022527SKonstantin Belousov 		vm_map_entry_set_vnode_text(entry, false);
5940b367bd8SKonstantin Belousov 		vm_map_entry_deallocate(entry, FALSE);
5956fbe60faSJohn Baldwin 		entry = next;
5960b367bd8SKonstantin Belousov 	}
5970b367bd8SKonstantin Belousov }
5980b367bd8SKonstantin Belousov 
5990b367bd8SKonstantin Belousov void
6000b367bd8SKonstantin Belousov _vm_map_unlock(vm_map_t map, const char *file, int line)
6010b367bd8SKonstantin Belousov {
6020b367bd8SKonstantin Belousov 
6030b367bd8SKonstantin Belousov 	if (map->system_map)
604ccdf2333SAttilio Rao 		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
6050b367bd8SKonstantin Belousov 	else {
6069fde98bbSAttilio Rao 		sx_xunlock_(&map->lock, file, line);
6070b367bd8SKonstantin Belousov 		vm_map_process_deferred();
608655c3490SKonstantin Belousov 	}
6090e0af8ecSBrian Feldman }
6100e0af8ecSBrian Feldman 
6110e0af8ecSBrian Feldman void
612780b1c09SAlan Cox _vm_map_lock_read(vm_map_t map, const char *file, int line)
6130e0af8ecSBrian Feldman {
614bc91c510SAlan Cox 
61593bc4879SAlan Cox 	if (map->system_map)
616ccdf2333SAttilio Rao 		mtx_lock_flags_(&map->system_mtx, 0, file, line);
61712c64974SMaxime Henrion 	else
6189fde98bbSAttilio Rao 		sx_slock_(&map->lock, file, line);
61936daaecdSAlan Cox }
6200e0af8ecSBrian Feldman 
6210e0af8ecSBrian Feldman void
622780b1c09SAlan Cox _vm_map_unlock_read(vm_map_t map, const char *file, int line)
6230e0af8ecSBrian Feldman {
624bc91c510SAlan Cox 
62536daaecdSAlan Cox 	if (map->system_map)
626ccdf2333SAttilio Rao 		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
6270b367bd8SKonstantin Belousov 	else {
6289fde98bbSAttilio Rao 		sx_sunlock_(&map->lock, file, line);
6290b367bd8SKonstantin Belousov 		vm_map_process_deferred();
6300b367bd8SKonstantin Belousov 	}
63125adb370SBrian Feldman }
63225adb370SBrian Feldman 
633d974f03cSAlan Cox int
634780b1c09SAlan Cox _vm_map_trylock(vm_map_t map, const char *file, int line)
635d974f03cSAlan Cox {
63625adb370SBrian Feldman 	int error;
63725adb370SBrian Feldman 
63836daaecdSAlan Cox 	error = map->system_map ?
639ccdf2333SAttilio Rao 	    !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
6409fde98bbSAttilio Rao 	    !sx_try_xlock_(&map->lock, file, line);
6413a92e5d5SAlan Cox 	if (error == 0)
6423a92e5d5SAlan Cox 		map->timestamp++;
643bc91c510SAlan Cox 	return (error == 0);
6440e0af8ecSBrian Feldman }
6450e0af8ecSBrian Feldman 
6460e0af8ecSBrian Feldman int
64772d97679SDavid Schultz _vm_map_trylock_read(vm_map_t map, const char *file, int line)
64872d97679SDavid Schultz {
64972d97679SDavid Schultz 	int error;
65072d97679SDavid Schultz 
65172d97679SDavid Schultz 	error = map->system_map ?
652ccdf2333SAttilio Rao 	    !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
6539fde98bbSAttilio Rao 	    !sx_try_slock_(&map->lock, file, line);
65472d97679SDavid Schultz 	return (error == 0);
65572d97679SDavid Schultz }
65672d97679SDavid Schultz 
65705a8c414SAlan Cox /*
65805a8c414SAlan Cox  *	_vm_map_lock_upgrade:	[ internal use only ]
65905a8c414SAlan Cox  *
66005a8c414SAlan Cox  *	Tries to upgrade a read (shared) lock on the specified map to a write
66105a8c414SAlan Cox  *	(exclusive) lock.  Returns the value "0" if the upgrade succeeds and a
66205a8c414SAlan Cox  *	non-zero value if the upgrade fails.  If the upgrade fails, the map is
66305a8c414SAlan Cox  *	returned without a read or write lock held.
66405a8c414SAlan Cox  *
66505a8c414SAlan Cox  *	Requires that the map be read locked.
66605a8c414SAlan Cox  */
66772d97679SDavid Schultz int
668780b1c09SAlan Cox _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
6690e0af8ecSBrian Feldman {
67005a8c414SAlan Cox 	unsigned int last_timestamp;
671bc91c510SAlan Cox 
67212c64974SMaxime Henrion 	if (map->system_map) {
673ccdf2333SAttilio Rao 		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
67405a8c414SAlan Cox 	} else {
6759fde98bbSAttilio Rao 		if (!sx_try_upgrade_(&map->lock, file, line)) {
67605a8c414SAlan Cox 			last_timestamp = map->timestamp;
6779fde98bbSAttilio Rao 			sx_sunlock_(&map->lock, file, line);
6780b367bd8SKonstantin Belousov 			vm_map_process_deferred();
67905a8c414SAlan Cox 			/*
68005a8c414SAlan Cox 			 * If the map's timestamp does not change while the
68105a8c414SAlan Cox 			 * map is unlocked, then the upgrade succeeds.
68205a8c414SAlan Cox 			 */
6839fde98bbSAttilio Rao 			sx_xlock_(&map->lock, file, line);
68405a8c414SAlan Cox 			if (last_timestamp != map->timestamp) {
6859fde98bbSAttilio Rao 				sx_xunlock_(&map->lock, file, line);
68605a8c414SAlan Cox 				return (1);
68705a8c414SAlan Cox 			}
68805a8c414SAlan Cox 		}
68905a8c414SAlan Cox 	}
690bc91c510SAlan Cox 	map->timestamp++;
691bc91c510SAlan Cox 	return (0);
6920e0af8ecSBrian Feldman }
6930e0af8ecSBrian Feldman 
6940e0af8ecSBrian Feldman void
695780b1c09SAlan Cox _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
6961b40f8c0SMatthew Dillon {
697bc91c510SAlan Cox 
69812c64974SMaxime Henrion 	if (map->system_map) {
699ccdf2333SAttilio Rao 		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
70005a8c414SAlan Cox 	} else
7019fde98bbSAttilio Rao 		sx_downgrade_(&map->lock, file, line);
70205a8c414SAlan Cox }
70305a8c414SAlan Cox 
70405a8c414SAlan Cox /*
70505a8c414SAlan Cox  *	vm_map_locked:
70605a8c414SAlan Cox  *
70705a8c414SAlan Cox  *	Returns a non-zero value if the caller holds a write (exclusive) lock
70805a8c414SAlan Cox  *	on the specified map and the value "0" otherwise.
70905a8c414SAlan Cox  */
71005a8c414SAlan Cox int
71105a8c414SAlan Cox vm_map_locked(vm_map_t map)
71205a8c414SAlan Cox {
71305a8c414SAlan Cox 
71405a8c414SAlan Cox 	if (map->system_map)
71505a8c414SAlan Cox 		return (mtx_owned(&map->system_mtx));
71605a8c414SAlan Cox 	else
71705a8c414SAlan Cox 		return (sx_xlocked(&map->lock));
71825adb370SBrian Feldman }
71925adb370SBrian Feldman 
7203a0916b8SKonstantin Belousov #ifdef INVARIANTS
7213a0916b8SKonstantin Belousov static void
7223a0916b8SKonstantin Belousov _vm_map_assert_locked(vm_map_t map, const char *file, int line)
7233a0916b8SKonstantin Belousov {
7243a0916b8SKonstantin Belousov 
7253a0916b8SKonstantin Belousov 	if (map->system_map)
726ccdf2333SAttilio Rao 		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
7273a0916b8SKonstantin Belousov 	else
7289fde98bbSAttilio Rao 		sx_assert_(&map->lock, SA_XLOCKED, file, line);
7293a0916b8SKonstantin Belousov }
7303a0916b8SKonstantin Belousov 
7313a0916b8SKonstantin Belousov #define	VM_MAP_ASSERT_LOCKED(map) \
7323a0916b8SKonstantin Belousov     _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE)
7339f701172SKonstantin Belousov 
734c4e5de7eSMark Johnston #ifdef DIAGNOSTIC
735c4e5de7eSMark Johnston static int enable_vmmap_check = 1;
736c4e5de7eSMark Johnston #else
737c4e5de7eSMark Johnston static int enable_vmmap_check = 0;
738c4e5de7eSMark Johnston #endif
739c4e5de7eSMark Johnston SYSCTL_INT(_debug, OID_AUTO, vmmap_check, CTLFLAG_RWTUN,
740c4e5de7eSMark Johnston     &enable_vmmap_check, 0, "Enable vm map consistency checking");
741c4e5de7eSMark Johnston 
7429f701172SKonstantin Belousov static void
7439f701172SKonstantin Belousov _vm_map_assert_consistent(vm_map_t map)
7449f701172SKonstantin Belousov {
74577131528SDoug Moore 	vm_map_entry_t child, entry, prev;
7469f701172SKonstantin Belousov 	vm_size_t max_left, max_right;
7479f701172SKonstantin Belousov 
748c4e5de7eSMark Johnston 	if (!enable_vmmap_check)
749c4e5de7eSMark Johnston 		return;
750c4e5de7eSMark Johnston 
75177131528SDoug Moore 	for (prev = &map->header; (entry = prev->next) != &map->header;
75277131528SDoug Moore 	    prev = entry) {
75377131528SDoug Moore 		KASSERT(prev->end <= entry->start,
7549f701172SKonstantin Belousov 		    ("map %p prev->end = %jx, start = %jx", map,
75577131528SDoug Moore 		    (uintmax_t)prev->end, (uintmax_t)entry->start));
7569f701172SKonstantin Belousov 		KASSERT(entry->start < entry->end,
7579f701172SKonstantin Belousov 		    ("map %p start = %jx, end = %jx", map,
7589f701172SKonstantin Belousov 		    (uintmax_t)entry->start, (uintmax_t)entry->end));
7599f701172SKonstantin Belousov 		KASSERT(entry->end <= entry->next->start,
7609f701172SKonstantin Belousov 		    ("map %p end = %jx, next->start = %jx", map,
7619f701172SKonstantin Belousov 		    (uintmax_t)entry->end, (uintmax_t)entry->next->start));
7629f701172SKonstantin Belousov 		KASSERT(entry->left == NULL ||
7639f701172SKonstantin Belousov 		    entry->left->start < entry->start,
7649f701172SKonstantin Belousov 		    ("map %p left->start = %jx, start = %jx", map,
7659f701172SKonstantin Belousov 		    (uintmax_t)entry->left->start, (uintmax_t)entry->start));
7669f701172SKonstantin Belousov 		KASSERT(entry->right == NULL ||
7679f701172SKonstantin Belousov 		    entry->start < entry->right->start,
7689f701172SKonstantin Belousov 		    ("map %p start = %jx, right->start = %jx", map,
7699f701172SKonstantin Belousov 		    (uintmax_t)entry->start, (uintmax_t)entry->right->start));
7709f701172SKonstantin Belousov 		child = entry->left;
7719f701172SKonstantin Belousov 		max_left = (child != NULL) ? child->max_free :
77277131528SDoug Moore 			entry->start - prev->end;
7739f701172SKonstantin Belousov 		child = entry->right;
7749f701172SKonstantin Belousov 		max_right = (child != NULL) ? child->max_free :
7759f701172SKonstantin Belousov 			entry->next->start - entry->end;
7769f701172SKonstantin Belousov 		KASSERT(entry->max_free == MAX(max_left, max_right),
7779f701172SKonstantin Belousov 		    ("map %p max = %jx, max_left = %jx, max_right = %jx", map,
7789f701172SKonstantin Belousov 		     (uintmax_t)entry->max_free,
7799f701172SKonstantin Belousov 		     (uintmax_t)max_left, (uintmax_t)max_right));
7809f701172SKonstantin Belousov 	}
7819f701172SKonstantin Belousov }
7829f701172SKonstantin Belousov 
7839f701172SKonstantin Belousov #define VM_MAP_ASSERT_CONSISTENT(map) \
7849f701172SKonstantin Belousov     _vm_map_assert_consistent(map)
7853a0916b8SKonstantin Belousov #else
7863a0916b8SKonstantin Belousov #define	VM_MAP_ASSERT_LOCKED(map)
7879f701172SKonstantin Belousov #define VM_MAP_ASSERT_CONSISTENT(map)
788c4e5de7eSMark Johnston #endif /* INVARIANTS */
7893a0916b8SKonstantin Belousov 
790acd9a301SAlan Cox /*
7918304adaaSAlan Cox  *	_vm_map_unlock_and_wait:
7928304adaaSAlan Cox  *
7938304adaaSAlan Cox  *	Atomically releases the lock on the specified map and puts the calling
7948304adaaSAlan Cox  *	thread to sleep.  The calling thread will remain asleep until either
7958304adaaSAlan Cox  *	vm_map_wakeup() is performed on the map or the specified timeout is
7968304adaaSAlan Cox  *	exceeded.
7978304adaaSAlan Cox  *
7988304adaaSAlan Cox  *	WARNING!  This function does not perform deferred deallocations of
7998304adaaSAlan Cox  *	objects and map	entries.  Therefore, the calling thread is expected to
8008304adaaSAlan Cox  *	reacquire the map lock after reawakening and later perform an ordinary
8018304adaaSAlan Cox  *	unlock operation, such as vm_map_unlock(), before completing its
8028304adaaSAlan Cox  *	operation on the map.
803acd9a301SAlan Cox  */
8049688f931SAlan Cox int
8058304adaaSAlan Cox _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line)
806acd9a301SAlan Cox {
807acd9a301SAlan Cox 
8083a92e5d5SAlan Cox 	mtx_lock(&map_sleep_mtx);
8098304adaaSAlan Cox 	if (map->system_map)
810ccdf2333SAttilio Rao 		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
8118304adaaSAlan Cox 	else
8129fde98bbSAttilio Rao 		sx_xunlock_(&map->lock, file, line);
8138304adaaSAlan Cox 	return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps",
8148304adaaSAlan Cox 	    timo));
815acd9a301SAlan Cox }
816acd9a301SAlan Cox 
817acd9a301SAlan Cox /*
818acd9a301SAlan Cox  *	vm_map_wakeup:
8198304adaaSAlan Cox  *
8208304adaaSAlan Cox  *	Awaken any threads that have slept on the map using
8218304adaaSAlan Cox  *	vm_map_unlock_and_wait().
822acd9a301SAlan Cox  */
8239688f931SAlan Cox void
824acd9a301SAlan Cox vm_map_wakeup(vm_map_t map)
825acd9a301SAlan Cox {
826acd9a301SAlan Cox 
827b49ecb86SAlan Cox 	/*
8283a92e5d5SAlan Cox 	 * Acquire and release map_sleep_mtx to prevent a wakeup()
8298304adaaSAlan Cox 	 * from being performed (and lost) between the map unlock
8308304adaaSAlan Cox 	 * and the msleep() in _vm_map_unlock_and_wait().
831b49ecb86SAlan Cox 	 */
8323a92e5d5SAlan Cox 	mtx_lock(&map_sleep_mtx);
8333a92e5d5SAlan Cox 	mtx_unlock(&map_sleep_mtx);
834acd9a301SAlan Cox 	wakeup(&map->root);
835acd9a301SAlan Cox }
836acd9a301SAlan Cox 
837a5db445dSMax Laier void
838a5db445dSMax Laier vm_map_busy(vm_map_t map)
839a5db445dSMax Laier {
840a5db445dSMax Laier 
841a5db445dSMax Laier 	VM_MAP_ASSERT_LOCKED(map);
842a5db445dSMax Laier 	map->busy++;
843a5db445dSMax Laier }
844a5db445dSMax Laier 
845a5db445dSMax Laier void
846a5db445dSMax Laier vm_map_unbusy(vm_map_t map)
847a5db445dSMax Laier {
848a5db445dSMax Laier 
849a5db445dSMax Laier 	VM_MAP_ASSERT_LOCKED(map);
850a5db445dSMax Laier 	KASSERT(map->busy, ("vm_map_unbusy: not busy"));
851a5db445dSMax Laier 	if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) {
852a5db445dSMax Laier 		vm_map_modflags(map, 0, MAP_BUSY_WAKEUP);
853a5db445dSMax Laier 		wakeup(&map->busy);
854a5db445dSMax Laier 	}
855a5db445dSMax Laier }
856a5db445dSMax Laier 
857a5db445dSMax Laier void
858a5db445dSMax Laier vm_map_wait_busy(vm_map_t map)
859a5db445dSMax Laier {
860a5db445dSMax Laier 
861a5db445dSMax Laier 	VM_MAP_ASSERT_LOCKED(map);
862a5db445dSMax Laier 	while (map->busy) {
863a5db445dSMax Laier 		vm_map_modflags(map, MAP_BUSY_WAKEUP, 0);
864a5db445dSMax Laier 		if (map->system_map)
865a5db445dSMax Laier 			msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0);
866a5db445dSMax Laier 		else
867a5db445dSMax Laier 			sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0);
868a5db445dSMax Laier 	}
869a5db445dSMax Laier 	map->timestamp++;
870a5db445dSMax Laier }
871a5db445dSMax Laier 
8721b40f8c0SMatthew Dillon long
8731b40f8c0SMatthew Dillon vmspace_resident_count(struct vmspace *vmspace)
8741b40f8c0SMatthew Dillon {
8751b40f8c0SMatthew Dillon 	return pmap_resident_count(vmspace_pmap(vmspace));
8761b40f8c0SMatthew Dillon }
8771b40f8c0SMatthew Dillon 
878ff2b5645SMatthew Dillon /*
879df8bae1dSRodney W. Grimes  *	vm_map_create:
880df8bae1dSRodney W. Grimes  *
881df8bae1dSRodney W. Grimes  *	Creates and returns a new empty VM map with
882df8bae1dSRodney W. Grimes  *	the given physical map structure, and having
883df8bae1dSRodney W. Grimes  *	the given lower and upper address bounds.
884df8bae1dSRodney W. Grimes  */
8850d94caffSDavid Greenman vm_map_t
8861b40f8c0SMatthew Dillon vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
887df8bae1dSRodney W. Grimes {
888c0877f10SJohn Dyson 	vm_map_t result;
889df8bae1dSRodney W. Grimes 
890a163d034SWarner Losh 	result = uma_zalloc(mapzone, M_WAITOK);
89121c641b2SJohn Baldwin 	CTR1(KTR_VM, "vm_map_create: %p", result);
89292351f16SAlan Cox 	_vm_map_init(result, pmap, min, max);
893df8bae1dSRodney W. Grimes 	return (result);
894df8bae1dSRodney W. Grimes }
895df8bae1dSRodney W. Grimes 
896df8bae1dSRodney W. Grimes /*
897df8bae1dSRodney W. Grimes  * Initialize an existing vm_map structure
898df8bae1dSRodney W. Grimes  * such as that in the vmspace structure.
899df8bae1dSRodney W. Grimes  */
9008355f576SJeff Roberson static void
90192351f16SAlan Cox _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
902df8bae1dSRodney W. Grimes {
90321c641b2SJohn Baldwin 
904df8bae1dSRodney W. Grimes 	map->header.next = map->header.prev = &map->header;
9052203c46dSMark Johnston 	map->header.eflags = MAP_ENTRY_HEADER;
9069688f931SAlan Cox 	map->needs_wakeup = FALSE;
9073075778bSJohn Dyson 	map->system_map = 0;
90892351f16SAlan Cox 	map->pmap = pmap;
909f0165b1cSKonstantin Belousov 	map->header.end = min;
910f0165b1cSKonstantin Belousov 	map->header.start = max;
911af7cd0c5SBrian Feldman 	map->flags = 0;
9124e94f402SAlan Cox 	map->root = NULL;
913df8bae1dSRodney W. Grimes 	map->timestamp = 0;
914a5db445dSMax Laier 	map->busy = 0;
915fa50a355SKonstantin Belousov 	map->anon_loc = 0;
916df8bae1dSRodney W. Grimes }
917df8bae1dSRodney W. Grimes 
918a18b1f1dSJason Evans void
91992351f16SAlan Cox vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
920a18b1f1dSJason Evans {
92192351f16SAlan Cox 
92292351f16SAlan Cox 	_vm_map_init(map, pmap, min, max);
923d923c598SAlan Cox 	mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
92412c64974SMaxime Henrion 	sx_init(&map->lock, "user map");
925a18b1f1dSJason Evans }
926a18b1f1dSJason Evans 
927df8bae1dSRodney W. Grimes /*
928b18bfc3dSJohn Dyson  *	vm_map_entry_dispose:	[ internal use only ]
929b18bfc3dSJohn Dyson  *
930b18bfc3dSJohn Dyson  *	Inverse of vm_map_entry_create.
931b18bfc3dSJohn Dyson  */
93262487bb4SJohn Dyson static void
9331b40f8c0SMatthew Dillon vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
934b18bfc3dSJohn Dyson {
9352b4a2c27SAlan Cox 	uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
936b18bfc3dSJohn Dyson }
937b18bfc3dSJohn Dyson 
938b18bfc3dSJohn Dyson /*
939df8bae1dSRodney W. Grimes  *	vm_map_entry_create:	[ internal use only ]
940df8bae1dSRodney W. Grimes  *
941df8bae1dSRodney W. Grimes  *	Allocates a VM map entry for insertion.
942b28cb1caSAlfred Perlstein  *	No entry fields are filled in.
943df8bae1dSRodney W. Grimes  */
944f708ef1bSPoul-Henning Kamp static vm_map_entry_t
9451b40f8c0SMatthew Dillon vm_map_entry_create(vm_map_t map)
946df8bae1dSRodney W. Grimes {
9471f6889a1SMatthew Dillon 	vm_map_entry_t new_entry;
9481f6889a1SMatthew Dillon 
9492b4a2c27SAlan Cox 	if (map->system_map)
9502b4a2c27SAlan Cox 		new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
9512b4a2c27SAlan Cox 	else
952a163d034SWarner Losh 		new_entry = uma_zalloc(mapentzone, M_WAITOK);
9531f6889a1SMatthew Dillon 	if (new_entry == NULL)
9541f6889a1SMatthew Dillon 		panic("vm_map_entry_create: kernel resources exhausted");
9551f6889a1SMatthew Dillon 	return (new_entry);
956df8bae1dSRodney W. Grimes }
957df8bae1dSRodney W. Grimes 
958df8bae1dSRodney W. Grimes /*
959794316a8SAlan Cox  *	vm_map_entry_set_behavior:
960794316a8SAlan Cox  *
961794316a8SAlan Cox  *	Set the expected access behavior, either normal, random, or
962794316a8SAlan Cox  *	sequential.
963794316a8SAlan Cox  */
96462a59e8fSWarner Losh static inline void
965794316a8SAlan Cox vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
966794316a8SAlan Cox {
967794316a8SAlan Cox 	entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
968794316a8SAlan Cox 	    (behavior & MAP_ENTRY_BEHAV_MASK);
969794316a8SAlan Cox }
970794316a8SAlan Cox 
971794316a8SAlan Cox /*
9725a0879daSDoug Moore  *	vm_map_entry_max_free_{left,right}:
9730164e057SAlan Cox  *
9745a0879daSDoug Moore  *	Compute the size of the largest free gap between two entries,
9755a0879daSDoug Moore  *	one the root of a tree and the other the ancestor of that root
9765a0879daSDoug Moore  *	that is the least or greatest ancestor found on the search path.
9770164e057SAlan Cox  */
9785a0879daSDoug Moore static inline vm_size_t
9795a0879daSDoug Moore vm_map_entry_max_free_left(vm_map_entry_t root, vm_map_entry_t left_ancestor)
9800164e057SAlan Cox {
9810164e057SAlan Cox 
9825a0879daSDoug Moore 	return (root->left != NULL ?
9835a0879daSDoug Moore 	    root->left->max_free : root->start - left_ancestor->end);
9845a0879daSDoug Moore }
9855a0879daSDoug Moore 
9865a0879daSDoug Moore static inline vm_size_t
9875a0879daSDoug Moore vm_map_entry_max_free_right(vm_map_entry_t root, vm_map_entry_t right_ancestor)
9885a0879daSDoug Moore {
9895a0879daSDoug Moore 
9905a0879daSDoug Moore 	return (root->right != NULL ?
9915a0879daSDoug Moore 	    root->right->max_free : right_ancestor->start - root->end);
9920164e057SAlan Cox }
9930164e057SAlan Cox 
9949f701172SKonstantin Belousov #define SPLAY_LEFT_STEP(root, y, rlist, test) do {			\
9955a0879daSDoug Moore 	vm_size_t max_free;						\
9965a0879daSDoug Moore 									\
9975a0879daSDoug Moore 	/*								\
9985a0879daSDoug Moore 	 * Infer root->right->max_free == root->max_free when		\
9995a0879daSDoug Moore 	 * y->max_free < root->max_free || root->max_free == 0.		\
10005a0879daSDoug Moore 	 * Otherwise, look right to find it.				\
10015a0879daSDoug Moore 	 */								\
10029f701172SKonstantin Belousov 	y = root->left;							\
10035a0879daSDoug Moore 	max_free = root->max_free;					\
10045a0879daSDoug Moore 	KASSERT(max_free >= vm_map_entry_max_free_right(root, rlist),	\
10055a0879daSDoug Moore 	    ("%s: max_free invariant fails", __func__));		\
10065a0879daSDoug Moore 	if (y == NULL ? max_free > 0 : max_free - 1 < y->max_free)	\
10075a0879daSDoug Moore 		max_free = vm_map_entry_max_free_right(root, rlist);	\
10089f701172SKonstantin Belousov 	if (y != NULL && (test)) {					\
10099f701172SKonstantin Belousov 		/* Rotate right and make y root. */			\
10109f701172SKonstantin Belousov 		root->left = y->right;					\
10119f701172SKonstantin Belousov 		y->right = root;					\
10125a0879daSDoug Moore 		if (max_free < y->max_free)				\
10135a0879daSDoug Moore 			root->max_free = max_free = MAX(max_free,	\
10145a0879daSDoug Moore 			    vm_map_entry_max_free_left(root, y));	\
10159f701172SKonstantin Belousov 		root = y;						\
10169f701172SKonstantin Belousov 		y = root->left;						\
10179f701172SKonstantin Belousov 	}								\
10185a0879daSDoug Moore 	/* Copy right->max_free.  Put root on rlist. */			\
10195a0879daSDoug Moore 	root->max_free = max_free;					\
10205a0879daSDoug Moore 	KASSERT(max_free == vm_map_entry_max_free_right(root, rlist),	\
10215a0879daSDoug Moore 	    ("%s: max_free not copied from right", __func__));		\
10229f701172SKonstantin Belousov 	root->left = rlist;						\
10239f701172SKonstantin Belousov 	rlist = root;							\
10249f701172SKonstantin Belousov 	root = y;							\
10259f701172SKonstantin Belousov } while (0)
10269f701172SKonstantin Belousov 
10279f701172SKonstantin Belousov #define SPLAY_RIGHT_STEP(root, y, llist, test) do {			\
10285a0879daSDoug Moore 	vm_size_t max_free;						\
10295a0879daSDoug Moore 									\
10305a0879daSDoug Moore 	/*								\
10315a0879daSDoug Moore 	 * Infer root->left->max_free == root->max_free when		\
10325a0879daSDoug Moore 	 * y->max_free < root->max_free || root->max_free == 0.		\
10335a0879daSDoug Moore 	 * Otherwise, look left to find it.				\
10345a0879daSDoug Moore 	 */								\
10359f701172SKonstantin Belousov 	y = root->right;						\
10365a0879daSDoug Moore 	max_free = root->max_free;					\
10375a0879daSDoug Moore 	KASSERT(max_free >= vm_map_entry_max_free_left(root, llist),	\
10385a0879daSDoug Moore 	    ("%s: max_free invariant fails", __func__));		\
10395a0879daSDoug Moore 	if (y == NULL ? max_free > 0 : max_free - 1 < y->max_free)	\
10405a0879daSDoug Moore 		max_free = vm_map_entry_max_free_left(root, llist);	\
10419f701172SKonstantin Belousov 	if (y != NULL && (test)) {					\
10429f701172SKonstantin Belousov 		/* Rotate left and make y root. */			\
10439f701172SKonstantin Belousov 		root->right = y->left;					\
10449f701172SKonstantin Belousov 		y->left = root;						\
10455a0879daSDoug Moore 		if (max_free < y->max_free)				\
10465a0879daSDoug Moore 			root->max_free = max_free = MAX(max_free,	\
10475a0879daSDoug Moore 			    vm_map_entry_max_free_right(root, y));	\
10489f701172SKonstantin Belousov 		root = y;						\
10499f701172SKonstantin Belousov 		y = root->right;					\
10509f701172SKonstantin Belousov 	}								\
10515a0879daSDoug Moore 	/* Copy left->max_free.  Put root on llist. */			\
10525a0879daSDoug Moore 	root->max_free = max_free;					\
10535a0879daSDoug Moore 	KASSERT(max_free == vm_map_entry_max_free_left(root, llist),	\
10545a0879daSDoug Moore 	    ("%s: max_free not copied from left", __func__));		\
10559f701172SKonstantin Belousov 	root->right = llist;						\
10569f701172SKonstantin Belousov 	llist = root;							\
10579f701172SKonstantin Belousov 	root = y;							\
10589f701172SKonstantin Belousov } while (0)
10599f701172SKonstantin Belousov 
10600164e057SAlan Cox /*
10619f701172SKonstantin Belousov  * Walk down the tree until we find addr or a NULL pointer where addr would go,
10629f701172SKonstantin Belousov  * breaking off left and right subtrees of nodes less than, or greater than
10639f701172SKonstantin Belousov  * addr.  Treat pointers to nodes with max_free < length as NULL pointers.
10649f701172SKonstantin Belousov  * llist and rlist are the two sides in reverse order (bottom-up), with llist
10659f701172SKonstantin Belousov  * linked by the right pointer and rlist linked by the left pointer in the
10665a0879daSDoug Moore  * vm_map_entry, and both lists terminated by &map->header.  This function, and
10675a0879daSDoug Moore  * the subsequent call to vm_map_splay_merge, rely on the start and end address
10685a0879daSDoug Moore  * values in &map->header.
10694e94f402SAlan Cox  */
10704e94f402SAlan Cox static vm_map_entry_t
10715a0879daSDoug Moore vm_map_splay_split(vm_map_t map, vm_offset_t addr, vm_size_t length,
10725a0879daSDoug Moore     vm_map_entry_t *out_llist, vm_map_entry_t *out_rlist)
10734e94f402SAlan Cox {
10745a0879daSDoug Moore 	vm_map_entry_t llist, rlist, root, y;
10754e94f402SAlan Cox 
10765a0879daSDoug Moore 	llist = rlist = &map->header;
10775a0879daSDoug Moore 	root = map->root;
10789f701172SKonstantin Belousov 	while (root != NULL && root->max_free >= length) {
10795a0879daSDoug Moore 		KASSERT(llist->end <= root->start && root->end <= rlist->start,
10805a0879daSDoug Moore 		    ("%s: root not within tree bounds", __func__));
10810164e057SAlan Cox 		if (addr < root->start) {
10829f701172SKonstantin Belousov 			SPLAY_LEFT_STEP(root, y, rlist,
10839f701172SKonstantin Belousov 			    y->max_free >= length && addr < y->start);
10847438d60bSAlan Cox 		} else if (addr >= root->end) {
10859f701172SKonstantin Belousov 			SPLAY_RIGHT_STEP(root, y, llist,
10869f701172SKonstantin Belousov 			    y->max_free >= length && addr >= y->end);
10877438d60bSAlan Cox 		} else
10887438d60bSAlan Cox 			break;
10890164e057SAlan Cox 	}
10909f701172SKonstantin Belousov 	*out_llist = llist;
10919f701172SKonstantin Belousov 	*out_rlist = rlist;
10929f701172SKonstantin Belousov 	return (root);
10939f701172SKonstantin Belousov }
10949f701172SKonstantin Belousov 
10959f701172SKonstantin Belousov static void
10969f701172SKonstantin Belousov vm_map_splay_findnext(vm_map_entry_t root, vm_map_entry_t *iolist)
10979f701172SKonstantin Belousov {
10989f701172SKonstantin Belousov 	vm_map_entry_t rlist, y;
10999f701172SKonstantin Belousov 
11009f701172SKonstantin Belousov 	root = root->right;
11019f701172SKonstantin Belousov 	rlist = *iolist;
11029f701172SKonstantin Belousov 	while (root != NULL)
11039f701172SKonstantin Belousov 		SPLAY_LEFT_STEP(root, y, rlist, true);
11049f701172SKonstantin Belousov 	*iolist = rlist;
11059f701172SKonstantin Belousov }
11069f701172SKonstantin Belousov 
11079f701172SKonstantin Belousov static void
11089f701172SKonstantin Belousov vm_map_splay_findprev(vm_map_entry_t root, vm_map_entry_t *iolist)
11099f701172SKonstantin Belousov {
11109f701172SKonstantin Belousov 	vm_map_entry_t llist, y;
11119f701172SKonstantin Belousov 
11129f701172SKonstantin Belousov 	root = root->left;
11139f701172SKonstantin Belousov 	llist = *iolist;
11149f701172SKonstantin Belousov 	while (root != NULL)
11159f701172SKonstantin Belousov 		SPLAY_RIGHT_STEP(root, y, llist, true);
11169f701172SKonstantin Belousov 	*iolist = llist;
11179f701172SKonstantin Belousov }
11180164e057SAlan Cox 
11195a0879daSDoug Moore static inline void
11205a0879daSDoug Moore vm_map_entry_swap(vm_map_entry_t *a, vm_map_entry_t *b)
11215a0879daSDoug Moore {
11225a0879daSDoug Moore 	vm_map_entry_t tmp;
11235a0879daSDoug Moore 
11245a0879daSDoug Moore 	tmp = *b;
11255a0879daSDoug Moore 	*b = *a;
11265a0879daSDoug Moore 	*a = tmp;
11275a0879daSDoug Moore }
11285a0879daSDoug Moore 
11290164e057SAlan Cox /*
11309f701172SKonstantin Belousov  * Walk back up the two spines, flip the pointers and set max_free.  The
11319f701172SKonstantin Belousov  * subtrees of the root go at the bottom of llist and rlist.
11320164e057SAlan Cox  */
11335a0879daSDoug Moore static void
11345a0879daSDoug Moore vm_map_splay_merge(vm_map_t map, vm_map_entry_t root,
11355a0879daSDoug Moore     vm_map_entry_t llist, vm_map_entry_t rlist)
11369f701172SKonstantin Belousov {
11375a0879daSDoug Moore 	vm_map_entry_t prev;
11385a0879daSDoug Moore 	vm_size_t max_free_left, max_free_right;
11399f701172SKonstantin Belousov 
11405a0879daSDoug Moore 	max_free_left = vm_map_entry_max_free_left(root, llist);
11415a0879daSDoug Moore 	if (llist != &map->header) {
11425a0879daSDoug Moore 		prev = root->left;
11435a0879daSDoug Moore 		do {
11440164e057SAlan Cox 			/*
11455a0879daSDoug Moore 			 * The max_free values of the children of llist are in
11465a0879daSDoug Moore 			 * llist->max_free and max_free_left.  Update with the
11475a0879daSDoug Moore 			 * max value.
11480164e057SAlan Cox 			 */
11495a0879daSDoug Moore 			llist->max_free = max_free_left =
11505a0879daSDoug Moore 			    MAX(llist->max_free, max_free_left);
11515a0879daSDoug Moore 			vm_map_entry_swap(&llist->right, &prev);
11525a0879daSDoug Moore 			vm_map_entry_swap(&prev, &llist);
11535a0879daSDoug Moore 		} while (llist != &map->header);
11545a0879daSDoug Moore 		root->left = prev;
11555a0879daSDoug Moore 	}
11565a0879daSDoug Moore 	max_free_right = vm_map_entry_max_free_right(root, rlist);
11575a0879daSDoug Moore 	if (rlist != &map->header) {
11585a0879daSDoug Moore 		prev = root->right;
11595a0879daSDoug Moore 		do {
11605a0879daSDoug Moore 			/*
11615a0879daSDoug Moore 			 * The max_free values of the children of rlist are in
11625a0879daSDoug Moore 			 * rlist->max_free and max_free_right.  Update with the
11635a0879daSDoug Moore 			 * max value.
11645a0879daSDoug Moore 			 */
11655a0879daSDoug Moore 			rlist->max_free = max_free_right =
11665a0879daSDoug Moore 			    MAX(rlist->max_free, max_free_right);
11675a0879daSDoug Moore 			vm_map_entry_swap(&rlist->left, &prev);
11685a0879daSDoug Moore 			vm_map_entry_swap(&prev, &rlist);
11695a0879daSDoug Moore 		} while (rlist != &map->header);
11705a0879daSDoug Moore 		root->right = prev;
11715a0879daSDoug Moore 	}
11725a0879daSDoug Moore 	root->max_free = MAX(max_free_left, max_free_right);
11735a0879daSDoug Moore 	map->root = root;
11744e94f402SAlan Cox }
11754e94f402SAlan Cox 
11764e94f402SAlan Cox /*
1177d1d3f7e1SDoug Moore  *	vm_map_splay:
1178d1d3f7e1SDoug Moore  *
1179d1d3f7e1SDoug Moore  *	The Sleator and Tarjan top-down splay algorithm with the
1180d1d3f7e1SDoug Moore  *	following variation.  Max_free must be computed bottom-up, so
1181d1d3f7e1SDoug Moore  *	on the downward pass, maintain the left and right spines in
1182d1d3f7e1SDoug Moore  *	reverse order.  Then, make a second pass up each side to fix
1183d1d3f7e1SDoug Moore  *	the pointers and compute max_free.  The time bound is O(log n)
1184d1d3f7e1SDoug Moore  *	amortized.
1185d1d3f7e1SDoug Moore  *
1186d1d3f7e1SDoug Moore  *	The new root is the vm_map_entry containing "addr", or else an
1187d1d3f7e1SDoug Moore  *	adjacent entry (lower if possible) if addr is not in the tree.
1188d1d3f7e1SDoug Moore  *
1189d1d3f7e1SDoug Moore  *	The map must be locked, and leaves it so.
1190d1d3f7e1SDoug Moore  *
1191d1d3f7e1SDoug Moore  *	Returns: the new root.
1192d1d3f7e1SDoug Moore  */
1193d1d3f7e1SDoug Moore static vm_map_entry_t
1194d1d3f7e1SDoug Moore vm_map_splay(vm_map_t map, vm_offset_t addr)
1195d1d3f7e1SDoug Moore {
1196d1d3f7e1SDoug Moore 	vm_map_entry_t llist, rlist, root;
1197d1d3f7e1SDoug Moore 
1198d1d3f7e1SDoug Moore 	root = vm_map_splay_split(map, addr, 0, &llist, &rlist);
1199d1d3f7e1SDoug Moore 	if (root != NULL) {
1200d1d3f7e1SDoug Moore 		/* do nothing */
1201d1d3f7e1SDoug Moore 	} else if (llist != &map->header) {
1202d1d3f7e1SDoug Moore 		/*
1203d1d3f7e1SDoug Moore 		 * Recover the greatest node in the left
1204d1d3f7e1SDoug Moore 		 * subtree and make it the root.
1205d1d3f7e1SDoug Moore 		 */
1206d1d3f7e1SDoug Moore 		root = llist;
1207d1d3f7e1SDoug Moore 		llist = root->right;
1208d1d3f7e1SDoug Moore 		root->right = NULL;
1209d1d3f7e1SDoug Moore 	} else if (rlist != &map->header) {
1210d1d3f7e1SDoug Moore 		/*
1211d1d3f7e1SDoug Moore 		 * Recover the least node in the right
1212d1d3f7e1SDoug Moore 		 * subtree and make it the root.
1213d1d3f7e1SDoug Moore 		 */
1214d1d3f7e1SDoug Moore 		root = rlist;
1215d1d3f7e1SDoug Moore 		rlist = root->left;
1216d1d3f7e1SDoug Moore 		root->left = NULL;
1217d1d3f7e1SDoug Moore 	} else {
1218d1d3f7e1SDoug Moore 		/* There is no root. */
1219d1d3f7e1SDoug Moore 		return (NULL);
1220d1d3f7e1SDoug Moore 	}
1221d1d3f7e1SDoug Moore 	vm_map_splay_merge(map, root, llist, rlist);
1222d1d3f7e1SDoug Moore 	VM_MAP_ASSERT_CONSISTENT(map);
1223d1d3f7e1SDoug Moore 	return (root);
1224d1d3f7e1SDoug Moore }
1225d1d3f7e1SDoug Moore 
1226d1d3f7e1SDoug Moore /*
1227df8bae1dSRodney W. Grimes  *	vm_map_entry_{un,}link:
1228df8bae1dSRodney W. Grimes  *
1229df8bae1dSRodney W. Grimes  *	Insert/remove entries from maps.
1230df8bae1dSRodney W. Grimes  */
12314e94f402SAlan Cox static void
12325a0879daSDoug Moore vm_map_entry_link(vm_map_t map, vm_map_entry_t entry)
123399c81ca9SAlan Cox {
12349f701172SKonstantin Belousov 	vm_map_entry_t llist, rlist, root;
123521c641b2SJohn Baldwin 
12369f701172SKonstantin Belousov 	CTR3(KTR_VM,
12379f701172SKonstantin Belousov 	    "vm_map_entry_link: map %p, nentries %d, entry %p", map,
12389f701172SKonstantin Belousov 	    map->nentries, entry);
12393a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
124099c81ca9SAlan Cox 	map->nentries++;
12415a0879daSDoug Moore 	root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist);
12429f701172SKonstantin Belousov 	KASSERT(root == NULL,
12439f701172SKonstantin Belousov 	    ("vm_map_entry_link: link object already mapped"));
12445a0879daSDoug Moore 	entry->prev = llist;
12455a0879daSDoug Moore 	entry->next = rlist;
12465a0879daSDoug Moore 	llist->next = rlist->prev = entry;
12475a0879daSDoug Moore 	entry->left = entry->right = NULL;
12485a0879daSDoug Moore 	vm_map_splay_merge(map, entry, llist, rlist);
12499f701172SKonstantin Belousov 	VM_MAP_ASSERT_CONSISTENT(map);
1250df8bae1dSRodney W. Grimes }
125199c81ca9SAlan Cox 
12529f701172SKonstantin Belousov enum unlink_merge_type {
12539f701172SKonstantin Belousov 	UNLINK_MERGE_PREV,
12549f701172SKonstantin Belousov 	UNLINK_MERGE_NONE,
12559f701172SKonstantin Belousov 	UNLINK_MERGE_NEXT
12569f701172SKonstantin Belousov };
12579f701172SKonstantin Belousov 
12584e94f402SAlan Cox static void
12595a0879daSDoug Moore vm_map_entry_unlink(vm_map_t map, vm_map_entry_t entry,
12609f701172SKonstantin Belousov     enum unlink_merge_type op)
126199c81ca9SAlan Cox {
12629f701172SKonstantin Belousov 	vm_map_entry_t llist, rlist, root, y;
126399c81ca9SAlan Cox 
12643a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
12655a0879daSDoug Moore 	root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist);
12669f701172SKonstantin Belousov 	KASSERT(root != NULL,
12679f701172SKonstantin Belousov 	    ("vm_map_entry_unlink: unlink object not mapped"));
12684e94f402SAlan Cox 
12699f701172SKonstantin Belousov 	switch (op) {
12709f701172SKonstantin Belousov 	case UNLINK_MERGE_PREV:
12719f701172SKonstantin Belousov 		vm_map_splay_findprev(root, &llist);
12729f701172SKonstantin Belousov 		llist->end = root->end;
12739f701172SKonstantin Belousov 		y = root->right;
12749f701172SKonstantin Belousov 		root = llist;
12759f701172SKonstantin Belousov 		llist = root->right;
12769f701172SKonstantin Belousov 		root->right = y;
12779f701172SKonstantin Belousov 		break;
12789f701172SKonstantin Belousov 	case UNLINK_MERGE_NEXT:
12799f701172SKonstantin Belousov 		vm_map_splay_findnext(root, &rlist);
12809f701172SKonstantin Belousov 		rlist->start = root->start;
12819f701172SKonstantin Belousov 		rlist->offset = root->offset;
12829f701172SKonstantin Belousov 		y = root->left;
12839f701172SKonstantin Belousov 		root = rlist;
12849f701172SKonstantin Belousov 		rlist = root->left;
12859f701172SKonstantin Belousov 		root->left = y;
12869f701172SKonstantin Belousov 		break;
12879f701172SKonstantin Belousov 	case UNLINK_MERGE_NONE:
12889f701172SKonstantin Belousov 		vm_map_splay_findprev(root, &llist);
12899f701172SKonstantin Belousov 		vm_map_splay_findnext(root, &rlist);
12905a0879daSDoug Moore 		if (llist != &map->header) {
12919f701172SKonstantin Belousov 			root = llist;
12929f701172SKonstantin Belousov 			llist = root->right;
12939f701172SKonstantin Belousov 			root->right = NULL;
12945a0879daSDoug Moore 		} else if (rlist != &map->header) {
12959f701172SKonstantin Belousov 			root = rlist;
12969f701172SKonstantin Belousov 			rlist = root->left;
12979f701172SKonstantin Belousov 			root->left = NULL;
12989f701172SKonstantin Belousov 		} else
12999f701172SKonstantin Belousov 			root = NULL;
13009f701172SKonstantin Belousov 		break;
13019f701172SKonstantin Belousov 	}
13025a0879daSDoug Moore 	y = entry->next;
13035a0879daSDoug Moore 	y->prev = entry->prev;
13045a0879daSDoug Moore 	y->prev->next = y;
13059f701172SKonstantin Belousov 	if (root != NULL)
13065a0879daSDoug Moore 		vm_map_splay_merge(map, root, llist, rlist);
13075a0879daSDoug Moore 	else
13085a0879daSDoug Moore 		map->root = NULL;
13099f701172SKonstantin Belousov 	VM_MAP_ASSERT_CONSISTENT(map);
131099c81ca9SAlan Cox 	map->nentries--;
131121c641b2SJohn Baldwin 	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
131221c641b2SJohn Baldwin 	    map->nentries, entry);
1313df8bae1dSRodney W. Grimes }
1314df8bae1dSRodney W. Grimes 
1315df8bae1dSRodney W. Grimes /*
1316fa581662SDoug Moore  *	vm_map_entry_resize:
13170164e057SAlan Cox  *
1318fa581662SDoug Moore  *	Resize a vm_map_entry, recompute the amount of free space that
1319fa581662SDoug Moore  *	follows it and propagate that value up the tree.
13200164e057SAlan Cox  *
13210164e057SAlan Cox  *	The map must be locked, and leaves it so.
13220164e057SAlan Cox  */
13230164e057SAlan Cox static void
1324fa581662SDoug Moore vm_map_entry_resize(vm_map_t map, vm_map_entry_t entry, vm_size_t grow_amount)
13250164e057SAlan Cox {
13269f701172SKonstantin Belousov 	vm_map_entry_t llist, rlist, root;
13270164e057SAlan Cox 
13289f701172SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
13295a0879daSDoug Moore 	root = vm_map_splay_split(map, entry->start, 0, &llist, &rlist);
13309f701172SKonstantin Belousov 	KASSERT(root != NULL,
1331fa581662SDoug Moore 	    ("%s: resize object not mapped", __func__));
13329f701172SKonstantin Belousov 	vm_map_splay_findnext(root, &rlist);
13339f701172SKonstantin Belousov 	root->right = NULL;
13341895f520SDoug Moore 	entry->end += grow_amount;
13355a0879daSDoug Moore 	vm_map_splay_merge(map, root, llist, rlist);
13369f701172SKonstantin Belousov 	VM_MAP_ASSERT_CONSISTENT(map);
1337fa581662SDoug Moore 	CTR4(KTR_VM, "%s: map %p, nentries %d, entry %p",
133873f11451SDoug Moore 	    __func__, map, map->nentries, entry);
13390164e057SAlan Cox }
13400164e057SAlan Cox 
13410164e057SAlan Cox /*
1342d1d3f7e1SDoug Moore  *	vm_map_lookup_entry:	[ internal use only ]
1343df8bae1dSRodney W. Grimes  *
1344d1d3f7e1SDoug Moore  *	Finds the map entry containing (or
1345d1d3f7e1SDoug Moore  *	immediately preceding) the specified address
1346d1d3f7e1SDoug Moore  *	in the given map; the entry is returned
1347d1d3f7e1SDoug Moore  *	in the "entry" parameter.  The boolean
1348d1d3f7e1SDoug Moore  *	result indicates whether the address is
1349d1d3f7e1SDoug Moore  *	actually contained in the map.
1350df8bae1dSRodney W. Grimes  */
1351d1d3f7e1SDoug Moore boolean_t
1352d1d3f7e1SDoug Moore vm_map_lookup_entry(
1353d1d3f7e1SDoug Moore 	vm_map_t map,
1354d1d3f7e1SDoug Moore 	vm_offset_t address,
1355d1d3f7e1SDoug Moore 	vm_map_entry_t *entry)	/* OUT */
1356df8bae1dSRodney W. Grimes {
1357d1d3f7e1SDoug Moore 	vm_map_entry_t cur, lbound;
1358d1d3f7e1SDoug Moore 	boolean_t locked;
1359df8bae1dSRodney W. Grimes 
13604c3ef59eSAlan Cox 	/*
13614c3ef59eSAlan Cox 	 * If the map is empty, then the map entry immediately preceding
1362d1d3f7e1SDoug Moore 	 * "address" is the map's header.
13634c3ef59eSAlan Cox 	 */
1364d1d3f7e1SDoug Moore 	cur = map->root;
1365d1d3f7e1SDoug Moore 	if (cur == NULL) {
13664e94f402SAlan Cox 		*entry = &map->header;
1367d1d3f7e1SDoug Moore 		return (FALSE);
1368d1d3f7e1SDoug Moore 	}
1369d1d3f7e1SDoug Moore 	if (address >= cur->start && cur->end > address) {
1370d1d3f7e1SDoug Moore 		*entry = cur;
1371d1d3f7e1SDoug Moore 		return (TRUE);
13729f701172SKonstantin Belousov 	}
13739f701172SKonstantin Belousov 	if ((locked = vm_map_locked(map)) ||
137405a8c414SAlan Cox 	    sx_try_upgrade(&map->lock)) {
137505a8c414SAlan Cox 		/*
137605a8c414SAlan Cox 		 * Splay requires a write lock on the map.  However, it only
137705a8c414SAlan Cox 		 * restructures the binary search tree; it does not otherwise
137805a8c414SAlan Cox 		 * change the map.  Thus, the map's timestamp need not change
137905a8c414SAlan Cox 		 * on a temporary upgrade.
138005a8c414SAlan Cox 		 */
1381d1d3f7e1SDoug Moore 		cur = vm_map_splay(map, address);
138205a8c414SAlan Cox 		if (!locked)
138305a8c414SAlan Cox 			sx_downgrade(&map->lock);
1384d1d3f7e1SDoug Moore 
1385d1d3f7e1SDoug Moore 		/*
1386d1d3f7e1SDoug Moore 		 * If "address" is contained within a map entry, the new root
1387d1d3f7e1SDoug Moore 		 * is that map entry.  Otherwise, the new root is a map entry
1388d1d3f7e1SDoug Moore 		 * immediately before or after "address".
1389d1d3f7e1SDoug Moore 		 */
1390d1d3f7e1SDoug Moore 		if (address < cur->start) {
1391d1d3f7e1SDoug Moore 			*entry = &map->header;
1392d1d3f7e1SDoug Moore 			return (FALSE);
1393d1d3f7e1SDoug Moore 		}
1394d1d3f7e1SDoug Moore 		*entry = cur;
1395d1d3f7e1SDoug Moore 		return (address < cur->end);
13969f701172SKonstantin Belousov 	}
139705a8c414SAlan Cox 	/*
139805a8c414SAlan Cox 	 * Since the map is only locked for read access, perform a
1399d1d3f7e1SDoug Moore 	 * standard binary search tree lookup for "address".
140005a8c414SAlan Cox 	 */
1401d1d3f7e1SDoug Moore 	lbound = &map->header;
14029f701172SKonstantin Belousov 	do {
1403d1d3f7e1SDoug Moore 		if (address < cur->start) {
1404d1d3f7e1SDoug Moore 			cur = cur->left;
1405d1d3f7e1SDoug Moore 		} else if (cur->end <= address) {
1406d1d3f7e1SDoug Moore 			lbound = cur;
1407d1d3f7e1SDoug Moore 			cur = cur->right;
14089f701172SKonstantin Belousov 		} else {
1409d1d3f7e1SDoug Moore 			*entry = cur;
1410d1d3f7e1SDoug Moore 			return (TRUE);
141105a8c414SAlan Cox 		}
1412d1d3f7e1SDoug Moore 	} while (cur != NULL);
1413d1d3f7e1SDoug Moore 	*entry = lbound;
1414d1d3f7e1SDoug Moore 	return (FALSE);
1415df8bae1dSRodney W. Grimes }
1416df8bae1dSRodney W. Grimes 
1417df8bae1dSRodney W. Grimes /*
141830dcfc09SJohn Dyson  *	vm_map_insert:
141930dcfc09SJohn Dyson  *
142030dcfc09SJohn Dyson  *	Inserts the given whole VM object into the target
142130dcfc09SJohn Dyson  *	map at the specified address range.  The object's
142230dcfc09SJohn Dyson  *	size should match that of the address range.
142330dcfc09SJohn Dyson  *
142430dcfc09SJohn Dyson  *	Requires that the map be locked, and leaves it so.
14252aaeadf8SMatthew Dillon  *
14262aaeadf8SMatthew Dillon  *	If object is non-NULL, ref count must be bumped by caller
14272aaeadf8SMatthew Dillon  *	prior to making call to account for the new entry.
142830dcfc09SJohn Dyson  */
142930dcfc09SJohn Dyson int
1430b9dcd593SBruce Evans vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
143133314db0SAlan Cox     vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow)
143230dcfc09SJohn Dyson {
1433723413beSDoug Moore 	vm_map_entry_t new_entry, prev_entry;
1434ef694c1aSEdward Tomasz Napierala 	struct ucred *cred;
14351569205fSKonstantin Belousov 	vm_eflags_t protoeflags;
14368211bd45SKonstantin Belousov 	vm_inherit_t inheritance;
143730dcfc09SJohn Dyson 
14383a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
14392e47807cSJeff Roberson 	KASSERT(object != kernel_object ||
144033314db0SAlan Cox 	    (cow & MAP_COPY_ON_WRITE) == 0,
14412e47807cSJeff Roberson 	    ("vm_map_insert: kernel object and COW"));
144233314db0SAlan Cox 	KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0,
144333314db0SAlan Cox 	    ("vm_map_insert: paradoxical MAP_NOFAULT request"));
144400de6773SKonstantin Belousov 	KASSERT((prot & ~max) == 0,
144500de6773SKonstantin Belousov 	    ("prot %#x is not subset of max_prot %#x", prot, max));
14463a0916b8SKonstantin Belousov 
144730dcfc09SJohn Dyson 	/*
144830dcfc09SJohn Dyson 	 * Check that the start and end points are not bogus.
144930dcfc09SJohn Dyson 	 */
1450f0165b1cSKonstantin Belousov 	if (start < vm_map_min(map) || end > vm_map_max(map) ||
1451f0165b1cSKonstantin Belousov 	    start >= end)
145230dcfc09SJohn Dyson 		return (KERN_INVALID_ADDRESS);
145330dcfc09SJohn Dyson 
145430dcfc09SJohn Dyson 	/*
145530dcfc09SJohn Dyson 	 * Find the entry prior to the proposed starting address; if it's part
145630dcfc09SJohn Dyson 	 * of an existing entry, this range is bogus.
145730dcfc09SJohn Dyson 	 */
1458723413beSDoug Moore 	if (vm_map_lookup_entry(map, start, &prev_entry))
145930dcfc09SJohn Dyson 		return (KERN_NO_SPACE);
146030dcfc09SJohn Dyson 
146130dcfc09SJohn Dyson 	/*
146230dcfc09SJohn Dyson 	 * Assert that the next entry doesn't overlap the end point.
146330dcfc09SJohn Dyson 	 */
14641c5196c3SKonstantin Belousov 	if (prev_entry->next->start < end)
146530dcfc09SJohn Dyson 		return (KERN_NO_SPACE);
146630dcfc09SJohn Dyson 
146719bd0d9cSKonstantin Belousov 	if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL ||
146819bd0d9cSKonstantin Belousov 	    max != VM_PROT_NONE))
146919bd0d9cSKonstantin Belousov 		return (KERN_INVALID_ARGUMENT);
147019bd0d9cSKonstantin Belousov 
1471afa07f7eSJohn Dyson 	protoeflags = 0;
1472afa07f7eSJohn Dyson 	if (cow & MAP_COPY_ON_WRITE)
1473e5f13bddSAlan Cox 		protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY;
147433314db0SAlan Cox 	if (cow & MAP_NOFAULT)
1475afa07f7eSJohn Dyson 		protoeflags |= MAP_ENTRY_NOFAULT;
14764f79d873SMatthew Dillon 	if (cow & MAP_DISABLE_SYNCER)
14774f79d873SMatthew Dillon 		protoeflags |= MAP_ENTRY_NOSYNC;
14789730a5daSPaul Saab 	if (cow & MAP_DISABLE_COREDUMP)
14799730a5daSPaul Saab 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
1480712efe66SAlan Cox 	if (cow & MAP_STACK_GROWS_DOWN)
1481712efe66SAlan Cox 		protoeflags |= MAP_ENTRY_GROWS_DOWN;
1482712efe66SAlan Cox 	if (cow & MAP_STACK_GROWS_UP)
1483712efe66SAlan Cox 		protoeflags |= MAP_ENTRY_GROWS_UP;
148484110e7eSKonstantin Belousov 	if (cow & MAP_VN_WRITECOUNT)
148584110e7eSKonstantin Belousov 		protoeflags |= MAP_ENTRY_VN_WRITECNT;
148678022527SKonstantin Belousov 	if (cow & MAP_VN_EXEC)
148778022527SKonstantin Belousov 		protoeflags |= MAP_ENTRY_VN_EXEC;
148819bd0d9cSKonstantin Belousov 	if ((cow & MAP_CREATE_GUARD) != 0)
148919bd0d9cSKonstantin Belousov 		protoeflags |= MAP_ENTRY_GUARD;
149019bd0d9cSKonstantin Belousov 	if ((cow & MAP_CREATE_STACK_GAP_DN) != 0)
149119bd0d9cSKonstantin Belousov 		protoeflags |= MAP_ENTRY_STACK_GAP_DN;
149219bd0d9cSKonstantin Belousov 	if ((cow & MAP_CREATE_STACK_GAP_UP) != 0)
149319bd0d9cSKonstantin Belousov 		protoeflags |= MAP_ENTRY_STACK_GAP_UP;
14948211bd45SKonstantin Belousov 	if (cow & MAP_INHERIT_SHARE)
14958211bd45SKonstantin Belousov 		inheritance = VM_INHERIT_SHARE;
14968211bd45SKonstantin Belousov 	else
14978211bd45SKonstantin Belousov 		inheritance = VM_INHERIT_DEFAULT;
14984f79d873SMatthew Dillon 
1499ef694c1aSEdward Tomasz Napierala 	cred = NULL;
150019bd0d9cSKonstantin Belousov 	if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
15013364c323SKonstantin Belousov 		goto charged;
15023364c323SKonstantin Belousov 	if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
15033364c323SKonstantin Belousov 	    ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
15043364c323SKonstantin Belousov 		if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start))
15053364c323SKonstantin Belousov 			return (KERN_RESOURCE_SHORTAGE);
15061569205fSKonstantin Belousov 		KASSERT(object == NULL ||
15071569205fSKonstantin Belousov 		    (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 ||
1508ef694c1aSEdward Tomasz Napierala 		    object->cred == NULL,
15091569205fSKonstantin Belousov 		    ("overcommit: vm_map_insert o %p", object));
1510ef694c1aSEdward Tomasz Napierala 		cred = curthread->td_ucred;
15113364c323SKonstantin Belousov 	}
15123364c323SKonstantin Belousov 
15133364c323SKonstantin Belousov charged:
1514f8616ebfSAlan Cox 	/* Expand the kernel pmap, if necessary. */
1515f8616ebfSAlan Cox 	if (map == kernel_map && end > kernel_vm_end)
1516f8616ebfSAlan Cox 		pmap_growkernel(end);
15171d284e00SAlan Cox 	if (object != NULL) {
151830dcfc09SJohn Dyson 		/*
15191d284e00SAlan Cox 		 * OBJ_ONEMAPPING must be cleared unless this mapping
15201d284e00SAlan Cox 		 * is trivially proven to be the only mapping for any
15211d284e00SAlan Cox 		 * of the object's pages.  (Object granularity
15221d284e00SAlan Cox 		 * reference counting is insufficient to recognize
15231d284e00SAlan Cox 		 * aliases with precision.)
152430dcfc09SJohn Dyson 		 */
152589f6b863SAttilio Rao 		VM_OBJECT_WLOCK(object);
15261d284e00SAlan Cox 		if (object->ref_count > 1 || object->shadow_count != 0)
15272aaeadf8SMatthew Dillon 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
152889f6b863SAttilio Rao 		VM_OBJECT_WUNLOCK(object);
15292203c46dSMark Johnston 	} else if ((prev_entry->eflags & ~MAP_ENTRY_USER_WIRED) ==
15302203c46dSMark Johnston 	    protoeflags &&
153178022527SKonstantin Belousov 	    (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP |
153278022527SKonstantin Belousov 	    MAP_VN_EXEC)) == 0 &&
1533737e25f7SAlan Cox 	    prev_entry->end == start && (prev_entry->cred == cred ||
15343364c323SKonstantin Belousov 	    (prev_entry->object.vm_object != NULL &&
15351569205fSKonstantin Belousov 	    prev_entry->object.vm_object->cred == cred)) &&
15368cc7e047SJohn Dyson 	    vm_object_coalesce(prev_entry->object.vm_object,
153757a21abaSAlan Cox 	    prev_entry->offset,
15388cc7e047SJohn Dyson 	    (vm_size_t)(prev_entry->end - prev_entry->start),
153960169c88SAlan Cox 	    (vm_size_t)(end - prev_entry->end), cred != NULL &&
154060169c88SAlan Cox 	    (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) {
154130dcfc09SJohn Dyson 		/*
15422aaeadf8SMatthew Dillon 		 * We were able to extend the object.  Determine if we
15432aaeadf8SMatthew Dillon 		 * can extend the previous map entry to include the
15442aaeadf8SMatthew Dillon 		 * new range as well.
154530dcfc09SJohn Dyson 		 */
15461569205fSKonstantin Belousov 		if (prev_entry->inheritance == inheritance &&
15471569205fSKonstantin Belousov 		    prev_entry->protection == prot &&
1548737e25f7SAlan Cox 		    prev_entry->max_protection == max &&
1549737e25f7SAlan Cox 		    prev_entry->wired_count == 0) {
1550737e25f7SAlan Cox 			KASSERT((prev_entry->eflags & MAP_ENTRY_USER_WIRED) ==
1551737e25f7SAlan Cox 			    0, ("prev_entry %p has incoherent wiring",
1552737e25f7SAlan Cox 			    prev_entry));
155319bd0d9cSKonstantin Belousov 			if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0)
15541569205fSKonstantin Belousov 				map->size += end - prev_entry->end;
1555fa581662SDoug Moore 			vm_map_entry_resize(map, prev_entry,
15561895f520SDoug Moore 			    end - prev_entry->end);
15574e71e795SMatthew Dillon 			vm_map_simplify_entry(map, prev_entry);
155830dcfc09SJohn Dyson 			return (KERN_SUCCESS);
155930dcfc09SJohn Dyson 		}
15608cc7e047SJohn Dyson 
15612aaeadf8SMatthew Dillon 		/*
15622aaeadf8SMatthew Dillon 		 * If we can extend the object but cannot extend the
15632aaeadf8SMatthew Dillon 		 * map entry, we have to create a new map entry.  We
15642aaeadf8SMatthew Dillon 		 * must bump the ref count on the extended object to
15654e71e795SMatthew Dillon 		 * account for it.  object may be NULL.
15662aaeadf8SMatthew Dillon 		 */
15672aaeadf8SMatthew Dillon 		object = prev_entry->object.vm_object;
15682aaeadf8SMatthew Dillon 		offset = prev_entry->offset +
15692aaeadf8SMatthew Dillon 		    (prev_entry->end - prev_entry->start);
15708cc7e047SJohn Dyson 		vm_object_reference(object);
1571ef694c1aSEdward Tomasz Napierala 		if (cred != NULL && object != NULL && object->cred != NULL &&
15723364c323SKonstantin Belousov 		    !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
15733364c323SKonstantin Belousov 			/* Object already accounts for this uid. */
1574ef694c1aSEdward Tomasz Napierala 			cred = NULL;
15753364c323SKonstantin Belousov 		}
1576b18bfc3dSJohn Dyson 	}
157760169c88SAlan Cox 	if (cred != NULL)
157860169c88SAlan Cox 		crhold(cred);
15792aaeadf8SMatthew Dillon 
15802aaeadf8SMatthew Dillon 	/*
158130dcfc09SJohn Dyson 	 * Create a new entry
158230dcfc09SJohn Dyson 	 */
158330dcfc09SJohn Dyson 	new_entry = vm_map_entry_create(map);
158430dcfc09SJohn Dyson 	new_entry->start = start;
158530dcfc09SJohn Dyson 	new_entry->end = end;
1586ef694c1aSEdward Tomasz Napierala 	new_entry->cred = NULL;
158730dcfc09SJohn Dyson 
1588afa07f7eSJohn Dyson 	new_entry->eflags = protoeflags;
158930dcfc09SJohn Dyson 	new_entry->object.vm_object = object;
159030dcfc09SJohn Dyson 	new_entry->offset = offset;
15912267af78SJulian Elischer 
15928211bd45SKonstantin Belousov 	new_entry->inheritance = inheritance;
159330dcfc09SJohn Dyson 	new_entry->protection = prot;
159430dcfc09SJohn Dyson 	new_entry->max_protection = max;
159530dcfc09SJohn Dyson 	new_entry->wired_count = 0;
1596997ac690SKonstantin Belousov 	new_entry->wiring_thread = NULL;
159713458803SAlan Cox 	new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT;
1598381b7242SAlan Cox 	new_entry->next_read = start;
1599e5f251d2SAlan Cox 
1600ef694c1aSEdward Tomasz Napierala 	KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry),
16011569205fSKonstantin Belousov 	    ("overcommit: vm_map_insert leaks vm_map %p", new_entry));
1602ef694c1aSEdward Tomasz Napierala 	new_entry->cred = cred;
16033364c323SKonstantin Belousov 
160430dcfc09SJohn Dyson 	/*
160530dcfc09SJohn Dyson 	 * Insert the new entry into the list
160630dcfc09SJohn Dyson 	 */
16079f701172SKonstantin Belousov 	vm_map_entry_link(map, new_entry);
160819bd0d9cSKonstantin Belousov 	if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0)
160930dcfc09SJohn Dyson 		map->size += new_entry->end - new_entry->start;
161030dcfc09SJohn Dyson 
16111a484d28SMatthew Dillon 	/*
1612eaaf9f7fSAlan Cox 	 * Try to coalesce the new entry with both the previous and next
1613eaaf9f7fSAlan Cox 	 * entries in the list.  Previously, we only attempted to coalesce
1614eaaf9f7fSAlan Cox 	 * with the previous entry when object is NULL.  Here, we handle the
1615eaaf9f7fSAlan Cox 	 * other cases, which are less common.
16161a484d28SMatthew Dillon 	 */
16174e71e795SMatthew Dillon 	vm_map_simplify_entry(map, new_entry);
16184e71e795SMatthew Dillon 
16191569205fSKonstantin Belousov 	if ((cow & (MAP_PREFAULT | MAP_PREFAULT_PARTIAL)) != 0) {
16201569205fSKonstantin Belousov 		vm_map_pmap_enter(map, start, prot, object, OFF_TO_IDX(offset),
16211569205fSKonstantin Belousov 		    end - start, cow & MAP_PREFAULT_PARTIAL);
16224f79d873SMatthew Dillon 	}
1623e972780aSAlan Cox 
162430dcfc09SJohn Dyson 	return (KERN_SUCCESS);
162530dcfc09SJohn Dyson }
162630dcfc09SJohn Dyson 
162730dcfc09SJohn Dyson /*
16280164e057SAlan Cox  *	vm_map_findspace:
16290164e057SAlan Cox  *
16300164e057SAlan Cox  *	Find the first fit (lowest VM address) for "length" free bytes
16310164e057SAlan Cox  *	beginning at address >= start in the given map.
16320164e057SAlan Cox  *
16339f701172SKonstantin Belousov  *	In a vm_map_entry, "max_free" is the maximum amount of
16349f701172SKonstantin Belousov  *	contiguous free space between an entry in its subtree and a
16359f701172SKonstantin Belousov  *	neighbor of that entry.  This allows finding a free region in
16369f701172SKonstantin Belousov  *	one path down the tree, so O(log n) amortized with splay
16379f701172SKonstantin Belousov  *	trees.
16380164e057SAlan Cox  *
16390164e057SAlan Cox  *	The map must be locked, and leaves it so.
16400164e057SAlan Cox  *
16419f701172SKonstantin Belousov  *	Returns: starting address if sufficient space,
16429f701172SKonstantin Belousov  *		 vm_map_max(map)-length+1 if insufficient space.
1643df8bae1dSRodney W. Grimes  */
16449f701172SKonstantin Belousov vm_offset_t
16459f701172SKonstantin Belousov vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length)
1646df8bae1dSRodney W. Grimes {
16479f701172SKonstantin Belousov 	vm_map_entry_t llist, rlist, root, y;
16489f701172SKonstantin Belousov 	vm_size_t left_length;
1649e65d58a0SDoug Moore 	vm_offset_t gap_end;
1650df8bae1dSRodney W. Grimes 
1651986b43f8SAlan Cox 	/*
1652986b43f8SAlan Cox 	 * Request must fit within min/max VM address and must avoid
1653986b43f8SAlan Cox 	 * address wrap.
1654986b43f8SAlan Cox 	 */
1655f0165b1cSKonstantin Belousov 	start = MAX(start, vm_map_min(map));
1656e65d58a0SDoug Moore 	if (start >= vm_map_max(map) || length > vm_map_max(map) - start)
16579f701172SKonstantin Belousov 		return (vm_map_max(map) - length + 1);
1658df8bae1dSRodney W. Grimes 
16590164e057SAlan Cox 	/* Empty tree means wide open address space. */
16609f701172SKonstantin Belousov 	if (map->root == NULL)
16619f701172SKonstantin Belousov 		return (start);
16620164e057SAlan Cox 
16630164e057SAlan Cox 	/*
1664e65d58a0SDoug Moore 	 * After splay_split, if start is within an entry, push it to the start
1665e65d58a0SDoug Moore 	 * of the following gap.  If rlist is at the end of the gap containing
1666e65d58a0SDoug Moore 	 * start, save the end of that gap in gap_end to see if the gap is big
1667e65d58a0SDoug Moore 	 * enough; otherwise set gap_end to start skip gap-checking and move
1668e65d58a0SDoug Moore 	 * directly to a search of the right subtree.
16690164e057SAlan Cox 	 */
16705a0879daSDoug Moore 	root = vm_map_splay_split(map, start, length, &llist, &rlist);
1671e65d58a0SDoug Moore 	gap_end = rlist->start;
1672e65d58a0SDoug Moore 	if (root != NULL) {
16739f701172SKonstantin Belousov 		start = root->end;
1674e65d58a0SDoug Moore 		if (root->right != NULL)
1675e65d58a0SDoug Moore 			gap_end = start;
1676e65d58a0SDoug Moore 	} else if (rlist != &map->header) {
16779f701172SKonstantin Belousov 		root = rlist;
16789f701172SKonstantin Belousov 		rlist = root->left;
16799f701172SKonstantin Belousov 		root->left = NULL;
16809f701172SKonstantin Belousov 	} else {
16819f701172SKonstantin Belousov 		root = llist;
16829f701172SKonstantin Belousov 		llist = root->right;
16839f701172SKonstantin Belousov 		root->right = NULL;
16840164e057SAlan Cox 	}
16855a0879daSDoug Moore 	vm_map_splay_merge(map, root, llist, rlist);
16869f701172SKonstantin Belousov 	VM_MAP_ASSERT_CONSISTENT(map);
1687e65d58a0SDoug Moore 	if (length <= gap_end - start)
16889f701172SKonstantin Belousov 		return (start);
16890164e057SAlan Cox 
16900164e057SAlan Cox 	/* With max_free, can immediately tell if no solution. */
16919f701172SKonstantin Belousov 	if (root->right == NULL || length > root->right->max_free)
16929f701172SKonstantin Belousov 		return (vm_map_max(map) - length + 1);
16930164e057SAlan Cox 
16940164e057SAlan Cox 	/*
16959f701172SKonstantin Belousov 	 * Splay for the least large-enough gap in the right subtree.
16960164e057SAlan Cox 	 */
16975a0879daSDoug Moore 	llist = rlist = &map->header;
16989f701172SKonstantin Belousov 	for (left_length = 0;;
16995a0879daSDoug Moore 	    left_length = vm_map_entry_max_free_left(root, llist)) {
17009f701172SKonstantin Belousov 		if (length <= left_length)
17019f701172SKonstantin Belousov 			SPLAY_LEFT_STEP(root, y, rlist,
17025a0879daSDoug Moore 			    length <= vm_map_entry_max_free_left(y, llist));
17039f701172SKonstantin Belousov 		else
17049f701172SKonstantin Belousov 			SPLAY_RIGHT_STEP(root, y, llist,
17055a0879daSDoug Moore 			    length > vm_map_entry_max_free_left(y, root));
17069f701172SKonstantin Belousov 		if (root == NULL)
17079f701172SKonstantin Belousov 			break;
17080164e057SAlan Cox 	}
17099f701172SKonstantin Belousov 	root = llist;
17109f701172SKonstantin Belousov 	llist = root->right;
17119f701172SKonstantin Belousov 	root->right = NULL;
17125a0879daSDoug Moore 	if (rlist != &map->header) {
17135a0879daSDoug Moore 		y = rlist;
17149f701172SKonstantin Belousov 		rlist = y->left;
17159f701172SKonstantin Belousov 		y->left = NULL;
17165a0879daSDoug Moore 		vm_map_splay_merge(map, y, &map->header, rlist);
17175a0879daSDoug Moore 		y->max_free = MAX(
17185a0879daSDoug Moore 		    vm_map_entry_max_free_left(y, root),
17195a0879daSDoug Moore 		    vm_map_entry_max_free_right(y, &map->header));
17209f701172SKonstantin Belousov 		root->right = y;
17219f701172SKonstantin Belousov 	}
17225a0879daSDoug Moore 	vm_map_splay_merge(map, root, llist, &map->header);
17239f701172SKonstantin Belousov 	VM_MAP_ASSERT_CONSISTENT(map);
17249f701172SKonstantin Belousov 	return (root->end);
1725df8bae1dSRodney W. Grimes }
1726df8bae1dSRodney W. Grimes 
1727d239bd3cSKonstantin Belousov int
1728d239bd3cSKonstantin Belousov vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1729b8ca4ef2SAlan Cox     vm_offset_t start, vm_size_t length, vm_prot_t prot,
1730d239bd3cSKonstantin Belousov     vm_prot_t max, int cow)
1731d239bd3cSKonstantin Belousov {
1732b8ca4ef2SAlan Cox 	vm_offset_t end;
1733d239bd3cSKonstantin Belousov 	int result;
1734d239bd3cSKonstantin Belousov 
1735d239bd3cSKonstantin Belousov 	end = start + length;
17364648ba0aSKonstantin Belousov 	KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 ||
17374648ba0aSKonstantin Belousov 	    object == NULL,
17384648ba0aSKonstantin Belousov 	    ("vm_map_fixed: non-NULL backing object for stack"));
1739897d81a0SKonstantin Belousov 	vm_map_lock(map);
1740d239bd3cSKonstantin Belousov 	VM_MAP_RANGE_CHECK(map, start, end);
174111c42bccSKonstantin Belousov 	if ((cow & MAP_CHECK_EXCL) == 0)
174211c42bccSKonstantin Belousov 		vm_map_delete(map, start, end);
17434648ba0aSKonstantin Belousov 	if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) {
17444648ba0aSKonstantin Belousov 		result = vm_map_stack_locked(map, start, length, sgrowsiz,
17454648ba0aSKonstantin Belousov 		    prot, max, cow);
17464648ba0aSKonstantin Belousov 	} else {
17474648ba0aSKonstantin Belousov 		result = vm_map_insert(map, object, offset, start, end,
17484648ba0aSKonstantin Belousov 		    prot, max, cow);
17494648ba0aSKonstantin Belousov 	}
1750d239bd3cSKonstantin Belousov 	vm_map_unlock(map);
1751d239bd3cSKonstantin Belousov 	return (result);
1752d239bd3cSKonstantin Belousov }
1753d239bd3cSKonstantin Belousov 
1754fa50a355SKonstantin Belousov static const int aslr_pages_rnd_64[2] = {0x1000, 0x10};
1755fa50a355SKonstantin Belousov static const int aslr_pages_rnd_32[2] = {0x100, 0x4};
1756fa50a355SKonstantin Belousov 
1757fa50a355SKonstantin Belousov static int cluster_anon = 1;
1758fa50a355SKonstantin Belousov SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW,
1759fa50a355SKonstantin Belousov     &cluster_anon, 0,
1760484e9d03SKonstantin Belousov     "Cluster anonymous mappings: 0 = no, 1 = yes if no hint, 2 = always");
1761484e9d03SKonstantin Belousov 
1762484e9d03SKonstantin Belousov static bool
1763484e9d03SKonstantin Belousov clustering_anon_allowed(vm_offset_t addr)
1764484e9d03SKonstantin Belousov {
1765484e9d03SKonstantin Belousov 
1766484e9d03SKonstantin Belousov 	switch (cluster_anon) {
1767484e9d03SKonstantin Belousov 	case 0:
1768484e9d03SKonstantin Belousov 		return (false);
1769484e9d03SKonstantin Belousov 	case 1:
1770484e9d03SKonstantin Belousov 		return (addr == 0);
1771484e9d03SKonstantin Belousov 	case 2:
1772484e9d03SKonstantin Belousov 	default:
1773484e9d03SKonstantin Belousov 		return (true);
1774484e9d03SKonstantin Belousov 	}
1775484e9d03SKonstantin Belousov }
1776fa50a355SKonstantin Belousov 
1777fa50a355SKonstantin Belousov static long aslr_restarts;
1778fa50a355SKonstantin Belousov SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD,
1779fa50a355SKonstantin Belousov     &aslr_restarts, 0,
1780fa50a355SKonstantin Belousov     "Number of aslr failures");
1781fa50a355SKonstantin Belousov 
1782fa50a355SKonstantin Belousov #define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
1783fa50a355SKonstantin Belousov 
1784df8bae1dSRodney W. Grimes /*
1785fec29688SAlan Cox  * Searches for the specified amount of free space in the given map with the
1786fec29688SAlan Cox  * specified alignment.  Performs an address-ordered, first-fit search from
1787fec29688SAlan Cox  * the given address "*addr", with an optional upper bound "max_addr".  If the
1788fec29688SAlan Cox  * parameter "alignment" is zero, then the alignment is computed from the
1789fec29688SAlan Cox  * given (object, offset) pair so as to enable the greatest possible use of
1790fec29688SAlan Cox  * superpage mappings.  Returns KERN_SUCCESS and the address of the free space
1791fec29688SAlan Cox  * in "*addr" if successful.  Otherwise, returns KERN_NO_SPACE.
1792fec29688SAlan Cox  *
1793fec29688SAlan Cox  * The map must be locked.  Initially, there must be at least "length" bytes
1794fec29688SAlan Cox  * of free space at the given address.
1795fec29688SAlan Cox  */
1796fec29688SAlan Cox static int
1797fec29688SAlan Cox vm_map_alignspace(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1798fec29688SAlan Cox     vm_offset_t *addr, vm_size_t length, vm_offset_t max_addr,
1799fec29688SAlan Cox     vm_offset_t alignment)
1800fec29688SAlan Cox {
1801fec29688SAlan Cox 	vm_offset_t aligned_addr, free_addr;
1802fec29688SAlan Cox 
1803fec29688SAlan Cox 	VM_MAP_ASSERT_LOCKED(map);
1804fec29688SAlan Cox 	free_addr = *addr;
18059f701172SKonstantin Belousov 	KASSERT(free_addr == vm_map_findspace(map, free_addr, length),
1806e65d58a0SDoug Moore 	    ("caller failed to provide space %#jx at address %p",
1807e65d58a0SDoug Moore 	     (uintmax_t)length, (void *)free_addr));
1808fec29688SAlan Cox 	for (;;) {
1809fec29688SAlan Cox 		/*
1810fec29688SAlan Cox 		 * At the start of every iteration, the free space at address
1811fec29688SAlan Cox 		 * "*addr" is at least "length" bytes.
1812fec29688SAlan Cox 		 */
1813fec29688SAlan Cox 		if (alignment == 0)
1814fec29688SAlan Cox 			pmap_align_superpage(object, offset, addr, length);
1815fec29688SAlan Cox 		else if ((*addr & (alignment - 1)) != 0) {
1816fec29688SAlan Cox 			*addr &= ~(alignment - 1);
1817fec29688SAlan Cox 			*addr += alignment;
1818fec29688SAlan Cox 		}
1819fec29688SAlan Cox 		aligned_addr = *addr;
1820fec29688SAlan Cox 		if (aligned_addr == free_addr) {
1821fec29688SAlan Cox 			/*
1822fec29688SAlan Cox 			 * Alignment did not change "*addr", so "*addr" must
1823fec29688SAlan Cox 			 * still provide sufficient free space.
1824fec29688SAlan Cox 			 */
1825fec29688SAlan Cox 			return (KERN_SUCCESS);
1826fec29688SAlan Cox 		}
1827fec29688SAlan Cox 
1828fec29688SAlan Cox 		/*
1829fec29688SAlan Cox 		 * Test for address wrap on "*addr".  A wrapped "*addr" could
1830fec29688SAlan Cox 		 * be a valid address, in which case vm_map_findspace() cannot
1831fec29688SAlan Cox 		 * be relied upon to fail.
1832fec29688SAlan Cox 		 */
18339f701172SKonstantin Belousov 		if (aligned_addr < free_addr)
18349f701172SKonstantin Belousov 			return (KERN_NO_SPACE);
18359f701172SKonstantin Belousov 		*addr = vm_map_findspace(map, aligned_addr, length);
18369f701172SKonstantin Belousov 		if (*addr + length > vm_map_max(map) ||
1837fec29688SAlan Cox 		    (max_addr != 0 && *addr + length > max_addr))
1838fec29688SAlan Cox 			return (KERN_NO_SPACE);
1839fec29688SAlan Cox 		free_addr = *addr;
1840fec29688SAlan Cox 		if (free_addr == aligned_addr) {
1841fec29688SAlan Cox 			/*
1842fec29688SAlan Cox 			 * If a successful call to vm_map_findspace() did not
1843fec29688SAlan Cox 			 * change "*addr", then "*addr" must still be aligned
1844fec29688SAlan Cox 			 * and provide sufficient free space.
1845fec29688SAlan Cox 			 */
1846fec29688SAlan Cox 			return (KERN_SUCCESS);
1847fec29688SAlan Cox 		}
1848fec29688SAlan Cox 	}
1849fec29688SAlan Cox }
1850fec29688SAlan Cox 
1851fec29688SAlan Cox /*
1852df8bae1dSRodney W. Grimes  *	vm_map_find finds an unallocated region in the target address
1853df8bae1dSRodney W. Grimes  *	map with the given length.  The search is defined to be
1854df8bae1dSRodney W. Grimes  *	first-fit from the specified address; the region found is
1855df8bae1dSRodney W. Grimes  *	returned in the same parameter.
1856df8bae1dSRodney W. Grimes  *
18572aaeadf8SMatthew Dillon  *	If object is non-NULL, ref count must be bumped by caller
18582aaeadf8SMatthew Dillon  *	prior to making call to account for the new entry.
1859df8bae1dSRodney W. Grimes  */
1860df8bae1dSRodney W. Grimes int
1861b9dcd593SBruce Evans vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1862b9dcd593SBruce Evans 	    vm_offset_t *addr,	/* IN/OUT */
1863edb572a3SJohn Baldwin 	    vm_size_t length, vm_offset_t max_addr, int find_space,
1864edb572a3SJohn Baldwin 	    vm_prot_t prot, vm_prot_t max, int cow)
1865df8bae1dSRodney W. Grimes {
1866fa50a355SKonstantin Belousov 	vm_offset_t alignment, curr_min_addr, min_addr;
1867fa50a355SKonstantin Belousov 	int gap, pidx, rv, try;
1868fa50a355SKonstantin Belousov 	bool cluster, en_aslr, update_anon;
1869df8bae1dSRodney W. Grimes 
18704648ba0aSKonstantin Belousov 	KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 ||
18714648ba0aSKonstantin Belousov 	    object == NULL,
18724648ba0aSKonstantin Belousov 	    ("vm_map_find: non-NULL backing object for stack"));
1873ea7e7006SKonstantin Belousov 	MPASS((cow & MAP_REMAP) == 0 || (find_space == VMFS_NO_SPACE &&
1874ea7e7006SKonstantin Belousov 	    (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0));
1875ff74a3faSJohn Baldwin 	if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL ||
1876ff74a3faSJohn Baldwin 	    (object->flags & OBJ_COLORED) == 0))
1877ff74a3faSJohn Baldwin 		find_space = VMFS_ANY_SPACE;
18785aa60b6fSJohn Baldwin 	if (find_space >> 8 != 0) {
18795aa60b6fSJohn Baldwin 		KASSERT((find_space & 0xff) == 0, ("bad VMFS flags"));
18805aa60b6fSJohn Baldwin 		alignment = (vm_offset_t)1 << (find_space >> 8);
18815aa60b6fSJohn Baldwin 	} else
18825aa60b6fSJohn Baldwin 		alignment = 0;
1883fa50a355SKonstantin Belousov 	en_aslr = (map->flags & MAP_ASLR) != 0;
1884484e9d03SKonstantin Belousov 	update_anon = cluster = clustering_anon_allowed(*addr) &&
1885fa50a355SKonstantin Belousov 	    (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 &&
1886fa50a355SKonstantin Belousov 	    find_space != VMFS_NO_SPACE && object == NULL &&
1887fa50a355SKonstantin Belousov 	    (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP |
1888fa50a355SKonstantin Belousov 	    MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE;
1889fa50a355SKonstantin Belousov 	curr_min_addr = min_addr = *addr;
1890fa50a355SKonstantin Belousov 	if (en_aslr && min_addr == 0 && !cluster &&
1891fa50a355SKonstantin Belousov 	    find_space != VMFS_NO_SPACE &&
1892fa50a355SKonstantin Belousov 	    (map->flags & MAP_ASLR_IGNSTART) != 0)
1893fa50a355SKonstantin Belousov 		curr_min_addr = min_addr = vm_map_min(map);
1894fa50a355SKonstantin Belousov 	try = 0;
18954d572bb3SAlan Cox 	vm_map_lock(map);
1896fa50a355SKonstantin Belousov 	if (cluster) {
1897fa50a355SKonstantin Belousov 		curr_min_addr = map->anon_loc;
1898fa50a355SKonstantin Belousov 		if (curr_min_addr == 0)
1899fa50a355SKonstantin Belousov 			cluster = false;
1900fa50a355SKonstantin Belousov 	}
190126c538ffSAlan Cox 	if (find_space != VMFS_NO_SPACE) {
1902fec29688SAlan Cox 		KASSERT(find_space == VMFS_ANY_SPACE ||
1903fec29688SAlan Cox 		    find_space == VMFS_OPTIMAL_SPACE ||
1904fec29688SAlan Cox 		    find_space == VMFS_SUPER_SPACE ||
1905fec29688SAlan Cox 		    alignment != 0, ("unexpected VMFS flag"));
1906fec29688SAlan Cox again:
1907fa50a355SKonstantin Belousov 		/*
1908fa50a355SKonstantin Belousov 		 * When creating an anonymous mapping, try clustering
1909fa50a355SKonstantin Belousov 		 * with an existing anonymous mapping first.
1910fa50a355SKonstantin Belousov 		 *
1911fa50a355SKonstantin Belousov 		 * We make up to two attempts to find address space
1912fa50a355SKonstantin Belousov 		 * for a given find_space value. The first attempt may
1913fa50a355SKonstantin Belousov 		 * apply randomization or may cluster with an existing
1914fa50a355SKonstantin Belousov 		 * anonymous mapping. If this first attempt fails,
1915fa50a355SKonstantin Belousov 		 * perform a first-fit search of the available address
1916fa50a355SKonstantin Belousov 		 * space.
1917fa50a355SKonstantin Belousov 		 *
1918fa50a355SKonstantin Belousov 		 * If all tries failed, and find_space is
1919fa50a355SKonstantin Belousov 		 * VMFS_OPTIMAL_SPACE, fallback to VMFS_ANY_SPACE.
1920fa50a355SKonstantin Belousov 		 * Again enable clustering and randomization.
1921fa50a355SKonstantin Belousov 		 */
1922fa50a355SKonstantin Belousov 		try++;
1923fa50a355SKonstantin Belousov 		MPASS(try <= 2);
1924fa50a355SKonstantin Belousov 
1925fa50a355SKonstantin Belousov 		if (try == 2) {
1926fa50a355SKonstantin Belousov 			/*
1927fa50a355SKonstantin Belousov 			 * Second try: we failed either to find a
1928fa50a355SKonstantin Belousov 			 * suitable region for randomizing the
1929fa50a355SKonstantin Belousov 			 * allocation, or to cluster with an existing
1930fa50a355SKonstantin Belousov 			 * mapping.  Retry with free run.
1931fa50a355SKonstantin Belousov 			 */
1932fa50a355SKonstantin Belousov 			curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ?
1933fa50a355SKonstantin Belousov 			    vm_map_min(map) : min_addr;
1934fa50a355SKonstantin Belousov 			atomic_add_long(&aslr_restarts, 1);
1935fa50a355SKonstantin Belousov 		}
1936fa50a355SKonstantin Belousov 
1937fa50a355SKonstantin Belousov 		if (try == 1 && en_aslr && !cluster) {
1938fa50a355SKonstantin Belousov 			/*
1939fa50a355SKonstantin Belousov 			 * Find space for allocation, including
1940fa50a355SKonstantin Belousov 			 * gap needed for later randomization.
1941fa50a355SKonstantin Belousov 			 */
1942fa50a355SKonstantin Belousov 			pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 &&
1943fa50a355SKonstantin Belousov 			    (find_space == VMFS_SUPER_SPACE || find_space ==
1944fa50a355SKonstantin Belousov 			    VMFS_OPTIMAL_SPACE) ? 1 : 0;
1945fa50a355SKonstantin Belousov 			gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR &&
1946fa50a355SKonstantin Belousov 			    (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ?
1947fa50a355SKonstantin Belousov 			    aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx];
19489f701172SKonstantin Belousov 			*addr = vm_map_findspace(map, curr_min_addr,
19499f701172SKonstantin Belousov 			    length + gap * pagesizes[pidx]);
19509f701172SKonstantin Belousov 			if (*addr + length + gap * pagesizes[pidx] >
1951a5a02ef4SKonstantin Belousov 			    vm_map_max(map))
1952fa50a355SKonstantin Belousov 				goto again;
1953fa50a355SKonstantin Belousov 			/* And randomize the start address. */
1954fa50a355SKonstantin Belousov 			*addr += (arc4random() % gap) * pagesizes[pidx];
19555019dac9SKonstantin Belousov 			if (max_addr != 0 && *addr + length > max_addr)
19565019dac9SKonstantin Belousov 				goto again;
19579f701172SKonstantin Belousov 		} else {
19589f701172SKonstantin Belousov 			*addr = vm_map_findspace(map, curr_min_addr, length);
19599f701172SKonstantin Belousov 			if (*addr + length > vm_map_max(map) ||
1960edb572a3SJohn Baldwin 			    (max_addr != 0 && *addr + length > max_addr)) {
1961fa50a355SKonstantin Belousov 				if (cluster) {
1962fa50a355SKonstantin Belousov 					cluster = false;
1963fa50a355SKonstantin Belousov 					MPASS(try == 1);
1964fa50a355SKonstantin Belousov 					goto again;
1965fa50a355SKonstantin Belousov 				}
1966fec29688SAlan Cox 				rv = KERN_NO_SPACE;
1967fec29688SAlan Cox 				goto done;
1968fec29688SAlan Cox 			}
19699f701172SKonstantin Belousov 		}
1970fa50a355SKonstantin Belousov 
1971fec29688SAlan Cox 		if (find_space != VMFS_ANY_SPACE &&
1972fec29688SAlan Cox 		    (rv = vm_map_alignspace(map, object, offset, addr, length,
1973fec29688SAlan Cox 		    max_addr, alignment)) != KERN_SUCCESS) {
1974ff74a3faSJohn Baldwin 			if (find_space == VMFS_OPTIMAL_SPACE) {
1975ff74a3faSJohn Baldwin 				find_space = VMFS_ANY_SPACE;
1976fa50a355SKonstantin Belousov 				curr_min_addr = min_addr;
1977fa50a355SKonstantin Belousov 				cluster = update_anon;
1978fa50a355SKonstantin Belousov 				try = 0;
1979ff74a3faSJohn Baldwin 				goto again;
1980ff74a3faSJohn Baldwin 			}
1981fec29688SAlan Cox 			goto done;
1982df8bae1dSRodney W. Grimes 		}
1983ea7e7006SKonstantin Belousov 	} else if ((cow & MAP_REMAP) != 0) {
1984ea7e7006SKonstantin Belousov 		if (*addr < vm_map_min(map) ||
1985ea7e7006SKonstantin Belousov 		    *addr + length > vm_map_max(map) ||
1986ea7e7006SKonstantin Belousov 		    *addr + length <= length) {
1987ea7e7006SKonstantin Belousov 			rv = KERN_INVALID_ADDRESS;
1988ea7e7006SKonstantin Belousov 			goto done;
1989ea7e7006SKonstantin Belousov 		}
1990ea7e7006SKonstantin Belousov 		vm_map_delete(map, *addr, *addr + length);
1991df8bae1dSRodney W. Grimes 	}
19924648ba0aSKonstantin Belousov 	if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) {
1993fec29688SAlan Cox 		rv = vm_map_stack_locked(map, *addr, length, sgrowsiz, prot,
1994fec29688SAlan Cox 		    max, cow);
19954648ba0aSKonstantin Belousov 	} else {
1996fec29688SAlan Cox 		rv = vm_map_insert(map, object, offset, *addr, *addr + length,
1997fec29688SAlan Cox 		    prot, max, cow);
19984648ba0aSKonstantin Belousov 	}
1999fa50a355SKonstantin Belousov 	if (rv == KERN_SUCCESS && update_anon)
2000fa50a355SKonstantin Belousov 		map->anon_loc = *addr + length;
2001fec29688SAlan Cox done:
2002df8bae1dSRodney W. Grimes 	vm_map_unlock(map);
2003fec29688SAlan Cox 	return (rv);
2004df8bae1dSRodney W. Grimes }
2005df8bae1dSRodney W. Grimes 
2006e8502826SKonstantin Belousov /*
2007e8502826SKonstantin Belousov  *	vm_map_find_min() is a variant of vm_map_find() that takes an
2008e8502826SKonstantin Belousov  *	additional parameter (min_addr) and treats the given address
2009e8502826SKonstantin Belousov  *	(*addr) differently.  Specifically, it treats *addr as a hint
2010e8502826SKonstantin Belousov  *	and not as the minimum address where the mapping is created.
2011e8502826SKonstantin Belousov  *
2012e8502826SKonstantin Belousov  *	This function works in two phases.  First, it tries to
2013e8502826SKonstantin Belousov  *	allocate above the hint.  If that fails and the hint is
2014e8502826SKonstantin Belousov  *	greater than min_addr, it performs a second pass, replacing
2015e8502826SKonstantin Belousov  *	the hint with min_addr as the minimum address for the
2016e8502826SKonstantin Belousov  *	allocation.
2017e8502826SKonstantin Belousov  */
20186a97a3f7SKonstantin Belousov int
20196a97a3f7SKonstantin Belousov vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
20206a97a3f7SKonstantin Belousov     vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr,
20216a97a3f7SKonstantin Belousov     vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max,
20226a97a3f7SKonstantin Belousov     int cow)
20236a97a3f7SKonstantin Belousov {
20246a97a3f7SKonstantin Belousov 	vm_offset_t hint;
20256a97a3f7SKonstantin Belousov 	int rv;
20266a97a3f7SKonstantin Belousov 
20276a97a3f7SKonstantin Belousov 	hint = *addr;
20286a97a3f7SKonstantin Belousov 	for (;;) {
20296a97a3f7SKonstantin Belousov 		rv = vm_map_find(map, object, offset, addr, length, max_addr,
20306a97a3f7SKonstantin Belousov 		    find_space, prot, max, cow);
20316a97a3f7SKonstantin Belousov 		if (rv == KERN_SUCCESS || min_addr >= hint)
20326a97a3f7SKonstantin Belousov 			return (rv);
20337683ad70SKonstantin Belousov 		*addr = hint = min_addr;
20346a97a3f7SKonstantin Belousov 	}
20356a97a3f7SKonstantin Belousov }
20366a97a3f7SKonstantin Belousov 
203792e78c10SAlan Cox /*
203892e78c10SAlan Cox  * A map entry with any of the following flags set must not be merged with
203992e78c10SAlan Cox  * another entry.
204092e78c10SAlan Cox  */
204192e78c10SAlan Cox #define	MAP_ENTRY_NOMERGE_MASK	(MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP | \
204278022527SKonstantin Belousov 	    MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP | MAP_ENTRY_VN_EXEC)
204392e78c10SAlan Cox 
204407424462SKonstantin Belousov static bool
204507424462SKonstantin Belousov vm_map_mergeable_neighbors(vm_map_entry_t prev, vm_map_entry_t entry)
204607424462SKonstantin Belousov {
204707424462SKonstantin Belousov 
204892e78c10SAlan Cox 	KASSERT((prev->eflags & MAP_ENTRY_NOMERGE_MASK) == 0 ||
204992e78c10SAlan Cox 	    (entry->eflags & MAP_ENTRY_NOMERGE_MASK) == 0,
205092e78c10SAlan Cox 	    ("vm_map_mergeable_neighbors: neither %p nor %p are mergeable",
205192e78c10SAlan Cox 	    prev, entry));
205207424462SKonstantin Belousov 	return (prev->end == entry->start &&
205307424462SKonstantin Belousov 	    prev->object.vm_object == entry->object.vm_object &&
205407424462SKonstantin Belousov 	    (prev->object.vm_object == NULL ||
205592e78c10SAlan Cox 	    prev->offset + (prev->end - prev->start) == entry->offset) &&
205607424462SKonstantin Belousov 	    prev->eflags == entry->eflags &&
205707424462SKonstantin Belousov 	    prev->protection == entry->protection &&
205807424462SKonstantin Belousov 	    prev->max_protection == entry->max_protection &&
205907424462SKonstantin Belousov 	    prev->inheritance == entry->inheritance &&
206007424462SKonstantin Belousov 	    prev->wired_count == entry->wired_count &&
206107424462SKonstantin Belousov 	    prev->cred == entry->cred);
206207424462SKonstantin Belousov }
206307424462SKonstantin Belousov 
206407424462SKonstantin Belousov static void
206507424462SKonstantin Belousov vm_map_merged_neighbor_dispose(vm_map_t map, vm_map_entry_t entry)
206607424462SKonstantin Belousov {
206707424462SKonstantin Belousov 
206807424462SKonstantin Belousov 	/*
206992e78c10SAlan Cox 	 * If the backing object is a vnode object, vm_object_deallocate()
207092e78c10SAlan Cox 	 * calls vrele().  However, vrele() does not lock the vnode because
207192e78c10SAlan Cox 	 * the vnode has additional references.  Thus, the map lock can be
207292e78c10SAlan Cox 	 * kept without causing a lock-order reversal with the vnode lock.
207307424462SKonstantin Belousov 	 *
207492e78c10SAlan Cox 	 * Since we count the number of virtual page mappings in
207592e78c10SAlan Cox 	 * object->un_pager.vnp.writemappings, the writemappings value
207692e78c10SAlan Cox 	 * should not be adjusted when the entry is disposed of.
207707424462SKonstantin Belousov 	 */
207807424462SKonstantin Belousov 	if (entry->object.vm_object != NULL)
207907424462SKonstantin Belousov 		vm_object_deallocate(entry->object.vm_object);
208007424462SKonstantin Belousov 	if (entry->cred != NULL)
208107424462SKonstantin Belousov 		crfree(entry->cred);
208207424462SKonstantin Belousov 	vm_map_entry_dispose(map, entry);
208307424462SKonstantin Belousov }
208407424462SKonstantin Belousov 
2085df8bae1dSRodney W. Grimes /*
2086b7b2aac2SJohn Dyson  *	vm_map_simplify_entry:
208767bf6868SJohn Dyson  *
20884e71e795SMatthew Dillon  *	Simplify the given map entry by merging with either neighbor.  This
20894e71e795SMatthew Dillon  *	routine also has the ability to merge with both neighbors.
20904e71e795SMatthew Dillon  *
20914e71e795SMatthew Dillon  *	The map must be locked.
20924e71e795SMatthew Dillon  *
2093ba7c64d1SKonstantin Belousov  *	This routine guarantees that the passed entry remains valid (though
20944e71e795SMatthew Dillon  *	possibly extended).  When merging, this routine may delete one or
20954e71e795SMatthew Dillon  *	both neighbors.
2096df8bae1dSRodney W. Grimes  */
20970afcd3afSAlan Cox void
20981b40f8c0SMatthew Dillon vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
2099df8bae1dSRodney W. Grimes {
2100308c24baSJohn Dyson 	vm_map_entry_t next, prev;
2101df8bae1dSRodney W. Grimes 
210292e78c10SAlan Cox 	if ((entry->eflags & MAP_ENTRY_NOMERGE_MASK) != 0)
2103df8bae1dSRodney W. Grimes 		return;
2104308c24baSJohn Dyson 	prev = entry->prev;
21052203c46dSMark Johnston 	if (vm_map_mergeable_neighbors(prev, entry)) {
21069f701172SKonstantin Belousov 		vm_map_entry_unlink(map, prev, UNLINK_MERGE_NEXT);
210707424462SKonstantin Belousov 		vm_map_merged_neighbor_dispose(map, prev);
2108308c24baSJohn Dyson 	}
2109de5f6a77SJohn Dyson 	next = entry->next;
21102203c46dSMark Johnston 	if (vm_map_mergeable_neighbors(entry, next)) {
21119f701172SKonstantin Belousov 		vm_map_entry_unlink(map, next, UNLINK_MERGE_PREV);
211207424462SKonstantin Belousov 		vm_map_merged_neighbor_dispose(map, next);
2113df8bae1dSRodney W. Grimes 	}
2114df8bae1dSRodney W. Grimes }
211592e78c10SAlan Cox 
2116df8bae1dSRodney W. Grimes /*
2117af1d6d6aSDoug Moore  *	vm_map_entry_back:
2118af1d6d6aSDoug Moore  *
2119af1d6d6aSDoug Moore  *	Allocate an object to back a map entry.
2120af1d6d6aSDoug Moore  */
2121af1d6d6aSDoug Moore static inline void
2122af1d6d6aSDoug Moore vm_map_entry_back(vm_map_entry_t entry)
2123af1d6d6aSDoug Moore {
2124af1d6d6aSDoug Moore 	vm_object_t object;
2125af1d6d6aSDoug Moore 
2126af1d6d6aSDoug Moore 	KASSERT(entry->object.vm_object == NULL,
2127af1d6d6aSDoug Moore 	    ("map entry %p has backing object", entry));
2128af1d6d6aSDoug Moore 	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
2129af1d6d6aSDoug Moore 	    ("map entry %p is a submap", entry));
2130af1d6d6aSDoug Moore 	object = vm_object_allocate(OBJT_DEFAULT,
2131af1d6d6aSDoug Moore 	    atop(entry->end - entry->start));
2132af1d6d6aSDoug Moore 	entry->object.vm_object = object;
2133af1d6d6aSDoug Moore 	entry->offset = 0;
2134af1d6d6aSDoug Moore 	if (entry->cred != NULL) {
2135af1d6d6aSDoug Moore 		object->cred = entry->cred;
2136af1d6d6aSDoug Moore 		object->charge = entry->end - entry->start;
2137af1d6d6aSDoug Moore 		entry->cred = NULL;
2138af1d6d6aSDoug Moore 	}
2139af1d6d6aSDoug Moore }
2140af1d6d6aSDoug Moore 
2141af1d6d6aSDoug Moore /*
2142af1d6d6aSDoug Moore  *	vm_map_entry_charge_object
2143af1d6d6aSDoug Moore  *
2144af1d6d6aSDoug Moore  *	If there is no object backing this entry, create one.  Otherwise, if
2145af1d6d6aSDoug Moore  *	the entry has cred, give it to the backing object.
2146af1d6d6aSDoug Moore  */
2147af1d6d6aSDoug Moore static inline void
2148af1d6d6aSDoug Moore vm_map_entry_charge_object(vm_map_t map, vm_map_entry_t entry)
2149af1d6d6aSDoug Moore {
2150af1d6d6aSDoug Moore 
2151af1d6d6aSDoug Moore 	VM_MAP_ASSERT_LOCKED(map);
2152af1d6d6aSDoug Moore 	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
2153af1d6d6aSDoug Moore 	    ("map entry %p is a submap", entry));
2154af1d6d6aSDoug Moore 	if (entry->object.vm_object == NULL && !map->system_map &&
2155af1d6d6aSDoug Moore 	    (entry->eflags & MAP_ENTRY_GUARD) == 0)
2156af1d6d6aSDoug Moore 		vm_map_entry_back(entry);
2157af1d6d6aSDoug Moore 	else if (entry->object.vm_object != NULL &&
2158af1d6d6aSDoug Moore 	    ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
2159af1d6d6aSDoug Moore 	    entry->cred != NULL) {
2160af1d6d6aSDoug Moore 		VM_OBJECT_WLOCK(entry->object.vm_object);
2161af1d6d6aSDoug Moore 		KASSERT(entry->object.vm_object->cred == NULL,
2162af1d6d6aSDoug Moore 		    ("OVERCOMMIT: %s: both cred e %p", __func__, entry));
2163af1d6d6aSDoug Moore 		entry->object.vm_object->cred = entry->cred;
2164af1d6d6aSDoug Moore 		entry->object.vm_object->charge = entry->end - entry->start;
2165af1d6d6aSDoug Moore 		VM_OBJECT_WUNLOCK(entry->object.vm_object);
2166af1d6d6aSDoug Moore 		entry->cred = NULL;
2167af1d6d6aSDoug Moore 	}
2168af1d6d6aSDoug Moore }
2169af1d6d6aSDoug Moore 
2170af1d6d6aSDoug Moore /*
2171df8bae1dSRodney W. Grimes  *	vm_map_clip_start:	[ internal use only ]
2172df8bae1dSRodney W. Grimes  *
2173df8bae1dSRodney W. Grimes  *	Asserts that the given entry begins at or after
2174df8bae1dSRodney W. Grimes  *	the specified address; if necessary,
2175df8bae1dSRodney W. Grimes  *	it splits the entry into two.
2176df8bae1dSRodney W. Grimes  */
2177df8bae1dSRodney W. Grimes #define vm_map_clip_start(map, entry, startaddr) \
2178df8bae1dSRodney W. Grimes { \
2179df8bae1dSRodney W. Grimes 	if (startaddr > entry->start) \
2180df8bae1dSRodney W. Grimes 		_vm_map_clip_start(map, entry, startaddr); \
2181df8bae1dSRodney W. Grimes }
2182df8bae1dSRodney W. Grimes 
2183df8bae1dSRodney W. Grimes /*
2184df8bae1dSRodney W. Grimes  *	This routine is called only when it is known that
2185df8bae1dSRodney W. Grimes  *	the entry must be split.
2186df8bae1dSRodney W. Grimes  */
21870d94caffSDavid Greenman static void
21881b40f8c0SMatthew Dillon _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
2189df8bae1dSRodney W. Grimes {
2190c0877f10SJohn Dyson 	vm_map_entry_t new_entry;
2191df8bae1dSRodney W. Grimes 
21923a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
2193ed11e4d7SMark Johnston 	KASSERT(entry->end > start && entry->start < start,
2194ed11e4d7SMark Johnston 	    ("_vm_map_clip_start: invalid clip of entry %p", entry));
21953a0916b8SKonstantin Belousov 
2196df8bae1dSRodney W. Grimes 	/*
21974766eba1SDoug Moore 	 * Create a backing object now, if none exists, so that more individual
21984766eba1SDoug Moore 	 * objects won't be created after the map entry is split.
2199df8bae1dSRodney W. Grimes 	 */
2200af1d6d6aSDoug Moore 	vm_map_entry_charge_object(map, entry);
22014766eba1SDoug Moore 
22024766eba1SDoug Moore 	/* Clone the entry. */
2203df8bae1dSRodney W. Grimes 	new_entry = vm_map_entry_create(map);
2204df8bae1dSRodney W. Grimes 	*new_entry = *entry;
2205df8bae1dSRodney W. Grimes 
22064766eba1SDoug Moore 	/*
22074766eba1SDoug Moore 	 * Split off the front portion.  Insert the new entry BEFORE this one,
22084766eba1SDoug Moore 	 * so that this entry has the specified starting address.
22094766eba1SDoug Moore 	 */
2210df8bae1dSRodney W. Grimes 	new_entry->end = start;
2211df8bae1dSRodney W. Grimes 	entry->offset += (start - entry->start);
2212df8bae1dSRodney W. Grimes 	entry->start = start;
2213ef694c1aSEdward Tomasz Napierala 	if (new_entry->cred != NULL)
2214ef694c1aSEdward Tomasz Napierala 		crhold(entry->cred);
2215df8bae1dSRodney W. Grimes 
22169f701172SKonstantin Belousov 	vm_map_entry_link(map, new_entry);
2217df8bae1dSRodney W. Grimes 
22189fdfe602SMatthew Dillon 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
2219df8bae1dSRodney W. Grimes 		vm_object_reference(new_entry->object.vm_object);
222078022527SKonstantin Belousov 		vm_map_entry_set_vnode_text(new_entry, true);
222184110e7eSKonstantin Belousov 		/*
222284110e7eSKonstantin Belousov 		 * The object->un_pager.vnp.writemappings for the
222384110e7eSKonstantin Belousov 		 * object of MAP_ENTRY_VN_WRITECNT type entry shall be
222484110e7eSKonstantin Belousov 		 * kept as is here.  The virtual pages are
222584110e7eSKonstantin Belousov 		 * re-distributed among the clipped entries, so the sum is
222684110e7eSKonstantin Belousov 		 * left the same.
222784110e7eSKonstantin Belousov 		 */
2228df8bae1dSRodney W. Grimes 	}
2229c0877f10SJohn Dyson }
2230df8bae1dSRodney W. Grimes 
2231df8bae1dSRodney W. Grimes /*
2232df8bae1dSRodney W. Grimes  *	vm_map_clip_end:	[ internal use only ]
2233df8bae1dSRodney W. Grimes  *
2234df8bae1dSRodney W. Grimes  *	Asserts that the given entry ends at or before
2235df8bae1dSRodney W. Grimes  *	the specified address; if necessary,
2236df8bae1dSRodney W. Grimes  *	it splits the entry into two.
2237df8bae1dSRodney W. Grimes  */
2238df8bae1dSRodney W. Grimes #define vm_map_clip_end(map, entry, endaddr) \
2239df8bae1dSRodney W. Grimes { \
2240af045176SPoul-Henning Kamp 	if ((endaddr) < (entry->end)) \
2241af045176SPoul-Henning Kamp 		_vm_map_clip_end((map), (entry), (endaddr)); \
2242df8bae1dSRodney W. Grimes }
2243df8bae1dSRodney W. Grimes 
2244df8bae1dSRodney W. Grimes /*
2245df8bae1dSRodney W. Grimes  *	This routine is called only when it is known that
2246df8bae1dSRodney W. Grimes  *	the entry must be split.
2247df8bae1dSRodney W. Grimes  */
22480d94caffSDavid Greenman static void
22491b40f8c0SMatthew Dillon _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
2250df8bae1dSRodney W. Grimes {
2251c0877f10SJohn Dyson 	vm_map_entry_t new_entry;
2252df8bae1dSRodney W. Grimes 
22533a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
2254ed11e4d7SMark Johnston 	KASSERT(entry->start < end && entry->end > end,
2255ed11e4d7SMark Johnston 	    ("_vm_map_clip_end: invalid clip of entry %p", entry));
22563a0916b8SKonstantin Belousov 
225711cccda1SJohn Dyson 	/*
22584766eba1SDoug Moore 	 * Create a backing object now, if none exists, so that more individual
22594766eba1SDoug Moore 	 * objects won't be created after the map entry is split.
2260df8bae1dSRodney W. Grimes 	 */
2261af1d6d6aSDoug Moore 	vm_map_entry_charge_object(map, entry);
22624766eba1SDoug Moore 
22634766eba1SDoug Moore 	/* Clone the entry. */
2264df8bae1dSRodney W. Grimes 	new_entry = vm_map_entry_create(map);
2265df8bae1dSRodney W. Grimes 	*new_entry = *entry;
2266df8bae1dSRodney W. Grimes 
22674766eba1SDoug Moore 	/*
22684766eba1SDoug Moore 	 * Split off the back portion.  Insert the new entry AFTER this one,
22694766eba1SDoug Moore 	 * so that this entry has the specified ending address.
22704766eba1SDoug Moore 	 */
2271df8bae1dSRodney W. Grimes 	new_entry->start = entry->end = end;
2272df8bae1dSRodney W. Grimes 	new_entry->offset += (end - entry->start);
2273ef694c1aSEdward Tomasz Napierala 	if (new_entry->cred != NULL)
2274ef694c1aSEdward Tomasz Napierala 		crhold(entry->cred);
2275df8bae1dSRodney W. Grimes 
22769f701172SKonstantin Belousov 	vm_map_entry_link(map, new_entry);
2277df8bae1dSRodney W. Grimes 
22789fdfe602SMatthew Dillon 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
2279df8bae1dSRodney W. Grimes 		vm_object_reference(new_entry->object.vm_object);
228078022527SKonstantin Belousov 		vm_map_entry_set_vnode_text(new_entry, true);
2281df8bae1dSRodney W. Grimes 	}
2282c0877f10SJohn Dyson }
2283df8bae1dSRodney W. Grimes 
2284df8bae1dSRodney W. Grimes /*
2285df8bae1dSRodney W. Grimes  *	vm_map_submap:		[ kernel use only ]
2286df8bae1dSRodney W. Grimes  *
2287df8bae1dSRodney W. Grimes  *	Mark the given range as handled by a subordinate map.
2288df8bae1dSRodney W. Grimes  *
2289df8bae1dSRodney W. Grimes  *	This range must have been created with vm_map_find,
2290df8bae1dSRodney W. Grimes  *	and no other operations may have been performed on this
2291df8bae1dSRodney W. Grimes  *	range prior to calling vm_map_submap.
2292df8bae1dSRodney W. Grimes  *
2293df8bae1dSRodney W. Grimes  *	Only a limited number of operations can be performed
2294df8bae1dSRodney W. Grimes  *	within this rage after calling vm_map_submap:
2295df8bae1dSRodney W. Grimes  *		vm_fault
2296df8bae1dSRodney W. Grimes  *	[Don't try vm_map_copy!]
2297df8bae1dSRodney W. Grimes  *
2298df8bae1dSRodney W. Grimes  *	To remove a submapping, one must first remove the
2299df8bae1dSRodney W. Grimes  *	range from the superior map, and then destroy the
2300df8bae1dSRodney W. Grimes  *	submap (if desired).  [Better yet, don't try it.]
2301df8bae1dSRodney W. Grimes  */
2302df8bae1dSRodney W. Grimes int
23031b40f8c0SMatthew Dillon vm_map_submap(
23041b40f8c0SMatthew Dillon 	vm_map_t map,
23051b40f8c0SMatthew Dillon 	vm_offset_t start,
23061b40f8c0SMatthew Dillon 	vm_offset_t end,
23071b40f8c0SMatthew Dillon 	vm_map_t submap)
2308df8bae1dSRodney W. Grimes {
2309df8bae1dSRodney W. Grimes 	vm_map_entry_t entry;
2310fa50a355SKonstantin Belousov 	int result;
2311fa50a355SKonstantin Belousov 
2312fa50a355SKonstantin Belousov 	result = KERN_INVALID_ARGUMENT;
2313fa50a355SKonstantin Belousov 
2314fa50a355SKonstantin Belousov 	vm_map_lock(submap);
2315fa50a355SKonstantin Belousov 	submap->flags |= MAP_IS_SUB_MAP;
2316fa50a355SKonstantin Belousov 	vm_map_unlock(submap);
2317df8bae1dSRodney W. Grimes 
2318df8bae1dSRodney W. Grimes 	vm_map_lock(map);
2319df8bae1dSRodney W. Grimes 
2320df8bae1dSRodney W. Grimes 	VM_MAP_RANGE_CHECK(map, start, end);
2321df8bae1dSRodney W. Grimes 
2322d1d3f7e1SDoug Moore 	if (vm_map_lookup_entry(map, start, &entry)) {
2323df8bae1dSRodney W. Grimes 		vm_map_clip_start(map, entry, start);
2324d1d3f7e1SDoug Moore 	} else
2325d1d3f7e1SDoug Moore 		entry = entry->next;
2326df8bae1dSRodney W. Grimes 
2327df8bae1dSRodney W. Grimes 	vm_map_clip_end(map, entry, end);
2328df8bae1dSRodney W. Grimes 
2329df8bae1dSRodney W. Grimes 	if ((entry->start == start) && (entry->end == end) &&
23309fdfe602SMatthew Dillon 	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
2331afa07f7eSJohn Dyson 	    (entry->object.vm_object == NULL)) {
23322d8acc0fSJohn Dyson 		entry->object.sub_map = submap;
2333afa07f7eSJohn Dyson 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
2334df8bae1dSRodney W. Grimes 		result = KERN_SUCCESS;
2335df8bae1dSRodney W. Grimes 	}
2336df8bae1dSRodney W. Grimes 	vm_map_unlock(map);
2337df8bae1dSRodney W. Grimes 
2338fa50a355SKonstantin Belousov 	if (result != KERN_SUCCESS) {
2339fa50a355SKonstantin Belousov 		vm_map_lock(submap);
2340fa50a355SKonstantin Belousov 		submap->flags &= ~MAP_IS_SUB_MAP;
2341fa50a355SKonstantin Belousov 		vm_map_unlock(submap);
2342fa50a355SKonstantin Belousov 	}
2343df8bae1dSRodney W. Grimes 	return (result);
2344df8bae1dSRodney W. Grimes }
2345df8bae1dSRodney W. Grimes 
2346df8bae1dSRodney W. Grimes /*
2347dd05fa19SAlan Cox  * The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified
23481f78f902SAlan Cox  */
23491f78f902SAlan Cox #define	MAX_INIT_PT	96
23501f78f902SAlan Cox 
23511f78f902SAlan Cox /*
23520551c08dSAlan Cox  *	vm_map_pmap_enter:
23530551c08dSAlan Cox  *
2354dd05fa19SAlan Cox  *	Preload the specified map's pmap with mappings to the specified
2355dd05fa19SAlan Cox  *	object's memory-resident pages.  No further physical pages are
2356dd05fa19SAlan Cox  *	allocated, and no further virtual pages are retrieved from secondary
2357dd05fa19SAlan Cox  *	storage.  If the specified flags include MAP_PREFAULT_PARTIAL, then a
2358dd05fa19SAlan Cox  *	limited number of page mappings are created at the low-end of the
2359dd05fa19SAlan Cox  *	specified address range.  (For this purpose, a superpage mapping
2360dd05fa19SAlan Cox  *	counts as one page mapping.)  Otherwise, all resident pages within
23613453bca8SAlan Cox  *	the specified address range are mapped.
23620551c08dSAlan Cox  */
2363077ec27cSAlan Cox static void
23644da4d293SAlan Cox vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
23650551c08dSAlan Cox     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags)
23660551c08dSAlan Cox {
23678fece8c3SAlan Cox 	vm_offset_t start;
2368ce142d9eSAlan Cox 	vm_page_t p, p_start;
2369dd05fa19SAlan Cox 	vm_pindex_t mask, psize, threshold, tmpidx;
23700551c08dSAlan Cox 
2371ba8bca61SAlan Cox 	if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL)
23721f78f902SAlan Cox 		return;
23739af6d512SAttilio Rao 	VM_OBJECT_RLOCK(object);
23749af6d512SAttilio Rao 	if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
23759af6d512SAttilio Rao 		VM_OBJECT_RUNLOCK(object);
237689f6b863SAttilio Rao 		VM_OBJECT_WLOCK(object);
237701381811SJohn Baldwin 		if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
23789af6d512SAttilio Rao 			pmap_object_init_pt(map->pmap, addr, object, pindex,
23799af6d512SAttilio Rao 			    size);
23809af6d512SAttilio Rao 			VM_OBJECT_WUNLOCK(object);
23819af6d512SAttilio Rao 			return;
23829af6d512SAttilio Rao 		}
23839af6d512SAttilio Rao 		VM_OBJECT_LOCK_DOWNGRADE(object);
23841f78f902SAlan Cox 	}
23851f78f902SAlan Cox 
23861f78f902SAlan Cox 	psize = atop(size);
23871f78f902SAlan Cox 	if (psize + pindex > object->size) {
23889af6d512SAttilio Rao 		if (object->size < pindex) {
23899af6d512SAttilio Rao 			VM_OBJECT_RUNLOCK(object);
23909af6d512SAttilio Rao 			return;
23919af6d512SAttilio Rao 		}
23921f78f902SAlan Cox 		psize = object->size - pindex;
23931f78f902SAlan Cox 	}
23941f78f902SAlan Cox 
2395ce142d9eSAlan Cox 	start = 0;
2396ce142d9eSAlan Cox 	p_start = NULL;
2397dd05fa19SAlan Cox 	threshold = MAX_INIT_PT;
23981f78f902SAlan Cox 
2399b382c10aSKonstantin Belousov 	p = vm_page_find_least(object, pindex);
24001f78f902SAlan Cox 	/*
24011f78f902SAlan Cox 	 * Assert: the variable p is either (1) the page with the
24021f78f902SAlan Cox 	 * least pindex greater than or equal to the parameter pindex
24031f78f902SAlan Cox 	 * or (2) NULL.
24041f78f902SAlan Cox 	 */
24051f78f902SAlan Cox 	for (;
24061f78f902SAlan Cox 	     p != NULL && (tmpidx = p->pindex - pindex) < psize;
24071f78f902SAlan Cox 	     p = TAILQ_NEXT(p, listq)) {
24081f78f902SAlan Cox 		/*
24091f78f902SAlan Cox 		 * don't allow an madvise to blow away our really
24101f78f902SAlan Cox 		 * free pages allocating pv entries.
24111f78f902SAlan Cox 		 */
2412dd05fa19SAlan Cox 		if (((flags & MAP_PREFAULT_MADVISE) != 0 &&
2413e2068d0bSJeff Roberson 		    vm_page_count_severe()) ||
2414dd05fa19SAlan Cox 		    ((flags & MAP_PREFAULT_PARTIAL) != 0 &&
2415dd05fa19SAlan Cox 		    tmpidx >= threshold)) {
2416379fb642SAlan Cox 			psize = tmpidx;
24171f78f902SAlan Cox 			break;
24181f78f902SAlan Cox 		}
24190a2e596aSAlan Cox 		if (p->valid == VM_PAGE_BITS_ALL) {
2420ce142d9eSAlan Cox 			if (p_start == NULL) {
2421ce142d9eSAlan Cox 				start = addr + ptoa(tmpidx);
2422ce142d9eSAlan Cox 				p_start = p;
2423ce142d9eSAlan Cox 			}
2424dd05fa19SAlan Cox 			/* Jump ahead if a superpage mapping is possible. */
2425dd05fa19SAlan Cox 			if (p->psind > 0 && ((addr + ptoa(tmpidx)) &
2426dd05fa19SAlan Cox 			    (pagesizes[p->psind] - 1)) == 0) {
2427dd05fa19SAlan Cox 				mask = atop(pagesizes[p->psind]) - 1;
2428dd05fa19SAlan Cox 				if (tmpidx + mask < psize &&
242988302601SAlan Cox 				    vm_page_ps_test(p, PS_ALL_VALID, NULL)) {
2430dd05fa19SAlan Cox 					p += mask;
2431dd05fa19SAlan Cox 					threshold += mask;
2432dd05fa19SAlan Cox 				}
2433dd05fa19SAlan Cox 			}
24347bfda801SAlan Cox 		} else if (p_start != NULL) {
2435cf4682aeSAlan Cox 			pmap_enter_object(map->pmap, start, addr +
2436cf4682aeSAlan Cox 			    ptoa(tmpidx), p_start, prot);
2437cf4682aeSAlan Cox 			p_start = NULL;
2438cf4682aeSAlan Cox 		}
2439cf4682aeSAlan Cox 	}
2440c46b90e9SAlan Cox 	if (p_start != NULL)
2441379fb642SAlan Cox 		pmap_enter_object(map->pmap, start, addr + ptoa(psize),
2442379fb642SAlan Cox 		    p_start, prot);
24439af6d512SAttilio Rao 	VM_OBJECT_RUNLOCK(object);
24440551c08dSAlan Cox }
24450551c08dSAlan Cox 
24460551c08dSAlan Cox /*
2447df8bae1dSRodney W. Grimes  *	vm_map_protect:
2448df8bae1dSRodney W. Grimes  *
2449df8bae1dSRodney W. Grimes  *	Sets the protection of the specified address
2450df8bae1dSRodney W. Grimes  *	region in the target map.  If "set_max" is
2451df8bae1dSRodney W. Grimes  *	specified, the maximum protection is to be set;
2452df8bae1dSRodney W. Grimes  *	otherwise, only the current protection is affected.
2453df8bae1dSRodney W. Grimes  */
2454df8bae1dSRodney W. Grimes int
2455b9dcd593SBruce Evans vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
2456b9dcd593SBruce Evans 	       vm_prot_t new_prot, boolean_t set_max)
2457df8bae1dSRodney W. Grimes {
245819f5d9f2SKonstantin Belousov 	vm_map_entry_t current, entry, in_tran;
24593364c323SKonstantin Belousov 	vm_object_t obj;
2460ef694c1aSEdward Tomasz Napierala 	struct ucred *cred;
2461210a6886SKonstantin Belousov 	vm_prot_t old_prot;
2462a72dce34SDoug Moore 	int rv;
2463df8bae1dSRodney W. Grimes 
246479e9451fSKonstantin Belousov 	if (start == end)
246579e9451fSKonstantin Belousov 		return (KERN_SUCCESS);
246679e9451fSKonstantin Belousov 
246719f5d9f2SKonstantin Belousov again:
246819f5d9f2SKonstantin Belousov 	in_tran = NULL;
2469df8bae1dSRodney W. Grimes 	vm_map_lock(map);
2470df8bae1dSRodney W. Grimes 
2471e1cb9d37SMark Johnston 	/*
2472e1cb9d37SMark Johnston 	 * Ensure that we are not concurrently wiring pages.  vm_map_wire() may
2473e1cb9d37SMark Johnston 	 * need to fault pages into the map and will drop the map lock while
2474e1cb9d37SMark Johnston 	 * doing so, and the VM object may end up in an inconsistent state if we
2475e1cb9d37SMark Johnston 	 * update the protection on the map entry in between faults.
2476e1cb9d37SMark Johnston 	 */
2477e1cb9d37SMark Johnston 	vm_map_wait_busy(map);
2478e1cb9d37SMark Johnston 
2479df8bae1dSRodney W. Grimes 	VM_MAP_RANGE_CHECK(map, start, end);
2480df8bae1dSRodney W. Grimes 
2481d1d3f7e1SDoug Moore 	if (!vm_map_lookup_entry(map, start, &entry))
2482d1d3f7e1SDoug Moore 		entry = entry->next;
2483df8bae1dSRodney W. Grimes 
2484df8bae1dSRodney W. Grimes 	/*
24850d94caffSDavid Greenman 	 * Make a first pass to check for protection violations.
2486df8bae1dSRodney W. Grimes 	 */
24871c5196c3SKonstantin Belousov 	for (current = entry; current->start < end; current = current->next) {
24888a89ca94SKonstantin Belousov 		if ((current->eflags & MAP_ENTRY_GUARD) != 0)
24898a89ca94SKonstantin Belousov 			continue;
2490afa07f7eSJohn Dyson 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
2491a1f6d91cSDavid Greenman 			vm_map_unlock(map);
2492df8bae1dSRodney W. Grimes 			return (KERN_INVALID_ARGUMENT);
2493a1f6d91cSDavid Greenman 		}
2494df8bae1dSRodney W. Grimes 		if ((new_prot & current->max_protection) != new_prot) {
2495df8bae1dSRodney W. Grimes 			vm_map_unlock(map);
2496df8bae1dSRodney W. Grimes 			return (KERN_PROTECTION_FAILURE);
2497df8bae1dSRodney W. Grimes 		}
249819f5d9f2SKonstantin Belousov 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0)
249919f5d9f2SKonstantin Belousov 			in_tran = entry;
250019f5d9f2SKonstantin Belousov 	}
250119f5d9f2SKonstantin Belousov 
250219f5d9f2SKonstantin Belousov 	/*
250319f5d9f2SKonstantin Belousov 	 * Postpone the operation until all in transition map entries
250419f5d9f2SKonstantin Belousov 	 * are stabilized.  In-transition entry might already have its
250519f5d9f2SKonstantin Belousov 	 * pages wired and wired_count incremented, but
250619f5d9f2SKonstantin Belousov 	 * MAP_ENTRY_USER_WIRED flag not yet set, and visible to other
250719f5d9f2SKonstantin Belousov 	 * threads because the map lock is dropped.  In this case we
250819f5d9f2SKonstantin Belousov 	 * would miss our call to vm_fault_copy_entry().
250919f5d9f2SKonstantin Belousov 	 */
251019f5d9f2SKonstantin Belousov 	if (in_tran != NULL) {
251119f5d9f2SKonstantin Belousov 		in_tran->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
251219f5d9f2SKonstantin Belousov 		vm_map_unlock_and_wait(map, 0);
251319f5d9f2SKonstantin Belousov 		goto again;
2514df8bae1dSRodney W. Grimes 	}
2515df8bae1dSRodney W. Grimes 
25163364c323SKonstantin Belousov 	/*
2517a72dce34SDoug Moore 	 * Before changing the protections, try to reserve swap space for any
2518a72dce34SDoug Moore 	 * private (i.e., copy-on-write) mappings that are transitioning from
2519a72dce34SDoug Moore 	 * read-only to read/write access.  If a reservation fails, break out
2520a72dce34SDoug Moore 	 * of this loop early and let the next loop simplify the entries, since
2521a72dce34SDoug Moore 	 * some may now be mergeable.
25223364c323SKonstantin Belousov 	 */
2523a72dce34SDoug Moore 	rv = KERN_SUCCESS;
252418cd8bb8SDoug Moore 	vm_map_clip_start(map, entry, start);
25251c5196c3SKonstantin Belousov 	for (current = entry; current->start < end; current = current->next) {
25263364c323SKonstantin Belousov 
25273364c323SKonstantin Belousov 		vm_map_clip_end(map, current, end);
25283364c323SKonstantin Belousov 
25293364c323SKonstantin Belousov 		if (set_max ||
25303364c323SKonstantin Belousov 		    ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 ||
253119bd0d9cSKonstantin Belousov 		    ENTRY_CHARGED(current) ||
253219bd0d9cSKonstantin Belousov 		    (current->eflags & MAP_ENTRY_GUARD) != 0) {
25333364c323SKonstantin Belousov 			continue;
25343364c323SKonstantin Belousov 		}
25353364c323SKonstantin Belousov 
2536ef694c1aSEdward Tomasz Napierala 		cred = curthread->td_ucred;
25373364c323SKonstantin Belousov 		obj = current->object.vm_object;
25383364c323SKonstantin Belousov 
25393364c323SKonstantin Belousov 		if (obj == NULL || (current->eflags & MAP_ENTRY_NEEDS_COPY)) {
25403364c323SKonstantin Belousov 			if (!swap_reserve(current->end - current->start)) {
2541a72dce34SDoug Moore 				rv = KERN_RESOURCE_SHORTAGE;
2542a72dce34SDoug Moore 				end = current->end;
2543a72dce34SDoug Moore 				break;
25443364c323SKonstantin Belousov 			}
2545ef694c1aSEdward Tomasz Napierala 			crhold(cred);
2546ef694c1aSEdward Tomasz Napierala 			current->cred = cred;
25473364c323SKonstantin Belousov 			continue;
25483364c323SKonstantin Belousov 		}
25493364c323SKonstantin Belousov 
255089f6b863SAttilio Rao 		VM_OBJECT_WLOCK(obj);
25513364c323SKonstantin Belousov 		if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP) {
255289f6b863SAttilio Rao 			VM_OBJECT_WUNLOCK(obj);
25533364c323SKonstantin Belousov 			continue;
25543364c323SKonstantin Belousov 		}
25553364c323SKonstantin Belousov 
25563364c323SKonstantin Belousov 		/*
25573364c323SKonstantin Belousov 		 * Charge for the whole object allocation now, since
25583364c323SKonstantin Belousov 		 * we cannot distinguish between non-charged and
25593364c323SKonstantin Belousov 		 * charged clipped mapping of the same object later.
25603364c323SKonstantin Belousov 		 */
25613364c323SKonstantin Belousov 		KASSERT(obj->charge == 0,
25623d95614fSKonstantin Belousov 		    ("vm_map_protect: object %p overcharged (entry %p)",
25633d95614fSKonstantin Belousov 		    obj, current));
25643364c323SKonstantin Belousov 		if (!swap_reserve(ptoa(obj->size))) {
256589f6b863SAttilio Rao 			VM_OBJECT_WUNLOCK(obj);
2566a72dce34SDoug Moore 			rv = KERN_RESOURCE_SHORTAGE;
2567a72dce34SDoug Moore 			end = current->end;
2568a72dce34SDoug Moore 			break;
25693364c323SKonstantin Belousov 		}
25703364c323SKonstantin Belousov 
2571ef694c1aSEdward Tomasz Napierala 		crhold(cred);
2572ef694c1aSEdward Tomasz Napierala 		obj->cred = cred;
25733364c323SKonstantin Belousov 		obj->charge = ptoa(obj->size);
257489f6b863SAttilio Rao 		VM_OBJECT_WUNLOCK(obj);
25753364c323SKonstantin Belousov 	}
25763364c323SKonstantin Belousov 
2577df8bae1dSRodney W. Grimes 	/*
2578a72dce34SDoug Moore 	 * If enough swap space was available, go back and fix up protections.
2579a72dce34SDoug Moore 	 * Otherwise, just simplify entries, since some may have been modified.
2580a72dce34SDoug Moore 	 * [Note that clipping is not necessary the second time.]
2581df8bae1dSRodney W. Grimes 	 */
2582a72dce34SDoug Moore 	for (current = entry; current->start < end;
2583a72dce34SDoug Moore 	    vm_map_simplify_entry(map, current), current = current->next) {
2584a72dce34SDoug Moore 		if (rv != KERN_SUCCESS ||
2585a72dce34SDoug Moore 		    (current->eflags & MAP_ENTRY_GUARD) != 0)
258619bd0d9cSKonstantin Belousov 			continue;
258719bd0d9cSKonstantin Belousov 
2588df8bae1dSRodney W. Grimes 		old_prot = current->protection;
2589210a6886SKonstantin Belousov 
2590df8bae1dSRodney W. Grimes 		if (set_max)
2591df8bae1dSRodney W. Grimes 			current->protection =
2592df8bae1dSRodney W. Grimes 			    (current->max_protection = new_prot) &
2593df8bae1dSRodney W. Grimes 			    old_prot;
2594df8bae1dSRodney W. Grimes 		else
2595df8bae1dSRodney W. Grimes 			current->protection = new_prot;
2596df8bae1dSRodney W. Grimes 
2597dd006a1bSAlan Cox 		/*
2598dd006a1bSAlan Cox 		 * For user wired map entries, the normal lazy evaluation of
2599dd006a1bSAlan Cox 		 * write access upgrades through soft page faults is
2600dd006a1bSAlan Cox 		 * undesirable.  Instead, immediately copy any pages that are
2601dd006a1bSAlan Cox 		 * copy-on-write and enable write access in the physical map.
2602dd006a1bSAlan Cox 		 */
2603dd006a1bSAlan Cox 		if ((current->eflags & MAP_ENTRY_USER_WIRED) != 0 &&
2604210a6886SKonstantin Belousov 		    (current->protection & VM_PROT_WRITE) != 0 &&
26055930251aSKonstantin Belousov 		    (old_prot & VM_PROT_WRITE) == 0)
2606210a6886SKonstantin Belousov 			vm_fault_copy_entry(map, map, current, current, NULL);
2607210a6886SKonstantin Belousov 
2608df8bae1dSRodney W. Grimes 		/*
26092fafce9eSAlan Cox 		 * When restricting access, update the physical map.  Worry
26102fafce9eSAlan Cox 		 * about copy-on-write here.
2611df8bae1dSRodney W. Grimes 		 */
26122fafce9eSAlan Cox 		if ((old_prot & ~current->protection) != 0) {
2613afa07f7eSJohn Dyson #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
2614df8bae1dSRodney W. Grimes 							VM_PROT_ALL)
2615df8bae1dSRodney W. Grimes 			pmap_protect(map->pmap, current->start,
2616df8bae1dSRodney W. Grimes 			    current->end,
26171c85e3dfSAlan Cox 			    current->protection & MASK(current));
2618df8bae1dSRodney W. Grimes #undef	MASK
2619df8bae1dSRodney W. Grimes 		}
2620df8bae1dSRodney W. Grimes 	}
2621df8bae1dSRodney W. Grimes 	vm_map_unlock(map);
2622a72dce34SDoug Moore 	return (rv);
2623df8bae1dSRodney W. Grimes }
2624df8bae1dSRodney W. Grimes 
2625df8bae1dSRodney W. Grimes /*
2626867a482dSJohn Dyson  *	vm_map_madvise:
2627867a482dSJohn Dyson  *
2628867a482dSJohn Dyson  *	This routine traverses a processes map handling the madvise
2629f7fc307aSAlan Cox  *	system call.  Advisories are classified as either those effecting
2630f7fc307aSAlan Cox  *	the vm_map_entry structure, or those effecting the underlying
2631f7fc307aSAlan Cox  *	objects.
2632867a482dSJohn Dyson  */
2633b4309055SMatthew Dillon int
26341b40f8c0SMatthew Dillon vm_map_madvise(
26351b40f8c0SMatthew Dillon 	vm_map_t map,
26361b40f8c0SMatthew Dillon 	vm_offset_t start,
26371b40f8c0SMatthew Dillon 	vm_offset_t end,
26381b40f8c0SMatthew Dillon 	int behav)
2639867a482dSJohn Dyson {
2640f7fc307aSAlan Cox 	vm_map_entry_t current, entry;
26413e7cb27cSAlan Cox 	bool modify_map;
2642867a482dSJohn Dyson 
2643b4309055SMatthew Dillon 	/*
2644b4309055SMatthew Dillon 	 * Some madvise calls directly modify the vm_map_entry, in which case
2645b4309055SMatthew Dillon 	 * we need to use an exclusive lock on the map and we need to perform
2646b4309055SMatthew Dillon 	 * various clipping operations.  Otherwise we only need a read-lock
2647b4309055SMatthew Dillon 	 * on the map.
2648b4309055SMatthew Dillon 	 */
2649b4309055SMatthew Dillon 	switch(behav) {
2650b4309055SMatthew Dillon 	case MADV_NORMAL:
2651b4309055SMatthew Dillon 	case MADV_SEQUENTIAL:
2652b4309055SMatthew Dillon 	case MADV_RANDOM:
26534f79d873SMatthew Dillon 	case MADV_NOSYNC:
26544f79d873SMatthew Dillon 	case MADV_AUTOSYNC:
26559730a5daSPaul Saab 	case MADV_NOCORE:
26569730a5daSPaul Saab 	case MADV_CORE:
265779e9451fSKonstantin Belousov 		if (start == end)
26583e7cb27cSAlan Cox 			return (0);
26593e7cb27cSAlan Cox 		modify_map = true;
2660867a482dSJohn Dyson 		vm_map_lock(map);
2661b4309055SMatthew Dillon 		break;
2662b4309055SMatthew Dillon 	case MADV_WILLNEED:
2663b4309055SMatthew Dillon 	case MADV_DONTNEED:
2664b4309055SMatthew Dillon 	case MADV_FREE:
266579e9451fSKonstantin Belousov 		if (start == end)
26663e7cb27cSAlan Cox 			return (0);
26673e7cb27cSAlan Cox 		modify_map = false;
2668f7fc307aSAlan Cox 		vm_map_lock_read(map);
2669b4309055SMatthew Dillon 		break;
2670b4309055SMatthew Dillon 	default:
26713e7cb27cSAlan Cox 		return (EINVAL);
2672b4309055SMatthew Dillon 	}
2673b4309055SMatthew Dillon 
2674b4309055SMatthew Dillon 	/*
2675b4309055SMatthew Dillon 	 * Locate starting entry and clip if necessary.
2676b4309055SMatthew Dillon 	 */
2677867a482dSJohn Dyson 	VM_MAP_RANGE_CHECK(map, start, end);
2678867a482dSJohn Dyson 
2679d1d3f7e1SDoug Moore 	if (vm_map_lookup_entry(map, start, &entry)) {
2680f7fc307aSAlan Cox 		if (modify_map)
2681867a482dSJohn Dyson 			vm_map_clip_start(map, entry, start);
2682d1d3f7e1SDoug Moore 	} else {
2683d1d3f7e1SDoug Moore 		entry = entry->next;
2684b4309055SMatthew Dillon 	}
2685867a482dSJohn Dyson 
2686f7fc307aSAlan Cox 	if (modify_map) {
2687f7fc307aSAlan Cox 		/*
2688f7fc307aSAlan Cox 		 * madvise behaviors that are implemented in the vm_map_entry.
2689f7fc307aSAlan Cox 		 *
2690f7fc307aSAlan Cox 		 * We clip the vm_map_entry so that behavioral changes are
2691f7fc307aSAlan Cox 		 * limited to the specified address range.
2692f7fc307aSAlan Cox 		 */
26931c5196c3SKonstantin Belousov 		for (current = entry; current->start < end;
26941c5196c3SKonstantin Belousov 		    current = current->next) {
2695f7fc307aSAlan Cox 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
2696867a482dSJohn Dyson 				continue;
2697fed9a903SJohn Dyson 
269847221757SJohn Dyson 			vm_map_clip_end(map, current, end);
2699fed9a903SJohn Dyson 
2700f7fc307aSAlan Cox 			switch (behav) {
2701867a482dSJohn Dyson 			case MADV_NORMAL:
27027f866e4bSAlan Cox 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
2703867a482dSJohn Dyson 				break;
2704867a482dSJohn Dyson 			case MADV_SEQUENTIAL:
27057f866e4bSAlan Cox 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
2706867a482dSJohn Dyson 				break;
2707867a482dSJohn Dyson 			case MADV_RANDOM:
27087f866e4bSAlan Cox 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
2709867a482dSJohn Dyson 				break;
27104f79d873SMatthew Dillon 			case MADV_NOSYNC:
27114f79d873SMatthew Dillon 				current->eflags |= MAP_ENTRY_NOSYNC;
27124f79d873SMatthew Dillon 				break;
27134f79d873SMatthew Dillon 			case MADV_AUTOSYNC:
27144f79d873SMatthew Dillon 				current->eflags &= ~MAP_ENTRY_NOSYNC;
27154f79d873SMatthew Dillon 				break;
27169730a5daSPaul Saab 			case MADV_NOCORE:
27179730a5daSPaul Saab 				current->eflags |= MAP_ENTRY_NOCOREDUMP;
27189730a5daSPaul Saab 				break;
27199730a5daSPaul Saab 			case MADV_CORE:
27209730a5daSPaul Saab 				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
27219730a5daSPaul Saab 				break;
2722867a482dSJohn Dyson 			default:
2723867a482dSJohn Dyson 				break;
2724867a482dSJohn Dyson 			}
2725f7fc307aSAlan Cox 			vm_map_simplify_entry(map, current);
2726867a482dSJohn Dyson 		}
2727867a482dSJohn Dyson 		vm_map_unlock(map);
2728b4309055SMatthew Dillon 	} else {
272992a59946SJohn Baldwin 		vm_pindex_t pstart, pend;
2730f7fc307aSAlan Cox 
2731f7fc307aSAlan Cox 		/*
2732f7fc307aSAlan Cox 		 * madvise behaviors that are implemented in the underlying
2733f7fc307aSAlan Cox 		 * vm_object.
2734f7fc307aSAlan Cox 		 *
2735f7fc307aSAlan Cox 		 * Since we don't clip the vm_map_entry, we have to clip
2736f7fc307aSAlan Cox 		 * the vm_object pindex and count.
2737f7fc307aSAlan Cox 		 */
27381c5196c3SKonstantin Belousov 		for (current = entry; current->start < end;
27391c5196c3SKonstantin Belousov 		    current = current->next) {
274051321f7cSAlan Cox 			vm_offset_t useEnd, useStart;
27415f99b57cSMatthew Dillon 
2742f7fc307aSAlan Cox 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
2743f7fc307aSAlan Cox 				continue;
2744f7fc307aSAlan Cox 
274592a59946SJohn Baldwin 			pstart = OFF_TO_IDX(current->offset);
274692a59946SJohn Baldwin 			pend = pstart + atop(current->end - current->start);
27475f99b57cSMatthew Dillon 			useStart = current->start;
274851321f7cSAlan Cox 			useEnd = current->end;
2749f7fc307aSAlan Cox 
2750f7fc307aSAlan Cox 			if (current->start < start) {
275192a59946SJohn Baldwin 				pstart += atop(start - current->start);
27525f99b57cSMatthew Dillon 				useStart = start;
2753f7fc307aSAlan Cox 			}
275451321f7cSAlan Cox 			if (current->end > end) {
275592a59946SJohn Baldwin 				pend -= atop(current->end - end);
275651321f7cSAlan Cox 				useEnd = end;
275751321f7cSAlan Cox 			}
2758f7fc307aSAlan Cox 
275992a59946SJohn Baldwin 			if (pstart >= pend)
2760f7fc307aSAlan Cox 				continue;
2761f7fc307aSAlan Cox 
276251321f7cSAlan Cox 			/*
276351321f7cSAlan Cox 			 * Perform the pmap_advise() before clearing
276451321f7cSAlan Cox 			 * PGA_REFERENCED in vm_page_advise().  Otherwise, a
276551321f7cSAlan Cox 			 * concurrent pmap operation, such as pmap_remove(),
276651321f7cSAlan Cox 			 * could clear a reference in the pmap and set
276751321f7cSAlan Cox 			 * PGA_REFERENCED on the page before the pmap_advise()
276851321f7cSAlan Cox 			 * had completed.  Consequently, the page would appear
276951321f7cSAlan Cox 			 * referenced based upon an old reference that
277051321f7cSAlan Cox 			 * occurred before this pmap_advise() ran.
277151321f7cSAlan Cox 			 */
277251321f7cSAlan Cox 			if (behav == MADV_DONTNEED || behav == MADV_FREE)
277351321f7cSAlan Cox 				pmap_advise(map->pmap, useStart, useEnd,
277451321f7cSAlan Cox 				    behav);
277551321f7cSAlan Cox 
277692a59946SJohn Baldwin 			vm_object_madvise(current->object.vm_object, pstart,
277792a59946SJohn Baldwin 			    pend, behav);
277854432196SKonstantin Belousov 
277954432196SKonstantin Belousov 			/*
278054432196SKonstantin Belousov 			 * Pre-populate paging structures in the
278154432196SKonstantin Belousov 			 * WILLNEED case.  For wired entries, the
278254432196SKonstantin Belousov 			 * paging structures are already populated.
278354432196SKonstantin Belousov 			 */
278454432196SKonstantin Belousov 			if (behav == MADV_WILLNEED &&
278554432196SKonstantin Belousov 			    current->wired_count == 0) {
27860551c08dSAlan Cox 				vm_map_pmap_enter(map,
27875f99b57cSMatthew Dillon 				    useStart,
27884da4d293SAlan Cox 				    current->protection,
2789f7fc307aSAlan Cox 				    current->object.vm_object,
279092a59946SJohn Baldwin 				    pstart,
279192a59946SJohn Baldwin 				    ptoa(pend - pstart),
2792e3026983SMatthew Dillon 				    MAP_PREFAULT_MADVISE
2793b4309055SMatthew Dillon 				);
2794f7fc307aSAlan Cox 			}
2795f7fc307aSAlan Cox 		}
2796f7fc307aSAlan Cox 		vm_map_unlock_read(map);
2797f7fc307aSAlan Cox 	}
2798b4309055SMatthew Dillon 	return (0);
2799867a482dSJohn Dyson }
2800867a482dSJohn Dyson 
2801867a482dSJohn Dyson 
2802867a482dSJohn Dyson /*
2803df8bae1dSRodney W. Grimes  *	vm_map_inherit:
2804df8bae1dSRodney W. Grimes  *
2805df8bae1dSRodney W. Grimes  *	Sets the inheritance of the specified address
2806df8bae1dSRodney W. Grimes  *	range in the target map.  Inheritance
2807df8bae1dSRodney W. Grimes  *	affects how the map will be shared with
2808e2abaaaaSAlan Cox  *	child maps at the time of vmspace_fork.
2809df8bae1dSRodney W. Grimes  */
2810df8bae1dSRodney W. Grimes int
2811b9dcd593SBruce Evans vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
2812b9dcd593SBruce Evans 	       vm_inherit_t new_inheritance)
2813df8bae1dSRodney W. Grimes {
2814c0877f10SJohn Dyson 	vm_map_entry_t entry;
2815d1d3f7e1SDoug Moore 	vm_map_entry_t temp_entry;
2816df8bae1dSRodney W. Grimes 
2817df8bae1dSRodney W. Grimes 	switch (new_inheritance) {
2818df8bae1dSRodney W. Grimes 	case VM_INHERIT_NONE:
2819df8bae1dSRodney W. Grimes 	case VM_INHERIT_COPY:
2820df8bae1dSRodney W. Grimes 	case VM_INHERIT_SHARE:
282178d7964bSXin LI 	case VM_INHERIT_ZERO:
2822df8bae1dSRodney W. Grimes 		break;
2823df8bae1dSRodney W. Grimes 	default:
2824df8bae1dSRodney W. Grimes 		return (KERN_INVALID_ARGUMENT);
2825df8bae1dSRodney W. Grimes 	}
282679e9451fSKonstantin Belousov 	if (start == end)
282779e9451fSKonstantin Belousov 		return (KERN_SUCCESS);
2828df8bae1dSRodney W. Grimes 	vm_map_lock(map);
2829df8bae1dSRodney W. Grimes 	VM_MAP_RANGE_CHECK(map, start, end);
2830d1d3f7e1SDoug Moore 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
2831d1d3f7e1SDoug Moore 		entry = temp_entry;
2832df8bae1dSRodney W. Grimes 		vm_map_clip_start(map, entry, start);
2833d1d3f7e1SDoug Moore 	} else
2834d1d3f7e1SDoug Moore 		entry = temp_entry->next;
28351c5196c3SKonstantin Belousov 	while (entry->start < end) {
2836df8bae1dSRodney W. Grimes 		vm_map_clip_end(map, entry, end);
283719bd0d9cSKonstantin Belousov 		if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
283819bd0d9cSKonstantin Belousov 		    new_inheritance != VM_INHERIT_ZERO)
2839df8bae1dSRodney W. Grimes 			entry->inheritance = new_inheritance;
284044428f62SAlan Cox 		vm_map_simplify_entry(map, entry);
2841df8bae1dSRodney W. Grimes 		entry = entry->next;
2842df8bae1dSRodney W. Grimes 	}
2843df8bae1dSRodney W. Grimes 	vm_map_unlock(map);
2844df8bae1dSRodney W. Grimes 	return (KERN_SUCCESS);
2845df8bae1dSRodney W. Grimes }
2846df8bae1dSRodney W. Grimes 
2847df8bae1dSRodney W. Grimes /*
2848312df2c1SDoug Moore  *	vm_map_entry_in_transition:
2849312df2c1SDoug Moore  *
2850312df2c1SDoug Moore  *	Release the map lock, and sleep until the entry is no longer in
2851312df2c1SDoug Moore  *	transition.  Awake and acquire the map lock.  If the map changed while
2852312df2c1SDoug Moore  *	another held the lock, lookup a possibly-changed entry at or after the
2853312df2c1SDoug Moore  *	'start' position of the old entry.
2854312df2c1SDoug Moore  */
2855312df2c1SDoug Moore static vm_map_entry_t
2856312df2c1SDoug Moore vm_map_entry_in_transition(vm_map_t map, vm_offset_t in_start,
2857312df2c1SDoug Moore     vm_offset_t *io_end, bool holes_ok, vm_map_entry_t in_entry)
2858312df2c1SDoug Moore {
2859312df2c1SDoug Moore 	vm_map_entry_t entry;
2860312df2c1SDoug Moore 	vm_offset_t start;
2861312df2c1SDoug Moore 	u_int last_timestamp;
2862312df2c1SDoug Moore 
2863312df2c1SDoug Moore 	VM_MAP_ASSERT_LOCKED(map);
2864312df2c1SDoug Moore 	KASSERT((in_entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
2865312df2c1SDoug Moore 	    ("not in-tranition map entry %p", in_entry));
2866312df2c1SDoug Moore 	/*
2867312df2c1SDoug Moore 	 * We have not yet clipped the entry.
2868312df2c1SDoug Moore 	 */
2869312df2c1SDoug Moore 	start = MAX(in_start, in_entry->start);
2870312df2c1SDoug Moore 	in_entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2871312df2c1SDoug Moore 	last_timestamp = map->timestamp;
2872312df2c1SDoug Moore 	if (vm_map_unlock_and_wait(map, 0)) {
2873312df2c1SDoug Moore 		/*
2874312df2c1SDoug Moore 		 * Allow interruption of user wiring/unwiring?
2875312df2c1SDoug Moore 		 */
2876312df2c1SDoug Moore 	}
2877312df2c1SDoug Moore 	vm_map_lock(map);
2878312df2c1SDoug Moore 	if (last_timestamp + 1 == map->timestamp)
2879312df2c1SDoug Moore 		return (in_entry);
2880312df2c1SDoug Moore 
2881312df2c1SDoug Moore 	/*
2882312df2c1SDoug Moore 	 * Look again for the entry because the map was modified while it was
2883312df2c1SDoug Moore 	 * unlocked.  Specifically, the entry may have been clipped, merged, or
2884312df2c1SDoug Moore 	 * deleted.
2885312df2c1SDoug Moore 	 */
2886312df2c1SDoug Moore 	if (!vm_map_lookup_entry(map, start, &entry)) {
2887312df2c1SDoug Moore 		if (!holes_ok) {
2888312df2c1SDoug Moore 			*io_end = start;
2889312df2c1SDoug Moore 			return (NULL);
2890312df2c1SDoug Moore 		}
2891312df2c1SDoug Moore 		entry = entry->next;
2892312df2c1SDoug Moore 	}
2893312df2c1SDoug Moore 	return (entry);
2894312df2c1SDoug Moore }
2895312df2c1SDoug Moore 
2896312df2c1SDoug Moore /*
2897acd9a301SAlan Cox  *	vm_map_unwire:
2898acd9a301SAlan Cox  *
2899e27e17b7SAlan Cox  *	Implements both kernel and user unwiring.
2900acd9a301SAlan Cox  */
2901acd9a301SAlan Cox int
2902acd9a301SAlan Cox vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
2903abd498aaSBruce M Simpson     int flags)
2904acd9a301SAlan Cox {
2905312df2c1SDoug Moore 	vm_map_entry_t entry, first_entry;
2906acd9a301SAlan Cox 	int rv;
2907312df2c1SDoug Moore 	bool first_iteration, holes_ok, need_wakeup, user_unwire;
2908acd9a301SAlan Cox 
290979e9451fSKonstantin Belousov 	if (start == end)
291079e9451fSKonstantin Belousov 		return (KERN_SUCCESS);
29119a0cdf94SDoug Moore 	holes_ok = (flags & VM_MAP_WIRE_HOLESOK) != 0;
29129a0cdf94SDoug Moore 	user_unwire = (flags & VM_MAP_WIRE_USER) != 0;
2913acd9a301SAlan Cox 	vm_map_lock(map);
2914acd9a301SAlan Cox 	VM_MAP_RANGE_CHECK(map, start, end);
2915d1d3f7e1SDoug Moore 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
29169a0cdf94SDoug Moore 		if (holes_ok)
2917d1d3f7e1SDoug Moore 			first_entry = first_entry->next;
2918d1d3f7e1SDoug Moore 		else {
2919acd9a301SAlan Cox 			vm_map_unlock(map);
2920acd9a301SAlan Cox 			return (KERN_INVALID_ADDRESS);
2921acd9a301SAlan Cox 		}
2922abd498aaSBruce M Simpson 	}
2923312df2c1SDoug Moore 	first_iteration = true;
2924acd9a301SAlan Cox 	entry = first_entry;
2925d2860f22SDoug Moore 	rv = KERN_SUCCESS;
29261c5196c3SKonstantin Belousov 	while (entry->start < end) {
2927acd9a301SAlan Cox 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
2928acd9a301SAlan Cox 			/*
2929acd9a301SAlan Cox 			 * We have not yet clipped the entry.
2930acd9a301SAlan Cox 			 */
2931312df2c1SDoug Moore 			entry = vm_map_entry_in_transition(map, start, &end,
2932312df2c1SDoug Moore 			    holes_ok, entry);
2933312df2c1SDoug Moore 			if (entry == NULL) {
2934312df2c1SDoug Moore 				if (first_iteration) {
2935acd9a301SAlan Cox 					vm_map_unlock(map);
2936acd9a301SAlan Cox 					return (KERN_INVALID_ADDRESS);
2937acd9a301SAlan Cox 				}
2938acd9a301SAlan Cox 				rv = KERN_INVALID_ADDRESS;
2939d2860f22SDoug Moore 				break;
2940acd9a301SAlan Cox 			}
2941312df2c1SDoug Moore 			first_entry = first_iteration ? entry : NULL;
2942acd9a301SAlan Cox 			continue;
2943acd9a301SAlan Cox 		}
2944312df2c1SDoug Moore 		first_iteration = false;
2945acd9a301SAlan Cox 		vm_map_clip_start(map, entry, start);
2946acd9a301SAlan Cox 		vm_map_clip_end(map, entry, end);
2947acd9a301SAlan Cox 		/*
2948acd9a301SAlan Cox 		 * Mark the entry in case the map lock is released.  (See
2949acd9a301SAlan Cox 		 * above.)
2950acd9a301SAlan Cox 		 */
2951ff3ae454SKonstantin Belousov 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 &&
2952ff3ae454SKonstantin Belousov 		    entry->wiring_thread == NULL,
2953ff3ae454SKonstantin Belousov 		    ("owned map entry %p", entry));
2954acd9a301SAlan Cox 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
29550acea7dfSKonstantin Belousov 		entry->wiring_thread = curthread;
2956acd9a301SAlan Cox 		/*
2957acd9a301SAlan Cox 		 * Check the map for holes in the specified region.
29589a0cdf94SDoug Moore 		 * If holes_ok, skip this check.
2959acd9a301SAlan Cox 		 */
29609a0cdf94SDoug Moore 		if (!holes_ok &&
29611c5196c3SKonstantin Belousov 		    (entry->end < end && entry->next->start > entry->end)) {
2962acd9a301SAlan Cox 			end = entry->end;
2963acd9a301SAlan Cox 			rv = KERN_INVALID_ADDRESS;
2964d2860f22SDoug Moore 			break;
2965acd9a301SAlan Cox 		}
2966acd9a301SAlan Cox 		/*
29673ffbc0cdSAlan Cox 		 * If system unwiring, require that the entry is system wired.
2968acd9a301SAlan Cox 		 */
29690ada205eSBrian Feldman 		if (!user_unwire &&
29700ada205eSBrian Feldman 		    vm_map_entry_system_wired_count(entry) == 0) {
2971acd9a301SAlan Cox 			end = entry->end;
2972acd9a301SAlan Cox 			rv = KERN_INVALID_ARGUMENT;
2973d2860f22SDoug Moore 			break;
2974acd9a301SAlan Cox 		}
2975acd9a301SAlan Cox 		entry = entry->next;
2976acd9a301SAlan Cox 	}
29779a0cdf94SDoug Moore 	need_wakeup = false;
29789a0cdf94SDoug Moore 	if (first_entry == NULL &&
29799a0cdf94SDoug Moore 	    !vm_map_lookup_entry(map, start, &first_entry)) {
29809a0cdf94SDoug Moore 		KASSERT(holes_ok, ("vm_map_unwire: lookup failed"));
2981d1d3f7e1SDoug Moore 		first_entry = first_entry->next;
2982acd9a301SAlan Cox 	}
29831c5196c3SKonstantin Belousov 	for (entry = first_entry; entry->start < end; entry = entry->next) {
29840acea7dfSKonstantin Belousov 		/*
29859a0cdf94SDoug Moore 		 * If holes_ok was specified, an empty
29860acea7dfSKonstantin Belousov 		 * space in the unwired region could have been mapped
29870acea7dfSKonstantin Belousov 		 * while the map lock was dropped for draining
29880acea7dfSKonstantin Belousov 		 * MAP_ENTRY_IN_TRANSITION.  Moreover, another thread
29890acea7dfSKonstantin Belousov 		 * could be simultaneously wiring this new mapping
29900acea7dfSKonstantin Belousov 		 * entry.  Detect these cases and skip any entries
29910acea7dfSKonstantin Belousov 		 * marked as in transition by us.
29920acea7dfSKonstantin Belousov 		 */
29930acea7dfSKonstantin Belousov 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
29940acea7dfSKonstantin Belousov 		    entry->wiring_thread != curthread) {
29959a0cdf94SDoug Moore 			KASSERT(holes_ok,
29960acea7dfSKonstantin Belousov 			    ("vm_map_unwire: !HOLESOK and new/changed entry"));
29970acea7dfSKonstantin Belousov 			continue;
29980acea7dfSKonstantin Belousov 		}
29990acea7dfSKonstantin Belousov 
30003ffbc0cdSAlan Cox 		if (rv == KERN_SUCCESS && (!user_unwire ||
30013ffbc0cdSAlan Cox 		    (entry->eflags & MAP_ENTRY_USER_WIRED))) {
300203462509SAlan Cox 			if (entry->wired_count == 1)
300303462509SAlan Cox 				vm_map_entry_unwire(map, entry);
300403462509SAlan Cox 			else
3005b2f3846aSAlan Cox 				entry->wired_count--;
300654a3a114SMark Johnston 			if (user_unwire)
300754a3a114SMark Johnston 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
3008b2f3846aSAlan Cox 		}
30090acea7dfSKonstantin Belousov 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
3010ff3ae454SKonstantin Belousov 		    ("vm_map_unwire: in-transition flag missing %p", entry));
3011ff3ae454SKonstantin Belousov 		KASSERT(entry->wiring_thread == curthread,
3012ff3ae454SKonstantin Belousov 		    ("vm_map_unwire: alien wire %p", entry));
3013acd9a301SAlan Cox 		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
30140acea7dfSKonstantin Belousov 		entry->wiring_thread = NULL;
3015acd9a301SAlan Cox 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
3016acd9a301SAlan Cox 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
30179a0cdf94SDoug Moore 			need_wakeup = true;
3018acd9a301SAlan Cox 		}
3019acd9a301SAlan Cox 		vm_map_simplify_entry(map, entry);
3020acd9a301SAlan Cox 	}
3021acd9a301SAlan Cox 	vm_map_unlock(map);
3022acd9a301SAlan Cox 	if (need_wakeup)
3023acd9a301SAlan Cox 		vm_map_wakeup(map);
3024acd9a301SAlan Cox 	return (rv);
3025acd9a301SAlan Cox }
3026acd9a301SAlan Cox 
302754a3a114SMark Johnston static void
302854a3a114SMark Johnston vm_map_wire_user_count_sub(u_long npages)
302954a3a114SMark Johnston {
303054a3a114SMark Johnston 
303154a3a114SMark Johnston 	atomic_subtract_long(&vm_user_wire_count, npages);
303254a3a114SMark Johnston }
303354a3a114SMark Johnston 
303454a3a114SMark Johnston static bool
303554a3a114SMark Johnston vm_map_wire_user_count_add(u_long npages)
303654a3a114SMark Johnston {
303754a3a114SMark Johnston 	u_long wired;
303854a3a114SMark Johnston 
303954a3a114SMark Johnston 	wired = vm_user_wire_count;
304054a3a114SMark Johnston 	do {
304154a3a114SMark Johnston 		if (npages + wired > vm_page_max_user_wired)
304254a3a114SMark Johnston 			return (false);
304354a3a114SMark Johnston 	} while (!atomic_fcmpset_long(&vm_user_wire_count, &wired,
304454a3a114SMark Johnston 	    npages + wired));
304554a3a114SMark Johnston 
304654a3a114SMark Johnston 	return (true);
304754a3a114SMark Johnston }
304854a3a114SMark Johnston 
3049acd9a301SAlan Cox /*
305066cd575bSAlan Cox  *	vm_map_wire_entry_failure:
305166cd575bSAlan Cox  *
305266cd575bSAlan Cox  *	Handle a wiring failure on the given entry.
305366cd575bSAlan Cox  *
305466cd575bSAlan Cox  *	The map should be locked.
305566cd575bSAlan Cox  */
305666cd575bSAlan Cox static void
305766cd575bSAlan Cox vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
305866cd575bSAlan Cox     vm_offset_t failed_addr)
305966cd575bSAlan Cox {
306066cd575bSAlan Cox 
306166cd575bSAlan Cox 	VM_MAP_ASSERT_LOCKED(map);
306266cd575bSAlan Cox 	KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 &&
306366cd575bSAlan Cox 	    entry->wired_count == 1,
306466cd575bSAlan Cox 	    ("vm_map_wire_entry_failure: entry %p isn't being wired", entry));
306566cd575bSAlan Cox 	KASSERT(failed_addr < entry->end,
306666cd575bSAlan Cox 	    ("vm_map_wire_entry_failure: entry %p was fully wired", entry));
306766cd575bSAlan Cox 
306866cd575bSAlan Cox 	/*
306966cd575bSAlan Cox 	 * If any pages at the start of this entry were successfully wired,
307066cd575bSAlan Cox 	 * then unwire them.
307166cd575bSAlan Cox 	 */
307266cd575bSAlan Cox 	if (failed_addr > entry->start) {
307366cd575bSAlan Cox 		pmap_unwire(map->pmap, entry->start, failed_addr);
307466cd575bSAlan Cox 		vm_object_unwire(entry->object.vm_object, entry->offset,
307566cd575bSAlan Cox 		    failed_addr - entry->start, PQ_ACTIVE);
307666cd575bSAlan Cox 	}
307766cd575bSAlan Cox 
307866cd575bSAlan Cox 	/*
307966cd575bSAlan Cox 	 * Assign an out-of-range value to represent the failure to wire this
308066cd575bSAlan Cox 	 * entry.
308166cd575bSAlan Cox 	 */
308266cd575bSAlan Cox 	entry->wired_count = -1;
308366cd575bSAlan Cox }
308466cd575bSAlan Cox 
308554a3a114SMark Johnston int
308654a3a114SMark Johnston vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags)
308754a3a114SMark Johnston {
308854a3a114SMark Johnston 	int rv;
308954a3a114SMark Johnston 
309054a3a114SMark Johnston 	vm_map_lock(map);
309154a3a114SMark Johnston 	rv = vm_map_wire_locked(map, start, end, flags);
309254a3a114SMark Johnston 	vm_map_unlock(map);
309354a3a114SMark Johnston 	return (rv);
309454a3a114SMark Johnston }
309554a3a114SMark Johnston 
309654a3a114SMark Johnston 
309766cd575bSAlan Cox /*
309854a3a114SMark Johnston  *	vm_map_wire_locked:
3099e27e17b7SAlan Cox  *
310054a3a114SMark Johnston  *	Implements both kernel and user wiring.  Returns with the map locked,
310154a3a114SMark Johnston  *	the map lock may be dropped.
3102e27e17b7SAlan Cox  */
3103e27e17b7SAlan Cox int
310454a3a114SMark Johnston vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags)
3105e27e17b7SAlan Cox {
310612d7cc84SAlan Cox 	vm_map_entry_t entry, first_entry, tmp_entry;
310766cd575bSAlan Cox 	vm_offset_t faddr, saved_end, saved_start;
310854a3a114SMark Johnston 	u_long npages;
310954a3a114SMark Johnston 	u_int last_timestamp;
311012d7cc84SAlan Cox 	int rv;
3111312df2c1SDoug Moore 	bool first_iteration, holes_ok, need_wakeup, user_wire;
3112e4cd31ddSJeff Roberson 	vm_prot_t prot;
3113e27e17b7SAlan Cox 
311454a3a114SMark Johnston 	VM_MAP_ASSERT_LOCKED(map);
311554a3a114SMark Johnston 
311679e9451fSKonstantin Belousov 	if (start == end)
311779e9451fSKonstantin Belousov 		return (KERN_SUCCESS);
3118e4cd31ddSJeff Roberson 	prot = 0;
3119e4cd31ddSJeff Roberson 	if (flags & VM_MAP_WIRE_WRITE)
3120e4cd31ddSJeff Roberson 		prot |= VM_PROT_WRITE;
31219a0cdf94SDoug Moore 	holes_ok = (flags & VM_MAP_WIRE_HOLESOK) != 0;
31229a0cdf94SDoug Moore 	user_wire = (flags & VM_MAP_WIRE_USER) != 0;
312312d7cc84SAlan Cox 	VM_MAP_RANGE_CHECK(map, start, end);
3124d1d3f7e1SDoug Moore 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
31259a0cdf94SDoug Moore 		if (holes_ok)
3126d1d3f7e1SDoug Moore 			first_entry = first_entry->next;
3127d1d3f7e1SDoug Moore 		else
312812d7cc84SAlan Cox 			return (KERN_INVALID_ADDRESS);
312912d7cc84SAlan Cox 	}
3130312df2c1SDoug Moore 	first_iteration = true;
313112d7cc84SAlan Cox 	entry = first_entry;
31321c5196c3SKonstantin Belousov 	while (entry->start < end) {
313312d7cc84SAlan Cox 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
313412d7cc84SAlan Cox 			/*
313512d7cc84SAlan Cox 			 * We have not yet clipped the entry.
313612d7cc84SAlan Cox 			 */
3137312df2c1SDoug Moore 			entry = vm_map_entry_in_transition(map, start, &end,
3138312df2c1SDoug Moore 			    holes_ok, entry);
3139312df2c1SDoug Moore 			if (entry == NULL) {
3140312df2c1SDoug Moore 				if (first_iteration)
314112d7cc84SAlan Cox 					return (KERN_INVALID_ADDRESS);
314212d7cc84SAlan Cox 				rv = KERN_INVALID_ADDRESS;
314312d7cc84SAlan Cox 				goto done;
314412d7cc84SAlan Cox 			}
3145312df2c1SDoug Moore 			first_entry = first_iteration ? entry : NULL;
314612d7cc84SAlan Cox 			continue;
314712d7cc84SAlan Cox 		}
3148312df2c1SDoug Moore 		first_iteration = false;
314912d7cc84SAlan Cox 		vm_map_clip_start(map, entry, start);
315012d7cc84SAlan Cox 		vm_map_clip_end(map, entry, end);
315112d7cc84SAlan Cox 		/*
315212d7cc84SAlan Cox 		 * Mark the entry in case the map lock is released.  (See
315312d7cc84SAlan Cox 		 * above.)
315412d7cc84SAlan Cox 		 */
3155ff3ae454SKonstantin Belousov 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 &&
3156ff3ae454SKonstantin Belousov 		    entry->wiring_thread == NULL,
3157ff3ae454SKonstantin Belousov 		    ("owned map entry %p", entry));
315812d7cc84SAlan Cox 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
31590acea7dfSKonstantin Belousov 		entry->wiring_thread = curthread;
3160e4cd31ddSJeff Roberson 		if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0
3161e4cd31ddSJeff Roberson 		    || (entry->protection & prot) != prot) {
3162529ab57bSKonstantin Belousov 			entry->eflags |= MAP_ENTRY_WIRE_SKIPPED;
31639a0cdf94SDoug Moore 			if (!holes_ok) {
31646d7e8091SKonstantin Belousov 				end = entry->end;
31656d7e8091SKonstantin Belousov 				rv = KERN_INVALID_ADDRESS;
31666d7e8091SKonstantin Belousov 				goto done;
31676d7e8091SKonstantin Belousov 			}
316838e220e8SDoug Moore 		} else if (entry->wired_count == 0) {
31690ada205eSBrian Feldman 			entry->wired_count++;
317054a3a114SMark Johnston 
317154a3a114SMark Johnston 			npages = atop(entry->end - entry->start);
317254a3a114SMark Johnston 			if (user_wire && !vm_map_wire_user_count_add(npages)) {
317354a3a114SMark Johnston 				vm_map_wire_entry_failure(map, entry,
317454a3a114SMark Johnston 				    entry->start);
317554a3a114SMark Johnston 				end = entry->end;
317654a3a114SMark Johnston 				rv = KERN_RESOURCE_SHORTAGE;
317754a3a114SMark Johnston 				goto done;
317854a3a114SMark Johnston 			}
317966cd575bSAlan Cox 
318012d7cc84SAlan Cox 			/*
318112d7cc84SAlan Cox 			 * Release the map lock, relying on the in-transition
3182a5db445dSMax Laier 			 * mark.  Mark the map busy for fork.
318312d7cc84SAlan Cox 			 */
318454a3a114SMark Johnston 			saved_start = entry->start;
318554a3a114SMark Johnston 			saved_end = entry->end;
3186312df2c1SDoug Moore 			last_timestamp = map->timestamp;
3187a5db445dSMax Laier 			vm_map_busy(map);
318812d7cc84SAlan Cox 			vm_map_unlock(map);
318966cd575bSAlan Cox 
31900b695684SAlan Cox 			faddr = saved_start;
31910b695684SAlan Cox 			do {
319266cd575bSAlan Cox 				/*
319366cd575bSAlan Cox 				 * Simulate a fault to get the page and enter
319466cd575bSAlan Cox 				 * it into the physical map.
319566cd575bSAlan Cox 				 */
319666cd575bSAlan Cox 				if ((rv = vm_fault(map, faddr, VM_PROT_NONE,
31976a875bf9SKonstantin Belousov 				    VM_FAULT_WIRE)) != KERN_SUCCESS)
319866cd575bSAlan Cox 					break;
31990b695684SAlan Cox 			} while ((faddr += PAGE_SIZE) < saved_end);
320012d7cc84SAlan Cox 			vm_map_lock(map);
3201a5db445dSMax Laier 			vm_map_unbusy(map);
320212d7cc84SAlan Cox 			if (last_timestamp + 1 != map->timestamp) {
320312d7cc84SAlan Cox 				/*
320412d7cc84SAlan Cox 				 * Look again for the entry because the map was
320512d7cc84SAlan Cox 				 * modified while it was unlocked.  The entry
320612d7cc84SAlan Cox 				 * may have been clipped, but NOT merged or
320712d7cc84SAlan Cox 				 * deleted.
320812d7cc84SAlan Cox 				 */
32099a0cdf94SDoug Moore 				if (!vm_map_lookup_entry(map, saved_start,
32109a0cdf94SDoug Moore 				    &tmp_entry))
32119a0cdf94SDoug Moore 					KASSERT(false,
32129a0cdf94SDoug Moore 					    ("vm_map_wire: lookup failed"));
321312d7cc84SAlan Cox 				if (entry == first_entry)
321412d7cc84SAlan Cox 					first_entry = tmp_entry;
321512d7cc84SAlan Cox 				else
321612d7cc84SAlan Cox 					first_entry = NULL;
321712d7cc84SAlan Cox 				entry = tmp_entry;
321828c58286SAlan Cox 				while (entry->end < saved_end) {
321966cd575bSAlan Cox 					/*
322066cd575bSAlan Cox 					 * In case of failure, handle entries
322166cd575bSAlan Cox 					 * that were not fully wired here;
322266cd575bSAlan Cox 					 * fully wired entries are handled
322366cd575bSAlan Cox 					 * later.
322466cd575bSAlan Cox 					 */
322566cd575bSAlan Cox 					if (rv != KERN_SUCCESS &&
322666cd575bSAlan Cox 					    faddr < entry->end)
322766cd575bSAlan Cox 						vm_map_wire_entry_failure(map,
322866cd575bSAlan Cox 						    entry, faddr);
322912d7cc84SAlan Cox 					entry = entry->next;
323012d7cc84SAlan Cox 				}
323128c58286SAlan Cox 			}
323212d7cc84SAlan Cox 			if (rv != KERN_SUCCESS) {
323366cd575bSAlan Cox 				vm_map_wire_entry_failure(map, entry, faddr);
323454a3a114SMark Johnston 				if (user_wire)
323554a3a114SMark Johnston 					vm_map_wire_user_count_sub(npages);
323612d7cc84SAlan Cox 				end = entry->end;
323712d7cc84SAlan Cox 				goto done;
323812d7cc84SAlan Cox 			}
32390ada205eSBrian Feldman 		} else if (!user_wire ||
32400ada205eSBrian Feldman 			   (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
32410ada205eSBrian Feldman 			entry->wired_count++;
324212d7cc84SAlan Cox 		}
324312d7cc84SAlan Cox 		/*
324412d7cc84SAlan Cox 		 * Check the map for holes in the specified region.
32459a0cdf94SDoug Moore 		 * If holes_ok was specified, skip this check.
324612d7cc84SAlan Cox 		 */
32479a0cdf94SDoug Moore 		if (!holes_ok &&
32481c5196c3SKonstantin Belousov 		    entry->end < end && entry->next->start > entry->end) {
324912d7cc84SAlan Cox 			end = entry->end;
325012d7cc84SAlan Cox 			rv = KERN_INVALID_ADDRESS;
325112d7cc84SAlan Cox 			goto done;
325212d7cc84SAlan Cox 		}
325312d7cc84SAlan Cox 		entry = entry->next;
325412d7cc84SAlan Cox 	}
325512d7cc84SAlan Cox 	rv = KERN_SUCCESS;
325612d7cc84SAlan Cox done:
32579a0cdf94SDoug Moore 	need_wakeup = false;
32589a0cdf94SDoug Moore 	if (first_entry == NULL &&
32599a0cdf94SDoug Moore 	    !vm_map_lookup_entry(map, start, &first_entry)) {
32609a0cdf94SDoug Moore 		KASSERT(holes_ok, ("vm_map_wire: lookup failed"));
3261d1d3f7e1SDoug Moore 		first_entry = first_entry->next;
326212d7cc84SAlan Cox 	}
32631c5196c3SKonstantin Belousov 	for (entry = first_entry; entry->start < end; entry = entry->next) {
32640acea7dfSKonstantin Belousov 		/*
32659a0cdf94SDoug Moore 		 * If holes_ok was specified, an empty
32660acea7dfSKonstantin Belousov 		 * space in the unwired region could have been mapped
32670acea7dfSKonstantin Belousov 		 * while the map lock was dropped for faulting in the
32680acea7dfSKonstantin Belousov 		 * pages or draining MAP_ENTRY_IN_TRANSITION.
32690acea7dfSKonstantin Belousov 		 * Moreover, another thread could be simultaneously
32700acea7dfSKonstantin Belousov 		 * wiring this new mapping entry.  Detect these cases
3271546bb2d7SKonstantin Belousov 		 * and skip any entries marked as in transition not by us.
32720acea7dfSKonstantin Belousov 		 */
32730acea7dfSKonstantin Belousov 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
32740acea7dfSKonstantin Belousov 		    entry->wiring_thread != curthread) {
32759a0cdf94SDoug Moore 			KASSERT(holes_ok,
32760acea7dfSKonstantin Belousov 			    ("vm_map_wire: !HOLESOK and new/changed entry"));
32770acea7dfSKonstantin Belousov 			continue;
32780acea7dfSKonstantin Belousov 		}
32790acea7dfSKonstantin Belousov 
3280b71f9b0dSDoug Moore 		if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0) {
3281b71f9b0dSDoug Moore 			/* do nothing */
3282b71f9b0dSDoug Moore 		} else if (rv == KERN_SUCCESS) {
328312d7cc84SAlan Cox 			if (user_wire)
328412d7cc84SAlan Cox 				entry->eflags |= MAP_ENTRY_USER_WIRED;
328528c58286SAlan Cox 		} else if (entry->wired_count == -1) {
328628c58286SAlan Cox 			/*
328728c58286SAlan Cox 			 * Wiring failed on this entry.  Thus, unwiring is
328828c58286SAlan Cox 			 * unnecessary.
328928c58286SAlan Cox 			 */
329028c58286SAlan Cox 			entry->wired_count = 0;
329103462509SAlan Cox 		} else if (!user_wire ||
329203462509SAlan Cox 		    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
329366cd575bSAlan Cox 			/*
329466cd575bSAlan Cox 			 * Undo the wiring.  Wiring succeeded on this entry
329566cd575bSAlan Cox 			 * but failed on a later entry.
329666cd575bSAlan Cox 			 */
329754a3a114SMark Johnston 			if (entry->wired_count == 1) {
329803462509SAlan Cox 				vm_map_entry_unwire(map, entry);
329954a3a114SMark Johnston 				if (user_wire)
330054a3a114SMark Johnston 					vm_map_wire_user_count_sub(
330154a3a114SMark Johnston 					    atop(entry->end - entry->start));
330254a3a114SMark Johnston 			} else
330312d7cc84SAlan Cox 				entry->wired_count--;
330412d7cc84SAlan Cox 		}
33050acea7dfSKonstantin Belousov 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
33060acea7dfSKonstantin Belousov 		    ("vm_map_wire: in-transition flag missing %p", entry));
33070acea7dfSKonstantin Belousov 		KASSERT(entry->wiring_thread == curthread,
33080acea7dfSKonstantin Belousov 		    ("vm_map_wire: alien wire %p", entry));
33090acea7dfSKonstantin Belousov 		entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION |
33100acea7dfSKonstantin Belousov 		    MAP_ENTRY_WIRE_SKIPPED);
33110acea7dfSKonstantin Belousov 		entry->wiring_thread = NULL;
331212d7cc84SAlan Cox 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
331312d7cc84SAlan Cox 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
33149a0cdf94SDoug Moore 			need_wakeup = true;
331512d7cc84SAlan Cox 		}
331612d7cc84SAlan Cox 		vm_map_simplify_entry(map, entry);
331712d7cc84SAlan Cox 	}
331812d7cc84SAlan Cox 	if (need_wakeup)
331912d7cc84SAlan Cox 		vm_map_wakeup(map);
332012d7cc84SAlan Cox 	return (rv);
3321e27e17b7SAlan Cox }
3322e27e17b7SAlan Cox 
3323e27e17b7SAlan Cox /*
3324950f8459SAlan Cox  * vm_map_sync
3325df8bae1dSRodney W. Grimes  *
3326df8bae1dSRodney W. Grimes  * Push any dirty cached pages in the address range to their pager.
3327df8bae1dSRodney W. Grimes  * If syncio is TRUE, dirty pages are written synchronously.
3328df8bae1dSRodney W. Grimes  * If invalidate is TRUE, any cached pages are freed as well.
3329df8bae1dSRodney W. Grimes  *
3330637315edSAlan Cox  * If the size of the region from start to end is zero, we are
3331637315edSAlan Cox  * supposed to flush all modified pages within the region containing
3332637315edSAlan Cox  * start.  Unfortunately, a region can be split or coalesced with
3333637315edSAlan Cox  * neighboring regions, making it difficult to determine what the
3334637315edSAlan Cox  * original region was.  Therefore, we approximate this requirement by
3335637315edSAlan Cox  * flushing the current region containing start.
3336637315edSAlan Cox  *
3337df8bae1dSRodney W. Grimes  * Returns an error if any part of the specified range is not mapped.
3338df8bae1dSRodney W. Grimes  */
3339df8bae1dSRodney W. Grimes int
3340950f8459SAlan Cox vm_map_sync(
33411b40f8c0SMatthew Dillon 	vm_map_t map,
33421b40f8c0SMatthew Dillon 	vm_offset_t start,
33431b40f8c0SMatthew Dillon 	vm_offset_t end,
33441b40f8c0SMatthew Dillon 	boolean_t syncio,
33451b40f8c0SMatthew Dillon 	boolean_t invalidate)
3346df8bae1dSRodney W. Grimes {
3347c0877f10SJohn Dyson 	vm_map_entry_t current;
3348df8bae1dSRodney W. Grimes 	vm_map_entry_t entry;
3349df8bae1dSRodney W. Grimes 	vm_size_t size;
3350df8bae1dSRodney W. Grimes 	vm_object_t object;
3351a316d390SJohn Dyson 	vm_ooffset_t offset;
3352e53fa61bSKonstantin Belousov 	unsigned int last_timestamp;
3353126d6082SKonstantin Belousov 	boolean_t failed;
3354df8bae1dSRodney W. Grimes 
3355df8bae1dSRodney W. Grimes 	vm_map_lock_read(map);
3356df8bae1dSRodney W. Grimes 	VM_MAP_RANGE_CHECK(map, start, end);
3357df8bae1dSRodney W. Grimes 	if (!vm_map_lookup_entry(map, start, &entry)) {
3358df8bae1dSRodney W. Grimes 		vm_map_unlock_read(map);
3359df8bae1dSRodney W. Grimes 		return (KERN_INVALID_ADDRESS);
3360d1d3f7e1SDoug Moore 	} else if (start == end) {
3361637315edSAlan Cox 		start = entry->start;
3362637315edSAlan Cox 		end = entry->end;
3363df8bae1dSRodney W. Grimes 	}
3364df8bae1dSRodney W. Grimes 	/*
3365b7b7cd44SAlan Cox 	 * Make a first pass to check for user-wired memory and holes.
3366df8bae1dSRodney W. Grimes 	 */
33671c5196c3SKonstantin Belousov 	for (current = entry; current->start < end; current = current->next) {
3368b7b7cd44SAlan Cox 		if (invalidate && (current->eflags & MAP_ENTRY_USER_WIRED)) {
3369df8bae1dSRodney W. Grimes 			vm_map_unlock_read(map);
3370df8bae1dSRodney W. Grimes 			return (KERN_INVALID_ARGUMENT);
3371df8bae1dSRodney W. Grimes 		}
3372df8bae1dSRodney W. Grimes 		if (end > current->end &&
33731c5196c3SKonstantin Belousov 		    current->end != current->next->start) {
3374df8bae1dSRodney W. Grimes 			vm_map_unlock_read(map);
3375df8bae1dSRodney W. Grimes 			return (KERN_INVALID_ADDRESS);
3376df8bae1dSRodney W. Grimes 		}
3377df8bae1dSRodney W. Grimes 	}
3378df8bae1dSRodney W. Grimes 
33792cf13952SAlan Cox 	if (invalidate)
3380bc105a67SAlan Cox 		pmap_remove(map->pmap, start, end);
3381126d6082SKonstantin Belousov 	failed = FALSE;
33822cf13952SAlan Cox 
3383df8bae1dSRodney W. Grimes 	/*
3384df8bae1dSRodney W. Grimes 	 * Make a second pass, cleaning/uncaching pages from the indicated
3385df8bae1dSRodney W. Grimes 	 * objects as we go.
3386df8bae1dSRodney W. Grimes 	 */
33871c5196c3SKonstantin Belousov 	for (current = entry; current->start < end;) {
3388df8bae1dSRodney W. Grimes 		offset = current->offset + (start - current->start);
3389df8bae1dSRodney W. Grimes 		size = (end <= current->end ? end : current->end) - start;
33909fdfe602SMatthew Dillon 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
3391c0877f10SJohn Dyson 			vm_map_t smap;
3392df8bae1dSRodney W. Grimes 			vm_map_entry_t tentry;
3393df8bae1dSRodney W. Grimes 			vm_size_t tsize;
3394df8bae1dSRodney W. Grimes 
33959fdfe602SMatthew Dillon 			smap = current->object.sub_map;
3396df8bae1dSRodney W. Grimes 			vm_map_lock_read(smap);
3397df8bae1dSRodney W. Grimes 			(void) vm_map_lookup_entry(smap, offset, &tentry);
3398df8bae1dSRodney W. Grimes 			tsize = tentry->end - offset;
3399df8bae1dSRodney W. Grimes 			if (tsize < size)
3400df8bae1dSRodney W. Grimes 				size = tsize;
3401df8bae1dSRodney W. Grimes 			object = tentry->object.vm_object;
3402df8bae1dSRodney W. Grimes 			offset = tentry->offset + (offset - tentry->start);
3403df8bae1dSRodney W. Grimes 			vm_map_unlock_read(smap);
3404df8bae1dSRodney W. Grimes 		} else {
3405df8bae1dSRodney W. Grimes 			object = current->object.vm_object;
3406df8bae1dSRodney W. Grimes 		}
3407e53fa61bSKonstantin Belousov 		vm_object_reference(object);
3408e53fa61bSKonstantin Belousov 		last_timestamp = map->timestamp;
3409e53fa61bSKonstantin Belousov 		vm_map_unlock_read(map);
3410126d6082SKonstantin Belousov 		if (!vm_object_sync(object, offset, size, syncio, invalidate))
3411126d6082SKonstantin Belousov 			failed = TRUE;
3412df8bae1dSRodney W. Grimes 		start += size;
3413e53fa61bSKonstantin Belousov 		vm_object_deallocate(object);
3414e53fa61bSKonstantin Belousov 		vm_map_lock_read(map);
3415d1d3f7e1SDoug Moore 		if (last_timestamp == map->timestamp ||
3416d1d3f7e1SDoug Moore 		    !vm_map_lookup_entry(map, start, &current))
3417e53fa61bSKonstantin Belousov 			current = current->next;
3418df8bae1dSRodney W. Grimes 	}
3419df8bae1dSRodney W. Grimes 
3420df8bae1dSRodney W. Grimes 	vm_map_unlock_read(map);
3421126d6082SKonstantin Belousov 	return (failed ? KERN_FAILURE : KERN_SUCCESS);
3422df8bae1dSRodney W. Grimes }
3423df8bae1dSRodney W. Grimes 
3424df8bae1dSRodney W. Grimes /*
3425df8bae1dSRodney W. Grimes  *	vm_map_entry_unwire:	[ internal use only ]
3426df8bae1dSRodney W. Grimes  *
3427df8bae1dSRodney W. Grimes  *	Make the region specified by this entry pageable.
3428df8bae1dSRodney W. Grimes  *
3429df8bae1dSRodney W. Grimes  *	The map in question should be locked.
3430df8bae1dSRodney W. Grimes  *	[This is the reason for this routine's existence.]
3431df8bae1dSRodney W. Grimes  */
34320362d7d7SJohn Dyson static void
34331b40f8c0SMatthew Dillon vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
3434df8bae1dSRodney W. Grimes {
343554a3a114SMark Johnston 	vm_size_t size;
343603462509SAlan Cox 
343703462509SAlan Cox 	VM_MAP_ASSERT_LOCKED(map);
343803462509SAlan Cox 	KASSERT(entry->wired_count > 0,
343903462509SAlan Cox 	    ("vm_map_entry_unwire: entry %p isn't wired", entry));
344054a3a114SMark Johnston 
344154a3a114SMark Johnston 	size = entry->end - entry->start;
344254a3a114SMark Johnston 	if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0)
344354a3a114SMark Johnston 		vm_map_wire_user_count_sub(atop(size));
344403462509SAlan Cox 	pmap_unwire(map->pmap, entry->start, entry->end);
344554a3a114SMark Johnston 	vm_object_unwire(entry->object.vm_object, entry->offset, size,
344654a3a114SMark Johnston 	    PQ_ACTIVE);
3447df8bae1dSRodney W. Grimes 	entry->wired_count = 0;
3448df8bae1dSRodney W. Grimes }
3449df8bae1dSRodney W. Grimes 
34500b367bd8SKonstantin Belousov static void
34510b367bd8SKonstantin Belousov vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map)
34520b367bd8SKonstantin Belousov {
34530b367bd8SKonstantin Belousov 
34540b367bd8SKonstantin Belousov 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0)
34550b367bd8SKonstantin Belousov 		vm_object_deallocate(entry->object.vm_object);
34560b367bd8SKonstantin Belousov 	uma_zfree(system_map ? kmapentzone : mapentzone, entry);
34570b367bd8SKonstantin Belousov }
34580b367bd8SKonstantin Belousov 
3459df8bae1dSRodney W. Grimes /*
3460df8bae1dSRodney W. Grimes  *	vm_map_entry_delete:	[ internal use only ]
3461df8bae1dSRodney W. Grimes  *
3462df8bae1dSRodney W. Grimes  *	Deallocate the given entry from the target map.
3463df8bae1dSRodney W. Grimes  */
34640362d7d7SJohn Dyson static void
34651b40f8c0SMatthew Dillon vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
3466df8bae1dSRodney W. Grimes {
346732a89c32SAlan Cox 	vm_object_t object;
34683364c323SKonstantin Belousov 	vm_pindex_t offidxstart, offidxend, count, size1;
3469d1780e8dSKonstantin Belousov 	vm_size_t size;
347032a89c32SAlan Cox 
34719f701172SKonstantin Belousov 	vm_map_entry_unlink(map, entry, UNLINK_MERGE_NONE);
34723364c323SKonstantin Belousov 	object = entry->object.vm_object;
347319bd0d9cSKonstantin Belousov 
347419bd0d9cSKonstantin Belousov 	if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
347519bd0d9cSKonstantin Belousov 		MPASS(entry->cred == NULL);
347619bd0d9cSKonstantin Belousov 		MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
347719bd0d9cSKonstantin Belousov 		MPASS(object == NULL);
347819bd0d9cSKonstantin Belousov 		vm_map_entry_deallocate(entry, map->system_map);
347919bd0d9cSKonstantin Belousov 		return;
348019bd0d9cSKonstantin Belousov 	}
348119bd0d9cSKonstantin Belousov 
34823364c323SKonstantin Belousov 	size = entry->end - entry->start;
34833364c323SKonstantin Belousov 	map->size -= size;
34843364c323SKonstantin Belousov 
3485ef694c1aSEdward Tomasz Napierala 	if (entry->cred != NULL) {
3486ef694c1aSEdward Tomasz Napierala 		swap_release_by_cred(size, entry->cred);
3487ef694c1aSEdward Tomasz Napierala 		crfree(entry->cred);
34883364c323SKonstantin Belousov 	}
3489df8bae1dSRodney W. Grimes 
349032a89c32SAlan Cox 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
34913364c323SKonstantin Belousov 	    (object != NULL)) {
3492ef694c1aSEdward Tomasz Napierala 		KASSERT(entry->cred == NULL || object->cred == NULL ||
34933364c323SKonstantin Belousov 		    (entry->eflags & MAP_ENTRY_NEEDS_COPY),
3494ef694c1aSEdward Tomasz Napierala 		    ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry));
3495d1780e8dSKonstantin Belousov 		count = atop(size);
349632a89c32SAlan Cox 		offidxstart = OFF_TO_IDX(entry->offset);
349732a89c32SAlan Cox 		offidxend = offidxstart + count;
349889f6b863SAttilio Rao 		VM_OBJECT_WLOCK(object);
34999a4ee196SKonstantin Belousov 		if (object->ref_count != 1 && ((object->flags & (OBJ_NOSPLIT |
35009a4ee196SKonstantin Belousov 		    OBJ_ONEMAPPING)) == OBJ_ONEMAPPING ||
35012e47807cSJeff Roberson 		    object == kernel_object)) {
350232a89c32SAlan Cox 			vm_object_collapse(object);
35036bbee8e2SAlan Cox 
35046bbee8e2SAlan Cox 			/*
35056bbee8e2SAlan Cox 			 * The option OBJPR_NOTMAPPED can be passed here
35066bbee8e2SAlan Cox 			 * because vm_map_delete() already performed
35076bbee8e2SAlan Cox 			 * pmap_remove() on the only mapping to this range
35086bbee8e2SAlan Cox 			 * of pages.
35096bbee8e2SAlan Cox 			 */
35106bbee8e2SAlan Cox 			vm_object_page_remove(object, offidxstart, offidxend,
35116bbee8e2SAlan Cox 			    OBJPR_NOTMAPPED);
351232a89c32SAlan Cox 			if (object->type == OBJT_SWAP)
35139a4ee196SKonstantin Belousov 				swap_pager_freespace(object, offidxstart,
35149a4ee196SKonstantin Belousov 				    count);
351532a89c32SAlan Cox 			if (offidxend >= object->size &&
35163364c323SKonstantin Belousov 			    offidxstart < object->size) {
35173364c323SKonstantin Belousov 				size1 = object->size;
351832a89c32SAlan Cox 				object->size = offidxstart;
3519ef694c1aSEdward Tomasz Napierala 				if (object->cred != NULL) {
35203364c323SKonstantin Belousov 					size1 -= object->size;
35213364c323SKonstantin Belousov 					KASSERT(object->charge >= ptoa(size1),
35229a4ee196SKonstantin Belousov 					    ("object %p charge < 0", object));
35239a4ee196SKonstantin Belousov 					swap_release_by_cred(ptoa(size1),
35249a4ee196SKonstantin Belousov 					    object->cred);
35253364c323SKonstantin Belousov 					object->charge -= ptoa(size1);
35263364c323SKonstantin Belousov 				}
35273364c323SKonstantin Belousov 			}
352832a89c32SAlan Cox 		}
352989f6b863SAttilio Rao 		VM_OBJECT_WUNLOCK(object);
3530897d81a0SKonstantin Belousov 	} else
3531897d81a0SKonstantin Belousov 		entry->object.vm_object = NULL;
35320b367bd8SKonstantin Belousov 	if (map->system_map)
35330b367bd8SKonstantin Belousov 		vm_map_entry_deallocate(entry, TRUE);
35340b367bd8SKonstantin Belousov 	else {
35350b367bd8SKonstantin Belousov 		entry->next = curthread->td_map_def_user;
35360b367bd8SKonstantin Belousov 		curthread->td_map_def_user = entry;
35370b367bd8SKonstantin Belousov 	}
3538df8bae1dSRodney W. Grimes }
3539df8bae1dSRodney W. Grimes 
3540df8bae1dSRodney W. Grimes /*
3541df8bae1dSRodney W. Grimes  *	vm_map_delete:	[ internal use only ]
3542df8bae1dSRodney W. Grimes  *
3543df8bae1dSRodney W. Grimes  *	Deallocates the given address range from the target
3544df8bae1dSRodney W. Grimes  *	map.
3545df8bae1dSRodney W. Grimes  */
3546df8bae1dSRodney W. Grimes int
3547655c3490SKonstantin Belousov vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
3548df8bae1dSRodney W. Grimes {
3549c0877f10SJohn Dyson 	vm_map_entry_t entry;
3550d1d3f7e1SDoug Moore 	vm_map_entry_t first_entry;
3551df8bae1dSRodney W. Grimes 
35523a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(map);
355379e9451fSKonstantin Belousov 	if (start == end)
355479e9451fSKonstantin Belousov 		return (KERN_SUCCESS);
35553a0916b8SKonstantin Belousov 
3556df8bae1dSRodney W. Grimes 	/*
3557df8bae1dSRodney W. Grimes 	 * Find the start of the region, and clip it
3558df8bae1dSRodney W. Grimes 	 */
3559d1d3f7e1SDoug Moore 	if (!vm_map_lookup_entry(map, start, &first_entry))
3560d1d3f7e1SDoug Moore 		entry = first_entry->next;
3561d1d3f7e1SDoug Moore 	else {
3562d1d3f7e1SDoug Moore 		entry = first_entry;
3563df8bae1dSRodney W. Grimes 		vm_map_clip_start(map, entry, start);
3564d1d3f7e1SDoug Moore 	}
3565df8bae1dSRodney W. Grimes 
3566df8bae1dSRodney W. Grimes 	/*
3567df8bae1dSRodney W. Grimes 	 * Step through all entries in this region
3568df8bae1dSRodney W. Grimes 	 */
35691c5196c3SKonstantin Belousov 	while (entry->start < end) {
3570df8bae1dSRodney W. Grimes 		vm_map_entry_t next;
3571df8bae1dSRodney W. Grimes 
357273b2baceSAlan Cox 		/*
357373b2baceSAlan Cox 		 * Wait for wiring or unwiring of an entry to complete.
35747c938963SBrian Feldman 		 * Also wait for any system wirings to disappear on
35757c938963SBrian Feldman 		 * user maps.
357673b2baceSAlan Cox 		 */
35777c938963SBrian Feldman 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 ||
35787c938963SBrian Feldman 		    (vm_map_pmap(map) != kernel_pmap &&
35797c938963SBrian Feldman 		    vm_map_entry_system_wired_count(entry) != 0)) {
358073b2baceSAlan Cox 			unsigned int last_timestamp;
358173b2baceSAlan Cox 			vm_offset_t saved_start;
3582d1d3f7e1SDoug Moore 			vm_map_entry_t tmp_entry;
358373b2baceSAlan Cox 
358473b2baceSAlan Cox 			saved_start = entry->start;
358573b2baceSAlan Cox 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
358673b2baceSAlan Cox 			last_timestamp = map->timestamp;
35878ce2d00aSPawel Jakub Dawidek 			(void) vm_map_unlock_and_wait(map, 0);
358873b2baceSAlan Cox 			vm_map_lock(map);
3589d1d3f7e1SDoug Moore 			if (last_timestamp + 1 != map->timestamp) {
359073b2baceSAlan Cox 				/*
359173b2baceSAlan Cox 				 * Look again for the entry because the map was
3592d1d3f7e1SDoug Moore 				 * modified while it was unlocked.
3593d1d3f7e1SDoug Moore 				 * Specifically, the entry may have been
3594d1d3f7e1SDoug Moore 				 * clipped, merged, or deleted.
359573b2baceSAlan Cox 				 */
3596d1d3f7e1SDoug Moore 				if (!vm_map_lookup_entry(map, saved_start,
3597d1d3f7e1SDoug Moore 							 &tmp_entry))
3598d1d3f7e1SDoug Moore 					entry = tmp_entry->next;
3599d1d3f7e1SDoug Moore 				else {
3600d1d3f7e1SDoug Moore 					entry = tmp_entry;
3601d1d3f7e1SDoug Moore 					vm_map_clip_start(map, entry,
3602d1d3f7e1SDoug Moore 							  saved_start);
3603d1d3f7e1SDoug Moore 				}
3604d1d3f7e1SDoug Moore 			}
360573b2baceSAlan Cox 			continue;
360673b2baceSAlan Cox 		}
3607df8bae1dSRodney W. Grimes 		vm_map_clip_end(map, entry, end);
3608df8bae1dSRodney W. Grimes 
3609c0877f10SJohn Dyson 		next = entry->next;
3610df8bae1dSRodney W. Grimes 
3611df8bae1dSRodney W. Grimes 		/*
36120d94caffSDavid Greenman 		 * Unwire before removing addresses from the pmap; otherwise,
36130d94caffSDavid Greenman 		 * unwiring will put the entries back in the pmap.
3614df8bae1dSRodney W. Grimes 		 */
3615be7be412SKonstantin Belousov 		if (entry->wired_count != 0)
3616df8bae1dSRodney W. Grimes 			vm_map_entry_unwire(map, entry);
3617df8bae1dSRodney W. Grimes 
361832f0fefcSKonstantin Belousov 		/*
361932f0fefcSKonstantin Belousov 		 * Remove mappings for the pages, but only if the
362032f0fefcSKonstantin Belousov 		 * mappings could exist.  For instance, it does not
362132f0fefcSKonstantin Belousov 		 * make sense to call pmap_remove() for guard entries.
362232f0fefcSKonstantin Belousov 		 */
362332f0fefcSKonstantin Belousov 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 ||
362432f0fefcSKonstantin Belousov 		    entry->object.vm_object != NULL)
362532a89c32SAlan Cox 			pmap_remove(map->pmap, entry->start, entry->end);
3626df8bae1dSRodney W. Grimes 
3627fa50a355SKonstantin Belousov 		if (entry->end == map->anon_loc)
3628fa50a355SKonstantin Belousov 			map->anon_loc = entry->start;
3629fa50a355SKonstantin Belousov 
3630df8bae1dSRodney W. Grimes 		/*
3631e608cc3cSKonstantin Belousov 		 * Delete the entry only after removing all pmap
3632e608cc3cSKonstantin Belousov 		 * entries pointing to its pages.  (Otherwise, its
3633e608cc3cSKonstantin Belousov 		 * page frames may be reallocated, and any modify bits
3634e608cc3cSKonstantin Belousov 		 * will be set in the wrong object!)
3635df8bae1dSRodney W. Grimes 		 */
3636df8bae1dSRodney W. Grimes 		vm_map_entry_delete(map, entry);
3637df8bae1dSRodney W. Grimes 		entry = next;
3638df8bae1dSRodney W. Grimes 	}
3639df8bae1dSRodney W. Grimes 	return (KERN_SUCCESS);
3640df8bae1dSRodney W. Grimes }
3641df8bae1dSRodney W. Grimes 
3642df8bae1dSRodney W. Grimes /*
3643df8bae1dSRodney W. Grimes  *	vm_map_remove:
3644df8bae1dSRodney W. Grimes  *
3645df8bae1dSRodney W. Grimes  *	Remove the given address range from the target map.
3646df8bae1dSRodney W. Grimes  *	This is the exported form of vm_map_delete.
3647df8bae1dSRodney W. Grimes  */
3648df8bae1dSRodney W. Grimes int
36491b40f8c0SMatthew Dillon vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
3650df8bae1dSRodney W. Grimes {
36516eaee3feSAlan Cox 	int result;
3652df8bae1dSRodney W. Grimes 
3653df8bae1dSRodney W. Grimes 	vm_map_lock(map);
3654df8bae1dSRodney W. Grimes 	VM_MAP_RANGE_CHECK(map, start, end);
3655655c3490SKonstantin Belousov 	result = vm_map_delete(map, start, end);
3656df8bae1dSRodney W. Grimes 	vm_map_unlock(map);
3657df8bae1dSRodney W. Grimes 	return (result);
3658df8bae1dSRodney W. Grimes }
3659df8bae1dSRodney W. Grimes 
3660df8bae1dSRodney W. Grimes /*
3661df8bae1dSRodney W. Grimes  *	vm_map_check_protection:
3662df8bae1dSRodney W. Grimes  *
36632d5c7e45SMatthew Dillon  *	Assert that the target map allows the specified privilege on the
36642d5c7e45SMatthew Dillon  *	entire address region given.  The entire region must be allocated.
36652d5c7e45SMatthew Dillon  *
36662d5c7e45SMatthew Dillon  *	WARNING!  This code does not and should not check whether the
36672d5c7e45SMatthew Dillon  *	contents of the region is accessible.  For example a smaller file
36682d5c7e45SMatthew Dillon  *	might be mapped into a larger address space.
36692d5c7e45SMatthew Dillon  *
36702d5c7e45SMatthew Dillon  *	NOTE!  This code is also called by munmap().
3671d8834602SAlan Cox  *
3672d8834602SAlan Cox  *	The map must be locked.  A read lock is sufficient.
3673df8bae1dSRodney W. Grimes  */
36740d94caffSDavid Greenman boolean_t
3675b9dcd593SBruce Evans vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
3676b9dcd593SBruce Evans 			vm_prot_t protection)
3677df8bae1dSRodney W. Grimes {
3678c0877f10SJohn Dyson 	vm_map_entry_t entry;
3679d1d3f7e1SDoug Moore 	vm_map_entry_t tmp_entry;
3680df8bae1dSRodney W. Grimes 
3681d1d3f7e1SDoug Moore 	if (!vm_map_lookup_entry(map, start, &tmp_entry))
3682df8bae1dSRodney W. Grimes 		return (FALSE);
3683d1d3f7e1SDoug Moore 	entry = tmp_entry;
3684df8bae1dSRodney W. Grimes 
3685df8bae1dSRodney W. Grimes 	while (start < end) {
3686df8bae1dSRodney W. Grimes 		/*
3687df8bae1dSRodney W. Grimes 		 * No holes allowed!
3688df8bae1dSRodney W. Grimes 		 */
3689d8834602SAlan Cox 		if (start < entry->start)
3690df8bae1dSRodney W. Grimes 			return (FALSE);
3691df8bae1dSRodney W. Grimes 		/*
3692df8bae1dSRodney W. Grimes 		 * Check protection associated with entry.
3693df8bae1dSRodney W. Grimes 		 */
3694d8834602SAlan Cox 		if ((entry->protection & protection) != protection)
3695df8bae1dSRodney W. Grimes 			return (FALSE);
3696df8bae1dSRodney W. Grimes 		/* go to next entry */
3697df8bae1dSRodney W. Grimes 		start = entry->end;
3698df8bae1dSRodney W. Grimes 		entry = entry->next;
3699df8bae1dSRodney W. Grimes 	}
3700df8bae1dSRodney W. Grimes 	return (TRUE);
3701df8bae1dSRodney W. Grimes }
3702df8bae1dSRodney W. Grimes 
370386524867SJohn Dyson /*
3704df8bae1dSRodney W. Grimes  *	vm_map_copy_entry:
3705df8bae1dSRodney W. Grimes  *
3706df8bae1dSRodney W. Grimes  *	Copies the contents of the source entry to the destination
3707df8bae1dSRodney W. Grimes  *	entry.  The entries *must* be aligned properly.
3708df8bae1dSRodney W. Grimes  */
3709f708ef1bSPoul-Henning Kamp static void
37101b40f8c0SMatthew Dillon vm_map_copy_entry(
37111b40f8c0SMatthew Dillon 	vm_map_t src_map,
37121b40f8c0SMatthew Dillon 	vm_map_t dst_map,
37131b40f8c0SMatthew Dillon 	vm_map_entry_t src_entry,
37143364c323SKonstantin Belousov 	vm_map_entry_t dst_entry,
37153364c323SKonstantin Belousov 	vm_ooffset_t *fork_charge)
3716df8bae1dSRodney W. Grimes {
3717c0877f10SJohn Dyson 	vm_object_t src_object;
371884110e7eSKonstantin Belousov 	vm_map_entry_t fake_entry;
37193364c323SKonstantin Belousov 	vm_offset_t size;
3720ef694c1aSEdward Tomasz Napierala 	struct ucred *cred;
37213364c323SKonstantin Belousov 	int charged;
3722c0877f10SJohn Dyson 
37233a0916b8SKonstantin Belousov 	VM_MAP_ASSERT_LOCKED(dst_map);
37243a0916b8SKonstantin Belousov 
37259fdfe602SMatthew Dillon 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
3726df8bae1dSRodney W. Grimes 		return;
3727df8bae1dSRodney W. Grimes 
3728afaa41f6SAlan Cox 	if (src_entry->wired_count == 0 ||
3729afaa41f6SAlan Cox 	    (src_entry->protection & VM_PROT_WRITE) == 0) {
3730df8bae1dSRodney W. Grimes 		/*
37310d94caffSDavid Greenman 		 * If the source entry is marked needs_copy, it is already
37320d94caffSDavid Greenman 		 * write-protected.
3733df8bae1dSRodney W. Grimes 		 */
3734d9a9209aSAlan Cox 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0 &&
3735d9a9209aSAlan Cox 		    (src_entry->protection & VM_PROT_WRITE) != 0) {
3736df8bae1dSRodney W. Grimes 			pmap_protect(src_map->pmap,
3737df8bae1dSRodney W. Grimes 			    src_entry->start,
3738df8bae1dSRodney W. Grimes 			    src_entry->end,
3739df8bae1dSRodney W. Grimes 			    src_entry->protection & ~VM_PROT_WRITE);
3740df8bae1dSRodney W. Grimes 		}
3741b18bfc3dSJohn Dyson 
3742df8bae1dSRodney W. Grimes 		/*
3743df8bae1dSRodney W. Grimes 		 * Make a copy of the object.
3744df8bae1dSRodney W. Grimes 		 */
37453364c323SKonstantin Belousov 		size = src_entry->end - src_entry->start;
37468aef1712SMatthew Dillon 		if ((src_object = src_entry->object.vm_object) != NULL) {
374789f6b863SAttilio Rao 			VM_OBJECT_WLOCK(src_object);
37483364c323SKonstantin Belousov 			charged = ENTRY_CHARGED(src_entry);
37499a4ee196SKonstantin Belousov 			if (src_object->handle == NULL &&
3750c0877f10SJohn Dyson 			    (src_object->type == OBJT_DEFAULT ||
3751c0877f10SJohn Dyson 			    src_object->type == OBJT_SWAP)) {
3752c0877f10SJohn Dyson 				vm_object_collapse(src_object);
37539a4ee196SKonstantin Belousov 				if ((src_object->flags & (OBJ_NOSPLIT |
37549a4ee196SKonstantin Belousov 				    OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
3755c5aaa06dSAlan Cox 					vm_object_split(src_entry);
37569a4ee196SKonstantin Belousov 					src_object =
37579a4ee196SKonstantin Belousov 					    src_entry->object.vm_object;
3758a89c6258SAlan Cox 				}
3759a89c6258SAlan Cox 			}
3760b921a12bSAlan Cox 			vm_object_reference_locked(src_object);
3761069e9bc1SDoug Rabson 			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
3762ef694c1aSEdward Tomasz Napierala 			if (src_entry->cred != NULL &&
37633364c323SKonstantin Belousov 			    !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
3764ef694c1aSEdward Tomasz Napierala 				KASSERT(src_object->cred == NULL,
3765ef694c1aSEdward Tomasz Napierala 				    ("OVERCOMMIT: vm_map_copy_entry: cred %p",
37663364c323SKonstantin Belousov 				     src_object));
3767ef694c1aSEdward Tomasz Napierala 				src_object->cred = src_entry->cred;
37683364c323SKonstantin Belousov 				src_object->charge = size;
37693364c323SKonstantin Belousov 			}
377089f6b863SAttilio Rao 			VM_OBJECT_WUNLOCK(src_object);
3771c0877f10SJohn Dyson 			dst_entry->object.vm_object = src_object;
37723364c323SKonstantin Belousov 			if (charged) {
3773ef694c1aSEdward Tomasz Napierala 				cred = curthread->td_ucred;
3774ef694c1aSEdward Tomasz Napierala 				crhold(cred);
3775ef694c1aSEdward Tomasz Napierala 				dst_entry->cred = cred;
37763364c323SKonstantin Belousov 				*fork_charge += size;
37773364c323SKonstantin Belousov 				if (!(src_entry->eflags &
37783364c323SKonstantin Belousov 				      MAP_ENTRY_NEEDS_COPY)) {
3779ef694c1aSEdward Tomasz Napierala 					crhold(cred);
3780ef694c1aSEdward Tomasz Napierala 					src_entry->cred = cred;
37813364c323SKonstantin Belousov 					*fork_charge += size;
37823364c323SKonstantin Belousov 				}
37833364c323SKonstantin Belousov 			}
37849a4ee196SKonstantin Belousov 			src_entry->eflags |= MAP_ENTRY_COW |
37859a4ee196SKonstantin Belousov 			    MAP_ENTRY_NEEDS_COPY;
37869a4ee196SKonstantin Belousov 			dst_entry->eflags |= MAP_ENTRY_COW |
37879a4ee196SKonstantin Belousov 			    MAP_ENTRY_NEEDS_COPY;
3788b18bfc3dSJohn Dyson 			dst_entry->offset = src_entry->offset;
378984110e7eSKonstantin Belousov 			if (src_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
379084110e7eSKonstantin Belousov 				/*
379184110e7eSKonstantin Belousov 				 * MAP_ENTRY_VN_WRITECNT cannot
379284110e7eSKonstantin Belousov 				 * indicate write reference from
379384110e7eSKonstantin Belousov 				 * src_entry, since the entry is
379484110e7eSKonstantin Belousov 				 * marked as needs copy.  Allocate a
379584110e7eSKonstantin Belousov 				 * fake entry that is used to
379684110e7eSKonstantin Belousov 				 * decrement object->un_pager.vnp.writecount
379784110e7eSKonstantin Belousov 				 * at the appropriate time.  Attach
379884110e7eSKonstantin Belousov 				 * fake_entry to the deferred list.
379984110e7eSKonstantin Belousov 				 */
380084110e7eSKonstantin Belousov 				fake_entry = vm_map_entry_create(dst_map);
380184110e7eSKonstantin Belousov 				fake_entry->eflags = MAP_ENTRY_VN_WRITECNT;
380284110e7eSKonstantin Belousov 				src_entry->eflags &= ~MAP_ENTRY_VN_WRITECNT;
380384110e7eSKonstantin Belousov 				vm_object_reference(src_object);
380484110e7eSKonstantin Belousov 				fake_entry->object.vm_object = src_object;
380584110e7eSKonstantin Belousov 				fake_entry->start = src_entry->start;
380684110e7eSKonstantin Belousov 				fake_entry->end = src_entry->end;
380784110e7eSKonstantin Belousov 				fake_entry->next = curthread->td_map_def_user;
380884110e7eSKonstantin Belousov 				curthread->td_map_def_user = fake_entry;
380984110e7eSKonstantin Belousov 			}
38100ec97ffcSKonstantin Belousov 
38110ec97ffcSKonstantin Belousov 			pmap_copy(dst_map->pmap, src_map->pmap,
38120ec97ffcSKonstantin Belousov 			    dst_entry->start, dst_entry->end - dst_entry->start,
38130ec97ffcSKonstantin Belousov 			    src_entry->start);
3814b18bfc3dSJohn Dyson 		} else {
3815b18bfc3dSJohn Dyson 			dst_entry->object.vm_object = NULL;
3816b18bfc3dSJohn Dyson 			dst_entry->offset = 0;
3817ef694c1aSEdward Tomasz Napierala 			if (src_entry->cred != NULL) {
3818ef694c1aSEdward Tomasz Napierala 				dst_entry->cred = curthread->td_ucred;
3819ef694c1aSEdward Tomasz Napierala 				crhold(dst_entry->cred);
38203364c323SKonstantin Belousov 				*fork_charge += size;
38213364c323SKonstantin Belousov 			}
3822b18bfc3dSJohn Dyson 		}
38230d94caffSDavid Greenman 	} else {
3824df8bae1dSRodney W. Grimes 		/*
3825afaa41f6SAlan Cox 		 * We don't want to make writeable wired pages copy-on-write.
3826afaa41f6SAlan Cox 		 * Immediately copy these pages into the new map by simulating
3827afaa41f6SAlan Cox 		 * page faults.  The new pages are pageable.
3828df8bae1dSRodney W. Grimes 		 */
3829121fd461SKonstantin Belousov 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry,
3830121fd461SKonstantin Belousov 		    fork_charge);
3831df8bae1dSRodney W. Grimes 	}
3832df8bae1dSRodney W. Grimes }
3833df8bae1dSRodney W. Grimes 
3834df8bae1dSRodney W. Grimes /*
38352a7be1b6SBrian Feldman  * vmspace_map_entry_forked:
38362a7be1b6SBrian Feldman  * Update the newly-forked vmspace each time a map entry is inherited
38372a7be1b6SBrian Feldman  * or copied.  The values for vm_dsize and vm_tsize are approximate
38382a7be1b6SBrian Feldman  * (and mostly-obsolete ideas in the face of mmap(2) et al.)
38392a7be1b6SBrian Feldman  */
38402a7be1b6SBrian Feldman static void
38412a7be1b6SBrian Feldman vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
38422a7be1b6SBrian Feldman     vm_map_entry_t entry)
38432a7be1b6SBrian Feldman {
38442a7be1b6SBrian Feldman 	vm_size_t entrysize;
38452a7be1b6SBrian Feldman 	vm_offset_t newend;
38462a7be1b6SBrian Feldman 
384719bd0d9cSKonstantin Belousov 	if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
384819bd0d9cSKonstantin Belousov 		return;
38492a7be1b6SBrian Feldman 	entrysize = entry->end - entry->start;
38502a7be1b6SBrian Feldman 	vm2->vm_map.size += entrysize;
38512a7be1b6SBrian Feldman 	if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
38522a7be1b6SBrian Feldman 		vm2->vm_ssize += btoc(entrysize);
38532a7be1b6SBrian Feldman 	} else if (entry->start >= (vm_offset_t)vm1->vm_daddr &&
38542a7be1b6SBrian Feldman 	    entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) {
3855b351299cSAndrew Gallatin 		newend = MIN(entry->end,
38562a7be1b6SBrian Feldman 		    (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize));
38572a7be1b6SBrian Feldman 		vm2->vm_dsize += btoc(newend - entry->start);
38582a7be1b6SBrian Feldman 	} else if (entry->start >= (vm_offset_t)vm1->vm_taddr &&
38592a7be1b6SBrian Feldman 	    entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) {
3860b351299cSAndrew Gallatin 		newend = MIN(entry->end,
38612a7be1b6SBrian Feldman 		    (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize));
38622a7be1b6SBrian Feldman 		vm2->vm_tsize += btoc(newend - entry->start);
38632a7be1b6SBrian Feldman 	}
38642a7be1b6SBrian Feldman }
38652a7be1b6SBrian Feldman 
38662a7be1b6SBrian Feldman /*
3867df8bae1dSRodney W. Grimes  * vmspace_fork:
3868df8bae1dSRodney W. Grimes  * Create a new process vmspace structure and vm_map
3869df8bae1dSRodney W. Grimes  * based on those of an existing process.  The new map
3870df8bae1dSRodney W. Grimes  * is based on the old map, according to the inheritance
3871df8bae1dSRodney W. Grimes  * values on the regions in that map.
3872df8bae1dSRodney W. Grimes  *
38732a7be1b6SBrian Feldman  * XXX It might be worth coalescing the entries added to the new vmspace.
38742a7be1b6SBrian Feldman  *
3875df8bae1dSRodney W. Grimes  * The source map must not be locked.
3876df8bae1dSRodney W. Grimes  */
3877df8bae1dSRodney W. Grimes struct vmspace *
38783364c323SKonstantin Belousov vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
3879df8bae1dSRodney W. Grimes {
3880c0877f10SJohn Dyson 	struct vmspace *vm2;
388179e53838SAlan Cox 	vm_map_t new_map, old_map;
388279e53838SAlan Cox 	vm_map_entry_t new_entry, old_entry;
3883de5f6a77SJohn Dyson 	vm_object_t object;
3884e7a9df16SKonstantin Belousov 	int error, locked;
388519bd0d9cSKonstantin Belousov 	vm_inherit_t inh;
3886df8bae1dSRodney W. Grimes 
388779e53838SAlan Cox 	old_map = &vm1->vm_map;
388879e53838SAlan Cox 	/* Copy immutable fields of vm1 to vm2. */
38896e00f3a3SKonstantin Belousov 	vm2 = vmspace_alloc(vm_map_min(old_map), vm_map_max(old_map),
38906e00f3a3SKonstantin Belousov 	    pmap_pinit);
389189b57fcfSKonstantin Belousov 	if (vm2 == NULL)
389279e53838SAlan Cox 		return (NULL);
3893e7a9df16SKonstantin Belousov 
38942a7be1b6SBrian Feldman 	vm2->vm_taddr = vm1->vm_taddr;
38952a7be1b6SBrian Feldman 	vm2->vm_daddr = vm1->vm_daddr;
38962a7be1b6SBrian Feldman 	vm2->vm_maxsaddr = vm1->vm_maxsaddr;
389779e53838SAlan Cox 	vm_map_lock(old_map);
389879e53838SAlan Cox 	if (old_map->busy)
389979e53838SAlan Cox 		vm_map_wait_busy(old_map);
390079e53838SAlan Cox 	new_map = &vm2->vm_map;
39011fac7d7fSKonstantin Belousov 	locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
39021fac7d7fSKonstantin Belousov 	KASSERT(locked, ("vmspace_fork: lock failed"));
3903df8bae1dSRodney W. Grimes 
3904e7a9df16SKonstantin Belousov 	error = pmap_vmspace_copy(new_map->pmap, old_map->pmap);
3905e7a9df16SKonstantin Belousov 	if (error != 0) {
3906e7a9df16SKonstantin Belousov 		sx_xunlock(&old_map->lock);
3907e7a9df16SKonstantin Belousov 		sx_xunlock(&new_map->lock);
3908e7a9df16SKonstantin Belousov 		vm_map_process_deferred();
3909e7a9df16SKonstantin Belousov 		vmspace_free(vm2);
3910e7a9df16SKonstantin Belousov 		return (NULL);
3911e7a9df16SKonstantin Belousov 	}
3912e7a9df16SKonstantin Belousov 
3913fa50a355SKonstantin Belousov 	new_map->anon_loc = old_map->anon_loc;
3914e7a9df16SKonstantin Belousov 
3915df8bae1dSRodney W. Grimes 	old_entry = old_map->header.next;
3916df8bae1dSRodney W. Grimes 
3917df8bae1dSRodney W. Grimes 	while (old_entry != &old_map->header) {
3918afa07f7eSJohn Dyson 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
3919df8bae1dSRodney W. Grimes 			panic("vm_map_fork: encountered a submap");
3920df8bae1dSRodney W. Grimes 
392119bd0d9cSKonstantin Belousov 		inh = old_entry->inheritance;
392219bd0d9cSKonstantin Belousov 		if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 &&
392319bd0d9cSKonstantin Belousov 		    inh != VM_INHERIT_NONE)
392419bd0d9cSKonstantin Belousov 			inh = VM_INHERIT_COPY;
392519bd0d9cSKonstantin Belousov 
392619bd0d9cSKonstantin Belousov 		switch (inh) {
3927df8bae1dSRodney W. Grimes 		case VM_INHERIT_NONE:
3928df8bae1dSRodney W. Grimes 			break;
3929df8bae1dSRodney W. Grimes 
3930df8bae1dSRodney W. Grimes 		case VM_INHERIT_SHARE:
3931df8bae1dSRodney W. Grimes 			/*
3932fed9a903SJohn Dyson 			 * Clone the entry, creating the shared object if necessary.
3933fed9a903SJohn Dyson 			 */
3934fed9a903SJohn Dyson 			object = old_entry->object.vm_object;
3935fed9a903SJohn Dyson 			if (object == NULL) {
3936af1d6d6aSDoug Moore 				vm_map_entry_back(old_entry);
3937af1d6d6aSDoug Moore 				object = old_entry->object.vm_object;
39389a2f6362SAlan Cox 			}
39399a2f6362SAlan Cox 
39409a2f6362SAlan Cox 			/*
39419a2f6362SAlan Cox 			 * Add the reference before calling vm_object_shadow
39429a2f6362SAlan Cox 			 * to insure that a shadow object is created.
39439a2f6362SAlan Cox 			 */
39449a2f6362SAlan Cox 			vm_object_reference(object);
39459a2f6362SAlan Cox 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
39465069bf57SJohn Dyson 				vm_object_shadow(&old_entry->object.vm_object,
39475069bf57SJohn Dyson 				    &old_entry->offset,
39480cc74f14SAlan Cox 				    old_entry->end - old_entry->start);
39495069bf57SJohn Dyson 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
3950d30344bdSIan Dowse 				/* Transfer the second reference too. */
3951d30344bdSIan Dowse 				vm_object_reference(
3952d30344bdSIan Dowse 				    old_entry->object.vm_object);
39537fd10fb3SKonstantin Belousov 
39547fd10fb3SKonstantin Belousov 				/*
39557fd10fb3SKonstantin Belousov 				 * As in vm_map_simplify_entry(), the
3956b0994946SKonstantin Belousov 				 * vnode lock will not be acquired in
39577fd10fb3SKonstantin Belousov 				 * this call to vm_object_deallocate().
39587fd10fb3SKonstantin Belousov 				 */
3959d30344bdSIan Dowse 				vm_object_deallocate(object);
39605069bf57SJohn Dyson 				object = old_entry->object.vm_object;
3961fed9a903SJohn Dyson 			}
396289f6b863SAttilio Rao 			VM_OBJECT_WLOCK(object);
3963069e9bc1SDoug Rabson 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
3964ef694c1aSEdward Tomasz Napierala 			if (old_entry->cred != NULL) {
3965ef694c1aSEdward Tomasz Napierala 				KASSERT(object->cred == NULL, ("vmspace_fork both cred"));
3966ef694c1aSEdward Tomasz Napierala 				object->cred = old_entry->cred;
39673364c323SKonstantin Belousov 				object->charge = old_entry->end - old_entry->start;
3968ef694c1aSEdward Tomasz Napierala 				old_entry->cred = NULL;
39693364c323SKonstantin Belousov 			}
3970b9781cf6SKonstantin Belousov 
3971b9781cf6SKonstantin Belousov 			/*
3972b9781cf6SKonstantin Belousov 			 * Assert the correct state of the vnode
3973b9781cf6SKonstantin Belousov 			 * v_writecount while the object is locked, to
3974b9781cf6SKonstantin Belousov 			 * not relock it later for the assertion
3975b9781cf6SKonstantin Belousov 			 * correctness.
3976b9781cf6SKonstantin Belousov 			 */
3977b9781cf6SKonstantin Belousov 			if (old_entry->eflags & MAP_ENTRY_VN_WRITECNT &&
3978b9781cf6SKonstantin Belousov 			    object->type == OBJT_VNODE) {
3979b9781cf6SKonstantin Belousov 				KASSERT(((struct vnode *)object->handle)->
3980b9781cf6SKonstantin Belousov 				    v_writecount > 0,
3981b9781cf6SKonstantin Belousov 				    ("vmspace_fork: v_writecount %p", object));
3982b9781cf6SKonstantin Belousov 				KASSERT(object->un_pager.vnp.writemappings > 0,
3983b9781cf6SKonstantin Belousov 				    ("vmspace_fork: vnp.writecount %p",
3984b9781cf6SKonstantin Belousov 				    object));
3985b9781cf6SKonstantin Belousov 			}
398689f6b863SAttilio Rao 			VM_OBJECT_WUNLOCK(object);
3987fed9a903SJohn Dyson 
3988fed9a903SJohn Dyson 			/*
3989ad5fca3bSAlan Cox 			 * Clone the entry, referencing the shared object.
3990df8bae1dSRodney W. Grimes 			 */
3991df8bae1dSRodney W. Grimes 			new_entry = vm_map_entry_create(new_map);
3992df8bae1dSRodney W. Grimes 			*new_entry = *old_entry;
39939f6acfd1SKonstantin Belousov 			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
39949f6acfd1SKonstantin Belousov 			    MAP_ENTRY_IN_TRANSITION);
39950acea7dfSKonstantin Belousov 			new_entry->wiring_thread = NULL;
3996df8bae1dSRodney W. Grimes 			new_entry->wired_count = 0;
399784110e7eSKonstantin Belousov 			if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
399884110e7eSKonstantin Belousov 				vnode_pager_update_writecount(object,
399984110e7eSKonstantin Belousov 				    new_entry->start, new_entry->end);
400084110e7eSKonstantin Belousov 			}
400178022527SKonstantin Belousov 			vm_map_entry_set_vnode_text(new_entry, true);
4002df8bae1dSRodney W. Grimes 
4003df8bae1dSRodney W. Grimes 			/*
40040d94caffSDavid Greenman 			 * Insert the entry into the new map -- we know we're
40050d94caffSDavid Greenman 			 * inserting at the end of the new map.
4006df8bae1dSRodney W. Grimes 			 */
40079f701172SKonstantin Belousov 			vm_map_entry_link(new_map, new_entry);
40082a7be1b6SBrian Feldman 			vmspace_map_entry_forked(vm1, vm2, new_entry);
4009df8bae1dSRodney W. Grimes 
4010df8bae1dSRodney W. Grimes 			/*
4011df8bae1dSRodney W. Grimes 			 * Update the physical map
4012df8bae1dSRodney W. Grimes 			 */
4013df8bae1dSRodney W. Grimes 			pmap_copy(new_map->pmap, old_map->pmap,
4014df8bae1dSRodney W. Grimes 			    new_entry->start,
4015df8bae1dSRodney W. Grimes 			    (old_entry->end - old_entry->start),
4016df8bae1dSRodney W. Grimes 			    old_entry->start);
4017df8bae1dSRodney W. Grimes 			break;
4018df8bae1dSRodney W. Grimes 
4019df8bae1dSRodney W. Grimes 		case VM_INHERIT_COPY:
4020df8bae1dSRodney W. Grimes 			/*
4021df8bae1dSRodney W. Grimes 			 * Clone the entry and link into the map.
4022df8bae1dSRodney W. Grimes 			 */
4023df8bae1dSRodney W. Grimes 			new_entry = vm_map_entry_create(new_map);
4024df8bae1dSRodney W. Grimes 			*new_entry = *old_entry;
402584110e7eSKonstantin Belousov 			/*
402684110e7eSKonstantin Belousov 			 * Copied entry is COW over the old object.
402784110e7eSKonstantin Belousov 			 */
40289f6acfd1SKonstantin Belousov 			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
402984110e7eSKonstantin Belousov 			    MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT);
40300acea7dfSKonstantin Belousov 			new_entry->wiring_thread = NULL;
4031df8bae1dSRodney W. Grimes 			new_entry->wired_count = 0;
4032df8bae1dSRodney W. Grimes 			new_entry->object.vm_object = NULL;
4033ef694c1aSEdward Tomasz Napierala 			new_entry->cred = NULL;
40349f701172SKonstantin Belousov 			vm_map_entry_link(new_map, new_entry);
40352a7be1b6SBrian Feldman 			vmspace_map_entry_forked(vm1, vm2, new_entry);
4036bd7e5f99SJohn Dyson 			vm_map_copy_entry(old_map, new_map, old_entry,
40373364c323SKonstantin Belousov 			    new_entry, fork_charge);
403878022527SKonstantin Belousov 			vm_map_entry_set_vnode_text(new_entry, true);
4039df8bae1dSRodney W. Grimes 			break;
404078d7964bSXin LI 
404178d7964bSXin LI 		case VM_INHERIT_ZERO:
404278d7964bSXin LI 			/*
404378d7964bSXin LI 			 * Create a new anonymous mapping entry modelled from
404478d7964bSXin LI 			 * the old one.
404578d7964bSXin LI 			 */
404678d7964bSXin LI 			new_entry = vm_map_entry_create(new_map);
404778d7964bSXin LI 			memset(new_entry, 0, sizeof(*new_entry));
404878d7964bSXin LI 
404978d7964bSXin LI 			new_entry->start = old_entry->start;
405078d7964bSXin LI 			new_entry->end = old_entry->end;
405178d7964bSXin LI 			new_entry->eflags = old_entry->eflags &
405278d7964bSXin LI 			    ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
405378022527SKonstantin Belousov 			    MAP_ENTRY_VN_WRITECNT | MAP_ENTRY_VN_EXEC);
405478d7964bSXin LI 			new_entry->protection = old_entry->protection;
405578d7964bSXin LI 			new_entry->max_protection = old_entry->max_protection;
405678d7964bSXin LI 			new_entry->inheritance = VM_INHERIT_ZERO;
405778d7964bSXin LI 
40589f701172SKonstantin Belousov 			vm_map_entry_link(new_map, new_entry);
405978d7964bSXin LI 			vmspace_map_entry_forked(vm1, vm2, new_entry);
406078d7964bSXin LI 
406178d7964bSXin LI 			new_entry->cred = curthread->td_ucred;
406278d7964bSXin LI 			crhold(new_entry->cred);
406378d7964bSXin LI 			*fork_charge += (new_entry->end - new_entry->start);
406478d7964bSXin LI 
406578d7964bSXin LI 			break;
4066df8bae1dSRodney W. Grimes 		}
4067df8bae1dSRodney W. Grimes 		old_entry = old_entry->next;
4068df8bae1dSRodney W. Grimes 	}
406984110e7eSKonstantin Belousov 	/*
407084110e7eSKonstantin Belousov 	 * Use inlined vm_map_unlock() to postpone handling the deferred
407184110e7eSKonstantin Belousov 	 * map entries, which cannot be done until both old_map and
407284110e7eSKonstantin Belousov 	 * new_map locks are released.
407384110e7eSKonstantin Belousov 	 */
407484110e7eSKonstantin Belousov 	sx_xunlock(&old_map->lock);
407584110e7eSKonstantin Belousov 	sx_xunlock(&new_map->lock);
407684110e7eSKonstantin Belousov 	vm_map_process_deferred();
4077df8bae1dSRodney W. Grimes 
4078df8bae1dSRodney W. Grimes 	return (vm2);
4079df8bae1dSRodney W. Grimes }
4080df8bae1dSRodney W. Grimes 
40818056df6eSAlan Cox /*
40828056df6eSAlan Cox  * Create a process's stack for exec_new_vmspace().  This function is never
40838056df6eSAlan Cox  * asked to wire the newly created stack.
40848056df6eSAlan Cox  */
408594f7e29aSAlan Cox int
408694f7e29aSAlan Cox vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
408794f7e29aSAlan Cox     vm_prot_t prot, vm_prot_t max, int cow)
408894f7e29aSAlan Cox {
40894648ba0aSKonstantin Belousov 	vm_size_t growsize, init_ssize;
40908056df6eSAlan Cox 	rlim_t vmemlim;
40914648ba0aSKonstantin Belousov 	int rv;
40924648ba0aSKonstantin Belousov 
40938056df6eSAlan Cox 	MPASS((map->flags & MAP_WIREFUTURE) == 0);
40944648ba0aSKonstantin Belousov 	growsize = sgrowsiz;
40954648ba0aSKonstantin Belousov 	init_ssize = (max_ssize < growsize) ? max_ssize : growsize;
40964648ba0aSKonstantin Belousov 	vm_map_lock(map);
4097f6f6d240SMateusz Guzik 	vmemlim = lim_cur(curthread, RLIMIT_VMEM);
40984648ba0aSKonstantin Belousov 	/* If we would blow our VMEM resource limit, no go */
40994648ba0aSKonstantin Belousov 	if (map->size + init_ssize > vmemlim) {
41004648ba0aSKonstantin Belousov 		rv = KERN_NO_SPACE;
41014648ba0aSKonstantin Belousov 		goto out;
41024648ba0aSKonstantin Belousov 	}
4103e1f92cccSAlan Cox 	rv = vm_map_stack_locked(map, addrbos, max_ssize, growsize, prot,
41044648ba0aSKonstantin Belousov 	    max, cow);
41054648ba0aSKonstantin Belousov out:
41064648ba0aSKonstantin Belousov 	vm_map_unlock(map);
41074648ba0aSKonstantin Belousov 	return (rv);
41084648ba0aSKonstantin Belousov }
41094648ba0aSKonstantin Belousov 
411019f49ad3SKonstantin Belousov static int stack_guard_page = 1;
411119f49ad3SKonstantin Belousov SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
411219f49ad3SKonstantin Belousov     &stack_guard_page, 0,
411319f49ad3SKonstantin Belousov     "Specifies the number of guard pages for a stack that grows");
411419f49ad3SKonstantin Belousov 
41154648ba0aSKonstantin Belousov static int
41164648ba0aSKonstantin Belousov vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
41174648ba0aSKonstantin Belousov     vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow)
41184648ba0aSKonstantin Belousov {
4119d1d3f7e1SDoug Moore 	vm_map_entry_t new_entry, prev_entry;
412019bd0d9cSKonstantin Belousov 	vm_offset_t bot, gap_bot, gap_top, top;
412119f49ad3SKonstantin Belousov 	vm_size_t init_ssize, sgp;
4122fd75d710SMarcel Moolenaar 	int orient, rv;
412394f7e29aSAlan Cox 
4124fd75d710SMarcel Moolenaar 	/*
4125fd75d710SMarcel Moolenaar 	 * The stack orientation is piggybacked with the cow argument.
4126fd75d710SMarcel Moolenaar 	 * Extract it into orient and mask the cow argument so that we
4127fd75d710SMarcel Moolenaar 	 * don't pass it around further.
4128fd75d710SMarcel Moolenaar 	 */
4129fd75d710SMarcel Moolenaar 	orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP);
4130fd75d710SMarcel Moolenaar 	KASSERT(orient != 0, ("No stack grow direction"));
413119bd0d9cSKonstantin Belousov 	KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP),
413219bd0d9cSKonstantin Belousov 	    ("bi-dir stack"));
4133fd75d710SMarcel Moolenaar 
413477bc7900SKonstantin Belousov 	if (addrbos < vm_map_min(map) ||
41359410cd7dSKonstantin Belousov 	    addrbos + max_ssize > vm_map_max(map) ||
41369410cd7dSKonstantin Belousov 	    addrbos + max_ssize <= addrbos)
41379410cd7dSKonstantin Belousov 		return (KERN_INVALID_ADDRESS);
41389410cd7dSKonstantin Belousov 	sgp = (vm_size_t)stack_guard_page * PAGE_SIZE;
41399410cd7dSKonstantin Belousov 	if (sgp >= max_ssize)
41409410cd7dSKonstantin Belousov 		return (KERN_INVALID_ARGUMENT);
4141fd75d710SMarcel Moolenaar 
414219f49ad3SKonstantin Belousov 	init_ssize = growsize;
414319f49ad3SKonstantin Belousov 	if (max_ssize < init_ssize + sgp)
414419f49ad3SKonstantin Belousov 		init_ssize = max_ssize - sgp;
414594f7e29aSAlan Cox 
414694f7e29aSAlan Cox 	/* If addr is already mapped, no go */
4147d1d3f7e1SDoug Moore 	if (vm_map_lookup_entry(map, addrbos, &prev_entry))
414894f7e29aSAlan Cox 		return (KERN_NO_SPACE);
4149a69ac174SMatthew Dillon 
4150fd75d710SMarcel Moolenaar 	/*
4151763df3ecSPedro F. Giffuni 	 * If we can't accommodate max_ssize in the current mapping, no go.
415294f7e29aSAlan Cox 	 */
4153d1d3f7e1SDoug Moore 	if (prev_entry->next->start < addrbos + max_ssize)
415494f7e29aSAlan Cox 		return (KERN_NO_SPACE);
415594f7e29aSAlan Cox 
4156fd75d710SMarcel Moolenaar 	/*
4157fd75d710SMarcel Moolenaar 	 * We initially map a stack of only init_ssize.  We will grow as
4158fd75d710SMarcel Moolenaar 	 * needed later.  Depending on the orientation of the stack (i.e.
4159fd75d710SMarcel Moolenaar 	 * the grow direction) we either map at the top of the range, the
4160fd75d710SMarcel Moolenaar 	 * bottom of the range or in the middle.
416194f7e29aSAlan Cox 	 *
4162fd75d710SMarcel Moolenaar 	 * Note: we would normally expect prot and max to be VM_PROT_ALL,
4163fd75d710SMarcel Moolenaar 	 * and cow to be 0.  Possibly we should eliminate these as input
4164fd75d710SMarcel Moolenaar 	 * parameters, and just pass these values here in the insert call.
416594f7e29aSAlan Cox 	 */
416619bd0d9cSKonstantin Belousov 	if (orient == MAP_STACK_GROWS_DOWN) {
4167fd75d710SMarcel Moolenaar 		bot = addrbos + max_ssize - init_ssize;
4168fd75d710SMarcel Moolenaar 		top = bot + init_ssize;
416919bd0d9cSKonstantin Belousov 		gap_bot = addrbos;
417019bd0d9cSKonstantin Belousov 		gap_top = bot;
417119bd0d9cSKonstantin Belousov 	} else /* if (orient == MAP_STACK_GROWS_UP) */ {
417219bd0d9cSKonstantin Belousov 		bot = addrbos;
417319bd0d9cSKonstantin Belousov 		top = bot + init_ssize;
417419bd0d9cSKonstantin Belousov 		gap_bot = top;
417519bd0d9cSKonstantin Belousov 		gap_top = addrbos + max_ssize;
417619bd0d9cSKonstantin Belousov 	}
4177fd75d710SMarcel Moolenaar 	rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
417819bd0d9cSKonstantin Belousov 	if (rv != KERN_SUCCESS)
417919bd0d9cSKonstantin Belousov 		return (rv);
4180d1d3f7e1SDoug Moore 	new_entry = prev_entry->next;
418119bd0d9cSKonstantin Belousov 	KASSERT(new_entry->end == top || new_entry->start == bot,
418219bd0d9cSKonstantin Belousov 	    ("Bad entry start/end for new stack entry"));
4183712efe66SAlan Cox 	KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
4184712efe66SAlan Cox 	    (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
4185712efe66SAlan Cox 	    ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
4186712efe66SAlan Cox 	KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
4187712efe66SAlan Cox 	    (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
4188712efe66SAlan Cox 	    ("new entry lacks MAP_ENTRY_GROWS_UP"));
418919bd0d9cSKonstantin Belousov 	rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE,
419019bd0d9cSKonstantin Belousov 	    VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ?
419119bd0d9cSKonstantin Belousov 	    MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP));
419219bd0d9cSKonstantin Belousov 	if (rv != KERN_SUCCESS)
419319bd0d9cSKonstantin Belousov 		(void)vm_map_delete(map, bot, top);
419494f7e29aSAlan Cox 	return (rv);
419594f7e29aSAlan Cox }
419694f7e29aSAlan Cox 
419719bd0d9cSKonstantin Belousov /*
419819bd0d9cSKonstantin Belousov  * Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if we
419919bd0d9cSKonstantin Belousov  * successfully grow the stack.
420094f7e29aSAlan Cox  */
420119bd0d9cSKonstantin Belousov static int
420219bd0d9cSKonstantin Belousov vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry)
420394f7e29aSAlan Cox {
420419bd0d9cSKonstantin Belousov 	vm_map_entry_t stack_entry;
420519bd0d9cSKonstantin Belousov 	struct proc *p;
420619bd0d9cSKonstantin Belousov 	struct vmspace *vm;
420719bd0d9cSKonstantin Belousov 	struct ucred *cred;
420819bd0d9cSKonstantin Belousov 	vm_offset_t gap_end, gap_start, grow_start;
4209fa581662SDoug Moore 	vm_size_t grow_amount, guard, max_grow;
42107e19eda4SAndrey Zonov 	rlim_t lmemlim, stacklim, vmemlim;
421119bd0d9cSKonstantin Belousov 	int rv, rv1;
421219bd0d9cSKonstantin Belousov 	bool gap_deleted, grow_down, is_procstack;
42131ba5ad42SEdward Tomasz Napierala #ifdef notyet
42141ba5ad42SEdward Tomasz Napierala 	uint64_t limit;
42151ba5ad42SEdward Tomasz Napierala #endif
4216afcc55f3SEdward Tomasz Napierala #ifdef RACCT
42171ba5ad42SEdward Tomasz Napierala 	int error;
4218afcc55f3SEdward Tomasz Napierala #endif
421923955314SAlfred Perlstein 
422019bd0d9cSKonstantin Belousov 	p = curproc;
422119bd0d9cSKonstantin Belousov 	vm = p->p_vmspace;
4222eb5ea878SKonstantin Belousov 
4223eb5ea878SKonstantin Belousov 	/*
4224eb5ea878SKonstantin Belousov 	 * Disallow stack growth when the access is performed by a
4225eb5ea878SKonstantin Belousov 	 * debugger or AIO daemon.  The reason is that the wrong
4226eb5ea878SKonstantin Belousov 	 * resource limits are applied.
4227eb5ea878SKonstantin Belousov 	 */
422810ae16c7SKonstantin Belousov 	if (p != initproc && (map != &p->p_vmspace->vm_map ||
422910ae16c7SKonstantin Belousov 	    p->p_textvp == NULL))
4230f758aaddSKonstantin Belousov 		return (KERN_FAILURE);
4231eb5ea878SKonstantin Belousov 
423219bd0d9cSKonstantin Belousov 	MPASS(!map->system_map);
423319bd0d9cSKonstantin Belousov 
4234201f03b8SAlan Cox 	guard = stack_guard_page * PAGE_SIZE;
4235f6f6d240SMateusz Guzik 	lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK);
4236f6f6d240SMateusz Guzik 	stacklim = lim_cur(curthread, RLIMIT_STACK);
4237f6f6d240SMateusz Guzik 	vmemlim = lim_cur(curthread, RLIMIT_VMEM);
423819bd0d9cSKonstantin Belousov retry:
423919bd0d9cSKonstantin Belousov 	/* If addr is not in a hole for a stack grow area, no need to grow. */
4240d1d3f7e1SDoug Moore 	if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry))
424119bd0d9cSKonstantin Belousov 		return (KERN_FAILURE);
424219bd0d9cSKonstantin Belousov 	if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0)
42430cddd8f0SMatthew Dillon 		return (KERN_SUCCESS);
424419bd0d9cSKonstantin Belousov 	if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) {
4245d1d3f7e1SDoug Moore 		stack_entry = gap_entry->next;
424619bd0d9cSKonstantin Belousov 		if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 ||
424719bd0d9cSKonstantin Belousov 		    stack_entry->start != gap_entry->end)
424819bd0d9cSKonstantin Belousov 			return (KERN_FAILURE);
424919bd0d9cSKonstantin Belousov 		grow_amount = round_page(stack_entry->start - addr);
425019bd0d9cSKonstantin Belousov 		grow_down = true;
425119bd0d9cSKonstantin Belousov 	} else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) {
4252d1d3f7e1SDoug Moore 		stack_entry = gap_entry->prev;
425319bd0d9cSKonstantin Belousov 		if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 ||
425419bd0d9cSKonstantin Belousov 		    stack_entry->end != gap_entry->start)
425519bd0d9cSKonstantin Belousov 			return (KERN_FAILURE);
425619bd0d9cSKonstantin Belousov 		grow_amount = round_page(addr + 1 - stack_entry->end);
425719bd0d9cSKonstantin Belousov 		grow_down = false;
4258b21a0008SMarcel Moolenaar 	} else {
425919bd0d9cSKonstantin Belousov 		return (KERN_FAILURE);
4260b21a0008SMarcel Moolenaar 	}
4261201f03b8SAlan Cox 	max_grow = gap_entry->end - gap_entry->start;
4262201f03b8SAlan Cox 	if (guard > max_grow)
4263201f03b8SAlan Cox 		return (KERN_NO_SPACE);
4264201f03b8SAlan Cox 	max_grow -= guard;
426519bd0d9cSKonstantin Belousov 	if (grow_amount > max_grow)
42660cddd8f0SMatthew Dillon 		return (KERN_NO_SPACE);
426794f7e29aSAlan Cox 
4268b21a0008SMarcel Moolenaar 	/*
4269b21a0008SMarcel Moolenaar 	 * If this is the main process stack, see if we're over the stack
4270b21a0008SMarcel Moolenaar 	 * limit.
427194f7e29aSAlan Cox 	 */
427219bd0d9cSKonstantin Belousov 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr &&
427319bd0d9cSKonstantin Belousov 	    addr < (vm_offset_t)p->p_sysent->sv_usrstack;
427419bd0d9cSKonstantin Belousov 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim))
42750cddd8f0SMatthew Dillon 		return (KERN_NO_SPACE);
427619bd0d9cSKonstantin Belousov 
4277afcc55f3SEdward Tomasz Napierala #ifdef RACCT
42784b5c9cf6SEdward Tomasz Napierala 	if (racct_enable) {
42791ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(p);
42804b5c9cf6SEdward Tomasz Napierala 		if (is_procstack && racct_set(p, RACCT_STACK,
42814b5c9cf6SEdward Tomasz Napierala 		    ctob(vm->vm_ssize) + grow_amount)) {
42821ba5ad42SEdward Tomasz Napierala 			PROC_UNLOCK(p);
42831ba5ad42SEdward Tomasz Napierala 			return (KERN_NO_SPACE);
42841ba5ad42SEdward Tomasz Napierala 		}
42851ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(p);
42864b5c9cf6SEdward Tomasz Napierala 	}
4287afcc55f3SEdward Tomasz Napierala #endif
428894f7e29aSAlan Cox 
428919bd0d9cSKonstantin Belousov 	grow_amount = roundup(grow_amount, sgrowsiz);
429019bd0d9cSKonstantin Belousov 	if (grow_amount > max_grow)
429119bd0d9cSKonstantin Belousov 		grow_amount = max_grow;
429291d5354aSJohn Baldwin 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
4293e4826248SAlan Cox 		grow_amount = trunc_page((vm_size_t)stacklim) -
4294e4826248SAlan Cox 		    ctob(vm->vm_ssize);
429594f7e29aSAlan Cox 	}
429619bd0d9cSKonstantin Belousov 
42971ba5ad42SEdward Tomasz Napierala #ifdef notyet
42981ba5ad42SEdward Tomasz Napierala 	PROC_LOCK(p);
42991ba5ad42SEdward Tomasz Napierala 	limit = racct_get_available(p, RACCT_STACK);
43001ba5ad42SEdward Tomasz Napierala 	PROC_UNLOCK(p);
43011ba5ad42SEdward Tomasz Napierala 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
43021ba5ad42SEdward Tomasz Napierala 		grow_amount = limit - ctob(vm->vm_ssize);
43031ba5ad42SEdward Tomasz Napierala #endif
430419bd0d9cSKonstantin Belousov 
430519bd0d9cSKonstantin Belousov 	if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
43063ac7d297SAndrey Zonov 		if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
43077e19eda4SAndrey Zonov 			rv = KERN_NO_SPACE;
43087e19eda4SAndrey Zonov 			goto out;
43097e19eda4SAndrey Zonov 		}
43107e19eda4SAndrey Zonov #ifdef RACCT
43114b5c9cf6SEdward Tomasz Napierala 		if (racct_enable) {
43127e19eda4SAndrey Zonov 			PROC_LOCK(p);
43137e19eda4SAndrey Zonov 			if (racct_set(p, RACCT_MEMLOCK,
43143ac7d297SAndrey Zonov 			    ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
43157e19eda4SAndrey Zonov 				PROC_UNLOCK(p);
43167e19eda4SAndrey Zonov 				rv = KERN_NO_SPACE;
43177e19eda4SAndrey Zonov 				goto out;
43187e19eda4SAndrey Zonov 			}
43197e19eda4SAndrey Zonov 			PROC_UNLOCK(p);
43204b5c9cf6SEdward Tomasz Napierala 		}
43217e19eda4SAndrey Zonov #endif
43227e19eda4SAndrey Zonov 	}
432319bd0d9cSKonstantin Belousov 
4324a69ac174SMatthew Dillon 	/* If we would blow our VMEM resource limit, no go */
432591d5354aSJohn Baldwin 	if (map->size + grow_amount > vmemlim) {
43261ba5ad42SEdward Tomasz Napierala 		rv = KERN_NO_SPACE;
43271ba5ad42SEdward Tomasz Napierala 		goto out;
4328a69ac174SMatthew Dillon 	}
4329afcc55f3SEdward Tomasz Napierala #ifdef RACCT
43304b5c9cf6SEdward Tomasz Napierala 	if (racct_enable) {
43311ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(p);
43321ba5ad42SEdward Tomasz Napierala 		if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
43331ba5ad42SEdward Tomasz Napierala 			PROC_UNLOCK(p);
43341ba5ad42SEdward Tomasz Napierala 			rv = KERN_NO_SPACE;
43351ba5ad42SEdward Tomasz Napierala 			goto out;
43361ba5ad42SEdward Tomasz Napierala 		}
43371ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(p);
43384b5c9cf6SEdward Tomasz Napierala 	}
4339afcc55f3SEdward Tomasz Napierala #endif
4340a69ac174SMatthew Dillon 
434119bd0d9cSKonstantin Belousov 	if (vm_map_lock_upgrade(map)) {
434219bd0d9cSKonstantin Belousov 		gap_entry = NULL;
434319bd0d9cSKonstantin Belousov 		vm_map_lock_read(map);
434419bd0d9cSKonstantin Belousov 		goto retry;
434594f7e29aSAlan Cox 	}
434694f7e29aSAlan Cox 
434719bd0d9cSKonstantin Belousov 	if (grow_down) {
434819bd0d9cSKonstantin Belousov 		grow_start = gap_entry->end - grow_amount;
434919bd0d9cSKonstantin Belousov 		if (gap_entry->start + grow_amount == gap_entry->end) {
435019bd0d9cSKonstantin Belousov 			gap_start = gap_entry->start;
435119bd0d9cSKonstantin Belousov 			gap_end = gap_entry->end;
435219bd0d9cSKonstantin Belousov 			vm_map_entry_delete(map, gap_entry);
435319bd0d9cSKonstantin Belousov 			gap_deleted = true;
435419bd0d9cSKonstantin Belousov 		} else {
435519bd0d9cSKonstantin Belousov 			MPASS(gap_entry->start < gap_entry->end - grow_amount);
4356fa581662SDoug Moore 			vm_map_entry_resize(map, gap_entry, -grow_amount);
435719bd0d9cSKonstantin Belousov 			gap_deleted = false;
435819bd0d9cSKonstantin Belousov 		}
435919bd0d9cSKonstantin Belousov 		rv = vm_map_insert(map, NULL, 0, grow_start,
436019bd0d9cSKonstantin Belousov 		    grow_start + grow_amount,
436119bd0d9cSKonstantin Belousov 		    stack_entry->protection, stack_entry->max_protection,
4362712efe66SAlan Cox 		    MAP_STACK_GROWS_DOWN);
436319bd0d9cSKonstantin Belousov 		if (rv != KERN_SUCCESS) {
436419bd0d9cSKonstantin Belousov 			if (gap_deleted) {
436519bd0d9cSKonstantin Belousov 				rv1 = vm_map_insert(map, NULL, 0, gap_start,
436619bd0d9cSKonstantin Belousov 				    gap_end, VM_PROT_NONE, VM_PROT_NONE,
436719bd0d9cSKonstantin Belousov 				    MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN);
436819bd0d9cSKonstantin Belousov 				MPASS(rv1 == KERN_SUCCESS);
43691895f520SDoug Moore 			} else
4370fa581662SDoug Moore 				vm_map_entry_resize(map, gap_entry,
43711895f520SDoug Moore 				    grow_amount);
437294f7e29aSAlan Cox 		}
4373b21a0008SMarcel Moolenaar 	} else {
437419bd0d9cSKonstantin Belousov 		grow_start = stack_entry->end;
4375ef694c1aSEdward Tomasz Napierala 		cred = stack_entry->cred;
4376ef694c1aSEdward Tomasz Napierala 		if (cred == NULL && stack_entry->object.vm_object != NULL)
4377ef694c1aSEdward Tomasz Napierala 			cred = stack_entry->object.vm_object->cred;
4378ef694c1aSEdward Tomasz Napierala 		if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred))
43793364c323SKonstantin Belousov 			rv = KERN_NO_SPACE;
4380b21a0008SMarcel Moolenaar 		/* Grow the underlying object if applicable. */
43813364c323SKonstantin Belousov 		else if (stack_entry->object.vm_object == NULL ||
4382b21a0008SMarcel Moolenaar 		    vm_object_coalesce(stack_entry->object.vm_object,
438357a21abaSAlan Cox 		    stack_entry->offset,
4384b21a0008SMarcel Moolenaar 		    (vm_size_t)(stack_entry->end - stack_entry->start),
4385fa581662SDoug Moore 		    grow_amount, cred != NULL)) {
4386fa581662SDoug Moore 			if (gap_entry->start + grow_amount == gap_entry->end) {
438719bd0d9cSKonstantin Belousov 				vm_map_entry_delete(map, gap_entry);
4388fa581662SDoug Moore 				vm_map_entry_resize(map, stack_entry,
4389fa581662SDoug Moore 				    grow_amount);
4390fa581662SDoug Moore 			} else {
439119bd0d9cSKonstantin Belousov 				gap_entry->start += grow_amount;
4392fa581662SDoug Moore 				stack_entry->end += grow_amount;
4393fa581662SDoug Moore 			}
439419bd0d9cSKonstantin Belousov 			map->size += grow_amount;
4395b21a0008SMarcel Moolenaar 			rv = KERN_SUCCESS;
4396b21a0008SMarcel Moolenaar 		} else
4397b21a0008SMarcel Moolenaar 			rv = KERN_FAILURE;
4398b21a0008SMarcel Moolenaar 	}
4399b21a0008SMarcel Moolenaar 	if (rv == KERN_SUCCESS && is_procstack)
4400b21a0008SMarcel Moolenaar 		vm->vm_ssize += btoc(grow_amount);
4401b21a0008SMarcel Moolenaar 
4402abd498aaSBruce M Simpson 	/*
4403abd498aaSBruce M Simpson 	 * Heed the MAP_WIREFUTURE flag if it was set for this process.
4404abd498aaSBruce M Simpson 	 */
440519bd0d9cSKonstantin Belousov 	if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
440654a3a114SMark Johnston 		rv = vm_map_wire_locked(map, grow_start,
440754a3a114SMark Johnston 		    grow_start + grow_amount,
4408212e02c8SKonstantin Belousov 		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
440954a3a114SMark Johnston 	}
441019bd0d9cSKonstantin Belousov 	vm_map_lock_downgrade(map);
4411abd498aaSBruce M Simpson 
44121ba5ad42SEdward Tomasz Napierala out:
4413afcc55f3SEdward Tomasz Napierala #ifdef RACCT
44144b5c9cf6SEdward Tomasz Napierala 	if (racct_enable && rv != KERN_SUCCESS) {
44151ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(p);
44161ba5ad42SEdward Tomasz Napierala 		error = racct_set(p, RACCT_VMEM, map->size);
44171ba5ad42SEdward Tomasz Napierala 		KASSERT(error == 0, ("decreasing RACCT_VMEM failed"));
44187e19eda4SAndrey Zonov 		if (!old_mlock) {
44197e19eda4SAndrey Zonov 			error = racct_set(p, RACCT_MEMLOCK,
44203ac7d297SAndrey Zonov 			    ptoa(pmap_wired_count(map->pmap)));
44217e19eda4SAndrey Zonov 			KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed"));
44227e19eda4SAndrey Zonov 		}
44231ba5ad42SEdward Tomasz Napierala 	    	error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize));
44241ba5ad42SEdward Tomasz Napierala 		KASSERT(error == 0, ("decreasing RACCT_STACK failed"));
44251ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(p);
44261ba5ad42SEdward Tomasz Napierala 	}
4427afcc55f3SEdward Tomasz Napierala #endif
44281ba5ad42SEdward Tomasz Napierala 
44290cddd8f0SMatthew Dillon 	return (rv);
443094f7e29aSAlan Cox }
443194f7e29aSAlan Cox 
4432df8bae1dSRodney W. Grimes /*
44335856e12eSJohn Dyson  * Unshare the specified VM space for exec.  If other processes are
44345856e12eSJohn Dyson  * mapped to it, then create a new one.  The new vmspace is null.
44355856e12eSJohn Dyson  */
443689b57fcfSKonstantin Belousov int
44373ebc1248SPeter Wemm vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
44381b40f8c0SMatthew Dillon {
44395856e12eSJohn Dyson 	struct vmspace *oldvmspace = p->p_vmspace;
44405856e12eSJohn Dyson 	struct vmspace *newvmspace;
44415856e12eSJohn Dyson 
44427032434eSKonstantin Belousov 	KASSERT((curthread->td_pflags & TDP_EXECVMSPC) == 0,
44437032434eSKonstantin Belousov 	    ("vmspace_exec recursed"));
44446e00f3a3SKonstantin Belousov 	newvmspace = vmspace_alloc(minuser, maxuser, pmap_pinit);
444589b57fcfSKonstantin Belousov 	if (newvmspace == NULL)
444689b57fcfSKonstantin Belousov 		return (ENOMEM);
444751ab6c28SAlan Cox 	newvmspace->vm_swrss = oldvmspace->vm_swrss;
44485856e12eSJohn Dyson 	/*
44495856e12eSJohn Dyson 	 * This code is written like this for prototype purposes.  The
44505856e12eSJohn Dyson 	 * goal is to avoid running down the vmspace here, but let the
44515856e12eSJohn Dyson 	 * other process's that are still using the vmspace to finally
44525856e12eSJohn Dyson 	 * run it down.  Even though there is little or no chance of blocking
44535856e12eSJohn Dyson 	 * here, it is a good idea to keep this form for future mods.
44545856e12eSJohn Dyson 	 */
445557051fdcSTor Egge 	PROC_VMSPACE_LOCK(p);
44565856e12eSJohn Dyson 	p->p_vmspace = newvmspace;
445757051fdcSTor Egge 	PROC_VMSPACE_UNLOCK(p);
44586617724cSJeff Roberson 	if (p == curthread->td_proc)
4459b40ce416SJulian Elischer 		pmap_activate(curthread);
44607032434eSKonstantin Belousov 	curthread->td_pflags |= TDP_EXECVMSPC;
446189b57fcfSKonstantin Belousov 	return (0);
44625856e12eSJohn Dyson }
44635856e12eSJohn Dyson 
44645856e12eSJohn Dyson /*
44655856e12eSJohn Dyson  * Unshare the specified VM space for forcing COW.  This
44665856e12eSJohn Dyson  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
44675856e12eSJohn Dyson  */
446889b57fcfSKonstantin Belousov int
44691b40f8c0SMatthew Dillon vmspace_unshare(struct proc *p)
44701b40f8c0SMatthew Dillon {
44715856e12eSJohn Dyson 	struct vmspace *oldvmspace = p->p_vmspace;
44725856e12eSJohn Dyson 	struct vmspace *newvmspace;
44733364c323SKonstantin Belousov 	vm_ooffset_t fork_charge;
44745856e12eSJohn Dyson 
44755856e12eSJohn Dyson 	if (oldvmspace->vm_refcnt == 1)
447689b57fcfSKonstantin Belousov 		return (0);
44773364c323SKonstantin Belousov 	fork_charge = 0;
44783364c323SKonstantin Belousov 	newvmspace = vmspace_fork(oldvmspace, &fork_charge);
447989b57fcfSKonstantin Belousov 	if (newvmspace == NULL)
448089b57fcfSKonstantin Belousov 		return (ENOMEM);
4481ef694c1aSEdward Tomasz Napierala 	if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) {
44823364c323SKonstantin Belousov 		vmspace_free(newvmspace);
44833364c323SKonstantin Belousov 		return (ENOMEM);
44843364c323SKonstantin Belousov 	}
448557051fdcSTor Egge 	PROC_VMSPACE_LOCK(p);
44865856e12eSJohn Dyson 	p->p_vmspace = newvmspace;
448757051fdcSTor Egge 	PROC_VMSPACE_UNLOCK(p);
44886617724cSJeff Roberson 	if (p == curthread->td_proc)
4489b40ce416SJulian Elischer 		pmap_activate(curthread);
4490b56ef1c1SJohn Baldwin 	vmspace_free(oldvmspace);
449189b57fcfSKonstantin Belousov 	return (0);
44925856e12eSJohn Dyson }
44935856e12eSJohn Dyson 
44945856e12eSJohn Dyson /*
4495df8bae1dSRodney W. Grimes  *	vm_map_lookup:
4496df8bae1dSRodney W. Grimes  *
4497df8bae1dSRodney W. Grimes  *	Finds the VM object, offset, and
4498df8bae1dSRodney W. Grimes  *	protection for a given virtual address in the
4499df8bae1dSRodney W. Grimes  *	specified map, assuming a page fault of the
4500df8bae1dSRodney W. Grimes  *	type specified.
4501df8bae1dSRodney W. Grimes  *
4502df8bae1dSRodney W. Grimes  *	Leaves the map in question locked for read; return
4503df8bae1dSRodney W. Grimes  *	values are guaranteed until a vm_map_lookup_done
4504df8bae1dSRodney W. Grimes  *	call is performed.  Note that the map argument
4505df8bae1dSRodney W. Grimes  *	is in/out; the returned map must be used in
4506df8bae1dSRodney W. Grimes  *	the call to vm_map_lookup_done.
4507df8bae1dSRodney W. Grimes  *
4508df8bae1dSRodney W. Grimes  *	A handle (out_entry) is returned for use in
4509df8bae1dSRodney W. Grimes  *	vm_map_lookup_done, to make that fast.
4510df8bae1dSRodney W. Grimes  *
4511df8bae1dSRodney W. Grimes  *	If a lookup is requested with "write protection"
4512df8bae1dSRodney W. Grimes  *	specified, the map may be changed to perform virtual
4513df8bae1dSRodney W. Grimes  *	copying operations, although the data referenced will
4514df8bae1dSRodney W. Grimes  *	remain the same.
4515df8bae1dSRodney W. Grimes  */
4516df8bae1dSRodney W. Grimes int
4517b9dcd593SBruce Evans vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
4518b9dcd593SBruce Evans 	      vm_offset_t vaddr,
451947221757SJohn Dyson 	      vm_prot_t fault_typea,
4520b9dcd593SBruce Evans 	      vm_map_entry_t *out_entry,	/* OUT */
4521b9dcd593SBruce Evans 	      vm_object_t *object,		/* OUT */
4522b9dcd593SBruce Evans 	      vm_pindex_t *pindex,		/* OUT */
4523b9dcd593SBruce Evans 	      vm_prot_t *out_prot,		/* OUT */
45242d8acc0fSJohn Dyson 	      boolean_t *wired)			/* OUT */
4525df8bae1dSRodney W. Grimes {
4526c0877f10SJohn Dyson 	vm_map_entry_t entry;
4527c0877f10SJohn Dyson 	vm_map_t map = *var_map;
4528c0877f10SJohn Dyson 	vm_prot_t prot;
452947221757SJohn Dyson 	vm_prot_t fault_type = fault_typea;
45303364c323SKonstantin Belousov 	vm_object_t eobject;
45310cc74f14SAlan Cox 	vm_size_t size;
4532ef694c1aSEdward Tomasz Napierala 	struct ucred *cred;
4533df8bae1dSRodney W. Grimes 
453419bd0d9cSKonstantin Belousov RetryLookup:
4535df8bae1dSRodney W. Grimes 
4536df8bae1dSRodney W. Grimes 	vm_map_lock_read(map);
4537df8bae1dSRodney W. Grimes 
453819bd0d9cSKonstantin Belousov RetryLookupLocked:
4539df8bae1dSRodney W. Grimes 	/*
45404c3ef59eSAlan Cox 	 * Lookup the faulting address.
4541df8bae1dSRodney W. Grimes 	 */
4542095104acSAlan Cox 	if (!vm_map_lookup_entry(map, vaddr, out_entry)) {
4543095104acSAlan Cox 		vm_map_unlock_read(map);
4544095104acSAlan Cox 		return (KERN_INVALID_ADDRESS);
4545095104acSAlan Cox 	}
4546df8bae1dSRodney W. Grimes 
45474e94f402SAlan Cox 	entry = *out_entry;
4548b7b2aac2SJohn Dyson 
4549df8bae1dSRodney W. Grimes 	/*
4550df8bae1dSRodney W. Grimes 	 * Handle submaps.
4551df8bae1dSRodney W. Grimes 	 */
4552afa07f7eSJohn Dyson 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
4553df8bae1dSRodney W. Grimes 		vm_map_t old_map = map;
4554df8bae1dSRodney W. Grimes 
4555df8bae1dSRodney W. Grimes 		*var_map = map = entry->object.sub_map;
4556df8bae1dSRodney W. Grimes 		vm_map_unlock_read(old_map);
4557df8bae1dSRodney W. Grimes 		goto RetryLookup;
4558df8bae1dSRodney W. Grimes 	}
4559a04c970aSJohn Dyson 
4560df8bae1dSRodney W. Grimes 	/*
45610d94caffSDavid Greenman 	 * Check whether this task is allowed to have this page.
4562df8bae1dSRodney W. Grimes 	 */
4563df8bae1dSRodney W. Grimes 	prot = entry->protection;
456419bd0d9cSKonstantin Belousov 	if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) {
456519bd0d9cSKonstantin Belousov 		fault_typea &= ~VM_PROT_FAULT_LOOKUP;
456619bd0d9cSKonstantin Belousov 		if (prot == VM_PROT_NONE && map != kernel_map &&
456719bd0d9cSKonstantin Belousov 		    (entry->eflags & MAP_ENTRY_GUARD) != 0 &&
456819bd0d9cSKonstantin Belousov 		    (entry->eflags & (MAP_ENTRY_STACK_GAP_DN |
456919bd0d9cSKonstantin Belousov 		    MAP_ENTRY_STACK_GAP_UP)) != 0 &&
457019bd0d9cSKonstantin Belousov 		    vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS)
457119bd0d9cSKonstantin Belousov 			goto RetryLookupLocked;
457219bd0d9cSKonstantin Belousov 	}
457319bd0d9cSKonstantin Belousov 	fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
45742db65ab4SAlan Cox 	if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
4575095104acSAlan Cox 		vm_map_unlock_read(map);
4576095104acSAlan Cox 		return (KERN_PROTECTION_FAILURE);
457747221757SJohn Dyson 	}
4578b8db9776SKonstantin Belousov 	KASSERT((prot & VM_PROT_WRITE) == 0 || (entry->eflags &
4579b8db9776SKonstantin Belousov 	    (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY)) !=
4580b8db9776SKonstantin Belousov 	    (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY),
4581b8db9776SKonstantin Belousov 	    ("entry %p flags %x", entry, entry->eflags));
45825b3e0257SDag-Erling Smørgrav 	if ((fault_typea & VM_PROT_COPY) != 0 &&
45835b3e0257SDag-Erling Smørgrav 	    (entry->max_protection & VM_PROT_WRITE) == 0 &&
45845b3e0257SDag-Erling Smørgrav 	    (entry->eflags & MAP_ENTRY_COW) == 0) {
45855b3e0257SDag-Erling Smørgrav 		vm_map_unlock_read(map);
45865b3e0257SDag-Erling Smørgrav 		return (KERN_PROTECTION_FAILURE);
45875b3e0257SDag-Erling Smørgrav 	}
4588df8bae1dSRodney W. Grimes 
4589df8bae1dSRodney W. Grimes 	/*
45900d94caffSDavid Greenman 	 * If this page is not pageable, we have to get it for all possible
45910d94caffSDavid Greenman 	 * accesses.
4592df8bae1dSRodney W. Grimes 	 */
459305f0fdd2SPoul-Henning Kamp 	*wired = (entry->wired_count != 0);
459405f0fdd2SPoul-Henning Kamp 	if (*wired)
4595a6d42a0dSAlan Cox 		fault_type = entry->protection;
45963364c323SKonstantin Belousov 	size = entry->end - entry->start;
4597df8bae1dSRodney W. Grimes 	/*
4598df8bae1dSRodney W. Grimes 	 * If the entry was copy-on-write, we either ...
4599df8bae1dSRodney W. Grimes 	 */
4600afa07f7eSJohn Dyson 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
4601df8bae1dSRodney W. Grimes 		/*
46020d94caffSDavid Greenman 		 * If we want to write the page, we may as well handle that
4603ad5fca3bSAlan Cox 		 * now since we've got the map locked.
4604df8bae1dSRodney W. Grimes 		 *
46050d94caffSDavid Greenman 		 * If we don't need to write the page, we just demote the
46060d94caffSDavid Greenman 		 * permissions allowed.
4607df8bae1dSRodney W. Grimes 		 */
4608a6d42a0dSAlan Cox 		if ((fault_type & VM_PROT_WRITE) != 0 ||
4609a6d42a0dSAlan Cox 		    (fault_typea & VM_PROT_COPY) != 0) {
4610df8bae1dSRodney W. Grimes 			/*
46110d94caffSDavid Greenman 			 * Make a new object, and place it in the object
46120d94caffSDavid Greenman 			 * chain.  Note that no new references have appeared
4613ad5fca3bSAlan Cox 			 * -- one just moved from the map to the new
46140d94caffSDavid Greenman 			 * object.
4615df8bae1dSRodney W. Grimes 			 */
461625adb370SBrian Feldman 			if (vm_map_lock_upgrade(map))
4617df8bae1dSRodney W. Grimes 				goto RetryLookup;
46189917e010SAlan Cox 
4619ef694c1aSEdward Tomasz Napierala 			if (entry->cred == NULL) {
46203364c323SKonstantin Belousov 				/*
46213364c323SKonstantin Belousov 				 * The debugger owner is charged for
46223364c323SKonstantin Belousov 				 * the memory.
46233364c323SKonstantin Belousov 				 */
4624ef694c1aSEdward Tomasz Napierala 				cred = curthread->td_ucred;
4625ef694c1aSEdward Tomasz Napierala 				crhold(cred);
4626ef694c1aSEdward Tomasz Napierala 				if (!swap_reserve_by_cred(size, cred)) {
4627ef694c1aSEdward Tomasz Napierala 					crfree(cred);
46283364c323SKonstantin Belousov 					vm_map_unlock(map);
46293364c323SKonstantin Belousov 					return (KERN_RESOURCE_SHORTAGE);
46303364c323SKonstantin Belousov 				}
4631ef694c1aSEdward Tomasz Napierala 				entry->cred = cred;
46323364c323SKonstantin Belousov 			}
46330cc74f14SAlan Cox 			vm_object_shadow(&entry->object.vm_object,
46340cc74f14SAlan Cox 			    &entry->offset, size);
4635afa07f7eSJohn Dyson 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
46363364c323SKonstantin Belousov 			eobject = entry->object.vm_object;
4637ef694c1aSEdward Tomasz Napierala 			if (eobject->cred != NULL) {
46383364c323SKonstantin Belousov 				/*
46393364c323SKonstantin Belousov 				 * The object was not shadowed.
46403364c323SKonstantin Belousov 				 */
4641ef694c1aSEdward Tomasz Napierala 				swap_release_by_cred(size, entry->cred);
4642ef694c1aSEdward Tomasz Napierala 				crfree(entry->cred);
4643ef694c1aSEdward Tomasz Napierala 				entry->cred = NULL;
4644ef694c1aSEdward Tomasz Napierala 			} else if (entry->cred != NULL) {
464589f6b863SAttilio Rao 				VM_OBJECT_WLOCK(eobject);
4646ef694c1aSEdward Tomasz Napierala 				eobject->cred = entry->cred;
46473364c323SKonstantin Belousov 				eobject->charge = size;
464889f6b863SAttilio Rao 				VM_OBJECT_WUNLOCK(eobject);
4649ef694c1aSEdward Tomasz Napierala 				entry->cred = NULL;
46503364c323SKonstantin Belousov 			}
46519917e010SAlan Cox 
46529b09b6c7SMatthew Dillon 			vm_map_lock_downgrade(map);
46530d94caffSDavid Greenman 		} else {
4654df8bae1dSRodney W. Grimes 			/*
46550d94caffSDavid Greenman 			 * We're attempting to read a copy-on-write page --
46560d94caffSDavid Greenman 			 * don't allow writes.
4657df8bae1dSRodney W. Grimes 			 */
46582d8acc0fSJohn Dyson 			prot &= ~VM_PROT_WRITE;
4659df8bae1dSRodney W. Grimes 		}
4660df8bae1dSRodney W. Grimes 	}
46612d8acc0fSJohn Dyson 
4662df8bae1dSRodney W. Grimes 	/*
4663df8bae1dSRodney W. Grimes 	 * Create an object if necessary.
4664df8bae1dSRodney W. Grimes 	 */
46654e71e795SMatthew Dillon 	if (entry->object.vm_object == NULL &&
46664e71e795SMatthew Dillon 	    !map->system_map) {
466725adb370SBrian Feldman 		if (vm_map_lock_upgrade(map))
4668df8bae1dSRodney W. Grimes 			goto RetryLookup;
466924a1cce3SDavid Greenman 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
46703364c323SKonstantin Belousov 		    atop(size));
4671df8bae1dSRodney W. Grimes 		entry->offset = 0;
4672ef694c1aSEdward Tomasz Napierala 		if (entry->cred != NULL) {
467389f6b863SAttilio Rao 			VM_OBJECT_WLOCK(entry->object.vm_object);
4674ef694c1aSEdward Tomasz Napierala 			entry->object.vm_object->cred = entry->cred;
46753364c323SKonstantin Belousov 			entry->object.vm_object->charge = size;
467689f6b863SAttilio Rao 			VM_OBJECT_WUNLOCK(entry->object.vm_object);
4677ef694c1aSEdward Tomasz Napierala 			entry->cred = NULL;
46783364c323SKonstantin Belousov 		}
46799b09b6c7SMatthew Dillon 		vm_map_lock_downgrade(map);
4680df8bae1dSRodney W. Grimes 	}
4681b5b40fa6SJohn Dyson 
4682df8bae1dSRodney W. Grimes 	/*
46830d94caffSDavid Greenman 	 * Return the object/offset from this entry.  If the entry was
46840d94caffSDavid Greenman 	 * copy-on-write or empty, it has been fixed up.
4685df8bae1dSRodney W. Grimes 	 */
468610d9120cSKonstantin Belousov 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
4687df8bae1dSRodney W. Grimes 	*object = entry->object.vm_object;
4688df8bae1dSRodney W. Grimes 
4689df8bae1dSRodney W. Grimes 	*out_prot = prot;
4690df8bae1dSRodney W. Grimes 	return (KERN_SUCCESS);
4691df8bae1dSRodney W. Grimes }
4692df8bae1dSRodney W. Grimes 
4693df8bae1dSRodney W. Grimes /*
469419dc5607STor Egge  *	vm_map_lookup_locked:
469519dc5607STor Egge  *
469619dc5607STor Egge  *	Lookup the faulting address.  A version of vm_map_lookup that returns
469719dc5607STor Egge  *      KERN_FAILURE instead of blocking on map lock or memory allocation.
469819dc5607STor Egge  */
469919dc5607STor Egge int
470019dc5607STor Egge vm_map_lookup_locked(vm_map_t *var_map,		/* IN/OUT */
470119dc5607STor Egge 		     vm_offset_t vaddr,
470219dc5607STor Egge 		     vm_prot_t fault_typea,
470319dc5607STor Egge 		     vm_map_entry_t *out_entry,	/* OUT */
470419dc5607STor Egge 		     vm_object_t *object,	/* OUT */
470519dc5607STor Egge 		     vm_pindex_t *pindex,	/* OUT */
470619dc5607STor Egge 		     vm_prot_t *out_prot,	/* OUT */
470719dc5607STor Egge 		     boolean_t *wired)		/* OUT */
470819dc5607STor Egge {
470919dc5607STor Egge 	vm_map_entry_t entry;
471019dc5607STor Egge 	vm_map_t map = *var_map;
471119dc5607STor Egge 	vm_prot_t prot;
471219dc5607STor Egge 	vm_prot_t fault_type = fault_typea;
471319dc5607STor Egge 
471419dc5607STor Egge 	/*
47154c3ef59eSAlan Cox 	 * Lookup the faulting address.
471619dc5607STor Egge 	 */
471719dc5607STor Egge 	if (!vm_map_lookup_entry(map, vaddr, out_entry))
471819dc5607STor Egge 		return (KERN_INVALID_ADDRESS);
471919dc5607STor Egge 
472019dc5607STor Egge 	entry = *out_entry;
472119dc5607STor Egge 
472219dc5607STor Egge 	/*
472319dc5607STor Egge 	 * Fail if the entry refers to a submap.
472419dc5607STor Egge 	 */
472519dc5607STor Egge 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
472619dc5607STor Egge 		return (KERN_FAILURE);
472719dc5607STor Egge 
472819dc5607STor Egge 	/*
472919dc5607STor Egge 	 * Check whether this task is allowed to have this page.
473019dc5607STor Egge 	 */
473119dc5607STor Egge 	prot = entry->protection;
473219dc5607STor Egge 	fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
473319dc5607STor Egge 	if ((fault_type & prot) != fault_type)
473419dc5607STor Egge 		return (KERN_PROTECTION_FAILURE);
473519dc5607STor Egge 
473619dc5607STor Egge 	/*
473719dc5607STor Egge 	 * If this page is not pageable, we have to get it for all possible
473819dc5607STor Egge 	 * accesses.
473919dc5607STor Egge 	 */
474019dc5607STor Egge 	*wired = (entry->wired_count != 0);
474119dc5607STor Egge 	if (*wired)
4742a6d42a0dSAlan Cox 		fault_type = entry->protection;
474319dc5607STor Egge 
474419dc5607STor Egge 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
474519dc5607STor Egge 		/*
474619dc5607STor Egge 		 * Fail if the entry was copy-on-write for a write fault.
474719dc5607STor Egge 		 */
474819dc5607STor Egge 		if (fault_type & VM_PROT_WRITE)
474919dc5607STor Egge 			return (KERN_FAILURE);
475019dc5607STor Egge 		/*
475119dc5607STor Egge 		 * We're attempting to read a copy-on-write page --
475219dc5607STor Egge 		 * don't allow writes.
475319dc5607STor Egge 		 */
475419dc5607STor Egge 		prot &= ~VM_PROT_WRITE;
475519dc5607STor Egge 	}
475619dc5607STor Egge 
475719dc5607STor Egge 	/*
475819dc5607STor Egge 	 * Fail if an object should be created.
475919dc5607STor Egge 	 */
476019dc5607STor Egge 	if (entry->object.vm_object == NULL && !map->system_map)
476119dc5607STor Egge 		return (KERN_FAILURE);
476219dc5607STor Egge 
476319dc5607STor Egge 	/*
476419dc5607STor Egge 	 * Return the object/offset from this entry.  If the entry was
476519dc5607STor Egge 	 * copy-on-write or empty, it has been fixed up.
476619dc5607STor Egge 	 */
476710d9120cSKonstantin Belousov 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
476819dc5607STor Egge 	*object = entry->object.vm_object;
476919dc5607STor Egge 
477019dc5607STor Egge 	*out_prot = prot;
477119dc5607STor Egge 	return (KERN_SUCCESS);
477219dc5607STor Egge }
477319dc5607STor Egge 
477419dc5607STor Egge /*
4775df8bae1dSRodney W. Grimes  *	vm_map_lookup_done:
4776df8bae1dSRodney W. Grimes  *
4777df8bae1dSRodney W. Grimes  *	Releases locks acquired by a vm_map_lookup
4778df8bae1dSRodney W. Grimes  *	(according to the handle returned by that lookup).
4779df8bae1dSRodney W. Grimes  */
47800d94caffSDavid Greenman void
47811b40f8c0SMatthew Dillon vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
4782df8bae1dSRodney W. Grimes {
4783df8bae1dSRodney W. Grimes 	/*
4784df8bae1dSRodney W. Grimes 	 * Unlock the main-level map
4785df8bae1dSRodney W. Grimes 	 */
4786df8bae1dSRodney W. Grimes 	vm_map_unlock_read(map);
4787df8bae1dSRodney W. Grimes }
4788df8bae1dSRodney W. Grimes 
478919ea042eSKonstantin Belousov vm_offset_t
479019ea042eSKonstantin Belousov vm_map_max_KBI(const struct vm_map *map)
479119ea042eSKonstantin Belousov {
479219ea042eSKonstantin Belousov 
4793f0165b1cSKonstantin Belousov 	return (vm_map_max(map));
479419ea042eSKonstantin Belousov }
479519ea042eSKonstantin Belousov 
479619ea042eSKonstantin Belousov vm_offset_t
479719ea042eSKonstantin Belousov vm_map_min_KBI(const struct vm_map *map)
479819ea042eSKonstantin Belousov {
479919ea042eSKonstantin Belousov 
4800f0165b1cSKonstantin Belousov 	return (vm_map_min(map));
480119ea042eSKonstantin Belousov }
480219ea042eSKonstantin Belousov 
480319ea042eSKonstantin Belousov pmap_t
480419ea042eSKonstantin Belousov vm_map_pmap_KBI(vm_map_t map)
480519ea042eSKonstantin Belousov {
480619ea042eSKonstantin Belousov 
480719ea042eSKonstantin Belousov 	return (map->pmap);
480819ea042eSKonstantin Belousov }
480919ea042eSKonstantin Belousov 
4810c7c34a24SBruce Evans #include "opt_ddb.h"
4811c3cb3e12SDavid Greenman #ifdef DDB
4812c7c34a24SBruce Evans #include <sys/kernel.h>
4813c7c34a24SBruce Evans 
4814c7c34a24SBruce Evans #include <ddb/ddb.h>
4815c7c34a24SBruce Evans 
48162ebcd458SAttilio Rao static void
48172ebcd458SAttilio Rao vm_map_print(vm_map_t map)
4818df8bae1dSRodney W. Grimes {
481977131528SDoug Moore 	vm_map_entry_t entry, prev;
4820c7c34a24SBruce Evans 
4821e5f251d2SAlan Cox 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
4822e5f251d2SAlan Cox 	    (void *)map,
4823101eeb7fSBruce Evans 	    (void *)map->pmap, map->nentries, map->timestamp);
4824df8bae1dSRodney W. Grimes 
4825c7c34a24SBruce Evans 	db_indent += 2;
482677131528SDoug Moore 	for (prev = &map->header; (entry = prev->next) != &map->header;
482777131528SDoug Moore 	    prev = entry) {
482819bd0d9cSKonstantin Belousov 		db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n",
482919bd0d9cSKonstantin Belousov 		    (void *)entry, (void *)entry->start, (void *)entry->end,
483019bd0d9cSKonstantin Belousov 		    entry->eflags);
4831e5f251d2SAlan Cox 		{
4832df8bae1dSRodney W. Grimes 			static char *inheritance_name[4] =
4833df8bae1dSRodney W. Grimes 			{"share", "copy", "none", "donate_copy"};
48340d94caffSDavid Greenman 
483595e5e988SJohn Dyson 			db_iprintf(" prot=%x/%x/%s",
4836df8bae1dSRodney W. Grimes 			    entry->protection,
4837df8bae1dSRodney W. Grimes 			    entry->max_protection,
483877131528SDoug Moore 			    inheritance_name[(int)(unsigned char)
483977131528SDoug Moore 			    entry->inheritance]);
4840df8bae1dSRodney W. Grimes 			if (entry->wired_count != 0)
484195e5e988SJohn Dyson 				db_printf(", wired");
4842df8bae1dSRodney W. Grimes 		}
48439fdfe602SMatthew Dillon 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
4844cd034a5bSMaxime Henrion 			db_printf(", share=%p, offset=0x%jx\n",
48459fdfe602SMatthew Dillon 			    (void *)entry->object.sub_map,
4846cd034a5bSMaxime Henrion 			    (uintmax_t)entry->offset);
484777131528SDoug Moore 			if (prev == &map->header ||
484877131528SDoug Moore 			    prev->object.sub_map !=
484977131528SDoug Moore 				entry->object.sub_map) {
4850c7c34a24SBruce Evans 				db_indent += 2;
48512ebcd458SAttilio Rao 				vm_map_print((vm_map_t)entry->object.sub_map);
4852c7c34a24SBruce Evans 				db_indent -= 2;
4853df8bae1dSRodney W. Grimes 			}
48540d94caffSDavid Greenman 		} else {
4855ef694c1aSEdward Tomasz Napierala 			if (entry->cred != NULL)
4856ef694c1aSEdward Tomasz Napierala 				db_printf(", ruid %d", entry->cred->cr_ruid);
4857cd034a5bSMaxime Henrion 			db_printf(", object=%p, offset=0x%jx",
4858101eeb7fSBruce Evans 			    (void *)entry->object.vm_object,
4859cd034a5bSMaxime Henrion 			    (uintmax_t)entry->offset);
4860ef694c1aSEdward Tomasz Napierala 			if (entry->object.vm_object && entry->object.vm_object->cred)
4861ef694c1aSEdward Tomasz Napierala 				db_printf(", obj ruid %d charge %jx",
4862ef694c1aSEdward Tomasz Napierala 				    entry->object.vm_object->cred->cr_ruid,
48633364c323SKonstantin Belousov 				    (uintmax_t)entry->object.vm_object->charge);
4864afa07f7eSJohn Dyson 			if (entry->eflags & MAP_ENTRY_COW)
4865c7c34a24SBruce Evans 				db_printf(", copy (%s)",
4866afa07f7eSJohn Dyson 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
4867c7c34a24SBruce Evans 			db_printf("\n");
4868df8bae1dSRodney W. Grimes 
486977131528SDoug Moore 			if (prev == &map->header ||
487077131528SDoug Moore 			    prev->object.vm_object !=
487177131528SDoug Moore 				entry->object.vm_object) {
4872c7c34a24SBruce Evans 				db_indent += 2;
4873101eeb7fSBruce Evans 				vm_object_print((db_expr_t)(intptr_t)
4874101eeb7fSBruce Evans 						entry->object.vm_object,
487544bbc3b7SKonstantin Belousov 						0, 0, (char *)0);
4876c7c34a24SBruce Evans 				db_indent -= 2;
4877df8bae1dSRodney W. Grimes 			}
4878df8bae1dSRodney W. Grimes 		}
4879df8bae1dSRodney W. Grimes 	}
4880c7c34a24SBruce Evans 	db_indent -= 2;
4881df8bae1dSRodney W. Grimes }
488295e5e988SJohn Dyson 
48832ebcd458SAttilio Rao DB_SHOW_COMMAND(map, map)
48842ebcd458SAttilio Rao {
48852ebcd458SAttilio Rao 
48862ebcd458SAttilio Rao 	if (!have_addr) {
48872ebcd458SAttilio Rao 		db_printf("usage: show map <addr>\n");
48882ebcd458SAttilio Rao 		return;
48892ebcd458SAttilio Rao 	}
48902ebcd458SAttilio Rao 	vm_map_print((vm_map_t)addr);
48912ebcd458SAttilio Rao }
489295e5e988SJohn Dyson 
489395e5e988SJohn Dyson DB_SHOW_COMMAND(procvm, procvm)
489495e5e988SJohn Dyson {
489595e5e988SJohn Dyson 	struct proc *p;
489695e5e988SJohn Dyson 
489795e5e988SJohn Dyson 	if (have_addr) {
4898a9546a6bSJohn Baldwin 		p = db_lookup_proc(addr);
489995e5e988SJohn Dyson 	} else {
490095e5e988SJohn Dyson 		p = curproc;
490195e5e988SJohn Dyson 	}
490295e5e988SJohn Dyson 
4903ac1e407bSBruce Evans 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
4904ac1e407bSBruce Evans 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
4905b1028ad1SLuoqi Chen 	    (void *)vmspace_pmap(p->p_vmspace));
490695e5e988SJohn Dyson 
49072ebcd458SAttilio Rao 	vm_map_print((vm_map_t)&p->p_vmspace->vm_map);
490895e5e988SJohn Dyson }
490995e5e988SJohn Dyson 
4910c7c34a24SBruce Evans #endif /* DDB */
4911