xref: /freebsd/sys/vm/vm_map.c (revision 1de7b4b805ddbf2429da511c053686ac4591ed89)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * The Mach Operating System project at Carnegie-Mellon University.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
35  *
36  *
37  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
38  * All rights reserved.
39  *
40  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
41  *
42  * Permission to use, copy, modify and distribute this software and
43  * its documentation is hereby granted, provided that both the copyright
44  * notice and this permission notice appear in all copies of the
45  * software, derivative works or modified versions, and any portions
46  * thereof, and that both notices appear in supporting documentation.
47  *
48  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
49  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
50  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
51  *
52  * Carnegie Mellon requests users of this software to return to
53  *
54  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
55  *  School of Computer Science
56  *  Carnegie Mellon University
57  *  Pittsburgh PA 15213-3890
58  *
59  * any improvements or extensions that they make and grant Carnegie the
60  * rights to redistribute these changes.
61  */
62 
63 /*
64  *	Virtual memory mapping module.
65  */
66 
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
69 
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/kernel.h>
73 #include <sys/ktr.h>
74 #include <sys/lock.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/vnode.h>
80 #include <sys/racct.h>
81 #include <sys/resourcevar.h>
82 #include <sys/rwlock.h>
83 #include <sys/file.h>
84 #include <sys/sysctl.h>
85 #include <sys/sysent.h>
86 #include <sys/shm.h>
87 
88 #include <vm/vm.h>
89 #include <vm/vm_param.h>
90 #include <vm/pmap.h>
91 #include <vm/vm_map.h>
92 #include <vm/vm_page.h>
93 #include <vm/vm_object.h>
94 #include <vm/vm_pager.h>
95 #include <vm/vm_kern.h>
96 #include <vm/vm_extern.h>
97 #include <vm/vnode_pager.h>
98 #include <vm/swap_pager.h>
99 #include <vm/uma.h>
100 
101 /*
102  *	Virtual memory maps provide for the mapping, protection,
103  *	and sharing of virtual memory objects.  In addition,
104  *	this module provides for an efficient virtual copy of
105  *	memory from one map to another.
106  *
107  *	Synchronization is required prior to most operations.
108  *
109  *	Maps consist of an ordered doubly-linked list of simple
110  *	entries; a self-adjusting binary search tree of these
111  *	entries is used to speed up lookups.
112  *
113  *	Since portions of maps are specified by start/end addresses,
114  *	which may not align with existing map entries, all
115  *	routines merely "clip" entries to these start/end values.
116  *	[That is, an entry is split into two, bordering at a
117  *	start or end value.]  Note that these clippings may not
118  *	always be necessary (as the two resulting entries are then
119  *	not changed); however, the clipping is done for convenience.
120  *
121  *	As mentioned above, virtual copy operations are performed
122  *	by copying VM object references from one map to
123  *	another, and then marking both regions as copy-on-write.
124  */
125 
126 static struct mtx map_sleep_mtx;
127 static uma_zone_t mapentzone;
128 static uma_zone_t kmapentzone;
129 static uma_zone_t mapzone;
130 static uma_zone_t vmspace_zone;
131 static int vmspace_zinit(void *mem, int size, int flags);
132 static int vm_map_zinit(void *mem, int size, int flags);
133 static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min,
134     vm_offset_t max);
135 static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
136 static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
137 static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
138 static int vm_map_growstack(vm_map_t map, vm_offset_t addr,
139     vm_map_entry_t gap_entry);
140 static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
141     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags);
142 #ifdef INVARIANTS
143 static void vm_map_zdtor(void *mem, int size, void *arg);
144 static void vmspace_zdtor(void *mem, int size, void *arg);
145 #endif
146 static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
147     vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max,
148     int cow);
149 static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
150     vm_offset_t failed_addr);
151 
152 #define	ENTRY_CHARGED(e) ((e)->cred != NULL || \
153     ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \
154      !((e)->eflags & MAP_ENTRY_NEEDS_COPY)))
155 
156 /*
157  * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type
158  * stable.
159  */
160 #define PROC_VMSPACE_LOCK(p) do { } while (0)
161 #define PROC_VMSPACE_UNLOCK(p) do { } while (0)
162 
163 /*
164  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
165  *
166  *	Asserts that the starting and ending region
167  *	addresses fall within the valid range of the map.
168  */
169 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
170 		{					\
171 		if (start < vm_map_min(map))		\
172 			start = vm_map_min(map);	\
173 		if (end > vm_map_max(map))		\
174 			end = vm_map_max(map);		\
175 		if (start > end)			\
176 			start = end;			\
177 		}
178 
179 /*
180  *	vm_map_startup:
181  *
182  *	Initialize the vm_map module.  Must be called before
183  *	any other vm_map routines.
184  *
185  *	Map and entry structures are allocated from the general
186  *	purpose memory pool with some exceptions:
187  *
188  *	- The kernel map and kmem submap are allocated statically.
189  *	- Kernel map entries are allocated out of a static pool.
190  *
191  *	These restrictions are necessary since malloc() uses the
192  *	maps and requires map entries.
193  */
194 
195 void
196 vm_map_startup(void)
197 {
198 	mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
199 	mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
200 #ifdef INVARIANTS
201 	    vm_map_zdtor,
202 #else
203 	    NULL,
204 #endif
205 	    vm_map_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
206 	uma_prealloc(mapzone, MAX_KMAP);
207 	kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
208 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
209 	    UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
210 	mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
211 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
212 	vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
213 #ifdef INVARIANTS
214 	    vmspace_zdtor,
215 #else
216 	    NULL,
217 #endif
218 	    vmspace_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
219 }
220 
221 static int
222 vmspace_zinit(void *mem, int size, int flags)
223 {
224 	struct vmspace *vm;
225 
226 	vm = (struct vmspace *)mem;
227 
228 	vm->vm_map.pmap = NULL;
229 	(void)vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map), flags);
230 	PMAP_LOCK_INIT(vmspace_pmap(vm));
231 	return (0);
232 }
233 
234 static int
235 vm_map_zinit(void *mem, int size, int flags)
236 {
237 	vm_map_t map;
238 
239 	map = (vm_map_t)mem;
240 	memset(map, 0, sizeof(*map));
241 	mtx_init(&map->system_mtx, "vm map (system)", NULL, MTX_DEF | MTX_DUPOK);
242 	sx_init(&map->lock, "vm map (user)");
243 	return (0);
244 }
245 
246 #ifdef INVARIANTS
247 static void
248 vmspace_zdtor(void *mem, int size, void *arg)
249 {
250 	struct vmspace *vm;
251 
252 	vm = (struct vmspace *)mem;
253 
254 	vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
255 }
256 static void
257 vm_map_zdtor(void *mem, int size, void *arg)
258 {
259 	vm_map_t map;
260 
261 	map = (vm_map_t)mem;
262 	KASSERT(map->nentries == 0,
263 	    ("map %p nentries == %d on free.",
264 	    map, map->nentries));
265 	KASSERT(map->size == 0,
266 	    ("map %p size == %lu on free.",
267 	    map, (unsigned long)map->size));
268 }
269 #endif	/* INVARIANTS */
270 
271 /*
272  * Allocate a vmspace structure, including a vm_map and pmap,
273  * and initialize those structures.  The refcnt is set to 1.
274  *
275  * If 'pinit' is NULL then the embedded pmap is initialized via pmap_pinit().
276  */
277 struct vmspace *
278 vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit)
279 {
280 	struct vmspace *vm;
281 
282 	vm = uma_zalloc(vmspace_zone, M_WAITOK);
283 
284 	KASSERT(vm->vm_map.pmap == NULL, ("vm_map.pmap must be NULL"));
285 
286 	if (pinit == NULL)
287 		pinit = &pmap_pinit;
288 
289 	if (!pinit(vmspace_pmap(vm))) {
290 		uma_zfree(vmspace_zone, vm);
291 		return (NULL);
292 	}
293 	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
294 	_vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max);
295 	vm->vm_refcnt = 1;
296 	vm->vm_shm = NULL;
297 	vm->vm_swrss = 0;
298 	vm->vm_tsize = 0;
299 	vm->vm_dsize = 0;
300 	vm->vm_ssize = 0;
301 	vm->vm_taddr = 0;
302 	vm->vm_daddr = 0;
303 	vm->vm_maxsaddr = 0;
304 	return (vm);
305 }
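
/*
 * Illustrative sketch, not part of the original file: a hypothetical caller
 * that creates a fresh address space with the default pmap initializer
 * (pinit == NULL) and later drops its reference.  "minuser" and "maxuser"
 * stand in for whatever bounds a real caller would supply.
 *
 *	struct vmspace *vm;
 *
 *	vm = vmspace_alloc(minuser, maxuser, NULL);
 *	if (vm == NULL)
 *		return (ENOMEM);
 *	...use &vm->vm_map...
 *	vmspace_free(vm);	(drops the reference set to 1 at allocation)
 */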
306 
307 #ifdef RACCT
308 static void
309 vmspace_container_reset(struct proc *p)
310 {
311 
312 	PROC_LOCK(p);
313 	racct_set(p, RACCT_DATA, 0);
314 	racct_set(p, RACCT_STACK, 0);
315 	racct_set(p, RACCT_RSS, 0);
316 	racct_set(p, RACCT_MEMLOCK, 0);
317 	racct_set(p, RACCT_VMEM, 0);
318 	PROC_UNLOCK(p);
319 }
320 #endif
321 
322 static inline void
323 vmspace_dofree(struct vmspace *vm)
324 {
325 
326 	CTR1(KTR_VM, "vmspace_free: %p", vm);
327 
328 	/*
329 	 * Make sure any SysV shm is freed, it might not have been in
330 	 * exit1().
331 	 */
332 	shmexit(vm);
333 
334 	/*
335 	 * Lock the map, to wait out all other references to it.
336 	 * Delete all of the mappings and pages they hold, then call
337 	 * the pmap module to reclaim anything left.
338 	 */
339 	(void)vm_map_remove(&vm->vm_map, vm->vm_map.min_offset,
340 	    vm->vm_map.max_offset);
341 
342 	pmap_release(vmspace_pmap(vm));
343 	vm->vm_map.pmap = NULL;
344 	uma_zfree(vmspace_zone, vm);
345 }
346 
347 void
348 vmspace_free(struct vmspace *vm)
349 {
350 
351 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
352 	    "vmspace_free() called");
353 
354 	if (vm->vm_refcnt == 0)
355 		panic("vmspace_free: attempt to free already freed vmspace");
356 
357 	if (atomic_fetchadd_int(&vm->vm_refcnt, -1) == 1)
358 		vmspace_dofree(vm);
359 }
360 
361 void
362 vmspace_exitfree(struct proc *p)
363 {
364 	struct vmspace *vm;
365 
366 	PROC_VMSPACE_LOCK(p);
367 	vm = p->p_vmspace;
368 	p->p_vmspace = NULL;
369 	PROC_VMSPACE_UNLOCK(p);
370 	KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace"));
371 	vmspace_free(vm);
372 }
373 
374 void
375 vmspace_exit(struct thread *td)
376 {
377 	int refcnt;
378 	struct vmspace *vm;
379 	struct proc *p;
380 
381 	/*
382 	 * Release user portion of address space.
383 	 * This releases references to vnodes,
384 	 * which could cause I/O if the file has been unlinked.
385 	 * Need to do this early enough that we can still sleep.
386 	 *
387 	 * The last exiting process to reach this point releases as
388 	 * much of the environment as it can. vmspace_dofree() is the
389 	 * slower fallback in case another process had a temporary
390 	 * reference to the vmspace.
391 	 */
392 
393 	p = td->td_proc;
394 	vm = p->p_vmspace;
395 	atomic_add_int(&vmspace0.vm_refcnt, 1);
396 	do {
397 		refcnt = vm->vm_refcnt;
398 		if (refcnt > 1 && p->p_vmspace != &vmspace0) {
399 			/* Switch now since other proc might free vmspace */
400 			PROC_VMSPACE_LOCK(p);
401 			p->p_vmspace = &vmspace0;
402 			PROC_VMSPACE_UNLOCK(p);
403 			pmap_activate(td);
404 		}
405 	} while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1));
406 	if (refcnt == 1) {
407 		if (p->p_vmspace != vm) {
408 			/* vmspace not yet freed, switch back */
409 			PROC_VMSPACE_LOCK(p);
410 			p->p_vmspace = vm;
411 			PROC_VMSPACE_UNLOCK(p);
412 			pmap_activate(td);
413 		}
414 		pmap_remove_pages(vmspace_pmap(vm));
415 		/* Switch now since this proc will free vmspace */
416 		PROC_VMSPACE_LOCK(p);
417 		p->p_vmspace = &vmspace0;
418 		PROC_VMSPACE_UNLOCK(p);
419 		pmap_activate(td);
420 		vmspace_dofree(vm);
421 	}
422 #ifdef RACCT
423 	if (racct_enable)
424 		vmspace_container_reset(p);
425 #endif
426 }
427 
428 /* Acquire reference to vmspace owned by another process. */
429 
430 struct vmspace *
431 vmspace_acquire_ref(struct proc *p)
432 {
433 	struct vmspace *vm;
434 	int refcnt;
435 
436 	PROC_VMSPACE_LOCK(p);
437 	vm = p->p_vmspace;
438 	if (vm == NULL) {
439 		PROC_VMSPACE_UNLOCK(p);
440 		return (NULL);
441 	}
442 	do {
443 		refcnt = vm->vm_refcnt;
444 		if (refcnt <= 0) { 	/* Avoid 0->1 transition */
445 			PROC_VMSPACE_UNLOCK(p);
446 			return (NULL);
447 		}
448 	} while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt + 1));
449 	if (vm != p->p_vmspace) {
450 		PROC_VMSPACE_UNLOCK(p);
451 		vmspace_free(vm);
452 		return (NULL);
453 	}
454 	PROC_VMSPACE_UNLOCK(p);
455 	return (vm);
456 }
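
/*
 * Illustrative sketch, not part of the original file: the usual pattern for
 * inspecting another process's address space.  The acquired reference keeps
 * the vmspace, and thus the map, from being freed while it is examined.
 *
 *	struct vmspace *vm;
 *	vm_map_t map;
 *
 *	vm = vmspace_acquire_ref(p);
 *	if (vm == NULL)
 *		return (ESRCH);		(process is exiting or has exited)
 *	map = &vm->vm_map;
 *	vm_map_lock_read(map);
 *	...walk the map entries...
 *	vm_map_unlock_read(map);
 *	vmspace_free(vm);
 */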
457 
458 /*
459  * Switch between vmspaces in an AIO kernel process.
460  *
461  * The AIO kernel processes switch to and from a user process's
462  * vmspace while performing an I/O operation on behalf of a user
463  * process.  The new vmspace is either the vmspace of a user process
464  * obtained from an active AIO request or the initial vmspace of the
465  * AIO kernel process (when it is idling).  Because user processes
466  * will block to drain any active AIO requests before proceeding in
467  * exit() or execve(), the vmspace reference count for these vmspaces
468  * can never be 0.  This allows for a much simpler implementation than
469  * the loop in vmspace_acquire_ref() above.  Similarly, AIO kernel
470  * processes hold an extra reference on their initial vmspace for the
471  * life of the process so that this guarantee is true for any vmspace
472  * passed as 'newvm'.
473  */
474 void
475 vmspace_switch_aio(struct vmspace *newvm)
476 {
477 	struct vmspace *oldvm;
478 
479 	/* XXX: Need some way to assert that this is an aio daemon. */
480 
481 	KASSERT(newvm->vm_refcnt > 0,
482 	    ("vmspace_switch_aio: newvm unreferenced"));
483 
484 	oldvm = curproc->p_vmspace;
485 	if (oldvm == newvm)
486 		return;
487 
488 	/*
489 	 * Point to the new address space and refer to it.
490 	 */
491 	curproc->p_vmspace = newvm;
492 	atomic_add_int(&newvm->vm_refcnt, 1);
493 
494 	/* Activate the new mapping. */
495 	pmap_activate(curthread);
496 
497 	/* Remove the daemon's reference to the old address space. */
498 	KASSERT(oldvm->vm_refcnt > 1,
499 	    ("vmspace_switch_aio: oldvm dropping last reference"));
500 	vmspace_free(oldvm);
501 }
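
/*
 * Illustrative sketch, not part of the original file: an AIO daemon borrows
 * the vmspace of the user process that queued a request, performs the I/O
 * against that process's mappings, and then switches back to its own initial
 * vmspace ("myvm" here), on which it holds a long-lived reference.
 *
 *	vmspace_switch_aio(userproc_vm);	(run with the user's mappings)
 *	...perform the I/O on the user's buffers...
 *	vmspace_switch_aio(myvm);		(return to the daemon's own space)
 */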
502 
503 void
504 _vm_map_lock(vm_map_t map, const char *file, int line)
505 {
506 
507 	if (map->system_map)
508 		mtx_lock_flags_(&map->system_mtx, 0, file, line);
509 	else
510 		sx_xlock_(&map->lock, file, line);
511 	map->timestamp++;
512 }
513 
514 static void
515 vm_map_process_deferred(void)
516 {
517 	struct thread *td;
518 	vm_map_entry_t entry, next;
519 	vm_object_t object;
520 
521 	td = curthread;
522 	entry = td->td_map_def_user;
523 	td->td_map_def_user = NULL;
524 	while (entry != NULL) {
525 		next = entry->next;
526 		if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) {
527 			/*
528 			 * Decrement the object's writemappings and
529 			 * possibly the vnode's v_writecount.
530 			 */
531 			KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
532 			    ("Submap with writecount"));
533 			object = entry->object.vm_object;
534 			KASSERT(object != NULL, ("No object for writecount"));
535 			vnode_pager_release_writecount(object, entry->start,
536 			    entry->end);
537 		}
538 		vm_map_entry_deallocate(entry, FALSE);
539 		entry = next;
540 	}
541 }
542 
543 void
544 _vm_map_unlock(vm_map_t map, const char *file, int line)
545 {
546 
547 	if (map->system_map)
548 		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
549 	else {
550 		sx_xunlock_(&map->lock, file, line);
551 		vm_map_process_deferred();
552 	}
553 }
554 
555 void
556 _vm_map_lock_read(vm_map_t map, const char *file, int line)
557 {
558 
559 	if (map->system_map)
560 		mtx_lock_flags_(&map->system_mtx, 0, file, line);
561 	else
562 		sx_slock_(&map->lock, file, line);
563 }
564 
565 void
566 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
567 {
568 
569 	if (map->system_map)
570 		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
571 	else {
572 		sx_sunlock_(&map->lock, file, line);
573 		vm_map_process_deferred();
574 	}
575 }
576 
577 int
578 _vm_map_trylock(vm_map_t map, const char *file, int line)
579 {
580 	int error;
581 
582 	error = map->system_map ?
583 	    !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
584 	    !sx_try_xlock_(&map->lock, file, line);
585 	if (error == 0)
586 		map->timestamp++;
587 	return (error == 0);
588 }
589 
590 int
591 _vm_map_trylock_read(vm_map_t map, const char *file, int line)
592 {
593 	int error;
594 
595 	error = map->system_map ?
596 	    !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
597 	    !sx_try_slock_(&map->lock, file, line);
598 	return (error == 0);
599 }
600 
601 /*
602  *	_vm_map_lock_upgrade:	[ internal use only ]
603  *
604  *	Tries to upgrade a read (shared) lock on the specified map to a write
605  *	(exclusive) lock.  Returns the value "0" if the upgrade succeeds and a
606  *	non-zero value if the upgrade fails.  If the upgrade fails, the map is
607  *	returned without a read or write lock held.
608  *
609  *	Requires that the map be read locked.
610  */
611 int
612 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
613 {
614 	unsigned int last_timestamp;
615 
616 	if (map->system_map) {
617 		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
618 	} else {
619 		if (!sx_try_upgrade_(&map->lock, file, line)) {
620 			last_timestamp = map->timestamp;
621 			sx_sunlock_(&map->lock, file, line);
622 			vm_map_process_deferred();
623 			/*
624 			 * If the map's timestamp does not change while the
625 			 * map is unlocked, then the upgrade succeeds.
626 			 */
627 			sx_xlock_(&map->lock, file, line);
628 			if (last_timestamp != map->timestamp) {
629 				sx_xunlock_(&map->lock, file, line);
630 				return (1);
631 			}
632 		}
633 	}
634 	map->timestamp++;
635 	return (0);
636 }
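
/*
 * Illustrative sketch, not part of the original file: the retry pattern that
 * callers of vm_map_lock_upgrade() are expected to follow.  Because a failed
 * upgrade drops the lock entirely, any entry pointers found under the read
 * lock must be looked up again.
 *
 *	vm_map_lock_read(map);
 *	...find "entry"...
 *	if (vm_map_lock_upgrade(map)) {
 *		(the lock was lost; the map may have changed)
 *		vm_map_lock(map);
 *		...look up "entry" again...
 *	}
 *	...modify the map...
 *	vm_map_unlock(map);
 */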
637 
638 void
639 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
640 {
641 
642 	if (map->system_map) {
643 		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
644 	} else
645 		sx_downgrade_(&map->lock, file, line);
646 }
647 
648 /*
649  *	vm_map_locked:
650  *
651  *	Returns a non-zero value if the caller holds a write (exclusive) lock
652  *	on the specified map and the value "0" otherwise.
653  */
654 int
655 vm_map_locked(vm_map_t map)
656 {
657 
658 	if (map->system_map)
659 		return (mtx_owned(&map->system_mtx));
660 	else
661 		return (sx_xlocked(&map->lock));
662 }
663 
664 #ifdef INVARIANTS
665 static void
666 _vm_map_assert_locked(vm_map_t map, const char *file, int line)
667 {
668 
669 	if (map->system_map)
670 		mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
671 	else
672 		sx_assert_(&map->lock, SA_XLOCKED, file, line);
673 }
674 
675 #define	VM_MAP_ASSERT_LOCKED(map) \
676     _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE)
677 #else
678 #define	VM_MAP_ASSERT_LOCKED(map)
679 #endif
680 
681 /*
682  *	_vm_map_unlock_and_wait:
683  *
684  *	Atomically releases the lock on the specified map and puts the calling
685  *	thread to sleep.  The calling thread will remain asleep until either
686  *	vm_map_wakeup() is performed on the map or the specified timeout is
687  *	exceeded.
688  *
689  *	WARNING!  This function does not perform deferred deallocations of
690  *	objects and map	entries.  Therefore, the calling thread is expected to
691  *	reacquire the map lock after reawakening and later perform an ordinary
692  *	unlock operation, such as vm_map_unlock(), before completing its
693  *	operation on the map.
694  */
695 int
696 _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line)
697 {
698 
699 	mtx_lock(&map_sleep_mtx);
700 	if (map->system_map)
701 		mtx_unlock_flags_(&map->system_mtx, 0, file, line);
702 	else
703 		sx_xunlock_(&map->lock, file, line);
704 	return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps",
705 	    timo));
706 }
707 
708 /*
709  *	vm_map_wakeup:
710  *
711  *	Awaken any threads that have slept on the map using
712  *	vm_map_unlock_and_wait().
713  */
714 void
715 vm_map_wakeup(vm_map_t map)
716 {
717 
718 	/*
719 	 * Acquire and release map_sleep_mtx to prevent a wakeup()
720 	 * from being performed (and lost) between the map unlock
721 	 * and the msleep() in _vm_map_unlock_and_wait().
722 	 */
723 	mtx_lock(&map_sleep_mtx);
724 	mtx_unlock(&map_sleep_mtx);
725 	wakeup(&map->root);
726 }
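
/*
 * Illustrative sketch, not part of the original file: how the sleep/wakeup
 * pair above is typically used to wait out a concurrent operation on the map,
 * for example an entry marked MAP_ENTRY_IN_TRANSITION.  Comparing timestamps
 * detects whether the map changed while it was unlocked.
 *
 *	last_timestamp = map->timestamp;
 *	entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 *	(void)vm_map_unlock_and_wait(map, 0);
 *	vm_map_lock(map);
 *	if (last_timestamp != map->timestamp) {
 *		...re-lookup the entry...
 *	}
 *
 * The thread that clears the condition is expected to call vm_map_wakeup(map)
 * when it observes the needs-wakeup indication.
 */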
727 
728 void
729 vm_map_busy(vm_map_t map)
730 {
731 
732 	VM_MAP_ASSERT_LOCKED(map);
733 	map->busy++;
734 }
735 
736 void
737 vm_map_unbusy(vm_map_t map)
738 {
739 
740 	VM_MAP_ASSERT_LOCKED(map);
741 	KASSERT(map->busy, ("vm_map_unbusy: not busy"));
742 	if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) {
743 		vm_map_modflags(map, 0, MAP_BUSY_WAKEUP);
744 		wakeup(&map->busy);
745 	}
746 }
747 
748 void
749 vm_map_wait_busy(vm_map_t map)
750 {
751 
752 	VM_MAP_ASSERT_LOCKED(map);
753 	while (map->busy) {
754 		vm_map_modflags(map, MAP_BUSY_WAKEUP, 0);
755 		if (map->system_map)
756 			msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0);
757 		else
758 			sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0);
759 	}
760 	map->timestamp++;
761 }
762 
763 long
764 vmspace_resident_count(struct vmspace *vmspace)
765 {
766 	return pmap_resident_count(vmspace_pmap(vmspace));
767 }
768 
769 /*
770  *	vm_map_create:
771  *
772  *	Creates and returns a new empty VM map with
773  *	the given physical map structure, and having
774  *	the given lower and upper address bounds.
775  */
776 vm_map_t
777 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
778 {
779 	vm_map_t result;
780 
781 	result = uma_zalloc(mapzone, M_WAITOK);
782 	CTR1(KTR_VM, "vm_map_create: %p", result);
783 	_vm_map_init(result, pmap, min, max);
784 	return (result);
785 }
786 
787 /*
788  * Initialize an existing vm_map structure
789  * such as that in the vmspace structure.
790  */
791 static void
792 _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
793 {
794 
795 	map->header.next = map->header.prev = &map->header;
796 	map->needs_wakeup = FALSE;
797 	map->system_map = 0;
798 	map->pmap = pmap;
799 	map->min_offset = min;
800 	map->max_offset = max;
801 	map->flags = 0;
802 	map->root = NULL;
803 	map->timestamp = 0;
804 	map->busy = 0;
805 }
806 
807 void
808 vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
809 {
810 
811 	_vm_map_init(map, pmap, min, max);
812 	mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
813 	sx_init(&map->lock, "user map");
814 }
815 
816 /*
817  *	vm_map_entry_dispose:	[ internal use only ]
818  *
819  *	Inverse of vm_map_entry_create.
820  */
821 static void
822 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
823 {
824 	uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
825 }
826 
827 /*
828  *	vm_map_entry_create:	[ internal use only ]
829  *
830  *	Allocates a VM map entry for insertion.
831  *	No entry fields are filled in.
832  */
833 static vm_map_entry_t
834 vm_map_entry_create(vm_map_t map)
835 {
836 	vm_map_entry_t new_entry;
837 
838 	if (map->system_map)
839 		new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
840 	else
841 		new_entry = uma_zalloc(mapentzone, M_WAITOK);
842 	if (new_entry == NULL)
843 		panic("vm_map_entry_create: kernel resources exhausted");
844 	return (new_entry);
845 }
846 
847 /*
848  *	vm_map_entry_set_behavior:
849  *
850  *	Set the expected access behavior, either normal, random, or
851  *	sequential.
852  */
853 static inline void
854 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
855 {
856 	entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
857 	    (behavior & MAP_ENTRY_BEHAV_MASK);
858 }
859 
860 /*
861  *	vm_map_entry_set_max_free:
862  *
863  *	Set the max_free field in a vm_map_entry.
864  */
865 static inline void
866 vm_map_entry_set_max_free(vm_map_entry_t entry)
867 {
868 
869 	entry->max_free = entry->adj_free;
870 	if (entry->left != NULL && entry->left->max_free > entry->max_free)
871 		entry->max_free = entry->left->max_free;
872 	if (entry->right != NULL && entry->right->max_free > entry->max_free)
873 		entry->max_free = entry->right->max_free;
874 }
875 
876 /*
877  *	vm_map_entry_splay:
878  *
879  *	The Sleator and Tarjan top-down splay algorithm with the
880  *	following variation.  Max_free must be computed bottom-up, so
881  *	on the downward pass, maintain the left and right spines in
882  *	reverse order.  Then, make a second pass up each side to fix
883  *	the pointers and compute max_free.  The time bound is O(log n)
884  *	amortized.
885  *
886  *	The new root is the vm_map_entry containing "addr", or else an
887  *	adjacent entry (lower or higher) if addr is not in the tree.
888  *
889  *	The map must be locked, and leaves it so.
890  *
891  *	Returns: the new root.
892  */
893 static vm_map_entry_t
894 vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root)
895 {
896 	vm_map_entry_t llist, rlist;
897 	vm_map_entry_t ltree, rtree;
898 	vm_map_entry_t y;
899 
900 	/* Special case of empty tree. */
901 	if (root == NULL)
902 		return (root);
903 
904 	/*
905 	 * Pass One: Splay down the tree until we find addr or a NULL
906 	 * pointer where addr would go.  llist and rlist are the two
907 	 * sides in reverse order (bottom-up), with llist linked by
908 	 * the right pointer and rlist linked by the left pointer in
909 	 * the vm_map_entry.  Wait until Pass Two to set max_free on
910 	 * the two spines.
911 	 */
912 	llist = NULL;
913 	rlist = NULL;
914 	for (;;) {
915 		/* root is never NULL in here. */
916 		if (addr < root->start) {
917 			y = root->left;
918 			if (y == NULL)
919 				break;
920 			if (addr < y->start && y->left != NULL) {
921 				/* Rotate right and put y on rlist. */
922 				root->left = y->right;
923 				y->right = root;
924 				vm_map_entry_set_max_free(root);
925 				root = y->left;
926 				y->left = rlist;
927 				rlist = y;
928 			} else {
929 				/* Put root on rlist. */
930 				root->left = rlist;
931 				rlist = root;
932 				root = y;
933 			}
934 		} else if (addr >= root->end) {
935 			y = root->right;
936 			if (y == NULL)
937 				break;
938 			if (addr >= y->end && y->right != NULL) {
939 				/* Rotate left and put y on llist. */
940 				root->right = y->left;
941 				y->left = root;
942 				vm_map_entry_set_max_free(root);
943 				root = y->right;
944 				y->right = llist;
945 				llist = y;
946 			} else {
947 				/* Put root on llist. */
948 				root->right = llist;
949 				llist = root;
950 				root = y;
951 			}
952 		} else
953 			break;
954 	}
955 
956 	/*
957 	 * Pass Two: Walk back up the two spines, flip the pointers
958 	 * and set max_free.  The subtrees of the root go at the
959 	 * bottom of llist and rlist.
960 	 */
961 	ltree = root->left;
962 	while (llist != NULL) {
963 		y = llist->right;
964 		llist->right = ltree;
965 		vm_map_entry_set_max_free(llist);
966 		ltree = llist;
967 		llist = y;
968 	}
969 	rtree = root->right;
970 	while (rlist != NULL) {
971 		y = rlist->left;
972 		rlist->left = rtree;
973 		vm_map_entry_set_max_free(rlist);
974 		rtree = rlist;
975 		rlist = y;
976 	}
977 
978 	/*
979 	 * Final assembly: add ltree and rtree as subtrees of root.
980 	 */
981 	root->left = ltree;
982 	root->right = rtree;
983 	vm_map_entry_set_max_free(root);
984 
985 	return (root);
986 }
987 
988 /*
989  *	vm_map_entry_{un,}link:
990  *
991  *	Insert/remove entries from maps.
992  */
993 static void
994 vm_map_entry_link(vm_map_t map,
995 		  vm_map_entry_t after_where,
996 		  vm_map_entry_t entry)
997 {
998 
999 	CTR4(KTR_VM,
1000 	    "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
1001 	    map->nentries, entry, after_where);
1002 	VM_MAP_ASSERT_LOCKED(map);
1003 	KASSERT(after_where == &map->header ||
1004 	    after_where->end <= entry->start,
1005 	    ("vm_map_entry_link: prev end %jx new start %jx overlap",
1006 	    (uintmax_t)after_where->end, (uintmax_t)entry->start));
1007 	KASSERT(after_where->next == &map->header ||
1008 	    entry->end <= after_where->next->start,
1009 	    ("vm_map_entry_link: new end %jx next start %jx overlap",
1010 	    (uintmax_t)entry->end, (uintmax_t)after_where->next->start));
1011 
1012 	map->nentries++;
1013 	entry->prev = after_where;
1014 	entry->next = after_where->next;
1015 	entry->next->prev = entry;
1016 	after_where->next = entry;
1017 
1018 	if (after_where != &map->header) {
1019 		if (after_where != map->root)
1020 			vm_map_entry_splay(after_where->start, map->root);
1021 		entry->right = after_where->right;
1022 		entry->left = after_where;
1023 		after_where->right = NULL;
1024 		after_where->adj_free = entry->start - after_where->end;
1025 		vm_map_entry_set_max_free(after_where);
1026 	} else {
1027 		entry->right = map->root;
1028 		entry->left = NULL;
1029 	}
1030 	entry->adj_free = (entry->next == &map->header ? map->max_offset :
1031 	    entry->next->start) - entry->end;
1032 	vm_map_entry_set_max_free(entry);
1033 	map->root = entry;
1034 }
1035 
1036 static void
1037 vm_map_entry_unlink(vm_map_t map,
1038 		    vm_map_entry_t entry)
1039 {
1040 	vm_map_entry_t next, prev, root;
1041 
1042 	VM_MAP_ASSERT_LOCKED(map);
1043 	if (entry != map->root)
1044 		vm_map_entry_splay(entry->start, map->root);
1045 	if (entry->left == NULL)
1046 		root = entry->right;
1047 	else {
1048 		root = vm_map_entry_splay(entry->start, entry->left);
1049 		root->right = entry->right;
1050 		root->adj_free = (entry->next == &map->header ? map->max_offset :
1051 		    entry->next->start) - root->end;
1052 		vm_map_entry_set_max_free(root);
1053 	}
1054 	map->root = root;
1055 
1056 	prev = entry->prev;
1057 	next = entry->next;
1058 	next->prev = prev;
1059 	prev->next = next;
1060 	map->nentries--;
1061 	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
1062 	    map->nentries, entry);
1063 }
1064 
1065 /*
1066  *	vm_map_entry_resize_free:
1067  *
1068  *	Recompute the amount of free space following a vm_map_entry
1069  *	and propagate that value up the tree.  Call this function after
1070  *	resizing a map entry in-place, that is, without a call to
1071  *	vm_map_entry_link() or _unlink().
1072  *
1073  *	The map must be locked, and leaves it so.
1074  */
1075 static void
1076 vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry)
1077 {
1078 
1079 	/*
1080 	 * Using splay trees without parent pointers, propagating
1081 	 * max_free up the tree is done by moving the entry to the
1082 	 * root and making the change there.
1083 	 */
1084 	if (entry != map->root)
1085 		map->root = vm_map_entry_splay(entry->start, map->root);
1086 
1087 	entry->adj_free = (entry->next == &map->header ? map->max_offset :
1088 	    entry->next->start) - entry->end;
1089 	vm_map_entry_set_max_free(entry);
1090 }
1091 
1092 /*
1093  *	vm_map_lookup_entry:	[ internal use only ]
1094  *
1095  *	Finds the map entry containing (or
1096  *	immediately preceding) the specified address
1097  *	in the given map; the entry is returned
1098  *	in the "entry" parameter.  The boolean
1099  *	result indicates whether the address is
1100  *	actually contained in the map.
1101  */
1102 boolean_t
1103 vm_map_lookup_entry(
1104 	vm_map_t map,
1105 	vm_offset_t address,
1106 	vm_map_entry_t *entry)	/* OUT */
1107 {
1108 	vm_map_entry_t cur;
1109 	boolean_t locked;
1110 
1111 	/*
1112 	 * If the map is empty, then the map entry immediately preceding
1113 	 * "address" is the map's header.
1114 	 */
1115 	cur = map->root;
1116 	if (cur == NULL)
1117 		*entry = &map->header;
1118 	else if (address >= cur->start && cur->end > address) {
1119 		*entry = cur;
1120 		return (TRUE);
1121 	} else if ((locked = vm_map_locked(map)) ||
1122 	    sx_try_upgrade(&map->lock)) {
1123 		/*
1124 		 * Splay requires a write lock on the map.  However, it only
1125 		 * restructures the binary search tree; it does not otherwise
1126 		 * change the map.  Thus, the map's timestamp need not change
1127 		 * on a temporary upgrade.
1128 		 */
1129 		map->root = cur = vm_map_entry_splay(address, cur);
1130 		if (!locked)
1131 			sx_downgrade(&map->lock);
1132 
1133 		/*
1134 		 * If "address" is contained within a map entry, the new root
1135 		 * is that map entry.  Otherwise, the new root is a map entry
1136 		 * immediately before or after "address".
1137 		 */
1138 		if (address >= cur->start) {
1139 			*entry = cur;
1140 			if (cur->end > address)
1141 				return (TRUE);
1142 		} else
1143 			*entry = cur->prev;
1144 	} else
1145 		/*
1146 		 * Since the map is only locked for read access, perform a
1147 		 * standard binary search tree lookup for "address".
1148 		 */
1149 		for (;;) {
1150 			if (address < cur->start) {
1151 				if (cur->left == NULL) {
1152 					*entry = cur->prev;
1153 					break;
1154 				}
1155 				cur = cur->left;
1156 			} else if (cur->end > address) {
1157 				*entry = cur;
1158 				return (TRUE);
1159 			} else {
1160 				if (cur->right == NULL) {
1161 					*entry = cur;
1162 					break;
1163 				}
1164 				cur = cur->right;
1165 			}
1166 		}
1167 	return (FALSE);
1168 }
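
/*
 * Illustrative sketch, not part of the original file: the common "look up and
 * clip" idiom used by callers that operate on an address range.  When the
 * lookup misses, the returned entry is the predecessor, so processing starts
 * at its successor.
 *
 *	if (vm_map_lookup_entry(map, start, &entry))
 *		vm_map_clip_start(map, entry, start);
 *	else
 *		entry = entry->next;
 *	for (; entry != &map->header && entry->start < end;
 *	    entry = entry->next) {
 *		vm_map_clip_end(map, entry, end);
 *		...operate on entry...
 *	}
 */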
1169 
1170 /*
1171  *	vm_map_insert:
1172  *
1173  *	Inserts the given whole VM object into the target
1174  *	map at the specified address range.  The object's
1175  *	size should match that of the address range.
1176  *
1177  *	Requires that the map be locked, and leaves it so.
1178  *
1179  *	If object is non-NULL, ref count must be bumped by caller
1180  *	prior to making call to account for the new entry.
1181  */
1182 int
1183 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1184     vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow)
1185 {
1186 	vm_map_entry_t new_entry, prev_entry, temp_entry;
1187 	struct ucred *cred;
1188 	vm_eflags_t protoeflags;
1189 	vm_inherit_t inheritance;
1190 
1191 	VM_MAP_ASSERT_LOCKED(map);
1192 	KASSERT((object != kmem_object && object != kernel_object) ||
1193 	    (cow & MAP_COPY_ON_WRITE) == 0,
1194 	    ("vm_map_insert: kmem or kernel object and COW"));
1195 	KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0,
1196 	    ("vm_map_insert: paradoxical MAP_NOFAULT request"));
1197 	KASSERT((prot & ~max) == 0,
1198 	    ("prot %#x is not subset of max_prot %#x", prot, max));
1199 
1200 	/*
1201 	 * Check that the start and end points are not bogus.
1202 	 */
1203 	if (start < map->min_offset || end > map->max_offset || start >= end)
1204 		return (KERN_INVALID_ADDRESS);
1205 
1206 	/*
1207 	 * Find the entry prior to the proposed starting address; if it's part
1208 	 * of an existing entry, this range is bogus.
1209 	 */
1210 	if (vm_map_lookup_entry(map, start, &temp_entry))
1211 		return (KERN_NO_SPACE);
1212 
1213 	prev_entry = temp_entry;
1214 
1215 	/*
1216 	 * Assert that the next entry doesn't overlap the end point.
1217 	 */
1218 	if (prev_entry->next != &map->header && prev_entry->next->start < end)
1219 		return (KERN_NO_SPACE);
1220 
1221 	if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL ||
1222 	    max != VM_PROT_NONE))
1223 		return (KERN_INVALID_ARGUMENT);
1224 
1225 	protoeflags = 0;
1226 	if (cow & MAP_COPY_ON_WRITE)
1227 		protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY;
1228 	if (cow & MAP_NOFAULT)
1229 		protoeflags |= MAP_ENTRY_NOFAULT;
1230 	if (cow & MAP_DISABLE_SYNCER)
1231 		protoeflags |= MAP_ENTRY_NOSYNC;
1232 	if (cow & MAP_DISABLE_COREDUMP)
1233 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
1234 	if (cow & MAP_STACK_GROWS_DOWN)
1235 		protoeflags |= MAP_ENTRY_GROWS_DOWN;
1236 	if (cow & MAP_STACK_GROWS_UP)
1237 		protoeflags |= MAP_ENTRY_GROWS_UP;
1238 	if (cow & MAP_VN_WRITECOUNT)
1239 		protoeflags |= MAP_ENTRY_VN_WRITECNT;
1240 	if ((cow & MAP_CREATE_GUARD) != 0)
1241 		protoeflags |= MAP_ENTRY_GUARD;
1242 	if ((cow & MAP_CREATE_STACK_GAP_DN) != 0)
1243 		protoeflags |= MAP_ENTRY_STACK_GAP_DN;
1244 	if ((cow & MAP_CREATE_STACK_GAP_UP) != 0)
1245 		protoeflags |= MAP_ENTRY_STACK_GAP_UP;
1246 	if (cow & MAP_INHERIT_SHARE)
1247 		inheritance = VM_INHERIT_SHARE;
1248 	else
1249 		inheritance = VM_INHERIT_DEFAULT;
1250 
1251 	cred = NULL;
1252 	if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
1253 		goto charged;
1254 	if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
1255 	    ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
1256 		if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start))
1257 			return (KERN_RESOURCE_SHORTAGE);
1258 		KASSERT(object == NULL ||
1259 		    (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 ||
1260 		    object->cred == NULL,
1261 		    ("overcommit: vm_map_insert o %p", object));
1262 		cred = curthread->td_ucred;
1263 	}
1264 
1265 charged:
1266 	/* Expand the kernel pmap, if necessary. */
1267 	if (map == kernel_map && end > kernel_vm_end)
1268 		pmap_growkernel(end);
1269 	if (object != NULL) {
1270 		/*
1271 		 * OBJ_ONEMAPPING must be cleared unless this mapping
1272 		 * is trivially proven to be the only mapping for any
1273 		 * of the object's pages.  (Object granularity
1274 		 * reference counting is insufficient to recognize
1275 		 * aliases with precision.)
1276 		 */
1277 		VM_OBJECT_WLOCK(object);
1278 		if (object->ref_count > 1 || object->shadow_count != 0)
1279 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
1280 		VM_OBJECT_WUNLOCK(object);
1281 	} else if (prev_entry != &map->header &&
1282 	    prev_entry->eflags == protoeflags &&
1283 	    (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 &&
1284 	    prev_entry->end == start && prev_entry->wired_count == 0 &&
1285 	    (prev_entry->cred == cred ||
1286 	    (prev_entry->object.vm_object != NULL &&
1287 	    prev_entry->object.vm_object->cred == cred)) &&
1288 	    vm_object_coalesce(prev_entry->object.vm_object,
1289 	    prev_entry->offset,
1290 	    (vm_size_t)(prev_entry->end - prev_entry->start),
1291 	    (vm_size_t)(end - prev_entry->end), cred != NULL &&
1292 	    (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) {
1293 		/*
1294 		 * We were able to extend the object.  Determine if we
1295 		 * can extend the previous map entry to include the
1296 		 * new range as well.
1297 		 */
1298 		if (prev_entry->inheritance == inheritance &&
1299 		    prev_entry->protection == prot &&
1300 		    prev_entry->max_protection == max) {
1301 			if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0)
1302 				map->size += end - prev_entry->end;
1303 			prev_entry->end = end;
1304 			vm_map_entry_resize_free(map, prev_entry);
1305 			vm_map_simplify_entry(map, prev_entry);
1306 			return (KERN_SUCCESS);
1307 		}
1308 
1309 		/*
1310 		 * If we can extend the object but cannot extend the
1311 		 * map entry, we have to create a new map entry.  We
1312 		 * must bump the ref count on the extended object to
1313 		 * account for it.  object may be NULL.
1314 		 */
1315 		object = prev_entry->object.vm_object;
1316 		offset = prev_entry->offset +
1317 		    (prev_entry->end - prev_entry->start);
1318 		vm_object_reference(object);
1319 		if (cred != NULL && object != NULL && object->cred != NULL &&
1320 		    !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
1321 			/* Object already accounts for this uid. */
1322 			cred = NULL;
1323 		}
1324 	}
1325 	if (cred != NULL)
1326 		crhold(cred);
1327 
1328 	/*
1329 	 * Create a new entry
1330 	 */
1331 	new_entry = vm_map_entry_create(map);
1332 	new_entry->start = start;
1333 	new_entry->end = end;
1334 	new_entry->cred = NULL;
1335 
1336 	new_entry->eflags = protoeflags;
1337 	new_entry->object.vm_object = object;
1338 	new_entry->offset = offset;
1339 
1340 	new_entry->inheritance = inheritance;
1341 	new_entry->protection = prot;
1342 	new_entry->max_protection = max;
1343 	new_entry->wired_count = 0;
1344 	new_entry->wiring_thread = NULL;
1345 	new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT;
1346 	new_entry->next_read = start;
1347 
1348 	KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry),
1349 	    ("overcommit: vm_map_insert leaks vm_map %p", new_entry));
1350 	new_entry->cred = cred;
1351 
1352 	/*
1353 	 * Insert the new entry into the list
1354 	 */
1355 	vm_map_entry_link(map, prev_entry, new_entry);
1356 	if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0)
1357 		map->size += new_entry->end - new_entry->start;
1358 
1359 	/*
1360 	 * Try to coalesce the new entry with both the previous and next
1361 	 * entries in the list.  Previously, we only attempted to coalesce
1362 	 * with the previous entry when object is NULL.  Here, we handle the
1363 	 * other cases, which are less common.
1364 	 */
1365 	vm_map_simplify_entry(map, new_entry);
1366 
1367 	if ((cow & (MAP_PREFAULT | MAP_PREFAULT_PARTIAL)) != 0) {
1368 		vm_map_pmap_enter(map, start, prot, object, OFF_TO_IDX(offset),
1369 		    end - start, cow & MAP_PREFAULT_PARTIAL);
1370 	}
1371 
1372 	return (KERN_SUCCESS);
1373 }
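
/*
 * Illustrative sketch, not part of the original file: a minimal caller of
 * vm_map_insert().  Note the object reference taken before the call, as
 * required by the comment above; on failure the caller drops it again.
 *
 *	vm_object_reference(object);
 *	vm_map_lock(map);
 *	rv = vm_map_insert(map, object, 0, start, start + size,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, 0);
 *	vm_map_unlock(map);
 *	if (rv != KERN_SUCCESS)
 *		vm_object_deallocate(object);
 */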
1374 
1375 /*
1376  *	vm_map_findspace:
1377  *
1378  *	Find the first fit (lowest VM address) for "length" free bytes
1379  *	beginning at address >= start in the given map.
1380  *
1381  *	In a vm_map_entry, "adj_free" is the amount of free space
1382  *	adjacent (higher address) to this entry, and "max_free" is the
1383  *	maximum amount of contiguous free space in its subtree.  This
1384  *	allows finding a free region in one path down the tree, so
1385  *	O(log n) amortized with splay trees.
1386  *
1387  *	The map must be locked, and leaves it so.
1388  *
1389  *	Returns: 0 on success, and starting address in *addr,
1390  *		 1 if insufficient space.
1391  */
1392 int
1393 vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length,
1394     vm_offset_t *addr)	/* OUT */
1395 {
1396 	vm_map_entry_t entry;
1397 	vm_offset_t st;
1398 
1399 	/*
1400 	 * Request must fit within min/max VM address and must avoid
1401 	 * address wrap.
1402 	 */
1403 	if (start < map->min_offset)
1404 		start = map->min_offset;
1405 	if (start + length > map->max_offset || start + length < start)
1406 		return (1);
1407 
1408 	/* Empty tree means wide open address space. */
1409 	if (map->root == NULL) {
1410 		*addr = start;
1411 		return (0);
1412 	}
1413 
1414 	/*
1415 	 * After splay, if start comes before root node, then there
1416 	 * must be a gap from start to the root.
1417 	 */
1418 	map->root = vm_map_entry_splay(start, map->root);
1419 	if (start + length <= map->root->start) {
1420 		*addr = start;
1421 		return (0);
1422 	}
1423 
1424 	/*
1425 	 * Root is the last node that might begin its gap before
1426 	 * start, and this is the last comparison where address
1427 	 * wrap might be a problem.
1428 	 */
1429 	st = (start > map->root->end) ? start : map->root->end;
1430 	if (length <= map->root->end + map->root->adj_free - st) {
1431 		*addr = st;
1432 		return (0);
1433 	}
1434 
1435 	/* With max_free, can immediately tell if no solution. */
1436 	entry = map->root->right;
1437 	if (entry == NULL || length > entry->max_free)
1438 		return (1);
1439 
1440 	/*
1441 	 * Search the right subtree in the order: left subtree, root,
1442 	 * right subtree (first fit).  The previous splay implies that
1443 	 * all regions in the right subtree have addresses > start.
1444 	 */
1445 	while (entry != NULL) {
1446 		if (entry->left != NULL && entry->left->max_free >= length)
1447 			entry = entry->left;
1448 		else if (entry->adj_free >= length) {
1449 			*addr = entry->end;
1450 			return (0);
1451 		} else
1452 			entry = entry->right;
1453 	}
1454 
1455 	/* Can't get here, so panic if we do. */
1456 	panic("vm_map_findspace: max_free corrupt");
1457 }
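
/*
 * Illustrative sketch, not part of the original file: pairing
 * vm_map_findspace() with vm_map_insert() under a single lock acquisition,
 * which is essentially what vm_map_find() does below.
 *
 *	vm_map_lock(map);
 *	if (vm_map_findspace(map, vm_map_min(map), length, &addr) == 0)
 *		rv = vm_map_insert(map, NULL, 0, addr, addr + length,
 *		    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, 0);
 *	else
 *		rv = KERN_NO_SPACE;
 *	vm_map_unlock(map);
 */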
1458 
1459 int
1460 vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1461     vm_offset_t start, vm_size_t length, vm_prot_t prot,
1462     vm_prot_t max, int cow)
1463 {
1464 	vm_offset_t end;
1465 	int result;
1466 
1467 	end = start + length;
1468 	KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 ||
1469 	    object == NULL,
1470 	    ("vm_map_fixed: non-NULL backing object for stack"));
1471 	vm_map_lock(map);
1472 	VM_MAP_RANGE_CHECK(map, start, end);
1473 	if ((cow & MAP_CHECK_EXCL) == 0)
1474 		vm_map_delete(map, start, end);
1475 	if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) {
1476 		result = vm_map_stack_locked(map, start, length, sgrowsiz,
1477 		    prot, max, cow);
1478 	} else {
1479 		result = vm_map_insert(map, object, offset, start, end,
1480 		    prot, max, cow);
1481 	}
1482 	vm_map_unlock(map);
1483 	return (result);
1484 }
1485 
1486 /*
1487  *	vm_map_find finds an unallocated region in the target address
1488  *	map with the given length.  The search is defined to be
1489  *	first-fit from the specified address; the region found is
1490  *	returned in the same parameter.
1491  *
1492  *	If object is non-NULL, ref count must be bumped by caller
1493  *	prior to making call to account for the new entry.
1494  */
1495 int
1496 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1497 	    vm_offset_t *addr,	/* IN/OUT */
1498 	    vm_size_t length, vm_offset_t max_addr, int find_space,
1499 	    vm_prot_t prot, vm_prot_t max, int cow)
1500 {
1501 	vm_offset_t alignment, initial_addr, start;
1502 	int result;
1503 
1504 	KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 ||
1505 	    object == NULL,
1506 	    ("vm_map_find: non-NULL backing object for stack"));
1507 	if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL ||
1508 	    (object->flags & OBJ_COLORED) == 0))
1509 		find_space = VMFS_ANY_SPACE;
1510 	if (find_space >> 8 != 0) {
1511 		KASSERT((find_space & 0xff) == 0, ("bad VMFS flags"));
1512 		alignment = (vm_offset_t)1 << (find_space >> 8);
1513 	} else
1514 		alignment = 0;
1515 	initial_addr = *addr;
1516 	vm_map_lock(map);
1517 again:
1518 	start = initial_addr;
1519 	do {
1520 		if (find_space != VMFS_NO_SPACE) {
1521 			if (vm_map_findspace(map, start, length, addr) ||
1522 			    (max_addr != 0 && *addr + length > max_addr)) {
1523 				if (find_space == VMFS_OPTIMAL_SPACE) {
1524 					find_space = VMFS_ANY_SPACE;
1525 					goto again;
1526 				}
1527 				vm_map_unlock(map);
1528 				return (KERN_NO_SPACE);
1529 			}
1530 			switch (find_space) {
1531 			case VMFS_SUPER_SPACE:
1532 			case VMFS_OPTIMAL_SPACE:
1533 				pmap_align_superpage(object, offset, addr,
1534 				    length);
1535 				break;
1536 			case VMFS_ANY_SPACE:
1537 				break;
1538 			default:
1539 				if ((*addr & (alignment - 1)) != 0) {
1540 					*addr &= ~(alignment - 1);
1541 					*addr += alignment;
1542 				}
1543 				break;
1544 			}
1545 
1546 			start = *addr;
1547 		}
1548 		if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) {
1549 			result = vm_map_stack_locked(map, start, length,
1550 			    sgrowsiz, prot, max, cow);
1551 		} else {
1552 			result = vm_map_insert(map, object, offset, start,
1553 			    start + length, prot, max, cow);
1554 		}
1555 	} while (result == KERN_NO_SPACE && find_space != VMFS_NO_SPACE &&
1556 	    find_space != VMFS_ANY_SPACE);
1557 	vm_map_unlock(map);
1558 	return (result);
1559 }
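
/*
 * Illustrative sketch, not part of the original file: an anonymous mapping
 * placed anywhere in the map.  The hint in *addr gives the lowest address at
 * which the search starts; vm_map_find() acquires the map lock itself.
 *
 *	vm_offset_t addr;
 *	int rv;
 *
 *	addr = vm_map_min(map);
 *	rv = vm_map_find(map, NULL, 0, &addr, size, 0, VMFS_ANY_SPACE,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, 0);
 *	if (rv != KERN_SUCCESS)
 *		return (vm_mmap_to_errno(rv));
 */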
1560 
1561 int
1562 vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1563     vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr,
1564     vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max,
1565     int cow)
1566 {
1567 	vm_offset_t hint;
1568 	int rv;
1569 
1570 	hint = *addr;
1571 	for (;;) {
1572 		rv = vm_map_find(map, object, offset, addr, length, max_addr,
1573 		    find_space, prot, max, cow);
1574 		if (rv == KERN_SUCCESS || min_addr >= hint)
1575 			return (rv);
1576 		*addr = hint = min_addr;
1577 	}
1578 }
1579 
1580 /*
1581  *	vm_map_simplify_entry:
1582  *
1583  *	Simplify the given map entry by merging with either neighbor.  This
1584  *	routine also has the ability to merge with both neighbors.
1585  *
1586  *	The map must be locked.
1587  *
1588  *	This routine guarantees that the passed entry remains valid (though
1589  *	possibly extended).  When merging, this routine may delete one or
1590  *	both neighbors.
1591  */
1592 void
1593 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
1594 {
1595 	vm_map_entry_t next, prev;
1596 	vm_size_t prevsize, esize;
1597 
1598 	if ((entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP |
1599 	    MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) != 0)
1600 		return;
1601 
1602 	prev = entry->prev;
1603 	if (prev != &map->header) {
1604 		prevsize = prev->end - prev->start;
1605 		if ( (prev->end == entry->start) &&
1606 		     (prev->object.vm_object == entry->object.vm_object) &&
1607 		     (!prev->object.vm_object ||
1608 			(prev->offset + prevsize == entry->offset)) &&
1609 		     (prev->eflags == entry->eflags) &&
1610 		     (prev->protection == entry->protection) &&
1611 		     (prev->max_protection == entry->max_protection) &&
1612 		     (prev->inheritance == entry->inheritance) &&
1613 		     (prev->wired_count == entry->wired_count) &&
1614 		     (prev->cred == entry->cred)) {
1615 			vm_map_entry_unlink(map, prev);
1616 			entry->start = prev->start;
1617 			entry->offset = prev->offset;
1618 			if (entry->prev != &map->header)
1619 				vm_map_entry_resize_free(map, entry->prev);
1620 
1621 			/*
1622 			 * If the backing object is a vnode object,
1623 			 * vm_object_deallocate() calls vrele().
1624 			 * However, vrele() does not lock the vnode
1625 			 * because the vnode has additional
1626 			 * references.  Thus, the map lock can be kept
1627 			 * without causing a lock-order reversal with
1628 			 * the vnode lock.
1629 			 *
1630 			 * Since we count the number of virtual page
1631 			 * mappings in object->un_pager.vnp.writemappings,
1632 			 * the writemappings value should not be adjusted
1633 			 * when the entry is disposed of.
1634 			 */
1635 			if (prev->object.vm_object)
1636 				vm_object_deallocate(prev->object.vm_object);
1637 			if (prev->cred != NULL)
1638 				crfree(prev->cred);
1639 			vm_map_entry_dispose(map, prev);
1640 		}
1641 	}
1642 
1643 	next = entry->next;
1644 	if (next != &map->header) {
1645 		esize = entry->end - entry->start;
1646 		if ((entry->end == next->start) &&
1647 		    (next->object.vm_object == entry->object.vm_object) &&
1648 		     (!entry->object.vm_object ||
1649 			(entry->offset + esize == next->offset)) &&
1650 		    (next->eflags == entry->eflags) &&
1651 		    (next->protection == entry->protection) &&
1652 		    (next->max_protection == entry->max_protection) &&
1653 		    (next->inheritance == entry->inheritance) &&
1654 		    (next->wired_count == entry->wired_count) &&
1655 		    (next->cred == entry->cred)) {
1656 			vm_map_entry_unlink(map, next);
1657 			entry->end = next->end;
1658 			vm_map_entry_resize_free(map, entry);
1659 
1660 			/*
1661 			 * See comment above.
1662 			 */
1663 			if (next->object.vm_object)
1664 				vm_object_deallocate(next->object.vm_object);
1665 			if (next->cred != NULL)
1666 				crfree(next->cred);
1667 			vm_map_entry_dispose(map, next);
1668 		}
1669 	}
1670 }
1671 /*
1672  *	vm_map_clip_start:	[ internal use only ]
1673  *
1674  *	Asserts that the given entry begins at or after
1675  *	the specified address; if necessary,
1676  *	it splits the entry into two.
1677  */
1678 #define vm_map_clip_start(map, entry, startaddr) \
1679 { \
1680 	if (startaddr > entry->start) \
1681 		_vm_map_clip_start(map, entry, startaddr); \
1682 }
1683 
1684 /*
1685  *	This routine is called only when it is known that
1686  *	the entry must be split.
1687  */
1688 static void
1689 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
1690 {
1691 	vm_map_entry_t new_entry;
1692 
1693 	VM_MAP_ASSERT_LOCKED(map);
1694 	KASSERT(entry->end > start && entry->start < start,
1695 	    ("_vm_map_clip_start: invalid clip of entry %p", entry));
1696 
1697 	/*
1698 	 * Split off the front portion -- note that we must insert the new
1699 	 * entry BEFORE this one, so that this entry has the specified
1700 	 * starting address.
1701 	 */
1702 	vm_map_simplify_entry(map, entry);
1703 
1704 	/*
1705 	 * If there is no object backing this entry, we might as well create
1706 	 * one now.  If we defer it, an object can get created after the map
1707 	 * is clipped, and individual objects will be created for the split-up
1708 	 * map.  This is a bit of a hack, but is also about the best place to
1709 	 * put this improvement.
1710 	 */
1711 	if (entry->object.vm_object == NULL && !map->system_map &&
1712 	    (entry->eflags & MAP_ENTRY_GUARD) == 0) {
1713 		vm_object_t object;
1714 		object = vm_object_allocate(OBJT_DEFAULT,
1715 				atop(entry->end - entry->start));
1716 		entry->object.vm_object = object;
1717 		entry->offset = 0;
1718 		if (entry->cred != NULL) {
1719 			object->cred = entry->cred;
1720 			object->charge = entry->end - entry->start;
1721 			entry->cred = NULL;
1722 		}
1723 	} else if (entry->object.vm_object != NULL &&
1724 		   ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
1725 		   entry->cred != NULL) {
1726 		VM_OBJECT_WLOCK(entry->object.vm_object);
1727 		KASSERT(entry->object.vm_object->cred == NULL,
1728 		    ("OVERCOMMIT: vm_entry_clip_start: both cred e %p", entry));
1729 		entry->object.vm_object->cred = entry->cred;
1730 		entry->object.vm_object->charge = entry->end - entry->start;
1731 		VM_OBJECT_WUNLOCK(entry->object.vm_object);
1732 		entry->cred = NULL;
1733 	}
1734 
1735 	new_entry = vm_map_entry_create(map);
1736 	*new_entry = *entry;
1737 
1738 	new_entry->end = start;
1739 	entry->offset += (start - entry->start);
1740 	entry->start = start;
1741 	if (new_entry->cred != NULL)
1742 		crhold(entry->cred);
1743 
1744 	vm_map_entry_link(map, entry->prev, new_entry);
1745 
1746 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1747 		vm_object_reference(new_entry->object.vm_object);
1748 		/*
1749 		 * The object->un_pager.vnp.writemappings for the
1750 		 * object of MAP_ENTRY_VN_WRITECNT type entry shall be
1751 		 * kept as is here.  The virtual pages are
1752 		 * re-distributed among the clipped entries, so the sum is
1753 		 * left the same.
1754 		 */
1755 	}
1756 }
1757 
1758 /*
1759  *	vm_map_clip_end:	[ internal use only ]
1760  *
1761  *	Asserts that the given entry ends at or before
1762  *	the specified address; if necessary,
1763  *	it splits the entry into two.
1764  */
1765 #define vm_map_clip_end(map, entry, endaddr) \
1766 { \
1767 	if ((endaddr) < (entry->end)) \
1768 		_vm_map_clip_end((map), (entry), (endaddr)); \
1769 }
1770 
1771 /*
1772  *	This routine is called only when it is known that
1773  *	the entry must be split.
1774  */
1775 static void
1776 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1777 {
1778 	vm_map_entry_t new_entry;
1779 
1780 	VM_MAP_ASSERT_LOCKED(map);
1781 	KASSERT(entry->start < end && entry->end > end,
1782 	    ("_vm_map_clip_end: invalid clip of entry %p", entry));
1783 
1784 	/*
1785 	 * If there is no object backing this entry, we might as well create
1786 	 * one now.  If we defer it, an object can get created after the map
1787 	 * is clipped, and individual objects will be created for the split-up
1788 	 * map.  This is a bit of a hack, but is also about the best place to
1789 	 * put this improvement.
1790 	 */
1791 	if (entry->object.vm_object == NULL && !map->system_map &&
1792 	    (entry->eflags & MAP_ENTRY_GUARD) == 0) {
1793 		vm_object_t object;
1794 		object = vm_object_allocate(OBJT_DEFAULT,
1795 				atop(entry->end - entry->start));
1796 		entry->object.vm_object = object;
1797 		entry->offset = 0;
1798 		if (entry->cred != NULL) {
1799 			object->cred = entry->cred;
1800 			object->charge = entry->end - entry->start;
1801 			entry->cred = NULL;
1802 		}
1803 	} else if (entry->object.vm_object != NULL &&
1804 		   ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
1805 		   entry->cred != NULL) {
1806 		VM_OBJECT_WLOCK(entry->object.vm_object);
1807 		KASSERT(entry->object.vm_object->cred == NULL,
1808 		    ("OVERCOMMIT: vm_entry_clip_end: both cred e %p", entry));
1809 		entry->object.vm_object->cred = entry->cred;
1810 		entry->object.vm_object->charge = entry->end - entry->start;
1811 		VM_OBJECT_WUNLOCK(entry->object.vm_object);
1812 		entry->cred = NULL;
1813 	}
1814 
1815 	/*
1816 	 * Create a new entry and insert it AFTER the specified entry
1817 	 */
1818 	new_entry = vm_map_entry_create(map);
1819 	*new_entry = *entry;
1820 
1821 	new_entry->start = entry->end = end;
1822 	new_entry->offset += (end - entry->start);
1823 	if (new_entry->cred != NULL)
1824 		crhold(entry->cred);
1825 
1826 	vm_map_entry_link(map, entry, new_entry);
1827 
1828 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1829 		vm_object_reference(new_entry->object.vm_object);
1830 	}
1831 }
1832 
1833 /*
1834  *	vm_map_submap:		[ kernel use only ]
1835  *
1836  *	Mark the given range as handled by a subordinate map.
1837  *
1838  *	This range must have been created with vm_map_find,
1839  *	and no other operations may have been performed on this
1840  *	range prior to calling vm_map_submap.
1841  *
1842  *	Only a limited number of operations can be performed
1843  *	within this range after calling vm_map_submap:
1844  *		vm_fault
1845  *	[Don't try vm_map_copy!]
1846  *
1847  *	To remove a submapping, one must first remove the
1848  *	range from the superior map, and then destroy the
1849  *	submap (if desired).  [Better yet, don't try it.]
1850  */
1851 int
1852 vm_map_submap(
1853 	vm_map_t map,
1854 	vm_offset_t start,
1855 	vm_offset_t end,
1856 	vm_map_t submap)
1857 {
1858 	vm_map_entry_t entry;
1859 	int result = KERN_INVALID_ARGUMENT;
1860 
1861 	vm_map_lock(map);
1862 
1863 	VM_MAP_RANGE_CHECK(map, start, end);
1864 
1865 	if (vm_map_lookup_entry(map, start, &entry)) {
1866 		vm_map_clip_start(map, entry, start);
1867 	} else
1868 		entry = entry->next;
1869 
1870 	vm_map_clip_end(map, entry, end);
1871 
1872 	if ((entry->start == start) && (entry->end == end) &&
1873 	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1874 	    (entry->object.vm_object == NULL)) {
1875 		entry->object.sub_map = submap;
1876 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1877 		result = KERN_SUCCESS;
1878 	}
1879 	vm_map_unlock(map);
1880 
1881 	return (result);
1882 }
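
/*
 * Illustrative sketch (hypothetical caller, not part of the build): a
 * kernel subsystem installs a submap over a range it previously
 * reserved with vm_map_find(), and checks the result:
 *
 *	if (vm_map_submap(map, start, end, submap) != KERN_SUCCESS)
 *		panic("vm_map_submap failed");
 */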
1883 
1884 /*
1885  * The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified
1886  */
1887 #define	MAX_INIT_PT	96
1888 
1889 /*
1890  *	vm_map_pmap_enter:
1891  *
1892  *	Preload the specified map's pmap with mappings to the specified
1893  *	object's memory-resident pages.  No further physical pages are
1894  *	allocated, and no further virtual pages are retrieved from secondary
1895  *	storage.  If the specified flags include MAP_PREFAULT_PARTIAL, then a
1896  *	limited number of page mappings are created at the low-end of the
1897  *	specified address range.  (For this purpose, a superpage mapping
1898  *	counts as one page mapping.)  Otherwise, all resident pages within
1899  *	the specified address range are mapped.
1900  */
1901 static void
1902 vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
1903     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags)
1904 {
1905 	vm_offset_t start;
1906 	vm_page_t p, p_start;
1907 	vm_pindex_t mask, psize, threshold, tmpidx;
1908 
1909 	if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL)
1910 		return;
1911 	VM_OBJECT_RLOCK(object);
1912 	if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
1913 		VM_OBJECT_RUNLOCK(object);
1914 		VM_OBJECT_WLOCK(object);
1915 		if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
1916 			pmap_object_init_pt(map->pmap, addr, object, pindex,
1917 			    size);
1918 			VM_OBJECT_WUNLOCK(object);
1919 			return;
1920 		}
1921 		VM_OBJECT_LOCK_DOWNGRADE(object);
1922 	}
1923 
1924 	psize = atop(size);
1925 	if (psize + pindex > object->size) {
1926 		if (object->size < pindex) {
1927 			VM_OBJECT_RUNLOCK(object);
1928 			return;
1929 		}
1930 		psize = object->size - pindex;
1931 	}
1932 
1933 	start = 0;
1934 	p_start = NULL;
1935 	threshold = MAX_INIT_PT;
1936 
1937 	p = vm_page_find_least(object, pindex);
1938 	/*
1939 	 * Assert: the variable p is either (1) the page with the
1940 	 * least pindex greater than or equal to the parameter pindex
1941 	 * or (2) NULL.
1942 	 */
1943 	for (;
1944 	     p != NULL && (tmpidx = p->pindex - pindex) < psize;
1945 	     p = TAILQ_NEXT(p, listq)) {
1946 		/*
1947 		 * Don't allow madvise(2) prefaulting to exhaust the truly
1948 		 * free pages by allocating pv entries for them.
1949 		 */
1950 		if (((flags & MAP_PREFAULT_MADVISE) != 0 &&
1951 		    vm_cnt.v_free_count < vm_cnt.v_free_reserved) ||
1952 		    ((flags & MAP_PREFAULT_PARTIAL) != 0 &&
1953 		    tmpidx >= threshold)) {
1954 			psize = tmpidx;
1955 			break;
1956 		}
1957 		if (p->valid == VM_PAGE_BITS_ALL) {
1958 			if (p_start == NULL) {
1959 				start = addr + ptoa(tmpidx);
1960 				p_start = p;
1961 			}
1962 			/* Jump ahead if a superpage mapping is possible. */
1963 			if (p->psind > 0 && ((addr + ptoa(tmpidx)) &
1964 			    (pagesizes[p->psind] - 1)) == 0) {
1965 				mask = atop(pagesizes[p->psind]) - 1;
1966 				if (tmpidx + mask < psize &&
1967 				    vm_page_ps_test(p, PS_ALL_VALID, NULL)) {
1968 					p += mask;
1969 					threshold += mask;
1970 				}
1971 			}
1972 		} else if (p_start != NULL) {
1973 			pmap_enter_object(map->pmap, start, addr +
1974 			    ptoa(tmpidx), p_start, prot);
1975 			p_start = NULL;
1976 		}
1977 	}
1978 	if (p_start != NULL)
1979 		pmap_enter_object(map->pmap, start, addr + ptoa(psize),
1980 		    p_start, prot);
1981 	VM_OBJECT_RUNLOCK(object);
1982 }
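
/*
 * Illustrative sketch (not part of the build): this preloader is driven,
 * for example, by the MADV_WILLNEED path in vm_map_madvise() below,
 * which passes MAP_PREFAULT_MADVISE so that prefaulting backs off when
 * free pages run short:
 *
 *	vm_map_pmap_enter(map, useStart, current->protection,
 *	    current->object.vm_object, pstart, ptoa(pend - pstart),
 *	    MAP_PREFAULT_MADVISE);
 */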
1983 
1984 /*
1985  *	vm_map_protect:
1986  *
1987  *	Sets the protection of the specified address
1988  *	region in the target map.  If "set_max" is
1989  *	specified, the maximum protection is to be set;
1990  *	otherwise, only the current protection is affected.
1991  */
1992 int
1993 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1994 	       vm_prot_t new_prot, boolean_t set_max)
1995 {
1996 	vm_map_entry_t current, entry;
1997 	vm_object_t obj;
1998 	struct ucred *cred;
1999 	vm_prot_t old_prot;
2000 
2001 	if (start == end)
2002 		return (KERN_SUCCESS);
2003 
2004 	vm_map_lock(map);
2005 
2006 	/*
2007 	 * Ensure that we are not concurrently wiring pages.  vm_map_wire() may
2008 	 * need to fault pages into the map and will drop the map lock while
2009 	 * doing so, and the VM object may end up in an inconsistent state if we
2010 	 * update the protection on the map entry in between faults.
2011 	 */
2012 	vm_map_wait_busy(map);
2013 
2014 	VM_MAP_RANGE_CHECK(map, start, end);
2015 
2016 	if (vm_map_lookup_entry(map, start, &entry)) {
2017 		vm_map_clip_start(map, entry, start);
2018 	} else {
2019 		entry = entry->next;
2020 	}
2021 
2022 	/*
2023 	 * Make a first pass to check for protection violations.
2024 	 */
2025 	for (current = entry; current != &map->header && current->start < end;
2026 	    current = current->next) {
2027 		if ((current->eflags & MAP_ENTRY_GUARD) != 0)
2028 			continue;
2029 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
2030 			vm_map_unlock(map);
2031 			return (KERN_INVALID_ARGUMENT);
2032 		}
2033 		if ((new_prot & current->max_protection) != new_prot) {
2034 			vm_map_unlock(map);
2035 			return (KERN_PROTECTION_FAILURE);
2036 		}
2037 	}
2038 
2039 	/*
2040 	 * Do an accounting pass for private read-only mappings that
2041 	 * will now do copy-on-write due to the newly allowed write
2042 	 * (e.g., a debugger setting a breakpoint in the text segment).
2043 	 */
2044 	for (current = entry; current != &map->header && current->start < end;
2045 	    current = current->next) {
2046 
2047 		vm_map_clip_end(map, current, end);
2048 
2049 		if (set_max ||
2050 		    ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 ||
2051 		    ENTRY_CHARGED(current) ||
2052 		    (current->eflags & MAP_ENTRY_GUARD) != 0) {
2053 			continue;
2054 		}
2055 
2056 		cred = curthread->td_ucred;
2057 		obj = current->object.vm_object;
2058 
2059 		if (obj == NULL || (current->eflags & MAP_ENTRY_NEEDS_COPY)) {
2060 			if (!swap_reserve(current->end - current->start)) {
2061 				vm_map_unlock(map);
2062 				return (KERN_RESOURCE_SHORTAGE);
2063 			}
2064 			crhold(cred);
2065 			current->cred = cred;
2066 			continue;
2067 		}
2068 
2069 		VM_OBJECT_WLOCK(obj);
2070 		if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP) {
2071 			VM_OBJECT_WUNLOCK(obj);
2072 			continue;
2073 		}
2074 
2075 		/*
2076 		 * Charge for the whole object allocation now, since
2077 		 * we cannot distinguish between non-charged and
2078 		 * charged clipped mapping of the same object later.
2079 		 */
2080 		KASSERT(obj->charge == 0,
2081 		    ("vm_map_protect: object %p overcharged (entry %p)",
2082 		    obj, current));
2083 		if (!swap_reserve(ptoa(obj->size))) {
2084 			VM_OBJECT_WUNLOCK(obj);
2085 			vm_map_unlock(map);
2086 			return (KERN_RESOURCE_SHORTAGE);
2087 		}
2088 
2089 		crhold(cred);
2090 		obj->cred = cred;
2091 		obj->charge = ptoa(obj->size);
2092 		VM_OBJECT_WUNLOCK(obj);
2093 	}
2094 
2095 	/*
2096 	 * Go back and fix up protections. [Note that clipping is not
2097 	 * necessary the second time.]
2098 	 */
2099 	for (current = entry; current != &map->header && current->start < end;
2100 	    current = current->next) {
2101 		if ((current->eflags & MAP_ENTRY_GUARD) != 0)
2102 			continue;
2103 
2104 		old_prot = current->protection;
2105 
2106 		if (set_max)
2107 			current->protection =
2108 			    (current->max_protection = new_prot) &
2109 			    old_prot;
2110 		else
2111 			current->protection = new_prot;
2112 
2113 		/*
2114 		 * For user wired map entries, the normal lazy evaluation of
2115 		 * write access upgrades through soft page faults is
2116 		 * undesirable.  Instead, immediately copy any pages that are
2117 		 * copy-on-write and enable write access in the physical map.
2118 		 */
2119 		if ((current->eflags & MAP_ENTRY_USER_WIRED) != 0 &&
2120 		    (current->protection & VM_PROT_WRITE) != 0 &&
2121 		    (old_prot & VM_PROT_WRITE) == 0)
2122 			vm_fault_copy_entry(map, map, current, current, NULL);
2123 
2124 		/*
2125 		 * When restricting access, update the physical map.  Worry
2126 		 * about copy-on-write here.
2127 		 */
2128 		if ((old_prot & ~current->protection) != 0) {
2129 #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
2130 							VM_PROT_ALL)
2131 			pmap_protect(map->pmap, current->start,
2132 			    current->end,
2133 			    current->protection & MASK(current));
2134 #undef	MASK
2135 		}
2136 		vm_map_simplify_entry(map, current);
2137 	}
2138 	vm_map_unlock(map);
2139 	return (KERN_SUCCESS);
2140 }
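
/*
 * Illustrative sketch (hypothetical caller, not part of the build): an
 * mprotect(2)-style caller changes only the current protection and
 * leaves the maximum protection alone:
 *
 *	map = &curproc->p_vmspace->vm_map;
 *	rv = vm_map_protect(map, trunc_page(addr),
 *	    round_page(addr + size), VM_PROT_READ, FALSE);
 *	if (rv != KERN_SUCCESS)
 *		return (EACCES);	(error mapping is caller policy)
 */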
2141 
2142 /*
2143  *	vm_map_madvise:
2144  *
2145  *	This routine traverses a process's map handling the madvise
2146  *	system call.  Advisories are classified as either those affecting
2147  *	the vm_map_entry structure, or those affecting the underlying
2148  *	objects.
2149  */
2150 int
2151 vm_map_madvise(
2152 	vm_map_t map,
2153 	vm_offset_t start,
2154 	vm_offset_t end,
2155 	int behav)
2156 {
2157 	vm_map_entry_t current, entry;
2158 	int modify_map = 0;
2159 
2160 	/*
2161 	 * Some madvise calls directly modify the vm_map_entry, in which case
2162 	 * we need to use an exclusive lock on the map and we need to perform
2163 	 * various clipping operations.  Otherwise we only need a read-lock
2164 	 * on the map.
2165 	 */
2166 	switch(behav) {
2167 	case MADV_NORMAL:
2168 	case MADV_SEQUENTIAL:
2169 	case MADV_RANDOM:
2170 	case MADV_NOSYNC:
2171 	case MADV_AUTOSYNC:
2172 	case MADV_NOCORE:
2173 	case MADV_CORE:
2174 		if (start == end)
2175 			return (KERN_SUCCESS);
2176 		modify_map = 1;
2177 		vm_map_lock(map);
2178 		break;
2179 	case MADV_WILLNEED:
2180 	case MADV_DONTNEED:
2181 	case MADV_FREE:
2182 		if (start == end)
2183 			return (KERN_SUCCESS);
2184 		vm_map_lock_read(map);
2185 		break;
2186 	default:
2187 		return (KERN_INVALID_ARGUMENT);
2188 	}
2189 
2190 	/*
2191 	 * Locate starting entry and clip if necessary.
2192 	 */
2193 	VM_MAP_RANGE_CHECK(map, start, end);
2194 
2195 	if (vm_map_lookup_entry(map, start, &entry)) {
2196 		if (modify_map)
2197 			vm_map_clip_start(map, entry, start);
2198 	} else {
2199 		entry = entry->next;
2200 	}
2201 
2202 	if (modify_map) {
2203 		/*
2204 		 * madvise behaviors that are implemented in the vm_map_entry.
2205 		 *
2206 		 * We clip the vm_map_entry so that behavioral changes are
2207 		 * limited to the specified address range.
2208 		 */
2209 		for (current = entry;
2210 		     (current != &map->header) && (current->start < end);
2211 		     current = current->next
2212 		) {
2213 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
2214 				continue;
2215 
2216 			vm_map_clip_end(map, current, end);
2217 
2218 			switch (behav) {
2219 			case MADV_NORMAL:
2220 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
2221 				break;
2222 			case MADV_SEQUENTIAL:
2223 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
2224 				break;
2225 			case MADV_RANDOM:
2226 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
2227 				break;
2228 			case MADV_NOSYNC:
2229 				current->eflags |= MAP_ENTRY_NOSYNC;
2230 				break;
2231 			case MADV_AUTOSYNC:
2232 				current->eflags &= ~MAP_ENTRY_NOSYNC;
2233 				break;
2234 			case MADV_NOCORE:
2235 				current->eflags |= MAP_ENTRY_NOCOREDUMP;
2236 				break;
2237 			case MADV_CORE:
2238 				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
2239 				break;
2240 			default:
2241 				break;
2242 			}
2243 			vm_map_simplify_entry(map, current);
2244 		}
2245 		vm_map_unlock(map);
2246 	} else {
2247 		vm_pindex_t pstart, pend;
2248 
2249 		/*
2250 		 * madvise behaviors that are implemented in the underlying
2251 		 * vm_object.
2252 		 *
2253 		 * Since we don't clip the vm_map_entry, we have to clip
2254 		 * the vm_object pindex and count.
2255 		 */
2256 		for (current = entry;
2257 		     (current != &map->header) && (current->start < end);
2258 		     current = current->next
2259 		) {
2260 			vm_offset_t useEnd, useStart;
2261 
2262 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
2263 				continue;
2264 
2265 			pstart = OFF_TO_IDX(current->offset);
2266 			pend = pstart + atop(current->end - current->start);
2267 			useStart = current->start;
2268 			useEnd = current->end;
2269 
2270 			if (current->start < start) {
2271 				pstart += atop(start - current->start);
2272 				useStart = start;
2273 			}
2274 			if (current->end > end) {
2275 				pend -= atop(current->end - end);
2276 				useEnd = end;
2277 			}
2278 
2279 			if (pstart >= pend)
2280 				continue;
2281 
2282 			/*
2283 			 * Perform the pmap_advise() before clearing
2284 			 * PGA_REFERENCED in vm_page_advise().  Otherwise, a
2285 			 * concurrent pmap operation, such as pmap_remove(),
2286 			 * could clear a reference in the pmap and set
2287 			 * PGA_REFERENCED on the page before the pmap_advise()
2288 			 * had completed.  Consequently, the page would appear
2289 			 * referenced based upon an old reference that
2290 			 * occurred before this pmap_advise() ran.
2291 			 */
2292 			if (behav == MADV_DONTNEED || behav == MADV_FREE)
2293 				pmap_advise(map->pmap, useStart, useEnd,
2294 				    behav);
2295 
2296 			vm_object_madvise(current->object.vm_object, pstart,
2297 			    pend, behav);
2298 
2299 			/*
2300 			 * Pre-populate paging structures in the
2301 			 * WILLNEED case.  For wired entries, the
2302 			 * paging structures are already populated.
2303 			 */
2304 			if (behav == MADV_WILLNEED &&
2305 			    current->wired_count == 0) {
2306 				vm_map_pmap_enter(map,
2307 				    useStart,
2308 				    current->protection,
2309 				    current->object.vm_object,
2310 				    pstart,
2311 				    ptoa(pend - pstart),
2312 				    MAP_PREFAULT_MADVISE
2313 				);
2314 			}
2315 		}
2316 		vm_map_unlock_read(map);
2317 	}
2318 	return (0);
2319 }
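
/*
 * Illustrative sketch (hypothetical caller, not part of the build):
 * entry-level advice such as MADV_NOSYNC takes the exclusive map lock
 * above, while object-level advice such as MADV_WILLNEED only needs the
 * read lock; a caller does not have to distinguish the two:
 *
 *	map = &curproc->p_vmspace->vm_map;
 *	error = vm_map_madvise(map, start, end, MADV_WILLNEED);
 */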
2320 
2321 
2322 /*
2323  *	vm_map_inherit:
2324  *
2325  *	Sets the inheritance of the specified address
2326  *	range in the target map.  Inheritance
2327  *	affects how the map will be shared with
2328  *	child maps at the time of vmspace_fork.
2329  */
2330 int
2331 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
2332 	       vm_inherit_t new_inheritance)
2333 {
2334 	vm_map_entry_t entry;
2335 	vm_map_entry_t temp_entry;
2336 
2337 	switch (new_inheritance) {
2338 	case VM_INHERIT_NONE:
2339 	case VM_INHERIT_COPY:
2340 	case VM_INHERIT_SHARE:
2341 	case VM_INHERIT_ZERO:
2342 		break;
2343 	default:
2344 		return (KERN_INVALID_ARGUMENT);
2345 	}
2346 	if (start == end)
2347 		return (KERN_SUCCESS);
2348 	vm_map_lock(map);
2349 	VM_MAP_RANGE_CHECK(map, start, end);
2350 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
2351 		entry = temp_entry;
2352 		vm_map_clip_start(map, entry, start);
2353 	} else
2354 		entry = temp_entry->next;
2355 	while ((entry != &map->header) && (entry->start < end)) {
2356 		vm_map_clip_end(map, entry, end);
2357 		if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
2358 		    new_inheritance != VM_INHERIT_ZERO)
2359 			entry->inheritance = new_inheritance;
2360 		vm_map_simplify_entry(map, entry);
2361 		entry = entry->next;
2362 	}
2363 	vm_map_unlock(map);
2364 	return (KERN_SUCCESS);
2365 }
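
/*
 * Illustrative sketch (hypothetical caller, not part of the build): a
 * minherit(2)-style caller that asks for the range to appear as fresh,
 * zero-filled anonymous memory in a forked child (see the
 * VM_INHERIT_ZERO case in vmspace_fork() below):
 *
 *	rv = vm_map_inherit(map, start, end, VM_INHERIT_ZERO);
 */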
2366 
2367 /*
2368  *	vm_map_unwire:
2369  *
2370  *	Implements both kernel and user unwiring.
2371  */
2372 int
2373 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
2374     int flags)
2375 {
2376 	vm_map_entry_t entry, first_entry, tmp_entry;
2377 	vm_offset_t saved_start;
2378 	unsigned int last_timestamp;
2379 	int rv;
2380 	boolean_t need_wakeup, result, user_unwire;
2381 
2382 	if (start == end)
2383 		return (KERN_SUCCESS);
2384 	user_unwire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE;
2385 	vm_map_lock(map);
2386 	VM_MAP_RANGE_CHECK(map, start, end);
2387 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
2388 		if (flags & VM_MAP_WIRE_HOLESOK)
2389 			first_entry = first_entry->next;
2390 		else {
2391 			vm_map_unlock(map);
2392 			return (KERN_INVALID_ADDRESS);
2393 		}
2394 	}
2395 	last_timestamp = map->timestamp;
2396 	entry = first_entry;
2397 	while (entry != &map->header && entry->start < end) {
2398 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
2399 			/*
2400 			 * We have not yet clipped the entry.
2401 			 */
2402 			saved_start = (start >= entry->start) ? start :
2403 			    entry->start;
2404 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2405 			if (vm_map_unlock_and_wait(map, 0)) {
2406 				/*
2407 				 * Allow interruption of user unwiring?
2408 				 */
2409 			}
2410 			vm_map_lock(map);
2411 			if (last_timestamp+1 != map->timestamp) {
2412 				/*
2413 				 * Look again for the entry because the map was
2414 				 * modified while it was unlocked.
2415 				 * Specifically, the entry may have been
2416 				 * clipped, merged, or deleted.
2417 				 */
2418 				if (!vm_map_lookup_entry(map, saved_start,
2419 				    &tmp_entry)) {
2420 					if (flags & VM_MAP_WIRE_HOLESOK)
2421 						tmp_entry = tmp_entry->next;
2422 					else {
2423 						if (saved_start == start) {
2424 							/*
2425 							 * first_entry has been deleted.
2426 							 */
2427 							vm_map_unlock(map);
2428 							return (KERN_INVALID_ADDRESS);
2429 						}
2430 						end = saved_start;
2431 						rv = KERN_INVALID_ADDRESS;
2432 						goto done;
2433 					}
2434 				}
2435 				if (entry == first_entry)
2436 					first_entry = tmp_entry;
2437 				else
2438 					first_entry = NULL;
2439 				entry = tmp_entry;
2440 			}
2441 			last_timestamp = map->timestamp;
2442 			continue;
2443 		}
2444 		vm_map_clip_start(map, entry, start);
2445 		vm_map_clip_end(map, entry, end);
2446 		/*
2447 		 * Mark the entry in case the map lock is released.  (See
2448 		 * above.)
2449 		 */
2450 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 &&
2451 		    entry->wiring_thread == NULL,
2452 		    ("owned map entry %p", entry));
2453 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
2454 		entry->wiring_thread = curthread;
2455 		/*
2456 		 * Check the map for holes in the specified region.
2457 		 * If VM_MAP_WIRE_HOLESOK was specified, skip this check.
2458 		 */
2459 		if (((flags & VM_MAP_WIRE_HOLESOK) == 0) &&
2460 		    (entry->end < end && (entry->next == &map->header ||
2461 		    entry->next->start > entry->end))) {
2462 			end = entry->end;
2463 			rv = KERN_INVALID_ADDRESS;
2464 			goto done;
2465 		}
2466 		/*
2467 		 * If system unwiring, require that the entry is system wired.
2468 		 */
2469 		if (!user_unwire &&
2470 		    vm_map_entry_system_wired_count(entry) == 0) {
2471 			end = entry->end;
2472 			rv = KERN_INVALID_ARGUMENT;
2473 			goto done;
2474 		}
2475 		entry = entry->next;
2476 	}
2477 	rv = KERN_SUCCESS;
2478 done:
2479 	need_wakeup = FALSE;
2480 	if (first_entry == NULL) {
2481 		result = vm_map_lookup_entry(map, start, &first_entry);
2482 		if (!result && (flags & VM_MAP_WIRE_HOLESOK))
2483 			first_entry = first_entry->next;
2484 		else
2485 			KASSERT(result, ("vm_map_unwire: lookup failed"));
2486 	}
2487 	for (entry = first_entry; entry != &map->header && entry->start < end;
2488 	    entry = entry->next) {
2489 		/*
2490 		 * If VM_MAP_WIRE_HOLESOK was specified, an empty
2491 		 * space in the unwired region could have been mapped
2492 		 * while the map lock was dropped for draining
2493 		 * MAP_ENTRY_IN_TRANSITION.  Moreover, another thread
2494 		 * could be simultaneously wiring this new mapping
2495 		 * entry.  Detect these cases and skip any entries
2496 		 * not marked as in transition by us.
2497 		 */
2498 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
2499 		    entry->wiring_thread != curthread) {
2500 			KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0,
2501 			    ("vm_map_unwire: !HOLESOK and new/changed entry"));
2502 			continue;
2503 		}
2504 
2505 		if (rv == KERN_SUCCESS && (!user_unwire ||
2506 		    (entry->eflags & MAP_ENTRY_USER_WIRED))) {
2507 			if (user_unwire)
2508 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2509 			if (entry->wired_count == 1)
2510 				vm_map_entry_unwire(map, entry);
2511 			else
2512 				entry->wired_count--;
2513 		}
2514 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
2515 		    ("vm_map_unwire: in-transition flag missing %p", entry));
2516 		KASSERT(entry->wiring_thread == curthread,
2517 		    ("vm_map_unwire: alien wire %p", entry));
2518 		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
2519 		entry->wiring_thread = NULL;
2520 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
2521 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
2522 			need_wakeup = TRUE;
2523 		}
2524 		vm_map_simplify_entry(map, entry);
2525 	}
2526 	vm_map_unlock(map);
2527 	if (need_wakeup)
2528 		vm_map_wakeup(map);
2529 	return (rv);
2530 }
2531 
2532 /*
2533  *	vm_map_wire_entry_failure:
2534  *
2535  *	Handle a wiring failure on the given entry.
2536  *
2537  *	The map should be locked.
2538  */
2539 static void
2540 vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
2541     vm_offset_t failed_addr)
2542 {
2543 
2544 	VM_MAP_ASSERT_LOCKED(map);
2545 	KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 &&
2546 	    entry->wired_count == 1,
2547 	    ("vm_map_wire_entry_failure: entry %p isn't being wired", entry));
2548 	KASSERT(failed_addr < entry->end,
2549 	    ("vm_map_wire_entry_failure: entry %p was fully wired", entry));
2550 
2551 	/*
2552 	 * If any pages at the start of this entry were successfully wired,
2553 	 * then unwire them.
2554 	 */
2555 	if (failed_addr > entry->start) {
2556 		pmap_unwire(map->pmap, entry->start, failed_addr);
2557 		vm_object_unwire(entry->object.vm_object, entry->offset,
2558 		    failed_addr - entry->start, PQ_ACTIVE);
2559 	}
2560 
2561 	/*
2562 	 * Assign an out-of-range value to represent the failure to wire this
2563 	 * entry.
2564 	 */
2565 	entry->wired_count = -1;
2566 }
2567 
2568 /*
2569  *	vm_map_wire:
2570  *
2571  *	Implements both kernel and user wiring.
2572  */
2573 int
2574 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
2575     int flags)
2576 {
2577 	vm_map_entry_t entry, first_entry, tmp_entry;
2578 	vm_offset_t faddr, saved_end, saved_start;
2579 	unsigned int last_timestamp;
2580 	int rv;
2581 	boolean_t need_wakeup, result, user_wire;
2582 	vm_prot_t prot;
2583 
2584 	if (start == end)
2585 		return (KERN_SUCCESS);
2586 	prot = 0;
2587 	if (flags & VM_MAP_WIRE_WRITE)
2588 		prot |= VM_PROT_WRITE;
2589 	user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE;
2590 	vm_map_lock(map);
2591 	VM_MAP_RANGE_CHECK(map, start, end);
2592 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
2593 		if (flags & VM_MAP_WIRE_HOLESOK)
2594 			first_entry = first_entry->next;
2595 		else {
2596 			vm_map_unlock(map);
2597 			return (KERN_INVALID_ADDRESS);
2598 		}
2599 	}
2600 	last_timestamp = map->timestamp;
2601 	entry = first_entry;
2602 	while (entry != &map->header && entry->start < end) {
2603 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
2604 			/*
2605 			 * We have not yet clipped the entry.
2606 			 */
2607 			saved_start = (start >= entry->start) ? start :
2608 			    entry->start;
2609 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2610 			if (vm_map_unlock_and_wait(map, 0)) {
2611 				/*
2612 				 * Allow interruption of user wiring?
2613 				 */
2614 			}
2615 			vm_map_lock(map);
2616 			if (last_timestamp + 1 != map->timestamp) {
2617 				/*
2618 				 * Look again for the entry because the map was
2619 				 * modified while it was unlocked.
2620 				 * Specifically, the entry may have been
2621 				 * clipped, merged, or deleted.
2622 				 */
2623 				if (!vm_map_lookup_entry(map, saved_start,
2624 				    &tmp_entry)) {
2625 					if (flags & VM_MAP_WIRE_HOLESOK)
2626 						tmp_entry = tmp_entry->next;
2627 					else {
2628 						if (saved_start == start) {
2629 							/*
2630 							 * first_entry has been deleted.
2631 							 */
2632 							vm_map_unlock(map);
2633 							return (KERN_INVALID_ADDRESS);
2634 						}
2635 						end = saved_start;
2636 						rv = KERN_INVALID_ADDRESS;
2637 						goto done;
2638 					}
2639 				}
2640 				if (entry == first_entry)
2641 					first_entry = tmp_entry;
2642 				else
2643 					first_entry = NULL;
2644 				entry = tmp_entry;
2645 			}
2646 			last_timestamp = map->timestamp;
2647 			continue;
2648 		}
2649 		vm_map_clip_start(map, entry, start);
2650 		vm_map_clip_end(map, entry, end);
2651 		/*
2652 		 * Mark the entry in case the map lock is released.  (See
2653 		 * above.)
2654 		 */
2655 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 &&
2656 		    entry->wiring_thread == NULL,
2657 		    ("owned map entry %p", entry));
2658 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
2659 		entry->wiring_thread = curthread;
2660 		if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0
2661 		    || (entry->protection & prot) != prot) {
2662 			entry->eflags |= MAP_ENTRY_WIRE_SKIPPED;
2663 			if ((flags & VM_MAP_WIRE_HOLESOK) == 0) {
2664 				end = entry->end;
2665 				rv = KERN_INVALID_ADDRESS;
2666 				goto done;
2667 			}
2668 			goto next_entry;
2669 		}
2670 		if (entry->wired_count == 0) {
2671 			entry->wired_count++;
2672 			saved_start = entry->start;
2673 			saved_end = entry->end;
2674 
2675 			/*
2676 			 * Release the map lock, relying on the in-transition
2677 			 * mark.  Mark the map busy for fork.
2678 			 */
2679 			vm_map_busy(map);
2680 			vm_map_unlock(map);
2681 
2682 			faddr = saved_start;
2683 			do {
2684 				/*
2685 				 * Simulate a fault to get the page and enter
2686 				 * it into the physical map.
2687 				 */
2688 				if ((rv = vm_fault(map, faddr, VM_PROT_NONE,
2689 				    VM_FAULT_WIRE)) != KERN_SUCCESS)
2690 					break;
2691 			} while ((faddr += PAGE_SIZE) < saved_end);
2692 			vm_map_lock(map);
2693 			vm_map_unbusy(map);
2694 			if (last_timestamp + 1 != map->timestamp) {
2695 				/*
2696 				 * Look again for the entry because the map was
2697 				 * modified while it was unlocked.  The entry
2698 				 * may have been clipped, but NOT merged or
2699 				 * deleted.
2700 				 */
2701 				result = vm_map_lookup_entry(map, saved_start,
2702 				    &tmp_entry);
2703 				KASSERT(result, ("vm_map_wire: lookup failed"));
2704 				if (entry == first_entry)
2705 					first_entry = tmp_entry;
2706 				else
2707 					first_entry = NULL;
2708 				entry = tmp_entry;
2709 				while (entry->end < saved_end) {
2710 					/*
2711 					 * In case of failure, handle entries
2712 					 * that were not fully wired here;
2713 					 * fully wired entries are handled
2714 					 * later.
2715 					 */
2716 					if (rv != KERN_SUCCESS &&
2717 					    faddr < entry->end)
2718 						vm_map_wire_entry_failure(map,
2719 						    entry, faddr);
2720 					entry = entry->next;
2721 				}
2722 			}
2723 			last_timestamp = map->timestamp;
2724 			if (rv != KERN_SUCCESS) {
2725 				vm_map_wire_entry_failure(map, entry, faddr);
2726 				end = entry->end;
2727 				goto done;
2728 			}
2729 		} else if (!user_wire ||
2730 			   (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
2731 			entry->wired_count++;
2732 		}
2733 		/*
2734 		 * Check the map for holes in the specified region.
2735 		 * If VM_MAP_WIRE_HOLESOK was specified, skip this check.
2736 		 */
2737 	next_entry:
2738 		if ((flags & VM_MAP_WIRE_HOLESOK) == 0 &&
2739 		    entry->end < end && (entry->next == &map->header ||
2740 		    entry->next->start > entry->end)) {
2741 			end = entry->end;
2742 			rv = KERN_INVALID_ADDRESS;
2743 			goto done;
2744 		}
2745 		entry = entry->next;
2746 	}
2747 	rv = KERN_SUCCESS;
2748 done:
2749 	need_wakeup = FALSE;
2750 	if (first_entry == NULL) {
2751 		result = vm_map_lookup_entry(map, start, &first_entry);
2752 		if (!result && (flags & VM_MAP_WIRE_HOLESOK))
2753 			first_entry = first_entry->next;
2754 		else
2755 			KASSERT(result, ("vm_map_wire: lookup failed"));
2756 	}
2757 	for (entry = first_entry; entry != &map->header && entry->start < end;
2758 	    entry = entry->next) {
2759 		/*
2760 		 * If VM_MAP_WIRE_HOLESOK was specified, an empty
2761 		 * space in the unwired region could have been mapped
2762 		 * while the map lock was dropped for faulting in the
2763 		 * pages or draining MAP_ENTRY_IN_TRANSITION.
2764 		 * Moreover, another thread could be simultaneously
2765 		 * wiring this new mapping entry.  Detect these cases
2766 		 * and skip any entries not marked as in transition by us.
2767 		 */
2768 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
2769 		    entry->wiring_thread != curthread) {
2770 			KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0,
2771 			    ("vm_map_wire: !HOLESOK and new/changed entry"));
2772 			continue;
2773 		}
2774 
2775 		if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0)
2776 			goto next_entry_done;
2777 
2778 		if (rv == KERN_SUCCESS) {
2779 			if (user_wire)
2780 				entry->eflags |= MAP_ENTRY_USER_WIRED;
2781 		} else if (entry->wired_count == -1) {
2782 			/*
2783 			 * Wiring failed on this entry.  Thus, unwiring is
2784 			 * unnecessary.
2785 			 */
2786 			entry->wired_count = 0;
2787 		} else if (!user_wire ||
2788 		    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
2789 			/*
2790 			 * Undo the wiring.  Wiring succeeded on this entry
2791 			 * but failed on a later entry.
2792 			 */
2793 			if (entry->wired_count == 1)
2794 				vm_map_entry_unwire(map, entry);
2795 			else
2796 				entry->wired_count--;
2797 		}
2798 	next_entry_done:
2799 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
2800 		    ("vm_map_wire: in-transition flag missing %p", entry));
2801 		KASSERT(entry->wiring_thread == curthread,
2802 		    ("vm_map_wire: alien wire %p", entry));
2803 		entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION |
2804 		    MAP_ENTRY_WIRE_SKIPPED);
2805 		entry->wiring_thread = NULL;
2806 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
2807 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
2808 			need_wakeup = TRUE;
2809 		}
2810 		vm_map_simplify_entry(map, entry);
2811 	}
2812 	vm_map_unlock(map);
2813 	if (need_wakeup)
2814 		vm_map_wakeup(map);
2815 	return (rv);
2816 }
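
/*
 * Illustrative sketch (hypothetical caller, not part of the build): an
 * mlock(2)/munlock(2)-style pair performs a user wiring and later the
 * matching unwiring; VM_MAP_WIRE_NOHOLES (assumed here to be the
 * counterpart of VM_MAP_WIRE_HOLESOK) makes holes in the range an error:
 *
 *	rv = vm_map_wire(map, start, end,
 *	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 *	...
 *	rv = vm_map_unwire(map, start, end,
 *	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 */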
2817 
2818 /*
2819  * vm_map_sync
2820  *
2821  * Push any dirty cached pages in the address range to their pager.
2822  * If syncio is TRUE, dirty pages are written synchronously.
2823  * If invalidate is TRUE, any cached pages are freed as well.
2824  *
2825  * If the size of the region from start to end is zero, we are
2826  * supposed to flush all modified pages within the region containing
2827  * start.  Unfortunately, a region can be split or coalesced with
2828  * neighboring regions, making it difficult to determine what the
2829  * original region was.  Therefore, we approximate this requirement by
2830  * flushing the current region containing start.
2831  *
2832  * Returns an error if any part of the specified range is not mapped.
2833  */
2834 int
2835 vm_map_sync(
2836 	vm_map_t map,
2837 	vm_offset_t start,
2838 	vm_offset_t end,
2839 	boolean_t syncio,
2840 	boolean_t invalidate)
2841 {
2842 	vm_map_entry_t current;
2843 	vm_map_entry_t entry;
2844 	vm_size_t size;
2845 	vm_object_t object;
2846 	vm_ooffset_t offset;
2847 	unsigned int last_timestamp;
2848 	boolean_t failed;
2849 
2850 	vm_map_lock_read(map);
2851 	VM_MAP_RANGE_CHECK(map, start, end);
2852 	if (!vm_map_lookup_entry(map, start, &entry)) {
2853 		vm_map_unlock_read(map);
2854 		return (KERN_INVALID_ADDRESS);
2855 	} else if (start == end) {
2856 		start = entry->start;
2857 		end = entry->end;
2858 	}
2859 	/*
2860 	 * Make a first pass to check for user-wired memory and holes.
2861 	 */
2862 	for (current = entry; current != &map->header && current->start < end;
2863 	    current = current->next) {
2864 		if (invalidate && (current->eflags & MAP_ENTRY_USER_WIRED)) {
2865 			vm_map_unlock_read(map);
2866 			return (KERN_INVALID_ARGUMENT);
2867 		}
2868 		if (end > current->end &&
2869 		    (current->next == &map->header ||
2870 			current->end != current->next->start)) {
2871 			vm_map_unlock_read(map);
2872 			return (KERN_INVALID_ADDRESS);
2873 		}
2874 	}
2875 
2876 	if (invalidate)
2877 		pmap_remove(map->pmap, start, end);
2878 	failed = FALSE;
2879 
2880 	/*
2881 	 * Make a second pass, cleaning/uncaching pages from the indicated
2882 	 * objects as we go.
2883 	 */
2884 	for (current = entry; current != &map->header && current->start < end;) {
2885 		offset = current->offset + (start - current->start);
2886 		size = (end <= current->end ? end : current->end) - start;
2887 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
2888 			vm_map_t smap;
2889 			vm_map_entry_t tentry;
2890 			vm_size_t tsize;
2891 
2892 			smap = current->object.sub_map;
2893 			vm_map_lock_read(smap);
2894 			(void) vm_map_lookup_entry(smap, offset, &tentry);
2895 			tsize = tentry->end - offset;
2896 			if (tsize < size)
2897 				size = tsize;
2898 			object = tentry->object.vm_object;
2899 			offset = tentry->offset + (offset - tentry->start);
2900 			vm_map_unlock_read(smap);
2901 		} else {
2902 			object = current->object.vm_object;
2903 		}
2904 		vm_object_reference(object);
2905 		last_timestamp = map->timestamp;
2906 		vm_map_unlock_read(map);
2907 		if (!vm_object_sync(object, offset, size, syncio, invalidate))
2908 			failed = TRUE;
2909 		start += size;
2910 		vm_object_deallocate(object);
2911 		vm_map_lock_read(map);
2912 		if (last_timestamp == map->timestamp ||
2913 		    !vm_map_lookup_entry(map, start, &current))
2914 			current = current->next;
2915 	}
2916 
2917 	vm_map_unlock_read(map);
2918 	return (failed ? KERN_FAILURE : KERN_SUCCESS);
2919 }
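
/*
 * Illustrative sketch (hypothetical caller, not part of the build): an
 * msync(2)-style caller flushes a range synchronously, and a zero-sized
 * range flushes the whole region containing the address, per the
 * comment above:
 *
 *	rv = vm_map_sync(map, addr, addr + size, TRUE, FALSE);
 *	rv = vm_map_sync(map, addr, addr, TRUE, FALSE);
 */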
2920 
2921 /*
2922  *	vm_map_entry_unwire:	[ internal use only ]
2923  *
2924  *	Make the region specified by this entry pageable.
2925  *
2926  *	The map in question should be locked.
2927  *	[This is the reason for this routine's existence.]
2928  */
2929 static void
2930 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
2931 {
2932 
2933 	VM_MAP_ASSERT_LOCKED(map);
2934 	KASSERT(entry->wired_count > 0,
2935 	    ("vm_map_entry_unwire: entry %p isn't wired", entry));
2936 	pmap_unwire(map->pmap, entry->start, entry->end);
2937 	vm_object_unwire(entry->object.vm_object, entry->offset, entry->end -
2938 	    entry->start, PQ_ACTIVE);
2939 	entry->wired_count = 0;
2940 }
2941 
2942 static void
2943 vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map)
2944 {
2945 
2946 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0)
2947 		vm_object_deallocate(entry->object.vm_object);
2948 	uma_zfree(system_map ? kmapentzone : mapentzone, entry);
2949 }
2950 
2951 /*
2952  *	vm_map_entry_delete:	[ internal use only ]
2953  *
2954  *	Deallocate the given entry from the target map.
2955  */
2956 static void
2957 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
2958 {
2959 	vm_object_t object;
2960 	vm_pindex_t offidxstart, offidxend, count, size1;
2961 	vm_size_t size;
2962 
2963 	vm_map_entry_unlink(map, entry);
2964 	object = entry->object.vm_object;
2965 
2966 	if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
2967 		MPASS(entry->cred == NULL);
2968 		MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
2969 		MPASS(object == NULL);
2970 		vm_map_entry_deallocate(entry, map->system_map);
2971 		return;
2972 	}
2973 
2974 	size = entry->end - entry->start;
2975 	map->size -= size;
2976 
2977 	if (entry->cred != NULL) {
2978 		swap_release_by_cred(size, entry->cred);
2979 		crfree(entry->cred);
2980 	}
2981 
2982 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
2983 	    (object != NULL)) {
2984 		KASSERT(entry->cred == NULL || object->cred == NULL ||
2985 		    (entry->eflags & MAP_ENTRY_NEEDS_COPY),
2986 		    ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry));
2987 		count = atop(size);
2988 		offidxstart = OFF_TO_IDX(entry->offset);
2989 		offidxend = offidxstart + count;
2990 		VM_OBJECT_WLOCK(object);
2991 		if (object->ref_count != 1 && ((object->flags & (OBJ_NOSPLIT |
2992 		    OBJ_ONEMAPPING)) == OBJ_ONEMAPPING ||
2993 		    object == kernel_object || object == kmem_object)) {
2994 			vm_object_collapse(object);
2995 
2996 			/*
2997 			 * The option OBJPR_NOTMAPPED can be passed here
2998 			 * because vm_map_delete() already performed
2999 			 * pmap_remove() on the only mapping to this range
3000 			 * of pages.
3001 			 */
3002 			vm_object_page_remove(object, offidxstart, offidxend,
3003 			    OBJPR_NOTMAPPED);
3004 			if (object->type == OBJT_SWAP)
3005 				swap_pager_freespace(object, offidxstart,
3006 				    count);
3007 			if (offidxend >= object->size &&
3008 			    offidxstart < object->size) {
3009 				size1 = object->size;
3010 				object->size = offidxstart;
3011 				if (object->cred != NULL) {
3012 					size1 -= object->size;
3013 					KASSERT(object->charge >= ptoa(size1),
3014 					    ("object %p charge < 0", object));
3015 					swap_release_by_cred(ptoa(size1),
3016 					    object->cred);
3017 					object->charge -= ptoa(size1);
3018 				}
3019 			}
3020 		}
3021 		VM_OBJECT_WUNLOCK(object);
3022 	} else
3023 		entry->object.vm_object = NULL;
3024 	if (map->system_map)
3025 		vm_map_entry_deallocate(entry, TRUE);
3026 	else {
3027 		entry->next = curthread->td_map_def_user;
3028 		curthread->td_map_def_user = entry;
3029 	}
3030 }
3031 
3032 /*
3033  *	vm_map_delete:	[ internal use only ]
3034  *
3035  *	Deallocates the given address range from the target
3036  *	map.
3037  */
3038 int
3039 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
3040 {
3041 	vm_map_entry_t entry;
3042 	vm_map_entry_t first_entry;
3043 
3044 	VM_MAP_ASSERT_LOCKED(map);
3045 	if (start == end)
3046 		return (KERN_SUCCESS);
3047 
3048 	/*
3049 	 * Find the start of the region, and clip it
3050 	 */
3051 	if (!vm_map_lookup_entry(map, start, &first_entry))
3052 		entry = first_entry->next;
3053 	else {
3054 		entry = first_entry;
3055 		vm_map_clip_start(map, entry, start);
3056 	}
3057 
3058 	/*
3059 	 * Step through all entries in this region
3060 	 */
3061 	while ((entry != &map->header) && (entry->start < end)) {
3062 		vm_map_entry_t next;
3063 
3064 		/*
3065 		 * Wait for wiring or unwiring of an entry to complete.
3066 		 * Also wait for any system wirings to disappear on
3067 		 * user maps.
3068 		 */
3069 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 ||
3070 		    (vm_map_pmap(map) != kernel_pmap &&
3071 		    vm_map_entry_system_wired_count(entry) != 0)) {
3072 			unsigned int last_timestamp;
3073 			vm_offset_t saved_start;
3074 			vm_map_entry_t tmp_entry;
3075 
3076 			saved_start = entry->start;
3077 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
3078 			last_timestamp = map->timestamp;
3079 			(void) vm_map_unlock_and_wait(map, 0);
3080 			vm_map_lock(map);
3081 			if (last_timestamp + 1 != map->timestamp) {
3082 				/*
3083 				 * Look again for the entry because the map was
3084 				 * modified while it was unlocked.
3085 				 * Specifically, the entry may have been
3086 				 * clipped, merged, or deleted.
3087 				 */
3088 				if (!vm_map_lookup_entry(map, saved_start,
3089 							 &tmp_entry))
3090 					entry = tmp_entry->next;
3091 				else {
3092 					entry = tmp_entry;
3093 					vm_map_clip_start(map, entry,
3094 							  saved_start);
3095 				}
3096 			}
3097 			continue;
3098 		}
3099 		vm_map_clip_end(map, entry, end);
3100 
3101 		next = entry->next;
3102 
3103 		/*
3104 		 * Unwire before removing addresses from the pmap; otherwise,
3105 		 * unwiring will put the entries back in the pmap.
3106 		 */
3107 		if (entry->wired_count != 0) {
3108 			vm_map_entry_unwire(map, entry);
3109 		}
3110 
3111 		pmap_remove(map->pmap, entry->start, entry->end);
3112 
3113 		/*
3114 		 * Delete the entry only after removing all pmap
3115 		 * entries pointing to its pages.  (Otherwise, its
3116 		 * page frames may be reallocated, and any modify bits
3117 		 * will be set in the wrong object!)
3118 		 */
3119 		vm_map_entry_delete(map, entry);
3120 		entry = next;
3121 	}
3122 	return (KERN_SUCCESS);
3123 }
3124 
3125 /*
3126  *	vm_map_remove:
3127  *
3128  *	Remove the given address range from the target map.
3129  *	This is the exported form of vm_map_delete.
3130  */
3131 int
3132 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
3133 {
3134 	int result;
3135 
3136 	vm_map_lock(map);
3137 	VM_MAP_RANGE_CHECK(map, start, end);
3138 	result = vm_map_delete(map, start, end);
3139 	vm_map_unlock(map);
3140 	return (result);
3141 }
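
/*
 * Illustrative sketch (hypothetical caller, not part of the build): a
 * munmap(2)-style caller hands the page-aligned range to
 * vm_map_remove() and propagates the result:
 *
 *	rv = vm_map_remove(map, trunc_page(addr),
 *	    round_page(addr + size));
 */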
3142 
3143 /*
3144  *	vm_map_check_protection:
3145  *
3146  *	Assert that the target map allows the specified privilege on the
3147  *	entire address region given.  The entire region must be allocated.
3148  *
3149  *	WARNING!  This code does not and should not check whether the
3150  *	contents of the region are accessible.  For example, a smaller file
3151  *	might be mapped into a larger address space.
3152  *
3153  *	NOTE!  This code is also called by munmap().
3154  *
3155  *	The map must be locked.  A read lock is sufficient.
3156  */
3157 boolean_t
3158 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
3159 			vm_prot_t protection)
3160 {
3161 	vm_map_entry_t entry;
3162 	vm_map_entry_t tmp_entry;
3163 
3164 	if (!vm_map_lookup_entry(map, start, &tmp_entry))
3165 		return (FALSE);
3166 	entry = tmp_entry;
3167 
3168 	while (start < end) {
3169 		if (entry == &map->header)
3170 			return (FALSE);
3171 		/*
3172 		 * No holes allowed!
3173 		 */
3174 		if (start < entry->start)
3175 			return (FALSE);
3176 		/*
3177 		 * Check protection associated with entry.
3178 		 */
3179 		if ((entry->protection & protection) != protection)
3180 			return (FALSE);
3181 		/* go to next entry */
3182 		start = entry->end;
3183 		entry = entry->next;
3184 	}
3185 	return (TRUE);
3186 }
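
/*
 * Illustrative sketch (hypothetical caller, not part of the build):
 * with the map at least read-locked, a caller verifies that an entire
 * range permits both reading and writing before touching it:
 *
 *	if (!vm_map_check_protection(map, start, end,
 *	    VM_PROT_READ | VM_PROT_WRITE))
 *		return (KERN_PROTECTION_FAILURE);
 */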
3187 
3188 /*
3189  *	vm_map_copy_entry:
3190  *
3191  *	Copies the contents of the source entry to the destination
3192  *	entry.  The entries *must* be aligned properly.
3193  */
3194 static void
3195 vm_map_copy_entry(
3196 	vm_map_t src_map,
3197 	vm_map_t dst_map,
3198 	vm_map_entry_t src_entry,
3199 	vm_map_entry_t dst_entry,
3200 	vm_ooffset_t *fork_charge)
3201 {
3202 	vm_object_t src_object;
3203 	vm_map_entry_t fake_entry;
3204 	vm_offset_t size;
3205 	struct ucred *cred;
3206 	int charged;
3207 
3208 	VM_MAP_ASSERT_LOCKED(dst_map);
3209 
3210 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
3211 		return;
3212 
3213 	if (src_entry->wired_count == 0 ||
3214 	    (src_entry->protection & VM_PROT_WRITE) == 0) {
3215 		/*
3216 		 * If the source entry is marked needs_copy, it is already
3217 		 * write-protected.
3218 		 */
3219 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0 &&
3220 		    (src_entry->protection & VM_PROT_WRITE) != 0) {
3221 			pmap_protect(src_map->pmap,
3222 			    src_entry->start,
3223 			    src_entry->end,
3224 			    src_entry->protection & ~VM_PROT_WRITE);
3225 		}
3226 
3227 		/*
3228 		 * Make a copy of the object.
3229 		 */
3230 		size = src_entry->end - src_entry->start;
3231 		if ((src_object = src_entry->object.vm_object) != NULL) {
3232 			VM_OBJECT_WLOCK(src_object);
3233 			charged = ENTRY_CHARGED(src_entry);
3234 			if (src_object->handle == NULL &&
3235 			    (src_object->type == OBJT_DEFAULT ||
3236 			    src_object->type == OBJT_SWAP)) {
3237 				vm_object_collapse(src_object);
3238 				if ((src_object->flags & (OBJ_NOSPLIT |
3239 				    OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
3240 					vm_object_split(src_entry);
3241 					src_object =
3242 					    src_entry->object.vm_object;
3243 				}
3244 			}
3245 			vm_object_reference_locked(src_object);
3246 			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
3247 			if (src_entry->cred != NULL &&
3248 			    !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
3249 				KASSERT(src_object->cred == NULL,
3250 				    ("OVERCOMMIT: vm_map_copy_entry: cred %p",
3251 				     src_object));
3252 				src_object->cred = src_entry->cred;
3253 				src_object->charge = size;
3254 			}
3255 			VM_OBJECT_WUNLOCK(src_object);
3256 			dst_entry->object.vm_object = src_object;
3257 			if (charged) {
3258 				cred = curthread->td_ucred;
3259 				crhold(cred);
3260 				dst_entry->cred = cred;
3261 				*fork_charge += size;
3262 				if (!(src_entry->eflags &
3263 				      MAP_ENTRY_NEEDS_COPY)) {
3264 					crhold(cred);
3265 					src_entry->cred = cred;
3266 					*fork_charge += size;
3267 				}
3268 			}
3269 			src_entry->eflags |= MAP_ENTRY_COW |
3270 			    MAP_ENTRY_NEEDS_COPY;
3271 			dst_entry->eflags |= MAP_ENTRY_COW |
3272 			    MAP_ENTRY_NEEDS_COPY;
3273 			dst_entry->offset = src_entry->offset;
3274 			if (src_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
3275 				/*
3276 				 * MAP_ENTRY_VN_WRITECNT can no longer
3277 				 * track the write reference on behalf
3278 				 * of src_entry, since the entry is now
3279 				 * marked as needing a copy.  Allocate a
3280 				 * fake entry that is used to
3281 				 * decrement object->un_pager.vnp.writemappings
3282 				 * at the appropriate time.  Attach
3283 				 * fake_entry to the deferred list.
3284 				 */
3285 				fake_entry = vm_map_entry_create(dst_map);
3286 				fake_entry->eflags = MAP_ENTRY_VN_WRITECNT;
3287 				src_entry->eflags &= ~MAP_ENTRY_VN_WRITECNT;
3288 				vm_object_reference(src_object);
3289 				fake_entry->object.vm_object = src_object;
3290 				fake_entry->start = src_entry->start;
3291 				fake_entry->end = src_entry->end;
3292 				fake_entry->next = curthread->td_map_def_user;
3293 				curthread->td_map_def_user = fake_entry;
3294 			}
3295 
3296 			pmap_copy(dst_map->pmap, src_map->pmap,
3297 			    dst_entry->start, dst_entry->end - dst_entry->start,
3298 			    src_entry->start);
3299 		} else {
3300 			dst_entry->object.vm_object = NULL;
3301 			dst_entry->offset = 0;
3302 			if (src_entry->cred != NULL) {
3303 				dst_entry->cred = curthread->td_ucred;
3304 				crhold(dst_entry->cred);
3305 				*fork_charge += size;
3306 			}
3307 		}
3308 	} else {
3309 		/*
3310 		 * We don't want to make writeable wired pages copy-on-write.
3311 		 * Immediately copy these pages into the new map by simulating
3312 		 * page faults.  The new pages are pageable.
3313 		 */
3314 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry,
3315 		    fork_charge);
3316 	}
3317 }
3318 
3319 /*
3320  * vmspace_map_entry_forked:
3321  * Update the newly-forked vmspace each time a map entry is inherited
3322  * or copied.  The values for vm_dsize and vm_tsize are approximate
3323  * (and mostly-obsolete ideas in the face of mmap(2) et al.)
3324  */
3325 static void
3326 vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
3327     vm_map_entry_t entry)
3328 {
3329 	vm_size_t entrysize;
3330 	vm_offset_t newend;
3331 
3332 	if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
3333 		return;
3334 	entrysize = entry->end - entry->start;
3335 	vm2->vm_map.size += entrysize;
3336 	if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
3337 		vm2->vm_ssize += btoc(entrysize);
3338 	} else if (entry->start >= (vm_offset_t)vm1->vm_daddr &&
3339 	    entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) {
3340 		newend = MIN(entry->end,
3341 		    (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize));
3342 		vm2->vm_dsize += btoc(newend - entry->start);
3343 	} else if (entry->start >= (vm_offset_t)vm1->vm_taddr &&
3344 	    entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) {
3345 		newend = MIN(entry->end,
3346 		    (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize));
3347 		vm2->vm_tsize += btoc(newend - entry->start);
3348 	}
3349 }
3350 
3351 /*
3352  * vmspace_fork:
3353  * Create a new process vmspace structure and vm_map
3354  * based on those of an existing process.  The new map
3355  * is based on the old map, according to the inheritance
3356  * values on the regions in that map.
3357  *
3358  * XXX It might be worth coalescing the entries added to the new vmspace.
3359  *
3360  * The source map must not be locked.
3361  */
3362 struct vmspace *
3363 vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
3364 {
3365 	struct vmspace *vm2;
3366 	vm_map_t new_map, old_map;
3367 	vm_map_entry_t new_entry, old_entry;
3368 	vm_object_t object;
3369 	int locked;
3370 	vm_inherit_t inh;
3371 
3372 	old_map = &vm1->vm_map;
3373 	/* Copy immutable fields of vm1 to vm2. */
3374 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset, NULL);
3375 	if (vm2 == NULL)
3376 		return (NULL);
3377 	vm2->vm_taddr = vm1->vm_taddr;
3378 	vm2->vm_daddr = vm1->vm_daddr;
3379 	vm2->vm_maxsaddr = vm1->vm_maxsaddr;
3380 	vm_map_lock(old_map);
3381 	if (old_map->busy)
3382 		vm_map_wait_busy(old_map);
3383 	new_map = &vm2->vm_map;
3384 	locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
3385 	KASSERT(locked, ("vmspace_fork: lock failed"));
3386 
3387 	old_entry = old_map->header.next;
3388 
3389 	while (old_entry != &old_map->header) {
3390 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
3391 			panic("vm_map_fork: encountered a submap");
3392 
3393 		inh = old_entry->inheritance;
3394 		if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 &&
3395 		    inh != VM_INHERIT_NONE)
3396 			inh = VM_INHERIT_COPY;
3397 
3398 		switch (inh) {
3399 		case VM_INHERIT_NONE:
3400 			break;
3401 
3402 		case VM_INHERIT_SHARE:
3403 			/*
3404 			 * Clone the entry, creating the shared object if necessary.
3405 			 */
3406 			object = old_entry->object.vm_object;
3407 			if (object == NULL) {
3408 				object = vm_object_allocate(OBJT_DEFAULT,
3409 					atop(old_entry->end - old_entry->start));
3410 				old_entry->object.vm_object = object;
3411 				old_entry->offset = 0;
3412 				if (old_entry->cred != NULL) {
3413 					object->cred = old_entry->cred;
3414 					object->charge = old_entry->end -
3415 					    old_entry->start;
3416 					old_entry->cred = NULL;
3417 				}
3418 			}
3419 
3420 			/*
3421 			 * Add the reference before calling vm_object_shadow
3422 			 * to ensure that a shadow object is created.
3423 			 */
3424 			vm_object_reference(object);
3425 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
3426 				vm_object_shadow(&old_entry->object.vm_object,
3427 				    &old_entry->offset,
3428 				    old_entry->end - old_entry->start);
3429 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
3430 				/* Transfer the second reference too. */
3431 				vm_object_reference(
3432 				    old_entry->object.vm_object);
3433 
3434 				/*
3435 				 * As in vm_map_simplify_entry(), the
3436 				 * vnode lock will not be acquired in
3437 				 * this call to vm_object_deallocate().
3438 				 */
3439 				vm_object_deallocate(object);
3440 				object = old_entry->object.vm_object;
3441 			}
3442 			VM_OBJECT_WLOCK(object);
3443 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
3444 			if (old_entry->cred != NULL) {
3445 				KASSERT(object->cred == NULL, ("vmspace_fork both cred"));
3446 				object->cred = old_entry->cred;
3447 				object->charge = old_entry->end - old_entry->start;
3448 				old_entry->cred = NULL;
3449 			}
3450 
3451 			/*
3452 			 * Assert the correct state of the vnode
3453 			 * v_writecount while the object is locked, so
3454 			 * that the object need not be relocked later
3455 			 * just for the sake of the assertion.
3456 			 */
3457 			if (old_entry->eflags & MAP_ENTRY_VN_WRITECNT &&
3458 			    object->type == OBJT_VNODE) {
3459 				KASSERT(((struct vnode *)object->handle)->
3460 				    v_writecount > 0,
3461 				    ("vmspace_fork: v_writecount %p", object));
3462 				KASSERT(object->un_pager.vnp.writemappings > 0,
3463 				    ("vmspace_fork: vnp.writecount %p",
3464 				    object));
3465 			}
3466 			VM_OBJECT_WUNLOCK(object);
3467 
3468 			/*
3469 			 * Clone the entry, referencing the shared object.
3470 			 */
3471 			new_entry = vm_map_entry_create(new_map);
3472 			*new_entry = *old_entry;
3473 			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
3474 			    MAP_ENTRY_IN_TRANSITION);
3475 			new_entry->wiring_thread = NULL;
3476 			new_entry->wired_count = 0;
3477 			if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
3478 				vnode_pager_update_writecount(object,
3479 				    new_entry->start, new_entry->end);
3480 			}
3481 
3482 			/*
3483 			 * Insert the entry into the new map -- we know we're
3484 			 * inserting at the end of the new map.
3485 			 */
3486 			vm_map_entry_link(new_map, new_map->header.prev,
3487 			    new_entry);
3488 			vmspace_map_entry_forked(vm1, vm2, new_entry);
3489 
3490 			/*
3491 			 * Update the physical map
3492 			 */
3493 			pmap_copy(new_map->pmap, old_map->pmap,
3494 			    new_entry->start,
3495 			    (old_entry->end - old_entry->start),
3496 			    old_entry->start);
3497 			break;
3498 
3499 		case VM_INHERIT_COPY:
3500 			/*
3501 			 * Clone the entry and link into the map.
3502 			 */
3503 			new_entry = vm_map_entry_create(new_map);
3504 			*new_entry = *old_entry;
3505 			/*
3506 			 * Copied entry is COW over the old object.
3507 			 */
3508 			new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
3509 			    MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT);
3510 			new_entry->wiring_thread = NULL;
3511 			new_entry->wired_count = 0;
3512 			new_entry->object.vm_object = NULL;
3513 			new_entry->cred = NULL;
3514 			vm_map_entry_link(new_map, new_map->header.prev,
3515 			    new_entry);
3516 			vmspace_map_entry_forked(vm1, vm2, new_entry);
3517 			vm_map_copy_entry(old_map, new_map, old_entry,
3518 			    new_entry, fork_charge);
3519 			break;
3520 
3521 		case VM_INHERIT_ZERO:
3522 			/*
3523 			 * Create a new anonymous mapping entry modelled from
3524 			 * the old one.
3525 			 */
3526 			new_entry = vm_map_entry_create(new_map);
3527 			memset(new_entry, 0, sizeof(*new_entry));
3528 
3529 			new_entry->start = old_entry->start;
3530 			new_entry->end = old_entry->end;
3531 			new_entry->eflags = old_entry->eflags &
3532 			    ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
3533 			    MAP_ENTRY_VN_WRITECNT);
3534 			new_entry->protection = old_entry->protection;
3535 			new_entry->max_protection = old_entry->max_protection;
3536 			new_entry->inheritance = VM_INHERIT_ZERO;
3537 
3538 			vm_map_entry_link(new_map, new_map->header.prev,
3539 			    new_entry);
3540 			vmspace_map_entry_forked(vm1, vm2, new_entry);
3541 
3542 			new_entry->cred = curthread->td_ucred;
3543 			crhold(new_entry->cred);
3544 			*fork_charge += (new_entry->end - new_entry->start);
3545 
3546 			break;
3547 		}
3548 		old_entry = old_entry->next;
3549 	}
3550 	/*
3551 	 * Use inlined vm_map_unlock() to postpone handling the deferred
3552 	 * map entries, which cannot be done until both old_map and
3553 	 * new_map locks are released.
3554 	 */
3555 	sx_xunlock(&old_map->lock);
3556 	sx_xunlock(&new_map->lock);
3557 	vm_map_process_deferred();
3558 
3559 	return (vm2);
3560 }
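
/*
 * Example (illustrative sketch, not compiled): a caller of vmspace_fork()
 * is expected to reserve swap for the returned fork_charge and to drop
 * the new vmspace on failure, as vmspace_unshare() below does.  The
 * helper name example_fork_vmspace() is hypothetical.
 */
#if 0
static struct vmspace *
example_fork_vmspace(struct proc *p1)
{
	struct vmspace *vm2;
	vm_ooffset_t fork_charge;

	fork_charge = 0;
	vm2 = vmspace_fork(p1->p_vmspace, &fork_charge);
	if (vm2 == NULL)
		return (NULL);
	/* Charge the copied COW mappings to the parent's credentials. */
	if (!swap_reserve_by_cred(fork_charge, p1->p_ucred)) {
		vmspace_free(vm2);
		return (NULL);
	}
	return (vm2);
}
#endif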
3561 
3562 /*
3563  * Create a process's stack for exec_new_vmspace().  This function is never
3564  * asked to wire the newly created stack.
3565  */
3566 int
3567 vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
3568     vm_prot_t prot, vm_prot_t max, int cow)
3569 {
3570 	vm_size_t growsize, init_ssize;
3571 	rlim_t vmemlim;
3572 	int rv;
3573 
3574 	MPASS((map->flags & MAP_WIREFUTURE) == 0);
3575 	growsize = sgrowsiz;
3576 	init_ssize = (max_ssize < growsize) ? max_ssize : growsize;
3577 	vm_map_lock(map);
3578 	vmemlim = lim_cur(curthread, RLIMIT_VMEM);
3579 	/* If we would blow our VMEM resource limit, no go */
3580 	if (map->size + init_ssize > vmemlim) {
3581 		rv = KERN_NO_SPACE;
3582 		goto out;
3583 	}
3584 	rv = vm_map_stack_locked(map, addrbos, max_ssize, growsize, prot,
3585 	    max, cow);
3586 out:
3587 	vm_map_unlock(map);
3588 	return (rv);
3589 }
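
/*
 * Example (illustrative sketch, not compiled): exec-time stack creation
 * typically maps the full stack range just below the sysentvec's user
 * stack top and lets it grow down.  The helper name and the use of the
 * global maxssiz as the size cap are assumptions of this sketch.
 */
#if 0
static int
example_exec_stack(vm_map_t map, struct sysentvec *sv)
{
	vm_offset_t stack_addr;
	vm_size_t ssiz;
	int rv;

	ssiz = maxssiz;			/* or an RLIMIT_STACK-derived cap */
	stack_addr = sv->sv_usrstack - ssiz;
	rv = vm_map_stack(map, stack_addr, ssiz, VM_PROT_ALL, VM_PROT_ALL,
	    MAP_STACK_GROWS_DOWN);
	return (rv == KERN_SUCCESS ? 0 : vm_mmap_to_errno(rv));
}
#endif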
3590 
3591 static int stack_guard_page = 1;
3592 SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
3593     &stack_guard_page, 0,
3594     "Specifies the number of guard pages for a stack that grows");
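
/*
 * The knob above is exported as the security.bsd.stack_guard_page sysctl
 * and, because of CTLFLAG_RWTUN, can also be set as a loader tunable.
 * For example (illustrative):
 *
 *	sysctl security.bsd.stack_guard_page=2
 *
 * keeps two unmapped guard pages between a growable stack and the
 * nearest mapping it could otherwise grow into.
 */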
3595 
3596 static int
3597 vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
3598     vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow)
3599 {
3600 	vm_map_entry_t new_entry, prev_entry;
3601 	vm_offset_t bot, gap_bot, gap_top, top;
3602 	vm_size_t init_ssize, sgp;
3603 	int orient, rv;
3604 
3605 	/*
3606 	 * The stack orientation is piggybacked with the cow argument.
3607 	 * Extract it into orient and mask the cow argument so that we
3608 	 * don't pass it around further.
3609 	 */
3610 	orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP);
3611 	KASSERT(orient != 0, ("No stack grow direction"));
3612 	KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP),
3613 	    ("bi-dir stack"));
3614 
3615 	if (addrbos < vm_map_min(map) ||
3616 	    addrbos + max_ssize > vm_map_max(map) ||
3617 	    addrbos + max_ssize <= addrbos)
3618 		return (KERN_INVALID_ADDRESS);
3619 	sgp = (vm_size_t)stack_guard_page * PAGE_SIZE;
3620 	if (sgp >= max_ssize)
3621 		return (KERN_INVALID_ARGUMENT);
3622 
3623 	init_ssize = growsize;
3624 	if (max_ssize < init_ssize + sgp)
3625 		init_ssize = max_ssize - sgp;
3626 
3627 	/* If addr is already mapped, no go */
3628 	if (vm_map_lookup_entry(map, addrbos, &prev_entry))
3629 		return (KERN_NO_SPACE);
3630 
3631 	/*
3632 	 * If we can't accommodate max_ssize in the current mapping, no go.
3633 	 */
3634 	if ((prev_entry->next != &map->header) &&
3635 	    (prev_entry->next->start < addrbos + max_ssize))
3636 		return (KERN_NO_SPACE);
3637 
3638 	/*
3639 	 * We initially map a stack of only init_ssize.  We will grow as
3640 	 * needed later.  Depending on the orientation of the stack (i.e.
3641 	 * the grow direction) we either map at the top of the range, the
3642 	 * bottom of the range or in the middle.
3643 	 *
3644 	 * Note: we would normally expect prot and max to be VM_PROT_ALL,
3645 	 * and cow to be 0.  Possibly we should eliminate these as input
3646 	 * parameters, and just pass these values here in the insert call.
3647 	 */
3648 	if (orient == MAP_STACK_GROWS_DOWN) {
3649 		bot = addrbos + max_ssize - init_ssize;
3650 		top = bot + init_ssize;
3651 		gap_bot = addrbos;
3652 		gap_top = bot;
3653 	} else /* if (orient == MAP_STACK_GROWS_UP) */ {
3654 		bot = addrbos;
3655 		top = bot + init_ssize;
3656 		gap_bot = top;
3657 		gap_top = addrbos + max_ssize;
3658 	}
3659 	rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
3660 	if (rv != KERN_SUCCESS)
3661 		return (rv);
3662 	new_entry = prev_entry->next;
3663 	KASSERT(new_entry->end == top || new_entry->start == bot,
3664 	    ("Bad entry start/end for new stack entry"));
3665 	KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
3666 	    (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
3667 	    ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
3668 	KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
3669 	    (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
3670 	    ("new entry lacks MAP_ENTRY_GROWS_UP"));
3671 	rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE,
3672 	    VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ?
3673 	    MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP));
3674 	if (rv != KERN_SUCCESS)
3675 		(void)vm_map_delete(map, bot, top);
3676 	return (rv);
3677 }
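
/*
 * Worked example (illustrative numbers): for a grows-down stack with
 * addrbos = A, max_ssize = 8 MB, growsize = 128 KB and one guard page,
 * the code above produces
 *
 *	init_ssize = 128 KB
 *	bot = A + 8 MB - 128 KB, top = A + 8 MB	(initial stack entry)
 *	gap_bot = A, gap_top = bot		(stack gap entry)
 *
 * so only the topmost 128 KB is mapped up front and the rest of the
 * range is reserved as the guarded grow area.
 */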
3678 
3679 /*
3680  * Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if we
3681  * successfully grow the stack.
3682  */
3683 static int
3684 vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry)
3685 {
3686 	vm_map_entry_t stack_entry;
3687 	struct proc *p;
3688 	struct vmspace *vm;
3689 	struct ucred *cred;
3690 	vm_offset_t gap_end, gap_start, grow_start;
3691 	size_t grow_amount, guard, max_grow;
3692 	rlim_t lmemlim, stacklim, vmemlim;
3693 	int rv, rv1;
3694 	bool gap_deleted, grow_down, is_procstack;
3695 #ifdef notyet
3696 	uint64_t limit;
3697 #endif
3698 #ifdef RACCT
3699 	int error;
3700 #endif
3701 
3702 	p = curproc;
3703 	vm = p->p_vmspace;
3704 
3705 	/*
3706 	 * Disallow stack growth when the access is performed by a
3707 	 * debugger or AIO daemon, because the wrong resource limits
3708 	 * would be applied.
3709 	 */
3710 	if (map != &p->p_vmspace->vm_map || p->p_textvp == NULL)
3711 		return (KERN_FAILURE);
3712 
3713 	MPASS(!map->system_map);
3714 
3715 	guard = stack_guard_page * PAGE_SIZE;
3716 	lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK);
3717 	stacklim = lim_cur(curthread, RLIMIT_STACK);
3718 	vmemlim = lim_cur(curthread, RLIMIT_VMEM);
3719 retry:
3720 	/* If addr is not in a hole for a stack grow area, no need to grow. */
3721 	if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry))
3722 		return (KERN_FAILURE);
3723 	if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0)
3724 		return (KERN_SUCCESS);
3725 	if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) {
3726 		stack_entry = gap_entry->next;
3727 		if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 ||
3728 		    stack_entry->start != gap_entry->end)
3729 			return (KERN_FAILURE);
3730 		grow_amount = round_page(stack_entry->start - addr);
3731 		grow_down = true;
3732 	} else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) {
3733 		stack_entry = gap_entry->prev;
3734 		if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 ||
3735 		    stack_entry->end != gap_entry->start)
3736 			return (KERN_FAILURE);
3737 		grow_amount = round_page(addr + 1 - stack_entry->end);
3738 		grow_down = false;
3739 	} else {
3740 		return (KERN_FAILURE);
3741 	}
3742 	max_grow = gap_entry->end - gap_entry->start;
3743 	if (guard > max_grow)
3744 		return (KERN_NO_SPACE);
3745 	max_grow -= guard;
3746 	if (grow_amount > max_grow)
3747 		return (KERN_NO_SPACE);
3748 
3749 	/*
3750 	 * If this is the main process stack, see if we're over the stack
3751 	 * limit.
3752 	 */
3753 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr &&
3754 	    addr < (vm_offset_t)p->p_sysent->sv_usrstack;
3755 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim))
3756 		return (KERN_NO_SPACE);
3757 
3758 #ifdef RACCT
3759 	if (racct_enable) {
3760 		PROC_LOCK(p);
3761 		if (is_procstack && racct_set(p, RACCT_STACK,
3762 		    ctob(vm->vm_ssize) + grow_amount)) {
3763 			PROC_UNLOCK(p);
3764 			return (KERN_NO_SPACE);
3765 		}
3766 		PROC_UNLOCK(p);
3767 	}
3768 #endif
3769 
3770 	grow_amount = roundup(grow_amount, sgrowsiz);
3771 	if (grow_amount > max_grow)
3772 		grow_amount = max_grow;
3773 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
3774 		grow_amount = trunc_page((vm_size_t)stacklim) -
3775 		    ctob(vm->vm_ssize);
3776 	}
3777 
3778 #ifdef notyet
3779 	PROC_LOCK(p);
3780 	limit = racct_get_available(p, RACCT_STACK);
3781 	PROC_UNLOCK(p);
3782 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
3783 		grow_amount = limit - ctob(vm->vm_ssize);
3784 #endif
3785 
3786 	if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
3787 		if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
3788 			rv = KERN_NO_SPACE;
3789 			goto out;
3790 		}
3791 #ifdef RACCT
3792 		if (racct_enable) {
3793 			PROC_LOCK(p);
3794 			if (racct_set(p, RACCT_MEMLOCK,
3795 			    ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
3796 				PROC_UNLOCK(p);
3797 				rv = KERN_NO_SPACE;
3798 				goto out;
3799 			}
3800 			PROC_UNLOCK(p);
3801 		}
3802 #endif
3803 	}
3804 
3805 	/* If we would blow our VMEM resource limit, no go */
3806 	if (map->size + grow_amount > vmemlim) {
3807 		rv = KERN_NO_SPACE;
3808 		goto out;
3809 	}
3810 #ifdef RACCT
3811 	if (racct_enable) {
3812 		PROC_LOCK(p);
3813 		if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
3814 			PROC_UNLOCK(p);
3815 			rv = KERN_NO_SPACE;
3816 			goto out;
3817 		}
3818 		PROC_UNLOCK(p);
3819 	}
3820 #endif
3821 
3822 	if (vm_map_lock_upgrade(map)) {
3823 		gap_entry = NULL;
3824 		vm_map_lock_read(map);
3825 		goto retry;
3826 	}
3827 
3828 	if (grow_down) {
3829 		grow_start = gap_entry->end - grow_amount;
3830 		if (gap_entry->start + grow_amount == gap_entry->end) {
3831 			gap_start = gap_entry->start;
3832 			gap_end = gap_entry->end;
3833 			vm_map_entry_delete(map, gap_entry);
3834 			gap_deleted = true;
3835 		} else {
3836 			MPASS(gap_entry->start < gap_entry->end - grow_amount);
3837 			gap_entry->end -= grow_amount;
3838 			vm_map_entry_resize_free(map, gap_entry);
3839 			gap_deleted = false;
3840 		}
3841 		rv = vm_map_insert(map, NULL, 0, grow_start,
3842 		    grow_start + grow_amount,
3843 		    stack_entry->protection, stack_entry->max_protection,
3844 		    MAP_STACK_GROWS_DOWN);
3845 		if (rv != KERN_SUCCESS) {
3846 			if (gap_deleted) {
3847 				rv1 = vm_map_insert(map, NULL, 0, gap_start,
3848 				    gap_end, VM_PROT_NONE, VM_PROT_NONE,
3849 				    MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN);
3850 				MPASS(rv1 == KERN_SUCCESS);
3851 			} else {
3852 				gap_entry->end += grow_amount;
3853 				vm_map_entry_resize_free(map, gap_entry);
3854 			}
3855 		}
3856 	} else {
3857 		grow_start = stack_entry->end;
3858 		cred = stack_entry->cred;
3859 		if (cred == NULL && stack_entry->object.vm_object != NULL)
3860 			cred = stack_entry->object.vm_object->cred;
3861 		if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred))
3862 			rv = KERN_NO_SPACE;
3863 		/* Grow the underlying object if applicable. */
3864 		else if (stack_entry->object.vm_object == NULL ||
3865 		    vm_object_coalesce(stack_entry->object.vm_object,
3866 		    stack_entry->offset,
3867 		    (vm_size_t)(stack_entry->end - stack_entry->start),
3868 		    (vm_size_t)grow_amount, cred != NULL)) {
3869 			if (gap_entry->start + grow_amount == gap_entry->end)
3870 				vm_map_entry_delete(map, gap_entry);
3871 			else
3872 				gap_entry->start += grow_amount;
3873 			stack_entry->end += grow_amount;
3874 			map->size += grow_amount;
3875 			vm_map_entry_resize_free(map, stack_entry);
3876 			rv = KERN_SUCCESS;
3877 		} else
3878 			rv = KERN_FAILURE;
3879 	}
3880 	if (rv == KERN_SUCCESS && is_procstack)
3881 		vm->vm_ssize += btoc(grow_amount);
3882 
3883 	/*
3884 	 * Heed the MAP_WIREFUTURE flag if it was set for this process.
3885 	 */
3886 	if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
3887 		vm_map_unlock(map);
3888 		vm_map_wire(map, grow_start, grow_start + grow_amount,
3889 		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
3890 		vm_map_lock_read(map);
3891 	} else
3892 		vm_map_lock_downgrade(map);
3893 
3894 out:
3895 #ifdef RACCT
3896 	if (racct_enable && rv != KERN_SUCCESS) {
3897 		PROC_LOCK(p);
3898 		error = racct_set(p, RACCT_VMEM, map->size);
3899 		KASSERT(error == 0, ("decreasing RACCT_VMEM failed"));
3900 		if (!old_mlock) {
3901 			error = racct_set(p, RACCT_MEMLOCK,
3902 			    ptoa(pmap_wired_count(map->pmap)));
3903 			KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed"));
3904 		}
3905 		error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize));
3906 		KASSERT(error == 0, ("decreasing RACCT_STACK failed"));
3907 		PROC_UNLOCK(p);
3908 	}
3909 #endif
3910 
3911 	return (rv);
3912 }
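
/*
 * Worked example (illustrative numbers): for a grows-down stack, a fault
 * at address addr inside the gap entry requests
 *
 *	grow_amount = round_page(stack_entry->start - addr)
 *
 * which is then rounded up to sgrowsiz.  With 4 KB pages, a fault 5000
 * bytes below the current stack bottom first yields an 8 KB grow_amount,
 * which is rounded up to sgrowsiz (128 KB by default) as long as the
 * gap, the guard pages and the resource limits allow it.
 */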
3913 
3914 /*
3915  * Unshare the specified VM space for exec.  If other processes still
3916  * share it, create a new one.  The newly created vmspace is empty.
3917  */
3918 int
3919 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
3920 {
3921 	struct vmspace *oldvmspace = p->p_vmspace;
3922 	struct vmspace *newvmspace;
3923 
3924 	KASSERT((curthread->td_pflags & TDP_EXECVMSPC) == 0,
3925 	    ("vmspace_exec recursed"));
3926 	newvmspace = vmspace_alloc(minuser, maxuser, NULL);
3927 	if (newvmspace == NULL)
3928 		return (ENOMEM);
3929 	newvmspace->vm_swrss = oldvmspace->vm_swrss;
3930 	/*
3931 	 * This code is written this way for prototype purposes.  The goal
3932 	 * is to avoid running down the vmspace here, and instead to let
3933 	 * the other processes that are still using it run it down
3934 	 * eventually.  Even though there is little or no chance of blocking
3935 	 * here, it is a good idea to keep this form for future mods.
3936 	 */
3937 	PROC_VMSPACE_LOCK(p);
3938 	p->p_vmspace = newvmspace;
3939 	PROC_VMSPACE_UNLOCK(p);
3940 	if (p == curthread->td_proc)
3941 		pmap_activate(curthread);
3942 	curthread->td_pflags |= TDP_EXECVMSPC;
3943 	return (0);
3944 }
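
/*
 * Example (illustrative sketch, not compiled): after image activation
 * the exec path is expected to notice TDP_EXECVMSPC and release the old
 * vmspace that vmspace_exec() left referenced.  oldvmspace is assumed to
 * have been saved by the caller before switching.
 */
#if 0
	if ((td->td_pflags & TDP_EXECVMSPC) != 0) {
		KASSERT(p->p_vmspace != oldvmspace,
		    ("oldvmspace still used after exec"));
		td->td_pflags &= ~TDP_EXECVMSPC;
		vmspace_free(oldvmspace);
	}
#endif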
3945 
3946 /*
3947  * Unshare the specified VM space for forcing COW.  This
3948  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
3949  */
3950 int
3951 vmspace_unshare(struct proc *p)
3952 {
3953 	struct vmspace *oldvmspace = p->p_vmspace;
3954 	struct vmspace *newvmspace;
3955 	vm_ooffset_t fork_charge;
3956 
3957 	if (oldvmspace->vm_refcnt == 1)
3958 		return (0);
3959 	fork_charge = 0;
3960 	newvmspace = vmspace_fork(oldvmspace, &fork_charge);
3961 	if (newvmspace == NULL)
3962 		return (ENOMEM);
3963 	if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) {
3964 		vmspace_free(newvmspace);
3965 		return (ENOMEM);
3966 	}
3967 	PROC_VMSPACE_LOCK(p);
3968 	p->p_vmspace = newvmspace;
3969 	PROC_VMSPACE_UNLOCK(p);
3970 	if (p == curthread->td_proc)
3971 		pmap_activate(curthread);
3972 	vmspace_free(oldvmspace);
3973 	return (0);
3974 }
3975 
3976 /*
3977  *	vm_map_lookup:
3978  *
3979  *	Finds the VM object, offset, and
3980  *	protection for a given virtual address in the
3981  *	specified map, assuming a page fault of the
3982  *	type specified.
3983  *
3984  *	Leaves the map in question locked for read; return
3985  *	values are guaranteed until a vm_map_lookup_done
3986  *	call is performed.  Note that the map argument
3987  *	is in/out; the returned map must be used in
3988  *	the call to vm_map_lookup_done.
3989  *
3990  *	A handle (out_entry) is returned for use in
3991  *	vm_map_lookup_done, to make that fast.
3992  *
3993  *	If a lookup is requested with "write protection"
3994  *	specified, the map may be changed to perform virtual
3995  *	copying operations, although the data referenced will
3996  *	remain the same.
3997  */
3998 int
3999 vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
4000 	      vm_offset_t vaddr,
4001 	      vm_prot_t fault_typea,
4002 	      vm_map_entry_t *out_entry,	/* OUT */
4003 	      vm_object_t *object,		/* OUT */
4004 	      vm_pindex_t *pindex,		/* OUT */
4005 	      vm_prot_t *out_prot,		/* OUT */
4006 	      boolean_t *wired)			/* OUT */
4007 {
4008 	vm_map_entry_t entry;
4009 	vm_map_t map = *var_map;
4010 	vm_prot_t prot;
4011 	vm_prot_t fault_type = fault_typea;
4012 	vm_object_t eobject;
4013 	vm_size_t size;
4014 	struct ucred *cred;
4015 
4016 RetryLookup:
4017 
4018 	vm_map_lock_read(map);
4019 
4020 RetryLookupLocked:
4021 	/*
4022 	 * Lookup the faulting address.
4023 	 */
4024 	if (!vm_map_lookup_entry(map, vaddr, out_entry)) {
4025 		vm_map_unlock_read(map);
4026 		return (KERN_INVALID_ADDRESS);
4027 	}
4028 
4029 	entry = *out_entry;
4030 
4031 	/*
4032 	 * Handle submaps.
4033 	 */
4034 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
4035 		vm_map_t old_map = map;
4036 
4037 		*var_map = map = entry->object.sub_map;
4038 		vm_map_unlock_read(old_map);
4039 		goto RetryLookup;
4040 	}
4041 
4042 	/*
4043 	 * Check whether this task is allowed to have this page.
4044 	 */
4045 	prot = entry->protection;
4046 	if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) {
4047 		fault_typea &= ~VM_PROT_FAULT_LOOKUP;
4048 		if (prot == VM_PROT_NONE && map != kernel_map &&
4049 		    (entry->eflags & MAP_ENTRY_GUARD) != 0 &&
4050 		    (entry->eflags & (MAP_ENTRY_STACK_GAP_DN |
4051 		    MAP_ENTRY_STACK_GAP_UP)) != 0 &&
4052 		    vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS)
4053 			goto RetryLookupLocked;
4054 	}
4055 	fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
4056 	if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
4057 		vm_map_unlock_read(map);
4058 		return (KERN_PROTECTION_FAILURE);
4059 	}
4060 	KASSERT((prot & VM_PROT_WRITE) == 0 || (entry->eflags &
4061 	    (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY)) !=
4062 	    (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY),
4063 	    ("entry %p flags %x", entry, entry->eflags));
4064 	if ((fault_typea & VM_PROT_COPY) != 0 &&
4065 	    (entry->max_protection & VM_PROT_WRITE) == 0 &&
4066 	    (entry->eflags & MAP_ENTRY_COW) == 0) {
4067 		vm_map_unlock_read(map);
4068 		return (KERN_PROTECTION_FAILURE);
4069 	}
4070 
4071 	/*
4072 	 * If this page is not pageable, we have to get it for all possible
4073 	 * accesses.
4074 	 */
4075 	*wired = (entry->wired_count != 0);
4076 	if (*wired)
4077 		fault_type = entry->protection;
4078 	size = entry->end - entry->start;
4079 	/*
4080 	 * If the entry was copy-on-write, we either ...
4081 	 */
4082 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
4083 		/*
4084 		 * If we want to write the page, we may as well handle that
4085 		 * now since we've got the map locked.
4086 		 *
4087 		 * If we don't need to write the page, we just demote the
4088 		 * permissions allowed.
4089 		 */
4090 		if ((fault_type & VM_PROT_WRITE) != 0 ||
4091 		    (fault_typea & VM_PROT_COPY) != 0) {
4092 			/*
4093 			 * Make a new object, and place it in the object
4094 			 * chain.  Note that no new references have appeared
4095 			 * -- one just moved from the map to the new
4096 			 * object.
4097 			 */
4098 			if (vm_map_lock_upgrade(map))
4099 				goto RetryLookup;
4100 
4101 			if (entry->cred == NULL) {
4102 				/*
4103 				 * The debugger owner is charged for
4104 				 * the memory.
4105 				 */
4106 				cred = curthread->td_ucred;
4107 				crhold(cred);
4108 				if (!swap_reserve_by_cred(size, cred)) {
4109 					crfree(cred);
4110 					vm_map_unlock(map);
4111 					return (KERN_RESOURCE_SHORTAGE);
4112 				}
4113 				entry->cred = cred;
4114 			}
4115 			vm_object_shadow(&entry->object.vm_object,
4116 			    &entry->offset, size);
4117 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
4118 			eobject = entry->object.vm_object;
4119 			if (eobject->cred != NULL) {
4120 				/*
4121 				 * The object was not shadowed.
4122 				 */
4123 				swap_release_by_cred(size, entry->cred);
4124 				crfree(entry->cred);
4125 				entry->cred = NULL;
4126 			} else if (entry->cred != NULL) {
4127 				VM_OBJECT_WLOCK(eobject);
4128 				eobject->cred = entry->cred;
4129 				eobject->charge = size;
4130 				VM_OBJECT_WUNLOCK(eobject);
4131 				entry->cred = NULL;
4132 			}
4133 
4134 			vm_map_lock_downgrade(map);
4135 		} else {
4136 			/*
4137 			 * We're attempting to read a copy-on-write page --
4138 			 * don't allow writes.
4139 			 */
4140 			prot &= ~VM_PROT_WRITE;
4141 		}
4142 	}
4143 
4144 	/*
4145 	 * Create an object if necessary.
4146 	 */
4147 	if (entry->object.vm_object == NULL &&
4148 	    !map->system_map) {
4149 		if (vm_map_lock_upgrade(map))
4150 			goto RetryLookup;
4151 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
4152 		    atop(size));
4153 		entry->offset = 0;
4154 		if (entry->cred != NULL) {
4155 			VM_OBJECT_WLOCK(entry->object.vm_object);
4156 			entry->object.vm_object->cred = entry->cred;
4157 			entry->object.vm_object->charge = size;
4158 			VM_OBJECT_WUNLOCK(entry->object.vm_object);
4159 			entry->cred = NULL;
4160 		}
4161 		vm_map_lock_downgrade(map);
4162 	}
4163 
4164 	/*
4165 	 * Return the object/offset from this entry.  If the entry was
4166 	 * copy-on-write or empty, it has been fixed up.
4167 	 */
4168 	*pindex = UOFF_TO_IDX((vaddr - entry->start) + entry->offset);
4169 	*object = entry->object.vm_object;
4170 
4171 	*out_prot = prot;
4172 	return (KERN_SUCCESS);
4173 }
4174 
4175 /*
4176  *	vm_map_lookup_locked:
4177  *
4178  *	Lookup the faulting address.  A version of vm_map_lookup that returns
4179  *	KERN_FAILURE instead of blocking on map lock or memory allocation.
4180  */
4181 int
4182 vm_map_lookup_locked(vm_map_t *var_map,		/* IN/OUT */
4183 		     vm_offset_t vaddr,
4184 		     vm_prot_t fault_typea,
4185 		     vm_map_entry_t *out_entry,	/* OUT */
4186 		     vm_object_t *object,	/* OUT */
4187 		     vm_pindex_t *pindex,	/* OUT */
4188 		     vm_prot_t *out_prot,	/* OUT */
4189 		     boolean_t *wired)		/* OUT */
4190 {
4191 	vm_map_entry_t entry;
4192 	vm_map_t map = *var_map;
4193 	vm_prot_t prot;
4194 	vm_prot_t fault_type = fault_typea;
4195 
4196 	/*
4197 	 * Lookup the faulting address.
4198 	 */
4199 	if (!vm_map_lookup_entry(map, vaddr, out_entry))
4200 		return (KERN_INVALID_ADDRESS);
4201 
4202 	entry = *out_entry;
4203 
4204 	/*
4205 	 * Fail if the entry refers to a submap.
4206 	 */
4207 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
4208 		return (KERN_FAILURE);
4209 
4210 	/*
4211 	 * Check whether this task is allowed to have this page.
4212 	 */
4213 	prot = entry->protection;
4214 	fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
4215 	if ((fault_type & prot) != fault_type)
4216 		return (KERN_PROTECTION_FAILURE);
4217 
4218 	/*
4219 	 * If this page is not pageable, we have to get it for all possible
4220 	 * accesses.
4221 	 */
4222 	*wired = (entry->wired_count != 0);
4223 	if (*wired)
4224 		fault_type = entry->protection;
4225 
4226 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
4227 		/*
4228 		 * Fail if the entry was copy-on-write for a write fault.
4229 		 */
4230 		if (fault_type & VM_PROT_WRITE)
4231 			return (KERN_FAILURE);
4232 		/*
4233 		 * We're attempting to read a copy-on-write page --
4234 		 * don't allow writes.
4235 		 */
4236 		prot &= ~VM_PROT_WRITE;
4237 	}
4238 
4239 	/*
4240 	 * Fail if an object should be created.
4241 	 */
4242 	if (entry->object.vm_object == NULL && !map->system_map)
4243 		return (KERN_FAILURE);
4244 
4245 	/*
4246 	 * Return the object/offset from this entry.  If the entry was
4247 	 * copy-on-write or empty, it has been fixed up.
4248 	 */
4249 	*pindex = UOFF_TO_IDX((vaddr - entry->start) + entry->offset);
4250 	*object = entry->object.vm_object;
4251 
4252 	*out_prot = prot;
4253 	return (KERN_SUCCESS);
4254 }
4255 
4256 /*
4257  *	vm_map_lookup_done:
4258  *
4259  *	Releases locks acquired by a vm_map_lookup
4260  *	(according to the handle returned by that lookup).
4261  */
4262 void
4263 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
4264 {
4265 	/*
4266 	 * Unlock the main-level map
4267 	 */
4268 	vm_map_unlock_read(map);
4269 }
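
/*
 * Example (illustrative sketch, not compiled): the usual pairing of
 * vm_map_lookup() and vm_map_lookup_done().  Note that the map returned
 * through the in/out argument, not the map originally passed in, must be
 * handed back to vm_map_lookup_done().  The helper name is hypothetical.
 */
#if 0
static int
example_lookup(vm_offset_t vaddr)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	int result;

	map = &curproc->p_vmspace->vm_map;
	result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &entry, &object,
	    &pindex, &prot, &wired);
	if (result != KERN_SUCCESS)
		return (result);
	/* ... use object and pindex while the map stays read-locked ... */
	vm_map_lookup_done(map, entry);
	return (KERN_SUCCESS);
}
#endif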
4270 
4271 #include "opt_ddb.h"
4272 #ifdef DDB
4273 #include <sys/kernel.h>
4274 
4275 #include <ddb/ddb.h>
4276 
4277 static void
4278 vm_map_print(vm_map_t map)
4279 {
4280 	vm_map_entry_t entry;
4281 
4282 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
4283 	    (void *)map,
4284 	    (void *)map->pmap, map->nentries, map->timestamp);
4285 
4286 	db_indent += 2;
4287 	for (entry = map->header.next; entry != &map->header;
4288 	    entry = entry->next) {
4289 		db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n",
4290 		    (void *)entry, (void *)entry->start, (void *)entry->end,
4291 		    entry->eflags);
4292 		{
4293 			static char *inheritance_name[4] =
4294 			{"share", "copy", "none", "donate_copy"};
4295 
4296 			db_iprintf(" prot=%x/%x/%s",
4297 			    entry->protection,
4298 			    entry->max_protection,
4299 			    inheritance_name[(int)(unsigned char)entry->inheritance]);
4300 			if (entry->wired_count != 0)
4301 				db_printf(", wired");
4302 		}
4303 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
4304 			db_printf(", share=%p, offset=0x%jx\n",
4305 			    (void *)entry->object.sub_map,
4306 			    (uintmax_t)entry->offset);
4307 			if ((entry->prev == &map->header) ||
4308 			    (entry->prev->object.sub_map !=
4309 				entry->object.sub_map)) {
4310 				db_indent += 2;
4311 				vm_map_print((vm_map_t)entry->object.sub_map);
4312 				db_indent -= 2;
4313 			}
4314 		} else {
4315 			if (entry->cred != NULL)
4316 				db_printf(", ruid %d", entry->cred->cr_ruid);
4317 			db_printf(", object=%p, offset=0x%jx",
4318 			    (void *)entry->object.vm_object,
4319 			    (uintmax_t)entry->offset);
4320 			if (entry->object.vm_object && entry->object.vm_object->cred)
4321 				db_printf(", obj ruid %d charge %jx",
4322 				    entry->object.vm_object->cred->cr_ruid,
4323 				    (uintmax_t)entry->object.vm_object->charge);
4324 			if (entry->eflags & MAP_ENTRY_COW)
4325 				db_printf(", copy (%s)",
4326 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
4327 			db_printf("\n");
4328 
4329 			if ((entry->prev == &map->header) ||
4330 			    (entry->prev->object.vm_object !=
4331 				entry->object.vm_object)) {
4332 				db_indent += 2;
4333 				vm_object_print((db_expr_t)(intptr_t)
4334 						entry->object.vm_object,
4335 						0, 0, (char *)0);
4336 				db_indent -= 2;
4337 			}
4338 		}
4339 	}
4340 	db_indent -= 2;
4341 }
4342 
4343 DB_SHOW_COMMAND(map, map)
4344 {
4345 
4346 	if (!have_addr) {
4347 		db_printf("usage: show map <addr>\n");
4348 		return;
4349 	}
4350 	vm_map_print((vm_map_t)addr);
4351 }
4352 
4353 DB_SHOW_COMMAND(procvm, procvm)
4354 {
4355 	struct proc *p;
4356 
4357 	if (have_addr) {
4358 		p = db_lookup_proc(addr);
4359 	} else {
4360 		p = curproc;
4361 	}
4362 
4363 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
4364 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
4365 	    (void *)vmspace_pmap(p->p_vmspace));
4366 
4367 	vm_map_print((vm_map_t)&p->p_vmspace->vm_map);
4368 }
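
/*
 * Example ddb(4) usage of the commands above (the map address shown is a
 * placeholder):
 *
 *	db> show procvm
 *	db> show map 0xfffff80012345678
 */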
4369 
4370 #endif /* DDB */
4371