xref: /freebsd/sys/vm/vm_map.c (revision e50dfdc9abb9eebc78636ee930ece699a837de52)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/ktr.h>
74 #include <sys/lock.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/vnode.h>
80 #include <sys/resourcevar.h>
81 #include <sys/sysent.h>
82 #include <sys/shm.h>
83 
84 #include <vm/vm.h>
85 #include <vm/vm_param.h>
86 #include <vm/pmap.h>
87 #include <vm/vm_map.h>
88 #include <vm/vm_page.h>
89 #include <vm/vm_object.h>
90 #include <vm/vm_pager.h>
91 #include <vm/vm_kern.h>
92 #include <vm/vm_extern.h>
93 #include <vm/swap_pager.h>
94 #include <vm/uma.h>
95 
96 /*
97  *	Virtual memory maps provide for the mapping, protection,
98  *	and sharing of virtual memory objects.  In addition,
99  *	this module provides for an efficient virtual copy of
100  *	memory from one map to another.
101  *
102  *	Synchronization is required prior to most operations.
103  *
104  *	Maps consist of an ordered doubly-linked list of simple
105  *	entries; a single hint is used to speed up lookups.
106  *
107  *	Since portions of maps are specified by start/end addresses,
108  *	which may not align with existing map entries, all
109  *	routines merely "clip" entries to these start/end values.
110  *	[That is, an entry is split into two, bordering at a
111  *	start or end value.]  Note that these clippings may not
112  *	always be necessary (as the two resulting entries are then
113  *	not changed); however, the clipping is done for convenience.
114  *
115  *	As mentioned above, virtual copy operations are performed
116  *	by copying VM object references from one map to
117  *	another, and then marking both regions as copy-on-write.
118  */
119 
120 /*
121  *	vm_map_startup:
122  *
123  *	Initialize the vm_map module.  Must be called before
124  *	any other vm_map routines.
125  *
126  *	Map and entry structures are allocated from the general
127  *	purpose memory pool with some exceptions:
128  *
129  *	- The kernel map and kmem submap are allocated statically.
130  *	- Kernel map entries are allocated out of a static pool.
131  *
132  *	These restrictions are necessary since malloc() uses the
133  *	maps and requires map entries.
134  */
135 
136 static struct mtx map_sleep_mtx;
137 static uma_zone_t mapentzone;
138 static uma_zone_t kmapentzone;
139 static uma_zone_t mapzone;
140 static uma_zone_t vmspace_zone;
141 static struct vm_object kmapentobj;
142 static void vmspace_zinit(void *mem, int size);
143 static void vmspace_zfini(void *mem, int size);
144 static void vm_map_zinit(void *mem, int size);
145 static void vm_map_zfini(void *mem, int size);
146 static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max);
147 
148 #ifdef INVARIANTS
149 static void vm_map_zdtor(void *mem, int size, void *arg);
150 static void vmspace_zdtor(void *mem, int size, void *arg);
151 #endif
152 
153 void
154 vm_map_startup(void)
155 {
156 	mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
157 	mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
158 #ifdef INVARIANTS
159 	    vm_map_zdtor,
160 #else
161 	    NULL,
162 #endif
163 	    vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
164 	uma_prealloc(mapzone, MAX_KMAP);
165 	kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
166 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
167 	    UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
168 	uma_prealloc(kmapentzone, MAX_KMAPENT);
169 	mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
170 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
171 	uma_prealloc(mapentzone, MAX_MAPENT);
172 }
173 
174 static void
175 vmspace_zfini(void *mem, int size)
176 {
177 	struct vmspace *vm;
178 
179 	vm = (struct vmspace *)mem;
180 
181 	vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
182 }
183 
184 static void
185 vmspace_zinit(void *mem, int size)
186 {
187 	struct vmspace *vm;
188 
189 	vm = (struct vmspace *)mem;
190 
191 	vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map));
192 }
193 
194 static void
195 vm_map_zfini(void *mem, int size)
196 {
197 	vm_map_t map;
198 
199 	map = (vm_map_t)mem;
200 	mtx_destroy(&map->system_mtx);
201 	lockdestroy(&map->lock);
202 }
203 
204 static void
205 vm_map_zinit(void *mem, int size)
206 {
207 	vm_map_t map;
208 
209 	map = (vm_map_t)mem;
210 	map->nentries = 0;
211 	map->size = 0;
212 	map->infork = 0;
213 	mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
214 	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
215 }
216 
217 #ifdef INVARIANTS
218 static void
219 vmspace_zdtor(void *mem, int size, void *arg)
220 {
221 	struct vmspace *vm;
222 
223 	vm = (struct vmspace *)mem;
224 
225 	vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
226 }
227 static void
228 vm_map_zdtor(void *mem, int size, void *arg)
229 {
230 	vm_map_t map;
231 
232 	map = (vm_map_t)mem;
233 	KASSERT(map->nentries == 0,
234 	    ("map %p nentries == %d on free.",
235 	    map, map->nentries));
236 	KASSERT(map->size == 0,
237 	    ("map %p size == %lu on free.",
238 	    map, (unsigned long)map->size));
239 	KASSERT(map->infork == 0,
240 	    ("map %p infork == %d on free.",
241 	    map, map->infork));
242 }
243 #endif	/* INVARIANTS */
244 
245 /*
246  * Allocate a vmspace structure, including a vm_map and pmap,
247  * and initialize those structures.  The refcnt is set to 1.
248  * The remaining fields must be initialized by the caller.
249  */
250 struct vmspace *
251 vmspace_alloc(min, max)
252 	vm_offset_t min, max;
253 {
254 	struct vmspace *vm;
255 
256 	GIANT_REQUIRED;
257 	vm = uma_zalloc(vmspace_zone, M_WAITOK);
258 	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
259 	_vm_map_init(&vm->vm_map, min, max);
260 	pmap_pinit(vmspace_pmap(vm));
261 	vm->vm_map.pmap = vmspace_pmap(vm);		/* XXX */
262 	vm->vm_refcnt = 1;
263 	vm->vm_shm = NULL;
264 	vm->vm_exitingcnt = 0;
265 	return (vm);
266 }
267 
268 void
269 vm_init2(void)
270 {
271 	uma_zone_set_obj(kmapentzone, &kmapentobj, lmin(cnt.v_page_count,
272 	    (VM_MAX_KERNEL_ADDRESS - KERNBASE) / PAGE_SIZE) / 8);
273 	vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
274 #ifdef INVARIANTS
275 	    vmspace_zdtor,
276 #else
277 	    NULL,
278 #endif
279 	    vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
280 	pmap_init2();
281 }
282 
283 static __inline void
284 vmspace_dofree(struct vmspace *vm)
285 {
286 	CTR1(KTR_VM, "vmspace_free: %p", vm);
287 
288 	/*
289 	 * Make sure any SysV shm is freed; it might not have been done in
290 	 * exit1().
291 	 */
292 	shmexit(vm);
293 
294 	/*
295 	 * Lock the map, to wait out all other references to it.
296 	 * Delete all of the mappings and pages they hold, then call
297 	 * the pmap module to reclaim anything left.
298 	 */
299 	vm_map_lock(&vm->vm_map);
300 	(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
301 	    vm->vm_map.max_offset);
302 	vm_map_unlock(&vm->vm_map);
303 
304 	pmap_release(vmspace_pmap(vm));
305 	uma_zfree(vmspace_zone, vm);
306 }
307 
308 void
309 vmspace_free(struct vmspace *vm)
310 {
311 	GIANT_REQUIRED;
312 
313 	if (vm->vm_refcnt == 0)
314 		panic("vmspace_free: attempt to free already freed vmspace");
315 
316 	if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0)
317 		vmspace_dofree(vm);
318 }
319 
320 void
321 vmspace_exitfree(struct proc *p)
322 {
323 	struct vmspace *vm;
324 
325 	GIANT_REQUIRED;
326 	vm = p->p_vmspace;
327 	p->p_vmspace = NULL;
328 
329 	/*
330 	 * cleanup by parent process wait()ing on exiting child.  vm_refcnt
331 	 * may not be 0 (e.g. fork() and child exits without exec()ing).
332 	 * exitingcnt may increment above 0 and drop back down to zero
333 	 * several times while vm_refcnt is held non-zero.  vm_refcnt
334 	 * may also increment above 0 and drop back down to zero several
335 	 * times while vm_exitingcnt is held non-zero.
336 	 *
337 	 * The last wait on the exiting child's vmspace will clean up
338 	 * the remainder of the vmspace.
339 	 */
340 	if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0)
341 		vmspace_dofree(vm);
342 }
343 
344 /*
345  * vmspace_swap_count() - count the approximate swap usage in pages for a
346  *			  vmspace.
347  *
348  *	The map must be locked.
349  *
350  *	Swap usage is determined by taking the proportional swap used by
351  *	VM objects backing the VM map.  To make up for fractional losses,
352  *	if the VM object has any swap use at all, the associated map entries
353  *	count for at least 1 swap page.
354  */
355 int
356 vmspace_swap_count(struct vmspace *vmspace)
357 {
358 	vm_map_t map = &vmspace->vm_map;
359 	vm_map_entry_t cur;
360 	int count = 0;
361 
362 	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
363 		vm_object_t object;
364 
365 		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
366 		    (object = cur->object.vm_object) != NULL &&
367 		    object->type == OBJT_SWAP
368 		) {
369 			int n = (cur->end - cur->start) / PAGE_SIZE;
370 
371 			if (object->un_pager.swp.swp_bcount) {
372 				count += object->un_pager.swp.swp_bcount *
373 				    SWAP_META_PAGES * n / object->size + 1;
374 			}
375 		}
376 	}
377 	return (count);
378 }
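
/*
 * Worked example (editorial illustration, not part of the original file):
 * suppose a map entry covers n = 256 pages of a swap-backed object whose
 * size is 1024 pages and which has swp_bcount * SWAP_META_PAGES = 512
 * pages of swap allocated.  The entry's proportional share is
 * 512 * 256 / 1024 + 1 = 129 pages, which is what the loop above adds to
 * the running total for that entry.
 */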
379 
380 void
381 _vm_map_lock(vm_map_t map, const char *file, int line)
382 {
383 	int error;
384 
385 	if (map->system_map)
386 		_mtx_lock_flags(&map->system_mtx, 0, file, line);
387 	else {
388 		error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
389 		KASSERT(error == 0, ("%s: failed to get lock", __func__));
390 	}
391 	map->timestamp++;
392 }
393 
394 void
395 _vm_map_unlock(vm_map_t map, const char *file, int line)
396 {
397 
398 	if (map->system_map)
399 		_mtx_unlock_flags(&map->system_mtx, 0, file, line);
400 	else
401 		lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
402 }
403 
404 void
405 _vm_map_lock_read(vm_map_t map, const char *file, int line)
406 {
407 	int error;
408 
409 	if (map->system_map)
410 		_mtx_lock_flags(&map->system_mtx, 0, file, line);
411 	else {
412 		error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
413 		KASSERT(error == 0, ("%s: failed to get lock", __func__));
414 	}
415 }
416 
417 void
418 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
419 {
420 
421 	if (map->system_map)
422 		_mtx_unlock_flags(&map->system_mtx, 0, file, line);
423 	else
424 		lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
425 }
426 
427 int
428 _vm_map_trylock(vm_map_t map, const char *file, int line)
429 {
430 	int error;
431 
432 	error = map->system_map ?
433 	    !_mtx_trylock(&map->system_mtx, 0, file, line) :
434 	    lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL, curthread);
435 	if (error == 0)
436 		map->timestamp++;
437 	return (error == 0);
438 }
439 
440 int
441 _vm_map_trylock_read(vm_map_t map, const char *file, int line)
442 {
443 	int error;
444 
445 	error = map->system_map ?
446 	    !_mtx_trylock(&map->system_mtx, 0, file, line) :
447 	    lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL, curthread);
448 	return (error == 0);
449 }
450 
451 int
452 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
453 {
454 
455 	if (map->system_map) {
456 #ifdef INVARIANTS
457 		_mtx_assert(&map->system_mtx, MA_OWNED, file, line);
458 #endif
459 	} else
460 		KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
461 		    ("%s: lock not held", __func__));
462 	map->timestamp++;
463 	return (0);
464 }
465 
466 void
467 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
468 {
469 
470 	if (map->system_map) {
471 #ifdef INVARIANTS
472 		_mtx_assert(&map->system_mtx, MA_OWNED, file, line);
473 #endif
474 	} else
475 		KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
476 		    ("%s: lock not held", __func__));
477 }
478 
479 /*
480  *	vm_map_unlock_and_wait:
481  */
482 int
483 vm_map_unlock_and_wait(vm_map_t map, boolean_t user_wait)
484 {
485 
486 	mtx_lock(&map_sleep_mtx);
487 	vm_map_unlock(map);
488 	return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps", 0));
489 }
490 
491 /*
492  *	vm_map_wakeup:
493  */
494 void
495 vm_map_wakeup(vm_map_t map)
496 {
497 
498 	/*
499 	 * Acquire and release map_sleep_mtx to prevent a wakeup()
500 	 * from being performed (and lost) between the vm_map_unlock()
501 	 * and the msleep() in vm_map_unlock_and_wait().
502 	 */
503 	mtx_lock(&map_sleep_mtx);
504 	mtx_unlock(&map_sleep_mtx);
505 	wakeup(&map->root);
506 }
507 
508 long
509 vmspace_resident_count(struct vmspace *vmspace)
510 {
511 	return pmap_resident_count(vmspace_pmap(vmspace));
512 }
513 
514 /*
515  *	vm_map_create:
516  *
517  *	Creates and returns a new empty VM map with
518  *	the given physical map structure, and having
519  *	the given lower and upper address bounds.
520  */
521 vm_map_t
522 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
523 {
524 	vm_map_t result;
525 
526 	result = uma_zalloc(mapzone, M_WAITOK);
527 	CTR1(KTR_VM, "vm_map_create: %p", result);
528 	_vm_map_init(result, min, max);
529 	result->pmap = pmap;
530 	return (result);
531 }
532 
533 /*
534  * Initialize an existing vm_map structure
535  * such as that in the vmspace structure.
536  * The pmap is set elsewhere.
537  */
538 static void
539 _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
540 {
541 
542 	map->header.next = map->header.prev = &map->header;
543 	map->needs_wakeup = FALSE;
544 	map->system_map = 0;
545 	map->min_offset = min;
546 	map->max_offset = max;
547 	map->first_free = &map->header;
548 	map->root = NULL;
549 	map->timestamp = 0;
550 }
551 
552 void
553 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
554 {
555 	_vm_map_init(map, min, max);
556 	mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
557 	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
558 }
559 
560 /*
561  *	vm_map_entry_dispose:	[ internal use only ]
562  *
563  *	Inverse of vm_map_entry_create.
564  */
565 static void
566 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
567 {
568 	uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
569 }
570 
571 /*
572  *	vm_map_entry_create:	[ internal use only ]
573  *
574  *	Allocates a VM map entry for insertion.
575  *	No entry fields are filled in.
576  */
577 static vm_map_entry_t
578 vm_map_entry_create(vm_map_t map)
579 {
580 	vm_map_entry_t new_entry;
581 
582 	if (map->system_map)
583 		new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
584 	else
585 		new_entry = uma_zalloc(mapentzone, M_WAITOK);
586 	if (new_entry == NULL)
587 		panic("vm_map_entry_create: kernel resources exhausted");
588 	return (new_entry);
589 }
590 
591 /*
592  *	vm_map_entry_set_behavior:
593  *
594  *	Set the expected access behavior, either normal, random, or
595  *	sequential.
596  */
597 static __inline void
598 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
599 {
600 	entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
601 	    (behavior & MAP_ENTRY_BEHAV_MASK);
602 }
603 
604 /*
605  *	vm_map_entry_splay:
606  *
607  *	Implements Sleator and Tarjan's top-down splay algorithm.  Returns
608  *	the vm_map_entry containing the given address.  If, however, that
609  *	address is not found in the vm_map, returns a vm_map_entry that is
610  *	adjacent to the address, coming before or after it.
611  */
612 static vm_map_entry_t
613 vm_map_entry_splay(vm_offset_t address, vm_map_entry_t root)
614 {
615 	struct vm_map_entry dummy;
616 	vm_map_entry_t lefttreemax, righttreemin, y;
617 
618 	if (root == NULL)
619 		return (root);
620 	lefttreemax = righttreemin = &dummy;
621 	for (;; root = y) {
622 		if (address < root->start) {
623 			if ((y = root->left) == NULL)
624 				break;
625 			if (address < y->start) {
626 				/* Rotate right. */
627 				root->left = y->right;
628 				y->right = root;
629 				root = y;
630 				if ((y = root->left) == NULL)
631 					break;
632 			}
633 			/* Link into the new root's right tree. */
634 			righttreemin->left = root;
635 			righttreemin = root;
636 		} else if (address >= root->end) {
637 			if ((y = root->right) == NULL)
638 				break;
639 			if (address >= y->end) {
640 				/* Rotate left. */
641 				root->right = y->left;
642 				y->left = root;
643 				root = y;
644 				if ((y = root->right) == NULL)
645 					break;
646 			}
647 			/* Link into the new root's left tree. */
648 			lefttreemax->right = root;
649 			lefttreemax = root;
650 		} else
651 			break;
652 	}
653 	/* Assemble the new root. */
654 	lefttreemax->right = root->left;
655 	righttreemin->left = root->right;
656 	root->left = dummy.right;
657 	root->right = dummy.left;
658 	return (root);
659 }
660 
661 /*
662  *	vm_map_entry_{un,}link:
663  *
664  *	Insert/remove entries from maps.
665  */
666 static void
667 vm_map_entry_link(vm_map_t map,
668 		  vm_map_entry_t after_where,
669 		  vm_map_entry_t entry)
670 {
671 
672 	CTR4(KTR_VM,
673 	    "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
674 	    map->nentries, entry, after_where);
675 	map->nentries++;
676 	entry->prev = after_where;
677 	entry->next = after_where->next;
678 	entry->next->prev = entry;
679 	after_where->next = entry;
680 
681 	if (after_where != &map->header) {
682 		if (after_where != map->root)
683 			vm_map_entry_splay(after_where->start, map->root);
684 		entry->right = after_where->right;
685 		entry->left = after_where;
686 		after_where->right = NULL;
687 	} else {
688 		entry->right = map->root;
689 		entry->left = NULL;
690 	}
691 	map->root = entry;
692 }
693 
694 static void
695 vm_map_entry_unlink(vm_map_t map,
696 		    vm_map_entry_t entry)
697 {
698 	vm_map_entry_t next, prev, root;
699 
700 	if (entry != map->root)
701 		vm_map_entry_splay(entry->start, map->root);
702 	if (entry->left == NULL)
703 		root = entry->right;
704 	else {
705 		root = vm_map_entry_splay(entry->start, entry->left);
706 		root->right = entry->right;
707 	}
708 	map->root = root;
709 
710 	prev = entry->prev;
711 	next = entry->next;
712 	next->prev = prev;
713 	prev->next = next;
714 	map->nentries--;
715 	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
716 	    map->nentries, entry);
717 }
718 
719 /*
720  *	vm_map_lookup_entry:	[ internal use only ]
721  *
722  *	Finds the map entry containing (or
723  *	immediately preceding) the specified address
724  *	in the given map; the entry is returned
725  *	in the "entry" parameter.  The boolean
726  *	result indicates whether the address is
727  *	actually contained in the map.
728  */
729 boolean_t
730 vm_map_lookup_entry(
731 	vm_map_t map,
732 	vm_offset_t address,
733 	vm_map_entry_t *entry)	/* OUT */
734 {
735 	vm_map_entry_t cur;
736 
737 	cur = vm_map_entry_splay(address, map->root);
738 	if (cur == NULL)
739 		*entry = &map->header;
740 	else {
741 		map->root = cur;
742 
743 		if (address >= cur->start) {
744 			*entry = cur;
745 			if (cur->end > address)
746 				return (TRUE);
747 		} else
748 			*entry = cur->prev;
749 	}
750 	return (FALSE);
751 }
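
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a minimal caller of vm_map_lookup_entry().  The helper name is invented;
 * the map must be locked around the call because the lookup splays the
 * entry tree and publishes a new root.
 */
#if 0
static boolean_t
example_address_is_mapped(vm_map_t map, vm_offset_t addr)
{
	vm_map_entry_t entry;
	boolean_t mapped;

	vm_map_lock_read(map);
	mapped = vm_map_lookup_entry(map, addr, &entry);
	vm_map_unlock_read(map);
	return (mapped);
}
#endif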
752 
753 /*
754  *	vm_map_insert:
755  *
756  *	Inserts the given whole VM object into the target
757  *	map at the specified address range.  The object's
758  *	size should match that of the address range.
759  *
760  *	Requires that the map be locked, and leaves it so.
761  *
762  *	If object is non-NULL, ref count must be bumped by caller
763  *	prior to making call to account for the new entry.
764  */
765 int
766 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
767 	      vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
768 	      int cow)
769 {
770 	vm_map_entry_t new_entry;
771 	vm_map_entry_t prev_entry;
772 	vm_map_entry_t temp_entry;
773 	vm_eflags_t protoeflags;
774 
775 	/*
776 	 * Check that the start and end points are not bogus.
777 	 */
778 	if ((start < map->min_offset) || (end > map->max_offset) ||
779 	    (start >= end))
780 		return (KERN_INVALID_ADDRESS);
781 
782 	/*
783 	 * Find the entry prior to the proposed starting address; if it's part
784 	 * of an existing entry, this range is bogus.
785 	 */
786 	if (vm_map_lookup_entry(map, start, &temp_entry))
787 		return (KERN_NO_SPACE);
788 
789 	prev_entry = temp_entry;
790 
791 	/*
792 	 * Assert that the next entry doesn't overlap the end point.
793 	 */
794 	if ((prev_entry->next != &map->header) &&
795 	    (prev_entry->next->start < end))
796 		return (KERN_NO_SPACE);
797 
798 	protoeflags = 0;
799 
800 	if (cow & MAP_COPY_ON_WRITE)
801 		protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
802 
803 	if (cow & MAP_NOFAULT) {
804 		protoeflags |= MAP_ENTRY_NOFAULT;
805 
806 		KASSERT(object == NULL,
807 			("vm_map_insert: paradoxical MAP_NOFAULT request"));
808 	}
809 	if (cow & MAP_DISABLE_SYNCER)
810 		protoeflags |= MAP_ENTRY_NOSYNC;
811 	if (cow & MAP_DISABLE_COREDUMP)
812 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
813 
814 	if (object != NULL) {
815 		/*
816 		 * OBJ_ONEMAPPING must be cleared unless this mapping
817 		 * is trivially proven to be the only mapping for any
818 		 * of the object's pages.  (Object granularity
819 		 * reference counting is insufficient to recognize
820 		 * aliases with precision.)
821 		 */
822 		if (object != kmem_object)
823 			mtx_lock(&Giant);
824 		VM_OBJECT_LOCK(object);
825 		if (object->ref_count > 1 || object->shadow_count != 0)
826 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
827 		VM_OBJECT_UNLOCK(object);
828 		if (object != kmem_object)
829 			mtx_unlock(&Giant);
830 	}
831 	else if ((prev_entry != &map->header) &&
832 		 (prev_entry->eflags == protoeflags) &&
833 		 (prev_entry->end == start) &&
834 		 (prev_entry->wired_count == 0) &&
835 		 ((prev_entry->object.vm_object == NULL) ||
836 		  vm_object_coalesce(prev_entry->object.vm_object,
837 				     OFF_TO_IDX(prev_entry->offset),
838 				     (vm_size_t)(prev_entry->end - prev_entry->start),
839 				     (vm_size_t)(end - prev_entry->end)))) {
840 		/*
841 		 * We were able to extend the object.  Determine if we
842 		 * can extend the previous map entry to include the
843 		 * new range as well.
844 		 */
845 		if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
846 		    (prev_entry->protection == prot) &&
847 		    (prev_entry->max_protection == max)) {
848 			map->size += (end - prev_entry->end);
849 			prev_entry->end = end;
850 			vm_map_simplify_entry(map, prev_entry);
851 			return (KERN_SUCCESS);
852 		}
853 
854 		/*
855 		 * If we can extend the object but cannot extend the
856 		 * map entry, we have to create a new map entry.  We
857 		 * must bump the ref count on the extended object to
858 		 * account for it.  object may be NULL.
859 		 */
860 		object = prev_entry->object.vm_object;
861 		offset = prev_entry->offset +
862 			(prev_entry->end - prev_entry->start);
863 		vm_object_reference(object);
864 	}
865 
866 	/*
867 	 * NOTE: if conditionals fail, object can be NULL here.  This occurs
868 	 * in things like the buffer map where we manage kva but do not manage
869 	 * backing objects.
870 	 */
871 
872 	/*
873 	 * Create a new entry
874 	 */
875 	new_entry = vm_map_entry_create(map);
876 	new_entry->start = start;
877 	new_entry->end = end;
878 
879 	new_entry->eflags = protoeflags;
880 	new_entry->object.vm_object = object;
881 	new_entry->offset = offset;
882 	new_entry->avail_ssize = 0;
883 
884 	new_entry->inheritance = VM_INHERIT_DEFAULT;
885 	new_entry->protection = prot;
886 	new_entry->max_protection = max;
887 	new_entry->wired_count = 0;
888 
889 	/*
890 	 * Insert the new entry into the list
891 	 */
892 	vm_map_entry_link(map, prev_entry, new_entry);
893 	map->size += new_entry->end - new_entry->start;
894 
895 	/*
896 	 * Update the free space hint
897 	 */
898 	if ((map->first_free == prev_entry) &&
899 	    (prev_entry->end >= new_entry->start)) {
900 		map->first_free = new_entry;
901 	}
902 
903 #if 0
904 	/*
905 	 * Temporarily removed to avoid MAP_STACK panic, due to
906 	 * MAP_STACK being a huge hack.  Will be added back in
907 	 * when MAP_STACK (and the user stack mapping) is fixed.
908 	 */
909 	/*
910 	 * It may be possible to simplify the entry
911 	 */
912 	vm_map_simplify_entry(map, new_entry);
913 #endif
914 
915 	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
916 		mtx_lock(&Giant);
917 		pmap_object_init_pt(map->pmap, start,
918 				    object, OFF_TO_IDX(offset), end - start,
919 				    cow & MAP_PREFAULT_PARTIAL);
920 		mtx_unlock(&Giant);
921 	}
922 
923 	return (KERN_SUCCESS);
924 }
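
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * inserting an object mapping by hand, following the rules in the header
 * comment of vm_map_insert() -- the map must be locked and, for a non-NULL
 * object, the caller supplies the reference that the new entry will own.
 * The helper name is invented for the example.
 */
#if 0
static int
example_insert_object(vm_map_t map, vm_object_t object, vm_offset_t start,
    vm_size_t size)
{
	int rv;

	vm_object_reference(object);	/* reference owned by the new entry */
	vm_map_lock(map);
	rv = vm_map_insert(map, object, 0, start, start + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);
	if (rv != KERN_SUCCESS)
		vm_object_deallocate(object);	/* insertion failed, drop it */
	return (rv);
}
#endif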
925 
926 /*
927  * Find sufficient space for `length' bytes in the given map, starting at
928  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
929  */
930 int
931 vm_map_findspace(
932 	vm_map_t map,
933 	vm_offset_t start,
934 	vm_size_t length,
935 	vm_offset_t *addr)
936 {
937 	vm_map_entry_t entry, next;
938 	vm_offset_t end;
939 
940 	if (start < map->min_offset)
941 		start = map->min_offset;
942 	if (start > map->max_offset)
943 		return (1);
944 
945 	/*
946 	 * Look for the first possible address; if there's already something
947 	 * at this address, we have to start after it.
948 	 */
949 	if (start == map->min_offset) {
950 		if ((entry = map->first_free) != &map->header)
951 			start = entry->end;
952 	} else {
953 		vm_map_entry_t tmp;
954 
955 		if (vm_map_lookup_entry(map, start, &tmp))
956 			start = tmp->end;
957 		entry = tmp;
958 	}
959 
960 	/*
961 	 * Look through the rest of the map, trying to fit a new region in the
962 	 * gap between existing regions, or after the very last region.
963 	 */
964 	for (;; start = (entry = next)->end) {
965 		/*
966 		 * Find the end of the proposed new region.  Be sure we didn't
967 		 * go beyond the end of the map, or wrap around the address;
968 		 * if so, we lose.  Otherwise, if this is the last entry, or
969 		 * if the proposed new region fits before the next entry, we
970 		 * win.
971 		 */
972 		end = start + length;
973 		if (end > map->max_offset || end < start)
974 			return (1);
975 		next = entry->next;
976 		if (next == &map->header || next->start >= end)
977 			break;
978 	}
979 	*addr = start;
980 	if (map == kernel_map) {
981 		vm_offset_t ksize;
982 		if ((ksize = round_page(start + length)) > kernel_vm_end) {
983 			pmap_growkernel(ksize);
984 		}
985 	}
986 	return (0);
987 }
988 
989 /*
990  *	vm_map_find finds an unallocated region in the target address
991  *	map with the given length.  The search is defined to be
992  *	first-fit from the specified address; the region found is
993  *	returned in the same parameter.
994  *
995  *	If object is non-NULL, ref count must be bumped by caller
996  *	prior to making call to account for the new entry.
997  */
998 int
999 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1000 	    vm_offset_t *addr,	/* IN/OUT */
1001 	    vm_size_t length, boolean_t find_space, vm_prot_t prot,
1002 	    vm_prot_t max, int cow)
1003 {
1004 	vm_offset_t start;
1005 	int result, s = 0;
1006 
1007 	start = *addr;
1008 
1009 	if (map == kmem_map)
1010 		s = splvm();
1011 
1012 	vm_map_lock(map);
1013 	if (find_space) {
1014 		if (vm_map_findspace(map, start, length, addr)) {
1015 			vm_map_unlock(map);
1016 			if (map == kmem_map)
1017 				splx(s);
1018 			return (KERN_NO_SPACE);
1019 		}
1020 		start = *addr;
1021 	}
1022 	result = vm_map_insert(map, object, offset,
1023 		start, start + length, prot, max, cow);
1024 	vm_map_unlock(map);
1025 
1026 	if (map == kmem_map)
1027 		splx(s);
1028 
1029 	return (result);
1030 }
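
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * reserving anonymous kernel virtual address space with vm_map_find().
 * With find_space == TRUE the search is first-fit starting at *addrp and
 * the chosen address comes back through the same pointer.  The helper
 * name is invented for the example.
 */
#if 0
static int
example_reserve_kva(vm_size_t size, vm_offset_t *addrp)
{

	*addrp = vm_map_min(kernel_map);
	return (vm_map_find(kernel_map, NULL, 0, addrp, size, TRUE,
	    VM_PROT_ALL, VM_PROT_ALL, 0));
}
#endif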
1031 
1032 /*
1033  *	vm_map_simplify_entry:
1034  *
1035  *	Simplify the given map entry by merging with either neighbor.  This
1036  *	routine also has the ability to merge with both neighbors.
1037  *
1038  *	The map must be locked.
1039  *
1040  *	This routine guarantees that the passed entry remains valid (though
1041  *	possibly extended).  When merging, this routine may delete one or
1042  *	both neighbors.
1043  */
1044 void
1045 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
1046 {
1047 	vm_map_entry_t next, prev;
1048 	vm_size_t prevsize, esize;
1049 
1050 	if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP))
1051 		return;
1052 
1053 	prev = entry->prev;
1054 	if (prev != &map->header) {
1055 		prevsize = prev->end - prev->start;
1056 		if ( (prev->end == entry->start) &&
1057 		     (prev->object.vm_object == entry->object.vm_object) &&
1058 		     (!prev->object.vm_object ||
1059 			(prev->offset + prevsize == entry->offset)) &&
1060 		     (prev->eflags == entry->eflags) &&
1061 		     (prev->protection == entry->protection) &&
1062 		     (prev->max_protection == entry->max_protection) &&
1063 		     (prev->inheritance == entry->inheritance) &&
1064 		     (prev->wired_count == entry->wired_count)) {
1065 			if (map->first_free == prev)
1066 				map->first_free = entry;
1067 			vm_map_entry_unlink(map, prev);
1068 			entry->start = prev->start;
1069 			entry->offset = prev->offset;
1070 			if (prev->object.vm_object)
1071 				vm_object_deallocate(prev->object.vm_object);
1072 			vm_map_entry_dispose(map, prev);
1073 		}
1074 	}
1075 
1076 	next = entry->next;
1077 	if (next != &map->header) {
1078 		esize = entry->end - entry->start;
1079 		if ((entry->end == next->start) &&
1080 		    (next->object.vm_object == entry->object.vm_object) &&
1081 		     (!entry->object.vm_object ||
1082 			(entry->offset + esize == next->offset)) &&
1083 		    (next->eflags == entry->eflags) &&
1084 		    (next->protection == entry->protection) &&
1085 		    (next->max_protection == entry->max_protection) &&
1086 		    (next->inheritance == entry->inheritance) &&
1087 		    (next->wired_count == entry->wired_count)) {
1088 			if (map->first_free == next)
1089 				map->first_free = entry;
1090 			vm_map_entry_unlink(map, next);
1091 			entry->end = next->end;
1092 			if (next->object.vm_object)
1093 				vm_object_deallocate(next->object.vm_object);
1094 			vm_map_entry_dispose(map, next);
1095 		}
1096 	}
1097 }
1098 /*
1099  *	vm_map_clip_start:	[ internal use only ]
1100  *
1101  *	Asserts that the given entry begins at or after
1102  *	the specified address; if necessary,
1103  *	it splits the entry into two.
1104  */
1105 #define vm_map_clip_start(map, entry, startaddr) \
1106 { \
1107 	if (startaddr > entry->start) \
1108 		_vm_map_clip_start(map, entry, startaddr); \
1109 }
1110 
1111 /*
1112  *	This routine is called only when it is known that
1113  *	the entry must be split.
1114  */
1115 static void
1116 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
1117 {
1118 	vm_map_entry_t new_entry;
1119 
1120 	/*
1121 	 * Split off the front portion -- note that we must insert the new
1122 	 * entry BEFORE this one, so that this entry has the specified
1123 	 * starting address.
1124 	 */
1125 	vm_map_simplify_entry(map, entry);
1126 
1127 	/*
1128 	 * If there is no object backing this entry, we might as well create
1129 	 * one now.  If we defer it, an object can get created after the map
1130 	 * is clipped, and individual objects will be created for the split-up
1131 	 * map.  This is a bit of a hack, but is also about the best place to
1132 	 * put this improvement.
1133 	 */
1134 	if (entry->object.vm_object == NULL && !map->system_map) {
1135 		vm_object_t object;
1136 		object = vm_object_allocate(OBJT_DEFAULT,
1137 				atop(entry->end - entry->start));
1138 		entry->object.vm_object = object;
1139 		entry->offset = 0;
1140 	}
1141 
1142 	new_entry = vm_map_entry_create(map);
1143 	*new_entry = *entry;
1144 
1145 	new_entry->end = start;
1146 	entry->offset += (start - entry->start);
1147 	entry->start = start;
1148 
1149 	vm_map_entry_link(map, entry->prev, new_entry);
1150 
1151 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1152 		vm_object_reference(new_entry->object.vm_object);
1153 	}
1154 }
1155 
1156 /*
1157  *	vm_map_clip_end:	[ internal use only ]
1158  *
1159  *	Asserts that the given entry ends at or before
1160  *	the specified address; if necessary,
1161  *	it splits the entry into two.
1162  */
1163 #define vm_map_clip_end(map, entry, endaddr) \
1164 { \
1165 	if ((endaddr) < (entry->end)) \
1166 		_vm_map_clip_end((map), (entry), (endaddr)); \
1167 }
1168 
1169 /*
1170  *	This routine is called only when it is known that
1171  *	the entry must be split.
1172  */
1173 static void
1174 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1175 {
1176 	vm_map_entry_t new_entry;
1177 
1178 	/*
1179 	 * If there is no object backing this entry, we might as well create
1180 	 * one now.  If we defer it, an object can get created after the map
1181 	 * is clipped, and individual objects will be created for the split-up
1182 	 * map.  This is a bit of a hack, but is also about the best place to
1183 	 * put this improvement.
1184 	 */
1185 	if (entry->object.vm_object == NULL && !map->system_map) {
1186 		vm_object_t object;
1187 		object = vm_object_allocate(OBJT_DEFAULT,
1188 				atop(entry->end - entry->start));
1189 		entry->object.vm_object = object;
1190 		entry->offset = 0;
1191 	}
1192 
1193 	/*
1194 	 * Create a new entry and insert it AFTER the specified entry
1195 	 */
1196 	new_entry = vm_map_entry_create(map);
1197 	*new_entry = *entry;
1198 
1199 	new_entry->start = entry->end = end;
1200 	new_entry->offset += (end - entry->start);
1201 
1202 	vm_map_entry_link(map, entry, new_entry);
1203 
1204 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1205 		vm_object_reference(new_entry->object.vm_object);
1206 	}
1207 }
1208 
1209 /*
1210  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
1211  *
1212  *	Asserts that the starting and ending region
1213  *	addresses fall within the valid range of the map.
1214  */
1215 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
1216 		{					\
1217 		if (start < vm_map_min(map))		\
1218 			start = vm_map_min(map);	\
1219 		if (end > vm_map_max(map))		\
1220 			end = vm_map_max(map);		\
1221 		if (start > end)			\
1222 			start = end;			\
1223 		}
1224 
1225 /*
1226  *	vm_map_submap:		[ kernel use only ]
1227  *
1228  *	Mark the given range as handled by a subordinate map.
1229  *
1230  *	This range must have been created with vm_map_find,
1231  *	and no other operations may have been performed on this
1232  *	range prior to calling vm_map_submap.
1233  *
1234  *	Only a limited number of operations can be performed
1235  *	within this range after calling vm_map_submap:
1236  *		vm_fault
1237  *	[Don't try vm_map_copy!]
1238  *
1239  *	To remove a submapping, one must first remove the
1240  *	range from the superior map, and then destroy the
1241  *	submap (if desired).  [Better yet, don't try it.]
1242  */
1243 int
1244 vm_map_submap(
1245 	vm_map_t map,
1246 	vm_offset_t start,
1247 	vm_offset_t end,
1248 	vm_map_t submap)
1249 {
1250 	vm_map_entry_t entry;
1251 	int result = KERN_INVALID_ARGUMENT;
1252 
1253 	vm_map_lock(map);
1254 
1255 	VM_MAP_RANGE_CHECK(map, start, end);
1256 
1257 	if (vm_map_lookup_entry(map, start, &entry)) {
1258 		vm_map_clip_start(map, entry, start);
1259 	} else
1260 		entry = entry->next;
1261 
1262 	vm_map_clip_end(map, entry, end);
1263 
1264 	if ((entry->start == start) && (entry->end == end) &&
1265 	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1266 	    (entry->object.vm_object == NULL)) {
1267 		entry->object.sub_map = submap;
1268 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1269 		result = KERN_SUCCESS;
1270 	}
1271 	vm_map_unlock(map);
1272 
1273 	return (result);
1274 }
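
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * carving a submap out of the kernel map, following the protocol in the
 * header comment of vm_map_submap() -- reserve the range with vm_map_find()
 * first, then mark it as a submap.  The helper name and the choice to share
 * the kernel pmap are assumptions made for the example.
 */
#if 0
static vm_map_t
example_create_submap(vm_size_t size, vm_offset_t *minp, vm_offset_t *maxp)
{
	vm_map_t submap;
	vm_offset_t addr;

	addr = vm_map_min(kernel_map);
	if (vm_map_find(kernel_map, NULL, 0, &addr, size, TRUE,
	    VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
		return (NULL);
	submap = vm_map_create(kernel_map->pmap, addr, addr + size);
	if (vm_map_submap(kernel_map, addr, addr + size, submap) !=
	    KERN_SUCCESS)
		return (NULL);
	*minp = addr;
	*maxp = addr + size;
	return (submap);
}
#endif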
1275 
1276 /*
1277  *	vm_map_protect:
1278  *
1279  *	Sets the protection of the specified address
1280  *	region in the target map.  If "set_max" is
1281  *	specified, the maximum protection is to be set;
1282  *	otherwise, only the current protection is affected.
1283  */
1284 int
1285 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1286 	       vm_prot_t new_prot, boolean_t set_max)
1287 {
1288 	vm_map_entry_t current;
1289 	vm_map_entry_t entry;
1290 
1291 	vm_map_lock(map);
1292 
1293 	VM_MAP_RANGE_CHECK(map, start, end);
1294 
1295 	if (vm_map_lookup_entry(map, start, &entry)) {
1296 		vm_map_clip_start(map, entry, start);
1297 	} else {
1298 		entry = entry->next;
1299 	}
1300 
1301 	/*
1302 	 * Make a first pass to check for protection violations.
1303 	 */
1304 	current = entry;
1305 	while ((current != &map->header) && (current->start < end)) {
1306 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1307 			vm_map_unlock(map);
1308 			return (KERN_INVALID_ARGUMENT);
1309 		}
1310 		if ((new_prot & current->max_protection) != new_prot) {
1311 			vm_map_unlock(map);
1312 			return (KERN_PROTECTION_FAILURE);
1313 		}
1314 		current = current->next;
1315 	}
1316 
1317 	/*
1318 	 * Go back and fix up protections. [Note that clipping is not
1319 	 * necessary the second time.]
1320 	 */
1321 	current = entry;
1322 	while ((current != &map->header) && (current->start < end)) {
1323 		vm_prot_t old_prot;
1324 
1325 		vm_map_clip_end(map, current, end);
1326 
1327 		old_prot = current->protection;
1328 		if (set_max)
1329 			current->protection =
1330 			    (current->max_protection = new_prot) &
1331 			    old_prot;
1332 		else
1333 			current->protection = new_prot;
1334 
1335 		/*
1336 		 * Update physical map if necessary. Worry about copy-on-write
1337 		 * here -- CHECK THIS XXX
1338 		 */
1339 		if (current->protection != old_prot) {
1340 			mtx_lock(&Giant);
1341 			vm_page_lock_queues();
1342 #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1343 							VM_PROT_ALL)
1344 			pmap_protect(map->pmap, current->start,
1345 			    current->end,
1346 			    current->protection & MASK(current));
1347 #undef	MASK
1348 			vm_page_unlock_queues();
1349 			mtx_unlock(&Giant);
1350 		}
1351 		vm_map_simplify_entry(map, current);
1352 		current = current->next;
1353 	}
1354 	vm_map_unlock(map);
1355 	return (KERN_SUCCESS);
1356 }
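
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * dropping write permission on a range.  Passing set_max == FALSE changes
 * only the current protection and leaves max_protection alone, so writing
 * can later be re-enabled.  The helper name is invented; VM_PROT_READ is
 * the standard protection flag from <vm/vm.h>.
 */
#if 0
static int
example_make_readonly(vm_map_t map, vm_offset_t start, vm_offset_t end)
{

	return (vm_map_protect(map, start, end, VM_PROT_READ, FALSE));
}
#endif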
1357 
1358 /*
1359  *	vm_map_madvise:
1360  *
1361  * 	This routine traverses a process's map handling the madvise
1362  *	system call.  Advisories are classified as either those affecting
1363  *	the vm_map_entry structure, or those affecting the underlying
1364  *	objects.
1365  */
1366 int
1367 vm_map_madvise(
1368 	vm_map_t map,
1369 	vm_offset_t start,
1370 	vm_offset_t end,
1371 	int behav)
1372 {
1373 	vm_map_entry_t current, entry;
1374 	int modify_map = 0;
1375 
1376 	/*
1377 	 * Some madvise calls directly modify the vm_map_entry, in which case
1378 	 * we need to use an exclusive lock on the map and we need to perform
1379 	 * various clipping operations.  Otherwise we only need a read-lock
1380 	 * on the map.
1381 	 */
1382 	switch(behav) {
1383 	case MADV_NORMAL:
1384 	case MADV_SEQUENTIAL:
1385 	case MADV_RANDOM:
1386 	case MADV_NOSYNC:
1387 	case MADV_AUTOSYNC:
1388 	case MADV_NOCORE:
1389 	case MADV_CORE:
1390 		modify_map = 1;
1391 		vm_map_lock(map);
1392 		break;
1393 	case MADV_WILLNEED:
1394 	case MADV_DONTNEED:
1395 	case MADV_FREE:
1396 		vm_map_lock_read(map);
1397 		break;
1398 	default:
1399 		return (KERN_INVALID_ARGUMENT);
1400 	}
1401 
1402 	/*
1403 	 * Locate starting entry and clip if necessary.
1404 	 */
1405 	VM_MAP_RANGE_CHECK(map, start, end);
1406 
1407 	if (vm_map_lookup_entry(map, start, &entry)) {
1408 		if (modify_map)
1409 			vm_map_clip_start(map, entry, start);
1410 	} else {
1411 		entry = entry->next;
1412 	}
1413 
1414 	if (modify_map) {
1415 		/*
1416 		 * madvise behaviors that are implemented in the vm_map_entry.
1417 		 *
1418 		 * We clip the vm_map_entry so that behavioral changes are
1419 		 * limited to the specified address range.
1420 		 */
1421 		for (current = entry;
1422 		     (current != &map->header) && (current->start < end);
1423 		     current = current->next
1424 		) {
1425 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1426 				continue;
1427 
1428 			vm_map_clip_end(map, current, end);
1429 
1430 			switch (behav) {
1431 			case MADV_NORMAL:
1432 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1433 				break;
1434 			case MADV_SEQUENTIAL:
1435 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1436 				break;
1437 			case MADV_RANDOM:
1438 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1439 				break;
1440 			case MADV_NOSYNC:
1441 				current->eflags |= MAP_ENTRY_NOSYNC;
1442 				break;
1443 			case MADV_AUTOSYNC:
1444 				current->eflags &= ~MAP_ENTRY_NOSYNC;
1445 				break;
1446 			case MADV_NOCORE:
1447 				current->eflags |= MAP_ENTRY_NOCOREDUMP;
1448 				break;
1449 			case MADV_CORE:
1450 				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1451 				break;
1452 			default:
1453 				break;
1454 			}
1455 			vm_map_simplify_entry(map, current);
1456 		}
1457 		vm_map_unlock(map);
1458 	} else {
1459 		vm_pindex_t pindex;
1460 		int count;
1461 
1462 		/*
1463 		 * madvise behaviors that are implemented in the underlying
1464 		 * vm_object.
1465 		 *
1466 		 * Since we don't clip the vm_map_entry, we have to clip
1467 		 * the vm_object pindex and count.
1468 		 */
1469 		for (current = entry;
1470 		     (current != &map->header) && (current->start < end);
1471 		     current = current->next
1472 		) {
1473 			vm_offset_t useStart;
1474 
1475 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1476 				continue;
1477 
1478 			pindex = OFF_TO_IDX(current->offset);
1479 			count = atop(current->end - current->start);
1480 			useStart = current->start;
1481 
1482 			if (current->start < start) {
1483 				pindex += atop(start - current->start);
1484 				count -= atop(start - current->start);
1485 				useStart = start;
1486 			}
1487 			if (current->end > end)
1488 				count -= atop(current->end - end);
1489 
1490 			if (count <= 0)
1491 				continue;
1492 
1493 			vm_object_madvise(current->object.vm_object,
1494 					  pindex, count, behav);
1495 			if (behav == MADV_WILLNEED) {
1496 				mtx_lock(&Giant);
1497 				pmap_object_init_pt(
1498 				    map->pmap,
1499 				    useStart,
1500 				    current->object.vm_object,
1501 				    pindex,
1502 				    (count << PAGE_SHIFT),
1503 				    MAP_PREFAULT_MADVISE
1504 				);
1505 				mtx_unlock(&Giant);
1506 			}
1507 		}
1508 		vm_map_unlock_read(map);
1509 	}
1510 	return (0);
1511 }
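
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the two classes of advice handled above.  MADV_SEQUENTIAL is recorded in
 * the map entries themselves (exclusive-lock path), while MADV_WILLNEED is
 * applied to the backing objects (read-lock path).  The helper name is
 * invented for the example.
 */
#if 0
static void
example_hint_access_pattern(vm_map_t map, vm_offset_t start, vm_offset_t end)
{

	(void) vm_map_madvise(map, start, end, MADV_SEQUENTIAL);
	(void) vm_map_madvise(map, start, end, MADV_WILLNEED);
}
#endif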
1512 
1513 
1514 /*
1515  *	vm_map_inherit:
1516  *
1517  *	Sets the inheritance of the specified address
1518  *	range in the target map.  Inheritance
1519  *	affects how the map will be shared with
1520  *	child maps at the time of vm_map_fork.
1521  */
1522 int
1523 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1524 	       vm_inherit_t new_inheritance)
1525 {
1526 	vm_map_entry_t entry;
1527 	vm_map_entry_t temp_entry;
1528 
1529 	switch (new_inheritance) {
1530 	case VM_INHERIT_NONE:
1531 	case VM_INHERIT_COPY:
1532 	case VM_INHERIT_SHARE:
1533 		break;
1534 	default:
1535 		return (KERN_INVALID_ARGUMENT);
1536 	}
1537 	vm_map_lock(map);
1538 	VM_MAP_RANGE_CHECK(map, start, end);
1539 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1540 		entry = temp_entry;
1541 		vm_map_clip_start(map, entry, start);
1542 	} else
1543 		entry = temp_entry->next;
1544 	while ((entry != &map->header) && (entry->start < end)) {
1545 		vm_map_clip_end(map, entry, end);
1546 		entry->inheritance = new_inheritance;
1547 		vm_map_simplify_entry(map, entry);
1548 		entry = entry->next;
1549 	}
1550 	vm_map_unlock(map);
1551 	return (KERN_SUCCESS);
1552 }
1553 
1554 /*
1555  *	vm_map_unwire:
1556  *
1557  *	Implements both kernel and user unwiring.
1558  */
1559 int
1560 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1561 	boolean_t user_unwire)
1562 {
1563 	vm_map_entry_t entry, first_entry, tmp_entry;
1564 	vm_offset_t saved_start;
1565 	unsigned int last_timestamp;
1566 	int rv;
1567 	boolean_t need_wakeup, result;
1568 
1569 	vm_map_lock(map);
1570 	VM_MAP_RANGE_CHECK(map, start, end);
1571 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
1572 		vm_map_unlock(map);
1573 		return (KERN_INVALID_ADDRESS);
1574 	}
1575 	last_timestamp = map->timestamp;
1576 	entry = first_entry;
1577 	while (entry != &map->header && entry->start < end) {
1578 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1579 			/*
1580 			 * We have not yet clipped the entry.
1581 			 */
1582 			saved_start = (start >= entry->start) ? start :
1583 			    entry->start;
1584 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1585 			if (vm_map_unlock_and_wait(map, user_unwire)) {
1586 				/*
1587 				 * Allow interruption of user unwiring?
1588 				 */
1589 			}
1590 			vm_map_lock(map);
1591 			if (last_timestamp+1 != map->timestamp) {
1592 				/*
1593 				 * Look again for the entry because the map was
1594 				 * modified while it was unlocked.
1595 				 * Specifically, the entry may have been
1596 				 * clipped, merged, or deleted.
1597 				 */
1598 				if (!vm_map_lookup_entry(map, saved_start,
1599 				    &tmp_entry)) {
1600 					if (saved_start == start) {
1601 						/*
1602 						 * First_entry has been deleted.
1603 						 */
1604 						vm_map_unlock(map);
1605 						return (KERN_INVALID_ADDRESS);
1606 					}
1607 					end = saved_start;
1608 					rv = KERN_INVALID_ADDRESS;
1609 					goto done;
1610 				}
1611 				if (entry == first_entry)
1612 					first_entry = tmp_entry;
1613 				else
1614 					first_entry = NULL;
1615 				entry = tmp_entry;
1616 			}
1617 			last_timestamp = map->timestamp;
1618 			continue;
1619 		}
1620 		vm_map_clip_start(map, entry, start);
1621 		vm_map_clip_end(map, entry, end);
1622 		/*
1623 		 * Mark the entry in case the map lock is released.  (See
1624 		 * above.)
1625 		 */
1626 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1627 		/*
1628 		 * Check the map for holes in the specified region.
1629 		 */
1630 		if (entry->end < end && (entry->next == &map->header ||
1631 		    entry->next->start > entry->end)) {
1632 			end = entry->end;
1633 			rv = KERN_INVALID_ADDRESS;
1634 			goto done;
1635 		}
1636 		/*
1637 		 * Require that the entry is wired.
1638 		 */
1639 		if (entry->wired_count == 0 || (user_unwire &&
1640 		    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)) {
1641 			end = entry->end;
1642 			rv = KERN_INVALID_ARGUMENT;
1643 			goto done;
1644 		}
1645 		entry = entry->next;
1646 	}
1647 	rv = KERN_SUCCESS;
1648 done:
1649 	need_wakeup = FALSE;
1650 	if (first_entry == NULL) {
1651 		result = vm_map_lookup_entry(map, start, &first_entry);
1652 		KASSERT(result, ("vm_map_unwire: lookup failed"));
1653 	}
1654 	entry = first_entry;
1655 	while (entry != &map->header && entry->start < end) {
1656 		if (rv == KERN_SUCCESS) {
1657 			if (user_unwire)
1658 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1659 			entry->wired_count--;
1660 			if (entry->wired_count == 0) {
1661 				/*
1662 				 * Retain the map lock.
1663 				 */
1664 				vm_fault_unwire(map, entry->start, entry->end);
1665 			}
1666 		}
1667 		KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1668 			("vm_map_unwire: in-transition flag missing"));
1669 		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1670 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1671 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1672 			need_wakeup = TRUE;
1673 		}
1674 		vm_map_simplify_entry(map, entry);
1675 		entry = entry->next;
1676 	}
1677 	vm_map_unlock(map);
1678 	if (need_wakeup)
1679 		vm_map_wakeup(map);
1680 	return (rv);
1681 }
1682 
1683 /*
1684  *	vm_map_wire:
1685  *
1686  *	Implements both kernel and user wiring.
1687  */
1688 int
1689 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1690 	boolean_t user_wire)
1691 {
1692 	vm_map_entry_t entry, first_entry, tmp_entry;
1693 	vm_offset_t saved_end, saved_start;
1694 	unsigned int last_timestamp;
1695 	int rv;
1696 	boolean_t need_wakeup, result;
1697 
1698 	vm_map_lock(map);
1699 	VM_MAP_RANGE_CHECK(map, start, end);
1700 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
1701 		vm_map_unlock(map);
1702 		return (KERN_INVALID_ADDRESS);
1703 	}
1704 	last_timestamp = map->timestamp;
1705 	entry = first_entry;
1706 	while (entry != &map->header && entry->start < end) {
1707 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1708 			/*
1709 			 * We have not yet clipped the entry.
1710 			 */
1711 			saved_start = (start >= entry->start) ? start :
1712 			    entry->start;
1713 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1714 			if (vm_map_unlock_and_wait(map, user_wire)) {
1715 				/*
1716 				 * Allow interruption of user wiring?
1717 				 */
1718 			}
1719 			vm_map_lock(map);
1720 			if (last_timestamp + 1 != map->timestamp) {
1721 				/*
1722 				 * Look again for the entry because the map was
1723 				 * modified while it was unlocked.
1724 				 * Specifically, the entry may have been
1725 				 * clipped, merged, or deleted.
1726 				 */
1727 				if (!vm_map_lookup_entry(map, saved_start,
1728 				    &tmp_entry)) {
1729 					if (saved_start == start) {
1730 						/*
1731 						 * first_entry has been deleted.
1732 						 */
1733 						vm_map_unlock(map);
1734 						return (KERN_INVALID_ADDRESS);
1735 					}
1736 					end = saved_start;
1737 					rv = KERN_INVALID_ADDRESS;
1738 					goto done;
1739 				}
1740 				if (entry == first_entry)
1741 					first_entry = tmp_entry;
1742 				else
1743 					first_entry = NULL;
1744 				entry = tmp_entry;
1745 			}
1746 			last_timestamp = map->timestamp;
1747 			continue;
1748 		}
1749 		vm_map_clip_start(map, entry, start);
1750 		vm_map_clip_end(map, entry, end);
1751 		/*
1752 		 * Mark the entry in case the map lock is released.  (See
1753 		 * above.)
1754 		 */
1755 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1756 		/*
1757 		 * Wire the entry, faulting its pages in with the map unlocked.
1758 		 */
1759 		if (entry->wired_count == 0) {
1760 			entry->wired_count++;
1761 			saved_start = entry->start;
1762 			saved_end = entry->end;
1763 			/*
1764 			 * Release the map lock, relying on the in-transition
1765 			 * mark.
1766 			 */
1767 			vm_map_unlock(map);
1768 			rv = vm_fault_wire(map, saved_start, saved_end,
1769 			    user_wire);
1770 			vm_map_lock(map);
1771 			if (last_timestamp + 1 != map->timestamp) {
1772 				/*
1773 				 * Look again for the entry because the map was
1774 				 * modified while it was unlocked.  The entry
1775 				 * may have been clipped, but NOT merged or
1776 				 * deleted.
1777 				 */
1778 				result = vm_map_lookup_entry(map, saved_start,
1779 				    &tmp_entry);
1780 				KASSERT(result, ("vm_map_wire: lookup failed"));
1781 				if (entry == first_entry)
1782 					first_entry = tmp_entry;
1783 				else
1784 					first_entry = NULL;
1785 				entry = tmp_entry;
1786 				while (entry->end < saved_end) {
1787 					if (rv != KERN_SUCCESS) {
1788 						KASSERT(entry->wired_count == 1,
1789 						    ("vm_map_wire: bad count"));
1790 						entry->wired_count = -1;
1791 					}
1792 					entry = entry->next;
1793 				}
1794 			}
1795 			last_timestamp = map->timestamp;
1796 			if (rv != KERN_SUCCESS) {
1797 				KASSERT(entry->wired_count == 1,
1798 				    ("vm_map_wire: bad count"));
1799 				/*
1800 				 * Assign an out-of-range value to represent
1801 				 * the failure to wire this entry.
1802 				 */
1803 				entry->wired_count = -1;
1804 				end = entry->end;
1805 				goto done;
1806 			}
1807 		} else if (!user_wire ||
1808 			   (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
1809 			entry->wired_count++;
1810 		}
1811 		/*
1812 		 * Check the map for holes in the specified region.
1813 		 */
1814 		if (entry->end < end && (entry->next == &map->header ||
1815 		    entry->next->start > entry->end)) {
1816 			end = entry->end;
1817 			rv = KERN_INVALID_ADDRESS;
1818 			goto done;
1819 		}
1820 		entry = entry->next;
1821 	}
1822 	rv = KERN_SUCCESS;
1823 done:
1824 	need_wakeup = FALSE;
1825 	if (first_entry == NULL) {
1826 		result = vm_map_lookup_entry(map, start, &first_entry);
1827 		KASSERT(result, ("vm_map_wire: lookup failed"));
1828 	}
1829 	entry = first_entry;
1830 	while (entry != &map->header && entry->start < end) {
1831 		if (rv == KERN_SUCCESS) {
1832 			if (user_wire)
1833 				entry->eflags |= MAP_ENTRY_USER_WIRED;
1834 		} else if (entry->wired_count == -1) {
1835 			/*
1836 			 * Wiring failed on this entry.  Thus, unwiring is
1837 			 * unnecessary.
1838 			 */
1839 			entry->wired_count = 0;
1840 		} else {
1841 			if (!user_wire ||
1842 			    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
1843 				entry->wired_count--;
1844 			if (entry->wired_count == 0) {
1845 				/*
1846 				 * Retain the map lock.
1847 				 */
1848 				vm_fault_unwire(map, entry->start, entry->end);
1849 			}
1850 		}
1851 		KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1852 			("vm_map_wire: in-transition flag missing"));
1853 		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1854 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1855 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1856 			need_wakeup = TRUE;
1857 		}
1858 		vm_map_simplify_entry(map, entry);
1859 		entry = entry->next;
1860 	}
1861 	vm_map_unlock(map);
1862 	if (need_wakeup)
1863 		vm_map_wakeup(map);
1864 	return (rv);
1865 }
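
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * pairing vm_map_wire() and vm_map_unwire() for a user wiring, in the way
 * mlock()/munlock() style callers use them.  The helper name is invented
 * for the example.
 */
#if 0
static int
example_wire_then_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	int rv;

	rv = vm_map_wire(map, start, end, TRUE);	/* user wiring */
	if (rv != KERN_SUCCESS)
		return (rv);
	/* ... access the wired range here ... */
	return (vm_map_unwire(map, start, end, TRUE));
}
#endif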
1866 
1867 /*
1868  * vm_map_clean
1869  *
1870  * Push any dirty cached pages in the address range to their pager.
1871  * If syncio is TRUE, dirty pages are written synchronously.
1872  * If invalidate is TRUE, any cached pages are freed as well.
1873  *
1874  * Returns an error if any part of the specified range is not mapped.
1875  */
1876 int
1877 vm_map_clean(
1878 	vm_map_t map,
1879 	vm_offset_t start,
1880 	vm_offset_t end,
1881 	boolean_t syncio,
1882 	boolean_t invalidate)
1883 {
1884 	vm_map_entry_t current;
1885 	vm_map_entry_t entry;
1886 	vm_size_t size;
1887 	vm_object_t object;
1888 	vm_ooffset_t offset;
1889 
1890 	GIANT_REQUIRED;
1891 
1892 	vm_map_lock_read(map);
1893 	VM_MAP_RANGE_CHECK(map, start, end);
1894 	if (!vm_map_lookup_entry(map, start, &entry)) {
1895 		vm_map_unlock_read(map);
1896 		return (KERN_INVALID_ADDRESS);
1897 	}
1898 	/*
1899 	 * Make a first pass to check for holes.
1900 	 */
1901 	for (current = entry; current->start < end; current = current->next) {
1902 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1903 			vm_map_unlock_read(map);
1904 			return (KERN_INVALID_ARGUMENT);
1905 		}
1906 		if (end > current->end &&
1907 		    (current->next == &map->header ||
1908 			current->end != current->next->start)) {
1909 			vm_map_unlock_read(map);
1910 			return (KERN_INVALID_ADDRESS);
1911 		}
1912 	}
1913 
1914 	if (invalidate) {
1915 		vm_page_lock_queues();
1916 		pmap_remove(map->pmap, start, end);
1917 		vm_page_unlock_queues();
1918 	}
1919 	/*
1920 	 * Make a second pass, cleaning/uncaching pages from the indicated
1921 	 * objects as we go.
1922 	 */
1923 	for (current = entry; current->start < end; current = current->next) {
1924 		offset = current->offset + (start - current->start);
1925 		size = (end <= current->end ? end : current->end) - start;
1926 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1927 			vm_map_t smap;
1928 			vm_map_entry_t tentry;
1929 			vm_size_t tsize;
1930 
1931 			smap = current->object.sub_map;
1932 			vm_map_lock_read(smap);
1933 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1934 			tsize = tentry->end - offset;
1935 			if (tsize < size)
1936 				size = tsize;
1937 			object = tentry->object.vm_object;
1938 			offset = tentry->offset + (offset - tentry->start);
1939 			vm_map_unlock_read(smap);
1940 		} else {
1941 			object = current->object.vm_object;
1942 		}
1943 		/*
1944 		 * Note that there is absolutely no sense in writing out
1945 		 * anonymous objects, so we track down the vnode object
1946 		 * to write out.
1947 		 * We invalidate (remove) all pages from the address space
1948 		 * anyway, for semantic correctness.
1949 		 *
1950 		 * note: certain anonymous maps, such as MAP_NOSYNC maps,
1951 		 * may start out with a NULL object.
1952 		 */
1953 		while (object && object->backing_object) {
1954 			object = object->backing_object;
1955 			offset += object->backing_object_offset;
1956 			if (object->size < OFF_TO_IDX(offset + size))
1957 				size = IDX_TO_OFF(object->size) - offset;
1958 		}
1959 		if (object && (object->type == OBJT_VNODE) &&
1960 		    (current->protection & VM_PROT_WRITE)) {
1961 			/*
1962 			 * Flush pages if writing is allowed, invalidate them
1963 			 * if invalidation requested.  Pages undergoing I/O
1964 			 * will be ignored by vm_object_page_remove().
1965 			 *
1966 			 * We cannot lock the vnode and then wait for paging
1967 			 * to complete without deadlocking against vm_fault.
1968 			 * Instead we simply call vm_object_page_remove() and
1969 			 * allow it to block internally on a page-by-page
1970 			 * basis when it encounters pages undergoing async
1971 			 * I/O.
1972 			 */
1973 			int flags;
1974 
1975 			vm_object_reference(object);
1976 			vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
1977 			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1978 			flags |= invalidate ? OBJPC_INVAL : 0;
1979 			VM_OBJECT_LOCK(object);
1980 			vm_object_page_clean(object,
1981 			    OFF_TO_IDX(offset),
1982 			    OFF_TO_IDX(offset + size + PAGE_MASK),
1983 			    flags);
1984 			VM_OBJECT_UNLOCK(object);
1985 			VOP_UNLOCK(object->handle, 0, curthread);
1986 			vm_object_deallocate(object);
1987 		}
1988 		if (object && invalidate &&
1989 		    ((object->type == OBJT_VNODE) ||
1990 		     (object->type == OBJT_DEVICE))) {
1991 			VM_OBJECT_LOCK(object);
1992 			vm_object_page_remove(object,
1993 			    OFF_TO_IDX(offset),
1994 			    OFF_TO_IDX(offset + size + PAGE_MASK),
1995 			    FALSE);
1996 			VM_OBJECT_UNLOCK(object);
1997 		}
1998 		start += size;
1999 	}
2000 
2001 	vm_map_unlock_read(map);
2002 	return (KERN_SUCCESS);
2003 }
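
/*
 * Illustrative sketch, not part of the original file: an msync(2)-style
 * caller might flush (and optionally invalidate) a page-aligned range
 * roughly as follows, assuming "addr" and "len" have already been
 * validated:
 *
 *	if (vm_map_clean(map, trunc_page(addr), round_page(addr + len),
 *	    syncio, invalidate) != KERN_SUCCESS)
 *		return (EINVAL);
 */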
2004 
2005 /*
2006  *	vm_map_entry_unwire:	[ internal use only ]
2007  *
2008  *	Make the region specified by this entry pageable.
2009  *
2010  *	The map in question should be locked.
2011  *	[This is the reason for this routine's existence.]
2012  */
2013 static void
2014 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
2015 {
2016 	vm_fault_unwire(map, entry->start, entry->end);
2017 	entry->wired_count = 0;
2018 }
2019 
2020 /*
2021  *	vm_map_entry_delete:	[ internal use only ]
2022  *
2023  *	Deallocate the given entry from the target map.
2024  */
2025 static void
2026 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
2027 {
2028 	vm_map_entry_unlink(map, entry);
2029 	map->size -= entry->end - entry->start;
2030 
2031 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
2032 		vm_object_deallocate(entry->object.vm_object);
2033 	}
2034 
2035 	vm_map_entry_dispose(map, entry);
2036 }
2037 
2038 /*
2039  *	vm_map_delete:	[ internal use only ]
2040  *
2041  *	Deallocates the given address range from the target
2042  *	map.
2043  */
2044 int
2045 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
2046 {
2047 	vm_object_t object;
2048 	vm_map_entry_t entry;
2049 	vm_map_entry_t first_entry;
2050 
2051 	/*
2052 	 * Find the start of the region, and clip it
2053 	 */
2054 	if (!vm_map_lookup_entry(map, start, &first_entry))
2055 		entry = first_entry->next;
2056 	else {
2057 		entry = first_entry;
2058 		vm_map_clip_start(map, entry, start);
2059 	}
2060 
2061 	/*
2062 	 * Save the free space hint
2063 	 */
2064 	if (entry == &map->header) {
2065 		map->first_free = &map->header;
2066 	} else if (map->first_free->start >= start) {
2067 		map->first_free = entry->prev;
2068 	}
2069 
2070 	/*
2071 	 * Step through all entries in this region
2072 	 */
2073 	while ((entry != &map->header) && (entry->start < end)) {
2074 		vm_map_entry_t next;
2075 		vm_offset_t s, e;
2076 		vm_pindex_t offidxstart, offidxend, count;
2077 
2078 		/*
2079 		 * Wait for wiring or unwiring of an entry to complete.
2080 		 */
2081 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0) {
2082 			unsigned int last_timestamp;
2083 			vm_offset_t saved_start;
2084 			vm_map_entry_t tmp_entry;
2085 
2086 			saved_start = entry->start;
2087 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2088 			last_timestamp = map->timestamp;
2089 			(void) vm_map_unlock_and_wait(map, FALSE);
2090 			vm_map_lock(map);
2091 			if (last_timestamp + 1 != map->timestamp) {
2092 				/*
2093 				 * Look again for the entry because the map was
2094 				 * modified while it was unlocked.
2095 				 * Specifically, the entry may have been
2096 				 * clipped, merged, or deleted.
2097 				 */
2098 				if (!vm_map_lookup_entry(map, saved_start,
2099 							 &tmp_entry))
2100 					entry = tmp_entry->next;
2101 				else {
2102 					entry = tmp_entry;
2103 					vm_map_clip_start(map, entry,
2104 							  saved_start);
2105 				}
2106 			}
2107 			continue;
2108 		}
2109 		vm_map_clip_end(map, entry, end);
2110 
2111 		s = entry->start;
2112 		e = entry->end;
2113 		next = entry->next;
2114 
2115 		offidxstart = OFF_TO_IDX(entry->offset);
2116 		count = OFF_TO_IDX(e - s);
2117 		object = entry->object.vm_object;
2118 
2119 		/*
2120 		 * Unwire before removing addresses from the pmap; otherwise,
2121 		 * unwiring will put the entries back in the pmap.
2122 		 */
2123 		if (entry->wired_count != 0) {
2124 			vm_map_entry_unwire(map, entry);
2125 		}
2126 
2127 		offidxend = offidxstart + count;
2128 
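		/*
		 * For the kernel's own objects, just free the resident
		 * pages in the range.  Otherwise, remove the range from
		 * the pmap and, when this map holds the only mapping of
		 * an anonymous (default or swap) object that is still
		 * referenced elsewhere, release its pages and swap space
		 * as well.
		 */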
2129 		if (object == kernel_object || object == kmem_object) {
2130 			if (object == kernel_object)
2131 				GIANT_REQUIRED;
2132 			VM_OBJECT_LOCK(object);
2133 			vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2134 			VM_OBJECT_UNLOCK(object);
2135 		} else {
2136 			mtx_lock(&Giant);
2137 			vm_page_lock_queues();
2138 			pmap_remove(map->pmap, s, e);
2139 			vm_page_unlock_queues();
2140 			if (object != NULL &&
2141 			    object->ref_count != 1 &&
2142 			    (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2143 			    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2144 				vm_object_collapse(object);
2145 				VM_OBJECT_LOCK(object);
2146 				vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2147 				if (object->type == OBJT_SWAP) {
2148 					swap_pager_freespace(object, offidxstart, count);
2149 				}
2150 				if (offidxend >= object->size &&
2151 				    offidxstart < object->size) {
2152 					object->size = offidxstart;
2153 				}
2154 				VM_OBJECT_UNLOCK(object);
2155 			}
2156 			mtx_unlock(&Giant);
2157 		}
2158 
2159 		/*
2160 		 * Delete the entry (which may delete the object) only after
2161 		 * removing all pmap entries pointing to its pages.
2162 		 * (Otherwise, its page frames may be reallocated, and any
2163 		 * modify bits will be set in the wrong object!)
2164 		 */
2165 		vm_map_entry_delete(map, entry);
2166 		entry = next;
2167 	}
2168 	return (KERN_SUCCESS);
2169 }
2170 
2171 /*
2172  *	vm_map_remove:
2173  *
2174  *	Remove the given address range from the target map.
2175  *	This is the exported form of vm_map_delete.
2176  */
2177 int
2178 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2179 {
2180 	int result, s = 0;
2181 
2182 	if (map == kmem_map)
2183 		s = splvm();
2184 
2185 	vm_map_lock(map);
2186 	VM_MAP_RANGE_CHECK(map, start, end);
2187 	result = vm_map_delete(map, start, end);
2188 	vm_map_unlock(map);
2189 
2190 	if (map == kmem_map)
2191 		splx(s);
2192 
2193 	return (result);
2194 }
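
/*
 * Illustrative sketch, not part of the original file: a munmap(2)-style
 * caller might release a user mapping roughly as follows; "addr" and
 * "len" are hypothetical, already-validated arguments:
 *
 *	rv = vm_map_remove(map, trunc_page(addr), round_page(addr + len));
 */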
2195 
2196 /*
2197  *	vm_map_check_protection:
2198  *
2199  *	Assert that the target map allows the specified privilege on the
2200  *	entire address region given.  The entire region must be allocated.
2201  *
2202  *	WARNING!  This code does not and should not check whether the
2203  *	contents of the region are accessible.  For example, a smaller file
2204  *	might be mapped into a larger address space.
2205  *
2206  *	NOTE!  This code is also called by munmap().
2207  */
2208 boolean_t
2209 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2210 			vm_prot_t protection)
2211 {
2212 	vm_map_entry_t entry;
2213 	vm_map_entry_t tmp_entry;
2214 
2215 	vm_map_lock_read(map);
2216 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2217 		vm_map_unlock_read(map);
2218 		return (FALSE);
2219 	}
2220 	entry = tmp_entry;
2221 
2222 	while (start < end) {
2223 		if (entry == &map->header) {
2224 			vm_map_unlock_read(map);
2225 			return (FALSE);
2226 		}
2227 		/*
2228 		 * No holes allowed!
2229 		 */
2230 		if (start < entry->start) {
2231 			vm_map_unlock_read(map);
2232 			return (FALSE);
2233 		}
2234 		/*
2235 		 * Check protection associated with entry.
2236 		 */
2237 		if ((entry->protection & protection) != protection) {
2238 			vm_map_unlock_read(map);
2239 			return (FALSE);
2240 		}
2241 		/* go to next entry */
2242 		start = entry->end;
2243 		entry = entry->next;
2244 	}
2245 	vm_map_unlock_read(map);
2246 	return (TRUE);
2247 }
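
/*
 * Illustrative sketch, not part of the original file: a caller that must
 * verify read/write access to a whole range before operating on it might
 * do something like the following ("addr" and "len" are hypothetical):
 *
 *	if (!vm_map_check_protection(map, trunc_page(addr),
 *	    round_page(addr + len), VM_PROT_READ | VM_PROT_WRITE))
 *		return (EFAULT);
 */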
2248 
2249 /*
2250  *	vm_map_copy_entry:
2251  *
2252  *	Copies the contents of the source entry to the destination
2253  *	entry.  The entries *must* be aligned properly.
2254  */
2255 static void
2256 vm_map_copy_entry(
2257 	vm_map_t src_map,
2258 	vm_map_t dst_map,
2259 	vm_map_entry_t src_entry,
2260 	vm_map_entry_t dst_entry)
2261 {
2262 	vm_object_t src_object;
2263 
2264 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2265 		return;
2266 
2267 	if (src_entry->wired_count == 0) {
2268 
2269 		/*
2270 		 * If the source entry is marked needs_copy, it is already
2271 		 * write-protected.
2272 		 */
2273 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2274 			vm_page_lock_queues();
2275 			pmap_protect(src_map->pmap,
2276 			    src_entry->start,
2277 			    src_entry->end,
2278 			    src_entry->protection & ~VM_PROT_WRITE);
2279 			vm_page_unlock_queues();
2280 		}
2281 
2282 		/*
2283 		 * Make a copy of the object.
2284 		 */
2285 		if ((src_object = src_entry->object.vm_object) != NULL) {
2286 
2287 			if ((src_object->handle == NULL) &&
2288 				(src_object->type == OBJT_DEFAULT ||
2289 				 src_object->type == OBJT_SWAP)) {
2290 				vm_object_collapse(src_object);
2291 				if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2292 					vm_object_split(src_entry);
2293 					src_object = src_entry->object.vm_object;
2294 				}
2295 			}
2296 
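			/*
			 * Share the object between the source and
			 * destination entries and mark both copy-on-write;
			 * the actual copy is deferred until one of them is
			 * written.
			 */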
2297 			vm_object_reference(src_object);
2298 			VM_OBJECT_LOCK(src_object);
2299 			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2300 			VM_OBJECT_UNLOCK(src_object);
2301 			dst_entry->object.vm_object = src_object;
2302 			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2303 			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2304 			dst_entry->offset = src_entry->offset;
2305 		} else {
2306 			dst_entry->object.vm_object = NULL;
2307 			dst_entry->offset = 0;
2308 		}
2309 
2310 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2311 		    dst_entry->end - dst_entry->start, src_entry->start);
2312 	} else {
2313 		/*
2314 		 * Of course, wired-down pages cannot be made copy-on-write.
2315 		 * Cause wired pages to be copied into the new map by
2316 		 * simulating faults (the new pages are pageable).
2317 		 */
2318 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2319 	}
2320 }
2321 
2322 /*
2323  * vmspace_fork:
2324  * Create a new process vmspace structure and vm_map
2325  * based on those of an existing process.  The new map
2326  * is based on the old map, according to the inheritance
2327  * values on the regions in that map.
2328  *
2329  * The source map must not be locked.
2330  */
2331 struct vmspace *
2332 vmspace_fork(struct vmspace *vm1)
2333 {
2334 	struct vmspace *vm2;
2335 	vm_map_t old_map = &vm1->vm_map;
2336 	vm_map_t new_map;
2337 	vm_map_entry_t old_entry;
2338 	vm_map_entry_t new_entry;
2339 	vm_object_t object;
2340 
2341 	GIANT_REQUIRED;
2342 
2343 	vm_map_lock(old_map);
2344 	old_map->infork = 1;
2345 
2346 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2347 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2348 	    (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
2349 	new_map = &vm2->vm_map;	/* XXX */
2350 	new_map->timestamp = 1;
2351 
2352 	old_entry = old_map->header.next;
2353 
2354 	while (old_entry != &old_map->header) {
2355 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2356 			panic("vm_map_fork: encountered a submap");
2357 
2358 		switch (old_entry->inheritance) {
2359 		case VM_INHERIT_NONE:
2360 			break;
2361 
2362 		case VM_INHERIT_SHARE:
2363 			/*
2364 			 * Clone the entry, creating the shared object if necessary.
2365 			 */
2366 			object = old_entry->object.vm_object;
2367 			if (object == NULL) {
2368 				object = vm_object_allocate(OBJT_DEFAULT,
2369 					atop(old_entry->end - old_entry->start));
2370 				old_entry->object.vm_object = object;
2371 				old_entry->offset = (vm_offset_t) 0;
2372 			}
2373 
2374 			/*
2375 			 * Add the reference before calling vm_object_shadow
2376 			 * to ensure that a shadow object is created.
2377 			 */
2378 			vm_object_reference(object);
2379 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2380 				vm_object_shadow(&old_entry->object.vm_object,
2381 					&old_entry->offset,
2382 					atop(old_entry->end - old_entry->start));
2383 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2384 				/* Transfer the second reference too. */
2385 				vm_object_reference(
2386 				    old_entry->object.vm_object);
2387 				vm_object_deallocate(object);
2388 				object = old_entry->object.vm_object;
2389 			}
2390 			VM_OBJECT_LOCK(object);
2391 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
2392 			VM_OBJECT_UNLOCK(object);
2393 
2394 			/*
2395 			 * Clone the entry, referencing the shared object.
2396 			 */
2397 			new_entry = vm_map_entry_create(new_map);
2398 			*new_entry = *old_entry;
2399 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2400 			new_entry->wired_count = 0;
2401 
2402 			/*
2403 			 * Insert the entry into the new map -- we know we're
2404 			 * inserting at the end of the new map.
2405 			 */
2406 			vm_map_entry_link(new_map, new_map->header.prev,
2407 			    new_entry);
2408 
2409 			/*
2410 			 * Update the physical map
2411 			 */
2412 			pmap_copy(new_map->pmap, old_map->pmap,
2413 			    new_entry->start,
2414 			    (old_entry->end - old_entry->start),
2415 			    old_entry->start);
2416 			break;
2417 
2418 		case VM_INHERIT_COPY:
2419 			/*
2420 			 * Clone the entry and link into the map.
2421 			 */
2422 			new_entry = vm_map_entry_create(new_map);
2423 			*new_entry = *old_entry;
2424 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2425 			new_entry->wired_count = 0;
2426 			new_entry->object.vm_object = NULL;
2427 			vm_map_entry_link(new_map, new_map->header.prev,
2428 			    new_entry);
2429 			vm_map_copy_entry(old_map, new_map, old_entry,
2430 			    new_entry);
2431 			break;
2432 		}
2433 		old_entry = old_entry->next;
2434 	}
2435 
2436 	new_map->size = old_map->size;
2437 	old_map->infork = 0;
2438 	vm_map_unlock(old_map);
2439 
2440 	return (vm2);
2441 }
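
/*
 * Illustrative sketch, not part of the original file: a fork-time caller
 * might clone the parent's address space roughly as follows, with Giant
 * held and the source map unlocked as required above ("p1" and "p2" are
 * hypothetical parent and child processes):
 *
 *	vm2 = vmspace_fork(p1->p_vmspace);
 *	p2->p_vmspace = vm2;
 */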
2442 
2443 int
2444 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2445 	      vm_prot_t prot, vm_prot_t max, int cow)
2446 {
2447 	vm_map_entry_t prev_entry;
2448 	vm_map_entry_t new_stack_entry;
2449 	vm_size_t      init_ssize;
2450 	int            rv;
2451 
2452 	if (addrbos < vm_map_min(map))
2453 		return (KERN_NO_SPACE);
2454 
2455 	if (max_ssize < sgrowsiz)
2456 		init_ssize = max_ssize;
2457 	else
2458 		init_ssize = sgrowsiz;
2459 
2460 	vm_map_lock(map);
2461 
2462 	/* If addr is already mapped, no go */
2463 	if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2464 		vm_map_unlock(map);
2465 		return (KERN_NO_SPACE);
2466 	}
2467 
2468 	/* If we would blow our VMEM resource limit, no go */
2469 	if (map->size + init_ssize >
2470 	    curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2471 		vm_map_unlock(map);
2472 		return (KERN_NO_SPACE);
2473 	}
2474 
2475 	/* If we cannot accommodate max_ssize in the current mapping,
2476 	 * no go.  However, we need to be aware that subsequent user
2477 	 * mappings might map into the space we have reserved for the
2478 	 * stack, and currently this space is not protected.
2479 	 *
2480 	 * Hopefully we will at least detect this condition
2481 	 * when we try to grow the stack.
2482 	 */
2483 	if ((prev_entry->next != &map->header) &&
2484 	    (prev_entry->next->start < addrbos + max_ssize)) {
2485 		vm_map_unlock(map);
2486 		return (KERN_NO_SPACE);
2487 	}
2488 
2489 	/* We initially map a stack of only init_ssize.  We will
2490 	 * grow as needed later.  Since this is to be a grow
2491 	 * down stack, we map at the top of the range.
2492 	 *
2493 	 * Note: we would normally expect prot and max to be
2494 	 * VM_PROT_ALL, and cow to be 0.  Possibly we should
2495 	 * eliminate these as input parameters, and just
2496 	 * pass these values here in the insert call.
2497 	 */
2498 	rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2499 	                   addrbos + max_ssize, prot, max, cow);
2500 
2501 	/* Now set the avail_ssize amount */
2502 	if (rv == KERN_SUCCESS){
2503 		if (prev_entry != &map->header)
2504 			vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2505 		new_stack_entry = prev_entry->next;
2506 		if (new_stack_entry->end   != addrbos + max_ssize ||
2507 		    new_stack_entry->start != addrbos + max_ssize - init_ssize)
2508 			panic("Bad entry start/end for new stack entry");
2509 		else
2510 			new_stack_entry->avail_ssize = max_ssize - init_ssize;
2511 	}
2512 
2513 	vm_map_unlock(map);
2514 	return (rv);
2515 }
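
/*
 * Illustrative sketch, not part of the original file: an exec-time caller
 * might reserve a grow-down stack region roughly as follows, where "bos"
 * is a hypothetical bottom-of-stack address and "maxssiz" the maximum
 * stack size:
 *
 *	rv = vm_map_stack(map, bos, maxssiz, VM_PROT_ALL, VM_PROT_ALL, 0);
 */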
2516 
2517 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2518  * desired address is already mapped, or if we successfully grow
2519  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2520  * stack range (this is strange, but preserves compatibility with
2521  * the grow function in vm_machdep.c).
2522  */
2523 int
2524 vm_map_growstack (struct proc *p, vm_offset_t addr)
2525 {
2526 	vm_map_entry_t prev_entry;
2527 	vm_map_entry_t stack_entry;
2528 	vm_map_entry_t new_stack_entry;
2529 	struct vmspace *vm = p->p_vmspace;
2530 	vm_map_t map = &vm->vm_map;
2531 	vm_offset_t    end;
2532 	int      grow_amount;
2533 	int      rv;
2534 	int      is_procstack;
2535 
2536 	GIANT_REQUIRED;
2537 
2538 Retry:
2539 	vm_map_lock_read(map);
2540 
2541 	/* If addr is already in the entry range, no need to grow.*/
2542 	if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2543 		vm_map_unlock_read(map);
2544 		return (KERN_SUCCESS);
2545 	}
2546 
2547 	if ((stack_entry = prev_entry->next) == &map->header) {
2548 		vm_map_unlock_read(map);
2549 		return (KERN_SUCCESS);
2550 	}
2551 	if (prev_entry == &map->header)
2552 		end = stack_entry->start - stack_entry->avail_ssize;
2553 	else
2554 		end = prev_entry->end;
2555 
2556 	/* This next test mimics the old grow function in vm_machdep.c.
2557 	 * It really doesn't quite make sense, but we do it anyway
2558 	 * for compatibility.
2559 	 *
2560 	 * If the stack is not growable, return success.  This signals the
2561 	 * caller to proceed as it normally would with ordinary VM.
2562 	 */
2563 	if (stack_entry->avail_ssize < 1 ||
2564 	    addr >= stack_entry->start ||
2565 	    addr <  stack_entry->start - stack_entry->avail_ssize) {
2566 		vm_map_unlock_read(map);
2567 		return (KERN_SUCCESS);
2568 	}
2569 
2570 	/* Find the minimum grow amount */
2571 	grow_amount = roundup(stack_entry->start - addr, PAGE_SIZE);
2572 	if (grow_amount > stack_entry->avail_ssize) {
2573 		vm_map_unlock_read(map);
2574 		return (KERN_NO_SPACE);
2575 	}
2576 
2577 	/* If there is no longer enough space between the entries,
2578 	 * do not grow; just adjust the available space.  Note: this
2579 	 * should only happen if the user has mapped into the
2580 	 * stack area after the stack was created, and is
2581 	 * probably an error.
2582 	 *
2583 	 * This also effectively destroys any guard page the user
2584 	 * might have intended by limiting the stack size.
2585 	 */
2586 	if (grow_amount > stack_entry->start - end) {
2587 		if (vm_map_lock_upgrade(map))
2588 			goto Retry;
2589 
2590 		stack_entry->avail_ssize = stack_entry->start - end;
2591 
2592 		vm_map_unlock(map);
2593 		return (KERN_NO_SPACE);
2594 	}
2595 
2596 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2597 
2598 	/* If this is the main process stack, see if we're over the
2599 	 * stack limit.
2600 	 */
2601 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2602 			     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2603 		vm_map_unlock_read(map);
2604 		return (KERN_NO_SPACE);
2605 	}
2606 
2607 	/* Round the grow amount up to a multiple of sgrowsiz. */
2608 	grow_amount = roundup(grow_amount, sgrowsiz);
2609 	if (grow_amount > stack_entry->avail_ssize) {
2610 		grow_amount = stack_entry->avail_ssize;
2611 	}
2612 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2613 	                     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2614 		grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2615 		              ctob(vm->vm_ssize);
2616 	}
2617 
2618 	/* If we would blow our VMEM resource limit, no go */
2619 	if (map->size + grow_amount >
2620 	    curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2621 		vm_map_unlock_read(map);
2622 		return (KERN_NO_SPACE);
2623 	}
2624 
2625 	if (vm_map_lock_upgrade(map))
2626 		goto Retry;
2627 
2628 	/* Get the preliminary new entry start value */
2629 	addr = stack_entry->start - grow_amount;
2630 
2631 	/* If this puts us into the previous entry, cut back our growth
2632 	 * to the available space.  Also, see the note above.
2633 	 */
2634 	if (addr < end) {
2635 		stack_entry->avail_ssize = stack_entry->start - end;
2636 		addr = end;
2637 	}
2638 
2639 	rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2640 	    p->p_sysent->sv_stackprot, VM_PROT_ALL, 0);
2641 
2642 	/* Adjust the available stack space by the amount we grew. */
2643 	if (rv == KERN_SUCCESS) {
2644 		if (prev_entry != &map->header)
2645 			vm_map_clip_end(map, prev_entry, addr);
2646 		new_stack_entry = prev_entry->next;
2647 		if (new_stack_entry->end   != stack_entry->start  ||
2648 		    new_stack_entry->start != addr)
2649 			panic("Bad stack grow start/end in new stack entry");
2650 		else {
2651 			new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2652 							(new_stack_entry->end -
2653 							 new_stack_entry->start);
2654 			if (is_procstack)
2655 				vm->vm_ssize += btoc(new_stack_entry->end -
2656 						     new_stack_entry->start);
2657 		}
2658 	}
2659 
2660 	vm_map_unlock(map);
2661 	return (rv);
2662 }
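
/*
 * Illustrative sketch, not part of the original file: a page-fault handler
 * might attempt stack growth before proceeding with normal fault handling,
 * where "fault_addr" is the hypothetical faulting user address:
 *
 *	if (vm_map_growstack(p, fault_addr) != KERN_SUCCESS)
 *		return (KERN_FAILURE);
 */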
2663 
2664 /*
2665  * Unshare the specified VM space for exec.  A new vmspace, containing
2666  * no mappings, is created; the old one is freed once its last user drops it.
2667  */
2668 void
2669 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
2670 {
2671 	struct vmspace *oldvmspace = p->p_vmspace;
2672 	struct vmspace *newvmspace;
2673 
2674 	GIANT_REQUIRED;
2675 	newvmspace = vmspace_alloc(minuser, maxuser);
2676 	bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2677 	    (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2678 	/*
2679 	 * This code is written like this for prototype purposes.  The
2680 	 * goal is to avoid running down the vmspace here, but let the
2681 	 * other processes that are still using the vmspace finally
2682 	 * run it down.  Even though there is little or no chance of blocking
2683 	 * here, it is a good idea to keep this form for future mods.
2684 	 */
2685 	p->p_vmspace = newvmspace;
2686 	pmap_pinit2(vmspace_pmap(newvmspace));
2687 	vmspace_free(oldvmspace);
2688 	if (p == curthread->td_proc)		/* XXXKSE ? */
2689 		pmap_activate(curthread);
2690 }
2691 
2692 /*
2693  * Unshare the specified VM space for forcing COW.  This
2694  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2695  */
2696 void
2697 vmspace_unshare(struct proc *p)
2698 {
2699 	struct vmspace *oldvmspace = p->p_vmspace;
2700 	struct vmspace *newvmspace;
2701 
2702 	GIANT_REQUIRED;
2703 	if (oldvmspace->vm_refcnt == 1)
2704 		return;
2705 	newvmspace = vmspace_fork(oldvmspace);
2706 	p->p_vmspace = newvmspace;
2707 	pmap_pinit2(vmspace_pmap(newvmspace));
2708 	vmspace_free(oldvmspace);
2709 	if (p == curthread->td_proc)		/* XXXKSE ? */
2710 		pmap_activate(curthread);
2711 }
2712 
2713 /*
2714  *	vm_map_lookup:
2715  *
2716  *	Finds the VM object, offset, and
2717  *	protection for a given virtual address in the
2718  *	specified map, assuming a page fault of the
2719  *	type specified.
2720  *
2721  *	Leaves the map in question locked for read; return
2722  *	values are guaranteed until a vm_map_lookup_done
2723  *	call is performed.  Note that the map argument
2724  *	is in/out; the returned map must be used in
2725  *	the call to vm_map_lookup_done.
2726  *
2727  *	A handle (out_entry) is returned for use in
2728  *	vm_map_lookup_done, to make that fast.
2729  *
2730  *	If a lookup is requested with "write protection"
2731  *	specified, the map may be changed to perform virtual
2732  *	copying operations, although the data referenced will
2733  *	remain the same.
2734  */
2735 int
2736 vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
2737 	      vm_offset_t vaddr,
2738 	      vm_prot_t fault_typea,
2739 	      vm_map_entry_t *out_entry,	/* OUT */
2740 	      vm_object_t *object,		/* OUT */
2741 	      vm_pindex_t *pindex,		/* OUT */
2742 	      vm_prot_t *out_prot,		/* OUT */
2743 	      boolean_t *wired)			/* OUT */
2744 {
2745 	vm_map_entry_t entry;
2746 	vm_map_t map = *var_map;
2747 	vm_prot_t prot;
2748 	vm_prot_t fault_type = fault_typea;
2749 
2750 RetryLookup:;
2751 	/*
2752 	 * Lookup the faulting address.
2753 	 */
2754 
2755 	vm_map_lock_read(map);
2756 #define	RETURN(why) \
2757 		{ \
2758 		vm_map_unlock_read(map); \
2759 		return (why); \
2760 		}
2761 
2762 	/*
2763 	 * If the map has an interesting hint, try it before calling the
2764 	 * full-blown lookup routine.
2765 	 */
2766 	entry = map->root;
2767 	*out_entry = entry;
2768 	if (entry == NULL ||
2769 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2770 		/*
2771 		 * Entry was either not a valid hint, or the vaddr was not
2772 		 * contained in the entry, so do a full lookup.
2773 		 */
2774 		if (!vm_map_lookup_entry(map, vaddr, out_entry))
2775 			RETURN(KERN_INVALID_ADDRESS);
2776 
2777 		entry = *out_entry;
2778 	}
2779 
2780 	/*
2781 	 * Handle submaps.
2782 	 */
2783 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2784 		vm_map_t old_map = map;
2785 
2786 		*var_map = map = entry->object.sub_map;
2787 		vm_map_unlock_read(old_map);
2788 		goto RetryLookup;
2789 	}
2790 
2791 	/*
2792 	 * Check whether this task is allowed to have this page.
2793 	 * Note the special case for MAP_ENTRY_COW
2794 	 * pages with an override.  This is to implement a forced
2795 	 * COW for debuggers.
2796 	 */
2797 	if (fault_type & VM_PROT_OVERRIDE_WRITE)
2798 		prot = entry->max_protection;
2799 	else
2800 		prot = entry->protection;
2801 	fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2802 	if ((fault_type & prot) != fault_type) {
2803 			RETURN(KERN_PROTECTION_FAILURE);
2804 	}
2805 	if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2806 	    (entry->eflags & MAP_ENTRY_COW) &&
2807 	    (fault_type & VM_PROT_WRITE) &&
2808 	    (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2809 		RETURN(KERN_PROTECTION_FAILURE);
2810 	}
2811 
2812 	/*
2813 	 * If this page is not pageable, we have to get it for all possible
2814 	 * accesses.
2815 	 */
2816 	*wired = (entry->wired_count != 0);
2817 	if (*wired)
2818 		prot = fault_type = entry->protection;
2819 
2820 	/*
2821 	 * If the entry is marked copy-on-write, we either copy now or demote access.
2822 	 */
2823 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2824 		/*
2825 		 * If we want to write the page, we may as well handle that
2826 		 * now since we've got the map locked.
2827 		 *
2828 		 * If we don't need to write the page, we just demote the
2829 		 * permissions allowed.
2830 		 */
2831 		if (fault_type & VM_PROT_WRITE) {
2832 			/*
2833 			 * Make a new object, and place it in the object
2834 			 * chain.  Note that no new references have appeared
2835 			 * -- one just moved from the map to the new
2836 			 * object.
2837 			 */
2838 			if (vm_map_lock_upgrade(map))
2839 				goto RetryLookup;
2840 
2841 			vm_object_shadow(
2842 			    &entry->object.vm_object,
2843 			    &entry->offset,
2844 			    atop(entry->end - entry->start));
2845 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2846 
2847 			vm_map_lock_downgrade(map);
2848 		} else {
2849 			/*
2850 			 * We're attempting to read a copy-on-write page --
2851 			 * don't allow writes.
2852 			 */
2853 			prot &= ~VM_PROT_WRITE;
2854 		}
2855 	}
2856 
2857 	/*
2858 	 * Create an object if necessary.
2859 	 */
2860 	if (entry->object.vm_object == NULL &&
2861 	    !map->system_map) {
2862 		if (vm_map_lock_upgrade(map))
2863 			goto RetryLookup;
2864 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2865 		    atop(entry->end - entry->start));
2866 		entry->offset = 0;
2867 		vm_map_lock_downgrade(map);
2868 	}
2869 
2870 	/*
2871 	 * Return the object/offset from this entry.  If the entry was
2872 	 * copy-on-write or empty, it has been fixed up.
2873 	 */
2874 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2875 	*object = entry->object.vm_object;
2876 
2877 	/*
2878 	 * Return whether this is the only map sharing this data.
2879 	 */
2880 	*out_prot = prot;
2881 	return (KERN_SUCCESS);
2882 
2883 #undef	RETURN
2884 }
2885 
2886 /*
2887  *	vm_map_lookup_done:
2888  *
2889  *	Releases locks acquired by a vm_map_lookup
2890  *	(according to the handle returned by that lookup).
2891  */
2892 void
2893 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
2894 {
2895 	/*
2896 	 * Unlock the main-level map
2897 	 */
2898 	vm_map_unlock_read(map);
2899 }
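
/*
 * Illustrative sketch, not part of the original file: vm_map_lookup() and
 * vm_map_lookup_done() are used as a pair; the returned object, pindex and
 * protection remain valid only while the (possibly updated) map stays
 * read-locked:
 *
 *	rv = vm_map_lookup(&map, vaddr, VM_PROT_READ, &entry, &object,
 *	    &pindex, &prot, &wired);
 *	if (rv != KERN_SUCCESS)
 *		return (rv);
 *	(... use object and pindex here ...)
 *	vm_map_lookup_done(map, entry);
 */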
2900 
2901 #include "opt_ddb.h"
2902 #ifdef DDB
2903 #include <sys/kernel.h>
2904 
2905 #include <ddb/ddb.h>
2906 
2907 /*
2908  *	vm_map_print:	[ debug ]
2909  */
2910 DB_SHOW_COMMAND(map, vm_map_print)
2911 {
2912 	static int nlines;
2913 	/* XXX convert args. */
2914 	vm_map_t map = (vm_map_t)addr;
2915 	boolean_t full = have_addr;
2916 
2917 	vm_map_entry_t entry;
2918 
2919 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
2920 	    (void *)map,
2921 	    (void *)map->pmap, map->nentries, map->timestamp);
2922 	nlines++;
2923 
2924 	if (!full && db_indent)
2925 		return;
2926 
2927 	db_indent += 2;
2928 	for (entry = map->header.next; entry != &map->header;
2929 	    entry = entry->next) {
2930 		db_iprintf("map entry %p: start=%p, end=%p\n",
2931 		    (void *)entry, (void *)entry->start, (void *)entry->end);
2932 		nlines++;
2933 		{
2934 			static char *inheritance_name[4] =
2935 			{"share", "copy", "none", "donate_copy"};
2936 
2937 			db_iprintf(" prot=%x/%x/%s",
2938 			    entry->protection,
2939 			    entry->max_protection,
2940 			    inheritance_name[(int)(unsigned char)entry->inheritance]);
2941 			if (entry->wired_count != 0)
2942 				db_printf(", wired");
2943 		}
2944 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2945 			db_printf(", share=%p, offset=0x%jx\n",
2946 			    (void *)entry->object.sub_map,
2947 			    (uintmax_t)entry->offset);
2948 			nlines++;
2949 			if ((entry->prev == &map->header) ||
2950 			    (entry->prev->object.sub_map !=
2951 				entry->object.sub_map)) {
2952 				db_indent += 2;
2953 				vm_map_print((db_expr_t)(intptr_t)
2954 					     entry->object.sub_map,
2955 					     full, 0, (char *)0);
2956 				db_indent -= 2;
2957 			}
2958 		} else {
2959 			db_printf(", object=%p, offset=0x%jx",
2960 			    (void *)entry->object.vm_object,
2961 			    (uintmax_t)entry->offset);
2962 			if (entry->eflags & MAP_ENTRY_COW)
2963 				db_printf(", copy (%s)",
2964 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
2965 			db_printf("\n");
2966 			nlines++;
2967 
2968 			if ((entry->prev == &map->header) ||
2969 			    (entry->prev->object.vm_object !=
2970 				entry->object.vm_object)) {
2971 				db_indent += 2;
2972 				vm_object_print((db_expr_t)(intptr_t)
2973 						entry->object.vm_object,
2974 						full, 0, (char *)0);
2975 				nlines += 4;
2976 				db_indent -= 2;
2977 			}
2978 		}
2979 	}
2980 	db_indent -= 2;
2981 	if (db_indent == 0)
2982 		nlines = 0;
2983 }
2984 
2985 
2986 DB_SHOW_COMMAND(procvm, procvm)
2987 {
2988 	struct proc *p;
2989 
2990 	if (have_addr) {
2991 		p = (struct proc *) addr;
2992 	} else {
2993 		p = curproc;
2994 	}
2995 
2996 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
2997 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
2998 	    (void *)vmspace_pmap(p->p_vmspace));
2999 
3000 	vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3001 }
3002 
3003 #endif /* DDB */
3004