xref: /freebsd/sys/vm/vm_map.c (revision f4c5766baa461767ccb595252b1614f1ecc6f1a7)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/ktr.h>
74 #include <sys/lock.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/vnode.h>
80 #include <sys/resourcevar.h>
81 #include <sys/sysent.h>
82 #include <sys/shm.h>
83 
84 #include <vm/vm.h>
85 #include <vm/vm_param.h>
86 #include <vm/pmap.h>
87 #include <vm/vm_map.h>
88 #include <vm/vm_page.h>
89 #include <vm/vm_object.h>
90 #include <vm/vm_pager.h>
91 #include <vm/vm_kern.h>
92 #include <vm/vm_extern.h>
93 #include <vm/swap_pager.h>
94 #include <vm/uma.h>
95 
96 /*
97  *	Virtual memory maps provide for the mapping, protection,
98  *	and sharing of virtual memory objects.  In addition,
99  *	this module provides for an efficient virtual copy of
100  *	memory from one map to another.
101  *
102  *	Synchronization is required prior to most operations.
103  *
104  *	Maps consist of an ordered doubly-linked list of simple
105  *	entries; a single hint is used to speed up lookups.
106  *
107  *	Since portions of maps are specified by start/end addresses,
108  *	which may not align with existing map entries, all
109  *	routines merely "clip" entries to these start/end values.
110  *	[That is, an entry is split into two, bordering at a
111  *	start or end value.]  Note that these clippings may not
112  *	always be necessary (as the two resulting entries are then
113  *	not changed); however, the clipping is done for convenience.
114  *
115  *	As mentioned above, virtual copy operations are performed
116  *	by copying VM object references from one map to
117  *	another, and then marking both regions as copy-on-write.
118  */
119 
120 /*
121  *	vm_map_startup:
122  *
123  *	Initialize the vm_map module.  Must be called before
124  *	any other vm_map routines.
125  *
126  *	Map and entry structures are allocated from the general
127  *	purpose memory pool with some exceptions:
128  *
129  *	- The kernel map and kmem submap are allocated statically.
130  *	- Kernel map entries are allocated out of a static pool.
131  *
132  *	These restrictions are necessary since malloc() uses the
133  *	maps and requires map entries.
134  */
135 
136 static struct mtx map_sleep_mtx;
137 static uma_zone_t mapentzone;
138 static uma_zone_t kmapentzone;
139 static uma_zone_t mapzone;
140 static uma_zone_t vmspace_zone;
141 static struct vm_object kmapentobj;
142 static void vmspace_zinit(void *mem, int size);
143 static void vmspace_zfini(void *mem, int size);
144 static void vm_map_zinit(void *mem, int size);
145 static void vm_map_zfini(void *mem, int size);
146 static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max);
147 
148 #ifdef INVARIANTS
149 static void vm_map_zdtor(void *mem, int size, void *arg);
150 static void vmspace_zdtor(void *mem, int size, void *arg);
151 #endif
152 
153 void
154 vm_map_startup(void)
155 {
156 	mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
157 	mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
158 #ifdef INVARIANTS
159 	    vm_map_zdtor,
160 #else
161 	    NULL,
162 #endif
163 	    vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
164 	uma_prealloc(mapzone, MAX_KMAP);
165 	kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
166 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
167 	    UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
168 	uma_prealloc(kmapentzone, MAX_KMAPENT);
169 	mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
170 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
171 	uma_prealloc(mapentzone, MAX_MAPENT);
172 }
173 
174 static void
175 vmspace_zfini(void *mem, int size)
176 {
177 	struct vmspace *vm;
178 
179 	vm = (struct vmspace *)mem;
180 
181 	vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
182 }
183 
184 static void
185 vmspace_zinit(void *mem, int size)
186 {
187 	struct vmspace *vm;
188 
189 	vm = (struct vmspace *)mem;
190 
191 	vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map));
192 }
193 
194 static void
195 vm_map_zfini(void *mem, int size)
196 {
197 	vm_map_t map;
198 
199 	map = (vm_map_t)mem;
200 	mtx_destroy(&map->system_mtx);
201 	lockdestroy(&map->lock);
202 }
203 
204 static void
205 vm_map_zinit(void *mem, int size)
206 {
207 	vm_map_t map;
208 
209 	map = (vm_map_t)mem;
210 	map->nentries = 0;
211 	map->size = 0;
212 	map->infork = 0;
213 	mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
214 	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
215 }
216 
217 #ifdef INVARIANTS
218 static void
219 vmspace_zdtor(void *mem, int size, void *arg)
220 {
221 	struct vmspace *vm;
222 
223 	vm = (struct vmspace *)mem;
224 
225 	vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
226 }
227 static void
228 vm_map_zdtor(void *mem, int size, void *arg)
229 {
230 	vm_map_t map;
231 
232 	map = (vm_map_t)mem;
233 	KASSERT(map->nentries == 0,
234 	    ("map %p nentries == %d on free.",
235 	    map, map->nentries));
236 	KASSERT(map->size == 0,
237 	    ("map %p size == %lu on free.",
238 	    map, (unsigned long)map->size));
239 	KASSERT(map->infork == 0,
240 	    ("map %p infork == %d on free.",
241 	    map, map->infork));
242 }
243 #endif	/* INVARIANTS */
244 
245 /*
246  * Allocate a vmspace structure, including a vm_map and pmap,
247  * and initialize those structures.  The refcnt is set to 1.
248  * The remaining fields must be initialized by the caller.
249  */
250 struct vmspace *
251 vmspace_alloc(min, max)
252 	vm_offset_t min, max;
253 {
254 	struct vmspace *vm;
255 
256 	GIANT_REQUIRED;
257 	vm = uma_zalloc(vmspace_zone, M_WAITOK);
258 	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
259 	_vm_map_init(&vm->vm_map, min, max);
260 	pmap_pinit(vmspace_pmap(vm));
261 	vm->vm_map.pmap = vmspace_pmap(vm);		/* XXX */
262 	vm->vm_refcnt = 1;
263 	vm->vm_shm = NULL;
264 	vm->vm_exitingcnt = 0;
265 	return (vm);
266 }
267 
268 void
269 vm_init2(void)
270 {
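	/*
	 * Give the kernel map entry zone a private backing object, sized
	 * from 1/8 of the smaller of the physical page count and the
	 * number of pages of kernel virtual address space.
	 */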
271 	uma_zone_set_obj(kmapentzone, &kmapentobj, lmin(cnt.v_page_count,
272 	    (VM_MAX_KERNEL_ADDRESS - KERNBASE) / PAGE_SIZE) / 8);
273 	vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
274 #ifdef INVARIANTS
275 	    vmspace_zdtor,
276 #else
277 	    NULL,
278 #endif
279 	    vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
280 	pmap_init2();
281 }
282 
283 static __inline void
284 vmspace_dofree(struct vmspace *vm)
285 {
286 	CTR1(KTR_VM, "vmspace_free: %p", vm);
287 
288 	/*
289 	 * Make sure any SysV shm is freed, it might not have been in
290 	 * exit1().
291 	 */
292 	shmexit(vm);
293 
294 	/*
295 	 * Lock the map, to wait out all other references to it.
296 	 * Delete all of the mappings and pages they hold, then call
297 	 * the pmap module to reclaim anything left.
298 	 */
299 	vm_map_lock(&vm->vm_map);
300 	(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
301 	    vm->vm_map.max_offset);
302 	vm_map_unlock(&vm->vm_map);
303 
304 	pmap_release(vmspace_pmap(vm));
305 	uma_zfree(vmspace_zone, vm);
306 }
307 
308 void
309 vmspace_free(struct vmspace *vm)
310 {
311 	GIANT_REQUIRED;
312 
313 	if (vm->vm_refcnt == 0)
314 		panic("vmspace_free: attempt to free already freed vmspace");
315 
316 	if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0)
317 		vmspace_dofree(vm);
318 }
319 
320 void
321 vmspace_exitfree(struct proc *p)
322 {
323 	struct vmspace *vm;
324 
325 	GIANT_REQUIRED;
326 	vm = p->p_vmspace;
327 	p->p_vmspace = NULL;
328 
329 	/*
330 	 * cleanup by parent process wait()ing on exiting child.  vm_refcnt
331 	 * may not be 0 (e.g. fork() and child exits without exec()ing).
332 	 * exitingcnt may increment above 0 and drop back down to zero
333 	 * several times while vm_refcnt is held non-zero.  vm_refcnt
334 	 * may also increment above 0 and drop back down to zero several
335 	 * times while vm_exitingcnt is held non-zero.
336 	 *
337 	 * The last wait on the exiting child's vmspace will clean up
338 	 * the remainder of the vmspace.
339 	 */
340 	if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0)
341 		vmspace_dofree(vm);
342 }
343 
344 /*
345  * vmspace_swap_count() - count the approximate swap usage in pages for a
346  *			  vmspace.
347  *
348  *	The map must be locked.
349  *
350  *	Swap usage is determined by taking the proportional swap used by
351  *	VM objects backing the VM map.  To make up for fractional losses,
352  *	if the VM object has any swap use at all, the associated map entries
353  *	count for at least 1 swap page.
354  */
355 int
356 vmspace_swap_count(struct vmspace *vmspace)
357 {
358 	vm_map_t map = &vmspace->vm_map;
359 	vm_map_entry_t cur;
360 	int count = 0;
361 
362 	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
363 		vm_object_t object;
364 
365 		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
366 		    (object = cur->object.vm_object) != NULL) {
367 			VM_OBJECT_LOCK(object);
368 			if (object->type == OBJT_SWAP &&
369 			    object->un_pager.swp.swp_bcount != 0) {
370 				int n = (cur->end - cur->start) / PAGE_SIZE;
371 
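				/*
				 * Prorate the object's swap usage by the
				 * fraction of the object that this entry
				 * maps; the +1 guarantees that any swap
				 * use at all counts for at least one page.
				 */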
372 				count += object->un_pager.swp.swp_bcount *
373 				    SWAP_META_PAGES * n / object->size + 1;
374 			}
375 			VM_OBJECT_UNLOCK(object);
376 		}
377 	}
378 	return (count);
379 }
380 
381 void
382 _vm_map_lock(vm_map_t map, const char *file, int line)
383 {
384 	int error;
385 
386 	if (map->system_map)
387 		_mtx_lock_flags(&map->system_mtx, 0, file, line);
388 	else {
389 		error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
390 		KASSERT(error == 0, ("%s: failed to get lock", __func__));
391 	}
392 	map->timestamp++;
393 }
394 
395 void
396 _vm_map_unlock(vm_map_t map, const char *file, int line)
397 {
398 
399 	if (map->system_map)
400 		_mtx_unlock_flags(&map->system_mtx, 0, file, line);
401 	else
402 		lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
403 }
404 
405 void
406 _vm_map_lock_read(vm_map_t map, const char *file, int line)
407 {
408 	int error;
409 
410 	if (map->system_map)
411 		_mtx_lock_flags(&map->system_mtx, 0, file, line);
412 	else {
413 		error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
414 		KASSERT(error == 0, ("%s: failed to get lock", __func__));
415 	}
416 }
417 
418 void
419 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
420 {
421 
422 	if (map->system_map)
423 		_mtx_unlock_flags(&map->system_mtx, 0, file, line);
424 	else
425 		lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
426 }
427 
428 int
429 _vm_map_trylock(vm_map_t map, const char *file, int line)
430 {
431 	int error;
432 
433 	error = map->system_map ?
434 	    !_mtx_trylock(&map->system_mtx, 0, file, line) :
435 	    lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL, curthread);
436 	if (error == 0)
437 		map->timestamp++;
438 	return (error == 0);
439 }
440 
441 int
442 _vm_map_trylock_read(vm_map_t map, const char *file, int line)
443 {
444 	int error;
445 
446 	error = map->system_map ?
447 	    !_mtx_trylock(&map->system_mtx, 0, file, line) :
448 	    lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL, curthread);
449 	return (error == 0);
450 }
451 
452 int
453 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
454 {
455 
456 	if (map->system_map) {
457 #ifdef INVARIANTS
458 		_mtx_assert(&map->system_mtx, MA_OWNED, file, line);
459 #endif
460 	} else
461 		KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
462 		    ("%s: lock not held", __func__));
463 	map->timestamp++;
464 	return (0);
465 }
466 
467 void
468 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
469 {
470 
471 	if (map->system_map) {
472 #ifdef INVARIANTS
473 		_mtx_assert(&map->system_mtx, MA_OWNED, file, line);
474 #endif
475 	} else
476 		KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
477 		    ("%s: lock not held", __func__));
478 }
479 
480 /*
481  *	vm_map_unlock_and_wait:
482  */
483 int
484 vm_map_unlock_and_wait(vm_map_t map, boolean_t user_wait)
485 {
486 
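	/*
	 * Take map_sleep_mtx before dropping the map lock so that a
	 * vm_map_wakeup() issued after the unlock cannot be lost; msleep()
	 * releases map_sleep_mtx (PDROP) once the thread is asleep.
	 */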
487 	mtx_lock(&map_sleep_mtx);
488 	vm_map_unlock(map);
489 	return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps", 0));
490 }
491 
492 /*
493  *	vm_map_wakeup:
494  */
495 void
496 vm_map_wakeup(vm_map_t map)
497 {
498 
499 	/*
500 	 * Acquire and release map_sleep_mtx to prevent a wakeup()
501 	 * from being performed (and lost) between the vm_map_unlock()
502 	 * and the msleep() in vm_map_unlock_and_wait().
503 	 */
504 	mtx_lock(&map_sleep_mtx);
505 	mtx_unlock(&map_sleep_mtx);
506 	wakeup(&map->root);
507 }
508 
509 long
510 vmspace_resident_count(struct vmspace *vmspace)
511 {
512 	return pmap_resident_count(vmspace_pmap(vmspace));
513 }
514 
515 /*
516  *	vm_map_create:
517  *
518  *	Creates and returns a new empty VM map with
519  *	the given physical map structure, and having
520  *	the given lower and upper address bounds.
521  */
522 vm_map_t
523 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
524 {
525 	vm_map_t result;
526 
527 	result = uma_zalloc(mapzone, M_WAITOK);
528 	CTR1(KTR_VM, "vm_map_create: %p", result);
529 	_vm_map_init(result, min, max);
530 	result->pmap = pmap;
531 	return (result);
532 }
533 
534 /*
535  * Initialize an existing vm_map structure
536  * such as that in the vmspace structure.
537  * The pmap is set elsewhere.
538  */
539 static void
540 _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
541 {
542 
543 	map->header.next = map->header.prev = &map->header;
544 	map->needs_wakeup = FALSE;
545 	map->system_map = 0;
546 	map->min_offset = min;
547 	map->max_offset = max;
548 	map->first_free = &map->header;
549 	map->root = NULL;
550 	map->timestamp = 0;
551 }
552 
553 void
554 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
555 {
556 	_vm_map_init(map, min, max);
557 	mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
558 	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
559 }
560 
561 /*
562  *	vm_map_entry_dispose:	[ internal use only ]
563  *
564  *	Inverse of vm_map_entry_create.
565  */
566 static void
567 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
568 {
569 	uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
570 }
571 
572 /*
573  *	vm_map_entry_create:	[ internal use only ]
574  *
575  *	Allocates a VM map entry for insertion.
576  *	No entry fields are filled in.
577  */
578 static vm_map_entry_t
579 vm_map_entry_create(vm_map_t map)
580 {
581 	vm_map_entry_t new_entry;
582 
583 	if (map->system_map)
584 		new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
585 	else
586 		new_entry = uma_zalloc(mapentzone, M_WAITOK);
587 	if (new_entry == NULL)
588 		panic("vm_map_entry_create: kernel resources exhausted");
589 	return (new_entry);
590 }
591 
592 /*
593  *	vm_map_entry_set_behavior:
594  *
595  *	Set the expected access behavior, either normal, random, or
596  *	sequential.
597  */
598 static __inline void
599 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
600 {
601 	entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
602 	    (behavior & MAP_ENTRY_BEHAV_MASK);
603 }
604 
605 /*
606  *	vm_map_entry_splay:
607  *
608  *	Implements Sleator and Tarjan's top-down splay algorithm.  Returns
609  *	the vm_map_entry containing the given address.  If, however, that
610  *	address is not found in the vm_map, returns a vm_map_entry that is
611  *	adjacent to the address, coming before or after it.
612  */
613 static vm_map_entry_t
614 vm_map_entry_splay(vm_offset_t address, vm_map_entry_t root)
615 {
616 	struct vm_map_entry dummy;
617 	vm_map_entry_t lefttreemax, righttreemin, y;
618 
619 	if (root == NULL)
620 		return (root);
621 	lefttreemax = righttreemin = &dummy;
622 	for (;; root = y) {
623 		if (address < root->start) {
624 			if ((y = root->left) == NULL)
625 				break;
626 			if (address < y->start) {
627 				/* Rotate right. */
628 				root->left = y->right;
629 				y->right = root;
630 				root = y;
631 				if ((y = root->left) == NULL)
632 					break;
633 			}
634 			/* Link into the new root's right tree. */
635 			righttreemin->left = root;
636 			righttreemin = root;
637 		} else if (address >= root->end) {
638 			if ((y = root->right) == NULL)
639 				break;
640 			if (address >= y->end) {
641 				/* Rotate left. */
642 				root->right = y->left;
643 				y->left = root;
644 				root = y;
645 				if ((y = root->right) == NULL)
646 					break;
647 			}
648 			/* Link into the new root's left tree. */
649 			lefttreemax->right = root;
650 			lefttreemax = root;
651 		} else
652 			break;
653 	}
654 	/* Assemble the new root. */
655 	lefttreemax->right = root->left;
656 	righttreemin->left = root->right;
657 	root->left = dummy.right;
658 	root->right = dummy.left;
659 	return (root);
660 }
661 
662 /*
663  *	vm_map_entry_{un,}link:
664  *
665  *	Insert/remove entries from maps.
666  */
667 static void
668 vm_map_entry_link(vm_map_t map,
669 		  vm_map_entry_t after_where,
670 		  vm_map_entry_t entry)
671 {
672 
673 	CTR4(KTR_VM,
674 	    "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
675 	    map->nentries, entry, after_where);
676 	map->nentries++;
677 	entry->prev = after_where;
678 	entry->next = after_where->next;
679 	entry->next->prev = entry;
680 	after_where->next = entry;
681 
682 	if (after_where != &map->header) {
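	/*
	 * Make the new entry the root of the splay tree.  When after_where
	 * is a real entry, splaying on its start address brings after_where
	 * itself to the root (so the return value can be ignored); the new
	 * entry then takes over the root with after_where as its left child
	 * and after_where's old right subtree as its right child.
	 */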
683 		if (after_where != map->root)
684 			vm_map_entry_splay(after_where->start, map->root);
685 		entry->right = after_where->right;
686 		entry->left = after_where;
687 		after_where->right = NULL;
688 	} else {
689 		entry->right = map->root;
690 		entry->left = NULL;
691 	}
692 	map->root = entry;
693 }
694 
695 static void
696 vm_map_entry_unlink(vm_map_t map,
697 		    vm_map_entry_t entry)
698 {
699 	vm_map_entry_t next, prev, root;
700 
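	/*
	 * Splay the entry to the root and then join its subtrees.  Splaying
	 * entry->start within the left subtree brings entry's predecessor,
	 * which has no right child, to that subtree's root, so the right
	 * subtree can simply be hung from it.
	 */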
701 	if (entry != map->root)
702 		vm_map_entry_splay(entry->start, map->root);
703 	if (entry->left == NULL)
704 		root = entry->right;
705 	else {
706 		root = vm_map_entry_splay(entry->start, entry->left);
707 		root->right = entry->right;
708 	}
709 	map->root = root;
710 
711 	prev = entry->prev;
712 	next = entry->next;
713 	next->prev = prev;
714 	prev->next = next;
715 	map->nentries--;
716 	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
717 	    map->nentries, entry);
718 }
719 
720 /*
721  *	vm_map_lookup_entry:	[ internal use only ]
722  *
723  *	Finds the map entry containing (or
724  *	immediately preceding) the specified address
725  *	in the given map; the entry is returned
726  *	in the "entry" parameter.  The boolean
727  *	result indicates whether the address is
728  *	actually contained in the map.
729  */
730 boolean_t
731 vm_map_lookup_entry(
732 	vm_map_t map,
733 	vm_offset_t address,
734 	vm_map_entry_t *entry)	/* OUT */
735 {
736 	vm_map_entry_t cur;
737 
738 	cur = vm_map_entry_splay(address, map->root);
739 	if (cur == NULL)
740 		*entry = &map->header;
741 	else {
742 		map->root = cur;
743 
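		/*
		 * The splay has made either the containing entry or one of
		 * its neighbors the new root; compare against its bounds to
		 * decide which case applies.
		 */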
744 		if (address >= cur->start) {
745 			*entry = cur;
746 			if (cur->end > address)
747 				return (TRUE);
748 		} else
749 			*entry = cur->prev;
750 	}
751 	return (FALSE);
752 }
753 
754 /*
755  *	vm_map_insert:
756  *
757  *	Inserts the given whole VM object into the target
758  *	map at the specified address range.  The object's
759  *	size should match that of the address range.
760  *
761  *	Requires that the map be locked, and leaves it so.
762  *
763  *	If object is non-NULL, ref count must be bumped by caller
764  *	prior to making call to account for the new entry.
765  */
766 int
767 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
768 	      vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
769 	      int cow)
770 {
771 	vm_map_entry_t new_entry;
772 	vm_map_entry_t prev_entry;
773 	vm_map_entry_t temp_entry;
774 	vm_eflags_t protoeflags;
775 
776 	/*
777 	 * Check that the start and end points are not bogus.
778 	 */
779 	if ((start < map->min_offset) || (end > map->max_offset) ||
780 	    (start >= end))
781 		return (KERN_INVALID_ADDRESS);
782 
783 	/*
784 	 * Find the entry prior to the proposed starting address; if it's part
785 	 * of an existing entry, this range is bogus.
786 	 */
787 	if (vm_map_lookup_entry(map, start, &temp_entry))
788 		return (KERN_NO_SPACE);
789 
790 	prev_entry = temp_entry;
791 
792 	/*
793 	 * Assert that the next entry doesn't overlap the end point.
794 	 */
795 	if ((prev_entry->next != &map->header) &&
796 	    (prev_entry->next->start < end))
797 		return (KERN_NO_SPACE);
798 
799 	protoeflags = 0;
800 
801 	if (cow & MAP_COPY_ON_WRITE)
802 		protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
803 
804 	if (cow & MAP_NOFAULT) {
805 		protoeflags |= MAP_ENTRY_NOFAULT;
806 
807 		KASSERT(object == NULL,
808 			("vm_map_insert: paradoxical MAP_NOFAULT request"));
809 	}
810 	if (cow & MAP_DISABLE_SYNCER)
811 		protoeflags |= MAP_ENTRY_NOSYNC;
812 	if (cow & MAP_DISABLE_COREDUMP)
813 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
814 
815 	if (object != NULL) {
816 		/*
817 		 * OBJ_ONEMAPPING must be cleared unless this mapping
818 		 * is trivially proven to be the only mapping for any
819 		 * of the object's pages.  (Object granularity
820 		 * reference counting is insufficient to recognize
821 		 * aliases with precision.)
822 		 */
823 		if (object != kmem_object)
824 			mtx_lock(&Giant);
825 		VM_OBJECT_LOCK(object);
826 		if (object->ref_count > 1 || object->shadow_count != 0)
827 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
828 		VM_OBJECT_UNLOCK(object);
829 		if (object != kmem_object)
830 			mtx_unlock(&Giant);
831 	}
832 	else if ((prev_entry != &map->header) &&
833 		 (prev_entry->eflags == protoeflags) &&
834 		 (prev_entry->end == start) &&
835 		 (prev_entry->wired_count == 0) &&
836 		 ((prev_entry->object.vm_object == NULL) ||
837 		  vm_object_coalesce(prev_entry->object.vm_object,
838 				     OFF_TO_IDX(prev_entry->offset),
839 				     (vm_size_t)(prev_entry->end - prev_entry->start),
840 				     (vm_size_t)(end - prev_entry->end)))) {
841 		/*
842 		 * We were able to extend the object.  Determine if we
843 		 * can extend the previous map entry to include the
844 		 * new range as well.
845 		 */
846 		if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
847 		    (prev_entry->protection == prot) &&
848 		    (prev_entry->max_protection == max)) {
849 			map->size += (end - prev_entry->end);
850 			prev_entry->end = end;
851 			vm_map_simplify_entry(map, prev_entry);
852 			return (KERN_SUCCESS);
853 		}
854 
855 		/*
856 		 * If we can extend the object but cannot extend the
857 		 * map entry, we have to create a new map entry.  We
858 		 * must bump the ref count on the extended object to
859 		 * account for it.  object may be NULL.
860 		 */
861 		object = prev_entry->object.vm_object;
862 		offset = prev_entry->offset +
863 			(prev_entry->end - prev_entry->start);
864 		vm_object_reference(object);
865 	}
866 
867 	/*
868 	 * NOTE: if conditionals fail, object can be NULL here.  This occurs
869 	 * in things like the buffer map where we manage kva but do not manage
870 	 * backing objects.
871 	 */
872 
873 	/*
874 	 * Create a new entry
875 	 */
876 	new_entry = vm_map_entry_create(map);
877 	new_entry->start = start;
878 	new_entry->end = end;
879 
880 	new_entry->eflags = protoeflags;
881 	new_entry->object.vm_object = object;
882 	new_entry->offset = offset;
883 	new_entry->avail_ssize = 0;
884 
885 	new_entry->inheritance = VM_INHERIT_DEFAULT;
886 	new_entry->protection = prot;
887 	new_entry->max_protection = max;
888 	new_entry->wired_count = 0;
889 
890 	/*
891 	 * Insert the new entry into the list
892 	 */
893 	vm_map_entry_link(map, prev_entry, new_entry);
894 	map->size += new_entry->end - new_entry->start;
895 
896 	/*
897 	 * Update the free space hint
898 	 */
899 	if ((map->first_free == prev_entry) &&
900 	    (prev_entry->end >= new_entry->start)) {
901 		map->first_free = new_entry;
902 	}
903 
904 #if 0
905 	/*
906 	 * Temporarily removed to avoid MAP_STACK panic, due to
907 	 * MAP_STACK being a huge hack.  Will be added back in
908 	 * when MAP_STACK (and the user stack mapping) is fixed.
909 	 */
910 	/*
911 	 * It may be possible to simplify the entry
912 	 */
913 	vm_map_simplify_entry(map, new_entry);
914 #endif
915 
916 	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
917 		mtx_lock(&Giant);
918 		pmap_object_init_pt(map->pmap, start,
919 				    object, OFF_TO_IDX(offset), end - start,
920 				    cow & MAP_PREFAULT_PARTIAL);
921 		mtx_unlock(&Giant);
922 	}
923 
924 	return (KERN_SUCCESS);
925 }
926 
927 /*
928  * Find sufficient space for `length' bytes in the given map, starting at
929  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
930  */
931 int
932 vm_map_findspace(
933 	vm_map_t map,
934 	vm_offset_t start,
935 	vm_size_t length,
936 	vm_offset_t *addr)
937 {
938 	vm_map_entry_t entry, next;
939 	vm_offset_t end;
940 
941 	if (start < map->min_offset)
942 		start = map->min_offset;
943 	if (start > map->max_offset)
944 		return (1);
945 
946 	/*
947 	 * Look for the first possible address; if there's already something
948 	 * at this address, we have to start after it.
949 	 */
950 	if (start == map->min_offset) {
951 		if ((entry = map->first_free) != &map->header)
952 			start = entry->end;
953 	} else {
954 		vm_map_entry_t tmp;
955 
956 		if (vm_map_lookup_entry(map, start, &tmp))
957 			start = tmp->end;
958 		entry = tmp;
959 	}
960 
961 	/*
962 	 * Look through the rest of the map, trying to fit a new region in the
963 	 * gap between existing regions, or after the very last region.
964 	 */
965 	for (;; start = (entry = next)->end) {
966 		/*
967 		 * Find the end of the proposed new region.  Be sure we didn't
968 		 * go beyond the end of the map, or wrap around the address;
969 		 * if so, we lose.  Otherwise, if this is the last entry, or
970 		 * if the proposed new region fits before the next entry, we
971 		 * win.
972 		 */
973 		end = start + length;
974 		if (end > map->max_offset || end < start)
975 			return (1);
976 		next = entry->next;
977 		if (next == &map->header || next->start >= end)
978 			break;
979 	}
980 	*addr = start;
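	/*
	 * If allocating from the kernel map, extend the kernel page tables
	 * to cover the chosen region before it is used.
	 */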
981 	if (map == kernel_map) {
982 		vm_offset_t ksize;
983 		if ((ksize = round_page(start + length)) > kernel_vm_end) {
984 			pmap_growkernel(ksize);
985 		}
986 	}
987 	return (0);
988 }
989 
990 /*
991  *	vm_map_find finds an unallocated region in the target address
992  *	map with the given length.  The search is defined to be
993  *	first-fit from the specified address; the region found is
994  *	returned in the same parameter.
995  *
996  *	If object is non-NULL, ref count must be bumped by caller
997  *	prior to making call to account for the new entry.
998  */
999 int
1000 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
1001 	    vm_offset_t *addr,	/* IN/OUT */
1002 	    vm_size_t length, boolean_t find_space, vm_prot_t prot,
1003 	    vm_prot_t max, int cow)
1004 {
1005 	vm_offset_t start;
1006 	int result, s = 0;
1007 
1008 	start = *addr;
1009 
1010 	if (map == kmem_map)
1011 		s = splvm();
1012 
1013 	vm_map_lock(map);
1014 	if (find_space) {
1015 		if (vm_map_findspace(map, start, length, addr)) {
1016 			vm_map_unlock(map);
1017 			if (map == kmem_map)
1018 				splx(s);
1019 			return (KERN_NO_SPACE);
1020 		}
1021 		start = *addr;
1022 	}
1023 	result = vm_map_insert(map, object, offset,
1024 		start, start + length, prot, max, cow);
1025 	vm_map_unlock(map);
1026 
1027 	if (map == kmem_map)
1028 		splx(s);
1029 
1030 	return (result);
1031 }
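/*
 * Example (a minimal sketch of a caller): code that maps a named object
 * bumps the object reference before the call, as required above, and drops
 * it again if the mapping fails:
 *
 *	vm_offset_t addr = vm_map_min(map);
 *
 *	vm_object_reference(object);
 *	if (vm_map_find(map, object, offset, &addr, size, TRUE,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
 *		vm_object_deallocate(object);
 */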
1032 
1033 /*
1034  *	vm_map_simplify_entry:
1035  *
1036  *	Simplify the given map entry by merging with either neighbor.  This
1037  *	routine also has the ability to merge with both neighbors.
1038  *
1039  *	The map must be locked.
1040  *
1041  *	This routine guarantees that the passed entry remains valid (though
1042  *	possibly extended).  When merging, this routine may delete one or
1043  *	both neighbors.
1044  */
1045 void
1046 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
1047 {
1048 	vm_map_entry_t next, prev;
1049 	vm_size_t prevsize, esize;
1050 
1051 	if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP))
1052 		return;
1053 
1054 	prev = entry->prev;
1055 	if (prev != &map->header) {
1056 		prevsize = prev->end - prev->start;
1057 		if ( (prev->end == entry->start) &&
1058 		     (prev->object.vm_object == entry->object.vm_object) &&
1059 		     (!prev->object.vm_object ||
1060 			(prev->offset + prevsize == entry->offset)) &&
1061 		     (prev->eflags == entry->eflags) &&
1062 		     (prev->protection == entry->protection) &&
1063 		     (prev->max_protection == entry->max_protection) &&
1064 		     (prev->inheritance == entry->inheritance) &&
1065 		     (prev->wired_count == entry->wired_count)) {
1066 			if (map->first_free == prev)
1067 				map->first_free = entry;
1068 			vm_map_entry_unlink(map, prev);
1069 			entry->start = prev->start;
1070 			entry->offset = prev->offset;
1071 			if (prev->object.vm_object)
1072 				vm_object_deallocate(prev->object.vm_object);
1073 			vm_map_entry_dispose(map, prev);
1074 		}
1075 	}
1076 
1077 	next = entry->next;
1078 	if (next != &map->header) {
1079 		esize = entry->end - entry->start;
1080 		if ((entry->end == next->start) &&
1081 		    (next->object.vm_object == entry->object.vm_object) &&
1082 		     (!entry->object.vm_object ||
1083 			(entry->offset + esize == next->offset)) &&
1084 		    (next->eflags == entry->eflags) &&
1085 		    (next->protection == entry->protection) &&
1086 		    (next->max_protection == entry->max_protection) &&
1087 		    (next->inheritance == entry->inheritance) &&
1088 		    (next->wired_count == entry->wired_count)) {
1089 			if (map->first_free == next)
1090 				map->first_free = entry;
1091 			vm_map_entry_unlink(map, next);
1092 			entry->end = next->end;
1093 			if (next->object.vm_object)
1094 				vm_object_deallocate(next->object.vm_object);
1095 			vm_map_entry_dispose(map, next);
1096 		}
1097 	}
1098 }
1099 /*
1100  *	vm_map_clip_start:	[ internal use only ]
1101  *
1102  *	Asserts that the given entry begins at or after
1103  *	the specified address; if necessary,
1104  *	it splits the entry into two.
1105  */
1106 #define vm_map_clip_start(map, entry, startaddr) \
1107 { \
1108 	if (startaddr > entry->start) \
1109 		_vm_map_clip_start(map, entry, startaddr); \
1110 }
1111 
1112 /*
1113  *	This routine is called only when it is known that
1114  *	the entry must be split.
1115  */
1116 static void
1117 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
1118 {
1119 	vm_map_entry_t new_entry;
1120 
1121 	/*
1122 	 * Split off the front portion -- note that we must insert the new
1123 	 * entry BEFORE this one, so that this entry has the specified
1124 	 * starting address.
1125 	 */
1126 	vm_map_simplify_entry(map, entry);
1127 
1128 	/*
1129 	 * If there is no object backing this entry, we might as well create
1130 	 * one now.  If we defer it, an object can get created after the map
1131 	 * is clipped, and individual objects will be created for the split-up
1132 	 * map.  This is a bit of a hack, but is also about the best place to
1133 	 * put this improvement.
1134 	 */
1135 	if (entry->object.vm_object == NULL && !map->system_map) {
1136 		vm_object_t object;
1137 		object = vm_object_allocate(OBJT_DEFAULT,
1138 				atop(entry->end - entry->start));
1139 		entry->object.vm_object = object;
1140 		entry->offset = 0;
1141 	}
1142 
1143 	new_entry = vm_map_entry_create(map);
1144 	*new_entry = *entry;
1145 
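	/*
	 * The new entry covers [entry->start, start); the original entry is
	 * trimmed to begin at start, with its offset advanced to match.
	 */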
1146 	new_entry->end = start;
1147 	entry->offset += (start - entry->start);
1148 	entry->start = start;
1149 
1150 	vm_map_entry_link(map, entry->prev, new_entry);
1151 
1152 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1153 		vm_object_reference(new_entry->object.vm_object);
1154 	}
1155 }
1156 
1157 /*
1158  *	vm_map_clip_end:	[ internal use only ]
1159  *
1160  *	Asserts that the given entry ends at or before
1161  *	the specified address; if necessary,
1162  *	it splits the entry into two.
1163  */
1164 #define vm_map_clip_end(map, entry, endaddr) \
1165 { \
1166 	if ((endaddr) < (entry->end)) \
1167 		_vm_map_clip_end((map), (entry), (endaddr)); \
1168 }
1169 
1170 /*
1171  *	This routine is called only when it is known that
1172  *	the entry must be split.
1173  */
1174 static void
1175 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1176 {
1177 	vm_map_entry_t new_entry;
1178 
1179 	/*
1180 	 * If there is no object backing this entry, we might as well create
1181 	 * one now.  If we defer it, an object can get created after the map
1182 	 * is clipped, and individual objects will be created for the split-up
1183 	 * map.  This is a bit of a hack, but is also about the best place to
1184 	 * put this improvement.
1185 	 */
1186 	if (entry->object.vm_object == NULL && !map->system_map) {
1187 		vm_object_t object;
1188 		object = vm_object_allocate(OBJT_DEFAULT,
1189 				atop(entry->end - entry->start));
1190 		entry->object.vm_object = object;
1191 		entry->offset = 0;
1192 	}
1193 
1194 	/*
1195 	 * Create a new entry and insert it AFTER the specified entry
1196 	 */
1197 	new_entry = vm_map_entry_create(map);
1198 	*new_entry = *entry;
1199 
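	/*
	 * The new entry covers [end, old end); the original entry is
	 * trimmed to finish at end.
	 */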
1200 	new_entry->start = entry->end = end;
1201 	new_entry->offset += (end - entry->start);
1202 
1203 	vm_map_entry_link(map, entry, new_entry);
1204 
1205 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1206 		vm_object_reference(new_entry->object.vm_object);
1207 	}
1208 }
1209 
1210 /*
1211  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
1212  *
1213  *	Asserts that the starting and ending region
1214  *	addresses fall within the valid range of the map.
1215  */
1216 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
1217 		{					\
1218 		if (start < vm_map_min(map))		\
1219 			start = vm_map_min(map);	\
1220 		if (end > vm_map_max(map))		\
1221 			end = vm_map_max(map);		\
1222 		if (start > end)			\
1223 			start = end;			\
1224 		}
1225 
1226 /*
1227  *	vm_map_submap:		[ kernel use only ]
1228  *
1229  *	Mark the given range as handled by a subordinate map.
1230  *
1231  *	This range must have been created with vm_map_find,
1232  *	and no other operations may have been performed on this
1233  *	range prior to calling vm_map_submap.
1234  *
1235  *	Only a limited number of operations can be performed
1236  *	within this range after calling vm_map_submap:
1237  *		vm_fault
1238  *	[Don't try vm_map_copy!]
1239  *
1240  *	To remove a submapping, one must first remove the
1241  *	range from the superior map, and then destroy the
1242  *	submap (if desired).  [Better yet, don't try it.]
1243  */
1244 int
1245 vm_map_submap(
1246 	vm_map_t map,
1247 	vm_offset_t start,
1248 	vm_offset_t end,
1249 	vm_map_t submap)
1250 {
1251 	vm_map_entry_t entry;
1252 	int result = KERN_INVALID_ARGUMENT;
1253 
1254 	vm_map_lock(map);
1255 
1256 	VM_MAP_RANGE_CHECK(map, start, end);
1257 
1258 	if (vm_map_lookup_entry(map, start, &entry)) {
1259 		vm_map_clip_start(map, entry, start);
1260 	} else
1261 		entry = entry->next;
1262 
1263 	vm_map_clip_end(map, entry, end);
1264 
1265 	if ((entry->start == start) && (entry->end == end) &&
1266 	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1267 	    (entry->object.vm_object == NULL)) {
1268 		entry->object.sub_map = submap;
1269 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1270 		result = KERN_SUCCESS;
1271 	}
1272 	vm_map_unlock(map);
1273 
1274 	return (result);
1275 }
1276 
1277 /*
1278  *	vm_map_protect:
1279  *
1280  *	Sets the protection of the specified address
1281  *	region in the target map.  If "set_max" is
1282  *	specified, the maximum protection is to be set;
1283  *	otherwise, only the current protection is affected.
1284  */
1285 int
1286 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1287 	       vm_prot_t new_prot, boolean_t set_max)
1288 {
1289 	vm_map_entry_t current;
1290 	vm_map_entry_t entry;
1291 
1292 	vm_map_lock(map);
1293 
1294 	VM_MAP_RANGE_CHECK(map, start, end);
1295 
1296 	if (vm_map_lookup_entry(map, start, &entry)) {
1297 		vm_map_clip_start(map, entry, start);
1298 	} else {
1299 		entry = entry->next;
1300 	}
1301 
1302 	/*
1303 	 * Make a first pass to check for protection violations.
1304 	 */
1305 	current = entry;
1306 	while ((current != &map->header) && (current->start < end)) {
1307 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1308 			vm_map_unlock(map);
1309 			return (KERN_INVALID_ARGUMENT);
1310 		}
1311 		if ((new_prot & current->max_protection) != new_prot) {
1312 			vm_map_unlock(map);
1313 			return (KERN_PROTECTION_FAILURE);
1314 		}
1315 		current = current->next;
1316 	}
1317 
1318 	/*
1319 	 * Go back and fix up protections. [Note that clipping is not
1320 	 * necessary the second time.]
1321 	 */
1322 	current = entry;
1323 	while ((current != &map->header) && (current->start < end)) {
1324 		vm_prot_t old_prot;
1325 
1326 		vm_map_clip_end(map, current, end);
1327 
1328 		old_prot = current->protection;
1329 		if (set_max)
1330 			current->protection =
1331 			    (current->max_protection = new_prot) &
1332 			    old_prot;
1333 		else
1334 			current->protection = new_prot;
1335 
1336 		/*
1337 		 * Update physical map if necessary. Worry about copy-on-write
1338 		 * here -- CHECK THIS XXX
1339 		 */
1340 		if (current->protection != old_prot) {
1341 			mtx_lock(&Giant);
1342 			vm_page_lock_queues();
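			/*
			 * For copy-on-write entries, strip VM_PROT_WRITE
			 * from the permissions given to the pmap so that
			 * the first write still faults and triggers the
			 * copy.
			 */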
1343 #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1344 							VM_PROT_ALL)
1345 			pmap_protect(map->pmap, current->start,
1346 			    current->end,
1347 			    current->protection & MASK(current));
1348 #undef	MASK
1349 			vm_page_unlock_queues();
1350 			mtx_unlock(&Giant);
1351 		}
1352 		vm_map_simplify_entry(map, current);
1353 		current = current->next;
1354 	}
1355 	vm_map_unlock(map);
1356 	return (KERN_SUCCESS);
1357 }
1358 
1359 /*
1360  *	vm_map_madvise:
1361  *
1362  * 	This routine traverses a process's map, handling the madvise
1363  *	system call.  Advisories are classified as either those affecting
1364  *	the vm_map_entry structure or those affecting the underlying
1365  *	objects.
1366  */
1367 int
1368 vm_map_madvise(
1369 	vm_map_t map,
1370 	vm_offset_t start,
1371 	vm_offset_t end,
1372 	int behav)
1373 {
1374 	vm_map_entry_t current, entry;
1375 	int modify_map = 0;
1376 
1377 	/*
1378 	 * Some madvise calls directly modify the vm_map_entry, in which case
1379 	 * we need to use an exclusive lock on the map and we need to perform
1380 	 * various clipping operations.  Otherwise we only need a read-lock
1381 	 * on the map.
1382 	 */
1383 	switch(behav) {
1384 	case MADV_NORMAL:
1385 	case MADV_SEQUENTIAL:
1386 	case MADV_RANDOM:
1387 	case MADV_NOSYNC:
1388 	case MADV_AUTOSYNC:
1389 	case MADV_NOCORE:
1390 	case MADV_CORE:
1391 		modify_map = 1;
1392 		vm_map_lock(map);
1393 		break;
1394 	case MADV_WILLNEED:
1395 	case MADV_DONTNEED:
1396 	case MADV_FREE:
1397 		vm_map_lock_read(map);
1398 		break;
1399 	default:
1400 		return (KERN_INVALID_ARGUMENT);
1401 	}
1402 
1403 	/*
1404 	 * Locate starting entry and clip if necessary.
1405 	 */
1406 	VM_MAP_RANGE_CHECK(map, start, end);
1407 
1408 	if (vm_map_lookup_entry(map, start, &entry)) {
1409 		if (modify_map)
1410 			vm_map_clip_start(map, entry, start);
1411 	} else {
1412 		entry = entry->next;
1413 	}
1414 
1415 	if (modify_map) {
1416 		/*
1417 		 * madvise behaviors that are implemented in the vm_map_entry.
1418 		 *
1419 		 * We clip the vm_map_entry so that behavioral changes are
1420 		 * limited to the specified address range.
1421 		 */
1422 		for (current = entry;
1423 		     (current != &map->header) && (current->start < end);
1424 		     current = current->next
1425 		) {
1426 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1427 				continue;
1428 
1429 			vm_map_clip_end(map, current, end);
1430 
1431 			switch (behav) {
1432 			case MADV_NORMAL:
1433 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1434 				break;
1435 			case MADV_SEQUENTIAL:
1436 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1437 				break;
1438 			case MADV_RANDOM:
1439 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1440 				break;
1441 			case MADV_NOSYNC:
1442 				current->eflags |= MAP_ENTRY_NOSYNC;
1443 				break;
1444 			case MADV_AUTOSYNC:
1445 				current->eflags &= ~MAP_ENTRY_NOSYNC;
1446 				break;
1447 			case MADV_NOCORE:
1448 				current->eflags |= MAP_ENTRY_NOCOREDUMP;
1449 				break;
1450 			case MADV_CORE:
1451 				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1452 				break;
1453 			default:
1454 				break;
1455 			}
1456 			vm_map_simplify_entry(map, current);
1457 		}
1458 		vm_map_unlock(map);
1459 	} else {
1460 		vm_pindex_t pindex;
1461 		int count;
1462 
1463 		/*
1464 		 * madvise behaviors that are implemented in the underlying
1465 		 * vm_object.
1466 		 *
1467 		 * Since we don't clip the vm_map_entry, we have to clip
1468 		 * the vm_object pindex and count.
1469 		 */
1470 		for (current = entry;
1471 		     (current != &map->header) && (current->start < end);
1472 		     current = current->next
1473 		) {
1474 			vm_offset_t useStart;
1475 
1476 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1477 				continue;
1478 
1479 			pindex = OFF_TO_IDX(current->offset);
1480 			count = atop(current->end - current->start);
1481 			useStart = current->start;
1482 
1483 			if (current->start < start) {
1484 				pindex += atop(start - current->start);
1485 				count -= atop(start - current->start);
1486 				useStart = start;
1487 			}
1488 			if (current->end > end)
1489 				count -= atop(current->end - end);
1490 
1491 			if (count <= 0)
1492 				continue;
1493 
1494 			vm_object_madvise(current->object.vm_object,
1495 					  pindex, count, behav);
1496 			if (behav == MADV_WILLNEED) {
1497 				mtx_lock(&Giant);
1498 				pmap_object_init_pt(
1499 				    map->pmap,
1500 				    useStart,
1501 				    current->object.vm_object,
1502 				    pindex,
1503 				    (count << PAGE_SHIFT),
1504 				    MAP_PREFAULT_MADVISE
1505 				);
1506 				mtx_unlock(&Giant);
1507 			}
1508 		}
1509 		vm_map_unlock_read(map);
1510 	}
1511 	return (0);
1512 }
1513 
1514 
1515 /*
1516  *	vm_map_inherit:
1517  *
1518  *	Sets the inheritance of the specified address
1519  *	range in the target map.  Inheritance
1520  *	affects how the map will be shared with
1521  *	child maps at the time of vm_map_fork.
1522  */
1523 int
1524 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1525 	       vm_inherit_t new_inheritance)
1526 {
1527 	vm_map_entry_t entry;
1528 	vm_map_entry_t temp_entry;
1529 
1530 	switch (new_inheritance) {
1531 	case VM_INHERIT_NONE:
1532 	case VM_INHERIT_COPY:
1533 	case VM_INHERIT_SHARE:
1534 		break;
1535 	default:
1536 		return (KERN_INVALID_ARGUMENT);
1537 	}
1538 	vm_map_lock(map);
1539 	VM_MAP_RANGE_CHECK(map, start, end);
1540 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1541 		entry = temp_entry;
1542 		vm_map_clip_start(map, entry, start);
1543 	} else
1544 		entry = temp_entry->next;
1545 	while ((entry != &map->header) && (entry->start < end)) {
1546 		vm_map_clip_end(map, entry, end);
1547 		entry->inheritance = new_inheritance;
1548 		vm_map_simplify_entry(map, entry);
1549 		entry = entry->next;
1550 	}
1551 	vm_map_unlock(map);
1552 	return (KERN_SUCCESS);
1553 }
1554 
1555 /*
1556  *	vm_map_unwire:
1557  *
1558  *	Implements both kernel and user unwiring.
1559  */
1560 int
1561 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1562 	boolean_t user_unwire)
1563 {
1564 	vm_map_entry_t entry, first_entry, tmp_entry;
1565 	vm_offset_t saved_start;
1566 	unsigned int last_timestamp;
1567 	int rv;
1568 	boolean_t need_wakeup, result;
1569 
1570 	vm_map_lock(map);
1571 	VM_MAP_RANGE_CHECK(map, start, end);
1572 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
1573 		vm_map_unlock(map);
1574 		return (KERN_INVALID_ADDRESS);
1575 	}
1576 	last_timestamp = map->timestamp;
1577 	entry = first_entry;
1578 	while (entry != &map->header && entry->start < end) {
1579 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1580 			/*
1581 			 * We have not yet clipped the entry.
1582 			 */
1583 			saved_start = (start >= entry->start) ? start :
1584 			    entry->start;
1585 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1586 			if (vm_map_unlock_and_wait(map, user_unwire)) {
1587 				/*
1588 				 * Allow interruption of user unwiring?
1589 				 */
1590 			}
1591 			vm_map_lock(map);
1592 			if (last_timestamp+1 != map->timestamp) {
1593 				/*
1594 				 * Look again for the entry because the map was
1595 				 * modified while it was unlocked.
1596 				 * Specifically, the entry may have been
1597 				 * clipped, merged, or deleted.
1598 				 */
1599 				if (!vm_map_lookup_entry(map, saved_start,
1600 				    &tmp_entry)) {
1601 					if (saved_start == start) {
1602 						/*
1603 						 * First_entry has been deleted.
1604 						 */
1605 						vm_map_unlock(map);
1606 						return (KERN_INVALID_ADDRESS);
1607 					}
1608 					end = saved_start;
1609 					rv = KERN_INVALID_ADDRESS;
1610 					goto done;
1611 				}
1612 				if (entry == first_entry)
1613 					first_entry = tmp_entry;
1614 				else
1615 					first_entry = NULL;
1616 				entry = tmp_entry;
1617 			}
1618 			last_timestamp = map->timestamp;
1619 			continue;
1620 		}
1621 		vm_map_clip_start(map, entry, start);
1622 		vm_map_clip_end(map, entry, end);
1623 		/*
1624 		 * Mark the entry in case the map lock is released.  (See
1625 		 * above.)
1626 		 */
1627 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1628 		/*
1629 		 * Check the map for holes in the specified region.
1630 		 */
1631 		if (entry->end < end && (entry->next == &map->header ||
1632 		    entry->next->start > entry->end)) {
1633 			end = entry->end;
1634 			rv = KERN_INVALID_ADDRESS;
1635 			goto done;
1636 		}
1637 		/*
1638 		 * Require that the entry is wired.
1639 		 */
1640 		if (entry->wired_count == 0 || (user_unwire &&
1641 		    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)) {
1642 			end = entry->end;
1643 			rv = KERN_INVALID_ARGUMENT;
1644 			goto done;
1645 		}
1646 		entry = entry->next;
1647 	}
1648 	rv = KERN_SUCCESS;
1649 done:
1650 	need_wakeup = FALSE;
1651 	if (first_entry == NULL) {
1652 		result = vm_map_lookup_entry(map, start, &first_entry);
1653 		KASSERT(result, ("vm_map_unwire: lookup failed"));
1654 	}
1655 	entry = first_entry;
1656 	while (entry != &map->header && entry->start < end) {
1657 		if (rv == KERN_SUCCESS) {
1658 			if (user_unwire)
1659 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1660 			entry->wired_count--;
1661 			if (entry->wired_count == 0) {
1662 				/*
1663 				 * Retain the map lock.
1664 				 */
1665 				vm_fault_unwire(map, entry->start, entry->end);
1666 			}
1667 		}
1668 		KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1669 			("vm_map_unwire: in-transition flag missing"));
1670 		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1671 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1672 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1673 			need_wakeup = TRUE;
1674 		}
1675 		vm_map_simplify_entry(map, entry);
1676 		entry = entry->next;
1677 	}
1678 	vm_map_unlock(map);
1679 	if (need_wakeup)
1680 		vm_map_wakeup(map);
1681 	return (rv);
1682 }
1683 
1684 /*
1685  *	vm_map_wire:
1686  *
1687  *	Implements both kernel and user wiring.
1688  */
1689 int
1690 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1691 	boolean_t user_wire)
1692 {
1693 	vm_map_entry_t entry, first_entry, tmp_entry;
1694 	vm_offset_t saved_end, saved_start;
1695 	unsigned int last_timestamp;
1696 	int rv;
1697 	boolean_t need_wakeup, result;
1698 
1699 	vm_map_lock(map);
1700 	VM_MAP_RANGE_CHECK(map, start, end);
1701 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
1702 		vm_map_unlock(map);
1703 		return (KERN_INVALID_ADDRESS);
1704 	}
1705 	last_timestamp = map->timestamp;
1706 	entry = first_entry;
1707 	while (entry != &map->header && entry->start < end) {
1708 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1709 			/*
1710 			 * We have not yet clipped the entry.
1711 			 */
1712 			saved_start = (start >= entry->start) ? start :
1713 			    entry->start;
1714 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1715 			if (vm_map_unlock_and_wait(map, user_wire)) {
1716 				/*
1717 				 * Allow interruption of user wiring?
1718 				 */
1719 			}
1720 			vm_map_lock(map);
1721 			if (last_timestamp + 1 != map->timestamp) {
1722 				/*
1723 				 * Look again for the entry because the map was
1724 				 * modified while it was unlocked.
1725 				 * Specifically, the entry may have been
1726 				 * clipped, merged, or deleted.
1727 				 */
1728 				if (!vm_map_lookup_entry(map, saved_start,
1729 				    &tmp_entry)) {
1730 					if (saved_start == start) {
1731 						/*
1732 						 * first_entry has been deleted.
1733 						 */
1734 						vm_map_unlock(map);
1735 						return (KERN_INVALID_ADDRESS);
1736 					}
1737 					end = saved_start;
1738 					rv = KERN_INVALID_ADDRESS;
1739 					goto done;
1740 				}
1741 				if (entry == first_entry)
1742 					first_entry = tmp_entry;
1743 				else
1744 					first_entry = NULL;
1745 				entry = tmp_entry;
1746 			}
1747 			last_timestamp = map->timestamp;
1748 			continue;
1749 		}
1750 		vm_map_clip_start(map, entry, start);
1751 		vm_map_clip_end(map, entry, end);
1752 		/*
1753 		 * Mark the entry in case the map lock is released.  (See
1754 		 * above.)
1755 		 */
1756 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1757 		/*
1758 		 * Wire this entry's pages now if it is not already wired.
1759 		 */
1760 		if (entry->wired_count == 0) {
1761 			entry->wired_count++;
1762 			saved_start = entry->start;
1763 			saved_end = entry->end;
1764 			/*
1765 			 * Release the map lock, relying on the in-transition
1766 			 * mark.
1767 			 */
1768 			vm_map_unlock(map);
1769 			rv = vm_fault_wire(map, saved_start, saved_end,
1770 			    user_wire);
1771 			vm_map_lock(map);
1772 			if (last_timestamp + 1 != map->timestamp) {
1773 				/*
1774 				 * Look again for the entry because the map was
1775 				 * modified while it was unlocked.  The entry
1776 				 * may have been clipped, but NOT merged or
1777 				 * deleted.
1778 				 */
1779 				result = vm_map_lookup_entry(map, saved_start,
1780 				    &tmp_entry);
1781 				KASSERT(result, ("vm_map_wire: lookup failed"));
1782 				if (entry == first_entry)
1783 					first_entry = tmp_entry;
1784 				else
1785 					first_entry = NULL;
1786 				entry = tmp_entry;
1787 				while (entry->end < saved_end) {
1788 					if (rv != KERN_SUCCESS) {
1789 						KASSERT(entry->wired_count == 1,
1790 						    ("vm_map_wire: bad count"));
1791 						entry->wired_count = -1;
1792 					}
1793 					entry = entry->next;
1794 				}
1795 			}
1796 			last_timestamp = map->timestamp;
1797 			if (rv != KERN_SUCCESS) {
1798 				KASSERT(entry->wired_count == 1,
1799 				    ("vm_map_wire: bad count"));
1800 				/*
1801 				 * Assign an out-of-range value to represent
1802 				 * the failure to wire this entry.
1803 				 */
1804 				entry->wired_count = -1;
1805 				end = entry->end;
1806 				goto done;
1807 			}
1808 		} else if (!user_wire ||
1809 			   (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
1810 			entry->wired_count++;
1811 		}
1812 		/*
1813 		 * Check the map for holes in the specified region.
1814 		 */
1815 		if (entry->end < end && (entry->next == &map->header ||
1816 		    entry->next->start > entry->end)) {
1817 			end = entry->end;
1818 			rv = KERN_INVALID_ADDRESS;
1819 			goto done;
1820 		}
1821 		entry = entry->next;
1822 	}
1823 	rv = KERN_SUCCESS;
1824 done:
1825 	need_wakeup = FALSE;
1826 	if (first_entry == NULL) {
1827 		result = vm_map_lookup_entry(map, start, &first_entry);
1828 		KASSERT(result, ("vm_map_wire: lookup failed"));
1829 	}
1830 	entry = first_entry;
1831 	while (entry != &map->header && entry->start < end) {
1832 		if (rv == KERN_SUCCESS) {
1833 			if (user_wire)
1834 				entry->eflags |= MAP_ENTRY_USER_WIRED;
1835 		} else if (entry->wired_count == -1) {
1836 			/*
1837 			 * Wiring failed on this entry.  Thus, unwiring is
1838 			 * unnecessary.
1839 			 */
1840 			entry->wired_count = 0;
1841 		} else {
1842 			if (!user_wire ||
1843 			    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
1844 				entry->wired_count--;
1845 			if (entry->wired_count == 0) {
1846 				/*
1847 				 * Retain the map lock.
1848 				 */
1849 				vm_fault_unwire(map, entry->start, entry->end);
1850 			}
1851 		}
1852 		KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1853 			("vm_map_wire: in-transition flag missing"));
1854 		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1855 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1856 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1857 			need_wakeup = TRUE;
1858 		}
1859 		vm_map_simplify_entry(map, entry);
1860 		entry = entry->next;
1861 	}
1862 	vm_map_unlock(map);
1863 	if (need_wakeup)
1864 		vm_map_wakeup(map);
1865 	return (rv);
1866 }
1867 
1868 /*
1869  * vm_map_clean
1870  *
1871  * Push any dirty cached pages in the address range to their pager.
1872  * If syncio is TRUE, dirty pages are written synchronously.
1873  * If invalidate is TRUE, any cached pages are freed as well.
1874  *
1875  * Returns an error if any part of the specified range is not mapped.
1876  */
1877 int
1878 vm_map_clean(
1879 	vm_map_t map,
1880 	vm_offset_t start,
1881 	vm_offset_t end,
1882 	boolean_t syncio,
1883 	boolean_t invalidate)
1884 {
1885 	vm_map_entry_t current;
1886 	vm_map_entry_t entry;
1887 	vm_size_t size;
1888 	vm_object_t object;
1889 	vm_ooffset_t offset;
1890 
1891 	GIANT_REQUIRED;
1892 
1893 	vm_map_lock_read(map);
1894 	VM_MAP_RANGE_CHECK(map, start, end);
1895 	if (!vm_map_lookup_entry(map, start, &entry)) {
1896 		vm_map_unlock_read(map);
1897 		return (KERN_INVALID_ADDRESS);
1898 	}
1899 	/*
1900 	 * Make a first pass to check for holes.
1901 	 */
1902 	for (current = entry; current->start < end; current = current->next) {
1903 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1904 			vm_map_unlock_read(map);
1905 			return (KERN_INVALID_ARGUMENT);
1906 		}
1907 		if (end > current->end &&
1908 		    (current->next == &map->header ||
1909 			current->end != current->next->start)) {
1910 			vm_map_unlock_read(map);
1911 			return (KERN_INVALID_ADDRESS);
1912 		}
1913 	}
1914 
1915 	if (invalidate) {
1916 		vm_page_lock_queues();
1917 		pmap_remove(map->pmap, start, end);
1918 		vm_page_unlock_queues();
1919 	}
1920 	/*
1921 	 * Make a second pass, cleaning/uncaching pages from the indicated
1922 	 * objects as we go.
1923 	 */
1924 	for (current = entry; current->start < end; current = current->next) {
1925 		offset = current->offset + (start - current->start);
1926 		size = (end <= current->end ? end : current->end) - start;
1927 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1928 			vm_map_t smap;
1929 			vm_map_entry_t tentry;
1930 			vm_size_t tsize;
1931 
1932 			smap = current->object.sub_map;
1933 			vm_map_lock_read(smap);
1934 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1935 			tsize = tentry->end - offset;
1936 			if (tsize < size)
1937 				size = tsize;
1938 			object = tentry->object.vm_object;
1939 			offset = tentry->offset + (offset - tentry->start);
1940 			vm_map_unlock_read(smap);
1941 		} else {
1942 			object = current->object.vm_object;
1943 		}
1944 		/*
1945 		 * Note that there is absolutely no sense in writing out
1946 		 * anonymous objects, so we track down the vnode object
1947 		 * to write out.
1948 		 * We invalidate (remove) all pages from the address space
1949 		 * anyway, for semantic correctness.
1950 		 *
1951 		 * note: certain anonymous maps, such as MAP_NOSYNC maps,
1952 		 * may start out with a NULL object.
1953 		 */
1954 		while (object && object->backing_object) {
1955 			object = object->backing_object;
1956 			offset += object->backing_object_offset;
1957 			if (object->size < OFF_TO_IDX(offset + size))
1958 				size = IDX_TO_OFF(object->size) - offset;
1959 		}
1960 		if (object && (object->type == OBJT_VNODE) &&
1961 		    (current->protection & VM_PROT_WRITE)) {
1962 			/*
1963 			 * Flush pages if writing is allowed, invalidate them
1964 			 * if invalidation requested.  Pages undergoing I/O
1965 			 * will be ignored by vm_object_page_remove().
1966 			 *
1967 			 * We cannot lock the vnode and then wait for paging
1968 			 * to complete without deadlocking against vm_fault.
1969 			 * Instead we simply call vm_object_page_remove() and
1970 			 * allow it to block internally on a page-by-page
1971 			 * basis when it encounters pages undergoing async
1972 			 * I/O.
1973 			 */
1974 			int flags;
1975 
1976 			vm_object_reference(object);
1977 			vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
1978 			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1979 			flags |= invalidate ? OBJPC_INVAL : 0;
1980 			VM_OBJECT_LOCK(object);
1981 			vm_object_page_clean(object,
1982 			    OFF_TO_IDX(offset),
1983 			    OFF_TO_IDX(offset + size + PAGE_MASK),
1984 			    flags);
1985 			VM_OBJECT_UNLOCK(object);
1986 			VOP_UNLOCK(object->handle, 0, curthread);
1987 			vm_object_deallocate(object);
1988 		}
1989 		if (object && invalidate &&
1990 		    ((object->type == OBJT_VNODE) ||
1991 		     (object->type == OBJT_DEVICE))) {
1992 			VM_OBJECT_LOCK(object);
1993 			vm_object_page_remove(object,
1994 			    OFF_TO_IDX(offset),
1995 			    OFF_TO_IDX(offset + size + PAGE_MASK),
1996 			    FALSE);
1997 			VM_OBJECT_UNLOCK(object);
1998 		}
1999 		start += size;
2000 	}
2001 
2002 	vm_map_unlock_read(map);
2003 	return (KERN_SUCCESS);
2004 }
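
/*
 * Editor's sketch (not part of the original source): an msync()-style
 * caller might flush a page-aligned range with vm_map_clean() roughly as
 * follows; the variable names are hypothetical.
 *
 *	rv = vm_map_clean(&p->p_vmspace->vm_map, trunc_page(addr),
 *	    round_page(addr + len), (flags & MS_ASYNC) == 0,
 *	    (flags & MS_INVALIDATE) != 0);
 *	if (rv != KERN_SUCCESS)
 *		return (EINVAL);
 */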
2005 
2006 /*
2007  *	vm_map_entry_unwire:	[ internal use only ]
2008  *
2009  *	Make the region specified by this entry pageable.
2010  *
2011  *	The map in question should be locked.
2012  *	[This is the reason for this routine's existence.]
2013  */
2014 static void
2015 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
2016 {
2017 	vm_fault_unwire(map, entry->start, entry->end);
2018 	entry->wired_count = 0;
2019 }
2020 
2021 /*
2022  *	vm_map_entry_delete:	[ internal use only ]
2023  *
2024  *	Deallocate the given entry from the target map.
2025  */
2026 static void
2027 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
2028 {
2029 	vm_map_entry_unlink(map, entry);
2030 	map->size -= entry->end - entry->start;
2031 
2032 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
2033 		vm_object_deallocate(entry->object.vm_object);
2034 	}
2035 
2036 	vm_map_entry_dispose(map, entry);
2037 }
2038 
2039 /*
2040  *	vm_map_delete:	[ internal use only ]
2041  *
2042  *	Deallocates the given address range from the target
2043  *	map.
2044  */
2045 int
2046 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
2047 {
2048 	vm_object_t object;
2049 	vm_map_entry_t entry;
2050 	vm_map_entry_t first_entry;
2051 
2052 	/*
2053 	 * Find the start of the region, and clip it
2054 	 */
2055 	if (!vm_map_lookup_entry(map, start, &first_entry))
2056 		entry = first_entry->next;
2057 	else {
2058 		entry = first_entry;
2059 		vm_map_clip_start(map, entry, start);
2060 	}
2061 
2062 	/*
2063 	 * Save the free space hint
2064 	 */
2065 	if (entry == &map->header) {
2066 		map->first_free = &map->header;
2067 	} else if (map->first_free->start >= start) {
2068 		map->first_free = entry->prev;
2069 	}
2070 
2071 	/*
2072 	 * Step through all entries in this region
2073 	 */
2074 	while ((entry != &map->header) && (entry->start < end)) {
2075 		vm_map_entry_t next;
2076 		vm_offset_t s, e;
2077 		vm_pindex_t offidxstart, offidxend, count;
2078 
2079 		/*
2080 		 * Wait for wiring or unwiring of an entry to complete.
2081 		 */
2082 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0) {
2083 			unsigned int last_timestamp;
2084 			vm_offset_t saved_start;
2085 			vm_map_entry_t tmp_entry;
2086 
2087 			saved_start = entry->start;
2088 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2089 			last_timestamp = map->timestamp;
2090 			(void) vm_map_unlock_and_wait(map, FALSE);
2091 			vm_map_lock(map);
2092 			if (last_timestamp + 1 != map->timestamp) {
2093 				/*
2094 				 * Look again for the entry because the map was
2095 				 * modified while it was unlocked.
2096 				 * Specifically, the entry may have been
2097 				 * clipped, merged, or deleted.
2098 				 */
2099 				if (!vm_map_lookup_entry(map, saved_start,
2100 							 &tmp_entry))
2101 					entry = tmp_entry->next;
2102 				else {
2103 					entry = tmp_entry;
2104 					vm_map_clip_start(map, entry,
2105 							  saved_start);
2106 				}
2107 			}
2108 			continue;
2109 		}
2110 		vm_map_clip_end(map, entry, end);
2111 
2112 		s = entry->start;
2113 		e = entry->end;
2114 		next = entry->next;
2115 
2116 		offidxstart = OFF_TO_IDX(entry->offset);
2117 		count = OFF_TO_IDX(e - s);
2118 		object = entry->object.vm_object;
2119 
2120 		/*
2121 		 * Unwire before removing addresses from the pmap; otherwise,
2122 		 * unwiring will put the entries back in the pmap.
2123 		 */
2124 		if (entry->wired_count != 0) {
2125 			vm_map_entry_unwire(map, entry);
2126 		}
2127 
2128 		offidxend = offidxstart + count;
2129 
2130 		if (object == kernel_object || object == kmem_object) {
2131 			if (object == kernel_object)
2132 				GIANT_REQUIRED;
2133 			VM_OBJECT_LOCK(object);
2134 			vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2135 			VM_OBJECT_UNLOCK(object);
2136 		} else {
2137 			mtx_lock(&Giant);
2138 			vm_page_lock_queues();
2139 			pmap_remove(map->pmap, s, e);
2140 			vm_page_unlock_queues();
2141 			if (object != NULL) {
2142 				VM_OBJECT_LOCK(object);
2143 				if (object->ref_count != 1 &&
2144 				    (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2145 				    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2146 					VM_OBJECT_UNLOCK(object);
2147 					vm_object_collapse(object);
2148 					VM_OBJECT_LOCK(object);
2149 					vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2150 					if (object->type == OBJT_SWAP)
2151 						swap_pager_freespace(object, offidxstart, count);
2152 					if (offidxend >= object->size &&
2153 					    offidxstart < object->size)
2154 						object->size = offidxstart;
2155 				}
2156 				VM_OBJECT_UNLOCK(object);
2157 			}
2158 			mtx_unlock(&Giant);
2159 		}
2160 
2161 		/*
2162 		 * Delete the entry (which may delete the object) only after
2163 		 * removing all pmap entries pointing to its pages.
2164 		 * (Otherwise, its page frames may be reallocated, and any
2165 		 * modify bits will be set in the wrong object!)
2166 		 */
2167 		vm_map_entry_delete(map, entry);
2168 		entry = next;
2169 	}
2170 	return (KERN_SUCCESS);
2171 }
2172 
2173 /*
2174  *	vm_map_remove:
2175  *
2176  *	Remove the given address range from the target map.
2177  *	This is the exported form of vm_map_delete.
2178  */
2179 int
2180 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2181 {
2182 	int result, s = 0;
2183 
2184 	if (map == kmem_map)
2185 		s = splvm();
2186 
2187 	vm_map_lock(map);
2188 	VM_MAP_RANGE_CHECK(map, start, end);
2189 	result = vm_map_delete(map, start, end);
2190 	vm_map_unlock(map);
2191 
2192 	if (map == kmem_map)
2193 		splx(s);
2194 
2195 	return (result);
2196 }
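
/*
 * Editor's sketch (not part of the original source): a munmap()-style
 * caller might drop a page-aligned mapping like this; the variable names
 * are hypothetical.
 *
 *	(void) vm_map_remove(&p->p_vmspace->vm_map, trunc_page(addr),
 *	    round_page(addr + len));
 */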
2197 
2198 /*
2199  *	vm_map_check_protection:
2200  *
2201  *	Assert that the target map allows the specified privilege on the
2202  *	entire address region given.  The entire region must be allocated.
2203  *
2204  *	WARNING!  This code does not and should not check whether the
2205  *	contents of the region are accessible.  For example, a smaller file
2206  *	might be mapped into a larger address space.
2207  *
2208  *	NOTE!  This code is also called by munmap().
2209  */
2210 boolean_t
2211 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2212 			vm_prot_t protection)
2213 {
2214 	vm_map_entry_t entry;
2215 	vm_map_entry_t tmp_entry;
2216 
2217 	vm_map_lock_read(map);
2218 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2219 		vm_map_unlock_read(map);
2220 		return (FALSE);
2221 	}
2222 	entry = tmp_entry;
2223 
2224 	while (start < end) {
2225 		if (entry == &map->header) {
2226 			vm_map_unlock_read(map);
2227 			return (FALSE);
2228 		}
2229 		/*
2230 		 * No holes allowed!
2231 		 */
2232 		if (start < entry->start) {
2233 			vm_map_unlock_read(map);
2234 			return (FALSE);
2235 		}
2236 		/*
2237 		 * Check protection associated with entry.
2238 		 */
2239 		if ((entry->protection & protection) != protection) {
2240 			vm_map_unlock_read(map);
2241 			return (FALSE);
2242 		}
2243 		/* go to next entry */
2244 		start = entry->end;
2245 		entry = entry->next;
2246 	}
2247 	vm_map_unlock_read(map);
2248 	return (TRUE);
2249 }
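
/*
 * Editor's sketch (not part of the original source): before operating on
 * a user-supplied range, a caller might verify that the whole range is
 * mapped with the required access; the variable names are hypothetical.
 *
 *	if (!vm_map_check_protection(map, trunc_page(addr),
 *	    round_page(addr + len), VM_PROT_READ | VM_PROT_WRITE))
 *		return (EFAULT);
 */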
2250 
2251 /*
2252  *	vm_map_copy_entry:
2253  *
2254  *	Copies the contents of the source entry to the destination
2255  *	entry.  The entries *must* be aligned properly.
2256  */
2257 static void
2258 vm_map_copy_entry(
2259 	vm_map_t src_map,
2260 	vm_map_t dst_map,
2261 	vm_map_entry_t src_entry,
2262 	vm_map_entry_t dst_entry)
2263 {
2264 	vm_object_t src_object;
2265 
2266 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2267 		return;
2268 
2269 	if (src_entry->wired_count == 0) {
2270 
2271 		/*
2272 		 * If the source entry is marked needs_copy, it is already
2273 		 * write-protected.
2274 		 */
2275 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2276 			vm_page_lock_queues();
2277 			pmap_protect(src_map->pmap,
2278 			    src_entry->start,
2279 			    src_entry->end,
2280 			    src_entry->protection & ~VM_PROT_WRITE);
2281 			vm_page_unlock_queues();
2282 		}
2283 
2284 		/*
2285 		 * Make a copy of the object.
2286 		 */
2287 		if ((src_object = src_entry->object.vm_object) != NULL) {
2288 
2289 			if ((src_object->handle == NULL) &&
2290 				(src_object->type == OBJT_DEFAULT ||
2291 				 src_object->type == OBJT_SWAP)) {
2292 				vm_object_collapse(src_object);
2293 				if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2294 					vm_object_split(src_entry);
2295 					src_object = src_entry->object.vm_object;
2296 				}
2297 			}
2298 
2299 			vm_object_reference(src_object);
2300 			VM_OBJECT_LOCK(src_object);
2301 			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2302 			VM_OBJECT_UNLOCK(src_object);
2303 			dst_entry->object.vm_object = src_object;
2304 			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2305 			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2306 			dst_entry->offset = src_entry->offset;
2307 		} else {
2308 			dst_entry->object.vm_object = NULL;
2309 			dst_entry->offset = 0;
2310 		}
2311 
2312 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2313 		    dst_entry->end - dst_entry->start, src_entry->start);
2314 	} else {
2315 		/*
2316 		 * Of course, wired down pages can't be set copy-on-write.
2317 		 * Cause wired pages to be copied into the new map by
2318 		 * simulating faults (the new pages are pageable)
2319 		 */
2320 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2321 	}
2322 }
2323 
2324 /*
2325  * vmspace_fork:
2326  * Create a new process vmspace structure and vm_map
2327  * based on those of an existing process.  The new map
2328  * is based on the old map, according to the inheritance
2329  * values on the regions in that map.
2330  *
2331  * The source map must not be locked.
2332  */
2333 struct vmspace *
2334 vmspace_fork(struct vmspace *vm1)
2335 {
2336 	struct vmspace *vm2;
2337 	vm_map_t old_map = &vm1->vm_map;
2338 	vm_map_t new_map;
2339 	vm_map_entry_t old_entry;
2340 	vm_map_entry_t new_entry;
2341 	vm_object_t object;
2342 
2343 	GIANT_REQUIRED;
2344 
2345 	vm_map_lock(old_map);
2346 	old_map->infork = 1;
2347 
2348 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2349 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2350 	    (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
2351 	new_map = &vm2->vm_map;	/* XXX */
2352 	new_map->timestamp = 1;
2353 
2354 	old_entry = old_map->header.next;
2355 
2356 	while (old_entry != &old_map->header) {
2357 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2358 			panic("vm_map_fork: encountered a submap");
2359 
2360 		switch (old_entry->inheritance) {
2361 		case VM_INHERIT_NONE:
2362 			break;
2363 
2364 		case VM_INHERIT_SHARE:
2365 			/*
2366 			 * Clone the entry, creating the shared object if necessary.
2367 			 */
2368 			object = old_entry->object.vm_object;
2369 			if (object == NULL) {
2370 				object = vm_object_allocate(OBJT_DEFAULT,
2371 					atop(old_entry->end - old_entry->start));
2372 				old_entry->object.vm_object = object;
2373 				old_entry->offset = (vm_offset_t) 0;
2374 			}
2375 
2376 			/*
2377 			 * Add the reference before calling vm_object_shadow
2378 			 * to ensure that a shadow object is created.
2379 			 */
2380 			vm_object_reference(object);
2381 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2382 				vm_object_shadow(&old_entry->object.vm_object,
2383 					&old_entry->offset,
2384 					atop(old_entry->end - old_entry->start));
2385 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2386 				/* Transfer the second reference too. */
2387 				vm_object_reference(
2388 				    old_entry->object.vm_object);
2389 				vm_object_deallocate(object);
2390 				object = old_entry->object.vm_object;
2391 			}
2392 			VM_OBJECT_LOCK(object);
2393 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
2394 			VM_OBJECT_UNLOCK(object);
2395 
2396 			/*
2397 			 * Clone the entry, referencing the shared object.
2398 			 */
2399 			new_entry = vm_map_entry_create(new_map);
2400 			*new_entry = *old_entry;
2401 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2402 			new_entry->wired_count = 0;
2403 
2404 			/*
2405 			 * Insert the entry into the new map -- we know we're
2406 			 * inserting at the end of the new map.
2407 			 */
2408 			vm_map_entry_link(new_map, new_map->header.prev,
2409 			    new_entry);
2410 
2411 			/*
2412 			 * Update the physical map
2413 			 */
2414 			pmap_copy(new_map->pmap, old_map->pmap,
2415 			    new_entry->start,
2416 			    (old_entry->end - old_entry->start),
2417 			    old_entry->start);
2418 			break;
2419 
2420 		case VM_INHERIT_COPY:
2421 			/*
2422 			 * Clone the entry and link into the map.
2423 			 */
2424 			new_entry = vm_map_entry_create(new_map);
2425 			*new_entry = *old_entry;
2426 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2427 			new_entry->wired_count = 0;
2428 			new_entry->object.vm_object = NULL;
2429 			vm_map_entry_link(new_map, new_map->header.prev,
2430 			    new_entry);
2431 			vm_map_copy_entry(old_map, new_map, old_entry,
2432 			    new_entry);
2433 			break;
2434 		}
2435 		old_entry = old_entry->next;
2436 	}
2437 
2438 	new_map->size = old_map->size;
2439 	old_map->infork = 0;
2440 	vm_map_unlock(old_map);
2441 
2442 	return (vm2);
2443 }
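
/*
 * Editor's sketch (not part of the original source): a fork()-style path
 * might duplicate the parent's address space and hand it to the child as
 * follows; p1 and p2 are hypothetical parent and child proc pointers.
 *
 *	vm2 = vmspace_fork(p1->p_vmspace);
 *	p2->p_vmspace = vm2;
 */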
2444 
2445 int
2446 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2447 	      vm_prot_t prot, vm_prot_t max, int cow)
2448 {
2449 	vm_map_entry_t prev_entry;
2450 	vm_map_entry_t new_stack_entry;
2451 	vm_size_t      init_ssize;
2452 	int            rv;
2453 
2454 	if (addrbos < vm_map_min(map))
2455 		return (KERN_NO_SPACE);
2456 
2457 	if (max_ssize < sgrowsiz)
2458 		init_ssize = max_ssize;
2459 	else
2460 		init_ssize = sgrowsiz;
2461 
2462 	vm_map_lock(map);
2463 
2464 	/* If addr is already mapped, no go */
2465 	if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2466 		vm_map_unlock(map);
2467 		return (KERN_NO_SPACE);
2468 	}
2469 
2470 	/* If we would blow our VMEM resource limit, no go */
2471 	if (map->size + init_ssize >
2472 	    curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2473 		vm_map_unlock(map);
2474 		return (KERN_NO_SPACE);
2475 	}
2476 
2477 	/* If we can't accommodate max_ssize in the current mapping,
2478 	 * no go.  However, we need to be aware that subsequent user
2479 	 * mappings might map into the space we have reserved for
2480 	 * stack, and currently this space is not protected.
2481 	 *
2482 	 * Hopefully we will at least detect this condition
2483 	 * when we try to grow the stack.
2484 	 */
2485 	if ((prev_entry->next != &map->header) &&
2486 	    (prev_entry->next->start < addrbos + max_ssize)) {
2487 		vm_map_unlock(map);
2488 		return (KERN_NO_SPACE);
2489 	}
2490 
2491 	/* We initially map a stack of only init_ssize.  We will
2492 	 * grow it as needed later.  Since this is to be a grow-down
2493 	 * stack, we map at the top of the range.
2494 	 *
2495 	 * Note: we would normally expect prot and max to be
2496 	 * VM_PROT_ALL, and cow to be 0.  Possibly we should
2497 	 * eliminate these as input parameters, and just
2498 	 * pass these values here in the insert call.
2499 	 */
2500 	rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2501 	                   addrbos + max_ssize, prot, max, cow);
2502 
2503 	/* Now set the avail_ssize amount */
2504 	if (rv == KERN_SUCCESS) {
2505 		if (prev_entry != &map->header)
2506 			vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2507 		new_stack_entry = prev_entry->next;
2508 		if (new_stack_entry->end   != addrbos + max_ssize ||
2509 		    new_stack_entry->start != addrbos + max_ssize - init_ssize)
2510 			panic ("Bad entry start/end for new stack entry");
2511 		else
2512 			new_stack_entry->avail_ssize = max_ssize - init_ssize;
2513 	}
2514 
2515 	vm_map_unlock(map);
2516 	return (rv);
2517 }
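
/*
 * Editor's sketch (not part of the original source): an exec-time caller
 * might reserve a grow-down user stack of at most maxssiz bytes ending at
 * stack_top like this; the names are hypothetical.
 *
 *	rv = vm_map_stack(map, stack_top - maxssiz, maxssiz,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0);
 *	if (rv != KERN_SUCCESS)
 *		return (ENOMEM);
 */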
2518 
2519 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2520  * desired address is already mapped, or if we successfully grow
2521  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2522  * stack range (this is strange, but preserves compatibility with
2523  * the grow function in vm_machdep.c).
2524  */
2525 int
2526 vm_map_growstack (struct proc *p, vm_offset_t addr)
2527 {
2528 	vm_map_entry_t prev_entry;
2529 	vm_map_entry_t stack_entry;
2530 	vm_map_entry_t new_stack_entry;
2531 	struct vmspace *vm = p->p_vmspace;
2532 	vm_map_t map = &vm->vm_map;
2533 	vm_offset_t    end;
2534 	int      grow_amount;
2535 	int      rv;
2536 	int      is_procstack;
2537 
2538 	GIANT_REQUIRED;
2539 
2540 Retry:
2541 	vm_map_lock_read(map);
2542 
2543 	/* If addr is already in the entry range, no need to grow. */
2544 	if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2545 		vm_map_unlock_read(map);
2546 		return (KERN_SUCCESS);
2547 	}
2548 
2549 	if ((stack_entry = prev_entry->next) == &map->header) {
2550 		vm_map_unlock_read(map);
2551 		return (KERN_SUCCESS);
2552 	}
2553 	if (prev_entry == &map->header)
2554 		end = stack_entry->start - stack_entry->avail_ssize;
2555 	else
2556 		end = prev_entry->end;
2557 
2558 	/* This next test mimics the old grow function in vm_machdep.c.
2559 	 * It really doesn't quite make sense, but we do it anyway
2560 	 * for compatibility.
2561 	 *
2562 	 * If the stack is not growable, return success.  This signals
2563 	 * the caller to proceed as it normally would with ordinary VM.
2564 	 */
2565 	if (stack_entry->avail_ssize < 1 ||
2566 	    addr >= stack_entry->start ||
2567 	    addr <  stack_entry->start - stack_entry->avail_ssize) {
2568 		vm_map_unlock_read(map);
2569 		return (KERN_SUCCESS);
2570 	}
2571 
2572 	/* Find the minimum grow amount */
2573 	grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2574 	if (grow_amount > stack_entry->avail_ssize) {
2575 		vm_map_unlock_read(map);
2576 		return (KERN_NO_SPACE);
2577 	}
2578 
2579 	/* If there is no longer enough space between the entries,
2580 	 * fail and adjust the available space.  Note: this
2581 	 * should only happen if the user has mapped into the
2582 	 * stack area after the stack was created, and is
2583 	 * probably an error.
2584 	 *
2585 	 * This also effectively destroys any guard page the user
2586 	 * might have intended by limiting the stack size.
2587 	 */
2588 	if (grow_amount > stack_entry->start - end) {
2589 		if (vm_map_lock_upgrade(map))
2590 			goto Retry;
2591 
2592 		stack_entry->avail_ssize = stack_entry->start - end;
2593 
2594 		vm_map_unlock(map);
2595 		return (KERN_NO_SPACE);
2596 	}
2597 
2598 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2599 
2600 	/* If this is the main process stack, see if we're over the
2601 	 * stack limit.
2602 	 */
2603 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2604 			     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2605 		vm_map_unlock_read(map);
2606 		return (KERN_NO_SPACE);
2607 	}
2608 
2609 	/* Round up the grow amount to a multiple of sgrowsiz. */
2610 	grow_amount = roundup (grow_amount, sgrowsiz);
2611 	if (grow_amount > stack_entry->avail_ssize) {
2612 		grow_amount = stack_entry->avail_ssize;
2613 	}
2614 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2615 	                     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2616 		grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2617 		              ctob(vm->vm_ssize);
2618 	}
2619 
2620 	/* If we would blow our VMEM resource limit, no go */
2621 	if (map->size + grow_amount >
2622 	    curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2623 		vm_map_unlock_read(map);
2624 		return (KERN_NO_SPACE);
2625 	}
2626 
2627 	if (vm_map_lock_upgrade(map))
2628 		goto Retry;
2629 
2630 	/* Get the preliminary new entry start value */
2631 	addr = stack_entry->start - grow_amount;
2632 
2633 	/* If this puts us into the previous entry, cut back our growth
2634 	 * to the available space.  Also, see the note above.
2635 	 */
2636 	if (addr < end) {
2637 		stack_entry->avail_ssize = stack_entry->start - end;
2638 		addr = end;
2639 	}
2640 
2641 	rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2642 	    p->p_sysent->sv_stackprot, VM_PROT_ALL, 0);
2643 
2644 	/* Adjust the available stack space by the amount we grew. */
2645 	if (rv == KERN_SUCCESS) {
2646 		if (prev_entry != &map->header)
2647 			vm_map_clip_end(map, prev_entry, addr);
2648 		new_stack_entry = prev_entry->next;
2649 		if (new_stack_entry->end   != stack_entry->start  ||
2650 		    new_stack_entry->start != addr)
2651 			panic ("Bad stack grow start/end in new stack entry");
2652 		else {
2653 			new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2654 							(new_stack_entry->end -
2655 							 new_stack_entry->start);
2656 			if (is_procstack)
2657 				vm->vm_ssize += btoc(new_stack_entry->end -
2658 						     new_stack_entry->start);
2659 		}
2660 	}
2661 
2662 	vm_map_unlock(map);
2663 	return (rv);
2664 }
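
/*
 * Editor's sketch (not part of the original source): a page-fault handler
 * might attempt stack growth before performing the ordinary fault; the
 * variable names are hypothetical.
 *
 *	if (vm_map_growstack(p, va) != KERN_SUCCESS)
 *		return (KERN_FAILURE);
 *	rv = vm_fault(map, trunc_page(va), fault_type, fault_flags);
 */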
2665 
2666 /*
2667  * Unshare the specified VM space for exec.  If other processes are
2668  * sharing it, then create a new one.  The new vmspace has no user mappings.
2669  */
2670 void
2671 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
2672 {
2673 	struct vmspace *oldvmspace = p->p_vmspace;
2674 	struct vmspace *newvmspace;
2675 
2676 	GIANT_REQUIRED;
2677 	newvmspace = vmspace_alloc(minuser, maxuser);
2678 	bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2679 	    (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2680 	/*
2681 	 * This code is written like this for prototype purposes.  The
2682 	 * goal is to avoid running down the vmspace here, but let the
2683 	 * other processes that are still using the vmspace finally run
2684 	 * it down.  Even though there is little or no chance of blocking
2685 	 * here, it is a good idea to keep this form for future modifications.
2686 	 */
2687 	p->p_vmspace = newvmspace;
2688 	pmap_pinit2(vmspace_pmap(newvmspace));
2689 	vmspace_free(oldvmspace);
2690 	if (p == curthread->td_proc)		/* XXXKSE ? */
2691 		pmap_activate(curthread);
2692 }
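
/*
 * Editor's sketch (not part of the original source): an exec path wanting
 * a completely fresh user address space might call, assuming the usual
 * user VA bounds for the platform:
 *
 *	vmspace_exec(p, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
 *
 * after which p->p_vmspace refers to an empty map covering that range.
 */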
2693 
2694 /*
2695  * Unshare the specified VM space for forcing COW.  This
2696  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2697  */
2698 void
2699 vmspace_unshare(struct proc *p)
2700 {
2701 	struct vmspace *oldvmspace = p->p_vmspace;
2702 	struct vmspace *newvmspace;
2703 
2704 	GIANT_REQUIRED;
2705 	if (oldvmspace->vm_refcnt == 1)
2706 		return;
2707 	newvmspace = vmspace_fork(oldvmspace);
2708 	p->p_vmspace = newvmspace;
2709 	pmap_pinit2(vmspace_pmap(newvmspace));
2710 	vmspace_free(oldvmspace);
2711 	if (p == curthread->td_proc)		/* XXXKSE ? */
2712 		pmap_activate(curthread);
2713 }
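
/*
 * Editor's sketch (not part of the original source): per the comment
 * above, an rfork()-style path might force a private copy-on-write
 * address space like this:
 *
 *	if ((flags & (RFMEM | RFPROC)) == 0)
 *		vmspace_unshare(p);
 */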
2714 
2715 /*
2716  *	vm_map_lookup:
2717  *
2718  *	Finds the VM object, offset, and
2719  *	protection for a given virtual address in the
2720  *	specified map, assuming a page fault of the
2721  *	type specified.
2722  *
2723  *	Leaves the map in question locked for read; return
2724  *	values are guaranteed until a vm_map_lookup_done
2725  *	call is performed.  Note that the map argument
2726  *	is in/out; the returned map must be used in
2727  *	the call to vm_map_lookup_done.
2728  *
2729  *	A handle (out_entry) is returned for use in
2730  *	vm_map_lookup_done, to make that fast.
2731  *
2732  *	If a lookup is requested with "write protection"
2733  *	specified, the map may be changed to perform virtual
2734  *	copying operations, although the data referenced will
2735  *	remain the same.
2736  */
2737 int
2738 vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
2739 	      vm_offset_t vaddr,
2740 	      vm_prot_t fault_typea,
2741 	      vm_map_entry_t *out_entry,	/* OUT */
2742 	      vm_object_t *object,		/* OUT */
2743 	      vm_pindex_t *pindex,		/* OUT */
2744 	      vm_prot_t *out_prot,		/* OUT */
2745 	      boolean_t *wired)			/* OUT */
2746 {
2747 	vm_map_entry_t entry;
2748 	vm_map_t map = *var_map;
2749 	vm_prot_t prot;
2750 	vm_prot_t fault_type = fault_typea;
2751 
2752 RetryLookup:;
2753 	/*
2754 	 * Lookup the faulting address.
2755 	 */
2756 
2757 	vm_map_lock_read(map);
2758 #define	RETURN(why) \
2759 		{ \
2760 		vm_map_unlock_read(map); \
2761 		return (why); \
2762 		}
2763 
2764 	/*
2765 	 * If the map has an interesting hint, try it before calling the
2766 	 * full-blown lookup routine.
2767 	 */
2768 	entry = map->root;
2769 	*out_entry = entry;
2770 	if (entry == NULL ||
2771 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2772 		/*
2773 		 * Entry was either not a valid hint, or the vaddr was not
2774 		 * contained in the entry, so do a full lookup.
2775 		 */
2776 		if (!vm_map_lookup_entry(map, vaddr, out_entry))
2777 			RETURN(KERN_INVALID_ADDRESS);
2778 
2779 		entry = *out_entry;
2780 	}
2781 
2782 	/*
2783 	 * Handle submaps.
2784 	 */
2785 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2786 		vm_map_t old_map = map;
2787 
2788 		*var_map = map = entry->object.sub_map;
2789 		vm_map_unlock_read(old_map);
2790 		goto RetryLookup;
2791 	}
2792 
2793 	/*
2794 	 * Check whether this task is allowed to have this page.
2795 	 * Note the special case for MAP_ENTRY_COW
2796 	 * pages with an override.  This is to implement a forced
2797 	 * COW for debuggers.
2798 	 */
2799 	if (fault_type & VM_PROT_OVERRIDE_WRITE)
2800 		prot = entry->max_protection;
2801 	else
2802 		prot = entry->protection;
2803 	fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2804 	if ((fault_type & prot) != fault_type) {
2805 		RETURN(KERN_PROTECTION_FAILURE);
2806 	}
2807 	if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2808 	    (entry->eflags & MAP_ENTRY_COW) &&
2809 	    (fault_type & VM_PROT_WRITE) &&
2810 	    (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2811 		RETURN(KERN_PROTECTION_FAILURE);
2812 	}
2813 
2814 	/*
2815 	 * If this page is not pageable, we have to get it for all possible
2816 	 * accesses.
2817 	 */
2818 	*wired = (entry->wired_count != 0);
2819 	if (*wired)
2820 		prot = fault_type = entry->protection;
2821 
2822 	/*
2823 	 * If the entry was copy-on-write, we either copy now or demote access.
2824 	 */
2825 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2826 		/*
2827 		 * If we want to write the page, we may as well handle that
2828 		 * now since we've got the map locked.
2829 		 *
2830 		 * If we don't need to write the page, we just demote the
2831 		 * permissions allowed.
2832 		 */
2833 		if (fault_type & VM_PROT_WRITE) {
2834 			/*
2835 			 * Make a new object, and place it in the object
2836 			 * chain.  Note that no new references have appeared
2837 			 * -- one just moved from the map to the new
2838 			 * object.
2839 			 */
2840 			if (vm_map_lock_upgrade(map))
2841 				goto RetryLookup;
2842 
2843 			vm_object_shadow(
2844 			    &entry->object.vm_object,
2845 			    &entry->offset,
2846 			    atop(entry->end - entry->start));
2847 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2848 
2849 			vm_map_lock_downgrade(map);
2850 		} else {
2851 			/*
2852 			 * We're attempting to read a copy-on-write page --
2853 			 * don't allow writes.
2854 			 */
2855 			prot &= ~VM_PROT_WRITE;
2856 		}
2857 	}
2858 
2859 	/*
2860 	 * Create an object if necessary.
2861 	 */
2862 	if (entry->object.vm_object == NULL &&
2863 	    !map->system_map) {
2864 		if (vm_map_lock_upgrade(map))
2865 			goto RetryLookup;
2866 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2867 		    atop(entry->end - entry->start));
2868 		entry->offset = 0;
2869 		vm_map_lock_downgrade(map);
2870 	}
2871 
2872 	/*
2873 	 * Return the object/offset from this entry.  If the entry was
2874 	 * copy-on-write or empty, it has been fixed up.
2875 	 */
2876 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2877 	*object = entry->object.vm_object;
2878 
2879 	/*
2880 	 * Return the protection with which the lookup is allowed to proceed.
2881 	 */
2882 	*out_prot = prot;
2883 	return (KERN_SUCCESS);
2884 
2885 #undef	RETURN
2886 }
2887 
2888 /*
2889  *	vm_map_lookup_done:
2890  *
2891  *	Releases locks acquired by a vm_map_lookup
2892  *	(according to the handle returned by that lookup).
2893  */
2894 void
2895 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
2896 {
2897 	/*
2898 	 * Unlock the main-level map
2899 	 */
2900 	vm_map_unlock_read(map);
2901 }
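
/*
 * Editor's sketch (not part of the original source): the lock protocol
 * described above pairs vm_map_lookup() with vm_map_lookup_done(); a
 * fault-style caller might look like this, with hypothetical variable
 * names.
 *
 *	rv = vm_map_lookup(&map, vaddr, VM_PROT_READ, &entry, &object,
 *	    &pindex, &prot, &wired);
 *	if (rv != KERN_SUCCESS)
 *		return (rv);
 *	... use object/pindex while the map remains read-locked ...
 *	vm_map_lookup_done(map, entry);
 */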
2902 
2903 #include "opt_ddb.h"
2904 #ifdef DDB
2905 #include <sys/kernel.h>
2906 
2907 #include <ddb/ddb.h>
2908 
2909 /*
2910  *	vm_map_print:	[ debug ]
2911  */
2912 DB_SHOW_COMMAND(map, vm_map_print)
2913 {
2914 	static int nlines;
2915 	/* XXX convert args. */
2916 	vm_map_t map = (vm_map_t)addr;
2917 	boolean_t full = have_addr;
2918 
2919 	vm_map_entry_t entry;
2920 
2921 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
2922 	    (void *)map,
2923 	    (void *)map->pmap, map->nentries, map->timestamp);
2924 	nlines++;
2925 
2926 	if (!full && db_indent)
2927 		return;
2928 
2929 	db_indent += 2;
2930 	for (entry = map->header.next; entry != &map->header;
2931 	    entry = entry->next) {
2932 		db_iprintf("map entry %p: start=%p, end=%p\n",
2933 		    (void *)entry, (void *)entry->start, (void *)entry->end);
2934 		nlines++;
2935 		{
2936 			static char *inheritance_name[4] =
2937 			{"share", "copy", "none", "donate_copy"};
2938 
2939 			db_iprintf(" prot=%x/%x/%s",
2940 			    entry->protection,
2941 			    entry->max_protection,
2942 			    inheritance_name[(int)(unsigned char)entry->inheritance]);
2943 			if (entry->wired_count != 0)
2944 				db_printf(", wired");
2945 		}
2946 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2947 			db_printf(", share=%p, offset=0x%jx\n",
2948 			    (void *)entry->object.sub_map,
2949 			    (uintmax_t)entry->offset);
2950 			nlines++;
2951 			if ((entry->prev == &map->header) ||
2952 			    (entry->prev->object.sub_map !=
2953 				entry->object.sub_map)) {
2954 				db_indent += 2;
2955 				vm_map_print((db_expr_t)(intptr_t)
2956 					     entry->object.sub_map,
2957 					     full, 0, (char *)0);
2958 				db_indent -= 2;
2959 			}
2960 		} else {
2961 			db_printf(", object=%p, offset=0x%jx",
2962 			    (void *)entry->object.vm_object,
2963 			    (uintmax_t)entry->offset);
2964 			if (entry->eflags & MAP_ENTRY_COW)
2965 				db_printf(", copy (%s)",
2966 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
2967 			db_printf("\n");
2968 			nlines++;
2969 
2970 			if ((entry->prev == &map->header) ||
2971 			    (entry->prev->object.vm_object !=
2972 				entry->object.vm_object)) {
2973 				db_indent += 2;
2974 				vm_object_print((db_expr_t)(intptr_t)
2975 						entry->object.vm_object,
2976 						full, 0, (char *)0);
2977 				nlines += 4;
2978 				db_indent -= 2;
2979 			}
2980 		}
2981 	}
2982 	db_indent -= 2;
2983 	if (db_indent == 0)
2984 		nlines = 0;
2985 }
2986 
2988 DB_SHOW_COMMAND(procvm, procvm)
2989 {
2990 	struct proc *p;
2991 
2992 	if (have_addr) {
2993 		p = (struct proc *) addr;
2994 	} else {
2995 		p = curproc;
2996 	}
2997 
2998 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
2999 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3000 	    (void *)vmspace_pmap(p->p_vmspace));
3001 
3002 	vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3003 }
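
/*
 * Editor's note: from the DDB prompt these commands are invoked as, e.g.,
 * "show map <addr>" and "show procvm [<addr>]"; with no address,
 * "show procvm" dumps the current process's map.
 */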
3004 
3005 #endif /* DDB */
3006