xref: /freebsd/sys/vm/vm_map.c (revision a3e8fd0b7f663db7eafff527d5c3ca3bcfa8a537)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/ktr.h>
74 #include <sys/lock.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/vnode.h>
80 #include <sys/resourcevar.h>
81 #include <sys/sysent.h>
82 
83 #include <vm/vm.h>
84 #include <vm/vm_param.h>
85 #include <vm/pmap.h>
86 #include <vm/vm_map.h>
87 #include <vm/vm_page.h>
88 #include <vm/vm_object.h>
89 #include <vm/vm_pager.h>
90 #include <vm/vm_kern.h>
91 #include <vm/vm_extern.h>
92 #include <vm/swap_pager.h>
93 #include <vm/uma.h>
94 
95 /*
96  *	Virtual memory maps provide for the mapping, protection,
97  *	and sharing of virtual memory objects.  In addition,
98  *	this module provides for an efficient virtual copy of
99  *	memory from one map to another.
100  *
101  *	Synchronization is required prior to most operations.
102  *
103  *	Maps consist of an ordered doubly-linked list of simple
104  *	entries; a self-adjusting binary search tree of these entries
 *	speeds up lookups.
105  *
106  *	Since portions of maps are specified by start/end addresses,
107  *	which may not align with existing map entries, all
108  *	routines merely "clip" entries to these start/end values.
109  *	[That is, an entry is split into two, bordering at a
110  *	start or end value.]  Note that these clippings may not
111  *	always be necessary (as the two resulting entries are then
112  *	not changed); however, the clipping is done for convenience.
113  *
114  *	As mentioned above, virtual copy operations are performed
115  *	by copying VM object references from one map to
116  *	another, and then marking both regions as copy-on-write.
117  */
118 
119 /*
120  *	vm_map_startup:
121  *
122  *	Initialize the vm_map module.  Must be called before
123  *	any other vm_map routines.
124  *
125  *	Map and entry structures are allocated from the general
126  *	purpose memory pool with some exceptions:
127  *
128  *	- The kernel map and kmem submap are allocated statically.
129  *	- Kernel map entries are allocated out of a static pool.
130  *
131  *	These restrictions are necessary since malloc() uses the
132  *	maps and requires map entries.
133  */
134 
135 static uma_zone_t mapentzone;
136 static uma_zone_t kmapentzone;
137 static uma_zone_t mapzone;
138 static uma_zone_t vmspace_zone;
139 static struct vm_object kmapentobj;
140 static void vmspace_zinit(void *mem, int size);
141 static void vmspace_zfini(void *mem, int size);
142 static void vm_map_zinit(void *mem, int size);
143 static void vm_map_zfini(void *mem, int size);
144 static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max);
145 
146 #ifdef INVARIANTS
147 static void vm_map_zdtor(void *mem, int size, void *arg);
148 static void vmspace_zdtor(void *mem, int size, void *arg);
149 #endif
150 
151 void
152 vm_map_startup(void)
153 {
154 	mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
155 #ifdef INVARIANTS
156 	    vm_map_zdtor,
157 #else
158 	    NULL,
159 #endif
160 	    vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
161 	uma_prealloc(mapzone, MAX_KMAP);
162 	kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
163 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
164 	    UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
165 	uma_prealloc(kmapentzone, MAX_KMAPENT);
166 	mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
167 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
168 	uma_prealloc(mapentzone, MAX_MAPENT);
169 }
170 
171 static void
172 vmspace_zfini(void *mem, int size)
173 {
174 	struct vmspace *vm;
175 
176 	vm = (struct vmspace *)mem;
177 
178 	vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
179 }
180 
181 static void
182 vmspace_zinit(void *mem, int size)
183 {
184 	struct vmspace *vm;
185 
186 	vm = (struct vmspace *)mem;
187 
188 	vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map));
189 }
190 
191 static void
192 vm_map_zfini(void *mem, int size)
193 {
194 	vm_map_t map;
195 
196 	map = (vm_map_t)mem;
197 
198 	lockdestroy(&map->lock);
199 }
200 
201 static void
202 vm_map_zinit(void *mem, int size)
203 {
204 	vm_map_t map;
205 
206 	map = (vm_map_t)mem;
207 	map->nentries = 0;
208 	map->size = 0;
209 	map->infork = 0;
210 	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
211 }
212 
213 #ifdef INVARIANTS
214 static void
215 vmspace_zdtor(void *mem, int size, void *arg)
216 {
217 	struct vmspace *vm;
218 
219 	vm = (struct vmspace *)mem;
220 
221 	vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
222 }
223 static void
224 vm_map_zdtor(void *mem, int size, void *arg)
225 {
226 	vm_map_t map;
227 
228 	map = (vm_map_t)mem;
229 	KASSERT(map->nentries == 0,
230 	    ("map %p nentries == %d on free.",
231 	    map, map->nentries));
232 	KASSERT(map->size == 0,
233 	    ("map %p size == %lu on free.",
234 	    map, (unsigned long)map->size));
235 	KASSERT(map->infork == 0,
236 	    ("map %p infork == %d on free.",
237 	    map, map->infork));
238 }
239 #endif	/* INVARIANTS */
240 
241 /*
242  * Allocate a vmspace structure, including a vm_map and pmap,
243  * and initialize those structures.  The refcnt is set to 1.
244  * The remaining fields must be initialized by the caller.
245  */
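/*
 * A new user vmspace is typically obtained with something like the
 * following (sketch only; the address bounds are machine-dependent
 * constants, shown here for illustration):
 *
 *	vm = vmspace_alloc(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
 */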
246 struct vmspace *
247 vmspace_alloc(min, max)
248 	vm_offset_t min, max;
249 {
250 	struct vmspace *vm;
251 
252 	GIANT_REQUIRED;
253 	vm = uma_zalloc(vmspace_zone, M_WAITOK);
254 	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
255 	_vm_map_init(&vm->vm_map, min, max);
256 	pmap_pinit(vmspace_pmap(vm));
257 	vm->vm_map.pmap = vmspace_pmap(vm);		/* XXX */
258 	vm->vm_refcnt = 1;
259 	vm->vm_shm = NULL;
260 	vm->vm_freer = NULL;
261 	return (vm);
262 }
263 
264 void
265 vm_init2(void)
266 {
267 	uma_zone_set_obj(kmapentzone, &kmapentobj, lmin(cnt.v_page_count,
268 	    (VM_MAX_KERNEL_ADDRESS - KERNBASE) / PAGE_SIZE) / 8);
269 	vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
270 #ifdef INVARIANTS
271 	    vmspace_zdtor,
272 #else
273 	    NULL,
274 #endif
275 	    vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
276 	pmap_init2();
277 	vm_object_init2();
278 }
279 
280 static __inline void
281 vmspace_dofree(struct vmspace *vm)
282 {
283 	CTR1(KTR_VM, "vmspace_free: %p", vm);
284 	/*
285 	 * Lock the map, to wait out all other references to it.
286 	 * Delete all of the mappings and pages they hold, then call
287 	 * the pmap module to reclaim anything left.
288 	 */
289 	vm_map_lock(&vm->vm_map);
290 	(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
291 	    vm->vm_map.max_offset);
292 	vm_map_unlock(&vm->vm_map);
293 
294 	pmap_release(vmspace_pmap(vm));
295 	uma_zfree(vmspace_zone, vm);
296 }
297 
298 void
299 vmspace_free(struct vmspace *vm)
300 {
301 	GIANT_REQUIRED;
302 
303 	if (vm->vm_refcnt == 0)
304 		panic("vmspace_free: attempt to free already freed vmspace");
305 
306 	if (--vm->vm_refcnt == 0)
307 		vmspace_dofree(vm);
308 }
309 
310 void
311 vmspace_exitfree(struct proc *p)
312 {
313 	struct vmspace *vm;
314 
315 	GIANT_REQUIRED;
316 	if (p == p->p_vmspace->vm_freer) {
317 		vm = p->p_vmspace;
318 		p->p_vmspace = NULL;
319 		vmspace_dofree(vm);
320 	}
321 }
322 
323 /*
324  * vmspace_swap_count() - count the approximate swap usage in pages for a
325  *			  vmspace.
326  *
327  *	Swap usage is determined by taking the proportional swap used by
328  *	VM objects backing the VM map.  To make up for fractional losses,
329  *	if the VM object has any swap use at all, the associated map entries
330  *	count for at least 1 swap page.
331  */
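/*
 *	Worked example of the computation below, with illustrative figures
 *	and SWAP_META_PAGES taken as 16: an entry spanning 2048 pages
 *	(n = 2048) backed by a 2048-page swap object with swp_bcount = 4
 *	contributes 4 * 16 * 2048 / 2048 + 1 = 65 pages to the total.
 */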
332 int
333 vmspace_swap_count(struct vmspace *vmspace)
334 {
335 	vm_map_t map = &vmspace->vm_map;
336 	vm_map_entry_t cur;
337 	int count = 0;
338 
339 	vm_map_lock_read(map);
340 	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
341 		vm_object_t object;
342 
343 		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
344 		    (object = cur->object.vm_object) != NULL &&
345 		    object->type == OBJT_SWAP
346 		) {
347 			int n = (cur->end - cur->start) / PAGE_SIZE;
348 
349 			if (object->un_pager.swp.swp_bcount) {
350 				count += object->un_pager.swp.swp_bcount *
351 				    SWAP_META_PAGES * n / object->size + 1;
352 			}
353 		}
354 	}
355 	vm_map_unlock_read(map);
356 	return (count);
357 }
358 
359 void
360 _vm_map_lock(vm_map_t map, const char *file, int line)
361 {
362 	int error;
363 
364 	if (map->system_map)
365 		GIANT_REQUIRED;
366 	error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
367 	KASSERT(error == 0, ("%s: failed to get lock", __func__));
368 	map->timestamp++;
369 }
370 
371 void
372 _vm_map_unlock(vm_map_t map, const char *file, int line)
373 {
374 
375 	lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
376 }
377 
378 void
379 _vm_map_lock_read(vm_map_t map, const char *file, int line)
380 {
381 	int error;
382 
383 	if (map->system_map)
384 		GIANT_REQUIRED;
385 	error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
386 	KASSERT(error == 0, ("%s: failed to get lock", __func__));
387 }
388 
389 void
390 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
391 {
392 
393 	lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
394 }
395 
396 int
397 _vm_map_trylock(vm_map_t map, const char *file, int line)
398 {
399 	int error;
400 
401 	if (map->system_map)
402 		GIANT_REQUIRED;
403 	error = lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL, curthread);
404 	return (error == 0);
405 }
406 
407 int
408 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
409 {
410 
411 	KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
412 		("%s: lock not held", __func__));
413 	map->timestamp++;
414 	return (0);
415 }
416 
417 void
418 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
419 {
420 
421 	KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
422 		("%s: lock not held", __func__));
423 }
424 
425 /*
426  *	vm_map_unlock_and_wait:
427  */
428 int
429 vm_map_unlock_and_wait(vm_map_t map, boolean_t user_wait)
430 {
431 	int retval;
432 
433 	mtx_lock(&Giant);
434 	vm_map_unlock(map);
435 	retval = tsleep(&map->root, PVM, "vmmapw", 0);
436 	mtx_unlock(&Giant);
437 	return (retval);
438 }
439 
440 /*
441  *	vm_map_wakeup:
442  */
443 void
444 vm_map_wakeup(vm_map_t map)
445 {
446 
447 	/*
448 	 * Acquire and release Giant to prevent a wakeup() from being
449 	 * performed (and lost) between the vm_map_unlock() and the
450 	 * tsleep() in vm_map_unlock_and_wait().
451 	 */
452 	mtx_lock(&Giant);
453 	mtx_unlock(&Giant);
454 	wakeup(&map->root);
455 }
456 
457 long
458 vmspace_resident_count(struct vmspace *vmspace)
459 {
460 	return pmap_resident_count(vmspace_pmap(vmspace));
461 }
462 
463 /*
464  *	vm_map_create:
465  *
466  *	Creates and returns a new empty VM map with
467  *	the given physical map structure, and having
468  *	the given lower and upper address bounds.
469  */
470 vm_map_t
471 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
472 {
473 	vm_map_t result;
474 
475 	result = uma_zalloc(mapzone, M_WAITOK);
476 	CTR1(KTR_VM, "vm_map_create: %p", result);
477 	_vm_map_init(result, min, max);
478 	result->pmap = pmap;
479 	return (result);
480 }
481 
482 /*
483  * Initialize an existing vm_map structure
484  * such as that in the vmspace structure.
485  * The pmap is set elsewhere.
486  */
487 static void
488 _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
489 {
490 
491 	map->header.next = map->header.prev = &map->header;
492 	map->needs_wakeup = FALSE;
493 	map->system_map = 0;
494 	map->min_offset = min;
495 	map->max_offset = max;
496 	map->first_free = &map->header;
497 	map->root = NULL;
498 	map->timestamp = 0;
499 }
500 
501 void
502 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
503 {
504 	_vm_map_init(map, min, max);
505 	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
506 }
507 
508 /*
509  *	vm_map_entry_dispose:	[ internal use only ]
510  *
511  *	Inverse of vm_map_entry_create.
512  */
513 static void
514 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
515 {
516 	uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
517 }
518 
519 /*
520  *	vm_map_entry_create:	[ internal use only ]
521  *
522  *	Allocates a VM map entry for insertion.
523  *	No entry fields are filled in.
524  */
525 static vm_map_entry_t
526 vm_map_entry_create(vm_map_t map)
527 {
528 	vm_map_entry_t new_entry;
529 
530 	if (map->system_map)
531 		new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
532 	else
533 		new_entry = uma_zalloc(mapentzone, M_WAITOK);
534 	if (new_entry == NULL)
535 		panic("vm_map_entry_create: kernel resources exhausted");
536 	return (new_entry);
537 }
538 
539 /*
540  *	vm_map_entry_set_behavior:
541  *
542  *	Set the expected access behavior, either normal, random, or
543  *	sequential.
544  */
545 static __inline void
546 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
547 {
548 	entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
549 	    (behavior & MAP_ENTRY_BEHAV_MASK);
550 }
551 
552 /*
553  *	vm_map_entry_splay:
554  *
555  *	Implements Sleator and Tarjan's top-down splay algorithm.  Returns
556  *	the vm_map_entry containing the given address.  If, however, that
557  *	address is not found in the vm_map, returns a vm_map_entry that is
558  *	adjacent to the address, coming before or after it.
559  */
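/*
 *	Usage sketch, mirroring vm_map_lookup_entry() below (hypothetical
 *	caller; the splayed tree must be stored back into the map):
 *
 *		root = vm_map_entry_splay(addr, map->root);
 *		map->root = root;
 *		if (root != NULL && addr >= root->start && addr < root->end)
 *			addr is mapped by "root";
 *		else
 *			addr lies in a hole and "root" is a neighboring entry;
 */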
560 static vm_map_entry_t
561 vm_map_entry_splay(vm_offset_t address, vm_map_entry_t root)
562 {
563 	struct vm_map_entry dummy;
564 	vm_map_entry_t lefttreemax, righttreemin, y;
565 
566 	if (root == NULL)
567 		return (root);
568 	lefttreemax = righttreemin = &dummy;
569 	for (;; root = y) {
570 		if (address < root->start) {
571 			if ((y = root->left) == NULL)
572 				break;
573 			if (address < y->start) {
574 				/* Rotate right. */
575 				root->left = y->right;
576 				y->right = root;
577 				root = y;
578 				if ((y = root->left) == NULL)
579 					break;
580 			}
581 			/* Link into the new root's right tree. */
582 			righttreemin->left = root;
583 			righttreemin = root;
584 		} else if (address >= root->end) {
585 			if ((y = root->right) == NULL)
586 				break;
587 			if (address >= y->end) {
588 				/* Rotate left. */
589 				root->right = y->left;
590 				y->left = root;
591 				root = y;
592 				if ((y = root->right) == NULL)
593 					break;
594 			}
595 			/* Link into the new root's left tree. */
596 			lefttreemax->right = root;
597 			lefttreemax = root;
598 		} else
599 			break;
600 	}
601 	/* Assemble the new root. */
602 	lefttreemax->right = root->left;
603 	righttreemin->left = root->right;
604 	root->left = dummy.right;
605 	root->right = dummy.left;
606 	return (root);
607 }
608 
609 /*
610  *	vm_map_entry_{un,}link:
611  *
612  *	Insert/remove entries from maps.
613  */
614 static void
615 vm_map_entry_link(vm_map_t map,
616 		  vm_map_entry_t after_where,
617 		  vm_map_entry_t entry)
618 {
619 
620 	CTR4(KTR_VM,
621 	    "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
622 	    map->nentries, entry, after_where);
623 	map->nentries++;
624 	entry->prev = after_where;
625 	entry->next = after_where->next;
626 	entry->next->prev = entry;
627 	after_where->next = entry;
628 
629 	if (after_where != &map->header) {
630 		if (after_where != map->root)
631 			vm_map_entry_splay(after_where->start, map->root);
632 		entry->right = after_where->right;
633 		entry->left = after_where;
634 		after_where->right = NULL;
635 	} else {
636 		entry->right = map->root;
637 		entry->left = NULL;
638 	}
639 	map->root = entry;
640 }
641 
642 static void
643 vm_map_entry_unlink(vm_map_t map,
644 		    vm_map_entry_t entry)
645 {
646 	vm_map_entry_t next, prev, root;
647 
648 	if (entry != map->root)
649 		vm_map_entry_splay(entry->start, map->root);
650 	if (entry->left == NULL)
651 		root = entry->right;
652 	else {
653 		root = vm_map_entry_splay(entry->start, entry->left);
654 		root->right = entry->right;
655 	}
656 	map->root = root;
657 
658 	prev = entry->prev;
659 	next = entry->next;
660 	next->prev = prev;
661 	prev->next = next;
662 	map->nentries--;
663 	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
664 	    map->nentries, entry);
665 }
666 
667 /*
668  *	vm_map_lookup_entry:	[ internal use only ]
669  *
670  *	Finds the map entry containing (or
671  *	immediately preceding) the specified address
672  *	in the given map; the entry is returned
673  *	in the "entry" parameter.  The boolean
674  *	result indicates whether the address is
675  *	actually contained in the map.
676  */
677 boolean_t
678 vm_map_lookup_entry(
679 	vm_map_t map,
680 	vm_offset_t address,
681 	vm_map_entry_t *entry)	/* OUT */
682 {
683 	vm_map_entry_t cur;
684 
685 	cur = vm_map_entry_splay(address, map->root);
686 	if (cur == NULL)
687 		*entry = &map->header;
688 	else {
689 		map->root = cur;
690 
691 		if (address >= cur->start) {
692 			*entry = cur;
693 			if (cur->end > address)
694 				return (TRUE);
695 		} else
696 			*entry = cur->prev;
697 	}
698 	return (FALSE);
699 }
700 
701 /*
702  *	vm_map_insert:
703  *
704  *	Inserts the given whole VM object into the target
705  *	map at the specified address range.  The object's
706  *	size should match that of the address range.
707  *
708  *	Requires that the map be locked, and leaves it so.
709  *
710  *	If object is non-NULL, ref count must be bumped by caller
711  *	prior to making call to account for the new entry.
712  */
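/*
 *	A hypothetical caller honoring the contract above would look
 *	roughly like:
 *
 *		if (object != NULL)
 *			vm_object_reference(object);
 *		vm_map_lock(map);
 *		rv = vm_map_insert(map, object, offset, start, start + size,
 *		    prot, max, cow);
 *		vm_map_unlock(map);
 */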
713 int
714 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
715 	      vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
716 	      int cow)
717 {
718 	vm_map_entry_t new_entry;
719 	vm_map_entry_t prev_entry;
720 	vm_map_entry_t temp_entry;
721 	vm_eflags_t protoeflags;
722 
723 	/*
724 	 * Check that the start and end points are not bogus.
725 	 */
726 	if ((start < map->min_offset) || (end > map->max_offset) ||
727 	    (start >= end))
728 		return (KERN_INVALID_ADDRESS);
729 
730 	/*
731 	 * Find the entry prior to the proposed starting address; if it's part
732 	 * of an existing entry, this range is bogus.
733 	 */
734 	if (vm_map_lookup_entry(map, start, &temp_entry))
735 		return (KERN_NO_SPACE);
736 
737 	prev_entry = temp_entry;
738 
739 	/*
740 	 * Assert that the next entry doesn't overlap the end point.
741 	 */
742 	if ((prev_entry->next != &map->header) &&
743 	    (prev_entry->next->start < end))
744 		return (KERN_NO_SPACE);
745 
746 	protoeflags = 0;
747 
748 	if (cow & MAP_COPY_ON_WRITE)
749 		protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
750 
751 	if (cow & MAP_NOFAULT) {
752 		protoeflags |= MAP_ENTRY_NOFAULT;
753 
754 		KASSERT(object == NULL,
755 			("vm_map_insert: paradoxical MAP_NOFAULT request"));
756 	}
757 	if (cow & MAP_DISABLE_SYNCER)
758 		protoeflags |= MAP_ENTRY_NOSYNC;
759 	if (cow & MAP_DISABLE_COREDUMP)
760 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
761 
762 	if (object) {
763 		/*
764 		 * When object is non-NULL, it could be shared with another
765 		 * process.  We have to set or clear OBJ_ONEMAPPING
766 		 * appropriately.
767 		 */
768 		vm_object_lock(object);
769 		if ((object->ref_count > 1) || (object->shadow_count != 0)) {
770 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
771 		}
772 		vm_object_unlock(object);
773 	}
774 	else if ((prev_entry != &map->header) &&
775 		 (prev_entry->eflags == protoeflags) &&
776 		 (prev_entry->end == start) &&
777 		 (prev_entry->wired_count == 0) &&
778 		 ((prev_entry->object.vm_object == NULL) ||
779 		  vm_object_coalesce(prev_entry->object.vm_object,
780 				     OFF_TO_IDX(prev_entry->offset),
781 				     (vm_size_t)(prev_entry->end - prev_entry->start),
782 				     (vm_size_t)(end - prev_entry->end)))) {
783 		/*
784 		 * We were able to extend the object.  Determine if we
785 		 * can extend the previous map entry to include the
786 		 * new range as well.
787 		 */
788 		if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
789 		    (prev_entry->protection == prot) &&
790 		    (prev_entry->max_protection == max)) {
791 			map->size += (end - prev_entry->end);
792 			prev_entry->end = end;
793 			vm_map_simplify_entry(map, prev_entry);
794 			return (KERN_SUCCESS);
795 		}
796 
797 		/*
798 		 * If we can extend the object but cannot extend the
799 		 * map entry, we have to create a new map entry.  We
800 		 * must bump the ref count on the extended object to
801 		 * account for it.  object may be NULL.
802 		 */
803 		object = prev_entry->object.vm_object;
804 		offset = prev_entry->offset +
805 			(prev_entry->end - prev_entry->start);
806 		vm_object_reference(object);
807 	}
808 
809 	/*
810 	 * NOTE: if conditionals fail, object can be NULL here.  This occurs
811 	 * in things like the buffer map where we manage kva but do not manage
812 	 * backing objects.
813 	 */
814 
815 	/*
816 	 * Create a new entry
817 	 */
818 	new_entry = vm_map_entry_create(map);
819 	new_entry->start = start;
820 	new_entry->end = end;
821 
822 	new_entry->eflags = protoeflags;
823 	new_entry->object.vm_object = object;
824 	new_entry->offset = offset;
825 	new_entry->avail_ssize = 0;
826 
827 	new_entry->inheritance = VM_INHERIT_DEFAULT;
828 	new_entry->protection = prot;
829 	new_entry->max_protection = max;
830 	new_entry->wired_count = 0;
831 
832 	/*
833 	 * Insert the new entry into the list
834 	 */
835 	vm_map_entry_link(map, prev_entry, new_entry);
836 	map->size += new_entry->end - new_entry->start;
837 
838 	/*
839 	 * Update the free space hint
840 	 */
841 	if ((map->first_free == prev_entry) &&
842 	    (prev_entry->end >= new_entry->start)) {
843 		map->first_free = new_entry;
844 	}
845 
846 #if 0
847 	/*
848 	 * Temporarily removed to avoid MAP_STACK panic, due to
849 	 * MAP_STACK being a huge hack.  Will be added back in
850 	 * when MAP_STACK (and the user stack mapping) is fixed.
851 	 */
852 	/*
853 	 * It may be possible to simplify the entry
854 	 */
855 	vm_map_simplify_entry(map, new_entry);
856 #endif
857 
858 	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
859 		mtx_lock(&Giant);
860 		pmap_object_init_pt(map->pmap, start,
861 				    object, OFF_TO_IDX(offset), end - start,
862 				    cow & MAP_PREFAULT_PARTIAL);
863 		mtx_unlock(&Giant);
864 	}
865 
866 	return (KERN_SUCCESS);
867 }
868 
869 /*
870  * Find sufficient space for `length' bytes in the given map, starting at
871  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
872  */
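/*
 * A minimal usage sketch; this is essentially what vm_map_find() does below:
 *
 *	vm_map_lock(map);
 *	if (vm_map_findspace(map, start, length, &addr) == 0)
 *		rv = vm_map_insert(map, object, offset, addr, addr + length,
 *		    prot, max, cow);
 *	vm_map_unlock(map);
 */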
873 int
874 vm_map_findspace(
875 	vm_map_t map,
876 	vm_offset_t start,
877 	vm_size_t length,
878 	vm_offset_t *addr)
879 {
880 	vm_map_entry_t entry, next;
881 	vm_offset_t end;
882 
883 	if (start < map->min_offset)
884 		start = map->min_offset;
885 	if (start > map->max_offset)
886 		return (1);
887 
888 	/*
889 	 * Look for the first possible address; if there's already something
890 	 * at this address, we have to start after it.
891 	 */
892 	if (start == map->min_offset) {
893 		if ((entry = map->first_free) != &map->header)
894 			start = entry->end;
895 	} else {
896 		vm_map_entry_t tmp;
897 
898 		if (vm_map_lookup_entry(map, start, &tmp))
899 			start = tmp->end;
900 		entry = tmp;
901 	}
902 
903 	/*
904 	 * Look through the rest of the map, trying to fit a new region in the
905 	 * gap between existing regions, or after the very last region.
906 	 */
907 	for (;; start = (entry = next)->end) {
908 		/*
909 		 * Find the end of the proposed new region.  Be sure we didn't
910 		 * go beyond the end of the map, or wrap around the address;
911 		 * if so, we lose.  Otherwise, if this is the last entry, or
912 		 * if the proposed new region fits before the next entry, we
913 		 * win.
914 		 */
915 		end = start + length;
916 		if (end > map->max_offset || end < start)
917 			return (1);
918 		next = entry->next;
919 		if (next == &map->header || next->start >= end)
920 			break;
921 	}
922 	*addr = start;
923 	if (map == kernel_map) {
924 		vm_offset_t ksize;
925 		if ((ksize = round_page(start + length)) > kernel_vm_end) {
926 			mtx_lock(&Giant);
927 			pmap_growkernel(ksize);
928 			mtx_unlock(&Giant);
929 		}
930 	}
931 	return (0);
932 }
933 
934 /*
935  *	vm_map_find finds an unallocated region in the target address
936  *	map with the given length.  The search is defined to be
937  *	first-fit from the specified address; the region found is
938  *	returned in the same parameter.
939  *
940  *	If object is non-NULL, ref count must be bumped by caller
941  *	prior to making call to account for the new entry.
942  */
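/*
 *	For example, a caller that only needs a run of unused address space
 *	(no backing object yet) might do, roughly:
 *
 *		rv = vm_map_find(map, NULL, 0, &addr, size, TRUE,
 *		    VM_PROT_ALL, VM_PROT_ALL, 0);
 */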
943 int
944 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
945 	    vm_offset_t *addr,	/* IN/OUT */
946 	    vm_size_t length, boolean_t find_space, vm_prot_t prot,
947 	    vm_prot_t max, int cow)
948 {
949 	vm_offset_t start;
950 	int result, s = 0;
951 
952 	start = *addr;
953 
954 	if (map == kmem_map)
955 		s = splvm();
956 
957 	vm_map_lock(map);
958 	if (find_space) {
959 		if (vm_map_findspace(map, start, length, addr)) {
960 			vm_map_unlock(map);
961 			if (map == kmem_map)
962 				splx(s);
963 			return (KERN_NO_SPACE);
964 		}
965 		start = *addr;
966 	}
967 	result = vm_map_insert(map, object, offset,
968 		start, start + length, prot, max, cow);
969 	vm_map_unlock(map);
970 
971 	if (map == kmem_map)
972 		splx(s);
973 
974 	return (result);
975 }
976 
977 /*
978  *	vm_map_simplify_entry:
979  *
980  *	Simplify the given map entry by merging with either neighbor.  This
981  *	routine also has the ability to merge with both neighbors.
982  *
983  *	The map must be locked.
984  *
985  *	This routine guarantees that the passed entry remains valid (though
986  *	possibly extended).  When merging, this routine may delete one or
987  *	both neighbors.
988  */
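/*
 *	For example, two adjacent entries [A, B) and [B, C) that map the
 *	same object at contiguous offsets, with identical eflags,
 *	protections, inheritance, and wired_count, are merged into a single
 *	[A, C) entry and the redundant entry's object reference is released.
 */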
989 void
990 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
991 {
992 	vm_map_entry_t next, prev;
993 	vm_size_t prevsize, esize;
994 
995 	if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP))
996 		return;
997 
998 	prev = entry->prev;
999 	if (prev != &map->header) {
1000 		prevsize = prev->end - prev->start;
1001 		if ( (prev->end == entry->start) &&
1002 		     (prev->object.vm_object == entry->object.vm_object) &&
1003 		     (!prev->object.vm_object ||
1004 			(prev->offset + prevsize == entry->offset)) &&
1005 		     (prev->eflags == entry->eflags) &&
1006 		     (prev->protection == entry->protection) &&
1007 		     (prev->max_protection == entry->max_protection) &&
1008 		     (prev->inheritance == entry->inheritance) &&
1009 		     (prev->wired_count == entry->wired_count)) {
1010 			if (map->first_free == prev)
1011 				map->first_free = entry;
1012 			vm_map_entry_unlink(map, prev);
1013 			entry->start = prev->start;
1014 			entry->offset = prev->offset;
1015 			if (prev->object.vm_object)
1016 				vm_object_deallocate(prev->object.vm_object);
1017 			vm_map_entry_dispose(map, prev);
1018 		}
1019 	}
1020 
1021 	next = entry->next;
1022 	if (next != &map->header) {
1023 		esize = entry->end - entry->start;
1024 		if ((entry->end == next->start) &&
1025 		    (next->object.vm_object == entry->object.vm_object) &&
1026 		     (!entry->object.vm_object ||
1027 			(entry->offset + esize == next->offset)) &&
1028 		    (next->eflags == entry->eflags) &&
1029 		    (next->protection == entry->protection) &&
1030 		    (next->max_protection == entry->max_protection) &&
1031 		    (next->inheritance == entry->inheritance) &&
1032 		    (next->wired_count == entry->wired_count)) {
1033 			if (map->first_free == next)
1034 				map->first_free = entry;
1035 			vm_map_entry_unlink(map, next);
1036 			entry->end = next->end;
1037 			if (next->object.vm_object)
1038 				vm_object_deallocate(next->object.vm_object);
1039 			vm_map_entry_dispose(map, next);
1040 		}
1041 	}
1042 }
1043 /*
1044  *	vm_map_clip_start:	[ internal use only ]
1045  *
1046  *	Asserts that the given entry begins at or after
1047  *	the specified address; if necessary,
1048  *	it splits the entry into two.
1049  */
1050 #define vm_map_clip_start(map, entry, startaddr) \
1051 { \
1052 	if (startaddr > entry->start) \
1053 		_vm_map_clip_start(map, entry, startaddr); \
1054 }
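
/*
 *	For example, clipping an entry covering [0x1000, 0x5000) at startaddr
 *	0x3000 leaves the original entry covering [0x3000, 0x5000) and links
 *	a new entry for [0x1000, 0x3000) immediately before it.
 */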
1055 
1056 /*
1057  *	This routine is called only when it is known that
1058  *	the entry must be split.
1059  */
1060 static void
1061 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
1062 {
1063 	vm_map_entry_t new_entry;
1064 
1065 	/*
1066 	 * Split off the front portion -- note that we must insert the new
1067 	 * entry BEFORE this one, so that this entry has the specified
1068 	 * starting address.
1069 	 */
1070 	vm_map_simplify_entry(map, entry);
1071 
1072 	/*
1073 	 * If there is no object backing this entry, we might as well create
1074 	 * one now.  If we defer it, an object can get created after the map
1075 	 * is clipped, and individual objects will be created for the split-up
1076 	 * map.  This is a bit of a hack, but is also about the best place to
1077 	 * put this improvement.
1078 	 */
1079 	if (entry->object.vm_object == NULL && !map->system_map) {
1080 		vm_object_t object;
1081 		object = vm_object_allocate(OBJT_DEFAULT,
1082 				atop(entry->end - entry->start));
1083 		entry->object.vm_object = object;
1084 		entry->offset = 0;
1085 	}
1086 
1087 	new_entry = vm_map_entry_create(map);
1088 	*new_entry = *entry;
1089 
1090 	new_entry->end = start;
1091 	entry->offset += (start - entry->start);
1092 	entry->start = start;
1093 
1094 	vm_map_entry_link(map, entry->prev, new_entry);
1095 
1096 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1097 		vm_object_reference(new_entry->object.vm_object);
1098 	}
1099 }
1100 
1101 /*
1102  *	vm_map_clip_end:	[ internal use only ]
1103  *
1104  *	Asserts that the given entry ends at or before
1105  *	the specified address; if necessary,
1106  *	it splits the entry into two.
1107  */
1108 #define vm_map_clip_end(map, entry, endaddr) \
1109 { \
1110 	if ((endaddr) < (entry->end)) \
1111 		_vm_map_clip_end((map), (entry), (endaddr)); \
1112 }
1113 
1114 /*
1115  *	This routine is called only when it is known that
1116  *	the entry must be split.
1117  */
1118 static void
1119 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1120 {
1121 	vm_map_entry_t new_entry;
1122 
1123 	/*
1124 	 * If there is no object backing this entry, we might as well create
1125 	 * one now.  If we defer it, an object can get created after the map
1126 	 * is clipped, and individual objects will be created for the split-up
1127 	 * map.  This is a bit of a hack, but is also about the best place to
1128 	 * put this improvement.
1129 	 */
1130 	if (entry->object.vm_object == NULL && !map->system_map) {
1131 		vm_object_t object;
1132 		object = vm_object_allocate(OBJT_DEFAULT,
1133 				atop(entry->end - entry->start));
1134 		entry->object.vm_object = object;
1135 		entry->offset = 0;
1136 	}
1137 
1138 	/*
1139 	 * Create a new entry and insert it AFTER the specified entry
1140 	 */
1141 	new_entry = vm_map_entry_create(map);
1142 	*new_entry = *entry;
1143 
1144 	new_entry->start = entry->end = end;
1145 	new_entry->offset += (end - entry->start);
1146 
1147 	vm_map_entry_link(map, entry, new_entry);
1148 
1149 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1150 		vm_object_reference(new_entry->object.vm_object);
1151 	}
1152 }
1153 
1154 /*
1155  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
1156  *
1157  *	Asserts that the starting and ending region
1158  *	addresses fall within the valid range of the map.
1159  */
1160 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
1161 		{					\
1162 		if (start < vm_map_min(map))		\
1163 			start = vm_map_min(map);	\
1164 		if (end > vm_map_max(map))		\
1165 			end = vm_map_max(map);		\
1166 		if (start > end)			\
1167 			start = end;			\
1168 		}
1169 
1170 /*
1171  *	vm_map_submap:		[ kernel use only ]
1172  *
1173  *	Mark the given range as handled by a subordinate map.
1174  *
1175  *	This range must have been created with vm_map_find,
1176  *	and no other operations may have been performed on this
1177  *	range prior to calling vm_map_submap.
1178  *
1179  *	Only a limited number of operations can be performed
1180  *	within this range after calling vm_map_submap:
1181  *		vm_fault
1182  *	[Don't try vm_map_copy!]
1183  *
1184  *	To remove a submapping, one must first remove the
1185  *	range from the superior map, and then destroy the
1186  *	submap (if desired).  [Better yet, don't try it.]
1187  */
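/*
 *	Rough sketch of how a kernel submap is typically set up (compare
 *	kmem_suballoc(); error handling omitted):
 *
 *		vm_map_find(parent, NULL, 0, &min, size, TRUE,
 *		    VM_PROT_ALL, VM_PROT_ALL, 0);
 *		submap = vm_map_create(vm_map_pmap(parent), min, min + size);
 *		vm_map_submap(parent, min, min + size, submap);
 */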
1188 int
1189 vm_map_submap(
1190 	vm_map_t map,
1191 	vm_offset_t start,
1192 	vm_offset_t end,
1193 	vm_map_t submap)
1194 {
1195 	vm_map_entry_t entry;
1196 	int result = KERN_INVALID_ARGUMENT;
1197 
1198 	vm_map_lock(map);
1199 
1200 	VM_MAP_RANGE_CHECK(map, start, end);
1201 
1202 	if (vm_map_lookup_entry(map, start, &entry)) {
1203 		vm_map_clip_start(map, entry, start);
1204 	} else
1205 		entry = entry->next;
1206 
1207 	vm_map_clip_end(map, entry, end);
1208 
1209 	if ((entry->start == start) && (entry->end == end) &&
1210 	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1211 	    (entry->object.vm_object == NULL)) {
1212 		entry->object.sub_map = submap;
1213 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1214 		result = KERN_SUCCESS;
1215 	}
1216 	vm_map_unlock(map);
1217 
1218 	return (result);
1219 }
1220 
1221 /*
1222  *	vm_map_protect:
1223  *
1224  *	Sets the protection of the specified address
1225  *	region in the target map.  If "set_max" is
1226  *	specified, the maximum protection is to be set;
1227  *	otherwise, only the current protection is affected.
1228  */
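/*
 *	This is essentially the backend for mprotect(2); such a caller
 *	passes set_max as FALSE so that only the current protection changes:
 *
 *		rv = vm_map_protect(&p->p_vmspace->vm_map, start, end,
 *		    new_prot, FALSE);
 */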
1229 int
1230 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1231 	       vm_prot_t new_prot, boolean_t set_max)
1232 {
1233 	vm_map_entry_t current;
1234 	vm_map_entry_t entry;
1235 
1236 	vm_map_lock(map);
1237 
1238 	VM_MAP_RANGE_CHECK(map, start, end);
1239 
1240 	if (vm_map_lookup_entry(map, start, &entry)) {
1241 		vm_map_clip_start(map, entry, start);
1242 	} else {
1243 		entry = entry->next;
1244 	}
1245 
1246 	/*
1247 	 * Make a first pass to check for protection violations.
1248 	 */
1249 	current = entry;
1250 	while ((current != &map->header) && (current->start < end)) {
1251 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1252 			vm_map_unlock(map);
1253 			return (KERN_INVALID_ARGUMENT);
1254 		}
1255 		if ((new_prot & current->max_protection) != new_prot) {
1256 			vm_map_unlock(map);
1257 			return (KERN_PROTECTION_FAILURE);
1258 		}
1259 		current = current->next;
1260 	}
1261 
1262 	/*
1263 	 * Go back and fix up protections. [Note that clipping is not
1264 	 * necessary the second time.]
1265 	 */
1266 	current = entry;
1267 	while ((current != &map->header) && (current->start < end)) {
1268 		vm_prot_t old_prot;
1269 
1270 		vm_map_clip_end(map, current, end);
1271 
1272 		old_prot = current->protection;
1273 		if (set_max)
1274 			current->protection =
1275 			    (current->max_protection = new_prot) &
1276 			    old_prot;
1277 		else
1278 			current->protection = new_prot;
1279 
1280 		/*
1281 		 * Update physical map if necessary. Worry about copy-on-write
1282 		 * here -- CHECK THIS XXX
1283 		 */
1284 		if (current->protection != old_prot) {
1285 			mtx_lock(&Giant);
1286 #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1287 							VM_PROT_ALL)
1288 			pmap_protect(map->pmap, current->start,
1289 			    current->end,
1290 			    current->protection & MASK(current));
1291 #undef	MASK
1292 			mtx_unlock(&Giant);
1293 		}
1294 		vm_map_simplify_entry(map, current);
1295 		current = current->next;
1296 	}
1297 	vm_map_unlock(map);
1298 	return (KERN_SUCCESS);
1299 }
1300 
1301 /*
1302  *	vm_map_madvise:
1303  *
1304  * 	This routine traverses a process's map handling the madvise
1305  *	system call.  Advisories are classified as either those affecting
1306  *	the vm_map_entry structure, or those affecting the underlying
1307  *	objects.
1308  */
1309 int
1310 vm_map_madvise(
1311 	vm_map_t map,
1312 	vm_offset_t start,
1313 	vm_offset_t end,
1314 	int behav)
1315 {
1316 	vm_map_entry_t current, entry;
1317 	int modify_map = 0;
1318 
1319 	/*
1320 	 * Some madvise calls directly modify the vm_map_entry, in which case
1321 	 * we need to use an exclusive lock on the map and we need to perform
1322 	 * various clipping operations.  Otherwise we only need a read-lock
1323 	 * on the map.
1324 	 */
1325 	switch(behav) {
1326 	case MADV_NORMAL:
1327 	case MADV_SEQUENTIAL:
1328 	case MADV_RANDOM:
1329 	case MADV_NOSYNC:
1330 	case MADV_AUTOSYNC:
1331 	case MADV_NOCORE:
1332 	case MADV_CORE:
1333 		modify_map = 1;
1334 		vm_map_lock(map);
1335 		break;
1336 	case MADV_WILLNEED:
1337 	case MADV_DONTNEED:
1338 	case MADV_FREE:
1339 		vm_map_lock_read(map);
1340 		break;
1341 	default:
1342 		return (KERN_INVALID_ARGUMENT);
1343 	}
1344 
1345 	/*
1346 	 * Locate starting entry and clip if necessary.
1347 	 */
1348 	VM_MAP_RANGE_CHECK(map, start, end);
1349 
1350 	if (vm_map_lookup_entry(map, start, &entry)) {
1351 		if (modify_map)
1352 			vm_map_clip_start(map, entry, start);
1353 	} else {
1354 		entry = entry->next;
1355 	}
1356 
1357 	if (modify_map) {
1358 		/*
1359 		 * madvise behaviors that are implemented in the vm_map_entry.
1360 		 *
1361 		 * We clip the vm_map_entry so that behavioral changes are
1362 		 * limited to the specified address range.
1363 		 */
1364 		for (current = entry;
1365 		     (current != &map->header) && (current->start < end);
1366 		     current = current->next
1367 		) {
1368 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1369 				continue;
1370 
1371 			vm_map_clip_end(map, current, end);
1372 
1373 			switch (behav) {
1374 			case MADV_NORMAL:
1375 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1376 				break;
1377 			case MADV_SEQUENTIAL:
1378 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1379 				break;
1380 			case MADV_RANDOM:
1381 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1382 				break;
1383 			case MADV_NOSYNC:
1384 				current->eflags |= MAP_ENTRY_NOSYNC;
1385 				break;
1386 			case MADV_AUTOSYNC:
1387 				current->eflags &= ~MAP_ENTRY_NOSYNC;
1388 				break;
1389 			case MADV_NOCORE:
1390 				current->eflags |= MAP_ENTRY_NOCOREDUMP;
1391 				break;
1392 			case MADV_CORE:
1393 				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1394 				break;
1395 			default:
1396 				break;
1397 			}
1398 			vm_map_simplify_entry(map, current);
1399 		}
1400 		vm_map_unlock(map);
1401 	} else {
1402 		vm_pindex_t pindex;
1403 		int count;
1404 
1405 		/*
1406 		 * madvise behaviors that are implemented in the underlying
1407 		 * vm_object.
1408 		 *
1409 		 * Since we don't clip the vm_map_entry, we have to clip
1410 		 * the vm_object pindex and count.
1411 		 */
1412 		for (current = entry;
1413 		     (current != &map->header) && (current->start < end);
1414 		     current = current->next
1415 		) {
1416 			vm_offset_t useStart;
1417 
1418 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1419 				continue;
1420 
1421 			pindex = OFF_TO_IDX(current->offset);
1422 			count = atop(current->end - current->start);
1423 			useStart = current->start;
1424 
1425 			if (current->start < start) {
1426 				pindex += atop(start - current->start);
1427 				count -= atop(start - current->start);
1428 				useStart = start;
1429 			}
1430 			if (current->end > end)
1431 				count -= atop(current->end - end);
1432 
1433 			if (count <= 0)
1434 				continue;
1435 
1436 			vm_object_madvise(current->object.vm_object,
1437 					  pindex, count, behav);
1438 			if (behav == MADV_WILLNEED) {
1439 				mtx_lock(&Giant);
1440 				pmap_object_init_pt(
1441 				    map->pmap,
1442 				    useStart,
1443 				    current->object.vm_object,
1444 				    pindex,
1445 				    (count << PAGE_SHIFT),
1446 				    MAP_PREFAULT_MADVISE
1447 				);
1448 				mtx_unlock(&Giant);
1449 			}
1450 		}
1451 		vm_map_unlock_read(map);
1452 	}
1453 	return (0);
1454 }
1455 
1456 
1457 /*
1458  *	vm_map_inherit:
1459  *
1460  *	Sets the inheritance of the specified address
1461  *	range in the target map.  Inheritance
1462  *	affects how the map will be shared with
1463  *	child maps at the time of vm_map_fork.
1464  */
1465 int
1466 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1467 	       vm_inherit_t new_inheritance)
1468 {
1469 	vm_map_entry_t entry;
1470 	vm_map_entry_t temp_entry;
1471 
1472 	switch (new_inheritance) {
1473 	case VM_INHERIT_NONE:
1474 	case VM_INHERIT_COPY:
1475 	case VM_INHERIT_SHARE:
1476 		break;
1477 	default:
1478 		return (KERN_INVALID_ARGUMENT);
1479 	}
1480 	vm_map_lock(map);
1481 	VM_MAP_RANGE_CHECK(map, start, end);
1482 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1483 		entry = temp_entry;
1484 		vm_map_clip_start(map, entry, start);
1485 	} else
1486 		entry = temp_entry->next;
1487 	while ((entry != &map->header) && (entry->start < end)) {
1488 		vm_map_clip_end(map, entry, end);
1489 		entry->inheritance = new_inheritance;
1490 		vm_map_simplify_entry(map, entry);
1491 		entry = entry->next;
1492 	}
1493 	vm_map_unlock(map);
1494 	return (KERN_SUCCESS);
1495 }
1496 
1497 /*
1498  *	vm_map_unwire:
1499  *
1500  *	Implements both kernel and user unwiring.
1501  */
1502 int
1503 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1504 	boolean_t user_unwire)
1505 {
1506 	vm_map_entry_t entry, first_entry, tmp_entry;
1507 	vm_offset_t saved_start;
1508 	unsigned int last_timestamp;
1509 	int rv;
1510 	boolean_t need_wakeup, result;
1511 
1512 	vm_map_lock(map);
1513 	VM_MAP_RANGE_CHECK(map, start, end);
1514 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
1515 		vm_map_unlock(map);
1516 		return (KERN_INVALID_ADDRESS);
1517 	}
1518 	last_timestamp = map->timestamp;
1519 	entry = first_entry;
1520 	while (entry != &map->header && entry->start < end) {
1521 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1522 			/*
1523 			 * We have not yet clipped the entry.
1524 			 */
1525 			saved_start = (start >= entry->start) ? start :
1526 			    entry->start;
1527 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1528 			if (vm_map_unlock_and_wait(map, user_unwire)) {
1529 				/*
1530 				 * Allow interruption of user unwiring?
1531 				 */
1532 			}
1533 			vm_map_lock(map);
1534 			if (last_timestamp+1 != map->timestamp) {
1535 				/*
1536 				 * Look again for the entry because the map was
1537 				 * modified while it was unlocked.
1538 				 * Specifically, the entry may have been
1539 				 * clipped, merged, or deleted.
1540 				 */
1541 				if (!vm_map_lookup_entry(map, saved_start,
1542 				    &tmp_entry)) {
1543 					if (saved_start == start) {
1544 						/*
1545 						 * First_entry has been deleted.
1546 						 */
1547 						vm_map_unlock(map);
1548 						return (KERN_INVALID_ADDRESS);
1549 					}
1550 					end = saved_start;
1551 					rv = KERN_INVALID_ADDRESS;
1552 					goto done;
1553 				}
1554 				if (entry == first_entry)
1555 					first_entry = tmp_entry;
1556 				else
1557 					first_entry = NULL;
1558 				entry = tmp_entry;
1559 			}
1560 			last_timestamp = map->timestamp;
1561 			continue;
1562 		}
1563 		vm_map_clip_start(map, entry, start);
1564 		vm_map_clip_end(map, entry, end);
1565 		/*
1566 		 * Mark the entry in case the map lock is released.  (See
1567 		 * above.)
1568 		 */
1569 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1570 		/*
1571 		 * Check the map for holes in the specified region.
1572 		 */
1573 		if (entry->end < end && (entry->next == &map->header ||
1574 		    entry->next->start > entry->end)) {
1575 			end = entry->end;
1576 			rv = KERN_INVALID_ADDRESS;
1577 			goto done;
1578 		}
1579 		/*
1580 		 * Require that the entry is wired.
1581 		 */
1582 		if (entry->wired_count == 0 || (user_unwire &&
1583 		    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)) {
1584 			end = entry->end;
1585 			rv = KERN_INVALID_ARGUMENT;
1586 			goto done;
1587 		}
1588 		entry = entry->next;
1589 	}
1590 	rv = KERN_SUCCESS;
1591 done:
1592 	need_wakeup = FALSE;
1593 	if (first_entry == NULL) {
1594 		result = vm_map_lookup_entry(map, start, &first_entry);
1595 		KASSERT(result, ("vm_map_unwire: lookup failed"));
1596 	}
1597 	entry = first_entry;
1598 	while (entry != &map->header && entry->start < end) {
1599 		if (rv == KERN_SUCCESS) {
1600 			if (user_unwire)
1601 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1602 			entry->wired_count--;
1603 			if (entry->wired_count == 0) {
1604 				/*
1605 				 * Retain the map lock.
1606 				 */
1607 				vm_fault_unwire(map, entry->start, entry->end);
1608 			}
1609 		}
1610 		KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1611 			("vm_map_unwire: in-transition flag missing"));
1612 		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1613 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1614 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1615 			need_wakeup = TRUE;
1616 		}
1617 		vm_map_simplify_entry(map, entry);
1618 		entry = entry->next;
1619 	}
1620 	vm_map_unlock(map);
1621 	if (need_wakeup)
1622 		vm_map_wakeup(map);
1623 	return (rv);
1624 }
1625 
1626 /*
1627  *	vm_map_wire:
1628  *
1629  *	Implements both kernel and user wiring.
1630  */
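/*
 *	An mlock(2)-style caller wires user pages with something like the
 *	following (sketch; bounds are page-rounded by the caller):
 *
 *		rv = vm_map_wire(&p->p_vmspace->vm_map, start, end, TRUE);
 */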
1631 int
1632 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
1633 	boolean_t user_wire)
1634 {
1635 	vm_map_entry_t entry, first_entry, tmp_entry;
1636 	vm_offset_t saved_end, saved_start;
1637 	unsigned int last_timestamp;
1638 	int rv;
1639 	boolean_t need_wakeup, result;
1640 
1641 	vm_map_lock(map);
1642 	VM_MAP_RANGE_CHECK(map, start, end);
1643 	if (!vm_map_lookup_entry(map, start, &first_entry)) {
1644 		vm_map_unlock(map);
1645 		return (KERN_INVALID_ADDRESS);
1646 	}
1647 	last_timestamp = map->timestamp;
1648 	entry = first_entry;
1649 	while (entry != &map->header && entry->start < end) {
1650 		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
1651 			/*
1652 			 * We have not yet clipped the entry.
1653 			 */
1654 			saved_start = (start >= entry->start) ? start :
1655 			    entry->start;
1656 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
1657 			if (vm_map_unlock_and_wait(map, user_wire)) {
1658 				/*
1659 				 * Allow interruption of user wiring?
1660 				 */
1661 			}
1662 			vm_map_lock(map);
1663 			if (last_timestamp + 1 != map->timestamp) {
1664 				/*
1665 				 * Look again for the entry because the map was
1666 				 * modified while it was unlocked.
1667 				 * Specifically, the entry may have been
1668 				 * clipped, merged, or deleted.
1669 				 */
1670 				if (!vm_map_lookup_entry(map, saved_start,
1671 				    &tmp_entry)) {
1672 					if (saved_start == start) {
1673 						/*
1674 						 * first_entry has been deleted.
1675 						 */
1676 						vm_map_unlock(map);
1677 						return (KERN_INVALID_ADDRESS);
1678 					}
1679 					end = saved_start;
1680 					rv = KERN_INVALID_ADDRESS;
1681 					goto done;
1682 				}
1683 				if (entry == first_entry)
1684 					first_entry = tmp_entry;
1685 				else
1686 					first_entry = NULL;
1687 				entry = tmp_entry;
1688 			}
1689 			last_timestamp = map->timestamp;
1690 			continue;
1691 		}
1692 		vm_map_clip_start(map, entry, start);
1693 		vm_map_clip_end(map, entry, end);
1694 		/*
1695 		 * Mark the entry in case the map lock is released.  (See
1696 		 * above.)
1697 		 */
1698 		entry->eflags |= MAP_ENTRY_IN_TRANSITION;
1699 		/*
1700 		 * Wire the entry if it is not already wired.
1701 		 */
1702 		if (entry->wired_count == 0) {
1703 			entry->wired_count++;
1704 			saved_start = entry->start;
1705 			saved_end = entry->end;
1706 			/*
1707 			 * Release the map lock, relying on the in-transition
1708 			 * mark.
1709 			 */
1710 			vm_map_unlock(map);
1711 			rv = vm_fault_wire(map, saved_start, saved_end,
1712 			    user_wire);
1713 			vm_map_lock(map);
1714 			if (last_timestamp + 1 != map->timestamp) {
1715 				/*
1716 				 * Look again for the entry because the map was
1717 				 * modified while it was unlocked.  The entry
1718 				 * may have been clipped, but NOT merged or
1719 				 * deleted.
1720 				 */
1721 				result = vm_map_lookup_entry(map, saved_start,
1722 				    &tmp_entry);
1723 				KASSERT(result, ("vm_map_wire: lookup failed"));
1724 				if (entry == first_entry)
1725 					first_entry = tmp_entry;
1726 				else
1727 					first_entry = NULL;
1728 				entry = tmp_entry;
1729 				while (entry->end < saved_end) {
1730 					if (rv != KERN_SUCCESS) {
1731 						KASSERT(entry->wired_count == 1,
1732 						    ("vm_map_wire: bad count"));
1733 						entry->wired_count = -1;
1734 					}
1735 					entry = entry->next;
1736 				}
1737 			}
1738 			last_timestamp = map->timestamp;
1739 			if (rv != KERN_SUCCESS) {
1740 				KASSERT(entry->wired_count == 1,
1741 				    ("vm_map_wire: bad count"));
1742 				/*
1743 				 * Assign an out-of-range value to represent
1744 				 * the failure to wire this entry.
1745 				 */
1746 				entry->wired_count = -1;
1747 				end = entry->end;
1748 				goto done;
1749 			}
1750 		} else if (!user_wire ||
1751 			   (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
1752 			entry->wired_count++;
1753 		}
1754 		/*
1755 		 * Check the map for holes in the specified region.
1756 		 */
1757 		if (entry->end < end && (entry->next == &map->header ||
1758 		    entry->next->start > entry->end)) {
1759 			end = entry->end;
1760 			rv = KERN_INVALID_ADDRESS;
1761 			goto done;
1762 		}
1763 		entry = entry->next;
1764 	}
1765 	rv = KERN_SUCCESS;
1766 done:
1767 	need_wakeup = FALSE;
1768 	if (first_entry == NULL) {
1769 		result = vm_map_lookup_entry(map, start, &first_entry);
1770 		KASSERT(result, ("vm_map_wire: lookup failed"));
1771 	}
1772 	entry = first_entry;
1773 	while (entry != &map->header && entry->start < end) {
1774 		if (rv == KERN_SUCCESS) {
1775 			if (user_wire)
1776 				entry->eflags |= MAP_ENTRY_USER_WIRED;
1777 		} else if (entry->wired_count == -1) {
1778 			/*
1779 			 * Wiring failed on this entry.  Thus, unwiring is
1780 			 * unnecessary.
1781 			 */
1782 			entry->wired_count = 0;
1783 		} else {
1784 			if (!user_wire || (entry->wired_count == 1 &&
1785 			    (entry->eflags & MAP_ENTRY_USER_WIRED) == 0))
1786 				entry->wired_count--;
1787 			if (entry->wired_count == 0) {
1788 				/*
1789 				 * Retain the map lock.
1790 				 */
1791 				vm_fault_unwire(map, entry->start, entry->end);
1792 			}
1793 		}
1794 		KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
1795 			("vm_map_wire: in-transition flag missing"));
1796 		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
1797 		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
1798 			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
1799 			need_wakeup = TRUE;
1800 		}
1801 		vm_map_simplify_entry(map, entry);
1802 		entry = entry->next;
1803 	}
1804 	vm_map_unlock(map);
1805 	if (need_wakeup)
1806 		vm_map_wakeup(map);
1807 	return (rv);
1808 }
1809 
1810 /*
1811  * vm_map_clean
1812  *
1813  * Push any dirty cached pages in the address range to their pager.
1814  * If syncio is TRUE, dirty pages are written synchronously.
1815  * If invalidate is TRUE, any cached pages are freed as well.
1816  *
1817  * Returns an error if any part of the specified range is not mapped.
1818  */
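/*
 * Usage sketch: an msync(2)-style caller would invoke this roughly as
 * follows, with syncio requesting synchronous writes and invalidate
 * requesting that cached pages also be freed (flag mapping is illustrative):
 *
 *	rv = vm_map_clean(map, start, end,
 *	    (flags & MS_ASYNC) == 0, (flags & MS_INVALIDATE) != 0);
 */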
1819 int
1820 vm_map_clean(
1821 	vm_map_t map,
1822 	vm_offset_t start,
1823 	vm_offset_t end,
1824 	boolean_t syncio,
1825 	boolean_t invalidate)
1826 {
1827 	vm_map_entry_t current;
1828 	vm_map_entry_t entry;
1829 	vm_size_t size;
1830 	vm_object_t object;
1831 	vm_ooffset_t offset;
1832 
1833 	GIANT_REQUIRED;
1834 
1835 	vm_map_lock_read(map);
1836 	VM_MAP_RANGE_CHECK(map, start, end);
1837 	if (!vm_map_lookup_entry(map, start, &entry)) {
1838 		vm_map_unlock_read(map);
1839 		return (KERN_INVALID_ADDRESS);
1840 	}
1841 	/*
1842 	 * Make a first pass to check for holes.
1843 	 */
1844 	for (current = entry; current->start < end; current = current->next) {
1845 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1846 			vm_map_unlock_read(map);
1847 			return (KERN_INVALID_ARGUMENT);
1848 		}
1849 		if (end > current->end &&
1850 		    (current->next == &map->header ||
1851 			current->end != current->next->start)) {
1852 			vm_map_unlock_read(map);
1853 			return (KERN_INVALID_ADDRESS);
1854 		}
1855 	}
1856 
1857 	if (invalidate)
1858 		pmap_remove(vm_map_pmap(map), start, end);
1859 	/*
1860 	 * Make a second pass, cleaning/uncaching pages from the indicated
1861 	 * objects as we go.
1862 	 */
1863 	for (current = entry; current->start < end; current = current->next) {
1864 		offset = current->offset + (start - current->start);
1865 		size = (end <= current->end ? end : current->end) - start;
1866 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1867 			vm_map_t smap;
1868 			vm_map_entry_t tentry;
1869 			vm_size_t tsize;
1870 
1871 			smap = current->object.sub_map;
1872 			vm_map_lock_read(smap);
1873 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1874 			tsize = tentry->end - offset;
1875 			if (tsize < size)
1876 				size = tsize;
1877 			object = tentry->object.vm_object;
1878 			offset = tentry->offset + (offset - tentry->start);
1879 			vm_map_unlock_read(smap);
1880 		} else {
1881 			object = current->object.vm_object;
1882 		}
1883 		/*
1884 		 * Note that there is absolutely no sense in writing out
1885 		 * anonymous objects, so we track down the vnode object
1886 		 * to write out.
1887 		 * We invalidate (remove) all pages from the address space
1888 		 * anyway, for semantic correctness.
1889 		 *
1890 		 * note: certain anonymous maps, such as MAP_NOSYNC maps,
1891 		 * may start out with a NULL object.
1892 		 */
1893 		while (object && object->backing_object) {
1894 			object = object->backing_object;
1895 			offset += object->backing_object_offset;
1896 			if (object->size < OFF_TO_IDX(offset + size))
1897 				size = IDX_TO_OFF(object->size) - offset;
1898 		}
1899 		if (object && (object->type == OBJT_VNODE) &&
1900 		    (current->protection & VM_PROT_WRITE)) {
1901 			/*
1902 			 * Flush pages if writing is allowed, invalidate them
1903 			 * if invalidation requested.  Pages undergoing I/O
1904 			 * will be ignored by vm_object_page_remove().
1905 			 *
1906 			 * We cannot lock the vnode and then wait for paging
1907 			 * to complete without deadlocking against vm_fault.
1908 			 * Instead we simply call vm_object_page_remove() and
1909 			 * allow it to block internally on a page-by-page
1910 			 * basis when it encounters pages undergoing async
1911 			 * I/O.
1912 			 */
1913 			int flags;
1914 
1915 			vm_object_reference(object);
1916 			vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
1917 			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1918 			flags |= invalidate ? OBJPC_INVAL : 0;
1919 			vm_object_page_clean(object,
1920 			    OFF_TO_IDX(offset),
1921 			    OFF_TO_IDX(offset + size + PAGE_MASK),
1922 			    flags);
1923 			VOP_UNLOCK(object->handle, 0, curthread);
1924 			vm_object_deallocate(object);
1925 		}
1926 		if (object && invalidate &&
1927 		    ((object->type == OBJT_VNODE) ||
1928 		     (object->type == OBJT_DEVICE))) {
1929 			vm_object_reference(object);
1930 			vm_object_page_remove(object,
1931 			    OFF_TO_IDX(offset),
1932 			    OFF_TO_IDX(offset + size + PAGE_MASK),
1933 			    FALSE);
1934 			vm_object_deallocate(object);
1935 		}
1936 		start += size;
1937 	}
1938 
1939 	vm_map_unlock_read(map);
1940 	return (KERN_SUCCESS);
1941 }
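
/*
 * Illustrative sketch (not a guaranteed interface contract): an
 * msync(2)-style caller could flush, and optionally invalidate, a
 * page-aligned user range along these lines, where "map", "addr",
 * "len" and "flags" stand in for the caller's own state:
 *
 *	rv = vm_map_clean(map, addr, addr + len,
 *	    (flags & MS_SYNC) != 0, (flags & MS_INVALIDATE) != 0);
 *	if (rv != KERN_SUCCESS)
 *		return (EINVAL);
 */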
1942 
1943 /*
1944  *	vm_map_entry_unwire:	[ internal use only ]
1945  *
1946  *	Make the region specified by this entry pageable.
1947  *
1948  *	The map in question should be locked.
1949  *	[This is the reason for this routine's existence.]
1950  */
1951 static void
1952 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
1953 {
1954 	vm_fault_unwire(map, entry->start, entry->end);
1955 	entry->wired_count = 0;
1956 }
1957 
1958 /*
1959  *	vm_map_entry_delete:	[ internal use only ]
1960  *
1961  *	Deallocate the given entry from the target map.
1962  */
1963 static void
1964 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
1965 {
1966 	vm_map_entry_unlink(map, entry);
1967 	map->size -= entry->end - entry->start;
1968 
1969 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1970 		vm_object_deallocate(entry->object.vm_object);
1971 	}
1972 
1973 	vm_map_entry_dispose(map, entry);
1974 }
1975 
1976 /*
1977  *	vm_map_delete:	[ internal use only ]
1978  *
1979  *	Deallocates the given address range from the target
1980  *	map.
1981  */
1982 int
1983 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
1984 {
1985 	vm_object_t object;
1986 	vm_map_entry_t entry;
1987 	vm_map_entry_t first_entry;
1988 
1989 	/*
1990 	 * Find the start of the region, and clip it
1991 	 */
1992 	if (!vm_map_lookup_entry(map, start, &first_entry))
1993 		entry = first_entry->next;
1994 	else {
1995 		entry = first_entry;
1996 		vm_map_clip_start(map, entry, start);
1997 	}
1998 
1999 	/*
2000 	 * Save the free space hint
2001 	 */
2002 	if (entry == &map->header) {
2003 		map->first_free = &map->header;
2004 	} else if (map->first_free->start >= start) {
2005 		map->first_free = entry->prev;
2006 	}
2007 
2008 	/*
2009 	 * Step through all entries in this region
2010 	 */
2011 	while ((entry != &map->header) && (entry->start < end)) {
2012 		vm_map_entry_t next;
2013 		vm_offset_t s, e;
2014 		vm_pindex_t offidxstart, offidxend, count;
2015 
2016 		/*
2017 		 * Wait for wiring or unwiring of an entry to complete.
2018 		 */
2019 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0) {
2020 			unsigned int last_timestamp;
2021 			vm_offset_t saved_start;
2022 			vm_map_entry_t tmp_entry;
2023 
2024 			saved_start = entry->start;
2025 			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
2026 			last_timestamp = map->timestamp;
2027 			(void) vm_map_unlock_and_wait(map, FALSE);
2028 			vm_map_lock(map);
2029 			if (last_timestamp + 1 != map->timestamp) {
2030 				/*
2031 				 * Look again for the entry because the map was
2032 				 * modified while it was unlocked.
2033 				 * Specifically, the entry may have been
2034 				 * clipped, merged, or deleted.
2035 				 */
2036 				if (!vm_map_lookup_entry(map, saved_start,
2037 							 &tmp_entry))
2038 					entry = tmp_entry->next;
2039 				else {
2040 					entry = tmp_entry;
2041 					vm_map_clip_start(map, entry,
2042 							  saved_start);
2043 				}
2044 			}
2045 			continue;
2046 		}
2047 		vm_map_clip_end(map, entry, end);
2048 
2049 		s = entry->start;
2050 		e = entry->end;
2051 		next = entry->next;
2052 
2053 		offidxstart = OFF_TO_IDX(entry->offset);
2054 		count = OFF_TO_IDX(e - s);
2055 		object = entry->object.vm_object;
2056 
2057 		/*
2058 		 * Unwire before removing addresses from the pmap; otherwise,
2059 		 * unwiring will put the entries back in the pmap.
2060 		 */
2061 		if (entry->wired_count != 0) {
2062 			vm_map_entry_unwire(map, entry);
2063 		}
2064 
2065 		offidxend = offidxstart + count;
2066 
2067 		if ((object == kernel_object) || (object == kmem_object)) {
2068 			vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2069 		} else {
2070 			mtx_lock(&Giant);
2071 			pmap_remove(map->pmap, s, e);
2072 			if (object != NULL &&
2073 			    object->ref_count != 1 &&
2074 			    (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2075 			    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2076 				vm_object_collapse(object);
2077 				vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2078 				if (object->type == OBJT_SWAP) {
2079 					swap_pager_freespace(object, offidxstart, count);
2080 				}
2081 				if (offidxend >= object->size &&
2082 				    offidxstart < object->size) {
2083 					object->size = offidxstart;
2084 				}
2085 			}
2086 			mtx_unlock(&Giant);
2087 		}
2088 
2089 		/*
2090 		 * Delete the entry (which may delete the object) only after
2091 		 * removing all pmap entries pointing to its pages.
2092 		 * (Otherwise, its page frames may be reallocated, and any
2093 		 * modify bits will be set in the wrong object!)
2094 		 */
2095 		vm_map_entry_delete(map, entry);
2096 		entry = next;
2097 	}
2098 	return (KERN_SUCCESS);
2099 }
2100 
2101 /*
2102  *	vm_map_remove:
2103  *
2104  *	Remove the given address range from the target map.
2105  *	This is the exported form of vm_map_delete.
2106  */
2107 int
2108 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2109 {
2110 	int result, s = 0;
2111 
2112 	if (map == kmem_map)
2113 		s = splvm();
2114 
2115 	vm_map_lock(map);
2116 	VM_MAP_RANGE_CHECK(map, start, end);
2117 	result = vm_map_delete(map, start, end);
2118 	vm_map_unlock(map);
2119 
2120 	if (map == kmem_map)
2121 		splx(s);
2122 
2123 	return (result);
2124 }
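
/*
 * Illustrative sketch: tearing down a previously established kernel
 * mapping reduces to a single call; "addr" and "size" are placeholders
 * for a page-aligned range owned by the caller:
 *
 *	(void) vm_map_remove(kernel_map, addr, addr + size);
 */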
2125 
2126 /*
2127  *	vm_map_check_protection:
2128  *
2129  *	Assert that the target map allows the specified
2130  *	privilege on the entire address region given.
2131  *	The entire region must be allocated.
2132  */
2133 boolean_t
2134 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2135 			vm_prot_t protection)
2136 {
2137 	vm_map_entry_t entry;
2138 	vm_map_entry_t tmp_entry;
2139 
2140 	vm_map_lock_read(map);
2141 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2142 		vm_map_unlock_read(map);
2143 		return (FALSE);
2144 	}
2145 	entry = tmp_entry;
2146 
2147 	while (start < end) {
2148 		if (entry == &map->header) {
2149 			vm_map_unlock_read(map);
2150 			return (FALSE);
2151 		}
2152 		/*
2153 		 * No holes allowed!
2154 		 */
2155 		if (start < entry->start) {
2156 			vm_map_unlock_read(map);
2157 			return (FALSE);
2158 		}
2159 		/*
2160 		 * Check protection associated with entry.
2161 		 */
2162 		if ((entry->protection & protection) != protection) {
2163 			vm_map_unlock_read(map);
2164 			return (FALSE);
2165 		}
2166 		/* go to next entry */
2167 		start = entry->end;
2168 		entry = entry->next;
2169 	}
2170 	vm_map_unlock_read(map);
2171 	return (TRUE);
2172 }
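
/*
 * Illustrative sketch: a caller about to read from a user-supplied
 * range can verify access up front; "map", "uaddr" and "len" are
 * placeholders:
 *
 *	if (!vm_map_check_protection(map, trunc_page(uaddr),
 *	    round_page(uaddr + len), VM_PROT_READ))
 *		return (EFAULT);
 */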
2173 
2174 /*
2175  *	vm_map_copy_entry:
2176  *
2177  *	Copies the contents of the source entry to the destination
2178  *	entry.  The entries *must* be aligned properly.
2179  */
2180 static void
2181 vm_map_copy_entry(
2182 	vm_map_t src_map,
2183 	vm_map_t dst_map,
2184 	vm_map_entry_t src_entry,
2185 	vm_map_entry_t dst_entry)
2186 {
2187 	vm_object_t src_object;
2188 
2189 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2190 		return;
2191 
2192 	if (src_entry->wired_count == 0) {
2193 
2194 		/*
2195 		 * If the source entry is marked needs_copy, it is already
2196 		 * write-protected.
2197 		 */
2198 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2199 			pmap_protect(src_map->pmap,
2200 			    src_entry->start,
2201 			    src_entry->end,
2202 			    src_entry->protection & ~VM_PROT_WRITE);
2203 		}
2204 
2205 		/*
2206 		 * Make a copy of the object.
2207 		 */
2208 		if ((src_object = src_entry->object.vm_object) != NULL) {
2209 
2210 			if ((src_object->handle == NULL) &&
2211 				(src_object->type == OBJT_DEFAULT ||
2212 				 src_object->type == OBJT_SWAP)) {
2213 				vm_object_collapse(src_object);
2214 				if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2215 					vm_object_split(src_entry);
2216 					src_object = src_entry->object.vm_object;
2217 				}
2218 			}
2219 
2220 			vm_object_reference(src_object);
2221 			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2222 			dst_entry->object.vm_object = src_object;
2223 			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2224 			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2225 			dst_entry->offset = src_entry->offset;
2226 		} else {
2227 			dst_entry->object.vm_object = NULL;
2228 			dst_entry->offset = 0;
2229 		}
2230 
2231 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2232 		    dst_entry->end - dst_entry->start, src_entry->start);
2233 	} else {
2234 		/*
2235 		 * Of course, wired down pages can't be set copy-on-write.
2236 		 * Cause wired pages to be copied into the new map by
2237 		 * simulating faults (the new pages are pageable).
2238 		 */
2239 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2240 	}
2241 }
2242 
2243 /*
2244  * vmspace_fork:
2245  * Create a new process vmspace structure and vm_map
2246  * based on those of an existing process.  The new map
2247  * is based on the old map, according to the inheritance
2248  * values on the regions in that map.
2249  *
2250  * The source map must not be locked.
2251  */
2252 struct vmspace *
2253 vmspace_fork(struct vmspace *vm1)
2254 {
2255 	struct vmspace *vm2;
2256 	vm_map_t old_map = &vm1->vm_map;
2257 	vm_map_t new_map;
2258 	vm_map_entry_t old_entry;
2259 	vm_map_entry_t new_entry;
2260 	vm_object_t object;
2261 
2262 	GIANT_REQUIRED;
2263 
2264 	vm_map_lock(old_map);
2265 	old_map->infork = 1;
2266 
2267 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2268 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2269 	    (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
2270 	new_map = &vm2->vm_map;	/* XXX */
2271 	new_map->timestamp = 1;
2272 
2273 	old_entry = old_map->header.next;
2274 
2275 	while (old_entry != &old_map->header) {
2276 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2277 			panic("vm_map_fork: encountered a submap");
2278 
2279 		switch (old_entry->inheritance) {
2280 		case VM_INHERIT_NONE:
2281 			break;
2282 
2283 		case VM_INHERIT_SHARE:
2284 			/*
2285 			 * Clone the entry, creating the shared object if necessary.
2286 			 */
2287 			object = old_entry->object.vm_object;
2288 			if (object == NULL) {
2289 				object = vm_object_allocate(OBJT_DEFAULT,
2290 					atop(old_entry->end - old_entry->start));
2291 				old_entry->object.vm_object = object;
2292 				old_entry->offset = (vm_offset_t) 0;
2293 			}
2294 
2295 			/*
2296 			 * Add the reference before calling vm_object_shadow
2297 			 * to ensure that a shadow object is created.
2298 			 */
2299 			vm_object_reference(object);
2300 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2301 				vm_object_shadow(&old_entry->object.vm_object,
2302 					&old_entry->offset,
2303 					atop(old_entry->end - old_entry->start));
2304 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2305 				/* Transfer the second reference too. */
2306 				vm_object_reference(
2307 				    old_entry->object.vm_object);
2308 				vm_object_deallocate(object);
2309 				object = old_entry->object.vm_object;
2310 			}
2311 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
2312 
2313 			/*
2314 			 * Clone the entry, referencing the shared object.
2315 			 */
2316 			new_entry = vm_map_entry_create(new_map);
2317 			*new_entry = *old_entry;
2318 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2319 			new_entry->wired_count = 0;
2320 
2321 			/*
2322 			 * Insert the entry into the new map -- we know we're
2323 			 * inserting at the end of the new map.
2324 			 */
2325 			vm_map_entry_link(new_map, new_map->header.prev,
2326 			    new_entry);
2327 
2328 			/*
2329 			 * Update the physical map
2330 			 */
2331 			pmap_copy(new_map->pmap, old_map->pmap,
2332 			    new_entry->start,
2333 			    (old_entry->end - old_entry->start),
2334 			    old_entry->start);
2335 			break;
2336 
2337 		case VM_INHERIT_COPY:
2338 			/*
2339 			 * Clone the entry and link into the map.
2340 			 */
2341 			new_entry = vm_map_entry_create(new_map);
2342 			*new_entry = *old_entry;
2343 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2344 			new_entry->wired_count = 0;
2345 			new_entry->object.vm_object = NULL;
2346 			vm_map_entry_link(new_map, new_map->header.prev,
2347 			    new_entry);
2348 			vm_map_copy_entry(old_map, new_map, old_entry,
2349 			    new_entry);
2350 			break;
2351 		}
2352 		old_entry = old_entry->next;
2353 	}
2354 
2355 	new_map->size = old_map->size;
2356 	old_map->infork = 0;
2357 	vm_map_unlock(old_map);
2358 
2359 	return (vm2);
2360 }
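
/*
 * Illustrative sketch: the fork path duplicates the parent's address
 * space with a single call, roughly as follows ("p1" and "p2" stand
 * for the parent and child processes):
 *
 *	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
 *	pmap_pinit2(vmspace_pmap(p2->p_vmspace));
 */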
2361 
2362 int
2363 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2364 	      vm_prot_t prot, vm_prot_t max, int cow)
2365 {
2366 	vm_map_entry_t prev_entry;
2367 	vm_map_entry_t new_stack_entry;
2368 	vm_size_t      init_ssize;
2369 	int            rv;
2370 
2371 	if (addrbos < vm_map_min(map))
2372 		return (KERN_NO_SPACE);
2373 
2374 	if (max_ssize < sgrowsiz)
2375 		init_ssize = max_ssize;
2376 	else
2377 		init_ssize = sgrowsiz;
2378 
2379 	vm_map_lock(map);
2380 
2381 	/* If addr is already mapped, no go */
2382 	if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2383 		vm_map_unlock(map);
2384 		return (KERN_NO_SPACE);
2385 	}
2386 
2387 	/* If we would blow our VMEM resource limit, no go */
2388 	if (map->size + init_ssize >
2389 	    curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2390 		vm_map_unlock(map);
2391 		return (KERN_NO_SPACE);
2392 	}
2393 
2394 	/* If we can't accommodate max_ssize in the current mapping,
2395 	 * no go.  However, we need to be aware that subsequent user
2396 	 * mappings might map into the space we have reserved for
2397 	 * stack, and currently this space is not protected.
2398 	 *
2399 	 * Hopefully we will at least detect this condition
2400 	 * when we try to grow the stack.
2401 	 */
2402 	if ((prev_entry->next != &map->header) &&
2403 	    (prev_entry->next->start < addrbos + max_ssize)) {
2404 		vm_map_unlock(map);
2405 		return (KERN_NO_SPACE);
2406 	}
2407 
2408 	/* We initially map a stack of only init_ssize.  We will
2409 	 * grow as needed later.  Since this is to be a grow
2410 	 * down stack, we map at the top of the range.
2411 	 *
2412 	 * Note: we would normally expect prot and max to be
2413 	 * VM_PROT_ALL, and cow to be 0.  Possibly we should
2414 	 * eliminate these as input parameters, and just
2415 	 * pass these values here in the insert call.
2416 	 */
2417 	rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2418 	                   addrbos + max_ssize, prot, max, cow);
2419 
2420 	/* Now set the avail_ssize amount */
2421 	if (rv == KERN_SUCCESS){
2422 		if (prev_entry != &map->header)
2423 			vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2424 		new_stack_entry = prev_entry->next;
2425 		if (new_stack_entry->end   != addrbos + max_ssize ||
2426 		    new_stack_entry->start != addrbos + max_ssize - init_ssize)
2427 			panic ("Bad entry start/end for new stack entry");
2428 		else
2429 			new_stack_entry->avail_ssize = max_ssize - init_ssize;
2430 	}
2431 
2432 	vm_map_unlock(map);
2433 	return (rv);
2434 }
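
/*
 * Illustrative sketch: exec-time stack setup reserves the full stack
 * range but only populates the initial portion; "sv_usrstack" and
 * "maxssiz" are placeholders for the ABI's stack top and stack limit:
 *
 *	rv = vm_map_stack(&vmspace->vm_map, sv_usrstack - maxssiz,
 *	    maxssiz, VM_PROT_ALL, VM_PROT_ALL, 0);
 *	if (rv != KERN_SUCCESS)
 *		return (ENOMEM);
 */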
2435 
2436 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2437  * desired address is already mapped, or if we successfully grow
2438  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2439  * stack range (this is strange, but preserves compatibility with
2440  * the grow function in vm_machdep.c).
2441  */
2442 int
2443 vm_map_growstack (struct proc *p, vm_offset_t addr)
2444 {
2445 	vm_map_entry_t prev_entry;
2446 	vm_map_entry_t stack_entry;
2447 	vm_map_entry_t new_stack_entry;
2448 	struct vmspace *vm = p->p_vmspace;
2449 	vm_map_t map = &vm->vm_map;
2450 	vm_offset_t    end;
2451 	int      grow_amount;
2452 	int      rv;
2453 	int      is_procstack;
2454 
2455 	GIANT_REQUIRED;
2456 
2457 Retry:
2458 	vm_map_lock_read(map);
2459 
2460 	/* If addr is already in the entry range, no need to grow. */
2461 	if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2462 		vm_map_unlock_read(map);
2463 		return (KERN_SUCCESS);
2464 	}
2465 
2466 	if ((stack_entry = prev_entry->next) == &map->header) {
2467 		vm_map_unlock_read(map);
2468 		return (KERN_SUCCESS);
2469 	}
2470 	if (prev_entry == &map->header)
2471 		end = stack_entry->start - stack_entry->avail_ssize;
2472 	else
2473 		end = prev_entry->end;
2474 
2475 	/* This next test mimics the old grow function in vm_machdep.c.
2476 	 * It really doesn't quite make sense, but we do it anyway
2477 	 * for compatibility.
2478 	 *
2479 	 * If the stack is not growable, return success.  This signals the
2480 	 * caller to proceed as it normally would with ordinary VM handling.
2481 	 */
2482 	if (stack_entry->avail_ssize < 1 ||
2483 	    addr >= stack_entry->start ||
2484 	    addr <  stack_entry->start - stack_entry->avail_ssize) {
2485 		vm_map_unlock_read(map);
2486 		return (KERN_SUCCESS);
2487 	}
2488 
2489 	/* Find the minimum grow amount */
2490 	grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2491 	if (grow_amount > stack_entry->avail_ssize) {
2492 		vm_map_unlock_read(map);
2493 		return (KERN_NO_SPACE);
2494 	}
2495 
2496 	/* If there is no longer enough space between the entries,
2497 	 * fail and adjust the available space.  Note: this
2498 	 * should only happen if the user has mapped into the
2499 	 * stack area after the stack was created, and is
2500 	 * probably an error.
2501 	 *
2502 	 * This also effectively destroys any guard page the user
2503 	 * might have intended by limiting the stack size.
2504 	 */
2505 	if (grow_amount > stack_entry->start - end) {
2506 		if (vm_map_lock_upgrade(map))
2507 			goto Retry;
2508 
2509 		stack_entry->avail_ssize = stack_entry->start - end;
2510 
2511 		vm_map_unlock(map);
2512 		return (KERN_NO_SPACE);
2513 	}
2514 
2515 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2516 
2517 	/* If this is the main process stack, see if we're over the
2518 	 * stack limit.
2519 	 */
2520 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2521 			     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2522 		vm_map_unlock_read(map);
2523 		return (KERN_NO_SPACE);
2524 	}
2525 
2526 	/* Round up the grow amount to a multiple of sgrowsiz. */
2527 	grow_amount = roundup (grow_amount, sgrowsiz);
2528 	if (grow_amount > stack_entry->avail_ssize) {
2529 		grow_amount = stack_entry->avail_ssize;
2530 	}
2531 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2532 	                     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2533 		grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2534 		              ctob(vm->vm_ssize);
2535 	}
2536 
2537 	/* If we would blow our VMEM resource limit, no go */
2538 	if (map->size + grow_amount >
2539 	    curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
2540 		vm_map_unlock_read(map);
2541 		return (KERN_NO_SPACE);
2542 	}
2543 
2544 	if (vm_map_lock_upgrade(map))
2545 		goto Retry;
2546 
2547 	/* Get the preliminary new entry start value */
2548 	addr = stack_entry->start - grow_amount;
2549 
2550 	/* If this puts us into the previous entry, cut back our growth
2551 	 * to the available space.  Also, see the note above.
2552 	 */
2553 	if (addr < end) {
2554 		stack_entry->avail_ssize = stack_entry->start - end;
2555 		addr = end;
2556 	}
2557 
2558 	rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2559 	    p->p_sysent->sv_stackprot, VM_PROT_ALL, 0);
2560 
2561 	/* Adjust the available stack space by the amount we grew. */
2562 	if (rv == KERN_SUCCESS) {
2563 		if (prev_entry != &map->header)
2564 			vm_map_clip_end(map, prev_entry, addr);
2565 		new_stack_entry = prev_entry->next;
2566 		if (new_stack_entry->end   != stack_entry->start  ||
2567 		    new_stack_entry->start != addr)
2568 			panic ("Bad stack grow start/end in new stack entry");
2569 		else {
2570 			new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2571 							(new_stack_entry->end -
2572 							 new_stack_entry->start);
2573 			if (is_procstack)
2574 				vm->vm_ssize += btoc(new_stack_entry->end -
2575 						     new_stack_entry->start);
2576 		}
2577 	}
2578 
2579 	vm_map_unlock(map);
2580 	return (rv);
2581 }
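
/*
 * Illustrative sketch: the page-fault path typically gives the stack a
 * chance to grow before handing the fault to vm_fault(); "p", "map",
 * "va" and "ftype" are placeholders for the trap handler's state:
 *
 *	if (vm_map_growstack(p, va) != KERN_SUCCESS)
 *		return (KERN_FAILURE);
 *	rv = vm_fault(map, trunc_page(va), ftype, VM_FAULT_NORMAL);
 */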
2582 
2583 /*
2584  * Unshare the specified VM space for exec.  If other processes share
2585  * it, then create a new one.  The new vmspace starts out empty.
2586  */
2587 void
2588 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
2589 {
2590 	struct vmspace *oldvmspace = p->p_vmspace;
2591 	struct vmspace *newvmspace;
2592 
2593 	GIANT_REQUIRED;
2594 	newvmspace = vmspace_alloc(minuser, maxuser);
2595 	bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2596 	    (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2597 	/*
2598 	 * This code is written like this for prototype purposes.  The
2599 	 * goal is to avoid running down the vmspace here, but to let the
2600 	 * other processes that are still using the vmspace finally run
2601 	 * it down.  Even though there is little or no chance of blocking
2602 	 * here, it is a good idea to keep this form for future mods.
2603 	 */
2604 	p->p_vmspace = newvmspace;
2605 	pmap_pinit2(vmspace_pmap(newvmspace));
2606 	vmspace_free(oldvmspace);
2607 	if (p == curthread->td_proc)		/* XXXKSE ? */
2608 		pmap_activate(curthread);
2609 }
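
/*
 * Illustrative sketch: one plausible exec-time pattern, with
 * "sv_minuser" and "sv_maxuser" standing in for the ABI's user address
 * limits; a private address space is simply emptied, while a shared
 * one is replaced:
 *
 *	if (p->p_vmspace->vm_refcnt == 1)
 *		vm_map_remove(&p->p_vmspace->vm_map, sv_minuser, sv_maxuser);
 *	else
 *		vmspace_exec(p, sv_minuser, sv_maxuser);
 */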
2610 
2611 /*
2612  * Unshare the specified VM space for forcing COW.  This
2613  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2614  */
2615 void
2616 vmspace_unshare(struct proc *p)
2617 {
2618 	struct vmspace *oldvmspace = p->p_vmspace;
2619 	struct vmspace *newvmspace;
2620 
2621 	GIANT_REQUIRED;
2622 	if (oldvmspace->vm_refcnt == 1)
2623 		return;
2624 	newvmspace = vmspace_fork(oldvmspace);
2625 	p->p_vmspace = newvmspace;
2626 	pmap_pinit2(vmspace_pmap(newvmspace));
2627 	vmspace_free(oldvmspace);
2628 	if (p == curthread->td_proc)		/* XXXKSE ? */
2629 		pmap_activate(curthread);
2630 }
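
/*
 * Illustrative sketch: rfork(2) with neither RFPROC nor RFMEM set
 * forces the calling process onto a private copy of its address space
 * ("flags" and "p1" are placeholders for the rfork arguments):
 *
 *	if ((flags & (RFPROC | RFMEM)) == 0)
 *		vmspace_unshare(p1);
 */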
2631 
2632 /*
2633  *	vm_map_lookup:
2634  *
2635  *	Finds the VM object, offset, and
2636  *	protection for a given virtual address in the
2637  *	specified map, assuming a page fault of the
2638  *	type specified.
2639  *
2640  *	Leaves the map in question locked for read; return
2641  *	values are guaranteed until a vm_map_lookup_done
2642  *	call is performed.  Note that the map argument
2643  *	is in/out; the returned map must be used in
2644  *	the call to vm_map_lookup_done.
2645  *
2646  *	A handle (out_entry) is returned for use in
2647  *	vm_map_lookup_done, to make that fast.
2648  *
2649  *	If a lookup is requested with "write protection"
2650  *	specified, the map may be changed to perform virtual
2651  *	copying operations, although the data referenced will
2652  *	remain the same.
2653  */
2654 int
2655 vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
2656 	      vm_offset_t vaddr,
2657 	      vm_prot_t fault_typea,
2658 	      vm_map_entry_t *out_entry,	/* OUT */
2659 	      vm_object_t *object,		/* OUT */
2660 	      vm_pindex_t *pindex,		/* OUT */
2661 	      vm_prot_t *out_prot,		/* OUT */
2662 	      boolean_t *wired)			/* OUT */
2663 {
2664 	vm_map_entry_t entry;
2665 	vm_map_t map = *var_map;
2666 	vm_prot_t prot;
2667 	vm_prot_t fault_type = fault_typea;
2668 
2669 RetryLookup:;
2670 	/*
2671 	 * Lookup the faulting address.
2672 	 */
2673 
2674 	vm_map_lock_read(map);
2675 #define	RETURN(why) \
2676 		{ \
2677 		vm_map_unlock_read(map); \
2678 		return (why); \
2679 		}
2680 
2681 	/*
2682 	 * If the map has an interesting hint, try it before calling the
2683 	 * full-blown lookup routine.
2684 	 */
2685 	entry = map->root;
2686 	*out_entry = entry;
2687 	if (entry == NULL ||
2688 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2689 		/*
2690 		 * Entry was either not a valid hint, or the vaddr was not
2691 		 * contained in the entry, so do a full lookup.
2692 		 */
2693 		if (!vm_map_lookup_entry(map, vaddr, out_entry))
2694 			RETURN(KERN_INVALID_ADDRESS);
2695 
2696 		entry = *out_entry;
2697 	}
2698 
2699 	/*
2700 	 * Handle submaps.
2701 	 */
2702 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2703 		vm_map_t old_map = map;
2704 
2705 		*var_map = map = entry->object.sub_map;
2706 		vm_map_unlock_read(old_map);
2707 		goto RetryLookup;
2708 	}
2709 
2710 	/*
2711 	 * Check whether this task is allowed to have this page.
2712 	 * Note the special case for MAP_ENTRY_COW
2713 	 * pages with an override.  This is to implement a forced
2714 	 * COW for debuggers.
2715 	 */
2716 	if (fault_type & VM_PROT_OVERRIDE_WRITE)
2717 		prot = entry->max_protection;
2718 	else
2719 		prot = entry->protection;
2720 	fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2721 	if ((fault_type & prot) != fault_type) {
2722 			RETURN(KERN_PROTECTION_FAILURE);
2723 	}
2724 	if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2725 	    (entry->eflags & MAP_ENTRY_COW) &&
2726 	    (fault_type & VM_PROT_WRITE) &&
2727 	    (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2728 		RETURN(KERN_PROTECTION_FAILURE);
2729 	}
2730 
2731 	/*
2732 	 * If this page is not pageable, we have to get it for all possible
2733 	 * accesses.
2734 	 */
2735 	*wired = (entry->wired_count != 0);
2736 	if (*wired)
2737 		prot = fault_type = entry->protection;
2738 
2739 	/*
2740 	 * If the entry was copy-on-write, we either ...
2741 	 */
2742 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2743 		/*
2744 		 * If we want to write the page, we may as well handle that
2745 		 * now since we've got the map locked.
2746 		 *
2747 		 * If we don't need to write the page, we just demote the
2748 		 * permissions allowed.
2749 		 */
2750 		if (fault_type & VM_PROT_WRITE) {
2751 			/*
2752 			 * Make a new object, and place it in the object
2753 			 * chain.  Note that no new references have appeared
2754 			 * -- one just moved from the map to the new
2755 			 * object.
2756 			 */
2757 			if (vm_map_lock_upgrade(map))
2758 				goto RetryLookup;
2759 
2760 			vm_object_shadow(
2761 			    &entry->object.vm_object,
2762 			    &entry->offset,
2763 			    atop(entry->end - entry->start));
2764 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2765 
2766 			vm_map_lock_downgrade(map);
2767 		} else {
2768 			/*
2769 			 * We're attempting to read a copy-on-write page --
2770 			 * don't allow writes.
2771 			 */
2772 			prot &= ~VM_PROT_WRITE;
2773 		}
2774 	}
2775 
2776 	/*
2777 	 * Create an object if necessary.
2778 	 */
2779 	if (entry->object.vm_object == NULL &&
2780 	    !map->system_map) {
2781 		if (vm_map_lock_upgrade(map))
2782 			goto RetryLookup;
2783 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2784 		    atop(entry->end - entry->start));
2785 		entry->offset = 0;
2786 		vm_map_lock_downgrade(map);
2787 	}
2788 
2789 	/*
2790 	 * Return the object/offset from this entry.  If the entry was
2791 	 * copy-on-write or empty, it has been fixed up.
2792 	 */
2793 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2794 	*object = entry->object.vm_object;
2795 
2796 	/*
2797 	 * Return the protection actually granted for this lookup.
2798 	 */
2799 	*out_prot = prot;
2800 	return (KERN_SUCCESS);
2801 
2802 #undef	RETURN
2803 }
2804 
2805 /*
2806  *	vm_map_lookup_done:
2807  *
2808  *	Releases locks acquired by a vm_map_lookup
2809  *	(according to the handle returned by that lookup).
2810  */
2811 void
2812 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
2813 {
2814 	/*
2815 	 * Unlock the main-level map
2816 	 */
2817 	vm_map_unlock_read(map);
2818 }
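
/*
 * Illustrative sketch: vm_map_lookup() and vm_map_lookup_done() bracket
 * the interval during which the returned object and offset remain
 * valid; the variables below are placeholders for a fault-style caller:
 *
 *	rv = vm_map_lookup(&map, vaddr, fault_type, &entry,
 *	    &object, &pindex, &prot, &wired);
 *	if (rv != KERN_SUCCESS)
 *		return (rv);
 *	... use object and pindex while the map stays read-locked ...
 *	vm_map_lookup_done(map, entry);
 */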
2819 
2820 #ifdef ENABLE_VFS_IOOPT
2821 /*
2822  * Experimental support for zero-copy I/O
2823  *
2824  * Implement uiomove with VM operations.  This code (and its collateral
2825  * changes) supports every combination of source object modification and
2826  * COW-type operation.
2827  */
2828 int
2829 vm_uiomove(
2830 	vm_map_t mapa,
2831 	vm_object_t srcobject,
2832 	off_t cp,
2833 	int cnta,
2834 	vm_offset_t uaddra,
2835 	int *npages)
2836 {
2837 	vm_map_t map;
2838 	vm_object_t first_object, oldobject, object;
2839 	vm_map_entry_t entry;
2840 	vm_prot_t prot;
2841 	boolean_t wired;
2842 	int tcnt, rv;
2843 	vm_offset_t uaddr, start, end, tend;
2844 	vm_pindex_t first_pindex, oindex;
2845 	vm_size_t osize;
2846 	off_t ooffset;
2847 	int cnt;
2848 
2849 	GIANT_REQUIRED;
2850 
2851 	if (npages)
2852 		*npages = 0;
2853 
2854 	cnt = cnta;
2855 	uaddr = uaddra;
2856 
2857 	while (cnt > 0) {
2858 		map = mapa;
2859 
2860 		if ((vm_map_lookup(&map, uaddr,
2861 			VM_PROT_READ, &entry, &first_object,
2862 			&first_pindex, &prot, &wired)) != KERN_SUCCESS) {
2863 			return EFAULT;
2864 		}
2865 
2866 		vm_map_clip_start(map, entry, uaddr);
2867 
2868 		tcnt = cnt;
2869 		tend = uaddr + tcnt;
2870 		if (tend > entry->end) {
2871 			tcnt = entry->end - uaddr;
2872 			tend = entry->end;
2873 		}
2874 
2875 		vm_map_clip_end(map, entry, tend);
2876 
2877 		start = entry->start;
2878 		end = entry->end;
2879 
2880 		osize = atop(tcnt);
2881 
2882 		oindex = OFF_TO_IDX(cp);
2883 		if (npages) {
2884 			vm_size_t idx;
2885 			for (idx = 0; idx < osize; idx++) {
2886 				vm_page_t m;
2887 				if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
2888 					vm_map_lookup_done(map, entry);
2889 					return 0;
2890 				}
2891 				/*
2892 				 * disallow busy or invalid pages, but allow
2893 				 * m->busy pages if they are entirely valid.
2894 				 */
2895 				if ((m->flags & PG_BUSY) ||
2896 					((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
2897 					vm_map_lookup_done(map, entry);
2898 					return 0;
2899 				}
2900 			}
2901 		}
2902 
2903 /*
2904  * If we are changing an existing map entry, just redirect
2905  * the object, and change mappings.
2906  */
2907 		if ((first_object->type == OBJT_VNODE) &&
2908 			((oldobject = entry->object.vm_object) == first_object)) {
2909 
2910 			if ((entry->offset != cp) || (oldobject != srcobject)) {
2911 				/*
2912 				 * Remove old window into the file
2913 				 */
2914 				pmap_remove (map->pmap, uaddr, tend);
2915 
2916 				/*
2917 				 * Force copy on write for mmaped regions
2918 				 */
2919 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2920 
2921 				/*
2922 				 * Point the object appropriately
2923 				 */
2924 				if (oldobject != srcobject) {
2925 
2926 				/*
2927 				 * Set the object optimization hint flag
2928 				 */
2929 					vm_object_set_flag(srcobject, OBJ_OPT);
2930 					vm_object_reference(srcobject);
2931 					entry->object.vm_object = srcobject;
2932 
2933 					if (oldobject) {
2934 						vm_object_deallocate(oldobject);
2935 					}
2936 				}
2937 
2938 				entry->offset = cp;
2939 				map->timestamp++;
2940 			} else {
2941 				pmap_remove (map->pmap, uaddr, tend);
2942 			}
2943 
2944 		} else if ((first_object->ref_count == 1) &&
2945 			(first_object->size == osize) &&
2946 			((first_object->type == OBJT_DEFAULT) ||
2947 				(first_object->type == OBJT_SWAP)) ) {
2948 
2949 			oldobject = first_object->backing_object;
2950 
2951 			if ((first_object->backing_object_offset != cp) ||
2952 				(oldobject != srcobject)) {
2953 				/*
2954 				 * Remove old window into the file
2955 				 */
2956 				pmap_remove (map->pmap, uaddr, tend);
2957 
2958 				/*
2959 				 * Remove unneeded old pages
2960 				 */
2961 				vm_object_page_remove(first_object, 0, 0, 0);
2962 
2963 				/*
2964 				 * Invalidate swap space
2965 				 */
2966 				if (first_object->type == OBJT_SWAP) {
2967 					swap_pager_freespace(first_object,
2968 						0,
2969 						first_object->size);
2970 				}
2971 
2972 				/*
2973    				 * Force copy on write for mmaped regions
2974    				 */
2975 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2976 
2977 				/*
2978    				 * Point the object appropriately
2979    				 */
2980 				if (oldobject != srcobject) {
2981 					/*
2982    					 * Set the object optimization hint flag
2983    					 */
2984 					vm_object_set_flag(srcobject, OBJ_OPT);
2985 					vm_object_reference(srcobject);
2986 
2987 					if (oldobject) {
2988 						TAILQ_REMOVE(&oldobject->shadow_head,
2989 							first_object, shadow_list);
2990 						oldobject->shadow_count--;
2991 						/* XXX bump generation? */
2992 						vm_object_deallocate(oldobject);
2993 					}
2994 
2995 					TAILQ_INSERT_TAIL(&srcobject->shadow_head,
2996 						first_object, shadow_list);
2997 					srcobject->shadow_count++;
2998 					/* XXX bump generation? */
2999 
3000 					first_object->backing_object = srcobject;
3001 				}
3002 				first_object->backing_object_offset = cp;
3003 				map->timestamp++;
3004 			} else {
3005 				pmap_remove (map->pmap, uaddr, tend);
3006 			}
3007 /*
3008  * Otherwise, we have to do a logical mmap.
3009  */
3010 		} else {
3011 
3012 			vm_object_set_flag(srcobject, OBJ_OPT);
3013 			vm_object_reference(srcobject);
3014 
3015 			pmap_remove (map->pmap, uaddr, tend);
3016 
3017 			vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3018 			vm_map_lock_upgrade(map);
3019 
3020 			if (entry == &map->header) {
3021 				map->first_free = &map->header;
3022 			} else if (map->first_free->start >= start) {
3023 				map->first_free = entry->prev;
3024 			}
3025 
3026 			vm_map_entry_delete(map, entry);
3027 
3028 			object = srcobject;
3029 			ooffset = cp;
3030 
3031 			rv = vm_map_insert(map, object, ooffset, start, tend,
3032 				VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
3033 
3034 			if (rv != KERN_SUCCESS)
3035 				panic("vm_uiomove: could not insert new entry: %d", rv);
3036 		}
3037 
3038 /*
3039  * Map the window directly, if it is already in memory
3040  */
3041 		pmap_object_init_pt(map->pmap, uaddr,
3042 			srcobject, oindex, tcnt, 0);
3043 
3044 		map->timestamp++;
3045 		vm_map_unlock(map);
3046 
3047 		cnt -= tcnt;
3048 		uaddr += tcnt;
3049 		cp += tcnt;
3050 		if (npages)
3051 			*npages += osize;
3052 	}
3053 	return 0;
3054 }
3055 #endif
3056 
3057 #include "opt_ddb.h"
3058 #ifdef DDB
3059 #include <sys/kernel.h>
3060 
3061 #include <ddb/ddb.h>
3062 
3063 /*
3064  *	vm_map_print:	[ debug ]
3065  */
3066 DB_SHOW_COMMAND(map, vm_map_print)
3067 {
3068 	static int nlines;
3069 	/* XXX convert args. */
3070 	vm_map_t map = (vm_map_t)addr;
3071 	boolean_t full = have_addr;
3072 
3073 	vm_map_entry_t entry;
3074 
3075 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3076 	    (void *)map,
3077 	    (void *)map->pmap, map->nentries, map->timestamp);
3078 	nlines++;
3079 
3080 	if (!full && db_indent)
3081 		return;
3082 
3083 	db_indent += 2;
3084 	for (entry = map->header.next; entry != &map->header;
3085 	    entry = entry->next) {
3086 		db_iprintf("map entry %p: start=%p, end=%p\n",
3087 		    (void *)entry, (void *)entry->start, (void *)entry->end);
3088 		nlines++;
3089 		{
3090 			static char *inheritance_name[4] =
3091 			{"share", "copy", "none", "donate_copy"};
3092 
3093 			db_iprintf(" prot=%x/%x/%s",
3094 			    entry->protection,
3095 			    entry->max_protection,
3096 			    inheritance_name[(int)(unsigned char)entry->inheritance]);
3097 			if (entry->wired_count != 0)
3098 				db_printf(", wired");
3099 		}
3100 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3101 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3102 			db_printf(", share=%p, offset=0x%lx\n",
3103 			    (void *)entry->object.sub_map,
3104 			    (long)entry->offset);
3105 			nlines++;
3106 			if ((entry->prev == &map->header) ||
3107 			    (entry->prev->object.sub_map !=
3108 				entry->object.sub_map)) {
3109 				db_indent += 2;
3110 				vm_map_print((db_expr_t)(intptr_t)
3111 					     entry->object.sub_map,
3112 					     full, 0, (char *)0);
3113 				db_indent -= 2;
3114 			}
3115 		} else {
3116 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3117 			db_printf(", object=%p, offset=0x%lx",
3118 			    (void *)entry->object.vm_object,
3119 			    (long)entry->offset);
3120 			if (entry->eflags & MAP_ENTRY_COW)
3121 				db_printf(", copy (%s)",
3122 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3123 			db_printf("\n");
3124 			nlines++;
3125 
3126 			if ((entry->prev == &map->header) ||
3127 			    (entry->prev->object.vm_object !=
3128 				entry->object.vm_object)) {
3129 				db_indent += 2;
3130 				vm_object_print((db_expr_t)(intptr_t)
3131 						entry->object.vm_object,
3132 						full, 0, (char *)0);
3133 				nlines += 4;
3134 				db_indent -= 2;
3135 			}
3136 		}
3137 	}
3138 	db_indent -= 2;
3139 	if (db_indent == 0)
3140 		nlines = 0;
3141 }
3142 
3143 
3144 DB_SHOW_COMMAND(procvm, procvm)
3145 {
3146 	struct proc *p;
3147 
3148 	if (have_addr) {
3149 		p = (struct proc *) addr;
3150 	} else {
3151 		p = curproc;
3152 	}
3153 
3154 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3155 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3156 	    (void *)vmspace_pmap(p->p_vmspace));
3157 
3158 	vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3159 }
3160 
3161 #endif /* DDB */
3162