xref: /freebsd/sys/vm/vm_map.c (revision fd8e4ebc8c18caec3eefac6527831f9ee6a92959)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/ktr.h>
74 #include <sys/lock.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/vnode.h>
80 #include <sys/resourcevar.h>
81 
82 #include <vm/vm.h>
83 #include <vm/vm_param.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_pager.h>
89 #include <vm/vm_kern.h>
90 #include <vm/vm_extern.h>
91 #include <vm/vm_zone.h>
92 #include <vm/swap_pager.h>
93 
94 /*
95  *	Virtual memory maps provide for the mapping, protection,
96  *	and sharing of virtual memory objects.  In addition,
97  *	this module provides for an efficient virtual copy of
98  *	memory from one map to another.
99  *
100  *	Synchronization is required prior to most operations.
101  *
102  *	Maps consist of an ordered doubly-linked list of simple
103  *	entries; a single hint is used to speed up lookups.
104  *
105  *	Since portions of maps are specified by start/end addresses,
106  *	which may not align with existing map entries, all
107  *	routines merely "clip" entries to these start/end values.
108  *	[That is, an entry is split into two, bordering at a
109  *	start or end value.]  Note that these clippings may not
110  *	always be necessary (as the two resulting entries are then
111  *	not changed); however, the clipping is done for convenience.
112  *
113  *	As mentioned above, virtual copy operations are performed
114  *	by copying VM object references from one map to
115  *	another, and then marking both regions as copy-on-write.
116  */
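
/*
 * Illustrative example (not part of the original source; the addresses
 * below are hypothetical): clipping an entry covering [0x1000, 0x5000)
 * for an operation on the range [0x2000, 0x4000) proceeds as
 *
 *	vm_map_clip_start(map, entry, 0x2000);
 *		entry now covers [0x2000, 0x5000); a new entry for
 *		[0x1000, 0x2000) is linked immediately before it.
 *	vm_map_clip_end(map, entry, 0x4000);
 *		entry now covers [0x2000, 0x4000); a new entry for
 *		[0x4000, 0x5000) is linked immediately after it.
 *
 * The clipped entry's offset into its backing object is advanced by
 * 0x1000 so that both pieces continue to reference the same object
 * pages.
 */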
117 
118 /*
119  *	vm_map_startup:
120  *
121  *	Initialize the vm_map module.  Must be called before
122  *	any other vm_map routines.
123  *
124  *	Map and entry structures are allocated from the general
125  *	purpose memory pool with some exceptions:
126  *
127  *	- The kernel map and kmem submap are allocated statically.
128  *	- Kernel map entries are allocated out of a static pool.
129  *
130  *	These restrictions are necessary since malloc() uses the
131  *	maps and requires map entries.
132  */
133 
134 static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store;
135 static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone;
136 static struct vm_object kmapentobj, mapentobj, mapobj;
137 
138 static struct vm_map_entry map_entry_init[MAX_MAPENT];
139 static struct vm_map_entry kmap_entry_init[MAX_KMAPENT];
140 static struct vm_map map_init[MAX_KMAP];
141 
142 void
143 vm_map_startup(void)
144 {
145 	mapzone = &mapzone_store;
146 	zbootinit(mapzone, "MAP", sizeof (struct vm_map),
147 		map_init, MAX_KMAP);
148 	kmapentzone = &kmapentzone_store;
149 	zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry),
150 		kmap_entry_init, MAX_KMAPENT);
151 	mapentzone = &mapentzone_store;
152 	zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
153 		map_entry_init, MAX_MAPENT);
154 }
155 
156 /*
157  * Allocate a vmspace structure, including a vm_map and pmap,
158  * and initialize those structures.  The refcnt is set to 1.
159  * The remaining fields must be initialized by the caller.
160  */
161 struct vmspace *
162 vmspace_alloc(min, max)
163 	vm_offset_t min, max;
164 {
165 	struct vmspace *vm;
166 
167 	GIANT_REQUIRED;
168 	vm = zalloc(vmspace_zone);
169 	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
170 	vm_map_init(&vm->vm_map, min, max);
171 	pmap_pinit(vmspace_pmap(vm));
172 	vm->vm_map.pmap = vmspace_pmap(vm);		/* XXX */
173 	vm->vm_refcnt = 1;
174 	vm->vm_shm = NULL;
175 	vm->vm_freer = NULL;
176 	return (vm);
177 }
178 
179 void
180 vm_init2(void)
181 {
182 	zinitna(kmapentzone, &kmapentobj,
183 		NULL, 0, cnt.v_page_count / 4, ZONE_INTERRUPT, 1);
184 	zinitna(mapentzone, &mapentobj,
185 		NULL, 0, 0, 0, 1);
186 	zinitna(mapzone, &mapobj,
187 		NULL, 0, 0, 0, 1);
188 	vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3);
189 	pmap_init2();
190 	vm_object_init2();
191 }
192 
193 static __inline void
194 vmspace_dofree( struct vmspace *vm)
195 {
196 	CTR1(KTR_VM, "vmspace_free: %p", vm);
197 	/*
198 	 * Lock the map, to wait out all other references to it.
199 	 * Delete all of the mappings and pages they hold, then call
200 	 * the pmap module to reclaim anything left.
201 	 */
202 	vm_map_lock(&vm->vm_map);
203 	(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
204 	    vm->vm_map.max_offset);
205 	vm_map_unlock(&vm->vm_map);
206 	pmap_release(vmspace_pmap(vm));
207 	vm_map_destroy(&vm->vm_map);
208 	zfree(vmspace_zone, vm);
209 }
210 
211 void
212 vmspace_free(struct vmspace *vm)
213 {
214 	GIANT_REQUIRED;
215 
216 	if (vm->vm_refcnt == 0)
217 		panic("vmspace_free: attempt to free already freed vmspace");
218 
219 	if (--vm->vm_refcnt == 0)
220 		vmspace_dofree(vm);
221 }
222 
223 void
224 vmspace_exitfree(struct proc *p)
225 {
226 	GIANT_REQUIRED;
227 
228 	if (p == p->p_vmspace->vm_freer)
229 		vmspace_dofree(p->p_vmspace);
230 }
231 
232 /*
233  * vmspace_swap_count() - count the approximate swap usage in pages for a
234  *			  vmspace.
235  *
236  *	Swap usage is determined by taking the proportional swap used by
237  *	VM objects backing the VM map.  To make up for fractional losses,
238  *	if the VM object has any swap use at all the associated map entries
239  *	count for at least 1 swap page.
240  */
241 int
242 vmspace_swap_count(struct vmspace *vmspace)
243 {
244 	vm_map_t map = &vmspace->vm_map;
245 	vm_map_entry_t cur;
246 	int count = 0;
247 
248 	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
249 		vm_object_t object;
250 
251 		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
252 		    (object = cur->object.vm_object) != NULL &&
253 		    object->type == OBJT_SWAP
254 		) {
255 			int n = (cur->end - cur->start) / PAGE_SIZE;
256 
257 			if (object->un_pager.swp.swp_bcount) {
258 				count += object->un_pager.swp.swp_bcount *
259 				    SWAP_META_PAGES * n / object->size + 1;
260 			}
261 		}
262 	}
263 	return(count);
264 }
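
/*
 * Illustrative arithmetic (hypothetical numbers, not part of the
 * original source): for a swap-backed object of 1024 pages with
 * swp_bcount == 4, a map entry covering 256 of those pages contributes
 *
 *	4 * SWAP_META_PAGES * 256 / 1024 + 1
 *
 * pages to the estimate -- roughly one quarter of the object's swap
 * use, rounded up so that any swap use at all counts for at least one
 * page.
 */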
265 
266 u_char
267 vm_map_entry_behavior(struct vm_map_entry *entry)
268 {
269 	return entry->eflags & MAP_ENTRY_BEHAV_MASK;
270 }
271 
272 void
273 vm_map_entry_set_behavior(struct vm_map_entry *entry, u_char behavior)
274 {
275 	entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
276 		(behavior & MAP_ENTRY_BEHAV_MASK);
277 }
278 
279 void
280 vm_map_lock(vm_map_t map)
281 {
282 	vm_map_printf("locking map LK_EXCLUSIVE: %p\n", map);
283 	if (lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread) != 0)
284 		panic("vm_map_lock: failed to get lock");
285 	map->timestamp++;
286 }
287 
288 void
289 vm_map_unlock(vm_map_t map)
290 {
291 	vm_map_printf("locking map LK_RELEASE: %p\n", map);
292 	lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread);
293 }
294 
295 void
296 vm_map_lock_read(vm_map_t map)
297 {
298 	vm_map_printf("locking map LK_SHARED: %p\n", map);
299 	lockmgr(&(map)->lock, LK_SHARED, NULL, curthread);
300 }
301 
302 void
303 vm_map_unlock_read(vm_map_t map)
304 {
305 	vm_map_printf("locking map LK_RELEASE: %p\n", map);
306 	lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread);
307 }
308 
309 static __inline__ int
310 _vm_map_lock_upgrade(vm_map_t map, struct thread *td) {
311 	int error;
312 
313 	vm_map_printf("locking map LK_EXCLUPGRADE: %p\n", map);
314 	error = lockmgr(&map->lock, LK_EXCLUPGRADE, NULL, td);
315 	if (error == 0)
316 		map->timestamp++;
317 	return error;
318 }
319 
320 int
321 vm_map_lock_upgrade(vm_map_t map)
322 {
323     return(_vm_map_lock_upgrade(map, curthread));
324 }
325 
326 void
327 vm_map_lock_downgrade(vm_map_t map)
328 {
329 	vm_map_printf("locking map LK_DOWNGRADE: %p\n", map);
330 	lockmgr(&map->lock, LK_DOWNGRADE, NULL, curthread);
331 }
332 
333 void
334 vm_map_set_recursive(vm_map_t map)
335 {
336 	mtx_lock((map)->lock.lk_interlock);
337 	map->lock.lk_flags |= LK_CANRECURSE;
338 	mtx_unlock((map)->lock.lk_interlock);
339 }
340 
341 void
342 vm_map_clear_recursive(vm_map_t map)
343 {
344 	mtx_lock((map)->lock.lk_interlock);
345 	map->lock.lk_flags &= ~LK_CANRECURSE;
346 	mtx_unlock((map)->lock.lk_interlock);
347 }
348 
349 vm_offset_t
350 vm_map_min(vm_map_t map)
351 {
352 	return(map->min_offset);
353 }
354 
355 vm_offset_t
356 vm_map_max(vm_map_t map)
357 {
358 	return(map->max_offset);
359 }
360 
361 struct pmap *
362 vm_map_pmap(vm_map_t map)
363 {
364 	return(map->pmap);
365 }
366 
367 struct pmap *
368 vmspace_pmap(struct vmspace *vmspace)
369 {
370 	return &vmspace->vm_pmap;
371 }
372 
373 long
374 vmspace_resident_count(struct vmspace *vmspace)
375 {
376 	return pmap_resident_count(vmspace_pmap(vmspace));
377 }
378 
379 /*
380  *	vm_map_create:
381  *
382  *	Creates and returns a new empty VM map with
383  *	the given physical map structure, and having
384  *	the given lower and upper address bounds.
385  */
386 vm_map_t
387 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
388 {
389 	vm_map_t result;
390 
391 	GIANT_REQUIRED;
392 
393 	result = zalloc(mapzone);
394 	CTR1(KTR_VM, "vm_map_create: %p", result);
395 	vm_map_init(result, min, max);
396 	result->pmap = pmap;
397 	return (result);
398 }
399 
400 /*
401  * Initialize an existing vm_map structure
402  * such as that in the vmspace structure.
403  * The pmap is set elsewhere.
404  */
405 void
406 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
407 {
408 	GIANT_REQUIRED;
409 
410 	map->header.next = map->header.prev = &map->header;
411 	map->nentries = 0;
412 	map->size = 0;
413 	map->system_map = 0;
414 	map->infork = 0;
415 	map->min_offset = min;
416 	map->max_offset = max;
417 	map->first_free = &map->header;
418 	map->hint = &map->header;
419 	map->timestamp = 0;
420 	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
421 }
422 
423 void
424 vm_map_destroy(map)
425 	struct vm_map *map;
426 {
427 	GIANT_REQUIRED;
428 	lockdestroy(&map->lock);
429 }
430 
431 /*
432  *	vm_map_entry_dispose:	[ internal use only ]
433  *
434  *	Inverse of vm_map_entry_create.
435  */
436 static void
437 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
438 {
439 	zfree((map->system_map || !mapentzone) ? kmapentzone : mapentzone, entry);
440 }
441 
442 /*
443  *	vm_map_entry_create:	[ internal use only ]
444  *
445  *	Allocates a VM map entry for insertion.
446  *	No entry fields are filled in.
447  */
448 static vm_map_entry_t
449 vm_map_entry_create(vm_map_t map)
450 {
451 	vm_map_entry_t new_entry;
452 
453 	new_entry = zalloc((map->system_map || !mapentzone) ?
454 		kmapentzone : mapentzone);
455 	if (new_entry == NULL)
456 	    panic("vm_map_entry_create: kernel resources exhausted");
457 	return(new_entry);
458 }
459 
460 /*
461  *	vm_map_entry_{un,}link:
462  *
463  *	Insert/remove entries from maps.
464  */
465 static __inline void
466 vm_map_entry_link(vm_map_t map,
467 		  vm_map_entry_t after_where,
468 		  vm_map_entry_t entry)
469 {
470 
471 	CTR4(KTR_VM,
472 	    "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
473 	    map->nentries, entry, after_where);
474 	map->nentries++;
475 	entry->prev = after_where;
476 	entry->next = after_where->next;
477 	entry->next->prev = entry;
478 	after_where->next = entry;
479 }
480 
481 static __inline void
482 vm_map_entry_unlink(vm_map_t map,
483 		    vm_map_entry_t entry)
484 {
485 	vm_map_entry_t prev = entry->prev;
486 	vm_map_entry_t next = entry->next;
487 
488 	next->prev = prev;
489 	prev->next = next;
490 	map->nentries--;
491 	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
492 	    map->nentries, entry);
493 }
494 
495 /*
496  *	SAVE_HINT:
497  *
498  *	Saves the specified entry as the hint for
499  *	future lookups.
500  */
501 #define	SAVE_HINT(map,value) \
502 		(map)->hint = (value);
503 
504 /*
505  *	vm_map_lookup_entry:	[ internal use only ]
506  *
507  *	Finds the map entry containing (or
508  *	immediately preceding) the specified address
509  *	in the given map; the entry is returned
510  *	in the "entry" parameter.  The boolean
511  *	result indicates whether the address is
512  *	actually contained in the map.
513  */
514 boolean_t
515 vm_map_lookup_entry(
516 	vm_map_t map,
517 	vm_offset_t address,
518 	vm_map_entry_t *entry)	/* OUT */
519 {
520 	vm_map_entry_t cur;
521 	vm_map_entry_t last;
522 
523 	GIANT_REQUIRED;
524 	/*
525 	 * Start looking either from the head of the list, or from the hint.
526 	 */
527 
528 	cur = map->hint;
529 
530 	if (cur == &map->header)
531 		cur = cur->next;
532 
533 	if (address >= cur->start) {
534 		/*
535 		 * Go from hint to end of list.
536 		 *
537 		 * But first, make a quick check to see if we are already looking
538 		 * at the entry we want (which is usually the case). Note also
539 		 * that we don't need to save the hint here... it is the same
540 		 * hint (unless we are at the header, in which case the hint
541 		 * didn't buy us anything anyway).
542 		 */
543 		last = &map->header;
544 		if ((cur != last) && (cur->end > address)) {
545 			*entry = cur;
546 			return (TRUE);
547 		}
548 	} else {
549 		/*
550 		 * Go from start to hint, *inclusively*
551 		 */
552 		last = cur->next;
553 		cur = map->header.next;
554 	}
555 
556 	/*
557 	 * Search linearly
558 	 */
559 
560 	while (cur != last) {
561 		if (cur->end > address) {
562 			if (address >= cur->start) {
563 				/*
564 				 * Save this lookup for future hints, and
565 				 * return
566 				 */
567 
568 				*entry = cur;
569 				SAVE_HINT(map, cur);
570 				return (TRUE);
571 			}
572 			break;
573 		}
574 		cur = cur->next;
575 	}
576 	*entry = cur->prev;
577 	SAVE_HINT(map, *entry);
578 	return (FALSE);
579 }
580 
581 /*
582  *	vm_map_insert:
583  *
584  *	Inserts the given whole VM object into the target
585  *	map at the specified address range.  The object's
586  *	size should match that of the address range.
587  *
588  *	Requires that the map be locked, and leaves it so.
589  *
590  *	If object is non-NULL, ref count must be bumped by caller
591  *	prior to making call to account for the new entry.
592  */
593 int
594 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
595 	      vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
596 	      int cow)
597 {
598 	vm_map_entry_t new_entry;
599 	vm_map_entry_t prev_entry;
600 	vm_map_entry_t temp_entry;
601 	vm_eflags_t protoeflags;
602 
603 	GIANT_REQUIRED;
604 
605 	/*
606 	 * Check that the start and end points are not bogus.
607 	 */
608 
609 	if ((start < map->min_offset) || (end > map->max_offset) ||
610 	    (start >= end))
611 		return (KERN_INVALID_ADDRESS);
612 
613 	/*
614 	 * Find the entry prior to the proposed starting address; if it's part
615 	 * of an existing entry, this range is bogus.
616 	 */
617 
618 	if (vm_map_lookup_entry(map, start, &temp_entry))
619 		return (KERN_NO_SPACE);
620 
621 	prev_entry = temp_entry;
622 
623 	/*
624 	 * Assert that the next entry doesn't overlap the end point.
625 	 */
626 
627 	if ((prev_entry->next != &map->header) &&
628 	    (prev_entry->next->start < end))
629 		return (KERN_NO_SPACE);
630 
631 	protoeflags = 0;
632 
633 	if (cow & MAP_COPY_ON_WRITE)
634 		protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
635 
636 	if (cow & MAP_NOFAULT) {
637 		protoeflags |= MAP_ENTRY_NOFAULT;
638 
639 		KASSERT(object == NULL,
640 			("vm_map_insert: paradoxical MAP_NOFAULT request"));
641 	}
642 	if (cow & MAP_DISABLE_SYNCER)
643 		protoeflags |= MAP_ENTRY_NOSYNC;
644 	if (cow & MAP_DISABLE_COREDUMP)
645 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
646 
647 	if (object) {
648 		/*
649 		 * When object is non-NULL, it could be shared with another
650 		 * process.  We have to set or clear OBJ_ONEMAPPING
651 		 * appropriately.
652 		 */
653 		if ((object->ref_count > 1) || (object->shadow_count != 0)) {
654 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
655 		}
656 	}
657 	else if ((prev_entry != &map->header) &&
658 		 (prev_entry->eflags == protoeflags) &&
659 		 (prev_entry->end == start) &&
660 		 (prev_entry->wired_count == 0) &&
661 		 ((prev_entry->object.vm_object == NULL) ||
662 		  vm_object_coalesce(prev_entry->object.vm_object,
663 				     OFF_TO_IDX(prev_entry->offset),
664 				     (vm_size_t)(prev_entry->end - prev_entry->start),
665 				     (vm_size_t)(end - prev_entry->end)))) {
666 		/*
667 		 * We were able to extend the object.  Determine if we
668 		 * can extend the previous map entry to include the
669 		 * new range as well.
670 		 */
671 		if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
672 		    (prev_entry->protection == prot) &&
673 		    (prev_entry->max_protection == max)) {
674 			map->size += (end - prev_entry->end);
675 			prev_entry->end = end;
676 			vm_map_simplify_entry(map, prev_entry);
677 			return (KERN_SUCCESS);
678 		}
679 
680 		/*
681 		 * If we can extend the object but cannot extend the
682 		 * map entry, we have to create a new map entry.  We
683 		 * must bump the ref count on the extended object to
684 		 * account for it.  object may be NULL.
685 		 */
686 		object = prev_entry->object.vm_object;
687 		offset = prev_entry->offset +
688 			(prev_entry->end - prev_entry->start);
689 		vm_object_reference(object);
690 	}
691 
692 	/*
693 	 * NOTE: if conditionals fail, object can be NULL here.  This occurs
694 	 * in things like the buffer map where we manage kva but do not manage
695 	 * backing objects.
696 	 */
697 
698 	/*
699 	 * Create a new entry
700 	 */
701 
702 	new_entry = vm_map_entry_create(map);
703 	new_entry->start = start;
704 	new_entry->end = end;
705 
706 	new_entry->eflags = protoeflags;
707 	new_entry->object.vm_object = object;
708 	new_entry->offset = offset;
709 	new_entry->avail_ssize = 0;
710 
711 	new_entry->inheritance = VM_INHERIT_DEFAULT;
712 	new_entry->protection = prot;
713 	new_entry->max_protection = max;
714 	new_entry->wired_count = 0;
715 
716 	/*
717 	 * Insert the new entry into the list
718 	 */
719 
720 	vm_map_entry_link(map, prev_entry, new_entry);
721 	map->size += new_entry->end - new_entry->start;
722 
723 	/*
724 	 * Update the free space hint
725 	 */
726 	if ((map->first_free == prev_entry) &&
727 	    (prev_entry->end >= new_entry->start)) {
728 		map->first_free = new_entry;
729 	}
730 
731 #if 0
732 	/*
733 	 * Temporarily removed to avoid MAP_STACK panic, due to
734 	 * MAP_STACK being a huge hack.  Will be added back in
735 	 * when MAP_STACK (and the user stack mapping) is fixed.
736 	 */
737 	/*
738 	 * It may be possible to simplify the entry
739 	 */
740 	vm_map_simplify_entry(map, new_entry);
741 #endif
742 
743 	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
744 		pmap_object_init_pt(map->pmap, start,
745 				    object, OFF_TO_IDX(offset), end - start,
746 				    cow & MAP_PREFAULT_PARTIAL);
747 	}
748 
749 	return (KERN_SUCCESS);
750 }
751 
752 /*
753  * Find sufficient space for `length' bytes in the given map, starting at
754  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
755  */
756 int
757 vm_map_findspace(
758 	vm_map_t map,
759 	vm_offset_t start,
760 	vm_size_t length,
761 	vm_offset_t *addr)
762 {
763 	vm_map_entry_t entry, next;
764 	vm_offset_t end;
765 
766 	GIANT_REQUIRED;
767 	if (start < map->min_offset)
768 		start = map->min_offset;
769 	if (start > map->max_offset)
770 		return (1);
771 
772 	/*
773 	 * Look for the first possible address; if there's already something
774 	 * at this address, we have to start after it.
775 	 */
776 	if (start == map->min_offset) {
777 		if ((entry = map->first_free) != &map->header)
778 			start = entry->end;
779 	} else {
780 		vm_map_entry_t tmp;
781 
782 		if (vm_map_lookup_entry(map, start, &tmp))
783 			start = tmp->end;
784 		entry = tmp;
785 	}
786 
787 	/*
788 	 * Look through the rest of the map, trying to fit a new region in the
789 	 * gap between existing regions, or after the very last region.
790 	 */
791 	for (;; start = (entry = next)->end) {
792 		/*
793 		 * Find the end of the proposed new region.  Be sure we didn't
794 		 * go beyond the end of the map, or wrap around the address;
795 		 * if so, we lose.  Otherwise, if this is the last entry, or
796 		 * if the proposed new region fits before the next entry, we
797 		 * win.
798 		 */
799 		end = start + length;
800 		if (end > map->max_offset || end < start)
801 			return (1);
802 		next = entry->next;
803 		if (next == &map->header || next->start >= end)
804 			break;
805 	}
806 	SAVE_HINT(map, entry);
807 	*addr = start;
808 	if (map == kernel_map) {
809 		vm_offset_t ksize;
810 		if ((ksize = round_page(start + length)) > kernel_vm_end) {
811 			pmap_growkernel(ksize);
812 		}
813 	}
814 	return (0);
815 }
816 
817 /*
818  *	vm_map_find finds an unallocated region in the target address
819  *	map with the given length.  The search is defined to be
820  *	first-fit from the specified address; the region found is
821  *	returned in the same parameter.
822  *
823  *	If object is non-NULL, ref count must be bumped by caller
824  *	prior to making call to account for the new entry.
825  */
826 int
827 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
828 	    vm_offset_t *addr,	/* IN/OUT */
829 	    vm_size_t length, boolean_t find_space, vm_prot_t prot,
830 	    vm_prot_t max, int cow)
831 {
832 	vm_offset_t start;
833 	int result, s = 0;
834 
835 	GIANT_REQUIRED;
836 
837 	start = *addr;
838 
839 	if (map == kmem_map)
840 		s = splvm();
841 
842 	vm_map_lock(map);
843 	if (find_space) {
844 		if (vm_map_findspace(map, start, length, addr)) {
845 			vm_map_unlock(map);
846 			if (map == kmem_map)
847 				splx(s);
848 			return (KERN_NO_SPACE);
849 		}
850 		start = *addr;
851 	}
852 	result = vm_map_insert(map, object, offset,
853 		start, start + length, prot, max, cow);
854 	vm_map_unlock(map);
855 
856 	if (map == kmem_map)
857 		splx(s);
858 
859 	return (result);
860 }
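
/*
 * Illustrative sketch (hypothetical caller, not part of the original
 * source): a typical first-fit allocation of anonymous, pageable kernel
 * VA, in the style of kmem_alloc_pageable(), looks like
 *
 *	vm_offset_t addr = vm_map_min(kernel_map);
 *
 *	if (vm_map_find(kernel_map, NULL, 0, &addr, size, TRUE,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
 *		return (0);
 *
 * On success addr holds the start of the new range.  Passing
 * find_space == FALSE instead attempts the mapping at the address
 * supplied in *addr and fails with KERN_NO_SPACE if that range is
 * already occupied.
 */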
861 
862 /*
863  *	vm_map_simplify_entry:
864  *
865  *	Simplify the given map entry by merging with either neighbor.  This
866  *	routine also has the ability to merge with both neighbors.
867  *
868  *	The map must be locked.
869  *
870  *	This routine guarantees that the passed entry remains valid (though
871  *	possibly extended).  When merging, this routine may delete one or
872  *	both neighbors.
873  */
874 void
875 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
876 {
877 	vm_map_entry_t next, prev;
878 	vm_size_t prevsize, esize;
879 
880 	GIANT_REQUIRED;
881 
882 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
883 		return;
884 
885 	prev = entry->prev;
886 	if (prev != &map->header) {
887 		prevsize = prev->end - prev->start;
888 		if ( (prev->end == entry->start) &&
889 		     (prev->object.vm_object == entry->object.vm_object) &&
890 		     (!prev->object.vm_object ||
891 			(prev->offset + prevsize == entry->offset)) &&
892 		     (prev->eflags == entry->eflags) &&
893 		     (prev->protection == entry->protection) &&
894 		     (prev->max_protection == entry->max_protection) &&
895 		     (prev->inheritance == entry->inheritance) &&
896 		     (prev->wired_count == entry->wired_count)) {
897 			if (map->first_free == prev)
898 				map->first_free = entry;
899 			if (map->hint == prev)
900 				map->hint = entry;
901 			vm_map_entry_unlink(map, prev);
902 			entry->start = prev->start;
903 			entry->offset = prev->offset;
904 			if (prev->object.vm_object)
905 				vm_object_deallocate(prev->object.vm_object);
906 			vm_map_entry_dispose(map, prev);
907 		}
908 	}
909 
910 	next = entry->next;
911 	if (next != &map->header) {
912 		esize = entry->end - entry->start;
913 		if ((entry->end == next->start) &&
914 		    (next->object.vm_object == entry->object.vm_object) &&
915 		     (!entry->object.vm_object ||
916 			(entry->offset + esize == next->offset)) &&
917 		    (next->eflags == entry->eflags) &&
918 		    (next->protection == entry->protection) &&
919 		    (next->max_protection == entry->max_protection) &&
920 		    (next->inheritance == entry->inheritance) &&
921 		    (next->wired_count == entry->wired_count)) {
922 			if (map->first_free == next)
923 				map->first_free = entry;
924 			if (map->hint == next)
925 				map->hint = entry;
926 			vm_map_entry_unlink(map, next);
927 			entry->end = next->end;
928 			if (next->object.vm_object)
929 				vm_object_deallocate(next->object.vm_object);
930 			vm_map_entry_dispose(map, next);
931 	        }
932 	}
933 }
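
/*
 * Illustrative example (hypothetical values, not part of the original
 * source): two adjacent entries covering [A, B) and [B, C) that
 * reference the same object at contiguous offsets with identical
 * eflags, protection, inheritance and wiring are merged into a single
 * entry covering [A, C); the discarded entry's object reference is
 * dropped via vm_object_deallocate() and the entry itself is returned
 * to its zone.
 */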
934 /*
935  *	vm_map_clip_start:	[ internal use only ]
936  *
937  *	Asserts that the given entry begins at or after
938  *	the specified address; if necessary,
939  *	it splits the entry into two.
940  */
941 #define vm_map_clip_start(map, entry, startaddr) \
942 { \
943 	if (startaddr > entry->start) \
944 		_vm_map_clip_start(map, entry, startaddr); \
945 }
946 
947 /*
948  *	This routine is called only when it is known that
949  *	the entry must be split.
950  */
951 static void
952 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
953 {
954 	vm_map_entry_t new_entry;
955 
956 	/*
957 	 * Split off the front portion -- note that we must insert the new
958 	 * entry BEFORE this one, so that this entry has the specified
959 	 * starting address.
960 	 */
961 
962 	vm_map_simplify_entry(map, entry);
963 
964 	/*
965 	 * If there is no object backing this entry, we might as well create
966 	 * one now.  If we defer it, an object can get created after the map
967 	 * is clipped, and individual objects will be created for the split-up
968 	 * map.  This is a bit of a hack, but is also about the best place to
969 	 * put this improvement.
970 	 */
971 
972 	if (entry->object.vm_object == NULL && !map->system_map) {
973 		vm_object_t object;
974 		object = vm_object_allocate(OBJT_DEFAULT,
975 				atop(entry->end - entry->start));
976 		entry->object.vm_object = object;
977 		entry->offset = 0;
978 	}
979 
980 	new_entry = vm_map_entry_create(map);
981 	*new_entry = *entry;
982 
983 	new_entry->end = start;
984 	entry->offset += (start - entry->start);
985 	entry->start = start;
986 
987 	vm_map_entry_link(map, entry->prev, new_entry);
988 
989 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
990 		vm_object_reference(new_entry->object.vm_object);
991 	}
992 }
993 
994 /*
995  *	vm_map_clip_end:	[ internal use only ]
996  *
997  *	Asserts that the given entry ends at or before
998  *	the specified address; if necessary,
999  *	it splits the entry into two.
1000  */
1001 
1002 #define vm_map_clip_end(map, entry, endaddr) \
1003 { \
1004 	if (endaddr < entry->end) \
1005 		_vm_map_clip_end(map, entry, endaddr); \
1006 }
1007 
1008 /*
1009  *	This routine is called only when it is known that
1010  *	the entry must be split.
1011  */
1012 static void
1013 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1014 {
1015 	vm_map_entry_t new_entry;
1016 
1017 	/*
1018 	 * If there is no object backing this entry, we might as well create
1019 	 * one now.  If we defer it, an object can get created after the map
1020 	 * is clipped, and individual objects will be created for the split-up
1021 	 * map.  This is a bit of a hack, but is also about the best place to
1022 	 * put this improvement.
1023 	 */
1024 
1025 	if (entry->object.vm_object == NULL && !map->system_map) {
1026 		vm_object_t object;
1027 		object = vm_object_allocate(OBJT_DEFAULT,
1028 				atop(entry->end - entry->start));
1029 		entry->object.vm_object = object;
1030 		entry->offset = 0;
1031 	}
1032 
1033 	/*
1034 	 * Create a new entry and insert it AFTER the specified entry
1035 	 */
1036 
1037 	new_entry = vm_map_entry_create(map);
1038 	*new_entry = *entry;
1039 
1040 	new_entry->start = entry->end = end;
1041 	new_entry->offset += (end - entry->start);
1042 
1043 	vm_map_entry_link(map, entry, new_entry);
1044 
1045 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1046 		vm_object_reference(new_entry->object.vm_object);
1047 	}
1048 }
1049 
1050 /*
1051  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
1052  *
1053  *	Asserts that the starting and ending region
1054  *	addresses fall within the valid range of the map.
1055  */
1056 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
1057 		{					\
1058 		if (start < vm_map_min(map))		\
1059 			start = vm_map_min(map);	\
1060 		if (end > vm_map_max(map))		\
1061 			end = vm_map_max(map);		\
1062 		if (start > end)			\
1063 			start = end;			\
1064 		}
1065 
1066 /*
1067  *	vm_map_submap:		[ kernel use only ]
1068  *
1069  *	Mark the given range as handled by a subordinate map.
1070  *
1071  *	This range must have been created with vm_map_find,
1072  *	and no other operations may have been performed on this
1073  *	range prior to calling vm_map_submap.
1074  *
1075  *	Only a limited number of operations can be performed
1076  *	within this range after calling vm_map_submap:
1077  *		vm_fault
1078  *	[Don't try vm_map_copy!]
1079  *
1080  *	To remove a submapping, one must first remove the
1081  *	range from the superior map, and then destroy the
1082  *	submap (if desired).  [Better yet, don't try it.]
1083  */
1084 int
1085 vm_map_submap(
1086 	vm_map_t map,
1087 	vm_offset_t start,
1088 	vm_offset_t end,
1089 	vm_map_t submap)
1090 {
1091 	vm_map_entry_t entry;
1092 	int result = KERN_INVALID_ARGUMENT;
1093 
1094 	GIANT_REQUIRED;
1095 
1096 	vm_map_lock(map);
1097 
1098 	VM_MAP_RANGE_CHECK(map, start, end);
1099 
1100 	if (vm_map_lookup_entry(map, start, &entry)) {
1101 		vm_map_clip_start(map, entry, start);
1102 	} else
1103 		entry = entry->next;
1104 
1105 	vm_map_clip_end(map, entry, end);
1106 
1107 	if ((entry->start == start) && (entry->end == end) &&
1108 	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1109 	    (entry->object.vm_object == NULL)) {
1110 		entry->object.sub_map = submap;
1111 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1112 		result = KERN_SUCCESS;
1113 	}
1114 	vm_map_unlock(map);
1115 
1116 	return (result);
1117 }
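
/*
 * Illustrative sketch (not part of the original source; the steps
 * follow the kmem_suballoc() pattern and are approximate): a kernel
 * submap is typically set up by reserving a range in the parent,
 * creating a new map that shares the parent's pmap, and installing it:
 *
 *	(void) vm_map_find(parent, NULL, 0, &min, size, TRUE,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0);
 *	submap = vm_map_create(vm_map_pmap(parent), min, min + size);
 *	(void) vm_map_submap(parent, min, min + size, submap);
 */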
1118 
1119 /*
1120  *	vm_map_protect:
1121  *
1122  *	Sets the protection of the specified address
1123  *	region in the target map.  If "set_max" is
1124  *	specified, the maximum protection is to be set;
1125  *	otherwise, only the current protection is affected.
1126  */
1127 int
1128 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1129 	       vm_prot_t new_prot, boolean_t set_max)
1130 {
1131 	vm_map_entry_t current;
1132 	vm_map_entry_t entry;
1133 
1134 	GIANT_REQUIRED;
1135 	vm_map_lock(map);
1136 
1137 	VM_MAP_RANGE_CHECK(map, start, end);
1138 
1139 	if (vm_map_lookup_entry(map, start, &entry)) {
1140 		vm_map_clip_start(map, entry, start);
1141 	} else {
1142 		entry = entry->next;
1143 	}
1144 
1145 	/*
1146 	 * Make a first pass to check for protection violations.
1147 	 */
1148 
1149 	current = entry;
1150 	while ((current != &map->header) && (current->start < end)) {
1151 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1152 			vm_map_unlock(map);
1153 			return (KERN_INVALID_ARGUMENT);
1154 		}
1155 		if ((new_prot & current->max_protection) != new_prot) {
1156 			vm_map_unlock(map);
1157 			return (KERN_PROTECTION_FAILURE);
1158 		}
1159 		current = current->next;
1160 	}
1161 
1162 	/*
1163 	 * Go back and fix up protections. [Note that clipping is not
1164 	 * necessary the second time.]
1165 	 */
1166 
1167 	current = entry;
1168 
1169 	while ((current != &map->header) && (current->start < end)) {
1170 		vm_prot_t old_prot;
1171 
1172 		vm_map_clip_end(map, current, end);
1173 
1174 		old_prot = current->protection;
1175 		if (set_max)
1176 			current->protection =
1177 			    (current->max_protection = new_prot) &
1178 			    old_prot;
1179 		else
1180 			current->protection = new_prot;
1181 
1182 		/*
1183 		 * Update physical map if necessary. Worry about copy-on-write
1184 		 * here -- CHECK THIS XXX
1185 		 */
1186 
1187 		if (current->protection != old_prot) {
1188 #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1189 							VM_PROT_ALL)
1190 
1191 			pmap_protect(map->pmap, current->start,
1192 			    current->end,
1193 			    current->protection & MASK(current));
1194 #undef	MASK
1195 		}
1196 
1197 		vm_map_simplify_entry(map, current);
1198 
1199 		current = current->next;
1200 	}
1201 
1202 	vm_map_unlock(map);
1203 	return (KERN_SUCCESS);
1204 }
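
/*
 * Illustrative example (not part of the original source): mprotect(2)
 * is the usual caller, applying a new current protection to the user
 * map with set_max == FALSE, roughly
 *
 *	vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size,
 *	    VM_PROT_READ, FALSE);
 *
 * which succeeds only if the requested protection is a subset of
 * max_protection for every entry in the range.
 */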
1205 
1206 /*
1207  *	vm_map_madvise:
1208  *
1209  * 	This routine traverses a process's map handling the madvise
1210  *	system call.  Advisories are classified as either those affecting
1211  *	the vm_map_entry structure, or those affecting the underlying
1212  *	objects.
1213  */
1214 
1215 int
1216 vm_map_madvise(
1217 	vm_map_t map,
1218 	vm_offset_t start,
1219 	vm_offset_t end,
1220 	int behav)
1221 {
1222 	vm_map_entry_t current, entry;
1223 	int modify_map = 0;
1224 
1225 	GIANT_REQUIRED;
1226 
1227 	/*
1228 	 * Some madvise calls directly modify the vm_map_entry, in which case
1229 	 * we need to use an exclusive lock on the map and we need to perform
1230 	 * various clipping operations.  Otherwise we only need a read-lock
1231 	 * on the map.
1232 	 */
1233 
1234 	switch(behav) {
1235 	case MADV_NORMAL:
1236 	case MADV_SEQUENTIAL:
1237 	case MADV_RANDOM:
1238 	case MADV_NOSYNC:
1239 	case MADV_AUTOSYNC:
1240 	case MADV_NOCORE:
1241 	case MADV_CORE:
1242 		modify_map = 1;
1243 		vm_map_lock(map);
1244 		break;
1245 	case MADV_WILLNEED:
1246 	case MADV_DONTNEED:
1247 	case MADV_FREE:
1248 		vm_map_lock_read(map);
1249 		break;
1250 	default:
1251 		return (KERN_INVALID_ARGUMENT);
1252 	}
1253 
1254 	/*
1255 	 * Locate starting entry and clip if necessary.
1256 	 */
1257 
1258 	VM_MAP_RANGE_CHECK(map, start, end);
1259 
1260 	if (vm_map_lookup_entry(map, start, &entry)) {
1261 		if (modify_map)
1262 			vm_map_clip_start(map, entry, start);
1263 	} else {
1264 		entry = entry->next;
1265 	}
1266 
1267 	if (modify_map) {
1268 		/*
1269 		 * madvise behaviors that are implemented in the vm_map_entry.
1270 		 *
1271 		 * We clip the vm_map_entry so that behavioral changes are
1272 		 * limited to the specified address range.
1273 		 */
1274 		for (current = entry;
1275 		     (current != &map->header) && (current->start < end);
1276 		     current = current->next
1277 		) {
1278 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1279 				continue;
1280 
1281 			vm_map_clip_end(map, current, end);
1282 
1283 			switch (behav) {
1284 			case MADV_NORMAL:
1285 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1286 				break;
1287 			case MADV_SEQUENTIAL:
1288 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1289 				break;
1290 			case MADV_RANDOM:
1291 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1292 				break;
1293 			case MADV_NOSYNC:
1294 				current->eflags |= MAP_ENTRY_NOSYNC;
1295 				break;
1296 			case MADV_AUTOSYNC:
1297 				current->eflags &= ~MAP_ENTRY_NOSYNC;
1298 				break;
1299 			case MADV_NOCORE:
1300 				current->eflags |= MAP_ENTRY_NOCOREDUMP;
1301 				break;
1302 			case MADV_CORE:
1303 				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1304 				break;
1305 			default:
1306 				break;
1307 			}
1308 			vm_map_simplify_entry(map, current);
1309 		}
1310 		vm_map_unlock(map);
1311 	} else {
1312 		vm_pindex_t pindex;
1313 		int count;
1314 
1315 		/*
1316 		 * madvise behaviors that are implemented in the underlying
1317 		 * vm_object.
1318 		 *
1319 		 * Since we don't clip the vm_map_entry, we have to clip
1320 		 * the vm_object pindex and count.
1321 		 */
1322 		for (current = entry;
1323 		     (current != &map->header) && (current->start < end);
1324 		     current = current->next
1325 		) {
1326 			vm_offset_t useStart;
1327 
1328 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1329 				continue;
1330 
1331 			pindex = OFF_TO_IDX(current->offset);
1332 			count = atop(current->end - current->start);
1333 			useStart = current->start;
1334 
1335 			if (current->start < start) {
1336 				pindex += atop(start - current->start);
1337 				count -= atop(start - current->start);
1338 				useStart = start;
1339 			}
1340 			if (current->end > end)
1341 				count -= atop(current->end - end);
1342 
1343 			if (count <= 0)
1344 				continue;
1345 
1346 			vm_object_madvise(current->object.vm_object,
1347 					  pindex, count, behav);
1348 			if (behav == MADV_WILLNEED) {
1349 				pmap_object_init_pt(
1350 				    map->pmap,
1351 				    useStart,
1352 				    current->object.vm_object,
1353 				    pindex,
1354 				    (count << PAGE_SHIFT),
1355 				    MAP_PREFAULT_MADVISE
1356 				);
1357 			}
1358 		}
1359 		vm_map_unlock_read(map);
1360 	}
1361 	return(0);
1362 }
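
/*
 * Illustrative example (not part of the original source): a userland
 * call such as
 *
 *	madvise(addr, len, MADV_NOSYNC);
 *
 * takes the exclusive-lock path above, clipping entries and setting
 * MAP_ENTRY_NOSYNC on them, while
 *
 *	madvise(addr, len, MADV_WILLNEED);
 *
 * takes the read-lock path, forwarding the advice to the backing
 * objects and prefaulting pages via pmap_object_init_pt().
 */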
1363 
1364 
1365 /*
1366  *	vm_map_inherit:
1367  *
1368  *	Sets the inheritance of the specified address
1369  *	range in the target map.  Inheritance
1370  *	affects how the map will be shared with
1371  *	child maps at the time of vm_map_fork.
1372  */
1373 int
1374 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1375 	       vm_inherit_t new_inheritance)
1376 {
1377 	vm_map_entry_t entry;
1378 	vm_map_entry_t temp_entry;
1379 
1380 	GIANT_REQUIRED;
1381 
1382 	switch (new_inheritance) {
1383 	case VM_INHERIT_NONE:
1384 	case VM_INHERIT_COPY:
1385 	case VM_INHERIT_SHARE:
1386 		break;
1387 	default:
1388 		return (KERN_INVALID_ARGUMENT);
1389 	}
1390 
1391 	vm_map_lock(map);
1392 
1393 	VM_MAP_RANGE_CHECK(map, start, end);
1394 
1395 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1396 		entry = temp_entry;
1397 		vm_map_clip_start(map, entry, start);
1398 	} else
1399 		entry = temp_entry->next;
1400 
1401 	while ((entry != &map->header) && (entry->start < end)) {
1402 		vm_map_clip_end(map, entry, end);
1403 
1404 		entry->inheritance = new_inheritance;
1405 
1406 		vm_map_simplify_entry(map, entry);
1407 
1408 		entry = entry->next;
1409 	}
1410 
1411 	vm_map_unlock(map);
1412 	return (KERN_SUCCESS);
1413 }
1414 
1415 /*
1416  * Implement the semantics of mlock
1417  */
1418 int
1419 vm_map_user_pageable(
1420 	vm_map_t map,
1421 	vm_offset_t start,
1422 	vm_offset_t end,
1423 	boolean_t new_pageable)
1424 {
1425 	vm_map_entry_t entry;
1426 	vm_map_entry_t start_entry;
1427 	vm_offset_t estart;
1428 	vm_offset_t eend;
1429 	int rv;
1430 
1431 	vm_map_lock(map);
1432 	VM_MAP_RANGE_CHECK(map, start, end);
1433 
1434 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1435 		vm_map_unlock(map);
1436 		return (KERN_INVALID_ADDRESS);
1437 	}
1438 
1439 	if (new_pageable) {
1440 
1441 		entry = start_entry;
1442 		vm_map_clip_start(map, entry, start);
1443 
1444 		/*
1445 		 * Now decrement the wiring count for each region. If a region
1446 		 * becomes completely unwired, unwire its physical pages and
1447 		 * mappings.
1448 		 */
1449 		while ((entry != &map->header) && (entry->start < end)) {
1450 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1451 				vm_map_clip_end(map, entry, end);
1452 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1453 				entry->wired_count--;
1454 				if (entry->wired_count == 0)
1455 					vm_fault_unwire(map, entry->start, entry->end);
1456 			}
1457 			vm_map_simplify_entry(map,entry);
1458 			entry = entry->next;
1459 		}
1460 	} else {
1461 
1462 		entry = start_entry;
1463 
1464 		while ((entry != &map->header) && (entry->start < end)) {
1465 
1466 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1467 				entry = entry->next;
1468 				continue;
1469 			}
1470 
1471 			if (entry->wired_count != 0) {
1472 				entry->wired_count++;
1473 				entry->eflags |= MAP_ENTRY_USER_WIRED;
1474 				entry = entry->next;
1475 				continue;
1476 			}
1477 
1478 			/* Here on entry being newly wired */
1479 
1480 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1481 				int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1482 				if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1483 
1484 					vm_object_shadow(&entry->object.vm_object,
1485 					    &entry->offset,
1486 					    atop(entry->end - entry->start));
1487 					entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1488 
1489 				} else if (entry->object.vm_object == NULL &&
1490 					   !map->system_map) {
1491 
1492 					entry->object.vm_object =
1493 					    vm_object_allocate(OBJT_DEFAULT,
1494 						atop(entry->end - entry->start));
1495 					entry->offset = (vm_offset_t) 0;
1496 
1497 				}
1498 			}
1499 
1500 			vm_map_clip_start(map, entry, start);
1501 			vm_map_clip_end(map, entry, end);
1502 
1503 			entry->wired_count++;
1504 			entry->eflags |= MAP_ENTRY_USER_WIRED;
1505 			estart = entry->start;
1506 			eend = entry->end;
1507 
1508 			/* First we need to allow map modifications */
1509 			vm_map_set_recursive(map);
1510 			vm_map_lock_downgrade(map);
1511 			map->timestamp++;
1512 
1513 			rv = vm_fault_user_wire(map, entry->start, entry->end);
1514 			if (rv) {
1515 
1516 				entry->wired_count--;
1517 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1518 
1519 				vm_map_clear_recursive(map);
1520 				vm_map_unlock(map);
1521 
1522 				/*
1523 				 * At this point, the map is unlocked, and
1524 				 * entry might no longer be valid.  Use copy
1525 				 * of entry start value obtained while entry
1526 				 * was valid.
1527 				 */
1528 				(void) vm_map_user_pageable(map, start, estart,
1529 							    TRUE);
1530 				return rv;
1531 			}
1532 
1533 			vm_map_clear_recursive(map);
1534 			if (vm_map_lock_upgrade(map)) {
1535 				vm_map_lock(map);
1536 				if (vm_map_lookup_entry(map, estart, &entry)
1537 				    == FALSE) {
1538 					vm_map_unlock(map);
1539 					/*
1540 					 * vm_fault_user_wire succeeded, thus
1541 					 * the area between start and eend
1542 					 * is wired and has to be unwired
1543 					 * here as part of the cleanup.
1544 					 */
1545 					(void) vm_map_user_pageable(map,
1546 								    start,
1547 								    eend,
1548 								    TRUE);
1549 					return (KERN_INVALID_ADDRESS);
1550 				}
1551 			}
1552 			vm_map_simplify_entry(map,entry);
1553 		}
1554 	}
1555 	map->timestamp++;
1556 	vm_map_unlock(map);
1557 	return KERN_SUCCESS;
1558 }
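
/*
 * Illustrative example (not part of the original source, and the exact
 * call sites are assumed): mlock(2) and munlock(2) are the expected
 * callers of this routine, roughly
 *
 *	vm_map_user_pageable(map, addr, addr + size, FALSE);	wire
 *	vm_map_user_pageable(map, addr, addr + size, TRUE);	unwire
 *
 * with new_pageable == FALSE wiring (locking) the range and TRUE
 * unwiring it.
 */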
1559 
1560 /*
1561  *	vm_map_pageable:
1562  *
1563  *	Sets the pageability of the specified address
1564  *	range in the target map.  Regions specified
1565  *	as not pageable require locked-down physical
1566  *	memory and physical page maps.
1567  *
1568  *	The map must not be locked, but a reference
1569  *	must remain to the map throughout the call.
1570  */
1571 int
1572 vm_map_pageable(
1573 	vm_map_t map,
1574 	vm_offset_t start,
1575 	vm_offset_t end,
1576 	boolean_t new_pageable)
1577 {
1578 	vm_map_entry_t entry;
1579 	vm_map_entry_t start_entry;
1580 	vm_offset_t failed = 0;
1581 	int rv;
1582 
1583 	GIANT_REQUIRED;
1584 
1585 	vm_map_lock(map);
1586 
1587 	VM_MAP_RANGE_CHECK(map, start, end);
1588 
1589 	/*
1590 	 * Only one pageability change may take place at one time, since
1591 	 * vm_fault assumes it will be called only once for each
1592 	 * wiring/unwiring.  Therefore, we have to make sure we're actually
1593 	 * changing the pageability for the entire region.  We do so before
1594 	 * making any changes.
1595 	 */
1596 
1597 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1598 		vm_map_unlock(map);
1599 		return (KERN_INVALID_ADDRESS);
1600 	}
1601 	entry = start_entry;
1602 
1603 	/*
1604 	 * Actions are rather different for wiring and unwiring, so we have
1605 	 * two separate cases.
1606 	 */
1607 
1608 	if (new_pageable) {
1609 
1610 		vm_map_clip_start(map, entry, start);
1611 
1612 		/*
1613 		 * Unwiring.  First ensure that the range to be unwired is
1614 		 * really wired down and that there are no holes.
1615 		 */
1616 		while ((entry != &map->header) && (entry->start < end)) {
1617 
1618 			if (entry->wired_count == 0 ||
1619 			    (entry->end < end &&
1620 				(entry->next == &map->header ||
1621 				    entry->next->start > entry->end))) {
1622 				vm_map_unlock(map);
1623 				return (KERN_INVALID_ARGUMENT);
1624 			}
1625 			entry = entry->next;
1626 		}
1627 
1628 		/*
1629 		 * Now decrement the wiring count for each region. If a region
1630 		 * becomes completely unwired, unwire its physical pages and
1631 		 * mappings.
1632 		 */
1633 		entry = start_entry;
1634 		while ((entry != &map->header) && (entry->start < end)) {
1635 			vm_map_clip_end(map, entry, end);
1636 
1637 			entry->wired_count--;
1638 			if (entry->wired_count == 0)
1639 				vm_fault_unwire(map, entry->start, entry->end);
1640 
1641 			vm_map_simplify_entry(map, entry);
1642 
1643 			entry = entry->next;
1644 		}
1645 	} else {
1646 		/*
1647 		 * Wiring.  We must do this in two passes:
1648 		 *
1649 		 * 1.  Holding the write lock, we create any shadow or zero-fill
1650 		 * objects that need to be created. Then we clip each map
1651 		 * entry to the region to be wired and increment its wiring
1652 		 * count.  We create objects before clipping the map entries
1653 		 * to avoid object proliferation.
1654 		 *
1655 		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
1656 		 * fault in the pages for any newly wired area (wired_count is
1657 		 * 1).
1658 		 *
1659 		 * Downgrading to a read lock for vm_fault_wire avoids a possible
1660 		 * deadlock with another process that may have faulted on one
1661 		 * of the pages to be wired (it would mark the page busy,
1662 		 * blocking us, then in turn block on the map lock that we
1663 		 * hold).  Because of problems in the recursive lock package,
1664 		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
1665 		 * any actions that require the write lock must be done
1666 		 * beforehand.  Because we keep the read lock on the map, the
1667 		 * copy-on-write status of the entries we modify here cannot
1668 		 * change.
1669 		 */
1670 
1671 		/*
1672 		 * Pass 1.
1673 		 */
1674 		while ((entry != &map->header) && (entry->start < end)) {
1675 			if (entry->wired_count == 0) {
1676 
1677 				/*
1678 				 * Perform actions of vm_map_lookup that need
1679 				 * the write lock on the map: create a shadow
1680 				 * object for a copy-on-write region, or an
1681 				 * object for a zero-fill region.
1682 				 *
1683 				 * We don't have to do this for entries that
1684 				 * point to sub maps, because we won't
1685 				 * hold the lock on the sub map.
1686 				 */
1687 				if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1688 					int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1689 					if (copyflag &&
1690 					    ((entry->protection & VM_PROT_WRITE) != 0)) {
1691 
1692 						vm_object_shadow(&entry->object.vm_object,
1693 						    &entry->offset,
1694 						    atop(entry->end - entry->start));
1695 						entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1696 					} else if (entry->object.vm_object == NULL &&
1697 						   !map->system_map) {
1698 						entry->object.vm_object =
1699 						    vm_object_allocate(OBJT_DEFAULT,
1700 							atop(entry->end - entry->start));
1701 						entry->offset = (vm_offset_t) 0;
1702 					}
1703 				}
1704 			}
1705 			vm_map_clip_start(map, entry, start);
1706 			vm_map_clip_end(map, entry, end);
1707 			entry->wired_count++;
1708 
1709 			/*
1710 			 * Check for holes
1711 			 */
1712 			if (entry->end < end &&
1713 			    (entry->next == &map->header ||
1714 				entry->next->start > entry->end)) {
1715 				/*
1716 				 * Found one.  Object creation actions do not
1717 				 * need to be undone, but the wired counts
1718 				 * need to be restored.
1719 				 */
1720 				while (entry != &map->header && entry->end > start) {
1721 					entry->wired_count--;
1722 					entry = entry->prev;
1723 				}
1724 				vm_map_unlock(map);
1725 				return (KERN_INVALID_ARGUMENT);
1726 			}
1727 			entry = entry->next;
1728 		}
1729 
1730 		/*
1731 		 * Pass 2.
1732 		 */
1733 
1734 		/*
1735 		 * HACK HACK HACK HACK
1736 		 *
1737 		 * If we are wiring in the kernel map or a submap of it,
1738 		 * unlock the map to avoid deadlocks.  We trust that the
1739 		 * kernel is well-behaved, and therefore will not do
1740 		 * anything destructive to this region of the map while
1741 		 * we have it unlocked.  We cannot trust user processes
1742 		 * to do the same.
1743 		 *
1744 		 * HACK HACK HACK HACK
1745 		 */
1746 		if (vm_map_pmap(map) == kernel_pmap) {
1747 			vm_map_unlock(map);	/* trust me ... */
1748 		} else {
1749 			vm_map_lock_downgrade(map);
1750 		}
1751 
1752 		rv = 0;
1753 		entry = start_entry;
1754 		while (entry != &map->header && entry->start < end) {
1755 			/*
1756 			 * If vm_fault_wire fails for any page we need to undo
1757 			 * what has been done.  We decrement the wiring count
1758 			 * for those pages which have not yet been wired (now)
1759 			 * and unwire those that have (later).
1760 			 *
1761 			 * XXX this violates the locking protocol on the map,
1762 			 * needs to be fixed.
1763 			 */
1764 			if (rv)
1765 				entry->wired_count--;
1766 			else if (entry->wired_count == 1) {
1767 				rv = vm_fault_wire(map, entry->start, entry->end);
1768 				if (rv) {
1769 					failed = entry->start;
1770 					entry->wired_count--;
1771 				}
1772 			}
1773 			entry = entry->next;
1774 		}
1775 
1776 		if (vm_map_pmap(map) == kernel_pmap) {
1777 			vm_map_lock(map);
1778 		}
1779 		if (rv) {
1780 			vm_map_unlock(map);
1781 			(void) vm_map_pageable(map, start, failed, TRUE);
1782 			return (rv);
1783 		}
1784 		/*
1785 		 * An exclusive lock on the map is needed in order to call
1786 		 * vm_map_simplify_entry().  If the current lock on the map
1787 		 * is only a shared lock, an upgrade is needed.
1788 		 */
1789 		if (vm_map_pmap(map) != kernel_pmap &&
1790 		    vm_map_lock_upgrade(map)) {
1791 			vm_map_lock(map);
1792 			if (vm_map_lookup_entry(map, start, &start_entry) ==
1793 			    FALSE) {
1794 				vm_map_unlock(map);
1795 				return KERN_SUCCESS;
1796 			}
1797 		}
1798 		vm_map_simplify_entry(map, start_entry);
1799 	}
1800 
1801 	vm_map_unlock(map);
1802 
1803 	return (KERN_SUCCESS);
1804 }
1805 
1806 /*
1807  * vm_map_clean
1808  *
1809  * Push any dirty cached pages in the address range to their pager.
1810  * If syncio is TRUE, dirty pages are written synchronously.
1811  * If invalidate is TRUE, any cached pages are freed as well.
1812  *
1813  * Returns an error if any part of the specified range is not mapped.
1814  */
1815 int
1816 vm_map_clean(
1817 	vm_map_t map,
1818 	vm_offset_t start,
1819 	vm_offset_t end,
1820 	boolean_t syncio,
1821 	boolean_t invalidate)
1822 {
1823 	vm_map_entry_t current;
1824 	vm_map_entry_t entry;
1825 	vm_size_t size;
1826 	vm_object_t object;
1827 	vm_ooffset_t offset;
1828 
1829 	GIANT_REQUIRED;
1830 
1831 	vm_map_lock_read(map);
1832 	VM_MAP_RANGE_CHECK(map, start, end);
1833 	if (!vm_map_lookup_entry(map, start, &entry)) {
1834 		vm_map_unlock_read(map);
1835 		return (KERN_INVALID_ADDRESS);
1836 	}
1837 	/*
1838 	 * Make a first pass to check for holes.
1839 	 */
1840 	for (current = entry; current->start < end; current = current->next) {
1841 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1842 			vm_map_unlock_read(map);
1843 			return (KERN_INVALID_ARGUMENT);
1844 		}
1845 		if (end > current->end &&
1846 		    (current->next == &map->header ||
1847 			current->end != current->next->start)) {
1848 			vm_map_unlock_read(map);
1849 			return (KERN_INVALID_ADDRESS);
1850 		}
1851 	}
1852 
1853 	if (invalidate)
1854 		pmap_remove(vm_map_pmap(map), start, end);
1855 	/*
1856 	 * Make a second pass, cleaning/uncaching pages from the indicated
1857 	 * objects as we go.
1858 	 */
1859 	for (current = entry; current->start < end; current = current->next) {
1860 		offset = current->offset + (start - current->start);
1861 		size = (end <= current->end ? end : current->end) - start;
1862 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1863 			vm_map_t smap;
1864 			vm_map_entry_t tentry;
1865 			vm_size_t tsize;
1866 
1867 			smap = current->object.sub_map;
1868 			vm_map_lock_read(smap);
1869 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1870 			tsize = tentry->end - offset;
1871 			if (tsize < size)
1872 				size = tsize;
1873 			object = tentry->object.vm_object;
1874 			offset = tentry->offset + (offset - tentry->start);
1875 			vm_map_unlock_read(smap);
1876 		} else {
1877 			object = current->object.vm_object;
1878 		}
1879 		/*
1880 		 * Note that there is absolutely no sense in writing out
1881 		 * anonymous objects, so we track down the vnode object
1882 		 * to write out.
1883 		 * We invalidate (remove) all pages from the address space
1884 		 * anyway, for semantic correctness.
1885 		 */
1886 		while (object->backing_object) {
1887 			object = object->backing_object;
1888 			offset += object->backing_object_offset;
1889 			if (object->size < OFF_TO_IDX( offset + size))
1890 				size = IDX_TO_OFF(object->size) - offset;
1891 		}
1892 		if (object && (object->type == OBJT_VNODE) &&
1893 		    (current->protection & VM_PROT_WRITE)) {
1894 			/*
1895 			 * Flush pages if writing is allowed, invalidate them
1896 			 * if invalidation requested.  Pages undergoing I/O
1897 			 * will be ignored by vm_object_page_remove().
1898 			 *
1899 			 * We cannot lock the vnode and then wait for paging
1900 			 * to complete without deadlocking against vm_fault.
1901 			 * Instead we simply call vm_object_page_remove() and
1902 			 * allow it to block internally on a page-by-page
1903 			 * basis when it encounters pages undergoing async
1904 			 * I/O.
1905 			 */
1906 			int flags;
1907 
1908 			vm_object_reference(object);
1909 			vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
1910 			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1911 			flags |= invalidate ? OBJPC_INVAL : 0;
1912 			vm_object_page_clean(object,
1913 			    OFF_TO_IDX(offset),
1914 			    OFF_TO_IDX(offset + size + PAGE_MASK),
1915 			    flags);
1916 			if (invalidate) {
1917 				/*vm_object_pip_wait(object, "objmcl");*/
1918 				vm_object_page_remove(object,
1919 				    OFF_TO_IDX(offset),
1920 				    OFF_TO_IDX(offset + size + PAGE_MASK),
1921 				    FALSE);
1922 			}
1923 			VOP_UNLOCK(object->handle, 0, curthread);
1924 			vm_object_deallocate(object);
1925 		}
1926 		start += size;
1927 	}
1928 
1929 	vm_map_unlock_read(map);
1930 	return (KERN_SUCCESS);
1931 }
1932 
1933 /*
1934  *	vm_map_entry_unwire:	[ internal use only ]
1935  *
1936  *	Make the region specified by this entry pageable.
1937  *
1938  *	The map in question should be locked.
1939  *	[This is the reason for this routine's existence.]
1940  */
1941 static void
1942 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
1943 {
1944 	vm_fault_unwire(map, entry->start, entry->end);
1945 	entry->wired_count = 0;
1946 }
1947 
1948 /*
1949  *	vm_map_entry_delete:	[ internal use only ]
1950  *
1951  *	Deallocate the given entry from the target map.
1952  */
1953 static void
1954 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
1955 {
1956 	vm_map_entry_unlink(map, entry);
1957 	map->size -= entry->end - entry->start;
1958 
1959 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1960 		vm_object_deallocate(entry->object.vm_object);
1961 	}
1962 
1963 	vm_map_entry_dispose(map, entry);
1964 }
1965 
1966 /*
1967  *	vm_map_delete:	[ internal use only ]
1968  *
1969  *	Deallocates the given address range from the target
1970  *	map.
1971  */
1972 int
1973 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
1974 {
1975 	vm_object_t object;
1976 	vm_map_entry_t entry;
1977 	vm_map_entry_t first_entry;
1978 
1979 	GIANT_REQUIRED;
1980 
1981 	/*
1982 	 * Find the start of the region, and clip it
1983 	 */
1984 
1985 	if (!vm_map_lookup_entry(map, start, &first_entry))
1986 		entry = first_entry->next;
1987 	else {
1988 		entry = first_entry;
1989 		vm_map_clip_start(map, entry, start);
1990 		/*
1991 		 * Fix the lookup hint now, rather than each time though the
1992 		 * Fix the lookup hint now, rather than each time through the
1993 		 */
1994 		SAVE_HINT(map, entry->prev);
1995 	}
1996 
1997 	/*
1998 	 * Save the free space hint
1999 	 */
2000 
2001 	if (entry == &map->header) {
2002 		map->first_free = &map->header;
2003 	} else if (map->first_free->start >= start) {
2004 		map->first_free = entry->prev;
2005 	}
2006 
2007 	/*
2008 	 * Step through all entries in this region
2009 	 */
2010 
2011 	while ((entry != &map->header) && (entry->start < end)) {
2012 		vm_map_entry_t next;
2013 		vm_offset_t s, e;
2014 		vm_pindex_t offidxstart, offidxend, count;
2015 
2016 		vm_map_clip_end(map, entry, end);
2017 
2018 		s = entry->start;
2019 		e = entry->end;
2020 		next = entry->next;
2021 
2022 		offidxstart = OFF_TO_IDX(entry->offset);
2023 		count = OFF_TO_IDX(e - s);
2024 		object = entry->object.vm_object;
2025 
2026 		/*
2027 		 * Unwire before removing addresses from the pmap; otherwise,
2028 		 * unwiring will put the entries back in the pmap.
2029 		 */
2030 		if (entry->wired_count != 0) {
2031 			vm_map_entry_unwire(map, entry);
2032 		}
2033 
2034 		offidxend = offidxstart + count;
2035 
2036 		if ((object == kernel_object) || (object == kmem_object)) {
2037 			vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2038 		} else {
2039 			pmap_remove(map->pmap, s, e);
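			/*
			 * If this map held the only mapping of an anonymous
			 * object that is still referenced elsewhere, release
			 * its pages and swap space now rather than waiting
			 * for the last reference to go away.
			 */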
2040 			if (object != NULL &&
2041 			    object->ref_count != 1 &&
2042 			    (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2043 			    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2044 				vm_object_collapse(object);
2045 				vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2046 				if (object->type == OBJT_SWAP) {
2047 					swap_pager_freespace(object, offidxstart, count);
2048 				}
2049 				if (offidxend >= object->size &&
2050 				    offidxstart < object->size) {
2051 					object->size = offidxstart;
2052 				}
2053 			}
2054 		}
2055 
2056 		/*
2057 		 * Delete the entry (which may delete the object) only after
2058 		 * removing all pmap entries pointing to its pages.
2059 		 * (Otherwise, its page frames may be reallocated, and any
2060 		 * modify bits will be set in the wrong object!)
2061 		 */
2062 		vm_map_entry_delete(map, entry);
2063 		entry = next;
2064 	}
2065 	return (KERN_SUCCESS);
2066 }
2067 
2068 /*
2069  *	vm_map_remove:
2070  *
2071  *	Remove the given address range from the target map.
2072  *	This is the exported form of vm_map_delete.
2073  */
2074 int
2075 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2076 {
2077 	int result, s = 0;
2078 
2079 	GIANT_REQUIRED;
2080 
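	/*
	 * kmem_map is presumably also manipulated from interrupt context
	 * (e.g. by interrupt-time kernel memory allocations), so block
	 * those interrupts with splvm() while it is locked.
	 */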
2081 	if (map == kmem_map)
2082 		s = splvm();
2083 
2084 	vm_map_lock(map);
2085 	VM_MAP_RANGE_CHECK(map, start, end);
2086 	result = vm_map_delete(map, start, end);
2087 	vm_map_unlock(map);
2088 
2089 	if (map == kmem_map)
2090 		splx(s);
2091 
2092 	return (result);
2093 }
2094 
2095 /*
2096  *	vm_map_check_protection:
2097  *
2098  *	Assert that the target map allows the specified
2099  *	privilege on the entire address region given.
2100  *	The entire region must be allocated.
2101  */
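/*
 * A minimal usage sketch (useracc() in vm_glue.c does essentially this
 * to validate a user buffer before accessing it):
 *
 *	rv = vm_map_check_protection(&p->p_vmspace->vm_map,
 *	    trunc_page(addr), round_page(addr + len), VM_PROT_READ);
 */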
2102 boolean_t
2103 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2104 			vm_prot_t protection)
2105 {
2106 	vm_map_entry_t entry;
2107 	vm_map_entry_t tmp_entry;
2108 
2109 	GIANT_REQUIRED;
2110 
2111 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2112 		return (FALSE);
2113 	}
2114 	entry = tmp_entry;
2115 
2116 	while (start < end) {
2117 		if (entry == &map->header) {
2118 			return (FALSE);
2119 		}
2120 		/*
2121 		 * No holes allowed!
2122 		 */
2123 
2124 		if (start < entry->start) {
2125 			return (FALSE);
2126 		}
2127 		/*
2128 		 * Check protection associated with entry.
2129 		 */
2130 
2131 		if ((entry->protection & protection) != protection) {
2132 			return (FALSE);
2133 		}
2134 		/* go to next entry */
2135 
2136 		start = entry->end;
2137 		entry = entry->next;
2138 	}
2139 	return (TRUE);
2140 }
2141 
2142 /*
2143  * Split the pages in a map entry into a new object.  This affords
2144  * easier removal of unused pages and keeps object inheritance from
2145  * negatively impacting memory usage.
2146  */
2147 static void
2148 vm_map_split(vm_map_entry_t entry)
2149 {
2150 	vm_page_t m;
2151 	vm_object_t orig_object, new_object, source;
2152 	vm_offset_t s, e;
2153 	vm_pindex_t offidxstart, offidxend, idx;
2154 	vm_size_t size;
2155 	vm_ooffset_t offset;
2156 
2157 	GIANT_REQUIRED;
2158 
2159 	orig_object = entry->object.vm_object;
2160 	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
2161 		return;
2162 	if (orig_object->ref_count <= 1)
2163 		return;
2164 
2165 	offset = entry->offset;
2166 	s = entry->start;
2167 	e = entry->end;
2168 
2169 	offidxstart = OFF_TO_IDX(offset);
2170 	offidxend = offidxstart + OFF_TO_IDX(e - s);
2171 	size = offidxend - offidxstart;
2172 
2173 	new_object = vm_pager_allocate(orig_object->type,
2174 		NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
2175 	if (new_object == NULL)
2176 		return;
2177 
2178 	source = orig_object->backing_object;
2179 	if (source != NULL) {
2180 		vm_object_reference(source);	/* Referenced by new_object */
2181 		TAILQ_INSERT_TAIL(&source->shadow_head,
2182 				  new_object, shadow_list);
2183 		vm_object_clear_flag(source, OBJ_ONEMAPPING);
2184 		new_object->backing_object_offset =
2185 			orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
2186 		new_object->backing_object = source;
2187 		source->shadow_count++;
2188 		source->generation++;
2189 	}
2190 
2191 	for (idx = 0; idx < size; idx++) {
2192 		vm_page_t m;
2193 
2194 	retry:
2195 		m = vm_page_lookup(orig_object, offidxstart + idx);
2196 		if (m == NULL)
2197 			continue;
2198 
2199 		/*
2200 		 * We must wait for pending I/O to complete before we can
2201 		 * rename the page.
2202 		 *
2203 		 * We do not have to VM_PROT_NONE the page as mappings should
2204 		 * not be changed by this operation.
2205 		 */
2206 		if (vm_page_sleep_busy(m, TRUE, "spltwt"))
2207 			goto retry;
2208 
2209 		vm_page_busy(m);
2210 		vm_page_rename(m, new_object, idx);
2211 		/* page automatically made dirty by rename and cache handled */
2212 		vm_page_busy(m);
2213 	}
2214 
2215 	if (orig_object->type == OBJT_SWAP) {
2216 		vm_object_pip_add(orig_object, 1);
2217 		/*
2218 		 * copy orig_object pages into new_object
2219 		 * and destroy unneeded pages in
2220 		 * shadow object.
2221 		 */
2222 		swap_pager_copy(orig_object, new_object, offidxstart, 0);
2223 		vm_object_pip_wakeup(orig_object);
2224 	}
2225 
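	/*
	 * Unbusy the pages that were busied and renamed into new_object
	 * above.
	 */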
2226 	for (idx = 0; idx < size; idx++) {
2227 		m = vm_page_lookup(new_object, idx);
2228 		if (m) {
2229 			vm_page_wakeup(m);
2230 		}
2231 	}
2232 
2233 	entry->object.vm_object = new_object;
2234 	entry->offset = 0LL;
2235 	vm_object_deallocate(orig_object);
2236 }
2237 
2238 /*
2239  *	vm_map_copy_entry:
2240  *
2241  *	Copies the contents of the source entry to the destination
2242  *	entry.  The entries *must* be aligned properly.
2243  */
2244 static void
2245 vm_map_copy_entry(
2246 	vm_map_t src_map,
2247 	vm_map_t dst_map,
2248 	vm_map_entry_t src_entry,
2249 	vm_map_entry_t dst_entry)
2250 {
2251 	vm_object_t src_object;
2252 
2253 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2254 		return;
2255 
2256 	if (src_entry->wired_count == 0) {
2257 
2258 		/*
2259 		 * If the source entry is marked needs_copy, it is already
2260 		 * write-protected.
2261 		 */
2262 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2263 			pmap_protect(src_map->pmap,
2264 			    src_entry->start,
2265 			    src_entry->end,
2266 			    src_entry->protection & ~VM_PROT_WRITE);
2267 		}
2268 
2269 		/*
2270 		 * Make a copy of the object.
2271 		 */
2272 		if ((src_object = src_entry->object.vm_object) != NULL) {
2273 
2274 			if ((src_object->handle == NULL) &&
2275 				(src_object->type == OBJT_DEFAULT ||
2276 				 src_object->type == OBJT_SWAP)) {
2277 				vm_object_collapse(src_object);
2278 				if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2279 					vm_map_split(src_entry);
2280 					src_object = src_entry->object.vm_object;
2281 				}
2282 			}
2283 
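			/*
			 * Both entries now reference the same object.  Marking
			 * them COW/NEEDS_COPY (together with the write
			 * protection above) makes the first write fault in
			 * either map create a private shadow copy.
			 */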
2284 			vm_object_reference(src_object);
2285 			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2286 			dst_entry->object.vm_object = src_object;
2287 			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2288 			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2289 			dst_entry->offset = src_entry->offset;
2290 		} else {
2291 			dst_entry->object.vm_object = NULL;
2292 			dst_entry->offset = 0;
2293 		}
2294 
2295 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2296 		    dst_entry->end - dst_entry->start, src_entry->start);
2297 	} else {
2298 		/*
2299 		 * Of course, wired down pages can't be set copy-on-write.
2300 		 * Cause wired pages to be copied into the new map by
2301 		 * simulating faults (the new pages are pageable)
2302 		 */
2303 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2304 	}
2305 }
2306 
2307 /*
2308  * vmspace_fork:
2309  * Create a new process vmspace structure and vm_map
2310  * based on those of an existing process.  The new map
2311  * is based on the old map, according to the inheritance
2312  * values on the regions in that map.
2313  *
2314  * The source map must not be locked.
2315  */
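/*
 * A hedged usage sketch: for the non-RFMEM fork case, vm_forkproc()
 * does roughly
 *
 *	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
 */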
2316 struct vmspace *
2317 vmspace_fork(struct vmspace *vm1)
2318 {
2319 	struct vmspace *vm2;
2320 	vm_map_t old_map = &vm1->vm_map;
2321 	vm_map_t new_map;
2322 	vm_map_entry_t old_entry;
2323 	vm_map_entry_t new_entry;
2324 	vm_object_t object;
2325 
2326 	GIANT_REQUIRED;
2327 
2328 	vm_map_lock(old_map);
2329 	old_map->infork = 1;
2330 
2331 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2332 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2333 	    (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
2334 	new_map = &vm2->vm_map;	/* XXX */
2335 	new_map->timestamp = 1;
2336 
2337 	old_entry = old_map->header.next;
2338 
2339 	while (old_entry != &old_map->header) {
2340 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2341 			panic("vm_map_fork: encountered a submap");
2342 
2343 		switch (old_entry->inheritance) {
2344 		case VM_INHERIT_NONE:
2345 			break;
2346 
2347 		case VM_INHERIT_SHARE:
2348 			/*
2349 			 * Clone the entry, creating the shared object if necessary.
2350 			 */
2351 			object = old_entry->object.vm_object;
2352 			if (object == NULL) {
2353 				object = vm_object_allocate(OBJT_DEFAULT,
2354 					atop(old_entry->end - old_entry->start));
2355 				old_entry->object.vm_object = object;
2356 				old_entry->offset = (vm_offset_t) 0;
2357 			}
2358 
2359 			/*
2360 			 * Add the reference before calling vm_object_shadow
2361 			 * to ensure that a shadow object is created.
2362 			 */
2363 			vm_object_reference(object);
2364 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2365 				vm_object_shadow(&old_entry->object.vm_object,
2366 					&old_entry->offset,
2367 					atop(old_entry->end - old_entry->start));
2368 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2369 				/* Transfer the second reference too. */
2370 				vm_object_reference(
2371 				    old_entry->object.vm_object);
2372 				vm_object_deallocate(object);
2373 				object = old_entry->object.vm_object;
2374 			}
2375 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
2376 
2377 			/*
2378 			 * Clone the entry, referencing the shared object.
2379 			 */
2380 			new_entry = vm_map_entry_create(new_map);
2381 			*new_entry = *old_entry;
2382 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2383 			new_entry->wired_count = 0;
2384 
2385 			/*
2386 			 * Insert the entry into the new map -- we know we're
2387 			 * inserting at the end of the new map.
2388 			 */
2389 
2390 			vm_map_entry_link(new_map, new_map->header.prev,
2391 			    new_entry);
2392 
2393 			/*
2394 			 * Update the physical map
2395 			 */
2396 
2397 			pmap_copy(new_map->pmap, old_map->pmap,
2398 			    new_entry->start,
2399 			    (old_entry->end - old_entry->start),
2400 			    old_entry->start);
2401 			break;
2402 
2403 		case VM_INHERIT_COPY:
2404 			/*
2405 			 * Clone the entry and link into the map.
2406 			 */
2407 			new_entry = vm_map_entry_create(new_map);
2408 			*new_entry = *old_entry;
2409 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2410 			new_entry->wired_count = 0;
2411 			new_entry->object.vm_object = NULL;
2412 			vm_map_entry_link(new_map, new_map->header.prev,
2413 			    new_entry);
2414 			vm_map_copy_entry(old_map, new_map, old_entry,
2415 			    new_entry);
2416 			break;
2417 		}
2418 		old_entry = old_entry->next;
2419 	}
2420 
2421 	new_map->size = old_map->size;
2422 	old_map->infork = 0;
2423 	vm_map_unlock(old_map);
2424 
2425 	return (vm2);
2426 }
2427 
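/*
 *	vm_map_stack:
 *
 *	Create a grow-down stack in the range [addrbos, addrbos + max_ssize).
 *	Only the top "init_ssize" bytes are mapped initially; vm_map_growstack()
 *	extends the mapping downward on demand.  The rest of the range is not
 *	otherwise reserved (see the comment below).
 */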
2428 int
2429 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2430 	      vm_prot_t prot, vm_prot_t max, int cow)
2431 {
2432 	vm_map_entry_t prev_entry;
2433 	vm_map_entry_t new_stack_entry;
2434 	vm_size_t      init_ssize;
2435 	int            rv;
2436 
2437 	GIANT_REQUIRED;
2438 
2439 	if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2440 		return (KERN_NO_SPACE);
2441 
2442 	if (max_ssize < sgrowsiz)
2443 		init_ssize = max_ssize;
2444 	else
2445 		init_ssize = sgrowsiz;
2446 
2447 	vm_map_lock(map);
2448 
2449 	/* If addr is already mapped, no go */
2450 	if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2451 		vm_map_unlock(map);
2452 		return (KERN_NO_SPACE);
2453 	}
2454 
2455 	/* If we can't accommodate max_ssize in the current mapping,
2456 	 * no go.  However, we need to be aware that subsequent user
2457 	 * mappings might map into the space we have reserved for
2458 	 * stack, and currently this space is not protected.
2459 	 *
2460 	 * Hopefully we will at least detect this condition
2461 	 * when we try to grow the stack.
2462 	 */
2463 	if ((prev_entry->next != &map->header) &&
2464 	    (prev_entry->next->start < addrbos + max_ssize)) {
2465 		vm_map_unlock(map);
2466 		return (KERN_NO_SPACE);
2467 	}
2468 
2469 	/* We initially map a stack of only init_ssize.  We will
2470 	 * grow as needed later.  Since this is to be a grow
2471 	 * down stack, we map at the top of the range.
2472 	 *
2473 	 * Note: we would normally expect prot and max to be
2474 	 * VM_PROT_ALL, and cow to be 0.  Possibly we should
2475 	 * eliminate these as input parameters, and just
2476 	 * pass these values here in the insert call.
2477 	 */
2478 	rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2479 	                   addrbos + max_ssize, prot, max, cow);
2480 
2481 	/* Now set the avail_ssize amount */
2482 	if (rv == KERN_SUCCESS) {
2483 		if (prev_entry != &map->header)
2484 			vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2485 		new_stack_entry = prev_entry->next;
2486 		if (new_stack_entry->end   != addrbos + max_ssize ||
2487 		    new_stack_entry->start != addrbos + max_ssize - init_ssize)
2488 			panic ("Bad entry start/end for new stack entry");
2489 		else
2490 			new_stack_entry->avail_ssize = max_ssize - init_ssize;
2491 	}
2492 
2493 	vm_map_unlock(map);
2494 	return (rv);
2495 }
2496 
2497 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2498  * desired address is already mapped, or if we successfully grow
2499  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2500  * stack range (this is strange, but preserves compatibility with
2501  * the grow function in vm_machdep.c).
2502  */
2503 int
2504 vm_map_growstack (struct proc *p, vm_offset_t addr)
2505 {
2506 	vm_map_entry_t prev_entry;
2507 	vm_map_entry_t stack_entry;
2508 	vm_map_entry_t new_stack_entry;
2509 	struct vmspace *vm = p->p_vmspace;
2510 	vm_map_t map = &vm->vm_map;
2511 	vm_offset_t    end;
2512 	int      grow_amount;
2513 	int      rv;
2514 	int      is_procstack;
2515 
2516 	GIANT_REQUIRED;
2517 
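	/*
	 * The map is examined under a read lock and only upgraded to a
	 * write lock when it must be modified; if the upgrade fails the
	 * lock is lost, so restart from here.
	 */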
2518 Retry:
2519 	vm_map_lock_read(map);
2520 
2521 	/* If addr is already in the entry range, no need to grow.*/
2522 	if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2523 		vm_map_unlock_read(map);
2524 		return (KERN_SUCCESS);
2525 	}
2526 
2527 	if ((stack_entry = prev_entry->next) == &map->header) {
2528 		vm_map_unlock_read(map);
2529 		return (KERN_SUCCESS);
2530 	}
2531 	if (prev_entry == &map->header)
2532 		end = stack_entry->start - stack_entry->avail_ssize;
2533 	else
2534 		end = prev_entry->end;
2535 
2536 	/* This next test mimics the old grow function in vm_machdep.c.
2537 	 * It really doesn't quite make sense, but we do it anyway
2538 	 * for compatibility.
2539 	 *
2540 	 * If the stack is not growable, return success.  This signals the
2541 	 * caller to proceed as it normally would with ordinary VM.
2542 	 */
2543 	if (stack_entry->avail_ssize < 1 ||
2544 	    addr >= stack_entry->start ||
2545 	    addr <  stack_entry->start - stack_entry->avail_ssize) {
2546 		vm_map_unlock_read(map);
2547 		return (KERN_SUCCESS);
2548 	}
2549 
2550 	/* Find the minimum grow amount */
2551 	grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2552 	if (grow_amount > stack_entry->avail_ssize) {
2553 		vm_map_unlock_read(map);
2554 		return (KERN_NO_SPACE);
2555 	}
2556 
2557 	/* If there is no longer enough space between the entries,
2558 	 * fail and adjust the available space.  Note: this
2559 	 * should only happen if the user has mapped into the
2560 	 * stack area after the stack was created, and is
2561 	 * probably an error.
2562 	 *
2563 	 * This also effectively destroys any guard page the user
2564 	 * might have intended by limiting the stack size.
2565 	 */
2566 	if (grow_amount > stack_entry->start - end) {
2567 		if (vm_map_lock_upgrade(map))
2568 			goto Retry;
2569 
2570 		stack_entry->avail_ssize = stack_entry->start - end;
2571 
2572 		vm_map_unlock(map);
2573 		return (KERN_NO_SPACE);
2574 	}
2575 
2576 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2577 
2578 	/* If this is the main process stack, see if we're over the
2579 	 * stack limit.
2580 	 */
2581 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2582 			     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2583 		vm_map_unlock_read(map);
2584 		return (KERN_NO_SPACE);
2585 	}
2586 
2587 	/* Round up the grow amount to a multiple of sgrowsiz */
2588 	grow_amount = roundup (grow_amount, sgrowsiz);
2589 	if (grow_amount > stack_entry->avail_ssize) {
2590 		grow_amount = stack_entry->avail_ssize;
2591 	}
2592 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2593 	                     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2594 		grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2595 		              ctob(vm->vm_ssize);
2596 	}
2597 
2598 	if (vm_map_lock_upgrade(map))
2599 		goto Retry;
2600 
2601 	/* Get the preliminary new entry start value */
2602 	addr = stack_entry->start - grow_amount;
2603 
2604 	/* If this puts us into the previous entry, cut back our growth
2605 	 * to the available space.  Also, see the note above.
2606 	 */
2607 	if (addr < end) {
2608 		stack_entry->avail_ssize = stack_entry->start - end;
2609 		addr = end;
2610 	}
2611 
2612 	rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2613 			   VM_PROT_ALL,
2614 			   VM_PROT_ALL,
2615 			   0);
2616 
2617 	/* Adjust the available stack space by the amount we grew. */
2618 	if (rv == KERN_SUCCESS) {
2619 		if (prev_entry != &map->header)
2620 			vm_map_clip_end(map, prev_entry, addr);
2621 		new_stack_entry = prev_entry->next;
2622 		if (new_stack_entry->end   != stack_entry->start  ||
2623 		    new_stack_entry->start != addr)
2624 			panic ("Bad stack grow start/end in new stack entry");
2625 		else {
2626 			new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2627 							(new_stack_entry->end -
2628 							 new_stack_entry->start);
2629 			if (is_procstack)
2630 				vm->vm_ssize += btoc(new_stack_entry->end -
2631 						     new_stack_entry->start);
2632 		}
2633 	}
2634 
2635 	vm_map_unlock(map);
2636 	return (rv);
2637 }
2638 
2639 /*
2640  * Unshare the specified VM space for exec.  If other processes share
2641  * it, then create a new one.  The new vmspace starts out empty.
2642  */
2643 
2644 void
2645 vmspace_exec(struct proc *p)
2646 {
2647 	struct vmspace *oldvmspace = p->p_vmspace;
2648 	struct vmspace *newvmspace;
2649 	vm_map_t map = &p->p_vmspace->vm_map;
2650 
2651 	GIANT_REQUIRED;
2652 	newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
2653 	bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2654 	    (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2655 	/*
2656 	 * This code is written like this for prototype purposes.  The
2657 	 * goal is to avoid running down the vmspace here, but to let the
2658 	 * other processes that are still using the vmspace finally
2659 	 * run it down.  Even though there is little or no chance of blocking
2660 	 * here, it is a good idea to keep this form for future mods.
2661 	 */
2662 	p->p_vmspace = newvmspace;
2663 	pmap_pinit2(vmspace_pmap(newvmspace));
2664 	vmspace_free(oldvmspace);
2665 	if (p == curthread->td_proc)		/* XXXKSE ? */
2666 		pmap_activate(curthread);
2667 }
2668 
2669 /*
2670  * Unshare the specified VM space for forcing COW.  This
2671  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2672  */
2673 
2674 void
2675 vmspace_unshare(struct proc *p)
2676 {
2677 	struct vmspace *oldvmspace = p->p_vmspace;
2678 	struct vmspace *newvmspace;
2679 
2680 	GIANT_REQUIRED;
2681 	if (oldvmspace->vm_refcnt == 1)
2682 		return;
2683 	newvmspace = vmspace_fork(oldvmspace);
2684 	p->p_vmspace = newvmspace;
2685 	pmap_pinit2(vmspace_pmap(newvmspace));
2686 	vmspace_free(oldvmspace);
2687 	if (p == curthread->td_proc)		/* XXXKSE ? */
2688 		pmap_activate(curthread);
2689 }
2690 
2691 
2692 /*
2693  *	vm_map_lookup:
2694  *
2695  *	Finds the VM object, offset, and
2696  *	protection for a given virtual address in the
2697  *	specified map, assuming a page fault of the
2698  *	type specified.
2699  *
2700  *	Leaves the map in question locked for read; return
2701  *	values are guaranteed until a vm_map_lookup_done
2702  *	call is performed.  Note that the map argument
2703  *	is in/out; the returned map must be used in
2704  *	the call to vm_map_lookup_done.
2705  *
2706  *	A handle (out_entry) is returned for use in
2707  *	vm_map_lookup_done, to make that fast.
2708  *
2709  *	If a lookup is requested with "write protection"
2710  *	specified, the map may be changed to perform virtual
2711  *	copying operations, although the data referenced will
2712  *	remain the same.
2713  */
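/*
 * A minimal caller sketch (vm_fault() follows roughly this pattern):
 *
 *	result = vm_map_lookup(&map, vaddr, fault_type, &entry,
 *	    &first_object, &first_pindex, &prot, &wired);
 *	if (result != KERN_SUCCESS)
 *		return (result);
 *	... use first_object and first_pindex under the read lock ...
 *	vm_map_lookup_done(map, entry);
 */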
2714 int
2715 vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
2716 	      vm_offset_t vaddr,
2717 	      vm_prot_t fault_typea,
2718 	      vm_map_entry_t *out_entry,	/* OUT */
2719 	      vm_object_t *object,		/* OUT */
2720 	      vm_pindex_t *pindex,		/* OUT */
2721 	      vm_prot_t *out_prot,		/* OUT */
2722 	      boolean_t *wired)			/* OUT */
2723 {
2724 	vm_map_entry_t entry;
2725 	vm_map_t map = *var_map;
2726 	vm_prot_t prot;
2727 	vm_prot_t fault_type = fault_typea;
2728 
2729 	GIANT_REQUIRED;
2730 RetryLookup:;
2731 
2732 	/*
2733 	 * Lookup the faulting address.
2734 	 */
2735 
2736 	vm_map_lock_read(map);
2737 
2738 #define	RETURN(why) \
2739 		{ \
2740 		vm_map_unlock_read(map); \
2741 		return(why); \
2742 		}
2743 
2744 	/*
2745 	 * If the map has an interesting hint, try it before calling the
2746 	 * full-blown lookup routine.
2747 	 */
2748 
2749 	entry = map->hint;
2750 
2751 	*out_entry = entry;
2752 
2753 	if ((entry == &map->header) ||
2754 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2755 		vm_map_entry_t tmp_entry;
2756 
2757 		/*
2758 		 * Entry was either not a valid hint, or the vaddr was not
2759 		 * contained in the entry, so do a full lookup.
2760 		 */
2761 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2762 			RETURN(KERN_INVALID_ADDRESS);
2763 
2764 		entry = tmp_entry;
2765 		*out_entry = entry;
2766 	}
2767 
2768 	/*
2769 	 * Handle submaps.
2770 	 */
2771 
2772 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2773 		vm_map_t old_map = map;
2774 
2775 		*var_map = map = entry->object.sub_map;
2776 		vm_map_unlock_read(old_map);
2777 		goto RetryLookup;
2778 	}
2779 
2780 	/*
2781 	 * Check whether this task is allowed to have this page.
2782 	 * Note the special case for MAP_ENTRY_COW
2783 	 * pages with an override.  This is to implement a forced
2784 	 * COW for debuggers.
2785 	 */
2786 
2787 	if (fault_type & VM_PROT_OVERRIDE_WRITE)
2788 		prot = entry->max_protection;
2789 	else
2790 		prot = entry->protection;
2791 
2792 	fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2793 	if ((fault_type & prot) != fault_type) {
2794 		RETURN(KERN_PROTECTION_FAILURE);
2795 	}
2796 
2797 	if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2798 	    (entry->eflags & MAP_ENTRY_COW) &&
2799 	    (fault_type & VM_PROT_WRITE) &&
2800 	    (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2801 		RETURN(KERN_PROTECTION_FAILURE);
2802 	}
2803 
2804 	/*
2805 	 * If this page is not pageable, we have to get it for all possible
2806 	 * accesses.
2807 	 */
2808 
2809 	*wired = (entry->wired_count != 0);
2810 	if (*wired)
2811 		prot = fault_type = entry->protection;
2812 
2813 	/*
2814 	 * If the entry was copy-on-write, we either shadow it or demote the access.
2815 	 */
2816 
2817 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2818 		/*
2819 		 * If we want to write the page, we may as well handle that
2820 		 * now since we've got the map locked.
2821 		 *
2822 		 * If we don't need to write the page, we just demote the
2823 		 * permissions allowed.
2824 		 */
2825 
2826 		if (fault_type & VM_PROT_WRITE) {
2827 			/*
2828 			 * Make a new object, and place it in the object
2829 			 * chain.  Note that no new references have appeared
2830 			 * -- one just moved from the map to the new
2831 			 * object.
2832 			 */
2833 
2834 			if (vm_map_lock_upgrade(map))
2835 				goto RetryLookup;
2836 
2837 			vm_object_shadow(
2838 			    &entry->object.vm_object,
2839 			    &entry->offset,
2840 			    atop(entry->end - entry->start));
2841 
2842 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2843 			vm_map_lock_downgrade(map);
2844 		} else {
2845 			/*
2846 			 * We're attempting to read a copy-on-write page --
2847 			 * don't allow writes.
2848 			 */
2849 
2850 			prot &= ~VM_PROT_WRITE;
2851 		}
2852 	}
2853 
2854 	/*
2855 	 * Create an object if necessary.
2856 	 */
2857 	if (entry->object.vm_object == NULL &&
2858 	    !map->system_map) {
2859 		if (vm_map_lock_upgrade(map))
2860 			goto RetryLookup;
2861 
2862 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2863 		    atop(entry->end - entry->start));
2864 		entry->offset = 0;
2865 		vm_map_lock_downgrade(map);
2866 	}
2867 
2868 	/*
2869 	 * Return the object/offset from this entry.  If the entry was
2870 	 * copy-on-write or empty, it has been fixed up.
2871 	 */
2872 
2873 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2874 	*object = entry->object.vm_object;
2875 
2876 	/*
2877 	 * Return the computed protection.
2878 	 */
2879 
2880 	*out_prot = prot;
2881 	return (KERN_SUCCESS);
2882 
2883 #undef	RETURN
2884 }
2885 
2886 /*
2887  *	vm_map_lookup_done:
2888  *
2889  *	Releases locks acquired by a vm_map_lookup
2890  *	(according to the handle returned by that lookup).
2891  */
2892 
2893 void
2894 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
2895 {
2896 	/*
2897 	 * Unlock the main-level map
2898 	 */
2899 	GIANT_REQUIRED;
2900 	vm_map_unlock_read(map);
2901 }
2902 
2903 /*
2904  * Implement uiomove with VM operations.  This code (and collateral
2905  * changes) supports every combination of source object modification
2906  * and COW-type operations.
2907  */
2908 int
2909 vm_uiomove(
2910 	vm_map_t mapa,
2911 	vm_object_t srcobject,
2912 	off_t cp,
2913 	int cnta,
2914 	vm_offset_t uaddra,
2915 	int *npages)
2916 {
2917 	vm_map_t map;
2918 	vm_object_t first_object, oldobject, object;
2919 	vm_map_entry_t entry;
2920 	vm_prot_t prot;
2921 	boolean_t wired;
2922 	int tcnt, rv;
2923 	vm_offset_t uaddr, start, end, tend;
2924 	vm_pindex_t first_pindex, osize, oindex;
2925 	off_t ooffset;
2926 	int cnt;
2927 
2928 	GIANT_REQUIRED;
2929 
2930 	if (npages)
2931 		*npages = 0;
2932 
2933 	cnt = cnta;
2934 	uaddr = uaddra;
2935 
2936 	while (cnt > 0) {
2937 		map = mapa;
2938 
2939 		if ((vm_map_lookup(&map, uaddr,
2940 			VM_PROT_READ, &entry, &first_object,
2941 			&first_pindex, &prot, &wired)) != KERN_SUCCESS) {
2942 			return EFAULT;
2943 		}
2944 
2945 		vm_map_clip_start(map, entry, uaddr);
2946 
2947 		tcnt = cnt;
2948 		tend = uaddr + tcnt;
2949 		if (tend > entry->end) {
2950 			tcnt = entry->end - uaddr;
2951 			tend = entry->end;
2952 		}
2953 
2954 		vm_map_clip_end(map, entry, tend);
2955 
2956 		start = entry->start;
2957 		end = entry->end;
2958 
2959 		osize = atop(tcnt);
2960 
2961 		oindex = OFF_TO_IDX(cp);
2962 		if (npages) {
2963 			vm_pindex_t idx;
2964 			for (idx = 0; idx < osize; idx++) {
2965 				vm_page_t m;
2966 				if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
2967 					vm_map_lookup_done(map, entry);
2968 					return 0;
2969 				}
2970 				/*
2971 				 * disallow busy or invalid pages, but allow
2972 				 * m->busy pages if they are entirely valid.
2973 				 */
2974 				if ((m->flags & PG_BUSY) ||
2975 					((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
2976 					vm_map_lookup_done(map, entry);
2977 					return 0;
2978 				}
2979 			}
2980 		}
2981 
2982 /*
2983  * If we are changing an existing map entry, just redirect
2984  * the object, and change mappings.
2985  */
2986 		if ((first_object->type == OBJT_VNODE) &&
2987 			((oldobject = entry->object.vm_object) == first_object)) {
2988 
2989 			if ((entry->offset != cp) || (oldobject != srcobject)) {
2990 				/*
2991 				 * Remove old window into the file
2992 				 */
2993 				pmap_remove (map->pmap, uaddr, tend);
2994 
2995 				/*
2996 				 * Force copy-on-write for mmapped regions
2997 				 */
2998 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2999 
3000 				/*
3001 				 * Point the object appropriately
3002 				 */
3003 				if (oldobject != srcobject) {
3004 
3005 					/*
3006 					 * Set the object optimization hint flag
3007 					 */
3008 					vm_object_set_flag(srcobject, OBJ_OPT);
3009 					vm_object_reference(srcobject);
3010 					entry->object.vm_object = srcobject;
3011 
3012 					if (oldobject) {
3013 						vm_object_deallocate(oldobject);
3014 					}
3015 				}
3016 
3017 				entry->offset = cp;
3018 				map->timestamp++;
3019 			} else {
3020 				pmap_remove (map->pmap, uaddr, tend);
3021 			}
3022 
3023 		} else if ((first_object->ref_count == 1) &&
3024 			(first_object->size == osize) &&
3025 			((first_object->type == OBJT_DEFAULT) ||
3026 				(first_object->type == OBJT_SWAP)) ) {
3027 
3028 			oldobject = first_object->backing_object;
3029 
3030 			if ((first_object->backing_object_offset != cp) ||
3031 				(oldobject != srcobject)) {
3032 				/*
3033 				 * Remove old window into the file
3034 				 */
3035 				pmap_remove (map->pmap, uaddr, tend);
3036 
3037 				/*
3038 				 * Remove unneeded old pages
3039 				 */
3040 				vm_object_page_remove(first_object, 0, 0, 0);
3041 
3042 				/*
3043 				 * Invalidate swap space
3044 				 */
3045 				if (first_object->type == OBJT_SWAP) {
3046 					swap_pager_freespace(first_object,
3047 						0,
3048 						first_object->size);
3049 				}
3050 
3051 				/*
3052 				 * Force copy-on-write for mmapped regions
3053 				 */
3054 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3055 
3056 				/*
3057 				 * Point the object appropriately
3058 				 */
3059 				if (oldobject != srcobject) {
3060 
3061 					/*
3062 					 * Set the object optimization hint flag
3063 					 */
3064 					vm_object_set_flag(srcobject, OBJ_OPT);
3065 					vm_object_reference(srcobject);
3066 
3067 					if (oldobject) {
3068 						TAILQ_REMOVE(&oldobject->shadow_head,
3069 							first_object, shadow_list);
3070 						oldobject->shadow_count--;
3071 						/* XXX bump generation? */
3072 						vm_object_deallocate(oldobject);
3073 					}
3074 
3075 					TAILQ_INSERT_TAIL(&srcobject->shadow_head,
3076 						first_object, shadow_list);
3077 					srcobject->shadow_count++;
3078 					/* XXX bump generation? */
3079 
3080 					first_object->backing_object = srcobject;
3081 				}
3082 				first_object->backing_object_offset = cp;
3083 				map->timestamp++;
3084 			} else {
3085 				pmap_remove (map->pmap, uaddr, tend);
3086 			}
3087 /*
3088  * Otherwise, we have to do a logical mmap.
3089  */
3090 		} else {
3091 
3092 			vm_object_set_flag(srcobject, OBJ_OPT);
3093 			vm_object_reference(srcobject);
3094 
3095 			pmap_remove (map->pmap, uaddr, tend);
3096 
3097 			vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3098 			vm_map_lock_upgrade(map);
3099 
3100 			if (entry == &map->header) {
3101 				map->first_free = &map->header;
3102 			} else if (map->first_free->start >= start) {
3103 				map->first_free = entry->prev;
3104 			}
3105 
3106 			SAVE_HINT(map, entry->prev);
3107 			vm_map_entry_delete(map, entry);
3108 
3109 			object = srcobject;
3110 			ooffset = cp;
3111 
3112 			rv = vm_map_insert(map, object, ooffset, start, tend,
3113 				VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
3114 
3115 			if (rv != KERN_SUCCESS)
3116 				panic("vm_uiomove: could not insert new entry: %d", rv);
3117 		}
3118 
3119 /*
3120  * Map the window directly, if it is already in memory
3121  */
3122 		pmap_object_init_pt(map->pmap, uaddr,
3123 			srcobject, oindex, tcnt, 0);
3124 
3125 		map->timestamp++;
3126 		vm_map_unlock(map);
3127 
3128 		cnt -= tcnt;
3129 		uaddr += tcnt;
3130 		cp += tcnt;
3131 		if (npages)
3132 			*npages += osize;
3133 	}
3134 	return 0;
3135 }
3136 
3137 /*
3138  * Performs the copy_on_write operations necessary to allow the virtual copies
3139  * into user space to work.  This has to be called for write(2) system calls
3140  * from other processes, file unlinking, and file size shrinkage.
3141  */
3142 void
3143 vm_freeze_copyopts(vm_object_t object, vm_pindex_t froma, vm_pindex_t toa)
3144 {
3145 	int rv;
3146 	vm_object_t robject;
3147 	vm_pindex_t idx;
3148 
3149 	GIANT_REQUIRED;
3150 	if ((object == NULL) ||
3151 		((object->flags & OBJ_OPT) == 0))
3152 		return;
3153 
3154 	if (object->shadow_count > object->ref_count)
3155 		panic("vm_freeze_copyopts: sc > rc");
3156 
3157 	while ((robject = TAILQ_FIRST(&object->shadow_head)) != NULL) {
3158 		vm_pindex_t bo_pindex;
3159 		vm_page_t m_in, m_out;
3160 
3161 		bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
3162 
3163 		vm_object_reference(robject);
3164 
3165 		vm_object_pip_wait(robject, "objfrz");
3166 
3167 		if (robject->ref_count == 1) {
3168 			vm_object_deallocate(robject);
3169 			continue;
3170 		}
3171 
3172 		vm_object_pip_add(robject, 1);
3173 
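		/*
		 * Give the shadow object its own copy of every backing page
		 * so that it no longer depends on this object's pages.
		 */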
3174 		for (idx = 0; idx < robject->size; idx++) {
3175 
3176 			m_out = vm_page_grab(robject, idx,
3177 						VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3178 
3179 			if (m_out->valid == 0) {
3180 				m_in = vm_page_grab(object, bo_pindex + idx,
3181 						VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3182 				if (m_in->valid == 0) {
3183 					rv = vm_pager_get_pages(object, &m_in, 1, 0);
3184 					if (rv != VM_PAGER_OK) {
3185 						printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
3186 						continue;
3187 					}
3188 					vm_page_deactivate(m_in);
3189 				}
3190 
3191 				vm_page_protect(m_in, VM_PROT_NONE);
3192 				pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out));
3193 				m_out->valid = m_in->valid;
3194 				vm_page_dirty(m_out);
3195 				vm_page_activate(m_out);
3196 				vm_page_wakeup(m_in);
3197 			}
3198 			vm_page_wakeup(m_out);
3199 		}
3200 
3201 		object->shadow_count--;
3202 		object->ref_count--;
3203 		TAILQ_REMOVE(&object->shadow_head, robject, shadow_list);
3204 		robject->backing_object = NULL;
3205 		robject->backing_object_offset = 0;
3206 
3207 		vm_object_pip_wakeup(robject);
3208 		vm_object_deallocate(robject);
3209 	}
3210 
3211 	vm_object_clear_flag(object, OBJ_OPT);
3212 }
3213 
3214 #include "opt_ddb.h"
3215 #ifdef DDB
3216 #include <sys/kernel.h>
3217 
3218 #include <ddb/ddb.h>
3219 
3220 /*
3221  *	vm_map_print:	[ debug ]
3222  */
3223 DB_SHOW_COMMAND(map, vm_map_print)
3224 {
3225 	static int nlines;
3226 	/* XXX convert args. */
3227 	vm_map_t map = (vm_map_t)addr;
3228 	boolean_t full = have_addr;
3229 
3230 	vm_map_entry_t entry;
3231 
3232 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3233 	    (void *)map,
3234 	    (void *)map->pmap, map->nentries, map->timestamp);
3235 	nlines++;
3236 
3237 	if (!full && db_indent)
3238 		return;
3239 
3240 	db_indent += 2;
3241 	for (entry = map->header.next; entry != &map->header;
3242 	    entry = entry->next) {
3243 		db_iprintf("map entry %p: start=%p, end=%p\n",
3244 		    (void *)entry, (void *)entry->start, (void *)entry->end);
3245 		nlines++;
3246 		{
3247 			static char *inheritance_name[4] =
3248 			{"share", "copy", "none", "donate_copy"};
3249 
3250 			db_iprintf(" prot=%x/%x/%s",
3251 			    entry->protection,
3252 			    entry->max_protection,
3253 			    inheritance_name[(int)(unsigned char)entry->inheritance]);
3254 			if (entry->wired_count != 0)
3255 				db_printf(", wired");
3256 		}
3257 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3258 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3259 			db_printf(", share=%p, offset=0x%lx\n",
3260 			    (void *)entry->object.sub_map,
3261 			    (long)entry->offset);
3262 			nlines++;
3263 			if ((entry->prev == &map->header) ||
3264 			    (entry->prev->object.sub_map !=
3265 				entry->object.sub_map)) {
3266 				db_indent += 2;
3267 				vm_map_print((db_expr_t)(intptr_t)
3268 					     entry->object.sub_map,
3269 					     full, 0, (char *)0);
3270 				db_indent -= 2;
3271 			}
3272 		} else {
3273 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3274 			db_printf(", object=%p, offset=0x%lx",
3275 			    (void *)entry->object.vm_object,
3276 			    (long)entry->offset);
3277 			if (entry->eflags & MAP_ENTRY_COW)
3278 				db_printf(", copy (%s)",
3279 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3280 			db_printf("\n");
3281 			nlines++;
3282 
3283 			if ((entry->prev == &map->header) ||
3284 			    (entry->prev->object.vm_object !=
3285 				entry->object.vm_object)) {
3286 				db_indent += 2;
3287 				vm_object_print((db_expr_t)(intptr_t)
3288 						entry->object.vm_object,
3289 						full, 0, (char *)0);
3290 				nlines += 4;
3291 				db_indent -= 2;
3292 			}
3293 		}
3294 	}
3295 	db_indent -= 2;
3296 	if (db_indent == 0)
3297 		nlines = 0;
3298 }
3299 
3300 
3301 DB_SHOW_COMMAND(procvm, procvm)
3302 {
3303 	struct proc *p;
3304 
3305 	if (have_addr) {
3306 		p = (struct proc *) addr;
3307 	} else {
3308 		p = curproc;
3309 	}
3310 
3311 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3312 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3313 	    (void *)vmspace_pmap(p->p_vmspace));
3314 
3315 	vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3316 }
3317 
3318 #endif /* DDB */
3319