xref: /freebsd/sys/vm/vm_map.c (revision ec09ef4ff8d0a377fd86c0c2e96a807a538fce4b)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/lock.h>
74 #include <sys/mutex.h>
75 #include <sys/proc.h>
76 #include <sys/vmmeter.h>
77 #include <sys/mman.h>
78 #include <sys/vnode.h>
79 #include <sys/resourcevar.h>
80 
81 #include <vm/vm.h>
82 #include <vm/vm_param.h>
83 #include <vm/pmap.h>
84 #include <vm/vm_map.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_pager.h>
88 #include <vm/vm_kern.h>
89 #include <vm/vm_extern.h>
90 #include <vm/vm_zone.h>
91 #include <vm/swap_pager.h>
92 
93 /*
94  *	Virtual memory maps provide for the mapping, protection,
95  *	and sharing of virtual memory objects.  In addition,
96  *	this module provides for an efficient virtual copy of
97  *	memory from one map to another.
98  *
99  *	Synchronization is required prior to most operations.
100  *
101  *	Maps consist of an ordered doubly-linked list of simple
102  *	entries; a single hint is used to speed up lookups.
103  *
104  *	Since portions of maps are specified by start/end addresses,
105  *	which may not align with existing map entries, all
106  *	routines merely "clip" entries to these start/end values.
107  *	[That is, an entry is split into two, bordering at a
108  *	start or end value.]  Note that these clippings may not
109  *	always be necessary (as the two resulting entries are then
110  *	not changed); however, the clipping is done for convenience.
111  *
112  *	As mentioned above, virtual copy operations are performed
113  *	by copying VM object references from one map to
114  *	another, and then marking both regions as copy-on-write.
115  */
116 
117 /*
118  *	vm_map_startup:
119  *
120  *	Initialize the vm_map module.  Must be called before
121  *	any other vm_map routines.
122  *
123  *	Map and entry structures are allocated from the general
124  *	purpose memory pool with some exceptions:
125  *
126  *	- The kernel map and kmem submap are allocated statically.
127  *	- Kernel map entries are allocated out of a static pool.
128  *
129  *	These restrictions are necessary since malloc() uses the
130  *	maps and requires map entries.
131  */
132 
133 static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store;
134 static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone;
135 static struct vm_object kmapentobj, mapentobj, mapobj;
136 
137 static struct vm_map_entry map_entry_init[MAX_MAPENT];
138 static struct vm_map_entry kmap_entry_init[MAX_KMAPENT];
139 static struct vm_map map_init[MAX_KMAP];
140 
141 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
142 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
143 static vm_map_entry_t vm_map_entry_create __P((vm_map_t));
144 static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
145 static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
146 static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
147 static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t,
148 		vm_map_entry_t));
149 static void vm_map_split __P((vm_map_entry_t));
150 
151 void
152 vm_map_startup()
153 {
154 	mapzone = &mapzone_store;
155 	zbootinit(mapzone, "MAP", sizeof (struct vm_map),
156 		map_init, MAX_KMAP);
157 	kmapentzone = &kmapentzone_store;
158 	zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry),
159 		kmap_entry_init, MAX_KMAPENT);
160 	mapentzone = &mapentzone_store;
161 	zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
162 		map_entry_init, MAX_MAPENT);
163 }
164 
165 /*
166  * Allocate a vmspace structure, including a vm_map and pmap,
167  * and initialize those structures.  The refcnt is set to 1.
168  * The remaining fields must be initialized by the caller.
169  */
170 struct vmspace *
171 vmspace_alloc(min, max)
172 	vm_offset_t min, max;
173 {
174 	struct vmspace *vm;
175 
176 	vm = zalloc(vmspace_zone);
177 	vm_map_init(&vm->vm_map, min, max);
178 	pmap_pinit(vmspace_pmap(vm));
179 	vm->vm_map.pmap = vmspace_pmap(vm);		/* XXX */
180 	vm->vm_refcnt = 1;
181 	vm->vm_shm = NULL;
182 	return (vm);
183 }
184 
185 void
186 vm_init2(void) {
187 	zinitna(kmapentzone, &kmapentobj,
188 		NULL, 0, cnt.v_page_count / 4, ZONE_INTERRUPT, 1);
189 	zinitna(mapentzone, &mapentobj,
190 		NULL, 0, 0, 0, 1);
191 	zinitna(mapzone, &mapobj,
192 		NULL, 0, 0, 0, 1);
193 	vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3);
194 	pmap_init2();
195 	vm_object_init2();
196 }
197 
198 void
199 vmspace_free(vm)
200 	struct vmspace *vm;
201 {
202 
203 	if (vm->vm_refcnt == 0)
204 		panic("vmspace_free: attempt to free already freed vmspace");
205 
206 	if (--vm->vm_refcnt == 0) {
207 
208 		/*
209 		 * Lock the map, to wait out all other references to it.
210 		 * Delete all of the mappings and pages they hold, then call
211 		 * the pmap module to reclaim anything left.
212 		 */
213 		vm_map_lock(&vm->vm_map);
214 		(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
215 		    vm->vm_map.max_offset);
216 		vm_map_unlock(&vm->vm_map);
217 
218 		pmap_release(vmspace_pmap(vm));
219 		vm_map_destroy(&vm->vm_map);
220 		zfree(vmspace_zone, vm);
221 	}
222 }
223 
224 /*
225  *	vm_map_create:
226  *
227  *	Creates and returns a new empty VM map with
228  *	the given physical map structure, and having
229  *	the given lower and upper address bounds.
230  */
231 vm_map_t
232 vm_map_create(pmap, min, max)
233 	pmap_t pmap;
234 	vm_offset_t min, max;
235 {
236 	vm_map_t result;
237 
238 	result = zalloc(mapzone);
239 	vm_map_init(result, min, max);
240 	result->pmap = pmap;
241 	return (result);
242 }
243 
244 /*
245  * Initialize an existing vm_map structure
246  * such as that in the vmspace structure.
247  * The pmap is set elsewhere.
248  */
249 void
250 vm_map_init(map, min, max)
251 	struct vm_map *map;
252 	vm_offset_t min, max;
253 {
254 	map->header.next = map->header.prev = &map->header;
255 	map->nentries = 0;
256 	map->size = 0;
257 	map->system_map = 0;
258 	map->infork = 0;
259 	map->min_offset = min;
260 	map->max_offset = max;
261 	map->first_free = &map->header;
262 	map->hint = &map->header;
263 	map->timestamp = 0;
264 	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
265 }
266 
267 void
268 vm_map_destroy(map)
269 	struct vm_map *map;
270 {
271 	lockdestroy(&map->lock);
272 }
273 
274 /*
275  *	vm_map_entry_dispose:	[ internal use only ]
276  *
277  *	Inverse of vm_map_entry_create.
278  */
279 static void
280 vm_map_entry_dispose(map, entry)
281 	vm_map_t map;
282 	vm_map_entry_t entry;
283 {
284 	zfree((map->system_map || !mapentzone) ? kmapentzone : mapentzone, entry);
285 }
286 
287 /*
288  *	vm_map_entry_create:	[ internal use only ]
289  *
290  *	Allocates a VM map entry for insertion.
291  *	No entry fields are filled in.
292  */
293 static vm_map_entry_t
294 vm_map_entry_create(map)
295 	vm_map_t map;
296 {
297 	vm_map_entry_t new_entry;
298 
299 	new_entry = zalloc((map->system_map || !mapentzone) ?
300 		kmapentzone : mapentzone);
301 	if (new_entry == NULL)
302 	    panic("vm_map_entry_create: kernel resources exhausted");
303 	return(new_entry);
304 }
305 
306 /*
307  *	vm_map_entry_{un,}link:
308  *
309  *	Insert/remove entries from maps.
310  */
311 static __inline void
312 vm_map_entry_link(vm_map_t map,
313 		  vm_map_entry_t after_where,
314 		  vm_map_entry_t entry)
315 {
316 	map->nentries++;
317 	entry->prev = after_where;
318 	entry->next = after_where->next;
319 	entry->next->prev = entry;
320 	after_where->next = entry;
321 }
322 
323 static __inline void
324 vm_map_entry_unlink(vm_map_t map,
325 		    vm_map_entry_t entry)
326 {
327 	vm_map_entry_t prev = entry->prev;
328 	vm_map_entry_t next = entry->next;
329 
330 	next->prev = prev;
331 	prev->next = next;
332 	map->nentries--;
333 }
334 
335 /*
336  *	SAVE_HINT:
337  *
338  *	Saves the specified entry as the hint for
339  *	future lookups.
340  */
341 #define	SAVE_HINT(map,value) \
342 		(map)->hint = (value);
343 
344 /*
345  *	vm_map_lookup_entry:	[ internal use only ]
346  *
347  *	Finds the map entry containing (or
348  *	immediately preceding) the specified address
349  *	in the given map; the entry is returned
350  *	in the "entry" parameter.  The boolean
351  *	result indicates whether the address is
352  *	actually contained in the map.
353  */
354 boolean_t
355 vm_map_lookup_entry(map, address, entry)
356 	vm_map_t map;
357 	vm_offset_t address;
358 	vm_map_entry_t *entry;	/* OUT */
359 {
360 	vm_map_entry_t cur;
361 	vm_map_entry_t last;
362 
363 	/*
364 	 * Start looking either from the head of the list, or from the hint.
365 	 */
366 
367 	cur = map->hint;
368 
369 	if (cur == &map->header)
370 		cur = cur->next;
371 
372 	if (address >= cur->start) {
373 		/*
374 		 * Go from hint to end of list.
375 		 *
376 		 * But first, make a quick check to see if we are already looking
377 		 * at the entry we want (which is usually the case). Note also
378 		 * that we don't need to save the hint here... it is the same
379 		 * hint (unless we are at the header, in which case the hint
380 		 * didn't buy us anything anyway).
381 		 */
382 		last = &map->header;
383 		if ((cur != last) && (cur->end > address)) {
384 			*entry = cur;
385 			return (TRUE);
386 		}
387 	} else {
388 		/*
389 		 * Go from start to hint, *inclusively*
390 		 */
391 		last = cur->next;
392 		cur = map->header.next;
393 	}
394 
395 	/*
396 	 * Search linearly
397 	 */
398 
399 	while (cur != last) {
400 		if (cur->end > address) {
401 			if (address >= cur->start) {
402 				/*
403 				 * Save this lookup for future hints, and
404 				 * return
405 				 */
406 
407 				*entry = cur;
408 				SAVE_HINT(map, cur);
409 				return (TRUE);
410 			}
411 			break;
412 		}
413 		cur = cur->next;
414 	}
415 	*entry = cur->prev;
416 	SAVE_HINT(map, *entry);
417 	return (FALSE);
418 }
419 
420 /*
421  *	vm_map_insert:
422  *
423  *	Inserts the given whole VM object into the target
424  *	map at the specified address range.  The object's
425  *	size should match that of the address range.
426  *
427  *	Requires that the map be locked, and leaves it so.
428  *
429  *	If object is non-NULL, ref count must be bumped by caller
430  *	prior to making call to account for the new entry.
431  */
432 int
433 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
434 	      vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
435 	      int cow)
436 {
437 	vm_map_entry_t new_entry;
438 	vm_map_entry_t prev_entry;
439 	vm_map_entry_t temp_entry;
440 	vm_eflags_t protoeflags;
441 
442 	/*
443 	 * Check that the start and end points are not bogus.
444 	 */
445 
446 	if ((start < map->min_offset) || (end > map->max_offset) ||
447 	    (start >= end))
448 		return (KERN_INVALID_ADDRESS);
449 
450 	/*
451 	 * Find the entry prior to the proposed starting address; if it's part
452 	 * of an existing entry, this range is bogus.
453 	 */
454 
455 	if (vm_map_lookup_entry(map, start, &temp_entry))
456 		return (KERN_NO_SPACE);
457 
458 	prev_entry = temp_entry;
459 
460 	/*
461 	 * Assert that the next entry doesn't overlap the end point.
462 	 */
463 
464 	if ((prev_entry->next != &map->header) &&
465 	    (prev_entry->next->start < end))
466 		return (KERN_NO_SPACE);
467 
468 	protoeflags = 0;
469 
470 	if (cow & MAP_COPY_ON_WRITE)
471 		protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
472 
473 	if (cow & MAP_NOFAULT) {
474 		protoeflags |= MAP_ENTRY_NOFAULT;
475 
476 		KASSERT(object == NULL,
477 			("vm_map_insert: paradoxical MAP_NOFAULT request"));
478 	}
479 	if (cow & MAP_DISABLE_SYNCER)
480 		protoeflags |= MAP_ENTRY_NOSYNC;
481 	if (cow & MAP_DISABLE_COREDUMP)
482 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
483 
484 	if (object) {
485 		/*
486 		 * When object is non-NULL, it could be shared with another
487 		 * process.  We have to set or clear OBJ_ONEMAPPING
488 		 * appropriately.
489 		 */
490 		if ((object->ref_count > 1) || (object->shadow_count != 0)) {
491 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
492 		}
493 	}
494 	else if ((prev_entry != &map->header) &&
495 		 (prev_entry->eflags == protoeflags) &&
496 		 (prev_entry->end == start) &&
497 		 (prev_entry->wired_count == 0) &&
498 		 ((prev_entry->object.vm_object == NULL) ||
499 		  vm_object_coalesce(prev_entry->object.vm_object,
500 				     OFF_TO_IDX(prev_entry->offset),
501 				     (vm_size_t)(prev_entry->end - prev_entry->start),
502 				     (vm_size_t)(end - prev_entry->end)))) {
503 		/*
504 		 * We were able to extend the object.  Determine if we
505 		 * can extend the previous map entry to include the
506 		 * new range as well.
507 		 */
508 		if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
509 		    (prev_entry->protection == prot) &&
510 		    (prev_entry->max_protection == max)) {
511 			map->size += (end - prev_entry->end);
512 			prev_entry->end = end;
513 			vm_map_simplify_entry(map, prev_entry);
514 			return (KERN_SUCCESS);
515 		}
516 
517 		/*
518 		 * If we can extend the object but cannot extend the
519 		 * map entry, we have to create a new map entry.  We
520 		 * must bump the ref count on the extended object to
521 		 * account for it.  object may be NULL.
522 		 */
523 		object = prev_entry->object.vm_object;
524 		offset = prev_entry->offset +
525 			(prev_entry->end - prev_entry->start);
526 		vm_object_reference(object);
527 	}
528 
529 	/*
530 	 * NOTE: if conditionals fail, object can be NULL here.  This occurs
531 	 * in things like the buffer map where we manage kva but do not manage
532 	 * backing objects.
533 	 */
534 
535 	/*
536 	 * Create a new entry
537 	 */
538 
539 	new_entry = vm_map_entry_create(map);
540 	new_entry->start = start;
541 	new_entry->end = end;
542 
543 	new_entry->eflags = protoeflags;
544 	new_entry->object.vm_object = object;
545 	new_entry->offset = offset;
546 	new_entry->avail_ssize = 0;
547 
548 	new_entry->inheritance = VM_INHERIT_DEFAULT;
549 	new_entry->protection = prot;
550 	new_entry->max_protection = max;
551 	new_entry->wired_count = 0;
552 
553 	/*
554 	 * Insert the new entry into the list
555 	 */
556 
557 	vm_map_entry_link(map, prev_entry, new_entry);
558 	map->size += new_entry->end - new_entry->start;
559 
560 	/*
561 	 * Update the free space hint
562 	 */
563 	if ((map->first_free == prev_entry) &&
564 	    (prev_entry->end >= new_entry->start)) {
565 		map->first_free = new_entry;
566 	}
567 
568 #if 0
569 	/*
570 	 * Temporarily removed to avoid MAP_STACK panic, due to
571 	 * MAP_STACK being a huge hack.  Will be added back in
572 	 * when MAP_STACK (and the user stack mapping) is fixed.
573 	 */
574 	/*
575 	 * It may be possible to simplify the entry
576 	 */
577 	vm_map_simplify_entry(map, new_entry);
578 #endif
579 
580 	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
581 		pmap_object_init_pt(map->pmap, start,
582 				    object, OFF_TO_IDX(offset), end - start,
583 				    cow & MAP_PREFAULT_PARTIAL);
584 	}
585 
586 	return (KERN_SUCCESS);
587 }
588 
589 /*
590  * Find sufficient space for `length' bytes in the given map, starting at
591  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
592  */
593 int
594 vm_map_findspace(map, start, length, addr)
595 	vm_map_t map;
596 	vm_offset_t start;
597 	vm_size_t length;
598 	vm_offset_t *addr;
599 {
600 	vm_map_entry_t entry, next;
601 	vm_offset_t end;
602 
603 	if (start < map->min_offset)
604 		start = map->min_offset;
605 	if (start > map->max_offset)
606 		return (1);
607 
608 	/*
609 	 * Look for the first possible address; if there's already something
610 	 * at this address, we have to start after it.
611 	 */
612 	if (start == map->min_offset) {
613 		if ((entry = map->first_free) != &map->header)
614 			start = entry->end;
615 	} else {
616 		vm_map_entry_t tmp;
617 
618 		if (vm_map_lookup_entry(map, start, &tmp))
619 			start = tmp->end;
620 		entry = tmp;
621 	}
622 
623 	/*
624 	 * Look through the rest of the map, trying to fit a new region in the
625 	 * gap between existing regions, or after the very last region.
626 	 */
627 	for (;; start = (entry = next)->end) {
628 		/*
629 		 * Find the end of the proposed new region.  Be sure we didn't
630 		 * go beyond the end of the map, or wrap around the address;
631 		 * if so, we lose.  Otherwise, if this is the last entry, or
632 		 * if the proposed new region fits before the next entry, we
633 		 * win.
634 		 */
635 		end = start + length;
636 		if (end > map->max_offset || end < start)
637 			return (1);
638 		next = entry->next;
639 		if (next == &map->header || next->start >= end)
640 			break;
641 	}
642 	SAVE_HINT(map, entry);
643 	*addr = start;
644 	if (map == kernel_map) {
645 		vm_offset_t ksize;
646 		if ((ksize = round_page(start + length)) > kernel_vm_end) {
647 			pmap_growkernel(ksize);
648 		}
649 	}
650 	return (0);
651 }
652 
653 /*
654  *	vm_map_find finds an unallocated region in the target address
655  *	map with the given length.  The search is defined to be
656  *	first-fit from the specified address; the region found is
657  *	returned in the same parameter.
658  *
659  *	If object is non-NULL, ref count must be bumped by caller
660  *	prior to making call to account for the new entry.
661  */
662 int
663 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
664 	    vm_offset_t *addr,	/* IN/OUT */
665 	    vm_size_t length, boolean_t find_space, vm_prot_t prot,
666 	    vm_prot_t max, int cow)
667 {
668 	vm_offset_t start;
669 	int result, s = 0;
670 
671 	start = *addr;
672 
673 	if (map == kmem_map || map == mb_map)
674 		s = splvm();
675 
676 	vm_map_lock(map);
677 	if (find_space) {
678 		if (vm_map_findspace(map, start, length, addr)) {
679 			vm_map_unlock(map);
680 			if (map == kmem_map || map == mb_map)
681 				splx(s);
682 			return (KERN_NO_SPACE);
683 		}
684 		start = *addr;
685 	}
686 	result = vm_map_insert(map, object, offset,
687 		start, start + length, prot, max, cow);
688 	vm_map_unlock(map);
689 
690 	if (map == kmem_map || map == mb_map)
691 		splx(s);
692 
693 	return (result);
694 }
695 
696 /*
697  *	vm_map_simplify_entry:
698  *
699  *	Simplify the given map entry by merging with either neighbor.  This
700  *	routine also has the ability to merge with both neighbors.
701  *
702  *	The map must be locked.
703  *
704  *	This routine guarentees that the passed entry remains valid (though
705  *	possibly extended).  When merging, this routine may delete one or
706  *	both neighbors.
707  */
708 void
709 vm_map_simplify_entry(map, entry)
710 	vm_map_t map;
711 	vm_map_entry_t entry;
712 {
713 	vm_map_entry_t next, prev;
714 	vm_size_t prevsize, esize;
715 
716 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
717 		return;
718 
719 	prev = entry->prev;
720 	if (prev != &map->header) {
721 		prevsize = prev->end - prev->start;
722 		if ( (prev->end == entry->start) &&
723 		     (prev->object.vm_object == entry->object.vm_object) &&
724 		     (!prev->object.vm_object ||
725 			(prev->offset + prevsize == entry->offset)) &&
726 		     (prev->eflags == entry->eflags) &&
727 		     (prev->protection == entry->protection) &&
728 		     (prev->max_protection == entry->max_protection) &&
729 		     (prev->inheritance == entry->inheritance) &&
730 		     (prev->wired_count == entry->wired_count)) {
731 			if (map->first_free == prev)
732 				map->first_free = entry;
733 			if (map->hint == prev)
734 				map->hint = entry;
735 			vm_map_entry_unlink(map, prev);
736 			entry->start = prev->start;
737 			entry->offset = prev->offset;
738 			if (prev->object.vm_object)
739 				vm_object_deallocate(prev->object.vm_object);
740 			vm_map_entry_dispose(map, prev);
741 		}
742 	}
743 
744 	next = entry->next;
745 	if (next != &map->header) {
746 		esize = entry->end - entry->start;
747 		if ((entry->end == next->start) &&
748 		    (next->object.vm_object == entry->object.vm_object) &&
749 		     (!entry->object.vm_object ||
750 			(entry->offset + esize == next->offset)) &&
751 		    (next->eflags == entry->eflags) &&
752 		    (next->protection == entry->protection) &&
753 		    (next->max_protection == entry->max_protection) &&
754 		    (next->inheritance == entry->inheritance) &&
755 		    (next->wired_count == entry->wired_count)) {
756 			if (map->first_free == next)
757 				map->first_free = entry;
758 			if (map->hint == next)
759 				map->hint = entry;
760 			vm_map_entry_unlink(map, next);
761 			entry->end = next->end;
762 			if (next->object.vm_object)
763 				vm_object_deallocate(next->object.vm_object);
764 			vm_map_entry_dispose(map, next);
765 	        }
766 	}
767 }
768 /*
769  *	vm_map_clip_start:	[ internal use only ]
770  *
771  *	Asserts that the given entry begins at or after
772  *	the specified address; if necessary,
773  *	it splits the entry into two.
774  */
775 #define vm_map_clip_start(map, entry, startaddr) \
776 { \
777 	if (startaddr > entry->start) \
778 		_vm_map_clip_start(map, entry, startaddr); \
779 }
780 
781 /*
782  *	This routine is called only when it is known that
783  *	the entry must be split.
784  */
785 static void
786 _vm_map_clip_start(map, entry, start)
787 	vm_map_t map;
788 	vm_map_entry_t entry;
789 	vm_offset_t start;
790 {
791 	vm_map_entry_t new_entry;
792 
793 	/*
794 	 * Split off the front portion -- note that we must insert the new
795 	 * entry BEFORE this one, so that this entry has the specified
796 	 * starting address.
797 	 */
798 
799 	vm_map_simplify_entry(map, entry);
800 
801 	/*
802 	 * If there is no object backing this entry, we might as well create
803 	 * one now.  If we defer it, an object can get created after the map
804 	 * is clipped, and individual objects will be created for the split-up
805 	 * map.  This is a bit of a hack, but is also about the best place to
806 	 * put this improvement.
807 	 */
808 
809 	if (entry->object.vm_object == NULL && !map->system_map) {
810 		vm_object_t object;
811 		object = vm_object_allocate(OBJT_DEFAULT,
812 				atop(entry->end - entry->start));
813 		entry->object.vm_object = object;
814 		entry->offset = 0;
815 	}
816 
817 	new_entry = vm_map_entry_create(map);
818 	*new_entry = *entry;
819 
820 	new_entry->end = start;
821 	entry->offset += (start - entry->start);
822 	entry->start = start;
823 
824 	vm_map_entry_link(map, entry->prev, new_entry);
825 
826 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
827 		vm_object_reference(new_entry->object.vm_object);
828 	}
829 }
830 
831 /*
832  *	vm_map_clip_end:	[ internal use only ]
833  *
834  *	Asserts that the given entry ends at or before
835  *	the specified address; if necessary,
836  *	it splits the entry into two.
837  */
838 
839 #define vm_map_clip_end(map, entry, endaddr) \
840 { \
841 	if (endaddr < entry->end) \
842 		_vm_map_clip_end(map, entry, endaddr); \
843 }
844 
845 /*
846  *	This routine is called only when it is known that
847  *	the entry must be split.
848  */
849 static void
850 _vm_map_clip_end(map, entry, end)
851 	vm_map_t map;
852 	vm_map_entry_t entry;
853 	vm_offset_t end;
854 {
855 	vm_map_entry_t new_entry;
856 
857 	/*
858 	 * If there is no object backing this entry, we might as well create
859 	 * one now.  If we defer it, an object can get created after the map
860 	 * is clipped, and individual objects will be created for the split-up
861 	 * map.  This is a bit of a hack, but is also about the best place to
862 	 * put this improvement.
863 	 */
864 
865 	if (entry->object.vm_object == NULL && !map->system_map) {
866 		vm_object_t object;
867 		object = vm_object_allocate(OBJT_DEFAULT,
868 				atop(entry->end - entry->start));
869 		entry->object.vm_object = object;
870 		entry->offset = 0;
871 	}
872 
873 	/*
874 	 * Create a new entry and insert it AFTER the specified entry
875 	 */
876 
877 	new_entry = vm_map_entry_create(map);
878 	*new_entry = *entry;
879 
880 	new_entry->start = entry->end = end;
881 	new_entry->offset += (end - entry->start);
882 
883 	vm_map_entry_link(map, entry, new_entry);
884 
885 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
886 		vm_object_reference(new_entry->object.vm_object);
887 	}
888 }
889 
890 /*
891  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
892  *
893  *	Asserts that the starting and ending region
894  *	addresses fall within the valid range of the map.
895  */
896 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
897 		{					\
898 		if (start < vm_map_min(map))		\
899 			start = vm_map_min(map);	\
900 		if (end > vm_map_max(map))		\
901 			end = vm_map_max(map);		\
902 		if (start > end)			\
903 			start = end;			\
904 		}
905 
906 /*
907  *	vm_map_submap:		[ kernel use only ]
908  *
909  *	Mark the given range as handled by a subordinate map.
910  *
911  *	This range must have been created with vm_map_find,
912  *	and no other operations may have been performed on this
913  *	range prior to calling vm_map_submap.
914  *
915  *	Only a limited number of operations can be performed
916  *	within this rage after calling vm_map_submap:
917  *		vm_fault
918  *	[Don't try vm_map_copy!]
919  *
920  *	To remove a submapping, one must first remove the
921  *	range from the superior map, and then destroy the
922  *	submap (if desired).  [Better yet, don't try it.]
923  */
924 int
925 vm_map_submap(map, start, end, submap)
926 	vm_map_t map;
927 	vm_offset_t start;
928 	vm_offset_t end;
929 	vm_map_t submap;
930 {
931 	vm_map_entry_t entry;
932 	int result = KERN_INVALID_ARGUMENT;
933 
934 	vm_map_lock(map);
935 
936 	VM_MAP_RANGE_CHECK(map, start, end);
937 
938 	if (vm_map_lookup_entry(map, start, &entry)) {
939 		vm_map_clip_start(map, entry, start);
940 	} else
941 		entry = entry->next;
942 
943 	vm_map_clip_end(map, entry, end);
944 
945 	if ((entry->start == start) && (entry->end == end) &&
946 	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
947 	    (entry->object.vm_object == NULL)) {
948 		entry->object.sub_map = submap;
949 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
950 		result = KERN_SUCCESS;
951 	}
952 	vm_map_unlock(map);
953 
954 	return (result);
955 }
956 
957 /*
958  *	vm_map_protect:
959  *
960  *	Sets the protection of the specified address
961  *	region in the target map.  If "set_max" is
962  *	specified, the maximum protection is to be set;
963  *	otherwise, only the current protection is affected.
964  */
965 int
966 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
967 	       vm_prot_t new_prot, boolean_t set_max)
968 {
969 	vm_map_entry_t current;
970 	vm_map_entry_t entry;
971 
972 	vm_map_lock(map);
973 
974 	VM_MAP_RANGE_CHECK(map, start, end);
975 
976 	if (vm_map_lookup_entry(map, start, &entry)) {
977 		vm_map_clip_start(map, entry, start);
978 	} else {
979 		entry = entry->next;
980 	}
981 
982 	/*
983 	 * Make a first pass to check for protection violations.
984 	 */
985 
986 	current = entry;
987 	while ((current != &map->header) && (current->start < end)) {
988 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
989 			vm_map_unlock(map);
990 			return (KERN_INVALID_ARGUMENT);
991 		}
992 		if ((new_prot & current->max_protection) != new_prot) {
993 			vm_map_unlock(map);
994 			return (KERN_PROTECTION_FAILURE);
995 		}
996 		current = current->next;
997 	}
998 
999 	/*
1000 	 * Go back and fix up protections. [Note that clipping is not
1001 	 * necessary the second time.]
1002 	 */
1003 
1004 	current = entry;
1005 
1006 	while ((current != &map->header) && (current->start < end)) {
1007 		vm_prot_t old_prot;
1008 
1009 		vm_map_clip_end(map, current, end);
1010 
1011 		old_prot = current->protection;
1012 		if (set_max)
1013 			current->protection =
1014 			    (current->max_protection = new_prot) &
1015 			    old_prot;
1016 		else
1017 			current->protection = new_prot;
1018 
1019 		/*
1020 		 * Update physical map if necessary. Worry about copy-on-write
1021 		 * here -- CHECK THIS XXX
1022 		 */
1023 
1024 		if (current->protection != old_prot) {
1025 #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1026 							VM_PROT_ALL)
1027 
1028 			pmap_protect(map->pmap, current->start,
1029 			    current->end,
1030 			    current->protection & MASK(current));
1031 #undef	MASK
1032 		}
1033 
1034 		vm_map_simplify_entry(map, current);
1035 
1036 		current = current->next;
1037 	}
1038 
1039 	vm_map_unlock(map);
1040 	return (KERN_SUCCESS);
1041 }
1042 
1043 /*
1044  *	vm_map_madvise:
1045  *
1046  * 	This routine traverses a processes map handling the madvise
1047  *	system call.  Advisories are classified as either those effecting
1048  *	the vm_map_entry structure, or those effecting the underlying
1049  *	objects.
1050  */
1051 
1052 int
1053 vm_map_madvise(map, start, end, behav)
1054 	vm_map_t map;
1055 	vm_offset_t start, end;
1056 	int behav;
1057 {
1058 	vm_map_entry_t current, entry;
1059 	int modify_map = 0;
1060 
1061 	/*
1062 	 * Some madvise calls directly modify the vm_map_entry, in which case
1063 	 * we need to use an exclusive lock on the map and we need to perform
1064 	 * various clipping operations.  Otherwise we only need a read-lock
1065 	 * on the map.
1066 	 */
1067 
1068 	switch(behav) {
1069 	case MADV_NORMAL:
1070 	case MADV_SEQUENTIAL:
1071 	case MADV_RANDOM:
1072 	case MADV_NOSYNC:
1073 	case MADV_AUTOSYNC:
1074 	case MADV_NOCORE:
1075 	case MADV_CORE:
1076 		modify_map = 1;
1077 		vm_map_lock(map);
1078 		break;
1079 	case MADV_WILLNEED:
1080 	case MADV_DONTNEED:
1081 	case MADV_FREE:
1082 		vm_map_lock_read(map);
1083 		break;
1084 	default:
1085 		return (KERN_INVALID_ARGUMENT);
1086 	}
1087 
1088 	/*
1089 	 * Locate starting entry and clip if necessary.
1090 	 */
1091 
1092 	VM_MAP_RANGE_CHECK(map, start, end);
1093 
1094 	if (vm_map_lookup_entry(map, start, &entry)) {
1095 		if (modify_map)
1096 			vm_map_clip_start(map, entry, start);
1097 	} else {
1098 		entry = entry->next;
1099 	}
1100 
1101 	if (modify_map) {
1102 		/*
1103 		 * madvise behaviors that are implemented in the vm_map_entry.
1104 		 *
1105 		 * We clip the vm_map_entry so that behavioral changes are
1106 		 * limited to the specified address range.
1107 		 */
1108 		for (current = entry;
1109 		     (current != &map->header) && (current->start < end);
1110 		     current = current->next
1111 		) {
1112 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1113 				continue;
1114 
1115 			vm_map_clip_end(map, current, end);
1116 
1117 			switch (behav) {
1118 			case MADV_NORMAL:
1119 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1120 				break;
1121 			case MADV_SEQUENTIAL:
1122 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1123 				break;
1124 			case MADV_RANDOM:
1125 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1126 				break;
1127 			case MADV_NOSYNC:
1128 				current->eflags |= MAP_ENTRY_NOSYNC;
1129 				break;
1130 			case MADV_AUTOSYNC:
1131 				current->eflags &= ~MAP_ENTRY_NOSYNC;
1132 				break;
1133 			case MADV_NOCORE:
1134 				current->eflags |= MAP_ENTRY_NOCOREDUMP;
1135 				break;
1136 			case MADV_CORE:
1137 				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1138 				break;
1139 			default:
1140 				break;
1141 			}
1142 			vm_map_simplify_entry(map, current);
1143 		}
1144 		vm_map_unlock(map);
1145 	} else {
1146 		vm_pindex_t pindex;
1147 		int count;
1148 
1149 		/*
1150 		 * madvise behaviors that are implemented in the underlying
1151 		 * vm_object.
1152 		 *
1153 		 * Since we don't clip the vm_map_entry, we have to clip
1154 		 * the vm_object pindex and count.
1155 		 */
1156 		for (current = entry;
1157 		     (current != &map->header) && (current->start < end);
1158 		     current = current->next
1159 		) {
1160 			vm_offset_t useStart;
1161 
1162 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1163 				continue;
1164 
1165 			pindex = OFF_TO_IDX(current->offset);
1166 			count = atop(current->end - current->start);
1167 			useStart = current->start;
1168 
1169 			if (current->start < start) {
1170 				pindex += atop(start - current->start);
1171 				count -= atop(start - current->start);
1172 				useStart = start;
1173 			}
1174 			if (current->end > end)
1175 				count -= atop(current->end - end);
1176 
1177 			if (count <= 0)
1178 				continue;
1179 
1180 			vm_object_madvise(current->object.vm_object,
1181 					  pindex, count, behav);
1182 			if (behav == MADV_WILLNEED) {
1183 				pmap_object_init_pt(
1184 				    map->pmap,
1185 				    useStart,
1186 				    current->object.vm_object,
1187 				    pindex,
1188 				    (count << PAGE_SHIFT),
1189 				    0
1190 				);
1191 			}
1192 		}
1193 		vm_map_unlock_read(map);
1194 	}
1195 	return(0);
1196 }
1197 
1198 
1199 /*
1200  *	vm_map_inherit:
1201  *
1202  *	Sets the inheritance of the specified address
1203  *	range in the target map.  Inheritance
1204  *	affects how the map will be shared with
1205  *	child maps at the time of vm_map_fork.
1206  */
1207 int
1208 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1209 	       vm_inherit_t new_inheritance)
1210 {
1211 	vm_map_entry_t entry;
1212 	vm_map_entry_t temp_entry;
1213 
1214 	switch (new_inheritance) {
1215 	case VM_INHERIT_NONE:
1216 	case VM_INHERIT_COPY:
1217 	case VM_INHERIT_SHARE:
1218 		break;
1219 	default:
1220 		return (KERN_INVALID_ARGUMENT);
1221 	}
1222 
1223 	vm_map_lock(map);
1224 
1225 	VM_MAP_RANGE_CHECK(map, start, end);
1226 
1227 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1228 		entry = temp_entry;
1229 		vm_map_clip_start(map, entry, start);
1230 	} else
1231 		entry = temp_entry->next;
1232 
1233 	while ((entry != &map->header) && (entry->start < end)) {
1234 		vm_map_clip_end(map, entry, end);
1235 
1236 		entry->inheritance = new_inheritance;
1237 
1238 		vm_map_simplify_entry(map, entry);
1239 
1240 		entry = entry->next;
1241 	}
1242 
1243 	vm_map_unlock(map);
1244 	return (KERN_SUCCESS);
1245 }
1246 
1247 /*
1248  * Implement the semantics of mlock
1249  */
1250 int
1251 vm_map_user_pageable(map, start, end, new_pageable)
1252 	vm_map_t map;
1253 	vm_offset_t start;
1254 	vm_offset_t end;
1255 	boolean_t new_pageable;
1256 {
1257 	vm_map_entry_t entry;
1258 	vm_map_entry_t start_entry;
1259 	vm_offset_t estart;
1260 	int rv;
1261 
1262 	vm_map_lock(map);
1263 	VM_MAP_RANGE_CHECK(map, start, end);
1264 
1265 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1266 		vm_map_unlock(map);
1267 		return (KERN_INVALID_ADDRESS);
1268 	}
1269 
1270 	if (new_pageable) {
1271 
1272 		entry = start_entry;
1273 		vm_map_clip_start(map, entry, start);
1274 
1275 		/*
1276 		 * Now decrement the wiring count for each region. If a region
1277 		 * becomes completely unwired, unwire its physical pages and
1278 		 * mappings.
1279 		 */
1280 		while ((entry != &map->header) && (entry->start < end)) {
1281 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1282 				vm_map_clip_end(map, entry, end);
1283 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1284 				entry->wired_count--;
1285 				if (entry->wired_count == 0)
1286 					vm_fault_unwire(map, entry->start, entry->end);
1287 			}
1288 			vm_map_simplify_entry(map,entry);
1289 			entry = entry->next;
1290 		}
1291 	} else {
1292 
1293 		entry = start_entry;
1294 
1295 		while ((entry != &map->header) && (entry->start < end)) {
1296 
1297 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1298 				entry = entry->next;
1299 				continue;
1300 			}
1301 
1302 			if (entry->wired_count != 0) {
1303 				entry->wired_count++;
1304 				entry->eflags |= MAP_ENTRY_USER_WIRED;
1305 				entry = entry->next;
1306 				continue;
1307 			}
1308 
1309 			/* Here on entry being newly wired */
1310 
1311 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1312 				int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1313 				if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1314 
1315 					vm_object_shadow(&entry->object.vm_object,
1316 					    &entry->offset,
1317 					    atop(entry->end - entry->start));
1318 					entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1319 
1320 				} else if (entry->object.vm_object == NULL &&
1321 					   !map->system_map) {
1322 
1323 					entry->object.vm_object =
1324 					    vm_object_allocate(OBJT_DEFAULT,
1325 						atop(entry->end - entry->start));
1326 					entry->offset = (vm_offset_t) 0;
1327 
1328 				}
1329 			}
1330 
1331 			vm_map_clip_start(map, entry, start);
1332 			vm_map_clip_end(map, entry, end);
1333 
1334 			entry->wired_count++;
1335 			entry->eflags |= MAP_ENTRY_USER_WIRED;
1336 			estart = entry->start;
1337 
1338 			/* First we need to allow map modifications */
1339 			vm_map_set_recursive(map);
1340 			vm_map_lock_downgrade(map);
1341 			map->timestamp++;
1342 
1343 			rv = vm_fault_user_wire(map, entry->start, entry->end);
1344 			if (rv) {
1345 
1346 				entry->wired_count--;
1347 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1348 
1349 				vm_map_clear_recursive(map);
1350 				vm_map_unlock(map);
1351 
1352 				(void) vm_map_user_pageable(map, start, entry->start, TRUE);
1353 				return rv;
1354 			}
1355 
1356 			vm_map_clear_recursive(map);
1357 			if (vm_map_lock_upgrade(map)) {
1358 				vm_map_lock(map);
1359 				if (vm_map_lookup_entry(map, estart, &entry)
1360 				    == FALSE) {
1361 					vm_map_unlock(map);
1362 					(void) vm_map_user_pageable(map,
1363 								    start,
1364 								    estart,
1365 								    TRUE);
1366 					return (KERN_INVALID_ADDRESS);
1367 				}
1368 			}
1369 			vm_map_simplify_entry(map,entry);
1370 		}
1371 	}
1372 	map->timestamp++;
1373 	vm_map_unlock(map);
1374 	return KERN_SUCCESS;
1375 }
1376 
1377 /*
1378  *	vm_map_pageable:
1379  *
1380  *	Sets the pageability of the specified address
1381  *	range in the target map.  Regions specified
1382  *	as not pageable require locked-down physical
1383  *	memory and physical page maps.
1384  *
1385  *	The map must not be locked, but a reference
1386  *	must remain to the map throughout the call.
1387  */
1388 int
1389 vm_map_pageable(map, start, end, new_pageable)
1390 	vm_map_t map;
1391 	vm_offset_t start;
1392 	vm_offset_t end;
1393 	boolean_t new_pageable;
1394 {
1395 	vm_map_entry_t entry;
1396 	vm_map_entry_t start_entry;
1397 	vm_offset_t failed = 0;
1398 	int rv;
1399 
1400 	vm_map_lock(map);
1401 
1402 	VM_MAP_RANGE_CHECK(map, start, end);
1403 
1404 	/*
1405 	 * Only one pageability change may take place at one time, since
1406 	 * vm_fault assumes it will be called only once for each
1407 	 * wiring/unwiring.  Therefore, we have to make sure we're actually
1408 	 * changing the pageability for the entire region.  We do so before
1409 	 * making any changes.
1410 	 */
1411 
1412 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1413 		vm_map_unlock(map);
1414 		return (KERN_INVALID_ADDRESS);
1415 	}
1416 	entry = start_entry;
1417 
1418 	/*
1419 	 * Actions are rather different for wiring and unwiring, so we have
1420 	 * two separate cases.
1421 	 */
1422 
1423 	if (new_pageable) {
1424 
1425 		vm_map_clip_start(map, entry, start);
1426 
1427 		/*
1428 		 * Unwiring.  First ensure that the range to be unwired is
1429 		 * really wired down and that there are no holes.
1430 		 */
1431 		while ((entry != &map->header) && (entry->start < end)) {
1432 
1433 			if (entry->wired_count == 0 ||
1434 			    (entry->end < end &&
1435 				(entry->next == &map->header ||
1436 				    entry->next->start > entry->end))) {
1437 				vm_map_unlock(map);
1438 				return (KERN_INVALID_ARGUMENT);
1439 			}
1440 			entry = entry->next;
1441 		}
1442 
1443 		/*
1444 		 * Now decrement the wiring count for each region. If a region
1445 		 * becomes completely unwired, unwire its physical pages and
1446 		 * mappings.
1447 		 */
1448 		entry = start_entry;
1449 		while ((entry != &map->header) && (entry->start < end)) {
1450 			vm_map_clip_end(map, entry, end);
1451 
1452 			entry->wired_count--;
1453 			if (entry->wired_count == 0)
1454 				vm_fault_unwire(map, entry->start, entry->end);
1455 
1456 			vm_map_simplify_entry(map, entry);
1457 
1458 			entry = entry->next;
1459 		}
1460 	} else {
1461 		/*
1462 		 * Wiring.  We must do this in two passes:
1463 		 *
1464 		 * 1.  Holding the write lock, we create any shadow or zero-fill
1465 		 * objects that need to be created. Then we clip each map
1466 		 * entry to the region to be wired and increment its wiring
1467 		 * count.  We create objects before clipping the map entries
1468 		 * to avoid object proliferation.
1469 		 *
1470 		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
1471 		 * fault in the pages for any newly wired area (wired_count is
1472 		 * 1).
1473 		 *
1474 		 * Downgrading to a read lock for vm_fault_wire avoids a possible
1475 		 * deadlock with another process that may have faulted on one
1476 		 * of the pages to be wired (it would mark the page busy,
1477 		 * blocking us, then in turn block on the map lock that we
1478 		 * hold).  Because of problems in the recursive lock package,
1479 		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
1480 		 * any actions that require the write lock must be done
1481 		 * beforehand.  Because we keep the read lock on the map, the
1482 		 * copy-on-write status of the entries we modify here cannot
1483 		 * change.
1484 		 */
1485 
1486 		/*
1487 		 * Pass 1.
1488 		 */
1489 		while ((entry != &map->header) && (entry->start < end)) {
1490 			if (entry->wired_count == 0) {
1491 
1492 				/*
1493 				 * Perform actions of vm_map_lookup that need
1494 				 * the write lock on the map: create a shadow
1495 				 * object for a copy-on-write region, or an
1496 				 * object for a zero-fill region.
1497 				 *
1498 				 * We don't have to do this for entries that
1499 				 * point to sub maps, because we won't
1500 				 * hold the lock on the sub map.
1501 				 */
1502 				if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1503 					int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1504 					if (copyflag &&
1505 					    ((entry->protection & VM_PROT_WRITE) != 0)) {
1506 
1507 						vm_object_shadow(&entry->object.vm_object,
1508 						    &entry->offset,
1509 						    atop(entry->end - entry->start));
1510 						entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1511 					} else if (entry->object.vm_object == NULL &&
1512 						   !map->system_map) {
1513 						entry->object.vm_object =
1514 						    vm_object_allocate(OBJT_DEFAULT,
1515 							atop(entry->end - entry->start));
1516 						entry->offset = (vm_offset_t) 0;
1517 					}
1518 				}
1519 			}
1520 			vm_map_clip_start(map, entry, start);
1521 			vm_map_clip_end(map, entry, end);
1522 			entry->wired_count++;
1523 
1524 			/*
1525 			 * Check for holes
1526 			 */
1527 			if (entry->end < end &&
1528 			    (entry->next == &map->header ||
1529 				entry->next->start > entry->end)) {
1530 				/*
1531 				 * Found one.  Object creation actions do not
1532 				 * need to be undone, but the wired counts
1533 				 * need to be restored.
1534 				 */
1535 				while (entry != &map->header && entry->end > start) {
1536 					entry->wired_count--;
1537 					entry = entry->prev;
1538 				}
1539 				vm_map_unlock(map);
1540 				return (KERN_INVALID_ARGUMENT);
1541 			}
1542 			entry = entry->next;
1543 		}
1544 
1545 		/*
1546 		 * Pass 2.
1547 		 */
1548 
1549 		/*
1550 		 * HACK HACK HACK HACK
1551 		 *
1552 		 * If we are wiring in the kernel map or a submap of it,
1553 		 * unlock the map to avoid deadlocks.  We trust that the
1554 		 * kernel is well-behaved, and therefore will not do
1555 		 * anything destructive to this region of the map while
1556 		 * we have it unlocked.  We cannot trust user processes
1557 		 * to do the same.
1558 		 *
1559 		 * HACK HACK HACK HACK
1560 		 */
1561 		if (vm_map_pmap(map) == kernel_pmap) {
1562 			vm_map_unlock(map);	/* trust me ... */
1563 		} else {
1564 			vm_map_lock_downgrade(map);
1565 		}
1566 
1567 		rv = 0;
1568 		entry = start_entry;
1569 		while (entry != &map->header && entry->start < end) {
1570 			/*
1571 			 * If vm_fault_wire fails for any page we need to undo
1572 			 * what has been done.  We decrement the wiring count
1573 			 * for those pages which have not yet been wired (now)
1574 			 * and unwire those that have (later).
1575 			 *
1576 			 * XXX this violates the locking protocol on the map,
1577 			 * needs to be fixed.
1578 			 */
1579 			if (rv)
1580 				entry->wired_count--;
1581 			else if (entry->wired_count == 1) {
1582 				rv = vm_fault_wire(map, entry->start, entry->end);
1583 				if (rv) {
1584 					failed = entry->start;
1585 					entry->wired_count--;
1586 				}
1587 			}
1588 			entry = entry->next;
1589 		}
1590 
1591 		if (vm_map_pmap(map) == kernel_pmap) {
1592 			vm_map_lock(map);
1593 		}
1594 		if (rv) {
1595 			vm_map_unlock(map);
1596 			(void) vm_map_pageable(map, start, failed, TRUE);
1597 			return (rv);
1598 		}
1599 		vm_map_simplify_entry(map, start_entry);
1600 	}
1601 
1602 	vm_map_unlock(map);
1603 
1604 	return (KERN_SUCCESS);
1605 }
1606 
1607 /*
1608  * vm_map_clean
1609  *
1610  * Push any dirty cached pages in the address range to their pager.
1611  * If syncio is TRUE, dirty pages are written synchronously.
1612  * If invalidate is TRUE, any cached pages are freed as well.
1613  *
1614  * Returns an error if any part of the specified range is not mapped.
1615  */
1616 int
1617 vm_map_clean(map, start, end, syncio, invalidate)
1618 	vm_map_t map;
1619 	vm_offset_t start;
1620 	vm_offset_t end;
1621 	boolean_t syncio;
1622 	boolean_t invalidate;
1623 {
1624 	vm_map_entry_t current;
1625 	vm_map_entry_t entry;
1626 	vm_size_t size;
1627 	vm_object_t object;
1628 	vm_ooffset_t offset;
1629 
1630 	vm_map_lock_read(map);
1631 	VM_MAP_RANGE_CHECK(map, start, end);
1632 	if (!vm_map_lookup_entry(map, start, &entry)) {
1633 		vm_map_unlock_read(map);
1634 		return (KERN_INVALID_ADDRESS);
1635 	}
1636 	/*
1637 	 * Make a first pass to check for holes.
1638 	 */
1639 	for (current = entry; current->start < end; current = current->next) {
1640 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1641 			vm_map_unlock_read(map);
1642 			return (KERN_INVALID_ARGUMENT);
1643 		}
1644 		if (end > current->end &&
1645 		    (current->next == &map->header ||
1646 			current->end != current->next->start)) {
1647 			vm_map_unlock_read(map);
1648 			return (KERN_INVALID_ADDRESS);
1649 		}
1650 	}
1651 
1652 	if (invalidate)
1653 		pmap_remove(vm_map_pmap(map), start, end);
1654 	/*
1655 	 * Make a second pass, cleaning/uncaching pages from the indicated
1656 	 * objects as we go.
1657 	 */
1658 	for (current = entry; current->start < end; current = current->next) {
1659 		offset = current->offset + (start - current->start);
1660 		size = (end <= current->end ? end : current->end) - start;
1661 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1662 			vm_map_t smap;
1663 			vm_map_entry_t tentry;
1664 			vm_size_t tsize;
1665 
1666 			smap = current->object.sub_map;
1667 			vm_map_lock_read(smap);
1668 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1669 			tsize = tentry->end - offset;
1670 			if (tsize < size)
1671 				size = tsize;
1672 			object = tentry->object.vm_object;
1673 			offset = tentry->offset + (offset - tentry->start);
1674 			vm_map_unlock_read(smap);
1675 		} else {
1676 			object = current->object.vm_object;
1677 		}
1678 		/*
1679 		 * Note that there is absolutely no sense in writing out
1680 		 * anonymous objects, so we track down the vnode object
1681 		 * to write out.
1682 		 * We invalidate (remove) all pages from the address space
1683 		 * anyway, for semantic correctness.
1684 		 */
1685 		while (object->backing_object) {
1686 			object = object->backing_object;
1687 			offset += object->backing_object_offset;
1688 			if (object->size < OFF_TO_IDX( offset + size))
1689 				size = IDX_TO_OFF(object->size) - offset;
1690 		}
1691 		if (object && (object->type == OBJT_VNODE) &&
1692 		    (current->protection & VM_PROT_WRITE)) {
1693 			/*
1694 			 * Flush pages if writing is allowed, invalidate them
1695 			 * if invalidation requested.  Pages undergoing I/O
1696 			 * will be ignored by vm_object_page_remove().
1697 			 *
1698 			 * We cannot lock the vnode and then wait for paging
1699 			 * to complete without deadlocking against vm_fault.
1700 			 * Instead we simply call vm_object_page_remove() and
1701 			 * allow it to block internally on a page-by-page
1702 			 * basis when it encounters pages undergoing async
1703 			 * I/O.
1704 			 */
1705 			int flags;
1706 
1707 			vm_object_reference(object);
1708 			vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
1709 			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1710 			flags |= invalidate ? OBJPC_INVAL : 0;
1711 			vm_object_page_clean(object,
1712 			    OFF_TO_IDX(offset),
1713 			    OFF_TO_IDX(offset + size + PAGE_MASK),
1714 			    flags);
1715 			if (invalidate) {
1716 				/*vm_object_pip_wait(object, "objmcl");*/
1717 				vm_object_page_remove(object,
1718 				    OFF_TO_IDX(offset),
1719 				    OFF_TO_IDX(offset + size + PAGE_MASK),
1720 				    FALSE);
1721 			}
1722 			VOP_UNLOCK(object->handle, 0, curproc);
1723 			vm_object_deallocate(object);
1724 		}
1725 		start += size;
1726 	}
1727 
1728 	vm_map_unlock_read(map);
1729 	return (KERN_SUCCESS);
1730 }
1731 
1732 /*
1733  *	vm_map_entry_unwire:	[ internal use only ]
1734  *
1735  *	Make the region specified by this entry pageable.
1736  *
1737  *	The map in question should be locked.
1738  *	[This is the reason for this routine's existence.]
1739  */
1740 static void
1741 vm_map_entry_unwire(map, entry)
1742 	vm_map_t map;
1743 	vm_map_entry_t entry;
1744 {
1745 	vm_fault_unwire(map, entry->start, entry->end);
1746 	entry->wired_count = 0;
1747 }
1748 
1749 /*
1750  *	vm_map_entry_delete:	[ internal use only ]
1751  *
1752  *	Deallocate the given entry from the target map.
1753  */
1754 static void
1755 vm_map_entry_delete(map, entry)
1756 	vm_map_t map;
1757 	vm_map_entry_t entry;
1758 {
1759 	vm_map_entry_unlink(map, entry);
1760 	map->size -= entry->end - entry->start;
1761 
1762 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1763 		vm_object_deallocate(entry->object.vm_object);
1764 	}
1765 
1766 	vm_map_entry_dispose(map, entry);
1767 }
1768 
1769 /*
1770  *	vm_map_delete:	[ internal use only ]
1771  *
1772  *	Deallocates the given address range from the target
1773  *	map.
1774  */
1775 int
1776 vm_map_delete(map, start, end)
1777 	vm_map_t map;
1778 	vm_offset_t start;
1779 	vm_offset_t end;
1780 {
1781 	vm_object_t object;
1782 	vm_map_entry_t entry;
1783 	vm_map_entry_t first_entry;
1784 
1785 	/*
1786 	 * Find the start of the region, and clip it
1787 	 */
1788 
1789 	if (!vm_map_lookup_entry(map, start, &first_entry))
1790 		entry = first_entry->next;
1791 	else {
1792 		entry = first_entry;
1793 		vm_map_clip_start(map, entry, start);
1794 		/*
1795 		 * Fix the lookup hint now, rather than each time though the
1796 		 * loop.
1797 		 */
1798 		SAVE_HINT(map, entry->prev);
1799 	}
1800 
1801 	/*
1802 	 * Save the free space hint
1803 	 */
1804 
1805 	if (entry == &map->header) {
1806 		map->first_free = &map->header;
1807 	} else if (map->first_free->start >= start) {
1808 		map->first_free = entry->prev;
1809 	}
1810 
1811 	/*
1812 	 * Step through all entries in this region
1813 	 */
1814 
1815 	while ((entry != &map->header) && (entry->start < end)) {
1816 		vm_map_entry_t next;
1817 		vm_offset_t s, e;
1818 		vm_pindex_t offidxstart, offidxend, count;
1819 
1820 		vm_map_clip_end(map, entry, end);
1821 
1822 		s = entry->start;
1823 		e = entry->end;
1824 		next = entry->next;
1825 
1826 		offidxstart = OFF_TO_IDX(entry->offset);
1827 		count = OFF_TO_IDX(e - s);
1828 		object = entry->object.vm_object;
1829 
1830 		/*
1831 		 * Unwire before removing addresses from the pmap; otherwise,
1832 		 * unwiring will put the entries back in the pmap.
1833 		 */
1834 		if (entry->wired_count != 0) {
1835 			vm_map_entry_unwire(map, entry);
1836 		}
1837 
1838 		offidxend = offidxstart + count;
1839 
1840 		if ((object == kernel_object) || (object == kmem_object)) {
1841 			vm_object_page_remove(object, offidxstart, offidxend, FALSE);
1842 		} else {
1843 			pmap_remove(map->pmap, s, e);
1844 			if (object != NULL &&
1845 			    object->ref_count != 1 &&
1846 			    (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
1847 			    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
1848 				vm_object_collapse(object);
1849 				vm_object_page_remove(object, offidxstart, offidxend, FALSE);
1850 				if (object->type == OBJT_SWAP) {
1851 					swap_pager_freespace(object, offidxstart, count);
1852 				}
1853 				if (offidxend >= object->size &&
1854 				    offidxstart < object->size) {
1855 					object->size = offidxstart;
1856 				}
1857 			}
1858 		}
1859 
1860 		/*
1861 		 * Delete the entry (which may delete the object) only after
1862 		 * removing all pmap entries pointing to its pages.
1863 		 * (Otherwise, its page frames may be reallocated, and any
1864 		 * modify bits will be set in the wrong object!)
1865 		 */
1866 		vm_map_entry_delete(map, entry);
1867 		entry = next;
1868 	}
1869 	return (KERN_SUCCESS);
1870 }
1871 
1872 /*
1873  *	vm_map_remove:
1874  *
1875  *	Remove the given address range from the target map.
1876  *	This is the exported form of vm_map_delete.
1877  */
1878 int
1879 vm_map_remove(map, start, end)
1880 	vm_map_t map;
1881 	vm_offset_t start;
1882 	vm_offset_t end;
1883 {
1884 	int result, s = 0;
1885 
1886 	if (map == kmem_map || map == mb_map)
1887 		s = splvm();
1888 
1889 	vm_map_lock(map);
1890 	VM_MAP_RANGE_CHECK(map, start, end);
1891 	result = vm_map_delete(map, start, end);
1892 	vm_map_unlock(map);
1893 
1894 	if (map == kmem_map || map == mb_map)
1895 		splx(s);
1896 
1897 	return (result);
1898 }
1899 
1900 /*
1901  *	vm_map_check_protection:
1902  *
1903  *	Assert that the target map allows the specified
1904  *	privilege on the entire address region given.
1905  *	The entire region must be allocated.
1906  */
1907 boolean_t
1908 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
1909 			vm_prot_t protection)
1910 {
1911 	vm_map_entry_t entry;
1912 	vm_map_entry_t tmp_entry;
1913 
1914 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1915 		return (FALSE);
1916 	}
1917 	entry = tmp_entry;
1918 
1919 	while (start < end) {
1920 		if (entry == &map->header) {
1921 			return (FALSE);
1922 		}
1923 		/*
1924 		 * No holes allowed!
1925 		 */
1926 
1927 		if (start < entry->start) {
1928 			return (FALSE);
1929 		}
1930 		/*
1931 		 * Check protection associated with entry.
1932 		 */
1933 
1934 		if ((entry->protection & protection) != protection) {
1935 			return (FALSE);
1936 		}
1937 		/* go to next entry */
1938 
1939 		start = entry->end;
1940 		entry = entry->next;
1941 	}
1942 	return (TRUE);
1943 }
1944 
1945 /*
1946  * Split the pages in a map entry into a new object.  This affords
1947  * easier removal of unused pages, and keeps object inheritance from
1948  * being a negative impact on memory usage.
1949  */
1950 static void
1951 vm_map_split(entry)
1952 	vm_map_entry_t entry;
1953 {
1954 	vm_page_t m;
1955 	vm_object_t orig_object, new_object, source;
1956 	vm_offset_t s, e;
1957 	vm_pindex_t offidxstart, offidxend, idx;
1958 	vm_size_t size;
1959 	vm_ooffset_t offset;
1960 
1961 	orig_object = entry->object.vm_object;
1962 	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
1963 		return;
1964 	if (orig_object->ref_count <= 1)
1965 		return;
1966 
1967 	offset = entry->offset;
1968 	s = entry->start;
1969 	e = entry->end;
1970 
1971 	offidxstart = OFF_TO_IDX(offset);
1972 	offidxend = offidxstart + OFF_TO_IDX(e - s);
1973 	size = offidxend - offidxstart;
1974 
1975 	new_object = vm_pager_allocate(orig_object->type,
1976 		NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
1977 	if (new_object == NULL)
1978 		return;
1979 
1980 	source = orig_object->backing_object;
1981 	if (source != NULL) {
1982 		vm_object_reference(source);	/* Referenced by new_object */
1983 		TAILQ_INSERT_TAIL(&source->shadow_head,
1984 				  new_object, shadow_list);
1985 		vm_object_clear_flag(source, OBJ_ONEMAPPING);
1986 		new_object->backing_object_offset =
1987 			orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
1988 		new_object->backing_object = source;
1989 		source->shadow_count++;
1990 		source->generation++;
1991 	}
1992 
1993 	for (idx = 0; idx < size; idx++) {
1994 		vm_page_t m;
1995 
1996 	retry:
1997 		m = vm_page_lookup(orig_object, offidxstart + idx);
1998 		if (m == NULL)
1999 			continue;
2000 
2001 		/*
2002 		 * We must wait for pending I/O to complete before we can
2003 		 * rename the page.
2004 		 *
2005 		 * We do not have to VM_PROT_NONE the page as mappings should
2006 		 * not be changed by this operation.
2007 		 */
2008 		if (vm_page_sleep_busy(m, TRUE, "spltwt"))
2009 			goto retry;
2010 
2011 		vm_page_busy(m);
2012 		vm_page_rename(m, new_object, idx);
2013 		/* page automatically made dirty by rename and cache handled */
2014 		vm_page_busy(m);
2015 	}
2016 
2017 	if (orig_object->type == OBJT_SWAP) {
2018 		vm_object_pip_add(orig_object, 1);
2019 		/*
2020 		 * copy orig_object pages into new_object
2021 		 * and destroy unneeded pages in
2022 		 * shadow object.
2023 		 */
2024 		swap_pager_copy(orig_object, new_object, offidxstart, 0);
2025 		vm_object_pip_wakeup(orig_object);
2026 	}
2027 
2028 	for (idx = 0; idx < size; idx++) {
2029 		m = vm_page_lookup(new_object, idx);
2030 		if (m) {
2031 			vm_page_wakeup(m);
2032 		}
2033 	}
2034 
2035 	entry->object.vm_object = new_object;
2036 	entry->offset = 0LL;
2037 	vm_object_deallocate(orig_object);
2038 }
2039 
2040 /*
2041  *	vm_map_copy_entry:
2042  *
2043  *	Copies the contents of the source entry to the destination
2044  *	entry.  The entries *must* be aligned properly.
2045  */
2046 static void
2047 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
2048 	vm_map_t src_map, dst_map;
2049 	vm_map_entry_t src_entry, dst_entry;
2050 {
2051 	vm_object_t src_object;
2052 
2053 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2054 		return;
2055 
2056 	if (src_entry->wired_count == 0) {
2057 
2058 		/*
2059 		 * If the source entry is marked needs_copy, it is already
2060 		 * write-protected.
2061 		 */
2062 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2063 			pmap_protect(src_map->pmap,
2064 			    src_entry->start,
2065 			    src_entry->end,
2066 			    src_entry->protection & ~VM_PROT_WRITE);
2067 		}
2068 
2069 		/*
2070 		 * Make a copy of the object.
2071 		 */
2072 		if ((src_object = src_entry->object.vm_object) != NULL) {
2073 
2074 			if ((src_object->handle == NULL) &&
2075 				(src_object->type == OBJT_DEFAULT ||
2076 				 src_object->type == OBJT_SWAP)) {
2077 				vm_object_collapse(src_object);
2078 				if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2079 					vm_map_split(src_entry);
2080 					src_object = src_entry->object.vm_object;
2081 				}
2082 			}
2083 
2084 			vm_object_reference(src_object);
2085 			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2086 			dst_entry->object.vm_object = src_object;
2087 			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2088 			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2089 			dst_entry->offset = src_entry->offset;
2090 		} else {
2091 			dst_entry->object.vm_object = NULL;
2092 			dst_entry->offset = 0;
2093 		}
2094 
2095 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2096 		    dst_entry->end - dst_entry->start, src_entry->start);
2097 	} else {
2098 		/*
2099 		 * Of course, wired down pages can't be set copy-on-write.
2100 		 * Cause wired pages to be copied into the new map by
2101 		 * simulating faults (the new pages are pageable)
2102 		 */
2103 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2104 	}
2105 }
2106 
2107 /*
2108  * vmspace_fork:
2109  * Create a new process vmspace structure and vm_map
2110  * based on those of an existing process.  The new map
2111  * is based on the old map, according to the inheritance
2112  * values on the regions in that map.
2113  *
2114  * The source map must not be locked.
2115  */
2116 struct vmspace *
2117 vmspace_fork(vm1)
2118 	struct vmspace *vm1;
2119 {
2120 	struct vmspace *vm2;
2121 	vm_map_t old_map = &vm1->vm_map;
2122 	vm_map_t new_map;
2123 	vm_map_entry_t old_entry;
2124 	vm_map_entry_t new_entry;
2125 	vm_object_t object;
2126 
2127 	vm_map_lock(old_map);
2128 	old_map->infork = 1;
2129 
2130 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2131 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2132 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
2133 	new_map = &vm2->vm_map;	/* XXX */
2134 	new_map->timestamp = 1;
2135 
2136 	old_entry = old_map->header.next;
2137 
2138 	while (old_entry != &old_map->header) {
2139 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2140 			panic("vm_map_fork: encountered a submap");
2141 
2142 		switch (old_entry->inheritance) {
2143 		case VM_INHERIT_NONE:
2144 			break;
2145 
2146 		case VM_INHERIT_SHARE:
2147 			/*
2148 			 * Clone the entry, creating the shared object if necessary.
2149 			 */
2150 			object = old_entry->object.vm_object;
2151 			if (object == NULL) {
2152 				object = vm_object_allocate(OBJT_DEFAULT,
2153 					atop(old_entry->end - old_entry->start));
2154 				old_entry->object.vm_object = object;
2155 				old_entry->offset = (vm_offset_t) 0;
2156 			}
2157 
2158 			/*
2159 			 * Add the reference before calling vm_object_shadow
2160 			 * to insure that a shadow object is created.
2161 			 */
2162 			vm_object_reference(object);
2163 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2164 				vm_object_shadow(&old_entry->object.vm_object,
2165 					&old_entry->offset,
2166 					atop(old_entry->end - old_entry->start));
2167 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2168 				/* Transfer the second reference too. */
2169 				vm_object_reference(
2170 				    old_entry->object.vm_object);
2171 				vm_object_deallocate(object);
2172 				object = old_entry->object.vm_object;
2173 			}
2174 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
2175 
2176 			/*
2177 			 * Clone the entry, referencing the shared object.
2178 			 */
2179 			new_entry = vm_map_entry_create(new_map);
2180 			*new_entry = *old_entry;
2181 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2182 			new_entry->wired_count = 0;
2183 
2184 			/*
2185 			 * Insert the entry into the new map -- we know we're
2186 			 * inserting at the end of the new map.
2187 			 */
2188 
2189 			vm_map_entry_link(new_map, new_map->header.prev,
2190 			    new_entry);
2191 
2192 			/*
2193 			 * Update the physical map
2194 			 */
2195 
2196 			pmap_copy(new_map->pmap, old_map->pmap,
2197 			    new_entry->start,
2198 			    (old_entry->end - old_entry->start),
2199 			    old_entry->start);
2200 			break;
2201 
2202 		case VM_INHERIT_COPY:
2203 			/*
2204 			 * Clone the entry and link into the map.
2205 			 */
2206 			new_entry = vm_map_entry_create(new_map);
2207 			*new_entry = *old_entry;
2208 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2209 			new_entry->wired_count = 0;
2210 			new_entry->object.vm_object = NULL;
2211 			vm_map_entry_link(new_map, new_map->header.prev,
2212 			    new_entry);
2213 			vm_map_copy_entry(old_map, new_map, old_entry,
2214 			    new_entry);
2215 			break;
2216 		}
2217 		old_entry = old_entry->next;
2218 	}
2219 
2220 	new_map->size = old_map->size;
2221 	old_map->infork = 0;
2222 	vm_map_unlock(old_map);
2223 
2224 	return (vm2);
2225 }
2226 
2227 int
2228 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2229 	      vm_prot_t prot, vm_prot_t max, int cow)
2230 {
2231 	vm_map_entry_t prev_entry;
2232 	vm_map_entry_t new_stack_entry;
2233 	vm_size_t      init_ssize;
2234 	int            rv;
2235 
2236 	if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2237 		return (KERN_NO_SPACE);
2238 
2239 	if (max_ssize < SGROWSIZ)
2240 		init_ssize = max_ssize;
2241 	else
2242 		init_ssize = SGROWSIZ;
2243 
2244 	vm_map_lock(map);
2245 
2246 	/* If addr is already mapped, no go */
2247 	if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2248 		vm_map_unlock(map);
2249 		return (KERN_NO_SPACE);
2250 	}
2251 
2252 	/* If we can't accomodate max_ssize in the current mapping,
2253 	 * no go.  However, we need to be aware that subsequent user
2254 	 * mappings might map into the space we have reserved for
2255 	 * stack, and currently this space is not protected.
2256 	 *
2257 	 * Hopefully we will at least detect this condition
2258 	 * when we try to grow the stack.
2259 	 */
2260 	if ((prev_entry->next != &map->header) &&
2261 	    (prev_entry->next->start < addrbos + max_ssize)) {
2262 		vm_map_unlock(map);
2263 		return (KERN_NO_SPACE);
2264 	}
2265 
2266 	/* We initially map a stack of only init_ssize.  We will
2267 	 * grow as needed later.  Since this is to be a grow
2268 	 * down stack, we map at the top of the range.
2269 	 *
2270 	 * Note: we would normally expect prot and max to be
2271 	 * VM_PROT_ALL, and cow to be 0.  Possibly we should
2272 	 * eliminate these as input parameters, and just
2273 	 * pass these values here in the insert call.
2274 	 */
2275 	rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2276 	                   addrbos + max_ssize, prot, max, cow);
2277 
2278 	/* Now set the avail_ssize amount */
2279 	if (rv == KERN_SUCCESS){
2280 		if (prev_entry != &map->header)
2281 			vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2282 		new_stack_entry = prev_entry->next;
2283 		if (new_stack_entry->end   != addrbos + max_ssize ||
2284 		    new_stack_entry->start != addrbos + max_ssize - init_ssize)
2285 			panic ("Bad entry start/end for new stack entry");
2286 		else
2287 			new_stack_entry->avail_ssize = max_ssize - init_ssize;
2288 	}
2289 
2290 	vm_map_unlock(map);
2291 	return (rv);
2292 }
2293 
2294 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2295  * desired address is already mapped, or if we successfully grow
2296  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2297  * stack range (this is strange, but preserves compatibility with
2298  * the grow function in vm_machdep.c).
2299  */
2300 int
2301 vm_map_growstack (struct proc *p, vm_offset_t addr)
2302 {
2303 	vm_map_entry_t prev_entry;
2304 	vm_map_entry_t stack_entry;
2305 	vm_map_entry_t new_stack_entry;
2306 	struct vmspace *vm = p->p_vmspace;
2307 	vm_map_t map = &vm->vm_map;
2308 	vm_offset_t    end;
2309 	int      grow_amount;
2310 	int      rv;
2311 	int      is_procstack;
2312 Retry:
2313 	vm_map_lock_read(map);
2314 
2315 	/* If addr is already in the entry range, no need to grow.*/
2316 	if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2317 		vm_map_unlock_read(map);
2318 		return (KERN_SUCCESS);
2319 	}
2320 
2321 	if ((stack_entry = prev_entry->next) == &map->header) {
2322 		vm_map_unlock_read(map);
2323 		return (KERN_SUCCESS);
2324 	}
2325 	if (prev_entry == &map->header)
2326 		end = stack_entry->start - stack_entry->avail_ssize;
2327 	else
2328 		end = prev_entry->end;
2329 
2330 	/* This next test mimics the old grow function in vm_machdep.c.
2331 	 * It really doesn't quite make sense, but we do it anyway
2332 	 * for compatibility.
2333 	 *
2334 	 * If not growable stack, return success.  This signals the
2335 	 * caller to proceed as he would normally with normal vm.
2336 	 */
2337 	if (stack_entry->avail_ssize < 1 ||
2338 	    addr >= stack_entry->start ||
2339 	    addr <  stack_entry->start - stack_entry->avail_ssize) {
2340 		vm_map_unlock_read(map);
2341 		return (KERN_SUCCESS);
2342 	}
2343 
2344 	/* Find the minimum grow amount */
2345 	grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2346 	if (grow_amount > stack_entry->avail_ssize) {
2347 		vm_map_unlock_read(map);
2348 		return (KERN_NO_SPACE);
2349 	}
2350 
2351 	/* If there is no longer enough space between the entries
2352 	 * nogo, and adjust the available space.  Note: this
2353 	 * should only happen if the user has mapped into the
2354 	 * stack area after the stack was created, and is
2355 	 * probably an error.
2356 	 *
2357 	 * This also effectively destroys any guard page the user
2358 	 * might have intended by limiting the stack size.
2359 	 */
2360 	if (grow_amount > stack_entry->start - end) {
2361 		if (vm_map_lock_upgrade(map))
2362 			goto Retry;
2363 
2364 		stack_entry->avail_ssize = stack_entry->start - end;
2365 
2366 		vm_map_unlock(map);
2367 		return (KERN_NO_SPACE);
2368 	}
2369 
2370 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2371 
2372 	/* If this is the main process stack, see if we're over the
2373 	 * stack limit.
2374 	 */
2375 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2376 			     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2377 		vm_map_unlock_read(map);
2378 		return (KERN_NO_SPACE);
2379 	}
2380 
2381 	/* Round up the grow amount modulo SGROWSIZ */
2382 	grow_amount = roundup (grow_amount, SGROWSIZ);
2383 	if (grow_amount > stack_entry->avail_ssize) {
2384 		grow_amount = stack_entry->avail_ssize;
2385 	}
2386 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2387 	                     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2388 		grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2389 		              ctob(vm->vm_ssize);
2390 	}
2391 
2392 	if (vm_map_lock_upgrade(map))
2393 		goto Retry;
2394 
2395 	/* Get the preliminary new entry start value */
2396 	addr = stack_entry->start - grow_amount;
2397 
2398 	/* If this puts us into the previous entry, cut back our growth
2399 	 * to the available space.  Also, see the note above.
2400 	 */
2401 	if (addr < end) {
2402 		stack_entry->avail_ssize = stack_entry->start - end;
2403 		addr = end;
2404 	}
2405 
2406 	rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2407 			   VM_PROT_ALL,
2408 			   VM_PROT_ALL,
2409 			   0);
2410 
2411 	/* Adjust the available stack space by the amount we grew. */
2412 	if (rv == KERN_SUCCESS) {
2413 		if (prev_entry != &map->header)
2414 			vm_map_clip_end(map, prev_entry, addr);
2415 		new_stack_entry = prev_entry->next;
2416 		if (new_stack_entry->end   != stack_entry->start  ||
2417 		    new_stack_entry->start != addr)
2418 			panic ("Bad stack grow start/end in new stack entry");
2419 		else {
2420 			new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2421 							(new_stack_entry->end -
2422 							 new_stack_entry->start);
2423 			if (is_procstack)
2424 				vm->vm_ssize += btoc(new_stack_entry->end -
2425 						     new_stack_entry->start);
2426 		}
2427 	}
2428 
2429 	vm_map_unlock(map);
2430 	return (rv);
2431 
2432 }
2433 
2434 /*
2435  * Unshare the specified VM space for exec.  If other processes are
2436  * mapped to it, then create a new one.  The new vmspace is null.
2437  */
2438 
2439 void
2440 vmspace_exec(struct proc *p) {
2441 	struct vmspace *oldvmspace = p->p_vmspace;
2442 	struct vmspace *newvmspace;
2443 	vm_map_t map = &p->p_vmspace->vm_map;
2444 
2445 	newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
2446 	bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2447 	    (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2448 	/*
2449 	 * This code is written like this for prototype purposes.  The
2450 	 * goal is to avoid running down the vmspace here, but let the
2451 	 * other process's that are still using the vmspace to finally
2452 	 * run it down.  Even though there is little or no chance of blocking
2453 	 * here, it is a good idea to keep this form for future mods.
2454 	 */
2455 	vmspace_free(oldvmspace);
2456 	p->p_vmspace = newvmspace;
2457 	pmap_pinit2(vmspace_pmap(newvmspace));
2458 	if (p == curproc)
2459 		pmap_activate(p);
2460 }
2461 
2462 /*
2463  * Unshare the specified VM space for forcing COW.  This
2464  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2465  */
2466 
2467 void
2468 vmspace_unshare(struct proc *p) {
2469 	struct vmspace *oldvmspace = p->p_vmspace;
2470 	struct vmspace *newvmspace;
2471 
2472 	if (oldvmspace->vm_refcnt == 1)
2473 		return;
2474 	newvmspace = vmspace_fork(oldvmspace);
2475 	vmspace_free(oldvmspace);
2476 	p->p_vmspace = newvmspace;
2477 	pmap_pinit2(vmspace_pmap(newvmspace));
2478 	if (p == curproc)
2479 		pmap_activate(p);
2480 }
2481 
2482 
2483 /*
2484  *	vm_map_lookup:
2485  *
2486  *	Finds the VM object, offset, and
2487  *	protection for a given virtual address in the
2488  *	specified map, assuming a page fault of the
2489  *	type specified.
2490  *
2491  *	Leaves the map in question locked for read; return
2492  *	values are guaranteed until a vm_map_lookup_done
2493  *	call is performed.  Note that the map argument
2494  *	is in/out; the returned map must be used in
2495  *	the call to vm_map_lookup_done.
2496  *
2497  *	A handle (out_entry) is returned for use in
2498  *	vm_map_lookup_done, to make that fast.
2499  *
2500  *	If a lookup is requested with "write protection"
2501  *	specified, the map may be changed to perform virtual
2502  *	copying operations, although the data referenced will
2503  *	remain the same.
2504  */
2505 int
2506 vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
2507 	      vm_offset_t vaddr,
2508 	      vm_prot_t fault_typea,
2509 	      vm_map_entry_t *out_entry,	/* OUT */
2510 	      vm_object_t *object,		/* OUT */
2511 	      vm_pindex_t *pindex,		/* OUT */
2512 	      vm_prot_t *out_prot,		/* OUT */
2513 	      boolean_t *wired)			/* OUT */
2514 {
2515 	vm_map_entry_t entry;
2516 	vm_map_t map = *var_map;
2517 	vm_prot_t prot;
2518 	vm_prot_t fault_type = fault_typea;
2519 
2520 RetryLookup:;
2521 
2522 	/*
2523 	 * Lookup the faulting address.
2524 	 */
2525 
2526 	vm_map_lock_read(map);
2527 
2528 #define	RETURN(why) \
2529 		{ \
2530 		vm_map_unlock_read(map); \
2531 		return(why); \
2532 		}
2533 
2534 	/*
2535 	 * If the map has an interesting hint, try it before calling full
2536 	 * blown lookup routine.
2537 	 */
2538 
2539 	entry = map->hint;
2540 
2541 	*out_entry = entry;
2542 
2543 	if ((entry == &map->header) ||
2544 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2545 		vm_map_entry_t tmp_entry;
2546 
2547 		/*
2548 		 * Entry was either not a valid hint, or the vaddr was not
2549 		 * contained in the entry, so do a full lookup.
2550 		 */
2551 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2552 			RETURN(KERN_INVALID_ADDRESS);
2553 
2554 		entry = tmp_entry;
2555 		*out_entry = entry;
2556 	}
2557 
2558 	/*
2559 	 * Handle submaps.
2560 	 */
2561 
2562 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2563 		vm_map_t old_map = map;
2564 
2565 		*var_map = map = entry->object.sub_map;
2566 		vm_map_unlock_read(old_map);
2567 		goto RetryLookup;
2568 	}
2569 
2570 	/*
2571 	 * Check whether this task is allowed to have this page.
2572 	 * Note the special case for MAP_ENTRY_COW
2573 	 * pages with an override.  This is to implement a forced
2574 	 * COW for debuggers.
2575 	 */
2576 
2577 	if (fault_type & VM_PROT_OVERRIDE_WRITE)
2578 		prot = entry->max_protection;
2579 	else
2580 		prot = entry->protection;
2581 
2582 	fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2583 	if ((fault_type & prot) != fault_type) {
2584 			RETURN(KERN_PROTECTION_FAILURE);
2585 	}
2586 
2587 	if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2588 	    (entry->eflags & MAP_ENTRY_COW) &&
2589 	    (fault_type & VM_PROT_WRITE) &&
2590 	    (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2591 		RETURN(KERN_PROTECTION_FAILURE);
2592 	}
2593 
2594 	/*
2595 	 * If this page is not pageable, we have to get it for all possible
2596 	 * accesses.
2597 	 */
2598 
2599 	*wired = (entry->wired_count != 0);
2600 	if (*wired)
2601 		prot = fault_type = entry->protection;
2602 
2603 	/*
2604 	 * If the entry was copy-on-write, we either ...
2605 	 */
2606 
2607 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2608 		/*
2609 		 * If we want to write the page, we may as well handle that
2610 		 * now since we've got the map locked.
2611 		 *
2612 		 * If we don't need to write the page, we just demote the
2613 		 * permissions allowed.
2614 		 */
2615 
2616 		if (fault_type & VM_PROT_WRITE) {
2617 			/*
2618 			 * Make a new object, and place it in the object
2619 			 * chain.  Note that no new references have appeared
2620 			 * -- one just moved from the map to the new
2621 			 * object.
2622 			 */
2623 
2624 			if (vm_map_lock_upgrade(map))
2625 				goto RetryLookup;
2626 
2627 			vm_object_shadow(
2628 			    &entry->object.vm_object,
2629 			    &entry->offset,
2630 			    atop(entry->end - entry->start));
2631 
2632 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2633 			vm_map_lock_downgrade(map);
2634 		} else {
2635 			/*
2636 			 * We're attempting to read a copy-on-write page --
2637 			 * don't allow writes.
2638 			 */
2639 
2640 			prot &= ~VM_PROT_WRITE;
2641 		}
2642 	}
2643 
2644 	/*
2645 	 * Create an object if necessary.
2646 	 */
2647 	if (entry->object.vm_object == NULL &&
2648 	    !map->system_map) {
2649 		if (vm_map_lock_upgrade(map))
2650 			goto RetryLookup;
2651 
2652 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2653 		    atop(entry->end - entry->start));
2654 		entry->offset = 0;
2655 		vm_map_lock_downgrade(map);
2656 	}
2657 
2658 	/*
2659 	 * Return the object/offset from this entry.  If the entry was
2660 	 * copy-on-write or empty, it has been fixed up.
2661 	 */
2662 
2663 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2664 	*object = entry->object.vm_object;
2665 
2666 	/*
2667 	 * Return whether this is the only map sharing this data.
2668 	 */
2669 
2670 	*out_prot = prot;
2671 	return (KERN_SUCCESS);
2672 
2673 #undef	RETURN
2674 }
2675 
2676 /*
2677  *	vm_map_lookup_done:
2678  *
2679  *	Releases locks acquired by a vm_map_lookup
2680  *	(according to the handle returned by that lookup).
2681  */
2682 
2683 void
2684 vm_map_lookup_done(map, entry)
2685 	vm_map_t map;
2686 	vm_map_entry_t entry;
2687 {
2688 	/*
2689 	 * Unlock the main-level map
2690 	 */
2691 
2692 	vm_map_unlock_read(map);
2693 }
2694 
2695 /*
2696  * Implement uiomove with VM operations.  This handles (and collateral changes)
2697  * support every combination of source object modification, and COW type
2698  * operations.
2699  */
2700 int
2701 vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages)
2702 	vm_map_t mapa;
2703 	vm_object_t srcobject;
2704 	off_t cp;
2705 	int cnta;
2706 	vm_offset_t uaddra;
2707 	int *npages;
2708 {
2709 	vm_map_t map;
2710 	vm_object_t first_object, oldobject, object;
2711 	vm_map_entry_t entry;
2712 	vm_prot_t prot;
2713 	boolean_t wired;
2714 	int tcnt, rv;
2715 	vm_offset_t uaddr, start, end, tend;
2716 	vm_pindex_t first_pindex, osize, oindex;
2717 	off_t ooffset;
2718 	int cnt;
2719 
2720 	if (npages)
2721 		*npages = 0;
2722 
2723 	cnt = cnta;
2724 	uaddr = uaddra;
2725 
2726 	while (cnt > 0) {
2727 		map = mapa;
2728 
2729 		if ((vm_map_lookup(&map, uaddr,
2730 			VM_PROT_READ, &entry, &first_object,
2731 			&first_pindex, &prot, &wired)) != KERN_SUCCESS) {
2732 			return EFAULT;
2733 		}
2734 
2735 		vm_map_clip_start(map, entry, uaddr);
2736 
2737 		tcnt = cnt;
2738 		tend = uaddr + tcnt;
2739 		if (tend > entry->end) {
2740 			tcnt = entry->end - uaddr;
2741 			tend = entry->end;
2742 		}
2743 
2744 		vm_map_clip_end(map, entry, tend);
2745 
2746 		start = entry->start;
2747 		end = entry->end;
2748 
2749 		osize = atop(tcnt);
2750 
2751 		oindex = OFF_TO_IDX(cp);
2752 		if (npages) {
2753 			vm_pindex_t idx;
2754 			for (idx = 0; idx < osize; idx++) {
2755 				vm_page_t m;
2756 				if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
2757 					vm_map_lookup_done(map, entry);
2758 					return 0;
2759 				}
2760 				/*
2761 				 * disallow busy or invalid pages, but allow
2762 				 * m->busy pages if they are entirely valid.
2763 				 */
2764 				if ((m->flags & PG_BUSY) ||
2765 					((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
2766 					vm_map_lookup_done(map, entry);
2767 					return 0;
2768 				}
2769 			}
2770 		}
2771 
2772 /*
2773  * If we are changing an existing map entry, just redirect
2774  * the object, and change mappings.
2775  */
2776 		if ((first_object->type == OBJT_VNODE) &&
2777 			((oldobject = entry->object.vm_object) == first_object)) {
2778 
2779 			if ((entry->offset != cp) || (oldobject != srcobject)) {
2780 				/*
2781    				* Remove old window into the file
2782    				*/
2783 				pmap_remove (map->pmap, uaddr, tend);
2784 
2785 				/*
2786    				* Force copy on write for mmaped regions
2787    				*/
2788 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2789 
2790 				/*
2791    				* Point the object appropriately
2792    				*/
2793 				if (oldobject != srcobject) {
2794 
2795 				/*
2796    				* Set the object optimization hint flag
2797    				*/
2798 					vm_object_set_flag(srcobject, OBJ_OPT);
2799 					vm_object_reference(srcobject);
2800 					entry->object.vm_object = srcobject;
2801 
2802 					if (oldobject) {
2803 						vm_object_deallocate(oldobject);
2804 					}
2805 				}
2806 
2807 				entry->offset = cp;
2808 				map->timestamp++;
2809 			} else {
2810 				pmap_remove (map->pmap, uaddr, tend);
2811 			}
2812 
2813 		} else if ((first_object->ref_count == 1) &&
2814 			(first_object->size == osize) &&
2815 			((first_object->type == OBJT_DEFAULT) ||
2816 				(first_object->type == OBJT_SWAP)) ) {
2817 
2818 			oldobject = first_object->backing_object;
2819 
2820 			if ((first_object->backing_object_offset != cp) ||
2821 				(oldobject != srcobject)) {
2822 				/*
2823    				* Remove old window into the file
2824    				*/
2825 				pmap_remove (map->pmap, uaddr, tend);
2826 
2827 				/*
2828 				 * Remove unneeded old pages
2829 				 */
2830 				vm_object_page_remove(first_object, 0, 0, 0);
2831 
2832 				/*
2833 				 * Invalidate swap space
2834 				 */
2835 				if (first_object->type == OBJT_SWAP) {
2836 					swap_pager_freespace(first_object,
2837 						0,
2838 						first_object->size);
2839 				}
2840 
2841 				/*
2842    				* Force copy on write for mmaped regions
2843    				*/
2844 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2845 
2846 				/*
2847    				* Point the object appropriately
2848    				*/
2849 				if (oldobject != srcobject) {
2850 
2851 				/*
2852    				* Set the object optimization hint flag
2853    				*/
2854 					vm_object_set_flag(srcobject, OBJ_OPT);
2855 					vm_object_reference(srcobject);
2856 
2857 					if (oldobject) {
2858 						TAILQ_REMOVE(&oldobject->shadow_head,
2859 							first_object, shadow_list);
2860 						oldobject->shadow_count--;
2861 						/* XXX bump generation? */
2862 						vm_object_deallocate(oldobject);
2863 					}
2864 
2865 					TAILQ_INSERT_TAIL(&srcobject->shadow_head,
2866 						first_object, shadow_list);
2867 					srcobject->shadow_count++;
2868 					/* XXX bump generation? */
2869 
2870 					first_object->backing_object = srcobject;
2871 				}
2872 				first_object->backing_object_offset = cp;
2873 				map->timestamp++;
2874 			} else {
2875 				pmap_remove (map->pmap, uaddr, tend);
2876 			}
2877 /*
2878  * Otherwise, we have to do a logical mmap.
2879  */
2880 		} else {
2881 
2882 			vm_object_set_flag(srcobject, OBJ_OPT);
2883 			vm_object_reference(srcobject);
2884 
2885 			pmap_remove (map->pmap, uaddr, tend);
2886 
2887 			vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2888 			vm_map_lock_upgrade(map);
2889 
2890 			if (entry == &map->header) {
2891 				map->first_free = &map->header;
2892 			} else if (map->first_free->start >= start) {
2893 				map->first_free = entry->prev;
2894 			}
2895 
2896 			SAVE_HINT(map, entry->prev);
2897 			vm_map_entry_delete(map, entry);
2898 
2899 			object = srcobject;
2900 			ooffset = cp;
2901 
2902 			rv = vm_map_insert(map, object, ooffset, start, tend,
2903 				VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
2904 
2905 			if (rv != KERN_SUCCESS)
2906 				panic("vm_uiomove: could not insert new entry: %d", rv);
2907 		}
2908 
2909 /*
2910  * Map the window directly, if it is already in memory
2911  */
2912 		pmap_object_init_pt(map->pmap, uaddr,
2913 			srcobject, oindex, tcnt, 0);
2914 
2915 		map->timestamp++;
2916 		vm_map_unlock(map);
2917 
2918 		cnt -= tcnt;
2919 		uaddr += tcnt;
2920 		cp += tcnt;
2921 		if (npages)
2922 			*npages += osize;
2923 	}
2924 	return 0;
2925 }
2926 
2927 /*
2928  * Performs the copy_on_write operations necessary to allow the virtual copies
2929  * into user space to work.  This has to be called for write(2) system calls
2930  * from other processes, file unlinking, and file size shrinkage.
2931  */
2932 void
2933 vm_freeze_copyopts(object, froma, toa)
2934 	vm_object_t object;
2935 	vm_pindex_t froma, toa;
2936 {
2937 	int rv;
2938 	vm_object_t robject;
2939 	vm_pindex_t idx;
2940 
2941 	if ((object == NULL) ||
2942 		((object->flags & OBJ_OPT) == 0))
2943 		return;
2944 
2945 	if (object->shadow_count > object->ref_count)
2946 		panic("vm_freeze_copyopts: sc > rc");
2947 
2948 	while((robject = TAILQ_FIRST(&object->shadow_head)) != NULL) {
2949 		vm_pindex_t bo_pindex;
2950 		vm_page_t m_in, m_out;
2951 
2952 		bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
2953 
2954 		vm_object_reference(robject);
2955 
2956 		vm_object_pip_wait(robject, "objfrz");
2957 
2958 		if (robject->ref_count == 1) {
2959 			vm_object_deallocate(robject);
2960 			continue;
2961 		}
2962 
2963 		vm_object_pip_add(robject, 1);
2964 
2965 		for (idx = 0; idx < robject->size; idx++) {
2966 
2967 			m_out = vm_page_grab(robject, idx,
2968 						VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
2969 
2970 			if (m_out->valid == 0) {
2971 				m_in = vm_page_grab(object, bo_pindex + idx,
2972 						VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
2973 				if (m_in->valid == 0) {
2974 					rv = vm_pager_get_pages(object, &m_in, 1, 0);
2975 					if (rv != VM_PAGER_OK) {
2976 						printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
2977 						continue;
2978 					}
2979 					vm_page_deactivate(m_in);
2980 				}
2981 
2982 				vm_page_protect(m_in, VM_PROT_NONE);
2983 				pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out));
2984 				m_out->valid = m_in->valid;
2985 				vm_page_dirty(m_out);
2986 				vm_page_activate(m_out);
2987 				vm_page_wakeup(m_in);
2988 			}
2989 			vm_page_wakeup(m_out);
2990 		}
2991 
2992 		object->shadow_count--;
2993 		object->ref_count--;
2994 		TAILQ_REMOVE(&object->shadow_head, robject, shadow_list);
2995 		robject->backing_object = NULL;
2996 		robject->backing_object_offset = 0;
2997 
2998 		vm_object_pip_wakeup(robject);
2999 		vm_object_deallocate(robject);
3000 	}
3001 
3002 	vm_object_clear_flag(object, OBJ_OPT);
3003 }
3004 
3005 #include "opt_ddb.h"
3006 #ifdef DDB
3007 #include <sys/kernel.h>
3008 
3009 #include <ddb/ddb.h>
3010 
3011 /*
3012  *	vm_map_print:	[ debug ]
3013  */
3014 DB_SHOW_COMMAND(map, vm_map_print)
3015 {
3016 	static int nlines;
3017 	/* XXX convert args. */
3018 	vm_map_t map = (vm_map_t)addr;
3019 	boolean_t full = have_addr;
3020 
3021 	vm_map_entry_t entry;
3022 
3023 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3024 	    (void *)map,
3025 	    (void *)map->pmap, map->nentries, map->timestamp);
3026 	nlines++;
3027 
3028 	if (!full && db_indent)
3029 		return;
3030 
3031 	db_indent += 2;
3032 	for (entry = map->header.next; entry != &map->header;
3033 	    entry = entry->next) {
3034 		db_iprintf("map entry %p: start=%p, end=%p\n",
3035 		    (void *)entry, (void *)entry->start, (void *)entry->end);
3036 		nlines++;
3037 		{
3038 			static char *inheritance_name[4] =
3039 			{"share", "copy", "none", "donate_copy"};
3040 
3041 			db_iprintf(" prot=%x/%x/%s",
3042 			    entry->protection,
3043 			    entry->max_protection,
3044 			    inheritance_name[(int)(unsigned char)entry->inheritance]);
3045 			if (entry->wired_count != 0)
3046 				db_printf(", wired");
3047 		}
3048 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3049 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3050 			db_printf(", share=%p, offset=0x%lx\n",
3051 			    (void *)entry->object.sub_map,
3052 			    (long)entry->offset);
3053 			nlines++;
3054 			if ((entry->prev == &map->header) ||
3055 			    (entry->prev->object.sub_map !=
3056 				entry->object.sub_map)) {
3057 				db_indent += 2;
3058 				vm_map_print((db_expr_t)(intptr_t)
3059 					     entry->object.sub_map,
3060 					     full, 0, (char *)0);
3061 				db_indent -= 2;
3062 			}
3063 		} else {
3064 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3065 			db_printf(", object=%p, offset=0x%lx",
3066 			    (void *)entry->object.vm_object,
3067 			    (long)entry->offset);
3068 			if (entry->eflags & MAP_ENTRY_COW)
3069 				db_printf(", copy (%s)",
3070 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3071 			db_printf("\n");
3072 			nlines++;
3073 
3074 			if ((entry->prev == &map->header) ||
3075 			    (entry->prev->object.vm_object !=
3076 				entry->object.vm_object)) {
3077 				db_indent += 2;
3078 				vm_object_print((db_expr_t)(intptr_t)
3079 						entry->object.vm_object,
3080 						full, 0, (char *)0);
3081 				nlines += 4;
3082 				db_indent -= 2;
3083 			}
3084 		}
3085 	}
3086 	db_indent -= 2;
3087 	if (db_indent == 0)
3088 		nlines = 0;
3089 }
3090 
3091 
3092 DB_SHOW_COMMAND(procvm, procvm)
3093 {
3094 	struct proc *p;
3095 
3096 	if (have_addr) {
3097 		p = (struct proc *) addr;
3098 	} else {
3099 		p = curproc;
3100 	}
3101 
3102 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3103 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3104 	    (void *)vmspace_pmap(p->p_vmspace));
3105 
3106 	vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3107 }
3108 
3109 #endif /* DDB */
3110