xref: /freebsd/sys/vm/vm_map.c (revision 4a558355e5f3b4521cef56a6b705fa84be41dfa0)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/proc.h>
74 #include <sys/vmmeter.h>
75 #include <sys/mman.h>
76 #include <sys/vnode.h>
77 #include <sys/resourcevar.h>
78 
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <sys/lock.h>
82 #include <vm/pmap.h>
83 #include <vm/vm_map.h>
84 #include <vm/vm_page.h>
85 #include <vm/vm_object.h>
86 #include <vm/vm_pager.h>
87 #include <vm/vm_kern.h>
88 #include <vm/vm_extern.h>
89 #include <vm/vm_zone.h>
90 #include <vm/swap_pager.h>
91 
92 /*
93  *	Virtual memory maps provide for the mapping, protection,
94  *	and sharing of virtual memory objects.  In addition,
95  *	this module provides for an efficient virtual copy of
96  *	memory from one map to another.
97  *
98  *	Synchronization is required prior to most operations.
99  *
100  *	Maps consist of an ordered doubly-linked list of simple
101  *	entries; a single hint is used to speed up lookups.
102  *
103  *	Since portions of maps are specified by start/end addresses,
104  *	which may not align with existing map entries, all
105  *	routines merely "clip" entries to these start/end values.
106  *	[That is, an entry is split into two, bordering at a
107  *	start or end value.]  Note that these clippings may not
108  *	always be necessary (as the two resulting entries are then
109  *	not changed); however, the clipping is done for convenience.
110  *
111  *	As mentioned above, virtual copy operations are performed
112  *	by copying VM object references from one map to
113  *	another, and then marking both regions as copy-on-write.
114  */
115 
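/*
 *	An illustrative sketch (not from the original source): clipping an
 *	entry spanning [A, D) for an operation on [B, C), with A < B and
 *	C < D, proceeds as
 *
 *		vm_map_clip_start(map, entry, B);	splits off [A, B)
 *		vm_map_clip_end(map, entry, C);		splits off [C, D)
 *
 *	leaving "entry" covering exactly [B, C).
 */
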
116 /*
117  *	vm_map_startup:
118  *
119  *	Initialize the vm_map module.  Must be called before
120  *	any other vm_map routines.
121  *
122  *	Map and entry structures are allocated from the general
123  *	purpose memory pool with some exceptions:
124  *
125  *	- The kernel map and kmem submap are allocated statically.
126  *	- Kernel map entries are allocated out of a static pool.
127  *
128  *	These restrictions are necessary since malloc() uses the
129  *	maps and requires map entries.
130  */
131 
132 static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store;
133 static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone;
134 static struct vm_object kmapentobj, mapentobj, mapobj;
135 
136 static struct vm_map_entry map_entry_init[MAX_MAPENT];
137 static struct vm_map_entry kmap_entry_init[MAX_KMAPENT];
138 static struct vm_map map_init[MAX_KMAP];
139 
140 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
141 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
142 static vm_map_entry_t vm_map_entry_create __P((vm_map_t));
143 static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
144 static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
145 static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
146 static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t,
147 		vm_map_entry_t));
148 static void vm_map_split __P((vm_map_entry_t));
149 
150 void
151 vm_map_startup()
152 {
153 	mapzone = &mapzone_store;
154 	zbootinit(mapzone, "MAP", sizeof (struct vm_map),
155 		map_init, MAX_KMAP);
156 	kmapentzone = &kmapentzone_store;
157 	zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry),
158 		kmap_entry_init, MAX_KMAPENT);
159 	mapentzone = &mapentzone_store;
160 	zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
161 		map_entry_init, MAX_MAPENT);
162 }
163 
164 /*
165  * Allocate a vmspace structure, including a vm_map and pmap,
166  * and initialize those structures.  The refcnt is set to 1.
167  * The remaining fields must be initialized by the caller.
168  */
169 struct vmspace *
170 vmspace_alloc(min, max)
171 	vm_offset_t min, max;
172 {
173 	struct vmspace *vm;
174 
175 	vm = zalloc(vmspace_zone);
176 	vm_map_init(&vm->vm_map, min, max);
177 	pmap_pinit(vmspace_pmap(vm));
178 	vm->vm_map.pmap = vmspace_pmap(vm);		/* XXX */
179 	vm->vm_refcnt = 1;
180 	vm->vm_shm = NULL;
181 	return (vm);
182 }
183 
184 void
185 vm_init2(void) {
186 	zinitna(kmapentzone, &kmapentobj,
187 		NULL, 0, cnt.v_page_count / 4, ZONE_INTERRUPT, 1);
188 	zinitna(mapentzone, &mapentobj,
189 		NULL, 0, 0, 0, 1);
190 	zinitna(mapzone, &mapobj,
191 		NULL, 0, 0, 0, 1);
192 	vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3);
193 	pmap_init2();
194 	vm_object_init2();
195 }
196 
197 void
198 vmspace_free(vm)
199 	struct vmspace *vm;
200 {
201 
202 	if (vm->vm_refcnt == 0)
203 		panic("vmspace_free: attempt to free already freed vmspace");
204 
205 	if (--vm->vm_refcnt == 0) {
206 
207 		/*
208 		 * Lock the map, to wait out all other references to it.
209 		 * Delete all of the mappings and pages they hold, then call
210 		 * the pmap module to reclaim anything left.
211 		 */
212 		vm_map_lock(&vm->vm_map);
213 		(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
214 		    vm->vm_map.max_offset);
215 		vm_map_unlock(&vm->vm_map);
216 
217 		pmap_release(vmspace_pmap(vm));
218 		vm_map_destroy(&vm->vm_map);
219 		zfree(vmspace_zone, vm);
220 	}
221 }
222 
223 /*
224  *	vm_map_create:
225  *
226  *	Creates and returns a new empty VM map with
227  *	the given physical map structure, and having
228  *	the given lower and upper address bounds.
229  */
230 vm_map_t
231 vm_map_create(pmap, min, max)
232 	pmap_t pmap;
233 	vm_offset_t min, max;
234 {
235 	vm_map_t result;
236 
237 	result = zalloc(mapzone);
238 	vm_map_init(result, min, max);
239 	result->pmap = pmap;
240 	return (result);
241 }
242 
243 /*
244  * Initialize an existing vm_map structure
245  * such as that in the vmspace structure.
246  * The pmap is set elsewhere.
247  */
248 void
249 vm_map_init(map, min, max)
250 	struct vm_map *map;
251 	vm_offset_t min, max;
252 {
253 	map->header.next = map->header.prev = &map->header;
254 	map->nentries = 0;
255 	map->size = 0;
256 	map->system_map = 0;
257 	map->infork = 0;
258 	map->min_offset = min;
259 	map->max_offset = max;
260 	map->first_free = &map->header;
261 	map->hint = &map->header;
262 	map->timestamp = 0;
263 	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
264 }
265 
266 void
267 vm_map_destroy(map)
268 	struct vm_map *map;
269 {
270 	lockdestroy(&map->lock);
271 }
272 
273 /*
274  *	vm_map_entry_dispose:	[ internal use only ]
275  *
276  *	Inverse of vm_map_entry_create.
277  */
278 static void
279 vm_map_entry_dispose(map, entry)
280 	vm_map_t map;
281 	vm_map_entry_t entry;
282 {
283 	zfree((map->system_map || !mapentzone) ? kmapentzone : mapentzone, entry);
284 }
285 
286 /*
287  *	vm_map_entry_create:	[ internal use only ]
288  *
289  *	Allocates a VM map entry for insertion.
290  *	No entry fields are filled in.
291  */
292 static vm_map_entry_t
293 vm_map_entry_create(map)
294 	vm_map_t map;
295 {
296 	vm_map_entry_t new_entry;
297 
298 	new_entry = zalloc((map->system_map || !mapentzone) ?
299 		kmapentzone : mapentzone);
300 	if (new_entry == NULL)
301 	    panic("vm_map_entry_create: kernel resources exhausted");
302 	return(new_entry);
303 }
304 
305 /*
306  *	vm_map_entry_{un,}link:
307  *
308  *	Insert/remove entries from maps.
309  */
310 static __inline void
311 vm_map_entry_link(vm_map_t map,
312 		  vm_map_entry_t after_where,
313 		  vm_map_entry_t entry)
314 {
315 	map->nentries++;
316 	entry->prev = after_where;
317 	entry->next = after_where->next;
318 	entry->next->prev = entry;
319 	after_where->next = entry;
320 }
321 
322 static __inline void
323 vm_map_entry_unlink(vm_map_t map,
324 		    vm_map_entry_t entry)
325 {
326 	vm_map_entry_t prev = entry->prev;
327 	vm_map_entry_t next = entry->next;
328 
329 	next->prev = prev;
330 	prev->next = next;
331 	map->nentries--;
332 }
333 
334 /*
335  *	SAVE_HINT:
336  *
337  *	Saves the specified entry as the hint for
338  *	future lookups.
339  */
340 #define	SAVE_HINT(map,value) \
341 		(map)->hint = (value);
342 
343 /*
344  *	vm_map_lookup_entry:	[ internal use only ]
345  *
346  *	Finds the map entry containing (or
347  *	immediately preceding) the specified address
348  *	in the given map; the entry is returned
349  *	in the "entry" parameter.  The boolean
350  *	result indicates whether the address is
351  *	actually contained in the map.
352  */
353 boolean_t
354 vm_map_lookup_entry(map, address, entry)
355 	vm_map_t map;
356 	vm_offset_t address;
357 	vm_map_entry_t *entry;	/* OUT */
358 {
359 	vm_map_entry_t cur;
360 	vm_map_entry_t last;
361 
362 	/*
363 	 * Start looking either from the head of the list, or from the hint.
364 	 */
365 
366 	cur = map->hint;
367 
368 	if (cur == &map->header)
369 		cur = cur->next;
370 
371 	if (address >= cur->start) {
372 		/*
373 		 * Go from hint to end of list.
374 		 *
375 		 * But first, make a quick check to see if we are already looking
376 		 * at the entry we want (which is usually the case). Note also
377 		 * that we don't need to save the hint here... it is the same
378 		 * hint (unless we are at the header, in which case the hint
379 		 * didn't buy us anything anyway).
380 		 */
381 		last = &map->header;
382 		if ((cur != last) && (cur->end > address)) {
383 			*entry = cur;
384 			return (TRUE);
385 		}
386 	} else {
387 		/*
388 		 * Go from start to hint, *inclusively*
389 		 */
390 		last = cur->next;
391 		cur = map->header.next;
392 	}
393 
394 	/*
395 	 * Search linearly
396 	 */
397 
398 	while (cur != last) {
399 		if (cur->end > address) {
400 			if (address >= cur->start) {
401 				/*
402 				 * Save this lookup for future hints, and
403 				 * return
404 				 */
405 
406 				*entry = cur;
407 				SAVE_HINT(map, cur);
408 				return (TRUE);
409 			}
410 			break;
411 		}
412 		cur = cur->next;
413 	}
414 	*entry = cur->prev;
415 	SAVE_HINT(map, *entry);
416 	return (FALSE);
417 }
418 
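/*
 *	The usual caller idiom, used throughout this file: look up the
 *	start address, clip if it falls inside an existing entry, and
 *	otherwise advance to the following entry before walking the range.
 *
 *		if (vm_map_lookup_entry(map, start, &entry))
 *			vm_map_clip_start(map, entry, start);
 *		else
 *			entry = entry->next;
 */
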
419 /*
420  *	vm_map_insert:
421  *
422  *	Inserts the given whole VM object into the target
423  *	map at the specified address range.  The object's
424  *	size should match that of the address range.
425  *
426  *	Requires that the map be locked, and leaves it so.
427  *
428  *	If object is non-NULL, ref count must be bumped by caller
429  *	prior to making call to account for the new entry.
430  */
431 int
432 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
433 	      vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
434 	      int cow)
435 {
436 	vm_map_entry_t new_entry;
437 	vm_map_entry_t prev_entry;
438 	vm_map_entry_t temp_entry;
439 	vm_eflags_t protoeflags;
440 
441 	/*
442 	 * Check that the start and end points are not bogus.
443 	 */
444 
445 	if ((start < map->min_offset) || (end > map->max_offset) ||
446 	    (start >= end))
447 		return (KERN_INVALID_ADDRESS);
448 
449 	/*
450 	 * Find the entry prior to the proposed starting address; if it's part
451 	 * of an existing entry, this range is bogus.
452 	 */
453 
454 	if (vm_map_lookup_entry(map, start, &temp_entry))
455 		return (KERN_NO_SPACE);
456 
457 	prev_entry = temp_entry;
458 
459 	/*
460 	 * Assert that the next entry doesn't overlap the end point.
461 	 */
462 
463 	if ((prev_entry->next != &map->header) &&
464 	    (prev_entry->next->start < end))
465 		return (KERN_NO_SPACE);
466 
467 	protoeflags = 0;
468 
469 	if (cow & MAP_COPY_ON_WRITE)
470 		protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
471 
472 	if (cow & MAP_NOFAULT) {
473 		protoeflags |= MAP_ENTRY_NOFAULT;
474 
475 		KASSERT(object == NULL,
476 			("vm_map_insert: paradoxical MAP_NOFAULT request"));
477 	}
478 	if (cow & MAP_DISABLE_SYNCER)
479 		protoeflags |= MAP_ENTRY_NOSYNC;
480 	if (cow & MAP_DISABLE_COREDUMP)
481 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
482 
483 	if (object) {
484 		/*
485 		 * When object is non-NULL, it could be shared with another
486 		 * process.  We have to set or clear OBJ_ONEMAPPING
487 		 * appropriately.
488 		 */
489 		if ((object->ref_count > 1) || (object->shadow_count != 0)) {
490 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
491 		}
492 	}
493 	else if ((prev_entry != &map->header) &&
494 		 (prev_entry->eflags == protoeflags) &&
495 		 (prev_entry->end == start) &&
496 		 (prev_entry->wired_count == 0) &&
497 		 ((prev_entry->object.vm_object == NULL) ||
498 		  vm_object_coalesce(prev_entry->object.vm_object,
499 				     OFF_TO_IDX(prev_entry->offset),
500 				     (vm_size_t)(prev_entry->end - prev_entry->start),
501 				     (vm_size_t)(end - prev_entry->end)))) {
502 		/*
503 		 * We were able to extend the object.  Determine if we
504 		 * can extend the previous map entry to include the
505 		 * new range as well.
506 		 */
507 		if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
508 		    (prev_entry->protection == prot) &&
509 		    (prev_entry->max_protection == max)) {
510 			map->size += (end - prev_entry->end);
511 			prev_entry->end = end;
512 			vm_map_simplify_entry(map, prev_entry);
513 			return (KERN_SUCCESS);
514 		}
515 
516 		/*
517 		 * If we can extend the object but cannot extend the
518 		 * map entry, we have to create a new map entry.  We
519 		 * must bump the ref count on the extended object to
520 		 * account for it.  object may be NULL.
521 		 */
522 		object = prev_entry->object.vm_object;
523 		offset = prev_entry->offset +
524 			(prev_entry->end - prev_entry->start);
525 		vm_object_reference(object);
526 	}
527 
528 	/*
529 	 * NOTE: if conditionals fail, object can be NULL here.  This occurs
530 	 * in things like the buffer map where we manage kva but do not manage
531 	 * backing objects.
532 	 */
533 
534 	/*
535 	 * Create a new entry
536 	 */
537 
538 	new_entry = vm_map_entry_create(map);
539 	new_entry->start = start;
540 	new_entry->end = end;
541 
542 	new_entry->eflags = protoeflags;
543 	new_entry->object.vm_object = object;
544 	new_entry->offset = offset;
545 	new_entry->avail_ssize = 0;
546 
547 	new_entry->inheritance = VM_INHERIT_DEFAULT;
548 	new_entry->protection = prot;
549 	new_entry->max_protection = max;
550 	new_entry->wired_count = 0;
551 
552 	/*
553 	 * Insert the new entry into the list
554 	 */
555 
556 	vm_map_entry_link(map, prev_entry, new_entry);
557 	map->size += new_entry->end - new_entry->start;
558 
559 	/*
560 	 * Update the free space hint
561 	 */
562 	if ((map->first_free == prev_entry) &&
563 	    (prev_entry->end >= new_entry->start)) {
564 		map->first_free = new_entry;
565 	}
566 
567 #if 0
568 	/*
569 	 * Temporarily removed to avoid MAP_STACK panic, due to
570 	 * MAP_STACK being a huge hack.  Will be added back in
571 	 * when MAP_STACK (and the user stack mapping) is fixed.
572 	 */
573 	/*
574 	 * It may be possible to simplify the entry
575 	 */
576 	vm_map_simplify_entry(map, new_entry);
577 #endif
578 
579 	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
580 		pmap_object_init_pt(map->pmap, start,
581 				    object, OFF_TO_IDX(offset), end - start,
582 				    cow & MAP_PREFAULT_PARTIAL);
583 	}
584 
585 	return (KERN_SUCCESS);
586 }
587 
588 /*
589  * Find sufficient space for `length' bytes in the given map, starting at
590  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
591  */
592 int
593 vm_map_findspace(map, start, length, addr)
594 	vm_map_t map;
595 	vm_offset_t start;
596 	vm_size_t length;
597 	vm_offset_t *addr;
598 {
599 	vm_map_entry_t entry, next;
600 	vm_offset_t end;
601 
602 	if (start < map->min_offset)
603 		start = map->min_offset;
604 	if (start > map->max_offset)
605 		return (1);
606 
607 	/*
608 	 * Look for the first possible address; if there's already something
609 	 * at this address, we have to start after it.
610 	 */
611 	if (start == map->min_offset) {
612 		if ((entry = map->first_free) != &map->header)
613 			start = entry->end;
614 	} else {
615 		vm_map_entry_t tmp;
616 
617 		if (vm_map_lookup_entry(map, start, &tmp))
618 			start = tmp->end;
619 		entry = tmp;
620 	}
621 
622 	/*
623 	 * Look through the rest of the map, trying to fit a new region in the
624 	 * gap between existing regions, or after the very last region.
625 	 */
626 	for (;; start = (entry = next)->end) {
627 		/*
628 		 * Find the end of the proposed new region.  Be sure we didn't
629 		 * go beyond the end of the map, or wrap around the address;
630 		 * if so, we lose.  Otherwise, if this is the last entry, or
631 		 * if the proposed new region fits before the next entry, we
632 		 * win.
633 		 */
634 		end = start + length;
635 		if (end > map->max_offset || end < start)
636 			return (1);
637 		next = entry->next;
638 		if (next == &map->header || next->start >= end)
639 			break;
640 	}
641 	SAVE_HINT(map, entry);
642 	*addr = start;
643 	if (map == kernel_map) {
644 		vm_offset_t ksize;
645 		if ((ksize = round_page(start + length)) > kernel_vm_end) {
646 			pmap_growkernel(ksize);
647 		}
648 	}
649 	return (0);
650 }
651 
652 /*
653  *	vm_map_find finds an unallocated region in the target address
654  *	map with the given length.  The search is defined to be
655  *	first-fit from the specified address; the region found is
656  *	returned in the same parameter.
657  *
658  *	If object is non-NULL, ref count must be bumped by caller
659  *	prior to making call to account for the new entry.
660  */
661 int
662 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
663 	    vm_offset_t *addr,	/* IN/OUT */
664 	    vm_size_t length, boolean_t find_space, vm_prot_t prot,
665 	    vm_prot_t max, int cow)
666 {
667 	vm_offset_t start;
668 	int result, s = 0;
669 
670 	start = *addr;
671 
672 	if (map == kmem_map || map == mb_map)
673 		s = splvm();
674 
675 	vm_map_lock(map);
676 	if (find_space) {
677 		if (vm_map_findspace(map, start, length, addr)) {
678 			vm_map_unlock(map);
679 			if (map == kmem_map || map == mb_map)
680 				splx(s);
681 			return (KERN_NO_SPACE);
682 		}
683 		start = *addr;
684 	}
685 	result = vm_map_insert(map, object, offset,
686 		start, start + length, prot, max, cow);
687 	vm_map_unlock(map);
688 
689 	if (map == kmem_map || map == mb_map)
690 		splx(s);
691 
692 	return (result);
693 }
694 
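/*
 *	An illustrative sketch of a caller (hypothetical names, not from
 *	this file): reserving "size" bytes somewhere in a submap "foo_map",
 *	observing the reference-count rule documented above.  On failure
 *	the caller still owns the extra reference and presumably drops it.
 *
 *		vm_offset_t addr = vm_map_min(foo_map);
 *
 *		vm_object_reference(obj);
 *		if (vm_map_find(foo_map, obj, 0, &addr, size, TRUE,
 *		    VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
 *			vm_object_deallocate(obj);
 */
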
695 /*
696  *	vm_map_simplify_entry:
697  *
698  *	Simplify the given map entry by merging with either neighbor.  This
699  *	routine also has the ability to merge with both neighbors.
700  *
701  *	The map must be locked.
702  *
703  *	This routine guarantees that the passed entry remains valid (though
704  *	possibly extended).  When merging, this routine may delete one or
705  *	both neighbors.
706  */
707 void
708 vm_map_simplify_entry(map, entry)
709 	vm_map_t map;
710 	vm_map_entry_t entry;
711 {
712 	vm_map_entry_t next, prev;
713 	vm_size_t prevsize, esize;
714 
715 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
716 		return;
717 
718 	prev = entry->prev;
719 	if (prev != &map->header) {
720 		prevsize = prev->end - prev->start;
721 		if ( (prev->end == entry->start) &&
722 		     (prev->object.vm_object == entry->object.vm_object) &&
723 		     (!prev->object.vm_object ||
724 			(prev->offset + prevsize == entry->offset)) &&
725 		     (prev->eflags == entry->eflags) &&
726 		     (prev->protection == entry->protection) &&
727 		     (prev->max_protection == entry->max_protection) &&
728 		     (prev->inheritance == entry->inheritance) &&
729 		     (prev->wired_count == entry->wired_count)) {
730 			if (map->first_free == prev)
731 				map->first_free = entry;
732 			if (map->hint == prev)
733 				map->hint = entry;
734 			vm_map_entry_unlink(map, prev);
735 			entry->start = prev->start;
736 			entry->offset = prev->offset;
737 			if (prev->object.vm_object)
738 				vm_object_deallocate(prev->object.vm_object);
739 			vm_map_entry_dispose(map, prev);
740 		}
741 	}
742 
743 	next = entry->next;
744 	if (next != &map->header) {
745 		esize = entry->end - entry->start;
746 		if ((entry->end == next->start) &&
747 		    (next->object.vm_object == entry->object.vm_object) &&
748 		     (!entry->object.vm_object ||
749 			(entry->offset + esize == next->offset)) &&
750 		    (next->eflags == entry->eflags) &&
751 		    (next->protection == entry->protection) &&
752 		    (next->max_protection == entry->max_protection) &&
753 		    (next->inheritance == entry->inheritance) &&
754 		    (next->wired_count == entry->wired_count)) {
755 			if (map->first_free == next)
756 				map->first_free = entry;
757 			if (map->hint == next)
758 				map->hint = entry;
759 			vm_map_entry_unlink(map, next);
760 			entry->end = next->end;
761 			if (next->object.vm_object)
762 				vm_object_deallocate(next->object.vm_object);
763 			vm_map_entry_dispose(map, next);
764 	        }
765 	}
766 }
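/*
 *	For example, two adjacent entries backed by the same object at
 *	contiguous offsets, with identical eflags, protection, inheritance
 *	and wiring, collapse into one; this is what can eventually undo the
 *	splits made by the clipping macros below.
 */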
767 /*
768  *	vm_map_clip_start:	[ internal use only ]
769  *
770  *	Asserts that the given entry begins at or after
771  *	the specified address; if necessary,
772  *	it splits the entry into two.
773  */
774 #define vm_map_clip_start(map, entry, startaddr) \
775 { \
776 	if (startaddr > entry->start) \
777 		_vm_map_clip_start(map, entry, startaddr); \
778 }
779 
780 /*
781  *	This routine is called only when it is known that
782  *	the entry must be split.
783  */
784 static void
785 _vm_map_clip_start(map, entry, start)
786 	vm_map_t map;
787 	vm_map_entry_t entry;
788 	vm_offset_t start;
789 {
790 	vm_map_entry_t new_entry;
791 
792 	/*
793 	 * Split off the front portion -- note that we must insert the new
794 	 * entry BEFORE this one, so that this entry has the specified
795 	 * starting address.
796 	 */
797 
798 	vm_map_simplify_entry(map, entry);
799 
800 	/*
801 	 * If there is no object backing this entry, we might as well create
802 	 * one now.  If we defer it, an object can get created after the map
803 	 * is clipped, and individual objects will be created for the split-up
804 	 * map.  This is a bit of a hack, but is also about the best place to
805 	 * put this improvement.
806 	 */
807 
808 	if (entry->object.vm_object == NULL && !map->system_map) {
809 		vm_object_t object;
810 		object = vm_object_allocate(OBJT_DEFAULT,
811 				atop(entry->end - entry->start));
812 		entry->object.vm_object = object;
813 		entry->offset = 0;
814 	}
815 
816 	new_entry = vm_map_entry_create(map);
817 	*new_entry = *entry;
818 
819 	new_entry->end = start;
820 	entry->offset += (start - entry->start);
821 	entry->start = start;
822 
823 	vm_map_entry_link(map, entry->prev, new_entry);
824 
825 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
826 		vm_object_reference(new_entry->object.vm_object);
827 	}
828 }
829 
830 /*
831  *	vm_map_clip_end:	[ internal use only ]
832  *
833  *	Asserts that the given entry ends at or before
834  *	the specified address; if necessary,
835  *	it splits the entry into two.
836  */
837 
838 #define vm_map_clip_end(map, entry, endaddr) \
839 { \
840 	if (endaddr < entry->end) \
841 		_vm_map_clip_end(map, entry, endaddr); \
842 }
843 
844 /*
845  *	This routine is called only when it is known that
846  *	the entry must be split.
847  */
848 static void
849 _vm_map_clip_end(map, entry, end)
850 	vm_map_t map;
851 	vm_map_entry_t entry;
852 	vm_offset_t end;
853 {
854 	vm_map_entry_t new_entry;
855 
856 	/*
857 	 * If there is no object backing this entry, we might as well create
858 	 * one now.  If we defer it, an object can get created after the map
859 	 * is clipped, and individual objects will be created for the split-up
860 	 * map.  This is a bit of a hack, but is also about the best place to
861 	 * put this improvement.
862 	 */
863 
864 	if (entry->object.vm_object == NULL && !map->system_map) {
865 		vm_object_t object;
866 		object = vm_object_allocate(OBJT_DEFAULT,
867 				atop(entry->end - entry->start));
868 		entry->object.vm_object = object;
869 		entry->offset = 0;
870 	}
871 
872 	/*
873 	 * Create a new entry and insert it AFTER the specified entry
874 	 */
875 
876 	new_entry = vm_map_entry_create(map);
877 	*new_entry = *entry;
878 
879 	new_entry->start = entry->end = end;
880 	new_entry->offset += (end - entry->start);
881 
882 	vm_map_entry_link(map, entry, new_entry);
883 
884 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
885 		vm_object_reference(new_entry->object.vm_object);
886 	}
887 }
888 
889 /*
890  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
891  *
892  *	Asserts that the starting and ending region
893  *	addresses fall within the valid range of the map.
894  */
895 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
896 		{					\
897 		if (start < vm_map_min(map))		\
898 			start = vm_map_min(map);	\
899 		if (end > vm_map_max(map))		\
900 			end = vm_map_max(map);		\
901 		if (start > end)			\
902 			start = end;			\
903 		}
904 
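/*
 *	Note that out-of-range requests are silently clamped to the map
 *	bounds rather than rejected; callers that must fail on a bogus
 *	range do their own checking (see vm_map_insert above).
 */
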
905 /*
906  *	vm_map_submap:		[ kernel use only ]
907  *
908  *	Mark the given range as handled by a subordinate map.
909  *
910  *	This range must have been created with vm_map_find,
911  *	and no other operations may have been performed on this
912  *	range prior to calling vm_map_submap.
913  *
914  *	Only a limited number of operations can be performed
915  *	within this range after calling vm_map_submap:
916  *		vm_fault
917  *	[Don't try vm_map_copy!]
918  *
919  *	To remove a submapping, one must first remove the
920  *	range from the superior map, and then destroy the
921  *	submap (if desired).  [Better yet, don't try it.]
922  */
923 int
924 vm_map_submap(map, start, end, submap)
925 	vm_map_t map;
926 	vm_offset_t start;
927 	vm_offset_t end;
928 	vm_map_t submap;
929 {
930 	vm_map_entry_t entry;
931 	int result = KERN_INVALID_ARGUMENT;
932 
933 	vm_map_lock(map);
934 
935 	VM_MAP_RANGE_CHECK(map, start, end);
936 
937 	if (vm_map_lookup_entry(map, start, &entry)) {
938 		vm_map_clip_start(map, entry, start);
939 	} else
940 		entry = entry->next;
941 
942 	vm_map_clip_end(map, entry, end);
943 
944 	if ((entry->start == start) && (entry->end == end) &&
945 	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
946 	    (entry->object.vm_object == NULL)) {
947 		entry->object.sub_map = submap;
948 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
949 		result = KERN_SUCCESS;
950 	}
951 	vm_map_unlock(map);
952 
953 	return (result);
954 }
955 
956 /*
957  *	vm_map_protect:
958  *
959  *	Sets the protection of the specified address
960  *	region in the target map.  If "set_max" is
961  *	specified, the maximum protection is to be set;
962  *	otherwise, only the current protection is affected.
963  */
964 int
965 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
966 	       vm_prot_t new_prot, boolean_t set_max)
967 {
968 	vm_map_entry_t current;
969 	vm_map_entry_t entry;
970 
971 	vm_map_lock(map);
972 
973 	VM_MAP_RANGE_CHECK(map, start, end);
974 
975 	if (vm_map_lookup_entry(map, start, &entry)) {
976 		vm_map_clip_start(map, entry, start);
977 	} else {
978 		entry = entry->next;
979 	}
980 
981 	/*
982 	 * Make a first pass to check for protection violations.
983 	 */
984 
985 	current = entry;
986 	while ((current != &map->header) && (current->start < end)) {
987 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
988 			vm_map_unlock(map);
989 			return (KERN_INVALID_ARGUMENT);
990 		}
991 		if ((new_prot & current->max_protection) != new_prot) {
992 			vm_map_unlock(map);
993 			return (KERN_PROTECTION_FAILURE);
994 		}
995 		current = current->next;
996 	}
997 
998 	/*
999 	 * Go back and fix up protections. [Note that clipping is not
1000 	 * necessary the second time.]
1001 	 */
1002 
1003 	current = entry;
1004 
1005 	while ((current != &map->header) && (current->start < end)) {
1006 		vm_prot_t old_prot;
1007 
1008 		vm_map_clip_end(map, current, end);
1009 
1010 		old_prot = current->protection;
1011 		if (set_max)
1012 			current->protection =
1013 			    (current->max_protection = new_prot) &
1014 			    old_prot;
1015 		else
1016 			current->protection = new_prot;
1017 
1018 		/*
1019 		 * Update physical map if necessary. Worry about copy-on-write
1020 		 * here -- CHECK THIS XXX
1021 		 */
1022 
1023 		if (current->protection != old_prot) {
1024 #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1025 							VM_PROT_ALL)
1026 
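			/*
			 * MASK() withholds VM_PROT_WRITE from the pmap for
			 * copy-on-write entries so that the first write
			 * faults and the copy can be made at fault time.
			 */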
1027 			pmap_protect(map->pmap, current->start,
1028 			    current->end,
1029 			    current->protection & MASK(current));
1030 #undef	MASK
1031 		}
1032 
1033 		vm_map_simplify_entry(map, current);
1034 
1035 		current = current->next;
1036 	}
1037 
1038 	vm_map_unlock(map);
1039 	return (KERN_SUCCESS);
1040 }
1041 
1042 /*
1043  *	vm_map_madvise:
1044  *
1045  * 	This routine traverses a process's map handling the madvise
1046  *	system call.  Advisories are classified as either those affecting
1047  *	the vm_map_entry structure, or those affecting the underlying
1048  *	objects.
1049  */
1050 
1051 int
1052 vm_map_madvise(map, start, end, behav)
1053 	vm_map_t map;
1054 	vm_offset_t start, end;
1055 	int behav;
1056 {
1057 	vm_map_entry_t current, entry;
1058 	int modify_map = 0;
1059 
1060 	/*
1061 	 * Some madvise calls directly modify the vm_map_entry, in which case
1062 	 * we need to use an exclusive lock on the map and we need to perform
1063 	 * various clipping operations.  Otherwise we only need a read-lock
1064 	 * on the map.
1065 	 */
1066 
1067 	switch(behav) {
1068 	case MADV_NORMAL:
1069 	case MADV_SEQUENTIAL:
1070 	case MADV_RANDOM:
1071 	case MADV_NOSYNC:
1072 	case MADV_AUTOSYNC:
1073 	case MADV_NOCORE:
1074 	case MADV_CORE:
1075 		modify_map = 1;
1076 		vm_map_lock(map);
1077 		break;
1078 	case MADV_WILLNEED:
1079 	case MADV_DONTNEED:
1080 	case MADV_FREE:
1081 		vm_map_lock_read(map);
1082 		break;
1083 	default:
1084 		return (KERN_INVALID_ARGUMENT);
1085 	}
1086 
1087 	/*
1088 	 * Locate starting entry and clip if necessary.
1089 	 */
1090 
1091 	VM_MAP_RANGE_CHECK(map, start, end);
1092 
1093 	if (vm_map_lookup_entry(map, start, &entry)) {
1094 		if (modify_map)
1095 			vm_map_clip_start(map, entry, start);
1096 	} else {
1097 		entry = entry->next;
1098 	}
1099 
1100 	if (modify_map) {
1101 		/*
1102 		 * madvise behaviors that are implemented in the vm_map_entry.
1103 		 *
1104 		 * We clip the vm_map_entry so that behavioral changes are
1105 		 * limited to the specified address range.
1106 		 */
1107 		for (current = entry;
1108 		     (current != &map->header) && (current->start < end);
1109 		     current = current->next
1110 		) {
1111 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1112 				continue;
1113 
1114 			vm_map_clip_end(map, current, end);
1115 
1116 			switch (behav) {
1117 			case MADV_NORMAL:
1118 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1119 				break;
1120 			case MADV_SEQUENTIAL:
1121 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1122 				break;
1123 			case MADV_RANDOM:
1124 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1125 				break;
1126 			case MADV_NOSYNC:
1127 				current->eflags |= MAP_ENTRY_NOSYNC;
1128 				break;
1129 			case MADV_AUTOSYNC:
1130 				current->eflags &= ~MAP_ENTRY_NOSYNC;
1131 				break;
1132 			case MADV_NOCORE:
1133 				current->eflags |= MAP_ENTRY_NOCOREDUMP;
1134 				break;
1135 			case MADV_CORE:
1136 				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1137 				break;
1138 			default:
1139 				break;
1140 			}
1141 			vm_map_simplify_entry(map, current);
1142 		}
1143 		vm_map_unlock(map);
1144 	} else {
1145 		vm_pindex_t pindex;
1146 		int count;
1147 
1148 		/*
1149 		 * madvise behaviors that are implemented in the underlying
1150 		 * vm_object.
1151 		 *
1152 		 * Since we don't clip the vm_map_entry, we have to clip
1153 		 * the vm_object pindex and count.
1154 		 */
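		/*
		 * For example, with 4K pages, an entry spanning
		 * [0x10000, 0x20000) at object offset 0 that is madvised
		 * over [0x14000, 0x18000) skips atop(0x4000) = 4 pages at
		 * the front and trims atop(0x8000) = 8 pages off the back,
		 * leaving pindex 4 and count 4.
		 */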
1155 		for (current = entry;
1156 		     (current != &map->header) && (current->start < end);
1157 		     current = current->next
1158 		) {
1159 			vm_offset_t useStart;
1160 
1161 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1162 				continue;
1163 
1164 			pindex = OFF_TO_IDX(current->offset);
1165 			count = atop(current->end - current->start);
1166 			useStart = current->start;
1167 
1168 			if (current->start < start) {
1169 				pindex += atop(start - current->start);
1170 				count -= atop(start - current->start);
1171 				useStart = start;
1172 			}
1173 			if (current->end > end)
1174 				count -= atop(current->end - end);
1175 
1176 			if (count <= 0)
1177 				continue;
1178 
1179 			vm_object_madvise(current->object.vm_object,
1180 					  pindex, count, behav);
1181 			if (behav == MADV_WILLNEED) {
1182 				pmap_object_init_pt(
1183 				    map->pmap,
1184 				    useStart,
1185 				    current->object.vm_object,
1186 				    pindex,
1187 				    (count << PAGE_SHIFT),
1188 				    0
1189 				);
1190 			}
1191 		}
1192 		vm_map_unlock_read(map);
1193 	}
1194 	return(0);
1195 }
1196 
1197 
1198 /*
1199  *	vm_map_inherit:
1200  *
1201  *	Sets the inheritance of the specified address
1202  *	range in the target map.  Inheritance
1203  *	affects how the map will be shared with
1204  *	child maps at the time of vm_map_fork.
1205  */
1206 int
1207 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1208 	       vm_inherit_t new_inheritance)
1209 {
1210 	vm_map_entry_t entry;
1211 	vm_map_entry_t temp_entry;
1212 
1213 	switch (new_inheritance) {
1214 	case VM_INHERIT_NONE:
1215 	case VM_INHERIT_COPY:
1216 	case VM_INHERIT_SHARE:
1217 		break;
1218 	default:
1219 		return (KERN_INVALID_ARGUMENT);
1220 	}
1221 
1222 	vm_map_lock(map);
1223 
1224 	VM_MAP_RANGE_CHECK(map, start, end);
1225 
1226 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1227 		entry = temp_entry;
1228 		vm_map_clip_start(map, entry, start);
1229 	} else
1230 		entry = temp_entry->next;
1231 
1232 	while ((entry != &map->header) && (entry->start < end)) {
1233 		vm_map_clip_end(map, entry, end);
1234 
1235 		entry->inheritance = new_inheritance;
1236 
1237 		vm_map_simplify_entry(map, entry);
1238 
1239 		entry = entry->next;
1240 	}
1241 
1242 	vm_map_unlock(map);
1243 	return (KERN_SUCCESS);
1244 }
1245 
1246 /*
1247  * Implement the semantics of mlock
1248  */
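/*
 * new_pageable == FALSE wires the range (mlock), TRUE unwires it
 * (munlock).  MAP_ENTRY_USER_WIRED marks wirings made on behalf of the
 * user so that they stay distinct from kernel wirings done through
 * vm_map_pageable() below.
 */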
1249 int
1250 vm_map_user_pageable(map, start, end, new_pageable)
1251 	vm_map_t map;
1252 	vm_offset_t start;
1253 	vm_offset_t end;
1254 	boolean_t new_pageable;
1255 {
1256 	vm_map_entry_t entry;
1257 	vm_map_entry_t start_entry;
1258 	vm_offset_t estart;
1259 	int rv;
1260 
1261 	vm_map_lock(map);
1262 	VM_MAP_RANGE_CHECK(map, start, end);
1263 
1264 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1265 		vm_map_unlock(map);
1266 		return (KERN_INVALID_ADDRESS);
1267 	}
1268 
1269 	if (new_pageable) {
1270 
1271 		entry = start_entry;
1272 		vm_map_clip_start(map, entry, start);
1273 
1274 		/*
1275 		 * Now decrement the wiring count for each region. If a region
1276 		 * becomes completely unwired, unwire its physical pages and
1277 		 * mappings.
1278 		 */
1279 		while ((entry != &map->header) && (entry->start < end)) {
1280 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1281 				vm_map_clip_end(map, entry, end);
1282 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1283 				entry->wired_count--;
1284 				if (entry->wired_count == 0)
1285 					vm_fault_unwire(map, entry->start, entry->end);
1286 			}
1287 			vm_map_simplify_entry(map,entry);
1288 			entry = entry->next;
1289 		}
1290 	} else {
1291 
1292 		entry = start_entry;
1293 
1294 		while ((entry != &map->header) && (entry->start < end)) {
1295 
1296 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1297 				entry = entry->next;
1298 				continue;
1299 			}
1300 
1301 			if (entry->wired_count != 0) {
1302 				entry->wired_count++;
1303 				entry->eflags |= MAP_ENTRY_USER_WIRED;
1304 				entry = entry->next;
1305 				continue;
1306 			}
1307 
1308 			/* Here on entry being newly wired */
1309 
1310 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1311 				int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1312 				if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1313 
1314 					vm_object_shadow(&entry->object.vm_object,
1315 					    &entry->offset,
1316 					    atop(entry->end - entry->start));
1317 					entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1318 
1319 				} else if (entry->object.vm_object == NULL &&
1320 					   !map->system_map) {
1321 
1322 					entry->object.vm_object =
1323 					    vm_object_allocate(OBJT_DEFAULT,
1324 						atop(entry->end - entry->start));
1325 					entry->offset = (vm_offset_t) 0;
1326 
1327 				}
1328 			}
1329 
1330 			vm_map_clip_start(map, entry, start);
1331 			vm_map_clip_end(map, entry, end);
1332 
1333 			entry->wired_count++;
1334 			entry->eflags |= MAP_ENTRY_USER_WIRED;
1335 			estart = entry->start;
1336 
1337 			/* First we need to allow map modifications */
1338 			vm_map_set_recursive(map);
1339 			vm_map_lock_downgrade(map);
1340 			map->timestamp++;
1341 
1342 			rv = vm_fault_user_wire(map, entry->start, entry->end);
1343 			if (rv) {
1344 
1345 				entry->wired_count--;
1346 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1347 
1348 				vm_map_clear_recursive(map);
1349 				vm_map_unlock(map);
1350 
1351 				(void) vm_map_user_pageable(map, start, entry->start, TRUE);
1352 				return rv;
1353 			}
1354 
1355 			vm_map_clear_recursive(map);
1356 			if (vm_map_lock_upgrade(map)) {
1357 				vm_map_lock(map);
1358 				if (vm_map_lookup_entry(map, estart, &entry)
1359 				    == FALSE) {
1360 					vm_map_unlock(map);
1361 					(void) vm_map_user_pageable(map,
1362 								    start,
1363 								    estart,
1364 								    TRUE);
1365 					return (KERN_INVALID_ADDRESS);
1366 				}
1367 			}
1368 			vm_map_simplify_entry(map,entry);
1369 		}
1370 	}
1371 	map->timestamp++;
1372 	vm_map_unlock(map);
1373 	return KERN_SUCCESS;
1374 }
1375 
1376 /*
1377  *	vm_map_pageable:
1378  *
1379  *	Sets the pageability of the specified address
1380  *	range in the target map.  Regions specified
1381  *	as not pageable require locked-down physical
1382  *	memory and physical page maps.
1383  *
1384  *	The map must not be locked, but a reference
1385  *	must remain to the map throughout the call.
1386  */
1387 int
1388 vm_map_pageable(map, start, end, new_pageable)
1389 	vm_map_t map;
1390 	vm_offset_t start;
1391 	vm_offset_t end;
1392 	boolean_t new_pageable;
1393 {
1394 	vm_map_entry_t entry;
1395 	vm_map_entry_t start_entry;
1396 	vm_offset_t failed = 0;
1397 	int rv;
1398 
1399 	vm_map_lock(map);
1400 
1401 	VM_MAP_RANGE_CHECK(map, start, end);
1402 
1403 	/*
1404 	 * Only one pageability change may take place at one time, since
1405 	 * vm_fault assumes it will be called only once for each
1406 	 * wiring/unwiring.  Therefore, we have to make sure we're actually
1407 	 * changing the pageability for the entire region.  We do so before
1408 	 * making any changes.
1409 	 */
1410 
1411 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1412 		vm_map_unlock(map);
1413 		return (KERN_INVALID_ADDRESS);
1414 	}
1415 	entry = start_entry;
1416 
1417 	/*
1418 	 * Actions are rather different for wiring and unwiring, so we have
1419 	 * two separate cases.
1420 	 */
1421 
1422 	if (new_pageable) {
1423 
1424 		vm_map_clip_start(map, entry, start);
1425 
1426 		/*
1427 		 * Unwiring.  First ensure that the range to be unwired is
1428 		 * really wired down and that there are no holes.
1429 		 */
1430 		while ((entry != &map->header) && (entry->start < end)) {
1431 
1432 			if (entry->wired_count == 0 ||
1433 			    (entry->end < end &&
1434 				(entry->next == &map->header ||
1435 				    entry->next->start > entry->end))) {
1436 				vm_map_unlock(map);
1437 				return (KERN_INVALID_ARGUMENT);
1438 			}
1439 			entry = entry->next;
1440 		}
1441 
1442 		/*
1443 		 * Now decrement the wiring count for each region. If a region
1444 		 * becomes completely unwired, unwire its physical pages and
1445 		 * mappings.
1446 		 */
1447 		entry = start_entry;
1448 		while ((entry != &map->header) && (entry->start < end)) {
1449 			vm_map_clip_end(map, entry, end);
1450 
1451 			entry->wired_count--;
1452 			if (entry->wired_count == 0)
1453 				vm_fault_unwire(map, entry->start, entry->end);
1454 
1455 			vm_map_simplify_entry(map, entry);
1456 
1457 			entry = entry->next;
1458 		}
1459 	} else {
1460 		/*
1461 		 * Wiring.  We must do this in two passes:
1462 		 *
1463 		 * 1.  Holding the write lock, we create any shadow or zero-fill
1464 		 * objects that need to be created. Then we clip each map
1465 		 * entry to the region to be wired and increment its wiring
1466 		 * count.  We create objects before clipping the map entries
1467 		 * to avoid object proliferation.
1468 		 *
1469 		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
1470 		 * fault in the pages for any newly wired area (wired_count is
1471 		 * 1).
1472 		 *
1473 		 * Downgrading to a read lock for vm_fault_wire avoids a possible
1474 		 * deadlock with another process that may have faulted on one
1475 		 * of the pages to be wired (it would mark the page busy,
1476 		 * blocking us, then in turn block on the map lock that we
1477 		 * hold).  Because of problems in the recursive lock package,
1478 		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
1479 		 * any actions that require the write lock must be done
1480 		 * beforehand.  Because we keep the read lock on the map, the
1481 		 * copy-on-write status of the entries we modify here cannot
1482 		 * change.
1483 		 */
1484 
1485 		/*
1486 		 * Pass 1.
1487 		 */
1488 		while ((entry != &map->header) && (entry->start < end)) {
1489 			if (entry->wired_count == 0) {
1490 
1491 				/*
1492 				 * Perform actions of vm_map_lookup that need
1493 				 * the write lock on the map: create a shadow
1494 				 * object for a copy-on-write region, or an
1495 				 * object for a zero-fill region.
1496 				 *
1497 				 * We don't have to do this for entries that
1498 				 * point to sub maps, because we won't
1499 				 * hold the lock on the sub map.
1500 				 */
1501 				if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1502 					int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1503 					if (copyflag &&
1504 					    ((entry->protection & VM_PROT_WRITE) != 0)) {
1505 
1506 						vm_object_shadow(&entry->object.vm_object,
1507 						    &entry->offset,
1508 						    atop(entry->end - entry->start));
1509 						entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1510 					} else if (entry->object.vm_object == NULL &&
1511 						   !map->system_map) {
1512 						entry->object.vm_object =
1513 						    vm_object_allocate(OBJT_DEFAULT,
1514 							atop(entry->end - entry->start));
1515 						entry->offset = (vm_offset_t) 0;
1516 					}
1517 				}
1518 			}
1519 			vm_map_clip_start(map, entry, start);
1520 			vm_map_clip_end(map, entry, end);
1521 			entry->wired_count++;
1522 
1523 			/*
1524 			 * Check for holes
1525 			 */
1526 			if (entry->end < end &&
1527 			    (entry->next == &map->header ||
1528 				entry->next->start > entry->end)) {
1529 				/*
1530 				 * Found one.  Object creation actions do not
1531 				 * need to be undone, but the wired counts
1532 				 * need to be restored.
1533 				 */
1534 				while (entry != &map->header && entry->end > start) {
1535 					entry->wired_count--;
1536 					entry = entry->prev;
1537 				}
1538 				vm_map_unlock(map);
1539 				return (KERN_INVALID_ARGUMENT);
1540 			}
1541 			entry = entry->next;
1542 		}
1543 
1544 		/*
1545 		 * Pass 2.
1546 		 */
1547 
1548 		/*
1549 		 * HACK HACK HACK HACK
1550 		 *
1551 		 * If we are wiring in the kernel map or a submap of it,
1552 		 * unlock the map to avoid deadlocks.  We trust that the
1553 		 * kernel is well-behaved, and therefore will not do
1554 		 * anything destructive to this region of the map while
1555 		 * we have it unlocked.  We cannot trust user processes
1556 		 * to do the same.
1557 		 *
1558 		 * HACK HACK HACK HACK
1559 		 */
1560 		if (vm_map_pmap(map) == kernel_pmap) {
1561 			vm_map_unlock(map);	/* trust me ... */
1562 		} else {
1563 			vm_map_lock_downgrade(map);
1564 		}
1565 
1566 		rv = 0;
1567 		entry = start_entry;
1568 		while (entry != &map->header && entry->start < end) {
1569 			/*
1570 			 * If vm_fault_wire fails for any page we need to undo
1571 			 * what has been done.  We decrement the wiring count
1572 			 * for those pages which have not yet been wired (now)
1573 			 * and unwire those that have (later).
1574 			 *
1575 			 * XXX this violates the locking protocol on the map,
1576 			 * needs to be fixed.
1577 			 */
1578 			if (rv)
1579 				entry->wired_count--;
1580 			else if (entry->wired_count == 1) {
1581 				rv = vm_fault_wire(map, entry->start, entry->end);
1582 				if (rv) {
1583 					failed = entry->start;
1584 					entry->wired_count--;
1585 				}
1586 			}
1587 			entry = entry->next;
1588 		}
1589 
1590 		if (vm_map_pmap(map) == kernel_pmap) {
1591 			vm_map_lock(map);
1592 		}
1593 		if (rv) {
1594 			vm_map_unlock(map);
1595 			(void) vm_map_pageable(map, start, failed, TRUE);
1596 			return (rv);
1597 		}
1598 		vm_map_simplify_entry(map, start_entry);
1599 	}
1600 
1601 	vm_map_unlock(map);
1602 
1603 	return (KERN_SUCCESS);
1604 }
1605 
1606 /*
1607  * vm_map_clean
1608  *
1609  * Push any dirty cached pages in the address range to their pager.
1610  * If syncio is TRUE, dirty pages are written synchronously.
1611  * If invalidate is TRUE, any cached pages are freed as well.
1612  *
1613  * Returns an error if any part of the specified range is not mapped.
1614  */
1615 int
1616 vm_map_clean(map, start, end, syncio, invalidate)
1617 	vm_map_t map;
1618 	vm_offset_t start;
1619 	vm_offset_t end;
1620 	boolean_t syncio;
1621 	boolean_t invalidate;
1622 {
1623 	vm_map_entry_t current;
1624 	vm_map_entry_t entry;
1625 	vm_size_t size;
1626 	vm_object_t object;
1627 	vm_ooffset_t offset;
1628 
1629 	vm_map_lock_read(map);
1630 	VM_MAP_RANGE_CHECK(map, start, end);
1631 	if (!vm_map_lookup_entry(map, start, &entry)) {
1632 		vm_map_unlock_read(map);
1633 		return (KERN_INVALID_ADDRESS);
1634 	}
1635 	/*
1636 	 * Make a first pass to check for holes.
1637 	 */
1638 	for (current = entry; current->start < end; current = current->next) {
1639 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1640 			vm_map_unlock_read(map);
1641 			return (KERN_INVALID_ARGUMENT);
1642 		}
1643 		if (end > current->end &&
1644 		    (current->next == &map->header ||
1645 			current->end != current->next->start)) {
1646 			vm_map_unlock_read(map);
1647 			return (KERN_INVALID_ADDRESS);
1648 		}
1649 	}
1650 
1651 	if (invalidate)
1652 		pmap_remove(vm_map_pmap(map), start, end);
1653 	/*
1654 	 * Make a second pass, cleaning/uncaching pages from the indicated
1655 	 * objects as we go.
1656 	 */
1657 	for (current = entry; current->start < end; current = current->next) {
1658 		offset = current->offset + (start - current->start);
1659 		size = (end <= current->end ? end : current->end) - start;
1660 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1661 			vm_map_t smap;
1662 			vm_map_entry_t tentry;
1663 			vm_size_t tsize;
1664 
1665 			smap = current->object.sub_map;
1666 			vm_map_lock_read(smap);
1667 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1668 			tsize = tentry->end - offset;
1669 			if (tsize < size)
1670 				size = tsize;
1671 			object = tentry->object.vm_object;
1672 			offset = tentry->offset + (offset - tentry->start);
1673 			vm_map_unlock_read(smap);
1674 		} else {
1675 			object = current->object.vm_object;
1676 		}
1677 		/*
1678 		 * Note that there is absolutely no sense in writing out
1679 		 * anonymous objects, so we track down the vnode object
1680 		 * to write out.
1681 		 * We invalidate (remove) all pages from the address space
1682 		 * anyway, for semantic correctness.
1683 		 */
1684 		while (object->backing_object) {
1685 			object = object->backing_object;
1686 			offset += object->backing_object_offset;
1687 			if (object->size < OFF_TO_IDX( offset + size))
1688 				size = IDX_TO_OFF(object->size) - offset;
1689 		}
1690 		if (object && (object->type == OBJT_VNODE) &&
1691 		    (current->protection & VM_PROT_WRITE)) {
1692 			/*
1693 			 * Flush pages if writing is allowed, invalidate them
1694 			 * if invalidation requested.  Pages undergoing I/O
1695 			 * will be ignored by vm_object_page_remove().
1696 			 *
1697 			 * We cannot lock the vnode and then wait for paging
1698 			 * to complete without deadlocking against vm_fault.
1699 			 * Instead we simply call vm_object_page_remove() and
1700 			 * allow it to block internally on a page-by-page
1701 			 * basis when it encounters pages undergoing async
1702 			 * I/O.
1703 			 */
1704 			int flags;
1705 
1706 			vm_object_reference(object);
1707 			vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
1708 			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1709 			flags |= invalidate ? OBJPC_INVAL : 0;
1710 			vm_object_page_clean(object,
1711 			    OFF_TO_IDX(offset),
1712 			    OFF_TO_IDX(offset + size + PAGE_MASK),
1713 			    flags);
1714 			if (invalidate) {
1715 				/*vm_object_pip_wait(object, "objmcl");*/
1716 				vm_object_page_remove(object,
1717 				    OFF_TO_IDX(offset),
1718 				    OFF_TO_IDX(offset + size + PAGE_MASK),
1719 				    FALSE);
1720 			}
1721 			VOP_UNLOCK(object->handle, 0, curproc);
1722 			vm_object_deallocate(object);
1723 		}
1724 		start += size;
1725 	}
1726 
1727 	vm_map_unlock_read(map);
1728 	return (KERN_SUCCESS);
1729 }
1730 
1731 /*
1732  *	vm_map_entry_unwire:	[ internal use only ]
1733  *
1734  *	Make the region specified by this entry pageable.
1735  *
1736  *	The map in question should be locked.
1737  *	[This is the reason for this routine's existence.]
1738  */
1739 static void
1740 vm_map_entry_unwire(map, entry)
1741 	vm_map_t map;
1742 	vm_map_entry_t entry;
1743 {
1744 	vm_fault_unwire(map, entry->start, entry->end);
1745 	entry->wired_count = 0;
1746 }
1747 
1748 /*
1749  *	vm_map_entry_delete:	[ internal use only ]
1750  *
1751  *	Deallocate the given entry from the target map.
1752  */
1753 static void
1754 vm_map_entry_delete(map, entry)
1755 	vm_map_t map;
1756 	vm_map_entry_t entry;
1757 {
1758 	vm_map_entry_unlink(map, entry);
1759 	map->size -= entry->end - entry->start;
1760 
1761 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1762 		vm_object_deallocate(entry->object.vm_object);
1763 	}
1764 
1765 	vm_map_entry_dispose(map, entry);
1766 }
1767 
1768 /*
1769  *	vm_map_delete:	[ internal use only ]
1770  *
1771  *	Deallocates the given address range from the target
1772  *	map.
1773  */
1774 int
1775 vm_map_delete(map, start, end)
1776 	vm_map_t map;
1777 	vm_offset_t start;
1778 	vm_offset_t end;
1779 {
1780 	vm_object_t object;
1781 	vm_map_entry_t entry;
1782 	vm_map_entry_t first_entry;
1783 
1784 	/*
1785 	 * Find the start of the region, and clip it
1786 	 */
1787 
1788 	if (!vm_map_lookup_entry(map, start, &first_entry))
1789 		entry = first_entry->next;
1790 	else {
1791 		entry = first_entry;
1792 		vm_map_clip_start(map, entry, start);
1793 		/*
1794 		 * Fix the lookup hint now, rather than each time through the
1795 		 * loop.
1796 		 */
1797 		SAVE_HINT(map, entry->prev);
1798 	}
1799 
1800 	/*
1801 	 * Save the free space hint
1802 	 */
1803 
1804 	if (entry == &map->header) {
1805 		map->first_free = &map->header;
1806 	} else if (map->first_free->start >= start) {
1807 		map->first_free = entry->prev;
1808 	}
1809 
1810 	/*
1811 	 * Step through all entries in this region
1812 	 */
1813 
1814 	while ((entry != &map->header) && (entry->start < end)) {
1815 		vm_map_entry_t next;
1816 		vm_offset_t s, e;
1817 		vm_pindex_t offidxstart, offidxend, count;
1818 
1819 		vm_map_clip_end(map, entry, end);
1820 
1821 		s = entry->start;
1822 		e = entry->end;
1823 		next = entry->next;
1824 
1825 		offidxstart = OFF_TO_IDX(entry->offset);
1826 		count = OFF_TO_IDX(e - s);
1827 		object = entry->object.vm_object;
1828 
1829 		/*
1830 		 * Unwire before removing addresses from the pmap; otherwise,
1831 		 * unwiring will put the entries back in the pmap.
1832 		 */
1833 		if (entry->wired_count != 0) {
1834 			vm_map_entry_unwire(map, entry);
1835 		}
1836 
1837 		offidxend = offidxstart + count;
1838 
1839 		if ((object == kernel_object) || (object == kmem_object)) {
1840 			vm_object_page_remove(object, offidxstart, offidxend, FALSE);
1841 		} else {
1842 			pmap_remove(map->pmap, s, e);
1843 			if (object != NULL &&
1844 			    object->ref_count != 1 &&
1845 			    (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
1846 			    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
1847 				vm_object_collapse(object);
1848 				vm_object_page_remove(object, offidxstart, offidxend, FALSE);
1849 				if (object->type == OBJT_SWAP) {
1850 					swap_pager_freespace(object, offidxstart, count);
1851 				}
1852 				if (offidxend >= object->size &&
1853 				    offidxstart < object->size) {
1854 					object->size = offidxstart;
1855 				}
1856 			}
1857 		}
1858 
1859 		/*
1860 		 * Delete the entry (which may delete the object) only after
1861 		 * removing all pmap entries pointing to its pages.
1862 		 * (Otherwise, its page frames may be reallocated, and any
1863 		 * modify bits will be set in the wrong object!)
1864 		 */
1865 		vm_map_entry_delete(map, entry);
1866 		entry = next;
1867 	}
1868 	return (KERN_SUCCESS);
1869 }
1870 
1871 /*
1872  *	vm_map_remove:
1873  *
1874  *	Remove the given address range from the target map.
1875  *	This is the exported form of vm_map_delete.
1876  */
1877 int
1878 vm_map_remove(map, start, end)
1879 	vm_map_t map;
1880 	vm_offset_t start;
1881 	vm_offset_t end;
1882 {
1883 	int result, s = 0;
1884 
1885 	if (map == kmem_map || map == mb_map)
1886 		s = splvm();
1887 
1888 	vm_map_lock(map);
1889 	VM_MAP_RANGE_CHECK(map, start, end);
1890 	result = vm_map_delete(map, start, end);
1891 	vm_map_unlock(map);
1892 
1893 	if (map == kmem_map || map == mb_map)
1894 		splx(s);
1895 
1896 	return (result);
1897 }
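
/*
 * Example (illustrative sketch, not part of the original code): a caller
 * tearing down a page-aligned kernel mapping would use this exported
 * interface rather than vm_map_delete() directly, along the lines of
 *
 *	vm_size_t size = round_page(len);
 *
 *	(void) vm_map_remove(kernel_map, trunc_page(addr),
 *	    trunc_page(addr) + size);
 *
 * where addr and len stand for the caller's own values.  vm_map_remove()
 * takes the map lock itself (and splvm for kmem_map or mb_map), so the
 * caller must not already hold the lock on that map.
 */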
1898 
1899 /*
1900  *	vm_map_check_protection:
1901  *
1902  *	Assert that the target map allows the specified
1903  *	privilege on the entire address region given.
1904  *	The entire region must be allocated.
1905  */
1906 boolean_t
1907 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
1908 			vm_prot_t protection)
1909 {
1910 	vm_map_entry_t entry;
1911 	vm_map_entry_t tmp_entry;
1912 
1913 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1914 		return (FALSE);
1915 	}
1916 	entry = tmp_entry;
1917 
1918 	while (start < end) {
1919 		if (entry == &map->header) {
1920 			return (FALSE);
1921 		}
1922 		/*
1923 		 * No holes allowed!
1924 		 */
1925 
1926 		if (start < entry->start) {
1927 			return (FALSE);
1928 		}
1929 		/*
1930 		 * Check protection associated with entry.
1931 		 */
1932 
1933 		if ((entry->protection & protection) != protection) {
1934 			return (FALSE);
1935 		}
1936 		/* go to next entry */
1937 
1938 		start = entry->end;
1939 		entry = entry->next;
1940 	}
1941 	return (TRUE);
1942 }
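
/*
 * Example (illustrative sketch): a caller that wants to verify that an
 * entire user range is currently mapped with write permission before
 * operating on it might do
 *
 *	if (!vm_map_check_protection(&p->p_vmspace->vm_map,
 *	    trunc_page(uaddr), round_page(uaddr + len), VM_PROT_WRITE))
 *		return (EFAULT);
 *
 * where p, uaddr and len are the caller's own variables.  The check is
 * only a snapshot; the map can change again once it returns unless the
 * caller holds the map lock.
 */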
1943 
1944 /*
1945  * Split the pages in a map entry into a new object.  This affords
1946  * easier removal of unused pages, and keeps object inheritance from
1947  * having a negative impact on memory usage.
1948  */
1949 static void
1950 vm_map_split(entry)
1951 	vm_map_entry_t entry;
1952 {
1953 	vm_page_t m;
1954 	vm_object_t orig_object, new_object, source;
1955 	vm_offset_t s, e;
1956 	vm_pindex_t offidxstart, offidxend, idx;
1957 	vm_size_t size;
1958 	vm_ooffset_t offset;
1959 
1960 	orig_object = entry->object.vm_object;
1961 	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
1962 		return;
1963 	if (orig_object->ref_count <= 1)
1964 		return;
1965 
1966 	offset = entry->offset;
1967 	s = entry->start;
1968 	e = entry->end;
1969 
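	/*
	 * Worked example (illustrative, assuming 4K pages): an entry
	 * covering [0x2000, 0x6000) with offset 0x1000 into orig_object
	 * yields offidxstart = OFF_TO_IDX(0x1000) = 1 and offidxend =
	 * 1 + OFF_TO_IDX(0x4000) = 5 below, so the new object is sized
	 * at 4 pages and populated from pages 1..4 of orig_object.
	 */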
1970 	offidxstart = OFF_TO_IDX(offset);
1971 	offidxend = offidxstart + OFF_TO_IDX(e - s);
1972 	size = offidxend - offidxstart;
1973 
1974 	new_object = vm_pager_allocate(orig_object->type,
1975 		NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
1976 	if (new_object == NULL)
1977 		return;
1978 
1979 	source = orig_object->backing_object;
1980 	if (source != NULL) {
1981 		vm_object_reference(source);	/* Referenced by new_object */
1982 		TAILQ_INSERT_TAIL(&source->shadow_head,
1983 				  new_object, shadow_list);
1984 		vm_object_clear_flag(source, OBJ_ONEMAPPING);
1985 		new_object->backing_object_offset =
1986 			orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
1987 		new_object->backing_object = source;
1988 		source->shadow_count++;
1989 		source->generation++;
1990 	}
1991 
1992 	for (idx = 0; idx < size; idx++) {
1993 		vm_page_t m;
1994 
1995 	retry:
1996 		m = vm_page_lookup(orig_object, offidxstart + idx);
1997 		if (m == NULL)
1998 			continue;
1999 
2000 		/*
2001 		 * We must wait for pending I/O to complete before we can
2002 		 * rename the page.
2003 		 *
2004 		 * We do not have to VM_PROT_NONE the page as mappings should
2005 		 * not be changed by this operation.
2006 		 */
2007 		if (vm_page_sleep_busy(m, TRUE, "spltwt"))
2008 			goto retry;
2009 
2010 		vm_page_busy(m);
2011 		vm_page_rename(m, new_object, idx);
2012 		/* page automatically made dirty by rename and cache handled */
2013 		vm_page_busy(m);
2014 	}
2015 
2016 	if (orig_object->type == OBJT_SWAP) {
2017 		vm_object_pip_add(orig_object, 1);
2018 		/*
2019 		 * copy orig_object pages into new_object
2020 		 * and destroy unneeded pages in
2021 		 * shadow object.
2022 		 */
2023 		swap_pager_copy(orig_object, new_object, offidxstart, 0);
2024 		vm_object_pip_wakeup(orig_object);
2025 	}
2026 
2027 	for (idx = 0; idx < size; idx++) {
2028 		m = vm_page_lookup(new_object, idx);
2029 		if (m) {
2030 			vm_page_wakeup(m);
2031 		}
2032 	}
2033 
2034 	entry->object.vm_object = new_object;
2035 	entry->offset = 0LL;
2036 	vm_object_deallocate(orig_object);
2037 }
2038 
2039 /*
2040  *	vm_map_copy_entry:
2041  *
2042  *	Copies the contents of the source entry to the destination
2043  *	entry.  The entries *must* be aligned properly.
2044  */
2045 static void
2046 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
2047 	vm_map_t src_map, dst_map;
2048 	vm_map_entry_t src_entry, dst_entry;
2049 {
2050 	vm_object_t src_object;
2051 
2052 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2053 		return;
2054 
2055 	if (src_entry->wired_count == 0) {
2056 
2057 		/*
2058 		 * If the source entry is marked needs_copy, it is already
2059 		 * write-protected.
2060 		 */
2061 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2062 			pmap_protect(src_map->pmap,
2063 			    src_entry->start,
2064 			    src_entry->end,
2065 			    src_entry->protection & ~VM_PROT_WRITE);
2066 		}
2067 
2068 		/*
2069 		 * Make a copy of the object.
2070 		 */
2071 		if ((src_object = src_entry->object.vm_object) != NULL) {
2072 
2073 			if ((src_object->handle == NULL) &&
2074 				(src_object->type == OBJT_DEFAULT ||
2075 				 src_object->type == OBJT_SWAP)) {
2076 				vm_object_collapse(src_object);
2077 				if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2078 					vm_map_split(src_entry);
2079 					src_object = src_entry->object.vm_object;
2080 				}
2081 			}
2082 
2083 			vm_object_reference(src_object);
2084 			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2085 			dst_entry->object.vm_object = src_object;
2086 			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2087 			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2088 			dst_entry->offset = src_entry->offset;
2089 		} else {
2090 			dst_entry->object.vm_object = NULL;
2091 			dst_entry->offset = 0;
2092 		}
2093 
2094 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2095 		    dst_entry->end - dst_entry->start, src_entry->start);
2096 	} else {
2097 		/*
2098 		 * Of course, wired down pages can't be set copy-on-write.
2099 		 * Cause wired pages to be copied into the new map by
2100 		 * simulating faults (the new pages are pageable)
2101 		 */
2102 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2103 	}
2104 }
2105 
2106 /*
2107  * vmspace_fork:
2108  * Create a new process vmspace structure and vm_map
2109  * based on those of an existing process.  The new map
2110  * is based on the old map, according to the inheritance
2111  * values on the regions in that map.
2112  *
2113  * The source map must not be locked.
2114  */
2115 struct vmspace *
2116 vmspace_fork(vm1)
2117 	struct vmspace *vm1;
2118 {
2119 	struct vmspace *vm2;
2120 	vm_map_t old_map = &vm1->vm_map;
2121 	vm_map_t new_map;
2122 	vm_map_entry_t old_entry;
2123 	vm_map_entry_t new_entry;
2124 	vm_object_t object;
2125 
2126 	vm_map_lock(old_map);
2127 	old_map->infork = 1;
2128 
2129 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2130 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2131 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
2132 	new_map = &vm2->vm_map;	/* XXX */
2133 	new_map->timestamp = 1;
2134 
2135 	old_entry = old_map->header.next;
2136 
2137 	while (old_entry != &old_map->header) {
2138 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2139 			panic("vm_map_fork: encountered a submap");
2140 
2141 		switch (old_entry->inheritance) {
2142 		case VM_INHERIT_NONE:
2143 			break;
2144 
2145 		case VM_INHERIT_SHARE:
2146 			/*
2147 			 * Clone the entry, creating the shared object if necessary.
2148 			 */
2149 			object = old_entry->object.vm_object;
2150 			if (object == NULL) {
2151 				object = vm_object_allocate(OBJT_DEFAULT,
2152 					atop(old_entry->end - old_entry->start));
2153 				old_entry->object.vm_object = object;
2154 				old_entry->offset = (vm_offset_t) 0;
2155 			}
2156 
2157 			/*
2158 			 * Add the reference before calling vm_object_shadow
2159 			 * to ensure that a shadow object is created.
2160 			 */
2161 			vm_object_reference(object);
2162 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2163 				vm_object_shadow(&old_entry->object.vm_object,
2164 					&old_entry->offset,
2165 					atop(old_entry->end - old_entry->start));
2166 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2167 				/* Transfer the second reference too. */
2168 				vm_object_reference(
2169 				    old_entry->object.vm_object);
2170 				vm_object_deallocate(object);
2171 				object = old_entry->object.vm_object;
2172 			}
2173 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
2174 
2175 			/*
2176 			 * Clone the entry, referencing the shared object.
2177 			 */
2178 			new_entry = vm_map_entry_create(new_map);
2179 			*new_entry = *old_entry;
2180 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2181 			new_entry->wired_count = 0;
2182 
2183 			/*
2184 			 * Insert the entry into the new map -- we know we're
2185 			 * inserting at the end of the new map.
2186 			 */
2187 
2188 			vm_map_entry_link(new_map, new_map->header.prev,
2189 			    new_entry);
2190 
2191 			/*
2192 			 * Update the physical map
2193 			 */
2194 
2195 			pmap_copy(new_map->pmap, old_map->pmap,
2196 			    new_entry->start,
2197 			    (old_entry->end - old_entry->start),
2198 			    old_entry->start);
2199 			break;
2200 
2201 		case VM_INHERIT_COPY:
2202 			/*
2203 			 * Clone the entry and link into the map.
2204 			 */
2205 			new_entry = vm_map_entry_create(new_map);
2206 			*new_entry = *old_entry;
2207 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2208 			new_entry->wired_count = 0;
2209 			new_entry->object.vm_object = NULL;
2210 			vm_map_entry_link(new_map, new_map->header.prev,
2211 			    new_entry);
2212 			vm_map_copy_entry(old_map, new_map, old_entry,
2213 			    new_entry);
2214 			break;
2215 		}
2216 		old_entry = old_entry->next;
2217 	}
2218 
2219 	new_map->size = old_map->size;
2220 	old_map->infork = 0;
2221 	vm_map_unlock(old_map);
2222 
2223 	return (vm2);
2224 }
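
/*
 * Example (illustrative sketch, not taken from the fork code itself): when
 * the child is not to share the parent's address space (no RFMEM), the
 * fork path is expected to use this routine roughly as
 *
 *	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
 *
 * where p1 and p2 stand for the parent and child proc pointers.  The child
 * then has its own vmspace whose entries are shared, copy-on-write, or
 * absent according to each entry's inheritance value.
 */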
2225 
2226 int
2227 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2228 	      vm_prot_t prot, vm_prot_t max, int cow)
2229 {
2230 	vm_map_entry_t prev_entry;
2231 	vm_map_entry_t new_stack_entry;
2232 	vm_size_t      init_ssize;
2233 	int            rv;
2234 
2235 	if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2236 		return (KERN_NO_SPACE);
2237 
2238 	if (max_ssize < SGROWSIZ)
2239 		init_ssize = max_ssize;
2240 	else
2241 		init_ssize = SGROWSIZ;
2242 
2243 	vm_map_lock(map);
2244 
2245 	/* If addr is already mapped, no go */
2246 	if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2247 		vm_map_unlock(map);
2248 		return (KERN_NO_SPACE);
2249 	}
2250 
2251 	/* If we can't accommodate max_ssize in the current mapping,
2252 	 * no go.  However, we need to be aware that subsequent user
2253 	 * mappings might map into the space we have reserved for the
2254 	 * stack, and currently this space is not protected.
2255 	 *
2256 	 * Hopefully we will at least detect this condition
2257 	 * when we try to grow the stack.
2258 	 */
2259 	if ((prev_entry->next != &map->header) &&
2260 	    (prev_entry->next->start < addrbos + max_ssize)) {
2261 		vm_map_unlock(map);
2262 		return (KERN_NO_SPACE);
2263 	}
2264 
2265 	/* We initially map a stack of only init_ssize.  We will
2266 	 * grow as needed later.  Since this is to be a grow
2267 	 * down stack, we map at the top of the range.
2268 	 *
2269 	 * Note: we would normally expect prot and max to be
2270 	 * VM_PROT_ALL, and cow to be 0.  Possibly we should
2271 	 * eliminate these as input parameters, and just
2272 	 * pass these values here in the insert call.
2273 	 */
2274 	rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2275 	                   addrbos + max_ssize, prot, max, cow);
2276 
2277 	/* Now set the avail_ssize amount */
2278 	if (rv == KERN_SUCCESS){
2279 		if (prev_entry != &map->header)
2280 			vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2281 		new_stack_entry = prev_entry->next;
2282 		if (new_stack_entry->end   != addrbos + max_ssize ||
2283 		    new_stack_entry->start != addrbos + max_ssize - init_ssize)
2284 			panic ("Bad entry start/end for new stack entry");
2285 		else
2286 			new_stack_entry->avail_ssize = max_ssize - init_ssize;
2287 	}
2288 
2289 	vm_map_unlock(map);
2290 	return (rv);
2291 }
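
/*
 * Example (illustrative sketch): exec-style code reserving a grow-down
 * user stack of at most MAXSSIZ bytes ending at a chosen top address
 * might call
 *
 *	rv = vm_map_stack(&vmspace->vm_map, stack_top - MAXSSIZ,
 *	    (vm_size_t)MAXSSIZ, VM_PROT_ALL, VM_PROT_ALL, 0);
 *	if (rv != KERN_SUCCESS)
 *		return (ENOMEM);
 *
 * where vmspace, stack_top and rv are the caller's own names.  Only the
 * initial SGROWSIZ portion at the top of the range is actually inserted;
 * the remainder is recorded in avail_ssize and mapped on demand by
 * vm_map_growstack().
 */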
2292 
2293 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2294  * desired address is already mapped, or if we successfully grow
2295  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2296  * stack range (this is strange, but preserves compatibility with
2297  * the grow function in vm_machdep.c).
2298  */
2299 int
2300 vm_map_growstack (struct proc *p, vm_offset_t addr)
2301 {
2302 	vm_map_entry_t prev_entry;
2303 	vm_map_entry_t stack_entry;
2304 	vm_map_entry_t new_stack_entry;
2305 	struct vmspace *vm = p->p_vmspace;
2306 	vm_map_t map = &vm->vm_map;
2307 	vm_offset_t    end;
2308 	int      grow_amount;
2309 	int      rv;
2310 	int      is_procstack;
2311 Retry:
2312 	vm_map_lock_read(map);
2313 
2314 	/* If addr is already in the entry range, no need to grow.*/
2315 	if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2316 		vm_map_unlock_read(map);
2317 		return (KERN_SUCCESS);
2318 	}
2319 
2320 	if ((stack_entry = prev_entry->next) == &map->header) {
2321 		vm_map_unlock_read(map);
2322 		return (KERN_SUCCESS);
2323 	}
2324 	if (prev_entry == &map->header)
2325 		end = stack_entry->start - stack_entry->avail_ssize;
2326 	else
2327 		end = prev_entry->end;
2328 
2329 	/* This next test mimics the old grow function in vm_machdep.c.
2330 	 * It really doesn't quite make sense, but we do it anyway
2331 	 * for compatibility.
2332 	 *
2333 	 * If the stack is not growable, return success.  This signals the
2334 	 * caller to proceed as it normally would with ordinary vm.
2335 	 */
2336 	if (stack_entry->avail_ssize < 1 ||
2337 	    addr >= stack_entry->start ||
2338 	    addr <  stack_entry->start - stack_entry->avail_ssize) {
2339 		vm_map_unlock_read(map);
2340 		return (KERN_SUCCESS);
2341 	}
2342 
2343 	/* Find the minimum grow amount */
2344 	grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2345 	if (grow_amount > stack_entry->avail_ssize) {
2346 		vm_map_unlock_read(map);
2347 		return (KERN_NO_SPACE);
2348 	}
2349 
2350 	/* If there is no longer enough space between the entries,
2351 	 * fail and adjust the available space.  Note: this
2352 	 * should only happen if the user has mapped into the
2353 	 * stack area after the stack was created, and is
2354 	 * probably an error.
2355 	 *
2356 	 * This also effectively destroys any guard page the user
2357 	 * might have intended by limiting the stack size.
2358 	 */
2359 	if (grow_amount > stack_entry->start - end) {
2360 		if (vm_map_lock_upgrade(map))
2361 			goto Retry;
2362 
2363 		stack_entry->avail_ssize = stack_entry->start - end;
2364 
2365 		vm_map_unlock(map);
2366 		return (KERN_NO_SPACE);
2367 	}
2368 
2369 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2370 
2371 	/* If this is the main process stack, see if we're over the
2372 	 * stack limit.
2373 	 */
2374 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2375 			     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2376 		vm_map_unlock_read(map);
2377 		return (KERN_NO_SPACE);
2378 	}
2379 
2380 	/* Round up the grow amount modulo SGROWSIZ */
2381 	grow_amount = roundup (grow_amount, SGROWSIZ);
2382 	if (grow_amount > stack_entry->avail_ssize) {
2383 		grow_amount = stack_entry->avail_ssize;
2384 	}
2385 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2386 	                     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2387 		grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2388 		              ctob(vm->vm_ssize);
2389 	}
2390 
2391 	if (vm_map_lock_upgrade(map))
2392 		goto Retry;
2393 
2394 	/* Get the preliminary new entry start value */
2395 	addr = stack_entry->start - grow_amount;
2396 
2397 	/* If this puts us into the previous entry, cut back our growth
2398 	 * to the available space.  Also, see the note above.
2399 	 */
2400 	if (addr < end) {
2401 		stack_entry->avail_ssize = stack_entry->start - end;
2402 		addr = end;
2403 	}
2404 
2405 	rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2406 			   VM_PROT_ALL,
2407 			   VM_PROT_ALL,
2408 			   0);
2409 
2410 	/* Adjust the available stack space by the amount we grew. */
2411 	if (rv == KERN_SUCCESS) {
2412 		if (prev_entry != &map->header)
2413 			vm_map_clip_end(map, prev_entry, addr);
2414 		new_stack_entry = prev_entry->next;
2415 		if (new_stack_entry->end   != stack_entry->start  ||
2416 		    new_stack_entry->start != addr)
2417 			panic ("Bad stack grow start/end in new stack entry");
2418 		else {
2419 			new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2420 							(new_stack_entry->end -
2421 							 new_stack_entry->start);
2422 			if (is_procstack)
2423 				vm->vm_ssize += btoc(new_stack_entry->end -
2424 						     new_stack_entry->start);
2425 		}
2426 	}
2427 
2428 	vm_map_unlock(map);
2429 	return (rv);
2430 
2431 }
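
/*
 * Example (illustrative sketch): the page fault path is expected to try a
 * stack grow before giving up on an unmapped user address, along the
 * lines of
 *
 *	if (vm_map_growstack(p, va) == KERN_SUCCESS)
 *		(retry the faulting access; the range may now be mapped)
 *	else
 *		(not a growable stack address; fail the fault)
 *
 * where p and va are the faulting process and address.  Because
 * KERN_SUCCESS is also returned for addresses outside any stack entry,
 * the caller cannot distinguish "grown" from "not a stack address" and
 * simply retries the normal fault handling.
 */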
2432 
2433 /*
2434  * Unshare the specified VM space for exec.  If other processes share
2435  * it, then create a new one.  The new vmspace starts out empty.
2436  */
2437 
2438 void
2439 vmspace_exec(struct proc *p) {
2440 	struct vmspace *oldvmspace = p->p_vmspace;
2441 	struct vmspace *newvmspace;
2442 	vm_map_t map = &p->p_vmspace->vm_map;
2443 
2444 	newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
2445 	bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2446 	    (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2447 	/*
2448 	 * This code is written like this for prototype purposes.  The
2449 	 * goal is to avoid running down the vmspace here, but to let the
2450 	 * other processes that are still using the vmspace finally
2451 	 * run it down.  Even though there is little or no chance of blocking
2452 	 * here, it is a good idea to keep this form for future mods.
2453 	 */
2454 	vmspace_free(oldvmspace);
2455 	p->p_vmspace = newvmspace;
2456 	pmap_pinit2(vmspace_pmap(newvmspace));
2457 	if (p == curproc)
2458 		pmap_activate(p);
2459 }
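
/*
 * Example (illustrative sketch): exec code that needs a clean address
 * space would typically call this only when the vmspace is shared, and
 * otherwise just empty the existing map, e.g.
 *
 *	if (vmspace->vm_refcnt == 1)
 *		vm_map_remove(&vmspace->vm_map,
 *		    vm_map_min(&vmspace->vm_map),
 *		    vm_map_max(&vmspace->vm_map));
 *	else
 *		vmspace_exec(p);
 *
 * where vmspace and p are the caller's own variables.  Either way the
 * process ends up with an address space that no other process is using.
 */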
2460 
2461 /*
2462  * Unshare the specified VM space for forcing COW.  This
2463  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2464  */
2465 
2466 void
2467 vmspace_unshare(struct proc *p) {
2468 	struct vmspace *oldvmspace = p->p_vmspace;
2469 	struct vmspace *newvmspace;
2470 
2471 	if (oldvmspace->vm_refcnt == 1)
2472 		return;
2473 	newvmspace = vmspace_fork(oldvmspace);
2474 	vmspace_free(oldvmspace);
2475 	p->p_vmspace = newvmspace;
2476 	pmap_pinit2(vmspace_pmap(newvmspace));
2477 	if (p == curproc)
2478 		pmap_activate(p);
2479 }
2480 
2481 
2482 /*
2483  *	vm_map_lookup:
2484  *
2485  *	Finds the VM object, offset, and
2486  *	protection for a given virtual address in the
2487  *	specified map, assuming a page fault of the
2488  *	type specified.
2489  *
2490  *	Leaves the map in question locked for read; return
2491  *	values are guaranteed until a vm_map_lookup_done
2492  *	call is performed.  Note that the map argument
2493  *	is in/out; the returned map must be used in
2494  *	the call to vm_map_lookup_done.
2495  *
2496  *	A handle (out_entry) is returned for use in
2497  *	vm_map_lookup_done, to make that fast.
2498  *
2499  *	If a lookup is requested with "write protection"
2500  *	specified, the map may be changed to perform virtual
2501  *	copying operations, although the data referenced will
2502  *	remain the same.
2503  */
2504 int
2505 vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
2506 	      vm_offset_t vaddr,
2507 	      vm_prot_t fault_typea,
2508 	      vm_map_entry_t *out_entry,	/* OUT */
2509 	      vm_object_t *object,		/* OUT */
2510 	      vm_pindex_t *pindex,		/* OUT */
2511 	      vm_prot_t *out_prot,		/* OUT */
2512 	      boolean_t *wired)			/* OUT */
2513 {
2514 	vm_map_entry_t entry;
2515 	vm_map_t map = *var_map;
2516 	vm_prot_t prot;
2517 	vm_prot_t fault_type = fault_typea;
2518 
2519 RetryLookup:;
2520 
2521 	/*
2522 	 * Lookup the faulting address.
2523 	 */
2524 
2525 	vm_map_lock_read(map);
2526 
2527 #define	RETURN(why) \
2528 		{ \
2529 		vm_map_unlock_read(map); \
2530 		return(why); \
2531 		}
2532 
2533 	/*
2534 	 * If the map has an interesting hint, try it before calling full
2535 	 * blown lookup routine.
2536 	 */
2537 
2538 	entry = map->hint;
2539 
2540 	*out_entry = entry;
2541 
2542 	if ((entry == &map->header) ||
2543 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2544 		vm_map_entry_t tmp_entry;
2545 
2546 		/*
2547 		 * Entry was either not a valid hint, or the vaddr was not
2548 		 * contained in the entry, so do a full lookup.
2549 		 */
2550 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2551 			RETURN(KERN_INVALID_ADDRESS);
2552 
2553 		entry = tmp_entry;
2554 		*out_entry = entry;
2555 	}
2556 
2557 	/*
2558 	 * Handle submaps.
2559 	 */
2560 
2561 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2562 		vm_map_t old_map = map;
2563 
2564 		*var_map = map = entry->object.sub_map;
2565 		vm_map_unlock_read(old_map);
2566 		goto RetryLookup;
2567 	}
2568 
2569 	/*
2570 	 * Check whether this task is allowed to have this page.
2571 	 * Note the special case for MAP_ENTRY_COW
2572 	 * pages with an override.  This is to implement a forced
2573 	 * COW for debuggers.
2574 	 */
2575 
2576 	if (fault_type & VM_PROT_OVERRIDE_WRITE)
2577 		prot = entry->max_protection;
2578 	else
2579 		prot = entry->protection;
2580 
2581 	fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2582 	if ((fault_type & prot) != fault_type) {
2583 		RETURN(KERN_PROTECTION_FAILURE);
2584 	}
2585 
2586 	if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2587 	    (entry->eflags & MAP_ENTRY_COW) &&
2588 	    (fault_type & VM_PROT_WRITE) &&
2589 	    (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2590 		RETURN(KERN_PROTECTION_FAILURE);
2591 	}
2592 
2593 	/*
2594 	 * If this page is not pageable, we have to get it for all possible
2595 	 * accesses.
2596 	 */
2597 
2598 	*wired = (entry->wired_count != 0);
2599 	if (*wired)
2600 		prot = fault_type = entry->protection;
2601 
2602 	/*
2603 	 * If the entry was copy-on-write, we either ...
2604 	 */
2605 
2606 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2607 		/*
2608 		 * If we want to write the page, we may as well handle that
2609 		 * now since we've got the map locked.
2610 		 *
2611 		 * If we don't need to write the page, we just demote the
2612 		 * permissions allowed.
2613 		 */
2614 
2615 		if (fault_type & VM_PROT_WRITE) {
2616 			/*
2617 			 * Make a new object, and place it in the object
2618 			 * chain.  Note that no new references have appeared
2619 			 * -- one just moved from the map to the new
2620 			 * object.
2621 			 */
2622 
2623 			if (vm_map_lock_upgrade(map))
2624 				goto RetryLookup;
2625 
2626 			vm_object_shadow(
2627 			    &entry->object.vm_object,
2628 			    &entry->offset,
2629 			    atop(entry->end - entry->start));
2630 
2631 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2632 			vm_map_lock_downgrade(map);
2633 		} else {
2634 			/*
2635 			 * We're attempting to read a copy-on-write page --
2636 			 * don't allow writes.
2637 			 */
2638 
2639 			prot &= ~VM_PROT_WRITE;
2640 		}
2641 	}
2642 
2643 	/*
2644 	 * Create an object if necessary.
2645 	 */
2646 	if (entry->object.vm_object == NULL &&
2647 	    !map->system_map) {
2648 		if (vm_map_lock_upgrade(map))
2649 			goto RetryLookup;
2650 
2651 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2652 		    atop(entry->end - entry->start));
2653 		entry->offset = 0;
2654 		vm_map_lock_downgrade(map);
2655 	}
2656 
2657 	/*
2658 	 * Return the object/offset from this entry.  If the entry was
2659 	 * copy-on-write or empty, it has been fixed up.
2660 	 */
2661 
2662 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2663 	*object = entry->object.vm_object;
2664 
2665 	/*
2666 	 * Return the protection allowed for this access.
2667 	 */
2668 
2669 	*out_prot = prot;
2670 	return (KERN_SUCCESS);
2671 
2672 #undef	RETURN
2673 }
2674 
2675 /*
2676  *	vm_map_lookup_done:
2677  *
2678  *	Releases locks acquired by a vm_map_lookup
2679  *	(according to the handle returned by that lookup).
2680  */
2681 
2682 void
2683 vm_map_lookup_done(map, entry)
2684 	vm_map_t map;
2685 	vm_map_entry_t entry;
2686 {
2687 	/*
2688 	 * Unlock the main-level map
2689 	 */
2690 
2691 	vm_map_unlock_read(map);
2692 }
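
/*
 * Example (illustrative sketch): the lookup/lookup_done pair brackets any
 * use of the returned object, roughly the way a fault handler uses it:
 *
 *	vm_map_t map = original_map;
 *	vm_map_entry_t entry;
 *	vm_object_t object;
 *	vm_pindex_t pindex;
 *	vm_prot_t prot;
 *	boolean_t wired;
 *	int rv;
 *
 *	rv = vm_map_lookup(&map, va, VM_PROT_READ, &entry, &object,
 *	    &pindex, &prot, &wired);
 *	if (rv != KERN_SUCCESS)
 *		return (rv);
 *	(... use object and pindex while the map stays read-locked ...)
 *	vm_map_lookup_done(map, entry);
 *
 * Here original_map and va are the caller's own values.  Note that
 * vm_map_lookup() may have switched "map" to a submap, so the possibly
 * updated pointer is the one that must be handed to vm_map_lookup_done().
 */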
2693 
2694 /*
2695  * Implement uiomove with VM operations.  This routine (and its collateral
2696  * changes) supports every combination of source object modification and
2697  * COW type operations.
2698  */
2699 int
2700 vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages)
2701 	vm_map_t mapa;
2702 	vm_object_t srcobject;
2703 	off_t cp;
2704 	int cnta;
2705 	vm_offset_t uaddra;
2706 	int *npages;
2707 {
2708 	vm_map_t map;
2709 	vm_object_t first_object, oldobject, object;
2710 	vm_map_entry_t entry;
2711 	vm_prot_t prot;
2712 	boolean_t wired;
2713 	int tcnt, rv;
2714 	vm_offset_t uaddr, start, end, tend;
2715 	vm_pindex_t first_pindex, osize, oindex;
2716 	off_t ooffset;
2717 	int cnt;
2718 
2719 	if (npages)
2720 		*npages = 0;
2721 
2722 	cnt = cnta;
2723 	uaddr = uaddra;
2724 
2725 	while (cnt > 0) {
2726 		map = mapa;
2727 
2728 		if ((vm_map_lookup(&map, uaddr,
2729 			VM_PROT_READ, &entry, &first_object,
2730 			&first_pindex, &prot, &wired)) != KERN_SUCCESS) {
2731 			return EFAULT;
2732 		}
2733 
2734 		vm_map_clip_start(map, entry, uaddr);
2735 
2736 		tcnt = cnt;
2737 		tend = uaddr + tcnt;
2738 		if (tend > entry->end) {
2739 			tcnt = entry->end - uaddr;
2740 			tend = entry->end;
2741 		}
2742 
2743 		vm_map_clip_end(map, entry, tend);
2744 
2745 		start = entry->start;
2746 		end = entry->end;
2747 
2748 		osize = atop(tcnt);
2749 
2750 		oindex = OFF_TO_IDX(cp);
2751 		if (npages) {
2752 			vm_pindex_t idx;
2753 			for (idx = 0; idx < osize; idx++) {
2754 				vm_page_t m;
2755 				if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
2756 					vm_map_lookup_done(map, entry);
2757 					return 0;
2758 				}
2759 				/*
2760 				 * disallow busy or invalid pages, but allow
2761 				 * m->busy pages if they are entirely valid.
2762 				 */
2763 				if ((m->flags & PG_BUSY) ||
2764 					((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
2765 					vm_map_lookup_done(map, entry);
2766 					return 0;
2767 				}
2768 			}
2769 		}
2770 
2771 /*
2772  * If we are changing an existing map entry, just redirect
2773  * the object, and change mappings.
2774  */
2775 		if ((first_object->type == OBJT_VNODE) &&
2776 			((oldobject = entry->object.vm_object) == first_object)) {
2777 
2778 			if ((entry->offset != cp) || (oldobject != srcobject)) {
2779 				/*
2780 				 * Remove old window into the file
2781 				 */
2782 				pmap_remove (map->pmap, uaddr, tend);
2783 
2784 				/*
2785 				 * Force copy on write for mmapped regions
2786 				 */
2787 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2788 
2789 				/*
2790 				 * Point the object appropriately
2791 				 */
2792 				if (oldobject != srcobject) {
2793 
2794 				/*
2795 				 * Set the object optimization hint flag
2796 				 */
2797 					vm_object_set_flag(srcobject, OBJ_OPT);
2798 					vm_object_reference(srcobject);
2799 					entry->object.vm_object = srcobject;
2800 
2801 					if (oldobject) {
2802 						vm_object_deallocate(oldobject);
2803 					}
2804 				}
2805 
2806 				entry->offset = cp;
2807 				map->timestamp++;
2808 			} else {
2809 				pmap_remove (map->pmap, uaddr, tend);
2810 			}
2811 
2812 		} else if ((first_object->ref_count == 1) &&
2813 			(first_object->size == osize) &&
2814 			((first_object->type == OBJT_DEFAULT) ||
2815 				(first_object->type == OBJT_SWAP)) ) {
2816 
2817 			oldobject = first_object->backing_object;
2818 
2819 			if ((first_object->backing_object_offset != cp) ||
2820 				(oldobject != srcobject)) {
2821 				/*
2822 				 * Remove old window into the file
2823 				 */
2824 				pmap_remove (map->pmap, uaddr, tend);
2825 
2826 				/*
2827 				 * Remove unneeded old pages
2828 				 */
2829 				vm_object_page_remove(first_object, 0, 0, 0);
2830 
2831 				/*
2832 				 * Invalidate swap space
2833 				 */
2834 				if (first_object->type == OBJT_SWAP) {
2835 					swap_pager_freespace(first_object,
2836 						0,
2837 						first_object->size);
2838 				}
2839 
2840 				/*
2841 				 * Force copy on write for mmapped regions
2842 				 */
2843 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2844 
2845 				/*
2846 				 * Point the object appropriately
2847 				 */
2848 				if (oldobject != srcobject) {
2849 
2850 				 * Set the object optimization hint flag
2851 				 */
2852    				*/
2853 					vm_object_set_flag(srcobject, OBJ_OPT);
2854 					vm_object_reference(srcobject);
2855 
2856 					if (oldobject) {
2857 						TAILQ_REMOVE(&oldobject->shadow_head,
2858 							first_object, shadow_list);
2859 						oldobject->shadow_count--;
2860 						/* XXX bump generation? */
2861 						vm_object_deallocate(oldobject);
2862 					}
2863 
2864 					TAILQ_INSERT_TAIL(&srcobject->shadow_head,
2865 						first_object, shadow_list);
2866 					srcobject->shadow_count++;
2867 					/* XXX bump generation? */
2868 
2869 					first_object->backing_object = srcobject;
2870 				}
2871 				first_object->backing_object_offset = cp;
2872 				map->timestamp++;
2873 			} else {
2874 				pmap_remove (map->pmap, uaddr, tend);
2875 			}
2876 /*
2877  * Otherwise, we have to do a logical mmap.
2878  */
2879 		} else {
2880 
2881 			vm_object_set_flag(srcobject, OBJ_OPT);
2882 			vm_object_reference(srcobject);
2883 
2884 			pmap_remove (map->pmap, uaddr, tend);
2885 
2886 			vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2887 			vm_map_lock_upgrade(map);
2888 
2889 			if (entry == &map->header) {
2890 				map->first_free = &map->header;
2891 			} else if (map->first_free->start >= start) {
2892 				map->first_free = entry->prev;
2893 			}
2894 
2895 			SAVE_HINT(map, entry->prev);
2896 			vm_map_entry_delete(map, entry);
2897 
2898 			object = srcobject;
2899 			ooffset = cp;
2900 
2901 			rv = vm_map_insert(map, object, ooffset, start, tend,
2902 				VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
2903 
2904 			if (rv != KERN_SUCCESS)
2905 				panic("vm_uiomove: could not insert new entry: %d", rv);
2906 		}
2907 
2908 /*
2909  * Map the window directly, if it is already in memory
2910  */
2911 		pmap_object_init_pt(map->pmap, uaddr,
2912 			srcobject, oindex, tcnt, 0);
2913 
2914 		map->timestamp++;
2915 		vm_map_unlock(map);
2916 
2917 		cnt -= tcnt;
2918 		uaddr += tcnt;
2919 		cp += tcnt;
2920 		if (npages)
2921 			*npages += osize;
2922 	}
2923 	return 0;
2924 }
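
/*
 * Example (illustrative sketch): a read path that wants to satisfy a user
 * read by remapping file pages instead of copying might, given suitable
 * page alignment, call
 *
 *	error = vm_uiomove(&p->p_vmspace->vm_map, srcobject,
 *	    file_offset, cnt, uaddr, &npages);
 *
 * where srcobject is the VM object backing the vnode and file_offset, cnt
 * and uaddr describe the page-aligned transfer.  All of these names are
 * illustrative assumptions, not taken from an actual caller.
 */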
2925 
2926 /*
2927  * Performs the copy_on_write operations necessary to allow the virtual copies
2928  * into user space to work.  This has to be called for write(2) system calls
2929  * from other processes, file unlinking, and file size shrinkage.
2930  */
2931 void
2932 vm_freeze_copyopts(object, froma, toa)
2933 	vm_object_t object;
2934 	vm_pindex_t froma, toa;
2935 {
2936 	int rv;
2937 	vm_object_t robject;
2938 	vm_pindex_t idx;
2939 
2940 	if ((object == NULL) ||
2941 		((object->flags & OBJ_OPT) == 0))
2942 		return;
2943 
2944 	if (object->shadow_count > object->ref_count)
2945 		panic("vm_freeze_copyopts: sc > rc");
2946 
2947 	while((robject = TAILQ_FIRST(&object->shadow_head)) != NULL) {
2948 		vm_pindex_t bo_pindex;
2949 		vm_page_t m_in, m_out;
2950 
2951 		bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
2952 
2953 		vm_object_reference(robject);
2954 
2955 		vm_object_pip_wait(robject, "objfrz");
2956 
2957 		if (robject->ref_count == 1) {
2958 			vm_object_deallocate(robject);
2959 			continue;
2960 		}
2961 
2962 		vm_object_pip_add(robject, 1);
2963 
2964 		for (idx = 0; idx < robject->size; idx++) {
2965 
2966 			m_out = vm_page_grab(robject, idx,
2967 						VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
2968 
2969 			if (m_out->valid == 0) {
2970 				m_in = vm_page_grab(object, bo_pindex + idx,
2971 						VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
2972 				if (m_in->valid == 0) {
2973 					rv = vm_pager_get_pages(object, &m_in, 1, 0);
2974 					if (rv != VM_PAGER_OK) {
2975 						printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
2976 						continue;
2977 					}
2978 					vm_page_deactivate(m_in);
2979 				}
2980 
2981 				vm_page_protect(m_in, VM_PROT_NONE);
2982 				pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out));
2983 				m_out->valid = m_in->valid;
2984 				vm_page_dirty(m_out);
2985 				vm_page_activate(m_out);
2986 				vm_page_wakeup(m_in);
2987 			}
2988 			vm_page_wakeup(m_out);
2989 		}
2990 
2991 		object->shadow_count--;
2992 		object->ref_count--;
2993 		TAILQ_REMOVE(&object->shadow_head, robject, shadow_list);
2994 		robject->backing_object = NULL;
2995 		robject->backing_object_offset = 0;
2996 
2997 		vm_object_pip_wakeup(robject);
2998 		vm_object_deallocate(robject);
2999 	}
3000 
3001 	vm_object_clear_flag(object, OBJ_OPT);
3002 }
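
/*
 * Example (illustrative sketch): a writer about to modify a range of a
 * file-backed object that may have OBJ_OPT virtual copies would first
 * freeze them, e.g.
 *
 *	if (object->flags & OBJ_OPT)
 *		vm_freeze_copyopts(object, OFF_TO_IDX(foff),
 *		    OFF_TO_IDX(foff + len));
 *
 * where foff and len are the caller's own offset and length.  Afterwards
 * the shadowing objects hold real copies of the pages and no longer
 * reference this object, so the write cannot be seen through the old
 * virtual copies.
 */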
3003 
3004 #include "opt_ddb.h"
3005 #ifdef DDB
3006 #include <sys/kernel.h>
3007 
3008 #include <ddb/ddb.h>
3009 
3010 /*
3011  *	vm_map_print:	[ debug ]
3012  */
3013 DB_SHOW_COMMAND(map, vm_map_print)
3014 {
3015 	static int nlines;
3016 	/* XXX convert args. */
3017 	vm_map_t map = (vm_map_t)addr;
3018 	boolean_t full = have_addr;
3019 
3020 	vm_map_entry_t entry;
3021 
3022 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3023 	    (void *)map,
3024 	    (void *)map->pmap, map->nentries, map->timestamp);
3025 	nlines++;
3026 
3027 	if (!full && db_indent)
3028 		return;
3029 
3030 	db_indent += 2;
3031 	for (entry = map->header.next; entry != &map->header;
3032 	    entry = entry->next) {
3033 		db_iprintf("map entry %p: start=%p, end=%p\n",
3034 		    (void *)entry, (void *)entry->start, (void *)entry->end);
3035 		nlines++;
3036 		{
3037 			static char *inheritance_name[4] =
3038 			{"share", "copy", "none", "donate_copy"};
3039 
3040 			db_iprintf(" prot=%x/%x/%s",
3041 			    entry->protection,
3042 			    entry->max_protection,
3043 			    inheritance_name[(int)(unsigned char)entry->inheritance]);
3044 			if (entry->wired_count != 0)
3045 				db_printf(", wired");
3046 		}
3047 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3048 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3049 			db_printf(", share=%p, offset=0x%lx\n",
3050 			    (void *)entry->object.sub_map,
3051 			    (long)entry->offset);
3052 			nlines++;
3053 			if ((entry->prev == &map->header) ||
3054 			    (entry->prev->object.sub_map !=
3055 				entry->object.sub_map)) {
3056 				db_indent += 2;
3057 				vm_map_print((db_expr_t)(intptr_t)
3058 					     entry->object.sub_map,
3059 					     full, 0, (char *)0);
3060 				db_indent -= 2;
3061 			}
3062 		} else {
3063 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3064 			db_printf(", object=%p, offset=0x%lx",
3065 			    (void *)entry->object.vm_object,
3066 			    (long)entry->offset);
3067 			if (entry->eflags & MAP_ENTRY_COW)
3068 				db_printf(", copy (%s)",
3069 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3070 			db_printf("\n");
3071 			nlines++;
3072 
3073 			if ((entry->prev == &map->header) ||
3074 			    (entry->prev->object.vm_object !=
3075 				entry->object.vm_object)) {
3076 				db_indent += 2;
3077 				vm_object_print((db_expr_t)(intptr_t)
3078 						entry->object.vm_object,
3079 						full, 0, (char *)0);
3080 				nlines += 4;
3081 				db_indent -= 2;
3082 			}
3083 		}
3084 	}
3085 	db_indent -= 2;
3086 	if (db_indent == 0)
3087 		nlines = 0;
3088 }
3089 
3090 
3091 DB_SHOW_COMMAND(procvm, procvm)
3092 {
3093 	struct proc *p;
3094 
3095 	if (have_addr) {
3096 		p = (struct proc *) addr;
3097 	} else {
3098 		p = curproc;
3099 	}
3100 
3101 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3102 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3103 	    (void *)vmspace_pmap(p->p_vmspace));
3104 
3105 	vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3106 }
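
/*
 * Example (illustrative): typical invocations from the DDB prompt are
 *
 *	db> show map 0xc02e5d80		(print the vm_map at that address)
 *	db> show procvm			(dump curproc's vmspace and map)
 *	db> show procvm 0xc65a1c00	(dump the vmspace of that struct proc)
 *
 * The addresses shown are made up; use pointers obtained from other DDB
 * output.
 */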
3107 
3108 #endif /* DDB */
3109