xref: /freebsd/sys/vm/vm_map.c (revision e627b39baccd1ec9129690167cf5e6d860509655)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $Id: vm_map.c,v 1.56 1996/09/08 23:49:47 dyson Exp $
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/malloc.h>
74 #include <sys/proc.h>
75 #include <sys/queue.h>
76 #include <sys/vmmeter.h>
77 #include <sys/mman.h>
78 
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <vm/vm_prot.h>
82 #include <vm/vm_inherit.h>
83 #include <vm/lock.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_kern.h>
89 #include <vm/vm_pager.h>
90 #include <vm/vm_extern.h>
91 #include <vm/default_pager.h>
92 
93 /*
94  *	Virtual memory maps provide for the mapping, protection,
95  *	and sharing of virtual memory objects.  In addition,
96  *	this module provides for an efficient virtual copy of
97  *	memory from one map to another.
98  *
99  *	Synchronization is required prior to most operations.
100  *
101  *	Maps consist of an ordered doubly-linked list of simple
102  *	entries; a single hint is used to speed up lookups.
103  *
104  *	In order to properly represent the sharing of virtual
105  *	memory regions among maps, the map structure is bi-level.
106  *	Top-level ("address") maps refer to regions of sharable
107  *	virtual memory.  These regions are implemented as
108  *	("sharing") maps, which then refer to the actual virtual
109  *	memory objects.  When two address maps "share" memory,
110  *	their top-level maps both have references to the same
111  *	sharing map.  When memory is virtual-copied from one
112  *	address map to another, the references in the sharing
113  *	maps are actually copied -- no copying occurs at the
114  *	virtual memory object level.
115  *
116  *	Since portions of maps are specified by start/end addreses,
117  *	which may not align with existing map entries, all
118  *	routines merely "clip" entries to these start/end values.
119  *	[That is, an entry is split into two, bordering at a
120  *	start or end value.]  Note that these clippings may not
121  *	always be necessary (as the two resulting entries are then
122  *	not changed); however, the clipping is done for convenience.
123  *	No attempt is currently made to "glue back together" two
124  *	abutting entries.
125  *
126  *	As mentioned above, virtual copy operations are performed
127  *	by copying VM object references from one sharing map to
128  *	another, and then marking both regions as copy-on-write.
129  *	It is important to note that only one writeable reference
130  *	to a VM object region exists in any map -- this means that
131  *	shadow object creation can be delayed until a write operation
132  *	occurs.
133  */
134 
135 /*
136  *	vm_map_startup:
137  *
138  *	Initialize the vm_map module.  Must be called before
139  *	any other vm_map routines.
140  *
141  *	Map and entry structures are allocated from the general
142  *	purpose memory pool with some exceptions:
143  *
144  *	- The kernel map and kmem submap are allocated statically.
145  *	- Kernel map entries are allocated out of a static pool.
146  *
147  *	These restrictions are necessary since malloc() uses the
148  *	maps and requires map entries.
149  */
150 
151 vm_offset_t kentry_data;
152 vm_size_t kentry_data_size;
153 static vm_map_entry_t kentry_free;
154 static vm_map_t kmap_free;
155 extern char kstack[];
156 
157 static int kentry_count;
158 static vm_offset_t mapvm_start, mapvm, mapvmmax;
159 static int mapvmpgcnt;
160 
161 static struct vm_map_entry *mappool;
162 static int mappoolcnt;
163 #define KENTRY_LOW_WATER 128
164 
165 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
166 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
167 static vm_map_entry_t vm_map_entry_create __P((vm_map_t));
168 static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
169 static __inline void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
170 static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
171 static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t,
172 		vm_map_entry_t));
173 static void vm_map_simplify_entry __P((vm_map_t, vm_map_entry_t));
174 
175 void
176 vm_map_startup()
177 {
178 	register int i;
179 	register vm_map_entry_t mep;
180 	vm_map_t mp;
181 
182 	/*
183 	 * Static map structures for allocation before initialization of
184 	 * kernel map or kmem map.  vm_map_create knows how to deal with them.
185 	 */
186 	kmap_free = mp = (vm_map_t) kentry_data;
187 	i = MAX_KMAP;
188 	while (--i > 0) {
189 		mp->header.next = (vm_map_entry_t) (mp + 1);
190 		mp++;
191 	}
192 	mp++->header.next = NULL;
193 
194 	/*
195 	 * Form a free list of statically allocated kernel map entries with
196 	 * the rest.
197 	 */
198 	kentry_free = mep = (vm_map_entry_t) mp;
199 	kentry_count = i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep;
200 	while (--i > 0) {
201 		mep->next = mep + 1;
202 		mep++;
203 	}
204 	mep->next = NULL;
205 }
206 
207 /*
208  * Allocate a vmspace structure, including a vm_map and pmap,
209  * and initialize those structures.  The refcnt is set to 1.
210  * The remaining fields must be initialized by the caller.
211  */
212 struct vmspace *
213 vmspace_alloc(min, max, pageable)
214 	vm_offset_t min, max;
215 	int pageable;
216 {
217 	register struct vmspace *vm;
218 
219 	if (mapvmpgcnt == 0 && mapvm == 0) {
220 		mapvmpgcnt = (cnt.v_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE;
221 		mapvm_start = mapvm = kmem_alloc_pageable(kernel_map,
222 			mapvmpgcnt * PAGE_SIZE);
223 		mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE;
224 		if (!mapvm)
225 			mapvmpgcnt = 0;
226 	}
227 	MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK);
228 	bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm);
229 	vm_map_init(&vm->vm_map, min, max, pageable);
230 	pmap_pinit(&vm->vm_pmap);
231 	vm->vm_map.pmap = &vm->vm_pmap;		/* XXX */
232 	vm->vm_refcnt = 1;
233 	return (vm);
234 }
235 
236 void
237 vmspace_free(vm)
238 	register struct vmspace *vm;
239 {
240 
241 	if (vm->vm_refcnt == 0)
242 		panic("vmspace_free: attempt to free already freed vmspace");
243 
244 	if (--vm->vm_refcnt == 0) {
245 
246 		/*
247 		 * Lock the map, to wait out all other references to it.
248 		 * Delete all of the mappings and pages they hold, then call
249 		 * the pmap module to reclaim anything left.
250 		 */
251 		vm_map_lock(&vm->vm_map);
252 		(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
253 		    vm->vm_map.max_offset);
254 		vm_map_unlock(&vm->vm_map);
255 
256 		while( vm->vm_map.ref_count != 1)
257 			tsleep(&vm->vm_map.ref_count, PVM, "vmsfre", 0);
258 		--vm->vm_map.ref_count;
259 		vm_object_pmap_remove(vm->vm_upages_obj,
260 			0, vm->vm_upages_obj->size);
261 		vm_object_deallocate(vm->vm_upages_obj);
262 		pmap_release(&vm->vm_pmap);
263 		FREE(vm, M_VMMAP);
264 	} else {
265 		wakeup(&vm->vm_map.ref_count);
266 	}
267 }
268 
269 /*
270  *	vm_map_create:
271  *
272  *	Creates and returns a new empty VM map with
273  *	the given physical map structure, and having
274  *	the given lower and upper address bounds.
275  */
276 vm_map_t
277 vm_map_create(pmap, min, max, pageable)
278 	pmap_t pmap;
279 	vm_offset_t min, max;
280 	boolean_t pageable;
281 {
282 	register vm_map_t result;
283 
284 	if (kmem_map == NULL) {
285 		result = kmap_free;
286 		kmap_free = (vm_map_t) result->header.next;
287 		if (result == NULL)
288 			panic("vm_map_create: out of maps");
289 	} else
290 		MALLOC(result, vm_map_t, sizeof(struct vm_map),
291 		    M_VMMAP, M_WAITOK);
292 
293 	vm_map_init(result, min, max, pageable);
294 	result->pmap = pmap;
295 	return (result);
296 }
297 
298 /*
299  * Initialize an existing vm_map structure
300  * such as that in the vmspace structure.
301  * The pmap is set elsewhere.
302  */
303 void
304 vm_map_init(map, min, max, pageable)
305 	register struct vm_map *map;
306 	vm_offset_t min, max;
307 	boolean_t pageable;
308 {
309 	map->header.next = map->header.prev = &map->header;
310 	map->nentries = 0;
311 	map->size = 0;
312 	map->ref_count = 1;
313 	map->is_main_map = TRUE;
314 	map->min_offset = min;
315 	map->max_offset = max;
316 	map->entries_pageable = pageable;
317 	map->first_free = &map->header;
318 	map->hint = &map->header;
319 	map->timestamp = 0;
320 	lock_init(&map->lock, TRUE);
321 }
322 
323 /*
324  *	vm_map_entry_dispose:	[ internal use only ]
325  *
326  *	Inverse of vm_map_entry_create.
327  */
328 static __inline void
329 vm_map_entry_dispose(map, entry)
330 	vm_map_t map;
331 	vm_map_entry_t entry;
332 {
333 	int s;
334 
335 	if (kentry_count < KENTRY_LOW_WATER) {
336 		s = splvm();
337 		entry->next = kentry_free;
338 		kentry_free = entry;
339 		++kentry_count;
340 		splx(s);
341 	} else {
342 		entry->next = mappool;
343 		mappool = entry;
344 		++mappoolcnt;
345 	}
346 }
347 
348 /*
349  *	vm_map_entry_create:	[ internal use only ]
350  *
351  *	Allocates a VM map entry for insertion.
352  *	No entry fields are filled in.  This routine is
353  */
354 static vm_map_entry_t
355 vm_map_entry_create(map)
356 	vm_map_t map;
357 {
358 	vm_map_entry_t entry;
359 	int i;
360 	int s;
361 
362 	/*
363 	 * This is a *very* nasty (and sort of incomplete) hack!!!!
364 	 */
365 	if (kentry_count < KENTRY_LOW_WATER) {
366 		s = splvm();
367 		if (mapvmpgcnt && mapvm) {
368 			vm_page_t m;
369 
370 			m = vm_page_alloc(kernel_object,
371 			        OFF_TO_IDX(mapvm - VM_MIN_KERNEL_ADDRESS),
372 				    (map == kmem_map || map == mb_map) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL);
373 
374 			if (m) {
375 				int newentries;
376 
377 				newentries = (PAGE_SIZE / sizeof(struct vm_map_entry));
378 				vm_page_wire(m);
379 				PAGE_WAKEUP(m);
380 				m->valid = VM_PAGE_BITS_ALL;
381 				pmap_kenter(mapvm, VM_PAGE_TO_PHYS(m));
382 				m->flags |= PG_WRITEABLE;
383 
384 				entry = (vm_map_entry_t) mapvm;
385 				mapvm += PAGE_SIZE;
386 				--mapvmpgcnt;
387 
388 				for (i = 0; i < newentries; i++) {
389 					vm_map_entry_dispose(kernel_map, entry);
390 					entry++;
391 				}
392 			}
393 		}
394 		splx(s);
395 	}
396 
397 	if (map == kernel_map || map == kmem_map || map == mb_map || map == pager_map) {
398 		s = splvm();
399 		entry = kentry_free;
400 		if (entry) {
401 			kentry_free = entry->next;
402 			--kentry_count;
403 		} else {
404 			panic("vm_map_entry_create: out of map entries for kernel");
405 		}
406 		splx(s);
407 	} else {
408 		entry = mappool;
409 		if (entry) {
410 			mappool = entry->next;
411 			--mappoolcnt;
412 		} else {
413 			MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry),
414 			    M_VMMAPENT, M_WAITOK);
415 		}
416 	}
417 
418 	return (entry);
419 }
420 
421 /*
422  *	vm_map_entry_{un,}link:
423  *
424  *	Insert/remove entries from maps.
425  */
426 #define	vm_map_entry_link(map, after_where, entry) \
427 		{ \
428 		(map)->nentries++; \
429 		(entry)->prev = (after_where); \
430 		(entry)->next = (after_where)->next; \
431 		(entry)->prev->next = (entry); \
432 		(entry)->next->prev = (entry); \
433 		}
434 #define	vm_map_entry_unlink(map, entry) \
435 		{ \
436 		(map)->nentries--; \
437 		(entry)->next->prev = (entry)->prev; \
438 		(entry)->prev->next = (entry)->next; \
439 		}
440 
441 /*
442  *	vm_map_reference:
443  *
444  *	Creates another valid reference to the given map.
445  *
446  */
447 void
448 vm_map_reference(map)
449 	register vm_map_t map;
450 {
451 	if (map == NULL)
452 		return;
453 
454 	map->ref_count++;
455 }
456 
457 /*
458  *	vm_map_deallocate:
459  *
460  *	Removes a reference from the specified map,
461  *	destroying it if no references remain.
462  *	The map should not be locked.
463  */
464 void
465 vm_map_deallocate(map)
466 	register vm_map_t map;
467 {
468 	register int c;
469 
470 	if (map == NULL)
471 		return;
472 
473 	c = map->ref_count;
474 
475 	if (c == 0)
476 		panic("vm_map_deallocate: deallocating already freed map");
477 
478 	if (c != 1) {
479 		--map->ref_count;
480 		wakeup(&map->ref_count);
481 		return;
482 	}
483 	/*
484 	 * Lock the map, to wait out all other references to it.
485 	 */
486 
487 	vm_map_lock(map);
488 	(void) vm_map_delete(map, map->min_offset, map->max_offset);
489 	--map->ref_count;
490 	if( map->ref_count != 0) {
491 		vm_map_unlock(map);
492 		return;
493 	}
494 
495 	pmap_destroy(map->pmap);
496 	FREE(map, M_VMMAP);
497 }
498 
499 /*
500  *	SAVE_HINT:
501  *
502  *	Saves the specified entry as the hint for
503  *	future lookups.
504  */
505 #define	SAVE_HINT(map,value) \
506 		(map)->hint = (value);
507 
508 /*
509  *	vm_map_lookup_entry:	[ internal use only ]
510  *
511  *	Finds the map entry containing (or
512  *	immediately preceding) the specified address
513  *	in the given map; the entry is returned
514  *	in the "entry" parameter.  The boolean
515  *	result indicates whether the address is
516  *	actually contained in the map.
517  */
518 boolean_t
519 vm_map_lookup_entry(map, address, entry)
520 	register vm_map_t map;
521 	register vm_offset_t address;
522 	vm_map_entry_t *entry;	/* OUT */
523 {
524 	register vm_map_entry_t cur;
525 	register vm_map_entry_t last;
526 
527 	/*
528 	 * Start looking either from the head of the list, or from the hint.
529 	 */
530 
531 	cur = map->hint;
532 
533 	if (cur == &map->header)
534 		cur = cur->next;
535 
536 	if (address >= cur->start) {
537 		/*
538 		 * Go from hint to end of list.
539 		 *
540 		 * But first, make a quick check to see if we are already looking
541 		 * at the entry we want (which is usually the case). Note also
542 		 * that we don't need to save the hint here... it is the same
543 		 * hint (unless we are at the header, in which case the hint
544 		 * didn't buy us anything anyway).
545 		 */
546 		last = &map->header;
547 		if ((cur != last) && (cur->end > address)) {
548 			*entry = cur;
549 			return (TRUE);
550 		}
551 	} else {
552 		/*
553 		 * Go from start to hint, *inclusively*
554 		 */
555 		last = cur->next;
556 		cur = map->header.next;
557 	}
558 
559 	/*
560 	 * Search linearly
561 	 */
562 
563 	while (cur != last) {
564 		if (cur->end > address) {
565 			if (address >= cur->start) {
566 				/*
567 				 * Save this lookup for future hints, and
568 				 * return
569 				 */
570 
571 				*entry = cur;
572 				SAVE_HINT(map, cur);
573 				return (TRUE);
574 			}
575 			break;
576 		}
577 		cur = cur->next;
578 	}
579 	*entry = cur->prev;
580 	SAVE_HINT(map, *entry);
581 	return (FALSE);
582 }
583 
584 /*
585  *	vm_map_insert:
586  *
587  *	Inserts the given whole VM object into the target
588  *	map at the specified address range.  The object's
589  *	size should match that of the address range.
590  *
591  *	Requires that the map be locked, and leaves it so.
592  */
593 int
594 vm_map_insert(map, object, offset, start, end, prot, max, cow)
595 	vm_map_t map;
596 	vm_object_t object;
597 	vm_ooffset_t offset;
598 	vm_offset_t start;
599 	vm_offset_t end;
600 	vm_prot_t prot, max;
601 	int cow;
602 {
603 	register vm_map_entry_t new_entry;
604 	register vm_map_entry_t prev_entry;
605 	vm_map_entry_t temp_entry;
606 	vm_object_t prev_object;
607 
608 	/*
609 	 * Check that the start and end points are not bogus.
610 	 */
611 
612 	if ((start < map->min_offset) || (end > map->max_offset) ||
613 	    (start >= end))
614 		return (KERN_INVALID_ADDRESS);
615 
616 	/*
617 	 * Find the entry prior to the proposed starting address; if it's part
618 	 * of an existing entry, this range is bogus.
619 	 */
620 
621 	if (vm_map_lookup_entry(map, start, &temp_entry))
622 		return (KERN_NO_SPACE);
623 
624 	prev_entry = temp_entry;
625 
626 	/*
627 	 * Assert that the next entry doesn't overlap the end point.
628 	 */
629 
630 	if ((prev_entry->next != &map->header) &&
631 	    (prev_entry->next->start < end))
632 		return (KERN_NO_SPACE);
633 
634 	if ((prev_entry != &map->header) &&
635 		(prev_entry->end == start) &&
636 		((object == NULL) || (prev_entry->object.vm_object == object)) &&
637 		(prev_entry->is_a_map == FALSE) &&
638 		(prev_entry->is_sub_map == FALSE) &&
639 		(prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
640 		(prev_entry->protection == prot) &&
641 		(prev_entry->max_protection == max) &&
642 		(prev_entry->wired_count == 0)) {
643 
644 
645 	/*
646 	 * See if we can avoid creating a new entry by extending one of our
647 	 * neighbors.
648 	 */
649 		if (object == NULL) {
650 			if (vm_object_coalesce(prev_entry->object.vm_object,
651 				OFF_TO_IDX(prev_entry->offset),
652 				(vm_size_t) (prev_entry->end
653 				    - prev_entry->start),
654 				(vm_size_t) (end - prev_entry->end))) {
655 
656 				/*
657 				 * Coalesced the two objects - can extend the
658 				 * previous map entry to include the new
659 				 * range.
660 				 */
661 				map->size += (end - prev_entry->end);
662 				prev_entry->end = end;
663 				prev_object = prev_entry->object.vm_object;
664 				default_pager_convert_to_swapq(prev_object);
665 				return (KERN_SUCCESS);
666 			}
667 		}
668 	}
669 	/*
670 	 * Create a new entry
671 	 */
672 
673 	new_entry = vm_map_entry_create(map);
674 	new_entry->start = start;
675 	new_entry->end = end;
676 
677 	new_entry->is_a_map = FALSE;
678 	new_entry->is_sub_map = FALSE;
679 	new_entry->object.vm_object = object;
680 	new_entry->offset = offset;
681 
682 	if (cow & MAP_COPY_NEEDED)
683 		new_entry->needs_copy = TRUE;
684 	else
685 		new_entry->needs_copy = FALSE;
686 
687 	if (cow & MAP_COPY_ON_WRITE)
688 		new_entry->copy_on_write = TRUE;
689 	else
690 		new_entry->copy_on_write = FALSE;
691 
692 	if (map->is_main_map) {
693 		new_entry->inheritance = VM_INHERIT_DEFAULT;
694 		new_entry->protection = prot;
695 		new_entry->max_protection = max;
696 		new_entry->wired_count = 0;
697 	}
698 	/*
699 	 * Insert the new entry into the list
700 	 */
701 
702 	vm_map_entry_link(map, prev_entry, new_entry);
703 	map->size += new_entry->end - new_entry->start;
704 
705 	/*
706 	 * Update the free space hint
707 	 */
708 	if ((map->first_free == prev_entry) &&
709 		(prev_entry->end >= new_entry->start))
710 		map->first_free = new_entry;
711 
712 	default_pager_convert_to_swapq(object);
713 	return (KERN_SUCCESS);
714 }
715 
716 /*
717  * Find sufficient space for `length' bytes in the given map, starting at
718  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
719  */
720 int
721 vm_map_findspace(map, start, length, addr)
722 	register vm_map_t map;
723 	register vm_offset_t start;
724 	vm_size_t length;
725 	vm_offset_t *addr;
726 {
727 	register vm_map_entry_t entry, next;
728 	register vm_offset_t end;
729 
730 	if (start < map->min_offset)
731 		start = map->min_offset;
732 	if (start > map->max_offset)
733 		return (1);
734 
735 	/*
736 	 * Look for the first possible address; if there's already something
737 	 * at this address, we have to start after it.
738 	 */
739 	if (start == map->min_offset) {
740 		if ((entry = map->first_free) != &map->header)
741 			start = entry->end;
742 	} else {
743 		vm_map_entry_t tmp;
744 
745 		if (vm_map_lookup_entry(map, start, &tmp))
746 			start = tmp->end;
747 		entry = tmp;
748 	}
749 
750 	/*
751 	 * Look through the rest of the map, trying to fit a new region in the
752 	 * gap between existing regions, or after the very last region.
753 	 */
754 	for (;; start = (entry = next)->end) {
755 		/*
756 		 * Find the end of the proposed new region.  Be sure we didn't
757 		 * go beyond the end of the map, or wrap around the address;
758 		 * if so, we lose.  Otherwise, if this is the last entry, or
759 		 * if the proposed new region fits before the next entry, we
760 		 * win.
761 		 */
762 		end = start + length;
763 		if (end > map->max_offset || end < start)
764 			return (1);
765 		next = entry->next;
766 		if (next == &map->header || next->start >= end)
767 			break;
768 	}
769 	SAVE_HINT(map, entry);
770 	*addr = start;
771 	if (map == kernel_map && round_page(start + length) > kernel_vm_end)
772 		pmap_growkernel(round_page(start + length));
773 	return (0);
774 }
775 
776 /*
777  *	vm_map_find finds an unallocated region in the target address
778  *	map with the given length.  The search is defined to be
779  *	first-fit from the specified address; the region found is
780  *	returned in the same parameter.
781  *
782  */
783 int
784 vm_map_find(map, object, offset, addr, length, find_space, prot, max, cow)
785 	vm_map_t map;
786 	vm_object_t object;
787 	vm_ooffset_t offset;
788 	vm_offset_t *addr;	/* IN/OUT */
789 	vm_size_t length;
790 	boolean_t find_space;
791 	vm_prot_t prot, max;
792 	int cow;
793 {
794 	register vm_offset_t start;
795 	int result, s = 0;
796 
797 	start = *addr;
798 
799 	if (map == kmem_map || map == mb_map)
800 		s = splvm();
801 
802 	vm_map_lock(map);
803 	if (find_space) {
804 		if (vm_map_findspace(map, start, length, addr)) {
805 			vm_map_unlock(map);
806 			if (map == kmem_map || map == mb_map)
807 				splx(s);
808 			return (KERN_NO_SPACE);
809 		}
810 		start = *addr;
811 	}
812 	result = vm_map_insert(map, object, offset,
813 		start, start + length, prot, max, cow);
814 	vm_map_unlock(map);
815 
816 	if (map == kmem_map || map == mb_map)
817 		splx(s);
818 
819 	return (result);
820 }
821 
822 /*
823  *	vm_map_simplify_entry:	[ internal use only ]
824  *
825  *	Simplify the given map entry by:
826  *		removing extra sharing maps
827  *		[XXX maybe later] merging with a neighbor
828  */
829 static void
830 vm_map_simplify_entry(map, entry)
831 	vm_map_t map;
832 	vm_map_entry_t entry;
833 {
834 	vm_map_entry_t next, prev;
835 	vm_size_t nextsize, prevsize, esize;
836 
837 	/*
838 	 * If this entry corresponds to a sharing map, then see if we can
839 	 * remove the level of indirection. If it's not a sharing map, then it
840 	 * points to a VM object, so see if we can merge with either of our
841 	 * neighbors.
842 	 */
843 
844 	if (entry->is_sub_map || entry->is_a_map || entry->wired_count)
845 		return;
846 
847 	prev = entry->prev;
848 	if (prev != &map->header) {
849 		prevsize = prev->end - prev->start;
850 		if ( (prev->end == entry->start) &&
851 		     (prev->object.vm_object == entry->object.vm_object) &&
852 		     (!prev->object.vm_object || (prev->object.vm_object->behavior == entry->object.vm_object->behavior)) &&
853 		     (!prev->object.vm_object ||
854 			(prev->offset + prevsize == entry->offset)) &&
855 		     (prev->needs_copy == entry->needs_copy) &&
856 		     (prev->copy_on_write == entry->copy_on_write) &&
857 		     (prev->protection == entry->protection) &&
858 		     (prev->max_protection == entry->max_protection) &&
859 		     (prev->inheritance == entry->inheritance) &&
860 		     (prev->is_a_map == FALSE) &&
861 		     (prev->is_sub_map == FALSE) &&
862 		     (prev->wired_count == 0)) {
863 			if (map->first_free == prev)
864 				map->first_free = entry;
865 			if (map->hint == prev)
866 				map->hint = entry;
867 			vm_map_entry_unlink(map, prev);
868 			entry->start = prev->start;
869 			entry->offset = prev->offset;
870 			if (prev->object.vm_object)
871 				vm_object_deallocate(prev->object.vm_object);
872 			vm_map_entry_dispose(map, prev);
873 		}
874 	}
875 
876 	next = entry->next;
877 	if (next != &map->header) {
878 		nextsize = next->end - next->start;
879 		esize = entry->end - entry->start;
880 		if ((entry->end == next->start) &&
881 		    (next->object.vm_object == entry->object.vm_object) &&
882 		    (!next->object.vm_object || (next->object.vm_object->behavior == entry->object.vm_object->behavior)) &&
883 		     (!entry->object.vm_object ||
884 			(entry->offset + esize == next->offset)) &&
885 		    (next->needs_copy == entry->needs_copy) &&
886 		    (next->copy_on_write == entry->copy_on_write) &&
887 		    (next->protection == entry->protection) &&
888 		    (next->max_protection == entry->max_protection) &&
889 		    (next->inheritance == entry->inheritance) &&
890 		    (next->is_a_map == FALSE) &&
891 		    (next->is_sub_map == FALSE) &&
892 		    (next->wired_count == 0)) {
893 			if (map->first_free == next)
894 				map->first_free = entry;
895 			if (map->hint == next)
896 				map->hint = entry;
897 			vm_map_entry_unlink(map, next);
898 			entry->end = next->end;
899 			if (next->object.vm_object)
900 				vm_object_deallocate(next->object.vm_object);
901 			vm_map_entry_dispose(map, next);
902 	        }
903 	}
904 }
905 /*
906  *	vm_map_clip_start:	[ internal use only ]
907  *
908  *	Asserts that the given entry begins at or after
909  *	the specified address; if necessary,
910  *	it splits the entry into two.
911  */
912 #define vm_map_clip_start(map, entry, startaddr) \
913 { \
914 	if (startaddr > entry->start) \
915 		_vm_map_clip_start(map, entry, startaddr); \
916 }
917 
918 /*
919  *	This routine is called only when it is known that
920  *	the entry must be split.
921  */
922 static void
923 _vm_map_clip_start(map, entry, start)
924 	register vm_map_t map;
925 	register vm_map_entry_t entry;
926 	register vm_offset_t start;
927 {
928 	register vm_map_entry_t new_entry;
929 
930 	/*
931 	 * Split off the front portion -- note that we must insert the new
932 	 * entry BEFORE this one, so that this entry has the specified
933 	 * starting address.
934 	 */
935 
936 	vm_map_simplify_entry(map, entry);
937 
938 	new_entry = vm_map_entry_create(map);
939 	*new_entry = *entry;
940 
941 	new_entry->end = start;
942 	entry->offset += (start - entry->start);
943 	entry->start = start;
944 
945 	vm_map_entry_link(map, entry->prev, new_entry);
946 
947 	if (entry->is_a_map || entry->is_sub_map)
948 		vm_map_reference(new_entry->object.share_map);
949 	else
950 		vm_object_reference(new_entry->object.vm_object);
951 }
952 
953 /*
954  *	vm_map_clip_end:	[ internal use only ]
955  *
956  *	Asserts that the given entry ends at or before
957  *	the specified address; if necessary,
958  *	it splits the entry into two.
959  */
960 
961 #define vm_map_clip_end(map, entry, endaddr) \
962 { \
963 	if (endaddr < entry->end) \
964 		_vm_map_clip_end(map, entry, endaddr); \
965 }
966 
967 /*
968  *	This routine is called only when it is known that
969  *	the entry must be split.
970  */
971 static void
972 _vm_map_clip_end(map, entry, end)
973 	register vm_map_t map;
974 	register vm_map_entry_t entry;
975 	register vm_offset_t end;
976 {
977 	register vm_map_entry_t new_entry;
978 
979 	/*
980 	 * Create a new entry and insert it AFTER the specified entry
981 	 */
982 
983 	new_entry = vm_map_entry_create(map);
984 	*new_entry = *entry;
985 
986 	new_entry->start = entry->end = end;
987 	new_entry->offset += (end - entry->start);
988 
989 	vm_map_entry_link(map, entry, new_entry);
990 
991 	if (entry->is_a_map || entry->is_sub_map)
992 		vm_map_reference(new_entry->object.share_map);
993 	else
994 		vm_object_reference(new_entry->object.vm_object);
995 }
996 
997 /*
998  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
999  *
1000  *	Asserts that the starting and ending region
1001  *	addresses fall within the valid range of the map.
1002  */
1003 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
1004 		{					\
1005 		if (start < vm_map_min(map))		\
1006 			start = vm_map_min(map);	\
1007 		if (end > vm_map_max(map))		\
1008 			end = vm_map_max(map);		\
1009 		if (start > end)			\
1010 			start = end;			\
1011 		}
1012 
1013 /*
1014  *	vm_map_submap:		[ kernel use only ]
1015  *
1016  *	Mark the given range as handled by a subordinate map.
1017  *
1018  *	This range must have been created with vm_map_find,
1019  *	and no other operations may have been performed on this
1020  *	range prior to calling vm_map_submap.
1021  *
1022  *	Only a limited number of operations can be performed
1023  *	within this rage after calling vm_map_submap:
1024  *		vm_fault
1025  *	[Don't try vm_map_copy!]
1026  *
1027  *	To remove a submapping, one must first remove the
1028  *	range from the superior map, and then destroy the
1029  *	submap (if desired).  [Better yet, don't try it.]
1030  */
1031 int
1032 vm_map_submap(map, start, end, submap)
1033 	register vm_map_t map;
1034 	register vm_offset_t start;
1035 	register vm_offset_t end;
1036 	vm_map_t submap;
1037 {
1038 	vm_map_entry_t entry;
1039 	register int result = KERN_INVALID_ARGUMENT;
1040 
1041 	vm_map_lock(map);
1042 
1043 	VM_MAP_RANGE_CHECK(map, start, end);
1044 
1045 	if (vm_map_lookup_entry(map, start, &entry)) {
1046 		vm_map_clip_start(map, entry, start);
1047 	} else
1048 		entry = entry->next;
1049 
1050 	vm_map_clip_end(map, entry, end);
1051 
1052 	if ((entry->start == start) && (entry->end == end) &&
1053 	    (!entry->is_a_map) &&
1054 	    (entry->object.vm_object == NULL) &&
1055 	    (!entry->copy_on_write)) {
1056 		entry->is_a_map = FALSE;
1057 		entry->is_sub_map = TRUE;
1058 		vm_map_reference(entry->object.sub_map = submap);
1059 		result = KERN_SUCCESS;
1060 	}
1061 	vm_map_unlock(map);
1062 
1063 	return (result);
1064 }
1065 
1066 /*
1067  *	vm_map_protect:
1068  *
1069  *	Sets the protection of the specified address
1070  *	region in the target map.  If "set_max" is
1071  *	specified, the maximum protection is to be set;
1072  *	otherwise, only the current protection is affected.
1073  */
1074 int
1075 vm_map_protect(map, start, end, new_prot, set_max)
1076 	register vm_map_t map;
1077 	register vm_offset_t start;
1078 	register vm_offset_t end;
1079 	register vm_prot_t new_prot;
1080 	register boolean_t set_max;
1081 {
1082 	register vm_map_entry_t current;
1083 	vm_map_entry_t entry;
1084 
1085 	vm_map_lock(map);
1086 
1087 	VM_MAP_RANGE_CHECK(map, start, end);
1088 
1089 	if (vm_map_lookup_entry(map, start, &entry)) {
1090 		vm_map_clip_start(map, entry, start);
1091 	} else
1092 		entry = entry->next;
1093 
1094 	/*
1095 	 * Make a first pass to check for protection violations.
1096 	 */
1097 
1098 	current = entry;
1099 	while ((current != &map->header) && (current->start < end)) {
1100 		if (current->is_sub_map) {
1101 			vm_map_unlock(map);
1102 			return (KERN_INVALID_ARGUMENT);
1103 		}
1104 		if ((new_prot & current->max_protection) != new_prot) {
1105 			vm_map_unlock(map);
1106 			return (KERN_PROTECTION_FAILURE);
1107 		}
1108 		current = current->next;
1109 	}
1110 
1111 	/*
1112 	 * Go back and fix up protections. [Note that clipping is not
1113 	 * necessary the second time.]
1114 	 */
1115 
1116 	current = entry;
1117 
1118 	while ((current != &map->header) && (current->start < end)) {
1119 		vm_prot_t old_prot;
1120 
1121 		vm_map_clip_end(map, current, end);
1122 
1123 		old_prot = current->protection;
1124 		if (set_max)
1125 			current->protection =
1126 			    (current->max_protection = new_prot) &
1127 			    old_prot;
1128 		else
1129 			current->protection = new_prot;
1130 
1131 		/*
1132 		 * Update physical map if necessary. Worry about copy-on-write
1133 		 * here -- CHECK THIS XXX
1134 		 */
1135 
1136 		if (current->protection != old_prot) {
1137 #define MASK(entry)	((entry)->copy_on_write ? ~VM_PROT_WRITE : \
1138 							VM_PROT_ALL)
1139 #define	max(a,b)	((a) > (b) ? (a) : (b))
1140 
1141 			if (current->is_a_map) {
1142 				vm_map_entry_t share_entry;
1143 				vm_offset_t share_end;
1144 
1145 				vm_map_lock(current->object.share_map);
1146 				(void) vm_map_lookup_entry(
1147 				    current->object.share_map,
1148 				    current->offset,
1149 				    &share_entry);
1150 				share_end = current->offset +
1151 				    (current->end - current->start);
1152 				while ((share_entry !=
1153 					&current->object.share_map->header) &&
1154 				    (share_entry->start < share_end)) {
1155 
1156 					pmap_protect(map->pmap,
1157 					    (max(share_entry->start,
1158 						    current->offset) -
1159 						current->offset +
1160 						current->start),
1161 					    min(share_entry->end,
1162 						share_end) -
1163 					    current->offset +
1164 					    current->start,
1165 					    current->protection &
1166 					    MASK(share_entry));
1167 
1168 					share_entry = share_entry->next;
1169 				}
1170 				vm_map_unlock(current->object.share_map);
1171 			} else
1172 				pmap_protect(map->pmap, current->start,
1173 				    current->end,
1174 				    current->protection & MASK(entry));
1175 #undef	max
1176 #undef	MASK
1177 		}
1178 		current = current->next;
1179 	}
1180 
1181 	vm_map_simplify_entry(map, entry);
1182 	vm_map_unlock(map);
1183 	return (KERN_SUCCESS);
1184 }
1185 
1186 /*
1187  *	vm_map_madvise:
1188  *
1189  * 	This routine traverses a processes map handling the madvise
1190  *	system call.
1191  */
1192 void
1193 vm_map_madvise(map, pmap, start, end, advise)
1194 	vm_map_t map;
1195 	pmap_t pmap;
1196 	vm_offset_t start, end;
1197 	int advise;
1198 {
1199 	register vm_map_entry_t current;
1200 	vm_map_entry_t entry;
1201 
1202 	vm_map_lock(map);
1203 
1204 	VM_MAP_RANGE_CHECK(map, start, end);
1205 
1206 	if (vm_map_lookup_entry(map, start, &entry)) {
1207 		vm_map_clip_start(map, entry, start);
1208 	} else
1209 		entry = entry->next;
1210 
1211 	for(current = entry;
1212 		(current != &map->header) && (current->start < end);
1213 		current = current->next) {
1214 		if (current->is_a_map || current->is_sub_map) {
1215 			continue;
1216 		}
1217 		vm_map_clip_end(map, current, end);
1218 		switch (advise) {
1219 	case MADV_NORMAL:
1220 			current->object.vm_object->behavior = OBJ_NORMAL;
1221 			break;
1222 	case MADV_SEQUENTIAL:
1223 			current->object.vm_object->behavior = OBJ_SEQUENTIAL;
1224 			break;
1225 	case MADV_RANDOM:
1226 			current->object.vm_object->behavior = OBJ_RANDOM;
1227 			break;
1228 	/*
1229 	 * Right now, we could handle DONTNEED and WILLNEED with common code.
1230 	 * They are mostly the same, except for the potential async reads (NYI).
1231 	 */
1232 	case MADV_FREE:
1233 	case MADV_DONTNEED:
1234 			{
1235 				vm_pindex_t pindex;
1236 				int count;
1237 				vm_size_t size = current->end - current->start;
1238 				pindex = OFF_TO_IDX(entry->offset);
1239 				count = OFF_TO_IDX(size);
1240 				/*
1241 				 * MADV_DONTNEED removes the page from all
1242 				 * pmaps, so pmap_remove is not necessary.
1243 				 */
1244 				vm_object_madvise(current->object.vm_object,
1245 					pindex, count, advise);
1246 			}
1247 			break;
1248 
1249 	case MADV_WILLNEED:
1250 			{
1251 				vm_pindex_t pindex;
1252 				int count;
1253 				vm_size_t size = current->end - current->start;
1254 				pindex = OFF_TO_IDX(current->offset);
1255 				count = OFF_TO_IDX(size);
1256 				vm_object_madvise(current->object.vm_object,
1257 					pindex, count, advise);
1258 				pmap_object_init_pt(pmap, current->start,
1259 					current->object.vm_object, pindex,
1260 					(count << PAGE_SHIFT), 0);
1261 			}
1262 			break;
1263 
1264 	default:
1265 			break;
1266 		}
1267 	}
1268 
1269 	vm_map_simplify_entry(map, entry);
1270 	vm_map_unlock(map);
1271 	return;
1272 }
1273 
1274 
1275 /*
1276  *	vm_map_inherit:
1277  *
1278  *	Sets the inheritance of the specified address
1279  *	range in the target map.  Inheritance
1280  *	affects how the map will be shared with
1281  *	child maps at the time of vm_map_fork.
1282  */
1283 int
1284 vm_map_inherit(map, start, end, new_inheritance)
1285 	register vm_map_t map;
1286 	register vm_offset_t start;
1287 	register vm_offset_t end;
1288 	register vm_inherit_t new_inheritance;
1289 {
1290 	register vm_map_entry_t entry;
1291 	vm_map_entry_t temp_entry;
1292 
1293 	switch (new_inheritance) {
1294 	case VM_INHERIT_NONE:
1295 	case VM_INHERIT_COPY:
1296 	case VM_INHERIT_SHARE:
1297 		break;
1298 	default:
1299 		return (KERN_INVALID_ARGUMENT);
1300 	}
1301 
1302 	vm_map_lock(map);
1303 
1304 	VM_MAP_RANGE_CHECK(map, start, end);
1305 
1306 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1307 		entry = temp_entry;
1308 		vm_map_clip_start(map, entry, start);
1309 	} else
1310 		entry = temp_entry->next;
1311 
1312 	while ((entry != &map->header) && (entry->start < end)) {
1313 		vm_map_clip_end(map, entry, end);
1314 
1315 		entry->inheritance = new_inheritance;
1316 
1317 		entry = entry->next;
1318 	}
1319 
1320 	vm_map_simplify_entry(map, temp_entry);
1321 	vm_map_unlock(map);
1322 	return (KERN_SUCCESS);
1323 }
1324 
1325 /*
1326  *	vm_map_pageable:
1327  *
1328  *	Sets the pageability of the specified address
1329  *	range in the target map.  Regions specified
1330  *	as not pageable require locked-down physical
1331  *	memory and physical page maps.
1332  *
1333  *	The map must not be locked, but a reference
1334  *	must remain to the map throughout the call.
1335  */
1336 int
1337 vm_map_pageable(map, start, end, new_pageable)
1338 	register vm_map_t map;
1339 	register vm_offset_t start;
1340 	register vm_offset_t end;
1341 	register boolean_t new_pageable;
1342 {
1343 	register vm_map_entry_t entry;
1344 	vm_map_entry_t start_entry;
1345 	register vm_offset_t failed = 0;
1346 	int rv;
1347 
1348 	vm_map_lock(map);
1349 
1350 	VM_MAP_RANGE_CHECK(map, start, end);
1351 
1352 	/*
1353 	 * Only one pageability change may take place at one time, since
1354 	 * vm_fault assumes it will be called only once for each
1355 	 * wiring/unwiring.  Therefore, we have to make sure we're actually
1356 	 * changing the pageability for the entire region.  We do so before
1357 	 * making any changes.
1358 	 */
1359 
1360 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1361 		vm_map_unlock(map);
1362 		return (KERN_INVALID_ADDRESS);
1363 	}
1364 	entry = start_entry;
1365 
1366 	/*
1367 	 * Actions are rather different for wiring and unwiring, so we have
1368 	 * two separate cases.
1369 	 */
1370 
1371 	if (new_pageable) {
1372 
1373 		vm_map_clip_start(map, entry, start);
1374 
1375 		/*
1376 		 * Unwiring.  First ensure that the range to be unwired is
1377 		 * really wired down and that there are no holes.
1378 		 */
1379 		while ((entry != &map->header) && (entry->start < end)) {
1380 
1381 			if (entry->wired_count == 0 ||
1382 			    (entry->end < end &&
1383 				(entry->next == &map->header ||
1384 				    entry->next->start > entry->end))) {
1385 				vm_map_unlock(map);
1386 				return (KERN_INVALID_ARGUMENT);
1387 			}
1388 			entry = entry->next;
1389 		}
1390 
1391 		/*
1392 		 * Now decrement the wiring count for each region. If a region
1393 		 * becomes completely unwired, unwire its physical pages and
1394 		 * mappings.
1395 		 */
1396 		lock_set_recursive(&map->lock);
1397 
1398 		entry = start_entry;
1399 		while ((entry != &map->header) && (entry->start < end)) {
1400 			vm_map_clip_end(map, entry, end);
1401 
1402 			entry->wired_count--;
1403 			if (entry->wired_count == 0)
1404 				vm_fault_unwire(map, entry->start, entry->end);
1405 
1406 			entry = entry->next;
1407 		}
1408 		vm_map_simplify_entry(map, start_entry);
1409 		lock_clear_recursive(&map->lock);
1410 	} else {
1411 		/*
1412 		 * Wiring.  We must do this in two passes:
1413 		 *
1414 		 * 1.  Holding the write lock, we create any shadow or zero-fill
1415 		 * objects that need to be created. Then we clip each map
1416 		 * entry to the region to be wired and increment its wiring
1417 		 * count.  We create objects before clipping the map entries
1418 		 * to avoid object proliferation.
1419 		 *
1420 		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
1421 		 * fault in the pages for any newly wired area (wired_count is
1422 		 * 1).
1423 		 *
1424 		 * Downgrading to a read lock for vm_fault_wire avoids a possible
1425 		 * deadlock with another process that may have faulted on one
1426 		 * of the pages to be wired (it would mark the page busy,
1427 		 * blocking us, then in turn block on the map lock that we
1428 		 * hold).  Because of problems in the recursive lock package,
1429 		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
1430 		 * any actions that require the write lock must be done
1431 		 * beforehand.  Because we keep the read lock on the map, the
1432 		 * copy-on-write status of the entries we modify here cannot
1433 		 * change.
1434 		 */
1435 
1436 		/*
1437 		 * Pass 1.
1438 		 */
1439 		while ((entry != &map->header) && (entry->start < end)) {
1440 			if (entry->wired_count == 0) {
1441 
1442 				/*
1443 				 * Perform actions of vm_map_lookup that need
1444 				 * the write lock on the map: create a shadow
1445 				 * object for a copy-on-write region, or an
1446 				 * object for a zero-fill region.
1447 				 *
1448 				 * We don't have to do this for entries that
1449 				 * point to sharing maps, because we won't
1450 				 * hold the lock on the sharing map.
1451 				 */
1452 				if (!entry->is_a_map && !entry->is_sub_map) {
1453 					int copyflag = entry->needs_copy;
1454 					if (copyflag &&
1455 					    ((entry->protection & VM_PROT_WRITE) != 0)) {
1456 
1457 						vm_object_shadow(&entry->object.vm_object,
1458 						    &entry->offset,
1459 						    OFF_TO_IDX(entry->end
1460 							- entry->start));
1461 						entry->needs_copy = FALSE;
1462 					} else if (entry->object.vm_object == NULL) {
1463 						entry->object.vm_object =
1464 						    vm_object_allocate(OBJT_DEFAULT,
1465 							OFF_TO_IDX(entry->end - entry->start));
1466 						entry->offset = (vm_offset_t) 0;
1467 					}
1468 					default_pager_convert_to_swapq(entry->object.vm_object);
1469 				}
1470 			}
1471 			vm_map_clip_start(map, entry, start);
1472 			vm_map_clip_end(map, entry, end);
1473 			entry->wired_count++;
1474 
1475 			/*
1476 			 * Check for holes
1477 			 */
1478 			if (entry->end < end &&
1479 			    (entry->next == &map->header ||
1480 				entry->next->start > entry->end)) {
1481 				/*
1482 				 * Found one.  Object creation actions do not
1483 				 * need to be undone, but the wired counts
1484 				 * need to be restored.
1485 				 */
1486 				while (entry != &map->header && entry->end > start) {
1487 					entry->wired_count--;
1488 					entry = entry->prev;
1489 				}
1490 				vm_map_unlock(map);
1491 				return (KERN_INVALID_ARGUMENT);
1492 			}
1493 			entry = entry->next;
1494 		}
1495 
1496 		/*
1497 		 * Pass 2.
1498 		 */
1499 
1500 		/*
1501 		 * HACK HACK HACK HACK
1502 		 *
1503 		 * If we are wiring in the kernel map or a submap of it,
1504 		 * unlock the map to avoid deadlocks.  We trust that the
1505 		 * kernel is well-behaved, and therefore will not do
1506 		 * anything destructive to this region of the map while
1507 		 * we have it unlocked.  We cannot trust user processes
1508 		 * to do the same.
1509 		 *
1510 		 * HACK HACK HACK HACK
1511 		 */
1512 		if (vm_map_pmap(map) == kernel_pmap) {
1513 			vm_map_unlock(map);	/* trust me ... */
1514 		} else {
1515 			lock_set_recursive(&map->lock);
1516 			lock_write_to_read(&map->lock);
1517 		}
1518 
1519 		rv = 0;
1520 		entry = start_entry;
1521 		while (entry != &map->header && entry->start < end) {
1522 			/*
1523 			 * If vm_fault_wire fails for any page we need to undo
1524 			 * what has been done.  We decrement the wiring count
1525 			 * for those pages which have not yet been wired (now)
1526 			 * and unwire those that have (later).
1527 			 *
1528 			 * XXX this violates the locking protocol on the map,
1529 			 * needs to be fixed.
1530 			 */
1531 			if (rv)
1532 				entry->wired_count--;
1533 			else if (entry->wired_count == 1) {
1534 				rv = vm_fault_wire(map, entry->start, entry->end);
1535 				if (rv) {
1536 					failed = entry->start;
1537 					entry->wired_count--;
1538 				}
1539 			}
1540 			entry = entry->next;
1541 		}
1542 
1543 		if (vm_map_pmap(map) == kernel_pmap) {
1544 			vm_map_lock(map);
1545 		} else {
1546 			lock_clear_recursive(&map->lock);
1547 		}
1548 		if (rv) {
1549 			vm_map_unlock(map);
1550 			(void) vm_map_pageable(map, start, failed, TRUE);
1551 			return (rv);
1552 		}
1553 	}
1554 
1555 	vm_map_unlock(map);
1556 
1557 	return (KERN_SUCCESS);
1558 }
1559 
1560 /*
1561  * vm_map_clean
1562  *
1563  * Push any dirty cached pages in the address range to their pager.
1564  * If syncio is TRUE, dirty pages are written synchronously.
1565  * If invalidate is TRUE, any cached pages are freed as well.
1566  *
1567  * Returns an error if any part of the specified range is not mapped.
1568  */
1569 int
1570 vm_map_clean(map, start, end, syncio, invalidate)
1571 	vm_map_t map;
1572 	vm_offset_t start;
1573 	vm_offset_t end;
1574 	boolean_t syncio;
1575 	boolean_t invalidate;
1576 {
1577 	register vm_map_entry_t current;
1578 	vm_map_entry_t entry;
1579 	vm_size_t size;
1580 	vm_object_t object;
1581 	vm_ooffset_t offset;
1582 
1583 	vm_map_lock_read(map);
1584 	VM_MAP_RANGE_CHECK(map, start, end);
1585 	if (!vm_map_lookup_entry(map, start, &entry)) {
1586 		vm_map_unlock_read(map);
1587 		return (KERN_INVALID_ADDRESS);
1588 	}
1589 	/*
1590 	 * Make a first pass to check for holes.
1591 	 */
1592 	for (current = entry; current->start < end; current = current->next) {
1593 		if (current->is_sub_map) {
1594 			vm_map_unlock_read(map);
1595 			return (KERN_INVALID_ARGUMENT);
1596 		}
1597 		if (end > current->end &&
1598 		    (current->next == &map->header ||
1599 			current->end != current->next->start)) {
1600 			vm_map_unlock_read(map);
1601 			return (KERN_INVALID_ADDRESS);
1602 		}
1603 	}
1604 
1605 	/*
1606 	 * Make a second pass, cleaning/uncaching pages from the indicated
1607 	 * objects as we go.
1608 	 */
1609 	for (current = entry; current->start < end; current = current->next) {
1610 		offset = current->offset + (start - current->start);
1611 		size = (end <= current->end ? end : current->end) - start;
1612 		if (current->is_a_map || current->is_sub_map) {
1613 			register vm_map_t smap;
1614 			vm_map_entry_t tentry;
1615 			vm_size_t tsize;
1616 
1617 			smap = current->object.share_map;
1618 			vm_map_lock_read(smap);
1619 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1620 			tsize = tentry->end - offset;
1621 			if (tsize < size)
1622 				size = tsize;
1623 			object = tentry->object.vm_object;
1624 			offset = tentry->offset + (offset - tentry->start);
1625 			vm_map_unlock_read(smap);
1626 		} else {
1627 			object = current->object.vm_object;
1628 		}
1629 		/*
1630 		 * Note that there is absolutely no sense in writing out
1631 		 * anonymous objects, so we track down the vnode object
1632 		 * to write out.
1633 		 * We invalidate (remove) all pages from the address space
1634 		 * anyway, for semantic correctness.
1635 		 */
1636 		while (object->backing_object) {
1637 			object = object->backing_object;
1638 			offset += object->backing_object_offset;
1639 			if (object->size < OFF_TO_IDX( offset + size))
1640 				size = IDX_TO_OFF(object->size) - offset;
1641 		}
1642 		if (invalidate)
1643 			pmap_remove(vm_map_pmap(map), current->start,
1644 				current->start + size);
1645 		if (object && (object->type == OBJT_VNODE)) {
1646 			/*
1647 			 * Flush pages if writing is allowed. XXX should we continue
1648 			 * on an error?
1649 			 *
1650 			 * XXX Doing async I/O and then removing all the pages from
1651 			 *     the object before it completes is probably a very bad
1652 			 *     idea.
1653 			 */
1654 			if (current->protection & VM_PROT_WRITE) {
1655 		   	    	vm_object_page_clean(object,
1656 					OFF_TO_IDX(offset),
1657 					OFF_TO_IDX(offset + size),
1658 					(syncio||invalidate)?1:0, TRUE);
1659 				if (invalidate)
1660 					vm_object_page_remove(object,
1661 						OFF_TO_IDX(offset),
1662 						OFF_TO_IDX(offset + size),
1663 						FALSE);
1664 			}
1665 		}
1666 		start += size;
1667 	}
1668 
1669 	vm_map_unlock_read(map);
1670 	return (KERN_SUCCESS);
1671 }
1672 
1673 /*
1674  *	vm_map_entry_unwire:	[ internal use only ]
1675  *
1676  *	Make the region specified by this entry pageable.
1677  *
1678  *	The map in question should be locked.
1679  *	[This is the reason for this routine's existence.]
1680  */
1681 static __inline void
1682 vm_map_entry_unwire(map, entry)
1683 	vm_map_t map;
1684 	register vm_map_entry_t entry;
1685 {
1686 	vm_fault_unwire(map, entry->start, entry->end);
1687 	entry->wired_count = 0;
1688 }
1689 
1690 /*
1691  *	vm_map_entry_delete:	[ internal use only ]
1692  *
1693  *	Deallocate the given entry from the target map.
1694  */
1695 static __inline void
1696 vm_map_entry_delete(map, entry)
1697 	register vm_map_t map;
1698 	register vm_map_entry_t entry;
1699 {
1700 	vm_map_entry_unlink(map, entry);
1701 	map->size -= entry->end - entry->start;
1702 
1703 	if (entry->is_a_map || entry->is_sub_map) {
1704 		vm_map_deallocate(entry->object.share_map);
1705 	} else {
1706 		vm_object_deallocate(entry->object.vm_object);
1707 	}
1708 
1709 	vm_map_entry_dispose(map, entry);
1710 }
1711 
1712 /*
1713  *	vm_map_delete:	[ internal use only ]
1714  *
1715  *	Deallocates the given address range from the target
1716  *	map.
1717  *
1718  *	When called with a sharing map, removes pages from
1719  *	that region from all physical maps.
1720  */
1721 int
1722 vm_map_delete(map, start, end)
1723 	register vm_map_t map;
1724 	vm_offset_t start;
1725 	register vm_offset_t end;
1726 {
1727 	register vm_map_entry_t entry;
1728 	vm_map_entry_t first_entry;
1729 
1730 	/*
1731 	 * Find the start of the region, and clip it
1732 	 */
1733 
1734 	if (!vm_map_lookup_entry(map, start, &first_entry))
1735 		entry = first_entry->next;
1736 	else {
1737 		entry = first_entry;
1738 		vm_map_clip_start(map, entry, start);
1739 
1740 		/*
1741 		 * Fix the lookup hint now, rather than each time though the
1742 		 * loop.
1743 		 */
1744 
1745 		SAVE_HINT(map, entry->prev);
1746 	}
1747 
1748 	/*
1749 	 * Save the free space hint
1750 	 */
1751 
1752 	if (entry == &map->header) {
1753 		map->first_free = &map->header;
1754 	} else if (map->first_free->start >= start)
1755 		map->first_free = entry->prev;
1756 
1757 	/*
1758 	 * Step through all entries in this region
1759 	 */
1760 
1761 	while ((entry != &map->header) && (entry->start < end)) {
1762 		vm_map_entry_t next;
1763 		vm_offset_t s, e;
1764 		vm_object_t object;
1765 		vm_ooffset_t offset;
1766 
1767 		vm_map_clip_end(map, entry, end);
1768 
1769 		next = entry->next;
1770 		s = entry->start;
1771 		e = entry->end;
1772 		offset = entry->offset;
1773 
1774 		/*
1775 		 * Unwire before removing addresses from the pmap; otherwise,
1776 		 * unwiring will put the entries back in the pmap.
1777 		 */
1778 
1779 		object = entry->object.vm_object;
1780 		if (entry->wired_count != 0)
1781 			vm_map_entry_unwire(map, entry);
1782 
1783 		/*
1784 		 * If this is a sharing map, we must remove *all* references
1785 		 * to this data, since we can't find all of the physical maps
1786 		 * which are sharing it.
1787 		 */
1788 
1789 		if (object == kernel_object || object == kmem_object) {
1790 			vm_object_page_remove(object, OFF_TO_IDX(offset),
1791 			    OFF_TO_IDX(offset + (e - s)), FALSE);
1792 		} else if (!map->is_main_map) {
1793 			vm_object_pmap_remove(object,
1794 			    OFF_TO_IDX(offset),
1795 			    OFF_TO_IDX(offset + (e - s)));
1796 		} else {
1797 			pmap_remove(map->pmap, s, e);
1798 		}
1799 
1800 		/*
1801 		 * Delete the entry (which may delete the object) only after
1802 		 * removing all pmap entries pointing to its pages.
1803 		 * (Otherwise, its page frames may be reallocated, and any
1804 		 * modify bits will be set in the wrong object!)
1805 		 */
1806 
1807 		vm_map_entry_delete(map, entry);
1808 		entry = next;
1809 	}
1810 	return (KERN_SUCCESS);
1811 }
1812 
1813 /*
1814  *	vm_map_remove:
1815  *
1816  *	Remove the given address range from the target map.
1817  *	This is the exported form of vm_map_delete.
1818  */
1819 int
1820 vm_map_remove(map, start, end)
1821 	register vm_map_t map;
1822 	register vm_offset_t start;
1823 	register vm_offset_t end;
1824 {
1825 	register int result, s = 0;
1826 
1827 	if (map == kmem_map || map == mb_map)
1828 		s = splvm();
1829 
1830 	vm_map_lock(map);
1831 	VM_MAP_RANGE_CHECK(map, start, end);
1832 	result = vm_map_delete(map, start, end);
1833 	vm_map_unlock(map);
1834 
1835 	if (map == kmem_map || map == mb_map)
1836 		splx(s);
1837 
1838 	return (result);
1839 }
1840 
1841 /*
1842  *	vm_map_check_protection:
1843  *
1844  *	Assert that the target map allows the specified
1845  *	privilege on the entire address region given.
1846  *	The entire region must be allocated.
1847  */
1848 boolean_t
1849 vm_map_check_protection(map, start, end, protection)
1850 	register vm_map_t map;
1851 	register vm_offset_t start;
1852 	register vm_offset_t end;
1853 	register vm_prot_t protection;
1854 {
1855 	register vm_map_entry_t entry;
1856 	vm_map_entry_t tmp_entry;
1857 
1858 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1859 		return (FALSE);
1860 	}
1861 	entry = tmp_entry;
1862 
1863 	while (start < end) {
1864 		if (entry == &map->header) {
1865 			return (FALSE);
1866 		}
1867 		/*
1868 		 * No holes allowed!
1869 		 */
1870 
1871 		if (start < entry->start) {
1872 			return (FALSE);
1873 		}
1874 		/*
1875 		 * Check protection associated with entry.
1876 		 */
1877 
1878 		if ((entry->protection & protection) != protection) {
1879 			return (FALSE);
1880 		}
1881 		/* go to next entry */
1882 
1883 		start = entry->end;
1884 		entry = entry->next;
1885 	}
1886 	return (TRUE);
1887 }
1888 
1889 /*
1890  *	vm_map_copy_entry:
1891  *
1892  *	Copies the contents of the source entry to the destination
1893  *	entry.  The entries *must* be aligned properly.
1894  */
1895 static void
1896 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
1897 	vm_map_t src_map, dst_map;
1898 	register vm_map_entry_t src_entry, dst_entry;
1899 {
1900 	if (src_entry->is_sub_map || dst_entry->is_sub_map)
1901 		return;
1902 
1903 	if (src_entry->wired_count == 0) {
1904 
1905 		/*
1906 		 * If the source entry is marked needs_copy, it is already
1907 		 * write-protected.
1908 		 */
1909 		if (!src_entry->needs_copy) {
1910 
1911 			boolean_t su;
1912 
1913 			/*
1914 			 * If the source entry has only one mapping, we can
1915 			 * just protect the virtual address range.
1916 			 */
1917 			if (!(su = src_map->is_main_map)) {
1918 				su = (src_map->ref_count == 1);
1919 			}
1920 			if (su) {
1921 				pmap_protect(src_map->pmap,
1922 				    src_entry->start,
1923 				    src_entry->end,
1924 				    src_entry->protection & ~VM_PROT_WRITE);
1925 			} else {
1926 				vm_object_pmap_copy(src_entry->object.vm_object,
1927 				    OFF_TO_IDX(src_entry->offset),
1928 				    OFF_TO_IDX(src_entry->offset + (src_entry->end
1929 					- src_entry->start)));
1930 			}
1931 		}
1932 
1933 		/*
1934 		 * Make a copy of the object.
1935 		 */
1936 		if (src_entry->object.vm_object) {
1937 			if ((src_entry->object.vm_object->handle == NULL) &&
1938 				(src_entry->object.vm_object->type == OBJT_DEFAULT ||
1939 				 src_entry->object.vm_object->type == OBJT_SWAP))
1940 				vm_object_collapse(src_entry->object.vm_object);
1941 			++src_entry->object.vm_object->ref_count;
1942 			src_entry->copy_on_write = TRUE;
1943 			src_entry->needs_copy = TRUE;
1944 
1945 			dst_entry->needs_copy = TRUE;
1946 			dst_entry->copy_on_write = TRUE;
1947 			dst_entry->object.vm_object =
1948 				src_entry->object.vm_object;
1949 			dst_entry->offset = src_entry->offset;
1950 		} else {
1951 			dst_entry->object.vm_object = NULL;
1952 			dst_entry->offset = 0;
1953 		}
1954 
1955 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
1956 		    dst_entry->end - dst_entry->start, src_entry->start);
1957 	} else {
1958 		/*
1959 		 * Of course, wired down pages can't be set copy-on-write.
1960 		 * Cause wired pages to be copied into the new map by
1961 		 * simulating faults (the new pages are pageable)
1962 		 */
1963 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
1964 	}
1965 }
1966 
1967 /*
1968  * vmspace_fork:
1969  * Create a new process vmspace structure and vm_map
1970  * based on those of an existing process.  The new map
1971  * is based on the old map, according to the inheritance
1972  * values on the regions in that map.
1973  *
1974  * The source map must not be locked.
1975  */
1976 struct vmspace *
1977 vmspace_fork(vm1)
1978 	register struct vmspace *vm1;
1979 {
1980 	register struct vmspace *vm2;
1981 	vm_map_t old_map = &vm1->vm_map;
1982 	vm_map_t new_map;
1983 	vm_map_entry_t old_entry;
1984 	vm_map_entry_t new_entry;
1985 	pmap_t new_pmap;
1986 	vm_object_t object;
1987 
1988 	vm_map_lock(old_map);
1989 
1990 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
1991 	    old_map->entries_pageable);
1992 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
1993 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
1994 	new_pmap = &vm2->vm_pmap;	/* XXX */
1995 	new_map = &vm2->vm_map;	/* XXX */
1996 
1997 	old_entry = old_map->header.next;
1998 
1999 	while (old_entry != &old_map->header) {
2000 		if (old_entry->is_sub_map)
2001 			panic("vm_map_fork: encountered a submap");
2002 
2003 		switch (old_entry->inheritance) {
2004 		case VM_INHERIT_NONE:
2005 			break;
2006 
2007 		case VM_INHERIT_SHARE:
2008 			/*
2009 			 * Clone the entry, referencing the sharing map.
2010 			 */
2011 			new_entry = vm_map_entry_create(new_map);
2012 			*new_entry = *old_entry;
2013 			new_entry->wired_count = 0;
2014 			object = new_entry->object.vm_object;
2015 			++object->ref_count;
2016 
2017 			/*
2018 			 * Insert the entry into the new map -- we know we're
2019 			 * inserting at the end of the new map.
2020 			 */
2021 
2022 			vm_map_entry_link(new_map, new_map->header.prev,
2023 			    new_entry);
2024 
2025 			/*
2026 			 * Update the physical map
2027 			 */
2028 
2029 			pmap_copy(new_map->pmap, old_map->pmap,
2030 			    new_entry->start,
2031 			    (old_entry->end - old_entry->start),
2032 			    old_entry->start);
2033 			break;
2034 
2035 		case VM_INHERIT_COPY:
2036 			/*
2037 			 * Clone the entry and link into the map.
2038 			 */
2039 			new_entry = vm_map_entry_create(new_map);
2040 			*new_entry = *old_entry;
2041 			new_entry->wired_count = 0;
2042 			new_entry->object.vm_object = NULL;
2043 			new_entry->is_a_map = FALSE;
2044 			vm_map_entry_link(new_map, new_map->header.prev,
2045 			    new_entry);
2046 			vm_map_copy_entry(old_map, new_map, old_entry,
2047 			    new_entry);
2048 			break;
2049 		}
2050 		old_entry = old_entry->next;
2051 	}
2052 
2053 	new_map->size = old_map->size;
2054 	vm_map_unlock(old_map);
2055 
2056 	return (vm2);
2057 }
2058 
2059 /*
2060  *	vm_map_lookup:
2061  *
2062  *	Finds the VM object, offset, and
2063  *	protection for a given virtual address in the
2064  *	specified map, assuming a page fault of the
2065  *	type specified.
2066  *
2067  *	Leaves the map in question locked for read; return
2068  *	values are guaranteed until a vm_map_lookup_done
2069  *	call is performed.  Note that the map argument
2070  *	is in/out; the returned map must be used in
2071  *	the call to vm_map_lookup_done.
2072  *
2073  *	A handle (out_entry) is returned for use in
2074  *	vm_map_lookup_done, to make that fast.
2075  *
2076  *	If a lookup is requested with "write protection"
2077  *	specified, the map may be changed to perform virtual
2078  *	copying operations, although the data referenced will
2079  *	remain the same.
2080  */
2081 int
2082 vm_map_lookup(var_map, vaddr, fault_type, out_entry,
2083     object, pindex, out_prot, wired, single_use)
2084 	vm_map_t *var_map;	/* IN/OUT */
2085 	register vm_offset_t vaddr;
2086 	register vm_prot_t fault_type;
2087 
2088 	vm_map_entry_t *out_entry;	/* OUT */
2089 	vm_object_t *object;	/* OUT */
2090 	vm_pindex_t *pindex;	/* OUT */
2091 	vm_prot_t *out_prot;	/* OUT */
2092 	boolean_t *wired;	/* OUT */
2093 	boolean_t *single_use;	/* OUT */
2094 {
2095 	vm_map_t share_map;
2096 	vm_offset_t share_offset;
2097 	register vm_map_entry_t entry;
2098 	register vm_map_t map = *var_map;
2099 	register vm_prot_t prot;
2100 	register boolean_t su;
2101 
2102 RetryLookup:;
2103 
2104 	/*
2105 	 * Lookup the faulting address.
2106 	 */
2107 
2108 	vm_map_lock_read(map);
2109 
2110 #define	RETURN(why) \
2111 		{ \
2112 		vm_map_unlock_read(map); \
2113 		return(why); \
2114 		}
2115 
2116 	/*
2117 	 * If the map has an interesting hint, try it before calling full
2118 	 * blown lookup routine.
2119 	 */
2120 
2121 	entry = map->hint;
2122 
2123 	*out_entry = entry;
2124 
2125 	if ((entry == &map->header) ||
2126 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2127 		vm_map_entry_t tmp_entry;
2128 
2129 		/*
2130 		 * Entry was either not a valid hint, or the vaddr was not
2131 		 * contained in the entry, so do a full lookup.
2132 		 */
2133 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2134 			RETURN(KERN_INVALID_ADDRESS);
2135 
2136 		entry = tmp_entry;
2137 		*out_entry = entry;
2138 	}
2139 	/*
2140 	 * Handle submaps.
2141 	 */
2142 
2143 	if (entry->is_sub_map) {
2144 		vm_map_t old_map = map;
2145 
2146 		*var_map = map = entry->object.sub_map;
2147 		vm_map_unlock_read(old_map);
2148 		goto RetryLookup;
2149 	}
2150 	/*
2151 	 * Check whether this task is allowed to have this page.
2152 	 */
2153 
2154 	prot = entry->protection;
2155 	if ((fault_type & (prot)) != fault_type)
2156 		RETURN(KERN_PROTECTION_FAILURE);
2157 
2158 	/*
2159 	 * If this page is not pageable, we have to get it for all possible
2160 	 * accesses.
2161 	 */
2162 
2163 	*wired = (entry->wired_count != 0);
2164 	if (*wired)
2165 		prot = fault_type = entry->protection;
2166 
2167 	/*
2168 	 * If we don't already have a VM object, track it down.
2169 	 */
2170 
2171 	su = !entry->is_a_map;
2172 	if (su) {
2173 		share_map = map;
2174 		share_offset = vaddr;
2175 	} else {
2176 		vm_map_entry_t share_entry;
2177 
2178 		/*
2179 		 * Compute the sharing map, and offset into it.
2180 		 */
2181 
2182 		share_map = entry->object.share_map;
2183 		share_offset = (vaddr - entry->start) + entry->offset;
2184 
2185 		/*
2186 		 * Look for the backing store object and offset
2187 		 */
2188 
2189 		vm_map_lock_read(share_map);
2190 
2191 		if (!vm_map_lookup_entry(share_map, share_offset,
2192 			&share_entry)) {
2193 			vm_map_unlock_read(share_map);
2194 			RETURN(KERN_INVALID_ADDRESS);
2195 		}
2196 		entry = share_entry;
2197 	}
2198 
2199 	/*
2200 	 * If the entry was copy-on-write, we either ...
2201 	 */
2202 
2203 	if (entry->needs_copy) {
2204 		/*
2205 		 * If we want to write the page, we may as well handle that
2206 		 * now since we've got the sharing map locked.
2207 		 *
2208 		 * If we don't need to write the page, we just demote the
2209 		 * permissions allowed.
2210 		 */
2211 
2212 		if (fault_type & VM_PROT_WRITE) {
2213 			/*
2214 			 * Make a new object, and place it in the object
2215 			 * chain.  Note that no new references have appeared
2216 			 * -- one just moved from the share map to the new
2217 			 * object.
2218 			 */
2219 
2220 			if (lock_read_to_write(&share_map->lock)) {
2221 				if (share_map != map)
2222 					vm_map_unlock_read(map);
2223 				goto RetryLookup;
2224 			}
2225 			vm_object_shadow(
2226 			    &entry->object.vm_object,
2227 			    &entry->offset,
2228 			    OFF_TO_IDX(entry->end - entry->start));
2229 
2230 			entry->needs_copy = FALSE;
2231 
2232 			lock_write_to_read(&share_map->lock);
2233 		} else {
2234 			/*
2235 			 * We're attempting to read a copy-on-write page --
2236 			 * don't allow writes.
2237 			 */
2238 
2239 			prot &= (~VM_PROT_WRITE);
2240 		}
2241 	}
2242 	/*
2243 	 * Create an object if necessary.
2244 	 */
2245 	if (entry->object.vm_object == NULL) {
2246 
2247 		if (lock_read_to_write(&share_map->lock)) {
2248 			if (share_map != map)
2249 				vm_map_unlock_read(map);
2250 			goto RetryLookup;
2251 		}
2252 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2253 		    OFF_TO_IDX(entry->end - entry->start));
2254 		entry->offset = 0;
2255 		lock_write_to_read(&share_map->lock);
2256 	}
2257 
2258 	if (entry->object.vm_object != NULL)
2259 		default_pager_convert_to_swapq(entry->object.vm_object);
2260 	/*
2261 	 * Return the object/offset from this entry.  If the entry was
2262 	 * copy-on-write or empty, it has been fixed up.
2263 	 */
2264 
2265 	*pindex = OFF_TO_IDX((share_offset - entry->start) + entry->offset);
2266 	*object = entry->object.vm_object;
2267 
2268 	/*
2269 	 * Return whether this is the only map sharing this data.
2270 	 */
2271 
2272 	if (!su) {
2273 		su = (share_map->ref_count == 1);
2274 	}
2275 	*out_prot = prot;
2276 	*single_use = su;
2277 
2278 	return (KERN_SUCCESS);
2279 
2280 #undef	RETURN
2281 }
2282 
2283 /*
2284  *	vm_map_lookup_done:
2285  *
2286  *	Releases locks acquired by a vm_map_lookup
2287  *	(according to the handle returned by that lookup).
2288  */
2289 
2290 void
2291 vm_map_lookup_done(map, entry)
2292 	register vm_map_t map;
2293 	vm_map_entry_t entry;
2294 {
2295 	/*
2296 	 * If this entry references a map, unlock it first.
2297 	 */
2298 
2299 	if (entry->is_a_map)
2300 		vm_map_unlock_read(entry->object.share_map);
2301 
2302 	/*
2303 	 * Unlock the main-level map
2304 	 */
2305 
2306 	vm_map_unlock_read(map);
2307 }
2308 
2309 /*
2310  *	Routine:	vm_map_simplify
2311  *	Purpose:
2312  *		Attempt to simplify the map representation in
2313  *		the vicinity of the given starting address.
2314  *	Note:
2315  *		This routine is intended primarily to keep the
2316  *		kernel maps more compact -- they generally don't
2317  *		benefit from the "expand a map entry" technology
2318  *		at allocation time because the adjacent entry
2319  *		is often wired down.
2320  */
2321 void
2322 vm_map_simplify(map, start)
2323 	vm_map_t map;
2324 	vm_offset_t start;
2325 {
2326 	vm_map_entry_t this_entry;
2327 	vm_map_entry_t prev_entry;
2328 
2329 	vm_map_lock(map);
2330 	if ((vm_map_lookup_entry(map, start, &this_entry)) &&
2331 	    ((prev_entry = this_entry->prev) != &map->header) &&
2332 	    (prev_entry->end == start) &&
2333 	    (prev_entry->object.vm_object == this_entry->object.vm_object) &&
2334 	    ((prev_entry->offset + (prev_entry->end - prev_entry->start))
2335 		== this_entry->offset) &&
2336 
2337 	    (map->is_main_map) &&
2338 
2339 	    (prev_entry->is_a_map == FALSE) &&
2340 	    (prev_entry->is_sub_map == FALSE) &&
2341 
2342 	    (this_entry->is_a_map == FALSE) &&
2343 	    (this_entry->is_sub_map == FALSE) &&
2344 
2345 	    (prev_entry->inheritance == this_entry->inheritance) &&
2346 	    (prev_entry->protection == this_entry->protection) &&
2347 	    (prev_entry->max_protection == this_entry->max_protection) &&
2348 	    (prev_entry->wired_count == this_entry->wired_count) &&
2349 
2350 	    (prev_entry->copy_on_write == this_entry->copy_on_write) &&
2351 	    (prev_entry->needs_copy == this_entry->needs_copy)) {
2352 		if (map->first_free == this_entry)
2353 			map->first_free = prev_entry;
2354 		if (map->hint == this_entry)
2355 			SAVE_HINT(map, prev_entry);
2356 		vm_map_entry_unlink(map, this_entry);
2357 		prev_entry->end = this_entry->end;
2358 		if (this_entry->object.vm_object)
2359 			vm_object_deallocate(this_entry->object.vm_object);
2360 		vm_map_entry_dispose(map, this_entry);
2361 	}
2362 	vm_map_unlock(map);
2363 }
2364 
2365 #include "opt_ddb.h"
2366 #ifdef DDB
2367 #include <sys/kernel.h>
2368 
2369 #include <ddb/ddb.h>
2370 
2371 /*
2372  *	vm_map_print:	[ debug ]
2373  */
2374 DB_SHOW_COMMAND(map, vm_map_print)
2375 {
2376 	/* XXX convert args. */
2377 	register vm_map_t map = (vm_map_t)addr;
2378 	boolean_t full = have_addr;
2379 
2380 	register vm_map_entry_t entry;
2381 
2382 	db_iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n",
2383 	    (map->is_main_map ? "Task" : "Share"),
2384 	    (int) map, (int) (map->pmap), map->ref_count, map->nentries,
2385 	    map->timestamp);
2386 
2387 	if (!full && db_indent)
2388 		return;
2389 
2390 	db_indent += 2;
2391 	for (entry = map->header.next; entry != &map->header;
2392 	    entry = entry->next) {
2393 		db_iprintf("map entry 0x%x: start=0x%x, end=0x%x, ",
2394 		    (int) entry, (int) entry->start, (int) entry->end);
2395 		if (map->is_main_map) {
2396 			static char *inheritance_name[4] =
2397 			{"share", "copy", "none", "donate_copy"};
2398 
2399 			db_printf("prot=%x/%x/%s, ",
2400 			    entry->protection,
2401 			    entry->max_protection,
2402 			    inheritance_name[entry->inheritance]);
2403 			if (entry->wired_count != 0)
2404 				db_printf("wired, ");
2405 		}
2406 		if (entry->is_a_map || entry->is_sub_map) {
2407 			db_printf("share=0x%x, offset=0x%x\n",
2408 			    (int) entry->object.share_map,
2409 			    (int) entry->offset);
2410 			if ((entry->prev == &map->header) ||
2411 			    (!entry->prev->is_a_map) ||
2412 			    (entry->prev->object.share_map !=
2413 				entry->object.share_map)) {
2414 				db_indent += 2;
2415 				vm_map_print((int)entry->object.share_map,
2416 					     full, 0, (char *)0);
2417 				db_indent -= 2;
2418 			}
2419 		} else {
2420 			db_printf("object=0x%x, offset=0x%x",
2421 			    (int) entry->object.vm_object,
2422 			    (int) entry->offset);
2423 			if (entry->copy_on_write)
2424 				db_printf(", copy (%s)",
2425 				    entry->needs_copy ? "needed" : "done");
2426 			db_printf("\n");
2427 
2428 			if ((entry->prev == &map->header) ||
2429 			    (entry->prev->is_a_map) ||
2430 			    (entry->prev->object.vm_object !=
2431 				entry->object.vm_object)) {
2432 				db_indent += 2;
2433 				vm_object_print((int)entry->object.vm_object,
2434 						full, 0, (char *)0);
2435 				db_indent -= 2;
2436 			}
2437 		}
2438 	}
2439 	db_indent -= 2;
2440 }
2441 #endif /* DDB */
2442