xref: /freebsd/sys/vm/vm_map.c (revision 61afd5bb22d787b0641523e7b9b95c964d669bd5)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $Id: vm_map.c,v 1.65 1997/01/01 04:45:03 dyson Exp $
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/malloc.h>
74 #include <sys/proc.h>
75 #include <sys/queue.h>
76 #include <sys/vmmeter.h>
77 #include <sys/mman.h>
78 
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <vm/vm_prot.h>
82 #include <vm/vm_inherit.h>
83 #include <vm/lock.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_kern.h>
89 #include <vm/vm_pager.h>
90 #include <vm/vm_extern.h>
91 #include <vm/default_pager.h>
92 
93 /*
94  *	Virtual memory maps provide for the mapping, protection,
95  *	and sharing of virtual memory objects.  In addition,
96  *	this module provides for an efficient virtual copy of
97  *	memory from one map to another.
98  *
99  *	Synchronization is required prior to most operations.
100  *
101  *	Maps consist of an ordered doubly-linked list of simple
102  *	entries; a single hint is used to speed up lookups.
103  *
104  *	In order to properly represent the sharing of virtual
105  *	memory regions among maps, the map structure is bi-level.
106  *	Top-level ("address") maps refer to regions of sharable
107  *	virtual memory.  These regions are implemented as
108  *	("sharing") maps, which then refer to the actual virtual
109  *	memory objects.  When two address maps "share" memory,
110  *	their top-level maps both have references to the same
111  *	sharing map.  When memory is virtual-copied from one
112  *	address map to another, the references in the sharing
113  *	maps are actually copied -- no copying occurs at the
114  *	virtual memory object level.
115  *
116  *	Since portions of maps are specified by start/end addresses,
117  *	which may not align with existing map entries, all
118  *	routines merely "clip" entries to these start/end values.
119  *	[That is, an entry is split into two, bordering at a
120  *	start or end value.]  Note that these clippings may not
121  *	always be necessary (as the two resulting entries are then
122  *	not changed); however, the clipping is done for convenience.
123  *	No attempt is currently made to "glue back together" two
124  *	abutting entries.
125  *
126  *	As mentioned above, virtual copy operations are performed
127  *	by copying VM object references from one sharing map to
128  *	another, and then marking both regions as copy-on-write.
129  *	It is important to note that only one writeable reference
130  *	to a VM object region exists in any map -- this means that
131  *	shadow object creation can be delayed until a write operation
132  *	occurs.
133  */
134 
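/*
 *	For example, an operation on the range [0x3000, 0x5000) applied to a
 *	single entry covering [0x2000, 0x6000) is handled by clipping (a
 *	minimal sketch using the clip macros defined later in this file,
 *	after the caller has looked the entry up):
 *
 *		vm_map_clip_start(map, entry, 0x3000);
 *		vm_map_clip_end(map, entry, 0x5000);
 *
 *	which leaves three entries, [0x2000, 0x3000), [0x3000, 0x5000) and
 *	[0x5000, 0x6000), of which only the middle one is then modified.
 */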
135 /*
136  *	vm_map_startup:
137  *
138  *	Initialize the vm_map module.  Must be called before
139  *	any other vm_map routines.
140  *
141  *	Map and entry structures are allocated from the general
142  *	purpose memory pool with some exceptions:
143  *
144  *	- The kernel map and kmem submap are allocated statically.
145  *	- Kernel map entries are allocated out of a static pool.
146  *
147  *	These restrictions are necessary since malloc() uses the
148  *	maps and requires map entries.
149  */
150 
151 vm_offset_t kentry_data;
152 vm_size_t kentry_data_size;
153 static vm_map_entry_t kentry_free;
154 static vm_map_t kmap_free;
155 extern char kstack[];
156 extern int inmprotect;
157 
158 static int kentry_count;
159 static vm_offset_t mapvm_start, mapvm, mapvmmax;
160 static int mapvmpgcnt;
161 
162 static struct vm_map_entry *mappool;
163 static int mappoolcnt;
164 #define KENTRY_LOW_WATER 128
165 
166 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
167 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
168 static vm_map_entry_t vm_map_entry_create __P((vm_map_t));
169 static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
170 static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
171 static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
172 static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t,
173 		vm_map_entry_t));
174 
175 void
176 vm_map_startup()
177 {
178 	register int i;
179 	register vm_map_entry_t mep;
180 	vm_map_t mp;
181 
182 	/*
183 	 * Static map structures for allocation before initialization of
184 	 * kernel map or kmem map.  vm_map_create knows how to deal with them.
185 	 */
186 	kmap_free = mp = (vm_map_t) kentry_data;
187 	i = MAX_KMAP;
188 	while (--i > 0) {
189 		mp->header.next = (vm_map_entry_t) (mp + 1);
190 		mp++;
191 	}
192 	mp++->header.next = NULL;
193 
194 	/*
195 	 * Form a free list of statically allocated kernel map entries with
196 	 * the rest.
197 	 */
198 	kentry_free = mep = (vm_map_entry_t) mp;
199 	kentry_count = i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep;
200 	while (--i > 0) {
201 		mep->next = mep + 1;
202 		mep++;
203 	}
204 	mep->next = NULL;
205 }
206 
207 /*
208  * Allocate a vmspace structure, including a vm_map and pmap,
209  * and initialize those structures.  The refcnt is set to 1.
210  * The remaining fields must be initialized by the caller.
211  */
212 struct vmspace *
213 vmspace_alloc(min, max, pageable)
214 	vm_offset_t min, max;
215 	int pageable;
216 {
217 	register struct vmspace *vm;
218 
219 	if (mapvmpgcnt == 0 && mapvm == 0) {
220 		mapvmpgcnt = (cnt.v_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE;
221 		mapvm_start = mapvm = kmem_alloc_pageable(kernel_map,
222 			mapvmpgcnt * PAGE_SIZE);
223 		mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE;
224 		if (!mapvm)
225 			mapvmpgcnt = 0;
226 	}
227 	MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK);
228 	bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm);
229 	vm_map_init(&vm->vm_map, min, max, pageable);
230 	pmap_pinit(&vm->vm_pmap);
231 	vm->vm_map.pmap = &vm->vm_pmap;		/* XXX */
232 	vm->vm_refcnt = 1;
233 	return (vm);
234 }
235 
236 void
237 vmspace_free(vm)
238 	register struct vmspace *vm;
239 {
240 
241 	if (vm->vm_refcnt == 0)
242 		panic("vmspace_free: attempt to free already freed vmspace");
243 
244 	if (--vm->vm_refcnt == 0) {
245 
246 		/*
247 		 * Lock the map, to wait out all other references to it.
248 		 * Delete all of the mappings and pages they hold, then call
249 		 * the pmap module to reclaim anything left.
250 		 */
251 		vm_map_lock(&vm->vm_map);
252 		(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
253 		    vm->vm_map.max_offset);
254 		vm_map_unlock(&vm->vm_map);
255 
256 		while( vm->vm_map.ref_count != 1)
257 			tsleep(&vm->vm_map.ref_count, PVM, "vmsfre", 0);
258 		--vm->vm_map.ref_count;
259 		vm_object_pmap_remove(vm->vm_upages_obj,
260 			0, vm->vm_upages_obj->size);
261 		vm_object_deallocate(vm->vm_upages_obj);
262 		pmap_release(&vm->vm_pmap);
263 		FREE(vm, M_VMMAP);
264 	} else {
265 		wakeup(&vm->vm_map.ref_count);
266 	}
267 }
268 
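/*
 *	A minimal lifecycle sketch for the two routines above, assuming the
 *	usual VM_MIN_ADDRESS and VM_MAXUSER_ADDRESS platform constants
 *	(they are not defined in this file):
 *
 *		struct vmspace *vm;
 *
 *		vm = vmspace_alloc(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS, TRUE);
 *		vm->vm_refcnt++;	(an additional user of the vmspace)
 *		...
 *		vmspace_free(vm);	(drops one reference)
 *		vmspace_free(vm);	(last reference: the map is torn down)
 */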
269 /*
270  *	vm_map_create:
271  *
272  *	Creates and returns a new empty VM map with
273  *	the given physical map structure, and having
274  *	the given lower and upper address bounds.
275  */
276 vm_map_t
277 vm_map_create(pmap, min, max, pageable)
278 	pmap_t pmap;
279 	vm_offset_t min, max;
280 	boolean_t pageable;
281 {
282 	register vm_map_t result;
283 
284 	if (kmem_map == NULL) {
285 		result = kmap_free;
286 		kmap_free = (vm_map_t) result->header.next;
287 		if (result == NULL)
288 			panic("vm_map_create: out of maps");
289 	} else
290 		MALLOC(result, vm_map_t, sizeof(struct vm_map),
291 		    M_VMMAP, M_WAITOK);
292 
293 	vm_map_init(result, min, max, pageable);
294 	result->pmap = pmap;
295 	return (result);
296 }
297 
298 /*
299  * Initialize an existing vm_map structure
300  * such as that in the vmspace structure.
301  * The pmap is set elsewhere.
302  */
303 void
304 vm_map_init(map, min, max, pageable)
305 	register struct vm_map *map;
306 	vm_offset_t min, max;
307 	boolean_t pageable;
308 {
309 	map->header.next = map->header.prev = &map->header;
310 	map->nentries = 0;
311 	map->size = 0;
312 	map->ref_count = 1;
313 	map->is_main_map = TRUE;
314 	map->min_offset = min;
315 	map->max_offset = max;
316 	map->entries_pageable = pageable;
317 	map->first_free = &map->header;
318 	map->hint = &map->header;
319 	map->timestamp = 0;
320 	lock_init(&map->lock, TRUE);
321 }
322 
323 /*
324  *	vm_map_entry_dispose:	[ internal use only ]
325  *
326  *	Inverse of vm_map_entry_create.
327  */
328 static void
329 vm_map_entry_dispose(map, entry)
330 	vm_map_t map;
331 	vm_map_entry_t entry;
332 {
333 	int s;
334 
335 	if (map == kernel_map || map == kmem_map ||
336 		map == mb_map || map == pager_map) {
337 		s = splvm();
338 		entry->next = kentry_free;
339 		kentry_free = entry;
340 		++kentry_count;
341 		splx(s);
342 	} else {
343 		entry->next = mappool;
344 		mappool = entry;
345 		++mappoolcnt;
346 	}
347 }
348 
349 /*
350  *	vm_map_entry_create:	[ internal use only ]
351  *
352  *	Allocates a VM map entry for insertion.
353  *	No entry fields are filled in.  This routine is
354  *	No entry fields are filled in.
355 static vm_map_entry_t
356 vm_map_entry_create(map)
357 	vm_map_t map;
358 {
359 	vm_map_entry_t entry;
360 	int i;
361 	int s;
362 
363 	/*
364 	 * This is a *very* nasty (and sort of incomplete) hack!!!!
365 	 */
366 	if (kentry_count < KENTRY_LOW_WATER) {
367 		s = splvm();
368 		if (mapvmpgcnt && mapvm) {
369 			vm_page_t m;
370 
371 			m = vm_page_alloc(kernel_object,
372 			        OFF_TO_IDX(mapvm - VM_MIN_KERNEL_ADDRESS),
373 				    (map == kmem_map || map == mb_map) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL);
374 
375 			if (m) {
376 				int newentries;
377 
378 				newentries = (PAGE_SIZE / sizeof(struct vm_map_entry));
379 				vm_page_wire(m);
380 				PAGE_WAKEUP(m);
381 				m->valid = VM_PAGE_BITS_ALL;
382 				pmap_kenter(mapvm, VM_PAGE_TO_PHYS(m));
383 				m->flags |= PG_WRITEABLE;
384 
385 				entry = (vm_map_entry_t) mapvm;
386 				mapvm += PAGE_SIZE;
387 				--mapvmpgcnt;
388 
389 				for (i = 0; i < newentries; i++) {
390 					vm_map_entry_dispose(kernel_map, entry);
391 					entry++;
392 				}
393 			}
394 		}
395 		splx(s);
396 	}
397 
398 	if (map == kernel_map || map == kmem_map ||
399 		map == mb_map || map == pager_map) {
400 		s = splvm();
401 		entry = kentry_free;
402 		if (entry) {
403 			kentry_free = entry->next;
404 			--kentry_count;
405 		} else {
406 			panic("vm_map_entry_create: out of map entries for kernel");
407 		}
408 		splx(s);
409 	} else {
410 		entry = mappool;
411 		if (entry) {
412 			mappool = entry->next;
413 			--mappoolcnt;
414 		} else {
415 			MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry),
416 			    M_VMMAPENT, M_WAITOK);
417 		}
418 	}
419 
420 	return (entry);
421 }
422 
423 /*
424  *	vm_map_entry_{un,}link:
425  *
426  *	Insert/remove entries from maps.
427  */
428 #define	vm_map_entry_link(map, after_where, entry) \
429 		{ \
430 		(map)->nentries++; \
431 		(entry)->prev = (after_where); \
432 		(entry)->next = (after_where)->next; \
433 		(entry)->prev->next = (entry); \
434 		(entry)->next->prev = (entry); \
435 		}
436 #define	vm_map_entry_unlink(map, entry) \
437 		{ \
438 		(map)->nentries--; \
439 		(entry)->next->prev = (entry)->prev; \
440 		(entry)->prev->next = (entry)->next; \
441 		}
442 
443 /*
444  *	vm_map_reference:
445  *
446  *	Creates another valid reference to the given map.
447  *
448  */
449 void
450 vm_map_reference(map)
451 	register vm_map_t map;
452 {
453 	if (map == NULL)
454 		return;
455 
456 	map->ref_count++;
457 }
458 
459 /*
460  *	vm_map_deallocate:
461  *
462  *	Removes a reference from the specified map,
463  *	destroying it if no references remain.
464  *	The map should not be locked.
465  */
466 void
467 vm_map_deallocate(map)
468 	register vm_map_t map;
469 {
470 	register int c;
471 
472 	if (map == NULL)
473 		return;
474 
475 	c = map->ref_count;
476 
477 	if (c == 0)
478 		panic("vm_map_deallocate: deallocating already freed map");
479 
480 	if (c != 1) {
481 		--map->ref_count;
482 		wakeup(&map->ref_count);
483 		return;
484 	}
485 	/*
486 	 * Lock the map, to wait out all other references to it.
487 	 */
488 
489 	vm_map_lock(map);
490 	(void) vm_map_delete(map, map->min_offset, map->max_offset);
491 	--map->ref_count;
492 	if( map->ref_count != 0) {
493 		vm_map_unlock(map);
494 		return;
495 	}
496 
497 	pmap_destroy(map->pmap);
498 	FREE(map, M_VMMAP);
499 }
500 
501 /*
502  *	SAVE_HINT:
503  *
504  *	Saves the specified entry as the hint for
505  *	future lookups.
506  */
507 #define	SAVE_HINT(map,value) \
508 		(map)->hint = (value);
509 
510 /*
511  *	vm_map_lookup_entry:	[ internal use only ]
512  *
513  *	Finds the map entry containing (or
514  *	immediately preceding) the specified address
515  *	in the given map; the entry is returned
516  *	in the "entry" parameter.  The boolean
517  *	result indicates whether the address is
518  *	actually contained in the map.
519  */
520 boolean_t
521 vm_map_lookup_entry(map, address, entry)
522 	register vm_map_t map;
523 	register vm_offset_t address;
524 	vm_map_entry_t *entry;	/* OUT */
525 {
526 	register vm_map_entry_t cur;
527 	register vm_map_entry_t last;
528 
529 	/*
530 	 * Start looking either from the head of the list, or from the hint.
531 	 */
532 
533 	cur = map->hint;
534 
535 	if (cur == &map->header)
536 		cur = cur->next;
537 
538 	if (address >= cur->start) {
539 		/*
540 		 * Go from hint to end of list.
541 		 *
542 		 * But first, make a quick check to see if we are already looking
543 		 * at the entry we want (which is usually the case). Note also
544 		 * that we don't need to save the hint here... it is the same
545 		 * hint (unless we are at the header, in which case the hint
546 		 * didn't buy us anything anyway).
547 		 */
548 		last = &map->header;
549 		if ((cur != last) && (cur->end > address)) {
550 			*entry = cur;
551 			return (TRUE);
552 		}
553 	} else {
554 		/*
555 		 * Go from start to hint, *inclusively*
556 		 */
557 		last = cur->next;
558 		cur = map->header.next;
559 	}
560 
561 	/*
562 	 * Search linearly
563 	 */
564 
565 	while (cur != last) {
566 		if (cur->end > address) {
567 			if (address >= cur->start) {
568 				/*
569 				 * Save this lookup for future hints, and
570 				 * return
571 				 */
572 
573 				*entry = cur;
574 				SAVE_HINT(map, cur);
575 				return (TRUE);
576 			}
577 			break;
578 		}
579 		cur = cur->next;
580 	}
581 	*entry = cur->prev;
582 	SAVE_HINT(map, *entry);
583 	return (FALSE);
584 }
585 
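/*
 *	Nearly every operation in this module uses the same idiom around
 *	vm_map_lookup_entry (sketch; the map must already be locked):
 *
 *		if (vm_map_lookup_entry(map, start, &entry))
 *			vm_map_clip_start(map, entry, start);
 *		else
 *			entry = entry->next;
 *
 *	On TRUE, *entry is the entry containing the address, and the
 *	subsequent clip makes it begin exactly at start; on FALSE, *entry is
 *	the entry immediately preceding the address, so its successor is the
 *	first entry at or beyond the requested range.
 */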
586 /*
587  *	vm_map_insert:
588  *
589  *	Inserts the given whole VM object into the target
590  *	map at the specified address range.  The object's
591  *	size should match that of the address range.
592  *
593  *	Requires that the map be locked, and leaves it so.
594  */
595 int
596 vm_map_insert(map, object, offset, start, end, prot, max, cow)
597 	vm_map_t map;
598 	vm_object_t object;
599 	vm_ooffset_t offset;
600 	vm_offset_t start;
601 	vm_offset_t end;
602 	vm_prot_t prot, max;
603 	int cow;
604 {
605 	register vm_map_entry_t new_entry;
606 	register vm_map_entry_t prev_entry;
607 	vm_map_entry_t temp_entry;
608 	vm_object_t prev_object;
609 
610 	if ((object != NULL) && (cow & MAP_NOFAULT)) {
611 		panic("vm_map_insert: paradoxical MAP_NOFAULT request");
612 	}
613 
614 	/*
615 	 * Check that the start and end points are not bogus.
616 	 */
617 
618 	if ((start < map->min_offset) || (end > map->max_offset) ||
619 	    (start >= end))
620 		return (KERN_INVALID_ADDRESS);
621 
622 	/*
623 	 * Find the entry prior to the proposed starting address; if it's part
624 	 * of an existing entry, this range is bogus.
625 	 */
626 
627 	if (vm_map_lookup_entry(map, start, &temp_entry))
628 		return (KERN_NO_SPACE);
629 
630 	prev_entry = temp_entry;
631 
632 	/*
633 	 * Assert that the next entry doesn't overlap the end point.
634 	 */
635 
636 	if ((prev_entry->next != &map->header) &&
637 	    (prev_entry->next->start < end))
638 		return (KERN_NO_SPACE);
639 
640 	/*
641 	 * See if we can avoid creating a new entry by extending one of our
642 	 * neighbors.  Or at least extend the object.
643 	 */
644 
645 	if ((object == NULL) &&
646 	    (prev_entry != &map->header) &&
647 	    ( ! prev_entry->is_a_map) &&
648 	    ( ! prev_entry->is_sub_map) &&
649 	    (prev_entry->end == start) &&
650 	    (prev_entry->wired_count == 0)) {
651 
652 		u_char needs_copy = (cow & MAP_COPY_NEEDED) != 0;
653 		u_char copy_on_write = (cow & MAP_COPY_ON_WRITE) != 0;
654 		u_char nofault = (cow & MAP_NOFAULT) != 0;
655 
656 		if ((needs_copy == prev_entry->needs_copy) &&
657 		    (copy_on_write == prev_entry->copy_on_write) &&
658 		    (nofault == prev_entry->nofault) &&
659 		    (nofault ||
660 		     vm_object_coalesce(prev_entry->object.vm_object,
661 					OFF_TO_IDX(prev_entry->offset),
662 					(vm_size_t) (prev_entry->end - prev_entry->start),
663 					(vm_size_t) (end - prev_entry->end)))) {
664 
665 			/*
666 			 * Coalesced the two objects.  Can we extend the
667 			 * previous map entry to include the new range?
668 			 */
669 			if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
670 			    (prev_entry->protection == prot) &&
671 			    (prev_entry->max_protection == max)) {
672 
673 				map->size += (end - prev_entry->end);
674 				prev_entry->end = end;
675 				if (!nofault) {
676 					prev_object = prev_entry->object.vm_object;
677 					default_pager_convert_to_swapq(prev_object);
678 				}
679 				return (KERN_SUCCESS);
680 			}
681 			else {
682 				object = prev_entry->object.vm_object;
683 				offset = prev_entry->offset + (prev_entry->end -
684 							       prev_entry->start);
685 
686 				vm_object_reference(object);
687 			}
688 		}
689 	}
690 
691 	/*
692 	 * Create a new entry
693 	 */
694 
695 	new_entry = vm_map_entry_create(map);
696 	new_entry->start = start;
697 	new_entry->end = end;
698 
699 	new_entry->is_a_map = FALSE;
700 	new_entry->is_sub_map = FALSE;
701 	new_entry->object.vm_object = object;
702 	new_entry->offset = offset;
703 
704 	if (cow & MAP_COPY_NEEDED)
705 		new_entry->needs_copy = TRUE;
706 	else
707 		new_entry->needs_copy = FALSE;
708 
709 	if (cow & MAP_COPY_ON_WRITE)
710 		new_entry->copy_on_write = TRUE;
711 	else
712 		new_entry->copy_on_write = FALSE;
713 
714 	if (cow & MAP_NOFAULT)
715 		new_entry->nofault = TRUE;
716 	else
717 		new_entry->nofault = FALSE;
718 
719 	if (map->is_main_map) {
720 		new_entry->inheritance = VM_INHERIT_DEFAULT;
721 		new_entry->protection = prot;
722 		new_entry->max_protection = max;
723 		new_entry->wired_count = 0;
724 	}
725 	/*
726 	 * Insert the new entry into the list
727 	 */
728 
729 	vm_map_entry_link(map, prev_entry, new_entry);
730 	map->size += new_entry->end - new_entry->start;
731 
732 	/*
733 	 * Update the free space hint
734 	 */
735 	if ((map->first_free == prev_entry) &&
736 		(prev_entry->end >= new_entry->start))
737 		map->first_free = new_entry;
738 
739 	default_pager_convert_to_swapq(object);
740 	return (KERN_SUCCESS);
741 }
742 
743 /*
744  * Find sufficient space for `length' bytes in the given map, starting at
745  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
746  */
747 int
748 vm_map_findspace(map, start, length, addr)
749 	register vm_map_t map;
750 	register vm_offset_t start;
751 	vm_size_t length;
752 	vm_offset_t *addr;
753 {
754 	register vm_map_entry_t entry, next;
755 	register vm_offset_t end;
756 
757 	if (start < map->min_offset)
758 		start = map->min_offset;
759 	if (start > map->max_offset)
760 		return (1);
761 
762 	/*
763 	 * Look for the first possible address; if there's already something
764 	 * at this address, we have to start after it.
765 	 */
766 	if (start == map->min_offset) {
767 		if ((entry = map->first_free) != &map->header)
768 			start = entry->end;
769 	} else {
770 		vm_map_entry_t tmp;
771 
772 		if (vm_map_lookup_entry(map, start, &tmp))
773 			start = tmp->end;
774 		entry = tmp;
775 	}
776 
777 	/*
778 	 * Look through the rest of the map, trying to fit a new region in the
779 	 * gap between existing regions, or after the very last region.
780 	 */
781 	for (;; start = (entry = next)->end) {
782 		/*
783 		 * Find the end of the proposed new region.  Be sure we didn't
784 		 * go beyond the end of the map, or wrap around the address;
785 		 * if so, we lose.  Otherwise, if this is the last entry, or
786 		 * if the proposed new region fits before the next entry, we
787 		 * win.
788 		 */
789 		end = start + length;
790 		if (end > map->max_offset || end < start)
791 			return (1);
792 		next = entry->next;
793 		if (next == &map->header || next->start >= end)
794 			break;
795 	}
796 	SAVE_HINT(map, entry);
797 	*addr = start;
798 	if (map == kernel_map && round_page(start + length) > kernel_vm_end)
799 		pmap_growkernel(round_page(start + length));
800 	return (0);
801 }
802 
803 /*
804  *	vm_map_find finds an unallocated region in the target address
805  *	map with the given length.  The search is defined to be
806  *	first-fit from the specified address; the region found is
807  *	returned in the same parameter.
808  *
809  */
810 int
811 vm_map_find(map, object, offset, addr, length, find_space, prot, max, cow)
812 	vm_map_t map;
813 	vm_object_t object;
814 	vm_ooffset_t offset;
815 	vm_offset_t *addr;	/* IN/OUT */
816 	vm_size_t length;
817 	boolean_t find_space;
818 	vm_prot_t prot, max;
819 	int cow;
820 {
821 	register vm_offset_t start;
822 	int result, s = 0;
823 
824 	start = *addr;
825 
826 	if (map == kmem_map || map == mb_map)
827 		s = splvm();
828 
829 	vm_map_lock(map);
830 	if (find_space) {
831 		if (vm_map_findspace(map, start, length, addr)) {
832 			vm_map_unlock(map);
833 			if (map == kmem_map || map == mb_map)
834 				splx(s);
835 			return (KERN_NO_SPACE);
836 		}
837 		start = *addr;
838 	}
839 	result = vm_map_insert(map, object, offset,
840 		start, start + length, prot, max, cow);
841 	vm_map_unlock(map);
842 
843 	if (map == kmem_map || map == mb_map)
844 		splx(s);
845 
846 	return (result);
847 }
848 
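/*
 *	A minimal caller sketch for vm_map_find: allocate one page of kernel
 *	virtual address space with no backing object (anonymous, zero-fill),
 *	letting the first-fit search choose the address:
 *
 *		vm_offset_t addr;
 *		int rv;
 *
 *		addr = vm_map_min(kernel_map);
 *		rv = vm_map_find(kernel_map, NULL, (vm_ooffset_t) 0, &addr,
 *		    PAGE_SIZE, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
 *		if (rv != KERN_SUCCESS)
 *			...
 */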
849 /*
850  *	vm_map_simplify_entry:
851  *
852  *	Simplify the given map entry by merging with either neighbor.
853  */
854 void
855 vm_map_simplify_entry(map, entry)
856 	vm_map_t map;
857 	vm_map_entry_t entry;
858 {
859 	vm_map_entry_t next, prev;
860 	vm_size_t prevsize, esize;
861 
862 	if (entry->is_sub_map || entry->is_a_map)
863 		return;
864 
865 	prev = entry->prev;
866 	if (prev != &map->header) {
867 		prevsize = prev->end - prev->start;
868 		if ( (prev->end == entry->start) &&
869 		     (prev->object.vm_object == entry->object.vm_object) &&
870 		     (!prev->object.vm_object || (prev->object.vm_object->behavior == entry->object.vm_object->behavior)) &&
871 		     (!prev->object.vm_object ||
872 			(prev->offset + prevsize == entry->offset)) &&
873 		     (prev->needs_copy == entry->needs_copy) &&
874 		     (prev->copy_on_write == entry->copy_on_write) &&
875 		     (prev->protection == entry->protection) &&
876 		     (prev->max_protection == entry->max_protection) &&
877 		     (prev->inheritance == entry->inheritance) &&
878 		     (prev->is_a_map == FALSE) &&
879 		     (prev->is_sub_map == FALSE) &&
880 		     (prev->wired_count == entry->wired_count)) {
881 			if (map->first_free == prev)
882 				map->first_free = entry;
883 			if (map->hint == prev)
884 				map->hint = entry;
885 			vm_map_entry_unlink(map, prev);
886 			entry->start = prev->start;
887 			entry->offset = prev->offset;
888 			if (prev->object.vm_object)
889 				vm_object_deallocate(prev->object.vm_object);
890 			vm_map_entry_dispose(map, prev);
891 		}
892 	}
893 
894 	next = entry->next;
895 	if (next != &map->header) {
896 		esize = entry->end - entry->start;
897 		if ((entry->end == next->start) &&
898 		    (next->object.vm_object == entry->object.vm_object) &&
899 		    (!next->object.vm_object || (next->object.vm_object->behavior == entry->object.vm_object->behavior)) &&
900 		     (!entry->object.vm_object ||
901 			(entry->offset + esize == next->offset)) &&
902 		    (next->needs_copy == entry->needs_copy) &&
903 		    (next->copy_on_write == entry->copy_on_write) &&
904 		    (next->protection == entry->protection) &&
905 		    (next->max_protection == entry->max_protection) &&
906 		    (next->inheritance == entry->inheritance) &&
907 		    (next->is_a_map == FALSE) &&
908 		    (next->is_sub_map == FALSE) &&
909 		    (next->wired_count == entry->wired_count)) {
910 			if (map->first_free == next)
911 				map->first_free = entry;
912 			if (map->hint == next)
913 				map->hint = entry;
914 			vm_map_entry_unlink(map, next);
915 			entry->end = next->end;
916 			if (next->object.vm_object)
917 				vm_object_deallocate(next->object.vm_object);
918 			vm_map_entry_dispose(map, next);
919 	        }
920 	}
921 }
922 /*
923  *	vm_map_clip_start:	[ internal use only ]
924  *
925  *	Asserts that the given entry begins at or after
926  *	the specified address; if necessary,
927  *	it splits the entry into two.
928  */
929 #define vm_map_clip_start(map, entry, startaddr) \
930 { \
931 	if (startaddr > entry->start) \
932 		_vm_map_clip_start(map, entry, startaddr); \
933 }
934 
935 /*
936  *	This routine is called only when it is known that
937  *	the entry must be split.
938  */
939 static void
940 _vm_map_clip_start(map, entry, start)
941 	register vm_map_t map;
942 	register vm_map_entry_t entry;
943 	register vm_offset_t start;
944 {
945 	register vm_map_entry_t new_entry;
946 
947 	/*
948 	 * Split off the front portion -- note that we must insert the new
949 	 * entry BEFORE this one, so that this entry has the specified
950 	 * starting address.
951 	 */
952 
953 	vm_map_simplify_entry(map, entry);
954 
955 	new_entry = vm_map_entry_create(map);
956 	*new_entry = *entry;
957 
958 	new_entry->end = start;
959 	entry->offset += (start - entry->start);
960 	entry->start = start;
961 
962 	vm_map_entry_link(map, entry->prev, new_entry);
963 
964 	if (entry->is_a_map || entry->is_sub_map)
965 		vm_map_reference(new_entry->object.share_map);
966 	else
967 		vm_object_reference(new_entry->object.vm_object);
968 }
969 
970 /*
971  *	vm_map_clip_end:	[ internal use only ]
972  *
973  *	Asserts that the given entry ends at or before
974  *	the specified address; if necessary,
975  *	it splits the entry into two.
976  */
977 
978 #define vm_map_clip_end(map, entry, endaddr) \
979 { \
980 	if (endaddr < entry->end) \
981 		_vm_map_clip_end(map, entry, endaddr); \
982 }
983 
984 /*
985  *	This routine is called only when it is known that
986  *	the entry must be split.
987  */
988 static void
989 _vm_map_clip_end(map, entry, end)
990 	register vm_map_t map;
991 	register vm_map_entry_t entry;
992 	register vm_offset_t end;
993 {
994 	register vm_map_entry_t new_entry;
995 
996 	/*
997 	 * Create a new entry and insert it AFTER the specified entry
998 	 */
999 
1000 	new_entry = vm_map_entry_create(map);
1001 	*new_entry = *entry;
1002 
1003 	new_entry->start = entry->end = end;
1004 	new_entry->offset += (end - entry->start);
1005 
1006 	vm_map_entry_link(map, entry, new_entry);
1007 
1008 	if (entry->is_a_map || entry->is_sub_map)
1009 		vm_map_reference(new_entry->object.share_map);
1010 	else
1011 		vm_object_reference(new_entry->object.vm_object);
1012 }
1013 
1014 /*
1015  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
1016  *
1017  *	Asserts that the starting and ending region
1018  *	addresses fall within the valid range of the map.
1019  */
1020 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
1021 		{					\
1022 		if (start < vm_map_min(map))		\
1023 			start = vm_map_min(map);	\
1024 		if (end > vm_map_max(map))		\
1025 			end = vm_map_max(map);		\
1026 		if (start > end)			\
1027 			start = end;			\
1028 		}
1029 
1030 /*
1031  *	vm_map_submap:		[ kernel use only ]
1032  *
1033  *	Mark the given range as handled by a subordinate map.
1034  *
1035  *	This range must have been created with vm_map_find,
1036  *	and no other operations may have been performed on this
1037  *	range prior to calling vm_map_submap.
1038  *
1039  *	Only a limited number of operations can be performed
1040  *	within this range after calling vm_map_submap:
1041  *		vm_fault
1042  *	[Don't try vm_map_copy!]
1043  *
1044  *	To remove a submapping, one must first remove the
1045  *	range from the superior map, and then destroy the
1046  *	submap (if desired).  [Better yet, don't try it.]
1047  */
1048 int
1049 vm_map_submap(map, start, end, submap)
1050 	register vm_map_t map;
1051 	register vm_offset_t start;
1052 	register vm_offset_t end;
1053 	vm_map_t submap;
1054 {
1055 	vm_map_entry_t entry;
1056 	register int result = KERN_INVALID_ARGUMENT;
1057 
1058 	vm_map_lock(map);
1059 
1060 	VM_MAP_RANGE_CHECK(map, start, end);
1061 
1062 	if (vm_map_lookup_entry(map, start, &entry)) {
1063 		vm_map_clip_start(map, entry, start);
1064 	} else
1065 		entry = entry->next;
1066 
1067 	vm_map_clip_end(map, entry, end);
1068 
1069 	if ((entry->start == start) && (entry->end == end) &&
1070 	    (!entry->is_a_map) &&
1071 	    (entry->object.vm_object == NULL) &&
1072 	    (!entry->copy_on_write)) {
1073 		entry->is_a_map = FALSE;
1074 		entry->is_sub_map = TRUE;
1075 		vm_map_reference(entry->object.sub_map = submap);
1076 		result = KERN_SUCCESS;
1077 	}
1078 	vm_map_unlock(map);
1079 
1080 	return (result);
1081 }
1082 
1083 /*
1084  *	vm_map_protect:
1085  *
1086  *	Sets the protection of the specified address
1087  *	region in the target map.  If "set_max" is
1088  *	specified, the maximum protection is to be set;
1089  *	otherwise, only the current protection is affected.
1090  */
1091 int
1092 vm_map_protect(map, start, end, new_prot, set_max)
1093 	register vm_map_t map;
1094 	register vm_offset_t start;
1095 	register vm_offset_t end;
1096 	register vm_prot_t new_prot;
1097 	register boolean_t set_max;
1098 {
1099 	register vm_map_entry_t current;
1100 	vm_map_entry_t entry;
1101 
1102 	vm_map_lock(map);
1103 
1104 	VM_MAP_RANGE_CHECK(map, start, end);
1105 
1106 	if (vm_map_lookup_entry(map, start, &entry)) {
1107 		vm_map_clip_start(map, entry, start);
1108 	} else {
1109 		entry = entry->next;
1110 	}
1111 
1112 	/*
1113 	 * Make a first pass to check for protection violations.
1114 	 */
1115 
1116 	current = entry;
1117 	while ((current != &map->header) && (current->start < end)) {
1118 		if (current->is_sub_map) {
1119 			vm_map_unlock(map);
1120 			return (KERN_INVALID_ARGUMENT);
1121 		}
1122 		if ((new_prot & current->max_protection) != new_prot) {
1123 			vm_map_unlock(map);
1124 			return (KERN_PROTECTION_FAILURE);
1125 		}
1126 		current = current->next;
1127 	}
1128 
1129 	/*
1130 	 * Go back and fix up protections. [Note that clipping is not
1131 	 * necessary the second time.]
1132 	 */
1133 
1134 	current = entry;
1135 
1136 	while ((current != &map->header) && (current->start < end)) {
1137 		vm_prot_t old_prot;
1138 
1139 		vm_map_clip_end(map, current, end);
1140 
1141 		old_prot = current->protection;
1142 		if (set_max)
1143 			current->protection =
1144 			    (current->max_protection = new_prot) &
1145 			    old_prot;
1146 		else
1147 			current->protection = new_prot;
1148 
1149 		/*
1150 		 * Update physical map if necessary. Worry about copy-on-write
1151 		 * here -- CHECK THIS XXX
1152 		 */
1153 
1154 		if (current->protection != old_prot) {
1155 #define MASK(entry)	((entry)->copy_on_write ? ~VM_PROT_WRITE : \
1156 							VM_PROT_ALL)
1157 #define	max(a,b)	((a) > (b) ? (a) : (b))
1158 
1159 			if (current->is_a_map) {
1160 				vm_map_entry_t share_entry;
1161 				vm_offset_t share_end;
1162 
1163 				vm_map_lock(current->object.share_map);
1164 				(void) vm_map_lookup_entry(
1165 				    current->object.share_map,
1166 				    current->offset,
1167 				    &share_entry);
1168 				share_end = current->offset +
1169 				    (current->end - current->start);
1170 				while ((share_entry !=
1171 					&current->object.share_map->header) &&
1172 				    (share_entry->start < share_end)) {
1173 
1174 					pmap_protect(map->pmap,
1175 					    (max(share_entry->start,
1176 						    current->offset) -
1177 						current->offset +
1178 						current->start),
1179 					    min(share_entry->end,
1180 						share_end) -
1181 					    current->offset +
1182 					    current->start,
1183 					    current->protection &
1184 					    MASK(share_entry));
1185 
1186 					share_entry = share_entry->next;
1187 				}
1188 				vm_map_unlock(current->object.share_map);
1189 			} else
1190 				pmap_protect(map->pmap, current->start,
1191 				    current->end,
1192 				    current->protection & MASK(entry));
1193 #undef	max
1194 #undef	MASK
1195 		}
1196 		current = current->next;
1197 	}
1198 
1199 	vm_map_simplify_entry(map, entry);
1200 	vm_map_unlock(map);
1201 	return (KERN_SUCCESS);
1202 }
1203 
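/*
 *	For example, an mprotect(addr, len, PROT_READ)-style request on the
 *	current process reduces to the following sketch (assuming the usual
 *	curproc->p_vmspace layout and the trunc_page()/round_page() macros,
 *	none of which are defined in this file):
 *
 *		rv = vm_map_protect(&curproc->p_vmspace->vm_map,
 *		    trunc_page(addr), round_page(addr + len),
 *		    VM_PROT_READ, FALSE);
 *
 *	Passing set_max = TRUE instead sets max_protection and clamps the
 *	current protection to the intersection with its old value.
 */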
1204 /*
1205  *	vm_map_madvise:
1206  *
1207  * 	This routine traverses a process's map, handling the madvise
1208  *	system call.
1209  */
1210 void
1211 vm_map_madvise(map, pmap, start, end, advise)
1212 	vm_map_t map;
1213 	pmap_t pmap;
1214 	vm_offset_t start, end;
1215 	int advise;
1216 {
1217 	register vm_map_entry_t current;
1218 	vm_map_entry_t entry;
1219 
1220 	vm_map_lock(map);
1221 
1222 	VM_MAP_RANGE_CHECK(map, start, end);
1223 
1224 	if (vm_map_lookup_entry(map, start, &entry)) {
1225 		vm_map_clip_start(map, entry, start);
1226 	} else
1227 		entry = entry->next;
1228 
1229 	for(current = entry;
1230 		(current != &map->header) && (current->start < end);
1231 		current = current->next) {
1232 		if (current->is_a_map || current->is_sub_map) {
1233 			continue;
1234 		}
1235 		vm_map_clip_end(map, current, end);
1236 		switch (advise) {
1237 	case MADV_NORMAL:
1238 			current->object.vm_object->behavior = OBJ_NORMAL;
1239 			break;
1240 	case MADV_SEQUENTIAL:
1241 			current->object.vm_object->behavior = OBJ_SEQUENTIAL;
1242 			break;
1243 	case MADV_RANDOM:
1244 			current->object.vm_object->behavior = OBJ_RANDOM;
1245 			break;
1246 	/*
1247 	 * Right now, we could handle DONTNEED and WILLNEED with common code.
1248 	 * They are mostly the same, except for the potential async reads (NYI).
1249 	 */
1250 	case MADV_FREE:
1251 	case MADV_DONTNEED:
1252 			{
1253 				vm_pindex_t pindex;
1254 				int count;
1255 				vm_size_t size = current->end - current->start;
1256 				pindex = OFF_TO_IDX(current->offset);
1257 				count = OFF_TO_IDX(size);
1258 				/*
1259 				 * MADV_DONTNEED removes the page from all
1260 				 * pmaps, so pmap_remove is not necessary.
1261 				 */
1262 				vm_object_madvise(current->object.vm_object,
1263 					pindex, count, advise);
1264 			}
1265 			break;
1266 
1267 	case MADV_WILLNEED:
1268 			{
1269 				vm_pindex_t pindex;
1270 				int count;
1271 				vm_size_t size = current->end - current->start;
1272 				pindex = OFF_TO_IDX(current->offset);
1273 				count = OFF_TO_IDX(size);
1274 				vm_object_madvise(current->object.vm_object,
1275 					pindex, count, advise);
1276 				pmap_object_init_pt(pmap, current->start,
1277 					current->object.vm_object, pindex,
1278 					(count << PAGE_SHIFT), 0);
1279 			}
1280 			break;
1281 
1282 	default:
1283 			break;
1284 		}
1285 	}
1286 
1287 	vm_map_simplify_entry(map, entry);
1288 	vm_map_unlock(map);
1289 	return;
1290 }
1291 
1292 
1293 /*
1294  *	vm_map_inherit:
1295  *
1296  *	Sets the inheritance of the specified address
1297  *	range in the target map.  Inheritance
1298  *	affects how the map will be shared with
1299  *	child maps at the time of vm_map_fork.
1300  */
1301 int
1302 vm_map_inherit(map, start, end, new_inheritance)
1303 	register vm_map_t map;
1304 	register vm_offset_t start;
1305 	register vm_offset_t end;
1306 	register vm_inherit_t new_inheritance;
1307 {
1308 	register vm_map_entry_t entry;
1309 	vm_map_entry_t temp_entry;
1310 
1311 	switch (new_inheritance) {
1312 	case VM_INHERIT_NONE:
1313 	case VM_INHERIT_COPY:
1314 	case VM_INHERIT_SHARE:
1315 		break;
1316 	default:
1317 		return (KERN_INVALID_ARGUMENT);
1318 	}
1319 
1320 	vm_map_lock(map);
1321 
1322 	VM_MAP_RANGE_CHECK(map, start, end);
1323 
1324 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1325 		entry = temp_entry;
1326 		vm_map_clip_start(map, entry, start);
1327 	} else
1328 		entry = temp_entry->next;
1329 
1330 	while ((entry != &map->header) && (entry->start < end)) {
1331 		vm_map_clip_end(map, entry, end);
1332 
1333 		entry->inheritance = new_inheritance;
1334 
1335 		entry = entry->next;
1336 	}
1337 
1338 	vm_map_simplify_entry(map, temp_entry);
1339 	vm_map_unlock(map);
1340 	return (KERN_SUCCESS);
1341 }
1342 
1343 /*
1344  * Implement the semantics of mlock
1345  */
1346 int
1347 vm_map_user_pageable(map, start, end, new_pageable)
1348 	register vm_map_t map;
1349 	register vm_offset_t start;
1350 	register vm_offset_t end;
1351 	register boolean_t new_pageable;
1352 {
1353 	register vm_map_entry_t entry;
1354 	vm_map_entry_t start_entry;
1355 	register vm_offset_t failed = 0;
1356 	int rv;
1357 
1358 	vm_map_lock(map);
1359 	VM_MAP_RANGE_CHECK(map, start, end);
1360 
1361 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1362 		vm_map_unlock(map);
1363 		return (KERN_INVALID_ADDRESS);
1364 	}
1365 
1366 	if (new_pageable) {
1367 
1368 		entry = start_entry;
1369 		vm_map_clip_start(map, entry, start);
1370 
1371 		/*
1372 		 * Now decrement the wiring count for each region. If a region
1373 		 * becomes completely unwired, unwire its physical pages and
1374 		 * mappings.
1375 		 */
1376 		lock_set_recursive(&map->lock);
1377 
1378 		entry = start_entry;
1379 		while ((entry != &map->header) && (entry->start < end)) {
1380 			if (entry->user_wired) {
1381 				vm_map_clip_end(map, entry, end);
1382 				entry->user_wired = 0;
1383 				entry->wired_count--;
1384 				if (entry->wired_count == 0)
1385 					vm_fault_unwire(map, entry->start, entry->end);
1386 			}
1387 			entry = entry->next;
1388 		}
1389 		vm_map_simplify_entry(map, start_entry);
1390 		lock_clear_recursive(&map->lock);
1391 	} else {
1392 
1393 		/*
1394 		 * Because of the possibility of blocking, etc., we restart the
1395 		 * scan through the process's map entries from the beginning so that
1396 		 * we don't end up depending on a map entry that could have
1397 		 * changed.
1398 		 */
1399 	rescan:
1400 
1401 		entry = start_entry;
1402 
1403 		while ((entry != &map->header) && (entry->start < end)) {
1404 
1405 			if (entry->user_wired != 0) {
1406 				entry = entry->next;
1407 				continue;
1408 			}
1409 
1410 			if (entry->wired_count != 0) {
1411 				entry->wired_count++;
1412 				entry->user_wired = 1;
1413 				entry = entry->next;
1414 				continue;
1415 			}
1416 
1417 			/* Here on entry being newly wired */
1418 
1419 			if (!entry->is_a_map && !entry->is_sub_map) {
1420 				int copyflag = entry->needs_copy;
1421 				if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1422 
1423 					vm_object_shadow(&entry->object.vm_object,
1424 					    &entry->offset,
1425 					    OFF_TO_IDX(entry->end
1426 						- entry->start));
1427 					entry->needs_copy = FALSE;
1428 
1429 				} else if (entry->object.vm_object == NULL) {
1430 
1431 					entry->object.vm_object =
1432 					    vm_object_allocate(OBJT_DEFAULT,
1433 						OFF_TO_IDX(entry->end - entry->start));
1434 					entry->offset = (vm_offset_t) 0;
1435 
1436 				}
1437 				default_pager_convert_to_swapq(entry->object.vm_object);
1438 			}
1439 
1440 			vm_map_clip_start(map, entry, start);
1441 			vm_map_clip_end(map, entry, end);
1442 
1443 			entry->wired_count++;
1444 			entry->user_wired = 1;
1445 
1446 			/* First we need to allow map modifications */
1447 			lock_set_recursive(&map->lock);
1448 			lock_write_to_read(&map->lock);
1449 
1450 			rv = vm_fault_user_wire(map, entry->start, entry->end);
1451 			if (rv) {
1452 
1453 				entry->wired_count--;
1454 				entry->user_wired = 0;
1455 
1456 				lock_clear_recursive(&map->lock);
1457 				vm_map_unlock(map);
1458 
1459 				(void) vm_map_user_pageable(map, start, entry->start, TRUE);
1460 				return rv;
1461 			}
1462 
1463 			lock_clear_recursive(&map->lock);
1464 			vm_map_unlock(map);
1465 			vm_map_lock(map);
1466 
1467 			goto rescan;
1468 		}
1469 	}
1470 	vm_map_unlock(map);
1471 	return KERN_SUCCESS;
1472 }
1473 
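/*
 *	A usage sketch for the mlock semantics above, assuming the usual
 *	curproc->p_vmspace layout and page-rounding macros: mlock(addr, len)
 *	wires the range with new_pageable == FALSE,
 *
 *		rv = vm_map_user_pageable(&curproc->p_vmspace->vm_map,
 *		    trunc_page(addr), round_page(addr + len), FALSE);
 *
 *	and munlock(addr, len) makes the same call with TRUE to unwire.
 */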
1474 /*
1475  *	vm_map_pageable:
1476  *
1477  *	Sets the pageability of the specified address
1478  *	range in the target map.  Regions specified
1479  *	as not pageable require locked-down physical
1480  *	memory and physical page maps.
1481  *
1482  *	The map must not be locked, but a reference
1483  *	must remain to the map throughout the call.
1484  */
1485 int
1486 vm_map_pageable(map, start, end, new_pageable)
1487 	register vm_map_t map;
1488 	register vm_offset_t start;
1489 	register vm_offset_t end;
1490 	register boolean_t new_pageable;
1491 {
1492 	register vm_map_entry_t entry;
1493 	vm_map_entry_t start_entry;
1494 	register vm_offset_t failed = 0;
1495 	int rv;
1496 
1497 	vm_map_lock(map);
1498 
1499 	VM_MAP_RANGE_CHECK(map, start, end);
1500 
1501 	/*
1502 	 * Only one pageability change may take place at one time, since
1503 	 * vm_fault assumes it will be called only once for each
1504 	 * wiring/unwiring.  Therefore, we have to make sure we're actually
1505 	 * changing the pageability for the entire region.  We do so before
1506 	 * making any changes.
1507 	 */
1508 
1509 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1510 		vm_map_unlock(map);
1511 		return (KERN_INVALID_ADDRESS);
1512 	}
1513 	entry = start_entry;
1514 
1515 	/*
1516 	 * Actions are rather different for wiring and unwiring, so we have
1517 	 * two separate cases.
1518 	 */
1519 
1520 	if (new_pageable) {
1521 
1522 		vm_map_clip_start(map, entry, start);
1523 
1524 		/*
1525 		 * Unwiring.  First ensure that the range to be unwired is
1526 		 * really wired down and that there are no holes.
1527 		 */
1528 		while ((entry != &map->header) && (entry->start < end)) {
1529 
1530 			if (entry->wired_count == 0 ||
1531 			    (entry->end < end &&
1532 				(entry->next == &map->header ||
1533 				    entry->next->start > entry->end))) {
1534 				vm_map_unlock(map);
1535 				return (KERN_INVALID_ARGUMENT);
1536 			}
1537 			entry = entry->next;
1538 		}
1539 
1540 		/*
1541 		 * Now decrement the wiring count for each region. If a region
1542 		 * becomes completely unwired, unwire its physical pages and
1543 		 * mappings.
1544 		 */
1545 		lock_set_recursive(&map->lock);
1546 
1547 		entry = start_entry;
1548 		while ((entry != &map->header) && (entry->start < end)) {
1549 			vm_map_clip_end(map, entry, end);
1550 
1551 			entry->wired_count--;
1552 			if (entry->wired_count == 0)
1553 				vm_fault_unwire(map, entry->start, entry->end);
1554 
1555 			entry = entry->next;
1556 		}
1557 		vm_map_simplify_entry(map, start_entry);
1558 		lock_clear_recursive(&map->lock);
1559 	} else {
1560 		/*
1561 		 * Wiring.  We must do this in two passes:
1562 		 *
1563 		 * 1.  Holding the write lock, we create any shadow or zero-fill
1564 		 * objects that need to be created. Then we clip each map
1565 		 * entry to the region to be wired and increment its wiring
1566 		 * count.  We create objects before clipping the map entries
1567 		 * to avoid object proliferation.
1568 		 *
1569 		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
1570 		 * fault in the pages for any newly wired area (wired_count is
1571 		 * 1).
1572 		 *
1573 		 * Downgrading to a read lock for vm_fault_wire avoids a possible
1574 		 * deadlock with another process that may have faulted on one
1575 		 * of the pages to be wired (it would mark the page busy,
1576 		 * blocking us, then in turn block on the map lock that we
1577 		 * hold).  Because of problems in the recursive lock package,
1578 		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
1579 		 * any actions that require the write lock must be done
1580 		 * beforehand.  Because we keep the read lock on the map, the
1581 		 * copy-on-write status of the entries we modify here cannot
1582 		 * change.
1583 		 */
1584 
1585 		/*
1586 		 * Pass 1.
1587 		 */
1588 		while ((entry != &map->header) && (entry->start < end)) {
1589 			if (entry->wired_count == 0) {
1590 
1591 				/*
1592 				 * Perform actions of vm_map_lookup that need
1593 				 * the write lock on the map: create a shadow
1594 				 * object for a copy-on-write region, or an
1595 				 * object for a zero-fill region.
1596 				 *
1597 				 * We don't have to do this for entries that
1598 				 * point to sharing maps, because we won't
1599 				 * hold the lock on the sharing map.
1600 				 */
1601 				if (!entry->is_a_map && !entry->is_sub_map) {
1602 					int copyflag = entry->needs_copy;
1603 					if (copyflag &&
1604 					    ((entry->protection & VM_PROT_WRITE) != 0)) {
1605 
1606 						vm_object_shadow(&entry->object.vm_object,
1607 						    &entry->offset,
1608 						    OFF_TO_IDX(entry->end
1609 							- entry->start));
1610 						entry->needs_copy = FALSE;
1611 					} else if (entry->object.vm_object == NULL) {
1612 						entry->object.vm_object =
1613 						    vm_object_allocate(OBJT_DEFAULT,
1614 							OFF_TO_IDX(entry->end - entry->start));
1615 						entry->offset = (vm_offset_t) 0;
1616 					}
1617 					default_pager_convert_to_swapq(entry->object.vm_object);
1618 				}
1619 			}
1620 			vm_map_clip_start(map, entry, start);
1621 			vm_map_clip_end(map, entry, end);
1622 			entry->wired_count++;
1623 
1624 			/*
1625 			 * Check for holes
1626 			 */
1627 			if (entry->end < end &&
1628 			    (entry->next == &map->header ||
1629 				entry->next->start > entry->end)) {
1630 				/*
1631 				 * Found one.  Object creation actions do not
1632 				 * need to be undone, but the wired counts
1633 				 * need to be restored.
1634 				 */
1635 				while (entry != &map->header && entry->end > start) {
1636 					entry->wired_count--;
1637 					entry = entry->prev;
1638 				}
1639 				vm_map_unlock(map);
1640 				return (KERN_INVALID_ARGUMENT);
1641 			}
1642 			entry = entry->next;
1643 		}
1644 
1645 		/*
1646 		 * Pass 2.
1647 		 */
1648 
1649 		/*
1650 		 * HACK HACK HACK HACK
1651 		 *
1652 		 * If we are wiring in the kernel map or a submap of it,
1653 		 * unlock the map to avoid deadlocks.  We trust that the
1654 		 * kernel is well-behaved, and therefore will not do
1655 		 * anything destructive to this region of the map while
1656 		 * we have it unlocked.  We cannot trust user processes
1657 		 * to do the same.
1658 		 *
1659 		 * HACK HACK HACK HACK
1660 		 */
1661 		if (vm_map_pmap(map) == kernel_pmap) {
1662 			vm_map_unlock(map);	/* trust me ... */
1663 		} else {
1664 			lock_set_recursive(&map->lock);
1665 			lock_write_to_read(&map->lock);
1666 		}
1667 
1668 		rv = 0;
1669 		entry = start_entry;
1670 		while (entry != &map->header && entry->start < end) {
1671 			/*
1672 			 * If vm_fault_wire fails for any page we need to undo
1673 			 * what has been done.  We decrement the wiring count
1674 			 * for those pages which have not yet been wired (now)
1675 			 * and unwire those that have (later).
1676 			 *
1677 			 * XXX this violates the locking protocol on the map,
1678 			 * needs to be fixed.
1679 			 */
1680 			if (rv)
1681 				entry->wired_count--;
1682 			else if (entry->wired_count == 1) {
1683 				rv = vm_fault_wire(map, entry->start, entry->end);
1684 				if (rv) {
1685 					failed = entry->start;
1686 					entry->wired_count--;
1687 				}
1688 			}
1689 			entry = entry->next;
1690 		}
1691 
1692 		if (vm_map_pmap(map) == kernel_pmap) {
1693 			vm_map_lock(map);
1694 		} else {
1695 			lock_clear_recursive(&map->lock);
1696 		}
1697 		if (rv) {
1698 			vm_map_unlock(map);
1699 			(void) vm_map_pageable(map, start, failed, TRUE);
1700 			return (rv);
1701 		}
1702 		vm_map_simplify_entry(map, start_entry);
1703 	}
1704 
1705 	vm_map_unlock(map);
1706 
1707 	return (KERN_SUCCESS);
1708 }
1709 
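/*
 *	Kernel-internal sketch: wiring down a kernel virtual range (for
 *	instance one obtained from vm_map_find on kernel_map) and later
 *	making it pageable again:
 *
 *		(void) vm_map_pageable(kernel_map, addr, addr + size, FALSE);
 *		...
 *		(void) vm_map_pageable(kernel_map, addr, addr + size, TRUE);
 *
 *	new_pageable == FALSE faults in and wires the pages; TRUE undoes it.
 */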
1710 /*
1711  * vm_map_clean
1712  *
1713  * Push any dirty cached pages in the address range to their pager.
1714  * If syncio is TRUE, dirty pages are written synchronously.
1715  * If invalidate is TRUE, any cached pages are freed as well.
1716  *
1717  * Returns an error if any part of the specified range is not mapped.
1718  */
1719 int
1720 vm_map_clean(map, start, end, syncio, invalidate)
1721 	vm_map_t map;
1722 	vm_offset_t start;
1723 	vm_offset_t end;
1724 	boolean_t syncio;
1725 	boolean_t invalidate;
1726 {
1727 	register vm_map_entry_t current;
1728 	vm_map_entry_t entry;
1729 	vm_size_t size;
1730 	vm_object_t object;
1731 	vm_ooffset_t offset;
1732 
1733 	vm_map_lock_read(map);
1734 	VM_MAP_RANGE_CHECK(map, start, end);
1735 	if (!vm_map_lookup_entry(map, start, &entry)) {
1736 		vm_map_unlock_read(map);
1737 		return (KERN_INVALID_ADDRESS);
1738 	}
1739 	/*
1740 	 * Make a first pass to check for holes.
1741 	 */
1742 	for (current = entry; current->start < end; current = current->next) {
1743 		if (current->is_sub_map) {
1744 			vm_map_unlock_read(map);
1745 			return (KERN_INVALID_ARGUMENT);
1746 		}
1747 		if (end > current->end &&
1748 		    (current->next == &map->header ||
1749 			current->end != current->next->start)) {
1750 			vm_map_unlock_read(map);
1751 			return (KERN_INVALID_ADDRESS);
1752 		}
1753 	}
1754 
1755 	/*
1756 	 * Make a second pass, cleaning/uncaching pages from the indicated
1757 	 * objects as we go.
1758 	 */
1759 	for (current = entry; current->start < end; current = current->next) {
1760 		offset = current->offset + (start - current->start);
1761 		size = (end <= current->end ? end : current->end) - start;
1762 		if (current->is_a_map || current->is_sub_map) {
1763 			register vm_map_t smap;
1764 			vm_map_entry_t tentry;
1765 			vm_size_t tsize;
1766 
1767 			smap = current->object.share_map;
1768 			vm_map_lock_read(smap);
1769 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1770 			tsize = tentry->end - offset;
1771 			if (tsize < size)
1772 				size = tsize;
1773 			object = tentry->object.vm_object;
1774 			offset = tentry->offset + (offset - tentry->start);
1775 			vm_map_unlock_read(smap);
1776 		} else {
1777 			object = current->object.vm_object;
1778 		}
1779 		/*
1780 		 * Note that there is absolutely no sense in writing out
1781 		 * anonymous objects, so we track down the vnode object
1782 		 * to write out.
1783 		 * We invalidate (remove) all pages from the address space
1784 		 * anyway, for semantic correctness.
1785 		 */
1786 		while (object->backing_object) {
1787 			object = object->backing_object;
1788 			offset += object->backing_object_offset;
1789 			if (object->size < OFF_TO_IDX( offset + size))
1790 				size = IDX_TO_OFF(object->size) - offset;
1791 		}
1792 		if (invalidate)
1793 			pmap_remove(vm_map_pmap(map), current->start,
1794 				current->start + size);
1795 		if (object && (object->type == OBJT_VNODE)) {
1796 			/*
1797 			 * Flush pages if writing is allowed. XXX should we continue
1798 			 * on an error?
1799 			 *
1800 			 * XXX Doing async I/O and then removing all the pages from
1801 			 *     the object before it completes is probably a very bad
1802 			 *     idea.
1803 			 */
1804 			if (current->protection & VM_PROT_WRITE) {
1805 		   	    	vm_object_page_clean(object,
1806 					OFF_TO_IDX(offset),
1807 					OFF_TO_IDX(offset + size),
1808 					(syncio||invalidate)?1:0, TRUE);
1809 				if (invalidate)
1810 					vm_object_page_remove(object,
1811 						OFF_TO_IDX(offset),
1812 						OFF_TO_IDX(offset + size),
1813 						FALSE);
1814 			}
1815 		}
1816 		start += size;
1817 	}
1818 
1819 	vm_map_unlock_read(map);
1820 	return (KERN_SUCCESS);
1821 }
1822 
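/*
 *	An msync()-style sketch (illustrative mapping of the flags): MS_SYNC
 *	corresponds to syncio == TRUE and MS_INVALIDATE to invalidate ==
 *	TRUE, so a synchronous flush that also discards cached pages is
 *
 *		rv = vm_map_clean(map, trunc_page(addr),
 *		    round_page(addr + len), TRUE, TRUE);
 */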
1823 /*
1824  *	vm_map_entry_unwire:	[ internal use only ]
1825  *
1826  *	Make the region specified by this entry pageable.
1827  *
1828  *	The map in question should be locked.
1829  *	[This is the reason for this routine's existence.]
1830  */
1831 static void
1832 vm_map_entry_unwire(map, entry)
1833 	vm_map_t map;
1834 	register vm_map_entry_t entry;
1835 {
1836 	vm_fault_unwire(map, entry->start, entry->end);
1837 	entry->wired_count = 0;
1838 }
1839 
1840 /*
1841  *	vm_map_entry_delete:	[ internal use only ]
1842  *
1843  *	Deallocate the given entry from the target map.
1844  */
1845 static void
1846 vm_map_entry_delete(map, entry)
1847 	register vm_map_t map;
1848 	register vm_map_entry_t entry;
1849 {
1850 	vm_map_entry_unlink(map, entry);
1851 	map->size -= entry->end - entry->start;
1852 
1853 	if (entry->is_a_map || entry->is_sub_map) {
1854 		vm_map_deallocate(entry->object.share_map);
1855 	} else {
1856 		vm_object_deallocate(entry->object.vm_object);
1857 	}
1858 
1859 	vm_map_entry_dispose(map, entry);
1860 }
1861 
1862 /*
1863  *	vm_map_delete:	[ internal use only ]
1864  *
1865  *	Deallocates the given address range from the target
1866  *	map.
1867  *
1868  *	When called with a sharing map, removes pages from
1869  *	that region from all physical maps.
1870  */
1871 int
1872 vm_map_delete(map, start, end)
1873 	register vm_map_t map;
1874 	vm_offset_t start;
1875 	register vm_offset_t end;
1876 {
1877 	register vm_map_entry_t entry;
1878 	vm_map_entry_t first_entry;
1879 
1880 	/*
1881 	 * Find the start of the region, and clip it
1882 	 */
1883 
1884 	if (!vm_map_lookup_entry(map, start, &first_entry))
1885 		entry = first_entry->next;
1886 	else {
1887 		entry = first_entry;
1888 		vm_map_clip_start(map, entry, start);
1889 
1890 		/*
1891 		 * Fix the lookup hint now, rather than each time through the
1892 		 * loop.
1893 		 */
1894 
1895 		SAVE_HINT(map, entry->prev);
1896 	}
1897 
1898 	/*
1899 	 * Save the free space hint
1900 	 */
1901 
1902 	if (entry == &map->header) {
1903 		map->first_free = &map->header;
1904 	} else if (map->first_free->start >= start)
1905 		map->first_free = entry->prev;
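	/*
	 * (Editorial note) first_free caches the entry after which free
	 * space may first be found; if the deletion starts at or before
	 * that point, back the hint up to the entry preceding the doomed
	 * range.
	 */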
1906 
1907 	/*
1908 	 * Step through all entries in this region
1909 	 */
1910 
1911 	while ((entry != &map->header) && (entry->start < end)) {
1912 		vm_map_entry_t next;
1913 		vm_offset_t s, e;
1914 		vm_object_t object;
1915 		vm_ooffset_t offset;
1916 
1917 		vm_map_clip_end(map, entry, end);
1918 
1919 		next = entry->next;
1920 		s = entry->start;
1921 		e = entry->end;
1922 		offset = entry->offset;
1923 
1924 		/*
1925 		 * Unwire before removing addresses from the pmap; otherwise,
1926 		 * unwiring will put the entries back in the pmap.
1927 		 */
1928 
1929 		object = entry->object.vm_object;
1930 		if (entry->wired_count != 0)
1931 			vm_map_entry_unwire(map, entry);
1932 
1933 		/*
1934 		 * For the kernel's own objects, remove the pages directly.
1935 		 * For a sharing map, remove *all* references to this data,
1936 		 * since we can't find all of the physical maps sharing it.
1937 		 */
1938 
1939 		if (object == kernel_object || object == kmem_object) {
1940 			vm_object_page_remove(object, OFF_TO_IDX(offset),
1941 			    OFF_TO_IDX(offset + (e - s)), FALSE);
1942 		} else if (!map->is_main_map) {
1943 			vm_object_pmap_remove(object,
1944 			    OFF_TO_IDX(offset),
1945 			    OFF_TO_IDX(offset + (e - s)));
1946 		} else {
1947 			pmap_remove(map->pmap, s, e);
1948 		}
1949 
1950 		/*
1951 		 * Delete the entry (which may delete the object) only after
1952 		 * removing all pmap entries pointing to its pages.
1953 		 * (Otherwise, its page frames may be reallocated, and any
1954 		 * modify bits will be set in the wrong object!)
1955 		 */
1956 
1957 		vm_map_entry_delete(map, entry);
1958 		entry = next;
1959 	}
1960 	return (KERN_SUCCESS);
1961 }
1962 
1963 /*
1964  *	vm_map_remove:
1965  *
1966  *	Remove the given address range from the target map.
1967  *	This is the exported form of vm_map_delete.
1968  */
1969 int
1970 vm_map_remove(map, start, end)
1971 	register vm_map_t map;
1972 	register vm_offset_t start;
1973 	register vm_offset_t end;
1974 {
1975 	register int result, s = 0;
1976 
1977 	if (map == kmem_map || map == mb_map)
1978 		s = splvm();
1979 
1980 	vm_map_lock(map);
1981 	VM_MAP_RANGE_CHECK(map, start, end);
1982 	result = vm_map_delete(map, start, end);
1983 	vm_map_unlock(map);
1984 
1985 	if (map == kmem_map || map == mb_map)
1986 		splx(s);
1987 
1988 	return (result);
1989 }
1990 
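/*
 * Example (editorial sketch): releasing a kernel virtual address range,
 * which is essentially what kmem_free() does.  "addr" and "size" are
 * hypothetical, with "addr" the start of an earlier allocation:
 *
 *	(void) vm_map_remove(kernel_map, trunc_page(addr),
 *	    round_page(addr + size));
 *
 * For kmem_map and mb_map, vm_map_remove() raises the spl around the
 * operation itself, as shown above.
 */
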
1991 /*
1992  *	vm_map_check_protection:
1993  *
1994  *	Assert that the target map allows the specified
1995  *	privilege on the entire address region given.
1996  *	The entire region must be allocated.
1997  */
1998 boolean_t
1999 vm_map_check_protection(map, start, end, protection)
2000 	register vm_map_t map;
2001 	register vm_offset_t start;
2002 	register vm_offset_t end;
2003 	register vm_prot_t protection;
2004 {
2005 	register vm_map_entry_t entry;
2006 	vm_map_entry_t tmp_entry;
2007 
2008 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2009 		return (FALSE);
2010 	}
2011 	entry = tmp_entry;
2012 
2013 	while (start < end) {
2014 		if (entry == &map->header) {
2015 			return (FALSE);
2016 		}
2017 		/*
2018 		 * No holes allowed!
2019 		 */
2020 
2021 		if (start < entry->start) {
2022 			return (FALSE);
2023 		}
2024 		/*
2025 		 * Check protection associated with entry.
2026 		 */
2027 
2028 		if ((entry->protection & protection) != protection) {
2029 			return (FALSE);
2030 		}
2031 		/* go to next entry */
2032 
2033 		start = entry->end;
2034 		entry = entry->next;
2035 	}
2036 	return (TRUE);
2037 }
2038 
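/*
 * Example (editorial sketch): a caller can reject an unreadable user range
 * up front.  "p", "uaddr" and "len" are hypothetical (the relevant process
 * and a user buffer):
 *
 *	if (!vm_map_check_protection(&p->p_vmspace->vm_map,
 *	    trunc_page(uaddr), round_page(uaddr + len), VM_PROT_READ))
 *		return (EFAULT);
 *
 * The answer is only a snapshot, of course; the address space may change
 * again immediately afterwards.
 */
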
2039 /*
2040  *	vm_map_copy_entry:
2041  *
2042  *	Copies the contents of the source entry to the destination
2043  *	entry.  The entries *must* be aligned properly.
2044  */
2045 static void
2046 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
2047 	vm_map_t src_map, dst_map;
2048 	register vm_map_entry_t src_entry, dst_entry;
2049 {
2050 	if (src_entry->is_sub_map || dst_entry->is_sub_map)
2051 		return;
2052 
2053 	if (src_entry->wired_count == 0) {
2054 
2055 		/*
2056 		 * If the source entry is marked needs_copy, it is already
2057 		 * write-protected.
2058 		 */
2059 		if (!src_entry->needs_copy) {
2060 
2061 			boolean_t su;
2062 
2063 			/*
2064 			 * If the source entry has only one mapping, we can
2065 			 * just protect the virtual address range.
2066 			 */
2067 			if (!(su = src_map->is_main_map)) {
2068 				su = (src_map->ref_count == 1);
2069 			}
2070 			if (su) {
2071 				pmap_protect(src_map->pmap,
2072 				    src_entry->start,
2073 				    src_entry->end,
2074 				    src_entry->protection & ~VM_PROT_WRITE);
2075 			} else {
2076 				vm_object_pmap_copy(src_entry->object.vm_object,
2077 				    OFF_TO_IDX(src_entry->offset),
2078 				    OFF_TO_IDX(src_entry->offset + (src_entry->end
2079 					- src_entry->start)));
2080 			}
2081 		}
2082 
2083 		/*
2084 		 * Make a copy of the object.
2085 		 */
2086 		if (src_entry->object.vm_object) {
2087 			if ((src_entry->object.vm_object->handle == NULL) &&
2088 				(src_entry->object.vm_object->type == OBJT_DEFAULT ||
2089 				 src_entry->object.vm_object->type == OBJT_SWAP))
2090 				vm_object_collapse(src_entry->object.vm_object);
2091 			++src_entry->object.vm_object->ref_count;
2092 			src_entry->copy_on_write = TRUE;
2093 			src_entry->needs_copy = TRUE;
2094 
2095 			dst_entry->needs_copy = TRUE;
2096 			dst_entry->copy_on_write = TRUE;
2097 			dst_entry->object.vm_object =
2098 				src_entry->object.vm_object;
2099 			dst_entry->offset = src_entry->offset;
2100 		} else {
2101 			dst_entry->object.vm_object = NULL;
2102 			dst_entry->offset = 0;
2103 		}
2104 
2105 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2106 		    dst_entry->end - dst_entry->start, src_entry->start);
2107 	} else {
2108 		/*
2109 		 * Of course, wired-down pages can't be made copy-on-write.
2110 		 * Instead, copy the wired pages into the new map by
2111 		 * simulating faults (the new pages are pageable).
2112 		 */
2113 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2114 	}
2115 }
2116 
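/*
 * (Editorial note) In the unwired case above, the source and destination
 * entries end up sharing one object with needs_copy and copy_on_write set
 * and the source mapping write-protected; the actual page copies are
 * deferred until one side takes a write fault and receives a shadow object
 * of its own (see vm_map_lookup() below).
 */
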
2117 /*
2118  * vmspace_fork:
2119  * Create a new process vmspace structure and vm_map
2120  * based on those of an existing process.  The new map
2121  * is based on the old map, according to the inheritance
2122  * values on the regions in that map.
2123  *
2124  * The source map must not be locked.
2125  */
2126 struct vmspace *
2127 vmspace_fork(vm1)
2128 	register struct vmspace *vm1;
2129 {
2130 	register struct vmspace *vm2;
2131 	vm_map_t old_map = &vm1->vm_map;
2132 	vm_map_t new_map;
2133 	vm_map_entry_t old_entry;
2134 	vm_map_entry_t new_entry;
2135 	pmap_t new_pmap;
2136 	vm_object_t object;
2137 
2138 	vm_map_lock(old_map);
2139 
2140 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
2141 	    old_map->entries_pageable);
2142 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2143 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
2144 	new_pmap = &vm2->vm_pmap;	/* XXX */
2145 	new_map = &vm2->vm_map;	/* XXX */
2146 
2147 	old_entry = old_map->header.next;
2148 
2149 	while (old_entry != &old_map->header) {
2150 		if (old_entry->is_sub_map)
2151 			panic("vmspace_fork: encountered a submap");
2152 
2153 		switch (old_entry->inheritance) {
2154 		case VM_INHERIT_NONE:
2155 			break;
2156 
2157 		case VM_INHERIT_SHARE:
2158 			/*
2159 			 * Clone the entry, referencing the shared object.
2160 			 */
2161 			new_entry = vm_map_entry_create(new_map);
2162 			*new_entry = *old_entry;
2163 			new_entry->wired_count = 0;
2164 			object = new_entry->object.vm_object;
2165 			++object->ref_count;
2166 
2167 			/*
2168 			 * Insert the entry into the new map -- we know we're
2169 			 * inserting at the end of the new map.
2170 			 */
2171 
2172 			vm_map_entry_link(new_map, new_map->header.prev,
2173 			    new_entry);
2174 
2175 			/*
2176 			 * Update the physical map
2177 			 */
2178 
2179 			pmap_copy(new_map->pmap, old_map->pmap,
2180 			    new_entry->start,
2181 			    (old_entry->end - old_entry->start),
2182 			    old_entry->start);
2183 			break;
2184 
2185 		case VM_INHERIT_COPY:
2186 			/*
2187 			 * Clone the entry and link into the map.
2188 			 */
2189 			new_entry = vm_map_entry_create(new_map);
2190 			*new_entry = *old_entry;
2191 			new_entry->wired_count = 0;
2192 			new_entry->object.vm_object = NULL;
2193 			new_entry->is_a_map = FALSE;
2194 			vm_map_entry_link(new_map, new_map->header.prev,
2195 			    new_entry);
2196 			vm_map_copy_entry(old_map, new_map, old_entry,
2197 			    new_entry);
2198 			break;
2199 		}
2200 		old_entry = old_entry->next;
2201 	}
2202 
2203 	new_map->size = old_map->size;
2204 	vm_map_unlock(old_map);
2205 
2206 	return (vm2);
2207 }
2208 
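/*
 * Example (editorial sketch): the fork path hands the child a copy of the
 * parent's address space along these lines, with "p1" the parent and "p2"
 * the child (vm_fork() has the real code):
 *
 *	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
 *
 * VM_INHERIT_SHARE regions keep referencing the parent's objects, while
 * VM_INHERIT_COPY regions are set up copy-on-write by vm_map_copy_entry().
 */
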
2209 /*
2210  *	vm_map_lookup:
2211  *
2212  *	Finds the VM object, offset, and
2213  *	protection for a given virtual address in the
2214  *	specified map, assuming a page fault of the
2215  *	type specified.
2216  *
2217  *	Leaves the map in question locked for read; return
2218  *	values are guaranteed until a vm_map_lookup_done
2219  *	call is performed.  Note that the map argument
2220  *	is in/out; the returned map must be used in
2221  *	the call to vm_map_lookup_done.
2222  *
2223  *	A handle (out_entry) is returned for use in
2224  *	vm_map_lookup_done, to make that fast.
2225  *
2226  *	If a lookup is requested with "write protection"
2227  *	specified, the map may be changed to perform virtual
2228  *	copying operations, although the data referenced will
2229  *	remain the same.
2230  */
2231 int
2232 vm_map_lookup(var_map, vaddr, fault_type, out_entry,
2233     object, pindex, out_prot, wired, single_use)
2234 	vm_map_t *var_map;	/* IN/OUT */
2235 	register vm_offset_t vaddr;
2236 	register vm_prot_t fault_type;
2237 
2238 	vm_map_entry_t *out_entry;	/* OUT */
2239 	vm_object_t *object;	/* OUT */
2240 	vm_pindex_t *pindex;	/* OUT */
2241 	vm_prot_t *out_prot;	/* OUT */
2242 	boolean_t *wired;	/* OUT */
2243 	boolean_t *single_use;	/* OUT */
2244 {
2245 	vm_map_t share_map;
2246 	vm_offset_t share_offset;
2247 	register vm_map_entry_t entry;
2248 	register vm_map_t map = *var_map;
2249 	register vm_prot_t prot;
2250 	register boolean_t su;
2251 
2252 RetryLookup:;
2253 
2254 	/*
2255 	 * Lookup the faulting address.
2256 	 */
2257 
2258 	vm_map_lock_read(map);
2259 
2260 #define	RETURN(why) \
2261 		{ \
2262 		vm_map_unlock_read(map); \
2263 		return(why); \
2264 		}
2265 
2266 	/*
2267 	 * If the map has an interesting hint, try it before calling the
2268 	 * full-blown lookup routine.
2269 	 */
2270 
2271 	entry = map->hint;
2272 
2273 	*out_entry = entry;
2274 
2275 	if ((entry == &map->header) ||
2276 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2277 		vm_map_entry_t tmp_entry;
2278 
2279 		/*
2280 		 * Entry was either not a valid hint, or the vaddr was not
2281 		 * contained in the entry, so do a full lookup.
2282 		 */
2283 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2284 			RETURN(KERN_INVALID_ADDRESS);
2285 
2286 		entry = tmp_entry;
2287 		*out_entry = entry;
2288 	}
2289 
2290 	/*
2291 	 * Handle submaps.
2292 	 */
2293 
2294 	if (entry->is_sub_map) {
2295 		vm_map_t old_map = map;
2296 
2297 		*var_map = map = entry->object.sub_map;
2298 		vm_map_unlock_read(old_map);
2299 		goto RetryLookup;
2300 	}
2301 	/*
2302 	 * Check whether this task is allowed to have this page.
2303 	 */
2304 
2305 	prot = entry->protection;
2306 	if ((fault_type & (prot)) != fault_type)
2307 		RETURN(KERN_PROTECTION_FAILURE);
2308 
2309 	/*
2310 	 * If this page is not pageable, we have to get it for all possible
2311 	 * accesses.
2312 	 */
2313 
2314 	*wired = (entry->wired_count != 0);
2315 	if (*wired)
2316 		prot = fault_type = entry->protection;
2317 
2318 	/*
2319 	 * If we don't already have a VM object, track it down.
2320 	 */
2321 
2322 	su = !entry->is_a_map;
2323 	if (su) {
2324 		share_map = map;
2325 		share_offset = vaddr;
2326 	} else {
2327 		vm_map_entry_t share_entry;
2328 
2329 		/*
2330 		 * Compute the sharing map, and offset into it.
2331 		 */
2332 
2333 		share_map = entry->object.share_map;
2334 		share_offset = (vaddr - entry->start) + entry->offset;
2335 
2336 		/*
2337 		 * Look for the backing store object and offset
2338 		 */
2339 
2340 		vm_map_lock_read(share_map);
2341 
2342 		if (!vm_map_lookup_entry(share_map, share_offset,
2343 			&share_entry)) {
2344 			vm_map_unlock_read(share_map);
2345 			RETURN(KERN_INVALID_ADDRESS);
2346 		}
2347 		entry = share_entry;
2348 	}
2349 
2350 	/*
2351 	 * If the entry was copy-on-write, we shadow it now or demote access.
2352 	 */
2353 
2354 	if (entry->needs_copy) {
2355 		/*
2356 		 * If we want to write the page, we may as well handle that
2357 		 * now since we've got the sharing map locked.
2358 		 *
2359 		 * If we don't need to write the page, we just demote the
2360 		 * permissions allowed.
2361 		 */
2362 
2363 		if (fault_type & VM_PROT_WRITE) {
2364 			/*
2365 			 * Make a new object, and place it in the object
2366 			 * chain.  Note that no new references have appeared
2367 			 * -- one just moved from the share map to the new
2368 			 * object.
2369 			 */
2370 
2371 			if (lock_read_to_write(&share_map->lock)) {
2372 				if (share_map != map)
2373 					vm_map_unlock_read(map);
2374 				goto RetryLookup;
2375 			}
2376 			vm_object_shadow(
2377 			    &entry->object.vm_object,
2378 			    &entry->offset,
2379 			    OFF_TO_IDX(entry->end - entry->start));
2380 
2381 			entry->needs_copy = FALSE;
2382 
2383 			lock_write_to_read(&share_map->lock);
2384 		} else {
2385 			/*
2386 			 * We're attempting to read a copy-on-write page --
2387 			 * don't allow writes.
2388 			 */
2389 
2390 			prot &= (~VM_PROT_WRITE);
2391 		}
2392 	}
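	/*
	 * (Editorial note) In the write-fault case above the entry now
	 * points at a fresh shadow object backed by the one it used to
	 * share, so pages copied on write land in the new object and the
	 * shared data is left intact.
	 */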
2393 	/*
2394 	 * Create an object if necessary.
2395 	 */
2396 	if (entry->object.vm_object == NULL) {
2397 
2398 		if (lock_read_to_write(&share_map->lock)) {
2399 			if (share_map != map)
2400 				vm_map_unlock_read(map);
2401 			goto RetryLookup;
2402 		}
2403 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2404 		    OFF_TO_IDX(entry->end - entry->start));
2405 		entry->offset = 0;
2406 		lock_write_to_read(&share_map->lock);
2407 	}
2408 
2409 	if (entry->object.vm_object != NULL)
2410 		default_pager_convert_to_swapq(entry->object.vm_object);
2411 	/*
2412 	 * Return the object/offset from this entry.  If the entry was
2413 	 * copy-on-write or empty, it has been fixed up.
2414 	 */
2415 
2416 	*pindex = OFF_TO_IDX((share_offset - entry->start) + entry->offset);
2417 	*object = entry->object.vm_object;
2418 
2419 	/*
2420 	 * Return whether this is the only map sharing this data.
2421 	 */
2422 
2423 	if (!su) {
2424 		su = (share_map->ref_count == 1);
2425 	}
2426 	*out_prot = prot;
2427 	*single_use = su;
2428 
2429 	return (KERN_SUCCESS);
2430 
2431 #undef	RETURN
2432 }
2433 
2434 /*
2435  *	vm_map_lookup_done:
2436  *
2437  *	Releases locks acquired by a vm_map_lookup
2438  *	(according to the handle returned by that lookup).
2439  */
2440 
2441 void
2442 vm_map_lookup_done(map, entry)
2443 	register vm_map_t map;
2444 	vm_map_entry_t entry;
2445 {
2446 	/*
2447 	 * If this entry references a map, unlock it first.
2448 	 */
2449 
2450 	if (entry->is_a_map)
2451 		vm_map_unlock_read(entry->object.share_map);
2452 
2453 	/*
2454 	 * Unlock the main-level map
2455 	 */
2456 
2457 	vm_map_unlock_read(map);
2458 }
2459 
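/*
 * Example (editorial sketch): the page fault handler uses the two routines
 * above roughly as follows (vm_fault() has the real code); the local
 * variable names are hypothetical:
 *
 *	vm_map_entry_t entry;
 *	vm_object_t object;
 *	vm_pindex_t pindex;
 *	vm_prot_t prot;
 *	boolean_t wired, su;
 *	int rv;
 *
 *	rv = vm_map_lookup(&map, vaddr, fault_type, &entry, &object,
 *	    &pindex, &prot, &wired, &su);
 *	if (rv != KERN_SUCCESS)
 *		return (rv);
 *	... resolve the fault against (object, pindex) ...
 *	vm_map_lookup_done(map, entry);
 *
 * Because vm_map_lookup() may replace the map (submaps), the map handed to
 * vm_map_lookup_done() must be the one it returned through its first
 * argument.
 */
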
2460 #include "opt_ddb.h"
2461 #ifdef DDB
2462 #include <sys/kernel.h>
2463 
2464 #include <ddb/ddb.h>
2465 
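/*
 * (Editorial note) This command is reached from the DDB prompt as
 * "show map <addr>", where <addr> is the address of a struct vm_map.
 * Supplying an address makes the listing "full", which lets the nested
 * share map and object printouts below recurse.
 */
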
2466 /*
2467  *	vm_map_print:	[ debug ]
2468  */
2469 DB_SHOW_COMMAND(map, vm_map_print)
2470 {
2471 	/* XXX convert args. */
2472 	register vm_map_t map = (vm_map_t)addr;
2473 	boolean_t full = have_addr;
2474 
2475 	register vm_map_entry_t entry;
2476 
2477 	db_iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n",
2478 	    (map->is_main_map ? "Task" : "Share"),
2479 	    (int) map, (int) (map->pmap), map->ref_count, map->nentries,
2480 	    map->timestamp);
2481 
2482 	if (!full && db_indent)
2483 		return;
2484 
2485 	db_indent += 2;
2486 	for (entry = map->header.next; entry != &map->header;
2487 	    entry = entry->next) {
2488 		db_iprintf("map entry 0x%x: start=0x%x, end=0x%x, ",
2489 		    (int) entry, (int) entry->start, (int) entry->end);
2490 		if (map->is_main_map) {
2491 			static char *inheritance_name[4] =
2492 			{"share", "copy", "none", "donate_copy"};
2493 
2494 			db_printf("prot=%x/%x/%s, ",
2495 			    entry->protection,
2496 			    entry->max_protection,
2497 			    inheritance_name[entry->inheritance]);
2498 			if (entry->wired_count != 0)
2499 				db_printf("wired, ");
2500 		}
2501 		if (entry->is_a_map || entry->is_sub_map) {
2502 			db_printf("share=0x%x, offset=0x%x\n",
2503 			    (int) entry->object.share_map,
2504 			    (int) entry->offset);
2505 			if ((entry->prev == &map->header) ||
2506 			    (!entry->prev->is_a_map) ||
2507 			    (entry->prev->object.share_map !=
2508 				entry->object.share_map)) {
2509 				db_indent += 2;
2510 				vm_map_print((int)entry->object.share_map,
2511 					     full, 0, (char *)0);
2512 				db_indent -= 2;
2513 			}
2514 		} else {
2515 			db_printf("object=0x%x, offset=0x%x",
2516 			    (int) entry->object.vm_object,
2517 			    (int) entry->offset);
2518 			if (entry->copy_on_write)
2519 				db_printf(", copy (%s)",
2520 				    entry->needs_copy ? "needed" : "done");
2521 			db_printf("\n");
2522 
2523 			if ((entry->prev == &map->header) ||
2524 			    (entry->prev->is_a_map) ||
2525 			    (entry->prev->object.vm_object !=
2526 				entry->object.vm_object)) {
2527 				db_indent += 2;
2528 				vm_object_print((int)entry->object.vm_object,
2529 						full, 0, (char *)0);
2530 				db_indent -= 2;
2531 			}
2532 		}
2533 	}
2534 	db_indent -= 2;
2535 }
2536 #endif /* DDB */
2537