xref: /freebsd/sys/vm/vm_map.c (revision 17ee9d00bc1ae1e598c38f25826f861e4bc6c3ce)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $Id: vm_map.c,v 1.14 1995/02/14 04:00:17 davidg Exp $
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/malloc.h>
74 
75 #include <vm/vm.h>
76 #include <vm/vm_page.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_kern.h>
79 
80 /*
81  *	Virtual memory maps provide for the mapping, protection,
82  *	and sharing of virtual memory objects.  In addition,
83  *	this module provides for an efficient virtual copy of
84  *	memory from one map to another.
85  *
86  *	Synchronization is required prior to most operations.
87  *
88  *	Maps consist of an ordered doubly-linked list of simple
89  *	entries; a single hint is used to speed up lookups.
90  *
91  *	In order to properly represent the sharing of virtual
92  *	memory regions among maps, the map structure is bi-level.
93  *	Top-level ("address") maps refer to regions of sharable
94  *	virtual memory.  These regions are implemented as
95  *	("sharing") maps, which then refer to the actual virtual
96  *	memory objects.  When two address maps "share" memory,
97  *	their top-level maps both have references to the same
98  *	sharing map.  When memory is virtual-copied from one
99  *	address map to another, the references in the sharing
100  *	maps are actually copied -- no copying occurs at the
101  *	virtual memory object level.
102  *
103  *	Since portions of maps are specified by start/end addresses,
104  *	which may not align with existing map entries, all
105  *	routines merely "clip" entries to these start/end values.
106  *	[That is, an entry is split into two, bordering at a
107  *	start or end value.]  Note that these clippings may not
108  *	always be necessary (as the two resulting entries are then
109  *	not changed); however, the clipping is done for convenience.
110  *	No attempt is currently made to "glue back together" two
111  *	abutting entries.
112  *
113  *	As mentioned above, virtual copy operations are performed
114  *	by copying VM object references from one sharing map to
115  *	another, and then marking both regions as copy-on-write.
116  *	It is important to note that only one writeable reference
117  *	to a VM object region exists in any map -- this means that
118  *	shadow object creation can be delayed until a write operation
119  *	occurs.
120  */
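/*
 *	Example (illustrative only): deleting the range [B,C) from a map
 *	whose single entry spans [A,D) first clips that entry at B and
 *	then at C, producing entries for [A,B), [B,C) and [C,D); only the
 *	middle entry is then unlinked and freed, and the two remaining
 *	entries are not merged back together.
 */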
121 
122 /*
123  *	vm_map_startup:
124  *
125  *	Initialize the vm_map module.  Must be called before
126  *	any other vm_map routines.
127  *
128  *	Map and entry structures are allocated from the general
129  *	purpose memory pool with some exceptions:
130  *
131  *	- The kernel map and kmem submap are allocated statically.
132  *	- Kernel map entries are allocated out of a static pool.
133  *
134  *	These restrictions are necessary since malloc() uses the
135  *	maps and requires map entries.
136  */
137 
138 vm_offset_t kentry_data;
139 vm_size_t kentry_data_size;
140 vm_map_entry_t kentry_free;
141 vm_map_t kmap_free;
142 
143 int kentry_count;
144 static vm_offset_t mapvm_start = 0, mapvm = 0, mapvmmax;
145 static int mapvmpgcnt = 0;
146 
147 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
148 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
149 
150 void
151 vm_map_startup()
152 {
153 	register int i;
154 	register vm_map_entry_t mep;
155 	vm_map_t mp;
156 
157 	/*
158 	 * Static map structures for allocation before initialization of
159 	 * kernel map or kmem map.  vm_map_create knows how to deal with them.
160 	 */
161 	kmap_free = mp = (vm_map_t) kentry_data;
162 	i = MAX_KMAP;
163 	while (--i > 0) {
164 		mp->header.next = (vm_map_entry_t) (mp + 1);
165 		mp++;
166 	}
167 	mp++->header.next = NULL;
168 
169 	/*
170 	 * Form a free list of statically allocated kernel map entries with
171 	 * the rest.
172 	 */
173 	kentry_free = mep = (vm_map_entry_t) mp;
174 	kentry_count = i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep;
175 	while (--i > 0) {
176 		mep->next = mep + 1;
177 		mep++;
178 	}
179 	mep->next = NULL;
180 }
181 
182 /*
183  * Allocate a vmspace structure, including a vm_map and pmap,
184  * and initialize those structures.  The refcnt is set to 1.
185  * The remaining fields must be initialized by the caller.
186  */
187 struct vmspace *
188 vmspace_alloc(min, max, pageable)
189 	vm_offset_t min, max;
190 	int pageable;
191 {
192 	register struct vmspace *vm;
193 
194 	if (mapvmpgcnt == 0 && mapvm == 0) {
195 		int s;
196 
197 		mapvmpgcnt = (cnt.v_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE;
198 		s = splhigh();
199 		mapvm_start = mapvm = kmem_alloc_pageable(kmem_map, mapvmpgcnt * PAGE_SIZE);
200 		mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE;
201 		splx(s);
202 		if (!mapvm)
203 			mapvmpgcnt = 0;
204 	}
205 	MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK);
206 	bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm);
207 	vm_map_init(&vm->vm_map, min, max, pageable);
208 	pmap_pinit(&vm->vm_pmap);
209 	vm->vm_map.pmap = &vm->vm_pmap;	/* XXX */
210 	vm->vm_refcnt = 1;
211 	return (vm);
212 }
213 
214 void
215 vmspace_free(vm)
216 	register struct vmspace *vm;
217 {
218 
219 	if (vm->vm_refcnt == 0)
220 		panic("vmspace_free: attempt to free already freed vmspace");
221 
222 	if (--vm->vm_refcnt == 0) {
223 		/*
224 		 * Lock the map, to wait out all other references to it.
225 		 * Delete all of the mappings and pages they hold, then call
226 		 * the pmap module to reclaim anything left.
227 		 */
228 		vm_map_lock(&vm->vm_map);
229 		(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
230 		    vm->vm_map.max_offset);
231 		vm_map_unlock(&vm->vm_map);
232 		while( vm->vm_map.ref_count != 1)
233 			tsleep(&vm->vm_map.ref_count, PVM, "vmsfre", 0);
234 		--vm->vm_map.ref_count;
235 		pmap_release(&vm->vm_pmap);
236 		FREE(vm, M_VMMAP);
237 	}
238 }
239 
240 /*
241  *	vm_map_create:
242  *
243  *	Creates and returns a new empty VM map with
244  *	the given physical map structure, and having
245  *	the given lower and upper address bounds.
246  */
247 vm_map_t
248 vm_map_create(pmap, min, max, pageable)
249 	pmap_t pmap;
250 	vm_offset_t min, max;
251 	boolean_t pageable;
252 {
253 	register vm_map_t result;
254 
255 	if (kmem_map == NULL) {
256 		result = kmap_free;
257 		kmap_free = (vm_map_t) result->header.next;
258 		if (result == NULL)
259 			panic("vm_map_create: out of maps");
260 	} else
261 		MALLOC(result, vm_map_t, sizeof(struct vm_map),
262 		    M_VMMAP, M_WAITOK);
263 
264 	vm_map_init(result, min, max, pageable);
265 	result->pmap = pmap;
266 	return (result);
267 }
268 
269 /*
270  * Initialize an existing vm_map structure
271  * such as that in the vmspace structure.
272  * The pmap is set elsewhere.
273  */
274 void
275 vm_map_init(map, min, max, pageable)
276 	register struct vm_map *map;
277 	vm_offset_t min, max;
278 	boolean_t pageable;
279 {
280 	map->header.next = map->header.prev = &map->header;
281 	map->nentries = 0;
282 	map->size = 0;
283 	map->ref_count = 1;
284 	map->is_main_map = TRUE;
285 	map->min_offset = min;
286 	map->max_offset = max;
287 	map->entries_pageable = pageable;
288 	map->first_free = &map->header;
289 	map->hint = &map->header;
290 	map->timestamp = 0;
291 	lock_init(&map->lock, TRUE);
292 	simple_lock_init(&map->ref_lock);
293 	simple_lock_init(&map->hint_lock);
294 }
295 
296 /*
297  *	vm_map_entry_create:	[ internal use only ]
298  *
299  *	Allocates a VM map entry for insertion.
300  *	No entry fields are filled in.
301  */
302 static struct vm_map_entry *mappool;
303 static int mappoolcnt;
304 
305 vm_map_entry_t
306 vm_map_entry_create(map)
307 	vm_map_t map;
308 {
309 	vm_map_entry_t entry;
310 	int i;
311 
312 #define KENTRY_LOW_WATER 64
313 #define MAPENTRY_LOW_WATER 128
314 
315 	/*
316 	 * This is a *very* nasty (and sort of incomplete) hack!!!!
317 	 */
318 	if (kentry_count < KENTRY_LOW_WATER) {
319 		if (mapvmpgcnt && mapvm) {
320 			vm_page_t m;
321 
322 			m = vm_page_alloc(kmem_object,
323 			        mapvm - vm_map_min(kmem_map),
324 				    (map == kmem_map) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL);
325 			if (m) {
326 				int newentries;
327 
328 				newentries = (NBPG / sizeof(struct vm_map_entry));
329 				vm_page_wire(m);
330 				m->flags &= ~PG_BUSY;
331 				m->valid = VM_PAGE_BITS_ALL;
332 				pmap_enter(vm_map_pmap(kmem_map), mapvm,
333 				    VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT, 1);
334 
335 				entry = (vm_map_entry_t) mapvm;
336 				mapvm += NBPG;
337 				--mapvmpgcnt;
338 
339 				for (i = 0; i < newentries; i++) {
340 					vm_map_entry_dispose(kernel_map, entry);
341 					entry++;
342 				}
343 			}
344 		}
345 	}
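	/*
	 * Entries for the kernel's own maps are taken from the static
	 * kentry pool (with the dynamic pool as a fallback) so that map
	 * entry allocation never calls malloc(), which itself needs map
	 * entries; all other maps use the dynamic pool and fall back to
	 * MALLOC only when that pool is empty.
	 */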
346 	if (map == kernel_map || map == kmem_map || map == pager_map) {
347 
348 		entry = kentry_free;
349 		if (entry) {
350 			kentry_free = entry->next;
351 			--kentry_count;
352 			return entry;
353 		}
354 		entry = mappool;
355 		if (entry) {
356 			mappool = entry->next;
357 			--mappoolcnt;
358 			return entry;
359 		}
360 	} else {
361 		entry = mappool;
362 		if (entry) {
363 			mappool = entry->next;
364 			--mappoolcnt;
365 			return entry;
366 		}
367 		MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry),
368 		    M_VMMAPENT, M_WAITOK);
369 	}
370 	if (entry == NULL)
371 		panic("vm_map_entry_create: out of map entries");
372 
373 	return (entry);
374 }
375 
376 /*
377  *	vm_map_entry_dispose:	[ internal use only ]
378  *
379  *	Inverse of vm_map_entry_create.
380  */
381 void
382 vm_map_entry_dispose(map, entry)
383 	vm_map_t map;
384 	vm_map_entry_t entry;
385 {
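	/*
	 * Entries lying within the static kentry_data or mapvm regions
	 * (or any entry, while the kernel pool is below its low-water
	 * mark) are returned to kentry_free; the rest are cached on
	 * mappool up to MAPENTRY_LOW_WATER and freed otherwise.
	 */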
386 	if ((kentry_count < KENTRY_LOW_WATER) ||
387 	    ((vm_offset_t) entry >= kentry_data && (vm_offset_t) entry < (kentry_data + kentry_data_size)) ||
388 	    ((vm_offset_t) entry >= mapvm_start && (vm_offset_t) entry < mapvmmax)) {
389 		entry->next = kentry_free;
390 		kentry_free = entry;
391 		++kentry_count;
392 		return;
393 	} else {
394 		if (mappoolcnt < MAPENTRY_LOW_WATER) {
395 			entry->next = mappool;
396 			mappool = entry;
397 			++mappoolcnt;
398 			return;
399 		}
400 		FREE(entry, M_VMMAPENT);
401 	}
402 }
403 
404 /*
405  *	vm_map_entry_{un,}link:
406  *
407  *	Insert/remove entries from maps.
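 *	Entries are kept on a circular doubly-linked list whose sentinel
 *	is the map's own header entry.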
408  */
409 #define	vm_map_entry_link(map, after_where, entry) \
410 		{ \
411 		(map)->nentries++; \
412 		(entry)->prev = (after_where); \
413 		(entry)->next = (after_where)->next; \
414 		(entry)->prev->next = (entry); \
415 		(entry)->next->prev = (entry); \
416 		}
417 #define	vm_map_entry_unlink(map, entry) \
418 		{ \
419 		(map)->nentries--; \
420 		(entry)->next->prev = (entry)->prev; \
421 		(entry)->prev->next = (entry)->next; \
422 		}
423 
424 /*
425  *	vm_map_reference:
426  *
427  *	Creates another valid reference to the given map.
428  *
429  */
430 void
431 vm_map_reference(map)
432 	register vm_map_t map;
433 {
434 	if (map == NULL)
435 		return;
436 
437 	simple_lock(&map->ref_lock);
438 	map->ref_count++;
439 	simple_unlock(&map->ref_lock);
440 }
441 
442 /*
443  *	vm_map_deallocate:
444  *
445  *	Removes a reference from the specified map,
446  *	destroying it if no references remain.
447  *	The map should not be locked.
448  */
449 void
450 vm_map_deallocate(map)
451 	register vm_map_t map;
452 {
453 	register int c;
454 
455 	if (map == NULL)
456 		return;
457 
458 	simple_lock(&map->ref_lock);
459 	c = map->ref_count;
460 	simple_unlock(&map->ref_lock);
461 
462 	if (c == 0)
463 		panic("vm_map_deallocate: deallocating already freed map");
464 
465 	if (c != 1) {
466 		--map->ref_count;
467 		wakeup((caddr_t) &map->ref_count);
468 		return;
469 	}
470 	/*
471 	 * Lock the map, to wait out all other references to it.
472 	 */
473 
474 	vm_map_lock(map);
475 	(void) vm_map_delete(map, map->min_offset, map->max_offset);
476 	--map->ref_count;
477 	if( map->ref_count != 0) {
478 		vm_map_unlock(map);
479 		return;
480 	}
481 
482 	pmap_destroy(map->pmap);
483 	FREE(map, M_VMMAP);
484 }
485 
486 /*
487  *	vm_map_insert:
488  *
489  *	Inserts the given whole VM object into the target
490  *	map at the specified address range.  The object's
491  *	size should match that of the address range.
492  *
493  *	Requires that the map be locked, and leaves it so.
494  */
495 int
496 vm_map_insert(map, object, offset, start, end)
497 	vm_map_t map;
498 	vm_object_t object;
499 	vm_offset_t offset;
500 	vm_offset_t start;
501 	vm_offset_t end;
502 {
503 	register vm_map_entry_t new_entry;
504 	register vm_map_entry_t prev_entry;
505 	vm_map_entry_t temp_entry;
506 
507 	/*
508 	 * Check that the start and end points are not bogus.
509 	 */
510 
511 	if ((start < map->min_offset) || (end > map->max_offset) ||
512 	    (start >= end))
513 		return (KERN_INVALID_ADDRESS);
514 
515 	/*
516 	 * Find the entry prior to the proposed starting address; if it's part
517 	 * of an existing entry, this range is bogus.
518 	 */
519 
520 	if (vm_map_lookup_entry(map, start, &temp_entry))
521 		return (KERN_NO_SPACE);
522 
523 	prev_entry = temp_entry;
524 
525 	/*
526 	 * Assert that the next entry doesn't overlap the end point.
527 	 */
528 
529 	if ((prev_entry->next != &map->header) &&
530 	    (prev_entry->next->start < end))
531 		return (KERN_NO_SPACE);
532 
533 	/*
534 	 * See if we can avoid creating a new entry by extending one of our
535 	 * neighbors.
536 	 */
537 
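	/*
	 * This shortcut applies only to anonymous memory (object == NULL),
	 * and only when the previous entry is an ordinary, unwired entry
	 * with default protection and inheritance that ends exactly where
	 * the new range begins; vm_object_coalesce then decides whether
	 * the backing object can simply be grown to cover the new range.
	 */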
538 	if (object == NULL) {
539 		if ((prev_entry != &map->header) &&
540 		    (prev_entry->end == start) &&
541 		    (map->is_main_map) &&
542 		    (prev_entry->is_a_map == FALSE) &&
543 		    (prev_entry->is_sub_map == FALSE) &&
544 		    (prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
545 		    (prev_entry->protection == VM_PROT_DEFAULT) &&
546 		    (prev_entry->max_protection == VM_PROT_DEFAULT) &&
547 		    (prev_entry->wired_count == 0)) {
548 
549 			if (vm_object_coalesce(prev_entry->object.vm_object,
550 				NULL,
551 				prev_entry->offset,
552 				(vm_offset_t) 0,
553 				(vm_size_t) (prev_entry->end
554 				    - prev_entry->start),
555 				(vm_size_t) (end - prev_entry->end))) {
556 				/*
557 				 * Coalesced the two objects - can extend the
558 				 * previous map entry to include the new
559 				 * range.
560 				 */
561 				map->size += (end - prev_entry->end);
562 				prev_entry->end = end;
563 				return (KERN_SUCCESS);
564 			}
565 		}
566 	}
567 	/*
568 	 * Create a new entry
569 	 */
570 
571 	new_entry = vm_map_entry_create(map);
572 	new_entry->start = start;
573 	new_entry->end = end;
574 
575 	new_entry->is_a_map = FALSE;
576 	new_entry->is_sub_map = FALSE;
577 	new_entry->object.vm_object = object;
578 	new_entry->offset = offset;
579 
580 	new_entry->copy_on_write = FALSE;
581 	new_entry->needs_copy = FALSE;
582 
583 	if (map->is_main_map) {
584 		new_entry->inheritance = VM_INHERIT_DEFAULT;
585 		new_entry->protection = VM_PROT_DEFAULT;
586 		new_entry->max_protection = VM_PROT_DEFAULT;
587 		new_entry->wired_count = 0;
588 	}
589 	/*
590 	 * Insert the new entry into the list
591 	 */
592 
593 	vm_map_entry_link(map, prev_entry, new_entry);
594 	map->size += new_entry->end - new_entry->start;
595 
596 	/*
597 	 * Update the free space hint
598 	 */
599 
600 	if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start))
601 		map->first_free = new_entry;
602 
603 	return (KERN_SUCCESS);
604 }
605 
606 /*
607  *	SAVE_HINT:
608  *
609  *	Saves the specified entry as the hint for
610  *	future lookups.  Performs necessary interlocks.
611  */
612 #define	SAVE_HINT(map,value) \
613 		simple_lock(&(map)->hint_lock); \
614 		(map)->hint = (value); \
615 		simple_unlock(&(map)->hint_lock);
616 
617 /*
618  *	vm_map_lookup_entry:	[ internal use only ]
619  *
620  *	Finds the map entry containing (or
621  *	immediately preceding) the specified address
622  *	in the given map; the entry is returned
623  *	in the "entry" parameter.  The boolean
624  *	result indicates whether the address is
625  *	actually contained in the map.
626  */
627 boolean_t
628 vm_map_lookup_entry(map, address, entry)
629 	register vm_map_t map;
630 	register vm_offset_t address;
631 	vm_map_entry_t *entry;	/* OUT */
632 {
633 	register vm_map_entry_t cur;
634 	register vm_map_entry_t last;
635 
636 	/*
637 	 * Start looking either from the head of the list, or from the hint.
638 	 */
639 
640 	simple_lock(&map->hint_lock);
641 	cur = map->hint;
642 	simple_unlock(&map->hint_lock);
643 
644 	if (cur == &map->header)
645 		cur = cur->next;
646 
647 	if (address >= cur->start) {
648 		/*
649 		 * Go from hint to end of list.
650 		 *
651 		 * But first, make a quick check to see if we are already looking
652 		 * at the entry we want (which is usually the case). Note also
653 		 * that we don't need to save the hint here... it is the same
654 		 * hint (unless we are at the header, in which case the hint
655 		 * didn't buy us anything anyway).
656 		 */
657 		last = &map->header;
658 		if ((cur != last) && (cur->end > address)) {
659 			*entry = cur;
660 			return (TRUE);
661 		}
662 	} else {
663 		/*
664 		 * Go from start to hint, *inclusively*
665 		 */
666 		last = cur->next;
667 		cur = map->header.next;
668 	}
669 
670 	/*
671 	 * Search linearly
672 	 */
673 
674 	while (cur != last) {
675 		if (cur->end > address) {
676 			if (address >= cur->start) {
677 				/*
678 				 * Save this lookup for future hints, and
679 				 * return
680 				 */
681 
682 				*entry = cur;
683 				SAVE_HINT(map, cur);
684 				return (TRUE);
685 			}
686 			break;
687 		}
688 		cur = cur->next;
689 	}
690 	*entry = cur->prev;
691 	SAVE_HINT(map, *entry);
692 	return (FALSE);
693 }
694 
695 /*
696  * Find sufficient space for `length' bytes in the given map, starting at
697  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
698  */
699 int
700 vm_map_findspace(map, start, length, addr)
701 	register vm_map_t map;
702 	register vm_offset_t start;
703 	vm_size_t length;
704 	vm_offset_t *addr;
705 {
706 	register vm_map_entry_t entry, next;
707 	register vm_offset_t end;
708 
709 	if (start < map->min_offset)
710 		start = map->min_offset;
711 	if (start > map->max_offset)
712 		return (1);
713 
714 	/*
715 	 * Look for the first possible address; if there's already something
716 	 * at this address, we have to start after it.
717 	 */
718 	if (start == map->min_offset) {
719 		if ((entry = map->first_free) != &map->header)
720 			start = entry->end;
721 	} else {
722 		vm_map_entry_t tmp;
723 
724 		if (vm_map_lookup_entry(map, start, &tmp))
725 			start = tmp->end;
726 		entry = tmp;
727 	}
728 
729 	/*
730 	 * Look through the rest of the map, trying to fit a new region in the
731 	 * gap between existing regions, or after the very last region.
732 	 */
733 	for (;; start = (entry = next)->end) {
734 		/*
735 		 * Find the end of the proposed new region.  Be sure we didn't
736 		 * go beyond the end of the map, or wrap around the address;
737 		 * if so, we lose.  Otherwise, if this is the last entry, or
738 		 * if the proposed new region fits before the next entry, we
739 		 * win.
740 		 */
741 		end = start + length;
742 		if (end > map->max_offset || end < start)
743 			return (1);
744 		next = entry->next;
745 		if (next == &map->header || next->start >= end)
746 			break;
747 	}
748 	SAVE_HINT(map, entry);
749 	*addr = start;
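	/*
	 * For allocations in the kernel map, grow the kernel page tables
	 * if the chosen region extends beyond their current coverage.
	 */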
750 	if (map == kernel_map && round_page(start + length) > kernel_vm_end)
751 		pmap_growkernel(round_page(start + length));
752 	return (0);
753 }
754 
755 /*
756  *	vm_map_find finds an unallocated region in the target address
757  *	map with the given length.  The search is defined to be
758  *	first-fit from the specified address; the region found is
759  *	returned in the same parameter.
760  *
761  */
762 int
763 vm_map_find(map, object, offset, addr, length, find_space)
764 	vm_map_t map;
765 	vm_object_t object;
766 	vm_offset_t offset;
767 	vm_offset_t *addr;	/* IN/OUT */
768 	vm_size_t length;
769 	boolean_t find_space;
770 {
771 	register vm_offset_t start;
772 	int result, s = 0;
773 
774 	start = *addr;
775 	vm_map_lock(map);
776 
777 	if (map == kmem_map)
778 		s = splhigh();
779 
780 	if (find_space) {
781 		if (vm_map_findspace(map, start, length, addr)) {
782 			vm_map_unlock(map);
783 			if (map == kmem_map)
784 				splx(s);
785 			return (KERN_NO_SPACE);
786 		}
787 		start = *addr;
788 	}
789 	result = vm_map_insert(map, object, offset, start, start + length);
790 	vm_map_unlock(map);
791 
792 	if (map == kmem_map)
793 		splx(s);
794 
795 	return (result);
796 }
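
/*
 *	Illustrative usage (a sketch, not code from this file): a caller
 *	that wants the system to choose the address seeds *addr with the
 *	lowest acceptable value and passes find_space as TRUE:
 *
 *		vm_offset_t addr = vm_map_min(map);
 *
 *		if (vm_map_find(map, NULL, (vm_offset_t) 0,
 *		    &addr, size, TRUE) != KERN_SUCCESS)
 *			...no space was available...
 *
 *	On success, addr holds the start of the newly inserted region.
 */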
797 
798 /*
799  *	vm_map_simplify_entry:	[ internal use only ]
800  *
801  *	Simplify the given map entry by:
802  *		removing extra sharing maps
803  *		[XXX maybe later] merging with a neighbor
804  */
805 void
806 vm_map_simplify_entry(map, entry)
807 	vm_map_t map;
808 	vm_map_entry_t entry;
809 {
810 #ifdef	lint
811 	map++;
812 #endif
813 
814 	/*
815 	 * If this entry corresponds to a sharing map, then see if we can
816 	 * remove the level of indirection. If it's not a sharing map, then it
817 	 * points to a VM object, so see if we can merge with either of our
818 	 * neighbors.
819 	 */
820 
821 	if (entry->is_sub_map)
822 		return;
823 	if (entry->is_a_map) {
824 #if	0
825 		vm_map_t my_share_map;
826 		int count;
827 
828 		my_share_map = entry->object.share_map;
829 		simple_lock(&my_share_map->ref_lock);
830 		count = my_share_map->ref_count;
831 		simple_unlock(&my_share_map->ref_lock);
832 
833 		if (count == 1) {
834 			/*
835 			 * Can move the region from entry->start to entry->end
836 			 * (+ entry->offset) in my_share_map into place of
837 			 * entry. Later.
838 			 */
839 		}
840 #endif
841 	} else {
842 		/*
843 		 * Try to merge with our neighbors.
844 		 *
845 		 * Conditions for merge are:
846 		 *
847 		 * 1.  entries are adjacent.
848 		 * 2.  both entries point to objects with null pagers.
849 		 *
850 		 * If a merge is possible, we replace the two entries with a
851 		 * single entry, then merge the two objects into a single
852 		 * object.
853 		 *
854 		 * Now, all that is left to do is write the code!
855 		 */
856 	}
857 }
858 
859 /*
860  *	vm_map_clip_start:	[ internal use only ]
861  *
862  *	Asserts that the given entry begins at or after
863  *	the specified address; if necessary,
864  *	it splits the entry into two.
865  */
866 #define vm_map_clip_start(map, entry, startaddr) \
867 { \
868 	if (startaddr > entry->start) \
869 		_vm_map_clip_start(map, entry, startaddr); \
870 }
871 
872 /*
873  *	This routine is called only when it is known that
874  *	the entry must be split.
875  */
876 static void
877 _vm_map_clip_start(map, entry, start)
878 	register vm_map_t map;
879 	register vm_map_entry_t entry;
880 	register vm_offset_t start;
881 {
882 	register vm_map_entry_t new_entry;
883 
884 	/*
885 	 * See if we can simplify this entry first
886 	 */
887 
888 	/* vm_map_simplify_entry(map, entry); */
889 
890 	/*
891 	 * Split off the front portion -- note that we must insert the new
892 	 * entry BEFORE this one, so that this entry has the specified
893 	 * starting address.
894 	 */
895 
896 	new_entry = vm_map_entry_create(map);
897 	*new_entry = *entry;
898 
899 	new_entry->end = start;
900 	entry->offset += (start - entry->start);
901 	entry->start = start;
902 
903 	vm_map_entry_link(map, entry->prev, new_entry);
904 
905 	if (entry->is_a_map || entry->is_sub_map)
906 		vm_map_reference(new_entry->object.share_map);
907 	else
908 		vm_object_reference(new_entry->object.vm_object);
909 }
910 
911 /*
912  *	vm_map_clip_end:	[ internal use only ]
913  *
914  *	Asserts that the given entry ends at or before
915  *	the specified address; if necessary,
916  *	it splits the entry into two.
917  */
918 
919 #define vm_map_clip_end(map, entry, endaddr) \
920 { \
921 	if (endaddr < entry->end) \
922 		_vm_map_clip_end(map, entry, endaddr); \
923 }
924 
925 /*
926  *	This routine is called only when it is known that
927  *	the entry must be split.
928  */
929 static void
930 _vm_map_clip_end(map, entry, end)
931 	register vm_map_t map;
932 	register vm_map_entry_t entry;
933 	register vm_offset_t end;
934 {
935 	register vm_map_entry_t new_entry;
936 
937 	/*
938 	 * Create a new entry and insert it AFTER the specified entry
939 	 */
940 
941 	new_entry = vm_map_entry_create(map);
942 	*new_entry = *entry;
943 
944 	new_entry->start = entry->end = end;
945 	new_entry->offset += (end - entry->start);
946 
947 	vm_map_entry_link(map, entry, new_entry);
948 
949 	if (entry->is_a_map || entry->is_sub_map)
950 		vm_map_reference(new_entry->object.share_map);
951 	else
952 		vm_object_reference(new_entry->object.vm_object);
953 }
954 
955 /*
956  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
957  *
958  *	Asserts that the starting and ending region
959  *	addresses fall within the valid range of the map.
960  */
961 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
962 		{					\
963 		if (start < vm_map_min(map))		\
964 			start = vm_map_min(map);	\
965 		if (end > vm_map_max(map))		\
966 			end = vm_map_max(map);		\
967 		if (start > end)			\
968 			start = end;			\
969 		}
970 
971 /*
972  *	vm_map_submap:		[ kernel use only ]
973  *
974  *	Mark the given range as handled by a subordinate map.
975  *
976  *	This range must have been created with vm_map_find,
977  *	and no other operations may have been performed on this
978  *	range prior to calling vm_map_submap.
979  *
980  *	Only a limited number of operations can be performed
981  *	within this range after calling vm_map_submap:
982  *		vm_fault
983  *	[Don't try vm_map_copy!]
984  *
985  *	To remove a submapping, one must first remove the
986  *	range from the superior map, and then destroy the
987  *	submap (if desired).  [Better yet, don't try it.]
988  */
989 int
990 vm_map_submap(map, start, end, submap)
991 	register vm_map_t map;
992 	register vm_offset_t start;
993 	register vm_offset_t end;
994 	vm_map_t submap;
995 {
996 	vm_map_entry_t entry;
997 	register int result = KERN_INVALID_ARGUMENT;
998 
999 	vm_map_lock(map);
1000 
1001 	VM_MAP_RANGE_CHECK(map, start, end);
1002 
1003 	if (vm_map_lookup_entry(map, start, &entry)) {
1004 		vm_map_clip_start(map, entry, start);
1005 	} else
1006 		entry = entry->next;
1007 
1008 	vm_map_clip_end(map, entry, end);
1009 
1010 	if ((entry->start == start) && (entry->end == end) &&
1011 	    (!entry->is_a_map) &&
1012 	    (entry->object.vm_object == NULL) &&
1013 	    (!entry->copy_on_write)) {
1014 		entry->is_a_map = FALSE;
1015 		entry->is_sub_map = TRUE;
1016 		vm_map_reference(entry->object.sub_map = submap);
1017 		result = KERN_SUCCESS;
1018 	}
1019 	vm_map_unlock(map);
1020 
1021 	return (result);
1022 }
1023 
1024 /*
1025  *	vm_map_protect:
1026  *
1027  *	Sets the protection of the specified address
1028  *	region in the target map.  If "set_max" is
1029  *	specified, the maximum protection is to be set;
1030  *	otherwise, only the current protection is affected.
1031  */
1032 int
1033 vm_map_protect(map, start, end, new_prot, set_max)
1034 	register vm_map_t map;
1035 	register vm_offset_t start;
1036 	register vm_offset_t end;
1037 	register vm_prot_t new_prot;
1038 	register boolean_t set_max;
1039 {
1040 	register vm_map_entry_t current;
1041 	vm_map_entry_t entry;
1042 
1043 	vm_map_lock(map);
1044 
1045 	VM_MAP_RANGE_CHECK(map, start, end);
1046 
1047 	if (vm_map_lookup_entry(map, start, &entry)) {
1048 		vm_map_clip_start(map, entry, start);
1049 	} else
1050 		entry = entry->next;
1051 
1052 	/*
1053 	 * Make a first pass to check for protection violations.
1054 	 */
1055 
1056 	current = entry;
1057 	while ((current != &map->header) && (current->start < end)) {
1058 		if (current->is_sub_map) {
1059 			vm_map_unlock(map);
1060 			return (KERN_INVALID_ARGUMENT);
1061 		}
1062 		if ((new_prot & current->max_protection) != new_prot) {
1063 			vm_map_unlock(map);
1064 			return (KERN_PROTECTION_FAILURE);
1065 		}
1066 		current = current->next;
1067 	}
1068 
1069 	/*
1070 	 * Go back and fix up protections. [Note that clipping is not
1071 	 * necessary the second time.]
1072 	 */
1073 
1074 	current = entry;
1075 
1076 	while ((current != &map->header) && (current->start < end)) {
1077 		vm_prot_t old_prot;
1078 
1079 		vm_map_clip_end(map, current, end);
1080 
1081 		old_prot = current->protection;
1082 		if (set_max)
1083 			current->protection =
1084 			    (current->max_protection = new_prot) &
1085 			    old_prot;
1086 		else
1087 			current->protection = new_prot;
1088 
1089 		/*
1090 		 * Update physical map if necessary. Worry about copy-on-write
1091 		 * here -- CHECK THIS XXX
1092 		 */
1093 
1094 		if (current->protection != old_prot) {
1095 
1096 #define MASK(entry)	((entry)->copy_on_write ? ~VM_PROT_WRITE : \
1097 							VM_PROT_ALL)
1098 #define	max(a,b)	((a) > (b) ? (a) : (b))
1099 
1100 			if (current->is_a_map) {
1101 				vm_map_entry_t share_entry;
1102 				vm_offset_t share_end;
1103 
1104 				vm_map_lock(current->object.share_map);
1105 				(void) vm_map_lookup_entry(
1106 				    current->object.share_map,
1107 				    current->offset,
1108 				    &share_entry);
1109 				share_end = current->offset +
1110 				    (current->end - current->start);
1111 				while ((share_entry !=
1112 					&current->object.share_map->header) &&
1113 				    (share_entry->start < share_end)) {
1114 
1115 					pmap_protect(map->pmap,
1116 					    (max(share_entry->start,
1117 						    current->offset) -
1118 						current->offset +
1119 						current->start),
1120 					    min(share_entry->end,
1121 						share_end) -
1122 					    current->offset +
1123 					    current->start,
1124 					    current->protection &
1125 					    MASK(share_entry));
1126 
1127 					share_entry = share_entry->next;
1128 				}
1129 				vm_map_unlock(current->object.share_map);
1130 			} else
1131 				pmap_protect(map->pmap, current->start,
1132 				    current->end,
1133 				    current->protection & MASK(entry));
1134 #undef	max
1135 #undef	MASK
1136 		}
1137 		current = current->next;
1138 	}
1139 
1140 	vm_map_unlock(map);
1141 	return (KERN_SUCCESS);
1142 }
1143 
1144 /*
1145  *	vm_map_inherit:
1146  *
1147  *	Sets the inheritance of the specified address
1148  *	range in the target map.  Inheritance
1149  *	affects how the map will be shared with
1150  *	child maps at the time of vm_map_fork.
1151  */
1152 int
1153 vm_map_inherit(map, start, end, new_inheritance)
1154 	register vm_map_t map;
1155 	register vm_offset_t start;
1156 	register vm_offset_t end;
1157 	register vm_inherit_t new_inheritance;
1158 {
1159 	register vm_map_entry_t entry;
1160 	vm_map_entry_t temp_entry;
1161 
1162 	switch (new_inheritance) {
1163 	case VM_INHERIT_NONE:
1164 	case VM_INHERIT_COPY:
1165 	case VM_INHERIT_SHARE:
1166 		break;
1167 	default:
1168 		return (KERN_INVALID_ARGUMENT);
1169 	}
1170 
1171 	vm_map_lock(map);
1172 
1173 	VM_MAP_RANGE_CHECK(map, start, end);
1174 
1175 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1176 		entry = temp_entry;
1177 		vm_map_clip_start(map, entry, start);
1178 	} else
1179 		entry = temp_entry->next;
1180 
1181 	while ((entry != &map->header) && (entry->start < end)) {
1182 		vm_map_clip_end(map, entry, end);
1183 
1184 		entry->inheritance = new_inheritance;
1185 
1186 		entry = entry->next;
1187 	}
1188 
1189 	vm_map_unlock(map);
1190 	return (KERN_SUCCESS);
1191 }
1192 
1193 /*
1194  *	vm_map_pageable:
1195  *
1196  *	Sets the pageability of the specified address
1197  *	range in the target map.  Regions specified
1198  *	as not pageable require locked-down physical
1199  *	memory and physical page maps.
1200  *
1201  *	The map must not be locked, but a reference
1202  *	must remain to the map throughout the call.
1203  */
1204 int
1205 vm_map_pageable(map, start, end, new_pageable)
1206 	register vm_map_t map;
1207 	register vm_offset_t start;
1208 	register vm_offset_t end;
1209 	register boolean_t new_pageable;
1210 {
1211 	register vm_map_entry_t entry;
1212 	vm_map_entry_t start_entry;
1213 	register vm_offset_t failed = 0;
1214 	int rv;
1215 
1216 	vm_map_lock(map);
1217 
1218 	VM_MAP_RANGE_CHECK(map, start, end);
1219 
1220 	/*
1221 	 * Only one pageability change may take place at one time, since
1222 	 * vm_fault assumes it will be called only once for each
1223 	 * wiring/unwiring.  Therefore, we have to make sure we're actually
1224 	 * changing the pageability for the entire region.  We do so before
1225 	 * making any changes.
1226 	 */
1227 
1228 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1229 		vm_map_unlock(map);
1230 		return (KERN_INVALID_ADDRESS);
1231 	}
1232 	entry = start_entry;
1233 
1234 	/*
1235 	 * Actions are rather different for wiring and unwiring, so we have
1236 	 * two separate cases.
1237 	 */
1238 
1239 	if (new_pageable) {
1240 
1241 		vm_map_clip_start(map, entry, start);
1242 
1243 		/*
1244 		 * Unwiring.  First ensure that the range to be unwired is
1245 		 * really wired down and that there are no holes.
1246 		 */
1247 		while ((entry != &map->header) && (entry->start < end)) {
1248 
1249 			if (entry->wired_count == 0 ||
1250 			    (entry->end < end &&
1251 				(entry->next == &map->header ||
1252 				    entry->next->start > entry->end))) {
1253 				vm_map_unlock(map);
1254 				return (KERN_INVALID_ARGUMENT);
1255 			}
1256 			entry = entry->next;
1257 		}
1258 
1259 		/*
1260 		 * Now decrement the wiring count for each region. If a region
1261 		 * becomes completely unwired, unwire its physical pages and
1262 		 * mappings.
1263 		 */
1264 		lock_set_recursive(&map->lock);
1265 
1266 		entry = start_entry;
1267 		while ((entry != &map->header) && (entry->start < end)) {
1268 			vm_map_clip_end(map, entry, end);
1269 
1270 			entry->wired_count--;
1271 			if (entry->wired_count == 0)
1272 				vm_fault_unwire(map, entry->start, entry->end);
1273 
1274 			entry = entry->next;
1275 		}
1276 		lock_clear_recursive(&map->lock);
1277 	} else {
1278 		/*
1279 		 * Wiring.  We must do this in two passes:
1280 		 *
1281 		 * 1.  Holding the write lock, we create any shadow or zero-fill
1282 		 * objects that need to be created. Then we clip each map
1283 		 * entry to the region to be wired and increment its wiring
1284 		 * count.  We create objects before clipping the map entries
1285 		 * to avoid object proliferation.
1286 		 *
1287 		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
1288 		 * fault in the pages for any newly wired area (wired_count is
1289 		 * 1).
1290 		 *
1291 		 * Downgrading to a read lock for vm_fault_wire avoids a possible
1292 		 * deadlock with another thread that may have faulted on one
1293 		 * of the pages to be wired (it would mark the page busy,
1294 		 * blocking us, then in turn block on the map lock that we
1295 		 * hold).  Because of problems in the recursive lock package,
1296 		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
1297 		 * any actions that require the write lock must be done
1298 		 * beforehand.  Because we keep the read lock on the map, the
1299 		 * copy-on-write status of the entries we modify here cannot
1300 		 * change.
1301 		 */
1302 
1303 		/*
1304 		 * Pass 1.
1305 		 */
1306 		while ((entry != &map->header) && (entry->start < end)) {
1307 			if (entry->wired_count == 0) {
1308 
1309 				/*
1310 				 * Perform actions of vm_map_lookup that need
1311 				 * the write lock on the map: create a shadow
1312 				 * object for a copy-on-write region, or an
1313 				 * object for a zero-fill region.
1314 				 *
1315 				 * We don't have to do this for entries that
1316 				 * point to sharing maps, because we won't
1317 				 * hold the lock on the sharing map.
1318 				 */
1319 				if (!entry->is_a_map && !entry->is_sub_map) {
1320 					if (entry->needs_copy &&
1321 					    ((entry->protection & VM_PROT_WRITE) != 0)) {
1322 
1323 						vm_object_shadow(&entry->object.vm_object,
1324 						    &entry->offset,
1325 						    (vm_size_t) (entry->end
1326 							- entry->start));
1327 						entry->needs_copy = FALSE;
1328 					} else if (entry->object.vm_object == NULL) {
1329 						entry->object.vm_object =
1330 						    vm_object_allocate((vm_size_t) (entry->end
1331 							- entry->start));
1332 						entry->offset = (vm_offset_t) 0;
1333 					}
1334 				}
1335 			}
1336 			vm_map_clip_start(map, entry, start);
1337 			vm_map_clip_end(map, entry, end);
1338 			entry->wired_count++;
1339 
1340 			/*
1341 			 * Check for holes
1342 			 */
1343 			if (entry->end < end &&
1344 			    (entry->next == &map->header ||
1345 				entry->next->start > entry->end)) {
1346 				/*
1347 				 * Found one.  Object creation actions do not
1348 				 * need to be undone, but the wired counts
1349 				 * need to be restored.
1350 				 */
1351 				while (entry != &map->header && entry->end > start) {
1352 					entry->wired_count--;
1353 					entry = entry->prev;
1354 				}
1355 				vm_map_unlock(map);
1356 				return (KERN_INVALID_ARGUMENT);
1357 			}
1358 			entry = entry->next;
1359 		}
1360 
1361 		/*
1362 		 * Pass 2.
1363 		 */
1364 
1365 		/*
1366 		 * HACK HACK HACK HACK
1367 		 *
1368 		 * If we are wiring in the kernel map or a submap of it, unlock
1369 		 * the map to avoid deadlocks.  We trust that the kernel
1370 		 * threads are well-behaved, and therefore will not do
1371 		 * anything destructive to this region of the map while we
1372 		 * have it unlocked.  We cannot trust user threads to do the
1373 		 * same.
1374 		 *
1375 		 * HACK HACK HACK HACK
1376 		 */
1377 		if (vm_map_pmap(map) == kernel_pmap) {
1378 			vm_map_unlock(map);	/* trust me ... */
1379 		} else {
1380 			lock_set_recursive(&map->lock);
1381 			lock_write_to_read(&map->lock);
1382 		}
1383 
1384 		rv = 0;
1385 		entry = start_entry;
1386 		while (entry != &map->header && entry->start < end) {
1387 			/*
1388 			 * If vm_fault_wire fails for any page we need to undo
1389 			 * what has been done.  We decrement the wiring count
1390 			 * for those pages which have not yet been wired (now)
1391 			 * and unwire those that have (later).
1392 			 *
1393 			 * XXX this violates the locking protocol on the map,
1394 			 * needs to be fixed.
1395 			 */
1396 			if (rv)
1397 				entry->wired_count--;
1398 			else if (entry->wired_count == 1) {
1399 				rv = vm_fault_wire(map, entry->start, entry->end);
1400 				if (rv) {
1401 					failed = entry->start;
1402 					entry->wired_count--;
1403 				}
1404 			}
1405 			entry = entry->next;
1406 		}
1407 
1408 		if (vm_map_pmap(map) == kernel_pmap) {
1409 			vm_map_lock(map);
1410 		} else {
1411 			lock_clear_recursive(&map->lock);
1412 		}
1413 		if (rv) {
1414 			vm_map_unlock(map);
1415 			(void) vm_map_pageable(map, start, failed, TRUE);
1416 			return (rv);
1417 		}
1418 	}
1419 
1420 	vm_map_unlock(map);
1421 
1422 	return (KERN_SUCCESS);
1423 }
1424 
1425 /*
1426  * vm_map_clean
1427  *
1428  * Push any dirty cached pages in the address range to their pager.
1429  * If syncio is TRUE, dirty pages are written synchronously.
1430  * If invalidate is TRUE, any cached pages are freed as well.
1431  *
1432  * Returns an error if any part of the specified range is not mapped.
1433  */
1434 int
1435 vm_map_clean(map, start, end, syncio, invalidate)
1436 	vm_map_t map;
1437 	vm_offset_t start;
1438 	vm_offset_t end;
1439 	boolean_t syncio;
1440 	boolean_t invalidate;
1441 {
1442 	register vm_map_entry_t current;
1443 	vm_map_entry_t entry;
1444 	vm_size_t size;
1445 	vm_object_t object;
1446 	vm_offset_t offset;
1447 
1448 	vm_map_lock_read(map);
1449 	VM_MAP_RANGE_CHECK(map, start, end);
1450 	if (!vm_map_lookup_entry(map, start, &entry)) {
1451 		vm_map_unlock_read(map);
1452 		return (KERN_INVALID_ADDRESS);
1453 	}
1454 	/*
1455 	 * Make a first pass to check for holes.
1456 	 */
1457 	for (current = entry; current->start < end; current = current->next) {
1458 		if (current->is_sub_map) {
1459 			vm_map_unlock_read(map);
1460 			return (KERN_INVALID_ARGUMENT);
1461 		}
1462 		if (end > current->end &&
1463 		    (current->next == &map->header ||
1464 			current->end != current->next->start)) {
1465 			vm_map_unlock_read(map);
1466 			return (KERN_INVALID_ADDRESS);
1467 		}
1468 	}
1469 
1470 	/*
1471 	 * Make a second pass, cleaning/uncaching pages from the indicated
1472 	 * objects as we go.
1473 	 */
1474 	for (current = entry; current->start < end; current = current->next) {
1475 		offset = current->offset + (start - current->start);
1476 		size = (end <= current->end ? end : current->end) - start;
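		/*
		 * For a sharing (or sub) map entry, chase down to the
		 * underlying object in that map and clamp the size to the
		 * portion actually covered by the share map entry.
		 */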
1477 		if (current->is_a_map || current->is_sub_map) {
1478 			register vm_map_t smap;
1479 			vm_map_entry_t tentry;
1480 			vm_size_t tsize;
1481 
1482 			smap = current->object.share_map;
1483 			vm_map_lock_read(smap);
1484 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1485 			tsize = tentry->end - offset;
1486 			if (tsize < size)
1487 				size = tsize;
1488 			object = tentry->object.vm_object;
1489 			offset = tentry->offset + (offset - tentry->start);
1490 			vm_map_unlock_read(smap);
1491 		} else {
1492 			object = current->object.vm_object;
1493 		}
1494 		if (object && (object->pager != NULL) &&
1495 		    (object->pager->pg_type == PG_VNODE)) {
1496 			vm_object_lock(object);
1497 			/*
1498 			 * Flush pages if writing is allowed. XXX should we continue
1499 			 * on an error?
1500 			 */
1501 			if ((current->protection & VM_PROT_WRITE) &&
1502 		   	    !vm_object_page_clean(object, offset, offset + size,
1503 			    syncio, FALSE)) {
1504 				vm_object_unlock(object);
1505 				vm_map_unlock_read(map);
1506 				return (KERN_FAILURE);
1507 			}
1508 			if (invalidate)
1509 				vm_object_page_remove(object, offset, offset + size);
1510 			vm_object_unlock(object);
1511 		}
1512 		start += size;
1513 	}
1514 
1515 	vm_map_unlock_read(map);
1516 	return (KERN_SUCCESS);
1517 }
1518 
1519 /*
1520  *	vm_map_entry_unwire:	[ internal use only ]
1521  *
1522  *	Make the region specified by this entry pageable.
1523  *
1524  *	The map in question should be locked.
1525  *	[This is the reason for this routine's existence.]
1526  */
1527 void
1528 vm_map_entry_unwire(map, entry)
1529 	vm_map_t map;
1530 	register vm_map_entry_t entry;
1531 {
1532 	vm_fault_unwire(map, entry->start, entry->end);
1533 	entry->wired_count = 0;
1534 }
1535 
1536 /*
1537  *	vm_map_entry_delete:	[ internal use only ]
1538  *
1539  *	Deallocate the given entry from the target map.
1540  */
1541 void
1542 vm_map_entry_delete(map, entry)
1543 	register vm_map_t map;
1544 	register vm_map_entry_t entry;
1545 {
1546 	if (entry->wired_count != 0)
1547 		vm_map_entry_unwire(map, entry);
1548 
1549 	vm_map_entry_unlink(map, entry);
1550 	map->size -= entry->end - entry->start;
1551 
1552 	if (entry->is_a_map || entry->is_sub_map)
1553 		vm_map_deallocate(entry->object.share_map);
1554 	else
1555 		vm_object_deallocate(entry->object.vm_object);
1556 
1557 	vm_map_entry_dispose(map, entry);
1558 }
1559 
1560 /*
1561  *	vm_map_delete:	[ internal use only ]
1562  *
1563  *	Deallocates the given address range from the target
1564  *	map.
1565  *
1566  *	When called with a sharing map, removes pages from
1567  *	that region from all physical maps.
1568  */
1569 int
1570 vm_map_delete(map, start, end)
1571 	register vm_map_t map;
1572 	vm_offset_t start;
1573 	register vm_offset_t end;
1574 {
1575 	register vm_map_entry_t entry;
1576 	vm_map_entry_t first_entry;
1577 
1578 	/*
1579 	 * Find the start of the region, and clip it
1580 	 */
1581 
1582 	if (!vm_map_lookup_entry(map, start, &first_entry))
1583 		entry = first_entry->next;
1584 	else {
1585 		entry = first_entry;
1586 		vm_map_clip_start(map, entry, start);
1587 
1588 		/*
1589 		 * Fix the lookup hint now, rather than each time through the
1590 		 * loop.
1591 		 */
1592 
1593 		SAVE_HINT(map, entry->prev);
1594 	}
1595 
1596 	/*
1597 	 * Save the free space hint
1598 	 */
1599 
1600 	if (map->first_free->start >= start)
1601 		map->first_free = entry->prev;
1602 
1603 	/*
1604 	 * Step through all entries in this region
1605 	 */
1606 
1607 	while ((entry != &map->header) && (entry->start < end)) {
1608 		vm_map_entry_t next;
1609 		register vm_offset_t s, e;
1610 		register vm_object_t object;
1611 
1612 		vm_map_clip_end(map, entry, end);
1613 
1614 		next = entry->next;
1615 		s = entry->start;
1616 		e = entry->end;
1617 
1618 		/*
1619 		 * Unwire before removing addresses from the pmap; otherwise,
1620 		 * unwiring will put the entries back in the pmap.
1621 		 */
1622 
1623 		object = entry->object.vm_object;
1624 		if (entry->wired_count != 0)
1625 			vm_map_entry_unwire(map, entry);
1626 
1627 		/*
1628 		 * If this is a sharing map, we must remove *all* references
1629 		 * to this data, since we can't find all of the physical maps
1630 		 * which are sharing it.
1631 		 */
1632 
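		/*
		 * Mappings of kernel_object and kmem_object memory are
		 * handled by freeing the backing pages directly.
		 */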
1633 		if (object == kernel_object || object == kmem_object)
1634 			vm_object_page_remove(object, entry->offset,
1635 			    entry->offset + (e - s));
1636 		else if (!map->is_main_map)
1637 			vm_object_pmap_remove(object,
1638 			    entry->offset,
1639 			    entry->offset + (e - s));
1640 		else
1641 			pmap_remove(map->pmap, s, e);
1642 
1643 		/*
1644 		 * Delete the entry (which may delete the object) only after
1645 		 * removing all pmap entries pointing to its pages.
1646 		 * (Otherwise, its page frames may be reallocated, and any
1647 		 * modify bits will be set in the wrong object!)
1648 		 */
1649 
1650 		vm_map_entry_delete(map, entry);
1651 		entry = next;
1652 	}
1653 	return (KERN_SUCCESS);
1654 }
1655 
1656 /*
1657  *	vm_map_remove:
1658  *
1659  *	Remove the given address range from the target map.
1660  *	This is the exported form of vm_map_delete.
1661  */
1662 int
1663 vm_map_remove(map, start, end)
1664 	register vm_map_t map;
1665 	register vm_offset_t start;
1666 	register vm_offset_t end;
1667 {
1668 	register int result, s = 0;
1669 
1670 	if (map == kmem_map)
1671 		s = splhigh();
1672 
1673 	vm_map_lock(map);
1674 	VM_MAP_RANGE_CHECK(map, start, end);
1675 	result = vm_map_delete(map, start, end);
1676 	vm_map_unlock(map);
1677 
1678 	if (map == kmem_map)
1679 		splx(s);
1680 
1681 	return (result);
1682 }
1683 
1684 /*
1685  *	vm_map_check_protection:
1686  *
1687  *	Assert that the target map allows the specified
1688  *	privilege on the entire address region given.
1689  *	The entire region must be allocated.
1690  */
1691 boolean_t
1692 vm_map_check_protection(map, start, end, protection)
1693 	register vm_map_t map;
1694 	register vm_offset_t start;
1695 	register vm_offset_t end;
1696 	register vm_prot_t protection;
1697 {
1698 	register vm_map_entry_t entry;
1699 	vm_map_entry_t tmp_entry;
1700 
1701 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1702 		return (FALSE);
1703 	}
1704 	entry = tmp_entry;
1705 
1706 	while (start < end) {
1707 		if (entry == &map->header) {
1708 			return (FALSE);
1709 		}
1710 		/*
1711 		 * No holes allowed!
1712 		 */
1713 
1714 		if (start < entry->start) {
1715 			return (FALSE);
1716 		}
1717 		/*
1718 		 * Check protection associated with entry.
1719 		 */
1720 
1721 		if ((entry->protection & protection) != protection) {
1722 			return (FALSE);
1723 		}
1724 		/* go to next entry */
1725 
1726 		start = entry->end;
1727 		entry = entry->next;
1728 	}
1729 	return (TRUE);
1730 }
1731 
1732 /*
1733  *	vm_map_copy_entry:
1734  *
1735  *	Copies the contents of the source entry to the destination
1736  *	entry.  The entries *must* be aligned properly.
1737  */
1738 void
1739 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
1740 	vm_map_t src_map, dst_map;
1741 	register vm_map_entry_t src_entry, dst_entry;
1742 {
1743 	vm_object_t temp_object;
1744 
1745 	if (src_entry->is_sub_map || dst_entry->is_sub_map)
1746 		return;
1747 
1748 	if (dst_entry->object.vm_object != NULL &&
1749 	    (dst_entry->object.vm_object->flags & OBJ_INTERNAL) == 0)
1750 		printf("vm_map_copy_entry: copying over permanent data!\n");
1751 
1752 	/*
1753 	 * If our destination map was wired down, unwire it now.
1754 	 */
1755 
1756 	if (dst_entry->wired_count != 0)
1757 		vm_map_entry_unwire(dst_map, dst_entry);
1758 
1759 	/*
1760 	 * If we're dealing with a sharing map, we must remove the destination
1761 	 * pages from all maps (since we cannot know which maps this sharing
1762 	 * map belongs in).
1763 	 */
1764 
1765 	if (dst_map->is_main_map)
1766 		pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end);
1767 	else
1768 		vm_object_pmap_remove(dst_entry->object.vm_object,
1769 		    dst_entry->offset,
1770 		    dst_entry->offset +
1771 		    (dst_entry->end - dst_entry->start));
1772 
1773 	if (src_entry->wired_count == 0) {
1774 
1775 		boolean_t src_needs_copy;
1776 
1777 		/*
1778 		 * If the source entry is marked needs_copy, it is already
1779 		 * write-protected.
1780 		 */
1781 		if (!src_entry->needs_copy) {
1782 
1783 			boolean_t su;
1784 
1785 			/*
1786 			 * If the source entry has only one mapping, we can
1787 			 * just protect the virtual address range.
1788 			 */
1789 			if (!(su = src_map->is_main_map)) {
1790 				simple_lock(&src_map->ref_lock);
1791 				su = (src_map->ref_count == 1);
1792 				simple_unlock(&src_map->ref_lock);
1793 			}
1794 			if (su) {
1795 				pmap_protect(src_map->pmap,
1796 				    src_entry->start,
1797 				    src_entry->end,
1798 				    src_entry->protection & ~VM_PROT_WRITE);
1799 			} else {
1800 				vm_object_pmap_copy(src_entry->object.vm_object,
1801 				    src_entry->offset,
1802 				    src_entry->offset + (src_entry->end
1803 					- src_entry->start));
1804 			}
1805 		}
1806 		/*
1807 		 * Make a copy of the object.
1808 		 */
1809 		temp_object = dst_entry->object.vm_object;
1810 		vm_object_copy(src_entry->object.vm_object,
1811 		    src_entry->offset,
1812 		    (vm_size_t) (src_entry->end -
1813 			src_entry->start),
1814 		    &dst_entry->object.vm_object,
1815 		    &dst_entry->offset,
1816 		    &src_needs_copy);
1817 		/*
1818 		 * If we didn't get a copy-object now, mark the source map
1819 		 * entry so that a shadow will be created to hold its changed
1820 		 * pages.
1821 		 */
1822 		if (src_needs_copy)
1823 			src_entry->needs_copy = TRUE;
1824 
1825 		/*
1826 		 * The destination always needs to have a shadow created.
1827 		 */
1828 		dst_entry->needs_copy = TRUE;
1829 
1830 		/*
1831 		 * Mark the entries copy-on-write, so that write-enabling the
1832 		 * entry won't make copy-on-write pages writable.
1833 		 */
1834 		src_entry->copy_on_write = TRUE;
1835 		dst_entry->copy_on_write = TRUE;
1836 		/*
1837 		 * Get rid of the old object.
1838 		 */
1839 		vm_object_deallocate(temp_object);
1840 
1841 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
1842 		    dst_entry->end - dst_entry->start, src_entry->start);
1843 	} else {
1844 		/*
1845 		 * Of course, wired down pages can't be set copy-on-write.
1846 		 * Cause wired pages to be copied into the new map by
1847 		 * simulating faults (the new pages are pageable)
1848 		 */
1849 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
1850 	}
1851 }
1852 
1853 /*
1854  *	vm_map_copy:
1855  *
1856  *	Perform a virtual memory copy from the source
1857  *	address map/range to the destination map/range.
1858  *
1859  *	If src_destroy or dst_alloc is requested,
1860  *	the source and destination regions should be
1861  *	disjoint, not only in the top-level map, but
1862  *	in the sharing maps as well.  [The best way
1863  *	to guarantee this is to use a new intermediate
1864  *	map to make copies.  This also reduces map
1865  *	fragmentation.]
1866  */
1867 int
1868 vm_map_copy(dst_map, src_map,
1869     dst_addr, len, src_addr,
1870     dst_alloc, src_destroy)
1871 	vm_map_t dst_map;
1872 	vm_map_t src_map;
1873 	vm_offset_t dst_addr;
1874 	vm_size_t len;
1875 	vm_offset_t src_addr;
1876 	boolean_t dst_alloc;
1877 	boolean_t src_destroy;
1878 {
1879 	register
1880 	vm_map_entry_t src_entry;
1881 	register
1882 	vm_map_entry_t dst_entry;
1883 	vm_map_entry_t tmp_entry;
1884 	vm_offset_t src_start;
1885 	vm_offset_t src_end;
1886 	vm_offset_t dst_start;
1887 	vm_offset_t dst_end;
1888 	vm_offset_t src_clip;
1889 	vm_offset_t dst_clip;
1890 	int result;
1891 	boolean_t old_src_destroy;
1892 
1893 	/*
1894 	 * XXX While we figure out why src_destroy screws up, we'll do it by
1895 	 * explicitly vm_map_delete'ing at the end.
1896 	 */
1897 
1898 	old_src_destroy = src_destroy;
1899 	src_destroy = FALSE;
1900 
1901 	/*
1902 	 * Compute start and end of region in both maps
1903 	 */
1904 
1905 	src_start = src_addr;
1906 	src_end = src_start + len;
1907 	dst_start = dst_addr;
1908 	dst_end = dst_start + len;
1909 
1910 	/*
1911 	 * Check that the region can exist in both source and destination.
1912 	 */
1913 
1914 	if ((dst_end < dst_start) || (src_end < src_start))
1915 		return (KERN_NO_SPACE);
1916 
1917 	/*
1918 	 * Lock the maps in question -- we avoid deadlock by ordering lock
1919 	 * acquisition by map value
1920 	 */
1921 
1922 	if (src_map == dst_map) {
1923 		vm_map_lock(src_map);
1924 	} else if ((int) src_map < (int) dst_map) {
1925 		vm_map_lock(src_map);
1926 		vm_map_lock(dst_map);
1927 	} else {
1928 		vm_map_lock(dst_map);
1929 		vm_map_lock(src_map);
1930 	}
1931 
1932 	result = KERN_SUCCESS;
1933 
1934 	/*
1935 	 * Check protections... source must be completely readable and
1936 	 * destination must be completely writable.  [Note that if we're
1937 	 * allocating the destination region, we don't have to worry about
1938 	 * protection, but instead about whether the region exists.]
1939 	 */
1940 
1941 	if (src_map->is_main_map && dst_map->is_main_map) {
1942 		if (!vm_map_check_protection(src_map, src_start, src_end,
1943 			VM_PROT_READ)) {
1944 			result = KERN_PROTECTION_FAILURE;
1945 			goto Return;
1946 		}
1947 		if (dst_alloc) {
1948 			/* XXX Consider making this a vm_map_find instead */
1949 			if ((result = vm_map_insert(dst_map, NULL,
1950 				    (vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS)
1951 				goto Return;
1952 		} else if (!vm_map_check_protection(dst_map, dst_start, dst_end,
1953 			VM_PROT_WRITE)) {
1954 			result = KERN_PROTECTION_FAILURE;
1955 			goto Return;
1956 		}
1957 	}
1958 	/*
1959 	 * Find the start entries and clip.
1960 	 *
1961 	 * Note that checking protection asserts that the lookup cannot fail.
1962 	 *
1963 	 * Also note that we wait to do the second lookup until we have done the
1964 	 * first clip, as the clip may affect which entry we get!
1965 	 */
1966 
1967 	(void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
1968 	src_entry = tmp_entry;
1969 	vm_map_clip_start(src_map, src_entry, src_start);
1970 
1971 	(void) vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry);
1972 	dst_entry = tmp_entry;
1973 	vm_map_clip_start(dst_map, dst_entry, dst_start);
1974 
1975 	/*
1976 	 * If both source and destination entries are the same, retry the
1977 	 * first lookup, as it may have changed.
1978 	 */
1979 
1980 	if (src_entry == dst_entry) {
1981 		(void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
1982 		src_entry = tmp_entry;
1983 	}
1984 	/*
1985 	 * If source and destination entries are still the same, a null copy
1986 	 * is being performed.
1987 	 */
1988 
1989 	if (src_entry == dst_entry)
1990 		goto Return;
1991 
1992 	/*
1993 	 * Go through entries until we get to the end of the region.
1994 	 */
1995 
1996 	while (src_start < src_end) {
1997 		/*
1998 		 * Clip the entries to the endpoint of the entire region.
1999 		 */
2000 
2001 		vm_map_clip_end(src_map, src_entry, src_end);
2002 		vm_map_clip_end(dst_map, dst_entry, dst_end);
2003 
2004 		/*
2005 		 * Clip each entry to the endpoint of the other entry.
2006 		 */
2007 
2008 		src_clip = src_entry->start + (dst_entry->end - dst_entry->start);
2009 		vm_map_clip_end(src_map, src_entry, src_clip);
2010 
2011 		dst_clip = dst_entry->start + (src_entry->end - src_entry->start);
2012 		vm_map_clip_end(dst_map, dst_entry, dst_clip);
2013 
2014 		/*
2015 		 * Both entries now match in size and relative endpoints.
2016 		 *
2017 		 * If both entries refer to a VM object, we can deal with them
2018 		 * now.
2019 		 */
2020 
2021 		if (!src_entry->is_a_map && !dst_entry->is_a_map) {
2022 			vm_map_copy_entry(src_map, dst_map, src_entry,
2023 			    dst_entry);
2024 		} else {
2025 			register vm_map_t new_dst_map;
2026 			vm_offset_t new_dst_start;
2027 			vm_size_t new_size;
2028 			vm_map_t new_src_map;
2029 			vm_offset_t new_src_start;
2030 
2031 			/*
2032 			 * We have to follow at least one sharing map.
2033 			 */
2034 
2035 			new_size = (dst_entry->end - dst_entry->start);
2036 
2037 			if (src_entry->is_a_map) {
2038 				new_src_map = src_entry->object.share_map;
2039 				new_src_start = src_entry->offset;
2040 			} else {
2041 				new_src_map = src_map;
2042 				new_src_start = src_entry->start;
2043 				lock_set_recursive(&src_map->lock);
2044 			}
2045 
2046 			if (dst_entry->is_a_map) {
2047 				vm_offset_t new_dst_end;
2048 
2049 				new_dst_map = dst_entry->object.share_map;
2050 				new_dst_start = dst_entry->offset;
2051 
2052 				/*
2053 				 * Since the destination sharing entries will
2054 				 * be merely deallocated, we can do that now,
2055 				 * and replace the region with a null object.
2056 				 * [This prevents splitting the source map to
2057 				 * match the form of the destination map.]
2058 				 * Note that we can only do so if the source
2059 				 * and destination do not overlap.
2060 				 */
2061 
2062 				new_dst_end = new_dst_start + new_size;
2063 
2064 				if (new_dst_map != new_src_map) {
2065 					vm_map_lock(new_dst_map);
2066 					(void) vm_map_delete(new_dst_map,
2067 					    new_dst_start,
2068 					    new_dst_end);
2069 					(void) vm_map_insert(new_dst_map,
2070 					    NULL,
2071 					    (vm_offset_t) 0,
2072 					    new_dst_start,
2073 					    new_dst_end);
2074 					vm_map_unlock(new_dst_map);
2075 				}
2076 			} else {
2077 				new_dst_map = dst_map;
2078 				new_dst_start = dst_entry->start;
2079 				lock_set_recursive(&dst_map->lock);
2080 			}
2081 
2082 			/*
2083 			 * Recursively copy the sharing map.
2084 			 */
2085 
2086 			(void) vm_map_copy(new_dst_map, new_src_map,
2087 			    new_dst_start, new_size, new_src_start,
2088 			    FALSE, FALSE);
2089 
2090 			if (dst_map == new_dst_map)
2091 				lock_clear_recursive(&dst_map->lock);
2092 			if (src_map == new_src_map)
2093 				lock_clear_recursive(&src_map->lock);
2094 		}
2095 
2096 		/*
2097 		 * Update variables for next pass through the loop.
2098 		 */
2099 
2100 		src_start = src_entry->end;
2101 		src_entry = src_entry->next;
2102 		dst_start = dst_entry->end;
2103 		dst_entry = dst_entry->next;
2104 
2105 		/*
2106 		 * If the source is to be destroyed, here is the place to do
2107 		 * it.
2108 		 */
2109 
2110 		if (src_destroy && src_map->is_main_map &&
2111 		    dst_map->is_main_map)
2112 			vm_map_entry_delete(src_map, src_entry->prev);
2113 	}
2114 
2115 	/*
2116 	 * Update the physical maps as appropriate
2117 	 */
2118 
2119 	if (src_map->is_main_map && dst_map->is_main_map) {
2120 		if (src_destroy)
2121 			pmap_remove(src_map->pmap, src_addr, src_addr + len);
2122 	}
2123 	/*
2124 	 * Unlock the maps
2125 	 */
2126 
2127 Return:;
2128 
2129 	if (old_src_destroy)
2130 		vm_map_delete(src_map, src_addr, src_addr + len);
2131 
2132 	vm_map_unlock(src_map);
2133 	if (src_map != dst_map)
2134 		vm_map_unlock(dst_map);
2135 
2136 	return (result);
2137 }
2138 
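/*
 *	A minimal usage sketch for vm_map_copy() above.  The helper name
 *	and the choice of flags are hypothetical; only the vm_map_copy()
 *	signature is taken from this file, and the block is compiled out.
 */
#if 0
static int
example_copy_range(dst_map, src_map, dst_addr, src_addr, len)
	vm_map_t dst_map;
	vm_map_t src_map;
	vm_offset_t dst_addr;
	vm_offset_t src_addr;
	vm_size_t len;
{
	/*
	 * Allocate the destination region (dst_alloc = TRUE) and keep
	 * the source intact (src_destroy = FALSE).  Per the comment
	 * above, the two regions should be disjoint when allocation or
	 * destruction is requested.
	 */
	return (vm_map_copy(dst_map, src_map, dst_addr, len, src_addr,
	    TRUE, FALSE));
}
#endif
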
2139 /*
2140  * vmspace_fork:
2141  * Create a new process vmspace structure and vm_map
2142  * based on those of an existing process.  The new map
2143  * is based on the old map, according to the inheritance
2144  * values on the regions in that map.
2145  *
2146  * The source map must not be locked.
2147  */
2148 struct vmspace *
2149 vmspace_fork(vm1)
2150 	register struct vmspace *vm1;
2151 {
2152 	register struct vmspace *vm2;
2153 	vm_map_t old_map = &vm1->vm_map;
2154 	vm_map_t new_map;
2155 	vm_map_entry_t old_entry;
2156 	vm_map_entry_t new_entry;
2157 	pmap_t new_pmap;
2158 
2159 	vm_map_lock(old_map);
2160 
2161 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
2162 	    old_map->entries_pageable);
2163 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2164 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
2165 	new_pmap = &vm2->vm_pmap;	/* XXX */
2166 	new_map = &vm2->vm_map;	/* XXX */
2167 
2168 	old_entry = old_map->header.next;
2169 
2170 	while (old_entry != &old_map->header) {
2171 		if (old_entry->is_sub_map)
2172 			panic("vm_map_fork: encountered a submap");
2173 
2174 		switch (old_entry->inheritance) {
2175 		case VM_INHERIT_NONE:
2176 			break;
2177 
2178 		case VM_INHERIT_SHARE:
2179 			/*
2180 			 * If we don't already have a sharing map:
2181 			 */
2182 
2183 			if (!old_entry->is_a_map) {
2184 				vm_map_t new_share_map;
2185 				vm_map_entry_t new_share_entry;
2186 
2187 				/*
2188 				 * Create a new sharing map
2189 				 */
2190 
2191 				new_share_map = vm_map_create(NULL,
2192 				    old_entry->start,
2193 				    old_entry->end,
2194 				    TRUE);
2195 				new_share_map->is_main_map = FALSE;
2196 
2197 				/*
2198 				 * Create the only sharing entry from the old
2199 				 * task map entry.
2200 				 */
2201 
2202 				new_share_entry =
2203 				    vm_map_entry_create(new_share_map);
2204 				*new_share_entry = *old_entry;
2205 				new_share_entry->wired_count = 0;
2206 
2207 				/*
2208 				 * Insert the entry into the new sharing map
2209 				 */
2210 
2211 				vm_map_entry_link(new_share_map,
2212 				    new_share_map->header.prev,
2213 				    new_share_entry);
2214 
2215 				/*
2216 				 * Fix up the task map entry to refer to the
2217 				 * sharing map now.
2218 				 */
2219 
2220 				old_entry->is_a_map = TRUE;
2221 				old_entry->object.share_map = new_share_map;
2222 				old_entry->offset = old_entry->start;
2223 			}
2224 			/*
2225 			 * Clone the entry, referencing the sharing map.
2226 			 */
2227 
2228 			new_entry = vm_map_entry_create(new_map);
2229 			*new_entry = *old_entry;
2230 			new_entry->wired_count = 0;
2231 			vm_map_reference(new_entry->object.share_map);
2232 
2233 			/*
2234 			 * Insert the entry into the new map -- we know we're
2235 			 * inserting at the end of the new map.
2236 			 */
2237 
2238 			vm_map_entry_link(new_map, new_map->header.prev,
2239 			    new_entry);
2240 
2241 			/*
2242 			 * Update the physical map
2243 			 */
2244 
2245 			pmap_copy(new_map->pmap, old_map->pmap,
2246 			    new_entry->start,
2247 			    (old_entry->end - old_entry->start),
2248 			    old_entry->start);
2249 			break;
2250 
2251 		case VM_INHERIT_COPY:
2252 			/*
2253 			 * Clone the entry and link into the map.
2254 			 */
2255 
2256 			new_entry = vm_map_entry_create(new_map);
2257 			*new_entry = *old_entry;
2258 			new_entry->wired_count = 0;
2259 			new_entry->object.vm_object = NULL;
2260 			new_entry->is_a_map = FALSE;
2261 			vm_map_entry_link(new_map, new_map->header.prev,
2262 			    new_entry);
2263 			if (old_entry->is_a_map) {
2264 				int check;
2265 
2266 				check = vm_map_copy(new_map,
2267 				    old_entry->object.share_map,
2268 				    new_entry->start,
2269 				    (vm_size_t) (new_entry->end -
2270 					new_entry->start),
2271 				    old_entry->offset,
2272 				    FALSE, FALSE);
2273 				if (check != KERN_SUCCESS)
2274 					printf("vm_map_fork: copy in share_map region failed\n");
2275 			} else {
2276 				vm_map_copy_entry(old_map, new_map, old_entry,
2277 				    new_entry);
2278 			}
2279 			break;
2280 		}
2281 		old_entry = old_entry->next;
2282 	}
2283 
2284 	new_map->size = old_map->size;
2285 	vm_map_unlock(old_map);
2286 
2287 	return (vm2);
2288 }
2289 
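/*
 *	A minimal sketch of how the inheritance values consumed by
 *	vmspace_fork() are typically established.  The helper name and
 *	region arguments are hypothetical, and vm_map_inherit() is
 *	assumed to be the usual interface for setting an entry's
 *	inheritance; the block is compiled out.
 */
#if 0
static void
example_mark_inheritance(map, start, end)
	vm_map_t map;
	vm_offset_t start;
	vm_offset_t end;
{
	/*
	 * VM_INHERIT_SHARE regions are placed in a sharing map and
	 * shared with the child; VM_INHERIT_COPY regions are copied
	 * (copy-on-write where possible); VM_INHERIT_NONE regions do
	 * not appear in the child at all.
	 */
	(void) vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
}
#endif
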
2290 /*
2291  *	vm_map_lookup:
2292  *
2293  *	Finds the VM object, offset, and
2294  *	protection for a given virtual address in the
2295  *	specified map, assuming a page fault of the
2296  *	type specified.
2297  *
2298  *	Leaves the map in question locked for read; return
2299  *	values are guaranteed until a vm_map_lookup_done
2300  *	call is performed.  Note that the map argument
2301  *	is in/out; the returned map must be used in
2302  *	the call to vm_map_lookup_done.
2303  *
2304  *	A handle (out_entry) is returned for use in
2305  *	vm_map_lookup_done, to make that fast.
2306  *
2307  *	If a lookup is requested with "write protection"
2308  *	specified, the map may be changed to perform virtual
2309  *	copying operations, although the data referenced will
2310  *	remain the same.
2311  */
2312 int
2313 vm_map_lookup(var_map, vaddr, fault_type, out_entry,
2314     object, offset, out_prot, wired, single_use)
2315 	vm_map_t *var_map;	/* IN/OUT */
2316 	register vm_offset_t vaddr;
2317 	register vm_prot_t fault_type;
2318 
2319 	vm_map_entry_t *out_entry;	/* OUT */
2320 	vm_object_t *object;	/* OUT */
2321 	vm_offset_t *offset;	/* OUT */
2322 	vm_prot_t *out_prot;	/* OUT */
2323 	boolean_t *wired;	/* OUT */
2324 	boolean_t *single_use;	/* OUT */
2325 {
2326 	vm_map_t share_map;
2327 	vm_offset_t share_offset;
2328 	register vm_map_entry_t entry;
2329 	register vm_map_t map = *var_map;
2330 	register vm_prot_t prot;
2331 	register boolean_t su;
2332 
2333 RetryLookup:;
2334 
2335 	/*
2336 	 * Look up the faulting address.
2337 	 */
2338 
2339 	vm_map_lock_read(map);
2340 
2341 #define	RETURN(why) \
2342 		{ \
2343 		vm_map_unlock_read(map); \
2344 		return(why); \
2345 		}
2346 
2347 	/*
2348 	 * If the map has an interesting hint, try it before calling the
2349 	 * full-blown lookup routine.
2350 	 */
2351 
2352 	simple_lock(&map->hint_lock);
2353 	entry = map->hint;
2354 	simple_unlock(&map->hint_lock);
2355 
2356 	*out_entry = entry;
2357 
2358 	if ((entry == &map->header) ||
2359 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2360 		vm_map_entry_t tmp_entry;
2361 
2362 		/*
2363 		 * Entry was either not a valid hint, or the vaddr was not
2364 		 * contained in the entry, so do a full lookup.
2365 		 */
2366 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2367 			RETURN(KERN_INVALID_ADDRESS);
2368 
2369 		entry = tmp_entry;
2370 		*out_entry = entry;
2371 	}
2372 	/*
2373 	 * Handle submaps.
2374 	 */
2375 
2376 	if (entry->is_sub_map) {
2377 		vm_map_t old_map = map;
2378 
2379 		*var_map = map = entry->object.sub_map;
2380 		vm_map_unlock_read(old_map);
2381 		goto RetryLookup;
2382 	}
2383 	/*
2384 	 * Check whether this task is allowed to have this page.
2385 	 */
2386 
2387 	prot = entry->protection;
2388 	if ((fault_type & (prot)) != fault_type)
2389 		RETURN(KERN_PROTECTION_FAILURE);
2390 
2391 	/*
2392 	 * If this page is not pageable, we have to get it for all possible
2393 	 * accesses.
2394 	 */
2395 
2396 	*wired = (entry->wired_count != 0);
2397 	if (*wired)
2398 		prot = fault_type = entry->protection;
2399 
2400 	/*
2401 	 * If we don't already have a VM object, track it down.
2402 	 */
2403 
2404 	su = !entry->is_a_map;
2405 	if (su) {
2406 		share_map = map;
2407 		share_offset = vaddr;
2408 	} else {
2409 		vm_map_entry_t share_entry;
2410 
2411 		/*
2412 		 * Compute the sharing map, and offset into it.
2413 		 */
2414 
2415 		share_map = entry->object.share_map;
2416 		share_offset = (vaddr - entry->start) + entry->offset;
2417 
2418 		/*
2419 		 * Look for the backing store object and offset
2420 		 */
2421 
2422 		vm_map_lock_read(share_map);
2423 
2424 		if (!vm_map_lookup_entry(share_map, share_offset,
2425 			&share_entry)) {
2426 			vm_map_unlock_read(share_map);
2427 			RETURN(KERN_INVALID_ADDRESS);
2428 		}
2429 		entry = share_entry;
2430 	}
2431 
2432 	/*
2433 	 * If the entry was copy-on-write, we either shadow it or demote access.
2434 	 */
2435 
2436 	if (entry->needs_copy) {
2437 		/*
2438 		 * If we want to write the page, we may as well handle that
2439 		 * now since we've got the sharing map locked.
2440 		 *
2441 		 * If we don't need to write the page, we just demote the
2442 		 * permissions allowed.
2443 		 */
2444 
2445 		if (fault_type & VM_PROT_WRITE) {
2446 			/*
2447 			 * Make a new object, and place it in the object
2448 			 * chain.  Note that no new references have appeared
2449 			 * -- one just moved from the share map to the new
2450 			 * object.
2451 			 */
2452 
2453 			if (lock_read_to_write(&share_map->lock)) {
2454 				if (share_map != map)
2455 					vm_map_unlock_read(map);
2456 				goto RetryLookup;
2457 			}
2458 			vm_object_shadow(
2459 			    &entry->object.vm_object,
2460 			    &entry->offset,
2461 			    (vm_size_t) (entry->end - entry->start));
2462 
2463 			entry->needs_copy = FALSE;
2464 
2465 			lock_write_to_read(&share_map->lock);
2466 		} else {
2467 			/*
2468 			 * We're attempting to read a copy-on-write page --
2469 			 * don't allow writes.
2470 			 */
2471 
2472 			prot &= (~VM_PROT_WRITE);
2473 		}
2474 	}
2475 	/*
2476 	 * Create an object if necessary.
2477 	 */
2478 	if (entry->object.vm_object == NULL) {
2479 
2480 		if (lock_read_to_write(&share_map->lock)) {
2481 			if (share_map != map)
2482 				vm_map_unlock_read(map);
2483 			goto RetryLookup;
2484 		}
2485 		entry->object.vm_object = vm_object_allocate(
2486 		    (vm_size_t) (entry->end - entry->start));
2487 		entry->offset = 0;
2488 		lock_write_to_read(&share_map->lock);
2489 	}
2490 	/*
2491 	 * Return the object/offset from this entry.  If the entry was
2492 	 * copy-on-write or empty, it has been fixed up.
2493 	 */
2494 
2495 	*offset = (share_offset - entry->start) + entry->offset;
2496 	*object = entry->object.vm_object;
2497 
2498 	/*
2499 	 * Return whether this is the only map sharing this data.
2500 	 */
2501 
2502 	if (!su) {
2503 		simple_lock(&share_map->ref_lock);
2504 		su = (share_map->ref_count == 1);
2505 		simple_unlock(&share_map->ref_lock);
2506 	}
2507 	*out_prot = prot;
2508 	*single_use = su;
2509 
2510 	return (KERN_SUCCESS);
2511 
2512 #undef	RETURN
2513 }
2514 
2515 /*
2516  *	vm_map_lookup_done:
2517  *
2518  *	Releases locks acquired by a vm_map_lookup
2519  *	(according to the handle returned by that lookup).
2520  */
2521 
2522 void
2523 vm_map_lookup_done(map, entry)
2524 	register vm_map_t map;
2525 	vm_map_entry_t entry;
2526 {
2527 	/*
2528 	 * If this entry references a map, unlock it first.
2529 	 */
2530 
2531 	if (entry->is_a_map)
2532 		vm_map_unlock_read(entry->object.share_map);
2533 
2534 	/*
2535 	 * Unlock the main-level map
2536 	 */
2537 
2538 	vm_map_unlock_read(map);
2539 }
2540 
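/*
 *	A minimal sketch of the vm_map_lookup() / vm_map_lookup_done()
 *	pairing as a fault handler might use it.  Only the two calls
 *	follow the signatures above; the helper name, the fault type,
 *	and the elided paging step are hypothetical, and the block is
 *	compiled out.
 */
#if 0
static int
example_resolve(map, vaddr)
	vm_map_t map;
	vm_offset_t vaddr;
{
	vm_map_entry_t entry;
	vm_object_t object;
	vm_offset_t offset;
	vm_prot_t prot;
	boolean_t wired, single_use;
	int result;

	/* The map argument is in/out; use the returned map below. */
	result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &entry,
	    &object, &offset, &prot, &wired, &single_use);
	if (result != KERN_SUCCESS)
		return (result);

	/* ... page in from (object, offset) under the read lock ... */

	/* Release the read lock(s) taken by vm_map_lookup(). */
	vm_map_lookup_done(map, entry);
	return (KERN_SUCCESS);
}
#endif
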
2541 /*
2542  *	Routine:	vm_map_simplify
2543  *	Purpose:
2544  *		Attempt to simplify the map representation in
2545  *		the vicinity of the given starting address.
2546  *	Note:
2547  *		This routine is intended primarily to keep the
2548  *		kernel maps more compact -- they generally don't
2549  *		benefit from the "expand a map entry" technology
2550  *		at allocation time because the adjacent entry
2551  *		is often wired down.
2552  */
2553 void
2554 vm_map_simplify(map, start)
2555 	vm_map_t map;
2556 	vm_offset_t start;
2557 {
2558 	vm_map_entry_t this_entry;
2559 	vm_map_entry_t prev_entry;
2560 
2561 	vm_map_lock(map);
2562 	if (
2563 	    (vm_map_lookup_entry(map, start, &this_entry)) &&
2564 	    ((prev_entry = this_entry->prev) != &map->header) &&
2565 
2566 	    (prev_entry->end == start) &&
2567 	    (map->is_main_map) &&
2568 
2569 	    (prev_entry->is_a_map == FALSE) &&
2570 	    (prev_entry->is_sub_map == FALSE) &&
2571 
2572 	    (this_entry->is_a_map == FALSE) &&
2573 	    (this_entry->is_sub_map == FALSE) &&
2574 
2575 	    (prev_entry->inheritance == this_entry->inheritance) &&
2576 	    (prev_entry->protection == this_entry->protection) &&
2577 	    (prev_entry->max_protection == this_entry->max_protection) &&
2578 	    (prev_entry->wired_count == this_entry->wired_count) &&
2579 
2580 	    (prev_entry->copy_on_write == this_entry->copy_on_write) &&
2581 	    (prev_entry->needs_copy == this_entry->needs_copy) &&
2582 
2583 	    (prev_entry->object.vm_object == this_entry->object.vm_object) &&
2584 	    ((prev_entry->offset + (prev_entry->end - prev_entry->start))
2585 		== this_entry->offset)
2586 	    ) {
2587 		if (map->first_free == this_entry)
2588 			map->first_free = prev_entry;
2589 
2590 		if (!this_entry->object.vm_object->paging_in_progress) {
2591 			SAVE_HINT(map, prev_entry);
2592 			vm_map_entry_unlink(map, this_entry);
2593 			prev_entry->end = this_entry->end;
2594 			vm_object_deallocate(this_entry->object.vm_object);
2595 			vm_map_entry_dispose(map, this_entry);
2596 		}
2597 	}
2598 	vm_map_unlock(map);
2599 }
2600 
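/*
 *	A minimal sketch of a hypothetical caller invoking
 *	vm_map_simplify() after releasing a kernel allocation, so the
 *	entry at "addr" can be merged with its predecessor when the
 *	conditions tested above hold.  The map and address arguments are
 *	placeholders, and the block is compiled out.
 */
#if 0
static void
example_compact(map, addr)
	vm_map_t map;
	vm_offset_t addr;
{
	vm_map_simplify(map, addr);
}
#endif
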
2601 /*
2602  *	vm_map_print:	[ debug ]
2603  */
2604 void
2605 vm_map_print(map, full)
2606 	register vm_map_t map;
2607 	boolean_t full;
2608 {
2609 	register vm_map_entry_t entry;
2610 	extern int indent;
2611 
2612 	iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n",
2613 	    (map->is_main_map ? "Task" : "Share"),
2614 	    (int) map, (int) (map->pmap), map->ref_count, map->nentries,
2615 	    map->timestamp);
2616 
2617 	if (!full && indent)
2618 		return;
2619 
2620 	indent += 2;
2621 	for (entry = map->header.next; entry != &map->header;
2622 	    entry = entry->next) {
2623 		iprintf("map entry 0x%x: start=0x%x, end=0x%x, ",
2624 		    (int) entry, (int) entry->start, (int) entry->end);
2625 		if (map->is_main_map) {
2626 			static char *inheritance_name[4] =
2627 			{"share", "copy", "none", "donate_copy"};
2628 
2629 			printf("prot=%x/%x/%s, ",
2630 			    entry->protection,
2631 			    entry->max_protection,
2632 			    inheritance_name[entry->inheritance]);
2633 			if (entry->wired_count != 0)
2634 				printf("wired, ");
2635 		}
2636 		if (entry->is_a_map || entry->is_sub_map) {
2637 			printf("share=0x%x, offset=0x%x\n",
2638 			    (int) entry->object.share_map,
2639 			    (int) entry->offset);
2640 			if ((entry->prev == &map->header) ||
2641 			    (!entry->prev->is_a_map) ||
2642 			    (entry->prev->object.share_map !=
2643 				entry->object.share_map)) {
2644 				indent += 2;
2645 				vm_map_print(entry->object.share_map, full);
2646 				indent -= 2;
2647 			}
2648 		} else {
2649 			printf("object=0x%x, offset=0x%x",
2650 			    (int) entry->object.vm_object,
2651 			    (int) entry->offset);
2652 			if (entry->copy_on_write)
2653 				printf(", copy (%s)",
2654 				    entry->needs_copy ? "needed" : "done");
2655 			printf("\n");
2656 
2657 			if ((entry->prev == &map->header) ||
2658 			    (entry->prev->is_a_map) ||
2659 			    (entry->prev->object.vm_object !=
2660 				entry->object.vm_object)) {
2661 				indent += 2;
2662 				vm_object_print(entry->object.vm_object, full);
2663 				indent -= 2;
2664 			}
2665 		}
2666 	}
2667 	indent -= 2;
2668 }
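
/*
 *	A minimal sketch of dumping a map from debugging code with
 *	vm_map_print().  "kernel_map" is assumed to be the usual global
 *	kernel map; TRUE prints every entry, while FALSE prints only the
 *	summary when the output is already indented.  Compiled out.
 */
#if 0
static void
example_dump_kernel_map()
{
	extern vm_map_t kernel_map;	/* assumed global kernel map */

	vm_map_print(kernel_map, TRUE);
}
#endif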
2669