xref: /freebsd/sys/vm/vm_map.c (revision ce834215a70ff69e7e222827437116eee2f9ac6f)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $Id: vm_map.c,v 1.77 1997/06/15 23:33:52 dyson Exp $
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/malloc.h>
74 #include <sys/proc.h>
75 #include <sys/queue.h>
76 #include <sys/vmmeter.h>
77 #include <sys/mman.h>
78 
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <vm/vm_prot.h>
82 #include <vm/vm_inherit.h>
83 #include <sys/lock.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_kern.h>
89 #include <vm/vm_pager.h>
90 #include <vm/vm_extern.h>
91 #include <vm/default_pager.h>
92 
93 /*
94  *	Virtual memory maps provide for the mapping, protection,
95  *	and sharing of virtual memory objects.  In addition,
96  *	this module provides for an efficient virtual copy of
97  *	memory from one map to another.
98  *
99  *	Synchronization is required prior to most operations.
100  *
101  *	Maps consist of an ordered doubly-linked list of simple
102  *	entries; a single hint is used to speed up lookups.
103  *
104  *	In order to properly represent the sharing of virtual
105  *	memory regions among maps, the map structure is bi-level.
106  *	Top-level ("address") maps refer to regions of sharable
107  *	virtual memory.  These regions are implemented as
108  *	("sharing") maps, which then refer to the actual virtual
109  *	memory objects.  When two address maps "share" memory,
110  *	their top-level maps both have references to the same
111  *	sharing map.  When memory is virtual-copied from one
112  *	address map to another, the references in the sharing
113  *	maps are actually copied -- no copying occurs at the
114  *	virtual memory object level.
115  *
116  *	Since portions of maps are specified by start/end addresses,
117  *	which may not align with existing map entries, all
118  *	routines merely "clip" entries to these start/end values.
119  *	[That is, an entry is split into two, bordering at a
120  *	start or end value.]  Note that these clippings may not
121  *	always be necessary (as the two resulting entries are then
122  *	not changed); however, the clipping is done for convenience.
123  *	No attempt is currently made to "glue back together" two
124  *	abutting entries.
125  *
126  *	As mentioned above, virtual copy operations are performed
127  *	by copying VM object references from one sharing map to
128  *	another, and then marking both regions as copy-on-write.
129  *	It is important to note that only one writeable reference
130  *	to a VM object region exists in any map -- this means that
131  *	shadow object creation can be delayed until a write operation
132  *	occurs.
133  */
134 
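/*
 *	Illustrative sketch (an addition, not part of the original file):
 *	the canonical pattern used by the range operations below
 *	(vm_map_protect, vm_map_inherit, vm_map_madvise, ...) to walk the
 *	ordered entry list over [start, end).  "operate on entry" is a
 *	placeholder for the per-entry work.
 *
 *		vm_map_entry_t entry;
 *
 *		vm_map_lock(map);
 *		VM_MAP_RANGE_CHECK(map, start, end);
 *		if (vm_map_lookup_entry(map, start, &entry))
 *			vm_map_clip_start(map, entry, start);
 *		else
 *			entry = entry->next;
 *		while ((entry != &map->header) && (entry->start < end)) {
 *			vm_map_clip_end(map, entry, end);
 *			... operate on entry ...
 *			entry = entry->next;
 *		}
 *		vm_map_unlock(map);
 */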
135 /*
136  *	vm_map_startup:
137  *
138  *	Initialize the vm_map module.  Must be called before
139  *	any other vm_map routines.
140  *
141  *	Map and entry structures are allocated from the general
142  *	purpose memory pool with some exceptions:
143  *
144  *	- The kernel map and kmem submap are allocated statically.
145  *	- Kernel map entries are allocated out of a static pool.
146  *
147  *	These restrictions are necessary since malloc() uses the
148  *	maps and requires map entries.
149  */
150 
151 vm_offset_t kentry_data;
152 vm_size_t kentry_data_size;
153 static vm_map_entry_t kentry_free;
154 static vm_map_t kmap_free;
155 extern char kstack[];
156 extern int inmprotect;
157 
158 static int kentry_count;
159 static vm_offset_t mapvm_start, mapvm, mapvmmax;
160 static int mapvmpgcnt;
161 
162 static struct vm_map_entry *mappool;
163 static int mappoolcnt;
164 #define KENTRY_LOW_WATER 128
165 
166 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
167 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
168 static vm_map_entry_t vm_map_entry_create __P((vm_map_t));
169 static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
170 static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
171 static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
172 static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t,
173 		vm_map_entry_t));
174 
175 void
176 vm_map_startup()
177 {
178 	register int i;
179 	register vm_map_entry_t mep;
180 	vm_map_t mp;
181 
182 	/*
183 	 * Static map structures for allocation before initialization of
184 	 * kernel map or kmem map.  vm_map_create knows how to deal with them.
185 	 */
186 	kmap_free = mp = (vm_map_t) kentry_data;
187 	i = MAX_KMAP;
188 	while (--i > 0) {
189 		mp->header.next = (vm_map_entry_t) (mp + 1);
190 		mp++;
191 	}
192 	mp++->header.next = NULL;
193 
194 	/*
195 	 * Form a free list of statically allocated kernel map entries with
196 	 * the rest.
197 	 */
198 	kentry_free = mep = (vm_map_entry_t) mp;
199 	kentry_count = i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep;
200 	while (--i > 0) {
201 		mep->next = mep + 1;
202 		mep++;
203 	}
204 	mep->next = NULL;
205 }
206 
207 /*
208  * Allocate a vmspace structure, including a vm_map and pmap,
209  * and initialize those structures.  The refcnt is set to 1.
210  * The remaining fields must be initialized by the caller.
211  */
212 struct vmspace *
213 vmspace_alloc(min, max, pageable)
214 	vm_offset_t min, max;
215 	int pageable;
216 {
217 	register struct vmspace *vm;
218 
219 	if (mapvmpgcnt == 0 && mapvm == 0) {
220 		mapvmpgcnt = (cnt.v_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE;
221 		mapvm_start = mapvm = kmem_alloc_pageable(kernel_map,
222 			mapvmpgcnt * PAGE_SIZE);
223 		mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE;
224 		if (!mapvm)
225 			mapvmpgcnt = 0;
226 	}
227 	MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK);
228 	bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm);
229 	vm_map_init(&vm->vm_map, min, max, pageable);
230 	pmap_pinit(&vm->vm_pmap);
231 	vm->vm_map.pmap = &vm->vm_pmap;		/* XXX */
232 	vm->vm_refcnt = 1;
233 	return (vm);
234 }
235 
236 void
237 vmspace_free(vm)
238 	register struct vmspace *vm;
239 {
240 
241 	if (vm->vm_refcnt == 0)
242 		panic("vmspace_free: attempt to free already freed vmspace");
243 
244 	if (--vm->vm_refcnt == 0) {
245 
246 		/*
247 		 * Lock the map, to wait out all other references to it.
248 		 * Delete all of the mappings and pages they hold, then call
249 		 * the pmap module to reclaim anything left.
250 		 */
251 		vm_map_lock(&vm->vm_map);
252 		(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
253 		    vm->vm_map.max_offset);
254 		vm_map_unlock(&vm->vm_map);
255 
256 		while( vm->vm_map.ref_count != 1)
257 			tsleep(&vm->vm_map.ref_count, PVM, "vmsfre", 0);
258 		--vm->vm_map.ref_count;
259 		pmap_release(&vm->vm_pmap);
260 		FREE(vm, M_VMMAP);
261 	} else {
262 		wakeup(&vm->vm_map.ref_count);
263 	}
264 }
265 
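/*
 *	Illustrative sketch (an addition, not part of the original file):
 *	typical vmspace lifecycle.  VM_MIN_ADDRESS and VM_MAXUSER_ADDRESS
 *	are assumed to come from <machine/vmparam.h>.
 *
 *		struct vmspace *vm;
 *
 *		vm = vmspace_alloc(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS, TRUE);
 *		... initialize the remaining fields, use the map ...
 *		vmspace_free(vm);	releases the reference taken above
 */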
266 /*
267  *	vm_map_create:
268  *
269  *	Creates and returns a new empty VM map with
270  *	the given physical map structure, and having
271  *	the given lower and upper address bounds.
272  */
273 vm_map_t
274 vm_map_create(pmap, min, max, pageable)
275 	pmap_t pmap;
276 	vm_offset_t min, max;
277 	boolean_t pageable;
278 {
279 	register vm_map_t result;
280 
281 	if (kmem_map == NULL) {
282 		result = kmap_free;
283 		if (result == NULL)
284 			panic("vm_map_create: out of maps");
285 		kmap_free = (vm_map_t) result->header.next;
286 	} else
287 		MALLOC(result, vm_map_t, sizeof(struct vm_map),
288 		    M_VMMAP, M_WAITOK);
289 
290 	vm_map_init(result, min, max, pageable);
291 	result->pmap = pmap;
292 	return (result);
293 }
294 
295 /*
296  * Initialize an existing vm_map structure
297  * such as that in the vmspace structure.
298  * The pmap is set elsewhere.
299  */
300 void
301 vm_map_init(map, min, max, pageable)
302 	register struct vm_map *map;
303 	vm_offset_t min, max;
304 	boolean_t pageable;
305 {
306 	map->header.next = map->header.prev = &map->header;
307 	map->nentries = 0;
308 	map->size = 0;
309 	map->ref_count = 1;
310 	map->is_main_map = TRUE;
311 	map->min_offset = min;
312 	map->max_offset = max;
313 	map->entries_pageable = pageable;
314 	map->first_free = &map->header;
315 	map->hint = &map->header;
316 	map->timestamp = 0;
317 	lockinit(&map->lock, PVM, "thrd_sleep", 0, 0);
318 	simple_lock_init(&map->ref_lock);
319 }
320 
321 /*
322  *	vm_map_entry_dispose:	[ internal use only ]
323  *
324  *	Inverse of vm_map_entry_create.
325  */
326 static void
327 vm_map_entry_dispose(map, entry)
328 	vm_map_t map;
329 	vm_map_entry_t entry;
330 {
331 	int s;
332 
333 	if (map == kernel_map || map == kmem_map ||
334 		map == mb_map || map == pager_map) {
335 		s = splvm();
336 		entry->next = kentry_free;
337 		kentry_free = entry;
338 		++kentry_count;
339 		splx(s);
340 	} else {
341 		entry->next = mappool;
342 		mappool = entry;
343 		++mappoolcnt;
344 	}
345 }
346 
347 /*
348  *	vm_map_entry_create:	[ internal use only ]
349  *
350  *	Allocates a VM map entry for insertion.
351  *	No entry fields are filled in.  This routine is for internal use only.
352  */
353 static vm_map_entry_t
354 vm_map_entry_create(map)
355 	vm_map_t map;
356 {
357 	vm_map_entry_t entry;
358 	int i;
359 	int s;
360 
361 	/*
362 	 * This is a *very* nasty (and sort of incomplete) hack!!!!
363 	 */
364 	if (kentry_count < KENTRY_LOW_WATER) {
365 		s = splvm();
366 		if (mapvmpgcnt && mapvm) {
367 			vm_page_t m;
368 
369 			m = vm_page_alloc(kernel_object,
370 			        OFF_TO_IDX(mapvm - VM_MIN_KERNEL_ADDRESS),
371 				    (map == kmem_map || map == mb_map) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL);
372 
373 			if (m) {
374 				int newentries;
375 
376 				newentries = (PAGE_SIZE / sizeof(struct vm_map_entry));
377 				vm_page_wire(m);
378 				PAGE_WAKEUP(m);
379 				m->valid = VM_PAGE_BITS_ALL;
380 				pmap_kenter(mapvm, VM_PAGE_TO_PHYS(m));
381 				m->flags |= PG_WRITEABLE;
382 
383 				entry = (vm_map_entry_t) mapvm;
384 				mapvm += PAGE_SIZE;
385 				--mapvmpgcnt;
386 
387 				for (i = 0; i < newentries; i++) {
388 					vm_map_entry_dispose(kernel_map, entry);
389 					entry++;
390 				}
391 			}
392 		}
393 		splx(s);
394 	}
395 
396 	if (map == kernel_map || map == kmem_map ||
397 		map == mb_map || map == pager_map) {
398 		s = splvm();
399 		entry = kentry_free;
400 		if (entry) {
401 			kentry_free = entry->next;
402 			--kentry_count;
403 		} else {
404 			panic("vm_map_entry_create: out of map entries for kernel");
405 		}
406 		splx(s);
407 	} else {
408 		entry = mappool;
409 		if (entry) {
410 			mappool = entry->next;
411 			--mappoolcnt;
412 		} else {
413 			MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry),
414 			    M_VMMAPENT, M_WAITOK);
415 		}
416 	}
417 
418 	return (entry);
419 }
420 
421 /*
422  *	vm_map_entry_{un,}link:
423  *
424  *	Insert/remove entries from maps.
425  */
426 #define	vm_map_entry_link(map, after_where, entry) \
427 		{ \
428 		(map)->nentries++; \
429 		(entry)->prev = (after_where); \
430 		(entry)->next = (after_where)->next; \
431 		(entry)->prev->next = (entry); \
432 		(entry)->next->prev = (entry); \
433 		}
434 #define	vm_map_entry_unlink(map, entry) \
435 		{ \
436 		(map)->nentries--; \
437 		(entry)->next->prev = (entry)->prev; \
438 		(entry)->prev->next = (entry)->next; \
439 		}
440 
441 /*
442  *	vm_map_reference:
443  *
444  *	Creates another valid reference to the given map.
445  *
446  */
447 void
448 vm_map_reference(map)
449 	register vm_map_t map;
450 {
451 	if (map == NULL)
452 		return;
453 
454 	map->ref_count++;
455 }
456 
457 /*
458  *	vm_map_deallocate:
459  *
460  *	Removes a reference from the specified map,
461  *	destroying it if no references remain.
462  *	The map should not be locked.
463  */
464 void
465 vm_map_deallocate(map)
466 	register vm_map_t map;
467 {
468 	register int c;
469 
470 	if (map == NULL)
471 		return;
472 
473 	c = map->ref_count;
474 
475 	if (c == 0)
476 		panic("vm_map_deallocate: deallocating already freed map");
477 
478 	if (c != 1) {
479 		--map->ref_count;
480 		wakeup(&map->ref_count);
481 		return;
482 	}
483 	/*
484 	 * Lock the map, to wait out all other references to it.
485 	 */
486 
487 	vm_map_lock_drain_interlock(map);
488 	(void) vm_map_delete(map, map->min_offset, map->max_offset);
489 	--map->ref_count;
490 	if( map->ref_count != 0) {
491 		vm_map_unlock(map);
492 		return;
493 	}
494 
495 	pmap_destroy(map->pmap);
496 
497 	vm_map_unlock(map);
498 
499 	FREE(map, M_VMMAP);
500 }
501 
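/*
 *	Illustrative sketch (an addition, not part of the original file):
 *	balanced use of the two routines above.  The map is destroyed only
 *	when the final reference is dropped.
 *
 *		vm_map_reference(map);		take an additional reference
 *		... use the map ...
 *		vm_map_deallocate(map);		drop it (map must be unlocked)
 */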
502 /*
503  *	SAVE_HINT:
504  *
505  *	Saves the specified entry as the hint for
506  *	future lookups.
507  */
508 #define	SAVE_HINT(map,value) \
509 		(map)->hint = (value);
510 
511 /*
512  *	vm_map_lookup_entry:	[ internal use only ]
513  *
514  *	Finds the map entry containing (or
515  *	immediately preceding) the specified address
516  *	in the given map; the entry is returned
517  *	in the "entry" parameter.  The boolean
518  *	result indicates whether the address is
519  *	actually contained in the map.
520  */
521 boolean_t
522 vm_map_lookup_entry(map, address, entry)
523 	register vm_map_t map;
524 	register vm_offset_t address;
525 	vm_map_entry_t *entry;	/* OUT */
526 {
527 	register vm_map_entry_t cur;
528 	register vm_map_entry_t last;
529 
530 	/*
531 	 * Start looking either from the head of the list, or from the hint.
532 	 */
533 
534 	cur = map->hint;
535 
536 	if (cur == &map->header)
537 		cur = cur->next;
538 
539 	if (address >= cur->start) {
540 		/*
541 		 * Go from hint to end of list.
542 		 *
543 		 * But first, make a quick check to see if we are already looking
544 		 * at the entry we want (which is usually the case). Note also
545 		 * that we don't need to save the hint here... it is the same
546 		 * hint (unless we are at the header, in which case the hint
547 		 * didn't buy us anything anyway).
548 		 */
549 		last = &map->header;
550 		if ((cur != last) && (cur->end > address)) {
551 			*entry = cur;
552 			return (TRUE);
553 		}
554 	} else {
555 		/*
556 		 * Go from start to hint, *inclusively*
557 		 */
558 		last = cur->next;
559 		cur = map->header.next;
560 	}
561 
562 	/*
563 	 * Search linearly
564 	 */
565 
566 	while (cur != last) {
567 		if (cur->end > address) {
568 			if (address >= cur->start) {
569 				/*
570 				 * Save this lookup for future hints, and
571 				 * return
572 				 */
573 
574 				*entry = cur;
575 				SAVE_HINT(map, cur);
576 				return (TRUE);
577 			}
578 			break;
579 		}
580 		cur = cur->next;
581 	}
582 	*entry = cur->prev;
583 	SAVE_HINT(map, *entry);
584 	return (FALSE);
585 }
586 
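/*
 *	Illustrative sketch (an addition, not part of the original file):
 *	using vm_map_lookup_entry under a read lock.  "addr" is a
 *	placeholder address within the map's range.
 *
 *		vm_map_entry_t entry;
 *
 *		vm_map_lock_read(map);
 *		if (vm_map_lookup_entry(map, addr, &entry)) {
 *			entry->start <= addr < entry->end holds here
 *		} else {
 *			entry precedes addr (and may be &map->header)
 *		}
 *		vm_map_unlock_read(map);
 */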
587 /*
588  *	vm_map_insert:
589  *
590  *	Inserts the given whole VM object into the target
591  *	map at the specified address range.  The object's
592  *	size should match that of the address range.
593  *
594  *	Requires that the map be locked, and leaves it so.
595  */
596 int
597 vm_map_insert(map, object, offset, start, end, prot, max, cow)
598 	vm_map_t map;
599 	vm_object_t object;
600 	vm_ooffset_t offset;
601 	vm_offset_t start;
602 	vm_offset_t end;
603 	vm_prot_t prot, max;
604 	int cow;
605 {
606 	register vm_map_entry_t new_entry;
607 	register vm_map_entry_t prev_entry;
608 	vm_map_entry_t temp_entry;
609 	vm_object_t prev_object;
610 	u_char protoeflags;
611 
612 	if ((object != NULL) && (cow & MAP_NOFAULT)) {
613 		panic("vm_map_insert: paradoxical MAP_NOFAULT request");
614 	}
615 
616 	/*
617 	 * Check that the start and end points are not bogus.
618 	 */
619 
620 	if ((start < map->min_offset) || (end > map->max_offset) ||
621 	    (start >= end))
622 		return (KERN_INVALID_ADDRESS);
623 
624 	/*
625 	 * Find the entry prior to the proposed starting address; if it's part
626 	 * of an existing entry, this range is bogus.
627 	 */
628 
629 	if (vm_map_lookup_entry(map, start, &temp_entry))
630 		return (KERN_NO_SPACE);
631 
632 	prev_entry = temp_entry;
633 
634 	/*
635 	 * Assert that the next entry doesn't overlap the end point.
636 	 */
637 
638 	if ((prev_entry->next != &map->header) &&
639 	    (prev_entry->next->start < end))
640 		return (KERN_NO_SPACE);
641 
642 	protoeflags = 0;
643 	if (cow & MAP_COPY_NEEDED)
644 		protoeflags |= MAP_ENTRY_NEEDS_COPY;
645 
646 	if (cow & MAP_COPY_ON_WRITE)
647 		protoeflags |= MAP_ENTRY_COW;
648 
649 	if (cow & MAP_NOFAULT)
650 		protoeflags |= MAP_ENTRY_NOFAULT;
651 
652 	/*
653 	 * See if we can avoid creating a new entry by extending one of our
654 	 * neighbors.  Or at least extend the object.
655 	 */
656 
657 	if ((object == NULL) &&
658 	    (prev_entry != &map->header) &&
659 	    (( prev_entry->eflags & (MAP_ENTRY_IS_A_MAP | MAP_ENTRY_IS_SUB_MAP)) == 0) &&
660 	    (prev_entry->end == start) &&
661 	    (prev_entry->wired_count == 0)) {
662 
663 
664 		if ((protoeflags == prev_entry->eflags) &&
665 		    ((cow & MAP_NOFAULT) ||
666 		     vm_object_coalesce(prev_entry->object.vm_object,
667 					OFF_TO_IDX(prev_entry->offset),
668 					(vm_size_t) (prev_entry->end - prev_entry->start),
669 					(vm_size_t) (end - prev_entry->end)))) {
670 
671 			/*
672 			 * Coalesced the two objects.  Can we extend the
673 			 * previous map entry to include the new range?
674 			 */
675 			if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
676 			    (prev_entry->protection == prot) &&
677 			    (prev_entry->max_protection == max)) {
678 
679 				map->size += (end - prev_entry->end);
680 				prev_entry->end = end;
681 				if ((cow & MAP_NOFAULT) == 0) {
682 					prev_object = prev_entry->object.vm_object;
683 					default_pager_convert_to_swapq(prev_object);
684 				}
685 				return (KERN_SUCCESS);
686 			}
687 			else {
688 				object = prev_entry->object.vm_object;
689 				offset = prev_entry->offset + (prev_entry->end -
690 							       prev_entry->start);
691 
692 				vm_object_reference(object);
693 			}
694 		}
695 	}
696 
697 	/*
698 	 * Create a new entry
699 	 */
700 
701 	new_entry = vm_map_entry_create(map);
702 	new_entry->start = start;
703 	new_entry->end = end;
704 
705 	new_entry->eflags = protoeflags;
706 	new_entry->object.vm_object = object;
707 	new_entry->offset = offset;
708 
709 	if (map->is_main_map) {
710 		new_entry->inheritance = VM_INHERIT_DEFAULT;
711 		new_entry->protection = prot;
712 		new_entry->max_protection = max;
713 		new_entry->wired_count = 0;
714 	}
715 	/*
716 	 * Insert the new entry into the list
717 	 */
718 
719 	vm_map_entry_link(map, prev_entry, new_entry);
720 	map->size += new_entry->end - new_entry->start;
721 
722 	/*
723 	 * Update the free space hint
724 	 */
725 	if ((map->first_free == prev_entry) &&
726 		(prev_entry->end >= new_entry->start))
727 		map->first_free = new_entry;
728 
729 	default_pager_convert_to_swapq(object);
730 	return (KERN_SUCCESS);
731 }
732 
733 /*
734  * Find sufficient space for `length' bytes in the given map, starting at
735  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
736  */
737 int
738 vm_map_findspace(map, start, length, addr)
739 	register vm_map_t map;
740 	register vm_offset_t start;
741 	vm_size_t length;
742 	vm_offset_t *addr;
743 {
744 	register vm_map_entry_t entry, next;
745 	register vm_offset_t end;
746 
747 	if (start < map->min_offset)
748 		start = map->min_offset;
749 	if (start > map->max_offset)
750 		return (1);
751 
752 	/*
753 	 * Look for the first possible address; if there's already something
754 	 * at this address, we have to start after it.
755 	 */
756 	if (start == map->min_offset) {
757 		if ((entry = map->first_free) != &map->header)
758 			start = entry->end;
759 	} else {
760 		vm_map_entry_t tmp;
761 
762 		if (vm_map_lookup_entry(map, start, &tmp))
763 			start = tmp->end;
764 		entry = tmp;
765 	}
766 
767 	/*
768 	 * Look through the rest of the map, trying to fit a new region in the
769 	 * gap between existing regions, or after the very last region.
770 	 */
771 	for (;; start = (entry = next)->end) {
772 		/*
773 		 * Find the end of the proposed new region.  Be sure we didn't
774 		 * go beyond the end of the map, or wrap around the address;
775 		 * if so, we lose.  Otherwise, if this is the last entry, or
776 		 * if the proposed new region fits before the next entry, we
777 		 * win.
778 		 */
779 		end = start + length;
780 		if (end > map->max_offset || end < start)
781 			return (1);
782 		next = entry->next;
783 		if (next == &map->header || next->start >= end)
784 			break;
785 	}
786 	SAVE_HINT(map, entry);
787 	*addr = start;
788 	if (map == kernel_map && round_page(start + length) > kernel_vm_end)
789 		pmap_growkernel(round_page(start + length));
790 	return (0);
791 }
792 
793 /*
794  *	vm_map_find finds an unallocated region in the target address
795  *	map with the given length.  The search is defined to be
796  *	first-fit from the specified address; the region found is
797  *	returned in the same parameter.
798  *
799  */
800 int
801 vm_map_find(map, object, offset, addr, length, find_space, prot, max, cow)
802 	vm_map_t map;
803 	vm_object_t object;
804 	vm_ooffset_t offset;
805 	vm_offset_t *addr;	/* IN/OUT */
806 	vm_size_t length;
807 	boolean_t find_space;
808 	vm_prot_t prot, max;
809 	int cow;
810 {
811 	register vm_offset_t start;
812 	int result, s = 0;
813 
814 	start = *addr;
815 
816 	if (map == kmem_map || map == mb_map)
817 		s = splvm();
818 
819 	vm_map_lock(map);
820 	if (find_space) {
821 		if (vm_map_findspace(map, start, length, addr)) {
822 			vm_map_unlock(map);
823 			if (map == kmem_map || map == mb_map)
824 				splx(s);
825 			return (KERN_NO_SPACE);
826 		}
827 		start = *addr;
828 	}
829 	result = vm_map_insert(map, object, offset,
830 		start, start + length, prot, max, cow);
831 	vm_map_unlock(map);
832 
833 	if (map == kmem_map || map == mb_map)
834 		splx(s);
835 
836 	return (result);
837 }
838 
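/*
 *	Illustrative sketch (an addition, not part of the original file):
 *	reserving "size" bytes of zero-fill space anywhere in a map, in the
 *	style of the kernel memory allocators that call this routine.
 *
 *		vm_offset_t addr;
 *		int rv;
 *
 *		addr = vm_map_min(map);
 *		rv = vm_map_find(map, NULL, (vm_ooffset_t) 0, &addr, size,
 *		    TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
 *		if (rv != KERN_SUCCESS)
 *			... no space in the map ...
 */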
839 /*
840  *	vm_map_simplify_entry:
841  *
842  *	Simplify the given map entry by merging with either neighbor.
843  */
844 void
845 vm_map_simplify_entry(map, entry)
846 	vm_map_t map;
847 	vm_map_entry_t entry;
848 {
849 	vm_map_entry_t next, prev;
850 	vm_size_t prevsize, esize;
851 
852 	if (entry->eflags & (MAP_ENTRY_IS_SUB_MAP|MAP_ENTRY_IS_A_MAP))
853 		return;
854 
855 	prev = entry->prev;
856 	if (prev != &map->header) {
857 		prevsize = prev->end - prev->start;
858 		if ( (prev->end == entry->start) &&
859 		     (prev->object.vm_object == entry->object.vm_object) &&
860 		     (!prev->object.vm_object || (prev->object.vm_object->behavior == entry->object.vm_object->behavior)) &&
861 		     (!prev->object.vm_object ||
862 			(prev->offset + prevsize == entry->offset)) &&
863 		     (prev->eflags == entry->eflags) &&
864 		     (prev->protection == entry->protection) &&
865 		     (prev->max_protection == entry->max_protection) &&
866 		     (prev->inheritance == entry->inheritance) &&
867 		     (prev->wired_count == entry->wired_count)) {
868 			if (map->first_free == prev)
869 				map->first_free = entry;
870 			if (map->hint == prev)
871 				map->hint = entry;
872 			vm_map_entry_unlink(map, prev);
873 			entry->start = prev->start;
874 			entry->offset = prev->offset;
875 			if (prev->object.vm_object)
876 				vm_object_deallocate(prev->object.vm_object);
877 			vm_map_entry_dispose(map, prev);
878 		}
879 	}
880 
881 	next = entry->next;
882 	if (next != &map->header) {
883 		esize = entry->end - entry->start;
884 		if ((entry->end == next->start) &&
885 		    (next->object.vm_object == entry->object.vm_object) &&
886 		    (!next->object.vm_object || (next->object.vm_object->behavior == entry->object.vm_object->behavior)) &&
887 		     (!entry->object.vm_object ||
888 			(entry->offset + esize == next->offset)) &&
889 		    (next->eflags == entry->eflags) &&
890 		    (next->protection == entry->protection) &&
891 		    (next->max_protection == entry->max_protection) &&
892 		    (next->inheritance == entry->inheritance) &&
893 		    (next->wired_count == entry->wired_count)) {
894 			if (map->first_free == next)
895 				map->first_free = entry;
896 			if (map->hint == next)
897 				map->hint = entry;
898 			vm_map_entry_unlink(map, next);
899 			entry->end = next->end;
900 			if (next->object.vm_object)
901 				vm_object_deallocate(next->object.vm_object);
902 			vm_map_entry_dispose(map, next);
903 	        }
904 	}
905 }
906 /*
907  *	vm_map_clip_start:	[ internal use only ]
908  *
909  *	Asserts that the given entry begins at or after
910  *	the specified address; if necessary,
911  *	it splits the entry into two.
912  */
913 #define vm_map_clip_start(map, entry, startaddr) \
914 { \
915 	if (startaddr > entry->start) \
916 		_vm_map_clip_start(map, entry, startaddr); \
917 }
918 
919 /*
920  *	This routine is called only when it is known that
921  *	the entry must be split.
922  */
923 static void
924 _vm_map_clip_start(map, entry, start)
925 	register vm_map_t map;
926 	register vm_map_entry_t entry;
927 	register vm_offset_t start;
928 {
929 	register vm_map_entry_t new_entry;
930 
931 	/*
932 	 * Split off the front portion -- note that we must insert the new
933 	 * entry BEFORE this one, so that this entry has the specified
934 	 * starting address.
935 	 */
936 
937 	vm_map_simplify_entry(map, entry);
938 
939 	new_entry = vm_map_entry_create(map);
940 	*new_entry = *entry;
941 
942 	new_entry->end = start;
943 	entry->offset += (start - entry->start);
944 	entry->start = start;
945 
946 	vm_map_entry_link(map, entry->prev, new_entry);
947 
948 	if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
949 		vm_map_reference(new_entry->object.share_map);
950 	else
951 		vm_object_reference(new_entry->object.vm_object);
952 }
953 
954 /*
955  *	vm_map_clip_end:	[ internal use only ]
956  *
957  *	Asserts that the given entry ends at or before
958  *	the specified address; if necessary,
959  *	it splits the entry into two.
960  */
961 
962 #define vm_map_clip_end(map, entry, endaddr) \
963 { \
964 	if (endaddr < entry->end) \
965 		_vm_map_clip_end(map, entry, endaddr); \
966 }
967 
968 /*
969  *	This routine is called only when it is known that
970  *	the entry must be split.
971  */
972 static void
973 _vm_map_clip_end(map, entry, end)
974 	register vm_map_t map;
975 	register vm_map_entry_t entry;
976 	register vm_offset_t end;
977 {
978 	register vm_map_entry_t new_entry;
979 
980 	/*
981 	 * Create a new entry and insert it AFTER the specified entry
982 	 */
983 
984 	new_entry = vm_map_entry_create(map);
985 	*new_entry = *entry;
986 
987 	new_entry->start = entry->end = end;
988 	new_entry->offset += (end - entry->start);
989 
990 	vm_map_entry_link(map, entry, new_entry);
991 
992 	if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
993 		vm_map_reference(new_entry->object.share_map);
994 	else
995 		vm_object_reference(new_entry->object.vm_object);
996 }
997 
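/*
 *	Illustrative example (an addition, not part of the original file):
 *	given an entry covering [0x2000, 0x8000),
 *
 *		vm_map_clip_start(map, entry, 0x4000);
 *
 *	links in a new entry for [0x2000, 0x4000) and shrinks "entry" to
 *	[0x4000, 0x8000), advancing entry->offset by 0x2000 so both halves
 *	still refer to the same portion of the backing object.
 */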
998 /*
999  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
1000  *
1001  *	Asserts that the starting and ending region
1002  *	addresses fall within the valid range of the map.
1003  */
1004 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
1005 		{					\
1006 		if (start < vm_map_min(map))		\
1007 			start = vm_map_min(map);	\
1008 		if (end > vm_map_max(map))		\
1009 			end = vm_map_max(map);		\
1010 		if (start > end)			\
1011 			start = end;			\
1012 		}
1013 
1014 /*
1015  *	vm_map_submap:		[ kernel use only ]
1016  *
1017  *	Mark the given range as handled by a subordinate map.
1018  *
1019  *	This range must have been created with vm_map_find,
1020  *	and no other operations may have been performed on this
1021  *	range prior to calling vm_map_submap.
1022  *
1023  *	Only a limited number of operations can be performed
1024  *	within this range after calling vm_map_submap:
1025  *		vm_fault
1026  *	[Don't try vm_map_copy!]
1027  *
1028  *	To remove a submapping, one must first remove the
1029  *	range from the superior map, and then destroy the
1030  *	submap (if desired).  [Better yet, don't try it.]
1031  */
1032 int
1033 vm_map_submap(map, start, end, submap)
1034 	register vm_map_t map;
1035 	register vm_offset_t start;
1036 	register vm_offset_t end;
1037 	vm_map_t submap;
1038 {
1039 	vm_map_entry_t entry;
1040 	register int result = KERN_INVALID_ARGUMENT;
1041 
1042 	vm_map_lock(map);
1043 
1044 	VM_MAP_RANGE_CHECK(map, start, end);
1045 
1046 	if (vm_map_lookup_entry(map, start, &entry)) {
1047 		vm_map_clip_start(map, entry, start);
1048 	} else
1049 		entry = entry->next;
1050 
1051 	vm_map_clip_end(map, entry, end);
1052 
1053 	if ((entry->start == start) && (entry->end == end) &&
1054 	    ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_COW)) == 0) &&
1055 	    (entry->object.vm_object == NULL)) {
1056 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1057 		vm_map_reference(entry->object.sub_map = submap);
1058 		result = KERN_SUCCESS;
1059 	}
1060 	vm_map_unlock(map);
1061 
1062 	return (result);
1063 }
1064 
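/*
 *	Illustrative sketch (an addition, not part of the original file):
 *	installing a submap over a range that was first reserved with
 *	vm_map_find, as required above.  "parent" and "submap" are
 *	placeholder maps.
 *
 *		vm_offset_t addr = start;
 *
 *		(void) vm_map_find(parent, NULL, (vm_ooffset_t) 0, &addr,
 *		    end - start, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0);
 *		(void) vm_map_submap(parent, start, end, submap);
 */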
1065 /*
1066  *	vm_map_protect:
1067  *
1068  *	Sets the protection of the specified address
1069  *	region in the target map.  If "set_max" is
1070  *	specified, the maximum protection is to be set;
1071  *	otherwise, only the current protection is affected.
1072  */
1073 int
1074 vm_map_protect(map, start, end, new_prot, set_max)
1075 	register vm_map_t map;
1076 	register vm_offset_t start;
1077 	register vm_offset_t end;
1078 	register vm_prot_t new_prot;
1079 	register boolean_t set_max;
1080 {
1081 	register vm_map_entry_t current;
1082 	vm_map_entry_t entry;
1083 
1084 	vm_map_lock(map);
1085 
1086 	VM_MAP_RANGE_CHECK(map, start, end);
1087 
1088 	if (vm_map_lookup_entry(map, start, &entry)) {
1089 		vm_map_clip_start(map, entry, start);
1090 	} else {
1091 		entry = entry->next;
1092 	}
1093 
1094 	/*
1095 	 * Make a first pass to check for protection violations.
1096 	 */
1097 
1098 	current = entry;
1099 	while ((current != &map->header) && (current->start < end)) {
1100 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1101 			vm_map_unlock(map);
1102 			return (KERN_INVALID_ARGUMENT);
1103 		}
1104 		if ((new_prot & current->max_protection) != new_prot) {
1105 			vm_map_unlock(map);
1106 			return (KERN_PROTECTION_FAILURE);
1107 		}
1108 		current = current->next;
1109 	}
1110 
1111 	/*
1112 	 * Go back and fix up protections. [Note that clipping is not
1113 	 * necessary the second time.]
1114 	 */
1115 
1116 	current = entry;
1117 
1118 	while ((current != &map->header) && (current->start < end)) {
1119 		vm_prot_t old_prot;
1120 
1121 		vm_map_clip_end(map, current, end);
1122 
1123 		old_prot = current->protection;
1124 		if (set_max)
1125 			current->protection =
1126 			    (current->max_protection = new_prot) &
1127 			    old_prot;
1128 		else
1129 			current->protection = new_prot;
1130 
1131 		/*
1132 		 * Update physical map if necessary. Worry about copy-on-write
1133 		 * here -- CHECK THIS XXX
1134 		 */
1135 
1136 		if (current->protection != old_prot) {
1137 #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1138 							VM_PROT_ALL)
1139 #define	max(a,b)	((a) > (b) ? (a) : (b))
1140 
1141 			if (current->eflags & MAP_ENTRY_IS_A_MAP) {
1142 				vm_map_entry_t share_entry;
1143 				vm_offset_t share_end;
1144 
1145 				vm_map_lock(current->object.share_map);
1146 				(void) vm_map_lookup_entry(
1147 				    current->object.share_map,
1148 				    current->offset,
1149 				    &share_entry);
1150 				share_end = current->offset +
1151 				    (current->end - current->start);
1152 				while ((share_entry !=
1153 					&current->object.share_map->header) &&
1154 				    (share_entry->start < share_end)) {
1155 
1156 					pmap_protect(map->pmap,
1157 					    (max(share_entry->start,
1158 						    current->offset) -
1159 						current->offset +
1160 						current->start),
1161 					    min(share_entry->end,
1162 						share_end) -
1163 					    current->offset +
1164 					    current->start,
1165 					    current->protection &
1166 					    MASK(share_entry));
1167 
1168 					share_entry = share_entry->next;
1169 				}
1170 				vm_map_unlock(current->object.share_map);
1171 			} else
1172 				pmap_protect(map->pmap, current->start,
1173 				    current->end,
1174 				    current->protection & MASK(entry));
1175 #undef	max
1176 #undef	MASK
1177 		}
1178 
1179 		vm_map_simplify_entry(map, current);
1180 
1181 		current = current->next;
1182 	}
1183 
1184 	vm_map_unlock(map);
1185 	return (KERN_SUCCESS);
1186 }
1187 
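/*
 *	Illustrative sketch (an addition, not part of the original file):
 *	making a range read-only.  With set_max FALSE only the current
 *	protection changes; the maximum protection is left alone.
 *
 *		(void) vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 */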
1188 /*
1189  *	vm_map_madvise:
1190  *
1191  * 	This routine traverses a process's map, handling the madvise
1192  *	system call.
1193  */
1194 void
1195 vm_map_madvise(map, pmap, start, end, advise)
1196 	vm_map_t map;
1197 	pmap_t pmap;
1198 	vm_offset_t start, end;
1199 	int advise;
1200 {
1201 	register vm_map_entry_t current;
1202 	vm_map_entry_t entry;
1203 
1204 	vm_map_lock(map);
1205 
1206 	VM_MAP_RANGE_CHECK(map, start, end);
1207 
1208 	if (vm_map_lookup_entry(map, start, &entry)) {
1209 		vm_map_clip_start(map, entry, start);
1210 	} else
1211 		entry = entry->next;
1212 
1213 	for(current = entry;
1214 		(current != &map->header) && (current->start < end);
1215 		current = current->next) {
1216 		vm_size_t size = current->end - current->start;
1217 
1218 		if (current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
1219 			continue;
1220 		}
1221 
1222 		/*
1223 		 * Create an object if needed
1224 		 */
1225 		if (current->object.vm_object == NULL) {
1226 			vm_object_t object;
1227 			object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(size));
1228 			current->object.vm_object = object;
1229 			current->offset = 0;
1230 		}
1231 
1232 		vm_map_clip_end(map, current, end);
1233 		switch (advise) {
1234 	case MADV_NORMAL:
1235 			current->object.vm_object->behavior = OBJ_NORMAL;
1236 			break;
1237 	case MADV_SEQUENTIAL:
1238 			current->object.vm_object->behavior = OBJ_SEQUENTIAL;
1239 			break;
1240 	case MADV_RANDOM:
1241 			current->object.vm_object->behavior = OBJ_RANDOM;
1242 			break;
1243 	/*
1244 	 * Right now, we could handle DONTNEED and WILLNEED with common code.
1245 	 * They are mostly the same, except for the potential async reads (NYI).
1246 	 */
1247 	case MADV_FREE:
1248 	case MADV_DONTNEED:
1249 			{
1250 				vm_pindex_t pindex;
1251 				int count;
1252 				size = current->end - current->start;
1253 				pindex = OFF_TO_IDX(current->offset);
1254 				count = OFF_TO_IDX(size);
1255 				/*
1256 				 * MADV_DONTNEED removes the page from all
1257 				 * pmaps, so pmap_remove is not necessary.
1258 				 */
1259 				vm_object_madvise(current->object.vm_object,
1260 					pindex, count, advise);
1261 			}
1262 			break;
1263 
1264 	case MADV_WILLNEED:
1265 			{
1266 				vm_pindex_t pindex;
1267 				int count;
1268 				size = current->end - current->start;
1269 				pindex = OFF_TO_IDX(current->offset);
1270 				count = OFF_TO_IDX(size);
1271 				vm_object_madvise(current->object.vm_object,
1272 					pindex, count, advise);
1273 				pmap_object_init_pt(pmap, current->start,
1274 					current->object.vm_object, pindex,
1275 					(count << PAGE_SHIFT), 0);
1276 			}
1277 			break;
1278 
1279 	default:
1280 			break;
1281 		}
1282 	}
1283 
1284 	vm_map_simplify_entry(map, entry);
1285 	vm_map_unlock(map);
1286 	return;
1287 }
1288 
1289 
1290 /*
1291  *	vm_map_inherit:
1292  *
1293  *	Sets the inheritance of the specified address
1294  *	range in the target map.  Inheritance
1295  *	affects how the map will be shared with
1296  *	child maps at the time of vm_map_fork.
1297  */
1298 int
1299 vm_map_inherit(map, start, end, new_inheritance)
1300 	register vm_map_t map;
1301 	register vm_offset_t start;
1302 	register vm_offset_t end;
1303 	register vm_inherit_t new_inheritance;
1304 {
1305 	register vm_map_entry_t entry;
1306 	vm_map_entry_t temp_entry;
1307 
1308 	switch (new_inheritance) {
1309 	case VM_INHERIT_NONE:
1310 	case VM_INHERIT_COPY:
1311 	case VM_INHERIT_SHARE:
1312 		break;
1313 	default:
1314 		return (KERN_INVALID_ARGUMENT);
1315 	}
1316 
1317 	vm_map_lock(map);
1318 
1319 	VM_MAP_RANGE_CHECK(map, start, end);
1320 
1321 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1322 		entry = temp_entry;
1323 		vm_map_clip_start(map, entry, start);
1324 	} else
1325 		entry = temp_entry->next;
1326 
1327 	while ((entry != &map->header) && (entry->start < end)) {
1328 		vm_map_clip_end(map, entry, end);
1329 
1330 		entry->inheritance = new_inheritance;
1331 
1332 		entry = entry->next;
1333 	}
1334 
1335 	vm_map_simplify_entry(map, temp_entry);
1336 	vm_map_unlock(map);
1337 	return (KERN_SUCCESS);
1338 }
1339 
1340 /*
1341  * Implement the semantics of mlock
1342  */
1343 int
1344 vm_map_user_pageable(map, start, end, new_pageable)
1345 	register vm_map_t map;
1346 	register vm_offset_t start;
1347 	register vm_offset_t end;
1348 	register boolean_t new_pageable;
1349 {
1350 	register vm_map_entry_t entry;
1351 	vm_map_entry_t start_entry;
1352 	register vm_offset_t failed = 0;
1353 	int rv;
1354 
1355 	vm_map_lock(map);
1356 	VM_MAP_RANGE_CHECK(map, start, end);
1357 
1358 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1359 		vm_map_unlock(map);
1360 		return (KERN_INVALID_ADDRESS);
1361 	}
1362 
1363 	if (new_pageable) {
1364 
1365 		entry = start_entry;
1366 		vm_map_clip_start(map, entry, start);
1367 
1368 		/*
1369 		 * Now decrement the wiring count for each region. If a region
1370 		 * becomes completely unwired, unwire its physical pages and
1371 		 * mappings.
1372 		 */
1373 		vm_map_set_recursive(map);
1374 
1375 		entry = start_entry;
1376 		while ((entry != &map->header) && (entry->start < end)) {
1377 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1378 				vm_map_clip_end(map, entry, end);
1379 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1380 				entry->wired_count--;
1381 				if (entry->wired_count == 0)
1382 					vm_fault_unwire(map, entry->start, entry->end);
1383 			}
1384 			entry = entry->next;
1385 		}
1386 		vm_map_simplify_entry(map, start_entry);
1387 		vm_map_clear_recursive(map);
1388 	} else {
1389 
1390 		/*
1391 		 * Because of the possibility of blocking, etc., we restart
1392 		 * through the process's map entries from the beginning so that
1393 		 * we don't end up depending on a map entry that could have
1394 		 * changed.
1395 		 */
1396 	rescan:
1397 
1398 		entry = start_entry;
1399 
1400 		while ((entry != &map->header) && (entry->start < end)) {
1401 
1402 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1403 				entry = entry->next;
1404 				continue;
1405 			}
1406 
1407 			if (entry->wired_count != 0) {
1408 				entry->wired_count++;
1409 				entry->eflags |= MAP_ENTRY_USER_WIRED;
1410 				entry = entry->next;
1411 				continue;
1412 			}
1413 
1414 			/* Here on entry being newly wired */
1415 
1416 			if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
1417 				int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1418 				if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1419 
1420 					vm_object_shadow(&entry->object.vm_object,
1421 					    &entry->offset,
1422 					    OFF_TO_IDX(entry->end
1423 						- entry->start));
1424 					entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1425 
1426 				} else if (entry->object.vm_object == NULL) {
1427 
1428 					entry->object.vm_object =
1429 					    vm_object_allocate(OBJT_DEFAULT,
1430 						OFF_TO_IDX(entry->end - entry->start));
1431 					entry->offset = (vm_offset_t) 0;
1432 
1433 				}
1434 				default_pager_convert_to_swapq(entry->object.vm_object);
1435 			}
1436 
1437 			vm_map_clip_start(map, entry, start);
1438 			vm_map_clip_end(map, entry, end);
1439 
1440 			entry->wired_count++;
1441 			entry->eflags |= MAP_ENTRY_USER_WIRED;
1442 
1443 			/* First we need to allow map modifications */
1444 			vm_map_set_recursive(map);
1445 			lockmgr(&map->lock, LK_DOWNGRADE,(void *)0, curproc);
1446 
1447 			rv = vm_fault_user_wire(map, entry->start, entry->end);
1448 			if (rv) {
1449 
1450 				entry->wired_count--;
1451 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1452 
1453 				vm_map_clear_recursive(map);
1454 				vm_map_unlock(map);
1455 
1456 				(void) vm_map_user_pageable(map, start, entry->start, TRUE);
1457 				return rv;
1458 			}
1459 
1460 			vm_map_clear_recursive(map);
1461 			lockmgr(&map->lock, LK_UPGRADE, (void *)0, curproc);
1462 
1463 			goto rescan;
1464 		}
1465 	}
1466 	vm_map_unlock(map);
1467 	return KERN_SUCCESS;
1468 }
1469 
1470 /*
1471  *	vm_map_pageable:
1472  *
1473  *	Sets the pageability of the specified address
1474  *	range in the target map.  Regions specified
1475  *	as not pageable require locked-down physical
1476  *	memory and physical page maps.
1477  *
1478  *	The map must not be locked, but a reference
1479  *	must remain to the map throughout the call.
1480  */
1481 int
1482 vm_map_pageable(map, start, end, new_pageable)
1483 	register vm_map_t map;
1484 	register vm_offset_t start;
1485 	register vm_offset_t end;
1486 	register boolean_t new_pageable;
1487 {
1488 	register vm_map_entry_t entry;
1489 	vm_map_entry_t start_entry;
1490 	register vm_offset_t failed = 0;
1491 	int rv;
1492 
1493 	vm_map_lock(map);
1494 
1495 	VM_MAP_RANGE_CHECK(map, start, end);
1496 
1497 	/*
1498 	 * Only one pageability change may take place at one time, since
1499 	 * vm_fault assumes it will be called only once for each
1500 	 * wiring/unwiring.  Therefore, we have to make sure we're actually
1501 	 * changing the pageability for the entire region.  We do so before
1502 	 * making any changes.
1503 	 */
1504 
1505 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1506 		vm_map_unlock(map);
1507 		return (KERN_INVALID_ADDRESS);
1508 	}
1509 	entry = start_entry;
1510 
1511 	/*
1512 	 * Actions are rather different for wiring and unwiring, so we have
1513 	 * two separate cases.
1514 	 */
1515 
1516 	if (new_pageable) {
1517 
1518 		vm_map_clip_start(map, entry, start);
1519 
1520 		/*
1521 		 * Unwiring.  First ensure that the range to be unwired is
1522 		 * really wired down and that there are no holes.
1523 		 */
1524 		while ((entry != &map->header) && (entry->start < end)) {
1525 
1526 			if (entry->wired_count == 0 ||
1527 			    (entry->end < end &&
1528 				(entry->next == &map->header ||
1529 				    entry->next->start > entry->end))) {
1530 				vm_map_unlock(map);
1531 				return (KERN_INVALID_ARGUMENT);
1532 			}
1533 			entry = entry->next;
1534 		}
1535 
1536 		/*
1537 		 * Now decrement the wiring count for each region. If a region
1538 		 * becomes completely unwired, unwire its physical pages and
1539 		 * mappings.
1540 		 */
1541 		vm_map_set_recursive(map);
1542 
1543 		entry = start_entry;
1544 		while ((entry != &map->header) && (entry->start < end)) {
1545 			vm_map_clip_end(map, entry, end);
1546 
1547 			entry->wired_count--;
1548 			if (entry->wired_count == 0)
1549 				vm_fault_unwire(map, entry->start, entry->end);
1550 
1551 			entry = entry->next;
1552 		}
1553 		vm_map_simplify_entry(map, start_entry);
1554 		vm_map_clear_recursive(map);
1555 	} else {
1556 		/*
1557 		 * Wiring.  We must do this in two passes:
1558 		 *
1559 		 * 1.  Holding the write lock, we create any shadow or zero-fill
1560 		 * objects that need to be created. Then we clip each map
1561 		 * entry to the region to be wired and increment its wiring
1562 		 * count.  We create objects before clipping the map entries
1563 		 * to avoid object proliferation.
1564 		 *
1565 		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
1566 		 * fault in the pages for any newly wired area (wired_count is
1567 		 * 1).
1568 		 *
1569 		 * Downgrading to a read lock for vm_fault_wire avoids a possible
1570 		 * deadlock with another process that may have faulted on one
1571 		 * of the pages to be wired (it would mark the page busy,
1572 		 * blocking us, then in turn block on the map lock that we
1573 		 * hold).  Because of problems in the recursive lock package,
1574 		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
1575 		 * any actions that require the write lock must be done
1576 		 * beforehand.  Because we keep the read lock on the map, the
1577 		 * copy-on-write status of the entries we modify here cannot
1578 		 * change.
1579 		 */
1580 
1581 		/*
1582 		 * Pass 1.
1583 		 */
1584 		while ((entry != &map->header) && (entry->start < end)) {
1585 			if (entry->wired_count == 0) {
1586 
1587 				/*
1588 				 * Perform actions of vm_map_lookup that need
1589 				 * the write lock on the map: create a shadow
1590 				 * object for a copy-on-write region, or an
1591 				 * object for a zero-fill region.
1592 				 *
1593 				 * We don't have to do this for entries that
1594 				 * point to sharing maps, because we won't
1595 				 * hold the lock on the sharing map.
1596 				 */
1597 				if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
1598 					int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1599 					if (copyflag &&
1600 					    ((entry->protection & VM_PROT_WRITE) != 0)) {
1601 
1602 						vm_object_shadow(&entry->object.vm_object,
1603 						    &entry->offset,
1604 						    OFF_TO_IDX(entry->end
1605 							- entry->start));
1606 						entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1607 					} else if (entry->object.vm_object == NULL) {
1608 						entry->object.vm_object =
1609 						    vm_object_allocate(OBJT_DEFAULT,
1610 							OFF_TO_IDX(entry->end - entry->start));
1611 						entry->offset = (vm_offset_t) 0;
1612 					}
1613 					default_pager_convert_to_swapq(entry->object.vm_object);
1614 				}
1615 			}
1616 			vm_map_clip_start(map, entry, start);
1617 			vm_map_clip_end(map, entry, end);
1618 			entry->wired_count++;
1619 
1620 			/*
1621 			 * Check for holes
1622 			 */
1623 			if (entry->end < end &&
1624 			    (entry->next == &map->header ||
1625 				entry->next->start > entry->end)) {
1626 				/*
1627 				 * Found one.  Object creation actions do not
1628 				 * need to be undone, but the wired counts
1629 				 * need to be restored.
1630 				 */
1631 				while (entry != &map->header && entry->end > start) {
1632 					entry->wired_count--;
1633 					entry = entry->prev;
1634 				}
1635 				vm_map_unlock(map);
1636 				return (KERN_INVALID_ARGUMENT);
1637 			}
1638 			entry = entry->next;
1639 		}
1640 
1641 		/*
1642 		 * Pass 2.
1643 		 */
1644 
1645 		/*
1646 		 * HACK HACK HACK HACK
1647 		 *
1648 		 * If we are wiring in the kernel map or a submap of it,
1649 		 * unlock the map to avoid deadlocks.  We trust that the
1650 		 * kernel is well-behaved, and therefore will not do
1651 		 * anything destructive to this region of the map while
1652 		 * we have it unlocked.  We cannot trust user processes
1653 		 * to do the same.
1654 		 *
1655 		 * HACK HACK HACK HACK
1656 		 */
1657 		if (vm_map_pmap(map) == kernel_pmap) {
1658 			vm_map_unlock(map);	/* trust me ... */
1659 		} else {
1660 			vm_map_set_recursive(map);
1661 			lockmgr(&map->lock, LK_DOWNGRADE, (void*)0, curproc);
1662 		}
1663 
1664 		rv = 0;
1665 		entry = start_entry;
1666 		while (entry != &map->header && entry->start < end) {
1667 			/*
1668 			 * If vm_fault_wire fails for any page we need to undo
1669 			 * what has been done.  We decrement the wiring count
1670 			 * for those pages which have not yet been wired (now)
1671 			 * and unwire those that have (later).
1672 			 *
1673 			 * XXX this violates the locking protocol on the map,
1674 			 * needs to be fixed.
1675 			 */
1676 			if (rv)
1677 				entry->wired_count--;
1678 			else if (entry->wired_count == 1) {
1679 				rv = vm_fault_wire(map, entry->start, entry->end);
1680 				if (rv) {
1681 					failed = entry->start;
1682 					entry->wired_count--;
1683 				}
1684 			}
1685 			entry = entry->next;
1686 		}
1687 
1688 		if (vm_map_pmap(map) == kernel_pmap) {
1689 			vm_map_lock(map);
1690 		} else {
1691 			vm_map_clear_recursive(map);
1692 		}
1693 		if (rv) {
1694 			vm_map_unlock(map);
1695 			(void) vm_map_pageable(map, start, failed, TRUE);
1696 			return (rv);
1697 		}
1698 		vm_map_simplify_entry(map, start_entry);
1699 	}
1700 
1701 	vm_map_unlock(map);
1702 
1703 	return (KERN_SUCCESS);
1704 }
1705 
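/*
 *	Illustrative sketch (an addition, not part of the original file):
 *	wiring a range down and unwiring it again.  new_pageable FALSE
 *	wires (faults in and locks the pages); TRUE unwires.
 *
 *		rv = vm_map_pageable(map, start, end, FALSE);
 *		... use the wired region ...
 *		(void) vm_map_pageable(map, start, end, TRUE);
 */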
1706 /*
1707  * vm_map_clean
1708  *
1709  * Push any dirty cached pages in the address range to their pager.
1710  * If syncio is TRUE, dirty pages are written synchronously.
1711  * If invalidate is TRUE, any cached pages are freed as well.
1712  *
1713  * Returns an error if any part of the specified range is not mapped.
1714  */
1715 int
1716 vm_map_clean(map, start, end, syncio, invalidate)
1717 	vm_map_t map;
1718 	vm_offset_t start;
1719 	vm_offset_t end;
1720 	boolean_t syncio;
1721 	boolean_t invalidate;
1722 {
1723 	register vm_map_entry_t current;
1724 	vm_map_entry_t entry;
1725 	vm_size_t size;
1726 	vm_object_t object;
1727 	vm_ooffset_t offset;
1728 
1729 	vm_map_lock_read(map);
1730 	VM_MAP_RANGE_CHECK(map, start, end);
1731 	if (!vm_map_lookup_entry(map, start, &entry)) {
1732 		vm_map_unlock_read(map);
1733 		return (KERN_INVALID_ADDRESS);
1734 	}
1735 	/*
1736 	 * Make a first pass to check for holes.
1737 	 */
1738 	for (current = entry; current->start < end; current = current->next) {
1739 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1740 			vm_map_unlock_read(map);
1741 			return (KERN_INVALID_ARGUMENT);
1742 		}
1743 		if (end > current->end &&
1744 		    (current->next == &map->header ||
1745 			current->end != current->next->start)) {
1746 			vm_map_unlock_read(map);
1747 			return (KERN_INVALID_ADDRESS);
1748 		}
1749 	}
1750 
1751 	/*
1752 	 * Make a second pass, cleaning/uncaching pages from the indicated
1753 	 * objects as we go.
1754 	 */
1755 	for (current = entry; current->start < end; current = current->next) {
1756 		offset = current->offset + (start - current->start);
1757 		size = (end <= current->end ? end : current->end) - start;
1758 		if (current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
1759 			register vm_map_t smap;
1760 			vm_map_entry_t tentry;
1761 			vm_size_t tsize;
1762 
1763 			smap = current->object.share_map;
1764 			vm_map_lock_read(smap);
1765 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1766 			tsize = tentry->end - offset;
1767 			if (tsize < size)
1768 				size = tsize;
1769 			object = tentry->object.vm_object;
1770 			offset = tentry->offset + (offset - tentry->start);
1771 			vm_map_unlock_read(smap);
1772 		} else {
1773 			object = current->object.vm_object;
1774 		}
1775 		/*
1776 		 * Note that there is absolutely no sense in writing out
1777 		 * anonymous objects, so we track down the vnode object
1778 		 * to write out.
1779 		 * We invalidate (remove) all pages from the address space
1780 		 * anyway, for semantic correctness.
1781 		 */
1782 		while (object->backing_object) {
1783 			object = object->backing_object;
1784 			offset += object->backing_object_offset;
1785 			if (object->size < OFF_TO_IDX( offset + size))
1786 				size = IDX_TO_OFF(object->size) - offset;
1787 		}
1788 		if (invalidate)
1789 			pmap_remove(vm_map_pmap(map), current->start,
1790 				current->start + size);
1791 		if (object && (object->type == OBJT_VNODE)) {
1792 			/*
1793 			 * Flush pages if writing is allowed. XXX should we continue
1794 			 * on an error?
1795 			 *
1796 			 * XXX Doing async I/O and then removing all the pages from
1797 			 *     the object before it completes is probably a very bad
1798 			 *     idea.
1799 			 */
1800 			if (current->protection & VM_PROT_WRITE) {
1801 		   	    	vm_object_page_clean(object,
1802 					OFF_TO_IDX(offset),
1803 					OFF_TO_IDX(offset + size),
1804 					(syncio||invalidate)?1:0, TRUE);
1805 				if (invalidate)
1806 					vm_object_page_remove(object,
1807 						OFF_TO_IDX(offset),
1808 						OFF_TO_IDX(offset + size),
1809 						FALSE);
1810 			}
1811 		}
1812 		start += size;
1813 	}
1814 
1815 	vm_map_unlock_read(map);
1816 	return (KERN_SUCCESS);
1817 }
1818 
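/*
 *	Illustrative sketch (an addition, not part of the original file):
 *	msync(2)-style use of vm_map_clean, writing dirty pages back
 *	synchronously without invalidating the cached copies.
 *
 *		rv = vm_map_clean(map, start, end, TRUE, FALSE);
 */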
1819 /*
1820  *	vm_map_entry_unwire:	[ internal use only ]
1821  *
1822  *	Make the region specified by this entry pageable.
1823  *
1824  *	The map in question should be locked.
1825  *	[This is the reason for this routine's existence.]
1826  */
1827 static void
1828 vm_map_entry_unwire(map, entry)
1829 	vm_map_t map;
1830 	register vm_map_entry_t entry;
1831 {
1832 	vm_fault_unwire(map, entry->start, entry->end);
1833 	entry->wired_count = 0;
1834 }
1835 
1836 /*
1837  *	vm_map_entry_delete:	[ internal use only ]
1838  *
1839  *	Deallocate the given entry from the target map.
1840  */
1841 static void
1842 vm_map_entry_delete(map, entry)
1843 	register vm_map_t map;
1844 	register vm_map_entry_t entry;
1845 {
1846 	vm_map_entry_unlink(map, entry);
1847 	map->size -= entry->end - entry->start;
1848 
1849 	if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
1850 		vm_map_deallocate(entry->object.share_map);
1851 	} else {
1852 		vm_object_deallocate(entry->object.vm_object);
1853 	}
1854 
1855 	vm_map_entry_dispose(map, entry);
1856 }
1857 
1858 /*
1859  *	vm_map_delete:	[ internal use only ]
1860  *
1861  *	Deallocates the given address range from the target
1862  *	map.
1863  *
1864  *	When called with a sharing map, removes pages from
1865  *	that region from all physical maps.
1866  */
1867 int
1868 vm_map_delete(map, start, end)
1869 	register vm_map_t map;
1870 	vm_offset_t start;
1871 	register vm_offset_t end;
1872 {
1873 	register vm_map_entry_t entry;
1874 	vm_map_entry_t first_entry;
1875 
1876 	/*
1877 	 * Find the start of the region, and clip it
1878 	 */
1879 
1880 	if (!vm_map_lookup_entry(map, start, &first_entry))
1881 		entry = first_entry->next;
1882 	else {
1883 		entry = first_entry;
1884 		vm_map_clip_start(map, entry, start);
1885 
1886 		/*
1887 		 * Fix the lookup hint now, rather than each time through the
1888 		 * loop.
1889 		 */
1890 
1891 		SAVE_HINT(map, entry->prev);
1892 	}
1893 
1894 	/*
1895 	 * Save the free space hint
1896 	 */
1897 
1898 	if (entry == &map->header) {
1899 		map->first_free = &map->header;
1900 	} else if (map->first_free->start >= start)
1901 		map->first_free = entry->prev;
1902 
1903 	/*
1904 	 * Step through all entries in this region
1905 	 */
1906 
1907 	while ((entry != &map->header) && (entry->start < end)) {
1908 		vm_map_entry_t next;
1909 		vm_offset_t s, e;
1910 		vm_object_t object;
1911 		vm_ooffset_t offset;
1912 
1913 		vm_map_clip_end(map, entry, end);
1914 
1915 		next = entry->next;
1916 		s = entry->start;
1917 		e = entry->end;
1918 		offset = entry->offset;
1919 
1920 		/*
1921 		 * Unwire before removing addresses from the pmap; otherwise,
1922 		 * unwiring will put the entries back in the pmap.
1923 		 */
1924 
1925 		object = entry->object.vm_object;
1926 		if (entry->wired_count != 0)
1927 			vm_map_entry_unwire(map, entry);
1928 
1929 		/*
1930 		 * Kernel and kmem objects have their pages removed directly.
1931 		 * If this is a sharing map, we must remove *all* references,
1932 		 * since we can't find every physical map sharing the data.
1933 		 */
1934 
1935 		if (object == kernel_object || object == kmem_object) {
1936 			vm_object_page_remove(object, OFF_TO_IDX(offset),
1937 			    OFF_TO_IDX(offset + (e - s)), FALSE);
1938 		} else if (!map->is_main_map) {
1939 			vm_object_pmap_remove(object,
1940 			    OFF_TO_IDX(offset),
1941 			    OFF_TO_IDX(offset + (e - s)));
1942 		} else {
1943 			pmap_remove(map->pmap, s, e);
1944 		}
1945 
1946 		/*
1947 		 * Delete the entry (which may delete the object) only after
1948 		 * removing all pmap entries pointing to its pages.
1949 		 * (Otherwise, its page frames may be reallocated, and any
1950 		 * modify bits will be set in the wrong object!)
1951 		 */
1952 
1953 		vm_map_entry_delete(map, entry);
1954 		entry = next;
1955 	}
1956 	return (KERN_SUCCESS);
1957 }
1958 
1959 /*
1960  *	vm_map_remove:
1961  *
1962  *	Remove the given address range from the target map.
1963  *	This is the exported form of vm_map_delete.
1964  */
1965 int
1966 vm_map_remove(map, start, end)
1967 	register vm_map_t map;
1968 	register vm_offset_t start;
1969 	register vm_offset_t end;
1970 {
1971 	register int result, s = 0;
1972 
1973 	if (map == kmem_map || map == mb_map)
1974 		s = splvm();
1975 
1976 	vm_map_lock(map);
1977 	VM_MAP_RANGE_CHECK(map, start, end);
1978 	result = vm_map_delete(map, start, end);
1979 	vm_map_unlock(map);
1980 
1981 	if (map == kmem_map || map == mb_map)
1982 		splx(s);
1983 
1984 	return (result);
1985 }
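/*
 * Example (editorial sketch, hypothetical helper name): releasing a
 * previously mapped kernel address range, in the style of kmem_free()
 * in vm/vm_kern.c.
 *
 *	void
 *	example_free_range(vm_map_t map, vm_offset_t addr, vm_size_t size)
 *	{
 *		(void) vm_map_remove(map, trunc_page(addr),
 *		    round_page(addr + size));
 *	}
 */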
1986 
1987 /*
1988  *	vm_map_check_protection:
1989  *
1990  *	Assert that the target map allows the specified
1991  *	privilege on the entire address region given.
1992  *	The entire region must be allocated.
1993  */
1994 boolean_t
1995 vm_map_check_protection(map, start, end, protection)
1996 	register vm_map_t map;
1997 	register vm_offset_t start;
1998 	register vm_offset_t end;
1999 	register vm_prot_t protection;
2000 {
2001 	register vm_map_entry_t entry;
2002 	vm_map_entry_t tmp_entry;
2003 
2004 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2005 		return (FALSE);
2006 	}
2007 	entry = tmp_entry;
2008 
2009 	while (start < end) {
2010 		if (entry == &map->header) {
2011 			return (FALSE);
2012 		}
2013 		/*
2014 		 * No holes allowed!
2015 		 */
2016 
2017 		if (start < entry->start) {
2018 			return (FALSE);
2019 		}
2020 		/*
2021 		 * Check protection associated with entry.
2022 		 */
2023 
2024 		if ((entry->protection & protection) != protection) {
2025 			return (FALSE);
2026 		}
2027 		/* go to next entry */
2028 
2029 		start = entry->end;
2030 		entry = entry->next;
2031 	}
2032 	return (TRUE);
2033 }
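/*
 * Example (editorial sketch, hypothetical helper name): asserting that an
 * entire address range is writable before operating on it, in the style
 * of useracc()/kernacc().
 *
 *	boolean_t
 *	example_range_writable(vm_map_t map, vm_offset_t addr, vm_size_t len)
 *	{
 *		return (vm_map_check_protection(map, trunc_page(addr),
 *		    round_page(addr + len), VM_PROT_WRITE));
 *	}
 */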
2034 
2035 /*
2036  *	vm_map_copy_entry:
2037  *
2038  *	Copies the contents of the source entry to the destination
2039  *	entry.  The entries *must* be aligned properly.
2040  */
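/*
 * (Editor's note: vmspace_fork() below calls this with the source map
 * locked; the destination map is the new map, not yet visible to any
 * other process.)
 */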
2041 static void
2042 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
2043 	vm_map_t src_map, dst_map;
2044 	register vm_map_entry_t src_entry, dst_entry;
2045 {
2046 	if ((dst_entry->eflags|src_entry->eflags) &
2047 		(MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
2048 		return;
2049 
2050 	if (src_entry->wired_count == 0) {
2051 
2052 		/*
2053 		 * If the source entry is marked needs_copy, it is already
2054 		 * write-protected.
2055 		 */
2056 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2057 
2058 			boolean_t su;
2059 
2060 			/*
2061 			 * If the source entry has only one mapping, we can
2062 			 * just protect the virtual address range.
2063 			 */
2064 			if (!(su = src_map->is_main_map)) {
2065 				su = (src_map->ref_count == 1);
2066 			}
2067 			if (su) {
2068 				pmap_protect(src_map->pmap,
2069 				    src_entry->start,
2070 				    src_entry->end,
2071 				    src_entry->protection & ~VM_PROT_WRITE);
2072 			} else {
2073 				vm_object_pmap_copy(src_entry->object.vm_object,
2074 				    OFF_TO_IDX(src_entry->offset),
2075 				    OFF_TO_IDX(src_entry->offset + (src_entry->end
2076 					- src_entry->start)));
2077 			}
2078 		}
2079 
2080 		/*
2081 		 * Make a copy of the object.
2082 		 */
2083 		if (src_entry->object.vm_object) {
2084 			if ((src_entry->object.vm_object->handle == NULL) &&
2085 				(src_entry->object.vm_object->type == OBJT_DEFAULT ||
2086 				 src_entry->object.vm_object->type == OBJT_SWAP))
2087 				vm_object_collapse(src_entry->object.vm_object);
2088 			++src_entry->object.vm_object->ref_count;
2089 			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2090 			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2091 			dst_entry->object.vm_object =
2092 				src_entry->object.vm_object;
2093 			dst_entry->offset = src_entry->offset;
2094 		} else {
2095 			dst_entry->object.vm_object = NULL;
2096 			dst_entry->offset = 0;
2097 		}
2098 
2099 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2100 		    dst_entry->end - dst_entry->start, src_entry->start);
2101 	} else {
2102 		/*
2103 		 * Wired-down pages can't be made copy-on-write, so cause them
2104 		 * to be copied into the new map by simulating faults (the new
2105 		 * pages are pageable).
2106 		 */
2107 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2108 	}
2109 }
2110 
2111 /*
2112  * vmspace_fork:
2113  * Create a new process vmspace structure and vm_map
2114  * based on those of an existing process.  The new map
2115  * is based on the old map, according to the inheritance
2116  * values on the regions in that map.
2117  *
2118  * The source map must not be locked.
2119  */
2120 struct vmspace *
2121 vmspace_fork(vm1)
2122 	register struct vmspace *vm1;
2123 {
2124 	register struct vmspace *vm2;
2125 	vm_map_t old_map = &vm1->vm_map;
2126 	vm_map_t new_map;
2127 	vm_map_entry_t old_entry;
2128 	vm_map_entry_t new_entry;
2129 	pmap_t new_pmap;
2130 	vm_object_t object;
2131 
2132 	vm_map_lock(old_map);
2133 
2134 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
2135 	    old_map->entries_pageable);
2136 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2137 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
2138 	new_pmap = &vm2->vm_pmap;	/* XXX */
2139 	new_map = &vm2->vm_map;	/* XXX */
2140 
2141 	old_entry = old_map->header.next;
2142 
2143 	while (old_entry != &old_map->header) {
2144 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2145 			panic("vm_map_fork: encountered a submap");
2146 
2147 		switch (old_entry->inheritance) {
2148 		case VM_INHERIT_NONE:
2149 			break;
2150 
2151 		case VM_INHERIT_SHARE:
2152 			/*
2153 			 * Clone the entry, creating the shared object if necessary.
2154 			 */
2155 			object = old_entry->object.vm_object;
2156 			if (object == NULL) {
2157 				object = vm_object_allocate(OBJT_DEFAULT,
2158 				    OFF_TO_IDX(old_entry->end -
2159 				    old_entry->start));
2160 				old_entry->object.vm_object = object;
2161 				old_entry->offset = (vm_offset_t) 0;
2162 			} else if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2163 				vm_object_shadow(&old_entry->object.vm_object,
2164 						 &old_entry->offset,
2165 						 OFF_TO_IDX(old_entry->end -
2166 							old_entry->start));
2167 
2168 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2169 				object = old_entry->object.vm_object;
2170 			}
2171 
2172 			/*
2173 			 * Clone the entry, referencing the sharing map.
2174 			 */
2175 			new_entry = vm_map_entry_create(new_map);
2176 			*new_entry = *old_entry;
2177 			new_entry->wired_count = 0;
2178 			++object->ref_count;
2179 
2180 			/*
2181 			 * Insert the entry into the new map -- we know we're
2182 			 * inserting at the end of the new map.
2183 			 */
2184 
2185 			vm_map_entry_link(new_map, new_map->header.prev,
2186 			    new_entry);
2187 
2188 			/*
2189 			 * Update the physical map
2190 			 */
2191 
2192 			pmap_copy(new_map->pmap, old_map->pmap,
2193 			    new_entry->start,
2194 			    (old_entry->end - old_entry->start),
2195 			    old_entry->start);
2196 			break;
2197 
2198 		case VM_INHERIT_COPY:
2199 			/*
2200 			 * Clone the entry and link into the map.
2201 			 */
2202 			new_entry = vm_map_entry_create(new_map);
2203 			*new_entry = *old_entry;
2204 			new_entry->wired_count = 0;
2205 			new_entry->object.vm_object = NULL;
2206 			new_entry->eflags &= ~MAP_ENTRY_IS_A_MAP;
2207 			vm_map_entry_link(new_map, new_map->header.prev,
2208 			    new_entry);
2209 			vm_map_copy_entry(old_map, new_map, old_entry,
2210 			    new_entry);
2211 			break;
2212 		}
2213 		old_entry = old_entry->next;
2214 	}
2215 
2216 	new_map->size = old_map->size;
2217 	vm_map_unlock(old_map);
2218 
2219 	return (vm2);
2220 }
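/*
 * Example (editorial sketch, hypothetical proc pointers p1/p2): a
 * fork-style caller gives the child a copy-on-write image of the
 * parent's address space (compare vm_fork()):
 *
 *	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
 */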
2221 
2222 /*
2223  * Unshare the specified VM space for exec: a new, empty vmspace is
2224  * created and installed, and the old one is released.
2225  */
2226 
2227 void
2228 vmspace_exec(struct proc *p) {
2229 	struct vmspace *oldvmspace = p->p_vmspace;
2230 	struct vmspace *newvmspace;
2231 	vm_map_t map = &p->p_vmspace->vm_map;
2232 
2233 	newvmspace = vmspace_alloc(map->min_offset, map->max_offset,
2234 	    map->entries_pageable);
2235 	bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2236 	    (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2237 	/*
2238 	 * This code is written like this for prototype purposes.  The
2239 	 * goal is to avoid running down the vmspace here, but to let the
2240 	 * other processes that are still using the vmspace finally run
2241 	 * it down.  Even though there is little or no chance of blocking
2242 	 * here, it is a good idea to keep this form for future mods.
2243 	 */
2244 	vm_map_reference(&oldvmspace->vm_map);
2245 	vmspace_free(oldvmspace);
2246 	p->p_vmspace = newvmspace;
2247 	if (p == curproc)
2248 		pmap_activate(p);
2249 	vm_map_deallocate(&oldvmspace->vm_map);
2250 }
2251 
2252 /*
2253  * Unshare the specified VM space for forcing COW.  This
2254  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2255  */
2256 
2257 void
2258 vmspace_unshare(struct proc *p) {
2259 	struct vmspace *oldvmspace = p->p_vmspace;
2260 	struct vmspace *newvmspace;
2261 
2262 	if (oldvmspace->vm_refcnt == 1)
2263 		return;
2264 	newvmspace = vmspace_fork(oldvmspace);
2265 	vm_map_reference(&oldvmspace->vm_map);
2266 	vmspace_free(oldvmspace);
2267 	p->p_vmspace = newvmspace;
2268 	if (p == curproc)
2269 		pmap_activate(p);
2270 	vm_map_deallocate(&oldvmspace->vm_map);
2271 }
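/*
 * Editor's note: vmspace_exec() and vmspace_unshare() follow the same
 * hand-off pattern: reference the old map, vmspace_free() the old
 * vmspace, install the new one, activate its pmap if this is the current
 * process, and only then drop the extra map reference.
 */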
2272 
2273 
2274 /*
2275  *	vm_map_lookup:
2276  *
2277  *	Finds the VM object, offset, and
2278  *	protection for a given virtual address in the
2279  *	specified map, assuming a page fault of the
2280  *	type specified.
2281  *
2282  *	Leaves the map in question locked for read; return
2283  *	values are guaranteed until a vm_map_lookup_done
2284  *	call is performed.  Note that the map argument
2285  *	is in/out; the returned map must be used in
2286  *	the call to vm_map_lookup_done.
2287  *
2288  *	A handle (out_entry) is returned for use in
2289  *	vm_map_lookup_done, to make that fast.
2290  *
2291  *	If a lookup is requested with "write protection"
2292  *	specified, the map may be changed to perform virtual
2293  *	copying operations, although the data referenced will
2294  *	remain the same.
2295  */
2296 int
2297 vm_map_lookup(var_map, vaddr, fault_type, out_entry,
2298     object, pindex, out_prot, wired, single_use)
2299 	vm_map_t *var_map;	/* IN/OUT */
2300 	register vm_offset_t vaddr;
2301 	register vm_prot_t fault_type;
2302 
2303 	vm_map_entry_t *out_entry;	/* OUT */
2304 	vm_object_t *object;	/* OUT */
2305 	vm_pindex_t *pindex;	/* OUT */
2306 	vm_prot_t *out_prot;	/* OUT */
2307 	boolean_t *wired;	/* OUT */
2308 	boolean_t *single_use;	/* OUT */
2309 {
2310 	vm_map_t share_map;
2311 	vm_offset_t share_offset;
2312 	register vm_map_entry_t entry;
2313 	register vm_map_t map = *var_map;
2314 	register vm_prot_t prot;
2315 	register boolean_t su;
2316 
2317 RetryLookup:;
2318 
2319 	/*
2320 	 * Lookup the faulting address.
2321 	 */
2322 
2323 	vm_map_lock_read(map);
2324 
2325 #define	RETURN(why) \
2326 		{ \
2327 		vm_map_unlock_read(map); \
2328 		return(why); \
2329 		}
2330 
2331 	/*
2332 	 * If the map has an interesting hint, try it before calling the
2333 	 * full-blown lookup routine.
2334 	 */
2335 
2336 	entry = map->hint;
2337 
2338 	*out_entry = entry;
2339 
2340 	if ((entry == &map->header) ||
2341 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2342 		vm_map_entry_t tmp_entry;
2343 
2344 		/*
2345 		 * Entry was either not a valid hint, or the vaddr was not
2346 		 * contained in the entry, so do a full lookup.
2347 		 */
2348 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2349 			RETURN(KERN_INVALID_ADDRESS);
2350 
2351 		entry = tmp_entry;
2352 		*out_entry = entry;
2353 	}
2354 
2355 	/*
2356 	 * Handle submaps.
2357 	 */
2358 
2359 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2360 		vm_map_t old_map = map;
2361 
2362 		*var_map = map = entry->object.sub_map;
2363 		vm_map_unlock_read(old_map);
2364 		goto RetryLookup;
2365 	}
2366 
2367 	/*
2368 	 * Check whether this task is allowed to have this page.
2369 	 * Note the special case for MAP_ENTRY_COW
2370 	 * pages with an override.  This is to implement a forced
2371 	 * COW for debuggers.
2372 	 */
2373 
2374 	prot = entry->protection;
2375 	if ((fault_type & VM_PROT_OVERRIDE_WRITE) == 0 ||
2376 		(entry->eflags & MAP_ENTRY_COW) == 0 ||
2377 		(entry->wired_count != 0)) {
2378 		if ((fault_type & (prot)) != fault_type)
2379 			RETURN(KERN_PROTECTION_FAILURE);
2380 	}
2381 
2382 	/*
2383 	 * If this page is not pageable, we have to get it for all possible
2384 	 * accesses.
2385 	 */
2386 
2387 	*wired = (entry->wired_count != 0);
2388 	if (*wired)
2389 		prot = fault_type = entry->protection;
2390 
2391 	/*
2392 	 * If we don't already have a VM object, track it down.
2393 	 */
2394 
2395 	su = (entry->eflags & MAP_ENTRY_IS_A_MAP) == 0;
2396 	if (su) {
2397 		share_map = map;
2398 		share_offset = vaddr;
2399 	} else {
2400 		vm_map_entry_t share_entry;
2401 
2402 		/*
2403 		 * Compute the sharing map, and offset into it.
2404 		 */
2405 
2406 		share_map = entry->object.share_map;
2407 		share_offset = (vaddr - entry->start) + entry->offset;
2408 
2409 		/*
2410 		 * Look for the backing store object and offset
2411 		 */
2412 
2413 		vm_map_lock_read(share_map);
2414 
2415 		if (!vm_map_lookup_entry(share_map, share_offset,
2416 			&share_entry)) {
2417 			vm_map_unlock_read(share_map);
2418 			RETURN(KERN_INVALID_ADDRESS);
2419 		}
2420 		entry = share_entry;
2421 	}
2422 
2423 	/*
2424 	 * If the entry was copy-on-write, copy it now or demote the protection.
2425 	 */
2426 
2427 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2428 		/*
2429 		 * If we want to write the page, we may as well handle that
2430 		 * now since we've got the sharing map locked.
2431 		 *
2432 		 * If we don't need to write the page, we just demote the
2433 		 * permissions allowed.
2434 		 */
2435 
2436 		if (fault_type & VM_PROT_WRITE) {
2437 			/*
2438 			 * Make a new object, and place it in the object
2439 			 * chain.  Note that no new references have appeared
2440 			 * -- one just moved from the share map to the new
2441 			 * object.
2442 			 */
2443 
2444 			if (lockmgr(&share_map->lock, LK_EXCLUPGRADE,
2445 					(void *)0, curproc)) {
2446 
2447 				if (share_map != map)
2448 					vm_map_unlock_read(map);
2449 
2450 				goto RetryLookup;
2451 			}
2452 			vm_object_shadow(
2453 			    &entry->object.vm_object,
2454 			    &entry->offset,
2455 			    OFF_TO_IDX(entry->end - entry->start));
2456 
2457 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2458 
2459 			lockmgr(&share_map->lock, LK_DOWNGRADE,
2460 				(void *)0, curproc);
2461 		} else {
2462 			/*
2463 			 * We're attempting to read a copy-on-write page --
2464 			 * don't allow writes.
2465 			 */
2466 
2467 			prot &= (~VM_PROT_WRITE);
2468 		}
2469 	}
2470 	/*
2471 	 * Create an object if necessary.
2472 	 */
2473 	if (entry->object.vm_object == NULL) {
2474 
2475 		if (lockmgr(&share_map->lock, LK_EXCLUPGRADE,
2476 				(void *)0, curproc)) {
2477 			if (share_map != map)
2478 				vm_map_unlock_read(map);
2479 			goto RetryLookup;
2480 		}
2481 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2482 		    OFF_TO_IDX(entry->end - entry->start));
2483 		entry->offset = 0;
2484 		lockmgr(&share_map->lock, LK_DOWNGRADE, (void *)0, curproc);
2485 	}
2486 
2487 	if (entry->object.vm_object != NULL)
2488 		default_pager_convert_to_swapq(entry->object.vm_object);
2489 	/*
2490 	 * Return the object/offset from this entry.  If the entry was
2491 	 * copy-on-write or empty, it has been fixed up.
2492 	 */
2493 
2494 	*pindex = OFF_TO_IDX((share_offset - entry->start) + entry->offset);
2495 	*object = entry->object.vm_object;
2496 
2497 	/*
2498 	 * Return whether this is the only map sharing this data.
2499 	 */
2500 
2501 	if (!su) {
2502 		su = (share_map->ref_count == 1);
2503 	}
2504 	*out_prot = prot;
2505 	*single_use = su;
2506 
2507 	return (KERN_SUCCESS);
2508 
2509 #undef	RETURN
2510 }
2511 
2512 /*
2513  *	vm_map_lookup_done:
2514  *
2515  *	Releases locks acquired by a vm_map_lookup
2516  *	(according to the handle returned by that lookup).
2517  */
2518 
2519 void
2520 vm_map_lookup_done(map, entry)
2521 	register vm_map_t map;
2522 	vm_map_entry_t entry;
2523 {
2524 	/*
2525 	 * If this entry references a map, unlock it first.
2526 	 */
2527 
2528 	if (entry->eflags & MAP_ENTRY_IS_A_MAP)
2529 		vm_map_unlock_read(entry->object.share_map);
2530 
2531 	/*
2532 	 * Unlock the main-level map
2533 	 */
2534 
2535 	vm_map_unlock_read(map);
2536 }
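/*
 * Example (editorial sketch, hypothetical variable names): the intended
 * vm_map_lookup()/vm_map_lookup_done() pairing.  The returned object and
 * pindex remain valid, and the map stays read-locked, until the
 * vm_map_lookup_done() call; the map argument is in/out, so the possibly
 * updated value must be handed back.
 *
 *	vm_map_entry_t entry;
 *	vm_object_t object;
 *	vm_pindex_t pindex;
 *	vm_prot_t prot;
 *	boolean_t wired, single_use;
 *
 *	if (vm_map_lookup(&map, va, VM_PROT_READ, &entry, &object,
 *	    &pindex, &prot, &wired, &single_use) != KERN_SUCCESS)
 *		return;
 *	... examine object and pindex ...
 *	vm_map_lookup_done(map, entry);
 */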
2537 
2538 #include "opt_ddb.h"
2539 #ifdef DDB
2540 #include <sys/kernel.h>
2541 
2542 #include <ddb/ddb.h>
2543 
2544 /*
2545  *	vm_map_print:	[ debug ]
2546  */
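/*
 * Editor's note: DB_SHOW_COMMAND(map, ...) registers this as the DDB
 * "show map" command.  The address given on the command line is treated
 * as a vm_map pointer, and supplying one (have_addr) makes the nested
 * share maps and objects print in full as well.
 */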
2547 DB_SHOW_COMMAND(map, vm_map_print)
2548 {
2549 	/* XXX convert args. */
2550 	register vm_map_t map = (vm_map_t)addr;
2551 	boolean_t full = have_addr;
2552 
2553 	register vm_map_entry_t entry;
2554 
2555 	db_iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n",
2556 	    (map->is_main_map ? "Task" : "Share"),
2557 	    (int) map, (int) (map->pmap), map->ref_count, map->nentries,
2558 	    map->timestamp);
2559 
2560 	if (!full && db_indent)
2561 		return;
2562 
2563 	db_indent += 2;
2564 	for (entry = map->header.next; entry != &map->header;
2565 	    entry = entry->next) {
2566 		db_iprintf("map entry 0x%x: start=0x%x, end=0x%x, ",
2567 		    (int) entry, (int) entry->start, (int) entry->end);
2568 		if (map->is_main_map) {
2569 			static char *inheritance_name[4] =
2570 			{"share", "copy", "none", "donate_copy"};
2571 
2572 			db_printf("prot=%x/%x/%s, ",
2573 			    entry->protection,
2574 			    entry->max_protection,
2575 			    inheritance_name[entry->inheritance]);
2576 			if (entry->wired_count != 0)
2577 				db_printf("wired, ");
2578 		}
2579 		if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
2580 			db_printf("share=0x%x, offset=0x%x\n",
2581 			    (int) entry->object.share_map,
2582 			    (int) entry->offset);
2583 			if ((entry->prev == &map->header) ||
2584 			    ((entry->prev->eflags & MAP_ENTRY_IS_A_MAP) == 0) ||
2585 			    (entry->prev->object.share_map !=
2586 				entry->object.share_map)) {
2587 				db_indent += 2;
2588 				vm_map_print((int)entry->object.share_map,
2589 					     full, 0, (char *)0);
2590 				db_indent -= 2;
2591 			}
2592 		} else {
2593 			db_printf("object=0x%x, offset=0x%x",
2594 			    (int) entry->object.vm_object,
2595 			    (int) entry->offset);
2596 			if (entry->eflags & MAP_ENTRY_COW)
2597 				db_printf(", copy (%s)",
2598 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
2599 			db_printf("\n");
2600 
2601 			if ((entry->prev == &map->header) ||
2602 			    (entry->prev->eflags & MAP_ENTRY_IS_A_MAP) ||
2603 			    (entry->prev->object.vm_object !=
2604 				entry->object.vm_object)) {
2605 				db_indent += 2;
2606 				vm_object_print((int)entry->object.vm_object,
2607 						full, 0, (char *)0);
2608 				db_indent -= 2;
2609 			}
2610 		}
2611 	}
2612 	db_indent -= 2;
2613 }
2614 #endif /* DDB */
2615