xref: /freebsd/sys/vm/vm_map.c (revision 17d6c636720d00f77e5d098daf4c278f89d84f7b)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/ktr.h>
74 #include <sys/lock.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/vnode.h>
80 #include <sys/resourcevar.h>
81 
82 #include <vm/vm.h>
83 #include <vm/vm_param.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_pager.h>
89 #include <vm/vm_kern.h>
90 #include <vm/vm_extern.h>
91 #include <vm/vm_zone.h>
92 #include <vm/swap_pager.h>
93 
94 /*
95  *	Virtual memory maps provide for the mapping, protection,
96  *	and sharing of virtual memory objects.  In addition,
97  *	this module provides for an efficient virtual copy of
98  *	memory from one map to another.
99  *
100  *	Synchronization is required prior to most operations.
101  *
102  *	Maps consist of an ordered doubly-linked list of simple
103  *	entries; a single hint is used to speed up lookups.
104  *
105  *	Since portions of maps are specified by start/end addresses,
106  *	which may not align with existing map entries, all
107  *	routines merely "clip" entries to these start/end values.
108  *	[That is, an entry is split into two, bordering at a
109  *	start or end value.]  Note that these clippings may not
110  *	always be necessary (as the two resulting entries are then
111  *	not changed); however, the clipping is done for convenience.
112  *
113  *	As mentioned above, virtual copy operations are performed
114  *	by copying VM object references from one map to
115  *	another, and then marking both regions as copy-on-write.
116  */
117 
118 /*
119  *	vm_map_startup:
120  *
121  *	Initialize the vm_map module.  Must be called before
122  *	any other vm_map routines.
123  *
124  *	Map and entry structures are allocated from the general
125  *	purpose memory pool with some exceptions:
126  *
127  *	- The kernel map and kmem submap are allocated statically.
128  *	- Kernel map entries are allocated out of a static pool.
129  *
130  *	These restrictions are necessary since malloc() uses the
131  *	maps and requires map entries.
132  */
133 
134 static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store;
135 static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone;
136 static struct vm_object kmapentobj, mapentobj, mapobj;
137 
138 static struct vm_map_entry map_entry_init[MAX_MAPENT];
139 static struct vm_map_entry kmap_entry_init[MAX_KMAPENT];
140 static struct vm_map map_init[MAX_KMAP];
141 
142 void
143 vm_map_startup(void)
144 {
145 	mapzone = &mapzone_store;
146 	zbootinit(mapzone, "MAP", sizeof (struct vm_map),
147 		map_init, MAX_KMAP);
148 	kmapentzone = &kmapentzone_store;
149 	zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry),
150 		kmap_entry_init, MAX_KMAPENT);
151 	mapentzone = &mapentzone_store;
152 	zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
153 		map_entry_init, MAX_MAPENT);
154 }
155 
156 /*
157  * Allocate a vmspace structure, including a vm_map and pmap,
158  * and initialize those structures.  The refcnt is set to 1.
159  * The remaining fields must be initialized by the caller.
160  */
161 struct vmspace *
162 vmspace_alloc(min, max)
163 	vm_offset_t min, max;
164 {
165 	struct vmspace *vm;
166 
167 	GIANT_REQUIRED;
168 	vm = zalloc(vmspace_zone);
169 	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
170 	vm_map_init(&vm->vm_map, min, max);
171 	pmap_pinit(vmspace_pmap(vm));
172 	vm->vm_map.pmap = vmspace_pmap(vm);		/* XXX */
173 	vm->vm_refcnt = 1;
174 	vm->vm_shm = NULL;
175 	return (vm);
176 }
177 
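/*
 *	vm_init2:
 *
 *	Finish initializing the map and map-entry zones (the kernel
 *	map-entry zone is sized from the system page count), create the
 *	vmspace zone, and complete pmap and VM object initialization.
 */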
178 void
179 vm_init2(void)
180 {
181 	zinitna(kmapentzone, &kmapentobj,
182 		NULL, 0, cnt.v_page_count / 4, ZONE_INTERRUPT, 1);
183 	zinitna(mapentzone, &mapentobj,
184 		NULL, 0, 0, 0, 1);
185 	zinitna(mapzone, &mapobj,
186 		NULL, 0, 0, 0, 1);
187 	vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3);
188 	pmap_init2();
189 	vm_object_init2();
190 }
191 
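/*
 *	vmspace_free:
 *
 *	Release a reference to the given vmspace.  On the last reference,
 *	delete all of its mappings, release its pmap, destroy the map,
 *	and free the structure.
 */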
192 void
193 vmspace_free(struct vmspace *vm)
194 {
195 	GIANT_REQUIRED;
196 
197 	if (vm->vm_refcnt == 0)
198 		panic("vmspace_free: attempt to free already freed vmspace");
199 
200 	if (--vm->vm_refcnt == 0) {
201 
202 		CTR1(KTR_VM, "vmspace_free: %p", vm);
203 		/*
204 		 * Lock the map, to wait out all other references to it.
205 		 * Delete all of the mappings and pages they hold, then call
206 		 * the pmap module to reclaim anything left.
207 		 */
208 		vm_map_lock(&vm->vm_map);
209 		(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
210 		    vm->vm_map.max_offset);
211 		vm_map_unlock(&vm->vm_map);
212 
213 		pmap_release(vmspace_pmap(vm));
214 		vm_map_destroy(&vm->vm_map);
215 		zfree(vmspace_zone, vm);
216 	}
217 }
218 
219 /*
220  * vmspace_swap_count() - count the approximate swap usage in pages for a
221  *			  vmspace.
222  *
223  *	Swap usage is determined by taking the proportional swap used by
224  *	VM objects backing the VM map.  To make up for fractional losses,
225  *	if the VM object has any swap use at all, the associated map entries
226  *	count for at least 1 swap page.
227  */
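/*
 *	Illustrative (hypothetical) numbers: an entry spanning n = 256 pages
 *	of a 1024-page swap object with swp_bcount * SWAP_META_PAGES = 512
 *	is charged 512 * 256 / 1024 + 1 = 129 swap pages.
 */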
228 int
229 vmspace_swap_count(struct vmspace *vmspace)
230 {
231 	vm_map_t map = &vmspace->vm_map;
232 	vm_map_entry_t cur;
233 	int count = 0;
234 
235 	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
236 		vm_object_t object;
237 
238 		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
239 		    (object = cur->object.vm_object) != NULL &&
240 		    object->type == OBJT_SWAP
241 		) {
242 			int n = (cur->end - cur->start) / PAGE_SIZE;
243 
244 			if (object->un_pager.swp.swp_bcount) {
245 				count += object->un_pager.swp.swp_bcount *
246 				    SWAP_META_PAGES * n / object->size + 1;
247 			}
248 		}
249 	}
250 	return(count);
251 }
252 
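/*
 *	vm_map_entry_behavior:
 *	vm_map_entry_set_behavior:
 *
 *	Get and set the behavior (MAP_ENTRY_BEHAV_*) advice bits of a
 *	map entry.
 */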
253 u_char
254 vm_map_entry_behavior(struct vm_map_entry *entry)
255 {
256 	return entry->eflags & MAP_ENTRY_BEHAV_MASK;
257 }
258 
259 void
260 vm_map_entry_set_behavior(struct vm_map_entry *entry, u_char behavior)
261 {
262 	entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
263 		(behavior & MAP_ENTRY_BEHAV_MASK);
264 }
265 
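/*
 *	Map locking primitives:  thin wrappers around lockmgr(9) that log
 *	each operation via vm_map_printf() and bump the map timestamp when
 *	an exclusive lock is acquired or a shared lock is upgraded.
 */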
266 void
267 vm_map_lock(vm_map_t map)
268 {
269 	vm_map_printf("locking map LK_EXCLUSIVE: %p\n", map);
270 	if (lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread) != 0)
271 		panic("vm_map_lock: failed to get lock");
272 	map->timestamp++;
273 }
274 
275 void
276 vm_map_unlock(vm_map_t map)
277 {
278 	vm_map_printf("locking map LK_RELEASE: %p\n", map);
279 	lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread);
280 }
281 
282 void
283 vm_map_lock_read(vm_map_t map)
284 {
285 	vm_map_printf("locking map LK_SHARED: %p\n", map);
286 	lockmgr(&(map)->lock, LK_SHARED, NULL, curthread);
287 }
288 
289 void
290 vm_map_unlock_read(vm_map_t map)
291 {
292 	vm_map_printf("locking map LK_RELEASE: %p\n", map);
293 	lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread);
294 }
295 
296 static __inline__ int
297 _vm_map_lock_upgrade(vm_map_t map, struct thread *td) {
298 	int error;
299 
300 	vm_map_printf("locking map LK_EXCLUPGRADE: %p\n", map);
301 	error = lockmgr(&map->lock, LK_EXCLUPGRADE, NULL, td);
302 	if (error == 0)
303 		map->timestamp++;
304 	return error;
305 }
306 
307 int
308 vm_map_lock_upgrade(vm_map_t map)
309 {
310     return(_vm_map_lock_upgrade(map, curthread));
311 }
312 
313 void
314 vm_map_lock_downgrade(vm_map_t map)
315 {
316 	vm_map_printf("locking map LK_DOWNGRADE: %p\n", map);
317 	lockmgr(&map->lock, LK_DOWNGRADE, NULL, curthread);
318 }
319 
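/*
 *	vm_map_set_recursive:
 *	vm_map_clear_recursive:
 *
 *	Allow or disallow recursive acquisition of the map lock by toggling
 *	LK_CANRECURSE under the lock's interlock.
 */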
320 void
321 vm_map_set_recursive(vm_map_t map)
322 {
323 	mtx_lock((map)->lock.lk_interlock);
324 	map->lock.lk_flags |= LK_CANRECURSE;
325 	mtx_unlock((map)->lock.lk_interlock);
326 }
327 
328 void
329 vm_map_clear_recursive(vm_map_t map)
330 {
331 	mtx_lock((map)->lock.lk_interlock);
332 	map->lock.lk_flags &= ~LK_CANRECURSE;
333 	mtx_unlock((map)->lock.lk_interlock);
334 }
335 
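/*
 *	Trivial accessors:  the map's lower and upper address bounds, its
 *	backing pmap, the pmap of a vmspace, and a vmspace's resident
 *	page count.
 */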
336 vm_offset_t
337 vm_map_min(vm_map_t map)
338 {
339 	return(map->min_offset);
340 }
341 
342 vm_offset_t
343 vm_map_max(vm_map_t map)
344 {
345 	return(map->max_offset);
346 }
347 
348 struct pmap *
349 vm_map_pmap(vm_map_t map)
350 {
351 	return(map->pmap);
352 }
353 
354 struct pmap *
355 vmspace_pmap(struct vmspace *vmspace)
356 {
357 	return &vmspace->vm_pmap;
358 }
359 
360 long
361 vmspace_resident_count(struct vmspace *vmspace)
362 {
363 	return pmap_resident_count(vmspace_pmap(vmspace));
364 }
365 
366 /*
367  *	vm_map_create:
368  *
369  *	Creates and returns a new empty VM map with
370  *	the given physical map structure, and having
371  *	the given lower and upper address bounds.
372  */
373 vm_map_t
374 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
375 {
376 	vm_map_t result;
377 
378 	GIANT_REQUIRED;
379 
380 	result = zalloc(mapzone);
381 	CTR1(KTR_VM, "vm_map_create: %p", result);
382 	vm_map_init(result, min, max);
383 	result->pmap = pmap;
384 	return (result);
385 }
386 
387 /*
388  * Initialize an existing vm_map structure
389  * such as that in the vmspace structure.
390  * The pmap is set elsewhere.
391  */
392 void
393 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
394 {
395 	GIANT_REQUIRED;
396 
397 	map->header.next = map->header.prev = &map->header;
398 	map->nentries = 0;
399 	map->size = 0;
400 	map->system_map = 0;
401 	map->infork = 0;
402 	map->min_offset = min;
403 	map->max_offset = max;
404 	map->first_free = &map->header;
405 	map->hint = &map->header;
406 	map->timestamp = 0;
407 	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
408 }
409 
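/*
 *	vm_map_destroy:
 *
 *	Tear down a vm_map structure that is no longer needed; currently
 *	this only destroys the map's lock.
 */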
410 void
411 vm_map_destroy(map)
412 	struct vm_map *map;
413 {
414 	GIANT_REQUIRED;
415 	lockdestroy(&map->lock);
416 }
417 
418 /*
419  *	vm_map_entry_dispose:	[ internal use only ]
420  *
421  *	Inverse of vm_map_entry_create.
422  */
423 static void
424 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
425 {
426 	zfree((map->system_map || !mapentzone) ? kmapentzone : mapentzone, entry);
427 }
428 
429 /*
430  *	vm_map_entry_create:	[ internal use only ]
431  *
432  *	Allocates a VM map entry for insertion.
433  *	No entry fields are filled in.
434  */
435 static vm_map_entry_t
436 vm_map_entry_create(vm_map_t map)
437 {
438 	vm_map_entry_t new_entry;
439 
440 	new_entry = zalloc((map->system_map || !mapentzone) ?
441 		kmapentzone : mapentzone);
442 	if (new_entry == NULL)
443 	    panic("vm_map_entry_create: kernel resources exhausted");
444 	return(new_entry);
445 }
446 
447 /*
448  *	vm_map_entry_{un,}link:
449  *
450  *	Insert/remove entries from maps.
451  */
452 static __inline void
453 vm_map_entry_link(vm_map_t map,
454 		  vm_map_entry_t after_where,
455 		  vm_map_entry_t entry)
456 {
457 
458 	CTR4(KTR_VM,
459 	    "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
460 	    map->nentries, entry, after_where);
461 	map->nentries++;
462 	entry->prev = after_where;
463 	entry->next = after_where->next;
464 	entry->next->prev = entry;
465 	after_where->next = entry;
466 }
467 
468 static __inline void
469 vm_map_entry_unlink(vm_map_t map,
470 		    vm_map_entry_t entry)
471 {
472 	vm_map_entry_t prev = entry->prev;
473 	vm_map_entry_t next = entry->next;
474 
475 	next->prev = prev;
476 	prev->next = next;
477 	map->nentries--;
478 	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
479 	    map->nentries, entry);
480 }
481 
482 /*
483  *	SAVE_HINT:
484  *
485  *	Saves the specified entry as the hint for
486  *	future lookups.
487  */
488 #define	SAVE_HINT(map,value) \
489 		(map)->hint = (value);
490 
491 /*
492  *	vm_map_lookup_entry:	[ internal use only ]
493  *
494  *	Finds the map entry containing (or
495  *	immediately preceding) the specified address
496  *	in the given map; the entry is returned
497  *	in the "entry" parameter.  The boolean
498  *	result indicates whether the address is
499  *	actually contained in the map.
500  */
501 boolean_t
502 vm_map_lookup_entry(
503 	vm_map_t map,
504 	vm_offset_t address,
505 	vm_map_entry_t *entry)	/* OUT */
506 {
507 	vm_map_entry_t cur;
508 	vm_map_entry_t last;
509 
510 	GIANT_REQUIRED;
511 	/*
512 	 * Start looking either from the head of the list, or from the hint.
513 	 */
514 
515 	cur = map->hint;
516 
517 	if (cur == &map->header)
518 		cur = cur->next;
519 
520 	if (address >= cur->start) {
521 		/*
522 		 * Go from hint to end of list.
523 		 *
524 		 * But first, make a quick check to see if we are already looking
525 		 * at the entry we want (which is usually the case). Note also
526 		 * that we don't need to save the hint here... it is the same
527 		 * hint (unless we are at the header, in which case the hint
528 		 * didn't buy us anything anyway).
529 		 */
530 		last = &map->header;
531 		if ((cur != last) && (cur->end > address)) {
532 			*entry = cur;
533 			return (TRUE);
534 		}
535 	} else {
536 		/*
537 		 * Go from start to hint, *inclusively*
538 		 */
539 		last = cur->next;
540 		cur = map->header.next;
541 	}
542 
543 	/*
544 	 * Search linearly
545 	 */
546 
547 	while (cur != last) {
548 		if (cur->end > address) {
549 			if (address >= cur->start) {
550 				/*
551 				 * Save this lookup for future hints, and
552 				 * return
553 				 */
554 
555 				*entry = cur;
556 				SAVE_HINT(map, cur);
557 				return (TRUE);
558 			}
559 			break;
560 		}
561 		cur = cur->next;
562 	}
563 	*entry = cur->prev;
564 	SAVE_HINT(map, *entry);
565 	return (FALSE);
566 }
567 
568 /*
569  *	vm_map_insert:
570  *
571  *	Inserts the given whole VM object into the target
572  *	map at the specified address range.  The object's
573  *	size should match that of the address range.
574  *
575  *	Requires that the map be locked, and leaves it so.
576  *
577  *	If object is non-NULL, ref count must be bumped by caller
578  *	prior to making call to account for the new entry.
579  */
580 int
581 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
582 	      vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
583 	      int cow)
584 {
585 	vm_map_entry_t new_entry;
586 	vm_map_entry_t prev_entry;
587 	vm_map_entry_t temp_entry;
588 	vm_eflags_t protoeflags;
589 
590 	GIANT_REQUIRED;
591 
592 	/*
593 	 * Check that the start and end points are not bogus.
594 	 */
595 
596 	if ((start < map->min_offset) || (end > map->max_offset) ||
597 	    (start >= end))
598 		return (KERN_INVALID_ADDRESS);
599 
600 	/*
601 	 * Find the entry prior to the proposed starting address; if it's part
602 	 * of an existing entry, this range is bogus.
603 	 */
604 
605 	if (vm_map_lookup_entry(map, start, &temp_entry))
606 		return (KERN_NO_SPACE);
607 
608 	prev_entry = temp_entry;
609 
610 	/*
611 	 * Assert that the next entry doesn't overlap the end point.
612 	 */
613 
614 	if ((prev_entry->next != &map->header) &&
615 	    (prev_entry->next->start < end))
616 		return (KERN_NO_SPACE);
617 
618 	protoeflags = 0;
619 
620 	if (cow & MAP_COPY_ON_WRITE)
621 		protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
622 
623 	if (cow & MAP_NOFAULT) {
624 		protoeflags |= MAP_ENTRY_NOFAULT;
625 
626 		KASSERT(object == NULL,
627 			("vm_map_insert: paradoxical MAP_NOFAULT request"));
628 	}
629 	if (cow & MAP_DISABLE_SYNCER)
630 		protoeflags |= MAP_ENTRY_NOSYNC;
631 	if (cow & MAP_DISABLE_COREDUMP)
632 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
633 
634 	if (object) {
635 		/*
636 		 * When object is non-NULL, it could be shared with another
637 		 * process.  We have to set or clear OBJ_ONEMAPPING
638 		 * appropriately.
639 		 */
640 		if ((object->ref_count > 1) || (object->shadow_count != 0)) {
641 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
642 		}
643 	}
644 	else if ((prev_entry != &map->header) &&
645 		 (prev_entry->eflags == protoeflags) &&
646 		 (prev_entry->end == start) &&
647 		 (prev_entry->wired_count == 0) &&
648 		 ((prev_entry->object.vm_object == NULL) ||
649 		  vm_object_coalesce(prev_entry->object.vm_object,
650 				     OFF_TO_IDX(prev_entry->offset),
651 				     (vm_size_t)(prev_entry->end - prev_entry->start),
652 				     (vm_size_t)(end - prev_entry->end)))) {
653 		/*
654 		 * We were able to extend the object.  Determine if we
655 		 * can extend the previous map entry to include the
656 		 * new range as well.
657 		 */
658 		if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
659 		    (prev_entry->protection == prot) &&
660 		    (prev_entry->max_protection == max)) {
661 			map->size += (end - prev_entry->end);
662 			prev_entry->end = end;
663 			vm_map_simplify_entry(map, prev_entry);
664 			return (KERN_SUCCESS);
665 		}
666 
667 		/*
668 		 * If we can extend the object but cannot extend the
669 		 * map entry, we have to create a new map entry.  We
670 		 * must bump the ref count on the extended object to
671 		 * account for it.  object may be NULL.
672 		 */
673 		object = prev_entry->object.vm_object;
674 		offset = prev_entry->offset +
675 			(prev_entry->end - prev_entry->start);
676 		vm_object_reference(object);
677 	}
678 
679 	/*
680 	 * NOTE: if conditionals fail, object can be NULL here.  This occurs
681 	 * in things like the buffer map where we manage kva but do not manage
682 	 * backing objects.
683 	 */
684 
685 	/*
686 	 * Create a new entry
687 	 */
688 
689 	new_entry = vm_map_entry_create(map);
690 	new_entry->start = start;
691 	new_entry->end = end;
692 
693 	new_entry->eflags = protoeflags;
694 	new_entry->object.vm_object = object;
695 	new_entry->offset = offset;
696 	new_entry->avail_ssize = 0;
697 
698 	new_entry->inheritance = VM_INHERIT_DEFAULT;
699 	new_entry->protection = prot;
700 	new_entry->max_protection = max;
701 	new_entry->wired_count = 0;
702 
703 	/*
704 	 * Insert the new entry into the list
705 	 */
706 
707 	vm_map_entry_link(map, prev_entry, new_entry);
708 	map->size += new_entry->end - new_entry->start;
709 
710 	/*
711 	 * Update the free space hint
712 	 */
713 	if ((map->first_free == prev_entry) &&
714 	    (prev_entry->end >= new_entry->start)) {
715 		map->first_free = new_entry;
716 	}
717 
718 #if 0
719 	/*
720 	 * Temporarily removed to avoid MAP_STACK panic, due to
721 	 * MAP_STACK being a huge hack.  Will be added back in
722 	 * when MAP_STACK (and the user stack mapping) is fixed.
723 	 */
724 	/*
725 	 * It may be possible to simplify the entry
726 	 */
727 	vm_map_simplify_entry(map, new_entry);
728 #endif
729 
730 	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
731 		pmap_object_init_pt(map->pmap, start,
732 				    object, OFF_TO_IDX(offset), end - start,
733 				    cow & MAP_PREFAULT_PARTIAL);
734 	}
735 
736 	return (KERN_SUCCESS);
737 }
738 
739 /*
740  * Find sufficient space for `length' bytes in the given map, starting at
741  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
742  */
743 int
744 vm_map_findspace(
745 	vm_map_t map,
746 	vm_offset_t start,
747 	vm_size_t length,
748 	vm_offset_t *addr)
749 {
750 	vm_map_entry_t entry, next;
751 	vm_offset_t end;
752 
753 	GIANT_REQUIRED;
754 	if (start < map->min_offset)
755 		start = map->min_offset;
756 	if (start > map->max_offset)
757 		return (1);
758 
759 	/*
760 	 * Look for the first possible address; if there's already something
761 	 * at this address, we have to start after it.
762 	 */
763 	if (start == map->min_offset) {
764 		if ((entry = map->first_free) != &map->header)
765 			start = entry->end;
766 	} else {
767 		vm_map_entry_t tmp;
768 
769 		if (vm_map_lookup_entry(map, start, &tmp))
770 			start = tmp->end;
771 		entry = tmp;
772 	}
773 
774 	/*
775 	 * Look through the rest of the map, trying to fit a new region in the
776 	 * gap between existing regions, or after the very last region.
777 	 */
778 	for (;; start = (entry = next)->end) {
779 		/*
780 		 * Find the end of the proposed new region.  Be sure we didn't
781 		 * go beyond the end of the map, or wrap around the address;
782 		 * if so, we lose.  Otherwise, if this is the last entry, or
783 		 * if the proposed new region fits before the next entry, we
784 		 * win.
785 		 */
786 		end = start + length;
787 		if (end > map->max_offset || end < start)
788 			return (1);
789 		next = entry->next;
790 		if (next == &map->header || next->start >= end)
791 			break;
792 	}
793 	SAVE_HINT(map, entry);
794 	*addr = start;
795 	if (map == kernel_map) {
796 		vm_offset_t ksize;
797 		if ((ksize = round_page(start + length)) > kernel_vm_end) {
798 			pmap_growkernel(ksize);
799 		}
800 	}
801 	return (0);
802 }
803 
804 /*
805  *	vm_map_find finds an unallocated region in the target address
806  *	map with the given length.  The search is defined to be
807  *	first-fit from the specified address; the region found is
808  *	returned in the same parameter.
809  *
810  *	If object is non-NULL, ref count must be bumped by caller
811  *	prior to making call to account for the new entry.
812  */
813 int
814 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
815 	    vm_offset_t *addr,	/* IN/OUT */
816 	    vm_size_t length, boolean_t find_space, vm_prot_t prot,
817 	    vm_prot_t max, int cow)
818 {
819 	vm_offset_t start;
820 	int result, s = 0;
821 
822 	GIANT_REQUIRED;
823 
824 	start = *addr;
825 
826 	if (map == kmem_map)
827 		s = splvm();
828 
829 	vm_map_lock(map);
830 	if (find_space) {
831 		if (vm_map_findspace(map, start, length, addr)) {
832 			vm_map_unlock(map);
833 			if (map == kmem_map)
834 				splx(s);
835 			return (KERN_NO_SPACE);
836 		}
837 		start = *addr;
838 	}
839 	result = vm_map_insert(map, object, offset,
840 		start, start + length, prot, max, cow);
841 	vm_map_unlock(map);
842 
843 	if (map == kmem_map)
844 		splx(s);
845 
846 	return (result);
847 }
848 
849 /*
850  *	vm_map_simplify_entry:
851  *
852  *	Simplify the given map entry by merging with either neighbor.  This
853  *	routine also has the ability to merge with both neighbors.
854  *
855  *	The map must be locked.
856  *
857  *	This routine guarantees that the passed entry remains valid (though
858  *	possibly extended).  When merging, this routine may delete one or
859  *	both neighbors.
860  */
861 void
862 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
863 {
864 	vm_map_entry_t next, prev;
865 	vm_size_t prevsize, esize;
866 
867 	GIANT_REQUIRED;
868 
869 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
870 		return;
871 
872 	prev = entry->prev;
873 	if (prev != &map->header) {
874 		prevsize = prev->end - prev->start;
875 		if ( (prev->end == entry->start) &&
876 		     (prev->object.vm_object == entry->object.vm_object) &&
877 		     (!prev->object.vm_object ||
878 			(prev->offset + prevsize == entry->offset)) &&
879 		     (prev->eflags == entry->eflags) &&
880 		     (prev->protection == entry->protection) &&
881 		     (prev->max_protection == entry->max_protection) &&
882 		     (prev->inheritance == entry->inheritance) &&
883 		     (prev->wired_count == entry->wired_count)) {
884 			if (map->first_free == prev)
885 				map->first_free = entry;
886 			if (map->hint == prev)
887 				map->hint = entry;
888 			vm_map_entry_unlink(map, prev);
889 			entry->start = prev->start;
890 			entry->offset = prev->offset;
891 			if (prev->object.vm_object)
892 				vm_object_deallocate(prev->object.vm_object);
893 			vm_map_entry_dispose(map, prev);
894 		}
895 	}
896 
897 	next = entry->next;
898 	if (next != &map->header) {
899 		esize = entry->end - entry->start;
900 		if ((entry->end == next->start) &&
901 		    (next->object.vm_object == entry->object.vm_object) &&
902 		     (!entry->object.vm_object ||
903 			(entry->offset + esize == next->offset)) &&
904 		    (next->eflags == entry->eflags) &&
905 		    (next->protection == entry->protection) &&
906 		    (next->max_protection == entry->max_protection) &&
907 		    (next->inheritance == entry->inheritance) &&
908 		    (next->wired_count == entry->wired_count)) {
909 			if (map->first_free == next)
910 				map->first_free = entry;
911 			if (map->hint == next)
912 				map->hint = entry;
913 			vm_map_entry_unlink(map, next);
914 			entry->end = next->end;
915 			if (next->object.vm_object)
916 				vm_object_deallocate(next->object.vm_object);
917 			vm_map_entry_dispose(map, next);
918 	        }
919 	}
920 }
921 /*
922  *	vm_map_clip_start:	[ internal use only ]
923  *
924  *	Asserts that the given entry begins at or after
925  *	the specified address; if necessary,
926  *	it splits the entry into two.
927  */
928 #define vm_map_clip_start(map, entry, startaddr) \
929 { \
930 	if (startaddr > entry->start) \
931 		_vm_map_clip_start(map, entry, startaddr); \
932 }
933 
934 /*
935  *	This routine is called only when it is known that
936  *	the entry must be split.
937  */
938 static void
939 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
940 {
941 	vm_map_entry_t new_entry;
942 
943 	/*
944 	 * Split off the front portion -- note that we must insert the new
945 	 * entry BEFORE this one, so that this entry has the specified
946 	 * starting address.
947 	 */
948 
949 	vm_map_simplify_entry(map, entry);
950 
951 	/*
952 	 * If there is no object backing this entry, we might as well create
953 	 * one now.  If we defer it, an object can get created after the map
954 	 * is clipped, and individual objects will be created for the split-up
955 	 * map.  This is a bit of a hack, but is also about the best place to
956 	 * put this improvement.
957 	 */
958 
959 	if (entry->object.vm_object == NULL && !map->system_map) {
960 		vm_object_t object;
961 		object = vm_object_allocate(OBJT_DEFAULT,
962 				atop(entry->end - entry->start));
963 		entry->object.vm_object = object;
964 		entry->offset = 0;
965 	}
966 
967 	new_entry = vm_map_entry_create(map);
968 	*new_entry = *entry;
969 
970 	new_entry->end = start;
971 	entry->offset += (start - entry->start);
972 	entry->start = start;
973 
974 	vm_map_entry_link(map, entry->prev, new_entry);
975 
976 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
977 		vm_object_reference(new_entry->object.vm_object);
978 	}
979 }
980 
981 /*
982  *	vm_map_clip_end:	[ internal use only ]
983  *
984  *	Asserts that the given entry ends at or before
985  *	the specified address; if necessary,
986  *	it splits the entry into two.
987  */
988 
989 #define vm_map_clip_end(map, entry, endaddr) \
990 { \
991 	if (endaddr < entry->end) \
992 		_vm_map_clip_end(map, entry, endaddr); \
993 }
994 
995 /*
996  *	This routine is called only when it is known that
997  *	the entry must be split.
998  */
999 static void
1000 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
1001 {
1002 	vm_map_entry_t new_entry;
1003 
1004 	/*
1005 	 * If there is no object backing this entry, we might as well create
1006 	 * one now.  If we defer it, an object can get created after the map
1007 	 * is clipped, and individual objects will be created for the split-up
1008 	 * map.  This is a bit of a hack, but is also about the best place to
1009 	 * put this improvement.
1010 	 */
1011 
1012 	if (entry->object.vm_object == NULL && !map->system_map) {
1013 		vm_object_t object;
1014 		object = vm_object_allocate(OBJT_DEFAULT,
1015 				atop(entry->end - entry->start));
1016 		entry->object.vm_object = object;
1017 		entry->offset = 0;
1018 	}
1019 
1020 	/*
1021 	 * Create a new entry and insert it AFTER the specified entry
1022 	 */
1023 
1024 	new_entry = vm_map_entry_create(map);
1025 	*new_entry = *entry;
1026 
1027 	new_entry->start = entry->end = end;
1028 	new_entry->offset += (end - entry->start);
1029 
1030 	vm_map_entry_link(map, entry, new_entry);
1031 
1032 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1033 		vm_object_reference(new_entry->object.vm_object);
1034 	}
1035 }
1036 
1037 /*
1038  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
1039  *
1040  *	Asserts that the starting and ending region
1041  *	addresses fall within the valid range of the map.
1042  */
1043 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
1044 		{					\
1045 		if (start < vm_map_min(map))		\
1046 			start = vm_map_min(map);	\
1047 		if (end > vm_map_max(map))		\
1048 			end = vm_map_max(map);		\
1049 		if (start > end)			\
1050 			start = end;			\
1051 		}
1052 
1053 /*
1054  *	vm_map_submap:		[ kernel use only ]
1055  *
1056  *	Mark the given range as handled by a subordinate map.
1057  *
1058  *	This range must have been created with vm_map_find,
1059  *	and no other operations may have been performed on this
1060  *	range prior to calling vm_map_submap.
1061  *
1062  *	Only a limited number of operations can be performed
1063  *	within this range after calling vm_map_submap:
1064  *		vm_fault
1065  *	[Don't try vm_map_copy!]
1066  *
1067  *	To remove a submapping, one must first remove the
1068  *	range from the superior map, and then destroy the
1069  *	submap (if desired).  [Better yet, don't try it.]
1070  */
1071 int
1072 vm_map_submap(
1073 	vm_map_t map,
1074 	vm_offset_t start,
1075 	vm_offset_t end,
1076 	vm_map_t submap)
1077 {
1078 	vm_map_entry_t entry;
1079 	int result = KERN_INVALID_ARGUMENT;
1080 
1081 	GIANT_REQUIRED;
1082 
1083 	vm_map_lock(map);
1084 
1085 	VM_MAP_RANGE_CHECK(map, start, end);
1086 
1087 	if (vm_map_lookup_entry(map, start, &entry)) {
1088 		vm_map_clip_start(map, entry, start);
1089 	} else
1090 		entry = entry->next;
1091 
1092 	vm_map_clip_end(map, entry, end);
1093 
1094 	if ((entry->start == start) && (entry->end == end) &&
1095 	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1096 	    (entry->object.vm_object == NULL)) {
1097 		entry->object.sub_map = submap;
1098 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1099 		result = KERN_SUCCESS;
1100 	}
1101 	vm_map_unlock(map);
1102 
1103 	return (result);
1104 }
1105 
1106 /*
1107  *	vm_map_protect:
1108  *
1109  *	Sets the protection of the specified address
1110  *	region in the target map.  If "set_max" is
1111  *	specified, the maximum protection is to be set;
1112  *	otherwise, only the current protection is affected.
1113  */
1114 int
1115 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1116 	       vm_prot_t new_prot, boolean_t set_max)
1117 {
1118 	vm_map_entry_t current;
1119 	vm_map_entry_t entry;
1120 
1121 	GIANT_REQUIRED;
1122 	vm_map_lock(map);
1123 
1124 	VM_MAP_RANGE_CHECK(map, start, end);
1125 
1126 	if (vm_map_lookup_entry(map, start, &entry)) {
1127 		vm_map_clip_start(map, entry, start);
1128 	} else {
1129 		entry = entry->next;
1130 	}
1131 
1132 	/*
1133 	 * Make a first pass to check for protection violations.
1134 	 */
1135 
1136 	current = entry;
1137 	while ((current != &map->header) && (current->start < end)) {
1138 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1139 			vm_map_unlock(map);
1140 			return (KERN_INVALID_ARGUMENT);
1141 		}
1142 		if ((new_prot & current->max_protection) != new_prot) {
1143 			vm_map_unlock(map);
1144 			return (KERN_PROTECTION_FAILURE);
1145 		}
1146 		current = current->next;
1147 	}
1148 
1149 	/*
1150 	 * Go back and fix up protections. [Note that clipping is not
1151 	 * necessary the second time.]
1152 	 */
1153 
1154 	current = entry;
1155 
1156 	while ((current != &map->header) && (current->start < end)) {
1157 		vm_prot_t old_prot;
1158 
1159 		vm_map_clip_end(map, current, end);
1160 
1161 		old_prot = current->protection;
1162 		if (set_max)
1163 			current->protection =
1164 			    (current->max_protection = new_prot) &
1165 			    old_prot;
1166 		else
1167 			current->protection = new_prot;
1168 
1169 		/*
1170 		 * Update physical map if necessary. Worry about copy-on-write
1171 		 * here -- CHECK THIS XXX
1172 		 */
1173 
1174 		if (current->protection != old_prot) {
1175 #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1176 							VM_PROT_ALL)
1177 
1178 			pmap_protect(map->pmap, current->start,
1179 			    current->end,
1180 			    current->protection & MASK(current));
1181 #undef	MASK
1182 		}
1183 
1184 		vm_map_simplify_entry(map, current);
1185 
1186 		current = current->next;
1187 	}
1188 
1189 	vm_map_unlock(map);
1190 	return (KERN_SUCCESS);
1191 }
1192 
1193 /*
1194  *	vm_map_madvise:
1195  *
1196  * 	This routine traverses a process's map, handling the madvise
1197  *	system call.  Advisories are classified as either those affecting
1198  *	the vm_map_entry structure, or those affecting the underlying
1199  *	objects.
1200  */
1201 
1202 int
1203 vm_map_madvise(
1204 	vm_map_t map,
1205 	vm_offset_t start,
1206 	vm_offset_t end,
1207 	int behav)
1208 {
1209 	vm_map_entry_t current, entry;
1210 	int modify_map = 0;
1211 
1212 	GIANT_REQUIRED;
1213 
1214 	/*
1215 	 * Some madvise calls directly modify the vm_map_entry, in which case
1216 	 * we need to use an exclusive lock on the map and we need to perform
1217 	 * various clipping operations.  Otherwise we only need a read-lock
1218 	 * on the map.
1219 	 */
1220 
1221 	switch(behav) {
1222 	case MADV_NORMAL:
1223 	case MADV_SEQUENTIAL:
1224 	case MADV_RANDOM:
1225 	case MADV_NOSYNC:
1226 	case MADV_AUTOSYNC:
1227 	case MADV_NOCORE:
1228 	case MADV_CORE:
1229 		modify_map = 1;
1230 		vm_map_lock(map);
1231 		break;
1232 	case MADV_WILLNEED:
1233 	case MADV_DONTNEED:
1234 	case MADV_FREE:
1235 		vm_map_lock_read(map);
1236 		break;
1237 	default:
1238 		return (KERN_INVALID_ARGUMENT);
1239 	}
1240 
1241 	/*
1242 	 * Locate starting entry and clip if necessary.
1243 	 */
1244 
1245 	VM_MAP_RANGE_CHECK(map, start, end);
1246 
1247 	if (vm_map_lookup_entry(map, start, &entry)) {
1248 		if (modify_map)
1249 			vm_map_clip_start(map, entry, start);
1250 	} else {
1251 		entry = entry->next;
1252 	}
1253 
1254 	if (modify_map) {
1255 		/*
1256 		 * madvise behaviors that are implemented in the vm_map_entry.
1257 		 *
1258 		 * We clip the vm_map_entry so that behavioral changes are
1259 		 * limited to the specified address range.
1260 		 */
1261 		for (current = entry;
1262 		     (current != &map->header) && (current->start < end);
1263 		     current = current->next
1264 		) {
1265 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1266 				continue;
1267 
1268 			vm_map_clip_end(map, current, end);
1269 
1270 			switch (behav) {
1271 			case MADV_NORMAL:
1272 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1273 				break;
1274 			case MADV_SEQUENTIAL:
1275 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1276 				break;
1277 			case MADV_RANDOM:
1278 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1279 				break;
1280 			case MADV_NOSYNC:
1281 				current->eflags |= MAP_ENTRY_NOSYNC;
1282 				break;
1283 			case MADV_AUTOSYNC:
1284 				current->eflags &= ~MAP_ENTRY_NOSYNC;
1285 				break;
1286 			case MADV_NOCORE:
1287 				current->eflags |= MAP_ENTRY_NOCOREDUMP;
1288 				break;
1289 			case MADV_CORE:
1290 				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1291 				break;
1292 			default:
1293 				break;
1294 			}
1295 			vm_map_simplify_entry(map, current);
1296 		}
1297 		vm_map_unlock(map);
1298 	} else {
1299 		vm_pindex_t pindex;
1300 		int count;
1301 
1302 		/*
1303 		 * madvise behaviors that are implemented in the underlying
1304 		 * vm_object.
1305 		 *
1306 		 * Since we don't clip the vm_map_entry, we have to clip
1307 		 * the vm_object pindex and count.
1308 		 */
1309 		for (current = entry;
1310 		     (current != &map->header) && (current->start < end);
1311 		     current = current->next
1312 		) {
1313 			vm_offset_t useStart;
1314 
1315 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1316 				continue;
1317 
1318 			pindex = OFF_TO_IDX(current->offset);
1319 			count = atop(current->end - current->start);
1320 			useStart = current->start;
1321 
1322 			if (current->start < start) {
1323 				pindex += atop(start - current->start);
1324 				count -= atop(start - current->start);
1325 				useStart = start;
1326 			}
1327 			if (current->end > end)
1328 				count -= atop(current->end - end);
1329 
1330 			if (count <= 0)
1331 				continue;
1332 
1333 			vm_object_madvise(current->object.vm_object,
1334 					  pindex, count, behav);
1335 			if (behav == MADV_WILLNEED) {
1336 				pmap_object_init_pt(
1337 				    map->pmap,
1338 				    useStart,
1339 				    current->object.vm_object,
1340 				    pindex,
1341 				    (count << PAGE_SHIFT),
1342 				    MAP_PREFAULT_MADVISE
1343 				);
1344 			}
1345 		}
1346 		vm_map_unlock_read(map);
1347 	}
1348 	return(0);
1349 }
1350 
1351 
1352 /*
1353  *	vm_map_inherit:
1354  *
1355  *	Sets the inheritance of the specified address
1356  *	range in the target map.  Inheritance
1357  *	affects how the map will be shared with
1358  *	child maps at the time of vm_map_fork.
1359  */
1360 int
1361 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1362 	       vm_inherit_t new_inheritance)
1363 {
1364 	vm_map_entry_t entry;
1365 	vm_map_entry_t temp_entry;
1366 
1367 	GIANT_REQUIRED;
1368 
1369 	switch (new_inheritance) {
1370 	case VM_INHERIT_NONE:
1371 	case VM_INHERIT_COPY:
1372 	case VM_INHERIT_SHARE:
1373 		break;
1374 	default:
1375 		return (KERN_INVALID_ARGUMENT);
1376 	}
1377 
1378 	vm_map_lock(map);
1379 
1380 	VM_MAP_RANGE_CHECK(map, start, end);
1381 
1382 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1383 		entry = temp_entry;
1384 		vm_map_clip_start(map, entry, start);
1385 	} else
1386 		entry = temp_entry->next;
1387 
1388 	while ((entry != &map->header) && (entry->start < end)) {
1389 		vm_map_clip_end(map, entry, end);
1390 
1391 		entry->inheritance = new_inheritance;
1392 
1393 		vm_map_simplify_entry(map, entry);
1394 
1395 		entry = entry->next;
1396 	}
1397 
1398 	vm_map_unlock(map);
1399 	return (KERN_SUCCESS);
1400 }
1401 
1402 /*
1403  * Implement the semantics of mlock
1404  */
1405 int
1406 vm_map_user_pageable(
1407 	vm_map_t map,
1408 	vm_offset_t start,
1409 	vm_offset_t end,
1410 	boolean_t new_pageable)
1411 {
1412 	vm_map_entry_t entry;
1413 	vm_map_entry_t start_entry;
1414 	vm_offset_t estart;
1415 	vm_offset_t eend;
1416 	int rv;
1417 
1418 	vm_map_lock(map);
1419 	VM_MAP_RANGE_CHECK(map, start, end);
1420 
1421 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1422 		vm_map_unlock(map);
1423 		return (KERN_INVALID_ADDRESS);
1424 	}
1425 
1426 	if (new_pageable) {
1427 
1428 		entry = start_entry;
1429 		vm_map_clip_start(map, entry, start);
1430 
1431 		/*
1432 		 * Now decrement the wiring count for each region. If a region
1433 		 * becomes completely unwired, unwire its physical pages and
1434 		 * mappings.
1435 		 */
1436 		while ((entry != &map->header) && (entry->start < end)) {
1437 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1438 				vm_map_clip_end(map, entry, end);
1439 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1440 				entry->wired_count--;
1441 				if (entry->wired_count == 0)
1442 					vm_fault_unwire(map, entry->start, entry->end);
1443 			}
1444 			vm_map_simplify_entry(map,entry);
1445 			entry = entry->next;
1446 		}
1447 	} else {
1448 
1449 		entry = start_entry;
1450 
1451 		while ((entry != &map->header) && (entry->start < end)) {
1452 
1453 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1454 				entry = entry->next;
1455 				continue;
1456 			}
1457 
1458 			if (entry->wired_count != 0) {
1459 				entry->wired_count++;
1460 				entry->eflags |= MAP_ENTRY_USER_WIRED;
1461 				entry = entry->next;
1462 				continue;
1463 			}
1464 
1465 			/* Here on entry being newly wired */
1466 
1467 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1468 				int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1469 				if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1470 
1471 					vm_object_shadow(&entry->object.vm_object,
1472 					    &entry->offset,
1473 					    atop(entry->end - entry->start));
1474 					entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1475 
1476 				} else if (entry->object.vm_object == NULL &&
1477 					   !map->system_map) {
1478 
1479 					entry->object.vm_object =
1480 					    vm_object_allocate(OBJT_DEFAULT,
1481 						atop(entry->end - entry->start));
1482 					entry->offset = (vm_offset_t) 0;
1483 
1484 				}
1485 			}
1486 
1487 			vm_map_clip_start(map, entry, start);
1488 			vm_map_clip_end(map, entry, end);
1489 
1490 			entry->wired_count++;
1491 			entry->eflags |= MAP_ENTRY_USER_WIRED;
1492 			estart = entry->start;
1493 			eend = entry->end;
1494 
1495 			/* First we need to allow map modifications */
1496 			vm_map_set_recursive(map);
1497 			vm_map_lock_downgrade(map);
1498 			map->timestamp++;
1499 
1500 			rv = vm_fault_user_wire(map, entry->start, entry->end);
1501 			if (rv) {
1502 
1503 				entry->wired_count--;
1504 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1505 
1506 				vm_map_clear_recursive(map);
1507 				vm_map_unlock(map);
1508 
1509 				/*
1510 				 * At this point, the map is unlocked, and
1511 				 * entry might no longer be valid.  Use copy
1512 				 * of entry start value obtained while entry
1513 				 * was valid.
1514 				 */
1515 				(void) vm_map_user_pageable(map, start, estart,
1516 							    TRUE);
1517 				return rv;
1518 			}
1519 
1520 			vm_map_clear_recursive(map);
1521 			if (vm_map_lock_upgrade(map)) {
1522 				vm_map_lock(map);
1523 				if (vm_map_lookup_entry(map, estart, &entry)
1524 				    == FALSE) {
1525 					vm_map_unlock(map);
1526 					/*
1527 					 * vm_fault_user_wire succeeded, thus
1528 					 * the area between start and eend
1529 					 * is wired and has to be unwired
1530 					 * here as part of the cleanup.
1531 					 */
1532 					(void) vm_map_user_pageable(map,
1533 								    start,
1534 								    eend,
1535 								    TRUE);
1536 					return (KERN_INVALID_ADDRESS);
1537 				}
1538 			}
1539 			vm_map_simplify_entry(map,entry);
1540 		}
1541 	}
1542 	map->timestamp++;
1543 	vm_map_unlock(map);
1544 	return KERN_SUCCESS;
1545 }
1546 
1547 /*
1548  *	vm_map_pageable:
1549  *
1550  *	Sets the pageability of the specified address
1551  *	range in the target map.  Regions specified
1552  *	as not pageable require locked-down physical
1553  *	memory and physical page maps.
1554  *
1555  *	The map must not be locked, but a reference
1556  *	must remain to the map throughout the call.
1557  */
1558 int
1559 vm_map_pageable(
1560 	vm_map_t map,
1561 	vm_offset_t start,
1562 	vm_offset_t end,
1563 	boolean_t new_pageable)
1564 {
1565 	vm_map_entry_t entry;
1566 	vm_map_entry_t start_entry;
1567 	vm_offset_t failed = 0;
1568 	int rv;
1569 
1570 	GIANT_REQUIRED;
1571 
1572 	vm_map_lock(map);
1573 
1574 	VM_MAP_RANGE_CHECK(map, start, end);
1575 
1576 	/*
1577 	 * Only one pageability change may take place at one time, since
1578 	 * vm_fault assumes it will be called only once for each
1579 	 * wiring/unwiring.  Therefore, we have to make sure we're actually
1580 	 * changing the pageability for the entire region.  We do so before
1581 	 * making any changes.
1582 	 */
1583 
1584 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1585 		vm_map_unlock(map);
1586 		return (KERN_INVALID_ADDRESS);
1587 	}
1588 	entry = start_entry;
1589 
1590 	/*
1591 	 * Actions are rather different for wiring and unwiring, so we have
1592 	 * two separate cases.
1593 	 */
1594 
1595 	if (new_pageable) {
1596 
1597 		vm_map_clip_start(map, entry, start);
1598 
1599 		/*
1600 		 * Unwiring.  First ensure that the range to be unwired is
1601 		 * really wired down and that there are no holes.
1602 		 */
1603 		while ((entry != &map->header) && (entry->start < end)) {
1604 
1605 			if (entry->wired_count == 0 ||
1606 			    (entry->end < end &&
1607 				(entry->next == &map->header ||
1608 				    entry->next->start > entry->end))) {
1609 				vm_map_unlock(map);
1610 				return (KERN_INVALID_ARGUMENT);
1611 			}
1612 			entry = entry->next;
1613 		}
1614 
1615 		/*
1616 		 * Now decrement the wiring count for each region. If a region
1617 		 * becomes completely unwired, unwire its physical pages and
1618 		 * mappings.
1619 		 */
1620 		entry = start_entry;
1621 		while ((entry != &map->header) && (entry->start < end)) {
1622 			vm_map_clip_end(map, entry, end);
1623 
1624 			entry->wired_count--;
1625 			if (entry->wired_count == 0)
1626 				vm_fault_unwire(map, entry->start, entry->end);
1627 
1628 			vm_map_simplify_entry(map, entry);
1629 
1630 			entry = entry->next;
1631 		}
1632 	} else {
1633 		/*
1634 		 * Wiring.  We must do this in two passes:
1635 		 *
1636 		 * 1.  Holding the write lock, we create any shadow or zero-fill
1637 		 * objects that need to be created. Then we clip each map
1638 		 * entry to the region to be wired and increment its wiring
1639 		 * count.  We create objects before clipping the map entries
1640 		 * to avoid object proliferation.
1641 		 *
1642 		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
1643 		 * fault in the pages for any newly wired area (wired_count is
1644 		 * 1).
1645 		 *
1646 		 * Downgrading to a read lock for vm_fault_wire avoids a possible
1647 		 * deadlock with another process that may have faulted on one
1648 		 * of the pages to be wired (it would mark the page busy,
1649 		 * blocking us, then in turn block on the map lock that we
1650 		 * hold).  Because of problems in the recursive lock package,
1651 		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
1652 		 * any actions that require the write lock must be done
1653 		 * beforehand.  Because we keep the read lock on the map, the
1654 		 * copy-on-write status of the entries we modify here cannot
1655 		 * change.
1656 		 */
1657 
1658 		/*
1659 		 * Pass 1.
1660 		 */
1661 		while ((entry != &map->header) && (entry->start < end)) {
1662 			if (entry->wired_count == 0) {
1663 
1664 				/*
1665 				 * Perform actions of vm_map_lookup that need
1666 				 * the write lock on the map: create a shadow
1667 				 * object for a copy-on-write region, or an
1668 				 * object for a zero-fill region.
1669 				 *
1670 				 * We don't have to do this for entries that
1671 				 * point to sub maps, because we won't
1672 				 * hold the lock on the sub map.
1673 				 */
1674 				if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1675 					int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1676 					if (copyflag &&
1677 					    ((entry->protection & VM_PROT_WRITE) != 0)) {
1678 
1679 						vm_object_shadow(&entry->object.vm_object,
1680 						    &entry->offset,
1681 						    atop(entry->end - entry->start));
1682 						entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1683 					} else if (entry->object.vm_object == NULL &&
1684 						   !map->system_map) {
1685 						entry->object.vm_object =
1686 						    vm_object_allocate(OBJT_DEFAULT,
1687 							atop(entry->end - entry->start));
1688 						entry->offset = (vm_offset_t) 0;
1689 					}
1690 				}
1691 			}
1692 			vm_map_clip_start(map, entry, start);
1693 			vm_map_clip_end(map, entry, end);
1694 			entry->wired_count++;
1695 
1696 			/*
1697 			 * Check for holes
1698 			 */
1699 			if (entry->end < end &&
1700 			    (entry->next == &map->header ||
1701 				entry->next->start > entry->end)) {
1702 				/*
1703 				 * Found one.  Object creation actions do not
1704 				 * need to be undone, but the wired counts
1705 				 * need to be restored.
1706 				 */
1707 				while (entry != &map->header && entry->end > start) {
1708 					entry->wired_count--;
1709 					entry = entry->prev;
1710 				}
1711 				vm_map_unlock(map);
1712 				return (KERN_INVALID_ARGUMENT);
1713 			}
1714 			entry = entry->next;
1715 		}
1716 
1717 		/*
1718 		 * Pass 2.
1719 		 */
1720 
1721 		/*
1722 		 * HACK HACK HACK HACK
1723 		 *
1724 		 * If we are wiring in the kernel map or a submap of it,
1725 		 * unlock the map to avoid deadlocks.  We trust that the
1726 		 * kernel is well-behaved, and therefore will not do
1727 		 * anything destructive to this region of the map while
1728 		 * we have it unlocked.  We cannot trust user processes
1729 		 * to do the same.
1730 		 *
1731 		 * HACK HACK HACK HACK
1732 		 */
1733 		if (vm_map_pmap(map) == kernel_pmap) {
1734 			vm_map_unlock(map);	/* trust me ... */
1735 		} else {
1736 			vm_map_lock_downgrade(map);
1737 		}
1738 
1739 		rv = 0;
1740 		entry = start_entry;
1741 		while (entry != &map->header && entry->start < end) {
1742 			/*
1743 			 * If vm_fault_wire fails for any page we need to undo
1744 			 * what has been done.  We decrement the wiring count
1745 			 * for those pages which have not yet been wired (now)
1746 			 * and unwire those that have (later).
1747 			 *
1748 			 * XXX this violates the locking protocol on the map,
1749 			 * needs to be fixed.
1750 			 */
1751 			if (rv)
1752 				entry->wired_count--;
1753 			else if (entry->wired_count == 1) {
1754 				rv = vm_fault_wire(map, entry->start, entry->end);
1755 				if (rv) {
1756 					failed = entry->start;
1757 					entry->wired_count--;
1758 				}
1759 			}
1760 			entry = entry->next;
1761 		}
1762 
1763 		if (vm_map_pmap(map) == kernel_pmap) {
1764 			vm_map_lock(map);
1765 		}
1766 		if (rv) {
1767 			vm_map_unlock(map);
1768 			(void) vm_map_pageable(map, start, failed, TRUE);
1769 			return (rv);
1770 		}
1771 		/*
1772 		 * An exclusive lock on the map is needed in order to call
1773 		 * vm_map_simplify_entry().  If the current lock on the map
1774 		 * is only a shared lock, an upgrade is needed.
1775 		 */
1776 		if (vm_map_pmap(map) != kernel_pmap &&
1777 		    vm_map_lock_upgrade(map)) {
1778 			vm_map_lock(map);
1779 			if (vm_map_lookup_entry(map, start, &start_entry) ==
1780 			    FALSE) {
1781 				vm_map_unlock(map);
1782 				return KERN_SUCCESS;
1783 			}
1784 		}
1785 		vm_map_simplify_entry(map, start_entry);
1786 	}
1787 
1788 	vm_map_unlock(map);
1789 
1790 	return (KERN_SUCCESS);
1791 }
1792 
1793 /*
1794  * vm_map_clean
1795  *
1796  * Push any dirty cached pages in the address range to their pager.
1797  * If syncio is TRUE, dirty pages are written synchronously.
1798  * If invalidate is TRUE, any cached pages are freed as well.
1799  *
1800  * Returns an error if any part of the specified range is not mapped.
1801  */
1802 int
1803 vm_map_clean(
1804 	vm_map_t map,
1805 	vm_offset_t start,
1806 	vm_offset_t end,
1807 	boolean_t syncio,
1808 	boolean_t invalidate)
1809 {
1810 	vm_map_entry_t current;
1811 	vm_map_entry_t entry;
1812 	vm_size_t size;
1813 	vm_object_t object;
1814 	vm_ooffset_t offset;
1815 
1816 	GIANT_REQUIRED;
1817 
1818 	vm_map_lock_read(map);
1819 	VM_MAP_RANGE_CHECK(map, start, end);
1820 	if (!vm_map_lookup_entry(map, start, &entry)) {
1821 		vm_map_unlock_read(map);
1822 		return (KERN_INVALID_ADDRESS);
1823 	}
1824 	/*
1825 	 * Make a first pass to check for holes.
1826 	 */
1827 	for (current = entry; current->start < end; current = current->next) {
1828 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1829 			vm_map_unlock_read(map);
1830 			return (KERN_INVALID_ARGUMENT);
1831 		}
1832 		if (end > current->end &&
1833 		    (current->next == &map->header ||
1834 			current->end != current->next->start)) {
1835 			vm_map_unlock_read(map);
1836 			return (KERN_INVALID_ADDRESS);
1837 		}
1838 	}
1839 
1840 	if (invalidate)
1841 		pmap_remove(vm_map_pmap(map), start, end);
1842 	/*
1843 	 * Make a second pass, cleaning/uncaching pages from the indicated
1844 	 * objects as we go.
1845 	 */
1846 	for (current = entry; current->start < end; current = current->next) {
1847 		offset = current->offset + (start - current->start);
1848 		size = (end <= current->end ? end : current->end) - start;
1849 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1850 			vm_map_t smap;
1851 			vm_map_entry_t tentry;
1852 			vm_size_t tsize;
1853 
1854 			smap = current->object.sub_map;
1855 			vm_map_lock_read(smap);
1856 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1857 			tsize = tentry->end - offset;
1858 			if (tsize < size)
1859 				size = tsize;
1860 			object = tentry->object.vm_object;
1861 			offset = tentry->offset + (offset - tentry->start);
1862 			vm_map_unlock_read(smap);
1863 		} else {
1864 			object = current->object.vm_object;
1865 		}
1866 		/*
1867 		 * Note that there is absolutely no sense in writing out
1868 		 * anonymous objects, so we track down the vnode object
1869 		 * to write out.
1870 		 * We invalidate (remove) all pages from the address space
1871 		 * anyway, for semantic correctness.
1872 		 */
1873 		while (object->backing_object) {
1874 			object = object->backing_object;
1875 			offset += object->backing_object_offset;
1876 			if (object->size < OFF_TO_IDX( offset + size))
1877 				size = IDX_TO_OFF(object->size) - offset;
1878 		}
1879 		if (object && (object->type == OBJT_VNODE) &&
1880 		    (current->protection & VM_PROT_WRITE)) {
1881 			/*
1882 			 * Flush pages if writing is allowed, invalidate them
1883 			 * if invalidation requested.  Pages undergoing I/O
1884 			 * will be ignored by vm_object_page_remove().
1885 			 *
1886 			 * We cannot lock the vnode and then wait for paging
1887 			 * to complete without deadlocking against vm_fault.
1888 			 * Instead we simply call vm_object_page_remove() and
1889 			 * allow it to block internally on a page-by-page
1890 			 * basis when it encounters pages undergoing async
1891 			 * I/O.
1892 			 */
1893 			int flags;
1894 
1895 			vm_object_reference(object);
1896 			vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
1897 			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1898 			flags |= invalidate ? OBJPC_INVAL : 0;
1899 			vm_object_page_clean(object,
1900 			    OFF_TO_IDX(offset),
1901 			    OFF_TO_IDX(offset + size + PAGE_MASK),
1902 			    flags);
1903 			if (invalidate) {
1904 				/*vm_object_pip_wait(object, "objmcl");*/
1905 				vm_object_page_remove(object,
1906 				    OFF_TO_IDX(offset),
1907 				    OFF_TO_IDX(offset + size + PAGE_MASK),
1908 				    FALSE);
1909 			}
1910 			VOP_UNLOCK(object->handle, 0, curthread);
1911 			vm_object_deallocate(object);
1912 		}
1913 		start += size;
1914 	}
1915 
1916 	vm_map_unlock_read(map);
1917 	return (KERN_SUCCESS);
1918 }
1919 
1920 /*
1921  *	vm_map_entry_unwire:	[ internal use only ]
1922  *
1923  *	Make the region specified by this entry pageable.
1924  *
1925  *	The map in question should be locked.
1926  *	[This is the reason for this routine's existence.]
1927  */
1928 static void
1929 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
1930 {
1931 	vm_fault_unwire(map, entry->start, entry->end);
1932 	entry->wired_count = 0;
1933 }
1934 
1935 /*
1936  *	vm_map_entry_delete:	[ internal use only ]
1937  *
1938  *	Deallocate the given entry from the target map.
1939  */
1940 static void
1941 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
1942 {
1943 	vm_map_entry_unlink(map, entry);
1944 	map->size -= entry->end - entry->start;
1945 
1946 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1947 		vm_object_deallocate(entry->object.vm_object);
1948 	}
1949 
1950 	vm_map_entry_dispose(map, entry);
1951 }
1952 
1953 /*
1954  *	vm_map_delete:	[ internal use only ]
1955  *
1956  *	Deallocates the given address range from the target
1957  *	map.
1958  */
1959 int
1960 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
1961 {
1962 	vm_object_t object;
1963 	vm_map_entry_t entry;
1964 	vm_map_entry_t first_entry;
1965 
1966 	GIANT_REQUIRED;
1967 
1968 	/*
1969 	 * Find the start of the region, and clip it
1970 	 */
1971 
1972 	if (!vm_map_lookup_entry(map, start, &first_entry))
1973 		entry = first_entry->next;
1974 	else {
1975 		entry = first_entry;
1976 		vm_map_clip_start(map, entry, start);
1977 		/*
1978 		 * Fix the lookup hint now, rather than each time through the
1979 		 * loop.
1980 		 */
1981 		SAVE_HINT(map, entry->prev);
1982 	}
1983 
1984 	/*
1985 	 * Save the free space hint
1986 	 */
1987 
1988 	if (entry == &map->header) {
1989 		map->first_free = &map->header;
1990 	} else if (map->first_free->start >= start) {
1991 		map->first_free = entry->prev;
1992 	}
1993 
1994 	/*
1995 	 * Step through all entries in this region
1996 	 */
1997 
1998 	while ((entry != &map->header) && (entry->start < end)) {
1999 		vm_map_entry_t next;
2000 		vm_offset_t s, e;
2001 		vm_pindex_t offidxstart, offidxend, count;
2002 
2003 		vm_map_clip_end(map, entry, end);
2004 
2005 		s = entry->start;
2006 		e = entry->end;
2007 		next = entry->next;
2008 
2009 		offidxstart = OFF_TO_IDX(entry->offset);
2010 		count = OFF_TO_IDX(e - s);
2011 		object = entry->object.vm_object;
2012 
2013 		/*
2014 		 * Unwire before removing addresses from the pmap; otherwise,
2015 		 * unwiring will put the entries back in the pmap.
2016 		 */
2017 		if (entry->wired_count != 0) {
2018 			vm_map_entry_unwire(map, entry);
2019 		}
2020 
2021 		offidxend = offidxstart + count;
2022 
2023 		if ((object == kernel_object) || (object == kmem_object)) {
2024 			vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2025 		} else {
2026 			pmap_remove(map->pmap, s, e);
2027 			if (object != NULL &&
2028 			    object->ref_count != 1 &&
2029 			    (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
2030 			    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2031 				vm_object_collapse(object);
2032 				vm_object_page_remove(object, offidxstart, offidxend, FALSE);
2033 				if (object->type == OBJT_SWAP) {
2034 					swap_pager_freespace(object, offidxstart, count);
2035 				}
2036 				if (offidxend >= object->size &&
2037 				    offidxstart < object->size) {
2038 					object->size = offidxstart;
2039 				}
2040 			}
2041 		}
2042 
2043 		/*
2044 		 * Delete the entry (which may delete the object) only after
2045 		 * removing all pmap entries pointing to its pages.
2046 		 * (Otherwise, its page frames may be reallocated, and any
2047 		 * modify bits will be set in the wrong object!)
2048 		 */
2049 		vm_map_entry_delete(map, entry);
2050 		entry = next;
2051 	}
2052 	return (KERN_SUCCESS);
2053 }
2054 
2055 /*
2056  *	vm_map_remove:
2057  *
2058  *	Remove the given address range from the target map.
2059  *	This is the exported form of vm_map_delete.
2060  */
2061 int
2062 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
2063 {
2064 	int result, s = 0;
2065 
2066 	GIANT_REQUIRED;
2067 
2068 	if (map == kmem_map)
2069 		s = splvm();
2070 
2071 	vm_map_lock(map);
2072 	VM_MAP_RANGE_CHECK(map, start, end);
2073 	result = vm_map_delete(map, start, end);
2074 	vm_map_unlock(map);
2075 
2076 	if (map == kmem_map)
2077 		splx(s);
2078 
2079 	return (result);
2080 }
2081 
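/*
 * Example of typical use (a sketch; addr and size are hypothetical, and the
 * pattern is roughly what kmem_free() does for kernel virtual memory):
 *
 *	(void) vm_map_remove(kernel_map, trunc_page(addr),
 *	    round_page(addr + size));
 */
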
2082 /*
2083  *	vm_map_check_protection:
2084  *
2085  *	Assert that the target map allows the specified
2086  *	privilege on the entire address region given.
2087  *	The entire region must be allocated.
2088  */
2089 boolean_t
2090 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
2091 			vm_prot_t protection)
2092 {
2093 	vm_map_entry_t entry;
2094 	vm_map_entry_t tmp_entry;
2095 
2096 	GIANT_REQUIRED;
2097 
2098 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
2099 		return (FALSE);
2100 	}
2101 	entry = tmp_entry;
2102 
2103 	while (start < end) {
2104 		if (entry == &map->header) {
2105 			return (FALSE);
2106 		}
2107 		/*
2108 		 * No holes allowed!
2109 		 */
2110 
2111 		if (start < entry->start) {
2112 			return (FALSE);
2113 		}
2114 		/*
2115 		 * Check protection associated with entry.
2116 		 */
2117 
2118 		if ((entry->protection & protection) != protection) {
2119 			return (FALSE);
2120 		}
2121 		/* go to next entry */
2122 
2123 		start = entry->end;
2124 		entry = entry->next;
2125 	}
2126 	return (TRUE);
2127 }
2128 
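/*
 * Example of typical use (a sketch; uaddr and len are hypothetical): verify
 * that a user range is fully mapped with the required access before using
 * it, much as useracc() is expected to do:
 *
 *	if (!vm_map_check_protection(&p->p_vmspace->vm_map,
 *	    trunc_page(uaddr), round_page(uaddr + len), VM_PROT_READ))
 *		return (FALSE);
 */
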
2129 /*
2130  * Split the pages in a map entry into a new object.  This affords
2131  * easier removal of unused pages, and keeps object inheritance from
2132  * having a negative impact on memory usage.
2133  */
2134 static void
2135 vm_map_split(vm_map_entry_t entry)
2136 {
2137 	vm_page_t m;
2138 	vm_object_t orig_object, new_object, source;
2139 	vm_offset_t s, e;
2140 	vm_pindex_t offidxstart, offidxend, idx;
2141 	vm_size_t size;
2142 	vm_ooffset_t offset;
2143 
2144 	GIANT_REQUIRED;
2145 
2146 	orig_object = entry->object.vm_object;
2147 	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
2148 		return;
2149 	if (orig_object->ref_count <= 1)
2150 		return;
2151 
2152 	offset = entry->offset;
2153 	s = entry->start;
2154 	e = entry->end;
2155 
2156 	offidxstart = OFF_TO_IDX(offset);
2157 	offidxend = offidxstart + OFF_TO_IDX(e - s);
2158 	size = offidxend - offidxstart;
2159 
2160 	new_object = vm_pager_allocate(orig_object->type,
2161 		NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
2162 	if (new_object == NULL)
2163 		return;
2164 
2165 	source = orig_object->backing_object;
2166 	if (source != NULL) {
2167 		vm_object_reference(source);	/* Referenced by new_object */
2168 		TAILQ_INSERT_TAIL(&source->shadow_head,
2169 				  new_object, shadow_list);
2170 		vm_object_clear_flag(source, OBJ_ONEMAPPING);
2171 		new_object->backing_object_offset =
2172 			orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
2173 		new_object->backing_object = source;
2174 		source->shadow_count++;
2175 		source->generation++;
2176 	}
2177 
2178 	for (idx = 0; idx < size; idx++) {
2179 		vm_page_t m;
2180 
2181 	retry:
2182 		m = vm_page_lookup(orig_object, offidxstart + idx);
2183 		if (m == NULL)
2184 			continue;
2185 
2186 		/*
2187 		 * We must wait for pending I/O to complete before we can
2188 		 * rename the page.
2189 		 *
2190 		 * We do not have to VM_PROT_NONE the page as mappings should
2191 		 * not be changed by this operation.
2192 		 */
2193 		if (vm_page_sleep_busy(m, TRUE, "spltwt"))
2194 			goto retry;
2195 
2196 		vm_page_busy(m);
2197 		vm_page_rename(m, new_object, idx);
2198 		/* page automatically made dirty by rename and cache handled */
2199 		vm_page_busy(m);
2200 	}
2201 
2202 	if (orig_object->type == OBJT_SWAP) {
2203 		vm_object_pip_add(orig_object, 1);
2204 		/*
2205 		 * copy orig_object pages into new_object
2206 		 * and destroy unneeded pages in
2207 		 * shadow object.
2208 		 */
2209 		swap_pager_copy(orig_object, new_object, offidxstart, 0);
2210 		vm_object_pip_wakeup(orig_object);
2211 	}
2212 
2213 	for (idx = 0; idx < size; idx++) {
2214 		m = vm_page_lookup(new_object, idx);
2215 		if (m) {
2216 			vm_page_wakeup(m);
2217 		}
2218 	}
2219 
2220 	entry->object.vm_object = new_object;
2221 	entry->offset = 0LL;
2222 	vm_object_deallocate(orig_object);
2223 }
2224 
2225 /*
2226  *	vm_map_copy_entry:
2227  *
2228  *	Copies the contents of the source entry to the destination
2229  *	entry.  The entries *must* be aligned properly.
2230  */
2231 static void
2232 vm_map_copy_entry(
2233 	vm_map_t src_map,
2234 	vm_map_t dst_map,
2235 	vm_map_entry_t src_entry,
2236 	vm_map_entry_t dst_entry)
2237 {
2238 	vm_object_t src_object;
2239 
2240 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2241 		return;
2242 
2243 	if (src_entry->wired_count == 0) {
2244 
2245 		/*
2246 		 * If the source entry is marked needs_copy, it is already
2247 		 * write-protected.
2248 		 */
2249 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2250 			pmap_protect(src_map->pmap,
2251 			    src_entry->start,
2252 			    src_entry->end,
2253 			    src_entry->protection & ~VM_PROT_WRITE);
2254 		}
2255 
2256 		/*
2257 		 * Make a copy of the object.
2258 		 */
2259 		if ((src_object = src_entry->object.vm_object) != NULL) {
2260 
2261 			if ((src_object->handle == NULL) &&
2262 				(src_object->type == OBJT_DEFAULT ||
2263 				 src_object->type == OBJT_SWAP)) {
2264 				vm_object_collapse(src_object);
2265 				if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2266 					vm_map_split(src_entry);
2267 					src_object = src_entry->object.vm_object;
2268 				}
2269 			}
2270 
2271 			vm_object_reference(src_object);
2272 			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2273 			dst_entry->object.vm_object = src_object;
2274 			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2275 			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2276 			dst_entry->offset = src_entry->offset;
2277 		} else {
2278 			dst_entry->object.vm_object = NULL;
2279 			dst_entry->offset = 0;
2280 		}
2281 
2282 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2283 		    dst_entry->end - dst_entry->start, src_entry->start);
2284 	} else {
2285 		/*
2286 		 * Of course, wired down pages can't be set copy-on-write.
2287 		 * Cause wired pages to be copied into the new map by
2288 		 * simulating faults (the new pages are pageable).
2289 		 */
2290 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2291 	}
2292 }
2293 
2294 /*
2295  * vmspace_fork:
2296  * Create a new process vmspace structure and vm_map
2297  * based on those of an existing process.  The new map
2298  * is based on the old map, according to the inheritance
2299  * values on the regions in that map.
2300  *
2301  * The source map must not be locked.
2302  */
2303 struct vmspace *
2304 vmspace_fork(struct vmspace *vm1)
2305 {
2306 	struct vmspace *vm2;
2307 	vm_map_t old_map = &vm1->vm_map;
2308 	vm_map_t new_map;
2309 	vm_map_entry_t old_entry;
2310 	vm_map_entry_t new_entry;
2311 	vm_object_t object;
2312 
2313 	GIANT_REQUIRED;
2314 
2315 	vm_map_lock(old_map);
2316 	old_map->infork = 1;
2317 
2318 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2319 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2320 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
2321 	new_map = &vm2->vm_map;	/* XXX */
2322 	new_map->timestamp = 1;
2323 
2324 	old_entry = old_map->header.next;
2325 
2326 	while (old_entry != &old_map->header) {
2327 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2328 			panic("vm_map_fork: encountered a submap");
2329 
2330 		switch (old_entry->inheritance) {
2331 		case VM_INHERIT_NONE:
2332 			break;
2333 
2334 		case VM_INHERIT_SHARE:
2335 			/*
2336 			 * Clone the entry, creating the shared object if necessary.
2337 			 */
2338 			object = old_entry->object.vm_object;
2339 			if (object == NULL) {
2340 				object = vm_object_allocate(OBJT_DEFAULT,
2341 					atop(old_entry->end - old_entry->start));
2342 				old_entry->object.vm_object = object;
2343 				old_entry->offset = (vm_offset_t) 0;
2344 			}
2345 
2346 			/*
2347 			 * Add the reference before calling vm_object_shadow
2348 			 * to ensure that a shadow object is created.
2349 			 */
2350 			vm_object_reference(object);
2351 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2352 				vm_object_shadow(&old_entry->object.vm_object,
2353 					&old_entry->offset,
2354 					atop(old_entry->end - old_entry->start));
2355 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2356 				/* Transfer the second reference too. */
2357 				vm_object_reference(
2358 				    old_entry->object.vm_object);
2359 				vm_object_deallocate(object);
2360 				object = old_entry->object.vm_object;
2361 			}
2362 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
2363 
2364 			/*
2365 			 * Clone the entry, referencing the shared object.
2366 			 */
2367 			new_entry = vm_map_entry_create(new_map);
2368 			*new_entry = *old_entry;
2369 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2370 			new_entry->wired_count = 0;
2371 
2372 			/*
2373 			 * Insert the entry into the new map -- we know we're
2374 			 * inserting at the end of the new map.
2375 			 */
2376 
2377 			vm_map_entry_link(new_map, new_map->header.prev,
2378 			    new_entry);
2379 
2380 			/*
2381 			 * Update the physical map
2382 			 */
2383 
2384 			pmap_copy(new_map->pmap, old_map->pmap,
2385 			    new_entry->start,
2386 			    (old_entry->end - old_entry->start),
2387 			    old_entry->start);
2388 			break;
2389 
2390 		case VM_INHERIT_COPY:
2391 			/*
2392 			 * Clone the entry and link into the map.
2393 			 */
2394 			new_entry = vm_map_entry_create(new_map);
2395 			*new_entry = *old_entry;
2396 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2397 			new_entry->wired_count = 0;
2398 			new_entry->object.vm_object = NULL;
2399 			vm_map_entry_link(new_map, new_map->header.prev,
2400 			    new_entry);
2401 			vm_map_copy_entry(old_map, new_map, old_entry,
2402 			    new_entry);
2403 			break;
2404 		}
2405 		old_entry = old_entry->next;
2406 	}
2407 
2408 	new_map->size = old_map->size;
2409 	old_map->infork = 0;
2410 	vm_map_unlock(old_map);
2411 
2412 	return (vm2);
2413 }
2414 
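/*
 * Example (sketch): the fork path hands the child a copy of the parent's
 * address space with, roughly,
 *
 *	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
 *
 * whereas an RFMEM fork simply shares vm1 and bumps vm_refcnt instead.
 */
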
2415 int
2416 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2417 	      vm_prot_t prot, vm_prot_t max, int cow)
2418 {
2419 	vm_map_entry_t prev_entry;
2420 	vm_map_entry_t new_stack_entry;
2421 	vm_size_t      init_ssize;
2422 	int            rv;
2423 
2424 	GIANT_REQUIRED;
2425 
2426 	if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2427 		return (KERN_NO_SPACE);
2428 
2429 	if (max_ssize < sgrowsiz)
2430 		init_ssize = max_ssize;
2431 	else
2432 		init_ssize = sgrowsiz;
2433 
2434 	vm_map_lock(map);
2435 
2436 	/* If addr is already mapped, no go */
2437 	if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2438 		vm_map_unlock(map);
2439 		return (KERN_NO_SPACE);
2440 	}
2441 
2442 	/* If we can't accommodate max_ssize in the current mapping,
2443 	 * no go.  However, we need to be aware that subsequent user
2444 	 * mappings might map into the space we have reserved for
2445 	 * stack, and currently this space is not protected.
2446 	 *
2447 	 * Hopefully we will at least detect this condition
2448 	 * when we try to grow the stack.
2449 	 */
2450 	if ((prev_entry->next != &map->header) &&
2451 	    (prev_entry->next->start < addrbos + max_ssize)) {
2452 		vm_map_unlock(map);
2453 		return (KERN_NO_SPACE);
2454 	}
2455 
2456 	/* We initially map a stack of only init_ssize.  We will
2457 	 * grow as needed later.  Since this is to be a grow
2458 	 * down stack, we map at the top of the range.
2459 	 *
2460 	 * Note: we would normally expect prot and max to be
2461 	 * VM_PROT_ALL, and cow to be 0.  Possibly we should
2462 	 * eliminate these as input parameters, and just
2463 	 * pass these values here in the insert call.
2464 	 */
2465 	rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2466 	                   addrbos + max_ssize, prot, max, cow);
2467 
2468 	/* Now set the avail_ssize amount */
2469 	if (rv == KERN_SUCCESS){
2470 		if (prev_entry != &map->header)
2471 			vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2472 		new_stack_entry = prev_entry->next;
2473 		if (new_stack_entry->end   != addrbos + max_ssize ||
2474 		    new_stack_entry->start != addrbos + max_ssize - init_ssize)
2475 			panic ("Bad entry start/end for new stack entry");
2476 		else
2477 			new_stack_entry->avail_ssize = max_ssize - init_ssize;
2478 	}
2479 
2480 	vm_map_unlock(map);
2481 	return (rv);
2482 }
2483 
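/*
 * Example (sketch): exec is expected to create the main user stack by
 * reserving the full rlimit-sized region below the stack top, with only the
 * initial sgrowsiz portion actually mapped up front:
 *
 *	rv = vm_map_stack(&vmspace->vm_map, USRSTACK - maxssiz,
 *	    (vm_size_t)maxssiz, VM_PROT_ALL, VM_PROT_ALL, 0);
 */
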
2484 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2485  * desired address is already mapped, or if we successfully grow
2486  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2487  * stack range (this is strange, but preserves compatibility with
2488  * the grow function in vm_machdep.c).
2489  */
2490 int
2491 vm_map_growstack (struct proc *p, vm_offset_t addr)
2492 {
2493 	vm_map_entry_t prev_entry;
2494 	vm_map_entry_t stack_entry;
2495 	vm_map_entry_t new_stack_entry;
2496 	struct vmspace *vm = p->p_vmspace;
2497 	vm_map_t map = &vm->vm_map;
2498 	vm_offset_t    end;
2499 	int      grow_amount;
2500 	int      rv;
2501 	int      is_procstack;
2502 
2503 	GIANT_REQUIRED;
2504 
2505 Retry:
2506 	vm_map_lock_read(map);
2507 
2508 	/* If addr is already in the entry range, no need to grow. */
2509 	if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2510 		vm_map_unlock_read(map);
2511 		return (KERN_SUCCESS);
2512 	}
2513 
2514 	if ((stack_entry = prev_entry->next) == &map->header) {
2515 		vm_map_unlock_read(map);
2516 		return (KERN_SUCCESS);
2517 	}
2518 	if (prev_entry == &map->header)
2519 		end = stack_entry->start - stack_entry->avail_ssize;
2520 	else
2521 		end = prev_entry->end;
2522 
2523 	/* This next test mimics the old grow function in vm_machdep.c.
2524 	 * It really doesn't quite make sense, but we do it anyway
2525 	 * for compatibility.
2526 	 *
2527 	 * If the stack is not growable here, return success.  This signals
2528 	 * the caller to proceed as it normally would with ordinary vm.
2529 	 */
2530 	if (stack_entry->avail_ssize < 1 ||
2531 	    addr >= stack_entry->start ||
2532 	    addr <  stack_entry->start - stack_entry->avail_ssize) {
2533 		vm_map_unlock_read(map);
2534 		return (KERN_SUCCESS);
2535 	}
2536 
2537 	/* Find the minimum grow amount */
2538 	grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2539 	if (grow_amount > stack_entry->avail_ssize) {
2540 		vm_map_unlock_read(map);
2541 		return (KERN_NO_SPACE);
2542 	}
2543 
2544 	/* If there is no longer enough space between the entries, fail
2545 	 * and adjust the available space.  Note: this
2546 	 * should only happen if the user has mapped into the
2547 	 * stack area after the stack was created, and is
2548 	 * probably an error.
2549 	 *
2550 	 * This also effectively destroys any guard page the user
2551 	 * might have intended by limiting the stack size.
2552 	 */
2553 	if (grow_amount > stack_entry->start - end) {
2554 		if (vm_map_lock_upgrade(map))
2555 			goto Retry;
2556 
2557 		stack_entry->avail_ssize = stack_entry->start - end;
2558 
2559 		vm_map_unlock(map);
2560 		return (KERN_NO_SPACE);
2561 	}
2562 
2563 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2564 
2565 	/* If this is the main process stack, see if we're over the
2566 	 * stack limit.
2567 	 */
2568 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2569 			     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2570 		vm_map_unlock_read(map);
2571 		return (KERN_NO_SPACE);
2572 	}
2573 
2574 	/* Round up the grow amount to a multiple of sgrowsiz. */
2575 	grow_amount = roundup (grow_amount, sgrowsiz);
2576 	if (grow_amount > stack_entry->avail_ssize) {
2577 		grow_amount = stack_entry->avail_ssize;
2578 	}
2579 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2580 	                     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2581 		grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2582 		              ctob(vm->vm_ssize);
2583 	}
2584 
2585 	if (vm_map_lock_upgrade(map))
2586 		goto Retry;
2587 
2588 	/* Get the preliminary new entry start value */
2589 	addr = stack_entry->start - grow_amount;
2590 
2591 	/* If this puts us into the previous entry, cut back our growth
2592 	 * to the available space.  Also, see the note above.
2593 	 */
2594 	if (addr < end) {
2595 		stack_entry->avail_ssize = stack_entry->start - end;
2596 		addr = end;
2597 	}
2598 
2599 	rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2600 			   VM_PROT_ALL,
2601 			   VM_PROT_ALL,
2602 			   0);
2603 
2604 	/* Adjust the available stack space by the amount we grew. */
2605 	if (rv == KERN_SUCCESS) {
2606 		if (prev_entry != &map->header)
2607 			vm_map_clip_end(map, prev_entry, addr);
2608 		new_stack_entry = prev_entry->next;
2609 		if (new_stack_entry->end   != stack_entry->start  ||
2610 		    new_stack_entry->start != addr)
2611 			panic ("Bad stack grow start/end in new stack entry");
2612 		else {
2613 			new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2614 							(new_stack_entry->end -
2615 							 new_stack_entry->start);
2616 			if (is_procstack)
2617 				vm->vm_ssize += btoc(new_stack_entry->end -
2618 						     new_stack_entry->start);
2619 		}
2620 	}
2621 
2622 	vm_map_unlock(map);
2623 	return (rv);
2624 }
2625 
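/*
 * Example (sketch; p and va are the faulting process and address): the
 * page-fault path typically gives the stack a chance to grow before
 * treating a below-stack access as an error:
 *
 *	if (vm_map_growstack(p, va) != KERN_SUCCESS)
 *		... fail the access (e.g. deliver SIGSEGV) ...
 */
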
2626 /*
2627  * Unshare the specified VM space for exec.  If other processes still
2628  * reference it, then create a new one.  The new vmspace is empty.
2629  */
2630 
2631 void
2632 vmspace_exec(struct proc *p)
2633 {
2634 	struct vmspace *oldvmspace = p->p_vmspace;
2635 	struct vmspace *newvmspace;
2636 	vm_map_t map = &p->p_vmspace->vm_map;
2637 
2638 	GIANT_REQUIRED;
2639 	newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
2640 	bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2641 	    (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2642 	/*
2643 	 * This code is written like this for prototype purposes.  The
2644 	 * goal is to avoid running down the vmspace here, but let the
2645 	 * other processes that are still using the vmspace finally run
2646 	 * it down.  Even though there is little or no chance of blocking
2647 	 * here, it is a good idea to keep this form for future mods.
2648 	 */
2649 	p->p_vmspace = newvmspace;
2650 	pmap_pinit2(vmspace_pmap(newvmspace));
2651 	vmspace_free(oldvmspace);
2652 	if (p == curthread->td_proc)		/* XXXKSE ? */
2653 		pmap_activate(curthread);
2654 }
2655 
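/*
 * Example (sketch): exec_new_vmspace() is expected to call this only when
 * the address space is actually shared; a sole reference can simply be
 * emptied in place:
 *
 *	if (vmspace->vm_refcnt == 1)
 *		vm_map_remove(&vmspace->vm_map, 0, VM_MAXUSER_ADDRESS);
 *	else
 *		vmspace_exec(p);
 */
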
2656 /*
2657  * Unshare the specified VM space for forcing COW.  This
2658  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2659  */
2660 
2661 void
2662 vmspace_unshare(struct proc *p)
2663 {
2664 	struct vmspace *oldvmspace = p->p_vmspace;
2665 	struct vmspace *newvmspace;
2666 
2667 	GIANT_REQUIRED;
2668 	if (oldvmspace->vm_refcnt == 1)
2669 		return;
2670 	newvmspace = vmspace_fork(oldvmspace);
2671 	p->p_vmspace = newvmspace;
2672 	pmap_pinit2(vmspace_pmap(newvmspace));
2673 	vmspace_free(oldvmspace);
2674 	if (p == curthread->td_proc)		/* XXXKSE ? */
2675 		pmap_activate(curthread);
2676 }
2677 
2678 
2679 /*
2680  *	vm_map_lookup:
2681  *
2682  *	Finds the VM object, offset, and
2683  *	protection for a given virtual address in the
2684  *	specified map, assuming a page fault of the
2685  *	type specified.
2686  *
2687  *	Leaves the map in question locked for read; return
2688  *	values are guaranteed until a vm_map_lookup_done
2689  *	call is performed.  Note that the map argument
2690  *	is in/out; the returned map must be used in
2691  *	the call to vm_map_lookup_done.
2692  *
2693  *	A handle (out_entry) is returned for use in
2694  *	vm_map_lookup_done, to make that fast.
2695  *
2696  *	If a lookup is requested with "write protection"
2697  *	specified, the map may be changed to perform virtual
2698  *	copying operations, although the data referenced will
2699  *	remain the same.
2700  */
2701 int
2702 vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
2703 	      vm_offset_t vaddr,
2704 	      vm_prot_t fault_typea,
2705 	      vm_map_entry_t *out_entry,	/* OUT */
2706 	      vm_object_t *object,		/* OUT */
2707 	      vm_pindex_t *pindex,		/* OUT */
2708 	      vm_prot_t *out_prot,		/* OUT */
2709 	      boolean_t *wired)			/* OUT */
2710 {
2711 	vm_map_entry_t entry;
2712 	vm_map_t map = *var_map;
2713 	vm_prot_t prot;
2714 	vm_prot_t fault_type = fault_typea;
2715 
2716 	GIANT_REQUIRED;
2717 RetryLookup:;
2718 
2719 	/*
2720 	 * Lookup the faulting address.
2721 	 */
2722 
2723 	vm_map_lock_read(map);
2724 
2725 #define	RETURN(why) \
2726 		{ \
2727 		vm_map_unlock_read(map); \
2728 		return(why); \
2729 		}
2730 
2731 	/*
2732 	 * If the map has an interesting hint, try it before calling the
2733 	 * full-blown lookup routine.
2734 	 */
2735 
2736 	entry = map->hint;
2737 
2738 	*out_entry = entry;
2739 
2740 	if ((entry == &map->header) ||
2741 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2742 		vm_map_entry_t tmp_entry;
2743 
2744 		/*
2745 		 * Entry was either not a valid hint, or the vaddr was not
2746 		 * contained in the entry, so do a full lookup.
2747 		 */
2748 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2749 			RETURN(KERN_INVALID_ADDRESS);
2750 
2751 		entry = tmp_entry;
2752 		*out_entry = entry;
2753 	}
2754 
2755 	/*
2756 	 * Handle submaps.
2757 	 */
2758 
2759 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2760 		vm_map_t old_map = map;
2761 
2762 		*var_map = map = entry->object.sub_map;
2763 		vm_map_unlock_read(old_map);
2764 		goto RetryLookup;
2765 	}
2766 
2767 	/*
2768 	 * Check whether this task is allowed to have this page.
2769 	 * Note the special case for MAP_ENTRY_COW
2770 	 * pages with an override.  This is to implement a forced
2771 	 * COW for debuggers.
2772 	 */
2773 
2774 	if (fault_type & VM_PROT_OVERRIDE_WRITE)
2775 		prot = entry->max_protection;
2776 	else
2777 		prot = entry->protection;
2778 
2779 	fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2780 	if ((fault_type & prot) != fault_type) {
2781 			RETURN(KERN_PROTECTION_FAILURE);
2782 	}
2783 
2784 	if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2785 	    (entry->eflags & MAP_ENTRY_COW) &&
2786 	    (fault_type & VM_PROT_WRITE) &&
2787 	    (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2788 		RETURN(KERN_PROTECTION_FAILURE);
2789 	}
2790 
2791 	/*
2792 	 * If this page is not pageable, we have to get it for all possible
2793 	 * accesses.
2794 	 */
2795 
2796 	*wired = (entry->wired_count != 0);
2797 	if (*wired)
2798 		prot = fault_type = entry->protection;
2799 
2800 	/*
2801 	 * If the entry was copy-on-write, we either ...
2802 	 */
2803 
2804 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2805 		/*
2806 		 * If we want to write the page, we may as well handle that
2807 		 * now since we've got the map locked.
2808 		 *
2809 		 * If we don't need to write the page, we just demote the
2810 		 * permissions allowed.
2811 		 */
2812 
2813 		if (fault_type & VM_PROT_WRITE) {
2814 			/*
2815 			 * Make a new object, and place it in the object
2816 			 * chain.  Note that no new references have appeared
2817 			 * -- one just moved from the map to the new
2818 			 * object.
2819 			 */
2820 
2821 			if (vm_map_lock_upgrade(map))
2822 				goto RetryLookup;
2823 
2824 			vm_object_shadow(
2825 			    &entry->object.vm_object,
2826 			    &entry->offset,
2827 			    atop(entry->end - entry->start));
2828 
2829 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2830 			vm_map_lock_downgrade(map);
2831 		} else {
2832 			/*
2833 			 * We're attempting to read a copy-on-write page --
2834 			 * don't allow writes.
2835 			 */
2836 
2837 			prot &= ~VM_PROT_WRITE;
2838 		}
2839 	}
2840 
2841 	/*
2842 	 * Create an object if necessary.
2843 	 */
2844 	if (entry->object.vm_object == NULL &&
2845 	    !map->system_map) {
2846 		if (vm_map_lock_upgrade(map))
2847 			goto RetryLookup;
2848 
2849 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2850 		    atop(entry->end - entry->start));
2851 		entry->offset = 0;
2852 		vm_map_lock_downgrade(map);
2853 	}
2854 
2855 	/*
2856 	 * Return the object/offset from this entry.  If the entry was
2857 	 * copy-on-write or empty, it has been fixed up.
2858 	 */
2859 
2860 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2861 	*object = entry->object.vm_object;
2862 
2863 	/*
2864 	 * Return the protection determined above.
2865 	 */
2866 
2867 	*out_prot = prot;
2868 	return (KERN_SUCCESS);
2869 
2870 #undef	RETURN
2871 }
2872 
2873 /*
2874  *	vm_map_lookup_done:
2875  *
2876  *	Releases locks acquired by a vm_map_lookup
2877  *	(according to the handle returned by that lookup).
2878  */
2879 
2880 void
2881 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
2882 {
2883 	/*
2884 	 * Unlock the main-level map
2885 	 */
2886 	GIANT_REQUIRED;
2887 	vm_map_unlock_read(map);
2888 }
2889 
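/*
 * Example (sketch): the canonical lookup/use/release sequence, much as the
 * page-fault handler uses it:
 *
 *	result = vm_map_lookup(&map, vaddr, fault_type, &entry,
 *	    &first_object, &first_pindex, &prot, &wired);
 *	if (result != KERN_SUCCESS)
 *		return (result);
 *	... use first_object/first_pindex with the map read-locked ...
 *	vm_map_lookup_done(map, entry);
 */
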
2890 /*
2891  * Implement uiomove with VM operations.  This (and its collateral changes)
2892  * supports every combination of source object modification and COW-type
2893  * operations.
2894  */
2895 int
2896 vm_uiomove(
2897 	vm_map_t mapa,
2898 	vm_object_t srcobject,
2899 	off_t cp,
2900 	int cnta,
2901 	vm_offset_t uaddra,
2902 	int *npages)
2903 {
2904 	vm_map_t map;
2905 	vm_object_t first_object, oldobject, object;
2906 	vm_map_entry_t entry;
2907 	vm_prot_t prot;
2908 	boolean_t wired;
2909 	int tcnt, rv;
2910 	vm_offset_t uaddr, start, end, tend;
2911 	vm_pindex_t first_pindex, osize, oindex;
2912 	off_t ooffset;
2913 	int cnt;
2914 
2915 	GIANT_REQUIRED;
2916 
2917 	if (npages)
2918 		*npages = 0;
2919 
2920 	cnt = cnta;
2921 	uaddr = uaddra;
2922 
2923 	while (cnt > 0) {
2924 		map = mapa;
2925 
2926 		if ((vm_map_lookup(&map, uaddr,
2927 			VM_PROT_READ, &entry, &first_object,
2928 			&first_pindex, &prot, &wired)) != KERN_SUCCESS) {
2929 			return EFAULT;
2930 		}
2931 
2932 		vm_map_clip_start(map, entry, uaddr);
2933 
2934 		tcnt = cnt;
2935 		tend = uaddr + tcnt;
2936 		if (tend > entry->end) {
2937 			tcnt = entry->end - uaddr;
2938 			tend = entry->end;
2939 		}
2940 
2941 		vm_map_clip_end(map, entry, tend);
2942 
2943 		start = entry->start;
2944 		end = entry->end;
2945 
2946 		osize = atop(tcnt);
2947 
2948 		oindex = OFF_TO_IDX(cp);
2949 		if (npages) {
2950 			vm_pindex_t idx;
2951 			for (idx = 0; idx < osize; idx++) {
2952 				vm_page_t m;
2953 				if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
2954 					vm_map_lookup_done(map, entry);
2955 					return 0;
2956 				}
2957 				/*
2958 				 * disallow busy or invalid pages, but allow
2959 				 * m->busy pages if they are entirely valid.
2960 				 */
2961 				if ((m->flags & PG_BUSY) ||
2962 					((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
2963 					vm_map_lookup_done(map, entry);
2964 					return 0;
2965 				}
2966 			}
2967 		}
2968 
2969 /*
2970  * If we are changing an existing map entry, just redirect
2971  * the object, and change mappings.
2972  */
2973 		if ((first_object->type == OBJT_VNODE) &&
2974 			((oldobject = entry->object.vm_object) == first_object)) {
2975 
2976 			if ((entry->offset != cp) || (oldobject != srcobject)) {
2977 				/*
2978 				 * Remove old window into the file
2979 				 */
2980 				pmap_remove (map->pmap, uaddr, tend);
2981 
2982 				/*
2983 				 * Force copy-on-write for mmapped regions
2984 				 */
2985 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2986 
2987 				/*
2988 				 * Point the object appropriately
2989 				 */
2990 				if (oldobject != srcobject) {
2991 
2992 				/*
2993 				 * Set the object optimization hint flag
2994 				 */
2995 					vm_object_set_flag(srcobject, OBJ_OPT);
2996 					vm_object_reference(srcobject);
2997 					entry->object.vm_object = srcobject;
2998 
2999 					if (oldobject) {
3000 						vm_object_deallocate(oldobject);
3001 					}
3002 				}
3003 
3004 				entry->offset = cp;
3005 				map->timestamp++;
3006 			} else {
3007 				pmap_remove (map->pmap, uaddr, tend);
3008 			}
3009 
3010 		} else if ((first_object->ref_count == 1) &&
3011 			(first_object->size == osize) &&
3012 			((first_object->type == OBJT_DEFAULT) ||
3013 				(first_object->type == OBJT_SWAP)) ) {
3014 
3015 			oldobject = first_object->backing_object;
3016 
3017 			if ((first_object->backing_object_offset != cp) ||
3018 				(oldobject != srcobject)) {
3019 				/*
3020 				 * Remove old window into the file
3021 				 */
3022 				pmap_remove (map->pmap, uaddr, tend);
3023 
3024 				/*
3025 				 * Remove unneeded old pages
3026 				 */
3027 				vm_object_page_remove(first_object, 0, 0, 0);
3028 
3029 				/*
3030 				 * Invalidate swap space
3031 				 */
3032 				if (first_object->type == OBJT_SWAP) {
3033 					swap_pager_freespace(first_object,
3034 						0,
3035 						first_object->size);
3036 				}
3037 
3038 				/*
3039 				 * Force copy-on-write for mmapped regions
3040 				 */
3041 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3042 
3043 				/*
3044 				 * Point the object appropriately
3045 				 */
3046 				if (oldobject != srcobject) {
3047 
3048 				/*
3049 				 * Set the object optimization hint flag
3050 				 */
3051 					vm_object_set_flag(srcobject, OBJ_OPT);
3052 					vm_object_reference(srcobject);
3053 
3054 					if (oldobject) {
3055 						TAILQ_REMOVE(&oldobject->shadow_head,
3056 							first_object, shadow_list);
3057 						oldobject->shadow_count--;
3058 						/* XXX bump generation? */
3059 						vm_object_deallocate(oldobject);
3060 					}
3061 
3062 					TAILQ_INSERT_TAIL(&srcobject->shadow_head,
3063 						first_object, shadow_list);
3064 					srcobject->shadow_count++;
3065 					/* XXX bump generation? */
3066 
3067 					first_object->backing_object = srcobject;
3068 				}
3069 				first_object->backing_object_offset = cp;
3070 				map->timestamp++;
3071 			} else {
3072 				pmap_remove (map->pmap, uaddr, tend);
3073 			}
3074 /*
3075  * Otherwise, we have to do a logical mmap.
3076  */
3077 		} else {
3078 
3079 			vm_object_set_flag(srcobject, OBJ_OPT);
3080 			vm_object_reference(srcobject);
3081 
3082 			pmap_remove (map->pmap, uaddr, tend);
3083 
3084 			vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
3085 			vm_map_lock_upgrade(map);
3086 
3087 			if (entry == &map->header) {
3088 				map->first_free = &map->header;
3089 			} else if (map->first_free->start >= start) {
3090 				map->first_free = entry->prev;
3091 			}
3092 
3093 			SAVE_HINT(map, entry->prev);
3094 			vm_map_entry_delete(map, entry);
3095 
3096 			object = srcobject;
3097 			ooffset = cp;
3098 
3099 			rv = vm_map_insert(map, object, ooffset, start, tend,
3100 				VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
3101 
3102 			if (rv != KERN_SUCCESS)
3103 				panic("vm_uiomove: could not insert new entry: %d", rv);
3104 		}
3105 
3106 /*
3107  * Map the window directly, if it is already in memory
3108  */
3109 		pmap_object_init_pt(map->pmap, uaddr,
3110 			srcobject, oindex, tcnt, 0);
3111 
3112 		map->timestamp++;
3113 		vm_map_unlock(map);
3114 
3115 		cnt -= tcnt;
3116 		uaddr += tcnt;
3117 		cp += tcnt;
3118 		if (npages)
3119 			*npages += osize;
3120 	}
3121 	return 0;
3122 }
3123 
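/*
 * Example (sketch, hypothetical caller; foff, cnt, uaddr and npages are
 * illustrative): mapping cnt bytes of a vnode's VM object at offset foff
 * over the user buffer at uaddr instead of copying the data:
 *
 *	error = vm_uiomove(&p->p_vmspace->vm_map, vp->v_object,
 *	    foff, cnt, uaddr, &npages);
 */
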
3124 /*
3125  * Performs the copy_on_write operations necessary to allow the virtual copies
3126  * into user space to work.  This has to be called for write(2) system calls
3127  * from other processes, file unlinking, and file size shrinkage.
3128  */
3129 void
3130 vm_freeze_copyopts(vm_object_t object, vm_pindex_t froma, vm_pindex_t toa)
3131 {
3132 	int rv;
3133 	vm_object_t robject;
3134 	vm_pindex_t idx;
3135 
3136 	GIANT_REQUIRED;
3137 	if ((object == NULL) ||
3138 		((object->flags & OBJ_OPT) == 0))
3139 		return;
3140 
3141 	if (object->shadow_count > object->ref_count)
3142 		panic("vm_freeze_copyopts: sc > rc");
3143 
3144 	while((robject = TAILQ_FIRST(&object->shadow_head)) != NULL) {
3145 		vm_pindex_t bo_pindex;
3146 		vm_page_t m_in, m_out;
3147 
3148 		bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
3149 
3150 		vm_object_reference(robject);
3151 
3152 		vm_object_pip_wait(robject, "objfrz");
3153 
3154 		if (robject->ref_count == 1) {
3155 			vm_object_deallocate(robject);
3156 			continue;
3157 		}
3158 
3159 		vm_object_pip_add(robject, 1);
3160 
3161 		for (idx = 0; idx < robject->size; idx++) {
3162 
3163 			m_out = vm_page_grab(robject, idx,
3164 						VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3165 
3166 			if (m_out->valid == 0) {
3167 				m_in = vm_page_grab(object, bo_pindex + idx,
3168 						VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3169 				if (m_in->valid == 0) {
3170 					rv = vm_pager_get_pages(object, &m_in, 1, 0);
3171 					if (rv != VM_PAGER_OK) {
3172 						printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
3173 						continue;
3174 					}
3175 					vm_page_deactivate(m_in);
3176 				}
3177 
3178 				vm_page_protect(m_in, VM_PROT_NONE);
3179 				pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out));
3180 				m_out->valid = m_in->valid;
3181 				vm_page_dirty(m_out);
3182 				vm_page_activate(m_out);
3183 				vm_page_wakeup(m_in);
3184 			}
3185 			vm_page_wakeup(m_out);
3186 		}
3187 
3188 		object->shadow_count--;
3189 		object->ref_count--;
3190 		TAILQ_REMOVE(&object->shadow_head, robject, shadow_list);
3191 		robject->backing_object = NULL;
3192 		robject->backing_object_offset = 0;
3193 
3194 		vm_object_pip_wakeup(robject);
3195 		vm_object_deallocate(robject);
3196 	}
3197 
3198 	vm_object_clear_flag(object, OBJ_OPT);
3199 }
3200 
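/*
 * Example (sketch; offset and count are hypothetical): a vnode write or
 * truncate path would call this before modifying the backing pages, e.g.
 *
 *	vm_freeze_copyopts(vp->v_object, OFF_TO_IDX(offset),
 *	    OFF_TO_IDX(offset + count));
 *
 * The OBJ_OPT check at the top keeps the call cheap in the common case.
 */
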
3201 #include "opt_ddb.h"
3202 #ifdef DDB
3203 #include <sys/kernel.h>
3204 
3205 #include <ddb/ddb.h>
3206 
3207 /*
3208  *	vm_map_print:	[ debug ]
3209  */
3210 DB_SHOW_COMMAND(map, vm_map_print)
3211 {
3212 	static int nlines;
3213 	/* XXX convert args. */
3214 	vm_map_t map = (vm_map_t)addr;
3215 	boolean_t full = have_addr;
3216 
3217 	vm_map_entry_t entry;
3218 
3219 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3220 	    (void *)map,
3221 	    (void *)map->pmap, map->nentries, map->timestamp);
3222 	nlines++;
3223 
3224 	if (!full && db_indent)
3225 		return;
3226 
3227 	db_indent += 2;
3228 	for (entry = map->header.next; entry != &map->header;
3229 	    entry = entry->next) {
3230 		db_iprintf("map entry %p: start=%p, end=%p\n",
3231 		    (void *)entry, (void *)entry->start, (void *)entry->end);
3232 		nlines++;
3233 		{
3234 			static char *inheritance_name[4] =
3235 			{"share", "copy", "none", "donate_copy"};
3236 
3237 			db_iprintf(" prot=%x/%x/%s",
3238 			    entry->protection,
3239 			    entry->max_protection,
3240 			    inheritance_name[(int)(unsigned char)entry->inheritance]);
3241 			if (entry->wired_count != 0)
3242 				db_printf(", wired");
3243 		}
3244 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3245 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3246 			db_printf(", share=%p, offset=0x%lx\n",
3247 			    (void *)entry->object.sub_map,
3248 			    (long)entry->offset);
3249 			nlines++;
3250 			if ((entry->prev == &map->header) ||
3251 			    (entry->prev->object.sub_map !=
3252 				entry->object.sub_map)) {
3253 				db_indent += 2;
3254 				vm_map_print((db_expr_t)(intptr_t)
3255 					     entry->object.sub_map,
3256 					     full, 0, (char *)0);
3257 				db_indent -= 2;
3258 			}
3259 		} else {
3260 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3261 			db_printf(", object=%p, offset=0x%lx",
3262 			    (void *)entry->object.vm_object,
3263 			    (long)entry->offset);
3264 			if (entry->eflags & MAP_ENTRY_COW)
3265 				db_printf(", copy (%s)",
3266 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3267 			db_printf("\n");
3268 			nlines++;
3269 
3270 			if ((entry->prev == &map->header) ||
3271 			    (entry->prev->object.vm_object !=
3272 				entry->object.vm_object)) {
3273 				db_indent += 2;
3274 				vm_object_print((db_expr_t)(intptr_t)
3275 						entry->object.vm_object,
3276 						full, 0, (char *)0);
3277 				nlines += 4;
3278 				db_indent -= 2;
3279 			}
3280 		}
3281 	}
3282 	db_indent -= 2;
3283 	if (db_indent == 0)
3284 		nlines = 0;
3285 }
3286 
3287 
3288 DB_SHOW_COMMAND(procvm, procvm)
3289 {
3290 	struct proc *p;
3291 
3292 	if (have_addr) {
3293 		p = (struct proc *) addr;
3294 	} else {
3295 		p = curproc;
3296 	}
3297 
3298 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3299 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3300 	    (void *)vmspace_pmap(p->p_vmspace));
3301 
3302 	vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3303 }
3304 
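/*
 * From the DDB prompt these commands are invoked as
 *
 *	db> show map <map-address>
 *	db> show procvm [<proc-address>]
 *
 * where "show procvm" defaults to curproc when no address is given.
 */
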
3305 #endif /* DDB */
3306