xref: /freebsd/sys/vm/vm_map.c (revision 1b6c76a2fe091c74f08427e6c870851025a9cf67)
1 /*
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_map.c	8.3 (Berkeley) 1/12/94
37  *
38  *
39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  *
64  * $FreeBSD$
65  */
66 
67 /*
68  *	Virtual memory mapping module.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/lock.h>
74 #include <sys/mutex.h>
75 #include <sys/proc.h>
76 #include <sys/vmmeter.h>
77 #include <sys/mman.h>
78 #include <sys/vnode.h>
79 #include <sys/resourcevar.h>
80 
81 #include <vm/vm.h>
82 #include <vm/vm_param.h>
83 #include <vm/pmap.h>
84 #include <vm/vm_map.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_pager.h>
88 #include <vm/vm_kern.h>
89 #include <vm/vm_extern.h>
90 #include <vm/vm_zone.h>
91 #include <vm/swap_pager.h>
92 
93 /*
94  *	Virtual memory maps provide for the mapping, protection,
95  *	and sharing of virtual memory objects.  In addition,
96  *	this module provides for an efficient virtual copy of
97  *	memory from one map to another.
98  *
99  *	Synchronization is required prior to most operations.
100  *
101  *	Maps consist of an ordered doubly-linked list of simple
102  *	entries; a single hint is used to speed up lookups.
103  *
104  *	Since portions of maps are specified by start/end addresses,
105  *	which may not align with existing map entries, all
106  *	routines merely "clip" entries to these start/end values.
107  *	[That is, an entry is split into two, bordering at a
108  *	start or end value.]  Note that these clippings may not
109  *	always be necessary (as the two resulting entries are then
110  *	not changed); however, the clipping is done for convenience.
111  *
112  *	As mentioned above, virtual copy operations are performed
113  *	by copying VM object references from one map to
114  *	another, and then marking both regions as copy-on-write.
115  */
116 
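/*
 *	Illustrative sketch of the clipping rule above (the addresses are
 *	examples only): an entry covering [0x2000, 0x6000) at object
 *	offset 0 that is clipped at 0x4000 becomes two entries, and the
 *	second one inherits an adjusted offset:
 *
 *		before:	[0x2000, 0x6000)  offset 0x0000
 *		after:	[0x2000, 0x4000)  offset 0x0000
 *			[0x4000, 0x6000)  offset 0x2000
 *
 *	Callers never see the split; operations such as vm_map_protect()
 *	simply apply to whichever pieces fall inside their range.
 */
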
117 /*
118  *	vm_map_startup:
119  *
120  *	Initialize the vm_map module.  Must be called before
121  *	any other vm_map routines.
122  *
123  *	Map and entry structures are allocated from the general
124  *	purpose memory pool with some exceptions:
125  *
126  *	- The kernel map and kmem submap are allocated statically.
127  *	- Kernel map entries are allocated out of a static pool.
128  *
129  *	These restrictions are necessary since malloc() uses the
130  *	maps and requires map entries.
131  */
132 
133 static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store;
134 static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone;
135 static struct vm_object kmapentobj, mapentobj, mapobj;
136 
137 static struct vm_map_entry map_entry_init[MAX_MAPENT];
138 static struct vm_map_entry kmap_entry_init[MAX_KMAPENT];
139 static struct vm_map map_init[MAX_KMAP];
140 
141 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
142 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
143 static vm_map_entry_t vm_map_entry_create __P((vm_map_t));
144 static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
145 static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
146 static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
147 static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t,
148 		vm_map_entry_t));
149 static void vm_map_split __P((vm_map_entry_t));
150 
151 void
152 vm_map_startup()
153 {
154 	mapzone = &mapzone_store;
155 	zbootinit(mapzone, "MAP", sizeof (struct vm_map),
156 		map_init, MAX_KMAP);
157 	kmapentzone = &kmapentzone_store;
158 	zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry),
159 		kmap_entry_init, MAX_KMAPENT);
160 	mapentzone = &mapentzone_store;
161 	zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
162 		map_entry_init, MAX_MAPENT);
163 }
164 
165 /*
166  * Allocate a vmspace structure, including a vm_map and pmap,
167  * and initialize those structures.  The refcnt is set to 1.
168  * The remaining fields must be initialized by the caller.
169  */
170 struct vmspace *
171 vmspace_alloc(min, max)
172 	vm_offset_t min, max;
173 {
174 	struct vmspace *vm;
175 
176 	mtx_assert(&vm_mtx, MA_OWNED);
177 	vm = zalloc(vmspace_zone);
178 	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
179 	vm_map_init(&vm->vm_map, min, max);
180 	pmap_pinit(vmspace_pmap(vm));
181 	vm->vm_map.pmap = vmspace_pmap(vm);		/* XXX */
182 	vm->vm_refcnt = 1;
183 	vm->vm_shm = NULL;
184 	return (vm);
185 }
186 
187 void
188 vm_init2(void) {
189 	zinitna(kmapentzone, &kmapentobj,
190 		NULL, 0, cnt.v_page_count / 4, ZONE_INTERRUPT, 1);
191 	zinitna(mapentzone, &mapentobj,
192 		NULL, 0, 0, 0, 1);
193 	zinitna(mapzone, &mapobj,
194 		NULL, 0, 0, 0, 1);
195 	vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3);
196 	pmap_init2();
197 	vm_object_init2();
198 }
199 
200 void
201 vmspace_free(vm)
202 	struct vmspace *vm;
203 {
204 
205 	mtx_assert(&vm_mtx, MA_OWNED);
206 	if (vm->vm_refcnt == 0)
207 		panic("vmspace_free: attempt to free already freed vmspace");
208 
209 	if (--vm->vm_refcnt == 0) {
210 
211 		CTR1(KTR_VM, "vmspace_free: %p", vm);
212 		/*
213 		 * Lock the map, to wait out all other references to it.
214 		 * Delete all of the mappings and pages they hold, then call
215 		 * the pmap module to reclaim anything left.
216 		 */
217 		vm_map_lock(&vm->vm_map);
218 		(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
219 		    vm->vm_map.max_offset);
220 		vm_map_unlock(&vm->vm_map);
221 
222 		pmap_release(vmspace_pmap(vm));
223 		vm_map_destroy(&vm->vm_map);
224 		zfree(vmspace_zone, vm);
225 	}
226 }
227 
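/*
 *	Illustrative sketch (the bounds "min" and "max" are placeholders):
 *	pairing the two routines above.  vm_mtx must be held, the vmspace
 *	comes back with vm_refcnt == 1, and the final vmspace_free() tears
 *	down the map and releases the pmap.
 *
 *		struct vmspace *vm;
 *
 *		vm = vmspace_alloc(min, max);
 *		... use &vm->vm_map and vmspace_pmap(vm) ...
 *		vmspace_free(vm);
 */
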
228 /*
229  * vmspace_swap_count() - count the approximate swap usage in pages for a
230  *			  vmspace.
231  *
232  *	Swap usage is determined by taking the proportional swap used by
233  *	VM objects backing the VM map.  To make up for fractional losses,
234  *	if the VM object has any swap use at all, the associated map entries
235  *	count for at least 1 swap page.
236  */
237 int
238 vmspace_swap_count(struct vmspace *vmspace)
239 {
240 	vm_map_t map = &vmspace->vm_map;
241 	vm_map_entry_t cur;
242 	int count = 0;
243 
244 	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
245 		vm_object_t object;
246 
247 		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
248 		    (object = cur->object.vm_object) != NULL &&
249 		    object->type == OBJT_SWAP
250 		) {
251 			int n = (cur->end - cur->start) / PAGE_SIZE;
252 
253 			if (object->un_pager.swp.swp_bcount) {
254 				count += object->un_pager.swp.swp_bcount *
255 				    SWAP_META_PAGES * n / object->size + 1;
256 			}
257 		}
258 	}
259 	return(count);
260 }
261 
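/*
 *	Worked example of the formula above (SWAP_META_PAGES is assumed to
 *	be 16 purely for illustration): an entry mapping n = 32 pages of a
 *	64-page OBJT_SWAP object whose swp_bcount is 2 contributes
 *
 *		2 * 16 * 32 / 64 + 1 = 17 pages
 *
 *	to the total, so any object with swap use is charged at least one
 *	page per map entry that references it.
 */
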
262 /*
263  *	vm_map_create:
264  *
265  *	Creates and returns a new empty VM map with
266  *	the given physical map structure, and having
267  *	the given lower and upper address bounds.
268  */
269 vm_map_t
270 vm_map_create(pmap, min, max)
271 	pmap_t pmap;
272 	vm_offset_t min, max;
273 {
274 	vm_map_t result;
275 
276 	mtx_assert(&vm_mtx, MA_OWNED);
277 	result = zalloc(mapzone);
278 	CTR1(KTR_VM, "vm_map_create: %p", result);
279 	vm_map_init(result, min, max);
280 	result->pmap = pmap;
281 	return (result);
282 }
283 
284 /*
285  * Initialize an existing vm_map structure
286  * such as that in the vmspace structure.
287  * The pmap is set elsewhere.
288  */
289 void
290 vm_map_init(map, min, max)
291 	struct vm_map *map;
292 	vm_offset_t min, max;
293 {
294 
295 	mtx_assert(&vm_mtx, MA_OWNED);
296 	map->header.next = map->header.prev = &map->header;
297 	map->nentries = 0;
298 	map->size = 0;
299 	map->system_map = 0;
300 	map->infork = 0;
301 	map->min_offset = min;
302 	map->max_offset = max;
303 	map->first_free = &map->header;
304 	map->hint = &map->header;
305 	map->timestamp = 0;
306 	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
307 }
308 
309 void
310 vm_map_destroy(map)
311 	struct vm_map *map;
312 {
313 
314 	mtx_assert(&vm_mtx, MA_OWNED);
315 	lockdestroy(&map->lock);
316 }
317 
318 /*
319  *	vm_map_entry_dispose:	[ internal use only ]
320  *
321  *	Inverse of vm_map_entry_create.
322  */
323 static void
324 vm_map_entry_dispose(map, entry)
325 	vm_map_t map;
326 	vm_map_entry_t entry;
327 {
328 	zfree((map->system_map || !mapentzone) ? kmapentzone : mapentzone, entry);
329 }
330 
331 /*
332  *	vm_map_entry_create:	[ internal use only ]
333  *
334  *	Allocates a VM map entry for insertion.
335  *	No entry fields are filled in.
336  */
337 static vm_map_entry_t
338 vm_map_entry_create(map)
339 	vm_map_t map;
340 {
341 	vm_map_entry_t new_entry;
342 
343 	new_entry = zalloc((map->system_map || !mapentzone) ?
344 		kmapentzone : mapentzone);
345 	if (new_entry == NULL)
346 	    panic("vm_map_entry_create: kernel resources exhausted");
347 	return(new_entry);
348 }
349 
350 /*
351  *	vm_map_entry_{un,}link:
352  *
353  *	Insert/remove entries from maps.
354  */
355 static __inline void
356 vm_map_entry_link(vm_map_t map,
357 		  vm_map_entry_t after_where,
358 		  vm_map_entry_t entry)
359 {
360 
361 	CTR4(KTR_VM,
362 	    "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
363 	    map->nentries, entry, after_where);
364 	map->nentries++;
365 	entry->prev = after_where;
366 	entry->next = after_where->next;
367 	entry->next->prev = entry;
368 	after_where->next = entry;
369 }
370 
371 static __inline void
372 vm_map_entry_unlink(vm_map_t map,
373 		    vm_map_entry_t entry)
374 {
375 	vm_map_entry_t prev = entry->prev;
376 	vm_map_entry_t next = entry->next;
377 
378 	next->prev = prev;
379 	prev->next = next;
380 	map->nentries--;
381 	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
382 	    map->nentries, entry);
383 }
384 
385 /*
386  *	SAVE_HINT:
387  *
388  *	Saves the specified entry as the hint for
389  *	future lookups.
390  */
391 #define	SAVE_HINT(map,value) \
392 		(map)->hint = (value);
393 
394 /*
395  *	vm_map_lookup_entry:	[ internal use only ]
396  *
397  *	Finds the map entry containing (or
398  *	immediately preceding) the specified address
399  *	in the given map; the entry is returned
400  *	in the "entry" parameter.  The boolean
401  *	result indicates whether the address is
402  *	actually contained in the map.
403  *
404  *	Doesn't block.
405  */
406 boolean_t
407 vm_map_lookup_entry(map, address, entry)
408 	vm_map_t map;
409 	vm_offset_t address;
410 	vm_map_entry_t *entry;	/* OUT */
411 {
412 	vm_map_entry_t cur;
413 	vm_map_entry_t last;
414 
415 	mtx_assert(&vm_mtx, MA_OWNED);
416 	/*
417 	 * Start looking either from the head of the list, or from the hint.
418 	 */
419 
420 	cur = map->hint;
421 
422 	if (cur == &map->header)
423 		cur = cur->next;
424 
425 	if (address >= cur->start) {
426 		/*
427 		 * Go from hint to end of list.
428 		 *
429 		 * But first, make a quick check to see if we are already looking
430 		 * at the entry we want (which is usually the case). Note also
431 		 * that we don't need to save the hint here... it is the same
432 		 * hint (unless we are at the header, in which case the hint
433 		 * didn't buy us anything anyway).
434 		 */
435 		last = &map->header;
436 		if ((cur != last) && (cur->end > address)) {
437 			*entry = cur;
438 			return (TRUE);
439 		}
440 	} else {
441 		/*
442 		 * Go from start to hint, *inclusively*
443 		 */
444 		last = cur->next;
445 		cur = map->header.next;
446 	}
447 
448 	/*
449 	 * Search linearly
450 	 */
451 
452 	while (cur != last) {
453 		if (cur->end > address) {
454 			if (address >= cur->start) {
455 				/*
456 				 * Save this lookup for future hints, and
457 				 * return
458 				 */
459 
460 				*entry = cur;
461 				SAVE_HINT(map, cur);
462 				return (TRUE);
463 			}
464 			break;
465 		}
466 		cur = cur->next;
467 	}
468 	*entry = cur->prev;
469 	SAVE_HINT(map, *entry);
470 	return (FALSE);
471 }
472 
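/*
 *	Illustrative sketch (the single entry [0x1000, 0x3000) is an
 *	example only) of what vm_map_lookup_entry() hands back:
 *
 *		vm_map_lookup_entry(map, 0x2000, &e)	TRUE,  e = the entry
 *		vm_map_lookup_entry(map, 0x4000, &e)	FALSE, e = the entry
 *							(closest preceding one)
 *		vm_map_lookup_entry(map, 0x0800, &e)	FALSE, e = &map->header
 *
 *	The FALSE cases still set *entry, which is why callers such as
 *	vm_map_protect() continue with entry->next.
 */
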
473 /*
474  *	vm_map_insert:
475  *
476  *	Inserts the given whole VM object into the target
477  *	map at the specified address range.  The object's
478  *	size should match that of the address range.
479  *
480  *	Requires that the map be locked, and leaves it so.
481  *
482  *	If object is non-NULL, ref count must be bumped by caller
483  *	prior to making call to account for the new entry.
484  */
485 int
486 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
487 	      vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
488 	      int cow)
489 {
490 	vm_map_entry_t new_entry;
491 	vm_map_entry_t prev_entry;
492 	vm_map_entry_t temp_entry;
493 	vm_eflags_t protoeflags;
494 
495 	mtx_assert(&vm_mtx, MA_OWNED);
496 	/*
497 	 * Check that the start and end points are not bogus.
498 	 */
499 
500 	if ((start < map->min_offset) || (end > map->max_offset) ||
501 	    (start >= end))
502 		return (KERN_INVALID_ADDRESS);
503 
504 	/*
505 	 * Find the entry prior to the proposed starting address; if it's part
506 	 * of an existing entry, this range is bogus.
507 	 */
508 
509 	if (vm_map_lookup_entry(map, start, &temp_entry))
510 		return (KERN_NO_SPACE);
511 
512 	prev_entry = temp_entry;
513 
514 	/*
515 	 * Assert that the next entry doesn't overlap the end point.
516 	 */
517 
518 	if ((prev_entry->next != &map->header) &&
519 	    (prev_entry->next->start < end))
520 		return (KERN_NO_SPACE);
521 
522 	protoeflags = 0;
523 
524 	if (cow & MAP_COPY_ON_WRITE)
525 		protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
526 
527 	if (cow & MAP_NOFAULT) {
528 		protoeflags |= MAP_ENTRY_NOFAULT;
529 
530 		KASSERT(object == NULL,
531 			("vm_map_insert: paradoxical MAP_NOFAULT request"));
532 	}
533 	if (cow & MAP_DISABLE_SYNCER)
534 		protoeflags |= MAP_ENTRY_NOSYNC;
535 	if (cow & MAP_DISABLE_COREDUMP)
536 		protoeflags |= MAP_ENTRY_NOCOREDUMP;
537 
538 	if (object) {
539 		/*
540 		 * When object is non-NULL, it could be shared with another
541 		 * process.  We have to set or clear OBJ_ONEMAPPING
542 		 * appropriately.
543 		 */
544 		if ((object->ref_count > 1) || (object->shadow_count != 0)) {
545 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
546 		}
547 	}
548 	else if ((prev_entry != &map->header) &&
549 		 (prev_entry->eflags == protoeflags) &&
550 		 (prev_entry->end == start) &&
551 		 (prev_entry->wired_count == 0) &&
552 		 ((prev_entry->object.vm_object == NULL) ||
553 		  vm_object_coalesce(prev_entry->object.vm_object,
554 				     OFF_TO_IDX(prev_entry->offset),
555 				     (vm_size_t)(prev_entry->end - prev_entry->start),
556 				     (vm_size_t)(end - prev_entry->end)))) {
557 		/*
558 		 * We were able to extend the object.  Determine if we
559 		 * can extend the previous map entry to include the
560 		 * new range as well.
561 		 */
562 		if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
563 		    (prev_entry->protection == prot) &&
564 		    (prev_entry->max_protection == max)) {
565 			map->size += (end - prev_entry->end);
566 			prev_entry->end = end;
567 			vm_map_simplify_entry(map, prev_entry);
568 			return (KERN_SUCCESS);
569 		}
570 
571 		/*
572 		 * If we can extend the object but cannot extend the
573 		 * map entry, we have to create a new map entry.  We
574 		 * must bump the ref count on the extended object to
575 		 * account for it.  object may be NULL.
576 		 */
577 		object = prev_entry->object.vm_object;
578 		offset = prev_entry->offset +
579 			(prev_entry->end - prev_entry->start);
580 		vm_object_reference(object);
581 	}
582 
583 	/*
584 	 * NOTE: if conditionals fail, object can be NULL here.  This occurs
585 	 * in things like the buffer map where we manage kva but do not manage
586 	 * backing objects.
587 	 */
588 
589 	/*
590 	 * Create a new entry
591 	 */
592 
593 	new_entry = vm_map_entry_create(map);
594 	new_entry->start = start;
595 	new_entry->end = end;
596 
597 	new_entry->eflags = protoeflags;
598 	new_entry->object.vm_object = object;
599 	new_entry->offset = offset;
600 	new_entry->avail_ssize = 0;
601 
602 	new_entry->inheritance = VM_INHERIT_DEFAULT;
603 	new_entry->protection = prot;
604 	new_entry->max_protection = max;
605 	new_entry->wired_count = 0;
606 
607 	/*
608 	 * Insert the new entry into the list
609 	 */
610 
611 	vm_map_entry_link(map, prev_entry, new_entry);
612 	map->size += new_entry->end - new_entry->start;
613 
614 	/*
615 	 * Update the free space hint
616 	 */
617 	if ((map->first_free == prev_entry) &&
618 	    (prev_entry->end >= new_entry->start)) {
619 		map->first_free = new_entry;
620 	}
621 
622 #if 0
623 	/*
624 	 * Temporarily removed to avoid MAP_STACK panic, due to
625 	 * MAP_STACK being a huge hack.  Will be added back in
626 	 * when MAP_STACK (and the user stack mapping) is fixed.
627 	 */
628 	/*
629 	 * It may be possible to simplify the entry
630 	 */
631 	vm_map_simplify_entry(map, new_entry);
632 #endif
633 
634 	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
635 		pmap_object_init_pt(map->pmap, start,
636 				    object, OFF_TO_IDX(offset), end - start,
637 				    cow & MAP_PREFAULT_PARTIAL);
638 	}
639 
640 	return (KERN_SUCCESS);
641 }
642 
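/*
 *	Illustrative sketch ("object", "start" and "size" are placeholders):
 *	the reference-count rule above means a caller passing a non-NULL
 *	object takes the reference first and drops it again if the insert
 *	fails.
 *
 *		vm_object_reference(object);
 *		vm_map_lock(map);
 *		rv = vm_map_insert(map, object, 0, start, start + size,
 *		    VM_PROT_ALL, VM_PROT_ALL, 0);
 *		vm_map_unlock(map);
 *		if (rv != KERN_SUCCESS)
 *			vm_object_deallocate(object);
 */
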
643 /*
644  * Find sufficient space for `length' bytes in the given map, starting at
645  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
646  */
647 int
648 vm_map_findspace(map, start, length, addr)
649 	vm_map_t map;
650 	vm_offset_t start;
651 	vm_size_t length;
652 	vm_offset_t *addr;
653 {
654 	vm_map_entry_t entry, next;
655 	vm_offset_t end;
656 
657 	mtx_assert(&vm_mtx, MA_OWNED);
658 	if (start < map->min_offset)
659 		start = map->min_offset;
660 	if (start > map->max_offset)
661 		return (1);
662 
663 	/*
664 	 * Look for the first possible address; if there's already something
665 	 * at this address, we have to start after it.
666 	 */
667 	if (start == map->min_offset) {
668 		if ((entry = map->first_free) != &map->header)
669 			start = entry->end;
670 	} else {
671 		vm_map_entry_t tmp;
672 
673 		if (vm_map_lookup_entry(map, start, &tmp))
674 			start = tmp->end;
675 		entry = tmp;
676 	}
677 
678 	/*
679 	 * Look through the rest of the map, trying to fit a new region in the
680 	 * gap between existing regions, or after the very last region.
681 	 */
682 	for (;; start = (entry = next)->end) {
683 		/*
684 		 * Find the end of the proposed new region.  Be sure we didn't
685 		 * go beyond the end of the map, or wrap around the address;
686 		 * if so, we lose.  Otherwise, if this is the last entry, or
687 		 * if the proposed new region fits before the next entry, we
688 		 * win.
689 		 */
690 		end = start + length;
691 		if (end > map->max_offset || end < start)
692 			return (1);
693 		next = entry->next;
694 		if (next == &map->header || next->start >= end)
695 			break;
696 	}
697 	SAVE_HINT(map, entry);
698 	*addr = start;
699 	if (map == kernel_map) {
700 		vm_offset_t ksize;
701 		if ((ksize = round_page(start + length)) > kernel_vm_end) {
702 			pmap_growkernel(ksize);
703 		}
704 	}
705 	return (0);
706 }
707 
708 /*
709  *	vm_map_find finds an unallocated region in the target address
710  *	map with the given length.  The search is defined to be
711  *	first-fit from the specified address; the region found is
712  *	returned in the same parameter.
713  *
714  *	If object is non-NULL, ref count must be bumped by caller
715  *	prior to making call to account for the new entry.
716  */
717 int
718 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
719 	    vm_offset_t *addr,	/* IN/OUT */
720 	    vm_size_t length, boolean_t find_space, vm_prot_t prot,
721 	    vm_prot_t max, int cow)
722 {
723 	vm_offset_t start;
724 	int result, s = 0;
725 
726 	mtx_assert(&vm_mtx, MA_OWNED);
727 	start = *addr;
728 
729 	if (map == kmem_map)
730 		s = splvm();
731 
732 	vm_map_lock(map);
733 	if (find_space) {
734 		if (vm_map_findspace(map, start, length, addr)) {
735 			vm_map_unlock(map);
736 			if (map == kmem_map)
737 				splx(s);
738 			return (KERN_NO_SPACE);
739 		}
740 		start = *addr;
741 	}
742 	result = vm_map_insert(map, object, offset,
743 		start, start + length, prot, max, cow);
744 	vm_map_unlock(map);
745 
746 	if (map == kmem_map)
747 		splx(s);
748 
749 	return (result);
750 }
751 
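/*
 *	Illustrative sketch (assumes vm_mtx is held and "size" is already
 *	page rounded): asking for an anonymous, pageable region at or
 *	above a hint address; on success *addr holds the chosen start.
 *
 *		vm_offset_t addr = hint;
 *
 *		if (vm_map_find(map, NULL, 0, &addr, size, TRUE,
 *		    VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
 *			return (ENOMEM);
 *
 *	With a NULL object the new entry is left unbacked (or coalesced
 *	into its predecessor), exactly as vm_map_insert() allows.
 */
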
752 /*
753  *	vm_map_simplify_entry:
754  *
755  *	Simplify the given map entry by merging with either neighbor.  This
756  *	routine also has the ability to merge with both neighbors.
757  *
758  *	The map must be locked.
759  *
760  *	This routine guarantees that the passed entry remains valid (though
761  *	possibly extended).  When merging, this routine may delete one or
762  *	both neighbors.
763  */
764 void
765 vm_map_simplify_entry(map, entry)
766 	vm_map_t map;
767 	vm_map_entry_t entry;
768 {
769 	vm_map_entry_t next, prev;
770 	vm_size_t prevsize, esize;
771 
772 	mtx_assert(&vm_mtx, MA_OWNED);
773 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
774 		return;
775 
776 	prev = entry->prev;
777 	if (prev != &map->header) {
778 		prevsize = prev->end - prev->start;
779 		if ( (prev->end == entry->start) &&
780 		     (prev->object.vm_object == entry->object.vm_object) &&
781 		     (!prev->object.vm_object ||
782 			(prev->offset + prevsize == entry->offset)) &&
783 		     (prev->eflags == entry->eflags) &&
784 		     (prev->protection == entry->protection) &&
785 		     (prev->max_protection == entry->max_protection) &&
786 		     (prev->inheritance == entry->inheritance) &&
787 		     (prev->wired_count == entry->wired_count)) {
788 			if (map->first_free == prev)
789 				map->first_free = entry;
790 			if (map->hint == prev)
791 				map->hint = entry;
792 			vm_map_entry_unlink(map, prev);
793 			entry->start = prev->start;
794 			entry->offset = prev->offset;
795 			if (prev->object.vm_object)
796 				vm_object_deallocate(prev->object.vm_object);
797 			vm_map_entry_dispose(map, prev);
798 		}
799 	}
800 
801 	next = entry->next;
802 	if (next != &map->header) {
803 		esize = entry->end - entry->start;
804 		if ((entry->end == next->start) &&
805 		    (next->object.vm_object == entry->object.vm_object) &&
806 		     (!entry->object.vm_object ||
807 			(entry->offset + esize == next->offset)) &&
808 		    (next->eflags == entry->eflags) &&
809 		    (next->protection == entry->protection) &&
810 		    (next->max_protection == entry->max_protection) &&
811 		    (next->inheritance == entry->inheritance) &&
812 		    (next->wired_count == entry->wired_count)) {
813 			if (map->first_free == next)
814 				map->first_free = entry;
815 			if (map->hint == next)
816 				map->hint = entry;
817 			vm_map_entry_unlink(map, next);
818 			entry->end = next->end;
819 			if (next->object.vm_object)
820 				vm_object_deallocate(next->object.vm_object);
821 			vm_map_entry_dispose(map, next);
822 	        }
823 	}
824 }
825 /*
826  *	vm_map_clip_start:	[ internal use only ]
827  *
828  *	Asserts that the given entry begins at or after
829  *	the specified address; if necessary,
830  *	it splits the entry into two.
831  */
832 #define vm_map_clip_start(map, entry, startaddr) \
833 { \
834 	if (startaddr > entry->start) \
835 		_vm_map_clip_start(map, entry, startaddr); \
836 }
837 
838 /*
839  *	This routine is called only when it is known that
840  *	the entry must be split.
841  */
842 static void
843 _vm_map_clip_start(map, entry, start)
844 	vm_map_t map;
845 	vm_map_entry_t entry;
846 	vm_offset_t start;
847 {
848 	vm_map_entry_t new_entry;
849 
850 	/*
851 	 * Split off the front portion -- note that we must insert the new
852 	 * entry BEFORE this one, so that this entry has the specified
853 	 * starting address.
854 	 */
855 
856 	vm_map_simplify_entry(map, entry);
857 
858 	/*
859 	 * If there is no object backing this entry, we might as well create
860 	 * one now.  If we defer it, an object can get created after the map
861 	 * is clipped, and individual objects will be created for the split-up
862 	 * map.  This is a bit of a hack, but is also about the best place to
863 	 * put this improvement.
864 	 */
865 
866 	if (entry->object.vm_object == NULL && !map->system_map) {
867 		vm_object_t object;
868 		object = vm_object_allocate(OBJT_DEFAULT,
869 				atop(entry->end - entry->start));
870 		entry->object.vm_object = object;
871 		entry->offset = 0;
872 	}
873 
874 	new_entry = vm_map_entry_create(map);
875 	*new_entry = *entry;
876 
877 	new_entry->end = start;
878 	entry->offset += (start - entry->start);
879 	entry->start = start;
880 
881 	vm_map_entry_link(map, entry->prev, new_entry);
882 
883 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
884 		vm_object_reference(new_entry->object.vm_object);
885 	}
886 }
887 
888 /*
889  *	vm_map_clip_end:	[ internal use only ]
890  *
891  *	Asserts that the given entry ends at or before
892  *	the specified address; if necessary,
893  *	it splits the entry into two.
894  */
895 
896 #define vm_map_clip_end(map, entry, endaddr) \
897 { \
898 	if (endaddr < entry->end) \
899 		_vm_map_clip_end(map, entry, endaddr); \
900 }
901 
902 /*
903  *	This routine is called only when it is known that
904  *	the entry must be split.
905  */
906 static void
907 _vm_map_clip_end(map, entry, end)
908 	vm_map_t map;
909 	vm_map_entry_t entry;
910 	vm_offset_t end;
911 {
912 	vm_map_entry_t new_entry;
913 
914 	/*
915 	 * If there is no object backing this entry, we might as well create
916 	 * one now.  If we defer it, an object can get created after the map
917 	 * is clipped, and individual objects will be created for the split-up
918 	 * map.  This is a bit of a hack, but is also about the best place to
919 	 * put this improvement.
920 	 */
921 
922 	if (entry->object.vm_object == NULL && !map->system_map) {
923 		vm_object_t object;
924 		object = vm_object_allocate(OBJT_DEFAULT,
925 				atop(entry->end - entry->start));
926 		entry->object.vm_object = object;
927 		entry->offset = 0;
928 	}
929 
930 	/*
931 	 * Create a new entry and insert it AFTER the specified entry
932 	 */
933 
934 	new_entry = vm_map_entry_create(map);
935 	*new_entry = *entry;
936 
937 	new_entry->start = entry->end = end;
938 	new_entry->offset += (end - entry->start);
939 
940 	vm_map_entry_link(map, entry, new_entry);
941 
942 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
943 		vm_object_reference(new_entry->object.vm_object);
944 	}
945 }
946 
947 /*
948  *	VM_MAP_RANGE_CHECK:	[ internal use only ]
949  *
950  *	Asserts that the starting and ending region
951  *	addresses fall within the valid range of the map.
952  */
953 #define	VM_MAP_RANGE_CHECK(map, start, end)		\
954 		{					\
955 		if (start < vm_map_min(map))		\
956 			start = vm_map_min(map);	\
957 		if (end > vm_map_max(map))		\
958 			end = vm_map_max(map);		\
959 		if (start > end)			\
960 			start = end;			\
961 		}
962 
963 /*
964  *	vm_map_submap:		[ kernel use only ]
965  *
966  *	Mark the given range as handled by a subordinate map.
967  *
968  *	This range must have been created with vm_map_find,
969  *	and no other operations may have been performed on this
970  *	range prior to calling vm_map_submap.
971  *
972  *	Only a limited number of operations can be performed
973  *	within this range after calling vm_map_submap:
974  *		vm_fault
975  *	[Don't try vm_map_copy!]
976  *
977  *	To remove a submapping, one must first remove the
978  *	range from the superior map, and then destroy the
979  *	submap (if desired).  [Better yet, don't try it.]
980  */
981 int
982 vm_map_submap(map, start, end, submap)
983 	vm_map_t map;
984 	vm_offset_t start;
985 	vm_offset_t end;
986 	vm_map_t submap;
987 {
988 	vm_map_entry_t entry;
989 	int result = KERN_INVALID_ARGUMENT;
990 
991 	mtx_assert(&vm_mtx, MA_OWNED);
992 	vm_map_lock(map);
993 
994 	VM_MAP_RANGE_CHECK(map, start, end);
995 
996 	if (vm_map_lookup_entry(map, start, &entry)) {
997 		vm_map_clip_start(map, entry, start);
998 	} else
999 		entry = entry->next;
1000 
1001 	vm_map_clip_end(map, entry, end);
1002 
1003 	if ((entry->start == start) && (entry->end == end) &&
1004 	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
1005 	    (entry->object.vm_object == NULL)) {
1006 		entry->object.sub_map = submap;
1007 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
1008 		result = KERN_SUCCESS;
1009 	}
1010 	vm_map_unlock(map);
1011 
1012 	return (result);
1013 }
1014 
1015 /*
1016  *	vm_map_protect:
1017  *
1018  *	Sets the protection of the specified address
1019  *	region in the target map.  If "set_max" is
1020  *	specified, the maximum protection is to be set;
1021  *	otherwise, only the current protection is affected.
1022  */
1023 int
1024 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
1025 	       vm_prot_t new_prot, boolean_t set_max)
1026 {
1027 	vm_map_entry_t current;
1028 	vm_map_entry_t entry;
1029 
1030 	mtx_assert(&vm_mtx, MA_OWNED);
1031 	vm_map_lock(map);
1032 
1033 	VM_MAP_RANGE_CHECK(map, start, end);
1034 
1035 	if (vm_map_lookup_entry(map, start, &entry)) {
1036 		vm_map_clip_start(map, entry, start);
1037 	} else {
1038 		entry = entry->next;
1039 	}
1040 
1041 	/*
1042 	 * Make a first pass to check for protection violations.
1043 	 */
1044 
1045 	current = entry;
1046 	while ((current != &map->header) && (current->start < end)) {
1047 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1048 			vm_map_unlock(map);
1049 			return (KERN_INVALID_ARGUMENT);
1050 		}
1051 		if ((new_prot & current->max_protection) != new_prot) {
1052 			vm_map_unlock(map);
1053 			return (KERN_PROTECTION_FAILURE);
1054 		}
1055 		current = current->next;
1056 	}
1057 
1058 	/*
1059 	 * Go back and fix up protections. [Note that clipping is not
1060 	 * necessary the second time.]
1061 	 */
1062 
1063 	current = entry;
1064 
1065 	while ((current != &map->header) && (current->start < end)) {
1066 		vm_prot_t old_prot;
1067 
1068 		vm_map_clip_end(map, current, end);
1069 
1070 		old_prot = current->protection;
1071 		if (set_max)
1072 			current->protection =
1073 			    (current->max_protection = new_prot) &
1074 			    old_prot;
1075 		else
1076 			current->protection = new_prot;
1077 
1078 		/*
1079 		 * Update physical map if necessary. Worry about copy-on-write
1080 		 * here -- CHECK THIS XXX
1081 		 */
1082 
1083 		if (current->protection != old_prot) {
1084 #define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
1085 							VM_PROT_ALL)
1086 
1087 			pmap_protect(map->pmap, current->start,
1088 			    current->end,
1089 			    current->protection & MASK(current));
1090 #undef	MASK
1091 		}
1092 
1093 		vm_map_simplify_entry(map, current);
1094 
1095 		current = current->next;
1096 	}
1097 
1098 	vm_map_unlock(map);
1099 	return (KERN_SUCCESS);
1100 }
1101 
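/*
 *	Illustrative sketch (the range is a placeholder): an mprotect()
 *	style caller makes a single call with set_max FALSE, so only the
 *	current protection changes and max_protection still bounds later
 *	requests.
 *
 *		rv = vm_map_protect(&p->p_vmspace->vm_map, addr, addr + len,
 *		    VM_PROT_READ, FALSE);
 *		if (rv == KERN_PROTECTION_FAILURE)
 *			return (EACCES);
 */
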
1102 /*
1103  *	vm_map_madvise:
1104  *
1105  * 	This routine traverses a process's map handling the madvise
1106  *	system call.  Advisories are classified as either those affecting
1107  *	the vm_map_entry structure, or those affecting the underlying
1108  *	objects.
1109  */
1110 
1111 int
1112 vm_map_madvise(map, start, end, behav)
1113 	vm_map_t map;
1114 	vm_offset_t start, end;
1115 	int behav;
1116 {
1117 	vm_map_entry_t current, entry;
1118 	int modify_map = 0;
1119 
1120 	mtx_assert(&vm_mtx, MA_OWNED);
1121 	/*
1122 	 * Some madvise calls directly modify the vm_map_entry, in which case
1123 	 * we need to use an exclusive lock on the map and we need to perform
1124 	 * various clipping operations.  Otherwise we only need a read-lock
1125 	 * on the map.
1126 	 */
1127 
1128 	switch(behav) {
1129 	case MADV_NORMAL:
1130 	case MADV_SEQUENTIAL:
1131 	case MADV_RANDOM:
1132 	case MADV_NOSYNC:
1133 	case MADV_AUTOSYNC:
1134 	case MADV_NOCORE:
1135 	case MADV_CORE:
1136 		modify_map = 1;
1137 		vm_map_lock(map);
1138 		break;
1139 	case MADV_WILLNEED:
1140 	case MADV_DONTNEED:
1141 	case MADV_FREE:
1142 		vm_map_lock_read(map);
1143 		break;
1144 	default:
1145 		return (KERN_INVALID_ARGUMENT);
1146 	}
1147 
1148 	/*
1149 	 * Locate starting entry and clip if necessary.
1150 	 */
1151 
1152 	VM_MAP_RANGE_CHECK(map, start, end);
1153 
1154 	if (vm_map_lookup_entry(map, start, &entry)) {
1155 		if (modify_map)
1156 			vm_map_clip_start(map, entry, start);
1157 	} else {
1158 		entry = entry->next;
1159 	}
1160 
1161 	if (modify_map) {
1162 		/*
1163 		 * madvise behaviors that are implemented in the vm_map_entry.
1164 		 *
1165 		 * We clip the vm_map_entry so that behavioral changes are
1166 		 * limited to the specified address range.
1167 		 */
1168 		for (current = entry;
1169 		     (current != &map->header) && (current->start < end);
1170 		     current = current->next
1171 		) {
1172 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1173 				continue;
1174 
1175 			vm_map_clip_end(map, current, end);
1176 
1177 			switch (behav) {
1178 			case MADV_NORMAL:
1179 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1180 				break;
1181 			case MADV_SEQUENTIAL:
1182 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1183 				break;
1184 			case MADV_RANDOM:
1185 				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1186 				break;
1187 			case MADV_NOSYNC:
1188 				current->eflags |= MAP_ENTRY_NOSYNC;
1189 				break;
1190 			case MADV_AUTOSYNC:
1191 				current->eflags &= ~MAP_ENTRY_NOSYNC;
1192 				break;
1193 			case MADV_NOCORE:
1194 				current->eflags |= MAP_ENTRY_NOCOREDUMP;
1195 				break;
1196 			case MADV_CORE:
1197 				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1198 				break;
1199 			default:
1200 				break;
1201 			}
1202 			vm_map_simplify_entry(map, current);
1203 		}
1204 		vm_map_unlock(map);
1205 	} else {
1206 		vm_pindex_t pindex;
1207 		int count;
1208 
1209 		/*
1210 		 * madvise behaviors that are implemented in the underlying
1211 		 * vm_object.
1212 		 *
1213 		 * Since we don't clip the vm_map_entry, we have to clip
1214 		 * the vm_object pindex and count.
1215 		 */
1216 		for (current = entry;
1217 		     (current != &map->header) && (current->start < end);
1218 		     current = current->next
1219 		) {
1220 			vm_offset_t useStart;
1221 
1222 			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1223 				continue;
1224 
1225 			pindex = OFF_TO_IDX(current->offset);
1226 			count = atop(current->end - current->start);
1227 			useStart = current->start;
1228 
1229 			if (current->start < start) {
1230 				pindex += atop(start - current->start);
1231 				count -= atop(start - current->start);
1232 				useStart = start;
1233 			}
1234 			if (current->end > end)
1235 				count -= atop(current->end - end);
1236 
1237 			if (count <= 0)
1238 				continue;
1239 
1240 			vm_object_madvise(current->object.vm_object,
1241 					  pindex, count, behav);
1242 			if (behav == MADV_WILLNEED) {
1243 				pmap_object_init_pt(
1244 				    map->pmap,
1245 				    useStart,
1246 				    current->object.vm_object,
1247 				    pindex,
1248 				    (count << PAGE_SHIFT),
1249 				    0
1250 				);
1251 			}
1252 		}
1253 		vm_map_unlock_read(map);
1254 	}
1255 	return(0);
1256 }
1257 
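/*
 *	Illustrative sketch: the two advisory classes above take different
 *	locks.  MADV_NOSYNC only flips a flag in the clipped map entries
 *	(exclusive map lock), while MADV_WILLNEED reaches into the backing
 *	objects under a read lock and pre-faults their pages.
 *
 *		vm_map_madvise(map, start, end, MADV_NOSYNC);
 *		vm_map_madvise(map, start, end, MADV_WILLNEED);
 */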
1258 
1259 /*
1260  *	vm_map_inherit:
1261  *
1262  *	Sets the inheritance of the specified address
1263  *	range in the target map.  Inheritance
1264  *	affects how the map will be shared with
1265  *	child maps at the time of vm_map_fork.
1266  */
1267 int
1268 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
1269 	       vm_inherit_t new_inheritance)
1270 {
1271 	vm_map_entry_t entry;
1272 	vm_map_entry_t temp_entry;
1273 
1274 	mtx_assert(&vm_mtx, MA_OWNED);
1275 	switch (new_inheritance) {
1276 	case VM_INHERIT_NONE:
1277 	case VM_INHERIT_COPY:
1278 	case VM_INHERIT_SHARE:
1279 		break;
1280 	default:
1281 		return (KERN_INVALID_ARGUMENT);
1282 	}
1283 
1284 	vm_map_lock(map);
1285 
1286 	VM_MAP_RANGE_CHECK(map, start, end);
1287 
1288 	if (vm_map_lookup_entry(map, start, &temp_entry)) {
1289 		entry = temp_entry;
1290 		vm_map_clip_start(map, entry, start);
1291 	} else
1292 		entry = temp_entry->next;
1293 
1294 	while ((entry != &map->header) && (entry->start < end)) {
1295 		vm_map_clip_end(map, entry, end);
1296 
1297 		entry->inheritance = new_inheritance;
1298 
1299 		vm_map_simplify_entry(map, entry);
1300 
1301 		entry = entry->next;
1302 	}
1303 
1304 	vm_map_unlock(map);
1305 	return (KERN_SUCCESS);
1306 }
1307 
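/*
 *	Illustrative sketch (the range is a placeholder): marking a region
 *	to be shared with child maps at vm_map_fork time, as minherit(2)
 *	does.
 *
 *		vm_map_inherit(&p->p_vmspace->vm_map, start, end,
 *		    VM_INHERIT_SHARE);
 *
 *	Regions left at VM_INHERIT_COPY are instead copied copy-on-write
 *	into the child, and VM_INHERIT_NONE regions are omitted entirely.
 */
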
1308 /*
1309  * Implement the semantics of mlock
1310  */
1311 int
1312 vm_map_user_pageable(map, start, end, new_pageable)
1313 	vm_map_t map;
1314 	vm_offset_t start;
1315 	vm_offset_t end;
1316 	boolean_t new_pageable;
1317 {
1318 	vm_map_entry_t entry;
1319 	vm_map_entry_t start_entry;
1320 	vm_offset_t estart;
1321 	int rv;
1322 
1323 	vm_map_lock(map);
1324 	VM_MAP_RANGE_CHECK(map, start, end);
1325 
1326 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1327 		vm_map_unlock(map);
1328 		return (KERN_INVALID_ADDRESS);
1329 	}
1330 
1331 	if (new_pageable) {
1332 
1333 		entry = start_entry;
1334 		vm_map_clip_start(map, entry, start);
1335 
1336 		/*
1337 		 * Now decrement the wiring count for each region. If a region
1338 		 * becomes completely unwired, unwire its physical pages and
1339 		 * mappings.
1340 		 */
1341 		while ((entry != &map->header) && (entry->start < end)) {
1342 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1343 				vm_map_clip_end(map, entry, end);
1344 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1345 				entry->wired_count--;
1346 				if (entry->wired_count == 0)
1347 					vm_fault_unwire(map, entry->start, entry->end);
1348 			}
1349 			vm_map_simplify_entry(map,entry);
1350 			entry = entry->next;
1351 		}
1352 	} else {
1353 
1354 		entry = start_entry;
1355 
1356 		while ((entry != &map->header) && (entry->start < end)) {
1357 
1358 			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1359 				entry = entry->next;
1360 				continue;
1361 			}
1362 
1363 			if (entry->wired_count != 0) {
1364 				entry->wired_count++;
1365 				entry->eflags |= MAP_ENTRY_USER_WIRED;
1366 				entry = entry->next;
1367 				continue;
1368 			}
1369 
1370 			/* Here on entry being newly wired */
1371 
1372 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1373 				int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1374 				if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1375 
1376 					vm_object_shadow(&entry->object.vm_object,
1377 					    &entry->offset,
1378 					    atop(entry->end - entry->start));
1379 					entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1380 
1381 				} else if (entry->object.vm_object == NULL &&
1382 					   !map->system_map) {
1383 
1384 					entry->object.vm_object =
1385 					    vm_object_allocate(OBJT_DEFAULT,
1386 						atop(entry->end - entry->start));
1387 					entry->offset = (vm_offset_t) 0;
1388 
1389 				}
1390 			}
1391 
1392 			vm_map_clip_start(map, entry, start);
1393 			vm_map_clip_end(map, entry, end);
1394 
1395 			entry->wired_count++;
1396 			entry->eflags |= MAP_ENTRY_USER_WIRED;
1397 			estart = entry->start;
1398 
1399 			/* First we need to allow map modifications */
1400 			vm_map_set_recursive(map);
1401 			vm_map_lock_downgrade(map);
1402 			map->timestamp++;
1403 
1404 			rv = vm_fault_user_wire(map, entry->start, entry->end);
1405 			if (rv) {
1406 
1407 				entry->wired_count--;
1408 				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1409 
1410 				vm_map_clear_recursive(map);
1411 				vm_map_unlock(map);
1412 
1413 				(void) vm_map_user_pageable(map, start, entry->start, TRUE);
1414 				return rv;
1415 			}
1416 
1417 			vm_map_clear_recursive(map);
1418 			if (vm_map_lock_upgrade(map)) {
1419 				vm_map_lock(map);
1420 				if (vm_map_lookup_entry(map, estart, &entry)
1421 				    == FALSE) {
1422 					vm_map_unlock(map);
1423 					(void) vm_map_user_pageable(map,
1424 								    start,
1425 								    estart,
1426 								    TRUE);
1427 					return (KERN_INVALID_ADDRESS);
1428 				}
1429 			}
1430 			vm_map_simplify_entry(map,entry);
1431 		}
1432 	}
1433 	map->timestamp++;
1434 	vm_map_unlock(map);
1435 	return KERN_SUCCESS;
1436 }
1437 
1438 /*
1439  *	vm_map_pageable:
1440  *
1441  *	Sets the pageability of the specified address
1442  *	range in the target map.  Regions specified
1443  *	as not pageable require locked-down physical
1444  *	memory and physical page maps.
1445  *
1446  *	The map must not be locked, but a reference
1447  *	must remain to the map throughout the call.
1448  */
1449 int
1450 vm_map_pageable(map, start, end, new_pageable)
1451 	vm_map_t map;
1452 	vm_offset_t start;
1453 	vm_offset_t end;
1454 	boolean_t new_pageable;
1455 {
1456 	vm_map_entry_t entry;
1457 	vm_map_entry_t start_entry;
1458 	vm_offset_t failed = 0;
1459 	int rv;
1460 
1461 	mtx_assert(&vm_mtx, MA_OWNED);
1462 	vm_map_lock(map);
1463 
1464 	VM_MAP_RANGE_CHECK(map, start, end);
1465 
1466 	/*
1467 	 * Only one pageability change may take place at one time, since
1468 	 * vm_fault assumes it will be called only once for each
1469 	 * wiring/unwiring.  Therefore, we have to make sure we're actually
1470 	 * changing the pageability for the entire region.  We do so before
1471 	 * making any changes.
1472 	 */
1473 
1474 	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
1475 		vm_map_unlock(map);
1476 		return (KERN_INVALID_ADDRESS);
1477 	}
1478 	entry = start_entry;
1479 
1480 	/*
1481 	 * Actions are rather different for wiring and unwiring, so we have
1482 	 * two separate cases.
1483 	 */
1484 
1485 	if (new_pageable) {
1486 
1487 		vm_map_clip_start(map, entry, start);
1488 
1489 		/*
1490 		 * Unwiring.  First ensure that the range to be unwired is
1491 		 * really wired down and that there are no holes.
1492 		 */
1493 		while ((entry != &map->header) && (entry->start < end)) {
1494 
1495 			if (entry->wired_count == 0 ||
1496 			    (entry->end < end &&
1497 				(entry->next == &map->header ||
1498 				    entry->next->start > entry->end))) {
1499 				vm_map_unlock(map);
1500 				return (KERN_INVALID_ARGUMENT);
1501 			}
1502 			entry = entry->next;
1503 		}
1504 
1505 		/*
1506 		 * Now decrement the wiring count for each region. If a region
1507 		 * becomes completely unwired, unwire its physical pages and
1508 		 * mappings.
1509 		 */
1510 		entry = start_entry;
1511 		while ((entry != &map->header) && (entry->start < end)) {
1512 			vm_map_clip_end(map, entry, end);
1513 
1514 			entry->wired_count--;
1515 			if (entry->wired_count == 0)
1516 				vm_fault_unwire(map, entry->start, entry->end);
1517 
1518 			vm_map_simplify_entry(map, entry);
1519 
1520 			entry = entry->next;
1521 		}
1522 	} else {
1523 		/*
1524 		 * Wiring.  We must do this in two passes:
1525 		 *
1526 		 * 1.  Holding the write lock, we create any shadow or zero-fill
1527 		 * objects that need to be created. Then we clip each map
1528 		 * entry to the region to be wired and increment its wiring
1529 		 * count.  We create objects before clipping the map entries
1530 		 * to avoid object proliferation.
1531 		 *
1532 		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
1533 		 * fault in the pages for any newly wired area (wired_count is
1534 		 * 1).
1535 		 *
1536 		 * Downgrading to a read lock for vm_fault_wire avoids a possible
1537 		 * deadlock with another process that may have faulted on one
1538 		 * of the pages to be wired (it would mark the page busy,
1539 		 * blocking us, then in turn block on the map lock that we
1540 		 * hold).  Because of problems in the recursive lock package,
1541 		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
1542 		 * any actions that require the write lock must be done
1543 		 * beforehand.  Because we keep the read lock on the map, the
1544 		 * copy-on-write status of the entries we modify here cannot
1545 		 * change.
1546 		 */
1547 
1548 		/*
1549 		 * Pass 1.
1550 		 */
1551 		while ((entry != &map->header) && (entry->start < end)) {
1552 			if (entry->wired_count == 0) {
1553 
1554 				/*
1555 				 * Perform actions of vm_map_lookup that need
1556 				 * the write lock on the map: create a shadow
1557 				 * object for a copy-on-write region, or an
1558 				 * object for a zero-fill region.
1559 				 *
1560 				 * We don't have to do this for entries that
1561 				 * point to sub maps, because we won't
1562 				 * hold the lock on the sub map.
1563 				 */
1564 				if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1565 					int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1566 					if (copyflag &&
1567 					    ((entry->protection & VM_PROT_WRITE) != 0)) {
1568 
1569 						vm_object_shadow(&entry->object.vm_object,
1570 						    &entry->offset,
1571 						    atop(entry->end - entry->start));
1572 						entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1573 					} else if (entry->object.vm_object == NULL &&
1574 						   !map->system_map) {
1575 						entry->object.vm_object =
1576 						    vm_object_allocate(OBJT_DEFAULT,
1577 							atop(entry->end - entry->start));
1578 						entry->offset = (vm_offset_t) 0;
1579 					}
1580 				}
1581 			}
1582 			vm_map_clip_start(map, entry, start);
1583 			vm_map_clip_end(map, entry, end);
1584 			entry->wired_count++;
1585 
1586 			/*
1587 			 * Check for holes
1588 			 */
1589 			if (entry->end < end &&
1590 			    (entry->next == &map->header ||
1591 				entry->next->start > entry->end)) {
1592 				/*
1593 				 * Found one.  Object creation actions do not
1594 				 * need to be undone, but the wired counts
1595 				 * need to be restored.
1596 				 */
1597 				while (entry != &map->header && entry->end > start) {
1598 					entry->wired_count--;
1599 					entry = entry->prev;
1600 				}
1601 				vm_map_unlock(map);
1602 				return (KERN_INVALID_ARGUMENT);
1603 			}
1604 			entry = entry->next;
1605 		}
1606 
1607 		/*
1608 		 * Pass 2.
1609 		 */
1610 
1611 		/*
1612 		 * HACK HACK HACK HACK
1613 		 *
1614 		 * If we are wiring in the kernel map or a submap of it,
1615 		 * unlock the map to avoid deadlocks.  We trust that the
1616 		 * kernel is well-behaved, and therefore will not do
1617 		 * anything destructive to this region of the map while
1618 		 * we have it unlocked.  We cannot trust user processes
1619 		 * to do the same.
1620 		 *
1621 		 * HACK HACK HACK HACK
1622 		 */
1623 		if (vm_map_pmap(map) == kernel_pmap) {
1624 			vm_map_unlock(map);	/* trust me ... */
1625 		} else {
1626 			vm_map_lock_downgrade(map);
1627 		}
1628 
1629 		rv = 0;
1630 		entry = start_entry;
1631 		while (entry != &map->header && entry->start < end) {
1632 			/*
1633 			 * If vm_fault_wire fails for any page we need to undo
1634 			 * what has been done.  We decrement the wiring count
1635 			 * for those pages which have not yet been wired (now)
1636 			 * and unwire those that have (later).
1637 			 *
1638 			 * XXX this violates the locking protocol on the map,
1639 			 * needs to be fixed.
1640 			 */
1641 			if (rv)
1642 				entry->wired_count--;
1643 			else if (entry->wired_count == 1) {
1644 				rv = vm_fault_wire(map, entry->start, entry->end);
1645 				if (rv) {
1646 					failed = entry->start;
1647 					entry->wired_count--;
1648 				}
1649 			}
1650 			entry = entry->next;
1651 		}
1652 
1653 		if (vm_map_pmap(map) == kernel_pmap) {
1654 			vm_map_lock(map);
1655 		}
1656 		if (rv) {
1657 			vm_map_unlock(map);
1658 			(void) vm_map_pageable(map, start, failed, TRUE);
1659 			return (rv);
1660 		}
1661 		vm_map_simplify_entry(map, start_entry);
1662 	}
1663 
1664 	vm_map_unlock(map);
1665 
1666 	return (KERN_SUCCESS);
1667 }
1668 
1669 /*
1670  * vm_map_clean
1671  *
1672  * Push any dirty cached pages in the address range to their pager.
1673  * If syncio is TRUE, dirty pages are written synchronously.
1674  * If invalidate is TRUE, any cached pages are freed as well.
1675  *
1676  * Returns an error if any part of the specified range is not mapped.
1677  */
1678 int
1679 vm_map_clean(map, start, end, syncio, invalidate)
1680 	vm_map_t map;
1681 	vm_offset_t start;
1682 	vm_offset_t end;
1683 	boolean_t syncio;
1684 	boolean_t invalidate;
1685 {
1686 	vm_map_entry_t current;
1687 	vm_map_entry_t entry;
1688 	vm_size_t size;
1689 	vm_object_t object;
1690 	vm_ooffset_t offset;
1691 
1692 	mtx_assert(&Giant, MA_OWNED);
1693 	mtx_assert(&vm_mtx, MA_OWNED);
1694 	vm_map_lock_read(map);
1695 	VM_MAP_RANGE_CHECK(map, start, end);
1696 	if (!vm_map_lookup_entry(map, start, &entry)) {
1697 		vm_map_unlock_read(map);
1698 		return (KERN_INVALID_ADDRESS);
1699 	}
1700 	/*
1701 	 * Make a first pass to check for holes.
1702 	 */
1703 	for (current = entry; current->start < end; current = current->next) {
1704 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1705 			vm_map_unlock_read(map);
1706 			return (KERN_INVALID_ARGUMENT);
1707 		}
1708 		if (end > current->end &&
1709 		    (current->next == &map->header ||
1710 			current->end != current->next->start)) {
1711 			vm_map_unlock_read(map);
1712 			return (KERN_INVALID_ADDRESS);
1713 		}
1714 	}
1715 
1716 	if (invalidate)
1717 		pmap_remove(vm_map_pmap(map), start, end);
1718 	/*
1719 	 * Make a second pass, cleaning/uncaching pages from the indicated
1720 	 * objects as we go.
1721 	 */
1722 	for (current = entry; current->start < end; current = current->next) {
1723 		offset = current->offset + (start - current->start);
1724 		size = (end <= current->end ? end : current->end) - start;
1725 		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
1726 			vm_map_t smap;
1727 			vm_map_entry_t tentry;
1728 			vm_size_t tsize;
1729 
1730 			smap = current->object.sub_map;
1731 			vm_map_lock_read(smap);
1732 			(void) vm_map_lookup_entry(smap, offset, &tentry);
1733 			tsize = tentry->end - offset;
1734 			if (tsize < size)
1735 				size = tsize;
1736 			object = tentry->object.vm_object;
1737 			offset = tentry->offset + (offset - tentry->start);
1738 			vm_map_unlock_read(smap);
1739 		} else {
1740 			object = current->object.vm_object;
1741 		}
1742 		/*
1743 		 * Note that there is absolutely no sense in writing out
1744 		 * anonymous objects, so we track down the vnode object
1745 		 * to write out.
1746 		 * We invalidate (remove) all pages from the address space
1747 		 * anyway, for semantic correctness.
1748 		 */
1749 		while (object->backing_object) {
1750 			object = object->backing_object;
1751 			offset += object->backing_object_offset;
1752 			if (object->size < OFF_TO_IDX( offset + size))
1753 				size = IDX_TO_OFF(object->size) - offset;
1754 		}
1755 		if (object && (object->type == OBJT_VNODE) &&
1756 		    (current->protection & VM_PROT_WRITE)) {
1757 			/*
1758 			 * Flush pages if writing is allowed, invalidate them
1759 			 * if invalidation requested.  Pages undergoing I/O
1760 			 * will be ignored by vm_object_page_remove().
1761 			 *
1762 			 * We cannot lock the vnode and then wait for paging
1763 			 * to complete without deadlocking against vm_fault.
1764 			 * Instead we simply call vm_object_page_remove() and
1765 			 * allow it to block internally on a page-by-page
1766 			 * basis when it encounters pages undergoing async
1767 			 * I/O.
1768 			 */
1769 			int flags;
1770 
1771 			vm_object_reference(object);
1772 			mtx_unlock(&vm_mtx);
1773 			vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
1774 			mtx_lock(&vm_mtx);
1775 			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1776 			flags |= invalidate ? OBJPC_INVAL : 0;
1777 			vm_object_page_clean(object,
1778 			    OFF_TO_IDX(offset),
1779 			    OFF_TO_IDX(offset + size + PAGE_MASK),
1780 			    flags);
1781 			if (invalidate) {
1782 				/*vm_object_pip_wait(object, "objmcl");*/
1783 				vm_object_page_remove(object,
1784 				    OFF_TO_IDX(offset),
1785 				    OFF_TO_IDX(offset + size + PAGE_MASK),
1786 				    FALSE);
1787 			}
1788 			VOP_UNLOCK(object->handle, 0, curproc);
1789 			vm_object_deallocate(object);
1790 		}
1791 		start += size;
1792 	}
1793 
1794 	vm_map_unlock_read(map);
1795 	return (KERN_SUCCESS);
1796 }
1797 
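/*
 *	Illustrative sketch (the range is a placeholder): msync() style
 *	callers map their flags onto the two booleans above, e.g. a
 *	synchronous, invalidating sync becomes
 *
 *		rv = vm_map_clean(map, addr, addr + len, TRUE, TRUE);
 *
 *	which writes dirty vnode-backed pages synchronously and then
 *	discards the cached pages, while (FALSE, FALSE) merely starts
 *	asynchronous writes.
 */
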
1798 /*
1799  *	vm_map_entry_unwire:	[ internal use only ]
1800  *
1801  *	Make the region specified by this entry pageable.
1802  *
1803  *	The map in question should be locked.
1804  *	[This is the reason for this routine's existence.]
1805  */
1806 static void
1807 vm_map_entry_unwire(map, entry)
1808 	vm_map_t map;
1809 	vm_map_entry_t entry;
1810 {
1811 	vm_fault_unwire(map, entry->start, entry->end);
1812 	entry->wired_count = 0;
1813 }
1814 
1815 /*
1816  *	vm_map_entry_delete:	[ internal use only ]
1817  *
1818  *	Deallocate the given entry from the target map.
1819  */
1820 static void
1821 vm_map_entry_delete(map, entry)
1822 	vm_map_t map;
1823 	vm_map_entry_t entry;
1824 {
1825 	vm_map_entry_unlink(map, entry);
1826 	map->size -= entry->end - entry->start;
1827 
1828 	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1829 		vm_object_deallocate(entry->object.vm_object);
1830 	}
1831 
1832 	vm_map_entry_dispose(map, entry);
1833 }
1834 
1835 /*
1836  *	vm_map_delete:	[ internal use only ]
1837  *
1838  *	Deallocates the given address range from the target
1839  *	map.
1840  */
1841 int
1842 vm_map_delete(map, start, end)
1843 	vm_map_t map;
1844 	vm_offset_t start;
1845 	vm_offset_t end;
1846 {
1847 	vm_object_t object;
1848 	vm_map_entry_t entry;
1849 	vm_map_entry_t first_entry;
1850 
1851 	mtx_assert(&vm_mtx, MA_OWNED);
1852 	/*
1853 	 * Find the start of the region, and clip it
1854 	 */
1855 
1856 	if (!vm_map_lookup_entry(map, start, &first_entry))
1857 		entry = first_entry->next;
1858 	else {
1859 		entry = first_entry;
1860 		vm_map_clip_start(map, entry, start);
1861 		/*
1862 		 * Fix the lookup hint now, rather than each time through the
1863 		 * loop.
1864 		 */
1865 		SAVE_HINT(map, entry->prev);
1866 	}
1867 
1868 	/*
1869 	 * Save the free space hint
1870 	 */
1871 
1872 	if (entry == &map->header) {
1873 		map->first_free = &map->header;
1874 	} else if (map->first_free->start >= start) {
1875 		map->first_free = entry->prev;
1876 	}
1877 
1878 	/*
1879 	 * Step through all entries in this region
1880 	 */
1881 
1882 	while ((entry != &map->header) && (entry->start < end)) {
1883 		vm_map_entry_t next;
1884 		vm_offset_t s, e;
1885 		vm_pindex_t offidxstart, offidxend, count;
1886 
1887 		vm_map_clip_end(map, entry, end);
1888 
1889 		s = entry->start;
1890 		e = entry->end;
1891 		next = entry->next;
1892 
1893 		offidxstart = OFF_TO_IDX(entry->offset);
1894 		count = OFF_TO_IDX(e - s);
1895 		object = entry->object.vm_object;
1896 
1897 		/*
1898 		 * Unwire before removing addresses from the pmap; otherwise,
1899 		 * unwiring will put the entries back in the pmap.
1900 		 */
1901 		if (entry->wired_count != 0) {
1902 			vm_map_entry_unwire(map, entry);
1903 		}
1904 
1905 		offidxend = offidxstart + count;
1906 
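		/*
		 * Pages in the kernel and kmem objects are removed from the
		 * object directly; for anything else, remove the mappings
		 * from the pmap first.
		 */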
1907 		if ((object == kernel_object) || (object == kmem_object)) {
1908 			vm_object_page_remove(object, offidxstart, offidxend, FALSE);
1909 		} else {
1910 			pmap_remove(map->pmap, s, e);
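			/*
			 * If an anonymous (default or swap) object is still
			 * referenced elsewhere but mapped only here
			 * (OBJ_ONEMAPPING), free the pages and swap space
			 * backing the deleted range now and shrink the
			 * object, rather than waiting for its last reference
			 * to go away.
			 */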
1911 			if (object != NULL &&
1912 			    object->ref_count != 1 &&
1913 			    (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
1914 			    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
1915 				vm_object_collapse(object);
1916 				vm_object_page_remove(object, offidxstart, offidxend, FALSE);
1917 				if (object->type == OBJT_SWAP) {
1918 					swap_pager_freespace(object, offidxstart, count);
1919 				}
1920 				if (offidxend >= object->size &&
1921 				    offidxstart < object->size) {
1922 					object->size = offidxstart;
1923 				}
1924 			}
1925 		}
1926 
1927 		/*
1928 		 * Delete the entry (which may delete the object) only after
1929 		 * removing all pmap entries pointing to its pages.
1930 		 * (Otherwise, its page frames may be reallocated, and any
1931 		 * modify bits will be set in the wrong object!)
1932 		 */
1933 		vm_map_entry_delete(map, entry);
1934 		entry = next;
1935 	}
1936 	return (KERN_SUCCESS);
1937 }
1938 
1939 /*
1940  *	vm_map_remove:
1941  *
1942  *	Remove the given address range from the target map.
1943  *	This is the exported form of vm_map_delete.
1944  */
1945 int
1946 vm_map_remove(map, start, end)
1947 	vm_map_t map;
1948 	vm_offset_t start;
1949 	vm_offset_t end;
1950 {
1951 	int result, s = 0;
1952 
1953 	mtx_assert(&vm_mtx, MA_OWNED);
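	/*
	 * The kmem_map can be manipulated from interrupt context (e.g. by
	 * interrupt-time allocations), so block interrupts with splvm()
	 * while it is locked.
	 */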
1954 	if (map == kmem_map)
1955 		s = splvm();
1956 
1957 	vm_map_lock(map);
1958 	VM_MAP_RANGE_CHECK(map, start, end);
1959 	result = vm_map_delete(map, start, end);
1960 	vm_map_unlock(map);
1961 
1962 	if (map == kmem_map)
1963 		splx(s);
1964 
1965 	return (result);
1966 }
1967 
1968 /*
1969  *	vm_map_check_protection:
1970  *
1971  *	Assert that the target map allows the specified
1972  *	privilege on the entire address region given.
1973  *	The entire region must be allocated.
1974  */
1975 boolean_t
1976 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
1977 			vm_prot_t protection)
1978 {
1979 	vm_map_entry_t entry;
1980 	vm_map_entry_t tmp_entry;
1981 
1982 	mtx_assert(&vm_mtx, MA_OWNED);
1983 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1984 		return (FALSE);
1985 	}
1986 	entry = tmp_entry;
1987 
1988 	while (start < end) {
1989 		if (entry == &map->header) {
1990 			return (FALSE);
1991 		}
1992 		/*
1993 		 * No holes allowed!
1994 		 */
1995 
1996 		if (start < entry->start) {
1997 			return (FALSE);
1998 		}
1999 		/*
2000 		 * Check protection associated with entry.
2001 		 */
2002 
2003 		if ((entry->protection & protection) != protection) {
2004 			return (FALSE);
2005 		}
2006 		/* go to next entry */
2007 
2008 		start = entry->end;
2009 		entry = entry->next;
2010 	}
2011 	return (TRUE);
2012 }
2013 
2014 /*
2015  * Split the pages in a map entry into a new object.  This affords
2016  * easier removal of unused pages, and keeps object inheritance from
2017  * having a negative impact on memory usage.
2018  */
2019 static void
2020 vm_map_split(entry)
2021 	vm_map_entry_t entry;
2022 {
2023 	vm_page_t m;
2024 	vm_object_t orig_object, new_object, source;
2025 	vm_offset_t s, e;
2026 	vm_pindex_t offidxstart, offidxend, idx;
2027 	vm_size_t size;
2028 	vm_ooffset_t offset;
2029 
2030 	mtx_assert(&vm_mtx, MA_OWNED);
2031 	orig_object = entry->object.vm_object;
2032 	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
2033 		return;
2034 	if (orig_object->ref_count <= 1)
2035 		return;
2036 
2037 	offset = entry->offset;
2038 	s = entry->start;
2039 	e = entry->end;
2040 
2041 	offidxstart = OFF_TO_IDX(offset);
2042 	offidxend = offidxstart + OFF_TO_IDX(e - s);
2043 	size = offidxend - offidxstart;
2044 
2045 	new_object = vm_pager_allocate(orig_object->type,
2046 		NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
2047 	if (new_object == NULL)
2048 		return;
2049 
2050 	source = orig_object->backing_object;
2051 	if (source != NULL) {
2052 		vm_object_reference(source);	/* Referenced by new_object */
2053 		TAILQ_INSERT_TAIL(&source->shadow_head,
2054 				  new_object, shadow_list);
2055 		vm_object_clear_flag(source, OBJ_ONEMAPPING);
2056 		new_object->backing_object_offset =
2057 			orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
2058 		new_object->backing_object = source;
2059 		source->shadow_count++;
2060 		source->generation++;
2061 	}
2062 
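	/*
	 * Move every resident page in the split range from orig_object to
	 * new_object.  The pages are left busy here and woken up below,
	 * after any swap metadata has been copied.
	 */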
2063 	for (idx = 0; idx < size; idx++) {
2064 		vm_page_t m;
2065 
2066 	retry:
2067 		m = vm_page_lookup(orig_object, offidxstart + idx);
2068 		if (m == NULL)
2069 			continue;
2070 
2071 		/*
2072 		 * We must wait for pending I/O to complete before we can
2073 		 * rename the page.
2074 		 *
2075 		 * We do not have to VM_PROT_NONE the page as mappings should
2076 		 * not be changed by this operation.
2077 		 */
2078 		if (vm_page_sleep_busy(m, TRUE, "spltwt"))
2079 			goto retry;
2080 
2081 		vm_page_busy(m);
2082 		vm_page_rename(m, new_object, idx);
2083 		/* page automatically made dirty by rename and cache handled */
2084 		vm_page_busy(m);
2085 	}
2086 
2087 	if (orig_object->type == OBJT_SWAP) {
2088 		vm_object_pip_add(orig_object, 1);
2089 		/*
2090 		 * copy orig_object pages into new_object
2091 		 * and destroy unneeded pages in
2092 		 * shadow object.
2093 		 */
2094 		swap_pager_copy(orig_object, new_object, offidxstart, 0);
2095 		vm_object_pip_wakeup(orig_object);
2096 	}
2097 
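	/*
	 * Unbusy the pages moved above now that the swap metadata (if any)
	 * has been transferred.
	 */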
2098 	for (idx = 0; idx < size; idx++) {
2099 		m = vm_page_lookup(new_object, idx);
2100 		if (m) {
2101 			vm_page_wakeup(m);
2102 		}
2103 	}
2104 
2105 	entry->object.vm_object = new_object;
2106 	entry->offset = 0LL;
2107 	vm_object_deallocate(orig_object);
2108 }
2109 
2110 /*
2111  *	vm_map_copy_entry:
2112  *
2113  *	Copies the contents of the source entry to the destination
2114  *	entry.  The entries *must* be aligned properly.
2115  */
2116 static void
2117 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
2118 	vm_map_t src_map, dst_map;
2119 	vm_map_entry_t src_entry, dst_entry;
2120 {
2121 	vm_object_t src_object;
2122 
2123 	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
2124 		return;
2125 
2126 	if (src_entry->wired_count == 0) {
2127 
2128 		/*
2129 		 * If the source entry is marked needs_copy, it is already
2130 		 * write-protected.
2131 		 */
2132 		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
2133 			pmap_protect(src_map->pmap,
2134 			    src_entry->start,
2135 			    src_entry->end,
2136 			    src_entry->protection & ~VM_PROT_WRITE);
2137 		}
2138 
2139 		/*
2140 		 * Make a copy of the object.
2141 		 */
2142 		if ((src_object = src_entry->object.vm_object) != NULL) {
2143 
2144 			if ((src_object->handle == NULL) &&
2145 				(src_object->type == OBJT_DEFAULT ||
2146 				 src_object->type == OBJT_SWAP)) {
2147 				vm_object_collapse(src_object);
2148 				if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
2149 					vm_map_split(src_entry);
2150 					src_object = src_entry->object.vm_object;
2151 				}
2152 			}
2153 
2154 			vm_object_reference(src_object);
2155 			vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
2156 			dst_entry->object.vm_object = src_object;
2157 			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2158 			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
2159 			dst_entry->offset = src_entry->offset;
2160 		} else {
2161 			dst_entry->object.vm_object = NULL;
2162 			dst_entry->offset = 0;
2163 		}
2164 
2165 		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
2166 		    dst_entry->end - dst_entry->start, src_entry->start);
2167 	} else {
2168 		/*
2169 		 * Of course, wired-down pages can't be made copy-on-write.
2170 		 * Instead, cause wired pages to be copied into the new map by
2171 		 * simulating faults (the new pages are pageable).
2172 		 */
2173 		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
2174 	}
2175 }
2176 
2177 /*
2178  * vmspace_fork:
2179  * Create a new process vmspace structure and vm_map
2180  * based on those of an existing process.  The new map
2181  * is based on the old map, according to the inheritance
2182  * values on the regions in that map.
2183  *
2184  * The source map must not be locked.
2185  */
2186 struct vmspace *
2187 vmspace_fork(vm1)
2188 	struct vmspace *vm1;
2189 {
2190 	struct vmspace *vm2;
2191 	vm_map_t old_map = &vm1->vm_map;
2192 	vm_map_t new_map;
2193 	vm_map_entry_t old_entry;
2194 	vm_map_entry_t new_entry;
2195 	vm_object_t object;
2196 
2197 	mtx_assert(&vm_mtx, MA_OWNED);
2198 	vm_map_lock(old_map);
2199 	old_map->infork = 1;
2200 
2201 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
2202 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
2203 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
2204 	new_map = &vm2->vm_map;	/* XXX */
2205 	new_map->timestamp = 1;
2206 
2207 	old_entry = old_map->header.next;
2208 
2209 	while (old_entry != &old_map->header) {
2210 		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2211 			panic("vm_map_fork: encountered a submap");
2212 
2213 		switch (old_entry->inheritance) {
2214 		case VM_INHERIT_NONE:
2215 			break;
2216 
2217 		case VM_INHERIT_SHARE:
2218 			/*
2219 			 * Clone the entry, creating the shared object if necessary.
2220 			 */
2221 			object = old_entry->object.vm_object;
2222 			if (object == NULL) {
2223 				object = vm_object_allocate(OBJT_DEFAULT,
2224 					atop(old_entry->end - old_entry->start));
2225 				old_entry->object.vm_object = object;
2226 				old_entry->offset = (vm_offset_t) 0;
2227 			}
2228 
2229 			/*
2230 			 * Add the reference before calling vm_object_shadow
2231 			 * to ensure that a shadow object is created.
2232 			 */
2233 			vm_object_reference(object);
2234 			if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2235 				vm_object_shadow(&old_entry->object.vm_object,
2236 					&old_entry->offset,
2237 					atop(old_entry->end - old_entry->start));
2238 				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2239 				/* Transfer the second reference too. */
2240 				vm_object_reference(
2241 				    old_entry->object.vm_object);
2242 				vm_object_deallocate(object);
2243 				object = old_entry->object.vm_object;
2244 			}
2245 			vm_object_clear_flag(object, OBJ_ONEMAPPING);
2246 
2247 			/*
2248 			 * Clone the entry, referencing the shared object.
2249 			 */
2250 			new_entry = vm_map_entry_create(new_map);
2251 			*new_entry = *old_entry;
2252 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2253 			new_entry->wired_count = 0;
2254 
2255 			/*
2256 			 * Insert the entry into the new map -- we know we're
2257 			 * inserting at the end of the new map.
2258 			 */
2259 
2260 			vm_map_entry_link(new_map, new_map->header.prev,
2261 			    new_entry);
2262 
2263 			/*
2264 			 * Update the physical map
2265 			 */
2266 
2267 			pmap_copy(new_map->pmap, old_map->pmap,
2268 			    new_entry->start,
2269 			    (old_entry->end - old_entry->start),
2270 			    old_entry->start);
2271 			break;
2272 
2273 		case VM_INHERIT_COPY:
2274 			/*
2275 			 * Clone the entry and link into the map.
2276 			 */
2277 			new_entry = vm_map_entry_create(new_map);
2278 			*new_entry = *old_entry;
2279 			new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2280 			new_entry->wired_count = 0;
2281 			new_entry->object.vm_object = NULL;
2282 			vm_map_entry_link(new_map, new_map->header.prev,
2283 			    new_entry);
2284 			vm_map_copy_entry(old_map, new_map, old_entry,
2285 			    new_entry);
2286 			break;
2287 		}
2288 		old_entry = old_entry->next;
2289 	}
2290 
2291 	new_map->size = old_map->size;
2292 	old_map->infork = 0;
2293 	vm_map_unlock(old_map);
2294 
2295 	return (vm2);
2296 }
2297 
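/*
 *	vm_map_stack:
 *
 *	Reserve max_ssize bytes at addrbos for a grow-down stack, initially
 *	mapping only the top SGROWSIZ bytes (or all of it, if smaller).  The
 *	unmapped remainder is recorded in the new entry's avail_ssize and is
 *	mapped on demand by vm_map_growstack().
 */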
2298 int
2299 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2300 	      vm_prot_t prot, vm_prot_t max, int cow)
2301 {
2302 	vm_map_entry_t prev_entry;
2303 	vm_map_entry_t new_stack_entry;
2304 	vm_size_t      init_ssize;
2305 	int            rv;
2306 
2307 	mtx_assert(&vm_mtx, MA_OWNED);
2308 	if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2309 		return (KERN_NO_SPACE);
2310 
2311 	if (max_ssize < SGROWSIZ)
2312 		init_ssize = max_ssize;
2313 	else
2314 		init_ssize = SGROWSIZ;
2315 
2316 	vm_map_lock(map);
2317 
2318 	/* If addr is already mapped, no go */
2319 	if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2320 		vm_map_unlock(map);
2321 		return (KERN_NO_SPACE);
2322 	}
2323 
2324 	/* If we can't accommodate max_ssize in the current mapping,
2325 	 * no go.  However, we need to be aware that subsequent user
2326 	 * mappings might map into the space we have reserved for
2327 	 * stack, and currently this space is not protected.
2328 	 *
2329 	 * Hopefully we will at least detect this condition
2330 	 * when we try to grow the stack.
2331 	 */
2332 	if ((prev_entry->next != &map->header) &&
2333 	    (prev_entry->next->start < addrbos + max_ssize)) {
2334 		vm_map_unlock(map);
2335 		return (KERN_NO_SPACE);
2336 	}
2337 
2338 	/* We initially map a stack of only init_ssize.  We will
2339 	 * grow as needed later.  Since this is to be a grow
2340 	 * down stack, we map at the top of the range.
2341 	 *
2342 	 * Note: we would normally expect prot and max to be
2343 	 * VM_PROT_ALL, and cow to be 0.  Possibly we should
2344 	 * eliminate these as input parameters, and just
2345 	 * pass these values here in the insert call.
2346 	 */
2347 	rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2348 	                   addrbos + max_ssize, prot, max, cow);
2349 
2350 	/* Now set the avail_ssize amount */
2351 	if (rv == KERN_SUCCESS){
2352 		if (prev_entry != &map->header)
2353 			vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2354 		new_stack_entry = prev_entry->next;
2355 		if (new_stack_entry->end   != addrbos + max_ssize ||
2356 		    new_stack_entry->start != addrbos + max_ssize - init_ssize)
2357 			panic ("Bad entry start/end for new stack entry");
2358 		else
2359 			new_stack_entry->avail_ssize = max_ssize - init_ssize;
2360 	}
2361 
2362 	vm_map_unlock(map);
2363 	return (rv);
2364 }
2365 
2366 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
2367  * desired address is already mapped, or if we successfully grow
2368  * the stack.  Also returns KERN_SUCCESS if addr is outside the
2369  * stack range (this is strange, but preserves compatibility with
2370  * the grow function in vm_machdep.c).
2371  *
2372  * Will grab vm_mtx if needed
2373  */
2374 int
2375 vm_map_growstack (struct proc *p, vm_offset_t addr)
2376 {
2377 	vm_map_entry_t prev_entry;
2378 	vm_map_entry_t stack_entry;
2379 	vm_map_entry_t new_stack_entry;
2380 	struct vmspace *vm = p->p_vmspace;
2381 	vm_map_t map = &vm->vm_map;
2382 	vm_offset_t    end;
2383 	int      grow_amount;
2384 	int      rv;
2385 	int      is_procstack;
2386 	int	hadvmlock;
2387 
2388 	hadvmlock = mtx_owned(&vm_mtx);
2389 	if (!hadvmlock)
2390 		mtx_lock(&vm_mtx);
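/*
 * myreturn() releases vm_mtx, if it was acquired above, before returning.
 */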
2391 #define myreturn(rval)	do { \
2392 	if (!hadvmlock) \
2393 		mtx_unlock(&vm_mtx); \
2394 	return (rval); \
2395 } while (0)
2396 
2397 Retry:
2398 	vm_map_lock_read(map);
2399 
2400 	/* If addr is already in the entry range, no need to grow. */
2401 	if (vm_map_lookup_entry(map, addr, &prev_entry)) {
2402 		vm_map_unlock_read(map);
2403 		myreturn (KERN_SUCCESS);
2404 	}
2405 
2406 	if ((stack_entry = prev_entry->next) == &map->header) {
2407 		vm_map_unlock_read(map);
2408 		myreturn (KERN_SUCCESS);
2409 	}
2410 	if (prev_entry == &map->header)
2411 		end = stack_entry->start - stack_entry->avail_ssize;
2412 	else
2413 		end = prev_entry->end;
2414 
2415 	/* This next test mimics the old grow function in vm_machdep.c.
2416 	 * It really doesn't quite make sense, but we do it anyway
2417 	 * for compatibility.
2418 	 *
2419 	 * If the stack is not growable, return success.  This signals the
2420 	 * caller to proceed as it normally would with ordinary VM.
2421 	 */
2422 	if (stack_entry->avail_ssize < 1 ||
2423 	    addr >= stack_entry->start ||
2424 	    addr <  stack_entry->start - stack_entry->avail_ssize) {
2425 		vm_map_unlock_read(map);
2426 		myreturn (KERN_SUCCESS);
2427 	}
2428 
2429 	/* Find the minimum grow amount */
2430 	grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
2431 	if (grow_amount > stack_entry->avail_ssize) {
2432 		vm_map_unlock_read(map);
2433 		myreturn (KERN_NO_SPACE);
2434 	}
2435 
2436 	/* If there is no longer enough space between the entries,
2437 	 * fail and adjust the available space.  Note: this
2438 	 * should only happen if the user has mapped into the
2439 	 * stack area after the stack was created, and is
2440 	 * probably an error.
2441 	 *
2442 	 * This also effectively destroys any guard page the user
2443 	 * might have intended by limiting the stack size.
2444 	 */
2445 	if (grow_amount > stack_entry->start - end) {
2446 		if (vm_map_lock_upgrade(map))
2447 			goto Retry;
2448 
2449 		stack_entry->avail_ssize = stack_entry->start - end;
2450 
2451 		vm_map_unlock(map);
2452 		myreturn (KERN_NO_SPACE);
2453 	}
2454 
2455 	is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
2456 
2457 	/* If this is the main process stack, see if we're over the
2458 	 * stack limit.
2459 	 */
2460 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2461 			     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2462 		vm_map_unlock_read(map);
2463 		myreturn (KERN_NO_SPACE);
2464 	}
2465 
2466 	/* Round up the grow amount to a multiple of SGROWSIZ */
2467 	grow_amount = roundup (grow_amount, SGROWSIZ);
2468 	if (grow_amount > stack_entry->avail_ssize) {
2469 		grow_amount = stack_entry->avail_ssize;
2470 	}
2471 	if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
2472 	                     p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
2473 		grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
2474 		              ctob(vm->vm_ssize);
2475 	}
2476 
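	/*
	 * Upgrade to a write lock.  If the upgrade fails, the lock has been
	 * dropped, so redo the lookup from the top.
	 */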
2477 	if (vm_map_lock_upgrade(map))
2478 		goto Retry;
2479 
2480 	/* Get the preliminary new entry start value */
2481 	addr = stack_entry->start - grow_amount;
2482 
2483 	/* If this puts us into the previous entry, cut back our growth
2484 	 * to the available space.  Also, see the note above.
2485 	 */
2486 	if (addr < end) {
2487 		stack_entry->avail_ssize = stack_entry->start - end;
2488 		addr = end;
2489 	}
2490 
2491 	rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2492 			   VM_PROT_ALL,
2493 			   VM_PROT_ALL,
2494 			   0);
2495 
2496 	/* Adjust the available stack space by the amount we grew. */
2497 	if (rv == KERN_SUCCESS) {
2498 		if (prev_entry != &map->header)
2499 			vm_map_clip_end(map, prev_entry, addr);
2500 		new_stack_entry = prev_entry->next;
2501 		if (new_stack_entry->end   != stack_entry->start  ||
2502 		    new_stack_entry->start != addr)
2503 			panic ("Bad stack grow start/end in new stack entry");
2504 		else {
2505 			new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2506 							(new_stack_entry->end -
2507 							 new_stack_entry->start);
2508 			if (is_procstack)
2509 				vm->vm_ssize += btoc(new_stack_entry->end -
2510 						     new_stack_entry->start);
2511 		}
2512 	}
2513 
2514 	vm_map_unlock(map);
2515 	myreturn (rv);
2516 #undef myreturn
2517 }
2518 
2519 /*
2520  * Unshare the specified VM space for exec.  If other processes are
2521  * mapped to it, then create a new one.  The new vmspace starts out empty.
2522  */
2523 
2524 void
2525 vmspace_exec(struct proc *p) {
2526 	struct vmspace *oldvmspace = p->p_vmspace;
2527 	struct vmspace *newvmspace;
2528 	vm_map_t map = &p->p_vmspace->vm_map;
2529 
2530 	mtx_assert(&vm_mtx, MA_OWNED);
2531 	newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
2532 	bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2533 	    (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2534 	/*
2535 	 * This code is written like this for prototype purposes.  The
2536 	 * goal is to avoid running down the vmspace here, but to let the
2537 	 * other processes that are still using the vmspace finally
2538 	 * run it down.  Even though there is little or no chance of blocking
2539 	 * here, it is a good idea to keep this form for future mods.
2540 	 */
2541 	p->p_vmspace = newvmspace;
2542 	pmap_pinit2(vmspace_pmap(newvmspace));
2543 	vmspace_free(oldvmspace);
2544 	if (p == curproc)
2545 		pmap_activate(p);
2546 }
2547 
2548 /*
2549  * Unshare the specified VM space for forcing COW.  This
2550  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2551  */
2552 
2553 void
2554 vmspace_unshare(struct proc *p) {
2555 	struct vmspace *oldvmspace = p->p_vmspace;
2556 	struct vmspace *newvmspace;
2557 
2558 	mtx_assert(&vm_mtx, MA_OWNED);
2559 	if (oldvmspace->vm_refcnt == 1)
2560 		return;
2561 	newvmspace = vmspace_fork(oldvmspace);
2562 	p->p_vmspace = newvmspace;
2563 	pmap_pinit2(vmspace_pmap(newvmspace));
2564 	vmspace_free(oldvmspace);
2565 	if (p == curproc)
2566 		pmap_activate(p);
2567 }
2568 
2569 
2570 /*
2571  *	vm_map_lookup:
2572  *
2573  *	Finds the VM object, offset, and
2574  *	protection for a given virtual address in the
2575  *	specified map, assuming a page fault of the
2576  *	type specified.
2577  *
2578  *	Leaves the map in question locked for read; return
2579  *	values are guaranteed until a vm_map_lookup_done
2580  *	call is performed.  Note that the map argument
2581  *	is in/out; the returned map must be used in
2582  *	the call to vm_map_lookup_done.
2583  *
2584  *	A handle (out_entry) is returned for use in
2585  *	vm_map_lookup_done, to make that fast.
2586  *
2587  *	If a lookup is requested with "write protection"
2588  *	specified, the map may be changed to perform virtual
2589  *	copying operations, although the data referenced will
2590  *	remain the same.
2591  *
2592  *	Can block while locking maps and while calling vm_object_shadow().
2593  *	Will drop/reacquire the vm_mtx.
2594  */
2595 int
2596 vm_map_lookup(vm_map_t *var_map,		/* IN/OUT */
2597 	      vm_offset_t vaddr,
2598 	      vm_prot_t fault_typea,
2599 	      vm_map_entry_t *out_entry,	/* OUT */
2600 	      vm_object_t *object,		/* OUT */
2601 	      vm_pindex_t *pindex,		/* OUT */
2602 	      vm_prot_t *out_prot,		/* OUT */
2603 	      boolean_t *wired)			/* OUT */
2604 {
2605 	vm_map_entry_t entry;
2606 	vm_map_t map = *var_map;
2607 	vm_prot_t prot;
2608 	vm_prot_t fault_type = fault_typea;
2609 
2610 	mtx_assert(&vm_mtx, MA_OWNED);
2611 RetryLookup:;
2612 
2613 	/*
2614 	 * Lookup the faulting address.
2615 	 */
2616 
2617 	vm_map_lock_read(map);
2618 
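/*
 * RETURN(why) drops the read lock on the map before returning "why".
 */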
2619 #define	RETURN(why) \
2620 		{ \
2621 		vm_map_unlock_read(map); \
2622 		return(why); \
2623 		}
2624 
2625 	/*
2626 	 * If the map has an interesting hint, try it before calling the
2627 	 * full-blown lookup routine.
2628 	 */
2629 
2630 	entry = map->hint;
2631 
2632 	*out_entry = entry;
2633 
2634 	if ((entry == &map->header) ||
2635 	    (vaddr < entry->start) || (vaddr >= entry->end)) {
2636 		vm_map_entry_t tmp_entry;
2637 
2638 		/*
2639 		 * Entry was either not a valid hint, or the vaddr was not
2640 		 * contained in the entry, so do a full lookup.
2641 		 */
2642 		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
2643 			RETURN(KERN_INVALID_ADDRESS);
2644 
2645 		entry = tmp_entry;
2646 		*out_entry = entry;
2647 	}
2648 
2649 	/*
2650 	 * Handle submaps.
2651 	 */
2652 
2653 	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2654 		vm_map_t old_map = map;
2655 
2656 		*var_map = map = entry->object.sub_map;
2657 		vm_map_unlock_read(old_map);
2658 		goto RetryLookup;
2659 	}
2660 
2661 	/*
2662 	 * Check whether this task is allowed to have this page.
2663 	 * Note the special case for MAP_ENTRY_COW
2664 	 * pages with an override.  This is to implement a forced
2665 	 * COW for debuggers.
2666 	 */
2667 
2668 	if (fault_type & VM_PROT_OVERRIDE_WRITE)
2669 		prot = entry->max_protection;
2670 	else
2671 		prot = entry->protection;
2672 
2673 	fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
2674 	if ((fault_type & prot) != fault_type) {
2675 			RETURN(KERN_PROTECTION_FAILURE);
2676 	}
2677 
2678 	if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
2679 	    (entry->eflags & MAP_ENTRY_COW) &&
2680 	    (fault_type & VM_PROT_WRITE) &&
2681 	    (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
2682 		RETURN(KERN_PROTECTION_FAILURE);
2683 	}
2684 
2685 	/*
2686 	 * If this page is not pageable, we have to get it for all possible
2687 	 * accesses.
2688 	 */
2689 
2690 	*wired = (entry->wired_count != 0);
2691 	if (*wired)
2692 		prot = fault_type = entry->protection;
2693 
2694 	/*
2695 	 * If the entry was copy-on-write, we either shadow it or demote access.
2696 	 */
2697 
2698 	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
2699 		/*
2700 		 * If we want to write the page, we may as well handle that
2701 		 * now since we've got the map locked.
2702 		 *
2703 		 * If we don't need to write the page, we just demote the
2704 		 * permissions allowed.
2705 		 */
2706 
2707 		if (fault_type & VM_PROT_WRITE) {
2708 			/*
2709 			 * Make a new object, and place it in the object
2710 			 * chain.  Note that no new references have appeared
2711 			 * -- one just moved from the map to the new
2712 			 * object.
2713 			 */
2714 
2715 			if (vm_map_lock_upgrade(map))
2716 				goto RetryLookup;
2717 
2718 			vm_object_shadow(
2719 			    &entry->object.vm_object,
2720 			    &entry->offset,
2721 			    atop(entry->end - entry->start));
2722 
2723 			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
2724 			vm_map_lock_downgrade(map);
2725 		} else {
2726 			/*
2727 			 * We're attempting to read a copy-on-write page --
2728 			 * don't allow writes.
2729 			 */
2730 
2731 			prot &= ~VM_PROT_WRITE;
2732 		}
2733 	}
2734 
2735 	/*
2736 	 * Create an object if necessary.
2737 	 */
2738 	if (entry->object.vm_object == NULL &&
2739 	    !map->system_map) {
2740 		if (vm_map_lock_upgrade(map))
2741 			goto RetryLookup;
2742 
2743 		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
2744 		    atop(entry->end - entry->start));
2745 		entry->offset = 0;
2746 		vm_map_lock_downgrade(map);
2747 	}
2748 
2749 	/*
2750 	 * Return the object/offset from this entry.  If the entry was
2751 	 * copy-on-write or empty, it has been fixed up.
2752 	 */
2753 
2754 	*pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
2755 	*object = entry->object.vm_object;
2756 
2757 	/*
2758 	 * Return the protection actually granted for this lookup.
2759 	 */
2760 
2761 	*out_prot = prot;
2762 	return (KERN_SUCCESS);
2763 
2764 #undef	RETURN
2765 }
2766 
2767 /*
2768  *	vm_map_lookup_done:
2769  *
2770  *	Releases locks acquired by a vm_map_lookup
2771  *	(according to the handle returned by that lookup).
2772  */
2773 
2774 void
2775 vm_map_lookup_done(map, entry)
2776 	vm_map_t map;
2777 	vm_map_entry_t entry;
2778 {
2779 	/*
2780 	 * Unlock the main-level map
2781 	 */
2782 
2783 	mtx_assert(&vm_mtx, MA_OWNED);
2784 	vm_map_unlock_read(map);
2785 }
2786 
2787 /*
2788  * Implement uiomove with VM operations.  This (and its collateral changes)
2789  * supports every combination of source object modification and COW-type
2790  * operation.
2791  */
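/*
 * For each chunk of the user range this either redirects an existing
 * vnode-backed map entry to srcobject, redirects the backing object of a
 * suitable anonymous object, or replaces the entry outright with a
 * copy-on-write mapping of srcobject (a "logical mmap").
 */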
2792 int
2793 vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages)
2794 	vm_map_t mapa;
2795 	vm_object_t srcobject;
2796 	off_t cp;
2797 	int cnta;
2798 	vm_offset_t uaddra;
2799 	int *npages;
2800 {
2801 	vm_map_t map;
2802 	vm_object_t first_object, oldobject, object;
2803 	vm_map_entry_t entry;
2804 	vm_prot_t prot;
2805 	boolean_t wired;
2806 	int tcnt, rv;
2807 	vm_offset_t uaddr, start, end, tend;
2808 	vm_pindex_t first_pindex, osize, oindex;
2809 	off_t ooffset;
2810 	int cnt;
2811 
2812 	mtx_assert(&vm_mtx, MA_OWNED);
2813 	if (npages)
2814 		*npages = 0;
2815 
2816 	cnt = cnta;
2817 	uaddr = uaddra;
2818 
2819 	while (cnt > 0) {
2820 		map = mapa;
2821 
2822 		if ((vm_map_lookup(&map, uaddr,
2823 			VM_PROT_READ, &entry, &first_object,
2824 			&first_pindex, &prot, &wired)) != KERN_SUCCESS) {
2825 			return EFAULT;
2826 		}
2827 
2828 		vm_map_clip_start(map, entry, uaddr);
2829 
2830 		tcnt = cnt;
2831 		tend = uaddr + tcnt;
2832 		if (tend > entry->end) {
2833 			tcnt = entry->end - uaddr;
2834 			tend = entry->end;
2835 		}
2836 
2837 		vm_map_clip_end(map, entry, tend);
2838 
2839 		start = entry->start;
2840 		end = entry->end;
2841 
2842 		osize = atop(tcnt);
2843 
2844 		oindex = OFF_TO_IDX(cp);
2845 		if (npages) {
2846 			vm_pindex_t idx;
2847 			for (idx = 0; idx < osize; idx++) {
2848 				vm_page_t m;
2849 				if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
2850 					vm_map_lookup_done(map, entry);
2851 					return 0;
2852 				}
2853 				/*
2854 				 * disallow busy or invalid pages, but allow
2855 				 * m->busy pages if they are entirely valid.
2856 				 */
2857 				if ((m->flags & PG_BUSY) ||
2858 					((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
2859 					vm_map_lookup_done(map, entry);
2860 					return 0;
2861 				}
2862 			}
2863 		}
2864 
2865 /*
2866  * If we are changing an existing map entry, just redirect
2867  * the object, and change mappings.
2868  */
2869 		if ((first_object->type == OBJT_VNODE) &&
2870 			((oldobject = entry->object.vm_object) == first_object)) {
2871 
2872 			if ((entry->offset != cp) || (oldobject != srcobject)) {
2873 				/*
2874    				* Remove old window into the file
2875    				*/
2876 				pmap_remove (map->pmap, uaddr, tend);
2877 
2878 				/*
2879    				* Force copy on write for mmaped regions
2880    				*/
2881 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2882 
2883 				/*
2884    				* Point the object appropriately
2885    				*/
2886 				if (oldobject != srcobject) {
2887 
2888 				/*
2889    				* Set the object optimization hint flag
2890    				*/
2891 					vm_object_set_flag(srcobject, OBJ_OPT);
2892 					vm_object_reference(srcobject);
2893 					entry->object.vm_object = srcobject;
2894 
2895 					if (oldobject) {
2896 						vm_object_deallocate(oldobject);
2897 					}
2898 				}
2899 
2900 				entry->offset = cp;
2901 				map->timestamp++;
2902 			} else {
2903 				pmap_remove (map->pmap, uaddr, tend);
2904 			}
2905 
2906 		} else if ((first_object->ref_count == 1) &&
2907 			(first_object->size == osize) &&
2908 			((first_object->type == OBJT_DEFAULT) ||
2909 				(first_object->type == OBJT_SWAP)) ) {
2910 
2911 			oldobject = first_object->backing_object;
2912 
2913 			if ((first_object->backing_object_offset != cp) ||
2914 				(oldobject != srcobject)) {
2915 				/*
2916    				* Remove old window into the file
2917    				*/
2918 				pmap_remove (map->pmap, uaddr, tend);
2919 
2920 				/*
2921 				 * Remove unneeded old pages
2922 				 */
2923 				vm_object_page_remove(first_object, 0, 0, 0);
2924 
2925 				/*
2926 				 * Invalidate swap space
2927 				 */
2928 				if (first_object->type == OBJT_SWAP) {
2929 					swap_pager_freespace(first_object,
2930 						0,
2931 						first_object->size);
2932 				}
2933 
2934 				/*
2935    				* Force copy on write for mmaped regions
2936    				*/
2937 				vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2938 
2939 				/*
2940    				* Point the object appropriately
2941    				*/
2942 				if (oldobject != srcobject) {
2943 
2944 				/*
2945    				* Set the object optimization hint flag
2946    				*/
2947 					vm_object_set_flag(srcobject, OBJ_OPT);
2948 					vm_object_reference(srcobject);
2949 
2950 					if (oldobject) {
2951 						TAILQ_REMOVE(&oldobject->shadow_head,
2952 							first_object, shadow_list);
2953 						oldobject->shadow_count--;
2954 						/* XXX bump generation? */
2955 						vm_object_deallocate(oldobject);
2956 					}
2957 
2958 					TAILQ_INSERT_TAIL(&srcobject->shadow_head,
2959 						first_object, shadow_list);
2960 					srcobject->shadow_count++;
2961 					/* XXX bump generation? */
2962 
2963 					first_object->backing_object = srcobject;
2964 				}
2965 				first_object->backing_object_offset = cp;
2966 				map->timestamp++;
2967 			} else {
2968 				pmap_remove (map->pmap, uaddr, tend);
2969 			}
2970 /*
2971  * Otherwise, we have to do a logical mmap.
2972  */
2973 		} else {
2974 
2975 			vm_object_set_flag(srcobject, OBJ_OPT);
2976 			vm_object_reference(srcobject);
2977 
2978 			pmap_remove (map->pmap, uaddr, tend);
2979 
2980 			vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
2981 			vm_map_lock_upgrade(map);
2982 
2983 			if (entry == &map->header) {
2984 				map->first_free = &map->header;
2985 			} else if (map->first_free->start >= start) {
2986 				map->first_free = entry->prev;
2987 			}
2988 
2989 			SAVE_HINT(map, entry->prev);
2990 			vm_map_entry_delete(map, entry);
2991 
2992 			object = srcobject;
2993 			ooffset = cp;
2994 
2995 			rv = vm_map_insert(map, object, ooffset, start, tend,
2996 				VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
2997 
2998 			if (rv != KERN_SUCCESS)
2999 				panic("vm_uiomove: could not insert new entry: %d", rv);
3000 		}
3001 
3002 /*
3003  * Map the window directly, if it is already in memory
3004  */
3005 		pmap_object_init_pt(map->pmap, uaddr,
3006 			srcobject, oindex, tcnt, 0);
3007 
3008 		map->timestamp++;
3009 		vm_map_unlock(map);
3010 
3011 		cnt -= tcnt;
3012 		uaddr += tcnt;
3013 		cp += tcnt;
3014 		if (npages)
3015 			*npages += osize;
3016 	}
3017 	return 0;
3018 }
3019 
3020 /*
3021  * Performs the copy_on_write operations necessary to allow the virtual copies
3022  * into user space to work.  This has to be called for write(2) system calls
3023  * from other processes, file unlinking, and file size shrinkage.
3024  *
3025  * Requires that the vm_mtx is held
3026  */
3027 void
3028 vm_freeze_copyopts(object, froma, toa)
3029 	vm_object_t object;
3030 	vm_pindex_t froma, toa;
3031 {
3032 	int rv;
3033 	vm_object_t robject;
3034 	vm_pindex_t idx;
3035 
3036 	mtx_assert(&vm_mtx, MA_OWNED);
3037 	if ((object == NULL) ||
3038 		((object->flags & OBJ_OPT) == 0))
3039 		return;
3040 
3041 	if (object->shadow_count > object->ref_count)
3042 		panic("vm_freeze_copyopts: sc > rc");
3043 
3044 	while((robject = TAILQ_FIRST(&object->shadow_head)) != NULL) {
3045 		vm_pindex_t bo_pindex;
3046 		vm_page_t m_in, m_out;
3047 
3048 		bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
3049 
3050 		vm_object_reference(robject);
3051 
3052 		vm_object_pip_wait(robject, "objfrz");
3053 
3054 		if (robject->ref_count == 1) {
3055 			vm_object_deallocate(robject);
3056 			continue;
3057 		}
3058 
3059 		vm_object_pip_add(robject, 1);
3060 
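		/*
		 * Fill in each page of the shadow object that is still
		 * invalid by copying it from the backing object, so the
		 * shadow no longer depends on our pages.
		 */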
3061 		for (idx = 0; idx < robject->size; idx++) {
3062 
3063 			m_out = vm_page_grab(robject, idx,
3064 						VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3065 
3066 			if (m_out->valid == 0) {
3067 				m_in = vm_page_grab(object, bo_pindex + idx,
3068 						VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
3069 				if (m_in->valid == 0) {
3070 					rv = vm_pager_get_pages(object, &m_in, 1, 0);
3071 					if (rv != VM_PAGER_OK) {
3072 						printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
3073 						continue;
3074 					}
3075 					vm_page_deactivate(m_in);
3076 				}
3077 
3078 				vm_page_protect(m_in, VM_PROT_NONE);
3079 				pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out));
3080 				m_out->valid = m_in->valid;
3081 				vm_page_dirty(m_out);
3082 				vm_page_activate(m_out);
3083 				vm_page_wakeup(m_in);
3084 			}
3085 			vm_page_wakeup(m_out);
3086 		}
3087 
3088 		object->shadow_count--;
3089 		object->ref_count--;
3090 		TAILQ_REMOVE(&object->shadow_head, robject, shadow_list);
3091 		robject->backing_object = NULL;
3092 		robject->backing_object_offset = 0;
3093 
3094 		vm_object_pip_wakeup(robject);
3095 		vm_object_deallocate(robject);
3096 	}
3097 
3098 	vm_object_clear_flag(object, OBJ_OPT);
3099 }
3100 
3101 #include "opt_ddb.h"
3102 #ifdef DDB
3103 #include <sys/kernel.h>
3104 
3105 #include <ddb/ddb.h>
3106 
3107 /*
3108  *	vm_map_print:	[ debug ]
3109  */
3110 DB_SHOW_COMMAND(map, vm_map_print)
3111 {
3112 	static int nlines;
3113 	/* XXX convert args. */
3114 	vm_map_t map = (vm_map_t)addr;
3115 	boolean_t full = have_addr;
3116 
3117 	vm_map_entry_t entry;
3118 
3119 	db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
3120 	    (void *)map,
3121 	    (void *)map->pmap, map->nentries, map->timestamp);
3122 	nlines++;
3123 
3124 	if (!full && db_indent)
3125 		return;
3126 
3127 	db_indent += 2;
3128 	for (entry = map->header.next; entry != &map->header;
3129 	    entry = entry->next) {
3130 		db_iprintf("map entry %p: start=%p, end=%p\n",
3131 		    (void *)entry, (void *)entry->start, (void *)entry->end);
3132 		nlines++;
3133 		{
3134 			static char *inheritance_name[4] =
3135 			{"share", "copy", "none", "donate_copy"};
3136 
3137 			db_iprintf(" prot=%x/%x/%s",
3138 			    entry->protection,
3139 			    entry->max_protection,
3140 			    inheritance_name[(int)(unsigned char)entry->inheritance]);
3141 			if (entry->wired_count != 0)
3142 				db_printf(", wired");
3143 		}
3144 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
3145 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3146 			db_printf(", share=%p, offset=0x%lx\n",
3147 			    (void *)entry->object.sub_map,
3148 			    (long)entry->offset);
3149 			nlines++;
3150 			if ((entry->prev == &map->header) ||
3151 			    (entry->prev->object.sub_map !=
3152 				entry->object.sub_map)) {
3153 				db_indent += 2;
3154 				vm_map_print((db_expr_t)(intptr_t)
3155 					     entry->object.sub_map,
3156 					     full, 0, (char *)0);
3157 				db_indent -= 2;
3158 			}
3159 		} else {
3160 			/* XXX no %qd in kernel.  Truncate entry->offset. */
3161 			db_printf(", object=%p, offset=0x%lx",
3162 			    (void *)entry->object.vm_object,
3163 			    (long)entry->offset);
3164 			if (entry->eflags & MAP_ENTRY_COW)
3165 				db_printf(", copy (%s)",
3166 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
3167 			db_printf("\n");
3168 			nlines++;
3169 
3170 			if ((entry->prev == &map->header) ||
3171 			    (entry->prev->object.vm_object !=
3172 				entry->object.vm_object)) {
3173 				db_indent += 2;
3174 				vm_object_print((db_expr_t)(intptr_t)
3175 						entry->object.vm_object,
3176 						full, 0, (char *)0);
3177 				nlines += 4;
3178 				db_indent -= 2;
3179 			}
3180 		}
3181 	}
3182 	db_indent -= 2;
3183 	if (db_indent == 0)
3184 		nlines = 0;
3185 }
3186 
3187 
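/*
 *	procvm:	[ debug ]
 *
 *	Print the vmspace and map of the given process (curproc by default).
 */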
3188 DB_SHOW_COMMAND(procvm, procvm)
3189 {
3190 	struct proc *p;
3191 
3192 	if (have_addr) {
3193 		p = (struct proc *) addr;
3194 	} else {
3195 		p = curproc;
3196 	}
3197 
3198 	db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
3199 	    (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
3200 	    (void *)vmspace_pmap(p->p_vmspace));
3201 
3202 	vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
3203 }
3204 
3205 #endif /* DDB */
3206